From 316cc8b607db558b982bd9f5a0367e1d0995b091 Mon Sep 17 00:00:00 2001 From: Kyle Chesney Date: Wed, 25 Jan 2023 13:14:24 -0500 Subject: [PATCH 01/14] Add batch_size parm to dbx calls in sync_table Prevents possible error: Expression tree is too large (maximum depth 1000) --- R/write_data.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/write_data.R b/R/write_data.R index 94dc82c..6639f19 100644 --- a/R/write_data.R +++ b/R/write_data.R @@ -151,7 +151,8 @@ sync_table <- function( conn = conn, table = table_name, records = data_diff_output$update_records, - where_cols = primary_key + where_cols = primary_key, + batch_size = 100 ) update_n <- nrow(data_diff_output$update_records) } @@ -160,7 +161,8 @@ sync_table <- function( dbx::dbxDelete( conn = conn, table = table_name, - where = data_diff_output$delete_records + where = data_diff_output$delete_records, + batch_size = 100 ) delete_n <- nrow(data_diff_output$delete_records) } From e4a465da1aecdec78cddd16705b3b1eb0ae431ae Mon Sep 17 00:00:00 2001 From: Philip Chase Date: Wed, 25 Jan 2023 13:55:00 -0500 Subject: [PATCH 02/14] Add batch_size parm to dbx calls in sync_table_2 --- R/write_data.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/write_data.R b/R/write_data.R index 6639f19..550961d 100644 --- a/R/write_data.R +++ b/R/write_data.R @@ -249,7 +249,8 @@ sync_table_2 <- function( conn = conn, table = table_name, records = update_records, - where_cols = target_pk + where_cols = target_pk, + batch_size = 100 ) update_n <- nrow(update_records) } else { @@ -266,7 +267,8 @@ sync_table_2 <- function( dbx::dbxDelete( conn = conn, table = table_name, - where = delete_records + where = delete_records, + batch_size = 100 ) delete_n <- nrow(delete_records) } else { From c9092e389d38d9e8641d99b21c8250d8ccd69a36 Mon Sep 17 00:00:00 2001 From: Kyle Chesney Date: Mon, 6 Feb 2023 14:37:44 -0500 Subject: [PATCH 03/14] Address fatal bug in sync_table caused when delete = T but 
there are no records to delete --- R/write_data.R | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/R/write_data.R b/R/write_data.R index 550961d..2cb3aac 100644 --- a/R/write_data.R +++ b/R/write_data.R @@ -158,13 +158,19 @@ sync_table <- function( } if (delete) { - dbx::dbxDelete( - conn = conn, - table = table_name, - where = data_diff_output$delete_records, - batch_size = 100 - ) - delete_n <- nrow(data_diff_output$delete_records) + # HACK: if there are no deletions, delete_records is NA rather than an empty dataframe + # this causes "Error: x must be character or SQL" + if (is.na(data_diff_output$delete_records)) { + delete_n <- 0 + } else { + dbx::dbxDelete( + conn = conn, + table = table_name, + where = data_diff_output$delete_records, + batch_size = 100 + ) + delete_n <- nrow(data_diff_output$delete_records) + } } result <- list( From dfb284f382c20a71f8145c865105b95bb010f4ff Mon Sep 17 00:00:00 2001 From: Philip Chase Date: Mon, 6 Feb 2023 15:58:04 -0500 Subject: [PATCH 04/14] Revise test for sync_table record deletion bug Test dataframe rather than NA. Replace update-only test using our data with insert/update/delete test using mtcars. 
--- R/write_data.R | 2 +- tests/testthat/test-write_data.R | 91 ++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+), 1 deletion(-) diff --git a/R/write_data.R b/R/write_data.R index 2cb3aac..dd3a49a 100644 --- a/R/write_data.R +++ b/R/write_data.R @@ -160,7 +160,7 @@ sync_table <- function( if (delete) { # HACK: if there are no deletions, delete_records is NA rather than an empty dataframe # this causes "Error: x must be character or SQL" - if (is.na(data_diff_output$delete_records)) { + if (!is.data.frame(data_diff_output$delete_records)) { delete_n <- 0 } else { dbx::dbxDelete( diff --git a/tests/testthat/test-write_data.R b/tests/testthat/test-write_data.R index 1f4277c..7225158 100644 --- a/tests/testthat/test-write_data.R +++ b/tests/testthat/test-write_data.R @@ -212,3 +212,94 @@ testthat::test_that( testthat::expect_equal(nrow(result$update_records), result$update_n) } ) + +testthat::test_that( + "sync_table can do an insert/update/delete", + { + # make test data + mtcars_for_db <- mtcars %>% + mutate(model = row.names(mtcars)) %>% + mutate(id = row_number()) %>% + select(id, model, everything()) + damaged_mtcars_for_db <- mtcars_for_db %>% + filter(id <= 20) %>% + mutate(cyl = if_else(id <= 10, 1, cyl)) %>% + rbind(mtcars_for_db %>% sample_n(10) %>% mutate(id = id+100)) + + # write damaged data to a DB + conn <- dbConnect(RSQLite::SQLite(), dbname = ":memory:") + dbWriteTable(conn = conn, name = "mtcars", value = damaged_mtcars_for_db) + + # determine what we want to update + diff_output <- dataset_diff( + source = mtcars_for_db, + source_pk = "id", + target = damaged_mtcars_for_db, + target_pk = "id" + ) + + # update the data + result <- sync_table( + conn = conn, + table_name = "mtcars", + primary_key = "id", + data_diff_output = diff_output, + insert = T, + update = T, + delete = T + ) + + # read the updated table + mtcars_from_db <- tbl(conn, "mtcars") %>% + collect() %>% + mutate(id = as.integer(id)) + + # test that the data reads back 
correctly + testthat::expect_true(all_equal(mtcars_for_db, mtcars_from_db)) + } +) + +testthat::test_that( + "sync_table works when deletion count = 0", + { + # make test data + mtcars_for_db <- mtcars %>% + mutate(model = row.names(mtcars)) %>% + mutate(id = row_number()) %>% + select(id, model, everything()) + damaged_mtcars_for_db <- mtcars_for_db %>% + filter(id <= 20) %>% + mutate(cyl = if_else(id <= 10, 1, cyl)) + + # write damaged data to a DB + conn <- dbConnect(RSQLite::SQLite(), dbname = ":memory:") + dbWriteTable(conn = conn, name = "mtcars", value = damaged_mtcars_for_db) + + # determine what we want to update + diff_output <- dataset_diff( + source = mtcars_for_db, + source_pk = "id", + target = damaged_mtcars_for_db, + target_pk = "id" + ) + + # update the data + result <- sync_table( + conn = conn, + table_name = "mtcars", + primary_key = "id", + data_diff_output = diff_output, + insert = T, + update = T, + delete = T + ) + + # read the updated table + mtcars_from_db <- tbl(conn, "mtcars") %>% + collect() %>% + mutate(id = as.integer(id)) + + # test that the data reads back correctly + testthat::expect_true(all_equal(mtcars_for_db, mtcars_from_db)) + } +) From de13ee32c01110832a5a42fbac2d6edf5af8f888 Mon Sep 17 00:00:00 2001 From: Philip Chase Date: Mon, 6 Feb 2023 16:11:57 -0500 Subject: [PATCH 05/14] Modern tests of sync_table_2 Replace update-only test using our data with insert/update/delete test using mtcars. 
--- tests/testthat/test-write_data.R | 147 ++++++++++++++++--------------- 1 file changed, 75 insertions(+), 72 deletions(-) diff --git a/tests/testthat/test-write_data.R b/tests/testthat/test-write_data.R index 7225158..2604bc5 100644 --- a/tests/testthat/test-write_data.R +++ b/tests/testthat/test-write_data.R @@ -138,83 +138,53 @@ test_that("on error, write_to_sql_db does not log a failure when continue_on_err }) testthat::test_that( - "sync_table can do an update", + "sync_table can do an insert/update/delete", { + # make test data + mtcars_for_db <- mtcars %>% + mutate(model = row.names(mtcars)) %>% + mutate(id = row_number()) %>% + select(id, model, everything()) + damaged_mtcars_for_db <- mtcars_for_db %>% + filter(id <= 20) %>% + mutate(cyl = if_else(id <= 10, 1, cyl)) %>% + rbind(mtcars_for_db %>% sample_n(10) %>% mutate(id = id+100)) - df = dataset_diff_test_user_data - - # Set up target table - drv <- RSQLite::SQLite() - con <- connect_to_db(drv) - table_name <- "target" - - DBI::dbWriteTable( - conn = con, - name = table_name, - value = df$target, - schema = table_name, - overwrite = T - ) + # write damaged data to a DB + conn <- dbConnect(RSQLite::SQLite(), dbname = ":memory:") + dbWriteTable(conn = conn, name = "mtcars", value = damaged_mtcars_for_db) # determine what we want to update diff_output <- dataset_diff( - source = df$source, - source_pk = df$source_pk, - target = df$target, - target_pk = df$target_pk + source = mtcars_for_db, + source_pk = "id", + target = damaged_mtcars_for_db, + target_pk = "id" ) # update the data - sync_table( - conn = con, - table_name = table_name, - primary_key = df$target_pk, - data_diff_output = diff_output - ) - - # test that the target was updated - testthat::expect_true(dplyr::all_equal(tbl(con, "target") %>% dplyr::collect(), sync_table_test_user_data_result)) - } -) - -testthat::test_that( - "sync_table_2 can do an update", - { - - df = dataset_diff_test_user_data - - # Set up target table - drv <- 
RSQLite::SQLite() - con <- connect_to_db(drv) - table_name <- "target" - - DBI::dbWriteTable( - conn = con, - name = table_name, - value = df$target, - schema = table_name, - overwrite = T + result <- sync_table( + conn = conn, + table_name = "mtcars", + primary_key = "id", + data_diff_output = diff_output, + insert = T, + update = T, + delete = T ) - # update the data - result <- sync_table_2( - conn = con, - table_name = table_name, - source = df$source, - source_pk = df$source_pk, - target = df$target, - target_pk = df$target_pk - ) + # read the updated table + mtcars_from_db <- tbl(conn, "mtcars") %>% + collect() %>% + mutate(id = as.integer(id)) - # test that the target was updated - testthat::expect_true(dplyr::all_equal(tbl(con, "target") %>% dplyr::collect(), sync_table_test_user_data_result)) - # test that the number of rows updated matches record count of the update dataframe - testthat::expect_equal(nrow(result$update_records), result$update_n) + # test that the data reads back correctly + testthat::expect_true(all_equal(mtcars_for_db, mtcars_from_db)) } ) testthat::test_that( - "sync_table can do an insert/update/delete", + "sync_table works when deletion count = 0", { # make test data mtcars_for_db <- mtcars %>% @@ -223,8 +193,7 @@ testthat::test_that( select(id, model, everything()) damaged_mtcars_for_db <- mtcars_for_db %>% filter(id <= 20) %>% - mutate(cyl = if_else(id <= 10, 1, cyl)) %>% - rbind(mtcars_for_db %>% sample_n(10) %>% mutate(id = id+100)) + mutate(cyl = if_else(id <= 10, 1, cyl)) # write damaged data to a DB conn <- dbConnect(RSQLite::SQLite(), dbname = ":memory:") @@ -260,7 +229,7 @@ testthat::test_that( ) testthat::test_that( - "sync_table works when deletion count = 0", + "sync_table_2 can do an insert/update/delete", { # make test data mtcars_for_db <- mtcars %>% @@ -269,26 +238,60 @@ testthat::test_that( select(id, model, everything()) damaged_mtcars_for_db <- mtcars_for_db %>% filter(id <= 20) %>% - mutate(cyl = if_else(id <= 10, 
1, cyl)) + mutate(cyl = if_else(id <= 10, 1, cyl)) %>% + rbind(mtcars_for_db %>% sample_n(10) %>% mutate(id = id+100)) # write damaged data to a DB conn <- dbConnect(RSQLite::SQLite(), dbname = ":memory:") dbWriteTable(conn = conn, name = "mtcars", value = damaged_mtcars_for_db) - # determine what we want to update - diff_output <- dataset_diff( + # update the data + result <- sync_table_2( + conn = conn, + table_name = "mtcars", source = mtcars_for_db, source_pk = "id", target = damaged_mtcars_for_db, - target_pk = "id" + target_pk = "id", + insert = T, + update = T, + delete = T ) + # read the updated table + mtcars_from_db <- tbl(conn, "mtcars") %>% + collect() %>% + mutate(id = as.integer(id)) + + # test that the data reads back correctly + testthat::expect_true(all_equal(mtcars_for_db, mtcars_from_db)) + } +) + +testthat::test_that( + "sync_table_2 works when deletion count = 0", + { + # make test data + mtcars_for_db <- mtcars %>% + mutate(model = row.names(mtcars)) %>% + mutate(id = row_number()) %>% + select(id, model, everything()) + damaged_mtcars_for_db <- mtcars_for_db %>% + filter(id <= 20) %>% + mutate(cyl = if_else(id <= 10, 1, cyl)) + + # write damaged data to a DB + conn <- dbConnect(RSQLite::SQLite(), dbname = ":memory:") + dbWriteTable(conn = conn, name = "mtcars", value = damaged_mtcars_for_db) + # update the data - result <- sync_table( + result <- sync_table_2( conn = conn, table_name = "mtcars", - primary_key = "id", - data_diff_output = diff_output, + source = mtcars_for_db, + source_pk = "id", + target = damaged_mtcars_for_db, + target_pk = "id", insert = T, update = T, delete = T From 73622f150e01035fd4021dd950b97dd06f89a8b5 Mon Sep 17 00:00:00 2001 From: Philip Chase Date: Thu, 16 Feb 2023 10:15:41 -0500 Subject: [PATCH 06/14] Add randomization management functions and a sample ETL --- R/randomization.R | 284 +++++++++++++++++++++++++++++ etl/copy_allocated_randomization.R | 49 +++++ 2 files changed, 333 insertions(+) create mode 100644 
R/randomization.R create mode 100644 etl/copy_allocated_randomization.R diff --git a/R/randomization.R b/R/randomization.R new file mode 100644 index 0000000..950d650 --- /dev/null +++ b/R/randomization.R @@ -0,0 +1,284 @@ +export_allocation_tables_from_project <- function(conn, + project_id_to_export) { + # Get column names from randomization_source + # target_field and target_event describe the randomization group + # source_fieldN and source_eventN describe the randomization variables + # Pivot the data longer to prep it for renaming the strata fields in Allocations + column_names_in_source <- dplyr::tbl(conn, "redcap_randomization") |> + dplyr::filter(project_id == project_id_to_export) |> + dplyr::collect() |> + dplyr::select(target_field, starts_with("source_field")) |> + tidyr::pivot_longer( + cols = contains("field"), + names_to = "strata", + values_to = "redcap_field_name" + ) |> + dplyr::filter(!is.na(redcap_field_name)) + + rid_to_export <- dplyr::tbl(conn, "redcap_randomization") |> + dplyr::filter(project_id == !!project_id_to_export) |> + dplyr::collect() |> + dplyr::pull(rid) + + # Allocation data is in allocation_source + allocations <- + dplyr::tbl(conn, "redcap_randomization_allocation") |> + dplyr::filter(rid == rid_to_export) |> + dplyr::collect() |> + dplyr::select(aid, project_status, target_field, starts_with("source_field")) |> + # Pivot longer to facilitate renaming the abstract field names to redcap field names + tidyr::pivot_longer( + cols = contains("field"), + names_to = "strata", + values_to = "value" + ) |> + dplyr::filter(!is.na(value)) |> + # dplyr::rename the *field* columns + dplyr::inner_join(column_names_in_source, by = "strata") |> + dplyr::select(-strata) |> + tidyr::pivot_wider( + id_cols = c("aid", "project_status"), + names_from = "redcap_field_name", + values_from = "value" + ) + + return(allocations) +} + + +# Write the allocation tables +write_allocations <- function(project_status_to_write, allocations, target_directory 
= ".") { + base_name <- "RandomizationAllocation" + date_time_stamp <- format(get_script_run_time(), "%Y%m%d%H%M%S") + project_statuses <- setNames(c(0, 1), c("development", "production")) + + if (!fs::dir_exists(here::here(target_directory))) { + fs::dir_create(here::here(target_directory)) + } + + allocations |> + dplyr::filter(project_status == project_status_to_write) |> + dplyr::select(-aid, -project_status) |> + readr::write_csv(here::here(target_directory, paste(base_name, names(project_statuses)[project_status_to_write + 1], date_time_stamp, sep = "_"))) +} + + +create_randomization_row <- function(source_conn, + target_conn, + source_project_id, + target_project_id) { + # get the current state + target_project_randomization_state <- dplyr::tbl(target_conn, "redcap_randomization") |> + dplyr::filter(project_id == target_project_id) |> + dplyr::collect() + + # create row in redcap_randomization on target if there is no current state + if (nrow(target_project_randomization_state) == 0) { + # get replacement event_ids + source_event_ids <- dplyr::tbl(source_conn, "redcap_events_arms") |> + dplyr::filter(project_id == source_project_id) |> + dplyr::inner_join(dplyr::tbl(source_conn, "redcap_events_metadata"), by = "arm_id") |> + dplyr::collect() + + target_event_ids <- dplyr::tbl(target_conn, "redcap_events_arms") |> + dplyr::filter(project_id == target_project_id) |> + dplyr::inner_join(dplyr::tbl(target_conn, "redcap_events_metadata"), by = "arm_id") |> + dplyr::collect() + + max_rid_target <- dplyr::tbl(target_conn, "redcap_randomization") |> + dplyr::arrange(dplyr::desc(rid)) |> + head(n = 1) |> + dplyr::collect() |> + dplyr::pull(rid) + + new_randomization_target_data <- randomization_source |> + dplyr::filter(project_id == source_project_id) |> + dplyr::collect() |> + # Replace the easy stuff + dplyr::mutate( + rid = max_rid_target + 1, + project_id = target_project_id + ) |> + # Pivot longer so that we can replace each event_id with the + # corresponding 
event ID for the target project. + tidyr::pivot_longer( + cols = contains("field"), + names_to = "field_label", + values_to = "field_value", + values_drop_na = T + ) |> + tidyr::pivot_longer( + cols = contains("event"), + names_to = "event_label", + values_to = "event_value", + values_drop_na = T + ) |> + # Replace the event_id by aligning the Event Description + dplyr::inner_join(source_event_ids |> dplyr::select(event_id, descrip), by = c("event_value" = "event_id")) |> + dplyr::inner_join(target_event_ids |> dplyr::select(event_id, descrip), by = "descrip") |> + dplyr::select(-event_value, descrip) |> + dplyr::rename(event_value = event_id) |> + # pivot wider to restore the original shape of the data + tidyr::pivot_wider( + id_cols = c(rid, project_id, stratified, group_by, field_label, field_value), + names_from = "event_label", + values_from = "event_value" + ) |> + tidyr::pivot_wider( + id_cols = c(rid, project_id, stratified, group_by, target_event, source_event1, source_event2), + names_from = "field_label", + values_from = "field_value" + ) + + # Write the new randomization record + DBI::dbAppendTable( + conn = target_conn, + name = "redcap_randomization", + value = new_randomization_target_data + ) + + target_project_randomization_state <- new_randomization_target_data + } + return(target_project_randomization_state) +} + + +create_allocation_rows <- function(source_conn, + target_conn, + source_project_id) { + # create row in redcap_randomization on target if needed + target_project_allocation_state <- dplyr::tbl(target_conn, "redcap_randomization_allocation") |> + dplyr::filter(rid == !!target_project_randomization_state$rid) |> + dplyr::collect() + + if (!nrow(target_project_allocation_state) == 0) { + message(paste("Allocation records exist for target project with ID", project_id_target, "Not writing allocation records")) + result <- 0 + } else { + max_aid_target <- dplyr::tbl(target_conn, "redcap_randomization_allocation") |> + 
dplyr::arrange(dplyr::desc(aid)) |> + head(n = 1) |> + dplyr::collect() |> + dplyr::pull(aid) + + rid_source <- dplyr::tbl(source_conn, "redcap_randomization") |> + dplyr::filter(project_id == !!source_project_id) |> + dplyr::collect() |> + dplyr::pull(rid) + + new_allocation_target_data <- dplyr::tbl(source_conn, "redcap_randomization_allocation") |> + dplyr::filter(rid == rid_source) |> + dplyr::arrange(aid) |> + dplyr::collect() |> + dplyr::mutate( + rid = target_project_randomization_state$rid, + aid = max_aid_target + row_number() + ) + + # Write the new allocation data to the target + result <- DBI::dbAppendTable( + conn = target_conn, + name = "redcap_randomization_allocation", + value = new_allocation_target_data + ) + + target_project_allocation_state <- new_allocation_target_data + } + return(target_project_allocation_state) +} + +update_production_allocation_state <- function(source_conn, + target_conn, + source_project_id, + target_rid) { + rid_source <- dplyr::tbl(source_conn, "redcap_randomization") |> + dplyr::filter(project_id == !!source_project_id) |> + dplyr::collect() |> + dplyr::pull(rid) + + # get the source's production allocation data, but control the order and add an alignment column + source_allocation_data <- dplyr::tbl(source_conn, "redcap_randomization_allocation") |> + dplyr::filter(rid == rid_source) |> + dplyr::filter(project_status == 1) |> + dplyr::arrange(dplyr::desc(aid)) |> + dplyr::collect() |> + dplyr::mutate( + aid.alignment = aid - min(aid) + ) + + # get the target's production allocation data, but control the order and add an alignment column + target_allocation_data <- dplyr::tbl(target_conn, "redcap_randomization_allocation") |> + dplyr::filter(rid == target_rid) |> + dplyr::filter(project_status == 1) |> + dplyr::arrange(dplyr::desc(aid)) |> + dplyr::collect() |> + dplyr::mutate( + aid.alignment = aid - min(aid) + ) + + # Make the update dataset by replacing the RID and AID columns in the source data + 
target_allocation_update <- source_allocation_data |> + dplyr::filter(!is.na(is_used_by)) |> + dplyr::select(-aid, -rid) |> + dplyr::inner_join(target_allocation_data |> dplyr::select(aid, rid, aid.alignment), by = "aid.alignment") |> + dplyr::select(-aid.alignment) + + # Write updates to target allocation data + sync_result <- sync_table_2( + conn = target_conn, + table_name = "redcap_randomization_allocation", + source = target_allocation_update, + source_pk = "aid", + target = target_allocation_data |> dplyr::select(-aid.alignment), + target_pk = "aid", + update = T, + insert = F, + delete = F + ) + + return(sync_result) +} + + +enable_randomization_on_a_preconfigured_project_in_production <- function(target_conn, + target_project_id) { + # Turn on randomization in the target project but only if + # 1) it has already been moved to production + # 2) randomization has been configured + + # get the state of the project + target_project_state <- dplyr::tbl(rc_conn_target, "redcap_projects") |> + dplyr::filter(project_id == project_id_target) |> + dplyr::select(project_id, randomization, status, production_time) |> + dplyr::collect() + + target_project_randomization_state <- dplyr::tbl(target_conn, "redcap_randomization") |> + dplyr::filter(project_id == target_project_id) |> + dplyr::collect() + + target_project_production_allocation_state <- dplyr::tbl(target_conn, "redcap_randomization_allocation") |> + dplyr::filter(rid == !!target_project_randomization_state$rid) |> + dplyr::filter(project_status == 1) |> + dplyr::collect() + + if (target_project_state$randomization == 0 & + target_project_state$status == 1 & + nrow(target_project_randomization_state) == 1 & + nrow(target_project_production_allocation_state) > 0 + ) { + sync_table_2( + conn = rc_conn_target, + table_name = "redcap_projects", + source = target_project_state |> dplyr::mutate(randomization = 1), + source_pk = "project_id", + target = target_project_state, + target_pk = "project_id" + ) + 
message("Randomization enabled.") + result <- TRUE + } else { + message("Doing nothing. The project must be in production, with randomization configured, but randomization turned off.") + result <- FALSE + } + return(result) +} diff --git a/etl/copy_allocated_randomization.R b/etl/copy_allocated_randomization.R new file mode 100644 index 0000000..a1ca5cd --- /dev/null +++ b/etl/copy_allocated_randomization.R @@ -0,0 +1,49 @@ +library(redcapcustodian) +library(DBI) +library(tidyverse) +library(lubridate) +library(dotenv) + +init_etl("copy_allocated_randomization") + +source_conn <- connect_to_redcap_db() +# specify a second database connection if the target project is on another host +target_conn <- source_conn +source_project_id <- 18 +target_project_id <- 25 + +# get and print importable allocations if we need them for reference +allocations <- export_allocation_tables_from_project( + conn = source_conn, + project_id_to_export = source_project_id +) + +walk(c(0,1), write_allocations, allocations, "output") + +# Configure randomization on the target project +target_project_randomization_state <- create_randomization_row( + source_conn = source_conn, + target_conn = target_conn, + source_project_id = source_project_id, + target_project_id = target_project_id +) + +target_project_allocation_state <- create_allocation_rows( + source_conn = source_conn, + target_conn = target_conn, + source_project_id = source_project_id +) + +# Update randomization on the target project +target_project_allocation_update <- update_production_allocation_state( + source_conn = source_conn, + target_conn = target_conn, + source_project_id = source_project_id, + target_rid = target_project_randomization_state$rid +) + +# Enable randomization on the target +enable_randomization_on_a_preconfigured_project_in_production( + target_conn = target_conn, + target_project_id = target_project_id +) From f111aee79b036f14ed6222ddd876683e0d3a267a Mon Sep 17 00:00:00 2001 From: Philip Chase Date: Thu, 
16 Feb 2023 10:32:01 -0500 Subject: [PATCH 07/14] Eschew deprecated tidyselect features Replace .data$foo in selects with "foo". --- R/redcap.R | 32 ++++++++++++++++---------------- R/summary_metrics.R | 14 +++++++------- R/user_rights.R | 2 +- 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/R/redcap.R b/R/redcap.R index 3a368bb..cb937fe 100644 --- a/R/redcap.R +++ b/R/redcap.R @@ -87,12 +87,12 @@ get_redcap_db_connection <- function() { get_redcap_emails <- function(conn) { wide <- dplyr::tbl(conn, "redcap_user_information") %>% dplyr::select( - .data$ui_id, - .data$username, - .data$user_suspended_time, - .data$user_email, - .data$user_email2, - .data$user_email3 + "ui_id", + "username", + "user_suspended_time", + "user_email", + "user_email2", + "user_email3" ) %>% dplyr::collect() %>% dplyr::mutate(user_suspended_time = as.POSIXct(.data$user_suspended_time)) @@ -137,7 +137,7 @@ get_redcap_emails <- function(conn) { #' } get_redcap_email_revisions <- function(bad_redcap_user_emails, person) { person_data_for_redcap_users_with_bad_emails <- person %>% - dplyr::select(.data$user_id, .data$email) %>% + dplyr::select("user_id", "email") %>% dplyr::filter(.data$user_id %in% bad_redcap_user_emails$username) replacement_email_addresses_for_bad_redcap_emails <- bad_redcap_user_emails %>% @@ -146,10 +146,10 @@ get_redcap_email_revisions <- function(bad_redcap_user_emails, person) { dplyr::filter(!is.na(.data$email.replacement)) %>% dplyr::mutate(corrected_email = .data$email.replacement) %>% dplyr::select( - .data$ui_id, - .data$username, - .data$email_field_name, - .data$corrected_email + "ui_id", + "username", + "email_field_name", + "corrected_email" ) redcap_email_revisions <- replacement_email_addresses_for_bad_redcap_emails %>% @@ -206,7 +206,7 @@ update_redcap_email_addresses <- function(conn, for (email_field in email_fields) { wide_revisions <- redcap_email_revisions %>% - dplyr::select(-.data$email) %>% + dplyr::select(-"email") %>% 
dplyr::filter(.data$email_field_name == email_field) %>% tidyr::pivot_wider( names_from = "email_field_name", @@ -255,10 +255,10 @@ suspend_users_with_no_primary_email <- function(conn) { user_comments = paste("Account suspended on", get_script_run_time(), "due to no valid email address") ) %>% dplyr::select( - .data$ui_id, - .data$username, - .data$user_suspended_time, - .data$user_comments + "ui_id", + "username", + "user_suspended_time", + "user_comments" ) result <- sync_table_2( diff --git a/R/summary_metrics.R b/R/summary_metrics.R index c84b572..786a6a6 100644 --- a/R/summary_metrics.R +++ b/R/summary_metrics.R @@ -36,13 +36,13 @@ write_summary_metrics <- function(reporting_period_start, script_run_time = get_script_run_time() ) %>% dplyr::select( - reporting_period_start, - reporting_period_end, - .data$key, - .data$value, - .data$metric_type, - .data$script_name, - .data$script_run_time + "reporting_period_start", + "reporting_period_end", + "key", + "value", + "metric_type", + "script_name", + "script_run_time" ) log_conn <- get_package_scope_var("log_con") diff --git a/R/user_rights.R b/R/user_rights.R index 75db69e..408b81d 100644 --- a/R/user_rights.R +++ b/R/user_rights.R @@ -69,7 +69,7 @@ expire_user_project_rights <- function(conn, update_records <- users_to_expire %>% dplyr::mutate(expiration = expiration_date) %>% - dplyr::select(.data$project_id, .data$username, .data$expiration) + dplyr::select("project_id", "username", "expiration") diff_data <- list(update_records = update_records) From 5553716d0e1d49b15929e2d681f7ef0ed9ac538e Mon Sep 17 00:00:00 2001 From: Philip Chase Date: Thu, 16 Feb 2023 12:14:07 -0500 Subject: [PATCH 08/14] Update randomization functions to pass Build Check --- DESCRIPTION | 39 +-- NAMESPACE | 6 + R/randomization.R | 301 ++++++++++++++---- etl/copy_allocated_randomization.R | 3 +- man/create_allocation_rows.Rd | 44 +++ man/create_randomization_row.Rd | 44 +++ ...n_a_preconfigured_project_in_production.Rd | 33 ++ 
man/export_allocation_tables_from_project.Rd | 31 ++ man/update_production_allocation_state.Rd | 44 +++ man/write_allocations.Rd | 36 +++ 10 files changed, 494 insertions(+), 87 deletions(-) create mode 100644 man/create_allocation_rows.Rd create mode 100644 man/create_randomization_row.Rd create mode 100644 man/enable_randomization_on_a_preconfigured_project_in_production.Rd create mode 100644 man/export_allocation_tables_from_project.Rd create mode 100644 man/update_production_allocation_state.Rd create mode 100644 man/write_allocations.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 2ad6f7c..08f7cea 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -34,25 +34,26 @@ License: Apache License (>= 2.0) Encoding: UTF-8 LazyData: true Imports: - DBI, - dbx, - RMariaDB, - REDCapR, - dplyr, - glue, - lubridate, - magrittr, - mRpostman, - purrr, - rjson, - rlang, - rstudioapi, - sendmailR, - stringr, - tibble, - tidyr, - readr, - vctrs + DBI, + dbx, + RMariaDB, + REDCapR, + dplyr, + glue, + lubridate, + magrittr, + mRpostman, + purrr, + rjson, + rlang, + rstudioapi, + sendmailR, + stringr, + tibble, + tidyr, + readr, + vctrs, + here Suggests: testthat (>= 3.0.0), digest, diff --git a/NAMESPACE b/NAMESPACE index ca97be0..aca82ec 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,11 +4,15 @@ export(connect_to_db) export(connect_to_log_db) export(connect_to_redcap_db) export(convert_schema_to_sqlite) +export(create_allocation_rows) +export(create_randomization_row) export(create_test_table) export(create_test_tables) export(dataset_diff) export(disable_non_interactive_quit) +export(enable_randomization_on_a_preconfigured_project_in_production) export(expire_user_project_rights) +export(export_allocation_tables_from_project) export(get_bad_emails_from_individual_emails) export(get_bad_emails_from_listserv_digest) export(get_current_time) @@ -41,7 +45,9 @@ export(suspend_users_with_no_primary_email) export(sync_metadata) export(sync_table) export(sync_table_2) 
+export(update_production_allocation_state) export(update_redcap_email_addresses) +export(write_allocations) export(write_error_log_entry) export(write_info_log_entry) export(write_summary_metrics) diff --git a/R/randomization.R b/R/randomization.R index 950d650..2cfe515 100644 --- a/R/randomization.R +++ b/R/randomization.R @@ -1,3 +1,25 @@ +#' export_allocation_tables_from_project +#' +#' Export randomization allocation data for a project from the REDCap +#' randomization tables but in a form that reflects the allocation tables +#' REDCap requests for import +#' +#' @param conn - a DBI connection object pointing at a REDCap +#' database that houses the project of interest +#' @param project_id_to_export - The project ID of a REDCap project that +#' contains randomization to be exported. +#' +#' @return a dataframe in the shape of REDCap randomization table CSVs +#' @export +#' @importFrom rlang .data +#' +#' @examples +#' \dontrun{ +#' allocations <- export_allocation_tables_from_project( +#' conn = source_conn, +#' project_id_to_export = source_project_id +#' ) +#' } export_allocation_tables_from_project <- function(conn, project_id_to_export) { # Get column names from randomization_source @@ -5,37 +27,37 @@ export_allocation_tables_from_project <- function(conn, # source_fieldN and source_eventN describe the randomization variables # Pivot the data longer to prep it for renaming the strata fields in Allocations column_names_in_source <- dplyr::tbl(conn, "redcap_randomization") |> - dplyr::filter(project_id == project_id_to_export) |> + dplyr::filter(.data$project_id == project_id_to_export) |> dplyr::collect() |> - dplyr::select(target_field, starts_with("source_field")) |> + dplyr::select("target_field", dplyr::starts_with("source_field")) |> tidyr::pivot_longer( - cols = contains("field"), + cols = dplyr::contains("field"), names_to = "strata", values_to = "redcap_field_name" ) |> - dplyr::filter(!is.na(redcap_field_name)) + 
dplyr::filter(!is.na(.data$redcap_field_name)) rid_to_export <- dplyr::tbl(conn, "redcap_randomization") |> - dplyr::filter(project_id == !!project_id_to_export) |> + dplyr::filter(.data$project_id == !!project_id_to_export) |> dplyr::collect() |> - dplyr::pull(rid) + dplyr::pull(.data$rid) # Allocation data is in allocation_source allocations <- dplyr::tbl(conn, "redcap_randomization_allocation") |> - dplyr::filter(rid == rid_to_export) |> + dplyr::filter(.data$rid == rid_to_export) |> dplyr::collect() |> - dplyr::select(aid, project_status, target_field, starts_with("source_field")) |> + dplyr::select("aid", "project_status", "target_field", dplyr::starts_with("source_field")) |> # Pivot longer to facilitate renaming the abstract field names to redcap field names tidyr::pivot_longer( - cols = contains("field"), + cols = dplyr::contains("field"), names_to = "strata", values_to = "value" ) |> - dplyr::filter(!is.na(value)) |> + dplyr::filter(!is.na(.data$value)) |> # dplyr::rename the *field* columns dplyr::inner_join(column_names_in_source, by = "strata") |> - dplyr::select(-strata) |> + dplyr::select(-"strata") |> tidyr::pivot_wider( id_cols = c("aid", "project_status"), names_from = "redcap_field_name", @@ -45,54 +67,115 @@ export_allocation_tables_from_project <- function(conn, return(allocations) } - -# Write the allocation tables +#' write_allocations +#' +#' Write the development or production randomization allocation table in +#' the same form in which it was loaded. +#' +#' @param project_status_to_write - the value of project_status to export. +#' Use 0 for development. 
Use 1 for Production +#' @param allocations - the dataframe of randomization allocation data as +#' exported by `export_allocation_tables_from_project` +#' @param target_directory - the directory into which the function should write the files +#' +#' @return the full path to the allocations file +#' @export +#' @importFrom rlang .data +#' +#' @examples +#' \dontrun{ +#' # get and print importable allocations if we need them for reference +#' allocations <- export_allocation_tables_from_project( +#' conn = source_conn, +#' project_id_to_export = source_project_id +#' ) +#' +#' # write both files +#' walk(c(0,1), write_allocations, allocations, "output") +#' } write_allocations <- function(project_status_to_write, allocations, target_directory = ".") { base_name <- "RandomizationAllocation" date_time_stamp <- format(get_script_run_time(), "%Y%m%d%H%M%S") - project_statuses <- setNames(c(0, 1), c("development", "production")) + project_statuses <- stats::setNames(c(0, 1), c("development", "production")) if (!fs::dir_exists(here::here(target_directory))) { fs::dir_create(here::here(target_directory)) } + filename <- here::here( + target_directory, + paste(base_name, names(project_statuses)[project_status_to_write + 1], date_time_stamp, sep = "_") + ) + allocations |> - dplyr::filter(project_status == project_status_to_write) |> - dplyr::select(-aid, -project_status) |> - readr::write_csv(here::here(target_directory, paste(base_name, names(project_statuses)[project_status_to_write + 1], date_time_stamp, sep = "_"))) + dplyr::filter(.data$project_status == project_status_to_write) |> + dplyr::select(-"aid", -"project_status") |> + readr::write_csv(filename) + + return(filename) } +#' create_randomization_row +#' +#' Create a single row in the redcap_randomization table that mirrors +#' that in another project. +#' +#' @param source_conn - a DBI connection object pointing at the REDCap +#' database that houses the source project. 
+#' @param target_conn - a DBI connection object pointing at the REDCap +#' database that houses the target project. +#' @param source_project_id - The project ID of the REDCap project that +#' contains randomization to be cloned. +#' @param target_project_id - The project ID of the REDCap project that +#' will receive the mirrored randomization data. +#' +#' @return - A dataframe containing the current randomization row for the +#' target project. +#' +#' @return +#' @export +#' @importFrom rlang .data +#' +#' @examples +#' \dontrun{ +#' target_project_randomization_state <- create_randomization_row( +#' source_conn = source_conn, +#' target_conn = target_conn, +#' source_project_id = source_project_id, +#' target_project_id = target_project_id +#' ) +#' } create_randomization_row <- function(source_conn, target_conn, source_project_id, target_project_id) { # get the current state target_project_randomization_state <- dplyr::tbl(target_conn, "redcap_randomization") |> - dplyr::filter(project_id == target_project_id) |> + dplyr::filter(.data$project_id == target_project_id) |> dplyr::collect() # create row in redcap_randomization on target if there is no current state if (nrow(target_project_randomization_state) == 0) { # get replacement event_ids source_event_ids <- dplyr::tbl(source_conn, "redcap_events_arms") |> - dplyr::filter(project_id == source_project_id) |> + dplyr::filter(.data$project_id == source_project_id) |> dplyr::inner_join(dplyr::tbl(source_conn, "redcap_events_metadata"), by = "arm_id") |> dplyr::collect() target_event_ids <- dplyr::tbl(target_conn, "redcap_events_arms") |> - dplyr::filter(project_id == target_project_id) |> + dplyr::filter(.data$project_id == target_project_id) |> dplyr::inner_join(dplyr::tbl(target_conn, "redcap_events_metadata"), by = "arm_id") |> dplyr::collect() max_rid_target <- dplyr::tbl(target_conn, "redcap_randomization") |> - dplyr::arrange(dplyr::desc(rid)) |> - head(n = 1) |> + dplyr::arrange(dplyr::desc(.data$rid)) 
|> + utils::head(n = 1) |> dplyr::collect() |> - dplyr::pull(rid) + dplyr::pull(.data$rid) - new_randomization_target_data <- randomization_source |> - dplyr::filter(project_id == source_project_id) |> + new_randomization_target_data <- dplyr::tbl(source_conn, "redcap_randomization") |> + dplyr::filter(.data$project_id == source_project_id) |> dplyr::collect() |> # Replace the easy stuff dplyr::mutate( @@ -102,30 +185,30 @@ create_randomization_row <- function(source_conn, # Pivot longer so that we can replace each event_id with the # corresponding event ID for the target project. tidyr::pivot_longer( - cols = contains("field"), + cols = dplyr::contains("field"), names_to = "field_label", values_to = "field_value", values_drop_na = T ) |> tidyr::pivot_longer( - cols = contains("event"), + cols = dplyr::contains("event"), names_to = "event_label", values_to = "event_value", values_drop_na = T ) |> # Replace the event_id by aligning the Event Description - dplyr::inner_join(source_event_ids |> dplyr::select(event_id, descrip), by = c("event_value" = "event_id")) |> - dplyr::inner_join(target_event_ids |> dplyr::select(event_id, descrip), by = "descrip") |> - dplyr::select(-event_value, descrip) |> - dplyr::rename(event_value = event_id) |> + dplyr::inner_join(source_event_ids |> dplyr::select("event_id", "descrip"), by = c("event_value" = "event_id")) |> + dplyr::inner_join(target_event_ids |> dplyr::select("event_id", "descrip"), by = "descrip") |> + dplyr::select(-"event_value", "descrip") |> + dplyr::rename(event_value = .data$event_id) |> # pivot wider to restore the original shape of the data tidyr::pivot_wider( - id_cols = c(rid, project_id, stratified, group_by, field_label, field_value), + id_cols = c("rid", "project_id", "stratified", "group_by", "field_label", "field_value"), names_from = "event_label", values_from = "event_value" ) |> tidyr::pivot_wider( - id_cols = c(rid, project_id, stratified, group_by, target_event, source_event1, source_event2), + 
id_cols = c("rid", "project_id", "stratified", "group_by", "target_event", "source_event1", "source_event2"), names_from = "field_label", values_from = "field_value" ) @@ -143,36 +226,70 @@ create_randomization_row <- function(source_conn, } +#' create_allocation_rows +#' +#' Create rows in the redcap_randomization_allocation table that mirror +#' those in another project. +#' +#' @param source_conn - a DBI connection object pointing at the REDCap +#' database that houses the source project. +#' @param target_conn - a DBI connection object pointing at the REDCap +#' database that houses the target project. +#' @param source_project_id - The project ID of the REDCap project that +#' contains randomization to be cloned. +#' @param target_project_id - The project ID of the REDCap project that +#' will receive the mirrored randomization data. +#' +#' @return - A dataframe containing the current allocation rows for the +#' target project. +#' @export +#' @importFrom rlang .data +#' +#' @examples +#' \dontrun{ +#' target_project_allocation_state <- create_allocation_rows( +#' source_conn = source_conn, +#' target_conn = target_conn, +#' source_project_id = source_project_id, +#' target_project_id = target_project_id +#' ) +#' } create_allocation_rows <- function(source_conn, target_conn, - source_project_id) { - # create row in redcap_randomization on target if needed + source_project_id, + target_project_id) { + # get the current state + target_project_randomization_state <- dplyr::tbl(target_conn, "redcap_randomization") |> + dplyr::filter(.data$project_id == target_project_id) |> + dplyr::collect() + + # create row in redcap_randomization on target if needed target_project_allocation_state <- dplyr::tbl(target_conn, "redcap_randomization_allocation") |> - dplyr::filter(rid == !!target_project_randomization_state$rid) |> + dplyr::filter(.data$rid == !!target_project_randomization_state$rid) |> dplyr::collect() if (!nrow(target_project_allocation_state) == 0) { - 
message(paste("Allocation records exist for target project with ID", project_id_target, "Not writing allocation records")) + message(paste("Allocation records exist for target project with ID", target_project_id, "Not writing allocation records")) result <- 0 } else { max_aid_target <- dplyr::tbl(target_conn, "redcap_randomization_allocation") |> - dplyr::arrange(dplyr::desc(aid)) |> - head(n = 1) |> + dplyr::arrange(dplyr::desc(.data$aid)) |> + utils::head(n = 1) |> dplyr::collect() |> - dplyr::pull(aid) + dplyr::pull(.data$aid) rid_source <- dplyr::tbl(source_conn, "redcap_randomization") |> - dplyr::filter(project_id == !!source_project_id) |> + dplyr::filter(.data$project_id == !!source_project_id) |> dplyr::collect() |> - dplyr::pull(rid) + dplyr::pull(.data$rid) new_allocation_target_data <- dplyr::tbl(source_conn, "redcap_randomization_allocation") |> - dplyr::filter(rid == rid_source) |> - dplyr::arrange(aid) |> + dplyr::filter(.data$rid == rid_source) |> + dplyr::arrange(.data$aid) |> dplyr::collect() |> dplyr::mutate( rid = target_project_randomization_state$rid, - aid = max_aid_target + row_number() + aid = max_aid_target + dplyr::row_number() ) # Write the new allocation data to the target @@ -187,41 +304,70 @@ create_allocation_rows <- function(source_conn, return(target_project_allocation_state) } + +#' update_production_allocation_state +#' +#' Update producition rows in the redcap_randomization_allocation table to +#' mirror those in another project. +#' +#' @param source_conn - a DBI connection object pointing at the REDCap +#' database that houses the source project. +#' @param target_conn - a DBI connection object pointing at the REDCap +#' database that houses the target project. +#' @param source_project_id - The project ID of the REDCap project that +#' contains randomization to be cloned. +#' @param target_rid - The randomization id of the REDCap project that +#' will receive the updated randomization data. 
+#' +#' @return - The list output of sync_table_2 from the update of the +#' randomization allocation table. +#' @export +#' @importFrom rlang .data +#' +#' @examples +#' \dontrun{ +#' target_project_allocation_update <- update_production_allocation_state( +#' source_conn = source_conn, +#' target_conn = target_conn, +#' source_project_id = source_project_id, +#' target_rid = target_project_randomization_state$rid +#' ) +#' } update_production_allocation_state <- function(source_conn, target_conn, source_project_id, target_rid) { rid_source <- dplyr::tbl(source_conn, "redcap_randomization") |> - dplyr::filter(project_id == !!source_project_id) |> + dplyr::filter(.data$project_id == !!source_project_id) |> dplyr::collect() |> - dplyr::pull(rid) + dplyr::pull(.data$rid) # get the source's production allocation data, but control the order and add an alignment column source_allocation_data <- dplyr::tbl(source_conn, "redcap_randomization_allocation") |> - dplyr::filter(rid == rid_source) |> - dplyr::filter(project_status == 1) |> - dplyr::arrange(dplyr::desc(aid)) |> + dplyr::filter(.data$rid == rid_source) |> + dplyr::filter(.data$project_status == 1) |> + dplyr::arrange(dplyr::desc(.data$aid)) |> dplyr::collect() |> dplyr::mutate( - aid.alignment = aid - min(aid) + aid.alignment = .data$aid - min(.data$aid) ) # get the target's production allocation data, but control the order and add an alignment column target_allocation_data <- dplyr::tbl(target_conn, "redcap_randomization_allocation") |> - dplyr::filter(rid == target_rid) |> - dplyr::filter(project_status == 1) |> - dplyr::arrange(dplyr::desc(aid)) |> + dplyr::filter(.data$rid == target_rid) |> + dplyr::filter(.data$project_status == 1) |> + dplyr::arrange(dplyr::desc(.data$aid)) |> dplyr::collect() |> dplyr::mutate( - aid.alignment = aid - min(aid) + aid.alignment = .data$aid - min(.data$aid) ) # Make the update dataset by replacing the RID and AID columns in the source data target_allocation_update <- 
source_allocation_data |> - dplyr::filter(!is.na(is_used_by)) |> - dplyr::select(-aid, -rid) |> - dplyr::inner_join(target_allocation_data |> dplyr::select(aid, rid, aid.alignment), by = "aid.alignment") |> - dplyr::select(-aid.alignment) + dplyr::filter(!is.na(.data$is_used_by)) |> + dplyr::select(-"aid", -"rid") |> + dplyr::inner_join(target_allocation_data |> dplyr::select("aid", "rid", "aid.alignment"), by = "aid.alignment") |> + dplyr::select(-"aid.alignment") # Write updates to target allocation data sync_result <- sync_table_2( @@ -229,7 +375,7 @@ update_production_allocation_state <- function(source_conn, table_name = "redcap_randomization_allocation", source = target_allocation_update, source_pk = "aid", - target = target_allocation_data |> dplyr::select(-aid.alignment), + target = target_allocation_data |> dplyr::select(-"aid.alignment"), target_pk = "aid", update = T, insert = F, @@ -240,6 +386,27 @@ update_production_allocation_state <- function(source_conn, } +#' enable_randomization_on_a_preconfigured_project_in_production +#' +#' Turn on randomization in the target project but only if it has already +#' been moved to production and randomization has been configured. +#' +#' @param target_conn - a DBI connection object pointing at the REDCap +#' database that houses the target project. +#' @param target_project_id - The project ID of the REDCap project that +#' will receive the mirrored randomization data. 
+#' +#' @return A logical indicating success or failure of the operation +#' @export +#' @importFrom rlang .data +#' +#' @examples +#' \dontrun{ +#' enable_randomization_on_a_preconfigured_project_in_production( +#' target_conn = target_conn, +#' target_project_id = target_project_id +#' ) +#' } enable_randomization_on_a_preconfigured_project_in_production <- function(target_conn, target_project_id) { # Turn on randomization in the target project but only if @@ -247,18 +414,18 @@ enable_randomization_on_a_preconfigured_project_in_production <- function(target # 2) randomization has been configured # get the state of the project - target_project_state <- dplyr::tbl(rc_conn_target, "redcap_projects") |> - dplyr::filter(project_id == project_id_target) |> - dplyr::select(project_id, randomization, status, production_time) |> + target_project_state <- dplyr::tbl(target_conn, "redcap_projects") |> + dplyr::filter(.data$project_id == target_project_id) |> + dplyr::select("project_id", "randomization", "status", "production_time") |> dplyr::collect() target_project_randomization_state <- dplyr::tbl(target_conn, "redcap_randomization") |> - dplyr::filter(project_id == target_project_id) |> + dplyr::filter(.data$project_id == target_project_id) |> dplyr::collect() target_project_production_allocation_state <- dplyr::tbl(target_conn, "redcap_randomization_allocation") |> - dplyr::filter(rid == !!target_project_randomization_state$rid) |> - dplyr::filter(project_status == 1) |> + dplyr::filter(.data$rid == !!target_project_randomization_state$rid) |> + dplyr::filter(.data$project_status == 1) |> dplyr::collect() if (target_project_state$randomization == 0 & @@ -267,7 +434,7 @@ enable_randomization_on_a_preconfigured_project_in_production <- function(target nrow(target_project_production_allocation_state) > 0 ) { sync_table_2( - conn = rc_conn_target, + conn = target_conn, table_name = "redcap_projects", source = target_project_state |> dplyr::mutate(randomization = 1), 
source_pk = "project_id", diff --git a/etl/copy_allocated_randomization.R b/etl/copy_allocated_randomization.R index a1ca5cd..e9fc783 100644 --- a/etl/copy_allocated_randomization.R +++ b/etl/copy_allocated_randomization.R @@ -31,7 +31,8 @@ target_project_randomization_state <- create_randomization_row( target_project_allocation_state <- create_allocation_rows( source_conn = source_conn, target_conn = target_conn, - source_project_id = source_project_id + source_project_id = source_project_id, + target_project_id = target_project_id ) # Update randomization on the target project diff --git a/man/create_allocation_rows.Rd b/man/create_allocation_rows.Rd new file mode 100644 index 0000000..2b4bc63 --- /dev/null +++ b/man/create_allocation_rows.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/randomization.R +\name{create_allocation_rows} +\alias{create_allocation_rows} +\title{create_allocation_rows} +\usage{ +create_allocation_rows( + source_conn, + target_conn, + source_project_id, + target_project_id +) +} +\arguments{ +\item{source_conn}{- a DBI connection object pointing at the REDCap +database that houses the source project.} + +\item{target_conn}{- a DBI connection object pointing at the REDCap +database that houses the target project.} + +\item{source_project_id}{- The project ID of the REDCap project that +contains randomization to be cloned.} + +\item{target_project_id}{- The project ID of the REDCap project that +will receive the mirrored randomization data.} +} +\value{ +- A dataframe containing the current allocation rows for the + target project. +} +\description{ +Create rows in the redcap_randomization_allocation table that mirror +those in another project. 
+} +\examples{ +\dontrun{ +target_project_allocation_state <- create_allocation_rows( + source_conn = source_conn, + target_conn = target_conn, + source_project_id = source_project_id, + target_project_id = target_project_id +) +} +} diff --git a/man/create_randomization_row.Rd b/man/create_randomization_row.Rd new file mode 100644 index 0000000..d32da0d --- /dev/null +++ b/man/create_randomization_row.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/randomization.R +\name{create_randomization_row} +\alias{create_randomization_row} +\title{create_randomization_row} +\usage{ +create_randomization_row( + source_conn, + target_conn, + source_project_id, + target_project_id +) +} +\arguments{ +\item{source_conn}{- a DBI connection object pointing at the REDCap +database that houses the source project.} + +\item{target_conn}{- a DBI connection object pointing at the REDCap +database that houses the target project.} + +\item{source_project_id}{- The project ID of the REDCap project that +contains randomization to be cloned.} + +\item{target_project_id}{- The project ID of the REDCap project that +will receive the mirrored randomization data.} +} +\value{ +- A dataframe containing the current randomization row for the + target project. +} +\description{ +Create a single row in the redcap_randomization table that mirrors +that in another project. 
+} +\examples{ +\dontrun{ +target_project_randomization_state <- create_randomization_row( + source_conn = source_conn, + target_conn = target_conn, + source_project_id = source_project_id, + target_project_id = target_project_id +) +} +} diff --git a/man/enable_randomization_on_a_preconfigured_project_in_production.Rd b/man/enable_randomization_on_a_preconfigured_project_in_production.Rd new file mode 100644 index 0000000..b657075 --- /dev/null +++ b/man/enable_randomization_on_a_preconfigured_project_in_production.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/randomization.R +\name{enable_randomization_on_a_preconfigured_project_in_production} +\alias{enable_randomization_on_a_preconfigured_project_in_production} +\title{enable_randomization_on_a_preconfigured_project_in_production} +\usage{ +enable_randomization_on_a_preconfigured_project_in_production( + target_conn, + target_project_id +) +} +\arguments{ +\item{target_conn}{- a DBI connection object pointing at the REDCap +database that houses the target project.} + +\item{target_project_id}{- The project ID of the REDCap project that +will receive the mirrored randomization data.} +} +\value{ +A logical indicating success or failure of the operation +} +\description{ +Turn on randomization in the target project but only if it has already + been moved to production and randomization has been configured. 
+} +\examples{ +\dontrun{ +enable_randomization_on_a_preconfigured_project_in_production( + target_conn = target_conn, + target_project_id = target_project_id +) +} +} diff --git a/man/export_allocation_tables_from_project.Rd b/man/export_allocation_tables_from_project.Rd new file mode 100644 index 0000000..5724836 --- /dev/null +++ b/man/export_allocation_tables_from_project.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/randomization.R +\name{export_allocation_tables_from_project} +\alias{export_allocation_tables_from_project} +\title{export_allocation_tables_from_project} +\usage{ +export_allocation_tables_from_project(conn, project_id_to_export) +} +\arguments{ +\item{conn}{- a DBI connection object pointing at a REDCap +database that houses the project on interest} + +\item{project_id_to_export}{- The project ID of a REDCap project that +contains randomization to be exported.} +} +\value{ +a dataframe in the shape of REDCap randomization table CSVs +} +\description{ +Export randomization allocation data for a project from the REDCap + randomization tables but in a form that reflects the allocation tables + REDCap requests for import +} +\examples{ +\dontrun{ +allocations <- export_allocation_tables_from_project( + conn = source_conn, + project_id_to_export = source_project_id +) +} +} diff --git a/man/update_production_allocation_state.Rd b/man/update_production_allocation_state.Rd new file mode 100644 index 0000000..d382f85 --- /dev/null +++ b/man/update_production_allocation_state.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/randomization.R +\name{update_production_allocation_state} +\alias{update_production_allocation_state} +\title{update_production_allocation_state} +\usage{ +update_production_allocation_state( + source_conn, + target_conn, + source_project_id, + target_rid +) +} +\arguments{ +\item{source_conn}{- a DBI connection object pointing at 
the REDCap +database that houses the source project.} + +\item{target_conn}{- a DBI connection object pointing at the REDCap +database that houses the target project.} + +\item{source_project_id}{- The project ID of the REDCap project that +contains randomization to be cloned.} + +\item{target_rid}{- The randomization id of the REDCap project that +will receive the updated randomization data.} +} +\value{ +- The list output of sync_table_2 from the update of the + randomization allocation table. +} +\description{ +Update producition rows in the redcap_randomization_allocation table to + mirror those in another project. +} +\examples{ +\dontrun{ +target_project_allocation_update <- update_production_allocation_state( + source_conn = source_conn, + target_conn = target_conn, + source_project_id = source_project_id, + target_rid = target_project_randomization_state$rid +) +} +} diff --git a/man/write_allocations.Rd b/man/write_allocations.Rd new file mode 100644 index 0000000..02c46d3 --- /dev/null +++ b/man/write_allocations.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/randomization.R +\name{write_allocations} +\alias{write_allocations} +\title{write_allocations} +\usage{ +write_allocations(project_status_to_write, allocations, target_directory = ".") +} +\arguments{ +\item{project_status_to_write}{- the value of project_status to export. +Use 0 for development. Use 1 for Production} + +\item{allocations}{- the dataframe of randomization allocation data as +exported by `export_allocation_tables_from_project`} + +\item{target_directory}{- the directory into which the function should write the files} +} +\value{ +the full path to the allocations file +} +\description{ +Write the development or production randomization allocation table in + the same form in which it was loaded. 
+} +\examples{ +\dontrun{ +# get and print importable allocations if we need them for reference +allocations <- export_allocation_tables_from_project( + conn = source_conn, + project_id_to_export = source_project_id +) + +# write both files +walk(c(0,1), write_allocations, allocations, "output") +} +} From 88e5fd1eb1a74c9a2aa0c3bc5a086db7fe3c87f4 Mon Sep 17 00:00:00 2001 From: Philip Chase Date: Thu, 16 Feb 2023 22:34:47 -0500 Subject: [PATCH 09/14] Add tests for randomization.R functions --- R/randomization.R | 8 +- tests/testthat/helper.R | 19 +++ tests/testthat/randomization/.gitignore | 1 + .../randomization/create_allocation_rows.csv | 81 +++++++++ .../create_randomization_row.csv | 2 + .../export_allocation_tables_from_project.csv | 81 +++++++++ tests/testthat/randomization/make_test_data.R | 160 ++++++++++++++++++ .../randomization/redcap_events_arms.csv | 3 + .../randomization/redcap_events_metadata.csv | 5 + .../randomization/redcap_randomization.csv | 2 + .../redcap_randomization_allocation.csv | 81 +++++++++ tests/testthat/test-randomization.R | 106 ++++++++++++ 12 files changed, 546 insertions(+), 3 deletions(-) create mode 100644 tests/testthat/helper.R create mode 100644 tests/testthat/randomization/.gitignore create mode 100644 tests/testthat/randomization/create_allocation_rows.csv create mode 100644 tests/testthat/randomization/create_randomization_row.csv create mode 100644 tests/testthat/randomization/export_allocation_tables_from_project.csv create mode 100644 tests/testthat/randomization/make_test_data.R create mode 100644 tests/testthat/randomization/redcap_events_arms.csv create mode 100644 tests/testthat/randomization/redcap_events_metadata.csv create mode 100644 tests/testthat/randomization/redcap_randomization.csv create mode 100644 tests/testthat/randomization/redcap_randomization_allocation.csv create mode 100644 tests/testthat/test-randomization.R diff --git a/R/randomization.R b/R/randomization.R index 2cfe515..09fc859 100644 --- 
a/R/randomization.R +++ b/R/randomization.R @@ -104,7 +104,10 @@ write_allocations <- function(project_status_to_write, allocations, target_direc filename <- here::here( target_directory, - paste(base_name, names(project_statuses)[project_status_to_write + 1], date_time_stamp, sep = "_") + paste0( + paste(base_name, names(project_statuses)[project_status_to_write + 1], date_time_stamp, sep = "_"), + ".csv" + ) ) allocations |> @@ -133,7 +136,6 @@ write_allocations <- function(project_status_to_write, allocations, target_direc #' @return - A dataframe containing the current randomization row for the #' target project. #' -#' @return #' @export #' @importFrom rlang .data #' @@ -263,7 +265,7 @@ create_allocation_rows <- function(source_conn, dplyr::filter(.data$project_id == target_project_id) |> dplyr::collect() - # create row in redcap_randomization on target if needed + # create row in redcap_randomization on target if needed target_project_allocation_state <- dplyr::tbl(target_conn, "redcap_randomization_allocation") |> dplyr::filter(.data$rid == !!target_project_randomization_state$rid) |> dplyr::collect() diff --git a/tests/testthat/helper.R b/tests/testthat/helper.R new file mode 100644 index 0000000..a4c56d6 --- /dev/null +++ b/tests/testthat/helper.R @@ -0,0 +1,19 @@ +randomization_test_tables <- c( + "redcap_randomization", + "redcap_randomization_allocation", + "redcap_events_arms", + "redcap_events_metadata" +) + +create_a_table_from_test_data <- function(table_name, conn, directory_under_test_path) { + readr::read_csv(testthat::test_path(directory_under_test_path, paste0(table_name, ".csv"))) %>% + DBI::dbWriteTable(conn = conn, name = table_name, value = .) 
+} + +fix_randomization_tables <- function(conn) { + # fix target_field in redcap_randomization_allocation + DBI::dbExecute(conn, "ALTER TABLE redcap_randomization_allocation RENAME COLUMN target_field TO tf") + DBI::dbExecute(conn, "ALTER TABLE redcap_randomization_allocation ADD COLUMN target_field TEXT") + DBI::dbExecute(conn, "UPDATE redcap_randomization_allocation SET target_field = CAST(tf as INTEGER)") + DBI::dbExecute(conn, "ALTER TABLE redcap_randomization_allocation DROP COLUMN tf") +} diff --git a/tests/testthat/randomization/.gitignore b/tests/testthat/randomization/.gitignore new file mode 100644 index 0000000..09ebfa6 --- /dev/null +++ b/tests/testthat/randomization/.gitignore @@ -0,0 +1 @@ +!*.csv diff --git a/tests/testthat/randomization/create_allocation_rows.csv b/tests/testthat/randomization/create_allocation_rows.csv new file mode 100644 index 0000000..aab51b4 --- /dev/null +++ b/tests/testthat/randomization/create_allocation_rows.csv @@ -0,0 +1,81 @@ +aid,rid,project_status,is_used_by,group_id,source_field1,source_field2,target_field +121,4,0,NA,NA,f,fl,0 +122,4,0,NA,NA,f,fl,1 +123,4,0,NA,NA,m,fl,0 +124,4,0,NA,NA,m,fl,1 +125,4,0,NA,NA,f,ga,0 +126,4,0,NA,NA,f,ga,1 +127,4,0,NA,NA,m,ga,0 +128,4,0,NA,NA,m,ga,1 +129,4,0,NA,NA,f,fl,0 +130,4,0,NA,NA,f,fl,1 +131,4,0,NA,NA,m,fl,0 +132,4,0,NA,NA,m,fl,1 +133,4,0,NA,NA,f,ga,0 +134,4,0,NA,NA,f,ga,1 +135,4,0,NA,NA,m,ga,0 +136,4,0,NA,NA,m,ga,1 +137,4,0,NA,NA,f,fl,0 +138,4,0,NA,NA,f,fl,1 +139,4,0,NA,NA,m,fl,0 +140,4,0,NA,NA,m,fl,1 +141,4,0,NA,NA,f,ga,0 +142,4,0,NA,NA,f,ga,1 +143,4,0,NA,NA,m,ga,0 +144,4,0,NA,NA,m,ga,1 +145,4,0,NA,NA,f,fl,0 +146,4,0,NA,NA,f,fl,1 +147,4,0,NA,NA,m,fl,0 +148,4,0,NA,NA,m,fl,1 +149,4,0,NA,NA,f,ga,0 +150,4,0,NA,NA,f,ga,1 +151,4,0,NA,NA,m,ga,0 +152,4,0,NA,NA,m,ga,1 +153,4,0,NA,NA,f,fl,0 +154,4,0,NA,NA,f,fl,1 +155,4,0,NA,NA,m,fl,0 +156,4,0,NA,NA,m,fl,1 +157,4,0,NA,NA,f,ga,0 +158,4,0,NA,NA,f,ga,1 +159,4,0,NA,NA,m,ga,0 +160,4,0,NA,NA,m,ga,1 +161,4,1,NA,NA,f,fl,1 +162,4,1,NA,NA,f,fl,0 
+163,4,1,2,NA,m,fl,1 +164,4,1,3,NA,m,fl,0 +165,4,1,1,NA,f,ga,1 +166,4,1,NA,NA,f,ga,0 +167,4,1,NA,NA,m,ga,1 +168,4,1,NA,NA,m,ga,0 +169,4,1,NA,NA,f,fl,1 +170,4,1,NA,NA,f,fl,0 +171,4,1,NA,NA,m,fl,1 +172,4,1,NA,NA,m,fl,0 +173,4,1,NA,NA,f,ga,1 +174,4,1,NA,NA,f,ga,0 +175,4,1,NA,NA,m,ga,1 +176,4,1,NA,NA,m,ga,0 +177,4,1,NA,NA,f,fl,1 +178,4,1,NA,NA,f,fl,0 +179,4,1,NA,NA,m,fl,1 +180,4,1,NA,NA,m,fl,0 +181,4,1,NA,NA,f,ga,1 +182,4,1,NA,NA,f,ga,0 +183,4,1,NA,NA,m,ga,1 +184,4,1,NA,NA,m,ga,0 +185,4,1,NA,NA,f,fl,1 +186,4,1,NA,NA,f,fl,0 +187,4,1,NA,NA,m,fl,1 +188,4,1,NA,NA,m,fl,0 +189,4,1,NA,NA,f,ga,1 +190,4,1,NA,NA,f,ga,0 +191,4,1,NA,NA,m,ga,1 +192,4,1,NA,NA,m,ga,0 +193,4,1,NA,NA,f,fl,1 +194,4,1,NA,NA,f,fl,0 +195,4,1,NA,NA,m,fl,1 +196,4,1,NA,NA,m,fl,0 +197,4,1,NA,NA,f,ga,1 +198,4,1,NA,NA,f,ga,0 +199,4,1,NA,NA,m,ga,1 +200,4,1,NA,NA,m,ga,0 diff --git a/tests/testthat/randomization/create_randomization_row.csv b/tests/testthat/randomization/create_randomization_row.csv new file mode 100644 index 0000000..9fdc3c4 --- /dev/null +++ b/tests/testthat/randomization/create_randomization_row.csv @@ -0,0 +1,2 @@ +rid,project_id,stratified,group_by,target_event,source_event1,source_event2,target_field,source_field1,source_field2 +4,27,1,NA,87,87,87,randomization,sex,birthplace diff --git a/tests/testthat/randomization/export_allocation_tables_from_project.csv b/tests/testthat/randomization/export_allocation_tables_from_project.csv new file mode 100644 index 0000000..13fa8f6 --- /dev/null +++ b/tests/testthat/randomization/export_allocation_tables_from_project.csv @@ -0,0 +1,81 @@ +aid,project_status,randomization,sex,birthplace +1,0,0,f,fl +2,0,1,f,fl +3,0,0,m,fl +4,0,1,m,fl +5,0,0,f,ga +6,0,1,f,ga +7,0,0,m,ga +8,0,1,m,ga +9,0,0,f,fl +10,0,1,f,fl +11,0,0,m,fl +12,0,1,m,fl +13,0,0,f,ga +14,0,1,f,ga +15,0,0,m,ga +16,0,1,m,ga +17,0,0,f,fl +18,0,1,f,fl +19,0,0,m,fl +20,0,1,m,fl +21,0,0,f,ga +22,0,1,f,ga +23,0,0,m,ga +24,0,1,m,ga +25,0,0,f,fl +26,0,1,f,fl +27,0,0,m,fl +28,0,1,m,fl +29,0,0,f,ga 
+30,0,1,f,ga +31,0,0,m,ga +32,0,1,m,ga +33,0,0,f,fl +34,0,1,f,fl +35,0,0,m,fl +36,0,1,m,fl +37,0,0,f,ga +38,0,1,f,ga +39,0,0,m,ga +40,0,1,m,ga +81,1,1,f,fl +82,1,0,f,fl +86,1,0,f,ga +87,1,1,m,ga +88,1,0,m,ga +89,1,1,f,fl +90,1,0,f,fl +91,1,1,m,fl +92,1,0,m,fl +93,1,1,f,ga +94,1,0,f,ga +95,1,1,m,ga +96,1,0,m,ga +97,1,1,f,fl +98,1,0,f,fl +99,1,1,m,fl +100,1,0,m,fl +101,1,1,f,ga +102,1,0,f,ga +103,1,1,m,ga +104,1,0,m,ga +105,1,1,f,fl +106,1,0,f,fl +107,1,1,m,fl +108,1,0,m,fl +109,1,1,f,ga +110,1,0,f,ga +111,1,1,m,ga +112,1,0,m,ga +113,1,1,f,fl +114,1,0,f,fl +115,1,1,m,fl +116,1,0,m,fl +117,1,1,f,ga +118,1,0,f,ga +119,1,1,m,ga +120,1,0,m,ga +85,1,1,f,ga +83,1,1,m,fl +84,1,0,m,fl diff --git a/tests/testthat/randomization/make_test_data.R b/tests/testthat/randomization/make_test_data.R new file mode 100644 index 0000000..735b602 --- /dev/null +++ b/tests/testthat/randomization/make_test_data.R @@ -0,0 +1,160 @@ +library(redcapcustodian) +library(DBI) +library(tidyverse) +library(lubridate) +library(dotenv) + +# randomization/make_test_data.R +# This script is designed to extract the REDCap tables for two test projects +# on the same redcap system to test the randomization management functions. +# Should you need to regenerate the test data, follow the procedure here. +# +# Note: these randomization management tools do not support DAG group_ids. +# They could, but they don't as they were not needed for the project that +# inspired these tools. Do not try to use these on a project that uses +# DAGs in the randomization configuration. +# +# Create a .env file according to the specs of +# redcapcustodian::connect_to_redcap_db with credentials. Save it at the root +# of this R Project. +# +# The first project--the source project--should be a small project with just +# a few categorical variables. Randomization should be enabled. Two or more +# strata should be configured.The allocation tables for development and +# production should be generated and uploaded. 
The project should be moved +# to production and randomized. Do a full XML export of this project and note +# its project ID. Replace the value of project_id_to_read below with this new +# project ID. +# +# Create the second project as an XML import of the source project. Make sure +# randomization is turned off. Note this new project ID. Replace the value of +# target_project below with this new project ID. +# +# With these changes in place, you can run +conn <- connect_to_redcap_db() + +project_id_to_read <- 18 +target_project <- 27 + +# Create a one-project redcap_randomization with no columns with NA field name or event id. +# We need this form so that the field types are correct when read back from csv and then +# when pushed into a DBI-managed table. +redcap_randomization <- dplyr::tbl(conn, "redcap_randomization") |> + dplyr::filter(project_id == project_id_to_read) |> + dplyr::collect() |> + tidyr::pivot_longer( + cols = dplyr::contains("field"), + names_to = "field_label", + values_to = "field_value", + values_drop_na = T + ) |> + tidyr::pivot_longer( + cols = dplyr::contains("event"), + names_to = "event_label", + values_to = "event_value", + values_drop_na = T + ) |> + # pivot wider to restore the original shape of the data + tidyr::pivot_wider( + id_cols = c("rid", "project_id", "stratified", "group_by", "field_label", "field_value"), + names_from = "event_label", + values_from = "event_value" + ) |> + tidyr::pivot_wider( + id_cols = c("rid", "project_id", "stratified", "group_by", "target_event", "source_event1", "source_event2"), + names_from = "field_label", + values_from = "field_value" + ) + +rid_to_read <- redcap_randomization |> + dplyr::pull(rid) + +# Create a one-rid redcap_randomization_allocation with no columns with NA field name +# We need this form so that the field types are correct when read back from csv and then +# when pushed into a DBI-managed table. 
+redcap_randomization_allocation <- dplyr::tbl(conn, "redcap_randomization_allocation") |> + dplyr::filter(rid == rid_to_read) |> + collect() |> + tidyr::pivot_longer( + cols = dplyr::contains("field"), + names_to = "field_label", + values_to = "field_value", + values_drop_na = T + ) |> + # pivot wider to restore the original shape of the data + tidyr::pivot_wider( + id_cols = c("aid", "rid", "project_status", "is_used_by", "group_id"), + names_from = "field_label", + values_from = "field_value" + ) + +redcap_events_arms <- dplyr::tbl(conn, "redcap_events_arms") |> + filter(project_id %in% c(project_id_to_read, target_project)) |> + collect() + +redcap_events_metadata <- dplyr::tbl(conn, "redcap_events_metadata") |> + filter(arm_id %in% !!redcap_events_arms$arm_id) |> + collect() + +test_tables <- c( + "redcap_randomization", + "redcap_randomization_allocation", + "redcap_events_arms", + "redcap_events_metadata" +) + +write_to_testing_csv <- function(dataframe, basename) { + dataframe %>% write_csv(testthat::test_path("randomization", paste0(basename, ".csv"))) +} + +# write all of the test inputs +walk(test_tables, ~ write_to_testing_csv(get(.), .)) + +# write expected dataframe for export_allocation_tables_from_project +conn <- dbConnect(RSQLite::SQLite(), dbname = ":memory:") +walk(randomization_test_tables, create_a_table_from_test_data, conn, "randomization") +fix_randomization_tables(conn) + +project_id_to_export <- 18 +export_allocation_tables_from_project(conn, project_id_to_export) |> + write_csv(testthat::test_path("randomization", "export_allocation_tables_from_project.csv")) + +# write expected dataframe for create_randomization_row +conn <- DBI::dbConnect(RSQLite::SQLite(), dbname = ":memory:") +purrr::walk(randomization_test_tables, create_a_table_from_test_data, conn, "randomization") +fix_randomization_tables(conn) + +source_project_id <- 18 +target_project_id <- 27 + +create_randomization_row( + source_conn = conn, + target_conn = conn, + 
source_project_id = source_project_id, + target_project_id = target_project_id +) |> + write_csv(testthat::test_path("randomization", "create_randomization_row.csv")) + + +# write expected dataframe for create_allocation_rows +conn <- DBI::dbConnect(RSQLite::SQLite(), dbname = ":memory:") +purrr::walk(randomization_test_tables, create_a_table_from_test_data, conn, "randomization") +fix_randomization_tables(conn) + +source_project_id <- 18 +target_project_id <- 27 + +create_randomization_row( + source_conn = conn, + target_conn = conn, + source_project_id = source_project_id, + target_project_id = target_project_id +) + +create_allocation_rows( + source_conn = conn, + target_conn = conn, + source_project_id = source_project_id, + target_project_id = target_project_id +) |> + write_csv(testthat::test_path("randomization", "create_allocation_rows.csv")) diff --git a/tests/testthat/randomization/redcap_events_arms.csv b/tests/testthat/randomization/redcap_events_arms.csv new file mode 100644 index 0000000..f8d14bc --- /dev/null +++ b/tests/testthat/randomization/redcap_events_arms.csv @@ -0,0 +1,3 @@ +arm_id,project_id,arm_num,arm_name +19,18,1,Arm 1 +28,27,1,Arm 1 diff --git a/tests/testthat/randomization/redcap_events_metadata.csv b/tests/testthat/randomization/redcap_events_metadata.csv new file mode 100644 index 0000000..6d27578 --- /dev/null +++ b/tests/testthat/randomization/redcap_events_metadata.csv @@ -0,0 +1,5 @@ +event_id,arm_id,day_offset,offset_min,offset_max,descrip,external_id,custom_event_label +71,19,1,0,0,Event 1,NA, +73,19,2,0,0,Event 2,NA,NA +87,28,1,0,0,Event 1,NA, +88,28,2,0,0,Event 2,NA, diff --git a/tests/testthat/randomization/redcap_randomization.csv b/tests/testthat/randomization/redcap_randomization.csv new file mode 100644 index 0000000..68683b1 --- /dev/null +++ b/tests/testthat/randomization/redcap_randomization.csv @@ -0,0 +1,2 @@ 
+rid,project_id,stratified,group_by,target_event,source_event1,source_event2,target_field,source_field1,source_field2 +3,18,1,NA,71,71,71,randomization,sex,birthplace diff --git a/tests/testthat/randomization/redcap_randomization_allocation.csv b/tests/testthat/randomization/redcap_randomization_allocation.csv new file mode 100644 index 0000000..3ab7205 --- /dev/null +++ b/tests/testthat/randomization/redcap_randomization_allocation.csv @@ -0,0 +1,81 @@ +aid,rid,project_status,is_used_by,group_id,target_field,source_field1,source_field2 +1,3,0,NA,NA,0,f,fl +2,3,0,NA,NA,1,f,fl +3,3,0,NA,NA,0,m,fl +4,3,0,NA,NA,1,m,fl +5,3,0,NA,NA,0,f,ga +6,3,0,NA,NA,1,f,ga +7,3,0,NA,NA,0,m,ga +8,3,0,NA,NA,1,m,ga +9,3,0,NA,NA,0,f,fl +10,3,0,NA,NA,1,f,fl +11,3,0,NA,NA,0,m,fl +12,3,0,NA,NA,1,m,fl +13,3,0,NA,NA,0,f,ga +14,3,0,NA,NA,1,f,ga +15,3,0,NA,NA,0,m,ga +16,3,0,NA,NA,1,m,ga +17,3,0,NA,NA,0,f,fl +18,3,0,NA,NA,1,f,fl +19,3,0,NA,NA,0,m,fl +20,3,0,NA,NA,1,m,fl +21,3,0,NA,NA,0,f,ga +22,3,0,NA,NA,1,f,ga +23,3,0,NA,NA,0,m,ga +24,3,0,NA,NA,1,m,ga +25,3,0,NA,NA,0,f,fl +26,3,0,NA,NA,1,f,fl +27,3,0,NA,NA,0,m,fl +28,3,0,NA,NA,1,m,fl +29,3,0,NA,NA,0,f,ga +30,3,0,NA,NA,1,f,ga +31,3,0,NA,NA,0,m,ga +32,3,0,NA,NA,1,m,ga +33,3,0,NA,NA,0,f,fl +34,3,0,NA,NA,1,f,fl +35,3,0,NA,NA,0,m,fl +36,3,0,NA,NA,1,m,fl +37,3,0,NA,NA,0,f,ga +38,3,0,NA,NA,1,f,ga +39,3,0,NA,NA,0,m,ga +40,3,0,NA,NA,1,m,ga +81,3,1,NA,NA,1,f,fl +82,3,1,NA,NA,0,f,fl +86,3,1,NA,NA,0,f,ga +87,3,1,NA,NA,1,m,ga +88,3,1,NA,NA,0,m,ga +89,3,1,NA,NA,1,f,fl +90,3,1,NA,NA,0,f,fl +91,3,1,NA,NA,1,m,fl +92,3,1,NA,NA,0,m,fl +93,3,1,NA,NA,1,f,ga +94,3,1,NA,NA,0,f,ga +95,3,1,NA,NA,1,m,ga +96,3,1,NA,NA,0,m,ga +97,3,1,NA,NA,1,f,fl +98,3,1,NA,NA,0,f,fl +99,3,1,NA,NA,1,m,fl +100,3,1,NA,NA,0,m,fl +101,3,1,NA,NA,1,f,ga +102,3,1,NA,NA,0,f,ga +103,3,1,NA,NA,1,m,ga +104,3,1,NA,NA,0,m,ga +105,3,1,NA,NA,1,f,fl +106,3,1,NA,NA,0,f,fl +107,3,1,NA,NA,1,m,fl +108,3,1,NA,NA,0,m,fl +109,3,1,NA,NA,1,f,ga +110,3,1,NA,NA,0,f,ga +111,3,1,NA,NA,1,m,ga +112,3,1,NA,NA,0,m,ga 
+113,3,1,NA,NA,1,f,fl +114,3,1,NA,NA,0,f,fl +115,3,1,NA,NA,1,m,fl +116,3,1,NA,NA,0,m,fl +117,3,1,NA,NA,1,f,ga +118,3,1,NA,NA,0,f,ga +119,3,1,NA,NA,1,m,ga +120,3,1,NA,NA,0,m,ga +85,3,1,1,NA,1,f,ga +83,3,1,2,NA,1,m,fl +84,3,1,3,NA,0,m,fl diff --git a/tests/testthat/test-randomization.R b/tests/testthat/test-randomization.R new file mode 100644 index 0000000..b0df863 --- /dev/null +++ b/tests/testthat/test-randomization.R @@ -0,0 +1,106 @@ +testthat::test_that("export_allocation_tables_from_project works", { + # Create test tables + conn <- DBI::dbConnect(RSQLite::SQLite(), dbname = ":memory:") + purrr::walk(randomization_test_tables, create_a_table_from_test_data, conn, "randomization") + fix_randomization_tables(conn) + + project_id_to_export <- 18 + + testthat::expect_equal( + export_allocation_tables_from_project(conn, project_id_to_export), + readr::read_csv( + testthat::test_path("randomization", "export_allocation_tables_from_project.csv")) %>% + dplyr::mutate(randomization = as.character(randomization) + ) + ) +}) + +testthat::test_that("create_randomization_row works", { + # Create test tables + conn <- DBI::dbConnect(RSQLite::SQLite(), dbname = ":memory:") + purrr::walk(randomization_test_tables, create_a_table_from_test_data, conn, "randomization") + fix_randomization_tables(conn) + + source_project_id <- 18 + target_project_id <- 27 + + testthat::expect_equal( + create_randomization_row( + source_conn = conn, + target_conn = conn, + source_project_id = source_project_id, + target_project_id = target_project_id + ), + readr::read_csv( + testthat::test_path("randomization", "create_randomization_row.csv")) %>% + dplyr::mutate(group_by = as.integer(group_by)) + ) +}) + +testthat::test_that("create_allocation_rows works", { + conn <- DBI::dbConnect(RSQLite::SQLite(), dbname = ":memory:") + purrr::walk(randomization_test_tables, create_a_table_from_test_data, conn, "randomization") + fix_randomization_tables(conn) + + source_project_id <- 18 + target_project_id 
<- 27 + + create_randomization_row( + source_conn = conn, + target_conn = conn, + source_project_id = source_project_id, + target_project_id = target_project_id + ) + + testthat::expect_equal( + create_allocation_rows( + source_conn = conn, + target_conn = conn, + source_project_id = source_project_id, + target_project_id = target_project_id + ), + readr::read_csv( + testthat::test_path("randomization", "create_allocation_rows.csv")) %>% + dplyr::mutate(group_id = as.integer(group_id)) %>% + dplyr::mutate(target_field = as.character(target_field)) + ) +}) + +testthat::test_that("update_production_allocation_state works", { + conn <- DBI::dbConnect(RSQLite::SQLite(), dbname = ":memory:") + purrr::walk(randomization_test_tables, create_a_table_from_test_data, conn, "randomization") + fix_randomization_tables(conn) + + source_project_id <- 18 + target_project_id <- 27 + + target_project_randomization_state <- create_randomization_row( + source_conn = conn, + target_conn = conn, + source_project_id = source_project_id, + target_project_id = target_project_id + ) + + target_project_allocation_state <- create_allocation_rows( + source_conn = conn, + target_conn = conn, + source_project_id = source_project_id, + target_project_id = target_project_id + ) + + # now set some aids in the source so we can watch them sync + aids_to_set <- c(seq(91, 95)) + DBI::dbExecute(conn, "update redcap_randomization_allocation set is_used_by = aid where aid in (91,92,93,94,95)") + target_project_allocation_update <- update_production_allocation_state( + source_conn = conn, + target_conn = conn, + source_project_id = source_project_id, + target_rid = target_project_randomization_state$rid + ) + testthat::expect_equal( + target_project_allocation_update$update_records %>% + arrange(is_used_by) %>% + dplyr::pull(is_used_by), + aids_to_set + ) +}) From dd07f951e204a90f3d926c562ad8eeb5f83a31f1 Mon Sep 17 00:00:00 2001 From: Philip Chase Date: Fri, 17 Feb 2023 14:58:15 -0500 Subject: [PATCH 
10/14] Add docs/randomization_management.md Add links to randomization_management.md in the README.md. Add 'Areas of REDCap interest' section to README.md. --- README.md | 8 ++++++++ docs/randomization_management.md | 31 +++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 docs/randomization_management.md diff --git a/README.md b/README.md index 7f324d8..7b1e2d4 100644 --- a/README.md +++ b/README.md @@ -37,3 +37,11 @@ cd redcapcustodian ``` The procedure to use the study template is more involved, but it offers the most reward as well. See [Writing your own redcapcustodian Rscripts](./docs/custom_rscript.md). It might also help to look at the [Developer Notes](./docs/developer_notes.md) + +# Areas of REDCap interest + +While much of the REDCap Custodian repository and package is about automating workflows, the package includes tools specific to REDCap. + +- For API token management, see [Credential Scraping](docs/credential-scraping.html) +- For tools and procedures for moving production projects that use randomization, see [Randomization Management](docs/randomization_management.md) +- For bulk rights expiration, see the function `expire_user_project_rights()` in the package docs diff --git a/docs/randomization_management.md b/docs/randomization_management.md new file mode 100644 index 0000000..2374971 --- /dev/null +++ b/docs/randomization_management.md @@ -0,0 +1,31 @@ +# Randomization Management + +REDCap Custodian contains a suite of functions to help a developer work with randomization data in ways that are not supported within the REDCap code. + +## Moving a Production project with allocated randomization records + +These tools were created to allow a production project with randomization turned on to be moved to another REDCap project. REDCap doesn't allow that, so the work has to be done in the backend with database reads and writes. 
As the tables involved have REDCap project IDs, randomization IDs, event IDs, and allocation IDs embedded, the work requires multiple transformations before writing the randomization configuration to the target project. + +An example ETL is saved at [`../etl/copy_allocated_randomization.R`](../etl/copy_allocated_randomization.R). That script and the functions it calls were designed to fit into this workflow: + +### Preparation +1. Start with a production project with randomization turned on and configured, data entered and records randomized. This is the _source project_. Note its project ID. +1. Copy/clone the source project. Either use the _Copy the Project_ button in REDCap Project Setup, or do an XML export and an import. This new project is the _target project_. Note its project ID. +1. Turn off randomization in the target project if the copy/cloning process turned it on. This probably seems strange, but it's needed to allow data import into the randomization field and to trick REDCap into moving the project to production with data in the "randomization" field and the assignments in the allocation table. +1. Do any reconfiguration work needed on the target project. You should be able to move the fields to other forms and to other events if needed. That said, do not change the names of the stratification and randomization fields. +1. Copy the script `./etl/copy_allocated_randomization.R` and set your own values for source and target project ids. +1. Run your `copy_allocated_randomization.R` script. It should mirror the randomization configuration from the source project to the target project. If you cloned the project with the _Copy the Project_ button, the script will complain that some configuration data exists. That is fine. Regardless of how you cloned the project, the script should complain that you have not met the requirements for turning on randomization. You are _supposed_ to see that warning at this point. + +### Activation +1. 
Take the source project offline. +1. If any changes have occurred to the data in the source project since you cloned it, re-export that data from the source project and import it into the target project. +1. Immediately move the target project to production. +1. Immediately re-run your `copy_allocated_randomization.R` script. It should turn on randomization in the target project. +1. Revoke access to the source project. +1. You are done. + +## Limitations + +These randomization management tools do not support DAG group_ids as randomization variables. They could, but they don't as they were not needed for the project that inspired these tools. Do not try to use these on a project that uses DAGs in the randomization configuration. + +The tools do not support changing the randomization configuration. They might form a good foundation for that, but they do not support it. From 293f070b173eb4efa48c91c5027519988c0143cd Mon Sep 17 00:00:00 2001 From: Philip Chase Date: Fri, 17 Feb 2023 16:10:41 -0500 Subject: [PATCH 11/14] Don't use here::here() in write_allocations() --- R/randomization.R | 9 +++------ etl/copy_allocated_randomization.R | 7 ++++++- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/R/randomization.R b/R/randomization.R index 09fc859..02c0ded 100644 --- a/R/randomization.R +++ b/R/randomization.R @@ -98,16 +98,13 @@ write_allocations <- function(project_status_to_write, allocations, target_direc date_time_stamp <- format(get_script_run_time(), "%Y%m%d%H%M%S") project_statuses <- stats::setNames(c(0, 1), c("development", "production")) - if (!fs::dir_exists(here::here(target_directory))) { - fs::dir_create(here::here(target_directory)) - } - - filename <- here::here( + filename <- paste( target_directory, paste0( paste(base_name, names(project_statuses)[project_status_to_write + 1], date_time_stamp, sep = "_"), ".csv" - ) + ), + sep = "/" ) allocations |> diff --git a/etl/copy_allocated_randomization.R b/etl/copy_allocated_randomization.R index 
e9fc783..a3edca7 100644 --- a/etl/copy_allocated_randomization.R +++ b/etl/copy_allocated_randomization.R @@ -18,7 +18,12 @@ allocations <- export_allocation_tables_from_project( project_id_to_export = source_project_id ) -walk(c(0,1), write_allocations, allocations, "output") +target_directory = "output" +if (!fs::dir_exists(here::here(target_directory))) { + fs::dir_create(here::here(target_directory)) +} + +walk(c(0,1), write_allocations, allocations, target_directory) # Configure randomization on the target project target_project_randomization_state <- create_randomization_row( From 1a852cc509da92932cc973f3067063a366b3251b Mon Sep 17 00:00:00 2001 From: Philip Chase Date: Fri, 17 Feb 2023 18:04:45 -0500 Subject: [PATCH 12/14] Fix package check errors --- DESCRIPTION | 3 +-- R/randomization.R | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 08f7cea..cd8c502 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -52,8 +52,7 @@ Imports: tibble, tidyr, readr, - vctrs, - here + vctrs Suggests: testthat (>= 3.0.0), digest, diff --git a/R/randomization.R b/R/randomization.R index 02c0ded..3e9232a 100644 --- a/R/randomization.R +++ b/R/randomization.R @@ -199,7 +199,7 @@ create_randomization_row <- function(source_conn, dplyr::inner_join(source_event_ids |> dplyr::select("event_id", "descrip"), by = c("event_value" = "event_id")) |> dplyr::inner_join(target_event_ids |> dplyr::select("event_id", "descrip"), by = "descrip") |> dplyr::select(-"event_value", "descrip") |> - dplyr::rename(event_value = .data$event_id) |> + dplyr::rename(event_value = "event_id") |> # pivot wider to restore the original shape of the data tidyr::pivot_wider( id_cols = c("rid", "project_id", "stratified", "group_by", "field_label", "field_value"), From 5557eed37c4309259f8ba384088fb2cafbac05f2 Mon Sep 17 00:00:00 2001 From: Philip Chase Date: Tue, 21 Feb 2023 10:02:46 -0500 Subject: [PATCH 13/14] Update testing image used at github --- 
.github/workflows/run-tests.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-tests.yaml b/.github/workflows/run-tests.yaml index 161d747..f1cb221 100644 --- a/.github/workflows/run-tests.yaml +++ b/.github/workflows/run-tests.yaml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest container: - image: ghcr.io/ctsit/rstudio-ci:4.1.0 + image: ghcr.io/ctsit/rstudio-ci:4.2.1 env: CI: "TRUE" From 869ed2836a73fe780348c529b7023565ca35c7b9 Mon Sep 17 00:00:00 2001 From: Philip Chase Date: Tue, 21 Feb 2023 10:19:18 -0500 Subject: [PATCH 14/14] Bump VERSION and update NEWS.md for release 1.6.0 --- DESCRIPTION | 2 +- NEWS.md | 13 +++++++++++++ VERSION | 2 +- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index cd8c502..0da829a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: redcapcustodian Type: Package Title: System data cleaning for REDCap -Version: 1.5.0 +Version: 1.6.0 Authors@R: c( person("Philip", "Chase", email = "pbc@ufl.edu", diff --git a/NEWS.md b/NEWS.md index 54a3094..4e816f5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,6 +3,19 @@ All notable changes to the redcapcustodian package and its contained scripts wil This project adheres to [Semantic Versioning](http://semver.org/). 
+## [1.6.0] - 2023-02-21 +### Added +- Add randomization management functions and a sample ETL (Philip Chase) +- Add batch_size parm to dbx calls in sync_table_2 (Philip Chase) +- Add batch_size parm to dbx calls in sync_table Prevents possible error: Expression tree is too large (maximum depth 1000) (Kyle Chesney) + +### Changed +- Update testing image used at github (Philip Chase) +- Eschew deprecated tidyselect features (Philip Chase) +- Modernize tests of sync_table_2 (Philip Chase) +- Address fatal bug in sync_table caused when delete = T but there are no records to delete (Kyle Chesney) + + ## [1.5.0] - 2023-01-25 ### Added - Create write_summary_metrics function, corresponding schema and test (Kyle Chesney) diff --git a/VERSION b/VERSION index bc80560..dc1e644 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.5.0 +1.6.0