diff --git a/analysis/institution-1/figure-png/marginals-inst2-1.png b/analysis/institution-1/figure-png/marginals-inst2-1.png new file mode 100644 index 0000000..2ef6073 Binary files /dev/null and b/analysis/institution-1/figure-png/marginals-inst2-1.png differ diff --git a/analysis/institution-1/figure-png/marginals-inst2-10.png b/analysis/institution-1/figure-png/marginals-inst2-10.png new file mode 100644 index 0000000..0c994c4 Binary files /dev/null and b/analysis/institution-1/figure-png/marginals-inst2-10.png differ diff --git a/analysis/institution-1/figure-png/marginals-inst2-11.png b/analysis/institution-1/figure-png/marginals-inst2-11.png new file mode 100644 index 0000000..0688164 Binary files /dev/null and b/analysis/institution-1/figure-png/marginals-inst2-11.png differ diff --git a/analysis/institution-1/figure-png/marginals-inst2-12.png b/analysis/institution-1/figure-png/marginals-inst2-12.png new file mode 100644 index 0000000..4050098 Binary files /dev/null and b/analysis/institution-1/figure-png/marginals-inst2-12.png differ diff --git a/analysis/institution-1/figure-png/marginals-inst2-13.png b/analysis/institution-1/figure-png/marginals-inst2-13.png new file mode 100644 index 0000000..6f8667e Binary files /dev/null and b/analysis/institution-1/figure-png/marginals-inst2-13.png differ diff --git a/analysis/institution-1/figure-png/marginals-inst2-14.png b/analysis/institution-1/figure-png/marginals-inst2-14.png new file mode 100644 index 0000000..1b4c6a9 Binary files /dev/null and b/analysis/institution-1/figure-png/marginals-inst2-14.png differ diff --git a/analysis/institution-1/figure-png/marginals-inst2-15.png b/analysis/institution-1/figure-png/marginals-inst2-15.png new file mode 100644 index 0000000..df3c113 Binary files /dev/null and b/analysis/institution-1/figure-png/marginals-inst2-15.png differ diff --git a/analysis/institution-1/figure-png/marginals-inst2-16.png b/analysis/institution-1/figure-png/marginals-inst2-16.png new file 
mode 100644 index 0000000..d462f96 Binary files /dev/null and b/analysis/institution-1/figure-png/marginals-inst2-16.png differ diff --git a/analysis/institution-1/figure-png/marginals-inst2-17.png b/analysis/institution-1/figure-png/marginals-inst2-17.png new file mode 100644 index 0000000..ffa2eb9 Binary files /dev/null and b/analysis/institution-1/figure-png/marginals-inst2-17.png differ diff --git a/analysis/institution-1/figure-png/marginals-inst2-18.png b/analysis/institution-1/figure-png/marginals-inst2-18.png new file mode 100644 index 0000000..d392a57 Binary files /dev/null and b/analysis/institution-1/figure-png/marginals-inst2-18.png differ diff --git a/analysis/institution-1/figure-png/marginals-inst2-19.png b/analysis/institution-1/figure-png/marginals-inst2-19.png new file mode 100644 index 0000000..dbc3819 Binary files /dev/null and b/analysis/institution-1/figure-png/marginals-inst2-19.png differ diff --git a/analysis/institution-1/figure-png/marginals-inst2-2.png b/analysis/institution-1/figure-png/marginals-inst2-2.png new file mode 100644 index 0000000..5fc0916 Binary files /dev/null and b/analysis/institution-1/figure-png/marginals-inst2-2.png differ diff --git a/analysis/institution-1/figure-png/marginals-inst2-3.png b/analysis/institution-1/figure-png/marginals-inst2-3.png new file mode 100644 index 0000000..32b15e4 Binary files /dev/null and b/analysis/institution-1/figure-png/marginals-inst2-3.png differ diff --git a/analysis/institution-1/figure-png/marginals-inst2-4.png b/analysis/institution-1/figure-png/marginals-inst2-4.png new file mode 100644 index 0000000..785de2b Binary files /dev/null and b/analysis/institution-1/figure-png/marginals-inst2-4.png differ diff --git a/analysis/institution-1/figure-png/marginals-inst2-5.png b/analysis/institution-1/figure-png/marginals-inst2-5.png new file mode 100644 index 0000000..8ecdee9 Binary files /dev/null and b/analysis/institution-1/figure-png/marginals-inst2-5.png differ diff --git 
a/analysis/institution-1/figure-png/marginals-inst2-6.png b/analysis/institution-1/figure-png/marginals-inst2-6.png new file mode 100644 index 0000000..7d83b64 Binary files /dev/null and b/analysis/institution-1/figure-png/marginals-inst2-6.png differ diff --git a/analysis/institution-1/figure-png/marginals-inst2-7.png b/analysis/institution-1/figure-png/marginals-inst2-7.png new file mode 100644 index 0000000..61365b3 Binary files /dev/null and b/analysis/institution-1/figure-png/marginals-inst2-7.png differ diff --git a/analysis/institution-1/figure-png/marginals-inst2-8.png b/analysis/institution-1/figure-png/marginals-inst2-8.png new file mode 100644 index 0000000..da26aa1 Binary files /dev/null and b/analysis/institution-1/figure-png/marginals-inst2-8.png differ diff --git a/analysis/institution-1/figure-png/marginals-inst2-9.png b/analysis/institution-1/figure-png/marginals-inst2-9.png new file mode 100644 index 0000000..6bef8ae Binary files /dev/null and b/analysis/institution-1/figure-png/marginals-inst2-9.png differ diff --git a/analysis/institution-1/institution-1.R b/analysis/institution-1/institution-1.R index ecdc7da..15d5d4f 100644 --- a/analysis/institution-1/institution-1.R +++ b/analysis/institution-1/institution-1.R @@ -28,7 +28,6 @@ ds <- # ) # ---- marginals-inst1 --------------------------------------------------------------- -# Inspect continuous variables TabularManifest::histogram_discrete(ds, variable_name="inst1_country_cut3") TabularManifest::histogram_discrete(ds, variable_name="inst1_county_usa") TabularManifest::histogram_discrete(ds, variable_name="inst1_status") @@ -57,6 +56,27 @@ TabularManifest::histogram_discrete(ds, variable_name="inst1_complete") # cat('TabularManifest::histogram_discrete(ds, variable_name="', column,'")\n', sep="") # } +# ---- marginals-inst2 --------------------------------------------------------------- +TabularManifest::histogram_discrete( ds, "inst2_instance_count") 
+TabularManifest::histogram_continuous(ds, "inst2_instance_count", bin_width = 1, rounded_digits = 1) +TabularManifest::histogram_discrete( ds, "inst2_client_limited") +TabularManifest::histogram_discrete( ds, "inst2_client_institution_single") +TabularManifest::histogram_discrete( ds, "inst2_client_institution_multiple") +TabularManifest::histogram_discrete( ds, "inst2_client_other") +TabularManifest::histogram_discrete( ds, "inst2_start_year") +TabularManifest::histogram_continuous(ds, "inst2_start_year" , bin_width = 1, rounded_digits = 1) +TabularManifest::histogram_continuous(ds, "inst2_user_count" , bin_width = 1000, rounded_digits = 1) +TabularManifest::histogram_continuous(ds, "inst2_project_count" , bin_width = 1000, rounded_digits = 1) +TabularManifest::histogram_continuous(ds, "inst2_log_count_recent" , bin_width = 1000000, rounded_digits = 1) +TabularManifest::histogram_continuous(ds, "inst2_em_count" , bin_width = 5, rounded_digits = 1) +TabularManifest::histogram_discrete( ds, "inst2_allow_create") +TabularManifest::histogram_discrete( ds, "inst2_allow_production_move") +TabularManifest::histogram_discrete( ds, "inst2_allow_production_change") +TabularManifest::histogram_discrete( ds, "inst2_allow_repeating_change") +TabularManifest::histogram_discrete( ds, "inst2_allow_events") +TabularManifest::histogram_discrete( ds, "inst2_authenticate") +TabularManifest::histogram_discrete( ds, "inst2_complete") + # ---- scatterplots ------------------------------------------------------------ # g1 <- # ggplot(ds, aes(x=horsepower, y=quarter_mile_sec, color=forward_gear_count_f)) + diff --git a/analysis/institution-1/institution-1.Rmd b/analysis/institution-1/institution-1.Rmd index e24ccf3..ee3fb2e 100644 --- a/analysis/institution-1/institution-1.Rmd +++ b/analysis/institution-1/institution-1.Rmd @@ -78,12 +78,13 @@ Notes --------------------------------------------------------------------------- 1. 
The current report covers `r nrow(ds)` survey responses to the institutional survey. -1. Other information is available at: +1. Other survey information is available at: Unanswered Questions --------------------------------------------------------------------------- +1. Please see the full list at 1. What does `VS` stand for? How was it measured? 1. Where the cars at the Philly track measured with the same phluguerstometer and the Cleveland track? @@ -95,12 +96,18 @@ Answered Questions Marginals =========================================================================== -Institution 1 Instrument +Institution 1 (Instrument) --------------------------------------------------------------------------- ```{r marginals-inst1, echo=echo_chunks, message=message_chunks} ``` +Institution 2 (Instrument) +--------------------------------------------------------------------------- + +```{r marginals-inst2, echo=echo_chunks, message=message_chunks} +``` + Scatterplots =========================================================================== diff --git a/analysis/institution-1/institution-1.html b/analysis/institution-1/institution-1.html index 04b42dc..9ecdf36 100644 --- a/analysis/institution-1/institution-1.html +++ b/analysis/institution-1/institution-1.html @@ -1508,13 +1508,14 @@

1.1 Notes

  1. The current report covers 82 survey responses to the institutional survey.
  2. -
  3. Other information is available at: https://redcap.link/bmw_institutiondata
  4. +
  5. Other survey information is available at: https://redcap.link/bmw_institutiondata

1.2 Unanswered Questions

    +
  1. Please see the full list at https://github.com/OuhscBbmc/redcap-business-model-survey-2024/blob/main/documentation/notes-ingestion.md
  2. What does VS stand for? How was it measured?
  3. Were the cars at the Philly track measured with the same phluguerstometer as the Cleveland track?
  4. @@ -1529,9 +1530,14 @@

    1.3 Answered

    2 Marginals

    2.1 Institution 1 -Instrument

    +(Instrument)

    +
    +

    2.2 Institution 2 +(Instrument)

    +

    +

3 Scatterplots

@@ -1640,7 +1646,7 @@

6 Session ────────────────────────────────────────────────────────────────────────────── -

Report rendered by wibeasley at 2024-09-15, 13:57 -0500 in 5 +

Report rendered by wibeasley at 2024-09-15, 15:16 -0500 in 9 seconds.

diff --git a/analysis/institution-1/institution-1.md b/analysis/institution-1/institution-1.md index 9c1719e..c484034 100644 --- a/analysis/institution-1/institution-1.md +++ b/analysis/institution-1/institution-1.md @@ -47,12 +47,13 @@ Notes --------------------------------------------------------------------------- 1. The current report covers 82 survey responses to the institutional survey. -1. Other information is available at: +1. Other survey information is available at: Unanswered Questions --------------------------------------------------------------------------- +1. Please see the full list at 1. What does `VS` stand for? How was it measured? 1. Where the cars at the Philly track measured with the same phluguerstometer and the Cleveland track? @@ -64,11 +65,16 @@ Answered Questions Marginals =========================================================================== -Institution 1 Instrument +Institution 1 (Instrument) --------------------------------------------------------------------------- ![](figure-png/marginals-inst1-1.png)![](figure-png/marginals-inst1-2.png)![](figure-png/marginals-inst1-3.png)![](figure-png/marginals-inst1-4.png)![](figure-png/marginals-inst1-5.png)![](figure-png/marginals-inst1-6.png)![](figure-png/marginals-inst1-7.png)![](figure-png/marginals-inst1-8.png)![](figure-png/marginals-inst1-9.png)![](figure-png/marginals-inst1-10.png)![](figure-png/marginals-inst1-11.png)![](figure-png/marginals-inst1-12.png)![](figure-png/marginals-inst1-13.png)![](figure-png/marginals-inst1-14.png)![](figure-png/marginals-inst1-15.png)![](figure-png/marginals-inst1-16.png)![](figure-png/marginals-inst1-17.png)![](figure-png/marginals-inst1-18.png)![](figure-png/marginals-inst1-19.png)![](figure-png/marginals-inst1-20.png) +Institution 2 (Instrument) +--------------------------------------------------------------------------- + 
+![](figure-png/marginals-inst2-1.png)![](figure-png/marginals-inst2-2.png)![](figure-png/marginals-inst2-3.png)![](figure-png/marginals-inst2-4.png)![](figure-png/marginals-inst2-5.png)![](figure-png/marginals-inst2-6.png)![](figure-png/marginals-inst2-7.png)![](figure-png/marginals-inst2-8.png)![](figure-png/marginals-inst2-9.png)![](figure-png/marginals-inst2-10.png)![](figure-png/marginals-inst2-11.png)![](figure-png/marginals-inst2-12.png)![](figure-png/marginals-inst2-13.png)![](figure-png/marginals-inst2-14.png)![](figure-png/marginals-inst2-15.png)![](figure-png/marginals-inst2-16.png)![](figure-png/marginals-inst2-17.png)![](figure-png/marginals-inst2-18.png)![](figure-png/marginals-inst2-19.png) + Scatterplots =========================================================================== @@ -190,4 +196,4 @@ For the sake of documentation and reproducibility, the current report was render -Report rendered by wibeasley at 2024-09-15, 13:57 -0500 in 5 seconds. +Report rendered by wibeasley at 2024-09-15, 15:16 -0500 in 9 seconds. 
diff --git a/data-public/derived/variable-label.parquet b/data-public/derived/variable-label.parquet index c604433..f577c10 100644 Binary files a/data-public/derived/variable-label.parquet and b/data-public/derived/variable-label.parquet differ diff --git a/data-public/metadata/variable-label.csv b/data-public/metadata/variable-label.csv index 921fbb6..dbc3e85 100644 --- a/data-public/metadata/variable-label.csv +++ b/data-public/metadata/variable-label.csv @@ -26,3 +26,16 @@ inst1_dept_home,5,"Biostats/Analytics",5, inst1_dept_home,7,"Clinical Trials",6, inst1_dept_home,6,"Dedicated Team",7, inst1_dept_home,98,"Other",98, +inst2_client,1,"limited",1, +inst2_client,2,"institution_single",2, +inst2_client,3,"institution_multiple",3, +inst2_client,98,"other",98, +inst2_allow_production_change,0,"Never",0, +inst2_allow_production_change,1,"Yes, if no existing fields are modified",1, +inst2_allow_production_change,2,"Yes, if empty or no existing fields are modified",2, +inst2_allow_production_change,3,"Yes, if not critical issues exist",3, +inst2_allow_production_change,4,"Yes, if empty or no critical issues exists",4, +inst2_authenticate,1,"table",1, +inst2_authenticate,2,"institution",2, +inst2_authenticate,3,"institution & table",3, +inst2_authenticate,98,"I don't know",98, \ No newline at end of file diff --git a/documentation/notes-ingestion.md b/documentation/notes-ingestion.md index f09764a..75835dd 100644 --- a/documentation/notes-ingestion.md +++ b/documentation/notes-ingestion.md @@ -1,5 +1,13 @@ Notes for Ingestion Process ================ -1. For `county`, both "1" and "13" are labelled "Australia". -1. For `program_funding`, future surveys might say "NIH CTSA/CTR Grant" instead of just "NIH CTSA Grant". +1. For `county`, both "1" and "13" are labelled "Australia". +1. For `program_funding`, future surveys might say "NIH CTSA/CTR Grant" instead of just "NIH CTSA Grant". +1. 
For `redcap_pop`, how can the checkboxes be selected for both "2" ("One Institution/Organization") and "3" ("Multiple Institutions/Organizations")? +1. For `redcap_instance_count`, the max is "147". Is this likely? If not, what is a reasonable upper bound? +1. If we release the (multivariate) dataset publicly, let's cap some variables so they don't identify the institution. I think marginals are fine (eg, the earliest year was 2004), but not if it can re-identify their other responses. Variables to cap include: + * `redcap_start_date`, limit lower bound to ~2010, . + * `active_users`, limit upper bound to ~4k. + * `active_projects`, limit upper bound to ~4k. + * `logged_events`, limit upper bound to ~1M. + * `em_no`, limit upper bound to ~25. diff --git a/manipulation/institution-ellis.R b/manipulation/institution-ellis.R index 17a69df..c1fce0d 100644 --- a/manipulation/institution-ellis.R +++ b/manipulation/institution-ellis.R @@ -180,20 +180,22 @@ ds <- inst1_salary_mid = `sal_mid`, inst1_salary_senior = `sal_sen`, inst1_complete = `institutional_questionnaire_complete`, - # redcap_instance_count = `redcap_instance_count`, - # redcap_pop = `redcap_pop`, - # redcap_start_date = `redcap_start_date`, - # active_users = `active_users`, - # active_projects = `active_projects`, - # logged_events = `logged_events`, - # em_no = `em_no`, - # ccus_createprojects = `ccus_createprojects`, - # ccus_moveprod = `ccus_moveprod`, - # ccus_changerequests = `ccus_changerequests`, - # ccus_repeatingsetup = `ccus_repeatingsetup`, - # ccus_addevents = `ccus_addevents`, - # ccus_authenticate = `ccus_authenticate`, - # institutional_questionnaire2_complete = `institutional_questionnaire2_complete`, + + inst2_instance_count = `redcap_instance_count`, + inst2_client = `redcap_pop`, + inst2_start_date = `redcap_start_date`, + inst2_user_count = `active_users`, + inst2_project_count = `active_projects`, + inst2_log_count_recent = `logged_events`, + inst2_em_count = `em_no`, + 
inst2_allow_create = `ccus_createprojects`, + inst2_allow_production_move = `ccus_moveprod`, + inst2_allow_production_change = `ccus_changerequests`, + inst2_allow_repeating_change = `ccus_repeatingsetup`, + inst2_allow_events = `ccus_addevents`, + inst2_authenticate = `ccus_authenticate`, + inst2_complete = `institutional_questionnaire2_complete`, + # manageusers = `manageusers`, # create = `create`, # create_charge = `create_charge`, @@ -301,7 +303,24 @@ ds <- -inst1_country, ) #|> View() -# ds$inst1_status +# ---- groom-institution-2 ----------------------------------------------------- +ds <- +ds |> + dplyr::mutate( + inst2_start_year = as.integer(lubridate::year(inst2_start_date)), + inst2_allow_create = as.logical(inst2_allow_create ), + inst2_allow_production_move = as.logical(inst2_allow_production_move ), + inst2_allow_repeating_change = as.logical(inst2_allow_repeating_change ), + inst2_allow_events = as.logical(inst2_allow_events ), + inst2_complete = REDCapR::constant_to_form_completion(inst2_complete), + ) |> + map_to_checkbox("inst2_client") |> + map_to_radio( "inst2_allow_production_change") |> + map_to_radio( "inst2_authenticate") |> + # dplyr::select(tidyselect::starts_with("inst2_")) |> + dplyr::select( + -inst2_start_date + ) # ---- reestablish-column-order ------------------------------------------------ ds <- @@ -310,9 +329,11 @@ ds <- institution_index, tidyselect::matches("inst1_(?!complete)", perl = TRUE), # A "negative-lookahead" inst1_complete, + tidyselect::matches("inst2_(?!complete)", perl = TRUE), # A "negative-lookahead" + inst2_complete, ) -# ---- verify-values ----------------------------------------------------------- +# ---- verify-values-inst1 ----------------------------------------------------------- # OuhscMunge::verify_value_headstart(ds) checkmate::assert_integer( ds$institution_index , any.missing=F , lower=1, upper=999 , unique=T) checkmate::assert_character(ds$inst1_country_cut3 , any.missing=F , pattern="^.{3,9}$" ) @@ 
-340,6 +361,25 @@ checkmate::assert_numeric( ds$inst1_admin_coding_fte , any.missing=T , lower=0, # checkmate::assert_character(ds$inst1_salary_senior , any.missing=T , pattern="^.{1,100}$" ) checkmate::assert_factor( ds$inst1_complete , any.missing=F ) +# ---- verify-values-inst2 ----------------------------------------------------------- +checkmate::assert_integer( ds$inst2_instance_count , any.missing=T , lower=1, upper=9999 ) +checkmate::assert_logical( ds$inst2_client_limited , any.missing=F ) +checkmate::assert_logical( ds$inst2_client_institution_single , any.missing=F ) +checkmate::assert_logical( ds$inst2_client_institution_multiple , any.missing=F ) +checkmate::assert_logical( ds$inst2_client_other , any.missing=F ) +checkmate::assert_integer( ds$inst2_start_year , any.missing=T ) +checkmate::assert_integer( ds$inst2_user_count , any.missing=T , lower=20, upper=99999 ) +checkmate::assert_integer( ds$inst2_project_count , any.missing=T , lower=5, upper=99999 ) +checkmate::assert_numeric( ds$inst2_log_count_recent , any.missing=T , lower=5, upper=99999999 ) +checkmate::assert_integer( ds$inst2_em_count , any.missing=T , lower=0, upper=999 ) +checkmate::assert_logical( ds$inst2_allow_create , any.missing=T ) +checkmate::assert_logical( ds$inst2_allow_production_move , any.missing=T ) +checkmate::assert_factor( ds$inst2_allow_production_change , any.missing=T ) +checkmate::assert_logical( ds$inst2_allow_repeating_change , any.missing=T ) +checkmate::assert_logical( ds$inst2_allow_events , any.missing=T ) +checkmate::assert_factor( ds$inst2_authenticate , any.missing=T ) +checkmate::assert_factor( ds$inst2_complete , any.missing=F ) + # ---- specify-columns-to-upload ----------------------------------------------- # Print colnames that `dplyr::select()` should contain below: # cat(paste0(" ", colnames(ds), collapse=",\n")) @@ -352,6 +392,7 @@ ds_slim <- # dplyr::slice(1:100) |> dplyr::select( institution_index, + inst1_country_cut3, inst1_county_usa, 
inst1_status, @@ -376,6 +417,24 @@ ds_slim <- # inst1_salary_mid, # inst1_salary_senior, inst1_complete, + + inst2_instance_count, + inst2_client_limited, + inst2_client_institution_single, + inst2_client_institution_multiple, + inst2_client_other, + inst2_start_year, + inst2_user_count, + inst2_project_count, + inst2_log_count_recent, + inst2_em_count, + inst2_allow_create, + inst2_allow_production_move, + inst2_allow_production_change, + inst2_allow_repeating_change, + inst2_allow_events, + inst2_authenticate, + inst2_complete, ) ds_slim diff --git a/manipulation/retrieve-variable-labels.R b/manipulation/retrieve-variable-labels.R index 7174dc0..6c9d6cc 100644 --- a/manipulation/retrieve-variable-labels.R +++ b/manipulation/retrieve-variable-labels.R @@ -113,6 +113,6 @@ map_to_checkbox <- function( # .variable = "inst1_funding" d |> dplyr::left_join(d_wide, by = "institution_index") |> dplyr::select( - -!!"inst1_funding" + -!!.variable ) }