Skip to content

Commit

Permalink
inst2 marginals
Browse files Browse the repository at this point in the history
ref #3
  • Loading branch information
wibeasley committed Sep 15, 2024
1 parent 6603a52 commit 5fd93d4
Show file tree
Hide file tree
Showing 28 changed files with 147 additions and 28 deletions.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
22 changes: 21 additions & 1 deletion analysis/institution-1/institution-1.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ ds <-
# )

# ---- marginals-inst1 ---------------------------------------------------------------
# Inspect continuous variables
TabularManifest::histogram_discrete(ds, variable_name="inst1_country_cut3")
TabularManifest::histogram_discrete(ds, variable_name="inst1_county_usa")
TabularManifest::histogram_discrete(ds, variable_name="inst1_status")
Expand Down Expand Up @@ -57,6 +56,27 @@ TabularManifest::histogram_discrete(ds, variable_name="inst1_complete")
# cat('TabularManifest::histogram_discrete(ds, variable_name="', column,'")\n', sep="")
# }

# ---- marginals-inst2 ---------------------------------------------------------------
# One marginal plot per `inst2_*` column of `ds`. Every call stays a top-level
# expression so that its plot is printed without an explicit `print()`.
TabularManifest::histogram_discrete(
  ds,
  variable_name = "inst2_instance_count"
)
TabularManifest::histogram_continuous(
  ds,
  variable_name  = "inst2_instance_count",
  bin_width      = 1,
  rounded_digits = 1
)
TabularManifest::histogram_discrete(
  ds,
  variable_name = "inst2_client_limited"
)
TabularManifest::histogram_discrete(
  ds,
  variable_name = "inst2_client_institution_single"
)
TabularManifest::histogram_discrete(
  ds,
  variable_name = "inst2_client_institution_multiple"
)
TabularManifest::histogram_discrete(
  ds,
  variable_name = "inst2_client_other"
)
TabularManifest::histogram_discrete(
  ds,
  variable_name = "inst2_start_year"
)
TabularManifest::histogram_continuous(
  ds,
  variable_name  = "inst2_start_year",
  bin_width      = 1,
  rounded_digits = 1
)
TabularManifest::histogram_continuous(
  ds,
  variable_name  = "inst2_user_count",
  bin_width      = 1000,
  rounded_digits = 1
)
TabularManifest::histogram_continuous(
  ds,
  variable_name  = "inst2_project_count",
  bin_width      = 1000,
  rounded_digits = 1
)
TabularManifest::histogram_continuous(
  ds,
  variable_name  = "inst2_log_count_recent",
  bin_width      = 1000000,
  rounded_digits = 1
)
TabularManifest::histogram_continuous(
  ds,
  variable_name  = "inst2_em_count",
  bin_width      = 5,
  rounded_digits = 1
)
TabularManifest::histogram_discrete(
  ds,
  variable_name = "inst2_allow_create"
)
TabularManifest::histogram_discrete(
  ds,
  variable_name = "inst2_allow_production_move"
)
TabularManifest::histogram_discrete(
  ds,
  variable_name = "inst2_allow_production_change"
)
TabularManifest::histogram_discrete(
  ds,
  variable_name = "inst2_allow_repeating_change"
)
TabularManifest::histogram_discrete(
  ds,
  variable_name = "inst2_allow_events"
)
TabularManifest::histogram_discrete(
  ds,
  variable_name = "inst2_authenticate"
)
TabularManifest::histogram_discrete(
  ds,
  variable_name = "inst2_complete"
)

# ---- scatterplots ------------------------------------------------------------
# g1 <-
# ggplot(ds, aes(x=horsepower, y=quarter_mile_sec, color=forward_gear_count_f)) +
Expand Down
11 changes: 9 additions & 2 deletions analysis/institution-1/institution-1.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -78,12 +78,13 @@ Notes
---------------------------------------------------------------------------

1. The current report covers `r nrow(ds)` survey responses to the institutional survey.
1. Other information is available at:
1. Other survey information is available at:
<https://redcap.link/bmw_institutiondata>

Unanswered Questions
---------------------------------------------------------------------------

1. Please see the full list at <https://github.com/OuhscBbmc/redcap-business-model-survey-2024/blob/main/documentation/notes-ingestion.md>
1. What does `VS` stand for? How was it measured?
1. Were the cars at the Philly track measured with the same phluguerstometer as the Cleveland track?

Expand All @@ -95,12 +96,18 @@ Answered Questions
Marginals
===========================================================================

Institution 1 Instrument
Institution 1 (Instrument)
---------------------------------------------------------------------------

```{r marginals-inst1, echo=echo_chunks, message=message_chunks}
```

Institution 2 (Instrument)
---------------------------------------------------------------------------

```{r marginals-inst2, echo=echo_chunks, message=message_chunks}
```


Scatterplots
===========================================================================
Expand Down
12 changes: 9 additions & 3 deletions analysis/institution-1/institution-1.html

Large diffs are not rendered by default.

12 changes: 9 additions & 3 deletions analysis/institution-1/institution-1.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,13 @@ Notes
---------------------------------------------------------------------------

1. The current report covers 82 survey responses to the institutional survey.
1. Other information is available at:
1. Other survey information is available at:
<https://redcap.link/bmw_institutiondata>

Unanswered Questions
---------------------------------------------------------------------------

1. Please see the full list at <https://github.com/OuhscBbmc/redcap-business-model-survey-2024/blob/main/documentation/notes-ingestion.md>
1. What does `VS` stand for? How was it measured?
1. Were the cars at the Philly track measured with the same phluguerstometer as the Cleveland track?

Expand All @@ -64,11 +65,16 @@ Answered Questions
Marginals
===========================================================================

Institution 1 Instrument
Institution 1 (Instrument)
---------------------------------------------------------------------------

![](figure-png/marginals-inst1-1.png)<!-- -->![](figure-png/marginals-inst1-2.png)<!-- -->![](figure-png/marginals-inst1-3.png)<!-- -->![](figure-png/marginals-inst1-4.png)<!-- -->![](figure-png/marginals-inst1-5.png)<!-- -->![](figure-png/marginals-inst1-6.png)<!-- -->![](figure-png/marginals-inst1-7.png)<!-- -->![](figure-png/marginals-inst1-8.png)<!-- -->![](figure-png/marginals-inst1-9.png)<!-- -->![](figure-png/marginals-inst1-10.png)<!-- -->![](figure-png/marginals-inst1-11.png)<!-- -->![](figure-png/marginals-inst1-12.png)<!-- -->![](figure-png/marginals-inst1-13.png)<!-- -->![](figure-png/marginals-inst1-14.png)<!-- -->![](figure-png/marginals-inst1-15.png)<!-- -->![](figure-png/marginals-inst1-16.png)<!-- -->![](figure-png/marginals-inst1-17.png)<!-- -->![](figure-png/marginals-inst1-18.png)<!-- -->![](figure-png/marginals-inst1-19.png)<!-- -->![](figure-png/marginals-inst1-20.png)<!-- -->

Institution 2 (Instrument)
---------------------------------------------------------------------------

![](figure-png/marginals-inst2-1.png)<!-- -->![](figure-png/marginals-inst2-2.png)<!-- -->![](figure-png/marginals-inst2-3.png)<!-- -->![](figure-png/marginals-inst2-4.png)<!-- -->![](figure-png/marginals-inst2-5.png)<!-- -->![](figure-png/marginals-inst2-6.png)<!-- -->![](figure-png/marginals-inst2-7.png)<!-- -->![](figure-png/marginals-inst2-8.png)<!-- -->![](figure-png/marginals-inst2-9.png)<!-- -->![](figure-png/marginals-inst2-10.png)<!-- -->![](figure-png/marginals-inst2-11.png)<!-- -->![](figure-png/marginals-inst2-12.png)<!-- -->![](figure-png/marginals-inst2-13.png)<!-- -->![](figure-png/marginals-inst2-14.png)<!-- -->![](figure-png/marginals-inst2-15.png)<!-- -->![](figure-png/marginals-inst2-16.png)<!-- -->![](figure-png/marginals-inst2-17.png)<!-- -->![](figure-png/marginals-inst2-18.png)<!-- -->![](figure-png/marginals-inst2-19.png)<!-- -->


Scatterplots
===========================================================================
Expand Down Expand Up @@ -190,4 +196,4 @@ For the sake of documentation and reproducibility, the current report was render



Report rendered by wibeasley at 2024-09-15, 13:57 -0500 in 5 seconds.
Report rendered by wibeasley at 2024-09-15, 15:16 -0500 in 9 seconds.
Binary file modified data-public/derived/variable-label.parquet
Binary file not shown.
13 changes: 13 additions & 0 deletions data-public/metadata/variable-label.csv
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,16 @@ inst1_dept_home,5,"Biostats/Analytics",5,
inst1_dept_home,7,"Clinical Trials",6,
inst1_dept_home,6,"Dedicated Team",7,
inst1_dept_home,98,"Other",98,
inst2_client,1,"limited",1,
inst2_client,2,"institution_single",2,
inst2_client,3,"institution_multiple",3,
inst2_client,98,"other",98,
inst2_allow_production_change,0,"Never",0,
inst2_allow_production_change,1,"Yes, if no existing fields are modified",1,
inst2_allow_production_change,2,"Yes, if empty or no existing fields are modified",2,
inst2_allow_production_change,3,"Yes, if no critical issues exist",3,
inst2_allow_production_change,4,"Yes, if empty or no critical issues exist",4,
inst2_authenticate,1,"table",1,
inst2_authenticate,2,"institution",2,
inst2_authenticate,3,"institution & table",3,
inst2_authenticate,98,"I don't know",98,
12 changes: 10 additions & 2 deletions documentation/notes-ingestion.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
Notes for Ingestion Process
================

1. For `county`, both "1" and "13" are labelled "Australia".
1. For `program_funding`, future surveys might say "NIH CTSA/CTR Grant" instead of just "NIH CTSA Grant".
1. For `county`, both "1" and "13" are labelled "Australia".
1. For `program_funding`, future surveys might say "NIH CTSA/CTR Grant" instead of just "NIH CTSA Grant".
1. For `redcap_pop`, how can the checkboxes be selected for both "2" ("One Institution/Organization") and "3" ("Multiple Institutions/Organizations")?
1. For `redcap_instance_count`, the max is "147". Is this likely? If not, what is a reasonable upper bound?
1. If we release the (multivariate) dataset publicly, let's cap some variables so they don't identify the institution. I think marginals are fine (eg, the earliest year was 2004), but not if it can re-identify their other responses. Variables to cap include:
* `redcap_start_date`, limit lower bound to ~2010.
* `active_users`, limit upper bound to ~4k.
* `active_projects`, limit upper bound to ~4k.
* `logged_events`, limit upper bound to ~1M.
* `em_no`, limit upper bound to ~25.
91 changes: 75 additions & 16 deletions manipulation/institution-ellis.R
Original file line number Diff line number Diff line change
Expand Up @@ -180,20 +180,22 @@ ds <-
inst1_salary_mid = `sal_mid`,
inst1_salary_senior = `sal_sen`,
inst1_complete = `institutional_questionnaire_complete`,
# redcap_instance_count = `redcap_instance_count`,
# redcap_pop = `redcap_pop`,
# redcap_start_date = `redcap_start_date`,
# active_users = `active_users`,
# active_projects = `active_projects`,
# logged_events = `logged_events`,
# em_no = `em_no`,
# ccus_createprojects = `ccus_createprojects`,
# ccus_moveprod = `ccus_moveprod`,
# ccus_changerequests = `ccus_changerequests`,
# ccus_repeatingsetup = `ccus_repeatingsetup`,
# ccus_addevents = `ccus_addevents`,
# ccus_authenticate = `ccus_authenticate`,
# institutional_questionnaire2_complete = `institutional_questionnaire2_complete`,

inst2_instance_count = `redcap_instance_count`,
inst2_client = `redcap_pop`,
inst2_start_date = `redcap_start_date`,
inst2_user_count = `active_users`,
inst2_project_count = `active_projects`,
inst2_log_count_recent = `logged_events`,
inst2_em_count = `em_no`,
inst2_allow_create = `ccus_createprojects`,
inst2_allow_production_move = `ccus_moveprod`,
inst2_allow_production_change = `ccus_changerequests`,
inst2_allow_repeating_change = `ccus_repeatingsetup`,
inst2_allow_events = `ccus_addevents`,
inst2_authenticate = `ccus_authenticate`,
inst2_complete = `institutional_questionnaire2_complete`,

# manageusers = `manageusers`,
# create = `create`,
# create_charge = `create_charge`,
Expand Down Expand Up @@ -301,7 +303,24 @@ ds <-
-inst1_country,
) #|> View()

# ds$inst1_status
# ---- groom-institution-2 -----------------------------------------------------
# Groom the `inst2_*` columns of `ds`:
#   * derive `inst2_start_year` from `inst2_start_date`, then drop the date;
#   * coerce the four `inst2_allow_*` items listed below to logical;
#   * translate `inst2_complete` with `REDCapR::constant_to_form_completion()`;
#   * pass the checkbox/radio items through the project's `map_to_*()` helpers
#     (defined elsewhere -- presumably they attach the labels from
#     `variable-label.csv`; confirm against `retrieve-variable-labels.R`).
ds <-
  ds |>
  dplyr::mutate(
    inst2_start_year = as.integer(lubridate::year(inst2_start_date)),
    dplyr::across(
      c(
        inst2_allow_create,
        inst2_allow_production_move,
        inst2_allow_repeating_change,
        inst2_allow_events
      ),
      as.logical
    ),
    inst2_complete   = REDCapR::constant_to_form_completion(inst2_complete),
  ) |>
  map_to_checkbox("inst2_client") |>
  map_to_radio("inst2_allow_production_change") |>
  map_to_radio("inst2_authenticate") |>
  # dplyr::select(tidyselect::starts_with("inst2_")) |>
  dplyr::select(
    -inst2_start_date
  )

# ---- reestablish-column-order ------------------------------------------------
ds <-
Expand All @@ -310,9 +329,11 @@ ds <-
institution_index,
tidyselect::matches("inst1_(?!complete)", perl = TRUE), # A "negative-lookahead"
inst1_complete,
tidyselect::matches("inst2_(?!complete)", perl = TRUE), # A "negative-lookahead"
inst2_complete,
)

# ---- verify-values -----------------------------------------------------------
# ---- verify-values-inst1 -----------------------------------------------------------
# OuhscMunge::verify_value_headstart(ds)
checkmate::assert_integer( ds$institution_index , any.missing=F , lower=1, upper=999 , unique=T)
checkmate::assert_character(ds$inst1_country_cut3 , any.missing=F , pattern="^.{3,9}$" )
Expand Down Expand Up @@ -340,6 +361,25 @@ checkmate::assert_numeric( ds$inst1_admin_coding_fte , any.missing=T , lower=0,
# checkmate::assert_character(ds$inst1_salary_senior , any.missing=T , pattern="^.{1,100}$" )
checkmate::assert_factor( ds$inst1_complete , any.missing=F )

# ---- verify-values-inst2 -----------------------------------------------------------
# Assert the type, missingness, and (where given) range of each `inst2_*` column
# of `ds`; any violation stops the script with a checkmate error.
# `TRUE`/`FALSE` are spelled out because `T`/`F` are ordinary variables that can
# be reassigned, which would silently flip the meaning of `any.missing`.
checkmate::assert_integer(ds$inst2_instance_count             , any.missing=TRUE  , lower=1 , upper=9999     )
checkmate::assert_logical(ds$inst2_client_limited             , any.missing=FALSE                            )
checkmate::assert_logical(ds$inst2_client_institution_single  , any.missing=FALSE                            )
checkmate::assert_logical(ds$inst2_client_institution_multiple, any.missing=FALSE                            )
checkmate::assert_logical(ds$inst2_client_other               , any.missing=FALSE                            )
# NOTE(review): no bounds are asserted on the year -- consider adding a plausible range.
checkmate::assert_integer(ds$inst2_start_year                 , any.missing=TRUE                             )
checkmate::assert_integer(ds$inst2_user_count                 , any.missing=TRUE  , lower=20, upper=99999    )
checkmate::assert_integer(ds$inst2_project_count              , any.missing=TRUE  , lower=5 , upper=99999    )
# Checked as numeric rather than integer -- presumably the column is stored as double; confirm.
checkmate::assert_numeric(ds$inst2_log_count_recent           , any.missing=TRUE  , lower=5 , upper=99999999 )
checkmate::assert_integer(ds$inst2_em_count                   , any.missing=TRUE  , lower=0 , upper=999      )
checkmate::assert_logical(ds$inst2_allow_create               , any.missing=TRUE                             )
checkmate::assert_logical(ds$inst2_allow_production_move      , any.missing=TRUE                             )
checkmate::assert_factor( ds$inst2_allow_production_change    , any.missing=TRUE                             )
checkmate::assert_logical(ds$inst2_allow_repeating_change     , any.missing=TRUE                             )
checkmate::assert_logical(ds$inst2_allow_events               , any.missing=TRUE                             )
checkmate::assert_factor( ds$inst2_authenticate               , any.missing=TRUE                             )
checkmate::assert_factor( ds$inst2_complete                   , any.missing=FALSE                            )

# ---- specify-columns-to-upload -----------------------------------------------
# Print colnames that `dplyr::select()` should contain below:
# cat(paste0(" ", colnames(ds), collapse=",\n"))
Expand All @@ -352,6 +392,7 @@ ds_slim <-
# dplyr::slice(1:100) |>
dplyr::select(
institution_index,

inst1_country_cut3,
inst1_county_usa,
inst1_status,
Expand All @@ -376,6 +417,24 @@ ds_slim <-
# inst1_salary_mid,
# inst1_salary_senior,
inst1_complete,

inst2_instance_count,
inst2_client_limited,
inst2_client_institution_single,
inst2_client_institution_multiple,
inst2_client_other,
inst2_start_year,
inst2_user_count,
inst2_project_count,
inst2_log_count_recent,
inst2_em_count,
inst2_allow_create,
inst2_allow_production_move,
inst2_allow_production_change,
inst2_allow_repeating_change,
inst2_allow_events,
inst2_authenticate,
inst2_complete,
)

ds_slim
Expand Down
2 changes: 1 addition & 1 deletion manipulation/retrieve-variable-labels.R
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,6 @@ map_to_checkbox <- function( # .variable = "inst1_funding"
d |>
dplyr::left_join(d_wide, by = "institution_index") |>
dplyr::select(
-!!"inst1_funding"
-!!.variable
)
}

0 comments on commit 5fd93d4

Please sign in to comment.