Skip to content

Commit

Permalink
Merge pull request #41 from atorus-research/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
statasaurus authored Apr 1, 2022
2 parents 56f4c88 + 10952d8 commit 73d819b
Show file tree
Hide file tree
Showing 31 changed files with 476 additions and 207 deletions.
Binary file added .DS_Store
Binary file not shown.
2 changes: 2 additions & 0 deletions .github/workflows/check-standard.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@ on:
branches:
- main
- master
- dev
pull_request:
branches:
- main
- master
- dev

name: R-CMD-check

Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/test-coverage.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@ on:
branches:
- main
- master
- dev
pull_request:
branches:
- main
- master
- dev

name: test-coverage

Expand Down
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: metacore
Title: A Centralized Metadata Object Focus on Clinical Trial Data Programming Workflows
Version: 0.0.3
Version: 0.0.4
Authors@R:
c(person(given = "Christina",
family = "Fillmore",
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@

# Metacore 0.0.4
- Adds a supp table to store the `idvar` and `qeval` information
- Adds `sig_dig` column to the `value_spec` table


# Metacore 0.0.3
- Fixes bugs found in the `get_control_term` function and improves error messages for `get_control_term`
- Improves internal naming consistency by renaming `define_to_MetaCore` to `define_to_metacore`
Expand Down
95 changes: 73 additions & 22 deletions R/metacore.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#' This file includes the internal functions needed to create the readonly
#' Metacore R6 object
#'
#' @param ds_spec contians each dataset in the study, with the labels for each
#' @param ds_spec contains each dataset in the study, with the labels for each
#' @param ds_vars information on what variables are in each dataset, plus
#' dataset specific variable information
#' @param var_spec variable information that is shared across all datasets
Expand All @@ -10,12 +10,13 @@
#' @param derivations contains derivation, it allows for different variables to
#' have the same derivation
#' @param code_list contains the code/decode information
#' @param supp contains the idvar and qeval information for supplemental variables
#'
#' @family Metacore
#' @noRd
#'
#'
MetaCore_initialize <- function(ds_spec, ds_vars, var_spec, value_spec, derivations, codelist){
MetaCore_initialize <- function(ds_spec, ds_vars, var_spec, value_spec, derivations, codelist, supp){

private$.ds_spec <- ds_spec %>%
add_labs(dataset = "Dataset Name",
Expand Down Expand Up @@ -58,6 +59,18 @@ MetaCore_initialize <- function(ds_spec, ds_vars, var_spec, value_spec, derivati
type = "Code List/Permitted Values/External Library",
codes = "List of Codes")

private$.codelist <- codelist %>%
add_labs(code_id = "ID of the Code List",
names = "Name of the Code List",
type = "Code List/Permitted Values/External Library",
codes = "List of Codes")

private$.supp <- supp %>%
add_labs(dataset = "Dataset Name",
variable = "Variable Name",
idvar = "Identifying Variable",
qeval = "Evaluator")

self$validate()
message("\n Metadata successfully imported")
}
Expand Down Expand Up @@ -85,18 +98,34 @@ MetaCore_print <- function(...){
MetaCore_validate <- function() {
if(var_name_check(private)){

check_columns(private$.ds_spec,
private$.ds_vars,
private$.var_spec,
private$.value_spec,
private$.derivations,
private$.codelist
)
if(nrow(private$.ds_spec) == 0 &
nrow(private$.ds_vars) == 0 &
nrow(private$.var_spec) == 0 &
nrow(private$.value_spec) == 0 &
nrow(private$.derivations) == 0 &
nrow(private$.codelist) == 0 &
nrow(private$.supp) == 0 ){
warning("Other checks were not preformed, because all datasets are empty",
call. = FALSE)
} else {
check_columns(private$.ds_spec,
private$.ds_vars,
private$.var_spec,
private$.value_spec,
private$.derivations,
private$.codelist,
private$.supp
)

ds_vars_check(private$.ds_vars, private$.var_spec)
value_check(private$.ds_vars, private$.value_spec)
derivation_check(private$.value_spec, private$.derivations)
codelist_check(private$.value_spec, private$.codelist)
if(nrow(private$.supp) == 0){
supp_check(private$.ds_vars, private$.supp)
}

ds_vars_check(private$.ds_vars, private$.var_spec)
value_check(private$.ds_vars, private$.value_spec)
derivation_check(private$.value_spec, private$.derivations)
codelist_check(private$.value_spec, private$.codelist)
}

} else {
warning("Other checks were not preformed, because column names were incorrect",
Expand Down Expand Up @@ -162,6 +191,8 @@ MetaCore_filter <- function(value) {

private$.codelist <- private$.codelist %>%
right_join(private$.value_spec %>% select(code_id) %>% na.omit(), by = "code_id")

private$.supp <- private$.supp %>% filter(dataset == value)
}

#' The Metacore R6 Class
Expand All @@ -180,22 +211,24 @@ MetaCore <- R6::R6Class("Metacore",
metacore_filter = MetaCore_filter
),
private = list(
.ds_spec = tibble(dataset = character(), label = character()),
.ds_spec = tibble(dataset = character(), structure = character(), label = character()),
.ds_vars = tibble(dataset = character(), variable = character(), keep = logical(),
key_seq = integer(), order = integer(), core = character(),
supp_flag = logical()),
.var_spec = tibble(variable = character(), label = character(), length = integer()),
.var_spec = tibble(variable = character(), label = character(), length = integer(),
type = character(), common = character(), format = character()),
.value_spec = tibble(dataset = character(),
variable = character(),
where = character(),
type = character(),
codelist = character(),
sig_dig = integer(),
code_id = character(),
origin = character(),
derivation_id = integer()),
.derivations = tibble(derivation_id = integer(), derivation = character()),
# code_type == df | permitted_val | external_lib
.codelist = tibble(code_id = character(), code_type = character(), codelist = list()),
.change_log = tibble(table_chg = character(), column_chg = character(), what_chg = list())
.codelist = tibble(code_id = character(), name = character(), type = character(), codes = list()),
.supp = tibble(dataset = character(), variable = character(), idvar = character(), qeval = character())
),
active = list(
ds_spec = readonly('ds_spec'),
Expand All @@ -204,7 +237,7 @@ MetaCore <- R6::R6Class("Metacore",
value_spec = readonly('value_spec'),
derivations = readonly('derivations'),
codelist = readonly('codelist'),
changelog = readonly('changelog')
supp = readonly('supp')
)
)

Expand All @@ -217,12 +250,29 @@ MetaCore <- R6::R6Class("Metacore",
#' @param value_spec parameter specific information, as data is long the specs for wbc might be different from those for hgb
#' @param derivations contains derivation, it allows for different variables to have the same derivation
#' @param codelist contains the code/decode information
#' @param supp contains the idvar and qeval information for supplemental variables
#'
#' @family Metacore
#'
#' @export
#'
metacore <- function(ds_spec, ds_vars, var_spec, value_spec, derivations, codelist) {
metacore <- function(ds_spec = tibble(dataset = character(), structure = character(), label = character()),
ds_vars = tibble(dataset = character(), variable = character(), keep = logical(),
key_seq = integer(), order = integer(), core = character(),
supp_flag = logical()),
var_spec = tibble(variable = character(), label = character(), length = integer(),
type = character(), common = character(), format = character()),
value_spec = tibble(dataset = character(),
variable = character(),
where = character(),
type = character(),
sig_dig = integer(),
code_id = character(),
origin = character(),
derivation_id = integer()),
derivations = tibble(derivation_id = integer(), derivation = character()),
codelist = tibble(code_id = character(), name = character(), type = character(), codes = list()),
supp = tibble(dataset = character(), variable = character(), idvar = character(), qeval = character())) {
# Check if there are any empty datasets that need adding
is_empty_df <- as.list(environment()) %>%
keep(is.null)
Expand Down Expand Up @@ -252,7 +302,7 @@ metacore <- function(ds_spec, ds_vars, var_spec, value_spec, derivations, codeli
names(replaced) <- to_replace %>% map_chr(~unique(.$dataset))
list2env(replaced, environment())
}
MetaCore$new(ds_spec, ds_vars, var_spec, value_spec, derivations, codelist)
MetaCore$new(ds_spec, ds_vars, var_spec, value_spec, derivations, codelist, supp)
}


Expand All @@ -279,7 +329,8 @@ select_dataset <- function(.data, dataset, simplify = FALSE) {
cl$var_spec,
cl$value_spec,
cl$derivations,
cl$codelist
cl$codelist,
cl$supp
) %>%
reduce(left_join)
)
Expand Down
9 changes: 6 additions & 3 deletions R/spec_builder.R
Original file line number Diff line number Diff line change
Expand Up @@ -314,24 +314,26 @@ spec_type_to_value_spec <- function(doc, cols = c("dataset" = "[D|d]ataset|[D|d]
"origin" = "[O|o]rigin",
"type" = "[T|t]ype",
"code_id" = "[C|c]odelist|Controlled Term",
"sig_dig" = "[S|s]ignificant",
"where" = "[W|w]here",
"derivation_id" = "[M|m]ethod"),
sheet = NULL,
where_sep_sheet = TRUE,
where_cols = c("id" = "ID",
"where" = c("Variable", "Comparator", "Value")),
var_sheet = "[V|v]ar"){
name_check <- names(cols) %in% c("variable", "origin", "code_id",
name_check <- names(cols) %in% c("variable", "origin", "code_id", "sig_dig",
"type", "dataset", "where", "derivation_id") %>%
all()

if(!name_check| is.null(names(cols))){
stop("Supplied column vector must be named using the following names:
'dataset', 'variable', 'origin', 'code_id', 'type', 'where', 'derivation_id'
'dataset', 'variable', 'origin', 'code_id', 'type', 'where', 'sig_dig', 'derivation_id'
If derivation_id is not avaliable it can be excluded and dataset.variable will be used.
If the where information is on a seperate sheet, put the column with cross ref as where.")
}

# Select a subset of sheets if specified
if(!is.null(sheet)){
sheet_ls <- str_subset(names(doc), sheet)
Expand Down Expand Up @@ -405,7 +407,8 @@ spec_type_to_value_spec <- function(doc, cols = c("dataset" = "[D|d]ataset|[D|d]
discard(~. %in% names(out))
out %>%
`is.na<-`(missing) %>%
distinct()
distinct() %>%
mutate(sig_dig = as.integer(.data$sig_dig))

}

Expand Down
63 changes: 58 additions & 5 deletions R/validators.R
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ derivation_check <- function(value_spec, derivations){
variables <- not_in_val %>%
pull(.data$variable) %>%
str_c(collapse = "\n ")
message <- paste("The following variables are missing derivations:\n",
message <- paste("The following variables have derivation ids not found in the derivations table:\n",
variables, "\n\n")
warning(message, call. = FALSE)
}
Expand Down Expand Up @@ -126,7 +126,7 @@ codelist_check <- function(value_spec, codelist){
variables <- not_in_val %>%
pull(.data$variable) %>%
str_c(collapse = "\n ")
message <- paste("The following variables are missing codelist(s):\n",
message <- paste("The following variables hace code ids not found in the codelist(s):\n",
variables, "\n")
warning(message, call. = FALSE)
}
Expand All @@ -142,6 +142,53 @@ codelist_check <- function(value_spec, codelist){
}
}


#' Check Supp
#'
#' Cross-checks the supp table against the ds_vars table. Every variable in
#' ds_vars with a TRUE `supp_flag` should have a row in supp, and every
#' dataset/variable in supp should match a supp-flagged variable in ds_vars.
#' Also checks that each dataset/variable combination appears only once in
#' supp.
#'
#' @param ds_vars ds_vars table; the `dataset`, `variable` and `supp_flag`
#'   columns are used
#' @param supp supp table; the `dataset` and `variable` columns are used
#'
#' @return No meaningful return value; a warning is issued for each problem
#'   found
#' @noRd
supp_check <- function(ds_vars, supp){
  # Each dataset/variable pair should appear only once in supp
  n_unique <- supp %>%
    distinct(.data$dataset, .data$variable) %>%
    nrow()
  if(n_unique != nrow(supp)){
    warning("Supp table contains non-unique dataset/variable combinations",
            call. = FALSE)
  }

  # Only supp-flagged variables are compared against supp; filter() also
  # drops rows where supp_flag is NA
  supp_vars <- ds_vars %>%
    filter(.data$supp_flag)

  # Supp-flagged variables in ds_vars that have no row in supp
  not_in_supp <- anti_join(supp_vars, supp, by = c("dataset", "variable"))
  if(nrow(not_in_supp) != 0){
    variables <- not_in_supp %>%
      mutate(full = str_c(.data$dataset, .data$variable, sep = ".")) %>%
      pull(.data$full) %>%
      str_c(collapse = ", ")
    message <- paste("The following variables are in the ds_vars table and tagged as supplement, but don't have supp specs:\n",
                     variables, "\n\n")
    warning(message, call. = FALSE)
  }

  # Rows in supp with no matching supp-flagged variable in ds_vars. Names are
  # reported as dataset.variable because the same variable name can occur in
  # more than one dataset.
  not_in_ds <- anti_join(supp, supp_vars, by = c("dataset", "variable"))
  if(nrow(not_in_ds) != 0){
    variables <- not_in_ds %>%
      mutate(full = str_c(.data$dataset, .data$variable, sep = ".")) %>%
      pull(.data$full) %>%
      str_c(collapse = ", ")
    message <- paste("The following variables have supp specifications, but aren't in the ds_vars table with supp_flag set to TRUE:\n",
                     variables, "\n\n")
    warning(message, call. = FALSE)
  }
}


#' Column Names by dataset
#'
#' @return list of column names by dataset
Expand All @@ -150,10 +197,10 @@ col_vars <- function(){
list(.ds_spec = c("dataset", "structure", "label"),
.ds_vars = c("dataset", "variable", "key_seq", "order","keep", "core", "supp_flag"),
.var_spec = c("variable", "length", "label", "type", "common", "format"),
.value_spec = c("type", "origin", "code_id", "dataset", "variable", "where", "derivation_id"),
.value_spec = c("dataset", "variable", "type", "origin","sig_dig", "code_id", "where", "derivation_id"),
.derivations = c("derivation_id", "derivation"),
.codelist= c("code_id", "name","type", "codes"),
.change_log = c("table_chg", "column_chg", "what_chg"))
.supp = c("dataset", "variable", "idvar", "qeval"))
}


Expand Down Expand Up @@ -223,6 +270,7 @@ all_message <- function() {
"var_spec", "format", is.character, TRUE,
"var_spec", "common", is.logical, TRUE,
"value_spec", "type", is.character, TRUE,
"value_spec", "sig_dig", is.integer, TRUE,
"value_spec", "origin", is.character, TRUE,
"value_spec", "code_id", is.character, TRUE,
"value_spec", "dataset", is.character, FALSE,
Expand All @@ -234,6 +282,10 @@ all_message <- function() {
"codelist", "name", is.character, TRUE,
"codelist", "codes", function(x){!is.null(x)}, TRUE,
"codelist", "type", is.character, TRUE,
"supp", "dataset", is.character, FALSE,
"supp", "variable", is.character, FALSE,
"supp", "idvar", is.character, TRUE,
"supp", "qeval", is.character, TRUE,
)
}

Expand All @@ -248,8 +300,9 @@ all_message <- function() {
#' @param value_spec value specification
#' @param derivations derivation information
#' @param codelist codelist information
#' @param supp supp information
#'
check_columns <- function(ds_spec, ds_vars, var_spec, value_spec, derivations, codelist) {
check_columns <- function(ds_spec, ds_vars, var_spec, value_spec, derivations, codelist, supp) {


messages <- purrr::pmap(all_message(),
Expand Down
2 changes: 2 additions & 0 deletions R/xml_builders.R
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,8 @@ xml_to_value_spec <- function(doc) {
var_info <- tibble(
id = var_nodes %>% get_node_attr("OID"),
type = var_nodes %>% get_node_attr("DataType"),
sig_dig = var_nodes %>% get_node_attr("SignificantDigits") %>%
as.integer(),
origin = or_vec,
code_id = code_id_vec
) %>%
Expand Down
Loading

0 comments on commit 73d819b

Please sign in to comment.