Skip to content

Commit

Permalink
Merge pull request #100 from atorus-research/fix-simplification
Browse files Browse the repository at this point in the history
Fix issues simplification
  • Loading branch information
statasaurus authored Apr 11, 2024
2 parents 94a2519 + e21eb69 commit da47c6a
Show file tree
Hide file tree
Showing 14 changed files with 75 additions and 40 deletions.
2 changes: 0 additions & 2 deletions .github/workflows/check-standard.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@ jobs:
config:
- {os: windows-latest, r: 'release'}
- {os: macOS-latest, r: 'release'}
- {os: ubuntu-20.04, r: '3.5', repos: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
- {os: ubuntu-20.04, r: '3.6', repos: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
- {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
- {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}

Expand Down
6 changes: 4 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: metacore
Title: A Centralized Metadata Object Focus on Clinical Trial Data Programming Workflows
Version: 0.1.2
Version: 0.1.3
Authors@R:
c(person(given = "Christina",
family = "Fillmore",
Expand All @@ -27,7 +27,9 @@ Description: Create an immutable container holding metadata for the purpose of b
License: MIT + file LICENSE
Encoding: UTF-8
Roxygen: list(markdown = TRUE, r6 = FALSE)
RoxygenNote: 7.2.1
RoxygenNote: 7.3.1
Depends:
R (>= 3.6)
Suggests:
testthat,
knitr,
Expand Down
15 changes: 9 additions & 6 deletions R/metacore.R
Original file line number Diff line number Diff line change
Expand Up @@ -189,14 +189,17 @@ MetaCore_filter <- function(value) {
multiple = "all") %>%
distinct(variable, .keep_all = TRUE) # for when duplicates get through and have different labels but the same name

# Get values/variables that need derivations
val_deriv <- private$.value_spec %>%
distinct(.data$derivation_id) %>%
na.omit()

private$.derivations <- private$.derivations %>%
right_join(private$.value_spec %>%
select(derivation_id) %>%
na.omit(), by = "derivation_id", multiple = "all")
right_join(val_deriv, by = "derivation_id", multiple = "all")

private$.codelist <- private$.codelist %>%
right_join(private$.value_spec %>%
select(code_id) %>%
distinct(.data$code_id) %>%
na.omit(), by = "code_id", multiple = "all")

private$.supp <- private$.supp %>% filter(dataset == value)
Expand Down Expand Up @@ -330,13 +333,13 @@ select_dataset <- function(.data, dataset, simplify = FALSE) {

if (simplify) {

suppressMessages(
test <- suppressMessages(
list(
cl$ds_vars,
cl$var_spec,
cl$value_spec,
cl$derivations,
cl$codelist,
select(cl$codelist, code_id, codes),
cl$supp
) %>%
reduce(left_join)
Expand Down
39 changes: 28 additions & 11 deletions R/spec_builder.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
#'
#' This function takes the location of an excel specification document and reads
#' it in as a meta core object. At the moment it only supports specification in
#' the format of pinnacle 21 specifications. But, the @family spec builder can
#' be used as building blocks for bespoke specification documents
#' the format of Pinnacle 21 specifications. However, the section-level spec
#' builders can be used as building blocks for bespoke specification documents.
#'
#' @param path string of file location
#' @param quiet Option to quietly load in, this will suppress warnings, but not
Expand Down Expand Up @@ -96,7 +96,7 @@ read_all_sheets <- function(path){
#' @return a dataset formatted for the metacore object
#' @export
#'
#' @family spec builder
#' @family {spec builder}
spec_type_to_ds_spec <- function(doc, cols = c("dataset" = "[N|n]ame|[D|d]ataset|[D|d]omain",
"structure" = "[S|s]tructure",
"label" = "[L|l]abel|[D|d]escription"), sheet = NULL){
Expand Down Expand Up @@ -140,7 +140,7 @@ spec_type_to_ds_spec <- function(doc, cols = c("dataset" = "[N|n]ame|[D|d]ataset
#' @return a dataset formatted for the metacore object
#' @export
#'
#' @family spec builder
#' @family {spec builder}
spec_type_to_ds_vars <- function(doc, cols = c("dataset" = "[D|d]ataset|[D|d]omain",
"variable" = "[V|v]ariable [[N|n]ame]?|[V|v]ariables?",
"order" = "[V|v]ariable [O|o]rder|[O|o]rder",
Expand Down Expand Up @@ -214,7 +214,7 @@ spec_type_to_ds_vars <- function(doc, cols = c("dataset" = "[D|d]ataset|[D|d]oma
#' @return a dataset formatted for the metacore object
#' @export
#'
#' @family spec builder
#' @family {spec builder}
spec_type_to_var_spec <- function(doc, cols = c("variable" = "[N|n]ame|[V|v]ariables?",
"length" = "[L|l]ength",
"label" = "[L|l]abel",
Expand Down Expand Up @@ -314,7 +314,7 @@ spec_type_to_var_spec <- function(doc, cols = c("variable" = "[N|n]ame|[V|v]aria
#' @return a dataset formatted for the metacore object
#' @export
#'
#' @family spec builder
#' @family {spec builder}
spec_type_to_value_spec <- function(doc, cols = c("dataset" = "[D|d]ataset|[D|d]omain",
"variable" = "[N|n]ame|[V|v]ariables?",
"origin" = "[O|o]rigin",
Expand Down Expand Up @@ -408,7 +408,10 @@ spec_type_to_value_spec <- function(doc, cols = c("dataset" = "[D|d]ataset|[D|d]

if(!"derivation_id" %in% names(cols)){
out <- out %>%
mutate(derivation_id = paste0(dataset, ".", variable))
mutate(derivation_id =
if_else(str_to_lower(.data$origin) == "assigned",
paste0(dataset, ".", variable),
paste0("pred.", dataset, ".", variable)))
}

# Get missing columns
Expand All @@ -421,7 +424,7 @@ spec_type_to_value_spec <- function(doc, cols = c("dataset" = "[D|d]ataset|[D|d]
mutate(sig_dig = as.integer(.data$sig_dig),
derivation_id = case_when(
!is.na(.data$derivation_id) ~ .data$derivation_id,
str_to_lower(.data$origin) == "predecessor" ~ as.character(.data$predecessor),
str_to_lower(.data$origin) == "predecessor" ~ paste0("pred.", as.character(.data$predecessor)),
str_to_lower(.data$origin) == "assigned" ~ paste0(.data$dataset, ".", .data$variable))
) %>%
select(-.data$predecessor)
Expand Down Expand Up @@ -453,7 +456,7 @@ spec_type_to_value_spec <- function(doc, cols = c("dataset" = "[D|d]ataset|[D|d]
#' @return a dataset formatted for the metacore object
#' @export
#'
#' @family spec builder
#' @family {spec builder}
spec_type_to_codelist <- function(doc, codelist_cols = c("code_id" = "ID",
"name" = "[N|n]ame",
"code" = "^[C|c]ode|^[T|t]erm",
Expand Down Expand Up @@ -558,7 +561,7 @@ spec_type_to_codelist <- function(doc, codelist_cols = c("code_id" = "ID",
#' @return a dataset formatted for the metacore object
#' @export
#'
#' @family spec builder
#' @family {spec builder}
#' @importFrom purrr quietly
spec_type_to_derivations <- function(doc, cols = c("derivation_id" = "ID",
"derivation" = "[D|d]efinition|[D|d]escription"),
Expand Down Expand Up @@ -587,11 +590,25 @@ spec_type_to_derivations <- function(doc, cols = c("derivation_id" = "ID",
if(class(ls_derivations)[1] == "list"){
ls_derivations <- ls_derivations %>%
reduce(bind_rows)
# Get the comments
if(any(str_detect(names(doc), "[C|c]omment"))){
comments <- doc[str_detect(names(doc), "[C|c]omment")][[1]] |>
select(matches("ID|Description"))
with_comments <- ls_derivations |>
filter(str_to_lower(.data$origin) == "assigned") |>
left_join(comments, by = c("comment" = "ID" )) |>
mutate(comment = .data$Description) |>
select(-.data$Description)
ls_derivations <- ls_derivations |>
filter(str_to_lower(.data$origin) != "assigned") |>
bind_rows(with_comments)
}
}

other_derivations <- ls_derivations %>%
mutate(
derivation_id = case_when(
str_to_lower(.data$origin) == "predecessor" ~ as.character(.data$predecessor),
str_to_lower(.data$origin) == "predecessor" ~ paste0("pred.", as.character(.data$predecessor)),
str_to_lower(.data$origin) == "assigned" ~ paste0(.data$dataset, ".", .data$variable),
TRUE ~ NA_character_
),
Expand Down
8 changes: 6 additions & 2 deletions R/xml_builders.R
Original file line number Diff line number Diff line change
Expand Up @@ -357,8 +357,12 @@ xml_to_codelist <- function(doc) {
version = xml_attr(node, "Version"),
type = "external_library"
)
}) %>%
nest(codes = c(.data$dictionary, .data$version))
})
if(nrow(external_libs) > 0){
external_libs <- external_libs |>
nest(codes = c(.data$dictionary, .data$version))
}


# Combining the code decode with the permitted values
bind_rows(code_decode, permitted_val, external_libs) %>%
Expand Down
2 changes: 2 additions & 0 deletions man/get_control_term.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/spec_to_metacore.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/spec_type_to_codelist.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/spec_type_to_derivations.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/spec_type_to_ds_spec.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/spec_type_to_ds_vars.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/spec_type_to_value_spec.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/spec_type_to_var_spec.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 12 additions & 3 deletions tests/testthat/test-reader.R
Original file line number Diff line number Diff line change
Expand Up @@ -255,10 +255,18 @@ test_that("Test var_spec readers", {
spec_var_spec <- spec_type_to_var_spec(spec) %>%
arrange(variable) %>%
select(variable, type, length, label, format)
# remove common as it is derived when reading in specs but left alone from defines

spec2 <- spec
spec2$Variables |>
select(-Dataset)
no_ds <- spec_type_to_var_spec(spec2) |>
arrange(variable) %>%
select(variable, type, length, label, format)
expect_equal(no_ds, spec_var_spec)

# Tests
expect_equal(def_var_spec, ref_var_spec)
# remove common as it is derived when reading in specs but left alone from defines
expect_equal(spec_var_spec,
ref_var_spec %>%
select(-common))
Expand Down Expand Up @@ -397,7 +405,6 @@ test_that("values_spec reader tests", {
map(~paste0(.[1], " '", .[2])),
where = if_else(where == "NA 'NA", NA_character_, paste0(where, "'")))


# Tests
expect_equal(def_value_spec, ref_value_spec)
expect_equal(spec_value_spec, ref_value_spec)
Expand Down Expand Up @@ -466,10 +473,12 @@ test_that("derivation reader tests", {
select(derivation_id = ID,
derivation = Description) %>%
mutate(derivation_id = paste0("MT.", derivation_id))

ref_deriv <- spec$Variables %>%
filter(Origin %in% c("Assigned")) %>%
left_join(select(spec$Comments, ID, Description), by = c("Comment" = "ID")) %>%
mutate(derivation_id = paste0("MT.", Dataset, ".", Variable),
derivation = Comment) %>%
derivation = Description) %>%
select(starts_with("derivation")) %>%
bind_rows(ref_deriv, .) %>%
arrange(derivation_id) %>%
Expand Down

0 comments on commit da47c6a

Please sign in to comment.