Skip to content

Commit

Permalink
Merge pull request #40 from pharmaverse/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
statasaurus authored Oct 6, 2022
2 parents 0c5c356 + fe84c95 commit f6cf28a
Show file tree
Hide file tree
Showing 24 changed files with 332 additions and 245 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/check-standard.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
push:
branches: [main, master]
branches: [main, master, dev]
pull_request:
branches: [main, master]
branches: [main, master, dev]

name: R-CMD-check

Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/test-coverage.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
push:
branches: [main, master]
branches: [main, master, dev]
pull_request:
branches: [main, master]
branches: [main, master, dev]

name: test-coverage

Expand Down
10 changes: 5 additions & 5 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: metatools
Type: Package
Title: Enable the Use of 'metacore' to Help Create and Check Dataset
Version: 0.1.2
Version: 0.1.3
Authors@R: c(
person(given = "Christina",
family = "Fillmore",
Expand All @@ -21,19 +21,19 @@ Description: Uses the metadata information stored in 'metacore' objects to check
License: MIT + file LICENSE
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.2.0
RoxygenNote: 7.2.1
Imports:
dplyr,
metacore (>= 0.0.4),
purrr,
rlang,
stringr,
tidyr
Suggests:
tidyr,
tibble,
magrittr
Suggests:
testthat (>= 3.0.0),
haven,
magrittr,
covr,
safetyData,
admiral.test
Expand Down
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Generated by roxygen2: do not edit by hand

export("%>%")
export(add_labels)
export(add_variables)
export(build_from_derived)
Expand Down Expand Up @@ -27,6 +28,7 @@ importFrom(dplyr,any_of)
importFrom(dplyr,arrange)
importFrom(dplyr,as_tibble)
importFrom(dplyr,bind_cols)
importFrom(dplyr,bind_rows)
importFrom(dplyr,case_when)
importFrom(dplyr,distinct)
importFrom(dplyr,everything)
Expand All @@ -41,6 +43,7 @@ importFrom(dplyr,mutate)
importFrom(dplyr,pull)
importFrom(dplyr,rename)
importFrom(dplyr,select)
importFrom(magrittr,"%>%")
importFrom(metacore,get_control_term)
importFrom(metacore,select_dataset)
importFrom(purrr,discard)
Expand Down Expand Up @@ -69,5 +72,7 @@ importFrom(stringr,str_remove)
importFrom(stringr,str_remove_all)
importFrom(stringr,str_to_lower)
importFrom(stringr,str_to_upper)
importFrom(tibble,tibble)
importFrom(tidyr,pivot_wider)
importFrom(tidyr,unnest)
importFrom(utils,capture.output)
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
# metatools 0.1.3
* correct bug with `build_from_derived()` that prevented multiple datasets from being combined
* remove library calls from tests
* remove `floating_pt_correction` from `combine_supp()` as it is best to never change the type of the main dataset
* Add a message to `drop_unspec_vars` to explain which variables are dropped
* Correct bug in `order_cols`, so it will still work when not all columns are present

# metatools 0.1.2
* correct bug with `combine_supp()` when the data and the supp have different classes for the IDVARVAL
* add error to `combine_supp()` to report when not all the rows in the supp have merged
Expand Down
80 changes: 53 additions & 27 deletions R/build.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,32 @@
#'
#' This function builds a dataset out of the columns that just need to be pulled
#' through. So any variable that has a derivation in the format of
#' 'dataset.variable' will be pulled through to create the new dataset. These
#' columns are often called 'Predecessors' in ADaM, but this is not universal so
#' that is optional to specify.
#' 'dataset.variable' will be pulled through to create the new dataset. When
#' there are multiple datasets present, they will be joined by the shared
#' `key_seq` variables. These columns are often called 'Predecessors' in ADaM,
#' but this is not universal so that is optional to specify.
#' @param metacore metacore object that contains the specifications for the
#' dataset of interest.
#' @param ds_list Named list of datasets that are needed to build the from
#' @param dataset_name Optional string to specify the dataset. This is only
#' needed if the metacore object provided hasn't already been subsetted.
#' @param dataset_name Optional string to specify the dataset that is being
#' built. This is only needed if the metacore object provided hasn't already
#' been subsetted.
#' @param predecessor_only By default `FALSE`, but if `TRUE` will only use
#' derivations with the origin of 'Predecessor'
#' @param keep Boolean to determine if the original columns should be kept. By
#' default `FALSE`, so only the ADaM columns are kept. If `TRUE` the resulting
#' dataset will have all the ADaM columns as well as any SDTM column that were
#' renamed in the ADaM (i.e `ARM` and `TRT01P` will be in the resulting dataset)
#' renamed in the ADaM (i.e `ARM` and `TRT01P` will be in the resulting
#' dataset)
#'
#' @return dataset
#' @export
#' @importFrom stringr str_to_lower str_detect str_extract str_to_upper
#' @importFrom dplyr filter pull mutate group_by group_split inner_join select
#' full_join
#' full_join bind_rows
#' @importFrom tidyr unnest
#' @importFrom purrr map reduce
#' @importFrom tibble tibble
#'
#' @examples
#' library(metacore)
Expand All @@ -35,26 +40,29 @@
build_from_derived <- function(metacore, ds_list, dataset_name = NULL,
predecessor_only = TRUE, keep = FALSE) {
metacore <- make_lone_dataset(metacore, dataset_name)
derirvations <- metacore$derivations
derirvations <- metacore$derivations %>%
mutate(derivation = trimws(derivation))

if (predecessor_only) {
limited_dev_ids <- metacore$value_spec %>%
filter(str_detect(.data$origin, "[P|p]redecessor")) %>%
pull(.data$derivation_id)
filter(str_detect(str_to_lower(origin), "predecessor")) %>%
pull(derivation_id)

derirvations <- derirvations %>%
filter(.data$derivation_id %in% limited_dev_ids)
filter(derivation_id %in% limited_dev_ids)
if (nrow(derirvations) == 0) {
stop("No presecessor variables found please check your metacore object")
}
}

vars_to_pull_through <- derirvations %>%
filter(str_detect(.data$derivation, "^\\w*\\.[a-zA-Z0-9]*$"))
filter(str_detect(derivation, "^\\w*\\.[a-zA-Z0-9]*$"))
# To lower so it is flexible about how people name their ds list
vars_w_ds <- vars_to_pull_through %>%
mutate(ds = str_extract(.data$derivation, "^\\w*(?=\\.)") %>%
mutate(ds = str_extract(derivation, "^\\w*(?=\\.)") %>%
str_to_lower())
ds_names <- vars_w_ds %>%
pull(.data$ds) %>%
pull(ds) %>%
unique()
names(ds_list) <- names(ds_list) %>%
str_to_lower()
Expand All @@ -64,14 +72,30 @@ build_from_derived <- function(metacore, ds_list, dataset_name = NULL,
paste0(str_to_upper(ds_names), collapse = "\n")
))
}
join_by <- metacore$ds_vars %>%
filter(!is.na(.data$key_seq)) %>%
pull(.data$variable)

ds_keys <- metacore$ds_vars %>%
filter(!is.na(key_seq)) %>%
pull(variable)

joining_vals_to_add <- ds_list %>%
map(function(x){
names(x) %>%
keep(~ . %in% ds_keys)
})

join_by = joining_vals_to_add %>%
reduce(intersect)
additional_vals <- tibble(ds = names(ds_list),
variable = joining_vals_to_add) %>%
unnest(variable) %>%
mutate(col_name = variable)

vars_w_ds %>%
mutate(col_name = str_extract(.data$derivation, "(?<=\\.).*")) %>%
mutate(col_name = str_extract(derivation, "(?<=\\.).*")) %>%
inner_join(metacore$value_spec, ., by = "derivation_id") %>%
select(.data$variable, .data$ds, .data$col_name) %>%
group_by(.data$ds) %>%
select(variable, ds, col_name) %>%
bind_rows(additional_vals) %>%
group_by(ds) %>%
group_split() %>%
map(get_variables, ds_list, keep) %>%
reduce(full_join, by = join_by)
Expand Down Expand Up @@ -134,13 +158,15 @@ get_variables <- function(x, ds_list, keep) {
drop_unspec_vars <- function(dataset, metacore, dataset_name = NULL) {
metacore <- make_lone_dataset(metacore, dataset_name)
var_list <- metacore$ds_vars %>%
filter(is.na(.data$supp_flag) | !(.data$supp_flag)) %>%
pull(.data$variable)
filter(is.na(supp_flag) | !(supp_flag)) %>%
pull(variable)
to_drop <- names(dataset) %>%
discard(~ . %in% var_list)
if (length(to_drop) > 0) {
out <- dataset %>%
select(-all_of(to_drop))
message(paste0("The following variable(s) were dropped:\n ",
paste0(to_drop, collapse = "\n ")))
} else {
out <- dataset
}
Expand Down Expand Up @@ -182,19 +208,19 @@ drop_unspec_vars <- function(dataset, metacore, dataset_name = NULL) {
add_variables <- function(dataset, metacore, dataset_name = NULL){
metacore <- make_lone_dataset(metacore, dataset_name)
var_list <- metacore$ds_vars %>%
filter(is.na(.data$supp_flag) | !(.data$supp_flag)) %>%
pull(.data$variable)
filter(is.na(supp_flag) | !(supp_flag)) %>%
pull(variable)

to_add <- var_list %>%
discard(~ . %in% names(dataset))
if(length(to_add) > 0){
n <- nrow(dataset)
typing <- metacore$var_spec %>%
filter(.data$variable %in% to_add) %>%
mutate(type_fmt = str_to_lower(.data$type),
filter(variable %in% to_add) %>%
mutate(type_fmt = str_to_lower(type),
out_type =
case_when(
str_detect(str_to_lower(.data$format), "date") ~ "date",
str_detect(str_to_lower(format), "date") ~ "date",
type_fmt == "integer" ~ "integer",
type_fmt == "numeric" ~ "double",
type_fmt == "text" ~ "character",
Expand Down
20 changes: 10 additions & 10 deletions R/checks.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,12 @@ check_ct_col <- function(data, metacore, var, na_acceptable = NULL) {
if (is.vector(ct)) {
check <- ct
} else if ("code" %in% names(ct)) {
check <- ct %>% pull(.data$code)
check <- ct %>% pull(code)
} else {
stop("We currently don't have the ability to check against external libraries")
}
core <- metacore$ds_vars %>%
filter(.data$variable == col_name_str) %>%
filter(variable == col_name_str) %>%
pull(core)
attr(core, "label") <- NULL
test <- ifelse(is.null(na_acceptable), !identical(core, "Required"), na_acceptable)
Expand Down Expand Up @@ -99,18 +99,18 @@ check_ct_col <- function(data, metacore, var, na_acceptable = NULL) {
#' check_ct_data(data, spec)
check_ct_data <- function(data, metacore, na_acceptable = NULL) {
codes_in_data <- metacore$value_spec %>%
filter(.data$variable %in% names(data), !is.na(.data$code_id)) %>%
pull(.data$code_id) %>%
filter(variable %in% names(data), !is.na(code_id)) %>%
pull(code_id) %>%
unique()
# Remove any codes that have external libraries
codes_to_check <- metacore$codelist %>%
filter(.data$type != "external_library", .data$code_id %in% codes_in_data) %>%
select(.data$code_id)
filter(type != "external_library", code_id %in% codes_in_data) %>%
select(code_id)
# convert list of codes to variables
cols_to_check <- metacore$value_spec %>%
inner_join(codes_to_check, by = "code_id") %>%
filter(.data$variable %in% names(data)) %>%
pull(.data$variable) %>%
filter(variable %in% names(data)) %>%
pull(variable) %>%
unique()
# send all variables through check_ct_col
safe_chk <- safely(check_ct_col)
Expand Down Expand Up @@ -174,8 +174,8 @@ check_ct_data <- function(data, metacore, na_acceptable = NULL) {
check_variables <- function(data, metacore, dataset_name = NULL) {
metacore <- make_lone_dataset(metacore, dataset_name)
var_list <- metacore$ds_vars %>%
filter(is.na(.data$supp_flag) | !(.data$supp_flag)) %>%
pull(.data$variable)
filter(is.na(supp_flag) | !(supp_flag)) %>%
pull(variable)
missing <- var_list %>%
discard(~ . %in% names(data))
extra <- names(data) %>%
Expand Down
2 changes: 1 addition & 1 deletion R/codelists.R
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ create_cat_var <- function(data, metacore, ref_var, grp_var,
stop("Expecting 'code_decode' type of control terminology. Please check metacore object")
}
grp_defs <- ct %>%
pull(.data$code)
pull(code)

out <- data %>%
mutate({{ grp_var }} := create_subgrps({{ ref_var }}, grp_defs))
Expand Down
2 changes: 1 addition & 1 deletion R/labels.R
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ set_variable_labels <- function(data, metacore, dataset_name = NULL) {

# Grab out the var names and labels
var_spec <- metacore$var_spec %>%
select(.data$variable, .data$label)
select(variable, label)


ns <- var_spec$variable
Expand Down
31 changes: 17 additions & 14 deletions R/sort.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,16 @@
#' data <- read_xpt(metatools_example("adsl.xpt"))
#' order_cols(data, spec)
order_cols <- function(data, metacore, dataset_name = NULL) {
metacore <- make_lone_dataset(metacore, dataset_name)
var_ord <- metacore$ds_vars %>%
filter(!is.na(.data$order)) %>%
arrange(.data$order) %>%
pull(.data$variable)
data %>%
select(all_of(var_ord), everything())
metacore <- make_lone_dataset(metacore, dataset_name)

var_ord <- metacore$ds_vars %>%
filter(!is.na(order)) %>%
arrange(order) %>%
pull(variable) %>%
keep(~. %in% names(data))

data %>%
select(all_of(var_ord), everything())
}


Expand Down Expand Up @@ -57,12 +60,12 @@ order_cols <- function(data, metacore, dataset_name = NULL) {
#' data <- read_xpt(metatools_example("adsl.xpt"))
#' sort_by_key(data, spec)
sort_by_key <- function(data, metacore, dataset_name = NULL) {
metacore <- make_lone_dataset(metacore, dataset_name)
var_ord <- metacore$ds_vars %>%
filter(!is.na(.data$key_seq)) %>%
arrange(.data$key_seq) %>%
pull(.data$variable)
metacore <- make_lone_dataset(metacore, dataset_name)
var_ord <- metacore$ds_vars %>%
filter(!is.na(key_seq)) %>%
arrange(key_seq) %>%
pull(variable)

data %>%
arrange(across(var_ord))
data %>%
arrange(across(var_ord))
}
Loading

0 comments on commit f6cf28a

Please sign in to comment.