Skip to content

Commit

Permalink
Closes #2526 data raw data round/2 (#2539)
Browse files Browse the repository at this point in the history
* Corrected code for data-raw/create_admiral_*.R functions, but it throws errors.

* Delete data-backup directory

* chore: #2526 remove old admiral adlb script; push up newly run data

* chore: #2526 rename file to be explicit in what it is doing; reran queries code and some fun things were found!! /s

---------

Co-authored-by: Ben Straub <[email protected]>
  • Loading branch information
jimrothstein and bms63 authored Oct 25, 2024
1 parent ed25276 commit b44fd3d
Show file tree
Hide file tree
Showing 11 changed files with 37 additions and 624 deletions.
Binary file removed data-backup/admiral_adlb.rda
Binary file not shown.
Binary file removed data-backup/admiral_adsl.rda
Binary file not shown.
466 changes: 0 additions & 466 deletions data-raw/admiral_adlb.R

This file was deleted.

103 changes: 15 additions & 88 deletions data-raw/create_admiral_adlb.R
Original file line number Diff line number Diff line change
@@ -1,61 +1,13 @@
# Create dataset: data/admiral_adlb.rda
# This script: create_admiral_adlb.R creates dataset data/admiral_adlb.rda.
#

# Preliminary
library(diffdf) # nolint
# Create dataset data/admiral_adlb.rda

# To clarify directories (can be removed)
# nolint start: object_name_linter
CACHE_DIR <- "~/.cache/R/admiral_templates_data/"
DATA_DIR <- "data-dir"
DATA_RAW <- "data-raw"
TEMPLATE_DIR <- "inst/templates/"
# Run template script to create adlb
source("inst/templates/ad_adlb.R", echo = TRUE) # nolint

# clean CACHE_DIR
THE_FILE <- paste0(CACHE_DIR, "/adlb.rda")
THE_FILE <- paste0(CACHE_DIR, "/adsl.rda")
if (file.exists(THE_FILE)) file.remove(THE_FILE)
# nolint end

#
# STEPS
#
# First, use template to create the R script (in data-raw/admiral_adlb.R).
# Next, source this script and create the data (~/.cache/R/admiral_templates_data/adlb.rda)
# Finally, shorten this data (now ~ 1.2 MB) by selecting only certain USUBJID

# original method - OMIT
if (FALSE) {
# ### original method (method 1)
# # First, create the R script (from a template)
adam_name <- "adlb"
save_path <- paste0("./data-raw/admiral_", adam_name, ".R")
use_ad_template(
adam_name = adam_name,
save_path = save_path,
open = FALSE,
overwrite = TRUE
)
# Second, source the script and save data in .cache
source("data-raw/admiral_adlb.R") # nolint
load("~/.cache/R/admiral_templates_data/adlb.rda")
}

#
# Instead, USE template, as recommended by Buzz
#
source(paste0(TEMPLATE_DIR, "/ad_adlb.R")) # nolint
load(paste0(CACHE_DIR, "adlb.rda"))

#
# limit rows, by selecting only these USUBJID
#
#' 01-701-1015, 01-701-1023, 01-701-1028, 01-701-1033,
#' 01-701-1034, 01-701-1047, 01-701-1097, 01-705-1186,
#' 01-705-1292, 01-705-1310, 01-708-1286

usubjid <-
# Limit rows by selecting only these USUBJIDs
usubjids <-
c(
"01-701-1015",
"01-701-1023",
Expand All @@ -70,42 +22,17 @@ usubjid <-
"01-708-1286"
)

# prepare for inner join
user <- tibble(
USUBJID = usubjid
)
result <- inner_join(adlb, user)
admiral_adlb <- result
admiral_adlb

#
# Finally, save reduced ds
#
use_data(admiral_adlb, overwrite = TRUE)

#
# TEST - is dataset identical to .... backup of unaltered dataset
#
e1 <- new.env()
e2 <- new.env()
load("data/admiral_adlb.rda", e1)

# CHANGE to YOUR location of original dataset
load("data-backup/admiral_adlb.rda", e2)

# compare field names
t <- tibble(e1 = names(e1$admiral_adlb), e2 = names(e2$admiral_adlb))
t |> print(n = 111)
admiral_adlb <- filter(adlb, USUBJID %in% usubjids)

identical(e1$admiral_adlb, e2$admiral_adlb)
diffdf(e1$admiral_adlb, e2$admiral_adlb)
# Get previous dataset for comparison
adlb_old <- admiral::admiral_adlb

## Capture diffdf to file
# Finally, save reduced dataset
usethis::use_data(admiral_adlb, overwrite = TRUE)

capture.output(
diffdf(
compare = e1$admiral_adlb, base = e2$admiral_adlb,
keys = c("STUDYID", "DOMAIN", "USUBJID", "AVAL", "VISIT")
),
file = "data-raw/diffdf_adlb_23SEPT"
# Compare with previous version
diffdf::diffdf(
base = adlb_old,
compare = admiral_adlb,
keys = c("USUBJID", "PARAMCD", "AVISIT", "ADT")
)
83 changes: 14 additions & 69 deletions data-raw/create_admiral_adsl.R
Original file line number Diff line number Diff line change
@@ -1,78 +1,23 @@
# Create dataset: data/admiral_adsl.rda
# This script: create_admiral_adsl.R creates dataset data/admiral_adsl.rda.
# This script, create_admiral_adsl.R, creates dataset data/admiral_adsl.rda.
#

# Preliminary
library(diffdf) # nolint
# Create dataset data/admiral_adsl.rda

# Run template script to create adsl
source("inst/templates/ad_adsl.R", echo = TRUE) # nolint

# nolint start: object_name_linter.
# To clarify directories (can be removed)
CACHE_DIR <- "~/.cache/R/admiral_templates_data/"
DATA_DIR <- "data-dir"
DATA_RAW <- "data-raw"
TEMPLATE_DIR <- "inst/templates/"
admiral_adsl <- adsl # use correct name

# clean CACHE_DIR
THE_FILE <- paste0(CACHE_DIR, "/adlb.rda")
THE_FILE <- paste0(CACHE_DIR, "/adsl.rda")
if (file.exists(THE_FILE)) file.remove(THE_FILE)
# nolint end

#
# STEPS
#
# First, use template to create the R script (in data-raw/admiral_adlb.R).
# Next, source this script and create the data (~/.cache/R/admiral_template_data/admiral_adlb.rda)
# Finally, shorten this data (now ~ 1.2 MB) by selecting only certain USUBJID

# OMIT -- original method - OMIT
if (FALSE) {
# ### original method (method 1)
# # First, create the R script (from a template)
adam_name <- "adlb"
save_path <- paste0("./data-raw/admiral_", adam_name, ".R")
use_ad_template(
adam_name = adam_name,
save_path = save_path,
open = FALSE,
overwrite = TRUE
)
# Second, source the script and save data in .cache
source("data-raw/admiral_adlb.R") # nolint
load("~/.cache/R/admiral_templates_data/adlb.rda")
}

#
#
# Instead, USE template, as recommended by Buzz
#
source(paste0(TEMPLATE_DIR, "/ad_adsl.R")) # nolint
load(paste0(CACHE_DIR, "adsl.rda"))
admiral_adsl <- adsl

#
# Finally, save reduced ds
#
use_data(admiral_adsl, overwrite = TRUE)

#
# TEST - is dataset identical to .... backup of unaltered dataset
#
e1 <- new.env()
e2 <- new.env()
load("data/admiral_adsl.rda", e1)

# CHANGE to YOUR location of original dataset
load("data-backup/admiral_adsl.rda", e2)
# Get previous dataset for comparison
adsl_old <- admiral::admiral_adsl

# Finally, save dataset
usethis::use_data(admiral_adsl, overwrite = TRUE)

identical(e1$admiral_adsl, e2$admiral_adsl)
diffdf(compare = e1$admiral_adsl, base = e2$admiral_adsl, keys = c("STUDYID", "USUBJID"))
capture.output(
diffdf(
compare = e1$admiral_adsl, base = e2$admiral_adsl,
keys = c("STUDYID", "USUBJID")
),
file = "data-raw/diffdf_23SEPT"
# Compare with previous version
diffdf::diffdf(
base = adsl_old,
compare = admiral_adsl,
keys = c("STUDYID", "USUBJID")
)
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,20 @@ adae <- tibble::tribble(
"APPLICATION SITE ERYTHEMA", "Application site erythema", 1
)

# Save the `queries` object as data/queries.rda, replacing any existing file
# (overwrite = TRUE).
usethis::use_data(queries, overwrite = TRUE)

# Quick manual check: run the query derivation on the example `adae` data
# using the `queries` dataset. The result is not assigned, only evaluated.
derive_vars_query(adae, queries)


# Example for ADMH: build `queries_mh` from `queries`.
# Keep only rows whose SRCVAR is "AELLT" or "AEDECOD", then rename the source
# variable to its MH counterpart: "AELLT" -> "MHLLT"; the remaining rows
# (i.e. "AEDECOD" after the filter) -> "MHDECOD".
queries_mh <- queries %>%
filter(SRCVAR %in% c("AELLT", "AEDECOD")) %>%
mutate(SRCVAR = ifelse(SRCVAR == "AELLT", "MHLLT", "MHDECOD"))

# Save `queries_mh` as data/queries_mh.rda, replacing any existing file.
usethis::use_data(queries_mh, overwrite = TRUE)

# Run the ADMH template script, echoing each expression as it is evaluated.
# NOTE(review): presumably this script creates the `admh` object used below;
# confirm against inst/templates/ad_admh.R.
source("inst/templates/ad_admh.R", echo = TRUE) # nolint

# Currently producing an error
# NOTE(review): the cause of the error is not visible from this script;
# investigate before relying on this call.
derive_vars_query(admh, queries_mh)
Binary file modified data/admiral_adlb.rda
Binary file not shown.
Binary file modified data/queries.rda
Binary file not shown.
Binary file modified data/queries_mh.rda
Binary file not shown.

0 comments on commit b44fd3d

Please sign in to comment.