Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closes #2526 data raw data round/2 #2539

Merged
merged 5 commits into from
Oct 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file removed data-backup/admiral_adlb.rda
Binary file not shown.
Binary file removed data-backup/admiral_adsl.rda
Binary file not shown.
466 changes: 0 additions & 466 deletions data-raw/admiral_adlb.R

This file was deleted.

103 changes: 15 additions & 88 deletions data-raw/create_admiral_adlb.R
Original file line number Diff line number Diff line change
@@ -1,61 +1,13 @@
# Create dataset: data/admiral_adlb.rda
# This script: create_admiral_adlb.R creates dataset data/admiral_adlb.rda.
#

# Preliminary
library(diffdf) # nolint
# Create dataset data/admiral_adlb.rda

# To clarify directories (can be removed)
# nolint start: object_name_linter
CACHE_DIR <- "~/.cache/R/admiral_templates_data/"
DATA_DIR <- "data-dir"
DATA_RAW <- "data-raw"
TEMPLATE_DIR <- "inst/templates/"
# Run template script to create adlb
source("inst/templates/ad_adlb.R", echo = TRUE) # nolint

# clean CACHE_DIR
THE_FILE <- paste0(CACHE_DIR, "/adlb.rda")
THE_FILE <- paste0(CACHE_DIR, "/adsl.rda")
if (file.exists(THE_FILE)) file.remove(THE_FILE)
# nolint end

#
# STEPS
#
# First, use template to create the R script (in data-raw/admiral_adlb.R).
# Next, source this script and create the data (~/.cache/R/admiral_templates_data/adlb.rda)
# Finally, shorten this data (now ~ 1.2 MB) by selecting only certain USUBJID

# original method - OMIT
if (FALSE) {
# ### original method (method 1)
# # First, create the R script (from a template)
adam_name <- "adlb"
save_path <- paste0("./data-raw/admiral_", adam_name, ".R")
use_ad_template(
adam_name = adam_name,
save_path = save_path,
open = FALSE,
overwrite = TRUE
)
# Second, source the script and save data in .cache
source("data-raw/admiral_adlb.R") # nolint
load("~/.cache/R/admiral_templates_data/adlb.rda")
}

#
# Instead, USE template, as recommended by Buzz
#
source(paste0(TEMPLATE_DIR, "/ad_adlb.R")) # nolint
load(paste0(CACHE_DIR, "adlb.rda"))

#
# limit rows, by selecting only these USUBJID
#
#' 01-701-1015, 01-701-1023, 01-701-1028, 01-701-1033,
#' 01-701-1034, 01-701-1047, 01-701-1097, 01-705-1186,
#' 01-705-1292, 01-705-1310, 01-708-1286

usubjid <-
# Limit rows by selecting only these USUBJIDs
usubjids <-
c(
"01-701-1015",
"01-701-1023",
Expand All @@ -70,42 +22,17 @@ usubjid <-
"01-708-1286"
)

# prepare for inner join
user <- tibble(
USUBJID = usubjid
)
result <- inner_join(adlb, user)
admiral_adlb <- result
admiral_adlb

#
# Finally, save reduced ds
#
use_data(admiral_adlb, overwrite = TRUE)

#
# TEST - is dataset identical to .... backup of unaltered dataset
#
e1 <- new.env()
e2 <- new.env()
load("data/admiral_adlb.rda", e1)

# CHANGE to YOUR location of original dataset
load("data-backup/admiral_adlb.rda", e2)

# compare field names
t <- tibble(e1 = names(e1$admiral_adlb), e2 = names(e2$admiral_adlb))
t |> print(n = 111)
admiral_adlb <- filter(adlb, USUBJID %in% usubjids)

identical(e1$admiral_adlb, e2$admiral_adlb)
diffdf(e1$admiral_adlb, e2$admiral_adlb)
# Get previous dataset for comparison
adlb_old <- admiral::admiral_adlb

## Capture diffdf to file
# Finally, save reduced dataset
usethis::use_data(admiral_adlb, overwrite = TRUE)

capture.output(
diffdf(
compare = e1$admiral_adlb, base = e2$admiral_adlb,
keys = c("STUDYID", "DOMAIN", "USUBJID", "AVAL", "VISIT")
),
file = "data-raw/diffdf_adlb_23SEPT"
# Compare with previous version
diffdf::diffdf(
base = adlb_old,
compare = admiral_adlb,
keys = c("USUBJID", "PARAMCD", "AVISIT", "ADT")
)
83 changes: 14 additions & 69 deletions data-raw/create_admiral_adsl.R
Original file line number Diff line number Diff line change
@@ -1,78 +1,23 @@
# Create dataset: data/admiral_adsl.rda
# This script: create_admiral_adsl.R creates dataset data/admiral_adsl.rda.
# This script, create_admiral_adsl.R, creates dataset data/admiral_adsl.rda.
#

# Preliminary
library(diffdf) # nolint
# Create dataset data/admiral_adsl.rda

# Run template script to create adsl
source("inst/templates/ad_adsl.R", echo = TRUE) # nolint

# nolint start: object_name_linter.
# To clarify directories (can be removed)
CACHE_DIR <- "~/.cache/R/admiral_templates_data/"
DATA_DIR <- "data-dir"
DATA_RAW <- "data-raw"
TEMPLATE_DIR <- "inst/templates/"
admiral_adsl <- adsl # use correct name

# clean CACHE_DIR
THE_FILE <- paste0(CACHE_DIR, "/adlb.rda")
THE_FILE <- paste0(CACHE_DIR, "/adsl.rda")
if (file.exists(THE_FILE)) file.remove(THE_FILE)
# nolint end

#
# STEPS
#
# First, use template to create the R script (in data-raw/admiral_adlb.R).
# Next, source this script and create the data (~/.cache/R/admiral_template_data/admiral_adlb.rda)
# Finally, shorten this data (now ~ 1.2 MB) by selecting only certain USUBJID

# OMIT -- original method - OMIT
if (FALSE) {
# ### original method (method 1)
# # First, create the R script (from a template)
adam_name <- "adlb"
save_path <- paste0("./data-raw/admiral_", adam_name, ".R")
use_ad_template(
adam_name = adam_name,
save_path = save_path,
open = FALSE,
overwrite = TRUE
)
# Second, source the script and save data in .cache
source("data-raw/admiral_adlb.R") # nolint
load("~/.cache/R/admiral_templates_data/adlb.rda")
}

#
#
# Instead, USE template, as recommended by Buzz
#
source(paste0(TEMPLATE_DIR, "/ad_adsl.R")) # nolint
load(paste0(CACHE_DIR, "adsl.rda"))
admiral_adsl <- adsl

#
# Finally, save reduced ds
#
use_data(admiral_adsl, overwrite = TRUE)

#
# TEST - is dataset identical to .... backup of unaltered dataset
#
e1 <- new.env()
e2 <- new.env()
load("data/admiral_adsl.rda", e1)

# CHANGE to YOUR location of original dataset
load("data-backup/admiral_adsl.rda", e2)
# Get previous dataset for comparison
adsl_old <- admiral::admiral_adsl

# Finally, save reduced dataset
usethis::use_data(admiral_adsl, overwrite = TRUE)

identical(e1$admiral_adsl, e2$admiral_adsl)
diffdf(compare = e1$admiral_adsl, base = e2$admiral_adsl, keys = c("STUDYID", "USUBJID"))
capture.output(
diffdf(
compare = e1$admiral_adsl, base = e2$admiral_adsl,
keys = c("STUDYID", "USUBJID")
),
file = "data-raw/diffdf_23SEPT"
# Compare with previous version
diffdf::diffdf(
base = adsl_old,
compare = admiral_adsl,
keys = c("STUDYID", "USUBJID")
)
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,20 @@ adae <- tibble::tribble(
"APPLICATION SITE ERYTHEMA", "Application site erythema", 1
)

# create queries.rda in data/
usethis::use_data(queries, overwrite = TRUE)

# try below:
derive_vars_query(adae, queries)


# example to use for ADMH:
queries_mh <- queries %>%
filter(SRCVAR %in% c("AELLT", "AEDECOD")) %>%
mutate(SRCVAR = ifelse(SRCVAR == "AELLT", "MHLLT", "MHDECOD"))

usethis::use_data(queries_mh, overwrite = TRUE)

source("inst/templates/ad_admh.R", echo = TRUE) # nolint

# Currently producing an error
derive_vars_query(admh, queries_mh)
Binary file modified data/admiral_adlb.rda
Binary file not shown.
Binary file modified data/queries.rda
Binary file not shown.
Binary file modified data/queries_mh.rda
Binary file not shown.
Loading