Closes #2584 country_code_lookup moved to data (#2614)

* #2584 country_code_lookup moved to data * #2584 Doc: update R/data.R Co-authored-by: Stefan Bundfuss <[email protected]> * #2584 Doc: update R/data.R Co-authored-by: Stefan Bundfuss <[email protected]> * #2584 also move dose_freq_lookup to data --------- Co-authored-by: Stefan Bundfuss <[email protected]> Co-authored-by: Ben Straub <[email protected]>
pharmaverse · Dec 23, 2024 · 068b108 · 068b108
1 parent ebced9c
commit 068b108
Show file tree

Hide file tree

Showing 11 changed files with 241 additions and 241 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -49,7 +49,6 @@ export(convert_dtc_to_dt)
 export(convert_dtc_to_dtm)
 export(convert_na_to_blanks)
 export(count_vals)
-export(country_code_lookup)
 export(create_period_dataset)
 export(create_query_data)
 export(create_single_dose_dataset)
@@ -117,7 +116,6 @@ export(derive_vars_period)
 export(derive_vars_query)
 export(derive_vars_transposed)
 export(desc)
-export(dose_freq_lookup)
 export(dthcaus_source)
 export(event)
 export(event_joined)

diff --git a/NEWS.md b/NEWS.md
@@ -90,7 +90,7 @@ memory consumption. (#2590)
 <summary>Developer Notes</summary>
 
 - Created unit tests for developer internal function `restricted_imputed_dtc_dt()` (#2495)
-- Adopted `data-raw/data` R Package Convention (#2427)
+- Adopted `data-raw/data` R Package Convention (#2427, #2584)
 - `compute_bsa()` now uses the more common (but equivalent) version of the DuBois-DuBois formula for BSA. The results have not changed. (#2532)  
 - Removed `.devcontainer` file (codespace) (#2524)
 

diff --git a/R/create_single_dose_dataset.R b/R/create_single_dose_dataset.R
@@ -1,184 +1,3 @@
-#' Pre-Defined Dose Frequencies
-#'
-#' @description
-#' These pre-defined dose frequencies are sourced from
-#' [CDISC](https://evs.nci.nih.gov/ftp1/CDISC/SDTM/SDTM%20Terminology.pdf). The
-#' number of rows to generate using `create_single_dose_dataset()` arguments
-#' `start_date` and `end_date` is derived from `DOSE_COUNT`, `DOSE_WINDOW`, and
-#' `CONVERSION_FACTOR` with appropriate functions from `lubridate`.
-#'
-#' @details
-#' `NCI_CODE` and `CDISC_VALUE` are included from the CDISC source for
-#' traceability.
-#'
-#' `DOSE_COUNT` represents the number of doses received in one single unit of
-#' `DOSE_WINDOW`. For example, for `CDISC_VALUE=="10 DAYS PER MONTH"`,
-#' `DOSE_WINDOW=="MONTH"` and `DOSE_COUNT==10`. Similarly, for
-#' `CDISC_VALUE=="EVERY 2 WEEKS"`, `DOSE_WINDOW=="WEEK"` and
-#' `DOSE_COUNT==0.5` (to yield one dose every two weeks).
-#'
-#' `CONVERSION_FACTOR` is used to convert `DOSE_WINDOW` units `"WEEK"`,
-#'  `"MONTH"`, and `"YEAR"` to the unit `"DAY"`.
-#'
-#' For example, for `CDISC_VALUE=="10 DAYS PER MONTH"`, `CONVERSION_FACTOR`
-#' is `0.0329`. One day of a month is assumed to be `1 / 30.4375` of a month (one
-#' day is assumed to be `1/365.25` of a year).
-#' Given only `start_date` and `end_date` in the aggregate dataset, `CONVERSION_FACTOR`
-#' is used to calculate specific dates for`start_date` and `end_date` in the
-#' resulting single dose dataset for the doses that occur. In such cases, doses
-#' are assumed to occur at evenly spaced increments over the interval.
-#'
-#'
-#' To see the entire table in the console, run `print(dose_freq_lookup)`.
-#'
-#' @seealso [create_single_dose_dataset()]
-#'
-#' @export
-#'
-#' @keywords metadata
-#' @family metadata
-#'
-#' @rdname dose_freq_lookup
-
-dose_freq_lookup <- tribble(
-  ~NCI_CODE, ~CDISC_VALUE,
-  "C64526", "1 TIME PER WEEK",
-  "C139179", "10 DAYS PER MONTH",
-  "C64497", "2 TIMES PER WEEK",
-  "C98861", "2 TIMES PER YEAR",
-  "C98859", "3 TIMES PER MONTH",
-  "C64528", "3 TIMES PER WEEK",
-  "C98860", "3 TIMES PER YEAR",
-  "C98852", "4 TIMES PER MONTH",
-  "C64531", "4 TIMES PER WEEK",
-  "C98853", "4 TIMES PER YEAR",
-  "C98849", "5 TIMES PER DAY",
-  "C98850", "5 TIMES PER MONTH",
-  "C85552", "5 TIMES PER WEEK",
-  "C98851", "5 TIMES PER YEAR",
-  "C98855", "6 TIMES PER DAY",
-  "C98856", "6 TIMES PER MONTH",
-  "C98857", "6 TIMES PER WEEK",
-  "C98858", "6 TIMES PER YEAR",
-  "C139180", "7 TIMES PER DAY",
-  "C98854", "7 TIMES PER WEEK",
-  "C139181", "8 TIMES PER DAY",
-  "C139182", "9 TIMES PER DAY",
-  "C64496", "BID",
-  "C71129", "BIM",
-  "C161332", "EVERY 12 WEEKS",
-  "C161336", "EVERY 16 WEEKS",
-  "C71127", "EVERY 2 WEEKS",
-  "C64535", "EVERY 3 WEEKS",
-  "C161333", "EVERY 3 YEARS",
-  "C64529", "EVERY 4 WEEKS",
-  "C103390", "EVERY 5 WEEKS",
-  "C161334", "EVERY 5 YEARS",
-  "C89788", "EVERY 6 WEEKS",
-  "C116149", "EVERY 7 WEEKS",
-  "C103389", "EVERY 8 WEEKS",
-  "C154484", "EVERY AFTERNOON",
-  "C160957", "EVERY EVENING",
-  "C67069", "EVERY WEEK",
-  "C74924", "PA",
-  "C64500", "Q10H",
-  "C64501", "Q11H",
-  "C64502", "Q12H",
-  "C64503", "Q13H",
-  "C64504", "Q14H",
-  "C64505", "Q15H",
-  "C64506", "Q16H",
-  "C64507", "Q17H",
-  "C64508", "Q18H",
-  "C64509", "Q19H",
-  "C64511", "Q20H",
-  "C64512", "Q21H",
-  "C64513", "Q22H",
-  "C64514", "Q23H",
-  "C64515", "Q24H",
-  "C64516", "Q2H",
-  "C64536", "Q2M",
-  "C89791", "Q36H",
-  "C64533", "Q3D",
-  "C64517", "Q3H",
-  "C64537", "Q3M",
-  "C139183", "Q45MIN",
-  "C89790", "Q48H",
-  "C64534", "Q4D",
-  "C64518", "Q4H",
-  "C64538", "Q4M",
-  "C71124", "Q5D",
-  "C64519", "Q5H",
-  "C161335", "Q6D",
-  "C64520", "Q6H",
-  "C89789", "Q6M",
-  "C174288", "Q72H",
-  "C139177", "Q7D",
-  "C64521", "Q7H",
-  "C64523", "Q8H",
-  "C64524", "Q9H",
-  "C64595", "QAM",
-  "C25473", "QD",
-  "C64510", "QH",
-  "C64593", "QHS",
-  "C64530", "QID",
-  "C64498", "QM",
-  "C139178", "QN",
-  "C64525", "QOD",
-  "C64525", "Q2D",
-  "C64596", "QPM",
-  "C64527", "TID"
-) %>%
-  mutate(
-    DOSE_COUNT = case_when(
-      str_detect(CDISC_VALUE, "PER [WMY]") ~
-        as.numeric(str_remove_all(CDISC_VALUE, "[\\D]")),
-      str_detect(CDISC_VALUE, "PER [D]") ~
-        24 / as.numeric(str_remove_all(CDISC_VALUE, "[\\D]")),
-      str_detect(CDISC_VALUE, "^Q\\d{1,2}(H|MIN)") ~
-        1 / as.numeric(str_remove_all(CDISC_VALUE, "[\\D]")),
-      str_detect(CDISC_VALUE, "^(Q|EVERY)\\s?\\d{1,2}") ~
-        1 / as.numeric(str_remove_all(CDISC_VALUE, "[\\D]")),
-      str_detect(CDISC_VALUE, "^EVERY (A|E|W)[:alpha:]+") ~ 1,
-      str_detect(CDISC_VALUE, "^Q(AM|PM|M|N|D|HS)|^PA$") ~ 1,
-      str_detect(CDISC_VALUE, "^QH$") ~ 1,
-      str_detect(CDISC_VALUE, "BIM") ~ 2,
-      str_detect(CDISC_VALUE, "BID") ~ 1 / 12,
-      str_detect(CDISC_VALUE, "TID") ~ 1 / 8,
-      str_detect(CDISC_VALUE, "QID") ~ 1 / 6,
-      str_detect(CDISC_VALUE, "QOD") ~ 0.5,
-    ),
-    DOSE_WINDOW = case_when(
-      str_detect(CDISC_VALUE, "EVERY \\d{1,2}|PER [WMY]") ~
-        str_remove_all(sub(".* (\\w+)$", "\\1", CDISC_VALUE), "S"),
-      str_detect(CDISC_VALUE, "^Q\\d{1,2}D$") ~ "DAY",
-      str_detect(CDISC_VALUE, "^Q\\d{1,2}M$") ~ "MONTH",
-      str_detect(CDISC_VALUE, "^Q\\d{0,2}H$|PER D") ~ "HOUR",
-      str_detect(CDISC_VALUE, "^Q\\d{1,2}MIN$") ~ "MINUTE",
-      CDISC_VALUE %in% c("EVERY AFTERNOON", "EVERY EVENING") ~ "DAY",
-      CDISC_VALUE %in% c("EVERY WEEK") ~ "WEEK",
-      CDISC_VALUE %in% c(
-        "QAM", "QPM", "QHS",
-        "QD", "QN", "QOD"
-      ) ~ "DAY",
-      CDISC_VALUE %in% c(
-        "BID", "TID", "QID"
-      ) ~ "HOUR",
-      CDISC_VALUE %in% c("QM", "BIM") ~ "MONTH",
-      CDISC_VALUE == "PA" ~ "YEAR",
-    )
-  ) %>%
-  mutate(
-    CONVERSION_FACTOR = case_when(
-      DOSE_WINDOW == "MINUTE" ~ 1,
-      DOSE_WINDOW == "HOUR" ~ 1,
-      DOSE_WINDOW == "DAY" ~ 1,
-      DOSE_WINDOW == "WEEK" ~ (1 / 7),
-      DOSE_WINDOW == "MONTH" ~ (1 / 30.4375),
-      DOSE_WINDOW == "YEAR" ~ (1 / 365.25),
-    )
-  )
-
 #' Create dataset of single doses
 #'
 #' Derives dataset of single dose from aggregate dose information. This may be

diff --git a/R/data.R b/R/data.R
@@ -171,3 +171,92 @@
 #' @keywords metadata
 #' @family metadata
 "atoxgr_criteria_daids"
+
+#' Country Code Lookup
+#'
+#' @description
+#' These pre-defined country codes are sourced from
+#' [ISO 3166 Standards](https://www.iso.org/iso-3166-country-codes.html).
+#' See also [Wikipedia](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3).
+#'
+#' @details
+#'
+#' `country_code` is the 3-letter ISO 3166-1 country code commonly found in the
+#' ADSL `COUNTRY` variable.
+#' `country_name` is the country long name corresponding to the 3-letter code.
+#' `country_number` is the numeric code corresponding to an alphabetic sorting of
+#' the 3-letter codes.
+#'
+#' To see the entire table in the console, run `print(country_code_lookup)`.
+#'
+#' @seealso [dose_freq_lookup]
+#'
+#' @keywords metadata
+#'
+#' @family metadata
+#'
+#' @examples
+#' library(tibble)
+#' library(dplyr, warn.conflicts = FALSE)
+#'
+#' # Create example ADSL dataset with country codes
+#' adsl <- tribble(
+#'   ~USUBJID, ~SEX, ~COUNTRY,
+#'   "ST01-01", "F", "AUT",
+#'   "ST01-02", "M", "MWI",
+#'   "ST01-03", "F", "GBR",
+#'   "ST01-04", "M", "CHE",
+#'   "ST01-05", "M", "NOR",
+#'   "ST01-06", "F", "JPN",
+#'   "ST01-07", "F", "USA"
+#' )
+#'
+#' adsl %>%
+#'   derive_vars_merged(
+#'     dataset_add = country_code_lookup,
+#'     new_vars = exprs(COUNTRYN = country_number, COUNTRYL = country_name),
+#'     by_vars = exprs(COUNTRY = country_code)
+#'   )
+#'
+#' @rdname country_code_lookup
+"country_code_lookup"
+
+#' Pre-Defined Dose Frequencies
+#'
+#' @description
+#' These pre-defined dose frequencies are sourced from
+#' [CDISC](https://evs.nci.nih.gov/ftp1/CDISC/SDTM/SDTM%20Terminology.pdf). The
+#' number of rows to generate using `create_single_dose_dataset()` arguments
+#' `start_date` and `end_date` is derived from `DOSE_COUNT`, `DOSE_WINDOW`, and
+#' `CONVERSION_FACTOR` with appropriate functions from `lubridate`.
+#'
+#' @details
+#' `NCI_CODE` and `CDISC_VALUE` are included from the CDISC source for
+#' traceability.
+#'
+#' `DOSE_COUNT` represents the number of doses received in one single unit of
+#' `DOSE_WINDOW`. For example, for `CDISC_VALUE=="10 DAYS PER MONTH"`,
+#' `DOSE_WINDOW=="MONTH"` and `DOSE_COUNT==10`. Similarly, for
+#' `CDISC_VALUE=="EVERY 2 WEEKS"`, `DOSE_WINDOW=="WEEK"` and
+#' `DOSE_COUNT==0.5` (to yield one dose every two weeks).
+#'
+#' `CONVERSION_FACTOR` is used to convert `DOSE_WINDOW` units `"WEEK"`,
+#'  `"MONTH"`, and `"YEAR"` to the unit `"DAY"`.
+#'
+#' For example, for `CDISC_VALUE=="10 DAYS PER MONTH"`, `CONVERSION_FACTOR`
+#' is `0.0329`. One day of a month is assumed to be `1 / 30.4375` of a month (one
+#' day is assumed to be `1/365.25` of a year).
+#' Given only `start_date` and `end_date` in the aggregate dataset, `CONVERSION_FACTOR`
+#' is used to calculate specific dates for `start_date` and `end_date` in the
+#' resulting single dose dataset for the doses that occur. In such cases, doses
+#' are assumed to occur at evenly spaced increments over the interval.
+#'
+#' To see the entire table in the console, run `print(dose_freq_lookup)`.
+#'
+#' @seealso [create_single_dose_dataset()]
+#'
+#' @keywords metadata
+#' @family metadata
+#'
+#' @rdname dose_freq_lookup
+"dose_freq_lookup"
diff --git a/R/globals.R b/R/globals.R
@@ -133,5 +133,6 @@ globalVariables(c(
   "num_records",
   "worsen_date",
   "prev_intensity",
-  "srfl"
+  "srfl",
+  "dose_freq_lookup"
 ))
diff --git a/R/create_country_codes.R → data-raw/create_country_codes.R b/R/create_country_codes.R → data-raw/create_country_codes.R
@@ -1,54 +1,4 @@
-#' Country Code Lookup
-#'
-#' @description
-#' These pre-defined country codes are sourced from
-#' [ISO 3166 Standards](https://www.iso.org/iso-3166-country-codes.html).
-#' See also [Wikipedia](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3).
-#'
-#' @details
-#'
-#' `country_code` is the 3-letter ISO 3166-1 county code commonly found in the
-#' ADSL `COUNTRY` variable.
-#' `country_name` is the country long name corresponding to to the 3-letter code.
-#' `country_number` is the numeric code corresponding to an alphabetic sorting of
-#' the 3-letter codes.
-#'
-#' To see the entire table in the console, run `print(country_code_lookup)`.
-#'
-#' @seealso [dose_freq_lookup]
-#'
-#' @export
-#'
-#' @keywords metadata
-#'
-#' @family metadata
-#'
-#' @examples
-#' library(tibble)
-#' library(dplyr, warn.conflicts = FALSE)
-#' library(lubridate)
-#'
-#' # Create reference dataset for periods
-#' adsl <- tribble(
-#'   ~USUBJID, ~SEX, ~COUNTRY,
-#'   "ST01-01", "F", "AUT",
-#'   "ST01-02", "M", "MWI",
-#'   "ST01-03", "F", "GBR",
-#'   "ST01-04", "M", "CHE",
-#'   "ST01-05", "M", "NOR",
-#'   "ST01-06", "F", "JPN",
-#'   "ST01-07", "F", "USA"
-#' )
-#'
-#' covar <- adsl %>%
-#'   derive_vars_merged(
-#'     dataset_add = country_code_lookup,
-#'     new_vars = exprs(COUNTRYN = country_number, COUNTRYL = country_name),
-#'     by_vars = exprs(COUNTRY = country_code)
-#'   )
-#' covar
-#'
-#' @rdname country_code_lookup
+# Code to create data/country_code_lookup
 country_code_lookup <- tribble(
   ~country_code, ~country_name,
   "ABW", "Aruba",
@@ -305,3 +255,6 @@ country_code_lookup <- tribble(
 
 # Convert ISO 3166 alpha 3 country codes to numbers 1-249
 country_code_lookup$country_number <- as.numeric(seq_len(nrow(country_code_lookup)))
+
+# Create country_code_lookup.rda in data/
+usethis::use_data(country_code_lookup, overwrite = TRUE)