Skip to content

Commit

Permalink
Closes #2584 country_code_lookup moved to data (#2614)
Browse files Browse the repository at this point in the history
* #2584 country_code_lookup moved to data

* #2584 Doc: update R/data.R

Co-authored-by: Stefan Bundfuss <[email protected]>

* #2584 Doc: update R/data.R

Co-authored-by: Stefan Bundfuss <[email protected]>

* #2584 also move dose_freq_lookup to data

---------

Co-authored-by: Stefan Bundfuss <[email protected]>
Co-authored-by: Ben Straub <[email protected]>
  • Loading branch information
3 people authored Dec 23, 2024
1 parent ebced9c commit 068b108
Show file tree
Hide file tree
Showing 11 changed files with 241 additions and 241 deletions.
2 changes: 0 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ export(convert_dtc_to_dt)
export(convert_dtc_to_dtm)
export(convert_na_to_blanks)
export(count_vals)
export(country_code_lookup)
export(create_period_dataset)
export(create_query_data)
export(create_single_dose_dataset)
Expand Down Expand Up @@ -117,7 +116,6 @@ export(derive_vars_period)
export(derive_vars_query)
export(derive_vars_transposed)
export(desc)
export(dose_freq_lookup)
export(dthcaus_source)
export(event)
export(event_joined)
Expand Down
2 changes: 1 addition & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ memory consumption. (#2590)
<summary>Developer Notes</summary>

- Created unit tests for developer internal function `restricted_imputed_dtc_dt()` (#2495)
- Adopted `data-raw/data` R Package Convention (#2427)
- Adopted `data-raw/data` R Package Convention (#2427, #2584)
- `compute_bsa()` now uses the more common (but equivalent) version of the DuBois-DuBois formula for BSA. The results have not changed. (#2532)
- Removed `.devcontainer` file (codespace) (#2524)

Expand Down
181 changes: 0 additions & 181 deletions R/create_single_dose_dataset.R
Original file line number Diff line number Diff line change
@@ -1,184 +1,3 @@
#' Pre-Defined Dose Frequencies
#'
#' @description
#' These pre-defined dose frequencies are sourced from
#' [CDISC](https://evs.nci.nih.gov/ftp1/CDISC/SDTM/SDTM%20Terminology.pdf). The
#' number of rows to generate using `create_single_dose_dataset()` arguments
#' `start_date` and `end_date` is derived from `DOSE_COUNT`, `DOSE_WINDOW`, and
#' `CONVERSION_FACTOR` with appropriate functions from `lubridate`.
#'
#' @details
#' `NCI_CODE` and `CDISC_VALUE` are included from the CDISC source for
#' traceability.
#'
#' `DOSE_COUNT` represents the number of doses received in one single unit of
#' `DOSE_WINDOW`. For example, for `CDISC_VALUE=="10 DAYS PER MONTH"`,
#' `DOSE_WINDOW=="MONTH"` and `DOSE_COUNT==10`. Similarly, for
#' `CDISC_VALUE=="EVERY 2 WEEKS"`, `DOSE_WINDOW=="WEEK"` and
#' `DOSE_COUNT==0.5` (to yield one dose every two weeks).
#'
#' `CONVERSION_FACTOR` is used to convert `DOSE_WINDOW` units `"WEEK"`,
#' `"MONTH"`, and `"YEAR"` to the unit `"DAY"`.
#'
#' For example, for `CDISC_VALUE=="10 DAYS PER MONTH"`, `CONVERSION_FACTOR`
#' is `0.0329`. One day of a month is assumed to be `1 / 30.4375` of a month (one
#' day is assumed to be `1/365.25` of a year).
#' Given only `start_date` and `end_date` in the aggregate dataset, `CONVERSION_FACTOR`
#' is used to calculate specific dates for `start_date` and `end_date` in the
#' resulting single dose dataset for the doses that occur. In such cases, doses
#' are assumed to occur at evenly spaced increments over the interval.
#'
#'
#' To see the entire table in the console, run `print(dose_freq_lookup)`.
#'
#' @seealso [create_single_dose_dataset()]
#'
#' @export
#'
#' @keywords metadata
#' @family metadata
#'
#' @rdname dose_freq_lookup

# Lookup table of dose frequencies. The NCI code / CDISC submission value
# pairs are listed literally; DOSE_COUNT, DOSE_WINDOW and CONVERSION_FACTOR
# are derived from the text of CDISC_VALUE below.
dose_freq_lookup <- tribble(
  ~NCI_CODE, ~CDISC_VALUE,
  "C64526", "1 TIME PER WEEK",
  "C139179", "10 DAYS PER MONTH",
  "C64497", "2 TIMES PER WEEK",
  "C98861", "2 TIMES PER YEAR",
  "C98859", "3 TIMES PER MONTH",
  "C64528", "3 TIMES PER WEEK",
  "C98860", "3 TIMES PER YEAR",
  "C98852", "4 TIMES PER MONTH",
  "C64531", "4 TIMES PER WEEK",
  "C98853", "4 TIMES PER YEAR",
  "C98849", "5 TIMES PER DAY",
  "C98850", "5 TIMES PER MONTH",
  "C85552", "5 TIMES PER WEEK",
  "C98851", "5 TIMES PER YEAR",
  "C98855", "6 TIMES PER DAY",
  "C98856", "6 TIMES PER MONTH",
  "C98857", "6 TIMES PER WEEK",
  "C98858", "6 TIMES PER YEAR",
  "C139180", "7 TIMES PER DAY",
  "C98854", "7 TIMES PER WEEK",
  "C139181", "8 TIMES PER DAY",
  "C139182", "9 TIMES PER DAY",
  "C64496", "BID",
  "C71129", "BIM",
  "C161332", "EVERY 12 WEEKS",
  "C161336", "EVERY 16 WEEKS",
  "C71127", "EVERY 2 WEEKS",
  "C64535", "EVERY 3 WEEKS",
  "C161333", "EVERY 3 YEARS",
  "C64529", "EVERY 4 WEEKS",
  "C103390", "EVERY 5 WEEKS",
  "C161334", "EVERY 5 YEARS",
  "C89788", "EVERY 6 WEEKS",
  "C116149", "EVERY 7 WEEKS",
  "C103389", "EVERY 8 WEEKS",
  "C154484", "EVERY AFTERNOON",
  "C160957", "EVERY EVENING",
  "C67069", "EVERY WEEK",
  "C74924", "PA",
  "C64500", "Q10H",
  "C64501", "Q11H",
  "C64502", "Q12H",
  "C64503", "Q13H",
  "C64504", "Q14H",
  "C64505", "Q15H",
  "C64506", "Q16H",
  "C64507", "Q17H",
  "C64508", "Q18H",
  "C64509", "Q19H",
  "C64511", "Q20H",
  "C64512", "Q21H",
  "C64513", "Q22H",
  "C64514", "Q23H",
  "C64515", "Q24H",
  "C64516", "Q2H",
  "C64536", "Q2M",
  "C89791", "Q36H",
  "C64533", "Q3D",
  "C64517", "Q3H",
  "C64537", "Q3M",
  "C139183", "Q45MIN",
  "C89790", "Q48H",
  "C64534", "Q4D",
  "C64518", "Q4H",
  "C64538", "Q4M",
  "C71124", "Q5D",
  "C64519", "Q5H",
  "C161335", "Q6D",
  "C64520", "Q6H",
  "C89789", "Q6M",
  "C174288", "Q72H",
  "C139177", "Q7D",
  "C64521", "Q7H",
  "C64523", "Q8H",
  "C64524", "Q9H",
  "C64595", "QAM",
  "C25473", "QD",
  "C64510", "QH",
  "C64593", "QHS",
  "C64530", "QID",
  "C64498", "QM",
  "C139178", "QN",
  "C64525", "QOD",
  # NOTE(review): Q2D reuses the NCI code of QOD -- confirm this is intended.
  "C64525", "Q2D",
  "C64596", "QPM",
  "C64527", "TID"
) %>%
  mutate(
    # Number of doses in one unit of DOSE_WINDOW. Branch order matters:
    # case_when() uses the first matching condition.
    DOSE_COUNT = case_when(
      # "<n> ... PER WEEK/MONTH/YEAR": n doses per window.
      str_detect(CDISC_VALUE, "PER [WMY]") ~
        as.numeric(str_remove_all(CDISC_VALUE, "[\\D]")),
      # "<n> TIMES PER DAY" is assigned DOSE_WINDOW "HOUR" below, so the
      # count per window is n doses spread over 24 hours, i.e. n / 24. This
      # is the same scale as BID (1 / 12) and Q12H (1 / 12). The previous
      # 24 / n was the number of hours between doses, not doses per hour.
      str_detect(CDISC_VALUE, "PER [D]") ~
        as.numeric(str_remove_all(CDISC_VALUE, "[\\D]")) / 24,
      # "Q<n>H" / "Q<n>MIN": one dose every n hours / minutes.
      str_detect(CDISC_VALUE, "^Q\\d{1,2}(H|MIN)") ~
        1 / as.numeric(str_remove_all(CDISC_VALUE, "[\\D]")),
      # "Q<n>D", "Q<n>M", "EVERY <n> WEEKS/YEARS": one dose every n windows.
      str_detect(CDISC_VALUE, "^(Q|EVERY)\\s?\\d{1,2}") ~
        1 / as.numeric(str_remove_all(CDISC_VALUE, "[\\D]")),
      # "EVERY AFTERNOON", "EVERY EVENING", "EVERY WEEK".
      str_detect(CDISC_VALUE, "^EVERY (A|E|W)[:alpha:]+") ~ 1,
      str_detect(CDISC_VALUE, "^Q(AM|PM|M|N|D|HS)|^PA$") ~ 1,
      str_detect(CDISC_VALUE, "^QH$") ~ 1,
      str_detect(CDISC_VALUE, "BIM") ~ 2,
      str_detect(CDISC_VALUE, "BID") ~ 1 / 12,
      str_detect(CDISC_VALUE, "TID") ~ 1 / 8,
      str_detect(CDISC_VALUE, "QID") ~ 1 / 6,
      str_detect(CDISC_VALUE, "QOD") ~ 0.5,
    ),
    # Time unit that DOSE_COUNT refers to.
    DOSE_WINDOW = case_when(
      # Take the last word of the value and strip "S" so that plural units
      # become singular (e.g. "WEEKS" -> "WEEK").
      str_detect(CDISC_VALUE, "EVERY \\d{1,2}|PER [WMY]") ~
        str_remove_all(sub(".* (\\w+)$", "\\1", CDISC_VALUE), "S"),
      str_detect(CDISC_VALUE, "^Q\\d{1,2}D$") ~ "DAY",
      str_detect(CDISC_VALUE, "^Q\\d{1,2}M$") ~ "MONTH",
      # "QH", "Q<n>H" and "<n> TIMES PER DAY" are all expressed per hour.
      str_detect(CDISC_VALUE, "^Q\\d{0,2}H$|PER D") ~ "HOUR",
      str_detect(CDISC_VALUE, "^Q\\d{1,2}MIN$") ~ "MINUTE",
      CDISC_VALUE %in% c("EVERY AFTERNOON", "EVERY EVENING") ~ "DAY",
      CDISC_VALUE %in% c("EVERY WEEK") ~ "WEEK",
      CDISC_VALUE %in% c(
        "QAM", "QPM", "QHS",
        "QD", "QN", "QOD"
      ) ~ "DAY",
      CDISC_VALUE %in% c(
        "BID", "TID", "QID"
      ) ~ "HOUR",
      CDISC_VALUE %in% c("QM", "BIM") ~ "MONTH",
      CDISC_VALUE == "PA" ~ "YEAR",
    )
  ) %>%
  mutate(
    # Factor per DOSE_WINDOW: 1 for MINUTE/HOUR/DAY, and the fraction of a
    # window that one day represents for WEEK/MONTH/YEAR.
    CONVERSION_FACTOR = case_when(
      DOSE_WINDOW == "MINUTE" ~ 1,
      DOSE_WINDOW == "HOUR" ~ 1,
      DOSE_WINDOW == "DAY" ~ 1,
      DOSE_WINDOW == "WEEK" ~ (1 / 7),
      DOSE_WINDOW == "MONTH" ~ (1 / 30.4375),
      DOSE_WINDOW == "YEAR" ~ (1 / 365.25),
    )
  )

#' Create dataset of single doses
#'
#' Derives dataset of single dose from aggregate dose information. This may be
Expand Down
89 changes: 89 additions & 0 deletions R/data.R
Original file line number Diff line number Diff line change
Expand Up @@ -171,3 +171,92 @@
#' @keywords metadata
#' @family metadata
"atoxgr_criteria_daids"

#' Country Code Lookup
#'
#' @description
#' These pre-defined country codes are sourced from
#' [ISO 3166 Standards](https://www.iso.org/iso-3166-country-codes.html).
#' See also [Wikipedia](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3).
#'
#' @details
#'
#' `country_code` is the 3-letter ISO 3166-1 country code commonly found in the
#' ADSL `COUNTRY` variable.
#' `country_name` is the country long name corresponding to the 3-letter code.
#' `country_number` is the numeric code corresponding to an alphabetic sorting of
#' the 3-letter codes.
#'
#' To see the entire table in the console, run `print(country_code_lookup)`.
#'
#' @seealso [dose_freq_lookup]
#'
#' @keywords metadata
#'
#' @family metadata
#'
#' @examples
#' library(tibble)
#' library(dplyr, warn.conflicts = FALSE)
#'
#' # Create example ADSL dataset with 3-letter country codes
#' adsl <- tribble(
#' ~USUBJID, ~SEX, ~COUNTRY,
#' "ST01-01", "F", "AUT",
#' "ST01-02", "M", "MWI",
#' "ST01-03", "F", "GBR",
#' "ST01-04", "M", "CHE",
#' "ST01-05", "M", "NOR",
#' "ST01-06", "F", "JPN",
#' "ST01-07", "F", "USA"
#' )
#'
#' adsl %>%
#' derive_vars_merged(
#' dataset_add = country_code_lookup,
#' new_vars = exprs(COUNTRYN = country_number, COUNTRYL = country_name),
#' by_vars = exprs(COUNTRY = country_code)
#' )
#'
#' @rdname country_code_lookup
"country_code_lookup"

#' Pre-Defined Dose Frequencies
#'
#' @description
#' These pre-defined dose frequencies are sourced from
#' [CDISC](https://evs.nci.nih.gov/ftp1/CDISC/SDTM/SDTM%20Terminology.pdf). The
#' number of rows to generate using `create_single_dose_dataset()` arguments
#' `start_date` and `end_date` is derived from `DOSE_COUNT`, `DOSE_WINDOW`, and
#' `CONVERSION_FACTOR` with appropriate functions from `lubridate`.
#'
#' @details
#' `NCI_CODE` and `CDISC_VALUE` are included from the CDISC source for
#' traceability.
#'
#' `DOSE_COUNT` represents the number of doses received in one single unit of
#' `DOSE_WINDOW`. For example, for `CDISC_VALUE=="10 DAYS PER MONTH"`,
#' `DOSE_WINDOW=="MONTH"` and `DOSE_COUNT==10`. Similarly, for
#' `CDISC_VALUE=="EVERY 2 WEEKS"`, `DOSE_WINDOW=="WEEK"` and
#' `DOSE_COUNT==0.5` (to yield one dose every two weeks).
#'
#' `CONVERSION_FACTOR` is used to convert `DOSE_WINDOW` units `"WEEK"`,
#' `"MONTH"`, and `"YEAR"` to the unit `"DAY"`.
#'
#' For example, for `CDISC_VALUE=="10 DAYS PER MONTH"`, `CONVERSION_FACTOR`
#' is `0.0329`. One day of a month is assumed to be `1 / 30.4375` of a month (one
#' day is assumed to be `1/365.25` of a year).
#' Given only `start_date` and `end_date` in the aggregate dataset, `CONVERSION_FACTOR`
#' is used to calculate specific dates for `start_date` and `end_date` in the
#' resulting single dose dataset for the doses that occur. In such cases, doses
#' are assumed to occur at evenly spaced increments over the interval.
#'
#' To see the entire table in the console, run `print(dose_freq_lookup)`.
#'
#' @seealso [create_single_dose_dataset()]
#'
#' @keywords metadata
#' @family metadata
#'
#' @rdname dose_freq_lookup
"dose_freq_lookup"
3 changes: 2 additions & 1 deletion R/globals.R
Original file line number Diff line number Diff line change
Expand Up @@ -133,5 +133,6 @@ globalVariables(c(
"num_records",
"worsen_date",
"prev_intensity",
"srfl"
"srfl",
"dose_freq_lookup"
))
55 changes: 4 additions & 51 deletions R/create_country_codes.R → data-raw/create_country_codes.R
Original file line number Diff line number Diff line change
@@ -1,54 +1,4 @@
#' Country Code Lookup
#'
#' @description
#' These pre-defined country codes are sourced from
#' [ISO 3166 Standards](https://www.iso.org/iso-3166-country-codes.html).
#' See also [Wikipedia](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3).
#'
#' @details
#'
#' `country_code` is the 3-letter ISO 3166-1 country code commonly found in the
#' ADSL `COUNTRY` variable.
#' `country_name` is the country long name corresponding to the 3-letter code.
#' `country_number` is the numeric code corresponding to an alphabetic sorting of
#' the 3-letter codes.
#'
#' To see the entire table in the console, run `print(country_code_lookup)`.
#'
#' @seealso [dose_freq_lookup]
#'
#' @export
#'
#' @keywords metadata
#'
#' @family metadata
#'
#' @examples
#' library(tibble)
#' library(dplyr, warn.conflicts = FALSE)
#' library(lubridate)
#'
#' # Create reference dataset for periods
#' adsl <- tribble(
#' ~USUBJID, ~SEX, ~COUNTRY,
#' "ST01-01", "F", "AUT",
#' "ST01-02", "M", "MWI",
#' "ST01-03", "F", "GBR",
#' "ST01-04", "M", "CHE",
#' "ST01-05", "M", "NOR",
#' "ST01-06", "F", "JPN",
#' "ST01-07", "F", "USA"
#' )
#'
#' covar <- adsl %>%
#' derive_vars_merged(
#' dataset_add = country_code_lookup,
#' new_vars = exprs(COUNTRYN = country_number, COUNTRYL = country_name),
#' by_vars = exprs(COUNTRY = country_code)
#' )
#' covar
#'
#' @rdname country_code_lookup
# Code to create data/country_code_lookup
country_code_lookup <- tribble(
~country_code, ~country_name,
"ABW", "Aruba",
Expand Down Expand Up @@ -305,3 +255,6 @@ country_code_lookup <- tribble(

# Number the ISO 3166 alpha-3 country codes by their row position in the
# table (1-249), stored as a double rather than an integer.
country_code_lookup[["country_number"]] <- as.numeric(
  seq_len(nrow(country_code_lookup))
)

# Write the table to data/country_code_lookup.rda, replacing any existing file
usethis::use_data(country_code_lookup, overwrite = TRUE)
Loading

0 comments on commit 068b108

Please sign in to comment.