From c4213d32bf029be7d311c4166dfa86989c863c94 Mon Sep 17 00:00:00 2001
From: Ben Straub <ben.x.straub@gsk.com>
Date: Fri, 28 Jul 2023 08:28:43 -0400
Subject: [PATCH 1/5] Closes #123 Add Edoardo to status check (#2031)

fix: https://github.com/pharmaverse/admiralci/issues/123 add Edoardo to status check
---
 .github/workflows/cran-status.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/cran-status.yml b/.github/workflows/cran-status.yml
index c1802ff19f..504680a914 100644
--- a/.github/workflows/cran-status.yml
+++ b/.github/workflows/cran-status.yml
@@ -17,7 +17,7 @@ jobs:
     with:
       # Whom should the issue be assigned to if errors are encountered
       # in the CRAN status checks?
-      issue-assignees: "bundfussr,esimms999-gsk,thomas-neitmann,bms63"
+      issue-assignees: "bundfussr,esimms999-gsk,manciniedoardo,bms63"
       # Create an issue if one or more of the following
       # statuses are reported on the check report.
       statuses: "WARN,ERROR,NOTE"

From 2e487b4386b83f1b375e3d35c32e1293b9ba4f2e Mon Sep 17 00:00:00 2001
From: Sophie Shapcott <90790226+sophie-gem@users.noreply.github.com>
Date: Fri, 28 Jul 2023 14:31:43 +0100
Subject: [PATCH 2/5] Closes #2014 derive_var_shift change na_val@devel (#2032)

* #2014 `na_val` deprecated and `missing_value` inserted into `derive_var_shift`.

* #2014 - Update `NEWS.md`.

* #2014 - update tests and write deprecation parameter test

* #2014 - run required tasks for PR

---------

Co-authored-by: Ben Straub <ben.x.straub@gsk.com>
---
 .Rprofile                              |  4 +--
 NEWS.md                                |  3 ++
 R/derive_var_shift.R                   | 22 ++++++++++----
 man/derive_var_shift.Rd                |  9 ++++--
 tests/testthat/test-derive_var_shift.R | 41 ++++++++++++++++++++++----
 5 files changed, 63 insertions(+), 16 deletions(-)

diff --git a/.Rprofile b/.Rprofile
index 63eee44871..f9983c698b 100644
--- a/.Rprofile
+++ b/.Rprofile
@@ -1,7 +1,7 @@
 # Set renv profile base on R version.
 renv_profile <- paste(R.version$major, substr(R.version$minor, 1, 1), sep = ".")
 if (file.exists("./renv/profile")) {
-   message("Using renv profile from `renv/profile` file.")
+  message("Using renv profile from `renv/profile` file.")
 } else if (renv_profile %in% c("4.1", "4.2", "4.3")) {
   message("Set renv profile to `", renv_profile, "`")
   Sys.setenv("RENV_PROFILE" = renv_profile)
@@ -11,6 +11,6 @@ if (file.exists("./renv/profile")) {
 
 if ((Sys.getenv("GITHUB_ACTIONS") != "") || (Sys.getenv("DOCKER_CONTAINER_CONTEXT") != "")) {
   options(repos = c(CRAN = "https://cran.rstudio.com"))
-  Sys.setenv(RENV_AUTOLOADER_ENABLED=FALSE)
+  Sys.setenv(RENV_AUTOLOADER_ENABLED = FALSE)
 }
 source("renv/activate.R")
diff --git a/NEWS.md b/NEWS.md
index ddfcac1408..a16e4f0db4 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -34,6 +34,9 @@
 - The `filter` argument in `derive_extreme_records()` was deprecated in favor of
 the `filter_add` using the next phase of the deprecation process. (#1950)
 
+- The `na_val` argument in `derive_var_shift()` has been deprecated in favor of 
+`missing_value` using the first phase of the deprecation process. (#2014)
+
 ## Documentation
 
 
diff --git a/R/derive_var_shift.R b/R/derive_var_shift.R
index dcbe9904e0..1305cfa218 100644
--- a/R/derive_var_shift.R
+++ b/R/derive_var_shift.R
@@ -14,7 +14,9 @@
 #'
 #' @param to_var Variable containing value to shift to.
 #'
-#' @param na_val Character string to replace missing values in `from_var` or `to_var`.
+#' @param na_val *Deprecated*, please use `missing_value` instead.
+#'
+#' @param missing_value Character string to replace missing values in `from_var` or `to_var`.
 #'
 #'  Default: "NULL"
 #'
@@ -24,7 +26,7 @@
 #'
 #' @details `new_var` is derived by concatenating the values of `from_var` to values of `to_var`
 #' (e.g. "NORMAL to HIGH"). When `from_var` or `to_var` has missing value, the
-#' missing value is replaced by `na_val` (e.g. "NORMAL to NULL").
+#' missing value is replaced by `missing_value` (e.g. "NORMAL to NULL").
 #'
 #'
 #' @return The input dataset with the character shift variable added
@@ -71,20 +73,28 @@ derive_var_shift <- function(dataset,
                              new_var,
                              from_var,
                              to_var,
-                             na_val = "NULL",
+                             na_val,
+                             missing_value = "NULL",
                              sep_val = " to ") {
+  ### BEGIN DEPRECATION
+  if (!missing(na_val)) {
+    deprecate_warn("0.12.0", "derive_var_shift(na_val = )", "derive_var_shift(missing_value = )")
+    missing_value <- na_val
+  }
+  ### END DEPRECATION
+
   new_var <- assert_symbol(enexpr(new_var))
   from_var <- assert_symbol(enexpr(from_var))
   to_var <- assert_symbol(enexpr(to_var))
-  na_val <- assert_character_scalar(na_val)
+  missing_value <- assert_character_scalar(missing_value)
   sep_val <- assert_character_scalar(sep_val)
   assert_data_frame(dataset, required_vars = exprs(!!from_var, !!to_var))
 
   # Derive shift variable. If from_var or to_var has missing value then set to na_val.
   dataset %>%
     mutate(
-      temp_from_var = if_else(is.na(!!from_var), !!na_val, as.character(!!from_var)),
-      temp_to_var = if_else(is.na(!!to_var), !!na_val, as.character(!!to_var))
+      temp_from_var = if_else(is.na(!!from_var), !!missing_value, as.character(!!from_var)),
+      temp_to_var = if_else(is.na(!!to_var), !!missing_value, as.character(!!to_var))
     ) %>%
     mutate(
       !!new_var := paste(temp_from_var, temp_to_var, sep = !!sep_val)
diff --git a/man/derive_var_shift.Rd b/man/derive_var_shift.Rd
index 76c6a38679..ac4ff9fdfd 100644
--- a/man/derive_var_shift.Rd
+++ b/man/derive_var_shift.Rd
@@ -9,7 +9,8 @@ derive_var_shift(
   new_var,
   from_var,
   to_var,
-  na_val = "NULL",
+  na_val,
+  missing_value = "NULL",
   sep_val = " to "
 )
 }
@@ -24,7 +25,9 @@ The columns specified by \code{from_var} and the \code{to_var} parameters are ex
 
 \item{to_var}{Variable containing value to shift to.}
 
-\item{na_val}{Character string to replace missing values in \code{from_var} or \code{to_var}.
+\item{na_val}{\emph{Deprecated}, please use \code{missing_value} instead.}
+
+\item{missing_value}{Character string to replace missing values in \code{from_var} or \code{to_var}.
 
 Default: "NULL"}
 
@@ -43,7 +46,7 @@ analysis value, shift from baseline grade to analysis grade, ...
 \details{
 \code{new_var} is derived by concatenating the values of \code{from_var} to values of \code{to_var}
 (e.g. "NORMAL to HIGH"). When \code{from_var} or \code{to_var} has missing value, the
-missing value is replaced by \code{na_val} (e.g. "NORMAL to NULL").
+missing value is replaced by \code{missing_value} (e.g. "NORMAL to NULL").
 }
 \examples{
 library(tibble)
diff --git a/tests/testthat/test-derive_var_shift.R b/tests/testthat/test-derive_var_shift.R
index e5dcdb733a..bebbabb1f4 100644
--- a/tests/testthat/test-derive_var_shift.R
+++ b/tests/testthat/test-derive_var_shift.R
@@ -1,4 +1,7 @@
-test_that("Shift based on character variables", {
+# derive_var_shift ----
+
+## Test 1: Shift based on character variables ----
+test_that("derive_var_shift Test 1: Shift based on character variables", {
   input <- tibble::tribble(
     ~USUBJID, ~PARAMCD, ~AVAL, ~ABLFL, ~BNRIND, ~ANRIND,
     "P01", "ALB", 33, "Y", "LOW", "LOW",
@@ -28,7 +31,8 @@ test_that("Shift based on character variables", {
 })
 
 
-test_that("Shift based on character variables with missing values", {
+## Test 2: Shift based on character variables with missing values ----
+test_that("derive_var_shift Test 2: Shift based on character variables with missing values", {
   input <- tibble::tribble(
     ~USUBJID, ~PARAMCD, ~AVAL, ~ABLFL, ~BNRIND, ~ANRIND,
     "P01", "ALB", 33, "Y", "LOW", "LOW",
@@ -60,7 +64,8 @@ test_that("Shift based on character variables with missing values", {
 })
 
 
-test_that("Shift based on numeric variables with missing values", {
+## Test 3: Shift based on numeric variables with missing values ----
+test_that("derive_var_shift Test 3: Shift based on numeric variables with missing values", {
   input <- tibble::tribble(
     ~USUBJID, ~PARAMCD, ~AVAL, ~ABLFL, ~BASE,
     "P01", "ALB", 33.1, "Y", 33.1,
@@ -91,7 +96,8 @@ test_that("Shift based on numeric variables with missing values", {
   )
 })
 
-test_that("Shift with user-specified na_val and sep_val", {
+## Test 4: Shift with user-specified missing_value and sep_val ----
+test_that("derive_var_shift Test 4: Shift with user-specified missing_value and sep_val", {
   input <- tibble::tribble(
     ~USUBJID, ~PARAMCD, ~AVAL, ~ABLFL, ~BNRIND, ~ANRIND,
     "P01", "ALB", 33, "Y", "LOW", "LOW",
@@ -117,9 +123,34 @@ test_that("Shift with user-specified na_val and sep_val", {
       new_var = SHIFT1,
       from_var = BNRIND,
       to_var = ANRIND,
-      na_val = "MISSING",
+      missing_value = "MISSING",
       sep_val = " - "
     ),
     expected_output
   )
 })
+
+## Test 5: Test deprecation warning of na_val argument ----
+test_that("derive_var_shift Test 5: Test deprecation warning of na_val argument", {
+  input <- tibble::tribble(
+    ~USUBJID, ~PARAMCD, ~AVAL, ~ABLFL, ~BNRIND, ~ANRIND,
+    "P01", "ALB", 33, "Y", "LOW", "LOW",
+    "P01", "ALB", 38, NA, "LOW", "NORMAL",
+    "P01", "ALB", NA, NA, "LOW", NA,
+    "P02", "ALB", NA, "Y", NA, NA,
+    "P02", "ALB", 49, NA, NA, "HIGH",
+    "P02", "SODIUM", 147, "Y", "HIGH", "HIGH"
+  )
+
+  expect_warning(
+    derive_var_shift(
+      input,
+      new_var = SHIFT1,
+      from_var = BNRIND,
+      to_var = ANRIND,
+      na_val = "MISSING",
+      sep_val = " - "
+    ),
+    class = "lifecycle_warning_deprecated"
+  )
+})

From 01c6be7f141d2431db678c107b3f7b96e4f07c3a Mon Sep 17 00:00:00 2001
From: ynsec37 <98389771+ynsec37@users.noreply.github.com>
Date: Tue, 1 Aug 2023 22:59:30 +0800
Subject: [PATCH 3/5] Closes #2012_derive_vars_dy (#2013)

* derive dy_vars via the `.names` argument of `across()` (#2012)

use a named vector to avoid clashing with variables ending with `_temp` in the input dataset

* test no error for variables ending with `_temp` (#2012)

* styled file

* add update information for #2012

---------

Co-authored-by: Daniel Sjoberg <danield.sjoberg@gmail.com>
---
 NEWS.md                              |  3 +++
 R/derive_vars_dy.R                   | 32 ++++++++++------------------
 tests/testthat/test-derive_vars_dy.R | 32 ++++++++++++++++++++++++++++
 3 files changed, 46 insertions(+), 21 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index a16e4f0db4..760168a981 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -8,6 +8,9 @@
 
 - The function `derive_var_extreme_flag()` has a new function argument, `flag_all` that additionally flags all records if the first or last record is not unique. (#1979)
 
+- The function `derive_vars_dy()` is updated to avoid a potential error when the input `dataset` has columns ending with `temp`. (#2012)
+
+
 ## Breaking Changes
 - The following functions, which were deprecated in previous `{admiral}` versions, have been removed: (#1950)
 
diff --git a/R/derive_vars_dy.R b/R/derive_vars_dy.R
index aa085c32eb..3a1e243240 100644
--- a/R/derive_vars_dy.R
+++ b/R/derive_vars_dy.R
@@ -102,31 +102,21 @@ derive_vars_dy <- function(dataset,
     abort(err_msg)
   }
 
-  dy_vars <- if_else(
+  # named vector passed to `.names` in `across()` to derive name of dy_vars
+  dy_vars <- set_names(if_else(
     source_names == "",
     str_replace_all(vars2chr(source_vars), "(DT|DTM)$", "DY"),
     source_names
-  )
+  ), vars2chr(source_vars))
+
   warn_if_vars_exist(dataset, dy_vars)
 
-  if (n_vars > 1L) {
-    dataset %>%
-      mutate(
-        across(
-          .cols = vars2chr(unname(source_vars)),
-          .fns = list(temp = ~
-            compute_duration(start_date = !!reference_date, end_date = .))
-        )
-      ) %>%
-      rename_with(
-        .cols = ends_with("temp"),
-        .fn = ~dy_vars
-      )
-  } else {
-    dataset %>%
-      mutate(
-        !!sym(dy_vars) :=
-          compute_duration(start_date = !!reference_date, end_date = !!source_vars[[1]])
+  dataset %>%
+    mutate(
+      across(
+        .cols = vars2chr(unname(source_vars)),
+        .fns = ~ compute_duration(start_date = !!reference_date, end_date = .x),
+        .names = "{dy_vars}"
       )
-  }
+    )
 }
diff --git a/tests/testthat/test-derive_vars_dy.R b/tests/testthat/test-derive_vars_dy.R
index e619550f76..db246d5737 100644
--- a/tests/testthat/test-derive_vars_dy.R
+++ b/tests/testthat/test-derive_vars_dy.R
@@ -285,3 +285,35 @@ test_that("derive_vars_dy Test 9: Single named --DT input when ref date is --DTM
     keys = c("STUDYID", "USUBJID")
   )
 })
+
+## Test 10: no error if input with variable end with `_temp` ----
+test_that("derive_vars_dy Test 10: no error if input with variable end with `_temp`", {
+  datain <- tibble::tribble(
+    ~STUDYID, ~USUBJID, ~TRTSDTM, ~ASTDT, ~test_temp,
+    "TEST01", "PAT01", "2014-01-17T23:59:59", "2014-01-18", "test"
+  ) %>%
+    mutate(
+      TRTSDTM = lubridate::as_datetime(TRTSDTM),
+      ASTDT = lubridate::ymd(ASTDT)
+    )
+
+  expected_output <- tibble::tribble(
+    ~STUDYID, ~USUBJID, ~TRTSDTM, ~ASTDT, ~test_temp, ~ASTDY,
+    "TEST01", "PAT01", "2014-01-17T23:59:59", "2014-01-18", "test", 2
+  ) %>%
+    mutate(
+      TRTSDTM = lubridate::as_datetime(TRTSDTM),
+      ASTDT = lubridate::ymd(ASTDT)
+    )
+
+  actual_output <- derive_vars_dy(datain,
+    reference_date = TRTSDTM,
+    source_vars = exprs(ASTDT)
+  )
+
+  expect_dfs_equal(
+    expected_output,
+    actual_output,
+    keys = c("STUDYID", "USUBJID")
+  )
+})

From 4ed7aac63a7592f0640464317b5a0c7c7a55710b Mon Sep 17 00:00:00 2001
From: Zelos Zhu <zelos.zhu@atorusresearch.com>
Date: Tue, 1 Aug 2023 15:27:09 -0400
Subject: [PATCH 4/5] Closes #1966 address derive_vars_joined bugs (#2016)

* feat: #1966 make our check_type consistent

* feat: #1966 hacky solution to null new_vars .join problem

* chore: #1966 inserted line in wrong place

* feat: #1966 add news blurb for what was done

* feat: #1966 add tests

* chore: #1966 lintr

* chore: #1966 swap appropriate order selection and restore check_type arg

* chore: #1966 looks like that fixed it

* feat: #1966 issue warning for dataset_add naming conflicts when `new_vars` is NULL

* chore: #1966 clean up for readability

* chore: #1966 restore original replace_values_by_names

* chore: #1966 add additional test to demonstrate how order vars were fixed/selected

* feat: #1966 adopt feedback for error messaging of naming conflicts

---------

Co-authored-by: Zelos Zhu <zdz2101@github.com>
---
 NEWS.md                             |  2 +
 R/derive_joined.R                   | 16 ++++-
 tests/testthat/test-derive_joined.R | 91 +++++++++++++++++++++++++++++
 3 files changed, 108 insertions(+), 1 deletion(-)

diff --git a/NEWS.md b/NEWS.md
index 760168a981..db4eb86c67 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -47,6 +47,8 @@ the `filter_add` using the next phase of the deprecation process. (#1950)
 
 - The list of package authors/contributors has been reformatted so that those who are actively maintaining the code base are now marked as *authors*, whereas those who made a significant contribution in the past are now down as *contributors*. All other acknowledgements have been moved to README section (#1941).
 
+- `derive_vars_joined()` had two bugs, one in the duplicates messaging and one when `new_vars` was set to `NULL`, that have now been addressed (#1966).
+
 # admiral 0.11.1
 
 - Fix bug in `derive_param_tte()`. (#1962)
diff --git a/R/derive_joined.R b/R/derive_joined.R
index ef6c58aa4b..1e0f5cff48 100644
--- a/R/derive_joined.R
+++ b/R/derive_joined.R
@@ -353,6 +353,19 @@ derive_vars_joined <- function(dataset,
   if (is.null(new_vars)) {
     new_vars <- chr2vars(colnames(dataset_add))
   }
+  preexisting_vars <- chr2vars(colnames(dataset))
+  preexisting_vars_no_by_vars <- preexisting_vars[which(!(preexisting_vars %in% by_vars))]
+  duplicates <- intersect(replace_values_by_names(new_vars), preexisting_vars_no_by_vars)
+  if (length(duplicates) > 0) {
+    err_msg <- sprintf(
+      paste(
+        "The following columns in `dataset_add` have naming conflicts with `dataset`,\n",
+        "please make the appropriate modifications to `new_vars`, with respect to:\n%s"
+      ),
+      enumerate(vars2chr(duplicates))
+    )
+    abort(err_msg)
+  }
 
   # number observations of the input dataset to get a unique key
   # (by_vars and tmp_obs_nr)
@@ -371,7 +384,7 @@ derive_vars_joined <- function(dataset,
     filter_if(filter_add) %>%
     select(
       !!!by_vars,
-      !!!chr2vars(names(order)),
+      !!!replace_values_by_names(extract_vars(order)),
       !!!replace_values_by_names(join_vars),
       !!!intersect(unname(extract_vars(new_vars)), chr2vars(colnames(dataset_add)))
     )
@@ -410,6 +423,7 @@ derive_vars_joined <- function(dataset,
       by_vars = exprs(!!!by_vars_left, !!tmp_obs_nr),
       new_vars = add_suffix_to_vars(new_vars, vars = common_vars, suffix = ".join"),
       missing_values = missing_values,
+      check_type = check_type,
       duplicate_msg = paste(
         paste(
           "After applying `filter_join` the joined dataset contains more",
diff --git a/tests/testthat/test-derive_joined.R b/tests/testthat/test-derive_joined.R
index 8574f3b564..eeb1afcc23 100644
--- a/tests/testthat/test-derive_joined.R
+++ b/tests/testthat/test-derive_joined.R
@@ -242,3 +242,94 @@ test_that("derive_vars_joined Test 7: new_vars expressions using variables from
     keys = c("USUBJID", "AESEQ")
   )
 })
+
+## Test 8: error if new_vars are already in dataset ----
+test_that("derive_vars_joined Test 8: error if new_vars are already in dataset", {
+  myd <- data.frame(day = c(1, 2, 3), val = c(0, 17, 21))
+  expect_error(
+    derive_vars_joined(
+      myd,
+      dataset_add = myd,
+      order = exprs(day),
+      mode = "last",
+      filter_join = day < day.join
+    ),
+    regexp = paste(
+      "The following columns in `dataset_add` have naming conflicts with `dataset`"
+    )
+  )
+})
+
+## Test 9: fixing a bug from issue 1966 ----
+test_that("derive_vars_joined Test 9: fixing a bug from issue 1966", { # nolint
+  adlb_ast <- tribble(
+    ~ADT,         ~ASEQ,
+    "2002-01-01", 1,
+    "2002-02-02", 2,
+    "2002-02-02", 3
+  ) %>%
+    mutate(
+      STUDYID = "ABC",
+      USUBJID = "1",
+      ADT = ymd(ADT),
+      ADTM = as_datetime(ADT)
+    )
+
+  adlb_tbili_pbl <- tribble(
+    ~ADT,         ~ASEQ,
+    "2002-01-01", 4,
+    "2002-02-02", 5,
+    "2002-02-02", 6
+  ) %>%
+    mutate(
+      STUDYID = "ABC",
+      USUBJID = "1",
+      ADT = ymd(ADT),
+      ADTM = as_datetime(ADT)
+    )
+
+  adlb_joined <- derive_vars_joined(
+    adlb_ast,
+    dataset_add = adlb_tbili_pbl,
+    by_vars = exprs(STUDYID, USUBJID),
+    order = exprs(ADTM, ASEQ),
+    new_vars = exprs(TBILI_ADT = ADT),
+    filter_join = ADT <= ADT.join,
+    mode = "first"
+  )
+
+  expected <- adlb_ast %>%
+    mutate(TBILI_ADT = as.Date(c("2002-01-01", "2002-02-02", "2002-02-02"), "%Y-%m-%d"))
+
+  expect_dfs_equal(
+    base = expected,
+    compare = adlb_joined,
+    keys = c("ADT", "ASEQ", "STUDYID", "USUBJID", "ADTM", "TBILI_ADT")
+  )
+})
+
+## Test 10: order vars are selected properly in function body ----
+test_that("derive_vars_joined Test 10: order vars are selected properly in function body", {
+  myd <- data.frame(day = c(1, 2, 3), val = c(0, 17, 21))
+  actual <- derive_vars_joined(
+    myd,
+    dataset_add = myd,
+    new_vars = exprs(first_val = val),
+    join_vars = exprs(day),
+    order = exprs(-day),
+    mode = "last",
+    filter_join = day < day.join
+  )
+  expected <- tribble(
+    ~day, ~val, ~first_val,
+    1,       0,         17,
+    2,      17,         21,
+    3,      21,         NA
+  )
+
+  expect_dfs_equal(
+    base = expected,
+    compare = actual,
+    keys = c("day", "val", "first_val")
+  )
+})

From b6f39a5551dd9634cbd7e00fb9206899ac01400c Mon Sep 17 00:00:00 2001
From: Zelos Zhu <zelos.zhu@atorusresearch.com>
Date: Wed, 2 Aug 2023 08:22:12 -0400
Subject: [PATCH 5/5] Closes #2037 deprecate `dataset_expected_obs` for
 `dataset_ref` in respective functions (#2039)

feat: #2037 deprecate `dataset_expected_obs` for `dataset_ref`

Co-authored-by: Zelos Zhu <zdz2101@github.com>
---
 NEWS.md                                       |  3 ++
 R/derive_expected_records.R                   | 37 +++++++++++++------
 R/derive_locf_records.R                       | 34 ++++++++++++-----
 man/derive_expected_records.Rd                | 15 +++++---
 man/derive_locf_records.Rd                    | 13 ++++---
 tests/testthat/test-derive_expected_records.R |  8 ++--
 tests/testthat/test-derive_locf_records.R     | 12 +++---
 7 files changed, 79 insertions(+), 43 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index db4eb86c67..d5103d24e4 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -40,6 +40,9 @@ the `filter_add` using the next phase of the deprecation process. (#1950)
 - The `na_val` argument in `derive_var_shift()` has been deprecated in favor of 
 `missing_value` using the first phase of the deprecation process. (#2014)
 
+- The `dataset_expected_obs` argument in `derive_expected_records()` and `derive_locf_records()`
+has been deprecated in favor of `dataset_ref`. (#2037)
+
 ## Documentation
 
 
diff --git a/R/derive_expected_records.R b/R/derive_expected_records.R
index c9a753fedc..f61cf2dab9 100644
--- a/R/derive_expected_records.R
+++ b/R/derive_expected_records.R
@@ -5,17 +5,19 @@
 #'
 #' @param dataset Input dataset
 #'
-#'   A data frame, the columns from `dataset_expected_obs` and specified by the
+#'   A data frame, the columns from `dataset_ref` and specified by the
 #'   `by_vars` parameter are expected.
 #'
-#' @param dataset_expected_obs Expected observations dataset
+#' @param dataset_expected_obs *Deprecated*, please use `dataset_ref` instead.
+#'
+#' @param dataset_ref Expected observations dataset
 #'
 #'   Data frame with the expected observations, e.g., all the expected
 #'   combinations of `PARAMCD`, `PARAM`, `AVISIT`, `AVISITN`, ...
 #'
 #' @param by_vars Grouping variables
 #'
-#'   For each group defined by `by_vars` those observations from `dataset_expected_obs`
+#'   For each group defined by `by_vars` those observations from `dataset_ref`
 #'   are added to the output dataset which do not have a corresponding observation
 #'   in the input dataset.
 #'
@@ -31,7 +33,7 @@
 #'   "TDOSE", PARCAT1 = "OVERALL")`.
 #'
 #' @details For each group (the variables specified in the `by_vars` parameter),
-#' those records from `dataset_expected_obs` that are missing in the input
+#' those records from `dataset_ref` that are missing in the input
 #' dataset are added to the output dataset.
 #'
 #' @return The input dataset with the missed expected observations added for each
@@ -63,7 +65,7 @@
 #'
 #' derive_expected_records(
 #'   dataset = adqs,
-#'   dataset_expected_obs = parm_visit_ref,
+#'   dataset_ref = parm_visit_ref,
 #'   by_vars = exprs(USUBJID, PARAMCD),
 #'   set_values_to = exprs(DTYPE = "DERIVED")
 #' )
@@ -78,35 +80,46 @@
 #'
 #' derive_expected_records(
 #'   dataset = adqs,
-#'   dataset_expected_obs = parm_visit_ref,
+#'   dataset_ref = parm_visit_ref,
 #'   by_vars = exprs(USUBJID, PARAMCD),
 #'   set_values_to = exprs(DTYPE = "DERIVED")
 #' )
 #'
 derive_expected_records <- function(dataset,
                                     dataset_expected_obs,
+                                    dataset_ref,
                                     by_vars = NULL,
                                     set_values_to = NULL) {
+  if (!missing(dataset_expected_obs)) {
+    deprecate_warn(
+      "0.12.0",
+      "derive_expected_records(dataset_expected_obs = )",
+      "derive_expected_records(dataset_ref = )"
+    )
+    assert_data_frame(dataset_expected_obs)
+    dataset_ref <- dataset_expected_obs
+  }
+
   # Check input parameters
   assert_vars(by_vars, optional = TRUE)
-  assert_data_frame(dataset_expected_obs)
+  assert_data_frame(dataset_ref)
   assert_data_frame(
     dataset,
-    required_vars = expr_c(by_vars, chr2vars(colnames(dataset_expected_obs)))
+    required_vars = expr_c(by_vars, chr2vars(colnames(dataset_ref)))
   )
   assert_varval_list(set_values_to, optional = TRUE)
 
   # Derive expected records
-  ## ids: Variables from by_vars but not in dataset_expected_obs
+  ## ids: Variables from by_vars but not in dataset_ref
   ids <- dataset %>%
-    select(!!!setdiff(by_vars, chr2vars(colnames(dataset_expected_obs)))) %>%
+    select(!!!setdiff(by_vars, chr2vars(colnames(dataset_ref)))) %>%
     distinct()
 
   if (ncol(ids) > 0) {
     exp_obsv <- ids %>%
-      crossing(dataset_expected_obs)
+      crossing(dataset_ref)
   } else {
-    exp_obsv <- dataset_expected_obs
+    exp_obsv <- dataset_ref
   } # tmp workaround, update after using tidyr 1.2.0
 
   exp_obs_vars <- exp_obsv %>%
diff --git a/R/derive_locf_records.R b/R/derive_locf_records.R
index e4a2e6ca2c..ad13927788 100644
--- a/R/derive_locf_records.R
+++ b/R/derive_locf_records.R
@@ -8,14 +8,17 @@
 #'   The columns specified by the `by_vars`, `analysis_var`, `order`,
 #'   `keep_vars` parameters are expected.
 #'
-#' @param dataset_expected_obs Expected observations dataset
+#' @param dataset_expected_obs *Deprecated*, please use `dataset_ref` instead.
+#'
+#' @param dataset_ref Expected observations dataset
 #'
 #'   Data frame with all the combinations of `PARAMCD`, `PARAM`, `AVISIT`,
 #'   `AVISITN`, ... which are expected in the dataset is expected.
 #'
+#'
 #' @param by_vars Grouping variables
 #'
-#'   For each group defined by `by_vars` those observations from `dataset_expected_obs`
+#'   For each group defined by `by_vars` those observations from `dataset_ref`
 #'   are added to the output dataset which do not have a corresponding observation
 #'   in the input dataset or for which `analysis_var` is `NA` for the corresponding observation
 #'   in the input dataset.
@@ -40,7 +43,7 @@
 #' @author G Gayatri
 #'
 #' @details For each group (with respect to the variables specified for the
-#' by_vars parameter) those observations from dataset_expected_obs are added to
+#' by_vars parameter) those observations from `dataset_ref` are added to
 #' the output dataset
 #' - which do not have a corresponding observation in the input dataset or
 #' - for which `analysis_var` is NA for the corresponding observation in the input dataset.
@@ -101,8 +104,8 @@
 #' )
 #'
 #' derive_locf_records(
-#'   data = advs,
-#'   dataset_expected_obs = advs_expected_obsv,
+#'   dataset = advs,
+#'   dataset_ref = advs_expected_obsv,
 #'   by_vars = exprs(STUDYID, USUBJID, PARAMCD),
 #'   order = exprs(AVISITN, AVISIT),
 #'   keep_vars = exprs(PARAMN)
@@ -110,10 +113,21 @@
 #'
 derive_locf_records <- function(dataset,
                                 dataset_expected_obs,
+                                dataset_ref,
                                 by_vars,
                                 analysis_var = AVAL,
                                 order,
                                 keep_vars = NULL) {
+  if (!missing(dataset_expected_obs)) {
+    deprecate_warn(
+      "0.12.0",
+      "derive_locf_records(dataset_expected_obs = )",
+      "derive_locf_records(dataset_ref = )"
+    )
+    assert_data_frame(dataset_expected_obs)
+    dataset_ref <- dataset_expected_obs
+  }
+
   #### Input Checking ####
   analysis_var <- assert_symbol(enexpr(analysis_var))
 
@@ -123,27 +137,27 @@ derive_locf_records <- function(dataset,
   assert_expr_list(order)
 
   # Check by_vars and order variables in input datasets
-  assert_data_frame(dataset_expected_obs)
+  assert_data_frame(dataset_ref)
   assert_data_frame(
     dataset,
     required_vars = expr_c(
       by_vars, analysis_var, extract_vars(order), keep_vars,
-      chr2vars(colnames(dataset_expected_obs))
+      chr2vars(colnames(dataset_ref))
     )
   )
 
 
-  #### Prepping 'dataset_expected_obs' ####
+  #### Prepping 'dataset_ref' ####
 
 
   # Get the IDs from input dataset for which the expected observations are to be added
 
   ids <- dataset %>%
-    select(!!!setdiff(by_vars, chr2vars(colnames(dataset_expected_obs)))) %>%
+    select(!!!setdiff(by_vars, chr2vars(colnames(dataset_ref)))) %>%
     distinct()
 
   exp_obsv <- ids %>%
-    crossing(dataset_expected_obs)
+    crossing(dataset_ref)
 
 
 
diff --git a/man/derive_expected_records.Rd b/man/derive_expected_records.Rd
index 75022c4b80..146b48ff60 100644
--- a/man/derive_expected_records.Rd
+++ b/man/derive_expected_records.Rd
@@ -7,6 +7,7 @@
 derive_expected_records(
   dataset,
   dataset_expected_obs,
+  dataset_ref,
   by_vars = NULL,
   set_values_to = NULL
 )
@@ -14,17 +15,19 @@ derive_expected_records(
 \arguments{
 \item{dataset}{Input dataset
 
-A data frame, the columns from \code{dataset_expected_obs} and specified by the
+A data frame, the columns from \code{dataset_ref} and specified by the
 \code{by_vars} parameter are expected.}
 
-\item{dataset_expected_obs}{Expected observations dataset
+\item{dataset_expected_obs}{\emph{Deprecated}, please use \code{dataset_ref} instead.}
+
+\item{dataset_ref}{Expected observations dataset
 
 Data frame with the expected observations, e.g., all the expected
 combinations of \code{PARAMCD}, \code{PARAM}, \code{AVISIT}, \code{AVISITN}, ...}
 
 \item{by_vars}{Grouping variables
 
-For each group defined by \code{by_vars} those observations from \code{dataset_expected_obs}
+For each group defined by \code{by_vars} those observations from \code{dataset_ref}
 are added to the output dataset which do not have a corresponding observation
 in the input dataset.}
 
@@ -51,7 +54,7 @@ contains missing observations.
 }
 \details{
 For each group (the variables specified in the \code{by_vars} parameter),
-those records from \code{dataset_expected_obs} that are missing in the input
+those records from \code{dataset_ref} that are missing in the input
 dataset are added to the output dataset.
 }
 \examples{
@@ -74,7 +77,7 @@ parm_visit_ref <- tribble(
 
 derive_expected_records(
   dataset = adqs,
-  dataset_expected_obs = parm_visit_ref,
+  dataset_ref = parm_visit_ref,
   by_vars = exprs(USUBJID, PARAMCD),
   set_values_to = exprs(DTYPE = "DERIVED")
 )
@@ -89,7 +92,7 @@ parm_visit_ref <- tribble(
 
 derive_expected_records(
   dataset = adqs,
-  dataset_expected_obs = parm_visit_ref,
+  dataset_ref = parm_visit_ref,
   by_vars = exprs(USUBJID, PARAMCD),
   set_values_to = exprs(DTYPE = "DERIVED")
 )
diff --git a/man/derive_locf_records.Rd b/man/derive_locf_records.Rd
index 3ef93a3e51..5f500aaf0b 100644
--- a/man/derive_locf_records.Rd
+++ b/man/derive_locf_records.Rd
@@ -7,6 +7,7 @@
 derive_locf_records(
   dataset,
   dataset_expected_obs,
+  dataset_ref,
   by_vars,
   analysis_var = AVAL,
   order,
@@ -19,14 +20,16 @@ derive_locf_records(
 The columns specified by the \code{by_vars}, \code{analysis_var}, \code{order},
 \code{keep_vars} parameters are expected.}
 
-\item{dataset_expected_obs}{Expected observations dataset
+\item{dataset_expected_obs}{\emph{Deprecated}, please use \code{dataset_ref} instead.}
+
+\item{dataset_ref}{Expected observations dataset
 
 Data frame with all the combinations of \code{PARAMCD}, \code{PARAM}, \code{AVISIT},
 \code{AVISITN}, ... which are expected in the dataset is expected.}
 
 \item{by_vars}{Grouping variables
 
-For each group defined by \code{by_vars} those observations from \code{dataset_expected_obs}
+For each group defined by \code{by_vars} those observations from \code{dataset_ref}
 are added to the output dataset which do not have a corresponding observation
 in the input dataset or for which \code{analysis_var} is \code{NA} for the corresponding observation
 in the input dataset.}
@@ -59,7 +62,7 @@ does not contain observations for missed visits/time points.
 }
 \details{
 For each group (with respect to the variables specified for the
-by_vars parameter) those observations from dataset_expected_obs are added to
+\code{by_vars} parameter) those observations from \code{dataset_ref} are added to
 the output dataset
 \itemize{
 \item which do not have a corresponding observation in the input dataset or
@@ -113,8 +116,8 @@ advs_expected_obsv <- tribble(
 )
 
 derive_locf_records(
-  data = advs,
-  dataset_expected_obs = advs_expected_obsv,
+  dataset = advs,
+  dataset_ref = advs_expected_obsv,
   by_vars = exprs(STUDYID, USUBJID, PARAMCD),
   order = exprs(AVISITN, AVISIT),
   keep_vars = exprs(PARAMN)
diff --git a/tests/testthat/test-derive_expected_records.R b/tests/testthat/test-derive_expected_records.R
index 05f0e003bc..0937a4a92c 100644
--- a/tests/testthat/test-derive_expected_records.R
+++ b/tests/testthat/test-derive_expected_records.R
@@ -30,7 +30,7 @@ test_that("derive_expected_records Test 1: missing values in `by_vars`", {
 
   actual_output <- derive_expected_records(
     dataset = input,
-    dataset_expected_obs = expected_obsv,
+    dataset_ref = expected_obsv,
     by_vars = exprs(USUBJID),
     set_values_to = exprs(DTYPE = "DERIVED")
   )
@@ -71,7 +71,7 @@ test_that("derive_expected_records Test 2: `by_vars` = NULL", {
 
   actual_output <- derive_expected_records(
     dataset = input,
-    dataset_expected_obs = expected_obsv,
+    dataset_ref = expected_obsv,
     by_vars = NULL,
     set_values_to = exprs(DTYPE = "DERIVED")
   )
@@ -114,7 +114,7 @@ test_that("derive_expected_records Test 3: visit variables are parameter indepen
 
   actual_output <- derive_expected_records(
     dataset = input,
-    dataset_expected_obs = expected_obsv,
+    dataset_ref = expected_obsv,
     by_vars = exprs(USUBJID, PARAMCD),
     set_values_to = exprs(DTYPE = "DERIVED")
   )
@@ -157,7 +157,7 @@ test_that("derive_expected_records Test 4: visit variables are parameter depende
 
   actual_output <- derive_expected_records(
     dataset = input,
-    dataset_expected_obs = expected_obsv,
+    dataset_ref = expected_obsv,
     by_vars = exprs(USUBJID),
     set_values_to = exprs(DTYPE = "DERIVED")
   )
diff --git a/tests/testthat/test-derive_locf_records.R b/tests/testthat/test-derive_locf_records.R
index 61c9a103a6..9443e970c8 100644
--- a/tests/testthat/test-derive_locf_records.R
+++ b/tests/testthat/test-derive_locf_records.R
@@ -32,7 +32,7 @@ test_that("derive_locf_records Test 1: visits are missing", {
 
   actual_output <- derive_locf_records(
     input,
-    dataset_expected_obs = advs_expected_obsv,
+    dataset_ref = advs_expected_obsv,
     by_vars = exprs(STUDYID, USUBJID, PARAM, PARAMCD),
     order = exprs(AVISITN, AVISIT)
   )
@@ -82,7 +82,7 @@ test_that("derive_locf_records Test 2: some visits have missing AVAL", {
 
   actual_output <- derive_locf_records(
     input,
-    dataset_expected_obs = advs_expected_obsv,
+    dataset_ref = advs_expected_obsv,
     by_vars = exprs(STUDYID, USUBJID, PARAM, PARAMCD),
     order = exprs(AVISITN, AVISIT)
   )
@@ -142,7 +142,7 @@ test_that("derive_locf_records Test 3: visits are missing - and DTYPE already ex
 
   actual_output <- derive_locf_records(
     input,
-    dataset_expected_obs = advs_expected_obsv,
+    dataset_ref = advs_expected_obsv,
     by_vars = exprs(STUDYID, USUBJID, PARAM, PARAMCD),
     order = exprs(AVISITN, AVISIT)
   )
@@ -190,7 +190,7 @@ test_that("derive_locf_records Test 4: visit variables are parameter independent
 
   actual_output <- derive_locf_records(
     input,
-    dataset_expected_obs = advs_expected_obsv,
+    dataset_ref = advs_expected_obsv,
     by_vars = exprs(STUDYID, USUBJID, PARAM, PARAMCD),
     order = exprs(AVISITN, AVISIT)
   )
@@ -246,7 +246,7 @@ test_that("derive_locf_records Test 5: visit variables are parameter dependent",
 
   actual_output <- derive_locf_records(
     input,
-    dataset_expected_obs = advs_expected_obsv,
+    dataset_ref = advs_expected_obsv,
     by_vars = exprs(STUDYID, USUBJID, PARAM, PARAMCD),
     order = exprs(AVISITN, AVISIT)
   )
@@ -290,7 +290,7 @@ test_that("derive_locf_records Test 6: populate VISITNUM for LOCF records", {
 
   actual_output <- derive_locf_records(
     input,
-    dataset_expected_obs = advs_expected_obsv,
+    dataset_ref = advs_expected_obsv,
     by_vars = exprs(STUDYID, USUBJID, PARAM, PARAMCD),
     analysis_var = AVALC,
     order = exprs(AVISITN, AVISIT),