From 9adec47c3112e2e740793886f91ef2ed19dcdccd Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Tue, 18 Apr 2023 11:27:21 +0100
Subject: [PATCH 01/37] Initial rough work on delayed discharges

---
 R/add_dd.R | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 93 insertions(+)
 create mode 100644 R/add_dd.R

diff --git a/R/add_dd.R b/R/add_dd.R
new file mode 100644
index 000000000..1bb55d2d2
--- /dev/null
+++ b/R/add_dd.R
@@ -0,0 +1,93 @@
+#' Add Delay Discharge to working file
+#'
+#' @param data The input data frame
+#' @param year The year being processed
+#'
+#' @return A data frame linking delay discharge cohorts
+#' @export
+#'
+#' @family episode file
+add_dd <- function(data, year) {
+  year_param <- year
+
+  data_chi <- data %>%
+    # Keep records that have a chi, and a cij_marker.
+    dplyr::filter(is.na(chi)) %>%
+    dplyr::filter(recid %in% c("01B", "02B", "04B", "GLS")) %>%
+    # create a copy of the CIJ maker
+    dplyr::mutate(
+      temp_cij_maker = cij_maker
+    ) %>%
+    dplyr::full_join(
+      # Not sure which function to use here. Will change it later
+      haven::read_sav("/conf/hscdiip/SLF_Extracts/Delayed_Discharges/Jul16_Sep22DD_LinkageFile.zsav"),
+      by = "chi"
+    ) %>%
+    # Create an order variable to make DD records appear after others.
+    # but might it be better if recid has levels?
+    dplyr::mutate(
+      order = dplyr::case_when(
+        recid %in% c("00B", "01B", "02B", "04B", "GLS") ~ 1L,
+        recid == "DD" ~ 2L,
+        TRUE ~ NA
+      )
+    ) %>%
+    # Remove any DD records which don't match a chi in the file.
+    dplyr::arrange(chi) %>%
+    dplyr::filter(!(recid == "DD" & chi != dplyr::lag(chi))) %>%
+    # sort so that DD is roughly where we expect it to fit
+    dplyr::arrange(chi, keydate1_dateformat) %>%
+
+    # Capture the Mental Health delays with no end dates.
+    dplyr::mutate(
+      Flag_8 = dplyr::if_else((
+        chi = dplyr::lag(chi) & recid == "DD" & lag(recid) == "04B" &
+          is.na(keydate2_dateformat) &
+          is.na(lag(keydate2_dateformat)) &
+          keydate1_dateformat > (lag(CIJ_start_date) - lubridate::days(1))),
+      dplyr::if_else(keydate1_dateformat > (lag(CIJ_start_date)), 2, 1),
+      NA
+    ),
+    temp_cij_maker = dplyr::if_else((
+      chi = dplyr::lag(chi) & recid == "DD" & lag(recid) == "04B" &
+        is.na(keydate2_dateformat) &
+        is.na(lag(keydate2_dateformat)) &
+        keydate1_dateformat > (lag(CIJ_start_date) - lubridate::days(1))),
+      dplyr::lag(temp_cij_maker),
+      NA
+    ),
+    CIJ_start_date = dplyr::if_else((
+      chi = dplyr::lag(chi) & recid == "DD" & lag(recid) == "04B" &
+        is.na(keydate2_dateformat) &
+        is.na(lag(keydate2_dateformat)) &
+        keydate1_dateformat > (lag(CIJ_start_date) - lubridate::days(1))),
+      dplyr::lag(CIJ_start_date),
+      NA
+    ),
+    CIJ_end_date = dplyr::if_else((
+      chi = dplyr::lag(chi) & recid == "DD" & lag(recid) == "04B" &
+        is.na(keydate2_dateformat) &
+        is.na(lag(keydate2_dateformat)) &
+        keydate1_dateformat > (lag(CIJ_start_date) - lubridate::days(1))),
+      dplyr::lag(CIJ_end_date),
+      NA
+    )) %>%
+
+    # Use Min and Max CIJ dates to fill in temp_cij_marker -
+    # where possible - DD episodes with no CIJ.
+
+
+
+
+
+    data_return <- row_bind(
+      data_chi,
+      data %>%
+        dplyr::filter(is.na(chi)) %>%
+        dplyr::filter(!(recid %in% c("01B", "02B", "04B", "GLS"))),
+      data %>%
+        dplyr::filter(!is.na(chi))
+    )
+
+  return()
+}

From 8e5ba8010cc05347a4594c9e1cdd29970fc01a62 Mon Sep 17 00:00:00 2001
From: lizihao-anu <lizihao-anu@users.noreply.github.com>
Date: Tue, 18 Apr 2023 10:31:40 +0000
Subject: [PATCH 02/37] Update documentation

---
 NAMESPACE             |  1 +
 man/add_dd.Rd         | 25 +++++++++++++++++++++++++
 man/add_nsu_cohort.Rd |  1 +
 man/add_ppa_flag.Rd   |  1 +
 4 files changed, 28 insertions(+)
 create mode 100644 man/add_dd.Rd

diff --git a/NAMESPACE b/NAMESPACE
index ce736d376..68de0ab3e 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,6 +1,7 @@
 # Generated by roxygen2: do not edit by hand
 
 export("%>%")
+export(add_dd)
 export(add_nsu_cohort)
 export(add_ppa_flag)
 export(add_smr_type)
diff --git a/man/add_dd.Rd b/man/add_dd.Rd
new file mode 100644
index 000000000..2dd6685c3
--- /dev/null
+++ b/man/add_dd.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/add_dd.R
+\name{add_dd}
+\alias{add_dd}
+\title{Add Delay Discharge to working file}
+\usage{
+add_dd(data, year)
+}
+\arguments{
+\item{data}{The input data frame}
+
+\item{year}{The year being processed}
+}
+\value{
+A data frame linking delay discharge cohorts
+}
+\description{
+Add Delay Discharge to working file
+}
+\seealso{
+Other episode file: 
+\code{\link{add_nsu_cohort}()},
+\code{\link{add_ppa_flag}()}
+}
+\concept{episode file}
diff --git a/man/add_nsu_cohort.Rd b/man/add_nsu_cohort.Rd
index f6fd2df65..e80fe2ede 100644
--- a/man/add_nsu_cohort.Rd
+++ b/man/add_nsu_cohort.Rd
@@ -21,6 +21,7 @@ Add NSU cohort to working file
 \code{\link[=get_nsu_path]{get_nsu_path()}}
 
 Other episode file: 
+\code{\link{add_dd}()},
 \code{\link{add_ppa_flag}()}
 }
 \concept{episode file}
diff --git a/man/add_ppa_flag.Rd b/man/add_ppa_flag.Rd
index 9eb82797c..55660352d 100644
--- a/man/add_ppa_flag.Rd
+++ b/man/add_ppa_flag.Rd
@@ -19,6 +19,7 @@ was preventable or not.
 }
 \seealso{
 Other episode file: 
+\code{\link{add_dd}()},
 \code{\link{add_nsu_cohort}()}
 }
 \concept{episode file}

From c02118903a0d731e98398516e68014cdf502bd08 Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Wed, 19 Apr 2023 18:05:26 +0100
Subject: [PATCH 03/37] some conversion from SPSS

---
 R/add_dd.R | 84 ++++++++++++++++++++++++++++++++----------------------
 1 file changed, 50 insertions(+), 34 deletions(-)

diff --git a/R/add_dd.R b/R/add_dd.R
index 1bb55d2d2..0ce225a36 100644
--- a/R/add_dd.R
+++ b/R/add_dd.R
@@ -37,49 +37,65 @@ add_dd <- function(data, year) {
     dplyr::filter(!(recid == "DD" & chi != dplyr::lag(chi))) %>%
     # sort so that DD is roughly where we expect it to fit
     dplyr::arrange(chi, keydate1_dateformat) %>%
+    # add row number to restore the order later
+    dplyr::mutate(row_no = dplyr::row_number())
 
-    # Capture the Mental Health delays with no end dates.
-    dplyr::mutate(
-      Flag_8 = dplyr::if_else((
-        chi = dplyr::lag(chi) & recid == "DD" & lag(recid) == "04B" &
-          is.na(keydate2_dateformat) &
-          is.na(lag(keydate2_dateformat)) &
-          keydate1_dateformat > (lag(CIJ_start_date) - lubridate::days(1))),
-      dplyr::if_else(keydate1_dateformat > (lag(CIJ_start_date)), 2, 1),
-      NA
-    ),
-    temp_cij_maker = dplyr::if_else((
-      chi = dplyr::lag(chi) & recid == "DD" & lag(recid) == "04B" &
-        is.na(keydate2_dateformat) &
-        is.na(lag(keydate2_dateformat)) &
-        keydate1_dateformat > (lag(CIJ_start_date) - lubridate::days(1))),
-      dplyr::lag(temp_cij_maker),
-      NA
-    ),
-    CIJ_start_date = dplyr::if_else((
-      chi = dplyr::lag(chi) & recid == "DD" & lag(recid) == "04B" &
-        is.na(keydate2_dateformat) &
-        is.na(lag(keydate2_dateformat)) &
-        keydate1_dateformat > (lag(CIJ_start_date) - lubridate::days(1))),
-      dplyr::lag(CIJ_start_date),
-      NA
-    ),
-    CIJ_end_date = dplyr::if_else((
-      chi = dplyr::lag(chi) & recid == "DD" & lag(recid) == "04B" &
+  # Capture the Mental Health delays with no end dates.
+  data_chi_1 <- data_chi %>%
+    dplyr::select(
+      chi,
+      recid,
+      keydate1_dateformat,
+      keydate2_dateformat,
+      CIJ_start_date,
+      CIJ_end_date,
+      temp_cij_marker,
+      row_no
+    ) %>%
+
+    dplyr::filter(
+      chi == lag(chi) & recid == "DD" &
+        lag(recid) == "04B" &
         is.na(keydate2_dateformat) &
         is.na(lag(keydate2_dateformat)) &
-        keydate1_dateformat > (lag(CIJ_start_date) - lubridate::days(1))),
-      dplyr::lag(CIJ_end_date),
-      NA
-    )) %>%
+        keydate1_dateformat >= lag(CIJ_start_date) - lubridate::days(1)
+    ) %>%
+    dplyr::mutate(
+      Flag_8 = dplyr::if_else(keydate1_dateformat >= lag(CIJ_start_date), 2, 1),
+      temp_cij_marker = lag(temp_cij_marker),
+      CIJ_start_date = lag(CIJ_start_date),
+      CIJ_end_date = lag(CIJ_end_date)
+    )
+
+  data_chi <- data_chi %>%
+    dplyr::left_join(data_chi_1, suffix = c("", "_redundancy")) %>%
+    dplyr::select(-ends_with("_redundancy"))
+  # As I imagine, this will possibly leave some NA in columns including
+  # CIJ_start_date, CIJ_end_date
+
+  # Use Min and Max CIJ dates to fill in temp_cij_marker -
+  # where possible - DD episodes with no CIJ.
+  ## difficult parts. hard to vectorize it.
+  # data_chi_1 <- data_chi %>%
+  #   dplyr::if_else(
+  #     chi == lag(chi) & is.na(temp_cij_marker),
+  #     Flag_1 = 0,
+  #
+  #   )
+
+  # ## non-vectorized version. for loop
+  # for(ii in 2:max(data_chi$row_no)){
+  #   if(chi[ii] == chi[ii - 1] & is.na(temp_cij_marker[ii])){
+  #
+  #   }
+  # }
 
-    # Use Min and Max CIJ dates to fill in temp_cij_marker -
-    # where possible - DD episodes with no CIJ.
 
 
 
 
 
+    # Eventually, bind non_chi back
     data_return <- row_bind(
       data_chi,
       data %>%

From a3b6d0575e7cbb51e0c8ca9bd223348cfc0aae84 Mon Sep 17 00:00:00 2001
From: lizihao-anu <lizihao-anu@users.noreply.github.com>
Date: Wed, 19 Apr 2023 17:10:57 +0000
Subject: [PATCH 04/37] Style code

---
 R/add_dd.R | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/R/add_dd.R b/R/add_dd.R
index 0ce225a36..177c5d89b 100644
--- a/R/add_dd.R
+++ b/R/add_dd.R
@@ -52,7 +52,6 @@ add_dd <- function(data, year) {
       temp_cij_marker,
       row_no
     ) %>%
-
     dplyr::filter(
       chi == lag(chi) & recid == "DD" &
         lag(recid) == "04B" &
@@ -95,15 +94,15 @@ add_dd <- function(data, year) {
 
 
 
-    # Eventually, bind non_chi back
-    data_return <- row_bind(
-      data_chi,
-      data %>%
-        dplyr::filter(is.na(chi)) %>%
-        dplyr::filter(!(recid %in% c("01B", "02B", "04B", "GLS"))),
-      data %>%
-        dplyr::filter(!is.na(chi))
-    )
+  # Eventually, bind non_chi back
+  data_return <- row_bind(
+    data_chi,
+    data %>%
+      dplyr::filter(is.na(chi)) %>%
+      dplyr::filter(!(recid %in% c("01B", "02B", "04B", "GLS"))),
+    data %>%
+      dplyr::filter(!is.na(chi))
+  )
 
   return()
 }

From d18f061e218c08a1e73044a8af3d8f3a740995be Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Wed, 26 Apr 2023 16:44:37 +0100
Subject: [PATCH 05/37] Add a function that adds delayed discharges to episode data

---
 R/add_dd.R | 240 +++++++++++++++++++++++++++++++++++------------------
 1 file changed, 157 insertions(+), 83 deletions(-)

diff --git a/R/add_dd.R b/R/add_dd.R
index 177c5d89b..695487583 100644
--- a/R/add_dd.R
+++ b/R/add_dd.R
@@ -10,99 +10,173 @@
 add_dd <- function(data, year) {
   year_param <- year
 
-  data_chi <- data %>%
-    # Keep records that have a chi, and a cij_marker.
-    dplyr::filter(is.na(chi)) %>%
-    dplyr::filter(recid %in% c("01B", "02B", "04B", "GLS")) %>%
-    # create a copy of the CIJ maker
+  data = data %>%
+    dplyr::arrange(chi, cij_marker) %>%
     dplyr::mutate(
-      temp_cij_maker = cij_maker
-    ) %>%
-    dplyr::full_join(
-      # Not sure which function to use here. Will change it later
-      haven::read_sav("/conf/hscdiip/SLF_Extracts/Delayed_Discharges/Jul16_Sep22DD_LinkageFile.zsav"),
-      by = "chi"
-    ) %>%
-    # Create an order variable to make DD records appear after others.
-    # but might it be better if recid has levels?
-    dplyr::mutate(
-      order = dplyr::case_when(
-        recid %in% c("00B", "01B", "02B", "04B", "GLS") ~ 1L,
-        recid == "DD" ~ 2L,
-        TRUE ~ NA
-      )
-    ) %>%
-    # Remove any DD records which don't match a chi in the file.
-    dplyr::arrange(chi) %>%
-    dplyr::filter(!(recid == "DD" & chi != dplyr::lag(chi))) %>%
-    # sort so that DD is roughly where we expect it to fit
-    dplyr::arrange(chi, keydate1_dateformat) %>%
-    # add row number to restore the order later
-    dplyr::mutate(row_no = dplyr::row_number())
-
-  # Capture the Mental Health delays with no end dates.
-  data_chi_1 <- data_chi %>%
-    dplyr::select(
-      chi,
-      recid,
-      keydate1_dateformat,
-      keydate2_dateformat,
-      CIJ_start_date,
-      CIJ_end_date,
-      temp_cij_marker,
-      row_no
-    ) %>%
-    dplyr::filter(
-      chi == lag(chi) & recid == "DD" &
-        lag(recid) == "04B" &
-        is.na(keydate2_dateformat) &
-        is.na(lag(keydate2_dateformat)) &
-        keydate1_dateformat >= lag(CIJ_start_date) - lubridate::days(1)
-    ) %>%
-    dplyr::mutate(
-      Flag_8 = dplyr::if_else(keydate1_dateformat >= lag(CIJ_start_date), 2, 1),
-      temp_cij_marker = lag(temp_cij_marker),
-      CIJ_start_date = lag(CIJ_start_date),
-      CIJ_end_date = lag(CIJ_end_date)
+      cij_start_date_lower = cij_start_date - lubridate::days(1),
+      cij_end_date_upper = cij_end_date + lubridate::days(1)
     )
 
-  data_chi <- data_chi %>%
-    dplyr::left_join(data_chi_1, suffix = c("", "_redundancy")) %>%
-    dplyr::select(-ends_with("_redundancy"))
-  # As I imagine, this will possibly leave some NA in columns including
-  # CIJ_start_date, CIJ_end_date
+  ## handling DD ----
+  dd_data = read_file(get_source_extract_path(year_param, "DD"))
+  by_dd = dplyr::join_by(
+    chi,
+    x$keydate1_dateformat >= y$cij_start_date_lower,
+    x$keydate2_dateformat <= y$cij_end_date_upper
+  )
+  data = dd_data %>%
+    dplyr::inner_join(data,
+                      by_dd,
+                      suffix = c("_dd", "")) %>%
+    dplyr::arrange(cij_start_date, cij_end_date, cij_marker, postcode) %>%
+    # remove duplicate columns
+    dplyr::distinct(
+      cij_start_date,
+      cij_end_date,
+      cij_marker,
+      keydate1_dateformat_dd,
+      keydate2_dateformat_dd,
+      .keep_all = TRUE
+    ) %>%
+    # determine DD quality
+    dplyr::mutate(dd_type = dplyr::if_else(
+      is.na(cij_marker),
+      "no-cij",
+      dplyr::case_when(
+        # "1"	"Accurate Match - (1)"
+        # "1P"	"Accurate Match (allowing +-1 day) - (1P)"
+        # "1A"	"Accurate Match (has an assumed  end date) - (1A)"
+        # "1AP"	"Accurate Match (allowing +-1 day and has an assumed end date) - (1AP)"
+        # "2"	"Starts in CIJ - (2)"
+        # "2D"	"Starts in CIJ (ends one day after) - (2D)"
+        # "2DP"	"Starts in CIJ (allowing +-1 day and ends one day after) - (2DP)"
+        # "2A"	"Starts in CIJ (Accurate Match after correcting assumed end date) - (2A)"
+        # "2AP"	"Starts in CIJ (Accurate Match (allowing +-1 day) after correcting assumed end date) - (2AP)"
+        # "3"	"Ends in CIJ - (3)"
+        # "3D"	"Ends in CIJ (starts one day before) - (3D)"
+        # "3DP"	"Ends in CIJ (allowing +-1 day and starts one day before) - (3DP)"
+        # "4"	"Matches unended MH record - (4)"
+        # "4P" "Matches unended MH record (allowing -1 day) - (4P)"
+        # "-" "No Match (We don't keep these)".
+
+        # "1"	"Accurate Match - (1)"
+        keydate1_dateformat_dd >= cij_start_date &
+          keydate2_dateformat_dd <= cij_end_date &
+          !amended_dates ~ "1",
 
-  # Use Min and Max CIJ dates to fill in temp_cij_marker -
-  # where possible - DD episodes with no CIJ.
-  ## difficult parts. hard to vectorize it.
-  # data_chi_1 <- data_chi %>%
-  #   dplyr::if_else(
-  #     chi == lag(chi) & is.na(temp_cij_marker),
-  #     Flag_1 = 0,
-  #
-  #   )
+        # "1P"	"Accurate Match (allowing +-1 day) - (1P)"
+        keydate1_dateformat_dd >= cij_start_date_lower &
+          keydate2_dateformat_dd <= cij_end_date_upper &
+          !amended_dates ~ "1P",
 
-  # ## non-vectorized version. for loop
-  # for(ii in 2:max(data_chi$row_no)){
-  #   if(chi[ii] == chi[ii - 1] & is.na(temp_cij_marker[ii])){
-  #
-  #   }
-  # }
+        # "1A"	"Accurate Match (has an assumed end date) - (1A)"
+        keydate1_dateformat_dd >= cij_start_date &
+          keydate2_dateformat_dd <= cij_end_date &
+          amended_dates ~ "1P",
 
+        # "1AP"	"Accurate Match (allowing +-1 day and has an assumed end date) - (1AP)"
+        keydate1_dateformat_dd >= cij_start_date_lower &
+          keydate2_dateformat_dd <= cij_end_date_upper &
+          amended_dates ~ "1AP",
 
+        # "2"	"Starts in CIJ - (2)"
+        keydate1_dateformat_dd >= cij_start_date &
+          keydate1_dateformat_dd <= cij_end_date &
+          keydate2_dateformat_dd >= cij_end_date &
+          !amended_dates ~ "2",
 
+        # "2D"	"Starts in CIJ (ends one day after) - (2D)"
+        keydate1_dateformat_dd >= cij_start_date &
+          keydate1_dateformat_dd <= cij_end_date &
+          keydate2_dateformat_dd >= cij_end_date_upper &
+          !amended_dates ~ "2D",
 
+        # "2DP"	"Starts in CIJ (allowing +-1 day and ends one day after) - (2DP)"
+        keydate1_dateformat_dd >= cij_start_date_lower &
+          keydate1_dateformat_dd <= cij_end_date_upper &
+          keydate2_dateformat_dd >= cij_end_date_upper &
+          !amended_dates ~ "2DP",
 
+        # "2A"	"Starts in CIJ (Accurate Match after correcting assumed end date) - (2A)"
+        keydate1_dateformat_dd >= cij_start_date &
+          keydate1_dateformat_dd <= cij_end_date &
+          keydate2_dateformat_dd >= cij_end_date &
+          amended_dates ~ "2A",
 
-  # Eventually, bind non_chi back
-  data_return <- row_bind(
-    data_chi,
-    data %>%
-      dplyr::filter(is.na(chi)) %>%
-      dplyr::filter(!(recid %in% c("01B", "02B", "04B", "GLS"))),
-    data %>%
-      dplyr::filter(!is.na(chi))
-  )
+        # "2AP"	"Starts in CIJ (Accurate Match (allowing +-1 day) after correcting assumed end date) - (2AP)"
+        keydate1_dateformat_dd >= cij_start_date_lower &
+          keydate1_dateformat_dd <= cij_end_date_upper &
+          keydate2_dateformat_dd >= cij_end_date_upper &
+          amended_dates ~ "2AP",
+
+        # "3"	"Ends in CIJ - (3)"
+        keydate1_dateformat_dd <= cij_start_date &
+          keydate2_dateformat_dd >= cij_start_date &
+          keydate2_dateformat_dd >= cij_end_date &
+          !amended_dates ~ "3",
+
+        # "3D"	"Ends in CIJ (starts one day before) - (3D)"
+        keydate1_dateformat_dd <= cij_start_date_lower &
+          keydate2_dateformat_dd >= cij_start_date &
+          keydate2_dateformat_dd >= cij_end_date &
+          !amended_dates ~ "3D",
+
+        # "3DP"	"Ends in CIJ (allowing +-1 day and starts one day before) - (3DP)"
+        keydate1_dateformat_dd <= cij_start_date_lower &
+          keydate2_dateformat_dd >= cij_start_date_lower &
+          keydate2_dateformat_dd >= cij_end_date_upper &
+          !amended_dates ~ "3DP",
+
+        # "4"	"Matches unended MH record - (4)"
+        recid == "04B" &
+          keydate1_dateformat_dd >= cij_start_date &
+          amended_dates ~ "4",
+
+        # "4P"	"Matches unended MH record (allowing -1 day) - (4P)"
+        recid == "04B" &
+          keydate1_dateformat_dd >= cij_start_date_lower &
+          amended_dates ~ "4P",
+
+        # "-" "No Match (We don't keep these)"
+        .default = "-"
+      )
+    )) %>%
+    dplyr::filter(dd_type != "-") %>%
+    dplyr::mutate(smrtype_dd = dplyr::case_when(
+      dd_type %in% c(
+        "1",
+        "1P",
+        "1A",
+        "1AP",
+        "2",
+        "2D",
+        "2DP",
+        "2A",
+        "2AP",
+        "3",
+        "3D",
+        "3DP",
+        "4",
+        "4P"
+      ) ~ "DD-CIJ",
+      dd_type %in% c("no-cij") ~ "DD-No CIJ"
+    )) %>%
+    # tidy up and rename columns to match the format of episode files
+    dplyr::select(
+      chi,
+      recid = recid_dd,
+      keydate1_dateformat = keydate1_dateformat_dd,
+      keydate2_dateformat = keydate2_dateformat_dd,
+      smrtype = smrtype_dd,
+      cij_marker,
+      cij_start_date,
+      cij_end_date,
+      postcode = postcode_dd
+    ) %>%
+    # combind DD with episode data
+    dplyr::bind_rows(data %>% dplyr::select(-c(
+      "cij_start_date_lower", "cij_end_date_upper"
+    )))
 
-  return()
+  return(data)
 }

From f3ba46ff1a9528ed280a4f7d43a32a70cf502050 Mon Sep 17 00:00:00 2001
From: lizihao-anu <lizihao-anu@users.noreply.github.com>
Date: Wed, 26 Apr 2023 15:51:10 +0000
Subject: [PATCH 06/37] Style code

---
 R/add_dd.R | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/R/add_dd.R b/R/add_dd.R
index 695487583..48280b7be 100644
--- a/R/add_dd.R
+++ b/R/add_dd.R
@@ -10,7 +10,7 @@
 add_dd <- function(data, year) {
   year_param <- year
 
-  data = data %>%
+  data <- data %>%
     dplyr::arrange(chi, cij_marker) %>%
     dplyr::mutate(
       cij_start_date_lower = cij_start_date - lubridate::days(1),
@@ -18,16 +18,17 @@ add_dd <- function(data, year) {
     )
 
   ## handling DD ----
-  dd_data = read_file(get_source_extract_path(year_param, "DD"))
-  by_dd = dplyr::join_by(
+  dd_data <- read_file(get_source_extract_path(year_param, "DD"))
+  by_dd <- dplyr::join_by(
     chi,
     x$keydate1_dateformat >= y$cij_start_date_lower,
     x$keydate2_dateformat <= y$cij_end_date_upper
   )
-  data = dd_data %>%
+  data <- dd_data %>%
     dplyr::inner_join(data,
-                      by_dd,
-                      suffix = c("_dd", "")) %>%
+      by_dd,
+      suffix = c("_dd", "")
+    ) %>%
     dplyr::arrange(cij_start_date, cij_end_date, cij_marker, postcode) %>%
     # remove duplicate columns
     dplyr::distinct(

From 5d00df40e5699094112245ebf9b409615b07bd48 Mon Sep 17 00:00:00 2001
From: Zihao Li <zihao.li@phs.scot>
Date: Wed, 26 Apr 2023 17:54:49 +0100
Subject: [PATCH 07/37] Update R/add_dd.R

Co-authored-by: James McMahon <james.mcmahon@phs.scot>
---
 R/add_dd.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/add_dd.R b/R/add_dd.R
index 48280b7be..ee17d23cf 100644
--- a/R/add_dd.R
+++ b/R/add_dd.R
@@ -26,7 +26,7 @@ add_dd <- function(data, year) {
   )
   data <- dd_data %>%
     dplyr::inner_join(data,
-      by_dd,
+      by = by_dd,
       suffix = c("_dd", "")
     ) %>%
     dplyr::arrange(cij_start_date, cij_end_date, cij_marker, postcode) %>%

From 75017d9276aadffa8a80942793fc071aad2cf73e Mon Sep 17 00:00:00 2001
From: Zihao Li <zihao.li@phs.scot>
Date: Wed, 26 Apr 2023 17:55:04 +0100
Subject: [PATCH 08/37] Update R/add_dd.R

Co-authored-by: James McMahon <james.mcmahon@phs.scot>
---
 R/add_dd.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/R/add_dd.R b/R/add_dd.R
index ee17d23cf..e49144032 100644
--- a/R/add_dd.R
+++ b/R/add_dd.R
@@ -32,6 +32,7 @@ add_dd <- function(data, year) {
     dplyr::arrange(cij_start_date, cij_end_date, cij_marker, postcode) %>%
     # remove duplicate columns
     dplyr::distinct(
+      chi,
       cij_start_date,
       cij_end_date,
       cij_marker,

From 957575505237fafd69b483be14453b319a1f9223 Mon Sep 17 00:00:00 2001
From: Zihao Li <zihao.li@phs.scot>
Date: Wed, 26 Apr 2023 17:55:37 +0100
Subject: [PATCH 09/37] Update R/add_dd.R

Co-authored-by: James McMahon <james.mcmahon@phs.scot>
---
 R/add_dd.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/add_dd.R b/R/add_dd.R
index e49144032..de45f25e0 100644
--- a/R/add_dd.R
+++ b/R/add_dd.R
@@ -22,7 +22,7 @@ add_dd <- function(data, year) {
   by_dd <- dplyr::join_by(
     chi,
     x$keydate1_dateformat >= y$cij_start_date_lower,
-    x$keydate2_dateformat <= y$cij_end_date_upper
+    x$keydate2_dateformat <= y$<end of the month when the CIJ ends>
   )
   data <- dd_data %>%
     dplyr::inner_join(data,

From 352d4a85bb44dd9eda010457b53a15dd46467114 Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Tue, 2 May 2023 18:44:09 +0100
Subject: [PATCH 10/37] add_dd: handle open-ended dates and month-end matches

---
 R/add_dd.R          | 135 +++++++++++++++++++++++++++++++++++---------
 R/last_date_month.R |  16 ++++++
 2 files changed, 125 insertions(+), 26 deletions(-)
 create mode 100644 R/last_date_month.R

diff --git a/R/add_dd.R b/R/add_dd.R
index de45f25e0..1827ee5d9 100644
--- a/R/add_dd.R
+++ b/R/add_dd.R
@@ -11,26 +11,50 @@ add_dd <- function(data, year) {
   year_param <- year
 
   data <- data %>%
-    dplyr::arrange(chi, cij_marker) %>%
     dplyr::mutate(
+      # remember to revoke the cij_end_date with dummy_cij_end
       cij_start_date_lower = cij_start_date - lubridate::days(1),
-      cij_end_date_upper = cij_end_date + lubridate::days(1)
+      cij_end_date_upper = cij_end_date + lubridate::days(1),
+      cij_end_month = last_date_month(cij_end_date),
+
+      is_dummy_cij_start = is.na(cij_start_date) & !is.na(cij_end_date),
+      dummy_cij_start = dplyr::if_else(
+        is_dummy_cij_start,
+        lubridate::as_date("1900-01-01"),
+        cij_start_date_lower
+      ),
+      is_dummy_cij_end = !is.na(cij_start_date) & is.na(cij_end_date),
+      dummy_cij_end = dplyr::if_else(
+        is_dummy_cij_end,
+        lubridate::today(),
+        cij_end_month
+      )
     )
 
   ## handling DD ----
-  dd_data <- read_file(get_source_extract_path(year_param, "DD"))
+  # no flag for last reported
+  dd_data <-
+    read_file(get_source_extract_path(year_param, "DD")) %>%
+    dplyr::mutate(
+      # remember to revoke the keydate2 and amended_dates with dummy_keydate2
+      is_dummy_keydate2 = is.na(keydate2_dateformat),
+      dummy_keydate2 = dplyr::if_else(is_dummy_keydate2,
+                                      lubridate::today(),
+                                      keydate2_dateformat),
+      dummy_id = dplyr::row_number()
+    )
+
   by_dd <- dplyr::join_by(
     chi,
-    x$keydate1_dateformat >= y$cij_start_date_lower,
-    x$keydate2_dateformat <= y$<end of the month when the CIJ ends>
+    x$keydate1_dateformat >= y$dummy_cij_start,
+    x$dummy_keydate2 <= y$dummy_cij_end
   )
   data <- dd_data %>%
     dplyr::inner_join(data,
-      by = by_dd,
-      suffix = c("_dd", "")
-    ) %>%
+                      by = by_dd,
+                      suffix = c("_dd", "")) %>%
     dplyr::arrange(cij_start_date, cij_end_date, cij_marker, postcode) %>%
-    # remove duplicate columns
+    # remove duplicate rows, but still got some duplicate mis-matches
     dplyr::distinct(
       chi,
       cij_start_date,
@@ -40,6 +64,7 @@ add_dd <- function(data, year) {
       keydate2_dateformat_dd,
       .keep_all = TRUE
     ) %>%
+
     # determine DD quality
     dplyr::mutate(dd_type = dplyr::if_else(
       is.na(cij_marker),
@@ -61,6 +86,13 @@ add_dd <- function(data, year) {
         # "4P" "Matches unended MH record (allowing -1 day) - (4P)"
         # "-" "No Match (We don't keep these)".
 
+        # If we use keydate2_dateformat_dd,
+        # we implicitly mean is_dummy_keydate2 needs to be FALSE.
+        # Given that in DD files,
+        # we only keep the records with missing keydate2 for 04B, mental health,
+        # and drop the records with missing keydate2 for other recid,
+        # it should be ok to only use dummy_keydate2 for "4"(s).
+
         # "1"	"Accurate Match - (1)"
         keydate1_dateformat_dd >= cij_start_date &
           keydate2_dateformat_dd <= cij_end_date &
@@ -74,70 +106,84 @@ add_dd <- function(data, year) {
         # "1A"	"Accurate Match (has an assumed end date) - (1A)"
         keydate1_dateformat_dd >= cij_start_date &
           keydate2_dateformat_dd <= cij_end_date &
-          amended_dates ~ "1P",
+          amended_dates ~ "1A",
 
         # "1AP"	"Accurate Match (allowing +-1 day and has an assumed end date) - (1AP)"
         keydate1_dateformat_dd >= cij_start_date_lower &
           keydate2_dateformat_dd <= cij_end_date_upper &
           amended_dates ~ "1AP",
 
+        # "1APE"	the CIJ ends during the month but the delay has an end date of the end of the month
+        keydate1_dateformat_dd >= cij_start_date_lower &
+          keydate2_dateformat_dd == cij_end_month &
+          amended_dates ~ "1APE",
+
         # "2"	"Starts in CIJ - (2)"
         keydate1_dateformat_dd >= cij_start_date &
           keydate1_dateformat_dd <= cij_end_date &
-          keydate2_dateformat_dd >= cij_end_date &
+          keydate2_dateformat_dd > cij_end_date &
           !amended_dates ~ "2",
 
         # "2D"	"Starts in CIJ (ends one day after) - (2D)"
         keydate1_dateformat_dd >= cij_start_date &
           keydate1_dateformat_dd <= cij_end_date &
-          keydate2_dateformat_dd >= cij_end_date_upper &
+          keydate2_dateformat_dd > cij_end_date_upper &
           !amended_dates ~ "2D",
 
         # "2DP"	"Starts in CIJ (allowing +-1 day and ends one day after) - (2DP)"
         keydate1_dateformat_dd >= cij_start_date_lower &
           keydate1_dateformat_dd <= cij_end_date_upper &
-          keydate2_dateformat_dd >= cij_end_date_upper &
+          keydate2_dateformat_dd > cij_end_date_upper &
           !amended_dates ~ "2DP",
 
         # "2A"	"Starts in CIJ (Accurate Match after correcting assumed end date) - (2A)"
         keydate1_dateformat_dd >= cij_start_date &
           keydate1_dateformat_dd <= cij_end_date &
-          keydate2_dateformat_dd >= cij_end_date &
+          keydate2_dateformat_dd > cij_end_date &
           amended_dates ~ "2A",
 
         # "2AP"	"Starts in CIJ (Accurate Match (allowing +-1 day) after correcting assumed end date) - (2AP)"
         keydate1_dateformat_dd >= cij_start_date_lower &
           keydate1_dateformat_dd <= cij_end_date_upper &
-          keydate2_dateformat_dd >= cij_end_date_upper &
+          keydate2_dateformat_dd > cij_end_date_upper &
+          # keydate2_dateformat_dd == cij_end_month &
           amended_dates ~ "2AP",
 
         # "3"	"Ends in CIJ - (3)"
         keydate1_dateformat_dd <= cij_start_date &
           keydate2_dateformat_dd >= cij_start_date &
-          keydate2_dateformat_dd >= cij_end_date &
+          keydate2_dateformat_dd <= cij_end_date &
           !amended_dates ~ "3",
 
         # "3D"	"Ends in CIJ (starts one day before) - (3D)"
         keydate1_dateformat_dd <= cij_start_date_lower &
           keydate2_dateformat_dd >= cij_start_date &
-          keydate2_dateformat_dd >= cij_end_date &
+          keydate2_dateformat_dd <= cij_end_date &
           !amended_dates ~ "3D",
 
         # "3DP"	"Ends in CIJ (allowing +-1 day and starts one day before) - (3DP)"
         keydate1_dateformat_dd <= cij_start_date_lower &
           keydate2_dateformat_dd >= cij_start_date_lower &
-          keydate2_dateformat_dd >= cij_end_date_upper &
+          keydate2_dateformat_dd <= cij_end_date_upper &
           !amended_dates ~ "3DP",
 
+        # "3ADPE"
+        keydate1_dateformat_dd <= cij_start_date_lower &
+          keydate2_dateformat_dd >= cij_start_date_lower &
+          keydate2_dateformat_dd <= cij_end_month &
+          amended_dates ~ "3ADPE",
+
+
+
         # "4"	"Matches unended MH record - (4)"
         recid == "04B" &
           keydate1_dateformat_dd >= cij_start_date &
-          amended_dates ~ "4",
+          is_dummy_cij_end ~ "4",
 
         # "4P"	"Matches unended MH record (allowing -1 day) - (4P)"
         recid == "04B" &
           keydate1_dateformat_dd >= cij_start_date_lower &
-          amended_dates ~ "4P",
+          is_dummy_cij_end ~ "4P",
 
         # "-" "No Match (We don't keep these)"
         .default = "-"
@@ -150,6 +196,7 @@ add_dd <- function(data, year) {
         "1P",
         "1A",
         "1AP",
+        "1APE",
         "2",
         "2D",
         "2DP",
@@ -158,6 +205,7 @@ add_dd <- function(data, year) {
         "3",
         "3D",
         "3DP",
+        "3ADPE",
         "4",
         "4P"
       ) ~ "DD-CIJ",
@@ -165,20 +213,55 @@ add_dd <- function(data, year) {
     )) %>%
     # tidy up and rename columns to match the format of episode files
     dplyr::select(
-      chi,
       recid = recid_dd,
+      chi,
       keydate1_dateformat = keydate1_dateformat_dd,
       keydate2_dateformat = keydate2_dateformat_dd,
+      amended_dates,
+      delay_end_reason,
+      primary_delay_reason,
+      primary_delay_reason,
+      hbtreatcode,
+      location,
+      spec,
       smrtype = smrtype_dd,
       cij_marker,
       cij_start_date,
       cij_end_date,
-      postcode = postcode_dd
+      postcode = postcode_dd,
+      dd_responsible_lca,
+      original_admission_date,
+      dd_type
     ) %>%
-    # combind DD with episode data
-    dplyr::bind_rows(data %>% dplyr::select(-c(
-      "cij_start_date_lower", "cij_end_date_upper"
-    )))
+    # combine DD with episode data
+    dplyr::bind_rows(# restore cij_end_date
+      data %>%
+        dplyr::select(
+          -c(
+            "cij_start_date_lower",
+            "cij_end_date_upper",
+            "cij_end_month",
+            "is_dummy_cij_start",
+            "dummy_cij_start",
+            "is_dummy_cij_end",
+            "dummy_cij_end"
+          )
+        ))
+
+  data_summary = data %>%
+    filter(recid == "DD") %>%
+    dplyr::group_by(dd_type) %>%
+    dplyr::summarise(frequency = dplyr::n()) %>%
+    dplyr::mutate(total = nrow(dd_data),
+                  percentage = round(frequency / total * 100, 2))
+
+  data_summary = data.frame(
+    dd_type = "-",
+    frequency = data_summary$total[1] - sum(data_summary$frequency),
+    total = data_summary$total[1]
+  ) %>%
+    dplyr::mutate(percentage = round(frequency/total*100, 2)) %>%
+    dplyr::bind_rows(data_summary)
 
   return(data)
 }
diff --git a/R/last_date_month.R b/R/last_date_month.R
new file mode 100644
index 000000000..96d936320
--- /dev/null
+++ b/R/last_date_month.R
@@ -0,0 +1,16 @@
+#' Return the end date of the month of the given date
+#'
+#' @description Return the end date of the month of the given date
+#'
+#' @param x a date with a date format
+#'
+#' @return a vector of dates, each the last day of the month of the input date
+#' @export
+#'
+#' @examples
+#' last_date_month(lubridate::as_date("2020-02-05"))
+#'
+#' @family date functions
+last_date_month = function(x){
+  return(lubridate::ceiling_date(x, "month") - lubridate::days(1))
+}

From 7eac5224d949f76427d359bffd5d0657987a88b5 Mon Sep 17 00:00:00 2001
From: lizihao-anu <lizihao-anu@users.noreply.github.com>
Date: Tue, 2 May 2023 17:46:29 +0000
Subject: [PATCH 11/37] Style code

---
 R/add_dd.R          | 29 ++++++++++++++++-------------
 R/last_date_month.R |  2 +-
 2 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/R/add_dd.R b/R/add_dd.R
index 1827ee5d9..1638dac16 100644
--- a/R/add_dd.R
+++ b/R/add_dd.R
@@ -16,7 +16,6 @@ add_dd <- function(data, year) {
       cij_start_date_lower = cij_start_date - lubridate::days(1),
       cij_end_date_upper = cij_end_date + lubridate::days(1),
       cij_end_month = last_date_month(cij_end_date),
-
       is_dummy_cij_start = is.na(cij_start_date) & !is.na(cij_end_date),
       dummy_cij_start = dplyr::if_else(
         is_dummy_cij_start,
@@ -39,8 +38,9 @@ add_dd <- function(data, year) {
       # remember to revoke the keydate2 and amended_dates with dummy_keydate2
       is_dummy_keydate2 = is.na(keydate2_dateformat),
       dummy_keydate2 = dplyr::if_else(is_dummy_keydate2,
-                                      lubridate::today(),
-                                      keydate2_dateformat),
+        lubridate::today(),
+        keydate2_dateformat
+      ),
       dummy_id = dplyr::row_number()
     )
 
@@ -51,8 +51,9 @@ add_dd <- function(data, year) {
   )
   data <- dd_data %>%
     dplyr::inner_join(data,
-                      by = by_dd,
-                      suffix = c("_dd", "")) %>%
+      by = by_dd,
+      suffix = c("_dd", "")
+    ) %>%
     dplyr::arrange(cij_start_date, cij_end_date, cij_marker, postcode) %>%
     # remove duplicate rows, but still got some duplicate mis-matches
     dplyr::distinct(
@@ -64,7 +65,6 @@ add_dd <- function(data, year) {
       keydate2_dateformat_dd,
       .keep_all = TRUE
     ) %>%
-
     # determine DD quality
     dplyr::mutate(dd_type = dplyr::if_else(
       is.na(cij_marker),
@@ -234,7 +234,7 @@ add_dd <- function(data, year) {
       dd_type
     ) %>%
     # combine DD with episode data
-    dplyr::bind_rows(# restore cij_end_date
+    dplyr::bind_rows( # restore cij_end_date
       data %>%
         dplyr::select(
           -c(
@@ -246,21 +246,24 @@ add_dd <- function(data, year) {
             "is_dummy_cij_end",
             "dummy_cij_end"
           )
-        ))
+        )
+    )
 
-  data_summary = data %>%
+  data_summary <- data %>%
     filter(recid == "DD") %>%
     dplyr::group_by(dd_type) %>%
     dplyr::summarise(frequency = dplyr::n()) %>%
-    dplyr::mutate(total = nrow(dd_data),
-                  percentage = round(frequency / total * 100, 2))
+    dplyr::mutate(
+      total = nrow(dd_data),
+      percentage = round(frequency / total * 100, 2)
+    )
 
-  data_summary = data.frame(
+  data_summary <- data.frame(
     dd_type = "-",
     frequency = data_summary$total[1] - sum(data_summary$frequency),
     total = data_summary$total[1]
   ) %>%
-    dplyr::mutate(percentage = round(frequency/total*100, 2)) %>%
+    dplyr::mutate(percentage = round(frequency / total * 100, 2)) %>%
     dplyr::bind_rows(data_summary)
 
   return(data)
diff --git a/R/last_date_month.R b/R/last_date_month.R
index 96d936320..471fda031 100644
--- a/R/last_date_month.R
+++ b/R/last_date_month.R
@@ -11,6 +11,6 @@
 #' last_date_month(lubridate::as_date("2020-02-05"))
 #'
 #' @family date functions
-last_date_month = function(x){
+last_date_month <- function(x) {
   return(lubridate::ceiling_date(x, "month") - lubridate::days(1))
 }

From cc5caf0cf005b49a6e341d7eac6737f54491f707 Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Wed, 3 May 2023 10:50:40 +0100
Subject: [PATCH 12/37] remove duplicated rows when many to many inner join by
 keeping the records that are closest to the cij record

---
 R/add_dd.R | 266 +++++++++++++++++++++++++++++------------------------
 1 file changed, 146 insertions(+), 120 deletions(-)

diff --git a/R/add_dd.R b/R/add_dd.R
index 1638dac16..725bd83e5 100644
--- a/R/add_dd.R
+++ b/R/add_dd.R
@@ -66,129 +66,155 @@ add_dd <- function(data, year) {
       .keep_all = TRUE
     ) %>%
     # determine DD quality
-    dplyr::mutate(dd_type = dplyr::if_else(
-      is.na(cij_marker),
-      "no-cij",
-      dplyr::case_when(
-        # "1"	"Accurate Match - (1)"
-        # "1P"	"Accurate Match (allowing +-1 day) - (1P)"
-        # "1A"	"Accurate Match (has an assumed  end date) - (1A)"
-        # "1AP"	"Accurate Match (allowing +-1 day and has an assumed end date) - (1AP)"
-        # "2"	"Starts in CIJ - (2)"
-        # "2D"	"Starts in CIJ (ends one day after) - (2D)"
-        # "2DP"	"Starts in CIJ (allowing +-1 day and ends one day after) - (2DP)"
-        # "2A"	"Starts in CIJ (Accurate Match after correcting assumed end date) - (2A)"
-        # "2AP"	"Starts in CIJ (Accurate Match (allowing +-1 day) after correcting assumed end date) - (2AP)"
-        # "3"	"Ends in CIJ - (3)"
-        # "3D"	"Ends in CIJ (starts one day before) - (3D)"
-        # "3DP"	"Ends in CIJ (allowing +-1 day and starts one day before) - (3DP)"
-        # "4"	"Matches unended MH record - (4)"
-        # "4P" "Matches unended MH record (allowing -1 day) - (4P)"
-        # "-" "No Match (We don't keep these)".
+    dplyr::mutate(
+      dd_type = dplyr::if_else(
+        is.na(cij_marker),
+        "no-cij",
+        dplyr::case_when(
+          # "1"	"Accurate Match - (1)"
+          # "1P"	"Accurate Match (allowing +-1 day) - (1P)"
+          # "1A"	"Accurate Match (has an assumed  end date) - (1A)"
+          # "1AP"	"Accurate Match (allowing +-1 day and has an assumed end date) - (1AP)"
+          # "2"	"Starts in CIJ - (2)"
+          # "2D"	"Starts in CIJ (ends one day after) - (2D)"
+          # "2DP"	"Starts in CIJ (allowing +-1 day and ends one day after) - (2DP)"
+          # "2A"	"Starts in CIJ (Accurate Match after correcting assumed end date) - (2A)"
+          # "2AP"	"Starts in CIJ (Accurate Match (allowing +-1 day) after correcting assumed end date) - (2AP)"
+          # "3"	"Ends in CIJ - (3)"
+          # "3D"	"Ends in CIJ (starts one day before) - (3D)"
+          # "3DP"	"Ends in CIJ (allowing +-1 day and starts one day before) - (3DP)"
+          # "4"	"Matches unended MH record - (4)"
+          # "4P" "Matches unended MH record (allowing -1 day) - (4P)"
+          # "-" "No Match (We don't keep these)".
 
-        # If we use keydate2_dateformat_dd,
-        # we implicitly mean is_dummy_keydate2 needs to be FALSE.
-        # Given that in DD files,
-        # we only keep the records with missing keydate2 for 04B, mental health,
-        # and drop the records with missing keydate2 for other recid,
-        # it should be ok to only use dummy_keydate2 for "4"(s).
+          # If we use keydate2_dateformat_dd,
+          # we implicitly mean is_dummy_keydate2 needs to be FALSE.
+          # Given that in DD files,
+          # we only keep the records with missing keydate2 for 04B, mental health,
+          # and drop the records with missing keydate2 for other recid,
+          # it should be ok to only use dummy_keydate2 for "4"(s).
 
-        # "1"	"Accurate Match - (1)"
-        keydate1_dateformat_dd >= cij_start_date &
-          keydate2_dateformat_dd <= cij_end_date &
-          !amended_dates ~ "1",
+          # "1"	"Accurate Match - (1)"
+          keydate1_dateformat_dd >= cij_start_date &
+            keydate2_dateformat_dd <= cij_end_date &
+            !amended_dates ~ "1",
 
-        # "1P"	"Accurate Match (allowing +-1 day) - (1P)"
-        keydate1_dateformat_dd >= cij_start_date_lower &
-          keydate2_dateformat_dd <= cij_end_date_upper &
-          !amended_dates ~ "1P",
+          # "1P"	"Accurate Match (allowing +-1 day) - (1P)"
+          keydate1_dateformat_dd >= cij_start_date_lower &
+            keydate2_dateformat_dd <= cij_end_date_upper &
+            !amended_dates ~ "1P",
 
-        # "1A"	"Accurate Match (has an assumed end date) - (1A)"
-        keydate1_dateformat_dd >= cij_start_date &
-          keydate2_dateformat_dd <= cij_end_date &
-          amended_dates ~ "1A",
+          # "1A"	"Accurate Match (has an assumed end date) - (1A)"
+          keydate1_dateformat_dd >= cij_start_date &
+            keydate2_dateformat_dd <= cij_end_date &
+            amended_dates ~ "1A",
 
-        # "1AP"	"Accurate Match (allowing +-1 day and has an assumed end date) - (1AP)"
-        keydate1_dateformat_dd >= cij_start_date_lower &
-          keydate2_dateformat_dd <= cij_end_date_upper &
-          amended_dates ~ "1AP",
+          # "1AP"	"Accurate Match (allowing +-1 day and has an assumed end date) - (1AP)"
+          keydate1_dateformat_dd >= cij_start_date_lower &
+            keydate2_dateformat_dd <= cij_end_date_upper &
+            amended_dates ~ "1AP",
 
-        # "1APE"	the CIJ ends during the month but the delay has an end date of the end of the month
-        keydate1_dateformat_dd >= cij_start_date_lower &
-          keydate2_dateformat_dd == cij_end_month &
-          amended_dates ~ "1APE",
+          # "1APE"	the CIJ ends during the month but the delay has an end date of the end of the month
+          keydate1_dateformat_dd >= cij_start_date_lower &
+            keydate2_dateformat_dd == cij_end_month &
+            amended_dates ~ "1APE",
 
-        # "2"	"Starts in CIJ - (2)"
-        keydate1_dateformat_dd >= cij_start_date &
-          keydate1_dateformat_dd <= cij_end_date &
-          keydate2_dateformat_dd > cij_end_date &
-          !amended_dates ~ "2",
+          # "2"	"Starts in CIJ - (2)"
+          keydate1_dateformat_dd >= cij_start_date &
+            keydate1_dateformat_dd <= cij_end_date &
+            keydate2_dateformat_dd > cij_end_date &
+            !amended_dates ~ "2",
 
-        # "2D"	"Starts in CIJ (ends one day after) - (2D)"
-        keydate1_dateformat_dd >= cij_start_date &
-          keydate1_dateformat_dd <= cij_end_date &
-          keydate2_dateformat_dd > cij_end_date_upper &
-          !amended_dates ~ "2D",
+          # "2D"	"Starts in CIJ (ends one day after) - (2D)"
+          keydate1_dateformat_dd >= cij_start_date &
+            keydate1_dateformat_dd <= cij_end_date &
+            keydate2_dateformat_dd > cij_end_date_upper &
+            !amended_dates ~ "2D",
 
-        # "2DP"	"Starts in CIJ (allowing +-1 day and ends one day after) - (2DP)"
-        keydate1_dateformat_dd >= cij_start_date_lower &
-          keydate1_dateformat_dd <= cij_end_date_upper &
-          keydate2_dateformat_dd > cij_end_date_upper &
-          !amended_dates ~ "2DP",
+          # "2DP"	"Starts in CIJ (allowing +-1 day and ends one day after) - (2DP)"
+          keydate1_dateformat_dd >= cij_start_date_lower &
+            keydate1_dateformat_dd <= cij_end_date_upper &
+            keydate2_dateformat_dd > cij_end_date_upper &
+            !amended_dates ~ "2DP",
 
-        # "2A"	"Starts in CIJ (Accurate Match after correcting assumed end date) - (2A)"
-        keydate1_dateformat_dd >= cij_start_date &
-          keydate1_dateformat_dd <= cij_end_date &
-          keydate2_dateformat_dd > cij_end_date &
-          amended_dates ~ "2A",
+          # "2A"	"Starts in CIJ (Accurate Match after correcting assumed end date) - (2A)"
+          keydate1_dateformat_dd >= cij_start_date &
+            keydate1_dateformat_dd <= cij_end_date &
+            keydate2_dateformat_dd > cij_end_date &
+            amended_dates ~ "2A",
 
-        # "2AP"	"Starts in CIJ (Accurate Match (allowing +-1 day) after correcting assumed end date) - (2AP)"
-        keydate1_dateformat_dd >= cij_start_date_lower &
-          keydate1_dateformat_dd <= cij_end_date_upper &
-          keydate2_dateformat_dd > cij_end_date_upper &
-          # keydate2_dateformat_dd == cij_end_month &
-          amended_dates ~ "2AP",
+          # "2AP"	"Starts in CIJ (Accurate Match (allowing +-1 day) after correcting assumed end date) - (2AP)"
+          keydate1_dateformat_dd >= cij_start_date_lower &
+            keydate1_dateformat_dd <= cij_end_date_upper &
+            keydate2_dateformat_dd > cij_end_date_upper &
+            # keydate2_dateformat_dd == cij_end_month &
+            amended_dates ~ "2AP",
 
-        # "3"	"Ends in CIJ - (3)"
-        keydate1_dateformat_dd <= cij_start_date &
-          keydate2_dateformat_dd >= cij_start_date &
-          keydate2_dateformat_dd <= cij_end_date &
-          !amended_dates ~ "3",
+          # "3"	"Ends in CIJ - (3)"
+          keydate1_dateformat_dd <= cij_start_date &
+            keydate2_dateformat_dd >= cij_start_date &
+            keydate2_dateformat_dd <= cij_end_date &
+            !amended_dates ~ "3",
 
-        # "3D"	"Ends in CIJ (starts one day before) - (3D)"
-        keydate1_dateformat_dd <= cij_start_date_lower &
-          keydate2_dateformat_dd >= cij_start_date &
-          keydate2_dateformat_dd <= cij_end_date &
-          !amended_dates ~ "3D",
+          # "3D"	"Ends in CIJ (starts one day before) - (3D)"
+          keydate1_dateformat_dd <= cij_start_date_lower &
+            keydate2_dateformat_dd >= cij_start_date &
+            keydate2_dateformat_dd <= cij_end_date &
+            !amended_dates ~ "3D",
 
-        # "3DP"	"Ends in CIJ (allowing +-1 day and starts one day before) - (3DP)"
-        keydate1_dateformat_dd <= cij_start_date_lower &
-          keydate2_dateformat_dd >= cij_start_date_lower &
-          keydate2_dateformat_dd <= cij_end_date_upper &
-          !amended_dates ~ "3DP",
+          # "3DP"	"Ends in CIJ (allowing +-1 day and starts one day before) - (3DP)"
+          keydate1_dateformat_dd <= cij_start_date_lower &
+            keydate2_dateformat_dd >= cij_start_date_lower &
+            keydate2_dateformat_dd <= cij_end_date_upper &
+            !amended_dates ~ "3DP",
 
-        # "3ADPE"
-        keydate1_dateformat_dd <= cij_start_date_lower &
-          keydate2_dateformat_dd >= cij_start_date_lower &
-          keydate2_dateformat_dd <= cij_end_month &
-          amended_dates ~ "3ADPE",
+          # "3ADPE"
+          keydate1_dateformat_dd <= cij_start_date_lower &
+            keydate2_dateformat_dd >= cij_start_date_lower &
+            keydate2_dateformat_dd <= cij_end_month &
+            amended_dates ~ "3ADPE",
 
 
 
-        # "4"	"Matches unended MH record - (4)"
-        recid == "04B" &
-          keydate1_dateformat_dd >= cij_start_date &
-          is_dummy_cij_end ~ "4",
+          # "4"	"Matches unended MH record - (4)"
+          recid == "04B" &
+            keydate1_dateformat_dd >= cij_start_date &
+            is_dummy_cij_end ~ "4",
 
-        # "4P"	"Matches unended MH record (allowing -1 day) - (4P)"
-        recid == "04B" &
-          keydate1_dateformat_dd >= cij_start_date_lower &
-          is_dummy_cij_end ~ "4P",
+          # "4P"	"Matches unended MH record (allowing -1 day) - (4P)"
+          recid == "04B" &
+            keydate1_dateformat_dd >= cij_start_date_lower &
+            is_dummy_cij_end ~ "4P",
 
-        # "-" "No Match (We don't keep these)"
-        .default = "-"
-      )
-    )) %>%
+          # "-" "No Match (We don't keep these)"
+          .default = "-"
+        )
+      ),
+      dd_type = factor(
+        dd_type,
+        levels = c(
+          "1",
+          "1P",
+          "1A",
+          "1AP",
+          "2",
+          "2D",
+          "2DP",
+          "2A",
+          "2AP",
+          "3",
+          "3D",
+          "3DP",
+          "1APE",
+          "3ADPE",
+          "4",
+          "4P",
+          "-"
+        )
+      ),
+      datediff_end = abs(cij_end_date - keydate2_dateformat_dd),
+      datediff_start = cij_start_date - keydate1_dateformat_dd
+    ) %>%
     dplyr::filter(dd_type != "-") %>%
     dplyr::mutate(smrtype_dd = dplyr::case_when(
       dd_type %in% c(
@@ -211,6 +237,23 @@ add_dd <- function(data, year) {
       ) ~ "DD-CIJ",
       dd_type %in% c("no-cij") ~ "DD-No CIJ"
     )) %>%
+
+    # remove duplicated rows when many to many inner join
+    # keep the records that closest to the cij record
+    dplyr::arrange(
+      chi,
+      original_admission_date,
+      keydate1_dateformat_dd,
+      keydate2_dateformat_dd,
+      dummy_id,
+      dd_type,
+      datediff_end,-datediff_start
+    ) %>%
+    dplyr::distinct(postcode,
+                    keydate1_dateformat_dd,
+                    keydate2_dateformat_dd,
+                    .keep_all = TRUE)
+
     # tidy up and rename columns to match the format of episode files
     dplyr::select(
       recid = recid_dd,
@@ -249,22 +292,5 @@ add_dd <- function(data, year) {
         )
     )
 
-  data_summary <- data %>%
-    filter(recid == "DD") %>%
-    dplyr::group_by(dd_type) %>%
-    dplyr::summarise(frequency = dplyr::n()) %>%
-    dplyr::mutate(
-      total = nrow(dd_data),
-      percentage = round(frequency / total * 100, 2)
-    )
-
-  data_summary <- data.frame(
-    dd_type = "-",
-    frequency = data_summary$total[1] - sum(data_summary$frequency),
-    total = data_summary$total[1]
-  ) %>%
-    dplyr::mutate(percentage = round(frequency / total * 100, 2)) %>%
-    dplyr::bind_rows(data_summary)
-
   return(data)
 }

From d17a0d44dbf606838d0bddde99765f09b25f5b99 Mon Sep 17 00:00:00 2001
From: lizihao-anu <lizihao-anu@users.noreply.github.com>
Date: Wed, 3 May 2023 09:53:43 +0000
Subject: [PATCH 13/37] Style code

---
 R/add_dd.R | 54 +++++++++++++++++++++++++++---------------------------
 1 file changed, 27 insertions(+), 27 deletions(-)

diff --git a/R/add_dd.R b/R/add_dd.R
index 725bd83e5..9c4630190 100644
--- a/R/add_dd.R
+++ b/R/add_dd.R
@@ -237,7 +237,6 @@ add_dd <- function(data, year) {
       ) ~ "DD-CIJ",
       dd_type %in% c("no-cij") ~ "DD-No CIJ"
     )) %>%
-
     # remove duplicated rows when many to many inner join
     # keep the records that closest to the cij record
     dplyr::arrange(
@@ -247,35 +246,36 @@ add_dd <- function(data, year) {
       keydate2_dateformat_dd,
       dummy_id,
       dd_type,
-      datediff_end,-datediff_start
+      datediff_end, -datediff_start
     ) %>%
     dplyr::distinct(postcode,
-                    keydate1_dateformat_dd,
-                    keydate2_dateformat_dd,
-                    .keep_all = TRUE)
+      keydate1_dateformat_dd,
+      keydate2_dateformat_dd,
+      .keep_all = TRUE
+    )
 
-    # tidy up and rename columns to match the format of episode files
-    dplyr::select(
-      recid = recid_dd,
-      chi,
-      keydate1_dateformat = keydate1_dateformat_dd,
-      keydate2_dateformat = keydate2_dateformat_dd,
-      amended_dates,
-      delay_end_reason,
-      primary_delay_reason,
-      primary_delay_reason,
-      hbtreatcode,
-      location,
-      spec,
-      smrtype = smrtype_dd,
-      cij_marker,
-      cij_start_date,
-      cij_end_date,
-      postcode = postcode_dd,
-      dd_responsible_lca,
-      original_admission_date,
-      dd_type
-    ) %>%
+  # tidy up and rename columns to match the format of episode files
+  dplyr::select(
+    recid = recid_dd,
+    chi,
+    keydate1_dateformat = keydate1_dateformat_dd,
+    keydate2_dateformat = keydate2_dateformat_dd,
+    amended_dates,
+    delay_end_reason,
+    primary_delay_reason,
+    primary_delay_reason,
+    hbtreatcode,
+    location,
+    spec,
+    smrtype = smrtype_dd,
+    cij_marker,
+    cij_start_date,
+    cij_end_date,
+    postcode = postcode_dd,
+    dd_responsible_lca,
+    original_admission_date,
+    dd_type
+  ) %>%
     # combine DD with episode data
     dplyr::bind_rows( # restore cij_end_date
       data %>%

From c8cb968afcaec8a52990b7820a7ec2e39543c985 Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Wed, 3 May 2023 12:10:29 +0100
Subject: [PATCH 14/37] fix missing %>%

---
 R/add_dd.R | 53 +++++++++++++++++++++++++++--------------------------
 1 file changed, 27 insertions(+), 26 deletions(-)

diff --git a/R/add_dd.R b/R/add_dd.R
index 9c4630190..a3a3b2524 100644
--- a/R/add_dd.R
+++ b/R/add_dd.R
@@ -180,11 +180,13 @@ add_dd <- function(data, year) {
           recid == "04B" &
             keydate1_dateformat_dd >= cij_start_date &
             is_dummy_cij_end ~ "4",
+          # is_dummy_cij_end & is_dummy_keydate2 ~ "4",
 
           # "4P"	"Matches unended MH record (allowing -1 day) - (4P)"
           recid == "04B" &
             keydate1_dateformat_dd >= cij_start_date_lower &
             is_dummy_cij_end ~ "4P",
+          # is_dummy_cij_end & is_dummy_keydate2  ~ "4P",
 
           # "-" "No Match (We don't keep these)"
           .default = "-"
@@ -252,32 +254,32 @@ add_dd <- function(data, year) {
       keydate1_dateformat_dd,
       keydate2_dateformat_dd,
       .keep_all = TRUE
-    )
+    ) %>%
 
-  # tidy up and rename columns to match the format of episode files
-  dplyr::select(
-    recid = recid_dd,
-    chi,
-    keydate1_dateformat = keydate1_dateformat_dd,
-    keydate2_dateformat = keydate2_dateformat_dd,
-    amended_dates,
-    delay_end_reason,
-    primary_delay_reason,
-    primary_delay_reason,
-    hbtreatcode,
-    location,
-    spec,
-    smrtype = smrtype_dd,
-    cij_marker,
-    cij_start_date,
-    cij_end_date,
-    postcode = postcode_dd,
-    dd_responsible_lca,
-    original_admission_date,
-    dd_type
-  ) %>%
+    # tidy up and rename columns to match the format of episode files
+    dplyr::select(
+      recid = recid_dd,
+      chi,
+      keydate1_dateformat = keydate1_dateformat_dd,
+      keydate2_dateformat = keydate2_dateformat_dd,
+      amended_dates,
+      delay_end_reason,
+      primary_delay_reason,
+      primary_delay_reason,
+      hbtreatcode,
+      location,
+      spec,
+      smrtype = smrtype_dd,
+      cij_marker,
+      cij_start_date,
+      cij_end_date,
+      postcode = postcode_dd,
+      dd_responsible_lca,
+      original_admission_date,
+      dd_type
+    ) %>%
     # combine DD with episode data
-    dplyr::bind_rows( # restore cij_end_date
+    dplyr::bind_rows(# restore cij_end_date
       data %>%
         dplyr::select(
           -c(
@@ -289,8 +291,7 @@ add_dd <- function(data, year) {
             "is_dummy_cij_end",
             "dummy_cij_end"
           )
-        )
-    )
+        ))
 
   return(data)
 }

From 040f2e31bbbd1368aada0e4b5ad5107fd2941eaf Mon Sep 17 00:00:00 2001
From: lizihao-anu <lizihao-anu@users.noreply.github.com>
Date: Wed, 3 May 2023 11:13:43 +0000
Subject: [PATCH 15/37] Update documentation

---
 NAMESPACE                      |  1 +
 man/calculate_stay.Rd          |  1 +
 man/check_quarter_format.Rd    |  1 +
 man/compute_mid_year_age.Rd    |  1 +
 man/convert_date_to_numeric.Rd |  1 +
 man/convert_numeric_to_date.Rd |  1 +
 man/end_fy.Rd                  |  1 +
 man/end_fy_quarter.Rd          |  1 +
 man/end_next_fy_quarter.Rd     |  1 +
 man/fy_interval.Rd             |  1 +
 man/is_date_in_fyyear.Rd       |  1 +
 man/last_date_month.Rd         | 39 ++++++++++++++++++++++++++++++++++
 man/midpoint_fy.Rd             |  1 +
 man/start_fy.Rd                |  1 +
 man/start_fy_quarter.Rd        |  1 +
 man/start_next_fy_quarter.Rd   |  1 +
 16 files changed, 54 insertions(+)
 create mode 100644 man/last_date_month.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 7eef077f3..8b87fca73 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -95,6 +95,7 @@ export(get_year_dir)
 export(is_date_in_fyyear)
 export(is_missing)
 export(la_code_lookup)
+export(last_date_month)
 export(latest_cost_year)
 export(latest_update)
 export(match_on_ltcs)
diff --git a/man/calculate_stay.Rd b/man/calculate_stay.Rd
index bb48a2030..78148921c 100644
--- a/man/calculate_stay.Rd
+++ b/man/calculate_stay.Rd
@@ -40,6 +40,7 @@ Other date functions:
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
+\code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
diff --git a/man/check_quarter_format.Rd b/man/check_quarter_format.Rd
index c49841baf..f0aba7a62 100644
--- a/man/check_quarter_format.Rd
+++ b/man/check_quarter_format.Rd
@@ -27,6 +27,7 @@ Other date functions:
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
+\code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
diff --git a/man/compute_mid_year_age.Rd b/man/compute_mid_year_age.Rd
index 0e353b685..ab1fd3c75 100644
--- a/man/compute_mid_year_age.Rd
+++ b/man/compute_mid_year_age.Rd
@@ -36,6 +36,7 @@ Other date functions:
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
+\code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
diff --git a/man/convert_date_to_numeric.Rd b/man/convert_date_to_numeric.Rd
index 1a742e10a..4e0470b01 100644
--- a/man/convert_date_to_numeric.Rd
+++ b/man/convert_date_to_numeric.Rd
@@ -30,6 +30,7 @@ Other date functions:
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
+\code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
diff --git a/man/convert_numeric_to_date.Rd b/man/convert_numeric_to_date.Rd
index 5173b07a5..0328df141 100644
--- a/man/convert_numeric_to_date.Rd
+++ b/man/convert_numeric_to_date.Rd
@@ -30,6 +30,7 @@ Other date functions:
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
+\code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
diff --git a/man/end_fy.Rd b/man/end_fy.Rd
index 34f579e47..75316104f 100644
--- a/man/end_fy.Rd
+++ b/man/end_fy.Rd
@@ -32,6 +32,7 @@ Other date functions:
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
+\code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
diff --git a/man/end_fy_quarter.Rd b/man/end_fy_quarter.Rd
index 05a36a761..ebebe2262 100644
--- a/man/end_fy_quarter.Rd
+++ b/man/end_fy_quarter.Rd
@@ -31,6 +31,7 @@ Other date functions:
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
+\code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
diff --git a/man/end_next_fy_quarter.Rd b/man/end_next_fy_quarter.Rd
index dd6774af3..d5f0d6088 100644
--- a/man/end_next_fy_quarter.Rd
+++ b/man/end_next_fy_quarter.Rd
@@ -31,6 +31,7 @@ Other date functions:
 \code{\link{end_fy}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
+\code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
diff --git a/man/fy_interval.Rd b/man/fy_interval.Rd
index ed971532c..b1ec14440 100644
--- a/man/fy_interval.Rd
+++ b/man/fy_interval.Rd
@@ -31,6 +31,7 @@ Other date functions:
 \code{\link{end_fy}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{is_date_in_fyyear}()},
+\code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
diff --git a/man/is_date_in_fyyear.Rd b/man/is_date_in_fyyear.Rd
index 3faf503e9..702926e4d 100644
--- a/man/is_date_in_fyyear.Rd
+++ b/man/is_date_in_fyyear.Rd
@@ -46,6 +46,7 @@ Other date functions:
 \code{\link{end_fy}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
+\code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
diff --git a/man/last_date_month.Rd b/man/last_date_month.Rd
new file mode 100644
index 000000000..441f04bbf
--- /dev/null
+++ b/man/last_date_month.Rd
@@ -0,0 +1,39 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/last_date_month.R
+\name{last_date_month}
+\alias{last_date_month}
+\title{Return the end date of the month of the given date}
+\usage{
+last_date_month(x)
+}
+\arguments{
+\item{x}{a date with a date format}
+}
+\value{
+a vector of dates, each the last day of the month of the input date
+}
+\description{
+Return the end date of the month of the given date
+}
+\examples{
+last_date_month(lubridate::as_date("2020-02-05"))
+
+}
+\seealso{
+Other date functions: 
+\code{\link{calculate_stay}()},
+\code{\link{check_quarter_format}()},
+\code{\link{compute_mid_year_age}()},
+\code{\link{convert_date_to_numeric}()},
+\code{\link{convert_numeric_to_date}()},
+\code{\link{end_fy_quarter}()},
+\code{\link{end_fy}()},
+\code{\link{end_next_fy_quarter}()},
+\code{\link{fy_interval}()},
+\code{\link{is_date_in_fyyear}()},
+\code{\link{midpoint_fy}()},
+\code{\link{start_fy_quarter}()},
+\code{\link{start_fy}()},
+\code{\link{start_next_fy_quarter}()}
+}
+\concept{date functions}
diff --git a/man/midpoint_fy.Rd b/man/midpoint_fy.Rd
index d351ae44b..20d83f9c4 100644
--- a/man/midpoint_fy.Rd
+++ b/man/midpoint_fy.Rd
@@ -33,6 +33,7 @@ Other date functions:
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
+\code{\link{last_date_month}()},
 \code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
 \code{\link{start_next_fy_quarter}()}
diff --git a/man/start_fy.Rd b/man/start_fy.Rd
index dd331a36b..02cdbacf3 100644
--- a/man/start_fy.Rd
+++ b/man/start_fy.Rd
@@ -33,6 +33,7 @@ Other date functions:
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
+\code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
diff --git a/man/start_fy_quarter.Rd b/man/start_fy_quarter.Rd
index 130d974d0..a58c2d0cf 100644
--- a/man/start_fy_quarter.Rd
+++ b/man/start_fy_quarter.Rd
@@ -32,6 +32,7 @@ Other date functions:
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
+\code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{start_fy}()},
 \code{\link{start_next_fy_quarter}()}
diff --git a/man/start_next_fy_quarter.Rd b/man/start_next_fy_quarter.Rd
index a03c7054d..7382de86f 100644
--- a/man/start_next_fy_quarter.Rd
+++ b/man/start_next_fy_quarter.Rd
@@ -32,6 +32,7 @@ Other date functions:
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
+\code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()}

From 2f0d2513b4332a6f331914bd05e6891bef17603c Mon Sep 17 00:00:00 2001
From: lizihao-anu <lizihao-anu@users.noreply.github.com>
Date: Wed, 3 May 2023 11:14:07 +0000
Subject: [PATCH 16/37] Style code

---
 R/add_dd.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/R/add_dd.R b/R/add_dd.R
index a3a3b2524..1374dc305 100644
--- a/R/add_dd.R
+++ b/R/add_dd.R
@@ -255,7 +255,6 @@ add_dd <- function(data, year) {
       keydate2_dateformat_dd,
       .keep_all = TRUE
     ) %>%
-
     # tidy up and rename columns to match the format of episode files
     dplyr::select(
       recid = recid_dd,
@@ -279,7 +278,7 @@ add_dd <- function(data, year) {
       dd_type
     ) %>%
     # combine DD with episode data
-    dplyr::bind_rows(# restore cij_end_date
+    dplyr::bind_rows( # restore cij_end_date
       data %>%
         dplyr::select(
           -c(
@@ -291,7 +290,8 @@ add_dd <- function(data, year) {
             "is_dummy_cij_end",
             "dummy_cij_end"
           )
-        ))
+        )
+    )
 
   return(data)
 }

From badcea76bc50cbaa9a1e3080dd1036f4e89380ac Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Wed, 3 May 2023 15:56:48 +0100
Subject: [PATCH 17/37] assign 1APE cij_end_date to keydate2_dd

---
 R/add_dd.R | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/R/add_dd.R b/R/add_dd.R
index a3a3b2524..1551b943a 100644
--- a/R/add_dd.R
+++ b/R/add_dd.R
@@ -174,19 +174,15 @@ add_dd <- function(data, year) {
             keydate2_dateformat_dd <= cij_end_month &
             amended_dates ~ "3ADPE",
 
-
-
           # "4"	"Matches unended MH record - (4)"
           recid == "04B" &
             keydate1_dateformat_dd >= cij_start_date &
             is_dummy_cij_end ~ "4",
-          # is_dummy_cij_end & is_dummy_keydate2 ~ "4",
 
           # "4P"	"Matches unended MH record (allowing -1 day) - (4P)"
           recid == "04B" &
             keydate1_dateformat_dd >= cij_start_date_lower &
             is_dummy_cij_end ~ "4P",
-          # is_dummy_cij_end & is_dummy_keydate2  ~ "4P",
 
           # "-" "No Match (We don't keep these)"
           .default = "-"
@@ -214,9 +210,18 @@ add_dd <- function(data, year) {
           "-"
         )
       ),
+
+      # For "1APE", assign 1APE cij_end_date to keydate2_dateformat_dd
+      keydate2_dateformat_dd = dplyr::if_else(
+        dd_type == "1APE" | dd_type == "3ADPE",
+        cij_end_date,
+        keydate2_dateformat_dd,
+      ),
+
       datediff_end = abs(cij_end_date - keydate2_dateformat_dd),
       datediff_start = cij_start_date - keydate1_dateformat_dd
     ) %>%
+
     dplyr::filter(dd_type != "-") %>%
     dplyr::mutate(smrtype_dd = dplyr::case_when(
       dd_type %in% c(

From bd0fab5f94a7ed021fb60c77c89fe75b529adbdb Mon Sep 17 00:00:00 2001
From: lizihao-anu <lizihao-anu@users.noreply.github.com>
Date: Wed, 3 May 2023 15:00:10 +0000
Subject: [PATCH 18/37] Style code

---
 R/add_dd.R | 2 --
 1 file changed, 2 deletions(-)

diff --git a/R/add_dd.R b/R/add_dd.R
index 1c8d75110..bf83835ef 100644
--- a/R/add_dd.R
+++ b/R/add_dd.R
@@ -217,11 +217,9 @@ add_dd <- function(data, year) {
         cij_end_date,
         keydate2_dateformat_dd,
       ),
-
       datediff_end = abs(cij_end_date - keydate2_dateformat_dd),
       datediff_start = cij_start_date - keydate1_dateformat_dd
     ) %>%
-
     dplyr::filter(dd_type != "-") %>%
     dplyr::mutate(smrtype_dd = dplyr::case_when(
       dd_type %in% c(

From 3787e97f6a2fe72de4b6fae73939f1ceba483b5a Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Tue, 9 May 2023 16:39:31 +0100
Subject: [PATCH 19/37] incorporate add_dd into run_episode_file

---
 R/add_dd.R           | 126 ++++++++++++++++++++++---------------------
 R/run_episode_file.R |   4 +-
 2 files changed, 68 insertions(+), 62 deletions(-)

diff --git a/R/add_dd.R b/R/add_dd.R
index bf83835ef..82f3fe24d 100644
--- a/R/add_dd.R
+++ b/R/add_dd.R
@@ -34,25 +34,29 @@ add_dd <- function(data, year) {
   # no flag for last reported
   dd_data <-
     read_file(get_source_extract_path(year_param, "DD")) %>%
+    dplyr::rename(
+      record_keydate1 = keydate1_dateformat,
+      record_keydate2 = keydate2_dateformat
+    ) %>%
     dplyr::mutate(
       # remember to revoke the keydate2 and amended_dates with dummy_keydate2
-      is_dummy_keydate2 = is.na(keydate2_dateformat),
+      is_dummy_keydate2 = is.na(record_keydate2),
       dummy_keydate2 = dplyr::if_else(is_dummy_keydate2,
-        lubridate::today(),
-        keydate2_dateformat
+                                      lubridate::today(),
+                                      record_keydate2
       ),
       dummy_id = dplyr::row_number()
     )
 
   by_dd <- dplyr::join_by(
     chi,
-    x$keydate1_dateformat >= y$dummy_cij_start,
+    x$record_keydate1 >= y$dummy_cij_start,
     x$dummy_keydate2 <= y$dummy_cij_end
   )
   data <- dd_data %>%
     dplyr::inner_join(data,
-      by = by_dd,
-      suffix = c("_dd", "")
+                      by = by_dd,
+                      suffix = c("_dd", "")
     ) %>%
     dplyr::arrange(cij_start_date, cij_end_date, cij_marker, postcode) %>%
     # remove duplicate rows, but still got some duplicate mis-matches
@@ -61,8 +65,8 @@ add_dd <- function(data, year) {
       cij_start_date,
       cij_end_date,
       cij_marker,
-      keydate1_dateformat_dd,
-      keydate2_dateformat_dd,
+      record_keydate1_dd,
+      record_keydate2_dd,
       .keep_all = TRUE
     ) %>%
     # determine DD quality
@@ -87,7 +91,7 @@ add_dd <- function(data, year) {
           # "4P" "Matches unended MH record (allowing -1 day) - (4P)"
           # "-" "No Match (We don't keep these)".
 
-          # If we use keydate2_dateformat_dd,
+          # If we use record_keydate2_dd,
           # we implicitly mean is_dummy_keydate2 needs to be FALSE.
           # Given that in DD files,
           # we only keep the records with missing keydate2 for 04B, mental health,
@@ -95,93 +99,93 @@ add_dd <- function(data, year) {
           # it should be ok to only use dummy_keydate2 for "4"(s).
 
           # "1"	"Accurate Match - (1)"
-          keydate1_dateformat_dd >= cij_start_date &
-            keydate2_dateformat_dd <= cij_end_date &
+          record_keydate1_dd >= cij_start_date &
+            record_keydate2_dd <= cij_end_date &
             !amended_dates ~ "1",
 
           # "1P"	"Accurate Match (allowing +-1 day) - (1P)"
-          keydate1_dateformat_dd >= cij_start_date_lower &
-            keydate2_dateformat_dd <= cij_end_date_upper &
+          record_keydate1_dd >= cij_start_date_lower &
+            record_keydate2_dd <= cij_end_date_upper &
             !amended_dates ~ "1P",
 
           # "1A"	"Accurate Match (has an assumed end date) - (1A)"
-          keydate1_dateformat_dd >= cij_start_date &
-            keydate2_dateformat_dd <= cij_end_date &
+          record_keydate1_dd >= cij_start_date &
+            record_keydate2_dd <= cij_end_date &
             amended_dates ~ "1A",
 
           # "1AP"	"Accurate Match (allowing +-1 day and has an assumed end date) - (1AP)"
-          keydate1_dateformat_dd >= cij_start_date_lower &
-            keydate2_dateformat_dd <= cij_end_date_upper &
+          record_keydate1_dd >= cij_start_date_lower &
+            record_keydate2_dd <= cij_end_date_upper &
             amended_dates ~ "1AP",
 
           # "1APE"	the CIJ ends during the month but the delay has an end date of the end of the month
-          keydate1_dateformat_dd >= cij_start_date_lower &
-            keydate2_dateformat_dd == cij_end_month &
+          record_keydate1_dd >= cij_start_date_lower &
+            record_keydate2_dd == cij_end_month &
             amended_dates ~ "1APE",
 
           # "2"	"Starts in CIJ - (2)"
-          keydate1_dateformat_dd >= cij_start_date &
-            keydate1_dateformat_dd <= cij_end_date &
-            keydate2_dateformat_dd > cij_end_date &
+          record_keydate1_dd >= cij_start_date &
+            record_keydate1_dd <= cij_end_date &
+            record_keydate2_dd > cij_end_date &
             !amended_dates ~ "2",
 
           # "2D"	"Starts in CIJ (ends one day after) - (2D)"
-          keydate1_dateformat_dd >= cij_start_date &
-            keydate1_dateformat_dd <= cij_end_date &
-            keydate2_dateformat_dd > cij_end_date_upper &
+          record_keydate1_dd >= cij_start_date &
+            record_keydate1_dd <= cij_end_date &
+            record_keydate2_dd > cij_end_date_upper &
             !amended_dates ~ "2D",
 
           # "2DP"	"Starts in CIJ (allowing +-1 day and ends one day after) - (2DP)"
-          keydate1_dateformat_dd >= cij_start_date_lower &
-            keydate1_dateformat_dd <= cij_end_date_upper &
-            keydate2_dateformat_dd > cij_end_date_upper &
+          record_keydate1_dd >= cij_start_date_lower &
+            record_keydate1_dd <= cij_end_date_upper &
+            record_keydate2_dd > cij_end_date_upper &
             !amended_dates ~ "2DP",
 
           # "2A"	"Starts in CIJ (Accurate Match after correcting assumed end date) - (2A)"
-          keydate1_dateformat_dd >= cij_start_date &
-            keydate1_dateformat_dd <= cij_end_date &
-            keydate2_dateformat_dd > cij_end_date &
+          record_keydate1_dd >= cij_start_date &
+            record_keydate1_dd <= cij_end_date &
+            record_keydate2_dd > cij_end_date &
             amended_dates ~ "2A",
 
           # "2AP"	"Starts in CIJ (Accurate Match (allowing +-1 day) after correcting assumed end date) - (2AP)"
-          keydate1_dateformat_dd >= cij_start_date_lower &
-            keydate1_dateformat_dd <= cij_end_date_upper &
-            keydate2_dateformat_dd > cij_end_date_upper &
-            # keydate2_dateformat_dd == cij_end_month &
+          record_keydate1_dd >= cij_start_date_lower &
+            record_keydate1_dd <= cij_end_date_upper &
+            record_keydate2_dd > cij_end_date_upper &
+            # record_keydate2_dd == cij_end_month &
             amended_dates ~ "2AP",
 
           # "3"	"Ends in CIJ - (3)"
-          keydate1_dateformat_dd <= cij_start_date &
-            keydate2_dateformat_dd >= cij_start_date &
-            keydate2_dateformat_dd <= cij_end_date &
+          record_keydate1_dd <= cij_start_date &
+            record_keydate2_dd >= cij_start_date &
+            record_keydate2_dd <= cij_end_date &
             !amended_dates ~ "3",
 
           # "3D"	"Ends in CIJ (starts one day before) - (3D)"
-          keydate1_dateformat_dd <= cij_start_date_lower &
-            keydate2_dateformat_dd >= cij_start_date &
-            keydate2_dateformat_dd <= cij_end_date &
+          record_keydate1_dd <= cij_start_date_lower &
+            record_keydate2_dd >= cij_start_date &
+            record_keydate2_dd <= cij_end_date &
             !amended_dates ~ "3D",
 
           # "3DP"	"Ends in CIJ (allowing +-1 day and starts one day before) - (3DP)"
-          keydate1_dateformat_dd <= cij_start_date_lower &
-            keydate2_dateformat_dd >= cij_start_date_lower &
-            keydate2_dateformat_dd <= cij_end_date_upper &
+          record_keydate1_dd <= cij_start_date_lower &
+            record_keydate2_dd >= cij_start_date_lower &
+            record_keydate2_dd <= cij_end_date_upper &
             !amended_dates ~ "3DP",
 
           # "3ADPE"
-          keydate1_dateformat_dd <= cij_start_date_lower &
-            keydate2_dateformat_dd >= cij_start_date_lower &
-            keydate2_dateformat_dd <= cij_end_month &
+          record_keydate1_dd <= cij_start_date_lower &
+            record_keydate2_dd >= cij_start_date_lower &
+            record_keydate2_dd <= cij_end_month &
             amended_dates ~ "3ADPE",
 
           # "4"	"Matches unended MH record - (4)"
           recid == "04B" &
-            keydate1_dateformat_dd >= cij_start_date &
+            record_keydate1_dd >= cij_start_date &
             is_dummy_cij_end ~ "4",
 
           # "4P"	"Matches unended MH record (allowing -1 day) - (4P)"
           recid == "04B" &
-            keydate1_dateformat_dd >= cij_start_date_lower &
+            record_keydate1_dd >= cij_start_date_lower &
             is_dummy_cij_end ~ "4P",
 
           # "-" "No Match (We don't keep these)"
@@ -211,14 +215,14 @@ add_dd <- function(data, year) {
         )
       ),
 
-      # For "1APE", assign 1APE cij_end_date to keydate2_dateformat_dd
-      keydate2_dateformat_dd = dplyr::if_else(
+      # For "1APE", assign 1APE cij_end_date to record_keydate2_dd
+      record_keydate2_dd = dplyr::if_else(
         dd_type == "1APE" | dd_type == "3ADPE",
         cij_end_date,
-        keydate2_dateformat_dd,
+        record_keydate2_dd,
       ),
-      datediff_end = abs(cij_end_date - keydate2_dateformat_dd),
-      datediff_start = cij_start_date - keydate1_dateformat_dd
+      datediff_end = abs(cij_end_date - record_keydate2_dd),
+      datediff_start = cij_start_date - record_keydate1_dd
     ) %>%
     dplyr::filter(dd_type != "-") %>%
     dplyr::mutate(smrtype_dd = dplyr::case_when(
@@ -247,23 +251,23 @@ add_dd <- function(data, year) {
     dplyr::arrange(
       chi,
       original_admission_date,
-      keydate1_dateformat_dd,
-      keydate2_dateformat_dd,
+      record_keydate1_dd,
+      record_keydate2_dd,
       dummy_id,
       dd_type,
       datediff_end, -datediff_start
     ) %>%
     dplyr::distinct(postcode,
-      keydate1_dateformat_dd,
-      keydate2_dateformat_dd,
-      .keep_all = TRUE
+                    record_keydate1_dd,
+                    record_keydate2_dd,
+                    .keep_all = TRUE
     ) %>%
     # tidy up and rename columns to match the format of episode files
     dplyr::select(
       recid = recid_dd,
       chi,
-      keydate1_dateformat = keydate1_dateformat_dd,
-      keydate2_dateformat = keydate2_dateformat_dd,
+      record_keydate1 = record_keydate1_dd,
+      record_keydate2 = record_keydate2_dd,
       amended_dates,
       delay_end_reason,
       primary_delay_reason,
diff --git a/R/run_episode_file.R b/R/run_episode_file.R
index 87d5f7be6..2fe2c44d9 100644
--- a/R/run_episode_file.R
+++ b/R/run_episode_file.R
@@ -37,6 +37,8 @@ run_episode_file <- function(processed_data_list, year, write_to_disk = TRUE) {
       "op1a",
       "age",
       "cij_marker",
+      "cij_start_date",
+      "cij_end_date",
       "cij_pattype_code",
       "cij_ipdc",
       "cij_admtype",
@@ -72,7 +74,7 @@ run_episode_file <- function(processed_data_list, year, write_to_disk = TRUE) {
     fill_missing_cij_markers() %>%
     create_cost_inc_dna() %>%
     add_ppa_flag() %>%
-    # TODO add Link Delayed Discharge here (From C02)
+    add_dd(year) %>%
     add_nsu_cohort(year) %>%
     match_on_ltcs(year) %>%
     correct_demographics(year) %>%

From 05fda77ac819f8e545ac3e4f6cf32b6333cec431 Mon Sep 17 00:00:00 2001
From: lizihao-anu <lizihao-anu@users.noreply.github.com>
Date: Tue, 9 May 2023 15:44:42 +0000
Subject: [PATCH 20/37] Style code

---
 R/add_dd.R | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/R/add_dd.R b/R/add_dd.R
index 82f3fe24d..8d1e8c253 100644
--- a/R/add_dd.R
+++ b/R/add_dd.R
@@ -42,8 +42,8 @@ add_dd <- function(data, year) {
       # remember to revoke the keydate2 and amended_dates with dummy_keydate2
       is_dummy_keydate2 = is.na(record_keydate2),
       dummy_keydate2 = dplyr::if_else(is_dummy_keydate2,
-                                      lubridate::today(),
-                                      record_keydate2
+        lubridate::today(),
+        record_keydate2
       ),
       dummy_id = dplyr::row_number()
     )
@@ -55,8 +55,8 @@ add_dd <- function(data, year) {
   )
   data <- dd_data %>%
     dplyr::inner_join(data,
-                      by = by_dd,
-                      suffix = c("_dd", "")
+      by = by_dd,
+      suffix = c("_dd", "")
     ) %>%
     dplyr::arrange(cij_start_date, cij_end_date, cij_marker, postcode) %>%
     # remove duplicate rows, but still got some duplicate mis-matches
@@ -258,9 +258,9 @@ add_dd <- function(data, year) {
       datediff_end, -datediff_start
     ) %>%
     dplyr::distinct(postcode,
-                    record_keydate1_dd,
-                    record_keydate2_dd,
-                    .keep_all = TRUE
+      record_keydate1_dd,
+      record_keydate2_dd,
+      .keep_all = TRUE
     ) %>%
     # tidy up and rename columns to match the format of episode files
     dplyr::select(

From 56a700a3791e480a04a0be838f597bd7cf357615 Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Wed, 17 May 2023 08:55:04 +0000
Subject: [PATCH 21/37] [check-spelling] Update metadata

Update for https://github.com/Public-Health-Scotland/source-linkage-files/actions/runs/4989871850/attempts/1
Accepted in https://github.com/Public-Health-Scotland/source-linkage-files/pull/639#issuecomment-1551010365

Signed-off-by: check-spelling-bot <check-spelling-bot@users.noreply.github.com>
---
 .github/actions/spelling/expect.txt | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt
index 4b36c7d8e..9254991ea 100644
--- a/.github/actions/spelling/expect.txt
+++ b/.github/actions/spelling/expect.txt
@@ -1,6 +1,7 @@
 Accom
 admloc
 admtype
+ADPE
 adtf
 arrivalmode
 arth
@@ -30,6 +31,7 @@ cph
 createslf
 dataframe
 datazone
+datediff
 dateformat
 dateop
 datetime
@@ -74,6 +76,7 @@ hbtreatcode
 hbtreatname
 HCP
 HHG
+hhg
 hjust
 hms
 homecare
@@ -161,7 +164,9 @@ smr
 SMRA
 smrtype
 SPARRA
+sparra
 spd
+SPSS
 spss
 stadm
 stefanzweifel
@@ -187,6 +192,7 @@ vline
 xintercept
 xlsx
 yearstay
+YYYYQX
 zihao
 zsav
 zstd

From acf960ee974a27ef5ba3cfa83feaaa4a21033848 Mon Sep 17 00:00:00 2001
From: Zihao Li <zihao.li@phs.scot>
Date: Tue, 23 May 2023 07:39:09 +0100
Subject: [PATCH 22/37] Update R/add_dd.R

Co-authored-by: James McMahon <james.mcmahon@phs.scot>
---
 R/add_dd.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/add_dd.R b/R/add_dd.R
index 8d1e8c253..9ac6c3409 100644
--- a/R/add_dd.R
+++ b/R/add_dd.R
@@ -13,7 +13,7 @@ add_dd <- function(data, year) {
   data <- data %>%
     dplyr::mutate(
       # remember to revoke the cij_end_date with dummy_cij_end
-      cij_start_date_lower = cij_start_date - lubridate::days(1),
+      cij_start_date_lower = .data$cij_start_date - lubridate::days(1),
       cij_end_date_upper = cij_end_date + lubridate::days(1),
       cij_end_month = last_date_month(cij_end_date),
       is_dummy_cij_start = is.na(cij_start_date) & !is.na(cij_end_date),

From 4b5f70f10cb8e506a0c90c6555670b346471259a Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Tue, 23 May 2023 10:57:48 +0100
Subject: [PATCH 23/37] select the correct lines for delayed discharge

---
 R/add_dd.R | 59 +++++++++++++++++++++++++++++++-----------------------
 1 file changed, 34 insertions(+), 25 deletions(-)

diff --git a/R/add_dd.R b/R/add_dd.R
index 8d1e8c253..b6b99fce7 100644
--- a/R/add_dd.R
+++ b/R/add_dd.R
@@ -261,31 +261,41 @@ add_dd <- function(data, year) {
       record_keydate1_dd,
       record_keydate2_dd,
       .keep_all = TRUE
-    ) %>%
+    ) #%>%
     # tidy up and rename columns to match the format of episode files
-    dplyr::select(
-      recid = recid_dd,
-      chi,
-      record_keydate1 = record_keydate1_dd,
-      record_keydate2 = record_keydate2_dd,
-      amended_dates,
-      delay_end_reason,
-      primary_delay_reason,
-      primary_delay_reason,
-      hbtreatcode,
-      location,
-      spec,
-      smrtype = smrtype_dd,
-      cij_marker,
-      cij_start_date,
-      cij_end_date,
-      postcode = postcode_dd,
-      dd_responsible_lca,
-      original_admission_date,
-      dd_type
-    ) %>%
+  dplyr::select(
+    "year" = "year_dd",
+    "recid" = "recid_dd",
+    "record_keydate1" = "record_keydate1_dd",
+    "record_keydate2" = "record_keydate2_dd",
+    "smrtype" = "smrtype_dd",
+    "chi",
+    "gender",
+    "dob",
+    "age",
+    "gpprac",
+    "postcode" = "postcode_dd",
+    "lca" = "dd_responsible_lca",# ???
+    "hbtreatcode" = "hbtreatcode_dd",
+    "original_admission_date",
+    "amended_dates",
+    "delay_end_reason",
+    "primary_delay_reason",
+    "secondary_delay_reason",
+    "cij_marker",
+    "cij_start_date",
+    "cij_end_date",
+    "cij_pattype_code",
+    "cij_ipdc",
+    "cij_admtype",
+    "cij_adm_spec",
+    "cij_dis_spec",
+    "location",
+    "spec" = "spec_dd",
+    "dd_type"
+  ) %>%
     # combine DD with episode data
-    dplyr::bind_rows( # restore cij_end_date
+    dplyr::bind_rows(# restore cij_end_date
       data %>%
         dplyr::select(
           -c(
@@ -297,8 +307,7 @@ add_dd <- function(data, year) {
             "is_dummy_cij_end",
             "dummy_cij_end"
           )
-        )
-    )
+        ))
 
   return(data)
 }

From da8449b28241960a60aeb4602b25e55aebd256fd Mon Sep 17 00:00:00 2001
From: lizihao-anu <lizihao-anu@users.noreply.github.com>
Date: Tue, 23 May 2023 10:01:40 +0000
Subject: [PATCH 24/37] Style code

---
 R/add_dd.R | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/R/add_dd.R b/R/add_dd.R
index f0c07a97c..c4a02a66e 100644
--- a/R/add_dd.R
+++ b/R/add_dd.R
@@ -261,8 +261,8 @@ add_dd <- function(data, year) {
       record_keydate1_dd,
       record_keydate2_dd,
       .keep_all = TRUE
-    ) #%>%
-    # tidy up and rename columns to match the format of episode files
+    ) # %>%
+  # tidy up and rename columns to match the format of episode files
   dplyr::select(
     "year" = "year_dd",
     "recid" = "recid_dd",
@@ -275,7 +275,7 @@ add_dd <- function(data, year) {
     "age",
     "gpprac",
     "postcode" = "postcode_dd",
-    "lca" = "dd_responsible_lca",# ???
+    "lca" = "dd_responsible_lca", # ???
     "hbtreatcode" = "hbtreatcode_dd",
     "original_admission_date",
     "amended_dates",
@@ -295,7 +295,7 @@ add_dd <- function(data, year) {
     "dd_type"
   ) %>%
     # combine DD with episode data
-    dplyr::bind_rows(# restore cij_end_date
+    dplyr::bind_rows( # restore cij_end_date
       data %>%
         dplyr::select(
           -c(
@@ -307,7 +307,8 @@ add_dd <- function(data, year) {
             "is_dummy_cij_end",
             "dummy_cij_end"
           )
-        ))
+        )
+    )
 
   return(data)
 }

From 42809faf3a285cebf9a01c5a2a09d9ea7e4b3e5a Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Tue, 23 May 2023 11:04:13 +0100
Subject: [PATCH 25/37] add_dd lca

---
 R/add_dd.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/add_dd.R b/R/add_dd.R
index f0c07a97c..d7b1979a7 100644
--- a/R/add_dd.R
+++ b/R/add_dd.R
@@ -275,7 +275,7 @@ add_dd <- function(data, year) {
     "age",
     "gpprac",
     "postcode" = "postcode_dd",
-    "lca" = "dd_responsible_lca",# ???
+    "lca" = "dd_responsible_lca",
     "hbtreatcode" = "hbtreatcode_dd",
     "original_admission_date",
     "amended_dates",

From 92bd0112702c71cb079f895613251459715a78b3 Mon Sep 17 00:00:00 2001
From: lizihao-anu <lizihao-anu@users.noreply.github.com>
Date: Tue, 23 May 2023 10:11:02 +0000
Subject: [PATCH 26/37] Style code

---
 R/add_dd.R | 64 +++++++++++++++++++++++++++---------------------------
 1 file changed, 32 insertions(+), 32 deletions(-)

diff --git a/R/add_dd.R b/R/add_dd.R
index 0f0e5c4fc..d9f141aef 100644
--- a/R/add_dd.R
+++ b/R/add_dd.R
@@ -262,38 +262,38 @@ add_dd <- function(data, year) {
       record_keydate2_dd,
       .keep_all = TRUE
     ) %>%
-  # tidy up and rename columns to match the format of episode files
-  dplyr::select(
-    "year" = "year_dd",
-    "recid" = "recid_dd",
-    "record_keydate1" = "record_keydate1_dd",
-    "record_keydate2" = "record_keydate2_dd",
-    "smrtype" = "smrtype_dd",
-    "chi",
-    "gender",
-    "dob",
-    "age",
-    "gpprac",
-    "postcode" = "postcode_dd",
-    "lca" = "dd_responsible_lca",
-    "hbtreatcode" = "hbtreatcode_dd",
-    "original_admission_date",
-    "amended_dates",
-    "delay_end_reason",
-    "primary_delay_reason",
-    "secondary_delay_reason",
-    "cij_marker",
-    "cij_start_date",
-    "cij_end_date",
-    "cij_pattype_code",
-    "cij_ipdc",
-    "cij_admtype",
-    "cij_adm_spec",
-    "cij_dis_spec",
-    "location",
-    "spec" = "spec_dd",
-    "dd_type"
-  ) %>%
+    # tidy up and rename columns to match the format of episode files
+    dplyr::select(
+      "year" = "year_dd",
+      "recid" = "recid_dd",
+      "record_keydate1" = "record_keydate1_dd",
+      "record_keydate2" = "record_keydate2_dd",
+      "smrtype" = "smrtype_dd",
+      "chi",
+      "gender",
+      "dob",
+      "age",
+      "gpprac",
+      "postcode" = "postcode_dd",
+      "lca" = "dd_responsible_lca",
+      "hbtreatcode" = "hbtreatcode_dd",
+      "original_admission_date",
+      "amended_dates",
+      "delay_end_reason",
+      "primary_delay_reason",
+      "secondary_delay_reason",
+      "cij_marker",
+      "cij_start_date",
+      "cij_end_date",
+      "cij_pattype_code",
+      "cij_ipdc",
+      "cij_admtype",
+      "cij_adm_spec",
+      "cij_dis_spec",
+      "location",
+      "spec" = "spec_dd",
+      "dd_type"
+    ) %>%
     # combine DD with episode data
     dplyr::bind_rows( # restore cij_end_date
       data %>%

From 4e7c07cf50a4ca81f8b7fc3ed9dfdc1704d3053e Mon Sep 17 00:00:00 2001
From: Zihao Li <zihao.li@phs.scot>
Date: Tue, 30 May 2023 10:06:02 +0100
Subject: [PATCH 27/37] Update R/add_dd.R

Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com>
---
 R/add_dd.R | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/R/add_dd.R b/R/add_dd.R
index d9f141aef..aa6fcbd39 100644
--- a/R/add_dd.R
+++ b/R/add_dd.R
@@ -14,15 +14,15 @@ add_dd <- function(data, year) {
     dplyr::mutate(
       # remember to revoke the cij_end_date with dummy_cij_end
       cij_start_date_lower = .data$cij_start_date - lubridate::days(1),
-      cij_end_date_upper = cij_end_date + lubridate::days(1),
-      cij_end_month = last_date_month(cij_end_date),
-      is_dummy_cij_start = is.na(cij_start_date) & !is.na(cij_end_date),
+      cij_end_date_upper = .data$cij_end_date + lubridate::days(1),
+      cij_end_month = last_date_month(.data$cij_end_date),
+      is_dummy_cij_start = is.na(.data$cij_start_date) & !is.na(.data$cij_end_date),
       dummy_cij_start = dplyr::if_else(
         is_dummy_cij_start,
         lubridate::as_date("1900-01-01"),
-        cij_start_date_lower
+        .data$cij_start_date_lower
       ),
-      is_dummy_cij_end = !is.na(cij_start_date) & is.na(cij_end_date),
+      is_dummy_cij_end = !is.na(.data$cij_start_date) & is.na(.data$cij_end_date),
       dummy_cij_end = dplyr::if_else(
         is_dummy_cij_end,
         lubridate::today(),

From d70b893edf23dcda1e415edda53f6e0bca43a0de Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Tue, 30 May 2023 11:19:26 +0100
Subject: [PATCH 28/37] remove unnecessary x$ / y$ qualifiers from join_by

---
 R/add_dd.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/add_dd.R b/R/add_dd.R
index aa6fcbd39..72101c2db 100644
--- a/R/add_dd.R
+++ b/R/add_dd.R
@@ -50,8 +50,8 @@ add_dd <- function(data, year) {
 
   by_dd <- dplyr::join_by(
     chi,
-    x$record_keydate1 >= y$dummy_cij_start,
-    x$dummy_keydate2 <= y$dummy_cij_end
+    record_keydate1 >= dummy_cij_start,
+    dummy_keydate2 <= dummy_cij_end
   )
   data <- dd_data %>%
     dplyr::inner_join(data,

From c43243f3b49433050a9e9d6970e5b4e1017ea25c Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Fri, 2 Jun 2023 09:22:47 +0100
Subject: [PATCH 29/37] Add `.data$` where needed

---
 R/add_dd.R | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/R/add_dd.R b/R/add_dd.R
index 72101c2db..2f82bcef2 100644
--- a/R/add_dd.R
+++ b/R/add_dd.R
@@ -18,15 +18,15 @@ add_dd <- function(data, year) {
       cij_end_month = last_date_month(.data$cij_end_date),
       is_dummy_cij_start = is.na(.data$cij_start_date) & !is.na(.data$cij_end_date),
       dummy_cij_start = dplyr::if_else(
-        is_dummy_cij_start,
+        .data$is_dummy_cij_start,
         lubridate::as_date("1900-01-01"),
         .data$cij_start_date_lower
       ),
       is_dummy_cij_end = !is.na(.data$cij_start_date) & is.na(.data$cij_end_date),
       dummy_cij_end = dplyr::if_else(
-        is_dummy_cij_end,
+        .data$is_dummy_cij_end,
         lubridate::today(),
-        cij_end_month
+        .data$cij_end_month
       )
     )
 
@@ -40,8 +40,8 @@ add_dd <- function(data, year) {
     ) %>%
     dplyr::mutate(
       # remember to revoke the keydate2 and amended_dates with dummy_keydate2
-      is_dummy_keydate2 = is.na(record_keydate2),
-      dummy_keydate2 = dplyr::if_else(is_dummy_keydate2,
+      is_dummy_keydate2 = is.na(.data$record_keydate2),
+      dummy_keydate2 = dplyr::if_else(.data$is_dummy_keydate2,
         lubridate::today(),
         record_keydate2
       ),

From 5a97d23173ca8cc214496e7995c0c17fd06b481f Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Fri, 2 Jun 2023 09:23:27 +0100
Subject: [PATCH 30/37] Add quotes in the rename

Also add a TODO to make this change earlier
---
 R/add_dd.R | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/R/add_dd.R b/R/add_dd.R
index 2f82bcef2..eade61e6f 100644
--- a/R/add_dd.R
+++ b/R/add_dd.R
@@ -35,8 +35,9 @@ add_dd <- function(data, year) {
   dd_data <-
     read_file(get_source_extract_path(year_param, "DD")) %>%
     dplyr::rename(
-      record_keydate1 = keydate1_dateformat,
-      record_keydate2 = keydate2_dateformat
+      # TODO Change the name of the variables in the DD extract rather than here.
+      record_keydate1 = "keydate1_dateformat",
+      record_keydate2 = "keydate2_dateformat"
     ) %>%
     dplyr::mutate(
       # remember to revoke the keydate2 and amended_dates with dummy_keydate2

From a23dd7a64901e661a10b2889d0b270f61b4ae5ef Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Fri, 2 Jun 2023 12:27:58 +0100
Subject: [PATCH 31/37] Lint - Make integers explicit

---
 R/add_dd.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/add_dd.R b/R/add_dd.R
index eade61e6f..f65d876dc 100644
--- a/R/add_dd.R
+++ b/R/add_dd.R
@@ -13,8 +13,8 @@ add_dd <- function(data, year) {
   data <- data %>%
     dplyr::mutate(
       # remember to revoke the cij_end_date with dummy_cij_end
-      cij_start_date_lower = .data$cij_start_date - lubridate::days(1),
-      cij_end_date_upper = .data$cij_end_date + lubridate::days(1),
+      cij_start_date_lower = .data$cij_start_date - lubridate::days(1L),
+      cij_end_date_upper = .data$cij_end_date + lubridate::days(1L),
       cij_end_month = last_date_month(.data$cij_end_date),
       is_dummy_cij_start = is.na(.data$cij_start_date) & !is.na(.data$cij_end_date),
       dummy_cij_start = dplyr::if_else(

From c172f13efa57f22ac3a0e7bc2a389c970da1216d Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Fri, 2 Jun 2023 12:29:45 +0100
Subject: [PATCH 32/37] Lint - add `.data$` where relevant

---
 R/add_dd.R | 67 ++++++++++++++++++++++++++++++------------------------
 1 file changed, 37 insertions(+), 30 deletions(-)

diff --git a/R/add_dd.R b/R/add_dd.R
index f65d876dc..72be657b6 100644
--- a/R/add_dd.R
+++ b/R/add_dd.R
@@ -44,36 +44,41 @@ add_dd <- function(data, year) {
       is_dummy_keydate2 = is.na(.data$record_keydate2),
       dummy_keydate2 = dplyr::if_else(.data$is_dummy_keydate2,
         lubridate::today(),
-        record_keydate2
+        .data$record_keydate2
       ),
       dummy_id = dplyr::row_number()
     )
 
   by_dd <- dplyr::join_by(
-    chi,
-    record_keydate1 >= dummy_cij_start,
-    dummy_keydate2 <= dummy_cij_end
+    "chi", # join_by() rejects `.data$`; x is dd_data, y is data
+    x$record_keydate1 >= y$dummy_cij_start,
+    x$dummy_keydate2 <= y$dummy_cij_end
   )
   data <- dd_data %>%
     dplyr::inner_join(data,
       by = by_dd,
       suffix = c("_dd", "")
     ) %>%
-    dplyr::arrange(cij_start_date, cij_end_date, cij_marker, postcode) %>%
-    # remove duplicate rows, but still got some duplicate mis-matches
+    dplyr::arrange(
+      .data$cij_start_date,
+      .data$cij_end_date,
+      .data$cij_marker,
+      .data$postcode
+    ) %>%
+    # remove duplicate rows, but still got some duplicate mismatches
     dplyr::distinct(
-      chi,
-      cij_start_date,
-      cij_end_date,
-      cij_marker,
-      record_keydate1_dd,
-      record_keydate2_dd,
+      .data$chi,
+      .data$cij_start_date,
+      .data$cij_end_date,
+      .data$cij_marker,
+      .data$record_keydate1_dd,
+      .data$record_keydate2_dd,
       .keep_all = TRUE
     ) %>%
     # determine DD quality
     dplyr::mutate(
       dd_type = dplyr::if_else(
-        is.na(cij_marker),
+        is.na(.data$cij_marker),
         "no-cij",
         dplyr::case_when(
           # "1"	"Accurate Match - (1)"
@@ -194,7 +199,7 @@ add_dd <- function(data, year) {
         )
       ),
       dd_type = factor(
-        dd_type,
+        .data$dd_type,
         levels = c(
           "1",
           "1P",
@@ -218,16 +223,16 @@ add_dd <- function(data, year) {
 
       # For "1APE", assign 1APE cij_end_date to record_keydate2_dd
       record_keydate2_dd = dplyr::if_else(
-        dd_type == "1APE" | dd_type == "3ADPE",
-        cij_end_date,
-        record_keydate2_dd,
+        .data$dd_type == "1APE" | .data$dd_type == "3ADPE",
+        .data$cij_end_date,
+        .data$record_keydate2_dd
       ),
-      datediff_end = abs(cij_end_date - record_keydate2_dd),
-      datediff_start = cij_start_date - record_keydate1_dd
+      datediff_end = abs(.data$cij_end_date - .data$record_keydate2_dd),
+      datediff_start = .data$cij_start_date - .data$record_keydate1_dd
     ) %>%
-    dplyr::filter(dd_type != "-") %>%
     dplyr::mutate(smrtype_dd = dplyr::case_when(
       dd_type %in% c(
+    dplyr::filter(.data$dd_type != "-") %>%
         "1",
         "1P",
         "1A",
@@ -250,17 +255,19 @@ add_dd <- function(data, year) {
     # remove duplicated rows when many to many inner join
     # keep the records that closest to the cij record
     dplyr::arrange(
-      chi,
-      original_admission_date,
-      record_keydate1_dd,
-      record_keydate2_dd,
-      dummy_id,
-      dd_type,
-      datediff_end, -datediff_start
+      .data$chi,
+      .data$original_admission_date,
+      .data$record_keydate1_dd,
+      .data$record_keydate2_dd,
+      .data$dummy_id,
+      .data$dd_type,
+      .data$datediff_end,
+      dplyr::desc(.data$datediff_start)
     ) %>%
-    dplyr::distinct(postcode,
-      record_keydate1_dd,
-      record_keydate2_dd,
+    dplyr::distinct(
+      .data$postcode,
+      .data$record_keydate1_dd,
+      .data$record_keydate2_dd,
       .keep_all = TRUE
     ) %>%
     # tidy up and rename columns to match the format of episode files

From 2d5f827ed7c27e1dd39cf42f4d816cff2f6a89ce Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Fri, 2 Jun 2023 12:32:31 +0100
Subject: [PATCH 33/37] Use `case_match` instead of `case_when`

---
 R/add_dd.R | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/R/add_dd.R b/R/add_dd.R
index 72be657b6..fc6b52c09 100644
--- a/R/add_dd.R
+++ b/R/add_dd.R
@@ -230,9 +230,10 @@ add_dd <- function(data, year) {
       datediff_end = abs(.data$cij_end_date - .data$record_keydate2_dd),
       datediff_start = .data$cij_start_date - .data$record_keydate1_dd
     ) %>%
-    dplyr::mutate(smrtype_dd = dplyr::case_when(
-      dd_type %in% c(
     dplyr::filter(.data$dd_type != "-") %>%
+    dplyr::mutate(smrtype_dd = dplyr::case_match(
+      .data$dd_type,
+      c(
         "1",
         "1P",
         "1A",
@@ -250,7 +251,7 @@ add_dd <- function(data, year) {
         "4",
         "4P"
       ) ~ "DD-CIJ",
-      dd_type %in% c("no-cij") ~ "DD-No CIJ"
+      "no-cij" ~ "DD-No CIJ"
     )) %>%
     # remove duplicated rows when many to many inner join
     # keep the records that closest to the cij record

From 0bf55da40729037261ac1e4bc140f192ad3292d0 Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Fri, 2 Jun 2023 12:46:20 +0100
Subject: [PATCH 34/37] Rename `add_dd()` to `link_delayed_discharge_eps()`

---
 NAMESPACE                                        |  2 +-
 R/add_dd.R                                       |  8 +++++---
 R/run_episode_file.R                             |  2 +-
 man/add_nsu_cohort.Rd                            |  4 ++--
 man/add_ppa_flag.Rd                              |  4 ++--
 man/{add_dd.Rd => link_delayed_discharge_eps.Rd} | 13 +++++++------
 6 files changed, 18 insertions(+), 15 deletions(-)
 rename man/{add_dd.Rd => link_delayed_discharge_eps.Rd} (52%)

diff --git a/NAMESPACE b/NAMESPACE
index 14de9ca55..0343029a6 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,7 +1,6 @@
 # Generated by roxygen2: do not edit by hand
 
 export("%>%")
-export(add_dd)
 export(add_nsu_cohort)
 export(add_ppa_flag)
 export(check_year_format)
@@ -77,6 +76,7 @@ export(la_code_lookup)
 export(last_date_month)
 export(latest_cost_year)
 export(latest_update)
+export(link_delayed_discharge_eps)
 export(midpoint_fy)
 export(phs_db_connection)
 export(previous_update)
diff --git a/R/add_dd.R b/R/add_dd.R
index fc6b52c09..c148ae236 100644
--- a/R/add_dd.R
+++ b/R/add_dd.R
@@ -1,13 +1,15 @@
-#' Add Delay Discharge to working file
+#' Link  Delayed Discharge to WIP episode file
 #'
 #' @param data The input data frame
 #' @param year The year being processed
 #'
-#' @return A data frame linking delay discharge cohorts
+#' @return A data frame with the delayed discharge cohort added and linked
+#' using the `cij_marker`
+#'
 #' @export
 #'
 #' @family episode file
-add_dd <- function(data, year) {
+link_delayed_discharge_eps <- function(data, year) {
   year_param <- year
 
   data <- data %>%
diff --git a/R/run_episode_file.R b/R/run_episode_file.R
index 2fe2c44d9..b1437f2a4 100644
--- a/R/run_episode_file.R
+++ b/R/run_episode_file.R
@@ -74,7 +74,7 @@ run_episode_file <- function(processed_data_list, year, write_to_disk = TRUE) {
     fill_missing_cij_markers() %>%
     create_cost_inc_dna() %>%
     add_ppa_flag() %>%
-    add_dd(year) %>%
+    link_delayed_discharge_eps(year) %>%
     add_nsu_cohort(year) %>%
     match_on_ltcs(year) %>%
     correct_demographics(year) %>%
diff --git a/man/add_nsu_cohort.Rd b/man/add_nsu_cohort.Rd
index e80fe2ede..723c105e1 100644
--- a/man/add_nsu_cohort.Rd
+++ b/man/add_nsu_cohort.Rd
@@ -21,7 +21,7 @@ Add NSU cohort to working file
 \code{\link[=get_nsu_path]{get_nsu_path()}}
 
 Other episode file: 
-\code{\link{add_dd}()},
-\code{\link{add_ppa_flag}()}
+\code{\link{add_ppa_flag}()},
+\code{\link{link_delayed_discharge_eps}()}
 }
 \concept{episode file}
diff --git a/man/add_ppa_flag.Rd b/man/add_ppa_flag.Rd
index 55660352d..8533a09f5 100644
--- a/man/add_ppa_flag.Rd
+++ b/man/add_ppa_flag.Rd
@@ -19,7 +19,7 @@ was preventable or not.
 }
 \seealso{
 Other episode file: 
-\code{\link{add_dd}()},
-\code{\link{add_nsu_cohort}()}
+\code{\link{add_nsu_cohort}()},
+\code{\link{link_delayed_discharge_eps}()}
 }
 \concept{episode file}
diff --git a/man/add_dd.Rd b/man/link_delayed_discharge_eps.Rd
similarity index 52%
rename from man/add_dd.Rd
rename to man/link_delayed_discharge_eps.Rd
index 2dd6685c3..20b09f4bb 100644
--- a/man/add_dd.Rd
+++ b/man/link_delayed_discharge_eps.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/add_dd.R
-\name{add_dd}
-\alias{add_dd}
-\title{Add Delay Discharge to working file}
+\name{link_delayed_discharge_eps}
+\alias{link_delayed_discharge_eps}
+\title{Link  Delayed Discharge to WIP episode file}
 \usage{
-add_dd(data, year)
+link_delayed_discharge_eps(data, year)
 }
 \arguments{
 \item{data}{The input data frame}
@@ -12,10 +12,11 @@ add_dd(data, year)
 \item{year}{The year being processed}
 }
 \value{
-A data frame linking delay discharge cohorts
+A data frame with the delayed discharge cohort added and linked
+using the \code{cij_marker}
 }
 \description{
-Add Delay Discharge to working file
+Link  Delayed Discharge to WIP episode file
 }
 \seealso{
 Other episode file: 

From f189ea3f91a3961ae126bc700620c64bb2d793dc Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Fri, 2 Jun 2023 12:48:43 +0100
Subject: [PATCH 35/37] Rename `add_dd.R` to `link_delayed_discharge_eps.R`

---
 R/{add_dd.R => link_delayed_discharge_eps.R} | 0
 man/link_delayed_discharge_eps.Rd            | 2 +-
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename R/{add_dd.R => link_delayed_discharge_eps.R} (100%)

diff --git a/R/add_dd.R b/R/link_delayed_discharge_eps.R
similarity index 100%
rename from R/add_dd.R
rename to R/link_delayed_discharge_eps.R
diff --git a/man/link_delayed_discharge_eps.Rd b/man/link_delayed_discharge_eps.Rd
index 20b09f4bb..b09d70ad0 100644
--- a/man/link_delayed_discharge_eps.Rd
+++ b/man/link_delayed_discharge_eps.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/add_dd.R
+% Please edit documentation in R/link_delayed_discharge_eps.R
 \name{link_delayed_discharge_eps}
 \alias{link_delayed_discharge_eps}
 \title{Link  Delayed Discharge to WIP episode file}

From 0e9f0acf2a87123b5dd797aa044795800b1af443 Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Fri, 2 Jun 2023 12:49:11 +0100
Subject: [PATCH 36/37] Update the documentation for `last_date_month`

---
 R/last_date_month.R    | 11 +++++------
 man/last_date_month.Rd |  8 ++++----
 2 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/R/last_date_month.R b/R/last_date_month.R
index 471fda031..979970f87 100644
--- a/R/last_date_month.R
+++ b/R/last_date_month.R
@@ -1,16 +1,15 @@
 #' Return the end date of the month of the given date
 #'
-#' @description Return the end date of the month of the given date
+#' @param date a date with a date format.
 #'
-#' @param x a date with a date format
+#' @return a vector of dates, giving the last day of the month.
 #'
-#' @return a vector of dates of the end date of the FY year
 #' @export
 #'
 #' @examples
-#' last_date_month(lubridate::as_date("2020-02-05"))
+#' last_date_month(Sys.Date())
 #'
 #' @family date functions
-last_date_month <- function(x) {
-  return(lubridate::ceiling_date(x, "month") - lubridate::days(1))
+last_date_month <- function(date) {
+  return(lubridate::ceiling_date(date, "month") - lubridate::days(1))
 }
diff --git a/man/last_date_month.Rd b/man/last_date_month.Rd
index 441f04bbf..7a0eae26e 100644
--- a/man/last_date_month.Rd
+++ b/man/last_date_month.Rd
@@ -4,19 +4,19 @@
 \alias{last_date_month}
 \title{Return the end date of the month of the given date}
 \usage{
-last_date_month(x)
+last_date_month(date)
 }
 \arguments{
-\item{x}{a date with a date format}
+\item{date}{a date with a date format.}
 }
 \value{
-a vector of dates of the end date of the FY year
+a vector of dates, giving the last day of the month.
 }
 \description{
 Return the end date of the month of the given date
 }
 \examples{
-last_date_month(lubridate::as_date("2020-02-05"))
+last_date_month(Sys.Date())
 
 }
 \seealso{

From 0ec7ec147c82a8ec5ea303bb4141a9bb617a91a7 Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Fri, 2 Jun 2023 12:57:03 +0100
Subject: [PATCH 37/37] Add tests for `last_date_month`

---
 tests/testthat/test-last_date_month.R | 37 +++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100644 tests/testthat/test-last_date_month.R

diff --git a/tests/testthat/test-last_date_month.R b/tests/testthat/test-last_date_month.R
new file mode 100644
index 000000000..bd2dacf27
--- /dev/null
+++ b/tests/testthat/test-last_date_month.R
@@ -0,0 +1,37 @@
+test_that("last_date_month handles types correctly", {
+  expect_s3_class(last_date_month(Sys.Date()), "Date")
+  expect_s3_class(last_date_month(lubridate::today()), "Date")
+
+  expect_error(last_date_month("2000-01-01"))
+})
+
+test_that("last_date_month is correct", {
+  dates <- as.Date(
+    c(
+      "2020-01-01",
+      "2020-01-30",
+      "2020-02-01",
+      "2020-02-28",
+      "2020-02-29",
+      "2022-02-01",
+      "2022-02-28",
+      "2022-02-29"
+    )
+  )
+
+  expect_equal(
+    last_date_month(dates),
+    as.Date(
+      c(
+        "2020-01-31",
+        "2020-01-31",
+        "2020-02-29",
+        "2020-02-29",
+        "2020-02-29",
+        "2022-02-28",
+        "2022-02-28",
+        NA
+      )
+    )
+  )
+})