From 9adec47c3112e2e740793886f91ef2ed19dcdccd Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Tue, 18 Apr 2023 11:27:21 +0100 Subject: [PATCH 01/37] initial rough work on delay discharge --- R/add_dd.R | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 R/add_dd.R diff --git a/R/add_dd.R b/R/add_dd.R new file mode 100644 index 000000000..1bb55d2d2 --- /dev/null +++ b/R/add_dd.R @@ -0,0 +1,93 @@ +#' Add Delay Discharge to working file +#' +#' @param data The input data frame +#' @param year The year being processed +#' +#' @return A data frame linking delay discharge cohorts +#' @export +#' +#' @family episode file +add_dd <- function(data, year) { + year_param <- year + + data_chi <- data %>% + # Keep records that have a chi, and a cij_marker. + dplyr::filter(is.na(chi)) %>% + dplyr::filter(recid %in% c("01B", "02B", "04B", "GLS")) %>% + # create a copy of the CIJ maker + dplyr::mutate( + temp_cij_maker = cij_maker + ) %>% + dplyr::full_join( + # Not sure which function to use here. Will change it later + haven::read_sav("/conf/hscdiip/SLF_Extracts/Delayed_Discharges/Jul16_Sep22DD_LinkageFile.zsav"), + by = "chi" + ) %>% + # Create an order variable to make DD records appear after others. + # but might it be better if recid has levels? + dplyr::mutate( + order = dplyr::case_when( + recid %in% c("00B", "01B", "02B", "04B", "GLS") ~ 1L, + recid == "DD" ~ 2L, + TRUE ~ NA + ) + ) %>% + # Remove any DD records which don't match a chi in the file. + dplyr::arrange(chi) %>% + dplyr::filter(!(recid == "DD" & chi != dplyr::lag(chi))) %>% + # sort so that DD is roughly where we expect it to fit + dplyr::arrange(chi, keydate1_dateformat) %>% + + # Capture the Mental Health delays with no end dates. + dplyr::mutate( + Flag_8 = dplyr::if_else(( + chi = dplyr::lag(chi) & recid == "DD" & lag(recid) == "04B" & + is.na(keydate2_dateformat) & + is.na(lag(keydate2_dateformat)) & + keydate1_dateformat > (lag(CIJ_start_date) - lubridate::days(1))), + dplyr::if_else(keydate1_dateformat > (lag(CIJ_start_date)), 2, 1), + NA + ), + temp_cij_maker = dplyr::if_else(( + chi = dplyr::lag(chi) & recid == "DD" & lag(recid) == "04B" & + is.na(keydate2_dateformat) & + is.na(lag(keydate2_dateformat)) & + keydate1_dateformat > (lag(CIJ_start_date) - lubridate::days(1))), + dplyr::lag(temp_cij_maker), + NA + ), + CIJ_start_date = dplyr::if_else(( + chi = dplyr::lag(chi) & recid == "DD" & lag(recid) == "04B" & + is.na(keydate2_dateformat) & + is.na(lag(keydate2_dateformat)) & + keydate1_dateformat > (lag(CIJ_start_date) - lubridate::days(1))), + dplyr::lag(CIJ_start_date), + NA + ), + CIJ_end_date = dplyr::if_else(( + chi = dplyr::lag(chi) & recid == "DD" & lag(recid) == "04B" & + is.na(keydate2_dateformat) & + is.na(lag(keydate2_dateformat)) & + keydate1_dateformat > (lag(CIJ_start_date) - lubridate::days(1))), + dplyr::lag(CIJ_end_date), + NA + )) %>% + + # Use Min and Max CIJ dates to fill in temp_cij_marker - + # where possible - DD episodes with no CIJ. + + + + + + data_return <- row_bind( + data_chi, + data %>% + dplyr::filter(is.na(chi)) %>% + dplyr::filter(!(recid %in% c("01B", "02B", "04B", "GLS"))), + data %>% + dplyr::filter(!is.na(chi)) + ) + + return() +} From 8e5ba8010cc05347a4594c9e1cdd29970fc01a62 Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Tue, 18 Apr 2023 10:31:40 +0000 Subject: [PATCH 02/37] Update documentation --- NAMESPACE | 1 + man/add_dd.Rd | 25 +++++++++++++++++++++++++ man/add_nsu_cohort.Rd | 1 + man/add_ppa_flag.Rd | 1 + 4 files changed, 28 insertions(+) create mode 100644 man/add_dd.Rd diff --git a/NAMESPACE b/NAMESPACE index ce736d376..68de0ab3e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,7 @@ # Generated by roxygen2: do not edit by hand export("%>%") +export(add_dd) export(add_nsu_cohort) export(add_ppa_flag) export(add_smr_type) diff --git a/man/add_dd.Rd b/man/add_dd.Rd new file mode 100644 index 000000000..2dd6685c3 --- /dev/null +++ b/man/add_dd.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/add_dd.R +\name{add_dd} +\alias{add_dd} +\title{Add Delay Discharge to working file} +\usage{ +add_dd(data, year) +} +\arguments{ +\item{data}{The input data frame} + +\item{year}{The year being processed} +} +\value{ +A data frame linking delay discharge cohorts +} +\description{ +Add Delay Discharge to working file +} +\seealso{ +Other episode file: +\code{\link{add_nsu_cohort}()}, +\code{\link{add_ppa_flag}()} +} +\concept{episode file} diff --git a/man/add_nsu_cohort.Rd b/man/add_nsu_cohort.Rd index f6fd2df65..e80fe2ede 100644 --- a/man/add_nsu_cohort.Rd +++ b/man/add_nsu_cohort.Rd @@ -21,6 +21,7 @@ Add NSU cohort to working file \code{\link[=get_nsu_path]{get_nsu_path()}} Other episode file: +\code{\link{add_dd}()}, \code{\link{add_ppa_flag}()} } \concept{episode file} diff --git a/man/add_ppa_flag.Rd b/man/add_ppa_flag.Rd index 9eb82797c..55660352d 100644 --- a/man/add_ppa_flag.Rd +++ b/man/add_ppa_flag.Rd @@ -19,6 +19,7 @@ was preventable or not. } \seealso{ Other episode file: +\code{\link{add_dd}()}, \code{\link{add_nsu_cohort}()} } \concept{episode file} From c02118903a0d731e98398516e68014cdf502bd08 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Wed, 19 Apr 2023 18:05:26 +0100 Subject: [PATCH 03/37] some conversion from SPSS --- R/add_dd.R | 84 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 50 insertions(+), 34 deletions(-) diff --git a/R/add_dd.R b/R/add_dd.R index 1bb55d2d2..0ce225a36 100644 --- a/R/add_dd.R +++ b/R/add_dd.R @@ -37,49 +37,65 @@ add_dd <- function(data, year) { dplyr::filter(!(recid == "DD" & chi != dplyr::lag(chi))) %>% # sort so that DD is roughly where we expect it to fit dplyr::arrange(chi, keydate1_dateformat) %>% + # add row number to restore the order later + dplyr::mutate(row_no = dplyr::row_number()) - # Capture the Mental Health delays with no end dates. - dplyr::mutate( - Flag_8 = dplyr::if_else(( - chi = dplyr::lag(chi) & recid == "DD" & lag(recid) == "04B" & - is.na(keydate2_dateformat) & - is.na(lag(keydate2_dateformat)) & - keydate1_dateformat > (lag(CIJ_start_date) - lubridate::days(1))), - dplyr::if_else(keydate1_dateformat > (lag(CIJ_start_date)), 2, 1), - NA - ), - temp_cij_maker = dplyr::if_else(( - chi = dplyr::lag(chi) & recid == "DD" & lag(recid) == "04B" & - is.na(keydate2_dateformat) & - is.na(lag(keydate2_dateformat)) & - keydate1_dateformat > (lag(CIJ_start_date) - lubridate::days(1))), - dplyr::lag(temp_cij_maker), - NA - ), - CIJ_start_date = dplyr::if_else(( - chi = dplyr::lag(chi) & recid == "DD" & lag(recid) == "04B" & - is.na(keydate2_dateformat) & - is.na(lag(keydate2_dateformat)) & - keydate1_dateformat > (lag(CIJ_start_date) - lubridate::days(1))), - dplyr::lag(CIJ_start_date), - NA - ), - CIJ_end_date = dplyr::if_else(( - chi = dplyr::lag(chi) & recid == "DD" & lag(recid) == "04B" & + # Capture the Mental Health delays with no end dates. + data_chi_1 <- data_chi %>% + dplyr::select( + chi, + recid, + keydate1_dateformat, + keydate2_dateformat, + CIJ_start_date, + CIJ_end_date, + temp_cij_marker, + row_no + ) %>% + + dplyr::filter( + chi == lag(chi) & recid == "DD" & + lag(recid) == "04B" & is.na(keydate2_dateformat) & is.na(lag(keydate2_dateformat)) & - keydate1_dateformat > (lag(CIJ_start_date) - lubridate::days(1))), - dplyr::lag(CIJ_end_date), - NA - )) %>% + keydate1_dateformat >= lag(CIJ_start_date) - lubridate::days(1) + ) %>% + dplyr::mutate( + Flag_8 = dplyr::if_else(keydate1_dateformat >= lag(CIJ_start_date), 2, 1), + temp_cij_marker = lag(temp_cij_marker), + CIJ_start_date = lag(CIJ_start_date), + CIJ_end_date = lag(CIJ_end_date) + ) + + data_chi <- data_chi %>% + dplyr::left_join(data_chi_1, suffix = c("", "_redundancy")) %>% + dplyr::select(-ends_with("_redundancy")) + # As I imagine, this will possibly leave some NA in columns including + # CIJ_start_date, CIJ_end_date + + # Use Min and Max CIJ dates to fill in temp_cij_marker - + # where possible - DD episodes with no CIJ. + ## difficult parts. hard to vectorize it. + # data_chi_1 <- data_chi %>% + # dplyr::if_else( + # chi == lag(chi) & is.na(temp_cij_marker), + # Flag_1 = 0, + # + # ) + + # ## non-vectorized version. for loop + # for(ii in 2:max(data_chi$row_no)){ + # if(chi[ii] == chi[ii - 1] & is.na(temp_cij_marker[ii])){ + # + # } + # } - # Use Min and Max CIJ dates to fill in temp_cij_marker - - # where possible - DD episodes with no CIJ. + # Eventually, bind non_chi back data_return <- row_bind( data_chi, data %>% From a3b6d0575e7cbb51e0c8ca9bd223348cfc0aae84 Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Wed, 19 Apr 2023 17:10:57 +0000 Subject: [PATCH 04/37] Style code --- R/add_dd.R | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/R/add_dd.R b/R/add_dd.R index 0ce225a36..177c5d89b 100644 --- a/R/add_dd.R +++ b/R/add_dd.R @@ -52,7 +52,6 @@ add_dd <- function(data, year) { temp_cij_marker, row_no ) %>% - dplyr::filter( chi == lag(chi) & recid == "DD" & lag(recid) == "04B" & @@ -95,15 +94,15 @@ add_dd <- function(data, year) { - # Eventually, bind non_chi back - data_return <- row_bind( - data_chi, - data %>% - dplyr::filter(is.na(chi)) %>% - dplyr::filter(!(recid %in% c("01B", "02B", "04B", "GLS"))), - data %>% - dplyr::filter(!is.na(chi)) - ) + # Eventually, bind non_chi back + data_return <- row_bind( + data_chi, + data %>% + dplyr::filter(is.na(chi)) %>% + dplyr::filter(!(recid %in% c("01B", "02B", "04B", "GLS"))), + data %>% + dplyr::filter(!is.na(chi)) + ) return() } From d18f061e218c08a1e73044a8af3d8f3a740995be Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Wed, 26 Apr 2023 16:44:37 +0100 Subject: [PATCH 05/37] a function of adding delay discharge to episode data --- R/add_dd.R | 240 +++++++++++++++++++++++++++++++++++------------------ 1 file changed, 157 insertions(+), 83 deletions(-) diff --git a/R/add_dd.R b/R/add_dd.R index 177c5d89b..695487583 100644 --- a/R/add_dd.R +++ b/R/add_dd.R @@ -10,99 +10,173 @@ add_dd <- function(data, year) { year_param <- year - data_chi <- data %>% - # Keep records that have a chi, and a cij_marker. - dplyr::filter(is.na(chi)) %>% - dplyr::filter(recid %in% c("01B", "02B", "04B", "GLS")) %>% - # create a copy of the CIJ maker + data = data %>% + dplyr::arrange(chi, cij_marker) %>% dplyr::mutate( - temp_cij_maker = cij_maker - ) %>% - dplyr::full_join( - # Not sure which function to use here. Will change it later - haven::read_sav("/conf/hscdiip/SLF_Extracts/Delayed_Discharges/Jul16_Sep22DD_LinkageFile.zsav"), - by = "chi" - ) %>% - # Create an order variable to make DD records appear after others. - # but might it be better if recid has levels? - dplyr::mutate( - order = dplyr::case_when( - recid %in% c("00B", "01B", "02B", "04B", "GLS") ~ 1L, - recid == "DD" ~ 2L, - TRUE ~ NA - ) - ) %>% - # Remove any DD records which don't match a chi in the file. - dplyr::arrange(chi) %>% - dplyr::filter(!(recid == "DD" & chi != dplyr::lag(chi))) %>% - # sort so that DD is roughly where we expect it to fit - dplyr::arrange(chi, keydate1_dateformat) %>% - # add row number to restore the order later - dplyr::mutate(row_no = dplyr::row_number()) - - # Capture the Mental Health delays with no end dates. - data_chi_1 <- data_chi %>% - dplyr::select( - chi, - recid, - keydate1_dateformat, - keydate2_dateformat, - CIJ_start_date, - CIJ_end_date, - temp_cij_marker, - row_no - ) %>% - dplyr::filter( - chi == lag(chi) & recid == "DD" & - lag(recid) == "04B" & - is.na(keydate2_dateformat) & - is.na(lag(keydate2_dateformat)) & - keydate1_dateformat >= lag(CIJ_start_date) - lubridate::days(1) - ) %>% - dplyr::mutate( - Flag_8 = dplyr::if_else(keydate1_dateformat >= lag(CIJ_start_date), 2, 1), - temp_cij_marker = lag(temp_cij_marker), - CIJ_start_date = lag(CIJ_start_date), - CIJ_end_date = lag(CIJ_end_date) + cij_start_date_lower = cij_start_date - lubridate::days(1), + cij_end_date_upper = cij_end_date + lubridate::days(1) ) - data_chi <- data_chi %>% - dplyr::left_join(data_chi_1, suffix = c("", "_redundancy")) %>% - dplyr::select(-ends_with("_redundancy")) - # As I imagine, this will possibly leave some NA in columns including - # CIJ_start_date, CIJ_end_date + ## handling DD ---- + dd_data = read_file(get_source_extract_path(year_param, "DD")) + by_dd = dplyr::join_by( + chi, + x$keydate1_dateformat >= y$cij_start_date_lower, + x$keydate2_dateformat <= y$cij_end_date_upper + ) + data = dd_data %>% + dplyr::inner_join(data, + by_dd, + suffix = c("_dd", "")) %>% + dplyr::arrange(cij_start_date, cij_end_date, cij_marker, postcode) %>% + # remove duplicate columns + dplyr::distinct( + cij_start_date, + cij_end_date, + cij_marker, + keydate1_dateformat_dd, + keydate2_dateformat_dd, + .keep_all = TRUE + ) %>% + # determine DD quality + dplyr::mutate(dd_type = dplyr::if_else( + is.na(cij_marker), + "no-cij", + dplyr::case_when( + # "1" "Accurate Match - (1)" + # "1P" "Accurate Match (allowing +-1 day) - (1P)" + # "1A" "Accurate Match (has an assumed end date) - (1A)" + # "1AP" "Accurate Match (allowing +-1 day and has an assumed end date) - (1AP)" + # "2" "Starts in CIJ - (2)" + # "2D" "Starts in CIJ (ends one day after) - (2D)" + # "2DP" "Starts in CIJ (allowing +-1 day and ends one day after) - (2DP)" + # "2A" "Starts in CIJ (Accurate Match after correcting assumed end date) - (2A)" + # "2AP" "Starts in CIJ (Accurate Match (allowing +-1 day) after correcting assumed end date) - (2AP)" + # "3" "Ends in CIJ - (3)" + # "3D" "Ends in CIJ (starts one day before) - (3D)" + # "3DP" "Ends in CIJ (allowing +-1 day and starts one day before) - (3DP)" + # "4" "Matches unended MH record - (4)" + # "4P" "Matches unended MH record (allowing -1 day) - (4P)" + # "-" "No Match (We don't keep these)". + + # "1" "Accurate Match - (1)" + keydate1_dateformat_dd >= cij_start_date & + keydate2_dateformat_dd <= cij_end_date & + !amended_dates ~ "1", - # Use Min and Max CIJ dates to fill in temp_cij_marker - - # where possible - DD episodes with no CIJ. - ## difficult parts. hard to vectorize it. - # data_chi_1 <- data_chi %>% - # dplyr::if_else( - # chi == lag(chi) & is.na(temp_cij_marker), - # Flag_1 = 0, - # - # ) + # "1P" "Accurate Match (allowing +-1 day) - (1P)" + keydate1_dateformat_dd >= cij_start_date_lower & + keydate2_dateformat_dd <= cij_end_date_upper & + !amended_dates ~ "1P", - # ## non-vectorized version. for loop - # for(ii in 2:max(data_chi$row_no)){ - # if(chi[ii] == chi[ii - 1] & is.na(temp_cij_marker[ii])){ - # - # } - # } + # "1A" "Accurate Match (has an assumed end date) - (1A)" + keydate1_dateformat_dd >= cij_start_date & + keydate2_dateformat_dd <= cij_end_date & + amended_dates ~ "1P", + # "1AP" "Accurate Match (allowing +-1 day and has an assumed end date) - (1AP)" + keydate1_dateformat_dd >= cij_start_date_lower & + keydate2_dateformat_dd <= cij_end_date_upper & + amended_dates ~ "1AP", + # "2" "Starts in CIJ - (2)" + keydate1_dateformat_dd >= cij_start_date & + keydate1_dateformat_dd <= cij_end_date & + keydate2_dateformat_dd >= cij_end_date & + !amended_dates ~ "2", + # "2D" "Starts in CIJ (ends one day after) - (2D)" + keydate1_dateformat_dd >= cij_start_date & + keydate1_dateformat_dd <= cij_end_date & + keydate2_dateformat_dd >= cij_end_date_upper & + !amended_dates ~ "2D", + # "2DP" "Starts in CIJ (allowing +-1 day and ends one day after) - (2DP)" + keydate1_dateformat_dd >= cij_start_date_lower & + keydate1_dateformat_dd <= cij_end_date_upper & + keydate2_dateformat_dd >= cij_end_date_upper & + !amended_dates ~ "2DP", + # "2A" "Starts in CIJ (Accurate Match after correcting assumed end date) - (2A)" + keydate1_dateformat_dd >= cij_start_date & + keydate1_dateformat_dd <= cij_end_date & + keydate2_dateformat_dd >= cij_end_date & + amended_dates ~ "2A", - # Eventually, bind non_chi back - data_return <- row_bind( - data_chi, - data %>% - dplyr::filter(is.na(chi)) %>% - dplyr::filter(!(recid %in% c("01B", "02B", "04B", "GLS"))), - data %>% - dplyr::filter(!is.na(chi)) - ) + # "2AP" "Starts in CIJ (Accurate Match (allowing +-1 day) after correcting assumed end date) - (2AP)" + keydate1_dateformat_dd >= cij_start_date_lower & + keydate1_dateformat_dd <= cij_end_date_upper & + keydate2_dateformat_dd >= cij_end_date_upper & + amended_dates ~ "2AP", + + # "3" "Ends in CIJ - (3)" + keydate1_dateformat_dd <= cij_start_date & + keydate2_dateformat_dd >= cij_start_date & + keydate2_dateformat_dd >= cij_end_date & + !amended_dates ~ "3", + + # "3D" "Ends in CIJ (starts one day before) - (3D)" + keydate1_dateformat_dd <= cij_start_date_lower & + keydate2_dateformat_dd >= cij_start_date & + keydate2_dateformat_dd >= cij_end_date & + !amended_dates ~ "3D", + + # "3DP" "Ends in CIJ (allowing +-1 day and starts one day before) - (3DP)" + keydate1_dateformat_dd <= cij_start_date_lower & + keydate2_dateformat_dd >= cij_start_date_lower & + keydate2_dateformat_dd >= cij_end_date_upper & + !amended_dates ~ "3DP", + + # "4" "Matches unended MH record - (4)" + recid == "04B" & + keydate1_dateformat_dd >= cij_start_date & + amended_dates ~ "4", + + # "4P" "Matches unended MH record (allowing -1 day) - (4P)" + recid == "04B" & + keydate1_dateformat_dd >= cij_start_date_lower & + amended_dates ~ "4P", + + # "-" "No Match (We don't keep these)" + .default = "-" + ) + )) %>% + dplyr::filter(dd_type != "-") %>% + dplyr::mutate(smrtype_dd = dplyr::case_when( + dd_type %in% c( + "1", + "1P", + "1A", + "1AP", + "2", + "2D", + "2DP", + "2A", + "2AP", + "3", + "3D", + "3DP", + "4", + "4P" + ) ~ "DD-CIJ", + dd_type %in% c("no-cij") ~ "DD-No CIJ" + )) %>% + # tidy up and rename columns to match the format of episode files + dplyr::select( + chi, + recid = recid_dd, + keydate1_dateformat = keydate1_dateformat_dd, + keydate2_dateformat = keydate2_dateformat_dd, + smrtype = smrtype_dd, + cij_marker, + cij_start_date, + cij_end_date, + postcode = postcode_dd + ) %>% + # combind DD with episode data + dplyr::bind_rows(data %>% dplyr::select(-c( + "cij_start_date_lower", "cij_end_date_upper" + ))) - return() + return(data) } From f3ba46ff1a9528ed280a4f7d43a32a70cf502050 Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Wed, 26 Apr 2023 15:51:10 +0000 Subject: [PATCH 06/37] Style code --- R/add_dd.R | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/R/add_dd.R b/R/add_dd.R index 695487583..48280b7be 100644 --- a/R/add_dd.R +++ b/R/add_dd.R @@ -10,7 +10,7 @@ add_dd <- function(data, year) { year_param <- year - data = data %>% + data <- data %>% dplyr::arrange(chi, cij_marker) %>% dplyr::mutate( cij_start_date_lower = cij_start_date - lubridate::days(1), @@ -18,16 +18,17 @@ add_dd <- function(data, year) { ) ## handling DD ---- - dd_data = read_file(get_source_extract_path(year_param, "DD")) - by_dd = dplyr::join_by( + dd_data <- read_file(get_source_extract_path(year_param, "DD")) + by_dd <- dplyr::join_by( chi, x$keydate1_dateformat >= y$cij_start_date_lower, x$keydate2_dateformat <= y$cij_end_date_upper ) - data = dd_data %>% + data <- dd_data %>% dplyr::inner_join(data, - by_dd, - suffix = c("_dd", "")) %>% + by_dd, + suffix = c("_dd", "") + ) %>% dplyr::arrange(cij_start_date, cij_end_date, cij_marker, postcode) %>% # remove duplicate columns dplyr::distinct( From 5d00df40e5699094112245ebf9b409615b07bd48 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Wed, 26 Apr 2023 17:54:49 +0100 Subject: [PATCH 07/37] Update R/add_dd.R Co-authored-by: James McMahon --- R/add_dd.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/add_dd.R b/R/add_dd.R index 48280b7be..ee17d23cf 100644 --- a/R/add_dd.R +++ b/R/add_dd.R @@ -26,7 +26,7 @@ add_dd <- function(data, year) { ) data <- dd_data %>% dplyr::inner_join(data, - by_dd, + by = by_dd, suffix = c("_dd", "") ) %>% dplyr::arrange(cij_start_date, cij_end_date, cij_marker, postcode) %>% From 75017d9276aadffa8a80942793fc071aad2cf73e Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Wed, 26 Apr 2023 17:55:04 +0100 Subject: [PATCH 08/37] Update R/add_dd.R Co-authored-by: James McMahon --- R/add_dd.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/add_dd.R b/R/add_dd.R index ee17d23cf..e49144032 100644 --- a/R/add_dd.R +++ b/R/add_dd.R @@ -32,6 +32,7 @@ add_dd <- function(data, year) { dplyr::arrange(cij_start_date, cij_end_date, cij_marker, postcode) %>% # remove duplicate columns dplyr::distinct( + chi, cij_start_date, cij_end_date, cij_marker, From 957575505237fafd69b483be14453b319a1f9223 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Wed, 26 Apr 2023 17:55:37 +0100 Subject: [PATCH 09/37] Update R/add_dd.R Co-authored-by: James McMahon --- R/add_dd.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/add_dd.R b/R/add_dd.R index e49144032..de45f25e0 100644 --- a/R/add_dd.R +++ b/R/add_dd.R @@ -22,7 +22,7 @@ add_dd <- function(data, year) { by_dd <- dplyr::join_by( chi, x$keydate1_dateformat >= y$cij_start_date_lower, - x$keydate2_dateformat <= y$cij_end_date_upper + x$keydate2_dateformat <= y$ ) data <- dd_data %>% dplyr::inner_join(data, From 352d4a85bb44dd9eda010457b53a15dd46467114 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Tue, 2 May 2023 18:44:09 +0100 Subject: [PATCH 10/37] add_dd functions --- R/add_dd.R | 135 +++++++++++++++++++++++++++++++++++--------- R/last_date_month.R | 16 ++++++ 2 files changed, 125 insertions(+), 26 deletions(-) create mode 100644 R/last_date_month.R diff --git a/R/add_dd.R b/R/add_dd.R index de45f25e0..1827ee5d9 100644 --- a/R/add_dd.R +++ b/R/add_dd.R @@ -11,26 +11,50 @@ add_dd <- function(data, year) { year_param <- year data <- data %>% - dplyr::arrange(chi, cij_marker) %>% dplyr::mutate( + # remember to revoke the cij_end_date with dummy_cij_end cij_start_date_lower = cij_start_date - lubridate::days(1), - cij_end_date_upper = cij_end_date + lubridate::days(1) + cij_end_date_upper = cij_end_date + lubridate::days(1), + cij_end_month = last_date_month(cij_end_date), + + is_dummy_cij_start = is.na(cij_start_date) & !is.na(cij_end_date), + dummy_cij_start = dplyr::if_else( + is_dummy_cij_start, + lubridate::as_date("1900-01-01"), + cij_start_date_lower + ), + is_dummy_cij_end = !is.na(cij_start_date) & is.na(cij_end_date), + dummy_cij_end = dplyr::if_else( + is_dummy_cij_end, + lubridate::today(), + cij_end_month + ) ) ## handling DD ---- - dd_data <- read_file(get_source_extract_path(year_param, "DD")) + # no flag for last reported + dd_data <- + read_file(get_source_extract_path(year_param, "DD")) %>% + dplyr::mutate( + # remember to revoke the keydate2 and amended_dates with dummy_keydate2 + is_dummy_keydate2 = is.na(keydate2_dateformat), + dummy_keydate2 = dplyr::if_else(is_dummy_keydate2, + lubridate::today(), + keydate2_dateformat), + dummy_id = dplyr::row_number() + ) + by_dd <- dplyr::join_by( chi, - x$keydate1_dateformat >= y$cij_start_date_lower, - x$keydate2_dateformat <= y$ + x$keydate1_dateformat >= y$dummy_cij_start, + x$dummy_keydate2 <= y$dummy_cij_end ) data <- dd_data %>% dplyr::inner_join(data, - by = by_dd, - suffix = c("_dd", "") - ) %>% + by = by_dd, + suffix = c("_dd", "")) %>% dplyr::arrange(cij_start_date, cij_end_date, cij_marker, postcode) %>% - # remove duplicate columns + # remove duplicate rows, but still got some duplicate mis-matches dplyr::distinct( chi, cij_start_date, @@ -40,6 +64,7 @@ add_dd <- function(data, year) { keydate2_dateformat_dd, .keep_all = TRUE ) %>% + # determine DD quality dplyr::mutate(dd_type = dplyr::if_else( is.na(cij_marker), @@ -61,6 +86,13 @@ add_dd <- function(data, year) { # "4P" "Matches unended MH record (allowing -1 day) - (4P)" # "-" "No Match (We don't keep these)". + # If we use keydate2_dateformat_dd, + # we implicitly mean is_dummy_keydate2 needs to be FALSE. + # Given that in DD files, + # we only keep the records with missing keydate2 for 04B, mental health, + # and drop the records with missing keydate2 for other recid, + # it should be ok to only use dummy_keydate2 for "4"(s). + # "1" "Accurate Match - (1)" keydate1_dateformat_dd >= cij_start_date & keydate2_dateformat_dd <= cij_end_date & @@ -74,70 +106,84 @@ add_dd <- function(data, year) { # "1A" "Accurate Match (has an assumed end date) - (1A)" keydate1_dateformat_dd >= cij_start_date & keydate2_dateformat_dd <= cij_end_date & - amended_dates ~ "1P", + amended_dates ~ "1A", # "1AP" "Accurate Match (allowing +-1 day and has an assumed end date) - (1AP)" keydate1_dateformat_dd >= cij_start_date_lower & keydate2_dateformat_dd <= cij_end_date_upper & amended_dates ~ "1AP", + # "1APE" the CIJ ends during the month but the delay has an end date of the end of the month + keydate1_dateformat_dd >= cij_start_date_lower & + keydate2_dateformat_dd == cij_end_month & + amended_dates ~ "1APE", + # "2" "Starts in CIJ - (2)" keydate1_dateformat_dd >= cij_start_date & keydate1_dateformat_dd <= cij_end_date & - keydate2_dateformat_dd >= cij_end_date & + keydate2_dateformat_dd > cij_end_date & !amended_dates ~ "2", # "2D" "Starts in CIJ (ends one day after) - (2D)" keydate1_dateformat_dd >= cij_start_date & keydate1_dateformat_dd <= cij_end_date & - keydate2_dateformat_dd >= cij_end_date_upper & + keydate2_dateformat_dd > cij_end_date_upper & !amended_dates ~ "2D", # "2DP" "Starts in CIJ (allowing +-1 day and ends one day after) - (2DP)" keydate1_dateformat_dd >= cij_start_date_lower & keydate1_dateformat_dd <= cij_end_date_upper & - keydate2_dateformat_dd >= cij_end_date_upper & + keydate2_dateformat_dd > cij_end_date_upper & !amended_dates ~ "2DP", # "2A" "Starts in CIJ (Accurate Match after correcting assumed end date) - (2A)" keydate1_dateformat_dd >= cij_start_date & keydate1_dateformat_dd <= cij_end_date & - keydate2_dateformat_dd >= cij_end_date & + keydate2_dateformat_dd > cij_end_date & amended_dates ~ "2A", # "2AP" "Starts in CIJ (Accurate Match (allowing +-1 day) after correcting assumed end date) - (2AP)" keydate1_dateformat_dd >= cij_start_date_lower & keydate1_dateformat_dd <= cij_end_date_upper & - keydate2_dateformat_dd >= cij_end_date_upper & + keydate2_dateformat_dd > cij_end_date_upper & + # keydate2_dateformat_dd == cij_end_month & amended_dates ~ "2AP", # "3" "Ends in CIJ - (3)" keydate1_dateformat_dd <= cij_start_date & keydate2_dateformat_dd >= cij_start_date & - keydate2_dateformat_dd >= cij_end_date & + keydate2_dateformat_dd <= cij_end_date & !amended_dates ~ "3", # "3D" "Ends in CIJ (starts one day before) - (3D)" keydate1_dateformat_dd <= cij_start_date_lower & keydate2_dateformat_dd >= cij_start_date & - keydate2_dateformat_dd >= cij_end_date & + keydate2_dateformat_dd <= cij_end_date & !amended_dates ~ "3D", # "3DP" "Ends in CIJ (allowing +-1 day and starts one day before) - (3DP)" keydate1_dateformat_dd <= cij_start_date_lower & keydate2_dateformat_dd >= cij_start_date_lower & - keydate2_dateformat_dd >= cij_end_date_upper & + keydate2_dateformat_dd <= cij_end_date_upper & !amended_dates ~ "3DP", + # "3ADPE" + keydate1_dateformat_dd <= cij_start_date_lower & + keydate2_dateformat_dd >= cij_start_date_lower & + keydate2_dateformat_dd <= cij_end_month & + amended_dates ~ "3ADPE", + + + # "4" "Matches unended MH record - (4)" recid == "04B" & keydate1_dateformat_dd >= cij_start_date & - amended_dates ~ "4", + is_dummy_cij_end ~ "4", # "4P" "Matches unended MH record (allowing -1 day) - (4P)" recid == "04B" & keydate1_dateformat_dd >= cij_start_date_lower & - amended_dates ~ "4P", + is_dummy_cij_end ~ "4P", # "-" "No Match (We don't keep these)" .default = "-" @@ -150,6 +196,7 @@ add_dd <- function(data, year) { "1P", "1A", "1AP", + "1APE", "2", "2D", "2DP", @@ -158,6 +205,7 @@ add_dd <- function(data, year) { "3", "3D", "3DP", + "3ADPE", "4", "4P" ) ~ "DD-CIJ", @@ -165,20 +213,55 @@ add_dd <- function(data, year) { )) %>% # tidy up and rename columns to match the format of episode files dplyr::select( - chi, recid = recid_dd, + chi, keydate1_dateformat = keydate1_dateformat_dd, keydate2_dateformat = keydate2_dateformat_dd, + amended_dates, + delay_end_reason, + primary_delay_reason, + primary_delay_reason, + hbtreatcode, + location, + spec, smrtype = smrtype_dd, cij_marker, cij_start_date, cij_end_date, - postcode = postcode_dd + postcode = postcode_dd, + dd_responsible_lca, + original_admission_date, + dd_type ) %>% - # combind DD with episode data - dplyr::bind_rows(data %>% dplyr::select(-c( - "cij_start_date_lower", "cij_end_date_upper" - ))) + # combine DD with episode data + dplyr::bind_rows(# restore cij_end_date + data %>% + dplyr::select( + -c( + "cij_start_date_lower", + "cij_end_date_upper", + "cij_end_month", + "is_dummy_cij_start", + "dummy_cij_start", + "is_dummy_cij_end", + "dummy_cij_end" + ) + )) + + data_summary = data %>% + filter(recid == "DD") %>% + dplyr::group_by(dd_type) %>% + dplyr::summarise(frequency = dplyr::n()) %>% + dplyr::mutate(total = nrow(dd_data), + percentage = round(frequency / total * 100, 2)) + + data_summary = data.frame( + dd_type = "-", + frequency = data_summary$total[1] - sum(data_summary$frequency), + total = data_summary$total[1] + ) %>% + dplyr::mutate(percentage = round(frequency/total*100, 2)) %>% + dplyr::bind_rows(data_summary) return(data) } diff --git a/R/last_date_month.R b/R/last_date_month.R new file mode 100644 index 000000000..96d936320 --- /dev/null +++ b/R/last_date_month.R @@ -0,0 +1,16 @@ +#' Return the end date of the month of the given date +#' +#' @description Return the end date of the month of the given date +#' +#' @param x a date with a date format +#' +#' @return a vector of dates of the end date of the FY year +#' @export +#' +#' @examples +#' last_date_month(lubridate::as_date("2020-02-05")) +#' +#' @family date functions +last_date_month = function(x){ + return(lubridate::ceiling_date(x, "month") - lubridate::days(1)) +} From 7eac5224d949f76427d359bffd5d0657987a88b5 Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Tue, 2 May 2023 17:46:29 +0000 Subject: [PATCH 11/37] Style code --- R/add_dd.R | 29 ++++++++++++++++------------- R/last_date_month.R | 2 +- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/R/add_dd.R b/R/add_dd.R index 1827ee5d9..1638dac16 100644 --- a/R/add_dd.R +++ b/R/add_dd.R @@ -16,7 +16,6 @@ add_dd <- function(data, year) { cij_start_date_lower = cij_start_date - lubridate::days(1), cij_end_date_upper = cij_end_date + lubridate::days(1), cij_end_month = last_date_month(cij_end_date), - is_dummy_cij_start = is.na(cij_start_date) & !is.na(cij_end_date), dummy_cij_start = dplyr::if_else( is_dummy_cij_start, @@ -39,8 +38,9 @@ add_dd <- function(data, year) { # remember to revoke the keydate2 and amended_dates with dummy_keydate2 is_dummy_keydate2 = is.na(keydate2_dateformat), dummy_keydate2 = dplyr::if_else(is_dummy_keydate2, - lubridate::today(), - keydate2_dateformat), + lubridate::today(), + keydate2_dateformat + ), dummy_id = dplyr::row_number() ) @@ -51,8 +51,9 @@ add_dd <- function(data, year) { ) data <- dd_data %>% dplyr::inner_join(data, - by = by_dd, - suffix = c("_dd", "")) %>% + by = by_dd, + suffix = c("_dd", "") + ) %>% dplyr::arrange(cij_start_date, cij_end_date, cij_marker, postcode) %>% # remove duplicate rows, but still got some duplicate mis-matches dplyr::distinct( @@ -64,7 +65,6 @@ add_dd <- function(data, year) { keydate2_dateformat_dd, .keep_all = TRUE ) %>% - # determine DD quality dplyr::mutate(dd_type = dplyr::if_else( is.na(cij_marker), @@ -234,7 +234,7 @@ add_dd <- function(data, year) { dd_type ) %>% # combine DD with episode data - dplyr::bind_rows(# restore cij_end_date + dplyr::bind_rows( # restore cij_end_date data %>% dplyr::select( -c( @@ -246,21 +246,24 @@ add_dd <- function(data, year) { "is_dummy_cij_end", "dummy_cij_end" ) - )) + ) + ) - data_summary = data %>% + data_summary <- data %>% filter(recid == "DD") %>% dplyr::group_by(dd_type) %>% dplyr::summarise(frequency = dplyr::n()) %>% - dplyr::mutate(total = nrow(dd_data), - percentage = round(frequency / total * 100, 2)) + dplyr::mutate( + total = nrow(dd_data), + percentage = round(frequency / total * 100, 2) + ) - data_summary = data.frame( + data_summary <- data.frame( dd_type = "-", frequency = data_summary$total[1] - sum(data_summary$frequency), total = data_summary$total[1] ) %>% - dplyr::mutate(percentage = round(frequency/total*100, 2)) %>% + dplyr::mutate(percentage = round(frequency / total * 100, 2)) %>% dplyr::bind_rows(data_summary) return(data) diff --git a/R/last_date_month.R b/R/last_date_month.R index 96d936320..471fda031 100644 --- a/R/last_date_month.R +++ b/R/last_date_month.R @@ -11,6 +11,6 @@ #' last_date_month(lubridate::as_date("2020-02-05")) #' #' @family date functions -last_date_month = function(x){ +last_date_month <- function(x) { return(lubridate::ceiling_date(x, "month") - lubridate::days(1)) } From cc5caf0cf005b49a6e341d7eac6737f54491f707 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Wed, 3 May 2023 10:50:40 +0100 Subject: [PATCH 12/37] remove duplicated rows when many to many inner join by keeping the records that are closest to the cij record --- R/add_dd.R | 266 +++++++++++++++++++++++++++++------------------------ 1 file changed, 146 insertions(+), 120 deletions(-) diff --git a/R/add_dd.R b/R/add_dd.R index 1638dac16..725bd83e5 100644 --- a/R/add_dd.R +++ b/R/add_dd.R @@ -66,129 +66,155 @@ add_dd <- function(data, year) { .keep_all = TRUE ) %>% # determine DD quality - dplyr::mutate(dd_type = dplyr::if_else( - is.na(cij_marker), - "no-cij", - dplyr::case_when( - # "1" "Accurate Match - (1)" - # "1P" "Accurate Match (allowing +-1 day) - (1P)" - # "1A" "Accurate Match (has an assumed end date) - (1A)" - # "1AP" "Accurate Match (allowing +-1 day and has an assumed end date) - (1AP)" - # "2" "Starts in CIJ - (2)" - # "2D" "Starts in CIJ (ends one day after) - (2D)" - # "2DP" "Starts in CIJ (allowing +-1 day and ends one day after) - (2DP)" - # "2A" "Starts in CIJ (Accurate Match after correcting assumed end date) - (2A)" - # "2AP" "Starts in CIJ (Accurate Match (allowing +-1 day) after correcting assumed end date) - (2AP)" - # "3" "Ends in CIJ - (3)" - # "3D" "Ends in CIJ (starts one day before) - (3D)" - # "3DP" "Ends in CIJ (allowing +-1 day and starts one day before) - (3DP)" - # "4" "Matches unended MH record - (4)" - # "4P" "Matches unended MH record (allowing -1 day) - (4P)" - # "-" "No Match (We don't keep these)". + dplyr::mutate( + dd_type = dplyr::if_else( + is.na(cij_marker), + "no-cij", + dplyr::case_when( + # "1" "Accurate Match - (1)" + # "1P" "Accurate Match (allowing +-1 day) - (1P)" + # "1A" "Accurate Match (has an assumed end date) - (1A)" + # "1AP" "Accurate Match (allowing +-1 day and has an assumed end date) - (1AP)" + # "2" "Starts in CIJ - (2)" + # "2D" "Starts in CIJ (ends one day after) - (2D)" + # "2DP" "Starts in CIJ (allowing +-1 day and ends one day after) - (2DP)" + # "2A" "Starts in CIJ (Accurate Match after correcting assumed end date) - (2A)" + # "2AP" "Starts in CIJ (Accurate Match (allowing +-1 day) after correcting assumed end date) - (2AP)" + # "3" "Ends in CIJ - (3)" + # "3D" "Ends in CIJ (starts one day before) - (3D)" + # "3DP" "Ends in CIJ (allowing +-1 day and starts one day before) - (3DP)" + # "4" "Matches unended MH record - (4)" + # "4P" "Matches unended MH record (allowing -1 day) - (4P)" + # "-" "No Match (We don't keep these)". - # If we use keydate2_dateformat_dd, - # we implicitly mean is_dummy_keydate2 needs to be FALSE. - # Given that in DD files, - # we only keep the records with missing keydate2 for 04B, mental health, - # and drop the records with missing keydate2 for other recid, - # it should be ok to only use dummy_keydate2 for "4"(s). + # If we use keydate2_dateformat_dd, + # we implicitly mean is_dummy_keydate2 needs to be FALSE. + # Given that in DD files, + # we only keep the records with missing keydate2 for 04B, mental health, + # and drop the records with missing keydate2 for other recid, + # it should be ok to only use dummy_keydate2 for "4"(s). - # "1" "Accurate Match - (1)" - keydate1_dateformat_dd >= cij_start_date & - keydate2_dateformat_dd <= cij_end_date & - !amended_dates ~ "1", + # "1" "Accurate Match - (1)" + keydate1_dateformat_dd >= cij_start_date & + keydate2_dateformat_dd <= cij_end_date & + !amended_dates ~ "1", - # "1P" "Accurate Match (allowing +-1 day) - (1P)" - keydate1_dateformat_dd >= cij_start_date_lower & - keydate2_dateformat_dd <= cij_end_date_upper & - !amended_dates ~ "1P", + # "1P" "Accurate Match (allowing +-1 day) - (1P)" + keydate1_dateformat_dd >= cij_start_date_lower & + keydate2_dateformat_dd <= cij_end_date_upper & + !amended_dates ~ "1P", - # "1A" "Accurate Match (has an assumed end date) - (1A)" - keydate1_dateformat_dd >= cij_start_date & - keydate2_dateformat_dd <= cij_end_date & - amended_dates ~ "1A", + # "1A" "Accurate Match (has an assumed end date) - (1A)" + keydate1_dateformat_dd >= cij_start_date & + keydate2_dateformat_dd <= cij_end_date & + amended_dates ~ "1A", - # "1AP" "Accurate Match (allowing +-1 day and has an assumed end date) - (1AP)" - keydate1_dateformat_dd >= cij_start_date_lower & - keydate2_dateformat_dd <= cij_end_date_upper & - amended_dates ~ "1AP", + # "1AP" "Accurate Match (allowing +-1 day and has an assumed end date) - (1AP)" + keydate1_dateformat_dd >= cij_start_date_lower & + keydate2_dateformat_dd <= cij_end_date_upper & + amended_dates ~ "1AP", - # "1APE" the CIJ ends during the month but the delay has an end date of the end of the month - keydate1_dateformat_dd >= cij_start_date_lower & - keydate2_dateformat_dd == cij_end_month & - amended_dates ~ "1APE", + # "1APE" the CIJ ends during the month but the delay has an end date of the end of the month + keydate1_dateformat_dd >= cij_start_date_lower & + keydate2_dateformat_dd == cij_end_month & + amended_dates ~ "1APE", - # "2" "Starts in CIJ - (2)" - keydate1_dateformat_dd >= cij_start_date & - keydate1_dateformat_dd <= cij_end_date & - keydate2_dateformat_dd > cij_end_date & - !amended_dates ~ "2", + # "2" "Starts in CIJ - (2)" + keydate1_dateformat_dd >= cij_start_date & + keydate1_dateformat_dd <= cij_end_date & + keydate2_dateformat_dd > cij_end_date & + !amended_dates ~ "2", - # "2D" "Starts in CIJ (ends one day after) - (2D)" - keydate1_dateformat_dd >= cij_start_date & - keydate1_dateformat_dd <= cij_end_date & - keydate2_dateformat_dd > cij_end_date_upper & - !amended_dates ~ "2D", + # "2D" "Starts in CIJ (ends one day after) - (2D)" + keydate1_dateformat_dd >= cij_start_date & + keydate1_dateformat_dd <= cij_end_date & + keydate2_dateformat_dd > cij_end_date_upper & + !amended_dates ~ "2D", - # "2DP" "Starts in CIJ (allowing +-1 day and ends one day after) - (2DP)" - keydate1_dateformat_dd >= cij_start_date_lower & - keydate1_dateformat_dd <= cij_end_date_upper & - keydate2_dateformat_dd > cij_end_date_upper & - !amended_dates ~ "2DP", + # "2DP" "Starts in CIJ (allowing +-1 day and ends one day after) - (2DP)" + keydate1_dateformat_dd >= cij_start_date_lower & + keydate1_dateformat_dd <= cij_end_date_upper & + keydate2_dateformat_dd > cij_end_date_upper & + !amended_dates ~ "2DP", - # "2A" "Starts in CIJ (Accurate Match after correcting assumed end date) - (2A)" - keydate1_dateformat_dd >= cij_start_date & - keydate1_dateformat_dd <= cij_end_date & - keydate2_dateformat_dd > cij_end_date & - amended_dates ~ "2A", + # "2A" "Starts in CIJ (Accurate Match after correcting assumed end date) - (2A)" + keydate1_dateformat_dd >= cij_start_date & + keydate1_dateformat_dd <= cij_end_date & + keydate2_dateformat_dd > cij_end_date & + amended_dates ~ "2A", - # "2AP" "Starts in CIJ (Accurate Match (allowing +-1 day) after correcting assumed end date) - (2AP)" - keydate1_dateformat_dd >= cij_start_date_lower & - keydate1_dateformat_dd <= cij_end_date_upper & - keydate2_dateformat_dd > cij_end_date_upper & - # keydate2_dateformat_dd == cij_end_month & - amended_dates ~ "2AP", + # "2AP" "Starts in CIJ (Accurate Match (allowing +-1 day) after correcting assumed end date) - (2AP)" + keydate1_dateformat_dd >= cij_start_date_lower & + keydate1_dateformat_dd <= cij_end_date_upper & + keydate2_dateformat_dd > cij_end_date_upper & + # keydate2_dateformat_dd == cij_end_month & + amended_dates ~ "2AP", - # "3" "Ends in CIJ - (3)" - keydate1_dateformat_dd <= cij_start_date & - keydate2_dateformat_dd >= cij_start_date & - keydate2_dateformat_dd <= cij_end_date & - !amended_dates ~ "3", + # "3" "Ends in CIJ - (3)" + keydate1_dateformat_dd <= cij_start_date & + keydate2_dateformat_dd >= cij_start_date & + keydate2_dateformat_dd <= cij_end_date & + !amended_dates ~ "3", - # "3D" "Ends in CIJ (starts one day before) - (3D)" - keydate1_dateformat_dd <= cij_start_date_lower & - keydate2_dateformat_dd >= cij_start_date & - keydate2_dateformat_dd <= cij_end_date & - !amended_dates ~ "3D", + # "3D" "Ends in CIJ (starts one day before) - (3D)" + keydate1_dateformat_dd <= cij_start_date_lower & + keydate2_dateformat_dd >= cij_start_date & + keydate2_dateformat_dd <= cij_end_date & + !amended_dates ~ "3D", - # "3DP" "Ends in CIJ (allowing +-1 day and starts one day before) - (3DP)" - keydate1_dateformat_dd <= cij_start_date_lower & - keydate2_dateformat_dd >= cij_start_date_lower & - keydate2_dateformat_dd <= cij_end_date_upper & - !amended_dates ~ "3DP", + # "3DP" "Ends in CIJ (allowing +-1 day and starts one day before) - (3DP)" + keydate1_dateformat_dd <= cij_start_date_lower & + keydate2_dateformat_dd >= cij_start_date_lower & + keydate2_dateformat_dd <= cij_end_date_upper & + !amended_dates ~ "3DP", - # "3ADPE" - keydate1_dateformat_dd <= cij_start_date_lower & - keydate2_dateformat_dd >= cij_start_date_lower & - keydate2_dateformat_dd <= cij_end_month & - amended_dates ~ "3ADPE", + # "3ADPE" + keydate1_dateformat_dd <= cij_start_date_lower & + keydate2_dateformat_dd >= cij_start_date_lower & + keydate2_dateformat_dd <= cij_end_month & + amended_dates ~ "3ADPE", - # "4" "Matches unended MH record - (4)" - recid == "04B" & - keydate1_dateformat_dd >= cij_start_date & - is_dummy_cij_end ~ "4", + # "4" "Matches unended MH record - (4)" + recid == "04B" & + keydate1_dateformat_dd >= cij_start_date & + is_dummy_cij_end ~ "4", - # "4P" "Matches unended MH record (allowing -1 day) - (4P)" - recid == "04B" & - keydate1_dateformat_dd >= cij_start_date_lower & - is_dummy_cij_end ~ "4P", + # "4P" "Matches unended MH record (allowing -1 day) - (4P)" + recid == "04B" & + keydate1_dateformat_dd >= cij_start_date_lower & + is_dummy_cij_end ~ "4P", - # "-" "No Match (We don't keep these)" - .default = "-" - ) - )) %>% + # "-" "No Match (We don't keep these)" + .default = "-" + ) + ), + dd_type = factor( + dd_type, + levels = c( + "1", + "1P", + "1A", + "1AP", + "2", + "2D", + "2DP", + "2A", + "2AP", + "3", + "3D", + "3DP", + "1APE", + "3ADPE", + "4", + "4P", + "-" + ) + ), + datediff_end = abs(cij_end_date - keydate2_dateformat_dd), + datediff_start = cij_start_date - keydate1_dateformat_dd + ) %>% dplyr::filter(dd_type != "-") %>% dplyr::mutate(smrtype_dd = dplyr::case_when( dd_type %in% c( @@ -211,6 +237,23 @@ add_dd <- function(data, year) { ) ~ "DD-CIJ", dd_type %in% c("no-cij") ~ "DD-No CIJ" )) %>% + + # remove duplicated rows when many to many inner join + # keep the records that closest to the cij record + dplyr::arrange( + chi, + original_admission_date, + keydate1_dateformat_dd, + keydate2_dateformat_dd, + dummy_id, + dd_type, + datediff_end,-datediff_start + ) %>% + dplyr::distinct(postcode, + keydate1_dateformat_dd, + keydate2_dateformat_dd, + .keep_all = TRUE) + # tidy up and rename columns to match the format of episode files dplyr::select( recid = recid_dd, @@ -249,22 +292,5 @@ add_dd <- function(data, year) { ) ) - data_summary <- data %>% - filter(recid == "DD") %>% - dplyr::group_by(dd_type) %>% - dplyr::summarise(frequency = dplyr::n()) %>% - dplyr::mutate( - total = nrow(dd_data), - percentage = round(frequency / total * 100, 2) - ) - - data_summary <- data.frame( - dd_type = "-", - frequency = data_summary$total[1] - sum(data_summary$frequency), - total = data_summary$total[1] - ) %>% - dplyr::mutate(percentage = round(frequency / total * 100, 2)) %>% - dplyr::bind_rows(data_summary) - return(data) } From d17a0d44dbf606838d0bddde99765f09b25f5b99 Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Wed, 3 May 2023 09:53:43 +0000 Subject: [PATCH 13/37] Style code --- R/add_dd.R | 54 +++++++++++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/R/add_dd.R b/R/add_dd.R index 725bd83e5..9c4630190 100644 --- a/R/add_dd.R +++ b/R/add_dd.R @@ -237,7 +237,6 @@ add_dd <- function(data, year) { ) ~ "DD-CIJ", dd_type %in% c("no-cij") ~ "DD-No CIJ" )) %>% - # remove duplicated rows when many to many inner join # keep the records that closest to the cij record dplyr::arrange( @@ -247,35 +246,36 @@ add_dd <- function(data, year) { keydate2_dateformat_dd, dummy_id, dd_type, - datediff_end,-datediff_start + datediff_end, -datediff_start ) %>% dplyr::distinct(postcode, - keydate1_dateformat_dd, - keydate2_dateformat_dd, - .keep_all = TRUE) + keydate1_dateformat_dd, + keydate2_dateformat_dd, + .keep_all = TRUE + ) - # tidy up and rename columns to match the format of episode files - dplyr::select( - recid = recid_dd, - chi, - keydate1_dateformat = keydate1_dateformat_dd, - keydate2_dateformat = keydate2_dateformat_dd, - amended_dates, - delay_end_reason, - primary_delay_reason, - primary_delay_reason, - hbtreatcode, - location, - spec, - smrtype = smrtype_dd, - cij_marker, - cij_start_date, - cij_end_date, - postcode = postcode_dd, - dd_responsible_lca, - original_admission_date, - dd_type - ) %>% + # tidy up and rename columns to match the format of episode files + dplyr::select( + recid = recid_dd, + chi, + keydate1_dateformat = keydate1_dateformat_dd, + keydate2_dateformat = keydate2_dateformat_dd, + amended_dates, + delay_end_reason, + primary_delay_reason, + primary_delay_reason, + hbtreatcode, + location, + spec, + smrtype = smrtype_dd, + cij_marker, + cij_start_date, + cij_end_date, + postcode = postcode_dd, + dd_responsible_lca, + original_admission_date, + dd_type + ) %>% # combine DD with episode data dplyr::bind_rows( # restore cij_end_date data %>% From c8cb968afcaec8a52990b7820a7ec2e39543c985 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Wed, 3 May 2023 12:10:29 +0100 Subject: [PATCH 14/37] fix missing %>% --- R/add_dd.R | 53 +++++++++++++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/R/add_dd.R b/R/add_dd.R index 9c4630190..a3a3b2524 100644 --- a/R/add_dd.R +++ b/R/add_dd.R @@ -180,11 +180,13 @@ add_dd <- function(data, year) { recid == "04B" & keydate1_dateformat_dd >= cij_start_date & is_dummy_cij_end ~ "4", + # is_dummy_cij_end & is_dummy_keydate2 ~ "4", # "4P" "Matches unended MH record (allowing -1 day) - (4P)" recid == "04B" & keydate1_dateformat_dd >= cij_start_date_lower & is_dummy_cij_end ~ "4P", + # is_dummy_cij_end & is_dummy_keydate2 ~ "4P", # "-" "No Match (We don't keep these)" .default = "-" @@ -252,32 +254,32 @@ add_dd <- function(data, year) { keydate1_dateformat_dd, keydate2_dateformat_dd, .keep_all = TRUE - ) + ) %>% - # tidy up and rename columns to match the format of episode files - dplyr::select( - recid = recid_dd, - chi, - keydate1_dateformat = keydate1_dateformat_dd, - keydate2_dateformat = keydate2_dateformat_dd, - amended_dates, - delay_end_reason, - primary_delay_reason, - primary_delay_reason, - hbtreatcode, - location, - spec, - smrtype = smrtype_dd, - cij_marker, - cij_start_date, - cij_end_date, - postcode = postcode_dd, - dd_responsible_lca, - original_admission_date, - dd_type - ) %>% + # tidy up and rename columns to match the format of episode files + dplyr::select( + recid = recid_dd, + chi, + keydate1_dateformat = keydate1_dateformat_dd, + keydate2_dateformat = keydate2_dateformat_dd, + amended_dates, + delay_end_reason, + primary_delay_reason, + primary_delay_reason, + hbtreatcode, + location, + spec, + smrtype = smrtype_dd, + cij_marker, + cij_start_date, + cij_end_date, + postcode = postcode_dd, + dd_responsible_lca, + original_admission_date, + dd_type + ) %>% # combine DD with episode data - dplyr::bind_rows( # restore cij_end_date + dplyr::bind_rows(# restore cij_end_date data %>% dplyr::select( -c( @@ -289,8 +291,7 @@ add_dd <- function(data, year) { "is_dummy_cij_end", "dummy_cij_end" ) - ) - ) + )) return(data) } From 040f2e31bbbd1368aada0e4b5ad5107fd2941eaf Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Wed, 3 May 2023 11:13:43 +0000 Subject: [PATCH 15/37] Update documentation --- NAMESPACE | 1 + man/calculate_stay.Rd | 1 + man/check_quarter_format.Rd | 1 + man/compute_mid_year_age.Rd | 1 + man/convert_date_to_numeric.Rd | 1 + man/convert_numeric_to_date.Rd | 1 + man/end_fy.Rd | 1 + man/end_fy_quarter.Rd | 1 + man/end_next_fy_quarter.Rd | 1 + man/fy_interval.Rd | 1 + man/is_date_in_fyyear.Rd | 1 + man/last_date_month.Rd | 39 ++++++++++++++++++++++++++++++++++ man/midpoint_fy.Rd | 1 + man/start_fy.Rd | 1 + man/start_fy_quarter.Rd | 1 + man/start_next_fy_quarter.Rd | 1 + 16 files changed, 54 insertions(+) create mode 100644 man/last_date_month.Rd diff --git a/NAMESPACE b/NAMESPACE index 7eef077f3..8b87fca73 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -95,6 +95,7 @@ export(get_year_dir) export(is_date_in_fyyear) export(is_missing) export(la_code_lookup) +export(last_date_month) export(latest_cost_year) export(latest_update) export(match_on_ltcs) diff --git a/man/calculate_stay.Rd b/man/calculate_stay.Rd index bb48a2030..78148921c 100644 --- a/man/calculate_stay.Rd +++ b/man/calculate_stay.Rd @@ -40,6 +40,7 @@ Other date functions: \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, +\code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, diff --git a/man/check_quarter_format.Rd b/man/check_quarter_format.Rd index c49841baf..f0aba7a62 100644 --- a/man/check_quarter_format.Rd +++ b/man/check_quarter_format.Rd @@ -27,6 +27,7 @@ Other date functions: \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, +\code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, diff --git a/man/compute_mid_year_age.Rd b/man/compute_mid_year_age.Rd index 0e353b685..ab1fd3c75 100644 --- a/man/compute_mid_year_age.Rd +++ b/man/compute_mid_year_age.Rd @@ -36,6 +36,7 @@ Other date functions: \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, +\code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, diff --git a/man/convert_date_to_numeric.Rd b/man/convert_date_to_numeric.Rd index 1a742e10a..4e0470b01 100644 --- a/man/convert_date_to_numeric.Rd +++ b/man/convert_date_to_numeric.Rd @@ -30,6 +30,7 @@ Other date functions: \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, +\code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, diff --git a/man/convert_numeric_to_date.Rd b/man/convert_numeric_to_date.Rd index 5173b07a5..0328df141 100644 --- a/man/convert_numeric_to_date.Rd +++ b/man/convert_numeric_to_date.Rd @@ -30,6 +30,7 @@ Other date functions: \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, +\code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, diff --git a/man/end_fy.Rd b/man/end_fy.Rd index 34f579e47..75316104f 100644 --- a/man/end_fy.Rd +++ b/man/end_fy.Rd @@ -32,6 +32,7 @@ Other date functions: \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, +\code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, diff --git a/man/end_fy_quarter.Rd b/man/end_fy_quarter.Rd index 05a36a761..ebebe2262 100644 --- a/man/end_fy_quarter.Rd +++ b/man/end_fy_quarter.Rd @@ -31,6 +31,7 @@ Other date functions: \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, +\code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, diff --git a/man/end_next_fy_quarter.Rd b/man/end_next_fy_quarter.Rd index dd6774af3..d5f0d6088 100644 --- a/man/end_next_fy_quarter.Rd +++ b/man/end_next_fy_quarter.Rd @@ -31,6 +31,7 @@ Other date functions: \code{\link{end_fy}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, +\code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, diff --git a/man/fy_interval.Rd b/man/fy_interval.Rd index ed971532c..b1ec14440 100644 --- a/man/fy_interval.Rd +++ b/man/fy_interval.Rd @@ -31,6 +31,7 @@ Other date functions: \code{\link{end_fy}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{is_date_in_fyyear}()}, +\code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, diff --git a/man/is_date_in_fyyear.Rd b/man/is_date_in_fyyear.Rd index 3faf503e9..702926e4d 100644 --- a/man/is_date_in_fyyear.Rd +++ b/man/is_date_in_fyyear.Rd @@ -46,6 +46,7 @@ Other date functions: \code{\link{end_fy}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, +\code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, diff --git a/man/last_date_month.Rd b/man/last_date_month.Rd new file mode 100644 index 000000000..441f04bbf --- /dev/null +++ b/man/last_date_month.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/last_date_month.R +\name{last_date_month} +\alias{last_date_month} +\title{Return the end date of the month of the given date} +\usage{ +last_date_month(x) +} +\arguments{ +\item{x}{a date with a date format} +} +\value{ +a vector of dates of the end date of the FY year +} +\description{ +Return the end date of the month of the given date +} +\examples{ +last_date_month(lubridate::as_date("2020-02-05")) + +} +\seealso{ +Other date functions: +\code{\link{calculate_stay}()}, +\code{\link{check_quarter_format}()}, +\code{\link{compute_mid_year_age}()}, +\code{\link{convert_date_to_numeric}()}, +\code{\link{convert_numeric_to_date}()}, +\code{\link{end_fy_quarter}()}, +\code{\link{end_fy}()}, +\code{\link{end_next_fy_quarter}()}, +\code{\link{fy_interval}()}, +\code{\link{is_date_in_fyyear}()}, +\code{\link{midpoint_fy}()}, +\code{\link{start_fy_quarter}()}, +\code{\link{start_fy}()}, +\code{\link{start_next_fy_quarter}()} +} +\concept{date functions} diff --git a/man/midpoint_fy.Rd b/man/midpoint_fy.Rd index d351ae44b..20d83f9c4 100644 --- a/man/midpoint_fy.Rd +++ b/man/midpoint_fy.Rd @@ -33,6 +33,7 @@ Other date functions: \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, +\code{\link{last_date_month}()}, \code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, \code{\link{start_next_fy_quarter}()} diff --git a/man/start_fy.Rd b/man/start_fy.Rd index dd331a36b..02cdbacf3 100644 --- a/man/start_fy.Rd +++ b/man/start_fy.Rd @@ -33,6 +33,7 @@ Other date functions: \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, +\code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} diff --git a/man/start_fy_quarter.Rd b/man/start_fy_quarter.Rd index 130d974d0..a58c2d0cf 100644 --- a/man/start_fy_quarter.Rd +++ b/man/start_fy_quarter.Rd @@ -32,6 +32,7 @@ Other date functions: \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, +\code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{start_fy}()}, \code{\link{start_next_fy_quarter}()} diff --git a/man/start_next_fy_quarter.Rd b/man/start_next_fy_quarter.Rd index a03c7054d..7382de86f 100644 --- a/man/start_next_fy_quarter.Rd +++ b/man/start_next_fy_quarter.Rd @@ -32,6 +32,7 @@ Other date functions: \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, +\code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{start_fy_quarter}()}, \code{\link{start_fy}()} From 2f0d2513b4332a6f331914bd05e6891bef17603c Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Wed, 3 May 2023 11:14:07 +0000 Subject: [PATCH 16/37] Style code --- R/add_dd.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/add_dd.R b/R/add_dd.R index a3a3b2524..1374dc305 100644 --- a/R/add_dd.R +++ b/R/add_dd.R @@ -255,7 +255,6 @@ add_dd <- function(data, year) { keydate2_dateformat_dd, .keep_all = TRUE ) %>% - # tidy up and rename columns to match the format of episode files dplyr::select( recid = recid_dd, @@ -279,7 +278,7 @@ add_dd <- function(data, year) { dd_type ) %>% # combine DD with episode data - dplyr::bind_rows(# restore cij_end_date + dplyr::bind_rows( # restore cij_end_date data %>% dplyr::select( -c( @@ -291,7 +290,8 @@ add_dd <- function(data, year) { "is_dummy_cij_end", "dummy_cij_end" ) - )) + ) + ) return(data) } From badcea76bc50cbaa9a1e3080dd1036f4e89380ac Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Wed, 3 May 2023 15:56:48 +0100 Subject: [PATCH 17/37] assign 1APE cij_end_date to keydate2_dd --- R/add_dd.R | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/R/add_dd.R b/R/add_dd.R index a3a3b2524..1551b943a 100644 --- a/R/add_dd.R +++ b/R/add_dd.R @@ -174,19 +174,15 @@ add_dd <- function(data, year) { keydate2_dateformat_dd <= cij_end_month & amended_dates ~ "3ADPE", - - # "4" "Matches unended MH record - (4)" recid == "04B" & keydate1_dateformat_dd >= cij_start_date & is_dummy_cij_end ~ "4", - # is_dummy_cij_end & is_dummy_keydate2 ~ "4", # "4P" "Matches unended MH record (allowing -1 day) - (4P)" recid == "04B" & keydate1_dateformat_dd >= cij_start_date_lower & is_dummy_cij_end ~ "4P", - # is_dummy_cij_end & is_dummy_keydate2 ~ "4P", # "-" "No Match (We don't keep these)" .default = "-" @@ -214,9 +210,18 @@ add_dd <- function(data, year) { "-" ) ), + + # For "1APE", assign 1APE cij_end_date to keydate2_dateformat_dd + keydate2_dateformat_dd = dplyr::if_else( + dd_type == "1APE" | dd_type == "3ADPE", + cij_end_date, + keydate2_dateformat_dd, + ), + datediff_end = abs(cij_end_date - keydate2_dateformat_dd), datediff_start = cij_start_date - keydate1_dateformat_dd ) %>% + dplyr::filter(dd_type != "-") %>% dplyr::mutate(smrtype_dd = dplyr::case_when( dd_type %in% c( From bd0fab5f94a7ed021fb60c77c89fe75b529adbdb Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Wed, 3 May 2023 15:00:10 +0000 Subject: [PATCH 18/37] Style code --- R/add_dd.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/R/add_dd.R b/R/add_dd.R index 1c8d75110..bf83835ef 100644 --- a/R/add_dd.R +++ b/R/add_dd.R @@ -217,11 +217,9 @@ add_dd <- function(data, year) { cij_end_date, keydate2_dateformat_dd, ), - datediff_end = abs(cij_end_date - keydate2_dateformat_dd), datediff_start = cij_start_date - keydate1_dateformat_dd ) %>% - dplyr::filter(dd_type != "-") %>% dplyr::mutate(smrtype_dd = dplyr::case_when( dd_type %in% c( From 3787e97f6a2fe72de4b6fae73939f1ceba483b5a Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Tue, 9 May 2023 16:39:31 +0100 Subject: [PATCH 19/37] corporate add_dd to run_episode_file --- R/add_dd.R | 126 ++++++++++++++++++++++--------------------- R/run_episode_file.R | 4 +- 2 files changed, 68 insertions(+), 62 deletions(-) diff --git a/R/add_dd.R b/R/add_dd.R index bf83835ef..82f3fe24d 100644 --- a/R/add_dd.R +++ b/R/add_dd.R @@ -34,25 +34,29 @@ add_dd <- function(data, year) { # no flag for last reported dd_data <- read_file(get_source_extract_path(year_param, "DD")) %>% + dplyr::rename( + record_keydate1 = keydate1_dateformat, + record_keydate2 = keydate2_dateformat + ) %>% dplyr::mutate( # remember to revoke the keydate2 and amended_dates with dummy_keydate2 - is_dummy_keydate2 = is.na(keydate2_dateformat), + is_dummy_keydate2 = is.na(record_keydate2), dummy_keydate2 = dplyr::if_else(is_dummy_keydate2, - lubridate::today(), - keydate2_dateformat + lubridate::today(), + record_keydate2 ), dummy_id = dplyr::row_number() ) by_dd <- dplyr::join_by( chi, - x$keydate1_dateformat >= y$dummy_cij_start, + x$record_keydate1 >= y$dummy_cij_start, x$dummy_keydate2 <= y$dummy_cij_end ) data <- dd_data %>% dplyr::inner_join(data, - by = by_dd, - suffix = c("_dd", "") + by = by_dd, + suffix = c("_dd", "") ) %>% dplyr::arrange(cij_start_date, cij_end_date, cij_marker, postcode) %>% # remove duplicate rows, but still got some duplicate mis-matches @@ -61,8 +65,8 @@ add_dd <- function(data, year) { cij_start_date, cij_end_date, cij_marker, - keydate1_dateformat_dd, - keydate2_dateformat_dd, + record_keydate1_dd, + record_keydate2_dd, .keep_all = TRUE ) %>% # determine DD quality @@ -87,7 +91,7 @@ add_dd <- function(data, year) { # "4P" "Matches unended MH record (allowing -1 day) - (4P)" # "-" "No Match (We don't keep these)". - # If we use keydate2_dateformat_dd, + # If we use record_keydate2_dd, # we implicitly mean is_dummy_keydate2 needs to be FALSE. # Given that in DD files, # we only keep the records with missing keydate2 for 04B, mental health, @@ -95,93 +99,93 @@ add_dd <- function(data, year) { # it should be ok to only use dummy_keydate2 for "4"(s). # "1" "Accurate Match - (1)" - keydate1_dateformat_dd >= cij_start_date & - keydate2_dateformat_dd <= cij_end_date & + record_keydate1_dd >= cij_start_date & + record_keydate2_dd <= cij_end_date & !amended_dates ~ "1", # "1P" "Accurate Match (allowing +-1 day) - (1P)" - keydate1_dateformat_dd >= cij_start_date_lower & - keydate2_dateformat_dd <= cij_end_date_upper & + record_keydate1_dd >= cij_start_date_lower & + record_keydate2_dd <= cij_end_date_upper & !amended_dates ~ "1P", # "1A" "Accurate Match (has an assumed end date) - (1A)" - keydate1_dateformat_dd >= cij_start_date & - keydate2_dateformat_dd <= cij_end_date & + record_keydate1_dd >= cij_start_date & + record_keydate2_dd <= cij_end_date & amended_dates ~ "1A", # "1AP" "Accurate Match (allowing +-1 day and has an assumed end date) - (1AP)" - keydate1_dateformat_dd >= cij_start_date_lower & - keydate2_dateformat_dd <= cij_end_date_upper & + record_keydate1_dd >= cij_start_date_lower & + record_keydate2_dd <= cij_end_date_upper & amended_dates ~ "1AP", # "1APE" the CIJ ends during the month but the delay has an end date of the end of the month - keydate1_dateformat_dd >= cij_start_date_lower & - keydate2_dateformat_dd == cij_end_month & + record_keydate1_dd >= cij_start_date_lower & + record_keydate2_dd == cij_end_month & amended_dates ~ "1APE", # "2" "Starts in CIJ - (2)" - keydate1_dateformat_dd >= cij_start_date & - keydate1_dateformat_dd <= cij_end_date & - keydate2_dateformat_dd > cij_end_date & + record_keydate1_dd >= cij_start_date & + record_keydate1_dd <= cij_end_date & + record_keydate2_dd > cij_end_date & !amended_dates ~ "2", # "2D" "Starts in CIJ (ends one day after) - (2D)" - keydate1_dateformat_dd >= cij_start_date & - keydate1_dateformat_dd <= cij_end_date & - keydate2_dateformat_dd > cij_end_date_upper & + record_keydate1_dd >= cij_start_date & + record_keydate1_dd <= cij_end_date & + record_keydate2_dd > cij_end_date_upper & !amended_dates ~ "2D", # "2DP" "Starts in CIJ (allowing +-1 day and ends one day after) - (2DP)" - keydate1_dateformat_dd >= cij_start_date_lower & - keydate1_dateformat_dd <= cij_end_date_upper & - keydate2_dateformat_dd > cij_end_date_upper & + record_keydate1_dd >= cij_start_date_lower & + record_keydate1_dd <= cij_end_date_upper & + record_keydate2_dd > cij_end_date_upper & !amended_dates ~ "2DP", # "2A" "Starts in CIJ (Accurate Match after correcting assumed end date) - (2A)" - keydate1_dateformat_dd >= cij_start_date & - keydate1_dateformat_dd <= cij_end_date & - keydate2_dateformat_dd > cij_end_date & + record_keydate1_dd >= cij_start_date & + record_keydate1_dd <= cij_end_date & + record_keydate2_dd > cij_end_date & amended_dates ~ "2A", # "2AP" "Starts in CIJ (Accurate Match (allowing +-1 day) after correcting assumed end date) - (2AP)" - keydate1_dateformat_dd >= cij_start_date_lower & - keydate1_dateformat_dd <= cij_end_date_upper & - keydate2_dateformat_dd > cij_end_date_upper & - # keydate2_dateformat_dd == cij_end_month & + record_keydate1_dd >= cij_start_date_lower & + record_keydate1_dd <= cij_end_date_upper & + record_keydate2_dd > cij_end_date_upper & + # record_keydate2_dd == cij_end_month & amended_dates ~ "2AP", # "3" "Ends in CIJ - (3)" - keydate1_dateformat_dd <= cij_start_date & - keydate2_dateformat_dd >= cij_start_date & - keydate2_dateformat_dd <= cij_end_date & + record_keydate1_dd <= cij_start_date & + record_keydate2_dd >= cij_start_date & + record_keydate2_dd <= cij_end_date & !amended_dates ~ "3", # "3D" "Ends in CIJ (starts one day before) - (3D)" - keydate1_dateformat_dd <= cij_start_date_lower & - keydate2_dateformat_dd >= cij_start_date & - keydate2_dateformat_dd <= cij_end_date & + record_keydate1_dd <= cij_start_date_lower & + record_keydate2_dd >= cij_start_date & + record_keydate2_dd <= cij_end_date & !amended_dates ~ "3D", # "3DP" "Ends in CIJ (allowing +-1 day and starts one day before) - (3DP)" - keydate1_dateformat_dd <= cij_start_date_lower & - keydate2_dateformat_dd >= cij_start_date_lower & - keydate2_dateformat_dd <= cij_end_date_upper & + record_keydate1_dd <= cij_start_date_lower & + record_keydate2_dd >= cij_start_date_lower & + record_keydate2_dd <= cij_end_date_upper & !amended_dates ~ "3DP", # "3ADPE" - keydate1_dateformat_dd <= cij_start_date_lower & - keydate2_dateformat_dd >= cij_start_date_lower & - keydate2_dateformat_dd <= cij_end_month & + record_keydate1_dd <= cij_start_date_lower & + record_keydate2_dd >= cij_start_date_lower & + record_keydate2_dd <= cij_end_month & amended_dates ~ "3ADPE", # "4" "Matches unended MH record - (4)" recid == "04B" & - keydate1_dateformat_dd >= cij_start_date & + record_keydate1_dd >= cij_start_date & is_dummy_cij_end ~ "4", # "4P" "Matches unended MH record (allowing -1 day) - (4P)" recid == "04B" & - keydate1_dateformat_dd >= cij_start_date_lower & + record_keydate1_dd >= cij_start_date_lower & is_dummy_cij_end ~ "4P", # "-" "No Match (We don't keep these)" @@ -211,14 +215,14 @@ add_dd <- function(data, year) { ) ), - # For "1APE", assign 1APE cij_end_date to keydate2_dateformat_dd - keydate2_dateformat_dd = dplyr::if_else( + # For "1APE", assign 1APE cij_end_date to record_keydate2_dd + record_keydate2_dd = dplyr::if_else( dd_type == "1APE" | dd_type == "3ADPE", cij_end_date, - keydate2_dateformat_dd, + record_keydate2_dd, ), - datediff_end = abs(cij_end_date - keydate2_dateformat_dd), - datediff_start = cij_start_date - keydate1_dateformat_dd + datediff_end = abs(cij_end_date - record_keydate2_dd), + datediff_start = cij_start_date - record_keydate1_dd ) %>% dplyr::filter(dd_type != "-") %>% dplyr::mutate(smrtype_dd = dplyr::case_when( @@ -247,23 +251,23 @@ add_dd <- function(data, year) { dplyr::arrange( chi, original_admission_date, - keydate1_dateformat_dd, - keydate2_dateformat_dd, + record_keydate1_dd, + record_keydate2_dd, dummy_id, dd_type, datediff_end, -datediff_start ) %>% dplyr::distinct(postcode, - keydate1_dateformat_dd, - keydate2_dateformat_dd, - .keep_all = TRUE + record_keydate1_dd, + record_keydate2_dd, + .keep_all = TRUE ) %>% # tidy up and rename columns to match the format of episode files dplyr::select( recid = recid_dd, chi, - keydate1_dateformat = keydate1_dateformat_dd, - keydate2_dateformat = keydate2_dateformat_dd, + record_keydate1 = record_keydate1_dd, + record_keydate2 = record_keydate2_dd, amended_dates, delay_end_reason, primary_delay_reason, diff --git a/R/run_episode_file.R b/R/run_episode_file.R index 87d5f7be6..2fe2c44d9 100644 --- a/R/run_episode_file.R +++ b/R/run_episode_file.R @@ -37,6 +37,8 @@ run_episode_file <- function(processed_data_list, year, write_to_disk = TRUE) { "op1a", "age", "cij_marker", + "cij_start_date", + "cij_end_date", "cij_pattype_code", "cij_ipdc", "cij_admtype", @@ -72,7 +74,7 @@ run_episode_file <- function(processed_data_list, year, write_to_disk = TRUE) { fill_missing_cij_markers() %>% create_cost_inc_dna() %>% add_ppa_flag() %>% - # TODO add Link Delayed Discharge here (From C02) + add_dd(year) %>% add_nsu_cohort(year) %>% match_on_ltcs(year) %>% correct_demographics(year) %>% From 05fda77ac819f8e545ac3e4f6cf32b6333cec431 Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Tue, 9 May 2023 15:44:42 +0000 Subject: [PATCH 20/37] Style code --- R/add_dd.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/R/add_dd.R b/R/add_dd.R index 82f3fe24d..8d1e8c253 100644 --- a/R/add_dd.R +++ b/R/add_dd.R @@ -42,8 +42,8 @@ add_dd <- function(data, year) { # remember to revoke the keydate2 and amended_dates with dummy_keydate2 is_dummy_keydate2 = is.na(record_keydate2), dummy_keydate2 = dplyr::if_else(is_dummy_keydate2, - lubridate::today(), - record_keydate2 + lubridate::today(), + record_keydate2 ), dummy_id = dplyr::row_number() ) @@ -55,8 +55,8 @@ add_dd <- function(data, year) { ) data <- dd_data %>% dplyr::inner_join(data, - by = by_dd, - suffix = c("_dd", "") + by = by_dd, + suffix = c("_dd", "") ) %>% dplyr::arrange(cij_start_date, cij_end_date, cij_marker, postcode) %>% # remove duplicate rows, but still got some duplicate mis-matches @@ -258,9 +258,9 @@ add_dd <- function(data, year) { datediff_end, -datediff_start ) %>% dplyr::distinct(postcode, - record_keydate1_dd, - record_keydate2_dd, - .keep_all = TRUE + record_keydate1_dd, + record_keydate2_dd, + .keep_all = TRUE ) %>% # tidy up and rename columns to match the format of episode files dplyr::select( From 56a700a3791e480a04a0be838f597bd7cf357615 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Wed, 17 May 2023 08:55:04 +0000 Subject: [PATCH 21/37] [check-spelling] Update metadata Update for https://github.com/Public-Health-Scotland/source-linkage-files/actions/runs/4989871850/attempts/1 Accepted in https://github.com/Public-Health-Scotland/source-linkage-files/pull/639#issuecomment-1551010365 Signed-off-by: check-spelling-bot --- .github/actions/spelling/expect.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt index 4b36c7d8e..9254991ea 100644 --- a/.github/actions/spelling/expect.txt +++ b/.github/actions/spelling/expect.txt @@ -1,6 +1,7 @@ Accom admloc admtype +ADPE adtf arrivalmode arth @@ -30,6 +31,7 @@ cph createslf dataframe datazone +datediff dateformat dateop datetime @@ -74,6 +76,7 @@ hbtreatcode hbtreatname HCP HHG +hhg hjust hms homecare @@ -161,7 +164,9 @@ smr SMRA smrtype SPARRA +sparra spd +SPSS spss stadm stefanzweifel @@ -187,6 +192,7 @@ vline xintercept xlsx yearstay +YYYYQX zihao zsav zstd From acf960ee974a27ef5ba3cfa83feaaa4a21033848 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Tue, 23 May 2023 07:39:09 +0100 Subject: [PATCH 22/37] Update R/add_dd.R Co-authored-by: James McMahon --- R/add_dd.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/add_dd.R b/R/add_dd.R index 8d1e8c253..9ac6c3409 100644 --- a/R/add_dd.R +++ b/R/add_dd.R @@ -13,7 +13,7 @@ add_dd <- function(data, year) { data <- data %>% dplyr::mutate( # remember to revoke the cij_end_date with dummy_cij_end - cij_start_date_lower = cij_start_date - lubridate::days(1), + cij_start_date_lower = .data$cij_start_date - lubridate::days(1), cij_end_date_upper = cij_end_date + lubridate::days(1), cij_end_month = last_date_month(cij_end_date), is_dummy_cij_start = is.na(cij_start_date) & !is.na(cij_end_date), From 4b5f70f10cb8e506a0c90c6555670b346471259a Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Tue, 23 May 2023 10:57:48 +0100 Subject: [PATCH 23/37] select the correct lines for delayed discharge --- R/add_dd.R | 59 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 25 deletions(-) diff --git a/R/add_dd.R b/R/add_dd.R index 8d1e8c253..b6b99fce7 100644 --- a/R/add_dd.R +++ b/R/add_dd.R @@ -261,31 +261,41 @@ add_dd <- function(data, year) { record_keydate1_dd, record_keydate2_dd, .keep_all = TRUE - ) %>% + ) #%>% # tidy up and rename columns to match the format of episode files - dplyr::select( - recid = recid_dd, - chi, - record_keydate1 = record_keydate1_dd, - record_keydate2 = record_keydate2_dd, - amended_dates, - delay_end_reason, - primary_delay_reason, - primary_delay_reason, - hbtreatcode, - location, - spec, - smrtype = smrtype_dd, - cij_marker, - cij_start_date, - cij_end_date, - postcode = postcode_dd, - dd_responsible_lca, - original_admission_date, - dd_type - ) %>% + dplyr::select( + "year" = "year_dd", + "recid" = "recid_dd", + "record_keydate1" = "record_keydate1_dd", + "record_keydate2" = "record_keydate2_dd", + "smrtype" = "smrtype_dd", + "chi", + "gender", + "dob", + "age", + "gpprac", + "postcode" = "postcode_dd", + "lca" = "dd_responsible_lca",# ??? + "hbtreatcode" = "hbtreatcode_dd", + "original_admission_date", + "amended_dates", + "delay_end_reason", + "primary_delay_reason", + "secondary_delay_reason", + "cij_marker", + "cij_start_date", + "cij_end_date", + "cij_pattype_code", + "cij_ipdc", + "cij_admtype", + "cij_adm_spec", + "cij_dis_spec", + "location", + "spec" = "spec_dd", + "dd_type" + ) %>% # combine DD with episode data - dplyr::bind_rows( # restore cij_end_date + dplyr::bind_rows(# restore cij_end_date data %>% dplyr::select( -c( @@ -297,8 +307,7 @@ add_dd <- function(data, year) { "is_dummy_cij_end", "dummy_cij_end" ) - ) - ) + )) return(data) } From da8449b28241960a60aeb4602b25e55aebd256fd Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Tue, 23 May 2023 10:01:40 +0000 Subject: [PATCH 24/37] Style code --- R/add_dd.R | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/R/add_dd.R b/R/add_dd.R index f0c07a97c..c4a02a66e 100644 --- a/R/add_dd.R +++ b/R/add_dd.R @@ -261,8 +261,8 @@ add_dd <- function(data, year) { record_keydate1_dd, record_keydate2_dd, .keep_all = TRUE - ) #%>% - # tidy up and rename columns to match the format of episode files + ) # %>% + # tidy up and rename columns to match the format of episode files dplyr::select( "year" = "year_dd", "recid" = "recid_dd", @@ -275,7 +275,7 @@ add_dd <- function(data, year) { "age", "gpprac", "postcode" = "postcode_dd", - "lca" = "dd_responsible_lca",# ??? + "lca" = "dd_responsible_lca", # ??? "hbtreatcode" = "hbtreatcode_dd", "original_admission_date", "amended_dates", @@ -295,7 +295,7 @@ add_dd <- function(data, year) { "dd_type" ) %>% # combine DD with episode data - dplyr::bind_rows(# restore cij_end_date + dplyr::bind_rows( # restore cij_end_date data %>% dplyr::select( -c( @@ -307,7 +307,8 @@ add_dd <- function(data, year) { "is_dummy_cij_end", "dummy_cij_end" ) - )) + ) + ) return(data) } From 42809faf3a285cebf9a01c5a2a09d9ea7e4b3e5a Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Tue, 23 May 2023 11:04:13 +0100 Subject: [PATCH 25/37] add_dd lca --- R/add_dd.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/add_dd.R b/R/add_dd.R index f0c07a97c..d7b1979a7 100644 --- a/R/add_dd.R +++ b/R/add_dd.R @@ -275,7 +275,7 @@ add_dd <- function(data, year) { "age", "gpprac", "postcode" = "postcode_dd", - "lca" = "dd_responsible_lca",# ??? + "lca" = "dd_responsible_lca", "hbtreatcode" = "hbtreatcode_dd", "original_admission_date", "amended_dates", From 92bd0112702c71cb079f895613251459715a78b3 Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Tue, 23 May 2023 10:11:02 +0000 Subject: [PATCH 26/37] Style code --- R/add_dd.R | 64 +++++++++++++++++++++++++++--------------------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/R/add_dd.R b/R/add_dd.R index 0f0e5c4fc..d9f141aef 100644 --- a/R/add_dd.R +++ b/R/add_dd.R @@ -262,38 +262,38 @@ add_dd <- function(data, year) { record_keydate2_dd, .keep_all = TRUE ) %>% - # tidy up and rename columns to match the format of episode files - dplyr::select( - "year" = "year_dd", - "recid" = "recid_dd", - "record_keydate1" = "record_keydate1_dd", - "record_keydate2" = "record_keydate2_dd", - "smrtype" = "smrtype_dd", - "chi", - "gender", - "dob", - "age", - "gpprac", - "postcode" = "postcode_dd", - "lca" = "dd_responsible_lca", - "hbtreatcode" = "hbtreatcode_dd", - "original_admission_date", - "amended_dates", - "delay_end_reason", - "primary_delay_reason", - "secondary_delay_reason", - "cij_marker", - "cij_start_date", - "cij_end_date", - "cij_pattype_code", - "cij_ipdc", - "cij_admtype", - "cij_adm_spec", - "cij_dis_spec", - "location", - "spec" = "spec_dd", - "dd_type" - ) %>% + # tidy up and rename columns to match the format of episode files + dplyr::select( + "year" = "year_dd", + "recid" = "recid_dd", + "record_keydate1" = "record_keydate1_dd", + "record_keydate2" = "record_keydate2_dd", + "smrtype" = "smrtype_dd", + "chi", + "gender", + "dob", + "age", + "gpprac", + "postcode" = "postcode_dd", + "lca" = "dd_responsible_lca", + "hbtreatcode" = "hbtreatcode_dd", + "original_admission_date", + "amended_dates", + "delay_end_reason", + "primary_delay_reason", + "secondary_delay_reason", + "cij_marker", + "cij_start_date", + "cij_end_date", + "cij_pattype_code", + "cij_ipdc", + "cij_admtype", + "cij_adm_spec", + "cij_dis_spec", + "location", + "spec" = "spec_dd", + "dd_type" + ) %>% # combine DD with episode data dplyr::bind_rows( # restore cij_end_date data %>% From 4e7c07cf50a4ca81f8b7fc3ed9dfdc1704d3053e Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Tue, 30 May 2023 10:06:02 +0100 Subject: [PATCH 27/37] Update R/add_dd.R Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com> --- R/add_dd.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/add_dd.R b/R/add_dd.R index d9f141aef..aa6fcbd39 100644 --- a/R/add_dd.R +++ b/R/add_dd.R @@ -14,15 +14,15 @@ add_dd <- function(data, year) { dplyr::mutate( # remember to revoke the cij_end_date with dummy_cij_end cij_start_date_lower = .data$cij_start_date - lubridate::days(1), - cij_end_date_upper = cij_end_date + lubridate::days(1), - cij_end_month = last_date_month(cij_end_date), - is_dummy_cij_start = is.na(cij_start_date) & !is.na(cij_end_date), + cij_end_date_upper = .data$cij_end_date + lubridate::days(1), + cij_end_month = last_date_month(.data$cij_end_date), + is_dummy_cij_start = is.na(.data$cij_start_date) & !is.na(.data$cij_end_date), dummy_cij_start = dplyr::if_else( is_dummy_cij_start, lubridate::as_date("1900-01-01"), - cij_start_date_lower + .data$cij_start_date_lower ), - is_dummy_cij_end = !is.na(cij_start_date) & is.na(cij_end_date), + is_dummy_cij_end = !is.na(.data$cij_start_date) & is.na(.data$cij_end_date), dummy_cij_end = dplyr::if_else( is_dummy_cij_end, lubridate::today(), From d70b893edf23dcda1e415edda53f6e0bca43a0de Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Tue, 30 May 2023 11:19:26 +0100 Subject: [PATCH 28/37] remove unnecessary clarity x$ y$ --- R/add_dd.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/add_dd.R b/R/add_dd.R index aa6fcbd39..72101c2db 100644 --- a/R/add_dd.R +++ b/R/add_dd.R @@ -50,8 +50,8 @@ add_dd <- function(data, year) { by_dd <- dplyr::join_by( chi, - x$record_keydate1 >= y$dummy_cij_start, - x$dummy_keydate2 <= y$dummy_cij_end + record_keydate1 >= dummy_cij_start, + dummy_keydate2 <= dummy_cij_end ) data <- dd_data %>% dplyr::inner_join(data, From c43243f3b49433050a9e9d6970e5b4e1017ea25c Mon Sep 17 00:00:00 2001 From: James McMahon Date: Fri, 2 Jun 2023 09:22:47 +0100 Subject: [PATCH 29/37] Add `.data$` where needed --- R/add_dd.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/add_dd.R b/R/add_dd.R index 72101c2db..2f82bcef2 100644 --- a/R/add_dd.R +++ b/R/add_dd.R @@ -18,15 +18,15 @@ add_dd <- function(data, year) { cij_end_month = last_date_month(.data$cij_end_date), is_dummy_cij_start = is.na(.data$cij_start_date) & !is.na(.data$cij_end_date), dummy_cij_start = dplyr::if_else( - is_dummy_cij_start, + .data$is_dummy_cij_start, lubridate::as_date("1900-01-01"), .data$cij_start_date_lower ), is_dummy_cij_end = !is.na(.data$cij_start_date) & is.na(.data$cij_end_date), dummy_cij_end = dplyr::if_else( - is_dummy_cij_end, + .data$is_dummy_cij_end, lubridate::today(), - cij_end_month + .data$cij_end_month ) ) @@ -40,8 +40,8 @@ add_dd <- function(data, year) { ) %>% dplyr::mutate( # remember to revoke the keydate2 and amended_dates with dummy_keydate2 - is_dummy_keydate2 = is.na(record_keydate2), - dummy_keydate2 = dplyr::if_else(is_dummy_keydate2, + is_dummy_keydate2 = is.na(.data$record_keydate2), + dummy_keydate2 = dplyr::if_else(.data$is_dummy_keydate2, lubridate::today(), record_keydate2 ), From 5a97d23173ca8cc214496e7995c0c17fd06b481f Mon Sep 17 00:00:00 2001 From: James McMahon Date: Fri, 2 Jun 2023 09:23:27 +0100 Subject: [PATCH 30/37] Add quotes in the rename Also add a TODO to make this change earlier --- R/add_dd.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/add_dd.R b/R/add_dd.R index 2f82bcef2..eade61e6f 100644 --- a/R/add_dd.R +++ b/R/add_dd.R @@ -35,8 +35,9 @@ add_dd <- function(data, year) { dd_data <- read_file(get_source_extract_path(year_param, "DD")) %>% dplyr::rename( - record_keydate1 = keydate1_dateformat, - record_keydate2 = keydate2_dateformat + # TODO Change the name of the variables in the DD extract rather than here. + record_keydate1 = "keydate1_dateformat", + record_keydate2 = "keydate2_dateformat" ) %>% dplyr::mutate( # remember to revoke the keydate2 and amended_dates with dummy_keydate2 From a23dd7a64901e661a10b2889d0b270f61b4ae5ef Mon Sep 17 00:00:00 2001 From: James McMahon Date: Fri, 2 Jun 2023 12:27:58 +0100 Subject: [PATCH 31/37] Lint - Make integers explicit --- R/add_dd.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/add_dd.R b/R/add_dd.R index eade61e6f..f65d876dc 100644 --- a/R/add_dd.R +++ b/R/add_dd.R @@ -13,8 +13,8 @@ add_dd <- function(data, year) { data <- data %>% dplyr::mutate( # remember to revoke the cij_end_date with dummy_cij_end - cij_start_date_lower = .data$cij_start_date - lubridate::days(1), - cij_end_date_upper = .data$cij_end_date + lubridate::days(1), + cij_start_date_lower = .data$cij_start_date - lubridate::days(1L), + cij_end_date_upper = .data$cij_end_date + lubridate::days(1L), cij_end_month = last_date_month(.data$cij_end_date), is_dummy_cij_start = is.na(.data$cij_start_date) & !is.na(.data$cij_end_date), dummy_cij_start = dplyr::if_else( From c172f13efa57f22ac3a0e7bc2a389c970da1216d Mon Sep 17 00:00:00 2001 From: James McMahon Date: Fri, 2 Jun 2023 12:29:45 +0100 Subject: [PATCH 32/37] Lint - add `.data$` where relevant --- R/add_dd.R | 67 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 37 insertions(+), 30 deletions(-) diff --git a/R/add_dd.R b/R/add_dd.R index f65d876dc..72be657b6 100644 --- a/R/add_dd.R +++ b/R/add_dd.R @@ -44,36 +44,41 @@ add_dd <- function(data, year) { is_dummy_keydate2 = is.na(.data$record_keydate2), dummy_keydate2 = dplyr::if_else(.data$is_dummy_keydate2, lubridate::today(), - record_keydate2 + .data$record_keydate2 ), dummy_id = dplyr::row_number() ) by_dd <- dplyr::join_by( - chi, - record_keydate1 >= dummy_cij_start, - dummy_keydate2 <= dummy_cij_end + .data$chi, + .data$record_keydate1 >= .data$dummy_cij_start, + .data$dummy_keydate2 <= .data$dummy_cij_end ) data <- dd_data %>% dplyr::inner_join(data, by = by_dd, suffix = c("_dd", "") ) %>% - dplyr::arrange(cij_start_date, cij_end_date, cij_marker, postcode) %>% - # remove duplicate rows, but still got some duplicate mis-matches + dplyr::arrange( + .data$cij_start_date, + .data$cij_end_date, + .data$cij_marker, + .data$postcode + ) %>% + # remove duplicate rows, but still got some duplicate mismatches dplyr::distinct( - chi, - cij_start_date, - cij_end_date, - cij_marker, - record_keydate1_dd, - record_keydate2_dd, + .data$chi, + .data$cij_start_date, + .data$cij_end_date, + .data$cij_marker, + .data$record_keydate1_dd, + .data$record_keydate2_dd, .keep_all = TRUE ) %>% # determine DD quality dplyr::mutate( dd_type = dplyr::if_else( - is.na(cij_marker), + is.na(.data$cij_marker), "no-cij", dplyr::case_when( # "1" "Accurate Match - (1)" @@ -194,7 +199,7 @@ add_dd <- function(data, year) { ) ), dd_type = factor( - dd_type, + .data$dd_type, levels = c( "1", "1P", @@ -218,16 +223,16 @@ add_dd <- function(data, year) { # For "1APE", assign 1APE cij_end_date to record_keydate2_dd record_keydate2_dd = dplyr::if_else( - dd_type == "1APE" | dd_type == "3ADPE", - cij_end_date, - record_keydate2_dd, + .data$dd_type == "1APE" | .data$dd_type == "3ADPE", + .data$cij_end_date, + .data$record_keydate2_dd ), - datediff_end = abs(cij_end_date - record_keydate2_dd), - datediff_start = cij_start_date - record_keydate1_dd + datediff_end = abs(.data$cij_end_date - .data$record_keydate2_dd), + datediff_start = .data$cij_start_date - .data$record_keydate1_dd ) %>% - dplyr::filter(dd_type != "-") %>% dplyr::mutate(smrtype_dd = dplyr::case_when( dd_type %in% c( + dplyr::filter(.data$dd_type != "-") %>% "1", "1P", "1A", @@ -250,17 +255,19 @@ add_dd <- function(data, year) { # remove duplicated rows when many to many inner join # keep the records that closest to the cij record dplyr::arrange( - chi, - original_admission_date, - record_keydate1_dd, - record_keydate2_dd, - dummy_id, - dd_type, - datediff_end, -datediff_start + .data$chi, + .data$original_admission_date, + .data$record_keydate1_dd, + .data$record_keydate2_dd, + .data$dummy_id, + .data$dd_type, + .data$datediff_end, + dplyr::desc(.data$datediff_start) ) %>% - dplyr::distinct(postcode, - record_keydate1_dd, - record_keydate2_dd, + dplyr::distinct( + .data$postcode, + .data$record_keydate1_dd, + .data$record_keydate2_dd, .keep_all = TRUE ) %>% # tidy up and rename columns to match the format of episode files From 2d5f827ed7c27e1dd39cf42f4d816cff2f6a89ce Mon Sep 17 00:00:00 2001 From: James McMahon Date: Fri, 2 Jun 2023 12:32:31 +0100 Subject: [PATCH 33/37] Use `case_match` instead of `case_when` --- R/add_dd.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/R/add_dd.R b/R/add_dd.R index 72be657b6..fc6b52c09 100644 --- a/R/add_dd.R +++ b/R/add_dd.R @@ -230,9 +230,10 @@ add_dd <- function(data, year) { datediff_end = abs(.data$cij_end_date - .data$record_keydate2_dd), datediff_start = .data$cij_start_date - .data$record_keydate1_dd ) %>% - dplyr::mutate(smrtype_dd = dplyr::case_when( - dd_type %in% c( dplyr::filter(.data$dd_type != "-") %>% + dplyr::mutate(smrtype_dd = dplyr::case_match( + .data$dd_type, + c( "1", "1P", "1A", @@ -250,7 +251,7 @@ add_dd <- function(data, year) { "4", "4P" ) ~ "DD-CIJ", - dd_type %in% c("no-cij") ~ "DD-No CIJ" + "no-cij" ~ "DD-No CIJ" )) %>% # remove duplicated rows when many to many inner join # keep the records that closest to the cij record From 0bf55da40729037261ac1e4bc140f192ad3292d0 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Fri, 2 Jun 2023 12:46:20 +0100 Subject: [PATCH 34/37] Rename `add_dd()` to `link_delayed_discharge_eps()` --- NAMESPACE | 2 +- R/add_dd.R | 8 +++++--- R/run_episode_file.R | 2 +- man/add_nsu_cohort.Rd | 4 ++-- man/add_ppa_flag.Rd | 4 ++-- man/{add_dd.Rd => link_delayed_discharge_eps.Rd} | 13 +++++++------ 6 files changed, 18 insertions(+), 15 deletions(-) rename man/{add_dd.Rd => link_delayed_discharge_eps.Rd} (52%) diff --git a/NAMESPACE b/NAMESPACE index 14de9ca55..0343029a6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,7 +1,6 @@ # Generated by roxygen2: do not edit by hand export("%>%") -export(add_dd) export(add_nsu_cohort) export(add_ppa_flag) export(check_year_format) @@ -77,6 +76,7 @@ export(la_code_lookup) export(last_date_month) export(latest_cost_year) export(latest_update) +export(link_delayed_discharge_eps) export(midpoint_fy) export(phs_db_connection) export(previous_update) diff --git a/R/add_dd.R b/R/add_dd.R index fc6b52c09..c148ae236 100644 --- a/R/add_dd.R +++ b/R/add_dd.R @@ -1,13 +1,15 @@ -#' Add Delay Discharge to working file +#' Link Delayed Discharge to WIP episode file #' #' @param data The input data frame #' @param year The year being processed #' -#' @return A data frame linking delay discharge cohorts +#' @return A data frame with the delayed discharge cohort added and linked +#' using the `cij_marker` +#' #' @export #' #' @family episode file -add_dd <- function(data, year) { +link_delayed_discharge_eps <- function(data, year) { year_param <- year data <- data %>% diff --git a/R/run_episode_file.R b/R/run_episode_file.R index 2fe2c44d9..b1437f2a4 100644 --- a/R/run_episode_file.R +++ b/R/run_episode_file.R @@ -74,7 +74,7 @@ run_episode_file <- function(processed_data_list, year, write_to_disk = TRUE) { fill_missing_cij_markers() %>% create_cost_inc_dna() %>% add_ppa_flag() %>% - add_dd(year) %>% + link_delayed_discharge_eps(year) %>% add_nsu_cohort(year) %>% match_on_ltcs(year) %>% correct_demographics(year) %>% diff --git a/man/add_nsu_cohort.Rd b/man/add_nsu_cohort.Rd index e80fe2ede..723c105e1 100644 --- a/man/add_nsu_cohort.Rd +++ b/man/add_nsu_cohort.Rd @@ -21,7 +21,7 @@ Add NSU cohort to working file \code{\link[=get_nsu_path]{get_nsu_path()}} Other episode file: -\code{\link{add_dd}()}, -\code{\link{add_ppa_flag}()} +\code{\link{add_ppa_flag}()}, +\code{\link{link_delayed_discharge_eps}()} } \concept{episode file} diff --git a/man/add_ppa_flag.Rd b/man/add_ppa_flag.Rd index 55660352d..8533a09f5 100644 --- a/man/add_ppa_flag.Rd +++ b/man/add_ppa_flag.Rd @@ -19,7 +19,7 @@ was preventable or not. } \seealso{ Other episode file: -\code{\link{add_dd}()}, -\code{\link{add_nsu_cohort}()} +\code{\link{add_nsu_cohort}()}, +\code{\link{link_delayed_discharge_eps}()} } \concept{episode file} diff --git a/man/add_dd.Rd b/man/link_delayed_discharge_eps.Rd similarity index 52% rename from man/add_dd.Rd rename to man/link_delayed_discharge_eps.Rd index 2dd6685c3..20b09f4bb 100644 --- a/man/add_dd.Rd +++ b/man/link_delayed_discharge_eps.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/add_dd.R -\name{add_dd} -\alias{add_dd} -\title{Add Delay Discharge to working file} +\name{link_delayed_discharge_eps} +\alias{link_delayed_discharge_eps} +\title{Link Delayed Discharge to WIP episode file} \usage{ -add_dd(data, year) +link_delayed_discharge_eps(data, year) } \arguments{ \item{data}{The input data frame} @@ -12,10 +12,11 @@ add_dd(data, year) \item{year}{The year being processed} } \value{ -A data frame linking delay discharge cohorts +A data frame with the delayed discharge cohort added and linked +using the \code{cij_marker} } \description{ -Add Delay Discharge to working file +Link Delayed Discharge to WIP episode file } \seealso{ Other episode file: From f189ea3f91a3961ae126bc700620c64bb2d793dc Mon Sep 17 00:00:00 2001 From: James McMahon Date: Fri, 2 Jun 2023 12:48:43 +0100 Subject: [PATCH 35/37] Rename `add_dd.R` to `link_delayed_discharge_eps.R` --- R/{add_dd.R => link_delayed_discharge_eps.R} | 0 man/link_delayed_discharge_eps.Rd | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename R/{add_dd.R => link_delayed_discharge_eps.R} (100%) diff --git a/R/add_dd.R b/R/link_delayed_discharge_eps.R similarity index 100% rename from R/add_dd.R rename to R/link_delayed_discharge_eps.R diff --git a/man/link_delayed_discharge_eps.Rd b/man/link_delayed_discharge_eps.Rd index 20b09f4bb..b09d70ad0 100644 --- a/man/link_delayed_discharge_eps.Rd +++ b/man/link_delayed_discharge_eps.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/add_dd.R +% Please edit documentation in R/link_delayed_discharge_eps.R \name{link_delayed_discharge_eps} \alias{link_delayed_discharge_eps} \title{Link Delayed Discharge to WIP episode file} From 0e9f0acf2a87123b5dd797aa044795800b1af443 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Fri, 2 Jun 2023 12:49:11 +0100 Subject: [PATCH 36/37] Update the documentation for `last_date_month` --- R/last_date_month.R | 11 +++++------ man/last_date_month.Rd | 8 ++++---- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/R/last_date_month.R b/R/last_date_month.R index 471fda031..979970f87 100644 --- a/R/last_date_month.R +++ b/R/last_date_month.R @@ -1,16 +1,15 @@ #' Return the end date of the month of the given date #' -#' @description Return the end date of the month of the given date +#' @param date a date with a date format. #' -#' @param x a date with a date format +#' @return a vector of dates, giving the last day of the month. #' -#' @return a vector of dates of the end date of the FY year #' @export #' #' @examples -#' last_date_month(lubridate::as_date("2020-02-05")) +#' last_date_month(Sys.Date()) #' #' @family date functions -last_date_month <- function(x) { - return(lubridate::ceiling_date(x, "month") - lubridate::days(1)) +last_date_month <- function(date) { + return(lubridate::ceiling_date(date, "month") - lubridate::days(1)) } diff --git a/man/last_date_month.Rd b/man/last_date_month.Rd index 441f04bbf..7a0eae26e 100644 --- a/man/last_date_month.Rd +++ b/man/last_date_month.Rd @@ -4,19 +4,19 @@ \alias{last_date_month} \title{Return the end date of the month of the given date} \usage{ -last_date_month(x) +last_date_month(date) } \arguments{ -\item{x}{a date with a date format} +\item{date}{a date with a date format.} } \value{ -a vector of dates of the end date of the FY year +a vector of dates, giving the last day of the month. } \description{ Return the end date of the month of the given date } \examples{ -last_date_month(lubridate::as_date("2020-02-05")) +last_date_month(Sys.Date()) } \seealso{ From 0ec7ec147c82a8ec5ea303bb4141a9bb617a91a7 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Fri, 2 Jun 2023 12:57:03 +0100 Subject: [PATCH 37/37] Add tests for `last_date_month` --- tests/testthat/test-last_date_month.R | 37 +++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 tests/testthat/test-last_date_month.R diff --git a/tests/testthat/test-last_date_month.R b/tests/testthat/test-last_date_month.R new file mode 100644 index 000000000..bd2dacf27 --- /dev/null +++ b/tests/testthat/test-last_date_month.R @@ -0,0 +1,37 @@ +test_that("last_date_month handles types correctly", { + expect_s3_class(last_date_month(Sys.Date()), "Date") + expect_s3_class(last_date_month(lubridate::today()), "Date") + + expect_error(last_date_month("2000-01-01")) +}) + +test_that("last_date_month is correct", { + dates <- as.Date( + c( + "2020-01-01", + "2020-01-30", + "2020-02-01", + "2020-02-28", + "2020-02-29", + "2022-02-01", + "2022-02-28", + "2022-02-29" + ) + ) + + expect_equal( + last_date_month(dates), + as.Date( + c( + "2020-01-31", + "2020-01-31", + "2020-02-29", + "2020-02-29", + "2020-02-29", + "2022-02-28", + "2022-02-28", + NA + ) + ) + ) +})