Skip to content

Commit

Permalink
333 c02 link delayed discharges episodes (#639)
Browse files Browse the repository at this point in the history
* initial rough work on delay discharge

* Update documentation

* some conversion from SPSS

* Style code

* a function of adding delay discharge to episode data

* Style code

* Update R/add_dd.R

Co-authored-by: James McMahon <[email protected]>

* Update R/add_dd.R

Co-authored-by: James McMahon <[email protected]>

* Update R/add_dd.R

Co-authored-by: James McMahon <[email protected]>

* add_dd functions

* Style code

* remove duplicated rows when many to many inner join
by keeping the records that are closest to the cij record

* Style code

* fix missing %>%

* Update documentation

* Style code

* assign 1APE cij_end_date to keydate2_dd

* Style code

* incorporate add_dd into run_episode_file

* Style code

* [check-spelling] Update metadata

Update for https://github.com/Public-Health-Scotland/source-linkage-files/actions/runs/4989871850/attempts/1
Accepted in #639 (comment)

Signed-off-by: check-spelling-bot <[email protected]>

* Update R/add_dd.R

Co-authored-by: James McMahon <[email protected]>

* select the correct lines for delayed discharge

* Style code

* add_dd lca

* Style code

* Update R/add_dd.R

Co-authored-by: Jennit07 <[email protected]>

* remove unnecessary clarity x$ y$

* Add `.data$` where needed

* Add quotes in the rename

Also add a TODO to make this change earlier

* Lint - Make integers explicit

* Lint - add `.data$` where relevant

* Use `case_match` instead of `case_when`

* Rename `add_dd()` to `link_delayed_discharge_eps()`

* Rename `add_dd.R` to `link_delayed_discharge_eps.R`

* Update the documentation for `last_date_month`

* Add tests for `last_date_month`

---------

Signed-off-by: check-spelling-bot <[email protected]>
Co-authored-by: lizihao-anu <[email protected]>
Co-authored-by: James McMahon <[email protected]>
Co-authored-by: Jennit07 <[email protected]>
  • Loading branch information
4 people authored Jun 2, 2023
1 parent 7e3d547 commit c47672c
Show file tree
Hide file tree
Showing 24 changed files with 470 additions and 3 deletions.
5 changes: 5 additions & 0 deletions .github/actions/spelling/expect.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
Accom
admloc
admtype
ADPE
adtf
arrivalmode
arth
Expand Down Expand Up @@ -35,6 +36,7 @@ createslf
dataframe
datamart
datazone
datediff
dateformat
dateop
datetime
Expand Down Expand Up @@ -83,6 +85,7 @@ hbtreatname
hci
HCP
HHG
hhg
hjust
hms
homecare
Expand Down Expand Up @@ -174,6 +177,7 @@ smr
SMRA
smrtype
SPARRA
sparra
spd
SPSS
spss
Expand Down Expand Up @@ -202,6 +206,7 @@ vline
xintercept
xlsx
yearstay
YYYYQX
zihao
zsav
zstd
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,10 @@ export(get_year_dir)
export(is_date_in_fyyear)
export(is_missing)
export(la_code_lookup)
export(last_date_month)
export(latest_cost_year)
export(latest_update)
export(link_delayed_discharge_eps)
export(midpoint_fy)
export(phs_db_connection)
export(previous_update)
Expand Down
15 changes: 15 additions & 0 deletions R/last_date_month.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#' Return the end date of the month of the given date
#'
#' @param date a vector of dates (`Date`) or date-times (`POSIXct`).
#'
#' @return a vector the same length as `date`, giving the last day of the
#' month that each element of `date` falls in.
#'
#' @export
#'
#' @examples
#' last_date_month(Sys.Date())
#' last_date_month(as.Date("2024-02-10"))
#'
#' @family date functions
last_date_month <- function(date) {
  # `change_on_boundary = TRUE` forces a value that sits exactly on a month
  # boundary to be rounded up to the start of the *next* month. That is already
  # the default for `Date` input, but for date-times the default leaves a
  # midnight-on-the-1st value unchanged, which would then yield the last day
  # of the previous month after subtracting a day.
  start_of_next_month <- lubridate::ceiling_date(
    date,
    unit = "month",
    change_on_boundary = TRUE
  )

  start_of_next_month - lubridate::days(1L)
}
325 changes: 325 additions & 0 deletions R/link_delayed_discharge_eps.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,325 @@
#' Link Delayed Discharge to WIP episode file
#'
#' @description Reads the Delayed Discharge (DD) extract for `year`, joins it
#' to the episode data on `chi` and a date window built from the CIJ start and
#' end dates, classifies the quality of each match (`dd_type`), drops the
#' non-matches and duplicate matches, and finally appends the matched DD rows
#' to the original episode data.
#'
#' @details The date window used for the join is:
#' * lower bound: the day before `cij_start_date` (or `1900-01-01` when only
#'   the start date is missing);
#' * upper bound: the last day of the month of `cij_end_date` (or today when
#'   only the end date is missing).
#'
#' A DD record with a missing `record_keydate2` is treated as ending today for
#' the purposes of the join.
#'
#' @param data The input data frame. It must contain at least `chi`,
#' `cij_marker`, `cij_start_date`, `cij_end_date` and the other episode
#' columns selected at the end of the function.
#' @param year The year being processed
#'
#' @return A data frame with the delayed discharge cohort added and linked
#' using the `cij_marker`
#'
#' @export
#'
#' @family episode file
link_delayed_discharge_eps <- function(data, year) {
  year_param <- year

  # Add the helper columns which define the (slightly widened) CIJ window.
  # These are dropped again before the episode data is bound back on.
  data <- data %>%
    dplyr::mutate(
      # remember to revoke the cij_end_date with dummy_cij_end
      cij_start_date_lower = .data$cij_start_date - lubridate::days(1L),
      cij_end_date_upper = .data$cij_end_date + lubridate::days(1L),
      cij_end_month = last_date_month(.data$cij_end_date),
      is_dummy_cij_start = is.na(.data$cij_start_date) &
        !is.na(.data$cij_end_date),
      dummy_cij_start = dplyr::if_else(
        .data$is_dummy_cij_start,
        lubridate::as_date("1900-01-01"),
        .data$cij_start_date_lower
      ),
      is_dummy_cij_end = !is.na(.data$cij_start_date) &
        is.na(.data$cij_end_date),
      dummy_cij_end = dplyr::if_else(
        .data$is_dummy_cij_end,
        lubridate::today(),
        .data$cij_end_month
      )
    )

  ## handling DD ----
  # no flag for last reported
  dd_data <-
    read_file(get_source_extract_path(year_param, "DD")) %>%
    dplyr::rename(
      # TODO Change the name of the variables in the DD extract rather than here.
      record_keydate1 = "keydate1_dateformat",
      record_keydate2 = "keydate2_dateformat"
    ) %>%
    dplyr::mutate(
      # remember to revoke the keydate2 and amended_dates with dummy_keydate2
      is_dummy_keydate2 = is.na(.data$record_keydate2),
      dummy_keydate2 = dplyr::if_else(.data$is_dummy_keydate2,
        lubridate::today(),
        .data$record_keydate2
      ),
      # Identifies the original DD row, used as a tie-breaker when sorting
      dummy_id = dplyr::row_number()
    )

  # `join_by()` has its own mini-language: it does not understand the `.data`
  # pronoun and errors if it is used. Columns are referred to by name, with
  # `x$` (the DD extract) and `y$` (the episode data) to make the side of each
  # column explicit.
  by_dd <- dplyr::join_by(
    "chi",
    x$record_keydate1 >= y$dummy_cij_start,
    x$dummy_keydate2 <= y$dummy_cij_end
  )

  data <- dd_data %>%
    # Columns present in both tables get the `_dd` suffix on the DD side and
    # keep their original name on the episode side.
    dplyr::inner_join(data,
      by = by_dd,
      suffix = c("_dd", "")
    ) %>%
    dplyr::arrange(
      .data$cij_start_date,
      .data$cij_end_date,
      .data$cij_marker,
      .data$postcode
    ) %>%
    # remove duplicate rows, but still got some duplicate mismatches
    dplyr::distinct(
      .data$chi,
      .data$cij_start_date,
      .data$cij_end_date,
      .data$cij_marker,
      .data$record_keydate1_dd,
      .data$record_keydate2_dd,
      .keep_all = TRUE
    ) %>%
    # determine DD quality
    dplyr::mutate(
      dd_type = dplyr::if_else(
        is.na(.data$cij_marker),
        "no-cij",
        dplyr::case_when(
          # "1" "Accurate Match - (1)"
          # "1P" "Accurate Match (allowing +-1 day) - (1P)"
          # "1A" "Accurate Match (has an assumed end date) - (1A)"
          # "1AP" "Accurate Match (allowing +-1 day and has an assumed end date) - (1AP)"
          # "2" "Starts in CIJ - (2)"
          # "2D" "Starts in CIJ (ends one day after) - (2D)"
          # "2DP" "Starts in CIJ (allowing +-1 day and ends one day after) - (2DP)"
          # "2A" "Starts in CIJ (Accurate Match after correcting assumed end date) - (2A)"
          # "2AP" "Starts in CIJ (Accurate Match (allowing +-1 day) after correcting assumed end date) - (2AP)"
          # "3" "Ends in CIJ - (3)"
          # "3D" "Ends in CIJ (starts one day before) - (3D)"
          # "3DP" "Ends in CIJ (allowing +-1 day and starts one day before) - (3DP)"
          # "4" "Matches unended MH record - (4)"
          # "4P" "Matches unended MH record (allowing -1 day) - (4P)"
          # "-" "No Match (We don't keep these)".

          # If we use record_keydate2_dd,
          # we implicitly mean is_dummy_keydate2 needs to be FALSE.
          # Given that in DD files,
          # we only keep the records with missing keydate2 for 04B, mental health,
          # and drop the records with missing keydate2 for other recid,
          # it should be ok to only use dummy_keydate2 for "4"(s).

          # The conditions are evaluated in order and the first match wins, so
          # the stricter variant of each type must come before the looser one.
          # NOTE(review): as written, every row meeting the "2D" condition
          # also meets "2", and every row meeting "3D" also meets "3", so "2D"
          # and "3D" can never be assigned - confirm against the SPSS logic.

          # "1" "Accurate Match - (1)"
          record_keydate1_dd >= cij_start_date &
            record_keydate2_dd <= cij_end_date &
            !amended_dates ~ "1",

          # "1P" "Accurate Match (allowing +-1 day) - (1P)"
          record_keydate1_dd >= cij_start_date_lower &
            record_keydate2_dd <= cij_end_date_upper &
            !amended_dates ~ "1P",

          # "1A" "Accurate Match (has an assumed end date) - (1A)"
          record_keydate1_dd >= cij_start_date &
            record_keydate2_dd <= cij_end_date &
            amended_dates ~ "1A",

          # "1AP" "Accurate Match (allowing +-1 day and has an assumed end date) - (1AP)"
          record_keydate1_dd >= cij_start_date_lower &
            record_keydate2_dd <= cij_end_date_upper &
            amended_dates ~ "1AP",

          # "1APE" the CIJ ends during the month but the delay has an end date of the end of the month
          record_keydate1_dd >= cij_start_date_lower &
            record_keydate2_dd == cij_end_month &
            amended_dates ~ "1APE",

          # "2" "Starts in CIJ - (2)"
          record_keydate1_dd >= cij_start_date &
            record_keydate1_dd <= cij_end_date &
            record_keydate2_dd > cij_end_date &
            !amended_dates ~ "2",

          # "2D" "Starts in CIJ (ends one day after) - (2D)"
          record_keydate1_dd >= cij_start_date &
            record_keydate1_dd <= cij_end_date &
            record_keydate2_dd > cij_end_date_upper &
            !amended_dates ~ "2D",

          # "2DP" "Starts in CIJ (allowing +-1 day and ends one day after) - (2DP)"
          record_keydate1_dd >= cij_start_date_lower &
            record_keydate1_dd <= cij_end_date_upper &
            record_keydate2_dd > cij_end_date_upper &
            !amended_dates ~ "2DP",

          # "2A" "Starts in CIJ (Accurate Match after correcting assumed end date) - (2A)"
          record_keydate1_dd >= cij_start_date &
            record_keydate1_dd <= cij_end_date &
            record_keydate2_dd > cij_end_date &
            amended_dates ~ "2A",

          # "2AP" "Starts in CIJ (Accurate Match (allowing +-1 day) after correcting assumed end date) - (2AP)"
          record_keydate1_dd >= cij_start_date_lower &
            record_keydate1_dd <= cij_end_date_upper &
            record_keydate2_dd > cij_end_date_upper &
            # record_keydate2_dd == cij_end_month &
            amended_dates ~ "2AP",

          # "3" "Ends in CIJ - (3)"
          record_keydate1_dd <= cij_start_date &
            record_keydate2_dd >= cij_start_date &
            record_keydate2_dd <= cij_end_date &
            !amended_dates ~ "3",

          # "3D" "Ends in CIJ (starts one day before) - (3D)"
          record_keydate1_dd <= cij_start_date_lower &
            record_keydate2_dd >= cij_start_date &
            record_keydate2_dd <= cij_end_date &
            !amended_dates ~ "3D",

          # "3DP" "Ends in CIJ (allowing +-1 day and starts one day before) - (3DP)"
          record_keydate1_dd <= cij_start_date_lower &
            record_keydate2_dd >= cij_start_date_lower &
            record_keydate2_dd <= cij_end_date_upper &
            !amended_dates ~ "3DP",

          # "3ADPE"
          record_keydate1_dd <= cij_start_date_lower &
            record_keydate2_dd >= cij_start_date_lower &
            record_keydate2_dd <= cij_end_month &
            amended_dates ~ "3ADPE",

          # "4" "Matches unended MH record - (4)"
          recid == "04B" &
            record_keydate1_dd >= cij_start_date &
            is_dummy_cij_end ~ "4",

          # "4P" "Matches unended MH record (allowing -1 day) - (4P)"
          recid == "04B" &
            record_keydate1_dd >= cij_start_date_lower &
            is_dummy_cij_end ~ "4P",

          # "-" "No Match (We don't keep these)"
          .default = "-"
        )
      ),
      # The level order is also the sort order used by the `arrange()` further
      # down, so it acts as the match-quality ranking.
      # NOTE(review): "no-cij" is not one of the levels, so those rows become
      # NA here and are then dropped by the `filter()` below, which makes the
      # "DD-No CIJ" branch of `case_match()` unreachable - confirm whether
      # that is intended.
      dd_type = factor(
        .data$dd_type,
        levels = c(
          "1",
          "1P",
          "1A",
          "1AP",
          "2",
          "2D",
          "2DP",
          "2A",
          "2AP",
          "3",
          "3D",
          "3DP",
          "1APE",
          "3ADPE",
          "4",
          "4P",
          "-"
        )
      ),

      # For "1APE", assign 1APE cij_end_date to record_keydate2_dd
      record_keydate2_dd = dplyr::if_else(
        .data$dd_type == "1APE" | .data$dd_type == "3ADPE",
        .data$cij_end_date,
        .data$record_keydate2_dd
      ),
      # Distances between the DD record and the CIJ, used to rank duplicates
      datediff_end = abs(.data$cij_end_date - .data$record_keydate2_dd),
      datediff_start = .data$cij_start_date - .data$record_keydate1_dd
    ) %>%
    dplyr::filter(.data$dd_type != "-") %>%
    dplyr::mutate(smrtype_dd = dplyr::case_match(
      .data$dd_type,
      c(
        "1",
        "1P",
        "1A",
        "1AP",
        "1APE",
        "2",
        "2D",
        "2DP",
        "2A",
        "2AP",
        "3",
        "3D",
        "3DP",
        "3ADPE",
        "4",
        "4P"
      ) ~ "DD-CIJ",
      "no-cij" ~ "DD-No CIJ"
    )) %>%
    # remove duplicated rows when many to many inner join
    # keep the records that closest to the cij record
    dplyr::arrange(
      .data$chi,
      .data$original_admission_date,
      .data$record_keydate1_dd,
      .data$record_keydate2_dd,
      .data$dummy_id,
      .data$dd_type,
      .data$datediff_end,
      dplyr::desc(.data$datediff_start)
    ) %>%
    # NOTE(review): this de-duplicates on the episode `postcode` rather than
    # on `chi` / `dummy_id` - confirm that is the intended key.
    dplyr::distinct(
      .data$postcode,
      .data$record_keydate1_dd,
      .data$record_keydate2_dd,
      .keep_all = TRUE
    ) %>%
    # tidy up and rename columns to match the format of episode files
    dplyr::select(
      "year" = "year_dd",
      "recid" = "recid_dd",
      "record_keydate1" = "record_keydate1_dd",
      "record_keydate2" = "record_keydate2_dd",
      "smrtype" = "smrtype_dd",
      "chi",
      "gender",
      "dob",
      "age",
      "gpprac",
      "postcode" = "postcode_dd",
      "lca" = "dd_responsible_lca",
      "hbtreatcode" = "hbtreatcode_dd",
      "original_admission_date",
      "amended_dates",
      "delay_end_reason",
      "primary_delay_reason",
      "secondary_delay_reason",
      "cij_marker",
      "cij_start_date",
      "cij_end_date",
      "cij_pattype_code",
      "cij_ipdc",
      "cij_admtype",
      "cij_adm_spec",
      "cij_dis_spec",
      "location",
      "spec" = "spec_dd",
      "dd_type"
    ) %>%
    # combine DD with episode data
    # `data` here is still the episode data (with helper columns): the
    # assignment to `data` only happens once the whole pipeline has run.
    dplyr::bind_rows( # restore cij_end_date
      data %>%
        dplyr::select(
          -c(
            "cij_start_date_lower",
            "cij_end_date_upper",
            "cij_end_month",
            "is_dummy_cij_start",
            "dummy_cij_start",
            "is_dummy_cij_end",
            "dummy_cij_end"
          )
        )
    )

  return(data)
}
Loading

0 comments on commit c47672c

Please sign in to comment.