Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

review deaths dates processing #987

Merged
merged 17 commits into from
Aug 6, 2024
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Generated by roxygen2: do not edit by hand

export("%>%")
export(add_deceased_flag)
export(add_homelessness_date_flags)
export(add_homelessness_flag)
export(add_hri_variables)
Expand Down Expand Up @@ -115,6 +116,7 @@ export(process_lookup_ltc)
export(process_lookup_postcode)
export(process_lookup_sc_client)
export(process_lookup_sc_demographics)
export(process_refined_death)
export(process_sc_all_alarms_telecare)
export(process_sc_all_care_home)
export(process_sc_all_home_care)
Expand Down Expand Up @@ -182,6 +184,7 @@ export(start_fy)
export(start_fy_quarter)
export(start_next_fy_quarter)
export(write_file)
export(years_to_run)
importFrom(data.table,.N)
importFrom(data.table,.SD)
importFrom(magrittr,"%>%")
Expand Down
19 changes: 19 additions & 0 deletions R/00-update_refs.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,22 @@ get_dd_period <- function() {
latest_cost_year <- function() {
  # Hard-coded reference value; edit this string when the cost year changes.
  cost_year <- "2223"
  cost_year
}

#' Financial years to run for the SLF update
#'
#' @description Build the vector of financial years that the SLF update
#' covers. Each element is the two-digit start year followed by the two-digit
#' end year, e.g. `"1718"`.
#'
#' @return A character vector of financial years, from `"1718"` to `"2324"`.
#'
#' @export
#'
#' @family initialisation
years_to_run <- function() {
  # Two-digit start years of the first and last financial year to run.
  first_start <- 17
  last_start <- 23

  start_years <- first_start:last_start

  # Each financial year ends one calendar year after it starts.
  paste0(start_years, start_years + 1L)
}
4 changes: 1 addition & 3 deletions R/add_activity_after_death_flag.R
Original file line number Diff line number Diff line change
Expand Up @@ -183,9 +183,7 @@ process_combined_deaths_lookup <- function(update = latest_update(),
if (write_to_disk) {
write_file(
all_boxi_deaths,
fs::path(get_slf_dir(), "Deaths",
file_name = stringr::str_glue("anon-combined_slf_deaths_lookup_{update}.parquet")
)
get_combined_slf_deaths_lookup_path()
)
}

Expand Down
40 changes: 40 additions & 0 deletions R/add_deceased_flag.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#' Create the SLF Deaths lookup with a deceased flag
#'
#' @description Take the refined death data, derive a single `death_date`
#' (the `record_keydate1` value, falling back to `death_date_chi` when that is
#' missing) and flag every record as deceased. Optionally write the result to
#' the SLF deaths lookup path for `year`.
#'
#' @param year The year to process, in FY format. Used to build the output
#' path when writing to disk.
#' @param refined_death Refined death data. The default reads the file at
#' [get_combined_slf_deaths_lookup_path()] and converts it with
#' [slfhelper::get_chi()].
#' @param write_to_disk (optional) Should the data be written to disk default is
#' `TRUE` i.e. write the data to disk.
#'
#' @return The deaths lookup with `death_date` and `deceased` columns, after
#' conversion with [slfhelper::get_anon_chi()].
#' @export
add_deceased_flag <- function(
    year,
    refined_death = read_file(get_combined_slf_deaths_lookup_path()) %>% slfhelper::get_chi(),
    write_to_disk = TRUE) {
  # create slf deaths lookup
  # The pipeline must start from `refined_death` and be assigned to
  # `slf_deaths_lookup`, which is both written to disk and returned below.
  # NOTE(review): this step reads `record_keydate1` and `death_date_chi`;
  # confirm the supplied `refined_death` still carries both columns.
  # NOTE(review): `year` is only used for the output path here - confirm
  # whether the data should also be filtered to that financial year.
  slf_deaths_lookup <- refined_death %>%
    dplyr::mutate(
      death_date = dplyr::if_else(is.na(.data$record_keydate1),
        .data$death_date_chi, .data$record_keydate1
      ),
      deceased = TRUE,
      # drop the source date columns consumed above
      .keep = "unused"
    ) %>%
    # save anon chi on disk
    slfhelper::get_anon_chi()

  if (write_to_disk) {
    write_file(
      slf_deaths_lookup,
      get_slf_deaths_lookup_path(year, check_mode = "write")
    )
  }

  return(slf_deaths_lookup)
}
8 changes: 6 additions & 2 deletions R/get_slf_lookup_paths.R
Original file line number Diff line number Diff line change
Expand Up @@ -73,17 +73,21 @@ get_slf_deaths_lookup_path <- function(year, ...) {
#' SLF death dates File Path
#'
#' @description Get the full path to the BOXI NRS Deaths lookup file for all financial years
#' Note this name is very similar to the existing slf_deaths_lookup_path
#' which returns the path for the refined_death with deceased flag for each financial year.
#' This function will return the combined financial years lookup
#' i.e. all years put together.
#'
#' @param ... additional arguments passed to [get_file_path()]
#' @param update the update month (defaults to use [latest_update()])
#'
#' @export
#' @family slf lookup file path
#' @seealso [get_file_path()] for the generic function.

get_combined_slf_deaths_lookup_path <- function(update = latest_update(), ...) {
# Note this name is very similar to the existing slf_deaths_lookup_path which returns the path for
# the processed BOXI extract for each financial year. This function will return the combined financial
# the refined_death with deceased flag for each financial year.
# This function will return the combined financial
# years lookup i.e. all years put together.
combined_slf_deaths_lookup_path <- get_file_path(
directory = fs::path(get_slf_dir(), "Deaths"),
Expand Down
7 changes: 3 additions & 4 deletions R/process_it_chi_deaths.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,9 @@ process_it_chi_deaths <- function(data, write_to_disk = TRUE) {
dplyr::desc(.data$death_date_chi)
) %>%
dplyr::distinct(.data$chi, .keep_all = TRUE) %>%
# Use the NRS death_date unless it isn't there
dplyr::mutate(
death_date = dplyr::coalesce(.data$death_date_nrs, .data$death_date_chi)
) %>%
# remove death_date_nrs as this is the nrs weekly unvalidated data and we should not use this.
# the boxi nrs death date is more reliable as this is provided monthly and is validated.
dplyr::select(.data$chi, .data$death_date_chi) %>%
slfhelper::get_anon_chi()

if (write_to_disk) {
Expand Down
33 changes: 13 additions & 20 deletions R/process_lookup_deaths.R
Original file line number Diff line number Diff line change
@@ -1,36 +1,29 @@
#' Create the SLF Deaths lookup
#'
#' @description Currently this just uses the NRS death dates 'as is', with no
#' corrections or modifications, it is expected that this will be expanded to
#' use the CHI deaths extract from IT as well as taking into account data in
#' the episode file to assess the validity of a death date.
#' @description Use all-year refined death data to produce year-specific
#' slf_deaths_lookup with deceased flag added.
#'
#' @param year The year to process, in FY format.
#' @param nrs_deaths_data NRS deaths data.
#' @param chi_deaths_data IT CHI deaths data.
#' @param refined_death refined death date combining nrs and it_chi.
#' @param write_to_disk (optional) Should the data be written to disk default is
#' `TRUE` i.e. write the data to disk.
#'
#' @return a [tibble][tibble::tibble-package] containing the episode file
#' @return a [tibble][tibble::tibble-package] of deaths with a deceased flag added
#' @export
process_slf_deaths_lookup <- function(
year,
nrs_deaths_data = read_file(
get_source_extract_path(year, "deaths"),
col_select = c("chi", "record_keydate1")
),
chi_deaths_data = read_file(get_slf_chi_deaths_path()),
refined_death = read_file(get_combined_slf_deaths_lookup_path()),
write_to_disk = TRUE) {
slf_deaths_lookup <- nrs_deaths_data %>%
# Only modification over 'raw' NRS is to keep the earliest death date
dplyr::select("chi", "record_keydate1") %>%
dplyr::arrange(.data$record_keydate1) %>%
dplyr::distinct(.data$chi, .keep_all = TRUE) %>%
# create slf deaths lookup
slf_deaths_lookup <- refined_death %>%
slfhelper::get_chi() %>%
# Filter the chi death dates to the FY as the lookup is by FY
dplyr::filter(fy == year) %>%
# use the BOXI NRS death date by default, but if it's missing, use the chi death date.
dplyr::mutate(
death_date = .data$record_keydate1,
deceased = TRUE,
.keep = "unused"
deceased = TRUE
) %>%
# save anon chi on disk
slfhelper::get_anon_chi()

if (write_to_disk) {
Expand Down
62 changes: 62 additions & 0 deletions R/process_refined_death.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#' Process the refined death data
#'
#' @description This will process the year-specific BOXI NRS death extract for
#' every year returned by [years_to_run()] (`write_to_disk` is forwarded to
#' [process_extract_nrs_deaths()]), and bind them together into an all-years
#' NRS table (not written to disk here). The all-years NRS deaths are then
#' full-joined with the IT CHI death data to get an all-year refined death
#' file, which is written to [get_combined_slf_deaths_lookup_path()] when
#' `write_to_disk` is `TRUE`.
#'
#' @param it_chi_deaths it chi death data; must contain `anon_chi` and
#' `death_date_chi`.
#' @param write_to_disk write the result to disk or not.
#'
#' @return refined_death The processed lookup of deaths combining NRS and
#' IT_CHI, with columns `anon_chi`, `death_date` and `fy`.
#' @export
#' @family process extracts
process_refined_death <- function(
    it_chi_deaths = read_file(get_slf_chi_deaths_path()),
    write_to_disk = TRUE) {
  years_list <- years_to_run()

  # Read and process the NRS deaths extract for each year, then stack them.
  nrs_all_years <- lapply(years_list, \(year) {
    read_extract_nrs_deaths(
      year,
      get_boxi_extract_path(year, type = "deaths")
    ) %>%
      process_extract_nrs_deaths(year,
        write_to_disk = write_to_disk
      )
  }) %>%
    data.table::rbindlist()

  it_chi_deaths <- it_chi_deaths %>%
    dplyr::select(c(
      "anon_chi",
      "death_date_chi"
    )) %>%
    # `arrange()` has no `.keep_all` argument, so only the sort key is passed.
    dplyr::arrange(.data$anon_chi)

  refined_death <- nrs_all_years %>%
    dplyr::arrange(.data$anon_chi) %>%
    # full join keeps people who appear in only one of the two sources
    dplyr::full_join(it_chi_deaths, by = "anon_chi") %>%
    # use the BOXI NRS death date by default, but if it's missing, use the chi death date.
    dplyr::mutate(death_date = dplyr::if_else(
      is.na(.data$record_keydate1),
      .data$death_date_chi,
      .data$record_keydate1
    )) %>%
    dplyr::select("anon_chi", "death_date") %>%
    # add fy when death happened
    dplyr::mutate(
      fy = phsmethods::extract_fin_year(.data$death_date),
      # Rebuild e.g. "2023/24" as "2324". `paste0()` would turn a missing
      # financial year into the literal string "NANA", so keep it as NA.
      fy = dplyr::if_else(
        is.na(.data$fy),
        NA_character_,
        paste0(substr(.data$fy, 3, 4), substr(.data$fy, 6, 7))
      )
    )

  if (write_to_disk) {
    write_file(
      refined_death,
      get_combined_slf_deaths_lookup_path()
    )
  }

  return(refined_death)
}
8 changes: 4 additions & 4 deletions R/process_sc_all_care_home.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
#' @param data The extract to process
#' @param sc_demog_lookup The Social Care Demographics lookup produced by
#' [process_lookup_sc_demographics()].
#' @param it_chi_deaths_data The processed lookup of deaths from IT produced
#' with [process_it_chi_deaths()].
#' @param refined_death The processed lookup of deaths combining NRS and
#' IT CHI data, produced with [process_refined_death()].
#' @param ch_name_lookup_path Path to the Care Home name Lookup Excel workbook.
#' @param spd_path (Optional) Path the Scottish Postcode Directory, default is
#' to use [get_spd_path()].
Expand All @@ -23,7 +23,7 @@
process_sc_all_care_home <- function(
data,
sc_demog_lookup = read_file(get_sc_demog_lookup_path()) %>% slfhelper::get_chi(),
it_chi_deaths_data = read_file(get_slf_chi_deaths_path()),
refined_death = read_file(get_combined_slf_deaths_lookup_path()) %>% slfhelper::get_chi(),
ch_name_lookup_path = read_file(get_slf_ch_name_lookup_path()),
spd_path = read_file(get_spd_path()),
write_to_disk = TRUE) {
Expand Down Expand Up @@ -207,7 +207,7 @@ process_sc_all_care_home <- function(
# Compare to Deaths Data
# match ch_episode data with deaths data
matched_deaths_data <- ch_episode %>%
dplyr::left_join(it_chi_deaths_data,
dplyr::left_join(refined_death,
by = "chi"
) %>%
# compare discharge date with NRS and CHI death date
Expand Down
4 changes: 0 additions & 4 deletions Run_SLF_Files_targets/run_all_targets.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,3 @@ library(targets)
# use tar_make_future() to run targets for all years
# This will run everything needed for creating the episode file.
tar_make_future()

# Combine deaths lookup here rather than in targets to make sure that
# it is run after the death file for each year is produced.
createslf::process_combined_deaths_lookup()
56 changes: 38 additions & 18 deletions _targets.R
Original file line number Diff line number Diff line change
Expand Up @@ -167,12 +167,19 @@ list(
age = as.difftime(28.0, units = "days")
)
),
tar_target(
refined_death_data,
process_refined_death(
it_chi_deaths = it_chi_deaths_data,
write_to_disk = write_to_disk
)
),
tar_target(
all_care_home,
process_sc_all_care_home(
all_care_home_extract,
sc_demog_lookup = sc_demog_lookup %>% slfhelper::get_chi(),
it_chi_deaths_data = it_chi_deaths_data %>% slfhelper::get_chi(),
refined_death = refined_death_data %>% slfhelper::get_chi(),
ch_name_lookup_path = slf_ch_name_lookup_path,
spd_path = spd_path,
write_to_disk = write_to_disk
Expand Down Expand Up @@ -204,6 +211,8 @@ list(
tests_sc_all_sds,
process_tests_sc_all_sds_episodes(all_sds)
),

# Phase II
tar_map(
list(year = years_to_run),
tar_rds(
Expand Down Expand Up @@ -251,11 +260,11 @@ list(
get_boxi_extract_path(year, type = "mh"),
read_extract_mental_health(year, !!.x)
),
tar_file_read(
nrs_deaths_data,
get_boxi_extract_path(year, type = "deaths"),
read_extract_nrs_deaths(year, !!.x)
),
# tar_file_read(
# nrs_deaths_data,
# get_boxi_extract_path(year, type = "deaths"),
# read_extract_nrs_deaths(year, !!.x)
# ),
tar_file_read(
outpatients_data,
get_boxi_extract_path(year, type = "outpatient"),
Expand Down Expand Up @@ -403,23 +412,35 @@ list(
year
)
),
tar_target(source_mental_health_extract, process_extract_mental_health(
mental_health_data,
year,
write_to_disk = write_to_disk
)),
tar_target(
source_mental_health_extract,
process_extract_mental_health(
mental_health_data,
year,
write_to_disk = write_to_disk
)
),
tar_target(
tests_source_mental_health_extract,
process_tests_mental_health(
source_mental_health_extract,
year
)
),
tar_target(source_nrs_deaths_extract, process_extract_nrs_deaths(
nrs_deaths_data,
year,
write_to_disk = write_to_disk
)),
# tar_target(source_nrs_deaths_extract, process_extract_nrs_deaths(
# nrs_deaths_data,
# year,
# write_to_disk = write_to_disk
# )),
tar_target(
source_nrs_deaths_extract,
# use this anonymous function with a redundant but necessary refined_death
# argument so the year-specific nrs deaths extract is read only after it is produced
(\(year, refined_death_datas) {
read_file(get_source_extract_path(year, "deaths")) %>%
as.data.frame()
})(year, refined_death_data)
),
tar_target(
tests_source_nrs_deaths_extract,
process_tests_nrs_deaths(
Expand Down Expand Up @@ -549,8 +570,7 @@ list(
slf_deaths_lookup,
process_slf_deaths_lookup(
year = year,
nrs_deaths_data = source_nrs_deaths_extract %>% slfhelper::get_chi(),
chi_deaths_data = it_chi_deaths_data %>% slfhelper::get_chi(),
refined_death = refined_death_data,
write_to_disk = write_to_disk
)
),
Expand Down
Loading
Loading