#' Create and read SLF Deaths lookup from processed BOXI NRS deaths extracts
#'
#' @description The BOXI NRS deaths extract lookup should be created after the
#' extract files for all years have been processed, but before an episode file
#' has been produced. Therefore, all BOXI NRS years should be run before
#' running episode files.
#'
#' Rows with a missing (`NA`) or blank CHI are dropped, the remaining CHIs are
#' validated, and any CHI that appears on more than one row is reported with a
#' warning and saved separately for investigation. Duplicated rows are kept in
#' the returned lookup.
#'
#' @param update the update month (defaults to use [latest_update()]); used to
#' name the duplicates file.
#' @param write_to_disk (optional) Should the data be written to disk default is
#' `TRUE` i.e. write the data to disk.
#' @param ... currently unused; accepted for backward compatibility.
#'
#' @return the combined deaths lookup for all years, after conversion with
#' [slfhelper::get_anon_chi()].
#' @export
process_combined_deaths_lookup <- function(update = latest_update(),
                                           write_to_disk = TRUE, ...) {
  # Read data------------------------------------------------
  dir_folder <- "/conf/hscdiip/SLF_Extracts/Deaths"
  file_names <- list.files(dir_folder,
    pattern = "^anon-slf_deaths_lookup_.*parquet",
    full.names = TRUE
  )

  # Fail early with a clear message instead of an obscure error further down
  # the pipeline when no year-specific lookups have been produced yet.
  if (length(file_names) == 0L) {
    stop(
      "No SLF deaths lookup files were found in ", dir_folder,
      call. = FALSE
    )
  }

  # Read all year specific deaths lookups and bind them together
  all_boxi_deaths <- lapply(file_names, arrow::read_parquet) %>%
    data.table::rbindlist() %>%
    # convert to chi for processing
    slfhelper::get_chi() %>%
    # Remove rows with a missing or blank CHI number, as they cannot be matched
    # to the episode file. Both conditions must hold: combining them with `|`
    # would keep blank strings because `!is.na("")` is TRUE.
    dplyr::filter(!is.na(.data$chi), .data$chi != "")

  # Check all remaining CHI numbers are valid (missing values were removed
  # above, so anything other than "Valid CHI" is a bad CHI).
  chi_check <- phsmethods::chi_check(all_boxi_deaths$chi)

  if (!all(chi_check %in% "Valid CHI")) {
    stop("There were bad CHI numbers in the BOXI NRS file")
  }

  # Find CHI numbers which appear on more than one row.
  duplicates <- janitor::get_dupes(all_boxi_deaths, "chi")

  # We decided to include duplicates as we are unable to determine which is the
  # correct date, and the overall impact will be negligible.
  # Get anon_chi and use this to match onto the episode file later.
  all_boxi_deaths <- slfhelper::get_anon_chi(all_boxi_deaths)

  # `missing()` only applies to function arguments, so it cannot be used to
  # test whether any duplicates were found; check the row count instead.
  if (nrow(duplicates) > 0L) {
    warning("There were duplicate death dates in the BOXI NRS file.")

    # Save out duplicates for further investigation if needed. Convert to
    # anon_chi first so that raw CHI numbers are not written to disk.
    write_file(
      slfhelper::get_anon_chi(duplicates),
      fs::path(
        get_slf_dir(),
        "Deaths",
        stringr::str_glue("slf_deaths_duplicates_{update}.parquet")
      )
    )
  }

  # Write the all BOXI NRS deaths lookup file to disk, so this can be used to
  # populate the activity after death flag in each episode file.
  if (write_to_disk) {
    write_file(
      all_boxi_deaths,
      get_combined_slf_deaths_lookup_path()
    )
  }

  return(all_boxi_deaths)
}