Skip to content

Commit

Permalink
remove redundant combine death function
Browse files Browse the repository at this point in the history
  • Loading branch information
lizihao-anu committed Oct 8, 2024
1 parent f4bfd85 commit 883f762
Showing 1 changed file with 0 additions and 84 deletions.
84 changes: 0 additions & 84 deletions R/add_activity_after_death_flag.R
Original file line number Diff line number Diff line change
Expand Up @@ -108,87 +108,3 @@ add_activity_after_death_flag <- function(

return(final_data)
}


#' Create and read SLF Deaths lookup from processed BOXI NRS deaths extracts
#'
#' @description The BOXI NRS deaths extract lookup should be created after the
#' extract files for all years have been processed, but before an episode file
#' has been produced. Therefore, all BOXI NRS years should be run before
#' running episode files.
#'
#' Every year-specific deaths lookup found in the SLF deaths folder is read
#' and bound together. Rows with a missing or blank CHI are dropped, the
#' remaining CHI numbers are validated, and a warning is raised if any CHI
#' appears more than once.
#'
#' @param update the update month (defaults to use [latest_update()]). It is
#' used to name the duplicates file.
#' @param write_to_disk (optional) Should the data be written to disk default is
#' `TRUE` i.e. write the data to disk. When `FALSE` neither the combined
#' lookup nor the duplicates file is written.
#' @param ... currently unused.
#'
#' @return the combined deaths lookup after conversion with
#' [slfhelper::get_anon_chi()].
#' @export
process_combined_deaths_lookup <- function(update = latest_update(),
                                           write_to_disk = TRUE, ...) {
  # Read data ----
  dir_folder <- "/conf/hscdiip/SLF_Extracts/Deaths"
  file_names <- list.files(dir_folder,
    pattern = "^anon-slf_deaths_lookup_.*parquet",
    full.names = TRUE
  )

  # Fail early with a clear message rather than a cryptic error further down
  # when there is nothing to bind together.
  if (length(file_names) == 0L) {
    stop("No year-specific deaths lookups were found in ", dir_folder)
  }

  # Read all year-specific deaths lookups and bind them together
  all_boxi_deaths <- lapply(file_names, arrow::read_parquet) %>%
    data.table::rbindlist() %>%
    # convert to chi for processing
    slfhelper::get_chi() %>%
    # Remove rows with a missing or blank CHI number. Both conditions must
    # hold (`&`): with `|` a blank CHI passes because it is not NA.
    dplyr::filter(!is.na(.data$chi) & .data$chi != "")

  # Check all CHI numbers are valid
  chi_check <- all_boxi_deaths %>%
    dplyr::pull("chi") %>%
    phsmethods::chi_check()

  if (!all(chi_check %in% c("Valid CHI", "Missing (Blank)", "Missing (NA)"))) {
    # There are some Missing (NA) values in the extracts, but they are
    # excluded above as they cannot be matched to the episode file
    stop("There were bad CHI numbers in the BOXI NRS file")
  }

  # Check and warn about CHI numbers that appear more than once
  duplicates <- all_boxi_deaths %>%
    janitor::get_dupes("chi")
  has_duplicates <- nrow(duplicates) > 0

  if (has_duplicates) {
    warning("There were duplicate death dates in the BOXI NRS file.")
  }

  # We decided to include duplicates as we are unable to determine which is
  # the correct date (unless IT can tell us, however, they don't seem to know
  # the process well enough), and the overall impact will be negligible.
  # Get anon_chi and use this to match onto the episode file later
  all_boxi_deaths <- all_boxi_deaths %>%
    slfhelper::get_anon_chi()

  # Save out duplicates for further investigation if needed (as anon_chi).
  # Only written when there is something to save and writing is enabled.
  if (write_to_disk && has_duplicates) {
    write_file(
      slfhelper::get_anon_chi(duplicates),
      fs::path(
        get_slf_dir(), "Deaths",
        stringr::str_glue("slf_deaths_duplicates_{update}.parquet")
      )
    )
  }

  # Write the all BOXI NRS deaths lookup file to disk, so this can be used to
  # populate the activity after death flag in each episode file
  if (write_to_disk) {
    write_file(
      all_boxi_deaths,
      get_combined_slf_deaths_lookup_path()
    )
  }

  return(all_boxi_deaths)
}

0 comments on commit 883f762

Please sign in to comment.