diff --git a/R/create_episode_file.R b/R/create_episode_file.R index 804854271..8025ab225 100644 --- a/R/create_episode_file.R +++ b/R/create_episode_file.R @@ -33,12 +33,12 @@ create_episode_file <- function( sc_client = read_file(get_sc_client_lookup_path(year)) %>% slfhelper::get_chi(), write_to_disk = TRUE, anon_chi_out = TRUE, - test_mode) { + write_temp_to_disk = FALSE) { processed_data_list <- purrr::discard(processed_data_list, ~ is.null(.x) | identical(.x, tibble::tibble())) episode_file <- dplyr::bind_rows(processed_data_list) %>% slfhelper::get_chi() %>% - write_temp_data(year, file_name = "ep_temp1", test_mode) %>% + write_temp_data(year, file_name = "ep_temp1", write_temp_to_disk) %>% create_cost_inc_dna() %>% apply_cost_uplift() %>% store_ep_file_vars( @@ -122,18 +122,18 @@ create_episode_file <- function( # PC8 format may still be used. Ensure here that all datasets are in PC7 format. postcode = phsmethods::format_postcode(.data$postcode, "pc7") ) %>% - write_temp_data(year, file_name = "ep_temp2", test_mode) %>% + write_temp_data(year, file_name = "ep_temp2", write_temp_to_disk) %>% correct_cij_vars() %>% fill_missing_cij_markers() %>% add_homelessness_flag(year, lookup = homelessness_lookup) %>% add_homelessness_date_flags(year, lookup = homelessness_lookup) %>% add_ppa_flag() %>% - write_temp_data(year, file_name = "ep_temp3", test_mode) %>% + write_temp_data(year, file_name = "ep_temp3", write_temp_to_disk) %>% link_delayed_discharge_eps(year, dd_data) %>% add_nsu_cohort(year, nsu_cohort) %>% match_on_ltcs(year, ltc_data) %>% correct_demographics(year) %>% - write_temp_data(year, file_name = "ep_temp4", test_mode) %>% + write_temp_data(year, file_name = "ep_temp4", write_temp_to_disk) %>% create_cohort_lookups(year) %>% join_cohort_lookups(year) %>% join_sparra_hhg(year) %>% @@ -145,13 +145,13 @@ create_episode_file <- function( year, slf_deaths_lookup ) %>% - write_temp_data(year, file_name = "ep_temp5", test_mode) %>% + write_temp_data(year, 
file_name = "ep_temp5", write_temp_to_disk) %>% add_activity_after_death_flag(year, deaths_data = read_file(get_combined_slf_deaths_lookup_path()) %>% slfhelper::get_chi() ) %>% load_ep_file_vars(year) %>% - write_temp_data(year, file_name = "ep_temp6", test_mode) + write_temp_data(year, file_name = "ep_temp6", write_temp_to_disk) if (!check_year_valid(year, type = c("ch", "hc", "at", "sds"))) { episode_file <- episode_file %>% diff --git a/R/create_individual_file.R b/R/create_individual_file.R index b4ac01164..29c410704 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -18,7 +18,7 @@ create_individual_file <- function( write_to_disk = TRUE, anon_chi_in = TRUE, anon_chi_out = TRUE, - test_mode) { + write_temp_to_disk = FALSE) { if (anon_chi_in) { episode_file <- slfhelper::get_chi( episode_file, @@ -76,7 +76,7 @@ create_individual_file <- function( remove_blank_chi() %>% add_cij_columns() %>% add_all_columns(year = year) %>% - write_temp_data(data, year, file_name = "indiv_temp1", test_mode) + write_temp_data(year, file_name = "indiv_temp1", write_temp_to_disk) if (!check_year_valid(year, type = c("ch", "hc", "at", "sds"))) { individual_file <- individual_file %>% @@ -85,24 +85,25 @@ create_individual_file <- function( individual_file <- individual_file %>% aggregate_ch_episodes() %>% clean_up_ch(year) %>% - aggregate_by_chi(year = year, exclude_sc_var = FALSE) + aggregate_by_chi(year = year, exclude_sc_var = FALSE) %>% + write_temp_data(year, file_name = "indiv_temp2", write_temp_to_disk) } individual_file <- individual_file %>% recode_gender() %>% clean_individual_file(year) %>% join_cohort_lookups(year) %>% - write_temp_data(data, year, file_name = "indiv_temp2", test_mode) %>% + write_temp_data(year, file_name = "indiv_temp3", write_temp_to_disk) %>% add_homelessness_flag(year, lookup = homelessness_lookup) %>% match_on_ltcs(year) %>% join_deaths_data(year) %>% join_sparra_hhg(year) %>% - write_temp_data(data, year, file_name = "indiv_temp3", 
test_mode) %>% + write_temp_data(year, file_name = "indiv_temp4", write_temp_to_disk) %>% join_slf_lookup_vars() %>% dplyr::mutate(year = year) %>% add_hri_variables(chi_variable = "chi") %>% add_keep_population_flag(year) %>% - write_temp_data(data, year, file_name = "indiv_temp4", test_mode) %>% + write_temp_data(year, file_name = "indiv_temp5", write_temp_to_disk) %>% join_sc_client(year, file_type = "individual") if (!check_year_valid(year, type = c("ch", "hc", "at", "sds"))) { diff --git a/R/write_temp_data.R b/R/write_temp_data.R index 753e5e110..c049d3b78 100644 --- a/R/write_temp_data.R +++ b/R/write_temp_data.R @@ -4,66 +4,56 @@ #' @param data The data to be written #' @param year year variable #' @param file_name The file name to be written -#' @param test_mode Boolean type to determine whether it is in a test mode +#' @param write_temp_to_disk Boolean type, write temp data to disk or not #' #' @return the data for next step as a [tibble][tibble::tibble-package]. #' @export write_temp_data <- - function(data, year, file_name, test_mode) { - full_file_name <- stringr::str_glue("{file_name}.parquet") - file_path <- file.path( - get_year_dir(year), - full_file_name - ) %>% - add_test_to_filename(test_mode) + function(data, year, file_name, write_temp_to_disk) { + if (write_temp_to_disk) { + full_file_name <- stringr::str_glue("{file_name}.parquet") + file_path <- file.path(get_year_dir(year), + full_file_name) - cli::cli_alert_info(stringr::str_glue("Writing {full_file_name} to disk started at {Sys.time()}")) - - write_file(data, - path = file_path - ) + cli::cli_alert_info(stringr::str_glue("Writing {full_file_name} to disk started at {Sys.time()}")) + write_file(data, + path = file_path) + } return(data) } -read_temp_data <- function(year, file_name, test_mode) { + +#' Read temp data from disk for debugging purposes +#' +#' @description Read temp data from disk for debugging purposes. 
+#' @param year year variable +#' @param file_name The file name to be read +#' +#' @return the data for next step as a [tibble][tibble::tibble-package]. +#' @export +read_temp_data <- function(year, file_name) { full_file_name <- stringr::str_glue("{file_name}.parquet") - file_path <- file.path( - get_year_dir(year), - full_file_name - ) %>% - add_test_to_filename(test_mode) + file_path <- file.path(get_year_dir(year), + full_file_name) return(read_file(file_path)) } - -#' Add "TEST-" to the file name of a file Path +#' Clean temp data from disk #' -#' @description This function takes a full file path and adds "TEST-" as a prefix to the file name, while preserving the directory structure. +#' @description Clean temp data from disk to save storage. +#' @param year year variable +#' @param file_type Which temp files to remove: "ep" (`ep_temp*` files) or "ind" (`indiv_temp*` files) #' -#' @param file_path A character string representing the full path to a file (e.g., "/path/to/folder/data.csv"). -#' @return A character string representing the modified file path with "TEST-" added to the file name. +#' @return A logical vector from [base::file.remove()], `TRUE` for each temp file successfully deleted. 
#' @export -#' @examples -#' # Example usage -#' file_path <- "/conf/folder1/folder2/data.csv" -#' new_file_path <- add_test_to_filename(file_path) -#' print(new_file_path) # Outputs: "/conf/folder1/folder2/TEST-data.csv" -add_test_to_filename <- function(file_path, test_mode) { - if (test_mode) { - # Extract the directory and the file name separately - dir_path <- dirname(file_path) - file_name <- basename(file_path) - - # Add "TEST-" to the file name - new_file_name <- paste0("TEST-", file_name) - - # Reconstruct the new file path - new_file_path <- file.path(dir_path, new_file_name) - - return(new_file_path) - } else { - return(file_path) - } +clean_temp_data <- function(year, file_type = c("ep", "ind")) { + file_type <- match.arg(file_type) + # Temp files are written as ep_temp* / indiv_temp*, so "ind" maps to the "indiv" prefix + prefix <- if (file_type == "ind") "indiv" else file_type + list.files(path = get_year_dir(year), + pattern = stringr::str_glue("^{prefix}_temp"), + full.names = TRUE) %>% + file.remove() } diff --git a/Run_SLF_Files_manually/run_individual_file_1415.R b/Run_SLF_Files_manually/run_individual_file_1415.R index 70aa2bfca..37bf7fe24 100644 --- a/Run_SLF_Files_manually/run_individual_file_1415.R +++ b/Run_SLF_Files_manually/run_individual_file_1415.R @@ -2,6 +2,8 @@ library(createslf) year <- "1415" +clean_temp_data(year, "ep") + episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file diff --git a/Run_SLF_Files_manually/run_individual_file_1516.R b/Run_SLF_Files_manually/run_individual_file_1516.R index 8e8dae906..8c6cc48e6 100644 --- a/Run_SLF_Files_manually/run_individual_file_1516.R +++ b/Run_SLF_Files_manually/run_individual_file_1516.R @@ -2,6 +2,8 @@ library(createslf) year <- "1516" +clean_temp_data(year, "ep") + episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file diff --git a/Run_SLF_Files_manually/run_individual_file_1617.R b/Run_SLF_Files_manually/run_individual_file_1617.R 
index 255e4e674..5105ef393 100644 --- a/Run_SLF_Files_manually/run_individual_file_1617.R +++ b/Run_SLF_Files_manually/run_individual_file_1617.R @@ -2,6 +2,8 @@ library(createslf) year <- "1617" 
+clean_temp_data(year, "ep") + episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file diff --git a/Run_SLF_Files_manually/run_individual_file_1718.R b/Run_SLF_Files_manually/run_individual_file_1718.R index 777948fc7..328ef78aa 100644 --- a/Run_SLF_Files_manually/run_individual_file_1718.R +++ b/Run_SLF_Files_manually/run_individual_file_1718.R @@ -2,6 +2,8 @@ library(createslf) year <- "1718" +clean_temp_data(year, "ep") + episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file diff --git a/Run_SLF_Files_manually/run_individual_file_1819.R b/Run_SLF_Files_manually/run_individual_file_1819.R index 18839b2ea..db9d56455 100644 --- a/Run_SLF_Files_manually/run_individual_file_1819.R +++ b/Run_SLF_Files_manually/run_individual_file_1819.R @@ -2,6 +2,8 @@ library(createslf) year <- "1819" +clean_temp_data(year, "ep") + episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file diff --git a/Run_SLF_Files_manually/run_individual_file_1920.R b/Run_SLF_Files_manually/run_individual_file_1920.R index 3567d5c5d..80b8f15fb 100644 --- a/Run_SLF_Files_manually/run_individual_file_1920.R +++ b/Run_SLF_Files_manually/run_individual_file_1920.R @@ -2,6 +2,8 @@ library(createslf) year <- "1920" +clean_temp_data(year, "ep") + episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file diff --git a/Run_SLF_Files_manually/run_individual_file_2021.R b/Run_SLF_Files_manually/run_individual_file_2021.R index 8a78924b3..7b60a2afe 100644 --- a/Run_SLF_Files_manually/run_individual_file_2021.R +++ b/Run_SLF_Files_manually/run_individual_file_2021.R @@ -2,6 +2,8 @@ library(createslf) year <- "2021" +clean_temp_data(year, "ep") + episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file diff --git a/Run_SLF_Files_manually/run_individual_file_2122.R b/Run_SLF_Files_manually/run_individual_file_2122.R index 9ceaa571c..623e54aac 100644 --- 
a/Run_SLF_Files_manually/run_individual_file_2122.R +++ b/Run_SLF_Files_manually/run_individual_file_2122.R @@ -2,6 +2,8 @@ library(createslf) year <- "2122" +clean_temp_data(year, "ep") + episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file diff --git a/Run_SLF_Files_manually/run_individual_file_2223.R b/Run_SLF_Files_manually/run_individual_file_2223.R index b83507dbc..b16c672cb 100644 --- a/Run_SLF_Files_manually/run_individual_file_2223.R +++ b/Run_SLF_Files_manually/run_individual_file_2223.R @@ -2,6 +2,8 @@ library(createslf) year <- "2223" +clean_temp_data(year, "ep") + episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file diff --git a/Run_SLF_Files_manually/run_individual_file_2324.R b/Run_SLF_Files_manually/run_individual_file_2324.R index 3f6cf0fba..9b27d33ad 100644 --- a/Run_SLF_Files_manually/run_individual_file_2324.R +++ b/Run_SLF_Files_manually/run_individual_file_2324.R @@ -2,6 +2,8 @@ library(createslf) year <- "2324" +clean_temp_data(year, "ep") + episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file diff --git a/Run_SLF_Files_manually/run_individual_file_2425.R b/Run_SLF_Files_manually/run_individual_file_2425.R index 843eb505c..4a4f25762 100644 --- a/Run_SLF_Files_manually/run_individual_file_2425.R +++ b/Run_SLF_Files_manually/run_individual_file_2425.R @@ -2,6 +2,8 @@ library(createslf) year <- "2425" +clean_temp_data(year, "ep") + episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file