Skip to content

Commit

Permalink
change test_mode to write_temp_to_disk and add clean temp function
Browse files Browse the repository at this point in the history
  • Loading branch information
lizihao-anu committed Oct 11, 2024
1 parent 9148570 commit eaf2fba
Show file tree
Hide file tree
Showing 14 changed files with 67 additions and 58 deletions.
14 changes: 7 additions & 7 deletions R/create_episode_file.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,12 @@ create_episode_file <- function(
sc_client = read_file(get_sc_client_lookup_path(year)) %>% slfhelper::get_chi(),
write_to_disk = TRUE,
anon_chi_out = TRUE,
test_mode) {
write_temp_to_disk = FALSE) {
processed_data_list <- purrr::discard(processed_data_list, ~ is.null(.x) | identical(.x, tibble::tibble()))

episode_file <- dplyr::bind_rows(processed_data_list) %>%
slfhelper::get_chi() %>%
write_temp_data(year, file_name = "ep_temp1", test_mode) %>%
write_temp_data(year, file_name = "ep_temp1", write_temp_to_disk) %>%
create_cost_inc_dna() %>%
apply_cost_uplift() %>%
store_ep_file_vars(
Expand Down Expand Up @@ -122,18 +122,18 @@ create_episode_file <- function(
# PC8 format may still be used. Ensure here that all datasets are in PC7 format.
postcode = phsmethods::format_postcode(.data$postcode, "pc7")
) %>%
write_temp_data(year, file_name = "ep_temp2", test_mode) %>%
write_temp_data(year, file_name = "ep_temp2", write_temp_to_disk) %>%
correct_cij_vars() %>%
fill_missing_cij_markers() %>%
add_homelessness_flag(year, lookup = homelessness_lookup) %>%
add_homelessness_date_flags(year, lookup = homelessness_lookup) %>%
add_ppa_flag() %>%
write_temp_data(year, file_name = "ep_temp3", test_mode) %>%
write_temp_data(year, file_name = "ep_temp3", write_temp_to_disk) %>%
link_delayed_discharge_eps(year, dd_data) %>%
add_nsu_cohort(year, nsu_cohort) %>%
match_on_ltcs(year, ltc_data) %>%
correct_demographics(year) %>%
write_temp_data(year, file_name = "ep_temp4", test_mode) %>%
write_temp_data(year, file_name = "ep_temp4", write_temp_to_disk) %>%
create_cohort_lookups(year) %>%
join_cohort_lookups(year) %>%
join_sparra_hhg(year) %>%
Expand All @@ -145,13 +145,13 @@ create_episode_file <- function(
year,
slf_deaths_lookup
) %>%
write_temp_data(year, file_name = "ep_temp5", test_mode) %>%
write_temp_data(year, file_name = "ep_temp5", write_temp_to_disk) %>%
add_activity_after_death_flag(year,
deaths_data = read_file(get_combined_slf_deaths_lookup_path()) %>%
slfhelper::get_chi()
) %>%
load_ep_file_vars(year) %>%
write_temp_data(year, file_name = "ep_temp6", test_mode)
write_temp_data(year, file_name = "ep_temp6", write_temp_to_disk)

if (!check_year_valid(year, type = c("ch", "hc", "at", "sds"))) {
episode_file <- episode_file %>%
Expand Down
13 changes: 7 additions & 6 deletions R/create_individual_file.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ create_individual_file <- function(
write_to_disk = TRUE,
anon_chi_in = TRUE,
anon_chi_out = TRUE,
test_mode) {
write_temp_to_disk) {
if (anon_chi_in) {
episode_file <- slfhelper::get_chi(
episode_file,
Expand Down Expand Up @@ -76,7 +76,7 @@ create_individual_file <- function(
remove_blank_chi() %>%
add_cij_columns() %>%
add_all_columns(year = year) %>%
write_temp_data(data, year, file_name = "indiv_temp1", test_mode)
write_temp_data(year, file_name = "indiv_temp1", write_temp_to_disk)

if (!check_year_valid(year, type = c("ch", "hc", "at", "sds"))) {
individual_file <- individual_file %>%
Expand All @@ -85,24 +85,25 @@ create_individual_file <- function(
individual_file <- individual_file %>%
aggregate_ch_episodes() %>%
clean_up_ch(year) %>%
aggregate_by_chi(year = year, exclude_sc_var = FALSE)
aggregate_by_chi(year = year, exclude_sc_var = FALSE) %>%
write_temp_data(year, file_name = "indiv_temp2", write_temp_to_disk)
}

individual_file <- individual_file %>%
recode_gender() %>%
clean_individual_file(year) %>%
join_cohort_lookups(year) %>%
write_temp_data(data, year, file_name = "indiv_temp2", test_mode) %>%
write_temp_data(year, file_name = "indiv_temp3", write_temp_to_disk) %>%
add_homelessness_flag(year, lookup = homelessness_lookup) %>%
match_on_ltcs(year) %>%
join_deaths_data(year) %>%
join_sparra_hhg(year) %>%
write_temp_data(data, year, file_name = "indiv_temp3", test_mode) %>%
write_temp_data(year, file_name = "indiv_temp4", write_temp_to_disk) %>%
join_slf_lookup_vars() %>%
dplyr::mutate(year = year) %>%
add_hri_variables(chi_variable = "chi") %>%
add_keep_population_flag(year) %>%
write_temp_data(data, year, file_name = "indiv_temp4", test_mode) %>%
write_temp_data(year, file_name = "indiv_temp5", write_temp_to_disk) %>%
join_sc_client(year, file_type = "individual")

if (!check_year_valid(year, type = c("ch", "hc", "at", "sds"))) {
Expand Down
76 changes: 31 additions & 45 deletions R/write_temp_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,66 +4,52 @@
#' @param data The data to be written
#' @param year year variable
#' @param file_name The file name to be written
#' @param test_mode Boolean type to determine whether it is in a test mode
#' @param write_temp_to_disk Logical. If `TRUE`, the temp data is written to disk; if `FALSE`, nothing is written and `data` is passed through unchanged.
#'
#' @return the data for next step as a [tibble][tibble::tibble-package].
#' @export
write_temp_data <-
function(data, year, file_name, test_mode) {
full_file_name <- stringr::str_glue("{file_name}.parquet")
file_path <- file.path(
get_year_dir(year),
full_file_name
) %>%
add_test_to_filename(test_mode)
function(data, year, file_name, write_temp_to_disk) {
if (write_temp_to_disk) {
full_file_name <- stringr::str_glue("{file_name}.parquet")
file_path <- file.path(get_year_dir(year),
full_file_name)

cli::cli_alert_info(stringr::str_glue("Writing {full_file_name} to disk started at {Sys.time()}"))

write_file(data,
path = file_path
)
cli::cli_alert_info(stringr::str_glue("Writing {full_file_name} to disk started at {Sys.time()}"))

write_file(data,
path = file_path)
}
return(data)
}

read_temp_data <- function(year, file_name, test_mode) {

#' Read temp data from disk for debugging purposes
#'
#' @description Read a temp data file from disk for debugging purposes.
#' @param year year variable
#' @param file_name The file name to be read
#'
#' @return the data for next step as a [tibble][tibble::tibble-package].
#' @export
read_temp_data <- function(year, file_name) {
full_file_name <- stringr::str_glue("{file_name}.parquet")
file_path <- file.path(
get_year_dir(year),
full_file_name
) %>%
add_test_to_filename(test_mode)
file_path <- file.path(get_year_dir(year),
full_file_name)

return(read_file(file_path))
}


#' Add "TEST-" to the file name of a file Path
#' Clean temp data from disk
#'
#' @description This function takes a full file path and adds "TEST-" as a prefix to the file name, while preserving the directory structure.
#' @description Clean temp data from disk to save storage.
#' @param year year variable
#' @param file_type Which set of temp files to remove: `"ep"` (episode file) or `"ind"` (individual file).
#'
#' @param file_path A character string representing the full path to a file (e.g., "/path/to/folder/data.csv").
#' @return A character string representing the modified file path with "TEST-" added to the file name.
#' @return a logical vector with one element per matching temp file, `TRUE` where the file was removed.
#' @export
#' @examples
#' # Example usage
#' file_path <- "/conf/folder1/folder2/data.csv"
#' new_file_path <- add_test_to_filename(file_path)
#' print(new_file_path) # Outputs: "/conf/folder1/folder2/TEST-data.csv"
add_test_to_filename <- function(file_path, test_mode) {
if (test_mode) {
# Extract the directory and the file name separately
dir_path <- dirname(file_path)
file_name <- basename(file_path)

# Add "TEST-" to the file name
new_file_name <- paste0("TEST-", file_name)

# Reconstruct the new file path
new_file_path <- file.path(dir_path, new_file_name)

return(new_file_path)
} else {
return(file_path)
}
clean_temp_data <- function(year, file_type = c("ep", "ind")) {
  # Purpose: delete the temp files of one kind from the year directory.
  #   year      - passed to get_year_dir() to locate the directory to clean.
  #   file_type - "ep" or "ind"; selects which temp files are removed.
  # Returns (invisibly) the logical vector produced by file.remove(), one
  # element per matching file; logical(0) when nothing matches.

  # Reduce the argument to exactly one choice. When the caller leaves the
  # default vector untouched this resolves to "ep".
  file_type <- match.arg(file_type)

  # The individual-file pipeline names its temp data "indiv_temp<N>", so the
  # "ind" choice has to accept the "indiv" prefix as well as plain "ind".
  prefix <- switch(file_type,
    ep = "ep",
    ind = "ind(iv)?"
  )

  # full.names = TRUE is essential: without it list.files() returns bare file
  # names and file.remove() would look for them in the working directory
  # instead of the year directory.
  temp_files <- list.files(
    path = get_year_dir(year),
    pattern = paste0("^", prefix, "_temp"),
    full.names = TRUE
  )

  invisible(file.remove(temp_files))
}
2 changes: 2 additions & 0 deletions Run_SLF_Files_manually/run_individual_file_1415.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ library(createslf)

year <- "1415"

clean_temp_data(year, "ep")

episode_file <- arrow::read_parquet(get_slf_episode_path(year))

# Run individual file
Expand Down
2 changes: 2 additions & 0 deletions Run_SLF_Files_manually/run_individual_file_1516.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ library(createslf)

year <- "1516"

clean_temp_data(year, "ep")

episode_file <- arrow::read_parquet(get_slf_episode_path(year))

# Run individual file
Expand Down
2 changes: 2 additions & 0 deletions Run_SLF_Files_manually/run_individual_file_1617.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ library(createslf)

year <- "1617"

clean_temp_data(year, "ep")

episode_file <- arrow::read_parquet(get_slf_episode_path(year))

# Run individual file
Expand Down
2 changes: 2 additions & 0 deletions Run_SLF_Files_manually/run_individual_file_1718.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ library(createslf)

year <- "1718"

clean_temp_data(year, "ep")

episode_file <- arrow::read_parquet(get_slf_episode_path(year))

# Run individual file
Expand Down
2 changes: 2 additions & 0 deletions Run_SLF_Files_manually/run_individual_file_1819.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ library(createslf)

year <- "1819"

clean_temp_data(year, "ep")

episode_file <- arrow::read_parquet(get_slf_episode_path(year))

# Run individual file
Expand Down
2 changes: 2 additions & 0 deletions Run_SLF_Files_manually/run_individual_file_1920.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ library(createslf)

year <- "1920"

clean_temp_data(year, "ep")

episode_file <- arrow::read_parquet(get_slf_episode_path(year))

# Run individual file
Expand Down
2 changes: 2 additions & 0 deletions Run_SLF_Files_manually/run_individual_file_2021.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ library(createslf)

year <- "2021"

clean_temp_data(year, "ep")

episode_file <- arrow::read_parquet(get_slf_episode_path(year))

# Run individual file
Expand Down
2 changes: 2 additions & 0 deletions Run_SLF_Files_manually/run_individual_file_2122.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ library(createslf)

year <- "2122"

clean_temp_data(year, "ep")

episode_file <- arrow::read_parquet(get_slf_episode_path(year))

# Run individual file
Expand Down
2 changes: 2 additions & 0 deletions Run_SLF_Files_manually/run_individual_file_2223.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ library(createslf)

year <- "2223"

clean_temp_data(year, "ep")

episode_file <- arrow::read_parquet(get_slf_episode_path(year))

# Run individual file
Expand Down
2 changes: 2 additions & 0 deletions Run_SLF_Files_manually/run_individual_file_2324.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ library(createslf)

year <- "2324"

clean_temp_data(year, "ep")

episode_file <- arrow::read_parquet(get_slf_episode_path(year))

# Run individual file
Expand Down
2 changes: 2 additions & 0 deletions Run_SLF_Files_manually/run_individual_file_2425.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ library(createslf)

year <- "2425"

clean_temp_data(year, "ep")

episode_file <- arrow::read_parquet(get_slf_episode_path(year))

# Run individual file
Expand Down

0 comments on commit eaf2fba

Please sign in to comment.