Skip to content

Commit

Permalink
Update 00_sort_bi_extracts to write anon_chi (#952)
Browse files Browse the repository at this point in the history
* Update `00_sort_BI_extracts`
Save a new file with `anon-` prefix and use slfhelper to get the anon_chi

* remove file copy

* Update `00_sort_bi_extracts` note

* Style code

* Rename the CHI column to `chi` when it has a different name, e.g. UPI number or PAT_UPI

* remove storing as a dataframe

* Add condition if CHI exists in data file

* update 00_Sort_BI_Extracts
Replace the for loop with a function applied via lapply, so the work can later be parallelised

* Style code

* merge similar code

* simplify sort_bi_extracts

---------

Co-authored-by: Jennit07 <[email protected]>
Co-authored-by: Zihao Li <[email protected]>
Co-authored-by: lizihao-anu <[email protected]>
  • Loading branch information
4 people authored May 22, 2024
1 parent 1f9d84e commit 5f565fc
Showing 1 changed file with 30 additions and 12 deletions.
42 changes: 30 additions & 12 deletions 00_Sort_BI_Extracts.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Define the source directory and financial year pattern
# When TRUE, each extract is gzipped after it has been written to its
# financial-year directory.
compress_files <- TRUE
source_dir <- "/conf/sourcedev/Source_Linkage_File_Updates/Extracts Temp"
# Captures the four digits that follow "-20" immediately before ".csv"
# (presumably the financial year code, e.g. "1718" -- confirm against
# extract_financial_year()).
pattern <- "-20(\\d{4})\\.csv"

Expand All @@ -20,31 +20,49 @@ extract_financial_year <- function(filename) {
}
}

# Create directories for each financial year and move files
for (csv_file in csv_files) {
# Check whether an extract has a column whose name contains "UPI".
#
# Only the header and (at most) the first data row are read, so the check is
# cheap even for large extracts. The match is case-sensitive.
#
# @param filename Path to a CSV file.
# @return TRUE if any column name contains "UPI", otherwise FALSE.
is_chi_in_file <- function(filename) {
  # `nrows` is spelled out in full rather than relying on partial matching
  first_row <- read.csv(filename, nrows = 1)
  any(grepl("UPI", names(first_row), fixed = TRUE))
}

# Move one temp extract into its financial-year "Extracts" directory.
#
# The extract is written to the year directory with an "anon-" prefix. When
# the file has a UPI column, that column is renamed to `chi` and the data is
# passed through slfhelper::get_anon_chi() before being written; otherwise
# the file is copied unchanged. The new file is optionally gzipped, and the
# original temp file is then removed.
#
# @param csv_file Path to a CSV extract in the temp directory.
# @param compress_files If TRUE (the default), gzip the newly written file.
# @return The result of file.remove() on the temp file, or NULL (invisibly)
#   when no financial year can be extracted from the file name.
move_temps_to_year_extract <- function(csv_file, compress_files = TRUE) {
  financial_year <- extract_financial_year(csv_file)
  # Files whose name carries no financial year are left where they are
  if (is.null(financial_year)) {
    return(invisible(NULL))
  }

  financial_year_dir <- file.path(
    "/conf/sourcedev/Source_Linkage_File_Updates",
    financial_year,
    "Extracts"
  )
  # recursive = TRUE so that a missing year directory is created together
  # with its "Extracts" sub-directory
  if (!dir.exists(financial_year_dir)) {
    dir.create(financial_year_dir, recursive = TRUE)
  }

  # Destination of the (anonymised) extract
  new_file_path <- file.path(
    financial_year_dir,
    paste0("anon-", basename(csv_file))
  )

  if (is_chi_in_file(csv_file)) {
    # NOTE(review): read_file() is not defined in the visible part of this
    # file; presumably the project's CSV reader -- confirm.
    extract <- read_file(csv_file)
    # Case-sensitive match, consistent with the check in is_chi_in_file()
    extract <- dplyr::rename_with(
      extract,
      ~"chi",
      tidyselect::contains("UPI", ignore.case = FALSE)
    )
    extract <- slfhelper::get_anon_chi(extract)
    readr::write_csv(extract, file = new_file_path)
    cat("Replaced chi with anon chi:", csv_file, "to", new_file_path, "\n")
  } else {
    fs::file_copy(csv_file, new_file_path, overwrite = TRUE)
    cat("Moved", csv_file, "to", new_file_path, "\n")
  }

  # compress file
  if (compress_files) {
    cat("Compressing:", basename(new_file_path), "\n")
    # -f replaces a .gz left over from an earlier run, in line with
    # overwrite = TRUE in the copy branch
    gzip_status <- system2(
      command = "gzip",
      args = c("-f", shQuote(new_file_path))
    )
    if (gzip_status != 0) {
      warning(
        "gzip exited with status ", gzip_status, " for ", new_file_path,
        call. = FALSE
      )
    }
  }

  # remove old files
  file.remove(csv_file)
}

# Process every temp extract in turn, passing on the script-level
# compression setting.
lapply(
  X = csv_files,
  FUN = move_temps_to_year_extract,
  compress_files = compress_files
)

0 comments on commit 5f565fc

Please sign in to comment.