Skip to content

Commit

Permalink
00 sort bi extracts (#837)
Browse files Browse the repository at this point in the history
* Translate the Python script for sorting BI extracts into R

* Style code

* Delete 00-Sort_BI_Extracts.py

* improved script for copy_to_hscdiip.R

* Style code

* improve the speed by using fs::file_copy

* Style code

* update the target folder path

* Style code

---------

Co-authored-by: lizihao-anu <[email protected]>
Co-authored-by: Jennit07 <[email protected]>
Co-authored-by: Jennit07 <[email protected]>
  • Loading branch information
4 people authored Oct 11, 2023
1 parent e5335c4 commit d586df3
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 4 deletions.
2 changes: 1 addition & 1 deletion 00_Sort_BI_Extracts.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ for (csv_file in csv_files) {

# move file
new_file_path <- file.path(financial_year_dir, basename(csv_file))
file.copy(csv_file, new_file_path)
fs::file_copy(csv_file, new_file_path, overwrite = TRUE)
file.remove(csv_file)
cat("Moved:", csv_file, "to", new_file_path, "\n")
}
Expand Down
4 changes: 1 addition & 3 deletions R/create_episode_file.R
Original file line number Diff line number Diff line change
Expand Up @@ -171,9 +171,7 @@ create_episode_file <- function(
}

if (write_to_disk) {
slf_episode_path <- get_slf_episode_path(year, check_mode = "write")

write_file(episode_file, slf_episode_path)
write_file(episode_file, get_slf_episode_path(year, check_mode = "write"))
}

return(episode_file)
Expand Down
35 changes: 35 additions & 0 deletions copy_to_hscdiip.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Copy the source parquet files for each financial year folder into the
# hscdiip target folder, recording per-year copy time and total file size.

dir_folder <- "/conf/sourcedev/Source_Linkage_File_Updates"
target_folder <- "/conf/hscdiip/01-Source-linkage-files"
if (!dir.exists(target_folder)) {
  dir.create(target_folder, mode = "770")
}

# Financial year folders to copy from.
folders <- c("1718", "1819", "1920", "2021", "2122", "2223")
year_n <- length(folders)

# One row per year: elapsed copy time (seconds) and size copied (MB).
resource_consumption <- data.frame(
  year = folders,
  time_consumption = rep(0, year_n),
  file_size_MB = rep(0, year_n)
)

for (i in seq_along(folders)) {
  timer <- Sys.time()
  print(stringr::str_glue("{folders[i]} starts at {Sys.time()}"))

  folder_path <- file.path(dir_folder, folders[i])
  old_path <- list.files(folder_path,
    pattern = "^source-.*parquet",
    full.names = TRUE
  )
  files_name <- basename(old_path)
  new_path <- file.path(target_folder, files_name)
  print(files_name)

  fs::file_copy(old_path,
    new_path,
    overwrite = TRUE
  )

  # Use explicit units: a bare `Sys.time() - timer` picks its own units
  # (secs/mins/...), which are lost when stored in a numeric column.
  resource_consumption$time_consumption[i] <- as.numeric(
    difftime(Sys.time(), timer, units = "secs")
  )

  file_size <- sum(file.size(old_path)) / 2^20
  resource_consumption$file_size_MB[i] <- file_size
  print(stringr::str_glue("file size is {file_size}."))
  print(resource_consumption$time_consumption[i])
}

0 comments on commit d586df3

Please sign in to comment.