Skip to content

Commit

Permalink
Fix targets (#892)
Browse files Browse the repository at this point in the history
* fix sc_client_lookup sc_send_lca

* fix an issue of get_pop_path

* Style code

* fix the rest of get_pop_path from get_datazone_pop_path

* Update documentation

* fix sc_send_lca

* add missing year column

* explicitly specify the argument year to avoid corruption of targets

* Update documentation

* new data pipeline with targets:
remove create_individual_file from targets and append it to the run_targets scripts

* minor changes

* Style code

* undo sc_send_lca bit

* Update targets scripts

* Remove top level targets scripts

---------

Co-authored-by: lizihao-anu <[email protected]>
Co-authored-by: Megan McNicol <[email protected]>
Co-authored-by: Jennit07 <[email protected]>
Co-authored-by: Jennifer Thom <[email protected]>
  • Loading branch information
5 people authored Feb 27, 2024
1 parent c684b81 commit 9699394
Show file tree
Hide file tree
Showing 12 changed files with 125 additions and 22 deletions.
3 changes: 2 additions & 1 deletion R/aggregate_by_chi.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#' @importFrom data.table .SD
#'
#' @inheritParams create_individual_file
aggregate_by_chi <- function(episode_file, exclude_sc_var = FALSE) {
aggregate_by_chi <- function(episode_file, year, exclude_sc_var = FALSE) {
cli::cli_alert_info("Aggregate by CHI function started at {Sys.time()}")

# Convert to data.table
Expand Down Expand Up @@ -187,6 +187,7 @@ aggregate_by_chi <- function(episode_file, exclude_sc_var = FALSE) {
individual_file_cols5[, chi := NULL],
individual_file_cols6[, chi := NULL]
)
individual_file <- individual_file[, year := year]

# convert back to tibble
return(dplyr::as_tibble(individual_file))
Expand Down
6 changes: 3 additions & 3 deletions R/create_individual_file.R
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ create_individual_file <- function(
))) %>%
remove_blank_chi() %>%
add_cij_columns() %>%
add_all_columns()
add_all_columns(year = year)

if (!check_year_valid(year, type = c("ch", "hc", "at", "sds"))) {
individual_file <- individual_file %>%
Expand All @@ -82,7 +82,7 @@ create_individual_file <- function(
individual_file <- individual_file %>%
aggregate_ch_episodes() %>%
clean_up_ch(year) %>%
aggregate_by_chi(exclude_sc_var = FALSE)
aggregate_by_chi(year = year, exclude_sc_var = FALSE)
}

individual_file <- individual_file %>%
Expand Down Expand Up @@ -202,7 +202,7 @@ add_cij_columns <- function(episode_file) {
#' of prefixed column names created based on some condition.
#' @family individual_file
#' @inheritParams create_individual_file
add_all_columns <- function(episode_file) {
add_all_columns <- function(episode_file, year) {
cli::cli_alert_info("Add all columns function started at {Sys.time()}")

episode_file <- episode_file %>%
Expand Down
14 changes: 14 additions & 0 deletions Run_SLF_Files_targets/run_targets_1718.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,18 @@
# Driver script for one year of the pipeline: run the targets pipeline for
# this year's targets, then build and test the individual file directly.
library(targets)

# Year code passed to the createslf helpers below
# (presumably financial year 2017/18 -- confirm).
year <- "1718"

# use targets for the process until testing episode files
tar_make_future(
# The year is written as a literal because `contains(year)` is not recognised
# here; keep it in sync with `year` above.
names = (targets::contains("1718"))
)

# Create the individual file outside of targets (directly in this script)
# due to RAM limit
library(createslf)

# Read this year's episode file (parquet) from the path given by
# get_slf_episode_path(); presumably written by the targets run above.
episode_file <- arrow::read_parquet(get_slf_episode_path(year))

# Run individual file, then pass the result to its tests
create_individual_file(episode_file, year = year) %>%
process_tests_individual_file(year = year)
14 changes: 14 additions & 0 deletions Run_SLF_Files_targets/run_targets_1819.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,18 @@
# Driver script for one year of the pipeline: run the targets pipeline for
# this year's targets, then build and test the individual file directly.
library(targets)

# Year code passed to the createslf helpers below
# (presumably financial year 2018/19 -- confirm).
year <- "1819"

# use targets for the process until testing episode files
tar_make_future(
# The year is written as a literal because `contains(year)` is not recognised
# here; keep it in sync with `year` above.
names = (targets::contains("1819"))
)

# Create the individual file outside of targets (directly in this script)
# due to RAM limit
library(createslf)

# Read this year's episode file (parquet) from the path given by
# get_slf_episode_path(); presumably written by the targets run above.
episode_file <- arrow::read_parquet(get_slf_episode_path(year))

# Run individual file, then pass the result to its tests
create_individual_file(episode_file, year = year) %>%
process_tests_individual_file(year = year)
14 changes: 14 additions & 0 deletions Run_SLF_Files_targets/run_targets_1920.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,18 @@
# Driver script for one year of the pipeline: run the targets pipeline for
# this year's targets, then build and test the individual file directly.
library(targets)

# Year code passed to the createslf helpers below
# (presumably financial year 2019/20 -- confirm).
year <- "1920"

# use targets for the process until testing episode files
tar_make_future(
# The year is written as a literal because `contains(year)` is not recognised
# here; keep it in sync with `year` above.
names = (targets::contains("1920"))
)

# Create the individual file outside of targets (directly in this script)
# due to RAM limit
library(createslf)

# Read this year's episode file (parquet) from the path given by
# get_slf_episode_path(); presumably written by the targets run above.
episode_file <- arrow::read_parquet(get_slf_episode_path(year))

# Run individual file, then pass the result to its tests
create_individual_file(episode_file, year = year) %>%
process_tests_individual_file(year = year)
14 changes: 14 additions & 0 deletions Run_SLF_Files_targets/run_targets_2021.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,18 @@
# Driver script for one year of the pipeline: run the targets pipeline for
# this year's targets, then build and test the individual file directly.
library(targets)

# Year code passed to the createslf helpers below
# (presumably financial year 2020/21 -- confirm).
year <- "2021"

# use targets for the process until testing episode files
tar_make_future(
# The year is written as a literal because `contains(year)` is not recognised
# here; keep it in sync with `year` above.
names = (targets::contains("2021"))
)

# Create the individual file outside of targets (directly in this script)
# due to RAM limit
library(createslf)

# Read this year's episode file (parquet) from the path given by
# get_slf_episode_path(); presumably written by the targets run above.
episode_file <- arrow::read_parquet(get_slf_episode_path(year))

# Run individual file, then pass the result to its tests
create_individual_file(episode_file, year = year) %>%
process_tests_individual_file(year = year)
14 changes: 14 additions & 0 deletions Run_SLF_Files_targets/run_targets_2122.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,18 @@
# Driver script for one year of the pipeline: run the targets pipeline for
# this year's targets, then build and test the individual file directly.
library(targets)

# Year code passed to the createslf helpers below
# (presumably financial year 2021/22 -- confirm).
year <- "2122"

# use targets for the process until testing episode files
tar_make_future(
# The year is written as a literal because `contains(year)` is not recognised
# here; keep it in sync with `year` above.
names = (targets::contains("2122"))
)

# Create the individual file outside of targets (directly in this script)
# due to RAM limit
library(createslf)

# Read this year's episode file (parquet) from the path given by
# get_slf_episode_path(); presumably written by the targets run above.
episode_file <- arrow::read_parquet(get_slf_episode_path(year))

# Run individual file, then pass the result to its tests
create_individual_file(episode_file, year = year) %>%
process_tests_individual_file(year = year)
14 changes: 14 additions & 0 deletions Run_SLF_Files_targets/run_targets_2223.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,18 @@
# Driver script for one year of the pipeline: run the targets pipeline for
# this year's targets, then build and test the individual file directly.
library(targets)

# Year code passed to the createslf helpers below
# (presumably financial year 2022/23 -- confirm).
year <- "2223"

# use targets for the process until testing episode files
tar_make_future(
# The year is written as a literal because `contains(year)` is not recognised
# here; keep it in sync with `year` above.
names = (targets::contains("2223"))
)

# Create the individual file outside of targets (directly in this script)
# due to RAM limit
library(createslf)

# Read this year's episode file (parquet) from the path given by
# get_slf_episode_path(); presumably written by the targets run above.
episode_file <- arrow::read_parquet(get_slf_episode_path(year))

# Run individual file, then pass the result to its tests
create_individual_file(episode_file, year = year) %>%
process_tests_individual_file(year = year)
14 changes: 14 additions & 0 deletions Run_SLF_Files_targets/run_targets_2324.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,18 @@
# Driver script for one year of the pipeline: run the targets pipeline for
# this year's targets, then build and test the individual file directly.
library(targets)

# Year code passed to the createslf helpers below
# (presumably financial year 2023/24 -- confirm).
year <- "2324"

# use targets for the process until testing episode files
tar_make_future(
# The year is written as a literal because `contains(year)` is not recognised
# here; keep it in sync with `year` above.
names = (targets::contains("2324"))
)

# Create the individual file outside of targets (directly in this script)
# due to RAM limit
library(createslf)

# Read this year's episode file (parquet) from the path given by
# get_slf_episode_path(); presumably written by the targets run above.
episode_file <- arrow::read_parquet(get_slf_episode_path(year))

# Run individual file, then pass the result to its tests
create_individual_file(episode_file, year = year) %>%
process_tests_individual_file(year = year)
32 changes: 16 additions & 16 deletions _targets.R
Original file line number Diff line number Diff line change
Expand Up @@ -591,24 +591,24 @@ list(
data = episode_file,
year = year
)
),
tar_target(
individual_file,
create_individual_file(
episode_file = episode_file,
year = year,
homelessness_lookup = homelessness_lookup,
write_to_disk = write_to_disk
)
),
tar_target(
individual_file_tests,
process_tests_individual_file(
data = individual_file,
year = year
)
) # ,
# tar_target(
# individual_file,
# create_individual_file(
# episode_file = episode_file,
# year = year,
# homelessness_lookup = homelessness_lookup,
# write_to_disk = write_to_disk
# )
# ),
# tar_target(
# individual_file_tests,
# process_tests_individual_file(
# data = individual_file,
# year = year
# )
# ) # ,
# tar_target(
# episode_file_dataset,
# arrow::write_dataset(
# dataset = episode_file,
Expand Down
4 changes: 3 additions & 1 deletion man/add_all_columns.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/aggregate_by_chi.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 9699394

Please sign in to comment.