From 3c290ed4434dc1ef70d81212dc0a64d8d315bbd8 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Tue, 18 Jul 2023 16:37:45 +0100 Subject: [PATCH 1/9] Update `read_file` to return an empty tibble if passed the dummy path This is needed for some other bits, notably NSUs --- R/read_file.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/R/read_file.R b/R/read_file.R index 2941b62ed..be0a6fc65 100644 --- a/R/read_file.R +++ b/R/read_file.R @@ -27,6 +27,11 @@ read_file <- function(path, col_select = NULL, as_data_frame = TRUE, ...) { "parquet" ) + # Return an empty tibble if trying to read the dummy path + if (path == get_dummy_boxi_extract_path()) { + return(tibble::tibble()) + } + ext <- fs::path_ext(path) if (ext == "gz") { From 69ccef581930c01964b3e42c00d33ff08fe145f5 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Tue, 18 Jul 2023 16:38:38 +0100 Subject: [PATCH 2/9] Update SPARRA and HHG paths to return dummy if the year is invalid --- R/get_sparra_hhg_paths.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/R/get_sparra_hhg_paths.R b/R/get_sparra_hhg_paths.R index 2fd1a69f9..157160ed4 100644 --- a/R/get_sparra_hhg_paths.R +++ b/R/get_sparra_hhg_paths.R @@ -10,6 +10,10 @@ #' @family extract file paths #' @seealso [get_file_path()] for the generic function. get_hhg_path <- function(year, ...) { + if (!check_year_valid(year, "HHG")) { + return(get_dummy_boxi_extract_path()) + } + hhg_file_path <- get_file_path( directory = fs::path(get_slf_dir(), "HHG"), file_name = stringr::str_glue("HHG-20{year}.parquet"), @@ -31,6 +35,10 @@ get_hhg_path <- function(year, ...) { #' @family extract file paths #' @seealso [get_file_path()] for the generic function. get_sparra_path <- function(year, ...) { + if (!check_year_valid(year, "SPARRA")) { + return(get_dummy_boxi_extract_path()) + } + sparra_file_path <- get_file_path( directory = fs::path(get_slf_dir(), "SPARRA"), file_name = stringr::str_glue("SPARRA-20{year}.parquet"), From 413024b32e02b755dc947b74f237d71f44a87977 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Wed, 19 Jul 2023 12:49:31 +0100 Subject: [PATCH 3/9] Extract all data as a parameter --- R/add_nsu_cohort.R | 14 +++--- R/fill_geographies.R | 56 ++++++++++++++++------- R/get_source_extract_path.R | 89 +++++++++++++++++++------------------ R/join_deaths_data.R | 6 +-- R/match_on_ltcs.R | 9 +++- R/run_episode_file.R | 57 +++++++++++++++++------- _targets.R | 7 +++ man/add_nsu_cohort.Rd | 4 +- man/fill_geographies.Rd | 11 ++++- man/join_cohort_lookups.Rd | 14 +++++- man/join_deaths_data.Rd | 4 +- man/match_on_ltcs.Rd | 4 +- man/run_episode_file.Rd | 19 ++++++++ 13 files changed, 203 insertions(+), 91 deletions(-) diff --git a/R/add_nsu_cohort.R b/R/add_nsu_cohort.R index c5a26da12..1e21de78a 100644 --- a/R/add_nsu_cohort.R +++ b/R/add_nsu_cohort.R @@ -2,13 +2,18 @@ #' #' @param data The input data frame #' @param year The year being processed +#' @param nsu_cohort The NSU data for the year #' #' @return A data frame containing the Non-Service Users as additional rows #' @export #' #' @family episode file #' @seealso [get_nsu_path()] -add_nsu_cohort <- function(data, year) { +add_nsu_cohort <- function( + data, + year, + nsu_cohort = read_file(get_nsu_path(year)) +) { year_param <- year if (!check_year_valid(year, "NSU")) { @@ -29,9 +34,9 @@ add_nsu_cohort <- function(data, year) { ) ) - matched <- dplyr::full_join(data, - # NSU cohort file - read_file(get_nsu_path(year)) %>% + matched <- dplyr::full_join( + data, + nsu_cohort %>% dplyr::mutate( dob = as.Date(.data[["dob"]]), gpprac = convert_eng_gpprac_to_dummy(.data[["gpprac"]]) @@ -110,7 +115,6 @@ add_nsu_cohort <- function(data, year) { .data[["chi"]] ) ) %>% - # Remove the additional columns dplyr::select(-dplyr::contains("_nsu"), -"has_chi") return(return_df) diff --git a/R/fill_geographies.R b/R/fill_geographies.R index 58d001493..f0e859169 100644 --- a/R/fill_geographies.R +++ b/R/fill_geographies.R @@ -4,10 +4,19 @@ #' then use the lookups to match on additional variables. #' #' @param data the SLF +#' @param slf_pc_lookup The SLF Postcode lookup +#' @param slf_gpprac_lookup The SLF GP Practice lookup #' #' @return a [tibble][tibble::tibble-package] of the SLF with improved #' Postcode and GP Practice details. -fill_geographies <- function(data) { +fill_geographies <- function( + data, + slf_pc_lookup = read_file(get_slf_postcode_path()), + slf_gpprac_lookup = read_file( + get_slf_gpprac_path(), + col_select = c("gpprac", "cluster", "hbpraccode") + ) +) { check_variables_exist(data, c( "chi", "postcode", @@ -21,8 +30,15 @@ fill_geographies <- function(data) { )) data %>% - fill_postcode_geogs() %>% - fill_gpprac_geographies() + fill_postcode_geogs( + slf_pc_lookup = read_file(get_slf_postcode_path()) + ) %>% + fill_gpprac_geographies( + slf_gpprac_lookup = read_file( + get_slf_gpprac_path(), + col_select = c("gpprac", "cluster", "hbpraccode") + ) + ) } #' Make a postcode lookup for filling to most recent postcodes based on CHI @@ -84,9 +100,10 @@ make_gpprac_lookup <- function(data) { return(gpprac_lookup) } -fill_postcode_geogs <- function(data) { - slf_pc_lookup <- read_file(get_slf_postcode_path()) - +fill_postcode_geogs <- function( + data, + slf_pc_lookup +) { filled_postcodes <- dplyr::left_join( data, make_postcode_lookup(data), @@ -121,18 +138,22 @@ fill_postcode_geogs <- function(data) { lca = dplyr::coalesce(.data$lca, .data$lca_old), datazone2011 = dplyr::coalesce(.data$datazone2011, .data$datazone2011_old) ) %>% - dplyr::select(!c("hb2018", "hscp", "lca_old", "datazone2011_old", "most_recent_postcode")) + dplyr::select(!c( + "hb2018", + "hscp", + "lca_old", + "datazone2011_old", + "most_recent_postcode" + )) return(filled_postcodes) } -fill_gpprac_geographies <- function(data) { - gpprac_ref <- read_file( - get_slf_gpprac_path(), - col_select = c("gpprac", "cluster", "hbpraccode") - ) - - filled_gpprac <- dplyr::left_join( +fill_gpprac_geographies <- function( + data, + slf_gpprac_lookup +) { + filled_gpprac <- dplyr::left_join( data, make_gpprac_lookup(data), by = "chi" @@ -145,7 +166,12 @@ fill_gpprac_geographies <- function(data) { .data$gpprac ) ) %>% - dplyr::left_join(gpprac_ref, by = "gpprac", suffix = c("_old", "")) %>% + dplyr::left_join( + slf_gpprac_lookup %>% + dplyr::select("gpprac", "cluster", "hbpraccode"), + by = "gpprac", + suffix = c("_old", "") + ) %>% dplyr::mutate( hbpraccode = dplyr::coalesce(.data$hbpraccode, .data$hbpraccode_old) ) %>% diff --git a/R/get_source_extract_path.R b/R/get_source_extract_path.R index 89c6dc0b4..3bcb476d6 100644 --- a/R/get_source_extract_path.R +++ b/R/get_source_extract_path.R @@ -10,57 +10,60 @@ #' @export #' #' @family extract file paths -get_source_extract_path <- function(year, - type = c( - "Acute", - "AE", - "AT", - "CH", - "Client", - "CMH", - "DD", - "Deaths", - "DN", - "GPOoH", - "HC", - "Homelessness", - "Maternity", - "MH", - "Outpatients", - "PIS", - "SDS" - ), - ...) { +get_source_extract_path <- function( + year, + type = c( + "Acute", + "AE", + "AT", + "CH", + "Client", + "CMH", + "DD", + "Deaths", + "DN", + "GPOoH", + "HC", + "Homelessness", + "Maternity", + "MH", + "Outpatients", + "PIS", + "SDS" + ), + ... +) { type <- match.arg(type) if (!check_year_valid(year, type)) { - return(NA) + return(get_dummy_boxi_extract_path()) } - file_name <- dplyr::case_when( - type == "Acute" ~ "acute_for_source", - type == "AE" ~ "a&e_for_source", - type == "AT" ~ "Alarms-Telecare-for-source", - type == "CH" ~ "care_home_for_source", - type == "CMH" ~ "CMH_for_source", - type == "Client" ~ "client_for_source", - type == "DD" ~ "DD_for_source", - type == "Deaths" ~ "deaths_for_source", - type == "DN" ~ "DN_for_source", - type == "GPOoH" ~ "GP_OOH_for_source", - type == "HC" ~ "Home_Care_for_source", - type == "Homelessness" ~ "homelessness_for_source", - type == "Maternity" ~ "maternity_for_source", - type == "MH" ~ "mental_health_for_source", - type == "DD" ~ "DD_for_source", - type == "Outpatients" ~ "outpatients_for_source", - type == "PIS" ~ "prescribing_file_for_source", - type == "SDS" ~ "SDS-for-source" - ) + file_name <- dplyr::case_match( + type, + "Acute" ~ "acute_for_source", + "AE" ~ "a_and_e_for_source", + "AT" ~ "alarms-telecare-for-source", + "CH" ~ "care_home_for_source", + "CMH" ~ "cmh_for_source", + "Client" ~ "client_for_source", + "DD" ~ "delayed_discharge_for_source", + "Deaths" ~ "deaths_for_source", + "DN" ~ "district_nursing_for_source", + "GPOoH" ~ "gp_ooh_for_source", + "HC" ~ "home_care_for_source", + "Homelessness" ~ "homelessness_for_source", + "Maternity" ~ "maternity_for_source", + "MH" ~ "mental_health_for_source", + "Outpatients" ~ "outpatients_for_source", + "PIS" ~ "prescribing_for_source", + "SDS" ~ "sds_for_source" + ) %>% + stringr::str_glue("-{year}.parquet") source_extract_path <- get_file_path( directory = get_year_dir(year), - file_name = stringr::str_glue("{file_name}-20{year}.parquet"), + file_name = file_name, ... ) diff --git a/R/join_deaths_data.R b/R/join_deaths_data.R index 694d2e2b9..89bcbbe13 100644 --- a/R/join_deaths_data.R +++ b/R/join_deaths_data.R @@ -2,16 +2,14 @@ #' #' @param data Episode file data #' @param year financial year, e.g. '1920' -#' @param slf_deaths_lookup_path Path to slf deaths lookup. +#' @param slf_deaths_lookup The SLF deaths lookup. #' #' @return The data including the deaths lookup matched #' on to the episode file. join_deaths_data <- function( data, year, - slf_deaths_lookup_path = get_slf_deaths_lookup_path(year)) { - slf_deaths_lookup <- read_file(slf_deaths_lookup_path) - + slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year))) { return( data %>% dplyr::left_join( diff --git a/R/match_on_ltcs.R b/R/match_on_ltcs.R index 637e5b6c7..d7b65ce2a 100644 --- a/R/match_on_ltcs.R +++ b/R/match_on_ltcs.R @@ -5,13 +5,18 @@ #' #' @param data episode files #' @param year financial year, e.g. '1920' +#' @param ltc_data The LTC data for the year #' #' @return data matched with long term conditions -match_on_ltcs <- function(data, year) { +match_on_ltcs <- function( + data, + year, + ltc_data = read_file(get_ltcs_path(year)) +) { # Match on LTC lookup matched <- dplyr::left_join( data, - read_file(get_ltcs_path(year)), + ltc_data, by = "chi", suffix = c("", "_ltc") ) %>% diff --git a/R/run_episode_file.R b/R/run_episode_file.R index 852a4fd8b..1ef99ea0c 100644 --- a/R/run_episode_file.R +++ b/R/run_episode_file.R @@ -4,15 +4,29 @@ #' @param year The year to process, in FY format. #' @param write_to_disk (optional) Should the data be written to disk default is #' `TRUE` i.e. write the data to disk. +#' @inheritParams add_nsu_cohort +#' @inheritParams fill_geographies +#' @inheritParams join_cohort_lookups +#' @inheritParams join_deaths_data +#' @inheritParams match_on_ltcs +#' @inheritParams link_delayed_discharge_eps #' @param anon_chi_out (Default:TRUE) Should `anon_chi` be used in the output #' (instead of chi) #' #' @return a [tibble][tibble::tibble-package] containing the episode file #' @export -#' run_episode_file <- function( processed_data_list, year, + dd_data = read_file(get_source_extract_path(year, "DD")), + nsu_cohort = read_file(get_nsu_path(year)), + ltc_data = read_file(get_ltcs_path(year)), + slf_pc_lookup = read_file(get_slf_postcode_path()), + slf_gpprac_lookup = read_file( + get_slf_gpprac_path(), + col_select = c("gpprac", "cluster", "hbpraccode") + ), + slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year)), write_to_disk = TRUE, anon_chi_out = TRUE) { episode_file <- dplyr::bind_rows(processed_data_list) %>% @@ -98,15 +112,21 @@ run_episode_file <- function( correct_cij_vars() %>% fill_missing_cij_markers() %>% add_ppa_flag() %>% - link_delayed_discharge_eps(year) %>% - add_nsu_cohort(year) %>% - match_on_ltcs(year) %>% + link_delayed_discharge_eps(year, dd_data) %>% + add_nsu_cohort(year, nsu_cohort) %>% + match_on_ltcs(year, ltc_data) %>% correct_demographics(year) %>% create_cohort_lookups(year) %>% join_cohort_lookups(year) %>% join_sparra_hhg(year) %>% - fill_geographies() %>% - join_deaths_data(year) %>% + fill_geographies( + slf_pc_lookup, + slf_gpprac_lookup + ) %>% + join_deaths_data( + year, + slf_deaths_lookup + ) %>% load_ep_file_vars(year) if (anon_chi_out) { @@ -356,22 +376,29 @@ create_cohort_lookups <- function(data, year, update = latest_update()) { #' #' @inheritParams store_ep_file_vars #' @inheritParams get_demographic_cohorts_path +#' @param demographic_cohort,service_use_cohort The cohort data #' #' @return The data including the Demographic and Service Use lookups. -join_cohort_lookups <- function(data, year, update = latest_update()) { +join_cohort_lookups <- function( + data, + year, + update = latest_update(), + demographic_cohort = read_file( + get_demographic_cohorts_path(year, update), + col_select = c("chi", "demographic_cohort") + ), + service_use_cohort = read_file( + get_service_use_cohorts_path(year, update), + col_select = c("chi", "service_use_cohort") + ) +) { join_cohort_lookups <- data %>% dplyr::left_join( - read_file( - get_demographic_cohorts_path(year, update), - col_select = c("chi", "demographic_cohort") - ), + demographic_cohort, by = "chi" ) %>% dplyr::left_join( - read_file( - get_service_use_cohorts_path(year, update), - col_select = c("chi", "service_use_cohort") - ), + service_use_cohort, by = "chi" ) diff --git a/_targets.R b/_targets.R index ef2fbbe74..b60faed5b 100644 --- a/_targets.R +++ b/_targets.R @@ -533,11 +533,18 @@ list( source_sc_alarms_tele ) ), + tar_file_read(nsu_cohort, get_nsu_path(year), read_file(!!.x)), tar_target( episode_file, run_episode_file( processed_data_list, year, + dd_data = source_dd_extract, + nsu_cohort = nsu_cohort, + ltc_data = source_ltc_lookup, + slf_pc_lookup = source_pc_lookup, + slf_gpprac_lookup = source_gp_lookup, + slf_deaths_lookup = slf_deaths_lookup, write_to_disk ) ), diff --git a/man/add_nsu_cohort.Rd b/man/add_nsu_cohort.Rd index 723c105e1..4ea9324e0 100644 --- a/man/add_nsu_cohort.Rd +++ b/man/add_nsu_cohort.Rd @@ -4,12 +4,14 @@ \alias{add_nsu_cohort} \title{Add NSU cohort to working file} \usage{ -add_nsu_cohort(data, year) +add_nsu_cohort(data, year, nsu_cohort = read_file(get_nsu_path(year))) } \arguments{ \item{data}{The input data frame} \item{year}{The year being processed} + +\item{nsu_cohort}{The NSU data for the year} } \value{ A data frame containing the Non-Service Users as additional rows diff --git a/man/fill_geographies.Rd b/man/fill_geographies.Rd index 5308fd8d0..bb619405b 100644 --- a/man/fill_geographies.Rd +++ b/man/fill_geographies.Rd @@ -4,10 +4,19 @@ \alias{fill_geographies} \title{Fill postcode and GP practice geographies} \usage{ -fill_geographies(data) +fill_geographies( + data, + slf_pc_lookup = read_file(get_slf_postcode_path()), + slf_gpprac_lookup = read_file(get_slf_gpprac_path(), col_select = c("gpprac", + "cluster", "hbpraccode")) +) } \arguments{ \item{data}{the SLF} + +\item{slf_pc_lookup}{The SLF Postcode lookup} + +\item{slf_gpprac_lookup}{The SLF GP Practice lookup} } \value{ a \link[tibble:tibble-package]{tibble} of the SLF with improved diff --git a/man/join_cohort_lookups.Rd b/man/join_cohort_lookups.Rd index 445dcd7c0..7581dd5b6 100644 --- a/man/join_cohort_lookups.Rd +++ b/man/join_cohort_lookups.Rd @@ -4,14 +4,24 @@ \alias{join_cohort_lookups} \title{Join cohort lookups} \usage{ -join_cohort_lookups(data, year, update = latest_update()) +join_cohort_lookups( + data, + year, + update = latest_update(), + demographic_cohort = read_file(get_demographic_cohorts_path(year, update), col_select = + c("chi", "demographic_cohort")), + service_use_cohort = read_file(get_service_use_cohorts_path(year, update), col_select = + c("chi", "service_use_cohort")) +) } \arguments{ \item{data}{The in-progress episode file data.} -\item{year}{The year to process, in FY format.} +\item{year}{financial year in '1718' format} \item{update}{The update to use} + +\item{demographic_cohort, service_use_cohort}{The cohort data} } \value{ The data including the Demographic and Service Use lookups. diff --git a/man/join_deaths_data.Rd b/man/join_deaths_data.Rd index 6508d7893..f3b68fe1a 100644 --- a/man/join_deaths_data.Rd +++ b/man/join_deaths_data.Rd @@ -7,7 +7,7 @@ join_deaths_data( data, year, - slf_deaths_lookup_path = get_slf_deaths_lookup_path(year) + slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year)) ) } \arguments{ @@ -15,7 +15,7 @@ join_deaths_data( \item{year}{financial year, e.g. '1920'} -\item{slf_deaths_lookup_path}{Path to slf deaths lookup.} +\item{slf_deaths_lookup}{The SLF deaths lookup.} } \value{ The data including the deaths lookup matched diff --git a/man/match_on_ltcs.Rd b/man/match_on_ltcs.Rd index 0c7e7fb53..e0def00cc 100644 --- a/man/match_on_ltcs.Rd +++ b/man/match_on_ltcs.Rd @@ -4,12 +4,14 @@ \alias{match_on_ltcs} \title{Match on LTC DoB and dates of LTC incidence} \usage{ -match_on_ltcs(data, year) +match_on_ltcs(data, year, ltc_data = read_file(get_ltcs_path(year))) } \arguments{ \item{data}{episode files} \item{year}{financial year, e.g. '1920'} + +\item{ltc_data}{The LTC data for the year} } \value{ data matched with long term conditions diff --git a/man/run_episode_file.Rd b/man/run_episode_file.Rd index 59d5fea1d..424d24afa 100644 --- a/man/run_episode_file.Rd +++ b/man/run_episode_file.Rd @@ -7,6 +7,13 @@ run_episode_file( processed_data_list, year, + dd_data = read_file(get_source_extract_path(year, "DD")), + nsu_cohort = read_file(get_nsu_path(year)), + ltc_data = read_file(get_ltcs_path(year)), + slf_pc_lookup = read_file(get_slf_postcode_path()), + slf_gpprac_lookup = read_file(get_slf_gpprac_path(), col_select = c("gpprac", + "cluster", "hbpraccode")), + slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year)), write_to_disk = TRUE, anon_chi_out = TRUE ) @@ -16,6 +23,18 @@ run_episode_file( \item{year}{The year to process, in FY format.} +\item{dd_data}{The processed DD extract} + +\item{nsu_cohort}{The NSU data for the year} + +\item{ltc_data}{The LTC data for the year} + +\item{slf_pc_lookup}{The SLF Postcode lookup} + +\item{slf_gpprac_lookup}{The SLF GP Practice lookup} + +\item{slf_deaths_lookup}{The SLF deaths lookup.} + \item{write_to_disk}{(optional) Should the data be written to disk default is \code{TRUE} i.e. write the data to disk.} From 0b233c2d6aeb2564fb35eca04b516e4faf4d9f78 Mon Sep 17 00:00:00 2001 From: Moohan Date: Wed, 19 Jul 2023 11:51:55 +0000 Subject: [PATCH 4/9] Style code --- R/add_nsu_cohort.R | 3 +-- R/fill_geographies.R | 15 ++++++-------- R/get_source_extract_path.R | 39 ++++++++++++++++++------------------- R/match_on_ltcs.R | 3 +-- R/run_episode_file.R | 3 +-- 5 files changed, 28 insertions(+), 35 deletions(-) diff --git a/R/add_nsu_cohort.R b/R/add_nsu_cohort.R index 1e21de78a..00260bb8e 100644 --- a/R/add_nsu_cohort.R +++ b/R/add_nsu_cohort.R @@ -12,8 +12,7 @@ add_nsu_cohort <- function( data, year, - nsu_cohort = read_file(get_nsu_path(year)) -) { + nsu_cohort = read_file(get_nsu_path(year))) { year_param <- year if (!check_year_valid(year, "NSU")) { diff --git a/R/fill_geographies.R b/R/fill_geographies.R index f0e859169..34b8ca789 100644 --- a/R/fill_geographies.R +++ b/R/fill_geographies.R @@ -13,10 +13,9 @@ fill_geographies <- function( data, slf_pc_lookup = read_file(get_slf_postcode_path()), slf_gpprac_lookup = read_file( - get_slf_gpprac_path(), - col_select = c("gpprac", "cluster", "hbpraccode") - ) -) { + get_slf_gpprac_path(), + col_select = c("gpprac", "cluster", "hbpraccode") + )) { check_variables_exist(data, c( "chi", "postcode", @@ -102,8 +101,7 @@ make_gpprac_lookup <- function(data) { fill_postcode_geogs <- function( data, - slf_pc_lookup -) { + slf_pc_lookup) { filled_postcodes <- dplyr::left_join( data, make_postcode_lookup(data), @@ -151,9 +149,8 @@ fill_postcode_geogs <- function( fill_gpprac_geographies <- function( data, - slf_gpprac_lookup -) { - filled_gpprac <- dplyr::left_join( + slf_gpprac_lookup) { + filled_gpprac <- dplyr::left_join( data, make_gpprac_lookup(data), by = "chi" diff --git a/R/get_source_extract_path.R b/R/get_source_extract_path.R index 3bcb476d6..c74d4381c 100644 --- a/R/get_source_extract_path.R +++ b/R/get_source_extract_path.R @@ -13,26 +13,25 @@ get_source_extract_path <- function( year, type = c( - "Acute", - "AE", - "AT", - "CH", - "Client", - "CMH", - "DD", - "Deaths", - "DN", - "GPOoH", - "HC", - "Homelessness", - "Maternity", - "MH", - "Outpatients", - "PIS", - "SDS" - ), - ... -) { + "Acute", + "AE", + "AT", + "CH", + "Client", + "CMH", + "DD", + "Deaths", + "DN", + "GPOoH", + "HC", + "Homelessness", + "Maternity", + "MH", + "Outpatients", + "PIS", + "SDS" + ), + ...) { type <- match.arg(type) if (!check_year_valid(year, type)) { diff --git a/R/match_on_ltcs.R b/R/match_on_ltcs.R index d7b65ce2a..e1e423d25 100644 --- a/R/match_on_ltcs.R +++ b/R/match_on_ltcs.R @@ -11,8 +11,7 @@ match_on_ltcs <- function( data, year, - ltc_data = read_file(get_ltcs_path(year)) -) { + ltc_data = read_file(get_ltcs_path(year))) { # Match on LTC lookup matched <- dplyr::left_join( data, diff --git a/R/run_episode_file.R b/R/run_episode_file.R index 1ef99ea0c..2b419aa81 100644 --- a/R/run_episode_file.R +++ b/R/run_episode_file.R @@ -390,8 +390,7 @@ join_cohort_lookups <- function( service_use_cohort = read_file( get_service_use_cohorts_path(year, update), col_select = c("chi", "service_use_cohort") - ) -) { + )) { join_cohort_lookups <- data %>% dplyr::left_join( demographic_cohort, From 0018d7d94d5b041e5e5fa3b6b56ecaf6833e5d21 Mon Sep 17 00:00:00 2001 From: Moohan Date: Wed, 19 Jul 2023 11:54:26 +0000 Subject: [PATCH 5/9] Update documentation --- man/join_cohort_lookups.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/join_cohort_lookups.Rd b/man/join_cohort_lookups.Rd index 7581dd5b6..283123fad 100644 --- a/man/join_cohort_lookups.Rd +++ b/man/join_cohort_lookups.Rd @@ -17,7 +17,7 @@ join_cohort_lookups( \arguments{ \item{data}{The in-progress episode file data.} -\item{year}{financial year in '1718' format} +\item{year}{The year to process, in FY format.} \item{update}{The update to use} From dd1a983f9b68c15b9153d14ef8d335dd4755c488 Mon Sep 17 00:00:00 2001 From: Moohan Date: Tue, 1 Aug 2023 16:48:14 +0000 Subject: [PATCH 6/9] Style code --- R/create_individual_file.R | 3 ++- R/process_lookup_gpprac.R | 3 ++- R/read_lookup_sc_client.R | 5 ++--- R/read_lookup_sc_demographics.R | 3 +-- R/read_sc_all_alarms_telecare.R | 3 +-- 5 files changed, 8 insertions(+), 9 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index a5960595d..72045ea95 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -846,7 +846,8 @@ join_slf_lookup_vars <- function(individual_file, #' @param year financial year. #' @param sc_client SC client lookup #' @param sc_demographics SC Demographic lookup -join_sc_client <- function(individual_file, +join_sc_client <- function( + individual_file, year, sc_client = read_file(get_source_extract_path(year, "Client")), sc_demographics = read_file(get_sc_demog_lookup_path(), diff --git a/R/process_lookup_gpprac.R b/R/process_lookup_gpprac.R index e34b67f16..2afe1affd 100644 --- a/R/process_lookup_gpprac.R +++ b/R/process_lookup_gpprac.R @@ -12,7 +12,8 @@ #' @return the final data as a [tibble][tibble::tibble-package]. #' @export #' @family process extracts -process_lookup_gpprac <- function(open_data = get_gpprac_opendata(), +process_lookup_gpprac <- function( + open_data = get_gpprac_opendata(), gpprac_ref_path = get_gpprac_ref_path(), spd_path = get_spd_path(), write_to_disk = TRUE) { diff --git a/R/read_lookup_sc_client.R b/R/read_lookup_sc_client.R index a370340a6..cc98060f3 100644 --- a/R/read_lookup_sc_client.R +++ b/R/read_lookup_sc_client.R @@ -9,9 +9,8 @@ #' @return the final data as a [tibble][tibble::tibble-package]. #' @export #' @family process extracts -read_lookup_sc_client <- function( - fyyear, - sc_dvprod_connection = phs_db_connection(dsn = "DVPROD")) { +read_lookup_sc_client <- function(fyyear, + sc_dvprod_connection = phs_db_connection(dsn = "DVPROD")) { check_year_format(fyyear) year <- convert_fyyear_to_year(fyyear) diff --git a/R/read_lookup_sc_demographics.R b/R/read_lookup_sc_demographics.R index 831d2de75..fcdde5417 100644 --- a/R/read_lookup_sc_demographics.R +++ b/R/read_lookup_sc_demographics.R @@ -5,8 +5,7 @@ #' @return a [tibble][tibble::tibble-package] #' @export #' -read_lookup_sc_demographics <- function( - sc_connection = phs_db_connection(dsn = "DVPROD")) { +read_lookup_sc_demographics <- function(sc_connection = phs_db_connection(dsn = "DVPROD")) { sc_demog <- dplyr::tbl( sc_connection, dbplyr::in_schema("social_care_2", "demographic_snapshot") diff --git a/R/read_sc_all_alarms_telecare.R b/R/read_sc_all_alarms_telecare.R index b4149d65d..ac3ac206d 100644 --- a/R/read_sc_all_alarms_telecare.R +++ b/R/read_sc_all_alarms_telecare.R @@ -6,8 +6,7 @@ #' #' @export #' -read_sc_all_alarms_telecare <- function( - sc_dvprod_connection = phs_db_connection(dsn = "DVPROD")) { +read_sc_all_alarms_telecare <- function(sc_dvprod_connection = phs_db_connection(dsn = "DVPROD")) { # Read in data--------------------------------------- ## read in data - social care 2 demographic From aaf73562d03958c0bd61518bbafd95c86537a4a7 Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Tue, 26 Sep 2023 09:40:23 +0000 Subject: [PATCH 7/9] Update documentation --- NAMESPACE | 2 +- man/clean_up_ch.Rd | 2 -- man/correct_cij_vars.Rd | 2 +- man/create_cohort_lookups.Rd | 2 +- man/create_cost_inc_dna.Rd | 2 +- man/create_individual_file.Rd | 8 -------- man/fill_missing_cij_markers.Rd | 2 +- man/join_cohort_lookups.Rd | 4 ++-- man/load_ep_file_vars.Rd | 2 +- ...reate_episode_file.Rd => run_episode_file.Rd} | 16 ++++++++-------- man/store_ep_file_vars.Rd | 2 +- 11 files changed, 17 insertions(+), 27 deletions(-) rename man/{create_episode_file.Rd => run_episode_file.Rd} (80%) diff --git a/NAMESPACE b/NAMESPACE index d0323b8c4..678d7a53f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,7 +13,6 @@ export(convert_hscp_to_hscpnames) export(convert_numeric_to_date) export(convert_sending_location_to_lca) export(convert_year_to_fyyear) -export(create_episode_file) export(create_individual_file) export(create_service_use_cohorts) export(end_fy) @@ -156,6 +155,7 @@ export(read_sc_all_alarms_telecare) export(read_sc_all_care_home) export(read_sc_all_home_care) export(read_sc_all_sds) +export(run_episode_file) export(setup_keyring) export(start_fy) export(start_fy_quarter) diff --git a/man/clean_up_ch.Rd b/man/clean_up_ch.Rd index c0c61966d..8665d0027 100644 --- a/man/clean_up_ch.Rd +++ b/man/clean_up_ch.Rd @@ -8,8 +8,6 @@ clean_up_ch(episode_file, year) } \arguments{ \item{episode_file}{Tibble containing episodic data.} - -\item{year}{The year to process, in FY format.} } \description{ Clean up CH-related columns. diff --git a/man/correct_cij_vars.Rd b/man/correct_cij_vars.Rd index 558514dc6..97a7f046f 100644 --- a/man/correct_cij_vars.Rd +++ b/man/correct_cij_vars.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/create_episode_file.R +% Please edit documentation in R/run_episode_file.R \name{correct_cij_vars} \alias{correct_cij_vars} \title{Correct the CIJ variables} diff --git a/man/create_cohort_lookups.Rd b/man/create_cohort_lookups.Rd index 109869074..f0ad267aa 100644 --- a/man/create_cohort_lookups.Rd +++ b/man/create_cohort_lookups.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/create_episode_file.R +% Please edit documentation in R/run_episode_file.R \name{create_cohort_lookups} \alias{create_cohort_lookups} \title{Create the cohort lookups} diff --git a/man/create_cost_inc_dna.Rd b/man/create_cost_inc_dna.Rd index 47c38b176..69e7e37b5 100644 --- a/man/create_cost_inc_dna.Rd +++ b/man/create_cost_inc_dna.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/create_episode_file.R +% Please edit documentation in R/run_episode_file.R \name{create_cost_inc_dna} \alias{create_cost_inc_dna} \title{Create cost total net inc DNA} diff --git a/man/create_individual_file.Rd b/man/create_individual_file.Rd index 4fd9a4a53..10b43efe1 100644 --- a/man/create_individual_file.Rd +++ b/man/create_individual_file.Rd @@ -15,16 +15,8 @@ create_individual_file( \arguments{ \item{episode_file}{Tibble containing episodic data.} -\item{year}{The year to process, in FY format.} - -\item{write_to_disk}{(optional) Should the data be written to disk default is -\code{TRUE} i.e. write the data to disk.} - \item{anon_chi_in}{(Default:TRUE) Is \code{anon_chi} used in the input (instead of chi).} - -\item{anon_chi_out}{(Default:TRUE) Should \code{anon_chi} be used in the output -(instead of chi).} } \value{ The processed individual file diff --git a/man/fill_missing_cij_markers.Rd b/man/fill_missing_cij_markers.Rd index 4795eed7a..03b64217e 100644 --- a/man/fill_missing_cij_markers.Rd +++ b/man/fill_missing_cij_markers.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/create_episode_file.R +% Please edit documentation in R/run_episode_file.R \name{fill_missing_cij_markers} \alias{fill_missing_cij_markers} \title{Fill any missing CIJ markers for records that should have them} diff --git a/man/join_cohort_lookups.Rd b/man/join_cohort_lookups.Rd index 3ef549cc3..7581dd5b6 100644 --- a/man/join_cohort_lookups.Rd +++ b/man/join_cohort_lookups.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/create_episode_file.R +% Please edit documentation in R/run_episode_file.R \name{join_cohort_lookups} \alias{join_cohort_lookups} \title{Join cohort lookups} @@ -17,7 +17,7 @@ join_cohort_lookups( \arguments{ \item{data}{The in-progress episode file data.} -\item{year}{The year to process, in FY format.} +\item{year}{financial year in '1718' format} \item{update}{The update to use} diff --git a/man/load_ep_file_vars.Rd b/man/load_ep_file_vars.Rd index 509b0e00c..cee9cc440 100644 --- a/man/load_ep_file_vars.Rd +++ b/man/load_ep_file_vars.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/create_episode_file.R +% Please edit documentation in R/run_episode_file.R \name{load_ep_file_vars} \alias{load_ep_file_vars} \title{Load the unneeded episode file variables} diff --git a/man/create_episode_file.Rd b/man/run_episode_file.Rd similarity index 80% rename from man/create_episode_file.Rd rename to man/run_episode_file.Rd index 99f885127..424d24afa 100644 --- a/man/create_episode_file.Rd +++ b/man/run_episode_file.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/create_episode_file.R -\name{create_episode_file} -\alias{create_episode_file} -\title{Create the Source Episode file} +% Please edit documentation in R/run_episode_file.R +\name{run_episode_file} +\alias{run_episode_file} +\title{Produce the Source Episode file} \usage{ -create_episode_file( +run_episode_file( processed_data_list, year, dd_data = read_file(get_source_extract_path(year, "DD")), @@ -39,11 +39,11 @@ create_episode_file( \code{TRUE} i.e. write the data to disk.} \item{anon_chi_out}{(Default:TRUE) Should \code{anon_chi} be used in the output -(instead of chi).} +(instead of chi)} } \value{ -the Source Episode file as a \link[tibble:tibble-package]{tibble}. +a \link[tibble:tibble-package]{tibble} containing the episode file } \description{ -Create the Source Episode file +Produce the Source Episode file } diff --git a/man/store_ep_file_vars.Rd b/man/store_ep_file_vars.Rd index 880266d58..06316aac1 100644 --- a/man/store_ep_file_vars.Rd +++ b/man/store_ep_file_vars.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/create_episode_file.R +% Please edit documentation in R/run_episode_file.R \name{store_ep_file_vars} \alias{store_ep_file_vars} \title{Store the unneeded episode file variables} From 3569e9fce41f6e59f2a80a335d2abccc05324529 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Tue, 26 Sep 2023 10:47:02 +0100 Subject: [PATCH 8/9] rename `run` to `create_episode_file` --- R/{run_episode_file.R => create_episode_file.R} | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) rename R/{run_episode_file.R => create_episode_file.R} (99%) diff --git a/R/run_episode_file.R b/R/create_episode_file.R similarity index 99% rename from R/run_episode_file.R rename to R/create_episode_file.R index 36ef73312..3dc33e193 100644 --- a/R/run_episode_file.R +++ b/R/create_episode_file.R @@ -15,7 +15,7 @@ #' #' @return a [tibble][tibble::tibble-package] containing the episode file #' @export -run_episode_file <- function( +create_episode_file <- function( processed_data_list, year, dd_data = read_file(get_source_extract_path(year, "DD")), @@ -154,7 +154,7 @@ run_episode_file <- function( #' Store the unneeded episode file variables #' #' @param data The in-progress episode file data. -#' @inheritParams run_episode_file +#' @inheritParams create_episode_file #' @param vars_to_keep a character vector of the variables to keep, all others #' will be stored. #' @@ -192,7 +192,7 @@ store_ep_file_vars <- function(data, year, vars_to_keep) { #' Load the unneeded episode file variables #' -#' @inheritParams run_episode_file +#' @inheritParams create_episode_file #' @inheritParams store_ep_file_vars #' #' @return The full SLF data. From eeee2a3be0a8cdad58f5efd28d0bcddc63f02644 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Tue, 26 Sep 2023 10:48:28 +0100 Subject: [PATCH 9/9] Update documentation --- NAMESPACE | 2 +- man/clean_up_ch.Rd | 2 ++ man/correct_cij_vars.Rd | 2 +- man/create_cohort_lookups.Rd | 2 +- man/create_cost_inc_dna.Rd | 2 +- man/{run_episode_file.Rd => create_episode_file.Rd} | 8 ++++---- man/create_individual_file.Rd | 8 ++++++++ man/fill_missing_cij_markers.Rd | 2 +- man/join_cohort_lookups.Rd | 4 ++-- man/load_ep_file_vars.Rd | 2 +- man/store_ep_file_vars.Rd | 2 +- 11 files changed, 23 insertions(+), 13 deletions(-) rename man/{run_episode_file.Rd => create_episode_file.Rd} (90%) diff --git a/NAMESPACE b/NAMESPACE index 678d7a53f..d0323b8c4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,6 +13,7 @@ export(convert_hscp_to_hscpnames) export(convert_numeric_to_date) export(convert_sending_location_to_lca) export(convert_year_to_fyyear) +export(create_episode_file) export(create_individual_file) export(create_service_use_cohorts) export(end_fy) @@ -155,7 +156,6 @@ export(read_sc_all_alarms_telecare) export(read_sc_all_care_home) export(read_sc_all_home_care) export(read_sc_all_sds) -export(run_episode_file) export(setup_keyring) export(start_fy) export(start_fy_quarter) diff --git a/man/clean_up_ch.Rd b/man/clean_up_ch.Rd index 8665d0027..c0c61966d 100644 --- a/man/clean_up_ch.Rd +++ b/man/clean_up_ch.Rd @@ -8,6 +8,8 @@ clean_up_ch(episode_file, year) } \arguments{ \item{episode_file}{Tibble containing episodic data.} + +\item{year}{The year to process, in FY format.} } \description{ Clean up CH-related columns. diff --git a/man/correct_cij_vars.Rd b/man/correct_cij_vars.Rd index 97a7f046f..558514dc6 100644 --- a/man/correct_cij_vars.Rd +++ b/man/correct_cij_vars.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/run_episode_file.R +% Please edit documentation in R/create_episode_file.R \name{correct_cij_vars} \alias{correct_cij_vars} \title{Correct the CIJ variables} diff --git a/man/create_cohort_lookups.Rd b/man/create_cohort_lookups.Rd index f0ad267aa..109869074 100644 --- a/man/create_cohort_lookups.Rd +++ b/man/create_cohort_lookups.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/run_episode_file.R +% Please edit documentation in R/create_episode_file.R \name{create_cohort_lookups} \alias{create_cohort_lookups} \title{Create the cohort lookups} diff --git a/man/create_cost_inc_dna.Rd b/man/create_cost_inc_dna.Rd index 69e7e37b5..47c38b176 100644 --- a/man/create_cost_inc_dna.Rd +++ b/man/create_cost_inc_dna.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/run_episode_file.R +% Please edit documentation in R/create_episode_file.R \name{create_cost_inc_dna} \alias{create_cost_inc_dna} \title{Create cost total net inc DNA} diff --git a/man/run_episode_file.Rd b/man/create_episode_file.Rd similarity index 90% rename from man/run_episode_file.Rd rename to man/create_episode_file.Rd index 424d24afa..c1ce0e063 100644 --- a/man/run_episode_file.Rd +++ b/man/create_episode_file.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/run_episode_file.R -\name{run_episode_file} -\alias{run_episode_file} +% Please edit documentation in R/create_episode_file.R +\name{create_episode_file} +\alias{create_episode_file} \title{Produce the Source Episode file} \usage{ -run_episode_file( +create_episode_file( processed_data_list, year, dd_data = read_file(get_source_extract_path(year, "DD")), diff --git a/man/create_individual_file.Rd b/man/create_individual_file.Rd index 10b43efe1..c4502e5ae 100644 --- a/man/create_individual_file.Rd +++ b/man/create_individual_file.Rd @@ -15,8 +15,16 @@ create_individual_file( \arguments{ \item{episode_file}{Tibble containing episodic data.} +\item{year}{The year to process, in FY format.} + +\item{write_to_disk}{(optional) Should the data be written to disk default is +\code{TRUE} i.e. write the data to disk.} + \item{anon_chi_in}{(Default:TRUE) Is \code{anon_chi} used in the input (instead of chi).} + +\item{anon_chi_out}{(Default:TRUE) Should \code{anon_chi} be used in the output +(instead of chi)} } \value{ The processed individual file diff --git a/man/fill_missing_cij_markers.Rd b/man/fill_missing_cij_markers.Rd index 03b64217e..4795eed7a 100644 --- a/man/fill_missing_cij_markers.Rd +++ b/man/fill_missing_cij_markers.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/run_episode_file.R +% Please edit documentation in R/create_episode_file.R \name{fill_missing_cij_markers} \alias{fill_missing_cij_markers} \title{Fill any missing CIJ markers for records that should have them} diff --git a/man/join_cohort_lookups.Rd b/man/join_cohort_lookups.Rd index 7581dd5b6..3ef549cc3 100644 --- a/man/join_cohort_lookups.Rd +++ b/man/join_cohort_lookups.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/run_episode_file.R +% Please edit documentation in R/create_episode_file.R \name{join_cohort_lookups} \alias{join_cohort_lookups} \title{Join cohort lookups} @@ -17,7 +17,7 @@ join_cohort_lookups( \arguments{ \item{data}{The in-progress episode file data.} -\item{year}{financial year in '1718' format} +\item{year}{The year to process, in FY format.} \item{update}{The update to use} diff --git a/man/load_ep_file_vars.Rd b/man/load_ep_file_vars.Rd index cee9cc440..509b0e00c 100644 --- a/man/load_ep_file_vars.Rd +++ b/man/load_ep_file_vars.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/run_episode_file.R +% Please edit documentation in R/create_episode_file.R \name{load_ep_file_vars} \alias{load_ep_file_vars} \title{Load the unneeded episode file variables} diff --git a/man/store_ep_file_vars.Rd b/man/store_ep_file_vars.Rd index 06316aac1..880266d58 100644 --- a/man/store_ep_file_vars.Rd +++ b/man/store_ep_file_vars.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/run_episode_file.R +% Please edit documentation in R/create_episode_file.R \name{store_ep_file_vars} \alias{store_ep_file_vars} \title{Store the unneeded episode file variables}