diff --git a/.Rbuildignore b/.Rbuildignore index 168a3e006..2cab1bda6 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -22,3 +22,4 @@ ^_targets\.R$ ^_targets\.yaml$ ^_SPSS_archived$ +^run_targets_ diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt index 7c038f875..c3f39305b 100644 --- a/.github/actions/spelling/expect.txt +++ b/.github/actions/spelling/expect.txt @@ -29,6 +29,7 @@ Classificat cls cmh CNWs +Comhairle codecov commhosp congen @@ -97,6 +98,7 @@ hjust hms homecare homev +hscdiip hscp hscpnames htmlwidgets @@ -115,6 +117,7 @@ keyring keytime keytimex kis +lazydt lgl lintr los @@ -122,6 +125,7 @@ ltc ltcs lubridate magrittr +Matern markdownguide Mcbride mcmahon @@ -148,6 +152,7 @@ outfile pandoc patflow pattype +PCEC phs phsmethods phsopendata @@ -177,6 +182,7 @@ reasonwait recid reflectoring refsource +renviron rlang rmarkdown roxygen @@ -215,6 +221,7 @@ submis tadm tarchetypes tbl +Telecare telecare testthat thom diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 060d818c5..babd1de81 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -2,9 +2,9 @@ # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: push: - branches: [master, main, main-R] + branches: [master, main, development] pull_request: - branches: [master, main, main-R] + branches: [master, main, development] name: R-CMD-check diff --git a/.github/workflows/lint-changed-files.yaml b/.github/workflows/lint-changed-files.yaml index e057592b4..e962bdf44 100644 --- a/.github/workflows/lint-changed-files.yaml +++ b/.github/workflows/lint-changed-files.yaml @@ -2,7 +2,7 @@ # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help on: pull_request: - branches: [main-R, master, main] + branches: [master, main, development] name: lint-changed-files diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index 612ecb81a..c47424169 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -2,9 +2,9 @@ # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: push: - branches: [master, main, main-R] + branches: [master, main, development] pull_request: - branches: [master, main, main-R] + branches: [master, main, development] name: test-coverage diff --git a/DESCRIPTION b/DESCRIPTION index a437b80cc..5123289dd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -50,9 +50,11 @@ Imports: rmarkdown (>= 2.17), rstudioapi (>= 0.14), scales (>= 1.2.0), - slfhelper (>= 0.9.0), + slfhelper (>= 0.10.0), stringdist (>= 0.9.10), stringr (>= 1.5.0), + tarchetypes (>= 0.7.6), + targets (>= 1.2.0), tibble (>= 3.2.1), tidyr (>= 1.3.0), tidyselect (>= 1.2.0), @@ -61,8 +63,6 @@ Suggests: covr (>= 3.6.1), roxygen2 (>= 7.2.3), spelling (>= 2.2), - tarchetypes (>= 0.7.5), - targets (>= 0.14.3), testthat (>= 3.1.7) Remotes: Public-Health-Scotland/phsmethods, diff --git a/NAMESPACE b/NAMESPACE index 464cced34..c5dca28bd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,9 @@ # Generated by roxygen2: do not edit by hand export("%>%") +export(add_homelessness_date_flags) +export(add_homelessness_flag) +export(add_hri_variables) export(add_nsu_cohort) export(check_year_format) export(clean_up_free_text) @@ -13,6 +16,8 @@ export(convert_hscp_to_hscpnames) export(convert_numeric_to_date) export(convert_sending_location_to_lca) export(convert_year_to_fyyear) +export(create_episode_file) +export(create_homelessness_lookup) export(create_individual_file) export(create_service_use_cohorts) export(end_fy) @@ -29,6 +34,7 @@ export(get_demographic_cohorts_path) 
export(get_dev_dir) export(get_dn_costs_path) export(get_dn_raw_costs_path) +export(get_existing_data_for_tests) export(get_file_path) export(get_gp_ooh_costs_path) export(get_gp_ooh_raw_costs_path) @@ -49,6 +55,7 @@ export(get_practice_details_path) export(get_readcode_lookup_path) export(get_sc_at_episodes_path) export(get_sc_ch_episodes_path) +export(get_sc_client_lookup_path) export(get_sc_demog_lookup_path) export(get_sc_hc_episodes_path) export(get_sc_sds_episodes_path) @@ -59,9 +66,7 @@ export(get_slf_ch_name_lookup_path) export(get_slf_chi_deaths_path) export(get_slf_deaths_lookup_path) export(get_slf_dir) -export(get_slf_ep_temp_path) export(get_slf_gpprac_path) -export(get_slf_indiv_temp_path) export(get_slf_postcode_path) export(get_source_extract_path) export(get_sparra_path) @@ -132,8 +137,10 @@ export(process_tests_nrs_deaths) export(process_tests_outpatients) export(process_tests_prescribing) export(process_tests_sc_ch_episodes) +export(process_tests_sc_client_lookup) export(process_tests_sc_demographics) export(process_tests_sds) +export(produce_episode_file_tests) export(produce_source_extract_tests) export(produce_test_comparison) export(read_extract_acute) @@ -157,7 +164,7 @@ export(read_sc_all_alarms_telecare) export(read_sc_all_care_home) export(read_sc_all_home_care) export(read_sc_all_sds) -export(run_episode_file) +export(setup_keyring) export(start_fy) export(start_fy_quarter) export(start_next_fy_quarter) diff --git a/NEWS.md b/NEWS.md index cbcb62079..fdbb64c9c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,50 @@ -# March 2023 Update - Unreleased +# September 2023 Update - Unreleased +* Update of 2017/18 onwards to include bug fixes within the files. +* New 2023/24 files. + *No social care data available for new 2023/24 file. +* New NSU cohort for 2022/23 file. +* SPD and SIMD updated. +* Re addition of: + * HRIs in individual file. + * Homelessness Flags. +* Bug fixes: + * Blank `datazone` in A&E. 
This has been fixed and was due to PC8 postcode format matching onto SLF pc lookup. + * Large increase in preventable beddays. This was caused due to an SPSS vs R logic difference. Uses SPSS logic which + brings the difference down to `3.3%`. + * Issue with `locality` which showed `locality` in each row instead of its true `locality`. This has now been fixed. + * Duplicated CHI in the individual file. The issue was identified when trying to include HRIs. This has now been corrected. +* Internal changes to SLF development: + * `DN` and `CMH` data are now archived in an HSCDIIP folder as the BOXI datamart is now closed down for these. Function `get_boxi_extract_path` has been updated to reflect this. + * Tests updated to include `HSCP`count. + * Tests created for `Delayed Discharges` extract and `Social care Client lookup`. +# June 2023 Update - Released 24-Jul-2023 +* 2011/12 -> 2013/14 – These files have not been altered, other than to make them available in a new file type (parquet). +* 2017/18 – These files have been recreated using our new R pipeline, but the data has not changed. We did this so that we would have a good comparator file. +* 2018/19 -> 2022/23 – These files have been recreated using the R pipeline and are also using updated data (as in a ‘normal’ update). +* Files changed into parquet format. +* SLFhelper updated. +* Removal of `keydate1_dateformat` and `keydate2_dateformat`. +* `dd_responsible_lca` – This variable now uses CA2019 codes instead of the 2-digit ‘old’ LCA code. +* Preventable beddays - not able to calculate these correctly. * Death fixes not included. +* Variables not ordered in R like they used to be in SPSS. +* End of HHG. +* New variable `ch_postcode`. +* rename of variables `cost_total_net_incdnas`, `ooh_outcome.1`, `ooh_outcome.2`, `ooh_outcome.3`, `ooh_outcome.4`, `totalnodncontacts`. +* HRI's not included. +* Homelessness flags not included. +* Keep_population flag not included. 
+ + +# March 2023 Update - Released 10-Mar-2023 +* 2021/22 episode and individual files refreshed with updated activity. +* 2022/23 file updated and contains data up to the end of Q3. +* Social care data is available for 2022/23. +* Typo in the variable name `ooh_covid_assessment` +* Next update in May as a test run in R but won't be released. +* Next release in June. + # December 2022 Update - Released 07-Dec-2022 * Now using the 2022v2 Scottish Postcode Directory. * Now using the 2020 Urban Rural classifications (instead of the older 2016 ones), this means variables such as `URx_2016` will now be called `URx_2020`. diff --git a/R/00-update_refs.R b/R/00-update_refs.R index a462ffdba..aef1e0da4 100644 --- a/R/00-update_refs.R +++ b/R/00-update_refs.R @@ -7,7 +7,7 @@ #' #' @family initialisation latest_update <- function() { - "Jun_2023" + "Sep_2023" } #' Previous update @@ -61,7 +61,7 @@ previous_update <- function(months_ago = 3L, override = NULL) { #' #' @family initialisation get_dd_period <- function() { - "Jul16_Mar23" + "Jul16_Jun23" } #' The latest financial year for Cost uplift setting @@ -74,5 +74,5 @@ get_dd_period <- function() { #' #' @family initialisation latest_cost_year <- function() { - "2223" + "2324" } diff --git a/R/add_hri_variables.R b/R/add_hri_variables.R new file mode 100644 index 000000000..710324646 --- /dev/null +++ b/R/add_hri_variables.R @@ -0,0 +1,142 @@ +#' Flag non-Scottish residents +#' +#' @details The variable keep flag can be in the range c(0:4) where +#' \itemize{ +#' \item{keep_flag = 0 when resident is Scottish} +#' \item{keep_flag = 1 when resident is not Scottish} +#' \item{keep_flag = 2 when the postcode is missing or a dummy, and the gpprac is missing} +#' \item{keep_flag = 3 when the gpprac is not English and the postcode is missing} +#' \item{keep_flag = 4 when the gpprac is not English and the postcode is a dummy} +#' } +#' The intention is to only keep the records where keep_flag = 0 +#' +#' @inheritParams 
add_hri_variables +#' +#' @return A data frame with the variable 'keep_flag' +flag_non_scottish_residents <- function( + data, + slf_pc_lookup) { + check_variables_exist(data, c("postcode", "gpprac")) + + # Make a lookup of postcode areas, which consist of the first characters + # of the postcode + pc_areas <- slf_pc_lookup %>% + dplyr::mutate( + pc_area = stringr::str_match(postcode, "^[A-Z]{1,3}"), + scot_flag = TRUE + ) %>% + dplyr::distinct(pc_area, scot_flag) + + # Create a flag, 'keep_flag', to determine whether individuals are Scottish + # residents or not + return_data <- data %>% + dplyr::mutate(pc_area = stringr::str_match(postcode, "^[A-Z]{1,3}")) %>% + dplyr::left_join(pc_areas, by = "pc_area") %>% + dplyr::mutate( + dummy_postcode = .data$postcode %in% c("BF010AA", "NF1 1AB", "NK010AA") | + stringr::str_sub(.data$postcode, 1, 4) %in% c("ZZ01", "ZZ61"), + eng_prac = .data$gpprac %in% c(99942, 99957, 99961, 99976, 99981, 99995, 99999), + scottish_resident = dplyr::case_when( + .data$scot_flag ~ 0L, + (is_missing(.data$postcode) | .data$dummy_postcode) & is.na(.data$gpprac) ~ 2L, + !.data$eng_prac & is_missing(.data$postcode) ~ 3L, + !.data$eng_prac & .data$dummy_postcode ~ 4L, + .default = 1L + ) + ) %>% + dplyr::select(-"dummy_postcode", -"eng_prac") + + return(return_data) +} + +#' Add HRI variables to an SLF Individual File +#' +#' @details Filters the dataset to only include Scottish residents, then +#' creates a lookup where HRIs are calculated at Scotland, Health Board, and +#' LCA level. Then joins on this lookup by chi/anon_chi. +#' +#' @param data An SLF individual file. +#' @param slf_pc_lookup The Source postcode lookup, defaults +#' to [get_slf_postcode_path()] read using [read_file()]. 
+#' +#' @return The individual file with HRI variables matched on +#' @export +add_hri_variables <- function( + data, + chi_variable = "chi", + slf_pc_lookup = read_file( + get_slf_postcode_path(), + col_select = "postcode" + )) { + hri_lookup <- data %>% + dplyr::select( + "year", + chi_variable, + "postcode", + "gpprac", + "lca", + "hbrescode", + "health_net_cost", + "acute_episodes", + "mat_episodes", + "mh_episodes", + "gls_episodes", + "op_newcons_attendances", + # op_newcons_dnas, + "ae_attendances", + "pis_paid_items", + "ooh_cases" + ) %>% + flag_non_scottish_residents(slf_pc_lookup = slf_pc_lookup) %>% + dplyr::filter(scottish_resident == 0L) %>% + # Scotland cost and proportion + dplyr::mutate( + scotland_cost = sum(health_net_cost), + scotland_pct = (health_net_cost / scotland_cost) * 100 + ) %>% + dplyr::arrange(dplyr::desc(health_net_cost)) %>% + dplyr::mutate(hri_scotp = cumsum(scotland_pct)) %>% + # Health Board + dplyr::group_by(hbrescode) %>% + dplyr::mutate( + hb_cost = sum(health_net_cost), + hb_pct = (health_net_cost / hb_cost) * 100 + ) %>% + dplyr::arrange(dplyr::desc(health_net_cost), .by_group = TRUE) %>% + dplyr::mutate(hri_hbp = cumsum(hb_pct)) %>% + dplyr::ungroup() %>% + # LCA + dplyr::group_by(lca) %>% + dplyr::mutate( + lca_cost = sum(health_net_cost), + lca_pct = (health_net_cost / lca_cost) * 100 + ) %>% + dplyr::arrange(dplyr::desc(health_net_cost), .by_group = TRUE) %>% + dplyr::mutate(hri_lcap = cumsum(lca_pct)) %>% + dplyr::ungroup() %>% + # Add HRI flags + dplyr::mutate( + hri_scot = hri_scotp <= 50.0, + hri_hb = hri_hbp <= 50.0, + hri_lca = hri_lcap <= 50.0, + # Deal with potential missing variables + hri_hb = dplyr::if_else(is_missing(hbrescode), FALSE, hri_hb), + hri_hbp = dplyr::if_else(is_missing(hbrescode), NA, hri_hbp), + hri_lca = dplyr::if_else(is_missing(lca), FALSE, hri_lca), + hri_lcap = dplyr::if_else(is_missing(lca), NA, hri_lcap) + ) %>% + # Select only required variables for the lookup + dplyr::select( + 
chi_variable, + "hri_scot", + "hri_scotp", + "hri_hb", + "hri_hbp", + "hri_lca", + "hri_lcap" + ) + + return_data <- dplyr::left_join(data, hri_lookup, by = chi_variable) + + return(return_data) +} diff --git a/R/add_nsu_cohort.R b/R/add_nsu_cohort.R index c5a26da12..00260bb8e 100644 --- a/R/add_nsu_cohort.R +++ b/R/add_nsu_cohort.R @@ -2,13 +2,17 @@ #' #' @param data The input data frame #' @param year The year being processed +#' @param nsu_cohort The NSU data for the year #' #' @return A data frame containing the Non-Service Users as additional rows #' @export #' #' @family episode file #' @seealso [get_nsu_path()] -add_nsu_cohort <- function(data, year) { +add_nsu_cohort <- function( + data, + year, + nsu_cohort = read_file(get_nsu_path(year))) { year_param <- year if (!check_year_valid(year, "NSU")) { @@ -29,9 +33,9 @@ add_nsu_cohort <- function(data, year) { ) ) - matched <- dplyr::full_join(data, - # NSU cohort file - read_file(get_nsu_path(year)) %>% + matched <- dplyr::full_join( + data, + nsu_cohort %>% dplyr::mutate( dob = as.Date(.data[["dob"]]), gpprac = convert_eng_gpprac_to_dummy(.data[["gpprac"]]) @@ -110,7 +114,6 @@ add_nsu_cohort <- function(data, year) { .data[["chi"]] ) ) %>% - # Remove the additional columns dplyr::select(-dplyr::contains("_nsu"), -"has_chi") return(return_df) diff --git a/R/add_ppa_flag.R b/R/add_ppa_flag.R index a6e9a175d..d0d0c4395 100644 --- a/R/add_ppa_flag.R +++ b/R/add_ppa_flag.R @@ -25,11 +25,11 @@ add_ppa_flag <- function(data) { ) ) - if (!(any(data$recid %in% c("01B", "02B", "04B", "GLS")))) { - nrecids <- length(unique(data$recid)) + unique_recids <- unique(data[["recid"]]) + if (!(any(unique_recids %in% c("01B", "02B", "04B", "GLS")))) { cli::cli_abort( - "None of the {nrecids} recid{?s} provided will relate to PPAs, - and the function will abort." + "None of the {length(unique_recids)} recid{?s} provided will relate + to PPAs, and the function will abort." 
) } diff --git a/R/add_smr_type.R b/R/add_smr_type.R index 180ea32c3..aa9e383bc 100644 --- a/R/add_smr_type.R +++ b/R/add_smr_type.R @@ -20,7 +20,7 @@ add_smr_type <- function(recid, # variable. Need to make sure to change all places where it is used as well. # Situation where some recids are not in the accepted values - if (any(!(recid %in% c( + if (!all(recid %in% c( "00B", "01B", "02B", @@ -35,9 +35,7 @@ add_smr_type <- function(recid, "NRS", "OoH", "PIS" - ) - )) & - !anyNA(recid)) { + )) && !anyNA(recid)) { cli::cli_warn(c("i" = "One or more values of {.var recid} do not have an assignable {.var smrtype}")) } @@ -51,7 +49,7 @@ add_smr_type <- function(recid, } # Situation where maternity records are present without a corresponding mpat - if (all(recid == "02B") & anyNA(mpat)) { + if (all(recid == "02B") && anyNA(mpat)) { cli::cli_abort( "In Maternity records, {.var mpat} is required to assign an smrtype, and there are some {.val NA} values. Please check the data." @@ -59,7 +57,7 @@ add_smr_type <- function(recid, } # Situation where acute records are present without a corresponding ipdc - if (all(recid %in% c("01B", "GLS")) & anyNA(ipdc)) { + if (all(recid %in% c("01B", "GLS")) && anyNA(ipdc)) { if (all(is.na(ipdc))) { cli::cli_abort( "In Acute records, {.var ipdc} is required to assign an smrtype, but @@ -72,19 +70,21 @@ add_smr_type <- function(recid, ) } - # Situation where Home Care records are present without a corresponding hc_service - if (all(recid == "HC") & anyNA(hc_service)) { + # Situation where Home Care records are present without + # a corresponding hc_service + if (all(recid == "HC") && anyNA(hc_service)) { cli::cli_abort( "In Home Care records, {.var hc_service} is required to assign an smrtype, - and there are some {.val NA} values. Please check the data." + and there are some {.val NA} values. Please check the data." 
) } - # Situation where Homelessness records are present without a corresponding main_applicant_flag - if (all(recid == "HL1") & anyNA(main_applicant_flag)) { + # Situation where Homelessness records are present without a + # corresponding main_applicant_flag + if (all(recid == "HL1") && anyNA(main_applicant_flag)) { cli::cli_abort( - "In Homelessness records, {.var main_applicant_flag} is required to assign an smrtype, - and there are some {.val NA} values. Please check the data." + "In Homelessness records, {.var main_applicant_flag} is required to assign + an smrtype, and there are some {.val NA} values. Please check the data." ) } @@ -92,12 +92,12 @@ add_smr_type <- function(recid, if (all(is.na(recid))) { cli::cli_abort( "Cannot assign {.var smrtype} when all {.var recid} are {.val NA}, - please check the data" + please check the data" ) } # Situation where a maternity recid is given but no mpat marker - if (all(recid == "02B") & missing(mpat)) { + if (all(recid == "02B") && missing(mpat)) { cli::cli_abort( "An {.var mpat} vector has not been supplied, and therefore Maternity records cannot be given an {.var smrtype}" @@ -105,7 +105,7 @@ add_smr_type <- function(recid, } # Situation where an Acute/GLS recid is given but no ipdc marker - if (any(recid %in% c("01B", "GLS")) & missing(ipdc)) { + if (any(recid %in% c("01B", "GLS")) && missing(ipdc)) { cli::cli_abort( "An {.var ipdc} vector has not been supplied, and therefore Acute/GLS records cannot be given an {.var smrtype}" @@ -113,15 +113,16 @@ add_smr_type <- function(recid, } # Situation where a Home Care recid is given but no hc_service marker - if (any(recid == "HC") & missing(hc_service)) { + if (any(recid == "HC") && missing(hc_service)) { cli::cli_abort( - "An {.var hc_service} vector has not been supplied, and therefore Home Care - records cannot be given an {.var smrtype}" + "An {.var hc_service} vector has not been supplied, and therefore + Home Care records cannot be given an {.var smrtype}" ) } - 
# Situation where a Homelessness recid is given but no main_applicant_flag marker - if (any(recid == "HL1") & missing(main_applicant_flag)) { + # Situation where a Homelessness recid is given + # but no main_applicant_flag marker + if (any(recid == "HL1") && missing(main_applicant_flag)) { cli::cli_abort( "A {.var main_applicant_flag} vector has not been supplied, and therefore Homelessness records cannot be given an {.var smrtype}" @@ -158,28 +159,30 @@ add_smr_type <- function(recid, recid == "HL1" & main_applicant_flag == "N" ~ "HL1-Other" ) } else if (all(recid == "OoH")) { - smrtype <- dplyr::case_when( - consultation_type == "DISTRICT NURSE" ~ "OOH-DN", - consultation_type == "DOCTOR ADVICE/NURSE ADVICE" ~ "OOH-Advice", - consultation_type == "HOME VISIT" ~ "OOH-HomeV", - consultation_type == "NHS 24 NURSE ADVICE" ~ "OOH-NHS24", - consultation_type == "PCEC/PCC" ~ "OOH-PCC", - consultation_type == "COVID19 ASSESSMENT" ~ "OOH-C19Ass", - consultation_type == "COVID19 ADVICE" ~ "OOH-C19Adv", - consultation_type == "COVID19 OTHER" ~ "OOH-C19Oth", + smrtype <- dplyr::case_match( + consultation_type, + "DISTRICT NURSE" ~ "OOH-DN", + "DOCTOR ADVICE/NURSE ADVICE" ~ "OOH-Advice", + "HOME VISIT" ~ "OOH-HomeV", + "NHS 24 NURSE ADVICE" ~ "OOH-NHS24", + "PCEC/PCC" ~ "OOH-PCC", + "COVID19 ASSESSMENT" ~ "OOH-C19Ass", + "COVID19 ADVICE" ~ "OOH-C19Adv", + "COVID19 OTHER" ~ "OOH-C19Oth", .default = "OOH-Other" ) } else { # Recids that can be recoded with no identifier - smrtype <- dplyr::case_when( - recid == "00B" ~ "Outpatient", - recid == "04B" ~ "Psych-IP", - recid == "AE2" ~ "A & E", - recid == "CH" ~ "Care-Home", - recid == "CMH" ~ "Comm-MH", - recid == "DN" ~ "DN", - recid == "NRS" ~ "NRS Deaths", - recid == "PIS" ~ "PIS" + smrtype <- dplyr::case_match( + recid, + "00B" ~ "Outpatient", + "04B" ~ "Psych-IP", + "AE2" ~ "A & E", + "CH" ~ "Care-Home", + "CMH" ~ "Comm-MH", + "DN" ~ "DN", + "NRS" ~ "NRS Deaths", + "PIS" ~ "PIS" ) } diff --git a/R/aggregate_by_chi_zihao.R 
b/R/aggregate_by_chi.R similarity index 65% rename from R/aggregate_by_chi_zihao.R rename to R/aggregate_by_chi.R index 7d9ce5ed3..8d9dff96d 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi.R @@ -7,7 +7,7 @@ #' @importFrom data.table .SD #' #' @inheritParams create_individual_file -aggregate_by_chi_zihao <- function(episode_file) { +aggregate_by_chi <- function(episode_file, exclude_sc_var = FALSE) { cli::cli_alert_info("Aggregate by CHI function started at {Sys.time()}") # Convert to data.table @@ -28,17 +28,33 @@ aggregate_by_chi_zihao <- function(episode_file) { ) ) - data.table::setnames( - episode_file, - c( - "ch_chi_cis", "cij_marker", "ooh_case_id" - # ,"hh_in_fy" - ), - c( - "ch_cis_episodes", "cij_total", "ooh_cases" - # ,"hl1_in_fy" + if (exclude_sc_var) { + data.table::setnames( + episode_file, + c( + "cij_marker", + "ooh_case_id" + ), + c( + "cij_total", + "ooh_cases" + ) ) - ) + } else { + data.table::setnames( + episode_file, + c( + "ch_chi_cis", + "cij_marker", + "ooh_case_id" + ), + c( + "ch_cis_episodes", + "cij_total", + "ooh_cases" + ) + ) + } # column specification, grouped by chi # columns to select last @@ -48,6 +64,9 @@ aggregate_by_chi_zihao <- function(episode_file) { "gpprac", vars_start_with(episode_file, "sc_") ) + if (exclude_sc_var) { + cols2 <- cols2[!(cols2 %in% vars_start_with(episode_file, "sc_"))] + } # columns to count unique rows cols3 <- c( "ch_cis_episodes", @@ -59,6 +78,9 @@ aggregate_by_chi_zihao <- function(episode_file) { "ooh_cases", "preventable_admissions" ) + if (exclude_sc_var) { + cols3 <- cols3[!(cols3 %in% "ch_cis_episodes")] + } # columns to sum up cols4 <- c( vars_end_with( @@ -90,9 +112,25 @@ aggregate_by_chi_zihao <- function(episode_file) { ), "health_net_cost_inc_dnas" ) - cols4 <- cols4[!(cols4 %in% c("ch_cis_episodes"))] + cols4 <- cols4[!(cols4 %in% "ch_cis_episodes")] + if (exclude_sc_var) { + cols4 <- + cols4[!(cols4 %in% c( + vars_end_with( + episode_file, + c( + "alarms", + "telecare" 
+ ) + ), + vars_start_with( + episode_file, + "sds_option" + ) + ))] + } # columns to select maximum - cols5 <- c("nsu", vars_contain(episode_file, c("hl1_in_fy"))) + cols5 <- c("nsu", vars_contain(episode_file, "hl1_in_fy")) data.table::setnafill(episode_file, fill = 0L, cols = cols5) # compute individual_file_cols1 <- episode_file[, @@ -126,9 +164,9 @@ aggregate_by_chi_zihao <- function(episode_file) { individual_file_cols6 <- episode_file[, .( preventable_beddays = ifelse( - max(cij_ppa, na.rm = TRUE), - max(cij_end_date) - min(cij_start_date), - NA_real_ + any(cij_ppa, na.rm = TRUE), + as.integer(min(cij_end_date, end_fy(year)) - max(cij_start_date, start_fy(year))), + NA_integer_ ) ), # cij_marker has been renamed as cij_total @@ -155,8 +193,13 @@ aggregate_by_chi_zihao <- function(episode_file) { } -#' select columns ending with some patterns -#' @describeIn select columns based on patterns +#' Select columns according to a pattern +#' +#' @describeIn vars_select Choose variables ending in a given pattern. +#' +#' @param data The data from which to select columns/variables. +#' @param vars The variables / pattern to find, as a character vector +#' @param ignore_case Should case be ignored (Default: FALSE) vars_end_with <- function(data, vars, ignore_case = FALSE) { names(data)[stringr::str_ends( names(data), @@ -166,8 +209,7 @@ vars_end_with <- function(data, vars, ignore_case = FALSE) { )] } -#' select columns starting with some patterns -#' @describeIn select columns based on patterns +#' @describeIn vars_select Choose variables starting with a given pattern. vars_start_with <- function(data, vars, ignore_case = FALSE) { names(data)[stringr::str_starts( names(data), @@ -177,35 +219,41 @@ vars_start_with <- function(data, vars, ignore_case = FALSE) { )] } -#' select columns contains some characters -#' @describeIn select columns based on patterns +#' @describeIn vars_select Choose variables which contain a given pattern. 
vars_contain <- function(data, vars, ignore_case = FALSE) { - names(data)[stringr::str_detect( + stringr::str_subset( names(data), stringr::regex(paste(vars, collapse = "|"), ignore_case = ignore_case ) - )] + ) } -#' Aggregate CIS episodes +#' Aggregate Care Home episodes to ch_cis #' #' @description Aggregate CH variables by CHI and CIS. #' #' @inheritParams create_individual_file -aggregate_ch_episodes_zihao <- function(episode_file) { +aggregate_ch_episodes <- function(episode_file) { cli::cli_alert_info("Aggregate ch episodes function started at {Sys.time()}") # Convert to data.table data.table::setDT(episode_file) # Perform grouping and aggregation - episode_file <- episode_file[, `:=`( - ch_no_cost = max(ch_no_cost), - ch_ep_start = min(record_keydate1), - ch_ep_end = max(ch_ep_end), - ch_cost_per_day = mean(ch_cost_per_day) - ), by = c("chi", "ch_chi_cis")] + episode_file[, c( + "ch_no_cost", + "ch_ep_start", + "ch_ep_end", + "ch_cost_per_day" + ) := list( + max(ch_no_cost), + min(record_keydate1), + max(ch_ep_end), + mean(ch_cost_per_day) + ), + by = c("chi", "ch_chi_cis") + ] # Convert back to tibble if needed episode_file <- tibble::as_tibble(episode_file) diff --git a/R/calculate_measures.R b/R/calculate_measures.R index 4f23c1f6d..a8b7510b8 100644 --- a/R/calculate_measures.R +++ b/R/calculate_measures.R @@ -27,7 +27,7 @@ calculate_measures <- function( measure <- match.arg(measure) if (!is.null(group_by)) { - group_by <- match.arg(group_by, c("recid")) + group_by <- match.arg(group_by, "recid") if (group_by == "recid") { data <- data %>% diff --git a/R/check_variables_exist.R b/R/check_variables_exist.R index 6effdffd2..82bccaf4f 100644 --- a/R/check_variables_exist.R +++ b/R/check_variables_exist.R @@ -24,10 +24,8 @@ check_variables_exist <- function(data, variables) { } else { missing_variables <- variables[which(!variables_present)] - n_missing <- length(missing_variables) - cli::cli_abort( - "{cli::qty(n_missing)}Variable{?s} {.val 
{missing_variables}} {?is/are} + "{cli::qty(length(missing_variables))}Variable{?s} {.val {missing_variables}} {?is/are} required, but {?is/are} missing from {.arg data}." ) } diff --git a/R/check_year_format.R b/R/check_year_format.R index 8fcb29aab..2fa2dedfc 100644 --- a/R/check_year_format.R +++ b/R/check_year_format.R @@ -28,8 +28,8 @@ check_year_format <- function(year, format = "fyyear") { format <- match.arg(arg = format, choices = c("fyyear", "alternate")) - first_part <- as.integer(substr(year, 1L, 2L)) - second_part <- as.integer(substr(year, 3L, 4L)) + first_part <- as.integer(stringr::str_sub(year, 1L, 2L)) + second_part <- as.integer(stringr::str_sub(year, 3L, 4L)) if (format == "fyyear") { if (any(first_part + 1L != second_part)) { @@ -40,7 +40,7 @@ check_year_format <- function(year, format = "fyyear") { )) } } else if (format == "alternate") { - if (any(!(first_part %in% 18L:20L))) { + if (!all(first_part %in% 18L:20L)) { cli::cli_abort(c( "The {.var year} has been entered in the wrong format.", "Try again using the alternate form, e.g. {.val 2017}", @@ -51,9 +51,11 @@ check_year_format <- function(year, format = "fyyear") { count_bad_values <- sum(possible_bad_values) cli::cli_warn(c( - "{cli::qty(count_bad_values)}{?A/Some} {.var year} value{?s} ha{?s/ve} likely been entered in the wrong format.", + "{cli::qty(count_bad_values)}{?A/Some} {.var year} value{?s} ha{?s/ve} + likely been entered in the wrong format.", "i" = "{.val {year[possible_bad_values]}}", - "You might want to check and try again using the alternate form, e.g. {.val 2017}", + "You might want to check and try again using the alternate form, + e.g. {.val 2017}", "Or use the function {.fun convert_fyyear_to_year}." 
)) } diff --git a/R/check_year_valid.R b/R/check_year_valid.R index 9f496dc05..5491709f0 100644 --- a/R/check_year_valid.R +++ b/R/check_year_valid.R @@ -38,14 +38,16 @@ check_year_valid <- function( return(FALSE) } else if (year <= "1617" && type %in% c("CH", "HC", "SDS", "AT")) { return(FALSE) - } else if (year <= "1718" && type %in% c("HHG")) { + } else if (year <= "1718" && type %in% "HHG") { return(FALSE) } else if (year >= "2122" && type %in% c("CMH", "DN")) { return(FALSE) - } else if (year >= "2223" && type %in% c("NSU")) { + } else if (year >= "2324" && type %in% "NSU") { return(FALSE) } else if (year >= "2324" && type %in% c("SPARRA", "HHG")) { return(FALSE) + } else if (year >= "2324" && type %in% c("CH", "HC", "SDS", "AT")) { + return(FALSE) } return(TRUE) diff --git a/R/clean_up_free_text.R b/R/clean_up_free_text.R index fb9e6ae51..d74a2fa80 100644 --- a/R/clean_up_free_text.R +++ b/R/clean_up_free_text.R @@ -14,9 +14,10 @@ #' @export #' @examples #' clean_up_free_text("hiwSDS SD. 
h") -clean_up_free_text <- function(string, - case_to = c("upper", "lower", "sentence", "title", "none"), - remove_punct = TRUE) { +clean_up_free_text <- function( + string, + case_to = c("upper", "lower", "sentence", "title", "none"), + remove_punct = TRUE) { if (missing(case_to)) case_to <- "title" case_to <- match.arg(case_to) diff --git a/R/compute_mid_year_age.R b/R/compute_mid_year_age.R index 4db1632d0..0e2483cf7 100644 --- a/R/compute_mid_year_age.R +++ b/R/compute_mid_year_age.R @@ -1,6 +1,7 @@ #' Compute Age at Midpoint of Year #' -#' @description Compute the age of a client at the midpoint of the year - 30-09-YYYY +#' @description Compute the age of a client at the midpoint of the year - +#' 30-09-YYYY #' #' @param fyyear current financial year #' @param dob date of birth of the clients @@ -19,7 +20,7 @@ compute_mid_year_age <- function(fyyear, dob) { age_intervals <- lubridate::interval( start = dob, - end = as.Date(midpoint_fy(fyyear)) + end = midpoint_fy(fyyear) ) ages <- lubridate::as.period(age_intervals)$year diff --git a/R/convert_ca_to_lca.R b/R/convert_ca_to_lca.R index b1537ef11..518d7e8fb 100644 --- a/R/convert_ca_to_lca.R +++ b/R/convert_ca_to_lca.R @@ -14,39 +14,40 @@ #' @family code functions #' @seealso convert_sending_location_to_lca convert_ca_to_lca <- function(ca_var) { - lca <- dplyr::case_when( - ca_var == "S12000033" | ca_var == "Aberdeen City" ~ "01", - ca_var == "S12000034" | ca_var == "Aberdeenshire" ~ "02", - ca_var == "S12000041" | ca_var == "Angus" ~ "03", - ca_var == "S12000035" | ca_var == "Argyll & Bute" ~ "04", - ca_var == "S12000026" | ca_var == "Scottish Borders" ~ "05", - ca_var == "S12000005" | ca_var == "Clackmannanshire" ~ "06", - ca_var == "S12000039" | ca_var == "West Dunbartonshire" ~ "07", - ca_var == "S12000006" | ca_var == "Dumfries and Galloway" ~ "08", - ca_var == "S12000042" | ca_var == "Dundee City" ~ "09", - ca_var == "S12000008" | ca_var == "East Ayrshire" ~ "10", - ca_var == "S12000045" | ca_var == "East 
Dunbartonshire" ~ "11", - ca_var == "S12000010" | ca_var == "East Lothian" ~ "12", - ca_var == "S12000011" | ca_var == "East Renfrewshire" ~ "13", - ca_var == "S12000036" | ca_var == "City of Edinburgh" ~ "14", - ca_var == "S12000014" | ca_var == "Falkirk" ~ "15", - ca_var %in% c("S12000015", "S12000047") | ca_var == "Fife" ~ "16", - ca_var %in% c("S12000046", "S12000049") | ca_var == "Glasgow City" ~ "17", - ca_var == "S12000017" | ca_var == "Highland" ~ "18", - ca_var == "S12000018" | ca_var == "Inverclyde" ~ "19", - ca_var == "S12000019" | ca_var == "Midlothian" ~ "20", - ca_var == "S12000020" | ca_var == "Moray" ~ "21", - ca_var == "S12000021" | ca_var == "North Ayrshire" ~ "22", - ca_var %in% c("S12000044", "S12000050") | ca_var == "North Lanarkshire" ~ "23", - ca_var == "S12000023" | ca_var == "Orkney" ~ "24", - ca_var %in% c("S12000024", "S12000048") | ca_var == "Perth and Kinross" ~ "25", - ca_var == "S12000038" | ca_var == "Renfrewshire" ~ "26", - ca_var == "S12000027" | ca_var == "Shetland Islands" ~ "27", - ca_var == "S12000028" | ca_var == "South Ayrshire" ~ "28", - ca_var == "S12000029" | ca_var == "South Lanarkshire" ~ "29", - ca_var == "S12000030" | ca_var == "Stirling" ~ "30", - ca_var == "S12000040" | ca_var == "West Lothian" ~ "31", - ca_var == "S12000013" | ca_var == "Na h-Eileanan Siar" | ca_var == "Comhairle nan Eilean Siar" ~ "32" + lca <- dplyr::case_match( + ca_var, + c("S12000033", "Aberdeen City") ~ "01", + c("S12000034", "Aberdeenshire") ~ "02", + c("S12000041", "Angus") ~ "03", + c("S12000035", "Argyll & Bute") ~ "04", + c("S12000026", "Scottish Borders") ~ "05", + c("S12000005", "Clackmannanshire") ~ "06", + c("S12000039", "West Dunbartonshire") ~ "07", + c("S12000006", "Dumfries and Galloway") ~ "08", + c("S12000042", "Dundee City") ~ "09", + c("S12000008", "East Ayrshire") ~ "10", + c("S12000045", "East Dunbartonshire") ~ "11", + c("S12000010", "East Lothian") ~ "12", + c("S12000011", "East Renfrewshire") ~ "13", + c("S12000036", 
"City of Edinburgh") ~ "14", + c("S12000014", "Falkirk") ~ "15", + c("S12000015", "S12000047", "Fife") ~ "16", + c("S12000046", "S12000049", "Glasgow City") ~ "17", + c("S12000017", "Highland") ~ "18", + c("S12000018", "Inverclyde") ~ "19", + c("S12000019", "Midlothian") ~ "20", + c("S12000020", "Moray") ~ "21", + c("S12000021", "North Ayrshire") ~ "22", + c("S12000044", "S12000050", "North Lanarkshire") ~ "23", + c("S12000023", "Orkney") ~ "24", + c("S12000024", "S12000048", "Perth and Kinross") ~ "25", + c("S12000038", "Renfrewshire") ~ "26", + c("S12000027", "Shetland Islands") ~ "27", + c("S12000028", "South Ayrshire") ~ "28", + c("S12000029", "South Lanarkshire") ~ "29", + c("S12000030", "Stirling") ~ "30", + c("S12000040", "West Lothian") ~ "31", + c("S12000013", "Na h-Eileanan Siar", "Comhairle nan Eilean Siar") ~ "32" ) return(lca) } diff --git a/R/convert_codes_to_name.R b/R/convert_codes_to_name.R index 4d6fd6b67..2b44109fd 100644 --- a/R/convert_codes_to_name.R +++ b/R/convert_codes_to_name.R @@ -14,38 +14,39 @@ #' #' @family code functions convert_hscp_to_hscpnames <- function(hscp) { - hscpnames <- dplyr::case_when( - hscp == "S37000001" ~ "Aberdeen City", - hscp == "S37000002" ~ "Aberdeenshire", - hscp == "S37000003" ~ "Angus", - hscp == "S37000004" ~ "Argyll and Bute", - hscp == "S37000005" ~ "Clackmannanshire and Stirling", - hscp == "S37000006" ~ "Dumfries and Galloway", - hscp == "S37000007" ~ "Dundee City", - hscp == "S37000008" ~ "East Ayrshire", - hscp == "S37000009" ~ "East Dunbartonshire", - hscp == "S37000010" ~ "East Lothian", - hscp == "S37000011" ~ "East Renfrewshire", - hscp == "S37000012" ~ "Edinburgh", - hscp == "S37000013" ~ "Falkirk", - hscp == "S37000016" ~ "Highland", - hscp == "S37000017" ~ "Inverclyde", - hscp == "S37000018" ~ "Midlothian", - hscp == "S37000019" ~ "Moray", - hscp == "S37000020" ~ "North Ayrshire", - hscp == "S37000022" ~ "Orkney Islands", - hscp == "S37000024" ~ "Renfrewshire", - hscp == "S37000025" ~ "Scottish 
Borders", - hscp == "S37000026" ~ "Shetland Islands", - hscp == "S37000027" ~ "South Ayrshire", - hscp == "S37000028" ~ "South Lanarkshire", - hscp == "S37000029" ~ "West Dunbartonshire", - hscp == "S37000030" ~ "West Lothian", - hscp == "S37000031" ~ "Western Isles", - hscp == "S37000032" ~ "Fife", - hscp == "S37000033" ~ "Perth and Kinross", - hscp == "S37000034" ~ "Glasgow City", - hscp == "S37000035" ~ "North Lanarkshire" + hscpnames <- dplyr::case_match( + hscp, + "S37000001" ~ "Aberdeen City", + "S37000002" ~ "Aberdeenshire", + "S37000003" ~ "Angus", + "S37000004" ~ "Argyll and Bute", + "S37000005" ~ "Clackmannanshire and Stirling", + "S37000006" ~ "Dumfries and Galloway", + "S37000007" ~ "Dundee City", + "S37000008" ~ "East Ayrshire", + "S37000009" ~ "East Dunbartonshire", + "S37000010" ~ "East Lothian", + "S37000011" ~ "East Renfrewshire", + "S37000012" ~ "Edinburgh", + "S37000013" ~ "Falkirk", + "S37000016" ~ "Highland", + "S37000017" ~ "Inverclyde", + "S37000018" ~ "Midlothian", + "S37000019" ~ "Moray", + "S37000020" ~ "North Ayrshire", + "S37000022" ~ "Orkney Islands", + "S37000024" ~ "Renfrewshire", + "S37000025" ~ "Scottish Borders", + "S37000026" ~ "Shetland Islands", + "S37000027" ~ "South Ayrshire", + "S37000028" ~ "South Lanarkshire", + "S37000029" ~ "West Dunbartonshire", + "S37000030" ~ "West Lothian", + "S37000031" ~ "Western Isles", + "S37000032" ~ "Fife", + "S37000033" ~ "Perth and Kinross", + "S37000034" ~ "Glasgow City", + "S37000035" ~ "North Lanarkshire" ) return(hscpnames) } @@ -66,22 +67,22 @@ convert_hscp_to_hscpnames <- function(hscp) { #' #' @family code functions convert_hb_to_hbnames <- function(hb) { - hbnames <- dplyr::case_when( - hb == "S08000015" ~ "Ayrshire and Arran", - hb == "S08000016" ~ "Borders", - hb == "S08000017" ~ "Dumfries and Galloway", - hb == "S08000019" ~ "Forth Valley", - hb == "S08000020" ~ "Grampian", - hb == "S08000022" ~ "Highland", - hb == "S08000024" ~ "Lothian", - hb == "S08000025" ~ "Orkney", - hb == 
"S08000026" ~ "Shetland", - hb == "S08000028" ~ "Western Isles", - hb == "S08000029" ~ "Fife", - hb == "S08000030" ~ "Tayside", - hb == "S08000031" ~ "Greater Glasgow and Clyde", - hb == "S08000032" ~ "Lanarkshire" + hbnames <- dplyr::case_match( + hb, + "S08000015" ~ "Ayrshire and Arran", + "S08000016" ~ "Borders", + "S08000017" ~ "Dumfries and Galloway", + "S08000019" ~ "Forth Valley", + "S08000020" ~ "Grampian", + "S08000022" ~ "Highland", + "S08000024" ~ "Lothian", + "S08000025" ~ "Orkney", + "S08000026" ~ "Shetland", + "S08000028" ~ "Western Isles", + "S08000029" ~ "Fife", + "S08000030" ~ "Tayside", + "S08000031" ~ "Greater Glasgow and Clyde", + "S08000032" ~ "Lanarkshire" ) - return(hbnames) } diff --git a/R/convert_date_types.R b/R/convert_date_types.R index a008b73f4..4402753a3 100644 --- a/R/convert_date_types.R +++ b/R/convert_date_types.R @@ -29,5 +29,9 @@ convert_date_to_numeric <- function(date) { #' #' @family date functions convert_numeric_to_date <- function(numeric_date) { - as.Date(lubridate::fast_strptime(as.character(numeric_date), "%Y%m%d", tz = "UTC")) + as.Date(lubridate::fast_strptime( + x = as.character(numeric_date), + format = "%Y%m%d", + tz = "UTC" + )) } diff --git a/R/convert_sending_location_to_lca.R b/R/convert_sending_location_to_lca.R index 21d14b676..ff7e51db1 100644 --- a/R/convert_sending_location_to_lca.R +++ b/R/convert_sending_location_to_lca.R @@ -9,46 +9,49 @@ #' @export #' #' @examples -#' sending_location <- c("100", "120") +#' sending_location <- c(100, 120) #' convert_sending_location_to_lca(sending_location) #' #' @family code functions #' #' @seealso convert_ca_to_lca convert_sending_location_to_lca <- function(sending_location) { - lca <- dplyr::case_when( - sending_location == "100" ~ "01", # Aberdeen City - sending_location == "110" ~ "02", # Aberdeenshire - sending_location == "120" ~ "03", # Angus - sending_location == "130" ~ "04", # Argyll and Bute - sending_location == "355" ~ "05", # Scottish Borders - 
sending_location == "150" ~ "06", # Clackmannanshire - sending_location == "395" ~ "07", # West Dumbartonshire - sending_location == "170" ~ "08", # Dumfries and Galloway - sending_location == "180" ~ "09", # Dundee City - sending_location == "190" ~ "10", # East Ayrshire - sending_location == "200" ~ "11", # East Dunbartonshire - sending_location == "210" ~ "12", # East Lothian - sending_location == "220" ~ "13", # East Renfrewshire - sending_location == "230" ~ "14", # City of Edinburgh - sending_location == "240" ~ "15", # Falkirk - sending_location == "250" ~ "16", # Fife - sending_location == "260" ~ "17", # Glasgow City - sending_location == "270" ~ "18", # Highland - sending_location == "280" ~ "19", # Inverclyde - sending_location == "290" ~ "20", # Midlothian - sending_location == "300" ~ "21", # Moray - sending_location == "310" ~ "22", # North Ayrshire - sending_location == "320" ~ "23", # North Lanarkshire - sending_location == "330" ~ "24", # Orkney Islands - sending_location == "340" ~ "25", # Perth and Kinross - sending_location == "350" ~ "26", # Renfrewshire - sending_location == "360" ~ "27", # Shetland Islands - sending_location == "370" ~ "28", # South Ayrshire - sending_location == "380" ~ "29", # South Lanarkshire - sending_location == "390" ~ "30", # Stirling - sending_location == "400" ~ "31", # West Lothian - sending_location == "235" ~ "32" # Na_h_Eileanan_Siar + lca <- dplyr::case_match( + sending_location, + 100L ~ "01", # Aberdeen City + 110L ~ "02", # Aberdeenshire + 120L ~ "03", # Angus + 130L ~ "04", # Argyll and Bute + 355L ~ "05", # Scottish Borders + 150L ~ "06", # Clackmannanshire + 395L ~ "07", # West Dunbartonshire + 170L ~ "08", # Dumfries and Galloway + 180L ~ "09", # Dundee City + 190L ~ "10", # East Ayrshire + 200L ~ "11", # East Dunbartonshire + 210L ~ "12", # East Lothian + 220L ~ "13", # East Renfrewshire + 230L ~ "14", # City of Edinburgh + 240L ~ "15", # Falkirk + 250L ~ "16", # Fife + 260L ~ "17", # Glasgow City + 
270L ~ "18", # Highland + 280L ~ "19", # Inverclyde + 290L ~ "20", # Midlothian + 300L ~ "21", # Moray + 310L ~ "22", # North Ayrshire + 320L ~ "23", # North Lanarkshire + 330L ~ "24", # Orkney Islands + 340L ~ "25", # Perth and Kinross + 350L ~ "26", # Renfrewshire + 360L ~ "27", # Shetland Islands + 370L ~ "28", # South Ayrshire + 380L ~ "29", # South Lanarkshire + 390L ~ "30", # Stirling + 400L ~ "31", # West Lothian + 235L ~ "32", # Na_h_Eileanan_Siar + .default = NA_character_ ) + return(lca) } diff --git a/R/convert_year_types.R b/R/convert_year_types.R index 1ba904e3d..8b9b04265 100644 --- a/R/convert_year_types.R +++ b/R/convert_year_types.R @@ -16,7 +16,7 @@ convert_fyyear_to_year <- function(fyyear) { fyyear <- check_year_format(year = fyyear, format = "fyyear") - year <- paste0("20", substr(fyyear, 1L, 2L)) + year <- paste0("20", stringr::str_sub(fyyear, 1L, 2L)) return(year) } @@ -39,8 +39,8 @@ convert_fyyear_to_year <- function(fyyear) { convert_year_to_fyyear <- function(year) { year <- check_year_format(year = year, format = "alternate") - first_part <- substr(year, 1L, 2L) - second_part <- substr(year, 3L, 4L) + first_part <- stringr::str_sub(year, 1L, 2L) + second_part <- stringr::str_sub(year, 3L, 4L) fyyear <- dplyr::if_else( @@ -53,7 +53,8 @@ convert_year_to_fyyear <- function(year) { non_21c <- which(first_part != "20") cli::cli_warn(c( - "i" = "{cli::qty(length(non_21c))}{?A/Some} value{?s} w{?as/ere} not in the 21st century i.e. not {.val 20xx}", + "i" = "{cli::qty(length(non_21c))}{?A/Some} value{?s} w{?as/ere} + not in the 21st century i.e. 
not {.val 20xx}", "This may have produced unexpected results, specifically:", "*" = "{.val {year[non_21c]}} -> {.val {fyyear[non_21c]}}" )) diff --git a/R/correct_demographics.R b/R/correct_demographics.R index 67bb39abe..d7ef6f469 100644 --- a/R/correct_demographics.R +++ b/R/correct_demographics.R @@ -59,13 +59,13 @@ correct_demographics <- function(data, year) { `min` ) ~ chi_dob_min, # If they have a GLS record and the age is broadly correct, assume older - dplyr::between(chi_age_max, 50, 130) & + dplyr::between(chi_age_max, 50L, 130L) & recid == "GLS" ~ chi_dob_min, - # If a congenital defect lines up with a dob, assume it is correct + # If a congenital defect lines up with a DoB, assume it is correct chi_dob_max == congen_date ~ chi_dob_max, chi_dob_min == congen_date ~ chi_dob_min, # If being older makes them over 113, assume they are younger - chi_age_max > 113 ~ chi_dob_max + chi_age_max > 113L ~ chi_dob_max ) ) %>% # If we still don't have an age, try and fill it in from other records. @@ -74,7 +74,7 @@ correct_demographics <- function(data, year) { dplyr::ungroup() %>% # Fill in the ages for any that are left. dplyr::mutate( - age = compute_mid_year_age(year, .data$dob), + age = compute_mid_year_age(year, .data$dob) ) %>% # Fill in gender from CHI if it's missing. dplyr::mutate( diff --git a/R/cost_uplift.R b/R/cost_uplift.R index 04bd9917f..2bb1d4c1f 100644 --- a/R/cost_uplift.R +++ b/R/cost_uplift.R @@ -35,15 +35,20 @@ apply_cost_uplift <- function(data) { #' #' @return episode data with a uplift scale lookup_uplift <- function(data) { - # We have set uplifts to use for 2020/21, 2021/22 and 2022/23, provided by Paul Leak. + # We have set uplifts to use for 2020/21, 2021/22 and 2022/23, + # provided by Paul Leak. # For older years, don't uplift. - # For years after 2022/23 uplift by an additional 1% per year after the latest cost year (2022/23) - # For non plics recids use uplift of 1 so we won't change anything. 
+ # For years after 2022/23 uplift by an additional 1% per year after the latest + # cost year (2022/23) + # For non PLICS recids use uplift of 1 so we won't change anything. # to accelerate, create a data frame of year and uplift for match-joining start_year <- 10L end_year <- as.integer(format(Sys.Date(), "%y")) - year <- paste0(start_year:end_year, (start_year + 1):(end_year + 1)) %>% as.integer() + year <- as.integer(paste0( + start_year:end_year, + (start_year + 1L):(end_year + 1L) + )) uplift_df <- tibble::tibble(year, uplift = 1.0 ) %>% @@ -52,25 +57,27 @@ lookup_uplift <- function(data) { uplift_df <- uplift_df %>% dplyr::mutate(uplift = dplyr::case_when( - # We have set uplifts to use for 2020/21, 2021/22 and 2022/23, provided by Paul Leak. + # We have set uplifts to use for 2020/21, 2021/22 and 2022/23, + # provided by Paul Leak. year == 2021L ~ 1.015, year == 2122L ~ 1.015 * 1.041, year == 2223L ~ 1.015 * 1.041 * 1.062, - # For years after 2022/23 uplift by an additional 1% per year after the latest cost year (2022/23) + # For years after 2022/23 uplift by an additional 1% per year after + # the latest cost year (2022/23) year > as.integer(latest_cost_year()) ~ (1.015 * 1.041 * 1.062) * (1.01^(.data$row_no - latest_cost_year_row)), # For older years, don't uplift. - TRUE ~ 1 + .default = 1.0 )) %>% dplyr::mutate(year = as.character(.data$year)) %>% dplyr::select(-"row_no") data <- data %>% dplyr::left_join(uplift_df, by = "year") %>% - # For non plics recids use uplift of 1 so we won't change anything. + # For non PLICS recids use uplift of 1 so we won't change anything. 
dplyr::mutate(uplift = dplyr::if_else( .data$recid %in% c("00B", "01B", "GLS", "02B", "04B", "AE2"), .data$uplift, - 1 + 1.0 )) return(data) diff --git a/R/create_demog_test_flags.R b/R/create_demog_test_flags.R index 0968eec06..3023292ce 100644 --- a/R/create_demog_test_flags.R +++ b/R/create_demog_test_flags.R @@ -13,45 +13,13 @@ create_demog_test_flags <- function(data) { dplyr::arrange(.data$chi) %>% # create test flags dplyr::mutate( - valid_chi = dplyr::if_else( - phsmethods::chi_check(.data$chi) == "Valid CHI", - 1L, - 0L - ), - unique_chi = dplyr::if_else( - dplyr::lag(.data$chi) != .data$chi, - 1L, - 0L - ), - n_missing_chi = dplyr::if_else( - is_missing(.data$chi), - 1L, - 0L - ), - n_males = dplyr::if_else( - .data$gender == 1L, - 1L, - 0L - ), - n_females = dplyr::if_else( - .data$gender == 2L, - 1L, - 0L - ), - # n_postcode = dplyr::if_else( - # is.na(.data$postcode) | .data$postcode == "", - # 0L, - # 1L - # ), - # n_missing_postcode = dplyr::if_else( - # is_missing(.data$postcode), - # 1L, - # 0L - # ), - missing_dob = dplyr::if_else( - is.na(.data$dob), - 1L, - 0L - ) + valid_chi = phsmethods::chi_check(.data$chi) == "Valid CHI", + unique_chi = dplyr::lag(.data$chi) != .data$chi, + n_missing_chi = is_missing(.data$chi), + n_males = .data$gender == 1L, + n_females = .data$gender == 2L, + n_postcode = !is.na(.data$postcode) & .data$postcode != "", + n_missing_postcode = is_missing(.data$postcode), + missing_dob = is.na(.data$dob) ) } diff --git a/R/create_demographic_lookup.R b/R/create_demographic_lookup.R index dfc2e25cf..2b252a151 100644 --- a/R/create_demographic_lookup.R +++ b/R/create_demographic_lookup.R @@ -348,7 +348,7 @@ assign_d_cohort_high_cc <- function(dementia, # FOR FUTURE: PhysicalandSensoryDisabilityClientGroup or LearningDisabilityClientGroup = "Y", # then high_cc_cohort = TRUE # FOR FUTURE: Care home removed, here's the code: .data$recid = "CH" & age < 65 - rowSums(dplyr::across(c( + rowSums(dplyr::pick(c( "dementia", "hefailure", 
"refailure", @@ -374,7 +374,7 @@ assign_d_cohort_high_cc <- function(dementia, #' @family Demographic and Service Use Cohort functions assign_d_cohort_medium_cc <- function(cvd, copd, chd, parkinsons, ms) { medium_cc <- - rowSums(dplyr::across(c( + rowSums(dplyr::pick(c( "cvd", "copd", "chd", @@ -403,7 +403,7 @@ assign_d_cohort_low_cc <- function(epilepsy, diabetes, atrialfib) { low_cc <- - rowSums(dplyr::across(c( + rowSums(dplyr::pick(c( "epilepsy", "asthma", "arth", @@ -596,12 +596,12 @@ assign_d_cohort_substance <- function(data) { f11 = .data$recid %in% c("01B", "04B") & rowSums(dplyr::across( c("diag1", "diag2", "diag3", "diag4", "diag5", "diag6"), - ~ stringr::str_sub(.x, 1L, 3L) %in% c("F11") + ~ stringr::str_sub(.x, 1L, 3L) %in% "F11" )) > 0L, f13 = .data$recid %in% c("01B", "04B") & rowSums(dplyr::across( c("diag1", "diag2", "diag3", "diag4", "diag5", "diag6"), - ~ stringr::str_sub(.x, 1L, 3L) %in% c("F13") + ~ stringr::str_sub(.x, 1L, 3L) %in% "F13" )) > 0L, t402_t404 = .data$recid %in% c("01B", "04B") & rowSums(dplyr::across( @@ -611,13 +611,13 @@ assign_d_cohort_substance <- function(data) { t424 = .data$recid %in% c("01B", "04B") & rowSums(dplyr::across( c("diag1", "diag2", "diag3", "diag4", "diag5", "diag6"), - ~ stringr::str_sub(.x, 1L, 4L) %in% c("T424") + ~ stringr::str_sub(.x, 1L, 4L) %in% "T424" )) > 0L ) %>% # Aggregate to CIJ level dplyr::group_by(.data$chi, .data$cij_marker) %>% dplyr::summarise( - dplyr::across(c("mh":"t424"), any) + dplyr::across("mh":"t424", ~ any(.x)) ) %>% dplyr::ungroup() %>% # Assign drug and alcohol misuse diff --git a/R/run_episode_file.R b/R/create_episode_file.R similarity index 74% rename from R/run_episode_file.R rename to R/create_episode_file.R index 852a4fd8b..1e2319836 100644 --- a/R/run_episode_file.R +++ b/R/create_episode_file.R @@ -4,15 +4,30 @@ #' @param year The year to process, in FY format. #' @param write_to_disk (optional) Should the data be written to disk default is #' `TRUE` i.e. 
write the data to disk. +#' @inheritParams add_nsu_cohort +#' @inheritParams fill_geographies +#' @inheritParams join_cohort_lookups +#' @inheritParams join_deaths_data +#' @inheritParams match_on_ltcs +#' @inheritParams link_delayed_discharge_eps #' @param anon_chi_out (Default:TRUE) Should `anon_chi` be used in the output #' (instead of chi) #' #' @return a [tibble][tibble::tibble-package] containing the episode file #' @export -#' -run_episode_file <- function( +create_episode_file <- function( processed_data_list, year, + dd_data = read_file(get_source_extract_path(year, "DD")), + homelessness_lookup = create_homelessness_lookup(year), + nsu_cohort = read_file(get_nsu_path(year)), + ltc_data = read_file(get_ltcs_path(year)), + slf_pc_lookup = read_file(get_slf_postcode_path()), + slf_gpprac_lookup = read_file( + get_slf_gpprac_path(), + col_select = c("gpprac", "cluster", "hbpraccode") + ), + slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year)), write_to_disk = TRUE, anon_chi_out = TRUE) { episode_file <- dplyr::bind_rows(processed_data_list) %>% @@ -93,28 +108,66 @@ run_episode_file <- function( NA_character_, .data$chi ), - gpprac = convert_eng_gpprac_to_dummy(.data[["gpprac"]]) + gpprac = convert_eng_gpprac_to_dummy(.data[["gpprac"]]), + # PC8 format may still be used. Ensure here that all datasets are in PC7 format. 
+ postcode = phsmethods::format_postcode(.data$postcode, "pc7") ) %>% correct_cij_vars() %>% fill_missing_cij_markers() %>% + add_homelessness_flag(year, lookup = homelessness_lookup) %>% + add_homelessness_date_flags(year, lookup = homelessness_lookup) %>% add_ppa_flag() %>% - link_delayed_discharge_eps(year) %>% - add_nsu_cohort(year) %>% - match_on_ltcs(year) %>% + link_delayed_discharge_eps(year, dd_data) %>% + add_nsu_cohort(year, nsu_cohort) %>% + match_on_ltcs(year, ltc_data) %>% correct_demographics(year) %>% create_cohort_lookups(year) %>% join_cohort_lookups(year) %>% join_sparra_hhg(year) %>% - fill_geographies() %>% - join_deaths_data(year) %>% + fill_geographies( + slf_pc_lookup, + slf_gpprac_lookup + ) %>% + join_deaths_data( + year, + slf_deaths_lookup + ) %>% load_ep_file_vars(year) - if (anon_chi_out) { - # TODO When slfhelper is updated remove the unnecessary code + if (!check_year_valid(year, type = c("CH", "HC", "AT", "SDS"))) { episode_file <- episode_file %>% - tidyr::replace_na(list(chi = "")) %>% - slfhelper::get_anon_chi() %>% - dplyr::mutate(anon_chi = dplyr::na_if(.data$anon_chi, "")) + dplyr::mutate( + sc_send_lca = NA, + sc_living_alone = NA, + sc_support_from_unpaid_carer = NA, + sc_social_worker = NA, + sc_type_of_housing = NA, + sc_meals = NA, + sc_day_care = NA, + sc_latest_submission = NA, + ch_chi_cis = NA, + sc_id_cis = NA, + ch_name = NA, + ch_adm_reason = NA, + ch_provider = NA, + ch_nursing = NA, + hc_hours_annual = NA, + hc_hours_q1 = NA, + hc_hours_q2 = NA, + hc_hours_q3 = NA, + hc_hours_q4 = NA, + hc_cost_q1 = NA, + hc_cost_q2 = NA, + hc_cost_q3 = NA, + hc_cost_q4 = NA, + hc_provider = NA, + hc_reablement = NA, + sds_option_4 = NA, + ) + } + + if (anon_chi_out) { + episode_file <- slfhelper::get_anon_chi(episode_file) } if (write_to_disk) { @@ -136,7 +189,7 @@ run_episode_file <- function( #' Store the unneeded episode file variables #' #' @param data The in-progress episode file data. 
-#' @inheritParams run_episode_file +#' @inheritParams create_episode_file #' @param vars_to_keep a character vector of the variables to keep, all others #' will be stored. #' @@ -174,7 +227,7 @@ store_ep_file_vars <- function(data, year, vars_to_keep) { #' Load the unneeded episode file variables #' -#' @inheritParams run_episode_file +#' @inheritParams create_episode_file #' @inheritParams store_ep_file_vars #' #' @return The full SLF data. @@ -275,21 +328,22 @@ correct_cij_vars <- function(data) { ), cij_pattype_code = dplyr::if_else( !is.na(.data$chi) & .data$recid %in% c("01B", "04B", "GLS", "02B"), - dplyr::case_match(.data$cij_admtype, - c("41", "42") ~ 2, - c("40", "48", "99") ~ 9, - "18" ~ 0, - .default = .data$cij_pattype_code + dplyr::case_match( + .data$cij_admtype, + c("41", "42") ~ 2L, + c("40", "48", "99") ~ 9L, + "18" ~ 0L, + .default = as.integer(.data$cij_pattype_code) ), .data$cij_pattype_code ), # Recode cij_pattype based on above cij_pattype = dplyr::case_match( .data$cij_pattype_code, - 0 ~ "Non-Elective", - 1 ~ "Elective", - 2 ~ "Maternity", - 9 ~ "Other" + 0L ~ "Non-Elective", + 1L ~ "Elective", + 2L ~ "Maternity", + 9L ~ "Other" ) ) } @@ -310,7 +364,7 @@ create_cost_inc_dna <- function(data) { # In the Cost_Total_Net column set the cost for # those with attendance status 5 or 8 (CNWs and DNAs) cost_total_net = dplyr::if_else( - .data$attendance_status %in% c(5, 8), + .data$attendance_status %in% c(5L, 8L), 0.0, .data$cost_total_net ) @@ -356,22 +410,28 @@ create_cohort_lookups <- function(data, year, update = latest_update()) { #' #' @inheritParams store_ep_file_vars #' @inheritParams get_demographic_cohorts_path +#' @param demographic_cohort,service_use_cohort The cohort data #' #' @return The data including the Demographic and Service Use lookups. 
-join_cohort_lookups <- function(data, year, update = latest_update()) { +join_cohort_lookups <- function( + data, + year, + update = latest_update(), + demographic_cohort = read_file( + get_demographic_cohorts_path(year, update), + col_select = c("chi", "demographic_cohort") + ), + service_use_cohort = read_file( + get_service_use_cohorts_path(year, update), + col_select = c("chi", "service_use_cohort") + )) { join_cohort_lookups <- data %>% dplyr::left_join( - read_file( - get_demographic_cohorts_path(year, update), - col_select = c("chi", "demographic_cohort") - ), + demographic_cohort, by = "chi" ) %>% dplyr::left_join( - read_file( - get_service_use_cohorts_path(year, update), - col_select = c("chi", "service_use_cohort") - ), + service_use_cohort, by = "chi" ) diff --git a/R/create_hb_test_flags.R b/R/create_hb_test_flags.R index cb5855c1e..d21f1662a 100644 --- a/R/create_hb_test_flags.R +++ b/R/create_hb_test_flags.R @@ -11,67 +11,19 @@ create_hb_test_flags <- function(data, hb_var) { data <- data %>% dplyr::mutate( - NHS_Ayrshire_and_Arran = dplyr::if_else( - {{ hb_var }} == "S08000015", - 1L, - 0L - ), - NHS_Borders = dplyr::if_else({{ hb_var }} == "S08000016", 1L, 0L), - NHS_Dumfries_and_Galloway = dplyr::if_else( - {{ hb_var }} == "S08000017", - 1L, - 0L - ), - NHS_Forth_Valley = dplyr::if_else({{ hb_var }} == "S08000019", 1L, 0L), - NHS_Grampian = dplyr::if_else( - {{ hb_var }} == "S08000020", - 1L, - 0L - ), - NHS_Highland = dplyr::if_else( - {{ hb_var }} == "S08000022", - 1L, - 0L - ), - NHS_Lothian = dplyr::if_else( - {{ hb_var }} == "S08000024", - 1L, - 0L - ), - NHS_Orkney = dplyr::if_else( - {{ hb_var }} == "S08000025", - 1L, - 0L - ), - NHS_Shetland = dplyr::if_else( - {{ hb_var }} == "S08000026", - 1L, - 0L - ), - NHS_Western_Isles = dplyr::if_else( - {{ hb_var }} == "S08000028", - 1L, - 0L - ), - NHS_Fife = dplyr::if_else( - {{ hb_var }} == "S08000029", - 1L, - 0L - ), - NHS_Tayside = dplyr::if_else( - {{ hb_var }} == "S08000030", - 1L, - 0L - 
), - NHS_Greater_Glasgow_and_Clyde = dplyr::if_else( - {{ hb_var }} %in% c("S08000031", "S08000021"), - 1L, - 0L - ), - NHS_Lanarkshire = dplyr::if_else( - {{ hb_var }} %in% c("S08000032", "S08000023"), - 1L, - 0L - ) + NHS_Ayrshire_and_Arran = {{ hb_var }} == "S08000015", + NHS_Borders = {{ hb_var }} == "S08000016", + NHS_Dumfries_and_Galloway = {{ hb_var }} == "S08000017", + NHS_Forth_Valley = {{ hb_var }} == "S08000019", + NHS_Grampian = {{ hb_var }} == "S08000020", + NHS_Highland = {{ hb_var }} == "S08000022", + NHS_Lothian = {{ hb_var }} == "S08000024", + NHS_Orkney = {{ hb_var }} == "S08000025", + NHS_Shetland = {{ hb_var }} == "S08000026", + NHS_Western_Isles = {{ hb_var }} == "S08000028", + NHS_Fife = {{ hb_var }} == "S08000029", + NHS_Tayside = {{ hb_var }} == "S08000030", + NHS_Greater_Glasgow_and_Clyde = {{ hb_var }} %in% c("S08000031", "S08000021"), + NHS_Lanarkshire = {{ hb_var }} %in% c("S08000032", "S08000023") ) } diff --git a/R/create_hscp_test_flags.R b/R/create_hscp_test_flags.R index b7dd0a02e..55e67b67c 100644 --- a/R/create_hscp_test_flags.R +++ b/R/create_hscp_test_flags.R @@ -5,166 +5,42 @@ #' @param data the data containing a HSCP variable #' @param hscp_var HSCP variable e.g. 
HSCP2019 HSCP2018 #' -#' @return a dataframe with flag (1 or 0) for each HSCP +#' @return a dataframe with flag (TRUE or FALSE) for each HSCP #' #' @family flag functions create_hscp_test_flags <- function(data, hscp_var) { data <- data %>% dplyr::mutate( - Aberdeen_City = dplyr::if_else( - {{ hscp_var }} == "S37000001", - 1L, - 0L - ), - Aberdeenshire = dplyr::if_else( - {{ hscp_var }} == "S37000002", - 1L, - 0L - ), - Angus = dplyr::if_else( - {{ hscp_var }} == "S37000003", - 1L, - 0L - ), - Argyll_and_Bute = dplyr::if_else( - {{ hscp_var }} == "S37000004", - 1L, - 0L - ), - Clackmannanshire_and_Stirling = dplyr::if_else( - {{ hscp_var }} == "S37000005", - 1L, - 0L - ), - Dumfries_and_Galloway = dplyr::if_else( - {{ hscp_var }} == "S37000006", - 1L, - 0L - ), - Dundee_City = dplyr::if_else( - {{ hscp_var }} == "S37000007", - 1L, - 0L - ), - East_Ayrshire = dplyr::if_else( - {{ hscp_var }} == "S37000008", - 1L, - 0L - ), - East_Dunbartonshire = dplyr::if_else( - {{ hscp_var }} == "S37000009", - 1L, - 0L - ), - East_Lothian = dplyr::if_else( - {{ hscp_var }} == "S37000010", - 1L, - 0L - ), - East_Renfrewshire = dplyr::if_else( - {{ hscp_var }} == "S37000011", - 1L, - 0L - ), - Edinburgh = dplyr::if_else( - {{ hscp_var }} == "S37000012", - 1L, - 0L - ), - Falkirk = dplyr::if_else( - {{ hscp_var }} == "S37000013", - 1L, - 0L - ), - Highland = dplyr::if_else( - {{ hscp_var }} == "S37000016", - 1L, - 0L - ), - Inverclyde = dplyr::if_else( - {{ hscp_var }} == "S37000017", - 1L, - 0L - ), - Midlothian = dplyr::if_else( - {{ hscp_var }} == "S37000018", - 1L, - 0L - ), - Moray = dplyr::if_else( - {{ hscp_var }} == "S37000019", - 1L, - 0L - ), - North_Ayrshire = dplyr::if_else( - {{ hscp_var }} == "S37000020", - 1L, - 0L - ), - Orkney_Islands = dplyr::if_else( - {{ hscp_var }} == "S37000022", - 1L, - 0L - ), - Renfrewshire = dplyr::if_else( - {{ hscp_var }} == "S37000024", - 1L, - 0L - ), - Scottish_Borders = dplyr::if_else( - {{ hscp_var }} == "S37000025", - 1L, - 0L - ), 
- Shetland_Islands = dplyr::if_else( - {{ hscp_var }} == "S37000026", - 1L, - 0L - ), - South_Ayrshire = dplyr::if_else( - {{ hscp_var }} == "S37000027", - 1L, - 0L - ), - South_Lanarkshire = dplyr::if_else( - {{ hscp_var }} == "S37000028", - 1L, - 0L - ), - West_Dunbartonshire = dplyr::if_else( - {{ hscp_var }} == "S37000029", - 1L, - 0L - ), - West_Lothian = dplyr::if_else( - {{ hscp_var }} == "S37000030", - 1L, - 0L - ), - Western_Isles = dplyr::if_else( - {{ hscp_var }} == "S37000031", - 1L, - 0L - ), - Fife = dplyr::if_else( - {{ hscp_var }} == "S37000032", - 1L, - 0L - ), - Perth_and_Kinross = dplyr::if_else( - {{ hscp_var }} == "S37000033", - 1L, - 0L - ), - Glasgow_City = dplyr::if_else( - {{ hscp_var }} %in% c("S37000015", "S37000034"), - 1L, - 0L - ), - North_Lanarkshire = dplyr::if_else( - {{ hscp_var }} %in% c("S37000021", "S37000035"), - 1L, - 0L - ) + Aberdeen_City = {{ hscp_var }} == "S37000001", + Aberdeenshire = {{ hscp_var }} == "S37000002", + Angus = {{ hscp_var }} == "S37000003", + Argyll_and_Bute = {{ hscp_var }} == "S37000004", + Clackmannanshire_and_Stirling = {{ hscp_var }} == "S37000005", + Dumfries_and_Galloway = {{ hscp_var }} == "S37000006", + Dundee_City = {{ hscp_var }} == "S37000007", + East_Ayrshire = {{ hscp_var }} == "S37000008", + East_Dunbartonshire = {{ hscp_var }} == "S37000009", + East_Lothian = {{ hscp_var }} == "S37000010", + East_Renfrewshire = {{ hscp_var }} == "S37000011", + Edinburgh = {{ hscp_var }} == "S37000012", + Falkirk = {{ hscp_var }} == "S37000013", + Highland = {{ hscp_var }} == "S37000016", + Inverclyde = {{ hscp_var }} == "S37000017", + Midlothian = {{ hscp_var }} == "S37000018", + Moray = {{ hscp_var }} == "S37000019", + North_Ayrshire = {{ hscp_var }} == "S37000020", + Orkney_Islands = {{ hscp_var }} == "S37000022", + Renfrewshire = {{ hscp_var }} == "S37000024", + Scottish_Borders = {{ hscp_var }} == "S37000025", + Shetland_Islands = {{ hscp_var }} == "S37000026", + South_Ayrshire = {{ hscp_var }} == 
"S37000027", + South_Lanarkshire = {{ hscp_var }} == "S37000028", + West_Dunbartonshire = {{ hscp_var }} == "S37000029", + West_Lothian = {{ hscp_var }} == "S37000030", + Western_Isles = {{ hscp_var }} == "S37000031", + Fife = {{ hscp_var }} == "S37000032", + Perth_and_Kinross = {{ hscp_var }} == "S37000033", + Glasgow_City = {{ hscp_var }} %in% c("S37000015", "S37000034"), + North_Lanarkshire = {{ hscp_var }} %in% c("S37000021", "S37000035"), ) } diff --git a/R/create_individual_file.R b/R/create_individual_file.R index e2cf996a1..664e69ad2 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -1,17 +1,18 @@ -#' Create individual file +#' Create the Source Individual file #' -#' @description Creates individual file from episode file +#' @description Creates the individual file from the episode file. #' -#' @param episode_file Tibble containing episodic data +#' @param episode_file Tibble containing episodic data. #' @param anon_chi_in (Default:TRUE) Is `anon_chi` used in the input -#' (instead of chi) -#' @inheritParams run_episode_file +#' (instead of chi). 
+#' @inheritParams create_episode_file #' #' @return The processed individual file #' @export create_individual_file <- function( episode_file, year, + homelessness_lookup = create_homelessness_lookup(year), write_to_disk = TRUE, anon_chi_in = TRUE, anon_chi_out = TRUE) { @@ -56,23 +57,74 @@ create_individual_file <- function( "sc_latest_submission", "hc_hours_annual", "hc_reablement", - "ooh_case_id" + "ooh_case_id", + "lca", + "hbrescode", + "health_net_cost", + "acute_episodes", + "mat_episodes", + "mh_episodes", + "gls_episodes", + "op_newcons_attendances", + "ae_attendances", + "pis_paid_items", + "ooh_cases" ))) %>% remove_blank_chi() %>% add_cij_columns() %>% - add_all_columns() %>% - aggregate_ch_episodes_zihao() %>% - clean_up_ch(year) %>% + add_all_columns(year = year) + + if (!check_year_valid(year, type = c("CH", "HC", "AT", "SDS"))) { + individual_file <- individual_file %>% + aggregate_by_chi(exclude_sc_var = TRUE) + } else { + individual_file <- individual_file %>% + aggregate_ch_episodes() %>% + clean_up_ch(year) %>% + aggregate_by_chi(exclude_sc_var = FALSE) %>% + join_sc_client(year) + } + + individual_file <- individual_file %>% recode_gender() %>% - aggregate_by_chi_zihao() %>% clean_individual_file(year) %>% join_cohort_lookups(year) %>% + add_homelessness_flag(year, lookup = homelessness_lookup) %>% match_on_ltcs(year) %>% join_deaths_data(year) %>% join_sparra_hhg(year) %>% join_slf_lookup_vars() %>% - join_sc_client(year) %>% - dplyr::mutate(year = year) + dplyr::mutate(year = year) %>% + add_hri_variables(chi_variable = "chi") + + if (!check_year_valid(year, type = c("CH", "HC", "AT", "SDS"))) { + individual_file <- individual_file %>% + dplyr::mutate( + ch_cis_episodes = NA, + ch_beddays = NA, + ch_cost = NA, + hc_episodes = NA, + hc_personal_episodes = NA, + hc_non_personal_episodes = NA, + hc_reablement_episodes = NA, + hc_total_cost = NA, + hc_total_hours = NA, + hc_personal_hours = NA, + hc_non_personal_hours = NA, + hc_reablement_hours = NA, + 
at_alarms = NA, + at_telecare = NA, + sds_option_1 = NA, + sds_option_2 = NA, + sds_option_3 = NA, + sds_option_4 = NA, + sc_living_alone = NA, + sc_support_from_unpaid_carer = NA, + sc_social_worker = NA, + sc_meals = NA, + sc_day_care = NA + ) + } if (anon_chi_out) { individual_file <- individual_file %>% @@ -121,17 +173,17 @@ add_cij_columns <- function(episode_file) { episode_file %>% dplyr::mutate( cij_non_el = dplyr::if_else( - .data$cij_pattype_code == 0, + .data$cij_pattype_code == 0L, .data$cij_marker, NA_real_ ), cij_el = dplyr::if_else( - .data$cij_pattype_code == 1, + .data$cij_pattype_code == 1L, .data$cij_marker, NA_real_ ), cij_mat = dplyr::if_else( - .data$cij_pattype_code == 2, + .data$cij_pattype_code == 2L, .data$cij_marker, NA_real_ ), @@ -141,7 +193,7 @@ add_cij_columns <- function(episode_file) { NA_real_ ), preventable_admissions = dplyr::if_else( - .data$cij_ppa == 1, + .data$cij_ppa == 1L, .data$cij_marker, NA_integer_ ) @@ -157,7 +209,7 @@ add_cij_columns <- function(episode_file) { -add_all_columns <- function(episode_file) { +add_all_columns <- function(episode_file, year) { cli::cli_alert_info("Add all columns function started at {Sys.time()}") - episode_file %>% + episode_file <- episode_file %>% add_acute_columns("Acute", (.data$smrtype == "Acute-DC" | .data$smrtype == "Acute-IP") & .data$cij_pattype != "Maternity") %>% add_mat_columns("Mat", .data$recid == "02B" | .data$cij_pattype == "Maternity") %>% add_mh_columns("MH", .data$recid == "04B" & .data$cij_pattype != "Maternity") %>% @@ -171,11 +223,17 @@ add_all_columns <- function(episode_file) { add_dd_columns("DD", .data$recid == "DD") %>% add_nsu_columns("NSU", .data$recid == "NSU") %>% add_nrs_columns("NRS", .data$recid == "NRS") %>% - add_hl1_columns("HL1", .data$recid == "HL1") %>% - add_ch_columns("CH", .data$recid == "CH") %>% - add_hc_columns("HC", .data$recid == "HC") %>% - add_at_columns("AT", .data$recid == "AT") %>% - add_sds_columns("SDS", .data$recid == "SDS") %>% + 
add_hl1_columns("HL1", .data$recid == "HL1") + + if 
(check_year_valid(year, type = c("CH", "HC", "AT", "SDS"))) { + episode_file <- episode_file %>% + add_ch_columns("CH", .data$recid == "CH") %>% + add_hc_columns("HC", .data$recid == "HC") %>% + add_at_columns("AT", .data$recid == "AT") %>% + add_sds_columns("SDS", .data$recid == "SDS") + } + + episode_file <- episode_file %>% dplyr::mutate( health_net_cost = rowSums( dplyr::pick( @@ -192,7 +250,7 @@ add_all_columns <- function(episode_file) { ), health_net_cost_inc_dnas = .data$health_net_cost + dplyr::if_else( is.na(.data$OP_cost_dnas), - 0, + 0.0, .data$OP_cost_dnas ) ) @@ -247,13 +305,13 @@ add_op_columns <- function(episode_file, prefix, condition) { condition <- substitute(condition) episode_file <- episode_file %>% add_standard_cols(prefix, condition) - condition_1 <- substitute(condition & attendance_status == 1) + condition_1 <- substitute(condition & attendance_status == 1L) episode_file <- episode_file %>% dplyr::mutate( "{prefix}_newcons_attendances" := dplyr::if_else(eval(condition_1), 1L, NA_integer_), "{prefix}_cost_attend" := dplyr::if_else(eval(condition_1), .data$cost_total_net, NA_real_) ) - condition_5_8 <- substitute(condition & attendance_status %in% c(5, 8)) + condition_5_8 <- substitute(condition & attendance_status %in% c(5L, 8L)) episode_file <- episode_file %>% dplyr::mutate( "{prefix}_newcons_dnas" := dplyr::if_else(eval(condition_5_8), 1L, NA_integer_), @@ -306,11 +364,11 @@ add_ooh_columns <- function(episode_file, prefix, condition) { "{prefix}_consultation_time" := dplyr::if_else( eval(condition), pmax( - 0, + 0.0, as.numeric((lubridate::seconds_to_period(.data$keytime2) + .data$record_keydate2) - (lubridate::seconds_to_period(.data$keytime1) + .data$record_keydate1), units = "mins") ), NA_real_ - ), + ) ) return(episode_file) @@ -406,7 +464,7 @@ add_ch_columns <- function(episode_file, prefix, condition) { add_standard_cols(prefix, condition) %>% dplyr::mutate( ch_cost_per_day = dplyr::if_else( - eval(condition) & .data$yearstay > 
0, + eval(condition) & .data$yearstay > 0.0, .data$cost_total_net / .data$yearstay, .data$cost_total_net ), @@ -433,8 +491,16 @@ add_hc_columns <- function(episode_file, prefix, condition) { episode_file <- episode_file %>% add_standard_cols(prefix, condition, episode = TRUE) %>% dplyr::mutate( - "{prefix}_total_hours" := dplyr::if_else(eval(condition), .data$hc_hours_annual, NA_real_), - "{prefix}_total_cost" := dplyr::if_else(eval(condition), .data$cost_total_net, NA_real_), + "{prefix}_total_hours" := dplyr::if_else( + eval(condition), + .data$hc_hours_annual, + NA_real_ + ), + "{prefix}_total_cost" := dplyr::if_else( + eval(condition), + .data$cost_total_net, + NA_real_ + ) ) condition_per <- substitute(condition & smrtype == "HC-Per") episode_file <- episode_file %>% @@ -450,7 +516,7 @@ add_hc_columns <- function(episode_file, prefix, condition) { "{prefix}_non_personal_hours" := dplyr::if_else(eval(condition_non_per), .data$hc_hours_annual, NA_real_), "{prefix}_non_personal_hours_cost" := dplyr::if_else(eval(condition_non_per), .data$cost_total_net, NA_real_) ) - condition_reabl <- substitute(condition & hc_reablement == 1) + condition_reabl <- substitute(condition & hc_reablement == 1L) episode_file <- episode_file %>% dplyr::mutate( "{prefix}_reablement_episodes" := dplyr::if_else(eval(condition_reabl), 1L, NA_integer_), @@ -547,35 +613,6 @@ add_standard_cols <- function(episode_file, prefix, condition, episode = FALSE, return(episode_file) } - -#' Aggregate CIS episodes -#' -#' @description Aggregate CH variables by CHI and CIS. 
-#' -#' @inheritParams create_individual_file -aggregate_ch_episodes <- function(episode_file) { - cli::cli_alert_info("Aggregate ch episodes function started at {Sys.time()}") - - episode_file %>% - # dplyr::filter(!is.na(.data$ch_chi_cis)) %>% - # use as.data.table to change the data format to data.table to accelerate - data.table::as.data.table() %>% - dplyr::group_by(.data$chi, .data$ch_chi_cis) %>% - dplyr::mutate( - ch_no_cost = max(.data$ch_no_cost), - ch_ep_start = min(.data$record_keydate1), - ch_ep_end = max(.data$ch_ep_end), - ch_cost_per_day = mean(.data$ch_cost_per_day) - ) %>% - dplyr::ungroup() %>% - # change the data format from data.table to data.frame - tibble::as_tibble() - - # dplyr::distinct(.data$chi, .data$ch_chi_cis) %>% - # dplyr::select(.data$chi, .data$ch_chi_cis, .data$ch_no_cost, .data$ch_ep_start, .data$ch_ep_end, .data$ch_cost_per_day) %>% - # dplyr::right_join(episode_file, by = c(.data$chi, .data$ch_chi_cis)) -} - #' Clean up CH #' #' @description Clean up CH-related columns. 
@@ -590,7 +627,7 @@ clean_up_ch <- function(episode_file, year) { fy_start = start_fy(year) ) %>% dplyr::mutate( - term_1 = pmin(.data$ch_ep_end, .data$fy_end + 1), + term_1 = pmin(.data$ch_ep_end, .data$fy_end + 1L), term_2 = pmax(.data$ch_ep_start, .data$fy_start) ) %>% dplyr::mutate( @@ -600,18 +637,18 @@ clean_up_ch <- function(episode_file, year) { NA_real_ ), ch_cost = dplyr::if_else( - .data$recid == "CH" & .data$ch_no_cost == 0, + .data$recid == "CH" & .data$ch_no_cost == 0L, .data$ch_beddays * .data$ch_cost_per_day, NA_real_ ), ch_beddays = dplyr::if_else( - .data$recid == "CH" & .data$ch_chi_cis == 0, - 0, + .data$recid == "CH" & .data$ch_chi_cis == 0L, + 0L, .data$ch_beddays ), ch_cost = dplyr::if_else( - .data$recid == "CH" & .data$ch_chi_cis == 0, - 0, + .data$recid == "CH" & .data$ch_chi_cis == 0L, + 0.0, .data$ch_cost ) ) %>% @@ -629,105 +666,13 @@ recode_gender <- function(episode_file) { episode_file %>% dplyr::mutate( gender = dplyr::if_else( - .data$gender %in% c(0, 9), + .data$gender %in% c(0L, 9L), 1.5, .data$gender ) ) } -#' Aggregate by CHI -#' -#' @description Aggregate episode file by CHI to convert into -#' individual file. 
-#' -#' @inheritParams create_individual_file -aggregate_by_chi <- function(episode_file) { - cli::cli_alert_info("Aggregate by CHI function started at {Sys.time()}") - - episode_file %>% - dplyr::arrange( - chi, - record_keydate1, - keytime1, - record_keydate2, - keytime2 - ) %>% - dplyr::group_by(.data$chi) %>% - dplyr::summarise( - gender = mean(gender), - dplyr::across( - dplyr::ends_with(c("postcode", "DoB", "gpprac")), - ~ dplyr::last(., na_rm = TRUE) - ), - dplyr::across( - c( - "ch_cis_episodes" = "ch_chi_cis", - "cij_total" = "cij_marker", - "cij_el", - "cij_non_el", - "cij_mat", - # "cij_delay", - "ooh_cases" = "ooh_case_id", - "preventable_admissions" - ), - ~ dplyr::n_distinct(.x, na.rm = TRUE) - ), - dplyr::across( - c( - dplyr::ends_with( - c( - "episodes", - "beddays", - "cost", - "attendances", - "attend", - "contacts", - "hours", - "alarms", - "telecare", - "paid_items", - "advice", - "homeV", - "time", - "assessment", - "other", - # "DN", - "NHS24", - "PCC", - "_dnas" - ) - ), - dplyr::starts_with("SDS_option") - ), - ~ sum(., na.rm = TRUE) - ), - # dplyr::across( - # c( - # # dplyr::starts_with("sc_"), - # #-"sc_send_lca", - # #-"sc_latest_submission", - # # "HL1_in_FY" = "hh_in_fy", - # "NSU" - # ), - # ~ max_no_inf(.) - # ), - dplyr::across( - c( - condition_cols(), - # "death_date", - # "deceased", - "year", - dplyr::ends_with(c( - "_Cohort", "end_fy", "start_fy" - )), - ), - ~ dplyr::first(., na_rm = TRUE) - ) - ) %>% - dplyr::ungroup() -} - #' Condition columns #' #' @description Returns chr vector of column names @@ -838,12 +783,13 @@ join_slf_lookup_vars <- function(individual_file, #' @param year financial year. 
#' @param sc_client SC client lookup #' @param sc_demographics SC Demographic lookup -join_sc_client <- function(individual_file, - year, - sc_client = read_file(get_source_extract_path(year, "Client")), - sc_demographics = read_file(get_sc_demog_lookup_path(), - col_select = c("sending_location", "social_care_id", "chi") - )) { +join_sc_client <- function( + individual_file, + year, + sc_client = read_file(get_sc_client_lookup_path(year)), + sc_demographics = read_file(get_sc_demog_lookup_path(), + col_select = c("sending_location", "social_care_id", "chi") + )) { # TODO Update the client lookup processing script to match # on demographics there so the client lookup already has CHI. @@ -853,13 +799,25 @@ join_sc_client <- function(individual_file, sc_demographics %>% dplyr::select("sending_location", "social_care_id", "chi"), by = c("sending_location", "social_care_id") - ) + ) %>% + dplyr::mutate(count_not_known = rowSums(dplyr::select(., all_of( + c( + "sc_living_alone", + "sc_support_from_unpaid_carer", + "sc_social_worker", + "sc_meals", + "sc_day_care" + ) + )) == "Not Known")) %>% + dplyr::arrange(chi, count_not_known) %>% + dplyr::distinct(chi, .keep_all = TRUE) # Match on client variables by chi individual_file <- individual_file %>% dplyr::left_join( join_client_demog, - by = "chi" + by = "chi", + relationship = "one-to-one" ) %>% dplyr::select(!c("sending_location", "social_care_id", "sc_latest_submission")) diff --git a/R/create_monthly_beddays.R b/R/create_monthly_beddays.R index 175baeb8d..f57fc067f 100644 --- a/R/create_monthly_beddays.R +++ b/R/create_monthly_beddays.R @@ -39,7 +39,7 @@ create_monthly_beddays <- function(data, if (any( admission_dates_vector > discharge_dates_vector, na.rm = TRUE - ) & !all(is.na(discharge_dates_vector))) { + ) && !all(is.na(discharge_dates_vector))) { first_error <- which.max(admission_dates_vector > discharge_dates_vector) cli::cli_abort( @@ -68,7 +68,7 @@ create_monthly_beddays <- function(data, # Shift it 
forward by a day (default) # so we will count the last day and not the first. lubridate::int_shift( - by = lubridate::days(dplyr::if_else(count_last, 1L, 0L)) + by = lubridate::days(as.integer(count_last)) )) # Create the start dates of the months for the financial year diff --git a/R/create_monthly_costs.R b/R/create_monthly_costs.R index c9ccf4bed..3ff4a5268 100644 --- a/R/create_monthly_costs.R +++ b/R/create_monthly_costs.R @@ -20,7 +20,7 @@ create_monthly_costs <- function(data, check_variables_exist(data, c( "record_keydate1", "record_keydate2", - paste0(tolower(month.abb[c(4:12, 1:3)]), "_beddays") + paste0(tolower(month.abb[c(4L:12L, 1L:3L)]), "_beddays") )) beddays_months <- data %>% @@ -29,7 +29,7 @@ create_monthly_costs <- function(data, # Fix the instances where the episode is a daycase (in maternity data); # these will sometimes have 0.33 for the yearstay, # this should be applied to the relevant month. - full_cost_col <- month.abb[c(4:12, 1:3)] %>% + full_cost_col <- month.abb[c(4L:12L, 1L:3L)] %>% tolower() %>% paste0("_cost") @@ -37,7 +37,7 @@ create_monthly_costs <- function(data, dplyr::select(!dplyr::ends_with("_beddays")) %>% dplyr::mutate( daycase_added = tidyr::replace_na( - ({{ yearstay }} == 0.33) | ({{ yearstay }} == 0L & {{ cost_total_net }} > 0), + ({{ yearstay }} == 0.33) | ({{ yearstay }} == 0L & {{ cost_total_net }} > 0.0), replace = FALSE ) ) %>% @@ -51,12 +51,12 @@ create_monthly_costs <- function(data, cost_month = month.abb[.data$cost_month] %>% tolower() %>% paste0("_cost"), - daycase_added = dplyr::if_else(.data$daycase_added, 1, 0) + daycase_added = as.integer(.data$daycase_added) ) %>% tidyr::pivot_wider( names_from = "cost_month", values_from = "daycase_added", - values_fill = 0 + values_fill = 0L ) %>% dplyr::select( tidyselect::any_of(full_cost_col), @@ -67,7 +67,7 @@ create_monthly_costs <- function(data, add_months <- setdiff(full_cost_col, available_months) add_months_df <- dplyr::as_tibble( - matrix(0, nrow = nrow(data), 
ncol = length(add_months)), + matrix(0.0, nrow = nrow(data), ncol = length(add_months)), .name_repair = ~add_months ) diff --git a/R/create_sending_location_test_flags.R b/R/create_sending_location_test_flags.R new file mode 100644 index 000000000..373dc2c03 --- /dev/null +++ b/R/create_sending_location_test_flags.R @@ -0,0 +1,48 @@ +#' Create sending location test flags +#' +#' @description Create flags for sending location +#' +#' @param data the data containing the variable sending_location +#' @param sending_location_var sending_location variable +#' @return a dataframe with flag (T or F) for each sending location +#' +#' @family flag functions +create_sending_location_test_flags <- function(data, sending_location_var) { + data <- data %>% + dplyr::mutate( + Aberdeen_City = {{ sending_location_var }} == 100L, + Aberdeenshire = {{ sending_location_var }} == 110L, + Angus = {{ sending_location_var }} == 120L, + Argyll_and_Bute = {{ sending_location_var }} == 130L, + City_of_Edinburgh = {{ sending_location_var }} == 230L, + Clackmannanshire = {{ sending_location_var }} == 150L, + Dumfries_and_Galloway = {{ sending_location_var }} == 170L, + Dundee_City = {{ sending_location_var }} == 180L, + East_Ayrshire = {{ sending_location_var }} == 190L, + East_Dunbartonshire = {{ sending_location_var }} == 200L, + East_Lothian = {{ sending_location_var }} == 210L, + East_Renfrewshire = {{ sending_location_var }} == 220L, + Falkirk = {{ sending_location_var }} == 240L, + Fife = {{ sending_location_var }} == 250L, + Glasgow_City = {{ sending_location_var }} == 260L, + Highland = {{ sending_location_var }} == 270L, + Inverclyde = {{ sending_location_var }} == 280L, + Midlothian = {{ sending_location_var }} == 290L, + Moray = {{ sending_location_var }} == 300L, + Na_h_Eileanan_Siar = {{ sending_location_var }} == 235L, + North_Ayrshire = {{ sending_location_var }} == 310L, + North_Lanarkshire = {{ sending_location_var }} == 320L, + Orkney_Islands = {{ sending_location_var }} == 
330L, + Perth_and_Kinross = {{ sending_location_var }} == 340L, + Renfrewshire = {{ sending_location_var }} == 350L, + Scottish_Borders = {{ sending_location_var }} == 355L, + Shetland_Islands = {{ sending_location_var }} == 360L, + South_Ayrshire = {{ sending_location_var }} == 370L, + South_Lanarkshire = {{ sending_location_var }} == 380L, + Stirling = {{ sending_location_var }} == 390L, + West_Dunbartonshire = {{ sending_location_var }} == 395L, + West_Lothian = {{ sending_location_var }} == 400L + ) + + return(data) +} diff --git a/R/create_service_use_lookup.R b/R/create_service_use_lookup.R index 30d3b0789..4acbfc507 100644 --- a/R/create_service_use_lookup.R +++ b/R/create_service_use_lookup.R @@ -35,9 +35,21 @@ create_service_use_cohorts <- function( ), # Calculate service costs - geriatric_cost = calculate_geriatric_cost(.data$recid, .data$spec, .data$cost_total_net), - maternity_cost = calculate_maternity_cost(.data$recid, .data$cij_pattype, .data$cost_total_net), - psychiatry_cost = calculate_psychiatry_cost(.data$recid, .data$spec, .data$cost_total_net), + geriatric_cost = calculate_geriatric_cost( + .data$recid, + .data$spec, + .data$cost_total_net + ), + maternity_cost = calculate_maternity_cost( + .data$recid, + .data$cij_pattype, + .data$cost_total_net + ), + psychiatry_cost = calculate_psychiatry_cost( + .data$recid, + .data$spec, + .data$cost_total_net + ), acute_elective_cost = calculate_acute_elective_cost( .data$recid, .data$cij_pattype, .data$cij_ipdc, .data$spec, .data$cost_total_net @@ -46,62 +58,154 @@ create_service_use_cohorts <- function( .data$recid, .data$cij_pattype, .data$spec, .data$cost_total_net ), - outpatient_cost = calculate_outpatient_costs(.data$recid, .data$cost_total_net, .data$geriatric_cost)[[1]], - total_outpatient_cost = calculate_outpatient_costs(.data$recid, .data$cost_total_net, .data$geriatric_cost)[[2]], - care_home_cost = calculate_care_home_cost(.data$recid, .data$cost_total_net), - hospital_elective_cost = 
calculate_hospital_elective_cost(.data$recid, .data$cij_pattype, .data$cost_total_net), - hospital_emergency_cost = calculate_hospital_emergency_cost(.data$recid, .data$cij_pattype, .data$cost_total_net), - prescribing_cost = calculate_prescribing_cost(.data$recid, .data$cost_total_net), - ae2_cost = calculate_ae2_cost(.data$recid, .data$cost_total_net), - community_health_cost = calculate_community_health_cost(.data$recid, .data$cost_total_net), + outpatient_cost = calculate_outpatient_costs( + recid = .data$recid, + cost_total_net = .data$cost_total_net, + geriatric_cost = .data$geriatric_cost + )[["outpatient_cost"]], + total_outpatient_cost = calculate_outpatient_costs( + .data$recid, + .data$cost_total_net, + .data$geriatric_cost + )[["total_outpatient_cost"]], + care_home_cost = calculate_care_home_cost( + .data$recid, + .data$cost_total_net + ), + hospital_elective_cost = calculate_hospital_elective_cost( + .data$recid, + .data$cij_pattype, + .data$cost_total_net + ), + hospital_emergency_cost = calculate_hospital_emergency_cost( + .data$recid, + .data$cij_pattype, + .data$cost_total_net + ), + prescribing_cost = calculate_prescribing_cost( + .data$recid, + .data$cost_total_net + ), + ae2_cost = calculate_ae2_cost( + .data$recid, + .data$cost_total_net + ), + community_health_cost = calculate_community_health_cost( + .data$recid, + .data$cost_total_net + ), operation_flag = add_operation_flag(.data$op1a) ) %>% # Aggregate to CIJ level - dplyr::group_by(.data$chi, .data$cij_marker, .data$cij_ipdc, .data$cij_pattype) %>% + dplyr::group_by( + .data$chi, + .data$cij_marker, + .data$cij_ipdc, + .data$cij_pattype + ) %>% dplyr::summarise( - dplyr::across(c("cost_total_net", "geriatric_cost":"community_health_cost"), sum), - dplyr::across(c("operation_flag", "cij_attendance"), any) + dplyr::across( + c("cost_total_net", "geriatric_cost":"community_health_cost"), + ~ sum(.x) + ), + dplyr::across( + c("operation_flag", "cij_attendance"), + ~ any(.x) + ) ) %>% 
dplyr::ungroup() %>% # Create specific instance counters and compute cost for elective inpatients dplyr::mutate( - emergency_instances = assign_emergency_instances(.data$cij_pattype), - elective_instances = assign_elective_instances(.data$cij_pattype, .data$cij_ipdc), - elective_inpatient_instances = assign_elective_inpatient_instances(.data$cij_pattype, .data$cij_ipdc), - elective_daycase_instances = assign_elective_daycase_instances(.data$cij_pattype, .data$cij_ipdc), - death_flag = assign_death_flag(.data$cij_marker), + emergency_instances = assign_emergency_instances( + .data$cij_pattype + ), + elective_instances = assign_elective_instances( + .data$cij_pattype, + .data$cij_ipdc + ), + elective_inpatient_instances = assign_elective_inpatient_instances( + .data$cij_pattype, + .data$cij_ipdc + ), + elective_daycase_instances = assign_elective_daycase_instances( + .data$cij_pattype, + .data$cij_ipdc + ), + death_flag = assign_death_flag( + .data$cij_marker + ), elective_inpatient_cost = calculate_elective_inpatient_cost( .data$elective_inpatient_instances, .data$cost_total_net ) ) %>% # Move flags to end of data frame - dplyr::relocate(c("operation_flag", "death_flag"), .after = dplyr::last_col()) %>% + dplyr::relocate( + c("operation_flag", "death_flag"), + .after = dplyr::last_col() + ) %>% # Aggregate to chi-level dplyr::group_by(.data$chi) %>% dplyr::summarise( - dplyr::across(c(.data$cost_total_net:.data$elective_inpatient_cost), sum), - dplyr::across(c(.data$operation_flag, .data$death_flag), any) + dplyr::across( + c(.data$cost_total_net:.data$elective_inpatient_cost), + ~ sum(.x) + ), + dplyr::across( + c(.data$operation_flag, .data$death_flag), + ~ any(.x) + ) ) %>% dplyr::ungroup() %>% dplyr::mutate( # Create flag for elective inpatients - elective_inpatient_flag = assign_elective_inpatient_flag(.data$acute_elective_cost, .data$elective_inpatient_cost), + elective_inpatient_flag = assign_elective_inpatient_flag( + .data$acute_elective_cost, + 
.data$elective_inpatient_cost + ), # Assign cohort flags - psychiatry_cohort = assign_s_cohort_psychiatry(.data$psychiatry_cost), - maternity_cohort = assign_s_cohort_maternity(.data$maternity_cost), - geriatric_cohort = assign_s_cohort_geriatric(.data$geriatric_cost), - elective_inpatient_cohort = assign_s_cohort_elective_inpatient(.data$elective_inpatient_flag), - limited_daycases_cohort = assign_s_cohort_limited_daycases(.data$elective_inpatient_flag, .data$elective_instances), - routine_daycase_cohort = assign_s_cohort_routine_daycase(.data$elective_inpatient_flag, .data$elective_instances), - single_emergency_cohort = assign_s_cohort_single_emergency(.data$emergency_instances), - multiple_emergency_cohort = assign_s_cohort_multiple_emergency(.data$emergency_instances), - prescribing_cohort = assign_s_cohort_prescribing(.data$prescribing_cost), - outpatient_cohort = assign_s_cohort_outpatient(.data$outpatient_cost), - ae2_cohort = assign_s_cohort_ae2(.data$ae2_cost), - community_care_cohort = assign_s_cohort_community_care(.data$community_health_cost), + psychiatry_cohort = assign_s_cohort_psychiatry( + .data$psychiatry_cost + ), + maternity_cohort = assign_s_cohort_maternity( + .data$maternity_cost + ), + geriatric_cohort = assign_s_cohort_geriatric( + .data$geriatric_cost + ), + elective_inpatient_cohort = assign_s_cohort_elective_inpatient( + .data$elective_inpatient_flag + ), + limited_daycases_cohort = assign_s_cohort_limited_daycases( + .data$elective_inpatient_flag, + .data$elective_instances + ), + routine_daycase_cohort = assign_s_cohort_routine_daycase( + .data$elective_inpatient_flag, + .data$elective_instances + ), + single_emergency_cohort = assign_s_cohort_single_emergency( + .data$emergency_instances + ), + multiple_emergency_cohort = assign_s_cohort_multiple_emergency( + .data$emergency_instances + ), + prescribing_cohort = assign_s_cohort_prescribing( + .data$prescribing_cost + ), + outpatient_cohort = assign_s_cohort_outpatient( + 
.data$outpatient_cost + ), + ae2_cohort = assign_s_cohort_ae2( + .data$ae2_cost + ), + community_care_cohort = assign_s_cohort_community_care( + .data$community_health_cost + ), # Assign other cohort if none have been assigned - other_cohort = rowSums(dplyr::across("psychiatry_cohort":"community_care_cohort")) == 0, + other_cohort = rowSums( + dplyr::pick("psychiatry_cohort":"community_care_cohort") + ) == 0L, # Recalculate costs based on the cohorts elective_inpatient_cost = recalculate_elective_inpatient_cost( @@ -131,7 +235,7 @@ create_service_use_cohorts <- function( # Care Home cost is removed for now, so set to zero residential_care_cost = calculate_residential_care_cost(), # Replace any missing total costs with zero - dplyr::across("cost_total_net", ~ replace(., is.na(.), 0)) + cost_total_net = tidyr::replace_na(.data$cost_total_net, 0.0) ) %>% # Add the cohort names assign_cohort_names() %>% @@ -175,7 +279,9 @@ create_service_use_cohorts <- function( #' @family Demographic and Service Use Cohort functions calculate_geriatric_cost <- function(recid, spec, cost_total_net) { geriatric_cost <- dplyr::if_else( - recid %in% c("50B", "GLS") | spec %in% c("AB", "G4"), cost_total_net, 0 + recid %in% c("50B", "GLS") | spec %in% c("AB", "G4"), + cost_total_net, + 0.0 ) return(geriatric_cost) } @@ -380,9 +486,12 @@ calculate_community_health_cost <- function(recid, cost_total_net) { #' @return A vector of elective inpatient costs #' @seealso [assign_elective_inpatient_instances()] #' @family Demographic and Service Use Cohort functions -calculate_elective_inpatient_cost <- function(elective_inpatient_instances, cost_total_net) { +calculate_elective_inpatient_cost <- function(elective_inpatient_instances, + cost_total_net) { elective_inpatient_cost <- dplyr::if_else( - elective_inpatient_instances, cost_total_net, 0 + elective_inpatient_instances, + cost_total_net, + 0.0 ) return(elective_inpatient_cost) } @@ -391,7 +500,8 @@ calculate_elective_inpatient_cost <- 
function(elective_inpatient_instances, cost #' #' @param op1a A vector of operation codes #' -#' @return A boolean vector showing whether a record contains an operation or not +#' @return A boolean vector showing whether a record contains an operation or +#' not. #' @family Demographic and Service Use Cohort functions add_operation_flag <- function(op1a) { operation_flag <- !is_missing(op1a) @@ -532,29 +642,31 @@ assign_s_cohort_elective_inpatient <- function(elective_inpatient_flag) { } #' Assign limited daycases cohort flag -#' @description If the record does not have an elective inpatient flag and they have -#' 3 or fewer elective instances, return `TRUE` +#' @description If the record does not have an elective inpatient flag +#' and they have 3 or fewer elective instances, return `TRUE`. #' #' @param elective_inpatient_flag A vector of elective inpatient flags #' @param elective_instances A vector of elective instances #' #' @return A boolean vector of limited daycases cohort flags #' @family Demographic and Service Use Cohort functions -assign_s_cohort_limited_daycases <- function(elective_inpatient_flag, elective_instances) { - limited_daycases_cohort <- !elective_inpatient_flag & elective_instances <= 3 +assign_s_cohort_limited_daycases <- function(elective_inpatient_flag, + elective_instances) { + limited_daycases_cohort <- !elective_inpatient_flag & elective_instances <= 3L return(limited_daycases_cohort) } #' Assign routine daycase cohort flag -#' @description If the record does not have an elective inpatient flag and they have -#' 4 or more elective instances, return `TRUE` +#' @description If the record does not have an elective inpatient flag and +#' they have 4 or more elective instances, return `TRUE`. 
#' #' @inheritParams assign_s_cohort_limited_daycases #' #' @return A boolean vector of routine daycase cohort flags #' @family Demographic and Service Use Cohort functions -assign_s_cohort_routine_daycase <- function(elective_inpatient_flag, elective_instances) { - routine_daycase_cohort <- !elective_inpatient_flag & elective_instances >= 4 +assign_s_cohort_routine_daycase <- function(elective_inpatient_flag, + elective_instances) { + routine_daycase_cohort <- !elective_inpatient_flag & elective_instances >= 4L return(routine_daycase_cohort) } @@ -565,7 +677,7 @@ assign_s_cohort_routine_daycase <- function(elective_inpatient_flag, elective_in #' @return A boolean vector of single emergency cohort flags #' @family Demographic and Service Use Cohort functions assign_s_cohort_single_emergency <- function(emergency_instances) { - single_emergency_cohort <- emergency_instances == 1 + single_emergency_cohort <- emergency_instances == 1L return(single_emergency_cohort) } @@ -576,31 +688,33 @@ assign_s_cohort_single_emergency <- function(emergency_instances) { #' @return A boolean vector of multiple emergency cohort flags #' @family Demographic and Service Use Cohort functions assign_s_cohort_multiple_emergency <- function(emergency_instances) { - multiple_emergency_cohort <- emergency_instances >= 2 + multiple_emergency_cohort <- emergency_instances >= 2L return(multiple_emergency_cohort) } #' Assign prescribing cohort flag -#' @description If the record has a prescribing cost greater than zero, assign `TRUE` +#' @description If the record has a prescribing cost greater than zero, +#' assign `TRUE`. 
#' #' @param prescribing_cost A vector of prescribing costs #' #' @return A boolean vector of prescribing cohort flags #' @family Demographic and Service Use Cohort functions assign_s_cohort_prescribing <- function(prescribing_cost) { - prescribing_cohort <- prescribing_cost > 0 + prescribing_cohort <- prescribing_cost > 0.0 return(prescribing_cohort) } #' Assign outpatient cohort flag -#' @description If the record has a outpatient cost greater than zero, assign `TRUE` +#' @description If the record has a outpatient cost greater than zero, +#' assign `TRUE`. #' #' @param outpatient_cost A vector of outpatient costs #' #' @return A boolean vector of outpatient cohort flags #' @family Demographic and Service Use Cohort functions assign_s_cohort_outpatient <- function(outpatient_cost) { - outpatient_cohort <- outpatient_cost > 0 + outpatient_cohort <- outpatient_cost > 0.0 return(outpatient_cohort) } @@ -613,7 +727,7 @@ assign_s_cohort_outpatient <- function(outpatient_cost) { #' @return A boolean vector of residential care cohort flags #' @family Demographic and Service Use Cohort functions assign_s_cohort_residential_care <- function(care_home_cost) { - residential_care_cohort <- care_home_cost > 0 + residential_care_cohort <- care_home_cost > 0.0 return(residential_care_cohort) } @@ -625,7 +739,7 @@ assign_s_cohort_residential_care <- function(care_home_cost) { #' @return A boolean vector of A&E cohort flags #' @family Demographic and Service Use Cohort functions assign_s_cohort_ae2 <- function(ae2_cost) { - ae2_cohort <- ae2_cost > 0 + ae2_cohort <- ae2_cost > 0.0 return(ae2_cohort) } @@ -638,7 +752,7 @@ assign_s_cohort_ae2 <- function(ae2_cost) { #' @return A boolean vector of Community Care cohort flags #' @family Demographic and Service Use Cohort functions assign_s_cohort_community_care <- function(community_health_cost) { - community_care_cohort <- community_health_cost > 0 # | home_care_cost > 0 + community_care_cohort <- community_health_cost > 0.0 # | 
home_care_cost > 0 return(community_care_cohort) } @@ -651,8 +765,13 @@ assign_s_cohort_community_care <- function(community_health_cost) { #' #' @return A vector of elective inpatient costs #' @family Demographic and Service Use Cohort functions -recalculate_elective_inpatient_cost <- function(elective_inpatient_cohort, acute_elective_cost) { - elective_inpatient_cost <- dplyr::if_else(elective_inpatient_cohort, acute_elective_cost, 0) +recalculate_elective_inpatient_cost <- function(elective_inpatient_cohort, + acute_elective_cost) { + elective_inpatient_cost <- dplyr::if_else( + elective_inpatient_cohort, + acute_elective_cost, + 0.0 + ) return(elective_inpatient_cost) } @@ -663,8 +782,13 @@ recalculate_elective_inpatient_cost <- function(elective_inpatient_cohort, acute #' #' @return A vector of limited daycase costs #' @family Demographic and Service Use Cohort functions -calculate_limited_daycases_cost <- function(limited_daycases_cohort, acute_elective_cost) { - limited_daycases_cost <- dplyr::if_else(limited_daycases_cohort, acute_elective_cost, 0) +calculate_limited_daycases_cost <- function(limited_daycases_cohort, + acute_elective_cost) { + limited_daycases_cost <- dplyr::if_else( + limited_daycases_cohort, + acute_elective_cost, + 0.0 + ) return(limited_daycases_cost) } @@ -675,8 +799,13 @@ calculate_limited_daycases_cost <- function(limited_daycases_cohort, acute_elect #' #' @return A vector of routine daycase costs #' @family Demographic and Service Use Cohort functions -calculate_routine_daycase_cost <- function(routine_daycase_cohort, acute_elective_cost) { - routine_daycase_cost <- dplyr::if_else(routine_daycase_cohort, acute_elective_cost, 0) +calculate_routine_daycase_cost <- function(routine_daycase_cohort, + acute_elective_cost) { + routine_daycase_cost <- dplyr::if_else( + routine_daycase_cohort, + acute_elective_cost, + 0.0 + ) return(routine_daycase_cost) } @@ -687,8 +816,13 @@ calculate_routine_daycase_cost <- 
function(routine_daycase_cohort, acute_electiv #' #' @return A vector of single emergency costs #' @family Demographic and Service Use Cohort functions -calculate_single_emergency_cost <- function(single_emergency_cohort, acute_emergency_cost) { - single_emergency_cost <- dplyr::if_else(single_emergency_cohort, acute_emergency_cost, 0) +calculate_single_emergency_cost <- function(single_emergency_cohort, + acute_emergency_cost) { + single_emergency_cost <- dplyr::if_else( + single_emergency_cohort, + acute_emergency_cost, + 0.0 + ) return(single_emergency_cost) } @@ -699,8 +833,13 @@ calculate_single_emergency_cost <- function(single_emergency_cohort, acute_emerg #' #' @return A vector of multiple emergency costs #' @family Demographic and Service Use Cohort functions -calculate_multiple_emergency_cost <- function(multiple_emergency_cohort, acute_emergency_cost) { - multiple_emergency_cost <- dplyr::if_else(multiple_emergency_cohort, acute_emergency_cost, 0) +calculate_multiple_emergency_cost <- function(multiple_emergency_cohort, + acute_emergency_cost) { + multiple_emergency_cost <- dplyr::if_else( + multiple_emergency_cohort, + acute_emergency_cost, + 0.0 + ) return(multiple_emergency_cost) } @@ -711,13 +850,16 @@ calculate_multiple_emergency_cost <- function(multiple_emergency_cohort, acute_e #' #' @return A vector of community care costs #' @family Demographic and Service Use Cohort functions -calculate_community_care_cost <- function(community_care_cohort, community_health_cost) { +calculate_community_care_cost <- function(community_care_cohort, + community_health_cost) { community_care_cost <- dplyr::if_else( - community_care_cohort, community_health_cost, 0 + community_care_cohort, + community_health_cost, + 0.0 ) # FOR FUTURE # community_care_cost <- dplyr::if_else( - # community_care_cohort + home_care_cost, community_health_cost, 0) + # community_care_cohort + home_care_cost, community_health_cost, 0.0) return(community_care_cost) } @@ -727,7 +869,7 @@ 
calculate_community_care_cost <- function(community_care_cohort, community_healt #' @return A vector of community care costs, currently zero #' @family Demographic and Service Use Cohort functions calculate_residential_care_cost <- function() { - residential_care_cost <- 0 + residential_care_cost <- 0.0 return(residential_care_cost) } @@ -735,7 +877,8 @@ calculate_residential_care_cost <- function() { #' #' @param data A data frame #' -#' @return A data frame with an additional variable containing the assigned cohort +#' @return A data frame with an additional variable containing the assigned +#' cohort #' #' @family Demographic and Service Use Cohort functions assign_cohort_names <- function(data) { @@ -765,10 +908,8 @@ assign_cohort_names <- function(data) { # Situation where no cost is greater than another, # so the maximum is the same as the mean .data$cost_max == rowSums( - dplyr::across( - c("psychiatry_cost":"residential_care_cost") - ) - ) / 12 ~ "Unassigned", + dplyr::pick("psychiatry_cost":"residential_care_cost") + ) / 12.0 ~ "Unassigned", .data$cost_max == .data$psychiatry_cost ~ "Psychiatry", .data$cost_max == .data$maternity_cost ~ "Maternity", # Geriatric has to be larger or equal to psychiatry @@ -786,7 +927,7 @@ assign_cohort_names <- function(data) { # Future: cost_max == .data$community_care_cost ~ "Community Care", .data$cost_max == .data$ae2_cost ~ "Unscheduled Care", .data$cost_max == .data$residential_care_cost ~ "Residential Care", - TRUE ~ "Unassigned" + .default = "Unassigned" ) ) %>% dplyr::select(-"cost_max") diff --git a/R/fill_geographies.R b/R/fill_geographies.R index 58d001493..c9aee6355 100644 --- a/R/fill_geographies.R +++ b/R/fill_geographies.R @@ -4,10 +4,18 @@ #' then use the lookups to match on additional variables. 
#' #' @param data the SLF +#' @param slf_pc_lookup The SLF Postcode lookup +#' @param slf_gpprac_lookup The SLF GP Practice lookup #' #' @return a [tibble][tibble::tibble-package] of the SLF with improved #' Postcode and GP Practice details. -fill_geographies <- function(data) { +fill_geographies <- function( + data, + slf_pc_lookup = read_file(get_slf_postcode_path()), + slf_gpprac_lookup = read_file( + get_slf_gpprac_path(), + col_select = c("gpprac", "cluster", "hbpraccode") + )) { check_variables_exist(data, c( "chi", "postcode", @@ -21,8 +29,15 @@ fill_geographies <- function(data) { )) data %>% - fill_postcode_geogs() %>% - fill_gpprac_geographies() + fill_postcode_geogs( + slf_pc_lookup = read_file(get_slf_postcode_path()) + ) %>% + fill_gpprac_geographies( + slf_gpprac_lookup = read_file( + get_slf_gpprac_path(), + col_select = c("gpprac", "cluster", "hbpraccode") + ) + ) } #' Make a postcode lookup for filling to most recent postcodes based on CHI @@ -38,8 +53,10 @@ make_postcode_lookup <- function(data) { dplyr::distinct(.data$chi, .data$postcode, .data$record_keydate2) %>% # Format postcodes to 7-character format and replace dummy with NA dplyr::mutate( - postcode = phsmethods::format_postcode(.data$postcode, format = "pc7"), - postcode = dplyr::na_if(.data$postcode, "NK010AA") + postcode = dplyr::na_if( + phsmethods::format_postcode(.data$postcode, format = "pc7"), + "NK010AA" + ) ) %>% # Drop any episodes with no postcode dplyr::filter(!is.na(.data$postcode)) %>% @@ -84,9 +101,9 @@ make_gpprac_lookup <- function(data) { return(gpprac_lookup) } -fill_postcode_geogs <- function(data) { - slf_pc_lookup <- read_file(get_slf_postcode_path()) - +fill_postcode_geogs <- function( + data, + slf_pc_lookup) { filled_postcodes <- dplyr::left_join( data, make_postcode_lookup(data), @@ -121,17 +138,20 @@ fill_postcode_geogs <- function(data) { lca = dplyr::coalesce(.data$lca, .data$lca_old), datazone2011 = dplyr::coalesce(.data$datazone2011, .data$datazone2011_old) 
) %>% - dplyr::select(!c("hb2018", "hscp", "lca_old", "datazone2011_old", "most_recent_postcode")) + dplyr::select(!c( + "hb2018", + "hscp", + "lca_old", + "datazone2011_old", + "most_recent_postcode" + )) return(filled_postcodes) } -fill_gpprac_geographies <- function(data) { - gpprac_ref <- read_file( - get_slf_gpprac_path(), - col_select = c("gpprac", "cluster", "hbpraccode") - ) - +fill_gpprac_geographies <- function( + data, + slf_gpprac_lookup) { filled_gpprac <- dplyr::left_join( data, make_gpprac_lookup(data), @@ -145,7 +165,12 @@ fill_gpprac_geographies <- function(data) { .data$gpprac ) ) %>% - dplyr::left_join(gpprac_ref, by = "gpprac", suffix = c("_old", "")) %>% + dplyr::left_join( + slf_gpprac_lookup %>% + dplyr::select("gpprac", "cluster", "hbpraccode"), + by = "gpprac", + suffix = c("_old", "") + ) %>% dplyr::mutate( hbpraccode = dplyr::coalesce(.data$hbpraccode, .data$hbpraccode_old) ) %>% diff --git a/R/fix_sc_dates.R b/R/fix_sc_dates.R index bffa009e0..54440586c 100644 --- a/R/fix_sc_dates.R +++ b/R/fix_sc_dates.R @@ -12,7 +12,7 @@ fix_sc_start_dates <- function(start_date, period) { # financial year start_date <- dplyr::if_else( is.na(start_date), - start_fy(year = substr(period, 1L, 4L), "alternate"), + start_fy(year = stringr::str_sub(period, 1L, 4L), "alternate"), start_date ) @@ -35,7 +35,7 @@ fix_sc_end_dates <- function(start_date, end_date, period) { # the end of financial year end_date <- dplyr::if_else( start_date > end_date, - end_fy(year = substr(period, 1L, 4L), "alternate"), + end_fy(year = stringr::str_sub(period, 1L, 4L), "alternate"), end_date ) diff --git a/R/get_boxi_extract_path.R b/R/get_boxi_extract_path.R index c3dd0fdf6..6096525e5 100644 --- a/R/get_boxi_extract_path.R +++ b/R/get_boxi_extract_path.R @@ -29,34 +29,40 @@ get_boxi_extract_path <- function( )) { type <- match.arg(type) - year_dir <- get_year_dir(year, extracts_dir = TRUE) + if (type %in% c("DN", "CMH")) { + dir <- fs::path(get_slf_dir(), "Archived_data") + } 
else { + dir <- get_year_dir(year, extracts_dir = TRUE) + } if (!check_year_valid(year, type)) { return(get_dummy_boxi_extract_path()) } - file_name <- dplyr::case_when( - type == "AE" ~ "A&E-episode-level-extract", - type == "AE_CUP" ~ "A&E-UCD-CUP-extract", - type == "Acute" ~ "Acute-episode-level-extract", - type == "CMH" ~ "Community-MH-contact-level-extract", - type == "DN" ~ "District-Nursing-contact-level-extract", - type == "GP_OoH-c" ~ "GP-OoH-consultations-extract", - type == "GP_OoH-d" ~ "GP-OoH-diagnosis-extract", - type == "GP_OoH-o" ~ "GP-OoH-outcomes-extract", - type == "Homelessness" ~ "Homelessness-extract", - type == "Maternity" ~ "Maternity-episode-level-extract", - type == "MH" ~ "Mental-Health-episode-level-extract", - type == "Deaths" ~ "NRS-death-registrations-extract", - type == "Outpatients" ~ "Outpatients-episode-level-extract" + file_name <- dplyr::case_match( + type, + "AE" ~ "A&E-episode-level-extract", + "AE_CUP" ~ "A&E-UCD-CUP-extract", + "Acute" ~ "Acute-episode-level-extract", + "CMH" ~ "Community-MH-contact-level-extract", + "DN" ~ "District-Nursing-contact-level-extract", + "GP_OoH-c" ~ "GP-OoH-consultations-extract", + "GP_OoH-d" ~ "GP-OoH-diagnosis-extract", + "GP_OoH-o" ~ "GP-OoH-outcomes-extract", + "Homelessness" ~ "Homelessness-extract", + "Maternity" ~ "Maternity-episode-level-extract", + "MH" ~ "Mental-Health-episode-level-extract", + "Deaths" ~ "NRS-death-registrations-extract", + "Outpatients" ~ "Outpatients-episode-level-extract" ) boxi_extract_path_csv_gz <- fs::path( - year_dir, + dir, stringr::str_glue("{file_name}-20{year}.csv.gz") ) + boxi_extract_path_csv <- fs::path( - year_dir, + dir, stringr::str_glue("{file_name}-20{year}.csv") ) diff --git a/R/get_connection_PHS_database.R b/R/get_connection_PHS_database.R index a7c99653b..0a528f45b 100644 --- a/R/get_connection_PHS_database.R +++ b/R/get_connection_PHS_database.R @@ -1,22 +1,32 @@ #' Open a connection to a PHS database #' -#' @description Opens a connection 
to PHS database to allow data to be collected +#' @description Opens a connection to PHS database given a Data Source Name +#' (DSN) it will try to get the username, asking for input if in an interactive +#' session. It will also use [keyring][keyring::keyring-package] to find +#' an existing keyring called 'createslf' which should contain a `db_password` +#' key with the users database password. #' -#' @param dsn The Data Source Name passed on to `odbc::dbconnect` -#' the dsn must be setup first. e.g. SMRA or DVPROD +#' @param dsn The Data Source Name (DSN) passed on to [odbc::dbConnect()] +#' the DSN must be set up first. e.g. `SMRA` or `DVPROD` #' @param username The username to use for authentication, -#' if not supplied it first will check the environment variable -#' and finally ask the user for input. +#' if not supplied it will try to find it automatically and if possible ask the +#' user for input. #' -#' @return a connection to the specified dsn +#' @return a connection to the specified Data Source. 
#' @export -#' -phs_db_connection <- function(dsn, username = Sys.getenv("USER")) { - # Collect username from the environment - username <- Sys.getenv("USER") +phs_db_connection <- function(dsn, username) { + if (missing(username)) { + # Collect username if possible + username <- dplyr::case_when( + Sys.info()["USER"] != "unknown" ~ Sys.info()["USER"], + Sys.getenv("USER") != "" ~ Sys.getenv("USER"), + system2("whoami", stdout = TRUE) != "" ~ system2("whoami", stdout = TRUE), + .default = NA + ) + } - # Check the username is not empty and take input if not - if (is.na(username) | username == "") { + # If the username is missing try to get input from the user + if (is.na(username)) { if (rlang::is_interactive()) { username <- rstudioapi::showPrompt( title = "Username", @@ -24,46 +34,219 @@ phs_db_connection <- function(dsn, username = Sys.getenv("USER")) { default = "" ) } else { - cli::cli_abort("No username found, you should supply one with {.arg username}") + cli::cli_abort( + c( + "x" = "No username found, you can use the {.arg username} argument.", + "i" = "Alternatively, add {.code USER = \"\"} to your + {.file .Renviron} file." + ) + ) } } - # TODO improve error messages and provide instructions for setting up keyring - # Add the following code to R profile. 
- # Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf"), - # keyring_create("createslf", password = Sys.getenv("CREATESLF_KEYRING_PASS")), - # key_set(keyring = "createslf", service = "db_password") + # Check the status of keyring + # Does the 'createslf' keyring exist + keyring_exists <- "createslf" %in% keyring::keyring_list()[["keyring"]] - if (!("createslf" %in% keyring::keyring_list()[["keyring"]])) { - cli::cli_abort("The {.val createslf} keyring does not exist.") + # Does the 'db_password' key exist in the 'createslf' keyring + if (keyring_exists) { + key_exists <- "db_password" %in% keyring::key_list(keyring = "createslf")[["service"]] + } else { + key_exists <- FALSE } - if (!("db_password" %in% keyring::key_list(keyring = "createslf")[["service"]])) { - cli::cli_abort("{.val db_password} is missing from the {.val createslf} keyring.") - } + # Does the 'CREATESLF_KEYRING_PASS' environment variable exist + env_var_pass_exists <- Sys.getenv("CREATESLF_KEYRING_PASS") != "" - if (Sys.getenv("CREATESLF_KEYRING_PASS") == "") { - cli::cli_abort("You must have the password to unlock the {.val createslf} keyring in your environment as - {.envvar CREATESLF_KEYRING_PASS}. 
Please set this up in your {.file .Renviron} or {.file .Rprofile}") + if (!all(keyring_exists, key_exists, env_var_pass_exists)) { + if (rlang::is_interactive()) { + setup_keyring( + keyring = "createslf", + key = "db_password", + keyring_exists = keyring_exists, + key_exists = key_exists, + env_var_pass_exists = env_var_pass_exists + ) + } else { + if (any(keyring_exists, key_exists, env_var_pass_exists)) { + cli::cli_abort( + c( + "x" = "Your keyring needs to be set up, run:", + "{.code setup_keyring(keyring = \"createslf\", key = \"db_password\", + keyring_exists = {keyring_exists}, key_exists = {key_exists}, + env_var_pass_exists = {env_var_pass_exists})}" + ) + ) + } else { + cli::cli_abort( + c( + "x" = "Your keyring needs to be set up, run:", + "{.code setup_keyring(keyring = \"createslf\", + key = \"db_password\")}" + ) + ) + } + } } - keyring::keyring_unlock(keyring = "createslf", password = Sys.getenv("CREATESLF_KEYRING_PASS")) - - if (keyring::keyring_is_locked(keyring = "createslf")) { - cli::cli_abort("Keyring is locked. To unlock createslf keyring, please use {.fun keyring::keyring_unlock}") + if (env_var_pass_exists) { + keyring::keyring_unlock( + keyring = "createslf", + password = Sys.getenv("CREATESLF_KEYRING_PASS") + ) + } else { + keyring::keyring_unlock( + keyring = "createslf", + password = rstudioapi::askForPassword( + prompt = "Enter the password for the keyring you just created." + ) + ) } # Create the connection - password_text <- stringr::str_glue("{dsn} password for user: {username}") db_connection <- odbc::dbConnect( odbc::odbc(), dsn = dsn, uid = username, - pwd = keyring::key_get(keyring = "createslf", service = "db_password") + pwd = keyring::key_get( + keyring = "createslf", + service = "db_password" + ) ) keyring::keyring_lock(keyring = "createslf") return(db_connection) } + +#' Interactively set up the keyring +#' +#' @description +#' This is meant to be used with [phs_db_connection()], it can only be used +#' interactively i.e. 
not in targets or in a workbench job. +#' +#' With the default options it will go through the steps to set up a keyring +#' which can be used to supply passwords to [odbc::dbConnect()] (or others) in a +#' secure and seamless way. +#' +#' 1. Create an .Renviron file in the project and add a password (for the +#' keyring) to it. +#' 2. Create a keyring with the password - Since we have saved the password as +#' an environment variable it can be picked unlocked and used automatically. +#' 3. Add the database password to the keyring. +#' +#' +#' @param keyring Name of the keyring +#' @param key Name of the key +#' @param keyring_exists Does the keyring already exist +#' @param key_exists Does the key already exist +#' @param env_var_pass_exists Does the password for the keyring already exist +#' in the environment. +#' +#' @return NULL (invisibly) +#' @export +setup_keyring <- function( + keyring = "createslf", + key = "db_password", + keyring_exists = FALSE, + key_exists = FALSE, + env_var_pass_exists = FALSE) { + # First we need the password as an environment variable + if (!env_var_pass_exists) { + if (Sys.getenv("CREATESLF_KEYRING_PASS") != "") { + cli::cli_alert_warning( + "{.env CREATESLF_KEYRING_PASS} already exists in the environment, you + will need to clean this up manually if it's not correct." + ) + keyring_password <- Sys.getenv("CREATESLF_KEYRING_PASS") + } else if ( + any(stringr::str_detect( + readr::read_lines(".Renviron"), + "^CREATESLF_KEYRING_PASS\\s*?=\\s*?['\"].+?['\"]$" + )) + + ) { + cli::cli_abort( + "Your {.file .Renviron} file looks ok, try restarting your session." + ) + } else { + keyring_password <- rstudioapi::askForPassword( + prompt = stringr::str_glue( + "Enter a password for the '{keyring}' keyring, this should + not be your LDAP / database password." 
+ ) + ) + if (is.null(keyring_password)) { + cli::cli_abort("No keyring password entered.") + } + if (!fs::file_exists(".Renviron")) { + cli::cli_alert_success("Creating an {.file .Renviron} file.") + } + + renviron_text <- stringr::str_glue( + "CREATESLF_KEYRING_PASS = \"{keyring_password}\"" + ) + + readr::write_lines( + x = renviron_text, + file = ".Renviron", + append = TRUE + ) + + cli::cli_alert_success( + "Added {.code {renviron_text}} to the {.file .Renviron} file." + ) + + cli::cli_alert_info("You will need to restart your R session.") + } + } else { + keyring_password <- Sys.getenv("CREATESLF_KEYRING_PASS") + } + + # If the keyring doesn't exist create it now. + if (!keyring_exists) { + if (keyring %in% keyring::keyring_list()[["keyring"]]) { + cli::cli_alert_warning( + "The {keyring} keyring already exists, you will be asked to + overwrite it." + ) + } + keyring::keyring_create( + keyring = keyring, + password = keyring_password + ) + + cli::cli_alert_success( + "Created the '{keyring}' keyring with {.fun keyring::keyring_create}." + ) + } + + # If we just created the keyring it will already be unlocked + keyring::keyring_unlock( + keyring = keyring, + password = keyring_password + ) + + # Now add the password to the keyring + if (!key_exists) { + keyring::key_set( + keyring = keyring, + service = key, + prompt = "Enter you LDAP password for database connections." + ) + + cli::cli_alert_success( + "Added the '{key}' key to the '{keyring}' keyring with + {.fun keyring::keyring_set}." + ) + } + + keyring::keyring_lock(keyring = keyring) + + cli::cli_alert_success( + "The keyring should now be set up correctly." + ) + + return(invisible(NULL)) +} diff --git a/R/get_existing_data_for_tests.R b/R/get_existing_data_for_tests.R index a242aee42..9e7d06dcd 100644 --- a/R/get_existing_data_for_tests.R +++ b/R/get_existing_data_for_tests.R @@ -8,13 +8,16 @@ #' new data which the SLF data will be compared to. 
#' @param file_version whether to test against the "episode" file (the default) #' or the "individual" file. +#' @param anon_chi Default set as FALSE. For use in episode tests where +#' we want anon_chi instead of chi. #' #' @return a [tibble][tibble::tibble-package] from the #' SLF with the relevant recids and variables. #' #' @family test functions #' @seealso produce_source_extract_tests -get_existing_data_for_tests <- function(new_data, file_version = "episode") { +#' @export +get_existing_data_for_tests <- function(new_data, file_version = "episode", anon_chi = FALSE) { file_version <- match.arg(file_version, c("episode", "individual")) year <- new_data %>% @@ -32,6 +35,9 @@ get_existing_data_for_tests <- function(new_data, file_version = "episode") { "anon_chi", dplyr::intersect(slfhelper::ep_file_vars, tolower(names(new_data))) ) + if ("hscp" %in% names(new_data)) { + variable_names <- c("hscp2018", variable_names) + } } else if (file_version == "individual") { variable_names <- c( "anon_chi", @@ -43,14 +49,21 @@ get_existing_data_for_tests <- function(new_data, file_version = "episode") { slf_data <- suppressMessages(slfhelper::read_slf_episode( year = year, recids = recids, - columns = variable_names + col_select = variable_names )) } else { slf_data <- suppressMessages(slfhelper::read_slf_individual( year = year, - columns = variable_names + col_select = variable_names )) } - return(slfhelper::get_chi(slf_data)) + if (anon_chi == FALSE) { + slf_data <- slf_data %>% + slfhelper::get_chi() + } else { + slf_data <- slf_data + } + + return(slf_data) } diff --git a/R/get_file_paths.R b/R/get_file_paths.R index 229bebf39..b65015e5c 100644 --- a/R/get_file_paths.R +++ b/R/get_file_paths.R @@ -72,7 +72,7 @@ find the latest file with {.arg file_name_regexp}", } if (!fs::file_exists(file_path) && check_mode != "exists") { - if (is.null(create) && check_mode == "write" | + if (is.null(create) && check_mode == "write" || !is.null(create) && create == TRUE) { # The 
file doesn't exist but we do want to create it fs::file_create(file_path) diff --git a/R/get_fy_dates.R b/R/get_fy_dates.R index 1a4bf6f45..257a14488 100644 --- a/R/get_fy_dates.R +++ b/R/get_fy_dates.R @@ -20,9 +20,9 @@ start_fy <- function(year, format = c("fyyear", "alternate")) { format <- match.arg(format) if (format == "fyyear") { - start_fy <- lubridate::make_date(convert_fyyear_to_year(year), 4, 1) + start_fy <- lubridate::make_date(convert_fyyear_to_year(year), 4L, 1L) } else if (format == "alternate") { - start_fy <- lubridate::make_date(year, 4, 1) + start_fy <- lubridate::make_date(year, 4L, 1L) } return(start_fy) @@ -47,14 +47,14 @@ end_fy <- function(year, format = c("fyyear", "alternate")) { format <- "fyyear" } - year <- as.numeric(paste0("20", substr(year, 3, 4))) + year <- as.numeric(paste0("20", stringr::str_sub(year, 3L, 4L))) format <- match.arg(format) if (format == "fyyear") { - end_fy <- lubridate::make_date(year, 3, 31) + end_fy <- lubridate::make_date(year, 3L, 31L) } else if (format == "alternate") { - end_fy <- lubridate::make_date(year + 1L, 3, 31) + end_fy <- lubridate::make_date(year + 1L, 3L, 31L) } return(end_fy) @@ -85,9 +85,9 @@ midpoint_fy <- function(year, format = c("fyyear", "alternate")) { check_year_format(year, format = "fyyear") if (format == "fyyear") { - midpoint_fy <- lubridate::make_date(convert_fyyear_to_year(year), 9, 30) + midpoint_fy <- lubridate::make_date(convert_fyyear_to_year(year), 9L, 30L) } else if (format == "alternate") { - midpoint_fy <- lubridate::make_date(year, 9, 30) + midpoint_fy <- lubridate::make_date(year, 9L, 30L) } return(midpoint_fy) @@ -113,7 +113,7 @@ next_fy <- function(year, format = c("fyyear", "alternate")) { check_year_format(year, format = "fyyear") - fy <- as.integer(substr(year, 1, 2)) + fy <- as.integer(stringr::str_sub(year, 1L, 2L)) next_fy <- paste0(fy + 1L, fy + 2L) diff --git a/R/get_it_extract_paths.R b/R/get_it_extract_paths.R index 4e44807b4..3c4dc54c3 100644 --- 
a/R/get_it_extract_paths.R +++ b/R/get_it_extract_paths.R @@ -104,10 +104,10 @@ get_it_prescribing_path <- function(year, it_reference = NULL, ...) { check_it_reference <- function(it_reference) { if (stringr::str_starts(it_reference, stringr::fixed("SCTASK"))) { # If the 'full' reference has been supplied trim to just the number - it_reference <- stringr::str_sub(it_reference, start = 7, end = 14) + it_reference <- stringr::str_sub(it_reference, start = 7L, end = 14L) } - if (stringr::str_detect(it_reference, "[0-9]{7}", negate = TRUE)) { + if (stringr::str_detect(it_reference, "^[0-9]{7}$", negate = TRUE)) { cli::cli_abort( c("x" = "{.arg it_reference} must be exactly 7 numbers."), call = rlang::caller_env() diff --git a/R/get_sc_demog_path.R b/R/get_sc_demog_path.R deleted file mode 100644 index 89658c356..000000000 --- a/R/get_sc_demog_path.R +++ /dev/null @@ -1,23 +0,0 @@ -#' Social Care Demographic Lookup File Path -#' -#' @description Get the file path for the Social Care Demographic lookup file -#' -#' @param update The update month to use, -#' defaults to [latest_update()] -#' -#' @param ... additional arguments passed to [get_file_path()] -#' -#' @return The path to the social care demographic file -#' as an [fs::path()] -#' @export -#' @family social care lookup file paths -#' @seealso [get_file_path()] for the generic function. -get_sc_demog_lookup_path <- function(update = latest_update(), ...) { - sc_demog_lookup_path <- get_file_path( - directory = fs::path(get_slf_dir(), "Social_care"), - file_name = stringr::str_glue("sc_demographics_lookup_{update}.parquet"), - ... 
- ) - - return(sc_demog_lookup_path) -} diff --git a/R/get_sc_lookup_paths.R b/R/get_sc_lookup_paths.R new file mode 100644 index 000000000..5add38b08 --- /dev/null +++ b/R/get_sc_lookup_paths.R @@ -0,0 +1,48 @@ +#' Social Care Demographic Lookup File Path +#' +#' @description Get the file path for the Social Care Demographic lookup file +#' +#' @param update The update month to use, +#' defaults to [latest_update()] +#' +#' @param ... additional arguments passed to [get_file_path()] +#' +#' @return The path to the social care demographic file +#' as an [fs::path()] +#' @export +#' @family social care lookup file paths +#' @seealso [get_file_path()] for the generic function. +get_sc_demog_lookup_path <- function(update = latest_update(), ...) { + sc_demog_lookup_path <- get_file_path( + directory = fs::path(get_slf_dir(), "Social_care"), + file_name = stringr::str_glue("sc_demographics_lookup_{update}.parquet"), + ... + ) + + return(sc_demog_lookup_path) +} + +#' Social Care Client Lookup File Path +#' +#' @description Get the file path for the Social Care Client lookup file +#' +#' @param year Financial year. +#' @param update The update month to use, +#' defaults to [latest_update()] +#' +#' @param ... additional arguments passed to [get_file_path()] +#' +#' @return The path to the social care demographic file +#' as an [fs::path()] +#' @export +#' @family social care lookup file paths +#' @seealso [get_file_path()] for the generic function. +get_sc_client_lookup_path <- function(year, update = latest_update(), ...) { + sc_client_lookup_path <- get_file_path( + directory = fs::path(get_slf_dir(), "Social_care"), + file_name = stringr::str_glue("sc_client_lookup_{year}_{update}.parquet"), + ... 
+ ) + + return(sc_client_lookup_path) +} diff --git a/R/get_source_extract_path.R b/R/get_source_extract_path.R index 89c6dc0b4..cbd3fd46e 100644 --- a/R/get_source_extract_path.R +++ b/R/get_source_extract_path.R @@ -10,57 +10,63 @@ #' @export #' #' @family extract file paths -get_source_extract_path <- function(year, - type = c( - "Acute", - "AE", - "AT", - "CH", - "Client", - "CMH", - "DD", - "Deaths", - "DN", - "GPOoH", - "HC", - "Homelessness", - "Maternity", - "MH", - "Outpatients", - "PIS", - "SDS" - ), - ...) { +get_source_extract_path <- function( + year, + type = c( + "Acute", + "AE", + "AT", + "CH", + "CMH", + "DD", + "Deaths", + "DN", + "GPOoH", + "HC", + "Homelessness", + "Maternity", + "MH", + "Outpatients", + "PIS", + "SDS" + ), + ...) { + if (year %in% type) { + cli::cli_abort("{.val {year}} was supplied to the {.arg year} argument.") + } + + year <- check_year_format(year) + type <- match.arg(type) if (!check_year_valid(year, type)) { - return(NA) + return(get_dummy_boxi_extract_path()) } - file_name <- dplyr::case_when( - type == "Acute" ~ "acute_for_source", - type == "AE" ~ "a&e_for_source", - type == "AT" ~ "Alarms-Telecare-for-source", - type == "CH" ~ "care_home_for_source", - type == "CMH" ~ "CMH_for_source", - type == "Client" ~ "client_for_source", - type == "DD" ~ "DD_for_source", - type == "Deaths" ~ "deaths_for_source", - type == "DN" ~ "DN_for_source", - type == "GPOoH" ~ "GP_OOH_for_source", - type == "HC" ~ "Home_Care_for_source", - type == "Homelessness" ~ "homelessness_for_source", - type == "Maternity" ~ "maternity_for_source", - type == "MH" ~ "mental_health_for_source", - type == "DD" ~ "DD_for_source", - type == "Outpatients" ~ "outpatients_for_source", - type == "PIS" ~ "prescribing_file_for_source", - type == "SDS" ~ "SDS-for-source" - ) + file_name <- dplyr::case_match( + type, + "Acute" ~ "acute_for_source", + "AE" ~ "a_and_e_for_source", + "AT" ~ "alarms-telecare-for-source", + "CH" ~ "care_home_for_source", + "CMH" ~ 
"cmh_for_source", + "DD" ~ "delayed_discharge_for_source", + "Deaths" ~ "deaths_for_source", + "DN" ~ "district_nursing_for_source", + "GPOoH" ~ "gp_ooh_for_source", + "HC" ~ "home_care_for_source", + "Homelessness" ~ "homelessness_for_source", + "Maternity" ~ "maternity_for_source", + "MH" ~ "mental_health_for_source", + "Outpatients" ~ "outpatients_for_source", + "PIS" ~ "prescribing_for_source", + "SDS" ~ "sds_for_source" + ) %>% + stringr::str_glue("-{year}.parquet") source_extract_path <- get_file_path( directory = get_year_dir(year), - file_name = stringr::str_glue("{file_name}-20{year}.parquet"), + file_name = file_name, ... ) diff --git a/R/get_sparra_hhg_paths.R b/R/get_sparra_hhg_paths.R index 2fd1a69f9..157160ed4 100644 --- a/R/get_sparra_hhg_paths.R +++ b/R/get_sparra_hhg_paths.R @@ -10,6 +10,10 @@ #' @family extract file paths #' @seealso [get_file_path()] for the generic function. get_hhg_path <- function(year, ...) { + if (!check_year_valid(year, "HHG")) { + return(get_dummy_boxi_extract_path()) + } + hhg_file_path <- get_file_path( directory = fs::path(get_slf_dir(), "HHG"), file_name = stringr::str_glue("HHG-20{year}.parquet"), @@ -31,6 +35,10 @@ get_hhg_path <- function(year, ...) { #' @family extract file paths #' @seealso [get_file_path()] for the generic function. get_sparra_path <- function(year, ...) { + if (!check_year_valid(year, "SPARRA")) { + return(get_dummy_boxi_extract_path()) + } + sparra_file_path <- get_file_path( directory = fs::path(get_slf_dir(), "SPARRA"), file_name = stringr::str_glue("SPARRA-20{year}.parquet"), diff --git a/R/get_temp_file_paths.R b/R/get_temp_file_paths.R deleted file mode 100644 index 2a6bcbaee..000000000 --- a/R/get_temp_file_paths.R +++ /dev/null @@ -1,117 +0,0 @@ -#' Get a temporary version of the SLF -#' -#' @param year The financial year -#' @param temp_version The temp version e.g. 
1 or 7 -#' @param file_version Episode or Individual file -#' -#' @return The path to the file (`.rds`) -get_slf_temp_path <- - function(year, - temp_version, - file_version = c("episode", "individual")) { - year <- check_year_format(year) - file_version <- match.arg(file_version) - - base_dir <- fs::path( - "/", - "conf", - "sourcedev", - "Source_Linkage_File_Updates" - ) - - year_dir <- fs::path(base_dir, year) - - temp_files_available <- fs::dir_ls(year_dir, - glob = "*temp-*" - ) %>% - stringr::str_match( - pattern = stringr::str_glue( - "temp-source-{file_version}-file-(?[1-9])-{year}\\.rds" - ) - ) %>% - magrittr::extract(, "version") - - temp_files_available <- - temp_files_available[!is.na(temp_files_available)] - - if (length(temp_files_available) == 0L) { - years_available <- fs::dir_ls( - base_dir, - recurse = TRUE, - glob = stringr::str_glue("*temp-source-{file_version}*") - ) %>% - stringr::str_match( - pattern = stringr::str_glue( - "temp-source-{file_version}-file-[1-9]-(?[0-9]{{4}})\\.rds" - ) - ) %>% - magrittr::extract(, "year") %>% - unique() - - years_formatted <- - cli::cli_vec(years_available[!is.na(years_available)], - style = list("vec-last" = " or ") - ) - - cli::cli_abort( - c( - "No temp {file_version} files for {.val {year}}", - "{cli::qty(years_available)}{?There is only/You can choose from} {.val {years_formatted}}." - ), - call = rlang::caller_env() - ) - } - - if (!(temp_version %in% temp_files_available)) { - temp_files_formatted <- cli::cli_vec(temp_files_available, - style = list("vec-last" = " or ") - ) - - cli::cli_abort( - c( - "Temp {file_version} file {.val {temp_version}} isn't available for {.val {year}}.", - "{cli::qty(temp_files_available)}{?There is only/You can choose from} {.val {temp_files_formatted}}." 
- ), - call = rlang::caller_env() - ) - } - - # Do check to see which temp versions exist for the given year - # Return nice error if it doesn't work - - file_name <- - stringr::str_glue("temp-source-{file_version}-file-{temp_version}-{year}.rds") - - file_path <- get_file_path( - directory = year_dir, - file_name = file_name - ) - - return(file_path) - } - -#' Get a temporary version of the SLF episode file -#' -#' @inherit get_slf_temp_path -#' -#' @export -get_slf_ep_temp_path <- function(year, temp_version) { - get_slf_temp_path( - year = year, - temp_version = temp_version, - file_version = "episode" - ) -} - -#' Get a temporary version of the SLF individual file -#' -#' @inherit get_slf_temp_path -#' -#' @export -get_slf_indiv_temp_path <- function(year, temp_version) { - get_slf_temp_path( - year = year, - temp_version = temp_version, - file_version = "individual" - ) -} diff --git a/R/gzip_files.R b/R/gzip_files.R index b6cc0a2b0..9a665fbc0 100644 --- a/R/gzip_files.R +++ b/R/gzip_files.R @@ -17,7 +17,7 @@ gzip_files <- function( ) n_unzipped_files <- length(unzipped_files) - if (n_unzipped_files > 0) { + if (n_unzipped_files > 0L) { cli::cli_inform(c( "i" = "{cli::qty(n_unzipped_files)}There {?is/are} {n_unzipped_files} uncompressed file{?s} for {year}, which will be compressed with diff --git a/R/is_date_in_fyyear.R b/R/is_date_in_fyyear.R index 44e816893..924e21e74 100644 --- a/R/is_date_in_fyyear.R +++ b/R/is_date_in_fyyear.R @@ -43,7 +43,7 @@ is_date_in_fyyear <- function(fyyear, date, date_end = NULL) { } # Check that date_end always comes after date (or all date_end is NA) - if (any(date > date_end, na.rm = TRUE) & !all(is.na(date_end))) { + if (any(date > date_end, na.rm = TRUE) && !all(is.na(date_end))) { first_error <- which.max(date > date_end) cli::cli_abort( diff --git a/R/join_deaths_data.R b/R/join_deaths_data.R index 694d2e2b9..89bcbbe13 100644 --- a/R/join_deaths_data.R +++ b/R/join_deaths_data.R @@ -2,16 +2,14 @@ #' #' @param data Episode 
file data #' @param year financial year, e.g. '1920' -#' @param slf_deaths_lookup_path Path to slf deaths lookup. +#' @param slf_deaths_lookup The SLF deaths lookup. #' #' @return The data including the deaths lookup matched #' on to the episode file. join_deaths_data <- function( data, year, - slf_deaths_lookup_path = get_slf_deaths_lookup_path(year)) { - slf_deaths_lookup <- read_file(slf_deaths_lookup_path) - + slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year))) { return( data %>% dplyr::left_join( diff --git a/R/last_date_month.R b/R/last_date_month.R index 979970f87..0fddacc81 100644 --- a/R/last_date_month.R +++ b/R/last_date_month.R @@ -11,5 +11,5 @@ #' #' @family date functions last_date_month <- function(date) { - return(lubridate::ceiling_date(date, "month") - lubridate::days(1)) + return(lubridate::ceiling_date(date, "month") - lubridate::days(1L)) } diff --git a/R/match_on_ltcs.R b/R/match_on_ltcs.R index 637e5b6c7..f83f31325 100644 --- a/R/match_on_ltcs.R +++ b/R/match_on_ltcs.R @@ -5,19 +5,23 @@ #' #' @param data episode files #' @param year financial year, e.g. 
'1920' +#' @param ltc_data The LTC data for the year #' #' @return data matched with long term conditions -match_on_ltcs <- function(data, year) { +match_on_ltcs <- function( + data, + year, + ltc_data = read_file(get_ltcs_path(year))) { # Match on LTC lookup matched <- dplyr::left_join( data, - read_file(get_ltcs_path(year)), + ltc_data, by = "chi", suffix = c("", "_ltc") ) %>% dplyr::mutate( # Replace any NA values with 0 for the LTC flags - dplyr::across("arth":"digestive", ~ tidyr::replace_na(., 0)), + dplyr::across("arth":"digestive", ~ tidyr::replace_na(.x, 0L)), # Use the postcode from the LTC file if it's otherwise missing postcode = dplyr::if_else(is.na(.data$postcode), .data$postcode_ltc, diff --git a/R/process_costs_rmd.R b/R/process_costs_rmd.R index 2efc25dd8..5d97d705f 100644 --- a/R/process_costs_rmd.R +++ b/R/process_costs_rmd.R @@ -14,7 +14,8 @@ process_costs_rmd <- function(file_name) { stringr::fixed("Rmd", ignore_case = TRUE) )) { cli::cli_abort( - "{.arg file_name} must be an {.code .Rmd} not a {.code .{fs::path_ext(file_name)}}." + "{.arg file_name} must be an {.code .Rmd} not a + {.code .{fs::path_ext(file_name)}}." ) } @@ -34,7 +35,12 @@ process_costs_rmd <- function(file_name) { output_file <- get_file_path( directory = output_dir, - file_name = fs::path_ext_set(stringr::str_glue("{fs::path_ext_remove(file_name)}-{latest_update()}-{date_today}"), "html"), + file_name = fs::path_ext_set( + stringr::str_glue( + "{fs::path_ext_remove(file_name)}-{latest_update()}-{date_today}" + ), + "html" + ), check_mode = "write" ) @@ -55,7 +61,7 @@ process_costs_rmd <- function(file_name) { #' #' @description This will read and process the #' District Nursing cost lookup, it will return the final data -#' but also write this out as a rds. +#' and write it to disk. #' #' @param file_path Path to the cost lookup. 
#' @@ -73,7 +79,7 @@ process_costs_dn_rmd <- function(file_path = get_dn_costs_path()) { #' #' @description This will read and process the #' care homes cost lookup, it will return the final data -#' but also write this out as a rds. +#' and write it to disk. #' #' @inheritParams process_costs_dn_rmd #' @@ -91,7 +97,7 @@ process_costs_ch_rmd <- function(file_path = get_ch_costs_path()) { #' #' @description This will read and process the #' GP ooh cost lookup, it will return the final data -#' but also write this out as a rds. +#' and write it to disk. #' #' @inheritParams process_costs_dn_rmd #' @@ -109,7 +115,7 @@ process_costs_gp_ooh_rmd <- function(file_path = get_gp_ooh_costs_path()) { #' #' @description This will read and process the #' Home Care cost lookup, it will return the final data -#' but also write this out as a rds. +#' and write it to disk. #' #' @inheritParams process_costs_dn_rmd #' diff --git a/R/process_extract_acute.R b/R/process_extract_acute.R index 7d47d0ef4..70ff29370 100644 --- a/R/process_extract_acute.R +++ b/R/process_extract_acute.R @@ -2,7 +2,7 @@ #' #' @description This will read and process the #' acute extract, it will return the final data -#' but also write this out as an rds. +#' and (optionally) write it to disk. #' #' @param data The extract to process #' @param year The year to process, in FY format. 
@@ -53,17 +53,15 @@ process_extract_acute <- function(data, year, write_to_disk = TRUE) { convert_monthly_rows_to_vars(.data$costmonthnum, .data$cost_total_net, .data$yearstay) %>% # add yearstay and cost_total_net variables dplyr::mutate( - yearstay = rowSums(dplyr::across(tidyselect::ends_with("_beddays"))), - cost_total_net = rowSums(dplyr::across(tidyselect::ends_with("_cost"))) + yearstay = rowSums(dplyr::pick(tidyselect::ends_with("_beddays"))), + cost_total_net = rowSums(dplyr::pick(tidyselect::ends_with("_cost"))) ) %>% # Add oldtadm as a factor with labels dplyr::mutate(oldtadm = factor(.data$oldtadm, levels = 0L:8L )) - - ## save outfile --------------------------------------- - outfile <- acute_clean %>% + acute_processed <- acute_clean %>% dplyr::select( "year", "recid", @@ -113,10 +111,11 @@ process_extract_acute <- function(data, year, write_to_disk = TRUE) { dplyr::arrange(.data$chi, .data$record_keydate1) if (write_to_disk) { - # Save as rds file - outfile %>% - write_file(get_source_extract_path(year, "Acute", check_mode = "write")) + write_file( + acute_processed, + get_source_extract_path(year, "Acute", check_mode = "write") + ) } - return(outfile) + return(acute_processed) } diff --git a/R/process_extract_ae.R b/R/process_extract_ae.R index 1be6efe39..95dfd99be 100644 --- a/R/process_extract_ae.R +++ b/R/process_extract_ae.R @@ -2,7 +2,7 @@ #' #' @description This will read and process the #' A&E extract, it will return the final data -#' but also write this out as an rds. +#' and (optionally) write it to disk. #' #' @param data The extract to process #' @param year The year to process, in FY format. 
@@ -35,6 +35,10 @@ process_extract_ae <- function(data, year, write_to_disk = TRUE) { .data$postcode_chi, .data$postcode_epi )) %>% + # A&E data has postcode in PC8 format but we need it in PC7 format + dplyr::mutate( + postcode = phsmethods::format_postcode(.data$postcode, "pc7") + ) %>% ## recode cypher HB codes ## dplyr::mutate( dplyr::across(c("hbtreatcode", "hbrescode"), ~ dplyr::case_when( @@ -233,9 +237,7 @@ process_extract_ae <- function(data, year, write_to_disk = TRUE) { .data$keytime2 ) - - # Save outfile---------------------------------------- - outfile <- matched_ae_data %>% + ae_processed <- matched_ae_data %>% dplyr::select( "year", "recid", @@ -290,10 +292,11 @@ process_extract_ae <- function(data, year, write_to_disk = TRUE) { ) if (write_to_disk) { - # Save as rds file - outfile %>% - write_file(get_source_extract_path(year, "AE", check_mode = "write")) + write_file( + ae_processed, + get_source_extract_path(year, "AE", check_mode = "write") + ) } - return(outfile) + return(ae_processed) } diff --git a/R/process_extract_alarms_telecare.R b/R/process_extract_alarms_telecare.R index 15cd79809..9a0745a04 100644 --- a/R/process_extract_alarms_telecare.R +++ b/R/process_extract_alarms_telecare.R @@ -2,7 +2,7 @@ #' #' @description This will read and process the #' (year specific) Alarms Telecare extract, it will return the final data -#' but also write this out as rds. +#' and (optionally) write it to disk. #' #' @inheritParams process_extract_care_home #' diff --git a/R/process_extract_care_home.R b/R/process_extract_care_home.R index 560011f84..cbf6d417c 100644 --- a/R/process_extract_care_home.R +++ b/R/process_extract_care_home.R @@ -2,7 +2,7 @@ #' #' @description This will read and process the #' (year specific) Care Home extract, it will return the final data -#' but also write this out as rds. +#' and (optionally) write it to disk. #' #' @param data The full processed data which will be selected from to create #' the year specific data. 
@@ -43,7 +43,7 @@ process_extract_care_home <- function( ) %>% # remove any episodes where the latest submission was before the current year dplyr::filter( - substr(.data$sc_latest_submission, 1, 4) >= convert_fyyear_to_year(year) + substr(.data$sc_latest_submission, 1L, 4L) >= convert_fyyear_to_year(year) ) %>% # Match to client data dplyr::left_join( diff --git a/R/process_extract_cmh.R b/R/process_extract_cmh.R index 0b1ba0f19..a2adad75e 100644 --- a/R/process_extract_cmh.R +++ b/R/process_extract_cmh.R @@ -2,7 +2,7 @@ #' #' @description This will read and process the #' CMH extract, it will return the final data -#' but also write this out as an rds. +#' and (optionally) write it to disk. #' #' @param data The extract to process #' @param year The year to process, in FY format. @@ -44,9 +44,7 @@ process_extract_cmh <- function(data, # create blank diag 6 dplyr::mutate(diag6 = NA) - # Outfile -------------------------------------------- - - outfile <- cmh_clean %>% + cmh_processed <- cmh_clean %>% dplyr::select( "year", "recid", @@ -73,10 +71,11 @@ process_extract_cmh <- function(data, ) if (write_to_disk) { - # Save as rds file - outfile %>% - write_file(get_source_extract_path(year, "CMH", check_mode = "write")) + write_file( + cmh_processed, + get_source_extract_path(year, "CMH", check_mode = "write") + ) } - return(outfile) + return(cmh_processed) } diff --git a/R/process_extract_consultations.R b/R/process_extract_consultations.R index 7262c1df6..6dc175cb8 100644 --- a/R/process_extract_consultations.R +++ b/R/process_extract_consultations.R @@ -2,7 +2,7 @@ #' #' @description This will read and process the #' GP OOH Consultations extract, it will return the final data -#' but also write this out as an rds. +#' and (optionally) write it to disk. #' #' @param data The extract to process #' @param year The year to process, in FY format. 
diff --git a/R/process_extract_delayed_discharges.R b/R/process_extract_delayed_discharges.R index 29f37eb29..3c56807f9 100644 --- a/R/process_extract_delayed_discharges.R +++ b/R/process_extract_delayed_discharges.R @@ -2,7 +2,7 @@ #' #' @description This will read and process the #' delayed discharges extract, it will return the final data -#' but also write this out as an rds. +#' and (optionally) write it to disk. #' #' @param data The extract to process #' @param year The year to process, in FY format. diff --git a/R/process_extract_district_nursing.R b/R/process_extract_district_nursing.R index a1b3bf816..9d1df62a6 100644 --- a/R/process_extract_district_nursing.R +++ b/R/process_extract_district_nursing.R @@ -2,7 +2,7 @@ #' #' @description This will read and process the #' District Nursing extract, it will return the final data -#' but also write this out an rds. +#' and (optionally) write it to disk. #' #' @param data The extract to process #' @param year The year to process, in FY format. diff --git a/R/process_extract_gp_ooh.R b/R/process_extract_gp_ooh.R index 4add41cfa..2b536878a 100644 --- a/R/process_extract_gp_ooh.R +++ b/R/process_extract_gp_ooh.R @@ -2,7 +2,7 @@ #' #' @description This will read and process the #' GP OoH extract, it will return the final data -#' but also write this out an rds. +#' and (optionally) write it to disk. #' #' @param year The year to process, in FY format. #' @param data_list A list containing the extracts. diff --git a/R/process_extract_home_care.R b/R/process_extract_home_care.R index 382521b5d..874ad899c 100644 --- a/R/process_extract_home_care.R +++ b/R/process_extract_home_care.R @@ -2,7 +2,7 @@ #' #' @description This will read and process the #' (year specific) Home Care extract, it will return the final data -#' but also write this out as rds. +#' and (optionally) write it to disk. 
#' #' @inheritParams process_extract_care_home #' @@ -30,9 +30,13 @@ process_extract_home_care <- function( hc_data <- data %>% # select episodes for FY - dplyr::filter(is_date_in_fyyear(year, .data$record_keydate1, .data$record_keydate2)) %>% + dplyr::filter( + is_date_in_fyyear(year, .data$record_keydate1, .data$record_keydate2) + ) %>% # remove any episodes where the latest submission was before the current year - dplyr::filter(substr(.data$sc_latest_submission, 1, 4) >= convert_fyyear_to_year(year)) %>% + dplyr::filter( + substr(.data$sc_latest_submission, 1L, 4L) >= convert_fyyear_to_year(year) + ) %>% # Match to client data dplyr::left_join(client_lookup, by = c("sending_location", "social_care_id")) %>% dplyr::mutate(year = year) diff --git a/R/process_extract_homelessness.R b/R/process_extract_homelessness.R index 36528cfa7..f4fb7d3e5 100644 --- a/R/process_extract_homelessness.R +++ b/R/process_extract_homelessness.R @@ -2,7 +2,7 @@ #' #' @description This will read and process the #' homelessness extract, it will return the final data -#' and optionally write it out as rds. +#' and (optionally) write it to disk. #' #' @param data The extract to process from [read_extract_homelessness()]. #' @param year The year to process, in FY format. 
@@ -43,7 +43,7 @@ process_extract_homelessness <- function( ) %>% dplyr::mutate( dplyr::across( - c("financial_difficulties_debt_unemployment":"refused"), + "financial_difficulties_debt_unemployment":"refused", ~ tidyr::replace_na(.x, 9L) ), hl1_reason_ftm = paste0( @@ -146,13 +146,14 @@ process_extract_homelessness <- function( ) if (write_to_disk) { - final_data %>% - write_file(get_file_path( - get_year_dir(year), - stringr::str_glue("homelessness_for_source-20{year}"), - ext = "rds", + write_file( + final_data, + get_source_extract_path( + year = year, + type = "Homelessness", check_mode = "write" - )) + ) + ) } return(final_data) diff --git a/R/process_extract_maternity.R b/R/process_extract_maternity.R index 04fa46ced..64fa4e205 100644 --- a/R/process_extract_maternity.R +++ b/R/process_extract_maternity.R @@ -2,7 +2,7 @@ #' #' @description This will read and process the #' maternity extract, it will return the final data -#' but also write this out an rds. +#' and (optionally) write it to disk. #' #' @param data The extract to process #' @param year The year to process, in FY format. 
@@ -63,10 +63,7 @@ process_extract_maternity <- function(data, year, write_to_disk = TRUE) { ) ) - - # Save outfile------------------------------------------------ - - outfile <- maternity_clean %>% + maternity_processed <- maternity_clean %>% dplyr::select( "year", "recid", @@ -113,12 +110,11 @@ process_extract_maternity <- function(data, year, write_to_disk = TRUE) { dplyr::arrange(.data$chi, .data$record_keydate1) if (write_to_disk) { - # Save as rds file - outfile %>% - write_file( - get_source_extract_path(year, "Maternity", check_mode = "write") - ) + write_file( + maternity_processed, + get_source_extract_path(year, "Maternity", check_mode = "write") + ) } - return(outfile) + return(maternity_processed) } diff --git a/R/process_extract_mental_health.R b/R/process_extract_mental_health.R index 108c14c61..ffea63d28 100644 --- a/R/process_extract_mental_health.R +++ b/R/process_extract_mental_health.R @@ -2,7 +2,7 @@ #' #' @description This will read and process the #' mental health extract, it will return the final data -#' but also write this out an rds. +#' and (optionally) write it to disk. #' #' @param data The extract to process #' @param year The year to process, in FY format. 
@@ -32,8 +32,10 @@ process_extract_mental_health <- function(data, year, write_to_disk = TRUE) { dplyr::mutate(gpprac = convert_eng_gpprac_to_dummy(.data$gpprac)) %>% # cij_ipdc dplyr::mutate( - cij_ipdc = dplyr::if_else(.data$cij_inpatient == "MH", "I", "NA"), - cij_ipdc = dplyr::na_if(.data$cij_ipdc, "NA") + cij_ipdc = dplyr::na_if( + dplyr::if_else(.data$cij_inpatient == "MH", "I", "NA"), + "NA" + ) ) %>% dplyr::select(-.data$cij_inpatient) %>% # cij_admtype recode unknown to 99 @@ -52,9 +54,9 @@ process_extract_mental_health <- function(data, year, write_to_disk = TRUE) { ) %>% dplyr::mutate( # yearstay - yearstay = rowSums(dplyr::across(tidyselect::ends_with("_beddays"))), + yearstay = rowSums(dplyr::pick(tidyselect::ends_with("_beddays"))), # cost total net - cost_total_net = rowSums(dplyr::across(tidyselect::ends_with("_cost"))), + cost_total_net = rowSums(dplyr::pick(tidyselect::ends_with("_cost"))), # total length of stay stay = calculate_stay( .data$year, @@ -65,10 +67,7 @@ process_extract_mental_health <- function(data, year, write_to_disk = TRUE) { smrtype = add_smr_type(.data$recid) ) - - # Outfile --------------------------------------- - - outfile <- mh_clean %>% + mh_processed <- mh_clean %>% dplyr::arrange(.data$chi, .data$record_keydate1) %>% dplyr::select( "year", @@ -116,10 +115,11 @@ process_extract_mental_health <- function(data, year, write_to_disk = TRUE) { ) if (write_to_disk) { - outfile %>% - # Save as rds file - write_file(get_source_extract_path(year, "MH", check_mode = "write")) + write_file( + mh_processed, + get_source_extract_path(year, "MH", check_mode = "write") + ) } - return(outfile) + return(mh_processed) } diff --git a/R/process_extract_ooh_diagnosis.R b/R/process_extract_ooh_diagnosis.R index 128c6c772..f2afd634e 100644 --- a/R/process_extract_ooh_diagnosis.R +++ b/R/process_extract_ooh_diagnosis.R @@ -2,7 +2,7 @@ #' #' @description This will read and process the #' GP OOH Diagnosis extract, it will return the final data -#' 
but also write this out an rds. +#' and (optionally) write it to disk. #' #' @param data The extract to process #' @param year The year to process, in FY format. diff --git a/R/process_extract_ooh_outcomes.R b/R/process_extract_ooh_outcomes.R index 6a14bced5..f188e6de1 100644 --- a/R/process_extract_ooh_outcomes.R +++ b/R/process_extract_ooh_outcomes.R @@ -2,7 +2,7 @@ #' #' @description This will read and process the #' GP OOH Outcomes extract, it will return the final data -#' but also write this out as an rds. +#' and (optionally) write it to disk. #' #' @param data The extract to process #' @param year The year to process, in FY format. diff --git a/R/process_extract_outpatients.R b/R/process_extract_outpatients.R index 39b421ab4..341ee0f1a 100644 --- a/R/process_extract_outpatients.R +++ b/R/process_extract_outpatients.R @@ -2,7 +2,7 @@ #' #' @description This will read and process the #' outpatients extract, it will return the final data -#' but also write this out as an rds. +#' and (optionally) write it to disk. #' #' @param data The extract to process #' @param year The year to process, in FY format. 
@@ -49,11 +49,7 @@ process_extract_outpatients <- function(data, year, write_to_disk = TRUE) { ) ) - - ## save outfile --------------------------------------- - - outfile <- - outpatients_clean %>% + outpatients_processed <- outpatients_clean %>% dplyr::select( "year", "recid", @@ -89,12 +85,11 @@ process_extract_outpatients <- function(data, year, write_to_disk = TRUE) { ) if (write_to_disk) { - # Save as rds file - outfile %>% - write_file( - get_source_extract_path(year, "Outpatients", check_mode = "write") - ) + write_file( + outpatients_processed, + get_source_extract_path(year, "Outpatients", check_mode = "write") + ) } - return(outfile) + return(outpatients_processed) } diff --git a/R/process_extract_prescribing.R b/R/process_extract_prescribing.R index 776299d47..68c388b83 100644 --- a/R/process_extract_prescribing.R +++ b/R/process_extract_prescribing.R @@ -2,7 +2,7 @@ #' #' @description This will read and process the #' prescribing extract, it will return the final data -#' but also write this out as an rds. +#' and (optionally) write it to disk. #' #' @param data The extract to process #' @param year The year to process, in FY format. @@ -50,9 +50,10 @@ process_extract_prescribing <- function(data, year, write_to_disk = TRUE) { } if (write_to_disk) { - # Save as rds file - pis_clean %>% - write_file(get_source_extract_path(year, "PIS", check_mode = "write")) + write_file( + pis_clean, + get_source_extract_path(year, "PIS", check_mode = "write") + ) } return(pis_clean) diff --git a/R/process_extract_sds.R b/R/process_extract_sds.R index a58651749..bd9e93a3f 100644 --- a/R/process_extract_sds.R +++ b/R/process_extract_sds.R @@ -2,7 +2,7 @@ #' #' @description This will read and process the #' (year specific) SDS extract, it will return the final data -#' but also write this out as rds. +#' and (optionally) write it to disk. 
#' #' @inheritParams process_extract_care_home #' diff --git a/R/process_lookup_gpprac.R b/R/process_lookup_gpprac.R index 45773613f..2afe1affd 100644 --- a/R/process_lookup_gpprac.R +++ b/R/process_lookup_gpprac.R @@ -12,10 +12,11 @@ #' @return the final data as a [tibble][tibble::tibble-package]. #' @export #' @family process extracts -process_lookup_gpprac <- function(open_data = get_gpprac_opendata(), - gpprac_ref_path = get_gpprac_ref_path(), - spd_path = get_spd_path(), - write_to_disk = TRUE) { +process_lookup_gpprac <- function( + open_data = get_gpprac_opendata(), + gpprac_ref_path = get_gpprac_ref_path(), + spd_path = get_spd_path(), + write_to_disk = TRUE) { gpprac_ref_file <- read_file(path = gpprac_ref_path) %>% dplyr::select( "gpprac" = "praccode", @@ -65,15 +66,11 @@ process_lookup_gpprac <- function(open_data = get_gpprac_opendata(), ) %>% dplyr::mutate( lca = convert_ca_to_lca(.data$ca2018), - hbpraccode = dplyr::if_else( - .data$gpprac %in% c(99942L, 99957L, 99961L, 99981L, 99999L), - "S08200003", - .data$hbpraccode - ), - hbpraccode = dplyr::if_else( - .data$gpprac == 99995L, - "S08200001", - .data$hbpraccode + hbpraccode = dplyr::case_match( + .data$gpprac, + c(99942L, 99957L, 99961L, 99981L, 99999L) ~ "S08200003", + 99995L ~ "S08200001", + .default = .data$hbpraccode ) ) diff --git a/R/process_lookup_homelessness.R b/R/process_lookup_homelessness.R new file mode 100644 index 000000000..c0138d10a --- /dev/null +++ b/R/process_lookup_homelessness.R @@ -0,0 +1,108 @@ +#' Create a homelessness lookup +#' @description Reads in the homelessness extract and creates +#' a lookup at CHI level, with one row per application start +#' and end date for each CHI. +#' +#' @param homelessness_data the processed homelessness data for +#' the financial year (created with [process_extract_homelessness()]). +#' @inheritParams create_episode_file +#' +#' @return the final data as a [tibble][tibble::tibble-package]. 
+#' @export +#' @family process extracts +create_homelessness_lookup <- function( + year, + homelessness_data = read_file(get_source_extract_path(year, "Homelessness"))) { + homelessness_lookup <- homelessness_data %>% + dplyr::distinct(.data$chi, .data$record_keydate1, .data$record_keydate2) %>% + tidyr::drop_na(.data$chi) %>% + dplyr::mutate(hl1_in_fy = 1L) + + return(homelessness_lookup) +} + + +#' Add 'homelessness in FY' flag +#' @description Add a flag to the data indicating if the CHI +#' had a homelessness episode within the financial year. +#' +#' @param data The data to add the flag to - the episode +#' or individual file. +#' @param lookup The homelessness lookup created by [create_homelessness_lookup()] +#' @inheritParams create_episode_file +#' +#' @return the final data as a [tibble][tibble::tibble-package] +#' @export +add_homelessness_flag <- function(data, year, + lookup = create_homelessness_lookup(year)) { + ## need to decide which recids this relates to + data <- data %>% + dplyr::left_join( + lookup %>% + dplyr::distinct(.data$chi, .data$hl1_in_fy), + by = "chi", + relationship = "many-to-one" + ) %>% + dplyr::mutate(hl1_in_fy = tidyr::replace_na(.data$hl1_in_fy, 0L)) + + return(data) +} + + +#' Add homelessness date flags episode +#' +#' @description Add flags to episodes indicating if they +#' have had at least one active homelessness application in +#' 6 months before, 6 months after, or during an episode. +#' +#' @inheritParams add_homelessness_flag +#' +#' @return the final data as a [tibble][tibble::tibble-package]. 
+#' @export +add_homelessness_date_flags <- function(data, year, lookup = create_homelessness_lookup(year)) { + lookup <- lookup %>% + dplyr::filter(!(is.na(.data$record_keydate2))) %>% + dplyr::rename( + application_date = .data$record_keydate1, + end_date = .data$record_keydate2 + ) %>% + dplyr::mutate( + six_months_pre_app = .data$application_date - lubridate::days(180), + six_months_post_app = .data$end_date + lubridate::days(180) + ) %>% + dplyr::distinct(.data$chi, .data$hl1_in_fy, .data$six_months_pre_app, .data$six_months_post_app, .data$application_date, .data$end_date) + + + homeless_flag <- data %>% + dplyr::select(.data$chi, .data$record_keydate1, .data$record_keydate2, .data$recid) %>% + dplyr::filter(.data$recid %in% c("00B", "01B", "GLS", "DD", "02B", "04B", "AE2", "OoH", "DN", "CMH", "NRS")) %>% + dplyr::distinct() %>% + dplyr::left_join( + lookup, + by = "chi", relationship = "many-to-many" + ) %>% + dplyr::filter(.data$hl1_in_fy == 1) %>% + dplyr::mutate(hl1_6before_ep = ifelse((.data$end_date <= .data$record_keydate2) & + (.data$record_keydate1 <= .data$six_months_post_app), 1, 0)) %>% + dplyr::mutate(hl1_6after_ep = ifelse((.data$six_months_pre_app <= .data$record_keydate2) & + (.data$record_keydate1 <= .data$application_date), 1, 0)) %>% + dplyr::mutate(hl1_during_ep = ifelse((.data$application_date <= .data$record_keydate2) & + (.data$record_keydate1 <= .data$end_date), 1, 0)) %>% + dplyr::group_by(.data$chi, .data$recid, .data$record_keydate1, .data$record_keydate2) %>% + dplyr::summarise( + hl1_6before_ep = max(.data$hl1_6before_ep), + hl1_6after_ep = max(.data$hl1_6after_ep), + hl1_during_ep = max(.data$hl1_during_ep) + ) %>% + dplyr::ungroup() + + + data <- data %>% + dplyr::left_join( + homeless_flag, + by = c("chi", "record_keydate1", "record_keydate2", "recid"), + relationship = "many-to-one" + ) + + return(data) +} diff --git a/R/process_lookup_ltc.R b/R/process_lookup_ltc.R index 80a4b8706..8ea33da48 100644 --- 
a/R/process_lookup_ltc.R +++ b/R/process_lookup_ltc.R @@ -17,20 +17,18 @@ process_lookup_ltc <- function(data, year, write_to_disk = TRUE) { ltc_flags <- data %>% dplyr::mutate(dplyr::across( tidyselect::ends_with("date"), - list(flag = ~ dplyr::if_else(is.na(.x) | .x > end_fy(year), 0L, 1L)) + list(flag = ~ as.integer(!(is.na(.x) | .x > end_fy(year)))) )) %>% dplyr::rename_with( .cols = tidyselect::ends_with("flag"), .fn = ~ stringr::str_remove(.x, "_date_flag") ) - # Save Outfile--------------------------------------------- - if (write_to_disk) { - # Save .rds file - ltc_flags %>% - dplyr::arrange(.data$chi) %>% - write_file(get_ltcs_path(year, check_mode = "write")) + write_file( + ltc_flags, + get_ltcs_path(year, check_mode = "write") + ) } return(ltc_flags) diff --git a/R/process_lookup_postcode.R b/R/process_lookup_postcode.R index 878c51f37..f9f1d47f4 100644 --- a/R/process_lookup_postcode.R +++ b/R/process_lookup_postcode.R @@ -2,7 +2,7 @@ #' #' @description This will read and process the #' postcode lookup, it will return the final data -#' but also write this out as an rds. +#' and (optionally) write it to disk. #' #' @param simd_path Path to SIMD lookup. #' @param locality_path Path to locality lookup. 
@@ -53,21 +53,19 @@ process_lookup_postcode <- function(spd_path = get_spd_path(), tidyselect::matches("datazone\\d{4}$") ) %>% dplyr::mutate( - locality = tidyr::replace_na("locality", "No Locality Information") + locality = tidyr::replace_na(.data$locality, "No Locality Information") ) # Join data together ----------------------------------------------------- - data <- - dplyr::left_join(spd_file, simd_file, by = "pc7") %>% + data <- dplyr::left_join(spd_file, simd_file, by = "pc7") %>% dplyr::rename(postcode = "pc7") %>% dplyr::left_join(locality_file, by = "datazone2011") # Finalise output ----------------------------------------------------- - outfile <- - data %>% + slf_pc_lookup <- data %>% dplyr::select( "postcode", "lca", @@ -89,13 +87,12 @@ process_lookup_postcode <- function(spd_path = get_spd_path(), tidyselect::matches("ur2_\\d{4}$") ) - - # Save out ---------------------------------------------------------------- if (write_to_disk) { - outfile %>% - # Save .rds file - write_file(get_slf_postcode_path(check_mode = "write")) + write_file( + slf_pc_lookup, + get_slf_postcode_path(check_mode = "write") + ) } - return(outfile) + return(slf_pc_lookup) } diff --git a/R/process_lookup_sc_client.R b/R/process_lookup_sc_client.R index 1874c2b5a..845570b93 100644 --- a/R/process_lookup_sc_client.R +++ b/R/process_lookup_sc_client.R @@ -2,7 +2,7 @@ #' #' @description This will read and process the #' social care client lookup, it will return the final data -#' but also write this out as an rds. +#' and (optionally) write it to disk. 
#' #' @param data The extract to process #' @param year The year to process @@ -108,10 +108,7 @@ process_lookup_sc_client <- function(data, year, write_to_disk = TRUE) { .fn = ~ paste0("sc_", .x) ) - - ## save outfile --------------------------------------- - outfile <- - client_clean %>% + sc_client_lookup <- client_clean %>% # reorder dplyr::select( "sending_location", @@ -125,10 +122,11 @@ process_lookup_sc_client <- function(data, year, write_to_disk = TRUE) { ) if (write_to_disk) { - # Save .rds file - outfile %>% - write_file(get_source_extract_path(year, "Client", check_mode = "write")) + write_file( + sc_client_lookup, + get_sc_client_lookup_path(year, check_mode = "write") + ) } - return(outfile) + return(sc_client_lookup) } diff --git a/R/process_lookup_sc_demographics.R b/R/process_lookup_sc_demographics.R index f93a5a4db..8c363f547 100644 --- a/R/process_lookup_sc_demographics.R +++ b/R/process_lookup_sc_demographics.R @@ -2,7 +2,7 @@ #' #' @description This will read and process the #' social care demographic lookup, it will return the final data -#' but also write this out as an rds. +#' and (optionally) write it to disk. #' #' @param data The extract to process. #' @param spd_path Path to the Scottish Postcode Directory. @@ -12,7 +12,10 @@ #' @return the final data as a [tibble][tibble::tibble-package]. 
#' @export #' @family process extracts -process_lookup_sc_demographics <- function(data, spd_path = get_spd_path(), write_to_disk = TRUE) { +process_lookup_sc_demographics <- function( + data, + spd_path = get_spd_path(), + write_to_disk = TRUE) { # Deal with postcodes --------------------------------------- # UK postcode regex - see https://ideal-postcodes.co.uk/guides/postcode-validation @@ -51,8 +54,7 @@ process_lookup_sc_demographics <- function(data, spd_path = get_spd_path(), writ )) # count number of na postcodes - na_postcodes <- - sc_demog %>% + na_postcodes <- sc_demog %>% dplyr::count(dplyr::across(tidyselect::contains("postcode"), ~ is.na(.x))) sc_demog <- sc_demog %>% @@ -67,19 +69,28 @@ process_lookup_sc_demographics <- function(data, spd_path = get_spd_path(), writ ~ dplyr::if_else(stringr::str_detect(.x, uk_pc_regexp), .x, NA) )) %>% dplyr::select( - "latest_record_flag", "extract_date", "sending_location", "social_care_id", "upi", "gender", - "dob", "submitted_postcode", "chi_postcode" + "latest_record_flag", + "extract_date", + "sending_location", + "social_care_id", + "upi", + "gender", + "dob", + "submitted_postcode", + "chi_postcode" ) %>% # check if submitted_postcode matches with postcode lookup - dplyr::mutate(valid_pc = dplyr::if_else(.data$submitted_postcode %in% valid_spd_postcodes, 1L, 0L)) %>% + dplyr::mutate( + valid_pc = .data$submitted_postcode %in% valid_spd_postcodes + ) %>% # use submitted_postcode if valid, otherwise use chi_postcode dplyr::mutate(postcode = dplyr::case_when( - (!is.na(.data$submitted_postcode) & .data$valid_pc == 1L) ~ .data$submitted_postcode, - (is.na(.data$submitted_postcode) & .data$valid_pc == 0L) ~ .data$chi_postcode + (!is.na(.data$submitted_postcode) & .data$valid_pc) ~ .data$submitted_postcode, + (is.na(.data$submitted_postcode) & !.data$valid_pc) ~ .data$chi_postcode )) %>% dplyr::mutate(postcode_type = dplyr::case_when( - (!is.na(.data$submitted_postcode) & .data$valid_pc == 1L) ~ "submitted", - 
(is.na(.data$submitted_postcode) & .data$valid_pc == 0L) ~ "chi", + (!is.na(.data$submitted_postcode) & .data$valid_pc) ~ "submitted", + (is.na(.data$submitted_postcode) & !.data$valid_pc) ~ "chi", (is.na(.data$submitted_postcode) & is.na(.data$chi_postcode)) ~ "missing" )) @@ -88,17 +99,11 @@ process_lookup_sc_demographics <- function(data, spd_path = get_spd_path(), writ dplyr::count(.data$postcode_type) # count number of replaced postcode - compare with count above - na_replaced_postcodes <- - sc_demog %>% + na_replaced_postcodes <- sc_demog %>% dplyr::count(dplyr::across(tidyselect::ends_with("_postcode"), ~ is.na(.x))) - na_replaced_postcodes - na_postcodes - - ## save outfile --------------------------------------- - outfile <- - sc_demog %>% + sc_demog_lookup <- sc_demog %>% # group by sending location and ID dplyr::group_by(.data$sending_location, .data$social_care_id) %>% # arrange so latest submissions are last @@ -117,14 +122,12 @@ process_lookup_sc_demographics <- function(data, spd_path = get_spd_path(), writ ) %>% dplyr::ungroup() - - ## save file ## - if (write_to_disk) { - # Save .rds file - outfile %>% - write_file(get_sc_demog_lookup_path(check_mode = "write")) + write_file( + sc_demog_lookup, + get_sc_demog_lookup_path(check_mode = "write") + ) } - return(outfile) + return(sc_demog_lookup) } diff --git a/R/process_sc_all_alarms_telecare.R b/R/process_sc_all_alarms_telecare.R index 61bce41b6..620b14cee 100644 --- a/R/process_sc_all_alarms_telecare.R +++ b/R/process_sc_all_alarms_telecare.R @@ -2,7 +2,7 @@ #' #' @description This will read and process the #' all Alarms Telecare extract, it will return the final data -#' but also write this out as a rds. +#' and (optionally) write it to disk. 
#' #' @inheritParams process_sc_all_care_home #' @@ -121,9 +121,10 @@ process_sc_all_alarms_telecare <- function( tibble::as_tibble() if (write_to_disk) { - # Save .rds file ---- - qtr_merge %>% - write_file(get_sc_at_episodes_path(check_mode = "write")) + write_file( + qtr_merge, + get_sc_at_episodes_path(check_mode = "write") + ) } return(qtr_merge) diff --git a/R/process_sc_all_care_home.R b/R/process_sc_all_care_home.R index a11f275e8..c41e1a1d5 100644 --- a/R/process_sc_all_care_home.R +++ b/R/process_sc_all_care_home.R @@ -2,7 +2,7 @@ #' #' @description This will read and process the #' all Care Home extract, it will return the final data -#' but also write this out as a rds. +#' and (optionally) write it to disk. #' #' @param data The extract to process #' @param sc_demog_lookup The Social Care Demographics lookup produced by diff --git a/R/process_sc_all_home_care.R b/R/process_sc_all_home_care.R index b812d492b..5f2b4db49 100644 --- a/R/process_sc_all_home_care.R +++ b/R/process_sc_all_home_care.R @@ -2,7 +2,7 @@ #' #' @description This will read and process the #' all home care extract, it will return the final data -#' but also write this out as a rds. +#' and (optionally) write it to disk. #' #' @inheritParams process_sc_all_care_home #' diff --git a/R/process_sc_all_sds.R b/R/process_sc_all_sds.R index 802c9215c..c17f74f28 100644 --- a/R/process_sc_all_sds.R +++ b/R/process_sc_all_sds.R @@ -1,7 +1,7 @@ #' Process the all SDS extract #' @description This will read and process the #' all SDS extract, it will return the final data -#' but also write this out as a rds. +#' and (optionally) write it to disk. #' #' @inheritParams process_sc_all_care_home #' @@ -36,10 +36,12 @@ process_sc_all_sds <- function( # SDS option 4 is derived when a person receives more than one option. # e.g. 
if a person has options 1 and 2 then option 4 will be derived dplyr::mutate( - sds_option_4 = rowSums(dplyr::across(tidyselect::starts_with("sds_option_"))) > 1L, + sds_option_4 = rowSums( + dplyr::pick(tidyselect::starts_with("sds_option_")) + ) > 1L, .after = .data$sds_option_3 ) %>% - # If sds start date is missing, assign start of FY + # If SDS start date is missing, assign start of FY dplyr::mutate(sds_start_date = fix_sc_start_dates( .data$sds_start_date, .data$period @@ -117,12 +119,11 @@ process_sc_all_sds <- function( # change the data format from data.table to data.frame tibble::as_tibble() - - # Save outfile------------------------------------------------ if (write_to_disk) { - # Save .rds file - final_data %>% - write_file(get_sc_sds_episodes_path(check_mode = "write")) + write_file( + final_data, + get_sc_sds_episodes_path(check_mode = "write") + ) } return(final_data) diff --git a/R/process_tests_alarms_telecare.R b/R/process_tests_alarms_telecare.R index 079e6810e..a0c46ff07 100644 --- a/R/process_tests_alarms_telecare.R +++ b/R/process_tests_alarms_telecare.R @@ -37,8 +37,8 @@ produce_source_at_tests <- function(data, # create test flags create_demog_test_flags() %>% dplyr::mutate( - n_at_alarms = dplyr::if_else(.data$smrtype == "AT-Alarm", 1L, 0L), - n_at_telecare = dplyr::if_else(.data$smrtype == "AT-Tele", 1L, 0L) + n_at_alarms = .data$smrtype == "AT-Alarm", + n_at_telecare = .data$smrtype == "AT-Tele" ) %>% create_lca_test_flags(.data$sc_send_lca) %>% # remove variables that won't be summed diff --git a/R/process_tests_care_home.R b/R/process_tests_care_home.R index 0b673ad4c..3633c9882 100644 --- a/R/process_tests_care_home.R +++ b/R/process_tests_care_home.R @@ -48,17 +48,17 @@ produce_source_ch_tests <- function(data, create_demog_test_flags() %>% dplyr::mutate( n_episodes = 1L, - ch_name_missing = dplyr::if_else(is.na(.data$ch_name), 1L, 0L), + ch_name_missing = is.na(.data$ch_name), ch_provider_1_to_5 = dplyr::case_when( .data$ch_provider 
%in% c("1", "2", "3", "4", "5") ~ 1L, TRUE ~ 0L ), - ch_provider_other = dplyr::if_else(.data$ch_provider == "6", 1L, 0L), - ch_adm_reason_missing = dplyr::if_else(is.na(.data$ch_adm_reason), 1L, 0L) + ch_provider_other = .data$ch_provider == "6", + ch_adm_reason_missing = is.na(.data$ch_adm_reason) ) %>% create_lca_test_flags(.data$sc_send_lca) %>% # keep variables for comparison - dplyr::select(c("valid_chi":dplyr::last_col())) %>% + dplyr::select("valid_chi":dplyr::last_col()) %>% # use function to sum new test flags calculate_measures(measure = "sum") diff --git a/R/process_tests_cmh.R b/R/process_tests_cmh.R index 1baec563c..1fa21b71f 100644 --- a/R/process_tests_cmh.R +++ b/R/process_tests_cmh.R @@ -45,7 +45,7 @@ produce_source_cmh_tests <- function(data) { create_hb_test_flags(hb_var = .data$hbrescode) %>% dplyr::mutate(n_episodes = 1L) %>% # keep variables for comparison - dplyr::select(c("valid_chi":dplyr::last_col())) %>% + dplyr::select("valid_chi":dplyr::last_col()) %>% # use function to sum new test flags calculate_measures(measure = "sum") diff --git a/R/process_tests_district_nursing.R b/R/process_tests_district_nursing.R index 2b8b35e8a..7f73570e4 100644 --- a/R/process_tests_district_nursing.R +++ b/R/process_tests_district_nursing.R @@ -18,7 +18,7 @@ process_tests_district_nursing <- function(data, year) { # replace NA by 0 in monthly costs dplyr::mutate(dplyr::across( dplyr::ends_with("_cost"), - ~ tidyr::replace_na(.x, 0) + ~ tidyr::replace_na(.x, 0.0) )) comparison <- produce_test_comparison( @@ -52,9 +52,8 @@ process_tests_district_nursing <- function(data, year) { #' from [calculate_measures()] #' #' @family extract test functions -#' @seealso [create_hb_test_flags()] -#' #' [create_hscp_test_flags()] and [create_hb_cost_test_flags()] -#' for creating test flags +#' @seealso [create_hb_test_flags()], [create_hscp_test_flags()] +#' and [create_hb_cost_test_flags()] for creating test flags. 
#' @seealso calculate_measures produce_source_dn_tests <- function(data, sum_mean_vars = c("cost", "yearstay"), @@ -68,7 +67,7 @@ produce_source_dn_tests <- function(data, create_hb_test_flags(.data$hbtreatcode) %>% create_hb_cost_test_flags(.data$hbtreatcode, .data$cost_total_net) %>% # keep variables for comparison - dplyr::select(c(.data$valid_chi:.data$NHS_Lanarkshire_cost)) %>% + dplyr::select(.data$valid_chi:.data$NHS_Lanarkshire_cost) %>% # use function to sum new test flags calculate_measures(measure = "sum") diff --git a/R/process_tests_episode_file.R b/R/process_tests_episode_file.R index 46e9e7171..fc31727ed 100644 --- a/R/process_tests_episode_file.R +++ b/R/process_tests_episode_file.R @@ -14,16 +14,16 @@ process_tests_episode_file <- function(data, year) { "gender", "postcode", "hbtreatcode", + "hscp2018", "dob", "recid", "yearstay", "record_keydate1", "record_keydate2", dplyr::contains(c("beddays", "cost", "cij")) - ) %>% - slfhelper::get_chi() + ) - old_data <- get_existing_data_for_tests(data) + old_data <- get_existing_data_for_tests(data, anon_chi = TRUE) comparison <- produce_test_comparison( old_data = produce_episode_file_tests(old_data), @@ -62,6 +62,7 @@ process_tests_episode_file <- function(data, year) { #' [create_hscp_test_flags()] and [create_hb_cost_test_flags()] #' for creating test flags #' @seealso calculate_measures +#' @export produce_episode_file_tests <- function( data, sum_mean_vars = c("beddays", "cost", "yearstay"), @@ -72,7 +73,15 @@ produce_episode_file_tests <- function( test_flags <- data %>% dplyr::group_by(.data$recid) %>% # use functions to create HB and partnership flags - create_demog_test_flags() %>% + dplyr::mutate( + unique_anon_chi = dplyr::lag(.data$anon_chi) != .data$anon_chi, + n_missing_anon_chi = is_missing(.data$anon_chi), + n_males = .data$gender == 1L, + n_females = .data$gender == 2L, + n_postcode = !is.na(.data$postcode) | !.data$postcode == "", + n_missing_postcode = is_missing(.data$postcode), + 
missing_dob = is.na(.data$dob) + ) %>% create_hb_test_flags(.data$hbtreatcode) %>% create_hb_cost_test_flags(.data$hbtreatcode, .data$cost_total_net) %>% # Flags to count stay types @@ -97,9 +106,11 @@ produce_episode_file_tests <- function( 1L, 0L ) - ) %>% + ) + + test_flags <- test_flags %>% # keep variables for comparison - dplyr::select(c("valid_chi":dplyr::last_col())) %>% + dplyr::select("unique_anon_chi":dplyr::last_col()) %>% # use function to sum new test flags calculate_measures(measure = "sum", group_by = "recid") diff --git a/R/process_tests_home_care.R b/R/process_tests_home_care.R index bd1bcf97e..71938d889 100644 --- a/R/process_tests_home_care.R +++ b/R/process_tests_home_care.R @@ -59,7 +59,7 @@ produce_source_hc_tests <- function(data, ) %>% create_lca_test_flags(.data$sc_send_lca) %>% # keep variables for comparison - dplyr::select(c("valid_chi":dplyr::last_col())) %>% + dplyr::select("valid_chi":dplyr::last_col()) %>% # use function to sum new test flags calculate_measures(measure = "sum") diff --git a/R/process_tests_homelessness.R b/R/process_tests_homelessness.R index c80205d15..bea7fc881 100644 --- a/R/process_tests_homelessness.R +++ b/R/process_tests_homelessness.R @@ -39,7 +39,7 @@ produce_slf_homelessness_tests <- function(data, create_demog_test_flags() %>% create_lca_test_flags(.data$hl1_sending_lca) %>% # keep variables for comparison - dplyr::select(c("valid_chi":dplyr::last_col())) %>% + dplyr::select("valid_chi":dplyr::last_col()) %>% # use function to sum new test flags calculate_measures(measure = "sum") diff --git a/R/process_tests_individual_file.R b/R/process_tests_individual_file.R index 695dc19a0..a9d193465 100644 --- a/R/process_tests_individual_file.R +++ b/R/process_tests_individual_file.R @@ -100,11 +100,18 @@ produce_individual_file_tests <- function(data) { measure = "sum" ) + dup_chi <- data.frame( + measure = "duplicated chi number", + value = duplicated(data$chi) %>% + sum() %>% as.integer() + ) + join_output <- 
list( test_flags, all_measures, min_max_measures, - sum_measures + sum_measures, + dup_chi ) %>% purrr::reduce(dplyr::full_join, by = c("measure", "value")) diff --git a/R/process_tests_it_chi_deaths.R b/R/process_tests_it_chi_deaths.R index 4a82acb37..d10eadd23 100644 --- a/R/process_tests_it_chi_deaths.R +++ b/R/process_tests_it_chi_deaths.R @@ -47,7 +47,7 @@ produce_it_chi_deaths_tests <- function(data) { "n_deaths_{current_year_2}" := .data$death_year == current_year_2, "n_deaths_{current_year_3}" := .data$death_year == current_year_3, "n_deaths_{current_year_4}" := .data$death_year == current_year_4, - "n_deaths_{current_year_5}" := .data$death_year == current_year_5, + "n_deaths_{current_year_5}" := .data$death_year == current_year_5 ) %>% # remove variables that are not test flags dplyr::select(dplyr::starts_with("n_")) %>% diff --git a/R/process_tests_nrs_deaths.R b/R/process_tests_nrs_deaths.R index 1d41a64c3..fd96fa5c4 100644 --- a/R/process_tests_nrs_deaths.R +++ b/R/process_tests_nrs_deaths.R @@ -39,7 +39,7 @@ produce_source_nrs_tests <- function(data) { create_demog_test_flags() %>% dplyr::mutate(n_deaths = 1L) %>% # keep variables for comparison - dplyr::select(c("valid_chi":dplyr::last_col())) %>% + dplyr::select("valid_chi":dplyr::last_col()) %>% # use function to sum new test flags calculate_measures(measure = "sum") diff --git a/R/process_tests_outpatients.R b/R/process_tests_outpatients.R index f8a7a6a2e..5ab3e82db 100644 --- a/R/process_tests_outpatients.R +++ b/R/process_tests_outpatients.R @@ -12,11 +12,13 @@ process_tests_outpatients <- function(data, year) { comparison <- produce_test_comparison( old_data = produce_source_extract_tests(old_data, sum_mean_vars = "cost", - max_min_vars = c("record_keydate1", "record_keydate2", "cost_total_net") + max_min_vars = c("record_keydate1", "record_keydate2", "cost_total_net"), + add_hscp_count = FALSE ), new_data = produce_source_extract_tests(data, sum_mean_vars = "cost", - max_min_vars = 
c("record_keydate1", "record_keydate2", "cost_total_net") + max_min_vars = c("record_keydate1", "record_keydate2", "cost_total_net"), + add_hscp_count = FALSE ) ) %>% write_tests_xlsx(sheet_name = "00B", year) diff --git a/R/process_tests_prescribing.R b/R/process_tests_prescribing.R index a950f50e6..4b4c4dcb3 100644 --- a/R/process_tests_prescribing.R +++ b/R/process_tests_prescribing.R @@ -42,7 +42,7 @@ produce_source_pis_tests <- function(data) { create_demog_test_flags() %>% dplyr::mutate(n_episodes = 1L) %>% # keep variables for comparison - dplyr::select(c("valid_chi":dplyr::last_col())) %>% + dplyr::select("valid_chi":dplyr::last_col()) %>% # use function to sum new test flags calculate_measures(measure = "sum") diff --git a/R/process_tests_sc_client_lookup.R b/R/process_tests_sc_client_lookup.R new file mode 100644 index 000000000..c3e4e70f9 --- /dev/null +++ b/R/process_tests_sc_client_lookup.R @@ -0,0 +1,64 @@ +#' Social care client lookup tests +#' +#' @description This script takes the processed social care client lookup and +#' produces a test comparison with the previous data. This is written to +#' disk in the tests workbook. +#' +#' @inherit process_tests_acute +#' +#' @export +process_tests_sc_client_lookup <- function(data, year) { + comparison <- produce_test_comparison( + old_data = produce_tests_sc_client_lookup( + read_file(get_sc_client_lookup_path(year, update = previous_update())) + ), + new_data = produce_tests_sc_client_lookup(data) + ) + + comparison %>% + write_tests_xlsx(sheet_name = "sc_client", year) + + return(comparison) +} + + +#' Social care Client lookup Tests +#' +#' @description Produce the test for the social care Client all episodes +#' +#' @param data new or old data for testing summary flags +#' (data is from [get_source_extract_path()]) +#' @param max_min_vars variables used when selecting 'min-max' from [calculate_measures()] +#' @return a dataframe with a count of each flag. 
+#' +#' @family social care test functions +produce_tests_sc_client_lookup <- function(data) { + test_flags <- data %>% + # create test flags + create_sending_location_test_flags(.data$sending_location) %>% + dplyr::arrange(.data$sending_location, .data$social_care_id) %>% + dplyr::mutate( + unique_sc_id = dplyr::lag(.data$social_care_id) != .data$social_care_id, + n_sc_living_alone_yes = .data$sc_living_alone == "Yes", + n_sc_living_alone_no = .data$sc_living_alone == "No", + n_sc_living_alone_not_known = .data$sc_living_alone == "Not Known", + n_sc_support_from_unpaid_carer_yes = .data$sc_support_from_unpaid_carer == "Yes", + n_sc_support_from_unpaid_carer_no = .data$sc_support_from_unpaid_carer == "No", + n_sc_support_from_unpaid_carer_not_known = .data$sc_support_from_unpaid_carer == "Not Known", + n_sc_social_worker_yes = .data$sc_social_worker == "Yes", + n_sc_social_worker_no = .data$sc_social_worker == "No", + n_sc_social_worker_not_known = .data$sc_social_worker == "Not Known", + n_sc_meals_yes = .data$sc_meals == "Yes", + n_sc_meals_no = .data$sc_meals == "No", + n_sc_meals_not_known = .data$sc_meals == "Not Known", + n_sc_day_care_yes = .data$sc_day_care == "Yes", + n_sc_day_care_no = .data$sc_day_care == "No", + n_sc_day_care_not_known = .data$sc_day_care == "Not Known", + ) %>% + # remove variables that won't be summed + dplyr::select("Aberdeen_City":dplyr::last_col()) %>% + # use function to sum new test flags + calculate_measures(measure = "sum") + + return(test_flags) +} diff --git a/R/produce_homelessness_completeness.R b/R/produce_homelessness_completeness.R index f85e44beb..00a459df7 100644 --- a/R/produce_homelessness_completeness.R +++ b/R/produce_homelessness_completeness.R @@ -41,18 +41,18 @@ produce_homelessness_completeness <- function( openxlsx::read.xlsx( sg_pub_path, sheet = "Table 1", - rows = 8:39, - cols = 1:25, + rows = 8L:39L, + cols = 1L:25L, colNames = FALSE ) %>% dplyr::rename_with(~ c( "CAName", - paste0(paste0("q", 1:4), "_", 
rep(2016, 4)), - paste0(paste0("q", 1:4), "_", rep(2017, 4)), - paste0(paste0("q", 1:4), "_", rep(2018, 4)), - paste0(paste0("q", 1:4), "_", rep(2019, 4)), - paste0(paste0("q", 1:4), "_", rep(2020, 4)), - paste0(paste0("q", 1:4), "_", rep(2021, 4)) + paste0(paste0("q", 1L:4L), "_", rep(2016L, 4L)), + paste0(paste0("q", 1L:4L), "_", rep(2017L, 4L)), + paste0(paste0("q", 1L:4L), "_", rep(2018L, 4L)), + paste0(paste0("q", 1L:4L), "_", rep(2019L, 4L)), + paste0(paste0("q", 1L:4L), "_", rep(2020L, 4L)), + paste0(paste0("q", 1L:4L), "_", rep(2021L, 4L)) )) %>% tidyr::pivot_longer( !"CAName", diff --git a/R/produce_source_extract_tests.R b/R/produce_source_extract_tests.R index 20d1a4191..7f8feda92 100644 --- a/R/produce_source_extract_tests.R +++ b/R/produce_source_extract_tests.R @@ -13,6 +13,7 @@ #' (data is from [get_source_extract_path()]) #' @param sum_mean_vars variables used when selecting 'all' measures from [calculate_measures()] #' @param max_min_vars variables used when selecting 'min-max' from [calculate_measures()] +#' @param add_hscp_count Default set to TRUE. For use where `hscp variable` is not available, specify FALSE. 
#' #' @return a dataframe with a count of each flag #' from [calculate_measures()] @@ -28,14 +29,21 @@ produce_source_extract_tests <- function(data, max_min_vars = c( "record_keydate1", "record_keydate2", "cost_total_net", "yearstay" - )) { + ), + add_hscp_count = TRUE) { test_flags <- data %>% # use functions to create HB and partnership flags create_demog_test_flags() %>% create_hb_test_flags(.data$hbtreatcode) %>% - create_hb_cost_test_flags(.data$hbtreatcode, .data$cost_total_net) %>% + create_hb_cost_test_flags(.data$hbtreatcode, .data$cost_total_net) + + if (add_hscp_count) { + test_flags <- create_hscp_test_flags(test_flags, .data$hscp) + } + + test_flags <- test_flags %>% # keep variables for comparison - dplyr::select(c("valid_chi":dplyr::last_col())) %>% + dplyr::select("valid_chi":dplyr::last_col()) %>% # use function to sum new test flags calculate_measures(measure = "sum") diff --git a/R/read_extract_acute.R b/R/read_extract_acute.R index a0fba0707..6a0d23b11 100644 --- a/R/read_extract_acute.R +++ b/R/read_extract_acute.R @@ -152,7 +152,9 @@ read_extract_acute <- function(year, file_path = get_boxi_extract_path(year = ye GLS_record = "GLS Record" ) %>% # replace NA in cost_total_net by 0 - dplyr::mutate(cost_total_net = tidyr::replace_na(.data[["cost_total_net"]], 0)) + dplyr::mutate( + cost_total_net = tidyr::replace_na(.data[["cost_total_net"]], 0.0) + ) return(extract_acute) } diff --git a/R/read_extract_gp_ooh.R b/R/read_extract_gp_ooh.R index 98606eb8a..3a711c2f8 100644 --- a/R/read_extract_gp_ooh.R +++ b/R/read_extract_gp_ooh.R @@ -2,7 +2,7 @@ #' #' @description This will read and process the #' GP OoH extract, it will return the final data -#' but also write this out as an rds. +#' and (optionally) write it to disk. #' #' @param year The year to process, in FY format. #' @param diagnosis_path Path to diagnosis BOXI extract location. 
diff --git a/R/read_extract_mental_health.R b/R/read_extract_mental_health.R index fe82732c8..248316975 100644 --- a/R/read_extract_mental_health.R +++ b/R/read_extract_mental_health.R @@ -129,7 +129,9 @@ read_extract_mental_health <- function( uri = "Unique Record Identifier" ) %>% # replace NA in cost_total_net by 0 - dplyr::mutate(cost_total_net = tidyr::replace_na(.data[["cost_total_net"]], 0)) + dplyr::mutate( + cost_total_net = tidyr::replace_na(.data[["cost_total_net"]], 0.0) + ) return(extract_mental_health) } diff --git a/R/read_file.R b/R/read_file.R index 2941b62ed..be0a6fc65 100644 --- a/R/read_file.R +++ b/R/read_file.R @@ -27,6 +27,11 @@ read_file <- function(path, col_select = NULL, as_data_frame = TRUE, ...) { "parquet" ) + # Return an empty tibble if trying to read the dummy path + if (path == get_dummy_boxi_extract_path()) { + return(tibble::tibble()) + } + ext <- fs::path_ext(path) if (ext == "gz") { diff --git a/R/read_lookup_sc_client.R b/R/read_lookup_sc_client.R index 88fcf826a..cc98060f3 100644 --- a/R/read_lookup_sc_client.R +++ b/R/read_lookup_sc_client.R @@ -3,13 +3,14 @@ #' @description This will read and process the #' social care client lookup #' -#' @param sc_dvprod_connection The connection to the SC platform. #' @param fyyear The year to process, in the standard format '1718' +#' @param sc_dvprod_connection The connection to the SC platform. #' #' @return the final data as a [tibble][tibble::tibble-package]. 
#' @export #' @family process extracts -read_lookup_sc_client <- function(sc_dvprod_connection = phs_db_connection(dsn = "DVPROD"), fyyear) { +read_lookup_sc_client <- function(fyyear, + sc_dvprod_connection = phs_db_connection(dsn = "DVPROD")) { check_year_format(fyyear) year <- convert_fyyear_to_year(fyyear) diff --git a/R/read_sc_all_alarms_telecare.R b/R/read_sc_all_alarms_telecare.R index 4647244bb..2c7bd03db 100644 --- a/R/read_sc_all_alarms_telecare.R +++ b/R/read_sc_all_alarms_telecare.R @@ -22,18 +22,19 @@ read_sc_all_alarms_telecare <- function(sc_dvprod_connection = phs_db_connection "service_start_date", "service_end_date" ) %>% - # fix bad period (2017, 2020 & 2021) + dplyr::collect() %>% + # fix bad period (2017, 2020, 2021, and so on) dplyr::mutate( - period = dplyr::if_else(.data$period == "2017", "2017Q4", .data$period), - period = dplyr::if_else(.data$period == "2020", "2020Q4", .data$period), - period = dplyr::if_else(.data$period == "2021", "2021Q4", .data$period) + period = dplyr::if_else( + grepl("\\d{4}$", .data$period), + paste0(.data$period, "Q4"), + .data$period + ) ) %>% - # order - dplyr::arrange(.data$sending_location, .data$social_care_id) %>% - dplyr::collect() %>% dplyr::mutate( - dplyr::across(c("sending_location", "service_type"), as.integer) - ) + dplyr::across(c("sending_location", "service_type"), ~ as.integer(.x)) + ) %>% + dplyr::arrange(.data$sending_location, .data$social_care_id) return(at_full_data) } diff --git a/R/write_file.R b/R/write_file.R index a4f888bdc..62b0025a7 100644 --- a/R/write_file.R +++ b/R/write_file.R @@ -37,7 +37,7 @@ write_file <- function(data, path, ...) { sink = path, compression = "zstd", version = "latest", - ..., + ... 
) ) diff --git a/R/write_tests_xlsx.R b/R/write_tests_xlsx.R index e187149d5..68452b0cf 100644 --- a/R/write_tests_xlsx.R +++ b/R/write_tests_xlsx.R @@ -47,7 +47,7 @@ write_tests_xlsx <- function(comparison_data, sheet_name, year = NULL) { while (fs::file_exists(path = in_use_path) && seconds < max_wait) { # While the tests are in use (wait a random number of seconds from 1 to 30) cli::cli_progress_update() - wait <- sample(x = 3:15, size = 1) + wait <- sample(x = 3L:15L, size = 1L) Sys.sleep(wait) seconds <- seconds + wait @@ -56,7 +56,7 @@ write_tests_xlsx <- function(comparison_data, sheet_name, year = NULL) { } # Final check to maybe avoid corrupting the workbook - Sys.sleep(sample(x = 1:3, size = 1)) + Sys.sleep(sample(x = 1L:3L, size = 1L)) if (!fs::file_exists(path = in_use_path)) { fs::file_create(path = in_use_path) } else { diff --git a/Rmarkdown/costs_care_home.Rmd b/Rmarkdown/costs_care_home.Rmd index 7d2e65c2d..09d2e5a68 100644 --- a/Rmarkdown/costs_care_home.Rmd +++ b/Rmarkdown/costs_care_home.Rmd @@ -22,23 +22,28 @@ fs::file_copy(get_ch_costs_path(), ) ## Read costs from the CHC Open data -ch_costs_data <- - phsopendata::get_resource( - res_id = "4ee7dc84-ca65-455c-9e76-b614091f389f", - col_select = c("Date", "KeyStatistic", "CA", "Value") - ) %>% +ch_costs_data <- phsopendata::get_resource( + res_id = "4ee7dc84-ca65-455c-9e76-b614091f389f", + col_select = c("Date", "KeyStatistic", "CA", "Value") +) %>% janitor::clean_names() %>% # Dates are at end of the fin year # so cost are for the fin year to that date. 
- mutate(year = createslf::convert_year_to_fyyear((date %/% 10000) - 1)) %>% + mutate(year = createslf::convert_year_to_fyyear((date %/% 10000L) - 1L)) %>% filter(year >= "1617") %>% - mutate(funding_source = stringr::str_extract(key_statistic, "((:?All)|(:?Self)|(:?Publicly))")) %>% - mutate(nursing_care_provision = if_else(stringr::str_detect(key_statistic, "Without"), 1, 0)) %>% - select(year, - ca, - funding_source, - nursing_care_provision, - cost_per_week = value + mutate(funding_source = stringr::str_extract( + string = key_statistic, + pattern = "((:?All)|(:?Self)|(:?Publicly))" + )) %>% + mutate( + nursing_care_provision = as.integer(stringr::str_detect(key_statistic, "Without")) + ) %>% + select( + "year", + "ca", + "funding_source", + "nursing_care_provision", + cost_per_week = "value" ) @@ -105,7 +110,7 @@ matched_costs_data <- # match to new costs full_join(old_costs, by = c("year", "nursing_care_provision")) %>% # compute difference - mutate(pct_diff = (cost_per_day - cost_old) / cost_old * 100) + mutate(pct_diff = (cost_per_day - cost_old) / cost_old * 100.0) summary(matched_costs_data$pct_diff) diff --git a/Rmarkdown/costs_district_nursing.Rmd b/Rmarkdown/costs_district_nursing.Rmd index 825a931ed..e3c9bba13 100644 --- a/Rmarkdown/costs_district_nursing.Rmd +++ b/Rmarkdown/costs_district_nursing.Rmd @@ -79,7 +79,7 @@ population_lookup <- read_file(get_datazone_pop_path("HSCP2019_pop_est_1981_2021 # Select only the HSCPs for NHS Highland & years since 2015 filter( hscp2019 %in% c("S37000004", "S37000016"), - year >= 2015 + year >= 2015L ) %>% # Create year as FY = YYYY from CCYY. 
rename(calendar_year = year) %>% @@ -93,7 +93,7 @@ population_lookup <- read_file(get_datazone_pop_path("HSCP2019_pop_est_1981_2021 ## compute proportion ## mutate( pop_proportion = pop / total_pop, - pop_pct = pop_proportion * 100 + pop_pct = pop_proportion * 100.0 ) %>% ## Argyll and Bute is the only HSCP in NHS Highland that submits data ## filter(hscp2019name == "Argyll and Bute") @@ -110,7 +110,9 @@ matched_data <- full_join(dn_raw_costs_contacts, # recode NA pop_proportion with 1 mutate(pop_proportion = replace_na(pop_proportion, 1)) %>% ## total net cost ## - mutate(cost_total_net = ((cost * 1000) / (number_of_contacts / pop_proportion))) %>% + mutate( + cost_total_net = ((cost * 1000) / (number_of_contacts / pop_proportion)) + ) %>% # sort by HB2019 and year arrange(hb2019, year) %>% # keep only records with cost diff --git a/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R b/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R index d33dfbe49..ea6f81bfc 100644 --- a/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R +++ b/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R @@ -6,7 +6,7 @@ library(glue) nsu_dir <- path("/conf/hscdiip/SLF_Extracts/NSU") # Change the year -fin_year <- "1516" +fin_year <- "2223" db_connection <- odbc::dbConnect( odbc::odbc(), @@ -16,7 +16,7 @@ db_connection <- odbc::dbConnect( ) # Check the table name and change if required. 
-table <- dbplyr::in_schema("ROBERM18", "FINAL_2") +table <- dbplyr::in_schema("ROBERM18", "FINAL_1") # Read NSU data nsu_data <- @@ -35,9 +35,11 @@ nsu_data <- collect() # Write out the data -file_path <- path(nsu_dir, glue("All_CHIs_20{fin_year}.zsav")) +file_path <- path(nsu_dir, glue("All_CHIs_20{fin_year}.parquet")) # This will archive the existing file for later comparison if (file_exists(file_path)) { - file_copy(file_path, path(nsu_dir, glue("All_CHIs_20{fin_year}_OLD.zsav"))) + file_copy(file_path, path(nsu_dir, glue("All_CHIs_20{fin_year}_OLD.parquet"))) } -write_sav(nsu_data, file_path, compress = TRUE) + +nsu_data %>% + arrow::write_parquet(file_path, compression = "zstd", compression_level = 10) diff --git a/_targets.R b/_targets.R index f50045aed..3473679de 100644 --- a/_targets.R +++ b/_targets.R @@ -19,17 +19,17 @@ tar_option_set( memory = "persistent" # default option ) -years_to_run <- c("1718", "1819", "1920", "2021", "2122", "2223") +years_to_run <- c("1718", "1819", "1920", "2021", "2122", "2223", "2324") list( tar_rds(write_to_disk, TRUE), tar_rds( file_path_ext_clean, make_lowercase_ext(), - priority = 1, + priority = 1.0, cue = tar_cue_age( name = file_path_ext_clean, - age = as.difftime(7, units = "days") + age = as.difftime(7.0, units = "days") ) ), ## Lookup data ## @@ -44,14 +44,18 @@ list( ), tar_file_read(dd_data, get_dd_path(), read_extract_delayed_discharges(!!.x)), tar_file_read(ltc_data, get_it_ltc_path(), read_lookup_ltc(!!.x)), - tar_target(slf_ch_name_lookup_path, get_slf_ch_name_lookup_path(), format = "file"), + tar_target( + slf_ch_name_lookup_path, + get_slf_ch_name_lookup_path(), + format = "file" + ), ## Process Lookups ## tar_target( sc_demog_data, read_lookup_sc_demographics(), cue = tar_cue_age( name = sc_demog_data, - age = as.difftime(28, units = "days") + age = as.difftime(28.0, units = "days") ) ), tar_target( @@ -117,7 +121,7 @@ list( read_sc_all_alarms_telecare(), cue = tar_cue_age( name = all_at_extract, - age = 
as.difftime(28, units = "days") + age = as.difftime(28.0, units = "days") ) ), tar_target( @@ -134,7 +138,7 @@ list( read_sc_all_home_care(), cue = tar_cue_age( name = all_home_care_extract, - age = as.difftime(28, units = "days") + age = as.difftime(28.0, units = "days") ) ), tar_target( @@ -151,7 +155,7 @@ list( read_sc_all_care_home(), cue = tar_cue_age( name = all_care_home_extract, - age = as.difftime(28, units = "days") + age = as.difftime(28.0, units = "days") ) ), tar_target( @@ -175,7 +179,7 @@ list( read_sc_all_sds(), cue = tar_cue_age( name = all_sds_extract, - age = as.difftime(28, units = "days") + age = as.difftime(28.0, units = "days") ) ), tar_target( @@ -192,10 +196,10 @@ list( tar_rds( compress_extracts, gzip_files(year), - priority = 1, + priority = 1.0, cue = tar_cue_age( name = compress_extracts, - age = as.difftime(7, units = "days") + age = as.difftime(7.0, units = "days") ) ), ### target data extracts ### @@ -335,11 +339,14 @@ list( year ) ), - tar_target(source_homelessness_extract, process_extract_homelessness( - homelessness_data, - year, - write_to_disk = write_to_disk - )), + tar_target( + source_homelessness_extract, + process_extract_homelessness( + homelessness_data, + year, + write_to_disk = write_to_disk + ) + ), tar_target( tests_source_homelessness_extract, process_tests_homelessness( @@ -445,6 +452,10 @@ list( write_to_disk = write_to_disk ) ), + tar_target( + tests_sc_client_lookup, + process_tests_sc_client_lookup(sc_client_lookup, year = year) + ), tar_target( source_sc_alarms_tele, process_extract_alarms_telecare( @@ -539,11 +550,26 @@ list( source_sc_alarms_tele ) ), + tar_file_read(nsu_cohort, get_nsu_path(year), read_file(!!.x)), + tar_target( + homelessness_lookup, + create_homelessness_lookup( + year, + homelessness_data = source_homelessness_extract + ) + ), tar_target( episode_file, - run_episode_file( + create_episode_file( processed_data_list, year, + homelessness_lookup = homelessness_lookup, + dd_data = 
source_dd_extract, + nsu_cohort = nsu_cohort, + ltc_data = source_ltc_lookup, + slf_pc_lookup = source_pc_lookup, + slf_gpprac_lookup = source_gp_lookup, + slf_deaths_lookup = slf_deaths_lookup, write_to_disk ) ), @@ -559,6 +585,7 @@ list( create_individual_file( episode_file = episode_file, year = year, + homelessness_lookup = homelessness_lookup, write_to_disk = write_to_disk ) ), @@ -568,36 +595,36 @@ list( data = individual_file, year = year ) - ), - tar_target( - episode_file_dataset, - arrow::write_dataset( - dataset = episode_file, - path = fs::path( - get_year_dir(year), - stringr::str_glue("source-episode-file-{year}") - ), - format = "parquet", - # Should correspond to the available slfhelper filters - partitioning = c("recid", "hscp2018"), - compression = "zstd", - version = "latest" - ) - ), - tar_target( - individual_file_dataset, - arrow::write_dataset( - dataset = individual_file, - path = fs::path( - get_year_dir(year), - stringr::str_glue("source-individual-file-{year}") - ), - format = "parquet", - # Should correspond to the available slfhelper filters - partitioning = c("hscp2018"), - compression = "zstd", - version = "latest" - ) - ) + ) # , + # tar_target( + # episode_file_dataset, + # arrow::write_dataset( + # dataset = episode_file, + # path = fs::path( + # get_year_dir(year), + # stringr::str_glue("source-episode-file-{year}") + # ), + # format = "parquet", + # # Should correspond to the available slfhelper filters + # partitioning = c("recid", "hscp2018"), + # compression = "zstd", + # version = "latest" + # ) + # ), + # tar_target( + # individual_file_dataset, + # arrow::write_dataset( + # dataset = individual_file, + # path = fs::path( + # get_year_dir(year), + # stringr::str_glue("source-individual-file-{year}") + # ), + # format = "parquet", + # # Should correspond to the available slfhelper filters + # partitioning = c("hscp2018"), + # compression = "zstd", + # version = "latest" + # ) + # ) ) ) diff --git a/_targets.yaml 
b/_targets.yaml index 24c8a3733..5f5f0303e 100644 --- a/_targets.yaml +++ b/_targets.yaml @@ -2,4 +2,5 @@ main: store: /conf/sourcedev/Source_Linkage_File_Updates/_targets workers: '16' reporter_make: timestamp_positives + reporter_outdated: forecast seconds_interval: 30 diff --git a/hc_methodology.Rmd b/hc_methodology.Rmd index 5270735a9..23aedb5c6 100644 --- a/hc_methodology.Rmd +++ b/hc_methodology.Rmd @@ -43,7 +43,14 @@ knitr::opts_chunk$set(echo = TRUE) replaced_start_dates %>% group_by(sending_location_name) %>% summarise(before = n_distinct(social_care_id)) %>% - left_join(fixed_sc_ids %>% group_by(sending_location_name) %>% summarise(after = n_distinct(social_care_id))) %>% - mutate(diff = before - after, diff_pct = scales::percent(diff / before, accuracy = 0.1)) %>% + left_join( + fixed_sc_ids %>% + group_by(sending_location_name) %>% + summarise(after = n_distinct(social_care_id)) + ) %>% + mutate( + diff = before - after, + diff_pct = scales::percent(diff / before, accuracy = 0.1) + ) %>% gt::gt() ``` diff --git a/man/add_acute_columns.Rd b/man/add_acute_columns.Rd index 52ba071b6..c2659f821 100644 --- a/man/add_acute_columns.Rd +++ b/man/add_acute_columns.Rd @@ -7,7 +7,7 @@ add_acute_columns(episode_file, prefix, condition) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{prefix}{Prefix to add to related columns, e.g. "Acute"} diff --git a/man/add_ae_columns.Rd b/man/add_ae_columns.Rd index 9b7099513..fdc31b7ff 100644 --- a/man/add_ae_columns.Rd +++ b/man/add_ae_columns.Rd @@ -7,7 +7,7 @@ add_ae_columns(episode_file, prefix, condition) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{prefix}{Prefix to add to related columns, e.g. 
"Acute"} diff --git a/man/add_all_columns.Rd b/man/add_all_columns.Rd index d502e95c3..1d2e587db 100644 --- a/man/add_all_columns.Rd +++ b/man/add_all_columns.Rd @@ -7,7 +7,7 @@ add_all_columns(episode_file) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} } \description{ Add new columns based on SMRType and recid which follow a pattern diff --git a/man/add_at_columns.Rd b/man/add_at_columns.Rd index e05ea9101..af978530a 100644 --- a/man/add_at_columns.Rd +++ b/man/add_at_columns.Rd @@ -7,7 +7,7 @@ add_at_columns(episode_file, prefix, condition) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{prefix}{Prefix to add to related columns, e.g. "Acute"} diff --git a/man/add_ch_columns.Rd b/man/add_ch_columns.Rd index 4938f7690..a036a257e 100644 --- a/man/add_ch_columns.Rd +++ b/man/add_ch_columns.Rd @@ -7,7 +7,7 @@ add_ch_columns(episode_file, prefix, condition) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{prefix}{Prefix to add to related columns, e.g. "Acute"} diff --git a/man/add_cij_columns.Rd b/man/add_cij_columns.Rd index 7d00e6299..c48c1a3ef 100644 --- a/man/add_cij_columns.Rd +++ b/man/add_cij_columns.Rd @@ -7,7 +7,7 @@ add_cij_columns(episode_file) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} } \description{ Add new columns related to CIJ diff --git a/man/add_cmh_columns.Rd b/man/add_cmh_columns.Rd index a1d82cba6..a1cb74abb 100644 --- a/man/add_cmh_columns.Rd +++ b/man/add_cmh_columns.Rd @@ -7,7 +7,7 @@ add_cmh_columns(episode_file, prefix, condition) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{prefix}{Prefix to add to related columns, e.g. 
"Acute"} diff --git a/man/add_dd_columns.Rd b/man/add_dd_columns.Rd index 08d9c0fe4..11e85fdc7 100644 --- a/man/add_dd_columns.Rd +++ b/man/add_dd_columns.Rd @@ -7,7 +7,7 @@ add_dd_columns(episode_file, prefix, condition) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{prefix}{Prefix to add to related columns, e.g. "Acute"} diff --git a/man/add_dn_columns.Rd b/man/add_dn_columns.Rd index bf6af008f..ffdf59a82 100644 --- a/man/add_dn_columns.Rd +++ b/man/add_dn_columns.Rd @@ -7,7 +7,7 @@ add_dn_columns(episode_file, prefix, condition) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{prefix}{Prefix to add to related columns, e.g. "Acute"} diff --git a/man/add_gls_columns.Rd b/man/add_gls_columns.Rd index e71dc755b..6ab7e9645 100644 --- a/man/add_gls_columns.Rd +++ b/man/add_gls_columns.Rd @@ -7,7 +7,7 @@ add_gls_columns(episode_file, prefix, condition) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{prefix}{Prefix to add to related columns, e.g. "Acute"} diff --git a/man/add_hc_columns.Rd b/man/add_hc_columns.Rd index 95d8f1d3b..a58f226ec 100644 --- a/man/add_hc_columns.Rd +++ b/man/add_hc_columns.Rd @@ -7,7 +7,7 @@ add_hc_columns(episode_file, prefix, condition) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{prefix}{Prefix to add to related columns, e.g. "Acute"} diff --git a/man/add_hl1_columns.Rd b/man/add_hl1_columns.Rd index 7600db5e9..24fc714e9 100644 --- a/man/add_hl1_columns.Rd +++ b/man/add_hl1_columns.Rd @@ -7,7 +7,7 @@ add_hl1_columns(episode_file, prefix, condition) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{prefix}{Prefix to add to related columns, e.g. 
"Acute"} diff --git a/man/add_homelessness_date_flags.Rd b/man/add_homelessness_date_flags.Rd new file mode 100644 index 000000000..7b386a185 --- /dev/null +++ b/man/add_homelessness_date_flags.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/process_lookup_homelessness.R +\name{add_homelessness_date_flags} +\alias{add_homelessness_date_flags} +\title{Add homelessness date flags episode} +\usage{ +add_homelessness_date_flags( + data, + year, + lookup = create_homelessness_lookup(year) +) +} +\arguments{ +\item{data}{The data to add the flag to - the episode +or individual file.} + +\item{year}{The year to process, in FY format.} + +\item{lookup}{The homelessness lookup created by \code{\link[=create_homelessness_lookup]{create_homelessness_lookup()}}} +} +\value{ +the final data as a \link[tibble:tibble-package]{tibble}. +} +\description{ +Add flags to episodes indicating if they +have had at least one active homelessness application in +6 months before, 6 months after, or during an episode. 
+} diff --git a/man/add_homelessness_flag.Rd b/man/add_homelessness_flag.Rd new file mode 100644 index 000000000..1ab409020 --- /dev/null +++ b/man/add_homelessness_flag.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/process_lookup_homelessness.R +\name{add_homelessness_flag} +\alias{add_homelessness_flag} +\title{Add 'homelessness in FY' flag} +\usage{ +add_homelessness_flag(data, year, lookup = create_homelessness_lookup(year)) +} +\arguments{ +\item{data}{The data to add the flag to - the episode +or individual file.} + +\item{year}{The year to process, in FY format.} + +\item{lookup}{The homelessness lookup created by \code{\link[=create_homelessness_lookup]{create_homelessness_lookup()}}} +} +\value{ +the final data as a \link[tibble:tibble-package]{tibble} +} +\description{ +Add a flag to the data indicating if the CHI +had a homelessness episode within the financial year. +} diff --git a/man/add_hri_variables.Rd b/man/add_hri_variables.Rd new file mode 100644 index 000000000..131a00f6b --- /dev/null +++ b/man/add_hri_variables.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/add_hri_variables.R +\name{add_hri_variables} +\alias{add_hri_variables} +\title{Add HRI variables to an SLF Individual File} +\usage{ +add_hri_variables( + data, + chi_variable = "chi", + slf_pc_lookup = read_file(get_slf_postcode_path(), col_select = "postcode") +) +} +\arguments{ +\item{data}{An SLF individual file.} + +\item{slf_pc_lookup}{The Source postcode lookup, defaults +to \code{\link[=get_slf_postcode_path]{get_slf_postcode_path()}} read using \code{\link[=read_file]{read_file()}}.} +} +\value{ +The individual file with HRI variables matched on +} +\description{ +Add HRI variables to an SLF Individual File +} +\details{ +Filters the dataset to only include Scottish residents, then +creates a lookup where HRIs are calculated at Scotland, Health Board, and +LCA level. 
Then joins on this lookup by chi/anon_chi. +} diff --git a/man/add_ipdc_cols.Rd b/man/add_ipdc_cols.Rd index 0f91cbd90..bd630b9d3 100644 --- a/man/add_ipdc_cols.Rd +++ b/man/add_ipdc_cols.Rd @@ -7,7 +7,7 @@ add_ipdc_cols(episode_file, prefix, condition, ipdc_d = TRUE, elective = TRUE) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{prefix}{Prefix to add to related columns, e.g. "Acute"} diff --git a/man/add_mat_columns.Rd b/man/add_mat_columns.Rd index aae729323..5faab0dc1 100644 --- a/man/add_mat_columns.Rd +++ b/man/add_mat_columns.Rd @@ -7,7 +7,7 @@ add_mat_columns(episode_file, prefix, condition) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{prefix}{Prefix to add to related columns, e.g. "Acute"} diff --git a/man/add_mh_columns.Rd b/man/add_mh_columns.Rd index 3c50c6cb8..c587c490a 100644 --- a/man/add_mh_columns.Rd +++ b/man/add_mh_columns.Rd @@ -7,7 +7,7 @@ add_mh_columns(episode_file, prefix, condition) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{prefix}{Prefix to add to related columns, e.g. "Acute"} diff --git a/man/add_nrs_columns.Rd b/man/add_nrs_columns.Rd index 9d7b3f8bf..b41201a57 100644 --- a/man/add_nrs_columns.Rd +++ b/man/add_nrs_columns.Rd @@ -7,7 +7,7 @@ add_nrs_columns(episode_file, prefix, condition) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{prefix}{Prefix to add to related columns, e.g. 
"Acute"} diff --git a/man/add_nsu_cohort.Rd b/man/add_nsu_cohort.Rd index 723c105e1..4ea9324e0 100644 --- a/man/add_nsu_cohort.Rd +++ b/man/add_nsu_cohort.Rd @@ -4,12 +4,14 @@ \alias{add_nsu_cohort} \title{Add NSU cohort to working file} \usage{ -add_nsu_cohort(data, year) +add_nsu_cohort(data, year, nsu_cohort = read_file(get_nsu_path(year))) } \arguments{ \item{data}{The input data frame} \item{year}{The year being processed} + +\item{nsu_cohort}{The NSU data for the year} } \value{ A data frame containing the Non-Service Users as additional rows diff --git a/man/add_nsu_columns.Rd b/man/add_nsu_columns.Rd index 6a54bbcbf..5aed481f0 100644 --- a/man/add_nsu_columns.Rd +++ b/man/add_nsu_columns.Rd @@ -7,7 +7,7 @@ add_nsu_columns(episode_file, prefix, condition) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{prefix}{Prefix to add to related columns, e.g. "Acute"} diff --git a/man/add_ooh_columns.Rd b/man/add_ooh_columns.Rd index 01814ab6d..f1e6b63f5 100644 --- a/man/add_ooh_columns.Rd +++ b/man/add_ooh_columns.Rd @@ -7,7 +7,7 @@ add_ooh_columns(episode_file, prefix, condition) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{prefix}{Prefix to add to related columns, e.g. "Acute"} diff --git a/man/add_op_columns.Rd b/man/add_op_columns.Rd index 08c4419e2..9fb8bc158 100644 --- a/man/add_op_columns.Rd +++ b/man/add_op_columns.Rd @@ -7,7 +7,7 @@ add_op_columns(episode_file, prefix, condition) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{prefix}{Prefix to add to related columns, e.g. 
"Acute"} diff --git a/man/add_operation_flag.Rd b/man/add_operation_flag.Rd index cb7dff76d..bda825a7c 100644 --- a/man/add_operation_flag.Rd +++ b/man/add_operation_flag.Rd @@ -10,7 +10,8 @@ add_operation_flag(op1a) \item{op1a}{A vector of operation codes} } \value{ -A boolean vector showing whether a record contains an operation or not +A boolean vector showing whether a record contains an operation or +not. } \description{ Add operation flag diff --git a/man/add_pis_columns.Rd b/man/add_pis_columns.Rd index b582acf2e..836218da0 100644 --- a/man/add_pis_columns.Rd +++ b/man/add_pis_columns.Rd @@ -7,7 +7,7 @@ add_pis_columns(episode_file, prefix, condition) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{prefix}{Prefix to add to related columns, e.g. "Acute"} diff --git a/man/add_sds_columns.Rd b/man/add_sds_columns.Rd index d5a5fb2cf..c06b88527 100644 --- a/man/add_sds_columns.Rd +++ b/man/add_sds_columns.Rd @@ -7,7 +7,7 @@ add_sds_columns(episode_file, prefix, condition) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{prefix}{Prefix to add to related columns, e.g. "Acute"} diff --git a/man/add_standard_cols.Rd b/man/add_standard_cols.Rd index 744aa49de..4392157d2 100644 --- a/man/add_standard_cols.Rd +++ b/man/add_standard_cols.Rd @@ -13,7 +13,7 @@ add_standard_cols( ) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{prefix}{Prefix to add to related columns, e.g. 
"Acute"} diff --git a/man/aggregate_by_chi.Rd b/man/aggregate_by_chi.Rd index 73804ad9b..84c9c0ad3 100644 --- a/man/aggregate_by_chi.Rd +++ b/man/aggregate_by_chi.Rd @@ -1,13 +1,13 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/create_individual_file.R +% Please edit documentation in R/aggregate_by_chi.R \name{aggregate_by_chi} \alias{aggregate_by_chi} \title{Aggregate by CHI} \usage{ -aggregate_by_chi(episode_file) +aggregate_by_chi(episode_file, exclude_sc_var = FALSE) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} } \description{ Aggregate episode file by CHI to convert into diff --git a/man/aggregate_by_chi_zihao.Rd b/man/aggregate_by_chi_zihao.Rd deleted file mode 100644 index 3d4961e19..000000000 --- a/man/aggregate_by_chi_zihao.Rd +++ /dev/null @@ -1,15 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aggregate_by_chi_zihao.R -\name{aggregate_by_chi_zihao} -\alias{aggregate_by_chi_zihao} -\title{Aggregate by CHI} -\usage{ -aggregate_by_chi_zihao(episode_file) -} -\arguments{ -\item{episode_file}{Tibble containing episodic data} -} -\description{ -Aggregate episode file by CHI to convert into -individual file. -} diff --git a/man/aggregate_ch_episodes.Rd b/man/aggregate_ch_episodes.Rd index 2753da14f..3223e6d25 100644 --- a/man/aggregate_ch_episodes.Rd +++ b/man/aggregate_ch_episodes.Rd @@ -1,13 +1,13 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/create_individual_file.R +% Please edit documentation in R/aggregate_by_chi.R \name{aggregate_ch_episodes} \alias{aggregate_ch_episodes} -\title{Aggregate CIS episodes} +\title{Aggregate Care Home episodes to ch_cis} \usage{ aggregate_ch_episodes(episode_file) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} } \description{ Aggregate CH variables by CHI and CIS. 
diff --git a/man/aggregate_ch_episodes_zihao.Rd b/man/aggregate_ch_episodes_zihao.Rd deleted file mode 100644 index 808262654..000000000 --- a/man/aggregate_ch_episodes_zihao.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aggregate_by_chi_zihao.R -\name{aggregate_ch_episodes_zihao} -\alias{aggregate_ch_episodes_zihao} -\title{Aggregate CIS episodes} -\usage{ -aggregate_ch_episodes_zihao(episode_file) -} -\arguments{ -\item{episode_file}{Tibble containing episodic data} -} -\description{ -Aggregate CH variables by CHI and CIS. -} diff --git a/man/assign_cohort_names.Rd b/man/assign_cohort_names.Rd index a0edb373d..e68ad7c42 100644 --- a/man/assign_cohort_names.Rd +++ b/man/assign_cohort_names.Rd @@ -10,7 +10,8 @@ assign_cohort_names(data) \item{data}{A data frame} } \value{ -A data frame with an additional variable containing the assigned cohort +A data frame with an additional variable containing the assigned +cohort } \description{ Assign service use cohort into string format diff --git a/man/assign_s_cohort_limited_daycases.Rd b/man/assign_s_cohort_limited_daycases.Rd index c63569e5c..69f49b4dc 100644 --- a/man/assign_s_cohort_limited_daycases.Rd +++ b/man/assign_s_cohort_limited_daycases.Rd @@ -15,8 +15,8 @@ assign_s_cohort_limited_daycases(elective_inpatient_flag, elective_instances) A boolean vector of limited daycases cohort flags } \description{ -If the record does not have an elective inpatient flag and they have -3 or fewer elective instances, return \code{TRUE} +If the record does not have an elective inpatient flag +and they have 3 or fewer elective instances, return \code{TRUE}. 
} \seealso{ Other Demographic and Service Use Cohort functions: diff --git a/man/assign_s_cohort_outpatient.Rd b/man/assign_s_cohort_outpatient.Rd index 264044b2c..5d811b6af 100644 --- a/man/assign_s_cohort_outpatient.Rd +++ b/man/assign_s_cohort_outpatient.Rd @@ -13,7 +13,8 @@ assign_s_cohort_outpatient(outpatient_cost) A boolean vector of outpatient cohort flags } \description{ -If the record has a outpatient cost greater than zero, assign \code{TRUE} +If the record has a outpatient cost greater than zero, +assign \code{TRUE}. } \seealso{ Other Demographic and Service Use Cohort functions: diff --git a/man/assign_s_cohort_prescribing.Rd b/man/assign_s_cohort_prescribing.Rd index 34ead6130..4b938f518 100644 --- a/man/assign_s_cohort_prescribing.Rd +++ b/man/assign_s_cohort_prescribing.Rd @@ -13,7 +13,8 @@ assign_s_cohort_prescribing(prescribing_cost) A boolean vector of prescribing cohort flags } \description{ -If the record has a prescribing cost greater than zero, assign \code{TRUE} +If the record has a prescribing cost greater than zero, +assign \code{TRUE}. } \seealso{ Other Demographic and Service Use Cohort functions: diff --git a/man/assign_s_cohort_routine_daycase.Rd b/man/assign_s_cohort_routine_daycase.Rd index af67448a9..03f5f51e0 100644 --- a/man/assign_s_cohort_routine_daycase.Rd +++ b/man/assign_s_cohort_routine_daycase.Rd @@ -15,8 +15,8 @@ assign_s_cohort_routine_daycase(elective_inpatient_flag, elective_instances) A boolean vector of routine daycase cohort flags } \description{ -If the record does not have an elective inpatient flag and they have -4 or more elective instances, return \code{TRUE} +If the record does not have an elective inpatient flag and +they have 4 or more elective instances, return \code{TRUE}. 
} \seealso{ Other Demographic and Service Use Cohort functions: diff --git a/man/clean_up_ch.Rd b/man/clean_up_ch.Rd index 0182c84e8..c0c61966d 100644 --- a/man/clean_up_ch.Rd +++ b/man/clean_up_ch.Rd @@ -7,7 +7,7 @@ clean_up_ch(episode_file, year) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{year}{The year to process, in FY format.} } diff --git a/man/compute_mid_year_age.Rd b/man/compute_mid_year_age.Rd index 4892ce7f4..c27e32af5 100644 --- a/man/compute_mid_year_age.Rd +++ b/man/compute_mid_year_age.Rd @@ -15,7 +15,8 @@ compute_mid_year_age(fyyear, dob) a vector of ages at the financial year midpoint } \description{ -Compute the age of a client at the midpoint of the year - 30-09-YYYY +Compute the age of a client at the midpoint of the year - +30-09-YYYY } \examples{ dob <- as.Date(c("01-01-1990", "31-10-1997"), format = "\%d-\%m-\%Y") diff --git a/man/convert_sending_location_to_lca.Rd b/man/convert_sending_location_to_lca.Rd index 8c7a29088..78bf475ba 100644 --- a/man/convert_sending_location_to_lca.Rd +++ b/man/convert_sending_location_to_lca.Rd @@ -17,7 +17,7 @@ Convert Social Care Sending Location Codes into the Local Council Authority Codes. 
} \examples{ -sending_location <- c("100", "120") +sending_location <- c(100, 120) convert_sending_location_to_lca(sending_location) } diff --git a/man/correct_cij_vars.Rd b/man/correct_cij_vars.Rd index 97a7f046f..558514dc6 100644 --- a/man/correct_cij_vars.Rd +++ b/man/correct_cij_vars.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/run_episode_file.R +% Please edit documentation in R/create_episode_file.R \name{correct_cij_vars} \alias{correct_cij_vars} \title{Correct the CIJ variables} diff --git a/man/create_cohort_lookups.Rd b/man/create_cohort_lookups.Rd index f0ad267aa..109869074 100644 --- a/man/create_cohort_lookups.Rd +++ b/man/create_cohort_lookups.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/run_episode_file.R +% Please edit documentation in R/create_episode_file.R \name{create_cohort_lookups} \alias{create_cohort_lookups} \title{Create the cohort lookups} diff --git a/man/create_cost_inc_dna.Rd b/man/create_cost_inc_dna.Rd index 69e7e37b5..47c38b176 100644 --- a/man/create_cost_inc_dna.Rd +++ b/man/create_cost_inc_dna.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/run_episode_file.R +% Please edit documentation in R/create_episode_file.R \name{create_cost_inc_dna} \alias{create_cost_inc_dna} \title{Create cost total net inc DNA} diff --git a/man/create_demog_test_flags.Rd b/man/create_demog_test_flags.Rd index b555b1699..589877738 100644 --- a/man/create_demog_test_flags.Rd +++ b/man/create_demog_test_flags.Rd @@ -21,6 +21,7 @@ Other flag functions: \code{\link{create_hb_cost_test_flags}()}, \code{\link{create_hb_test_flags}()}, \code{\link{create_hscp_test_flags}()}, -\code{\link{create_lca_test_flags}()} +\code{\link{create_lca_test_flags}()}, +\code{\link{create_sending_location_test_flags}()} } \concept{flag functions} diff --git a/man/create_episode_file.Rd b/man/create_episode_file.Rd new file 
mode 100644 index 000000000..5d85744e2 --- /dev/null +++ b/man/create_episode_file.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_episode_file.R +\name{create_episode_file} +\alias{create_episode_file} +\title{Produce the Source Episode file} +\usage{ +create_episode_file( + processed_data_list, + year, + dd_data = read_file(get_source_extract_path(year, "DD")), + homelessness_lookup = create_homelessness_lookup(year), + nsu_cohort = read_file(get_nsu_path(year)), + ltc_data = read_file(get_ltcs_path(year)), + slf_pc_lookup = read_file(get_slf_postcode_path()), + slf_gpprac_lookup = read_file(get_slf_gpprac_path(), col_select = c("gpprac", + "cluster", "hbpraccode")), + slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year)), + write_to_disk = TRUE, + anon_chi_out = TRUE +) +} +\arguments{ +\item{processed_data_list}{containing data from processed extracts.} + +\item{year}{The year to process, in FY format.} + +\item{dd_data}{The processed DD extract} + +\item{nsu_cohort}{The NSU data for the year} + +\item{ltc_data}{The LTC data for the year} + +\item{slf_pc_lookup}{The SLF Postcode lookup} + +\item{slf_gpprac_lookup}{The SLF GP Practice lookup} + +\item{slf_deaths_lookup}{The SLF deaths lookup.} + +\item{write_to_disk}{(optional) Should the data be written to disk default is +\code{TRUE} i.e. 
write the data to disk.} + +\item{anon_chi_out}{(Default:TRUE) Should \code{anon_chi} be used in the output +(instead of chi)} +} +\value{ +a \link[tibble:tibble-package]{tibble} containing the episode file +} +\description{ +Produce the Source Episode file +} diff --git a/man/create_hb_cost_test_flags.Rd b/man/create_hb_cost_test_flags.Rd index 6e2ec141f..1a0c48cf7 100644 --- a/man/create_hb_cost_test_flags.Rd +++ b/man/create_hb_cost_test_flags.Rd @@ -25,6 +25,7 @@ Other flag functions: \code{\link{create_demog_test_flags}()}, \code{\link{create_hb_test_flags}()}, \code{\link{create_hscp_test_flags}()}, -\code{\link{create_lca_test_flags}()} +\code{\link{create_lca_test_flags}()}, +\code{\link{create_sending_location_test_flags}()} } \concept{flag functions} diff --git a/man/create_hb_test_flags.Rd b/man/create_hb_test_flags.Rd index 81e1a38e7..66eb767c0 100644 --- a/man/create_hb_test_flags.Rd +++ b/man/create_hb_test_flags.Rd @@ -22,6 +22,7 @@ Other flag functions: \code{\link{create_demog_test_flags}()}, \code{\link{create_hb_cost_test_flags}()}, \code{\link{create_hscp_test_flags}()}, -\code{\link{create_lca_test_flags}()} +\code{\link{create_lca_test_flags}()}, +\code{\link{create_sending_location_test_flags}()} } \concept{flag functions} diff --git a/man/create_homelessness_lookup.Rd b/man/create_homelessness_lookup.Rd new file mode 100644 index 000000000..4a0be24f9 --- /dev/null +++ b/man/create_homelessness_lookup.Rd @@ -0,0 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/process_lookup_homelessness.R +\name{create_homelessness_lookup} +\alias{create_homelessness_lookup} +\title{Create a homelessness lookup} +\usage{ +create_homelessness_lookup( + year, + homelessness_data = read_file(get_source_extract_path(year, "Homelessness")) +) +} +\arguments{ +\item{year}{The year to process, in FY format.} + +\item{homelessness_data}{the processed homelessness data for +the financial year (created with 
\code{\link[=process_extract_homelessness]{process_extract_homelessness()}}).} +} +\value{ +the final data as a \link[tibble:tibble-package]{tibble}. +} +\description{ +Reads in the homelessness extract and creates +a lookup at CHI level, with one row per application start +and end date for each CHI. +} +\seealso{ +Other process extracts: +\code{\link{process_extract_acute}()}, +\code{\link{process_extract_ae}()}, +\code{\link{process_extract_alarms_telecare}()}, +\code{\link{process_extract_care_home}()}, +\code{\link{process_extract_cmh}()}, +\code{\link{process_extract_delayed_discharges}()}, +\code{\link{process_extract_district_nursing}()}, +\code{\link{process_extract_gp_ooh}()}, +\code{\link{process_extract_home_care}()}, +\code{\link{process_extract_homelessness}()}, +\code{\link{process_extract_maternity}()}, +\code{\link{process_extract_mental_health}()}, +\code{\link{process_extract_nrs_deaths}()}, +\code{\link{process_extract_ooh_consultations}()}, +\code{\link{process_extract_ooh_diagnosis}()}, +\code{\link{process_extract_ooh_outcomes}()}, +\code{\link{process_extract_outpatients}()}, +\code{\link{process_extract_prescribing}()}, +\code{\link{process_extract_sds}()}, +\code{\link{process_it_chi_deaths}()}, +\code{\link{process_lookup_gpprac}()}, +\code{\link{process_lookup_postcode}()}, +\code{\link{process_lookup_sc_client}()}, +\code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_sc_all_alarms_telecare}()}, +\code{\link{process_sc_all_care_home}()}, +\code{\link{process_sc_all_home_care}()}, +\code{\link{process_sc_all_sds}()}, +\code{\link{read_extract_gp_ooh}()}, +\code{\link{read_it_chi_deaths}()}, +\code{\link{read_lookup_sc_client}()} +} +\concept{process extracts} diff --git a/man/create_hscp_test_flags.Rd b/man/create_hscp_test_flags.Rd index d9cc25a72..847eab6a5 100644 --- a/man/create_hscp_test_flags.Rd +++ b/man/create_hscp_test_flags.Rd @@ -12,7 +12,7 @@ create_hscp_test_flags(data, hscp_var) \item{hscp_var}{HSCP variable 
e.g. HSCP2019 HSCP2018} } \value{ -a dataframe with flag (1 or 0) for each HSCP +a dataframe with flag (TRUE or FALSE) for each HSCP } \description{ Create flags for Health & Social Care Partnerships @@ -22,6 +22,7 @@ Other flag functions: \code{\link{create_demog_test_flags}()}, \code{\link{create_hb_cost_test_flags}()}, \code{\link{create_hb_test_flags}()}, -\code{\link{create_lca_test_flags}()} +\code{\link{create_lca_test_flags}()}, +\code{\link{create_sending_location_test_flags}()} } \concept{flag functions} diff --git a/man/create_individual_file.Rd b/man/create_individual_file.Rd index fa759e7b1..4c87b0731 100644 --- a/man/create_individual_file.Rd +++ b/man/create_individual_file.Rd @@ -2,18 +2,19 @@ % Please edit documentation in R/create_individual_file.R \name{create_individual_file} \alias{create_individual_file} -\title{Create individual file} +\title{Create the Source Individual file} \usage{ create_individual_file( episode_file, year, + homelessness_lookup = create_homelessness_lookup(year), write_to_disk = TRUE, anon_chi_in = TRUE, anon_chi_out = TRUE ) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{year}{The year to process, in FY format.} @@ -21,7 +22,7 @@ create_individual_file( \code{TRUE} i.e. write the data to disk.} \item{anon_chi_in}{(Default:TRUE) Is \code{anon_chi} used in the input -(instead of chi)} +(instead of chi).} \item{anon_chi_out}{(Default:TRUE) Should \code{anon_chi} be used in the output (instead of chi)} @@ -30,5 +31,5 @@ create_individual_file( The processed individual file } \description{ -Creates individual file from episode file +Creates the individual file from the episode file. 
} diff --git a/man/create_lca_test_flags.Rd b/man/create_lca_test_flags.Rd index 6a31477a4..136ffcb78 100644 --- a/man/create_lca_test_flags.Rd +++ b/man/create_lca_test_flags.Rd @@ -22,6 +22,7 @@ Other flag functions: \code{\link{create_demog_test_flags}()}, \code{\link{create_hb_cost_test_flags}()}, \code{\link{create_hb_test_flags}()}, -\code{\link{create_hscp_test_flags}()} +\code{\link{create_hscp_test_flags}()}, +\code{\link{create_sending_location_test_flags}()} } \concept{flag functions} diff --git a/man/create_sending_location_test_flags.Rd b/man/create_sending_location_test_flags.Rd new file mode 100644 index 000000000..5d1ad09f7 --- /dev/null +++ b/man/create_sending_location_test_flags.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_sending_location_test_flags.R +\name{create_sending_location_test_flags} +\alias{create_sending_location_test_flags} +\title{Create sending location test flags} +\usage{ +create_sending_location_test_flags(data, sending_location_var) +} +\arguments{ +\item{data}{the data containing the variable sending_location} + +\item{sending_location_var}{sending_location variable} +} +\value{ +a dataframe with flag (TRUE or FALSE) for each sending location +} +\description{ +Create flags for sending location +} +\seealso{ +Other flag functions: +\code{\link{create_demog_test_flags}()}, +\code{\link{create_hb_cost_test_flags}()}, +\code{\link{create_hb_test_flags}()}, +\code{\link{create_hscp_test_flags}()}, +\code{\link{create_lca_test_flags}()} +} +\concept{flag functions} diff --git a/man/fill_geographies.Rd b/man/fill_geographies.Rd index 5308fd8d0..bb619405b 100644 --- a/man/fill_geographies.Rd +++ b/man/fill_geographies.Rd @@ -4,10 +4,19 @@ \alias{fill_geographies} \title{Fill postcode and GP practice geographies} \usage{ -fill_geographies(data) +fill_geographies( + data, + slf_pc_lookup = read_file(get_slf_postcode_path()), + slf_gpprac_lookup = read_file(get_slf_gpprac_path(), 
col_select = c("gpprac", + "cluster", "hbpraccode")) +) } \arguments{ \item{data}{the SLF} + +\item{slf_pc_lookup}{The SLF Postcode lookup} + +\item{slf_gpprac_lookup}{The SLF GP Practice lookup} } \value{ a \link[tibble:tibble-package]{tibble} of the SLF with improved diff --git a/man/fill_missing_cij_markers.Rd b/man/fill_missing_cij_markers.Rd index 03b64217e..4795eed7a 100644 --- a/man/fill_missing_cij_markers.Rd +++ b/man/fill_missing_cij_markers.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/run_episode_file.R +% Please edit documentation in R/create_episode_file.R \name{fill_missing_cij_markers} \alias{fill_missing_cij_markers} \title{Fill any missing CIJ markers for records that should have them} diff --git a/man/flag_non_scottish_residents.Rd b/man/flag_non_scottish_residents.Rd new file mode 100644 index 000000000..ec97dedf6 --- /dev/null +++ b/man/flag_non_scottish_residents.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/add_hri_variables.R +\name{flag_non_scottish_residents} +\alias{flag_non_scottish_residents} +\title{Flag non-Scottish residents} +\usage{ +flag_non_scottish_residents(data, slf_pc_lookup) +} +\arguments{ +\item{data}{An SLF individual file.} + +\item{slf_pc_lookup}{The Source postcode lookup, defaults +to \code{\link[=get_slf_postcode_path]{get_slf_postcode_path()}} read using \code{\link[=read_file]{read_file()}}.} +} +\value{ +A data frame with the variable 'keep_flag' +} +\description{ +Flag non-Scottish residents +} +\details{ +The variable keep_flag can be in the range c(0:4) where +\itemize{ +\item{keep_flag = 0 when resident is Scottish} +\item{keep_flag = 1 when resident is not Scottish} +\item{keep_flag = 2 when the postcode is missing or a dummy, and the gpprac is missing} +\item{keep_flag = 3 when the gpprac is not English and the postcode is missing} +\item{keep_flag = 4 when the gpprac is not English and the postcode is a 
dummy} +} +The intention is to only keep the records where keep_flag = 0 +} diff --git a/man/get_existing_data_for_tests.Rd b/man/get_existing_data_for_tests.Rd index 60eb6e459..101749418 100644 --- a/man/get_existing_data_for_tests.Rd +++ b/man/get_existing_data_for_tests.Rd @@ -4,7 +4,11 @@ \alias{get_existing_data_for_tests} \title{SLF Data for Testing} \usage{ -get_existing_data_for_tests(new_data, file_version = "episode") +get_existing_data_for_tests( + new_data, + file_version = "episode", + anon_chi = FALSE +) } \arguments{ \item{new_data}{a \link[tibble:tibble-package]{tibble} of the @@ -12,6 +16,9 @@ new data which the SLF data will be compared to.} \item{file_version}{whether to test against the "episode" file (the default) or the "individual" file.} + +\item{anon_chi}{Default set as FALSE. For use in episode tests where +we want anon_chi instead of chi.} } \value{ a \link[tibble:tibble-package]{tibble} from the diff --git a/man/get_sc_client_lookup_path.Rd b/man/get_sc_client_lookup_path.Rd new file mode 100644 index 000000000..481baf223 --- /dev/null +++ b/man/get_sc_client_lookup_path.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_sc_lookup_paths.R +\name{get_sc_client_lookup_path} +\alias{get_sc_client_lookup_path} +\title{Social Care Client Lookup File Path} +\usage{ +get_sc_client_lookup_path(year, update = latest_update(), ...) +} +\arguments{ +\item{year}{Financial year.} + +\item{update}{The update month to use, +defaults to \code{\link[=latest_update]{latest_update()}}} + +\item{...}{additional arguments passed to \code{\link[=get_file_path]{get_file_path()}}} +} +\value{ +The path to the social care client lookup file +as an \code{\link[fs:path]{fs::path()}} +} +\description{ +Get the file path for the Social Care Client lookup file +} +\seealso{ +\code{\link[=get_file_path]{get_file_path()}} for the generic function. 
+ +Other social care lookup file paths: +\code{\link{get_sc_demog_lookup_path}()} +} +\concept{social care lookup file paths} diff --git a/man/get_sc_demog_lookup_path.Rd b/man/get_sc_demog_lookup_path.Rd index 8e45e9731..7b39cf287 100644 --- a/man/get_sc_demog_lookup_path.Rd +++ b/man/get_sc_demog_lookup_path.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_sc_demog_path.R +% Please edit documentation in R/get_sc_lookup_paths.R \name{get_sc_demog_lookup_path} \alias{get_sc_demog_lookup_path} \title{Social Care Demographic Lookup File Path} @@ -21,5 +21,8 @@ Get the file path for the Social Care Demographic lookup file } \seealso{ \code{\link[=get_file_path]{get_file_path()}} for the generic function. + +Other social care lookup file paths: +\code{\link{get_sc_client_lookup_path}()} } \concept{social care lookup file paths} diff --git a/man/get_slf_ep_temp_path.Rd b/man/get_slf_ep_temp_path.Rd deleted file mode 100644 index 44e1a44db..000000000 --- a/man/get_slf_ep_temp_path.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_temp_file_paths.R -\name{get_slf_ep_temp_path} -\alias{get_slf_ep_temp_path} -\title{Get a temporary version of the SLF episode file} -\usage{ -get_slf_ep_temp_path(year, temp_version) -} -\arguments{ -\item{year}{The financial year} - -\item{temp_version}{The temp version e.g. 
1 or 7} -} -\value{ -The path to the file (\code{.rds}) -} -\description{ -Get a temporary version of the SLF episode file -} diff --git a/man/get_slf_indiv_temp_path.Rd b/man/get_slf_indiv_temp_path.Rd deleted file mode 100644 index 6ff1c70bd..000000000 --- a/man/get_slf_indiv_temp_path.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_temp_file_paths.R -\name{get_slf_indiv_temp_path} -\alias{get_slf_indiv_temp_path} -\title{Get a temporary version of the SLF individual file} -\usage{ -get_slf_indiv_temp_path(year, temp_version) -} -\arguments{ -\item{year}{The financial year} - -\item{temp_version}{The temp version e.g. 1 or 7} -} -\value{ -The path to the file (\code{.rds}) -} -\description{ -Get a temporary version of the SLF individual file -} diff --git a/man/get_slf_temp_path.Rd b/man/get_slf_temp_path.Rd deleted file mode 100644 index 31f4dde38..000000000 --- a/man/get_slf_temp_path.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_temp_file_paths.R -\name{get_slf_temp_path} -\alias{get_slf_temp_path} -\title{Get a temporary version of the SLF} -\usage{ -get_slf_temp_path( - year, - temp_version, - file_version = c("episode", "individual") -) -} -\arguments{ -\item{year}{The financial year} - -\item{temp_version}{The temp version e.g. 
1 or 7} - -\item{file_version}{Episode or Individual file} -} -\value{ -The path to the file (\code{.rds}) -} -\description{ -Get a temporary version of the SLF -} diff --git a/man/get_source_extract_path.Rd b/man/get_source_extract_path.Rd index e51cbb2c7..fd9502b83 100644 --- a/man/get_source_extract_path.Rd +++ b/man/get_source_extract_path.Rd @@ -6,8 +6,8 @@ \usage{ get_source_extract_path( year, - type = c("Acute", "AE", "AT", "CH", "Client", "CMH", "DD", "Deaths", "DN", "GPOoH", - "HC", "Homelessness", "Maternity", "MH", "Outpatients", "PIS", "SDS"), + type = c("Acute", "AE", "AT", "CH", "CMH", "DD", "Deaths", "DN", "GPOoH", "HC", + "Homelessness", "Maternity", "MH", "Outpatients", "PIS", "SDS"), ... ) } diff --git a/man/join_cohort_lookups.Rd b/man/join_cohort_lookups.Rd index 445dcd7c0..3ef549cc3 100644 --- a/man/join_cohort_lookups.Rd +++ b/man/join_cohort_lookups.Rd @@ -1,10 +1,18 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/run_episode_file.R +% Please edit documentation in R/create_episode_file.R \name{join_cohort_lookups} \alias{join_cohort_lookups} \title{Join cohort lookups} \usage{ -join_cohort_lookups(data, year, update = latest_update()) +join_cohort_lookups( + data, + year, + update = latest_update(), + demographic_cohort = read_file(get_demographic_cohorts_path(year, update), col_select = + c("chi", "demographic_cohort")), + service_use_cohort = read_file(get_service_use_cohorts_path(year, update), col_select = + c("chi", "service_use_cohort")) +) } \arguments{ \item{data}{The in-progress episode file data.} @@ -12,6 +20,8 @@ join_cohort_lookups(data, year, update = latest_update()) \item{year}{The year to process, in FY format.} \item{update}{The update to use} + +\item{demographic_cohort, service_use_cohort}{The cohort data} } \value{ The data including the Demographic and Service Use lookups. 
diff --git a/man/join_deaths_data.Rd b/man/join_deaths_data.Rd index 6508d7893..f3b68fe1a 100644 --- a/man/join_deaths_data.Rd +++ b/man/join_deaths_data.Rd @@ -7,7 +7,7 @@ join_deaths_data( data, year, - slf_deaths_lookup_path = get_slf_deaths_lookup_path(year) + slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year)) ) } \arguments{ @@ -15,7 +15,7 @@ join_deaths_data( \item{year}{financial year, e.g. '1920'} -\item{slf_deaths_lookup_path}{Path to slf deaths lookup.} +\item{slf_deaths_lookup}{The SLF deaths lookup.} } \value{ The data including the deaths lookup matched diff --git a/man/join_sc_client.Rd b/man/join_sc_client.Rd index a30719698..465126dba 100644 --- a/man/join_sc_client.Rd +++ b/man/join_sc_client.Rd @@ -7,7 +7,7 @@ join_sc_client( individual_file, year, - sc_client = read_file(get_source_extract_path(year, "Client")), + sc_client = read_file(get_sc_client_lookup_path(year)), sc_demographics = read_file(get_sc_demog_lookup_path(), col_select = c("sending_location", "social_care_id", "chi")) ) diff --git a/man/load_ep_file_vars.Rd b/man/load_ep_file_vars.Rd index cee9cc440..509b0e00c 100644 --- a/man/load_ep_file_vars.Rd +++ b/man/load_ep_file_vars.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/run_episode_file.R +% Please edit documentation in R/create_episode_file.R \name{load_ep_file_vars} \alias{load_ep_file_vars} \title{Load the unneeded episode file variables} diff --git a/man/match_on_ltcs.Rd b/man/match_on_ltcs.Rd index 0c7e7fb53..e0def00cc 100644 --- a/man/match_on_ltcs.Rd +++ b/man/match_on_ltcs.Rd @@ -4,12 +4,14 @@ \alias{match_on_ltcs} \title{Match on LTC DoB and dates of LTC incidence} \usage{ -match_on_ltcs(data, year) +match_on_ltcs(data, year, ltc_data = read_file(get_ltcs_path(year))) } \arguments{ \item{data}{episode files} \item{year}{financial year, e.g. 
'1920'} + +\item{ltc_data}{The LTC data for the year} } \value{ data matched with long term conditions diff --git a/man/phs_db_connection.Rd b/man/phs_db_connection.Rd index 93e73ee55..8ff9d0a32 100644 --- a/man/phs_db_connection.Rd +++ b/man/phs_db_connection.Rd @@ -4,19 +4,23 @@ \alias{phs_db_connection} \title{Open a connection to a PHS database} \usage{ -phs_db_connection(dsn, username = Sys.getenv("USER")) +phs_db_connection(dsn, username) } \arguments{ -\item{dsn}{The Data Source Name passed on to \code{odbc::dbconnect} -the dsn must be setup first. e.g. SMRA or DVPROD} +\item{dsn}{The Data Source Name (DSN) passed on to \code{\link[odbc:dbConnect-OdbcDriver-method]{odbc::dbConnect()}}; +the DSN must be set up first, e.g. \code{SMRA} or \code{DVPROD}} \item{username}{The username to use for authentication, -if not supplied it first will check the environment variable -and finally ask the user for input.} +if not supplied it will try to find it automatically and if possible ask the +user for input.} } \value{ -a connection to the specified dsn +a connection to the specified Data Source. } \description{ -Opens a connection to PHS database to allow data to be collected +Opens a connection to a PHS database given a Data Source Name +(DSN). It will try to get the username, asking for input if in an interactive +session. It will also use \link[keyring:keyring-package]{keyring} to find +an existing keyring called 'createslf' which should contain a \code{db_password} +key with the user's database password. } diff --git a/man/process_costs_ch_rmd.Rd b/man/process_costs_ch_rmd.Rd index 520898c9e..b990564b7 100644 --- a/man/process_costs_ch_rmd.Rd +++ b/man/process_costs_ch_rmd.Rd @@ -15,5 +15,5 @@ a \link[tibble:tibble-package]{tibble} containing the final cost data. \description{ This will read and process the care homes cost lookup, it will return the final data -but also write this out as a rds. +and write it to disk. 
} diff --git a/man/process_costs_dn_rmd.Rd b/man/process_costs_dn_rmd.Rd index bde475d5a..46bcd93dd 100644 --- a/man/process_costs_dn_rmd.Rd +++ b/man/process_costs_dn_rmd.Rd @@ -15,5 +15,5 @@ a \link[tibble:tibble-package]{tibble} containing the final cost data. \description{ This will read and process the District Nursing cost lookup, it will return the final data -but also write this out as a rds. +and write it to disk. } diff --git a/man/process_costs_gp_ooh_rmd.Rd b/man/process_costs_gp_ooh_rmd.Rd index fd71066c0..f5c611f11 100644 --- a/man/process_costs_gp_ooh_rmd.Rd +++ b/man/process_costs_gp_ooh_rmd.Rd @@ -15,5 +15,5 @@ a \link[tibble:tibble-package]{tibble} containing the final cost data. \description{ This will read and process the GP ooh cost lookup, it will return the final data -but also write this out as a rds. +and write it to disk. } diff --git a/man/process_costs_hc_rmd.Rd b/man/process_costs_hc_rmd.Rd index b15c311da..c3448bcbc 100644 --- a/man/process_costs_hc_rmd.Rd +++ b/man/process_costs_hc_rmd.Rd @@ -15,5 +15,5 @@ a \link[tibble:tibble-package]{tibble} containing the final cost data. \description{ This will read and process the Home Care cost lookup, it will return the final data -but also write this out as a rds. +and write it to disk. } diff --git a/man/process_extract_acute.Rd b/man/process_extract_acute.Rd index af6b85bfe..77a99cef3 100644 --- a/man/process_extract_acute.Rd +++ b/man/process_extract_acute.Rd @@ -20,10 +20,11 @@ the final data as a \link[tibble:tibble-package]{tibble}. \description{ This will read and process the acute extract, it will return the final data -but also write this out as an rds. +and (optionally) write it to disk. 
} \seealso{ Other process extracts: +\code{\link{create_homelessness_lookup}()}, \code{\link{process_extract_ae}()}, \code{\link{process_extract_alarms_telecare}()}, \code{\link{process_extract_care_home}()}, diff --git a/man/process_extract_ae.Rd b/man/process_extract_ae.Rd index 58878e689..9eec39ba5 100644 --- a/man/process_extract_ae.Rd +++ b/man/process_extract_ae.Rd @@ -20,10 +20,11 @@ the final data as a \link[tibble:tibble-package]{tibble}. \description{ This will read and process the A&E extract, it will return the final data -but also write this out as an rds. +and (optionally) write it to disk. } \seealso{ Other process extracts: +\code{\link{create_homelessness_lookup}()}, \code{\link{process_extract_acute}()}, \code{\link{process_extract_alarms_telecare}()}, \code{\link{process_extract_care_home}()}, diff --git a/man/process_extract_alarms_telecare.Rd b/man/process_extract_alarms_telecare.Rd index a6e61365d..7305b7b49 100644 --- a/man/process_extract_alarms_telecare.Rd +++ b/man/process_extract_alarms_telecare.Rd @@ -29,10 +29,11 @@ the final data as a \link[tibble:tibble-package]{tibble}. \description{ This will read and process the (year specific) Alarms Telecare extract, it will return the final data -but also write this out as rds. +and (optionally) write it to disk. } \seealso{ Other process extracts: +\code{\link{create_homelessness_lookup}()}, \code{\link{process_extract_acute}()}, \code{\link{process_extract_ae}()}, \code{\link{process_extract_care_home}()}, diff --git a/man/process_extract_care_home.Rd b/man/process_extract_care_home.Rd index f058ca787..7eed509d8 100644 --- a/man/process_extract_care_home.Rd +++ b/man/process_extract_care_home.Rd @@ -32,10 +32,11 @@ the final data as a \link[tibble:tibble-package]{tibble}. \description{ This will read and process the (year specific) Care Home extract, it will return the final data -but also write this out as rds. +and (optionally) write it to disk. 
} \seealso{ Other process extracts: +\code{\link{create_homelessness_lookup}()}, \code{\link{process_extract_acute}()}, \code{\link{process_extract_ae}()}, \code{\link{process_extract_alarms_telecare}()}, diff --git a/man/process_extract_cmh.Rd b/man/process_extract_cmh.Rd index 147651f37..64e085dcf 100644 --- a/man/process_extract_cmh.Rd +++ b/man/process_extract_cmh.Rd @@ -20,10 +20,11 @@ the final data as a \link[tibble:tibble-package]{tibble}. \description{ This will read and process the CMH extract, it will return the final data -but also write this out as an rds. +and (optionally) write it to disk. } \seealso{ Other process extracts: +\code{\link{create_homelessness_lookup}()}, \code{\link{process_extract_acute}()}, \code{\link{process_extract_ae}()}, \code{\link{process_extract_alarms_telecare}()}, diff --git a/man/process_extract_delayed_discharges.Rd b/man/process_extract_delayed_discharges.Rd index ddc41ec46..c6fd560a7 100644 --- a/man/process_extract_delayed_discharges.Rd +++ b/man/process_extract_delayed_discharges.Rd @@ -20,10 +20,11 @@ the final data as a \link[tibble:tibble-package]{tibble}. \description{ This will read and process the delayed discharges extract, it will return the final data -but also write this out as an rds. +and (optionally) write it to disk. } \seealso{ Other process extracts: +\code{\link{create_homelessness_lookup}()}, \code{\link{process_extract_acute}()}, \code{\link{process_extract_ae}()}, \code{\link{process_extract_alarms_telecare}()}, diff --git a/man/process_extract_district_nursing.Rd b/man/process_extract_district_nursing.Rd index 4d9383c2e..eb2814fbc 100644 --- a/man/process_extract_district_nursing.Rd +++ b/man/process_extract_district_nursing.Rd @@ -27,10 +27,11 @@ the final data as a \link[tibble:tibble-package]{tibble}. \description{ This will read and process the District Nursing extract, it will return the final data -but also write this out an rds. +and (optionally) write it to disk. 
} \seealso{ Other process extracts: +\code{\link{create_homelessness_lookup}()}, \code{\link{process_extract_acute}()}, \code{\link{process_extract_ae}()}, \code{\link{process_extract_alarms_telecare}()}, diff --git a/man/process_extract_gp_ooh.Rd b/man/process_extract_gp_ooh.Rd index 8217f0d6f..ddec006fe 100644 --- a/man/process_extract_gp_ooh.Rd +++ b/man/process_extract_gp_ooh.Rd @@ -20,10 +20,11 @@ the final data as a \link[tibble:tibble-package]{tibble}. \description{ This will read and process the GP OoH extract, it will return the final data -but also write this out an rds. +and (optionally) write it to disk. } \seealso{ Other process extracts: +\code{\link{create_homelessness_lookup}()}, \code{\link{process_extract_acute}()}, \code{\link{process_extract_ae}()}, \code{\link{process_extract_alarms_telecare}()}, diff --git a/man/process_extract_home_care.Rd b/man/process_extract_home_care.Rd index 4fef5ac14..e4e02fdad 100644 --- a/man/process_extract_home_care.Rd +++ b/man/process_extract_home_care.Rd @@ -24,10 +24,11 @@ the final data as a \link[tibble:tibble-package]{tibble}. \description{ This will read and process the (year specific) Home Care extract, it will return the final data -but also write this out as rds. +and (optionally) write it to disk. } \seealso{ Other process extracts: +\code{\link{create_homelessness_lookup}()}, \code{\link{process_extract_acute}()}, \code{\link{process_extract_ae}()}, \code{\link{process_extract_alarms_telecare}()}, diff --git a/man/process_extract_homelessness.Rd b/man/process_extract_homelessness.Rd index 7531f8f22..7b2254050 100644 --- a/man/process_extract_homelessness.Rd +++ b/man/process_extract_homelessness.Rd @@ -31,10 +31,11 @@ the final data as a \link[tibble:tibble-package]{tibble}. \description{ This will read and process the homelessness extract, it will return the final data -and optionally write it out as rds. +and (optionally) write it to disk. 
} \seealso{ Other process extracts: +\code{\link{create_homelessness_lookup}()}, \code{\link{process_extract_acute}()}, \code{\link{process_extract_ae}()}, \code{\link{process_extract_alarms_telecare}()}, diff --git a/man/process_extract_maternity.Rd b/man/process_extract_maternity.Rd index cd01e6931..17dd1a64c 100644 --- a/man/process_extract_maternity.Rd +++ b/man/process_extract_maternity.Rd @@ -20,10 +20,11 @@ the final data as a \link[tibble:tibble-package]{tibble}. \description{ This will read and process the maternity extract, it will return the final data -but also write this out an rds. +and (optionally) write it to disk. } \seealso{ Other process extracts: +\code{\link{create_homelessness_lookup}()}, \code{\link{process_extract_acute}()}, \code{\link{process_extract_ae}()}, \code{\link{process_extract_alarms_telecare}()}, diff --git a/man/process_extract_mental_health.Rd b/man/process_extract_mental_health.Rd index 7159aae8b..5f1fc7330 100644 --- a/man/process_extract_mental_health.Rd +++ b/man/process_extract_mental_health.Rd @@ -20,10 +20,11 @@ the final data as a \link[tibble:tibble-package]{tibble}. \description{ This will read and process the mental health extract, it will return the final data -but also write this out an rds. +and (optionally) write it to disk. } \seealso{ Other process extracts: +\code{\link{create_homelessness_lookup}()}, \code{\link{process_extract_acute}()}, \code{\link{process_extract_ae}()}, \code{\link{process_extract_alarms_telecare}()}, diff --git a/man/process_extract_nrs_deaths.Rd b/man/process_extract_nrs_deaths.Rd index 724af5d87..1938e15ec 100644 --- a/man/process_extract_nrs_deaths.Rd +++ b/man/process_extract_nrs_deaths.Rd @@ -23,6 +23,7 @@ final data and write this out. 
} \seealso{ Other process extracts: +\code{\link{create_homelessness_lookup}()}, \code{\link{process_extract_acute}()}, \code{\link{process_extract_ae}()}, \code{\link{process_extract_alarms_telecare}()}, diff --git a/man/process_extract_ooh_consultations.Rd b/man/process_extract_ooh_consultations.Rd index d682197ca..e00155191 100644 --- a/man/process_extract_ooh_consultations.Rd +++ b/man/process_extract_ooh_consultations.Rd @@ -17,10 +17,11 @@ the final data as a \link[tibble:tibble-package]{tibble}. \description{ This will read and process the GP OOH Consultations extract, it will return the final data -but also write this out as an rds. +and (optionally) write it to disk. } \seealso{ Other process extracts: +\code{\link{create_homelessness_lookup}()}, \code{\link{process_extract_acute}()}, \code{\link{process_extract_ae}()}, \code{\link{process_extract_alarms_telecare}()}, diff --git a/man/process_extract_ooh_diagnosis.Rd b/man/process_extract_ooh_diagnosis.Rd index 2a962989a..2dcbee647 100644 --- a/man/process_extract_ooh_diagnosis.Rd +++ b/man/process_extract_ooh_diagnosis.Rd @@ -17,10 +17,11 @@ the final data as a \link[tibble:tibble-package]{tibble}. \description{ This will read and process the GP OOH Diagnosis extract, it will return the final data -but also write this out an rds. +and (optionally) write it to disk. } \seealso{ Other process extracts: +\code{\link{create_homelessness_lookup}()}, \code{\link{process_extract_acute}()}, \code{\link{process_extract_ae}()}, \code{\link{process_extract_alarms_telecare}()}, diff --git a/man/process_extract_ooh_outcomes.Rd b/man/process_extract_ooh_outcomes.Rd index 5b220e04a..31ec64439 100644 --- a/man/process_extract_ooh_outcomes.Rd +++ b/man/process_extract_ooh_outcomes.Rd @@ -17,10 +17,11 @@ the final data as a \link[tibble:tibble-package]{tibble}. \description{ This will read and process the GP OOH Outcomes extract, it will return the final data -but also write this out as an rds. 
+and (optionally) write it to disk. } \seealso{ Other process extracts: +\code{\link{create_homelessness_lookup}()}, \code{\link{process_extract_acute}()}, \code{\link{process_extract_ae}()}, \code{\link{process_extract_alarms_telecare}()}, diff --git a/man/process_extract_outpatients.Rd b/man/process_extract_outpatients.Rd index c5e10abc8..3a46ad119 100644 --- a/man/process_extract_outpatients.Rd +++ b/man/process_extract_outpatients.Rd @@ -20,10 +20,11 @@ the final data as a \link[tibble:tibble-package]{tibble}. \description{ This will read and process the outpatients extract, it will return the final data -but also write this out as an rds. +and (optionally) write it to disk. } \seealso{ Other process extracts: +\code{\link{create_homelessness_lookup}()}, \code{\link{process_extract_acute}()}, \code{\link{process_extract_ae}()}, \code{\link{process_extract_alarms_telecare}()}, diff --git a/man/process_extract_prescribing.Rd b/man/process_extract_prescribing.Rd index cf294d95c..195a60bfe 100644 --- a/man/process_extract_prescribing.Rd +++ b/man/process_extract_prescribing.Rd @@ -20,10 +20,11 @@ the final data as a \link[tibble:tibble-package]{tibble}. \description{ This will read and process the prescribing extract, it will return the final data -but also write this out as an rds. +and (optionally) write it to disk. } \seealso{ Other process extracts: +\code{\link{create_homelessness_lookup}()}, \code{\link{process_extract_acute}()}, \code{\link{process_extract_ae}()}, \code{\link{process_extract_alarms_telecare}()}, diff --git a/man/process_extract_sds.Rd b/man/process_extract_sds.Rd index 7e8e44a38..70742bd2e 100644 --- a/man/process_extract_sds.Rd +++ b/man/process_extract_sds.Rd @@ -24,10 +24,11 @@ the final data as a \link[tibble:tibble-package]{tibble}. \description{ This will read and process the (year specific) SDS extract, it will return the final data -but also write this out as rds. +and (optionally) write it to disk. 
} \seealso{ Other process extracts: +\code{\link{create_homelessness_lookup}()}, \code{\link{process_extract_acute}()}, \code{\link{process_extract_ae}()}, \code{\link{process_extract_alarms_telecare}()}, diff --git a/man/process_it_chi_deaths.Rd b/man/process_it_chi_deaths.Rd index f19d8b6cc..1d8e085ab 100644 --- a/man/process_it_chi_deaths.Rd +++ b/man/process_it_chi_deaths.Rd @@ -21,6 +21,7 @@ final data and write the data out. } \seealso{ Other process extracts: +\code{\link{create_homelessness_lookup}()}, \code{\link{process_extract_acute}()}, \code{\link{process_extract_ae}()}, \code{\link{process_extract_alarms_telecare}()}, diff --git a/man/process_lookup_gpprac.Rd b/man/process_lookup_gpprac.Rd index 4f19f85e1..107af24c0 100644 --- a/man/process_lookup_gpprac.Rd +++ b/man/process_lookup_gpprac.Rd @@ -30,6 +30,7 @@ the final data and also write this out to disk. } \seealso{ Other process extracts: +\code{\link{create_homelessness_lookup}()}, \code{\link{process_extract_acute}()}, \code{\link{process_extract_ae}()}, \code{\link{process_extract_alarms_telecare}()}, diff --git a/man/process_lookup_postcode.Rd b/man/process_lookup_postcode.Rd index 6ad56e5b3..e556efd51 100644 --- a/man/process_lookup_postcode.Rd +++ b/man/process_lookup_postcode.Rd @@ -27,10 +27,11 @@ the final data as a \link[tibble:tibble-package]{tibble}. \description{ This will read and process the postcode lookup, it will return the final data -but also write this out as an rds. +and (optionally) write it to disk. } \seealso{ Other process extracts: +\code{\link{create_homelessness_lookup}()}, \code{\link{process_extract_acute}()}, \code{\link{process_extract_ae}()}, \code{\link{process_extract_alarms_telecare}()}, diff --git a/man/process_lookup_sc_client.Rd b/man/process_lookup_sc_client.Rd index e48426419..ceb3caf15 100644 --- a/man/process_lookup_sc_client.Rd +++ b/man/process_lookup_sc_client.Rd @@ -20,10 +20,11 @@ the final data as a \link[tibble:tibble-package]{tibble}. 
\description{ This will read and process the social care client lookup, it will return the final data -but also write this out as an rds. +and (optionally) write it to disk. } \seealso{ Other process extracts: +\code{\link{create_homelessness_lookup}()}, \code{\link{process_extract_acute}()}, \code{\link{process_extract_ae}()}, \code{\link{process_extract_alarms_telecare}()}, diff --git a/man/process_lookup_sc_demographics.Rd b/man/process_lookup_sc_demographics.Rd index 6c00b4352..a89933425 100644 --- a/man/process_lookup_sc_demographics.Rd +++ b/man/process_lookup_sc_demographics.Rd @@ -24,10 +24,11 @@ the final data as a \link[tibble:tibble-package]{tibble}. \description{ This will read and process the social care demographic lookup, it will return the final data -but also write this out as an rds. +and (optionally) write it to disk. } \seealso{ Other process extracts: +\code{\link{create_homelessness_lookup}()}, \code{\link{process_extract_acute}()}, \code{\link{process_extract_ae}()}, \code{\link{process_extract_alarms_telecare}()}, diff --git a/man/process_sc_all_alarms_telecare.Rd b/man/process_sc_all_alarms_telecare.Rd index 7e21407f9..1dded751d 100644 --- a/man/process_sc_all_alarms_telecare.Rd +++ b/man/process_sc_all_alarms_telecare.Rd @@ -21,10 +21,11 @@ the final data as a \link[tibble:tibble-package]{tibble}. \description{ This will read and process the all Alarms Telecare extract, it will return the final data -but also write this out as a rds. +and (optionally) write it to disk. } \seealso{ Other process extracts: +\code{\link{create_homelessness_lookup}()}, \code{\link{process_extract_acute}()}, \code{\link{process_extract_ae}()}, \code{\link{process_extract_alarms_telecare}()}, diff --git a/man/process_sc_all_care_home.Rd b/man/process_sc_all_care_home.Rd index 691fe51db..37d6332ca 100644 --- a/man/process_sc_all_care_home.Rd +++ b/man/process_sc_all_care_home.Rd @@ -36,10 +36,11 @@ the final data as a \link[tibble:tibble-package]{tibble}. 
\description{ This will read and process the all Care Home extract, it will return the final data -but also write this out as a rds. +and (optionally) write it to disk. } \seealso{ Other process extracts: +\code{\link{create_homelessness_lookup}()}, \code{\link{process_extract_acute}()}, \code{\link{process_extract_ae}()}, \code{\link{process_extract_alarms_telecare}()}, diff --git a/man/process_sc_all_home_care.Rd b/man/process_sc_all_home_care.Rd index d498514db..1e0afcafd 100644 --- a/man/process_sc_all_home_care.Rd +++ b/man/process_sc_all_home_care.Rd @@ -21,10 +21,11 @@ the final data as a \link[tibble:tibble-package]{tibble}. \description{ This will read and process the all home care extract, it will return the final data -but also write this out as a rds. +and (optionally) write it to disk. } \seealso{ Other process extracts: +\code{\link{create_homelessness_lookup}()}, \code{\link{process_extract_acute}()}, \code{\link{process_extract_ae}()}, \code{\link{process_extract_alarms_telecare}()}, diff --git a/man/process_sc_all_sds.Rd b/man/process_sc_all_sds.Rd index f2d6d8a1c..69d79fc9d 100644 --- a/man/process_sc_all_sds.Rd +++ b/man/process_sc_all_sds.Rd @@ -21,10 +21,11 @@ the final data as a \link[tibble:tibble-package]{tibble}. \description{ This will read and process the all SDS extract, it will return the final data -but also write this out as a rds. +and (optionally) write it to disk. 
} \seealso{ Other process extracts: +\code{\link{create_homelessness_lookup}()}, \code{\link{process_extract_acute}()}, \code{\link{process_extract_ae}()}, \code{\link{process_extract_alarms_telecare}()}, diff --git a/man/process_tests_sc_client_lookup.Rd b/man/process_tests_sc_client_lookup.Rd new file mode 100644 index 000000000..7d115e2d3 --- /dev/null +++ b/man/process_tests_sc_client_lookup.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/process_tests_sc_client_lookup.R +\name{process_tests_sc_client_lookup} +\alias{process_tests_sc_client_lookup} +\title{Social care client lookup tests} +\usage{ +process_tests_sc_client_lookup(data, year) +} +\arguments{ +\item{data}{a \link[tibble:tibble-package]{tibble} of the processed data extract.} + +\item{year}{the financial year of the extract in the format '1718'.} +} +\value{ +a \link[tibble:tibble-package]{tibble} containing a test comparison. +} +\description{ +This script takes the processed social care client lookup and +produces a test comparison with the previous data. This is written to +disk in the tests workbook. 
+} diff --git a/man/produce_sc_ch_episodes_tests.Rd b/man/produce_sc_ch_episodes_tests.Rd index f07dbe382..60fd9c9a9 100644 --- a/man/produce_sc_ch_episodes_tests.Rd +++ b/man/produce_sc_ch_episodes_tests.Rd @@ -20,6 +20,7 @@ Produce the test for the Care Home all episodes Other social care test functions: \code{\link{produce_sc_demog_lookup_tests}()}, \code{\link{produce_source_at_tests}()}, -\code{\link{produce_source_sds_tests}()} +\code{\link{produce_source_sds_tests}()}, +\code{\link{produce_tests_sc_client_lookup}()} } \concept{social care test functions} diff --git a/man/produce_sc_demog_lookup_tests.Rd b/man/produce_sc_demog_lookup_tests.Rd index 36d139955..a214f1ece 100644 --- a/man/produce_sc_demog_lookup_tests.Rd +++ b/man/produce_sc_demog_lookup_tests.Rd @@ -20,6 +20,7 @@ Produce the tests for Social Care Demographic Lookup Other social care test functions: \code{\link{produce_sc_ch_episodes_tests}()}, \code{\link{produce_source_at_tests}()}, -\code{\link{produce_source_sds_tests}()} +\code{\link{produce_source_sds_tests}()}, +\code{\link{produce_tests_sc_client_lookup}()} } \concept{social care test functions} diff --git a/man/produce_source_at_tests.Rd b/man/produce_source_at_tests.Rd index 06efaa426..96033fe0d 100644 --- a/man/produce_source_at_tests.Rd +++ b/man/produce_source_at_tests.Rd @@ -25,6 +25,7 @@ Produce the test for the Alarm Telecare all episodes Other social care test functions: \code{\link{produce_sc_ch_episodes_tests}()}, \code{\link{produce_sc_demog_lookup_tests}()}, -\code{\link{produce_source_sds_tests}()} +\code{\link{produce_source_sds_tests}()}, +\code{\link{produce_tests_sc_client_lookup}()} } \concept{social care test functions} diff --git a/man/produce_source_dn_tests.Rd b/man/produce_source_dn_tests.Rd index 779dbb3bc..52ebbd611 100644 --- a/man/produce_source_dn_tests.Rd +++ b/man/produce_source_dn_tests.Rd @@ -35,9 +35,8 @@ It will also produce various summary statistics for bedday, cost and episode date variables. 
} \seealso{ -\code{\link[=create_hb_test_flags]{create_hb_test_flags()}} -#' \code{\link[=create_hscp_test_flags]{create_hscp_test_flags()}} and \code{\link[=create_hb_cost_test_flags]{create_hb_cost_test_flags()}} -for creating test flags +\code{\link[=create_hb_test_flags]{create_hb_test_flags()}}, \code{\link[=create_hscp_test_flags]{create_hscp_test_flags()}} +and \code{\link[=create_hb_cost_test_flags]{create_hb_cost_test_flags()}} for creating test flags. calculate_measures diff --git a/man/produce_source_extract_tests.Rd b/man/produce_source_extract_tests.Rd index 679132127..97984103a 100644 --- a/man/produce_source_extract_tests.Rd +++ b/man/produce_source_extract_tests.Rd @@ -7,7 +7,8 @@ produce_source_extract_tests( data, sum_mean_vars = c("beddays", "cost", "yearstay"), - max_min_vars = c("record_keydate1", "record_keydate2", "cost_total_net", "yearstay") + max_min_vars = c("record_keydate1", "record_keydate2", "cost_total_net", "yearstay"), + add_hscp_count = TRUE ) } \arguments{ @@ -17,6 +18,8 @@ produce_source_extract_tests( \item{sum_mean_vars}{variables used when selecting 'all' measures from \code{\link[=calculate_measures]{calculate_measures()}}} \item{max_min_vars}{variables used when selecting 'min-max' from \code{\link[=calculate_measures]{calculate_measures()}}} + +\item{add_hscp_count}{Default set to TRUE. 
For use where \verb{hscp variable} is not available, specify FALSE.} } \value{ a dataframe with a count of each flag diff --git a/man/produce_source_sds_tests.Rd b/man/produce_source_sds_tests.Rd index 6c0cefa46..b4cbc8d41 100644 --- a/man/produce_source_sds_tests.Rd +++ b/man/produce_source_sds_tests.Rd @@ -26,6 +26,7 @@ Produce the test for the SDS all episodes Other social care test functions: \code{\link{produce_sc_ch_episodes_tests}()}, \code{\link{produce_sc_demog_lookup_tests}()}, -\code{\link{produce_source_at_tests}()} +\code{\link{produce_source_at_tests}()}, +\code{\link{produce_tests_sc_client_lookup}()} } \concept{social care test functions} diff --git a/man/produce_tests_sc_client_lookup.Rd b/man/produce_tests_sc_client_lookup.Rd new file mode 100644 index 000000000..08c5edbad --- /dev/null +++ b/man/produce_tests_sc_client_lookup.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/process_tests_sc_client_lookup.R +\name{produce_tests_sc_client_lookup} +\alias{produce_tests_sc_client_lookup} +\title{Social care Client lookup Tests} +\usage{ +produce_tests_sc_client_lookup(data) +} +\arguments{ +\item{data}{new or old data for testing summary flags +(data is from \code{\link[=get_source_extract_path]{get_source_extract_path()}})} + +\item{max_min_vars}{variables used when selecting 'min-max' from \code{\link[=calculate_measures]{calculate_measures()}}} +} +\value{ +a dataframe with a count of each flag. 
+} +\description{ +Produce the test for the social care Client all episodes +} +\seealso{ +Other social care test functions: +\code{\link{produce_sc_ch_episodes_tests}()}, +\code{\link{produce_sc_demog_lookup_tests}()}, +\code{\link{produce_source_at_tests}()}, +\code{\link{produce_source_sds_tests}()} +} +\concept{social care test functions} diff --git a/man/read_extract_gp_ooh.Rd b/man/read_extract_gp_ooh.Rd index eae6c52dc..233844074 100644 --- a/man/read_extract_gp_ooh.Rd +++ b/man/read_extract_gp_ooh.Rd @@ -26,10 +26,11 @@ the final data as a \link[tibble:tibble-package]{tibble}. \description{ This will read and process the GP OoH extract, it will return the final data -but also write this out as an rds. +and (optionally) write it to disk. } \seealso{ Other process extracts: +\code{\link{create_homelessness_lookup}()}, \code{\link{process_extract_acute}()}, \code{\link{process_extract_ae}()}, \code{\link{process_extract_alarms_telecare}()}, diff --git a/man/read_file.Rd b/man/read_file.Rd index b8231218f..1ef351342 100644 --- a/man/read_file.Rd +++ b/man/read_file.Rd @@ -14,7 +14,7 @@ read_file(path, col_select = NULL, as_data_frame = TRUE, ...) \link[tidyselect:eval_select]{tidy selection specification} of columns, as used in \code{dplyr::select()}.} -\item{as_data_frame}{Should the function return a \code{data.frame} (default) or +\item{as_data_frame}{Should the function return a \code{tibble} (default) or an Arrow \link[arrow]{Table}?} \item{...}{Addition arguments passed to the relevant function.} diff --git a/man/read_it_chi_deaths.Rd b/man/read_it_chi_deaths.Rd index e2b9e4c40..d1bfe5cf7 100644 --- a/man/read_it_chi_deaths.Rd +++ b/man/read_it_chi_deaths.Rd @@ -17,6 +17,7 @@ This will read the CHI deaths extract and return the data. 
} \seealso{ Other process extracts: +\code{\link{create_homelessness_lookup}()}, \code{\link{process_extract_acute}()}, \code{\link{process_extract_ae}()}, \code{\link{process_extract_alarms_telecare}()}, diff --git a/man/read_lookup_sc_client.Rd b/man/read_lookup_sc_client.Rd index 6579fa9f7..283bc6a9a 100644 --- a/man/read_lookup_sc_client.Rd +++ b/man/read_lookup_sc_client.Rd @@ -5,14 +5,14 @@ \title{Process the social care client lookup} \usage{ read_lookup_sc_client( - sc_dvprod_connection = phs_db_connection(dsn = "DVPROD"), - fyyear + fyyear, + sc_dvprod_connection = phs_db_connection(dsn = "DVPROD") ) } \arguments{ -\item{sc_dvprod_connection}{The connection to the SC platform.} - \item{fyyear}{The year to process, in the standard format '1718'} + +\item{sc_dvprod_connection}{The connection to the SC platform.} } \value{ the final data as a \link[tibble:tibble-package]{tibble}. @@ -23,6 +23,7 @@ social care client lookup } \seealso{ Other process extracts: +\code{\link{create_homelessness_lookup}()}, \code{\link{process_extract_acute}()}, \code{\link{process_extract_ae}()}, \code{\link{process_extract_alarms_telecare}()}, diff --git a/man/recode_gender.Rd b/man/recode_gender.Rd index 526d2829d..aaa28e6eb 100644 --- a/man/recode_gender.Rd +++ b/man/recode_gender.Rd @@ -7,7 +7,7 @@ recode_gender(episode_file) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} } \description{ Recode gender to 1.5 if 0 or 9. 
diff --git a/man/remove_blank_chi.Rd b/man/remove_blank_chi.Rd index 9cba40a8f..b290dd1e7 100644 --- a/man/remove_blank_chi.Rd +++ b/man/remove_blank_chi.Rd @@ -7,7 +7,7 @@ remove_blank_chi(episode_file) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} } \description{ Convert blank strings to NA and remove NAs from CHI column diff --git a/man/run_episode_file.Rd b/man/run_episode_file.Rd deleted file mode 100644 index 59d5fea1d..000000000 --- a/man/run_episode_file.Rd +++ /dev/null @@ -1,30 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/run_episode_file.R -\name{run_episode_file} -\alias{run_episode_file} -\title{Produce the Source Episode file} -\usage{ -run_episode_file( - processed_data_list, - year, - write_to_disk = TRUE, - anon_chi_out = TRUE -) -} -\arguments{ -\item{processed_data_list}{containing data from processed extracts.} - -\item{year}{The year to process, in FY format.} - -\item{write_to_disk}{(optional) Should the data be written to disk default is -\code{TRUE} i.e. 
write the data to disk.} - -\item{anon_chi_out}{(Default:TRUE) Should \code{anon_chi} be used in the output -(instead of chi)} -} -\value{ -a \link[tibble:tibble-package]{tibble} containing the episode file -} -\description{ -Produce the Source Episode file -} diff --git a/man/select.Rd b/man/select.Rd deleted file mode 100644 index 435096d9a..000000000 --- a/man/select.Rd +++ /dev/null @@ -1,30 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aggregate_by_chi_zihao.R -\name{vars_end_with} -\alias{vars_end_with} -\alias{vars_start_with} -\alias{vars_contain} -\title{select columns ending with some patterns} -\usage{ -vars_end_with(data, vars, ignore_case = FALSE) - -vars_start_with(data, vars, ignore_case = FALSE) - -vars_contain(data, vars, ignore_case = FALSE) -} -\description{ -select columns ending with some patterns - -select columns starting with some patterns - -select columns contains some characters -} -\section{Functions}{ -\itemize{ -\item \code{vars_end_with()}: columns based on patterns - -\item \code{vars_start_with()}: columns based on patterns - -\item \code{vars_contain()}: columns based on patterns - -}} diff --git a/man/setup_keyring.Rd b/man/setup_keyring.Rd new file mode 100644 index 000000000..c40ef31c1 --- /dev/null +++ b/man/setup_keyring.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_connection_PHS_database.R +\name{setup_keyring} +\alias{setup_keyring} +\title{Interactively set up the keyring} +\usage{ +setup_keyring( + keyring = "createslf", + key = "db_password", + keyring_exists = FALSE, + key_exists = FALSE, + env_var_pass_exists = FALSE +) +} +\arguments{ +\item{keyring}{Name of the keyring} + +\item{key}{Name of the key} + +\item{keyring_exists}{Does the keyring already exist} + +\item{key_exists}{Does the key already exist} + +\item{env_var_pass_exists}{Does the password for the keyring already exist +in the environment.} +} +\value{ 
+NULL (invisibly) +} +\description{ +This is meant to be used with \code{\link[=phs_db_connection]{phs_db_connection()}}, it can only be used +interactively i.e. not in targets or in a workbench job. + +With the default options it will go through the steps to set up a keyring +which can be used to supply passwords to \code{\link[odbc:dbConnect-OdbcDriver-method]{odbc::dbConnect()}} (or others) in a +secure and seamless way. +\enumerate{ +\item Create an .Renviron file in the project and add a password (for the +keyring) to it. +\item Create a keyring with the password - Since we have saved the password as +an environment variable it can be picked unlocked and used automatically. +\item Add the database password to the keyring. +} +} diff --git a/man/store_ep_file_vars.Rd b/man/store_ep_file_vars.Rd index 06316aac1..880266d58 100644 --- a/man/store_ep_file_vars.Rd +++ b/man/store_ep_file_vars.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/run_episode_file.R +% Please edit documentation in R/create_episode_file.R \name{store_ep_file_vars} \alias{store_ep_file_vars} \title{Store the unneeded episode file variables} diff --git a/man/vars_select.Rd b/man/vars_select.Rd new file mode 100644 index 000000000..22222ac22 --- /dev/null +++ b/man/vars_select.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/aggregate_by_chi.R +\name{vars_end_with} +\alias{vars_end_with} +\alias{vars_start_with} +\alias{vars_contain} +\title{Select columns according to a pattern} +\usage{ +vars_end_with(data, vars, ignore_case = FALSE) + +vars_start_with(data, vars, ignore_case = FALSE) + +vars_contain(data, vars, ignore_case = FALSE) +} +\arguments{ +\item{data}{The data from which to select columns/variables.} + +\item{vars}{The variables / pattern to find, as a character vector} + +\item{ignore_case}{Should case be ignored (Default: FALSE)} +} +\description{ +Select columns according to a 
pattern +} +\section{Functions}{ +\itemize{ +\item \code{vars_end_with()}: Choose variables ending in a given pattern. + +\item \code{vars_start_with()}: Choose variables starting with a given pattern. + +\item \code{vars_contain()}: Choose variables which contain a given pattern. + +}} diff --git a/run_targets_1718.R b/run_targets_1718.R new file mode 100644 index 000000000..ebc58895f --- /dev/null +++ b/run_targets_1718.R @@ -0,0 +1,4 @@ +library(targets) +tar_make_future( + names = (targets::contains("1718")) +) diff --git a/run_targets_1819.R b/run_targets_1819.R new file mode 100644 index 000000000..83bbcedef --- /dev/null +++ b/run_targets_1819.R @@ -0,0 +1,4 @@ +library(targets) +tar_make_future( + names = (targets::contains("1819")) +) diff --git a/run_targets_1920.R b/run_targets_1920.R new file mode 100644 index 000000000..1640d1900 --- /dev/null +++ b/run_targets_1920.R @@ -0,0 +1,4 @@ +library(targets) +tar_make_future( + names = (targets::contains("1920")) +) diff --git a/run_targets_2021.R b/run_targets_2021.R new file mode 100644 index 000000000..80749e81a --- /dev/null +++ b/run_targets_2021.R @@ -0,0 +1,4 @@ +library(targets) +tar_make_future( + names = (targets::contains("2021")) +) diff --git a/run_targets_2122.R b/run_targets_2122.R new file mode 100644 index 000000000..aa95d7b24 --- /dev/null +++ b/run_targets_2122.R @@ -0,0 +1,4 @@ +library(targets) +tar_make_future( + names = (targets::contains("2122")) +) diff --git a/run_targets_2223.R b/run_targets_2223.R new file mode 100644 index 000000000..2ded7d5fd --- /dev/null +++ b/run_targets_2223.R @@ -0,0 +1,4 @@ +library(targets) +tar_make_future( + names = (targets::contains("2223")) +) diff --git a/run_targets_2324.R b/run_targets_2324.R new file mode 100644 index 000000000..b875984f4 --- /dev/null +++ b/run_targets_2324.R @@ -0,0 +1,4 @@ +library(targets) +tar_make_future( + names = (targets::contains("2324")) +) diff --git a/tests/testthat/_snaps/convert_sending_location_to_lca.md 
b/tests/testthat/_snaps/convert_sending_location_to_lca.md new file mode 100644 index 000000000..1fa02dc14 --- /dev/null +++ b/tests/testthat/_snaps/convert_sending_location_to_lca.md @@ -0,0 +1,12 @@ +# Can convert a SC sending location to lca code + + Code + convert_sending_location_to_lca(c(100L, 110L, 120L, 130L, 355L, 150L, 395L, + 170L, 180L, 190L, 200L, 210L, 220L, 230L, 240L, 250L, 260L, 270L, 280L, 290L, + 300L, 310L, 320L, 330L, 340L, 350L, 360L, 370L, 380L, 390L, 400L, 235L, 999L, + 0L, NA_integer_)) + Output + [1] "01" "02" "03" "04" "05" "06" "07" "08" "09" "10" "11" "12" "13" "14" "15" + [16] "16" "17" "18" "19" "20" "21" "22" "23" "24" "25" "26" "27" "28" "29" "30" + [31] "31" "32" NA NA NA + diff --git a/tests/testthat/test-00-update_refs.R b/tests/testthat/test-00-update_refs.R index a1cabf9c5..4f3ef4ed1 100644 --- a/tests/testthat/test-00-update_refs.R +++ b/tests/testthat/test-00-update_refs.R @@ -13,7 +13,7 @@ test_that("Previous Update string looks valid", { }) test_that("Previous Update works for different month values", { - expect_equal(previous_update(0), latest_update()) + expect_equal(previous_update(months_ago = 0L), latest_update()) latest_update_month <- lubridate::month( lubridate::my(latest_update()), diff --git a/tests/testthat/test-check_year_valid.R b/tests/testthat/test-check_year_valid.R index ca0738c89..eda74dbdf 100644 --- a/tests/testthat/test-check_year_valid.R +++ b/tests/testthat/test-check_year_valid.R @@ -49,7 +49,8 @@ test_that("Check year valid works for specific datasets ", { expect_true(check_year_valid("1920", "NSU")) expect_true(check_year_valid("2021", "NSU")) expect_true(check_year_valid("2122", "NSU")) - expect_false(check_year_valid("2223", "NSU")) + expect_true(check_year_valid("2223", "NSU")) + expect_false(check_year_valid("2324", "NSU")) # SPARRA expect_false(check_year_valid("1415", "SPARRA")) diff --git a/tests/testthat/test-compute_mid_year_age.R b/tests/testthat/test-compute_mid_year_age.R new file mode 
100644 index 000000000..a4a542b9e --- /dev/null +++ b/tests/testthat/test-compute_mid_year_age.R @@ -0,0 +1,16 @@ +test_that("Accurately compute mid year age", { + expect_equal( + compute_mid_year_age("1718", lubridate::make_date("2000")), + phsmethods::age_calculate( + lubridate::make_date("2000"), + lubridate::make_date("2017", 9L, 30L) + ) + ) + expect_equal( + compute_mid_year_age("2021", lubridate::make_date("1999") + 1:1000), + phsmethods::age_calculate( + lubridate::make_date("1999") + 1:1000, + lubridate::make_date("2020", 9L, 30L) + ) + ) +}) diff --git a/tests/testthat/test-convert_sending_location_to_lca.R b/tests/testthat/test-convert_sending_location_to_lca.R new file mode 100644 index 000000000..eb66802a6 --- /dev/null +++ b/tests/testthat/test-convert_sending_location_to_lca.R @@ -0,0 +1,52 @@ +test_that("Can convert a SC sending location to lca code", { + expect_snapshot( + convert_sending_location_to_lca( + c( + 100L, + 110L, + 120L, + 130L, + 355L, + 150L, + 395L, + 170L, + 180L, + 190L, + 200L, + 210L, + 220L, + 230L, + 240L, + 250L, + 260L, + 270L, + 280L, + 290L, + 300L, + 310L, + 320L, + 330L, + 340L, + 350L, + 360L, + 370L, + 380L, + 390L, + 400L, + 235L, + 999L, + 0L, + NA_integer_ + ) + ) + ) +}) + +test_that("Errors on unexpected input", { + expect_error( + convert_sending_location_to_lca("100") + ) + expect_error( + convert_sending_location_to_lca(c("100", 99L)) + ) +}) diff --git a/tests/testthat/test-create_service_use_lookup.R b/tests/testthat/test-create_service_use_lookup.R index a58741e63..bb6abf6f9 100644 --- a/tests/testthat/test-create_service_use_lookup.R +++ b/tests/testthat/test-create_service_use_lookup.R @@ -165,7 +165,29 @@ test_that("Costs are assigned correctly", { # Operation flag expect_equal( add_operation_flag(dummy_data[["op1a"]]), - c(F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, T) + c( + FALSE, + FALSE, + FALSE, + FALSE, + FALSE, + FALSE, + FALSE, + FALSE, + FALSE, + FALSE, + FALSE, + FALSE, + FALSE, 
+ FALSE, + FALSE, + FALSE, + FALSE, + FALSE, + FALSE, + FALSE, + TRUE + ) ) dummy_data <- tibble::tribble( @@ -181,7 +203,7 @@ test_that("Costs are assigned correctly", { dummy_data[["acute_elective_cost"]], dummy_data[["elective_inpatient_cost"]] ), - c(T, T, T, F) + c(TRUE, TRUE, TRUE, FALSE) ) }) @@ -241,29 +263,42 @@ dummy_data <- tibble::tribble( ~psychiatry_cost, ~maternity_cost, ~geriatric_cost, ~elective_inpatient_flag, ~elective_instances, ~emergency_instances, ~prescribing_cost, ~outpatient_cost, ~care_home_cost, ~community_health_cost, ~ae2_cost, - 10, 0, 0, F, 0, 0, 0, 0, 0, 0, 0, - 0, 10, 0, F, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 10, F, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, T, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, F, 2, 0, 0, 0, 0, 0, 0, - 0, 0, 0, F, 15, 0, 0, 0, 0, 0, 0, - 0, 0, 0, F, 0, 1, 0, 0, 0, 0, 0, - 0, 0, 0, F, 0, 4, 0, 0, 0, 0, 0, - 0, 0, 0, F, 0, 0, 10, 0, 0, 0, 0, - 0, 0, 0, F, 0, 0, 0, 10, 0, 0, 0, - 0, 0, 0, F, 0, 0, 0, 0, 10, 0, 0, - 0, 0, 0, F, 0, 0, 0, 0, 0, 10, 0, - 0, 0, 0, F, 0, 0, 0, 0, 0, 0, 10, - 0, 0, 0, F, 3.5, 0, 0, 0, 0, 0, 0, - 10, 10, 10, T, 10, 10, 10, 10, 10, 10, 10 + 10, 0, 0, FALSE, 0, 0, 0, 0, 0, 0, 0, + 0, 10, 0, FALSE, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 10, FALSE, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, TRUE, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, FALSE, 2, 0, 0, 0, 0, 0, 0, + 0, 0, 0, FALSE, 15, 0, 0, 0, 0, 0, 0, + 0, 0, 0, FALSE, 0, 1, 0, 0, 0, 0, 0, + 0, 0, 0, FALSE, 0, 4, 0, 0, 0, 0, 0, + 0, 0, 0, FALSE, 0, 0, 10, 0, 0, 0, 0, + 0, 0, 0, FALSE, 0, 0, 0, 10, 0, 0, 0, + 0, 0, 0, FALSE, 0, 0, 0, 0, 10, 0, 0, + 0, 0, 0, FALSE, 0, 0, 0, 0, 0, 10, 0, + 0, 0, 0, FALSE, 0, 0, 0, 0, 0, 0, 10, + 0, 0, 0, FALSE, 3.5, 0, 0, 0, 0, 0, 0, + 10, 10, 10, TRUE, 10, 10, 10, 10, 10, 10, 10 ) test_that("Psychiatry cohort is assigned correctly", { expect_equal( assign_s_cohort_psychiatry(dummy_data[["psychiatry_cost"]]), c( - TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, - FALSE, FALSE, FALSE, FALSE, FALSE, TRUE + TRUE, + FALSE, + FALSE, + FALSE, + FALSE, + 
FALSE, + FALSE, + FALSE, + FALSE, + FALSE, + FALSE, + FALSE, + FALSE, + FALSE, + TRUE ) ) }) @@ -388,13 +423,13 @@ test_that("Recalculated costs are calculated correctly", { ~elective_inpatient_cohort, ~limited_daycases_cohort, ~routine_daycase_cohort, ~single_emergency_cohort, ~multiple_emergency_cohort, ~community_care_cohort, ~acute_elective_cost, ~acute_emergency_cost, ~community_health_cost, ~cost_total_net, - T, F, F, F, F, F, 10, 0, 0, 10, - F, T, F, F, F, F, 10, 0, 0, 10, - F, F, T, F, F, F, 10, 0, 0, 10, - F, F, F, T, F, F, 0, 10, 0, 10, - F, F, F, F, T, F, 0, 10, 0, 10, - F, F, F, F, F, T, 0, 0, 10, 10, - T, T, T, T, T, T, 10, 20, 30, 10 + TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, 10, 0, 0, 10, + FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, 10, 0, 0, 10, + FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, 10, 0, 0, 10, + FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, 0, 10, 0, 10, + FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, 0, 10, 0, 10, + FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, 0, 0, 10, 10, + TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 10, 20, 30, 10 ) # Elective @@ -448,7 +483,7 @@ test_that("Recalculated costs are calculated correctly", { # Residential care (not used) expect_equal( calculate_residential_care_cost(), - c(0) + 0.0 ) }) diff --git a/tests/testthat/test-flag_non_scottish_residents.R b/tests/testthat/test-flag_non_scottish_residents.R new file mode 100644 index 000000000..b61d9e159 --- /dev/null +++ b/tests/testthat/test-flag_non_scottish_residents.R @@ -0,0 +1,26 @@ +test_that("Records are flagged correctly", { + test_frame <- tibble::tribble( + ~postcode, ~gpprac, + # Scottish resident + "AB1 1AA", 18574, + # Dummy postcode and missing gpprac + "BF010AA", NA, + # Dummy postcode and missing gpprac (2) + "ZZ014AA", NA, + # Missing postcode and missing gpprac + NA, NA, + # Not English practice and missing postcode + NA, 18574, + # Not English practice and dummy postcode + "NF1 1AB", 18574, + # English postcode and English gpprac + "BS4 4RG", 99942 + ) + + 
test_frame_flagged <- flag_non_scottish_residents(test_frame) + + expect_equal( + test_frame_flagged$keep_flag, + c(0, 2, 2, 2, 3, 4, 1) + ) +}) diff --git a/tests/testthat/test-get_file_paths.R b/tests/testthat/test-get_file_paths.R index 2bec746f7..a3b29a290 100644 --- a/tests/testthat/test-get_file_paths.R +++ b/tests/testthat/test-get_file_paths.R @@ -1,3 +1,28 @@ +test_that("Errors properly", { + expect_error( + get_file_path(directory = "foo", file_name = "bar"), + "The directory .+? does not exist\\." + ) + + expect_error( + get_file_path( + directory = ".", + file_name_regexp = "targets", + check_mode = "write" + ), + "`check_mode = \"write\"` can't be used" + ) +}) + +test_that("Can do check exists", { + expect_false(get_file_path( + directory = ".", + file_name = "foo.R", + check_mode = "exists" + )) +}) + + skip_on_ci() slf_updates_dir <- fs::path( diff --git a/tests/testthat/test-get_it_extract_paths.R b/tests/testthat/test-get_it_extract_paths.R index baaad52a5..52f9e4181 100644 --- a/tests/testthat/test-get_it_extract_paths.R +++ b/tests/testthat/test-get_it_extract_paths.R @@ -1,3 +1,25 @@ +test_that("IT reference cleanup works", { + expect_equal(check_it_reference("SCTASK0439133"), "0439133") + expect_equal(check_it_reference("0439133"), "0439133") + + expect_error( + check_it_reference("123456789"), + "`it_reference` must be exactly 7 numbers\\." + ) + expect_error( + check_it_reference("1234567890"), + "`it_reference` must be exactly 7 numbers\\." + ) + expect_error( + check_it_reference("SCTASK123456789"), + "`it_reference` must be exactly 7 numbers\\." + ) + expect_error( + check_it_reference("ABCDEF123456789"), + "`it_reference` must be exactly 7 numbers\\." + ) +}) + skip_on_ci() test_that("IT extract file paths work", {