diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt index 50162e23f..3236edd84 100644 --- a/.github/actions/spelling/expect.txt +++ b/.github/actions/spelling/expect.txt @@ -28,6 +28,7 @@ cmh CNWs commhosp congen +costincdnas costmonthnum costsfy covr @@ -45,6 +46,7 @@ dbconnect dbplyr deathdiag demog +dfc disch dischloc dischto @@ -70,6 +72,7 @@ fyyear geogs ggplot GLS +gls gms GPOo gpprac @@ -86,24 +89,32 @@ hhg hjust hms homecare +homev hscp hscpnames +IDPC infyyear ipdc itle iwalk jaccard jan +jennifer jul keydate keyring keytime keytimex +kis +lgl +kis los ltc ltcs lubridate magrittr +Mcbride +mcmahon MMMYY monthflag mpat @@ -111,6 +122,7 @@ multiday multisession multistaff NAs +newcons nhs nhshosp NRS @@ -142,7 +154,9 @@ purrr quickstart Rbuildignore rcmdcheck +rdd rds +reabl reablement readcode readr @@ -159,8 +173,12 @@ rspm RStudio rstudioapi Rtype +SDcols seealso selfharm +setkeyv +setnafill +setnames Siar sigfac simd @@ -200,5 +218,6 @@ xintercept xlsx yearstay YYYYQX +zihao zsav zstd diff --git a/DESCRIPTION b/DESCRIPTION index 31b205b36..a437b80cc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -28,6 +28,8 @@ Imports: dtplyr (>= 1.3.0), fs (>= 1.6.1), fst (>= 0.9.8), + future (>= 1.33.0), + future.callr (>= 0.8.1), glue (>= 1.6.2), haven (>= 2.5.2), hms (>= 1.1.0), @@ -53,7 +55,8 @@ Imports: stringr (>= 1.5.0), tibble (>= 3.2.1), tidyr (>= 1.3.0), - tidyselect (>= 1.2.0) + tidyselect (>= 1.2.0), + zoo (>= 1.8.0) Suggests: covr (>= 3.6.1), roxygen2 (>= 7.2.3), diff --git a/NAMESPACE b/NAMESPACE index 642146578..464cced34 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,6 +13,7 @@ export(convert_hscp_to_hscpnames) export(convert_numeric_to_date) export(convert_sending_location_to_lca) export(convert_year_to_fyyear) +export(create_individual_file) export(create_service_use_cohorts) export(end_fy) export(end_fy_quarter) @@ -114,6 +115,7 @@ export(process_tests_ae) export(process_tests_alarms_telecare) 
export(process_tests_care_home) export(process_tests_cmh) +export(process_tests_delayed_discharges) export(process_tests_district_nursing) export(process_tests_episode_file) export(process_tests_gp_ooh) @@ -160,6 +162,8 @@ export(start_fy) export(start_fy_quarter) export(start_next_fy_quarter) export(write_file) +importFrom(data.table,.N) +importFrom(data.table,.SD) importFrom(magrittr,"%>%") importFrom(readr,col_character) importFrom(readr,col_date) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R new file mode 100644 index 000000000..7d9ce5ed3 --- /dev/null +++ b/R/aggregate_by_chi_zihao.R @@ -0,0 +1,214 @@ +#' Aggregate by CHI +#' +#' @description Aggregate episode file by CHI to convert into +#' individual file. +#' +#' @importFrom data.table .N +#' @importFrom data.table .SD +#' +#' @inheritParams create_individual_file +aggregate_by_chi_zihao <- function(episode_file) { + cli::cli_alert_info("Aggregate by CHI function started at {Sys.time()}") + + # Convert to data.table + data.table::setDT(episode_file) + + # Ensure all variable names are lowercase + data.table::setnames(episode_file, stringr::str_to_lower) + + # Sort the data + data.table::setkeyv( + episode_file, + c( + "chi", + "record_keydate1", + "keytime1", + "record_keydate2", + "keytime2" + ) + ) + + data.table::setnames( + episode_file, + c( + "ch_chi_cis", "cij_marker", "ooh_case_id" + # ,"hh_in_fy" + ), + c( + "ch_cis_episodes", "cij_total", "ooh_cases" + # ,"hl1_in_fy" + ) + ) + + # column specification, grouped by chi + # columns to select last + cols2 <- c( + "postcode", + "dob", + "gpprac", + vars_start_with(episode_file, "sc_") + ) + # columns to count unique rows + cols3 <- c( + "ch_cis_episodes", + "cij_total", + "cij_el", + "cij_non_el", + "cij_mat", + "cij_delay", + "ooh_cases", + "preventable_admissions" + ) + # columns to sum up + cols4 <- c( + vars_end_with( + episode_file, + c( + "episodes", + "beddays", + "cost", + "attendances", + "attend", + "contacts", + 
"hours", + "alarms", + "telecare", + "paid_items", + "advice", + "homev", + "time", + "assessment", + "other", + "dn", + "nhs24", + "pcc" + ) + ), + vars_start_with( + episode_file, + "sds_option" + ), + "health_net_cost_inc_dnas" + ) + cols4 <- cols4[!(cols4 %in% c("ch_cis_episodes"))] + # columns to select maximum + cols5 <- c("nsu", vars_contain(episode_file, c("hl1_in_fy"))) + data.table::setnafill(episode_file, fill = 0L, cols = cols5) + # compute + individual_file_cols1 <- episode_file[, + .(gender = mean(gender)), + by = "chi" + ] + individual_file_cols2 <- episode_file[, + .SD[.N], + .SDcols = cols2, + by = "chi" + ] + individual_file_cols3 <- episode_file[, + lapply(.SD, function(x) { + data.table::uniqueN(x, na.rm = TRUE) + }), + .SDcols = cols3, + by = "chi" + ] + individual_file_cols4 <- episode_file[, + lapply(.SD, function(x) { + sum(x, na.rm = TRUE) + }), + .SDcols = cols4, + by = "chi" + ] + individual_file_cols5 <- episode_file[, + lapply(.SD, function(x) max(x, na.rm = TRUE)), + .SDcols = cols5, + by = "chi" + ] + individual_file_cols6 <- episode_file[, + .( + preventable_beddays = ifelse( + any(cij_ppa > 0, na.rm = TRUE), + max(cij_end_date) - min(cij_start_date), + NA_real_ + ) + ), + # cij_marker has been renamed as cij_total + by = c("chi", "cij_total") + ] + individual_file_cols6 <- individual_file_cols6[, + .( + preventable_beddays = sum(preventable_beddays, na.rm = TRUE) + ), + by = "chi" + ] + + individual_file <- dplyr::bind_cols( + individual_file_cols1, + individual_file_cols2[, chi := NULL], + individual_file_cols3[, chi := NULL], + individual_file_cols4[, chi := NULL], + individual_file_cols5[, chi := NULL], + individual_file_cols6[, chi := NULL] + ) + + # convert back to tibble + return(dplyr::as_tibble(individual_file)) +} + + +#' select columns ending with some patterns +#' @describeIn select columns based on patterns +vars_end_with <- function(data, vars, ignore_case = FALSE) { + names(data)[stringr::str_ends( + names(data), + 
stringr::regex(paste(vars, collapse = "|"), + ignore_case = ignore_case + ) + )] +} + +#' select columns starting with some patterns +#' @describeIn select columns based on patterns +vars_start_with <- function(data, vars, ignore_case = FALSE) { + names(data)[stringr::str_starts( + names(data), + stringr::regex(paste(vars, collapse = "|"), + ignore_case = ignore_case + ) + )] +} + +#' select columns contains some characters +#' @describeIn select columns based on patterns +vars_contain <- function(data, vars, ignore_case = FALSE) { + names(data)[stringr::str_detect( + names(data), + stringr::regex(paste(vars, collapse = "|"), + ignore_case = ignore_case + ) + )] +} + +#' Aggregate CIS episodes +#' +#' @description Aggregate CH variables by CHI and CIS. +#' +#' @inheritParams create_individual_file +aggregate_ch_episodes_zihao <- function(episode_file) { + cli::cli_alert_info("Aggregate ch episodes function started at {Sys.time()}") + + # Convert to data.table + data.table::setDT(episode_file) + + # Perform grouping and aggregation + episode_file <- episode_file[, `:=`( + ch_no_cost = max(ch_no_cost), + ch_ep_start = min(record_keydate1), + ch_ep_end = max(ch_ep_end), + ch_cost_per_day = mean(ch_cost_per_day) + ), by = c("chi", "ch_chi_cis")] + + # Convert back to tibble if needed + episode_file <- tibble::as_tibble(episode_file) + + return(episode_file) +} diff --git a/R/create_individual_file.R b/R/create_individual_file.R new file mode 100644 index 000000000..e2cf996a1 --- /dev/null +++ b/R/create_individual_file.R @@ -0,0 +1,867 @@ +#' Create individual file +#' +#' @description Creates individual file from episode file +#' +#' @param episode_file Tibble containing episodic data +#' @param anon_chi_in (Default:TRUE) Is `anon_chi` used in the input +#' (instead of chi) +#' @inheritParams run_episode_file +#' +#' @return The processed individual file +#' @export +create_individual_file <- function( + episode_file, + year, + write_to_disk = TRUE, + anon_chi_in = 
TRUE, + anon_chi_out = TRUE) { + if (anon_chi_in) { + episode_file <- slfhelper::get_chi( + episode_file, + anon_chi_var = "anon_chi", + drop = TRUE + ) %>% + dplyr::mutate(chi = dplyr::na_if(.data$chi, "")) + } + + individual_file <- episode_file %>% + dplyr::select(dplyr::any_of(c( + "year", + "chi", + "dob", + "gender", + "record_keydate1", + "record_keydate2", + "keytime1", + "keytime2", + "recid", + "smrtype", + "ipdc", + "postcode", + "gpprac", + "cij_marker", + "cij_start_date", + "cij_end_date", + "cij_pattype", + "cij_pattype_code", + "cij_ppa", + "ch_chi_cis", + "yearstay", + "cost_total_net", + "cost_total_net_inc_dnas", + "attendance_status", + "no_paid_items", + "total_no_dn_contacts", + "primary_delay_reason", + "sc_latest_submission", + "hc_hours_annual", + "hc_reablement", + "ooh_case_id" + ))) %>% + remove_blank_chi() %>% + add_cij_columns() %>% + add_all_columns() %>% + aggregate_ch_episodes_zihao() %>% + clean_up_ch(year) %>% + recode_gender() %>% + aggregate_by_chi_zihao() %>% + clean_individual_file(year) %>% + join_cohort_lookups(year) %>% + match_on_ltcs(year) %>% + join_deaths_data(year) %>% + join_sparra_hhg(year) %>% + join_slf_lookup_vars() %>% + join_sc_client(year) %>% + dplyr::mutate(year = year) + + if (anon_chi_out) { + individual_file <- individual_file %>% + tidyr::replace_na(list(chi = "")) %>% + slfhelper::get_anon_chi() %>% + dplyr::mutate(anon_chi = dplyr::na_if(.data$anon_chi, "")) + } + + if (write_to_disk) { + slf_indiv_path <- get_file_path( + get_year_dir(year), + stringr::str_glue( + "source-individual-file-{year}.parquet" + ), + check_mode = "write" + ) + + write_file(individual_file, slf_indiv_path) + } + + return(individual_file) +} + +#' Remove blank CHI +#' +#' @description Convert blank strings to NA and remove NAs from CHI column +#' +#' @inheritParams create_individual_file +remove_blank_chi <- function(episode_file) { + cli::cli_alert_info("Remove blank CHI function started at {Sys.time()}") + + episode_file %>% 
+ dplyr::mutate(chi = dplyr::na_if(.data$chi, "")) %>% + dplyr::filter(!is.na(.data$chi)) +} + + +#' Add CIJ-related columns +#' +#' @description Add new columns related to CIJ +#' +#' @inheritParams create_individual_file +add_cij_columns <- function(episode_file) { + cli::cli_alert_info("Add cij columns function started at {Sys.time()}") + + episode_file %>% + dplyr::mutate( + cij_non_el = dplyr::if_else( + .data$cij_pattype_code == 0, + .data$cij_marker, + NA_real_ + ), + cij_el = dplyr::if_else( + .data$cij_pattype_code == 1, + .data$cij_marker, + NA_real_ + ), + cij_mat = dplyr::if_else( + .data$cij_pattype_code == 2, + .data$cij_marker, + NA_real_ + ), + cij_delay = dplyr::if_else( + .data$recid == "DD", + .data$cij_marker, + NA_real_ + ), + preventable_admissions = dplyr::if_else( + .data$cij_ppa == 1, + .data$cij_marker, + NA_integer_ + ) + ) +} + +#' Add all columns +#' +#' @description Add new columns based on SMRType and recid which follow a pattern +#' of prefixed column names created based on some condition. 
+#' +#' @inheritParams create_individual_file +add_all_columns <- function(episode_file) { + cli::cli_alert_info("Add all columns function started at {Sys.time()}") + + episode_file %>% + add_acute_columns("Acute", (.data$smrtype == "Acute-DC" | .data$smrtype == "Acute-IP") & .data$cij_pattype != "Maternity") %>% + add_mat_columns("Mat", .data$recid == "02B" | .data$cij_pattype == "Maternity") %>% + add_mh_columns("MH", .data$recid == "04B" & .data$cij_pattype != "Maternity") %>% + add_gls_columns("GLS", .data$smrtype == "GLS-IP") %>% + add_op_columns("OP", .data$recid == "00B") %>% + add_ae_columns("AE", .data$recid == "AE2") %>% + add_pis_columns("PIS", .data$recid == "PIS") %>% + add_ooh_columns("OoH", .data$recid == "OoH") %>% + add_dn_columns("DN", .data$recid == "DN") %>% + add_cmh_columns("CMH", .data$recid == "CMH") %>% + add_dd_columns("DD", .data$recid == "DD") %>% + add_nsu_columns("NSU", .data$recid == "NSU") %>% + add_nrs_columns("NRS", .data$recid == "NRS") %>% + add_hl1_columns("HL1", .data$recid == "HL1") %>% + add_ch_columns("CH", .data$recid == "CH") %>% + add_hc_columns("HC", .data$recid == "HC") %>% + add_at_columns("AT", .data$recid == "AT") %>% + add_sds_columns("SDS", .data$recid == "SDS") %>% + dplyr::mutate( + health_net_cost = rowSums( + dplyr::pick( + "Acute_cost", + "Mat_cost", + "MH_cost", + "GLS_cost", + "OP_cost_attend", + "AE_cost", + "PIS_cost", + "OoH_cost" + ), + na.rm = TRUE + ), + health_net_cost_inc_dnas = .data$health_net_cost + dplyr::if_else( + is.na(.data$OP_cost_dnas), + 0, + .data$OP_cost_dnas + ) + ) +} + +#' Add Acute columns +#' +#' @inheritParams create_individual_file +#' @param prefix Prefix to add to related columns, e.g. 
"Acute" +#' @param condition Condition to create new columns based on +add_acute_columns <- function(episode_file, prefix, condition) { + condition <- substitute(condition) + episode_file %>% + add_standard_cols(prefix, condition, episode = TRUE, cost = TRUE) %>% + add_ipdc_cols(prefix, condition) +} + +#' Add Mat columns +#' +#' @inheritParams add_acute_columns +add_mat_columns <- function(episode_file, prefix, condition) { + condition <- substitute(condition) + episode_file %>% + add_standard_cols(prefix, condition, episode = TRUE, cost = TRUE) %>% + add_ipdc_cols(prefix, condition, elective = FALSE) +} + +#' Add MH columns +#' +#' @inheritParams add_acute_columns +add_mh_columns <- function(episode_file, prefix, condition) { + condition <- substitute(condition) + episode_file %>% + add_standard_cols(prefix, condition, episode = TRUE, cost = TRUE) %>% + add_ipdc_cols(prefix, condition, ipdc_d = FALSE) +} + +#' Add GLS columns +#' +#' @inheritParams add_acute_columns +add_gls_columns <- function(episode_file, prefix, condition) { + condition <- substitute(condition) + episode_file %>% + add_standard_cols(prefix, condition, episode = TRUE, cost = TRUE) %>% + add_ipdc_cols(prefix, condition, ipdc_d = FALSE) +} + +#' Add OP columns +#' +#' @inheritParams add_acute_columns +add_op_columns <- function(episode_file, prefix, condition) { + condition <- substitute(condition) + episode_file <- episode_file %>% + add_standard_cols(prefix, condition) + condition_1 <- substitute(condition & attendance_status == 1) + episode_file <- episode_file %>% + dplyr::mutate( + "{prefix}_newcons_attendances" := dplyr::if_else(eval(condition_1), 1L, NA_integer_), + "{prefix}_cost_attend" := dplyr::if_else(eval(condition_1), .data$cost_total_net, NA_real_) + ) + condition_5_8 <- substitute(condition & attendance_status %in% c(5, 8)) + episode_file <- episode_file %>% + dplyr::mutate( + "{prefix}_newcons_dnas" := dplyr::if_else(eval(condition_5_8), 1L, NA_integer_), + "{prefix}_cost_dnas" 
:= dplyr::if_else(eval(condition_5_8), .data$cost_total_net_inc_dnas, NA_real_) + ) + return(episode_file) +} + +#' Add AE columns +#' +#' @inheritParams add_acute_columns +add_ae_columns <- function(episode_file, prefix, condition) { + condition <- substitute(condition) + episode_file %>% + add_standard_cols(prefix, condition, cost = TRUE) %>% + dplyr::mutate("{prefix}_attendances" := dplyr::if_else(eval(condition), 1L, NA_integer_)) +} + +#' Add PIS columns +#' +#' @inheritParams add_acute_columns +add_pis_columns <- function(episode_file, prefix, condition) { + condition <- substitute(condition) + episode_file %>% + add_standard_cols(prefix, condition, cost = TRUE) %>% + dplyr::mutate("{prefix}_paid_items" := dplyr::if_else(eval(condition), .data$no_paid_items, NA_integer_)) +} + +#' Add OoH columns +#' +#' @inheritParams add_acute_columns +add_ooh_columns <- function(episode_file, prefix, condition) { + condition <- substitute(condition) + episode_file <- episode_file %>% + add_standard_cols(prefix, condition, cost = TRUE) %>% + dplyr::mutate( + "{prefix}_homeV" := dplyr::if_else(eval(condition) & .data$smrtype == "OOH-HomeV", 1L, NA_integer_), + "{prefix}_advice" := dplyr::if_else(eval(condition) & .data$smrtype == "OOH-Advice", 1L, NA_integer_), + "{prefix}_DN" := dplyr::if_else(eval(condition) & .data$smrtype == "OOH-DN", 1L, NA_integer_), + "{prefix}_NHS24" := dplyr::if_else(eval(condition) & .data$smrtype == "OOH-NHS24", 1L, NA_integer_), + "{prefix}_other" := dplyr::if_else(eval(condition) & .data$smrtype == "OOH-Other", 1L, NA_integer_), + "{prefix}_PCC" := dplyr::if_else(eval(condition) & .data$smrtype == "OOH-PCC", 1L, NA_integer_), + "{prefix}_covid_advice" := dplyr::if_else(eval(condition) & .data$smrtype == "OOH-C19Adv", 1L, NA_integer_), + "{prefix}_covid_assessment" := dplyr::if_else(eval(condition) & .data$smrtype == "OOH-C19Ass", 1L, NA_integer_), + "{prefix}_covid_other" := dplyr::if_else(eval(condition) & .data$smrtype == "OOH-C190th", 1L, 
NA_integer_) + ) + + episode_file <- episode_file %>% + dplyr::mutate( + "{prefix}_consultation_time" := dplyr::if_else( + eval(condition), + pmax( + 0, + as.numeric((lubridate::seconds_to_period(.data$keytime2) + .data$record_keydate2) - (lubridate::seconds_to_period(.data$keytime1) + .data$record_keydate1), units = "mins") + ), + NA_real_ + ), + ) + + return(episode_file) +} + +#' Add DN columns +#' +#' @inheritParams add_acute_columns +add_dn_columns <- function(episode_file, prefix, condition) { + condition <- substitute(condition) + if ("total_no_dn_contacts" %in% names(episode_file)) { + episode_file %>% + add_standard_cols(prefix, condition, episode = TRUE, cost = TRUE) %>% + dplyr::mutate( + "{prefix}_contacts" := dplyr::if_else( + eval(condition), + .data$total_no_dn_contacts, + NA_integer_ + ) + ) + } else { + episode_file %>% + add_standard_cols(prefix, condition, episode = TRUE, cost = TRUE) %>% + dplyr::mutate("{prefix}_contacts" := NA_integer_) + } +} + +#' Add CMH columns +#' +#' @inheritParams add_acute_columns +add_cmh_columns <- function(episode_file, prefix, condition) { + condition <- substitute(condition) + episode_file %>% + add_standard_cols(prefix, condition) %>% + dplyr::mutate("{prefix}_contacts" := dplyr::if_else(eval(condition), 1L, NA_integer_)) +} + +#' Add DD columns +#' +#' @inheritParams add_acute_columns +add_dd_columns <- function(episode_file, prefix, condition) { + condition <- substitute(condition) + condition_delay <- substitute(condition & primary_delay_reason != "9") + episode_file <- episode_file %>% + dplyr::mutate( + "{prefix}_NonCode9_episodes" := dplyr::if_else(eval(condition_delay), 1L, NA_integer_), + "{prefix}_NonCode9_beddays" := dplyr::if_else(eval(condition_delay), .data$yearstay, NA_real_) + ) + condition_delay_9 <- substitute(condition & primary_delay_reason == "9") + episode_file <- episode_file %>% + dplyr::mutate( + "{prefix}_Code9_episodes" := dplyr::if_else(eval(condition_delay_9), 1L, NA_integer_), + 
"{prefix}_Code9_beddays" := dplyr::if_else(eval(condition_delay_9), .data$yearstay, NA_real_) + ) + return(episode_file) +} + +#' Add NSU columns +#' +#' @inheritParams add_acute_columns +add_nsu_columns <- function(episode_file, prefix, condition) { + condition <- substitute(condition) + episode_file %>% + add_standard_cols(prefix, condition) %>% + dplyr::mutate("{prefix}" := dplyr::if_else(eval(condition), 1L, NA_integer_)) +} + +#' Add NRS columns +#' +#' @inheritParams add_acute_columns +add_nrs_columns <- function(episode_file, prefix, condition) { + condition <- substitute(condition) + episode_file %>% + add_standard_cols(prefix, condition) %>% + dplyr::mutate("{prefix}" := dplyr::if_else(eval(condition), 1L, NA_integer_)) +} + +#' Add HL1 columns +#' +#' @inheritParams add_acute_columns +add_hl1_columns <- function(episode_file, prefix, condition) { + condition <- substitute(condition) + episode_file %>% + add_standard_cols(prefix, condition) +} + +#' Add CH columns +#' +#' @inheritParams add_acute_columns +add_ch_columns <- function(episode_file, prefix, condition) { + condition <- substitute(condition) + episode_file %>% + add_standard_cols(prefix, condition) %>% + dplyr::mutate( + ch_cost_per_day = dplyr::if_else( + eval(condition) & .data$yearstay > 0, + .data$cost_total_net / .data$yearstay, + .data$cost_total_net + ), + ch_no_cost = eval(condition) & is.na(.data$ch_cost_per_day), + ch_ep_end = dplyr::if_else( + eval(condition), + .data$record_keydate2, + lubridate::NA_Date_ + ), + # If end date is missing use the first day of next FY quarter + ch_ep_end = dplyr::if_else( + eval(condition) & is.na(.data$ch_ep_end), + start_next_fy_quarter(.data$sc_latest_submission), + .data$ch_ep_end + ) + ) +} + +#' Add HC columns +#' +#' @inheritParams add_acute_columns +add_hc_columns <- function(episode_file, prefix, condition) { + condition <- substitute(condition) + episode_file <- episode_file %>% + add_standard_cols(prefix, condition, episode = TRUE) %>% + 
dplyr::mutate( + "{prefix}_total_hours" := dplyr::if_else(eval(condition), .data$hc_hours_annual, NA_real_), + "{prefix}_total_cost" := dplyr::if_else(eval(condition), .data$cost_total_net, NA_real_), + ) + condition_per <- substitute(condition & smrtype == "HC-Per") + episode_file <- episode_file %>% + dplyr::mutate( + "{prefix}_personal_episodes" := dplyr::if_else(eval(condition_per), 1L, NA_integer_), + "{prefix}_personal_hours" := dplyr::if_else(eval(condition_per), .data$HC_total_hours, NA_real_), + "{prefix}_personal_hours_cost" := dplyr::if_else(eval(condition_per), .data$cost_total_net, NA_real_) + ) + condition_non_per <- substitute(condition & smrtype == "HC-Non-Per") + episode_file <- episode_file %>% + dplyr::mutate( + "{prefix}_non_personal_episodes" := dplyr::if_else(eval(condition_non_per), 1L, NA_integer_), + "{prefix}_non_personal_hours" := dplyr::if_else(eval(condition_non_per), .data$hc_hours_annual, NA_real_), + "{prefix}_non_personal_hours_cost" := dplyr::if_else(eval(condition_non_per), .data$cost_total_net, NA_real_) + ) + condition_reabl <- substitute(condition & hc_reablement == 1) + episode_file <- episode_file %>% + dplyr::mutate( + "{prefix}_reablement_episodes" := dplyr::if_else(eval(condition_reabl), 1L, NA_integer_), + "{prefix}_reablement_hours" := dplyr::if_else(eval(condition_reabl), .data$hc_hours_annual, NA_real_), + "{prefix}_reablement_hours_cost" := dplyr::if_else(eval(condition_reabl), .data$cost_total_net, NA_real_) + ) +} + +#' Add AT columns +#' +#' @inheritParams add_acute_columns +add_at_columns <- function(episode_file, prefix, condition) { + condition <- substitute(condition) + episode_file %>% + add_standard_cols(prefix, condition) %>% + dplyr::mutate( + "{prefix}_alarms" := dplyr::if_else(eval(condition) & .data$smrtype == "AT-Alarm", 1L, NA_integer_), + "{prefix}_telecare" := dplyr::if_else(eval(condition) & .data$smrtype == "AT-Tele", 1L, NA_integer_) + ) +} + +#' Add SDS columns +#' +#' @inheritParams 
add_acute_columns +add_sds_columns <- function(episode_file, prefix, condition) { + condition <- substitute(condition) + episode_file %>% + add_standard_cols(prefix, condition) %>% + dplyr::mutate( + "{prefix}_option_1" := dplyr::if_else(eval(condition) & .data$smrtype == "SDS-1", 1L, NA_integer_), + "{prefix}_option_2" := dplyr::if_else(eval(condition) & .data$smrtype == "SDS-2", 1L, NA_integer_), + "{prefix}_option_3" := dplyr::if_else(eval(condition) & .data$smrtype == "SDS-3", 1L, NA_integer_), + "{prefix}_option_4" := dplyr::if_else(eval(condition) & .data$smrtype == "SDS-4", 1L, NA_integer_) + ) +} + +#' Add columns based on IPDC +#' +#' @description Add columns based on value in IPDC column, which can +#' be further split by Elective/Non-Elective CIJ. +#' +#' @inheritParams add_acute_columns +#' @param ipdc_d Whether to create columns based on IPDC = "D" (lgl) +#' @param elective Whether to create columns based on Elective/Non-Elective cij_pattype (lgl) +add_ipdc_cols <- function(episode_file, prefix, condition, ipdc_d = TRUE, elective = TRUE) { + condition_i <- substitute(eval(condition) & ipdc == "I") + episode_file <- episode_file %>% + dplyr::mutate( + "{prefix}_inpatient_cost" := dplyr::if_else(eval(condition_i), .data$cost_total_net, NA_real_), + "{prefix}_inpatient_episodes" := dplyr::if_else(eval(condition_i), 1L, NA_integer_), + "{prefix}_inpatient_beddays" := dplyr::if_else(eval(condition_i), .data$yearstay, NA_real_) + ) + if (elective) { + condition_el <- substitute(condition_i & cij_pattype == "Elective") + episode_file <- episode_file %>% + dplyr::mutate( + "{prefix}_el_inpatient_episodes" := dplyr::if_else(eval(condition_el), 1L, NA_integer_), + "{prefix}_el_inpatient_beddays" := dplyr::if_else(eval(condition_el), .data$yearstay, NA_real_), + "{prefix}_el_inpatient_cost" := dplyr::if_else(eval(condition_el), .data$cost_total_net, NA_real_) + ) + condition_non_el <- substitute(condition_i & cij_pattype == "Non-Elective") + episode_file <- 
episode_file %>% + dplyr::mutate( + "{prefix}_non_el_inpatient_episodes" := dplyr::if_else(eval(condition_non_el), 1L, NA_integer_), + "{prefix}_non_el_inpatient_beddays" := dplyr::if_else(eval(condition_non_el), .data$yearstay, NA_real_), + "{prefix}_non_el_inpatient_cost" := dplyr::if_else(eval(condition_non_el), .data$cost_total_net, NA_real_) + ) + } + if (ipdc_d) { + condition_d <- substitute(eval(condition) & ipdc == "D") + episode_file <- episode_file %>% + dplyr::mutate( + "{prefix}_daycase_episodes" := dplyr::if_else(eval(condition_d), 1L, NA_integer_), + "{prefix}_daycase_cost" := dplyr::if_else(eval(condition_d), .data$cost_total_net, NA_real_) + ) + } + return(episode_file) +} + +#' Add standard columns +#' +#' @description Add standard columns (DoB, postcode, gpprac, episodes, cost) to episode file. +#' +#' @inheritParams add_acute_columns +#' @param episode Whether to create prefix_episodes col, e.g. "Acute_episodes" +#' @param cost Whether to create prefix_cost col, e.g. "Acute_cost" +add_standard_cols <- function(episode_file, prefix, condition, episode = FALSE, cost = FALSE) { + if (episode) { + episode_file <- dplyr::mutate(episode_file, "{prefix}_episodes" := dplyr::if_else(eval(condition), 1L, NA_integer_)) + } + if (cost) { + episode_file <- dplyr::mutate(episode_file, "{prefix}_cost" := dplyr::if_else(eval(condition), .data$cost_total_net, NA_real_)) + } + return(episode_file) +} + + +#' Aggregate CIS episodes +#' +#' @description Aggregate CH variables by CHI and CIS. 
+#' +#' @inheritParams create_individual_file +aggregate_ch_episodes <- function(episode_file) { + cli::cli_alert_info("Aggregate ch episodes function started at {Sys.time()}") + + episode_file %>% + # dplyr::filter(!is.na(.data$ch_chi_cis)) %>% + # use as.data.table to change the data format to data.table to accelerate + data.table::as.data.table() %>% + dplyr::group_by(.data$chi, .data$ch_chi_cis) %>% + dplyr::mutate( + ch_no_cost = max(.data$ch_no_cost), + ch_ep_start = min(.data$record_keydate1), + ch_ep_end = max(.data$ch_ep_end), + ch_cost_per_day = mean(.data$ch_cost_per_day) + ) %>% + dplyr::ungroup() %>% + # change the data format from data.table to data.frame + tibble::as_tibble() + + # dplyr::distinct(.data$chi, .data$ch_chi_cis) %>% + # dplyr::select(.data$chi, .data$ch_chi_cis, .data$ch_no_cost, .data$ch_ep_start, .data$ch_ep_end, .data$ch_cost_per_day) %>% + # dplyr::right_join(episode_file, by = c(.data$chi, .data$ch_chi_cis)) +} + +#' Clean up CH +#' +#' @description Clean up CH-related columns. 
+#' +#' @inheritParams create_individual_file +clean_up_ch <- function(episode_file, year) { + cli::cli_alert_info("Clean up CH function started at {Sys.time()}") + + episode_file %>% + dplyr::mutate( + fy_end = end_fy(year), + fy_start = start_fy(year) + ) %>% + dplyr::mutate( + term_1 = pmin(.data$ch_ep_end, .data$fy_end + 1), + term_2 = pmax(.data$ch_ep_start, .data$fy_start) + ) %>% + dplyr::mutate( + ch_beddays = dplyr::if_else( + .data$recid == "CH", + as.numeric(.data$term_1 - .data$term_2), + NA_real_ + ), + ch_cost = dplyr::if_else( + .data$recid == "CH" & .data$ch_no_cost == 0, + .data$ch_beddays * .data$ch_cost_per_day, + NA_real_ + ), + ch_beddays = dplyr::if_else( + .data$recid == "CH" & .data$ch_chi_cis == 0, + 0, + .data$ch_beddays + ), + ch_cost = dplyr::if_else( + .data$recid == "CH" & .data$ch_chi_cis == 0, + 0, + .data$ch_cost + ) + ) %>% + dplyr::select(-c("fy_end", "fy_start", "term_1", "term_2")) +} + +#' Recode gender +#' +#' @description Recode gender to 1.5 if 0 or 9. +#' +#' @inheritParams create_individual_file +recode_gender <- function(episode_file) { + cli::cli_alert_info("Recode Gender function started at {Sys.time()}") + + episode_file %>% + dplyr::mutate( + gender = dplyr::if_else( + .data$gender %in% c(0, 9), + 1.5, + .data$gender + ) + ) +} + +#' Aggregate by CHI +#' +#' @description Aggregate episode file by CHI to convert into +#' individual file. 
+#' +#' @inheritParams create_individual_file +aggregate_by_chi <- function(episode_file) { + cli::cli_alert_info("Aggregate by CHI function started at {Sys.time()}") + + episode_file %>% + dplyr::arrange( + chi, + record_keydate1, + keytime1, + record_keydate2, + keytime2 + ) %>% + dplyr::group_by(.data$chi) %>% + dplyr::summarise( + gender = mean(gender), + dplyr::across( + dplyr::ends_with(c("postcode", "DoB", "gpprac")), + ~ dplyr::last(., na_rm = TRUE) + ), + dplyr::across( + c( + "ch_cis_episodes" = "ch_chi_cis", + "cij_total" = "cij_marker", + "cij_el", + "cij_non_el", + "cij_mat", + # "cij_delay", + "ooh_cases" = "ooh_case_id", + "preventable_admissions" + ), + ~ dplyr::n_distinct(.x, na.rm = TRUE) + ), + dplyr::across( + c( + dplyr::ends_with( + c( + "episodes", + "beddays", + "cost", + "attendances", + "attend", + "contacts", + "hours", + "alarms", + "telecare", + "paid_items", + "advice", + "homeV", + "time", + "assessment", + "other", + # "DN", + "NHS24", + "PCC", + "_dnas" + ) + ), + dplyr::starts_with("SDS_option") + ), + ~ sum(., na.rm = TRUE) + ), + # dplyr::across( + # c( + # # dplyr::starts_with("sc_"), + # #-"sc_send_lca", + # #-"sc_latest_submission", + # # "HL1_in_FY" = "hh_in_fy", + # "NSU" + # ), + # ~ max_no_inf(.) + # ), + dplyr::across( + c( + condition_cols(), + # "death_date", + # "deceased", + "year", + dplyr::ends_with(c( + "_Cohort", "end_fy", "start_fy" + )), + ), + ~ dplyr::first(., na_rm = TRUE) + ) + ) %>% + dplyr::ungroup() +} + +#' Condition columns +#' +#' @description Returns chr vector of column names +#' which follow format "condition" and "condition_date" e.g. 
+#' "dementia" and "dementia_date" +condition_cols <- function() { + conditions <- slfhelper::ltc_vars + date_cols <- paste0(conditions, "_date") + all_cols <- c(conditions, date_cols) + return(all_cols) +} + +#' Custom maximum +#' +#' @description Custom maximum function which removes +#' missing values but doesn't return Inf (or warn) if all values +#' are missing (instead returns an NA of the same type as `x`) +#' +#' @param x Vector to return max of +max_no_inf <- function(x) { + if (all(is.na(x))) x[NA_integer_] else max(x, na.rm = TRUE) +} + +#' Custom minimum +#' +#' @description Custom minimum function which removes +#' missing values but doesn't return Inf (or warn) if all values +#' are missing (instead returns an NA of the same type as `x`) +#' +#' @param x Vector to return min of +min_no_inf <- function(x) { + if (all(is.na(x))) x[NA_integer_] else min(x, na.rm = TRUE) +} + +#' Clean individual file +#' +#' @description Clean up columns in individual file +#' +#' @param individual_file Individual file where each row represents a unique CHI +#' @param year Financial year e.g 1718 +clean_individual_file <- function(individual_file, year) { + cli::cli_alert_info("Clean individual file function started at {Sys.time()}") + + individual_file %>% + dplyr::select(!dplyr::any_of(c( + "ch_no_cost", + "no_paid_items", + "total_no_dn_contacts", + "cost_total_net_inc_dnas" + ))) %>% + clean_up_gender() %>% + dplyr::mutate(age = compute_mid_year_age(year, .data$dob)) +} + +#' Clean up gender column +#' +#' @description Clean up column containing gender. +#' +#' @inheritParams clean_individual_file +clean_up_gender <- function(individual_file) { + individual_file %>% + dplyr::mutate( + gender = dplyr::case_when( + .data$gender != 1.5 ~ round(.data$gender), + .default = phsmethods::sex_from_chi(.data$chi, chi_check = FALSE) + ) + ) +} + +#' Join slf lookup variables +#' +#' @description Join lookup variables from slf postcode lookup and slf gpprac +#' lookup. +#' +#' @param individual_file the processed individual file. 
+#' @param slf_postcode_lookup SLF processed postcode lookup, joined on
+#' `postcode`
+#' @param slf_gpprac_lookup SLF processed gpprac lookup, joined on `gpprac`
+#' @param hbrescode_var Name (a string) of the column in the joined data
+#' which is renamed to `hbrescode`
+#'
+#' @return `individual_file` with the lookup columns added and
+#' `hbrescode_var` renamed to `hbrescode`.
+#'
+join_slf_lookup_vars <- function(
+    individual_file,
+    slf_postcode_lookup = read_file(get_slf_postcode_path()),
+    slf_gpprac_lookup = read_file(
+      get_slf_gpprac_path(),
+      col_select = c("gpprac", "cluster", "hbpraccode")
+    ),
+    hbrescode_var = "hb2018") {
+  individual_file <- individual_file %>%
+    dplyr::left_join(
+      slf_postcode_lookup,
+      by = "postcode"
+    ) %>%
+    dplyr::left_join(
+      slf_gpprac_lookup,
+      by = "gpprac"
+    ) %>%
+    # `all_of()` makes it explicit that `hbrescode_var` is a string holding
+    # a column name, not itself a column of the data.
+    dplyr::rename(hbrescode = dplyr::all_of(hbrescode_var))
+
+  return(individual_file)
+}
+
+# TODO Remove the client data from the individual Social Care extracts
+# and instead, use this function in the episode file to match on the client
+# data to all episodes.
+#' Join sc client variables onto individual file
+#'
+#' @description Match on sc client variables. The client lookup is first
+#' joined to the demographics lookup (by `sending_location` and
+#' `social_care_id`) to obtain `chi`, and the result is then joined onto the
+#' individual file by `chi`.
+#'
+#' @param individual_file the processed individual file
+#' @param year financial year; used to locate the default `sc_client` extract.
+#' @param sc_client SC client lookup
+#' @param sc_demographics SC Demographic lookup
+#'
+#' @return `individual_file` with the client variables added and the columns
+#' `sending_location`, `social_care_id` and `sc_latest_submission` removed.
+join_sc_client <- function(
+    individual_file,
+    year,
+    sc_client = read_file(get_source_extract_path(year, "Client")),
+    sc_demographics = read_file(
+      get_sc_demog_lookup_path(),
+      col_select = c("sending_location", "social_care_id", "chi")
+    )) {
+  # TODO Update the client lookup processing script to match
+  # on demographics there so the client lookup already has CHI.
+
+  # Match to demographics lookup to get CHI
+  join_client_demog <- sc_client %>%
+    dplyr::left_join(
+      # Keep only the keys and chi, in case a wider table was passed in
+      sc_demographics %>%
+        dplyr::select("sending_location", "social_care_id", "chi"),
+      by = c("sending_location", "social_care_id")
+    )
+
+  # Match on client variables by chi
+  individual_file <- individual_file %>%
+    dplyr::left_join(
+      join_client_demog,
+      by = "chi"
+    ) %>%
+    # Drop the social care join keys and the submission marker
+    dplyr::select(
+      !c("sending_location", "social_care_id", "sc_latest_submission")
+    )
+
+  return(individual_file)
+}
diff --git a/R/fill_geographies.R b/R/fill_geographies.R index 22b3f03ba..58d001493 100644 --- a/R/fill_geographies.R +++ b/R/fill_geographies.R @@ -14,7 +14,7 @@ fill_geographies <- function(data) { "hbrescode", "hscp", "lca", - "datazone", + "datazone2011", "hbpraccode", "hbtreatcode", "gpprac" @@ -85,7 +85,7 @@ make_gpprac_lookup <- function(data) { } fill_postcode_geogs <- function(data) { - spd <- read_file(get_slf_postcode_path()) + slf_pc_lookup <- read_file(get_slf_postcode_path()) filled_postcodes <- dplyr::left_join( data, @@ -102,7 +102,7 @@ fill_postcode_geogs <- function(data) { ) %>% # Fill geographies dplyr::left_join( - spd, + slf_pc_lookup, by = "postcode", suffix = c("_old", "") ) %>% @@ -117,10 +117,11 @@ fill_postcode_geogs <- function(data) { cascade_geographies() %>% dplyr::mutate( hbrescode = dplyr::coalesce(.data$hb2018, .data$hbrescode), - hscp = dplyr::coalesce(.data$hscp2018, .data$hscp), - lca = dplyr::coalesce(.data$lca, .data$lca_old) + hscp2018 = dplyr::coalesce(.data$hscp2018, .data$hscp), + lca = dplyr::coalesce(.data$lca, .data$lca_old), + datazone2011 = dplyr::coalesce(.data$datazone2011, .data$datazone2011_old) ) %>% - dplyr::select(!c("hb2018", "hscp2018", "lca_old", "most_recent_postcode")) + dplyr::select(!c("hb2018", "hscp", "lca_old", "datazone2011_old", "most_recent_postcode")) return(filled_postcodes) } diff --git a/R/process_extract_acute.R b/R/process_extract_acute.R index db810b9fb..7d47d0ef4 100644 --- a/R/process_extract_acute.R +++
b/R/process_extract_acute.R @@ -79,7 +79,7 @@ process_extract_acute <- function(data, year, write_to_disk = TRUE) { "hbrescode", "lca", "hscp", - "datazone", + "datazone2011", "location", "hbtreatcode", "yearstay", diff --git a/R/process_extract_district_nursing.R b/R/process_extract_district_nursing.R index 2097b38d2..a1b3bf816 100644 --- a/R/process_extract_district_nursing.R +++ b/R/process_extract_district_nursing.R @@ -107,7 +107,7 @@ process_extract_district_nursing <- function( "gender", "gpprac", "postcode", - "datazone", + "datazone2011", "lca", "hscp", "hbrescode", diff --git a/R/process_extract_gp_ooh.R b/R/process_extract_gp_ooh.R index d85ce33f5..4add41cfa 100644 --- a/R/process_extract_gp_ooh.R +++ b/R/process_extract_gp_ooh.R @@ -111,7 +111,7 @@ process_extract_gp_ooh <- function(year, data_list, write_to_disk = TRUE) { "gpprac", "postcode", "hbrescode", - "datazone", + "datazone2011", "hscp", "hbtreatcode", "location", diff --git a/R/process_extract_mental_health.R b/R/process_extract_mental_health.R index f79eff35f..108c14c61 100644 --- a/R/process_extract_mental_health.R +++ b/R/process_extract_mental_health.R @@ -85,7 +85,7 @@ process_extract_mental_health <- function(data, year, write_to_disk = TRUE) { "hbrescode", "lca", "hscp", - "datazone", + "datazone2011", "location", "hbtreatcode", "stay", diff --git a/R/process_tests_delayed_discharges.R b/R/process_tests_delayed_discharges.R new file mode 100644 index 000000000..b540d1f74 --- /dev/null +++ b/R/process_tests_delayed_discharges.R @@ -0,0 +1,50 @@ +#' Process Delayed Discharges tests +#' +#' @description Takes the processed Delayed Discharges extract and produces +#' a test comparison with the previous data. This is written to disk as a CSV. +#' +#' @param data a [tibble][tibble::tibble-package] of the processed data extract. +#' @param year the financial year of the extract in the format '1718'. +#' +#' @return a [tibble][tibble::tibble-package] containing a test comparison. 
+#'
+#' @export
+process_tests_delayed_discharges <- function(data, year) {
+  # Build the same summary of test flags for the previous and the new data
+  old_tests <- produce_source_dd_tests(get_existing_data_for_tests(data))
+  new_tests <- produce_source_dd_tests(data)
+
+  # Compare the two summaries and write the result to the "DD" sheet
+  comparison <- write_tests_xlsx(
+    produce_test_comparison(old_data = old_tests, new_data = new_tests),
+    sheet_name = "DD",
+    year = year
+  )
+
+  comparison
+}
+
+#' Delayed Discharges extract tests
+#'
+#' @description Build the test flags for a delayed discharges extract and
+#' sum them.
+#'
+#' @param data the new or the old extract to summarise
+#' (data is from [get_source_extract_path()])
+#'
+#' @return a dataframe holding the sum of each test flag,
+#' as produced by [calculate_measures()]
+#'
+#' @family extract test functions
+#' for creating test flags
+#' @seealso calculate_measures
+produce_source_dd_tests <- function(data) {
+  # One flag per episode, plus a flag for primary delay reason "9"
+  flagged <- dplyr::mutate(
+    data,
+    n_delay_episodes = 1L,
+    code9_episodes = .data$primary_delay_reason == "9"
+  )
+  flagged <- create_hb_test_flags(flagged, .data$hbtreatcode)
+
+  # Keep only the flag columns: everything from `n_delay_episodes` onwards
+  flag_cols <- dplyr::select(
+    flagged,
+    c("n_delay_episodes":dplyr::last_col())
+  )
+
+  # Sum each flag
+  calculate_measures(flag_cols, measure = "sum")
+}
diff --git a/R/process_tests_episode_file.R b/R/process_tests_episode_file.R index b595d1d54..46e9e7171 100644 --- a/R/process_tests_episode_file.R +++ b/R/process_tests_episode_file.R @@ -10,7 +10,7 @@ process_tests_episode_file <- function(data, year) { data <- data %>% dplyr::select( "year", - "chi", + "anon_chi", "gender", "postcode", "hbtreatcode", @@ -20,7 +20,8 @@ process_tests_episode_file <- function(data, year) { "record_keydate1", "record_keydate2", dplyr::contains(c("beddays", "cost", "cij")) - ) + ) %>% + slfhelper::get_chi() old_data <- get_existing_data_for_tests(data) diff --git a/R/process_tests_individual_file.R b/R/process_tests_individual_file.R index 2eb3503e2..695dc19a0 100644 --- a/R/process_tests_individual_file.R +++ b/R/process_tests_individual_file.R @@ -10,12 +10,12 @@ process_tests_individual_file <-
function(data, year) { data <- data %>% dplyr::select( "year", - "chi", + "anon_chi", "gender", - # "postcode", # Add back in once postcode is fixed + "postcode", "dob", - # "hbrescode", #add back in when available - # "health_net_cost", + "hbrescode", + "health_net_cost", slfhelper::ltc_vars, dplyr::contains(c( "beddays", @@ -26,7 +26,8 @@ process_tests_individual_file <- function(data, year) { "cases", "consultations" )) - ) + ) %>% + slfhelper::get_chi() old_data <- get_existing_data_for_tests(data, file_version = "individual") @@ -61,8 +62,8 @@ produce_individual_file_tests <- function(data) { test_flags <- data %>% # use functions to create HB and partnership flags create_demog_test_flags() %>% - # create_hb_test_flags(.data$hbrescode) %>% - # create_hb_cost_test_flags(.data$hbrescode, .data$health_net_cost) %>% + create_hb_test_flags(.data$hbrescode) %>% + create_hb_cost_test_flags(.data$hbrescode, .data$health_net_cost) %>% # keep variables for comparison dplyr::select(c("valid_chi":dplyr::last_col())) %>% # use function to sum new test flags @@ -82,13 +83,13 @@ produce_individual_file_tests <- function(data) { measure = "all" ) - # min_max_measures <- data %>% - # calculate_measures( - # vars = c( - # "health_net_cost", - # ), - # measure = "min-max" - # ) + min_max_measures <- data %>% + calculate_measures( + vars = c( + "health_net_cost" + ), + measure = "min-max" + ) sum_measures <- data %>% dplyr::select(slfhelper::ltc_vars) %>% @@ -102,7 +103,7 @@ produce_individual_file_tests <- function(data) { join_output <- list( test_flags, all_measures, - # min_max_measures, + min_max_measures, sum_measures ) %>% purrr::reduce(dplyr::full_join, by = c("measure", "value")) diff --git a/R/produce_test_comparison.R b/R/produce_test_comparison.R index dda1f2411..624623806 100644 --- a/R/produce_test_comparison.R +++ b/R/produce_test_comparison.R @@ -23,7 +23,7 @@ produce_test_comparison <- function(old_data, new_data, recid = FALSE) { dplyr::mutate( difference = 
round(.data$value_new - .data$value_old, digits = 2L), pct_change = .data$difference / .data$value_old, - issue = !dplyr::between(pct_change, -0.05, 0.05) + issue = !dplyr::between(.data$pct_change, -0.05, 0.05) ) } else { dplyr::full_join(old_data, @@ -34,7 +34,7 @@ produce_test_comparison <- function(old_data, new_data, recid = FALSE) { dplyr::mutate( difference = round(.data$value_new - .data$value_old, digits = 2L), pct_change = .data$difference / .data$value_old, - issue = !dplyr::between(pct_change, -0.05, 0.05) + issue = !dplyr::between(.data$pct_change, -0.05, 0.05) ) } } diff --git a/R/read_extract_acute.R b/R/read_extract_acute.R index 6c699d6b4..a0fba0707 100644 --- a/R/read_extract_acute.R +++ b/R/read_extract_acute.R @@ -107,7 +107,7 @@ read_extract_acute <- function(year, file_path = get_boxi_extract_path(year = ye disch = "Discharge Type Code", falls_adm = "Falls Related Admission (01)", lca = "Geo Council Area Code", - datazone = "Geo Data Zone 2011", + datazone2011 = "Geo Data Zone 2011", postcode = "Geo Postcode [C]", hscp = "Geo HSCP of Residence Code - current", conc = "Lead Consultant/HCP Code", diff --git a/R/read_extract_district_nursing.R b/R/read_extract_district_nursing.R index 5640fb7b7..607f9b47e 100644 --- a/R/read_extract_district_nursing.R +++ b/R/read_extract_district_nursing.R @@ -43,7 +43,7 @@ read_extract_district_nursing <- function( lca = "Patient Council Area Code (Contact)", postcode = "Patient Postcode [C] (Contact)", gpprac = "Practice Code (Contact)", - datazone = "Patient Data Zone 2011 (Contact)", + datazone2011 = "Patient Data Zone 2011 (Contact)", hbpraccode = "Practice NHS Board Code 9 (Contact)", hbtreatcode = "Treatment NHS Board Code 9", chi = "UPI Number [C]", diff --git a/R/read_extract_mental_health.R b/R/read_extract_mental_health.R index bbdd1d5f9..fe82732c8 100644 --- a/R/read_extract_mental_health.R +++ b/R/read_extract_mental_health.R @@ -83,7 +83,7 @@ read_extract_mental_health <- function( hbrescode = "NHS 
Board of Residence Code - current", lca = "Geo Council Area Code", hscp = "Geo HSCP of Residence Code - current", - datazone = "Geo Data Zone 2011", + datazone2011 = "Geo Data Zone 2011", location = "Treatment Location Code", hbtreatcode = "Treatment NHS Board Code - current", yearstay = "Occupied Bed Days (04)", diff --git a/R/read_extract_nrs_deaths.R b/R/read_extract_nrs_deaths.R index 8fd2f26e9..1734b23aa 100644 --- a/R/read_extract_nrs_deaths.R +++ b/R/read_extract_nrs_deaths.R @@ -39,7 +39,7 @@ read_extract_nrs_deaths <- function( dplyr::rename( death_location_code = "Death Location Code", lca = "Geo Council Area Code", - datazone = "Geo Data Zone 2011", + datazone2011 = "Geo Data Zone 2011", postcode = "Geo Postcode [C]", hscp = "Geo HSCP of Residence Code - current", death_board_occurrence = "NHS Board of Occurrence Code - current", diff --git a/R/read_extract_ooh_consultations.R b/R/read_extract_ooh_consultations.R index 1c32ca085..4e16527a3 100644 --- a/R/read_extract_ooh_consultations.R +++ b/R/read_extract_ooh_consultations.R @@ -34,7 +34,7 @@ read_extract_ooh_consultations <- function( postcode = "Patient Postcode [C]", hbrescode = "Patient NHS Board Code 9 - current", hscp = "HSCP of Residence Code Current", - datazone = "Patient Data Zone 2011", + datazone2011 = "Patient Data Zone 2011", gpprac = "Practice Code", ooh_case_id = "GUID", attendance_status = "Consultation Recorded", diff --git a/R/read_file.R b/R/read_file.R index 53300c70b..2941b62ed 100644 --- a/R/read_file.R +++ b/R/read_file.R @@ -16,10 +16,27 @@ #' @return the data a [tibble][tibble::tibble-package] #' @export read_file <- function(path, col_select = NULL, as_data_frame = TRUE, ...) 
{ - valid_extensions <- c("rds", "fst", "sav", "zsav", "csv", "gz", "parquet") + valid_extensions <- c( + "rds", + "rds.gz", + "fst", + "sav", + "zsav", + "csv", + "csv.gz", + "parquet" + ) ext <- fs::path_ext(path) + if (ext == "gz") { + ext <- paste( + fs::path_ext(fs::path_ext_remove(path)), + "gz", + sep = "." + ) + } + if (!(ext %in% valid_extensions)) { cli::cli_abort(c( "x" = "Invalid extension: {.val {ext}}", @@ -36,17 +53,19 @@ read_file <- function(path, col_select = NULL, as_data_frame = TRUE, ...) { } data <- switch(ext, - "rds" = readr::read_rds(path), - "fst" = fst::read_fst(path), - "sav" = haven::read_spss(path, ...), - "zsav" = haven::read_spss(path, ...), - "csv" = readr::read_csv(path, ..., show_col_types = FALSE), - "gz" = readr::read_csv(path, ..., show_col_types = FALSE), - "parquet" = if (is.null(col_select)) { - arrow::read_parquet(path, as_data_frame = as_data_frame, ...) - } else { - arrow::read_parquet(path, col_select = col_select, as_data_frame = as_data_frame, ...) - } + "rds" = readr::read_rds(file = path), + "rds.gz" = readr::read_rds(file = path), + "fst" = tibble::as_tibble(fst::read_fst(path = path)), + "sav" = haven::read_spss(file = path, ...), + "zsav" = haven::read_spss(file = path, ...), + "csv" = readr::read_csv(file = path, ..., show_col_types = FALSE), + "csv.gz" = readr::read_csv(file = path, ..., show_col_types = FALSE), + "parquet" = arrow::read_parquet( + file = path, + col_select = !!col_select, + as_data_frame = as_data_frame, + ... + ) ) return(data) diff --git a/R/run_episode_file.R b/R/run_episode_file.R index 00e864f40..852a4fd8b 100644 --- a/R/run_episode_file.R +++ b/R/run_episode_file.R @@ -4,11 +4,17 @@ #' @param year The year to process, in FY format. #' @param write_to_disk (optional) Should the data be written to disk default is #' `TRUE` i.e. write the data to disk. 
+#' @param anon_chi_out (Default:TRUE) Should `anon_chi` be used in the output +#' (instead of chi) #' #' @return a [tibble][tibble::tibble-package] containing the episode file #' @export #' -run_episode_file <- function(processed_data_list, year, write_to_disk = TRUE) { +run_episode_file <- function( + processed_data_list, + year, + write_to_disk = TRUE, + anon_chi_out = TRUE) { episode_file <- dplyr::bind_rows(processed_data_list) %>% create_cost_inc_dna() %>% apply_cost_uplift() %>% @@ -51,7 +57,7 @@ run_episode_file <- function(processed_data_list, year, write_to_disk = TRUE) { "cij_dis_spec", "cost_total_net", "hscp", - "datazone", + "datazone2011", "attendance_status", "deathdiag1", "deathdiag2", @@ -80,7 +86,7 @@ run_episode_file <- function(processed_data_list, year, write_to_disk = TRUE) { ) ) %>% # Check chi is valid using phsmethods function - # If the CHI is invalid for whatever reason, set the CHI to blank string + # If the CHI is invalid for whatever reason, set the CHI to NA dplyr::mutate( chi = dplyr::if_else( phsmethods::chi_check(.data$chi) != "Valid CHI", @@ -103,8 +109,17 @@ run_episode_file <- function(processed_data_list, year, write_to_disk = TRUE) { join_deaths_data(year) %>% load_ep_file_vars(year) + if (anon_chi_out) { + # TODO When slfhelper is updated remove the unnecessary code + episode_file <- episode_file %>% + tidyr::replace_na(list(chi = "")) %>% + slfhelper::get_anon_chi() %>% + dplyr::mutate(anon_chi = dplyr::na_if(.data$anon_chi, "")) + } + if (write_to_disk) { - slf_path <- get_file_path( + # TODO make the slf_path a function + slf_episode_path <- get_file_path( get_year_dir(year), stringr::str_glue( "source-episode-file-{year}.parquet" @@ -112,7 +127,7 @@ run_episode_file <- function(processed_data_list, year, write_to_disk = TRUE) { check_mode = "write" ) - write_file(episode_file, slf_path) + write_file(episode_file, slf_episode_path) } return(episode_file) @@ -120,10 +135,10 @@ run_episode_file <- 
function(processed_data_list, year, write_to_disk = TRUE) { #' Store the unneeded episode file variables #' -#' @param data The in progress episode file data. +#' @param data The in-progress episode file data. #' @inheritParams run_episode_file -#' @param vars_to_keep a character vector of variable to keep, all others will -#' be stored. +#' @param vars_to_keep a character vector of the variables to keep, all others +#' will be stored. #' #' @return `data` with only the `vars_to_keep` kept store_ep_file_vars <- function(data, year, vars_to_keep) { @@ -309,7 +324,7 @@ create_cost_inc_dna <- function(data) { #' #' @return The data unchanged (the cohorts are written to disk) create_cohort_lookups <- function(data, year, update = latest_update()) { - # Use future so the cohorts can be create simultaneously (in parallel) + # Use future so the cohorts can be created simultaneously (in parallel) future::plan(strategy = future.callr::callr, .skip = TRUE) options(future.globals.maxSize = 21474836480) @@ -340,6 +355,7 @@ create_cohort_lookups <- function(data, year, update = latest_update()) { #' Join cohort lookups #' #' @inheritParams store_ep_file_vars +#' @inheritParams get_demographic_cohorts_path #' #' @return The data including the Demographic and Service Use lookups. 
join_cohort_lookups <- function(data, year, update = latest_update()) { diff --git a/R/write_tests_xlsx.R b/R/write_tests_xlsx.R index c517e496e..e187149d5 100644 --- a/R/write_tests_xlsx.R +++ b/R/write_tests_xlsx.R @@ -47,7 +47,7 @@ write_tests_xlsx <- function(comparison_data, sheet_name, year = NULL) { while (fs::file_exists(path = in_use_path) && seconds < max_wait) { # While the tests are in use (wait a random number of seconds from 1 to 30) cli::cli_progress_update() - wait <- round(runif(1, 1, 15)) + wait <- sample(x = 3:15, size = 1) Sys.sleep(wait) seconds <- seconds + wait @@ -56,7 +56,7 @@ write_tests_xlsx <- function(comparison_data, sheet_name, year = NULL) { } # Final check to maybe avoid corrupting the workbook - Sys.sleep(round(runif(1, 1, 3))) + Sys.sleep(sample(x = 1:3, size = 1)) if (!fs::file_exists(path = in_use_path)) { fs::file_create(path = in_use_path) } else { diff --git a/_targets.R b/_targets.R index 3625fa072..f50045aed 100644 --- a/_targets.R +++ b/_targets.R @@ -59,7 +59,8 @@ list( process_lookup_sc_demographics( sc_demog_data, write_to_disk = write_to_disk - ) + ), + priority = 0.9 ), tar_target( tests_sc_demog_lookup, @@ -70,7 +71,8 @@ list( process_it_chi_deaths( data = it_chi_deaths_extract, write_to_disk = write_to_disk - ) + ), + priority = 0.9 ), tar_target( tests_it_chi_deaths, @@ -83,7 +85,8 @@ list( gpprac_ref_path = gpprac_ref_path, spd_path = spd_path, write_to_disk = write_to_disk - ) + ), + priority = 0.9 ), tar_target( tests_source_gp_lookup, @@ -96,16 +99,17 @@ list( simd_path = simd_path, locality_path = locality_path, write_to_disk = write_to_disk - ) + ), + priority = 0.9 ), tar_target( tests_source_pc_lookup, process_tests_lookup_pc(source_pc_lookup) ), ## Cost Lookups ## - tar_target(ch_cost_lookup, process_costs_ch_rmd()), - tar_target(dn_cost_lookup, process_costs_dn_rmd()), - tar_target(hc_cost_lookup, process_costs_hc_rmd()), + tar_target(ch_cost_lookup, process_costs_ch_rmd(), priority = 0.8), + 
tar_target(dn_cost_lookup, process_costs_dn_rmd(), priority = 0.8), + tar_target(hc_cost_lookup, process_costs_hc_rmd(), priority = 0.8), tar_target(gp_ooh_cost_lookup, process_costs_gp_ooh_rmd()), ## Social Care - 'All' data ## tar_target( @@ -122,7 +126,8 @@ list( all_at_extract, sc_demog_lookup = sc_demog_lookup, write_to_disk = write_to_disk - ) + ), + priority = 0.5 ), tar_target( all_home_care_extract, @@ -138,7 +143,8 @@ list( all_home_care_extract, sc_demog_lookup = sc_demog_lookup, write_to_disk = write_to_disk - ) + ), + priority = 0.5 ), tar_target( all_care_home_extract, @@ -157,7 +163,8 @@ list( ch_name_lookup_path = slf_ch_name_lookup_path, spd_path = spd_path, write_to_disk = write_to_disk - ) + ), + priority = 0.5 ), tar_target( tests_all_care_home, @@ -177,7 +184,8 @@ list( all_sds_extract, sc_demog_lookup = sc_demog_lookup, write_to_disk = write_to_disk - ) + ), + priority = 0.5 ), tar_map( list(year = years_to_run), @@ -256,14 +264,14 @@ list( get_boxi_extract_path(year = year, type = "GP_OoH-c"), format = "file" ), - tar_target(ooh_data, + tar_qs( + ooh_data, read_extract_gp_ooh( year, diagnosis_data_path, outcomes_data_path, consultations_data_path - ), - format = "rds" + ) ), ### Target source processed extracts ### tar_target(source_acute_extract, process_extract_acute( @@ -302,12 +310,18 @@ list( year ) ), - # TODO add tests for the Delayed Discharges extract tar_target(source_dd_extract, process_extract_delayed_discharges( dd_data, year, write_to_disk = write_to_disk )), + tar_target( + tests_source_dd_extract, + process_tests_delayed_discharges( + source_dd_extract, + year + ) + ), tar_target(source_dn_extract, process_extract_district_nursing( dn_data, year, @@ -539,6 +553,51 @@ list( data = episode_file, year = year ) + ), + tar_target( + individual_file, + create_individual_file( + episode_file = episode_file, + year = year, + write_to_disk = write_to_disk + ) + ), + tar_target( + individual_file_tests, + process_tests_individual_file( 
+ data = individual_file, + year = year + ) + ), + tar_target( + episode_file_dataset, + arrow::write_dataset( + dataset = episode_file, + path = fs::path( + get_year_dir(year), + stringr::str_glue("source-episode-file-{year}") + ), + format = "parquet", + # Should correspond to the available slfhelper filters + partitioning = c("recid", "hscp2018"), + compression = "zstd", + version = "latest" + ) + ), + tar_target( + individual_file_dataset, + arrow::write_dataset( + dataset = individual_file, + path = fs::path( + get_year_dir(year), + stringr::str_glue("source-individual-file-{year}") + ), + format = "parquet", + # Should correspond to the available slfhelper filters + partitioning = c("hscp2018"), + compression = "zstd", + version = "latest" + ) ) ) ) diff --git a/man/add_acute_columns.Rd b/man/add_acute_columns.Rd new file mode 100644 index 000000000..52ba071b6 --- /dev/null +++ b/man/add_acute_columns.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_acute_columns} +\alias{add_acute_columns} +\title{Add Acute columns} +\usage{ +add_acute_columns(episode_file, prefix, condition) +} +\arguments{ +\item{episode_file}{Tibble containing episodic data} + +\item{prefix}{Prefix to add to related columns, e.g. "Acute"} + +\item{condition}{Condition to create new columns based on} +} +\description{ +Add Acute columns +} diff --git a/man/add_ae_columns.Rd b/man/add_ae_columns.Rd new file mode 100644 index 000000000..9b7099513 --- /dev/null +++ b/man/add_ae_columns.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_ae_columns} +\alias{add_ae_columns} +\title{Add AE columns} +\usage{ +add_ae_columns(episode_file, prefix, condition) +} +\arguments{ +\item{episode_file}{Tibble containing episodic data} + +\item{prefix}{Prefix to add to related columns, e.g. 
"Acute"} + +\item{condition}{Condition to create new columns based on} +} +\description{ +Add AE columns +} diff --git a/man/add_all_columns.Rd b/man/add_all_columns.Rd new file mode 100644 index 000000000..d502e95c3 --- /dev/null +++ b/man/add_all_columns.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_all_columns} +\alias{add_all_columns} +\title{Add all columns} +\usage{ +add_all_columns(episode_file) +} +\arguments{ +\item{episode_file}{Tibble containing episodic data} +} +\description{ +Add new columns based on SMRType and recid which follow a pattern +of prefixed column names created based on some condition. +} diff --git a/man/add_at_columns.Rd b/man/add_at_columns.Rd new file mode 100644 index 000000000..e05ea9101 --- /dev/null +++ b/man/add_at_columns.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_at_columns} +\alias{add_at_columns} +\title{Add AT columns} +\usage{ +add_at_columns(episode_file, prefix, condition) +} +\arguments{ +\item{episode_file}{Tibble containing episodic data} + +\item{prefix}{Prefix to add to related columns, e.g. "Acute"} + +\item{condition}{Condition to create new columns based on} +} +\description{ +Add AT columns +} diff --git a/man/add_ch_columns.Rd b/man/add_ch_columns.Rd new file mode 100644 index 000000000..4938f7690 --- /dev/null +++ b/man/add_ch_columns.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_ch_columns} +\alias{add_ch_columns} +\title{Add CH columns} +\usage{ +add_ch_columns(episode_file, prefix, condition) +} +\arguments{ +\item{episode_file}{Tibble containing episodic data} + +\item{prefix}{Prefix to add to related columns, e.g. 
"Acute"} + +\item{condition}{Condition to create new columns based on} +} +\description{ +Add CH columns +} diff --git a/man/add_cij_columns.Rd b/man/add_cij_columns.Rd new file mode 100644 index 000000000..7d00e6299 --- /dev/null +++ b/man/add_cij_columns.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_cij_columns} +\alias{add_cij_columns} +\title{Add CIJ-related columns} +\usage{ +add_cij_columns(episode_file) +} +\arguments{ +\item{episode_file}{Tibble containing episodic data} +} +\description{ +Add new columns related to CIJ +} diff --git a/man/add_cmh_columns.Rd b/man/add_cmh_columns.Rd new file mode 100644 index 000000000..a1d82cba6 --- /dev/null +++ b/man/add_cmh_columns.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_cmh_columns} +\alias{add_cmh_columns} +\title{Add CMH columns} +\usage{ +add_cmh_columns(episode_file, prefix, condition) +} +\arguments{ +\item{episode_file}{Tibble containing episodic data} + +\item{prefix}{Prefix to add to related columns, e.g. "Acute"} + +\item{condition}{Condition to create new columns based on} +} +\description{ +Add CMH columns +} diff --git a/man/add_dd_columns.Rd b/man/add_dd_columns.Rd new file mode 100644 index 000000000..08d9c0fe4 --- /dev/null +++ b/man/add_dd_columns.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_dd_columns} +\alias{add_dd_columns} +\title{Add DD columns} +\usage{ +add_dd_columns(episode_file, prefix, condition) +} +\arguments{ +\item{episode_file}{Tibble containing episodic data} + +\item{prefix}{Prefix to add to related columns, e.g. 
"Acute"} + +\item{condition}{Condition to create new columns based on} +} +\description{ +Add DD columns +} diff --git a/man/add_dn_columns.Rd b/man/add_dn_columns.Rd new file mode 100644 index 000000000..bf6af008f --- /dev/null +++ b/man/add_dn_columns.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_dn_columns} +\alias{add_dn_columns} +\title{Add DN columns} +\usage{ +add_dn_columns(episode_file, prefix, condition) +} +\arguments{ +\item{episode_file}{Tibble containing episodic data} + +\item{prefix}{Prefix to add to related columns, e.g. "Acute"} + +\item{condition}{Condition to create new columns based on} +} +\description{ +Add DN columns +} diff --git a/man/add_gls_columns.Rd b/man/add_gls_columns.Rd new file mode 100644 index 000000000..e71dc755b --- /dev/null +++ b/man/add_gls_columns.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_gls_columns} +\alias{add_gls_columns} +\title{Add GLS columns} +\usage{ +add_gls_columns(episode_file, prefix, condition) +} +\arguments{ +\item{episode_file}{Tibble containing episodic data} + +\item{prefix}{Prefix to add to related columns, e.g. "Acute"} + +\item{condition}{Condition to create new columns based on} +} +\description{ +Add GLS columns +} diff --git a/man/add_hc_columns.Rd b/man/add_hc_columns.Rd new file mode 100644 index 000000000..95d8f1d3b --- /dev/null +++ b/man/add_hc_columns.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_hc_columns} +\alias{add_hc_columns} +\title{Add HC columns} +\usage{ +add_hc_columns(episode_file, prefix, condition) +} +\arguments{ +\item{episode_file}{Tibble containing episodic data} + +\item{prefix}{Prefix to add to related columns, e.g. 
"Acute"} + +\item{condition}{Condition to create new columns based on} +} +\description{ +Add HC columns +} diff --git a/man/add_hl1_columns.Rd b/man/add_hl1_columns.Rd new file mode 100644 index 000000000..7600db5e9 --- /dev/null +++ b/man/add_hl1_columns.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_hl1_columns} +\alias{add_hl1_columns} +\title{Add HL1 columns} +\usage{ +add_hl1_columns(episode_file, prefix, condition) +} +\arguments{ +\item{episode_file}{Tibble containing episodic data} + +\item{prefix}{Prefix to add to related columns, e.g. "Acute"} + +\item{condition}{Condition to create new columns based on} +} +\description{ +Add HL1 columns +} diff --git a/man/add_ipdc_cols.Rd b/man/add_ipdc_cols.Rd new file mode 100644 index 000000000..0f91cbd90 --- /dev/null +++ b/man/add_ipdc_cols.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_ipdc_cols} +\alias{add_ipdc_cols} +\title{Add columns based on IPDC} +\usage{ +add_ipdc_cols(episode_file, prefix, condition, ipdc_d = TRUE, elective = TRUE) +} +\arguments{ +\item{episode_file}{Tibble containing episodic data} + +\item{prefix}{Prefix to add to related columns, e.g. "Acute"} + +\item{condition}{Condition to create new columns based on} + +\item{ipdc_d}{Whether to create columns based on IPDC = "D" (lgl)} + +\item{elective}{Whether to create columns based on Elective/Non-Elective cij_pattype (lgl)} +} +\description{ +Add columns based on value in IPDC column, which can +be further split by Elective/Non-Elective CIJ. 
+} diff --git a/man/add_mat_columns.Rd b/man/add_mat_columns.Rd new file mode 100644 index 000000000..aae729323 --- /dev/null +++ b/man/add_mat_columns.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_mat_columns} +\alias{add_mat_columns} +\title{Add Mat columns} +\usage{ +add_mat_columns(episode_file, prefix, condition) +} +\arguments{ +\item{episode_file}{Tibble containing episodic data} + +\item{prefix}{Prefix to add to related columns, e.g. "Acute"} + +\item{condition}{Condition to create new columns based on} +} +\description{ +Add Mat columns +} diff --git a/man/add_mh_columns.Rd b/man/add_mh_columns.Rd new file mode 100644 index 000000000..3c50c6cb8 --- /dev/null +++ b/man/add_mh_columns.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_mh_columns} +\alias{add_mh_columns} +\title{Add MH columns} +\usage{ +add_mh_columns(episode_file, prefix, condition) +} +\arguments{ +\item{episode_file}{Tibble containing episodic data} + +\item{prefix}{Prefix to add to related columns, e.g. "Acute"} + +\item{condition}{Condition to create new columns based on} +} +\description{ +Add MH columns +} diff --git a/man/add_nrs_columns.Rd b/man/add_nrs_columns.Rd new file mode 100644 index 000000000..9d7b3f8bf --- /dev/null +++ b/man/add_nrs_columns.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_nrs_columns} +\alias{add_nrs_columns} +\title{Add NRS columns} +\usage{ +add_nrs_columns(episode_file, prefix, condition) +} +\arguments{ +\item{episode_file}{Tibble containing episodic data} + +\item{prefix}{Prefix to add to related columns, e.g. 
"Acute"} + +\item{condition}{Condition to create new columns based on} +} +\description{ +Add NRS columns +} diff --git a/man/add_nsu_columns.Rd b/man/add_nsu_columns.Rd new file mode 100644 index 000000000..6a54bbcbf --- /dev/null +++ b/man/add_nsu_columns.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_nsu_columns} +\alias{add_nsu_columns} +\title{Add NSU columns} +\usage{ +add_nsu_columns(episode_file, prefix, condition) +} +\arguments{ +\item{episode_file}{Tibble containing episodic data} + +\item{prefix}{Prefix to add to related columns, e.g. "Acute"} + +\item{condition}{Condition to create new columns based on} +} +\description{ +Add NSU columns +} diff --git a/man/add_ooh_columns.Rd b/man/add_ooh_columns.Rd new file mode 100644 index 000000000..01814ab6d --- /dev/null +++ b/man/add_ooh_columns.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_ooh_columns} +\alias{add_ooh_columns} +\title{Add OoH columns} +\usage{ +add_ooh_columns(episode_file, prefix, condition) +} +\arguments{ +\item{episode_file}{Tibble containing episodic data} + +\item{prefix}{Prefix to add to related columns, e.g. "Acute"} + +\item{condition}{Condition to create new columns based on} +} +\description{ +Add OoH columns +} diff --git a/man/add_op_columns.Rd b/man/add_op_columns.Rd new file mode 100644 index 000000000..08c4419e2 --- /dev/null +++ b/man/add_op_columns.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_op_columns} +\alias{add_op_columns} +\title{Add OP columns} +\usage{ +add_op_columns(episode_file, prefix, condition) +} +\arguments{ +\item{episode_file}{Tibble containing episodic data} + +\item{prefix}{Prefix to add to related columns, e.g. 
"Acute"} + +\item{condition}{Condition to create new columns based on} +} +\description{ +Add OP columns +} diff --git a/man/add_pis_columns.Rd b/man/add_pis_columns.Rd new file mode 100644 index 000000000..b582acf2e --- /dev/null +++ b/man/add_pis_columns.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_pis_columns} +\alias{add_pis_columns} +\title{Add PIS columns} +\usage{ +add_pis_columns(episode_file, prefix, condition) +} +\arguments{ +\item{episode_file}{Tibble containing episodic data} + +\item{prefix}{Prefix to add to related columns, e.g. "Acute"} + +\item{condition}{Condition to create new columns based on} +} +\description{ +Add PIS columns +} diff --git a/man/add_sds_columns.Rd b/man/add_sds_columns.Rd new file mode 100644 index 000000000..d5a5fb2cf --- /dev/null +++ b/man/add_sds_columns.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_sds_columns} +\alias{add_sds_columns} +\title{Add SDS columns} +\usage{ +add_sds_columns(episode_file, prefix, condition) +} +\arguments{ +\item{episode_file}{Tibble containing episodic data} + +\item{prefix}{Prefix to add to related columns, e.g. "Acute"} + +\item{condition}{Condition to create new columns based on} +} +\description{ +Add SDS columns +} diff --git a/man/add_standard_cols.Rd b/man/add_standard_cols.Rd new file mode 100644 index 000000000..744aa49de --- /dev/null +++ b/man/add_standard_cols.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_standard_cols} +\alias{add_standard_cols} +\title{Add standard columns} +\usage{ +add_standard_cols( + episode_file, + prefix, + condition, + episode = FALSE, + cost = FALSE +) +} +\arguments{ +\item{episode_file}{Tibble containing episodic data} + +\item{prefix}{Prefix to add to related columns, e.g. 
"Acute"} + +\item{condition}{Condition to create new columns based on} + +\item{episode}{Whether to create prefix_episodes col, e.g. "Acute_episodes"} + +\item{cost}{Whether to create prefix_cost col, e.g. "Acute_cost"} +} +\description{ +Add standard columns (DoB, postcode, gpprac, episodes, cost) to episode file. +} diff --git a/man/aggregate_by_chi.Rd b/man/aggregate_by_chi.Rd new file mode 100644 index 000000000..73804ad9b --- /dev/null +++ b/man/aggregate_by_chi.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{aggregate_by_chi} +\alias{aggregate_by_chi} +\title{Aggregate by CHI} +\usage{ +aggregate_by_chi(episode_file) +} +\arguments{ +\item{episode_file}{Tibble containing episodic data} +} +\description{ +Aggregate episode file by CHI to convert into +individual file. +} diff --git a/man/aggregate_by_chi_zihao.Rd b/man/aggregate_by_chi_zihao.Rd new file mode 100644 index 000000000..3d4961e19 --- /dev/null +++ b/man/aggregate_by_chi_zihao.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/aggregate_by_chi_zihao.R +\name{aggregate_by_chi_zihao} +\alias{aggregate_by_chi_zihao} +\title{Aggregate by CHI} +\usage{ +aggregate_by_chi_zihao(episode_file) +} +\arguments{ +\item{episode_file}{Tibble containing episodic data} +} +\description{ +Aggregate episode file by CHI to convert into +individual file. 
+} diff --git a/man/aggregate_ch_episodes.Rd b/man/aggregate_ch_episodes.Rd new file mode 100644 index 000000000..2753da14f --- /dev/null +++ b/man/aggregate_ch_episodes.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{aggregate_ch_episodes} +\alias{aggregate_ch_episodes} +\title{Aggregate CIS episodes} +\usage{ +aggregate_ch_episodes(episode_file) +} +\arguments{ +\item{episode_file}{Tibble containing episodic data} +} +\description{ +Aggregate CH variables by CHI and CIS. +} diff --git a/man/aggregate_ch_episodes_zihao.Rd b/man/aggregate_ch_episodes_zihao.Rd new file mode 100644 index 000000000..808262654 --- /dev/null +++ b/man/aggregate_ch_episodes_zihao.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/aggregate_by_chi_zihao.R +\name{aggregate_ch_episodes_zihao} +\alias{aggregate_ch_episodes_zihao} +\title{Aggregate CIS episodes} +\usage{ +aggregate_ch_episodes_zihao(episode_file) +} +\arguments{ +\item{episode_file}{Tibble containing episodic data} +} +\description{ +Aggregate CH variables by CHI and CIS. 
+} diff --git a/man/clean_individual_file.Rd b/man/clean_individual_file.Rd new file mode 100644 index 000000000..fb2d3ae13 --- /dev/null +++ b/man/clean_individual_file.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{clean_individual_file} +\alias{clean_individual_file} +\title{Clean individual file} +\usage{ +clean_individual_file(individual_file, year) +} +\arguments{ +\item{individual_file}{Individual file where each row represents a unique CHI} + +\item{year}{Financial year, e.g. 1718} +} +\description{ +Clean up columns in individual file +} diff --git a/man/clean_up_ch.Rd b/man/clean_up_ch.Rd new file mode 100644 index 000000000..0182c84e8 --- /dev/null +++ b/man/clean_up_ch.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{clean_up_ch} +\alias{clean_up_ch} +\title{Clean up CH} +\usage{ +clean_up_ch(episode_file, year) +} +\arguments{ +\item{episode_file}{Tibble containing episodic data} + +\item{year}{The year to process, in FY format.} +} +\description{ +Clean up CH-related columns. +} diff --git a/man/clean_up_gender.Rd b/man/clean_up_gender.Rd new file mode 100644 index 000000000..edf05bfc8 --- /dev/null +++ b/man/clean_up_gender.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{clean_up_gender} +\alias{clean_up_gender} +\title{Clean up gender column} +\usage{ +clean_up_gender(individual_file) +} +\arguments{ +\item{individual_file}{Individual file where each row represents a unique CHI} +} +\description{ +Clean up column containing gender.
+} diff --git a/man/condition_cols.Rd b/man/condition_cols.Rd new file mode 100644 index 000000000..ba037a609 --- /dev/null +++ b/man/condition_cols.Rd @@ -0,0 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{condition_cols} +\alias{condition_cols} +\title{Condition columns} +\usage{ +condition_cols() +} +\description{ +Returns chr vector of column names +which follow format "condition" and "condition_date" e.g. +"dementia" and "dementia_date" +} diff --git a/man/correct_cij_vars.Rd b/man/correct_cij_vars.Rd index 18ce990f8..97a7f046f 100644 --- a/man/correct_cij_vars.Rd +++ b/man/correct_cij_vars.Rd @@ -7,7 +7,7 @@ correct_cij_vars(data) } \arguments{ -\item{data}{The in progress episode file data.} +\item{data}{The in-progress episode file data.} } \value{ The data with CIJ variables corrected. diff --git a/man/create_cohort_lookups.Rd b/man/create_cohort_lookups.Rd index cbfc1442f..f0ad267aa 100644 --- a/man/create_cohort_lookups.Rd +++ b/man/create_cohort_lookups.Rd @@ -7,7 +7,7 @@ create_cohort_lookups(data, year, update = latest_update()) } \arguments{ -\item{data}{The in progress episode file data.} +\item{data}{The in-progress episode file data.} \item{year}{The year to process, in FY format.} diff --git a/man/create_cost_inc_dna.Rd b/man/create_cost_inc_dna.Rd index 588c602be..69e7e37b5 100644 --- a/man/create_cost_inc_dna.Rd +++ b/man/create_cost_inc_dna.Rd @@ -7,7 +7,7 @@ create_cost_inc_dna(data) } \arguments{ -\item{data}{The in progress episode file data.} +\item{data}{The in-progress episode file data.} } \value{ The data with cost including dna. 
diff --git a/man/create_individual_file.Rd b/man/create_individual_file.Rd new file mode 100644 index 000000000..fa759e7b1 --- /dev/null +++ b/man/create_individual_file.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{create_individual_file} +\alias{create_individual_file} +\title{Create individual file} +\usage{ +create_individual_file( + episode_file, + year, + write_to_disk = TRUE, + anon_chi_in = TRUE, + anon_chi_out = TRUE +) +} +\arguments{ +\item{episode_file}{Tibble containing episodic data} + +\item{year}{The year to process, in FY format.} + +\item{write_to_disk}{(optional) Should the data be written to disk? Default is +\code{TRUE}, i.e. write the data to disk.} + +\item{anon_chi_in}{(Default: TRUE) Is \code{anon_chi} used in the input +(instead of chi)} + +\item{anon_chi_out}{(Default: TRUE) Should \code{anon_chi} be used in the output +(instead of chi)} +} +\value{ +The processed individual file +} +\description{ +Creates individual file from episode file +} diff --git a/man/fill_missing_cij_markers.Rd b/man/fill_missing_cij_markers.Rd index 002c8d927..03b64217e 100644 --- a/man/fill_missing_cij_markers.Rd +++ b/man/fill_missing_cij_markers.Rd @@ -7,7 +7,7 @@ fill_missing_cij_markers(data) } \arguments{ -\item{data}{The in progress episode file data.} +\item{data}{The in-progress episode file data.} } \value{ A data frame with CIJ markers filled in for those missing. diff --git a/man/join_cohort_lookups.Rd b/man/join_cohort_lookups.Rd index 21f376bdc..445dcd7c0 100644 --- a/man/join_cohort_lookups.Rd +++ b/man/join_cohort_lookups.Rd @@ -7,9 +7,11 @@ join_cohort_lookups(data, year, update = latest_update()) } \arguments{ -\item{data}{The in progress episode file data.} +\item{data}{The in-progress episode file data.} \item{year}{The year to process, in FY format.} + +\item{update}{The update to use} } \value{ The data including the Demographic and Service Use lookups.
diff --git a/man/join_sc_client.Rd b/man/join_sc_client.Rd new file mode 100644 index 000000000..a30719698 --- /dev/null +++ b/man/join_sc_client.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{join_sc_client} +\alias{join_sc_client} +\title{Join sc client variables onto individual file} +\usage{ +join_sc_client( + individual_file, + year, + sc_client = read_file(get_source_extract_path(year, "Client")), + sc_demographics = read_file(get_sc_demog_lookup_path(), col_select = + c("sending_location", "social_care_id", "chi")) +) +} +\arguments{ +\item{individual_file}{the processed individual file} + +\item{year}{financial year.} + +\item{sc_client}{SC client lookup} + +\item{sc_demographics}{SC Demographic lookup} +} +\description{ +Match on sc client variables. +} diff --git a/man/join_slf_lookup_vars.Rd b/man/join_slf_lookup_vars.Rd new file mode 100644 index 000000000..980c66f31 --- /dev/null +++ b/man/join_slf_lookup_vars.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{join_slf_lookup_vars} +\alias{join_slf_lookup_vars} +\title{Join slf lookup variables} +\usage{ +join_slf_lookup_vars( + individual_file, + slf_postcode_lookup = read_file(get_slf_postcode_path()), + slf_gpprac_lookup = read_file(get_slf_gpprac_path(), col_select = c("gpprac", + "cluster", "hbpraccode")), + hbrescode_var = "hb2018" +) +} +\arguments{ +\item{individual_file}{the processed individual file.} + +\item{slf_postcode_lookup}{SLF processed postcode lookup} + +\item{slf_gpprac_lookup}{SLF processed gpprac lookup} + +\item{hbrescode_var}{hbrescode variable} +} +\description{ +Join lookup variables from slf postcode lookup and slf gpprac +lookup. 
+} diff --git a/man/join_sparra_hhg.Rd b/man/join_sparra_hhg.Rd index 9bbdd916a..ab4d3b946 100644 --- a/man/join_sparra_hhg.Rd +++ b/man/join_sparra_hhg.Rd @@ -7,7 +7,7 @@ join_sparra_hhg(data, year) } \arguments{ -\item{data}{The in progress episode file data.} +\item{data}{The in-progress episode file data.} \item{year}{The year to process, in FY format.} } diff --git a/man/load_ep_file_vars.Rd b/man/load_ep_file_vars.Rd index d290ba512..cee9cc440 100644 --- a/man/load_ep_file_vars.Rd +++ b/man/load_ep_file_vars.Rd @@ -7,7 +7,7 @@ load_ep_file_vars(data, year) } \arguments{ -\item{data}{The in progress episode file data.} +\item{data}{The in-progress episode file data.} \item{year}{The year to process, in FY format.} } diff --git a/man/max_no_inf.Rd b/man/max_no_inf.Rd new file mode 100644 index 000000000..79b9a1057 --- /dev/null +++ b/man/max_no_inf.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{max_no_inf} +\alias{max_no_inf} +\title{Custom maximum} +\usage{ +max_no_inf(x) +} +\arguments{ +\item{x}{Vector to return max of} +} +\description{ +Custom maximum function which removes +missing values but doesn't return Inf if all values +are missing (instead returns NA) +} diff --git a/man/min_no_inf.Rd b/man/min_no_inf.Rd new file mode 100644 index 000000000..38029214f --- /dev/null +++ b/man/min_no_inf.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{min_no_inf} +\alias{min_no_inf} +\title{Custom minimum} +\usage{ +min_no_inf(x) +} +\arguments{ +\item{x}{Vector to return min of} +} +\description{ +Custom minimum function which removes +missing values but doesn't return Inf if all values +are missing (instead returns NA) +} diff --git a/man/process_tests_delayed_discharges.Rd b/man/process_tests_delayed_discharges.Rd new file mode 100644 index 000000000..68e1b8f17 --- /dev/null +++ 
b/man/process_tests_delayed_discharges.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/process_tests_delayed_discharges.R +\name{process_tests_delayed_discharges} +\alias{process_tests_delayed_discharges} +\title{Process Delayed Discharges tests} +\usage{ +process_tests_delayed_discharges(data, year) +} +\arguments{ +\item{data}{a \link[tibble:tibble-package]{tibble} of the processed data extract.} + +\item{year}{the financial year of the extract in the format '1718'.} +} +\value{ +a \link[tibble:tibble-package]{tibble} containing a test comparison. +} +\description{ +Takes the processed Delayed Discharges extract and produces +a test comparison with the previous data. This is written to disk as a CSV. +} diff --git a/man/produce_source_dd_tests.Rd b/man/produce_source_dd_tests.Rd new file mode 100644 index 000000000..2eb9f6455 --- /dev/null +++ b/man/produce_source_dd_tests.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/process_tests_delayed_discharges.R +\name{produce_source_dd_tests} +\alias{produce_source_dd_tests} +\title{Delayed Discharges extract tests} +\usage{ +produce_source_dd_tests(data) +} +\arguments{ +\item{data}{new or old data for testing summary flags +(data is from \code{\link[=get_source_extract_path]{get_source_extract_path()}})} +} +\value{ +a dataframe with a count of each flag +from \code{\link[=calculate_measures]{calculate_measures()}} +} +\description{ +Produce tests for the delayed discharges extract. +} +\seealso{ +calculate_measures + +Other extract test functions +for creating test flags: +\code{\link{produce_source_pis_tests}()} +} +\concept{extract test functions +for creating test flags} diff --git a/man/produce_source_pis_tests.Rd b/man/produce_source_pis_tests.Rd index 070cc789d..487ad2fd7 100644 --- a/man/produce_source_pis_tests.Rd +++ b/man/produce_source_pis_tests.Rd @@ -24,6 +24,10 @@ episode date variables. 
} \seealso{ calculate_measures + +Other extract test functions +for creating test flags: +\code{\link{produce_source_dd_tests}()} } \concept{extract test functions for creating test flags} diff --git a/man/recode_gender.Rd b/man/recode_gender.Rd new file mode 100644 index 000000000..526d2829d --- /dev/null +++ b/man/recode_gender.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{recode_gender} +\alias{recode_gender} +\title{Recode gender} +\usage{ +recode_gender(episode_file) +} +\arguments{ +\item{episode_file}{Tibble containing episodic data} +} +\description{ +Recode gender to 1.5 if 0 or 9. +} diff --git a/man/remove_blank_chi.Rd b/man/remove_blank_chi.Rd new file mode 100644 index 000000000..9cba40a8f --- /dev/null +++ b/man/remove_blank_chi.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{remove_blank_chi} +\alias{remove_blank_chi} +\title{Remove blank CHI} +\usage{ +remove_blank_chi(episode_file) +} +\arguments{ +\item{episode_file}{Tibble containing episodic data} +} +\description{ +Convert blank strings to NA and remove NAs from CHI column +} diff --git a/man/run_episode_file.Rd b/man/run_episode_file.Rd index e85621b59..59d5fea1d 100644 --- a/man/run_episode_file.Rd +++ b/man/run_episode_file.Rd @@ -4,7 +4,12 @@ \alias{run_episode_file} \title{Produce the Source Episode file} \usage{ -run_episode_file(processed_data_list, year, write_to_disk = TRUE) +run_episode_file( + processed_data_list, + year, + write_to_disk = TRUE, + anon_chi_out = TRUE +) } \arguments{ \item{processed_data_list}{containing data from processed extracts.} @@ -13,6 +18,9 @@ run_episode_file(processed_data_list, year, write_to_disk = TRUE) \item{write_to_disk}{(optional) Should the data be written to disk default is \code{TRUE} i.e. 
write the data to disk.} + +\item{anon_chi_out}{(Default:TRUE) Should \code{anon_chi} be used in the output +(instead of chi)} } \value{ a \link[tibble:tibble-package]{tibble} containing the episode file diff --git a/man/select.Rd b/man/select.Rd new file mode 100644 index 000000000..435096d9a --- /dev/null +++ b/man/select.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/aggregate_by_chi_zihao.R +\name{vars_end_with} +\alias{vars_end_with} +\alias{vars_start_with} +\alias{vars_contain} +\title{select columns ending with some patterns} +\usage{ +vars_end_with(data, vars, ignore_case = FALSE) + +vars_start_with(data, vars, ignore_case = FALSE) + +vars_contain(data, vars, ignore_case = FALSE) +} +\description{ +select columns ending with some patterns + +select columns starting with some patterns + +select columns containing some characters +} +\section{Functions}{ +\itemize{ +\item \code{vars_end_with()}: columns based on patterns + +\item \code{vars_start_with()}: columns based on patterns + +\item \code{vars_contain()}: columns based on patterns + +}} diff --git a/man/store_ep_file_vars.Rd b/man/store_ep_file_vars.Rd index f31f63976..06316aac1 100644 --- a/man/store_ep_file_vars.Rd +++ b/man/store_ep_file_vars.Rd @@ -7,12 +7,12 @@ store_ep_file_vars(data, year, vars_to_keep) } \arguments{ -\item{data}{The in progress episode file data.} +\item{data}{The in-progress episode file data.} \item{year}{The year to process, in FY format.} -\item{vars_to_keep}{a character vector of variable to keep, all others will -be stored.} +\item{vars_to_keep}{a character vector of the variables to keep, all others +will be stored.} } \value{ \code{data} with only the \code{vars_to_keep} kept diff --git a/tests/testthat/test-read_file.R b/tests/testthat/test-read_file.R index 392ba4a49..e823180fb 100644 --- a/tests/testthat/test-read_file.R +++ b/tests/testthat/test-read_file.R @@ -1,5 +1,6 @@ test_that("read_file works", { rds_path <-
tempfile(fileext = ".rds") + rds_gz_path <- tempfile(fileext = ".rds.gz") fst_path <- tempfile(fileext = ".fst") sav_path <- tempfile(fileext = ".sav") zsav_path <- tempfile(fileext = ".zsav") @@ -10,6 +11,7 @@ test_that("read_file works", { aq_data <- tibble::as_tibble(datasets::airquality) readr::write_rds(aq_data, rds_path) + readr::write_rds(aq_data, rds_gz_path) fst::write_fst(aq_data, fst_path) haven::write_sav(aq_data, sav_path) haven::write_sav(aq_data, zsav_path, compress = "zsav") @@ -18,7 +20,8 @@ test_that("read_file works", { arrow::write_parquet(aq_data, parquet_path) expect_equal(aq_data, read_file(rds_path)) - expect_equal(aq_data, tibble::as_tibble(read_file(fst_path))) + expect_equal(aq_data, read_file(rds_gz_path)) + expect_equal(aq_data, read_file(fst_path)) expect_equal(aq_data, haven::zap_formats(read_file(sav_path))) expect_equal(aq_data, haven::zap_formats(read_file(zsav_path))) expect_equal(aq_data, read_file(csv_gz_path))