From e6d6ca6761a0fc9b79c9c2cb34076955f5dca139 Mon Sep 17 00:00:00 2001 From: Mandy Norrbo Date: Tue, 7 Feb 2023 11:11:02 +0000 Subject: [PATCH 001/200] Convert D01 up until L470 --- R/create_individual_file.R | 472 +++++++++++++++++++++++++++++++++++++ 1 file changed, 472 insertions(+) create mode 100644 R/create_individual_file.R diff --git a/R/create_individual_file.R b/R/create_individual_file.R new file mode 100644 index 000000000..b1e39f8df --- /dev/null +++ b/R/create_individual_file.R @@ -0,0 +1,472 @@ +#' Create individual file +#' +#' @description Creates individual file from episode file +#' +#' @param episode_file Tibble containing episodic data +create_individual_file <- function(episode_file) { + episode_file %>% + remove_blank_chi() %>% + find_non_duplicates(.data$cij_marker, "Distinct_CIJ") %>% + add_cij_columns() %>% + find_non_duplicates(.data$ch_chi_cis, "first_ch_ep") %>% + add_all_columns() +} + +#' Remove blank CHI +#' +#' @description Convert blank strings to NA and remove NAs from CHI column +#' +#' @inheritParams create_individual_file +remove_blank_chi <- function(episode_file) { + episode_file %>% + dplyr::mutate(chi = dplyr::na_if(.data$chi, "")) %>% + dplyr::filter(!is.na(.data$chi)) +} + +#' Find non-duplicates +#' +#' @description Create new column which marks first (per group) +#' non-duplicated observation as 1, with any duplicates marked as 0. 
+#' +#' @inheritParams create_individual_file +#' @param group Column to group by +#' @param col_name Name of new column +find_non_duplicates <- function(episode_file, group, col_name) { + episode_file %>% + dplyr::group_by(.data$chi, {{ group }}) %>% + dplyr::mutate("{col_name}" := dplyr::if_else(duplicated({{ group }}), 0, 1)) %>% + dplyr::ungroup() %>% + dplyr::mutate("{col_name}" := dplyr::if_else(is.na({{ group }}), 0, .data[[col_name]])) +} + +#' Add CIJ-related columns +#' +#' @description Add new columns related to CIJ +#' +#' @inheritParams create_individual_file +add_cij_columns <- function(episode_file) { + episode_file %>% + dplyr::mutate( + CIJ_non_el = dplyr::if_else(.data$cij_pattype_code == 0, + .data$Distinct_CIJ, + NA_real_ + ), + CIJ_el = dplyr::if_else(.data$cij_pattype_code == 1, + .data$Distinct_CIJ, + NA_real_ + ), + CIJ_mat = dplyr::if_else(.data$cij_pattype_code == 2, + .data$Distinct_CIJ, + NA_real_ + ) + ) %>% + dplyr::mutate(cij_delay = dplyr::if_else( + (.data$cij_delay == 1 & .data$Distinct_CIJ == 1), + 1, + 0 + )) %>% + dplyr::mutate( + preventable_admissions = dplyr::if_else( + (cij_ppa == 1 & Distinct_CIJ == 1), + 1, + 0 + ), + preventable_beddays = dplyr::if_else( + (cij_ppa == 1 & Distinct_CIJ == 1), + as.numeric(cij_end_date - cij_start_date), + 0 + ) + ) +} + +#' Add all columns +#' +#' @description Add new columns based on SMRType and recid which follow a pattern +#' of prefixed column names created based on some condition. 
+#' +#' @inheritParams create_individual_file +add_all_columns <- function(episode_file) { + episode_file %>% + add_acute_columns("Acute", (smrtype == "Acute-DC" | smrtype == "Acute-IP") & cij_pattype != "Maternity") %>% + add_mat_columns("Mat", recid == "02B" | cij_pattype == "Maternity") %>% + add_mh_columns("MH", recid == "04B" & cij_pattype != "Maternity") %>% + add_gls_columns("GLS", smrtype == "GLS-IP") %>% + add_op_columns("OP", recid == "00B") %>% + add_ae_columns("AE", recid == "AE2") %>% + add_pis_columns("PIS", recid == "PIS") %>% + add_ooh_columns("OoH", recid == "OoH") %>% + add_dn_columns("DN", recid == "DN") %>% + add_cmh_columns("CMH", recid == "CMH") %>% + add_dd_columns("DD", recid == "DD") %>% + add_nsu_columns("NSU", recid == "NSU") %>% + add_nrs_columns("NRS", recid == "NRS") %>% + add_hl1_columns("HL1", recid == "HL1") %>% + add_ch_columns("CH", recid == "CH") %>% + add_hc_columns("HC", recid == "HC") %>% + add_at_columns("AT", recid == "AT") %>% + add_sds_columns("SDS", recid == "SDS") +} + +#' Add Acute columns +#' +#' @inheritParams create_individual_file +#' @param prefix Prefix to add to related columns, e.g. 
"Acute" +#' @param condition Condition to create new columns based on +add_acute_columns <- function(episode_file, prefix, condition) { + condition <- substitute(condition) + episode_file %>% + add_standard_cols(prefix, condition, episode = TRUE, cost = TRUE) %>% + add_ipdc_cols(prefix, condition) +} + +#' Add Mat columns +#' +#' @inheritParams add_acute_columns +add_mat_columns <- function(episode_file, prefix, condition) { + condition <- substitute(condition) + episode_file %>% + add_standard_cols(prefix, condition, episode = TRUE, cost = TRUE) %>% + add_ipdc_cols(prefix, condition, elective = FALSE) +} + +#' Add MH columns +#' +#' @inheritParams add_acute_columns +add_mh_columns <- function(episode_file, prefix, condition) { + condition <- substitute(condition) + episode_file %>% + add_standard_cols(prefix, condition, episode = TRUE, cost = TRUE) %>% + add_ipdc_cols(prefix, condition, ipdc_d = FALSE) +} + +#' Add GLS columns +#' +#' @inheritParams add_acute_columns +add_gls_columns <- function(episode_file, prefix, condition) { + condition <- substitute(condition) + episode_file %>% + add_standard_cols(prefix, condition, episode = TRUE, cost = TRUE) %>% + add_ipdc_cols(prefix, condition, ipdc_d = FALSE) +} + +#' Add OP columns +#' +#' @inheritParams add_acute_columns +add_op_columns <- function(episode_file, prefix, condition) { + condition <- substitute(condition) + episode_file <- episode_file %>% + add_standard_cols(prefix, condition) + condition_1 <- substitute(condition & attendance_status == 1) + episode_file <- episode_file %>% + dplyr::mutate( + "{prefix}_newcons_attendances" := dplyr::if_else(eval(condition_1), 1, NA_real_), + "{prefix}_cost_attend" := dplyr::if_else(eval(condition_1), cost_total_net, NA_real_) + ) + condition_5_8 <- substitute(condition & attendance_status %in% c(5, 8)) + episode_file <- episode_file %>% + dplyr::mutate( + "{prefix}_newcons_dnas" := dplyr::if_else(eval(condition_5_8), 1, NA_real_), + "{prefix}_cost_dnas" := 
dplyr::if_else(eval(condition_5_8), cost_total_net_incdnas, NA_real_) + ) + return(episode_file) +} + +#' Add AE columns +#' +#' @inheritParams add_acute_columns +add_ae_columns <- function(episode_file, prefix, condition) { + condition <- substitute(condition) + episode_file %>% + add_standard_cols(prefix, condition, cost = TRUE) %>% + dplyr::mutate("{prefix}_attendances" := dplyr::if_else(eval(condition), 1, NA_real_)) +} + +#' Add PIS columns +#' +#' @inheritParams add_acute_columns +add_pis_columns <- function(episode_file, prefix, condition) { + condition <- substitute(condition) + episode_file %>% + add_standard_cols(prefix, condition, cost = TRUE) %>% + dplyr::mutate("{prefix}_paid_items" := dplyr::if_else(eval(condition), no_paid_items, NA_real_)) +} + +#' Add OoH columns +#' +#' @inheritParams add_acute_columns +add_ooh_columns <- function(episode_file, prefix, condition) { + condition <- substitute(condition) + episode_file <- episode_file %>% + add_standard_cols(prefix, condition, cost = TRUE) %>% + dplyr::mutate( + "{prefix}_homeV" := dplyr::if_else(eval(condition) & smrtype == "OOH-HomeV", 1, NA_real_), + "{prefix}_advice" := dplyr::if_else(eval(condition) & smrtype == "OOH-Advice", 1, NA_real_), + "{prefix}_DN" := dplyr::if_else(eval(condition) & smrtype == "OOH-DN", 1, NA_real_), + "{prefix}_NHS24" := dplyr::if_else(eval(condition) & smrtype == "OOH-NHS24", 1, NA_real_), + "{prefix}_other" := dplyr::if_else(eval(condition) & smrtype == "OOH-Other", 1, NA_real_), + "{prefix}_PCC" := dplyr::if_else(eval(condition) & smrtype == "OOH-PCC", 1, NA_real_), + ooh_covid_advice = dplyr::if_else(eval(condition) & smrtype == "OOH-C19Adv", 1, NA_real_), + ooh_covid_assessment = dplyr::if_else(eval(condition) & smrtype == "OOH-C19Ass", 1, NA_real_), + ooh_covid_other = dplyr::if_else(eval(condition) & smrtype == "OOH-C19Oth", 1, NA_real_) + ) + + episode_file <- episode_file %>% + dplyr::mutate( + OoH_consultation_time = 
dplyr::if_else(eval(condition), as.numeric((lubridate::seconds_to_period(keytime2) + keydate2_dateformat) - (lubridate::seconds_to_period(keytime1) + keydate1_dateformat), units = "mins"), NA_real_), + OoH_consultation_time = dplyr::if_else(OoH_consultation_time < 0, 0, OoH_consultation_time) + ) + return(episode_file) +} + +#' Add DN columns +#' +#' @inheritParams add_acute_columns +add_dn_columns <- function(episode_file, prefix, condition) { + condition <- substitute(condition) + episode_file %>% + add_standard_cols(prefix, condition, episode = TRUE, cost = TRUE) %>% + dplyr::mutate("{prefix}_contacts" := dplyr::if_else(eval(condition), totalnodncontacts, NA_real_)) +} + +#' Add CMH columns +#' +#' @inheritParams add_acute_columns +add_cmh_columns <- function(episode_file, prefix, condition) { + condition <- substitute(condition) + episode_file %>% + add_standard_cols(prefix, condition) %>% + dplyr::mutate("{prefix}_contacts" := dplyr::if_else(eval(condition), 1, NA_real_)) +} + +#' Add DD columns +#' +#' @inheritParams add_acute_columns +add_dd_columns <- function(episode_file, prefix, condition) { + condition <- substitute(condition) + condition_delay <- substitute(condition & primary_delay_reason != "9") + episode_file <- episode_file %>% + dplyr::mutate( + "{prefix}_NonCode9_episodes" := dplyr::if_else(eval(condition_delay), 1, NA_real_), + "{prefix}_NonCode9_beddays" := dplyr::if_else(eval(condition_delay), yearstay, NA_real_) + ) + condition_delay_9 <- substitute(condition & primary_delay_reason == "9") + episode_file <- episode_file %>% + dplyr::mutate( + "{prefix}_Code9_episodes" := dplyr::if_else(eval(condition_delay_9), 1, NA_real_), + "{prefix}_Code9_beddays" := dplyr::if_else(eval(condition_delay_9), yearstay, NA_real_) + ) + return(episode_file) +} + +#' Add NSU columns +#' +#' @inheritParams add_acute_columns +add_nsu_columns <- function(episode_file, prefix, condition) { + condition <- substitute(condition) + episode_file %>% + 
add_standard_cols(prefix, condition) %>% + dplyr::mutate("{prefix}" := dplyr::if_else(eval(condition), 1, NA_real_)) +} + +#' Add NRS columns +#' +#' @inheritParams add_acute_columns +add_nrs_columns <- function(episode_file, prefix, condition) { + condition <- substitute(condition) + episode_file %>% + add_standard_cols(prefix, condition) %>% + dplyr::mutate("{prefix}" := dplyr::if_else(eval(condition), 1, NA_real_)) +} + +#' Add HL1 columns +#' +#' @inheritParams add_acute_columns +add_hl1_columns <- function(episode_file, prefix, condition) { + condition <- substitute(condition) + episode_file %>% + add_standard_cols(prefix, condition, drop = "gpprac") +} + +#' Add CH columns +#' +#' @inheritParams add_acute_columns +add_ch_columns <- function(episode_file, prefix, condition) { + condition <- substitute(condition) + episode_file %>% + add_standard_cols(prefix, condition) %>% + dplyr::mutate( + ch_cis_episodes = dplyr::if_else(eval(condition), first_ch_ep, NA_real_), + ch_cost_per_day = dplyr::if_else(eval(condition) & yearstay > 0, cost_total_net / yearstay, NA_real_), + ch_cost_per_day = dplyr::if_else(eval(condition) & yearstay == 0, cost_total_net, ch_cost_per_day), + ch_no_cost = eval(condition) & is.na(ch_cost_per_day), + ch_ep_end = dplyr::if_else(eval(condition), keydate2_dateformat, lubridate::NA_Date_), + ch_ep_end = dplyr::if_else(eval(condition) & is.na(ch_ep_end), lubridate::quarter(zoo::as.yearqtr(sc_latest_submission), type = "date_first"), ch_ep_end) + ) +} + +#' Add HC columns +#' +#' @inheritParams add_acute_columns +add_hc_columns <- function(episode_file, prefix, condition) { + condition <- substitute(condition) + episode_file <- episode_file %>% + add_standard_cols(prefix, condition, episode = TRUE) %>% + dplyr::mutate( + "{prefix}_total_hours" := dplyr::if_else(eval(condition), hc_hours_annual, NA_real_), + "{prefix}_total_cost" := dplyr::if_else(eval(condition), cost_total_net, NA_real_), + ) + condition_per <- substitute(condition & 
smrtype == "HC-Per") + episode_file <- episode_file %>% + dplyr::mutate( + "{prefix}_personal_episodes" := dplyr::if_else(eval(condition_per), 1, NA_real_), + "{prefix}_personal_hours" := dplyr::if_else(eval(condition_per), hc_hours_annual, NA_real_), + "{prefix}_personal_hours_cost" := dplyr::if_else(eval(condition_per), cost_total_net, NA_real_) + ) + condition_non_per <- substitute(condition & smrtype == "HC-Non-Per") + episode_file <- episode_file %>% + dplyr::mutate( + "{prefix}_non_personal_episodes" := dplyr::if_else(eval(condition_non_per), 1, NA_real_), + "{prefix}_non_personal_hours" := dplyr::if_else(eval(condition_non_per), hc_hours_annual, NA_real_), + "{prefix}_non_personal_hours_cost" := dplyr::if_else(eval(condition_non_per), cost_total_net, NA_real_) + ) + condition_reabl <- substitute(condition & hc_reablement == 1) + episode_file %>% + dplyr::mutate( + "{prefix}_reablement_episodes" := dplyr::if_else(eval(condition_reabl), 1, NA_real_), + "{prefix}_reablement_hours" := dplyr::if_else(eval(condition_reabl), hc_hours_annual, NA_real_), + "{prefix}_reablement_hours_cost" := dplyr::if_else(eval(condition_reabl), cost_total_net, NA_real_) + ) +} + +#' Add AT columns +#' +#' @inheritParams add_acute_columns +add_at_columns <- function(episode_file, prefix, condition) { + condition <- substitute(condition) + episode_file %>% + add_standard_cols(prefix, condition) %>% + dplyr::mutate( + "{prefix}_alarms" := dplyr::if_else(eval(condition) & smrtype == "AT-Alarm", 1, NA_real_), + "{prefix}_telecare" := dplyr::if_else(eval(condition) & smrtype == "AT-Tele", 1, NA_real_) + ) +} + +#' Add SDS columns +#' +#' @inheritParams add_acute_columns +add_sds_columns <- function(episode_file, prefix, condition) { + condition <- substitute(condition) + episode_file %>% + add_standard_cols(prefix, condition) %>% + dplyr::mutate( + "{prefix}_option_1" := dplyr::if_else(eval(condition) & smrtype == "SDS-1", 1, NA_real_), + "{prefix}_option_2" := 
dplyr::if_else(eval(condition) & smrtype == "SDS-2", 1, NA_real_), + "{prefix}_option_3" := dplyr::if_else(eval(condition) & smrtype == "SDS-3", 1, NA_real_), + "{prefix}_option_4" := dplyr::if_else(eval(condition) & smrtype == "SDS-4", 1, NA_real_) + ) +} + +#' Add columns based on IPDC +#' +#' @description Add columns based on value in IPDC column, which can +#' be further split by Elective/Non-Elective CIJ. +#' +#' @inheritParams add_acute_columns +#' @param ipdc_d Whether to create columns based on IPDC = "D" (lgl) +#' @param elective Whether to create columns based on Elective/Non-Elective cij_pattype (lgl) +add_ipdc_cols <- function(episode_file, prefix, condition, ipdc_d = TRUE, elective = TRUE) { + condition_i <- substitute(eval(condition) & ipdc == "I") + episode_file <- episode_file %>% + dplyr::mutate( + "{prefix}_inpatient_episodes" := dplyr::if_else(eval(condition_i), 1, NA_real_), + "{prefix}_inpatient_beddays" := dplyr::if_else(eval(condition_i), yearstay, NA_real_) + ) + if (elective) { + condition_el <- substitute(condition_i & cij_pattype == "Elective") + episode_file <- episode_file %>% + dplyr::mutate( + "{prefix}_el_inpatient_episodes" := dplyr::if_else(eval(condition_el), 1, NA_real_), + "{prefix}_el_inpatient_beddays" := dplyr::if_else(eval(condition_el), yearstay, NA_real_), + "{prefix}_el_inpatient_cost" := dplyr::if_else(eval(condition_el), cost_total_net, NA_real_) + ) + condition_non_el <- substitute(condition_i & cij_pattype == "Non-Elective") + episode_file <- episode_file %>% + dplyr::mutate( + "{prefix}_non_el_inpatient_episodes" := dplyr::if_else(eval(condition_non_el), 1, NA_real_), + "{prefix}_non_el_inpatient_beddays" := dplyr::if_else(eval(condition_non_el), yearstay, NA_real_), + "{prefix}_non_el_inpatient_cost" := dplyr::if_else(eval(condition_non_el), cost_total_net, NA_real_) + ) + } + if (ipdc_d) { + condition_d <- substitute(eval(condition) & ipdc == "D") + episode_file <- episode_file %>% + dplyr::mutate( + 
"{prefix}_daycase_episodes" := dplyr::if_else(eval(condition_d), 1, NA_real_), + "{prefix}_daycase_cost" := dplyr::if_else(eval(condition_d), cost_total_net, NA_real_) + ) + } + return(episode_file) +} + +#' Add standard columns +#' +#' @description Add standard columns (DoB, postcode, gpprac, episodes, cost) to episode file. +#' +#' @inheritParams add_acute_columns +#' @param drop Any columns out of "DoB", "postcode", and "gpprac" that should be dropped +#' @param episode Whether to create prefix_episodes col, e.g. "Acute_episodes" +#' @param cost Whether to create prefix_cost col, e.g. "Acute_cost" +add_standard_cols <- function(episode_file, prefix, condition, drop = NULL, episode = FALSE, cost = FALSE) { + episode_file <- dplyr::bind_cols(episode_file, create_cols(episode_file, prefix, condition, drop)) + if (episode) { + episode_file <- dplyr::mutate(episode_file, "{prefix}_episodes" := dplyr::if_else(eval(condition), 1, NA_real_)) + } + if (cost) { + episode_file <- dplyr::mutate(episode_file, "{prefix}_cost" := dplyr::if_else(eval(condition), cost_total_net, NA_real_)) + } + return(episode_file) +} + +#' Create standard cols +#' +#' @description Create standard cols (DoB, postcode, gpprac). +#' +#' @inheritParams add_acute_columns +#' @param drop Any columns out of "DoB", "postcode", and "gpprac" that should be dropped +create_cols <- function(episode_file, prefix, condition, drop) { + cols <- c("DoB", "postcode", "gpprac") + if (!is.null(drop)) { + cols <- cols[!cols %in% drop] + } + episode_file <- purrr::map_dfc(cols, ~ create_col(episode_file, .x, prefix, condition)) + return(episode_file) +} + +#' Create standard col +#' +#' @description Create single standard column. 
+#' +#' @inheritParams add_acute_columns +#' @inheritParams na_type +create_col <- function(episode_file, col, prefix, condition) { + episode_file %>% + dplyr::mutate("{prefix}_{col}" := dplyr::if_else(eval(condition), .data[[tolower(col)]], na_type(col))) %>% + dplyr::select(dplyr::last_col()) +} + +#' NA type +#' +#' @description Helper function to use correct NA type depending on +#' which type of column is created. +#' +#' @param col Which column to create ("DoB", "postcode", or "gpprac") +na_type <- function(col = c("DoB", "postcode", "gpprac")) { + col <- match.arg(col) + na_type <- switch(col, + "DoB" = lubridate::NA_Date_, + "postcode" = NA_character_, + "gpprac" = NA_real_ + ) + return(na_type) +} + From e095ddf5d3fe5c0bccf1e3a991be66bd7fa5c408 Mon Sep 17 00:00:00 2001 From: jr-mandy Date: Wed, 8 Feb 2023 11:33:24 +0000 Subject: [PATCH 002/200] Update documentation --- man/add_acute_columns.Rd | 16 ++++++++++++++++ man/add_ae_columns.Rd | 16 ++++++++++++++++ man/add_all_columns.Rd | 12 ++++++++++++ man/add_at_columns.Rd | 16 ++++++++++++++++ man/add_ch_columns.Rd | 16 ++++++++++++++++ man/add_cij_columns.Rd | 14 ++++++++++++++ man/add_cmh_columns.Rd | 16 ++++++++++++++++ man/add_dd_columns.Rd | 16 ++++++++++++++++ man/add_dn_columns.Rd | 16 ++++++++++++++++ man/add_gls_columns.Rd | 16 ++++++++++++++++ man/add_hc_columns.Rd | 16 ++++++++++++++++ man/add_hl1_columns.Rd | 16 ++++++++++++++++ man/add_ipdc_cols.Rd | 21 +++++++++++++++++++++ man/add_mat_columns.Rd | 16 ++++++++++++++++ man/add_mh_columns.Rd | 16 ++++++++++++++++ man/add_nrs_columns.Rd | 16 ++++++++++++++++ man/add_nsu_columns.Rd | 16 ++++++++++++++++ man/add_ooh_columns.Rd | 16 ++++++++++++++++ man/add_op_columns.Rd | 16 ++++++++++++++++ man/add_pis_columns.Rd | 16 ++++++++++++++++ man/add_sds_columns.Rd | 16 ++++++++++++++++ man/add_standard_cols.Rd | 29 +++++++++++++++++++++++++++++ man/create_col.Rd | 18 ++++++++++++++++++ man/create_cols.Rd | 18 ++++++++++++++++++ man/create_individual_file.Rd | 14 
++++++++++++++ man/find_non_duplicates.Rd | 19 +++++++++++++++++++ man/na_type.Rd | 15 +++++++++++++++ man/remove_blank_chi.Rd | 14 ++++++++++++++ 28 files changed, 462 insertions(+) create mode 100644 man/add_acute_columns.Rd create mode 100644 man/add_ae_columns.Rd create mode 100644 man/add_all_columns.Rd create mode 100644 man/add_at_columns.Rd create mode 100644 man/add_ch_columns.Rd create mode 100644 man/add_cij_columns.Rd create mode 100644 man/add_cmh_columns.Rd create mode 100644 man/add_dd_columns.Rd create mode 100644 man/add_dn_columns.Rd create mode 100644 man/add_gls_columns.Rd create mode 100644 man/add_hc_columns.Rd create mode 100644 man/add_hl1_columns.Rd create mode 100644 man/add_ipdc_cols.Rd create mode 100644 man/add_mat_columns.Rd create mode 100644 man/add_mh_columns.Rd create mode 100644 man/add_nrs_columns.Rd create mode 100644 man/add_nsu_columns.Rd create mode 100644 man/add_ooh_columns.Rd create mode 100644 man/add_op_columns.Rd create mode 100644 man/add_pis_columns.Rd create mode 100644 man/add_sds_columns.Rd create mode 100644 man/add_standard_cols.Rd create mode 100644 man/create_col.Rd create mode 100644 man/create_cols.Rd create mode 100644 man/create_individual_file.Rd create mode 100644 man/find_non_duplicates.Rd create mode 100644 man/na_type.Rd create mode 100644 man/remove_blank_chi.Rd diff --git a/man/add_acute_columns.Rd b/man/add_acute_columns.Rd new file mode 100644 index 000000000..db02a88a5 --- /dev/null +++ b/man/add_acute_columns.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_acute_columns} +\alias{add_acute_columns} +\title{Add Acute columns} +\usage{ +add_acute_columns(episode_file, prefix, condition) +} +\arguments{ +\item{prefix}{Prefix to add to related columns, e.g. 
"Acute"} + +\item{condition}{Condition to create new columns based on} +} +\description{ +Add Acute columns +} diff --git a/man/add_ae_columns.Rd b/man/add_ae_columns.Rd new file mode 100644 index 000000000..3c90fb4f1 --- /dev/null +++ b/man/add_ae_columns.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_ae_columns} +\alias{add_ae_columns} +\title{Add AE columns} +\usage{ +add_ae_columns(episode_file, prefix, condition) +} +\arguments{ +\item{prefix}{Prefix to add to related columns, e.g. "Acute"} + +\item{condition}{Condition to create new columns based on} +} +\description{ +Add AE columns +} diff --git a/man/add_all_columns.Rd b/man/add_all_columns.Rd new file mode 100644 index 000000000..ce0540864 --- /dev/null +++ b/man/add_all_columns.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_all_columns} +\alias{add_all_columns} +\title{Add all columns} +\usage{ +add_all_columns(episode_file) +} +\description{ +Add new columns based on SMRType and recid which follow a pattern +of prefixed column names created based on some condition. +} diff --git a/man/add_at_columns.Rd b/man/add_at_columns.Rd new file mode 100644 index 000000000..5cb469eea --- /dev/null +++ b/man/add_at_columns.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_at_columns} +\alias{add_at_columns} +\title{Add AT columns} +\usage{ +add_at_columns(episode_file, prefix, condition) +} +\arguments{ +\item{prefix}{Prefix to add to related columns, e.g. 
"Acute"} + +\item{condition}{Condition to create new columns based on} +} +\description{ +Add AT columns +} diff --git a/man/add_ch_columns.Rd b/man/add_ch_columns.Rd new file mode 100644 index 000000000..e0abfeaa0 --- /dev/null +++ b/man/add_ch_columns.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_ch_columns} +\alias{add_ch_columns} +\title{Add CH columns} +\usage{ +add_ch_columns(episode_file, prefix, condition) +} +\arguments{ +\item{prefix}{Prefix to add to related columns, e.g. "Acute"} + +\item{condition}{Condition to create new columns based on} +} +\description{ +Add CH columns +} diff --git a/man/add_cij_columns.Rd b/man/add_cij_columns.Rd new file mode 100644 index 000000000..7d00e6299 --- /dev/null +++ b/man/add_cij_columns.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_cij_columns} +\alias{add_cij_columns} +\title{Add CIJ-related columns} +\usage{ +add_cij_columns(episode_file) +} +\arguments{ +\item{episode_file}{Tibble containing episodic data} +} +\description{ +Add new columns related to CIJ +} diff --git a/man/add_cmh_columns.Rd b/man/add_cmh_columns.Rd new file mode 100644 index 000000000..ebb80c293 --- /dev/null +++ b/man/add_cmh_columns.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_cmh_columns} +\alias{add_cmh_columns} +\title{Add CMH columns} +\usage{ +add_cmh_columns(episode_file, prefix, condition) +} +\arguments{ +\item{prefix}{Prefix to add to related columns, e.g. 
"Acute"} + +\item{condition}{Condition to create new columns based on} +} +\description{ +Add CMH columns +} diff --git a/man/add_dd_columns.Rd b/man/add_dd_columns.Rd new file mode 100644 index 000000000..425169e70 --- /dev/null +++ b/man/add_dd_columns.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_dd_columns} +\alias{add_dd_columns} +\title{Add DD columns} +\usage{ +add_dd_columns(episode_file, prefix, condition) +} +\arguments{ +\item{prefix}{Prefix to add to related columns, e.g. "Acute"} + +\item{condition}{Condition to create new columns based on} +} +\description{ +Add DD columns +} diff --git a/man/add_dn_columns.Rd b/man/add_dn_columns.Rd new file mode 100644 index 000000000..0f97bd01f --- /dev/null +++ b/man/add_dn_columns.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_dn_columns} +\alias{add_dn_columns} +\title{Add DN columns} +\usage{ +add_dn_columns(episode_file, prefix, condition) +} +\arguments{ +\item{prefix}{Prefix to add to related columns, e.g. "Acute"} + +\item{condition}{Condition to create new columns based on} +} +\description{ +Add DN columns +} diff --git a/man/add_gls_columns.Rd b/man/add_gls_columns.Rd new file mode 100644 index 000000000..4475fa5d0 --- /dev/null +++ b/man/add_gls_columns.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_gls_columns} +\alias{add_gls_columns} +\title{Add GLS columns} +\usage{ +add_gls_columns(episode_file, prefix, condition) +} +\arguments{ +\item{prefix}{Prefix to add to related columns, e.g. 
"Acute"} + +\item{condition}{Condition to create new columns based on} +} +\description{ +Add GLS columns +} diff --git a/man/add_hc_columns.Rd b/man/add_hc_columns.Rd new file mode 100644 index 000000000..60352e37b --- /dev/null +++ b/man/add_hc_columns.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_hc_columns} +\alias{add_hc_columns} +\title{Add HC columns} +\usage{ +add_hc_columns(episode_file, prefix, condition) +} +\arguments{ +\item{prefix}{Prefix to add to related columns, e.g. "Acute"} + +\item{condition}{Condition to create new columns based on} +} +\description{ +Add HC columns +} diff --git a/man/add_hl1_columns.Rd b/man/add_hl1_columns.Rd new file mode 100644 index 000000000..03dcc2dac --- /dev/null +++ b/man/add_hl1_columns.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_hl1_columns} +\alias{add_hl1_columns} +\title{Add HL1 columns} +\usage{ +add_hl1_columns(episode_file, prefix, condition) +} +\arguments{ +\item{prefix}{Prefix to add to related columns, e.g. "Acute"} + +\item{condition}{Condition to create new columns based on} +} +\description{ +Add HL1 columns +} diff --git a/man/add_ipdc_cols.Rd b/man/add_ipdc_cols.Rd new file mode 100644 index 000000000..537f6d0ce --- /dev/null +++ b/man/add_ipdc_cols.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_ipdc_cols} +\alias{add_ipdc_cols} +\title{Add columns based on IPDC} +\usage{ +add_ipdc_cols(episode_file, prefix, condition, ipdc_d = TRUE, elective = TRUE) +} +\arguments{ +\item{prefix}{Prefix to add to related columns, e.g. 
"Acute"} + +\item{condition}{Condition to create new columns based on} + +\item{ipdc_d}{Whether to create columns based on IPDC = "D" (lgl)} + +\item{elective}{Whether to create columns based on Elective/Non-Elective cij_pattype (lgl)} +} +\description{ +Add columns based on value in IPDC column, which can +be further split by Elective/Non-Elective CIJ. +} diff --git a/man/add_mat_columns.Rd b/man/add_mat_columns.Rd new file mode 100644 index 000000000..2836faa2a --- /dev/null +++ b/man/add_mat_columns.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_mat_columns} +\alias{add_mat_columns} +\title{Add Mat columns} +\usage{ +add_mat_columns(episode_file, prefix, condition) +} +\arguments{ +\item{prefix}{Prefix to add to related columns, e.g. "Acute"} + +\item{condition}{Condition to create new columns based on} +} +\description{ +Add Mat columns +} diff --git a/man/add_mh_columns.Rd b/man/add_mh_columns.Rd new file mode 100644 index 000000000..5c1279656 --- /dev/null +++ b/man/add_mh_columns.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_mh_columns} +\alias{add_mh_columns} +\title{Add MH columns} +\usage{ +add_mh_columns(episode_file, prefix, condition) +} +\arguments{ +\item{prefix}{Prefix to add to related columns, e.g. "Acute"} + +\item{condition}{Condition to create new columns based on} +} +\description{ +Add MH columns +} diff --git a/man/add_nrs_columns.Rd b/man/add_nrs_columns.Rd new file mode 100644 index 000000000..20076ce93 --- /dev/null +++ b/man/add_nrs_columns.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_nrs_columns} +\alias{add_nrs_columns} +\title{Add NRS columns} +\usage{ +add_nrs_columns(episode_file, prefix, condition) +} +\arguments{ +\item{prefix}{Prefix to add to related columns, e.g. 
"Acute"} + +\item{condition}{Condition to create new columns based on} +} +\description{ +Add NRS columns +} diff --git a/man/add_nsu_columns.Rd b/man/add_nsu_columns.Rd new file mode 100644 index 000000000..8518dc6dd --- /dev/null +++ b/man/add_nsu_columns.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_nsu_columns} +\alias{add_nsu_columns} +\title{Add NSU columns} +\usage{ +add_nsu_columns(episode_file, prefix, condition) +} +\arguments{ +\item{prefix}{Prefix to add to related columns, e.g. "Acute"} + +\item{condition}{Condition to create new columns based on} +} +\description{ +Add NSU columns +} diff --git a/man/add_ooh_columns.Rd b/man/add_ooh_columns.Rd new file mode 100644 index 000000000..5a9078259 --- /dev/null +++ b/man/add_ooh_columns.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_ooh_columns} +\alias{add_ooh_columns} +\title{Add OoH columns} +\usage{ +add_ooh_columns(episode_file, prefix, condition) +} +\arguments{ +\item{prefix}{Prefix to add to related columns, e.g. "Acute"} + +\item{condition}{Condition to create new columns based on} +} +\description{ +Add OoH columns +} diff --git a/man/add_op_columns.Rd b/man/add_op_columns.Rd new file mode 100644 index 000000000..5fd8d78c7 --- /dev/null +++ b/man/add_op_columns.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_op_columns} +\alias{add_op_columns} +\title{Add OP columns} +\usage{ +add_op_columns(episode_file, prefix, condition) +} +\arguments{ +\item{prefix}{Prefix to add to related columns, e.g. 
"Acute"} + +\item{condition}{Condition to create new columns based on} +} +\description{ +Add OP columns +} diff --git a/man/add_pis_columns.Rd b/man/add_pis_columns.Rd new file mode 100644 index 000000000..b19178df7 --- /dev/null +++ b/man/add_pis_columns.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_pis_columns} +\alias{add_pis_columns} +\title{Add PIS columns} +\usage{ +add_pis_columns(episode_file, prefix, condition) +} +\arguments{ +\item{prefix}{Prefix to add to related columns, e.g. "Acute"} + +\item{condition}{Condition to create new columns based on} +} +\description{ +Add PIS columns +} diff --git a/man/add_sds_columns.Rd b/man/add_sds_columns.Rd new file mode 100644 index 000000000..ec2a4668e --- /dev/null +++ b/man/add_sds_columns.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_sds_columns} +\alias{add_sds_columns} +\title{Add SDS columns} +\usage{ +add_sds_columns(episode_file, prefix, condition) +} +\arguments{ +\item{prefix}{Prefix to add to related columns, e.g. "Acute"} + +\item{condition}{Condition to create new columns based on} +} +\description{ +Add SDS columns +} diff --git a/man/add_standard_cols.Rd b/man/add_standard_cols.Rd new file mode 100644 index 000000000..35ee445f2 --- /dev/null +++ b/man/add_standard_cols.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{add_standard_cols} +\alias{add_standard_cols} +\title{Add columns based on IPDC} +\usage{ +add_standard_cols( + episode_file, + prefix, + condition, + drop = NULL, + episode = FALSE, + cost = FALSE +) +} +\arguments{ +\item{prefix}{Prefix to add to related columns, e.g. 
"Acute"} + +\item{condition}{Condition to create new columns based on} + +\item{drop}{Any columns out of "DoB", "postcode", and "gpprac" that should be dropped} + +\item{episode}{Whether to create prefix_episodes col, e.g. "Acute_episodes"} + +\item{cost}{Whether to create prefix_cost col, e.g. "Acute_cost"} +} +\description{ +Add standard columns (DoB, postcode, gpprac, episodes, cost) to episode file. +} diff --git a/man/create_col.Rd b/man/create_col.Rd new file mode 100644 index 000000000..496057bc1 --- /dev/null +++ b/man/create_col.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{create_col} +\alias{create_col} +\title{Create standard col} +\usage{ +create_col(episode_file, col, prefix, condition) +} +\arguments{ +\item{col}{Which column to create ("DoB", "postcode", or "gpprac")} + +\item{prefix}{Prefix to add to related columns, e.g. "Acute"} + +\item{condition}{Condition to create new columns based on} +} +\description{ +Create single standard column. +} diff --git a/man/create_cols.Rd b/man/create_cols.Rd new file mode 100644 index 000000000..d6540fa89 --- /dev/null +++ b/man/create_cols.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{create_cols} +\alias{create_cols} +\title{Create standard cols} +\usage{ +create_cols(episode_file, prefix, condition, drop) +} +\arguments{ +\item{prefix}{Prefix to add to related columns, e.g. "Acute"} + +\item{condition}{Condition to create new columns based on} + +\item{drop}{Any columns out of "DoB", "postcode", and "gpprac" that should be dropped} +} +\description{ +Create standard cols (DoB, postcode, gpprac). 
+} diff --git a/man/create_individual_file.Rd b/man/create_individual_file.Rd new file mode 100644 index 000000000..8b0887565 --- /dev/null +++ b/man/create_individual_file.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{create_individual_file} +\alias{create_individual_file} +\title{Create individual file} +\usage{ +create_individual_file(episode_file) +} +\arguments{ +\item{episode_file}{Tibble containing episodic data} +} +\description{ +Creates individual file from episode file +} diff --git a/man/find_non_duplicates.Rd b/man/find_non_duplicates.Rd new file mode 100644 index 000000000..ba82bd5c4 --- /dev/null +++ b/man/find_non_duplicates.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{find_non_duplicates} +\alias{find_non_duplicates} +\title{Find non-duplicates} +\usage{ +find_non_duplicates(episode_file, group, col_name) +} +\arguments{ +\item{episode_file}{Tibble containing episodic data} + +\item{group}{Column to group by} + +\item{col_name}{Name of new column} +} +\description{ +Create new column which marks first (per group) +non-duplicated observation as 1, with any duplicates marked as 0. +} diff --git a/man/na_type.Rd b/man/na_type.Rd new file mode 100644 index 000000000..f8cbc9581 --- /dev/null +++ b/man/na_type.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{na_type} +\alias{na_type} +\title{NA type} +\usage{ +na_type(col = c("DoB", "postcode", "gpprac")) +} +\arguments{ +\item{col}{Which column to create ("DoB", "postcode", or "gpprac")} +} +\description{ +Helper function to use correct NA type depending on +which type of column is created. 
+} diff --git a/man/remove_blank_chi.Rd b/man/remove_blank_chi.Rd new file mode 100644 index 000000000..9cba40a8f --- /dev/null +++ b/man/remove_blank_chi.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{remove_blank_chi} +\alias{remove_blank_chi} +\title{Remove blank CHI} +\usage{ +remove_blank_chi(episode_file) +} +\arguments{ +\item{episode_file}{Tibble containing episodic data} +} +\description{ +Convert blank strings to NA and remove NAs from CHI column +} From 78cc51fb9509cc0830b2f43cb8a4ad3c11b82375 Mon Sep 17 00:00:00 2001 From: jr-mandy Date: Wed, 8 Feb 2023 11:38:11 +0000 Subject: [PATCH 003/200] Style code --- R/create_individual_file.R | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index b1e39f8df..f6c16722a 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -48,16 +48,16 @@ add_cij_columns <- function(episode_file) { episode_file %>% dplyr::mutate( CIJ_non_el = dplyr::if_else(.data$cij_pattype_code == 0, - .data$Distinct_CIJ, - NA_real_ + .data$Distinct_CIJ, + NA_real_ ), CIJ_el = dplyr::if_else(.data$cij_pattype_code == 1, - .data$Distinct_CIJ, - NA_real_ + .data$Distinct_CIJ, + NA_real_ ), CIJ_mat = dplyr::if_else(.data$cij_pattype_code == 2, - .data$Distinct_CIJ, - NA_real_ + .data$Distinct_CIJ, + NA_real_ ) ) %>% dplyr::mutate(cij_delay = dplyr::if_else( @@ -469,4 +469,3 @@ na_type <- function(col = c("DoB", "postcode", "gpprac")) { ) return(na_type) } - From e1b69a0d6a191da6f80bc68452945a91a1fe8485 Mon Sep 17 00:00:00 2001 From: Mandy Norrbo Date: Wed, 8 Feb 2023 11:42:57 +0000 Subject: [PATCH 004/200] Fix typo --- R/create_individual_file.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index b1e39f8df..d650c611a 100644 --- a/R/create_individual_file.R +++ 
b/R/create_individual_file.R @@ -408,7 +408,7 @@ add_ipdc_cols <- function(episode_file, prefix, condition, ipdc_d = TRUE, electi return(episode_file) } -#' Add columns based on IPDC +#' Add standard columns #' #' @description Add standard columns (DoB, postcode, gpprac, episodes, cost) to episode file. #' From 18f78f9e51e2bb783cad02f527e9c3d538aa19e4 Mon Sep 17 00:00:00 2001 From: jr-mandy Date: Wed, 8 Feb 2023 11:47:19 +0000 Subject: [PATCH 005/200] Update documentation --- man/add_standard_cols.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/add_standard_cols.Rd b/man/add_standard_cols.Rd index 35ee445f2..2e22df08a 100644 --- a/man/add_standard_cols.Rd +++ b/man/add_standard_cols.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/create_individual_file.R \name{add_standard_cols} \alias{add_standard_cols} -\title{Add columns based on IPDC} +\title{Add standard columns} \usage{ add_standard_cols( episode_file, From de62045de2a311d809235f01e37b24092c8062b6 Mon Sep 17 00:00:00 2001 From: Mandy Norrbo Date: Wed, 8 Feb 2023 12:56:07 +0000 Subject: [PATCH 006/200] Fix typos --- R/create_individual_file.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 1b11cd0ae..e3caf9ae6 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -265,7 +265,7 @@ add_nsu_columns <- function(episode_file, prefix, condition) { condition <- substitute(condition) episode_file %>% add_standard_cols(prefix, condition) %>% - dplyr::mutate("{prefix}", dplyr::if_else(eval(condition), 1, NA_real_)) + dplyr::mutate("{prefix}" := dplyr::if_else(eval(condition), 1, NA_real_)) } #' Add NRS columns @@ -275,7 +275,7 @@ add_nrs_columns <- function(episode_file, prefix, condition) { condition <- substitute(condition) episode_file %>% add_standard_cols(prefix, condition) %>% - dplyr::mutate("{prefix}", dplyr::if_else(eval(condition), 1, NA_real_)) + dplyr::mutate("{prefix}" := 
dplyr::if_else(eval(condition), 1, NA_real_)) } #' Add HL1 columns From 765072bd61a028e45d43373c01a46665f7b90a75 Mon Sep 17 00:00:00 2001 From: Mandy Norrbo Date: Wed, 8 Feb 2023 13:29:21 +0000 Subject: [PATCH 007/200] Fix warnings? --- R/create_individual_file.R | 134 ++++++++++++++++++------------------- 1 file changed, 67 insertions(+), 67 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index e3caf9ae6..f0d9f7b02 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -67,13 +67,13 @@ add_cij_columns <- function(episode_file) { )) %>% dplyr::mutate( preventable_admissions = dplyr::if_else( - (cij_ppa == 1 & Distinct_CIJ == 1), + (.data$cij_ppa == 1 & .data$Distinct_CIJ == 1), 1, 0 ), preventable_beddays = dplyr::if_else( - (cij_ppa == 1 & Distinct_CIJ == 1), - as.numeric(cij_end_date - cij_start_date), + (.data$cij_ppa == 1 & .data$Distinct_CIJ == 1), + as.numeric(.data$cij_end_date - .data$cij_start_date), 0 ) ) @@ -84,32 +84,32 @@ add_cij_columns <- function(episode_file) { #' @description Add new columns based on SMRType and recid which follow a pattern #' of prefixed column names created based on some condition. 
#' -#' @inheritParams create_individual_filw +#' @inheritParams create_individual_file add_all_columns <- function(episode_file) { episode_file %>% - add_acute_columns("Acute", (smrtype == "Acute-DC" | smrtype == "Acute-IP") & cij_pattype != "Maternity") %>% - add_mat_columns("Mat", recid == "02B" | cij_pattype == "Maternity") %>% - add_mh_columns("MH", recid == "04B" & cij_pattype != "Maternity") %>% - add_gls_columns("GLS", smrtype == "GLS-IP") %>% - add_op_columns("OP", recid == "00B") %>% - add_ae_columns("AE", recid == "AE2") %>% - add_pis_columns("PIS", recid == "PIS") %>% - add_ooh_columns("OoH", recid == "OoH") %>% - add_dn_columns("DN", recid == "DN") %>% - add_cmh_columns("CMH", recid == "CMH") %>% - add_dd_columns("DD", recid == "DD") %>% - add_nsu_columns("NSU", recid == "NSU") %>% - add_nrs_columns("NRS", recid == "NRS") %>% - add_hl1_columns("HL1", recid == "HL1") %>% - add_ch_columns("CH", recid == "CH") %>% - add_hc_columns("HC", recid == "HC") %>% - add_at_columns("AT", recid == "AT") %>% - add_sds_columns("SDS", recid == "SDS") + add_acute_columns("Acute", (.data$smrtype == "Acute-DC" | .data$smrtype == "Acute-IP") & .data$cij_pattype != "Maternity") %>% + add_mat_columns("Mat", .data$recid == "02B" | .data$cij_pattype == "Maternity") %>% + add_mh_columns("MH", .data$recid == "04B" & .data$cij_pattype != "Maternity") %>% + add_gls_columns("GLS", .data$smrtype == "GLS-IP") %>% + add_op_columns("OP", .data$recid == "00B") %>% + add_ae_columns("AE", .data$recid == "AE2") %>% + add_pis_columns("PIS", .data$recid == "PIS") %>% + add_ooh_columns("OoH", .data$recid == "OoH") %>% + add_dn_columns("DN", .data$recid == "DN") %>% + add_cmh_columns("CMH", .data$recid == "CMH") %>% + add_dd_columns("DD", .data$recid == "DD") %>% + add_nsu_columns("NSU", .data$recid == "NSU") %>% + add_nrs_columns("NRS", .data$recid == "NRS") %>% + add_hl1_columns("HL1", .data$recid == "HL1") %>% + add_ch_columns("CH", .data$recid == "CH") %>% + add_hc_columns("HC", .data$recid 
== "HC") %>% + add_at_columns("AT", .data$recid == "AT") %>% + add_sds_columns("SDS", .data$recid == "SDS") } #' Add Acute columns #' -#' @inheritParams create_individuaL_file +#' @inheritParams create_individual_file #' @param prefix Prefix to add to related columns, e.g. "Acute" #' @param condition Condition to create new columns based on add_acute_columns <- function(episode_file, prefix, condition) { @@ -160,13 +160,13 @@ add_op_columns <- function(episode_file, prefix, condition) { episode_file <- episode_file %>% dplyr::mutate( "{prefix}_newcons_attendances" := dplyr::if_else(eval(condition_1), 1, NA_real_), - "{prefix}_cost_attend" := dplyr::if_else(eval(condition_1), cost_total_net, NA_real_) + "{prefix}_cost_attend" := dplyr::if_else(eval(condition_1), .data$cost_total_net, NA_real_) ) condition_5_8 <- substitute(condition & attendance_status %in% c(5, 8)) episode_file <- episode_file %>% dplyr::mutate( "{prefix}_newcons_dnas" := dplyr::if_else(eval(condition_5_8), 1, NA_real_), - "{prefix}_cost_dnas" := dplyr::if_else(eval(condition_5_8), cost_total_net_incdnas, NA_real_) + "{prefix}_cost_dnas" := dplyr::if_else(eval(condition_5_8), .data$cost_total_net_incdnas, NA_real_) ) return(episode_file) } @@ -178,7 +178,7 @@ add_ae_columns <- function(episode_file, prefix, condition) { condition <- substitute(condition) episode_file %>% add_standard_cols(prefix, condition, cost = TRUE) %>% - dplyr::mutate("{prefix}_attendances" := dplyr::if_else(eval(condition), cost_total_net, NA_real_)) + dplyr::mutate("{prefix}_attendances" := dplyr::if_else(eval(condition), .data$cost_total_net, NA_real_)) } #' Add PIS columns @@ -188,7 +188,7 @@ add_pis_columns <- function(episode_file, prefix, condition) { condition <- substitute(condition) episode_file %>% add_standard_cols(prefix, condition, cost = TRUE) %>% - dplyr::mutate("{prefix}_paid_items" := dplyr::if_else(eval(condition), no_paid_items, NA_real_)) + dplyr::mutate("{prefix}_paid_items" := 
dplyr::if_else(eval(condition), .data$no_paid_items, NA_real_)) } #' Add OoH columns @@ -199,21 +199,21 @@ add_ooh_columns <- function(episode_file, prefix, condition) { episode_file <- episode_file %>% add_standard_cols(prefix, condition, cost = TRUE) %>% dplyr::mutate( - "{prefix}_homeV" := dplyr::if_else(eval(condition) & smrtype == "OOH-HomeV", 1, NA_real_), - "{prefix}_advice" := dplyr::if_else(eval(condition) & smrtype == "OOH-Advice", 1, NA_real_), - "{prefix}_DN" := dplyr::if_else(eval(condition) & smrtype == "OOH-DN", 1, NA_real_), - "{prefix}_NHS24" := dplyr::if_else(eval(condition) & smrtype == "OOH-NHS24", 1, NA_real_), - "{prefix}_other" := dplyr::if_else(eval(condition) & smrtype == "OOH-Other", 1, NA_real_), - "{prefix}_PCC" := dplyr::if_else(eval(condition) & smrtype == "OOH-PCC", 1, NA_real_), - ooh_covid_advice = dplyr::if_else(eval(condition) & smrtype == "OOH-C19Adv", 1, NA_real_), - ooh_covid_assessment = dplyr::if_else(eval(condition) & smrtype == "OOH-C19Ass", 1, NA_real_), - ooh_covid_other = dplyr::if_else(eval(condition) & smrtype == "OOH-C190th", 1, NA_real_) + "{prefix}_homeV" := dplyr::if_else(eval(condition) & .data$smrtype == "OOH-HomeV", 1, NA_real_), + "{prefix}_advice" := dplyr::if_else(eval(condition) & .data$smrtype == "OOH-Advice", 1, NA_real_), + "{prefix}_DN" := dplyr::if_else(eval(condition) & .data$smrtype == "OOH-DN", 1, NA_real_), + "{prefix}_NHS24" := dplyr::if_else(eval(condition) & .data$smrtype == "OOH-NHS24", 1, NA_real_), + "{prefix}_other" := dplyr::if_else(eval(condition) & .data$smrtype == "OOH-Other", 1, NA_real_), + "{prefix}_PCC" := dplyr::if_else(eval(condition) & .data$smrtype == "OOH-PCC", 1, NA_real_), + ooh_covid_advice = dplyr::if_else(eval(condition) & .data$smrtype == "OOH-C19Adv", 1, NA_real_), + ooh_covid_assessment = dplyr::if_else(eval(condition) & .data$smrtype == "OOH-C19Ass", 1, NA_real_), + ooh_covid_other = dplyr::if_else(eval(condition) & .data$smrtype == "OOH-C190th", 1, NA_real_) ) 
episode_file <- episode_file %>% dplyr::mutate( - OoH_consultation_time = dplyr::if_else(eval(condition), as.numeric((lubridate::seconds_to_period(keytime2) + keydate2_dateformat) - (lubridate::seconds_to_period(keytime1) + keydate1_dateformat), units = "mins"), NA_real_), - OoH_consultation_time = dplyr::if_else(OoH_consultation_time < 0, 0, OoH_consultation_time) + OoH_consultation_time = dplyr::if_else(eval(condition), as.numeric((lubridate::seconds_to_period(.data$keytime2) + .data$keydate2_dateformat) - (lubridate::seconds_to_period(.data$keytime1) + .data$keydate1_dateformat), units = "mins"), NA_real_), + OoH_consultation_time = dplyr::if_else(OoH_consultation_time < 0, 0, .data$OoH_consultation_time) ) return(episode_file) } @@ -225,7 +225,7 @@ add_dn_columns <- function(episode_file, prefix, condition) { condition <- substitute(condition) episode_file %>% add_standard_cols(prefix, condition, episode = TRUE, cost = TRUE) %>% - dplyr::mutate("{prefix}_contacts" := dplyr::if_else(eval(condition), totalnodncontacts, NA_real_)) + dplyr::mutate("{prefix}_contacts" := dplyr::if_else(eval(condition), .data$totalnodncontacts, NA_real_)) } #' Add CMH columns @@ -247,13 +247,13 @@ add_dd_columns <- function(episode_file, prefix, condition) { episode_file <- episode_file %>% dplyr::mutate( "{prefix}_NonCode9_episodes" := dplyr::if_else(eval(condition_delay), 1, NA_real_), - "{prefix}_NonCode9_beddays" := dplyr::if_else(eval(condition_delay), yearstay, NA_real_) + "{prefix}_NonCode9_beddays" := dplyr::if_else(eval(condition_delay), .data$yearstay, NA_real_) ) condition_delay_9 <- substitute(condition & primary_delay_reason == "9") episode_file <- episode_file %>% dplyr::mutate( "{prefix}_Code9_episodes" := dplyr::if_else(eval(condition_delay_9), 1, NA_real_), - "{prefix}_Code9_beddays" := dplyr::if_else(eval(condition_delay_9), yearstay, NA_real_) + "{prefix}_Code9_beddays" := dplyr::if_else(eval(condition_delay_9), .data$yearstay, NA_real_) ) return(episode_file) } @@ 
-295,12 +295,12 @@ add_ch_columns <- function(episode_file, prefix, condition) { episode_file %>% add_standard_cols(prefix, condition) %>% dplyr::mutate( - ch_cis_episodes = dplyr::if_else(eval(condition), first_ch_ep, NA_real_), - ch_cost_per_day = dplyr::if_else(eval(condition) & yearstay > 0, cost_total_net / yearstay, NA_real_), - ch_cost_per_day = dplyr::if_else(eval(condition) & yearstay == 0, cost_total_net / yearstay, ch_cost_per_day), + ch_cis_episodes = dplyr::if_else(eval(condition), .data$first_ch_ep, NA_real_), + ch_cost_per_day = dplyr::if_else(eval(condition) & .data$yearstay > 0, .data$cost_total_net / .data$yearstay, NA_real_), + ch_cost_per_day = dplyr::if_else(eval(condition) & .data$yearstay == 0, .data$cost_total_net / .data$yearstay, .data$ch_cost_per_day), ch_no_cost = eval(condition) & is.na(ch_cost_per_day), - ch_ep_end = dplyr::if_else(eval(condition), keydate2_dateformat, lubridate::NA_Date_), - ch_ep_end = dplyr::if_else(eval(condition) & is.na(ch_ep_end), lubridate::quarter(zoo::as.yearqtr(sc_latest_submission), type = "date_first"), ch_ep_end) + ch_ep_end = dplyr::if_else(eval(condition), .data$keydate2_dateformat, lubridate::NA_Date_), + ch_ep_end = dplyr::if_else(eval(condition) & is.na(ch_ep_end), lubridate::quarter(zoo::as.yearqtr(.data$sc_latest_submission), type = "date_first"), .data$ch_ep_end) ) } @@ -312,29 +312,29 @@ add_hc_columns <- function(episode_file, prefix, condition) { episode_file <- episode_file %>% add_standard_cols(prefix, condition, episode = TRUE) %>% dplyr::mutate( - "{prefix}_total_hours" := dplyr::if_else(eval(condition), hc_hours_annual, NA_real_), - "{prefix}_total_cost" := dplyr::if_else(eval(condition), cost_total_net, NA_real_), + "{prefix}_total_hours" := dplyr::if_else(eval(condition), .data$hc_hours_annual, NA_real_), + "{prefix}_total_cost" := dplyr::if_else(eval(condition), .data$cost_total_net, NA_real_), ) condition_per <- substitute(condition & smrtype == "HC-Per") episode_file <- episode_file 
%>% dplyr::mutate( "{prefix}_personal_episodes" := dplyr::if_else(eval(condition_per), 1, NA_real_), - "{prefix}_personal_hours" := dplyr::if_else(eval(condition_per), HC_total_hours, NA_real_), - "{prefix}_personal_hours_cost" := dplyr::if_else(eval(condition_per), cost_total_net, NA_real_) + "{prefix}_personal_hours" := dplyr::if_else(eval(condition_per), .data$HC_total_hours, NA_real_), + "{prefix}_personal_hours_cost" := dplyr::if_else(eval(condition_per), .data$cost_total_net, NA_real_) ) condition_non_per <- substitute(condition & smrtype == "HC-Non-Per") episode_file <- episode_file %>% dplyr::mutate( "{prefix}_non_personal_episodes" := dplyr::if_else(eval(condition_non_per), 1, NA_real_), - "{prefix}_non_personal_hours" := dplyr::if_else(eval(condition_non_per), hc_hours_annual, NA_real_), - "{prefix}_non_personal_hours_cost" := dplyr::if_else(eval(condition_non_per), cost_total_net, NA_real_) + "{prefix}_non_personal_hours" := dplyr::if_else(eval(condition_non_per), .data$hc_hours_annual, NA_real_), + "{prefix}_non_personal_hours_cost" := dplyr::if_else(eval(condition_non_per), .data$cost_total_net, NA_real_) ) condition_reabl <- substitute(condition & hc_reablement == 1) episode_file <- episode_file %>% dplyr::mutate( "{prefix}_reablement_episodes" := dplyr::if_else(eval(condition_reabl), 1, NA_real_), - "{prefix}_reablement_hours" := dplyr::if_else(eval(condition_reabl), hc_hours_annual, NA_real_), - "{prefix}_reablement_hours_cost" := dplyr::if_else(eval(condition_reabl), cost_total_net, NA_real_) + "{prefix}_reablement_hours" := dplyr::if_else(eval(condition_reabl), .data$hc_hours_annual, NA_real_), + "{prefix}_reablement_hours_cost" := dplyr::if_else(eval(condition_reabl), .data$cost_total_net, NA_real_) ) } @@ -346,8 +346,8 @@ add_at_columns <- function(episode_file, prefix, condition) { episode_file %>% add_standard_cols(prefix, condition) %>% dplyr::mutate( - "{prefix}_alarms" := dplyr::if_else(eval(condition) & smrtype == "AT-Alarm", 1, NA_real_), 
- "{prefix}_telecare" := dplyr::if_else(eval(condition) & smrtype == "AT-Tele", 1, NA_real_) + "{prefix}_alarms" := dplyr::if_else(eval(condition) & .data$smrtype == "AT-Alarm", 1, NA_real_), + "{prefix}_telecare" := dplyr::if_else(eval(condition) & .data$smrtype == "AT-Tele", 1, NA_real_) ) } @@ -359,10 +359,10 @@ add_sds_columns <- function(episode_file, prefix, condition) { episode_file %>% add_standard_cols(prefix, condition) %>% dplyr::mutate( - "{prefix}_option_1" := dplyr::if_else(eval(condition) & smrtype == "SDS-1", 1, NA_real_), - "{prefix}_option_2" := dplyr::if_else(eval(condition) & smrtype == "SDS-2", 1, NA_real_), - "{prefix}_option_3" := dplyr::if_else(eval(condition) & smrtype == "SDS-3", 1, NA_real_), - "{prefix}_option_4" := dplyr::if_else(eval(condition) & smrtype == "SDS-4", 1, NA_real_) + "{prefix}_option_1" := dplyr::if_else(eval(condition) & .data$smrtype == "SDS-1", 1, NA_real_), + "{prefix}_option_2" := dplyr::if_else(eval(condition) & .data$smrtype == "SDS-2", 1, NA_real_), + "{prefix}_option_3" := dplyr::if_else(eval(condition) & .data$smrtype == "SDS-3", 1, NA_real_), + "{prefix}_option_4" := dplyr::if_else(eval(condition) & .data$smrtype == "SDS-4", 1, NA_real_) ) } @@ -379,22 +379,22 @@ add_ipdc_cols <- function(episode_file, prefix, condition, ipdc_d = TRUE, electi episode_file <- episode_file %>% dplyr::mutate( "{prefix}_inpatient_episodes" := dplyr::if_else(eval(condition_i), 1, NA_real_), - "{prefix}_inpatient_beddays" := dplyr::if_else(eval(condition_i), yearstay, NA_real_) + "{prefix}_inpatient_beddays" := dplyr::if_else(eval(condition_i), .data$yearstay, NA_real_) ) if (elective) { condition_el <- substitute(condition_i & cij_pattype == "Elective") episode_file <- episode_file %>% dplyr::mutate( "{prefix}_el_inpatient_episodes" := dplyr::if_else(eval(condition_el), 1, NA_real_), - "{prefix}_el_inpatient_beddays" := dplyr::if_else(eval(condition_el), yearstay, NA_real_), - "{prefix}_el_inpatient_cost" := 
dplyr::if_else(eval(condition_el), cost_total_net, NA_real_) + "{prefix}_el_inpatient_beddays" := dplyr::if_else(eval(condition_el), .data$yearstay, NA_real_), + "{prefix}_el_inpatient_cost" := dplyr::if_else(eval(condition_el), .data$cost_total_net, NA_real_) ) condition_non_el <- substitute(condition_i & cij_pattype == "Non-Elective") episode_file <- episode_file %>% dplyr::mutate( "{prefix}_non_el_inpatient_episodes" := dplyr::if_else(eval(condition_non_el), 1, NA_real_), - "{prefix}_non_el_inpatient_beddays" := dplyr::if_else(eval(condition_non_el), yearstay, NA_real_), - "{prefix}_non_el_inpatient_cost" := dplyr::if_else(eval(condition_non_el), cost_total_net, NA_real_) + "{prefix}_non_el_inpatient_beddays" := dplyr::if_else(eval(condition_non_el), .data$yearstay, NA_real_), + "{prefix}_non_el_inpatient_cost" := dplyr::if_else(eval(condition_non_el), .data$cost_total_net, NA_real_) ) } if (ipdc_d) { @@ -402,7 +402,7 @@ add_ipdc_cols <- function(episode_file, prefix, condition, ipdc_d = TRUE, electi episode_file <- episode_file %>% dplyr::mutate( "{prefix}_daycase_episodes" := dplyr::if_else(eval(condition_d), 1, NA_real_), - "{prefix}_daycase_cost" := dplyr::if_else(eval(condition_d), cost_total_net, NA_real_) + "{prefix}_daycase_cost" := dplyr::if_else(eval(condition_d), .data$cost_total_net, NA_real_) ) } return(episode_file) @@ -422,7 +422,7 @@ add_standard_cols <- function(episode_file, prefix, condition, drop = NULL, epis episode_file <- dplyr::mutate(episode_file, "{prefix}_episodes" := dplyr::if_else(eval(condition), 1, NA_real_)) } if (cost) { - episode_file <- dplyr::mutate(episode_file, "{prefix}_cost" := dplyr::if_else(eval(condition), cost_total_net, NA_real_)) + episode_file <- dplyr::mutate(episode_file, "{prefix}_cost" := dplyr::if_else(eval(condition), .data$cost_total_net, NA_real_)) } return(episode_file) } From f466f66a96ad86abbe6844fa81ef625a1d1a7d22 Mon Sep 17 00:00:00 2001 From: Mandy Norrbo Date: Wed, 8 Feb 2023 13:30:39 +0000 Subject: 
[PATCH 008/200] Update DESCRIPTION --- DESCRIPTION | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 4ac6f798f..f7307ce85 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -49,7 +49,8 @@ Imports: stringr (>= 1.4.0), tibble (>= 3.1.7), tidyr (>= 1.2.0), - tidyselect (>= 1.2.0) + tidyselect (>= 1.2.0), + zoo (>= 1.8.0) Suggests: covr (>= 3.6.1), roxygen2 (>= 7.2.3), From 567be35ba2dee02df66219eea6d2fcc714712801 Mon Sep 17 00:00:00 2001 From: jr-mandy Date: Wed, 8 Feb 2023 13:34:11 +0000 Subject: [PATCH 009/200] Update documentation --- man/add_acute_columns.Rd | 2 ++ man/add_ae_columns.Rd | 2 ++ man/add_all_columns.Rd | 3 +++ man/add_at_columns.Rd | 2 ++ man/add_ch_columns.Rd | 2 ++ man/add_cmh_columns.Rd | 2 ++ man/add_dd_columns.Rd | 2 ++ man/add_dn_columns.Rd | 2 ++ man/add_gls_columns.Rd | 2 ++ man/add_hc_columns.Rd | 2 ++ man/add_hl1_columns.Rd | 2 ++ man/add_ipdc_cols.Rd | 2 ++ man/add_mat_columns.Rd | 2 ++ man/add_mh_columns.Rd | 2 ++ man/add_nrs_columns.Rd | 2 ++ man/add_nsu_columns.Rd | 2 ++ man/add_ooh_columns.Rd | 2 ++ man/add_op_columns.Rd | 2 ++ man/add_pis_columns.Rd | 2 ++ man/add_sds_columns.Rd | 2 ++ man/add_standard_cols.Rd | 2 ++ man/create_col.Rd | 2 ++ man/create_cols.Rd | 2 ++ 23 files changed, 47 insertions(+) diff --git a/man/add_acute_columns.Rd b/man/add_acute_columns.Rd index db02a88a5..52ba071b6 100644 --- a/man/add_acute_columns.Rd +++ b/man/add_acute_columns.Rd @@ -7,6 +7,8 @@ add_acute_columns(episode_file, prefix, condition) } \arguments{ +\item{episode_file}{Tibble containing episodic data} + \item{prefix}{Prefix to add to related columns, e.g. 
"Acute"} \item{condition}{Condition to create new columns based on} diff --git a/man/add_ae_columns.Rd b/man/add_ae_columns.Rd index 3c90fb4f1..9b7099513 100644 --- a/man/add_ae_columns.Rd +++ b/man/add_ae_columns.Rd @@ -7,6 +7,8 @@ add_ae_columns(episode_file, prefix, condition) } \arguments{ +\item{episode_file}{Tibble containing episodic data} + \item{prefix}{Prefix to add to related columns, e.g. "Acute"} \item{condition}{Condition to create new columns based on} diff --git a/man/add_all_columns.Rd b/man/add_all_columns.Rd index ce0540864..d502e95c3 100644 --- a/man/add_all_columns.Rd +++ b/man/add_all_columns.Rd @@ -6,6 +6,9 @@ \usage{ add_all_columns(episode_file) } +\arguments{ +\item{episode_file}{Tibble containing episodic data} +} \description{ Add new columns based on SMRType and recid which follow a pattern of prefixed column names created based on some condition. diff --git a/man/add_at_columns.Rd b/man/add_at_columns.Rd index 5cb469eea..e05ea9101 100644 --- a/man/add_at_columns.Rd +++ b/man/add_at_columns.Rd @@ -7,6 +7,8 @@ add_at_columns(episode_file, prefix, condition) } \arguments{ +\item{episode_file}{Tibble containing episodic data} + \item{prefix}{Prefix to add to related columns, e.g. "Acute"} \item{condition}{Condition to create new columns based on} diff --git a/man/add_ch_columns.Rd b/man/add_ch_columns.Rd index e0abfeaa0..4938f7690 100644 --- a/man/add_ch_columns.Rd +++ b/man/add_ch_columns.Rd @@ -7,6 +7,8 @@ add_ch_columns(episode_file, prefix, condition) } \arguments{ +\item{episode_file}{Tibble containing episodic data} + \item{prefix}{Prefix to add to related columns, e.g. 
"Acute"} \item{condition}{Condition to create new columns based on} diff --git a/man/add_cmh_columns.Rd b/man/add_cmh_columns.Rd index ebb80c293..a1d82cba6 100644 --- a/man/add_cmh_columns.Rd +++ b/man/add_cmh_columns.Rd @@ -7,6 +7,8 @@ add_cmh_columns(episode_file, prefix, condition) } \arguments{ +\item{episode_file}{Tibble containing episodic data} + \item{prefix}{Prefix to add to related columns, e.g. "Acute"} \item{condition}{Condition to create new columns based on} diff --git a/man/add_dd_columns.Rd b/man/add_dd_columns.Rd index 425169e70..08d9c0fe4 100644 --- a/man/add_dd_columns.Rd +++ b/man/add_dd_columns.Rd @@ -7,6 +7,8 @@ add_dd_columns(episode_file, prefix, condition) } \arguments{ +\item{episode_file}{Tibble containing episodic data} + \item{prefix}{Prefix to add to related columns, e.g. "Acute"} \item{condition}{Condition to create new columns based on} diff --git a/man/add_dn_columns.Rd b/man/add_dn_columns.Rd index 0f97bd01f..bf6af008f 100644 --- a/man/add_dn_columns.Rd +++ b/man/add_dn_columns.Rd @@ -7,6 +7,8 @@ add_dn_columns(episode_file, prefix, condition) } \arguments{ +\item{episode_file}{Tibble containing episodic data} + \item{prefix}{Prefix to add to related columns, e.g. "Acute"} \item{condition}{Condition to create new columns based on} diff --git a/man/add_gls_columns.Rd b/man/add_gls_columns.Rd index 4475fa5d0..e71dc755b 100644 --- a/man/add_gls_columns.Rd +++ b/man/add_gls_columns.Rd @@ -7,6 +7,8 @@ add_gls_columns(episode_file, prefix, condition) } \arguments{ +\item{episode_file}{Tibble containing episodic data} + \item{prefix}{Prefix to add to related columns, e.g. 
"Acute"} \item{condition}{Condition to create new columns based on} diff --git a/man/add_hc_columns.Rd b/man/add_hc_columns.Rd index 60352e37b..95d8f1d3b 100644 --- a/man/add_hc_columns.Rd +++ b/man/add_hc_columns.Rd @@ -7,6 +7,8 @@ add_hc_columns(episode_file, prefix, condition) } \arguments{ +\item{episode_file}{Tibble containing episodic data} + \item{prefix}{Prefix to add to related columns, e.g. "Acute"} \item{condition}{Condition to create new columns based on} diff --git a/man/add_hl1_columns.Rd b/man/add_hl1_columns.Rd index 03dcc2dac..7600db5e9 100644 --- a/man/add_hl1_columns.Rd +++ b/man/add_hl1_columns.Rd @@ -7,6 +7,8 @@ add_hl1_columns(episode_file, prefix, condition) } \arguments{ +\item{episode_file}{Tibble containing episodic data} + \item{prefix}{Prefix to add to related columns, e.g. "Acute"} \item{condition}{Condition to create new columns based on} diff --git a/man/add_ipdc_cols.Rd b/man/add_ipdc_cols.Rd index 537f6d0ce..0f91cbd90 100644 --- a/man/add_ipdc_cols.Rd +++ b/man/add_ipdc_cols.Rd @@ -7,6 +7,8 @@ add_ipdc_cols(episode_file, prefix, condition, ipdc_d = TRUE, elective = TRUE) } \arguments{ +\item{episode_file}{Tibble containing episodic data} + \item{prefix}{Prefix to add to related columns, e.g. "Acute"} \item{condition}{Condition to create new columns based on} diff --git a/man/add_mat_columns.Rd b/man/add_mat_columns.Rd index 2836faa2a..aae729323 100644 --- a/man/add_mat_columns.Rd +++ b/man/add_mat_columns.Rd @@ -7,6 +7,8 @@ add_mat_columns(episode_file, prefix, condition) } \arguments{ +\item{episode_file}{Tibble containing episodic data} + \item{prefix}{Prefix to add to related columns, e.g. 
"Acute"} \item{condition}{Condition to create new columns based on} diff --git a/man/add_mh_columns.Rd b/man/add_mh_columns.Rd index 5c1279656..3c50c6cb8 100644 --- a/man/add_mh_columns.Rd +++ b/man/add_mh_columns.Rd @@ -7,6 +7,8 @@ add_mh_columns(episode_file, prefix, condition) } \arguments{ +\item{episode_file}{Tibble containing episodic data} + \item{prefix}{Prefix to add to related columns, e.g. "Acute"} \item{condition}{Condition to create new columns based on} diff --git a/man/add_nrs_columns.Rd b/man/add_nrs_columns.Rd index 20076ce93..9d7b3f8bf 100644 --- a/man/add_nrs_columns.Rd +++ b/man/add_nrs_columns.Rd @@ -7,6 +7,8 @@ add_nrs_columns(episode_file, prefix, condition) } \arguments{ +\item{episode_file}{Tibble containing episodic data} + \item{prefix}{Prefix to add to related columns, e.g. "Acute"} \item{condition}{Condition to create new columns based on} diff --git a/man/add_nsu_columns.Rd b/man/add_nsu_columns.Rd index 8518dc6dd..6a54bbcbf 100644 --- a/man/add_nsu_columns.Rd +++ b/man/add_nsu_columns.Rd @@ -7,6 +7,8 @@ add_nsu_columns(episode_file, prefix, condition) } \arguments{ +\item{episode_file}{Tibble containing episodic data} + \item{prefix}{Prefix to add to related columns, e.g. "Acute"} \item{condition}{Condition to create new columns based on} diff --git a/man/add_ooh_columns.Rd b/man/add_ooh_columns.Rd index 5a9078259..01814ab6d 100644 --- a/man/add_ooh_columns.Rd +++ b/man/add_ooh_columns.Rd @@ -7,6 +7,8 @@ add_ooh_columns(episode_file, prefix, condition) } \arguments{ +\item{episode_file}{Tibble containing episodic data} + \item{prefix}{Prefix to add to related columns, e.g. 
"Acute"} \item{condition}{Condition to create new columns based on} diff --git a/man/add_op_columns.Rd b/man/add_op_columns.Rd index 5fd8d78c7..08c4419e2 100644 --- a/man/add_op_columns.Rd +++ b/man/add_op_columns.Rd @@ -7,6 +7,8 @@ add_op_columns(episode_file, prefix, condition) } \arguments{ +\item{episode_file}{Tibble containing episodic data} + \item{prefix}{Prefix to add to related columns, e.g. "Acute"} \item{condition}{Condition to create new columns based on} diff --git a/man/add_pis_columns.Rd b/man/add_pis_columns.Rd index b19178df7..b582acf2e 100644 --- a/man/add_pis_columns.Rd +++ b/man/add_pis_columns.Rd @@ -7,6 +7,8 @@ add_pis_columns(episode_file, prefix, condition) } \arguments{ +\item{episode_file}{Tibble containing episodic data} + \item{prefix}{Prefix to add to related columns, e.g. "Acute"} \item{condition}{Condition to create new columns based on} diff --git a/man/add_sds_columns.Rd b/man/add_sds_columns.Rd index ec2a4668e..d5a5fb2cf 100644 --- a/man/add_sds_columns.Rd +++ b/man/add_sds_columns.Rd @@ -7,6 +7,8 @@ add_sds_columns(episode_file, prefix, condition) } \arguments{ +\item{episode_file}{Tibble containing episodic data} + \item{prefix}{Prefix to add to related columns, e.g. "Acute"} \item{condition}{Condition to create new columns based on} diff --git a/man/add_standard_cols.Rd b/man/add_standard_cols.Rd index 2e22df08a..becec0ddd 100644 --- a/man/add_standard_cols.Rd +++ b/man/add_standard_cols.Rd @@ -14,6 +14,8 @@ add_standard_cols( ) } \arguments{ +\item{episode_file}{Tibble containing episodic data} + \item{prefix}{Prefix to add to related columns, e.g. 
"Acute"} \item{condition}{Condition to create new columns based on} diff --git a/man/create_col.Rd b/man/create_col.Rd index 496057bc1..7357adf5d 100644 --- a/man/create_col.Rd +++ b/man/create_col.Rd @@ -7,6 +7,8 @@ create_col(episode_file, col, prefix, condition) } \arguments{ +\item{episode_file}{Tibble containing episodic data} + \item{col}{Which column to create ("DoB", "postcode", or "gpprac")} \item{prefix}{Prefix to add to related columns, e.g. "Acute"} diff --git a/man/create_cols.Rd b/man/create_cols.Rd index d6540fa89..6bbe1d98a 100644 --- a/man/create_cols.Rd +++ b/man/create_cols.Rd @@ -7,6 +7,8 @@ create_cols(episode_file, prefix, condition, drop) } \arguments{ +\item{episode_file}{Tibble containing episodic data} + \item{prefix}{Prefix to add to related columns, e.g. "Acute"} \item{condition}{Condition to create new columns based on} From 90257a63688f9736a6036978756fb6f1ef564e64 Mon Sep 17 00:00:00 2001 From: Mandy Norrbo Date: Thu, 9 Feb 2023 09:25:37 +0000 Subject: [PATCH 010/200] WIP --- R/create_individual_file.R | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index f0d9f7b02..8259d126c 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -9,7 +9,10 @@ create_individual_file <- function(episode_file) { find_non_duplicates(.data$cij_marker, "Distinct_CIJ") %>% add_cij_columns() %>% find_non_duplicates(.data$ch_chi_cis, "first_ch_ep") %>% - add_all_columns() + add_all_columns() %>% + find_non_duplicates(.data$ooh_case_id, "unique_ooh_case") %>% + dplyr::mutate(unique_ooh_case = dplyr::if_else(recid != "OoH", 0, unique_ooh_case)) %>% + aggregate_cis_episodes() } #' Remove blank CHI @@ -469,3 +472,14 @@ na_type <- function(col = c("DoB", "postcode", "gpprac")) { ) return(na_type) } + +aggregate_cis_episodes <- function(episode_file) { + episode_file %>% + dplyr::group_by(.data$chi, .data$ch_chi_cis == 1) %>% + dplyr::mutate( + ch_no_cost 
= max(.data$ch_no_cost), + ch_ep_start = min(.data$keydate1_dateformat), + ch_ep_end = max(.data$ch_ep_end), + ch_cost_per_day = mean(.data$ch_cost_per_day) + ) +} From b0b1460cfee0525739e34cb73ff8ef699eb030e9 Mon Sep 17 00:00:00 2001 From: Mandy Norrbo Date: Thu, 9 Feb 2023 09:51:45 +0000 Subject: [PATCH 011/200] Add rowwise() as quarter not vectorised --- R/create_individual_file.R | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index f0d9f7b02..5445ae623 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -299,9 +299,12 @@ add_ch_columns <- function(episode_file, prefix, condition) { ch_cost_per_day = dplyr::if_else(eval(condition) & .data$yearstay > 0, .data$cost_total_net / .data$yearstay, NA_real_), ch_cost_per_day = dplyr::if_else(eval(condition) & .data$yearstay == 0, .data$cost_total_net / .data$yearstay, .data$ch_cost_per_day), ch_no_cost = eval(condition) & is.na(ch_cost_per_day), - ch_ep_end = dplyr::if_else(eval(condition), .data$keydate2_dateformat, lubridate::NA_Date_), + ch_ep_end = dplyr::if_else(eval(condition), .data$keydate2_dateformat, lubridate::NA_Date_)) %>% + dplyr::rowwise() %>% + dplyr::mutate( ch_ep_end = dplyr::if_else(eval(condition) & is.na(ch_ep_end), lubridate::quarter(zoo::as.yearqtr(.data$sc_latest_submission), type = "date_first"), .data$ch_ep_end) - ) + ) %>% + dplyr::ungroup() } #' Add HC columns @@ -469,3 +472,4 @@ na_type <- function(col = c("DoB", "postcode", "gpprac")) { ) return(na_type) } + From 50f83739035e94d1f0538ee964f7e65e3e1e4074 Mon Sep 17 00:00:00 2001 From: jr-mandy Date: Thu, 9 Feb 2023 09:54:30 +0000 Subject: [PATCH 012/200] Style code --- R/create_individual_file.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 5445ae623..4bc7e68d1 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -299,7 +299,8 @@ 
add_ch_columns <- function(episode_file, prefix, condition) { ch_cost_per_day = dplyr::if_else(eval(condition) & .data$yearstay > 0, .data$cost_total_net / .data$yearstay, NA_real_), ch_cost_per_day = dplyr::if_else(eval(condition) & .data$yearstay == 0, .data$cost_total_net / .data$yearstay, .data$ch_cost_per_day), ch_no_cost = eval(condition) & is.na(ch_cost_per_day), - ch_ep_end = dplyr::if_else(eval(condition), .data$keydate2_dateformat, lubridate::NA_Date_)) %>% + ch_ep_end = dplyr::if_else(eval(condition), .data$keydate2_dateformat, lubridate::NA_Date_) + ) %>% dplyr::rowwise() %>% dplyr::mutate( ch_ep_end = dplyr::if_else(eval(condition) & is.na(ch_ep_end), lubridate::quarter(zoo::as.yearqtr(.data$sc_latest_submission), type = "date_first"), .data$ch_ep_end) @@ -472,4 +473,3 @@ na_type <- function(col = c("DoB", "postcode", "gpprac")) { ) return(na_type) } - From 0c426c5d9fb47840dd4d17c6406f27698c0de9a6 Mon Sep 17 00:00:00 2001 From: Mandy Norrbo Date: Tue, 14 Feb 2023 11:06:47 +0000 Subject: [PATCH 013/200] Until L594 --- R/create_individual_file.R | 167 ++++++++++++++++++++++++++++++++++++- 1 file changed, 164 insertions(+), 3 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index b12971990..afd925b76 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -12,7 +12,10 @@ create_individual_file <- function(episode_file) { add_all_columns() %>% find_non_duplicates(.data$ooh_case_id, "unique_ooh_case") %>% dplyr::mutate(unique_ooh_case = dplyr::if_else(recid != "OoH", 0, unique_ooh_case)) %>% - aggregate_cis_episodes() + aggregate_cis_episodes() %>% + clean_up_ch() %>% + recode_gender() %>% + aggregate_by_chi() } #' Remove blank CHI @@ -305,7 +308,7 @@ add_ch_columns <- function(episode_file, prefix, condition) { ch_ep_end = dplyr::if_else(eval(condition), .data$keydate2_dateformat, lubridate::NA_Date_)) %>% dplyr::rowwise() %>% dplyr::mutate( - ch_ep_end = dplyr::if_else(eval(condition) & 
is.na(ch_ep_end), lubridate::quarter(zoo::as.yearqtr(.data$sc_latest_submission), type = "date_first"), .data$ch_ep_end) + ch_ep_end = dplyr::if_else(eval(condition) & is.na(ch_ep_end), lubridate::quarter(zoo::as.yearqtr(.data$sc_latest_submission), type = "date_first", fiscal_start = 4), .data$ch_ep_end) ) %>% dplyr::ungroup() } @@ -478,11 +481,169 @@ na_type <- function(col = c("DoB", "postcode", "gpprac")) { aggregate_cis_episodes <- function(episode_file) { episode_file %>% - dplyr::group_by(.data$chi, .data$ch_chi_cis == 1) %>% + dplyr::group_by(.data$chi, .data$ch_chi_cis) %>% dplyr::mutate( ch_no_cost = max(.data$ch_no_cost), ch_ep_start = min(.data$keydate1_dateformat), ch_ep_end = max(.data$ch_ep_end), ch_cost_per_day = mean(.data$ch_cost_per_day) + ) %>% + dplyr::ungroup() +} + +#' @inheritParams create_individual_file +clean_up_ch <- function(episode_file) { + episode_file %>% + dplyr::mutate( + fy_end = date_from_fy(year, "end") + 1, + fy_start = date_from_fy(year, "start")) %>% + dplyr::rowwise() %>% + dplyr::mutate( + term_1 = min(ch_ep_end, fy_end + 1), + term_2 = max(ch_ep_start, fy_start) + ) %>% + dplyr::ungroup() %>% + dplyr::mutate( + ch_beddays = dplyr::if_else( + recid == "CH", + as.numeric(term_1 - term_2), + NA_real_ + ), + ch_cost = dplyr::if_else( + recid == "CH" & ch_no_cost == 0, + ch_beddays * ch_cost_per_day, + NA_real_ + ), + ch_beddays = dplyr::if_else( + recid == "CH" & first_ch_ep == 0, + 0, + ch_beddays + ), + ch_cost = dplyr::if_else( + recid == "CH" & first_ch_ep == 0, + 0, + ch_cost + ) + ) %>% + dplyr::select( + -fy_end, -fy_start, -term_1, -term_2 ) } + +date_from_fy <- function(financial_year, type = c("start", "end")) { + match.arg(type) + n <- switch(type, + "start" = 0, + "end" = 2) + year = as.numeric(paste0("20", substr(financial_year, 1 + n, 2 + n))) + if (type == "start") { + date <- lubridate::make_date(year, 4, 1) + return(date) + } + date <- lubridate::make_date(year, 3, 31) + return(date) +} + +recode_gender <- 
function(episode_file) { + episode_file %>% + dplyr::mutate( + gender = dplyr::if_else( + gender == 0 | gender == 9, + 1.5, + gender + ) + ) +} + +aggregate_by_chi <- function(episode_file) { + episode_file %>% + dplyr::arrange(chi, + keydate1_dateformat, + keytime1, + keydate2_dateformat, + keytime2) %>% + dplyr::group_by(chi) %>% + dplyr::summarise( + gender = mean(gender), + dplyr::across(dplyr::ends_with(c( + "postcode", "DoB", "gpprac" + )), ~ dplyr::last(., na_rm = TRUE)), + dplyr::across( + c( + "CIJ_el", + "CIJ_non_el", + "CIJ_mat", + "cij_delay", + dplyr::ends_with( + c( + "episodes", + "beddays", + "cost", + "attendances", + "case", + "attend", + "contacts", + "hours", + "alarms", + "telecare", + "paid_items", + "advice", + "homeV", + "time", + "admissions" + ) + ), + dplyr::starts_with("SDS_option") + ), + ~ sum(., na.rm = TRUE) + ), + dplyr::across( + c( + dplyr::starts_with("sc_"), + -"sc_send_lca", + -"sc_latest_submission", + "hh_in_FY", + "NSU" + ), + ~ max(., na.rm = TRUE) + ), + dplyr::across( + c(condition_cols(), + dplyr::ends_with(c( + "_Cohort", "end_fy", "start_fy" + )),), + ~ dplyr::first(., na_rm = TRUE) + ) + ) +} + + +conditions_cols <- function() { + condition_cols <- c( + "arth", + "asthma", + "atrialfib", + "cancer", + "cvd", + "liver", + "copd", + "dementia", + "diabetes", + "epilepsy", + "chd", + "hefailure", + "ms", + "parkinsons", + "refailure", + "congen" , + "bloodbfo", + "endomet", + "digestive" + ) + date_cols <- paste0(conditions, "_date") + all_cols <- c(condition_cols, date_cols) + return(all_cols) +} + + +# need to rename: OoH_cases, HL1_in_FY From 7f17ef24ff08cee78af16c4e90f9917045e261f9 Mon Sep 17 00:00:00 2001 From: Mandy Norrbo Date: Thu, 16 Feb 2023 10:32:10 +0000 Subject: [PATCH 014/200] Converted until L677 --- R/create_individual_file.R | 128 +++++++++++++++++++++++++++++++++---- 1 file changed, 116 insertions(+), 12 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 
afd925b76..ff84fc98f 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -15,7 +15,8 @@ create_individual_file <- function(episode_file) { aggregate_cis_episodes() %>% clean_up_ch() %>% recode_gender() %>% - aggregate_by_chi() + aggregate_by_chi() %>% + clean_individual_file() } #' Remove blank CHI @@ -212,7 +213,7 @@ add_ooh_columns <- function(episode_file, prefix, condition) { "{prefix}_other" := dplyr::if_else(eval(condition) & .data$smrtype == "OOH-Other", 1, NA_real_), "{prefix}_PCC" := dplyr::if_else(eval(condition) & .data$smrtype == "OOH-PCC", 1, NA_real_), ooh_covid_advice = dplyr::if_else(eval(condition) & .data$smrtype == "OOH-C19Adv", 1, NA_real_), - ooh_covid_assessment = dplyr::if_else(eval(condition) & .data$smrtype == "OOH-C19Ass", 1, NA_real_), + ooh_covid_assesment = dplyr::if_else(eval(condition) & .data$smrtype == "OOH-C19Ass", 1, NA_real_), ooh_covid_other = dplyr::if_else(eval(condition) & .data$smrtype == "OOH-C190th", 1, NA_real_) ) @@ -387,6 +388,7 @@ add_ipdc_cols <- function(episode_file, prefix, condition, ipdc_d = TRUE, electi condition_i <- substitute(eval(condition) & ipdc == "I") episode_file <- episode_file %>% dplyr::mutate( + "{prefix}_inpatient_cost" := dplyr::if_else(eval(condition_i), .data$cost_total_net, NA_real_), "{prefix}_inpatient_episodes" := dplyr::if_else(eval(condition_i), 1, NA_real_), "{prefix}_inpatient_beddays" := dplyr::if_else(eval(condition_i), .data$yearstay, NA_real_) ) @@ -530,17 +532,21 @@ clean_up_ch <- function(episode_file) { ) } -date_from_fy <- function(financial_year, type = c("start", "end")) { +date_from_fy <- function(financial_year, type = c("start", "end", "mid")) { match.arg(type) n <- switch(type, "start" = 0, + "mid" = 0, "end" = 2) year = as.numeric(paste0("20", substr(financial_year, 1 + n, 2 + n))) if (type == "start") { date <- lubridate::make_date(year, 4, 1) return(date) + } else if (type == "end") { + date <- lubridate::make_date(year, 3, 31) + return(date) } - 
date <- lubridate::make_date(year, 3, 31) + date <- lubridate::make_date(year, 9, 30) return(date) } @@ -574,13 +580,13 @@ aggregate_by_chi <- function(episode_file) { "CIJ_non_el", "CIJ_mat", "cij_delay", + "OoH_cases" = "unique_ooh_case", dplyr::ends_with( c( "episodes", "beddays", "cost", "attendances", - "case", "attend", "contacts", "hours", @@ -590,7 +596,13 @@ aggregate_by_chi <- function(episode_file) { "advice", "homeV", "time", - "admissions" + "admissions", + "assesment", + "other", + "DN", + "NHS24", + "PCC", + "_dnas" ) ), dplyr::starts_with("SDS_option") @@ -602,13 +614,16 @@ aggregate_by_chi <- function(episode_file) { dplyr::starts_with("sc_"), -"sc_send_lca", -"sc_latest_submission", - "hh_in_FY", + "HL1_in_FY" = "hh_in_fy", "NSU" ), - ~ max(., na.rm = TRUE) + ~ max_no_inf(.) ), dplyr::across( c(condition_cols(), + "death_date", + "deceased", + "year", dplyr::ends_with(c( "_Cohort", "end_fy", "start_fy" )),), @@ -618,8 +633,8 @@ aggregate_by_chi <- function(episode_file) { } -conditions_cols <- function() { - condition_cols <- c( +condition_cols <- function() { + conditions <- c( "arth", "asthma", "atrialfib", @@ -641,9 +656,98 @@ conditions_cols <- function() { "digestive" ) date_cols <- paste0(conditions, "_date") - all_cols <- c(condition_cols, date_cols) + all_cols <- c(conditions, date_cols) return(all_cols) } +max_no_inf <- function(x) { + ifelse(!all(is.na(x)), max(x, na.rm = TRUE), NA) +} + +clean_individual_file <- function(individual_file) { + individual_file %>% + drop_cols() %>% + clean_up_gender() %>% + clean_up_dob() +} + +drop_cols <- function(individual_file) { + individual_file %>% + dplyr::select( + -month_cols(), + -"ch_no_cost", + -"dob", + -"postcode", + -"gpprac", + -"no_paid_items", + -"totalnodncontacts" + ) +} + +month_cols <- function() { + suffix <- c("_beddays", "_cost") + months <- tolower(c(rep(month.abb, each = 2))) + month_cols <- paste0(months, suffix) + return(month_cols) +} + +clean_up_gender <- 
function(individual_file) { + individual_file %>% + dplyr::mutate( + gender = dplyr::case_when( + gender != 1.5 ~ round(gender), + as.numeric(substr(chi_subset, 9, 9)) %% 2 == 1 ~ 1, + TRUE ~ 2 + ), + gender = dplyr::case_when( + gender == 1 ~ "Male", + gender == 2 ~ "Female" + ) + ) +} + +clean_up_dob <- function(individual_file) { + individual_file %>% + dplyr::mutate( + chi_dob_1 = lubridate::dmy(paste0(substr(.data$chi, 1, 4), "19", substr(.data$chi, 5, 6))), + chi_dob_2 = lubridate::dmy(paste0(substr(.data$chi, 1, 4), "20", substr(.data$chi, 5, 6))), + chi_age_1 = as.numeric(lubridate::interval(lubridate::ymd(.data$chi_dob_1), date_from_fy(year, "mid")), "years"), # date_from_fy(year, "mid") - lubridate::ymd(.data$chi_dob_1) + chi_age_2 = as.numeric(lubridate::interval(lubridate::ymd(.data$chi_dob_2), date_from_fy(year, "mid")), "years") # date_from_fy(year, "mid") - lubridate::ymd(.data$chi_dob_2) + ) %>% + dplyr::rowwise() %>% + dplyr::mutate( + dob_condition_1 = .data$chi_dob_1 %in% dplyr::pick(dplyr::ends_with("_DoB")) & !(chi_dob_2 %in% dplyr::pick(dplyr::ends_with("_DoB"))), + dob_condition_2 = .data$chi_dob_2 %in% dplyr::pick(dplyr::ends_with("_DoB")) & !(chi_dob_1 %in% dplyr::pick(dplyr::ends_with("_DoB"))), + dob_condition_3 = .data$chi_dob_2 > min(lubridate::today(), date_from_fy(year, "end")), + dob_condition_4 = .data$chi_dob_2 > min(dplyr::pick(.data$arth_date:.data$death_date)), + dob_condition_5 = .data$congen_date %in% c(.data$chi_dob_1, .data$chi_dob_2) + ) %>% + dplyr::ungroup() %>% + dplyr::mutate( + DoB = dplyr::case_when( + .data$dob_condition_1 ~ .data$chi_dob_1, + .data$dob_condition_2 ~ .data$chi_dob_2 + ) + ) %>% + dplyr::mutate( + DoB = dplyr::case_when( + is.na(.data$DoB) & is.na(.data$chi_dob_1) & !is.na(.data$chi_dob_2) ~ .data$chi_dob_2, + is.na(.data$DoB) & is.na(.data$chi_dob_2) & !is.na(.data$chi_dob_1) ~ .data$chi_dob_1, + is.na(.data$DoB) & .data$chi_age_2 < 0 ~ .data$chi_dob_1, + is.na(.data$DoB) & .data$dob_condition_3 ~ 
.data$chi_dob_1, + is.na(.data$DoB) & .data$dob_condition_4 ~ .data$chi_dob_1, + is.na(.data$DoB) & .data$dob_condition_5 ~ .data$congen_date, + is.na(.data$DoB) & .data$chi_age_1 > 115 ~ .data$chi_dob_2 + ) + ) %>% + dplyr::select( + -dplyr::starts_with(c("dob_condition_", "chi_dob_", "chi_age_")) + ) +} + +clean_up_age <- function(individual_file) { + individual_file %>% + dplyr::mutate( -# need to rename: OoH_cases, HL1_in_FY + ) +} From 52a4ffdb50523e276352ef3ff4af9434e528074f Mon Sep 17 00:00:00 2001 From: Mandy Norrbo Date: Fri, 17 Feb 2023 10:35:03 +0000 Subject: [PATCH 015/200] Until L731 --- R/create_individual_file.R | 152 +++++++++++++++++++++++++++++++++---- 1 file changed, 139 insertions(+), 13 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index ff84fc98f..352c26e95 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -12,7 +12,7 @@ create_individual_file <- function(episode_file) { add_all_columns() %>% find_non_duplicates(.data$ooh_case_id, "unique_ooh_case") %>% dplyr::mutate(unique_ooh_case = dplyr::if_else(recid != "OoH", 0, unique_ooh_case)) %>% - aggregate_cis_episodes() %>% + aggregate_ch_episodes() %>% clean_up_ch() %>% recode_gender() %>% aggregate_by_chi() %>% @@ -481,7 +481,12 @@ na_type <- function(col = c("DoB", "postcode", "gpprac")) { return(na_type) } -aggregate_cis_episodes <- function(episode_file) { +#' Aggregate CIS episodes +#' +#' @description Aggregate CH variables by CHI and CIS. +#' +#' @inheritParams create_individual_file +aggregate_ch_episodes <- function(episode_file) { episode_file %>% dplyr::group_by(.data$chi, .data$ch_chi_cis) %>% dplyr::mutate( @@ -493,6 +498,10 @@ aggregate_cis_episodes <- function(episode_file) { dplyr::ungroup() } +#' Clean up CH +#' +#' @description Clean up CH-related columns. 
+#' #' @inheritParams create_individual_file clean_up_ch <- function(episode_file) { episode_file %>% @@ -532,6 +541,12 @@ clean_up_ch <- function(episode_file) { ) } +#' Date from FY +#' +#' @description Return start, mid, or end date from financial year in format "2122". +#' +#' @param financial_year Financial year represented in "YYYY" format e.g. "2122" +#' @param type One of "start", "end", and "mid", representing the date to return date_from_fy <- function(financial_year, type = c("start", "end", "mid")) { match.arg(type) n <- switch(type, @@ -550,6 +565,11 @@ date_from_fy <- function(financial_year, type = c("start", "end", "mid")) { return(date) } +#' Recode gender +#' +#' @description Recode gender to 1.5 if 0 or 9. +#' +#' @inheritParams create_individual_file recode_gender <- function(episode_file) { episode_file %>% dplyr::mutate( @@ -561,6 +581,12 @@ recode_gender <- function(episode_file) { ) } +#' Aggregate by CHI +#' +#' @description Aggregate episode file by CHI to convert into +#' individual file. +#' +#' @inheritParams create_individual_file aggregate_by_chi <- function(episode_file) { episode_file %>% dplyr::arrange(chi, @@ -632,7 +658,11 @@ aggregate_by_chi <- function(episode_file) { ) } - +#' Condition columns +#' +#' @description Returns chr vector of column names +#' which follow format "condition" and "condition_date" e.g. 
+#' "dementia" and "dementia_date" condition_cols <- function() { conditions <- c( "arth", @@ -660,17 +690,48 @@ condition_cols <- function() { return(all_cols) } +#' Custom maximum +#' +#' @description Custom maximum function which removes +#' missing values but doesn't return Inf if all values +#' are missing (instead returns NA) +#' +#' @param x Vector to return max of max_no_inf <- function(x) { ifelse(!all(is.na(x)), max(x, na.rm = TRUE), NA) } +#' Custom minimum +#' +#' @description Custom minimum function which removes +#' missing values but doesn't return Inf if all values +#' are missing (instead returns NA) +#' +#' @param x Vector to return min of +min_no_inf <- function(x) { + ifelse(!all(is.na(x)), min(x, na.rm = TRUE), NA) +} + +#' Clean individual file +#' +#' @description Clean up columns in individual file +#' +#' @param individual_file Individual file where each row represents a unique CHI clean_individual_file <- function(individual_file) { individual_file %>% drop_cols() %>% clean_up_gender() %>% - clean_up_dob() + clean_up_dob() %>% + dplyr::mutate( + age = floor(as.numeric(lubridate::interval(.data$DoB, date_from_fy(year, "mid")), "years")) + ) } +#' Drop redundant columns +#' +#' @description Drop redundant columns from individual file. +#' +#' @inheritParams clean_individual_file drop_cols <- function(individual_file) { individual_file %>% dplyr::select( @@ -684,6 +745,10 @@ drop_cols <- function(individual_file) { ) } +#' Month columns +#' +#' @description Return chr of column names following pattern +#' "month_beddays" and "month_cost" e.g. apr_beddays" and "apr_cost" month_cols <- function() { suffix <- c("_beddays", "_cost") months <- tolower(c(rep(month.abb, each = 2))) @@ -691,35 +756,45 @@ month_cols <- function() { return(month_cols) } +#' Clean up gender column +#' +#' @description Clean up column containing gender. 
+#' +#' @inheritParams clean_individual_file clean_up_gender <- function(individual_file) { individual_file %>% dplyr::mutate( gender = dplyr::case_when( - gender != 1.5 ~ round(gender), - as.numeric(substr(chi_subset, 9, 9)) %% 2 == 1 ~ 1, + .data$gender != 1.5 ~ round(.data$gender), + as.numeric(substr(.data$chi, 9, 9)) %% 2 == 1 ~ 1, TRUE ~ 2 ), gender = dplyr::case_when( - gender == 1 ~ "Male", - gender == 2 ~ "Female" + .data$gender == 1 ~ "Male", + .data$gender == 2 ~ "Female" ) ) } +#' Clean up date of birth column +#' +#' @description Clean up column containing date of birth. +#' +#' @inheritParams clean_individual_file clean_up_dob <- function(individual_file) { individual_file %>% dplyr::mutate( chi_dob_1 = lubridate::dmy(paste0(substr(.data$chi, 1, 4), "19", substr(.data$chi, 5, 6))), chi_dob_2 = lubridate::dmy(paste0(substr(.data$chi, 1, 4), "20", substr(.data$chi, 5, 6))), - chi_age_1 = as.numeric(lubridate::interval(lubridate::ymd(.data$chi_dob_1), date_from_fy(year, "mid")), "years"), # date_from_fy(year, "mid") - lubridate::ymd(.data$chi_dob_1) - chi_age_2 = as.numeric(lubridate::interval(lubridate::ymd(.data$chi_dob_2), date_from_fy(year, "mid")), "years") # date_from_fy(year, "mid") - lubridate::ymd(.data$chi_dob_2) + chi_age_1 = as.numeric(lubridate::interval(lubridate::ymd(.data$chi_dob_1), date_from_fy(year, "mid")), "years"), + chi_age_2 = as.numeric(lubridate::interval(lubridate::ymd(.data$chi_dob_2), date_from_fy(year, "mid")), "years") ) %>% dplyr::rowwise() %>% dplyr::mutate( dob_condition_1 = .data$chi_dob_1 %in% dplyr::pick(dplyr::ends_with("_DoB")) & !(chi_dob_2 %in% dplyr::pick(dplyr::ends_with("_DoB"))), dob_condition_2 = .data$chi_dob_2 %in% dplyr::pick(dplyr::ends_with("_DoB")) & !(chi_dob_1 %in% dplyr::pick(dplyr::ends_with("_DoB"))), dob_condition_3 = .data$chi_dob_2 > min(lubridate::today(), date_from_fy(year, "end")), - dob_condition_4 = .data$chi_dob_2 > min(dplyr::pick(.data$arth_date:.data$death_date)), + dob_condition_4 = 
unclass(.data$chi_dob_2) > min_no_inf(as.numeric(dplyr::pick(.data$arth_date:.data$death_date))), dob_condition_5 = .data$congen_date %in% c(.data$chi_dob_1, .data$chi_dob_2) ) %>% dplyr::ungroup() %>% @@ -737,17 +812,68 @@ clean_up_dob <- function(individual_file) { is.na(.data$DoB) & .data$dob_condition_3 ~ .data$chi_dob_1, is.na(.data$DoB) & .data$dob_condition_4 ~ .data$chi_dob_1, is.na(.data$DoB) & .data$dob_condition_5 ~ .data$congen_date, - is.na(.data$DoB) & .data$chi_age_1 > 115 ~ .data$chi_dob_2 + is.na(.data$DoB) & .data$chi_age_1 > 115 ~ .data$chi_dob_2, + TRUE ~ .data$DoB ) ) %>% + fill_dob() %>% dplyr::select( -dplyr::starts_with(c("dob_condition_", "chi_dob_", "chi_age_")) ) } -clean_up_age <- function(individual_file) { +#' Fill missing date of births +#' +#' @description Fill missing date of births with +#' date of births from specific episode columns in hierarchy. +#' +#' @inheritParams clean_individual_file +fill_dob <- function(individual_file) { + column_prefix <- c("PIS", "AE", "OoH", "OP", "Acute", "Mat", "DN", "CMH", "MH", + "GLS", "HL1", "CH", "HC", "AT", "SDS", "NSU", "NRS") + columns <- paste0(column_prefix, "_DoB") + for (i in length(columns)) { + individual_file = replace_dob_with_col(individual_file, columns[i]) + } + return(individual_file) +} + +#' Fill missing date of births +#' +#' @description Fill missing date of births with +#' date of births from an episode date of birth column. 
+#' +#' @inheritParams clean_individual_file +#' @param col Column containing date of birth for episode +replace_dob_with_col <- function(individual_file, col) { individual_file %>% dplyr::mutate( + DoB = dplyr::if_else( + is.na(.data$DoB) & !is.na(.data[[col]]), + .data[[col]], + .data$DoB + ) + ) +} +# WIP function to clean up postcodes L721-L805 Of D01 Make Individual File.sps +clean_up_postcode <- function(individual_file) { + postcode_lookup <- readr::read_rds(get_slf_postcode_path()) + testy2= testy %>% + dplyr::mutate( + all_blank = dplyr::if_else( + all(is.na(dplyr::pick(dplyr::ends_with("_postcode")))), + 1, + 0 + ) + ) %>% + dplyr::mutate( + HL1_postcode = dplyr::if_else( + all_blank == 1, + "XXX XXX", + .data$HL1_postcode + ) ) + } + From 3223189890568f0095e209e765f0dc4a4a7a41b6 Mon Sep 17 00:00:00 2001 From: jr-mandy Date: Fri, 17 Feb 2023 10:41:44 +0000 Subject: [PATCH 016/200] Update documentation --- man/aggregate_by_chi.Rd | 15 +++++++++++++++ man/aggregate_ch_episodes.Rd | 14 ++++++++++++++ man/clean_individual_file.Rd | 14 ++++++++++++++ man/clean_up_ch.Rd | 14 ++++++++++++++ man/clean_up_dob.Rd | 14 ++++++++++++++ man/clean_up_gender.Rd | 14 ++++++++++++++ man/condition_cols.Rd | 13 +++++++++++++ man/date_from_fy.Rd | 16 ++++++++++++++++ man/drop_cols.Rd | 14 ++++++++++++++ man/fill_dob.Rd | 15 +++++++++++++++ man/max_no_inf.Rd | 16 ++++++++++++++++ man/min_no_inf.Rd | 16 ++++++++++++++++ man/month_cols.Rd | 12 ++++++++++++ man/recode_gender.Rd | 14 ++++++++++++++ man/replace_dob_with_col.Rd | 17 +++++++++++++++++ 15 files changed, 218 insertions(+) create mode 100644 man/aggregate_by_chi.Rd create mode 100644 man/aggregate_ch_episodes.Rd create mode 100644 man/clean_individual_file.Rd create mode 100644 man/clean_up_ch.Rd create mode 100644 man/clean_up_dob.Rd create mode 100644 man/clean_up_gender.Rd create mode 100644 man/condition_cols.Rd create mode 100644 man/date_from_fy.Rd create mode 100644 man/drop_cols.Rd create mode 100644 
man/fill_dob.Rd create mode 100644 man/max_no_inf.Rd create mode 100644 man/min_no_inf.Rd create mode 100644 man/month_cols.Rd create mode 100644 man/recode_gender.Rd create mode 100644 man/replace_dob_with_col.Rd diff --git a/man/aggregate_by_chi.Rd b/man/aggregate_by_chi.Rd new file mode 100644 index 000000000..73804ad9b --- /dev/null +++ b/man/aggregate_by_chi.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{aggregate_by_chi} +\alias{aggregate_by_chi} +\title{Aggregate by CHI} +\usage{ +aggregate_by_chi(episode_file) +} +\arguments{ +\item{episode_file}{Tibble containing episodic data} +} +\description{ +Aggregate episode file by CHI to convert into +individual file. +} diff --git a/man/aggregate_ch_episodes.Rd b/man/aggregate_ch_episodes.Rd new file mode 100644 index 000000000..2753da14f --- /dev/null +++ b/man/aggregate_ch_episodes.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{aggregate_ch_episodes} +\alias{aggregate_ch_episodes} +\title{Aggregate CIS episodes} +\usage{ +aggregate_ch_episodes(episode_file) +} +\arguments{ +\item{episode_file}{Tibble containing episodic data} +} +\description{ +Aggregate CH variables by CHI and CIS. 
+} diff --git a/man/clean_individual_file.Rd b/man/clean_individual_file.Rd new file mode 100644 index 000000000..30d5479c6 --- /dev/null +++ b/man/clean_individual_file.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{clean_individual_file} +\alias{clean_individual_file} +\title{Clean individual file} +\usage{ +clean_individual_file(individual_file) +} +\arguments{ +\item{individual_file}{Individual file where each row represents a unique CHI} +} +\description{ +Clean up columns in individual file +} diff --git a/man/clean_up_ch.Rd b/man/clean_up_ch.Rd new file mode 100644 index 000000000..64bb3e330 --- /dev/null +++ b/man/clean_up_ch.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{clean_up_ch} +\alias{clean_up_ch} +\title{Clean up CH} +\usage{ +clean_up_ch(episode_file) +} +\arguments{ +\item{episode_file}{Tibble containing episodic data} +} +\description{ +Clean up CH-related columns. +} diff --git a/man/clean_up_dob.Rd b/man/clean_up_dob.Rd new file mode 100644 index 000000000..4b9003726 --- /dev/null +++ b/man/clean_up_dob.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{clean_up_dob} +\alias{clean_up_dob} +\title{Clean up date of birth column} +\usage{ +clean_up_dob(individual_file) +} +\arguments{ +\item{individual_file}{Individual file where each row represents a unique CHI} +} +\description{ +Clean up column containing date of birth. 
+} diff --git a/man/clean_up_gender.Rd b/man/clean_up_gender.Rd new file mode 100644 index 000000000..edf05bfc8 --- /dev/null +++ b/man/clean_up_gender.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{clean_up_gender} +\alias{clean_up_gender} +\title{Clean up gender column} +\usage{ +clean_up_gender(individual_file) +} +\arguments{ +\item{individual_file}{Individual file where each row represents a unique CHI} +} +\description{ +Clean up column containing gender. +} diff --git a/man/condition_cols.Rd b/man/condition_cols.Rd new file mode 100644 index 000000000..ba037a609 --- /dev/null +++ b/man/condition_cols.Rd @@ -0,0 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{condition_cols} +\alias{condition_cols} +\title{Condition columns} +\usage{ +condition_cols() +} +\description{ +Returns chr vector of column names +which follow format "condition" and "condition_date" e.g. +"dementia" and "dementia_date" +} diff --git a/man/date_from_fy.Rd b/man/date_from_fy.Rd new file mode 100644 index 000000000..cc3b8f8a3 --- /dev/null +++ b/man/date_from_fy.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{date_from_fy} +\alias{date_from_fy} +\title{Date from FY} +\usage{ +date_from_fy(financial_year, type = c("start", "end", "mid")) +} +\arguments{ +\item{financial_year}{Financial year represented in "YYYY" format e.g. "2122"} + +\item{type}{One of "start", "end", and "mid", representing the date to return} +} +\description{ +Return start, mid, or end date from financial year in format "2122". 
+} diff --git a/man/drop_cols.Rd b/man/drop_cols.Rd new file mode 100644 index 000000000..8029d289c --- /dev/null +++ b/man/drop_cols.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{drop_cols} +\alias{drop_cols} +\title{Drop redundant columns} +\usage{ +drop_cols(individual_file) +} +\arguments{ +\item{individual_file}{Individual file where each row represents a unique CHI} +} +\description{ +Drop redundant columns from individual file. +} diff --git a/man/fill_dob.Rd b/man/fill_dob.Rd new file mode 100644 index 000000000..99d3c03bd --- /dev/null +++ b/man/fill_dob.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{fill_dob} +\alias{fill_dob} +\title{Fill missing date of births} +\usage{ +fill_dob(individual_file) +} +\arguments{ +\item{individual_file}{Individual file where each row represents a unique CHI} +} +\description{ +Fill missing date of births with +date of births from specific episode columns in hierarchy. 
+} diff --git a/man/max_no_inf.Rd b/man/max_no_inf.Rd new file mode 100644 index 000000000..79b9a1057 --- /dev/null +++ b/man/max_no_inf.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{max_no_inf} +\alias{max_no_inf} +\title{Custom maximum} +\usage{ +max_no_inf(x) +} +\arguments{ +\item{x}{Vector to return max of} +} +\description{ +Custom maximum function which removes +missing values but doesn't return Inf if all values +are missing (instead returns NA) +} diff --git a/man/min_no_inf.Rd b/man/min_no_inf.Rd new file mode 100644 index 000000000..38029214f --- /dev/null +++ b/man/min_no_inf.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{min_no_inf} +\alias{min_no_inf} +\title{Custom minimum} +\usage{ +min_no_inf(x) +} +\arguments{ +\item{x}{Vector to return min of} +} +\description{ +Custom minimum function which removes +missing values but doesn't return Inf if all values +are missing (instead returns NA) +} diff --git a/man/month_cols.Rd b/man/month_cols.Rd new file mode 100644 index 000000000..b8dd641e5 --- /dev/null +++ b/man/month_cols.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{month_cols} +\alias{month_cols} +\title{Month columns} +\usage{ +month_cols() +} +\description{ +Return chr of column names following pattern +"month_beddays" and "month_cost" e.g. 
apr_beddays" and "apr_cost" +} diff --git a/man/recode_gender.Rd b/man/recode_gender.Rd new file mode 100644 index 000000000..526d2829d --- /dev/null +++ b/man/recode_gender.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{recode_gender} +\alias{recode_gender} +\title{Recode gender} +\usage{ +recode_gender(episode_file) +} +\arguments{ +\item{episode_file}{Tibble containing episodic data} +} +\description{ +Recode gender to 1.5 if 0 or 9. +} diff --git a/man/replace_dob_with_col.Rd b/man/replace_dob_with_col.Rd new file mode 100644 index 000000000..61016ec2e --- /dev/null +++ b/man/replace_dob_with_col.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{replace_dob_with_col} +\alias{replace_dob_with_col} +\title{Fill missing date of births} +\usage{ +replace_dob_with_col(individual_file, col) +} +\arguments{ +\item{individual_file}{Individual file where each row represents a unique CHI} + +\item{col}{Column containing date of birth for episode} +} +\description{ +Fill missing date of births with +date of births from an episode date of birth column. 
+} From 1c67f20269cf1459bca51614df0706c9347e55c1 Mon Sep 17 00:00:00 2001 From: Mandy Norrbo Date: Fri, 17 Feb 2023 10:43:29 +0000 Subject: [PATCH 017/200] Remove test ref --- R/create_individual_file.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 352c26e95..5f6b8acfe 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -859,7 +859,7 @@ replace_dob_with_col <- function(individual_file, col) { # WIP function to clean up postcodes L721-L805 Of D01 Make Individual File.sps clean_up_postcode <- function(individual_file) { postcode_lookup <- readr::read_rds(get_slf_postcode_path()) - testy2= testy %>% + individual_file %>% dplyr::mutate( all_blank = dplyr::if_else( all(is.na(dplyr::pick(dplyr::ends_with("_postcode")))), From b14e4b2175639b09e66325dff00a71d551b2523c Mon Sep 17 00:00:00 2001 From: jr-mandy Date: Fri, 17 Feb 2023 10:48:52 +0000 Subject: [PATCH 018/200] Style code --- R/create_individual_file.R | 51 ++++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 5f6b8acfe..0d598a38e 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -306,7 +306,8 @@ add_ch_columns <- function(episode_file, prefix, condition) { ch_cost_per_day = dplyr::if_else(eval(condition) & .data$yearstay > 0, .data$cost_total_net / .data$yearstay, NA_real_), ch_cost_per_day = dplyr::if_else(eval(condition) & .data$yearstay == 0, .data$cost_total_net / .data$yearstay, .data$ch_cost_per_day), ch_no_cost = eval(condition) & is.na(ch_cost_per_day), - ch_ep_end = dplyr::if_else(eval(condition), .data$keydate2_dateformat, lubridate::NA_Date_)) %>% + ch_ep_end = dplyr::if_else(eval(condition), .data$keydate2_dateformat, lubridate::NA_Date_) + ) %>% dplyr::rowwise() %>% dplyr::mutate( ch_ep_end = dplyr::if_else(eval(condition) & is.na(ch_ep_end), 
lubridate::quarter(zoo::as.yearqtr(.data$sc_latest_submission), type = "date_first", fiscal_start = 4), .data$ch_ep_end) @@ -506,12 +507,13 @@ aggregate_ch_episodes <- function(episode_file) { clean_up_ch <- function(episode_file) { episode_file %>% dplyr::mutate( - fy_end = date_from_fy(year, "end") + 1, - fy_start = date_from_fy(year, "start")) %>% + fy_end = date_from_fy(year, "end") + 1, + fy_start = date_from_fy(year, "start") + ) %>% dplyr::rowwise() %>% dplyr::mutate( - term_1 = min(ch_ep_end, fy_end + 1), - term_2 = max(ch_ep_start, fy_start) + term_1 = min(ch_ep_end, fy_end + 1), + term_2 = max(ch_ep_start, fy_start) ) %>% dplyr::ungroup() %>% dplyr::mutate( @@ -550,10 +552,11 @@ clean_up_ch <- function(episode_file) { date_from_fy <- function(financial_year, type = c("start", "end", "mid")) { match.arg(type) n <- switch(type, - "start" = 0, - "mid" = 0, - "end" = 2) - year = as.numeric(paste0("20", substr(financial_year, 1 + n, 2 + n))) + "start" = 0, + "mid" = 0, + "end" = 2 + ) + year <- as.numeric(paste0("20", substr(financial_year, 1 + n, 2 + n))) if (type == "start") { date <- lubridate::make_date(year, 4, 1) return(date) @@ -589,11 +592,13 @@ recode_gender <- function(episode_file) { #' @inheritParams create_individual_file aggregate_by_chi <- function(episode_file) { episode_file %>% - dplyr::arrange(chi, - keydate1_dateformat, - keytime1, - keydate2_dateformat, - keytime2) %>% + dplyr::arrange( + chi, + keydate1_dateformat, + keytime1, + keydate2_dateformat, + keytime2 + ) %>% dplyr::group_by(chi) %>% dplyr::summarise( gender = mean(gender), @@ -646,13 +651,15 @@ aggregate_by_chi <- function(episode_file) { ~ max_no_inf(.) 
), dplyr::across( - c(condition_cols(), + c( + condition_cols(), "death_date", "deceased", "year", dplyr::ends_with(c( "_Cohort", "end_fy", "start_fy" - )),), + )), + ), ~ dplyr::first(., na_rm = TRUE) ) ) @@ -680,7 +687,7 @@ condition_cols <- function() { "ms", "parkinsons", "refailure", - "congen" , + "congen", "bloodbfo", "endomet", "digestive" @@ -829,11 +836,13 @@ clean_up_dob <- function(individual_file) { #' #' @inheritParams clean_individual_file fill_dob <- function(individual_file) { - column_prefix <- c("PIS", "AE", "OoH", "OP", "Acute", "Mat", "DN", "CMH", "MH", - "GLS", "HL1", "CH", "HC", "AT", "SDS", "NSU", "NRS") + column_prefix <- c( + "PIS", "AE", "OoH", "OP", "Acute", "Mat", "DN", "CMH", "MH", + "GLS", "HL1", "CH", "HC", "AT", "SDS", "NSU", "NRS" + ) columns <- paste0(column_prefix, "_DoB") for (i in length(columns)) { - individual_file = replace_dob_with_col(individual_file, columns[i]) + individual_file <- replace_dob_with_col(individual_file, columns[i]) } return(individual_file) } @@ -874,6 +883,4 @@ clean_up_postcode <- function(individual_file) { .data$HL1_postcode ) ) - } - From f31f19bff6584def710d931f72ec7d2781e69f78 Mon Sep 17 00:00:00 2001 From: "shintoLampgit config --global user.email bateman.mcbride@phs.scotm git config --global user.name shintoLamp" Date: Thu, 2 Mar 2023 17:17:56 +0000 Subject: [PATCH 019/200] WIP writing functions to fill postcode in line with previous DOB functions --- R/create_individual_file.R | 56 ++++++++++++++++++++++++++++++++------ 1 file changed, 47 insertions(+), 9 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 0d598a38e..f2f32ee5d 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -731,7 +731,8 @@ clean_individual_file <- function(individual_file) { clean_up_dob() %>% dplyr::mutate( age = floor(as.numeric(lubridate::interval(.data$DoB, date_from_fy(year, "mid")), "years")) - ) + ) %>% + clean_up_postcode() } #' Drop redundant columns @@ 
-865,18 +866,19 @@ replace_dob_with_col <- function(individual_file, col) { ) } -# WIP function to clean up postcodes L721-L805 Of D01 Make Individual File.sps +#' Clean up postcode column +#' +#' @description Clean up column containing postcode. +#' +#' @inheritParams clean_individual_file clean_up_postcode <- function(individual_file) { postcode_lookup <- readr::read_rds(get_slf_postcode_path()) individual_file %>% dplyr::mutate( - all_blank = dplyr::if_else( - all(is.na(dplyr::pick(dplyr::ends_with("_postcode")))), - 1, - 0 - ) - ) %>% - dplyr::mutate( + # all_blank is TRUE when all postcode variables are blank + all_blank = all(is.na(dplyr::pick(dplyr::ends_with("_postcode")))), + # Use NRS_postcode to store the dummy for no other reason than it's last + # in the hierarchy HL1_postcode = dplyr::if_else( all_blank == 1, "XXX XXX", @@ -884,3 +886,39 @@ clean_up_postcode <- function(individual_file) { ) ) } + +#' Fill missing postcodes +#' +#' @description Fill missing postcodes with +#' postcodes from specific episode columns in hierarchy. +#' +#' @inheritParams clean_individual_file +fill_dob <- function(individual_file) { + column_prefix <- c( + "PIS", "AE", "OoH", "OP", "Acute", "Mat", "HC", "DN", "CMH", "MH", + "GLS", "AT", "SDS", "CH", "NSU", "NRS", "HL1" + ) + columns <- paste0(column_prefix, "_postcode") + for (i in length(columns)) { + individual_file <- replace_postcode_with_col(individual_file, columns[i]) + } + return(individual_file) +} + +#' Fill missing postcode +#' +#' @description Fill missing postcode with +#' postcodes from an episode postcode column. 
+#' +#' @inheritParams clean_individual_file +#' @param col Column containing postcode for episode +replace_postcode_with_col <- function(individual_file, col) { + individual_file %>% + dplyr::mutate( + postcode = dplyr::if_else( + is.na(.data$postcode) & !is.na(.data[[col]]), + .data[[col]], + .data$postcode + ) + ) +} From 891c9a9fedf76a1ef4074cecb7961a1148dd78c9 Mon Sep 17 00:00:00 2001 From: shintoLamp Date: Thu, 2 Mar 2023 17:21:35 +0000 Subject: [PATCH 020/200] Update documentation --- man/clean_up_postcode.Rd | 14 ++++++++++++++ man/fill_dob.Rd | 5 +++++ man/replace_postcode_with_col.Rd | 17 +++++++++++++++++ 3 files changed, 36 insertions(+) create mode 100644 man/clean_up_postcode.Rd create mode 100644 man/replace_postcode_with_col.Rd diff --git a/man/clean_up_postcode.Rd b/man/clean_up_postcode.Rd new file mode 100644 index 000000000..b3cd91548 --- /dev/null +++ b/man/clean_up_postcode.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{clean_up_postcode} +\alias{clean_up_postcode} +\title{Clean up postcode column} +\usage{ +clean_up_postcode(individual_file) +} +\arguments{ +\item{individual_file}{Individual file where each row represents a unique CHI} +} +\description{ +Clean up column containing postcode. +} diff --git a/man/fill_dob.Rd b/man/fill_dob.Rd index 99d3c03bd..3dc8e4295 100644 --- a/man/fill_dob.Rd +++ b/man/fill_dob.Rd @@ -4,6 +4,8 @@ \alias{fill_dob} \title{Fill missing date of births} \usage{ +fill_dob(individual_file) + fill_dob(individual_file) } \arguments{ @@ -12,4 +14,7 @@ fill_dob(individual_file) \description{ Fill missing date of births with date of births from specific episode columns in hierarchy. + +Fill missing postcodes with +postcodes from specific episode columns in hierarchy. 
} diff --git a/man/replace_postcode_with_col.Rd b/man/replace_postcode_with_col.Rd new file mode 100644 index 000000000..3feb0fbcb --- /dev/null +++ b/man/replace_postcode_with_col.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{replace_postcode_with_col} +\alias{replace_postcode_with_col} +\title{Fill missing postcode} +\usage{ +replace_postcode_with_col(individual_file, col) +} +\arguments{ +\item{individual_file}{Individual file where each row represents a unique CHI} + +\item{col}{Column containing postcode for episode} +} +\description{ +Fill missing postcode with +postcodes from an episode postcode column. +} From 73f77d5a965d5c81eabade4115be415a6b8c3747 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 3 May 2023 13:35:06 +0100 Subject: [PATCH 021/200] implement quick fix for running 22/23 --- R/add_nsu_cohort.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/R/add_nsu_cohort.R b/R/add_nsu_cohort.R index ff27b0afa..1f4d4b334 100644 --- a/R/add_nsu_cohort.R +++ b/R/add_nsu_cohort.R @@ -11,6 +11,10 @@ add_nsu_cohort <- function(data, year) { year_param <- year + if (year == "2223"){ + return(data) + }else{ + # Check that the variables we need are in the data check_variables_exist(data, variables = c( @@ -110,4 +114,5 @@ add_nsu_cohort <- function(data, year) { dplyr::select(-dplyr::contains("_nsu"), -"has_chi") return(return_df) + } } From 00f37c807a266b2d09ed6f4f461b44ee303279be Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Wed, 3 May 2023 12:42:33 +0000 Subject: [PATCH 022/200] Style code --- R/add_nsu_cohort.R | 195 ++++++++++++++++++++++----------------------- 1 file changed, 97 insertions(+), 98 deletions(-) diff --git a/R/add_nsu_cohort.R b/R/add_nsu_cohort.R index 1f4d4b334..d98da5ee1 100644 --- a/R/add_nsu_cohort.R +++ b/R/add_nsu_cohort.R @@ -11,108 +11,107 @@ add_nsu_cohort <- function(data, year) { year_param <- year - if (year == "2223"){ + if (year == 
"2223") { return(data) - }else{ - - # Check that the variables we need are in the data - check_variables_exist(data, - variables = c( - "year", - "chi", - "recid", - "smrtype", - "postcode", - "gpprac", - "dob", - "gender" + } else { + # Check that the variables we need are in the data + check_variables_exist(data, + variables = c( + "year", + "chi", + "recid", + "smrtype", + "postcode", + "gpprac", + "dob", + "gender" + ) ) - ) - matched <- dplyr::full_join(data, - # NSU cohort file - read_file(get_nsu_path(year)) %>% - dplyr::mutate( - dob = as.Date(.data[["dob"]]), - gpprac = convert_eng_gpprac_to_dummy(.data[["gpprac"]]) - ), - # Match on by chi - by = "chi", - # Name the incoming variables with "_nsu" - suffix = c("", "_nsu"), - # Keep the chi from both sources - keep = TRUE - ) %>% - # Change the chi from the NSU cohort to a boolean - dplyr::mutate(has_chi = !is_missing(.data[["chi_nsu"]])) - - return_df <- matched %>% - # Get data from non service user lookup if the recid is empty - dplyr::mutate( - year = year_param, - recid = dplyr::if_else( - is_missing(.data[["recid"]]), - "NSU", - .data[["recid"]] - ), - smrtype = dplyr::if_else( - is_missing(.data[["recid"]]), - "Non-User", - .data[["smrtype"]] - ), - postcode = dplyr::if_else( - is_missing(.data[["recid"]]), - .data[["postcode_nsu"]], - .data[["postcode"]] - ), - gpprac = dplyr::if_else( - is_missing(.data[["recid"]]), - .data[["gpprac_nsu"]], - .data[["gpprac"]] - ), - dob = dplyr::if_else( - is_missing(.data[["recid"]]), - .data[["dob_nsu"]], - .data[["dob"]] - ), - gender = dplyr::if_else( - is_missing(.data[["recid"]]), - .data[["gender_nsu"]], - .data[["gender"]] - ) + matched <- dplyr::full_join(data, + # NSU cohort file + read_file(get_nsu_path(year)) %>% + dplyr::mutate( + dob = as.Date(.data[["dob"]]), + gpprac = convert_eng_gpprac_to_dummy(.data[["gpprac"]]) + ), + # Match on by chi + by = "chi", + # Name the incoming variables with "_nsu" + suffix = c("", "_nsu"), + # Keep the chi from both 
sources + keep = TRUE ) %>% - # If the data has come from the NSU cohort, - # use that data for the below variables - dplyr::mutate( - postcode = dplyr::if_else( - is_missing(.data[["postcode"]]) & .data[["has_chi"]], - .data[["postcode_nsu"]], - .data[["postcode"]] - ), - gpprac = dplyr::if_else( - is.na(.data[["gpprac"]]) & .data[["has_chi"]], - .data[["gpprac_nsu"]], - .data[["gpprac"]] - ), - dob = dplyr::if_else( - is.na(.data[["dob"]]) & .data[["has_chi"]], - .data[["dob_nsu"]], - .data[["dob"]] - ), - gender = dplyr::if_else( - is.na(.data[["gender"]]) & .data[["has_chi"]], - .data[["gender_nsu"]], - .data[["gender"]] - ), - chi = dplyr::if_else( - is_missing(.data[["chi"]]) & .data[["has_chi"]], - .data[["chi_nsu"]], - .data[["chi"]] - ) - ) %>% - # Remove the additional columns - dplyr::select(-dplyr::contains("_nsu"), -"has_chi") + # Change the chi from the NSU cohort to a boolean + dplyr::mutate(has_chi = !is_missing(.data[["chi_nsu"]])) + + return_df <- matched %>% + # Get data from non service user lookup if the recid is empty + dplyr::mutate( + year = year_param, + recid = dplyr::if_else( + is_missing(.data[["recid"]]), + "NSU", + .data[["recid"]] + ), + smrtype = dplyr::if_else( + is_missing(.data[["recid"]]), + "Non-User", + .data[["smrtype"]] + ), + postcode = dplyr::if_else( + is_missing(.data[["recid"]]), + .data[["postcode_nsu"]], + .data[["postcode"]] + ), + gpprac = dplyr::if_else( + is_missing(.data[["recid"]]), + .data[["gpprac_nsu"]], + .data[["gpprac"]] + ), + dob = dplyr::if_else( + is_missing(.data[["recid"]]), + .data[["dob_nsu"]], + .data[["dob"]] + ), + gender = dplyr::if_else( + is_missing(.data[["recid"]]), + .data[["gender_nsu"]], + .data[["gender"]] + ) + ) %>% + # If the data has come from the NSU cohort, + # use that data for the below variables + dplyr::mutate( + postcode = dplyr::if_else( + is_missing(.data[["postcode"]]) & .data[["has_chi"]], + .data[["postcode_nsu"]], + .data[["postcode"]] + ), + gpprac = dplyr::if_else( + 
is.na(.data[["gpprac"]]) & .data[["has_chi"]], + .data[["gpprac_nsu"]], + .data[["gpprac"]] + ), + dob = dplyr::if_else( + is.na(.data[["dob"]]) & .data[["has_chi"]], + .data[["dob_nsu"]], + .data[["dob"]] + ), + gender = dplyr::if_else( + is.na(.data[["gender"]]) & .data[["has_chi"]], + .data[["gender_nsu"]], + .data[["gender"]] + ), + chi = dplyr::if_else( + is_missing(.data[["chi"]]) & .data[["has_chi"]], + .data[["chi_nsu"]], + .data[["chi"]] + ) + ) %>% + # Remove the additional columns + dplyr::select(-dplyr::contains("_nsu"), -"has_chi") - return(return_df) + return(return_df) } } From 2cacee859549e031fde19c92dc92d4ad38222082 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Wed, 3 May 2023 14:46:21 +0100 Subject: [PATCH 023/200] Fix missed comma --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index de98cd97e..0d2df7dc2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -52,7 +52,7 @@ Imports: stringr (>= 1.5.0), tibble (>= 3.2.1), tidyr (>= 1.3.0), - tidyselect (>= 1.2.0) + tidyselect (>= 1.2.0), zoo (>= 1.8.0) Suggests: covr (>= 3.6.1), From 369a8bbfa1bf6418d8aee3e36ebee5c7541c0d6a Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Fri, 5 May 2023 11:05:18 +0100 Subject: [PATCH 024/200] Exclude DD code for now - TEMP fix --- R/create_individual_file.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index f2f32ee5d..1e0a28457 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -67,11 +67,11 @@ add_cij_columns <- function(episode_file) { NA_real_ ) ) %>% - dplyr::mutate(cij_delay = dplyr::if_else( - (.data$cij_delay == 1 & .data$Distinct_CIJ == 1), - 1, - 0 - )) %>% + # dplyr::mutate(cij_delay = dplyr::if_else( + # (.data$cij_delay == 1 & .data$Distinct_CIJ == 1), + # 1, + # 0 + # )) %>% dplyr::mutate( preventable_admissions = dplyr::if_else( (.data$cij_ppa == 1 & .data$Distinct_CIJ == 1), @@ 
-104,7 +104,7 @@ add_all_columns <- function(episode_file) { add_ooh_columns("OoH", .data$recid == "OoH") %>% add_dn_columns("DN", .data$recid == "DN") %>% add_cmh_columns("CMH", .data$recid == "CMH") %>% - add_dd_columns("DD", .data$recid == "DD") %>% + #add_dd_columns("DD", .data$recid == "DD") %>% add_nsu_columns("NSU", .data$recid == "NSU") %>% add_nrs_columns("NRS", .data$recid == "NRS") %>% add_hl1_columns("HL1", .data$recid == "HL1") %>% From f7158305181ba9e228d38bf827614817c6565cc2 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Fri, 5 May 2023 11:05:56 +0100 Subject: [PATCH 025/200] Correct/rename variables --- R/create_individual_file.R | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 1e0a28457..5f32dc39f 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -173,7 +173,7 @@ add_op_columns <- function(episode_file, prefix, condition) { episode_file <- episode_file %>% dplyr::mutate( "{prefix}_newcons_dnas" := dplyr::if_else(eval(condition_5_8), 1, NA_real_), - "{prefix}_cost_dnas" := dplyr::if_else(eval(condition_5_8), .data$cost_total_net_incdnas, NA_real_) + "{prefix}_cost_dnas" := dplyr::if_else(eval(condition_5_8), .data$cost_total_net_inc_dnas, NA_real_) ) return(episode_file) } @@ -219,7 +219,7 @@ add_ooh_columns <- function(episode_file, prefix, condition) { episode_file <- episode_file %>% dplyr::mutate( - OoH_consultation_time = dplyr::if_else(eval(condition), as.numeric((lubridate::seconds_to_period(.data$keytime2) + .data$keydate2_dateformat) - (lubridate::seconds_to_period(.data$keytime1) + .data$keydate1_dateformat), units = "mins"), NA_real_), + OoH_consultation_time = dplyr::if_else(eval(condition), as.numeric((lubridate::seconds_to_period(.data$keytime2) + .data$record_keydate2) - (lubridate::seconds_to_period(.data$keytime1) + .data$record_keydate1), units = "mins"), NA_real_), OoH_consultation_time = 
dplyr::if_else(OoH_consultation_time < 0, 0, .data$OoH_consultation_time) ) return(episode_file) @@ -232,7 +232,7 @@ add_dn_columns <- function(episode_file, prefix, condition) { condition <- substitute(condition) episode_file %>% add_standard_cols(prefix, condition, episode = TRUE, cost = TRUE) %>% - dplyr::mutate("{prefix}_contacts" := dplyr::if_else(eval(condition), .data$totalnodncontacts, NA_real_)) + dplyr::mutate("{prefix}_contacts" := dplyr::if_else(eval(condition), .data$total_no_dn_contacts, NA_real_)) } #' Add CMH columns @@ -306,7 +306,7 @@ add_ch_columns <- function(episode_file, prefix, condition) { ch_cost_per_day = dplyr::if_else(eval(condition) & .data$yearstay > 0, .data$cost_total_net / .data$yearstay, NA_real_), ch_cost_per_day = dplyr::if_else(eval(condition) & .data$yearstay == 0, .data$cost_total_net / .data$yearstay, .data$ch_cost_per_day), ch_no_cost = eval(condition) & is.na(ch_cost_per_day), - ch_ep_end = dplyr::if_else(eval(condition), .data$keydate2_dateformat, lubridate::NA_Date_) + ch_ep_end = dplyr::if_else(eval(condition), .data$record_keydate2, lubridate::NA_Date_) ) %>% dplyr::rowwise() %>% dplyr::mutate( @@ -492,7 +492,7 @@ aggregate_ch_episodes <- function(episode_file) { dplyr::group_by(.data$chi, .data$ch_chi_cis) %>% dplyr::mutate( ch_no_cost = max(.data$ch_no_cost), - ch_ep_start = min(.data$keydate1_dateformat), + ch_ep_start = min(.data$record_keydate1), ch_ep_end = max(.data$ch_ep_end), ch_cost_per_day = mean(.data$ch_cost_per_day) ) %>% @@ -594,9 +594,9 @@ aggregate_by_chi <- function(episode_file) { episode_file %>% dplyr::arrange( chi, - keydate1_dateformat, + record_keydate1, keytime1, - keydate2_dateformat, + record_keydate2, keytime2 ) %>% dplyr::group_by(chi) %>% @@ -749,7 +749,7 @@ drop_cols <- function(individual_file) { -"postcode", -"gpprac", -"no_paid_items", - -"totalnodncontacts" + -"total_no_dn_contacts" ) } From d537aadb1ed8da72edafc8f1f3abfcdd40f40aaf Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Fri, 5 
May 2023 10:09:57 +0000 Subject: [PATCH 026/200] Style code --- R/create_individual_file.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 5f32dc39f..be1c0ab81 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -104,7 +104,7 @@ add_all_columns <- function(episode_file) { add_ooh_columns("OoH", .data$recid == "OoH") %>% add_dn_columns("DN", .data$recid == "DN") %>% add_cmh_columns("CMH", .data$recid == "CMH") %>% - #add_dd_columns("DD", .data$recid == "DD") %>% + # add_dd_columns("DD", .data$recid == "DD") %>% add_nsu_columns("NSU", .data$recid == "NSU") %>% add_nrs_columns("NRS", .data$recid == "NRS") %>% add_hl1_columns("HL1", .data$recid == "HL1") %>% From 50641b3a3c29b5e80338f8de2543e6837922119e Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Fri, 12 May 2023 12:50:49 +0100 Subject: [PATCH 027/200] Include NSU in `check_year_valid` --- R/check_year_valid.R | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/R/check_year_valid.R b/R/check_year_valid.R index 3cdab4598..fcb93984b 100644 --- a/R/check_year_valid.R +++ b/R/check_year_valid.R @@ -21,16 +21,21 @@ check_year_valid <- function(year, type = c( "Homelessness", "Maternity", "MH", + "NSU", "Outpatients", "PIS", "SDS" )) { + if (year >= "2223" & type == "NSU"){ + return(FALSE) + } + if (year <= "1415") { - if (type %in% c("CMH", "DN", "Homelessness")) { + if (type %in% c("CMH", "DN", "Homelessness", "CH", "HC", "SDS", "AT")) { return(FALSE) } } else if (year <= "1516") { - if (type %in% c("CMH", "Homelessness")) { + if (type %in% c("CMH", "Homelessness", "CH", "HC", "SDS", "AT")) { return(FALSE) } } else if (year <= "1617") { From 1b52ebbc760390c5823ff25f9686f50965c97c3d Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Fri, 12 May 2023 12:52:00 +0100 Subject: [PATCH 028/200] Update `check_year_valid_tests` --- tests/testthat/test-check_year_valid.R | 36 
++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tests/testthat/test-check_year_valid.R b/tests/testthat/test-check_year_valid.R index 5cc173e97..02f61d880 100644 --- a/tests/testthat/test-check_year_valid.R +++ b/tests/testthat/test-check_year_valid.R @@ -14,6 +14,25 @@ test_that("Check year valid works for specific datasets ", { expect_equal(check_year_valid("1415", "MH"), TRUE) expect_equal(check_year_valid("1516", "Maternity"), TRUE) + # year <= "1617" + expect_equal(check_year_valid("1415", "AT"), FALSE) + expect_equal(check_year_valid("1516", "AT"), FALSE) + expect_equal(check_year_valid("1617", "AT"), FALSE) + expect_equal(check_year_valid("1718", "AT"), TRUE) + expect_equal(check_year_valid("1415", "CH"), FALSE) + expect_equal(check_year_valid("1516", "CH"), FALSE) + expect_equal(check_year_valid("1617", "CH"), FALSE) + expect_equal(check_year_valid("1718", "CH"), TRUE) + expect_equal(check_year_valid("1415", "HC"), FALSE) + expect_equal(check_year_valid("1516", "HC"), FALSE) + expect_equal(check_year_valid("1617", "HC"), FALSE) + expect_equal(check_year_valid("1718", "HC"), TRUE) + expect_equal(check_year_valid("1415", "SDS"), FALSE) + expect_equal(check_year_valid("1516", "SDS"), FALSE) + expect_equal(check_year_valid("1617", "SDS"), FALSE) + expect_equal(check_year_valid("1718", "SDS"), TRUE) + + # year >= "2122" expect_equal(check_year_valid("2122", "CMH"), FALSE) expect_equal(check_year_valid("2122", "DN"), FALSE) @@ -21,10 +40,27 @@ test_that("Check year valid works for specific datasets ", { expect_equal(check_year_valid("2122", "MH"), TRUE) expect_equal(check_year_valid("2122", "Maternity"), TRUE) + # NSUs + expect_equal(check_year_valid("1415", "NSU"), TRUE) + expect_equal(check_year_valid("1516", "NSU"), TRUE) + expect_equal(check_year_valid("1617", "NSU"), TRUE) + expect_equal(check_year_valid("1718", "NSU"), TRUE) + expect_equal(check_year_valid("1819", "NSU"), TRUE) + expect_equal(check_year_valid("1920", "NSU"), TRUE) + 
expect_equal(check_year_valid("2021", "NSU"), TRUE) + expect_equal(check_year_valid("2122", "NSU"), TRUE) + expect_equal(check_year_valid("2223", "NSU"), FALSE) # Other extracts not within boundaries expect_equal(check_year_valid("2021", "Acute"), TRUE) expect_equal(check_year_valid("1920", "Maternity"), TRUE) expect_equal(check_year_valid("1819", "MH"), TRUE) expect_equal(check_year_valid("1718", "Outpatients"), TRUE) + + # Social care + expect_equal(check_year_valid("1819", "AT"), TRUE) + expect_equal(check_year_valid("1920", "CH"), TRUE) + expect_equal(check_year_valid("2021", "HC"), TRUE) + expect_equal(check_year_valid("2122", "SDS"), TRUE) + }) From e5cf2a0b225c0f6d3d4322324e8e4acd38699319 Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Fri, 12 May 2023 11:55:10 +0000 Subject: [PATCH 029/200] Update documentation --- man/check_year_valid.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/check_year_valid.Rd b/man/check_year_valid.Rd index 587a27b2b..e469d7fae 100644 --- a/man/check_year_valid.Rd +++ b/man/check_year_valid.Rd @@ -9,7 +9,7 @@ is only available from 2016/17 onwards.} check_year_valid( year, type = c("Acute", "AE", "AT", "CH", "Client", "CMH", "DD", "Deaths", "DN", "GPOoH", - "HC", "Homelessness", "Maternity", "MH", "Outpatients", "PIS", "SDS") + "HC", "Homelessness", "Maternity", "MH", "NSU", "Outpatients", "PIS", "SDS") ) } \arguments{ From 33fe1055ab457e6a51d48a1a7b4ea1296a34bad2 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Fri, 12 May 2023 13:15:31 +0100 Subject: [PATCH 030/200] Update `add_nsu_cohort` to pick up years valid --- R/add_nsu_cohort.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/add_nsu_cohort.R b/R/add_nsu_cohort.R index d98da5ee1..65794e8ff 100644 --- a/R/add_nsu_cohort.R +++ b/R/add_nsu_cohort.R @@ -11,7 +11,7 @@ add_nsu_cohort <- function(data, year) { year_param <- year - if (year == "2223") { + if (!check_year_valid("2223", "NSU")) { return(data) } else { # Check that the 
variables we need are in the data From 3473c18f67aa3f0086ac89e567e4953588e04e54 Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Fri, 12 May 2023 12:23:04 +0000 Subject: [PATCH 031/200] Style code --- R/check_year_valid.R | 2 +- tests/testthat/test-check_year_valid.R | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/R/check_year_valid.R b/R/check_year_valid.R index fcb93984b..1bad7d042 100644 --- a/R/check_year_valid.R +++ b/R/check_year_valid.R @@ -26,7 +26,7 @@ check_year_valid <- function(year, type = c( "PIS", "SDS" )) { - if (year >= "2223" & type == "NSU"){ + if (year >= "2223" & type == "NSU") { return(FALSE) } diff --git a/tests/testthat/test-check_year_valid.R b/tests/testthat/test-check_year_valid.R index 02f61d880..a7197084c 100644 --- a/tests/testthat/test-check_year_valid.R +++ b/tests/testthat/test-check_year_valid.R @@ -62,5 +62,4 @@ test_that("Check year valid works for specific datasets ", { expect_equal(check_year_valid("1920", "CH"), TRUE) expect_equal(check_year_valid("2021", "HC"), TRUE) expect_equal(check_year_valid("2122", "SDS"), TRUE) - }) From fff9badfe039b203aa7509d3b15dd481790a9009 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Fri, 12 May 2023 13:29:03 +0100 Subject: [PATCH 032/200] remove extra `!` --- R/add_nsu_cohort.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/add_nsu_cohort.R b/R/add_nsu_cohort.R index 65794e8ff..6fbcf9bc1 100644 --- a/R/add_nsu_cohort.R +++ b/R/add_nsu_cohort.R @@ -11,7 +11,7 @@ add_nsu_cohort <- function(data, year) { year_param <- year - if (!check_year_valid("2223", "NSU")) { + if (check_year_valid("2223", "NSU")) { return(data) } else { # Check that the variables we need are in the data From 8a37356e10ae9d9fa582c97f7801f0b83281741a Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Fri, 12 May 2023 13:49:55 +0100 Subject: [PATCH 033/200] Exclude `cij_delay` --- R/create_individual_file.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/R/create_individual_file.R b/R/create_individual_file.R index be1c0ab81..4c5183d3b 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -610,7 +610,7 @@ aggregate_by_chi <- function(episode_file) { "CIJ_el", "CIJ_non_el", "CIJ_mat", - "cij_delay", + #"cij_delay", "OoH_cases" = "unique_ooh_case", dplyr::ends_with( c( From b2f694118491cd75c71c779703ae63ce41413398 Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Fri, 12 May 2023 12:55:41 +0000 Subject: [PATCH 034/200] Style code --- R/create_individual_file.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 4c5183d3b..9a2368aae 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -610,7 +610,7 @@ aggregate_by_chi <- function(episode_file) { "CIJ_el", "CIJ_non_el", "CIJ_mat", - #"cij_delay", + # "cij_delay", "OoH_cases" = "unique_ooh_case", dplyr::ends_with( c( From 07b03f33fe4e5c4dd141e23339201b851c343ff1 Mon Sep 17 00:00:00 2001 From: Jennit07 <67372904+Jennit07@users.noreply.github.com> Date: Tue, 16 May 2023 11:50:42 +0100 Subject: [PATCH 035/200] improve `max_no_inf()` --- R/create_individual_file.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 9a2368aae..d01967ca8 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -705,7 +705,7 @@ condition_cols <- function() { #' #' @param x Vector to return max of max_no_inf <- function(x) { - ifelse(!all(is.na(x)), max(x, na.rm = TRUE), NA) + dplyr::if_else(all(is.na(x)), NA, max(x, na.rm = TRUE)) } #' Custom minimum From 617ac68c825da0ac908693449808b0c54b1b6021 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Tue, 16 May 2023 11:46:09 +0100 Subject: [PATCH 036/200] Use pmin/max instead of `rowwise` --- R/create_individual_file.R | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/R/create_individual_file.R 
b/R/create_individual_file.R index d01967ca8..ec18a01e2 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -510,12 +510,10 @@ clean_up_ch <- function(episode_file) { fy_end = date_from_fy(year, "end") + 1, fy_start = date_from_fy(year, "start") ) %>% - dplyr::rowwise() %>% dplyr::mutate( - term_1 = min(ch_ep_end, fy_end + 1), - term_2 = max(ch_ep_start, fy_start) + term_1 = pmin(ch_ep_end, fy_end + 1), + term_2 = pmax(ch_ep_start, fy_start) ) %>% - dplyr::ungroup() %>% dplyr::mutate( ch_beddays = dplyr::if_else( recid == "CH", From 01cc1b4e1c5f81ac731ce4c68cbffcbf1d0cadae Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Tue, 16 May 2023 11:52:44 +0100 Subject: [PATCH 037/200] improve `min_no_inf()` --- R/create_individual_file.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index ec18a01e2..8e6077fc7 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -714,7 +714,7 @@ max_no_inf <- function(x) { #' #' @param x Vector to return min of min_no_inf <- function(x) { - ifelse(!all(is.na(x)), min(x, na.rm = TRUE), NA) + ifelse(all(is.na(x)), NA, min(x, na.rm = TRUE)) } #' Clean individual file From 2ff02bdb78bc5c81dcb39978fff18114909a06fb Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Tue, 16 May 2023 12:55:38 +0100 Subject: [PATCH 038/200] Use n_distinct(cij_marker) --- R/create_individual_file.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 8e6077fc7..ad6d09132 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -599,6 +599,7 @@ aggregate_by_chi <- function(episode_file) { ) %>% dplyr::group_by(chi) %>% dplyr::summarise( + distinct_cij = n_distinct(cij_marker), gender = mean(gender), dplyr::across(dplyr::ends_with(c( "postcode", "DoB", "gpprac" From 435cd0fdd562925573c77ccff736a9b2cc8cffcf Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Tue, 16 May 
2023 13:02:10 +0100 Subject: [PATCH 039/200] deal with distinct(ch_chi_cis) --- R/create_individual_file.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index ad6d09132..0dea43da0 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -489,6 +489,7 @@ na_type <- function(col = c("DoB", "postcode", "gpprac")) { #' @inheritParams create_individual_file aggregate_ch_episodes <- function(episode_file) { episode_file %>% + dplyr::filter(!is.na(.data$ch_chi_cis)) %>% dplyr::group_by(.data$chi, .data$ch_chi_cis) %>% dplyr::mutate( ch_no_cost = max(.data$ch_no_cost), @@ -496,7 +497,10 @@ aggregate_ch_episodes <- function(episode_file) { ch_ep_end = max(.data$ch_ep_end), ch_cost_per_day = mean(.data$ch_cost_per_day) ) %>% - dplyr::ungroup() + dplyr::ungroup() %>% + dplyr::distinct(.data$chi, .data$ch_chi_cis) %>% + dplyr::select(.data$chi, .data$ch_chi_cis, ch_no_cost, ch_ep_start, ch_ep_end, ch_cost_per_day) %>% + dplyr::right_join(episode_file, by = c(.data$chi, .data$ch_chi_cis)) } #' Clean up CH From 5a0b5506c14a3ba69f6c4fd6367492399228d68b Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Tue, 16 May 2023 13:32:54 +0100 Subject: [PATCH 040/200] use n_distinct(ooh_case_id) --- R/create_individual_file.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 0dea43da0..35889b1a1 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -220,8 +220,10 @@ add_ooh_columns <- function(episode_file, prefix, condition) { episode_file <- episode_file %>% dplyr::mutate( OoH_consultation_time = dplyr::if_else(eval(condition), as.numeric((lubridate::seconds_to_period(.data$keytime2) + .data$record_keydate2) - (lubridate::seconds_to_period(.data$keytime1) + .data$record_keydate1), units = "mins"), NA_real_), - OoH_consultation_time = dplyr::if_else(OoH_consultation_time < 0, 0, 
.data$OoH_consultation_time) + OoH_consultation_time = dplyr::if_else(OoH_consultation_time < 0, 0, .data$OoH_consultation_time), + unique_ooh_case = dplyr::if_else(recid != "OoH", 0, n_distinct(ooh_case_id)) ) + return(episode_file) } From 0da09b03429a7cd66e2aeacd7bb9ab019c86b055 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Tue, 16 May 2023 15:38:58 +0100 Subject: [PATCH 041/200] remove `find_non_duplicates` --- R/create_individual_file.R | 3 --- 1 file changed, 3 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 35889b1a1..dfa43c75e 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -8,10 +8,7 @@ create_individual_file <- function(episode_file) { remove_blank_chi() %>% find_non_duplicates(.data$cij_marker, "Distinct_CIJ") %>% add_cij_columns() %>% - find_non_duplicates(.data$ch_chi_cis, "first_ch_ep") %>% add_all_columns() %>% - find_non_duplicates(.data$ooh_case_id, "unique_ooh_case") %>% - dplyr::mutate(unique_ooh_case = dplyr::if_else(recid != "OoH", 0, unique_ooh_case)) %>% aggregate_ch_episodes() %>% clean_up_ch() %>% recode_gender() %>% From faa0a966ed8c52c28082b777f08995aaa9e6daba Mon Sep 17 00:00:00 2001 From: Jennit07 <67372904+Jennit07@users.noreply.github.com> Date: Wed, 17 May 2023 08:27:13 +0100 Subject: [PATCH 042/200] Use dplyr::if_else() Co-authored-by: James McMahon --- R/create_individual_file.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index dfa43c75e..904ecc401 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -718,7 +718,7 @@ max_no_inf <- function(x) { #' #' @param x Vector to return min of min_no_inf <- function(x) { - ifelse(all(is.na(x)), NA, min(x, na.rm = TRUE)) + dplyr::if_else(all(is.na(x)), NA, min(x, na.rm = TRUE)) } #' Clean individual file From 979fc81ae48510b15b096feaf86779064e9d4587 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 17 May 2023 
08:37:34 +0100 Subject: [PATCH 043/200] Fix typo in `ooh_covid_assessment` --- R/create_individual_file.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 904ecc401..5f25b7afd 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -210,7 +210,7 @@ add_ooh_columns <- function(episode_file, prefix, condition) { "{prefix}_other" := dplyr::if_else(eval(condition) & .data$smrtype == "OOH-Other", 1, NA_real_), "{prefix}_PCC" := dplyr::if_else(eval(condition) & .data$smrtype == "OOH-PCC", 1, NA_real_), ooh_covid_advice = dplyr::if_else(eval(condition) & .data$smrtype == "OOH-C19Adv", 1, NA_real_), - ooh_covid_assesment = dplyr::if_else(eval(condition) & .data$smrtype == "OOH-C19Ass", 1, NA_real_), + ooh_covid_assessment = dplyr::if_else(eval(condition) & .data$smrtype == "OOH-C19Ass", 1, NA_real_), ooh_covid_other = dplyr::if_else(eval(condition) & .data$smrtype == "OOH-C190th", 1, NA_real_) ) From 6a57809c8b108261a5d9b05176da5ca2cd873cd9 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 17 May 2023 08:39:51 +0100 Subject: [PATCH 044/200] Move `ooh_case_id` to aggregate --- R/create_individual_file.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 5f25b7afd..7a9eba7fa 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -602,7 +602,8 @@ aggregate_by_chi <- function(episode_file) { ) %>% dplyr::group_by(chi) %>% dplyr::summarise( - distinct_cij = n_distinct(cij_marker), + distinct_cij = n_distinct("cij_marker"), + ooh_cases = n_distinct("ooh_case_id"), gender = mean(gender), dplyr::across(dplyr::ends_with(c( "postcode", "DoB", "gpprac" From 83fbdcbe1cc744ca4babe3204e8a4bc78919ae27 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 17 May 2023 08:52:11 +0100 Subject: [PATCH 045/200] Use `slfhelper::ltc_vars` --- R/create_individual_file.R | 22 
+--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 7a9eba7fa..a77ccddd0 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -674,27 +674,7 @@ aggregate_by_chi <- function(episode_file) { #' which follow format "condition" and "condition_date" e.g. #' "dementia" and "dementia_date" condition_cols <- function() { - conditions <- c( - "arth", - "asthma", - "atrialfib", - "cancer", - "cvd", - "liver", - "copd", - "dementia", - "diabetes", - "epilepsy", - "chd", - "hefailure", - "ms", - "parkinsons", - "refailure", - "congen", - "bloodbfo", - "endomet", - "digestive" - ) + conditions <- slfhelper::ltc_vars date_cols <- paste0(conditions, "_date") all_cols <- c(conditions, date_cols) return(all_cols) From 8a761c0e7e1a0f96b9e3e7b0266fefaf98c5a714 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 17 May 2023 09:38:18 +0100 Subject: [PATCH 046/200] Remove `clean_up_dob` Already done in `correct_demographics` --- R/create_individual_file.R | 46 -------------------------------------- 1 file changed, 46 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index a77ccddd0..b9e0222de 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -711,7 +711,6 @@ clean_individual_file <- function(individual_file) { individual_file %>% drop_cols() %>% clean_up_gender() %>% - clean_up_dob() %>% dplyr::mutate( age = floor(as.numeric(lubridate::interval(.data$DoB, date_from_fy(year, "mid")), "years")) ) %>% @@ -767,51 +766,6 @@ clean_up_gender <- function(individual_file) { ) } -#' Clean up date of birth column -#' -#' @description Clean up column containing date of birth. 
-#' -#' @inheritParams clean_individual_file -clean_up_dob <- function(individual_file) { - individual_file %>% - dplyr::mutate( - chi_dob_1 = lubridate::dmy(paste0(substr(.data$chi, 1, 4), "19", substr(.data$chi, 5, 6))), - chi_dob_2 = lubridate::dmy(paste0(substr(.data$chi, 1, 4), "20", substr(.data$chi, 5, 6))), - chi_age_1 = as.numeric(lubridate::interval(lubridate::ymd(.data$chi_dob_1), date_from_fy(year, "mid")), "years"), - chi_age_2 = as.numeric(lubridate::interval(lubridate::ymd(.data$chi_dob_2), date_from_fy(year, "mid")), "years") - ) %>% - dplyr::rowwise() %>% - dplyr::mutate( - dob_condition_1 = .data$chi_dob_1 %in% dplyr::pick(dplyr::ends_with("_DoB")) & !(chi_dob_2 %in% dplyr::pick(dplyr::ends_with("_DoB"))), - dob_condition_2 = .data$chi_dob_2 %in% dplyr::pick(dplyr::ends_with("_DoB")) & !(chi_dob_1 %in% dplyr::pick(dplyr::ends_with("_DoB"))), - dob_condition_3 = .data$chi_dob_2 > min(lubridate::today(), date_from_fy(year, "end")), - dob_condition_4 = unclass(.data$chi_dob_2) > min_no_inf(as.numeric(dplyr::pick(.data$arth_date:.data$death_date))), - dob_condition_5 = .data$congen_date %in% c(.data$chi_dob_1, .data$chi_dob_2) - ) %>% - dplyr::ungroup() %>% - dplyr::mutate( - DoB = dplyr::case_when( - .data$dob_condition_1 ~ .data$chi_dob_1, - .data$dob_condition_2 ~ .data$chi_dob_2 - ) - ) %>% - dplyr::mutate( - DoB = dplyr::case_when( - is.na(.data$DoB) & is.na(.data$chi_dob_1) & !is.na(.data$chi_dob_2) ~ .data$chi_dob_2, - is.na(.data$DoB) & is.na(.data$chi_dob_2) & !is.na(.data$chi_dob_1) ~ .data$chi_dob_1, - is.na(.data$DoB) & .data$chi_age_2 < 0 ~ .data$chi_dob_1, - is.na(.data$DoB) & .data$dob_condition_3 ~ .data$chi_dob_1, - is.na(.data$DoB) & .data$dob_condition_4 ~ .data$chi_dob_1, - is.na(.data$DoB) & .data$dob_condition_5 ~ .data$congen_date, - is.na(.data$DoB) & .data$chi_age_1 > 115 ~ .data$chi_dob_2, - TRUE ~ .data$DoB - ) - ) %>% - fill_dob() %>% - dplyr::select( - -dplyr::starts_with(c("dob_condition_", "chi_dob_", "chi_age_")) - ) -} 
#' Fill missing date of births #' From 46a7b70599e60f65461b90e22e52653a88da8d10 Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Wed, 17 May 2023 08:42:21 +0000 Subject: [PATCH 047/200] Update documentation --- man/clean_up_dob.Rd | 14 -------------- 1 file changed, 14 deletions(-) delete mode 100644 man/clean_up_dob.Rd diff --git a/man/clean_up_dob.Rd b/man/clean_up_dob.Rd deleted file mode 100644 index 4b9003726..000000000 --- a/man/clean_up_dob.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/create_individual_file.R -\name{clean_up_dob} -\alias{clean_up_dob} -\title{Clean up date of birth column} -\usage{ -clean_up_dob(individual_file) -} -\arguments{ -\item{individual_file}{Individual file where each row represents a unique CHI} -} -\description{ -Clean up column containing date of birth. -} From 6424c952f0f36e80d23eb6709720640c3615a5b7 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Wed, 17 May 2023 08:54:44 +0000 Subject: [PATCH 048/200] [check-spelling] Update metadata Update for https://github.com/Public-Health-Scotland/source-linkage-files/actions/runs/4981058958/attempts/1 Accepted in https://github.com/Public-Health-Scotland/source-linkage-files/pull/654#issuecomment-1551009850 Signed-off-by: check-spelling-bot --- .github/actions/spelling/expect.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt index 4b36c7d8e..c2380b4c6 100644 --- a/.github/actions/spelling/expect.txt +++ b/.github/actions/spelling/expect.txt @@ -65,6 +65,7 @@ fyyear geogs ggplot GLS +GPOo gpprac gss hbnames From 89268dc6ca9e706d08a88fc765388d533eafa9aa Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 17 May 2023 10:59:10 +0100 Subject: [PATCH 049/200] Use `start_next_fy_quarter` in place of rowwise --- R/create_individual_file.R | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/R/create_individual_file.R 
b/R/create_individual_file.R index b9e0222de..77e3e34f2 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -305,13 +305,11 @@ add_ch_columns <- function(episode_file, prefix, condition) { ch_cost_per_day = dplyr::if_else(eval(condition) & .data$yearstay > 0, .data$cost_total_net / .data$yearstay, NA_real_), ch_cost_per_day = dplyr::if_else(eval(condition) & .data$yearstay == 0, .data$cost_total_net / .data$yearstay, .data$ch_cost_per_day), ch_no_cost = eval(condition) & is.na(ch_cost_per_day), - ch_ep_end = dplyr::if_else(eval(condition), .data$record_keydate2, lubridate::NA_Date_) - ) %>% - dplyr::rowwise() %>% - dplyr::mutate( - ch_ep_end = dplyr::if_else(eval(condition) & is.na(ch_ep_end), lubridate::quarter(zoo::as.yearqtr(.data$sc_latest_submission), type = "date_first", fiscal_start = 4), .data$ch_ep_end) - ) %>% - dplyr::ungroup() + ch_ep_end = dplyr::if_else(eval(condition), .data$record_keydate2, lubridate::NA_Date_), + # If end date is missing use the first day of next FY quarter + ch_ep_end = dplyr::if_else(eval(condition) & is.na(ch_ep_end), start_next_fy_quarter(sc_latest_submission), .data$ch_ep_end) + ) + } #' Add HC columns From b6d93ed1f79573e6dbe179ff54dc7228235157a1 Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Wed, 17 May 2023 10:06:51 +0000 Subject: [PATCH 050/200] Style code --- R/create_individual_file.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 77e3e34f2..e89731dd0 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -309,7 +309,6 @@ add_ch_columns <- function(episode_file, prefix, condition) { # If end date is missing use the first day of next FY quarter ch_ep_end = dplyr::if_else(eval(condition) & is.na(ch_ep_end), start_next_fy_quarter(sc_latest_submission), .data$ch_ep_end) ) - } #' Add HC columns From d4e1d4154a76e32f4bf8fe6b5c856747c213f782 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 17 May 2023 11:10:58 
+0100 Subject: [PATCH 051/200] Use `compute_mid_year_age` --- R/create_individual_file.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index e89731dd0..5ea0b8853 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -709,7 +709,7 @@ clean_individual_file <- function(individual_file) { drop_cols() %>% clean_up_gender() %>% dplyr::mutate( - age = floor(as.numeric(lubridate::interval(.data$DoB, date_from_fy(year, "mid")), "years")) + age = compute_mid_year_age(year, .data$DoB) ) %>% clean_up_postcode() } From eac15ed5cc8578ed5cd88b4ea5867bc1a780f903 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 17 May 2023 11:18:53 +0100 Subject: [PATCH 052/200] convert code into data.table for improving speed --- R/create_individual_file.R | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 5ea0b8853..3cab259a2 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -597,7 +597,9 @@ aggregate_by_chi <- function(episode_file) { record_keydate2, keytime2 ) %>% - dplyr::group_by(chi) %>% + # use as.data.table to change the data format to data.table to accelerate + data.table::as.data.table() %>% + dplyr::group_by("chi") %>% dplyr::summarise( distinct_cij = n_distinct("cij_marker"), ooh_cases = n_distinct("ooh_case_id"), @@ -662,7 +664,9 @@ aggregate_by_chi <- function(episode_file) { ), ~ dplyr::first(., na_rm = TRUE) ) - ) + ) %>% + # change the data format from data.table to data.frame + tibble::as_tibble() } #' Condition columns From 4f6d6ff91b63ebaeadbc38cf5d9a628bc3e2eb6b Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 17 May 2023 12:58:56 +0100 Subject: [PATCH 053/200] Update `get_fy_dates`function --- R/get_fy_dates.R | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/R/get_fy_dates.R b/R/get_fy_dates.R index 970e049d1..3a4b96afb 
100644 --- a/R/get_fy_dates.R +++ b/R/get_fy_dates.R @@ -20,9 +20,9 @@ start_fy <- function(year, format = c("fyyear", "alternate")) { format <- match.arg(format) if (format == "fyyear") { - start_fy <- as.Date(paste0(convert_fyyear_to_year(year), "-04-01")) + start_fy <- lubridate::make_date(convert_fyyear_to_year(year), 4, 1) } else if (format == "alternate") { - start_fy <- as.Date(paste0(year, "-04-01")) + start_fy <- lubridate::make_date(year, 4, 1) } return(start_fy) @@ -47,16 +47,14 @@ end_fy <- function(year, format = c("fyyear", "alternate")) { format <- "fyyear" } + year <- as.numeric(paste0("20", substr(year, 3, 4))) + format <- match.arg(format) if (format == "fyyear") { - end_fy <- as.Date( - paste0(as.numeric(convert_fyyear_to_year(year)) + 1L, "-03-31") - ) + end_fy <- lubridate::make_date(year, 3, 31) } else if (format == "alternate") { - end_fy <- as.Date( - paste0(as.numeric(year) + 1L, "-03-31") - ) + end_fy <- lubridate::make_date(year + 1L, 3, 31) } return(end_fy) @@ -85,9 +83,9 @@ midpoint_fy <- function(year, format = c("fyyear", "alternate")) { format <- match.arg(format) if (format == "fyyear") { - midpoint_fy <- as.Date(paste0(convert_fyyear_to_year(year), "-09-30")) + midpoint_fy <- lubridate::make_date(convert_fyyear_to_year(year), 9, 30) } else if (format == "alternate") { - midpoint_fy <- as.Date(paste0(year, "-09-30")) + midpoint_fy <- lubridate::make_date(year, 9, 30) } return(midpoint_fy) From 4c9134bd3b8ed1d8bfae0ddd86502a17c0e29189 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 17 May 2023 13:08:40 +0100 Subject: [PATCH 054/200] remove `date_from_fy`, use `get_fy_dates` --- R/create_individual_file.R | 29 ++--------------------------- 1 file changed, 2 insertions(+), 27 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 3cab259a2..af1931d08 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -507,8 +507,8 @@ aggregate_ch_episodes <- function(episode_file) { 
clean_up_ch <- function(episode_file) { episode_file %>% dplyr::mutate( - fy_end = date_from_fy(year, "end") + 1, - fy_start = date_from_fy(year, "start") + fy_end = end_fy(year), + fy_start = start_fy(year) ) %>% dplyr::mutate( term_1 = pmin(ch_ep_end, fy_end + 1), @@ -541,31 +541,6 @@ clean_up_ch <- function(episode_file) { ) } -#' Date from FY -#' -#' @description Return start, mid, or end date from financial year in format "2122". -#' -#' @param financial_year Financial year represented in "YYYY" format e.g. "2122" -#' @param type One of "start", "end", and "mid", representing the date to return -date_from_fy <- function(financial_year, type = c("start", "end", "mid")) { - match.arg(type) - n <- switch(type, - "start" = 0, - "mid" = 0, - "end" = 2 - ) - year <- as.numeric(paste0("20", substr(financial_year, 1 + n, 2 + n))) - if (type == "start") { - date <- lubridate::make_date(year, 4, 1) - return(date) - } else if (type == "end") { - date <- lubridate::make_date(year, 3, 31) - return(date) - } - date <- lubridate::make_date(year, 9, 30) - return(date) -} - #' Recode gender #' #' @description Recode gender to 1.5 if 0 or 9. From 3730ee14eace457dc4dddff828880a2bf1209544 Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Wed, 17 May 2023 12:12:14 +0000 Subject: [PATCH 055/200] Update documentation --- man/date_from_fy.Rd | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 man/date_from_fy.Rd diff --git a/man/date_from_fy.Rd b/man/date_from_fy.Rd deleted file mode 100644 index cc3b8f8a3..000000000 --- a/man/date_from_fy.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/create_individual_file.R -\name{date_from_fy} -\alias{date_from_fy} -\title{Date from FY} -\usage{ -date_from_fy(financial_year, type = c("start", "end", "mid")) -} -\arguments{ -\item{financial_year}{Financial year represented in "YYYY" format e.g. 
"2122"} - -\item{type}{One of "start", "end", and "mid", representing the date to return} -} -\description{ -Return start, mid, or end date from financial year in format "2122". -} From c9852b44a6db597b5d899bbbdb3517c4bd537b77 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 17 May 2023 15:10:01 +0100 Subject: [PATCH 056/200] Remove `clean_up_postcode` function Not needed anymore --- R/create_individual_file.R | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index af1931d08..1b41b93ad 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -779,26 +779,6 @@ replace_dob_with_col <- function(individual_file, col) { ) } -#' Clean up postcode column -#' -#' @description Clean up column containing postcode. -#' -#' @inheritParams clean_individual_file -clean_up_postcode <- function(individual_file) { - postcode_lookup <- readr::read_rds(get_slf_postcode_path()) - individual_file %>% - dplyr::mutate( - # all_blank is TRUE when all postcode variables are blank - all_blank = all(is.na(dplyr::pick(dplyr::ends_with("_postcode")))), - # Use NRS_postcode to store the dummy for no other reason than it's last - # in the hierarchy - HL1_postcode = dplyr::if_else( - all_blank == 1, - "XXX XXX", - .data$HL1_postcode - ) - ) -} #' Fill missing postcodes #' From 3714bcaa4e54d5956d8e0761a33a06927c0d115d Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 17 May 2023 15:15:32 +0100 Subject: [PATCH 057/200] Remove non duplicates function/move to aggregate --- R/create_individual_file.R | 63 +++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 35 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 1b41b93ad..6c55f2b40 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -6,7 +6,6 @@ create_individual_file <- function(episode_file) { episode_file %>% remove_blank_chi() %>% - 
find_non_duplicates(.data$cij_marker, "Distinct_CIJ") %>% add_cij_columns() %>% add_all_columns() %>% aggregate_ch_episodes() %>% @@ -22,26 +21,14 @@ create_individual_file <- function(episode_file) { #' #' @inheritParams create_individual_file remove_blank_chi <- function(episode_file) { + + cli::cli_alert_info("Remove blank CHI at {Sys.time()} and the memory usage was {object.size()}") + episode_file %>% dplyr::mutate(chi = dplyr::na_if(.data$chi, "")) %>% dplyr::filter(!is.na(.data$chi)) } -#' Find non-duplicates -#' -#' @description Create new column which marks first (per group) -#' non-duplicated observation as 1, with any duplicates marked as 0. -#' -#' @inheritParams create_individual_file -#' @param group Column to group by -#' @param col_name Name of new column -find_non_duplicates <- function(episode_file, group, col_name) { - episode_file %>% - dplyr::group_by(.data$chi, {{ group }}) %>% - dplyr::mutate("{col_name}" := dplyr::if_else(duplicated({{ group }}), 0, 1)) %>% - dplyr::ungroup() %>% - dplyr::mutate("{col_name}" := dplyr::if_else(is.na({{ group }}), 0, .data[[col_name]])) -} #' Add CIJ-related columns #' @@ -52,34 +39,35 @@ add_cij_columns <- function(episode_file) { episode_file %>% dplyr::mutate( CIJ_non_el = dplyr::if_else(.data$cij_pattype_code == 0, - .data$Distinct_CIJ, + .data$cij_marker, NA_real_ ), CIJ_el = dplyr::if_else(.data$cij_pattype_code == 1, - .data$Distinct_CIJ, + .data$cij_marker, NA_real_ ), CIJ_mat = dplyr::if_else(.data$cij_pattype_code == 2, - .data$Distinct_CIJ, + .data$cij_marker, NA_real_ ) ) %>% # dplyr::mutate(cij_delay = dplyr::if_else( - # (.data$cij_delay == 1 & .data$Distinct_CIJ == 1), + # (.data$cij_delay == 1 & .data$cij_marker == 1), # 1, # 0 # )) %>% dplyr::mutate( preventable_admissions = dplyr::if_else( - (.data$cij_ppa == 1 & .data$Distinct_CIJ == 1), + (.data$cij_ppa == 1 & .data$cij_marker == 1), 1, 0 - ), - preventable_beddays = dplyr::if_else( - (.data$cij_ppa == 1 & .data$Distinct_CIJ == 1), - 
as.numeric(.data$cij_end_date - .data$cij_start_date), - 0 - ) + ) # , + # Come back to here + # preventable_beddays = dplyr::if_else( + # (.data$cij_ppa == 1 & .data$Distinct_cij == 1), + # as.numeric(.data$cij_end_date - .data$cij_start_date), + # 0 + # ) ) } @@ -218,7 +206,6 @@ add_ooh_columns <- function(episode_file, prefix, condition) { dplyr::mutate( OoH_consultation_time = dplyr::if_else(eval(condition), as.numeric((lubridate::seconds_to_period(.data$keytime2) + .data$record_keydate2) - (lubridate::seconds_to_period(.data$keytime1) + .data$record_keydate1), units = "mins"), NA_real_), OoH_consultation_time = dplyr::if_else(OoH_consultation_time < 0, 0, .data$OoH_consultation_time), - unique_ooh_case = dplyr::if_else(recid != "OoH", 0, n_distinct(ooh_case_id)) ) return(episode_file) @@ -565,6 +552,8 @@ recode_gender <- function(episode_file) { #' @inheritParams create_individual_file aggregate_by_chi <- function(episode_file) { episode_file %>% + # use as.data.table to change the data format to data.table to accelerate + data.table::as.data.table() %>% dplyr::arrange( chi, record_keydate1, @@ -572,23 +561,26 @@ aggregate_by_chi <- function(episode_file) { record_keydate2, keytime2 ) %>% - # use as.data.table to change the data format to data.table to accelerate - data.table::as.data.table() %>% dplyr::group_by("chi") %>% dplyr::summarise( - distinct_cij = n_distinct("cij_marker"), - ooh_cases = n_distinct("ooh_case_id"), gender = mean(gender), dplyr::across(dplyr::ends_with(c( "postcode", "DoB", "gpprac" )), ~ dplyr::last(., na_rm = TRUE)), dplyr::across( c( + "cij_total" = "cij_marker", "CIJ_el", "CIJ_non_el", "CIJ_mat", # "cij_delay", - "OoH_cases" = "unique_ooh_case", + "ooh_cases" = "ooh_case_id", + "preventable_admissions" + ), + ~ dplyr::n_distinct(.x, na.rm = TRUE) + ), + dplyr::across( + c( dplyr::ends_with( c( "episodes", @@ -605,13 +597,14 @@ aggregate_by_chi <- function(episode_file) { "homeV", "time", "admissions", - "assesment", + "assessment", 
"other", "DN", "NHS24", "PCC", "_dnas" - ) + ), + -"preventable_admissions" ), dplyr::starts_with("SDS_option") ), From 15ae96afaefa3b8521c9b7d667b9f4feb3252ceb Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Wed, 17 May 2023 14:18:07 +0000 Subject: [PATCH 058/200] Style code --- R/create_individual_file.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 6c55f2b40..9d29c71ba 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -21,7 +21,6 @@ create_individual_file <- function(episode_file) { #' #' @inheritParams create_individual_file remove_blank_chi <- function(episode_file) { - cli::cli_alert_info("Remove blank CHI at {Sys.time()} and the memory usage was {object.size()}") episode_file %>% From e182a1488f66ff909b1e861c8ddcf66533953f76 Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Wed, 17 May 2023 14:18:12 +0000 Subject: [PATCH 059/200] Update documentation --- man/clean_up_postcode.Rd | 14 -------------- man/find_non_duplicates.Rd | 19 ------------------- 2 files changed, 33 deletions(-) delete mode 100644 man/clean_up_postcode.Rd delete mode 100644 man/find_non_duplicates.Rd diff --git a/man/clean_up_postcode.Rd b/man/clean_up_postcode.Rd deleted file mode 100644 index b3cd91548..000000000 --- a/man/clean_up_postcode.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/create_individual_file.R -\name{clean_up_postcode} -\alias{clean_up_postcode} -\title{Clean up postcode column} -\usage{ -clean_up_postcode(individual_file) -} -\arguments{ -\item{individual_file}{Individual file where each row represents a unique CHI} -} -\description{ -Clean up column containing postcode. 
-} diff --git a/man/find_non_duplicates.Rd b/man/find_non_duplicates.Rd deleted file mode 100644 index ba82bd5c4..000000000 --- a/man/find_non_duplicates.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/create_individual_file.R -\name{find_non_duplicates} -\alias{find_non_duplicates} -\title{Find non-duplicates} -\usage{ -find_non_duplicates(episode_file, group, col_name) -} -\arguments{ -\item{episode_file}{Tibble containing episodic data} - -\item{group}{Column to group by} - -\item{col_name}{Name of new column} -} -\description{ -Create new column which marks first (per group) -non-duplicated observation as 1, with any duplicates marked as 0. -} From 73852cc5c02ca347332c5be57720325d0c252d0f Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 17 May 2023 15:47:09 +0100 Subject: [PATCH 060/200] Add time stamps to `create_individual_file` --- R/create_individual_file.R | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 9d29c71ba..e4c68e2d2 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -21,7 +21,7 @@ create_individual_file <- function(episode_file) { #' #' @inheritParams create_individual_file remove_blank_chi <- function(episode_file) { - cli::cli_alert_info("Remove blank CHI at {Sys.time()} and the memory usage was {object.size()}") + cli::cli_alert_info("Remove blank CHI function started at {Sys.time()}") episode_file %>% dplyr::mutate(chi = dplyr::na_if(.data$chi, "")) %>% @@ -35,6 +35,9 @@ remove_blank_chi <- function(episode_file) { #' #' @inheritParams create_individual_file add_cij_columns <- function(episode_file) { + + cli::cli_alert_info("Add cij columns function started at {Sys.time()}") + episode_file %>% dplyr::mutate( CIJ_non_el = dplyr::if_else(.data$cij_pattype_code == 0, @@ -77,6 +80,9 @@ add_cij_columns <- function(episode_file) { #' #' @inheritParams 
create_individual_file add_all_columns <- function(episode_file) { + + cli::cli_alert_info("Add all columns function started at {Sys.time()}") + episode_file %>% add_acute_columns("Acute", (.data$smrtype == "Acute-DC" | .data$smrtype == "Acute-IP") & .data$cij_pattype != "Maternity") %>% add_mat_columns("Mat", .data$recid == "02B" | .data$cij_pattype == "Maternity") %>% @@ -470,6 +476,9 @@ na_type <- function(col = c("DoB", "postcode", "gpprac")) { #' #' @inheritParams create_individual_file aggregate_ch_episodes <- function(episode_file) { + + cli::cli_alert_info("Aggregate ch episodes function started at {Sys.time()}") + episode_file %>% dplyr::filter(!is.na(.data$ch_chi_cis)) %>% dplyr::group_by(.data$chi, .data$ch_chi_cis) %>% @@ -491,6 +500,9 @@ aggregate_ch_episodes <- function(episode_file) { #' #' @inheritParams create_individual_file clean_up_ch <- function(episode_file) { + + cli::cli_alert_info("Clean up CH function started at {Sys.time()}") + episode_file %>% dplyr::mutate( fy_end = end_fy(year), @@ -533,6 +545,9 @@ clean_up_ch <- function(episode_file) { #' #' @inheritParams create_individual_file recode_gender <- function(episode_file) { + + cli::cli_alert_info("Recode Gender function started at {Sys.time()}") + episode_file %>% dplyr::mutate( gender = dplyr::if_else( @@ -550,6 +565,9 @@ recode_gender <- function(episode_file) { #' #' @inheritParams create_individual_file aggregate_by_chi <- function(episode_file) { + + cli::cli_alert_info("Aggregate by CHI function started at {Sys.time()}") + episode_file %>% # use as.data.table to change the data format to data.table to accelerate data.table::as.data.table() %>% @@ -676,6 +694,9 @@ min_no_inf <- function(x) { #' #' @param individual_file Individual file where each row represents a unique CHI clean_individual_file <- function(individual_file) { + + cli::cli_alert_info("Clean individual file function started at {Sys.time()}") + individual_file %>% drop_cols() %>% clean_up_gender() %>% From 
a358cc57367850bf66ed2bb1e0cf6fc875e941bf Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Wed, 17 May 2023 14:51:12 +0000 Subject: [PATCH 061/200] Style code --- R/create_individual_file.R | 7 ------- 1 file changed, 7 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index e4c68e2d2..e591f0b27 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -35,7 +35,6 @@ remove_blank_chi <- function(episode_file) { #' #' @inheritParams create_individual_file add_cij_columns <- function(episode_file) { - cli::cli_alert_info("Add cij columns function started at {Sys.time()}") episode_file %>% @@ -80,7 +79,6 @@ add_cij_columns <- function(episode_file) { #' #' @inheritParams create_individual_file add_all_columns <- function(episode_file) { - cli::cli_alert_info("Add all columns function started at {Sys.time()}") episode_file %>% @@ -476,7 +474,6 @@ na_type <- function(col = c("DoB", "postcode", "gpprac")) { #' #' @inheritParams create_individual_file aggregate_ch_episodes <- function(episode_file) { - cli::cli_alert_info("Aggregate ch episodes function started at {Sys.time()}") episode_file %>% @@ -500,7 +497,6 @@ aggregate_ch_episodes <- function(episode_file) { #' #' @inheritParams create_individual_file clean_up_ch <- function(episode_file) { - cli::cli_alert_info("Clean up CH function started at {Sys.time()}") episode_file %>% @@ -545,7 +541,6 @@ clean_up_ch <- function(episode_file) { #' #' @inheritParams create_individual_file recode_gender <- function(episode_file) { - cli::cli_alert_info("Recode Gender function started at {Sys.time()}") episode_file %>% @@ -565,7 +560,6 @@ recode_gender <- function(episode_file) { #' #' @inheritParams create_individual_file aggregate_by_chi <- function(episode_file) { - cli::cli_alert_info("Aggregate by CHI function started at {Sys.time()}") episode_file %>% @@ -694,7 +688,6 @@ min_no_inf <- function(x) { #' #' @param individual_file Individual file where each row represents a unique 
CHI clean_individual_file <- function(individual_file) { - cli::cli_alert_info("Clean individual file function started at {Sys.time()}") individual_file %>% From ca0c7b68a51b9e768a1faca866db8d08cddf866a Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Thu, 18 May 2023 07:46:46 +0100 Subject: [PATCH 062/200] remove `clean_up_postcode` --- R/create_individual_file.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index e591f0b27..5bd66ddee 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -695,8 +695,7 @@ clean_individual_file <- function(individual_file) { clean_up_gender() %>% dplyr::mutate( age = compute_mid_year_age(year, .data$DoB) - ) %>% - clean_up_postcode() + ) } #' Drop redundant columns From 2cb8a24b98f9fa351e0bd68aafa57a2a9b66aa6d Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Thu, 18 May 2023 08:00:48 +0100 Subject: [PATCH 063/200] Deal with ch cis episodes --- R/create_individual_file.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 5bd66ddee..6b172e6e3 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -291,7 +291,6 @@ add_ch_columns <- function(episode_file, prefix, condition) { episode_file %>% add_standard_cols(prefix, condition) %>% dplyr::mutate( - ch_cis_episodes = dplyr::if_else(eval(condition), .data$first_ch_ep, NA_real_), ch_cost_per_day = dplyr::if_else(eval(condition) & .data$yearstay > 0, .data$cost_total_net / .data$yearstay, NA_real_), ch_cost_per_day = dplyr::if_else(eval(condition) & .data$yearstay == 0, .data$cost_total_net / .data$yearstay, .data$ch_cost_per_day), ch_no_cost = eval(condition) & is.na(ch_cost_per_day), @@ -579,7 +578,7 @@ aggregate_by_chi <- function(episode_file) { "postcode", "DoB", "gpprac" )), ~ dplyr::last(., na_rm = TRUE)), dplyr::across( - c( + c("ch_cis_episodes" = "ch_chi_cis", "cij_total" = 
"cij_marker", "CIJ_el", "CIJ_non_el", From fee2b46bed4b4d4587696c3b9c38bf7e46a8756e Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Thu, 18 May 2023 07:02:47 +0000 Subject: [PATCH 064/200] Style code --- R/create_individual_file.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 6b172e6e3..607b4fcc2 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -578,7 +578,8 @@ aggregate_by_chi <- function(episode_file) { "postcode", "DoB", "gpprac" )), ~ dplyr::last(., na_rm = TRUE)), dplyr::across( - c("ch_cis_episodes" = "ch_chi_cis", + c( + "ch_cis_episodes" = "ch_chi_cis", "cij_total" = "cij_marker", "CIJ_el", "CIJ_non_el", From ee36738ede0a0119960832c05d2538b7d43b9020 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Thu, 18 May 2023 08:32:24 +0100 Subject: [PATCH 065/200] add .data$ --- R/create_individual_file.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 607b4fcc2..d97393cfa 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -293,10 +293,10 @@ add_ch_columns <- function(episode_file, prefix, condition) { dplyr::mutate( ch_cost_per_day = dplyr::if_else(eval(condition) & .data$yearstay > 0, .data$cost_total_net / .data$yearstay, NA_real_), ch_cost_per_day = dplyr::if_else(eval(condition) & .data$yearstay == 0, .data$cost_total_net / .data$yearstay, .data$ch_cost_per_day), - ch_no_cost = eval(condition) & is.na(ch_cost_per_day), + ch_no_cost = eval(condition) & is.na(.data$ch_cost_per_day), ch_ep_end = dplyr::if_else(eval(condition), .data$record_keydate2, lubridate::NA_Date_), # If end date is missing use the first day of next FY quarter - ch_ep_end = dplyr::if_else(eval(condition) & is.na(ch_ep_end), start_next_fy_quarter(sc_latest_submission), .data$ch_ep_end) + ch_ep_end = dplyr::if_else(eval(condition) & is.na(.data$ch_ep_end), 
start_next_fy_quarter(.data$sc_latest_submission), .data$ch_ep_end) ) } From feef2b62ad5e0aa197bc974a0892d57d7b14e71c Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Thu, 18 May 2023 08:33:03 +0100 Subject: [PATCH 066/200] Turn ch aggregate into a data table --- R/create_individual_file.R | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index d97393cfa..ffa61a93b 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -476,7 +476,9 @@ aggregate_ch_episodes <- function(episode_file) { cli::cli_alert_info("Aggregate ch episodes function started at {Sys.time()}") episode_file %>% - dplyr::filter(!is.na(.data$ch_chi_cis)) %>% + # dplyr::filter(!is.na(.data$ch_chi_cis)) %>% + # use as.data.table to change the data format to data.table to accelerate + data.table::as.data.table() %>% dplyr::group_by(.data$chi, .data$ch_chi_cis) %>% dplyr::mutate( ch_no_cost = max(.data$ch_no_cost), @@ -485,9 +487,12 @@ aggregate_ch_episodes <- function(episode_file) { ch_cost_per_day = mean(.data$ch_cost_per_day) ) %>% dplyr::ungroup() %>% - dplyr::distinct(.data$chi, .data$ch_chi_cis) %>% - dplyr::select(.data$chi, .data$ch_chi_cis, ch_no_cost, ch_ep_start, ch_ep_end, ch_cost_per_day) %>% - dplyr::right_join(episode_file, by = c(.data$chi, .data$ch_chi_cis)) + # change the data format from data.table to data.frame + tibble::as_tibble() + + # dplyr::distinct(.data$chi, .data$ch_chi_cis) %>% + # dplyr::select(.data$chi, .data$ch_chi_cis, .data$ch_no_cost, .data$ch_ep_start, .data$ch_ep_end, .data$ch_cost_per_day) %>% + # dplyr::right_join(episode_file, by = c(.data$chi, .data$ch_chi_cis)) } #' Clean up CH From da13d9224eb3866546613b44029a71ce8936e83a Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Thu, 18 May 2023 07:36:00 +0000 Subject: [PATCH 067/200] Style code --- R/create_individual_file.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git 
a/R/create_individual_file.R b/R/create_individual_file.R index ffa61a93b..9142091dd 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -476,7 +476,7 @@ aggregate_ch_episodes <- function(episode_file) { cli::cli_alert_info("Aggregate ch episodes function started at {Sys.time()}") episode_file %>% - # dplyr::filter(!is.na(.data$ch_chi_cis)) %>% + # dplyr::filter(!is.na(.data$ch_chi_cis)) %>% # use as.data.table to change the data format to data.table to accelerate data.table::as.data.table() %>% dplyr::group_by(.data$chi, .data$ch_chi_cis) %>% @@ -490,9 +490,9 @@ aggregate_ch_episodes <- function(episode_file) { # change the data format from data.table to data.frame tibble::as_tibble() - # dplyr::distinct(.data$chi, .data$ch_chi_cis) %>% - # dplyr::select(.data$chi, .data$ch_chi_cis, .data$ch_no_cost, .data$ch_ep_start, .data$ch_ep_end, .data$ch_cost_per_day) %>% - # dplyr::right_join(episode_file, by = c(.data$chi, .data$ch_chi_cis)) + # dplyr::distinct(.data$chi, .data$ch_chi_cis) %>% + # dplyr::select(.data$chi, .data$ch_chi_cis, .data$ch_no_cost, .data$ch_ep_start, .data$ch_ep_end, .data$ch_cost_per_day) %>% + # dplyr::right_join(episode_file, by = c(.data$chi, .data$ch_chi_cis)) } #' Clean up CH From 7fc40fa01d97de52a8ed8a5423c8735ca50844cf Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Thu, 18 May 2023 08:51:13 +0100 Subject: [PATCH 068/200] use ch_chi_cis --- R/create_individual_file.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 9142091dd..bdd2dd98b 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -524,12 +524,12 @@ clean_up_ch <- function(episode_file) { NA_real_ ), ch_beddays = dplyr::if_else( - recid == "CH" & first_ch_ep == 0, + recid == "CH" & ch_chi_cis == 0, 0, ch_beddays ), ch_cost = dplyr::if_else( - recid == "CH" & first_ch_ep == 0, + recid == "CH" & ch_chi_cis == 0, 0, ch_cost ) From 
45eeca09f3d5f8f3b5de8a9f6a6099770ed80e69 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Thu, 18 May 2023 09:27:02 +0100 Subject: [PATCH 069/200] remove `preventable_admissions` from aggregate --- R/create_individual_file.R | 48 ++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index bdd2dd98b..eec9ee2fd 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -612,7 +612,6 @@ aggregate_by_chi <- function(episode_file) { "advice", "homeV", "time", - "admissions", "assessment", "other", "DN", @@ -620,33 +619,32 @@ aggregate_by_chi <- function(episode_file) { "PCC", "_dnas" ), - -"preventable_admissions" + dplyr::starts_with("SDS_option") ), - dplyr::starts_with("SDS_option") + ~ sum(., na.rm = TRUE) ), - ~ sum(., na.rm = TRUE) - ), - dplyr::across( - c( - dplyr::starts_with("sc_"), - -"sc_send_lca", - -"sc_latest_submission", - "HL1_in_FY" = "hh_in_fy", - "NSU" - ), - ~ max_no_inf(.) - ), - dplyr::across( - c( - condition_cols(), - "death_date", - "deceased", - "year", - dplyr::ends_with(c( - "_Cohort", "end_fy", "start_fy" - )), + dplyr::across( + c( + dplyr::starts_with("sc_"), + -"sc_send_lca", + -"sc_latest_submission", + "HL1_in_FY" = "hh_in_fy", + "NSU" + ), + ~ max_no_inf(.) 
), - ~ dplyr::first(., na_rm = TRUE) + dplyr::across( + c( + condition_cols(), + "death_date", + "deceased", + "year", + dplyr::ends_with(c( + "_Cohort", "end_fy", "start_fy" + )), + ), + ~ dplyr::first(., na_rm = TRUE) + ) ) ) %>% # change the data format from data.table to data.frame From d89b0aae78b714fa174bab5b90d003f10975a713 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Thu, 18 May 2023 10:17:49 +0100 Subject: [PATCH 070/200] exclude `hh_in_fy` for now --- R/create_individual_file.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index eec9ee2fd..f18bdbc08 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -628,7 +628,7 @@ aggregate_by_chi <- function(episode_file) { dplyr::starts_with("sc_"), -"sc_send_lca", -"sc_latest_submission", - "HL1_in_FY" = "hh_in_fy", + #"HL1_in_FY" = "hh_in_fy", "NSU" ), ~ max_no_inf(.) From 2326c0f4fb36cde1eb0df6e54c79870b77cae95e Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Thu, 18 May 2023 09:19:54 +0000 Subject: [PATCH 071/200] Style code --- R/create_individual_file.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index f18bdbc08..340267561 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -628,7 +628,7 @@ aggregate_by_chi <- function(episode_file) { dplyr::starts_with("sc_"), -"sc_send_lca", -"sc_latest_submission", - #"HL1_in_FY" = "hh_in_fy", + # "HL1_in_FY" = "hh_in_fy", "NSU" ), ~ max_no_inf(.) 
From 78d2c3643d41b5bec65e88330fd9956c541cb1e6 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Thu, 18 May 2023 12:54:20 +0100 Subject: [PATCH 072/200] Test - exclude `sc_` vars from aggregate --- R/create_individual_file.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 340267561..3d65cfde9 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -625,9 +625,9 @@ aggregate_by_chi <- function(episode_file) { ), dplyr::across( c( - dplyr::starts_with("sc_"), - -"sc_send_lca", - -"sc_latest_submission", + #dplyr::starts_with("sc_"), + #-"sc_send_lca", + #-"sc_latest_submission", # "HL1_in_FY" = "hh_in_fy", "NSU" ), From 3ac7d269cc2b0847947c795e704cf3122095b896 Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Thu, 18 May 2023 11:56:41 +0000 Subject: [PATCH 073/200] Style code --- R/create_individual_file.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 3d65cfde9..c4ef08742 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -625,7 +625,7 @@ aggregate_by_chi <- function(episode_file) { ), dplyr::across( c( - #dplyr::starts_with("sc_"), + # dplyr::starts_with("sc_"), #-"sc_send_lca", #-"sc_latest_submission", # "HL1_in_FY" = "hh_in_fy", From 141c8808cf7a459aa0dd5ccb3e6d7ff54f4cdaca Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Thu, 18 May 2023 13:45:49 +0100 Subject: [PATCH 074/200] Exclude for now --- R/create_individual_file.R | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index c4ef08742..72ae73e1f 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -623,16 +623,16 @@ aggregate_by_chi <- function(episode_file) { ), ~ sum(., na.rm = TRUE) ), - dplyr::across( - c( - # dplyr::starts_with("sc_"), - #-"sc_send_lca", - 
#-"sc_latest_submission", - # "HL1_in_FY" = "hh_in_fy", - "NSU" - ), - ~ max_no_inf(.) - ), + # dplyr::across( + # c( + # # dplyr::starts_with("sc_"), + # #-"sc_send_lca", + # #-"sc_latest_submission", + # # "HL1_in_FY" = "hh_in_fy", + # "NSU" + # ), + # ~ max_no_inf(.) + # ), dplyr::across( c( condition_cols(), From 93fcd437b63c2874d36d7eda9350b732fcdc14d4 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Thu, 18 May 2023 14:31:00 +0100 Subject: [PATCH 075/200] exclude for now --- R/create_individual_file.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 72ae73e1f..b3f79cd3d 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -636,8 +636,8 @@ aggregate_by_chi <- function(episode_file) { dplyr::across( c( condition_cols(), - "death_date", - "deceased", + #"death_date", + #"deceased", "year", dplyr::ends_with(c( "_Cohort", "end_fy", "start_fy" From baf5d1339cdd1ccdc13d3f3ed6c876c6f80ccec8 Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Thu, 18 May 2023 13:36:16 +0000 Subject: [PATCH 076/200] Style code --- R/create_individual_file.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index b3f79cd3d..4a25e8222 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -636,8 +636,8 @@ aggregate_by_chi <- function(episode_file) { dplyr::across( c( condition_cols(), - #"death_date", - #"deceased", + # "death_date", + # "deceased", "year", dplyr::ends_with(c( "_Cohort", "end_fy", "start_fy" From 3bf8fb77c9eae035892f5b800bb60428a46df250 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Tue, 23 May 2023 14:00:49 +0100 Subject: [PATCH 077/200] automate `check_year_valid` --- R/add_nsu_cohort.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/add_nsu_cohort.R b/R/add_nsu_cohort.R index 6fbcf9bc1..bb0d9cc4e 100644 --- a/R/add_nsu_cohort.R +++ 
b/R/add_nsu_cohort.R @@ -11,9 +11,10 @@ add_nsu_cohort <- function(data, year) { year_param <- year - if (check_year_valid("2223", "NSU")) { + if (!check_year_valid(year, "NSU")) { return(data) - } else { + } + # Check that the variables we need are in the data check_variables_exist(data, variables = c( @@ -113,5 +114,4 @@ add_nsu_cohort <- function(data, year) { dplyr::select(-dplyr::contains("_nsu"), -"has_chi") return(return_df) - } } From 3e5a059cbb326e0785a4e2b5d7f89610705827af Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Tue, 23 May 2023 14:03:05 +0100 Subject: [PATCH 078/200] Return dummy file path for NSU not valid --- R/get_nsu_paths.R | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/R/get_nsu_paths.R b/R/get_nsu_paths.R index c4c430672..2d53f0c84 100644 --- a/R/get_nsu_paths.R +++ b/R/get_nsu_paths.R @@ -10,11 +10,16 @@ #' @family file path functions #' @seealso [get_file_path()] for the generic function. get_nsu_path <- function(year, ...) { - nsu_file_path <- get_file_path( - directory = fs::path(get_slf_dir(), "NSU"), - file_name = stringr::str_glue("All_CHIs_20{year}.parquet"), - ... - ) + + if (!check_year_valid(year, "NSU")) { + return(get_dummy_boxi_extract_path()) + } + + nsu_file_path <- get_file_path( + directory = fs::path(get_slf_dir(), "NSU"), + file_name = stringr::str_glue("All_CHIs_20{year}.parquet"), + ... 
+ ) return(nsu_file_path) } From bfeffc7007688ebb741a15211b2e9bd417925be6 Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Tue, 23 May 2023 13:13:11 +0000 Subject: [PATCH 079/200] Style code --- R/add_nsu_cohort.R | 190 ++++++++++++++++++++++----------------------- R/get_nsu_paths.R | 11 ++- 2 files changed, 100 insertions(+), 101 deletions(-) diff --git a/R/add_nsu_cohort.R b/R/add_nsu_cohort.R index bb0d9cc4e..c5a26da12 100644 --- a/R/add_nsu_cohort.R +++ b/R/add_nsu_cohort.R @@ -15,103 +15,103 @@ add_nsu_cohort <- function(data, year) { return(data) } - # Check that the variables we need are in the data - check_variables_exist(data, - variables = c( - "year", - "chi", - "recid", - "smrtype", - "postcode", - "gpprac", - "dob", - "gender" - ) + # Check that the variables we need are in the data + check_variables_exist(data, + variables = c( + "year", + "chi", + "recid", + "smrtype", + "postcode", + "gpprac", + "dob", + "gender" ) + ) - matched <- dplyr::full_join(data, - # NSU cohort file - read_file(get_nsu_path(year)) %>% - dplyr::mutate( - dob = as.Date(.data[["dob"]]), - gpprac = convert_eng_gpprac_to_dummy(.data[["gpprac"]]) - ), - # Match on by chi - by = "chi", - # Name the incoming variables with "_nsu" - suffix = c("", "_nsu"), - # Keep the chi from both sources - keep = TRUE - ) %>% - # Change the chi from the NSU cohort to a boolean - dplyr::mutate(has_chi = !is_missing(.data[["chi_nsu"]])) - - return_df <- matched %>% - # Get data from non service user lookup if the recid is empty + matched <- dplyr::full_join(data, + # NSU cohort file + read_file(get_nsu_path(year)) %>% dplyr::mutate( - year = year_param, - recid = dplyr::if_else( - is_missing(.data[["recid"]]), - "NSU", - .data[["recid"]] - ), - smrtype = dplyr::if_else( - is_missing(.data[["recid"]]), - "Non-User", - .data[["smrtype"]] - ), - postcode = dplyr::if_else( - is_missing(.data[["recid"]]), - .data[["postcode_nsu"]], - .data[["postcode"]] - ), - gpprac = dplyr::if_else( - 
is_missing(.data[["recid"]]), - .data[["gpprac_nsu"]], - .data[["gpprac"]] - ), - dob = dplyr::if_else( - is_missing(.data[["recid"]]), - .data[["dob_nsu"]], - .data[["dob"]] - ), - gender = dplyr::if_else( - is_missing(.data[["recid"]]), - .data[["gender_nsu"]], - .data[["gender"]] - ) - ) %>% - # If the data has come from the NSU cohort, - # use that data for the below variables - dplyr::mutate( - postcode = dplyr::if_else( - is_missing(.data[["postcode"]]) & .data[["has_chi"]], - .data[["postcode_nsu"]], - .data[["postcode"]] - ), - gpprac = dplyr::if_else( - is.na(.data[["gpprac"]]) & .data[["has_chi"]], - .data[["gpprac_nsu"]], - .data[["gpprac"]] - ), - dob = dplyr::if_else( - is.na(.data[["dob"]]) & .data[["has_chi"]], - .data[["dob_nsu"]], - .data[["dob"]] - ), - gender = dplyr::if_else( - is.na(.data[["gender"]]) & .data[["has_chi"]], - .data[["gender_nsu"]], - .data[["gender"]] - ), - chi = dplyr::if_else( - is_missing(.data[["chi"]]) & .data[["has_chi"]], - .data[["chi_nsu"]], - .data[["chi"]] - ) - ) %>% - # Remove the additional columns - dplyr::select(-dplyr::contains("_nsu"), -"has_chi") + dob = as.Date(.data[["dob"]]), + gpprac = convert_eng_gpprac_to_dummy(.data[["gpprac"]]) + ), + # Match on by chi + by = "chi", + # Name the incoming variables with "_nsu" + suffix = c("", "_nsu"), + # Keep the chi from both sources + keep = TRUE + ) %>% + # Change the chi from the NSU cohort to a boolean + dplyr::mutate(has_chi = !is_missing(.data[["chi_nsu"]])) + + return_df <- matched %>% + # Get data from non service user lookup if the recid is empty + dplyr::mutate( + year = year_param, + recid = dplyr::if_else( + is_missing(.data[["recid"]]), + "NSU", + .data[["recid"]] + ), + smrtype = dplyr::if_else( + is_missing(.data[["recid"]]), + "Non-User", + .data[["smrtype"]] + ), + postcode = dplyr::if_else( + is_missing(.data[["recid"]]), + .data[["postcode_nsu"]], + .data[["postcode"]] + ), + gpprac = dplyr::if_else( + is_missing(.data[["recid"]]), + 
.data[["gpprac_nsu"]], + .data[["gpprac"]] + ), + dob = dplyr::if_else( + is_missing(.data[["recid"]]), + .data[["dob_nsu"]], + .data[["dob"]] + ), + gender = dplyr::if_else( + is_missing(.data[["recid"]]), + .data[["gender_nsu"]], + .data[["gender"]] + ) + ) %>% + # If the data has come from the NSU cohort, + # use that data for the below variables + dplyr::mutate( + postcode = dplyr::if_else( + is_missing(.data[["postcode"]]) & .data[["has_chi"]], + .data[["postcode_nsu"]], + .data[["postcode"]] + ), + gpprac = dplyr::if_else( + is.na(.data[["gpprac"]]) & .data[["has_chi"]], + .data[["gpprac_nsu"]], + .data[["gpprac"]] + ), + dob = dplyr::if_else( + is.na(.data[["dob"]]) & .data[["has_chi"]], + .data[["dob_nsu"]], + .data[["dob"]] + ), + gender = dplyr::if_else( + is.na(.data[["gender"]]) & .data[["has_chi"]], + .data[["gender_nsu"]], + .data[["gender"]] + ), + chi = dplyr::if_else( + is_missing(.data[["chi"]]) & .data[["has_chi"]], + .data[["chi_nsu"]], + .data[["chi"]] + ) + ) %>% + # Remove the additional columns + dplyr::select(-dplyr::contains("_nsu"), -"has_chi") - return(return_df) + return(return_df) } diff --git a/R/get_nsu_paths.R b/R/get_nsu_paths.R index 2d53f0c84..107a92168 100644 --- a/R/get_nsu_paths.R +++ b/R/get_nsu_paths.R @@ -10,16 +10,15 @@ #' @family file path functions #' @seealso [get_file_path()] for the generic function. get_nsu_path <- function(year, ...) { - if (!check_year_valid(year, "NSU")) { return(get_dummy_boxi_extract_path()) } - nsu_file_path <- get_file_path( - directory = fs::path(get_slf_dir(), "NSU"), - file_name = stringr::str_glue("All_CHIs_20{year}.parquet"), - ... - ) + nsu_file_path <- get_file_path( + directory = fs::path(get_slf_dir(), "NSU"), + file_name = stringr::str_glue("All_CHIs_20{year}.parquet"), + ... 
+ ) return(nsu_file_path) } From 4aacf7a4ab1faa2a3b15fd8ccc657c2618eb234b Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 24 May 2023 12:54:06 +0100 Subject: [PATCH 080/200] Fix brackets in aggregate --- R/create_individual_file.R | 61 +++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 30 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 4a25e8222..064f5c529 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -576,12 +576,13 @@ aggregate_by_chi <- function(episode_file) { record_keydate2, keytime2 ) %>% - dplyr::group_by("chi") %>% + dplyr::group_by(.data$chi) %>% dplyr::summarise( gender = mean(gender), - dplyr::across(dplyr::ends_with(c( - "postcode", "DoB", "gpprac" - )), ~ dplyr::last(., na_rm = TRUE)), + dplyr::across( + dplyr::ends_with(c("postcode", "DoB", "gpprac")), + ~ dplyr::last(., na_rm = TRUE) + ), dplyr::across( c( "ch_cis_episodes" = "ch_chi_cis", @@ -614,37 +615,37 @@ aggregate_by_chi <- function(episode_file) { "time", "assessment", "other", - "DN", + # "DN", "NHS24", "PCC", "_dnas" - ), - dplyr::starts_with("SDS_option") + ) ), - ~ sum(., na.rm = TRUE) + dplyr::starts_with("SDS_option") ), - # dplyr::across( - # c( - # # dplyr::starts_with("sc_"), - # #-"sc_send_lca", - # #-"sc_latest_submission", - # # "HL1_in_FY" = "hh_in_fy", - # "NSU" - # ), - # ~ max_no_inf(.) - # ), - dplyr::across( - c( - condition_cols(), - # "death_date", - # "deceased", - "year", - dplyr::ends_with(c( - "_Cohort", "end_fy", "start_fy" - )), - ), - ~ dplyr::first(., na_rm = TRUE) - ) + ~ sum(., na.rm = TRUE) + ), + # dplyr::across( + # c( + # # dplyr::starts_with("sc_"), + # #-"sc_send_lca", + # #-"sc_latest_submission", + # # "HL1_in_FY" = "hh_in_fy", + # "NSU" + # ), + # ~ max_no_inf(.) 
+ # ), + dplyr::across( + c( + condition_cols(), + # "death_date", + # "deceased", + "year", + dplyr::ends_with(c( + "_Cohort", "end_fy", "start_fy" + )), + ), + ~ dplyr::first(., na_rm = TRUE) ) ) %>% # change the data format from data.table to data.frame From 5bf6a4b124e69517944d8dee831a7da2d7bc767c Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 24 May 2023 12:54:44 +0100 Subject: [PATCH 081/200] TEMP - exclude variables --- R/create_individual_file.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 064f5c529..ffed97b20 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -90,8 +90,8 @@ add_all_columns <- function(episode_file) { add_ae_columns("AE", .data$recid == "AE2") %>% add_pis_columns("PIS", .data$recid == "PIS") %>% add_ooh_columns("OoH", .data$recid == "OoH") %>% - add_dn_columns("DN", .data$recid == "DN") %>% - add_cmh_columns("CMH", .data$recid == "CMH") %>% + # add_dn_columns("DN", .data$recid == "DN") %>% + # add_cmh_columns("CMH", .data$recid == "CMH") %>% # add_dd_columns("DD", .data$recid == "DD") %>% add_nsu_columns("NSU", .data$recid == "NSU") %>% add_nrs_columns("NRS", .data$recid == "NRS") %>% @@ -715,8 +715,8 @@ drop_cols <- function(individual_file) { -"dob", -"postcode", -"gpprac", - -"no_paid_items", - -"total_no_dn_contacts" + -"no_paid_items"#, + #-"total_no_dn_contacts" ) } From e045cccc7a3c5718b321a4348b064f9759504806 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 24 May 2023 12:55:19 +0100 Subject: [PATCH 082/200] Use `phsmethods::sex_from_chi` --- R/create_individual_file.R | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index ffed97b20..314f2abf3 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -741,12 +741,7 @@ clean_up_gender <- function(individual_file) { dplyr::mutate( gender = dplyr::case_when( 
.data$gender != 1.5 ~ round(.data$gender), - as.numeric(substr(.data$chi, 9, 9)) %% 2 == 1 ~ 1, - TRUE ~ 2 - ), - gender = dplyr::case_when( - .data$gender == 1 ~ "Male", - .data$gender == 2 ~ "Female" + .default = phsmethods::sex_from_chi(.data$chi, chi_check = FALSE) ) ) } From 173ae02055d34c380c6dc3b772b667369ddbc2ad Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Wed, 24 May 2023 11:58:26 +0000 Subject: [PATCH 083/200] Style code --- R/create_individual_file.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 314f2abf3..d1d787683 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -715,7 +715,7 @@ drop_cols <- function(individual_file) { -"dob", -"postcode", -"gpprac", - -"no_paid_items"#, + -"no_paid_items" # , #-"total_no_dn_contacts" ) } From e5332ee71a0d77997f2fac2c9e007c1819e5cdab Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 24 May 2023 18:07:46 +0100 Subject: [PATCH 084/200] Add ungroup() --- R/create_individual_file.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index d1d787683..cf481065b 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -648,6 +648,7 @@ aggregate_by_chi <- function(episode_file) { ~ dplyr::first(., na_rm = TRUE) ) ) %>% + ungroup() %>% # change the data format from data.table to data.frame tibble::as_tibble() } From cec63a3498f418c5439180e3b45b21aa83931bc4 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 24 May 2023 18:09:30 +0100 Subject: [PATCH 085/200] lowercase dob --- R/create_individual_file.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index cf481065b..fb5813b14 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -699,7 +699,7 @@ clean_individual_file <- function(individual_file) { drop_cols() %>% clean_up_gender() %>% 
dplyr::mutate( - age = compute_mid_year_age(year, .data$DoB) + age = compute_mid_year_age(year, .data$dob) ) } From 8a652dfe96249c026ac30d31378dab45a7da9fa1 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Thu, 25 May 2023 11:06:44 +0100 Subject: [PATCH 086/200] Remove as.data.table --- R/create_individual_file.R | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index fb5813b14..fe440c723 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -567,8 +567,6 @@ aggregate_by_chi <- function(episode_file) { cli::cli_alert_info("Aggregate by CHI function started at {Sys.time()}") episode_file %>% - # use as.data.table to change the data format to data.table to accelerate - data.table::as.data.table() %>% dplyr::arrange( chi, record_keydate1, @@ -648,9 +646,7 @@ aggregate_by_chi <- function(episode_file) { ~ dplyr::first(., na_rm = TRUE) ) ) %>% - ungroup() %>% - # change the data format from data.table to data.frame - tibble::as_tibble() + dplyr::ungroup() } #' Condition columns From fc979d9642aa33a751362c738c0b78a35ace22da Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Wed, 31 May 2023 16:52:48 +0100 Subject: [PATCH 087/200] rewrite aggregate_by_chi with data.table --- R/aggregate_by_chi_zihao.R | 125 +++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 R/aggregate_by_chi_zihao.R diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R new file mode 100644 index 000000000..85b87e413 --- /dev/null +++ b/R/aggregate_by_chi_zihao.R @@ -0,0 +1,125 @@ +library(data.table) + +aggregate_by_chi_zihao <- function(episode_file) { + cli::cli_alert_info("Aggregate by CHI function started at {Sys.time()}") + + data.table::setDT(episode_file) # Convert to data.table + + # Sort the data within each chunk + data.table::setkeyv(episode_file, c("chi", "record_keydate1", "keytime1", "record_keydate2", "keytime2")) + + data.table::setnames( + 
episode_file, + c("ch_chi_cis", "cij_marker", "ooh_case_id" + #,"hh_in_fy" + ), + c("ch_cis_episodes", "cij_total", "ooh_cases" + #,"HL1_in_FY" + ) + ) + + # Initialize an empty data.table for the aggregated results + aggregated_data <- data.table::data.table() + + # Process the data in chunks + chunk_size <- min(nrow(episode_file), 1e7) # Adjust the chunk size as per your system's memory capacity + n_chunks <- nrow(episode_file) %/% chunk_size + + + # colums specification + cols2 <- names(episode_file)[grepl("postcode$|DoB$|gpprac$", + names(episode_file), + ignore.case = TRUE)] + cols3 <- c( + "ch_cis_episodes", + "cij_total", + "CIJ_el", + "CIJ_non_el", + "CIJ_mat", + # "cij_delay", + "ooh_cases", + "preventable_admissions" + ) + cols4 <- names(episode_file)[grepl( + paste( + "episodes$", + "beddays$", + "cost$", + "attendances$", + "attend$", + # "contacts$", + "hours$", + "alarms$", + "telecare$", + "paid_items$", + "advice$", + "homeV$", + "time$", + "assessment$", + "other$", + # "DN$", + "NHS24$", + "PCC$", + "_dnas$", + "^SDS_option", + sep = "|" + ), + names(episode_file), + ignore.case = TRUE + )] + cols4 <- cols4[!(cols4 %in% c("ch_cis_episodes"))] + # cols5 <- names(episode_file)[grepl("^sc|HL1_in_FY|NSU", names(episode_file), ignore.case = TRUE)] + # cols5 <- cols5[!(cols5 %in% c("sc_send_lca", "sc_latest_submission"))] + cols6 <- c(condition_cols(), + # "death_date", + # "deceased", + "year", + names(episode_file)[grepl("_Cohort$|end_fy$|start_fy$", + names(episode_file), + ignore.case = TRUE)]) + + for (i in 1:n_chunks) { + start <- (i - 1) * chunk_size + 1 + end <- i * chunk_size + # Subset the data to the current chunk + chunk <- episode_file[start:end] + + # compute + chunk_cols1 <- chunk[, + .(gender = mean(gender)), + by = chi] + chunk_cols2 <- chunk[, + .SD[.N], + # .SDcols = patterns("postcode$|DoB$|gpprac$"), + .SDcols = cols2, + by = chi] + chunk_cols3 <- chunk[, + lapply(.SD, function(x) data.table::uniqueN(x, na.rm = TRUE)), + .SDcols = 
cols3, + by = chi] + chunk_cols4 <- chunk[, + lapply(.SD, function(x) sum(x, na.rm = TRUE)), + .SDcols = cols4, + by = chi] + # chunk_cols5 <- chunk[, + # lapply(.SD, function(x) max(x, na.rm = TRUE)), + # .SDcols = cols5, + # by = chi] + chunk_cols6 <- chunk[, + # .SD[1] + lapply(.SD, function(x) x[!is.na(x)][1]), + .SDcols = cols6, + by = chi] + chunk_agg <- cbind(chunk_cols1, + chunk_cols2[, chi := NULL], + chunk_cols3[, chi := NULL], + chunk_cols4[, chi := NULL], + # chunk_cols5[, chi := NULL], + chunk_cols6[, chi := NULL]) + + # Append the aggregated chunk to the overall result + aggregated_data <- data.table::rbindlist(list(aggregated_data, chunk_agg)) + } + aggregated_data <- dplyr::as_tibble(aggregated_data) + return(aggregated_data) +} From 7c63f5731946a538201fb75378ac1d03c0e60212 Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Wed, 31 May 2023 15:59:26 +0000 Subject: [PATCH 088/200] Style code --- R/aggregate_by_chi_zihao.R | 87 ++++++++++++++++++++++---------------- 1 file changed, 50 insertions(+), 37 deletions(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index 85b87e413..fb8a6b48a 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -3,18 +3,20 @@ library(data.table) aggregate_by_chi_zihao <- function(episode_file) { cli::cli_alert_info("Aggregate by CHI function started at {Sys.time()}") - data.table::setDT(episode_file) # Convert to data.table + data.table::setDT(episode_file) # Convert to data.table # Sort the data within each chunk data.table::setkeyv(episode_file, c("chi", "record_keydate1", "keytime1", "record_keydate2", "keytime2")) data.table::setnames( episode_file, - c("ch_chi_cis", "cij_marker", "ooh_case_id" - #,"hh_in_fy" + c( + "ch_chi_cis", "cij_marker", "ooh_case_id" + # ,"hh_in_fy" ), - c("ch_cis_episodes", "cij_total", "ooh_cases" - #,"HL1_in_FY" + c( + "ch_cis_episodes", "cij_total", "ooh_cases" + # ,"HL1_in_FY" ) ) @@ -22,14 +24,15 @@ aggregate_by_chi_zihao <- function(episode_file) 
{ aggregated_data <- data.table::data.table() # Process the data in chunks - chunk_size <- min(nrow(episode_file), 1e7) # Adjust the chunk size as per your system's memory capacity + chunk_size <- min(nrow(episode_file), 1e7) # Adjust the chunk size as per your system's memory capacity n_chunks <- nrow(episode_file) %/% chunk_size # colums specification cols2 <- names(episode_file)[grepl("postcode$|DoB$|gpprac$", - names(episode_file), - ignore.case = TRUE)] + names(episode_file), + ignore.case = TRUE + )] cols3 <- c( "ch_cis_episodes", "cij_total", @@ -70,13 +73,16 @@ aggregate_by_chi_zihao <- function(episode_file) { cols4 <- cols4[!(cols4 %in% c("ch_cis_episodes"))] # cols5 <- names(episode_file)[grepl("^sc|HL1_in_FY|NSU", names(episode_file), ignore.case = TRUE)] # cols5 <- cols5[!(cols5 %in% c("sc_send_lca", "sc_latest_submission"))] - cols6 <- c(condition_cols(), - # "death_date", - # "deceased", - "year", - names(episode_file)[grepl("_Cohort$|end_fy$|start_fy$", - names(episode_file), - ignore.case = TRUE)]) + cols6 <- c( + condition_cols(), + # "death_date", + # "deceased", + "year", + names(episode_file)[grepl("_Cohort$|end_fy$|start_fy$", + names(episode_file), + ignore.case = TRUE + )] + ) for (i in 1:n_chunks) { start <- (i - 1) * chunk_size + 1 @@ -86,36 +92,43 @@ aggregate_by_chi_zihao <- function(episode_file) { # compute chunk_cols1 <- chunk[, - .(gender = mean(gender)), - by = chi] + .(gender = mean(gender)), + by = chi + ] chunk_cols2 <- chunk[, - .SD[.N], - # .SDcols = patterns("postcode$|DoB$|gpprac$"), - .SDcols = cols2, - by = chi] + .SD[.N], + # .SDcols = patterns("postcode$|DoB$|gpprac$"), + .SDcols = cols2, + by = chi + ] chunk_cols3 <- chunk[, - lapply(.SD, function(x) data.table::uniqueN(x, na.rm = TRUE)), - .SDcols = cols3, - by = chi] + lapply(.SD, function(x) data.table::uniqueN(x, na.rm = TRUE)), + .SDcols = cols3, + by = chi + ] chunk_cols4 <- chunk[, - lapply(.SD, function(x) sum(x, na.rm = TRUE)), - .SDcols = cols4, - by = chi] + 
lapply(.SD, function(x) sum(x, na.rm = TRUE)), + .SDcols = cols4, + by = chi + ] # chunk_cols5 <- chunk[, # lapply(.SD, function(x) max(x, na.rm = TRUE)), # .SDcols = cols5, # by = chi] chunk_cols6 <- chunk[, - # .SD[1] - lapply(.SD, function(x) x[!is.na(x)][1]), - .SDcols = cols6, - by = chi] - chunk_agg <- cbind(chunk_cols1, - chunk_cols2[, chi := NULL], - chunk_cols3[, chi := NULL], - chunk_cols4[, chi := NULL], - # chunk_cols5[, chi := NULL], - chunk_cols6[, chi := NULL]) + # .SD[1] + lapply(.SD, function(x) x[!is.na(x)][1]), + .SDcols = cols6, + by = chi + ] + chunk_agg <- cbind( + chunk_cols1, + chunk_cols2[, chi := NULL], + chunk_cols3[, chi := NULL], + chunk_cols4[, chi := NULL], + # chunk_cols5[, chi := NULL], + chunk_cols6[, chi := NULL] + ) # Append the aggregated chunk to the overall result aggregated_data <- data.table::rbindlist(list(aggregated_data, chunk_agg)) From 70f0891c0bf64f3fbf58f266230d2fd11785f4aa Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Wed, 31 May 2023 17:34:44 +0100 Subject: [PATCH 089/200] minor changes --- R/create_individual_file.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index fe440c723..4596460dd 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -709,7 +709,7 @@ drop_cols <- function(individual_file) { dplyr::select( -month_cols(), -"ch_no_cost", - -"dob", + # -"dob", -"postcode", -"gpprac", -"no_paid_items" # , From abda3d57ddf8da7e44d06c7aad0dfd07cbf69bdf Mon Sep 17 00:00:00 2001 From: James McMahon Date: Thu, 1 Jun 2023 12:47:47 +0100 Subject: [PATCH 090/200] Use the updated function --- R/create_individual_file.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 4596460dd..05df574e0 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -11,7 +11,7 @@ create_individual_file <- function(episode_file) { 
aggregate_ch_episodes() %>% clean_up_ch() %>% recode_gender() %>% - aggregate_by_chi() %>% + aggregate_by_chi_zihao() %>% clean_individual_file() } From 38be4d2de94bb2e6c1659c788629cb57ae8a9c11 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Tue, 6 Jun 2023 11:22:25 +0100 Subject: [PATCH 091/200] to properly import data.table --- NAMESPACE | 3 +++ R/aggregate_by_chi_zihao.R | 14 +++++++++++--- man/aggregate_by_chi_zihao.Rd | 15 +++++++++++++++ 3 files changed, 29 insertions(+), 3 deletions(-) create mode 100644 man/aggregate_by_chi_zihao.Rd diff --git a/NAMESPACE b/NAMESPACE index 5610cf6f2..28cccd6aa 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -158,6 +158,9 @@ export(start_fy) export(start_fy_quarter) export(start_next_fy_quarter) export(write_file) +importFrom(data.table,":=") +importFrom(data.table,.N) +importFrom(data.table,.SD) importFrom(magrittr,"%>%") importFrom(readr,col_character) importFrom(readr,col_date) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index fb8a6b48a..069aa53a4 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -1,5 +1,13 @@ -library(data.table) - +#' Aggregate by CHI +#' +#' @description Aggregate episode file by CHI to convert into +#' individual file. 
+#' +#' @importFrom data.table := +#' @importFrom data.table .N +#' @importFrom data.table .SD +#' +#' @inheritParams create_individual_file aggregate_by_chi_zihao <- function(episode_file) { cli::cli_alert_info("Aggregate by CHI function started at {Sys.time()}") @@ -24,7 +32,7 @@ aggregate_by_chi_zihao <- function(episode_file) { aggregated_data <- data.table::data.table() # Process the data in chunks - chunk_size <- min(nrow(episode_file), 1e7) # Adjust the chunk size as per your system's memory capacity + chunk_size <- min(nrow(episode_file), 5e7) # Adjust the chunk size as per your system's memory capacity n_chunks <- nrow(episode_file) %/% chunk_size diff --git a/man/aggregate_by_chi_zihao.Rd b/man/aggregate_by_chi_zihao.Rd new file mode 100644 index 000000000..3d4961e19 --- /dev/null +++ b/man/aggregate_by_chi_zihao.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/aggregate_by_chi_zihao.R +\name{aggregate_by_chi_zihao} +\alias{aggregate_by_chi_zihao} +\title{Aggregate by CHI} +\usage{ +aggregate_by_chi_zihao(episode_file) +} +\arguments{ +\item{episode_file}{Tibble containing episodic data} +} +\description{ +Aggregate episode file by CHI to convert into +individual file. 
+} From 6368535136ce2133820fb48e3d0760a9db8344fd Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Tue, 6 Jun 2023 12:23:25 +0100 Subject: [PATCH 092/200] remove redundant columns dob postcode and gpprac --- NAMESPACE | 1 - R/aggregate_by_chi_zihao.R | 6 +++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 28cccd6aa..395814633 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -158,7 +158,6 @@ export(start_fy) export(start_fy_quarter) export(start_next_fy_quarter) export(write_file) -importFrom(data.table,":=") importFrom(data.table,.N) importFrom(data.table,.SD) importFrom(magrittr,"%>%") diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index 069aa53a4..b461484cb 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -3,7 +3,6 @@ #' @description Aggregate episode file by CHI to convert into #' individual file. #' -#' @importFrom data.table := #' @importFrom data.table .N #' @importFrom data.table .SD #' @@ -11,6 +10,11 @@ aggregate_by_chi_zihao <- function(episode_file) { cli::cli_alert_info("Aggregate by CHI function started at {Sys.time()}") + episode_file <- episode_file %>% + dplyr::select(-dplyr::ends_with("_gpprac") | "most_recent_gpprac") %>% + dplyr::select(-dplyr::ends_with("_postcode") | "most_recent_postcode") %>% + dplyr::select(-dplyr::ends_with("_DoB") | "dob") + data.table::setDT(episode_file) # Convert to data.table # Sort the data within each chunk From 2d7001962d7001abe38b220078ef6dde1e7ecde4 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Tue, 6 Jun 2023 16:07:48 +0100 Subject: [PATCH 093/200] minor changes to remove redundant postcode gpprac columns --- R/aggregate_by_chi_zihao.R | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index b461484cb..656f25885 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -11,14 +11,28 @@ aggregate_by_chi_zihao <- 
function(episode_file) { cli::cli_alert_info("Aggregate by CHI function started at {Sys.time()}") episode_file <- episode_file %>% - dplyr::select(-dplyr::ends_with("_gpprac") | "most_recent_gpprac") %>% - dplyr::select(-dplyr::ends_with("_postcode") | "most_recent_postcode") %>% - dplyr::select(-dplyr::ends_with("_DoB") | "dob") + dplyr::select(-c(postcode, gpprac)) %>% + dplyr::rename("gpprac" = "most_recent_gpprac", + "postcode" = "most_recent_postcode") %>% + dplyr::select(-c( + dplyr::ends_with("_gpprac"), + dplyr::ends_with("_postcode"), + dplyr::ends_with("_DoB") + )) data.table::setDT(episode_file) # Convert to data.table # Sort the data within each chunk - data.table::setkeyv(episode_file, c("chi", "record_keydate1", "keytime1", "record_keydate2", "keytime2")) + data.table::setkeyv( + episode_file, + c( + "chi", + "record_keydate1", + "keytime1", + "record_keydate2", + "keytime2" + ) + ) data.table::setnames( episode_file, @@ -36,7 +50,8 @@ aggregate_by_chi_zihao <- function(episode_file) { aggregated_data <- data.table::data.table() # Process the data in chunks - chunk_size <- min(nrow(episode_file), 5e7) # Adjust the chunk size as per your system's memory capacity + chunk_size <- min(nrow(episode_file), 5e7) + # Adjust the chunk size as per your system's memory capacity n_chunks <- nrow(episode_file) %/% chunk_size From 9f23cff13b75b1201310ffd7ee85a32479188b2b Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Tue, 6 Jun 2023 15:10:03 +0000 Subject: [PATCH 094/200] Style code --- R/aggregate_by_chi_zihao.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index 656f25885..17641d1fb 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -12,8 +12,10 @@ aggregate_by_chi_zihao <- function(episode_file) { episode_file <- episode_file %>% dplyr::select(-c(postcode, gpprac)) %>% - dplyr::rename("gpprac" = "most_recent_gpprac", - "postcode" = "most_recent_postcode") 
%>% + dplyr::rename( + "gpprac" = "most_recent_gpprac", + "postcode" = "most_recent_postcode" + ) %>% dplyr::select(-c( dplyr::ends_with("_gpprac"), dplyr::ends_with("_postcode"), From b361616cdfa32683a8fd1421c0b6a9b58329b261 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Tue, 6 Jun 2023 16:11:33 +0100 Subject: [PATCH 095/200] rename columns with small letters --- R/aggregate_by_chi_zihao.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index 656f25885..b5f4fd549 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -161,5 +161,6 @@ aggregate_by_chi_zihao <- function(episode_file) { aggregated_data <- data.table::rbindlist(list(aggregated_data, chunk_agg)) } aggregated_data <- dplyr::as_tibble(aggregated_data) + names(aggregated_data) = tolower(names(aggregated_data)) return(aggregated_data) } From 550adab299b13a28901192ac99dc72becbabceb6 Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Tue, 6 Jun 2023 15:17:09 +0000 Subject: [PATCH 096/200] Style code --- R/aggregate_by_chi_zihao.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index a6e115260..8cd443a9b 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -163,6 +163,6 @@ aggregate_by_chi_zihao <- function(episode_file) { aggregated_data <- data.table::rbindlist(list(aggregated_data, chunk_agg)) } aggregated_data <- dplyr::as_tibble(aggregated_data) - names(aggregated_data) = tolower(names(aggregated_data)) + names(aggregated_data) <- tolower(names(aggregated_data)) return(aggregated_data) } From fee7d8bb0b1db8e4b62e27dc6a5fd9b6273a258d Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Wed, 7 Jun 2023 11:30:34 +0100 Subject: [PATCH 097/200] newaggregate_ch_episodes --- R/aggregate_by_chi_zihao.R | 27 +++++++++++++++++++++++++++ R/create_individual_file.R | 6 +++--- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git 
a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index 8cd443a9b..4143b28c1 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -166,3 +166,30 @@ aggregate_by_chi_zihao <- function(episode_file) { names(aggregated_data) <- tolower(names(aggregated_data)) return(aggregated_data) } + + +#' Aggregate CIS episodes +#' +#' @description Aggregate CH variables by CHI and CIS. +#' +#' +#' @inheritParams create_individual_file +aggregate_ch_episodes_zihao <- function(episode_file) { + cli::cli_alert_info("Aggregate ch episodes function started at {Sys.time()}") + + # Convert to data.table + data.table::setDT(episode_file) + + # Perform grouping and aggregation + episode_file <- episode_file[, `:=`( + ch_no_cost = max(ch_no_cost), + ch_ep_start = min(record_keydate1), + ch_ep_end = max(ch_ep_end), + ch_cost_per_day = mean(ch_cost_per_day) + ), by = .(chi, ch_chi_cis)] + + # Convert back to tibble if needed + episode_file <- tibble::as_tibble(episode_file) + + return(episode_file) +} diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 05df574e0..04a8be917 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -8,7 +8,7 @@ create_individual_file <- function(episode_file) { remove_blank_chi() %>% add_cij_columns() %>% add_all_columns() %>% - aggregate_ch_episodes() %>% + aggregate_ch_episodes_zihao() %>% clean_up_ch() %>% recode_gender() %>% aggregate_by_chi_zihao() %>% @@ -710,8 +710,8 @@ drop_cols <- function(individual_file) { -month_cols(), -"ch_no_cost", # -"dob", - -"postcode", - -"gpprac", + # -"postcode", + # -"gpprac", -"no_paid_items" # , #-"total_no_dn_contacts" ) From a8f4ae27acede3e6e1ed1a509333d0e9e74ff3ad Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Wed, 7 Jun 2023 10:36:21 +0000 Subject: [PATCH 098/200] Update documentation --- man/aggregate_ch_episodes_zihao.Rd | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 man/aggregate_ch_episodes_zihao.Rd diff --git 
a/man/aggregate_ch_episodes_zihao.Rd b/man/aggregate_ch_episodes_zihao.Rd new file mode 100644 index 000000000..808262654 --- /dev/null +++ b/man/aggregate_ch_episodes_zihao.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/aggregate_by_chi_zihao.R +\name{aggregate_ch_episodes_zihao} +\alias{aggregate_ch_episodes_zihao} +\title{Aggregate CIS episodes} +\usage{ +aggregate_ch_episodes_zihao(episode_file) +} +\arguments{ +\item{episode_file}{Tibble containing episodic data} +} +\description{ +Aggregate CH variables by CHI and CIS. +} From cd8a08bbd0e242d8e2dc35a38b39f6496aa6a922 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Tue, 13 Jun 2023 11:43:01 +0100 Subject: [PATCH 099/200] add functions to replace regular expressions to select column/variables --- R/aggregate_by_chi_zihao.R | 271 ++++++++++++++++++++----------------- 1 file changed, 144 insertions(+), 127 deletions(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index 4143b28c1..280f1aa0c 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -12,16 +12,16 @@ aggregate_by_chi_zihao <- function(episode_file) { episode_file <- episode_file %>% dplyr::select(-c(postcode, gpprac)) %>% - dplyr::rename( - "gpprac" = "most_recent_gpprac", - "postcode" = "most_recent_postcode" - ) %>% + dplyr::rename("gpprac" = "most_recent_gpprac", + "postcode" = "most_recent_postcode") %>% dplyr::select(-c( dplyr::ends_with("_gpprac"), dplyr::ends_with("_postcode"), dplyr::ends_with("_DoB") )) + names(episode_file) <- tolower(names(episode_file)) + data.table::setDT(episode_file) # Convert to data.table # Sort the data within each chunk @@ -38,135 +38,152 @@ aggregate_by_chi_zihao <- function(episode_file) { data.table::setnames( episode_file, - c( - "ch_chi_cis", "cij_marker", "ooh_case_id" + c("ch_chi_cis", "cij_marker", "ooh_case_id" # ,"hh_in_fy" - ), - c( - "ch_cis_episodes", "cij_total", "ooh_cases" - # ,"HL1_in_FY" - ) - ) + ), + 
c("ch_cis_episodes", "cij_total", "ooh_cases" + # ,"hl1_in_fy" + ) + ) + + # Initialize an empty data.table for the aggregated results + aggregated_data <- data.table::data.table() + + # Process the data in chunks + chunk_size <- min(nrow(episode_file), 5e7) + # Adjust the chunk size as per your system's memory capacity + n_chunks <- nrow(episode_file) %/% chunk_size + + + # colums specification + # columns to select last + cols2 <- vars_end_with(episode_file, + c("postcode", "dob", "ggprac")) + # columns to select last unique rows + cols3 <- c( + "ch_cis_episodes", + "cij_total", + "CIJ_el", + "CIJ_non_el", + "CIJ_mat", + # "cij_delay", + "ooh_cases", + "preventable_admissions" + ) + # columns to sum up + cols4 <- c(vars_end_with( + episode_file, + c( + "episodes", + "beddays", + "cost", + "attendances", + "attend", + # "contacts", + "hours", + "alarms", + "telecare", + "paid_items", + "advice", + "homev", + "time", + "assessment", + "other", + # "dn", + "nhs24", + "pcc", + "_dnas" + ) + ), + vars_start_with(episode_file, + "sds_option")) + cols4 <- cols4[!(cols4 %in% c("ch_cis_episodes"))] + # # columns to select maximum + # cols5 <- vars_contain(episode_file, "nsu") + # columns to select first row + cols6 <- c(condition_cols(), + # "death_date", + # "deceased", + "year", + vars_end_with(episode_file, + c("_cohort", "end_fy", "start_fy"))) + + for (i in 1:n_chunks) { + start <- (i - 1) * chunk_size + 1 + end <- i * chunk_size + # Subset the data to the current chunk + chunk <- episode_file[start:end] + + # compute + chunk_cols1 <- chunk[, + .(gender = mean(gender)), + by = chi] + chunk_cols2 <- chunk[, + .SD[.N], + .SDcols = cols2, + by = chi] + chunk_cols3 <- chunk[, + lapply(.SD, function(x) + data.table::uniqueN(x, na.rm = TRUE)), + .SDcols = cols3, + by = chi] + chunk_cols4 <- chunk[, + lapply(.SD, function(x) + sum(x, na.rm = TRUE)), + .SDcols = cols4, + by = chi] + # chunk_cols5 <- chunk[, + # lapply(.SD, function(x) max(x, na.rm = TRUE)), + # .SDcols = 
cols5, + # by = chi] + chunk_cols6 <- chunk[, + lapply(.SD, function(x) + x[!is.na(x)][1]), + .SDcols = cols6, + by = chi] + chunk_agg <- dplyr::bind_cols(chunk_cols1, + chunk_cols2[, chi := NULL], + chunk_cols3[, chi := NULL], + chunk_cols4[, chi := NULL], + # chunk_cols5[, chi := NULL], + chunk_cols6[, chi := NULL]) + + # Append the aggregated chunk to the overall result + aggregated_data <- + data.table::rbindlist(list(aggregated_data, chunk_agg)) + } + aggregated_data <- dplyr::as_tibble(aggregated_data) + + return(aggregated_data) +} - # Initialize an empty data.table for the aggregated results - aggregated_data <- data.table::data.table() - - # Process the data in chunks - chunk_size <- min(nrow(episode_file), 5e7) - # Adjust the chunk size as per your system's memory capacity - n_chunks <- nrow(episode_file) %/% chunk_size - - - # colums specification - cols2 <- names(episode_file)[grepl("postcode$|DoB$|gpprac$", - names(episode_file), - ignore.case = TRUE - )] - cols3 <- c( - "ch_cis_episodes", - "cij_total", - "CIJ_el", - "CIJ_non_el", - "CIJ_mat", - # "cij_delay", - "ooh_cases", - "preventable_admissions" - ) - cols4 <- names(episode_file)[grepl( - paste( - "episodes$", - "beddays$", - "cost$", - "attendances$", - "attend$", - # "contacts$", - "hours$", - "alarms$", - "telecare$", - "paid_items$", - "advice$", - "homeV$", - "time$", - "assessment$", - "other$", - # "DN$", - "NHS24$", - "PCC$", - "_dnas$", - "^SDS_option", - sep = "|" - ), - names(episode_file), - ignore.case = TRUE - )] - cols4 <- cols4[!(cols4 %in% c("ch_cis_episodes"))] - # cols5 <- names(episode_file)[grepl("^sc|HL1_in_FY|NSU", names(episode_file), ignore.case = TRUE)] - # cols5 <- cols5[!(cols5 %in% c("sc_send_lca", "sc_latest_submission"))] - cols6 <- c( - condition_cols(), - # "death_date", - # "deceased", - "year", - names(episode_file)[grepl("_Cohort$|end_fy$|start_fy$", - names(episode_file), - ignore.case = TRUE - )] - ) - for (i in 1:n_chunks) { - start <- (i - 1) * chunk_size 
+ 1 - end <- i * chunk_size - # Subset the data to the current chunk - chunk <- episode_file[start:end] - - # compute - chunk_cols1 <- chunk[, - .(gender = mean(gender)), - by = chi - ] - chunk_cols2 <- chunk[, - .SD[.N], - # .SDcols = patterns("postcode$|DoB$|gpprac$"), - .SDcols = cols2, - by = chi - ] - chunk_cols3 <- chunk[, - lapply(.SD, function(x) data.table::uniqueN(x, na.rm = TRUE)), - .SDcols = cols3, - by = chi - ] - chunk_cols4 <- chunk[, - lapply(.SD, function(x) sum(x, na.rm = TRUE)), - .SDcols = cols4, - by = chi - ] - # chunk_cols5 <- chunk[, - # lapply(.SD, function(x) max(x, na.rm = TRUE)), - # .SDcols = cols5, - # by = chi] - chunk_cols6 <- chunk[, - # .SD[1] - lapply(.SD, function(x) x[!is.na(x)][1]), - .SDcols = cols6, - by = chi - ] - chunk_agg <- cbind( - chunk_cols1, - chunk_cols2[, chi := NULL], - chunk_cols3[, chi := NULL], - chunk_cols4[, chi := NULL], - # chunk_cols5[, chi := NULL], - chunk_cols6[, chi := NULL] - ) +#' select columns ending with some patterns +#' @describeIn select columns based on patterns +#' +vars_end_with <- function(data, vars, ignore_case = FALSE) { + names(data)[stringr::str_ends(names(data), + stringr::regex(paste(vars, collapse = "|"), + ignore_case = ignore_case))] +} - # Append the aggregated chunk to the overall result - aggregated_data <- data.table::rbindlist(list(aggregated_data, chunk_agg)) - } - aggregated_data <- dplyr::as_tibble(aggregated_data) - names(aggregated_data) <- tolower(names(aggregated_data)) - return(aggregated_data) +#' select columns starting with some patterns +#' @describeIn select columns based on patterns +#' +vars_start_with <- function(data, vars, ignore_case = FALSE) { + names(data)[stringr::str_starts(names(data), + stringr::regex(paste(vars, collapse = "|"), + ignore_case = ignore_case))] } +#' select columns contains some characters +#' @describeIn select columns based on patterns +#' +vars_contain <- function(data, vars, ignore_case = FALSE) { + 
names(data)[stringr::str_detect(names(data), + stringr::regex(paste(vars, collapse = "|"), + ignore_case = ignore_case))] +} #' Aggregate CIS episodes #' From e03b02df7b7568061414910a8d5842697b967027 Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Tue, 13 Jun 2023 11:03:25 +0000 Subject: [PATCH 100/200] Update documentation --- man/select.Rd | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 man/select.Rd diff --git a/man/select.Rd b/man/select.Rd new file mode 100644 index 000000000..435096d9a --- /dev/null +++ b/man/select.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/aggregate_by_chi_zihao.R +\name{vars_end_with} +\alias{vars_end_with} +\alias{vars_start_with} +\alias{vars_contain} +\title{select columns ending with some patterns} +\usage{ +vars_end_with(data, vars, ignore_case = FALSE) + +vars_start_with(data, vars, ignore_case = FALSE) + +vars_contain(data, vars, ignore_case = FALSE) +} +\description{ +select columns ending with some patterns + +select columns starting with some patterns + +select columns contains some characters +} +\section{Functions}{ +\itemize{ +\item \code{vars_end_with()}: columns based on patterns + +\item \code{vars_start_with()}: columns based on patterns + +\item \code{vars_contain()}: columns based on patterns + +}} From 66e21e61409bf2138f6766c68a924e3061f56827 Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Tue, 13 Jun 2023 11:06:39 +0000 Subject: [PATCH 101/200] Style code --- R/aggregate_by_chi_zihao.R | 283 +++++++++++++++++++++---------------- 1 file changed, 158 insertions(+), 125 deletions(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index 280f1aa0c..63aaa9cbb 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -12,8 +12,10 @@ aggregate_by_chi_zihao <- function(episode_file) { episode_file <- episode_file %>% dplyr::select(-c(postcode, gpprac)) %>% - dplyr::rename("gpprac" = 
"most_recent_gpprac", - "postcode" = "most_recent_postcode") %>% + dplyr::rename( + "gpprac" = "most_recent_gpprac", + "postcode" = "most_recent_postcode" + ) %>% dplyr::select(-c( dplyr::ends_with("_gpprac"), dplyr::ends_with("_postcode"), @@ -38,123 +40,145 @@ aggregate_by_chi_zihao <- function(episode_file) { data.table::setnames( episode_file, - c("ch_chi_cis", "cij_marker", "ooh_case_id" + c( + "ch_chi_cis", "cij_marker", "ooh_case_id" # ,"hh_in_fy" - ), - c("ch_cis_episodes", "cij_total", "ooh_cases" - # ,"hl1_in_fy" - ) - ) + ), + c( + "ch_cis_episodes", "cij_total", "ooh_cases" + # ,"hl1_in_fy" + ) + ) + + # Initialize an empty data.table for the aggregated results + aggregated_data <- data.table::data.table() - # Initialize an empty data.table for the aggregated results - aggregated_data <- data.table::data.table() - - # Process the data in chunks - chunk_size <- min(nrow(episode_file), 5e7) - # Adjust the chunk size as per your system's memory capacity - n_chunks <- nrow(episode_file) %/% chunk_size - - - # colums specification - # columns to select last - cols2 <- vars_end_with(episode_file, - c("postcode", "dob", "ggprac")) - # columns to select last unique rows - cols3 <- c( - "ch_cis_episodes", - "cij_total", - "CIJ_el", - "CIJ_non_el", - "CIJ_mat", - # "cij_delay", - "ooh_cases", - "preventable_admissions" + # Process the data in chunks + chunk_size <- min(nrow(episode_file), 5e7) + # Adjust the chunk size as per your system's memory capacity + n_chunks <- nrow(episode_file) %/% chunk_size + + + # colums specification + # columns to select last + cols2 <- vars_end_with( + episode_file, + c("postcode", "dob", "ggprac") + ) + # columns to select last unique rows + cols3 <- c( + "ch_cis_episodes", + "cij_total", + "CIJ_el", + "CIJ_non_el", + "CIJ_mat", + # "cij_delay", + "ooh_cases", + "preventable_admissions" + ) + # columns to sum up + cols4 <- c( + vars_end_with( + episode_file, + c( + "episodes", + "beddays", + "cost", + "attendances", + "attend", + 
# "contacts", + "hours", + "alarms", + "telecare", + "paid_items", + "advice", + "homev", + "time", + "assessment", + "other", + # "dn", + "nhs24", + "pcc", + "_dnas" ) - # columns to sum up - cols4 <- c(vars_end_with( - episode_file, - c( - "episodes", - "beddays", - "cost", - "attendances", - "attend", - # "contacts", - "hours", - "alarms", - "telecare", - "paid_items", - "advice", - "homev", - "time", - "assessment", - "other", - # "dn", - "nhs24", - "pcc", - "_dnas" - ) - ), - vars_start_with(episode_file, - "sds_option")) - cols4 <- cols4[!(cols4 %in% c("ch_cis_episodes"))] - # # columns to select maximum - # cols5 <- vars_contain(episode_file, "nsu") - # columns to select first row - cols6 <- c(condition_cols(), - # "death_date", - # "deceased", - "year", - vars_end_with(episode_file, - c("_cohort", "end_fy", "start_fy"))) - - for (i in 1:n_chunks) { - start <- (i - 1) * chunk_size + 1 - end <- i * chunk_size - # Subset the data to the current chunk - chunk <- episode_file[start:end] - - # compute - chunk_cols1 <- chunk[, - .(gender = mean(gender)), - by = chi] - chunk_cols2 <- chunk[, - .SD[.N], - .SDcols = cols2, - by = chi] - chunk_cols3 <- chunk[, - lapply(.SD, function(x) - data.table::uniqueN(x, na.rm = TRUE)), - .SDcols = cols3, - by = chi] - chunk_cols4 <- chunk[, - lapply(.SD, function(x) - sum(x, na.rm = TRUE)), - .SDcols = cols4, - by = chi] - # chunk_cols5 <- chunk[, - # lapply(.SD, function(x) max(x, na.rm = TRUE)), - # .SDcols = cols5, - # by = chi] - chunk_cols6 <- chunk[, - lapply(.SD, function(x) - x[!is.na(x)][1]), - .SDcols = cols6, - by = chi] - chunk_agg <- dplyr::bind_cols(chunk_cols1, - chunk_cols2[, chi := NULL], - chunk_cols3[, chi := NULL], - chunk_cols4[, chi := NULL], - # chunk_cols5[, chi := NULL], - chunk_cols6[, chi := NULL]) - - # Append the aggregated chunk to the overall result - aggregated_data <- - data.table::rbindlist(list(aggregated_data, chunk_agg)) - } - aggregated_data <- dplyr::as_tibble(aggregated_data) - - 
return(aggregated_data) + ), + vars_start_with( + episode_file, + "sds_option" + ) + ) + cols4 <- cols4[!(cols4 %in% c("ch_cis_episodes"))] + # # columns to select maximum + # cols5 <- vars_contain(episode_file, "nsu") + # columns to select first row + cols6 <- c( + condition_cols(), + # "death_date", + # "deceased", + "year", + vars_end_with( + episode_file, + c("_cohort", "end_fy", "start_fy") + ) + ) + + for (i in 1:n_chunks) { + start <- (i - 1) * chunk_size + 1 + end <- i * chunk_size + # Subset the data to the current chunk + chunk <- episode_file[start:end] + + # compute + chunk_cols1 <- chunk[, + .(gender = mean(gender)), + by = chi + ] + chunk_cols2 <- chunk[, + .SD[.N], + .SDcols = cols2, + by = chi + ] + chunk_cols3 <- chunk[, + lapply(.SD, function(x) { + data.table::uniqueN(x, na.rm = TRUE) + }), + .SDcols = cols3, + by = chi + ] + chunk_cols4 <- chunk[, + lapply(.SD, function(x) { + sum(x, na.rm = TRUE) + }), + .SDcols = cols4, + by = chi + ] + # chunk_cols5 <- chunk[, + # lapply(.SD, function(x) max(x, na.rm = TRUE)), + # .SDcols = cols5, + # by = chi] + chunk_cols6 <- chunk[, + lapply(.SD, function(x) { + x[!is.na(x)][1] + }), + .SDcols = cols6, + by = chi + ] + chunk_agg <- dplyr::bind_cols( + chunk_cols1, + chunk_cols2[, chi := NULL], + chunk_cols3[, chi := NULL], + chunk_cols4[, chi := NULL], + # chunk_cols5[, chi := NULL], + chunk_cols6[, chi := NULL] + ) + + # Append the aggregated chunk to the overall result + aggregated_data <- + data.table::rbindlist(list(aggregated_data, chunk_agg)) + } + aggregated_data <- dplyr::as_tibble(aggregated_data) + + return(aggregated_data) } @@ -162,27 +186,36 @@ aggregate_by_chi_zihao <- function(episode_file) { #' @describeIn select columns based on patterns #' vars_end_with <- function(data, vars, ignore_case = FALSE) { - names(data)[stringr::str_ends(names(data), - stringr::regex(paste(vars, collapse = "|"), - ignore_case = ignore_case))] + names(data)[stringr::str_ends( + names(data), + 
stringr::regex(paste(vars, collapse = "|"), + ignore_case = ignore_case + ) + )] } #' select columns starting with some patterns #' @describeIn select columns based on patterns #' vars_start_with <- function(data, vars, ignore_case = FALSE) { - names(data)[stringr::str_starts(names(data), - stringr::regex(paste(vars, collapse = "|"), - ignore_case = ignore_case))] + names(data)[stringr::str_starts( + names(data), + stringr::regex(paste(vars, collapse = "|"), + ignore_case = ignore_case + ) + )] } #' select columns contains some characters #' @describeIn select columns based on patterns #' vars_contain <- function(data, vars, ignore_case = FALSE) { - names(data)[stringr::str_detect(names(data), - stringr::regex(paste(vars, collapse = "|"), - ignore_case = ignore_case))] + names(data)[stringr::str_detect( + names(data), + stringr::regex(paste(vars, collapse = "|"), + ignore_case = ignore_case + ) + )] } #' Aggregate CIS episodes From f0fce5b7e065573e86e07d24720e2b7ba8be03b4 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Tue, 13 Jun 2023 15:03:39 +0100 Subject: [PATCH 102/200] minor changes --- R/aggregate_by_chi_zihao.R | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index 63aaa9cbb..c7073d6ad 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -69,12 +69,13 @@ aggregate_by_chi_zihao <- function(episode_file) { cols3 <- c( "ch_cis_episodes", "cij_total", - "CIJ_el", - "CIJ_non_el", - "CIJ_mat", + "cij_el", + "cij_non_el", + "cij_mat", # "cij_delay", "ooh_cases", - "preventable_admissions" + "preventable_admissions", + "gpprac" ) # columns to sum up cols4 <- c( From f1b96d114544857276dcb6b0ac1eb42b14667198 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Wed, 14 Jun 2023 15:19:16 +0100 Subject: [PATCH 103/200] add a missing variable, cij_delay --- R/link_delayed_discharge_eps.R | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git 
a/R/link_delayed_discharge_eps.R b/R/link_delayed_discharge_eps.R index 677480697..c983afbeb 100644 --- a/R/link_delayed_discharge_eps.R +++ b/R/link_delayed_discharge_eps.R @@ -273,6 +273,15 @@ link_delayed_discharge_eps <- function(data, year) { .data$record_keydate2_dd, .keep_all = TRUE ) %>% + # add cij_delay + dplyr::mutate(has_delay = dplyr::if_else( + .data$chi != "" & !is.na(.data$cij_marker), + .data$smrtype == "DD-CIJ", + NA + )) %>% + dplyr::group_by(chi, cij_marker) %>% + dplyr::mutate(cij_delay = max(has_delay)) %>% + dplyr::ungroup() # tidy up and rename columns to match the format of episode files dplyr::select( "year" = "year_dd", @@ -300,6 +309,7 @@ link_delayed_discharge_eps <- function(data, year) { "cij_admtype", "cij_adm_spec", "cij_dis_spec", + "cij_delay", "location", "spec" = "spec_dd", "dd_type" From f565922e172fc023be3b17b8b6d9734bcdb18dcc Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Wed, 14 Jun 2023 14:22:51 +0000 Subject: [PATCH 104/200] Style code --- R/link_delayed_discharge_eps.R | 64 +++++++++++++++++----------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/R/link_delayed_discharge_eps.R b/R/link_delayed_discharge_eps.R index c983afbeb..e4f418516 100644 --- a/R/link_delayed_discharge_eps.R +++ b/R/link_delayed_discharge_eps.R @@ -282,38 +282,38 @@ link_delayed_discharge_eps <- function(data, year) { dplyr::group_by(chi, cij_marker) %>% dplyr::mutate(cij_delay = max(has_delay)) %>% dplyr::ungroup() - # tidy up and rename columns to match the format of episode files - dplyr::select( - "year" = "year_dd", - "recid" = "recid_dd", - "record_keydate1" = "record_keydate1_dd", - "record_keydate2" = "record_keydate2_dd", - "smrtype", - "chi", - "gender", - "dob", - "age", - "gpprac", - "postcode" = "postcode_dd", - "lca" = "dd_responsible_lca", - "hbtreatcode" = "hbtreatcode_dd", - "original_admission_date", - "delay_end_reason", - "primary_delay_reason", - "secondary_delay_reason", - "cij_marker", - 
"cij_start_date", - "cij_end_date", - "cij_pattype_code", - "cij_ipdc", - "cij_admtype", - "cij_adm_spec", - "cij_dis_spec", - "cij_delay", - "location", - "spec" = "spec_dd", - "dd_type" - ) %>% + # tidy up and rename columns to match the format of episode files + dplyr::select( + "year" = "year_dd", + "recid" = "recid_dd", + "record_keydate1" = "record_keydate1_dd", + "record_keydate2" = "record_keydate2_dd", + "smrtype", + "chi", + "gender", + "dob", + "age", + "gpprac", + "postcode" = "postcode_dd", + "lca" = "dd_responsible_lca", + "hbtreatcode" = "hbtreatcode_dd", + "original_admission_date", + "delay_end_reason", + "primary_delay_reason", + "secondary_delay_reason", + "cij_marker", + "cij_start_date", + "cij_end_date", + "cij_pattype_code", + "cij_ipdc", + "cij_admtype", + "cij_adm_spec", + "cij_dis_spec", + "cij_delay", + "location", + "spec" = "spec_dd", + "dd_type" + ) %>% # combine DD with episode data dplyr::bind_rows( # restore cij_end_date data %>% From 5237f9edb9d5706667a6ab51176d651aa3063aa3 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Wed, 14 Jun 2023 15:32:11 +0100 Subject: [PATCH 105/200] add variables cij_delay, preventable_beddays --- R/create_individual_file.R | 48 +++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 27 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 04a8be917..1d0f8ac60 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -40,36 +40,30 @@ add_cij_columns <- function(episode_file) { episode_file %>% dplyr::mutate( CIJ_non_el = dplyr::if_else(.data$cij_pattype_code == 0, - .data$cij_marker, - NA_real_ - ), + .data$cij_marker, + NA_real_), CIJ_el = dplyr::if_else(.data$cij_pattype_code == 1, - .data$cij_marker, - NA_real_ - ), + .data$cij_marker, + NA_real_), CIJ_mat = dplyr::if_else(.data$cij_pattype_code == 2, - .data$cij_marker, - NA_real_ - ) - ) %>% - # dplyr::mutate(cij_delay = dplyr::if_else( - # (.data$cij_delay == 1 & .data$cij_marker == 
1), - # 1, - # 0 - # )) %>% - dplyr::mutate( - preventable_admissions = dplyr::if_else( - (.data$cij_ppa == 1 & .data$cij_marker == 1), + .data$cij_marker, + NA_real_), + # assume cij_delay is logic variable + cij_delay = dplyr::if_else( + (.data$cij_delay & .data$cij_marker == 1), 1, - 0 - ) # , - # Come back to here - # preventable_beddays = dplyr::if_else( - # (.data$cij_ppa == 1 & .data$Distinct_cij == 1), - # as.numeric(.data$cij_end_date - .data$cij_start_date), - # 0 - # ) - ) + 0), + preventable_admissions = dplyr::if_else((.data$cij_ppa == 1 & + .data$cij_marker == 1), + 1, + 0), + preventable_beddays = dplyr::if_else((.data$cij_ppa == 1 & + .data$cij_marker == 1), + as.numeric( + min(.data$cij_end_date, end_fy(year)) - + min(.data$cij_start_date, start_fy(year)) + ), + 0)) } #' Add all columns From bdfc0b441cffc5c0a2c9c65f1f8d488e5669ba67 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Wed, 14 Jun 2023 15:33:35 +0100 Subject: [PATCH 106/200] add missing variables health_net_cost, health_net_costincdnas, and cmh, dd sds columns --- R/create_individual_file.R | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 1d0f8ac60..a1743dfd6 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -85,15 +85,26 @@ add_all_columns <- function(episode_file) { add_pis_columns("PIS", .data$recid == "PIS") %>% add_ooh_columns("OoH", .data$recid == "OoH") %>% # add_dn_columns("DN", .data$recid == "DN") %>% - # add_cmh_columns("CMH", .data$recid == "CMH") %>% - # add_dd_columns("DD", .data$recid == "DD") %>% + add_cmh_columns("CMH", .data$recid == "CMH") %>% + add_dd_columns("DD", .data$recid == "DD") %>% add_nsu_columns("NSU", .data$recid == "NSU") %>% add_nrs_columns("NRS", .data$recid == "NRS") %>% add_hl1_columns("HL1", .data$recid == "HL1") %>% add_ch_columns("CH", .data$recid == "CH") %>% add_hc_columns("HC", .data$recid == "HC") %>% add_at_columns("AT", 
.data$recid == "AT") %>% - add_sds_columns("SDS", .data$recid == "SDS") + add_sds_columns("SDS", .data$recid == "SDS") %>% + dplyr::mutate( + health_net_cost = Acute_cost + + Mat_cost + + MH_cost + + GLS_cost + + OP_cost_attend + + AE_cost + + PIS_cost + + OoH_cost, + health_net_costincdnas = health_net_cost + OP_cost_dnas + ) } #' Add Acute columns From 7b288bdd42e5d62a961924900488adb2f0c7b05d Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Wed, 14 Jun 2023 14:43:52 +0000 Subject: [PATCH 107/200] Style code --- R/create_individual_file.R | 39 ++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index a1743dfd6..fa6c43e0f 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -40,30 +40,37 @@ add_cij_columns <- function(episode_file) { episode_file %>% dplyr::mutate( CIJ_non_el = dplyr::if_else(.data$cij_pattype_code == 0, - .data$cij_marker, - NA_real_), + .data$cij_marker, + NA_real_ + ), CIJ_el = dplyr::if_else(.data$cij_pattype_code == 1, - .data$cij_marker, - NA_real_), + .data$cij_marker, + NA_real_ + ), CIJ_mat = dplyr::if_else(.data$cij_pattype_code == 2, - .data$cij_marker, - NA_real_), + .data$cij_marker, + NA_real_ + ), # assume cij_delay is logic variable cij_delay = dplyr::if_else( (.data$cij_delay & .data$cij_marker == 1), 1, - 0), + 0 + ), preventable_admissions = dplyr::if_else((.data$cij_ppa == 1 & - .data$cij_marker == 1), - 1, - 0), + .data$cij_marker == 1), + 1, + 0 + ), preventable_beddays = dplyr::if_else((.data$cij_ppa == 1 & - .data$cij_marker == 1), - as.numeric( - min(.data$cij_end_date, end_fy(year)) - - min(.data$cij_start_date, start_fy(year)) - ), - 0)) + .data$cij_marker == 1), + as.numeric( + min(.data$cij_end_date, end_fy(year)) - + min(.data$cij_start_date, start_fy(year)) + ), + 0 + ) + ) } #' Add all columns From e907dd93f815fe02d19b9dca626d2a4a282d0b08 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Wed, 
14 Jun 2023 16:32:51 +0100 Subject: [PATCH 108/200] add more variables needed --- R/aggregate_by_chi_zihao.R | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index c7073d6ad..8ea384a22 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -75,7 +75,31 @@ aggregate_by_chi_zihao <- function(episode_file) { # "cij_delay", "ooh_cases", "preventable_admissions", - "gpprac" + "gpprac", + + "hbrescode", + "hscp", + "lca", + "ca2018", + "locality", + "datazone2011", + "hbpraccode", + "cluster", + "simd2020v2_rank", + "simd2020v2_sc_decile", + "simd2020v2_sc_quintile", + "simd2020v2_hb2019_decile", + "simd2020v2_hb2019_quintile", + "simd2020v2_hscp2019_decile", + "simd2020v2_hscp2019_quintile", + "ur8_2020", + "ur6_2020", + "ur3_2020", + "ur2_2020", + "hb2019", + "hscp2019", + "ca2019", + vars_start_with(episode_file, "sc_") ) # columns to sum up cols4 <- c( From 464790466fb5e893e5e809c3643e794bc823c2e1 Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Wed, 14 Jun 2023 15:35:09 +0000 Subject: [PATCH 109/200] Style code --- R/aggregate_by_chi_zihao.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index 8ea384a22..270f8c87c 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -76,7 +76,6 @@ aggregate_by_chi_zihao <- function(episode_file) { "ooh_cases", "preventable_admissions", "gpprac", - "hbrescode", "hscp", "lca", From 45688c309696885cf1bc5235cf30cf599403f461 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Mon, 19 Jun 2023 16:48:43 +0100 Subject: [PATCH 110/200] Update R/link_delayed_discharge_eps.R --- R/link_delayed_discharge_eps.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/link_delayed_discharge_eps.R b/R/link_delayed_discharge_eps.R index e35ca0367..cfc06524e 100644 --- a/R/link_delayed_discharge_eps.R +++ b/R/link_delayed_discharge_eps.R @@ 
-279,7 +279,7 @@ link_delayed_discharge_eps <- function(data, year) { )) %>% dplyr::group_by(chi, cij_marker) %>% dplyr::mutate(cij_delay = max(has_delay)) %>% - dplyr::ungroup() + dplyr::ungroup() %>% # tidy up and rename columns to match the format of episode files dplyr::select( "year" = "year_dd", From b2676d41be406d18831805958666e6b5f419558f Mon Sep 17 00:00:00 2001 From: Moohan Date: Mon, 19 Jun 2023 15:52:11 +0000 Subject: [PATCH 111/200] Style code --- R/link_delayed_discharge_eps.R | 64 +++++++++++++++++----------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/R/link_delayed_discharge_eps.R b/R/link_delayed_discharge_eps.R index cfc06524e..ee99503dd 100644 --- a/R/link_delayed_discharge_eps.R +++ b/R/link_delayed_discharge_eps.R @@ -280,38 +280,38 @@ link_delayed_discharge_eps <- function(data, year) { dplyr::group_by(chi, cij_marker) %>% dplyr::mutate(cij_delay = max(has_delay)) %>% dplyr::ungroup() %>% - # tidy up and rename columns to match the format of episode files - dplyr::select( - "year" = "year_dd", - "recid" = "recid_dd", - "record_keydate1" = "record_keydate1_dd", - "record_keydate2" = "record_keydate2_dd", - "smrtype", - "chi", - "gender", - "dob", - "age", - "gpprac", - "postcode" = "postcode_dd", - "lca" = "dd_responsible_lca", - "hbtreatcode" = "hbtreatcode_dd", - "original_admission_date", - "delay_end_reason", - "primary_delay_reason", - "secondary_delay_reason", - "cij_marker", - "cij_start_date", - "cij_end_date", - "cij_pattype_code", - "cij_ipdc", - "cij_admtype", - "cij_adm_spec", - "cij_dis_spec", - "cij_delay", - "location", - "spec" = "spec_dd", - "dd_type" - ) %>% + # tidy up and rename columns to match the format of episode files + dplyr::select( + "year" = "year_dd", + "recid" = "recid_dd", + "record_keydate1" = "record_keydate1_dd", + "record_keydate2" = "record_keydate2_dd", + "smrtype", + "chi", + "gender", + "dob", + "age", + "gpprac", + "postcode" = "postcode_dd", + "lca" = "dd_responsible_lca", + 
"hbtreatcode" = "hbtreatcode_dd", + "original_admission_date", + "delay_end_reason", + "primary_delay_reason", + "secondary_delay_reason", + "cij_marker", + "cij_start_date", + "cij_end_date", + "cij_pattype_code", + "cij_ipdc", + "cij_admtype", + "cij_adm_spec", + "cij_dis_spec", + "cij_delay", + "location", + "spec" = "spec_dd", + "dd_type" + ) %>% # combine DD with episode data dplyr::bind_rows( # restore cij_end_date data %>% From 8048e68c829edbf6c0c43e1bf3ade1d142e0e250 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Tue, 20 Jun 2023 11:40:35 +0100 Subject: [PATCH 112/200] amend costs --- R/create_individual_file.R | 40 ++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index fa6c43e0f..46d825bad 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -51,12 +51,12 @@ add_cij_columns <- function(episode_file) { .data$cij_marker, NA_real_ ), - # assume cij_delay is logic variable - cij_delay = dplyr::if_else( - (.data$cij_delay & .data$cij_marker == 1), - 1, - 0 - ), + # # assume cij_delay is logic variable + # cij_delay = dplyr::if_else( + # (.data$cij_delay & .data$cij_marker == 1), + # 1, + # 0 + # ), preventable_admissions = dplyr::if_else((.data$cij_ppa == 1 & .data$cij_marker == 1), 1, @@ -102,15 +102,25 @@ add_all_columns <- function(episode_file) { add_at_columns("AT", .data$recid == "AT") %>% add_sds_columns("SDS", .data$recid == "SDS") %>% dplyr::mutate( - health_net_cost = Acute_cost + - Mat_cost + - MH_cost + - GLS_cost + - OP_cost_attend + - AE_cost + - PIS_cost + - OoH_cost, - health_net_costincdnas = health_net_cost + OP_cost_dnas + health_net_cost = rowSums(dplyr::select( + ., + c( + Acute_cost, + Mat_cost, + MH_cost, + GLS_cost, + OP_cost_attend, + AE_cost, + PIS_cost, + OoH_cost + )), + na.rm = TRUE), + health_net_costincdnas = rowSums(dplyr::select(., + c( + health_net_cost, + OP_cost_dnas + )), + na.rm = TRUE) ) } From 
78197c61ae2ee001e33a26788480954278e9a32c Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Tue, 20 Jun 2023 10:43:07 +0000 Subject: [PATCH 113/200] Style code --- R/create_individual_file.R | 45 ++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 46d825bad..8f7ca9a50 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -102,25 +102,32 @@ add_all_columns <- function(episode_file) { add_at_columns("AT", .data$recid == "AT") %>% add_sds_columns("SDS", .data$recid == "SDS") %>% dplyr::mutate( - health_net_cost = rowSums(dplyr::select( - ., - c( - Acute_cost, - Mat_cost, - MH_cost, - GLS_cost, - OP_cost_attend, - AE_cost, - PIS_cost, - OoH_cost - )), - na.rm = TRUE), - health_net_costincdnas = rowSums(dplyr::select(., - c( - health_net_cost, - OP_cost_dnas - )), - na.rm = TRUE) + health_net_cost = rowSums( + dplyr::select( + ., + c( + Acute_cost, + Mat_cost, + MH_cost, + GLS_cost, + OP_cost_attend, + AE_cost, + PIS_cost, + OoH_cost + ) + ), + na.rm = TRUE + ), + health_net_costincdnas = rowSums( + dplyr::select( + ., + c( + health_net_cost, + OP_cost_dnas + ) + ), + na.rm = TRUE + ) ) } From 4fd8ac4bb880b893095f74c1b195896d15e77fed Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Tue, 20 Jun 2023 14:06:52 +0100 Subject: [PATCH 114/200] Revert "amend costs" This reverts commit 8048e68c829edbf6c0c43e1bf3ade1d142e0e250. 
--- R/create_individual_file.R | 47 ++++++++++++-------------------------- 1 file changed, 15 insertions(+), 32 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 8f7ca9a50..fa6c43e0f 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -51,12 +51,12 @@ add_cij_columns <- function(episode_file) { .data$cij_marker, NA_real_ ), - # # assume cij_delay is logic variable - # cij_delay = dplyr::if_else( - # (.data$cij_delay & .data$cij_marker == 1), - # 1, - # 0 - # ), + # assume cij_delay is logic variable + cij_delay = dplyr::if_else( + (.data$cij_delay & .data$cij_marker == 1), + 1, + 0 + ), preventable_admissions = dplyr::if_else((.data$cij_ppa == 1 & .data$cij_marker == 1), 1, @@ -102,32 +102,15 @@ add_all_columns <- function(episode_file) { add_at_columns("AT", .data$recid == "AT") %>% add_sds_columns("SDS", .data$recid == "SDS") %>% dplyr::mutate( - health_net_cost = rowSums( - dplyr::select( - ., - c( - Acute_cost, - Mat_cost, - MH_cost, - GLS_cost, - OP_cost_attend, - AE_cost, - PIS_cost, - OoH_cost - ) - ), - na.rm = TRUE - ), - health_net_costincdnas = rowSums( - dplyr::select( - ., - c( - health_net_cost, - OP_cost_dnas - ) - ), - na.rm = TRUE - ) + health_net_cost = Acute_cost + + Mat_cost + + MH_cost + + GLS_cost + + OP_cost_attend + + AE_cost + + PIS_cost + + OoH_cost, + health_net_costincdnas = health_net_cost + OP_cost_dnas ) } From b6a1e6f8a5d7560a43828f96df2427fd1c58fb34 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Tue, 20 Jun 2023 14:10:37 +0100 Subject: [PATCH 115/200] Add DN and cij_delay back in --- R/aggregate_by_chi_zihao.R | 4 ++-- R/create_individual_file.R | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index 270f8c87c..b665a8db2 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -72,7 +72,7 @@ aggregate_by_chi_zihao <- function(episode_file) { "cij_el", "cij_non_el", "cij_mat", 
- # "cij_delay", + "cij_delay", "ooh_cases", "preventable_admissions", "gpprac", @@ -120,7 +120,7 @@ aggregate_by_chi_zihao <- function(episode_file) { "time", "assessment", "other", - # "dn", + "dn", "nhs24", "pcc", "_dnas" diff --git a/R/create_individual_file.R b/R/create_individual_file.R index fa6c43e0f..964f36a1b 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -91,7 +91,7 @@ add_all_columns <- function(episode_file) { add_ae_columns("AE", .data$recid == "AE2") %>% add_pis_columns("PIS", .data$recid == "PIS") %>% add_ooh_columns("OoH", .data$recid == "OoH") %>% - # add_dn_columns("DN", .data$recid == "DN") %>% + add_dn_columns("DN", .data$recid == "DN") %>% add_cmh_columns("CMH", .data$recid == "CMH") %>% add_dd_columns("DD", .data$recid == "DD") %>% add_nsu_columns("NSU", .data$recid == "NSU") %>% From f32c7a2f6cdd0dfd9b1ba861986366ca73763dd9 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Tue, 20 Jun 2023 14:22:47 +0100 Subject: [PATCH 116/200] fix the issue --- R/create_individual_file.R | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 8f7ca9a50..43de83f39 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -118,16 +118,9 @@ add_all_columns <- function(episode_file) { ), na.rm = TRUE ), - health_net_costincdnas = rowSums( - dplyr::select( - ., - c( - health_net_cost, - OP_cost_dnas - ) - ), - na.rm = TRUE - ) + health_net_costincdnas = + health_net_cost + + dplyr::if_else(is.na(OP_cost_dnas), 0, OP_cost_dnas) ) } From 04fe893ac0a55487fe915097d0b9d0594241643b Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Tue, 20 Jun 2023 13:57:02 +0000 Subject: [PATCH 117/200] Style code --- R/create_individual_file.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index dc3cc28ac..b7a7d27a6 100644 --- a/R/create_individual_file.R +++ 
b/R/create_individual_file.R @@ -120,7 +120,7 @@ add_all_columns <- function(episode_file) { ), health_net_costincdnas = health_net_cost + - dplyr::if_else(is.na(OP_cost_dnas), 0, OP_cost_dnas) + dplyr::if_else(is.na(OP_cost_dnas), 0, OP_cost_dnas) ) } From b468271f4d6ff31de1b6e4e8c31d305b349ffa13 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Wed, 21 Jun 2023 09:50:51 +0100 Subject: [PATCH 118/200] remove running in chunks --- R/aggregate_by_chi_zihao.R | 142 ++++++++++++++++--------------------- R/create_individual_file.R | 12 ++-- 2 files changed, 69 insertions(+), 85 deletions(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index 270f8c87c..884ca56a1 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -7,10 +7,10 @@ #' @importFrom data.table .SD #' #' @inheritParams create_individual_file -aggregate_by_chi_zihao <- function(episode_file) { +aggregate_by_chi_zihao <- function(individual_file) { cli::cli_alert_info("Aggregate by CHI function started at {Sys.time()}") - episode_file <- episode_file %>% + individual_file <- individual_file %>% dplyr::select(-c(postcode, gpprac)) %>% dplyr::rename( "gpprac" = "most_recent_gpprac", @@ -22,13 +22,13 @@ aggregate_by_chi_zihao <- function(episode_file) { dplyr::ends_with("_DoB") )) - names(episode_file) <- tolower(names(episode_file)) + names(individual_file) <- tolower(names(individual_file)) - data.table::setDT(episode_file) # Convert to data.table + data.table::setDT(individual_file) # Convert to data.table # Sort the data within each chunk data.table::setkeyv( - episode_file, + individual_file, c( "chi", "record_keydate1", @@ -39,7 +39,7 @@ aggregate_by_chi_zihao <- function(episode_file) { ) data.table::setnames( - episode_file, + individual_file, c( "ch_chi_cis", "cij_marker", "ooh_case_id" # ,"hh_in_fy" @@ -50,19 +50,10 @@ aggregate_by_chi_zihao <- function(episode_file) { ) ) - # Initialize an empty data.table for the aggregated results - aggregated_data <- 
data.table::data.table() - - # Process the data in chunks - chunk_size <- min(nrow(episode_file), 5e7) - # Adjust the chunk size as per your system's memory capacity - n_chunks <- nrow(episode_file) %/% chunk_size - - # colums specification # columns to select last cols2 <- vars_end_with( - episode_file, + individual_file, c("postcode", "dob", "ggprac") ) # columns to select last unique rows @@ -98,12 +89,12 @@ aggregate_by_chi_zihao <- function(episode_file) { "hb2019", "hscp2019", "ca2019", - vars_start_with(episode_file, "sc_") + vars_start_with(individual_file, "sc_") ) # columns to sum up cols4 <- c( vars_end_with( - episode_file, + individual_file, c( "episodes", "beddays", @@ -120,20 +111,22 @@ aggregate_by_chi_zihao <- function(episode_file) { "time", "assessment", "other", - # "dn", + "dn", "nhs24", "pcc", "_dnas" ) ), vars_start_with( - episode_file, + individual_file, "sds_option" - ) + ), + "health_net_costincdnas" ) cols4 <- cols4[!(cols4 %in% c("ch_cis_episodes"))] - # # columns to select maximum - # cols5 <- vars_contain(episode_file, "nsu") + # columns to select maximum + cols5 <- vars_contain(individual_file, c("nsu", "hl1_in_fy")) + cols5 <- cols5[!(cols5 %in% c("ooh_consultation_time"))] # columns to select first row cols6 <- c( condition_cols(), @@ -141,68 +134,59 @@ aggregate_by_chi_zihao <- function(episode_file) { # "deceased", "year", vars_end_with( - episode_file, + individual_file, c("_cohort", "end_fy", "start_fy") ) ) - for (i in 1:n_chunks) { - start <- (i - 1) * chunk_size + 1 - end <- i * chunk_size - # Subset the data to the current chunk - chunk <- episode_file[start:end] - - # compute - chunk_cols1 <- chunk[, - .(gender = mean(gender)), - by = chi - ] - chunk_cols2 <- chunk[, - .SD[.N], - .SDcols = cols2, - by = chi - ] - chunk_cols3 <- chunk[, - lapply(.SD, function(x) { - data.table::uniqueN(x, na.rm = TRUE) - }), - .SDcols = cols3, - by = chi - ] - chunk_cols4 <- chunk[, - lapply(.SD, function(x) { - sum(x, na.rm = TRUE) - }), - 
.SDcols = cols4, - by = chi - ] - # chunk_cols5 <- chunk[, - # lapply(.SD, function(x) max(x, na.rm = TRUE)), - # .SDcols = cols5, - # by = chi] - chunk_cols6 <- chunk[, - lapply(.SD, function(x) { - x[!is.na(x)][1] - }), - .SDcols = cols6, - by = chi - ] - chunk_agg <- dplyr::bind_cols( - chunk_cols1, - chunk_cols2[, chi := NULL], - chunk_cols3[, chi := NULL], - chunk_cols4[, chi := NULL], - # chunk_cols5[, chi := NULL], - chunk_cols6[, chi := NULL] - ) + # compute + individual_file_cols1 <- individual_file[, + .(gender = mean(gender)), + by = chi + ] + individual_file_cols2 <- individual_file[, + .SD[.N], + .SDcols = cols2, + by = chi + ] + individual_file_cols3 <- individual_file[, + lapply(.SD, function(x) { + data.table::uniqueN(x, na.rm = TRUE) + }), + .SDcols = cols3, + by = chi + ] + individual_file_cols4 <- individual_file[, + lapply(.SD, function(x) { + sum(x, na.rm = TRUE) + }), + .SDcols = cols4, + by = chi + ] + individual_file_cols5 <- individual_file[, + lapply(.SD, function(x) max(x, na.rm = TRUE)), + .SDcols = cols5, + by = chi] + individual_file_cols6 <- individual_file[, + lapply(.SD, function(x) { + x[!is.na(x)][1] + }), + .SDcols = cols6, + by = chi + ] + individual_file <- dplyr::bind_cols( + individual_file_cols1, + individual_file_cols2[, chi := NULL], + individual_file_cols3[, chi := NULL], + individual_file_cols4[, chi := NULL], + individual_file_cols5[, chi := NULL], + individual_file_cols6[, chi := NULL] + ) - # Append the aggregated chunk to the overall result - aggregated_data <- - data.table::rbindlist(list(aggregated_data, chunk_agg)) - } - aggregated_data <- dplyr::as_tibble(aggregated_data) + # convert back to tibble + individual_file <- dplyr::as_tibble(individual_file) - return(aggregated_data) + return(individual_file) } diff --git a/R/create_individual_file.R b/R/create_individual_file.R index b7a7d27a6..76473807f 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -51,12 +51,12 @@ add_cij_columns <- 
function(episode_file) { .data$cij_marker, NA_real_ ), - # assume cij_delay is logic variable - cij_delay = dplyr::if_else( - (.data$cij_delay & .data$cij_marker == 1), - 1, - 0 - ), + # # assume cij_delay is logic variable + # cij_delay = dplyr::if_else( + # (.data$cij_delay & .data$cij_marker == 1), + # 1, + # 0 + # ), preventable_admissions = dplyr::if_else((.data$cij_ppa == 1 & .data$cij_marker == 1), 1, From 55a075cfae4601759cf1ae807fe0b2b64c0f4785 Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Wed, 21 Jun 2023 08:53:07 +0000 Subject: [PATCH 119/200] Style code --- R/aggregate_by_chi_zihao.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index 884ca56a1..4fa1fdfd2 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -164,9 +164,10 @@ aggregate_by_chi_zihao <- function(individual_file) { by = chi ] individual_file_cols5 <- individual_file[, - lapply(.SD, function(x) max(x, na.rm = TRUE)), - .SDcols = cols5, - by = chi] + lapply(.SD, function(x) max(x, na.rm = TRUE)), + .SDcols = cols5, + by = chi + ] individual_file_cols6 <- individual_file[, lapply(.SD, function(x) { x[!is.na(x)][1] From b9fbf295d87d2b1182a1a1172c082ea91046139c Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 21 Jun 2023 10:32:51 +0100 Subject: [PATCH 120/200] Update tests to include missing variables --- R/process_tests_individual_file.R | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/R/process_tests_individual_file.R b/R/process_tests_individual_file.R index 2eb3503e2..26d3439d1 100644 --- a/R/process_tests_individual_file.R +++ b/R/process_tests_individual_file.R @@ -12,10 +12,10 @@ process_tests_individual_file <- function(data, year) { "year", "chi", "gender", - # "postcode", # Add back in once postcode is fixed + "postcode", "dob", - # "hbrescode", #add back in when available - # "health_net_cost", + "hbrescode", + 
"health_net_cost", slfhelper::ltc_vars, dplyr::contains(c( "beddays", @@ -61,8 +61,8 @@ produce_individual_file_tests <- function(data) { test_flags <- data %>% # use functions to create HB and partnership flags create_demog_test_flags() %>% - # create_hb_test_flags(.data$hbrescode) %>% - # create_hb_cost_test_flags(.data$hbrescode, .data$health_net_cost) %>% + create_hb_test_flags(.data$hbrescode) %>% + create_hb_cost_test_flags(.data$hbrescode, .data$health_net_cost) %>% # keep variables for comparison dplyr::select(c("valid_chi":dplyr::last_col())) %>% # use function to sum new test flags @@ -82,13 +82,13 @@ produce_individual_file_tests <- function(data) { measure = "all" ) - # min_max_measures <- data %>% - # calculate_measures( - # vars = c( - # "health_net_cost", - # ), - # measure = "min-max" - # ) + min_max_measures <- data %>% + calculate_measures( + vars = c( + "health_net_cost", + ), + measure = "min-max" + ) sum_measures <- data %>% dplyr::select(slfhelper::ltc_vars) %>% @@ -102,7 +102,7 @@ produce_individual_file_tests <- function(data) { join_output <- list( test_flags, all_measures, - # min_max_measures, + min_max_measures, sum_measures ) %>% purrr::reduce(dplyr::full_join, by = c("measure", "value")) From 74da47c10701427954c995c706a0f77d66b65483 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 21 Jun 2023 10:43:56 +0100 Subject: [PATCH 121/200] Remove unnecessary comma --- R/process_tests_individual_file.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/process_tests_individual_file.R b/R/process_tests_individual_file.R index 26d3439d1..32bbd8d3a 100644 --- a/R/process_tests_individual_file.R +++ b/R/process_tests_individual_file.R @@ -85,7 +85,7 @@ produce_individual_file_tests <- function(data) { min_max_measures <- data %>% calculate_measures( vars = c( - "health_net_cost", + "health_net_cost" ), measure = "min-max" ) From 79981a328163e9a2410aa04ee8f0ff2bd8369243 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Wed, 21 
Jun 2023 12:41:34 +0100 Subject: [PATCH 122/200] fix the bug of preventable_beddays --- R/create_individual_file.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 76473807f..5b5fef0ef 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -65,8 +65,8 @@ add_cij_columns <- function(episode_file) { preventable_beddays = dplyr::if_else((.data$cij_ppa == 1 & .data$cij_marker == 1), as.numeric( - min(.data$cij_end_date, end_fy(year)) - - min(.data$cij_start_date, start_fy(year)) + pmin(.data$cij_end_date, end_fy(year)) - + pmin(.data$cij_start_date, start_fy(year)) ), 0 ) From a029a10f27d150f6f6e5db605e020fea45467420 Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Wed, 21 Jun 2023 11:44:20 +0000 Subject: [PATCH 123/200] Update documentation --- man/aggregate_by_chi_zihao.Rd | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/man/aggregate_by_chi_zihao.Rd b/man/aggregate_by_chi_zihao.Rd index 3d4961e19..a754fde4d 100644 --- a/man/aggregate_by_chi_zihao.Rd +++ b/man/aggregate_by_chi_zihao.Rd @@ -4,10 +4,7 @@ \alias{aggregate_by_chi_zihao} \title{Aggregate by CHI} \usage{ -aggregate_by_chi_zihao(episode_file) -} -\arguments{ -\item{episode_file}{Tibble containing episodic data} +aggregate_by_chi_zihao(individual_file) } \description{ Aggregate episode file by CHI to convert into From 71702e0b7cb1663b0818ae4aa15bed04e009aae3 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Wed, 21 Jun 2023 13:50:13 +0100 Subject: [PATCH 124/200] fix total ae_attendances --- R/create_individual_file.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 5b5fef0ef..99a1a6e77 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -195,7 +195,7 @@ add_ae_columns <- function(episode_file, prefix, condition) { condition <- substitute(condition) episode_file %>% 
add_standard_cols(prefix, condition, cost = TRUE) %>% - dplyr::mutate("{prefix}_attendances" := dplyr::if_else(eval(condition), .data$cost_total_net, NA_real_)) + dplyr::mutate("{prefix}_attendances" := dplyr::if_else(eval(condition), 1, NA_real_)) } #' Add PIS columns From 1667ff052d18e06bfebaeefbc4ace38898ef3c19 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Wed, 21 Jun 2023 14:16:54 +0100 Subject: [PATCH 125/200] fix the bug of preventable_admissions --- R/aggregate_by_chi_zihao.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index 4fa1fdfd2..818c17af0 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -65,7 +65,6 @@ aggregate_by_chi_zihao <- function(individual_file) { "cij_mat", # "cij_delay", "ooh_cases", - "preventable_admissions", "gpprac", "hbrescode", "hscp", @@ -121,7 +120,8 @@ aggregate_by_chi_zihao <- function(individual_file) { individual_file, "sds_option" ), - "health_net_costincdnas" + "health_net_costincdnas", + "preventable_admissions" ) cols4 <- cols4[!(cols4 %in% c("ch_cis_episodes"))] # columns to select maximum From 0a517d3838897da8c9a5fc25bd718b7d7f898f03 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Wed, 21 Jun 2023 15:37:32 +0100 Subject: [PATCH 126/200] fix the bug of hbrescode etc --- R/aggregate_by_chi_zihao.R | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index 818c17af0..a6b95453f 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -54,16 +54,7 @@ aggregate_by_chi_zihao <- function(individual_file) { # columns to select last cols2 <- vars_end_with( individual_file, - c("postcode", "dob", "ggprac") - ) - # columns to select last unique rows - cols3 <- c( - "ch_cis_episodes", - "cij_total", - "cij_el", - "cij_non_el", - "cij_mat", - # "cij_delay", + c("postcode", "dob", "ggprac"), "ooh_cases", "gpprac", 
"hbrescode", @@ -90,6 +81,15 @@ aggregate_by_chi_zihao <- function(individual_file) { "ca2019", vars_start_with(individual_file, "sc_") ) + # columns to select last unique rows + cols3 <- c( + "ch_cis_episodes", + "cij_total", + "cij_el", + "cij_non_el", + "cij_mat", + # "cij_delay" + ) # columns to sum up cols4 <- c( vars_end_with( From 3b24326066ccb47c73318116676da27d159712d3 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Wed, 21 Jun 2023 17:10:00 +0100 Subject: [PATCH 127/200] minor fix --- R/aggregate_by_chi_zihao.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index a6b95453f..7162b55db 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -87,7 +87,7 @@ aggregate_by_chi_zihao <- function(individual_file) { "cij_total", "cij_el", "cij_non_el", - "cij_mat", + "cij_mat" # "cij_delay" ) # columns to sum up From 4e4330cd8c8360c0c1470d9aa76467ae427d2306 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Wed, 21 Jun 2023 17:32:46 +0100 Subject: [PATCH 128/200] minor fix --- R/aggregate_by_chi_zihao.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index 7162b55db..3171a59dd 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -52,9 +52,9 @@ aggregate_by_chi_zihao <- function(individual_file) { # colums specification # columns to select last - cols2 <- vars_end_with( - individual_file, - c("postcode", "dob", "ggprac"), + cols2 <- c( + vars_end_with(individual_file, + c("postcode", "dob", "ggprac")), "ooh_cases", "gpprac", "hbrescode", From 6bdd780a798e85c796f7ae53b129ec5881e29e63 Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Wed, 21 Jun 2023 16:35:00 +0000 Subject: [PATCH 129/200] Style code --- R/aggregate_by_chi_zihao.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index 
3171a59dd..9a7769d6a 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -53,8 +53,10 @@ aggregate_by_chi_zihao <- function(individual_file) { # colums specification # columns to select last cols2 <- c( - vars_end_with(individual_file, - c("postcode", "dob", "ggprac")), + vars_end_with( + individual_file, + c("postcode", "dob", "ggprac") + ), "ooh_cases", "gpprac", "hbrescode", From 06e1c7c893873e60bcce7e1a1e4606e23a7ad525 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Tue, 20 Jun 2023 11:16:28 +0100 Subject: [PATCH 130/200] Fix some warnings being produced by the tests --- R/create_monthly_costs.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/create_monthly_costs.R b/R/create_monthly_costs.R index 08f4ce59a..e9e5eedf3 100644 --- a/R/create_monthly_costs.R +++ b/R/create_monthly_costs.R @@ -65,14 +65,14 @@ create_monthly_costs <- function(data, add_months <- setdiff(full_cost_col, available_months) add_months_df <- dplyr::as_tibble( - matrix(0, nrow = nrow(data), ncol = length(add_months)) + matrix(0, nrow = nrow(data), ncol = length(add_months)), + .name_repair = ~add_months ) - names(add_months_df) <- add_months daycase_cost_months <- daycase_cost_months %>% dplyr::bind_cols(add_months_df) %>% dplyr::select(c( - full_cost_col, + dplyr::all_of(full_cost_col), "daycase_check" )) From e14ae02b7127eed5d751d2ab2753a3d60e13bed7 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Tue, 20 Jun 2023 11:33:17 +0100 Subject: [PATCH 131/200] Fix failing test --- tests/testthat/test-get_existing_data_for_tests.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-get_existing_data_for_tests.R b/tests/testthat/test-get_existing_data_for_tests.R index 57309f031..2b21d71db 100644 --- a/tests/testthat/test-get_existing_data_for_tests.R +++ b/tests/testthat/test-get_existing_data_for_tests.R @@ -11,7 +11,11 @@ test_that("Get existing data works", { slf_data <- 
suppressWarnings(get_existing_data_for_tests(dummy_new_data)) - expect_named(slf_data, c("chi", "year", "recid", "diag1", "diag2")) + expect_named( + slf_data, + c("chi", "year", "recid", "diag1", "diag2"), + ignore.order = TRUE + ) expect_gte(nrow(slf_data), 20000) expect_equal(unique(slf_data$recid), "04B") expect_equal(unique(slf_data$year), "1920") From dc79a75d4f1809e66fc3d0208fa447724a6be3e5 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Wed, 21 Jun 2023 09:50:51 +0100 Subject: [PATCH 132/200] remove running in chunks --- R/aggregate_by_chi_zihao.R | 142 ++++++++++++++++--------------------- R/create_individual_file.R | 12 ++-- 2 files changed, 69 insertions(+), 85 deletions(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index 270f8c87c..884ca56a1 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -7,10 +7,10 @@ #' @importFrom data.table .SD #' #' @inheritParams create_individual_file -aggregate_by_chi_zihao <- function(episode_file) { +aggregate_by_chi_zihao <- function(individual_file) { cli::cli_alert_info("Aggregate by CHI function started at {Sys.time()}") - episode_file <- episode_file %>% + individual_file <- individual_file %>% dplyr::select(-c(postcode, gpprac)) %>% dplyr::rename( "gpprac" = "most_recent_gpprac", @@ -22,13 +22,13 @@ aggregate_by_chi_zihao <- function(episode_file) { dplyr::ends_with("_DoB") )) - names(episode_file) <- tolower(names(episode_file)) + names(individual_file) <- tolower(names(individual_file)) - data.table::setDT(episode_file) # Convert to data.table + data.table::setDT(individual_file) # Convert to data.table # Sort the data within each chunk data.table::setkeyv( - episode_file, + individual_file, c( "chi", "record_keydate1", @@ -39,7 +39,7 @@ aggregate_by_chi_zihao <- function(episode_file) { ) data.table::setnames( - episode_file, + individual_file, c( "ch_chi_cis", "cij_marker", "ooh_case_id" # ,"hh_in_fy" @@ -50,19 +50,10 @@ aggregate_by_chi_zihao <- 
function(episode_file) { ) ) - # Initialize an empty data.table for the aggregated results - aggregated_data <- data.table::data.table() - - # Process the data in chunks - chunk_size <- min(nrow(episode_file), 5e7) - # Adjust the chunk size as per your system's memory capacity - n_chunks <- nrow(episode_file) %/% chunk_size - - # colums specification # columns to select last cols2 <- vars_end_with( - episode_file, + individual_file, c("postcode", "dob", "ggprac") ) # columns to select last unique rows @@ -98,12 +89,12 @@ aggregate_by_chi_zihao <- function(episode_file) { "hb2019", "hscp2019", "ca2019", - vars_start_with(episode_file, "sc_") + vars_start_with(individual_file, "sc_") ) # columns to sum up cols4 <- c( vars_end_with( - episode_file, + individual_file, c( "episodes", "beddays", @@ -120,20 +111,22 @@ aggregate_by_chi_zihao <- function(episode_file) { "time", "assessment", "other", - # "dn", + "dn", "nhs24", "pcc", "_dnas" ) ), vars_start_with( - episode_file, + individual_file, "sds_option" - ) + ), + "health_net_costincdnas" ) cols4 <- cols4[!(cols4 %in% c("ch_cis_episodes"))] - # # columns to select maximum - # cols5 <- vars_contain(episode_file, "nsu") + # columns to select maximum + cols5 <- vars_contain(individual_file, c("nsu", "hl1_in_fy")) + cols5 <- cols5[!(cols5 %in% c("ooh_consultation_time"))] # columns to select first row cols6 <- c( condition_cols(), @@ -141,68 +134,59 @@ aggregate_by_chi_zihao <- function(episode_file) { # "deceased", "year", vars_end_with( - episode_file, + individual_file, c("_cohort", "end_fy", "start_fy") ) ) - for (i in 1:n_chunks) { - start <- (i - 1) * chunk_size + 1 - end <- i * chunk_size - # Subset the data to the current chunk - chunk <- episode_file[start:end] - - # compute - chunk_cols1 <- chunk[, - .(gender = mean(gender)), - by = chi - ] - chunk_cols2 <- chunk[, - .SD[.N], - .SDcols = cols2, - by = chi - ] - chunk_cols3 <- chunk[, - lapply(.SD, function(x) { - data.table::uniqueN(x, na.rm = TRUE) - }), - 
.SDcols = cols3, - by = chi - ] - chunk_cols4 <- chunk[, - lapply(.SD, function(x) { - sum(x, na.rm = TRUE) - }), - .SDcols = cols4, - by = chi - ] - # chunk_cols5 <- chunk[, - # lapply(.SD, function(x) max(x, na.rm = TRUE)), - # .SDcols = cols5, - # by = chi] - chunk_cols6 <- chunk[, - lapply(.SD, function(x) { - x[!is.na(x)][1] - }), - .SDcols = cols6, - by = chi - ] - chunk_agg <- dplyr::bind_cols( - chunk_cols1, - chunk_cols2[, chi := NULL], - chunk_cols3[, chi := NULL], - chunk_cols4[, chi := NULL], - # chunk_cols5[, chi := NULL], - chunk_cols6[, chi := NULL] - ) + # compute + individual_file_cols1 <- individual_file[, + .(gender = mean(gender)), + by = chi + ] + individual_file_cols2 <- individual_file[, + .SD[.N], + .SDcols = cols2, + by = chi + ] + individual_file_cols3 <- individual_file[, + lapply(.SD, function(x) { + data.table::uniqueN(x, na.rm = TRUE) + }), + .SDcols = cols3, + by = chi + ] + individual_file_cols4 <- individual_file[, + lapply(.SD, function(x) { + sum(x, na.rm = TRUE) + }), + .SDcols = cols4, + by = chi + ] + individual_file_cols5 <- individual_file[, + lapply(.SD, function(x) max(x, na.rm = TRUE)), + .SDcols = cols5, + by = chi] + individual_file_cols6 <- individual_file[, + lapply(.SD, function(x) { + x[!is.na(x)][1] + }), + .SDcols = cols6, + by = chi + ] + individual_file <- dplyr::bind_cols( + individual_file_cols1, + individual_file_cols2[, chi := NULL], + individual_file_cols3[, chi := NULL], + individual_file_cols4[, chi := NULL], + individual_file_cols5[, chi := NULL], + individual_file_cols6[, chi := NULL] + ) - # Append the aggregated chunk to the overall result - aggregated_data <- - data.table::rbindlist(list(aggregated_data, chunk_agg)) - } - aggregated_data <- dplyr::as_tibble(aggregated_data) + # convert back to tibble + individual_file <- dplyr::as_tibble(individual_file) - return(aggregated_data) + return(individual_file) } diff --git a/R/create_individual_file.R b/R/create_individual_file.R index b7a7d27a6..76473807f 
100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -51,12 +51,12 @@ add_cij_columns <- function(episode_file) { .data$cij_marker, NA_real_ ), - # assume cij_delay is logic variable - cij_delay = dplyr::if_else( - (.data$cij_delay & .data$cij_marker == 1), - 1, - 0 - ), + # # assume cij_delay is logic variable + # cij_delay = dplyr::if_else( + # (.data$cij_delay & .data$cij_marker == 1), + # 1, + # 0 + # ), preventable_admissions = dplyr::if_else((.data$cij_ppa == 1 & .data$cij_marker == 1), 1, From 28289fafb4c07ab090d291cc5586b5339b138066 Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Wed, 21 Jun 2023 08:53:07 +0000 Subject: [PATCH 133/200] Style code --- R/aggregate_by_chi_zihao.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index 884ca56a1..4fa1fdfd2 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -164,9 +164,10 @@ aggregate_by_chi_zihao <- function(individual_file) { by = chi ] individual_file_cols5 <- individual_file[, - lapply(.SD, function(x) max(x, na.rm = TRUE)), - .SDcols = cols5, - by = chi] + lapply(.SD, function(x) max(x, na.rm = TRUE)), + .SDcols = cols5, + by = chi + ] individual_file_cols6 <- individual_file[, lapply(.SD, function(x) { x[!is.na(x)][1] From 5ad5c783afc04f2ce9bdaa90ba29ae081f4145b7 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Wed, 21 Jun 2023 10:32:42 +0100 Subject: [PATCH 134/200] Update the targets config to use `timestamp_positives` as the default reporter --- _targets.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/_targets.yaml b/_targets.yaml index db0d98f0b..24c8a3733 100644 --- a/_targets.yaml +++ b/_targets.yaml @@ -1,3 +1,5 @@ main: store: /conf/sourcedev/Source_Linkage_File_Updates/_targets - workers: '18' + workers: '16' + reporter_make: timestamp_positives + seconds_interval: 30 From f6e04ceaf8cd272a6c0e69efceaff8b9f66269fb Mon Sep 17 00:00:00 2001 From: 
Zihao Li Date: Wed, 21 Jun 2023 12:41:34 +0100 Subject: [PATCH 135/200] fix the bug of preventable_beddays --- R/create_individual_file.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 76473807f..5b5fef0ef 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -65,8 +65,8 @@ add_cij_columns <- function(episode_file) { preventable_beddays = dplyr::if_else((.data$cij_ppa == 1 & .data$cij_marker == 1), as.numeric( - min(.data$cij_end_date, end_fy(year)) - - min(.data$cij_start_date, start_fy(year)) + pmin(.data$cij_end_date, end_fy(year)) - + pmin(.data$cij_start_date, start_fy(year)) ), 0 ) From 6bcf2b2fd1a52ba7190a1c2fcded14d178904e8f Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Wed, 21 Jun 2023 11:44:20 +0000 Subject: [PATCH 136/200] Update documentation --- man/aggregate_by_chi_zihao.Rd | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/man/aggregate_by_chi_zihao.Rd b/man/aggregate_by_chi_zihao.Rd index 3d4961e19..a754fde4d 100644 --- a/man/aggregate_by_chi_zihao.Rd +++ b/man/aggregate_by_chi_zihao.Rd @@ -4,10 +4,7 @@ \alias{aggregate_by_chi_zihao} \title{Aggregate by CHI} \usage{ -aggregate_by_chi_zihao(episode_file) -} -\arguments{ -\item{episode_file}{Tibble containing episodic data} +aggregate_by_chi_zihao(individual_file) } \description{ Aggregate episode file by CHI to convert into From b0065c9b8cc6107d6dda81ce4408a0522bf6f1b6 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Wed, 21 Jun 2023 13:50:13 +0100 Subject: [PATCH 137/200] fix total ae_attendances --- R/create_individual_file.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 5b5fef0ef..99a1a6e77 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -195,7 +195,7 @@ add_ae_columns <- function(episode_file, prefix, condition) { condition <- substitute(condition) episode_file %>% 
add_standard_cols(prefix, condition, cost = TRUE) %>% - dplyr::mutate("{prefix}_attendances" := dplyr::if_else(eval(condition), .data$cost_total_net, NA_real_)) + dplyr::mutate("{prefix}_attendances" := dplyr::if_else(eval(condition), 1, NA_real_)) } #' Add PIS columns From 42f107a95a756f98eca162a2f56bfef2a6dbec51 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Wed, 21 Jun 2023 14:16:54 +0100 Subject: [PATCH 138/200] fix the bug of preventable_admissions --- R/aggregate_by_chi_zihao.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index 4fa1fdfd2..818c17af0 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -65,7 +65,6 @@ aggregate_by_chi_zihao <- function(individual_file) { "cij_mat", # "cij_delay", "ooh_cases", - "preventable_admissions", "gpprac", "hbrescode", "hscp", @@ -121,7 +120,8 @@ aggregate_by_chi_zihao <- function(individual_file) { individual_file, "sds_option" ), - "health_net_costincdnas" + "health_net_costincdnas", + "preventable_admissions" ) cols4 <- cols4[!(cols4 %in% c("ch_cis_episodes"))] # columns to select maximum From 9612b9af3c89188dd3decd7c671e257c25bb4a4c Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Wed, 21 Jun 2023 15:37:32 +0100 Subject: [PATCH 139/200] fix the bug of hbrescode etc --- R/aggregate_by_chi_zihao.R | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index 818c17af0..a6b95453f 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -54,16 +54,7 @@ aggregate_by_chi_zihao <- function(individual_file) { # columns to select last cols2 <- vars_end_with( individual_file, - c("postcode", "dob", "ggprac") - ) - # columns to select last unique rows - cols3 <- c( - "ch_cis_episodes", - "cij_total", - "cij_el", - "cij_non_el", - "cij_mat", - # "cij_delay", + c("postcode", "dob", "ggprac"), "ooh_cases", "gpprac", 
"hbrescode", @@ -90,6 +81,15 @@ aggregate_by_chi_zihao <- function(individual_file) { "ca2019", vars_start_with(individual_file, "sc_") ) + # columns to select last unique rows + cols3 <- c( + "ch_cis_episodes", + "cij_total", + "cij_el", + "cij_non_el", + "cij_mat", + # "cij_delay" + ) # columns to sum up cols4 <- c( vars_end_with( From 4750913e54b505775d7b0526367fc0d330f659ac Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Wed, 21 Jun 2023 17:10:00 +0100 Subject: [PATCH 140/200] minor fix --- R/aggregate_by_chi_zihao.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index a6b95453f..7162b55db 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -87,7 +87,7 @@ aggregate_by_chi_zihao <- function(individual_file) { "cij_total", "cij_el", "cij_non_el", - "cij_mat", + "cij_mat" # "cij_delay" ) # columns to sum up From 338479f1022b0c0857e4580fda48aa7e53851246 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Wed, 21 Jun 2023 17:32:46 +0100 Subject: [PATCH 141/200] minor fix --- R/aggregate_by_chi_zihao.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index 7162b55db..3171a59dd 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -52,9 +52,9 @@ aggregate_by_chi_zihao <- function(individual_file) { # colums specification # columns to select last - cols2 <- vars_end_with( - individual_file, - c("postcode", "dob", "ggprac"), + cols2 <- c( + vars_end_with(individual_file, + c("postcode", "dob", "ggprac")), "ooh_cases", "gpprac", "hbrescode", From 724f31902f15a0ddb7ad47b9bd0a1e3e22ba18a4 Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Wed, 21 Jun 2023 16:35:00 +0000 Subject: [PATCH 142/200] Style code --- R/aggregate_by_chi_zihao.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index 
3171a59dd..9a7769d6a 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -53,8 +53,10 @@ aggregate_by_chi_zihao <- function(individual_file) { # colums specification # columns to select last cols2 <- c( - vars_end_with(individual_file, - c("postcode", "dob", "ggprac")), + vars_end_with( + individual_file, + c("postcode", "dob", "ggprac") + ), "ooh_cases", "gpprac", "hbrescode", From e9c8ef020aec636386ad2c16c54fcfe9a104f0f2 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Tue, 27 Jun 2023 13:08:01 +0100 Subject: [PATCH 143/200] fix home care cost --- R/process_extract_home_care.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/R/process_extract_home_care.R b/R/process_extract_home_care.R index a556dd8e6..d541cc86a 100644 --- a/R/process_extract_home_care.R +++ b/R/process_extract_home_care.R @@ -66,7 +66,10 @@ process_extract_home_care <- function( # remove cost variables not from current year dplyr::select(-(tidyselect::contains("hc_cost_2"))) %>% # create cost total net - dplyr::mutate(cost_total_net = rowSums(dplyr::pick(tidyselect::contains("hc_cost_q")))) + dplyr::mutate(cost_total_net = rowSums( + dplyr::pick(tidyselect::contains("hc_cost_q"))), + na.rm = TRUE + ) # Outfile --------------------------------------- From 9a951a53d4c415dafc246c5fe18a0013a1fdd97f Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Tue, 27 Jun 2023 13:09:05 +0100 Subject: [PATCH 144/200] add ipdc to fix maternity --- R/process_extract_maternity.R | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/R/process_extract_maternity.R b/R/process_extract_maternity.R index 188d55ab5..04fa46ced 100644 --- a/R/process_extract_maternity.R +++ b/R/process_extract_maternity.R @@ -55,7 +55,12 @@ process_extract_maternity <- function(data, year, write_to_disk = TRUE) { discondition = factor(.data$discondition, levels = c(1L:5L, 8L) ), - smrtype = add_smr_type(.data$recid, .data$mpat) + smrtype = add_smr_type(.data$recid, .data$mpat), + 
ipdc = dplyr::case_match( + .data$smrtype, + "Matern-IP" ~ "I", + "Matern-DC" ~ "D" + ) ) @@ -102,7 +107,8 @@ process_extract_maternity <- function(data, year, write_to_disk = TRUE) { "cost_total_net", tidyselect::ends_with("_beddays"), tidyselect::ends_with("_cost"), - "uri" + "uri", + "ipdc" ) %>% dplyr::arrange(.data$chi, .data$record_keydate1) From 4b63ee87af6e89f9b35922befcc57defb222a859 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Tue, 27 Jun 2023 14:05:59 +0100 Subject: [PATCH 145/200] fix preventable addmission and care home cost --- R/create_individual_file.R | 52 ++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 99a1a6e77..b94e3e027 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -40,17 +40,14 @@ add_cij_columns <- function(episode_file) { episode_file %>% dplyr::mutate( CIJ_non_el = dplyr::if_else(.data$cij_pattype_code == 0, - .data$cij_marker, - NA_real_ - ), + .data$cij_marker, + NA_real_), CIJ_el = dplyr::if_else(.data$cij_pattype_code == 1, - .data$cij_marker, - NA_real_ - ), + .data$cij_marker, + NA_real_), CIJ_mat = dplyr::if_else(.data$cij_pattype_code == 2, - .data$cij_marker, - NA_real_ - ), + .data$cij_marker, + NA_real_), # # assume cij_delay is logic variable # cij_delay = dplyr::if_else( # (.data$cij_delay & .data$cij_marker == 1), @@ -58,18 +55,10 @@ add_cij_columns <- function(episode_file) { # 0 # ), preventable_admissions = dplyr::if_else((.data$cij_ppa == 1 & - .data$cij_marker == 1), - 1, - 0 - ), - preventable_beddays = dplyr::if_else((.data$cij_ppa == 1 & - .data$cij_marker == 1), - as.numeric( - pmin(.data$cij_end_date, end_fy(year)) - - pmin(.data$cij_start_date, start_fy(year)) - ), - 0 - ) + .data$cij_marker == 1), + 1, + 0) + # preventable_beddays is now added in aggragate_by_chi ) } @@ -313,12 +302,25 @@ add_ch_columns <- function(episode_file, prefix, condition) { episode_file %>% 
add_standard_cols(prefix, condition) %>% dplyr::mutate( - ch_cost_per_day = dplyr::if_else(eval(condition) & .data$yearstay > 0, .data$cost_total_net / .data$yearstay, NA_real_), - ch_cost_per_day = dplyr::if_else(eval(condition) & .data$yearstay == 0, .data$cost_total_net / .data$yearstay, .data$ch_cost_per_day), + ch_cost_per_day = dplyr::if_else( + eval(condition) & + .data$yearstay > 0, + .data$cost_total_net / .data$yearstay, + .data$cost_total_net + ), ch_no_cost = eval(condition) & is.na(.data$ch_cost_per_day), - ch_ep_end = dplyr::if_else(eval(condition), .data$record_keydate2, lubridate::NA_Date_), + ch_ep_end = dplyr::if_else( + eval(condition), + .data$record_keydate2, + lubridate::NA_Date_ + ), # If end date is missing use the first day of next FY quarter - ch_ep_end = dplyr::if_else(eval(condition) & is.na(.data$ch_ep_end), start_next_fy_quarter(.data$sc_latest_submission), .data$ch_ep_end) + ch_ep_end = dplyr::if_else( + eval(condition) & + is.na(.data$ch_ep_end), + start_next_fy_quarter(.data$sc_latest_submission), + .data$ch_ep_end + ) ) } From c42d7ba9d6fb8f2f8d66b092d347c24d7bf49da3 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Tue, 27 Jun 2023 14:07:54 +0100 Subject: [PATCH 146/200] fix preventable_admissions and calculate preventable_beddays here --- R/aggregate_by_chi_zihao.R | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index 3171a59dd..454ae920e 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -87,9 +87,12 @@ aggregate_by_chi_zihao <- function(individual_file) { "cij_total", "cij_el", "cij_non_el", - "cij_mat" - # "cij_delay" + "cij_mat", + # "cij_delay", + "preventable_admissions" ) + # columns to select last unique rows group by chi and cij_marker + cols3.1 <- c("preventable_beddays") # columns to sum up cols4 <- c( vars_end_with( @@ -120,8 +123,7 @@ aggregate_by_chi_zihao <- function(individual_file) { 
individual_file, "sds_option" ), - "health_net_costincdnas", - "preventable_admissions" + "health_net_costincdnas" ) cols4 <- cols4[!(cols4 %in% c("ch_cis_episodes"))] # columns to select maximum @@ -156,6 +158,14 @@ aggregate_by_chi_zihao <- function(individual_file) { .SDcols = cols3, by = chi ] + individual_file_cols3.1 <- individual_file[, + preventable_beddays := + data.table::fifelse(cij_ppa == 1, + max(cij_end_date) - min(cij_start_date), + NA_real_), + .SDcols = cols3.1, + by = c("chi", "cij_marker") + ] individual_file_cols4 <- individual_file[, lapply(.SD, function(x) { sum(x, na.rm = TRUE) @@ -183,6 +193,9 @@ aggregate_by_chi_zihao <- function(individual_file) { individual_file_cols5[, chi := NULL], individual_file_cols6[, chi := NULL] ) + # cannot simply combine individual_file_cols3.1 as different group_by factors. + individual_file <- individual_file[individual_file_cols3.1, + on = "chi"] # convert back to tibble individual_file <- dplyr::as_tibble(individual_file) From f0671fc3d76bcbf94a043870efe328d7d3fe7533 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Tue, 27 Jun 2023 14:43:42 +0100 Subject: [PATCH 147/200] add monthly_beddays and yearstay to dd --- R/link_delayed_discharge_eps.R | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/R/link_delayed_discharge_eps.R b/R/link_delayed_discharge_eps.R index ee99503dd..67a6940c7 100644 --- a/R/link_delayed_discharge_eps.R +++ b/R/link_delayed_discharge_eps.R @@ -280,6 +280,12 @@ link_delayed_discharge_eps <- function(data, year) { dplyr::group_by(chi, cij_marker) %>% dplyr::mutate(cij_delay = max(has_delay)) %>% dplyr::ungroup() %>% + # add yearstay and monthly beddays + create_monthly_beddays() %>% + dplyr::mutate(yearstay = dplyr::rowSums( + paste0(month.abb[c(4:12,1:3)] %>% tolower(), "_beddays") + )) + # tidy up and rename columns to match the format of episode files dplyr::select( "year" = "year_dd", @@ -310,7 +316,9 @@ link_delayed_discharge_eps <- function(data, year) { 
"cij_delay", "location", "spec" = "spec_dd", - "dd_type" + "dd_type", + paste0(month.abb[c(4:12,1:3)] %>% tolower(), "_beddays"), + "yearstay" ) %>% # combine DD with episode data dplyr::bind_rows( # restore cij_end_date From 9cc84f3832e6f7c46a8b72fbc8cb2a84d653c842 Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Tue, 27 Jun 2023 13:48:55 +0000 Subject: [PATCH 148/200] Style code --- R/aggregate_by_chi_zihao.R | 11 ++++-- R/create_individual_file.R | 22 ++++++----- R/link_delayed_discharge_eps.R | 70 +++++++++++++++++----------------- R/process_extract_home_care.R | 6 ++- 4 files changed, 59 insertions(+), 50 deletions(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index eea4ea898..a2e301a90 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -162,9 +162,11 @@ aggregate_by_chi_zihao <- function(individual_file) { ] individual_file_cols3.1 <- individual_file[, preventable_beddays := - data.table::fifelse(cij_ppa == 1, - max(cij_end_date) - min(cij_start_date), - NA_real_), + data.table::fifelse( + cij_ppa == 1, + max(cij_end_date) - min(cij_start_date), + NA_real_ + ), .SDcols = cols3.1, by = c("chi", "cij_marker") ] @@ -197,7 +199,8 @@ aggregate_by_chi_zihao <- function(individual_file) { ) # cannot simply combine individual_file_cols3.1 as different group_by factors. 
individual_file <- individual_file[individual_file_cols3.1, - on = "chi"] + on = "chi" + ] # convert back to tibble individual_file <- dplyr::as_tibble(individual_file) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index b94e3e027..5444a850e 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -40,14 +40,17 @@ add_cij_columns <- function(episode_file) { episode_file %>% dplyr::mutate( CIJ_non_el = dplyr::if_else(.data$cij_pattype_code == 0, - .data$cij_marker, - NA_real_), + .data$cij_marker, + NA_real_ + ), CIJ_el = dplyr::if_else(.data$cij_pattype_code == 1, - .data$cij_marker, - NA_real_), + .data$cij_marker, + NA_real_ + ), CIJ_mat = dplyr::if_else(.data$cij_pattype_code == 2, - .data$cij_marker, - NA_real_), + .data$cij_marker, + NA_real_ + ), # # assume cij_delay is logic variable # cij_delay = dplyr::if_else( # (.data$cij_delay & .data$cij_marker == 1), @@ -55,9 +58,10 @@ add_cij_columns <- function(episode_file) { # 0 # ), preventable_admissions = dplyr::if_else((.data$cij_ppa == 1 & - .data$cij_marker == 1), - 1, - 0) + .data$cij_marker == 1), + 1, + 0 + ) # preventable_beddays is now added in aggragate_by_chi ) } diff --git a/R/link_delayed_discharge_eps.R b/R/link_delayed_discharge_eps.R index 67a6940c7..479545ed6 100644 --- a/R/link_delayed_discharge_eps.R +++ b/R/link_delayed_discharge_eps.R @@ -283,43 +283,43 @@ link_delayed_discharge_eps <- function(data, year) { # add yearstay and monthly beddays create_monthly_beddays() %>% dplyr::mutate(yearstay = dplyr::rowSums( - paste0(month.abb[c(4:12,1:3)] %>% tolower(), "_beddays") + paste0(month.abb[c(4:12, 1:3)] %>% tolower(), "_beddays") )) - # tidy up and rename columns to match the format of episode files - dplyr::select( - "year" = "year_dd", - "recid" = "recid_dd", - "record_keydate1" = "record_keydate1_dd", - "record_keydate2" = "record_keydate2_dd", - "smrtype", - "chi", - "gender", - "dob", - "age", - "gpprac", - "postcode" = "postcode_dd", - "lca" = 
"dd_responsible_lca", - "hbtreatcode" = "hbtreatcode_dd", - "original_admission_date", - "delay_end_reason", - "primary_delay_reason", - "secondary_delay_reason", - "cij_marker", - "cij_start_date", - "cij_end_date", - "cij_pattype_code", - "cij_ipdc", - "cij_admtype", - "cij_adm_spec", - "cij_dis_spec", - "cij_delay", - "location", - "spec" = "spec_dd", - "dd_type", - paste0(month.abb[c(4:12,1:3)] %>% tolower(), "_beddays"), - "yearstay" - ) %>% + # tidy up and rename columns to match the format of episode files + dplyr::select( + "year" = "year_dd", + "recid" = "recid_dd", + "record_keydate1" = "record_keydate1_dd", + "record_keydate2" = "record_keydate2_dd", + "smrtype", + "chi", + "gender", + "dob", + "age", + "gpprac", + "postcode" = "postcode_dd", + "lca" = "dd_responsible_lca", + "hbtreatcode" = "hbtreatcode_dd", + "original_admission_date", + "delay_end_reason", + "primary_delay_reason", + "secondary_delay_reason", + "cij_marker", + "cij_start_date", + "cij_end_date", + "cij_pattype_code", + "cij_ipdc", + "cij_admtype", + "cij_adm_spec", + "cij_dis_spec", + "cij_delay", + "location", + "spec" = "spec_dd", + "dd_type", + paste0(month.abb[c(4:12, 1:3)] %>% tolower(), "_beddays"), + "yearstay" + ) %>% # combine DD with episode data dplyr::bind_rows( # restore cij_end_date data %>% diff --git a/R/process_extract_home_care.R b/R/process_extract_home_care.R index d541cc86a..b510f5c3b 100644 --- a/R/process_extract_home_care.R +++ b/R/process_extract_home_care.R @@ -66,8 +66,10 @@ process_extract_home_care <- function( # remove cost variables not from current year dplyr::select(-(tidyselect::contains("hc_cost_2"))) %>% # create cost total net - dplyr::mutate(cost_total_net = rowSums( - dplyr::pick(tidyselect::contains("hc_cost_q"))), + dplyr::mutate( + cost_total_net = rowSums( + dplyr::pick(tidyselect::contains("hc_cost_q")) + ), na.rm = TRUE ) From d6391e54cd4e99310e2f4b8e24bcdee9d8fd682b Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Tue, 27 Jun 2023 22:03:54 
+0100 Subject: [PATCH 149/200] fix preventable_admissions and preventable_beddays --- R/aggregate_by_chi_zihao.R | 52 +++++++++++++++++++++----------------- R/create_individual_file.R | 8 +++--- 2 files changed, 32 insertions(+), 28 deletions(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index a2e301a90..af65695e1 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -50,7 +50,7 @@ aggregate_by_chi_zihao <- function(individual_file) { ) ) - # colums specification + # colums specification, grouped by chi # columns to select last cols2 <- c( vars_end_with( @@ -83,18 +83,15 @@ aggregate_by_chi_zihao <- function(individual_file) { "ca2019", vars_start_with(individual_file, "sc_") ) - # columns to select last unique rows + # columns to count unique rows cols3 <- c( "ch_cis_episodes", "cij_total", "cij_el", "cij_non_el", - "cij_mat", - # "cij_delay", - "preventable_admissions" + "cij_mat" + # "cij_delay" ) - # columns to select last unique rows group by chi and cij_marker - cols3.1 <- c("preventable_beddays") # columns to sum up cols4 <- c( vars_end_with( @@ -142,6 +139,8 @@ aggregate_by_chi_zihao <- function(individual_file) { c("_cohort", "end_fy", "start_fy") ) ) + # columns to group by chi and cij_marker, mainly preventable + # cols7 <- c("preventable_admissions", "preventable_beddays") # compute individual_file_cols1 <- individual_file[, @@ -160,16 +159,6 @@ aggregate_by_chi_zihao <- function(individual_file) { .SDcols = cols3, by = chi ] - individual_file_cols3.1 <- individual_file[, - preventable_beddays := - data.table::fifelse( - cij_ppa == 1, - max(cij_end_date) - min(cij_start_date), - NA_real_ - ), - .SDcols = cols3.1, - by = c("chi", "cij_marker") - ] individual_file_cols4 <- individual_file[, lapply(.SD, function(x) { sum(x, na.rm = TRUE) @@ -189,19 +178,36 @@ aggregate_by_chi_zihao <- function(individual_file) { .SDcols = cols6, by = chi ] + individual_file_cols7 <- individual_file[, + `:=`( + 
preventable_beddays = + data.table::fifelse( + cij_ppa == 1, + max(cij_end_date) - min(cij_start_date), + NA_integer_ + ) + ), + by = c("chi", "cij_marker") + ] + individual_file_cols7 <- individual_file_cols7[, + `:=`( + preventable_admissions = + (unique(preventable_admissions) %>% uniqueN(na.rm = TRUE)), + preventable_beddays = + sum(preventable_beddays, na.rm = TRUE) + ), + by = "chi" + ] + individual_file <- dplyr::bind_cols( individual_file_cols1, individual_file_cols2[, chi := NULL], individual_file_cols3[, chi := NULL], individual_file_cols4[, chi := NULL], individual_file_cols5[, chi := NULL], - individual_file_cols6[, chi := NULL] + individual_file_cols6[, chi := NULL], + individual_file_cols7[, chi := NULL], ) - # cannot simply combine individual_file_cols3.1 as different group_by factors. - individual_file <- individual_file[individual_file_cols3.1, - on = "chi" - ] - # convert back to tibble individual_file <- dplyr::as_tibble(individual_file) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 5444a850e..d677609df 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -57,11 +57,9 @@ add_cij_columns <- function(episode_file) { # 1, # 0 # ), - preventable_admissions = dplyr::if_else((.data$cij_ppa == 1 & - .data$cij_marker == 1), - 1, - 0 - ) + preventable_admissions = dplyr::if_else((.data$cij_ppa == 1), + cij_marker, + NA_integer_) # preventable_beddays is now added in aggragate_by_chi ) } From 1a136fdae625a5d8c782cec84ddc93af3aaa3545 Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Tue, 27 Jun 2023 21:06:06 +0000 Subject: [PATCH 150/200] Style code --- R/aggregate_by_chi_zihao.R | 30 +++++++++++++++--------------- R/create_individual_file.R | 5 +++-- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index af65695e1..5c3665e2a 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -179,24 +179,24 @@ 
aggregate_by_chi_zihao <- function(individual_file) { by = chi ] individual_file_cols7 <- individual_file[, - `:=`( - preventable_beddays = - data.table::fifelse( - cij_ppa == 1, - max(cij_end_date) - min(cij_start_date), - NA_integer_ - ) - ), + `:=`( + preventable_beddays = + data.table::fifelse( + cij_ppa == 1, + max(cij_end_date) - min(cij_start_date), + NA_integer_ + ) + ), by = c("chi", "cij_marker") ] individual_file_cols7 <- individual_file_cols7[, - `:=`( - preventable_admissions = - (unique(preventable_admissions) %>% uniqueN(na.rm = TRUE)), - preventable_beddays = - sum(preventable_beddays, na.rm = TRUE) - ), - by = "chi" + `:=`( + preventable_admissions = + (unique(preventable_admissions) %>% uniqueN(na.rm = TRUE)), + preventable_beddays = + sum(preventable_beddays, na.rm = TRUE) + ), + by = "chi" ] individual_file <- dplyr::bind_cols( diff --git a/R/create_individual_file.R b/R/create_individual_file.R index d677609df..91111b336 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -58,8 +58,9 @@ add_cij_columns <- function(episode_file) { # 0 # ), preventable_admissions = dplyr::if_else((.data$cij_ppa == 1), - cij_marker, - NA_integer_) + cij_marker, + NA_integer_ + ) # preventable_beddays is now added in aggragate_by_chi ) } From c631f4e6d1de0cff9baca0d002fce0657483c78f Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 28 Jun 2023 11:54:26 +0100 Subject: [PATCH 151/200] include parameter for write to disk/year --- NAMESPACE | 1 + R/create_individual_file.R | 25 +++++++++++++++++++++++-- man/create_individual_file.Rd | 10 +++++++++- 3 files changed, 33 insertions(+), 3 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index c3d083704..68529122d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,6 +13,7 @@ export(convert_hscp_to_hscpnames) export(convert_numeric_to_date) export(convert_sending_location_to_lca) export(convert_year_to_fyyear) +export(create_individual_file) export(create_service_use_cohorts) export(end_fy) 
export(end_fy_quarter) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 99a1a6e77..50e881ace 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -3,8 +3,15 @@ #' @description Creates individual file from episode file #' #' @param episode_file Tibble containing episodic data -create_individual_file <- function(episode_file) { - episode_file %>% +#' @param year The year to process, in FY format. +#' @param write_to_disk (optional) Should the data be written to disk default is +#' `TRUE` i.e. write the data to disk. +#' +#' @return The processed individual file +#' @export +#' +create_individual_file <- function(episode_file, year, write_to_disk = TRUE) { + individual_file <- episode_file %>% remove_blank_chi() %>% add_cij_columns() %>% add_all_columns() %>% @@ -13,6 +20,20 @@ create_individual_file <- function(episode_file) { recode_gender() %>% aggregate_by_chi_zihao() %>% clean_individual_file() + + if (write_to_disk) { + slf_path <- get_file_path( + get_year_dir(year), + stringr::str_glue( + "source-individual-file-{year}.parquet" + ), + check_mode = "write" + ) + + write_file(episode_file, slf_path) + } + + return(individual_file) } #' Remove blank CHI diff --git a/man/create_individual_file.Rd b/man/create_individual_file.Rd index 8b0887565..d1feb23df 100644 --- a/man/create_individual_file.Rd +++ b/man/create_individual_file.Rd @@ -4,10 +4,18 @@ \alias{create_individual_file} \title{Create individual file} \usage{ -create_individual_file(episode_file) +create_individual_file(episode_file, year, write_to_disk = TRUE) } \arguments{ \item{episode_file}{Tibble containing episodic data} + +\item{year}{The year to process, in FY format.} + +\item{write_to_disk}{(optional) Should the data be written to disk default is +\code{TRUE} i.e. 
write the data to disk.} +} +\value{ +The processed individual file } \description{ Creates individual file from episode file From b7316768a72b5cd7a91cb3e5b1881d40a84d93f4 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 28 Jun 2023 13:45:26 +0100 Subject: [PATCH 152/200] Add lookups to indiv file creation pipeline --- R/create_individual_file.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 50e881ace..075ae5de5 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -19,7 +19,11 @@ create_individual_file <- function(episode_file, year, write_to_disk = TRUE) { clean_up_ch() %>% recode_gender() %>% aggregate_by_chi_zihao() %>% - clean_individual_file() + clean_individual_file() %>% + join_cohort_lookups(year) %>% + match_on_ltcs(year) %>% + join_deaths_data(year) %>% + join_sparra_hhg(year) if (write_to_disk) { slf_path <- get_file_path( From 5507a332cc0868c81eba806be490ad99fdf1106e Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 28 Jun 2023 11:54:26 +0100 Subject: [PATCH 153/200] include parameter for write to disk/year --- NAMESPACE | 1 + R/create_individual_file.R | 25 +++++++++++++++++++++++-- man/create_individual_file.Rd | 10 +++++++++- 3 files changed, 33 insertions(+), 3 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index aefb84b62..d81c66bfe 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,6 +13,7 @@ export(convert_hscp_to_hscpnames) export(convert_numeric_to_date) export(convert_sending_location_to_lca) export(convert_year_to_fyyear) +export(create_individual_file) export(create_service_use_cohorts) export(end_fy) export(end_fy_quarter) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 99a1a6e77..50e881ace 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -3,8 +3,15 @@ #' @description Creates individual file from episode file #' #' @param episode_file Tibble containing episodic data 
-create_individual_file <- function(episode_file) { - episode_file %>% +#' @param year The year to process, in FY format. +#' @param write_to_disk (optional) Should the data be written to disk default is +#' `TRUE` i.e. write the data to disk. +#' +#' @return The processed individual file +#' @export +#' +create_individual_file <- function(episode_file, year, write_to_disk = TRUE) { + individual_file <- episode_file %>% remove_blank_chi() %>% add_cij_columns() %>% add_all_columns() %>% @@ -13,6 +20,20 @@ create_individual_file <- function(episode_file) { recode_gender() %>% aggregate_by_chi_zihao() %>% clean_individual_file() + + if (write_to_disk) { + slf_path <- get_file_path( + get_year_dir(year), + stringr::str_glue( + "source-individual-file-{year}.parquet" + ), + check_mode = "write" + ) + + write_file(episode_file, slf_path) + } + + return(individual_file) } #' Remove blank CHI diff --git a/man/create_individual_file.Rd b/man/create_individual_file.Rd index 8b0887565..d1feb23df 100644 --- a/man/create_individual_file.Rd +++ b/man/create_individual_file.Rd @@ -4,10 +4,18 @@ \alias{create_individual_file} \title{Create individual file} \usage{ -create_individual_file(episode_file) +create_individual_file(episode_file, year, write_to_disk = TRUE) } \arguments{ \item{episode_file}{Tibble containing episodic data} + +\item{year}{The year to process, in FY format.} + +\item{write_to_disk}{(optional) Should the data be written to disk default is +\code{TRUE} i.e. 
write the data to disk.} +} +\value{ +The processed individual file } \description{ Creates individual file from episode file From e8f1099c763e77788df943366e6ecef9d7c0c5b8 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Wed, 28 Jun 2023 17:42:08 +0100 Subject: [PATCH 154/200] fix delay discharge beddays and yearstay --- R/link_delayed_discharge_eps.R | 130 ++++++++++++++++----------------- 1 file changed, 63 insertions(+), 67 deletions(-) diff --git a/R/link_delayed_discharge_eps.R b/R/link_delayed_discharge_eps.R index 479545ed6..9f6996baa 100644 --- a/R/link_delayed_discharge_eps.R +++ b/R/link_delayed_discharge_eps.R @@ -16,13 +16,15 @@ link_delayed_discharge_eps <- function(data, year) { cij_start_date_lower = .data$cij_start_date - lubridate::days(1L), cij_end_date_upper = .data$cij_end_date + lubridate::days(1L), cij_end_month = last_date_month(.data$cij_end_date), - is_dummy_cij_start = is.na(.data$cij_start_date) & !is.na(.data$cij_end_date), + is_dummy_cij_start = is.na(.data$cij_start_date) & + !is.na(.data$cij_end_date), dummy_cij_start = dplyr::if_else( .data$is_dummy_cij_start, lubridate::as_date("1900-01-01"), .data$cij_start_date_lower ), - is_dummy_cij_end = !is.na(.data$cij_start_date) & is.na(.data$cij_end_date), + is_dummy_cij_end = !is.na(.data$cij_start_date) & + is.na(.data$cij_end_date), dummy_cij_end = dplyr::if_else( .data$is_dummy_cij_end, lubridate::today(), @@ -34,37 +36,31 @@ link_delayed_discharge_eps <- function(data, year) { # no flag for last reported dd_data <- read_file(get_source_extract_path(year_param, "DD")) %>% - dplyr::rename( - # TODO Change the name of the variables in the DD extract rather than here. + dplyr::rename(# TODO Change the name of the variables in the DD extract rather than here. 
record_keydate1 = "keydate1_dateformat", - record_keydate2 = "keydate2_dateformat" - ) %>% + record_keydate2 = "keydate2_dateformat") %>% dplyr::mutate( # remember to revoke the keydate2 and amended_dates with dummy_keydate2 is_dummy_keydate2 = is.na(.data$record_keydate2), - dummy_keydate2 = dplyr::if_else(.data$is_dummy_keydate2, + dummy_keydate2 = dplyr::if_else( + .data$is_dummy_keydate2, lubridate::today(), .data$record_keydate2 ), dummy_id = dplyr::row_number() ) - by_dd <- dplyr::join_by( - chi, - x$record_keydate1 >= y$dummy_cij_start, - x$dummy_keydate2 <= y$dummy_cij_end - ) + by_dd <- dplyr::join_by(chi, + x$record_keydate1 >= y$dummy_cij_start, + x$dummy_keydate2 <= y$dummy_cij_end) data <- dd_data %>% dplyr::inner_join(data, - by = by_dd, - suffix = c("_dd", "") - ) %>% - dplyr::arrange( - .data$cij_start_date, - .data$cij_end_date, - .data$cij_marker, - .data$postcode - ) %>% + by = by_dd, + suffix = c("_dd", "")) %>% + dplyr::arrange(.data$cij_start_date, + .data$cij_end_date, + .data$cij_marker, + .data$postcode) %>% # remove duplicate rows, but still got some duplicate mismatches dplyr::distinct( .data$chi, @@ -265,12 +261,10 @@ link_delayed_discharge_eps <- function(data, year) { .data$datediff_end, dplyr::desc(.data$datediff_start) ) %>% - dplyr::distinct( - .data$postcode, - .data$record_keydate1_dd, - .data$record_keydate2_dd, - .keep_all = TRUE - ) %>% + dplyr::distinct(.data$postcode, + .data$record_keydate1_dd, + .data$record_keydate2_dd, + .keep_all = TRUE) %>% # add cij_delay dplyr::mutate(has_delay = dplyr::if_else( .data$chi != "" & !is.na(.data$cij_marker), @@ -281,47 +275,50 @@ link_delayed_discharge_eps <- function(data, year) { dplyr::mutate(cij_delay = max(has_delay)) %>% dplyr::ungroup() %>% # add yearstay and monthly beddays - create_monthly_beddays() %>% - dplyr::mutate(yearstay = dplyr::rowSums( + create_monthly_beddays(year, + .data$record_keydate1, + .data$record_keydate2) %>% + dplyr::mutate(yearstay = rowSums(dplyr::select( 
+ ., paste0(month.abb[c(4:12, 1:3)] %>% tolower(), "_beddays") - )) + ))) %>% - # tidy up and rename columns to match the format of episode files - dplyr::select( - "year" = "year_dd", - "recid" = "recid_dd", - "record_keydate1" = "record_keydate1_dd", - "record_keydate2" = "record_keydate2_dd", - "smrtype", - "chi", - "gender", - "dob", - "age", - "gpprac", - "postcode" = "postcode_dd", - "lca" = "dd_responsible_lca", - "hbtreatcode" = "hbtreatcode_dd", - "original_admission_date", - "delay_end_reason", - "primary_delay_reason", - "secondary_delay_reason", - "cij_marker", - "cij_start_date", - "cij_end_date", - "cij_pattype_code", - "cij_ipdc", - "cij_admtype", - "cij_adm_spec", - "cij_dis_spec", - "cij_delay", - "location", - "spec" = "spec_dd", - "dd_type", - paste0(month.abb[c(4:12, 1:3)] %>% tolower(), "_beddays"), - "yearstay" - ) %>% + # tidy up and rename columns to match the format of episode files + dplyr::select( + "year" = "year_dd", + "recid" = "recid_dd", + "record_keydate1" = "record_keydate1_dd", + "record_keydate2" = "record_keydate2_dd", + "smrtype", + "chi", + "gender", + "dob", + "age", + "gpprac", + "postcode" = "postcode_dd", + "lca" = "dd_responsible_lca", + "hbtreatcode" = "hbtreatcode_dd", + "original_admission_date", + "delay_end_reason", + "primary_delay_reason", + "secondary_delay_reason", + "cij_marker", + "cij_start_date", + "cij_end_date", + "cij_pattype_code", + "cij_ipdc", + "cij_admtype", + "cij_adm_spec", + "cij_dis_spec", + "cij_delay", + "location", + "spec" = "spec_dd", + "dd_type", + all_of(paste0(month.abb[c(4:12, 1:3)] %>% tolower(), "_beddays")), + "yearstay" + ) %>% # combine DD with episode data - dplyr::bind_rows( # restore cij_end_date + dplyr::bind_rows(# restore cij_end_date data %>% dplyr::select( -c( @@ -333,8 +330,7 @@ link_delayed_discharge_eps <- function(data, year) { "is_dummy_cij_end", "dummy_cij_end" ) - ) - ) + )) return(data) } From ff36479cc7a80488bd4f32f0baea34648fb9165a Mon Sep 17 00:00:00 2001 From: 
lizihao-anu Date: Wed, 28 Jun 2023 16:44:42 +0000 Subject: [PATCH 155/200] Style code --- R/link_delayed_discharge_eps.R | 49 ++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/R/link_delayed_discharge_eps.R b/R/link_delayed_discharge_eps.R index 9f6996baa..884fb3043 100644 --- a/R/link_delayed_discharge_eps.R +++ b/R/link_delayed_discharge_eps.R @@ -36,9 +36,10 @@ link_delayed_discharge_eps <- function(data, year) { # no flag for last reported dd_data <- read_file(get_source_extract_path(year_param, "DD")) %>% - dplyr::rename(# TODO Change the name of the variables in the DD extract rather than here. + dplyr::rename( # TODO Change the name of the variables in the DD extract rather than here. record_keydate1 = "keydate1_dateformat", - record_keydate2 = "keydate2_dateformat") %>% + record_keydate2 = "keydate2_dateformat" + ) %>% dplyr::mutate( # remember to revoke the keydate2 and amended_dates with dummy_keydate2 is_dummy_keydate2 = is.na(.data$record_keydate2), @@ -50,17 +51,22 @@ link_delayed_discharge_eps <- function(data, year) { dummy_id = dplyr::row_number() ) - by_dd <- dplyr::join_by(chi, - x$record_keydate1 >= y$dummy_cij_start, - x$dummy_keydate2 <= y$dummy_cij_end) + by_dd <- dplyr::join_by( + chi, + x$record_keydate1 >= y$dummy_cij_start, + x$dummy_keydate2 <= y$dummy_cij_end + ) data <- dd_data %>% dplyr::inner_join(data, - by = by_dd, - suffix = c("_dd", "")) %>% - dplyr::arrange(.data$cij_start_date, - .data$cij_end_date, - .data$cij_marker, - .data$postcode) %>% + by = by_dd, + suffix = c("_dd", "") + ) %>% + dplyr::arrange( + .data$cij_start_date, + .data$cij_end_date, + .data$cij_marker, + .data$postcode + ) %>% # remove duplicate rows, but still got some duplicate mismatches dplyr::distinct( .data$chi, @@ -262,9 +268,10 @@ link_delayed_discharge_eps <- function(data, year) { dplyr::desc(.data$datediff_start) ) %>% dplyr::distinct(.data$postcode, - .data$record_keydate1_dd, - .data$record_keydate2_dd, 
- .keep_all = TRUE) %>% + .data$record_keydate1_dd, + .data$record_keydate2_dd, + .keep_all = TRUE + ) %>% # add cij_delay dplyr::mutate(has_delay = dplyr::if_else( .data$chi != "" & !is.na(.data$cij_marker), @@ -275,14 +282,15 @@ link_delayed_discharge_eps <- function(data, year) { dplyr::mutate(cij_delay = max(has_delay)) %>% dplyr::ungroup() %>% # add yearstay and monthly beddays - create_monthly_beddays(year, - .data$record_keydate1, - .data$record_keydate2) %>% + create_monthly_beddays( + year, + .data$record_keydate1, + .data$record_keydate2 + ) %>% dplyr::mutate(yearstay = rowSums(dplyr::select( ., paste0(month.abb[c(4:12, 1:3)] %>% tolower(), "_beddays") ))) %>% - # tidy up and rename columns to match the format of episode files dplyr::select( "year" = "year_dd", @@ -318,7 +326,7 @@ link_delayed_discharge_eps <- function(data, year) { "yearstay" ) %>% # combine DD with episode data - dplyr::bind_rows(# restore cij_end_date + dplyr::bind_rows( # restore cij_end_date data %>% dplyr::select( -c( @@ -330,7 +338,8 @@ link_delayed_discharge_eps <- function(data, year) { "is_dummy_cij_end", "dummy_cij_end" ) - )) + ) + ) return(data) } From 23e851306a24d151576807aee52dca3bc69fa752 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Wed, 28 Jun 2023 17:58:07 +0100 Subject: [PATCH 156/200] fix preventable issues --- R/aggregate_by_chi_zihao.R | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index 5c3665e2a..8e3889260 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -181,18 +181,20 @@ aggregate_by_chi_zihao <- function(individual_file) { individual_file_cols7 <- individual_file[, `:=`( preventable_beddays = - data.table::fifelse( + # ifelse is faster than dplyr::if_else here + ifelse( cij_ppa == 1, max(cij_end_date) - min(cij_start_date), - NA_integer_ + NA ) ), - by = c("chi", "cij_marker") + # cij_marker has been renamed as cij_total + by = c("chi", "cij_total") 
] individual_file_cols7 <- individual_file_cols7[, `:=`( preventable_admissions = - (unique(preventable_admissions) %>% uniqueN(na.rm = TRUE)), + (unique(preventable_admissions) %>% data.table::uniqueN(na.rm = TRUE)), preventable_beddays = sum(preventable_beddays, na.rm = TRUE) ), From 3022576ff2b856b77963548de1c44a947f70404e Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Wed, 28 Jun 2023 17:01:41 +0000 Subject: [PATCH 157/200] Style code --- R/aggregate_by_chi_zihao.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index 8e3889260..405d5a795 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -181,7 +181,7 @@ aggregate_by_chi_zihao <- function(individual_file) { individual_file_cols7 <- individual_file[, `:=`( preventable_beddays = - # ifelse is faster than dplyr::if_else here + # ifelse is faster than dplyr::if_else here ifelse( cij_ppa == 1, max(cij_end_date) - min(cij_start_date), From 9a7b8e0bbcb440e7017d3767162bfaac742662d0 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Wed, 28 Jun 2023 18:14:44 +0100 Subject: [PATCH 158/200] fix the issue of preventable stuff --- R/aggregate_by_chi_zihao.R | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index 405d5a795..387a093dc 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -179,26 +179,27 @@ aggregate_by_chi_zihao <- function(individual_file) { by = chi ] individual_file_cols7 <- individual_file[, - `:=`( - preventable_beddays = - # ifelse is faster than dplyr::if_else here - ifelse( - cij_ppa == 1, - max(cij_end_date) - min(cij_start_date), - NA - ) + .( + preventable_admissions = preventable_admissions, + preventable_beddays = + # ifelse is faster than dplyr::if_else here + ifelse( + cij_ppa == 1, + max(cij_end_date) - min(cij_start_date), + NA + ) ), # cij_marker has been renamed as 
cij_total by = c("chi", "cij_total") ] individual_file_cols7 <- individual_file_cols7[, - `:=`( - preventable_admissions = - (unique(preventable_admissions) %>% data.table::uniqueN(na.rm = TRUE)), - preventable_beddays = - sum(preventable_beddays, na.rm = TRUE) - ), - by = "chi" + .( + preventable_admissions = + data.table::uniqueN(unique(preventable_admissions), na.rm = TRUE), + preventable_beddays = + sum(preventable_beddays, na.rm = TRUE) + ), + by = "chi" ] individual_file <- dplyr::bind_cols( From d2649439692bc4a1b78e8c6d9eadf6293d197c92 Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Wed, 28 Jun 2023 17:18:21 +0000 Subject: [PATCH 159/200] Style code --- R/aggregate_by_chi_zihao.R | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index 387a093dc..b4951bbc6 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -180,26 +180,26 @@ aggregate_by_chi_zihao <- function(individual_file) { ] individual_file_cols7 <- individual_file[, .( - preventable_admissions = preventable_admissions, - preventable_beddays = - # ifelse is faster than dplyr::if_else here - ifelse( - cij_ppa == 1, - max(cij_end_date) - min(cij_start_date), - NA - ) + preventable_admissions = preventable_admissions, + preventable_beddays = + # ifelse is faster than dplyr::if_else here + ifelse( + cij_ppa == 1, + max(cij_end_date) - min(cij_start_date), + NA + ) ), # cij_marker has been renamed as cij_total by = c("chi", "cij_total") ] individual_file_cols7 <- individual_file_cols7[, - .( - preventable_admissions = - data.table::uniqueN(unique(preventable_admissions), na.rm = TRUE), - preventable_beddays = - sum(preventable_beddays, na.rm = TRUE) - ), - by = "chi" + .( + preventable_admissions = + data.table::uniqueN(unique(preventable_admissions), na.rm = TRUE), + preventable_beddays = + sum(preventable_beddays, na.rm = TRUE) + ), + by = "chi" ] individual_file <- 
dplyr::bind_cols( From 7433fb86c4f2beee64b0eb27ee5c91470097cab3 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Wed, 28 Jun 2023 18:41:04 +0100 Subject: [PATCH 160/200] Update R/aggregate_by_chi_zihao.R --- R/aggregate_by_chi_zihao.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index b4951bbc6..f855d918e 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -209,7 +209,7 @@ aggregate_by_chi_zihao <- function(individual_file) { individual_file_cols4[, chi := NULL], individual_file_cols5[, chi := NULL], individual_file_cols6[, chi := NULL], - individual_file_cols7[, chi := NULL], + individual_file_cols7[, chi := NULL] ) # convert back to tibble individual_file <- dplyr::as_tibble(individual_file) From 8f31277a0569264231be18c871b1170230e0788d Mon Sep 17 00:00:00 2001 From: Moohan Date: Mon, 3 Jul 2023 10:49:27 +0000 Subject: [PATCH 161/200] Update documentation --- man/process_slf_deaths_lookup.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/process_slf_deaths_lookup.Rd b/man/process_slf_deaths_lookup.Rd index c512777fa..2ecde97ce 100644 --- a/man/process_slf_deaths_lookup.Rd +++ b/man/process_slf_deaths_lookup.Rd @@ -18,7 +18,7 @@ process_slf_deaths_lookup( \item{nrs_deaths_data}{NRS deaths data.} \item{chi_deaths_data}{IT CHI deaths data.} - + \item{write_to_disk}{(optional) Should the data be written to disk default is \code{TRUE} i.e. 
write the data to disk.} } From b3f2d11e9492956c5276dfc4f7802bb623c34040 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Mon, 3 Jul 2023 11:51:30 +0100 Subject: [PATCH 162/200] Fix minor typos --- R/aggregate_by_chi_zihao.R | 2 +- R/create_individual_file.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index f855d918e..0e3389b85 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -50,7 +50,7 @@ aggregate_by_chi_zihao <- function(individual_file) { ) ) - # colums specification, grouped by chi + # column specification, grouped by chi # columns to select last cols2 <- c( vars_end_with( diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 0b1b8fd30..bc9c1cd28 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -82,7 +82,7 @@ add_cij_columns <- function(episode_file) { cij_marker, NA_integer_ ) - # preventable_beddays is now added in aggragate_by_chi + # preventable_beddays is now added in aggregate_by_chi ) } From 1bc1d6c6244f4e8236d69254ba78285e25d2d785 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Mon, 3 Jul 2023 10:52:41 +0000 Subject: [PATCH 163/200] [check-spelling] Update metadata Update for https://github.com/Public-Health-Scotland/source-linkage-files/actions/runs/5443581387/attempts/1 Accepted in https://github.com/Public-Health-Scotland/source-linkage-files/pull/709#issuecomment-1617917895 Signed-off-by: check-spelling-bot --- .github/actions/spelling/expect.txt | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt index 75e977ea6..394a4bbe0 100644 --- a/.github/actions/spelling/expect.txt +++ b/.github/actions/spelling/expect.txt @@ -28,6 +28,7 @@ cmh CNWs commhosp congen +costincdnas costmonthnum costsfy covr @@ -45,6 +46,7 @@ dbconnect dbplyr deathdiag demog +dfc disch dischloc dischto @@ -70,6 +72,7 @@ fyyear 
geogs ggplot GLS +gls gms GPOo gpprac @@ -99,6 +102,7 @@ keydate keyring keytime keytimex +lgl los ltc ltcs @@ -111,6 +115,7 @@ multiday multisession multistaff NAs +newcons nhs nhshosp NRS @@ -141,6 +146,7 @@ purrr quickstart Rbuildignore rds +reabl reablement readcode readr @@ -197,6 +203,6 @@ xintercept xlsx yearstay YYYYQX +zihao zsav zstd -zstd From 31b3782e9515b433c4759c50b5d83e98c18df263 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Mon, 3 Jul 2023 12:03:48 +0100 Subject: [PATCH 164/200] Remove some obsolete comments --- R/aggregate_by_chi_zihao.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index 0e3389b85..cebeae9d6 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -139,8 +139,6 @@ aggregate_by_chi_zihao <- function(individual_file) { c("_cohort", "end_fy", "start_fy") ) ) - # columns to group by chi and cij_marker, mainly preventable - # cols7 <- c("preventable_admissions", "preventable_beddays") # compute individual_file_cols1 <- individual_file[, From a1371eda09400a85977b31bace0129d0d91bec71 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Mon, 3 Jul 2023 12:04:26 +0100 Subject: [PATCH 165/200] Remove some unnecessary brackets --- R/create_individual_file.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index bc9c1cd28..406eeae7f 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -78,7 +78,7 @@ add_cij_columns <- function(episode_file) { # 1, # 0 # ), - preventable_admissions = dplyr::if_else((.data$cij_ppa == 1), + preventable_admissions = dplyr::if_else(.data$cij_ppa == 1, cij_marker, NA_integer_ ) From 64081c87f93c565aa886878500091215fa5d4706 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Mon, 3 Jul 2023 12:05:00 +0100 Subject: [PATCH 166/200] Reformat some code --- R/create_individual_file.R | 6 ++---- R/link_delayed_discharge_eps.R | 3 ++- 2 files 
changed, 4 insertions(+), 5 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 406eeae7f..5fde82353 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -327,8 +327,7 @@ add_ch_columns <- function(episode_file, prefix, condition) { add_standard_cols(prefix, condition) %>% dplyr::mutate( ch_cost_per_day = dplyr::if_else( - eval(condition) & - .data$yearstay > 0, + eval(condition) & .data$yearstay > 0, .data$cost_total_net / .data$yearstay, .data$cost_total_net ), @@ -340,8 +339,7 @@ add_ch_columns <- function(episode_file, prefix, condition) { ), # If end date is missing use the first day of next FY quarter ch_ep_end = dplyr::if_else( - eval(condition) & - is.na(.data$ch_ep_end), + eval(condition) & is.na(.data$ch_ep_end), start_next_fy_quarter(.data$sc_latest_submission), .data$ch_ep_end ) diff --git a/R/link_delayed_discharge_eps.R b/R/link_delayed_discharge_eps.R index fcd1715a4..c3920f60c 100644 --- a/R/link_delayed_discharge_eps.R +++ b/R/link_delayed_discharge_eps.R @@ -36,7 +36,8 @@ link_delayed_discharge_eps <- function(data, year) { # no flag for last reported dd_data <- read_file(get_source_extract_path(year_param, "DD")) %>% - dplyr::rename( # TODO Change the name of the variables in the DD extract rather than here. + # TODO Change the name of the variables in the DD extract rather than here. 
+ dplyr::rename( record_keydate1 = "keydate1_dateformat", record_keydate2 = "keydate2_dateformat" ) %>% From 0800662389b559304c7e6f24e99e9399aed7bbab Mon Sep 17 00:00:00 2001 From: James McMahon Date: Mon, 3 Jul 2023 12:05:37 +0100 Subject: [PATCH 167/200] Use some `dplyr` functions for readability --- R/link_delayed_discharge_eps.R | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/R/link_delayed_discharge_eps.R b/R/link_delayed_discharge_eps.R index c3920f60c..de21cc6b1 100644 --- a/R/link_delayed_discharge_eps.R +++ b/R/link_delayed_discharge_eps.R @@ -288,10 +288,7 @@ link_delayed_discharge_eps <- function(data, year) { .data$record_keydate1, .data$record_keydate2 ) %>% - dplyr::mutate(yearstay = rowSums(dplyr::select( - ., - paste0(month.abb[c(4:12, 1:3)] %>% tolower(), "_beddays") - ))) %>% + dplyr::mutate(yearstay = rowSums(dplyr::pick(dplyr::ends_with("_beddays")))) %>% # tidy up and rename columns to match the format of episode files dplyr::select( "year" = "year_dd", @@ -323,7 +320,7 @@ link_delayed_discharge_eps <- function(data, year) { "location", "spec" = "spec_dd", "dd_type", - all_of(paste0(month.abb[c(4:12, 1:3)] %>% tolower(), "_beddays")), +dplyr::ends_with("_beddays"), "yearstay" ) %>% # combine DD with episode data From a954611ed60f1d01d6ad2513798a97db94767bd4 Mon Sep 17 00:00:00 2001 From: Moohan Date: Mon, 3 Jul 2023 11:07:32 +0000 Subject: [PATCH 168/200] Style code --- R/link_delayed_discharge_eps.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/link_delayed_discharge_eps.R b/R/link_delayed_discharge_eps.R index de21cc6b1..e52031b77 100644 --- a/R/link_delayed_discharge_eps.R +++ b/R/link_delayed_discharge_eps.R @@ -36,7 +36,7 @@ link_delayed_discharge_eps <- function(data, year) { # no flag for last reported dd_data <- read_file(get_source_extract_path(year_param, "DD")) %>% - # TODO Change the name of the variables in the DD extract rather than here. 
+ # TODO Change the name of the variables in the DD extract rather than here. dplyr::rename( record_keydate1 = "keydate1_dateformat", record_keydate2 = "keydate2_dateformat" @@ -320,7 +320,7 @@ link_delayed_discharge_eps <- function(data, year) { "location", "spec" = "spec_dd", "dd_type", -dplyr::ends_with("_beddays"), + dplyr::ends_with("_beddays"), "yearstay" ) %>% # combine DD with episode data From 689dac2eaef5185bf05d72037b240080337e1f0a Mon Sep 17 00:00:00 2001 From: James McMahon Date: Mon, 3 Jul 2023 12:23:46 +0100 Subject: [PATCH 169/200] Update R/link_delayed_discharge_eps.R --- R/link_delayed_discharge_eps.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/link_delayed_discharge_eps.R b/R/link_delayed_discharge_eps.R index e8ef322d7..3a65bda58 100644 --- a/R/link_delayed_discharge_eps.R +++ b/R/link_delayed_discharge_eps.R @@ -268,7 +268,8 @@ link_delayed_discharge_eps <- function(data, year) { .data$datediff_end, dplyr::desc(.data$datediff_start) ) %>% - dplyr::distinct(.data$postcode, + dplyr::distinct( + .data$postcode, .data$record_keydate1_dd, .data$record_keydate2_dd, .keep_all = TRUE From fa6120d906f4b410a0433d561e5c832ffb403183 Mon Sep 17 00:00:00 2001 From: Moohan Date: Mon, 3 Jul 2023 11:25:48 +0000 Subject: [PATCH 170/200] Style code --- R/link_delayed_discharge_eps.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/link_delayed_discharge_eps.R b/R/link_delayed_discharge_eps.R index 3a65bda58..574b1a6fd 100644 --- a/R/link_delayed_discharge_eps.R +++ b/R/link_delayed_discharge_eps.R @@ -269,7 +269,7 @@ link_delayed_discharge_eps <- function(data, year) { dplyr::desc(.data$datediff_start) ) %>% dplyr::distinct( - .data$postcode, + .data$postcode, .data$record_keydate1_dd, .data$record_keydate2_dd, .keep_all = TRUE From 16d6d22ebc54d751c738d22f2a0640085862e8fb Mon Sep 17 00:00:00 2001 From: James McMahon Date: Tue, 4 Jul 2023 15:46:25 +0100 Subject: [PATCH 171/200] Remove some code which is no longer 
needed We now match on these variables after --- R/aggregate_by_chi_zihao.R | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index cebeae9d6..3bf1feeb1 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -128,17 +128,6 @@ aggregate_by_chi_zihao <- function(individual_file) { # columns to select maximum cols5 <- vars_contain(individual_file, c("nsu", "hl1_in_fy")) cols5 <- cols5[!(cols5 %in% c("ooh_consultation_time"))] - # columns to select first row - cols6 <- c( - condition_cols(), - # "death_date", - # "deceased", - "year", - vars_end_with( - individual_file, - c("_cohort", "end_fy", "start_fy") - ) - ) # compute individual_file_cols1 <- individual_file[, @@ -170,13 +159,6 @@ aggregate_by_chi_zihao <- function(individual_file) { by = chi ] individual_file_cols6 <- individual_file[, - lapply(.SD, function(x) { - x[!is.na(x)][1] - }), - .SDcols = cols6, - by = chi - ] - individual_file_cols7 <- individual_file[, .( preventable_admissions = preventable_admissions, preventable_beddays = @@ -190,7 +172,7 @@ aggregate_by_chi_zihao <- function(individual_file) { # cij_marker has been renamed as cij_total by = c("chi", "cij_total") ] - individual_file_cols7 <- individual_file_cols7[, + individual_file_cols6 <- individual_file_cols6[, .( preventable_admissions = data.table::uniqueN(unique(preventable_admissions), na.rm = TRUE), @@ -206,8 +188,7 @@ aggregate_by_chi_zihao <- function(individual_file) { individual_file_cols3[, chi := NULL], individual_file_cols4[, chi := NULL], individual_file_cols5[, chi := NULL], - individual_file_cols6[, chi := NULL], - individual_file_cols7[, chi := NULL] + individual_file_cols6[, chi := NULL] ) # convert back to tibble individual_file <- dplyr::as_tibble(individual_file) From 77ddd9e27ab7687baba2daddf63bf13c0f0d391c Mon Sep 17 00:00:00 2001 From: James McMahon Date: Tue, 4 Jul 2023 15:49:03 +0100 Subject: [PATCH 
172/200] Work out preventable admissions with similar indicators --- R/aggregate_by_chi_zihao.R | 20 ++++++++------------ R/create_individual_file.R | 2 +- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index 3bf1feeb1..cfece9f19 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -89,8 +89,9 @@ aggregate_by_chi_zihao <- function(individual_file) { "cij_total", "cij_el", "cij_non_el", - "cij_mat" - # "cij_delay" + "cij_mat", + # "cij_delay", + "preventable_admissions" ) # columns to sum up cols4 <- c( @@ -160,22 +161,17 @@ aggregate_by_chi_zihao <- function(individual_file) { ] individual_file_cols6 <- individual_file[, .( - preventable_admissions = preventable_admissions, - preventable_beddays = - # ifelse is faster than dplyr::if_else here - ifelse( - cij_ppa == 1, - max(cij_end_date) - min(cij_start_date), - NA - ) + preventable_beddays = ifelse( + cij_ppa == 1, + max(cij_end_date) - min(cij_start_date), + NA_real_ + ) ), # cij_marker has been renamed as cij_total by = c("chi", "cij_total") ] individual_file_cols6 <- individual_file_cols6[, .( - preventable_admissions = - data.table::uniqueN(unique(preventable_admissions), na.rm = TRUE), preventable_beddays = sum(preventable_beddays, na.rm = TRUE) ), diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 5fde82353..4d3555d71 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -79,7 +79,7 @@ add_cij_columns <- function(episode_file) { # 0 # ), preventable_admissions = dplyr::if_else(.data$cij_ppa == 1, - cij_marker, + .data$cij_marker, NA_integer_ ) # preventable_beddays is now added in aggregate_by_chi From 51a0767f0bc92034e69c350aefd051c5bd9ca50c Mon Sep 17 00:00:00 2001 From: James McMahon Date: Tue, 4 Jul 2023 15:56:11 +0100 Subject: [PATCH 173/200] Lowercase variable names --- R/create_individual_file.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) 
diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 4d3555d71..61663269d 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -60,15 +60,15 @@ add_cij_columns <- function(episode_file) { episode_file %>% dplyr::mutate( - CIJ_non_el = dplyr::if_else(.data$cij_pattype_code == 0, + cij_non_el = dplyr::if_else(.data$cij_pattype_code == 0, .data$cij_marker, NA_real_ ), - CIJ_el = dplyr::if_else(.data$cij_pattype_code == 1, + cij_el = dplyr::if_else(.data$cij_pattype_code == 1, .data$cij_marker, NA_real_ ), - CIJ_mat = dplyr::if_else(.data$cij_pattype_code == 2, + cij_mat = dplyr::if_else(.data$cij_pattype_code == 2, .data$cij_marker, NA_real_ ), @@ -631,9 +631,9 @@ aggregate_by_chi <- function(episode_file) { c( "ch_cis_episodes" = "ch_chi_cis", "cij_total" = "cij_marker", - "CIJ_el", - "CIJ_non_el", - "CIJ_mat", + "cij_el", + "cij_non_el", + "cij_mat", # "cij_delay", "ooh_cases" = "ooh_case_id", "preventable_admissions" From b17f80653ccc1ecad209a88ae78c245ac93e8535 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Tue, 4 Jul 2023 16:03:10 +0100 Subject: [PATCH 174/200] Restore `cij_delay` --- R/aggregate_by_chi_zihao.R | 2 +- R/create_individual_file.R | 24 +++++++++++++----------- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index cfece9f19..ec7fe5b64 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -90,7 +90,7 @@ aggregate_by_chi_zihao <- function(individual_file) { "cij_el", "cij_non_el", "cij_mat", - # "cij_delay", + "cij_delay", "preventable_admissions" ) # columns to sum up diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 61663269d..2d2d5be38 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -60,29 +60,31 @@ add_cij_columns <- function(episode_file) { episode_file %>% dplyr::mutate( - cij_non_el = dplyr::if_else(.data$cij_pattype_code == 0, + cij_non_el = 
dplyr::if_else( + .data$cij_pattype_code == 0, .data$cij_marker, NA_real_ ), - cij_el = dplyr::if_else(.data$cij_pattype_code == 1, + cij_el = dplyr::if_else( + .data$cij_pattype_code == 1, .data$cij_marker, NA_real_ ), - cij_mat = dplyr::if_else(.data$cij_pattype_code == 2, + cij_mat = dplyr::if_else( + .data$cij_pattype_code == 2, .data$cij_marker, NA_real_ ), - # # assume cij_delay is logic variable - # cij_delay = dplyr::if_else( - # (.data$cij_delay & .data$cij_marker == 1), - # 1, - # 0 - # ), - preventable_admissions = dplyr::if_else(.data$cij_ppa == 1, + cij_delay = dplyr::if_else( + .data$recid == "DD", + .data$cij_marker, + NA_real_ + ), + preventable_admissions = dplyr::if_else( + .data$cij_ppa == 1, .data$cij_marker, NA_integer_ ) - # preventable_beddays is now added in aggregate_by_chi ) } From 12ec4f63cdf65a0cb35bdec9fd0b4767a885b4a1 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Tue, 4 Jul 2023 16:08:01 +0100 Subject: [PATCH 175/200] Restore DN variables --- R/aggregate_by_chi_zihao.R | 2 +- R/create_individual_file.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index ec7fe5b64..e84f7aac6 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -103,7 +103,7 @@ aggregate_by_chi_zihao <- function(individual_file) { "cost", "attendances", "attend", - # "contacts", + "contacts", "hours", "alarms", "telecare", diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 2d2d5be38..be31c2ed6 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -106,7 +106,7 @@ add_all_columns <- function(episode_file) { add_ae_columns("AE", .data$recid == "AE2") %>% add_pis_columns("PIS", .data$recid == "PIS") %>% add_ooh_columns("OoH", .data$recid == "OoH") %>% - # add_dn_columns("DN", .data$recid == "DN") %>% + add_dn_columns("DN", .data$recid == "DN") %>% add_cmh_columns("CMH", .data$recid == "CMH") %>% add_dd_columns("DD", 
.data$recid == "DD") %>% add_nsu_columns("NSU", .data$recid == "NSU") %>% From 33681d3bf5501048e5336dd35419f32b02525f30 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Tue, 4 Jul 2023 16:28:24 +0100 Subject: [PATCH 176/200] Tidy the code and use integers where possible --- R/aggregate_by_chi_zihao.R | 21 +++---- R/create_individual_file.R | 116 ++++++++++++++++++++----------------- 2 files changed, 70 insertions(+), 67 deletions(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index e84f7aac6..5b2ccc1ff 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -11,7 +11,7 @@ aggregate_by_chi_zihao <- function(individual_file) { cli::cli_alert_info("Aggregate by CHI function started at {Sys.time()}") individual_file <- individual_file %>% - dplyr::select(-c(postcode, gpprac)) %>% + dplyr::select(-c("postcode", "gpprac")) %>% dplyr::rename( "gpprac" = "most_recent_gpprac", "postcode" = "most_recent_postcode" @@ -133,31 +133,31 @@ aggregate_by_chi_zihao <- function(individual_file) { # compute individual_file_cols1 <- individual_file[, .(gender = mean(gender)), - by = chi + by = "chi" ] individual_file_cols2 <- individual_file[, .SD[.N], .SDcols = cols2, - by = chi + by = "chi" ] individual_file_cols3 <- individual_file[, lapply(.SD, function(x) { data.table::uniqueN(x, na.rm = TRUE) }), .SDcols = cols3, - by = chi + by = "chi" ] individual_file_cols4 <- individual_file[, lapply(.SD, function(x) { sum(x, na.rm = TRUE) }), .SDcols = cols4, - by = chi + by = "chi" ] individual_file_cols5 <- individual_file[, lapply(.SD, function(x) max(x, na.rm = TRUE)), .SDcols = cols5, - by = chi + by = "chi" ] individual_file_cols6 <- individual_file[, .( @@ -172,8 +172,7 @@ aggregate_by_chi_zihao <- function(individual_file) { ] individual_file_cols6 <- individual_file_cols6[, .( - preventable_beddays = - sum(preventable_beddays, na.rm = TRUE) + preventable_beddays = sum(preventable_beddays, na.rm = TRUE) ), by = "chi" ] @@ -195,7 +194,6 
@@ aggregate_by_chi_zihao <- function(individual_file) { #' select columns ending with some patterns #' @describeIn select columns based on patterns -#' vars_end_with <- function(data, vars, ignore_case = FALSE) { names(data)[stringr::str_ends( names(data), @@ -207,7 +205,6 @@ vars_end_with <- function(data, vars, ignore_case = FALSE) { #' select columns starting with some patterns #' @describeIn select columns based on patterns -#' vars_start_with <- function(data, vars, ignore_case = FALSE) { names(data)[stringr::str_starts( names(data), @@ -219,7 +216,6 @@ vars_start_with <- function(data, vars, ignore_case = FALSE) { #' select columns contains some characters #' @describeIn select columns based on patterns -#' vars_contain <- function(data, vars, ignore_case = FALSE) { names(data)[stringr::str_detect( names(data), @@ -233,7 +229,6 @@ vars_contain <- function(data, vars, ignore_case = FALSE) { #' #' @description Aggregate CH variables by CHI and CIS. #' -#' #' @inheritParams create_individual_file aggregate_ch_episodes_zihao <- function(episode_file) { cli::cli_alert_info("Aggregate ch episodes function started at {Sys.time()}") @@ -247,7 +242,7 @@ aggregate_ch_episodes_zihao <- function(episode_file) { ch_ep_start = min(record_keydate1), ch_ep_end = max(ch_ep_end), ch_cost_per_day = mean(ch_cost_per_day) - ), by = .(chi, ch_chi_cis)] + ), by = .("chi", "ch_chi_cis")] # Convert back to tibble if needed episode_file <- tibble::as_tibble(episode_file) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index be31c2ed6..54d1dbe93 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -9,7 +9,6 @@ #' #' @return The processed individual file #' @export -#' create_individual_file <- function(episode_file, year, write_to_disk = TRUE) { individual_file <- episode_file %>% remove_blank_chi() %>% @@ -118,24 +117,23 @@ add_all_columns <- function(episode_file) { add_sds_columns("SDS", .data$recid == "SDS") %>% dplyr::mutate( 
health_net_cost = rowSums( - dplyr::select( - ., - c( - Acute_cost, - Mat_cost, - MH_cost, - GLS_cost, - OP_cost_attend, - AE_cost, - PIS_cost, - OoH_cost - ) + dplyr::pick( + .data$Acute_cost, + .data$Mat_cost, + .data$MH_cost, + .data$GLS_cost, + .data$OP_cost_attend, + .data$AE_cost, + .data$PIS_cost, + .data$OoH_cost ), na.rm = TRUE ), - health_net_costincdnas = - health_net_cost + - dplyr::if_else(is.na(OP_cost_dnas), 0, OP_cost_dnas) + health_net_costincdnas = .data$health_net_cost + dplyr::if_else( + is.na(.data$OP_cost_dnas), + 0, + .data$OP_cost_dnas + ) ) } @@ -191,13 +189,13 @@ add_op_columns <- function(episode_file, prefix, condition) { condition_1 <- substitute(condition & attendance_status == 1) episode_file <- episode_file %>% dplyr::mutate( - "{prefix}_newcons_attendances" := dplyr::if_else(eval(condition_1), 1, NA_real_), + "{prefix}_newcons_attendances" := dplyr::if_else(eval(condition_1), 1L, NA_integer_), "{prefix}_cost_attend" := dplyr::if_else(eval(condition_1), .data$cost_total_net, NA_real_) ) condition_5_8 <- substitute(condition & attendance_status %in% c(5, 8)) episode_file <- episode_file %>% dplyr::mutate( - "{prefix}_newcons_dnas" := dplyr::if_else(eval(condition_5_8), 1, NA_real_), + "{prefix}_newcons_dnas" := dplyr::if_else(eval(condition_5_8), 1L, NA_integer_), "{prefix}_cost_dnas" := dplyr::if_else(eval(condition_5_8), .data$cost_total_net_inc_dnas, NA_real_) ) return(episode_file) @@ -210,7 +208,7 @@ add_ae_columns <- function(episode_file, prefix, condition) { condition <- substitute(condition) episode_file %>% add_standard_cols(prefix, condition, cost = TRUE) %>% - dplyr::mutate("{prefix}_attendances" := dplyr::if_else(eval(condition), 1, NA_real_)) + dplyr::mutate("{prefix}_attendances" := dplyr::if_else(eval(condition), 1L, NA_integer_)) } #' Add PIS columns @@ -220,7 +218,7 @@ add_pis_columns <- function(episode_file, prefix, condition) { condition <- substitute(condition) episode_file %>% add_standard_cols(prefix, 
condition, cost = TRUE) %>% - dplyr::mutate("{prefix}_paid_items" := dplyr::if_else(eval(condition), .data$no_paid_items, NA_real_)) + dplyr::mutate("{prefix}_paid_items" := dplyr::if_else(eval(condition), .data$no_paid_items, NA_integer_)) } #' Add OoH columns @@ -231,21 +229,27 @@ add_ooh_columns <- function(episode_file, prefix, condition) { episode_file <- episode_file %>% add_standard_cols(prefix, condition, cost = TRUE) %>% dplyr::mutate( - "{prefix}_homeV" := dplyr::if_else(eval(condition) & .data$smrtype == "OOH-HomeV", 1, NA_real_), - "{prefix}_advice" := dplyr::if_else(eval(condition) & .data$smrtype == "OOH-Advice", 1, NA_real_), - "{prefix}_DN" := dplyr::if_else(eval(condition) & .data$smrtype == "OOH-DN", 1, NA_real_), - "{prefix}_NHS24" := dplyr::if_else(eval(condition) & .data$smrtype == "OOH-NHS24", 1, NA_real_), - "{prefix}_other" := dplyr::if_else(eval(condition) & .data$smrtype == "OOH-Other", 1, NA_real_), - "{prefix}_PCC" := dplyr::if_else(eval(condition) & .data$smrtype == "OOH-PCC", 1, NA_real_), - ooh_covid_advice = dplyr::if_else(eval(condition) & .data$smrtype == "OOH-C19Adv", 1, NA_real_), - ooh_covid_assessment = dplyr::if_else(eval(condition) & .data$smrtype == "OOH-C19Ass", 1, NA_real_), - ooh_covid_other = dplyr::if_else(eval(condition) & .data$smrtype == "OOH-C190th", 1, NA_real_) + "{prefix}_homeV" := dplyr::if_else(eval(condition) & .data$smrtype == "OOH-HomeV", 1L, NA_integer_), + "{prefix}_advice" := dplyr::if_else(eval(condition) & .data$smrtype == "OOH-Advice", 1L, NA_integer_), + "{prefix}_DN" := dplyr::if_else(eval(condition) & .data$smrtype == "OOH-DN", 1L, NA_integer_), + "{prefix}_NHS24" := dplyr::if_else(eval(condition) & .data$smrtype == "OOH-NHS24", 1L, NA_integer_), + "{prefix}_other" := dplyr::if_else(eval(condition) & .data$smrtype == "OOH-Other", 1L, NA_integer_), + "{prefix}_PCC" := dplyr::if_else(eval(condition) & .data$smrtype == "OOH-PCC", 1L, NA_integer_), + "{prefix}_covid_advice" := 
dplyr::if_else(eval(condition) & .data$smrtype == "OOH-C19Adv", 1L, NA_integer_), + "{prefix}_covid_assessment" := dplyr::if_else(eval(condition) & .data$smrtype == "OOH-C19Ass", 1L, NA_integer_), + "{prefix}_covid_other" := dplyr::if_else(eval(condition) & .data$smrtype == "OOH-C190th", 1L, NA_integer_) ) episode_file <- episode_file %>% dplyr::mutate( - OoH_consultation_time = dplyr::if_else(eval(condition), as.numeric((lubridate::seconds_to_period(.data$keytime2) + .data$record_keydate2) - (lubridate::seconds_to_period(.data$keytime1) + .data$record_keydate1), units = "mins"), NA_real_), - OoH_consultation_time = dplyr::if_else(OoH_consultation_time < 0, 0, .data$OoH_consultation_time), + "{prefix}_consultation_time" := dplyr::if_else( + eval(condition), + pmax( + 0, + as.numeric((lubridate::seconds_to_period(.data$keytime2) + .data$record_keydate2) - (lubridate::seconds_to_period(.data$keytime1) + .data$record_keydate1), units = "mins") + ), + NA_real_ + ), ) return(episode_file) @@ -258,7 +262,7 @@ add_dn_columns <- function(episode_file, prefix, condition) { condition <- substitute(condition) episode_file %>% add_standard_cols(prefix, condition, episode = TRUE, cost = TRUE) %>% - dplyr::mutate("{prefix}_contacts" := dplyr::if_else(eval(condition), .data$total_no_dn_contacts, NA_real_)) + dplyr::mutate("{prefix}_contacts" := dplyr::if_else(eval(condition), .data$total_no_dn_contacts, NA_integer_)) } #' Add CMH columns @@ -268,7 +272,7 @@ add_cmh_columns <- function(episode_file, prefix, condition) { condition <- substitute(condition) episode_file %>% add_standard_cols(prefix, condition) %>% - dplyr::mutate("{prefix}_contacts" := dplyr::if_else(eval(condition), 1, NA_real_)) + dplyr::mutate("{prefix}_contacts" := dplyr::if_else(eval(condition), 1L, NA_integer_)) } #' Add DD columns @@ -279,13 +283,13 @@ add_dd_columns <- function(episode_file, prefix, condition) { condition_delay <- substitute(condition & primary_delay_reason != "9") episode_file <- 
episode_file %>% dplyr::mutate( - "{prefix}_NonCode9_episodes" := dplyr::if_else(eval(condition_delay), 1, NA_real_), + "{prefix}_NonCode9_episodes" := dplyr::if_else(eval(condition_delay), 1L, NA_integer_), "{prefix}_NonCode9_beddays" := dplyr::if_else(eval(condition_delay), .data$yearstay, NA_real_) ) condition_delay_9 <- substitute(condition & primary_delay_reason == "9") episode_file <- episode_file %>% dplyr::mutate( - "{prefix}_Code9_episodes" := dplyr::if_else(eval(condition_delay_9), 1, NA_real_), + "{prefix}_Code9_episodes" := dplyr::if_else(eval(condition_delay_9), 1L, NA_integer_), "{prefix}_Code9_beddays" := dplyr::if_else(eval(condition_delay_9), .data$yearstay, NA_real_) ) return(episode_file) @@ -298,7 +302,7 @@ add_nsu_columns <- function(episode_file, prefix, condition) { condition <- substitute(condition) episode_file %>% add_standard_cols(prefix, condition) %>% - dplyr::mutate("{prefix}" := dplyr::if_else(eval(condition), 1, NA_real_)) + dplyr::mutate("{prefix}" := dplyr::if_else(eval(condition), 1L, NA_integer_)) } #' Add NRS columns @@ -308,7 +312,7 @@ add_nrs_columns <- function(episode_file, prefix, condition) { condition <- substitute(condition) episode_file %>% add_standard_cols(prefix, condition) %>% - dplyr::mutate("{prefix}" := dplyr::if_else(eval(condition), 1, NA_real_)) + dplyr::mutate("{prefix}" := dplyr::if_else(eval(condition), 1L, NA_integer_)) } #' Add HL1 columns @@ -362,21 +366,21 @@ add_hc_columns <- function(episode_file, prefix, condition) { condition_per <- substitute(condition & smrtype == "HC-Per") episode_file <- episode_file %>% dplyr::mutate( - "{prefix}_personal_episodes" := dplyr::if_else(eval(condition_per), 1, NA_real_), + "{prefix}_personal_episodes" := dplyr::if_else(eval(condition_per), 1L, NA_integer_), "{prefix}_personal_hours" := dplyr::if_else(eval(condition_per), .data$HC_total_hours, NA_real_), "{prefix}_personal_hours_cost" := dplyr::if_else(eval(condition_per), .data$cost_total_net, NA_real_) ) 
condition_non_per <- substitute(condition & smrtype == "HC-Non-Per") episode_file <- episode_file %>% dplyr::mutate( - "{prefix}_non_personal_episodes" := dplyr::if_else(eval(condition_non_per), 1, NA_real_), + "{prefix}_non_personal_episodes" := dplyr::if_else(eval(condition_non_per), 1L, NA_integer_), "{prefix}_non_personal_hours" := dplyr::if_else(eval(condition_non_per), .data$hc_hours_annual, NA_real_), "{prefix}_non_personal_hours_cost" := dplyr::if_else(eval(condition_non_per), .data$cost_total_net, NA_real_) ) condition_reabl <- substitute(condition & hc_reablement == 1) episode_file <- episode_file %>% dplyr::mutate( - "{prefix}_reablement_episodes" := dplyr::if_else(eval(condition_reabl), 1, NA_real_), + "{prefix}_reablement_episodes" := dplyr::if_else(eval(condition_reabl), 1L, NA_integer_), "{prefix}_reablement_hours" := dplyr::if_else(eval(condition_reabl), .data$hc_hours_annual, NA_real_), "{prefix}_reablement_hours_cost" := dplyr::if_else(eval(condition_reabl), .data$cost_total_net, NA_real_) ) @@ -390,8 +394,8 @@ add_at_columns <- function(episode_file, prefix, condition) { episode_file %>% add_standard_cols(prefix, condition) %>% dplyr::mutate( - "{prefix}_alarms" := dplyr::if_else(eval(condition) & .data$smrtype == "AT-Alarm", 1, NA_real_), - "{prefix}_telecare" := dplyr::if_else(eval(condition) & .data$smrtype == "AT-Tele", 1, NA_real_) + "{prefix}_alarms" := dplyr::if_else(eval(condition) & .data$smrtype == "AT-Alarm", 1L, NA_integer_), + "{prefix}_telecare" := dplyr::if_else(eval(condition) & .data$smrtype == "AT-Tele", 1L, NA_integer_) ) } @@ -403,10 +407,10 @@ add_sds_columns <- function(episode_file, prefix, condition) { episode_file %>% add_standard_cols(prefix, condition) %>% dplyr::mutate( - "{prefix}_option_1" := dplyr::if_else(eval(condition) & .data$smrtype == "SDS-1", 1, NA_real_), - "{prefix}_option_2" := dplyr::if_else(eval(condition) & .data$smrtype == "SDS-2", 1, NA_real_), - "{prefix}_option_3" := dplyr::if_else(eval(condition) & 
.data$smrtype == "SDS-3", 1, NA_real_), - "{prefix}_option_4" := dplyr::if_else(eval(condition) & .data$smrtype == "SDS-4", 1, NA_real_) + "{prefix}_option_1" := dplyr::if_else(eval(condition) & .data$smrtype == "SDS-1", 1L, NA_integer_), + "{prefix}_option_2" := dplyr::if_else(eval(condition) & .data$smrtype == "SDS-2", 1L, NA_integer_), + "{prefix}_option_3" := dplyr::if_else(eval(condition) & .data$smrtype == "SDS-3", 1L, NA_integer_), + "{prefix}_option_4" := dplyr::if_else(eval(condition) & .data$smrtype == "SDS-4", 1L, NA_integer_) ) } @@ -423,21 +427,21 @@ add_ipdc_cols <- function(episode_file, prefix, condition, ipdc_d = TRUE, electi episode_file <- episode_file %>% dplyr::mutate( "{prefix}_inpatient_cost" := dplyr::if_else(eval(condition_i), .data$cost_total_net, NA_real_), - "{prefix}_inpatient_episodes" := dplyr::if_else(eval(condition_i), 1, NA_real_), + "{prefix}_inpatient_episodes" := dplyr::if_else(eval(condition_i), 1L, NA_integer_), "{prefix}_inpatient_beddays" := dplyr::if_else(eval(condition_i), .data$yearstay, NA_real_) ) if (elective) { condition_el <- substitute(condition_i & cij_pattype == "Elective") episode_file <- episode_file %>% dplyr::mutate( - "{prefix}_el_inpatient_episodes" := dplyr::if_else(eval(condition_el), 1, NA_real_), + "{prefix}_el_inpatient_episodes" := dplyr::if_else(eval(condition_el), 1L, NA_integer_), "{prefix}_el_inpatient_beddays" := dplyr::if_else(eval(condition_el), .data$yearstay, NA_real_), "{prefix}_el_inpatient_cost" := dplyr::if_else(eval(condition_el), .data$cost_total_net, NA_real_) ) condition_non_el <- substitute(condition_i & cij_pattype == "Non-Elective") episode_file <- episode_file %>% dplyr::mutate( - "{prefix}_non_el_inpatient_episodes" := dplyr::if_else(eval(condition_non_el), 1, NA_real_), + "{prefix}_non_el_inpatient_episodes" := dplyr::if_else(eval(condition_non_el), 1L, NA_integer_), "{prefix}_non_el_inpatient_beddays" := dplyr::if_else(eval(condition_non_el), .data$yearstay, NA_real_), 
"{prefix}_non_el_inpatient_cost" := dplyr::if_else(eval(condition_non_el), .data$cost_total_net, NA_real_) ) @@ -446,7 +450,7 @@ add_ipdc_cols <- function(episode_file, prefix, condition, ipdc_d = TRUE, electi condition_d <- substitute(eval(condition) & ipdc == "D") episode_file <- episode_file %>% dplyr::mutate( - "{prefix}_daycase_episodes" := dplyr::if_else(eval(condition_d), 1, NA_real_), + "{prefix}_daycase_episodes" := dplyr::if_else(eval(condition_d), 1L, NA_integer_), "{prefix}_daycase_cost" := dplyr::if_else(eval(condition_d), .data$cost_total_net, NA_real_) ) } @@ -464,7 +468,7 @@ add_ipdc_cols <- function(episode_file, prefix, condition, ipdc_d = TRUE, electi add_standard_cols <- function(episode_file, prefix, condition, drop = NULL, episode = FALSE, cost = FALSE) { episode_file <- dplyr::bind_cols(episode_file, create_cols(episode_file, prefix, condition, drop)) if (episode) { - episode_file <- dplyr::mutate(episode_file, "{prefix}_episodes" := dplyr::if_else(eval(condition), 1, NA_real_)) + episode_file <- dplyr::mutate(episode_file, "{prefix}_episodes" := dplyr::if_else(eval(condition), 1L, NA_integer_)) } if (cost) { episode_file <- dplyr::mutate(episode_file, "{prefix}_cost" := dplyr::if_else(eval(condition), .data$cost_total_net, NA_real_)) @@ -495,7 +499,11 @@ create_cols <- function(episode_file, prefix, condition, drop) { #' @inheritParams na_type create_col <- function(episode_file, col, prefix, condition) { episode_file %>% - dplyr::mutate("{prefix}_{col}" := dplyr::if_else(eval(condition), .data[[tolower(col)]], na_type(col))) %>% + dplyr::mutate("{prefix}_{col}" := dplyr::if_else( + eval(condition), + .data[[tolower(col)]], + na_type(col) + )) %>% dplyr::select(dplyr::last_col()) } @@ -598,9 +606,9 @@ recode_gender <- function(episode_file) { episode_file %>% dplyr::mutate( gender = dplyr::if_else( - gender == 0 | gender == 9, + .data$gender %in% c(0, 9), 1.5, - gender + .data$gender ) ) } From f9e6f81465a17e84c6162b7949a029ab0c83dacf Mon 
Sep 17 00:00:00 2001 From: James McMahon Date: Tue, 4 Jul 2023 18:03:36 +0100 Subject: [PATCH 177/200] Supply `year` as a parameter to `clean_up_ch` --- R/create_individual_file.R | 28 +++++++++++++--------------- man/clean_up_ch.Rd | 4 +++- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 54d1dbe93..56afed567 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -15,7 +15,7 @@ create_individual_file <- function(episode_file, year, write_to_disk = TRUE) { add_cij_columns() %>% add_all_columns() %>% aggregate_ch_episodes_zihao() %>% - clean_up_ch() %>% + clean_up_ch(year) %>% recode_gender() %>% aggregate_by_chi_zihao() %>% clean_individual_file() @@ -556,7 +556,7 @@ aggregate_ch_episodes <- function(episode_file) { #' @description Clean up CH-related columns. #' #' @inheritParams create_individual_file -clean_up_ch <- function(episode_file) { +clean_up_ch <- function(episode_file, year) { cli::cli_alert_info("Clean up CH function started at {Sys.time()}") episode_file %>% @@ -565,34 +565,32 @@ clean_up_ch <- function(episode_file) { fy_start = start_fy(year) ) %>% dplyr::mutate( - term_1 = pmin(ch_ep_end, fy_end + 1), - term_2 = pmax(ch_ep_start, fy_start) + term_1 = pmin(.data$ch_ep_end, .data$fy_end + 1), + term_2 = pmax(.data$ch_ep_start, .data$fy_start) ) %>% dplyr::mutate( ch_beddays = dplyr::if_else( - recid == "CH", - as.numeric(term_1 - term_2), + .data$recid == "CH", + as.numeric(.data$term_1 - .data$term_2), NA_real_ ), ch_cost = dplyr::if_else( - recid == "CH" & ch_no_cost == 0, - ch_beddays * ch_cost_per_day, + .data$recid == "CH" & .data$ch_no_cost == 0, + .data$ch_beddays * .data$ch_cost_per_day, NA_real_ ), ch_beddays = dplyr::if_else( - recid == "CH" & ch_chi_cis == 0, + .data$recid == "CH" & .data$ch_chi_cis == 0, 0, - ch_beddays + .data$ch_beddays ), ch_cost = dplyr::if_else( - recid == "CH" & ch_chi_cis == 0, + .data$recid == "CH" & .data$ch_chi_cis 
== 0, 0, - ch_cost + .data$ch_cost ) ) %>% - dplyr::select( - -fy_end, -fy_start, -term_1, -term_2 - ) + dplyr::select(-c("fy_end", "fy_start", "term_1", "term_2")) } #' Recode gender diff --git a/man/clean_up_ch.Rd b/man/clean_up_ch.Rd index 64bb3e330..0182c84e8 100644 --- a/man/clean_up_ch.Rd +++ b/man/clean_up_ch.Rd @@ -4,10 +4,12 @@ \alias{clean_up_ch} \title{Clean up CH} \usage{ -clean_up_ch(episode_file) +clean_up_ch(episode_file, year) } \arguments{ \item{episode_file}{Tibble containing episodic data} + +\item{year}{The year to process, in FY format.} } \description{ Clean up CH-related columns. From cb73e0ff3098c1dc41c1dee12b65910e900c5ab8 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Wed, 5 Jul 2023 10:17:04 +0100 Subject: [PATCH 178/200] Supply `year` as a parameter to `clean_individual_file` --- R/create_individual_file.R | 4 ++-- man/clean_individual_file.Rd | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 56afed567..e20221c2e 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -18,7 +18,7 @@ create_individual_file <- function(episode_file, year, write_to_disk = TRUE) { clean_up_ch(year) %>% recode_gender() %>% aggregate_by_chi_zihao() %>% - clean_individual_file() + clean_individual_file(year) if (write_to_disk) { slf_path <- get_file_path( @@ -742,7 +742,7 @@ min_no_inf <- function(x) { #' @description Clean up columns in individual file #' #' @param individual_file Individual file where each row represents a unique CHI -clean_individual_file <- function(individual_file) { +clean_individual_file <- function(individual_file, year) { cli::cli_alert_info("Clean individual file function started at {Sys.time()}") individual_file %>% diff --git a/man/clean_individual_file.Rd b/man/clean_individual_file.Rd index 30d5479c6..c56e4265f 100644 --- a/man/clean_individual_file.Rd +++ b/man/clean_individual_file.Rd @@ -4,7 +4,7 @@ 
\alias{clean_individual_file} \title{Clean individual file} \usage{ -clean_individual_file(individual_file) +clean_individual_file(individual_file, year) } \arguments{ \item{individual_file}{Individual file where each row represents a unique CHI} From 42cc15e9a2614cc5e864c374cd627df85555c13f Mon Sep 17 00:00:00 2001 From: James McMahon Date: Wed, 5 Jul 2023 10:49:08 +0100 Subject: [PATCH 179/200] Only keep required variables to save memory --- R/create_individual_file.R | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index e20221c2e..f6c92b99c 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -11,6 +11,39 @@ #' @export create_individual_file <- function(episode_file, year, write_to_disk = TRUE) { individual_file <- episode_file %>% + dplyr::select( + "year", + "chi", + "dob", + "gender", + "record_keydate1", + "record_keydate2", + "keytime1", + "keytime2", + "recid", + "smrtype", + "ipdc", + "postcode", + "gpprac", + "cij_marker", + "cij_start_date", + "cij_end_date", + "cij_pattype", + "cij_pattype_code", + "cij_ppa", + "ch_chi_cis", + "yearstay", + "cost_total_net", + "cost_total_net_inc_dnas", + "attendance_status", + "no_paid_items", + "total_no_dn_contacts", + "primary_delay_reason", + "sc_latest_submission", + "hc_hours_annual", + "hc_reablement", + "ooh_case_id" + ) %>% remove_blank_chi() %>% add_cij_columns() %>% add_all_columns() %>% From 35a6ef2155cf8270bc9bbdb9e47d7e3a9f4c7e9a Mon Sep 17 00:00:00 2001 From: James McMahon Date: Wed, 5 Jul 2023 10:51:25 +0100 Subject: [PATCH 180/200] Rename the parameter so the documentation works --- R/aggregate_by_chi_zihao.R | 4 ++-- man/aggregate_by_chi_zihao.Rd | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index 5b2ccc1ff..edd9dc980 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -7,10 +7,10 
@@ #' @importFrom data.table .SD #' #' @inheritParams create_individual_file -aggregate_by_chi_zihao <- function(individual_file) { +aggregate_by_chi_zihao <- function(episode_file) { cli::cli_alert_info("Aggregate by CHI function started at {Sys.time()}") - individual_file <- individual_file %>% + individual_file <- episode_file %>% dplyr::select(-c("postcode", "gpprac")) %>% dplyr::rename( "gpprac" = "most_recent_gpprac", diff --git a/man/aggregate_by_chi_zihao.Rd b/man/aggregate_by_chi_zihao.Rd index a754fde4d..3d4961e19 100644 --- a/man/aggregate_by_chi_zihao.Rd +++ b/man/aggregate_by_chi_zihao.Rd @@ -4,7 +4,10 @@ \alias{aggregate_by_chi_zihao} \title{Aggregate by CHI} \usage{ -aggregate_by_chi_zihao(individual_file) +aggregate_by_chi_zihao(episode_file) +} +\arguments{ +\item{episode_file}{Tibble containing episodic data} } \description{ Aggregate episode file by CHI to convert into From 978d9e83a637d0c502d82787dff2644287d9e475 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Wed, 5 Jul 2023 14:44:47 +0100 Subject: [PATCH 181/200] Use `setnames` to change names to lower --- R/aggregate_by_chi_zihao.R | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index edd9dc980..b8ee86ff3 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -21,10 +21,11 @@ aggregate_by_chi_zihao <- function(episode_file) { dplyr::ends_with("_postcode"), dplyr::ends_with("_DoB") )) + # Convert to data.table + data.table::setDT(episode_file) - names(individual_file) <- tolower(names(individual_file)) - - data.table::setDT(individual_file) # Convert to data.table + # Ensure all variable names are lowercase + data.table::setnames(episode_file, stringr::str_to_lower) # Sort the data within each chunk data.table::setkeyv( @@ -39,7 +40,7 @@ aggregate_by_chi_zihao <- function(episode_file) { ) data.table::setnames( - individual_file, + episode_file, c( "ch_chi_cis", "cij_marker", 
"ooh_case_id" # ,"hh_in_fy" From 9be6385e32e2d8445e8b1d9c864397ae5034191a Mon Sep 17 00:00:00 2001 From: James McMahon Date: Wed, 5 Jul 2023 15:08:59 +0100 Subject: [PATCH 182/200] Remove unneeded code --- R/aggregate_by_chi_zihao.R | 43 +--------- R/create_individual_file.R | 162 ++----------------------------------- 2 files changed, 11 insertions(+), 194 deletions(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index b8ee86ff3..390f3a119 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -10,17 +10,6 @@ aggregate_by_chi_zihao <- function(episode_file) { cli::cli_alert_info("Aggregate by CHI function started at {Sys.time()}") - individual_file <- episode_file %>% - dplyr::select(-c("postcode", "gpprac")) %>% - dplyr::rename( - "gpprac" = "most_recent_gpprac", - "postcode" = "most_recent_postcode" - ) %>% - dplyr::select(-c( - dplyr::ends_with("_gpprac"), - dplyr::ends_with("_postcode"), - dplyr::ends_with("_DoB") - )) # Convert to data.table data.table::setDT(episode_file) @@ -54,35 +43,11 @@ aggregate_by_chi_zihao <- function(episode_file) { # column specification, grouped by chi # columns to select last cols2 <- c( - vars_end_with( - individual_file, - c("postcode", "dob", "ggprac") - ), - "ooh_cases", + "postcode", + "dob", "gpprac", - "hbrescode", - "hscp", - "lca", - "ca2018", - "locality", - "datazone2011", - "hbpraccode", - "cluster", - "simd2020v2_rank", - "simd2020v2_sc_decile", - "simd2020v2_sc_quintile", - "simd2020v2_hb2019_decile", - "simd2020v2_hb2019_quintile", - "simd2020v2_hscp2019_decile", - "simd2020v2_hscp2019_quintile", - "ur8_2020", - "ur6_2020", - "ur3_2020", - "ur2_2020", - "hb2019", - "hscp2019", - "ca2019", - vars_start_with(individual_file, "sc_") + "ooh_cases", + vars_start_with(episode_file, "sc_") ) # columns to count unique rows cols3 <- c( diff --git a/R/create_individual_file.R b/R/create_individual_file.R index f6c92b99c..bcf17269c 100644 --- a/R/create_individual_file.R +++ 
b/R/create_individual_file.R @@ -354,7 +354,7 @@ add_nrs_columns <- function(episode_file, prefix, condition) { add_hl1_columns <- function(episode_file, prefix, condition) { condition <- substitute(condition) episode_file %>% - add_standard_cols(prefix, condition, drop = "gpprac") + add_standard_cols(prefix, condition) } #' Add CH columns @@ -498,8 +498,7 @@ add_ipdc_cols <- function(episode_file, prefix, condition, ipdc_d = TRUE, electi #' @param drop Any columns out of "DoB", "postcode", and "gpprac" that should be dropped #' @param episode Whether to create prefix_episodes col, e.g. "Acute_episodes" #' @param cost Whether to create prefix_cost col, e.g. "Acute_cost" -add_standard_cols <- function(episode_file, prefix, condition, drop = NULL, episode = FALSE, cost = FALSE) { - episode_file <- dplyr::bind_cols(episode_file, create_cols(episode_file, prefix, condition, drop)) +add_standard_cols <- function(episode_file, prefix, condition, episode = FALSE, cost = FALSE) { if (episode) { episode_file <- dplyr::mutate(episode_file, "{prefix}_episodes" := dplyr::if_else(eval(condition), 1L, NA_integer_)) } @@ -509,52 +508,6 @@ add_standard_cols <- function(episode_file, prefix, condition, drop = NULL, epis return(episode_file) } -#' Create standard cols -#' -#' @description Create standard cols (DoB, postcode, gpprac). -#' -#' @inheritParams add_acute_columns -#' @param drop Any columns out of "DoB", "postcode", and "gpprac" that should be dropped -create_cols <- function(episode_file, prefix, condition, drop) { - cols <- c("DoB", "postcode", "gpprac") - if (!is.null(drop)) { - cols <- cols[cols != drop] - } - episode_file <- purrr::map_dfc(cols, ~ create_col(episode_file, .x, prefix, condition)) - return(episode_file) -} - -#' Create standard col -#' -#' @description Create single standard column. 
-#' -#' @inheritParams add_acute_columns -#' @inheritParams na_type -create_col <- function(episode_file, col, prefix, condition) { - episode_file %>% - dplyr::mutate("{prefix}_{col}" := dplyr::if_else( - eval(condition), - .data[[tolower(col)]], - na_type(col) - )) %>% - dplyr::select(dplyr::last_col()) -} - -#' NA type -#' -#' @description Helper function to use correct NA type depending on -#' which type of column is created. -#' -#' @param col Which column to create ("DoB", "postcode", or "gpprac") -na_type <- function(col = c("DoB", "postcode", "gpprac")) { - match.arg(col) - na_type <- switch(col, - "DoB" = lubridate::NA_Date_, - "postcode" = NA_character_, - "gpprac" = NA_real_ - ) - return(na_type) -} #' Aggregate CIS episodes #' @@ -778,41 +731,14 @@ min_no_inf <- function(x) { clean_individual_file <- function(individual_file, year) { cli::cli_alert_info("Clean individual file function started at {Sys.time()}") - individual_file %>% - drop_cols() %>% - clean_up_gender() %>% - dplyr::mutate( - age = compute_mid_year_age(year, .data$dob) - ) -} - -#' Drop redundant columns -#' -#' @description Drop redundant columns from individual file. -#' -#' @inheritParams clean_individual_file -drop_cols <- function(individual_file) { individual_file %>% dplyr::select( - -month_cols(), -"ch_no_cost", - # -"dob", - # -"postcode", - # -"gpprac", - -"no_paid_items" # , - #-"total_no_dn_contacts" - ) -} - -#' Month columns -#' -#' @description Return chr of column names following pattern -#' "month_beddays" and "month_cost" e.g. 
apr_beddays" and "apr_cost" -month_cols <- function() { - suffix <- c("_beddays", "_cost") - months <- tolower(c(rep(month.abb, each = 2))) - month_cols <- paste0(months, suffix) - return(month_cols) + -"no_paid_items", + -"total_no_dn_contacts" + ) %>% + clean_up_gender() %>% + dplyr::mutate(age = compute_mid_year_age(year, .data$dob)) } #' Clean up gender column @@ -829,77 +755,3 @@ clean_up_gender <- function(individual_file) { ) ) } - - -#' Fill missing date of births -#' -#' @description Fill missing date of births with -#' date of births from specific episode columns in hierarchy. -#' -#' @inheritParams clean_individual_file -fill_dob <- function(individual_file) { - column_prefix <- c( - "PIS", "AE", "OoH", "OP", "Acute", "Mat", "DN", "CMH", "MH", - "GLS", "HL1", "CH", "HC", "AT", "SDS", "NSU", "NRS" - ) - columns <- paste0(column_prefix, "_DoB") - for (i in length(columns)) { - individual_file <- replace_dob_with_col(individual_file, columns[i]) - } - return(individual_file) -} - -#' Fill missing date of births -#' -#' @description Fill missing date of births with -#' date of births from an episode date of birth column. -#' -#' @inheritParams clean_individual_file -#' @param col Column containing date of birth for episode -replace_dob_with_col <- function(individual_file, col) { - individual_file %>% - dplyr::mutate( - DoB = dplyr::if_else( - is.na(.data$DoB) & !is.na(.data[[col]]), - .data[[col]], - .data$DoB - ) - ) -} - - -#' Fill missing postcodes -#' -#' @description Fill missing postcodes with -#' postcodes from specific episode columns in hierarchy. 
-#' -#' @inheritParams clean_individual_file -fill_dob <- function(individual_file) { - column_prefix <- c( - "PIS", "AE", "OoH", "OP", "Acute", "Mat", "HC", "DN", "CMH", "MH", - "GLS", "AT", "SDS", "CH", "NSU", "NRS", "HL1" - ) - columns <- paste0(column_prefix, "_postcode") - for (i in length(columns)) { - individual_file <- replace_postcode_with_col(individual_file, columns[i]) - } - return(individual_file) -} - -#' Fill missing postcode -#' -#' @description Fill missing postcode with -#' postcodes from an episode postcode column. -#' -#' @inheritParams clean_individual_file -#' @param col Column containing postcode for episode -replace_postcode_with_col <- function(individual_file, col) { - individual_file %>% - dplyr::mutate( - postcode = dplyr::if_else( - is.na(.data$postcode) & !is.na(.data[[col]]), - .data[[col]], - .data$postcode - ) - ) -} From 1ca40001731ef8542c09955e2133fc4e70a164f6 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Wed, 5 Jul 2023 15:14:51 +0100 Subject: [PATCH 183/200] Update file path name --- R/create_individual_file.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index bcf17269c..051b10461 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -54,7 +54,7 @@ create_individual_file <- function(episode_file, year, write_to_disk = TRUE) { clean_individual_file(year) if (write_to_disk) { - slf_path <- get_file_path( + slf_indiv_path <- get_file_path( get_year_dir(year), stringr::str_glue( "source-individual-file-{year}.parquet" @@ -62,7 +62,7 @@ create_individual_file <- function(episode_file, year, write_to_disk = TRUE) { check_mode = "write" ) - write_file(episode_file, slf_path) + write_file(individual_file, slf_indiv_path) } return(individual_file) From 3ebfecca09f955b8594352483eac8a5500110b60 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Wed, 5 Jul 2023 15:16:06 +0100 Subject: [PATCH 184/200] Trim the return code --- 
R/aggregate_by_chi_zihao.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index 390f3a119..00029d469 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -151,10 +151,9 @@ aggregate_by_chi_zihao <- function(episode_file) { individual_file_cols5[, chi := NULL], individual_file_cols6[, chi := NULL] ) - # convert back to tibble - individual_file <- dplyr::as_tibble(individual_file) - return(individual_file) + # convert back to tibble + return(dplyr::as_tibble(individual_file)) } From beae36a76bae412c06eb37f2e3a894da3818b3d3 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Wed, 5 Jul 2023 15:20:23 +0100 Subject: [PATCH 185/200] Some fixes --- R/aggregate_by_chi_zihao.R | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index 00029d469..2f05aebf5 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -16,9 +16,9 @@ aggregate_by_chi_zihao <- function(episode_file) { # Ensure all variable names are lowercase data.table::setnames(episode_file, stringr::str_to_lower) - # Sort the data within each chunk + # Sort the data data.table::setkeyv( - individual_file, + episode_file, c( "chi", "record_keydate1", @@ -62,7 +62,7 @@ aggregate_by_chi_zihao <- function(episode_file) { # columns to sum up cols4 <- c( vars_end_with( - individual_file, + episode_file, c( "episodes", "beddays", @@ -86,49 +86,48 @@ aggregate_by_chi_zihao <- function(episode_file) { ) ), vars_start_with( - individual_file, + episode_file, "sds_option" ), "health_net_costincdnas" ) cols4 <- cols4[!(cols4 %in% c("ch_cis_episodes"))] # columns to select maximum - cols5 <- vars_contain(individual_file, c("nsu", "hl1_in_fy")) - cols5 <- cols5[!(cols5 %in% c("ooh_consultation_time"))] - + cols5 <- c("nsu", vars_contain(episode_file, c("hl1_in_fy"))) + data.table::setnafill(episode_file, 
fill= 0L, cols = cols5) # compute - individual_file_cols1 <- individual_file[, + individual_file_cols1 <- episode_file[, .(gender = mean(gender)), by = "chi" ] - individual_file_cols2 <- individual_file[, + individual_file_cols2 <- episode_file[, .SD[.N], .SDcols = cols2, by = "chi" ] - individual_file_cols3 <- individual_file[, + individual_file_cols3 <- episode_file[, lapply(.SD, function(x) { data.table::uniqueN(x, na.rm = TRUE) }), .SDcols = cols3, by = "chi" ] - individual_file_cols4 <- individual_file[, + individual_file_cols4 <- episode_file[, lapply(.SD, function(x) { sum(x, na.rm = TRUE) }), .SDcols = cols4, by = "chi" ] - individual_file_cols5 <- individual_file[, + individual_file_cols5 <- episode_file[, lapply(.SD, function(x) max(x, na.rm = TRUE)), .SDcols = cols5, by = "chi" ] - individual_file_cols6 <- individual_file[, + individual_file_cols6 <- episode_file[, .( preventable_beddays = ifelse( - cij_ppa == 1, + max(cij_ppa, na.rm = TRUE), max(cij_end_date) - min(cij_start_date), NA_real_ ) @@ -207,7 +206,7 @@ aggregate_ch_episodes_zihao <- function(episode_file) { ch_ep_start = min(record_keydate1), ch_ep_end = max(ch_ep_end), ch_cost_per_day = mean(ch_cost_per_day) - ), by = .("chi", "ch_chi_cis")] + ), by = c("chi", "ch_chi_cis")] # Convert back to tibble if needed episode_file <- tibble::as_tibble(episode_file) From 13b7f1105083ed3e24683b8f734dfc1975d93d4c Mon Sep 17 00:00:00 2001 From: James McMahon Date: Wed, 5 Jul 2023 16:50:34 +0100 Subject: [PATCH 186/200] Correctly compute `ooh_cases` --- R/aggregate_by_chi_zihao.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index 2f05aebf5..a2638cbe1 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -46,7 +46,6 @@ aggregate_by_chi_zihao <- function(episode_file) { "postcode", "dob", "gpprac", - "ooh_cases", vars_start_with(episode_file, "sc_") ) # columns to count unique rows @@ -57,6 +56,7 @@ 
aggregate_by_chi_zihao <- function(episode_file) { "cij_non_el", "cij_mat", "cij_delay", + "ooh_cases", "preventable_admissions" ) # columns to sum up From c03a0ee47e021e0e95d6f12f009992041a7b80cc Mon Sep 17 00:00:00 2001 From: Moohan Date: Wed, 5 Jul 2023 15:53:06 +0000 Subject: [PATCH 187/200] Update documentation --- man/add_standard_cols.Rd | 5 ++--- man/create_col.Rd | 20 -------------------- man/create_cols.Rd | 20 -------------------- man/drop_cols.Rd | 14 -------------- man/fill_dob.Rd | 20 -------------------- man/month_cols.Rd | 12 ------------ man/na_type.Rd | 15 --------------- man/replace_dob_with_col.Rd | 17 ----------------- man/replace_postcode_with_col.Rd | 17 ----------------- 9 files changed, 2 insertions(+), 138 deletions(-) delete mode 100644 man/create_col.Rd delete mode 100644 man/create_cols.Rd delete mode 100644 man/drop_cols.Rd delete mode 100644 man/fill_dob.Rd delete mode 100644 man/month_cols.Rd delete mode 100644 man/na_type.Rd delete mode 100644 man/replace_dob_with_col.Rd delete mode 100644 man/replace_postcode_with_col.Rd diff --git a/man/add_standard_cols.Rd b/man/add_standard_cols.Rd index becec0ddd..0a44e95ee 100644 --- a/man/add_standard_cols.Rd +++ b/man/add_standard_cols.Rd @@ -8,7 +8,6 @@ add_standard_cols( episode_file, prefix, condition, - drop = NULL, episode = FALSE, cost = FALSE ) @@ -20,11 +19,11 @@ add_standard_cols( \item{condition}{Condition to create new columns based on} -\item{drop}{Any columns out of "DoB", "postcode", and "gpprac" that should be dropped} - \item{episode}{Whether to create prefix_episodes col, e.g. "Acute_episodes"} \item{cost}{Whether to create prefix_cost col, e.g. "Acute_cost"} + +\item{drop}{Any columns out of "DoB", "postcode", and "gpprac" that should be dropped} } \description{ Add standard columns (DoB, postcode, gpprac, episodes, cost) to episode file. 
diff --git a/man/create_col.Rd b/man/create_col.Rd deleted file mode 100644 index 7357adf5d..000000000 --- a/man/create_col.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/create_individual_file.R -\name{create_col} -\alias{create_col} -\title{Create standard col} -\usage{ -create_col(episode_file, col, prefix, condition) -} -\arguments{ -\item{episode_file}{Tibble containing episodic data} - -\item{col}{Which column to create ("DoB", "postcode", or "gpprac")} - -\item{prefix}{Prefix to add to related columns, e.g. "Acute"} - -\item{condition}{Condition to create new columns based on} -} -\description{ -Create single standard column. -} diff --git a/man/create_cols.Rd b/man/create_cols.Rd deleted file mode 100644 index 6bbe1d98a..000000000 --- a/man/create_cols.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/create_individual_file.R -\name{create_cols} -\alias{create_cols} -\title{Create standard cols} -\usage{ -create_cols(episode_file, prefix, condition, drop) -} -\arguments{ -\item{episode_file}{Tibble containing episodic data} - -\item{prefix}{Prefix to add to related columns, e.g. "Acute"} - -\item{condition}{Condition to create new columns based on} - -\item{drop}{Any columns out of "DoB", "postcode", and "gpprac" that should be dropped} -} -\description{ -Create standard cols (DoB, postcode, gpprac). -} diff --git a/man/drop_cols.Rd b/man/drop_cols.Rd deleted file mode 100644 index 8029d289c..000000000 --- a/man/drop_cols.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/create_individual_file.R -\name{drop_cols} -\alias{drop_cols} -\title{Drop redundant columns} -\usage{ -drop_cols(individual_file) -} -\arguments{ -\item{individual_file}{Individual file where each row represents a unique CHI} -} -\description{ -Drop redundant columns from individual file. 
-} diff --git a/man/fill_dob.Rd b/man/fill_dob.Rd deleted file mode 100644 index 3dc8e4295..000000000 --- a/man/fill_dob.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/create_individual_file.R -\name{fill_dob} -\alias{fill_dob} -\title{Fill missing date of births} -\usage{ -fill_dob(individual_file) - -fill_dob(individual_file) -} -\arguments{ -\item{individual_file}{Individual file where each row represents a unique CHI} -} -\description{ -Fill missing date of births with -date of births from specific episode columns in hierarchy. - -Fill missing postcodes with -postcodes from specific episode columns in hierarchy. -} diff --git a/man/month_cols.Rd b/man/month_cols.Rd deleted file mode 100644 index b8dd641e5..000000000 --- a/man/month_cols.Rd +++ /dev/null @@ -1,12 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/create_individual_file.R -\name{month_cols} -\alias{month_cols} -\title{Month columns} -\usage{ -month_cols() -} -\description{ -Return chr of column names following pattern -"month_beddays" and "month_cost" e.g. apr_beddays" and "apr_cost" -} diff --git a/man/na_type.Rd b/man/na_type.Rd deleted file mode 100644 index f8cbc9581..000000000 --- a/man/na_type.Rd +++ /dev/null @@ -1,15 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/create_individual_file.R -\name{na_type} -\alias{na_type} -\title{NA type} -\usage{ -na_type(col = c("DoB", "postcode", "gpprac")) -} -\arguments{ -\item{col}{Which column to create ("DoB", "postcode", or "gpprac")} -} -\description{ -Helper function to use correct NA type depending on -which type of column is created. 
-} diff --git a/man/replace_dob_with_col.Rd b/man/replace_dob_with_col.Rd deleted file mode 100644 index 61016ec2e..000000000 --- a/man/replace_dob_with_col.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/create_individual_file.R -\name{replace_dob_with_col} -\alias{replace_dob_with_col} -\title{Fill missing date of births} -\usage{ -replace_dob_with_col(individual_file, col) -} -\arguments{ -\item{individual_file}{Individual file where each row represents a unique CHI} - -\item{col}{Column containing date of birth for episode} -} -\description{ -Fill missing date of births with -date of births from an episode date of birth column. -} diff --git a/man/replace_postcode_with_col.Rd b/man/replace_postcode_with_col.Rd deleted file mode 100644 index 3feb0fbcb..000000000 --- a/man/replace_postcode_with_col.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/create_individual_file.R -\name{replace_postcode_with_col} -\alias{replace_postcode_with_col} -\title{Fill missing postcode} -\usage{ -replace_postcode_with_col(individual_file, col) -} -\arguments{ -\item{individual_file}{Individual file where each row represents a unique CHI} - -\item{col}{Column containing postcode for episode} -} -\description{ -Fill missing postcode with -postcodes from an episode postcode column. 
-} From 00275769e0a1d03a7a201e64e014af12fe3e4c2d Mon Sep 17 00:00:00 2001 From: Moohan Date: Wed, 5 Jul 2023 15:58:40 +0000 Subject: [PATCH 188/200] Style code --- R/aggregate_by_chi_zihao.R | 2 +- R/create_individual_file.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index a2638cbe1..735a549b2 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -94,7 +94,7 @@ aggregate_by_chi_zihao <- function(episode_file) { cols4 <- cols4[!(cols4 %in% c("ch_cis_episodes"))] # columns to select maximum cols5 <- c("nsu", vars_contain(episode_file, c("hl1_in_fy"))) - data.table::setnafill(episode_file, fill= 0L, cols = cols5) + data.table::setnafill(episode_file, fill = 0L, cols = cols5) # compute individual_file_cols1 <- episode_file[, .(gender = mean(gender)), diff --git a/R/create_individual_file.R b/R/create_individual_file.R index a47d91997..ab1c0cc32 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -52,7 +52,7 @@ create_individual_file <- function(episode_file, year, write_to_disk = TRUE) { recode_gender() %>% aggregate_by_chi_zihao() %>% clean_individual_file(year) - join_cohort_lookups(year) %>% + join_cohort_lookups(year) %>% match_on_ltcs(year) %>% join_deaths_data(year) %>% join_sparra_hhg(year) From 60e3f3a50e40668f13649d7b0cd4b10385d229c5 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Thu, 6 Jul 2023 09:11:09 +0000 Subject: [PATCH 189/200] [check-spelling] Update metadata Update for https://github.com/Public-Health-Scotland/source-linkage-files/actions/runs/5466392495/attempts/1 Accepted in https://github.com/Public-Health-Scotland/source-linkage-files/pull/719#issuecomment-1623280566 Signed-off-by: check-spelling-bot --- .github/actions/spelling/expect.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt index f2b3553d2..ea3bab150 100644 --- 
a/.github/actions/spelling/expect.txt +++ b/.github/actions/spelling/expect.txt @@ -89,6 +89,7 @@ hhg hjust hms homecare +homev hscp hscpnames infyyear @@ -165,8 +166,12 @@ rspm RStudio rstudioapi Rtype +SDcols seealso selfharm +setkeyv +setnafill +setnames Siar sigfac simd From c8d86c5dffffadada834e507ec280951e63c5188 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Mon, 10 Jul 2023 15:59:59 +0100 Subject: [PATCH 190/200] Add targets for the individual file --- _targets.R | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/_targets.R b/_targets.R index ec887724c..587abfd38 100644 --- a/_targets.R +++ b/_targets.R @@ -533,6 +533,21 @@ list( data = episode_file, year = year ) + ), + tar_target( + individual_file, + create_individual_file( + episode_file = episode_file, + year = year, + write_to_disk = write_to_disk + ) + ), + tar_target( + individual_file_tests, + process_tests_individual_file( + data = individual_file, + year = year + ) ) ) ) From 62c70c5fd65186a1669b753137ab99ddb22d341c Mon Sep 17 00:00:00 2001 From: James McMahon Date: Mon, 10 Jul 2023 16:42:06 +0100 Subject: [PATCH 191/200] Fix missed pipe --- R/create_individual_file.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index ab1c0cc32..6b45ef722 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -51,8 +51,8 @@ create_individual_file <- function(episode_file, year, write_to_disk = TRUE) { clean_up_ch(year) %>% recode_gender() %>% aggregate_by_chi_zihao() %>% - clean_individual_file(year) - join_cohort_lookups(year) %>% + clean_individual_file(year) %>% + join_cohort_lookups(year) %>% match_on_ltcs(year) %>% join_deaths_data(year) %>% join_sparra_hhg(year) From 54252a12c88204f822e0447293c859c5c6d42d0e Mon Sep 17 00:00:00 2001 From: Moohan Date: Tue, 11 Jul 2023 13:08:37 +0000 Subject: [PATCH 192/200] Style code --- R/create_demographic_lookup.R | 3 +-- R/create_service_use_lookup.R 
| 3 +-- R/run_episode_file.R | 28 ++++++++++++++-------------- 3 files changed, 16 insertions(+), 18 deletions(-) diff --git a/R/create_demographic_lookup.R b/R/create_demographic_lookup.R index 8fabd39a5..dfc2e25cf 100644 --- a/R/create_demographic_lookup.R +++ b/R/create_demographic_lookup.R @@ -13,8 +13,7 @@ create_demographic_cohorts <- function( data, year, update = latest_update(), - write_to_disk = TRUE -) { + write_to_disk = TRUE) { check_variables_exist( data, c( diff --git a/R/create_service_use_lookup.R b/R/create_service_use_lookup.R index 7164038bc..30d3b0789 100644 --- a/R/create_service_use_lookup.R +++ b/R/create_service_use_lookup.R @@ -10,8 +10,7 @@ create_service_use_cohorts <- function( data, year, update = latest_update(), - write_to_disk = TRUE -) { + write_to_disk = TRUE) { check_variables_exist(data, variables = c( "chi", "recid", diff --git a/R/run_episode_file.R b/R/run_episode_file.R index 0ec10c474..f0c8478dc 100644 --- a/R/run_episode_file.R +++ b/R/run_episode_file.R @@ -313,19 +313,19 @@ create_cohort_lookups <- function(data, year, update = latest_update()) { future_demographic <- future::future({ create_demographic_cohorts( - data, - year, - update, - write_to_disk = TRUE - ) - }) + data, + year, + update, + write_to_disk = TRUE + ) + }) future_service_use <- future::future({ - create_service_use_cohorts( - data, - year, - update, - write_to_disk = TRUE - ) + create_service_use_cohorts( + data, + year, + update, + write_to_disk = TRUE + ) }) # This 'blocks' the code until they have both finished executing @@ -341,11 +341,11 @@ create_cohort_lookups <- function(data, year, update = latest_update()) { #' #' @return The data including the Demographic and Service Use lookups. 
join_cohort_lookups <- function(data, year, update = latest_update()) { - join_cohort_lookups <- data %>% + join_cohort_lookups <- data %>% dplyr::left_join( read_file( get_demographic_cohorts_path(year, update), - col_select = c("chi","demographic_cohort") + col_select = c("chi", "demographic_cohort") ), by = "chi" ) %>% From 9ae871a312c9f4064dbdb16f1fdd4f41181302d6 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Tue, 11 Jul 2023 12:04:08 +0100 Subject: [PATCH 193/200] Update some targets to only run once a week --- _targets.R | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/_targets.R b/_targets.R index 587abfd38..45a081b14 100644 --- a/_targets.R +++ b/_targets.R @@ -27,7 +27,10 @@ list( file_path_ext_clean, make_lowercase_ext(), priority = 1, - cue = tar_cue(mode = "always") + cue = tar_cue_age( + name = file_path_ext_clean, + age = as.difftime(7, units = "days") + ) ), ## Lookup data ## tar_target(gpprac_opendata, get_gpprac_opendata()), @@ -182,7 +185,10 @@ list( compress_extracts, gzip_files(year), priority = 1, - cue = tar_cue(mode = "always") + cue = tar_cue_age( + name = compress_extracts, + age = as.difftime(7, units = "days") + ) ), ### target data extracts ### tar_file_read( From 486b51d54a4e73be3f2dc1fb6a81f5639f3298ad Mon Sep 17 00:00:00 2001 From: James McMahon Date: Tue, 11 Jul 2023 16:30:55 +0100 Subject: [PATCH 194/200] Make the deaths lookup unique --- R/process_lookup_deaths.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/process_lookup_deaths.R b/R/process_lookup_deaths.R index 2141e14f2..50689d24b 100644 --- a/R/process_lookup_deaths.R +++ b/R/process_lookup_deaths.R @@ -22,7 +22,9 @@ process_slf_deaths_lookup <- function( chi_deaths_data = read_file(get_slf_chi_deaths_path()), write_to_disk = TRUE) { slf_deaths_lookup <- nrs_deaths_data %>% - dplyr::select("chi", "record_keydate1") %>% + # Only modification over 'raw' NRS is to keep the earliest death date + 
dplyr::arrange(.data$record_keydate1) %>% + dplyr::distinct(.data$chi, .data$record_keydate1) %>% dplyr::mutate( death_date = .data$record_keydate1, deceased = TRUE, From 5ad1928255db9cae1bfd7bfb1bf171ec01c5a20f Mon Sep 17 00:00:00 2001 From: James McMahon Date: Wed, 12 Jul 2023 10:14:51 +0100 Subject: [PATCH 195/200] Add `year` back to the individual file --- R/create_individual_file.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 6b45ef722..975f0317d 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -55,7 +55,8 @@ create_individual_file <- function(episode_file, year, write_to_disk = TRUE) { join_cohort_lookups(year) %>% match_on_ltcs(year) %>% join_deaths_data(year) %>% - join_sparra_hhg(year) + join_sparra_hhg(year) %>% + dplyr::mutate(year = year) if (write_to_disk) { slf_indiv_path <- get_file_path( From 507fffee54ed6202e2b2903a74c196fc141e08b5 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Fri, 14 Jul 2023 08:23:13 +0100 Subject: [PATCH 196/200] Remove `cost_total_net_inc_dnas` from the indiv file (#737) * Drop `cost_total_net_inc_dnas` * Rename `health_net_costincdnas` to `health_net_cost_inc_dnas` --- R/aggregate_by_chi_zihao.R | 2 +- R/create_individual_file.R | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R index 735a549b2..0eee203e8 100644 --- a/R/aggregate_by_chi_zihao.R +++ b/R/aggregate_by_chi_zihao.R @@ -89,7 +89,7 @@ aggregate_by_chi_zihao <- function(episode_file) { episode_file, "sds_option" ), - "health_net_costincdnas" + "health_net_cost_inc_dnas" ) cols4 <- cols4[!(cols4 %in% c("ch_cis_episodes"))] # columns to select maximum diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 975f0317d..0ecfaaaab 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -167,7 +167,7 @@ add_all_columns <- 
function(episode_file) { ), na.rm = TRUE ), - health_net_costincdnas = .data$health_net_cost + dplyr::if_else( + health_net_cost_inc_dnas = .data$health_net_cost + dplyr::if_else( is.na(.data$OP_cost_dnas), 0, .data$OP_cost_dnas @@ -738,9 +738,12 @@ clean_individual_file <- function(individual_file, year) { individual_file %>% dplyr::select( - -"ch_no_cost", - -"no_paid_items", - -"total_no_dn_contacts" + !c( + "ch_no_cost", + "no_paid_items", + "total_no_dn_contacts", + "cost_total_net_inc_dnas" + ) ) %>% clean_up_gender() %>% dplyr::mutate(age = compute_mid_year_age(year, .data$dob)) From 292f4d814a9ea4dc1104bafd06b90f89f75f4c95 Mon Sep 17 00:00:00 2001 From: Jennit07 <67372904+Jennit07@users.noreply.github.com> Date: Fri, 14 Jul 2023 15:07:21 +0100 Subject: [PATCH 197/200] Join slf lookups onto individual file (#724) * Create function for matching on slf lookups * fix some build warnings * Add `hbrescode` to select list * Pass lookups as parameters/deal with hbrescode * Update R/create_individual_file.R --------- Co-authored-by: James McMahon --- R/create_individual_file.R | 34 +++++++++++++++++++++++++++++++++- R/run_episode_file.R | 1 + man/add_standard_cols.Rd | 2 -- man/clean_individual_file.Rd | 2 ++ man/join_cohort_lookups.Rd | 2 ++ man/join_slf_lookup_vars.Rd | 27 +++++++++++++++++++++++++++ 6 files changed, 65 insertions(+), 3 deletions(-) create mode 100644 man/join_slf_lookup_vars.Rd diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 0ecfaaaab..7825951c6 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -56,6 +56,7 @@ create_individual_file <- function(episode_file, year, write_to_disk = TRUE) { match_on_ltcs(year) %>% join_deaths_data(year) %>% join_sparra_hhg(year) %>% + join_slf_lookup_vars() %>% dplyr::mutate(year = year) if (write_to_disk) { @@ -500,7 +501,6 @@ add_ipdc_cols <- function(episode_file, prefix, condition, ipdc_d = TRUE, electi #' @description Add standard columns (DoB, postcode, 
gpprac, episodes, cost) to episode file. #' #' @inheritParams add_acute_columns -#' @param drop Any columns out of "DoB", "postcode", and "gpprac" that should be dropped #' @param episode Whether to create prefix_episodes col, e.g. "Acute_episodes" #' @param cost Whether to create prefix_cost col, e.g. "Acute_cost" add_standard_cols <- function(episode_file, prefix, condition, episode = FALSE, cost = FALSE) { @@ -733,6 +733,7 @@ min_no_inf <- function(x) { #' @description Clean up columns in individual file #' #' @param individual_file Individual file where each row represents a unique CHI +#' @param year Financial year e.g 1718 clean_individual_file <- function(individual_file, year) { cli::cli_alert_info("Clean individual file function started at {Sys.time()}") @@ -763,3 +764,34 @@ clean_up_gender <- function(individual_file) { ) ) } + +#' Join slf lookup variables +#' +#' @description Join lookup variables from slf postcode lookup and slf gpprac +#' lookup. +#' +#' @param individual_file the processed individual file. 
+#' @param slf_postcode_lookup SLF processed postcode lookup +#' @param slf_gpprac_lookup SLF processed gpprac lookup +#' @param hbrescode_var hbrescode variable +#' +join_slf_lookup_vars <- function(individual_file, + slf_postcode_lookup = read_file(get_slf_postcode_path()), + slf_gpprac_lookup = read_file( + get_slf_gpprac_path(), + col_select = c("gpprac", "cluster", "hbpraccode") + ), + hbrescode_var = "hb2018") { + individual_file <- individual_file %>% + dplyr::left_join( + slf_postcode_lookup, + by = "postcode" + ) %>% + dplyr::left_join( + slf_gpprac_lookup, + by = "gpprac" + ) %>% + dplyr::rename(hbrescode = hbrescode_var) + + return(individual_file) +} diff --git a/R/run_episode_file.R b/R/run_episode_file.R index ea22f776a..f640a4437 100644 --- a/R/run_episode_file.R +++ b/R/run_episode_file.R @@ -339,6 +339,7 @@ create_cohort_lookups <- function(data, year, update = latest_update()) { #' Join cohort lookups #' #' @inheritParams store_ep_file_vars +#' @param update The latest update e.g. "Jun_2023" #' #' @return The data including the Demographic and Service Use lookups. join_cohort_lookups <- function(data, year, update = latest_update()) { diff --git a/man/add_standard_cols.Rd b/man/add_standard_cols.Rd index 0a44e95ee..744aa49de 100644 --- a/man/add_standard_cols.Rd +++ b/man/add_standard_cols.Rd @@ -22,8 +22,6 @@ add_standard_cols( \item{episode}{Whether to create prefix_episodes col, e.g. "Acute_episodes"} \item{cost}{Whether to create prefix_cost col, e.g. "Acute_cost"} - -\item{drop}{Any columns out of "DoB", "postcode", and "gpprac" that should be dropped} } \description{ Add standard columns (DoB, postcode, gpprac, episodes, cost) to episode file. 
diff --git a/man/clean_individual_file.Rd b/man/clean_individual_file.Rd index c56e4265f..fb2d3ae13 100644 --- a/man/clean_individual_file.Rd +++ b/man/clean_individual_file.Rd @@ -8,6 +8,8 @@ clean_individual_file(individual_file, year) } \arguments{ \item{individual_file}{Individual file where each row represents a unique CHI} + +\item{year}{Financial year e.g 1718} } \description{ Clean up columns in individual file diff --git a/man/join_cohort_lookups.Rd b/man/join_cohort_lookups.Rd index 21f376bdc..7e18e022c 100644 --- a/man/join_cohort_lookups.Rd +++ b/man/join_cohort_lookups.Rd @@ -10,6 +10,8 @@ join_cohort_lookups(data, year, update = latest_update()) \item{data}{The in progress episode file data.} \item{year}{The year to process, in FY format.} + +\item{update}{The latest update e.g. "Jun_2023"} } \value{ The data including the Demographic and Service Use lookups. diff --git a/man/join_slf_lookup_vars.Rd b/man/join_slf_lookup_vars.Rd new file mode 100644 index 000000000..980c66f31 --- /dev/null +++ b/man/join_slf_lookup_vars.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{join_slf_lookup_vars} +\alias{join_slf_lookup_vars} +\title{Join slf lookup variables} +\usage{ +join_slf_lookup_vars( + individual_file, + slf_postcode_lookup = read_file(get_slf_postcode_path()), + slf_gpprac_lookup = read_file(get_slf_gpprac_path(), col_select = c("gpprac", + "cluster", "hbpraccode")), + hbrescode_var = "hb2018" +) +} +\arguments{ +\item{individual_file}{the processed individual file.} + +\item{slf_postcode_lookup}{SLF processed postcode lookup} + +\item{slf_gpprac_lookup}{SLF processed gpprac lookup} + +\item{hbrescode_var}{hbrescode variable} +} +\description{ +Join lookup variables from slf postcode lookup and slf gpprac +lookup. 
+} From c644992aced8341d866d6111eaeb1e493a41f6ad Mon Sep 17 00:00:00 2001 From: Jennit07 <67372904+Jennit07@users.noreply.github.com> Date: Mon, 17 Jul 2023 13:17:06 +0100 Subject: [PATCH 198/200] Join sc client variables onto individual file (#740) * New function for matching sc client to indiv file * Style code * [check-spelling] Update metadata Update for https://github.com/Public-Health-Scotland/source-linkage-files/actions/runs/5555048903/attempts/1 Accepted in https://github.com/Public-Health-Scotland/source-linkage-files/pull/740#issuecomment-1635955654 Signed-off-by: check-spelling-bot * Code layout * Style code * Remove redundant sc variables Co-authored-by: James McMahon * Update comments Co-authored-by: James McMahon * Update comments Co-authored-by: James McMahon * Sort order of parameters to pass `data` first * Update documentation * Style code * Update R/create_individual_file.R * Update R/create_individual_file.R * Update R/create_individual_file.R * Style code --------- Signed-off-by: check-spelling-bot Co-authored-by: Jennit07 Co-authored-by: James McMahon Co-authored-by: Moohan --- .github/actions/spelling/expect.txt | 1 + R/create_individual_file.R | 39 +++++++++++++++++++++++++++++ man/join_sc_client.Rd | 26 +++++++++++++++++++ 3 files changed, 66 insertions(+) create mode 100644 man/join_sc_client.Rd diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt index ea3bab150..f6f191d5a 100644 --- a/.github/actions/spelling/expect.txt +++ b/.github/actions/spelling/expect.txt @@ -148,6 +148,7 @@ purrr quickstart Rbuildignore rcmdcheck +rdd rds reabl reablement diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 7825951c6..ab895926b 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -57,6 +57,7 @@ create_individual_file <- function(episode_file, year, write_to_disk = TRUE) { join_deaths_data(year) %>% join_sparra_hhg(year) %>% join_slf_lookup_vars() %>% + 
join_sc_client(year) %>% dplyr::mutate(year = year) if (write_to_disk) { @@ -795,3 +796,41 @@ join_slf_lookup_vars <- function(individual_file, return(individual_file) } +# TODO Remove the client data from the individual Social Care extracts +# and instead, use this function in the episode file to match on the client +# data to all episodes. +#' Join sc client variables onto individual file +#' +#' @description Match on sc client variables. +#' +#' @param individual_file the processed individual file +#' @param year financial year. +#' @param sc_client SC client lookup +#' @param sc_demographics SC Demographic lookup +join_sc_client <- function(individual_file, + year, + sc_client = read_file(get_source_extract_path(year, "Client")), + sc_demographics = read_file(get_sc_demog_lookup_path(), + col_select = c("sending_location", "social_care_id", "chi") + )) { + # TODO Update the client lookup processing script to match + # on demographics there so the client lookup already has CHI. + + # Match to demographics lookup to get CHI + join_client_demog <- sc_client %>% + dplyr::left_join( + sc_demographics %>% + dplyr::select("sending_location", "social_care_id", "chi"), + by = c("sending_location", "social_care_id") + ) + + # Match on client variables by chi + individual_file <- individual_file %>% + dplyr::left_join( + join_client_demog, + by = "chi" + ) %>% + dplyr::select(!c("sending_location", "social_care_id", "sc_latest_submission")) + + return(individual_file) +} diff --git a/man/join_sc_client.Rd b/man/join_sc_client.Rd new file mode 100644 index 000000000..a30719698 --- /dev/null +++ b/man/join_sc_client.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_individual_file.R +\name{join_sc_client} +\alias{join_sc_client} +\title{Join sc client variables onto individual file} +\usage{ +join_sc_client( + individual_file, + year, + sc_client = read_file(get_source_extract_path(year, "Client")), + sc_demographics 
= read_file(get_sc_demog_lookup_path(), col_select = + c("sending_location", "social_care_id", "chi")) +) +} +\arguments{ +\item{individual_file}{the processed individual file} + +\item{year}{financial year.} + +\item{sc_client}{SC client lookup} + +\item{sc_demographics}{SC Demographic lookup} +} +\description{ +Match on sc client variables. +} From 1bb52aa07f5f3e8dfef79b28957519b81f255a02 Mon Sep 17 00:00:00 2001 From: Moohan Date: Mon, 17 Jul 2023 14:39:09 +0000 Subject: [PATCH 199/200] Update documentation --- man/join_cohort_lookups.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/join_cohort_lookups.Rd b/man/join_cohort_lookups.Rd index 7e18e022c..fcd419a1b 100644 --- a/man/join_cohort_lookups.Rd +++ b/man/join_cohort_lookups.Rd @@ -11,7 +11,7 @@ join_cohort_lookups(data, year, update = latest_update()) \item{year}{The year to process, in FY format.} -\item{update}{The latest update e.g. "Jun_2023"} +\item{update}{The update to use} } \value{ The data including the Demographic and Service Use lookups. From dabbf57cbd8dafa5340da323c54d4e389619835a Mon Sep 17 00:00:00 2001 From: James McMahon Date: Wed, 19 Jul 2023 11:15:18 +0100 Subject: [PATCH 200/200] Output the individual file with `anon_chi` (#748) * Make episode file output with `anon_chi` I've added this as a parameter so you can output CHI if desired, but the default is for anon_chi. For the tests, it swaps back to CHI as there are some tests which specifically use the CHI number. 
* Output `anon_chi` in the individual file * Style code * Sort variables with issues `hbrescode` (HB2018), `datazone` and `hscp` (#746) * rename `hscp` to `hscp2018` * rename `spd` as `slf_pc_lookup` * Add `datazone2011` to coalesce code * Rename `datazone` to `datazone2011` * include `datazone2011_old` in selections * Update R/fill_geographies.R --------- Co-authored-by: James McMahon * Fix for anon_chi being NA --------- Co-authored-by: Moohan Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com> --- .github/actions/spelling/expect.txt | 1 + R/create_individual_file.R | 29 +++++++++++++++++++++++++---- R/fill_geographies.R | 11 ++++++----- R/process_tests_episode_file.R | 5 +++-- R/process_tests_individual_file.R | 5 +++-- R/run_episode_file.R | 16 +++++++++++++++- man/create_individual_file.Rd | 14 +++++++++++++- man/run_episode_file.Rd | 10 +++++++++- 8 files changed, 75 insertions(+), 16 deletions(-) diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt index 6c13224ba..3236edd84 100644 --- a/.github/actions/spelling/expect.txt +++ b/.github/actions/spelling/expect.txt @@ -105,6 +105,7 @@ keydate keyring keytime keytimex +kis lgl kis los diff --git a/R/create_individual_file.R b/R/create_individual_file.R index ab895926b..675e2066a 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -3,13 +3,27 @@ #' @description Creates individual file from episode file #' #' @param episode_file Tibble containing episodic data -#' @param year The year to process, in FY format. -#' @param write_to_disk (optional) Should the data be written to disk default is -#' `TRUE` i.e. write the data to disk. 
+#' @param anon_chi_in (Default:TRUE) Is `anon_chi` used in the input +#' (instead of chi) +#' @inheritParams run_episode_file #' #' @return The processed individual file #' @export -create_individual_file <- function(episode_file, year, write_to_disk = TRUE) { +create_individual_file <- function( + episode_file, + year, + write_to_disk = TRUE, + anon_chi_in = TRUE, + anon_chi_out = TRUE) { + if (anon_chi_in) { + episode_file <- slfhelper::get_chi( + episode_file, + anon_chi_var = "anon_chi", + drop = TRUE + ) %>% + dplyr::mutate(chi = dplyr::na_if(.data$chi, "")) + } + individual_file <- episode_file %>% dplyr::select( "year", @@ -60,6 +74,13 @@ create_individual_file <- function(episode_file, year, write_to_disk = TRUE) { join_sc_client(year) %>% dplyr::mutate(year = year) + if (anon_chi_out) { + individual_file <- individual_file %>% + tidyr::replace_na(list(chi = "")) %>% + slfhelper::get_anon_chi() %>% + dplyr::mutate(anon_chi = dplyr::na_if(.data$anon_chi, "")) + } + if (write_to_disk) { slf_indiv_path <- get_file_path( get_year_dir(year), diff --git a/R/fill_geographies.R b/R/fill_geographies.R index 28bab7fa2..58d001493 100644 --- a/R/fill_geographies.R +++ b/R/fill_geographies.R @@ -85,7 +85,7 @@ make_gpprac_lookup <- function(data) { } fill_postcode_geogs <- function(data) { - spd <- read_file(get_slf_postcode_path()) + slf_pc_lookup <- read_file(get_slf_postcode_path()) filled_postcodes <- dplyr::left_join( data, @@ -102,7 +102,7 @@ fill_postcode_geogs <- function(data) { ) %>% # Fill geographies dplyr::left_join( - spd, + slf_pc_lookup, by = "postcode", suffix = c("_old", "") ) %>% @@ -117,10 +117,11 @@ fill_postcode_geogs <- function(data) { cascade_geographies() %>% dplyr::mutate( hbrescode = dplyr::coalesce(.data$hb2018, .data$hbrescode), - hscp = dplyr::coalesce(.data$hscp2018, .data$hscp), - lca = dplyr::coalesce(.data$lca, .data$lca_old) + hscp2018 = dplyr::coalesce(.data$hscp2018, .data$hscp), + lca = dplyr::coalesce(.data$lca, .data$lca_old), + 
datazone2011 = dplyr::coalesce(.data$datazone2011, .data$datazone2011_old) ) %>% - dplyr::select(!c("hb2018", "hscp2018", "lca_old", "most_recent_postcode")) + dplyr::select(!c("hb2018", "hscp", "lca_old", "datazone2011_old", "most_recent_postcode")) return(filled_postcodes) } diff --git a/R/process_tests_episode_file.R b/R/process_tests_episode_file.R index b595d1d54..46e9e7171 100644 --- a/R/process_tests_episode_file.R +++ b/R/process_tests_episode_file.R @@ -10,7 +10,7 @@ process_tests_episode_file <- function(data, year) { data <- data %>% dplyr::select( "year", - "chi", + "anon_chi", "gender", "postcode", "hbtreatcode", @@ -20,7 +20,8 @@ process_tests_episode_file <- function(data, year) { "record_keydate1", "record_keydate2", dplyr::contains(c("beddays", "cost", "cij")) - ) + ) %>% + slfhelper::get_chi() old_data <- get_existing_data_for_tests(data) diff --git a/R/process_tests_individual_file.R b/R/process_tests_individual_file.R index 32bbd8d3a..695dc19a0 100644 --- a/R/process_tests_individual_file.R +++ b/R/process_tests_individual_file.R @@ -10,7 +10,7 @@ process_tests_individual_file <- function(data, year) { data <- data %>% dplyr::select( "year", - "chi", + "anon_chi", "gender", "postcode", "dob", @@ -26,7 +26,8 @@ process_tests_individual_file <- function(data, year) { "cases", "consultations" )) - ) + ) %>% + slfhelper::get_chi() old_data <- get_existing_data_for_tests(data, file_version = "individual") diff --git a/R/run_episode_file.R b/R/run_episode_file.R index 45a4e6ed5..1f2bb33ed 100644 --- a/R/run_episode_file.R +++ b/R/run_episode_file.R @@ -4,11 +4,17 @@ #' @param year The year to process, in FY format. #' @param write_to_disk (optional) Should the data be written to disk default is #' `TRUE` i.e. write the data to disk. 
+#' @param anon_chi_out (Default:TRUE) Should `anon_chi` be used in the output +#' (instead of chi) #' #' @return a [tibble][tibble::tibble-package] containing the episode file #' @export #' -run_episode_file <- function(processed_data_list, year, write_to_disk = TRUE) { +run_episode_file <- function( + processed_data_list, + year, + write_to_disk = TRUE, + anon_chi_out = TRUE) { episode_file <- dplyr::bind_rows(processed_data_list) %>% create_cost_inc_dna() %>% apply_cost_uplift() %>% @@ -103,6 +109,14 @@ run_episode_file <- function(processed_data_list, year, write_to_disk = TRUE) { join_deaths_data(year) %>% load_ep_file_vars(year) + if (anon_chi_out) { + episode_file <- slfhelper::get_anon_chi( + episode_file, + chi_var = "chi", + drop = TRUE + ) + } + if (write_to_disk) { slf_path <- get_file_path( get_year_dir(year), diff --git a/man/create_individual_file.Rd b/man/create_individual_file.Rd index d1feb23df..fa759e7b1 100644 --- a/man/create_individual_file.Rd +++ b/man/create_individual_file.Rd @@ -4,7 +4,13 @@ \alias{create_individual_file} \title{Create individual file} \usage{ -create_individual_file(episode_file, year, write_to_disk = TRUE) +create_individual_file( + episode_file, + year, + write_to_disk = TRUE, + anon_chi_in = TRUE, + anon_chi_out = TRUE +) } \arguments{ \item{episode_file}{Tibble containing episodic data} @@ -13,6 +19,12 @@ create_individual_file(episode_file, year, write_to_disk = TRUE) \item{write_to_disk}{(optional) Should the data be written to disk default is \code{TRUE} i.e. 
write the data to disk.} + +\item{anon_chi_in}{(Default:TRUE) Is \code{anon_chi} used in the input +(instead of chi)} + +\item{anon_chi_out}{(Default:TRUE) Should \code{anon_chi} be used in the output +(instead of chi)} } \value{ The processed individual file diff --git a/man/run_episode_file.Rd b/man/run_episode_file.Rd index e85621b59..59d5fea1d 100644 --- a/man/run_episode_file.Rd +++ b/man/run_episode_file.Rd @@ -4,7 +4,12 @@ \alias{run_episode_file} \title{Produce the Source Episode file} \usage{ -run_episode_file(processed_data_list, year, write_to_disk = TRUE) +run_episode_file( + processed_data_list, + year, + write_to_disk = TRUE, + anon_chi_out = TRUE +) } \arguments{ \item{processed_data_list}{containing data from processed extracts.} @@ -13,6 +18,9 @@ run_episode_file(processed_data_list, year, write_to_disk = TRUE) \item{write_to_disk}{(optional) Should the data be written to disk default is \code{TRUE} i.e. write the data to disk.} + +\item{anon_chi_out}{(Default:TRUE) Should \code{anon_chi} be used in the output +(instead of chi)} } \value{ a \link[tibble:tibble-package]{tibble} containing the episode file