diff --git a/NAMESPACE b/NAMESPACE
index 874a4980b..40f1799c4 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -3,6 +3,7 @@
 export("%>%")
 export(add_homelessness_date_flags)
 export(add_homelessness_flag)
+export(add_hri_variables)
 export(add_nsu_cohort)
 export(check_year_format)
 export(clean_up_free_text)
diff --git a/R/add_hri_variables.R b/R/add_hri_variables.R
new file mode 100644
index 000000000..320b6222e
--- /dev/null
+++ b/R/add_hri_variables.R
@@ -0,0 +1,163 @@
+#' Flag non-Scottish residents
+#'
+#' @details The variable keep flag can be in the range c(0:4) where
+#' \itemize{
+#' \item{keep_flag = 0 when resident is Scottish}
+#' \item{keep_flag = 1 when resident is not Scottish}
+#' \item{keep_flag = 2 when the postcode is missing or a dummy, and the gpprac is missing}
+#' \item{keep_flag = 3 when the gpprac is not English and the postcode is missing}
+#' \item{keep_flag = 4 when the gpprac is not English and the postcode is a dummy}
+#' }
+#' The intention is to only keep the records where keep_flag = 0
+#'
+#' @inheritParams add_hri_variables
+#'
+#' @return A data frame with the variable 'keep_flag'
+flag_non_scottish_residents <- function(
+    data,
+    slf_pc_lookup) {
+  check_variables_exist(data, c("postcode", "gpprac"))
+
+  # Make a lookup of postcode areas, which consist of the first characters
+  # of the postcode. str_extract() returns a plain character vector, whereas
+  # str_match() returns a matrix, which is not a usable join key.
+  pc_areas <- slf_pc_lookup %>%
+    dplyr::mutate(
+      pc_area = stringr::str_extract(.data$postcode, "^[A-Z]{1,3}"),
+      scot_flag = TRUE
+    ) %>%
+    # Drop postcodes with no recognisable area, otherwise an NA area in the
+    # lookup would match (and flag as Scottish) records with no postcode
+    dplyr::filter(!is.na(.data$pc_area)) %>%
+    dplyr::select("pc_area", "scot_flag") %>%
+    dplyr::distinct()
+
+  # Create a flag, 'keep_flag', to determine whether individuals are Scottish
+  # residents or not
+  return_data <- data %>%
+    dplyr::mutate(
+      pc_area = stringr::str_extract(.data$postcode, "^[A-Z]{1,3}")
+    ) %>%
+    dplyr::left_join(pc_areas, by = "pc_area") %>%
+    dplyr::mutate(
+      dummy_postcode = .data$postcode %in% c("BF010AA", "NF1 1AB", "NK010AA") |
+        stringr::str_sub(.data$postcode, 1, 4) %in% c("ZZ01", "ZZ61"),
+      eng_prac = .data$gpprac %in%
+        c(99942, 99957, 99961, 99976, 99981, 99995, 99999),
+      # case_when() uses the first condition that is TRUE; a missing scot_flag
+      # (no postcode area match) falls through to the later conditions
+      keep_flag = dplyr::case_when(
+        .data$scot_flag ~ 0L,
+        (is_missing(.data$postcode) | .data$dummy_postcode) &
+          is.na(.data$gpprac) ~ 2L,
+        !.data$eng_prac & is_missing(.data$postcode) ~ 3L,
+        !.data$eng_prac & .data$dummy_postcode ~ 4L,
+        .default = 1L
+      )
+    ) %>%
+    # Remove the helper columns so that only 'keep_flag' is added
+    dplyr::select(-"pc_area", -"scot_flag", -"dummy_postcode", -"eng_prac")
+
+  return(return_data)
+}
+
+#' Add HRI variables to an SLF Individual File
+#'
+#' @details Filters the dataset to only include Scottish residents, then
+#' creates a lookup where HRIs are calculated at Scotland, Health Board, and
+#' LCA level. Then joins on this lookup by chi.
+#'
+#' @param data An SLF individual file.
+#' @param slf_pc_lookup The Source postcode lookup, defaults
+#' to [get_slf_postcode_path()] read using [read_file()].
+#'
+#' @return The individual file with HRI variables matched on
+#' @export
+add_hri_variables <- function(
+    data,
+    slf_pc_lookup = read_file(
+      get_slf_postcode_path(),
+      col_select = "postcode"
+    )) {
+  hri_lookup <- data %>%
+    dplyr::select(
+      "year",
+      "chi",
+      "postcode",
+      "gpprac",
+      "lca",
+      "hbrescode",
+      "health_net_cost",
+      "acute_episodes",
+      "mat_episodes",
+      "mh_episodes",
+      "gls_episodes",
+      "op_newcons_attendances",
+      # op_newcons_dnas,
+      "ae_attendances",
+      "pis_paid_items",
+      "ooh_cases"
+    ) %>%
+    flag_non_scottish_residents(slf_pc_lookup = slf_pc_lookup) %>%
+    # Only Scottish residents (keep_flag = 0) count towards the HRI totals
+    dplyr::filter(.data$keep_flag == 0L) %>%
+    # Scotland cost and proportion
+    dplyr::mutate(
+      scotland_cost = sum(.data$health_net_cost),
+      scotland_pct = (.data$health_net_cost / .data$scotland_cost) * 100
+    ) %>%
+    dplyr::arrange(dplyr::desc(.data$health_net_cost)) %>%
+    dplyr::mutate(hri_scotp = cumsum(.data$scotland_pct)) %>%
+    # Health Board
+    dplyr::group_by(.data$hbrescode) %>%
+    dplyr::mutate(
+      hb_cost = sum(.data$health_net_cost),
+      hb_pct = (.data$health_net_cost / .data$hb_cost) * 100
+    ) %>%
+    dplyr::arrange(dplyr::desc(.data$health_net_cost), .by_group = TRUE) %>%
+    dplyr::mutate(hri_hbp = cumsum(.data$hb_pct)) %>%
+    dplyr::ungroup() %>%
+    # LCA
+    dplyr::group_by(.data$lca) %>%
+    dplyr::mutate(
+      lca_cost = sum(.data$health_net_cost),
+      lca_pct = (.data$health_net_cost / .data$lca_cost) * 100
+    ) %>%
+    dplyr::arrange(dplyr::desc(.data$health_net_cost), .by_group = TRUE) %>%
+    dplyr::mutate(hri_lcap = cumsum(.data$lca_pct)) %>%
+    dplyr::ungroup() %>%
+    # Add HRI flags
+    dplyr::mutate(
+      hri_scot = .data$hri_scotp <= 50.0,
+      hri_hb = .data$hri_hbp <= 50.0,
+      hri_lca = .data$hri_lcap <= 50.0,
+      # Deal with potential missing variables
+      hri_hb = dplyr::if_else(
+        is_missing(.data$hbrescode), FALSE, .data$hri_hb
+      ),
+      hri_hbp = dplyr::if_else(
+        is_missing(.data$hbrescode), NA_real_, .data$hri_hbp
+      ),
+      hri_lca = dplyr::if_else(
+        is_missing(.data$lca), FALSE, .data$hri_lca
+      ),
+      hri_lcap = dplyr::if_else(
+        is_missing(.data$lca), NA_real_, .data$hri_lcap
+      )
+    ) %>%
+    # Select only required variables for the lookup
+    dplyr::select(
+      "chi",
+      "hri_scot",
+      "hri_scotp",
+      "hri_hb",
+      "hri_hbp",
+      "hri_lca",
+      "hri_lcap"
+    )
+
+  # Match the HRI variables on to every record of the original data
+  return_data <- dplyr::left_join(data, hri_lookup, by = "chi")
+
+  return(return_data)
+}
diff --git a/man/add_hri_variables.Rd b/man/add_hri_variables.Rd
new file mode 100644
index 000000000..d35464357
--- /dev/null
+++ b/man/add_hri_variables.Rd
@@ -0,0 +1,28 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/add_hri_variables.R
+\name{add_hri_variables}
+\alias{add_hri_variables}
+\title{Add HRI variables to an SLF Individual File}
+\usage{
+add_hri_variables(
+  data,
+  slf_pc_lookup = read_file(get_slf_postcode_path(), col_select = "postcode")
+)
+}
+\arguments{
+\item{data}{An SLF individual file.}
+
+\item{slf_pc_lookup}{The Source postcode lookup, defaults
+to \code{\link[=get_slf_postcode_path]{get_slf_postcode_path()}} read using \code{\link[=read_file]{read_file()}}.}
+}
+\value{
+The individual file with HRI variables matched on
+}
+\description{
+Add HRI variables to an SLF Individual File
+}
+\details{
+Filters the dataset to only include Scottish residents, then
+creates a lookup where HRIs are calculated at Scotland, Health Board, and
+LCA level. Then joins on this lookup by chi.
+}
diff --git a/man/flag_non_scottish_residents.Rd b/man/flag_non_scottish_residents.Rd
new file mode 100644
index 000000000..ec97dedf6
--- /dev/null
+++ b/man/flag_non_scottish_residents.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/add_hri_variables.R
+\name{flag_non_scottish_residents}
+\alias{flag_non_scottish_residents}
+\title{Flag non-Scottish residents}
+\usage{
+flag_non_scottish_residents(data, slf_pc_lookup)
+}
+\arguments{
+\item{data}{An SLF individual file.}
+
+\item{slf_pc_lookup}{The Source postcode lookup, defaults
+to \code{\link[=get_slf_postcode_path]{get_slf_postcode_path()}} read using \code{\link[=read_file]{read_file()}}.}
+}
+\value{
+A data frame with the variable 'keep_flag'
+}
+\description{
+Flag non-Scottish residents
+}
+\details{
+The variable keep flag can be in the range c(0:4) where
+\itemize{
+\item{keep_flag = 0 when resident is Scottish}
+\item{keep_flag = 1 when resident is not Scottish}
+\item{keep_flag = 2 when the postcode is missing or a dummy, and the gpprac is missing}
+\item{keep_flag = 3 when the gpprac is not English and the postcode is missing}
+\item{keep_flag = 4 when the gpprac is not English and the postcode is a dummy}
+}
+The intention is to only keep the records where keep_flag = 0
+}
diff --git a/tests/testthat/test-flag_non_scottish_residents.R b/tests/testthat/test-flag_non_scottish_residents.R
new file mode 100644
index 000000000..b61d9e159
--- /dev/null
+++ b/tests/testthat/test-flag_non_scottish_residents.R
@@ -0,0 +1,32 @@
+test_that("Records are flagged correctly", {
+  # Minimal stand-in for the Source postcode lookup (Scottish postcodes only)
+  test_pc_lookup <- tibble::tibble(postcode = c("AB1 1AA", "G1 1AA"))
+
+  test_frame <- tibble::tribble(
+    ~postcode, ~gpprac,
+    # Scottish resident
+    "AB1 1AA", 18574,
+    # Dummy postcode and missing gpprac
+    "BF010AA", NA,
+    # Dummy postcode and missing gpprac (2)
+    "ZZ014AA", NA,
+    # Missing postcode and missing gpprac
+    NA, NA,
+    # Not English practice and missing postcode
+    NA, 18574,
+    # Not English practice and dummy postcode
+    "NF1 1AB", 18574,
+    # English postcode and English gpprac
+    "BS4 4RG", 99942
+  )
+
+  test_frame_flagged <- flag_non_scottish_residents(
+    test_frame,
+    slf_pc_lookup = test_pc_lookup
+  )
+
+  expect_equal(
+    test_frame_flagged$keep_flag,
+    c(0L, 2L, 2L, 2L, 3L, 4L, 1L)
+  )
+})