Skip to content

Commit

Permalink
Merge branch 'master' into episode/write_dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
Moohan authored Jul 17, 2023
2 parents c674932 + 8e1bcc9 commit 73ad0b2
Show file tree
Hide file tree
Showing 6 changed files with 66 additions and 52 deletions.
4 changes: 2 additions & 2 deletions R/fill_geographies.R
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ fill_postcode_geogs <- function(data) {
hscp = dplyr::coalesce(.data$hscp2018, .data$hscp),
lca = dplyr::coalesce(.data$lca, .data$lca_old)
) %>%
dplyr::select(-"hb2018", -"hscp2018", -"lca_old")
dplyr::select(!c("hb2018", "hscp2018", "lca_old", "most_recent_postcode"))

return(filled_postcodes)
}
Expand Down Expand Up @@ -148,7 +148,7 @@ fill_gpprac_geographies <- function(data) {
dplyr::mutate(
hbpraccode = dplyr::coalesce(.data$hbpraccode, .data$hbpraccode_old)
) %>%
dplyr::select(-"hbpraccode_old")
dplyr::select(!c("hbpraccode_old", "most_recent_gpprac"))

return(filled_gpprac)
}
Expand Down
55 changes: 29 additions & 26 deletions R/link_delayed_discharge_eps.R
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
#' Link Delayed Discharge to WIP episode file
#'
#' @param data The input data frame
#' @param episode_file The episode file
#' @param year The year being processed
#' @param dd_data The processed DD extract
#'
#' @return A data frame with the delayed discharge cohort added and linked
#' using the `cij_marker`
#'
#' @family episode file
link_delayed_discharge_eps <- function(data, year) {
year_param <- year

data <- data %>%
link_delayed_discharge_eps <- function(
episode_file,
year,
dd_data = read_file(get_source_extract_path(year, "DD"))) {
episode_file <- episode_file %>%
dplyr::mutate(
# remember to revoke the cij_end_date with dummy_cij_end
cij_start_date_lower = .data$cij_start_date - lubridate::days(1L),
Expand All @@ -34,8 +36,7 @@ link_delayed_discharge_eps <- function(data, year) {

## handling DD ----
# no flag for last reported
dd_data <-
read_file(get_source_extract_path(year_param, "DD")) %>%
dd_data <- dd_data %>%
dplyr::mutate(
# remember to revoke the keydate2 and amended_dates with dummy_keydate2
is_dummy_keydate2 = is.na(.data$record_keydate2),
Expand All @@ -53,11 +54,12 @@ link_delayed_discharge_eps <- function(data, year) {
x$dummy_keydate2 <= y$dummy_cij_end
)

data <- dd_data %>%
dplyr::inner_join(data,
by = by_dd,
suffix = c("_dd", "")
) %>%
linked_data <- dplyr::inner_join(
x = dd_data,
y = episode_file,
by = by_dd,
suffix = c("_dd", "")
) %>%
# Remove existing beddays as we're re-calculating them for this cohort
dplyr::select(-dplyr::ends_with("beddays")) %>%
dplyr::arrange(
Expand All @@ -78,7 +80,7 @@ link_delayed_discharge_eps <- function(data, year) {
) %>%
# determine DD quality
dplyr::mutate(
dd_type = dplyr::if_else(
dd_quality = dplyr::if_else(
is.na(.data$cij_marker),
"no-cij",
dplyr::case_when(
Expand Down Expand Up @@ -199,8 +201,8 @@ link_delayed_discharge_eps <- function(data, year) {
.default = "-"
)
),
dd_type = factor(
.data$dd_type,
dd_quality = factor(
.data$dd_quality,
levels = c(
"1",
"1P",
Expand All @@ -224,16 +226,16 @@ link_delayed_discharge_eps <- function(data, year) {

# For "1APE" and "3ADPE", assign cij_end_date to record_keydate2_dd
record_keydate2_dd = dplyr::if_else(
.data$dd_type == "1APE" | .data$dd_type == "3ADPE",
.data$dd_quality == "1APE" | .data$dd_quality == "3ADPE",
max(.data$record_keydate1_dd, .data$cij_end_date),
.data$record_keydate2_dd
),
datediff_end = abs(.data$cij_end_date - .data$record_keydate2_dd),
datediff_start = .data$cij_start_date - .data$record_keydate1_dd
) %>%
dplyr::filter(.data$dd_type != "-") %>%
dplyr::filter(.data$dd_quality != "-") %>%
dplyr::mutate(smrtype = dplyr::case_match(
as.character(.data$dd_type),
as.character(.data$dd_quality),
c(
"1",
"1P",
Expand Down Expand Up @@ -262,7 +264,7 @@ link_delayed_discharge_eps <- function(data, year) {
.data$record_keydate1_dd,
.data$record_keydate2_dd,
.data$dummy_id,
.data$dd_type,
.data$dd_quality,
.data$datediff_end,
dplyr::desc(.data$datediff_start)
) %>%
Expand All @@ -289,7 +291,9 @@ link_delayed_discharge_eps <- function(data, year) {
.data$record_keydate2_dd,
count_last = TRUE
) %>%
dplyr::mutate(yearstay = rowSums(dplyr::pick(dplyr::ends_with("_beddays")))) %>%
dplyr::mutate(
yearstay = rowSums(dplyr::pick(dplyr::ends_with("_beddays")))
) %>%
# tidy up and rename columns to match the format of episode files
dplyr::select(
"year" = "year_dd",
Expand All @@ -303,9 +307,8 @@ link_delayed_discharge_eps <- function(data, year) {
"age",
"gpprac",
"postcode" = "postcode_dd",
"lca" = "dd_responsible_lca",
"dd_responsible_lca",
"hbtreatcode" = "hbtreatcode_dd",
"original_admission_date",
"delay_end_reason",
"primary_delay_reason",
"secondary_delay_reason",
Expand All @@ -320,13 +323,13 @@ link_delayed_discharge_eps <- function(data, year) {
"cij_delay",
"location",
"spec" = "spec_dd",
"dd_type",
"dd_quality",
dplyr::ends_with("_beddays"),
"yearstay"
) %>%
# combine DD with episode data
# Combine DD with episode data
dplyr::bind_rows(
data %>%
episode_file %>%
dplyr::select(
-c(
"cij_start_date_lower",
Expand All @@ -340,5 +343,5 @@ link_delayed_discharge_eps <- function(data, year) {
)
)

return(data)
return(linked_data)
}
21 changes: 12 additions & 9 deletions R/process_extract_delayed_discharges.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@
#' @return the final data as a [tibble][tibble::tibble-package].
#' @export
#' @family process extracts

process_extract_delayed_discharges <- function(data,
year,
write_to_disk = TRUE) {
process_extract_delayed_discharges <- function(
data,
year,
write_to_disk = TRUE) {
# Only run for a single year
stopifnot(length(year) == 1L)

Expand Down Expand Up @@ -48,7 +48,8 @@ process_extract_delayed_discharges <- function(data,
record_keydate1 = .data[["rdd"]],
record_keydate2 = .data[["delay_end_date"]]
) %>%
# Use end of the month date for records with no end date (but we think have ended)
# Use end of the month date for records with no end date
# (but we think have ended)
# Create a flag for these records
dplyr::mutate(
month_end = lubridate::ceiling_date(.data[["monthflag"]], "month") - 1L,
Expand Down Expand Up @@ -83,7 +84,8 @@ process_extract_delayed_discharges <- function(data,
# Flag records with no end date
not_mh_spec = is.na(.data$record_keydate2) & !(.data$spec %in% mh_spec)
) %>%
# Keep only records which have an end date (except Mental Health) and fall within our dates.
# Keep only records which have an end date (except Mental Health) and fall
# within our dates.
dplyr::filter(.data$dates_in_fyyear, !.data$not_mh_spec)

dd_final <- dd_clean %>%
Expand All @@ -106,9 +108,10 @@ process_extract_delayed_discharges <- function(data,
)

if (write_to_disk) {
dd_final %>%
# Save as rds file
write_file(get_source_extract_path(year, "DD", check_mode = "write"))
write_file(
dd_final,
get_source_extract_path(year, "DD", check_mode = "write")
)
}

return(dd_final)
Expand Down
27 changes: 14 additions & 13 deletions R/process_sc_all_home_care.R
Original file line number Diff line number Diff line change
Expand Up @@ -147,9 +147,6 @@ process_sc_all_home_care <- function(
)
)


# Outfile ---------------------------------------

merge_data <- pivoted_hours %>%
# group the data to be merged
dplyr::group_by(
Expand All @@ -169,16 +166,19 @@ process_sc_all_home_care <- function(
# Store the period for the latest submitted record
sc_latest_submission = dplyr::last(.data$period),
# Sum the (quarterly) hours
dplyr::across(tidyselect::starts_with("hc_hours_"), sum),
dplyr::across(tidyselect::starts_with("hc_cost_"), sum),
dplyr::across(
c(dplyr::starts_with("hc_hours_"), -"hc_hours_derived"),
sum
),
dplyr::across(dplyr::starts_with("hc_cost_"), sum),
# Shouldn't matter as these are all the same
dplyr::across(c("gender", "dob", "postcode"), dplyr::first)
) %>%
dplyr::ungroup()


# Create Source variables---------------------------------------
final_data <- merge_data %>%
all_hc_processed <- merge_data %>%
# rename
dplyr::rename(
record_keydate1 = "hc_service_start_date",
Expand All @@ -198,15 +198,16 @@ process_sc_all_home_care <- function(
# person_id
create_person_id(type = "SC") %>%
# compute lca variable from sending_location
dplyr::mutate(sc_send_lca = convert_sending_location_to_lca(.data$sending_location))

# Save outfile---------------------------------------------------
dplyr::mutate(
sc_send_lca = convert_sending_location_to_lca(.data$sending_location)
)

if (write_to_disk) {
# Save .rds file
final_data %>%
write_file(get_sc_hc_episodes_path(check_mode = "write"))
write_file(
all_hc_processed,
get_sc_hc_episodes_path(check_mode = "write")
)
}

return(final_data)
return(all_hc_processed)
}
1 change: 1 addition & 0 deletions R/run_episode_file.R
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ run_episode_file <- function(processed_data_list, year, write_to_disk = TRUE) {
join_cohort_lookups(year) %>%
join_sparra_hhg(year) %>%
fill_geographies() %>%
join_deaths_data(year) %>%
load_ep_file_vars(year)

if (write_to_disk) {
Expand Down
10 changes: 8 additions & 2 deletions man/link_delayed_discharge_eps.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 73ad0b2

Please sign in to comment.