Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert CH back to September #885

Merged
merged 7 commits into from
Dec 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 39 additions & 79 deletions R/process_sc_all_care_home.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,39 +29,28 @@ process_sc_all_care_home <- function(
write_to_disk = TRUE) {
## Data Cleaning-----------------------------------------------------
ch_clean <- data %>%
dplyr::mutate(ch_admission_date = fix_sc_start_dates(
.data$ch_admission_date,
.data$period_start_date
)) %>%
dplyr::group_by(
social_care_id,
sending_location,
ch_admission_date
) %>%
dplyr::mutate(episode_max_discharge_date = max(
pmin(period_end_date,
ch_discharge_date,
na.rm = TRUE
dplyr::mutate(
record_date = end_fy_quarter(.data[["period"]]),
qtr_start = start_fy_quarter(.data[["period"]]),
# Set missing admission date to start of the submitted quarter
ch_admission_date = dplyr::if_else(
is.na(.data[["ch_admission_date"]]),
.data[["qtr_start"]],
.data[["ch_admission_date"]]
),
# TODO check if we should set the discharge date to the end of the period?
# If the discharge date is before the admission date, remove the discharge date
ch_discharge_date = dplyr::if_else(
.data[["ch_admission_date"]] > .data[["ch_discharge_date"]],
lubridate::NA_Date_,
.data[["ch_discharge_date"]]
)
)) %>%
dplyr::ungroup() %>%
dplyr::mutate(test = ifelse(ch_admission_date > ch_discharge_date, 1, 0)) %>%
# dplyr::mutate(ch_discharge_date = fix_sc_missing_end_dates(
# .data$ch_discharge_date,
# .data$period_end_date
# )) %>%
# Fix cases where ch_discharge_date is earlier than ch_admission_date by setting the end date to the end of the financial year
dplyr::mutate(ch_discharge_date = fix_sc_end_dates(
.data$ch_admission_date,
.data$ch_discharge_date,
.data$period
)) %>%
) %>%
dplyr::left_join(sc_demog_lookup,
by = c("sending_location", "social_care_id")
) %>%
replace_sc_id_with_latest()
Jennit07 marked this conversation as resolved.
Show resolved Hide resolved


name_postcode_clean <- fill_ch_names(
ch_data = ch_clean,
ch_name_lookup_path = ch_name_lookup_path,
Expand All @@ -73,10 +62,11 @@ process_sc_all_care_home <- function(
ch_provider = dplyr::if_else(is.na(.data[["ch_provider"]]), 6L, .data[["ch_provider"]])
) %>%
# sort data
# TODO - Different from SPSS. SPSS has nursing provider and period in the group_by. Needs investigation - does it matter?
dplyr::group_by(
.data[["sending_location"]],
.data[["social_care_id"]]
dplyr::arrange(
"sending_location",
"social_care_id",
"ch_admission_date",
"period"
) %>%
dplyr::group_by(
.data[["sending_location"]],
Expand All @@ -95,51 +85,34 @@ process_sc_all_care_home <- function(
-"min_ch_provider",
-"max_ch_provider"
) %>%
# tidy up ch_provider using 6 when disagreeing values
tidyr::fill(.data[["ch_provider"]], .direction = "downup") %>%
dplyr::ungroup()



fixed_nursing_provision <- fixed_ch_provider %>%
dplyr::arrange(
"sending_location",
"social_care_id",
"period_start_date",
"ch_admission_date"
) %>%
dplyr::group_by(
.data[["sending_location"]],
.data[["social_care_id"]],
.data[["chi"]],
.data[["ch_admission_date"]]
) %>%
# fill in nursing care provision when missing
# but present in the following entry
dplyr::mutate(
nursing_care_provision = dplyr::na_if(.data[["nursing_care_provision"]], 9L)
) %>%
tidyr::fill(all_of("nursing_care_provision"), .direction = "downup") %>%
dplyr::ungroup()
tidyr::fill(.data[["nursing_care_provision"]], .direction = "downup")
Jennit07 marked this conversation as resolved.
Show resolved Hide resolved


ready_to_merge <- fixed_nursing_provision %>%
# dplyr::filter(chi == "3005291146") %>%
# remove any duplicate records before merging for speed and simplicity
dplyr::distinct() %>%
dplyr::arrange(
sending_location,
social_care_id,
period_start_date,
ch_admission_date
) %>%
dplyr::group_by(
sending_location,
social_care_id,
chi,
ch_admission_date
) %>%
# counter for split episodes
dplyr::mutate(
split_episode = tidyr::replace_na(
"nursing_care_provision" != dplyr::lag(
"nursing_care_provision"
.data[["nursing_care_provision"]] != dplyr::lag(
.data[["nursing_care_provision"]]
),
TRUE
),
Expand All @@ -163,11 +136,10 @@ process_sc_all_care_home <- function(
) %>%
dplyr::arrange(
dplyr::desc(.data[["period"]]),
dplyr::desc(.data[["episode_max_discharge_date"]]),
# dplyr::desc(.data[["ch_discharge_date"]]),
dplyr::desc(.data[["ch_discharge_date"]]),
dplyr::desc(.data[["ch_provider"]]),
dplyr::desc(.data[["period_end_date"]]),
dplyr::desc(.data[["period_start_date"]]),
dplyr::desc(.data[["record_date"]]),
dplyr::desc(.data[["qtr_start"]]),
dplyr::desc(.data[["ch_name"]]),
dplyr::desc(.data[["ch_postcode"]]),
dplyr::desc(.data[["reason_for_admission"]]),
Expand All @@ -180,11 +152,10 @@ process_sc_all_care_home <- function(
sc_latest_submission = dplyr::first(.data[["period"]]),
dplyr::across(
c(
# "ch_discharge_date",
"episode_max_discharge_date",
"ch_discharge_date",
"ch_provider",
"period_end_date",
"period_start_date",
"record_date",
"qtr_start",
"ch_name",
"ch_postcode",
"reason_for_admission",
Expand All @@ -206,8 +177,6 @@ process_sc_all_care_home <- function(
) %>%
# counter for latest submission
# TODO check if this is the same as split_episode_counter?
# Megan - it's not! split_episode counter is a running count of cases grouped by nursing provider,
# and latest_submission counter is a running count grouped by the admission date.
dplyr::mutate(
latest_submission_counter = tidyr::replace_na(
.data[["sc_latest_submission"]] != dplyr::lag(
Expand All @@ -223,23 +192,18 @@ process_sc_all_care_home <- function(
ch_admission_date = dplyr::if_else(
.data[["sum_latest_submission"]] == min(.data[["sum_latest_submission"]]),
.data[["ch_admission_date"]],
.data[["period_start_date"]]
.data[["qtr_start"]]
),
# If it's the last episode(s) then keep the discharge date(s), otherwise
# use the end of the quarter
ch_discharge_date = dplyr::if_else(
.data[["sum_latest_submission"]] == max(.data[["sum_latest_submission"]]),
.data[["episode_max_discharge_date"]],
# .data[["ch_discharge_date"]],

.data[["period_end_date"]]
.data[["ch_discharge_date"]],
.data[["record_date"]]
)
) %>%
dplyr::ungroup()


test <- ch_episode %>%
dplyr::mutate(test = ifelse(ch_discharge_date == episode_max_discharge_date, 1, 0))
# Compare to Deaths Data
# match ch_episode data with deaths data
matched_deaths_data <- ch_episode %>%
Expand Down Expand Up @@ -277,11 +241,7 @@ process_sc_all_care_home <- function(

ch_markers <- matched_deaths_data %>%
# ch_chi_cis
dplyr::group_by(
.data[["chi"]],
.data[["sending_location"]],
.data[["social_care_id"]]
) %>%
dplyr::group_by(.data[["chi"]]) %>%
dplyr::mutate(
continuous_stay_chi = tidyr::replace_na(
.data[["ch_admission_date"]] <= dplyr::lag(
Expand Down Expand Up @@ -319,7 +279,7 @@ process_sc_all_care_home <- function(
ch_ep_start = min(.data[["ch_admission_date"]]),
ch_ep_end = max(
pmin(
.data[["period_end_date"]],
.data[["record_date"]],
.data[["ch_discharge_date"]],
na.rm = TRUE
)
Expand Down
4 changes: 1 addition & 3 deletions Run_SLF_Files_manually/run_episode_file_1718.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,4 @@ processed_data_list <- targets::tar_read("processed_data_list_1718")

# Run episode file
create_episode_file(processed_data_list, year = year) %>%
process_tests_episode_file(year = year)


process_tests_episode_file(year = year)
4 changes: 1 addition & 3 deletions Run_SLF_Files_manually/run_episode_file_1819.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,4 @@ processed_data_list <- targets::tar_read("processed_data_list_1819")

# Run episode file
create_episode_file(processed_data_list, year = year) %>%
process_tests_episode_file(year = year)


process_tests_episode_file(year = year)
4 changes: 1 addition & 3 deletions Run_SLF_Files_manually/run_episode_file_1920.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,4 @@ processed_data_list <- targets::tar_read("processed_data_list_1920")

# Run episode file
create_episode_file(processed_data_list, year = year) %>%
process_tests_episode_file(year = year)


process_tests_episode_file(year = year)
4 changes: 1 addition & 3 deletions Run_SLF_Files_manually/run_episode_file_2021.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,4 @@ processed_data_list <- targets::tar_read("processed_data_list_2021")

# Run episode file
create_episode_file(processed_data_list, year = year) %>%
process_tests_episode_file(year = year)


process_tests_episode_file(year = year)
4 changes: 1 addition & 3 deletions Run_SLF_Files_manually/run_episode_file_2122.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,4 @@ processed_data_list <- targets::tar_read("processed_data_list_2122")

# Run episode file
create_episode_file(processed_data_list, year = year) %>%
process_tests_episode_file(year = year)


process_tests_episode_file(year = year)
4 changes: 1 addition & 3 deletions Run_SLF_Files_manually/run_episode_file_2223.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,4 @@ processed_data_list <- targets::tar_read("processed_data_list_2223")

# Run episode file
create_episode_file(processed_data_list, year = year) %>%
process_tests_episode_file(year = year)


process_tests_episode_file(year = year)
4 changes: 1 addition & 3 deletions Run_SLF_Files_manually/run_episode_file_2324.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,4 @@ processed_data_list <- targets::tar_read("processed_data_list_2324")

# Run episode file
create_episode_file(processed_data_list, year = year) %>%
process_tests_episode_file(year = year)


process_tests_episode_file(year = year)
Loading