Skip to content

Commit

Permalink
Revert CH back to September (#885)
Browse files Browse the repository at this point in the history
* revert back to September update

* revert to september update

* Style code

* revert to september code

* Style code

* Revert to september update code

---------

Co-authored-by: marjom02 <[email protected]>
Co-authored-by: SwiftySalmon <[email protected]>
Co-authored-by: Jennifer Thom <[email protected]>
  • Loading branch information
4 people authored Dec 19, 2023
1 parent 7eae022 commit c982612
Show file tree
Hide file tree
Showing 8 changed files with 46 additions and 100 deletions.
118 changes: 39 additions & 79 deletions R/process_sc_all_care_home.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,39 +29,28 @@ process_sc_all_care_home <- function(
write_to_disk = TRUE) {
## Data Cleaning-----------------------------------------------------
ch_clean <- data %>%
dplyr::mutate(ch_admission_date = fix_sc_start_dates(
.data$ch_admission_date,
.data$period_start_date
)) %>%
dplyr::group_by(
social_care_id,
sending_location,
ch_admission_date
) %>%
dplyr::mutate(episode_max_discharge_date = max(
pmin(period_end_date,
ch_discharge_date,
na.rm = TRUE
dplyr::mutate(
record_date = end_fy_quarter(.data[["period"]]),
qtr_start = start_fy_quarter(.data[["period"]]),
# Set missing admission date to start of the submitted quarter
ch_admission_date = dplyr::if_else(
is.na(.data[["ch_admission_date"]]),
.data[["qtr_start"]],
.data[["ch_admission_date"]]
),
# TODO check if we should set the dis date to the end of the period?
# If the dis date is before admission, remove the dis date
ch_discharge_date = dplyr::if_else(
.data[["ch_admission_date"]] > .data[["ch_discharge_date"]],
lubridate::NA_Date_,
.data[["ch_discharge_date"]]
)
)) %>%
dplyr::ungroup() %>%
dplyr::mutate(test = ifelse(ch_admission_date > ch_discharge_date, 1, 0)) %>%
# dplyr::mutate(ch_discharge_date = fix_sc_missing_end_dates(
# .data$ch_discharge_date,
# .data$period_end_date
# )) %>%
# Fix ch_discharge_date is earlier than ch_admission_date by setting end_date to the end of fy
dplyr::mutate(ch_discharge_date = fix_sc_end_dates(
.data$ch_admission_date,
.data$ch_discharge_date,
.data$period
)) %>%
) %>%
dplyr::left_join(sc_demog_lookup,
by = c("sending_location", "social_care_id")
) %>%
replace_sc_id_with_latest()


name_postcode_clean <- fill_ch_names(
ch_data = ch_clean,
ch_name_lookup_path = ch_name_lookup_path,
Expand All @@ -73,10 +62,11 @@ process_sc_all_care_home <- function(
ch_provider = dplyr::if_else(is.na(.data[["ch_provider"]]), 6L, .data[["ch_provider"]])
) %>%
# sort data
# TODO - Different from SPSS. SPSS has nursing provider and period in the group_by. Needs investigation - does it matter?
dplyr::group_by(
.data[["sending_location"]],
.data[["social_care_id"]]
dplyr::arrange(
"sending_location",
"social_care_id",
"ch_admission_date",
"period"
) %>%
dplyr::group_by(
.data[["sending_location"]],
Expand All @@ -95,51 +85,34 @@ process_sc_all_care_home <- function(
-"min_ch_provider",
-"max_ch_provider"
) %>%
# tidy up ch_provider using 6 when disagreeing values
tidyr::fill(.data[["ch_provider"]], .direction = "downup") %>%
dplyr::ungroup()



fixed_nursing_provision <- fixed_ch_provider %>%
dplyr::arrange(
"sending_location",
"social_care_id",
"period_start_date",
"ch_admission_date"
) %>%
dplyr::group_by(
.data[["sending_location"]],
.data[["social_care_id"]],
.data[["chi"]],
.data[["ch_admission_date"]]
) %>%
# fill in nursing care provision when missing
# but present in the following entry
dplyr::mutate(
nursing_care_provision = dplyr::na_if(.data[["nursing_care_provision"]], 9L)
) %>%
tidyr::fill(all_of("nursing_care_provision"), .direction = "downup") %>%
dplyr::ungroup()
tidyr::fill(.data[["nursing_care_provision"]], .direction = "downup")


ready_to_merge <- fixed_nursing_provision %>%
# dplyr::filter(chi == "3005291146") %>%
# remove any duplicate records before merging for speed and simplicity
dplyr::distinct() %>%
dplyr::arrange(
sending_location,
social_care_id,
period_start_date,
ch_admission_date
) %>%
dplyr::group_by(
sending_location,
social_care_id,
chi,
ch_admission_date
) %>%
# counter for split episodes
dplyr::mutate(
split_episode = tidyr::replace_na(
"nursing_care_provision" != dplyr::lag(
"nursing_care_provision"
.data[["nursing_care_provision"]] != dplyr::lag(
.data[["nursing_care_provision"]]
),
TRUE
),
Expand All @@ -163,11 +136,10 @@ process_sc_all_care_home <- function(
) %>%
dplyr::arrange(
dplyr::desc(.data[["period"]]),
dplyr::desc(.data[["episode_max_discharge_date"]]),
# dplyr::desc(.data[["ch_discharge_date"]]),
dplyr::desc(.data[["ch_discharge_date"]]),
dplyr::desc(.data[["ch_provider"]]),
dplyr::desc(.data[["period_end_date"]]),
dplyr::desc(.data[["period_start_date"]]),
dplyr::desc(.data[["record_date"]]),
dplyr::desc(.data[["qtr_start"]]),
dplyr::desc(.data[["ch_name"]]),
dplyr::desc(.data[["ch_postcode"]]),
dplyr::desc(.data[["reason_for_admission"]]),
Expand All @@ -180,11 +152,10 @@ process_sc_all_care_home <- function(
sc_latest_submission = dplyr::first(.data[["period"]]),
dplyr::across(
c(
# "ch_discharge_date",
"episode_max_discharge_date",
"ch_discharge_date",
"ch_provider",
"period_end_date",
"period_start_date",
"record_date",
"qtr_start",
"ch_name",
"ch_postcode",
"reason_for_admission",
Expand All @@ -206,8 +177,6 @@ process_sc_all_care_home <- function(
) %>%
# counter for latest submission
# TODO check if this is the same as split_episode_counter?
# Megan - it's not! split_episode counter is a running count of cases grouped by nursing provider,
# and latest_submission counter is a running count grouped by the admission date.
dplyr::mutate(
latest_submission_counter = tidyr::replace_na(
.data[["sc_latest_submission"]] != dplyr::lag(
Expand All @@ -223,23 +192,18 @@ process_sc_all_care_home <- function(
ch_admission_date = dplyr::if_else(
.data[["sum_latest_submission"]] == min(.data[["sum_latest_submission"]]),
.data[["ch_admission_date"]],
.data[["period_start_date"]]
.data[["qtr_start"]]
),
# If it's the last episode(s) then keep the discharge date(s), otherwise
# use the end of the quarter
ch_discharge_date = dplyr::if_else(
.data[["sum_latest_submission"]] == max(.data[["sum_latest_submission"]]),
.data[["episode_max_discharge_date"]],
# .data[["ch_discharge_date"]],

.data[["period_end_date"]]
.data[["ch_discharge_date"]],
.data[["record_date"]]
)
) %>%
dplyr::ungroup()


test <- ch_episode %>%
dplyr::mutate(test = ifelse(ch_discharge_date == episode_max_discharge_date, 1, 0))
# Compare to Deaths Data
# match ch_episode data with deaths data
matched_deaths_data <- ch_episode %>%
Expand Down Expand Up @@ -277,11 +241,7 @@ process_sc_all_care_home <- function(

ch_markers <- matched_deaths_data %>%
# ch_chi_cis
dplyr::group_by(
.data[["chi"]],
.data[["sending_location"]],
.data[["social_care_id"]]
) %>%
dplyr::group_by(.data[["chi"]]) %>%
dplyr::mutate(
continuous_stay_chi = tidyr::replace_na(
.data[["ch_admission_date"]] <= dplyr::lag(
Expand Down Expand Up @@ -319,7 +279,7 @@ process_sc_all_care_home <- function(
ch_ep_start = min(.data[["ch_admission_date"]]),
ch_ep_end = max(
pmin(
.data[["period_end_date"]],
.data[["record_date"]],
.data[["ch_discharge_date"]],
na.rm = TRUE
)
Expand Down
4 changes: 1 addition & 3 deletions Run_SLF_Files_manually/run_episode_file_1718.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,4 @@ processed_data_list <- targets::tar_read("processed_data_list_1718")

# Run episode file
create_episode_file(processed_data_list, year = year) %>%
process_tests_episode_file(year = year)


process_tests_episode_file(year = year)
4 changes: 1 addition & 3 deletions Run_SLF_Files_manually/run_episode_file_1819.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,4 @@ processed_data_list <- targets::tar_read("processed_data_list_1819")

# Run episode file
create_episode_file(processed_data_list, year = year) %>%
process_tests_episode_file(year = year)


process_tests_episode_file(year = year)
4 changes: 1 addition & 3 deletions Run_SLF_Files_manually/run_episode_file_1920.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,4 @@ processed_data_list <- targets::tar_read("processed_data_list_1920")

# Run episode file
create_episode_file(processed_data_list, year = year) %>%
process_tests_episode_file(year = year)


process_tests_episode_file(year = year)
4 changes: 1 addition & 3 deletions Run_SLF_Files_manually/run_episode_file_2021.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,4 @@ processed_data_list <- targets::tar_read("processed_data_list_2021")

# Run episode file
create_episode_file(processed_data_list, year = year) %>%
process_tests_episode_file(year = year)


process_tests_episode_file(year = year)
4 changes: 1 addition & 3 deletions Run_SLF_Files_manually/run_episode_file_2122.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,4 @@ processed_data_list <- targets::tar_read("processed_data_list_2122")

# Run episode file
create_episode_file(processed_data_list, year = year) %>%
process_tests_episode_file(year = year)


process_tests_episode_file(year = year)
4 changes: 1 addition & 3 deletions Run_SLF_Files_manually/run_episode_file_2223.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,4 @@ processed_data_list <- targets::tar_read("processed_data_list_2223")

# Run episode file
create_episode_file(processed_data_list, year = year) %>%
process_tests_episode_file(year = year)


process_tests_episode_file(year = year)
4 changes: 1 addition & 3 deletions Run_SLF_Files_manually/run_episode_file_2324.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,4 @@ processed_data_list <- targets::tar_read("processed_data_list_2324")

# Run episode file
create_episode_file(processed_data_list, year = year) %>%
process_tests_episode_file(year = year)


process_tests_episode_file(year = year)

0 comments on commit c982612

Please sign in to comment.