diff --git a/R/process_sc_all_care_home.R b/R/process_sc_all_care_home.R index a38b56f3b..d287f2042 100644 --- a/R/process_sc_all_care_home.R +++ b/R/process_sc_all_care_home.R @@ -29,39 +29,28 @@ process_sc_all_care_home <- function( write_to_disk = TRUE) { ## Data Cleaning----------------------------------------------------- ch_clean <- data %>% - dplyr::mutate(ch_admission_date = fix_sc_start_dates( - .data$ch_admission_date, - .data$period_start_date - )) %>% - dplyr::group_by( - social_care_id, - sending_location, - ch_admission_date - ) %>% - dplyr::mutate(episode_max_discharge_date = max( - pmin(period_end_date, - ch_discharge_date, - na.rm = TRUE + dplyr::mutate( + record_date = end_fy_quarter(.data[["period"]]), + qtr_start = start_fy_quarter(.data[["period"]]), + # Set missing admission date to start of the submitted quarter + ch_admission_date = dplyr::if_else( + is.na(.data[["ch_admission_date"]]), + .data[["qtr_start"]], + .data[["ch_admission_date"]] + ), + # TODO check if we should set the dis date to the end of the period? + # If the dis date is before admission, remove the dis date + ch_discharge_date = dplyr::if_else( + .data[["ch_admission_date"]] > .data[["ch_discharge_date"]], + lubridate::NA_Date_, + .data[["ch_discharge_date"]] ) - )) %>% - dplyr::ungroup() %>% - dplyr::mutate(test = ifelse(ch_admission_date > ch_discharge_date, 1, 0)) %>% - # dplyr::mutate(ch_discharge_date = fix_sc_missing_end_dates( - # .data$ch_discharge_date, - # .data$period_end_date - # )) %>% - # Fix ch_discharge_date is earlier than ch_admission_date by setting end_date to the end of fy - dplyr::mutate(ch_discharge_date = fix_sc_end_dates( - .data$ch_admission_date, - .data$ch_discharge_date, - .data$period - )) %>% + ) %>% dplyr::left_join(sc_demog_lookup, by = c("sending_location", "social_care_id") ) %>% replace_sc_id_with_latest() - name_postcode_clean <- fill_ch_names( ch_data = ch_clean, ch_name_lookup_path = ch_name_lookup_path, @@ -73,10 +62,11 @@ process_sc_all_care_home <- function( ch_provider = dplyr::if_else(is.na(.data[["ch_provider"]]), 6L, .data[["ch_provider"]]) ) %>% # sort data - # TODO - Different from SPSS. SPSS has nursing provider and period in the group_by. Needs investigation - does it matter? - dplyr::group_by( - .data[["sending_location"]], - .data[["social_care_id"]] + dplyr::arrange( + "sending_location", + "social_care_id", + "ch_admission_date", + "period" ) %>% dplyr::group_by( .data[["sending_location"]], @@ -95,20 +85,16 @@ process_sc_all_care_home <- function( -"min_ch_provider", -"max_ch_provider" ) %>% + # tidy up ch_provider using 6 when disagreeing values + tidyr::fill(.data[["ch_provider"]], .direction = "downup") %>% dplyr::ungroup() + fixed_nursing_provision <- fixed_ch_provider %>% - dplyr::arrange( - "sending_location", - "social_care_id", - "period_start_date", - "ch_admission_date" - ) %>% dplyr::group_by( .data[["sending_location"]], .data[["social_care_id"]], - .data[["chi"]], .data[["ch_admission_date"]] ) %>% # fill in nursing care provision when missing @@ -116,30 +102,17 @@ process_sc_all_care_home <- function( dplyr::mutate( nursing_care_provision = dplyr::na_if(.data[["nursing_care_provision"]], 9L) ) %>% - tidyr::fill(all_of("nursing_care_provision"), .direction = "downup") %>% - dplyr::ungroup() + tidyr::fill(.data[["nursing_care_provision"]], .direction = "downup") + ready_to_merge <- fixed_nursing_provision %>% - # dplyr::filter(chi == "3005291146") %>% # remove any duplicate records before merging for speed and simplicity dplyr::distinct() %>% - dplyr::arrange( - sending_location, - social_care_id, - period_start_date, - ch_admission_date - ) %>% - dplyr::group_by( - sending_location, - social_care_id, - chi, - ch_admission_date - ) %>% # counter for split episodes dplyr::mutate( split_episode = tidyr::replace_na( - "nursing_care_provision" != dplyr::lag( - "nursing_care_provision" + .data[["nursing_care_provision"]] != dplyr::lag( + .data[["nursing_care_provision"]] ), TRUE ), @@ -163,11 +136,10 @@ process_sc_all_care_home <- function( ) %>% dplyr::arrange( dplyr::desc(.data[["period"]]), - dplyr::desc(.data[["episode_max_discharge_date"]]), - # dplyr::desc(.data[["ch_discharge_date"]]), + dplyr::desc(.data[["ch_discharge_date"]]), dplyr::desc(.data[["ch_provider"]]), - dplyr::desc(.data[["period_end_date"]]), - dplyr::desc(.data[["period_start_date"]]), + dplyr::desc(.data[["record_date"]]), + dplyr::desc(.data[["qtr_start"]]), dplyr::desc(.data[["ch_name"]]), dplyr::desc(.data[["ch_postcode"]]), dplyr::desc(.data[["reason_for_admission"]]), @@ -180,11 +152,10 @@ process_sc_all_care_home <- function( sc_latest_submission = dplyr::first(.data[["period"]]), dplyr::across( c( - # "ch_discharge_date", - "episode_max_discharge_date", + "ch_discharge_date", "ch_provider", - "period_end_date", - "period_start_date", + "record_date", + "qtr_start", "ch_name", "ch_postcode", "reason_for_admission", @@ -206,8 +177,6 @@ process_sc_all_care_home <- function( ) %>% # counter for latest submission # TODO check if this is the same as split_episode_counter? - # Megan - it's not! split_episode counter is a running count of cases grouped by nursing provider, - # and latest_submission counter is a running count grouped by the admission date. dplyr::mutate( latest_submission_counter = tidyr::replace_na( .data[["sc_latest_submission"]] != dplyr::lag( @@ -223,23 +192,18 @@ process_sc_all_care_home <- function( ch_admission_date = dplyr::if_else( .data[["sum_latest_submission"]] == min(.data[["sum_latest_submission"]]), .data[["ch_admission_date"]], - .data[["period_start_date"]] + .data[["qtr_start"]] ), # If it's the last episode(s) then keep the discharge date(s), otherwise # use the end of the quarter ch_discharge_date = dplyr::if_else( .data[["sum_latest_submission"]] == max(.data[["sum_latest_submission"]]), - .data[["episode_max_discharge_date"]], - # .data[["ch_discharge_date"]], - - .data[["period_end_date"]] + .data[["ch_discharge_date"]], + .data[["record_date"]] ) ) %>% dplyr::ungroup() - - test <- ch_episode %>% - dplyr::mutate(test = ifelse(ch_discharge_date == episode_max_discharge_date, 1, 0)) # Compare to Deaths Data # match ch_episode data with deaths data matched_deaths_data <- ch_episode %>% @@ -277,11 +241,7 @@ process_sc_all_care_home <- function( ch_markers <- matched_deaths_data %>% # ch_chi_cis - dplyr::group_by( - .data[["chi"]], - .data[["sending_location"]], - .data[["social_care_id"]] - ) %>% + dplyr::group_by(.data[["chi"]]) %>% dplyr::mutate( continuous_stay_chi = tidyr::replace_na( .data[["ch_admission_date"]] <= dplyr::lag( @@ -319,7 +279,7 @@ process_sc_all_care_home <- function( ch_ep_start = min(.data[["ch_admission_date"]]), ch_ep_end = max( pmin( - .data[["period_end_date"]], + .data[["record_date"]], .data[["ch_discharge_date"]], na.rm = TRUE ) diff --git a/Run_SLF_Files_manually/run_episode_file_1718.R b/Run_SLF_Files_manually/run_episode_file_1718.R index 08c1eca13..bcb132f2f 100644 --- a/Run_SLF_Files_manually/run_episode_file_1718.R +++ b/Run_SLF_Files_manually/run_episode_file_1718.R @@ -7,6 +7,4 @@ processed_data_list <- targets::tar_read("processed_data_list_1718") # Run episode file create_episode_file(processed_data_list, year = year) %>% -process_tests_episode_file(year = year) - - + process_tests_episode_file(year = year) diff --git a/Run_SLF_Files_manually/run_episode_file_1819.R b/Run_SLF_Files_manually/run_episode_file_1819.R index aca0bc017..aba8d984b 100644 --- a/Run_SLF_Files_manually/run_episode_file_1819.R +++ b/Run_SLF_Files_manually/run_episode_file_1819.R @@ -7,6 +7,4 @@ processed_data_list <- targets::tar_read("processed_data_list_1819") # Run episode file create_episode_file(processed_data_list, year = year) %>% -process_tests_episode_file(year = year) - - + process_tests_episode_file(year = year) diff --git a/Run_SLF_Files_manually/run_episode_file_1920.R b/Run_SLF_Files_manually/run_episode_file_1920.R index 76182d869..52939a75f 100644 --- a/Run_SLF_Files_manually/run_episode_file_1920.R +++ b/Run_SLF_Files_manually/run_episode_file_1920.R @@ -7,6 +7,4 @@ processed_data_list <- targets::tar_read("processed_data_list_1920") # Run episode file create_episode_file(processed_data_list, year = year) %>% -process_tests_episode_file(year = year) - - + process_tests_episode_file(year = year) diff --git a/Run_SLF_Files_manually/run_episode_file_2021.R b/Run_SLF_Files_manually/run_episode_file_2021.R index ea812ca33..28cabfe5c 100644 --- a/Run_SLF_Files_manually/run_episode_file_2021.R +++ b/Run_SLF_Files_manually/run_episode_file_2021.R @@ -7,6 +7,4 @@ processed_data_list <- targets::tar_read("processed_data_list_2021") # Run episode file create_episode_file(processed_data_list, year = year) %>% -process_tests_episode_file(year = year) - - + process_tests_episode_file(year = year) diff --git a/Run_SLF_Files_manually/run_episode_file_2122.R b/Run_SLF_Files_manually/run_episode_file_2122.R index 8f4bce090..f12142164 100644 --- a/Run_SLF_Files_manually/run_episode_file_2122.R +++ b/Run_SLF_Files_manually/run_episode_file_2122.R @@ -7,6 +7,4 @@ processed_data_list <- targets::tar_read("processed_data_list_2122") # Run episode file create_episode_file(processed_data_list, year = year) %>% -process_tests_episode_file(year = year) - - + process_tests_episode_file(year = year) diff --git a/Run_SLF_Files_manually/run_episode_file_2223.R b/Run_SLF_Files_manually/run_episode_file_2223.R index 19b4bef29..aa5caacfc 100644 --- a/Run_SLF_Files_manually/run_episode_file_2223.R +++ b/Run_SLF_Files_manually/run_episode_file_2223.R @@ -7,6 +7,4 @@ processed_data_list <- targets::tar_read("processed_data_list_2223") # Run episode file create_episode_file(processed_data_list, year = year) %>% -process_tests_episode_file(year = year) - - + process_tests_episode_file(year = year) diff --git a/Run_SLF_Files_manually/run_episode_file_2324.R b/Run_SLF_Files_manually/run_episode_file_2324.R index 308226169..1733b076a 100644 --- a/Run_SLF_Files_manually/run_episode_file_2324.R +++ b/Run_SLF_Files_manually/run_episode_file_2324.R @@ -7,6 +7,4 @@ processed_data_list <- targets::tar_read("processed_data_list_2324") # Run episode file create_episode_file(processed_data_list, year = year) %>% -process_tests_episode_file(year = year) - - + process_tests_episode_file(year = year)