From 30bfde1073a968eaf83b64966e28aa9138bd2b6e Mon Sep 17 00:00:00 2001 From: marjom02 Date: Mon, 18 Dec 2023 14:33:22 +0000 Subject: [PATCH 1/6] revert back to September update --- R/process_sc_all_care_home.R | 160 ++++++++++++++++------------------- 1 file changed, 74 insertions(+), 86 deletions(-) diff --git a/R/process_sc_all_care_home.R b/R/process_sc_all_care_home.R index a38b56f3b..21d14130d 100644 --- a/R/process_sc_all_care_home.R +++ b/R/process_sc_all_care_home.R @@ -29,39 +29,28 @@ process_sc_all_care_home <- function( write_to_disk = TRUE) { ## Data Cleaning----------------------------------------------------- ch_clean <- data %>% - dplyr::mutate(ch_admission_date = fix_sc_start_dates( - .data$ch_admission_date, - .data$period_start_date - )) %>% - dplyr::group_by( - social_care_id, - sending_location, - ch_admission_date - ) %>% - dplyr::mutate(episode_max_discharge_date = max( - pmin(period_end_date, - ch_discharge_date, - na.rm = TRUE + dplyr::mutate( + record_date = end_fy_quarter(.data[["period"]]), + qtr_start = start_fy_quarter(.data[["period"]]), + # Set missing admission date to start of the submitted quarter + ch_admission_date = dplyr::if_else( + is.na(.data[["ch_admission_date"]]), + .data[["qtr_start"]], + .data[["ch_admission_date"]] + ), + # TODO check if we should set the dis date to the end of the period? + # If the dis date is before admission, remove the dis date + ch_discharge_date = dplyr::if_else( + .data[["ch_admission_date"]] > .data[["ch_discharge_date"]], + lubridate::NA_Date_, + .data[["ch_discharge_date"]] ) - )) %>% - dplyr::ungroup() %>% - dplyr::mutate(test = ifelse(ch_admission_date > ch_discharge_date, 1, 0)) %>% - # dplyr::mutate(ch_discharge_date = fix_sc_missing_end_dates( - # .data$ch_discharge_date, - # .data$period_end_date - # )) %>% - # Fix ch_discharge_date is earlier than ch_admission_date by setting end_date to the end of fy - dplyr::mutate(ch_discharge_date = fix_sc_end_dates( - .data$ch_admission_date, - .data$ch_discharge_date, - .data$period - )) %>% + ) %>% dplyr::left_join(sc_demog_lookup, - by = c("sending_location", "social_care_id") + by = c("sending_location", "social_care_id") ) %>% replace_sc_id_with_latest() - name_postcode_clean <- fill_ch_names( ch_data = ch_clean, ch_name_lookup_path = ch_name_lookup_path, @@ -69,14 +58,15 @@ process_sc_all_care_home <- function( ) fixed_ch_provider <- name_postcode_clean %>% + # sort data dplyr::mutate( ch_provider = dplyr::if_else(is.na(.data[["ch_provider"]]), 6L, .data[["ch_provider"]]) ) %>% - # sort data - # TODO - Different from SPSS. SPSS has nursing provider and period in the group_by. Needs investigation - does it matter? - dplyr::group_by( - .data[["sending_location"]], - .data[["social_care_id"]] + dplyr::arrange( + "sending_location", + "social_care_id", + "ch_admission_date", + "period" ) %>% dplyr::group_by( .data[["sending_location"]], @@ -94,21 +84,14 @@ process_sc_all_care_home <- function( dplyr::select( -"min_ch_provider", -"max_ch_provider" - ) %>% - dplyr::ungroup() + ) %>% + ungroup() fixed_nursing_provision <- fixed_ch_provider %>% - dplyr::arrange( - "sending_location", - "social_care_id", - "period_start_date", - "ch_admission_date" - ) %>% dplyr::group_by( .data[["sending_location"]], .data[["social_care_id"]], - .data[["chi"]], .data[["ch_admission_date"]] ) %>% # fill in nursing care provision when missing @@ -116,30 +99,48 @@ process_sc_all_care_home <- function( dplyr::mutate( nursing_care_provision = dplyr::na_if(.data[["nursing_care_provision"]], 9L) ) %>% - tidyr::fill(all_of("nursing_care_provision"), .direction = "downup") %>% - dplyr::ungroup() + tidyr::fill(.data[["nursing_care_provision"]], .direction = "downup") - ready_to_merge <- fixed_nursing_provision %>% - # dplyr::filter(chi == "3005291146") %>% - # remove any duplicate records before merging for speed and simplicity - dplyr::distinct() %>% - dplyr::arrange( - sending_location, - social_care_id, - period_start_date, - ch_admission_date + + fixed_ch_provider <- fixed_nursing_provision %>% + dplyr::mutate( + ch_provider = dplyr::if_else(is.na(.data[["ch_provider"]]), 6L, .data[["ch_provider"]]) + ) %>% + # sort data + # TODO - Different from SPSS. SPSS has nursing provider and period in the group_by. Needs investigation - does it matter? + dplyr::group_by( + .data[["sending_location"]], + .data[["social_care_id"]] ) %>% dplyr::group_by( - sending_location, - social_care_id, - chi, - ch_admission_date + .data[["sending_location"]], + .data[["social_care_id"]] + ) %>% + dplyr::mutate( + min_ch_provider = min(.data[["ch_provider"]]), + max_ch_provider = max(.data[["ch_provider"]]), + ch_provider = dplyr::if_else( + .data[["min_ch_provider"]] != .data[["max_ch_provider"]], + 6L, + .data[["ch_provider"]] + ) + ) %>% + dplyr::select( + -"min_ch_provider", + -"max_ch_provider" ) %>% + dplyr::ungroup()%>% + # tidy up ch_provider using 6 when disagreeing values + tidyr::fill(.data[["ch_provider"]], .direction = "downup") + + ready_to_merge <- %>% + # remove any duplicate records before merging for speed and simplicity + dplyr::distinct() %>% # counter for split episodes dplyr::mutate( split_episode = tidyr::replace_na( - "nursing_care_provision" != dplyr::lag( - "nursing_care_provision" + .data[["nursing_care_provision"]] != dplyr::lag( + .data[["nursing_care_provision"]] ), TRUE ), @@ -163,11 +164,10 @@ process_sc_all_care_home <- function( ) %>% dplyr::arrange( dplyr::desc(.data[["period"]]), - dplyr::desc(.data[["episode_max_discharge_date"]]), - # dplyr::desc(.data[["ch_discharge_date"]]), + dplyr::desc(.data[["ch_discharge_date"]]), dplyr::desc(.data[["ch_provider"]]), - dplyr::desc(.data[["period_end_date"]]), - dplyr::desc(.data[["period_start_date"]]), + dplyr::desc(.data[["record_date"]]), + dplyr::desc(.data[["qtr_start"]]), dplyr::desc(.data[["ch_name"]]), dplyr::desc(.data[["ch_postcode"]]), dplyr::desc(.data[["reason_for_admission"]]), @@ -180,11 +180,10 @@ process_sc_all_care_home <- function( sc_latest_submission = dplyr::first(.data[["period"]]), dplyr::across( c( - # "ch_discharge_date", - "episode_max_discharge_date", + "ch_discharge_date", "ch_provider", - "period_end_date", - "period_start_date", + "record_date", + "qtr_start", "ch_name", "ch_postcode", "reason_for_admission", @@ -206,8 +205,6 @@ process_sc_all_care_home <- function( ) %>% # counter for latest submission # TODO check if this is the same as split_episode_counter? - # Megan - it's not! split_episode counter is a running count of cases grouped by nursing provider, - # and latest_submission counter is a running count grouped by the admission date. dplyr::mutate( latest_submission_counter = tidyr::replace_na( .data[["sc_latest_submission"]] != dplyr::lag( @@ -223,28 +220,23 @@ process_sc_all_care_home <- function( ch_admission_date = dplyr::if_else( .data[["sum_latest_submission"]] == min(.data[["sum_latest_submission"]]), .data[["ch_admission_date"]], - .data[["period_start_date"]] + .data[["qtr_start"]] ), # If it's the last episode(s) then keep the discharge date(s), otherwise # use the end of the quarter ch_discharge_date = dplyr::if_else( .data[["sum_latest_submission"]] == max(.data[["sum_latest_submission"]]), - .data[["episode_max_discharge_date"]], - # .data[["ch_discharge_date"]], - - .data[["period_end_date"]] + .data[["ch_discharge_date"]], + .data[["record_date"]] ) ) %>% dplyr::ungroup() - - test <- ch_episode %>% - dplyr::mutate(test = ifelse(ch_discharge_date == episode_max_discharge_date, 1, 0)) # Compare to Deaths Data # match ch_episode data with deaths data matched_deaths_data <- ch_episode %>% dplyr::left_join(it_chi_deaths_data, - by = "chi" + by = "chi" ) %>% # compare discharge date with NRS and CHI death date # if either of the dates are 5 or fewer days before discharge @@ -257,8 +249,8 @@ process_sc_all_care_home <- function( FALSE ), ch_discharge_date = dplyr::if_else(.data[["dis_after_death"]], - .data[["death_date"]], - .data[["ch_discharge_date"]] + .data[["death_date"]], + .data[["ch_discharge_date"]] ) ) %>% dplyr::ungroup() %>% @@ -277,11 +269,7 @@ process_sc_all_care_home <- function( ch_markers <- matched_deaths_data %>% # ch_chi_cis - dplyr::group_by( - .data[["chi"]], - .data[["sending_location"]], - .data[["social_care_id"]] - ) %>% + dplyr::group_by(.data[["chi"]]) %>% dplyr::mutate( continuous_stay_chi = tidyr::replace_na( .data[["ch_admission_date"]] <= dplyr::lag( @@ -319,7 +307,7 @@ process_sc_all_care_home <- function( ch_ep_start = min(.data[["ch_admission_date"]]), ch_ep_end = max( pmin( - .data[["period_end_date"]], + .data[["record_date"]], .data[["ch_discharge_date"]], na.rm = TRUE ) From 165d73f605d9719f39042aed378a2ae705058f24 Mon Sep 17 00:00:00 2001 From: marjom02 Date: Mon, 18 Dec 2023 14:34:28 +0000 Subject: [PATCH 2/6] revert to september update --- R/process_sc_all_care_home.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/process_sc_all_care_home.R b/R/process_sc_all_care_home.R index 21d14130d..e7a0a9af9 100644 --- a/R/process_sc_all_care_home.R +++ b/R/process_sc_all_care_home.R @@ -133,7 +133,7 @@ process_sc_all_care_home <- function( # tidy up ch_provider using 6 when disagreeing values tidyr::fill(.data[["ch_provider"]], .direction = "downup") - ready_to_merge <- %>% + ready_to_merge <- fixed_ch_provider %>% # remove any duplicate records before merging for speed and simplicity dplyr::distinct() %>% # counter for split episodes From 4bcfc402a02f083057bbf6e388a082a485ceaf3e Mon Sep 17 00:00:00 2001 From: SwiftySalmon Date: Mon, 18 Dec 2023 14:38:07 +0000 Subject: [PATCH 3/6] Style code --- R/process_sc_all_care_home.R | 14 +++++++------- Run_SLF_Files_manually/run_episode_file_1718.R | 4 +--- Run_SLF_Files_manually/run_episode_file_1819.R | 4 +--- Run_SLF_Files_manually/run_episode_file_1920.R | 4 +--- Run_SLF_Files_manually/run_episode_file_2021.R | 4 +--- Run_SLF_Files_manually/run_episode_file_2122.R | 4 +--- Run_SLF_Files_manually/run_episode_file_2223.R | 4 +--- Run_SLF_Files_manually/run_episode_file_2324.R | 4 +--- 8 files changed, 14 insertions(+), 28 deletions(-) diff --git a/R/process_sc_all_care_home.R b/R/process_sc_all_care_home.R index e7a0a9af9..b98cfe957 100644 --- a/R/process_sc_all_care_home.R +++ b/R/process_sc_all_care_home.R @@ -47,7 +47,7 @@ process_sc_all_care_home <- function( ) ) %>% dplyr::left_join(sc_demog_lookup, - by = c("sending_location", "social_care_id") + by = c("sending_location", "social_care_id") ) %>% replace_sc_id_with_latest() @@ -84,8 +84,8 @@ process_sc_all_care_home <- function( dplyr::select( -"min_ch_provider", -"max_ch_provider" - ) %>% - ungroup() + ) %>% + ungroup() fixed_nursing_provision <- fixed_ch_provider %>% @@ -129,7 +129,7 @@ process_sc_all_care_home <- function( -"min_ch_provider", -"max_ch_provider" ) %>% - dplyr::ungroup()%>% + dplyr::ungroup() %>% # tidy up ch_provider using 6 when disagreeing values tidyr::fill(.data[["ch_provider"]], .direction = "downup") @@ -236,7 +236,7 @@ process_sc_all_care_home <- function( # match ch_episode data with deaths data matched_deaths_data <- ch_episode %>% dplyr::left_join(it_chi_deaths_data, - by = "chi" + by = "chi" ) %>% # compare discharge date with NRS and CHI death date # if either of the dates are 5 or fewer days before discharge @@ -249,8 +249,8 @@ process_sc_all_care_home <- function( FALSE ), ch_discharge_date = dplyr::if_else(.data[["dis_after_death"]], - .data[["death_date"]], - .data[["ch_discharge_date"]] + .data[["death_date"]], + .data[["ch_discharge_date"]] ) ) %>% dplyr::ungroup() %>% diff --git a/Run_SLF_Files_manually/run_episode_file_1718.R b/Run_SLF_Files_manually/run_episode_file_1718.R index 08c1eca13..bcb132f2f 100644 --- a/Run_SLF_Files_manually/run_episode_file_1718.R +++ b/Run_SLF_Files_manually/run_episode_file_1718.R @@ -7,6 +7,4 @@ processed_data_list <- targets::tar_read("processed_data_list_1718") # Run episode file create_episode_file(processed_data_list, year = year) %>% -process_tests_episode_file(year = year) - - + process_tests_episode_file(year = year) diff --git a/Run_SLF_Files_manually/run_episode_file_1819.R b/Run_SLF_Files_manually/run_episode_file_1819.R index aca0bc017..aba8d984b 100644 --- a/Run_SLF_Files_manually/run_episode_file_1819.R +++ b/Run_SLF_Files_manually/run_episode_file_1819.R @@ -7,6 +7,4 @@ processed_data_list <- targets::tar_read("processed_data_list_1819") # Run episode file create_episode_file(processed_data_list, year = year) %>% -process_tests_episode_file(year = year) - - + process_tests_episode_file(year = year) diff --git a/Run_SLF_Files_manually/run_episode_file_1920.R b/Run_SLF_Files_manually/run_episode_file_1920.R index 76182d869..52939a75f 100644 --- a/Run_SLF_Files_manually/run_episode_file_1920.R +++ b/Run_SLF_Files_manually/run_episode_file_1920.R @@ -7,6 +7,4 @@ processed_data_list <- targets::tar_read("processed_data_list_1920") # Run episode file create_episode_file(processed_data_list, year = year) %>% -process_tests_episode_file(year = year) - - + process_tests_episode_file(year = year) diff --git a/Run_SLF_Files_manually/run_episode_file_2021.R b/Run_SLF_Files_manually/run_episode_file_2021.R index ea812ca33..28cabfe5c 100644 --- a/Run_SLF_Files_manually/run_episode_file_2021.R +++ b/Run_SLF_Files_manually/run_episode_file_2021.R @@ -7,6 +7,4 @@ processed_data_list <- targets::tar_read("processed_data_list_2021") # Run episode file create_episode_file(processed_data_list, year = year) %>% -process_tests_episode_file(year = year) - - + process_tests_episode_file(year = year) diff --git a/Run_SLF_Files_manually/run_episode_file_2122.R b/Run_SLF_Files_manually/run_episode_file_2122.R index 8f4bce090..f12142164 100644 --- a/Run_SLF_Files_manually/run_episode_file_2122.R +++ b/Run_SLF_Files_manually/run_episode_file_2122.R @@ -7,6 +7,4 @@ processed_data_list <- targets::tar_read("processed_data_list_2122") # Run episode file create_episode_file(processed_data_list, year = year) %>% -process_tests_episode_file(year = year) - - + process_tests_episode_file(year = year) diff --git a/Run_SLF_Files_manually/run_episode_file_2223.R b/Run_SLF_Files_manually/run_episode_file_2223.R index 19b4bef29..aa5caacfc 100644 --- a/Run_SLF_Files_manually/run_episode_file_2223.R +++ b/Run_SLF_Files_manually/run_episode_file_2223.R @@ -7,6 +7,4 @@ processed_data_list <- targets::tar_read("processed_data_list_2223") # Run episode file create_episode_file(processed_data_list, year = year) %>% -process_tests_episode_file(year = year) - - + process_tests_episode_file(year = year) diff --git a/Run_SLF_Files_manually/run_episode_file_2324.R b/Run_SLF_Files_manually/run_episode_file_2324.R index 308226169..1733b076a 100644 --- a/Run_SLF_Files_manually/run_episode_file_2324.R +++ b/Run_SLF_Files_manually/run_episode_file_2324.R @@ -7,6 +7,4 @@ processed_data_list <- targets::tar_read("processed_data_list_2324") # Run episode file create_episode_file(processed_data_list, year = year) %>% -process_tests_episode_file(year = year) - - + process_tests_episode_file(year = year) From 21d4ab02da5e07dd0571611bff2ebca4cafd72cc Mon Sep 17 00:00:00 2001 From: marjom02 Date: Mon, 18 Dec 2023 16:53:19 +0000 Subject: [PATCH 4/6] revert to september code --- R/process_sc_all_care_home.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/process_sc_all_care_home.R b/R/process_sc_all_care_home.R index e7a0a9af9..42bd14966 100644 --- a/R/process_sc_all_care_home.R +++ b/R/process_sc_all_care_home.R @@ -85,7 +85,7 @@ process_sc_all_care_home <- function( -"min_ch_provider", -"max_ch_provider" ) %>% - ungroup() + dplyr::ungroup() fixed_nursing_provision <- fixed_ch_provider %>% @@ -129,9 +129,9 @@ process_sc_all_care_home <- function( -"min_ch_provider", -"max_ch_provider" ) %>% - dplyr::ungroup()%>% # tidy up ch_provider using 6 when disagreeing values - tidyr::fill(.data[["ch_provider"]], .direction = "downup") + tidyr::fill(.data[["ch_provider"]], .direction = "downup") %>% + dplyr::ungroup() ready_to_merge <- fixed_ch_provider %>% # remove any duplicate records before merging for speed and simplicity From e237b62c6cc448c4b1af6bd38c3dc74aec01432a Mon Sep 17 00:00:00 2001 From: SwiftySalmon Date: Mon, 18 Dec 2023 17:01:48 +0000 Subject: [PATCH 5/6] Style code --- R/process_sc_all_care_home.R | 14 +++++++------- Run_SLF_Files_manually/run_episode_file_1718.R | 4 +--- Run_SLF_Files_manually/run_episode_file_1819.R | 4 +--- Run_SLF_Files_manually/run_episode_file_1920.R | 4 +--- Run_SLF_Files_manually/run_episode_file_2021.R | 4 +--- Run_SLF_Files_manually/run_episode_file_2122.R | 4 +--- Run_SLF_Files_manually/run_episode_file_2223.R | 4 +--- Run_SLF_Files_manually/run_episode_file_2324.R | 4 +--- 8 files changed, 14 insertions(+), 28 deletions(-) diff --git a/R/process_sc_all_care_home.R b/R/process_sc_all_care_home.R index 42bd14966..fdb7a7cd3 100644 --- a/R/process_sc_all_care_home.R +++ b/R/process_sc_all_care_home.R @@ -47,7 +47,7 @@ process_sc_all_care_home <- function( ) ) %>% dplyr::left_join(sc_demog_lookup, - by = c("sending_location", "social_care_id") + by = c("sending_location", "social_care_id") ) %>% replace_sc_id_with_latest() @@ -84,8 +84,8 @@ process_sc_all_care_home <- function( dplyr::select( -"min_ch_provider", -"max_ch_provider" - ) %>% - dplyr::ungroup() + ) %>% + dplyr::ungroup() fixed_nursing_provision <- fixed_ch_provider %>% @@ -131,7 +131,7 @@ process_sc_all_care_home <- function( ) %>% # tidy up ch_provider using 6 when disagreeing values tidyr::fill(.data[["ch_provider"]], .direction = "downup") %>% - dplyr::ungroup() + dplyr::ungroup() ready_to_merge <- fixed_ch_provider %>% # remove any duplicate records before merging for speed and simplicity @@ -236,7 +236,7 @@ process_sc_all_care_home <- function( # match ch_episode data with deaths data matched_deaths_data <- ch_episode %>% dplyr::left_join(it_chi_deaths_data, - by = "chi" + by = "chi" ) %>% # compare discharge date with NRS and CHI death date # if either of the dates are 5 or fewer days before discharge @@ -249,8 +249,8 @@ process_sc_all_care_home <- function( FALSE ), ch_discharge_date = dplyr::if_else(.data[["dis_after_death"]], - .data[["death_date"]], - .data[["ch_discharge_date"]] + .data[["death_date"]], + .data[["ch_discharge_date"]] ) ) %>% dplyr::ungroup() %>% diff --git a/Run_SLF_Files_manually/run_episode_file_1718.R b/Run_SLF_Files_manually/run_episode_file_1718.R index 08c1eca13..bcb132f2f 100644 --- a/Run_SLF_Files_manually/run_episode_file_1718.R +++ b/Run_SLF_Files_manually/run_episode_file_1718.R @@ -7,6 +7,4 @@ processed_data_list <- targets::tar_read("processed_data_list_1718") # Run episode file create_episode_file(processed_data_list, year = year) %>% -process_tests_episode_file(year = year) - - + process_tests_episode_file(year = year) diff --git a/Run_SLF_Files_manually/run_episode_file_1819.R b/Run_SLF_Files_manually/run_episode_file_1819.R index aca0bc017..aba8d984b 100644 --- a/Run_SLF_Files_manually/run_episode_file_1819.R +++ b/Run_SLF_Files_manually/run_episode_file_1819.R @@ -7,6 +7,4 @@ processed_data_list <- targets::tar_read("processed_data_list_1819") # Run episode file create_episode_file(processed_data_list, year = year) %>% -process_tests_episode_file(year = year) - - + process_tests_episode_file(year = year) diff --git a/Run_SLF_Files_manually/run_episode_file_1920.R b/Run_SLF_Files_manually/run_episode_file_1920.R index 76182d869..52939a75f 100644 --- a/Run_SLF_Files_manually/run_episode_file_1920.R +++ b/Run_SLF_Files_manually/run_episode_file_1920.R @@ -7,6 +7,4 @@ processed_data_list <- targets::tar_read("processed_data_list_1920") # Run episode file create_episode_file(processed_data_list, year = year) %>% -process_tests_episode_file(year = year) - - + process_tests_episode_file(year = year) diff --git a/Run_SLF_Files_manually/run_episode_file_2021.R b/Run_SLF_Files_manually/run_episode_file_2021.R index ea812ca33..28cabfe5c 100644 --- a/Run_SLF_Files_manually/run_episode_file_2021.R +++ b/Run_SLF_Files_manually/run_episode_file_2021.R @@ -7,6 +7,4 @@ processed_data_list <- targets::tar_read("processed_data_list_2021") # Run episode file create_episode_file(processed_data_list, year = year) %>% -process_tests_episode_file(year = year) - - + process_tests_episode_file(year = year) diff --git a/Run_SLF_Files_manually/run_episode_file_2122.R b/Run_SLF_Files_manually/run_episode_file_2122.R index 8f4bce090..f12142164 100644 --- a/Run_SLF_Files_manually/run_episode_file_2122.R +++ b/Run_SLF_Files_manually/run_episode_file_2122.R @@ -7,6 +7,4 @@ processed_data_list <- targets::tar_read("processed_data_list_2122") # Run episode file create_episode_file(processed_data_list, year = year) %>% -process_tests_episode_file(year = year) - - + process_tests_episode_file(year = year) diff --git a/Run_SLF_Files_manually/run_episode_file_2223.R b/Run_SLF_Files_manually/run_episode_file_2223.R index 19b4bef29..aa5caacfc 100644 --- a/Run_SLF_Files_manually/run_episode_file_2223.R +++ b/Run_SLF_Files_manually/run_episode_file_2223.R @@ -7,6 +7,4 @@ processed_data_list <- targets::tar_read("processed_data_list_2223") # Run episode file create_episode_file(processed_data_list, year = year) %>% -process_tests_episode_file(year = year) - - + process_tests_episode_file(year = year) diff --git a/Run_SLF_Files_manually/run_episode_file_2324.R b/Run_SLF_Files_manually/run_episode_file_2324.R index 308226169..1733b076a 100644 --- a/Run_SLF_Files_manually/run_episode_file_2324.R +++ b/Run_SLF_Files_manually/run_episode_file_2324.R @@ -7,6 +7,4 @@ processed_data_list <- targets::tar_read("processed_data_list_2324") # Run episode file create_episode_file(processed_data_list, year = year) %>% -process_tests_episode_file(year = year) - - + process_tests_episode_file(year = year) From 97f0ac2af2f0ce70da4d4aca1cc432d7b23e3b97 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Tue, 19 Dec 2023 10:00:35 +0000 Subject: [PATCH 6/6] Revert to september update code --- R/process_sc_all_care_home.R | 38 +++++------------------------------- 1 file changed, 5 insertions(+), 33 deletions(-) diff --git a/R/process_sc_all_care_home.R b/R/process_sc_all_care_home.R index fdb7a7cd3..d287f2042 100644 --- a/R/process_sc_all_care_home.R +++ b/R/process_sc_all_care_home.R @@ -58,10 +58,10 @@ process_sc_all_care_home <- function( ) fixed_ch_provider <- name_postcode_clean %>% - # sort data dplyr::mutate( ch_provider = dplyr::if_else(is.na(.data[["ch_provider"]]), 6L, .data[["ch_provider"]]) ) %>% + # sort data dplyr::arrange( "sending_location", "social_care_id", @@ -85,9 +85,12 @@ process_sc_all_care_home <- function( -"min_ch_provider", -"max_ch_provider" ) %>% + # tidy up ch_provider using 6 when disagreeing values + tidyr::fill(.data[["ch_provider"]], .direction = "downup") %>% dplyr::ungroup() + fixed_nursing_provision <- fixed_ch_provider %>% dplyr::group_by( .data[["sending_location"]], @@ -102,38 +105,7 @@ process_sc_all_care_home <- function( tidyr::fill(.data[["nursing_care_provision"]], .direction = "downup") - fixed_ch_provider <- fixed_nursing_provision %>% - dplyr::mutate( - ch_provider = dplyr::if_else(is.na(.data[["ch_provider"]]), 6L, .data[["ch_provider"]]) - ) %>% - # sort data - # TODO - Different from SPSS. SPSS has nursing provider and period in the group_by. Needs investigation - does it matter? - dplyr::group_by( - .data[["sending_location"]], - .data[["social_care_id"]] - ) %>% - dplyr::group_by( - .data[["sending_location"]], - .data[["social_care_id"]] - ) %>% - dplyr::mutate( - min_ch_provider = min(.data[["ch_provider"]]), - max_ch_provider = max(.data[["ch_provider"]]), - ch_provider = dplyr::if_else( - .data[["min_ch_provider"]] != .data[["max_ch_provider"]], - 6L, - .data[["ch_provider"]] - ) - ) %>% - dplyr::select( - -"min_ch_provider", - -"max_ch_provider" - ) %>% - # tidy up ch_provider using 6 when disagreeing values - tidyr::fill(.data[["ch_provider"]], .direction = "downup") %>% - dplyr::ungroup() - - ready_to_merge <- fixed_ch_provider %>% + ready_to_merge <- fixed_nursing_provision %>% # remove any duplicate records before merging for speed and simplicity dplyr::distinct() %>% # counter for split episodes