Public-Health-Scotland · Jennit07 · Dec 19, 2023 · Dec 18, 2023 · Dec 18, 2023 · Dec 18, 2023
diff --git a/R/process_sc_all_care_home.R b/R/process_sc_all_care_home.R
@@ -29,39 +29,28 @@ process_sc_all_care_home <- function(
     write_to_disk = TRUE) {
   ## Data Cleaning-----------------------------------------------------
   ch_clean <- data %>%
-    dplyr::mutate(ch_admission_date = fix_sc_start_dates(
-      .data$ch_admission_date,
-      .data$period_start_date
-    )) %>%
-    dplyr::group_by(
-      social_care_id,
-      sending_location,
-      ch_admission_date
-    ) %>%
-    dplyr::mutate(episode_max_discharge_date = max(
-      pmin(period_end_date,
-        ch_discharge_date,
-        na.rm = TRUE
+    dplyr::mutate(
+      record_date = end_fy_quarter(.data[["period"]]),
+      qtr_start = start_fy_quarter(.data[["period"]]),
+      # Set missing admission date to start of the submitted quarter
+      ch_admission_date = dplyr::if_else(
+        is.na(.data[["ch_admission_date"]]),
+        .data[["qtr_start"]],
+        .data[["ch_admission_date"]]
+      ),
+      # TODO check if we should set the dis date to the end of the period?
+      # If the dis date is before admission, remove the dis date
+      ch_discharge_date = dplyr::if_else(
+        .data[["ch_admission_date"]] > .data[["ch_discharge_date"]],
+        lubridate::NA_Date_,
+        .data[["ch_discharge_date"]]
       )
-    )) %>%
-    dplyr::ungroup() %>%
-    dplyr::mutate(test = ifelse(ch_admission_date > ch_discharge_date, 1, 0)) %>%
-    # dplyr::mutate(ch_discharge_date = fix_sc_missing_end_dates(
-    #   .data$ch_discharge_date,
-    #  .data$period_end_date
-    #  )) %>%
-    # Fix ch_discharge_date is earlier than ch_admission_date by setting end_date to the end of fy
-    dplyr::mutate(ch_discharge_date = fix_sc_end_dates(
-      .data$ch_admission_date,
-      .data$ch_discharge_date,
-      .data$period
-    )) %>%
+    ) %>%
     dplyr::left_join(sc_demog_lookup,
       by = c("sending_location", "social_care_id")
     ) %>%
     replace_sc_id_with_latest()
 
-
   name_postcode_clean <- fill_ch_names(
     ch_data = ch_clean,
     ch_name_lookup_path = ch_name_lookup_path,
@@ -73,10 +62,11 @@ process_sc_all_care_home <- function(
       ch_provider = dplyr::if_else(is.na(.data[["ch_provider"]]), 6L, .data[["ch_provider"]])
     ) %>%
     # sort data
-    # TODO - Different from SPSS. SPSS has nursing provider and period in the group_by. Needs investigation - does it matter?
-    dplyr::group_by(
-      .data[["sending_location"]],
-      .data[["social_care_id"]]
+    dplyr::arrange(
+      "sending_location",
+      "social_care_id",
+      "ch_admission_date",
+      "period"
     ) %>%
     dplyr::group_by(
       .data[["sending_location"]],
@@ -95,51 +85,34 @@ process_sc_all_care_home <- function(
       -"min_ch_provider",
       -"max_ch_provider"
     ) %>%
+    # tidy up ch_provider using 6 when disagreeing values
+    tidyr::fill(.data[["ch_provider"]], .direction = "downup") %>%
     dplyr::ungroup()
 
 
+
   fixed_nursing_provision <- fixed_ch_provider %>%
-    dplyr::arrange(
-      "sending_location",
-      "social_care_id",
-      "period_start_date",
-      "ch_admission_date"
-    ) %>%
     dplyr::group_by(
       .data[["sending_location"]],
       .data[["social_care_id"]],
-      .data[["chi"]],
       .data[["ch_admission_date"]]
     ) %>%
     # fill in nursing care provision when missing
     # but present in the following entry
     dplyr::mutate(
       nursing_care_provision = dplyr::na_if(.data[["nursing_care_provision"]], 9L)
     ) %>%
-    tidyr::fill(all_of("nursing_care_provision"), .direction = "downup") %>%
-    dplyr::ungroup()
+    tidyr::fill(.data[["nursing_care_provision"]], .direction = "downup")
+
 
   ready_to_merge <- fixed_nursing_provision %>%
-    #  dplyr::filter(chi == "3005291146") %>%
     # remove any duplicate records before merging for speed and simplicity
     dplyr::distinct() %>%
-    dplyr::arrange(
-      sending_location,
-      social_care_id,
-      period_start_date,
-      ch_admission_date
-    ) %>%
-    dplyr::group_by(
-      sending_location,
-      social_care_id,
-      chi,
-      ch_admission_date
-    ) %>%
     # counter for split episodes
     dplyr::mutate(
       split_episode = tidyr::replace_na(
-        "nursing_care_provision" != dplyr::lag(
-          "nursing_care_provision"
+        .data[["nursing_care_provision"]] != dplyr::lag(
+          .data[["nursing_care_provision"]]
         ),
         TRUE
       ),
@@ -163,11 +136,10 @@ process_sc_all_care_home <- function(
     ) %>%
     dplyr::arrange(
       dplyr::desc(.data[["period"]]),
-      dplyr::desc(.data[["episode_max_discharge_date"]]),
-      # dplyr::desc(.data[["ch_discharge_date"]]),
+      dplyr::desc(.data[["ch_discharge_date"]]),
       dplyr::desc(.data[["ch_provider"]]),
-      dplyr::desc(.data[["period_end_date"]]),
-      dplyr::desc(.data[["period_start_date"]]),
+      dplyr::desc(.data[["record_date"]]),
+      dplyr::desc(.data[["qtr_start"]]),
       dplyr::desc(.data[["ch_name"]]),
       dplyr::desc(.data[["ch_postcode"]]),
       dplyr::desc(.data[["reason_for_admission"]]),
@@ -180,11 +152,10 @@ process_sc_all_care_home <- function(
       sc_latest_submission = dplyr::first(.data[["period"]]),
       dplyr::across(
         c(
-          # "ch_discharge_date",
-          "episode_max_discharge_date",
+          "ch_discharge_date",
           "ch_provider",
-          "period_end_date",
-          "period_start_date",
+          "record_date",
+          "qtr_start",
           "ch_name",
           "ch_postcode",
           "reason_for_admission",
@@ -206,8 +177,6 @@ process_sc_all_care_home <- function(
     ) %>%
     # counter for latest submission
     # TODO check if this is the same as split_episode_counter?
-    # Megan - it's not! split_episode counter is a running count of cases grouped by nursing provider,
-    # and latest_submission counter is a running count grouped by the admission date.
     dplyr::mutate(
       latest_submission_counter = tidyr::replace_na(
         .data[["sc_latest_submission"]] != dplyr::lag(
@@ -223,23 +192,18 @@ process_sc_all_care_home <- function(
       ch_admission_date = dplyr::if_else(
         .data[["sum_latest_submission"]] == min(.data[["sum_latest_submission"]]),
         .data[["ch_admission_date"]],
-        .data[["period_start_date"]]
+        .data[["qtr_start"]]
       ),
       # If it's the last episode(s) then keep the discharge date(s), otherwise
       # use the end of the quarter
       ch_discharge_date = dplyr::if_else(
         .data[["sum_latest_submission"]] == max(.data[["sum_latest_submission"]]),
-        .data[["episode_max_discharge_date"]],
-        # .data[["ch_discharge_date"]],
-
-        .data[["period_end_date"]]
+        .data[["ch_discharge_date"]],
+        .data[["record_date"]]
       )
     ) %>%
     dplyr::ungroup()
 
-
-  test <- ch_episode %>%
-    dplyr::mutate(test = ifelse(ch_discharge_date == episode_max_discharge_date, 1, 0))
   # Compare to Deaths Data
   # match ch_episode data with deaths data
   matched_deaths_data <- ch_episode %>%
@@ -277,11 +241,7 @@ process_sc_all_care_home <- function(
 
   ch_markers <- matched_deaths_data %>%
     # ch_chi_cis
-    dplyr::group_by(
-      .data[["chi"]],
-      .data[["sending_location"]],
-      .data[["social_care_id"]]
-    ) %>%
+    dplyr::group_by(.data[["chi"]]) %>%
     dplyr::mutate(
       continuous_stay_chi = tidyr::replace_na(
         .data[["ch_admission_date"]] <= dplyr::lag(
@@ -319,7 +279,7 @@ process_sc_all_care_home <- function(
       ch_ep_start = min(.data[["ch_admission_date"]]),
       ch_ep_end = max(
         pmin(
-          .data[["period_end_date"]],
+          .data[["record_date"]],
           .data[["ch_discharge_date"]],
           na.rm = TRUE
         )

diff --git a/Run_SLF_Files_manually/run_episode_file_1718.R b/Run_SLF_Files_manually/run_episode_file_1718.R
@@ -7,6 +7,4 @@ processed_data_list <- targets::tar_read("processed_data_list_1718")
 
 # Run episode file
 create_episode_file(processed_data_list, year = year) %>%
-process_tests_episode_file(year = year)
-
-
+  process_tests_episode_file(year = year)
diff --git a/Run_SLF_Files_manually/run_episode_file_1819.R b/Run_SLF_Files_manually/run_episode_file_1819.R
@@ -7,6 +7,4 @@ processed_data_list <- targets::tar_read("processed_data_list_1819")
 
 # Run episode file
 create_episode_file(processed_data_list, year = year) %>%
-process_tests_episode_file(year = year)
-
-
+  process_tests_episode_file(year = year)
diff --git a/Run_SLF_Files_manually/run_episode_file_1920.R b/Run_SLF_Files_manually/run_episode_file_1920.R
@@ -7,6 +7,4 @@ processed_data_list <- targets::tar_read("processed_data_list_1920")
 
 # Run episode file
 create_episode_file(processed_data_list, year = year) %>%
-process_tests_episode_file(year = year)
-
-
+  process_tests_episode_file(year = year)
diff --git a/Run_SLF_Files_manually/run_episode_file_2021.R b/Run_SLF_Files_manually/run_episode_file_2021.R
@@ -7,6 +7,4 @@ processed_data_list <- targets::tar_read("processed_data_list_2021")
 
 # Run episode file
 create_episode_file(processed_data_list, year = year) %>%
-process_tests_episode_file(year = year)
-
-
+  process_tests_episode_file(year = year)
diff --git a/Run_SLF_Files_manually/run_episode_file_2122.R b/Run_SLF_Files_manually/run_episode_file_2122.R
@@ -7,6 +7,4 @@ processed_data_list <- targets::tar_read("processed_data_list_2122")
 
 # Run episode file
 create_episode_file(processed_data_list, year = year) %>%
-process_tests_episode_file(year = year)
-
-
+  process_tests_episode_file(year = year)
diff --git a/Run_SLF_Files_manually/run_episode_file_2223.R b/Run_SLF_Files_manually/run_episode_file_2223.R
@@ -7,6 +7,4 @@ processed_data_list <- targets::tar_read("processed_data_list_2223")
 
 # Run episode file
 create_episode_file(processed_data_list, year = year) %>%
-process_tests_episode_file(year = year)
-
-
+  process_tests_episode_file(year = year)
diff --git a/Run_SLF_Files_manually/run_episode_file_2324.R b/Run_SLF_Files_manually/run_episode_file_2324.R
@@ -7,6 +7,4 @@ processed_data_list <- targets::tar_read("processed_data_list_2324")
 
 # Run episode file
 create_episode_file(processed_data_list, year = year) %>%
-process_tests_episode_file(year = year)
-
-
+  process_tests_episode_file(year = year)