Public-Health-Scotland · Moohan · Aug 16, 2023 · Aug 7, 2023 · Aug 7, 2023 · Aug 7, 2023
diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt
@@ -108,6 +108,7 @@ keyring
 keytime
 keytimex
 kis
+lazydt
 lgl
 los
 ltc

diff --git a/NEWS.md b/NEWS.md
@@ -1,6 +1,48 @@
-# March 2023 Update - Unreleased
+# September 2023 Update - Unreleased
+* Update of 2017/18 onwards to include bug fixes within the files. 
+* New 2023/24 files. 
+* New NSU cohort for 2022/23 file. 
+* Re-addition of:
+  * HRIs in individual file.
+  * Homelessness Flags.
+* Bug fixes: 
+  * Blank `datazone` in A&E. This was due to the PC8 postcode format being matched onto the SLF pc lookup, and has now been fixed.
+  * Large increase in preventable beddays. This was caused by an SPSS vs R logic difference. Now uses the SPSS logic, which
+    brings the difference down to `3.3%`.
+  * Issue with `locality` which showed `locality` in each row instead of its true `locality`. This has now been fixed. 
+  * Duplicated CHI in the individual file. The issue was identified when trying to include HRIs. This has now been corrected. 
+* Internal changes to SLF development: 
+  * `DN` and `CMH` data are now archived in an HSCDIIP folder as the BOXI datamart is now closed down for these. Function `get_boxi_extract_path` has been updated to reflect this. 
+  * Tests updated to include `HSCP` count.
+  * Tests created for `Delayed Discharges` extract and `Social care Client lookup`.
 
 
+# June 2023 Update - Released 24-Jul-2023
+* 2011/12 -> 2013/14 – These files have not been altered, other than to make them available in a new file type (parquet).
+* 2017/18 – These files have been recreated using our new R pipeline, but the data has not changed. We did this so that we would have a good comparator file.
+* 2018/19 -> 2022/23 – These files have been recreated using the R pipeline and are also using updated data (as in a ‘normal’ update).
+* Files changed into parquet format. 
+* SLFhelper updated. 
+* Removal of `keydate1_dateformat` and `keydate2_dateformat`.
+* `dd_responsible_lca` – This variable now uses CA2019 codes instead of the 2-digit ‘old’ LCA code.
+* Preventable beddays - not able to calculate these correctly. Death fixes not included.
+* Variables not ordered in R like they used to be in SPSS.
+* End of HHG.
+* New variable `ch_postcode`.
+* Rename of variables `cost_total_net_incdnas`, `ooh_outcome.1`, `ooh_outcome.2`, `ooh_outcome.3`, `ooh_outcome.4`, `totalnodncontacts`.
+* HRIs not included.
+* Homelessness flags not included. 
+* Keep_population flag not included. 
+
+
+# March 2023 Update - Released 10-Mar-2023
+* 2021/22 episode and individual files refreshed with updated activity.
+* 2022/23 file updated and contains data up to the end of Q3. 
+* Social care data is available for 2022/23. 
+* Typo in the variable name `ooh_covid_assessment`
+* Next update in May as a test run in R but won't be released. 
+* Next release in June. 
+
 # December 2022 Update - Released 07-Dec-2022
 * Now using the 2022v2 Scottish Postcode Directory.
 * Now using the 2020 Urban Rural classifications (instead of the older 2016 ones), this means variables such as `URx_2016` will now be called `URx_2020`.

diff --git a/R/aggregate_by_chi.R b/R/aggregate_by_chi.R
@@ -203,12 +203,19 @@ aggregate_ch_episodes <- function(episode_file) {
   data.table::setDT(episode_file)
 
   # Perform grouping and aggregation
-  episode_file <- episode_file[, `:=`(
-    ch_no_cost = max(ch_no_cost),
-    ch_ep_start = min(record_keydate1),
-    ch_ep_end = max(ch_ep_end),
-    ch_cost_per_day = mean(ch_cost_per_day)
-  ), by = c("chi", "ch_chi_cis")]
+  episode_file[, c(
+    "ch_no_cost",
+    "ch_ep_start",
+    "ch_ep_end",
+    "ch_cost_per_day"
+  ) := list(
+    max(ch_no_cost),
+    min(record_keydate1),
+    max(ch_ep_end),
+    mean(ch_cost_per_day)
+  ),
+  by = c("chi", "ch_chi_cis")
+  ]
 
   # Convert back to tibble if needed
   episode_file <- tibble::as_tibble(episode_file)

diff --git a/R/convert_sending_location_to_lca.R b/R/convert_sending_location_to_lca.R
@@ -9,7 +9,7 @@
 #' @export
 #'
 #' @examples
-#' sending_location <- c("100", "120")
+#' sending_location <- c(100, 120)
 #' convert_sending_location_to_lca(sending_location)
 #'
 #' @family code functions
@@ -18,38 +18,40 @@
 convert_sending_location_to_lca <- function(sending_location) {
   lca <- dplyr::case_match(
     sending_location,
-    "100" ~ "01", # Aberdeen City
-    "110" ~ "02", # Aberdeenshire
-    "120" ~ "03", # Angus
-    "130" ~ "04", # Argyll and Bute
-    "355" ~ "05", # Scottish Borders
-    "150" ~ "06", # Clackmannanshire
-    "395" ~ "07", # West Dumbartonshire
-    "170" ~ "08", # Dumfries and Galloway
-    "180" ~ "09", # Dundee City
-    "190" ~ "10", # East Ayrshire
-    "200" ~ "11", # East Dunbartonshire
-    "210" ~ "12", # East Lothian
-    "220" ~ "13", # East Renfrewshire
-    "230" ~ "14", # City of Edinburgh
-    "240" ~ "15", # Falkirk
-    "250" ~ "16", # Fife
-    "260" ~ "17", # Glasgow City
-    "270" ~ "18", # Highland
-    "280" ~ "19", # Inverclyde
-    "290" ~ "20", # Midlothian
-    "300" ~ "21", # Moray
-    "310" ~ "22", # North Ayrshire
-    "320" ~ "23", # North Lanarkshire
-    "330" ~ "24", # Orkney Islands
-    "340" ~ "25", # Perth and Kinross
-    "350" ~ "26", # Renfrewshire
-    "360" ~ "27", # Shetland Islands
-    "370" ~ "28", # South Ayrshire
-    "380" ~ "29", # South Lanarkshire
-    "390" ~ "30", # Stirling
-    "400" ~ "31", # West Lothian
-    "235" ~ "32" # Na_h_Eileanan_Siar
+    100L ~ "01", # Aberdeen City
+    110L ~ "02", # Aberdeenshire
+    120L ~ "03", # Angus
+    130L ~ "04", # Argyll and Bute
+    355L ~ "05", # Scottish Borders
+    150L ~ "06", # Clackmannanshire
+    395L ~ "07", # West Dunbartonshire
+    170L ~ "08", # Dumfries and Galloway
+    180L ~ "09", # Dundee City
+    190L ~ "10", # East Ayrshire
+    200L ~ "11", # East Dunbartonshire
+    210L ~ "12", # East Lothian
+    220L ~ "13", # East Renfrewshire
+    230L ~ "14", # City of Edinburgh
+    240L ~ "15", # Falkirk
+    250L ~ "16", # Fife
+    260L ~ "17", # Glasgow City
+    270L ~ "18", # Highland
+    280L ~ "19", # Inverclyde
+    290L ~ "20", # Midlothian
+    300L ~ "21", # Moray
+    310L ~ "22", # North Ayrshire
+    320L ~ "23", # North Lanarkshire
+    330L ~ "24", # Orkney Islands
+    340L ~ "25", # Perth and Kinross
+    350L ~ "26", # Renfrewshire
+    360L ~ "27", # Shetland Islands
+    370L ~ "28", # South Ayrshire
+    380L ~ "29", # South Lanarkshire
+    390L ~ "30", # Stirling
+    400L ~ "31", # West Lothian
+    235L ~ "32", # Na_h_Eileanan_Siar
+    .default = NA_character_
   )
+
   return(lca)
 }
diff --git a/R/create_sending_location_test_flags.R b/R/create_sending_location_test_flags.R
@@ -0,0 +1,48 @@
+#' Create sending location test flags
+#'
+#' @description Adds one logical flag column per named sending location
+#'
+#' @param data a data frame containing the sending location variable
+#' @param sending_location_var column of sending location codes (e.g. `100L`)
+#' @return `data` with an extra TRUE/FALSE column for each sending location
+#'
+#' @family flag functions
+create_sending_location_test_flags <- function(data, sending_location_var) {
+  data <- data %>%
+    dplyr::mutate(
+      Aberdeen_City = {{ sending_location_var }} == 100L,
+      Aberdeenshire = {{ sending_location_var }} == 110L,
+      Angus = {{ sending_location_var }} == 120L,
+      Argyll_and_Bute = {{ sending_location_var }} == 130L,
+      City_of_Edinburgh = {{ sending_location_var }} == 230L,
+      Clackmannanshire = {{ sending_location_var }} == 150L,
+      Dumfries_and_Galloway = {{ sending_location_var }} == 170L,
+      Dundee_City = {{ sending_location_var }} == 180L,
+      East_Ayrshire = {{ sending_location_var }} == 190L,
+      East_Dunbartonshire = {{ sending_location_var }} == 200L,
+      East_Lothian = {{ sending_location_var }} == 210L,
+      East_Renfrewshire = {{ sending_location_var }} == 220L,
+      Falkirk = {{ sending_location_var }} == 240L,
+      Fife = {{ sending_location_var }} == 250L,
+      Glasgow_City = {{ sending_location_var }} == 260L,
+      Highland = {{ sending_location_var }} == 270L,
+      Inverclyde = {{ sending_location_var }} == 280L,
+      Midlothian = {{ sending_location_var }} == 290L,
+      Moray = {{ sending_location_var }} == 300L,
+      Na_h_Eileanan_Siar = {{ sending_location_var }} == 235L,
+      North_Ayrshire = {{ sending_location_var }} == 310L,
+      North_Lanarkshire = {{ sending_location_var }} == 320L,
+      Orkney_Islands = {{ sending_location_var }} == 330L,
+      Perth_and_Kinross = {{ sending_location_var }} == 340L,
+      Renfrewshire = {{ sending_location_var }} == 350L,
+      Scottish_Borders = {{ sending_location_var }} == 355L,
+      Shetland_Islands = {{ sending_location_var }} == 360L,
+      South_Ayrshire = {{ sending_location_var }} == 370L,
+      South_Lanarkshire = {{ sending_location_var }} == 380L,
+      Stirling = {{ sending_location_var }} == 390L,
+      West_Dunbartonshire = {{ sending_location_var }} == 395L,
+      West_Lothian = {{ sending_location_var }} == 400L
+    )
+
+  return(data)
+}
diff --git a/R/get_source_extract_path.R b/R/get_source_extract_path.R
@@ -10,27 +10,34 @@
 #' @export
 #'
 #' @family extract file paths
-get_source_extract_path <- function(year,
-                                    type = c(
-                                      "Acute",
-                                      "AE",
-                                      "AT",
-                                      "CH",
-                                      "Client",
-                                      "CMH",
-                                      "DD",
-                                      "Deaths",
-                                      "DN",
-                                      "GPOoH",
-                                      "HC",
-                                      "Homelessness",
-                                      "Maternity",
-                                      "MH",
-                                      "Outpatients",
-                                      "PIS",
-                                      "SDS"
-                                    ),
-                                    ...) {
+get_source_extract_path <- function(
+    year,
+    type = c(
+      "Acute",
+      "AE",
+      "AT",
+      "CH",
+      "Client",
+      "CMH",
+      "DD",
+      "Deaths",
+      "DN",
+      "GPOoH",
+      "HC",
+      "Homelessness",
+      "Maternity",
+      "MH",
+      "Outpatients",
+      "PIS",
+      "SDS"
+    ),
+    ...) {
+  if (year %in% type) {
+    cli::cli_abort("{.val {year}} was supplied to the {.arg year} argument.")
+  }
+
+  year <- check_year_format(year)
+
   type <- match.arg(type)
 
   if (!check_year_valid(year, type)) {

diff --git a/R/process_extract_homelessness.R b/R/process_extract_homelessness.R
@@ -146,13 +146,14 @@ process_extract_homelessness <- function(
     )
 
   if (write_to_disk) {
-    final_data %>%
-      write_file(get_file_path(
-        get_year_dir(year),
-        stringr::str_glue("homelessness_for_source-20{year}"),
-        ext = "rds",
+    write_file(
+      final_data,
+      get_source_extract_path(
+        year = year,
+        type = "Homelessness",
         check_mode = "write"
-      ))
+      )
+    )
   }
 
   return(final_data)

diff --git a/R/process_tests_outpatients.R b/R/process_tests_outpatients.R
@@ -12,11 +12,13 @@ process_tests_outpatients <- function(data, year) {
   comparison <- produce_test_comparison(
     old_data = produce_source_extract_tests(old_data,
       sum_mean_vars = "cost",
-      max_min_vars = c("record_keydate1", "record_keydate2", "cost_total_net")
+      max_min_vars = c("record_keydate1", "record_keydate2", "cost_total_net"),
+      add_hscp_count = FALSE
     ),
     new_data = produce_source_extract_tests(data,
       sum_mean_vars = "cost",
-      max_min_vars = c("record_keydate1", "record_keydate2", "cost_total_net")
+      max_min_vars = c("record_keydate1", "record_keydate2", "cost_total_net"),
+      add_hscp_count = FALSE
     )
   ) %>%
     write_tests_xlsx(sheet_name = "00B", year)

diff --git a/R/produce_source_extract_tests.R b/R/produce_source_extract_tests.R
@@ -13,6 +13,7 @@
 #' (data is from [get_source_extract_path()])
 #' @param sum_mean_vars variables used when selecting 'all' measures from [calculate_measures()]
 #' @param max_min_vars variables used when selecting 'min-max' from [calculate_measures()]
+#' @param add_hscp_count Whether to add the HSCP test flags (default `TRUE`). Specify `FALSE` where the `hscp` variable is not available.
 #'
 #' @return a dataframe with a count of each flag
 #' from [calculate_measures()]
@@ -28,13 +29,19 @@ produce_source_extract_tests <- function(data,
                                          max_min_vars = c(
                                            "record_keydate1", "record_keydate2",
                                            "cost_total_net", "yearstay"
-                                         )) {
+                                         ),
+                                         add_hscp_count = TRUE) {
   test_flags <- data %>%
     # use functions to create HB and partnership flags
     create_demog_test_flags() %>%
     create_hb_test_flags(.data$hbtreatcode) %>%
-    create_hb_cost_test_flags(.data$hbtreatcode, .data$cost_total_net) %>%
-    create_hscp_test_flags(.data$hscp) %>%
+    create_hb_cost_test_flags(.data$hbtreatcode, .data$cost_total_net)
+
+  if (add_hscp_count) {
+    test_flags <- create_hscp_test_flags(test_flags, .data$hscp)
+  }
+
+  test_flags <- test_flags %>%
     # keep variables for comparison
     dplyr::select("valid_chi":dplyr::last_col()) %>%
     # use function to sum new test flags

diff --git a/R/read_sc_all_alarms_telecare.R b/R/read_sc_all_alarms_telecare.R
@@ -22,21 +22,19 @@ read_sc_all_alarms_telecare <- function(sc_dvprod_connection = phs_db_connection
       "service_start_date",
       "service_end_date"
     ) %>%
-    # fix bad period (2017, 2020 & 2021)
+    dplyr::collect() %>%
+    # fix bad periods ending in a bare four-digit year (e.g. 2017 -> 2017Q4)
     dplyr::mutate(
-      period = dplyr::case_match(
-        .data$period,
-        "2017" ~ "2017Q4",
-        "2020" ~ "2020Q4",
-        "2021" ~ "2021Q4",
-        .default = .data$period
+      period = dplyr::if_else(
+        grepl("\\d{4}$", .data$period),
+        paste0(.data$period, "Q4"),
+        .data$period
       )
     ) %>%
     dplyr::mutate(
       dplyr::across(c("sending_location", "service_type"), ~ as.integer(.x))
     ) %>%
-    dplyr::arrange(.data$sending_location, .data$social_care_id) %>%
-    dplyr::collect()
+    dplyr::arrange(.data$sending_location, .data$social_care_id)
 
   return(at_full_data)
 }