Public-Health-Scotland · Moohan · Jul 17, 2023 · Jul 14, 2023 · Jul 17, 2023 · Jul 17, 2023
diff --git a/R/fill_geographies.R b/R/fill_geographies.R
@@ -14,7 +14,7 @@
     "hbrescode",
     "hscp",
     "lca",
-    "datazone",
+    "datazone2011",
     "hbpraccode",
     "hbtreatcode",
     "gpprac"
@@ -85,7 +85,7 @@
 }
 
 fill_postcode_geogs <- function(data) {
-  spd <- read_file(get_slf_postcode_path())
+  slf_pc_lookup <- read_file(get_slf_postcode_path())
 
   filled_postcodes <- dplyr::left_join(
     data,
@@ -102,7 +102,7 @@
     ) %>%
     # Fill geographies
     dplyr::left_join(
-      spd,
+      slf_pc_lookup,
       by = "postcode",
       suffix = c("_old", "")
     ) %>%
@@ -117,10 +117,11 @@
     cascade_geographies() %>%
     dplyr::mutate(
       hbrescode = dplyr::coalesce(.data$hb2018, .data$hbrescode),
       hscp = dplyr::coalesce(.data$hscp2018, .data$hscp),
-      lca = dplyr::coalesce(.data$lca, .data$lca_old)
+      lca = dplyr::coalesce(.data$lca, .data$lca_old),
+      datazone2011 = dplyr::coalesce(.data$datazone2011, .data$datazone2011_old)
     ) %>%
-    dplyr::select(!c("hb2018", "hscp2018", "lca_old", "most_recent_postcode"))
+    dplyr::select(!c("hb2018", "hscp2018", "lca_old", "datazone2011_old", "most_recent_postcode"))
 
   return(filled_postcodes)
 }
@@ -161,13 +162,13 @@
 #'
 #' @return data with matched HSCP and LCA codes
 cascade_geographies <- function(data) {
  # TODO rework this function into a series of smaller functions which operate on vectors
  # e.g. cascade_hscp_lca <- function(hscp, lca) {...}
  # Would take HSCP and populate any missing LCA using it
  data <- data %>%
    dplyr::mutate(
      # If we can, 'cascade' the geographies upwards
      # i.e. if they have an LCA use this to fill in HSCP2018 and so on for hbrescode
      # Codes are correct as at August 2018
      lca = dplyr::case_when(
        !is_missing(lca) ~ lca,

diff --git a/R/process_extract_acute.R b/R/process_extract_acute.R
@@ -27,21 +27,21 @@
      year = year,
      # Set recid as 01B and flag GLS records
      recid = dplyr::if_else(.data$GLS_record == "Y", "GLS", "01B"),
      # Set IDPC marker for the episode
      ipdc = dplyr::case_when(
        .data$ipdc == "IP" ~ "I",
        .data$ipdc == "DC" ~ "D"
      ),
      # Set IDPC marker for the cij
      cij_ipdc = dplyr::case_when(
        .data$cij_ipdc == "IP" ~ "I",
        .data$cij_ipdc == "DC" ~ "D"
      )
    ) %>%
    # Recode GP practice into 5 digit number
    # We assume that if it starts with a letter it's an English practice and so recode to 99995.
    dplyr::mutate(gpprac = convert_eng_gpprac_to_dummy(.data$gpprac)) %>%
    # Calculate the total length of stay (for the entire episode, not just within the financial year).
    dplyr::mutate(
      stay = calculate_stay(year, .data$record_keydate1, .data$record_keydate2),
      # create and populate SMRType
@@ -49,8 +49,8 @@
    ) %>%
    # Apply new costs for C3 specialty, these are taken from the 2017/18 file
    fix_c3_costs(year) %>%
    # initialise monthly cost/beddays variables in a separate data frame for matching
    convert_monthly_rows_to_vars(.data$costmonthnum, .data$cost_total_net, .data$yearstay) %>%
    # add yearstay and cost_total_net variables
    dplyr::mutate(
      yearstay = rowSums(dplyr::across(tidyselect::ends_with("_beddays"))),
@@ -79,7 +79,7 @@
       "hbrescode",
       "lca",
       "hscp",
-      "datazone",
+      "datazone2011",
       "location",
       "hbtreatcode",
       "yearstay",

diff --git a/R/process_extract_district_nursing.R b/R/process_extract_district_nursing.R
@@ -13,7 +13,7 @@
 #' @return the final data as a [tibble][tibble::tibble-package].
 #' @export
 #' @family process extracts
 process_extract_district_nursing <- function(
    data,
    year,
    costs = read_file(get_dn_costs_path()),
@@ -107,7 +107,7 @@
           "gender",
           "gpprac",
           "postcode",
-          "datazone",
+          "datazone2011",
           "lca",
           "hscp",
           "hbrescode",

diff --git a/R/process_extract_gp_ooh.R b/R/process_extract_gp_ooh.R
@@ -13,8 +13,8 @@
 #' @export
 #' @family process extracts
 process_extract_gp_ooh <- function(year, data_list, write_to_disk = TRUE) {
  diagnosis_extract <- process_extract_ooh_diagnosis(data_list[["diagnosis"]], year)
  outcomes_extract <- process_extract_ooh_outcomes(data_list[["outcomes"]], year)
  consultations_extract <- process_extract_ooh_consultations(data_list[["consultations"]], year)


@@ -63,10 +63,10 @@
      location = dplyr::na_if(.data$location, "UNKNOWN"),
      recid = "OoH",
      smrtype = add_smr_type(.data$recid, consultation_type = .data$consultation_type),
      kis_accessed = factor(
        dplyr::case_when(
          kis_accessed == "Y" ~ 1L,
          kis_accessed == "N" ~ 0L,
          TRUE ~ 9L
        ),
        levels = c(0L, 1L, 9L),
@@ -111,12 +111,12 @@
       "gpprac",
       "postcode",
       "hbrescode",
-      "datazone",
+      "datazone2011",
       "hscp",
       "hbtreatcode",
       "location",
      "attendance_status",
      "kis_accessed",
      "refsource",
      tidyselect::starts_with("diag"),
      tidyselect::starts_with("ooh_outcome"),

diff --git a/R/process_extract_mental_health.R b/R/process_extract_mental_health.R
@@ -85,7 +85,7 @@ process_extract_mental_health <- function(data, year, write_to_disk = TRUE) {
       "hbrescode",
       "lca",
       "hscp",
-      "datazone",
+      "datazone2011",
       "location",
       "hbtreatcode",
       "stay",

diff --git a/R/read_extract_acute.R b/R/read_extract_acute.R
@@ -107,7 +107,7 @@ read_extract_acute <- function(year, file_path = get_boxi_extract_path(year = ye
       disch = "Discharge Type Code",
       falls_adm = "Falls Related Admission (01)",
       lca = "Geo Council Area Code",
-      datazone = "Geo Data Zone 2011",
+      datazone2011 = "Geo Data Zone 2011",
       postcode = "Geo Postcode [C]",
       hscp = "Geo HSCP of Residence Code - current",
       conc = "Lead Consultant/HCP Code",

diff --git a/R/read_extract_district_nursing.R b/R/read_extract_district_nursing.R
@@ -43,7 +43,7 @@ read_extract_district_nursing <- function(
       lca = "Patient Council Area Code (Contact)",
       postcode = "Patient Postcode [C] (Contact)",
       gpprac = "Practice Code (Contact)",
-      datazone = "Patient Data Zone 2011 (Contact)",
+      datazone2011 = "Patient Data Zone 2011 (Contact)",
       hbpraccode = "Practice NHS Board Code 9 (Contact)",
       hbtreatcode = "Treatment NHS Board Code 9",
       chi = "UPI Number [C]",

diff --git a/R/read_extract_mental_health.R b/R/read_extract_mental_health.R
@@ -83,7 +83,7 @@ read_extract_mental_health <- function(
       hbrescode = "NHS Board of Residence Code - current",
       lca = "Geo Council Area Code",
       hscp = "Geo HSCP of Residence Code - current",
-      datazone = "Geo Data Zone 2011",
+      datazone2011 = "Geo Data Zone 2011",
       location = "Treatment Location Code",
       hbtreatcode = "Treatment NHS Board Code - current",
       yearstay = "Occupied Bed Days (04)",

diff --git a/R/read_extract_nrs_deaths.R b/R/read_extract_nrs_deaths.R
@@ -39,7 +39,7 @@ read_extract_nrs_deaths <- function(
     dplyr::rename(
       death_location_code = "Death Location Code",
       lca = "Geo Council Area Code",
-      datazone = "Geo Data Zone 2011",
+      datazone2011 = "Geo Data Zone 2011",
       postcode = "Geo Postcode [C]",
       hscp = "Geo HSCP of Residence Code - current",
       death_board_occurrence = "NHS Board of Occurrence Code - current",

diff --git a/R/read_extract_ooh_consultations.R b/R/read_extract_ooh_consultations.R
@@ -20,7 +20,7 @@
      "Consultation End Date Time" = readr::col_datetime(
        format = "%Y/%m/%d %T"
      ),
      "KIS Accessed" = readr::col_factor(levels = c("Y", "N")),
      # All other columns are character type
      .default = readr::col_character()
    )
@@ -34,7 +34,7 @@
       postcode = "Patient Postcode [C]",
       hbrescode = "Patient NHS Board Code 9 - current",
       hscp = "HSCP of Residence Code Current",
-      datazone = "Patient Data Zone 2011",
+      datazone2011 = "Patient Data Zone 2011",
       gpprac = "Practice Code",
       ooh_case_id = "GUID",
       attendance_status = "Consultation Recorded",
@@ -43,7 +43,7 @@
      location = "Treatment Location Code",
      location_description = "Treatment Location Description",
      hbtreatcode = "Treatment NHS Board Code 9",
      kis_accessed = "KIS Accessed",
      refsource = "Referral Source",
      consultation_type = "Consultation Type",
      consultation_type_unmapped = "Consultation Type Unmapped"

diff --git a/R/run_episode_file.R b/R/run_episode_file.R
@@ -51,7 +51,7 @@ run_episode_file <- function(processed_data_list, year, write_to_disk = TRUE) {
         "cij_dis_spec",
         "cost_total_net",
         "hscp",
-        "datazone",
+        "datazone2011",
         "attendance_status",
         "deathdiag1",
         "deathdiag2",