diff --git a/.Rbuildignore b/.Rbuildignore
index 168a3e006..2cab1bda6 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -22,3 +22,4 @@
 ^_targets\.R$
 ^_targets\.yaml$
 ^_SPSS_archived$
+^run_targets_
diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt
index 7c038f875..c3f39305b 100644
--- a/.github/actions/spelling/expect.txt
+++ b/.github/actions/spelling/expect.txt
@@ -29,6 +29,7 @@ Classificat
 cls
 cmh
 CNWs
+Comhairle
 codecov
 commhosp
 congen
@@ -97,6 +98,7 @@ hjust
 hms
 homecare
 homev
+hscdiip
 hscp
 hscpnames
 htmlwidgets
@@ -115,6 +117,7 @@ keyring
 keytime
 keytimex
 kis
+lazydt
 lgl
 lintr
 los
@@ -122,6 +125,7 @@ ltc
 ltcs
 lubridate
 magrittr
+Matern
 markdownguide
 Mcbride
 mcmahon
@@ -148,6 +152,7 @@ outfile
 pandoc
 patflow
 pattype
+PCEC
 phs
 phsmethods
 phsopendata
@@ -177,6 +182,7 @@ reasonwait
 recid
 reflectoring
 refsource
+renviron
 rlang
 rmarkdown
 roxygen
@@ -215,6 +221,7 @@ submis
 tadm
 tarchetypes
 tbl
+Telecare
 telecare
 testthat
 thom
diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
index 060d818c5..babd1de81 100644
--- a/.github/workflows/R-CMD-check.yaml
+++ b/.github/workflows/R-CMD-check.yaml
@@ -2,9 +2,9 @@
 # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 on:
   push:
-    branches: [master, main, main-R]
+    branches: [master, main, development]
   pull_request:
-    branches: [master, main, main-R]
+    branches: [master, main, development]
 
 name: R-CMD-check
 
diff --git a/.github/workflows/lint-changed-files.yaml b/.github/workflows/lint-changed-files.yaml
index e057592b4..e962bdf44 100644
--- a/.github/workflows/lint-changed-files.yaml
+++ b/.github/workflows/lint-changed-files.yaml
@@ -2,7 +2,7 @@
 # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 on:
   pull_request:
-    branches: [main-R, master, main]
+    branches: [master, main, development]
 
 name: lint-changed-files
 
diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml
index 612ecb81a..c47424169 100644
--- a/.github/workflows/test-coverage.yaml
+++ b/.github/workflows/test-coverage.yaml
@@ -2,9 +2,9 @@
 # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 on:
   push:
-    branches: [master, main, main-R]
+    branches: [master, main, development]
   pull_request:
-    branches: [master, main, main-R]
+    branches: [master, main, development]
 
 name: test-coverage
 
diff --git a/DESCRIPTION b/DESCRIPTION
index a437b80cc..5123289dd 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -50,9 +50,11 @@ Imports:
     rmarkdown (>= 2.17),
     rstudioapi (>= 0.14),
     scales (>= 1.2.0),
-    slfhelper (>= 0.9.0),
+    slfhelper (>= 0.10.0),
     stringdist (>= 0.9.10),
     stringr (>= 1.5.0),
+    tarchetypes (>= 0.7.6),
+    targets (>= 1.2.0),
     tibble (>= 3.2.1),
     tidyr (>= 1.3.0),
     tidyselect (>= 1.2.0),
@@ -61,8 +63,6 @@ Suggests:
     covr (>= 3.6.1),
     roxygen2 (>= 7.2.3),
     spelling (>= 2.2),
-    tarchetypes (>= 0.7.5),
-    targets (>= 0.14.3),
     testthat (>= 3.1.7)
 Remotes: 
     Public-Health-Scotland/phsmethods,
diff --git a/NAMESPACE b/NAMESPACE
index 464cced34..c5dca28bd 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,6 +1,9 @@
 # Generated by roxygen2: do not edit by hand
 
 export("%>%")
+export(add_homelessness_date_flags)
+export(add_homelessness_flag)
+export(add_hri_variables)
 export(add_nsu_cohort)
 export(check_year_format)
 export(clean_up_free_text)
@@ -13,6 +16,8 @@ export(convert_hscp_to_hscpnames)
 export(convert_numeric_to_date)
 export(convert_sending_location_to_lca)
 export(convert_year_to_fyyear)
+export(create_episode_file)
+export(create_homelessness_lookup)
 export(create_individual_file)
 export(create_service_use_cohorts)
 export(end_fy)
@@ -29,6 +34,7 @@ export(get_demographic_cohorts_path)
 export(get_dev_dir)
 export(get_dn_costs_path)
 export(get_dn_raw_costs_path)
+export(get_existing_data_for_tests)
 export(get_file_path)
 export(get_gp_ooh_costs_path)
 export(get_gp_ooh_raw_costs_path)
@@ -49,6 +55,7 @@ export(get_practice_details_path)
 export(get_readcode_lookup_path)
 export(get_sc_at_episodes_path)
 export(get_sc_ch_episodes_path)
+export(get_sc_client_lookup_path)
 export(get_sc_demog_lookup_path)
 export(get_sc_hc_episodes_path)
 export(get_sc_sds_episodes_path)
@@ -59,9 +66,7 @@ export(get_slf_ch_name_lookup_path)
 export(get_slf_chi_deaths_path)
 export(get_slf_deaths_lookup_path)
 export(get_slf_dir)
-export(get_slf_ep_temp_path)
 export(get_slf_gpprac_path)
-export(get_slf_indiv_temp_path)
 export(get_slf_postcode_path)
 export(get_source_extract_path)
 export(get_sparra_path)
@@ -132,8 +137,10 @@ export(process_tests_nrs_deaths)
 export(process_tests_outpatients)
 export(process_tests_prescribing)
 export(process_tests_sc_ch_episodes)
+export(process_tests_sc_client_lookup)
 export(process_tests_sc_demographics)
 export(process_tests_sds)
+export(produce_episode_file_tests)
 export(produce_source_extract_tests)
 export(produce_test_comparison)
 export(read_extract_acute)
@@ -157,7 +164,7 @@ export(read_sc_all_alarms_telecare)
 export(read_sc_all_care_home)
 export(read_sc_all_home_care)
 export(read_sc_all_sds)
-export(run_episode_file)
+export(setup_keyring)
 export(start_fy)
 export(start_fy_quarter)
 export(start_next_fy_quarter)
diff --git a/NEWS.md b/NEWS.md
index cbcb62079..fdbb64c9c 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,6 +1,50 @@
-# March 2023 Update - Unreleased
+# September 2023 Update - Unreleased
+* Update of 2017/18 onwards to include bug fixes within the files. 
+* New 2023/24 files.
+  * No social care data available for the new 2023/24 file.
+* New NSU cohort for 2022/23 file.
+* SPD and SIMD updated.
+* Re-addition of:
+  * HRIs in individual file.
+  * Homelessness Flags.
+* Bug fixes: 
+  * Blank `datazone` in A&E. This has been fixed and was due to PC8 postcode format matching onto SLF pc lookup. 
+  * Large increase in preventable beddays. This was caused by a difference between the SPSS and R logic. The R code now
+    uses the SPSS logic, which brings the difference down to `3.3%`.
+  * Issue with `locality` which showed `locality` in each row instead of its true `locality`. This has now been fixed. 
+  * Duplicated CHI in the individual file. The issue was identified when trying to include HRIs. This has now been corrected. 
+* Internal changes to SLF development: 
+  * `DN` and `CMH` data are now archived in an HSCDIIP folder as the BOXI datamart is now closed down for these. Function `get_boxi_extract_path` has been updated to reflect this. 
+  * Tests updated to include `HSCP` count.
+  * Tests created for `Delayed Discharges` extract and `Social care Client lookup`.
 
 
+# June 2023 Update - Released 24-Jul-2023
+* 2011/12 -> 2013/14 – These files have not been altered, other than to make them available in a new file type (parquet).
+* 2017/18 – These files have been recreated using our new R pipeline, but the data has not changed. We did this so that we would have a good comparator file.
+* 2018/19 -> 2022/23 – These files have been recreated using the R pipeline and are also using updated data (as in a ‘normal’ update).
+* Files changed into parquet format. 
+* SLFhelper updated. 
+* Removal of `keydate1_dateformat` and `keydate2_dateformat`.
+* `dd_responsible_lca` – This variable now uses CA2019 codes instead of the 2-digit ‘old’ LCA code.
+* Preventable beddays - not able to calculate these correctly. Death fixes not included.
+* Variables not ordered in R like they used to be in SPSS.
+* End of HHG.
+* New variable `ch_postcode`.
+* Rename of variables `cost_total_net_incdnas`, `ooh_outcome.1`, `ooh_outcome.2`, `ooh_outcome.3`, `ooh_outcome.4`, `totalnodncontacts`.
+* HRIs not included.
+* Homelessness flags not included. 
+* Keep_population flag not included. 
+
+
+# March 2023 Update - Released 10-Mar-2023
+* 2021/22 episode and individual files refreshed with updated activity.
+* 2022/23 file updated and contains data up to the end of Q3. 
+* Social care data is available for 2022/23. 
+* Typo in the variable name `ooh_covid_assessment`
+* Next update in May as a test run in R but won't be released. 
+* Next release in June. 
+
 # December 2022 Update - Released 07-Dec-2022
 * Now using the 2022v2 Scottish Postcode Directory.
 * Now using the 2020 Urban Rural classifications (instead of the older 2016 ones), this means variables such as `URx_2016` will now be called `URx_2020`.
diff --git a/R/00-update_refs.R b/R/00-update_refs.R
index a462ffdba..aef1e0da4 100644
--- a/R/00-update_refs.R
+++ b/R/00-update_refs.R
@@ -7,7 +7,7 @@
 #'
 #' @family initialisation
 latest_update <- function() {
-  "Jun_2023"
+  "Sep_2023"
 }
 
 #' Previous update
@@ -61,7 +61,7 @@ previous_update <- function(months_ago = 3L, override = NULL) {
 #'
 #' @family initialisation
 get_dd_period <- function() {
-  "Jul16_Mar23"
+  "Jul16_Jun23"
 }
 
 #' The latest financial year for Cost uplift setting
@@ -74,5 +74,5 @@ get_dd_period <- function() {
 #'
 #' @family initialisation
 latest_cost_year <- function() {
-  "2223"
+  "2324"
 }
diff --git a/R/add_hri_variables.R b/R/add_hri_variables.R
new file mode 100644
index 000000000..710324646
--- /dev/null
+++ b/R/add_hri_variables.R
@@ -0,0 +1,142 @@
+#' Flag non-Scottish residents
+#'
+#' @details The variable `scottish_resident` can be in the range c(0:4) where
+#' \itemize{
+#' \item{scottish_resident = 0 when the postcode area is in `slf_pc_lookup` (Scottish)}
+#' \item{scottish_resident = 1 when resident is not Scottish}
+#' \item{scottish_resident = 2 when the postcode is missing or a dummy, and the gpprac is missing}
+#' \item{scottish_resident = 3 when the gpprac is not English and the postcode is missing}
+#' \item{scottish_resident = 4 when the gpprac is not English and the postcode is a dummy}
+#' }
+#' The intention is to only keep the records where scottish_resident = 0
+#'
+#' @inheritParams add_hri_variables
+#'
+#' @return `data` with `pc_area`, `scot_flag` and `scottish_resident` added
+flag_non_scottish_residents <- function(
+    data,
+    slf_pc_lookup) {
+  check_variables_exist(data, c("postcode", "gpprac"))
+
+  # Lookup of Scottish postcode areas (the leading letters of each postcode).
+  # str_extract() gives a character vector, str_match() would give a matrix.
+  pc_areas <- slf_pc_lookup %>%
+    dplyr::mutate(
+      pc_area = stringr::str_extract(postcode, "^[A-Z]{1,3}"),
+      scot_flag = TRUE
+    ) %>%
+    dplyr::distinct(pc_area, scot_flag)
+
+  # Create a flag, 'scottish_resident', to determine whether individuals are
+  # Scottish residents or not. Conditions are tested in order, first match wins.
+  return_data <- data %>%
+    dplyr::mutate(pc_area = stringr::str_extract(postcode, "^[A-Z]{1,3}")) %>%
+    dplyr::left_join(pc_areas, by = "pc_area") %>%
+    dplyr::mutate(
+      dummy_postcode = .data$postcode %in% c("BF010AA", "NF1 1AB", "NK010AA") |
+        stringr::str_sub(.data$postcode, 1, 4) %in% c("ZZ01", "ZZ61"),
+      eng_prac = .data$gpprac %in% c(99942, 99957, 99961, 99976, 99981, 99995, 99999),
+      scottish_resident = dplyr::case_when(
+        .data$scot_flag ~ 0L,
+        (is_missing(.data$postcode) | .data$dummy_postcode) & is.na(.data$gpprac) ~ 2L,
+        !.data$eng_prac & is_missing(.data$postcode) ~ 3L,
+        !.data$eng_prac & .data$dummy_postcode ~ 4L,
+        .default = 1L
+      )
+    ) %>%
+    dplyr::select(-"dummy_postcode", -"eng_prac")
+
+  return(return_data)
+}
+
+#' Add HRI variables to an SLF Individual File
+#'
+#' @details Keeps Scottish residents only, works out HRIs at Scotland, Health
+#' Board and LCA level, then joins these back onto `data` by `chi_variable`.
+#'
+#' @param data An SLF individual file.
+#' @param chi_variable Name of the CHI column to join on (Default: "chi").
+#' @param slf_pc_lookup The Source postcode lookup, defaults
+#' to [get_slf_postcode_path()] read using [read_file()].
+#'
+#' @return The individual file with HRI variables matched on
+#' @export
+add_hri_variables <- function(
+    data,
+    chi_variable = "chi",
+    slf_pc_lookup = read_file(
+      get_slf_postcode_path(),
+      col_select = "postcode"
+    )) {
+  hri_lookup <- data %>%
+    dplyr::select(
+      "year",
+      dplyr::all_of(chi_variable),
+      "postcode",
+      "gpprac",
+      "lca",
+      "hbrescode",
+      "health_net_cost",
+      "acute_episodes",
+      "mat_episodes",
+      "mh_episodes",
+      "gls_episodes",
+      "op_newcons_attendances",
+      # op_newcons_dnas,
+      "ae_attendances",
+      "pis_paid_items",
+      "ooh_cases"
+    ) %>%
+    flag_non_scottish_residents(slf_pc_lookup = slf_pc_lookup) %>%
+    dplyr::filter(scottish_resident == 0L) %>%
+    # Scotland: share of total cost, accumulated from the highest cost down
+    dplyr::mutate(
+      scotland_cost = sum(health_net_cost),
+      scotland_pct = (health_net_cost / scotland_cost) * 100
+    ) %>%
+    dplyr::arrange(dplyr::desc(health_net_cost)) %>%
+    dplyr::mutate(hri_scotp = cumsum(scotland_pct)) %>%
+    # Health Board: the same calculation within each hbrescode
+    dplyr::group_by(hbrescode) %>%
+    dplyr::mutate(
+      hb_cost = sum(health_net_cost),
+      hb_pct = (health_net_cost / hb_cost) * 100
+    ) %>%
+    dplyr::arrange(dplyr::desc(health_net_cost), .by_group = TRUE) %>%
+    dplyr::mutate(hri_hbp = cumsum(hb_pct)) %>%
+    dplyr::ungroup() %>%
+    # LCA: the same calculation within each lca
+    dplyr::group_by(lca) %>%
+    dplyr::mutate(
+      lca_cost = sum(health_net_cost),
+      lca_pct = (health_net_cost / lca_cost) * 100
+    ) %>%
+    dplyr::arrange(dplyr::desc(health_net_cost), .by_group = TRUE) %>%
+    dplyr::mutate(hri_lcap = cumsum(lca_pct)) %>%
+    dplyr::ungroup() %>%
+    # Flag rows whose cumulative cost percentage is within the first 50%
+    dplyr::mutate(
+      hri_scot = hri_scotp <= 50.0,
+      hri_hb = hri_hbp <= 50.0,
+      hri_lca = hri_lcap <= 50.0,
+      # No flag / percentage where the grouping code itself is missing
+      hri_hb = dplyr::if_else(is_missing(hbrescode), FALSE, hri_hb),
+      hri_hbp = dplyr::if_else(is_missing(hbrescode), NA, hri_hbp),
+      hri_lca = dplyr::if_else(is_missing(lca), FALSE, hri_lca),
+      hri_lcap = dplyr::if_else(is_missing(lca), NA, hri_lcap)
+    ) %>%
+    # Keep the join key and the HRI variables only; all_of() makes it explicit
+    dplyr::select(
+      dplyr::all_of(chi_variable),
+      "hri_scot",
+      "hri_scotp",
+      "hri_hb",
+      "hri_hbp",
+      "hri_lca",
+      "hri_lcap"
+    )
+
+  return_data <- dplyr::left_join(data, hri_lookup, by = chi_variable)
+
+  return(return_data)
+}
diff --git a/R/add_nsu_cohort.R b/R/add_nsu_cohort.R
index c5a26da12..00260bb8e 100644
--- a/R/add_nsu_cohort.R
+++ b/R/add_nsu_cohort.R
@@ -2,13 +2,17 @@
 #'
 #' @param data The input data frame
 #' @param year The year being processed
+#' @param nsu_cohort The NSU data for the year
 #'
 #' @return A data frame containing the Non-Service Users as additional rows
 #' @export
 #'
 #' @family episode file
 #' @seealso [get_nsu_path()]
-add_nsu_cohort <- function(data, year) {
+add_nsu_cohort <- function(
+    data,
+    year,
+    nsu_cohort = read_file(get_nsu_path(year))) {
   year_param <- year
 
   if (!check_year_valid(year, "NSU")) {
@@ -29,9 +33,9 @@ add_nsu_cohort <- function(data, year) {
     )
   )
 
-  matched <- dplyr::full_join(data,
-    # NSU cohort file
-    read_file(get_nsu_path(year)) %>%
+  matched <- dplyr::full_join(
+    data,
+    nsu_cohort %>%
       dplyr::mutate(
         dob = as.Date(.data[["dob"]]),
         gpprac = convert_eng_gpprac_to_dummy(.data[["gpprac"]])
@@ -110,7 +114,6 @@ add_nsu_cohort <- function(data, year) {
         .data[["chi"]]
       )
     ) %>%
-    # Remove the additional columns
     dplyr::select(-dplyr::contains("_nsu"), -"has_chi")
 
   return(return_df)
diff --git a/R/add_ppa_flag.R b/R/add_ppa_flag.R
index a6e9a175d..d0d0c4395 100644
--- a/R/add_ppa_flag.R
+++ b/R/add_ppa_flag.R
@@ -25,11 +25,11 @@ add_ppa_flag <- function(data) {
     )
   )
 
-  if (!(any(data$recid %in% c("01B", "02B", "04B", "GLS")))) {
-    nrecids <- length(unique(data$recid))
+  unique_recids <- unique(data[["recid"]])
+  if (!(any(unique_recids %in% c("01B", "02B", "04B", "GLS")))) {
     cli::cli_abort(
-      "None of the {nrecids} recid{?s} provided will relate to PPAs,
-      and the function will abort."
+      "None of the {length(unique_recids)} recid{?s} provided will relate
+      to PPAs, and the function will abort."
     )
   }
 
diff --git a/R/add_smr_type.R b/R/add_smr_type.R
index 180ea32c3..aa9e383bc 100644
--- a/R/add_smr_type.R
+++ b/R/add_smr_type.R
@@ -20,7 +20,7 @@ add_smr_type <- function(recid,
   # variable. Need to make sure to change all places where it is used as well.
 
   # Situation where some recids are not in the accepted values
-  if (any(!(recid %in% c(
+  if (!all(recid %in% c(
     "00B",
     "01B",
     "02B",
@@ -35,9 +35,7 @@ add_smr_type <- function(recid,
     "NRS",
     "OoH",
     "PIS"
-  )
-  )) &
-    !anyNA(recid)) {
+  )) && !anyNA(recid)) {
     cli::cli_warn(c("i" = "One or more values of {.var recid} do not have an
                    assignable {.var smrtype}"))
   }
@@ -51,7 +49,7 @@ add_smr_type <- function(recid,
   }
 
   # Situation where maternity records are present without a corresponding mpat
-  if (all(recid == "02B") & anyNA(mpat)) {
+  if (all(recid == "02B") && anyNA(mpat)) {
     cli::cli_abort(
       "In Maternity records, {.var mpat} is required to assign an smrtype,
                     and there are some {.val NA} values. Please check the data."
@@ -59,7 +57,7 @@ add_smr_type <- function(recid,
   }
 
   # Situation where acute records are present without a corresponding ipdc
-  if (all(recid %in% c("01B", "GLS")) & anyNA(ipdc)) {
+  if (all(recid %in% c("01B", "GLS")) && anyNA(ipdc)) {
     if (all(is.na(ipdc))) {
       cli::cli_abort(
         "In Acute records, {.var ipdc} is required to assign an smrtype, but
@@ -72,19 +70,21 @@ add_smr_type <- function(recid,
     )
   }
 
-  # Situation where Home Care records are present without a corresponding hc_service
-  if (all(recid == "HC") & anyNA(hc_service)) {
+  # Situation where Home Care records are present without
+  # a corresponding hc_service
+  if (all(recid == "HC") && anyNA(hc_service)) {
     cli::cli_abort(
       "In Home Care records, {.var hc_service} is required to assign an smrtype,
-                    and there are some {.val NA} values. Please check the data."
+      and there are some {.val NA} values. Please check the data."
     )
   }
 
-  # Situation where Homelessness records are present without a corresponding main_applicant_flag
-  if (all(recid == "HL1") & anyNA(main_applicant_flag)) {
+  # Situation where Homelessness records are present without a
+  # corresponding main_applicant_flag
+  if (all(recid == "HL1") && anyNA(main_applicant_flag)) {
     cli::cli_abort(
-      "In Homelessness records, {.var main_applicant_flag} is required to assign an smrtype,
-                    and there are some {.val NA} values. Please check the data."
+      "In Homelessness records, {.var main_applicant_flag} is required to assign
+      an smrtype, and there are some {.val NA} values. Please check the data."
     )
   }
 
@@ -92,12 +92,12 @@ add_smr_type <- function(recid,
   if (all(is.na(recid))) {
     cli::cli_abort(
       "Cannot assign {.var smrtype} when all {.var recid} are {.val NA},
-                   please check the data"
+      please check the data"
     )
   }
 
   # Situation where a maternity recid is given but no mpat marker
-  if (all(recid == "02B") & missing(mpat)) {
+  if (all(recid == "02B") && missing(mpat)) {
     cli::cli_abort(
       "An {.var mpat} vector has not been supplied, and therefore Maternity
                    records cannot be given an {.var smrtype}"
@@ -105,7 +105,7 @@ add_smr_type <- function(recid,
   }
 
   # Situation where an Acute/GLS recid is given but no ipdc marker
-  if (any(recid %in% c("01B", "GLS")) & missing(ipdc)) {
+  if (any(recid %in% c("01B", "GLS")) && missing(ipdc)) {
     cli::cli_abort(
       "An {.var ipdc} vector has not been supplied, and therefore Acute/GLS
                    records cannot be given an {.var smrtype}"
@@ -113,15 +113,16 @@ add_smr_type <- function(recid,
   }
 
   # Situation where a Home Care recid is given but no hc_service marker
-  if (any(recid == "HC") & missing(hc_service)) {
+  if (any(recid == "HC") && missing(hc_service)) {
     cli::cli_abort(
-      "An {.var hc_service} vector has not been supplied, and therefore Home Care
-                   records cannot be given an {.var smrtype}"
+      "An {.var hc_service} vector has not been supplied, and therefore
+      Home Care records cannot be given an {.var smrtype}"
     )
   }
 
-  # Situation where a Homelessness recid is given but no main_applicant_flag marker
-  if (any(recid == "HL1") & missing(main_applicant_flag)) {
+  # Situation where a Homelessness recid is given
+  # but no main_applicant_flag marker
+  if (any(recid == "HL1") && missing(main_applicant_flag)) {
     cli::cli_abort(
       "A {.var main_applicant_flag} vector has not been supplied, and therefore
                    Homelessness records cannot be given an {.var smrtype}"
@@ -158,28 +159,30 @@ add_smr_type <- function(recid,
       recid == "HL1" & main_applicant_flag == "N" ~ "HL1-Other"
     )
   } else if (all(recid == "OoH")) {
-    smrtype <- dplyr::case_when(
-      consultation_type == "DISTRICT NURSE" ~ "OOH-DN",
-      consultation_type == "DOCTOR ADVICE/NURSE ADVICE" ~ "OOH-Advice",
-      consultation_type == "HOME VISIT" ~ "OOH-HomeV",
-      consultation_type == "NHS 24 NURSE ADVICE" ~ "OOH-NHS24",
-      consultation_type == "PCEC/PCC" ~ "OOH-PCC",
-      consultation_type == "COVID19 ASSESSMENT" ~ "OOH-C19Ass",
-      consultation_type == "COVID19 ADVICE" ~ "OOH-C19Adv",
-      consultation_type == "COVID19 OTHER" ~ "OOH-C19Oth",
+    smrtype <- dplyr::case_match(
+      consultation_type,
+      "DISTRICT NURSE" ~ "OOH-DN",
+      "DOCTOR ADVICE/NURSE ADVICE" ~ "OOH-Advice",
+      "HOME VISIT" ~ "OOH-HomeV",
+      "NHS 24 NURSE ADVICE" ~ "OOH-NHS24",
+      "PCEC/PCC" ~ "OOH-PCC",
+      "COVID19 ASSESSMENT" ~ "OOH-C19Ass",
+      "COVID19 ADVICE" ~ "OOH-C19Adv",
+      "COVID19 OTHER" ~ "OOH-C19Oth",
       .default = "OOH-Other"
     )
   } else {
     # Recids that can be recoded with no identifier
-    smrtype <- dplyr::case_when(
-      recid == "00B" ~ "Outpatient",
-      recid == "04B" ~ "Psych-IP",
-      recid == "AE2" ~ "A & E",
-      recid == "CH" ~ "Care-Home",
-      recid == "CMH" ~ "Comm-MH",
-      recid == "DN" ~ "DN",
-      recid == "NRS" ~ "NRS Deaths",
-      recid == "PIS" ~ "PIS"
+    smrtype <- dplyr::case_match(
+      recid,
+      "00B" ~ "Outpatient",
+      "04B" ~ "Psych-IP",
+      "AE2" ~ "A & E",
+      "CH" ~ "Care-Home",
+      "CMH" ~ "Comm-MH",
+      "DN" ~ "DN",
+      "NRS" ~ "NRS Deaths",
+      "PIS" ~ "PIS"
     )
   }
 
diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi.R
similarity index 65%
rename from R/aggregate_by_chi_zihao.R
rename to R/aggregate_by_chi.R
index 7d9ce5ed3..8d9dff96d 100644
--- a/R/aggregate_by_chi_zihao.R
+++ b/R/aggregate_by_chi.R
@@ -7,7 +7,7 @@
 #' @importFrom data.table .SD
 #'
 #' @inheritParams create_individual_file
-aggregate_by_chi_zihao <- function(episode_file) {
+aggregate_by_chi <- function(episode_file, exclude_sc_var = FALSE) {
   cli::cli_alert_info("Aggregate by CHI function started at {Sys.time()}")
 
   # Convert to data.table
@@ -28,17 +28,33 @@ aggregate_by_chi_zihao <- function(episode_file) {
     )
   )
 
-  data.table::setnames(
-    episode_file,
-    c(
-      "ch_chi_cis", "cij_marker", "ooh_case_id"
-      # ,"hh_in_fy"
-    ),
-    c(
-      "ch_cis_episodes", "cij_total", "ooh_cases"
-      # ,"hl1_in_fy"
+  if (exclude_sc_var) {
+    data.table::setnames(
+      episode_file,
+      c(
+        "cij_marker",
+        "ooh_case_id"
+      ),
+      c(
+        "cij_total",
+        "ooh_cases"
+      )
     )
-  )
+  } else {
+    data.table::setnames(
+      episode_file,
+      c(
+        "ch_chi_cis",
+        "cij_marker",
+        "ooh_case_id"
+      ),
+      c(
+        "ch_cis_episodes",
+        "cij_total",
+        "ooh_cases"
+      )
+    )
+  }
 
   # column specification, grouped by chi
   # columns to select last
@@ -48,6 +64,9 @@ aggregate_by_chi_zihao <- function(episode_file) {
     "gpprac",
     vars_start_with(episode_file, "sc_")
   )
+  if (exclude_sc_var) {
+    cols2 <- cols2[!(cols2 %in% vars_start_with(episode_file, "sc_"))]
+  }
   # columns to count unique rows
   cols3 <- c(
     "ch_cis_episodes",
@@ -59,6 +78,9 @@ aggregate_by_chi_zihao <- function(episode_file) {
     "ooh_cases",
     "preventable_admissions"
   )
+  if (exclude_sc_var) {
+    cols3 <- cols3[!(cols3 %in% "ch_cis_episodes")]
+  }
   # columns to sum up
   cols4 <- c(
     vars_end_with(
@@ -90,9 +112,25 @@ aggregate_by_chi_zihao <- function(episode_file) {
     ),
     "health_net_cost_inc_dnas"
   )
-  cols4 <- cols4[!(cols4 %in% c("ch_cis_episodes"))]
+  cols4 <- cols4[!(cols4 %in% "ch_cis_episodes")]
+  if (exclude_sc_var) {
+    cols4 <-
+      cols4[!(cols4 %in% c(
+        vars_end_with(
+          episode_file,
+          c(
+            "alarms",
+            "telecare"
+          )
+        ),
+        vars_start_with(
+          episode_file,
+          "sds_option"
+        )
+      ))]
+  }
   # columns to select maximum
-  cols5 <- c("nsu", vars_contain(episode_file, c("hl1_in_fy")))
+  cols5 <- c("nsu", vars_contain(episode_file, "hl1_in_fy"))
   data.table::setnafill(episode_file, fill = 0L, cols = cols5)
   # compute
   individual_file_cols1 <- episode_file[,
@@ -126,9 +164,9 @@ aggregate_by_chi_zihao <- function(episode_file) {
   individual_file_cols6 <- episode_file[,
     .(
       preventable_beddays = ifelse(
-        max(cij_ppa, na.rm = TRUE),
-        max(cij_end_date) - min(cij_start_date),
-        NA_real_
+        any(cij_ppa, na.rm = TRUE),
+        as.integer(min(cij_end_date, end_fy(year)) - max(cij_start_date, start_fy(year))),
+        NA_integer_
       )
     ),
     # cij_marker has been renamed as cij_total
@@ -155,8 +193,13 @@ aggregate_by_chi_zihao <- function(episode_file) {
 }
 
 
-#' select columns ending with some patterns
-#' @describeIn select columns based on patterns
+#' Select columns according to a pattern
+#'
+#' @describeIn vars_select Choose variables ending in a given pattern.
+#'
+#' @param data The data from which to select columns/variables.
+#' @param vars The variables / pattern to find, as a character vector
+#' @param ignore_case Should case be ignored (Default: FALSE)
 vars_end_with <- function(data, vars, ignore_case = FALSE) {
   names(data)[stringr::str_ends(
     names(data),
@@ -166,8 +209,7 @@ vars_end_with <- function(data, vars, ignore_case = FALSE) {
   )]
 }
 
-#' select columns starting with some patterns
-#' @describeIn select columns based on patterns
+#' @describeIn vars_select Choose variables starting with a given pattern.
 vars_start_with <- function(data, vars, ignore_case = FALSE) {
   names(data)[stringr::str_starts(
     names(data),
@@ -177,35 +219,41 @@ vars_start_with <- function(data, vars, ignore_case = FALSE) {
   )]
 }
 
-#' select columns contains some characters
-#' @describeIn select columns based on patterns
+#' @describeIn vars_select Choose variables which contain a given pattern.
 vars_contain <- function(data, vars, ignore_case = FALSE) {
-  names(data)[stringr::str_detect(
+  stringr::str_subset(
     names(data),
     stringr::regex(paste(vars, collapse = "|"),
       ignore_case = ignore_case
     )
-  )]
+  )
 }
 
-#' Aggregate CIS episodes
+#' Aggregate Care Home episodes to ch_cis
 #'
 #' @description Aggregate CH variables by CHI and CIS.
 #'
 #' @inheritParams create_individual_file
-aggregate_ch_episodes_zihao <- function(episode_file) {
+aggregate_ch_episodes <- function(episode_file) {
   cli::cli_alert_info("Aggregate ch episodes function started at {Sys.time()}")
 
   # Convert to data.table
   data.table::setDT(episode_file)
 
   # Perform grouping and aggregation
-  episode_file <- episode_file[, `:=`(
-    ch_no_cost = max(ch_no_cost),
-    ch_ep_start = min(record_keydate1),
-    ch_ep_end = max(ch_ep_end),
-    ch_cost_per_day = mean(ch_cost_per_day)
-  ), by = c("chi", "ch_chi_cis")]
+  episode_file[, c(
+    "ch_no_cost",
+    "ch_ep_start",
+    "ch_ep_end",
+    "ch_cost_per_day"
+  ) := list(
+    max(ch_no_cost),
+    min(record_keydate1),
+    max(ch_ep_end),
+    mean(ch_cost_per_day)
+  ),
+  by = c("chi", "ch_chi_cis")
+  ]
 
   # Convert back to tibble if needed
   episode_file <- tibble::as_tibble(episode_file)
diff --git a/R/calculate_measures.R b/R/calculate_measures.R
index 4f23c1f6d..a8b7510b8 100644
--- a/R/calculate_measures.R
+++ b/R/calculate_measures.R
@@ -27,7 +27,7 @@ calculate_measures <- function(
   measure <- match.arg(measure)
 
   if (!is.null(group_by)) {
-    group_by <- match.arg(group_by, c("recid"))
+    group_by <- match.arg(group_by, "recid")
 
     if (group_by == "recid") {
       data <- data %>%
diff --git a/R/check_variables_exist.R b/R/check_variables_exist.R
index 6effdffd2..82bccaf4f 100644
--- a/R/check_variables_exist.R
+++ b/R/check_variables_exist.R
@@ -24,10 +24,8 @@ check_variables_exist <- function(data, variables) {
   } else {
     missing_variables <- variables[which(!variables_present)]
 
-    n_missing <- length(missing_variables)
-
     cli::cli_abort(
-      "{cli::qty(n_missing)}Variable{?s} {.val {missing_variables}} {?is/are}
+      "{cli::qty(length(missing_variables))}Variable{?s} {.val {missing_variables}} {?is/are}
       required, but {?is/are} missing from {.arg data}."
     )
   }
diff --git a/R/check_year_format.R b/R/check_year_format.R
index 8fcb29aab..2fa2dedfc 100644
--- a/R/check_year_format.R
+++ b/R/check_year_format.R
@@ -28,8 +28,8 @@ check_year_format <- function(year, format = "fyyear") {
 
   format <- match.arg(arg = format, choices = c("fyyear", "alternate"))
 
-  first_part <- as.integer(substr(year, 1L, 2L))
-  second_part <- as.integer(substr(year, 3L, 4L))
+  first_part <- as.integer(stringr::str_sub(year, 1L, 2L))
+  second_part <- as.integer(stringr::str_sub(year, 3L, 4L))
 
   if (format == "fyyear") {
     if (any(first_part + 1L != second_part)) {
@@ -40,7 +40,7 @@ check_year_format <- function(year, format = "fyyear") {
       ))
     }
   } else if (format == "alternate") {
-    if (any(!(first_part %in% 18L:20L))) {
+    if (!all(first_part %in% 18L:20L)) {
       cli::cli_abort(c(
         "The {.var year} has been entered in the wrong format.",
         "Try again using the alternate form, e.g. {.val 2017}",
@@ -51,9 +51,11 @@ check_year_format <- function(year, format = "fyyear") {
       count_bad_values <- sum(possible_bad_values)
 
       cli::cli_warn(c(
-        "{cli::qty(count_bad_values)}{?A/Some} {.var year} value{?s} ha{?s/ve} likely been entered in the wrong format.",
+        "{cli::qty(count_bad_values)}{?A/Some} {.var year} value{?s} ha{?s/ve}
+        likely been entered in the wrong format.",
         "i" = "{.val {year[possible_bad_values]}}",
-        "You might want to check and try again using the alternate form, e.g. {.val 2017}",
+        "You might want to check and try again using the alternate form,
+        e.g. {.val 2017}",
         "Or use the function {.fun convert_fyyear_to_year}."
       ))
     }
diff --git a/R/check_year_valid.R b/R/check_year_valid.R
index 9f496dc05..5491709f0 100644
--- a/R/check_year_valid.R
+++ b/R/check_year_valid.R
@@ -38,14 +38,16 @@ check_year_valid <- function(
     return(FALSE)
   } else if (year <= "1617" && type %in% c("CH", "HC", "SDS", "AT")) {
     return(FALSE)
-  } else if (year <= "1718" && type %in% c("HHG")) {
+  } else if (year <= "1718" && type %in% "HHG") {
     return(FALSE)
   } else if (year >= "2122" && type %in% c("CMH", "DN")) {
     return(FALSE)
-  } else if (year >= "2223" && type %in% c("NSU")) {
+  } else if (year >= "2324" && type %in% "NSU") {
     return(FALSE)
   } else if (year >= "2324" && type %in% c("SPARRA", "HHG")) {
     return(FALSE)
+  } else if (year >= "2324" && type %in% c("CH", "HC", "SDS", "AT")) {
+    return(FALSE)
   }
 
   return(TRUE)
diff --git a/R/clean_up_free_text.R b/R/clean_up_free_text.R
index fb9e6ae51..d74a2fa80 100644
--- a/R/clean_up_free_text.R
+++ b/R/clean_up_free_text.R
@@ -14,9 +14,10 @@
 #' @export
 #' @examples
 #' clean_up_free_text("hiwSDS SD. h")
-clean_up_free_text <- function(string,
-                               case_to = c("upper", "lower", "sentence", "title", "none"),
-                               remove_punct = TRUE) {
+clean_up_free_text <- function(
+    string,
+    case_to = c("upper", "lower", "sentence", "title", "none"),
+    remove_punct = TRUE) {
   if (missing(case_to)) case_to <- "title"
 
   case_to <- match.arg(case_to)
diff --git a/R/compute_mid_year_age.R b/R/compute_mid_year_age.R
index 4db1632d0..0e2483cf7 100644
--- a/R/compute_mid_year_age.R
+++ b/R/compute_mid_year_age.R
@@ -1,6 +1,7 @@
 #' Compute Age at Midpoint of Year
 #'
-#' @description Compute the age of a client at the midpoint of the year - 30-09-YYYY
+#' @description Compute the age of a client at the midpoint of the year -
+#' 30-09-YYYY
 #'
 #' @param fyyear current financial year
 #' @param dob date of birth of the clients
@@ -19,7 +20,7 @@
 compute_mid_year_age <- function(fyyear, dob) {
   age_intervals <- lubridate::interval(
     start = dob,
-    end = as.Date(midpoint_fy(fyyear))
+    end = midpoint_fy(fyyear)
   )
 
   ages <- lubridate::as.period(age_intervals)$year
diff --git a/R/convert_ca_to_lca.R b/R/convert_ca_to_lca.R
index b1537ef11..518d7e8fb 100644
--- a/R/convert_ca_to_lca.R
+++ b/R/convert_ca_to_lca.R
@@ -14,39 +14,40 @@
 #' @family code functions
 #' @seealso convert_sending_location_to_lca
 convert_ca_to_lca <- function(ca_var) {
-  lca <- dplyr::case_when(
-    ca_var == "S12000033" | ca_var == "Aberdeen City" ~ "01",
-    ca_var == "S12000034" | ca_var == "Aberdeenshire" ~ "02",
-    ca_var == "S12000041" | ca_var == "Angus" ~ "03",
-    ca_var == "S12000035" | ca_var == "Argyll & Bute" ~ "04",
-    ca_var == "S12000026" | ca_var == "Scottish Borders" ~ "05",
-    ca_var == "S12000005" | ca_var == "Clackmannanshire" ~ "06",
-    ca_var == "S12000039" | ca_var == "West Dunbartonshire" ~ "07",
-    ca_var == "S12000006" | ca_var == "Dumfries and Galloway" ~ "08",
-    ca_var == "S12000042" | ca_var == "Dundee City" ~ "09",
-    ca_var == "S12000008" | ca_var == "East Ayrshire" ~ "10",
-    ca_var == "S12000045" | ca_var == "East Dunbartonshire" ~ "11",
-    ca_var == "S12000010" | ca_var == "East Lothian" ~ "12",
-    ca_var == "S12000011" | ca_var == "East Renfrewshire" ~ "13",
-    ca_var == "S12000036" | ca_var == "City of Edinburgh" ~ "14",
-    ca_var == "S12000014" | ca_var == "Falkirk" ~ "15",
-    ca_var %in% c("S12000015", "S12000047") | ca_var == "Fife" ~ "16",
-    ca_var %in% c("S12000046", "S12000049") | ca_var == "Glasgow City" ~ "17",
-    ca_var == "S12000017" | ca_var == "Highland" ~ "18",
-    ca_var == "S12000018" | ca_var == "Inverclyde" ~ "19",
-    ca_var == "S12000019" | ca_var == "Midlothian" ~ "20",
-    ca_var == "S12000020" | ca_var == "Moray" ~ "21",
-    ca_var == "S12000021" | ca_var == "North Ayrshire" ~ "22",
-    ca_var %in% c("S12000044", "S12000050") | ca_var == "North Lanarkshire" ~ "23",
-    ca_var == "S12000023" | ca_var == "Orkney" ~ "24",
-    ca_var %in% c("S12000024", "S12000048") | ca_var == "Perth and Kinross" ~ "25",
-    ca_var == "S12000038" | ca_var == "Renfrewshire" ~ "26",
-    ca_var == "S12000027" | ca_var == "Shetland Islands" ~ "27",
-    ca_var == "S12000028" | ca_var == "South Ayrshire" ~ "28",
-    ca_var == "S12000029" | ca_var == "South Lanarkshire" ~ "29",
-    ca_var == "S12000030" | ca_var == "Stirling" ~ "30",
-    ca_var == "S12000040" | ca_var == "West Lothian" ~ "31",
-    ca_var == "S12000013" | ca_var == "Na h-Eileanan Siar" | ca_var == "Comhairle nan Eilean Siar" ~ "32"
+  lca <- dplyr::case_match(
+    ca_var,
+    c("S12000033", "Aberdeen City") ~ "01",
+    c("S12000034", "Aberdeenshire") ~ "02",
+    c("S12000041", "Angus") ~ "03",
+    c("S12000035", "Argyll & Bute") ~ "04",
+    c("S12000026", "Scottish Borders") ~ "05",
+    c("S12000005", "Clackmannanshire") ~ "06",
+    c("S12000039", "West Dunbartonshire") ~ "07",
+    c("S12000006", "Dumfries and Galloway") ~ "08",
+    c("S12000042", "Dundee City") ~ "09",
+    c("S12000008", "East Ayrshire") ~ "10",
+    c("S12000045", "East Dunbartonshire") ~ "11",
+    c("S12000010", "East Lothian") ~ "12",
+    c("S12000011", "East Renfrewshire") ~ "13",
+    c("S12000036", "City of Edinburgh") ~ "14",
+    c("S12000014", "Falkirk") ~ "15",
+    c("S12000015", "S12000047", "Fife") ~ "16",
+    c("S12000046", "S12000049", "Glasgow City") ~ "17",
+    c("S12000017", "Highland") ~ "18",
+    c("S12000018", "Inverclyde") ~ "19",
+    c("S12000019", "Midlothian") ~ "20",
+    c("S12000020", "Moray") ~ "21",
+    c("S12000021", "North Ayrshire") ~ "22",
+    c("S12000044", "S12000050", "North Lanarkshire") ~ "23",
+    c("S12000023", "Orkney") ~ "24",
+    c("S12000024", "S12000048", "Perth and Kinross") ~ "25",
+    c("S12000038", "Renfrewshire") ~ "26",
+    c("S12000027", "Shetland Islands") ~ "27",
+    c("S12000028", "South Ayrshire") ~ "28",
+    c("S12000029", "South Lanarkshire") ~ "29",
+    c("S12000030", "Stirling") ~ "30",
+    c("S12000040", "West Lothian") ~ "31",
+    c("S12000013", "Na h-Eileanan Siar", "Comhairle nan Eilean Siar") ~ "32"
   )
   return(lca)
 }
diff --git a/R/convert_codes_to_name.R b/R/convert_codes_to_name.R
index 4d6fd6b67..2b44109fd 100644
--- a/R/convert_codes_to_name.R
+++ b/R/convert_codes_to_name.R
@@ -14,38 +14,39 @@
 #'
 #' @family code functions
 convert_hscp_to_hscpnames <- function(hscp) {
-  hscpnames <- dplyr::case_when(
-    hscp == "S37000001" ~ "Aberdeen City",
-    hscp == "S37000002" ~ "Aberdeenshire",
-    hscp == "S37000003" ~ "Angus",
-    hscp == "S37000004" ~ "Argyll and Bute",
-    hscp == "S37000005" ~ "Clackmannanshire and Stirling",
-    hscp == "S37000006" ~ "Dumfries and Galloway",
-    hscp == "S37000007" ~ "Dundee City",
-    hscp == "S37000008" ~ "East Ayrshire",
-    hscp == "S37000009" ~ "East Dunbartonshire",
-    hscp == "S37000010" ~ "East Lothian",
-    hscp == "S37000011" ~ "East Renfrewshire",
-    hscp == "S37000012" ~ "Edinburgh",
-    hscp == "S37000013" ~ "Falkirk",
-    hscp == "S37000016" ~ "Highland",
-    hscp == "S37000017" ~ "Inverclyde",
-    hscp == "S37000018" ~ "Midlothian",
-    hscp == "S37000019" ~ "Moray",
-    hscp == "S37000020" ~ "North Ayrshire",
-    hscp == "S37000022" ~ "Orkney Islands",
-    hscp == "S37000024" ~ "Renfrewshire",
-    hscp == "S37000025" ~ "Scottish Borders",
-    hscp == "S37000026" ~ "Shetland Islands",
-    hscp == "S37000027" ~ "South Ayrshire",
-    hscp == "S37000028" ~ "South Lanarkshire",
-    hscp == "S37000029" ~ "West Dunbartonshire",
-    hscp == "S37000030" ~ "West Lothian",
-    hscp == "S37000031" ~ "Western Isles",
-    hscp == "S37000032" ~ "Fife",
-    hscp == "S37000033" ~ "Perth and Kinross",
-    hscp == "S37000034" ~ "Glasgow City",
-    hscp == "S37000035" ~ "North Lanarkshire"
+  hscpnames <- dplyr::case_match(
+    hscp,
+    "S37000001" ~ "Aberdeen City",
+    "S37000002" ~ "Aberdeenshire",
+    "S37000003" ~ "Angus",
+    "S37000004" ~ "Argyll and Bute",
+    "S37000005" ~ "Clackmannanshire and Stirling",
+    "S37000006" ~ "Dumfries and Galloway",
+    "S37000007" ~ "Dundee City",
+    "S37000008" ~ "East Ayrshire",
+    "S37000009" ~ "East Dunbartonshire",
+    "S37000010" ~ "East Lothian",
+    "S37000011" ~ "East Renfrewshire",
+    "S37000012" ~ "Edinburgh",
+    "S37000013" ~ "Falkirk",
+    "S37000016" ~ "Highland",
+    "S37000017" ~ "Inverclyde",
+    "S37000018" ~ "Midlothian",
+    "S37000019" ~ "Moray",
+    "S37000020" ~ "North Ayrshire",
+    "S37000022" ~ "Orkney Islands",
+    "S37000024" ~ "Renfrewshire",
+    "S37000025" ~ "Scottish Borders",
+    "S37000026" ~ "Shetland Islands",
+    "S37000027" ~ "South Ayrshire",
+    "S37000028" ~ "South Lanarkshire",
+    "S37000029" ~ "West Dunbartonshire",
+    "S37000030" ~ "West Lothian",
+    "S37000031" ~ "Western Isles",
+    "S37000032" ~ "Fife",
+    "S37000033" ~ "Perth and Kinross",
+    "S37000034" ~ "Glasgow City",
+    "S37000035" ~ "North Lanarkshire"
   )
   return(hscpnames)
 }
@@ -66,22 +67,22 @@ convert_hscp_to_hscpnames <- function(hscp) {
 #'
 #' @family code functions
 convert_hb_to_hbnames <- function(hb) {
-  hbnames <- dplyr::case_when(
-    hb == "S08000015" ~ "Ayrshire and Arran",
-    hb == "S08000016" ~ "Borders",
-    hb == "S08000017" ~ "Dumfries and Galloway",
-    hb == "S08000019" ~ "Forth Valley",
-    hb == "S08000020" ~ "Grampian",
-    hb == "S08000022" ~ "Highland",
-    hb == "S08000024" ~ "Lothian",
-    hb == "S08000025" ~ "Orkney",
-    hb == "S08000026" ~ "Shetland",
-    hb == "S08000028" ~ "Western Isles",
-    hb == "S08000029" ~ "Fife",
-    hb == "S08000030" ~ "Tayside",
-    hb == "S08000031" ~ "Greater Glasgow and Clyde",
-    hb == "S08000032" ~ "Lanarkshire"
+  hbnames <- dplyr::case_match(
+    hb,
+    "S08000015" ~ "Ayrshire and Arran",
+    "S08000016" ~ "Borders",
+    "S08000017" ~ "Dumfries and Galloway",
+    "S08000019" ~ "Forth Valley",
+    "S08000020" ~ "Grampian",
+    "S08000022" ~ "Highland",
+    "S08000024" ~ "Lothian",
+    "S08000025" ~ "Orkney",
+    "S08000026" ~ "Shetland",
+    "S08000028" ~ "Western Isles",
+    "S08000029" ~ "Fife",
+    "S08000030" ~ "Tayside",
+    "S08000031" ~ "Greater Glasgow and Clyde",
+    "S08000032" ~ "Lanarkshire"
   )
-
   return(hbnames)
 }
diff --git a/R/convert_date_types.R b/R/convert_date_types.R
index a008b73f4..4402753a3 100644
--- a/R/convert_date_types.R
+++ b/R/convert_date_types.R
@@ -29,5 +29,9 @@ convert_date_to_numeric <- function(date) {
 #'
 #' @family date functions
 convert_numeric_to_date <- function(numeric_date) {
-  as.Date(lubridate::fast_strptime(as.character(numeric_date), "%Y%m%d", tz = "UTC"))
+  as.Date(lubridate::fast_strptime(
+    x = as.character(numeric_date),
+    format = "%Y%m%d",
+    tz = "UTC"
+  ))
 }
diff --git a/R/convert_sending_location_to_lca.R b/R/convert_sending_location_to_lca.R
index 21d14b676..ff7e51db1 100644
--- a/R/convert_sending_location_to_lca.R
+++ b/R/convert_sending_location_to_lca.R
@@ -9,46 +9,49 @@
 #' @export
 #'
 #' @examples
-#' sending_location <- c("100", "120")
+#' sending_location <- c(100, 120)
 #' convert_sending_location_to_lca(sending_location)
 #'
 #' @family code functions
 #'
 #' @seealso convert_ca_to_lca
 convert_sending_location_to_lca <- function(sending_location) {
-  lca <- dplyr::case_when(
-    sending_location == "100" ~ "01", # Aberdeen City
-    sending_location == "110" ~ "02", # Aberdeenshire
-    sending_location == "120" ~ "03", # Angus
-    sending_location == "130" ~ "04", # Argyll and Bute
-    sending_location == "355" ~ "05", # Scottish Borders
-    sending_location == "150" ~ "06", # Clackmannanshire
-    sending_location == "395" ~ "07", # West Dumbartonshire
-    sending_location == "170" ~ "08", # Dumfries and Galloway
-    sending_location == "180" ~ "09", # Dundee City
-    sending_location == "190" ~ "10", # East Ayrshire
-    sending_location == "200" ~ "11", # East Dunbartonshire
-    sending_location == "210" ~ "12", # East Lothian
-    sending_location == "220" ~ "13", # East Renfrewshire
-    sending_location == "230" ~ "14", # City of Edinburgh
-    sending_location == "240" ~ "15", # Falkirk
-    sending_location == "250" ~ "16", # Fife
-    sending_location == "260" ~ "17", # Glasgow City
-    sending_location == "270" ~ "18", # Highland
-    sending_location == "280" ~ "19", # Inverclyde
-    sending_location == "290" ~ "20", # Midlothian
-    sending_location == "300" ~ "21", # Moray
-    sending_location == "310" ~ "22", # North Ayrshire
-    sending_location == "320" ~ "23", # North Lanarkshire
-    sending_location == "330" ~ "24", # Orkney Islands
-    sending_location == "340" ~ "25", # Perth and Kinross
-    sending_location == "350" ~ "26", # Renfrewshire
-    sending_location == "360" ~ "27", # Shetland Islands
-    sending_location == "370" ~ "28", # South Ayrshire
-    sending_location == "380" ~ "29", # South Lanarkshire
-    sending_location == "390" ~ "30", # Stirling
-    sending_location == "400" ~ "31", # West Lothian
-    sending_location == "235" ~ "32" # Na_h_Eileanan_Siar
+  lca <- dplyr::case_match(
+    sending_location,
+    100L ~ "01", # Aberdeen City
+    110L ~ "02", # Aberdeenshire
+    120L ~ "03", # Angus
+    130L ~ "04", # Argyll and Bute
+    355L ~ "05", # Scottish Borders
+    150L ~ "06", # Clackmannanshire
+    395L ~ "07", # West Dunbartonshire
+    170L ~ "08", # Dumfries and Galloway
+    180L ~ "09", # Dundee City
+    190L ~ "10", # East Ayrshire
+    200L ~ "11", # East Dunbartonshire
+    210L ~ "12", # East Lothian
+    220L ~ "13", # East Renfrewshire
+    230L ~ "14", # City of Edinburgh
+    240L ~ "15", # Falkirk
+    250L ~ "16", # Fife
+    260L ~ "17", # Glasgow City
+    270L ~ "18", # Highland
+    280L ~ "19", # Inverclyde
+    290L ~ "20", # Midlothian
+    300L ~ "21", # Moray
+    310L ~ "22", # North Ayrshire
+    320L ~ "23", # North Lanarkshire
+    330L ~ "24", # Orkney Islands
+    340L ~ "25", # Perth and Kinross
+    350L ~ "26", # Renfrewshire
+    360L ~ "27", # Shetland Islands
+    370L ~ "28", # South Ayrshire
+    380L ~ "29", # South Lanarkshire
+    390L ~ "30", # Stirling
+    400L ~ "31", # West Lothian
+    235L ~ "32", # Na_h_Eileanan_Siar
+    .default = NA_character_
   )
+
   return(lca)
 }
diff --git a/R/convert_year_types.R b/R/convert_year_types.R
index 1ba904e3d..8b9b04265 100644
--- a/R/convert_year_types.R
+++ b/R/convert_year_types.R
@@ -16,7 +16,7 @@
 convert_fyyear_to_year <- function(fyyear) {
   fyyear <- check_year_format(year = fyyear, format = "fyyear")
 
-  year <- paste0("20", substr(fyyear, 1L, 2L))
+  year <- paste0("20", stringr::str_sub(fyyear, 1L, 2L))
 
   return(year)
 }
@@ -39,8 +39,8 @@ convert_fyyear_to_year <- function(fyyear) {
 convert_year_to_fyyear <- function(year) {
   year <- check_year_format(year = year, format = "alternate")
 
-  first_part <- substr(year, 1L, 2L)
-  second_part <- substr(year, 3L, 4L)
+  first_part <- stringr::str_sub(year, 1L, 2L)
+  second_part <- stringr::str_sub(year, 3L, 4L)
 
   fyyear <-
     dplyr::if_else(
@@ -53,7 +53,8 @@ convert_year_to_fyyear <- function(year) {
     non_21c <- which(first_part != "20")
 
     cli::cli_warn(c(
-      "i" = "{cli::qty(length(non_21c))}{?A/Some} value{?s} w{?as/ere} not in the 21st century i.e. not {.val 20xx}",
+      "i" = "{cli::qty(length(non_21c))}{?A/Some} value{?s} w{?as/ere}
+      not in the 21st century i.e. not {.val 20xx}",
       "This may have produced unexpected results, specifically:",
       "*" = "{.val {year[non_21c]}} -> {.val {fyyear[non_21c]}}"
     ))
diff --git a/R/correct_demographics.R b/R/correct_demographics.R
index 67bb39abe..d7ef6f469 100644
--- a/R/correct_demographics.R
+++ b/R/correct_demographics.R
@@ -59,13 +59,13 @@ correct_demographics <- function(data, year) {
           `min`
         ) ~ chi_dob_min,
         # If they have a GLS record and the age is broadly correct, assume older
-        dplyr::between(chi_age_max, 50, 130) &
+        dplyr::between(chi_age_max, 50L, 130L) &
           recid == "GLS" ~ chi_dob_min,
-        # If a congenital defect lines up with a dob, assume it is correct
+        # If a congenital defect lines up with a DoB, assume it is correct
         chi_dob_max == congen_date ~ chi_dob_max,
         chi_dob_min == congen_date ~ chi_dob_min,
         # If being older makes them over 113, assume they are younger
-        chi_age_max > 113 ~ chi_dob_max
+        chi_age_max > 113L ~ chi_dob_max
       )
     ) %>%
     # If we still don't have an age, try and fill it in from other records.
@@ -74,7 +74,7 @@ correct_demographics <- function(data, year) {
     dplyr::ungroup() %>%
     # Fill in the ages for any that are left.
     dplyr::mutate(
-      age = compute_mid_year_age(year, .data$dob),
+      age = compute_mid_year_age(year, .data$dob)
     ) %>%
     # Fill in gender from CHI if it's missing.
     dplyr::mutate(
diff --git a/R/cost_uplift.R b/R/cost_uplift.R
index 04bd9917f..2bb1d4c1f 100644
--- a/R/cost_uplift.R
+++ b/R/cost_uplift.R
@@ -35,15 +35,20 @@ apply_cost_uplift <- function(data) {
 #'
 #' @return episode data with a uplift scale
 lookup_uplift <- function(data) {
-  # We have set uplifts to use for 2020/21, 2021/22 and 2022/23, provided by Paul Leak.
+  # We have set uplifts to use for 2020/21, 2021/22 and 2022/23,
+  # provided by Paul Leak.
   # For older years, don't uplift.
-  # For years after 2022/23 uplift by an additional 1% per year after the latest cost year (2022/23)
-  # For non plics recids use uplift of 1 so we won't change anything.
+  # For years after 2022/23 uplift by an additional 1% per year after the latest
+  # cost year (2022/23)
+  # For non PLICS recids use uplift of 1 so we won't change anything.
 
   # to accelerate, create a data frame of year and uplift for match-joining
   start_year <- 10L
   end_year <- as.integer(format(Sys.Date(), "%y"))
-  year <- paste0(start_year:end_year, (start_year + 1):(end_year + 1)) %>% as.integer()
+  year <- as.integer(paste0(
+    start_year:end_year,
+    (start_year + 1L):(end_year + 1L)
+  ))
   uplift_df <- tibble::tibble(year,
     uplift = 1.0
   ) %>%
@@ -52,25 +57,27 @@ lookup_uplift <- function(data) {
 
   uplift_df <- uplift_df %>%
     dplyr::mutate(uplift = dplyr::case_when(
-      # We have set uplifts to use for 2020/21, 2021/22 and 2022/23, provided by Paul Leak.
+      # We have set uplifts to use for 2020/21, 2021/22 and 2022/23,
+      # provided by Paul Leak.
       year == 2021L ~ 1.015,
       year == 2122L ~ 1.015 * 1.041,
       year == 2223L ~ 1.015 * 1.041 * 1.062,
-      # For years after 2022/23 uplift by an additional 1% per year after the latest cost year (2022/23)
+      # For years after 2022/23 uplift by an additional 1% per year after
+      # the latest cost year (2022/23)
       year > as.integer(latest_cost_year()) ~ (1.015 * 1.041 * 1.062) * (1.01^(.data$row_no - latest_cost_year_row)),
       # For older years, don't uplift.
-      TRUE ~ 1
+      .default = 1.0
     )) %>%
     dplyr::mutate(year = as.character(.data$year)) %>%
     dplyr::select(-"row_no")
 
   data <- data %>%
     dplyr::left_join(uplift_df, by = "year") %>%
-    # For non plics recids use uplift of 1 so we won't change anything.
+    # For non PLICS recids use uplift of 1 so we won't change anything.
     dplyr::mutate(uplift = dplyr::if_else(
       .data$recid %in% c("00B", "01B", "GLS", "02B", "04B", "AE2"),
       .data$uplift,
-      1
+      1.0
     ))
 
   return(data)
diff --git a/R/create_demog_test_flags.R b/R/create_demog_test_flags.R
index 0968eec06..3023292ce 100644
--- a/R/create_demog_test_flags.R
+++ b/R/create_demog_test_flags.R
@@ -13,45 +13,14 @@ create_demog_test_flags <- function(data) {
     dplyr::arrange(.data$chi) %>%
     # create test flags
     dplyr::mutate(
-      valid_chi = dplyr::if_else(
-        phsmethods::chi_check(.data$chi) == "Valid CHI",
-        1L,
-        0L
-      ),
-      unique_chi = dplyr::if_else(
-        dplyr::lag(.data$chi) != .data$chi,
-        1L,
-        0L
-      ),
-      n_missing_chi = dplyr::if_else(
-        is_missing(.data$chi),
-        1L,
-        0L
-      ),
-      n_males = dplyr::if_else(
-        .data$gender == 1L,
-        1L,
-        0L
-      ),
-      n_females = dplyr::if_else(
-        .data$gender == 2L,
-        1L,
-        0L
-      ),
-      # n_postcode = dplyr::if_else(
-      #   is.na(.data$postcode) | .data$postcode == "",
-      #   0L,
-      #   1L
-      # ),
-      # n_missing_postcode = dplyr::if_else(
-      #   is_missing(.data$postcode),
-      #   1L,
-      #   0L
-      # ),
-      missing_dob = dplyr::if_else(
-        is.na(.data$dob),
-        1L,
-        0L
-      )
+      valid_chi = phsmethods::chi_check(.data$chi) == "Valid CHI",
+      unique_chi = dplyr::lag(.data$chi) != .data$chi,
+      n_missing_chi = is_missing(.data$chi),
+      n_males = .data$gender == 1L,
+      n_females = .data$gender == 2L,
+      # A postcode counts only when it is neither NA nor an empty string
+      n_postcode = !is.na(.data$postcode) & .data$postcode != "",
+      n_missing_postcode = is_missing(.data$postcode),
+      missing_dob = is.na(.data$dob)
     )
 }
diff --git a/R/create_demographic_lookup.R b/R/create_demographic_lookup.R
index dfc2e25cf..2b252a151 100644
--- a/R/create_demographic_lookup.R
+++ b/R/create_demographic_lookup.R
@@ -348,7 +348,7 @@ assign_d_cohort_high_cc <- function(dementia,
     # FOR FUTURE: PhysicalandSensoryDisabilityClientGroup or LearningDisabilityClientGroup = "Y",
     # then high_cc_cohort = TRUE
     # FOR FUTURE: Care home removed, here's the code: .data$recid = "CH" & age < 65
-    rowSums(dplyr::across(c(
+    rowSums(dplyr::pick(c(
       "dementia",
       "hefailure",
       "refailure",
@@ -374,7 +374,7 @@ assign_d_cohort_high_cc <- function(dementia,
 #' @family Demographic and Service Use Cohort functions
 assign_d_cohort_medium_cc <- function(cvd, copd, chd, parkinsons, ms) {
   medium_cc <-
-    rowSums(dplyr::across(c(
+    rowSums(dplyr::pick(c(
       "cvd",
       "copd",
       "chd",
@@ -403,7 +403,7 @@ assign_d_cohort_low_cc <- function(epilepsy,
                                    diabetes,
                                    atrialfib) {
   low_cc <-
-    rowSums(dplyr::across(c(
+    rowSums(dplyr::pick(c(
       "epilepsy",
       "asthma",
       "arth",
@@ -596,12 +596,12 @@ assign_d_cohort_substance <- function(data) {
       f11 = .data$recid %in% c("01B", "04B") &
         rowSums(dplyr::across(
           c("diag1", "diag2", "diag3", "diag4", "diag5", "diag6"),
-          ~ stringr::str_sub(.x, 1L, 3L) %in% c("F11")
+          ~ stringr::str_sub(.x, 1L, 3L) %in% "F11"
         )) > 0L,
       f13 = .data$recid %in% c("01B", "04B") &
         rowSums(dplyr::across(
           c("diag1", "diag2", "diag3", "diag4", "diag5", "diag6"),
-          ~ stringr::str_sub(.x, 1L, 3L) %in% c("F13")
+          ~ stringr::str_sub(.x, 1L, 3L) %in% "F13"
         )) > 0L,
       t402_t404 = .data$recid %in% c("01B", "04B") &
         rowSums(dplyr::across(
@@ -611,13 +611,13 @@ assign_d_cohort_substance <- function(data) {
       t424 = .data$recid %in% c("01B", "04B") &
         rowSums(dplyr::across(
           c("diag1", "diag2", "diag3", "diag4", "diag5", "diag6"),
-          ~ stringr::str_sub(.x, 1L, 4L) %in% c("T424")
+          ~ stringr::str_sub(.x, 1L, 4L) %in% "T424"
         )) > 0L
     ) %>%
     # Aggregate to CIJ level
     dplyr::group_by(.data$chi, .data$cij_marker) %>%
     dplyr::summarise(
-      dplyr::across(c("mh":"t424"), any)
+      dplyr::across("mh":"t424", ~ any(.x))
     ) %>%
     dplyr::ungroup() %>%
     # Assign drug and alcohol misuse
diff --git a/R/run_episode_file.R b/R/create_episode_file.R
similarity index 74%
rename from R/run_episode_file.R
rename to R/create_episode_file.R
index 852a4fd8b..1e2319836 100644
--- a/R/run_episode_file.R
+++ b/R/create_episode_file.R
@@ -4,15 +4,30 @@
 #' @param year The year to process, in FY format.
 #' @param write_to_disk (optional) Should the data be written to disk default is
 #' `TRUE` i.e. write the data to disk.
+#' @inheritParams add_nsu_cohort
+#' @inheritParams fill_geographies
+#' @inheritParams join_cohort_lookups
+#' @inheritParams join_deaths_data
+#' @inheritParams match_on_ltcs
+#' @inheritParams link_delayed_discharge_eps
 #' @param anon_chi_out (Default:TRUE) Should `anon_chi` be used in the output
 #' (instead of chi)
 #'
 #' @return a [tibble][tibble::tibble-package] containing the episode file
 #' @export
-#'
-run_episode_file <- function(
+create_episode_file <- function(
     processed_data_list,
     year,
+    dd_data = read_file(get_source_extract_path(year, "DD")),
+    homelessness_lookup = create_homelessness_lookup(year),
+    nsu_cohort = read_file(get_nsu_path(year)),
+    ltc_data = read_file(get_ltcs_path(year)),
+    slf_pc_lookup = read_file(get_slf_postcode_path()),
+    slf_gpprac_lookup = read_file(
+      get_slf_gpprac_path(),
+      col_select = c("gpprac", "cluster", "hbpraccode")
+    ),
+    slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year)),
     write_to_disk = TRUE,
     anon_chi_out = TRUE) {
   episode_file <- dplyr::bind_rows(processed_data_list) %>%
@@ -93,28 +108,66 @@ run_episode_file <- function(
         NA_character_,
         .data$chi
       ),
-      gpprac = convert_eng_gpprac_to_dummy(.data[["gpprac"]])
+      gpprac = convert_eng_gpprac_to_dummy(.data[["gpprac"]]),
+      # PC8 format may still be used. Ensure here that all datasets are in PC7 format.
+      postcode = phsmethods::format_postcode(.data$postcode, "pc7")
     ) %>%
     correct_cij_vars() %>%
     fill_missing_cij_markers() %>%
+    add_homelessness_flag(year, lookup = homelessness_lookup) %>%
+    add_homelessness_date_flags(year, lookup = homelessness_lookup) %>%
     add_ppa_flag() %>%
-    link_delayed_discharge_eps(year) %>%
-    add_nsu_cohort(year) %>%
-    match_on_ltcs(year) %>%
+    link_delayed_discharge_eps(year, dd_data) %>%
+    add_nsu_cohort(year, nsu_cohort) %>%
+    match_on_ltcs(year, ltc_data) %>%
     correct_demographics(year) %>%
     create_cohort_lookups(year) %>%
     join_cohort_lookups(year) %>%
     join_sparra_hhg(year) %>%
-    fill_geographies() %>%
-    join_deaths_data(year) %>%
+    fill_geographies(
+      slf_pc_lookup,
+      slf_gpprac_lookup
+    ) %>%
+    join_deaths_data(
+      year,
+      slf_deaths_lookup
+    ) %>%
     load_ep_file_vars(year)
 
-  if (anon_chi_out) {
-    # TODO When slfhelper is updated remove the unnecessary code
+  if (!check_year_valid(year, type = "CH")) { # `type` must be length 1 (`&&`)
     episode_file <- episode_file %>%
-      tidyr::replace_na(list(chi = "")) %>%
-      slfhelper::get_anon_chi() %>%
-      dplyr::mutate(anon_chi = dplyr::na_if(.data$anon_chi, ""))
+      dplyr::mutate(
+        sc_send_lca = NA,
+        sc_living_alone = NA,
+        sc_support_from_unpaid_carer = NA,
+        sc_social_worker = NA,
+        sc_type_of_housing = NA,
+        sc_meals = NA,
+        sc_day_care = NA,
+        sc_latest_submission = NA,
+        ch_chi_cis = NA,
+        sc_id_cis = NA,
+        ch_name = NA,
+        ch_adm_reason = NA,
+        ch_provider = NA,
+        ch_nursing = NA,
+        hc_hours_annual = NA,
+        hc_hours_q1 = NA,
+        hc_hours_q2 = NA,
+        hc_hours_q3 = NA,
+        hc_hours_q4 = NA,
+        hc_cost_q1 = NA,
+        hc_cost_q2 = NA,
+        hc_cost_q3 = NA,
+        hc_cost_q4 = NA,
+        hc_provider = NA,
+        hc_reablement = NA,
+        sds_option_4 = NA
+      )
+  }
+
+  if (anon_chi_out) {
+    episode_file <- slfhelper::get_anon_chi(episode_file)
   }
 
   if (write_to_disk) {
@@ -136,7 +189,7 @@ run_episode_file <- function(
 #' Store the unneeded episode file variables
 #'
 #' @param data The in-progress episode file data.
-#' @inheritParams run_episode_file
+#' @inheritParams create_episode_file
 #' @param vars_to_keep a character vector of the variables to keep, all others
 #' will be stored.
 #'
@@ -174,7 +227,7 @@ store_ep_file_vars <- function(data, year, vars_to_keep) {
 
 #' Load the unneeded episode file variables
 #'
-#' @inheritParams run_episode_file
+#' @inheritParams create_episode_file
 #' @inheritParams store_ep_file_vars
 #'
 #' @return The full SLF data.
@@ -275,21 +328,22 @@ correct_cij_vars <- function(data) {
       ),
       cij_pattype_code = dplyr::if_else(
         !is.na(.data$chi) & .data$recid %in% c("01B", "04B", "GLS", "02B"),
-        dplyr::case_match(.data$cij_admtype,
-          c("41", "42") ~ 2,
-          c("40", "48", "99") ~ 9,
-          "18" ~ 0,
-          .default = .data$cij_pattype_code
+        dplyr::case_match(
+          .data$cij_admtype,
+          c("41", "42") ~ 2L,
+          c("40", "48", "99") ~ 9L,
+          "18" ~ 0L,
+          .default = as.integer(.data$cij_pattype_code)
         ),
         .data$cij_pattype_code
       ),
       # Recode cij_pattype based on above
       cij_pattype = dplyr::case_match(
         .data$cij_pattype_code,
-        0 ~ "Non-Elective",
-        1 ~ "Elective",
-        2 ~ "Maternity",
-        9 ~ "Other"
+        0L ~ "Non-Elective",
+        1L ~ "Elective",
+        2L ~ "Maternity",
+        9L ~ "Other"
       )
     )
 }
@@ -310,7 +364,7 @@ create_cost_inc_dna <- function(data) {
       # In the Cost_Total_Net column set the cost for
       # those with attendance status 5 or 8 (CNWs and DNAs)
       cost_total_net = dplyr::if_else(
-        .data$attendance_status %in% c(5, 8),
+        .data$attendance_status %in% c(5L, 8L),
         0.0,
         .data$cost_total_net
       )
@@ -356,22 +410,28 @@ create_cohort_lookups <- function(data, year, update = latest_update()) {
 #'
 #' @inheritParams store_ep_file_vars
 #' @inheritParams get_demographic_cohorts_path
+#' @param demographic_cohort,service_use_cohort The cohort data
 #'
 #' @return The data including the Demographic and Service Use lookups.
-join_cohort_lookups <- function(data, year, update = latest_update()) {
+join_cohort_lookups <- function(
+    data,
+    year,
+    update = latest_update(),
+    demographic_cohort = read_file(
+      get_demographic_cohorts_path(year, update),
+      col_select = c("chi", "demographic_cohort")
+    ),
+    service_use_cohort = read_file(
+      get_service_use_cohorts_path(year, update),
+      col_select = c("chi", "service_use_cohort")
+    )) {
   join_cohort_lookups <- data %>%
     dplyr::left_join(
-      read_file(
-        get_demographic_cohorts_path(year, update),
-        col_select = c("chi", "demographic_cohort")
-      ),
+      demographic_cohort,
       by = "chi"
     ) %>%
     dplyr::left_join(
-      read_file(
-        get_service_use_cohorts_path(year, update),
-        col_select = c("chi", "service_use_cohort")
-      ),
+      service_use_cohort,
       by = "chi"
     )
 
diff --git a/R/create_hb_test_flags.R b/R/create_hb_test_flags.R
index cb5855c1e..d21f1662a 100644
--- a/R/create_hb_test_flags.R
+++ b/R/create_hb_test_flags.R
@@ -11,67 +11,19 @@
 create_hb_test_flags <- function(data, hb_var) {
   data <- data %>%
     dplyr::mutate(
-      NHS_Ayrshire_and_Arran = dplyr::if_else(
-        {{ hb_var }} == "S08000015",
-        1L,
-        0L
-      ),
-      NHS_Borders = dplyr::if_else({{ hb_var }} == "S08000016", 1L, 0L),
-      NHS_Dumfries_and_Galloway = dplyr::if_else(
-        {{ hb_var }} == "S08000017",
-        1L,
-        0L
-      ),
-      NHS_Forth_Valley = dplyr::if_else({{ hb_var }} == "S08000019", 1L, 0L),
-      NHS_Grampian = dplyr::if_else(
-        {{ hb_var }} == "S08000020",
-        1L,
-        0L
-      ),
-      NHS_Highland = dplyr::if_else(
-        {{ hb_var }} == "S08000022",
-        1L,
-        0L
-      ),
-      NHS_Lothian = dplyr::if_else(
-        {{ hb_var }} == "S08000024",
-        1L,
-        0L
-      ),
-      NHS_Orkney = dplyr::if_else(
-        {{ hb_var }} == "S08000025",
-        1L,
-        0L
-      ),
-      NHS_Shetland = dplyr::if_else(
-        {{ hb_var }} == "S08000026",
-        1L,
-        0L
-      ),
-      NHS_Western_Isles = dplyr::if_else(
-        {{ hb_var }} == "S08000028",
-        1L,
-        0L
-      ),
-      NHS_Fife = dplyr::if_else(
-        {{ hb_var }} == "S08000029",
-        1L,
-        0L
-      ),
-      NHS_Tayside = dplyr::if_else(
-        {{ hb_var }} == "S08000030",
-        1L,
-        0L
-      ),
-      NHS_Greater_Glasgow_and_Clyde = dplyr::if_else(
-        {{ hb_var }} %in% c("S08000031", "S08000021"),
-        1L,
-        0L
-      ),
-      NHS_Lanarkshire = dplyr::if_else(
-        {{ hb_var }} %in% c("S08000032", "S08000023"),
-        1L,
-        0L
-      )
+      NHS_Ayrshire_and_Arran = {{ hb_var }} == "S08000015",
+      NHS_Borders = {{ hb_var }} == "S08000016",
+      NHS_Dumfries_and_Galloway = {{ hb_var }} == "S08000017",
+      NHS_Forth_Valley = {{ hb_var }} == "S08000019",
+      NHS_Grampian = {{ hb_var }} == "S08000020",
+      NHS_Highland = {{ hb_var }} == "S08000022",
+      NHS_Lothian = {{ hb_var }} == "S08000024",
+      NHS_Orkney = {{ hb_var }} == "S08000025",
+      NHS_Shetland = {{ hb_var }} == "S08000026",
+      NHS_Western_Isles = {{ hb_var }} == "S08000028",
+      NHS_Fife = {{ hb_var }} == "S08000029",
+      NHS_Tayside = {{ hb_var }} == "S08000030",
+      NHS_Greater_Glasgow_and_Clyde = {{ hb_var }} %in% c("S08000031", "S08000021"),
+      NHS_Lanarkshire = {{ hb_var }} %in% c("S08000032", "S08000023")
     )
 }
diff --git a/R/create_hscp_test_flags.R b/R/create_hscp_test_flags.R
index b7dd0a02e..55e67b67c 100644
--- a/R/create_hscp_test_flags.R
+++ b/R/create_hscp_test_flags.R
@@ -5,166 +5,42 @@
 #' @param data the data containing a HSCP variable
 #' @param hscp_var HSCP variable e.g. HSCP2019 HSCP2018
 #'
-#' @return a dataframe with flag (1 or 0) for each HSCP
+#' @return a dataframe with flag (TRUE or FALSE) for each HSCP
 #'
 #' @family flag functions
 create_hscp_test_flags <- function(data, hscp_var) {
   data <- data %>%
     dplyr::mutate(
-      Aberdeen_City = dplyr::if_else(
-        {{ hscp_var }} == "S37000001",
-        1L,
-        0L
-      ),
-      Aberdeenshire = dplyr::if_else(
-        {{ hscp_var }} == "S37000002",
-        1L,
-        0L
-      ),
-      Angus = dplyr::if_else(
-        {{ hscp_var }} == "S37000003",
-        1L,
-        0L
-      ),
-      Argyll_and_Bute = dplyr::if_else(
-        {{ hscp_var }} == "S37000004",
-        1L,
-        0L
-      ),
-      Clackmannanshire_and_Stirling = dplyr::if_else(
-        {{ hscp_var }} == "S37000005",
-        1L,
-        0L
-      ),
-      Dumfries_and_Galloway = dplyr::if_else(
-        {{ hscp_var }} == "S37000006",
-        1L,
-        0L
-      ),
-      Dundee_City = dplyr::if_else(
-        {{ hscp_var }} == "S37000007",
-        1L,
-        0L
-      ),
-      East_Ayrshire = dplyr::if_else(
-        {{ hscp_var }} == "S37000008",
-        1L,
-        0L
-      ),
-      East_Dunbartonshire = dplyr::if_else(
-        {{ hscp_var }} == "S37000009",
-        1L,
-        0L
-      ),
-      East_Lothian = dplyr::if_else(
-        {{ hscp_var }} == "S37000010",
-        1L,
-        0L
-      ),
-      East_Renfrewshire = dplyr::if_else(
-        {{ hscp_var }} == "S37000011",
-        1L,
-        0L
-      ),
-      Edinburgh = dplyr::if_else(
-        {{ hscp_var }} == "S37000012",
-        1L,
-        0L
-      ),
-      Falkirk = dplyr::if_else(
-        {{ hscp_var }} == "S37000013",
-        1L,
-        0L
-      ),
-      Highland = dplyr::if_else(
-        {{ hscp_var }} == "S37000016",
-        1L,
-        0L
-      ),
-      Inverclyde = dplyr::if_else(
-        {{ hscp_var }} == "S37000017",
-        1L,
-        0L
-      ),
-      Midlothian = dplyr::if_else(
-        {{ hscp_var }} == "S37000018",
-        1L,
-        0L
-      ),
-      Moray = dplyr::if_else(
-        {{ hscp_var }} == "S37000019",
-        1L,
-        0L
-      ),
-      North_Ayrshire = dplyr::if_else(
-        {{ hscp_var }} == "S37000020",
-        1L,
-        0L
-      ),
-      Orkney_Islands = dplyr::if_else(
-        {{ hscp_var }} == "S37000022",
-        1L,
-        0L
-      ),
-      Renfrewshire = dplyr::if_else(
-        {{ hscp_var }} == "S37000024",
-        1L,
-        0L
-      ),
-      Scottish_Borders = dplyr::if_else(
-        {{ hscp_var }} == "S37000025",
-        1L,
-        0L
-      ),
-      Shetland_Islands = dplyr::if_else(
-        {{ hscp_var }} == "S37000026",
-        1L,
-        0L
-      ),
-      South_Ayrshire = dplyr::if_else(
-        {{ hscp_var }} == "S37000027",
-        1L,
-        0L
-      ),
-      South_Lanarkshire = dplyr::if_else(
-        {{ hscp_var }} == "S37000028",
-        1L,
-        0L
-      ),
-      West_Dunbartonshire = dplyr::if_else(
-        {{ hscp_var }} == "S37000029",
-        1L,
-        0L
-      ),
-      West_Lothian = dplyr::if_else(
-        {{ hscp_var }} == "S37000030",
-        1L,
-        0L
-      ),
-      Western_Isles = dplyr::if_else(
-        {{ hscp_var }} == "S37000031",
-        1L,
-        0L
-      ),
-      Fife = dplyr::if_else(
-        {{ hscp_var }} == "S37000032",
-        1L,
-        0L
-      ),
-      Perth_and_Kinross = dplyr::if_else(
-        {{ hscp_var }} == "S37000033",
-        1L,
-        0L
-      ),
-      Glasgow_City = dplyr::if_else(
-        {{ hscp_var }} %in% c("S37000015", "S37000034"),
-        1L,
-        0L
-      ),
-      North_Lanarkshire = dplyr::if_else(
-        {{ hscp_var }} %in% c("S37000021", "S37000035"),
-        1L,
-        0L
-      )
+      Aberdeen_City = {{ hscp_var }} == "S37000001",
+      Aberdeenshire = {{ hscp_var }} == "S37000002",
+      Angus = {{ hscp_var }} == "S37000003",
+      Argyll_and_Bute = {{ hscp_var }} == "S37000004",
+      Clackmannanshire_and_Stirling = {{ hscp_var }} == "S37000005",
+      Dumfries_and_Galloway = {{ hscp_var }} == "S37000006",
+      Dundee_City = {{ hscp_var }} == "S37000007",
+      East_Ayrshire = {{ hscp_var }} == "S37000008",
+      East_Dunbartonshire = {{ hscp_var }} == "S37000009",
+      East_Lothian = {{ hscp_var }} == "S37000010",
+      East_Renfrewshire = {{ hscp_var }} == "S37000011",
+      Edinburgh = {{ hscp_var }} == "S37000012",
+      Falkirk = {{ hscp_var }} == "S37000013",
+      Highland = {{ hscp_var }} == "S37000016",
+      Inverclyde = {{ hscp_var }} == "S37000017",
+      Midlothian = {{ hscp_var }} == "S37000018",
+      Moray = {{ hscp_var }} == "S37000019",
+      North_Ayrshire = {{ hscp_var }} == "S37000020",
+      Orkney_Islands = {{ hscp_var }} == "S37000022",
+      Renfrewshire = {{ hscp_var }} == "S37000024",
+      Scottish_Borders = {{ hscp_var }} == "S37000025",
+      Shetland_Islands = {{ hscp_var }} == "S37000026",
+      South_Ayrshire = {{ hscp_var }} == "S37000027",
+      South_Lanarkshire = {{ hscp_var }} == "S37000028",
+      West_Dunbartonshire = {{ hscp_var }} == "S37000029",
+      West_Lothian = {{ hscp_var }} == "S37000030",
+      Western_Isles = {{ hscp_var }} == "S37000031",
+      Fife = {{ hscp_var }} == "S37000032",
+      Perth_and_Kinross = {{ hscp_var }} == "S37000033",
+      Glasgow_City = {{ hscp_var }} %in% c("S37000015", "S37000034"),
+      North_Lanarkshire = {{ hscp_var }} %in% c("S37000021", "S37000035")
     )
 }
diff --git a/R/create_individual_file.R b/R/create_individual_file.R
index e2cf996a1..664e69ad2 100644
--- a/R/create_individual_file.R
+++ b/R/create_individual_file.R
@@ -1,17 +1,18 @@
-#' Create individual file
+#' Create the Source Individual file
 #'
-#' @description Creates individual file from episode file
+#' @description Creates the individual file from the episode file.
 #'
-#' @param episode_file Tibble containing episodic data
+#' @param episode_file Tibble containing episodic data.
 #' @param anon_chi_in (Default:TRUE) Is `anon_chi` used in the input
-#' (instead of chi)
-#' @inheritParams run_episode_file
+#' (instead of chi).
+#' @inheritParams create_episode_file
 #'
 #' @return The processed individual file
 #' @export
 create_individual_file <- function(
     episode_file,
     year,
+    homelessness_lookup = create_homelessness_lookup(year),
     write_to_disk = TRUE,
     anon_chi_in = TRUE,
     anon_chi_out = TRUE) {
@@ -56,23 +57,74 @@ create_individual_file <- function(
       "sc_latest_submission",
       "hc_hours_annual",
       "hc_reablement",
-      "ooh_case_id"
+      "ooh_case_id",
+      "lca",
+      "hbrescode",
+      "health_net_cost",
+      "acute_episodes",
+      "mat_episodes",
+      "mh_episodes",
+      "gls_episodes",
+      "op_newcons_attendances",
+      "ae_attendances",
+      "pis_paid_items",
+      "ooh_cases"
     ))) %>%
     remove_blank_chi() %>%
     add_cij_columns() %>%
-    add_all_columns() %>%
-    aggregate_ch_episodes_zihao() %>%
-    clean_up_ch(year) %>%
+    add_all_columns(year)
+
+  if (!check_year_valid(year, type = c("CH", "HC", "AT", "SDS"))) {
+    individual_file <- individual_file %>%
+      aggregate_by_chi(exclude_sc_var = TRUE)
+  } else {
+    individual_file <- individual_file %>%
+      aggregate_ch_episodes() %>%
+      clean_up_ch(year) %>%
+      aggregate_by_chi(exclude_sc_var = FALSE) %>%
+      join_sc_client(year)
+  }
+
+  individual_file <- individual_file %>%
     recode_gender() %>%
-    aggregate_by_chi_zihao() %>%
     clean_individual_file(year) %>%
     join_cohort_lookups(year) %>%
+    add_homelessness_flag(year, lookup = homelessness_lookup) %>%
     match_on_ltcs(year) %>%
     join_deaths_data(year) %>%
     join_sparra_hhg(year) %>%
     join_slf_lookup_vars() %>%
-    join_sc_client(year) %>%
-    dplyr::mutate(year = year)
+    dplyr::mutate(year = year) %>%
+    add_hri_variables(chi_variable = "chi")
+
+  if (!check_year_valid(year, type = c("CH", "HC", "AT", "SDS"))) {
+    individual_file <- individual_file %>%
+      dplyr::mutate(
+        ch_cis_episodes = NA,
+        ch_beddays = NA,
+        ch_cost = NA,
+        hc_episodes = NA,
+        hc_personal_episodes = NA,
+        hc_non_personal_episodes = NA,
+        hc_reablement_episodes = NA,
+        hc_total_cost = NA,
+        hc_total_hours = NA,
+        hc_personal_hours = NA,
+        hc_non_personal_hours = NA,
+        hc_reablement_hours = NA,
+        at_alarms = NA,
+        at_telecare = NA,
+        sds_option_1 = NA,
+        sds_option_2 = NA,
+        sds_option_3 = NA,
+        sds_option_4 = NA,
+        sc_living_alone = NA,
+        sc_support_from_unpaid_carer = NA,
+        sc_social_worker = NA,
+        sc_meals = NA,
+        sc_day_care = NA
+      )
+  }
 
   if (anon_chi_out) {
     individual_file <- individual_file %>%
@@ -121,17 +173,17 @@ add_cij_columns <- function(episode_file) {
   episode_file %>%
     dplyr::mutate(
       cij_non_el = dplyr::if_else(
-        .data$cij_pattype_code == 0,
+        .data$cij_pattype_code == 0L,
         .data$cij_marker,
         NA_real_
       ),
       cij_el = dplyr::if_else(
-        .data$cij_pattype_code == 1,
+        .data$cij_pattype_code == 1L,
         .data$cij_marker,
         NA_real_
       ),
       cij_mat = dplyr::if_else(
-        .data$cij_pattype_code == 2,
+        .data$cij_pattype_code == 2L,
         .data$cij_marker,
         NA_real_
       ),
@@ -141,7 +193,7 @@ add_cij_columns <- function(episode_file) {
         NA_real_
       ),
       preventable_admissions = dplyr::if_else(
-        .data$cij_ppa == 1,
+        .data$cij_ppa == 1L,
         .data$cij_marker,
         NA_integer_
       )
@@ -157,7 +209,7 @@ add_cij_columns <- function(episode_file) {
-add_all_columns <- function(episode_file) {
+add_all_columns <- function(episode_file, year) {
   cli::cli_alert_info("Add all columns function started at {Sys.time()}")
 
-  episode_file %>%
+  episode_file <- episode_file %>%
     add_acute_columns("Acute", (.data$smrtype == "Acute-DC" | .data$smrtype == "Acute-IP") & .data$cij_pattype != "Maternity") %>%
     add_mat_columns("Mat", .data$recid == "02B" | .data$cij_pattype == "Maternity") %>%
     add_mh_columns("MH", .data$recid == "04B" & .data$cij_pattype != "Maternity") %>%
@@ -171,11 +223,17 @@ add_all_columns <- function(episode_file) {
     add_dd_columns("DD", .data$recid == "DD") %>%
     add_nsu_columns("NSU", .data$recid == "NSU") %>%
     add_nrs_columns("NRS", .data$recid == "NRS") %>%
-    add_hl1_columns("HL1", .data$recid == "HL1") %>%
-    add_ch_columns("CH", .data$recid == "CH") %>%
-    add_hc_columns("HC", .data$recid == "HC") %>%
-    add_at_columns("AT", .data$recid == "AT") %>%
-    add_sds_columns("SDS", .data$recid == "SDS") %>%
+    add_hl1_columns("HL1", .data$recid == "HL1")
+
+  if (check_year_valid(year, type = c("CH", "HC", "AT", "SDS"))) {
+    episode_file <- episode_file %>%
+      add_ch_columns("CH", .data$recid == "CH") %>%
+      add_hc_columns("HC", .data$recid == "HC") %>%
+      add_at_columns("AT", .data$recid == "AT") %>%
+      add_sds_columns("SDS", .data$recid == "SDS")
+  }
+
+  episode_file <- episode_file %>%
     dplyr::mutate(
       health_net_cost = rowSums(
         dplyr::pick(
@@ -192,7 +250,7 @@ add_all_columns <- function(episode_file) {
       ),
       health_net_cost_inc_dnas = .data$health_net_cost + dplyr::if_else(
         is.na(.data$OP_cost_dnas),
-        0,
+        0.0,
         .data$OP_cost_dnas
       )
     )
@@ -247,13 +305,13 @@ add_op_columns <- function(episode_file, prefix, condition) {
   condition <- substitute(condition)
   episode_file <- episode_file %>%
     add_standard_cols(prefix, condition)
-  condition_1 <- substitute(condition & attendance_status == 1)
+  condition_1 <- substitute(condition & attendance_status == 1L)
   episode_file <- episode_file %>%
     dplyr::mutate(
       "{prefix}_newcons_attendances" := dplyr::if_else(eval(condition_1), 1L, NA_integer_),
       "{prefix}_cost_attend" := dplyr::if_else(eval(condition_1), .data$cost_total_net, NA_real_)
     )
-  condition_5_8 <- substitute(condition & attendance_status %in% c(5, 8))
+  condition_5_8 <- substitute(condition & attendance_status %in% c(5L, 8L))
   episode_file <- episode_file %>%
     dplyr::mutate(
       "{prefix}_newcons_dnas" := dplyr::if_else(eval(condition_5_8), 1L, NA_integer_),
@@ -306,11 +364,11 @@ add_ooh_columns <- function(episode_file, prefix, condition) {
       "{prefix}_consultation_time" := dplyr::if_else(
         eval(condition),
         pmax(
-          0,
+          0.0,
           as.numeric((lubridate::seconds_to_period(.data$keytime2) + .data$record_keydate2) - (lubridate::seconds_to_period(.data$keytime1) + .data$record_keydate1), units = "mins")
         ),
         NA_real_
-      ),
+      )
     )
 
   return(episode_file)
@@ -406,7 +464,7 @@ add_ch_columns <- function(episode_file, prefix, condition) {
     add_standard_cols(prefix, condition) %>%
     dplyr::mutate(
       ch_cost_per_day = dplyr::if_else(
-        eval(condition) & .data$yearstay > 0,
+        eval(condition) & .data$yearstay > 0.0,
         .data$cost_total_net / .data$yearstay,
         .data$cost_total_net
       ),
@@ -433,8 +491,16 @@ add_hc_columns <- function(episode_file, prefix, condition) {
   episode_file <- episode_file %>%
     add_standard_cols(prefix, condition, episode = TRUE) %>%
     dplyr::mutate(
-      "{prefix}_total_hours" := dplyr::if_else(eval(condition), .data$hc_hours_annual, NA_real_),
-      "{prefix}_total_cost" := dplyr::if_else(eval(condition), .data$cost_total_net, NA_real_),
+      "{prefix}_total_hours" := dplyr::if_else(
+        eval(condition),
+        .data$hc_hours_annual,
+        NA_real_
+      ),
+      "{prefix}_total_cost" := dplyr::if_else(
+        eval(condition),
+        .data$cost_total_net,
+        NA_real_
+      )
     )
   condition_per <- substitute(condition & smrtype == "HC-Per")
   episode_file <- episode_file %>%
@@ -450,7 +516,7 @@ add_hc_columns <- function(episode_file, prefix, condition) {
       "{prefix}_non_personal_hours" := dplyr::if_else(eval(condition_non_per), .data$hc_hours_annual, NA_real_),
       "{prefix}_non_personal_hours_cost" := dplyr::if_else(eval(condition_non_per), .data$cost_total_net, NA_real_)
     )
-  condition_reabl <- substitute(condition & hc_reablement == 1)
+  condition_reabl <- substitute(condition & hc_reablement == 1L)
   episode_file <- episode_file %>%
     dplyr::mutate(
       "{prefix}_reablement_episodes" := dplyr::if_else(eval(condition_reabl), 1L, NA_integer_),
@@ -547,35 +613,6 @@ add_standard_cols <- function(episode_file, prefix, condition, episode = FALSE,
   return(episode_file)
 }
 
-
-#' Aggregate CIS episodes
-#'
-#' @description Aggregate CH variables by CHI and CIS.
-#'
-#' @inheritParams create_individual_file
-aggregate_ch_episodes <- function(episode_file) {
-  cli::cli_alert_info("Aggregate ch episodes function started at {Sys.time()}")
-
-  episode_file %>%
-    # dplyr::filter(!is.na(.data$ch_chi_cis)) %>%
-    # use as.data.table to change the data format to data.table to accelerate
-    data.table::as.data.table() %>%
-    dplyr::group_by(.data$chi, .data$ch_chi_cis) %>%
-    dplyr::mutate(
-      ch_no_cost = max(.data$ch_no_cost),
-      ch_ep_start = min(.data$record_keydate1),
-      ch_ep_end = max(.data$ch_ep_end),
-      ch_cost_per_day = mean(.data$ch_cost_per_day)
-    ) %>%
-    dplyr::ungroup() %>%
-    # change the data format from data.table to data.frame
-    tibble::as_tibble()
-
-  # dplyr::distinct(.data$chi, .data$ch_chi_cis) %>%
-  # dplyr::select(.data$chi, .data$ch_chi_cis, .data$ch_no_cost, .data$ch_ep_start, .data$ch_ep_end, .data$ch_cost_per_day) %>%
-  # dplyr::right_join(episode_file, by = c(.data$chi, .data$ch_chi_cis))
-}
-
 #' Clean up CH
 #'
 #' @description Clean up CH-related columns.
@@ -590,7 +627,7 @@ clean_up_ch <- function(episode_file, year) {
       fy_start = start_fy(year)
     ) %>%
     dplyr::mutate(
-      term_1 = pmin(.data$ch_ep_end, .data$fy_end + 1),
+      term_1 = pmin(.data$ch_ep_end, .data$fy_end + 1L),
       term_2 = pmax(.data$ch_ep_start, .data$fy_start)
     ) %>%
     dplyr::mutate(
@@ -600,18 +637,18 @@ clean_up_ch <- function(episode_file, year) {
         NA_real_
       ),
       ch_cost = dplyr::if_else(
-        .data$recid == "CH" & .data$ch_no_cost == 0,
+        .data$recid == "CH" & .data$ch_no_cost == 0L,
         .data$ch_beddays * .data$ch_cost_per_day,
         NA_real_
       ),
       ch_beddays = dplyr::if_else(
-        .data$recid == "CH" & .data$ch_chi_cis == 0,
-        0,
+        .data$recid == "CH" & .data$ch_chi_cis == 0L,
+        0L,
         .data$ch_beddays
       ),
       ch_cost = dplyr::if_else(
-        .data$recid == "CH" & .data$ch_chi_cis == 0,
-        0,
+        .data$recid == "CH" & .data$ch_chi_cis == 0L,
+        0.0,
         .data$ch_cost
       )
     ) %>%
@@ -629,105 +666,13 @@ recode_gender <- function(episode_file) {
   episode_file %>%
     dplyr::mutate(
       gender = dplyr::if_else(
-        .data$gender %in% c(0, 9),
+        .data$gender %in% c(0L, 9L),
         1.5,
         .data$gender
       )
     )
 }
 
-#' Aggregate by CHI
-#'
-#' @description Aggregate episode file by CHI to convert into
-#' individual file.
-#'
-#' @inheritParams create_individual_file
-aggregate_by_chi <- function(episode_file) {
-  cli::cli_alert_info("Aggregate by CHI function started at {Sys.time()}")
-
-  episode_file %>%
-    dplyr::arrange(
-      chi,
-      record_keydate1,
-      keytime1,
-      record_keydate2,
-      keytime2
-    ) %>%
-    dplyr::group_by(.data$chi) %>%
-    dplyr::summarise(
-      gender = mean(gender),
-      dplyr::across(
-        dplyr::ends_with(c("postcode", "DoB", "gpprac")),
-        ~ dplyr::last(., na_rm = TRUE)
-      ),
-      dplyr::across(
-        c(
-          "ch_cis_episodes" = "ch_chi_cis",
-          "cij_total" = "cij_marker",
-          "cij_el",
-          "cij_non_el",
-          "cij_mat",
-          # "cij_delay",
-          "ooh_cases" = "ooh_case_id",
-          "preventable_admissions"
-        ),
-        ~ dplyr::n_distinct(.x, na.rm = TRUE)
-      ),
-      dplyr::across(
-        c(
-          dplyr::ends_with(
-            c(
-              "episodes",
-              "beddays",
-              "cost",
-              "attendances",
-              "attend",
-              "contacts",
-              "hours",
-              "alarms",
-              "telecare",
-              "paid_items",
-              "advice",
-              "homeV",
-              "time",
-              "assessment",
-              "other",
-              # "DN",
-              "NHS24",
-              "PCC",
-              "_dnas"
-            )
-          ),
-          dplyr::starts_with("SDS_option")
-        ),
-        ~ sum(., na.rm = TRUE)
-      ),
-      # dplyr::across(
-      #   c(
-      #     # dplyr::starts_with("sc_"),
-      #     #-"sc_send_lca",
-      #     #-"sc_latest_submission",
-      #     # "HL1_in_FY" = "hh_in_fy",
-      #     "NSU"
-      #   ),
-      #   ~ max_no_inf(.)
-      # ),
-      dplyr::across(
-        c(
-          condition_cols(),
-          # "death_date",
-          # "deceased",
-          "year",
-          dplyr::ends_with(c(
-            "_Cohort", "end_fy", "start_fy"
-          )),
-        ),
-        ~ dplyr::first(., na_rm = TRUE)
-      )
-    ) %>%
-    dplyr::ungroup()
-}
-
 #' Condition columns
 #'
 #' @description Returns chr vector of column names
@@ -838,12 +783,13 @@ join_slf_lookup_vars <- function(individual_file,
 #' @param year financial year.
 #' @param sc_client SC client lookup
 #' @param sc_demographics SC Demographic lookup
-join_sc_client <- function(individual_file,
-                           year,
-                           sc_client = read_file(get_source_extract_path(year, "Client")),
-                           sc_demographics = read_file(get_sc_demog_lookup_path(),
-                             col_select = c("sending_location", "social_care_id", "chi")
-                           )) {
+join_sc_client <- function(
+    individual_file,
+    year,
+    sc_client = read_file(get_sc_client_lookup_path(year)),
+    sc_demographics = read_file(get_sc_demog_lookup_path(),
+      col_select = c("sending_location", "social_care_id", "chi")
+    )) {
   # TODO Update the client lookup processing script to match
   # on demographics there so the client lookup already has CHI.
 
@@ -853,13 +799,25 @@ join_sc_client <- function(individual_file,
       sc_demographics %>%
         dplyr::select("sending_location", "social_care_id", "chi"),
       by = c("sending_location", "social_care_id")
-    )
+    ) %>%
+    # Rank each client row by how many of its answers are "Not Known".
+    dplyr::mutate(count_not_known = rowSums(dplyr::pick(dplyr::all_of(c(
+      "sc_living_alone",
+      "sc_support_from_unpaid_carer",
+      "sc_social_worker",
+      "sc_meals",
+      "sc_day_care"
+    ))) == "Not Known")) %>%
+    # Keep only the row with the fewest "Not Known" answers for each CHI.
+    dplyr::arrange(.data$chi, .data$count_not_known) %>%
+    dplyr::distinct(.data$chi, .keep_all = TRUE)
 
   # Match on client variables by chi
   individual_file <- individual_file %>%
     dplyr::left_join(
       join_client_demog,
-      by = "chi"
+      by = "chi",
+      relationship = "one-to-one"
     ) %>%
     dplyr::select(!c("sending_location", "social_care_id", "sc_latest_submission"))
 
diff --git a/R/create_monthly_beddays.R b/R/create_monthly_beddays.R
index 175baeb8d..f57fc067f 100644
--- a/R/create_monthly_beddays.R
+++ b/R/create_monthly_beddays.R
@@ -39,7 +39,7 @@ create_monthly_beddays <- function(data,
   if (any(
     admission_dates_vector > discharge_dates_vector,
     na.rm = TRUE
-  ) & !all(is.na(discharge_dates_vector))) {
+  ) && !all(is.na(discharge_dates_vector))) {
     first_error <- which.max(admission_dates_vector > discharge_dates_vector)
 
     cli::cli_abort(
@@ -68,7 +68,7 @@ create_monthly_beddays <- function(data,
       # Shift it forward by a day (default)
       # so we will count the last day and not the first.
       lubridate::int_shift(
-        by = lubridate::days(dplyr::if_else(count_last, 1L, 0L))
+        by = lubridate::days(as.integer(count_last))
       ))
 
   # Create the start dates of the months for the financial year
diff --git a/R/create_monthly_costs.R b/R/create_monthly_costs.R
index c9ccf4bed..3ff4a5268 100644
--- a/R/create_monthly_costs.R
+++ b/R/create_monthly_costs.R
@@ -20,7 +20,7 @@ create_monthly_costs <- function(data,
   check_variables_exist(data, c(
     "record_keydate1",
     "record_keydate2",
-    paste0(tolower(month.abb[c(4:12, 1:3)]), "_beddays")
+    paste0(tolower(month.abb[c(4L:12L, 1L:3L)]), "_beddays")
   ))
 
   beddays_months <- data %>%
@@ -29,7 +29,7 @@ create_monthly_costs <- function(data,
   # Fix the instances where the episode is a daycase (in maternity data);
   # these will sometimes have 0.33 for the yearstay,
   # this should be applied to the relevant month.
-  full_cost_col <- month.abb[c(4:12, 1:3)] %>%
+  full_cost_col <- month.abb[c(4L:12L, 1L:3L)] %>%
     tolower() %>%
     paste0("_cost")
 
@@ -37,7 +37,7 @@ create_monthly_costs <- function(data,
     dplyr::select(!dplyr::ends_with("_beddays")) %>%
     dplyr::mutate(
       daycase_added = tidyr::replace_na(
-        ({{ yearstay }} == 0.33) | ({{ yearstay }} == 0L & {{ cost_total_net }} > 0),
+        ({{ yearstay }} == 0.33) | ({{ yearstay }} == 0L & {{ cost_total_net }} > 0.0),
         replace = FALSE
       )
     ) %>%
@@ -51,12 +51,12 @@ create_monthly_costs <- function(data,
       cost_month = month.abb[.data$cost_month] %>%
         tolower() %>%
         paste0("_cost"),
-      daycase_added = dplyr::if_else(.data$daycase_added, 1, 0)
+      daycase_added = as.integer(.data$daycase_added)
     ) %>%
     tidyr::pivot_wider(
       names_from = "cost_month",
       values_from = "daycase_added",
-      values_fill = 0
+      values_fill = 0L
     ) %>%
     dplyr::select(
       tidyselect::any_of(full_cost_col),
@@ -67,7 +67,7 @@ create_monthly_costs <- function(data,
   add_months <- setdiff(full_cost_col, available_months)
 
   add_months_df <- dplyr::as_tibble(
-    matrix(0, nrow = nrow(data), ncol = length(add_months)),
+    matrix(0.0, nrow = nrow(data), ncol = length(add_months)),
     .name_repair = ~add_months
   )
 
diff --git a/R/create_sending_location_test_flags.R b/R/create_sending_location_test_flags.R
new file mode 100644
index 000000000..373dc2c03
--- /dev/null
+++ b/R/create_sending_location_test_flags.R
@@ -0,0 +1,48 @@
+#' Create sending location test flags
+#'
+#' @description Create flags for sending location
+#'
+#' @param data the data containing the variable sending_location
+#' @param sending_location_var sending_location variable
+#' @return a dataframe with flag (TRUE or FALSE) for each sending location
+#'
+#' @family flag functions
+create_sending_location_test_flags <- function(data, sending_location_var) {
+  data <- data %>%
+    dplyr::mutate(
+      Aberdeen_City = {{ sending_location_var }} == 100L,
+      Aberdeenshire = {{ sending_location_var }} == 110L,
+      Angus = {{ sending_location_var }} == 120L,
+      Argyll_and_Bute = {{ sending_location_var }} == 130L,
+      City_of_Edinburgh = {{ sending_location_var }} == 230L,
+      Clackmannanshire = {{ sending_location_var }} == 150L,
+      Dumfries_and_Galloway = {{ sending_location_var }} == 170L,
+      Dundee_City = {{ sending_location_var }} == 180L,
+      East_Ayrshire = {{ sending_location_var }} == 190L,
+      East_Dunbartonshire = {{ sending_location_var }} == 200L,
+      East_Lothian = {{ sending_location_var }} == 210L,
+      East_Renfrewshire = {{ sending_location_var }} == 220L,
+      Falkirk = {{ sending_location_var }} == 240L,
+      Fife = {{ sending_location_var }} == 250L,
+      Glasgow_City = {{ sending_location_var }} == 260L,
+      Highland = {{ sending_location_var }} == 270L,
+      Inverclyde = {{ sending_location_var }} == 280L,
+      Midlothian = {{ sending_location_var }} == 290L,
+      Moray = {{ sending_location_var }} == 300L,
+      Na_h_Eileanan_Siar = {{ sending_location_var }} == 235L,
+      North_Ayrshire = {{ sending_location_var }} == 310L,
+      North_Lanarkshire = {{ sending_location_var }} == 320L,
+      Orkney_Islands = {{ sending_location_var }} == 330L,
+      Perth_and_Kinross = {{ sending_location_var }} == 340L,
+      Renfrewshire = {{ sending_location_var }} == 350L,
+      Scottish_Borders = {{ sending_location_var }} == 355L,
+      Shetland_Islands = {{ sending_location_var }} == 360L,
+      South_Ayrshire = {{ sending_location_var }} == 370L,
+      South_Lanarkshire = {{ sending_location_var }} == 380L,
+      Stirling = {{ sending_location_var }} == 390L,
+      West_Dunbartonshire = {{ sending_location_var }} == 395L,
+      West_Lothian = {{ sending_location_var }} == 400L
+    )
+
+  return(data)
+}
diff --git a/R/create_service_use_lookup.R b/R/create_service_use_lookup.R
index 30d3b0789..4acbfc507 100644
--- a/R/create_service_use_lookup.R
+++ b/R/create_service_use_lookup.R
@@ -35,9 +35,21 @@ create_service_use_cohorts <- function(
       ),
 
       # Calculate service costs
-      geriatric_cost = calculate_geriatric_cost(.data$recid, .data$spec, .data$cost_total_net),
-      maternity_cost = calculate_maternity_cost(.data$recid, .data$cij_pattype, .data$cost_total_net),
-      psychiatry_cost = calculate_psychiatry_cost(.data$recid, .data$spec, .data$cost_total_net),
+      geriatric_cost = calculate_geriatric_cost(
+        .data$recid,
+        .data$spec,
+        .data$cost_total_net
+      ),
+      maternity_cost = calculate_maternity_cost(
+        .data$recid,
+        .data$cij_pattype,
+        .data$cost_total_net
+      ),
+      psychiatry_cost = calculate_psychiatry_cost(
+        .data$recid,
+        .data$spec,
+        .data$cost_total_net
+      ),
       acute_elective_cost = calculate_acute_elective_cost(
         .data$recid, .data$cij_pattype, .data$cij_ipdc,
         .data$spec, .data$cost_total_net
@@ -46,62 +58,154 @@ create_service_use_cohorts <- function(
         .data$recid, .data$cij_pattype,
         .data$spec, .data$cost_total_net
       ),
-      outpatient_cost = calculate_outpatient_costs(.data$recid, .data$cost_total_net, .data$geriatric_cost)[[1]],
-      total_outpatient_cost = calculate_outpatient_costs(.data$recid, .data$cost_total_net, .data$geriatric_cost)[[2]],
-      care_home_cost = calculate_care_home_cost(.data$recid, .data$cost_total_net),
-      hospital_elective_cost = calculate_hospital_elective_cost(.data$recid, .data$cij_pattype, .data$cost_total_net),
-      hospital_emergency_cost = calculate_hospital_emergency_cost(.data$recid, .data$cij_pattype, .data$cost_total_net),
-      prescribing_cost = calculate_prescribing_cost(.data$recid, .data$cost_total_net),
-      ae2_cost = calculate_ae2_cost(.data$recid, .data$cost_total_net),
-      community_health_cost = calculate_community_health_cost(.data$recid, .data$cost_total_net),
+      outpatient_cost = calculate_outpatient_costs(
+        recid = .data$recid,
+        cost_total_net = .data$cost_total_net,
+        geriatric_cost = .data$geriatric_cost
+      )[["outpatient_cost"]],
+      total_outpatient_cost = calculate_outpatient_costs(
+        .data$recid,
+        .data$cost_total_net,
+        .data$geriatric_cost
+      )[["total_outpatient_cost"]],
+      care_home_cost = calculate_care_home_cost(
+        .data$recid,
+        .data$cost_total_net
+      ),
+      hospital_elective_cost = calculate_hospital_elective_cost(
+        .data$recid,
+        .data$cij_pattype,
+        .data$cost_total_net
+      ),
+      hospital_emergency_cost = calculate_hospital_emergency_cost(
+        .data$recid,
+        .data$cij_pattype,
+        .data$cost_total_net
+      ),
+      prescribing_cost = calculate_prescribing_cost(
+        .data$recid,
+        .data$cost_total_net
+      ),
+      ae2_cost = calculate_ae2_cost(
+        .data$recid,
+        .data$cost_total_net
+      ),
+      community_health_cost = calculate_community_health_cost(
+        .data$recid,
+        .data$cost_total_net
+      ),
       operation_flag = add_operation_flag(.data$op1a)
     ) %>%
     # Aggregate to CIJ level
-    dplyr::group_by(.data$chi, .data$cij_marker, .data$cij_ipdc, .data$cij_pattype) %>%
+    dplyr::group_by(
+      .data$chi,
+      .data$cij_marker,
+      .data$cij_ipdc,
+      .data$cij_pattype
+    ) %>%
     dplyr::summarise(
-      dplyr::across(c("cost_total_net", "geriatric_cost":"community_health_cost"), sum),
-      dplyr::across(c("operation_flag", "cij_attendance"), any)
+      dplyr::across(
+        c("cost_total_net", "geriatric_cost":"community_health_cost"),
+        ~ sum(.x)
+      ),
+      dplyr::across(
+        c("operation_flag", "cij_attendance"),
+        ~ any(.x)
+      )
     ) %>%
     dplyr::ungroup() %>%
     # Create specific instance counters and compute cost for elective inpatients
     dplyr::mutate(
-      emergency_instances = assign_emergency_instances(.data$cij_pattype),
-      elective_instances = assign_elective_instances(.data$cij_pattype, .data$cij_ipdc),
-      elective_inpatient_instances = assign_elective_inpatient_instances(.data$cij_pattype, .data$cij_ipdc),
-      elective_daycase_instances = assign_elective_daycase_instances(.data$cij_pattype, .data$cij_ipdc),
-      death_flag = assign_death_flag(.data$cij_marker),
+      emergency_instances = assign_emergency_instances(
+        .data$cij_pattype
+      ),
+      elective_instances = assign_elective_instances(
+        .data$cij_pattype,
+        .data$cij_ipdc
+      ),
+      elective_inpatient_instances = assign_elective_inpatient_instances(
+        .data$cij_pattype,
+        .data$cij_ipdc
+      ),
+      elective_daycase_instances = assign_elective_daycase_instances(
+        .data$cij_pattype,
+        .data$cij_ipdc
+      ),
+      death_flag = assign_death_flag(
+        .data$cij_marker
+      ),
       elective_inpatient_cost = calculate_elective_inpatient_cost(
         .data$elective_inpatient_instances,
         .data$cost_total_net
       )
     ) %>%
     # Move flags to end of data frame
-    dplyr::relocate(c("operation_flag", "death_flag"), .after = dplyr::last_col()) %>%
+    dplyr::relocate(
+      c("operation_flag", "death_flag"),
+      .after = dplyr::last_col()
+    ) %>%
     # Aggregate to chi-level
     dplyr::group_by(.data$chi) %>%
     dplyr::summarise(
-      dplyr::across(c(.data$cost_total_net:.data$elective_inpatient_cost), sum),
-      dplyr::across(c(.data$operation_flag, .data$death_flag), any)
+      dplyr::across(
+        c(.data$cost_total_net:.data$elective_inpatient_cost),
+        ~ sum(.x)
+      ),
+      dplyr::across(
+        c(.data$operation_flag, .data$death_flag),
+        ~ any(.x)
+      )
     ) %>%
     dplyr::ungroup() %>%
     dplyr::mutate(
       # Create flag for elective inpatients
-      elective_inpatient_flag = assign_elective_inpatient_flag(.data$acute_elective_cost, .data$elective_inpatient_cost),
+      elective_inpatient_flag = assign_elective_inpatient_flag(
+        .data$acute_elective_cost,
+        .data$elective_inpatient_cost
+      ),
       # Assign cohort flags
-      psychiatry_cohort = assign_s_cohort_psychiatry(.data$psychiatry_cost),
-      maternity_cohort = assign_s_cohort_maternity(.data$maternity_cost),
-      geriatric_cohort = assign_s_cohort_geriatric(.data$geriatric_cost),
-      elective_inpatient_cohort = assign_s_cohort_elective_inpatient(.data$elective_inpatient_flag),
-      limited_daycases_cohort = assign_s_cohort_limited_daycases(.data$elective_inpatient_flag, .data$elective_instances),
-      routine_daycase_cohort = assign_s_cohort_routine_daycase(.data$elective_inpatient_flag, .data$elective_instances),
-      single_emergency_cohort = assign_s_cohort_single_emergency(.data$emergency_instances),
-      multiple_emergency_cohort = assign_s_cohort_multiple_emergency(.data$emergency_instances),
-      prescribing_cohort = assign_s_cohort_prescribing(.data$prescribing_cost),
-      outpatient_cohort = assign_s_cohort_outpatient(.data$outpatient_cost),
-      ae2_cohort = assign_s_cohort_ae2(.data$ae2_cost),
-      community_care_cohort = assign_s_cohort_community_care(.data$community_health_cost),
+      psychiatry_cohort = assign_s_cohort_psychiatry(
+        .data$psychiatry_cost
+      ),
+      maternity_cohort = assign_s_cohort_maternity(
+        .data$maternity_cost
+      ),
+      geriatric_cohort = assign_s_cohort_geriatric(
+        .data$geriatric_cost
+      ),
+      elective_inpatient_cohort = assign_s_cohort_elective_inpatient(
+        .data$elective_inpatient_flag
+      ),
+      limited_daycases_cohort = assign_s_cohort_limited_daycases(
+        .data$elective_inpatient_flag,
+        .data$elective_instances
+      ),
+      routine_daycase_cohort = assign_s_cohort_routine_daycase(
+        .data$elective_inpatient_flag,
+        .data$elective_instances
+      ),
+      single_emergency_cohort = assign_s_cohort_single_emergency(
+        .data$emergency_instances
+      ),
+      multiple_emergency_cohort = assign_s_cohort_multiple_emergency(
+        .data$emergency_instances
+      ),
+      prescribing_cohort = assign_s_cohort_prescribing(
+        .data$prescribing_cost
+      ),
+      outpatient_cohort = assign_s_cohort_outpatient(
+        .data$outpatient_cost
+      ),
+      ae2_cohort = assign_s_cohort_ae2(
+        .data$ae2_cost
+      ),
+      community_care_cohort = assign_s_cohort_community_care(
+        .data$community_health_cost
+      ),
       # Assign other cohort if none have been assigned
-      other_cohort = rowSums(dplyr::across("psychiatry_cohort":"community_care_cohort")) == 0,
+      other_cohort = rowSums(
+        dplyr::pick("psychiatry_cohort":"community_care_cohort")
+      ) == 0L,
 
       # Recalculate costs based on the cohorts
       elective_inpatient_cost = recalculate_elective_inpatient_cost(
@@ -131,7 +235,7 @@ create_service_use_cohorts <- function(
       # Care Home cost is removed for now, so set to zero
       residential_care_cost = calculate_residential_care_cost(),
       # Replace any missing total costs with zero
-      dplyr::across("cost_total_net", ~ replace(., is.na(.), 0))
+      cost_total_net = tidyr::replace_na(.data$cost_total_net, 0.0)
     ) %>%
     # Add the cohort names
     assign_cohort_names() %>%
@@ -175,7 +279,9 @@ create_service_use_cohorts <- function(
 #' @family Demographic and Service Use Cohort functions
 calculate_geriatric_cost <- function(recid, spec, cost_total_net) {
   geriatric_cost <- dplyr::if_else(
-    recid %in% c("50B", "GLS") | spec %in% c("AB", "G4"), cost_total_net, 0
+    recid %in% c("50B", "GLS") | spec %in% c("AB", "G4"),
+    cost_total_net,
+    0.0 # rows matching neither the recid nor the spec list get zero cost
   )
   return(geriatric_cost)
 }
@@ -380,9 +486,12 @@ calculate_community_health_cost <- function(recid, cost_total_net) {
 #' @return A vector of elective inpatient costs
 #' @seealso [assign_elective_inpatient_instances()]
 #' @family Demographic and Service Use Cohort functions
-calculate_elective_inpatient_cost <- function(elective_inpatient_instances, cost_total_net) {
+calculate_elective_inpatient_cost <- function(elective_inpatient_instances,
+                                              cost_total_net) {
   elective_inpatient_cost <- dplyr::if_else(
-    elective_inpatient_instances, cost_total_net, 0
+    elective_inpatient_instances,
+    cost_total_net,
+    0.0 # zero cost where `elective_inpatient_instances` is FALSE
   )
   return(elective_inpatient_cost)
 }
@@ -391,7 +500,8 @@ calculate_elective_inpatient_cost <- function(elective_inpatient_instances, cost
 #'
 #' @param op1a A vector of operation codes
 #'
-#' @return A boolean vector showing whether a record contains an operation or not
+#' @return A boolean vector showing whether a record contains an operation or
+#' not.
 #' @family Demographic and Service Use Cohort functions
 add_operation_flag <- function(op1a) {
   operation_flag <- !is_missing(op1a)
@@ -532,29 +642,31 @@ assign_s_cohort_elective_inpatient <- function(elective_inpatient_flag) {
 }
 
 #' Assign limited daycases cohort flag
-#' @description If the record does not have an elective inpatient flag and they have
-#' 3 or fewer elective instances, return `TRUE`
+#' @description Returns `TRUE` when the elective inpatient flag is `FALSE`
+#' and there are 3 or fewer elective instances.
 #'
 #' @param elective_inpatient_flag A vector of elective inpatient flags
 #' @param elective_instances A vector of elective instances
 #'
 #' @return A boolean vector of limited daycases cohort flags
 #' @family Demographic and Service Use Cohort functions
-assign_s_cohort_limited_daycases <- function(elective_inpatient_flag, elective_instances) {
-  limited_daycases_cohort <- !elective_inpatient_flag & elective_instances <= 3
+assign_s_cohort_limited_daycases <- function(elective_inpatient_flag,
+                                             elective_instances) {
+  limited_daycases_cohort <- !elective_inpatient_flag & elective_instances <= 3L
   return(limited_daycases_cohort)
 }
 
 #' Assign routine daycase cohort flag
-#' @description If the record does not have an elective inpatient flag and they have
-#' 4 or more elective instances, return `TRUE`
+#' @description Returns `TRUE` when the elective inpatient flag is `FALSE`
+#' and there are 4 or more elective instances.
 #'
 #' @inheritParams assign_s_cohort_limited_daycases
 #'
 #' @return A boolean vector of routine daycase cohort flags
 #' @family Demographic and Service Use Cohort functions
-assign_s_cohort_routine_daycase <- function(elective_inpatient_flag, elective_instances) {
-  routine_daycase_cohort <- !elective_inpatient_flag & elective_instances >= 4
+assign_s_cohort_routine_daycase <- function(elective_inpatient_flag,
+                                            elective_instances) {
+  routine_daycase_cohort <- !elective_inpatient_flag & elective_instances >= 4L
   return(routine_daycase_cohort)
 }
 
@@ -565,7 +677,7 @@ assign_s_cohort_routine_daycase <- function(elective_inpatient_flag, elective_in
 #' @return A boolean vector of single emergency cohort flags
 #' @family Demographic and Service Use Cohort functions
 assign_s_cohort_single_emergency <- function(emergency_instances) {
-  single_emergency_cohort <- emergency_instances == 1
+  single_emergency_cohort <- emergency_instances == 1L
   return(single_emergency_cohort)
 }
 
@@ -576,31 +688,33 @@ assign_s_cohort_single_emergency <- function(emergency_instances) {
 #' @return A boolean vector of multiple emergency cohort flags
 #' @family Demographic and Service Use Cohort functions
 assign_s_cohort_multiple_emergency <- function(emergency_instances) {
-  multiple_emergency_cohort <- emergency_instances >= 2
+  multiple_emergency_cohort <- emergency_instances >= 2L
   return(multiple_emergency_cohort)
 }
 
 #' Assign prescribing cohort flag
-#' @description If the record has a prescribing cost greater than zero, assign `TRUE`
+#' @description If the record has a prescribing cost greater than zero,
+#' assign `TRUE`.
 #'
 #' @param prescribing_cost A vector of prescribing costs
 #'
 #' @return A boolean vector of prescribing cohort flags
 #' @family Demographic and Service Use Cohort functions
 assign_s_cohort_prescribing <- function(prescribing_cost) {
-  prescribing_cohort <- prescribing_cost > 0
+  prescribing_cohort <- prescribing_cost > 0.0
   return(prescribing_cohort)
 }
 
 #' Assign outpatient cohort flag
-#' @description If the record has a outpatient cost greater than zero, assign `TRUE`
+#' @description If the record has an outpatient cost greater than zero,
+#' assign `TRUE`.
 #'
 #' @param outpatient_cost A vector of outpatient costs
 #'
 #' @return A boolean vector of outpatient cohort flags
 #' @family Demographic and Service Use Cohort functions
 assign_s_cohort_outpatient <- function(outpatient_cost) {
-  outpatient_cohort <- outpatient_cost > 0
+  outpatient_cohort <- outpatient_cost > 0.0
   return(outpatient_cohort)
 }
 
@@ -613,7 +727,7 @@ assign_s_cohort_outpatient <- function(outpatient_cost) {
 #' @return A boolean vector of residential care cohort flags
 #' @family Demographic and Service Use Cohort functions
 assign_s_cohort_residential_care <- function(care_home_cost) {
-  residential_care_cohort <- care_home_cost > 0
+  residential_care_cohort <- care_home_cost > 0.0
   return(residential_care_cohort)
 }
 
@@ -625,7 +739,7 @@ assign_s_cohort_residential_care <- function(care_home_cost) {
 #' @return A boolean vector of A&E cohort flags
 #' @family Demographic and Service Use Cohort functions
 assign_s_cohort_ae2 <- function(ae2_cost) {
-  ae2_cohort <- ae2_cost > 0
+  ae2_cohort <- ae2_cost > 0.0
   return(ae2_cohort)
 }
 
@@ -638,7 +752,7 @@ assign_s_cohort_ae2 <- function(ae2_cost) {
 #' @return A boolean vector of Community Care cohort flags
 #' @family Demographic and Service Use Cohort functions
 assign_s_cohort_community_care <- function(community_health_cost) {
-  community_care_cohort <- community_health_cost > 0 # | home_care_cost > 0
+  community_care_cohort <- community_health_cost > 0.0 # | home_care_cost > 0
   return(community_care_cohort)
 }
 
@@ -651,8 +765,13 @@ assign_s_cohort_community_care <- function(community_health_cost) {
 #'
 #' @return A vector of elective inpatient costs
 #' @family Demographic and Service Use Cohort functions
-recalculate_elective_inpatient_cost <- function(elective_inpatient_cohort, acute_elective_cost) {
-  elective_inpatient_cost <- dplyr::if_else(elective_inpatient_cohort, acute_elective_cost, 0)
+recalculate_elective_inpatient_cost <- function(elective_inpatient_cohort,
+                                                acute_elective_cost) {
+  elective_inpatient_cost <- dplyr::if_else(
+    elective_inpatient_cohort,
+    acute_elective_cost,
+    0.0 # no elective inpatient cost outside the cohort
+  )
   return(elective_inpatient_cost)
 }
 
@@ -663,8 +782,13 @@ recalculate_elective_inpatient_cost <- function(elective_inpatient_cohort, acute
 #'
 #' @return A vector of limited daycase costs
 #' @family Demographic and Service Use Cohort functions
-calculate_limited_daycases_cost <- function(limited_daycases_cohort, acute_elective_cost) {
-  limited_daycases_cost <- dplyr::if_else(limited_daycases_cohort, acute_elective_cost, 0)
+calculate_limited_daycases_cost <- function(limited_daycases_cohort,
+                                            acute_elective_cost) {
+  limited_daycases_cost <- dplyr::if_else(
+    limited_daycases_cohort,
+    acute_elective_cost,
+    0.0 # no limited daycases cost outside the cohort
+  )
   return(limited_daycases_cost)
 }
 
@@ -675,8 +799,13 @@ calculate_limited_daycases_cost <- function(limited_daycases_cohort, acute_elect
 #'
 #' @return A vector of routine daycase costs
 #' @family Demographic and Service Use Cohort functions
-calculate_routine_daycase_cost <- function(routine_daycase_cohort, acute_elective_cost) {
-  routine_daycase_cost <- dplyr::if_else(routine_daycase_cohort, acute_elective_cost, 0)
+calculate_routine_daycase_cost <- function(routine_daycase_cohort,
+                                           acute_elective_cost) {
+  routine_daycase_cost <- dplyr::if_else(
+    routine_daycase_cohort,
+    acute_elective_cost,
+    0.0 # no routine daycase cost outside the cohort
+  )
   return(routine_daycase_cost)
 }
 
@@ -687,8 +816,13 @@ calculate_routine_daycase_cost <- function(routine_daycase_cohort, acute_electiv
 #'
 #' @return A vector of single emergency costs
 #' @family Demographic and Service Use Cohort functions
-calculate_single_emergency_cost <- function(single_emergency_cohort, acute_emergency_cost) {
-  single_emergency_cost <- dplyr::if_else(single_emergency_cohort, acute_emergency_cost, 0)
+calculate_single_emergency_cost <- function(single_emergency_cohort,
+                                            acute_emergency_cost) {
+  single_emergency_cost <- dplyr::if_else(
+    single_emergency_cohort,
+    acute_emergency_cost,
+    0.0 # no single emergency cost outside the cohort
+  )
   return(single_emergency_cost)
 }
 
@@ -699,8 +833,13 @@ calculate_single_emergency_cost <- function(single_emergency_cohort, acute_emerg
 #'
 #' @return A vector of multiple emergency costs
 #' @family Demographic and Service Use Cohort functions
-calculate_multiple_emergency_cost <- function(multiple_emergency_cohort, acute_emergency_cost) {
-  multiple_emergency_cost <- dplyr::if_else(multiple_emergency_cohort, acute_emergency_cost, 0)
+calculate_multiple_emergency_cost <- function(multiple_emergency_cohort,
+                                              acute_emergency_cost) {
+  multiple_emergency_cost <- dplyr::if_else(
+    multiple_emergency_cohort,
+    acute_emergency_cost,
+    0.0 # no multiple emergency cost outside the cohort
+  )
   return(multiple_emergency_cost)
 }
 
@@ -711,13 +850,16 @@ calculate_multiple_emergency_cost <- function(multiple_emergency_cohort, acute_e
 #'
 #' @return A vector of community care costs
 #' @family Demographic and Service Use Cohort functions
-calculate_community_care_cost <- function(community_care_cohort, community_health_cost) {
+calculate_community_care_cost <- function(community_care_cohort,
+                                          community_health_cost) {
   community_care_cost <- dplyr::if_else(
-    community_care_cohort, community_health_cost, 0
+    community_care_cohort,
+    community_health_cost,
+    0.0 # no community care cost outside the cohort
   )
   # FOR FUTURE
   # community_care_cost <- dplyr::if_else(
-  # community_care_cohort + home_care_cost, community_health_cost, 0)
+  # community_care_cohort + home_care_cost, community_health_cost, 0.0)
   return(community_care_cost)
 }
 
@@ -727,7 +869,7 @@ calculate_community_care_cost <- function(community_care_cohort, community_healt
 #' @return A vector of community care costs, currently zero
 #' @family Demographic and Service Use Cohort functions
 calculate_residential_care_cost <- function() {
-  residential_care_cost <- 0
+  residential_care_cost <- 0.0 # constant placeholder; always returns zero
   return(residential_care_cost)
 }
 
@@ -735,7 +877,8 @@ calculate_residential_care_cost <- function() {
 #'
 #' @param data A data frame
 #'
-#' @return A data frame with an additional variable containing the assigned cohort
+#' @return A data frame with an additional variable containing the assigned
+#' cohort
 #'
 #' @family Demographic and Service Use Cohort functions
 assign_cohort_names <- function(data) {
@@ -765,10 +908,8 @@ assign_cohort_names <- function(data) {
         # Situation where no cost is greater than another,
         # so the maximum is the same  as the mean
         .data$cost_max == rowSums(
-          dplyr::across(
-            c("psychiatry_cost":"residential_care_cost")
-          )
-        ) / 12 ~ "Unassigned",
+          dplyr::pick("psychiatry_cost":"residential_care_cost")
+        ) / 12.0 ~ "Unassigned",
         .data$cost_max == .data$psychiatry_cost ~ "Psychiatry",
         .data$cost_max == .data$maternity_cost ~ "Maternity",
         # Geriatric has to be larger or equal to psychiatry
@@ -786,7 +927,7 @@ assign_cohort_names <- function(data) {
         # Future: cost_max == .data$community_care_cost ~ "Community Care",
         .data$cost_max == .data$ae2_cost ~ "Unscheduled Care",
         .data$cost_max == .data$residential_care_cost ~ "Residential Care",
-        TRUE ~ "Unassigned"
+        .default = "Unassigned"
       )
     ) %>%
     dplyr::select(-"cost_max")
diff --git a/R/fill_geographies.R b/R/fill_geographies.R
index 58d001493..c9aee6355 100644
--- a/R/fill_geographies.R
+++ b/R/fill_geographies.R
@@ -4,10 +4,18 @@
 #' then use the lookups to match on additional variables.
 #'
 #' @param data the SLF
+#' @param slf_pc_lookup The SLF Postcode lookup
+#' @param slf_gpprac_lookup The SLF GP Practice lookup
 #'
 #' @return a [tibble][tibble::tibble-package] of the SLF with improved
 #' Postcode and GP Practice details.
-fill_geographies <- function(data) {
+fill_geographies <- function(
+    data,
+    slf_pc_lookup = read_file(get_slf_postcode_path()),
+    slf_gpprac_lookup = read_file(
+      get_slf_gpprac_path(),
+      col_select = c("gpprac", "cluster", "hbpraccode")
+    )) {
   check_variables_exist(data, c(
     "chi",
     "postcode",
@@ -21,8 +29,15 @@ fill_geographies <- function(data) {
   ))
 
   data %>%
-    fill_postcode_geogs() %>%
-    fill_gpprac_geographies()
+    # Pass on the lookups supplied by the caller. Re-reading the files here
+    # would silently ignore the `slf_pc_lookup` and `slf_gpprac_lookup`
+    # arguments, e.g. when a pre-loaded lookup is passed in.
+    fill_postcode_geogs(
+      slf_pc_lookup = slf_pc_lookup
+    ) %>%
+    fill_gpprac_geographies(
+      slf_gpprac_lookup = slf_gpprac_lookup
+    )
 }
 
 #' Make a postcode lookup for filling to most recent postcodes based on CHI
@@ -38,8 +53,10 @@ make_postcode_lookup <- function(data) {
     dplyr::distinct(.data$chi, .data$postcode, .data$record_keydate2) %>%
     # Format postcodes to 7-character format and replace dummy with NA
     dplyr::mutate(
-      postcode = phsmethods::format_postcode(.data$postcode, format = "pc7"),
-      postcode = dplyr::na_if(.data$postcode, "NK010AA")
+      postcode = dplyr::na_if(
+        phsmethods::format_postcode(.data$postcode, format = "pc7"),
+        "NK010AA"
+      )
     ) %>%
     # Drop any episodes with no postcode
     dplyr::filter(!is.na(.data$postcode)) %>%
@@ -84,9 +101,9 @@ make_gpprac_lookup <- function(data) {
   return(gpprac_lookup)
 }
 
-fill_postcode_geogs <- function(data) {
-  slf_pc_lookup <- read_file(get_slf_postcode_path())
-
+fill_postcode_geogs <- function(
+    data,
+    slf_pc_lookup) {
   filled_postcodes <- dplyr::left_join(
     data,
     make_postcode_lookup(data),
@@ -121,17 +138,20 @@ fill_postcode_geogs <- function(data) {
       lca = dplyr::coalesce(.data$lca, .data$lca_old),
       datazone2011 = dplyr::coalesce(.data$datazone2011, .data$datazone2011_old)
     ) %>%
-    dplyr::select(!c("hb2018", "hscp", "lca_old", "datazone2011_old", "most_recent_postcode"))
+    dplyr::select(!c(
+      "hb2018",
+      "hscp",
+      "lca_old",
+      "datazone2011_old",
+      "most_recent_postcode"
+    ))
 
   return(filled_postcodes)
 }
 
-fill_gpprac_geographies <- function(data) {
-  gpprac_ref <- read_file(
-    get_slf_gpprac_path(),
-    col_select = c("gpprac", "cluster", "hbpraccode")
-  )
-
+fill_gpprac_geographies <- function(
+    data,
+    slf_gpprac_lookup) {
   filled_gpprac <- dplyr::left_join(
     data,
     make_gpprac_lookup(data),
@@ -145,7 +165,12 @@ fill_gpprac_geographies <- function(data) {
         .data$gpprac
       )
     ) %>%
-    dplyr::left_join(gpprac_ref, by = "gpprac", suffix = c("_old", "")) %>%
+    dplyr::left_join(
+      slf_gpprac_lookup %>%
+        dplyr::select("gpprac", "cluster", "hbpraccode"),
+      by = "gpprac",
+      suffix = c("_old", "")
+    ) %>%
     dplyr::mutate(
       hbpraccode = dplyr::coalesce(.data$hbpraccode, .data$hbpraccode_old)
     ) %>%
diff --git a/R/fix_sc_dates.R b/R/fix_sc_dates.R
index bffa009e0..54440586c 100644
--- a/R/fix_sc_dates.R
+++ b/R/fix_sc_dates.R
@@ -12,7 +12,7 @@ fix_sc_start_dates <- function(start_date, period) {
   # financial year
   start_date <- dplyr::if_else(
     is.na(start_date),
-    start_fy(year = substr(period, 1L, 4L), "alternate"),
+    start_fy(year = stringr::str_sub(period, 1L, 4L), "alternate"),
     start_date
   )
 
@@ -35,7 +35,7 @@ fix_sc_end_dates <- function(start_date, end_date, period) {
   # the end of financial year
   end_date <- dplyr::if_else(
     start_date > end_date,
-    end_fy(year = substr(period, 1L, 4L), "alternate"),
+    end_fy(year = stringr::str_sub(period, 1L, 4L), "alternate"),
     end_date
   )
 
diff --git a/R/get_boxi_extract_path.R b/R/get_boxi_extract_path.R
index c3dd0fdf6..6096525e5 100644
--- a/R/get_boxi_extract_path.R
+++ b/R/get_boxi_extract_path.R
@@ -29,34 +29,40 @@ get_boxi_extract_path <- function(
     )) {
   type <- match.arg(type)
 
-  year_dir <- get_year_dir(year, extracts_dir = TRUE)
+  if (type %in% c("DN", "CMH")) {
+    dir <- fs::path(get_slf_dir(), "Archived_data")
+  } else {
+    dir <- get_year_dir(year, extracts_dir = TRUE)
+  }
 
   if (!check_year_valid(year, type)) {
     return(get_dummy_boxi_extract_path())
   }
 
-  file_name <- dplyr::case_when(
-    type == "AE" ~ "A&E-episode-level-extract",
-    type == "AE_CUP" ~ "A&E-UCD-CUP-extract",
-    type == "Acute" ~ "Acute-episode-level-extract",
-    type == "CMH" ~ "Community-MH-contact-level-extract",
-    type == "DN" ~ "District-Nursing-contact-level-extract",
-    type == "GP_OoH-c" ~ "GP-OoH-consultations-extract",
-    type == "GP_OoH-d" ~ "GP-OoH-diagnosis-extract",
-    type == "GP_OoH-o" ~ "GP-OoH-outcomes-extract",
-    type == "Homelessness" ~ "Homelessness-extract",
-    type == "Maternity" ~ "Maternity-episode-level-extract",
-    type == "MH" ~ "Mental-Health-episode-level-extract",
-    type == "Deaths" ~ "NRS-death-registrations-extract",
-    type == "Outpatients" ~ "Outpatients-episode-level-extract"
+  file_name <- dplyr::case_match(
+    type,
+    "AE" ~ "A&E-episode-level-extract",
+    "AE_CUP" ~ "A&E-UCD-CUP-extract",
+    "Acute" ~ "Acute-episode-level-extract",
+    "CMH" ~ "Community-MH-contact-level-extract",
+    "DN" ~ "District-Nursing-contact-level-extract",
+    "GP_OoH-c" ~ "GP-OoH-consultations-extract",
+    "GP_OoH-d" ~ "GP-OoH-diagnosis-extract",
+    "GP_OoH-o" ~ "GP-OoH-outcomes-extract",
+    "Homelessness" ~ "Homelessness-extract",
+    "Maternity" ~ "Maternity-episode-level-extract",
+    "MH" ~ "Mental-Health-episode-level-extract",
+    "Deaths" ~ "NRS-death-registrations-extract",
+    "Outpatients" ~ "Outpatients-episode-level-extract"
   )
 
   boxi_extract_path_csv_gz <- fs::path(
-    year_dir,
+    dir,
     stringr::str_glue("{file_name}-20{year}.csv.gz")
   )
+
   boxi_extract_path_csv <- fs::path(
-    year_dir,
+    dir,
     stringr::str_glue("{file_name}-20{year}.csv")
   )
 
diff --git a/R/get_connection_PHS_database.R b/R/get_connection_PHS_database.R
index a7c99653b..0a528f45b 100644
--- a/R/get_connection_PHS_database.R
+++ b/R/get_connection_PHS_database.R
@@ -1,22 +1,32 @@
 #' Open a connection to a PHS database
 #'
-#' @description Opens a connection to PHS database to allow data to be collected
+#' @description Opens a connection to a PHS database given a Data Source Name
+#' (DSN). It will try to get the username, asking for input if in an
+#' interactive session. It will also use [keyring][keyring::keyring-package]
+#' to find an existing keyring called 'createslf' which should contain a
+#' `db_password` key with the user's database password.
 #'
-#' @param dsn The Data Source Name passed on to `odbc::dbconnect`
-#' the dsn must be setup first. e.g. SMRA or DVPROD
+#' @param dsn The Data Source Name (DSN) passed on to [odbc::dbConnect()].
+#' The DSN must be set up first, e.g. `SMRA` or `DVPROD`.
 #' @param username The username to use for authentication,
-#' if not supplied it first will check the environment variable
-#' and finally ask the user for input.
+#' if not supplied it will try to find it automatically and if possible ask the
+#' user for input.
 #'
-#' @return a connection to the specified dsn
+#' @return a connection to the specified Data Source.
 #' @export
-#'
-phs_db_connection <- function(dsn, username = Sys.getenv("USER")) {
-  # Collect username from the environment
-  username <- Sys.getenv("USER")
+phs_db_connection <- function(dsn, username) {
+  if (missing(username)) {
+    # Collect username if possible (Sys.info() names are lower case: "user")
+    username <- dplyr::case_when(
+      Sys.info()[["user"]] != "unknown" ~ Sys.info()[["user"]],
+      Sys.getenv("USER") != "" ~ Sys.getenv("USER"),
+      system2("whoami", stdout = TRUE) != "" ~ system2("whoami", stdout = TRUE),
+      .default = NA
+    )
+  }
 
-  # Check the username is not empty and take input if not
-  if (is.na(username) | username == "") {
+  # If the username is missing try to get input from the user
+  if (is.na(username)) {
     if (rlang::is_interactive()) {
       username <- rstudioapi::showPrompt(
         title = "Username",
@@ -24,46 +34,219 @@ phs_db_connection <- function(dsn, username = Sys.getenv("USER")) {
         default = ""
       )
     } else {
-      cli::cli_abort("No username found, you should supply one with {.arg username}")
+      cli::cli_abort(
+        c(
+          "x" = "No username found, you can use the {.arg username} argument.",
+          "i" = "Alternatively, add {.code USER = \"<your username>\"} to your
+          {.file .Renviron} file."
+        )
+      )
     }
   }
 
-  # TODO improve error messages and provide instructions for setting up keyring
-  # Add the following code to R profile.
-  # Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf"),
-  # keyring_create("createslf", password = Sys.getenv("CREATESLF_KEYRING_PASS")),
-  # key_set(keyring = "createslf", service = "db_password")
+  # Check the status of keyring
+  # Does the 'createslf' keyring exist
+  keyring_exists <- "createslf" %in% keyring::keyring_list()[["keyring"]]
 
-  if (!("createslf" %in% keyring::keyring_list()[["keyring"]])) {
-    cli::cli_abort("The {.val createslf} keyring does not exist.")
+  # Does the 'db_password' key exist in the 'createslf' keyring
+  if (keyring_exists) {
+    key_exists <- "db_password" %in% keyring::key_list(keyring = "createslf")[["service"]]
+  } else {
+    key_exists <- FALSE
   }
 
-  if (!("db_password" %in% keyring::key_list(keyring = "createslf")[["service"]])) {
-    cli::cli_abort("{.val db_password} is missing from the {.val createslf} keyring.")
-  }
+  # Does the 'CREATESLF_KEYRING_PASS' environment variable exist
+  env_var_pass_exists <- Sys.getenv("CREATESLF_KEYRING_PASS") != ""
 
-  if (Sys.getenv("CREATESLF_KEYRING_PASS") == "") {
-    cli::cli_abort("You must have the password to unlock the {.val createslf} keyring in your environment as
-                   {.envvar CREATESLF_KEYRING_PASS}. Please set this up in your {.file .Renviron} or {.file .Rprofile}")
+  if (!all(keyring_exists, key_exists, env_var_pass_exists)) {
+    if (rlang::is_interactive()) {
+      setup_keyring(
+        keyring = "createslf",
+        key = "db_password",
+        keyring_exists = keyring_exists,
+        key_exists = key_exists,
+        env_var_pass_exists = env_var_pass_exists
+      )
+    } else {
+      if (any(keyring_exists, key_exists, env_var_pass_exists)) {
+        cli::cli_abort(
+          c(
+            "x" = "Your keyring needs to be set up, run:",
+            "{.code setup_keyring(keyring = \"createslf\", key = \"db_password\",
+  keyring_exists = {keyring_exists}, key_exists = {key_exists},
+  env_var_pass_exists = {env_var_pass_exists})}"
+          )
+        )
+      } else {
+        cli::cli_abort(
+          c(
+            "x" = "Your keyring needs to be set up, run:",
+            "{.code setup_keyring(keyring = \"createslf\",
+            key = \"db_password\")}"
+          )
+        )
+      }
+    }
   }
 
-  keyring::keyring_unlock(keyring = "createslf", password = Sys.getenv("CREATESLF_KEYRING_PASS"))
-
-  if (keyring::keyring_is_locked(keyring = "createslf")) {
-    cli::cli_abort("Keyring is locked. To unlock createslf keyring, please use {.fun keyring::keyring_unlock}")
+  if (env_var_pass_exists) {
+    keyring::keyring_unlock(
+      keyring = "createslf",
+      password = Sys.getenv("CREATESLF_KEYRING_PASS")
+    )
+  } else {
+    keyring::keyring_unlock(
+      keyring = "createslf",
+      password = rstudioapi::askForPassword(
+        prompt = "Enter the password for the keyring you just created."
+      )
+    )
   }
 
 
   # Create the connection
-  password_text <- stringr::str_glue("{dsn} password for user: {username}")
   db_connection <- odbc::dbConnect(
     odbc::odbc(),
     dsn = dsn,
     uid = username,
-    pwd = keyring::key_get(keyring = "createslf", service = "db_password")
+    pwd = keyring::key_get(
+      keyring = "createslf",
+      service = "db_password"
+    )
   )
 
   keyring::keyring_lock(keyring = "createslf")
 
   return(db_connection)
 }
+
+#' Interactively set up the keyring
+#'
+#' @description
+#' This is meant to be used with [phs_db_connection()], it can only be used
+#' interactively i.e. not in targets or in a workbench job.
+#'
+#' With the default options it will go through the steps to set up a keyring
+#' which can be used to supply passwords to [odbc::dbConnect()] (or others) in a
+#' secure and seamless way.
+#'
+#'  1. Create an .Renviron file in the project and add a password (for the
+#'  keyring) to it.
+#'  2. Create a keyring with the password - Since we have saved the password as
+#'  an environment variable the keyring can be unlocked and used automatically.
+#'  3. Add the database password to the keyring.
+#'
+#'
+#' @param keyring Name of the keyring
+#' @param key Name of the key
+#' @param keyring_exists Does the keyring already exist
+#' @param key_exists Does the key already exist
+#' @param env_var_pass_exists Does the password for the keyring already exist
+#' in the environment.
+#'
+#' @return NULL (invisibly)
+#' @export
+setup_keyring <- function(
+    keyring = "createslf",
+    key = "db_password",
+    keyring_exists = FALSE,
+    key_exists = FALSE,
+    env_var_pass_exists = FALSE) {
+  # First we need the password as an environment variable
+  if (!env_var_pass_exists) {
+    if (Sys.getenv("CREATESLF_KEYRING_PASS") != "") {
+      cli::cli_alert_warning(
+        "{.envvar CREATESLF_KEYRING_PASS} already exists in the environment, you
+        will need to clean this up manually if it's not correct."
+      )
+      keyring_password <- Sys.getenv("CREATESLF_KEYRING_PASS")
+    } else if (
+      fs::file_exists(".Renviron") &&
+        any(stringr::str_detect(
+          readr::read_lines(".Renviron"),
+          "^CREATESLF_KEYRING_PASS\\s*?=\\s*?['\"].+?['\"]$"
+        ))
+    ) {
+      cli::cli_abort(
+        "Your {.file .Renviron} file looks ok, try restarting your session."
+      )
+    } else {
+      keyring_password <- rstudioapi::askForPassword(
+        prompt = stringr::str_glue(
+          "Enter a password for the '{keyring}' keyring, this should
+        not be your LDAP / database password."
+        )
+      )
+      if (is.null(keyring_password)) {
+        cli::cli_abort("No keyring password entered.")
+      }
+      if (!fs::file_exists(".Renviron")) {
+        cli::cli_alert_success("Creating an {.file .Renviron} file.")
+      }
+
+      renviron_text <- stringr::str_glue(
+        "CREATESLF_KEYRING_PASS = \"{keyring_password}\""
+      )
+
+      readr::write_lines(
+        x = renviron_text,
+        file = ".Renviron",
+        append = TRUE
+      )
+
+      cli::cli_alert_success(
+        "Added {.code {renviron_text}} to the {.file .Renviron} file."
+      )
+
+      cli::cli_alert_info("You will need to restart your R session.")
+    }
+  } else {
+    keyring_password <- Sys.getenv("CREATESLF_KEYRING_PASS")
+  }
+
+  # If the keyring doesn't exist create it now.
+  if (!keyring_exists) {
+    if (keyring %in% keyring::keyring_list()[["keyring"]]) {
+      cli::cli_alert_warning(
+        "The {keyring} keyring already exists, you will be asked to
+        overwrite it."
+      )
+    }
+    keyring::keyring_create(
+      keyring = keyring,
+      password = keyring_password
+    )
+
+    cli::cli_alert_success(
+      "Created the '{keyring}' keyring with {.fun keyring::keyring_create}."
+    )
+  }
+
+  # If we just created the keyring it will already be unlocked
+  keyring::keyring_unlock(
+    keyring = keyring,
+    password = keyring_password
+  )
+
+  # Now add the password to the keyring
+  if (!key_exists) {
+    keyring::key_set(
+      keyring = keyring,
+      service = key,
+      prompt = "Enter your LDAP password for database connections."
+    )
+
+    cli::cli_alert_success(
+      "Added the '{key}' key to the '{keyring}' keyring with
+      {.fun keyring::key_set}."
+    )
+  }
+
+  keyring::keyring_lock(keyring = keyring)
+
+  cli::cli_alert_success(
+    "The keyring should now be set up correctly."
+  )
+
+  return(invisible(NULL))
+}
diff --git a/R/get_existing_data_for_tests.R b/R/get_existing_data_for_tests.R
index a242aee42..9e7d06dcd 100644
--- a/R/get_existing_data_for_tests.R
+++ b/R/get_existing_data_for_tests.R
@@ -8,13 +8,16 @@
 #' new data which the SLF data will be compared to.
 #' @param file_version whether to test against the "episode" file (the default)
 #' or the "individual" file.
+#' @param anon_chi Default set as FALSE. For use in episode tests where
+#' we want anon_chi instead of chi.
 #'
 #' @return a [tibble][tibble::tibble-package] from the
 #' SLF with the relevant recids and variables.
 #'
 #' @family test functions
 #' @seealso produce_source_extract_tests
-get_existing_data_for_tests <- function(new_data, file_version = "episode") {
+#' @export
+get_existing_data_for_tests <- function(new_data, file_version = "episode", anon_chi = FALSE) {
   file_version <- match.arg(file_version, c("episode", "individual"))
 
   year <- new_data %>%
@@ -32,6 +35,9 @@ get_existing_data_for_tests <- function(new_data, file_version = "episode") {
       "anon_chi",
       dplyr::intersect(slfhelper::ep_file_vars, tolower(names(new_data)))
     )
+    if ("hscp" %in% names(new_data)) {
+      variable_names <- c("hscp2018", variable_names)
+    }
   } else if (file_version == "individual") {
     variable_names <- c(
       "anon_chi",
@@ -43,14 +49,21 @@ get_existing_data_for_tests <- function(new_data, file_version = "episode") {
     slf_data <- suppressMessages(slfhelper::read_slf_episode(
       year = year,
       recids = recids,
-      columns = variable_names
+      col_select = variable_names
     ))
   } else {
     slf_data <- suppressMessages(slfhelper::read_slf_individual(
       year = year,
-      columns = variable_names
+      col_select = variable_names
     ))
   }
 
-  return(slfhelper::get_chi(slf_data))
+  if (anon_chi == FALSE) {
+    slf_data <- slf_data %>%
+      slfhelper::get_chi()
+  } else {
+    slf_data <- slf_data
+  }
+
+  return(slf_data)
 }
diff --git a/R/get_file_paths.R b/R/get_file_paths.R
index 229bebf39..b65015e5c 100644
--- a/R/get_file_paths.R
+++ b/R/get_file_paths.R
@@ -72,7 +72,7 @@ find the latest file with {.arg file_name_regexp}",
     }
 
     if (!fs::file_exists(file_path) && check_mode != "exists") {
-      if (is.null(create) && check_mode == "write" |
+      if (is.null(create) && check_mode == "write" ||
         !is.null(create) && create == TRUE) {
         # The file doesn't exist but we do want to create it
         fs::file_create(file_path)
diff --git a/R/get_fy_dates.R b/R/get_fy_dates.R
index 1a4bf6f45..257a14488 100644
--- a/R/get_fy_dates.R
+++ b/R/get_fy_dates.R
@@ -20,9 +20,9 @@ start_fy <- function(year, format = c("fyyear", "alternate")) {
   format <- match.arg(format)
 
   if (format == "fyyear") {
-    start_fy <- lubridate::make_date(convert_fyyear_to_year(year), 4, 1)
+    start_fy <- lubridate::make_date(convert_fyyear_to_year(year), 4L, 1L)
   } else if (format == "alternate") {
-    start_fy <- lubridate::make_date(year, 4, 1)
+    start_fy <- lubridate::make_date(year, 4L, 1L)
   }
 
   return(start_fy)
@@ -47,14 +47,14 @@ end_fy <- function(year, format = c("fyyear", "alternate")) {
     format <- "fyyear"
   }
 
-  year <- as.numeric(paste0("20", substr(year, 3, 4)))
+  year <- as.numeric(paste0("20", stringr::str_sub(year, 3L, 4L)))
 
   format <- match.arg(format)
 
   if (format == "fyyear") {
-    end_fy <- lubridate::make_date(year, 3, 31)
+    end_fy <- lubridate::make_date(year, 3L, 31L)
   } else if (format == "alternate") {
-    end_fy <- lubridate::make_date(year + 1L, 3, 31)
+    end_fy <- lubridate::make_date(year + 1L, 3L, 31L)
   }
 
   return(end_fy)
@@ -85,9 +85,9 @@ midpoint_fy <- function(year, format = c("fyyear", "alternate")) {
   check_year_format(year, format = "fyyear")
 
   if (format == "fyyear") {
-    midpoint_fy <- lubridate::make_date(convert_fyyear_to_year(year), 9, 30)
+    midpoint_fy <- lubridate::make_date(convert_fyyear_to_year(year), 9L, 30L)
   } else if (format == "alternate") {
-    midpoint_fy <- lubridate::make_date(year, 9, 30)
+    midpoint_fy <- lubridate::make_date(year, 9L, 30L)
   }
 
   return(midpoint_fy)
@@ -113,7 +113,7 @@ next_fy <- function(year, format = c("fyyear", "alternate")) {
 
   check_year_format(year, format = "fyyear")
 
-  fy <- as.integer(substr(year, 1, 2))
+  fy <- as.integer(stringr::str_sub(year, 1L, 2L))
 
   next_fy <- paste0(fy + 1L, fy + 2L)
 
diff --git a/R/get_it_extract_paths.R b/R/get_it_extract_paths.R
index 4e44807b4..3c4dc54c3 100644
--- a/R/get_it_extract_paths.R
+++ b/R/get_it_extract_paths.R
@@ -104,10 +104,10 @@ get_it_prescribing_path <- function(year, it_reference = NULL, ...) {
 check_it_reference <- function(it_reference) {
   if (stringr::str_starts(it_reference, stringr::fixed("SCTASK"))) {
     # If the 'full' reference has been supplied trim to just the number
-    it_reference <- stringr::str_sub(it_reference, start = 7, end = 14)
+    it_reference <- stringr::str_sub(it_reference, start = 7L, end = 14L)
   }
 
-  if (stringr::str_detect(it_reference, "[0-9]{7}", negate = TRUE)) {
+  if (stringr::str_detect(it_reference, "^[0-9]{7}$", negate = TRUE)) {
     cli::cli_abort(
       c("x" = "{.arg it_reference} must be exactly 7 numbers."),
       call = rlang::caller_env()
diff --git a/R/get_sc_demog_path.R b/R/get_sc_demog_path.R
deleted file mode 100644
index 89658c356..000000000
--- a/R/get_sc_demog_path.R
+++ /dev/null
@@ -1,23 +0,0 @@
-#' Social Care Demographic Lookup File Path
-#'
-#' @description Get the file path for the Social Care Demographic lookup file
-#'
-#' @param update The update month to use,
-#' defaults to [latest_update()]
-#'
-#' @param ... additional arguments passed to [get_file_path()]
-#'
-#' @return The path to the social care demographic file
-#' as an [fs::path()]
-#' @export
-#' @family social care lookup file paths
-#' @seealso [get_file_path()] for the generic function.
-get_sc_demog_lookup_path <- function(update = latest_update(), ...) {
-  sc_demog_lookup_path <- get_file_path(
-    directory = fs::path(get_slf_dir(), "Social_care"),
-    file_name = stringr::str_glue("sc_demographics_lookup_{update}.parquet"),
-    ...
-  )
-
-  return(sc_demog_lookup_path)
-}
diff --git a/R/get_sc_lookup_paths.R b/R/get_sc_lookup_paths.R
new file mode 100644
index 000000000..5add38b08
--- /dev/null
+++ b/R/get_sc_lookup_paths.R
@@ -0,0 +1,48 @@
+#' Social Care Demographic Lookup File Path
+#'
+#' @description Get the file path for the Social Care Demographic lookup file
+#' (a parquet file in the `Social_care` directory of the SLF directory).
+#' @param update The update month to use, it forms part of the file name,
+#' defaults to [latest_update()]
+#'
+#' @param ... additional arguments passed to [get_file_path()]
+#'
+#' @return The path to the social care demographic lookup file
+#' as an [fs::path()]
+#' @export
+#' @family social care lookup file paths
+#' @seealso [get_file_path()] for the generic function.
+get_sc_demog_lookup_path <- function(update = latest_update(), ...) {
+  sc_demog_lookup_path <- get_file_path(
+    directory = fs::path(get_slf_dir(), "Social_care"),
+    file_name = stringr::str_glue("sc_demographics_lookup_{update}.parquet"),
+    ...
+  )
+
+  return(sc_demog_lookup_path)
+}
+
+#' Social Care Client Lookup File Path
+#'
+#' @description Get the file path for the Social Care Client lookup file
+#' (a parquet file in the `Social_care` directory of the SLF directory).
+#' @param year Financial year, it forms part of the file name.
+#' @param update The update month to use, it forms part of the file name,
+#' defaults to [latest_update()]
+#'
+#' @param ... additional arguments passed to [get_file_path()]
+#'
+#' @return The path to the social care client lookup file
+#' as an [fs::path()]
+#' @export
+#' @family social care lookup file paths
+#' @seealso [get_file_path()] for the generic function.
+get_sc_client_lookup_path <- function(year, update = latest_update(), ...) {
+  sc_client_lookup_path <- get_file_path(
+    directory = fs::path(get_slf_dir(), "Social_care"),
+    file_name = stringr::str_glue("sc_client_lookup_{year}_{update}.parquet"),
+    ...
+  )
+
+  return(sc_client_lookup_path)
+}
diff --git a/R/get_source_extract_path.R b/R/get_source_extract_path.R
index 89c6dc0b4..cbd3fd46e 100644
--- a/R/get_source_extract_path.R
+++ b/R/get_source_extract_path.R
@@ -10,57 +10,63 @@
 #' @export
 #'
 #' @family extract file paths
-get_source_extract_path <- function(year,
-                                    type = c(
-                                      "Acute",
-                                      "AE",
-                                      "AT",
-                                      "CH",
-                                      "Client",
-                                      "CMH",
-                                      "DD",
-                                      "Deaths",
-                                      "DN",
-                                      "GPOoH",
-                                      "HC",
-                                      "Homelessness",
-                                      "Maternity",
-                                      "MH",
-                                      "Outpatients",
-                                      "PIS",
-                                      "SDS"
-                                    ),
-                                    ...) {
+get_source_extract_path <- function(
+    year,
+    type = c(
+      "Acute",
+      "AE",
+      "AT",
+      "CH",
+      "CMH",
+      "DD",
+      "Deaths",
+      "DN",
+      "GPOoH",
+      "HC",
+      "Homelessness",
+      "Maternity",
+      "MH",
+      "Outpatients",
+      "PIS",
+      "SDS"
+    ),
+    ...) {
+  if (year %in% type) {
+    cli::cli_abort("{.val {year}} was supplied to the {.arg year} argument.")
+  }
+
+  year <- check_year_format(year)
+
   type <- match.arg(type)
 
   if (!check_year_valid(year, type)) {
-    return(NA)
+    return(get_dummy_boxi_extract_path())
   }
 
-  file_name <- dplyr::case_when(
-    type == "Acute" ~ "acute_for_source",
-    type == "AE" ~ "a&e_for_source",
-    type == "AT" ~ "Alarms-Telecare-for-source",
-    type == "CH" ~ "care_home_for_source",
-    type == "CMH" ~ "CMH_for_source",
-    type == "Client" ~ "client_for_source",
-    type == "DD" ~ "DD_for_source",
-    type == "Deaths" ~ "deaths_for_source",
-    type == "DN" ~ "DN_for_source",
-    type == "GPOoH" ~ "GP_OOH_for_source",
-    type == "HC" ~ "Home_Care_for_source",
-    type == "Homelessness" ~ "homelessness_for_source",
-    type == "Maternity" ~ "maternity_for_source",
-    type == "MH" ~ "mental_health_for_source",
-    type == "DD" ~ "DD_for_source",
-    type == "Outpatients" ~ "outpatients_for_source",
-    type == "PIS" ~ "prescribing_file_for_source",
-    type == "SDS" ~ "SDS-for-source"
-  )
+  file_name <- dplyr::case_match(
+    type,
+    "Acute" ~ "acute_for_source",
+    "AE" ~ "a_and_e_for_source",
+    "AT" ~ "alarms-telecare-for-source",
+    "CH" ~ "care_home_for_source",
+    "CMH" ~ "cmh_for_source",
+    "DD" ~ "delayed_discharge_for_source",
+    "Deaths" ~ "deaths_for_source",
+    "DN" ~ "district_nursing_for_source",
+    "GPOoH" ~ "gp_ooh_for_source",
+    "HC" ~ "home_care_for_source",
+    "Homelessness" ~ "homelessness_for_source",
+    "Maternity" ~ "maternity_for_source",
+    "MH" ~ "mental_health_for_source",
+    "Outpatients" ~ "outpatients_for_source",
+    "PIS" ~ "prescribing_for_source",
+    "SDS" ~ "sds_for_source"
+  ) %>%
+    stringr::str_glue("-{year}.parquet")
 
   source_extract_path <- get_file_path(
     directory = get_year_dir(year),
-    file_name = stringr::str_glue("{file_name}-20{year}.parquet"),
+    file_name = file_name,
     ...
   )
 
diff --git a/R/get_sparra_hhg_paths.R b/R/get_sparra_hhg_paths.R
index 2fd1a69f9..157160ed4 100644
--- a/R/get_sparra_hhg_paths.R
+++ b/R/get_sparra_hhg_paths.R
@@ -10,6 +10,10 @@
 #' @family extract file paths
 #' @seealso [get_file_path()] for the generic function.
 get_hhg_path <- function(year, ...) {
+  if (!check_year_valid(year, "HHG")) {
+    return(get_dummy_boxi_extract_path())
+  }
+
   hhg_file_path <- get_file_path(
     directory = fs::path(get_slf_dir(), "HHG"),
     file_name = stringr::str_glue("HHG-20{year}.parquet"),
@@ -31,6 +35,10 @@ get_hhg_path <- function(year, ...) {
 #' @family extract file paths
 #' @seealso [get_file_path()] for the generic function.
 get_sparra_path <- function(year, ...) {
+  if (!check_year_valid(year, "SPARRA")) {
+    return(get_dummy_boxi_extract_path())
+  }
+
   sparra_file_path <- get_file_path(
     directory = fs::path(get_slf_dir(), "SPARRA"),
     file_name = stringr::str_glue("SPARRA-20{year}.parquet"),
diff --git a/R/get_temp_file_paths.R b/R/get_temp_file_paths.R
deleted file mode 100644
index 2a6bcbaee..000000000
--- a/R/get_temp_file_paths.R
+++ /dev/null
@@ -1,117 +0,0 @@
-#' Get a temporary version of the SLF
-#'
-#' @param year The financial year
-#' @param temp_version The temp version e.g. 1 or 7
-#' @param file_version Episode or Individual file
-#'
-#' @return The path to the file (`.rds`)
-get_slf_temp_path <-
-  function(year,
-           temp_version,
-           file_version = c("episode", "individual")) {
-    year <- check_year_format(year)
-    file_version <- match.arg(file_version)
-
-    base_dir <- fs::path(
-      "/",
-      "conf",
-      "sourcedev",
-      "Source_Linkage_File_Updates"
-    )
-
-    year_dir <- fs::path(base_dir, year)
-
-    temp_files_available <- fs::dir_ls(year_dir,
-      glob = "*temp-*"
-    ) %>%
-      stringr::str_match(
-        pattern = stringr::str_glue(
-          "temp-source-{file_version}-file-(?<version>[1-9])-{year}\\.rds"
-        )
-      ) %>%
-      magrittr::extract(, "version")
-
-    temp_files_available <-
-      temp_files_available[!is.na(temp_files_available)]
-
-    if (length(temp_files_available) == 0L) {
-      years_available <- fs::dir_ls(
-        base_dir,
-        recurse = TRUE,
-        glob = stringr::str_glue("*temp-source-{file_version}*")
-      ) %>%
-        stringr::str_match(
-          pattern = stringr::str_glue(
-            "temp-source-{file_version}-file-[1-9]-(?<year>[0-9]{{4}})\\.rds"
-          )
-        ) %>%
-        magrittr::extract(, "year") %>%
-        unique()
-
-      years_formatted <-
-        cli::cli_vec(years_available[!is.na(years_available)],
-          style = list("vec-last" = " or ")
-        )
-
-      cli::cli_abort(
-        c(
-          "No temp {file_version} files for {.val {year}}",
-          "{cli::qty(years_available)}{?There is only/You can choose from} {.val {years_formatted}}."
-        ),
-        call = rlang::caller_env()
-      )
-    }
-
-    if (!(temp_version %in% temp_files_available)) {
-      temp_files_formatted <- cli::cli_vec(temp_files_available,
-        style = list("vec-last" = " or ")
-      )
-
-      cli::cli_abort(
-        c(
-          "Temp {file_version} file {.val {temp_version}} isn't available for {.val {year}}.",
-          "{cli::qty(temp_files_available)}{?There is only/You can choose from} {.val {temp_files_formatted}}."
-        ),
-        call = rlang::caller_env()
-      )
-    }
-
-    # Do check to see which temp versions exist for the given year
-    # Return nice error if it doesn't work
-
-    file_name <-
-      stringr::str_glue("temp-source-{file_version}-file-{temp_version}-{year}.rds")
-
-    file_path <- get_file_path(
-      directory = year_dir,
-      file_name = file_name
-    )
-
-    return(file_path)
-  }
-
-#' Get a temporary version of the SLF episode file
-#'
-#' @inherit get_slf_temp_path
-#'
-#' @export
-get_slf_ep_temp_path <- function(year, temp_version) {
-  get_slf_temp_path(
-    year = year,
-    temp_version = temp_version,
-    file_version = "episode"
-  )
-}
-
-#' Get a temporary version of the SLF individual file
-#'
-#' @inherit get_slf_temp_path
-#'
-#' @export
-get_slf_indiv_temp_path <- function(year, temp_version) {
-  get_slf_temp_path(
-    year = year,
-    temp_version = temp_version,
-    file_version = "individual"
-  )
-}
diff --git a/R/gzip_files.R b/R/gzip_files.R
index b6cc0a2b0..9a665fbc0 100644
--- a/R/gzip_files.R
+++ b/R/gzip_files.R
@@ -17,7 +17,7 @@ gzip_files <- function(
   )
 
   n_unzipped_files <- length(unzipped_files)
-  if (n_unzipped_files > 0) {
+  if (n_unzipped_files > 0L) {
     cli::cli_inform(c(
       "i" = "{cli::qty(n_unzipped_files)}There {?is/are} {n_unzipped_files}
       uncompressed file{?s} for {year}, which will be compressed with
diff --git a/R/is_date_in_fyyear.R b/R/is_date_in_fyyear.R
index 44e816893..924e21e74 100644
--- a/R/is_date_in_fyyear.R
+++ b/R/is_date_in_fyyear.R
@@ -43,7 +43,7 @@ is_date_in_fyyear <- function(fyyear, date, date_end = NULL) {
     }
 
     # Check that date_end always comes after date (or all date_end is NA)
-    if (any(date > date_end, na.rm = TRUE) & !all(is.na(date_end))) {
+    if (any(date > date_end, na.rm = TRUE) && !all(is.na(date_end))) {
       first_error <- which.max(date > date_end)
 
       cli::cli_abort(
diff --git a/R/join_deaths_data.R b/R/join_deaths_data.R
index 694d2e2b9..89bcbbe13 100644
--- a/R/join_deaths_data.R
+++ b/R/join_deaths_data.R
@@ -2,16 +2,14 @@
 #'
 #' @param data Episode file data
 #' @param year financial year, e.g. '1920'
-#' @param slf_deaths_lookup_path Path to slf deaths lookup.
+#' @param slf_deaths_lookup The SLF deaths lookup.
 #'
 #' @return The data including the deaths lookup matched
 #'         on to the episode file.
 join_deaths_data <- function(
     data,
     year,
-    slf_deaths_lookup_path = get_slf_deaths_lookup_path(year)) {
-  slf_deaths_lookup <- read_file(slf_deaths_lookup_path)
-
+    slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year))) {
   return(
     data %>%
       dplyr::left_join(
diff --git a/R/last_date_month.R b/R/last_date_month.R
index 979970f87..0fddacc81 100644
--- a/R/last_date_month.R
+++ b/R/last_date_month.R
@@ -11,5 +11,5 @@
 #'
 #' @family date functions
 last_date_month <- function(date) {
-  return(lubridate::ceiling_date(date, "month") - lubridate::days(1))
+  return(lubridate::ceiling_date(date, "month") - lubridate::days(1L))
 }
diff --git a/R/match_on_ltcs.R b/R/match_on_ltcs.R
index 637e5b6c7..f83f31325 100644
--- a/R/match_on_ltcs.R
+++ b/R/match_on_ltcs.R
@@ -5,19 +5,23 @@
 #'
 #' @param data episode files
 #' @param year financial year, e.g. '1920'
+#' @param ltc_data The LTC data for the year
 #'
 #' @return data matched with long term conditions
-match_on_ltcs <- function(data, year) {
+match_on_ltcs <- function(
+    data,
+    year,
+    ltc_data = read_file(get_ltcs_path(year))) {
   # Match on LTC lookup
   matched <- dplyr::left_join(
     data,
-    read_file(get_ltcs_path(year)),
+    ltc_data,
     by = "chi",
     suffix = c("", "_ltc")
   ) %>%
     dplyr::mutate(
       # Replace any NA values with 0 for the LTC flags
-      dplyr::across("arth":"digestive", ~ tidyr::replace_na(., 0)),
+      dplyr::across("arth":"digestive", ~ tidyr::replace_na(.x, 0L)),
       # Use the postcode from the LTC file if it's otherwise missing
       postcode = dplyr::if_else(is.na(.data$postcode),
         .data$postcode_ltc,
diff --git a/R/process_costs_rmd.R b/R/process_costs_rmd.R
index 2efc25dd8..5d97d705f 100644
--- a/R/process_costs_rmd.R
+++ b/R/process_costs_rmd.R
@@ -14,7 +14,8 @@ process_costs_rmd <- function(file_name) {
     stringr::fixed("Rmd", ignore_case = TRUE)
   )) {
     cli::cli_abort(
-      "{.arg file_name} must be an {.code .Rmd} not a {.code .{fs::path_ext(file_name)}}."
+      "{.arg file_name} must be an {.code .Rmd} not a
+      {.code .{fs::path_ext(file_name)}}."
     )
   }
 
@@ -34,7 +35,12 @@ process_costs_rmd <- function(file_name) {
 
   output_file <- get_file_path(
     directory = output_dir,
-    file_name = fs::path_ext_set(stringr::str_glue("{fs::path_ext_remove(file_name)}-{latest_update()}-{date_today}"), "html"),
+    file_name = fs::path_ext_set(
+      stringr::str_glue(
+        "{fs::path_ext_remove(file_name)}-{latest_update()}-{date_today}"
+      ),
+      "html"
+    ),
     check_mode = "write"
   )
 
@@ -55,7 +61,7 @@ process_costs_rmd <- function(file_name) {
 #'
 #' @description This will read and process the
 #' District Nursing cost lookup, it will return the final data
-#' but also write this out as a rds.
+#' and write it to disk.
 #'
 #' @param file_path Path to the cost lookup.
 #'
@@ -73,7 +79,7 @@ process_costs_dn_rmd <- function(file_path = get_dn_costs_path()) {
 #'
 #' @description This will read and process the
 #' care homes cost lookup, it will return the final data
-#' but also write this out as a rds.
+#' and write it to disk.
 #'
 #' @inheritParams process_costs_dn_rmd
 #'
@@ -91,7 +97,7 @@ process_costs_ch_rmd <- function(file_path = get_ch_costs_path()) {
 #'
 #' @description This will read and process the
 #' GP ooh cost lookup, it will return the final data
-#' but also write this out as a rds.
+#' and write it to disk.
 #'
 #' @inheritParams process_costs_dn_rmd
 #'
@@ -109,7 +115,7 @@ process_costs_gp_ooh_rmd <- function(file_path = get_gp_ooh_costs_path()) {
 #'
 #' @description This will read and process the
 #' Home Care cost lookup, it will return the final data
-#' but also write this out as a rds.
+#' and write it to disk.
 #'
 #' @inheritParams process_costs_dn_rmd
 #'
diff --git a/R/process_extract_acute.R b/R/process_extract_acute.R
index 7d47d0ef4..70ff29370 100644
--- a/R/process_extract_acute.R
+++ b/R/process_extract_acute.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' acute extract, it will return the final data
-#' but also write this out as an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process
 #' @param year The year to process, in FY format.
@@ -53,17 +53,15 @@ process_extract_acute <- function(data, year, write_to_disk = TRUE) {
     convert_monthly_rows_to_vars(.data$costmonthnum, .data$cost_total_net, .data$yearstay) %>%
     # add yearstay and cost_total_net variables
     dplyr::mutate(
-      yearstay = rowSums(dplyr::across(tidyselect::ends_with("_beddays"))),
-      cost_total_net = rowSums(dplyr::across(tidyselect::ends_with("_cost")))
+      yearstay = rowSums(dplyr::pick(tidyselect::ends_with("_beddays"))),
+      cost_total_net = rowSums(dplyr::pick(tidyselect::ends_with("_cost")))
     ) %>%
     # Add oldtadm as a factor with labels
     dplyr::mutate(oldtadm = factor(.data$oldtadm,
       levels = 0L:8L
     ))
 
-
-  ## save outfile ---------------------------------------
-  outfile <- acute_clean %>%
+  acute_processed <- acute_clean %>%
     dplyr::select(
       "year",
       "recid",
@@ -113,10 +111,11 @@ process_extract_acute <- function(data, year, write_to_disk = TRUE) {
     dplyr::arrange(.data$chi, .data$record_keydate1)
 
   if (write_to_disk) {
-    # Save as rds file
-    outfile %>%
-      write_file(get_source_extract_path(year, "Acute", check_mode = "write"))
+    write_file(
+      acute_processed,
+      get_source_extract_path(year, "Acute", check_mode = "write")
+    )
   }
 
-  return(outfile)
+  return(acute_processed)
 }
diff --git a/R/process_extract_ae.R b/R/process_extract_ae.R
index 1be6efe39..95dfd99be 100644
--- a/R/process_extract_ae.R
+++ b/R/process_extract_ae.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' A&E extract, it will return the final data
-#' but also write this out as an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process
 #' @param year The year to process, in FY format.
@@ -35,6 +35,10 @@ process_extract_ae <- function(data, year, write_to_disk = TRUE) {
       .data$postcode_chi,
       .data$postcode_epi
     )) %>%
+    # A&E data has postcode in PC8 format but we need it in PC7 format
+    dplyr::mutate(
+      postcode = phsmethods::format_postcode(.data$postcode, "pc7")
+    ) %>%
     ## recode cypher HB codes ##
     dplyr::mutate(
       dplyr::across(c("hbtreatcode", "hbrescode"), ~ dplyr::case_when(
@@ -233,9 +237,7 @@ process_extract_ae <- function(data, year, write_to_disk = TRUE) {
       .data$keytime2
     )
 
-
-  # Save outfile----------------------------------------
-  outfile <- matched_ae_data %>%
+  ae_processed <- matched_ae_data %>%
     dplyr::select(
       "year",
       "recid",
@@ -290,10 +292,11 @@ process_extract_ae <- function(data, year, write_to_disk = TRUE) {
     )
 
   if (write_to_disk) {
-    # Save as rds file
-    outfile %>%
-      write_file(get_source_extract_path(year, "AE", check_mode = "write"))
+    write_file(
+      ae_processed,
+      get_source_extract_path(year, "AE", check_mode = "write")
+    )
   }
 
-  return(outfile)
+  return(ae_processed)
 }
diff --git a/R/process_extract_alarms_telecare.R b/R/process_extract_alarms_telecare.R
index 15cd79809..9a0745a04 100644
--- a/R/process_extract_alarms_telecare.R
+++ b/R/process_extract_alarms_telecare.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' (year specific) Alarms Telecare extract, it will return the final data
-#' but also write this out as rds.
+#' and (optionally) write it to disk.
 #'
 #' @inheritParams process_extract_care_home
 #'
diff --git a/R/process_extract_care_home.R b/R/process_extract_care_home.R
index 560011f84..cbf6d417c 100644
--- a/R/process_extract_care_home.R
+++ b/R/process_extract_care_home.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' (year specific) Care Home extract, it will return the final data
-#' but also write this out as rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The full processed data which will be selected from to create
 #' the year specific data.
@@ -43,7 +43,7 @@ process_extract_care_home <- function(
     ) %>%
     # remove any episodes where the latest submission was before the current year
     dplyr::filter(
-      substr(.data$sc_latest_submission, 1, 4) >= convert_fyyear_to_year(year)
+      substr(.data$sc_latest_submission, 1L, 4L) >= convert_fyyear_to_year(year)
     ) %>%
     # Match to client data
     dplyr::left_join(
diff --git a/R/process_extract_cmh.R b/R/process_extract_cmh.R
index 0b1ba0f19..a2adad75e 100644
--- a/R/process_extract_cmh.R
+++ b/R/process_extract_cmh.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' CMH extract, it will return the final data
-#' but also write this out as an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process
 #' @param year The year to process, in FY format.
@@ -44,9 +44,7 @@ process_extract_cmh <- function(data,
     # create blank diag 6
     dplyr::mutate(diag6 = NA)
 
-  # Outfile --------------------------------------------
-
-  outfile <- cmh_clean %>%
+  cmh_processed <- cmh_clean %>%
     dplyr::select(
       "year",
       "recid",
@@ -73,10 +71,11 @@ process_extract_cmh <- function(data,
     )
 
   if (write_to_disk) {
-    # Save as rds file
-    outfile %>%
-      write_file(get_source_extract_path(year, "CMH", check_mode = "write"))
+    write_file(
+      cmh_processed,
+      get_source_extract_path(year, "CMH", check_mode = "write")
+    )
   }
 
-  return(outfile)
+  return(cmh_processed)
 }
diff --git a/R/process_extract_consultations.R b/R/process_extract_consultations.R
index 7262c1df6..6dc175cb8 100644
--- a/R/process_extract_consultations.R
+++ b/R/process_extract_consultations.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' GP OOH Consultations extract, it will return the final data
-#' but also write this out as an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process
 #' @param year The year to process, in FY format.
diff --git a/R/process_extract_delayed_discharges.R b/R/process_extract_delayed_discharges.R
index 29f37eb29..3c56807f9 100644
--- a/R/process_extract_delayed_discharges.R
+++ b/R/process_extract_delayed_discharges.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' delayed discharges extract, it will return the final data
-#' but also write this out as an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process
 #' @param year The year to process, in FY format.
diff --git a/R/process_extract_district_nursing.R b/R/process_extract_district_nursing.R
index a1b3bf816..9d1df62a6 100644
--- a/R/process_extract_district_nursing.R
+++ b/R/process_extract_district_nursing.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' District Nursing extract, it will return the final data
-#' but also write this out an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process
 #' @param year The year to process, in FY format.
diff --git a/R/process_extract_gp_ooh.R b/R/process_extract_gp_ooh.R
index 4add41cfa..2b536878a 100644
--- a/R/process_extract_gp_ooh.R
+++ b/R/process_extract_gp_ooh.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' GP OoH extract, it will return the final data
-#' but also write this out an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param year The year to process, in FY format.
 #' @param data_list A list containing the extracts.
diff --git a/R/process_extract_home_care.R b/R/process_extract_home_care.R
index 382521b5d..874ad899c 100644
--- a/R/process_extract_home_care.R
+++ b/R/process_extract_home_care.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' (year specific) Home Care extract, it will return the final data
-#' but also write this out as rds.
+#' and (optionally) write it to disk.
 #'
 #' @inheritParams process_extract_care_home
 #'
@@ -30,9 +30,13 @@ process_extract_home_care <- function(
 
   hc_data <- data %>%
     # select episodes for FY
-    dplyr::filter(is_date_in_fyyear(year, .data$record_keydate1, .data$record_keydate2)) %>%
+    dplyr::filter(
+      is_date_in_fyyear(year, .data$record_keydate1, .data$record_keydate2)
+    ) %>%
     # remove any episodes where the latest submission was before the current year
-    dplyr::filter(substr(.data$sc_latest_submission, 1, 4) >= convert_fyyear_to_year(year)) %>%
+    dplyr::filter(
+      substr(.data$sc_latest_submission, 1L, 4L) >= convert_fyyear_to_year(year)
+    ) %>%
     # Match to client data
     dplyr::left_join(client_lookup, by = c("sending_location", "social_care_id")) %>%
     dplyr::mutate(year = year)
diff --git a/R/process_extract_homelessness.R b/R/process_extract_homelessness.R
index 36528cfa7..f4fb7d3e5 100644
--- a/R/process_extract_homelessness.R
+++ b/R/process_extract_homelessness.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' homelessness extract, it will return the final data
-#' and optionally write it out as rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process from [read_extract_homelessness()].
 #' @param year The year to process, in FY format.
@@ -43,7 +43,7 @@ process_extract_homelessness <- function(
     ) %>%
     dplyr::mutate(
       dplyr::across(
-        c("financial_difficulties_debt_unemployment":"refused"),
+        "financial_difficulties_debt_unemployment":"refused",
         ~ tidyr::replace_na(.x, 9L)
       ),
       hl1_reason_ftm = paste0(
@@ -146,13 +146,14 @@ process_extract_homelessness <- function(
     )
 
   if (write_to_disk) {
-    final_data %>%
-      write_file(get_file_path(
-        get_year_dir(year),
-        stringr::str_glue("homelessness_for_source-20{year}"),
-        ext = "rds",
+    write_file(
+      final_data,
+      get_source_extract_path(
+        year = year,
+        type = "Homelessness",
         check_mode = "write"
-      ))
+      )
+    )
   }
 
   return(final_data)
diff --git a/R/process_extract_maternity.R b/R/process_extract_maternity.R
index 04fa46ced..64fa4e205 100644
--- a/R/process_extract_maternity.R
+++ b/R/process_extract_maternity.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' maternity extract, it will return the final data
-#' but also write this out an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process
 #' @param year The year to process, in FY format.
@@ -63,10 +63,7 @@ process_extract_maternity <- function(data, year, write_to_disk = TRUE) {
       )
     )
 
-
-  # Save outfile------------------------------------------------
-
-  outfile <- maternity_clean %>%
+  maternity_processed <- maternity_clean %>%
     dplyr::select(
       "year",
       "recid",
@@ -113,12 +110,11 @@ process_extract_maternity <- function(data, year, write_to_disk = TRUE) {
     dplyr::arrange(.data$chi, .data$record_keydate1)
 
   if (write_to_disk) {
-    # Save as rds file
-    outfile %>%
-      write_file(
-        get_source_extract_path(year, "Maternity", check_mode = "write")
-      )
+    write_file(
+      maternity_processed,
+      get_source_extract_path(year, "Maternity", check_mode = "write")
+    )
   }
 
-  return(outfile)
+  return(maternity_processed)
 }
diff --git a/R/process_extract_mental_health.R b/R/process_extract_mental_health.R
index 108c14c61..ffea63d28 100644
--- a/R/process_extract_mental_health.R
+++ b/R/process_extract_mental_health.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' mental health extract, it will return the final data
-#' but also write this out an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process
 #' @param year The year to process, in FY format.
@@ -32,8 +32,10 @@ process_extract_mental_health <- function(data, year, write_to_disk = TRUE) {
     dplyr::mutate(gpprac = convert_eng_gpprac_to_dummy(.data$gpprac)) %>%
     # cij_ipdc
     dplyr::mutate(
-      cij_ipdc = dplyr::if_else(.data$cij_inpatient == "MH", "I", "NA"),
-      cij_ipdc = dplyr::na_if(.data$cij_ipdc, "NA")
+      cij_ipdc = dplyr::na_if(
+        dplyr::if_else(.data$cij_inpatient == "MH", "I", "NA"),
+        "NA"
+      )
     ) %>%
     dplyr::select(-.data$cij_inpatient) %>%
     # cij_admtype recode unknown to 99
@@ -52,9 +54,9 @@ process_extract_mental_health <- function(data, year, write_to_disk = TRUE) {
     ) %>%
     dplyr::mutate(
       # yearstay
-      yearstay = rowSums(dplyr::across(tidyselect::ends_with("_beddays"))),
+      yearstay = rowSums(dplyr::pick(tidyselect::ends_with("_beddays"))),
       # cost total net
-      cost_total_net = rowSums(dplyr::across(tidyselect::ends_with("_cost"))),
+      cost_total_net = rowSums(dplyr::pick(tidyselect::ends_with("_cost"))),
       # total length of stay
       stay = calculate_stay(
         .data$year,
@@ -65,10 +67,7 @@ process_extract_mental_health <- function(data, year, write_to_disk = TRUE) {
       smrtype = add_smr_type(.data$recid)
     )
 
-
-  # Outfile  ---------------------------------------
-
-  outfile <- mh_clean %>%
+  mh_processed <- mh_clean %>%
     dplyr::arrange(.data$chi, .data$record_keydate1) %>%
     dplyr::select(
       "year",
@@ -116,10 +115,11 @@ process_extract_mental_health <- function(data, year, write_to_disk = TRUE) {
     )
 
   if (write_to_disk) {
-    outfile %>%
-      # Save as rds file
-      write_file(get_source_extract_path(year, "MH", check_mode = "write"))
+    write_file(
+      mh_processed,
+      get_source_extract_path(year, "MH", check_mode = "write")
+    )
   }
 
-  return(outfile)
+  return(mh_processed)
 }
diff --git a/R/process_extract_ooh_diagnosis.R b/R/process_extract_ooh_diagnosis.R
index 128c6c772..f2afd634e 100644
--- a/R/process_extract_ooh_diagnosis.R
+++ b/R/process_extract_ooh_diagnosis.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' GP OOH Diagnosis extract, it will return the final data
-#' but also write this out an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process
 #' @param year The year to process, in FY format.
diff --git a/R/process_extract_ooh_outcomes.R b/R/process_extract_ooh_outcomes.R
index 6a14bced5..f188e6de1 100644
--- a/R/process_extract_ooh_outcomes.R
+++ b/R/process_extract_ooh_outcomes.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' GP OOH Outcomes extract, it will return the final data
-#' but also write this out as an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process
 #' @param year The year to process, in FY format.
diff --git a/R/process_extract_outpatients.R b/R/process_extract_outpatients.R
index 39b421ab4..341ee0f1a 100644
--- a/R/process_extract_outpatients.R
+++ b/R/process_extract_outpatients.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' outpatients extract, it will return the final data
-#' but also write this out as an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process
 #' @param year The year to process, in FY format.
@@ -49,11 +49,7 @@ process_extract_outpatients <- function(data, year, write_to_disk = TRUE) {
       )
     )
 
-
-  ## save outfile ---------------------------------------
-
-  outfile <-
-    outpatients_clean %>%
+  outpatients_processed <- outpatients_clean %>%
     dplyr::select(
       "year",
       "recid",
@@ -89,12 +85,11 @@ process_extract_outpatients <- function(data, year, write_to_disk = TRUE) {
     )
 
   if (write_to_disk) {
-    # Save as rds file
-    outfile %>%
-      write_file(
-        get_source_extract_path(year, "Outpatients", check_mode = "write")
-      )
+    write_file(
+      outpatients_processed,
+      get_source_extract_path(year, "Outpatients", check_mode = "write")
+    )
   }
 
-  return(outfile)
+  return(outpatients_processed)
 }
diff --git a/R/process_extract_prescribing.R b/R/process_extract_prescribing.R
index 776299d47..68c388b83 100644
--- a/R/process_extract_prescribing.R
+++ b/R/process_extract_prescribing.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' prescribing extract, it will return the final data
-#' but also write this out as an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process
 #' @param year The year to process, in FY format.
@@ -50,9 +50,10 @@ process_extract_prescribing <- function(data, year, write_to_disk = TRUE) {
   }
 
   if (write_to_disk) {
-    # Save as rds file
-    pis_clean %>%
-      write_file(get_source_extract_path(year, "PIS", check_mode = "write"))
+    write_file(
+      pis_clean,
+      get_source_extract_path(year, "PIS", check_mode = "write")
+    )
   }
 
   return(pis_clean)
diff --git a/R/process_extract_sds.R b/R/process_extract_sds.R
index a58651749..bd9e93a3f 100644
--- a/R/process_extract_sds.R
+++ b/R/process_extract_sds.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' (year specific) SDS extract, it will return the final data
-#' but also write this out as rds.
+#' and (optionally) write it to disk.
 #'
 #' @inheritParams process_extract_care_home
 #'
diff --git a/R/process_lookup_gpprac.R b/R/process_lookup_gpprac.R
index 45773613f..2afe1affd 100644
--- a/R/process_lookup_gpprac.R
+++ b/R/process_lookup_gpprac.R
@@ -12,10 +12,11 @@
 #' @return the final data as a [tibble][tibble::tibble-package].
 #' @export
 #' @family process extracts
-process_lookup_gpprac <- function(open_data = get_gpprac_opendata(),
-                                  gpprac_ref_path = get_gpprac_ref_path(),
-                                  spd_path = get_spd_path(),
-                                  write_to_disk = TRUE) {
+process_lookup_gpprac <- function(
+    open_data = get_gpprac_opendata(),
+    gpprac_ref_path = get_gpprac_ref_path(),
+    spd_path = get_spd_path(),
+    write_to_disk = TRUE) {
   gpprac_ref_file <- read_file(path = gpprac_ref_path) %>%
     dplyr::select(
       "gpprac" = "praccode",
@@ -65,15 +66,11 @@ process_lookup_gpprac <- function(open_data = get_gpprac_opendata(),
     ) %>%
     dplyr::mutate(
       lca = convert_ca_to_lca(.data$ca2018),
-      hbpraccode = dplyr::if_else(
-        .data$gpprac %in% c(99942L, 99957L, 99961L, 99981L, 99999L),
-        "S08200003",
-        .data$hbpraccode
-      ),
-      hbpraccode = dplyr::if_else(
-        .data$gpprac == 99995L,
-        "S08200001",
-        .data$hbpraccode
+      hbpraccode = dplyr::case_match(
+        .data$gpprac,
+        c(99942L, 99957L, 99961L, 99981L, 99999L) ~ "S08200003",
+        99995L ~ "S08200001",
+        .default = .data$hbpraccode
       )
     )
 
diff --git a/R/process_lookup_homelessness.R b/R/process_lookup_homelessness.R
new file mode 100644
index 000000000..c0138d10a
--- /dev/null
+++ b/R/process_lookup_homelessness.R
@@ -0,0 +1,108 @@
+#' Create a homelessness lookup
+#' @description Builds a lookup from the processed homelessness data with
+#' one row per distinct CHI, `record_keydate1` and `record_keydate2`.
+#' Rows without a CHI are dropped and `hl1_in_fy` is set to `1L` on the rest.
+#'
+#' @param homelessness_data the processed homelessness data for
+#' the financial year (created with [process_extract_homelessness()]).
+#' @inheritParams create_episode_file
+#'
+#' @return the final data as a [tibble][tibble::tibble-package].
+#' @export
+#' @family process extracts
+create_homelessness_lookup <- function(
+    year,
+    homelessness_data = read_file(get_source_extract_path(year, "Homelessness"))) {
+  homelessness_lookup <- homelessness_data %>%
+    dplyr::distinct(.data$chi, .data$record_keydate1, .data$record_keydate2) %>%
+    # Name the column as a string: `.data` is deprecated in tidyselect verbs
+    tidyr::drop_na("chi") %>%
+    dplyr::mutate(hl1_in_fy = 1L)
+  return(homelessness_lookup)
+}
+
+
+#' Add 'homelessness in FY' flag
+#' @description Join `hl1_in_fy` from the homelessness lookup onto the data
+#' by CHI, setting it to `0L` wherever the joined value is missing.
+#'
+#' @param data The data to add the flag to - the episode
+#' or individual file.
+#' @param lookup The homelessness lookup created by
+#' [create_homelessness_lookup()]
+#' @inheritParams create_episode_file
+#'
+#' @return the final data as a [tibble][tibble::tibble-package]
+#' @export
+add_homelessness_flag <- function(
+    data,
+    year,
+    lookup = create_homelessness_lookup(year)) {
+  ## need to decide which recids this relates to
+  # Distinct CHI/flag pairs; "many-to-one" errors if a CHI still repeats
+  chi_flags <- dplyr::distinct(lookup, .data$chi, .data$hl1_in_fy)
+  flagged <- data %>%
+    dplyr::left_join(chi_flags, by = "chi", relationship = "many-to-one") %>%
+    dplyr::mutate(hl1_in_fy = tidyr::replace_na(.data$hl1_in_fy, 0L))
+
+  return(flagged)
+}
+
+
+#' Add homelessness date flags episode
+#'
+#' @description Add `hl1_6before_ep`, `hl1_6after_ep` and `hl1_during_ep`
+#' (1/0, `NA` if not derived) to episodes: at least one ended homelessness
+#' application in the 180 days before, after, or during an episode.
+#'
+#' @inheritParams add_homelessness_flag
+#'
+#' @return the final data as a [tibble][tibble::tibble-package].
+#' @export
+add_homelessness_date_flags <- function(
+    data,
+    year,
+    lookup = create_homelessness_lookup(year)) {
+  ep_keys <- c("chi", "record_keydate1", "record_keydate2", "recid")
+  # Keep only applications that have an end date (record_keydate2)
+  app_lookup <- lookup %>%
+    dplyr::filter(!is.na(.data$record_keydate2)) %>%
+    dplyr::mutate(
+      app_start = .data$record_keydate1,
+      app_end = .data$record_keydate2,
+      pre_app = .data$app_start - lubridate::days(180),
+      post_app = .data$app_end + lubridate::days(180)
+    ) %>%
+    dplyr::distinct(
+      .data$chi, .data$hl1_in_fy, .data$pre_app, .data$post_app,
+      .data$app_start, .data$app_end
+    )
+
+  homeless_flag <- data %>%
+    dplyr::select(tidyselect::all_of(ep_keys)) %>%
+    dplyr::filter(.data$recid %in% c(
+      "00B", "01B", "GLS", "DD", "02B", "04B", "AE2", "OoH", "DN", "CMH", "NRS"
+    )) %>%
+    dplyr::distinct() %>%
+    dplyr::left_join(app_lookup, by = "chi", relationship = "many-to-many") %>%
+    dplyr::filter(.data$hl1_in_fy == 1) %>%
+    dplyr::mutate(
+      hl1_6before_ep = as.numeric(.data$app_end <= .data$record_keydate2 &
+        .data$record_keydate1 <= .data$post_app),
+      hl1_6after_ep = as.numeric(.data$pre_app <= .data$record_keydate2 &
+        .data$record_keydate1 <= .data$app_start),
+      hl1_during_ep = as.numeric(.data$app_start <= .data$record_keydate2 &
+        .data$record_keydate1 <= .data$app_end)
+    ) %>%
+    dplyr::summarise(
+      hl1_6before_ep = max(.data$hl1_6before_ep),
+      hl1_6after_ep = max(.data$hl1_6after_ep),
+      hl1_during_ep = max(.data$hl1_during_ep),
+      .by = tidyselect::all_of(ep_keys)
+    )
+
+  data <- data %>%
+    dplyr::left_join(homeless_flag, by = ep_keys, relationship = "many-to-one")
+
+  return(data)
+}
diff --git a/R/process_lookup_ltc.R b/R/process_lookup_ltc.R
index 80a4b8706..8ea33da48 100644
--- a/R/process_lookup_ltc.R
+++ b/R/process_lookup_ltc.R
@@ -17,20 +17,18 @@ process_lookup_ltc <- function(data, year, write_to_disk = TRUE) {
   ltc_flags <- data %>%
     dplyr::mutate(dplyr::across(
       tidyselect::ends_with("date"),
-      list(flag = ~ dplyr::if_else(is.na(.x) | .x > end_fy(year), 0L, 1L))
+      list(flag = ~ as.integer(!(is.na(.x) | .x > end_fy(year))))
     )) %>%
     dplyr::rename_with(
       .cols = tidyselect::ends_with("flag"),
       .fn = ~ stringr::str_remove(.x, "_date_flag")
     )
 
-  # Save Outfile---------------------------------------------
-
   if (write_to_disk) {
-    # Save .rds file
-    ltc_flags %>%
-      dplyr::arrange(.data$chi) %>%
-      write_file(get_ltcs_path(year, check_mode = "write"))
+    write_file(
+      ltc_flags,
+      get_ltcs_path(year, check_mode = "write")
+    )
   }
 
   return(ltc_flags)
diff --git a/R/process_lookup_postcode.R b/R/process_lookup_postcode.R
index 878c51f37..f9f1d47f4 100644
--- a/R/process_lookup_postcode.R
+++ b/R/process_lookup_postcode.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' postcode lookup, it will return the final data
-#' but also write this out as an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param simd_path Path to SIMD lookup.
 #' @param locality_path Path to locality lookup.
@@ -53,21 +53,19 @@ process_lookup_postcode <- function(spd_path = get_spd_path(),
       tidyselect::matches("datazone\\d{4}$")
     ) %>%
     dplyr::mutate(
-      locality = tidyr::replace_na("locality", "No Locality Information")
+      locality = tidyr::replace_na(.data$locality, "No Locality Information")
     )
 
 
   # Join data together  -----------------------------------------------------
-  data <-
-    dplyr::left_join(spd_file, simd_file, by = "pc7") %>%
+  data <- dplyr::left_join(spd_file, simd_file, by = "pc7") %>%
     dplyr::rename(postcode = "pc7") %>%
     dplyr::left_join(locality_file, by = "datazone2011")
 
 
   # Finalise output -----------------------------------------------------
 
-  outfile <-
-    data %>%
+  slf_pc_lookup <- data %>%
     dplyr::select(
       "postcode",
       "lca",
@@ -89,13 +87,12 @@ process_lookup_postcode <- function(spd_path = get_spd_path(),
       tidyselect::matches("ur2_\\d{4}$")
     )
 
-
-  # Save out ----------------------------------------------------------------
   if (write_to_disk) {
-    outfile %>%
-      # Save .rds file
-      write_file(get_slf_postcode_path(check_mode = "write"))
+    write_file(
+      slf_pc_lookup,
+      get_slf_postcode_path(check_mode = "write")
+    )
   }
 
-  return(outfile)
+  return(slf_pc_lookup)
 }
diff --git a/R/process_lookup_sc_client.R b/R/process_lookup_sc_client.R
index 1874c2b5a..845570b93 100644
--- a/R/process_lookup_sc_client.R
+++ b/R/process_lookup_sc_client.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' social care client lookup, it will return the final data
-#' but also write this out as an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process
 #' @param year The year to process
@@ -108,10 +108,7 @@ process_lookup_sc_client <- function(data, year, write_to_disk = TRUE) {
       .fn = ~ paste0("sc_", .x)
     )
 
-
-  ## save outfile ---------------------------------------
-  outfile <-
-    client_clean %>%
+  sc_client_lookup <- client_clean %>%
     # reorder
     dplyr::select(
       "sending_location",
@@ -125,10 +122,11 @@ process_lookup_sc_client <- function(data, year, write_to_disk = TRUE) {
     )
 
   if (write_to_disk) {
-    # Save .rds file
-    outfile %>%
-      write_file(get_source_extract_path(year, "Client", check_mode = "write"))
+    write_file(
+      sc_client_lookup,
+      get_sc_client_lookup_path(year, check_mode = "write")
+    )
   }
 
-  return(outfile)
+  return(sc_client_lookup)
 }
diff --git a/R/process_lookup_sc_demographics.R b/R/process_lookup_sc_demographics.R
index f93a5a4db..8c363f547 100644
--- a/R/process_lookup_sc_demographics.R
+++ b/R/process_lookup_sc_demographics.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' social care demographic lookup, it will return the final data
-#' but also write this out as an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process.
 #' @param spd_path Path to the Scottish Postcode Directory.
@@ -12,7 +12,10 @@
 #' @return the final data as a [tibble][tibble::tibble-package].
 #' @export
 #' @family process extracts
-process_lookup_sc_demographics <- function(data, spd_path = get_spd_path(), write_to_disk = TRUE) {
+process_lookup_sc_demographics <- function(
+    data,
+    spd_path = get_spd_path(),
+    write_to_disk = TRUE) {
   # Deal with postcodes ---------------------------------------
 
   # UK postcode regex - see https://ideal-postcodes.co.uk/guides/postcode-validation
@@ -51,8 +54,7 @@ process_lookup_sc_demographics <- function(data, spd_path = get_spd_path(), writ
     ))
 
   # count number of na postcodes
-  na_postcodes <-
-    sc_demog %>%
+  na_postcodes <- sc_demog %>%
     dplyr::count(dplyr::across(tidyselect::contains("postcode"), ~ is.na(.x)))
 
   sc_demog <- sc_demog %>%
@@ -67,19 +69,28 @@ process_lookup_sc_demographics <- function(data, spd_path = get_spd_path(), writ
       ~ dplyr::if_else(stringr::str_detect(.x, uk_pc_regexp), .x, NA)
     )) %>%
     dplyr::select(
-      "latest_record_flag", "extract_date", "sending_location", "social_care_id", "upi", "gender",
-      "dob", "submitted_postcode", "chi_postcode"
+      "latest_record_flag",
+      "extract_date",
+      "sending_location",
+      "social_care_id",
+      "upi",
+      "gender",
+      "dob",
+      "submitted_postcode",
+      "chi_postcode"
     ) %>%
     # check if submitted_postcode matches with postcode lookup
-    dplyr::mutate(valid_pc = dplyr::if_else(.data$submitted_postcode %in% valid_spd_postcodes, 1L, 0L)) %>%
+    dplyr::mutate(
+      valid_pc = .data$submitted_postcode %in% valid_spd_postcodes
+    ) %>%
     # use submitted_postcode if valid, otherwise use chi_postcode
     dplyr::mutate(postcode = dplyr::case_when(
-      (!is.na(.data$submitted_postcode) & .data$valid_pc == 1L) ~ .data$submitted_postcode,
-      (is.na(.data$submitted_postcode) & .data$valid_pc == 0L) ~ .data$chi_postcode
+      (!is.na(.data$submitted_postcode) & .data$valid_pc) ~ .data$submitted_postcode,
+      (is.na(.data$submitted_postcode) & !.data$valid_pc) ~ .data$chi_postcode
     )) %>%
     dplyr::mutate(postcode_type = dplyr::case_when(
-      (!is.na(.data$submitted_postcode) & .data$valid_pc == 1L) ~ "submitted",
-      (is.na(.data$submitted_postcode) & .data$valid_pc == 0L) ~ "chi",
+      (!is.na(.data$submitted_postcode) & .data$valid_pc) ~ "submitted",
+      (is.na(.data$submitted_postcode) & !.data$valid_pc) ~ "chi",
       (is.na(.data$submitted_postcode) & is.na(.data$chi_postcode)) ~ "missing"
     ))
 
@@ -88,17 +99,11 @@ process_lookup_sc_demographics <- function(data, spd_path = get_spd_path(), writ
     dplyr::count(.data$postcode_type)
 
   # count number of replaced postcode - compare with count above
-  na_replaced_postcodes <-
-    sc_demog %>%
+  na_replaced_postcodes <- sc_demog %>%
     dplyr::count(dplyr::across(tidyselect::ends_with("_postcode"), ~ is.na(.x)))
 
-  na_replaced_postcodes
-  na_postcodes
-
 
-  ## save outfile ---------------------------------------
-  outfile <-
-    sc_demog %>%
+  sc_demog_lookup <- sc_demog %>%
     # group by sending location and ID
     dplyr::group_by(.data$sending_location, .data$social_care_id) %>%
     # arrange so latest submissions are last
@@ -117,14 +122,12 @@ process_lookup_sc_demographics <- function(data, spd_path = get_spd_path(), writ
     ) %>%
     dplyr::ungroup()
 
-
-  ## save file ##
-
   if (write_to_disk) {
-    # Save .rds file
-    outfile %>%
-      write_file(get_sc_demog_lookup_path(check_mode = "write"))
+    write_file(
+      sc_demog_lookup,
+      get_sc_demog_lookup_path(check_mode = "write")
+    )
   }
 
-  return(outfile)
+  return(sc_demog_lookup)
 }
diff --git a/R/process_sc_all_alarms_telecare.R b/R/process_sc_all_alarms_telecare.R
index 61bce41b6..620b14cee 100644
--- a/R/process_sc_all_alarms_telecare.R
+++ b/R/process_sc_all_alarms_telecare.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' all Alarms Telecare extract, it will return the final data
-#' but also write this out as a rds.
+#' and (optionally) write it to disk.
 #'
 #' @inheritParams process_sc_all_care_home
 #'
@@ -121,9 +121,10 @@ process_sc_all_alarms_telecare <- function(
     tibble::as_tibble()
 
   if (write_to_disk) {
-    # Save .rds file ----
-    qtr_merge %>%
-      write_file(get_sc_at_episodes_path(check_mode = "write"))
+    write_file(
+      qtr_merge,
+      get_sc_at_episodes_path(check_mode = "write")
+    )
   }
 
   return(qtr_merge)
diff --git a/R/process_sc_all_care_home.R b/R/process_sc_all_care_home.R
index a11f275e8..c41e1a1d5 100644
--- a/R/process_sc_all_care_home.R
+++ b/R/process_sc_all_care_home.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' all Care Home extract, it will return the final data
-#' but also write this out as a rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process
 #' @param sc_demog_lookup The Social Care Demographics lookup produced by
diff --git a/R/process_sc_all_home_care.R b/R/process_sc_all_home_care.R
index b812d492b..5f2b4db49 100644
--- a/R/process_sc_all_home_care.R
+++ b/R/process_sc_all_home_care.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' all home care extract, it will return the final data
-#' but also write this out as a rds.
+#' and (optionally) write it to disk.
 #'
 #' @inheritParams process_sc_all_care_home
 #'
diff --git a/R/process_sc_all_sds.R b/R/process_sc_all_sds.R
index 802c9215c..c17f74f28 100644
--- a/R/process_sc_all_sds.R
+++ b/R/process_sc_all_sds.R
@@ -1,7 +1,7 @@
 #' Process the all SDS extract
 #' @description This will read and process the
 #' all SDS extract, it will return the final data
-#' but also write this out as a rds.
+#' and (optionally) write it to disk.
 #'
 #' @inheritParams process_sc_all_care_home
 #'
@@ -36,10 +36,12 @@ process_sc_all_sds <- function(
     # SDS option 4 is derived when a person receives more than one option.
     # e.g. if a person has options 1 and 2 then option 4 will be derived
     dplyr::mutate(
-      sds_option_4 = rowSums(dplyr::across(tidyselect::starts_with("sds_option_"))) > 1L,
+      sds_option_4 = rowSums(
+        dplyr::pick(tidyselect::starts_with("sds_option_"))
+      ) > 1L,
       .after = .data$sds_option_3
     ) %>%
-    # If sds start date is missing, assign start of FY
+    # If SDS start date is missing, assign start of FY
     dplyr::mutate(sds_start_date = fix_sc_start_dates(
       .data$sds_start_date,
       .data$period
@@ -117,12 +119,11 @@ process_sc_all_sds <- function(
     # change the data format from data.table to data.frame
     tibble::as_tibble()
 
-
-  # Save outfile------------------------------------------------
   if (write_to_disk) {
-    # Save .rds file
-    final_data %>%
-      write_file(get_sc_sds_episodes_path(check_mode = "write"))
+    write_file(
+      final_data,
+      get_sc_sds_episodes_path(check_mode = "write")
+    )
   }
 
   return(final_data)
diff --git a/R/process_tests_alarms_telecare.R b/R/process_tests_alarms_telecare.R
index 079e6810e..a0c46ff07 100644
--- a/R/process_tests_alarms_telecare.R
+++ b/R/process_tests_alarms_telecare.R
@@ -37,8 +37,8 @@ produce_source_at_tests <- function(data,
     # create test flags
     create_demog_test_flags() %>%
     dplyr::mutate(
-      n_at_alarms = dplyr::if_else(.data$smrtype == "AT-Alarm", 1L, 0L),
-      n_at_telecare = dplyr::if_else(.data$smrtype == "AT-Tele", 1L, 0L)
+      n_at_alarms = .data$smrtype == "AT-Alarm",
+      n_at_telecare = .data$smrtype == "AT-Tele"
     ) %>%
     create_lca_test_flags(.data$sc_send_lca) %>%
     # remove variables that won't be summed
diff --git a/R/process_tests_care_home.R b/R/process_tests_care_home.R
index 0b673ad4c..3633c9882 100644
--- a/R/process_tests_care_home.R
+++ b/R/process_tests_care_home.R
@@ -48,17 +48,17 @@ produce_source_ch_tests <- function(data,
     create_demog_test_flags() %>%
     dplyr::mutate(
       n_episodes = 1L,
-      ch_name_missing = dplyr::if_else(is.na(.data$ch_name), 1L, 0L),
+      ch_name_missing = is.na(.data$ch_name),
       ch_provider_1_to_5 = dplyr::case_when(
         .data$ch_provider %in% c("1", "2", "3", "4", "5") ~ 1L,
         TRUE ~ 0L
       ),
-      ch_provider_other = dplyr::if_else(.data$ch_provider == "6", 1L, 0L),
-      ch_adm_reason_missing = dplyr::if_else(is.na(.data$ch_adm_reason), 1L, 0L)
+      ch_provider_other = .data$ch_provider == "6",
+      ch_adm_reason_missing = is.na(.data$ch_adm_reason)
     ) %>%
     create_lca_test_flags(.data$sc_send_lca) %>%
     # keep variables for comparison
-    dplyr::select(c("valid_chi":dplyr::last_col())) %>%
+    dplyr::select("valid_chi":dplyr::last_col()) %>%
     # use function to sum new test flags
     calculate_measures(measure = "sum")
 
diff --git a/R/process_tests_cmh.R b/R/process_tests_cmh.R
index 1baec563c..1fa21b71f 100644
--- a/R/process_tests_cmh.R
+++ b/R/process_tests_cmh.R
@@ -45,7 +45,7 @@ produce_source_cmh_tests <- function(data) {
     create_hb_test_flags(hb_var = .data$hbrescode) %>%
     dplyr::mutate(n_episodes = 1L) %>%
     # keep variables for comparison
-    dplyr::select(c("valid_chi":dplyr::last_col())) %>%
+    dplyr::select("valid_chi":dplyr::last_col()) %>%
     # use function to sum new test flags
     calculate_measures(measure = "sum")
 
diff --git a/R/process_tests_district_nursing.R b/R/process_tests_district_nursing.R
index 2b8b35e8a..7f73570e4 100644
--- a/R/process_tests_district_nursing.R
+++ b/R/process_tests_district_nursing.R
@@ -18,7 +18,7 @@ process_tests_district_nursing <- function(data, year) {
     # replace NA by 0 in monthly costs
     dplyr::mutate(dplyr::across(
       dplyr::ends_with("_cost"),
-      ~ tidyr::replace_na(.x, 0)
+      ~ tidyr::replace_na(.x, 0.0)
     ))
 
   comparison <- produce_test_comparison(
@@ -52,9 +52,8 @@ process_tests_district_nursing <- function(data, year) {
 #' from [calculate_measures()]
 #'
 #' @family extract test functions
-#' @seealso [create_hb_test_flags()]
-#' #' [create_hscp_test_flags()] and [create_hb_cost_test_flags()]
-#' for creating test flags
+#' @seealso [create_hb_test_flags()], [create_hscp_test_flags()]
+#' and [create_hb_cost_test_flags()] for creating test flags.
 #' @seealso calculate_measures
 produce_source_dn_tests <- function(data,
                                     sum_mean_vars = c("cost", "yearstay"),
@@ -68,7 +67,7 @@ produce_source_dn_tests <- function(data,
     create_hb_test_flags(.data$hbtreatcode) %>%
     create_hb_cost_test_flags(.data$hbtreatcode, .data$cost_total_net) %>%
     # keep variables for comparison
-    dplyr::select(c(.data$valid_chi:.data$NHS_Lanarkshire_cost)) %>%
+    dplyr::select(.data$valid_chi:.data$NHS_Lanarkshire_cost) %>%
     # use function to sum new test flags
     calculate_measures(measure = "sum")
 
diff --git a/R/process_tests_episode_file.R b/R/process_tests_episode_file.R
index 46e9e7171..fc31727ed 100644
--- a/R/process_tests_episode_file.R
+++ b/R/process_tests_episode_file.R
@@ -14,16 +14,16 @@ process_tests_episode_file <- function(data, year) {
       "gender",
       "postcode",
       "hbtreatcode",
+      "hscp2018",
       "dob",
       "recid",
       "yearstay",
       "record_keydate1",
       "record_keydate2",
       dplyr::contains(c("beddays", "cost", "cij"))
-    ) %>%
-    slfhelper::get_chi()
+    )
 
-  old_data <- get_existing_data_for_tests(data)
+  old_data <- get_existing_data_for_tests(data, anon_chi = TRUE)
 
   comparison <- produce_test_comparison(
     old_data = produce_episode_file_tests(old_data),
@@ -62,6 +62,7 @@ process_tests_episode_file <- function(data, year) {
 #' [create_hscp_test_flags()] and [create_hb_cost_test_flags()]
 #' for creating test flags
 #' @seealso calculate_measures
+#' @export
 produce_episode_file_tests <- function(
     data,
     sum_mean_vars = c("beddays", "cost", "yearstay"),
@@ -72,7 +73,15 @@ produce_episode_file_tests <- function(
   test_flags <- data %>%
     dplyr::group_by(.data$recid) %>%
     # use functions to create HB and partnership flags
-    create_demog_test_flags() %>%
+    dplyr::mutate(
+      unique_anon_chi = dplyr::lag(.data$anon_chi) != .data$anon_chi,
+      n_missing_anon_chi = is_missing(.data$anon_chi),
+      n_males = .data$gender == 1L,
+      n_females = .data$gender == 2L,
+      n_postcode = !is.na(.data$postcode) & .data$postcode != "",
+      n_missing_postcode = is_missing(.data$postcode),
+      missing_dob = is.na(.data$dob)
+    ) %>%
     create_hb_test_flags(.data$hbtreatcode) %>%
     create_hb_cost_test_flags(.data$hbtreatcode, .data$cost_total_net) %>%
     # Flags to count stay types
@@ -97,9 +106,11 @@ produce_episode_file_tests <- function(
         1L,
         0L
       )
-    ) %>%
+    )
+
+  test_flags <- test_flags %>%
     # keep variables for comparison
-    dplyr::select(c("valid_chi":dplyr::last_col())) %>%
+    dplyr::select("unique_anon_chi":dplyr::last_col()) %>%
     # use function to sum new test flags
     calculate_measures(measure = "sum", group_by = "recid")
 
diff --git a/R/process_tests_home_care.R b/R/process_tests_home_care.R
index bd1bcf97e..71938d889 100644
--- a/R/process_tests_home_care.R
+++ b/R/process_tests_home_care.R
@@ -59,7 +59,7 @@ produce_source_hc_tests <- function(data,
     ) %>%
     create_lca_test_flags(.data$sc_send_lca) %>%
     # keep variables for comparison
-    dplyr::select(c("valid_chi":dplyr::last_col())) %>%
+    dplyr::select("valid_chi":dplyr::last_col()) %>%
     # use function to sum new test flags
     calculate_measures(measure = "sum")
 
diff --git a/R/process_tests_homelessness.R b/R/process_tests_homelessness.R
index c80205d15..bea7fc881 100644
--- a/R/process_tests_homelessness.R
+++ b/R/process_tests_homelessness.R
@@ -39,7 +39,7 @@ produce_slf_homelessness_tests <- function(data,
     create_demog_test_flags() %>%
     create_lca_test_flags(.data$hl1_sending_lca) %>%
     # keep variables for comparison
-    dplyr::select(c("valid_chi":dplyr::last_col())) %>%
+    dplyr::select("valid_chi":dplyr::last_col()) %>%
     # use function to sum new test flags
     calculate_measures(measure = "sum")
 
diff --git a/R/process_tests_individual_file.R b/R/process_tests_individual_file.R
index 695dc19a0..a9d193465 100644
--- a/R/process_tests_individual_file.R
+++ b/R/process_tests_individual_file.R
@@ -100,11 +100,18 @@ produce_individual_file_tests <- function(data) {
       measure = "sum"
     )
 
+  dup_chi <- data.frame(
+    measure = "duplicated chi number",
+    # sum() over the logical vector already yields an integer count
+    value = sum(duplicated(data$chi))
+  )
+
   join_output <- list(
     test_flags,
     all_measures,
     min_max_measures,
-    sum_measures
+    sum_measures,
+    dup_chi
   ) %>%
     purrr::reduce(dplyr::full_join, by = c("measure", "value"))
 
diff --git a/R/process_tests_it_chi_deaths.R b/R/process_tests_it_chi_deaths.R
index 4a82acb37..d10eadd23 100644
--- a/R/process_tests_it_chi_deaths.R
+++ b/R/process_tests_it_chi_deaths.R
@@ -47,7 +47,7 @@ produce_it_chi_deaths_tests <- function(data) {
       "n_deaths_{current_year_2}" := .data$death_year == current_year_2,
       "n_deaths_{current_year_3}" := .data$death_year == current_year_3,
       "n_deaths_{current_year_4}" := .data$death_year == current_year_4,
-      "n_deaths_{current_year_5}" := .data$death_year == current_year_5,
+      "n_deaths_{current_year_5}" := .data$death_year == current_year_5
     ) %>%
     # remove variables that are not test flags
     dplyr::select(dplyr::starts_with("n_")) %>%
diff --git a/R/process_tests_nrs_deaths.R b/R/process_tests_nrs_deaths.R
index 1d41a64c3..fd96fa5c4 100644
--- a/R/process_tests_nrs_deaths.R
+++ b/R/process_tests_nrs_deaths.R
@@ -39,7 +39,7 @@ produce_source_nrs_tests <- function(data) {
     create_demog_test_flags() %>%
     dplyr::mutate(n_deaths = 1L) %>%
     # keep variables for comparison
-    dplyr::select(c("valid_chi":dplyr::last_col())) %>%
+    dplyr::select("valid_chi":dplyr::last_col()) %>%
     # use function to sum new test flags
     calculate_measures(measure = "sum")
 
diff --git a/R/process_tests_outpatients.R b/R/process_tests_outpatients.R
index f8a7a6a2e..5ab3e82db 100644
--- a/R/process_tests_outpatients.R
+++ b/R/process_tests_outpatients.R
@@ -12,11 +12,13 @@ process_tests_outpatients <- function(data, year) {
   comparison <- produce_test_comparison(
     old_data = produce_source_extract_tests(old_data,
       sum_mean_vars = "cost",
-      max_min_vars = c("record_keydate1", "record_keydate2", "cost_total_net")
+      max_min_vars = c("record_keydate1", "record_keydate2", "cost_total_net"),
+      add_hscp_count = FALSE
     ),
     new_data = produce_source_extract_tests(data,
       sum_mean_vars = "cost",
-      max_min_vars = c("record_keydate1", "record_keydate2", "cost_total_net")
+      max_min_vars = c("record_keydate1", "record_keydate2", "cost_total_net"),
+      add_hscp_count = FALSE
     )
   ) %>%
     write_tests_xlsx(sheet_name = "00B", year)
diff --git a/R/process_tests_prescribing.R b/R/process_tests_prescribing.R
index a950f50e6..4b4c4dcb3 100644
--- a/R/process_tests_prescribing.R
+++ b/R/process_tests_prescribing.R
@@ -42,7 +42,7 @@ produce_source_pis_tests <- function(data) {
     create_demog_test_flags() %>%
     dplyr::mutate(n_episodes = 1L) %>%
     # keep variables for comparison
-    dplyr::select(c("valid_chi":dplyr::last_col())) %>%
+    dplyr::select("valid_chi":dplyr::last_col()) %>%
     # use function to sum new test flags
     calculate_measures(measure = "sum")
 
diff --git a/R/process_tests_sc_client_lookup.R b/R/process_tests_sc_client_lookup.R
new file mode 100644
index 000000000..c3e4e70f9
--- /dev/null
+++ b/R/process_tests_sc_client_lookup.R
@@ -0,0 +1,69 @@
+#' Social care client lookup tests
+#'
+#' @description Takes the processed social care client lookup and
+#' produces a test comparison against the lookup from the previous
+#' update. The comparison is written to disk in the tests workbook.
+#'
+#' @inherit process_tests_acute
+#'
+#' @export
+process_tests_sc_client_lookup <- function(data, year) {
+  comparison <- produce_test_comparison(
+    old_data = produce_tests_sc_client_lookup(
+      read_file(get_sc_client_lookup_path(year, update = previous_update()))
+    ),
+    new_data = produce_tests_sc_client_lookup(data)
+  )
+
+  comparison %>%
+    write_tests_xlsx(sheet_name = "sc_client", year = year)
+
+  return(comparison)
+}
+
+
+#' Produce social care client lookup tests
+#'
+#' @description Produce the test flags for the social care client lookup.
+#'
+#' @param data new or old data for testing summary flags
+#' (the previous data is read from [get_sc_client_lookup_path()])
+#'
+#' @return a dataframe with a count of each flag.
+#'
+#' @family social care test functions
+produce_tests_sc_client_lookup <- function(data) {
+  test_flags <- data %>%
+    # create test flags
+    create_sending_location_test_flags(.data$sending_location) %>%
+    dplyr::arrange(.data$sending_location, .data$social_care_id) %>%
+    dplyr::mutate(
+      # dplyr::lag() has no previous value for the first row, so the flag
+      # is NA there
+      unique_sc_id = dplyr::lag(.data$social_care_id) != .data$social_care_id,
+      n_sc_living_alone_yes = .data$sc_living_alone == "Yes",
+      n_sc_living_alone_no = .data$sc_living_alone == "No",
+      n_sc_living_alone_not_known = .data$sc_living_alone == "Not Known",
+      n_sc_support_from_unpaid_carer_yes =
+        .data$sc_support_from_unpaid_carer == "Yes",
+      n_sc_support_from_unpaid_carer_no =
+        .data$sc_support_from_unpaid_carer == "No",
+      n_sc_support_from_unpaid_carer_not_known =
+        .data$sc_support_from_unpaid_carer == "Not Known",
+      n_sc_social_worker_yes = .data$sc_social_worker == "Yes",
+      n_sc_social_worker_no = .data$sc_social_worker == "No",
+      n_sc_social_worker_not_known = .data$sc_social_worker == "Not Known",
+      n_sc_meals_yes = .data$sc_meals == "Yes",
+      n_sc_meals_no = .data$sc_meals == "No",
+      n_sc_meals_not_known = .data$sc_meals == "Not Known",
+      n_sc_day_care_yes = .data$sc_day_care == "Yes",
+      n_sc_day_care_no = .data$sc_day_care == "No",
+      n_sc_day_care_not_known = .data$sc_day_care == "Not Known"
+    ) %>%
+    # remove variables that won't be summed
+    dplyr::select("Aberdeen_City":dplyr::last_col()) %>%
+    # use function to sum new test flags
+    calculate_measures(measure = "sum")
+
+  return(test_flags)
+}
diff --git a/R/produce_homelessness_completeness.R b/R/produce_homelessness_completeness.R
index f85e44beb..00a459df7 100644
--- a/R/produce_homelessness_completeness.R
+++ b/R/produce_homelessness_completeness.R
@@ -41,18 +41,18 @@ produce_homelessness_completeness <- function(
     openxlsx::read.xlsx(
       sg_pub_path,
       sheet = "Table 1",
-      rows = 8:39,
-      cols = 1:25,
+      rows = 8L:39L,
+      cols = 1L:25L,
       colNames = FALSE
     ) %>%
     dplyr::rename_with(~ c(
       "CAName",
-      paste0(paste0("q", 1:4), "_", rep(2016, 4)),
-      paste0(paste0("q", 1:4), "_", rep(2017, 4)),
-      paste0(paste0("q", 1:4), "_", rep(2018, 4)),
-      paste0(paste0("q", 1:4), "_", rep(2019, 4)),
-      paste0(paste0("q", 1:4), "_", rep(2020, 4)),
-      paste0(paste0("q", 1:4), "_", rep(2021, 4))
+      paste0(paste0("q", 1L:4L), "_", rep(2016L, 4L)),
+      paste0(paste0("q", 1L:4L), "_", rep(2017L, 4L)),
+      paste0(paste0("q", 1L:4L), "_", rep(2018L, 4L)),
+      paste0(paste0("q", 1L:4L), "_", rep(2019L, 4L)),
+      paste0(paste0("q", 1L:4L), "_", rep(2020L, 4L)),
+      paste0(paste0("q", 1L:4L), "_", rep(2021L, 4L))
     )) %>%
     tidyr::pivot_longer(
       !"CAName",
diff --git a/R/produce_source_extract_tests.R b/R/produce_source_extract_tests.R
index 20d1a4191..7f8feda92 100644
--- a/R/produce_source_extract_tests.R
+++ b/R/produce_source_extract_tests.R
@@ -13,6 +13,7 @@
 #' (data is from [get_source_extract_path()])
 #' @param sum_mean_vars variables used when selecting 'all' measures from [calculate_measures()]
 #' @param max_min_vars variables used when selecting 'min-max' from [calculate_measures()]
+#' @param add_hscp_count Whether to add the HSCP count flags (default `TRUE`). Specify `FALSE` where the `hscp` variable is not available.
 #'
 #' @return a dataframe with a count of each flag
 #' from [calculate_measures()]
@@ -28,14 +29,21 @@ produce_source_extract_tests <- function(data,
                                          max_min_vars = c(
                                            "record_keydate1", "record_keydate2",
                                            "cost_total_net", "yearstay"
-                                         )) {
+                                         ),
+                                         add_hscp_count = TRUE) {
   test_flags <- data %>%
     # use functions to create HB and partnership flags
     create_demog_test_flags() %>%
     create_hb_test_flags(.data$hbtreatcode) %>%
-    create_hb_cost_test_flags(.data$hbtreatcode, .data$cost_total_net) %>%
+    create_hb_cost_test_flags(.data$hbtreatcode, .data$cost_total_net)
+
+  if (add_hscp_count) {
+    test_flags <- create_hscp_test_flags(test_flags, .data$hscp)
+  }
+
+  test_flags <- test_flags %>%
     # keep variables for comparison
-    dplyr::select(c("valid_chi":dplyr::last_col())) %>%
+    dplyr::select("valid_chi":dplyr::last_col()) %>%
     # use function to sum new test flags
     calculate_measures(measure = "sum")
 
diff --git a/R/read_extract_acute.R b/R/read_extract_acute.R
index a0fba0707..6a0d23b11 100644
--- a/R/read_extract_acute.R
+++ b/R/read_extract_acute.R
@@ -152,7 +152,9 @@ read_extract_acute <- function(year, file_path = get_boxi_extract_path(year = ye
       GLS_record = "GLS Record"
     ) %>%
     # replace NA in cost_total_net by 0
-    dplyr::mutate(cost_total_net = tidyr::replace_na(.data[["cost_total_net"]], 0))
+    dplyr::mutate(
+      cost_total_net = tidyr::replace_na(.data[["cost_total_net"]], 0.0)
+    )
 
   return(extract_acute)
 }
diff --git a/R/read_extract_gp_ooh.R b/R/read_extract_gp_ooh.R
index 98606eb8a..3a711c2f8 100644
--- a/R/read_extract_gp_ooh.R
+++ b/R/read_extract_gp_ooh.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' GP OoH extract, it will return the final data
-#' but also write this out as an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param year The year to process, in FY format.
 #' @param diagnosis_path Path to diagnosis BOXI extract location.
diff --git a/R/read_extract_mental_health.R b/R/read_extract_mental_health.R
index fe82732c8..248316975 100644
--- a/R/read_extract_mental_health.R
+++ b/R/read_extract_mental_health.R
@@ -129,7 +129,9 @@ read_extract_mental_health <- function(
       uri = "Unique Record Identifier"
     ) %>%
     # replace NA in cost_total_net by 0
-    dplyr::mutate(cost_total_net = tidyr::replace_na(.data[["cost_total_net"]], 0))
+    dplyr::mutate(
+      cost_total_net = tidyr::replace_na(.data[["cost_total_net"]], 0.0)
+    )
 
   return(extract_mental_health)
 }
diff --git a/R/read_file.R b/R/read_file.R
index 2941b62ed..be0a6fc65 100644
--- a/R/read_file.R
+++ b/R/read_file.R
@@ -27,6 +27,11 @@ read_file <- function(path, col_select = NULL, as_data_frame = TRUE, ...) {
     "parquet"
   )
 
+  # Return an empty tibble if trying to read the dummy path
+  if (path == get_dummy_boxi_extract_path()) {
+    return(tibble::tibble())
+  }
+
   ext <- fs::path_ext(path)
 
   if (ext == "gz") {
diff --git a/R/read_lookup_sc_client.R b/R/read_lookup_sc_client.R
index 88fcf826a..cc98060f3 100644
--- a/R/read_lookup_sc_client.R
+++ b/R/read_lookup_sc_client.R
@@ -3,13 +3,14 @@
 #' @description This will read and process the
 #' social care client lookup
 #'
-#' @param sc_dvprod_connection The connection to the SC platform.
 #' @param fyyear The year to process, in the standard format '1718'
+#' @param sc_dvprod_connection The connection to the SC platform.
 #'
 #' @return the final data as a [tibble][tibble::tibble-package].
 #' @export
 #' @family process extracts
-read_lookup_sc_client <- function(sc_dvprod_connection = phs_db_connection(dsn = "DVPROD"), fyyear) {
+read_lookup_sc_client <- function(fyyear,
+                                  sc_dvprod_connection = phs_db_connection(dsn = "DVPROD")) {
   check_year_format(fyyear)
   year <- convert_fyyear_to_year(fyyear)
 
diff --git a/R/read_sc_all_alarms_telecare.R b/R/read_sc_all_alarms_telecare.R
index 4647244bb..2c7bd03db 100644
--- a/R/read_sc_all_alarms_telecare.R
+++ b/R/read_sc_all_alarms_telecare.R
@@ -22,18 +22,19 @@ read_sc_all_alarms_telecare <- function(sc_dvprod_connection = phs_db_connection
       "service_start_date",
       "service_end_date"
     ) %>%
-    # fix bad period (2017, 2020 & 2021)
+    dplyr::collect() %>%
+    # fix bad period (2017, 2020, 2021, and so on)
     dplyr::mutate(
-      period = dplyr::if_else(.data$period == "2017", "2017Q4", .data$period),
-      period = dplyr::if_else(.data$period == "2020", "2020Q4", .data$period),
-      period = dplyr::if_else(.data$period == "2021", "2021Q4", .data$period)
+      period = dplyr::if_else(
+        grepl("^\\d{4}$", .data$period),
+        paste0(.data$period, "Q4"),
+        .data$period
+      )
     ) %>%
-    # order
-    dplyr::arrange(.data$sending_location, .data$social_care_id) %>%
-    dplyr::collect() %>%
     dplyr::mutate(
-      dplyr::across(c("sending_location", "service_type"), as.integer)
-    )
+      dplyr::across(c("sending_location", "service_type"), ~ as.integer(.x))
+    ) %>%
+    dplyr::arrange(.data$sending_location, .data$social_care_id)
 
   return(at_full_data)
 }
diff --git a/R/write_file.R b/R/write_file.R
index a4f888bdc..62b0025a7 100644
--- a/R/write_file.R
+++ b/R/write_file.R
@@ -37,7 +37,7 @@ write_file <- function(data, path, ...) {
       sink = path,
       compression = "zstd",
       version = "latest",
-      ...,
+      ...
     )
   )
 
diff --git a/R/write_tests_xlsx.R b/R/write_tests_xlsx.R
index e187149d5..68452b0cf 100644
--- a/R/write_tests_xlsx.R
+++ b/R/write_tests_xlsx.R
@@ -47,7 +47,7 @@ write_tests_xlsx <- function(comparison_data, sheet_name, year = NULL) {
     while (fs::file_exists(path = in_use_path) && seconds < max_wait) {
       # While the tests are in use (wait a random number of seconds from 1 to 30)
       cli::cli_progress_update()
-      wait <- sample(x = 3:15, size = 1)
+      wait <- sample(x = 3L:15L, size = 1L)
 
       Sys.sleep(wait)
       seconds <- seconds + wait
@@ -56,7 +56,7 @@ write_tests_xlsx <- function(comparison_data, sheet_name, year = NULL) {
   }
 
   # Final check to maybe avoid corrupting the workbook
-  Sys.sleep(sample(x = 1:3, size = 1))
+  Sys.sleep(sample(x = 1L:3L, size = 1L))
   if (!fs::file_exists(path = in_use_path)) {
     fs::file_create(path = in_use_path)
   } else {
diff --git a/Rmarkdown/costs_care_home.Rmd b/Rmarkdown/costs_care_home.Rmd
index 7d2e65c2d..09d2e5a68 100644
--- a/Rmarkdown/costs_care_home.Rmd
+++ b/Rmarkdown/costs_care_home.Rmd
@@ -22,23 +22,28 @@ fs::file_copy(get_ch_costs_path(),
 )
 
 ## Read costs from the CHC Open data
-ch_costs_data <-
-  phsopendata::get_resource(
-    res_id = "4ee7dc84-ca65-455c-9e76-b614091f389f",
-    col_select = c("Date", "KeyStatistic", "CA", "Value")
-  ) %>%
+ch_costs_data <- phsopendata::get_resource(
+  res_id = "4ee7dc84-ca65-455c-9e76-b614091f389f",
+  col_select = c("Date", "KeyStatistic", "CA", "Value")
+) %>%
   janitor::clean_names() %>%
   # Dates are at end of the fin year
   # so cost are for the fin year to that date.
-  mutate(year = createslf::convert_year_to_fyyear((date %/% 10000) - 1)) %>%
+  mutate(year = createslf::convert_year_to_fyyear((date %/% 10000L) - 1L)) %>%
   filter(year >= "1617") %>%
-  mutate(funding_source = stringr::str_extract(key_statistic, "((:?All)|(:?Self)|(:?Publicly))")) %>%
-  mutate(nursing_care_provision = if_else(stringr::str_detect(key_statistic, "Without"), 1, 0)) %>%
-  select(year,
-    ca,
-    funding_source,
-    nursing_care_provision,
-    cost_per_week = value
+  mutate(funding_source = stringr::str_extract(
+    string = key_statistic,
+    pattern = "((?:All)|(?:Self)|(?:Publicly))"
+  )) %>%
+  mutate(
+    nursing_care_provision = as.integer(stringr::str_detect(key_statistic, "Without"))
+  ) %>%
+  select(
+    "year",
+    "ca",
+    "funding_source",
+    "nursing_care_provision",
+    cost_per_week = "value"
   )
 
 
@@ -105,7 +110,7 @@ matched_costs_data <-
   # match to new costs
   full_join(old_costs, by = c("year", "nursing_care_provision")) %>%
   # compute difference
-  mutate(pct_diff = (cost_per_day - cost_old) / cost_old * 100)
+  mutate(pct_diff = (cost_per_day - cost_old) / cost_old * 100.0)
 
 summary(matched_costs_data$pct_diff)
 
diff --git a/Rmarkdown/costs_district_nursing.Rmd b/Rmarkdown/costs_district_nursing.Rmd
index 825a931ed..e3c9bba13 100644
--- a/Rmarkdown/costs_district_nursing.Rmd
+++ b/Rmarkdown/costs_district_nursing.Rmd
@@ -79,7 +79,7 @@ population_lookup <- read_file(get_datazone_pop_path("HSCP2019_pop_est_1981_2021
   # Select only the HSCPs for NHS Highland & years since 2015
   filter(
     hscp2019 %in% c("S37000004", "S37000016"),
-    year >= 2015
+    year >= 2015L
   ) %>%
   # Create year as FY = YYYY from CCYY.
   rename(calendar_year = year) %>%
@@ -93,7 +93,7 @@ population_lookup <- read_file(get_datazone_pop_path("HSCP2019_pop_est_1981_2021
   ## compute proportion ##
   mutate(
     pop_proportion = pop / total_pop,
-    pop_pct = pop_proportion * 100
+    pop_pct = pop_proportion * 100.0
   ) %>%
   ## Argyll and Bute is the only HSCP in NHS Highland that submits data ##
   filter(hscp2019name == "Argyll and Bute")
@@ -110,7 +110,9 @@ matched_data <- full_join(dn_raw_costs_contacts,
   # recode NA pop_proportion with 1
   mutate(pop_proportion = replace_na(pop_proportion, 1)) %>%
   ## total net cost ##
-  mutate(cost_total_net = ((cost * 1000) / (number_of_contacts / pop_proportion))) %>%
+  mutate(
+    cost_total_net = ((cost * 1000) / (number_of_contacts / pop_proportion))
+  ) %>%
   # sort by HB2019 and year
   arrange(hb2019, year) %>%
   # keep only records with cost
diff --git a/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R b/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R
index d33dfbe49..ea6f81bfc 100644
--- a/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R
+++ b/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R
@@ -6,7 +6,7 @@ library(glue)
 nsu_dir <- path("/conf/hscdiip/SLF_Extracts/NSU")
 
 # Change the year
-fin_year <- "1516"
+fin_year <- "2223"
 
 db_connection <- odbc::dbConnect(
   odbc::odbc(),
@@ -16,7 +16,7 @@ db_connection <- odbc::dbConnect(
 )
 
 # Check the table name and change if required.
-table <- dbplyr::in_schema("ROBERM18", "FINAL_2")
+table <- dbplyr::in_schema("ROBERM18", "FINAL_1")
 
 # Read NSU data
 nsu_data <-
@@ -35,9 +35,11 @@ nsu_data <-
   collect()
 
 # Write out the data
-file_path <- path(nsu_dir, glue("All_CHIs_20{fin_year}.zsav"))
+file_path <- path(nsu_dir, glue("All_CHIs_20{fin_year}.parquet"))
 # This will archive the existing file for later comparison
 if (file_exists(file_path)) {
-  file_copy(file_path, path(nsu_dir, glue("All_CHIs_20{fin_year}_OLD.zsav")))
+  file_copy(file_path, path(nsu_dir, glue("All_CHIs_20{fin_year}_OLD.parquet")))
 }
-write_sav(nsu_data, file_path, compress = TRUE)
+
+nsu_data %>%
+  arrow::write_parquet(file_path, compression = "zstd", compression_level = 10)
diff --git a/_targets.R b/_targets.R
index f50045aed..3473679de 100644
--- a/_targets.R
+++ b/_targets.R
@@ -19,17 +19,17 @@ tar_option_set(
   memory = "persistent" # default option
 )
 
-years_to_run <- c("1718", "1819", "1920", "2021", "2122", "2223")
+years_to_run <- c("1718", "1819", "1920", "2021", "2122", "2223", "2324")
 
 list(
   tar_rds(write_to_disk, TRUE),
   tar_rds(
     file_path_ext_clean,
     make_lowercase_ext(),
-    priority = 1,
+    priority = 1.0,
     cue = tar_cue_age(
       name = file_path_ext_clean,
-      age = as.difftime(7, units = "days")
+      age = as.difftime(7.0, units = "days")
     )
   ),
   ## Lookup data ##
@@ -44,14 +44,18 @@ list(
   ),
   tar_file_read(dd_data, get_dd_path(), read_extract_delayed_discharges(!!.x)),
   tar_file_read(ltc_data, get_it_ltc_path(), read_lookup_ltc(!!.x)),
-  tar_target(slf_ch_name_lookup_path, get_slf_ch_name_lookup_path(), format = "file"),
+  tar_target(
+    slf_ch_name_lookup_path,
+    get_slf_ch_name_lookup_path(),
+    format = "file"
+  ),
   ## Process Lookups ##
   tar_target(
     sc_demog_data,
     read_lookup_sc_demographics(),
     cue = tar_cue_age(
       name = sc_demog_data,
-      age = as.difftime(28, units = "days")
+      age = as.difftime(28.0, units = "days")
     )
   ),
   tar_target(
@@ -117,7 +121,7 @@ list(
     read_sc_all_alarms_telecare(),
     cue = tar_cue_age(
       name = all_at_extract,
-      age = as.difftime(28, units = "days")
+      age = as.difftime(28.0, units = "days")
     )
   ),
   tar_target(
@@ -134,7 +138,7 @@ list(
     read_sc_all_home_care(),
     cue = tar_cue_age(
       name = all_home_care_extract,
-      age = as.difftime(28, units = "days")
+      age = as.difftime(28.0, units = "days")
     )
   ),
   tar_target(
@@ -151,7 +155,7 @@ list(
     read_sc_all_care_home(),
     cue = tar_cue_age(
       name = all_care_home_extract,
-      age = as.difftime(28, units = "days")
+      age = as.difftime(28.0, units = "days")
     )
   ),
   tar_target(
@@ -175,7 +179,7 @@ list(
     read_sc_all_sds(),
     cue = tar_cue_age(
       name = all_sds_extract,
-      age = as.difftime(28, units = "days")
+      age = as.difftime(28.0, units = "days")
     )
   ),
   tar_target(
@@ -192,10 +196,10 @@ list(
     tar_rds(
       compress_extracts,
       gzip_files(year),
-      priority = 1,
+      priority = 1.0,
       cue = tar_cue_age(
         name = compress_extracts,
-        age = as.difftime(7, units = "days")
+        age = as.difftime(7.0, units = "days")
       )
     ),
     ### target data extracts ###
@@ -335,11 +339,14 @@ list(
         year
       )
     ),
-    tar_target(source_homelessness_extract, process_extract_homelessness(
-      homelessness_data,
-      year,
-      write_to_disk = write_to_disk
-    )),
+    tar_target(
+      source_homelessness_extract,
+      process_extract_homelessness(
+        homelessness_data,
+        year,
+        write_to_disk = write_to_disk
+      )
+    ),
     tar_target(
       tests_source_homelessness_extract,
       process_tests_homelessness(
@@ -445,6 +452,10 @@ list(
         write_to_disk = write_to_disk
       )
     ),
+    tar_target(
+      tests_sc_client_lookup,
+      process_tests_sc_client_lookup(sc_client_lookup, year = year)
+    ),
     tar_target(
       source_sc_alarms_tele,
       process_extract_alarms_telecare(
@@ -539,11 +550,26 @@ list(
         source_sc_alarms_tele
       )
     ),
+    tar_file_read(nsu_cohort, get_nsu_path(year), read_file(!!.x)),
+    tar_target(
+      homelessness_lookup,
+      create_homelessness_lookup(
+        year,
+        homelessness_data = source_homelessness_extract
+      )
+    ),
     tar_target(
       episode_file,
-      run_episode_file(
+      create_episode_file(
         processed_data_list,
         year,
+        homelessness_lookup = homelessness_lookup,
+        dd_data = source_dd_extract,
+        nsu_cohort = nsu_cohort,
+        ltc_data = source_ltc_lookup,
+        slf_pc_lookup = source_pc_lookup,
+        slf_gpprac_lookup = source_gp_lookup,
+        slf_deaths_lookup = slf_deaths_lookup,
         write_to_disk
       )
     ),
@@ -559,6 +585,7 @@ list(
       create_individual_file(
         episode_file = episode_file,
         year = year,
+        homelessness_lookup = homelessness_lookup,
         write_to_disk = write_to_disk
       )
     ),
@@ -568,36 +595,36 @@ list(
         data = individual_file,
         year = year
       )
-    ),
-    tar_target(
-      episode_file_dataset,
-      arrow::write_dataset(
-        dataset = episode_file,
-        path = fs::path(
-          get_year_dir(year),
-          stringr::str_glue("source-episode-file-{year}")
-        ),
-        format = "parquet",
-        # Should correspond to the available slfhelper filters
-        partitioning = c("recid", "hscp2018"),
-        compression = "zstd",
-        version = "latest"
-      )
-    ),
-    tar_target(
-      individual_file_dataset,
-      arrow::write_dataset(
-        dataset = individual_file,
-        path = fs::path(
-          get_year_dir(year),
-          stringr::str_glue("source-individual-file-{year}")
-        ),
-        format = "parquet",
-        # Should correspond to the available slfhelper filters
-        partitioning = c("hscp2018"),
-        compression = "zstd",
-        version = "latest"
-      )
-    )
+    ) # ,
+    # tar_target(
+    #   episode_file_dataset,
+    #   arrow::write_dataset(
+    #     dataset = episode_file,
+    #     path = fs::path(
+    #       get_year_dir(year),
+    #       stringr::str_glue("source-episode-file-{year}")
+    #     ),
+    #     format = "parquet",
+    #     # Should correspond to the available slfhelper filters
+    #     partitioning = c("recid", "hscp2018"),
+    #     compression = "zstd",
+    #     version = "latest"
+    #   )
+    # ),
+    # tar_target(
+    #   individual_file_dataset,
+    #   arrow::write_dataset(
+    #     dataset = individual_file,
+    #     path = fs::path(
+    #       get_year_dir(year),
+    #       stringr::str_glue("source-individual-file-{year}")
+    #     ),
+    #     format = "parquet",
+    #     # Should correspond to the available slfhelper filters
+    #     partitioning = c("hscp2018"),
+    #     compression = "zstd",
+    #     version = "latest"
+    #   )
+    # )
   )
 )
diff --git a/_targets.yaml b/_targets.yaml
index 24c8a3733..5f5f0303e 100644
--- a/_targets.yaml
+++ b/_targets.yaml
@@ -2,4 +2,5 @@ main:
   store: /conf/sourcedev/Source_Linkage_File_Updates/_targets
   workers: '16'
   reporter_make: timestamp_positives
+  reporter_outdated: forecast
   seconds_interval: 30
diff --git a/hc_methodology.Rmd b/hc_methodology.Rmd
index 5270735a9..23aedb5c6 100644
--- a/hc_methodology.Rmd
+++ b/hc_methodology.Rmd
@@ -43,7 +43,14 @@ knitr::opts_chunk$set(echo = TRUE)
 replaced_start_dates %>%
   group_by(sending_location_name) %>%
   summarise(before = n_distinct(social_care_id)) %>%
-  left_join(fixed_sc_ids %>% group_by(sending_location_name) %>% summarise(after = n_distinct(social_care_id))) %>%
-  mutate(diff = before - after, diff_pct = scales::percent(diff / before, accuracy = 0.1)) %>%
+  left_join(
+    fixed_sc_ids %>%
+      group_by(sending_location_name) %>%
+      summarise(after = n_distinct(social_care_id))
+  ) %>%
+  mutate(
+    diff = before - after,
+    diff_pct = scales::percent(diff / before, accuracy = 0.1)
+  ) %>%
   gt::gt()
 ```
diff --git a/man/add_acute_columns.Rd b/man/add_acute_columns.Rd
index 52ba071b6..c2659f821 100644
--- a/man/add_acute_columns.Rd
+++ b/man/add_acute_columns.Rd
@@ -7,7 +7,7 @@
 add_acute_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_ae_columns.Rd b/man/add_ae_columns.Rd
index 9b7099513..fdc31b7ff 100644
--- a/man/add_ae_columns.Rd
+++ b/man/add_ae_columns.Rd
@@ -7,7 +7,7 @@
 add_ae_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_all_columns.Rd b/man/add_all_columns.Rd
index d502e95c3..1d2e587db 100644
--- a/man/add_all_columns.Rd
+++ b/man/add_all_columns.Rd
@@ -7,7 +7,7 @@
 add_all_columns(episode_file)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 }
 \description{
 Add new columns based on SMRType and recid which follow a pattern
diff --git a/man/add_at_columns.Rd b/man/add_at_columns.Rd
index e05ea9101..af978530a 100644
--- a/man/add_at_columns.Rd
+++ b/man/add_at_columns.Rd
@@ -7,7 +7,7 @@
 add_at_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_ch_columns.Rd b/man/add_ch_columns.Rd
index 4938f7690..a036a257e 100644
--- a/man/add_ch_columns.Rd
+++ b/man/add_ch_columns.Rd
@@ -7,7 +7,7 @@
 add_ch_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_cij_columns.Rd b/man/add_cij_columns.Rd
index 7d00e6299..c48c1a3ef 100644
--- a/man/add_cij_columns.Rd
+++ b/man/add_cij_columns.Rd
@@ -7,7 +7,7 @@
 add_cij_columns(episode_file)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 }
 \description{
 Add new columns related to CIJ
diff --git a/man/add_cmh_columns.Rd b/man/add_cmh_columns.Rd
index a1d82cba6..a1cb74abb 100644
--- a/man/add_cmh_columns.Rd
+++ b/man/add_cmh_columns.Rd
@@ -7,7 +7,7 @@
 add_cmh_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_dd_columns.Rd b/man/add_dd_columns.Rd
index 08d9c0fe4..11e85fdc7 100644
--- a/man/add_dd_columns.Rd
+++ b/man/add_dd_columns.Rd
@@ -7,7 +7,7 @@
 add_dd_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_dn_columns.Rd b/man/add_dn_columns.Rd
index bf6af008f..ffdf59a82 100644
--- a/man/add_dn_columns.Rd
+++ b/man/add_dn_columns.Rd
@@ -7,7 +7,7 @@
 add_dn_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_gls_columns.Rd b/man/add_gls_columns.Rd
index e71dc755b..6ab7e9645 100644
--- a/man/add_gls_columns.Rd
+++ b/man/add_gls_columns.Rd
@@ -7,7 +7,7 @@
 add_gls_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_hc_columns.Rd b/man/add_hc_columns.Rd
index 95d8f1d3b..a58f226ec 100644
--- a/man/add_hc_columns.Rd
+++ b/man/add_hc_columns.Rd
@@ -7,7 +7,7 @@
 add_hc_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_hl1_columns.Rd b/man/add_hl1_columns.Rd
index 7600db5e9..24fc714e9 100644
--- a/man/add_hl1_columns.Rd
+++ b/man/add_hl1_columns.Rd
@@ -7,7 +7,7 @@
 add_hl1_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_homelessness_date_flags.Rd b/man/add_homelessness_date_flags.Rd
new file mode 100644
index 000000000..7b386a185
--- /dev/null
+++ b/man/add_homelessness_date_flags.Rd
@@ -0,0 +1,28 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/process_lookup_homelessness.R
+\name{add_homelessness_date_flags}
+\alias{add_homelessness_date_flags}
+\title{Add homelessness date flags to episodes}
+\usage{
+add_homelessness_date_flags(
+  data,
+  year,
+  lookup = create_homelessness_lookup(year)
+)
+}
+\arguments{
+\item{data}{The data to add the flag to - the episode
+or individual file.}
+
+\item{year}{The year to process, in FY format.}
+
+\item{lookup}{The homelessness lookup created by \code{\link[=create_homelessness_lookup]{create_homelessness_lookup()}}}
+}
+\value{
+the final data as a \link[tibble:tibble-package]{tibble}.
+}
+\description{
+Add flags to episodes indicating if they
+have had at least one active homelessness application in
+6 months before, 6 months after, or during an episode.
+}
diff --git a/man/add_homelessness_flag.Rd b/man/add_homelessness_flag.Rd
new file mode 100644
index 000000000..1ab409020
--- /dev/null
+++ b/man/add_homelessness_flag.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/process_lookup_homelessness.R
+\name{add_homelessness_flag}
+\alias{add_homelessness_flag}
+\title{Add 'homelessness in FY' flag}
+\usage{
+add_homelessness_flag(data, year, lookup = create_homelessness_lookup(year))
+}
+\arguments{
+\item{data}{The data to add the flag to - the episode
+or individual file.}
+
+\item{year}{The year to process, in FY format.}
+
+\item{lookup}{The homelessness lookup created by \code{\link[=create_homelessness_lookup]{create_homelessness_lookup()}}}
+}
+\value{
+the final data as a \link[tibble:tibble-package]{tibble}
+}
+\description{
+Add a flag to the data indicating if the CHI
+had a homelessness episode within the financial year.
+}
diff --git a/man/add_hri_variables.Rd b/man/add_hri_variables.Rd
new file mode 100644
index 000000000..131a00f6b
--- /dev/null
+++ b/man/add_hri_variables.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/add_hri_variables.R
+\name{add_hri_variables}
+\alias{add_hri_variables}
+\title{Add HRI variables to an SLF Individual File}
+\usage{
+add_hri_variables(
+  data,
+  chi_variable = "chi",
+  slf_pc_lookup = read_file(get_slf_postcode_path(), col_select = "postcode")
+)
+}
+\arguments{
+\item{data}{An SLF individual file.}
+
+\item{slf_pc_lookup}{The Source postcode lookup, defaults
+to \code{\link[=get_slf_postcode_path]{get_slf_postcode_path()}} read using \code{\link[=read_file]{read_file()}}.}
+}
+\value{
+The individual file with HRI variables matched on
+}
+\description{
+Add HRI variables to an SLF Individual File
+}
+\details{
+Filters the dataset to only include Scottish residents, then
+creates a lookup where HRIs are calculated at Scotland, Health Board, and
+LCA level. Then joins on this lookup by chi/anon_chi.
+}
diff --git a/man/add_ipdc_cols.Rd b/man/add_ipdc_cols.Rd
index 0f91cbd90..bd630b9d3 100644
--- a/man/add_ipdc_cols.Rd
+++ b/man/add_ipdc_cols.Rd
@@ -7,7 +7,7 @@
 add_ipdc_cols(episode_file, prefix, condition, ipdc_d = TRUE, elective = TRUE)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_mat_columns.Rd b/man/add_mat_columns.Rd
index aae729323..5faab0dc1 100644
--- a/man/add_mat_columns.Rd
+++ b/man/add_mat_columns.Rd
@@ -7,7 +7,7 @@
 add_mat_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_mh_columns.Rd b/man/add_mh_columns.Rd
index 3c50c6cb8..c587c490a 100644
--- a/man/add_mh_columns.Rd
+++ b/man/add_mh_columns.Rd
@@ -7,7 +7,7 @@
 add_mh_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_nrs_columns.Rd b/man/add_nrs_columns.Rd
index 9d7b3f8bf..b41201a57 100644
--- a/man/add_nrs_columns.Rd
+++ b/man/add_nrs_columns.Rd
@@ -7,7 +7,7 @@
 add_nrs_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_nsu_cohort.Rd b/man/add_nsu_cohort.Rd
index 723c105e1..4ea9324e0 100644
--- a/man/add_nsu_cohort.Rd
+++ b/man/add_nsu_cohort.Rd
@@ -4,12 +4,14 @@
 \alias{add_nsu_cohort}
 \title{Add NSU cohort to working file}
 \usage{
-add_nsu_cohort(data, year)
+add_nsu_cohort(data, year, nsu_cohort = read_file(get_nsu_path(year)))
 }
 \arguments{
 \item{data}{The input data frame}
 
 \item{year}{The year being processed}
+
+\item{nsu_cohort}{The NSU data for the year}
 }
 \value{
 A data frame containing the Non-Service Users as additional rows
diff --git a/man/add_nsu_columns.Rd b/man/add_nsu_columns.Rd
index 6a54bbcbf..5aed481f0 100644
--- a/man/add_nsu_columns.Rd
+++ b/man/add_nsu_columns.Rd
@@ -7,7 +7,7 @@
 add_nsu_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_ooh_columns.Rd b/man/add_ooh_columns.Rd
index 01814ab6d..f1e6b63f5 100644
--- a/man/add_ooh_columns.Rd
+++ b/man/add_ooh_columns.Rd
@@ -7,7 +7,7 @@
 add_ooh_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_op_columns.Rd b/man/add_op_columns.Rd
index 08c4419e2..9fb8bc158 100644
--- a/man/add_op_columns.Rd
+++ b/man/add_op_columns.Rd
@@ -7,7 +7,7 @@
 add_op_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_operation_flag.Rd b/man/add_operation_flag.Rd
index cb7dff76d..bda825a7c 100644
--- a/man/add_operation_flag.Rd
+++ b/man/add_operation_flag.Rd
@@ -10,7 +10,8 @@ add_operation_flag(op1a)
 \item{op1a}{A vector of operation codes}
 }
 \value{
-A boolean vector showing whether a record contains an operation or not
+A boolean vector showing whether a record contains an operation or
+not.
 }
 \description{
 Add operation flag
diff --git a/man/add_pis_columns.Rd b/man/add_pis_columns.Rd
index b582acf2e..836218da0 100644
--- a/man/add_pis_columns.Rd
+++ b/man/add_pis_columns.Rd
@@ -7,7 +7,7 @@
 add_pis_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_sds_columns.Rd b/man/add_sds_columns.Rd
index d5a5fb2cf..c06b88527 100644
--- a/man/add_sds_columns.Rd
+++ b/man/add_sds_columns.Rd
@@ -7,7 +7,7 @@
 add_sds_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_standard_cols.Rd b/man/add_standard_cols.Rd
index 744aa49de..4392157d2 100644
--- a/man/add_standard_cols.Rd
+++ b/man/add_standard_cols.Rd
@@ -13,7 +13,7 @@ add_standard_cols(
 )
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/aggregate_by_chi.Rd b/man/aggregate_by_chi.Rd
index 73804ad9b..84c9c0ad3 100644
--- a/man/aggregate_by_chi.Rd
+++ b/man/aggregate_by_chi.Rd
@@ -1,13 +1,13 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/create_individual_file.R
+% Please edit documentation in R/aggregate_by_chi.R
 \name{aggregate_by_chi}
 \alias{aggregate_by_chi}
 \title{Aggregate by CHI}
 \usage{
-aggregate_by_chi(episode_file)
+aggregate_by_chi(episode_file, exclude_sc_var = FALSE)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 }
 \description{
 Aggregate episode file by CHI to convert into
diff --git a/man/aggregate_by_chi_zihao.Rd b/man/aggregate_by_chi_zihao.Rd
deleted file mode 100644
index 3d4961e19..000000000
--- a/man/aggregate_by_chi_zihao.Rd
+++ /dev/null
@@ -1,15 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/aggregate_by_chi_zihao.R
-\name{aggregate_by_chi_zihao}
-\alias{aggregate_by_chi_zihao}
-\title{Aggregate by CHI}
-\usage{
-aggregate_by_chi_zihao(episode_file)
-}
-\arguments{
-\item{episode_file}{Tibble containing episodic data}
-}
-\description{
-Aggregate episode file by CHI to convert into
-individual file.
-}
diff --git a/man/aggregate_ch_episodes.Rd b/man/aggregate_ch_episodes.Rd
index 2753da14f..3223e6d25 100644
--- a/man/aggregate_ch_episodes.Rd
+++ b/man/aggregate_ch_episodes.Rd
@@ -1,13 +1,13 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/create_individual_file.R
+% Please edit documentation in R/aggregate_by_chi.R
 \name{aggregate_ch_episodes}
 \alias{aggregate_ch_episodes}
-\title{Aggregate CIS episodes}
+\title{Aggregate Care Home episodes to ch_cis}
 \usage{
 aggregate_ch_episodes(episode_file)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 }
 \description{
 Aggregate CH variables by CHI and CIS.
diff --git a/man/aggregate_ch_episodes_zihao.Rd b/man/aggregate_ch_episodes_zihao.Rd
deleted file mode 100644
index 808262654..000000000
--- a/man/aggregate_ch_episodes_zihao.Rd
+++ /dev/null
@@ -1,14 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/aggregate_by_chi_zihao.R
-\name{aggregate_ch_episodes_zihao}
-\alias{aggregate_ch_episodes_zihao}
-\title{Aggregate CIS episodes}
-\usage{
-aggregate_ch_episodes_zihao(episode_file)
-}
-\arguments{
-\item{episode_file}{Tibble containing episodic data}
-}
-\description{
-Aggregate CH variables by CHI and CIS.
-}
diff --git a/man/assign_cohort_names.Rd b/man/assign_cohort_names.Rd
index a0edb373d..e68ad7c42 100644
--- a/man/assign_cohort_names.Rd
+++ b/man/assign_cohort_names.Rd
@@ -10,7 +10,8 @@ assign_cohort_names(data)
 \item{data}{A data frame}
 }
 \value{
-A data frame with an additional variable containing the assigned cohort
+A data frame with an additional variable containing the assigned
+cohort
 }
 \description{
 Assign service use cohort into string format
diff --git a/man/assign_s_cohort_limited_daycases.Rd b/man/assign_s_cohort_limited_daycases.Rd
index c63569e5c..69f49b4dc 100644
--- a/man/assign_s_cohort_limited_daycases.Rd
+++ b/man/assign_s_cohort_limited_daycases.Rd
@@ -15,8 +15,8 @@ assign_s_cohort_limited_daycases(elective_inpatient_flag, elective_instances)
 A boolean vector of limited daycases cohort flags
 }
 \description{
-If the record does not have an elective inpatient flag and they have
-3 or fewer elective instances, return \code{TRUE}
+If the record does not have an elective inpatient flag
+and they have 3 or fewer elective instances, return \code{TRUE}.
 }
 \seealso{
 Other Demographic and Service Use Cohort functions: 
diff --git a/man/assign_s_cohort_outpatient.Rd b/man/assign_s_cohort_outpatient.Rd
index 264044b2c..5d811b6af 100644
--- a/man/assign_s_cohort_outpatient.Rd
+++ b/man/assign_s_cohort_outpatient.Rd
@@ -13,7 +13,8 @@ assign_s_cohort_outpatient(outpatient_cost)
 A boolean vector of outpatient cohort flags
 }
 \description{
-If the record has a outpatient cost greater than zero, assign \code{TRUE}
+If the record has an outpatient cost greater than zero,
+assign \code{TRUE}.
 }
 \seealso{
 Other Demographic and Service Use Cohort functions: 
diff --git a/man/assign_s_cohort_prescribing.Rd b/man/assign_s_cohort_prescribing.Rd
index 34ead6130..4b938f518 100644
--- a/man/assign_s_cohort_prescribing.Rd
+++ b/man/assign_s_cohort_prescribing.Rd
@@ -13,7 +13,8 @@ assign_s_cohort_prescribing(prescribing_cost)
 A boolean vector of prescribing cohort flags
 }
 \description{
-If the record has a prescribing cost greater than zero, assign \code{TRUE}
+If the record has a prescribing cost greater than zero,
+assign \code{TRUE}.
 }
 \seealso{
 Other Demographic and Service Use Cohort functions: 
diff --git a/man/assign_s_cohort_routine_daycase.Rd b/man/assign_s_cohort_routine_daycase.Rd
index af67448a9..03f5f51e0 100644
--- a/man/assign_s_cohort_routine_daycase.Rd
+++ b/man/assign_s_cohort_routine_daycase.Rd
@@ -15,8 +15,8 @@ assign_s_cohort_routine_daycase(elective_inpatient_flag, elective_instances)
 A boolean vector of routine daycase cohort flags
 }
 \description{
-If the record does not have an elective inpatient flag and they have
-4 or more elective instances, return \code{TRUE}
+If the record does not have an elective inpatient flag and
+they have 4 or more elective instances, return \code{TRUE}.
 }
 \seealso{
 Other Demographic and Service Use Cohort functions: 
diff --git a/man/clean_up_ch.Rd b/man/clean_up_ch.Rd
index 0182c84e8..c0c61966d 100644
--- a/man/clean_up_ch.Rd
+++ b/man/clean_up_ch.Rd
@@ -7,7 +7,7 @@
 clean_up_ch(episode_file, year)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{year}{The year to process, in FY format.}
 }
diff --git a/man/compute_mid_year_age.Rd b/man/compute_mid_year_age.Rd
index 4892ce7f4..c27e32af5 100644
--- a/man/compute_mid_year_age.Rd
+++ b/man/compute_mid_year_age.Rd
@@ -15,7 +15,8 @@ compute_mid_year_age(fyyear, dob)
 a vector of ages at the financial year midpoint
 }
 \description{
-Compute the age of a client at the midpoint of the year - 30-09-YYYY
+Compute the age of a client at the midpoint of the year -
+30-09-YYYY
 }
 \examples{
 dob <- as.Date(c("01-01-1990", "31-10-1997"), format = "\%d-\%m-\%Y")
diff --git a/man/convert_sending_location_to_lca.Rd b/man/convert_sending_location_to_lca.Rd
index 8c7a29088..78bf475ba 100644
--- a/man/convert_sending_location_to_lca.Rd
+++ b/man/convert_sending_location_to_lca.Rd
@@ -17,7 +17,7 @@ Convert Social Care Sending Location Codes into the
 Local Council Authority Codes.
 }
 \examples{
-sending_location <- c("100", "120")
+sending_location <- c(100, 120)
 convert_sending_location_to_lca(sending_location)
 
 }
diff --git a/man/correct_cij_vars.Rd b/man/correct_cij_vars.Rd
index 97a7f046f..558514dc6 100644
--- a/man/correct_cij_vars.Rd
+++ b/man/correct_cij_vars.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/run_episode_file.R
+% Please edit documentation in R/create_episode_file.R
 \name{correct_cij_vars}
 \alias{correct_cij_vars}
 \title{Correct the CIJ variables}
diff --git a/man/create_cohort_lookups.Rd b/man/create_cohort_lookups.Rd
index f0ad267aa..109869074 100644
--- a/man/create_cohort_lookups.Rd
+++ b/man/create_cohort_lookups.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/run_episode_file.R
+% Please edit documentation in R/create_episode_file.R
 \name{create_cohort_lookups}
 \alias{create_cohort_lookups}
 \title{Create the cohort lookups}
diff --git a/man/create_cost_inc_dna.Rd b/man/create_cost_inc_dna.Rd
index 69e7e37b5..47c38b176 100644
--- a/man/create_cost_inc_dna.Rd
+++ b/man/create_cost_inc_dna.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/run_episode_file.R
+% Please edit documentation in R/create_episode_file.R
 \name{create_cost_inc_dna}
 \alias{create_cost_inc_dna}
 \title{Create cost total net inc DNA}
diff --git a/man/create_demog_test_flags.Rd b/man/create_demog_test_flags.Rd
index b555b1699..589877738 100644
--- a/man/create_demog_test_flags.Rd
+++ b/man/create_demog_test_flags.Rd
@@ -21,6 +21,7 @@ Other flag functions:
 \code{\link{create_hb_cost_test_flags}()},
 \code{\link{create_hb_test_flags}()},
 \code{\link{create_hscp_test_flags}()},
-\code{\link{create_lca_test_flags}()}
+\code{\link{create_lca_test_flags}()},
+\code{\link{create_sending_location_test_flags}()}
 }
 \concept{flag functions}
diff --git a/man/create_episode_file.Rd b/man/create_episode_file.Rd
new file mode 100644
index 000000000..5d85744e2
--- /dev/null
+++ b/man/create_episode_file.Rd
@@ -0,0 +1,50 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/create_episode_file.R
+\name{create_episode_file}
+\alias{create_episode_file}
+\title{Produce the Source Episode file}
+\usage{
+create_episode_file(
+  processed_data_list,
+  year,
+  dd_data = read_file(get_source_extract_path(year, "DD")),
+  homelessness_lookup = create_homelessness_lookup(year),
+  nsu_cohort = read_file(get_nsu_path(year)),
+  ltc_data = read_file(get_ltcs_path(year)),
+  slf_pc_lookup = read_file(get_slf_postcode_path()),
+  slf_gpprac_lookup = read_file(get_slf_gpprac_path(), col_select = c("gpprac",
+    "cluster", "hbpraccode")),
+  slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year)),
+  write_to_disk = TRUE,
+  anon_chi_out = TRUE
+)
+}
+\arguments{
+\item{processed_data_list}{containing data from processed extracts.}
+
+\item{year}{The year to process, in FY format.}
+
+\item{dd_data}{The processed DD extract}
+
+\item{nsu_cohort}{The NSU data for the year}
+
+\item{ltc_data}{The LTC data for the year}
+
+\item{slf_pc_lookup}{The SLF Postcode lookup}
+
+\item{slf_gpprac_lookup}{The SLF GP Practice lookup}
+
+\item{slf_deaths_lookup}{The SLF deaths lookup.}
+
+\item{write_to_disk}{(optional) Should the data be written to disk default is
+\code{TRUE} i.e. write the data to disk.}
+
+\item{anon_chi_out}{(Default:TRUE) Should \code{anon_chi} be used in the output
+(instead of chi)}
+}
+\value{
+a \link[tibble:tibble-package]{tibble} containing the episode file
+}
+\description{
+Produce the Source Episode file
+}
diff --git a/man/create_hb_cost_test_flags.Rd b/man/create_hb_cost_test_flags.Rd
index 6e2ec141f..1a0c48cf7 100644
--- a/man/create_hb_cost_test_flags.Rd
+++ b/man/create_hb_cost_test_flags.Rd
@@ -25,6 +25,7 @@ Other flag functions:
 \code{\link{create_demog_test_flags}()},
 \code{\link{create_hb_test_flags}()},
 \code{\link{create_hscp_test_flags}()},
-\code{\link{create_lca_test_flags}()}
+\code{\link{create_lca_test_flags}()},
+\code{\link{create_sending_location_test_flags}()}
 }
 \concept{flag functions}
diff --git a/man/create_hb_test_flags.Rd b/man/create_hb_test_flags.Rd
index 81e1a38e7..66eb767c0 100644
--- a/man/create_hb_test_flags.Rd
+++ b/man/create_hb_test_flags.Rd
@@ -22,6 +22,7 @@ Other flag functions:
 \code{\link{create_demog_test_flags}()},
 \code{\link{create_hb_cost_test_flags}()},
 \code{\link{create_hscp_test_flags}()},
-\code{\link{create_lca_test_flags}()}
+\code{\link{create_lca_test_flags}()},
+\code{\link{create_sending_location_test_flags}()}
 }
 \concept{flag functions}
diff --git a/man/create_homelessness_lookup.Rd b/man/create_homelessness_lookup.Rd
new file mode 100644
index 000000000..4a0be24f9
--- /dev/null
+++ b/man/create_homelessness_lookup.Rd
@@ -0,0 +1,60 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/process_lookup_homelessness.R
+\name{create_homelessness_lookup}
+\alias{create_homelessness_lookup}
+\title{Create a homelessness lookup}
+\usage{
+create_homelessness_lookup(
+  year,
+  homelessness_data = read_file(get_source_extract_path(year, "Homelessness"))
+)
+}
+\arguments{
+\item{year}{The year to process, in FY format.}
+
+\item{homelessness_data}{the processed homelessness data for
+the financial year (created with \code{\link[=process_extract_homelessness]{process_extract_homelessness()}}).}
+}
+\value{
+the final data as a \link[tibble:tibble-package]{tibble}.
+}
+\description{
+Reads in the homelessness extract and creates
+a lookup at CHI level, with one row per application start
+and end date for each CHI.
+}
+\seealso{
+Other process extracts: 
+\code{\link{process_extract_acute}()},
+\code{\link{process_extract_ae}()},
+\code{\link{process_extract_alarms_telecare}()},
+\code{\link{process_extract_care_home}()},
+\code{\link{process_extract_cmh}()},
+\code{\link{process_extract_delayed_discharges}()},
+\code{\link{process_extract_district_nursing}()},
+\code{\link{process_extract_gp_ooh}()},
+\code{\link{process_extract_home_care}()},
+\code{\link{process_extract_homelessness}()},
+\code{\link{process_extract_maternity}()},
+\code{\link{process_extract_mental_health}()},
+\code{\link{process_extract_nrs_deaths}()},
+\code{\link{process_extract_ooh_consultations}()},
+\code{\link{process_extract_ooh_diagnosis}()},
+\code{\link{process_extract_ooh_outcomes}()},
+\code{\link{process_extract_outpatients}()},
+\code{\link{process_extract_prescribing}()},
+\code{\link{process_extract_sds}()},
+\code{\link{process_it_chi_deaths}()},
+\code{\link{process_lookup_gpprac}()},
+\code{\link{process_lookup_postcode}()},
+\code{\link{process_lookup_sc_client}()},
+\code{\link{process_lookup_sc_demographics}()},
+\code{\link{process_sc_all_alarms_telecare}()},
+\code{\link{process_sc_all_care_home}()},
+\code{\link{process_sc_all_home_care}()},
+\code{\link{process_sc_all_sds}()},
+\code{\link{read_extract_gp_ooh}()},
+\code{\link{read_it_chi_deaths}()},
+\code{\link{read_lookup_sc_client}()}
+}
+\concept{process extracts}
diff --git a/man/create_hscp_test_flags.Rd b/man/create_hscp_test_flags.Rd
index d9cc25a72..847eab6a5 100644
--- a/man/create_hscp_test_flags.Rd
+++ b/man/create_hscp_test_flags.Rd
@@ -12,7 +12,7 @@ create_hscp_test_flags(data, hscp_var)
 \item{hscp_var}{HSCP variable e.g. HSCP2019 HSCP2018}
 }
 \value{
-a dataframe with flag (1 or 0) for each HSCP
+a dataframe with flag (TRUE or FALSE) for each HSCP
 }
 \description{
 Create flags for Health & Social Care Partnerships
@@ -22,6 +22,7 @@ Other flag functions:
 \code{\link{create_demog_test_flags}()},
 \code{\link{create_hb_cost_test_flags}()},
 \code{\link{create_hb_test_flags}()},
-\code{\link{create_lca_test_flags}()}
+\code{\link{create_lca_test_flags}()},
+\code{\link{create_sending_location_test_flags}()}
 }
 \concept{flag functions}
diff --git a/man/create_individual_file.Rd b/man/create_individual_file.Rd
index fa759e7b1..4c87b0731 100644
--- a/man/create_individual_file.Rd
+++ b/man/create_individual_file.Rd
@@ -2,18 +2,19 @@
 % Please edit documentation in R/create_individual_file.R
 \name{create_individual_file}
 \alias{create_individual_file}
-\title{Create individual file}
+\title{Create the Source Individual file}
 \usage{
 create_individual_file(
   episode_file,
   year,
+  homelessness_lookup = create_homelessness_lookup(year),
   write_to_disk = TRUE,
   anon_chi_in = TRUE,
   anon_chi_out = TRUE
 )
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{year}{The year to process, in FY format.}
 
@@ -21,7 +22,7 @@ create_individual_file(
 \code{TRUE} i.e. write the data to disk.}
 
 \item{anon_chi_in}{(Default:TRUE) Is \code{anon_chi} used in the input
-(instead of chi)}
+(instead of chi).}
 
 \item{anon_chi_out}{(Default:TRUE) Should \code{anon_chi} be used in the output
 (instead of chi)}
@@ -30,5 +31,5 @@ create_individual_file(
 The processed individual file
 }
 \description{
-Creates individual file from episode file
+Creates the individual file from the episode file.
 }
diff --git a/man/create_lca_test_flags.Rd b/man/create_lca_test_flags.Rd
index 6a31477a4..136ffcb78 100644
--- a/man/create_lca_test_flags.Rd
+++ b/man/create_lca_test_flags.Rd
@@ -22,6 +22,7 @@ Other flag functions:
 \code{\link{create_demog_test_flags}()},
 \code{\link{create_hb_cost_test_flags}()},
 \code{\link{create_hb_test_flags}()},
-\code{\link{create_hscp_test_flags}()}
+\code{\link{create_hscp_test_flags}()},
+\code{\link{create_sending_location_test_flags}()}
 }
 \concept{flag functions}
diff --git a/man/create_sending_location_test_flags.Rd b/man/create_sending_location_test_flags.Rd
new file mode 100644
index 000000000..5d1ad09f7
--- /dev/null
+++ b/man/create_sending_location_test_flags.Rd
@@ -0,0 +1,28 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/create_sending_location_test_flags.R
+\name{create_sending_location_test_flags}
+\alias{create_sending_location_test_flags}
+\title{Create sending location test flags}
+\usage{
+create_sending_location_test_flags(data, sending_location_var)
+}
+\arguments{
+\item{data}{the data containing the variable sending_location}
+
+\item{sending_location_var}{sending_location variable}
+}
+\value{
+a dataframe with flag (TRUE or FALSE) for each sending location
+}
+\description{
+Create flags for sending location
+}
+\seealso{
+Other flag functions: 
+\code{\link{create_demog_test_flags}()},
+\code{\link{create_hb_cost_test_flags}()},
+\code{\link{create_hb_test_flags}()},
+\code{\link{create_hscp_test_flags}()},
+\code{\link{create_lca_test_flags}()}
+}
+\concept{flag functions}
diff --git a/man/fill_geographies.Rd b/man/fill_geographies.Rd
index 5308fd8d0..bb619405b 100644
--- a/man/fill_geographies.Rd
+++ b/man/fill_geographies.Rd
@@ -4,10 +4,19 @@
 \alias{fill_geographies}
 \title{Fill postcode and GP practice geographies}
 \usage{
-fill_geographies(data)
+fill_geographies(
+  data,
+  slf_pc_lookup = read_file(get_slf_postcode_path()),
+  slf_gpprac_lookup = read_file(get_slf_gpprac_path(), col_select = c("gpprac",
+    "cluster", "hbpraccode"))
+)
 }
 \arguments{
 \item{data}{the SLF}
+
+\item{slf_pc_lookup}{The SLF Postcode lookup}
+
+\item{slf_gpprac_lookup}{The SLF GP Practice lookup}
 }
 \value{
 a \link[tibble:tibble-package]{tibble} of the SLF with improved
diff --git a/man/fill_missing_cij_markers.Rd b/man/fill_missing_cij_markers.Rd
index 03b64217e..4795eed7a 100644
--- a/man/fill_missing_cij_markers.Rd
+++ b/man/fill_missing_cij_markers.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/run_episode_file.R
+% Please edit documentation in R/create_episode_file.R
 \name{fill_missing_cij_markers}
 \alias{fill_missing_cij_markers}
 \title{Fill any missing CIJ markers for records that should have them}
diff --git a/man/flag_non_scottish_residents.Rd b/man/flag_non_scottish_residents.Rd
new file mode 100644
index 000000000..ec97dedf6
--- /dev/null
+++ b/man/flag_non_scottish_residents.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/add_hri_variables.R
+\name{flag_non_scottish_residents}
+\alias{flag_non_scottish_residents}
+\title{Flag non-Scottish residents}
+\usage{
+flag_non_scottish_residents(data, slf_pc_lookup)
+}
+\arguments{
+\item{data}{An SLF individual file.}
+
+\item{slf_pc_lookup}{The Source postcode lookup, i.e. the file at
+\code{\link[=get_slf_postcode_path]{get_slf_postcode_path()}} read using \code{\link[=read_file]{read_file()}}.}
+}
+\value{
+A data frame with the variable 'keep_flag'
+}
+\description{
+Flag non-Scottish residents
+}
+\details{
+The variable keep_flag can take a value from 0 to 4, where
+\itemize{
+\item{keep_flag = 0 when resident is Scottish}
+\item{keep_flag = 1 when resident is not Scottish}
+\item{keep_flag = 2 when the postcode is missing or a dummy, and the gpprac is missing}
+\item{keep_flag = 3 when the gpprac is not English and the postcode is missing}
+\item{keep_flag = 4 when the gpprac is not English and the postcode is a dummy}
+}
+The intention is to only keep the records where keep_flag = 0
+}
diff --git a/man/get_existing_data_for_tests.Rd b/man/get_existing_data_for_tests.Rd
index 60eb6e459..101749418 100644
--- a/man/get_existing_data_for_tests.Rd
+++ b/man/get_existing_data_for_tests.Rd
@@ -4,7 +4,11 @@
 \alias{get_existing_data_for_tests}
 \title{SLF Data for Testing}
 \usage{
-get_existing_data_for_tests(new_data, file_version = "episode")
+get_existing_data_for_tests(
+  new_data,
+  file_version = "episode",
+  anon_chi = FALSE
+)
 }
 \arguments{
 \item{new_data}{a \link[tibble:tibble-package]{tibble} of the
@@ -12,6 +16,9 @@ new data which the SLF data will be compared to.}
 
 \item{file_version}{whether to test against the "episode" file (the default)
 or the "individual" file.}
+
+\item{anon_chi}{Default set as FALSE. For use in episode tests where
+we want anon_chi instead of chi.}
 }
 \value{
 a \link[tibble:tibble-package]{tibble} from the
diff --git a/man/get_sc_client_lookup_path.Rd b/man/get_sc_client_lookup_path.Rd
new file mode 100644
index 000000000..481baf223
--- /dev/null
+++ b/man/get_sc_client_lookup_path.Rd
@@ -0,0 +1,30 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/get_sc_lookup_paths.R
+\name{get_sc_client_lookup_path}
+\alias{get_sc_client_lookup_path}
+\title{Social Care Client Lookup File Path}
+\usage{
+get_sc_client_lookup_path(year, update = latest_update(), ...)
+}
+\arguments{
+\item{year}{Financial year.}
+
+\item{update}{The update month to use,
+defaults to \code{\link[=latest_update]{latest_update()}}}
+
+\item{...}{additional arguments passed to \code{\link[=get_file_path]{get_file_path()}}}
+}
+\value{
+The path to the social care client lookup file
+as an \code{\link[fs:path]{fs::path()}}
+}
+\description{
+Get the file path for the Social Care Client lookup file
+}
+\seealso{
+\code{\link[=get_file_path]{get_file_path()}} for the generic function.
+
+Other social care lookup file paths: 
+\code{\link{get_sc_demog_lookup_path}()}
+}
+\concept{social care lookup file paths}
diff --git a/man/get_sc_demog_lookup_path.Rd b/man/get_sc_demog_lookup_path.Rd
index 8e45e9731..7b39cf287 100644
--- a/man/get_sc_demog_lookup_path.Rd
+++ b/man/get_sc_demog_lookup_path.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/get_sc_demog_path.R
+% Please edit documentation in R/get_sc_lookup_paths.R
 \name{get_sc_demog_lookup_path}
 \alias{get_sc_demog_lookup_path}
 \title{Social Care Demographic Lookup File Path}
@@ -21,5 +21,8 @@ Get the file path for the Social Care Demographic lookup file
 }
 \seealso{
 \code{\link[=get_file_path]{get_file_path()}} for the generic function.
+
+Other social care lookup file paths: 
+\code{\link{get_sc_client_lookup_path}()}
 }
 \concept{social care lookup file paths}
diff --git a/man/get_slf_ep_temp_path.Rd b/man/get_slf_ep_temp_path.Rd
deleted file mode 100644
index 44e1a44db..000000000
--- a/man/get_slf_ep_temp_path.Rd
+++ /dev/null
@@ -1,19 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/get_temp_file_paths.R
-\name{get_slf_ep_temp_path}
-\alias{get_slf_ep_temp_path}
-\title{Get a temporary version of the SLF episode file}
-\usage{
-get_slf_ep_temp_path(year, temp_version)
-}
-\arguments{
-\item{year}{The financial year}
-
-\item{temp_version}{The temp version e.g. 1 or 7}
-}
-\value{
-The path to the file (\code{.rds})
-}
-\description{
-Get a temporary version of the SLF episode file
-}
diff --git a/man/get_slf_indiv_temp_path.Rd b/man/get_slf_indiv_temp_path.Rd
deleted file mode 100644
index 6ff1c70bd..000000000
--- a/man/get_slf_indiv_temp_path.Rd
+++ /dev/null
@@ -1,19 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/get_temp_file_paths.R
-\name{get_slf_indiv_temp_path}
-\alias{get_slf_indiv_temp_path}
-\title{Get a temporary version of the SLF individual file}
-\usage{
-get_slf_indiv_temp_path(year, temp_version)
-}
-\arguments{
-\item{year}{The financial year}
-
-\item{temp_version}{The temp version e.g. 1 or 7}
-}
-\value{
-The path to the file (\code{.rds})
-}
-\description{
-Get a temporary version of the SLF individual file
-}
diff --git a/man/get_slf_temp_path.Rd b/man/get_slf_temp_path.Rd
deleted file mode 100644
index 31f4dde38..000000000
--- a/man/get_slf_temp_path.Rd
+++ /dev/null
@@ -1,25 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/get_temp_file_paths.R
-\name{get_slf_temp_path}
-\alias{get_slf_temp_path}
-\title{Get a temporary version of the SLF}
-\usage{
-get_slf_temp_path(
-  year,
-  temp_version,
-  file_version = c("episode", "individual")
-)
-}
-\arguments{
-\item{year}{The financial year}
-
-\item{temp_version}{The temp version e.g. 1 or 7}
-
-\item{file_version}{Episode or Individual file}
-}
-\value{
-The path to the file (\code{.rds})
-}
-\description{
-Get a temporary version of the SLF
-}
diff --git a/man/get_source_extract_path.Rd b/man/get_source_extract_path.Rd
index e51cbb2c7..fd9502b83 100644
--- a/man/get_source_extract_path.Rd
+++ b/man/get_source_extract_path.Rd
@@ -6,8 +6,8 @@
 \usage{
 get_source_extract_path(
   year,
-  type = c("Acute", "AE", "AT", "CH", "Client", "CMH", "DD", "Deaths", "DN", "GPOoH",
-    "HC", "Homelessness", "Maternity", "MH", "Outpatients", "PIS", "SDS"),
+  type = c("Acute", "AE", "AT", "CH", "CMH", "DD", "Deaths", "DN", "GPOoH", "HC",
+    "Homelessness", "Maternity", "MH", "Outpatients", "PIS", "SDS"),
   ...
 )
 }
diff --git a/man/join_cohort_lookups.Rd b/man/join_cohort_lookups.Rd
index 445dcd7c0..3ef549cc3 100644
--- a/man/join_cohort_lookups.Rd
+++ b/man/join_cohort_lookups.Rd
@@ -1,10 +1,18 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/run_episode_file.R
+% Please edit documentation in R/create_episode_file.R
 \name{join_cohort_lookups}
 \alias{join_cohort_lookups}
 \title{Join cohort lookups}
 \usage{
-join_cohort_lookups(data, year, update = latest_update())
+join_cohort_lookups(
+  data,
+  year,
+  update = latest_update(),
+  demographic_cohort = read_file(get_demographic_cohorts_path(year, update), col_select =
+    c("chi", "demographic_cohort")),
+  service_use_cohort = read_file(get_service_use_cohorts_path(year, update), col_select =
+    c("chi", "service_use_cohort"))
+)
 }
 \arguments{
 \item{data}{The in-progress episode file data.}
@@ -12,6 +20,8 @@ join_cohort_lookups(data, year, update = latest_update())
 \item{year}{The year to process, in FY format.}
 
 \item{update}{The update to use}
+
+\item{demographic_cohort, service_use_cohort}{The cohort data}
 }
 \value{
 The data including the Demographic and Service Use lookups.
diff --git a/man/join_deaths_data.Rd b/man/join_deaths_data.Rd
index 6508d7893..f3b68fe1a 100644
--- a/man/join_deaths_data.Rd
+++ b/man/join_deaths_data.Rd
@@ -7,7 +7,7 @@
 join_deaths_data(
   data,
   year,
-  slf_deaths_lookup_path = get_slf_deaths_lookup_path(year)
+  slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year))
 )
 }
 \arguments{
@@ -15,7 +15,7 @@ join_deaths_data(
 
 \item{year}{financial year, e.g. '1920'}
 
-\item{slf_deaths_lookup_path}{Path to slf deaths lookup.}
+\item{slf_deaths_lookup}{The SLF deaths lookup.}
 }
 \value{
 The data including the deaths lookup matched
diff --git a/man/join_sc_client.Rd b/man/join_sc_client.Rd
index a30719698..465126dba 100644
--- a/man/join_sc_client.Rd
+++ b/man/join_sc_client.Rd
@@ -7,7 +7,7 @@
 join_sc_client(
   individual_file,
   year,
-  sc_client = read_file(get_source_extract_path(year, "Client")),
+  sc_client = read_file(get_sc_client_lookup_path(year)),
   sc_demographics = read_file(get_sc_demog_lookup_path(), col_select =
     c("sending_location", "social_care_id", "chi"))
 )
diff --git a/man/load_ep_file_vars.Rd b/man/load_ep_file_vars.Rd
index cee9cc440..509b0e00c 100644
--- a/man/load_ep_file_vars.Rd
+++ b/man/load_ep_file_vars.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/run_episode_file.R
+% Please edit documentation in R/create_episode_file.R
 \name{load_ep_file_vars}
 \alias{load_ep_file_vars}
 \title{Load the unneeded episode file variables}
diff --git a/man/match_on_ltcs.Rd b/man/match_on_ltcs.Rd
index 0c7e7fb53..e0def00cc 100644
--- a/man/match_on_ltcs.Rd
+++ b/man/match_on_ltcs.Rd
@@ -4,12 +4,14 @@
 \alias{match_on_ltcs}
 \title{Match on LTC DoB and dates of LTC incidence}
 \usage{
-match_on_ltcs(data, year)
+match_on_ltcs(data, year, ltc_data = read_file(get_ltcs_path(year)))
 }
 \arguments{
 \item{data}{episode files}
 
 \item{year}{financial year, e.g. '1920'}
+
+\item{ltc_data}{The LTC data for the year}
 }
 \value{
 data matched with long term conditions
diff --git a/man/phs_db_connection.Rd b/man/phs_db_connection.Rd
index 93e73ee55..8ff9d0a32 100644
--- a/man/phs_db_connection.Rd
+++ b/man/phs_db_connection.Rd
@@ -4,19 +4,23 @@
 \alias{phs_db_connection}
 \title{Open a connection to a PHS database}
 \usage{
-phs_db_connection(dsn, username = Sys.getenv("USER"))
+phs_db_connection(dsn, username)
 }
 \arguments{
-\item{dsn}{The Data Source Name passed on to \code{odbc::dbconnect}
-the dsn must be setup first. e.g. SMRA or DVPROD}
+\item{dsn}{The Data Source Name (DSN) passed on to \code{\link[odbc:dbConnect-OdbcDriver-method]{odbc::dbConnect()}}.
+The DSN must be set up first, e.g. \code{SMRA} or \code{DVPROD}.}
 
 \item{username}{The username to use for authentication,
-if not supplied it first will check the environment variable
-and finally ask the user for input.}
+if not supplied, it will try to find it automatically and, if possible, ask the
+user for input.}
 }
 \value{
-a connection to the specified dsn
+a connection to the specified Data Source.
 }
 \description{
-Opens a connection to PHS database to allow data to be collected
+Opens a connection to a PHS database given a Data Source Name
+(DSN). It will try to get the username, asking for input if in an interactive
+session. It will also use \link[keyring:keyring-package]{keyring} to find
+an existing keyring called 'createslf' which should contain a \code{db_password}
+key with the user's database password.
 }
diff --git a/man/process_costs_ch_rmd.Rd b/man/process_costs_ch_rmd.Rd
index 520898c9e..b990564b7 100644
--- a/man/process_costs_ch_rmd.Rd
+++ b/man/process_costs_ch_rmd.Rd
@@ -15,5 +15,5 @@ a \link[tibble:tibble-package]{tibble} containing the final cost data.
 \description{
 This will read and process the
 care homes cost lookup, it will return the final data
-but also write this out as a rds.
+and write it to disk.
 }
diff --git a/man/process_costs_dn_rmd.Rd b/man/process_costs_dn_rmd.Rd
index bde475d5a..46bcd93dd 100644
--- a/man/process_costs_dn_rmd.Rd
+++ b/man/process_costs_dn_rmd.Rd
@@ -15,5 +15,5 @@ a \link[tibble:tibble-package]{tibble} containing the final cost data.
 \description{
 This will read and process the
 District Nursing cost lookup, it will return the final data
-but also write this out as a rds.
+and write it to disk.
 }
diff --git a/man/process_costs_gp_ooh_rmd.Rd b/man/process_costs_gp_ooh_rmd.Rd
index fd71066c0..f5c611f11 100644
--- a/man/process_costs_gp_ooh_rmd.Rd
+++ b/man/process_costs_gp_ooh_rmd.Rd
@@ -15,5 +15,5 @@ a \link[tibble:tibble-package]{tibble} containing the final cost data.
 \description{
 This will read and process the
 GP ooh cost lookup, it will return the final data
-but also write this out as a rds.
+and write it to disk.
 }
diff --git a/man/process_costs_hc_rmd.Rd b/man/process_costs_hc_rmd.Rd
index b15c311da..c3448bcbc 100644
--- a/man/process_costs_hc_rmd.Rd
+++ b/man/process_costs_hc_rmd.Rd
@@ -15,5 +15,5 @@ a \link[tibble:tibble-package]{tibble} containing the final cost data.
 \description{
 This will read and process the
 Home Care cost lookup, it will return the final data
-but also write this out as a rds.
+and write it to disk.
 }
diff --git a/man/process_extract_acute.Rd b/man/process_extract_acute.Rd
index af6b85bfe..77a99cef3 100644
--- a/man/process_extract_acute.Rd
+++ b/man/process_extract_acute.Rd
@@ -20,10 +20,11 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 acute extract, it will return the final data
-but also write this out as an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
+\code{\link{create_homelessness_lookup}()},
 \code{\link{process_extract_ae}()},
 \code{\link{process_extract_alarms_telecare}()},
 \code{\link{process_extract_care_home}()},
diff --git a/man/process_extract_ae.Rd b/man/process_extract_ae.Rd
index 58878e689..9eec39ba5 100644
--- a/man/process_extract_ae.Rd
+++ b/man/process_extract_ae.Rd
@@ -20,10 +20,11 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 A&E extract, it will return the final data
-but also write this out as an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
+\code{\link{create_homelessness_lookup}()},
 \code{\link{process_extract_acute}()},
 \code{\link{process_extract_alarms_telecare}()},
 \code{\link{process_extract_care_home}()},
diff --git a/man/process_extract_alarms_telecare.Rd b/man/process_extract_alarms_telecare.Rd
index a6e61365d..7305b7b49 100644
--- a/man/process_extract_alarms_telecare.Rd
+++ b/man/process_extract_alarms_telecare.Rd
@@ -29,10 +29,11 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 (year specific) Alarms Telecare extract, it will return the final data
-but also write this out as rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
+\code{\link{create_homelessness_lookup}()},
 \code{\link{process_extract_acute}()},
 \code{\link{process_extract_ae}()},
 \code{\link{process_extract_care_home}()},
diff --git a/man/process_extract_care_home.Rd b/man/process_extract_care_home.Rd
index f058ca787..7eed509d8 100644
--- a/man/process_extract_care_home.Rd
+++ b/man/process_extract_care_home.Rd
@@ -32,10 +32,11 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 (year specific) Care Home extract, it will return the final data
-but also write this out as rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
+\code{\link{create_homelessness_lookup}()},
 \code{\link{process_extract_acute}()},
 \code{\link{process_extract_ae}()},
 \code{\link{process_extract_alarms_telecare}()},
diff --git a/man/process_extract_cmh.Rd b/man/process_extract_cmh.Rd
index 147651f37..64e085dcf 100644
--- a/man/process_extract_cmh.Rd
+++ b/man/process_extract_cmh.Rd
@@ -20,10 +20,11 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 CMH extract, it will return the final data
-but also write this out as an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
+\code{\link{create_homelessness_lookup}()},
 \code{\link{process_extract_acute}()},
 \code{\link{process_extract_ae}()},
 \code{\link{process_extract_alarms_telecare}()},
diff --git a/man/process_extract_delayed_discharges.Rd b/man/process_extract_delayed_discharges.Rd
index ddc41ec46..c6fd560a7 100644
--- a/man/process_extract_delayed_discharges.Rd
+++ b/man/process_extract_delayed_discharges.Rd
@@ -20,10 +20,11 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 delayed discharges extract, it will return the final data
-but also write this out as an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
+\code{\link{create_homelessness_lookup}()},
 \code{\link{process_extract_acute}()},
 \code{\link{process_extract_ae}()},
 \code{\link{process_extract_alarms_telecare}()},
diff --git a/man/process_extract_district_nursing.Rd b/man/process_extract_district_nursing.Rd
index 4d9383c2e..eb2814fbc 100644
--- a/man/process_extract_district_nursing.Rd
+++ b/man/process_extract_district_nursing.Rd
@@ -27,10 +27,11 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 District Nursing extract, it will return the final data
-but also write this out an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
+\code{\link{create_homelessness_lookup}()},
 \code{\link{process_extract_acute}()},
 \code{\link{process_extract_ae}()},
 \code{\link{process_extract_alarms_telecare}()},
diff --git a/man/process_extract_gp_ooh.Rd b/man/process_extract_gp_ooh.Rd
index 8217f0d6f..ddec006fe 100644
--- a/man/process_extract_gp_ooh.Rd
+++ b/man/process_extract_gp_ooh.Rd
@@ -20,10 +20,11 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 GP OoH extract, it will return the final data
-but also write this out an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
+\code{\link{create_homelessness_lookup}()},
 \code{\link{process_extract_acute}()},
 \code{\link{process_extract_ae}()},
 \code{\link{process_extract_alarms_telecare}()},
diff --git a/man/process_extract_home_care.Rd b/man/process_extract_home_care.Rd
index 4fef5ac14..e4e02fdad 100644
--- a/man/process_extract_home_care.Rd
+++ b/man/process_extract_home_care.Rd
@@ -24,10 +24,11 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 (year specific) Home Care extract, it will return the final data
-but also write this out as rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
+\code{\link{create_homelessness_lookup}()},
 \code{\link{process_extract_acute}()},
 \code{\link{process_extract_ae}()},
 \code{\link{process_extract_alarms_telecare}()},
diff --git a/man/process_extract_homelessness.Rd b/man/process_extract_homelessness.Rd
index 7531f8f22..7b2254050 100644
--- a/man/process_extract_homelessness.Rd
+++ b/man/process_extract_homelessness.Rd
@@ -31,10 +31,11 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 homelessness extract, it will return the final data
-and optionally write it out as rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
+\code{\link{create_homelessness_lookup}()},
 \code{\link{process_extract_acute}()},
 \code{\link{process_extract_ae}()},
 \code{\link{process_extract_alarms_telecare}()},
diff --git a/man/process_extract_maternity.Rd b/man/process_extract_maternity.Rd
index cd01e6931..17dd1a64c 100644
--- a/man/process_extract_maternity.Rd
+++ b/man/process_extract_maternity.Rd
@@ -20,10 +20,11 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 maternity extract, it will return the final data
-but also write this out an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
+\code{\link{create_homelessness_lookup}()},
 \code{\link{process_extract_acute}()},
 \code{\link{process_extract_ae}()},
 \code{\link{process_extract_alarms_telecare}()},
diff --git a/man/process_extract_mental_health.Rd b/man/process_extract_mental_health.Rd
index 7159aae8b..5f1fc7330 100644
--- a/man/process_extract_mental_health.Rd
+++ b/man/process_extract_mental_health.Rd
@@ -20,10 +20,11 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 mental health extract, it will return the final data
-but also write this out an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
+\code{\link{create_homelessness_lookup}()},
 \code{\link{process_extract_acute}()},
 \code{\link{process_extract_ae}()},
 \code{\link{process_extract_alarms_telecare}()},
diff --git a/man/process_extract_nrs_deaths.Rd b/man/process_extract_nrs_deaths.Rd
index 724af5d87..1938e15ec 100644
--- a/man/process_extract_nrs_deaths.Rd
+++ b/man/process_extract_nrs_deaths.Rd
@@ -23,6 +23,7 @@ final data and write this out.
 }
 \seealso{
 Other process extracts: 
+\code{\link{create_homelessness_lookup}()},
 \code{\link{process_extract_acute}()},
 \code{\link{process_extract_ae}()},
 \code{\link{process_extract_alarms_telecare}()},
diff --git a/man/process_extract_ooh_consultations.Rd b/man/process_extract_ooh_consultations.Rd
index d682197ca..e00155191 100644
--- a/man/process_extract_ooh_consultations.Rd
+++ b/man/process_extract_ooh_consultations.Rd
@@ -17,10 +17,11 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 GP OOH Consultations extract, it will return the final data
-but also write this out as an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
+\code{\link{create_homelessness_lookup}()},
 \code{\link{process_extract_acute}()},
 \code{\link{process_extract_ae}()},
 \code{\link{process_extract_alarms_telecare}()},
diff --git a/man/process_extract_ooh_diagnosis.Rd b/man/process_extract_ooh_diagnosis.Rd
index 2a962989a..2dcbee647 100644
--- a/man/process_extract_ooh_diagnosis.Rd
+++ b/man/process_extract_ooh_diagnosis.Rd
@@ -17,10 +17,11 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 GP OOH Diagnosis extract, it will return the final data
-but also write this out an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
+\code{\link{create_homelessness_lookup}()},
 \code{\link{process_extract_acute}()},
 \code{\link{process_extract_ae}()},
 \code{\link{process_extract_alarms_telecare}()},
diff --git a/man/process_extract_ooh_outcomes.Rd b/man/process_extract_ooh_outcomes.Rd
index 5b220e04a..31ec64439 100644
--- a/man/process_extract_ooh_outcomes.Rd
+++ b/man/process_extract_ooh_outcomes.Rd
@@ -17,10 +17,11 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 GP OOH Outcomes extract, it will return the final data
-but also write this out as an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
+\code{\link{create_homelessness_lookup}()},
 \code{\link{process_extract_acute}()},
 \code{\link{process_extract_ae}()},
 \code{\link{process_extract_alarms_telecare}()},
diff --git a/man/process_extract_outpatients.Rd b/man/process_extract_outpatients.Rd
index c5e10abc8..3a46ad119 100644
--- a/man/process_extract_outpatients.Rd
+++ b/man/process_extract_outpatients.Rd
@@ -20,10 +20,11 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 outpatients extract, it will return the final data
-but also write this out as an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
+\code{\link{create_homelessness_lookup}()},
 \code{\link{process_extract_acute}()},
 \code{\link{process_extract_ae}()},
 \code{\link{process_extract_alarms_telecare}()},
diff --git a/man/process_extract_prescribing.Rd b/man/process_extract_prescribing.Rd
index cf294d95c..195a60bfe 100644
--- a/man/process_extract_prescribing.Rd
+++ b/man/process_extract_prescribing.Rd
@@ -20,10 +20,11 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 prescribing extract, it will return the final data
-but also write this out as an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
+\code{\link{create_homelessness_lookup}()},
 \code{\link{process_extract_acute}()},
 \code{\link{process_extract_ae}()},
 \code{\link{process_extract_alarms_telecare}()},
diff --git a/man/process_extract_sds.Rd b/man/process_extract_sds.Rd
index 7e8e44a38..70742bd2e 100644
--- a/man/process_extract_sds.Rd
+++ b/man/process_extract_sds.Rd
@@ -24,10 +24,11 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 (year specific) SDS extract, it will return the final data
-but also write this out as rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
+\code{\link{create_homelessness_lookup}()},
 \code{\link{process_extract_acute}()},
 \code{\link{process_extract_ae}()},
 \code{\link{process_extract_alarms_telecare}()},
diff --git a/man/process_it_chi_deaths.Rd b/man/process_it_chi_deaths.Rd
index f19d8b6cc..1d8e085ab 100644
--- a/man/process_it_chi_deaths.Rd
+++ b/man/process_it_chi_deaths.Rd
@@ -21,6 +21,7 @@ final data and write the data out.
 }
 \seealso{
 Other process extracts: 
+\code{\link{create_homelessness_lookup}()},
 \code{\link{process_extract_acute}()},
 \code{\link{process_extract_ae}()},
 \code{\link{process_extract_alarms_telecare}()},
diff --git a/man/process_lookup_gpprac.Rd b/man/process_lookup_gpprac.Rd
index 4f19f85e1..107af24c0 100644
--- a/man/process_lookup_gpprac.Rd
+++ b/man/process_lookup_gpprac.Rd
@@ -30,6 +30,7 @@ the final data and also write this out to disk.
 }
 \seealso{
 Other process extracts: 
+\code{\link{create_homelessness_lookup}()},
 \code{\link{process_extract_acute}()},
 \code{\link{process_extract_ae}()},
 \code{\link{process_extract_alarms_telecare}()},
diff --git a/man/process_lookup_postcode.Rd b/man/process_lookup_postcode.Rd
index 6ad56e5b3..e556efd51 100644
--- a/man/process_lookup_postcode.Rd
+++ b/man/process_lookup_postcode.Rd
@@ -27,10 +27,11 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 postcode lookup, it will return the final data
-but also write this out as an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
+\code{\link{create_homelessness_lookup}()},
 \code{\link{process_extract_acute}()},
 \code{\link{process_extract_ae}()},
 \code{\link{process_extract_alarms_telecare}()},
diff --git a/man/process_lookup_sc_client.Rd b/man/process_lookup_sc_client.Rd
index e48426419..ceb3caf15 100644
--- a/man/process_lookup_sc_client.Rd
+++ b/man/process_lookup_sc_client.Rd
@@ -20,10 +20,11 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 social care client lookup, it will return the final data
-but also write this out as an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
+\code{\link{create_homelessness_lookup}()},
 \code{\link{process_extract_acute}()},
 \code{\link{process_extract_ae}()},
 \code{\link{process_extract_alarms_telecare}()},
diff --git a/man/process_lookup_sc_demographics.Rd b/man/process_lookup_sc_demographics.Rd
index 6c00b4352..a89933425 100644
--- a/man/process_lookup_sc_demographics.Rd
+++ b/man/process_lookup_sc_demographics.Rd
@@ -24,10 +24,11 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 social care demographic lookup, it will return the final data
-but also write this out as an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
+\code{\link{create_homelessness_lookup}()},
 \code{\link{process_extract_acute}()},
 \code{\link{process_extract_ae}()},
 \code{\link{process_extract_alarms_telecare}()},
diff --git a/man/process_sc_all_alarms_telecare.Rd b/man/process_sc_all_alarms_telecare.Rd
index 7e21407f9..1dded751d 100644
--- a/man/process_sc_all_alarms_telecare.Rd
+++ b/man/process_sc_all_alarms_telecare.Rd
@@ -21,10 +21,11 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 all Alarms Telecare extract, it will return the final data
-but also write this out as a rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
+\code{\link{create_homelessness_lookup}()},
 \code{\link{process_extract_acute}()},
 \code{\link{process_extract_ae}()},
 \code{\link{process_extract_alarms_telecare}()},
diff --git a/man/process_sc_all_care_home.Rd b/man/process_sc_all_care_home.Rd
index 691fe51db..37d6332ca 100644
--- a/man/process_sc_all_care_home.Rd
+++ b/man/process_sc_all_care_home.Rd
@@ -36,10 +36,11 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 all Care Home extract, it will return the final data
-but also write this out as a rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
+\code{\link{create_homelessness_lookup}()},
 \code{\link{process_extract_acute}()},
 \code{\link{process_extract_ae}()},
 \code{\link{process_extract_alarms_telecare}()},
diff --git a/man/process_sc_all_home_care.Rd b/man/process_sc_all_home_care.Rd
index d498514db..1e0afcafd 100644
--- a/man/process_sc_all_home_care.Rd
+++ b/man/process_sc_all_home_care.Rd
@@ -21,10 +21,11 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 all home care extract, it will return the final data
-but also write this out as a rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
+\code{\link{create_homelessness_lookup}()},
 \code{\link{process_extract_acute}()},
 \code{\link{process_extract_ae}()},
 \code{\link{process_extract_alarms_telecare}()},
diff --git a/man/process_sc_all_sds.Rd b/man/process_sc_all_sds.Rd
index f2d6d8a1c..69d79fc9d 100644
--- a/man/process_sc_all_sds.Rd
+++ b/man/process_sc_all_sds.Rd
@@ -21,10 +21,11 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 all SDS extract, it will return the final data
-but also write this out as a rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
+\code{\link{create_homelessness_lookup}()},
 \code{\link{process_extract_acute}()},
 \code{\link{process_extract_ae}()},
 \code{\link{process_extract_alarms_telecare}()},
diff --git a/man/process_tests_sc_client_lookup.Rd b/man/process_tests_sc_client_lookup.Rd
new file mode 100644
index 000000000..7d115e2d3
--- /dev/null
+++ b/man/process_tests_sc_client_lookup.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/process_tests_sc_client_lookup.R
+\name{process_tests_sc_client_lookup}
+\alias{process_tests_sc_client_lookup}
+\title{Social care client lookup tests}
+\usage{
+process_tests_sc_client_lookup(data, year)
+}
+\arguments{
+\item{data}{a \link[tibble:tibble-package]{tibble} of the processed data extract.}
+
+\item{year}{the financial year of the extract in the format '1718'.}
+}
+\value{
+a \link[tibble:tibble-package]{tibble} containing a test comparison.
+}
+\description{
+This script takes the processed social care client lookup and
+produces a test comparison with the previous data. This is written to
+disk in the tests workbook.
+}
diff --git a/man/produce_sc_ch_episodes_tests.Rd b/man/produce_sc_ch_episodes_tests.Rd
index f07dbe382..60fd9c9a9 100644
--- a/man/produce_sc_ch_episodes_tests.Rd
+++ b/man/produce_sc_ch_episodes_tests.Rd
@@ -20,6 +20,7 @@ Produce the test for the Care Home all episodes
 Other social care test functions: 
 \code{\link{produce_sc_demog_lookup_tests}()},
 \code{\link{produce_source_at_tests}()},
-\code{\link{produce_source_sds_tests}()}
+\code{\link{produce_source_sds_tests}()},
+\code{\link{produce_tests_sc_client_lookup}()}
 }
 \concept{social care test functions}
diff --git a/man/produce_sc_demog_lookup_tests.Rd b/man/produce_sc_demog_lookup_tests.Rd
index 36d139955..a214f1ece 100644
--- a/man/produce_sc_demog_lookup_tests.Rd
+++ b/man/produce_sc_demog_lookup_tests.Rd
@@ -20,6 +20,7 @@ Produce the tests for Social Care Demographic Lookup
 Other social care test functions: 
 \code{\link{produce_sc_ch_episodes_tests}()},
 \code{\link{produce_source_at_tests}()},
-\code{\link{produce_source_sds_tests}()}
+\code{\link{produce_source_sds_tests}()},
+\code{\link{produce_tests_sc_client_lookup}()}
 }
 \concept{social care test functions}
diff --git a/man/produce_source_at_tests.Rd b/man/produce_source_at_tests.Rd
index 06efaa426..96033fe0d 100644
--- a/man/produce_source_at_tests.Rd
+++ b/man/produce_source_at_tests.Rd
@@ -25,6 +25,7 @@ Produce the test for the Alarm Telecare all episodes
 Other social care test functions: 
 \code{\link{produce_sc_ch_episodes_tests}()},
 \code{\link{produce_sc_demog_lookup_tests}()},
-\code{\link{produce_source_sds_tests}()}
+\code{\link{produce_source_sds_tests}()},
+\code{\link{produce_tests_sc_client_lookup}()}
 }
 \concept{social care test functions}
diff --git a/man/produce_source_dn_tests.Rd b/man/produce_source_dn_tests.Rd
index 779dbb3bc..52ebbd611 100644
--- a/man/produce_source_dn_tests.Rd
+++ b/man/produce_source_dn_tests.Rd
@@ -35,9 +35,8 @@ It will also produce various summary statistics for bedday, cost and
 episode date variables.
 }
 \seealso{
-\code{\link[=create_hb_test_flags]{create_hb_test_flags()}}
-#' \code{\link[=create_hscp_test_flags]{create_hscp_test_flags()}} and \code{\link[=create_hb_cost_test_flags]{create_hb_cost_test_flags()}}
-for creating test flags
+\code{\link[=create_hb_test_flags]{create_hb_test_flags()}}, \code{\link[=create_hscp_test_flags]{create_hscp_test_flags()}}
+and \code{\link[=create_hb_cost_test_flags]{create_hb_cost_test_flags()}} for creating test flags.
 
 calculate_measures
 
diff --git a/man/produce_source_extract_tests.Rd b/man/produce_source_extract_tests.Rd
index 679132127..97984103a 100644
--- a/man/produce_source_extract_tests.Rd
+++ b/man/produce_source_extract_tests.Rd
@@ -7,7 +7,8 @@
 produce_source_extract_tests(
   data,
   sum_mean_vars = c("beddays", "cost", "yearstay"),
-  max_min_vars = c("record_keydate1", "record_keydate2", "cost_total_net", "yearstay")
+  max_min_vars = c("record_keydate1", "record_keydate2", "cost_total_net", "yearstay"),
+  add_hscp_count = TRUE
 )
 }
 \arguments{
@@ -17,6 +18,8 @@ produce_source_extract_tests(
 \item{sum_mean_vars}{variables used when selecting 'all' measures from \code{\link[=calculate_measures]{calculate_measures()}}}
 
 \item{max_min_vars}{variables used when selecting 'min-max' from \code{\link[=calculate_measures]{calculate_measures()}}}
+
+\item{add_hscp_count}{Defaults to \code{TRUE}. Specify \code{FALSE} where the \verb{hscp} variable is not available.}
 }
 \value{
 a dataframe with a count of each flag
diff --git a/man/produce_source_sds_tests.Rd b/man/produce_source_sds_tests.Rd
index 6c0cefa46..b4cbc8d41 100644
--- a/man/produce_source_sds_tests.Rd
+++ b/man/produce_source_sds_tests.Rd
@@ -26,6 +26,7 @@ Produce the test for the SDS all episodes
 Other social care test functions: 
 \code{\link{produce_sc_ch_episodes_tests}()},
 \code{\link{produce_sc_demog_lookup_tests}()},
-\code{\link{produce_source_at_tests}()}
+\code{\link{produce_source_at_tests}()},
+\code{\link{produce_tests_sc_client_lookup}()}
 }
 \concept{social care test functions}
diff --git a/man/produce_tests_sc_client_lookup.Rd b/man/produce_tests_sc_client_lookup.Rd
new file mode 100644
index 000000000..08c5edbad
--- /dev/null
+++ b/man/produce_tests_sc_client_lookup.Rd
@@ -0,0 +1,28 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/process_tests_sc_client_lookup.R
+\name{produce_tests_sc_client_lookup}
+\alias{produce_tests_sc_client_lookup}
+\title{Social care Client lookup Tests}
+\usage{
+produce_tests_sc_client_lookup(data)
+}
+\arguments{
+\item{data}{new or old data for testing summary flags
+(data is from \code{\link[=get_source_extract_path]{get_source_extract_path()}})}
+
+\item{max_min_vars}{variables used when selecting 'min-max' from \code{\link[=calculate_measures]{calculate_measures()}}}
+}
+\value{
+a dataframe with a count of each flag.
+}
+\description{
+Produce the tests for the social care client lookup.
+}
+\seealso{
+Other social care test functions: 
+\code{\link{produce_sc_ch_episodes_tests}()},
+\code{\link{produce_sc_demog_lookup_tests}()},
+\code{\link{produce_source_at_tests}()},
+\code{\link{produce_source_sds_tests}()}
+}
+\concept{social care test functions}
diff --git a/man/read_extract_gp_ooh.Rd b/man/read_extract_gp_ooh.Rd
index eae6c52dc..233844074 100644
--- a/man/read_extract_gp_ooh.Rd
+++ b/man/read_extract_gp_ooh.Rd
@@ -26,10 +26,11 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 GP OoH extract, it will return the final data
-but also write this out as an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
+\code{\link{create_homelessness_lookup}()},
 \code{\link{process_extract_acute}()},
 \code{\link{process_extract_ae}()},
 \code{\link{process_extract_alarms_telecare}()},
diff --git a/man/read_file.Rd b/man/read_file.Rd
index b8231218f..1ef351342 100644
--- a/man/read_file.Rd
+++ b/man/read_file.Rd
@@ -14,7 +14,7 @@ read_file(path, col_select = NULL, as_data_frame = TRUE, ...)
 \link[tidyselect:eval_select]{tidy selection specification}
 of columns, as used in \code{dplyr::select()}.}
 
-\item{as_data_frame}{Should the function return a \code{data.frame} (default) or
+\item{as_data_frame}{Should the function return a \code{tibble} (default) or
 an Arrow \link[arrow]{Table}?}
 
 \item{...}{Addition arguments passed to the relevant function.}
diff --git a/man/read_it_chi_deaths.Rd b/man/read_it_chi_deaths.Rd
index e2b9e4c40..d1bfe5cf7 100644
--- a/man/read_it_chi_deaths.Rd
+++ b/man/read_it_chi_deaths.Rd
@@ -17,6 +17,7 @@ This will read the CHI deaths extract and return the data.
 }
 \seealso{
 Other process extracts: 
+\code{\link{create_homelessness_lookup}()},
 \code{\link{process_extract_acute}()},
 \code{\link{process_extract_ae}()},
 \code{\link{process_extract_alarms_telecare}()},
diff --git a/man/read_lookup_sc_client.Rd b/man/read_lookup_sc_client.Rd
index 6579fa9f7..283bc6a9a 100644
--- a/man/read_lookup_sc_client.Rd
+++ b/man/read_lookup_sc_client.Rd
@@ -5,14 +5,14 @@
 \title{Process the social care client lookup}
 \usage{
 read_lookup_sc_client(
-  sc_dvprod_connection = phs_db_connection(dsn = "DVPROD"),
-  fyyear
+  fyyear,
+  sc_dvprod_connection = phs_db_connection(dsn = "DVPROD")
 )
 }
 \arguments{
-\item{sc_dvprod_connection}{The connection to the SC platform.}
-
 \item{fyyear}{The year to process, in the standard format '1718'}
+
+\item{sc_dvprod_connection}{The connection to the SC platform.}
 }
 \value{
 the final data as a \link[tibble:tibble-package]{tibble}.
@@ -23,6 +23,7 @@ social care client lookup
 }
 \seealso{
 Other process extracts: 
+\code{\link{create_homelessness_lookup}()},
 \code{\link{process_extract_acute}()},
 \code{\link{process_extract_ae}()},
 \code{\link{process_extract_alarms_telecare}()},
diff --git a/man/recode_gender.Rd b/man/recode_gender.Rd
index 526d2829d..aaa28e6eb 100644
--- a/man/recode_gender.Rd
+++ b/man/recode_gender.Rd
@@ -7,7 +7,7 @@
 recode_gender(episode_file)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 }
 \description{
 Recode gender to 1.5 if 0 or 9.
diff --git a/man/remove_blank_chi.Rd b/man/remove_blank_chi.Rd
index 9cba40a8f..b290dd1e7 100644
--- a/man/remove_blank_chi.Rd
+++ b/man/remove_blank_chi.Rd
@@ -7,7 +7,7 @@
 remove_blank_chi(episode_file)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 }
 \description{
 Convert blank strings to NA and remove NAs from CHI column
diff --git a/man/run_episode_file.Rd b/man/run_episode_file.Rd
deleted file mode 100644
index 59d5fea1d..000000000
--- a/man/run_episode_file.Rd
+++ /dev/null
@@ -1,30 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/run_episode_file.R
-\name{run_episode_file}
-\alias{run_episode_file}
-\title{Produce the Source Episode file}
-\usage{
-run_episode_file(
-  processed_data_list,
-  year,
-  write_to_disk = TRUE,
-  anon_chi_out = TRUE
-)
-}
-\arguments{
-\item{processed_data_list}{containing data from processed extracts.}
-
-\item{year}{The year to process, in FY format.}
-
-\item{write_to_disk}{(optional) Should the data be written to disk default is
-\code{TRUE} i.e. write the data to disk.}
-
-\item{anon_chi_out}{(Default:TRUE) Should \code{anon_chi} be used in the output
-(instead of chi)}
-}
-\value{
-a \link[tibble:tibble-package]{tibble} containing the episode file
-}
-\description{
-Produce the Source Episode file
-}
diff --git a/man/select.Rd b/man/select.Rd
deleted file mode 100644
index 435096d9a..000000000
--- a/man/select.Rd
+++ /dev/null
@@ -1,30 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/aggregate_by_chi_zihao.R
-\name{vars_end_with}
-\alias{vars_end_with}
-\alias{vars_start_with}
-\alias{vars_contain}
-\title{select columns ending with some patterns}
-\usage{
-vars_end_with(data, vars, ignore_case = FALSE)
-
-vars_start_with(data, vars, ignore_case = FALSE)
-
-vars_contain(data, vars, ignore_case = FALSE)
-}
-\description{
-select columns ending with some patterns
-
-select columns starting with some patterns
-
-select columns contains some characters
-}
-\section{Functions}{
-\itemize{
-\item \code{vars_end_with()}: columns based on patterns
-
-\item \code{vars_start_with()}: columns based on patterns
-
-\item \code{vars_contain()}: columns based on patterns
-
-}}
diff --git a/man/setup_keyring.Rd b/man/setup_keyring.Rd
new file mode 100644
index 000000000..c40ef31c1
--- /dev/null
+++ b/man/setup_keyring.Rd
@@ -0,0 +1,44 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/get_connection_PHS_database.R
+\name{setup_keyring}
+\alias{setup_keyring}
+\title{Interactively set up the keyring}
+\usage{
+setup_keyring(
+  keyring = "createslf",
+  key = "db_password",
+  keyring_exists = FALSE,
+  key_exists = FALSE,
+  env_var_pass_exists = FALSE
+)
+}
+\arguments{
+\item{keyring}{Name of the keyring}
+
+\item{key}{Name of the key}
+
+\item{keyring_exists}{Does the keyring already exist}
+
+\item{key_exists}{Does the key already exist}
+
+\item{env_var_pass_exists}{Does the password for the keyring already exist
+in the environment.}
+}
+\value{
+NULL (invisibly)
+}
+\description{
+This is meant to be used with \code{\link[=phs_db_connection]{phs_db_connection()}}, it can only be used
+interactively i.e. not in targets or in a workbench job.
+
+With the default options it will go through the steps to set up a keyring
+which can be used to supply passwords to \code{\link[odbc:dbConnect-OdbcDriver-method]{odbc::dbConnect()}} (or others) in a
+secure and seamless way.
+\enumerate{
+\item Create an .Renviron file in the project and add a password (for the
+keyring) to it.
+\item Create a keyring with the password - Since we have saved the password as
+an environment variable it can be picked up, unlocked and used automatically.
+\item Add the database password to the keyring.
+}
+}
diff --git a/man/store_ep_file_vars.Rd b/man/store_ep_file_vars.Rd
index 06316aac1..880266d58 100644
--- a/man/store_ep_file_vars.Rd
+++ b/man/store_ep_file_vars.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/run_episode_file.R
+% Please edit documentation in R/create_episode_file.R
 \name{store_ep_file_vars}
 \alias{store_ep_file_vars}
 \title{Store the unneeded episode file variables}
diff --git a/man/vars_select.Rd b/man/vars_select.Rd
new file mode 100644
index 000000000..22222ac22
--- /dev/null
+++ b/man/vars_select.Rd
@@ -0,0 +1,33 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/aggregate_by_chi.R
+\name{vars_end_with}
+\alias{vars_end_with}
+\alias{vars_start_with}
+\alias{vars_contain}
+\title{Select columns according to a pattern}
+\usage{
+vars_end_with(data, vars, ignore_case = FALSE)
+
+vars_start_with(data, vars, ignore_case = FALSE)
+
+vars_contain(data, vars, ignore_case = FALSE)
+}
+\arguments{
+\item{data}{The data from which to select columns/variables.}
+
+\item{vars}{The variables / pattern to find, as a character vector}
+
+\item{ignore_case}{Should case be ignored (Default: FALSE)}
+}
+\description{
+Select columns according to a pattern
+}
+\section{Functions}{
+\itemize{
+\item \code{vars_end_with()}: Choose variables ending in a given pattern.
+
+\item \code{vars_start_with()}: Choose variables starting with a given pattern.
+
+\item \code{vars_contain()}: Choose variables which contain a given pattern.
+
+}}
diff --git a/run_targets_1718.R b/run_targets_1718.R
new file mode 100644
index 000000000..ebc58895f
--- /dev/null
+++ b/run_targets_1718.R
@@ -0,0 +1,4 @@
+# Build only the pipeline targets whose names contain "1718".
+library(targets)
+
+tar_make_future(names = targets::contains("1718"))
diff --git a/run_targets_1819.R b/run_targets_1819.R
new file mode 100644
index 000000000..83bbcedef
--- /dev/null
+++ b/run_targets_1819.R
@@ -0,0 +1,4 @@
+# Build only the pipeline targets whose names contain "1819".
+library(targets)
+
+tar_make_future(names = targets::contains("1819"))
diff --git a/run_targets_1920.R b/run_targets_1920.R
new file mode 100644
index 000000000..1640d1900
--- /dev/null
+++ b/run_targets_1920.R
@@ -0,0 +1,4 @@
+# Build only the pipeline targets whose names contain "1920".
+library(targets)
+
+tar_make_future(names = targets::contains("1920"))
diff --git a/run_targets_2021.R b/run_targets_2021.R
new file mode 100644
index 000000000..80749e81a
--- /dev/null
+++ b/run_targets_2021.R
@@ -0,0 +1,4 @@
+# Build only the pipeline targets whose names contain "2021".
+library(targets)
+
+tar_make_future(names = targets::contains("2021"))
diff --git a/run_targets_2122.R b/run_targets_2122.R
new file mode 100644
index 000000000..aa95d7b24
--- /dev/null
+++ b/run_targets_2122.R
@@ -0,0 +1,4 @@
+# Build only the pipeline targets whose names contain "2122".
+library(targets)
+
+tar_make_future(names = targets::contains("2122"))
diff --git a/run_targets_2223.R b/run_targets_2223.R
new file mode 100644
index 000000000..2ded7d5fd
--- /dev/null
+++ b/run_targets_2223.R
@@ -0,0 +1,4 @@
+# Build only the pipeline targets whose names contain "2223".
+library(targets)
+
+tar_make_future(names = targets::contains("2223"))
diff --git a/run_targets_2324.R b/run_targets_2324.R
new file mode 100644
index 000000000..b875984f4
--- /dev/null
+++ b/run_targets_2324.R
@@ -0,0 +1,4 @@
+# Build only the pipeline targets whose names contain "2324".
+library(targets)
+
+tar_make_future(names = targets::contains("2324"))
diff --git a/tests/testthat/_snaps/convert_sending_location_to_lca.md b/tests/testthat/_snaps/convert_sending_location_to_lca.md
new file mode 100644
index 000000000..1fa02dc14
--- /dev/null
+++ b/tests/testthat/_snaps/convert_sending_location_to_lca.md
@@ -0,0 +1,12 @@
+# Can convert a SC sending location to lca code
+
+    Code
+      convert_sending_location_to_lca(c(100L, 110L, 120L, 130L, 355L, 150L, 395L,
+        170L, 180L, 190L, 200L, 210L, 220L, 230L, 240L, 250L, 260L, 270L, 280L, 290L,
+        300L, 310L, 320L, 330L, 340L, 350L, 360L, 370L, 380L, 390L, 400L, 235L, 999L,
+        0L, NA_integer_))
+    Output
+       [1] "01" "02" "03" "04" "05" "06" "07" "08" "09" "10" "11" "12" "13" "14" "15"
+      [16] "16" "17" "18" "19" "20" "21" "22" "23" "24" "25" "26" "27" "28" "29" "30"
+      [31] "31" "32" NA   NA   NA  
+
diff --git a/tests/testthat/test-00-update_refs.R b/tests/testthat/test-00-update_refs.R
index a1cabf9c5..4f3ef4ed1 100644
--- a/tests/testthat/test-00-update_refs.R
+++ b/tests/testthat/test-00-update_refs.R
@@ -13,7 +13,7 @@ test_that("Previous Update string looks valid", {
 })
 
 test_that("Previous Update works for different month values", {
-  expect_equal(previous_update(0), latest_update())
+  expect_equal(previous_update(months_ago = 0L), latest_update())
 
   latest_update_month <- lubridate::month(
     lubridate::my(latest_update()),
diff --git a/tests/testthat/test-check_year_valid.R b/tests/testthat/test-check_year_valid.R
index ca0738c89..eda74dbdf 100644
--- a/tests/testthat/test-check_year_valid.R
+++ b/tests/testthat/test-check_year_valid.R
@@ -49,7 +49,8 @@ test_that("Check year valid works for specific datasets ", {
   expect_true(check_year_valid("1920", "NSU"))
   expect_true(check_year_valid("2021", "NSU"))
   expect_true(check_year_valid("2122", "NSU"))
-  expect_false(check_year_valid("2223", "NSU"))
+  expect_true(check_year_valid("2223", "NSU"))
+  expect_false(check_year_valid("2324", "NSU"))
 
   # SPARRA
   expect_false(check_year_valid("1415", "SPARRA"))
diff --git a/tests/testthat/test-compute_mid_year_age.R b/tests/testthat/test-compute_mid_year_age.R
new file mode 100644
index 000000000..a4a542b9e
--- /dev/null
+++ b/tests/testthat/test-compute_mid_year_age.R
@@ -0,0 +1,16 @@
+test_that("Accurately compute mid year age", {
+  # One date of birth, checked against the age on 30 September 2017.
+  single_dob <- lubridate::make_date("2000")
+  sept_2017 <- lubridate::make_date("2017", 9L, 30L)
+  expect_equal(
+    compute_mid_year_age("1718", single_dob),
+    phsmethods::age_calculate(single_dob, sept_2017)
+  )
+  # 1000 consecutive dates of birth, checked against 30 September 2020.
+  many_dobs <- lubridate::make_date("1999") + 1:1000
+  sept_2020 <- lubridate::make_date("2020", 9L, 30L)
+  expect_equal(
+    compute_mid_year_age("2021", many_dobs),
+    phsmethods::age_calculate(many_dobs, sept_2020)
+  )
+})
diff --git a/tests/testthat/test-convert_sending_location_to_lca.R b/tests/testthat/test-convert_sending_location_to_lca.R
new file mode 100644
index 000000000..eb66802a6
--- /dev/null
+++ b/tests/testthat/test-convert_sending_location_to_lca.R
@@ -0,0 +1,52 @@
+test_that("Can convert a SC sending location to lca code", {
+  expect_snapshot( # the call and its printed result are compared to the snapshot
+    convert_sending_location_to_lca(
+      c(
+        100L,
+        110L,
+        120L,
+        130L,
+        355L,
+        150L,
+        395L,
+        170L,
+        180L,
+        190L,
+        200L,
+        210L,
+        220L,
+        230L,
+        240L,
+        250L,
+        260L,
+        270L,
+        280L,
+        290L,
+        300L,
+        310L,
+        320L,
+        330L,
+        340L,
+        350L,
+        360L,
+        370L,
+        380L,
+        390L,
+        400L,
+        235L,
+        999L, # recorded snapshot output for this value is NA
+        0L, # recorded snapshot output for this value is NA
+        NA_integer_ # recorded snapshot output for this value is NA
+      )
+    )
+  )
+})
+
+test_that("Errors on unexpected input", {
+  # Character input should error; c("100", 99L) is coerced to character.
+  bad_inputs <- list("100", c("100", 99L))
+
+  for (bad_input in bad_inputs) {
+    expect_error(convert_sending_location_to_lca(bad_input))
+  }
+})
diff --git a/tests/testthat/test-create_service_use_lookup.R b/tests/testthat/test-create_service_use_lookup.R
index a58741e63..bb6abf6f9 100644
--- a/tests/testthat/test-create_service_use_lookup.R
+++ b/tests/testthat/test-create_service_use_lookup.R
@@ -165,7 +165,29 @@ test_that("Costs are assigned correctly", {
   # Operation flag
   expect_equal(
     add_operation_flag(dummy_data[["op1a"]]),
-    c(F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, T)
+    c(
+      FALSE,
+      FALSE,
+      FALSE,
+      FALSE,
+      FALSE,
+      FALSE,
+      FALSE,
+      FALSE,
+      FALSE,
+      FALSE,
+      FALSE,
+      FALSE,
+      FALSE,
+      FALSE,
+      FALSE,
+      FALSE,
+      FALSE,
+      FALSE,
+      FALSE,
+      FALSE,
+      TRUE
+    )
   )
 
   dummy_data <- tibble::tribble(
@@ -181,7 +203,7 @@ test_that("Costs are assigned correctly", {
       dummy_data[["acute_elective_cost"]],
       dummy_data[["elective_inpatient_cost"]]
     ),
-    c(T, T, T, F)
+    c(TRUE, TRUE, TRUE, FALSE)
   )
 })
 
@@ -241,29 +263,42 @@ dummy_data <- tibble::tribble(
   ~psychiatry_cost, ~maternity_cost, ~geriatric_cost, ~elective_inpatient_flag, ~elective_instances,
   ~emergency_instances, ~prescribing_cost, ~outpatient_cost, ~care_home_cost, ~community_health_cost,
   ~ae2_cost,
-  10, 0, 0, F, 0, 0, 0, 0, 0, 0, 0,
-  0, 10, 0, F, 0, 0, 0, 0, 0, 0, 0,
-  0, 0, 10, F, 0, 0, 0, 0, 0, 0, 0,
-  0, 0, 0, T, 0, 0, 0, 0, 0, 0, 0,
-  0, 0, 0, F, 2, 0, 0, 0, 0, 0, 0,
-  0, 0, 0, F, 15, 0, 0, 0, 0, 0, 0,
-  0, 0, 0, F, 0, 1, 0, 0, 0, 0, 0,
-  0, 0, 0, F, 0, 4, 0, 0, 0, 0, 0,
-  0, 0, 0, F, 0, 0, 10, 0, 0, 0, 0,
-  0, 0, 0, F, 0, 0, 0, 10, 0, 0, 0,
-  0, 0, 0, F, 0, 0, 0, 0, 10, 0, 0,
-  0, 0, 0, F, 0, 0, 0, 0, 0, 10, 0,
-  0, 0, 0, F, 0, 0, 0, 0, 0, 0, 10,
-  0, 0, 0, F, 3.5, 0, 0, 0, 0, 0, 0,
-  10, 10, 10, T, 10, 10, 10, 10, 10, 10, 10
+  10, 0, 0, FALSE, 0, 0, 0, 0, 0, 0, 0,
+  0, 10, 0, FALSE, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 10, FALSE, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, TRUE, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, FALSE, 2, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, FALSE, 15, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, FALSE, 0, 1, 0, 0, 0, 0, 0,
+  0, 0, 0, FALSE, 0, 4, 0, 0, 0, 0, 0,
+  0, 0, 0, FALSE, 0, 0, 10, 0, 0, 0, 0,
+  0, 0, 0, FALSE, 0, 0, 0, 10, 0, 0, 0,
+  0, 0, 0, FALSE, 0, 0, 0, 0, 10, 0, 0,
+  0, 0, 0, FALSE, 0, 0, 0, 0, 0, 10, 0,
+  0, 0, 0, FALSE, 0, 0, 0, 0, 0, 0, 10,
+  0, 0, 0, FALSE, 3.5, 0, 0, 0, 0, 0, 0,
+  10, 10, 10, TRUE, 10, 10, 10, 10, 10, 10, 10
 )
 
 test_that("Psychiatry cohort is assigned correctly", {
   expect_equal(
     assign_s_cohort_psychiatry(dummy_data[["psychiatry_cost"]]),
     c(
-      TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
-      FALSE, FALSE, FALSE, FALSE, FALSE, TRUE
+      TRUE,
+      FALSE,
+      FALSE,
+      FALSE,
+      FALSE,
+      FALSE,
+      FALSE,
+      FALSE,
+      FALSE,
+      FALSE,
+      FALSE,
+      FALSE,
+      FALSE,
+      FALSE,
+      TRUE
     )
   )
 })
@@ -388,13 +423,13 @@ test_that("Recalculated costs are calculated correctly", {
     ~elective_inpatient_cohort, ~limited_daycases_cohort, ~routine_daycase_cohort,
     ~single_emergency_cohort, ~multiple_emergency_cohort, ~community_care_cohort,
     ~acute_elective_cost, ~acute_emergency_cost, ~community_health_cost, ~cost_total_net,
-    T, F, F, F, F, F, 10, 0, 0, 10,
-    F, T, F, F, F, F, 10, 0, 0, 10,
-    F, F, T, F, F, F, 10, 0, 0, 10,
-    F, F, F, T, F, F, 0, 10, 0, 10,
-    F, F, F, F, T, F, 0, 10, 0, 10,
-    F, F, F, F, F, T, 0, 0, 10, 10,
-    T, T, T, T, T, T, 10, 20, 30, 10
+    TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, 10, 0, 0, 10,
+    FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, 10, 0, 0, 10,
+    FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, 10, 0, 0, 10,
+    FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, 0, 10, 0, 10,
+    FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, 0, 10, 0, 10,
+    FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, 0, 0, 10, 10,
+    TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 10, 20, 30, 10
   )
 
   # Elective
@@ -448,7 +483,7 @@ test_that("Recalculated costs are calculated correctly", {
   # Residential care (not used)
   expect_equal(
     calculate_residential_care_cost(),
-    c(0)
+    0.0
   )
 })
 
diff --git a/tests/testthat/test-flag_non_scottish_residents.R b/tests/testthat/test-flag_non_scottish_residents.R
new file mode 100644
index 000000000..b61d9e159
--- /dev/null
+++ b/tests/testthat/test-flag_non_scottish_residents.R
@@ -0,0 +1,26 @@
+test_that("Records are flagged correctly", {
+  # Each row is one residency scenario; the comment above it names the case.
+  residency_cases <- tibble::tribble(
+    ~postcode, ~gpprac,
+    # Scottish resident
+    "AB1 1AA", 18574,
+    # Dummy postcode and missing gpprac
+    "BF010AA", NA,
+    # Dummy postcode and missing gpprac (2)
+    "ZZ014AA", NA,
+    # Missing postcode and missing gpprac
+    NA, NA,
+    # Not English practice and missing postcode
+    NA, 18574,
+    # Not English practice and dummy postcode
+    "NF1 1AB", 18574,
+    # English postcode and English gpprac
+    "BS4 4RG", 99942
+  )
+
+  flagged_cases <- flag_non_scottish_residents(residency_cases)
+  # One expected `keep_flag` value per row of the tribble, in row order.
+  expected_flags <- c(0, 2, 2, 2, 3, 4, 1)
+
+  expect_equal(flagged_cases$keep_flag, expected_flags)
+})
diff --git a/tests/testthat/test-get_file_paths.R b/tests/testthat/test-get_file_paths.R
index 2bec746f7..a3b29a290 100644
--- a/tests/testthat/test-get_file_paths.R
+++ b/tests/testthat/test-get_file_paths.R
@@ -1,3 +1,28 @@
+test_that("Errors properly", {
+  # The expected message says that the directory does not exist.
+  expect_error(
+    get_file_path(directory = "foo", file_name = "bar"),
+    regexp = "The directory .+? does not exist\\."
+  )
+
+  # "write" mode combined with `file_name_regexp` should be rejected.
+  expect_error(
+    get_file_path(
+      directory = ".", file_name_regexp = "targets", check_mode = "write"
+    ),
+    regexp = "`check_mode = \"write\"` can't be used"
+  )
+})
+
+test_that("Can do check exists", {
+  # In "exists" mode the lookup for "foo.R" is expected to give FALSE.
+  file_found <- get_file_path(
+    directory = ".", file_name = "foo.R", check_mode = "exists"
+  )
+  expect_false(file_found)
+})
+
+
 skip_on_ci()
 
 slf_updates_dir <- fs::path(
diff --git a/tests/testthat/test-get_it_extract_paths.R b/tests/testthat/test-get_it_extract_paths.R
index baaad52a5..52f9e4181 100644
--- a/tests/testthat/test-get_it_extract_paths.R
+++ b/tests/testthat/test-get_it_extract_paths.R
@@ -1,3 +1,25 @@
+test_that("IT reference cleanup works", {
+  # With or without the "SCTASK" prefix, the 7-digit number is returned.
+  expect_equal(check_it_reference("SCTASK0439133"), "0439133")
+  expect_equal(check_it_reference("0439133"), "0439133")
+
+  # References carrying more than 7 digits must all raise the same error.
+  expected_error <- "`it_reference` must be exactly 7 numbers\\."
+  invalid_references <- c(
+    "123456789",
+    "1234567890",
+    "SCTASK123456789",
+    "ABCDEF123456789"
+  )
+
+  for (reference in invalid_references) {
+    expect_error(
+      check_it_reference(reference),
+      expected_error
+    )
+  }
+})
+
 skip_on_ci()
 
 test_that("IT extract file paths work", {