Merge branch 'master' into sc_client

Public-Health-Scotland · Aug 1, 2023 · 6419d88 · 6419d88
2 parents abef0d7 + 1efe25e
commit 6419d88
Show file tree

Hide file tree

Showing 74 changed files with 2,102 additions and 85 deletions.
diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt
@@ -28,6 +28,7 @@ cmh
 CNWs
 commhosp
 congen
+costincdnas
 costmonthnum
 costsfy
 covr
@@ -45,6 +46,7 @@ dbconnect
 dbplyr
 deathdiag
 demog
+dfc
 disch
 dischloc
 dischto
@@ -70,6 +72,7 @@ fyyear
 geogs
 ggplot
 GLS
+gls
 gms
 GPOo
 gpprac
@@ -86,31 +89,40 @@ hhg
 hjust
 hms
 homecare
+homev
 hscp
 hscpnames
+IDPC
 infyyear
 ipdc
 itle
 iwalk
 jaccard
 jan
+jennifer
 jul
 keydate
 keyring
 keytime
 keytimex
+kis
+lgl
+kis
 los
 ltc
 ltcs
 lubridate
 magrittr
+Mcbride
+mcmahon
 MMMYY
 monthflag
 mpat
 multiday
 multisession
 multistaff
 NAs
+newcons
 nhs
 nhshosp
 NRS
@@ -142,7 +154,9 @@ purrr
 quickstart
 Rbuildignore
 rcmdcheck
+rdd
 rds
+reabl
 reablement
 readcode
 readr
@@ -159,8 +173,12 @@ rspm
 RStudio
 rstudioapi
 Rtype
+SDcols
 seealso
 selfharm
+setkeyv
+setnafill
+setnames
 Siar
 sigfac
 simd
@@ -200,5 +218,6 @@ xintercept
 xlsx
 yearstay
 YYYYQX
+zihao
 zsav
 zstd
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -28,6 +28,8 @@ Imports:
     dtplyr (>= 1.3.0),
     fs (>= 1.6.1),
     fst (>= 0.9.8),
+    future (>= 1.33.0),
+    future.callr (>= 0.8.1),
     glue (>= 1.6.2),
     haven (>= 2.5.2),
     hms (>= 1.1.0),
@@ -53,7 +55,8 @@ Imports:
     stringr (>= 1.5.0),
     tibble (>= 3.2.1),
     tidyr (>= 1.3.0),
-    tidyselect (>= 1.2.0)
+    tidyselect (>= 1.2.0),
+    zoo (>= 1.8.0)
 Suggests:
     covr (>= 3.6.1),
     roxygen2 (>= 7.2.3),

diff --git a/NAMESPACE b/NAMESPACE
@@ -13,6 +13,7 @@ export(convert_hscp_to_hscpnames)
 export(convert_numeric_to_date)
 export(convert_sending_location_to_lca)
 export(convert_year_to_fyyear)
+export(create_individual_file)
 export(create_service_use_cohorts)
 export(end_fy)
 export(end_fy_quarter)
@@ -114,6 +115,7 @@ export(process_tests_ae)
 export(process_tests_alarms_telecare)
 export(process_tests_care_home)
 export(process_tests_cmh)
+export(process_tests_delayed_discharges)
 export(process_tests_district_nursing)
 export(process_tests_episode_file)
 export(process_tests_gp_ooh)
@@ -160,6 +162,8 @@ export(start_fy)
 export(start_fy_quarter)
 export(start_next_fy_quarter)
 export(write_file)
+importFrom(data.table,.N)
+importFrom(data.table,.SD)
 importFrom(magrittr,"%>%")
 importFrom(readr,col_character)
 importFrom(readr,col_date)

diff --git a/R/aggregate_by_chi_zihao.R b/R/aggregate_by_chi_zihao.R
@@ -0,0 +1,214 @@
+#' Aggregate by CHI
+#'
+#' @description Aggregate the episode file to one row per CHI (data.table
+#' implementation) and return the combined per-CHI summaries as a tibble.
+#'
+#' @importFrom data.table .N
+#' @importFrom data.table .SD
+#'
+#' @inheritParams create_individual_file
+aggregate_by_chi_zihao <- function(episode_file) {
+  cli::cli_alert_info("Aggregate by CHI function started at {Sys.time()}")
+
+  # Convert to data.table in place (setDT works by reference, without a copy)
+  data.table::setDT(episode_file)
+
+  # Ensure all variable names are lowercase
+  data.table::setnames(episode_file, stringr::str_to_lower)
+
+  # Sort by chi, then keydate1/keytime1, keydate2/keytime2 (also sets the key)
+  data.table::setkeyv(
+    episode_file,
+    c(
+      "chi",
+      "record_keydate1",
+      "keytime1",
+      "record_keydate2",
+      "keytime2"
+    )
+  )
+
+  data.table::setnames(
+    episode_file,
+    c( # old names
+      "ch_chi_cis", "cij_marker", "ooh_case_id"
+      # ,"hh_in_fy"
+    ),
+    c( # new names: the count columns listed in cols3 below
+      "ch_cis_episodes", "cij_total", "ooh_cases"
+      # ,"hl1_in_fy"
+    )
+  )
+
+  # Column groups; each group is aggregated per chi with a different summary
+  # cols2: keep the value from the last row of each chi (in key order)
+  cols2 <- c(
+    "postcode",
+    "dob",
+    "gpprac",
+    vars_start_with(episode_file, "sc_")
+  )
+  # cols3: count the distinct non-NA values per chi
+  cols3 <- c(
+    "ch_cis_episodes",
+    "cij_total",
+    "cij_el",
+    "cij_non_el",
+    "cij_mat",
+    "cij_delay",
+    "ooh_cases",
+    "preventable_admissions"
+  )
+  # cols4: sum per chi (NAs ignored); picked by name suffix or prefix
+  cols4 <- c(
+    vars_end_with(
+      episode_file,
+      c(
+        "episodes",
+        "beddays",
+        "cost",
+        "attendances",
+        "attend",
+        "contacts",
+        "hours",
+        "alarms",
+        "telecare",
+        "paid_items",
+        "advice",
+        "homev",
+        "time",
+        "assessment",
+        "other",
+        "dn",
+        "nhs24",
+        "pcc"
+      )
+    ),
+    vars_start_with(
+      episode_file,
+      "sds_option"
+    ),
+    "health_net_cost_inc_dnas"
+  )
+  cols4 <- cols4[!(cols4 %in% c("ch_cis_episodes"))] # counted in cols3 instead
+  # cols5: take the maximum per chi; NAs are first replaced with 0 (in place)
+  cols5 <- c("nsu", vars_contain(episode_file, c("hl1_in_fy")))
+  data.table::setnafill(episode_file, fill = 0L, cols = cols5)
+  # Compute each summary as a separate table, grouped by chi
+  individual_file_cols1 <- episode_file[,
+    .(gender = mean(gender)),
+    by = "chi"
+  ]
+  individual_file_cols2 <- episode_file[,
+    .SD[.N], # last row of the group
+    .SDcols = cols2,
+    by = "chi"
+  ]
+  individual_file_cols3 <- episode_file[,
+    lapply(.SD, function(x) {
+      data.table::uniqueN(x, na.rm = TRUE)
+    }),
+    .SDcols = cols3,
+    by = "chi"
+  ]
+  individual_file_cols4 <- episode_file[,
+    lapply(.SD, function(x) {
+      sum(x, na.rm = TRUE)
+    }),
+    .SDcols = cols4,
+    by = "chi"
+  ]
+  individual_file_cols5 <- episode_file[,
+    lapply(.SD, function(x) max(x, na.rm = TRUE)),
+    .SDcols = cols5,
+    by = "chi"
+  ]
+  individual_file_cols6 <- episode_file[, # stage 1: one row per chi and cij
+    .(
+      preventable_beddays = ifelse(
+        max(cij_ppa, na.rm = TRUE), # NOTE(review): all-NA gives -Inf (truthy)
+        max(cij_end_date) - min(cij_start_date),
+        NA_real_
+      )
+    ),
+    # cij_marker has been renamed as cij_total
+    by = c("chi", "cij_total")
+  ]
+  individual_file_cols6 <- individual_file_cols6[, # stage 2: total per chi
+    .(
+      preventable_beddays = sum(preventable_beddays, na.rm = TRUE)
+    ),
+    by = "chi"
+  ]
+
+  individual_file <- dplyr::bind_cols(
+    individual_file_cols1, # keeps its chi column; tables are bound by position
+    individual_file_cols2[, chi := NULL], # := NULL drops chi by reference
+    individual_file_cols3[, chi := NULL],
+    individual_file_cols4[, chi := NULL],
+    individual_file_cols5[, chi := NULL],
+    individual_file_cols6[, chi := NULL] # NOTE(review): assumes same chi order
+  )
+
+  # Convert the combined result back to a tibble
+  return(dplyr::as_tibble(individual_file))
+}
+
+
+#' Select the column names of `data` that end with any pattern in `vars`
+#' @describeIn select columns based on patterns
+vars_end_with <- function(data, vars, ignore_case = FALSE) {
+  names(data)[stringr::str_ends(
+    names(data),
+    stringr::regex(paste(vars, collapse = "|"), # one regex: "a|b|c"
+      ignore_case = ignore_case
+    )
+  )]
+} # NOTE(review): @describeIn above expects a destination function name first
+
+#' Select the column names of `data` that start with any pattern in `vars`
+#' @describeIn select columns based on patterns
+vars_start_with <- function(data, vars, ignore_case = FALSE) {
+  names(data)[stringr::str_starts(
+    names(data),
+    stringr::regex(paste(vars, collapse = "|"), # one regex: "a|b|c"
+      ignore_case = ignore_case
+    )
+  )]
+} # NOTE(review): @describeIn above expects a destination function name first
+
+#' Select the column names of `data` that contain any pattern in `vars`
+#' @describeIn select columns based on patterns
+vars_contain <- function(data, vars, ignore_case = FALSE) {
+  names(data)[stringr::str_detect(
+    names(data),
+    stringr::regex(paste(vars, collapse = "|"), # one regex: "a|b|c"
+      ignore_case = ignore_case
+    )
+  )]
+} # NOTE(review): @describeIn above expects a destination function name first
+
+#' Aggregate CIS episodes
+#'
+#' @description Set CH variables to their summary within each CHI and CIS.
+#'
+#' @inheritParams create_individual_file
+aggregate_ch_episodes_zihao <- function(episode_file) {
+  cli::cli_alert_info("Aggregate ch episodes function started at {Sys.time()}")
+
+  # Convert to data.table in place (setDT works by reference, without a copy)
+  data.table::setDT(episode_file)
+
+  # Add group summaries as columns by reference; `:=` keeps every row
+  episode_file <- episode_file[, `:=`(
+    ch_no_cost = max(ch_no_cost),
+    ch_ep_start = min(record_keydate1),
+    ch_ep_end = max(ch_ep_end), # overwrites the existing ch_ep_end column
+    ch_cost_per_day = mean(ch_cost_per_day)
+  ), by = c("chi", "ch_chi_cis")]
+
+  # Always convert back to a tibble before returning
+  episode_file <- tibble::as_tibble(episode_file)
+
+  return(episode_file)
+}