Skip to content

Commit

Permalink
Merge branch 'master' into sc_client
Browse files Browse the repository at this point in the history
  • Loading branch information
Moohan authored Aug 1, 2023
2 parents abef0d7 + 1efe25e commit 6419d88
Show file tree
Hide file tree
Showing 74 changed files with 2,102 additions and 85 deletions.
19 changes: 19 additions & 0 deletions .github/actions/spelling/expect.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ cmh
CNWs
commhosp
congen
costincdnas
costmonthnum
costsfy
covr
Expand All @@ -45,6 +46,7 @@ dbconnect
dbplyr
deathdiag
demog
dfc
disch
dischloc
dischto
Expand All @@ -70,6 +72,7 @@ fyyear
geogs
ggplot
GLS
gls
gms
GPOo
gpprac
Expand All @@ -86,31 +89,40 @@ hhg
hjust
hms
homecare
homev
hscp
hscpnames
IDPC
infyyear
ipdc
itle
iwalk
jaccard
jan
jennifer
jul
keydate
keyring
keytime
keytimex
kis
lgl
kis
los
ltc
ltcs
lubridate
magrittr
Mcbride
mcmahon
MMMYY
monthflag
mpat
multiday
multisession
multistaff
NAs
newcons
nhs
nhshosp
NRS
Expand Down Expand Up @@ -142,7 +154,9 @@ purrr
quickstart
Rbuildignore
rcmdcheck
rdd
rds
reabl
reablement
readcode
readr
Expand All @@ -159,8 +173,12 @@ rspm
RStudio
rstudioapi
Rtype
SDcols
seealso
selfharm
setkeyv
setnafill
setnames
Siar
sigfac
simd
Expand Down Expand Up @@ -200,5 +218,6 @@ xintercept
xlsx
yearstay
YYYYQX
zihao
zsav
zstd
5 changes: 4 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ Imports:
dtplyr (>= 1.3.0),
fs (>= 1.6.1),
fst (>= 0.9.8),
future (>= 1.33.0),
future.callr (>= 0.8.1),
glue (>= 1.6.2),
haven (>= 2.5.2),
hms (>= 1.1.0),
Expand All @@ -53,7 +55,8 @@ Imports:
stringr (>= 1.5.0),
tibble (>= 3.2.1),
tidyr (>= 1.3.0),
tidyselect (>= 1.2.0)
tidyselect (>= 1.2.0),
zoo (>= 1.8.0)
Suggests:
covr (>= 3.6.1),
roxygen2 (>= 7.2.3),
Expand Down
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ export(convert_hscp_to_hscpnames)
export(convert_numeric_to_date)
export(convert_sending_location_to_lca)
export(convert_year_to_fyyear)
export(create_individual_file)
export(create_service_use_cohorts)
export(end_fy)
export(end_fy_quarter)
Expand Down Expand Up @@ -114,6 +115,7 @@ export(process_tests_ae)
export(process_tests_alarms_telecare)
export(process_tests_care_home)
export(process_tests_cmh)
export(process_tests_delayed_discharges)
export(process_tests_district_nursing)
export(process_tests_episode_file)
export(process_tests_gp_ooh)
Expand Down Expand Up @@ -160,6 +162,8 @@ export(start_fy)
export(start_fy_quarter)
export(start_next_fy_quarter)
export(write_file)
importFrom(data.table,.N)
importFrom(data.table,.SD)
importFrom(magrittr,"%>%")
importFrom(readr,col_character)
importFrom(readr,col_date)
Expand Down
214 changes: 214 additions & 0 deletions R/aggregate_by_chi_zihao.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
#' Aggregate by CHI
#'
#' @description Aggregate episode file by CHI to convert into
#' individual file.
#'
#' @details The episode file is converted to a `data.table` by reference,
#' its column names are lower-cased, it is keyed (sorted) by `chi` and the
#' record key dates/times, and `ch_chi_cis`, `cij_marker` and `ooh_case_id`
#' are renamed to `ch_cis_episodes`, `cij_total` and `ooh_cases`. Because
#' these steps work by reference, the object passed in by the caller is
#' modified as well.
#'
#' Six per-CHI summaries are then computed and bound together column-wise:
#' * the mean of `gender`;
#' * the last row (in key order) of `postcode`, `dob`, `gpprac` and every
#'   column starting with `sc_`;
#' * the number of distinct non-missing values of the counting columns;
#' * the sum (ignoring `NA`) of the activity / cost columns;
#' * the maximum of `nsu` and the columns containing `hl1_in_fy`
#'   (missing values are first filled with 0);
#' * `preventable_beddays`, the summed length of every CIJ that has at
#'   least one record flagged by `cij_ppa`.
#'
#' @importFrom data.table .N
#' @importFrom data.table .SD
#'
#' @inheritParams create_individual_file
#'
#' @return A tibble with one row per `chi`.
aggregate_by_chi_zihao <- function(episode_file) {
  cli::cli_alert_info("Aggregate by CHI function started at {Sys.time()}")

  # Convert to data.table (by reference - the caller's object is converted)
  data.table::setDT(episode_file)

  # Ensure all variable names are lowercase
  data.table::setnames(episode_file, stringr::str_to_lower)

  # Sort the data. Every summary below is grouped on this same keyed table,
  # so all of them come back in the same `chi` order.
  data.table::setkeyv(
    episode_file,
    c(
      "chi",
      "record_keydate1",
      "keytime1",
      "record_keydate2",
      "keytime2"
    )
  )

  data.table::setnames(
    episode_file,
    c(
      "ch_chi_cis", "cij_marker", "ooh_case_id"
      # ,"hh_in_fy"
    ),
    c(
      "ch_cis_episodes", "cij_total", "ooh_cases"
      # ,"hl1_in_fy"
    )
  )

  # column specification, grouped by chi
  # columns to select last
  cols2 <- c(
    "postcode",
    "dob",
    "gpprac",
    vars_start_with(episode_file, "sc_")
  )
  # columns to count unique rows
  cols3 <- c(
    "ch_cis_episodes",
    "cij_total",
    "cij_el",
    "cij_non_el",
    "cij_mat",
    "cij_delay",
    "ooh_cases",
    "preventable_admissions"
  )
  # columns to sum up
  cols4 <- c(
    vars_end_with(
      episode_file,
      c(
        "episodes",
        "beddays",
        "cost",
        "attendances",
        "attend",
        "contacts",
        "hours",
        "alarms",
        "telecare",
        "paid_items",
        "advice",
        "homev",
        "time",
        "assessment",
        "other",
        "dn",
        "nhs24",
        "pcc"
      )
    ),
    vars_start_with(
      episode_file,
      "sds_option"
    ),
    "health_net_cost_inc_dnas"
  )
  # `ch_cis_episodes` is counted (cols3), not summed. `setdiff()` also drops
  # any name that was picked up by more than one pattern.
  cols4 <- setdiff(cols4, "ch_cis_episodes")
  # columns to select maximum
  cols5 <- c("nsu", vars_contain(episode_file, c("hl1_in_fy")))
  # Fill NA with 0 so that max() below never sees an all-NA group
  data.table::setnafill(episode_file, fill = 0L, cols = cols5)

  # compute
  individual_file_cols1 <- episode_file[,
    .(gender = mean(gender)),
    by = "chi"
  ]
  individual_file_cols2 <- episode_file[,
    .SD[.N],
    .SDcols = cols2,
    by = "chi"
  ]
  individual_file_cols3 <- episode_file[,
    lapply(.SD, function(x) {
      data.table::uniqueN(x, na.rm = TRUE)
    }),
    .SDcols = cols3,
    by = "chi"
  ]
  individual_file_cols4 <- episode_file[,
    lapply(.SD, function(x) {
      sum(x, na.rm = TRUE)
    }),
    .SDcols = cols4,
    by = "chi"
  ]
  individual_file_cols5 <- episode_file[,
    lapply(.SD, function(x) max(x, na.rm = TRUE)),
    .SDcols = cols5,
    by = "chi"
  ]
  individual_file_cols6 <- episode_file[,
    .(
      # `any()` rather than `max()`: for a group whose `cij_ppa` is entirely
      # NA, `max(..., na.rm = TRUE)` warns and returns -Inf, which is
      # truthy, so the stay would wrongly be treated as preventable.
      # `as.numeric()` keeps the column a plain double for every group.
      preventable_beddays = if (any(as.logical(cij_ppa), na.rm = TRUE)) {
        as.numeric(max(cij_end_date) - min(cij_start_date))
      } else {
        NA_real_
      }
    ),
    # cij_marker has been renamed as cij_total
    by = c("chi", "cij_total")
  ]
  individual_file_cols6 <- individual_file_cols6[,
    .(
      preventable_beddays = sum(preventable_beddays, na.rm = TRUE)
    ),
    by = "chi"
  ]

  # The summaries share the same row order (see the key above), so they can
  # be bound column-wise once the repeated `chi` column is dropped.
  individual_file <- dplyr::bind_cols(
    individual_file_cols1,
    individual_file_cols2[, chi := NULL],
    individual_file_cols3[, chi := NULL],
    individual_file_cols4[, chi := NULL],
    individual_file_cols5[, chi := NULL],
    individual_file_cols6[, chi := NULL]
  )

  # convert back to tibble
  dplyr::as_tibble(individual_file)
}


#' Select column names ending with any of the given patterns
#'
#' @description Returns the names of `data` whose end matches at least one
#' of the regular expressions in `vars`.
#'
#' @param data An object with names, e.g. a data frame.
#' @param vars A character vector of regular expressions; they are combined
#' into a single pattern with `|`.
#' @param ignore_case Should the match ignore case? Defaults to `FALSE`.
#'
#' @return A character vector of the matching names, in their original order.
vars_end_with <- function(data, vars, ignore_case = FALSE) {
  col_names <- names(data)
  pattern <- stringr::regex(
    paste(vars, collapse = "|"),
    ignore_case = ignore_case
  )
  col_names[stringr::str_ends(col_names, pattern)]
}

#' Select column names starting with any of the given patterns
#'
#' @description Returns the names of `data` whose start matches at least one
#' of the regular expressions in `vars`.
#'
#' @param data An object with names, e.g. a data frame.
#' @param vars A character vector of regular expressions; they are combined
#' into a single pattern with `|`.
#' @param ignore_case Should the match ignore case? Defaults to `FALSE`.
#'
#' @return A character vector of the matching names, in their original order.
vars_start_with <- function(data, vars, ignore_case = FALSE) {
  col_names <- names(data)
  pattern <- stringr::regex(
    paste(vars, collapse = "|"),
    ignore_case = ignore_case
  )
  col_names[stringr::str_starts(col_names, pattern)]
}

#' Select column names containing any of the given patterns
#'
#' @description Returns the names of `data` that match at least one of the
#' regular expressions in `vars` anywhere in the name.
#'
#' @param data An object with names, e.g. a data frame.
#' @param vars A character vector of regular expressions; they are combined
#' into a single pattern with `|`.
#' @param ignore_case Should the match ignore case? Defaults to `FALSE`.
#'
#' @return A character vector of the matching names, in their original order.
vars_contain <- function(data, vars, ignore_case = FALSE) {
  col_names <- names(data)
  pattern <- stringr::regex(
    paste(vars, collapse = "|"),
    ignore_case = ignore_case
  )
  col_names[stringr::str_detect(col_names, pattern)]
}

#' Aggregate CIS episodes
#'
#' @description Aggregate the care home (CH) variables within each
#' combination of `chi` and `ch_chi_cis`. The number of rows is unchanged:
#' every row of a group receives the group's maximum `ch_no_cost`, earliest
#' `record_keydate1` (stored as `ch_ep_start`), latest `ch_ep_end` and mean
#' `ch_cost_per_day`.
#'
#' @details The input is converted to a `data.table` and updated by
#' reference, so the object passed in by the caller is modified as well.
#'
#' @inheritParams create_individual_file
#'
#' @return The episode file, as a tibble, with the four CH columns
#' overwritten / added.
aggregate_ch_episodes_zihao <- function(episode_file) {
  cli::cli_alert_info("Aggregate ch episodes function started at {Sys.time()}")

  # Work on a data.table so the grouped update happens in place
  data.table::setDT(episode_file)

  # All four right-hand sides are evaluated per group before any column is
  # assigned, so `max(ch_ep_end)` sees the original `ch_ep_end` values.
  episode_file[,
    c("ch_no_cost", "ch_ep_start", "ch_ep_end", "ch_cost_per_day") := list(
      max(ch_no_cost),
      min(record_keydate1),
      max(ch_ep_end),
      mean(ch_cost_per_day)
    ),
    by = c("chi", "ch_chi_cis")
  ]

  # Hand a tibble back to the caller
  tibble::as_tibble(episode_file)
}
Loading

0 comments on commit 6419d88

Please sign in to comment.