Skip to content

Commit

Permalink
Merge branch 'mar-23-update' into bug-service_use_cohort
Browse files Browse the repository at this point in the history
  • Loading branch information
Jennit07 authored Feb 14, 2024
2 parents b1a9523 + 842e616 commit 760e2e5
Show file tree
Hide file tree
Showing 9 changed files with 232 additions and 153 deletions.
8 changes: 4 additions & 4 deletions R/fix_sc_dates.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#' @return A date vector with replaced start dates
fix_sc_start_dates <- function(start_date, period_start) {
# Fix missing sds_start_date by setting start_date to be the start of
# financial year
# financial period
start_date <- dplyr::if_else(
is.na(start_date),
period_start,
Expand All @@ -30,12 +30,12 @@ fix_sc_start_dates <- function(start_date, period_start) {
#' @param period Social care latest submission period.
#'
#' @return A date vector with replaced end dates
fix_sc_end_dates <- function(start_date, end_date, period) {
fix_sc_end_dates <- function(start_date, end_date, period_end_date) {
# Fix sds_end_date is earlier than sds_start_date by setting end_date to be
# the end of financial year
end_date <- dplyr::if_else(
start_date > end_date,
end_fy(year = stringr::str_sub(period, 1L, 4L), "alternate"),
period_end_date,
end_date
)

Expand All @@ -57,7 +57,7 @@ fix_sc_end_dates <- function(start_date, end_date, period) {
#' @return A date vector with replaced end dates
fix_sc_missing_end_dates <- function(end_date, period_end) {
# Fix missing sds_end_date by setting end_date to be
# the end of financial year
# the end of financial period
end_date <- dplyr::if_else(
is.na(end_date),
period_end,
Expand Down
30 changes: 30 additions & 0 deletions R/process_extract_homelessness.R
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,36 @@ process_extract_homelessness <- function(
)
)
) %>%
dplyr::mutate(property_type_code = as.character(property_type_code)) %>%
dplyr::mutate(
property_type_code = dplyr::case_when(
property_type_code == "1" ~ "1 - Own Property - LA Tenancy",
property_type_code == "2" ~ "2 - Own Property - RSL Tenancy",
property_type_code == "3" ~ "3 - Own Property - private rented tenancy",
property_type_code == "4" ~ "4 - Own Property - tenancy secured through employment/tied house",
property_type_code == "5" ~ "5 - Own Property - owning/buying",
property_type_code == "6" ~ "6 - Parental / family home / relatives",
property_type_code == "7" ~ " 7 - Friends / partners",
property_type_code == "8" ~ "8 - Armed Services Accommodation",
property_type_code == "9" ~ "9 - Prison",
property_type_code == "10" ~ "10 - Hospital",
property_type_code == "11" ~ "11 - Children's residential accommodation (looked after by the local authority)",
property_type_code == "12" ~ "12 - Supported accommodation",
property_type_code == "13" ~ "13 - Hostel (unsupported)",
property_type_code == "14" ~ "14 - Bed & Breakfast",
property_type_code == "15" ~ "15 - Caravan / mobile home",
property_type_code == "16" ~ "16 - Long-term roofless",
property_type_code == "17" ~ "17 - Long-term sofa surfing",
property_type_code == "18" ~ "18 - Other",
property_type_code == "19" ~ "19 - Not known / refused",
property_type_code == "20" ~ "20 - Own property - Shared ownership/Shared equity/ LCHO",
property_type_code == "21" ~ "21 - Lodger",
property_type_code == "22" ~ "22 - Shared Property - Private Rented Sector",
property_type_code == "23" ~ "23 - Shared Property - Local Authority",
property_type_code == "24" ~ "24 - Shared Property - RSL",
TRUE ~ property_type_code
)
) %>%
dplyr::left_join(
la_code_lookup,
by = dplyr::join_by("sending_local_authority_code_9" == "CA")
Expand Down
97 changes: 61 additions & 36 deletions R/process_lookup_sc_demographics.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,30 +28,46 @@ process_lookup_sc_demographics <- function(
dplyr::pull(.data$pc7)


# Data Cleaning ---------------------------------------

# Fill in missing data and flag latest cases to keep ---------------------------------------
sc_demog <- data %>%
dplyr::mutate(
# use chi if upi is NA
upi = dplyr::coalesce(.data$upi, .data$chi_upi),
# check gender code - replace code 99 with 9
submitted_gender = replace(.data$submitted_gender, .data$submitted_gender == 99L, 9L)
dplyr::rename(
chi = chi_upi,
gender = chi_gender_code,
dob = chi_date_of_birth
) %>%
# fill in missing demographic details
dplyr::arrange(period, social_care_id) %>%
dplyr::group_by(social_care_id, sending_location) %>%
tidyr::fill(chi, .direction = ("updown")) %>%
tidyr::fill(dob, .direction = ("updown")) %>%
tidyr::fill(date_of_death, .direction = ("updown")) %>%
tidyr::fill(gender, .direction = ("updown")) %>%
tidyr::fill(chi_postcode, .direction = ("updown")) %>%
tidyr::fill(submitted_postcode, .direction = ("updown")) %>%
dplyr::ungroup() %>%
# format postcodes using `phsmethods`
dplyr::mutate(dplyr::across(tidyselect::contains("postcode"), ~ phsmethods::format_postcode(.x, format = "pc7"))) # are sc postcodes even used anywhere?


# flag unique cases of chi and sc_id, and flag the latest record (sc_demographics latest flag is not accurate)
sc_demog <- sc_demog %>%
dplyr::group_by(chi, sending_location) %>%
dplyr::mutate(latest = dplyr::last(period)) %>% # flag latest period for chi
dplyr::group_by(chi, social_care_id, sending_location) %>%
dplyr::mutate(latest_sc_id = dplyr::last(period)) %>% # flag latest period for social care
dplyr::group_by(chi, sending_location) %>%
dplyr::mutate(last_sc_id = dplyr::last(social_care_id)) %>%
dplyr::mutate(
# use CHI sex if available
gender = dplyr::if_else(
is.na(.data$chi_gender_code) | .data$chi_gender_code == 9L,
.data$submitted_gender,
.data$chi_gender_code
),
# Use CHI DoB if available
dob = dplyr::coalesce(.data$chi_date_of_birth, .data$submitted_date_of_birth)
latest_flag = ifelse((latest == period & last_sc_id == social_care_id) | is.na(chi), 1, 0),
keep = ifelse(latest_sc_id == period, 1, 0)
) %>%
# format postcodes using `phsmethods`
dplyr::mutate(dplyr::across(
tidyselect::contains("postcode"),
~ phsmethods::format_postcode(.x, format = "pc7")
))
dplyr::ungroup()

sc_demog <- sc_demog %>%
dplyr::select(-period, -latest_record_flag, -latest, -last_sc_id, -latest_sc_id) %>%
dplyr::distinct()

# postcodes ---------------------------------------------------------------

# count number of na postcodes
na_postcodes <- sc_demog %>%
Expand All @@ -69,29 +85,32 @@ process_lookup_sc_demographics <- function(
~ dplyr::if_else(stringr::str_detect(.x, uk_pc_regexp), .x, NA)
)) %>%
dplyr::select(
"latest_record_flag",
"extract_date",
"sending_location",
"social_care_id",
"upi",
"chi",
"gender",
"dob",
"date_of_death",
"submitted_postcode",
"chi_postcode"
"chi_postcode",
"keep",
"latest_flag"
) %>%
# check if submitted_postcode matches with postcode lookup
dplyr::mutate(
valid_pc = .data$submitted_postcode %in% valid_spd_postcodes
valid_pc_submitted = .data$submitted_postcode %in% valid_spd_postcodes,
valid_pc_chi = .data$chi_postcode %in% valid_spd_postcodes
) %>%
# use chi_postcode if valid, otherwise use submitted_postcode if valid
dplyr::mutate(postcode = dplyr::case_when(
(!is.na(.data$submitted_postcode) & .data$valid_pc) ~ .data$submitted_postcode,
(is.na(.data$submitted_postcode) & !.data$valid_pc) ~ .data$chi_postcode
(!is.na(.data$chi_postcode) & .data$valid_pc_chi) ~ .data$chi_postcode,
((is.na(.data$chi_postcode) | !(.data$valid_pc_chi)) & !(is.na(.data$submitted_postcode)) & .data$valid_pc_submitted) ~ .data$submitted_postcode,
(is.na(.data$submitted_postcode) & !.data$valid_pc_submitted) ~ .data$chi_postcode
)) %>%
dplyr::mutate(postcode_type = dplyr::case_when(
(!is.na(.data$submitted_postcode) & .data$valid_pc) ~ "submitted",
(is.na(.data$submitted_postcode) & !.data$valid_pc) ~ "chi",
(is.na(.data$submitted_postcode) & is.na(.data$chi_postcode)) ~ "missing"
(postcode == chi_postcode) ~ "chi",
(postcode == submitted_postcode) ~ "submitted",
(is.na(.data$submitted_postcode) & is.na(.data$chi_postcode) | is.na(.data$postcode)) ~ "missing"
))

# Check where the postcodes are coming from
Expand All @@ -102,26 +121,32 @@ process_lookup_sc_demographics <- function(
na_replaced_postcodes <- sc_demog %>%
dplyr::count(dplyr::across(tidyselect::ends_with("_postcode"), ~ is.na(.x)))


sc_demog_lookup <- sc_demog %>%
dplyr::filter(keep == 1) %>% # filter to only keep latest record for sc id and chi
dplyr::select(-postcode_type, -valid_pc_submitted, -valid_pc_chi, -submitted_postcode, -chi_postcode) %>%
dplyr::distinct() %>%
# group by sending location and ID
dplyr::group_by(.data$sending_location, .data$social_care_id) %>%
dplyr::group_by(.data$sending_location, .data$chi, .data$social_care_id, .data$latest_flag) %>%
# arrange so latest submissions are last
dplyr::arrange(
.data$sending_location,
.data$social_care_id,
.data$latest_record_flag,
.data$extract_date
.data$latest_flag
) %>%
# summarise to select the last (non NA) submission
dplyr::summarise(
chi = dplyr::last(.data$upi),
gender = dplyr::last(.data$gender),
dob = dplyr::last(.data$dob),
postcode = dplyr::last(.data$postcode)
postcode = dplyr::last(.data$postcode),
date_of_death = dplyr::last(.data$date_of_death)
) %>%
dplyr::ungroup()

# check to make sure all cases of chi are still there
dplyr::n_distinct(sc_demog_lookup$chi) # 524810
dplyr::n_distinct(sc_demog_lookup$social_care_id) # 636404


if (write_to_disk) {
write_file(
sc_demog_lookup,
Expand Down
Loading

0 comments on commit 760e2e5

Please sign in to comment.