Skip to content

Commit

Permalink
add duplicated chi number to the tests in process_tests_individual_file
Browse files Browse the repository at this point in the history
  • Loading branch information
lizihao-anu committed Aug 9, 2023
1 parent b36386e commit 61451d0
Showing 1 changed file with 114 additions and 15 deletions.
129 changes: 114 additions & 15 deletions R/process_tests_individual_file.R
Original file line number Diff line number Diff line change
@@ -1,18 +1,117 @@
#' process_tests_individual_file
#' Process Individual file tests
#'
#' @description check whether individual files have duplicated rows for the same chi
#' @description Takes the processed individual file and produces
#' a test comparison with the previous data. This is written to disk as a CSV.
#'
#' @return NULL if no duplicated chi, OR rows with duplicated chi.
#' @param individual_file a data frame holding the individual file; it must
#' contain the identifier column selected by `anon_chi_in`
#' @param anon_chi_in `TRUE` to check the `anon_chi` column, `FALSE` (the
#' default) to check the `chi` column
process_tests_individual_file <- function(individual_file,
                                          anon_chi_in = FALSE) {
  # `anon_chi_in` is a scalar flag, so a plain `if` is used rather than the
  # vectorised `dplyr::if_else()`
  chi_col <- if (isTRUE(anon_chi_in)) "anon_chi" else "chi"

  chi_values <- individual_file[[chi_col]]
  if (is.null(chi_values)) {
    # Without this guard a missing column would be reported as
    # "no duplicated CHI", which hides the real problem
    stop(
      "Column `", chi_col, "` not found in `individual_file`",
      call. = FALSE
    )
  }

  # Flag every row whose identifier occurs more than once. Scanning from both
  # ends means the first occurrence is flagged as well as the repeats.
  is_dup <- duplicated(chi_values) | duplicated(chi_values, fromLast = TRUE)

  if (!any(is_dup)) {
    print("There is no duplicated CHI")
    return(NULL)
  }

  print("There are duplicated CHIs")
  # `drop = FALSE` keeps a data frame even if only one column is present
  individual_file[is_dup, , drop = FALSE]
}
#' @inherit process_tests_acute
#'
#' @export
process_tests_individual_file <- function(data, year) {
  # Name fragments identifying the activity / cost columns to keep
  measure_stems <- c(
    "beddays",
    "cost",
    "episodes",
    "attendances",
    "admissions",
    "cases",
    "consultations"
  )

  # Keep only the identifier, demographic, LTC and measure columns
  selected <- dplyr::select(
    data,
    "year",
    "anon_chi",
    "gender",
    "postcode",
    "dob",
    "hbrescode",
    "health_net_cost",
    slfhelper::ltc_vars,
    dplyr::contains(measure_stems)
  )

  # NOTE(review): presumably converts `anon_chi` back to `chi` - confirm
  # against the slfhelper documentation
  new_data <- slfhelper::get_chi(selected)

  # Previous version of the individual file to compare against
  old_data <- get_existing_data_for_tests(
    new_data,
    file_version = "individual"
  )

  comparison <- produce_test_comparison(
    old_data = produce_individual_file_tests(old_data),
    new_data = produce_individual_file_tests(new_data)
  )

  # Write the comparison to the tests workbook and hand back whatever the
  # writer returns
  written <- write_tests_xlsx(comparison, sheet_name = "indiv_file", year)

  written
}

#' Individual File Tests
#'
#' @description Produce the set of test measures for the individual file.
#' The output combines:
#' * sums of the flags created by [create_demog_test_flags()],
#'   [create_hb_test_flags()] and [create_hb_cost_test_flags()]
#'   (the latter two based on `hbrescode`);
#' * all measures for the bedday, cost, episode, attendance, admission,
#'   case and consultation variables;
#' * min-max measures for `health_net_cost`;
#' * sums of the LTC variables listed in `slfhelper::ltc_vars`;
#' * the number of duplicated `chi` values.
#'
#' @param data new or old individual file data for testing summary flags.
#' Column names are lower-cased before use; a `chi` column is read for the
#' duplicate count.
#'
#' @return a dataframe of `measure` / `value` rows
#' from [calculate_measures()], plus the duplicated CHI count
produce_individual_file_tests <- function(data) {
  # Lower-case the column names so the lookups below use one spelling
  names(data) <- tolower(names(data))

  test_flags <- data %>%
    # use functions to create demographic, HB and HB cost flags
    create_demog_test_flags() %>%
    create_hb_test_flags(.data$hbrescode) %>%
    create_hb_cost_test_flags(.data$hbrescode, .data$health_net_cost) %>%
    # keep variables for comparison
    dplyr::select(c("valid_chi":dplyr::last_col())) %>%
    # use function to sum new test flags
    calculate_measures(measure = "sum")

  all_measures <- data %>%
    calculate_measures(
      vars = c(
        "beddays",
        "cost",
        "episodes",
        "attendances",
        "admissions",
        "cases",
        # previously misspelled as "consulations"; this must agree with the
        # "consultations" columns kept by process_tests_individual_file()
        "consultations"
      ),
      measure = "all"
    )

  min_max_measures <- data %>%
    calculate_measures(
      vars = c(
        "health_net_cost"
      ),
      measure = "min-max"
    )

  sum_measures <- data %>%
    dplyr::select(slfhelper::ltc_vars) %>%
    calculate_measures(
      vars = c(
        slfhelper::ltc_vars
      ),
      measure = "sum"
    )

  # Count of rows whose `chi` repeats an earlier row
  dup_chi <- data.frame(
    measure = "duplicated chi number",
    value = as.integer(sum(duplicated(data$chi)))
  )

  # Joining on both columns stacks the individual results into one
  # measure / value table
  join_output <- list(
    test_flags,
    all_measures,
    min_max_measures,
    sum_measures,
    dup_chi
  ) %>%
    purrr::reduce(dplyr::full_join, by = c("measure", "value"))

  join_output
}

0 comments on commit 61451d0

Please sign in to comment.