-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add duplicated chi number to the tests in process_tests_individual_file
- Loading branch information
1 parent
b36386e
commit 61451d0
Showing
1 changed file
with
114 additions
and
15 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,18 +1,117 @@ | ||
#' process_tests_individual_file | ||
#' Process Individual file tests | ||
#' | ||
#' @description check whether individual files have duplicated rows for the same chi | ||
#' @description Takes the processed individual file and produces | ||
#' a test comparison with the previous data. This is written to disk as a sheet of an xlsx workbook (see `write_tests_xlsx()`). | ||
#' | ||
#' @return NULL if no duplicated chi, OR rows with duplicated chi. | ||
process_tests_individual_file <- function(individual_file, anon_chi_in = FALSE) { | ||
chi_col <- dplyr::if_else(anon_chi_in, "anon_chi", "chi") | ||
duplicated_chi <- duplicated(individual_file[[chi_col]]) | ||
dup_num <- sum(duplicated_chi) | ||
if (dup_num < 1L) { | ||
print("There is no duplicated CHI") | ||
return(NULL) | ||
} else { | ||
print("There are duplicated CHIs") | ||
return(individual_file %>% | ||
dplyr::filter(!!sym(chi_col)) %in% duplicated_chi) | ||
} | ||
#' @inherit process_tests_acute | ||
#' | ||
#' @export | ||
process_tests_individual_file <- function(data, year) {
  # Name fragments identifying the activity / cost columns that are kept
  # for testing (matched with dplyr::contains()).
  measure_patterns <- c(
    "beddays",
    "cost",
    "episodes",
    "attendances",
    "admissions",
    "cases",
    "consultations"
  )

  # Cut the file down to the identifier, demographic, LTC and measure
  # columns, then apply slfhelper::get_chi().
  # NOTE(review): get_chi() presumably turns `anon_chi` into `chi` - confirm.
  data <- data %>%
    dplyr::select(
      "year",
      "anon_chi",
      "gender",
      "postcode",
      "dob",
      "hbrescode",
      "health_net_cost",
      slfhelper::ltc_vars,
      dplyr::contains(measure_patterns)
    ) %>%
    slfhelper::get_chi()

  # Existing ("individual" version) data to compare the new file against
  old_data <- get_existing_data_for_tests(data, file_version = "individual")

  # Run the same set of tests over both datasets
  old_tests <- produce_individual_file_tests(old_data)
  new_tests <- produce_individual_file_tests(data)

  # Build the old-vs-new comparison and write it to the "indiv_file" sheet;
  # whatever write_tests_xlsx() returns is handed back to the caller.
  comparison <- write_tests_xlsx(
    produce_test_comparison(
      old_data = old_tests,
      new_data = new_tests
    ),
    sheet_name = "indiv_file",
    year
  )

  return(comparison)
}
|
||
#' Individual File Tests
#'
#' @description Produce the set of tests used for the individual file.
#' This will produce sums of the demographic flags created by
#' [create_demog_test_flags()], of the `hbrescode` flags created by
#' [create_hb_test_flags()] and of the `hbrescode` cost flags created by
#' [create_hb_cost_test_flags()] (using `health_net_cost`).
#' It will also produce, via [calculate_measures()]:
#' * "all" summary statistics for the bedday, cost, episode, attendance,
#'   admission, case and consultation variables,
#' * "min-max" statistics for `health_net_cost`,
#' * sums of the LTC variables listed in `slfhelper::ltc_vars`,
#'
#' and a count of duplicated values in the `chi` column.
#'
#' @param data new or old individual file data to be tested, as passed by
#' [process_tests_individual_file()]. Column names are lower-cased before
#' any test is run.
#'
#' @return a dataframe with the columns `measure` and `value`, holding every
#' measure described above.
produce_individual_file_tests <- function(data) {
  names(data) <- tolower(names(data))

  test_flags <- data %>%
    # use functions to create demographic, HB and HB cost flags
    create_demog_test_flags() %>%
    create_hb_test_flags(.data$hbrescode) %>%
    create_hb_cost_test_flags(.data$hbrescode, .data$health_net_cost) %>%
    # keep variables for comparison
    dplyr::select(c("valid_chi":dplyr::last_col())) %>%
    # use function to sum new test flags
    calculate_measures(measure = "sum")

  all_measures <- data %>%
    calculate_measures(
      vars = c(
        "beddays",
        "cost",
        "episodes",
        "attendances",
        "admissions",
        "cases",
        # Previously misspelt as "consulations", which did not match the
        # "consultations" columns kept by process_tests_individual_file()
        "consultations"
      ),
      measure = "all"
    )

  min_max_measures <- data %>%
    calculate_measures(
      vars = c(
        "health_net_cost"
      ),
      measure = "min-max"
    )

  sum_measures <- data %>%
    dplyr::select(slfhelper::ltc_vars) %>%
    calculate_measures(
      vars = c(
        slfhelper::ltc_vars
      ),
      measure = "sum"
    )

  # Number of rows whose chi has already appeared earlier in the data.
  # Note that duplicated() also counts repeated NA values as duplicates.
  dup_chi <- data.frame(
    measure = "duplicated chi number",
    value = duplicated(data$chi) %>%
      sum() %>%
      as.integer()
  )

  # Every piece shares the measure / value layout, so a full join on both
  # columns stacks them into a single table.
  join_output <- list(
    test_flags,
    all_measures,
    min_max_measures,
    sum_measures,
    dup_chi
  ) %>%
    purrr::reduce(dplyr::full_join, by = c("measure", "value"))

  return(join_output)
}