Skip to content

Commit

Permalink
fixed bugs in the keep population flag. (#874)
Browse files Browse the repository at this point in the history
- filter out where gender not 1 or 2
- deal with negative ages where dob is after mid point of year
- previously filtered out cases where the date of death was before the mid-point of the year,
but this was also accidentally removing cases with missing death dates. Now fixed.
- added a filter to keep only NSU records, applied once the population calculations are done
  • Loading branch information
SwiftySalmon authored Dec 7, 2023
2 parents 01f0d1b + 9b7a20a commit bbf6447
Showing 1 changed file with 5 additions and 3 deletions.
8 changes: 5 additions & 3 deletions R/add_keep_population_flag.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ add_keep_population_flag <- function(individual_file, year) {
year_available <- pop_estimates %>%
dplyr::pull(year) %>%
unique()

if (calendar_year %in% year_available) {
pop_estimates <- pop_estimates %>%
dplyr::filter(year == calendar_year)
Expand Down Expand Up @@ -70,17 +71,18 @@ add_keep_population_flag <- function(individual_file, year) {
# If they don't have a locality, they're no good as we won't have an estimate to match them against.
# Same for age and gender.
nsu_keep_lookup <- individual_file %>%
dplyr::filter(gender == 1 | gender == 2) %>%
dplyr::filter(!is.na(locality), !is.na(age)) %>%
# Remove people who died before the mid-point of the calendar year.
# This will make our numbers line up better with the methodology used for the mid-year population estimates.
# anyone who died 5 years before the file shouldn't be in it anyway...
dplyr::filter(death_date > mid_year | nsu != 0) %>%
dplyr::filter(death_date > mid_year | is.na(death_date) | nsu != 0) %>%
# Calculate the populations of the whole SLF and of the NSU.
dplyr::group_by(locality, age_group, gender) %>%
dplyr::mutate(
nsu_population = sum(nsu),
total_source_population = dplyr::n()
) %>%
dplyr::filter(nsu == 1) %>%
dplyr::left_join(pop_estimates,
by = c("locality", "age_group", "gender")
) %>%
Expand Down Expand Up @@ -137,7 +139,7 @@ add_age_group <- function(data, age_var_name) {
data <- data %>%
dplyr::mutate(
age_group = dplyr::case_when(
{{ age_var_name }} >= 0 & {{ age_var_name }} <= 4 ~ "0-4",
{{ age_var_name }} >= -1 & {{ age_var_name }} <= 4 ~ "0-4",
{{ age_var_name }} >= 5 & {{ age_var_name }} <= 14 ~ "5-14",
{{ age_var_name }} >= 15 & {{ age_var_name }} <= 24 ~ "15-24",
{{ age_var_name }} >= 25 & {{ age_var_name }} <= 34 ~ "25-34",
Expand Down

0 comments on commit bbf6447

Please sign in to comment.