Skip to content

Commit

Permalink
read_erp function (#261)
Browse files Browse the repository at this point in the history
* Initial version. Attempted to match style of read_cpi function.

* The sex parameter now accepts a vector of valid values
  • Loading branch information
fmarkham authored Jan 14, 2025
1 parent 9b2d4d7 commit c2f85fb
Showing 1 changed file with 164 additions and 0 deletions.
164 changes: 164 additions & 0 deletions R/read_erp.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
#' Download a tidy tibble containing the Estimated Resident Population from
#' the ABS
#'
#' \code{read_erp()} uses the \code{read_abs()} function to download, import,
#' and tidy the Estimated Resident Population (ERP) from the ABS (catalogue
#' number 3101.0). It allows the user to specify the ages, sexes and
#' states/territories of interest. It returns a tibble containing four
#' columns: the date, the state, the sex and the ERP, where the ERP is summed
#' across all of the requested ages. This makes joining the ERP to another
#' dataframe easy.
#'
#' @param age_range numeric; default is \code{0:100}. A vector containing ages
#' in single years for which an ERP is sought. Values must be whole numbers
#' between 0 and 100 with no missing values. The ABS top-code ages at 100.
#'
#' @param sex character; default is
#' \code{c("Persons", "Male", "Female")}, i.e. all three series. One or more
#' of "Persons", "Male" and "Female".
#'
#' @param states character; default is Australia plus every state and
#' self-governing territory. One or more full or abbreviated names of
#' Australia, the states and the self-governing territories
#' (e.g. "New South Wales" or "NSW").
#'
#' @param path character; default is the value of the \code{R_READABS_PATH}
#' environment variable or, if that is unset, \code{tempdir()}. Local
#' directory in which to save downloaded ABS time series spreadsheets.
#' Passed to \code{read_abs()}.
#'
#' @param show_progress_bars logical; TRUE by default. If set to FALSE, progress
#' bars will not be shown when ABS spreadsheets are downloading.
#'
#' @param check_local logical; FALSE by default. See \code{?read_abs}.
#'
#' @param retain_files logical; FALSE by default. When TRUE, the spreadsheets
#' downloaded from the ABS website will be saved in the
#' directory specified with 'path'.
#'
#' @return An ungrouped tibble with one row per date, state and sex, sorted by
#' state, sex and date, with the columns \code{date}, \code{state},
#' \code{sex} and \code{erp} (the sum of the ERP over \code{age_range}).
#'
#' @examples
#'
#' # Create a tibble called 'erp' that contains the ERP of all ages, by sex,
#' # for Australia and each state and territory.
#' \donttest{
#' erp <- read_erp()
#' }
#'
#' # ERP of persons aged 15 to 64 in New South Wales.
#' \donttest{
#' erp_nsw <- read_erp(age_range = 15:64, sex = "Persons", states = "NSW")
#' }
#'
#' @importFrom dplyr filter select tibble mutate group_by summarise arrange
#' @importFrom tools toTitleCase
#'
#' @export

read_erp <- function(age_range = 0:100,
                     sex = c("Persons", "Male", "Female"),
                     states = c(
                       "Australia", "New South Wales", "Victoria",
                       "Queensland", "South Australia", "Western Australia",
                       "Tasmania", "Northern Territory",
                       "Australian Capital Territory"
                     ),
                     path = Sys.getenv("R_READABS_PATH", unset = tempdir()),
                     show_progress_bars = TRUE,
                     check_local = FALSE,
                     retain_files = FALSE) {
  if (!is.numeric(age_range)) {
    stop("The age_range argument must be a numeric vector.")
  }

  # Reject NA/NaN up front; otherwise the comparisons below would produce an
  # NA condition and fail with an unhelpful "missing value" error.
  if (anyNA(age_range)) {
    stop("The age_range argument must not contain missing values.")
  }

  # Check if all elements are whole numbers. trunc() is used rather than
  # as.integer() so very large or infinite values do not trigger coercion
  # warnings; they are rejected by the range check below instead.
  if (any(age_range != trunc(age_range))) {
    stop("All elements in age_range must be integers.")
  }

  # Check if all elements are between 0 and 100
  if (any(age_range < 0 | age_range > 100)) {
    stop("All elements in age_range must be between 0 and 100.")
  }

  # Restrict the 'sex' argument to the valid choices
  sex_options <- c("Persons", "Male", "Female")

  # Check if the input is a subset of the valid options
  if (!all(sex %in% sex_options)) {
    invalid_values <- sex[!sex %in% sex_options]
    stop(
      "Invalid value(s): ", paste(invalid_values, collapse = ", "),
      ". Allowed values are: Persons, Male, Female."
    )
  }

  # Restrict the states argument to valid choices but include abbreviations.
  # Always work with the full name, even if an abbreviation has been used.
  stes <- unname(vapply(states, validate_state, FUN.VALUE = character(1)))

  # A flag must be a single, non-missing logical value.
  is_flag <- function(x) {
    is.logical(x) && length(x) == 1L && !is.na(x)
  }

  if (!is_flag(retain_files)) {
    stop("`retain_files` must be either `TRUE` or `FALSE`.")
  }

  if (!is_flag(show_progress_bars)) {
    stop("`show_progress_bars` must be either `TRUE` or `FALSE`")
  }

  if (retain_files && !is.character(path)) {
    stop(
      "If `retain_files` is `TRUE`,",
      " you must specify a valid file path in `path`."
    )
  }

  # Only download the ABS tables that cover the requested states.
  ste_tbls <- state_lookup$tbl_n[state_lookup$full_name %in% stes]

  erp_raw <- read_abs(
    cat_no = "3101.0",
    tables = ste_tbls,
    retain_files = retain_files,
    check_local = check_local,
    show_progress_bars = show_progress_bars,
    path = path
  )

  # The age parsed from the series description is a string of digits, so the
  # requested ages are compared as strings as well.
  requested_ages <- as.character(as.integer(age_range))

  erp_raw %>%
    dplyr::mutate(
      # Keep only the digits of the series description, e.g. the single year
      # of age.
      age = gsub("[^0-9]", "", series),
      series_sex = gsub(".*;\\s*(Male|Female|Persons)\\s*;.*", "\\1", series),
      # The state is whatever follows the last comma of the table title.
      state = trimws(gsub(".*,(\\s*[^,]+)$", "\\1", table_title))
    ) %>%
    filter(
      age %in% requested_ages,
      series_sex %in% sex
    ) %>%
    group_by(date, series_sex, state) %>%
    # Sum over the requested ages and return an ungrouped tibble.
    summarise(erp = sum(value), .groups = "drop") %>%
    arrange(state, series_sex, date) %>%
    select(date, state, sex = series_sex, erp)
}

# Lookup table with one row per geography: the full name, the accepted
# abbreviation, and the number of the ABS ERP table (tbl_n) that is requested
# from read_abs() for that geography. The three columns are aligned by
# position, so the n-th entry of each vector describes the same geography.
state_lookup <- tibble(
  full_name = c(
    "Australia",
    "New South Wales",
    "Victoria",
    "Queensland",
    "South Australia",
    "Western Australia",
    "Tasmania",
    "Northern Territory",
    "Australian Capital Territory"
  ),
  abbrev = c(
    "Aus",
    "NSW",
    "Vic",
    "Qld",
    "SA",
    "WA",
    "Tas",
    "NT",
    "ACT"
  ),
  # Australia uses table 59; the states and territories use tables 51 to 58.
  tbl_n = c(59, 51, 52, 53, 54, 55, 56, 57, 58)
)

# Ensure that a user-specified state/territory name is acceptable and return
# its standard full name.
#
# `state` must be a single value. It may be a full name or an abbreviation
# from `state_lookup`; matching ignores case, surrounding whitespace and
# trailing periods, so "Vic.", "vic" and "VICTORIA" all give "Victoria".
# Throws an error if the value is not a single recognised name.
validate_state <- function(state) {
  # Guard against zero-length or multi-element input, which cannot be matched
  # against the lookup table as a single name.
  if (length(state) != 1L) {
    stop(
      "Invalid state. Please use a valid state name or abbreviation.",
      " Expected a single value but received ", length(state), "."
    )
  }

  # Standardise the input: drop surrounding whitespace and trailing periods
  # (people do things like "Vic." or "Tas."), then convert to lowercase for
  # case-insensitive matching.
  state_cleaned <- tolower(gsub("\\.+$", "", trimws(state)))

  # Look for a full name first, then fall back to the abbreviations. Both
  # columns share row positions, so either match indexes the full name.
  idx <- match(state_cleaned, tolower(state_lookup$full_name))
  if (is.na(idx)) {
    idx <- match(state_cleaned, tolower(state_lookup$abbrev))
  }

  if (is.na(idx)) {
    stop(
      "Invalid state. Please use a valid state name or abbreviation.",
      " Received: '", state, "'."
    )
  }

  # Return the standard full state name
  state_lookup$full_name[idx]
}

0 comments on commit c2f85fb

Please sign in to comment.