Skip to content

Commit

Permalink
Add support for bespoke MRM formatting in read_lfs_datacube (#255)
Browse files Browse the repository at this point in the history
* Add support for bespoke MRM formatting in `read_lfs_datacube`

* iterate version, add @angusmoore as ctb

* fix DESCRIPTION

* use newer Roxygen

---------

Co-authored-by: Matt Cowgill <[email protected]>
  • Loading branch information
angusmoore and MattCowgill authored Aug 4, 2024
1 parent 07a4be4 commit 524b6c2
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 13 deletions.
8 changes: 5 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
Package: readabs
Type: Package
Title: Download and Tidy Time Series Data from the Australian Bureau of Statistics
Version: 0.4.16.903
Version: 0.4.16.904
Authors@R: c(
person("Matt", "Cowgill", role = c("aut", "cre"), email = "[email protected]", comment = c(ORCID = "0000-0003-0422-3300")),
person("Zoe", "Meers", role = "aut", email = "[email protected]"),
person("Jaron", "Lee", role = "aut", email = "[email protected]"),
person("David", "Diviny", role = "aut", email = "[email protected]"),
person("Hugh", "Parsonage", role = "ctb", email = "[email protected]"),
person("Kinto", "Behr", role = "ctb", email = "[email protected]"))
person("Kinto", "Behr", role = "ctb", email = "[email protected]"),
person("Angus", "Moore", role = "ctb")
)
Maintainer: Matt Cowgill <[email protected]>
Description: Downloads, imports, and tidies time series data from the
Australian Bureau of Statistics <https://www.abs.gov.au/>.
Expand All @@ -32,7 +34,7 @@ Imports:
labelled
URL: https://github.com/mattcowgill/readabs
BugReports: https://github.com/mattcowgill/readabs/issues
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
VignetteBuilder: knitr
Suggests:
knitr,
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# readabs development version
* The experimental `readabs::read_api()` function no longer coerces columns describing the data categories to numeric. Thanks to @kletts.
* `read_lfs_datacube()` is now able to fetch modelled SA4 labour force estimates. Thanks to @AngusMoore.
* Bug fix to read_job_mobility()

# readabs 0.4.16
Expand Down
54 changes: 44 additions & 10 deletions R/download_data_cube.r
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,35 @@ download_abs_data_cube <- function(catalogue_string,
return(invisible(filepath))
}

#' Read one sheet of the MRM workbook into long format
#'
#' Reads `sheet` from `file` (skipping the first four rows), pivots every
#' column whose name does not match "SA4" into `date`/`value` pairs, and
#' splits the `SA4` column into a code and a name.
#'
#' @param file Path to the Excel workbook.
#' @param sheet Name of the sheet to read, passed to `readxl::read_excel()`.
#' @param variable_name Character string stored in the `variable` column of
#'   every returned row.
#' @return A tibble with columns `SA4_code`, `SA4_name`, `variable`, `date`
#'   and `value`. Rows with a missing `value` are dropped.
#' @noRd
read_lfs_mrm_table <- function(file, sheet, variable_name) {
  raw <- readxl::read_excel(file, sheet = sheet, skip = 4)

  long <- tidyr::pivot_longer(
    raw,
    !tidyr::matches("SA4"),
    names_to = "date",
    values_to = "value"
  )

  df <- long |>
    # `variable_name` is a string, not a column, so embracing it with `{{ }}`
    # produced a condition that was always TRUE. Filter on the pivoted values
    # instead so that empty cells are dropped.
    # NOTE(review): assumes dropping NA values was the original intent -
    # confirm against the workbook layout.
    dplyr::filter(!is.na(.data$value)) |>
    dplyr::mutate(
      # The first three characters of `SA4` are taken as the code; the name
      # starts at character five.
      SA4_code = substr(.data$SA4, 1, 3),
      SA4_name = substr(.data$SA4, 5, nchar(.data$SA4)),
      variable = variable_name,
      # Column headers are numeric day counts from 1899-12-30 (the Excel
      # serial-date origin); headers that are not numeric become NA.
      date = as.Date(as.numeric(.data$date), origin = "1899-12-30")
    )

  df[, c("SA4_code", "SA4_name", "variable", "date", "value")]
}

#' Read all six tables of the MRM workbook into a single long tibble
#'
#' Calls `read_lfs_mrm_table()` once for each of the sheets "Table 1" to
#' "Table 6" and stacks the results row-wise.
#'
#' @param file Path to the Excel workbook.
#' @return The row-bound output of `read_lfs_mrm_table()` for all six sheets,
#'   in sheet order.
#' @noRd
read_lfs_mrm <- function(file) {
  # Sheet name -> label written to the `variable` column for that sheet.
  sheet_variables <- c(
    "Table 1" = "employed_persons_000s",
    "Table 2" = "unemployed_persons_000s",
    "Table 3" = "nilf_persons_000s",
    "Table 4" = "emp_to_pop_ratio",
    "Table 5" = "unemployment_rate",
    "Table 6" = "participation_rate"
  )

  tables <- Map(
    function(sheet, variable_name) {
      read_lfs_mrm_table(file, sheet, variable_name)
    },
    names(sheet_variables),
    unname(sheet_variables)
  )

  # Namespace-qualified, like the dplyr calls in read_lfs_mrm_table(), so this
  # does not depend on `bind_rows` being imported into the package namespace.
  dplyr::bind_rows(unname(tables))
}

#' Convenience function to download and tidy data cubes from
#' ABS Labour Force, Australia, Detailed.
#' @param cube character. A character string that is either the complete filename
Expand All @@ -118,17 +147,22 @@ read_lfs_datacube <- function(cube,
cube = cube,
path = path
)
df <- file |>
readxl::read_excel(
sheet = "Data 1",
skip = 3
) |>
rename(date = 1) %>%
mutate(date = as.Date(date))

colnames(df) <- tolower(colnames(df))
colnames(df) <- gsub(" |-|:", "_", colnames(df))
colnames(df) <- gsub("\\(|\\)|\\'", "", colnames(df))
if (cube == "MRM" || cube == "MRM1") {
df <- read_lfs_mrm(file)
} else {
df <- file |>
readxl::read_excel(
sheet = "Data 1",
skip = 3
) |>
rename(date = 1) %>%
mutate(date = as.Date(date))

colnames(df) <- tolower(colnames(df))
colnames(df) <- gsub(" |-|:", "_", colnames(df))
colnames(df) <- gsub("\\(|\\)|\\'", "", colnames(df))
}

df
}
Expand Down
7 changes: 7 additions & 0 deletions tests/testthat/test-download_data_cube.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,10 @@ test_that("read_lfs_datacube", {
expect_length(lm1, 10)
expect_gt(nrow(lm1), 300000)
})

# Test for MRM, which has a different format and therefore its own parsing logic
test_that("read_lfs_datacube - MRM", {
  # read_lfs_datacube() downloads the cube, so do not run this on CRAN or
  # when there is no internet connection.
  skip_on_cran()
  skip_if_offline()

  mrm1 <- read_lfs_datacube("MRM1")

  expect_s3_class(mrm1, "tbl_df")
  # The MRM parser returns exactly these columns, in this order.
  expect_named(
    mrm1,
    c("SA4_code", "SA4_name", "variable", "date", "value")
  )
  expect_gt(nrow(mrm1), 30000)
})

0 comments on commit 524b6c2

Please sign in to comment.