Add support for bespoke MRM formatting in read_lfs_datacube (#255)

* Add support for bespoke MRM formatting in `read_lfs_datacube`
* iterate version, add @angusmoore as ctb
* fix DESCRIPTION
* use newer Roxygen

---------

Co-authored-by: Matt Cowgill <[email protected]>
MattCowgill · Aug 4, 2024 · 524b6c2 · 524b6c2
1 parent 07a4be4
commit 524b6c2
Show file tree

Hide file tree

Showing 4 changed files with 57 additions and 13 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,14 +1,16 @@
 Package: readabs
 Type: Package
 Title: Download and Tidy Time Series Data from the Australian Bureau of Statistics
-Version: 0.4.16.903
+Version: 0.4.16.904
 Authors@R: c(
            person("Matt", "Cowgill", role = c("aut", "cre"), email = "[email protected]", comment = c(ORCID = "0000-0003-0422-3300")),
            person("Zoe", "Meers", role = "aut", email = "[email protected]"),
            person("Jaron", "Lee", role = "aut", email = "[email protected]"),
            person("David", "Diviny", role = "aut", email = "[email protected]"),
            person("Hugh", "Parsonage", role = "ctb", email = "[email protected]"),
-           person("Kinto", "Behr", role = "ctb", email = "[email protected]"))
+           person("Kinto", "Behr", role = "ctb", email = "[email protected]"),
+           person("Angus", "Moore", role = "ctb")
+           )
 Maintainer: Matt Cowgill <[email protected]>
 Description: Downloads, imports, and tidies time series data from the 
     Australian Bureau of Statistics <https://www.abs.gov.au/>.
@@ -32,7 +34,7 @@ Imports:
     labelled
 URL: https://github.com/mattcowgill/readabs
 BugReports: https://github.com/mattcowgill/readabs/issues
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.1
 VignetteBuilder: knitr
 Suggests: 
     knitr,

diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,6 @@
 # readabs development version
 * The experimental `readabs::read_api()` function no longer coerces columns describing the data categories to numeric. Thanks to @kletts. 
+* `read_lfs_datacube()` is now able to fetch modelled SA4 labour force estimates. Thanks to @AngusMoore.
 * Bug fix to read_job_mobility()
 
 # readabs 0.4.16

diff --git a/R/download_data_cube.r b/R/download_data_cube.r
@@ -99,6 +99,35 @@ download_abs_data_cube <- function(catalogue_string,
   return(invisible(filepath))
 }
 
+#' Read one sheet of an MRM-format labour force workbook into long format.
+#'
+#' @param file Path to the Excel workbook; passed to `readxl::read_excel()`.
+#' @param sheet Name of the sheet to read (e.g. `"Table 1"`).
+#' @param variable_name Character string written to the `variable` column of
+#'   the result to label the measure held on this sheet.
+#' @return A tibble with columns `SA4_code`, `SA4_name`, `variable`, `date`
+#'   and `value`. Rows whose `value` is missing are dropped.
+#' @noRd
+read_lfs_mrm_table <- function(file, sheet, variable_name) {
+  # The first four rows of the sheet are skipped before the header row.
+  wide <- readxl::read_excel(file, sheet = sheet, skip = 4)
+
+  # Every column that does not match "SA4" becomes a (date, value) pair.
+  long <- tidyr::pivot_longer(
+    wide,
+    !tidyr::matches("SA4"),
+    names_to = "date",
+    values_to = "value"
+  )
+
+  # `variable_name` is a string, not a column, so the former
+  # `!is.na({{variable_name}})` was always TRUE and filtered nothing.
+  # Test the pivoted values themselves so empty cells are removed.
+  long <- dplyr::filter(long, !is.na(.data$value))
+
+  long <- dplyr::mutate(
+    long,
+    # Characters 1-3 of the SA4 label are kept as the code and everything
+    # from character 5 onwards as the name.
+    SA4_code = substr(.data$SA4, 1, 3),
+    SA4_name = substr(.data$SA4, 5, nchar(.data$SA4)),
+    variable = variable_name,
+    # Column headers are treated as day counts from 1899-12-30
+    # (the Excel serial-date origin).
+    date = as.Date(as.numeric(.data$date), origin = "1899-12-30")
+  )
+
+  long[, c("SA4_code", "SA4_name", "variable", "date", "value")]
+}
+
+#' Read every table of an MRM-format labour force workbook.
+#'
+#' @param file Path to the Excel workbook.
+#' @return The long-format rows of sheets "Table 1" to "Table 6", stacked in
+#'   sheet order into a single data frame.
+#' @noRd
+read_lfs_mrm <- function(file) {
+  # Sheet name -> label written to the `variable` column for that sheet.
+  sheet_variables <- c(
+    "Table 1" = "employed_persons_000s",
+    "Table 2" = "unemployed_persons_000s",
+    "Table 3" = "nilf_persons_000s",
+    "Table 4" = "emp_to_pop_ratio",
+    "Table 5" = "unemployment_rate",
+    "Table 6" = "participation_rate"
+  )
+
+  tables <- Map(
+    function(sheet, variable_name) {
+      read_lfs_mrm_table(file, sheet, variable_name)
+    },
+    names(sheet_variables),
+    sheet_variables
+  )
+
+  # Namespace the call explicitly, matching read_lfs_mrm_table(), so this
+  # helper does not depend on `bind_rows` being imported into the package.
+  dplyr::bind_rows(unname(tables))
+}
+
 #' Convenience function to download and tidy data cubes from
 #' ABS Labour Force, Australia, Detailed.
 #' @param cube character. A character string that is either the complete filename
@@ -118,17 +147,22 @@ read_lfs_datacube <- function(cube,
     cube = cube,
     path = path
   )
-  df <- file |>
-    readxl::read_excel(
-      sheet = "Data 1",
-      skip = 3
-    ) |>
-    rename(date = 1) %>%
-    mutate(date = as.Date(date))
 
-  colnames(df) <- tolower(colnames(df))
-  colnames(df) <- gsub(" |-|:", "_", colnames(df))
-  colnames(df) <- gsub("\\(|\\)|\\'", "", colnames(df))
+  if (cube == "MRM" || cube == "MRM1") {
+    df <- read_lfs_mrm(file)
+  } else {
+    df <- file |>
+      readxl::read_excel(
+        sheet = "Data 1",
+        skip = 3
+      ) |>
+      rename(date = 1) %>%
+      mutate(date = as.Date(date))
+
+    colnames(df) <- tolower(colnames(df))
+    colnames(df) <- gsub(" |-|:", "_", colnames(df))
+    colnames(df) <- gsub("\\(|\\)|\\'", "", colnames(df))
+  }
 
   df
 }

diff --git a/tests/testthat/test-download_data_cube.R b/tests/testthat/test-download_data_cube.R
@@ -9,3 +9,10 @@ test_that("read_lfs_datacube", {
   expect_length(lm1, 10)
   expect_gt(nrow(lm1), 300000)
 })
+
+# Test for MRM, which has a different format and therefore its own parsing logic
+test_that("read_lfs_datacube - MRM", {
+  # read_lfs_datacube() downloads the cube, so do not run where network
+  # access is unavailable or not permitted.
+  skip_on_cran()
+  skip_if_offline()
+
+  mrm1 <- read_lfs_datacube("MRM1")
+  expect_s3_class(mrm1, "tbl_df")
+  expect_named(mrm1, c("SA4_code", "SA4_name", "variable", "date", "value"))
+  expect_s3_class(mrm1$date, "Date")
+  expect_gt(nrow(mrm1), 30000)
+})