Skip to content

Commit

Permalink
Some updates to read_file() (#727)
Browse files Browse the repository at this point in the history
* Use `!!` injection operator as a simpler workaround

apache/arrow#36658

* Add explicit `.rds.gz` and `.csv.gz` extension handling

Previously this assumed any file ending in `.gz` was a CSV.

* Style code

---------

Co-authored-by: Moohan <[email protected]>
Co-authored-by: Jennit07 <[email protected]>
  • Loading branch information
3 people authored Jul 17, 2023
1 parent 3f86f89 commit 5272eed
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 13 deletions.
43 changes: 31 additions & 12 deletions R/read_file.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,27 @@
#' @return the data as a [tibble][tibble::tibble-package]
#' @export
read_file <- function(path, col_select = NULL, as_data_frame = TRUE, ...) {
valid_extensions <- c("rds", "fst", "sav", "zsav", "csv", "gz", "parquet")
valid_extensions <- c(
"rds",
"rds.gz",
"fst",
"sav",
"zsav",
"csv",
"csv.gz",
"parquet"
)

ext <- fs::path_ext(path)

if (ext == "gz") {
ext <- paste(
fs::path_ext(fs::path_ext_remove(path)),
"gz",
sep = "."
)
}

if (!(ext %in% valid_extensions)) {
cli::cli_abort(c(
"x" = "Invalid extension: {.val {ext}}",
Expand All @@ -36,17 +53,19 @@ read_file <- function(path, col_select = NULL, as_data_frame = TRUE, ...) {
}

data <- switch(ext,
"rds" = readr::read_rds(path),
"fst" = fst::read_fst(path),
"sav" = haven::read_spss(path, ...),
"zsav" = haven::read_spss(path, ...),
"csv" = readr::read_csv(path, ..., show_col_types = FALSE),
"gz" = readr::read_csv(path, ..., show_col_types = FALSE),
"parquet" = if (is.null(col_select)) {
arrow::read_parquet(path, as_data_frame = as_data_frame, ...)
} else {
arrow::read_parquet(path, col_select = col_select, as_data_frame = as_data_frame, ...)
}
"rds" = readr::read_rds(file = path),
"rds.gz" = readr::read_rds(file = path),
"fst" = tibble::as_tibble(fst::read_fst(path = path)),
"sav" = haven::read_spss(file = path, ...),
"zsav" = haven::read_spss(file = path, ...),
"csv" = readr::read_csv(file = path, ..., show_col_types = FALSE),
"csv.gz" = readr::read_csv(file = path, ..., show_col_types = FALSE),
"parquet" = arrow::read_parquet(
file = path,
col_select = !!col_select,
as_data_frame = as_data_frame,
...
)
)

return(data)
Expand Down
5 changes: 4 additions & 1 deletion tests/testthat/test-read_file.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
test_that("read_file works", {
rds_path <- tempfile(fileext = ".rds")
rds_gz_path <- tempfile(fileext = ".rds.gz")
fst_path <- tempfile(fileext = ".fst")
sav_path <- tempfile(fileext = ".sav")
zsav_path <- tempfile(fileext = ".zsav")
Expand All @@ -10,6 +11,7 @@ test_that("read_file works", {
aq_data <- tibble::as_tibble(datasets::airquality)

readr::write_rds(aq_data, rds_path)
readr::write_rds(aq_data, rds_gz_path)
fst::write_fst(aq_data, fst_path)
haven::write_sav(aq_data, sav_path)
haven::write_sav(aq_data, zsav_path, compress = "zsav")
Expand All @@ -18,7 +20,8 @@ test_that("read_file works", {
arrow::write_parquet(aq_data, parquet_path)

expect_equal(aq_data, read_file(rds_path))
expect_equal(aq_data, tibble::as_tibble(read_file(fst_path)))
expect_equal(aq_data, read_file(rds_gz_path))
expect_equal(aq_data, read_file(fst_path))
expect_equal(aq_data, haven::zap_formats(read_file(sav_path)))
expect_equal(aq_data, haven::zap_formats(read_file(zsav_path)))
expect_equal(aq_data, read_file(csv_gz_path))
Expand Down

0 comments on commit 5272eed

Please sign in to comment.