Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[draft] tar_terra_rast: implement {gdalraster} SOZip preserve_metadata method #123

Open
wants to merge 14 commits into
base: master
Choose a base branch
from
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -47,7 +47,8 @@ Suggests:
sf,
stars,
testthat (>= 3.0.0),
fs
fs,
gdalraster
Config/testthat/edition: 3
URL: https://github.com/njtierney/geotargets, http://geotargets.njtierney.com
BugReports: https://github.com/njtierney/geotargets/issues
137 changes: 92 additions & 45 deletions R/tar-terra-rast.R
Original file line number Diff line number Diff line change
@@ -83,12 +83,21 @@ tar_terra_rast <- function(name,
drv <- get_gdal_available_driver_list("raster")
filetype <- rlang::arg_match0(filetype, drv$name)

# currently only "drop" and "zip" are valid options
# various methods for packaging geospatial data and auxiliary files
preserve_metadata <- preserve_metadata %||% "drop"
preserve_metadata <- rlang::arg_match0(preserve_metadata, c("drop", "zip"))
preserve_metadata <- rlang::arg_match0(preserve_metadata, c("drop", "zip", "gdalraster_sozip"))

# ensure that user-passed `resources` doesn't include `custom_format`
check_user_resources(resources)

if (preserve_metadata == "gdalraster_sozip") {
check_pkg_installed("gdalraster")
}

# ensure that user-passed `resources` doesn't include `custom_format`
if ("custom_format" %in% names(resources)) {
cli::cli_abort("{.val custom_format} cannot be supplied to targets created with {.fn tar_terra_rast}")
}

name <- targets::tar_deparse_language(substitute(name))

@@ -135,50 +144,88 @@ tar_terra_rast <- function(name,
}

# Build the reader used to load a stored raster target.
#
# `preserve_metadata` is a single string; `switch()` returns the reader
# function for the matching branch. Every reader takes the stored `path` and
# returns the result of `terra::rast()`.
#
# NOTE(review): two `switch()` calls appear below. The first lacks the
# `gdalraster_sozip` branch and looks like the pre-change side of the rendered
# diff; the second is the one whose value the function would return.
tar_rast_read <- function(preserve_metadata) {
switch(preserve_metadata,
zip = function(path) {
tmp <- withr::local_tempdir()
zip::unzip(zipfile = path, exdir = tmp)
terra::rast(file.path(tmp, basename(path)))
},
drop = function(path) terra::rast(path)
)
switch(
preserve_metadata,
# zip: extract the archive into a temporary directory, then open the
# extracted file that carries the same base name as the archive
zip = function(path) {
tmp <- withr::local_tempdir()
zip::unzip(zipfile = path, exdir = tmp)
terra::rast(file.path(tmp, basename(path)))
},
# gdalraster_sozip: open the archive in place through a "/vsizip/{...}/"
# path string instead of extracting it first
gdalraster_sozip = function(path) {
# NOTE(review): `paste0()` is closed before `basename(path)`, so the base
# name is passed as the second argument of `terra::rast()` rather than being
# appended to the "/vsizip/{path}/" string - confirm this is intended.
terra::rast(paste0("/vsizip/{", path, "}/"), basename(path))
},
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to worry about file.path here, or is this syntax going to be generalisable to all OS's?

# drop: the stored file is read directly
drop = function(path) terra::rast(path)
)
}

# Build the writer used to store a raster target.
#
# `filetype` and `gdal` are forwarded unchanged to `terra::writeRaster()`.
# `preserve_metadata` is a single string; `switch()` returns the writer
# function for the matching branch. Every writer takes the raster `object`
# and the destination `path`.
#
# NOTE(review): two `switch()` calls appear below. The first lacks the
# `gdalraster_sozip` branch and looks like the pre-change side of the rendered
# diff; the second is the one whose value the function would return.
tar_rast_write <- function(filetype, gdal, preserve_metadata) {
switch(preserve_metadata,
zip = function(object, path) {
# write the raster in a fresh local tempdir() that disappears when function is done
tmp <- withr::local_tempdir()
dir.create(file.path(tmp, dirname(path)), recursive = TRUE)
terra::writeRaster(
object,
file.path(tmp, path),
filetype = filetype,
overwrite = TRUE,
gdal = gdal
)
# package files into a zip file using `zip::zip()`
raster_files <- list.files(file.path(tmp, dirname(path)), full.names = TRUE)
zip::zip(
file.path(tmp, basename(path)),
files = raster_files,
compression_level = 1,
mode = "cherry-pick",
root = dirname(raster_files)[1]
)
# move the zip file to the expected place
file.copy(file.path(tmp, basename(path)), path)
unlink(file.path(tmp, basename(path)))
},
drop = function(object, path) {
terra::writeRaster(
object,
path,
filetype = filetype,
overwrite = TRUE,
gdal = gdal
)
}
)
switch(
preserve_metadata,
zip = function(object, path) {
# write the raster in a fresh local tempdir() that disappears when function is done
tmp <- withr::local_tempdir()
dir.create(file.path(tmp, dirname(path)), recursive = TRUE)
terra::writeRaster(
object,
file.path(tmp, path),
filetype = filetype,
overwrite = TRUE,
gdal = gdal
)
# package files into a zip file using `zip::zip()`
# every file found next to the written raster is included in the archive
raster_files <- list.files(file.path(tmp, dirname(path)), full.names = TRUE)
zip::zip(
file.path(tmp, basename(path)),
files = raster_files,
compression_level = 1,
mode = "cherry-pick",
root = dirname(raster_files)[1]
)

# move the zip file to the expected place
file.copy(file.path(tmp, basename(path)), path)
unlink(file.path(tmp, basename(path)))
},
# gdalraster_sozip: same staging approach as the zip branch, but the archive
# is produced by `gdalraster::addFilesInZip()`
gdalraster_sozip = function(object, path) {

tmp <- withr::local_tempdir()

dir.create(file.path(tmp, dirname(path)), recursive = TRUE)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggest moving file.path(tmp, dirname(path)) out into a separate variable, since it gets referred to a few times later on in this function

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok this is done. Sorry for some commit noise on this, made a mistake that broke tests, but I think it is resolved now

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See also #127 - the fix(es) for that issue are maybe also required here.


# stage the raster under the temporary directory, mirroring `path`
terra::writeRaster(
object,
file.path(tmp, path),
filetype = filetype,
overwrite = TRUE,
gdal = gdal
)

# collect everything written next to the raster (presumably the raster plus
# any sidecar files - verify against the chosen `filetype`)
raster_files <- list.files(file.path(tmp, dirname(path)), full.names = TRUE)

# create seek-optimized zip file using gdalraster
# the archive is written straight to `path`, so unlike the zip branch there
# is no separate copy step afterwards
gdalraster::addFilesInZip(
path,
raster_files,
full_paths = FALSE,
overwrite = TRUE,
sozip_enabled = "YES",
num_threads = 1,
quiet = TRUE
)
# always create sozip regardless of file size (sozip_enabled = "YES")
# TODO: allow user control of number of threads?
# how does num_threads interact multiple workers etc.?

# delete the staged raster file from the temporary location
unlink(file.path(tmp, path))
},
# drop: write the raster directly to `path` with no packaging
drop = function(object, path) {
terra::writeRaster(
object,
path,
filetype = filetype,
overwrite = TRUE,
gdal = gdal
)
}
)
}