Skip to content

Commit

Permalink
feat!(cache): implement knit_cache_hook instead of pre/post-processors
Browse files Browse the repository at this point in the history
  • Loading branch information
atusy committed Apr 30, 2024
1 parent 94cca33 commit 4d76e48
Showing 1 changed file with 58 additions and 10 deletions.
68 changes: 58 additions & 10 deletions R/cache.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ new_cache = function() {
}

cache_purge = function(hash) {
for (h in hash) unlink(paste(cache_path(h), c('rds', 'rdb', 'rdx', 'RData'), sep = '.'))
for (h in hash) unlink(paste0(cache_path(h), c('.rds', '.rdb', '.rdx', '.RData', '__extra')), recursive = TRUE)
}

cache_save = function(keys, outname, hash, lazy = TRUE) {
Expand All @@ -32,7 +32,7 @@ new_cache = function() {
# random seed is always load()ed
keys = as.character(setdiff(keys, '.Random.seed'))
envir = knit_global()
saveRDS(setNames(lapply(keys, function(k) knit_cache_preprocess(envir[[k]])), keys), paste(path, 'rds', sep = '.'))
saveRDS(lapply(setNames(keys, keys), function(k) knit_cache_hook(envir[[k]], k, path)), paste(path, 'rds', sep = '.'))
unlink(paste(path, c('rdb', 'rdx'), sep = '.')) # migrate from former implementation
}

Expand Down Expand Up @@ -65,7 +65,12 @@ new_cache = function() {
envir = knit_global()
obj = readRDS(paste(path, 'rds', sep = '.'))
for (nm in names(obj)) {
assign(nm, knit_cache_postprocess(obj[[nm]]), envir = envir)
o = obj[[nm]]
assign(
nm,
if (is.function(o) && inherits(o, 'knit_cache_loader') && !inherits(o, 'AsIs')) o() else o,
envir = envir
)
}
}
}
Expand Down Expand Up @@ -142,16 +147,59 @@ cache_meta_name = function(hash) sprintf('.%s_meta', hash)
# name of the variable that holds the text output of a code chunk: the chunk
# hash prefixed with a dot (e.g. hash 'abc' gives '.abc')
cache_output_name = function(hash) {
  sprintf('.%s', hash)
}

# process cached objects before save and after read
knit_cache_preprocess = function(x, ...) UseMethod('knit_cache_preprocess')
knit_cache_preprocess.default = function(x, ...) x
knit_cache_postprocess = function(x, ...) UseMethod('knit_cache_postprocess')
knit_cache_postprocess.default = function(x, ...) x
#' Hook cache behavior
#'
#' By default, a named list of objects in a chunk is cached as is in an rds
#' file. If certain classes of objects need custom cache behaviors, register
#' S3 methods for \code{knit_cache_hook}. The return value of the method
#' is cached to the rds file. If a custom loader is needed, the method should
#' return a function with the \code{knit_cache_loader} class, which will be
#' called when the cache is loaded.
#'
#' @param x a value of object to be cached.
#' @param nm a name of the object to be cached. If a hook creates an external file based on \code{nm}, then apply \code{\link{URLencode}} to \code{nm} in order to avoid invalid file names.
#' @param path
#' a common path of the cache files of a chunk. If the hook creates extra
#' files which need to be cleaned up by knitr, then create a directory whose
#' name is \code{path} suffixed by "__extra", and save the files in it.
#' @param ... Reserved for future extensions
#'
#' @return
#' A value to be cached. If the value is a \code{knit_cache_loader}-classed
#' function, then the function is called and the returned value is treated as
#' the loaded value. The loader should accept an ellipsis (\code{...}) as an
#' argument for future extensions.
#'
#' @examples
#' registerS3method(
#' "knit_cache_hook",
#' "character",
#' function(x, nm, path, ...) {
#' # Cache x as is if it extends character class
#' if (!identical(class(x), "character")) {
#' return(x)
#' }
#'
#' # Preprocess data (e.g., save data to an external file)
#' # Create external files under the directory of `paste0(path, "__extra")`
#' # if knitr should cleanup them on refreshing/cleaning cache.
#' d <- paste0(path, "__extra")
#' dir.create(d, showWarnings = FALSE, recursive = TRUE)
#' f <- file.path(d, paste0(URLencode(nm, reserved = TRUE), '.txt'))
#' writeLines(x, f)
#'
#' # Return loader function
#' # which receives ellipsis for future extensions and has knit_cache_loader class
#' structure(function(...) readLines(f), class = 'knit_cache_loader')
#' },
#' envir = asNamespace("knitr")
#' )
knit_cache_hook = function(x, nm, path, ...) UseMethod('knit_cache_hook')
# default method: return x unchanged, so objects without a dedicated method
# are cached as is
registerS3method("knit_cache_hook", "default", function(x, nm, path, ...) x)

cache = new_cache()

# a regex for cache files
cache_rx = '_[abcdef0123456789]{32}[.](rds|rdb|rdx|RData)$'
cache_rx = '_[abcdef0123456789]{32}([.](rds|rdb|rdx|RData)|__extra)$'

#' Build automatic dependencies among chunks
#'
Expand Down Expand Up @@ -350,7 +398,7 @@ clean_cache = function(clean = FALSE, path = opts_chunk$get('cache.path')) {
i = !(sub(cache_rx, '', base) %in% paste0(p1, labs))
if (p1 != '') i = i & (substr(base, 1, nchar(p1)) == p1)
if (!any(i)) return()
if (clean) unlink(files[i]) else message(
if (clean) unlink(files[i], recursive = TRUE) else message(
'Clean these cache files?\n\n', one_string(files[i]), '\n'
)
}

0 comments on commit 4d76e48

Please sign in to comment.