Skip to content

Commit

Permalink
added research-archiveCleanedPackage
Browse files Browse the repository at this point in the history
  • Loading branch information
Qile0317 committed Oct 3, 2024
1 parent 290f834 commit 3975a04
Show file tree
Hide file tree
Showing 5 changed files with 247 additions and 1 deletion.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ Imports:
Rcpp (>= 1.0.12),
rlang,
testthat,
usethis
usethis,
R.utils
LinkingTo: Rcpp
Suggests:
covr,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
S3method(getfirst,default)
S3method(getlast,default)
export(add)
export(archiveCleanedPackage)
export(bound)
export(closestWord)
export(colToRownames)
Expand Down
177 changes: 177 additions & 0 deletions R/research.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
#' @title Archive and clean Package Directory
#'
#' @description
#' `r lifecycle::badge("experimental")`
#'
#' This function processes an R package directory, removing irrelevant files
#' (e.g., `.git`, `.gitignore`-listed files, etc.) and strips parts of lines
#' matching a specified regex from source code. It then creates a clean folder
#' or a zipped archive of the package without modifying the original directory.
#'
#' MAJOR NOTE: this is nowhere near perfect. The regexes currently used for
#' removing comments are prone to issues, such as not detecting whether the
#' comment symbols are inside strings.
#'
#' @param packageDir A string specifying the path to the package directory.
#' @param output A string specifying the path of the cleaned output. If the
#' string ends with `.zip`, the function creates a zipped archive of the
#' cleaned directory; otherwise it creates a directory at that path.
#' @param removeHidden Logical; if `TRUE`, removes hidden files and folders
#' (starting with a dot). Default is `TRUE`.
#' @param removeGitignore Logical; if `TRUE`, removes files listed in the
#' `.gitignore`. Default is `TRUE`.
#' @param verbose Logical; if `TRUE`, prints additional information about the
#' cleaning process. Default is `FALSE`.
#'
#' @details
#' The function cleans the package directory by:
#' - Removing hidden files and folders like `.git`.
#' - Excluding files listed in `.gitignore`.
#' - Stripping out parts of lines matching a regex pattern from source files.
#'
#' The cleaned copy is first assembled in a temporary directory, which is
#' always removed when the function exits (including on error). The output
#' will be either a cleaned directory or a zipped archive of it. Paths stored
#' in the zip archive are relative to the package root.
#'
#' @return Invisibly, the `output` path of the cleaned directory or zip file.
#' @keywords research
#'
#' @examples
#' \dontrun{
#' # Clean and archive the package directory
#' archiveCleanedPackage("/path/to/package", "clean_package.zip")
#'
#' # Clean package directory without zipping it
#' archiveCleanedPackage("/path/to/package", "clean_package")
#' }
#' @export
archiveCleanedPackage <- function(
    packageDir, output, removeHidden = TRUE,
    removeGitignore = TRUE, verbose = FALSE
) {

    assert_that(is.string(packageDir), dir.exists(packageDir))
    assert_that(is.string(output))
    assert_that(is.flag(removeHidden))
    assert_that(is.flag(removeGitignore))
    assert_that(is.flag(verbose))

    tempDir <- tempfile()
    dir.create(tempDir)
    # Clean up the staging directory even if a later step fails.
    on.exit(unlink(tempDir, recursive = TRUE), add = TRUE)

    gitignorePatterns <- if (removeGitignore) {
        getGitignorePatterns(packageDir)
    } else {
        NULL
    }

    copyCleanedRFiles(
        packageDir, tempDir, patterns = gitignorePatterns,
        removeHidden = removeHidden, verbose = verbose
    )

    if (grepl("\\.zip$", output)) {

        if (verbose) message("Creating zip archive at: ", output)

        # Resolve the archive path before changing directory, so a relative
        # `output` still refers to the caller's working directory.
        zipPath <- file.path(
            normalizePath(dirname(output), mustWork = TRUE),
            basename(output)
        )

        # Include dotfiles: whether hidden files survive is decided by
        # `removeHidden` during the copy step, not by this listing.
        entries <- list.files(tempDir, all.files = TRUE, no.. = TRUE)

        # Zip from inside the staging directory so that archive entries are
        # package-relative instead of embedding the absolute temp path.
        oldWd <- setwd(tempDir)
        tryCatch(
            zip(zipfile = zipPath, files = entries, flags = "-r"),
            finally = setwd(oldWd)
        )

    } else {
        dir.create(output, showWarnings = FALSE)

        R.utils::copyDirectory(
            tempDir, output, recursive = TRUE
        )

        if (verbose) {
            message("Cleaned package directory created at: ", output)
        }
    }

    invisible(output)
}

# Read the ignore patterns from the `.gitignore` file directly inside `dir`.
#
# @param dir Path of the directory that may contain a `.gitignore` file.
# @return `NULL` if `dir` has no `.gitignore`; otherwise a character vector
#   of the non-comment, non-blank lines with trailing whitespace removed
#   (possibly of length zero).
getGitignorePatterns <- function(dir) {

    gitignorePath <- file.path(dir, ".gitignore")

    if (!file.exists(gitignorePath)) return(NULL)

    # warn = FALSE: a missing final newline is not worth a warning here.
    # Trailing whitespace is dropped so that whitespace-only lines count as
    # blank and patterns such as "docs/  " are not kept with stray spaces.
    gitignoreLines <- trimws(
        readLines(gitignorePath, warn = FALSE), which = "right"
    )

    isComment <- startsWith(gitignoreLines, "#")

    gitignoreLines[!isComment & nzchar(gitignoreLines)]
}

# Remove, from each element of `lines`, the first match of `regex` together
# with any whitespace directly before it and everything after it.
#
# @param lines Character vector of text lines.
# @param regex A Perl-compatible regular expression marking where to cut.
# @return `lines` with the matched tail of each element removed.
stripLinesAfterRegex <- function(lines, regex) {
    cutPattern <- paste0("\\s*", regex, ".*")
    gsub(pattern = cutPattern, replacement = "", x = lines, perl = TRUE)
}

# Drop whitespace at the end of every element of `lines`.
#
# @param lines Character vector of text lines.
# @return `lines` with trailing whitespace removed from each element.
stripTrailingWhiteSpace <- function(lines) {
    trailingBlanks <- "\\s+$"
    gsub(pattern = trailingBlanks, replacement = "", x = lines)
}

# Normalise the end of a vector of lines: drop all trailing empty strings,
# then append exactly one empty string.
#
# Empty or all-blank input yields a single empty string rather than an error.
#
# @param lines Character vector of text lines.
# @return `lines` without its trailing empty elements, followed by one `""`.
ensureSingleNewLineAtEnd <- function(lines) {
    # TRUE for every element that is non-empty or has a non-empty element
    # somewhere after it; only the trailing run of empty strings is FALSE.
    hasContentAtOrAfter <- rev(cumsum(rev(nzchar(lines))) > 0)

    # After trimming, the last kept element is never empty, so the single
    # trailing "" can be appended unconditionally.
    c(lines[hasContentAtOrAfter], "")
}


# Translate one `.gitignore` glob into a Perl regex that is matched against
# paths relative to the package root (with "/" separators).
#
# This is an approximation of gitignore semantics: `*` may match across
# directory separators, and negated (`!`) patterns are not handled here.
#
# @param pattern A single gitignore pattern.
# @return A regex string matching the ignored entry itself and, through the
#   trailing `(/|$)`, anything inside it when it is a directory.
gitignorePatternToRegex <- function(pattern) {
    # A trailing slash only marks "directory"; it is not part of the name.
    pattern <- sub("/+$", "", trimws(pattern))

    # A pattern containing a slash is relative to the root; otherwise it may
    # match a path component at any depth.
    anchored <- grepl("/", pattern, fixed = TRUE)
    pattern <- sub("^/", "", pattern)

    # Escape regex metacharacters that are literal in a glob, then translate
    # the glob wildcards.
    regex <- gsub("([.+^$(){}|\\\\])", "\\\\\\1", pattern, perl = TRUE)
    regex <- gsub("?", "[^/]", regex, fixed = TRUE)
    regex <- gsub("*", ".*", regex, fixed = TRUE)

    paste0(if (anchored) "^" else "(^|/)", regex, "(/|$)")
}

# Copy the files under `from` into `to`, skipping hidden and ignored files and
# stripping TODO/FIXME/BUG/HACK comment tails from source files.
#
# @param from Path of the source directory.
# @param to Path of the destination directory (created if missing).
# @param patterns Optional character vector of `.gitignore`-style patterns;
#   matching files are not copied. `NULL` disables this filter.
# @param removeHidden If `TRUE`, skip files whose path below `from` has any
#   component starting with a dot.
# @param verbose If `TRUE`, report each processed file via `message()`.
# @return `NULL`, invisibly; called for its side effect of writing files.
copyCleanedRFiles <- function(
    from, to, patterns = NULL, removeHidden = TRUE, verbose = FALSE
) {

    dir.create(to, showWarnings = FALSE, recursive = TRUE)

    # Work with paths relative to `from`, so that neither the hidden-file
    # filter nor the ignore patterns are affected by the location of `from`
    # itself (e.g. a package living under "~/.cache/").
    relPaths <- list.files(
        from, all.files = TRUE, recursive = TRUE, full.names = FALSE
    )

    if (removeHidden) {
        relPaths <- relPaths[!grepl("(^|/)\\.", relPaths)]
    }

    for (pattern in patterns) {
        # Negated patterns would re-include files; that is not supported.
        if (!nzchar(trimws(pattern)) || startsWith(pattern, "!")) next
        ignoreRegex <- gitignorePatternToRegex(pattern)
        relPaths <- relPaths[!grepl(ignoreRegex, relPaths, perl = TRUE)]
    }

    for (relPath in relPaths) {
        srcPath <- file.path(from, relPath)
        destPath <- file.path(to, relPath)

        # A recursive listing contains files only, so the destination
        # subdirectory has to be created explicitly before writing.
        dir.create(dirname(destPath), showWarnings = FALSE, recursive = TRUE)

        if (verbose) message("Processing file: ", srcPath)

        # this regex isnt perfect either
        isSource <- grepl(
            "\\.(R|r|Rmd|rmd|Rnw|rnw|cpp|hpp|c|h|rs)$", relPath
        )

        if (!isSource) {
            # Byte-for-byte copy: reading binary files (data, images) as text
            # lines and writing them back would corrupt them.
            file.copy(srcPath, destPath, overwrite = TRUE)
            next
        }

        fileLines <- readLines(srcPath, warn = FALSE)

        # this isnt perfect because this might be in a string
        cleanedLines <- fileLines %>%
            stripLinesAfterRegex(" *#+ *(TODO|FIXME|BUG|HACK)") %>%
            stripTrailingWhiteSpace()

        # Only normalise the file ending when some content remains; empty
        # files are written back as they are.
        if (any(nzchar(cleanedLines))) {
            cleanedLines <- ensureSingleNewLineAtEnd(cleanedLines)
        }

        if (verbose) {
            message("Stripped lines matching TODO/FIXME from: ", srcPath)
        }

        writeLines(cleanedLines, destPath)
    }

    invisible(NULL)
}
2 changes: 2 additions & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ reference:
- contents: has_keyword("packageDevelopment")
- title: Package Loading
- contents: has_keyword("packageLoading")
- title: Research
- contents: has_keyword("research")
- title: Regex
- contents: has_keyword("regex")
- title: Spelling
Expand Down
65 changes: 65 additions & 0 deletions man/archiveCleanedPackage.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 3975a04

Please sign in to comment.