From 3975a04f0e9c702be5e6e661c796160c7c69ab65 Mon Sep 17 00:00:00 2001
From: Qile0317
Date: Thu, 3 Oct 2024 00:08:00 -0700
Subject: [PATCH] added research-archiveCleanedPackage

---
 DESCRIPTION                  |   3 +-
 NAMESPACE                    |   1 +
 R/research.R                 | 212 +++++++++++++++++++++++++++++++++++
 _pkgdown.yml                 |   2 +
 man/archiveCleanedPackage.Rd |  65 +++++++++++
 5 files changed, 282 insertions(+), 1 deletion(-)
 create mode 100644 R/research.R
 create mode 100644 man/archiveCleanedPackage.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index 75dd62c..6b7fdfd 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -23,7 +23,8 @@ Imports:
     Rcpp (>= 1.0.12),
     rlang,
     testthat,
-    usethis
+    usethis,
+    R.utils
 LinkingTo: Rcpp
 Suggests:
     covr,
diff --git a/NAMESPACE b/NAMESPACE
index 6ff735e..8519fc7 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -3,6 +3,7 @@
 S3method(getfirst,default)
 S3method(getlast,default)
 export(add)
+export(archiveCleanedPackage)
 export(bound)
 export(closestWord)
 export(colToRownames)
diff --git a/R/research.R b/R/research.R
new file mode 100644
index 0000000..4ae0910
--- /dev/null
+++ b/R/research.R
@@ -0,0 +1,212 @@
+#' @title Archive and clean Package Directory
+#'
+#' @description
+#' `r lifecycle::badge("experimental")`
+#'
+#' This function processes an R package directory, removing irrelevant files
+#' (e.g., `.git`, `.gitignore`-listed files, etc.) and strips parts of lines
+#' matching a specified regex from source code. It then creates a clean folder
+#' or a zipped archive of the package without modifying the original directory.
+#'
+#' MAJOR NOTE this is nowhere near perfect and the regexes used at the moment
+#' for removing comments are prone to issues such as not detecting whether
+#' the symbols are inside special strings.
+#'
+#' @param packageDir A string specifying the path to the package directory.
+#' @param output A string specifying the name of the output cleaned directory.
+#' If the string ends with .zip, the function will create a zipped archive of
+#' the cleaned directory.
+#' @param removeHidden Logical; if `TRUE`, removes hidden files and folders
+#' (starting with a dot). Default is `TRUE`.
+#' @param removeGitignore Logical; if `TRUE`, removes files listed in the
+#' `.gitignore`. Default is `TRUE`.
+#' @param verbose Logical; if `TRUE`, prints additional information about the
+#' cleaning process. Default is `FALSE`.
+#'
+#' @details
+#' The function cleans the package directory by:
+#' - Removing hidden files and folders like `.git`.
+#' - Excluding files listed in `.gitignore`.
+#' - Stripping out parts of lines matching a regex pattern from source files.
+#'
+#' The output will be either a cleaned directory or a zipped archive of it.
+#'
+#' @return The `output` path, invisibly.
+#' @keywords research
+#'
+#' @examples
+#' \dontrun{
+#' # Clean and archive the package directory
+#' archiveCleanedPackage("/path/to/package", "clean_package.zip")
+#'
+#' # Clean package directory without zipping it
+#' archiveCleanedPackage("/path/to/package", "clean_package")
+#' }
+#' @export
+archiveCleanedPackage <- function(
+  packageDir, output, removeHidden = TRUE,
+  removeGitignore = TRUE, verbose = FALSE
+) {
+
+  assert_that(is.string(packageDir), dir.exists(packageDir))
+  assert_that(is.string(output))
+  assert_that(is.flag(removeHidden))
+  assert_that(is.flag(removeGitignore))
+  assert_that(is.flag(verbose))
+
+  # stage the cleaned copy in a temporary directory that is always removed,
+  # even when a later step fails
+  tempDir <- tempfile()
+  dir.create(tempDir)
+  on.exit(unlink(tempDir, recursive = TRUE), add = TRUE)
+
+  gitignorePatterns <-
+    if (removeGitignore)
+      getGitignorePatterns(packageDir)
+    else
+      NULL
+
+  copyCleanedRFiles(
+    packageDir, tempDir, patterns = gitignorePatterns,
+    removeHidden = removeHidden, verbose = verbose
+  )
+
+  if (grepl("\\.zip$", output)) {
+
+    if (verbose) message("Creating zip archive at: ", output)
+
+    # zip from inside the staging directory so that archive entries are
+    # relative to the package root instead of the temporary path
+    zipPath <- file.path(
+      normalizePath(dirname(output), mustWork = TRUE), basename(output)
+    )
+    oldWd <- setwd(tempDir)
+    tryCatch(
+      zip(
+        zipfile = zipPath,
+        files = list.files(all.files = TRUE, no.. = TRUE),
+        flags = "-r"
+      ),
+      finally = setwd(oldWd)
+    )
+
+  } else {
+    dir.create(output, showWarnings = FALSE, recursive = TRUE)
+
+    R.utils::copyDirectory(
+      tempDir, output, recursive = TRUE
+    )
+
+    if (verbose)
+      message("Cleaned package directory created at: ", output)
+  }
+
+  invisible(output)
+}
+
+# read the non-blank, non-comment lines of `dir`'s .gitignore (NULL if the
+# file does not exist); negated patterns (`!pattern`) are not supported and
+# are dropped
+getGitignorePatterns <- function(dir) {
+
+  gitignorePath <- file.path(dir, ".gitignore")
+
+  if (!file.exists(gitignorePath)) return(NULL)
+
+  gitignoreLines <- trimws(
+    readLines(gitignorePath, warn = FALSE), which = "right"
+  )
+
+  gitignoreLines[nzchar(gitignoreLines) & !grepl("^[#!]", gitignoreLines)]
+}
+
+# translate one .gitignore glob into a regex to be matched (with
+# `perl = TRUE`) against "/"-separated paths relative to the package root
+gitignoreToRegex <- function(pattern) {
+
+  dirOnly <- grepl("/$", pattern)
+  pattern <- sub("/$", "", pattern)
+
+  # a slash at the start or in the middle ties the pattern to the root
+  anchored <- grepl("/", pattern, fixed = TRUE)
+  pattern <- sub("^/", "", pattern)
+
+  # escape regex metacharacters first, then expand the glob wildcards
+  body <- gsub("([.\\\\+^$(){}|])", "\\\\\\1", pattern, perl = TRUE)
+  body <- gsub("?", "[^/]", body, fixed = TRUE)
+  body <- gsub("**", "\001", body, fixed = TRUE)
+  body <- gsub("*", "[^/]*", body, fixed = TRUE)
+  body <- gsub("\001", ".*", body, fixed = TRUE)
+
+  paste0(
+    if (anchored) "^" else "(^|/)",
+    body,
+    if (dirOnly) "/" else "(/|$)"
+  )
+}
+
+# remove everything from the first match of `regex` (and any whitespace
+# before it) up to the end of each line
+stripLinesAfterRegex <- function(lines, regex) {
+  gsub(paste0("\\s*", regex, ".*"), "", lines, perl = TRUE)
+}
+
+# remove whitespace at the end of each line
+stripTrailingWhiteSpace <- function(lines) {
+  gsub("\\s+$", "", lines)
+}
+
+# drop trailing blank lines; `writeLines()` terminates every element with a
+# newline, so the written file then ends in exactly one
+ensureSingleNewLineAtEnd <- function(lines) {
+  lines[rev(cumsum(rev(lines != "")) > 0)]
+}
+
+
+# copy the files below `from` into `to`, skipping hidden files (when
+# `removeHidden`) and files matched by the .gitignore `patterns`; source files
+# also get TODO/FIXME/BUG/HACK comments and trailing whitespace removed
+copyCleanedRFiles <- function(
+  from, to, patterns = NULL, removeHidden = TRUE, verbose = FALSE
+) {
+
+  dir.create(to, showWarnings = FALSE, recursive = TRUE)
+
+  # paths relative to `from`, so that filtering does not depend on where
+  # the package itself lives (e.g. below a hidden directory)
+  files <- list.files(from, all.files = TRUE, recursive = TRUE)
+
+  if (removeHidden) files <- files[!grepl("(^|/)\\.", files)]
+
+  for (pattern in patterns) {
+    files <- files[!grepl(gitignoreToRegex(pattern), files, perl = TRUE)]
+  }
+
+  for (file in files) {
+    srcPath <- file.path(from, file)
+    destPath <- file.path(to, file)
+
+    # list.files() returns files only, so parent directories are made here
+    dir.create(dirname(destPath), showWarnings = FALSE, recursive = TRUE)
+
+    if (verbose) message("Processing file: ", srcPath)
+
+    # this regex isnt perfect either
+    if (!grepl("\\.(R|r|Rmd|rmd|Rnw|rnw|cpp|hpp|c|h|rs)$", file)) {
+      # not source code: copy byte-for-byte so binary files stay intact
+      file.copy(srcPath, destPath, overwrite = TRUE)
+      next
+    }
+
+    # this isnt perfect because this might be in a string
+    cleanedLines <- readLines(srcPath, warn = FALSE) %>%
+      stripLinesAfterRegex(" *#+ *(TODO|FIXME|BUG|HACK)") %>%
+      stripTrailingWhiteSpace() %>%
+      ensureSingleNewLineAtEnd()
+
+    writeLines(cleanedLines, destPath)
+
+    if (verbose)
+      message("Stripped lines matching TODO/FIXME from: ", srcPath)
+  }
+}
diff --git a/_pkgdown.yml b/_pkgdown.yml
index af7e8fb..b1167d4 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -43,6 +43,8 @@ reference:
 - contents: has_keyword("packageDevelopment")
 - title: Package Loading
 - contents: has_keyword("packageLoading")
+- title: Research
+- contents: has_keyword("research")
 - title: Regex
 - contents: has_keyword("regex")
 - title: Spelling
diff --git a/man/archiveCleanedPackage.Rd b/man/archiveCleanedPackage.Rd
new file mode 100644
index 0000000..37bed10
--- /dev/null
+++ b/man/archiveCleanedPackage.Rd
@@ -0,0 +1,65 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/research.R
+\name{archiveCleanedPackage}
+\alias{archiveCleanedPackage}
+\title{Archive and clean Package Directory}
+\usage{
+archiveCleanedPackage(
+  packageDir,
+  output,
+  removeHidden = TRUE,
+  removeGitignore = TRUE,
+  verbose = FALSE
+)
+}
+\arguments{
+\item{packageDir}{A string specifying the path to the package directory.}
+
+\item{output}{A string specifying the name of the output cleaned directory.
+If the string ends with .zip, the function will create a zipped archive of
+the cleaned directory.}
+
+\item{removeHidden}{Logical; if \code{TRUE}, removes hidden files and folders
+(starting with a dot). Default is \code{TRUE}.}
+
+\item{removeGitignore}{Logical; if \code{TRUE}, removes files listed in the
+\code{.gitignore}. Default is \code{TRUE}.}
+
+\item{verbose}{Logical; if \code{TRUE}, prints additional information about the
+cleaning process. Default is \code{FALSE}.}
+}
+\value{
+The \code{output} path, invisibly.
+}
+\description{
+\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}
+
+This function processes an R package directory, removing irrelevant files
+(e.g., \code{.git}, \code{.gitignore}-listed files, etc.) and strips parts of lines
+matching a specified regex from source code. It then creates a clean folder
+or a zipped archive of the package without modifying the original directory.
+
+MAJOR NOTE this is nowhere near perfect and the regexes used at the moment
+for removing comments are prone to issues such as not detecting whether
+the symbols are inside special strings.
+}
+\details{
+The function cleans the package directory by:
+\itemize{
+\item Removing hidden files and folders like \code{.git}.
+\item Excluding files listed in \code{.gitignore}.
+\item Stripping out parts of lines matching a regex pattern from source files.
+}
+
+The output will be either a cleaned directory or a zipped archive of it.
+}
+\examples{
+\dontrun{
+# Clean and archive the package directory
+archiveCleanedPackage("/path/to/package", "clean_package.zip")
+
+# Clean package directory without zipping it
+archiveCleanedPackage("/path/to/package", "clean_package")
+}
+}
+\keyword{research}