From e004cd4091ae3cb111873076365619ce0ba42430 Mon Sep 17 00:00:00 2001
From: teddyCodex <samted.uche@gmail.com>
Date: Sun, 6 Oct 2024 20:10:30 +0100
Subject: [PATCH] refactor function and parameter names in R/cleanup.R

---
 R/cleanup.R | 142 ++++++++++++++++++++++++++--------------------------
 1 file changed, 71 insertions(+), 71 deletions(-)

diff --git a/R/cleanup.R b/R/cleanup.R
index 3a708415..f82722f2 100755
--- a/R/cleanup.R
+++ b/R/cleanup.R
@@ -31,10 +31,10 @@
 #' @return [string] string with only alphanumerics, "_", "+", and "."
 #' @examples
 #' \dontrun{
-#' clean_string()
+#' cleanString()
 #' }
 #'
-clean_string <- function(string) {
+cleanString <- function(string) {
     # replace spaces with "_"
     string <- stringr::str_replace_all(string, "\\s+", "_")
     # keep only alphanumeric characters, "_", and "."
@@ -44,7 +44,7 @@ clean_string <- function(string) {
 
 # use the same code as upstream_scripts/00_submit_full.R's
 # get_sequences() function to extract accession numbers
-#' string2accnum
+#' extractAccNum
 #'
 #' @param string
 #'
@@ -53,9 +53,9 @@ clean_string <- function(string) {
 #'
 #' @examples
 #' \dontrun{
-#' string2accnum()
+#' extractAccNum()
 #' }
-string2accnum <- function(string) {
+extractAccNum <- function(string) {
     if (grepl("\\|", string)) {
         accnum <- strsplit(string, "\\|")[[1]][2]
         accnum <- strsplit(accnum, " ")[[1]][1]
@@ -81,9 +81,9 @@ string2accnum <- function(string) {
 #' @examples
 #' \dontrun{
 #' c("xxx", "xxx", "xxx", "yyy", "yyy") |>
-#'     make_accnums_unique()
+#'     ensureUniqAccNum()
 #' }
-make_accnums_unique <- function(accnums) {
+ensureUniqAccNum <- function(accnums) {
     # group by accnums then use the row count as a proxy
     # for the index of occurence for each accession number
     df_accnums <- tibble::tibble("accnum" = accnums)
@@ -113,14 +113,14 @@ make_accnums_unique <- function(accnums) {
 #' @examples
 #' \dontrun{
 #' AAStringSet(c("xxx" = "ATCG", "xxx" = "GGGC")) |>
-#'     cleanup_fasta_header()
+#'     cleanFAHeaders()
 #' }
-cleanup_fasta_header <- function(fasta) {
+cleanFAHeaders <- function(fasta) {
     headers <- names(fasta)
     # try parsing accession numbers from header
     headers <- purrr::map_chr(
         headers,
-        string2accnum
+        extractAccNum
     )
     # sanitize string for pathing (file read/write-ing)
     headers <- purrr::map_chr(
@@ -128,7 +128,7 @@ cleanup_fasta_header <- function(fasta) {
         fs::path_sanitize
     )
     # append an index suffix for the ith occurence of each accnum
-    headers <- make_accnums_unique(headers)
+    headers <- ensureUniqAccNum(headers)
     names(fasta) <- headers
     return(fasta)
 }
@@ -153,9 +153,9 @@ cleanup_fasta_header <- function(fasta) {
 #'
 #' @examples
 #' \dontrun{
-#' remove_empty(prot, "DomArch")
+#' removeEmptyRows(prot, "DomArch")
 #' }
-remove_empty <- function(prot, by_column = "DomArch") {
+removeEmptyRows <- function(prot, by_column = "DomArch") {
     # ?? Don't call other psp functions within these functions
     prot <- prot %>%
         as_tibble() %>%
@@ -168,7 +168,7 @@ remove_empty <- function(prot, by_column = "DomArch") {
 }
 
 ###########################
-#' repeat2s
+#' condenseRepeatedDomains
 #'
 #' @description
 #' Condense repeated domains
@@ -181,7 +181,7 @@ remove_empty <- function(prot, by_column = "DomArch") {
 #'
 #' @param prot A data frame containing 'DomArch', 'GenContext', 'ClustName' columns.
 #' @param by_column Column in which repeats are condensed to domain+domain -> domain(s).
-#' @param excluded_prots Vector of strings that repeat2s should not reduce to (s). Defaults to c()
+#' @param excluded_prots Vector of strings that condenseRepeatedDomains should leave as-is instead of condensing to the "(s)" form. Defaults to c()
 #'
 #' @return Describe return, in detail
 #' @export
@@ -191,10 +191,10 @@ remove_empty <- function(prot, by_column = "DomArch") {
 #'
 #' @examples
 #' \dontrun{
-#' repeat2s(prot, "DomArch")
+#' condenseRepeatedDomains(prot, "DomArch")
 #' }
-repeat2s <- function(prot, by_column = "DomArch", excluded_prots = c()) {
-    # If there are strings that repeat2s should not affect, the pattern to search
+condenseRepeatedDomains <- function(prot, by_column = "DomArch", excluded_prots = c()) {
+    # If there are strings that condenseRepeatedDomains should not affect, the pattern to search
     # for must be changed to exclude a search for those desired strings
 
     collapsed_prots <- paste0(excluded_prots, collapse = "\\s|")
@@ -253,10 +253,10 @@ repeat2s <- function(prot, by_column = "DomArch", excluded_prots = c()) {
 #'
 #' @examples
 #' \dontrun{
-#' replaceQMs()
+#' replaceQuestionMarks()
 #' }
 #'
-replaceQMs <- function(prot, by_column = "GenContext") {
+replaceQuestionMarks <- function(prot, by_column = "GenContext") {
     by <- sym(by_column)
 
     # Regex for finding repeated `?`
@@ -290,9 +290,9 @@ replaceQMs <- function(prot, by_column = "GenContext") {
 #'
 #' @examples
 #' \dontrun{
-#' remove_astrk()
+#' removeAsterisks()
 #' }
-remove_astrk <- function(query_data, colname = "GenContext") {
+removeAsterisks <- function(query_data, colname = "GenContext") {
     query_data[, colname] <- map(query_data[, colname], function(x) str_remove_all(x, pattern = "\\*"))
 
     return(query_data)
@@ -323,9 +323,9 @@ remove_astrk <- function(query_data, colname = "GenContext") {
 #'
 #' @examples
 #' \dontrun{
-#' remove_tails(prot, "DomArch")
+#' removeTails(prot, "DomArch")
 #' }
-remove_tails <- function(prot, by_column = "DomArch",
+removeTails <- function(prot, by_column = "DomArch",
     keep_domains = FALSE) { # !! currently redundant
 
     by_column <- sym(by_column)
@@ -369,7 +369,7 @@ remove_tails <- function(prot, by_column = "DomArch",
 #' A cleaned up version of the data table is returned.
 #'
 #' @param prot A data frame that contains columns 'Species'.
-#' @param remove_empty Boolean. If TRUE, rows with empty/unnecessary values in 'Species' are removed.
+#' @param removeEmptyRows Boolean. If TRUE, rows with empty/unnecessary values in 'Species' are removed.
 #' Default is false.
 #'
 #' @importFrom stringr coll str_replace_all
@@ -379,9 +379,9 @@ remove_tails <- function(prot, by_column = "DomArch",
 #'
 #' @examples
 #' \dontrun{
-#' cleanup_species(prot, TRUE)
+#' cleanSpecies(prot, TRUE)
 #' }
-cleanup_species <- function(prot, remove_empty = FALSE) {
+cleanSpecies <- function(prot, removeEmptyRows = FALSE) {
     # FUNCTIONS CALLED HERE, if else might be better since only two options, T and F
 
     # Create cleaned up Species column
@@ -404,8 +404,8 @@ cleanup_species <- function(prot, remove_empty = FALSE) {
         str_replace_all(coll("  ", TRUE), " ")
 
     # !! CHECK !! Species vs Species_old
-    if (remove_empty) {
-        prot <- remove_empty(prot = prot, by_column = "Species")
+    if (removeEmptyRows) {
+        prot <- removeEmptyRows(prot = prot, by_column = "Species")
     }
 
     return(prot)
@@ -425,9 +425,9 @@ cleanup_species <- function(prot, remove_empty = FALSE) {
 #' @param prot A data frame that must contain columns Query and ClustName.
 #' @param domains_rename A data frame containing the domain names to be replaced in a column 'old' and the corresponding replacement values in a column 'new'.
 #' @param domains_keep A data frame containing the domain names to be retained.
-#' @param repeat2s Boolean. If TRUE, repeated domains in 'ClustName' are condensed. Default is TRUE.
-#' @param remove_tails Boolean. If TRUE, 'ClustName' will be filtered based on domains to keep/remove. Default is FALSE.
-#' @param remove_empty  Boolean. If TRUE, rows with empty/unnecessary values in 'ClustName' are removed. Default is FALSE.
+#' @param condenseRepeatedDomains Boolean. If TRUE, repeated domains in 'ClustName' are condensed. Default is TRUE.
+#' @param removeTails Boolean. If TRUE, 'ClustName' will be filtered based on domains to keep/remove. Default is FALSE.
+#' @param removeEmptyRows  Boolean. If TRUE, rows with empty/unnecessary values in 'ClustName' are removed. Default is FALSE.
 #'
 #' @importFrom dplyr filter
 #' @importFrom stringr coll str_replace_all
@@ -437,12 +437,12 @@ cleanup_species <- function(prot, remove_empty = FALSE) {
 #'
 #' @examples
 #' \dontrun{
-#' cleanup_clust(prot, TRUE, FALSE, domains_keep, domains_rename)
+#' cleanClusters(prot, domains_rename, domains_keep, condenseRepeatedDomains = TRUE, removeTails = FALSE)
 #' }
-cleanup_clust <- function(prot,
+cleanClusters <- function(prot,
     domains_rename, domains_keep,
-    repeat2s = TRUE, remove_tails = FALSE,
-    remove_empty = FALSE) {
+    condenseRepeatedDomains = TRUE, removeTails = FALSE,
+    removeEmptyRows = FALSE) {
     # Create cleaned up ClustName column
     prot$ClustName <- prot$ClustName.orig
 
@@ -469,19 +469,19 @@ cleanup_clust <- function(prot,
 
     ## Optional parameters
     # Condense repeats
-    if (repeat2s) {
-        prot <- repeat2s(prot, by_column = "ClustName")
+    if (condenseRepeatedDomains) {
+        prot <- condenseRepeatedDomains(prot, by_column = "ClustName")
     }
     # Remove singletons
-    # if(remove_tails){
+    # if(removeTails){
     #  prot <- prot %>% filter(!grepl(".1$", ClustID))
     # }
-    if (remove_tails) {
-        prot <- remove_tails(prot, by_column = "ClustName")
+    if (removeTails) {
+        prot <- removeTails(prot, by_column = "ClustName")
     }
     # Remove empty rows
-    if (remove_empty) {
-        prot <- remove_empty(prot = prot, by_column = "ClustName")
+    if (removeEmptyRows) {
+        prot <- removeEmptyRows(prot = prot, by_column = "ClustName")
     }
 
 
@@ -509,9 +509,9 @@ cleanup_clust <- function(prot,
 #' @param domains_keep A data frame containing the domain names to be retained.
 #' @param domains_rename A data frame containing the domain names to be replaced in a column 'old' and the
 #' corresponding replacement values in a column 'new'.
-#' @param repeat2s Boolean. If TRUE, repeated domains in 'DomArch' are condensed. Default is TRUE.
-#' @param remove_tails Boolean. If TRUE, 'ClustName' will be filtered based on domains to keep/remove. Default is FALSE.
-#' @param remove_empty Boolean. If TRUE, rows with empty/unnecessary values in 'DomArch' are removed. Default is FALSE.
+#' @param condenseRepeatedDomains Boolean. If TRUE, repeated domains in 'DomArch' are condensed. Default is TRUE.
+#' @param removeTails Boolean. If TRUE, 'DomArch' will be filtered based on domains to keep/remove. Default is FALSE.
+#' @param removeEmptyRows Boolean. If TRUE, rows with empty/unnecessary values in 'DomArch' are removed. Default is FALSE.
 #' @param domains_ignore A data frame containing the domain names to be removed in a column called 'domains'
 #'
 #' @importFrom dplyr pull
@@ -522,12 +522,12 @@ cleanup_clust <- function(prot,
 #'
 #' @examples
 #' \dontrun{
-#' cleanup_domarch(prot, TRUE, FALSE, domains_keep, domains_rename, domains_ignore = NULL)
+#' cleanDomainArchitecture(prot, domains_keep = domains_keep, domains_rename = domains_rename, condenseRepeatedDomains = TRUE, removeTails = FALSE, domains_ignore = NULL)
 #' }
-cleanup_domarch <- function(prot, old = "DomArch.orig", new = "DomArch",
+cleanDomainArchitecture <- function(prot, old = "DomArch.orig", new = "DomArch",
     domains_keep, domains_rename,
-    repeat2s = TRUE, remove_tails = FALSE,
-    remove_empty = F,
+    condenseRepeatedDomains = TRUE, removeTails = FALSE,
+    removeEmptyRows = F,
     domains_ignore = NULL) {
     old_sym <- sym(old)
     new_sym <- sym(new)
@@ -577,22 +577,22 @@ cleanup_domarch <- function(prot, old = "DomArch.orig", new = "DomArch",
 
     ## Optional parameters
     # Remove singletons
-    if (remove_tails) {
-        prot <- remove_tails(prot = prot, by_column = new)
+    if (removeTails) {
+        prot <- removeTails(prot = prot, by_column = new)
     }
     # Condense repeats
-    if (repeat2s) {
+    if (condenseRepeatedDomains) {
         ## Error in UseMethod("tbl_vars") : no applicable method for 'tbl_vars' applied to an object of class "character"
-        prot <- repeat2s(prot = prot, by_column = new)
+        prot <- condenseRepeatedDomains(prot = prot, by_column = new)
     }
     # Remove empty rows
     # ! FUNCTIONS CALLED HERE, if else might be better since only two options, T and F
     # ! Make a separate function of out of this?
-    if (remove_empty) {
-        prot <- remove_empty(prot = prot, by_column = new)
+    if (removeEmptyRows) {
+        prot <- removeEmptyRows(prot = prot, by_column = new)
     }
 
-    prot <- replaceQMs(prot, new)
+    prot <- replaceQuestionMarks(prot, new)
 
     return(prot)
 }
@@ -610,7 +610,7 @@ cleanup_domarch <- function(prot, old = "DomArch.orig", new = "DomArch",
 #' @param prot A data frame that contains columns 'GenContext.orig'
 #' @param domains_rename A data frame containing the domain names to be replaced in a column 'old' and the replacement in a column 'new'.
 #' Defaults to an empty data frame with a new and old column such that non of the domains will be renamed
-#' @param repeat2s Boolean. If TRUE, repeated domains in 'GenContext' are condensed. Default is TRUE.
+#' @param condenseRepeatedDomains Boolean. If TRUE, repeated domains in 'GenContext' are condensed. Default is TRUE.
 #' @param remove_asterisk Boolean. If TRUE, asterisks in 'ClustName' are removed. Default is TRUE.
 #'
 #' @importFrom stringr str_replace_all
@@ -620,11 +620,11 @@ cleanup_domarch <- function(prot, old = "DomArch.orig", new = "DomArch",
 #'
 #' @examples
 #' \dontrun{
-#' cleanup_gencontext(prot, domains_rename, T, F)
+#' cleanGenomicContext(prot, domains_rename, condenseRepeatedDomains = TRUE, remove_asterisk = FALSE)
 #' }
 #'
-cleanup_gencontext <- function(prot, domains_rename = data.frame("old" = character(0), "new" = character(0), stringsAsFactors = F),
-    repeat2s = TRUE, remove_asterisk = TRUE) {
+cleanGenomicContext <- function(prot, domains_rename = data.frame("old" = character(0), "new" = character(0), stringsAsFactors = F),
+    condenseRepeatedDomains = TRUE, remove_asterisk = TRUE) {
     # Create cleaned up GenContext column
     prot$GenContext <- prot$GenContext.orig
 
@@ -641,16 +641,16 @@ cleanup_gencontext <- function(prot, domains_rename = data.frame("old" = charact
     ## Reverse operons | Straighten them out!
     prot <- reverse_operon(prot)
 
-    prot <- replaceQMs(prot, "GenContext")
+    prot <- replaceQuestionMarks(prot, "GenContext")
     ## Optional parameters
     # Condense repeats
-    if (repeat2s) {
-        prot <- repeat2s(prot, "GenContext")
+    if (condenseRepeatedDomains) {
+        prot <- condenseRepeatedDomains(prot, "GenContext")
     }
 
     # Remove the Asterisks
     if (remove_asterisk) {
-        prot <- remove_astrk(prot, colname = "GenContext")
+        prot <- removeAsterisks(prot, colname = "GenContext")
     }
 
     return(prot)
@@ -666,9 +666,9 @@ cleanup_gencontext <- function(prot, domains_rename = data.frame("old" = charact
 #'
 #' @examples
 #' \dontrun{
-#' cleanup_GeneDesc()
+#' cleanGeneDescription()
 #' }
-cleanup_GeneDesc <- function(prot, column) {
+cleanGeneDescription <- function(prot, column) {
     prot[, "GeneDesc"] <- gsub("\\.$", "", prot %>% pull(column))
     prot[, "GeneDesc"] <- gsub("%2C", ",", prot %>% pull(column))
     return(prot)
@@ -688,9 +688,9 @@ cleanup_GeneDesc <- function(prot, column) {
 #'
 #' @examples
 #' \dontrun{
-#' pick_longer_duplicate()
+#' selectLongestDuplicate()
 #' }
-pick_longer_duplicate <- function(prot, column) {
+selectLongestDuplicate <- function(prot, column) {
     col <- sym(column)
 
     prot$row.orig <- 1:nrow(prot)
@@ -736,9 +736,9 @@ pick_longer_duplicate <- function(prot, column) {
 #'
 #' @examples
 #' \dontrun{
-#' cleanup_lineage()
+#' cleanLineage()
 #' }
-cleanup_lineage <- function(prot, lins_rename) {
+cleanLineage <- function(prot, lins_rename) {
     for (i in 1:nrow(lins_rename)) {
         prot$Lineage <- gsub(lins_rename$old[i], lins_rename$new[i],
             x = prot$Lineage,