From f5dbe30045a7a066daec77fc3e3962030d535e9f Mon Sep 17 00:00:00 2001
From: teddyCodex <samted.uche@gmail.com>
Date: Sun, 6 Oct 2024 17:16:26 +0100
Subject: [PATCH 01/33] refactor function names in CHANGED-pre-msa-tree.R

---
 R/CHANGED-pre-msa-tree.R | 60 ++++++++++++++++++++--------------------
 1 file changed, 30 insertions(+), 30 deletions(-)

diff --git a/R/CHANGED-pre-msa-tree.R b/R/CHANGED-pre-msa-tree.R
index 998e8fbd..ef334232 100644
--- a/R/CHANGED-pre-msa-tree.R
+++ b/R/CHANGED-pre-msa-tree.R
@@ -1,9 +1,9 @@
 ## Pre-requisites to generate MSA and Phylogenetic Tree
 ## Includes the following functions:
-## convert_aln2fa, to_titlecase, add_leaves
-## generate_all_aln2fa
-## convert_aln2tsv??, convert_accnum2fa??
-## Created from add_leaves.R, convert_aln2fa.R, all_aln2fa.R
+## convertAlignment2FA, convert2TitleCase, addLeaves2Alignment
+## generateAllAlignments2FA
+## convertAlignment2TSV??, convertAccNumber2FA??
+## Created from addLeaves2Alignment.R, convertAlignment2FA.R, all_aln2fa.R
 ## Modified: Dec 24, 2019 | Jan 2021
 ## Janani Ravi (@jananiravi) & Samuel Chen (@samuelzornchen)
 
@@ -35,7 +35,7 @@ api_key <- Sys.getenv("ENTREZ_API_KEY", unset = "YOUR_KEY_HERE")
 #' @author Andrie, Janani Ravi
 #' @description Translate string to Title Case w/ delimitter.
 #' @aliases totitle, to_title
-#' @usage to_titlecase(text, delimitter)
+#' @usage convert2TitleCase(x, y = " ")
 #' @param x Character vector.
 #' @param y Delimitter. Default is space (" ").
 #' @seealso chartr, toupper, and tolower.
@@ -44,7 +44,7 @@ api_key <- Sys.getenv("ENTREZ_API_KEY", unset = "YOUR_KEY_HERE")
 #' @export
 #'
 #' @examples
-to_titlecase <- function(x, y = " ") {
+convert2TitleCase <- function(x, y = " ") {
     s <- strsplit(x, y)[[1]]
     paste(toupper(substring(s, 1, 1)), substring(s, 2),
         sep = "", collapse = y
@@ -89,9 +89,9 @@ to_titlecase <- function(x, y = " ") {
 #'
 #' @examples
 #' \dontrun{
-#' add_leaves("pspa_snf7.aln", "pspa.txt")
+#' addLeaves2Alignment("pspa_snf7.aln", "pspa.txt")
 #' }
-add_leaves <- function(aln_file = "",
+addLeaves2Alignment <- function(aln_file = "",
     lin_file = "data/rawdata_tsv/all_semiclean.txt", # !! finally change to all_clean.txt!!
     # lin_file="data/rawdata_tsv/PspA.txt",
     reduced = FALSE) {
@@ -164,7 +164,7 @@ add_leaves <- function(aln_file = "",
             # AccNum,
             sep = "_"
         ))
-    temp$Leaf <- map(temp$Leaf, to_titlecase)
+    temp$Leaf <- map(temp$Leaf, convert2TitleCase)
     temp <- temp %>%
         mutate(Leaf_Acc = (paste(Leaf, AccNum, sep = "_")))
 
@@ -203,7 +203,7 @@ add_leaves <- function(aln_file = "",
 #' @export
 #'
 #' @examples
-add_name <- function(data,
+addName <- function(data,
     accnum_col = "AccNum", spec_col = "Species", lin_col = "Lineage",
     lin_sep = ">", out_col = "Name") {
     cols <- c(accnum_col, "Kingdom", "Phylum", "Genus", "Spp")
@@ -283,10 +283,10 @@ add_name <- function(data,
 #'
 #' @examples
 #' \dontrun{
-#' add_leaves("pspa_snf7.aln", "pspa.txt")
+#' addLeaves2Alignment("pspa_snf7.aln", "pspa.txt")
 #' }
 #'
-convert_aln2fa <- function(aln_file = "",
+convertAlignment2FA <- function(aln_file = "",
     lin_file = "data/rawdata_tsv/all_semiclean.txt", # !! finally change to all_clean.txt!!
     fa_outpath = "",
     reduced = FALSE) {
@@ -297,7 +297,7 @@ convert_aln2fa <- function(aln_file = "",
     # fa_outpath="data/alns/pspc.fasta"
 
     ## Add leaves
-    aln <- add_leaves(
+    aln <- addLeaves2Alignment(
         aln = aln_file,
         lin = lin_file,
         reduced = reduced
@@ -320,7 +320,7 @@ convert_aln2fa <- function(aln_file = "",
     return(fasta)
 }
 
-#' Default rename_fasta() replacement function. Maps an accession number to its name
+#' Default renameFA() replacement function. Maps an accession number to its name
 #'
 #' @param line The line of a fasta file starting with '>'
 #' @param acc2name Data Table containing a column of accession numbers and a name column
@@ -335,8 +335,8 @@ convert_aln2fa <- function(aln_file = "",
 #' @export
 #'
 #' @examples
-map_acc2name <- function(line, acc2name, acc_col = "AccNum", name_col = "Name") {
-    # change to be the name equivalent to an add_names column
+mapAccession2Name <- function(line, acc2name, acc_col = "AccNum", name_col = "Name") {
+    # change to be the name equivalent to an addNames column
     # Find the first ' '
     end_acc <- str_locate(line, " ")[[1]]
 
@@ -364,8 +364,8 @@ map_acc2name <- function(line, acc2name, acc_col = "AccNum", name_col = "Name")
 #' @export
 #'
 #' @examples
-rename_fasta <- function(fa_path, outpath,
-    replacement_function = map_acc2name, ...) {
+renameFA <- function(fa_path, outpath,
+    replacement_function = mapAccession2Name, ...) {
     lines <- read_lines(fa_path)
     res <- map(lines, function(x) {
         if (strtrim(x, 1) == ">") {
@@ -381,7 +381,7 @@ rename_fasta <- function(fa_path, outpath,
 }
 
 ################################
-## generate_all_aln2fa
+## generateAllAlignments2FA
 #' Adding Leaves to an alignment file w/ accessions
 #'
 #' @keywords alignment, accnum, leaves, lineage, species
@@ -408,9 +408,9 @@ rename_fasta <- function(fa_path, outpath,
 #'
 #' @examples
 #' \dontrun{
-#' generate_all_aln2fa()
+#' generateAllAlignments2FA()
 #' }
-generate_all_aln2fa <- function(aln_path = here("data/rawdata_aln/"),
+generateAllAlignments2FA <- function(aln_path = here("data/rawdata_aln/"),
     fa_outpath = here("data/alns/"),
     lin_file = here("data/rawdata_tsv/all_semiclean.txt"),
     reduced = F) {
@@ -432,7 +432,7 @@ generate_all_aln2fa <- function(aln_path = here("data/rawdata_aln/"),
         fa_outpath = paste0(fa_outpath, "/", variable, ".fa")
     )
     pmap(
-        .l = aln2fa_args, .f = convert_aln2fa,
+        .l = aln2fa_args, .f = convertAlignment2FA,
         lin_file = lin_file,
         reduced = reduced
     )
@@ -441,7 +441,7 @@ generate_all_aln2fa <- function(aln_path = here("data/rawdata_aln/"),
 
 # accessions <- c("P12345","Q9UHC1","O15530","Q14624","P0DTD1")
 # accessions <- rep("ANY95992.1", 201)
-#' acc2fa converts protein accession numbers to a fasta format.
+#' acc2FA converts protein accession numbers to a fasta format.
 #'
 #' @description
 #' Resulting fasta file is written to the outpath.
@@ -464,11 +464,11 @@ generate_all_aln2fa <- function(aln_path = here("data/rawdata_aln/"),
 #'
 #' @examples
 #' \dontrun{
-#' acc2fa(accessions = c("ACU53894.1", "APJ14606.1", "ABK37082.1"), outpath = "my_proteins.fasta")
-#' Entrez:accessions <- rep("ANY95992.1", 201) |> acc2fa(outpath = "entrez.fa")
-#' EBI:accessions <- c("P12345", "Q9UHC1", "O15530", "Q14624", "P0DTD1") |> acc2fa(outpath = "ebi.fa")
+#' acc2FA(accessions = c("ACU53894.1", "APJ14606.1", "ABK37082.1"), outpath = "my_proteins.fasta")
+#' rep("ANY95992.1", 201) |> acc2FA(outpath = "entrez.fa") # Entrez accessions
+#' c("P12345", "Q9UHC1", "O15530", "Q14624", "P0DTD1") |> acc2FA(outpath = "ebi.fa") # EBI accessions
 #' }
-acc2fa <- function(accessions, outpath, plan = "sequential") {
+acc2FA <- function(accessions, outpath, plan = "sequential") {
     # validation
     stopifnot(length(accessions) > 0)
 
@@ -663,7 +663,7 @@ get_accnums_from_fasta_file <- function(fasta_file) {
 
 
 ################################
-## convert_accnum2fa
+## convertAccNumber2FA
 #######
 ## 1 ##
 #######
@@ -706,9 +706,9 @@ get_accnums_from_fasta_file <- function(fasta_file) {
 # seqs <- retrieveseqs(seqnames,"swissprot")
 
 ################################
-## convert_aln2tsv
+## convertAlignment2TSV
 ## NEEDS FIXING!
-# convert_aln2tsv <- function(file_path){
+# convertAlignment2TSV <- function(file_path){
 #   cfile <- read_delim("data/alignments/pspc.gismo.aln", delim=" ")
 #   cfile <- as.data.frame(map(cfile,function(x) gsub("\\s+", "",x)))
 #   colnames(cfile) <- c("AccNum", "Alignment")

From a58057115e99bce9ab62beb02585837282498934 Mon Sep 17 00:00:00 2001
From: teddyCodex <samted.uche@gmail.com>
Date: Sun, 6 Oct 2024 19:21:23 +0100
Subject: [PATCH 02/33] refactor function names in R/fa2domain.R

---
 R/fa2domain.R | 52 +++++++++++++++++++++++++--------------------------
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/R/fa2domain.R b/R/fa2domain.R
index 672d0856..ea926cb4 100644
--- a/R/fa2domain.R
+++ b/R/fa2domain.R
@@ -4,7 +4,7 @@
 # - a protein with no domains (unlikely) found from
 # interproscan CLI will return a completely empty file (0Bytes)
 
-#' exec_interproscan
+#' runInterProScan
 #'
 #' @param filepath_fasta
 #' @param filepath_out
@@ -15,7 +15,7 @@
 #' @return
 #'
 #' @examples
-exec_interproscan <- function(
+runInterProScan <- function(
         filepath_fasta,
         filepath_out, # do not inlucde file extension since ipr handles this
         appl = c("Pfam", "Gene3D")
@@ -34,7 +34,7 @@ exec_interproscan <- function(
         return(NULL)
     }
     # read and return results
-    df_iprscan <- read_iprscan_tsv(paste0(filepath_out, ".tsv"))
+    df_iprscan <- readIPRScanTSV(paste0(filepath_out, ".tsv"))
     return(df_iprscan)
 }
 
@@ -43,7 +43,7 @@ exec_interproscan <- function(
 #' molevol_scripts/R/colnames_molevol.R)
 #'
 #' @return [chr] interproscan column names used throughout molevolvr
-get_df_ipr_col_names <- function() {
+getIPRScanColnames <- function() {
     column_names <- c(
         "AccNum", "SeqMD5Digest", "SLength", "Analysis",
         "DB.ID", "SignDesc", "StartLoc", "StopLoc", "Score",
@@ -58,7 +58,7 @@ get_df_ipr_col_names <- function() {
 #' @return [collector] a named vector of type expecatations
 #' for interproscan columns
 #'
-get_df_ipr_col_types <- function() {
+getIPRScanColtypes <- function() {
     column_types <- readr::cols(
         "AccNum" = readr::col_character(),
         "SeqMD5Digest" = readr::col_character(),
@@ -85,10 +85,10 @@ get_df_ipr_col_types <- function() {
 #' @importFrom readr read_tsv
 #'
 #' @return [tbl_df] interproscan output table
-read_iprscan_tsv <- function(filepath) {
+readIPRScanTSV <- function(filepath) {
     df_ipr <- readr::read_tsv(filepath,
-        col_types = get_df_ipr_col_types(),
-        col_names = get_df_ipr_col_names()
+        col_types = getIPRScanColtypes(),
+        col_names = getIPRScanColnames()
     )
     return(df_ipr)
 }
@@ -100,7 +100,7 @@ read_iprscan_tsv <- function(filepath) {
 #' which will be used to search for its sequence's domains (df_iprscan param)
 #' @param fasta [AAStringSet] original fasta file which was fed into interproscan
 #' @param df_iprscan [tbl_df] the output TSV of interproscan, read as a tibble with
-#' read_iprscan_tsv()
+#' readIPRScanTSV()
 #' @param analysis [chr] the domain databases to extract sequences from
 #'
 #' @importFrom dplyr arrange filter mutate rowwise relocate select ungroup
@@ -115,12 +115,12 @@ read_iprscan_tsv <- function(filepath) {
 #' setwd(path_molevol_scripts)
 #' source("R/fa2domain.R")
 #' fasta <- Biostrings::readAAStringSet("./tests/example_protein.fa")
-#' df_iprscan <- read_iprscan_tsv("./tests/example_iprscan_valid.tsv")
+#' df_iprscan <- readIPRScanTSV("./tests/example_iprscan_valid.tsv")
 #' accnum <- df_iprscan$AccNum[1]
-#' df_iprscan_domains <- make_df_iprscan_domains(accnum, fasta, df_iprscan)
+#' df_iprscan_domains <- createIPRScanDomainTable(accnum, fasta, df_iprscan)
 #' }
 #'
-make_df_iprscan_domains <- function(
+createIPRScanDomainTable <- function(
         accnum,
         fasta,
         df_iprscan,
@@ -170,17 +170,17 @@ make_df_iprscan_domains <- function(
     return(df_iprscan_domains)
 }
 
-#' Using the table returned from make_df_iprscan_domains, construct a
+#' Using the table returned from createIPRScanDomainTable, construct a
 #' domain fasta for a single accession number in the original fasta
-#' (i.e., the original fasta argument to make_df_iprscan_domains())
+#' (i.e., the original fasta argument to createIPRScanDomainTable())
 #'
-#' @param df_iprscan_domains [tbl_df] return value from make_df_iprscan_domains
+#' @param df_iprscan_domains [tbl_df] return value from createIPRScanDomainTable
 #'
 #' @importFrom Biostrings AAStringSet
 #' @importFrom dplyr mutate rowwise
 #'
 #' @return [AAStringSet] A domain fasta containing all the domains for a
-#' single protein in the original fasta passed as an argument to make_df_iprscan_domains()
+#' single protein in the original fasta passed as an argument to createIPRScanDomainTable()
 #'
 #' @examples
 #' \dontrun{
@@ -188,13 +188,13 @@ make_df_iprscan_domains <- function(
 #' setwd(path_molevol_scripts)
 #' source("R/fa2domain.R")
 #' fasta <- Biostrings::readAAStringSet("./tests/example_protein.fa")
-#' df_iprscan <- read_iprscan_tsv("./tests/example_iprscan_valid.tsv")
+#' df_iprscan <- readIPRScanTSV("./tests/example_iprscan_valid.tsv")
 #' accnum <- df_iprscan$AccNum[1]
-#' df_iprscan_domains <- make_df_iprscan_domains(accnum, fasta, df_iprscan)
-#' fasta_domains <- df_iprscan_domains |> df_iprscan_domains2fasta()
+#' df_iprscan_domains <- createIPRScanDomainTable(accnum, fasta, df_iprscan)
+#' fasta_domains <- df_iprscan_domains |> convertIPRScanDomainTable2FA()
 #' }
 #'
-df_iprscan_domains2fasta <- function(df_iprscan_domains) {
+convertIPRScanDomainTable2FA <- function(df_iprscan_domains) {
     # if there are no records (e.g., after filtering for Pfam analysis only)
     # the quickly return an empty AAStringSet object
     if (nrow(df_iprscan_domains) < 1) {
@@ -228,7 +228,7 @@ df_iprscan_domains2fasta <- function(df_iprscan_domains) {
     return(fasta_domains)
 }
 
-#' fasta2fasta_domain
+#' getDomainsFromFA
 #'
 #' @param fasta [AAStringSet] a protein (AA) fasta
 #' @param df_iprscan [tbl_df] the interproscan results from the original fasta
@@ -245,11 +245,11 @@ df_iprscan_domains2fasta <- function(df_iprscan_domains) {
 #' setwd(path_molevol_scripts)
 #' source("R/fa2domain.R")
 #' fasta <- Biostrings::readAAStringSet("./tests/example_protein.fa")
-#' df_iprscan <- read_iprscan_tsv("./tests/example_iprscan_valid.tsv")
-#' fasta2fasta_domain(fasta, df_iprscan)
+#' df_iprscan <- readIPRScanTSV("./tests/example_iprscan_valid.tsv")
+#' getDomainsFromFA(fasta, df_iprscan)
 #' }
 #'
-fasta2fasta_domain <- function(
+getDomainsFromFA <- function(
         fasta,
         df_iprscan,
         analysis = c("Pfam", "Gene3D"),
@@ -270,7 +270,7 @@ fasta2fasta_domain <- function(
         X = names(fasta),
         FUN = function(header) {
             # parse the accession number from header
-            df_iprscan_domains <- make_df_iprscan_domains(
+            df_iprscan_domains <- createIPRScanDomainTable(
                 header,
                 fasta,
                 df_iprscan,
@@ -289,7 +289,7 @@ fasta2fasta_domain <- function(
                 }
                 return(FALSE)
             }
-            fasta_domains <- df_iprscan_domains2fasta(df_iprscan_domains)
+            fasta_domains <- convertIPRScanDomainTable2FA(df_iprscan_domains)
             parent_fasta_domains <<- c(parent_fasta_domains, fasta_domains)
             return(TRUE)
         },

From e004cd4091ae3cb111873076365619ce0ba42430 Mon Sep 17 00:00:00 2001
From: teddyCodex <samted.uche@gmail.com>
Date: Sun, 6 Oct 2024 20:10:30 +0100
Subject: [PATCH 03/33] refactor function names in R/cleanup.R

---
 R/cleanup.R | 142 ++++++++++++++++++++++++++--------------------------
 1 file changed, 71 insertions(+), 71 deletions(-)

diff --git a/R/cleanup.R b/R/cleanup.R
index 3a708415..f82722f2 100755
--- a/R/cleanup.R
+++ b/R/cleanup.R
@@ -31,10 +31,10 @@
 #' @return [string] string with only alphanumerics, "_", "+", and "."
 #' @examples
 #' \dontrun{
-#' clean_string()
+#' cleanString()
 #' }
 #'
-clean_string <- function(string) {
+cleanString <- function(string) {
     # replace spaces with "_"
     string <- stringr::str_replace_all(string, "\\s+", "_")
     # keep only alphanumeric characters, "_", and "."
@@ -44,7 +44,7 @@ clean_string <- function(string) {
 
 # use the same code as upstream_scripts/00_submit_full.R's
 # get_sequences() function to extract accession numbers
-#' string2accnum
+#' extractAccNum
 #'
 #' @param string
 #'
@@ -53,9 +53,9 @@ clean_string <- function(string) {
 #'
 #' @examples
 #' \dontrun{
-#' string2accnum()
+#' extractAccNum()
 #' }
-string2accnum <- function(string) {
+extractAccNum <- function(string) {
     if (grepl("\\|", string)) {
         accnum <- strsplit(string, "\\|")[[1]][2]
         accnum <- strsplit(accnum, " ")[[1]][1]
@@ -81,9 +81,9 @@ string2accnum <- function(string) {
 #' @examples
 #' \dontrun{
 #' c("xxx", "xxx", "xxx", "yyy", "yyy") |>
-#'     make_accnums_unique()
+#'     ensureUniqAccNum()
 #' }
-make_accnums_unique <- function(accnums) {
+ensureUniqAccNum <- function(accnums) {
     # group by accnums then use the row count as a proxy
     # for the index of occurence for each accession number
     df_accnums <- tibble::tibble("accnum" = accnums)
@@ -113,14 +113,14 @@ make_accnums_unique <- function(accnums) {
 #' @examples
 #' \dontrun{
 #' AAStringSet(c("xxx" = "ATCG", "xxx" = "GGGC")) |>
-#'     cleanup_fasta_header()
+#'     cleanFAHeaders()
 #' }
-cleanup_fasta_header <- function(fasta) {
+cleanFAHeaders <- function(fasta) {
     headers <- names(fasta)
     # try parsing accession numbers from header
     headers <- purrr::map_chr(
         headers,
-        string2accnum
+        extractAccNum
     )
     # sanitize string for pathing (file read/write-ing)
     headers <- purrr::map_chr(
@@ -128,7 +128,7 @@ cleanup_fasta_header <- function(fasta) {
         fs::path_sanitize
     )
     # append an index suffix for the ith occurence of each accnum
-    headers <- make_accnums_unique(headers)
+    headers <- ensureUniqAccNum(headers)
     names(fasta) <- headers
     return(fasta)
 }
@@ -153,9 +153,9 @@ cleanup_fasta_header <- function(fasta) {
 #'
 #' @examples
 #' \dontrun{
-#' remove_empty(prot, "DomArch")
+#' removeEmptyRows(prot, "DomArch")
 #' }
-remove_empty <- function(prot, by_column = "DomArch") {
+removeEmptyRows <- function(prot, by_column = "DomArch") {
     # ?? Don't call other psp functions within these functions
     prot <- prot %>%
         as_tibble() %>%
@@ -168,7 +168,7 @@ remove_empty <- function(prot, by_column = "DomArch") {
 }
 
 ###########################
-#' repeat2s
+#' condenseRepeatedDomains
 #'
 #' @description
 #' Condense repeated domains
@@ -181,7 +181,7 @@ remove_empty <- function(prot, by_column = "DomArch") {
 #'
 #' @param prot A data frame containing 'DomArch', 'GenContext', 'ClustName' columns.
 #' @param by_column Column in which repeats are condensed to domain+domain -> domain(s).
-#' @param excluded_prots Vector of strings that repeat2s should not reduce to (s). Defaults to c()
+#' @param excluded_prots Vector of strings that condenseRepeatedDomains should not reduce to (s). Defaults to c()
 #'
 #' @return Describe return, in detail
 #' @export
@@ -191,10 +191,10 @@ remove_empty <- function(prot, by_column = "DomArch") {
 #'
 #' @examples
 #' \dontrun{
-#' repeat2s(prot, "DomArch")
+#' condenseRepeatedDomains(prot, "DomArch")
 #' }
-repeat2s <- function(prot, by_column = "DomArch", excluded_prots = c()) {
-    # If there are strings that repeat2s should not affect, the pattern to search
+condenseRepeatedDomains <- function(prot, by_column = "DomArch", excluded_prots = c()) {
+    # If there are strings that condenseRepeatedDomains should not affect, the pattern to search
     # for must be changed to exclude a search for those desired strings
 
     collapsed_prots <- paste0(excluded_prots, collapse = "\\s|")
@@ -253,10 +253,10 @@ repeat2s <- function(prot, by_column = "DomArch", excluded_prots = c()) {
 #'
 #' @examples
 #' \dontrun{
-#' replaceQMs()
+#' replaceQuestionMarks()
 #' }
 #'
-replaceQMs <- function(prot, by_column = "GenContext") {
+replaceQuestionMarks <- function(prot, by_column = "GenContext") {
     by <- sym(by_column)
 
     # Regex for finding repeated `?`
@@ -290,9 +290,9 @@ replaceQMs <- function(prot, by_column = "GenContext") {
 #'
 #' @examples
 #' \dontrun{
-#' remove_astrk()
+#' removeAsterisks()
 #' }
-remove_astrk <- function(query_data, colname = "GenContext") {
+removeAsterisks <- function(query_data, colname = "GenContext") {
     query_data[, colname] <- map(query_data[, colname], function(x) str_remove_all(x, pattern = "\\*"))
 
     return(query_data)
@@ -323,9 +323,9 @@ remove_astrk <- function(query_data, colname = "GenContext") {
 #'
 #' @examples
 #' \dontrun{
-#' remove_tails(prot, "DomArch")
+#' removeTails(prot, "DomArch")
 #' }
-remove_tails <- function(prot, by_column = "DomArch",
+removeTails <- function(prot, by_column = "DomArch",
     keep_domains = FALSE) { # !! currently redundant
 
     by_column <- sym(by_column)
@@ -369,7 +369,7 @@ remove_tails <- function(prot, by_column = "DomArch",
 #' A cleaned up version of the data table is returned.
 #'
 #' @param prot A data frame that contains columns 'Species'.
-#' @param remove_empty Boolean. If TRUE, rows with empty/unnecessary values in 'Species' are removed.
+#' @param removeEmptyRows Boolean. If TRUE, rows with empty/unnecessary values in 'Species' are removed.
 #' Default is false.
 #'
 #' @importFrom stringr coll str_replace_all
@@ -379,9 +379,9 @@ remove_tails <- function(prot, by_column = "DomArch",
 #'
 #' @examples
 #' \dontrun{
-#' cleanup_species(prot, TRUE)
+#' cleanSpecies(prot, TRUE)
 #' }
-cleanup_species <- function(prot, remove_empty = FALSE) {
+cleanSpecies <- function(prot, removeEmptyRows = FALSE) {
     # FUNCTIONS CALLED HERE, if else might be better since only two options, T and F
 
     # Create cleaned up Species column
@@ -404,8 +404,8 @@ cleanup_species <- function(prot, remove_empty = FALSE) {
         str_replace_all(coll("  ", TRUE), " ")
 
     # !! CHECK !! Species vs Species_old
-    if (remove_empty) {
-        prot <- remove_empty(prot = prot, by_column = "Species")
+    if (removeEmptyRows) {
+        prot <- removeEmptyRows(prot = prot, by_column = "Species")
     }
 
     return(prot)
@@ -425,9 +425,9 @@ cleanup_species <- function(prot, remove_empty = FALSE) {
 #' @param prot A data frame that must contain columns Query and ClustName.
 #' @param domains_rename A data frame containing the domain names to be replaced in a column 'old' and the corresponding replacement values in a column 'new'.
 #' @param domains_keep A data frame containing the domain names to be retained.
-#' @param repeat2s Boolean. If TRUE, repeated domains in 'ClustName' are condensed. Default is TRUE.
-#' @param remove_tails Boolean. If TRUE, 'ClustName' will be filtered based on domains to keep/remove. Default is FALSE.
-#' @param remove_empty  Boolean. If TRUE, rows with empty/unnecessary values in 'ClustName' are removed. Default is FALSE.
+#' @param condenseRepeatedDomains Boolean. If TRUE, repeated domains in 'ClustName' are condensed. Default is TRUE.
+#' @param removeTails Boolean. If TRUE, 'ClustName' will be filtered based on domains to keep/remove. Default is FALSE.
+#' @param removeEmptyRows  Boolean. If TRUE, rows with empty/unnecessary values in 'ClustName' are removed. Default is FALSE.
 #'
 #' @importFrom dplyr filter
 #' @importFrom stringr coll str_replace_all
@@ -437,12 +437,12 @@ cleanup_species <- function(prot, remove_empty = FALSE) {
 #'
 #' @examples
 #' \dontrun{
-#' cleanup_clust(prot, TRUE, FALSE, domains_keep, domains_rename)
+#' cleanClusters(prot, domains_rename, domains_keep, condenseRepeatedDomains = TRUE, removeTails = FALSE)
 #' }
-cleanup_clust <- function(prot,
+cleanClusters <- function(prot,
     domains_rename, domains_keep,
-    repeat2s = TRUE, remove_tails = FALSE,
-    remove_empty = FALSE) {
+    condenseRepeatedDomains = TRUE, removeTails = FALSE,
+    removeEmptyRows = FALSE) {
     # Create cleaned up ClustName column
     prot$ClustName <- prot$ClustName.orig
 
@@ -469,19 +469,19 @@ cleanup_clust <- function(prot,
 
     ## Optional parameters
     # Condense repeats
-    if (repeat2s) {
-        prot <- repeat2s(prot, by_column = "ClustName")
+    if (condenseRepeatedDomains) {
+        prot <- condenseRepeatedDomains(prot, by_column = "ClustName")
     }
     # Remove singletons
-    # if(remove_tails){
+    # if(removeTails){
     #  prot <- prot %>% filter(!grepl(".1$", ClustID))
     # }
-    if (remove_tails) {
-        prot <- remove_tails(prot, by_column = "ClustName")
+    if (removeTails) {
+        prot <- removeTails(prot, by_column = "ClustName")
     }
     # Remove empty rows
-    if (remove_empty) {
-        prot <- remove_empty(prot = prot, by_column = "ClustName")
+    if (removeEmptyRows) {
+        prot <- removeEmptyRows(prot = prot, by_column = "ClustName")
     }
 
 
@@ -509,9 +509,9 @@ cleanup_clust <- function(prot,
 #' @param domains_keep A data frame containing the domain names to be retained.
 #' @param domains_rename A data frame containing the domain names to be replaced in a column 'old' and the
 #' corresponding replacement values in a column 'new'.
-#' @param repeat2s Boolean. If TRUE, repeated domains in 'DomArch' are condensed. Default is TRUE.
-#' @param remove_tails Boolean. If TRUE, 'ClustName' will be filtered based on domains to keep/remove. Default is FALSE.
-#' @param remove_empty Boolean. If TRUE, rows with empty/unnecessary values in 'DomArch' are removed. Default is FALSE.
+#' @param condenseRepeatedDomains Boolean. If TRUE, repeated domains in 'DomArch' are condensed. Default is TRUE.
+#' @param removeTails Boolean. If TRUE, 'ClustName' will be filtered based on domains to keep/remove. Default is FALSE.
+#' @param removeEmptyRows Boolean. If TRUE, rows with empty/unnecessary values in 'DomArch' are removed. Default is FALSE.
 #' @param domains_ignore A data frame containing the domain names to be removed in a column called 'domains'
 #'
 #' @importFrom dplyr pull
@@ -522,12 +522,12 @@ cleanup_clust <- function(prot,
 #'
 #' @examples
 #' \dontrun{
-#' cleanup_domarch(prot, TRUE, FALSE, domains_keep, domains_rename, domains_ignore = NULL)
+#' cleanDomainArchitecture(prot, domains_keep = domains_keep, domains_rename = domains_rename, condenseRepeatedDomains = TRUE, removeTails = FALSE, domains_ignore = NULL)
 #' }
-cleanup_domarch <- function(prot, old = "DomArch.orig", new = "DomArch",
+cleanDomainArchitecture <- function(prot, old = "DomArch.orig", new = "DomArch",
     domains_keep, domains_rename,
-    repeat2s = TRUE, remove_tails = FALSE,
-    remove_empty = F,
+    condenseRepeatedDomains = TRUE, removeTails = FALSE,
+    removeEmptyRows = F,
     domains_ignore = NULL) {
     old_sym <- sym(old)
     new_sym <- sym(new)
@@ -577,22 +577,22 @@ cleanup_domarch <- function(prot, old = "DomArch.orig", new = "DomArch",
 
     ## Optional parameters
     # Remove singletons
-    if (remove_tails) {
-        prot <- remove_tails(prot = prot, by_column = new)
+    if (removeTails) {
+        prot <- removeTails(prot = prot, by_column = new)
     }
     # Condense repeats
-    if (repeat2s) {
+    if (condenseRepeatedDomains) {
         ## Error in UseMethod("tbl_vars") : no applicable method for 'tbl_vars' applied to an object of class "character"
-        prot <- repeat2s(prot = prot, by_column = new)
+        prot <- condenseRepeatedDomains(prot = prot, by_column = new)
     }
     # Remove empty rows
     # ! FUNCTIONS CALLED HERE, if else might be better since only two options, T and F
     # ! Make a separate function of out of this?
-    if (remove_empty) {
-        prot <- remove_empty(prot = prot, by_column = new)
+    if (removeEmptyRows) {
+        prot <- removeEmptyRows(prot = prot, by_column = new)
     }
 
-    prot <- replaceQMs(prot, new)
+    prot <- replaceQuestionMarks(prot, new)
 
     return(prot)
 }
@@ -610,7 +610,7 @@ cleanup_domarch <- function(prot, old = "DomArch.orig", new = "DomArch",
 #' @param prot A data frame that contains columns 'GenContext.orig'
 #' @param domains_rename A data frame containing the domain names to be replaced in a column 'old' and the replacement in a column 'new'.
 #' Defaults to an empty data frame with a new and old column such that non of the domains will be renamed
-#' @param repeat2s Boolean. If TRUE, repeated domains in 'GenContext' are condensed. Default is TRUE.
+#' @param condenseRepeatedDomains Boolean. If TRUE, repeated domains in 'GenContext' are condensed. Default is TRUE.
 #' @param remove_asterisk Boolean. If TRUE, asterisks in 'ClustName' are removed. Default is TRUE.
 #'
 #' @importFrom stringr str_replace_all
@@ -620,11 +620,11 @@ cleanup_domarch <- function(prot, old = "DomArch.orig", new = "DomArch",
 #'
 #' @examples
 #' \dontrun{
-#' cleanup_gencontext(prot, domains_rename, T, F)
+#' cleanGenomicContext(prot, domains_rename, T, F)
 #' }
 #'
-cleanup_gencontext <- function(prot, domains_rename = data.frame("old" = character(0), "new" = character(0), stringsAsFactors = F),
-    repeat2s = TRUE, remove_asterisk = TRUE) {
+cleanGenomicContext <- function(prot, domains_rename = data.frame("old" = character(0), "new" = character(0), stringsAsFactors = F),
+    condenseRepeatedDomains = TRUE, remove_asterisk = TRUE) {
     # Create cleaned up GenContext column
     prot$GenContext <- prot$GenContext.orig
 
@@ -641,16 +641,16 @@ cleanup_gencontext <- function(prot, domains_rename = data.frame("old" = charact
     ## Reverse operons | Straighten them out!
     prot <- reverse_operon(prot)
 
-    prot <- replaceQMs(prot, "GenContext")
+    prot <- replaceQuestionMarks(prot, "GenContext")
     ## Optional parameters
     # Condense repeats
-    if (repeat2s) {
-        prot <- repeat2s(prot, "GenContext")
+    if (condenseRepeatedDomains) {
+        prot <- condenseRepeatedDomains(prot, "GenContext")
     }
 
     # Remove the Asterisks
     if (remove_asterisk) {
-        prot <- remove_astrk(prot, colname = "GenContext")
+        prot <- removeAsterisks(prot, colname = "GenContext")
     }
 
     return(prot)
@@ -666,9 +666,9 @@ cleanup_gencontext <- function(prot, domains_rename = data.frame("old" = charact
 #'
 #' @examples
 #' \dontrun{
-#' cleanup_GeneDesc()
+#' cleanGeneDescription()
 #' }
-cleanup_GeneDesc <- function(prot, column) {
+cleanGeneDescription <- function(prot, column) {
     prot[, "GeneDesc"] <- gsub("\\.$", "", prot %>% pull(column))
     prot[, "GeneDesc"] <- gsub("%2C", ",", prot %>% pull(column))
     return(prot)
@@ -688,9 +688,9 @@ cleanup_GeneDesc <- function(prot, column) {
 #'
 #' @examples
 #' \dontrun{
-#' pick_longer_duplicate()
+#' selectLongestDuplicate()
 #' }
-pick_longer_duplicate <- function(prot, column) {
+selectLongestDuplicate <- function(prot, column) {
     col <- sym(column)
 
     prot$row.orig <- 1:nrow(prot)
@@ -736,9 +736,9 @@ pick_longer_duplicate <- function(prot, column) {
 #'
 #' @examples
 #' \dontrun{
-#' cleanup_lineage()
+#' cleanLineage()
 #' }
-cleanup_lineage <- function(prot, lins_rename) {
+cleanLineage <- function(prot, lins_rename) {
     for (i in 1:nrow(lins_rename)) {
         prot$Lineage <- gsub(lins_rename$old[i], lins_rename$new[i],
             x = prot$Lineage,

From a8669a444696333e862525f85bc9775cee67e539 Mon Sep 17 00:00:00 2001
From: teddyCodex <samted.uche@gmail.com>
Date: Mon, 7 Oct 2024 01:23:07 +0100
Subject: [PATCH 04/33] refactor function names in R/CHANGED-pre-msa-tree.R

---
 R/CHANGED-pre-msa-tree.R | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/R/CHANGED-pre-msa-tree.R b/R/CHANGED-pre-msa-tree.R
index ef334232..c4a97589 100644
--- a/R/CHANGED-pre-msa-tree.R
+++ b/R/CHANGED-pre-msa-tree.R
@@ -2,7 +2,7 @@
 ## Includes the following functions:
 ## convertAlignment2FA, convert2TitleCase, addLeaves2Alignment
 ## generateAllAlignments2FA
-## convertAlignment2TSV??, convertAccNumber2FA??
+## convertAlignment2TSV??, convertAccNum2FA??
 ## Created from addLeaves2Alignment.R, convertAlignment2FA.R, all_aln2fa.R
 ## Modified: Dec 24, 2019 | Jan 2021
 ## Janani Ravi (@jananiravi) & Samuel Chen (@samuelzornchen)
@@ -335,7 +335,7 @@ convertAlignment2FA <- function(aln_file = "",
 #' @export
 #'
 #' @examples
-mapAccession2Name <- function(line, acc2name, acc_col = "AccNum", name_col = "Name") {
+mapAcc2Name <- function(line, acc2name, acc_col = "AccNum", name_col = "Name") {
     # change to be the name equivalent to an addNames column
     # Find the first ' '
     end_acc <- str_locate(line, " ")[[1]]
@@ -365,7 +365,7 @@ mapAccession2Name <- function(line, acc2name, acc_col = "AccNum", name_col = "Na
 #'
 #' @examples
 renameFA <- function(fa_path, outpath,
-    replacement_function = mapAccession2Name, ...) {
+    replacement_function = mapAcc2Name, ...) {
     lines <- read_lines(fa_path)
     res <- map(lines, function(x) {
         if (strtrim(x, 1) == ">") {
@@ -663,7 +663,7 @@ get_accnums_from_fasta_file <- function(fasta_file) {
 
 
 ################################
-## convertAccNumber2FA
+## convertAccNum2FA
 #######
 ## 1 ##
 #######

From b15e294ba6321fca22c64732ed13703de9f82784 Mon Sep 17 00:00:00 2001
From: teddyCodex <samted.uche@gmail.com>
Date: Mon, 7 Oct 2024 01:40:30 +0100
Subject: [PATCH 05/33] refactor function names in R/fa2domain.R

---
 R/fa2domain.R | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/R/fa2domain.R b/R/fa2domain.R
index ea926cb4..01a56918 100644
--- a/R/fa2domain.R
+++ b/R/fa2domain.R
@@ -4,7 +4,7 @@
 # - a protein with no domains (unlikely) found from
 # interproscan CLI will return a completely empty file (0Bytes)
 
-#' runInterProScan
+#' runIPRScan
 #'
 #' @param filepath_fasta
 #' @param filepath_out
@@ -15,7 +15,7 @@
 #' @return
 #'
 #' @examples
-runInterProScan <- function(
+runIPRScan <- function(
         filepath_fasta,
         filepath_out, # do not inlucde file extension since ipr handles this
         appl = c("Pfam", "Gene3D")
@@ -43,7 +43,7 @@ runInterProScan <- function(
 #' molevol_scripts/R/colnames_molevol.R)
 #'
 #' @return [chr] interproscan column names used throughout molevolvr
-getIPRScanColnames <- function() {
+getIPRScanColNames <- function() {
     column_names <- c(
         "AccNum", "SeqMD5Digest", "SLength", "Analysis",
         "DB.ID", "SignDesc", "StartLoc", "StopLoc", "Score",
@@ -58,7 +58,7 @@ getIPRScanColnames <- function() {
 #' @return [collector] a named vector of type expecatations
 #' for interproscan columns
 #'
-getIPRScanColtypes <- function() {
+getIPRScanColTypes <- function() {
     column_types <- readr::cols(
         "AccNum" = readr::col_character(),
         "SeqMD5Digest" = readr::col_character(),
@@ -87,8 +87,8 @@ getIPRScanColtypes <- function() {
 #' @return [tbl_df] interproscan output table
 readIPRScanTSV <- function(filepath) {
     df_ipr <- readr::read_tsv(filepath,
-        col_types = getIPRScanColtypes(),
-        col_names = getIPRScanColnames()
+        col_types = getIPRScanColTypes(),
+        col_names = getIPRScanColNames()
     )
     return(df_ipr)
 }

From be3966d004d5c91db4018ab7a5d2c9aac7a06ced Mon Sep 17 00:00:00 2001
From: teddyCodex <samted.uche@gmail.com>
Date: Mon, 7 Oct 2024 02:25:34 +0100
Subject: [PATCH 06/33] refactored function names in R/plotting.R

---
 R/plotting.R | 64 ++++++++++++++++++++++++++--------------------------
 1 file changed, 32 insertions(+), 32 deletions(-)

diff --git a/R/plotting.R b/R/plotting.R
index 7abd06d4..da95ea5f 100644
--- a/R/plotting.R
+++ b/R/plotting.R
@@ -31,9 +31,9 @@
 #'
 #' @examples
 #' \dontrun{
-#' shorten_lineage()
+#' shortenLineage()
 #' }
-shorten_lineage <- function(data, colname = "Lineage", abr_len = 1) {
+shortenLineage <- function(data, colname = "Lineage", abr_len = 1) {
     abbrv <- function(x) {
         pos_gt <- str_locate(x, ">")
         pos_gt <- pos_gt[1]
@@ -94,9 +94,9 @@ shorten_lineage <- function(data, colname = "Lineage", abr_len = 1) {
 #'
 #' @examples
 #' \dontrun{
-#' upset.plot(pspa.sub, 10, "da2doms")
+#' plotUpSet(pspa.sub, 10, "da2doms")
 #' }
-upset.plot <- function(query_data = "toast_rack.sub",
+plotUpSet <- function(query_data = "toast_rack.sub",
     colname = "DomArch", cutoff = 90,
     RowsCutoff = FALSE, text.scale = 1.5,
     point.size = 2.2, line.size = 0.8) {
@@ -259,9 +259,9 @@ upset.plot <- function(query_data = "toast_rack.sub",
 #'
 #' @examples
 #' \dontrun{
-#' lineage.DA.plot(toast_rack_data, 10, "DomArch.norep", "da2doms")
+#' plotLineageDA(toast_rack_data, 10, "DomArch.norep", "da2doms")
 #' }
-lineage.DA.plot <- function(query_data = "prot",
+plotLineageDA <- function(query_data = "prot",
     colname = "DomArch",
     cutoff = 90,
     RowsCutoff = FALSE,
@@ -271,7 +271,7 @@ lineage.DA.plot <- function(query_data = "prot",
     # @param type Character. Default is "da2doms" for Domain Architectures.
     # Other alternative: "gc2da" for Genomic Contexts. -- unused parameter
 
-    query_data <- shorten_lineage(query_data, "Lineage", abr_len = 1)
+    query_data <- shortenLineage(query_data, "Lineage", abr_len = 1)
 
     query.summ.byLin <- query_data %>% total_counts(cutoff = cutoff, column = colname, RowsCutoff = RowsCutoff)
 
@@ -355,9 +355,9 @@ lineage.DA.plot <- function(query_data = "prot",
 #'
 #' @examples
 #' \dontrun{
-#' lineage.Query.plot(prot, c("PspA", "PspB", "PspC", "PspM", "PspN"), 95)
+#' plotLineageQuery(prot, c("PspA", "PspB", "PspC", "PspM", "PspN"), 95)
 #' }
-lineage.Query.plot <- function(query_data = all,
+plotLineageQuery <- function(query_data = all,
     queries,
     colname = "ClustName",
     cutoff, color = "default") {
@@ -394,7 +394,7 @@ lineage.Query.plot <- function(query_data = all,
     # query_data contains all rows that possess a lineage
     query_data <- query_data %>% filter(grepl("a", Lineage))
 
-    query_data <- shorten_lineage(query_data, "Lineage", abr_len = 1)
+    query_data <- shortenLineage(query_data, "Lineage", abr_len = 1)
     query_lin_counts <- data.frame("Query" = character(0), "Lineage" = character(0), "count" = integer())
     for (q in queries) {
         query_lin <- query_by_lineage(data = query_data, query = q, column = {{ col }}, by = "Lineage")
@@ -492,10 +492,10 @@ lineage.Query.plot <- function(query_data = all,
 #'
 #' @examples
 #' \dontrun{
-#' lineage.neighbors.plot(pspa_data, pspa, "GenContext.norep", "da2doms")
+#' plotLineageNeighbors(pspa_data, pspa, "GenContext.norep", "da2doms")
 #' }
 #'
-lineage.neighbors.plot <- function(query_data = "prot", query = "pspa",
+plotLineageNeighbors <- function(query_data = "prot", query = "pspa",
     colname = "GenContext.norep") {
     query_data <- query_data %>% filter(grepl("a", Lineage))
     query.GCDA <- read_delim(paste0("Top-", query, "-neighbors.txt"),
@@ -567,9 +567,9 @@ lineage.neighbors.plot <- function(query_data = "prot", query = "pspa",
 #'
 #' @examples
 #' \dontrun{
-#' lineage.domain_repeats.plot()
+#' plotLineageDomainRepeats()
 #' }
-lineage.domain_repeats.plot <- function(query_data, colname) {
+plotLineageDomainRepeats <- function(query_data, colname) {
     # query_data <- pspa_data
     # colname <- "SIG.TM.LADB"
 
@@ -629,7 +629,7 @@ lineage.domain_repeats.plot <- function(query_data, colname) {
 }
 
 
-#' LineagePlot
+#' plotLineageHeatmap
 #'
 #' @description
 #' Generate a lineage plot
@@ -651,7 +651,7 @@ lineage.domain_repeats.plot <- function(query_data, colname) {
 #'
 #' @examples
 #' \dontrun{
-#' LineagePlot(psp_data,
+#' plotLineageHeatmap(psp_data,
 #'     c(
 #'         "PspA", "Snf7", "Classical-AAA", "PspF", "PspB", "PspC", "ClgR", "PspM",
 #'         "Thioredoxin", "PspN_N", "DUF3046", "LiaI-LiaF-TM", "Toast_rack", "REC",
@@ -664,8 +664,8 @@ lineage.domain_repeats.plot <- function(query_data, colname) {
 #' )
 #' }
 #'
-LineagePlot <- function(prot, domains_of_interest, level = 3, label.size = 8) {
-    LevelReduction <- function(lin) {
+plotLineageHeatmap <- function(prot, domains_of_interest, level = 3, label.size = 8) {
+    .LevelReduction <- function(lin) {
         if (level == 1) {
             gt_loc <- str_locate(lin, ">")[[1]]
             if (is.na(gt_loc)) {
@@ -703,7 +703,7 @@ LineagePlot <- function(prot, domains_of_interest, level = 3, label.size = 8) {
         all_grouped <- dplyr::union(all_grouped, domSub)
     }
 
-    GetKingdom <- function(lin) {
+    .GetKingdom <- function(lin) {
         gt_loc <- str_locate(lin, ">")[, "start"]
 
         if (is.na(gt_loc)) {
@@ -715,12 +715,12 @@ LineagePlot <- function(prot, domains_of_interest, level = 3, label.size = 8) {
         }
     }
 
-    all_grouped <- all_grouped %>% mutate(ReducedLin = unlist(purrr::map(Lineage, LevelReduction)))
+    all_grouped <- all_grouped %>% mutate(ReducedLin = unlist(purrr::map(Lineage, .LevelReduction)))
 
     all_grouped_reduced <- all_grouped %>%
         group_by(Query, ReducedLin) %>%
         summarize("count" = sum(count)) %>%
-        mutate(Kingdom = unlist(purrr::map(ReducedLin, GetKingdom)))
+        mutate(Kingdom = unlist(purrr::map(ReducedLin, .GetKingdom)))
 
     lin_counts <- all_grouped_reduced %>%
         group_by(Kingdom, ReducedLin) %>%
@@ -814,9 +814,9 @@ LineagePlot <- function(prot, domains_of_interest, level = 3, label.size = 8) {
 #'
 #' @examples
 #' \dontrun{
-#' stacked_lin_plot()
+#' plotStackedLineage()
 #' }
-stacked_lin_plot <- function(prot, column = "DomArch", cutoff, Lineage_col = "Lineage",
+plotStackedLineage <- function(prot, column = "DomArch", cutoff, Lineage_col = "Lineage",
     xlabel = "Domain Architecture",
     reduce_lineage = TRUE,
     label.size = 8,
@@ -828,7 +828,7 @@ stacked_lin_plot <- function(prot, column = "DomArch", cutoff, Lineage_col = "Li
     col <- sym(column)
 
     if (reduce_lineage) {
-        prot <- shorten_lineage(prot, Lineage_col, abr_len = 3)
+        prot <- shortenLineage(prot, Lineage_col, abr_len = 3)
     }
 
     total_count <- total_counts(prot, column, cutoff, lineage_col = Lineage_col)
@@ -935,7 +935,7 @@ stacked_lin_plot <- function(prot, column = "DomArch", cutoff, Lineage_col = "Li
 ################
 #### NEEDS SOME WORK
 
-#' Wordcloud3
+#' plotWordCloud3
 #'
 #' @param data
 #' @param size
@@ -1048,9 +1048,9 @@ wordcloud3 <- function(data, size = 1, minSize = 0, gridSize = 0, fontFamily = "
 #'
 #' @examples
 #' \dontrun{
-#' wordcloud_element(prot, "da2doms", 10)
+#' createWordCloudElement(prot, "da2doms", 10)
 #' }
-wordcloud_element <- function(query_data = "prot",
+createWordCloudElement <- function(query_data = "prot",
     colname = "DomArch",
     cutoff = 70,
     UsingRowsCutoff = FALSE) {
@@ -1125,9 +1125,9 @@ wordcloud_element <- function(query_data = "prot",
 #'
 #' @examples
 #' \dontrun{
-#' wordcloud_element(prot, "da2doms", 10)
+#' createWordCloud2Element(prot, "da2doms", 10)
 #' }
-wordcloud2_element <- function(query_data = "prot",
+createWordCloud2Element <- function(query_data = "prot",
     colname = "DomArch",
     cutoff = 70,
     UsingRowsCutoff = FALSE) {
@@ -1194,9 +1194,9 @@ wordcloud2_element <- function(query_data = "prot",
 #'
 #' @examples
 #' \dontrun{
-#' lineage_sunburst()
+#' plotLineageSunburst()
 #' }
-lineage_sunburst <- function(prot, lineage_column = "Lineage",
+plotLineageSunburst <- function(prot, lineage_column = "Lineage",
     type = "sunburst",
     levels = 2, colors = NULL, legendOrder = NULL, showLegend = TRUE, maxLevels = 5) {
     lin_col <- sym(lineage_column)
@@ -1278,7 +1278,7 @@ lineage_sunburst <- function(prot, lineage_column = "Lineage",
 
 
 ## COMMENTED LINEAGE.DA.PLOT
-# lineage.plot <- function(query_data, cutoff, type) {
+# plotLineage <- function(query_data, cutoff, type) {
 # 	switch(type,
 # 				 da2doms={wc <- DA.doms.wc; words <- toast_rack.DAdoms; colname <- "DomArch.norep"; toast_rack.summ.byLin <- toast_rack.DA.summ.byLin}, # elements <- toast_rack.DA;
 # 				 gc2da={wc <- GC.DA.wc; words <- toast_rack.GCDA; colname <- "GenContext.norep"; toast_rack.summ.byLin <- toast_rack.GC.summ.byLin} # elements <- toast_rack.GC;

From 8d4da8efe6a0119f55452f43a7d65d84ed2f3115 Mon Sep 17 00:00:00 2001
From: Awa Synthia <ndahili14@gmail.com>
Date: Mon, 7 Oct 2024 08:13:27 +0300
Subject: [PATCH 07/33] defunct functions in acc2lin

Signed-off-by: Awa Synthia <ndahili14@gmail.com>
---
 NAMESPACE         |  5 ++++
 R/acc2lin.R       | 17 +++++++-----
 R/deprecate.R     | 40 ++++++++++++++++++++++++++++
 man/acc2lin.Rd    | 68 ++++++++++++++++++++++++++++++++++++++++++-----
 man/add_lins.Rd   | 23 ++--------------
 man/deprecate.Rd  | 43 ++++++++++++++++++++++++++++++
 man/efetch_ipg.Rd | 17 +-----------
 man/ipg2lin.Rd    | 24 +----------------
 man/sink.reset.Rd | 19 -------------
 9 files changed, 165 insertions(+), 91 deletions(-)
 create mode 100644 R/deprecate.R
 create mode 100644 man/deprecate.Rd
 delete mode 100644 man/sink.reset.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 16cf0813..4dbb858b 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -4,14 +4,17 @@ export(BinaryDomainNetwork)
 export(DownloadAssemblySummary)
 export(GCA2lin)
 export(GenContextNetwork)
+export(IPG2Lineage)
 export(LineagePlot)
 export(RepresentativeAccNums)
+export(acc2Lineage)
 export(acc2fa)
 export(acc2lin)
 export(add_leaves)
 export(add_lins)
 export(add_name)
 export(add_tax)
+export(addlineage)
 export(advanced_opts2est_walltime)
 export(alignFasta)
 export(assert_count_df)
@@ -34,6 +37,7 @@ export(create_all_col_params)
 export(create_lineage_lookup)
 export(create_one_col_params)
 export(domain_network)
+export(efetchIPG)
 export(efetch_ipg)
 export(filter_by_doms)
 export(filter_freq)
@@ -79,6 +83,7 @@ export(run_rpsblast)
 export(send_job_status_email)
 export(shorten_lineage)
 export(sink.reset)
+export(sinkReset)
 export(stacked_lin_plot)
 export(string2accnum)
 export(summ.DA)
diff --git a/R/acc2lin.R b/R/acc2lin.R
index f8d71949..dca24140 100644
--- a/R/acc2lin.R
+++ b/R/acc2lin.R
@@ -10,13 +10,14 @@
 #' Sink Reset
 #'
 #' @return No return, but run to close all outstanding `sink()`s
+#' @rdname acc2lin
 #' @export
 #'
 #' @examples
 #' \dontrun{
 #' sink.reset()
 #' }
-sink.reset <- function() {
+sinkReset <- function() {
     for (i in seq_len(sink.number())) {
         sink(NULL)
     }
@@ -37,14 +38,15 @@ sink.reset <- function() {
 #' @importFrom rlang sym
 #'
 #' @return Describe return, in detail
+#' @rdname acc2lin
 #' @export
 #'
 #' @examples
 #' \dontrun{
 #' add_lins()
 #' }
-add_lins <- function(df, acc_col = "AccNum", assembly_path,
-    lineagelookup_path, ipgout_path = NULL, plan = "sequential") {
+addlineage <- function(df, acc_col = "AccNum", assembly_path,
+    lineagelookup_path, ipgout_path = NULL, plan = "sequential", ...) {
     s_acc_col <- sym(acc_col)
     accessions <- df %>% pull(acc_col)
     lins <- acc2lin(accessions, assembly_path, lineagelookup_path, ipgout_path, plan)
@@ -81,13 +83,14 @@ add_lins <- function(df, acc_col = "AccNum", assembly_path,
 #' @param plan
 #'
 #' @return Describe return, in detail
+#' @rdname acc2lin
 #' @export
 #'
 #' @examples
 #' \dontrun{
 #' acc2lin()
 #' }
-acc2lin <- function(accessions, assembly_path, lineagelookup_path, ipgout_path = NULL, plan = "sequential") {
+acc2Lineage <- function(accessions, assembly_path, lineagelookup_path, ipgout_path = NULL, plan = "sequential", ...) {
     tmp_ipg <- F
     if (is.null(ipgout_path)) {
         tmp_ipg <- T
@@ -119,13 +122,14 @@ acc2lin <- function(accessions, assembly_path, lineagelookup_path, ipgout_path =
 #' @importFrom rentrez entrez_fetch
 #'
 #' @return Describe return, in detail
+#' @rdname acc2lin
 #' @export
 #'
 #' @examples
 #' \dontrun{
 #' efetch_ipg()
 #' }
-efetch_ipg <- function(accnums, out_path, plan = "sequential") {
+efetchIPG <- function(accnums, out_path, plan = "sequential", ...) {
     if (length(accnums) > 0) {
         partition <- function(in_data, groups) {
             # \\TODO This function should be defined outside of efetch_ipg(). It can be non-exported/internal
@@ -187,6 +191,7 @@ efetch_ipg <- function(accnums, out_path, plan = "sequential") {
 #' @importFrom data.table fread
 #'
 #' @return Describe return, in detail
+#' @rdname acc2lin
 #' @export
 #'
 #' @examples
@@ -194,7 +199,7 @@ efetch_ipg <- function(accnums, out_path, plan = "sequential") {
 #' ipg2lin()
 #' }
 #'
-ipg2lin <- function(accessions, ipg_file, assembly_path, lineagelookup_path) {
+IPG2Lineage <- function(accessions, ipg_file, assembly_path, lineagelookup_path, ...) {
     ipg_dt <- fread(ipg_file, sep = "\t", fill = T)
 
     ipg_dt <- ipg_dt[Protein %in% accessions]
diff --git a/R/deprecate.R b/R/deprecate.R
new file mode 100644
index 00000000..2de0bbcd
--- /dev/null
+++ b/R/deprecate.R
@@ -0,0 +1,40 @@
+#' These functions will be deprecated. Please use other functions instead.
+#' 
+#' @name deprecate
+#' 
+NULL
+
+#' @rdname deprecate
+#' @export
+sink.reset <- function() {
+    warning("'sink.reset' is deprecated. Use 'sinkReset' instead.")
+    sinkReset() # backward-compatible shim: delegate to the renamed sinkReset()
+}
+
+#' @rdname deprecate
+#' @export
+add_lins <- function(df, ...) {
+    warning("'add_lins' is deprecated. Use 'addlineage' instead.")
+    addlineage(df, ...) # backward-compatible shim: all other arguments pass through `...`
+}
+
+#' @rdname deprecate
+#' @export
+acc2lin <- function(accessions, ...) {
+    warning("'acc2lin' is deprecated. Use 'acc2Lineage' instead.")
+    acc2Lineage(accessions, ...) # backward-compatible shim: all other arguments pass through `...`
+}
+
+#' @rdname deprecate
+#' @export
+efetch_ipg <- function(accnums, ...) {
+    warning("'efetch_ipg' is deprecated. Use 'efetchIPG' instead.")
+    efetchIPG(accnums, ...) # backward-compatible shim: all other arguments pass through `...`
+}
+
+#' @rdname deprecate
+#' @export
+ipg2lin <- function(accessions, ...) {
+    warning("'ipg2lin' is deprecated. Use 'IPG2Lineage' instead.")
+    IPG2Lineage(accessions, ...) # backward-compatible shim: all other arguments pass through `...`
+}
\ No newline at end of file
diff --git a/man/acc2lin.Rd b/man/acc2lin.Rd
index 6255b290..f008be5f 100644
--- a/man/acc2lin.Rd
+++ b/man/acc2lin.Rd
@@ -1,17 +1,39 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/acc2lin.R, R/lineage.R
-\name{acc2lin}
+\name{sinkReset}
+\alias{sinkReset}
+\alias{addlineage}
+\alias{acc2Lineage}
+\alias{efetchIPG}
+\alias{IPG2Lineage}
 \alias{acc2lin}
-\title{acc2lin}
+\title{Sink Reset}
 \usage{
-acc2lin(
+sinkReset()
+
+addlineage(
+  df,
+  acc_col = "AccNum",
+  assembly_path,
+  lineagelookup_path,
+  ipgout_path = NULL,
+  plan = "sequential",
+  ...
+)
+
+acc2Lineage(
   accessions,
   assembly_path,
   lineagelookup_path,
   ipgout_path = NULL,
-  plan = "multicore"
+  plan = "sequential",
+  ...
 )
 
+efetchIPG(accnums, out_path, plan = "sequential", ...)
+
+IPG2Lineage(accessions, ipg_file, assembly_path, lineagelookup_path, ...)
+
 acc2lin(
   accessions,
   assembly_path,
@@ -21,8 +43,6 @@ acc2lin(
 )
 }
 \arguments{
-\item{accessions}{Character vector of protein accessions}
-
 \item{assembly_path}{String of the path to the assembly_summary path
 This file can be generated using the "DownloadAssemblySummary()" function}
 
@@ -33,14 +53,37 @@ This file can be generated using the "DownloadAssemblySummary()" function}
 on the ipg database. If NULL, the file will not be written. Defaults to NULL}
 
 \item{plan}{}
+
+\item{accessions}{Character vector of protein accessions}
+
+\item{accnums}{Character vector containing the accession numbers to query on
+the ipg database}
+
+\item{out_path}{Path to write the efetch results to}
+
+\item{ipg_file}{Filepath to the file containing results of an efetch run on the
+ipg database. The protein accession in 'accessions' should be contained in this
+file}
 }
 \value{
+No return, but run to close all outstanding \code{sink()}s
+
+Describe return, in detail
+
+Describe return, in detail
+
+Describe return, in detail
+
 Describe return, in detail
 }
 \description{
 This function combines 'efetch_ipg()' and 'ipg2lin()' to map a set
 of protein accessions to their assembly (GCA_ID), tax ID, and lineage.
 
+Perform efetch on the ipg database and write the results to out_path
+
+Takes the resulting file of an efetch run on the ipg database and
+
 Function to map protein accession numbers to lineage
 
 This function combines 'efetch_ipg()' and 'ipg2lin()' to map a set
@@ -48,8 +91,21 @@ of protein accessions to their assembly (GCA_ID), tax ID, and lineage.
 }
 \examples{
 \dontrun{
+sink.reset()
+}
+\dontrun{
+add_lins()
+}
+\dontrun{
 acc2lin()
 }
+\dontrun{
+efetch_ipg()
+}
+\dontrun{
+ipg2lin()
+}
+
 }
 \author{
 Samuel Chen, Janani Ravi
diff --git a/man/add_lins.Rd b/man/add_lins.Rd
index 226e428d..9ac343ea 100644
--- a/man/add_lins.Rd
+++ b/man/add_lins.Rd
@@ -1,18 +1,9 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/acc2lin.R, R/lineage.R
+% Please edit documentation in R/lineage.R
 \name{add_lins}
 \alias{add_lins}
-\title{Add Lineages}
+\title{add_lins}
 \usage{
-add_lins(
-  df,
-  acc_col = "AccNum",
-  assembly_path,
-  lineagelookup_path,
-  ipgout_path = NULL,
-  plan = "multicore"
-)
-
 add_lins(
   df,
   acc_col = "AccNum",
@@ -25,16 +16,6 @@ add_lins(
 \arguments{
 \item{plan}{}
 }
-\value{
-Describe return, in detail
-}
 \description{
-Add Lineages
-
 add_lins
 }
-\examples{
-\dontrun{
-add_lins()
-}
-}
diff --git a/man/deprecate.Rd b/man/deprecate.Rd
new file mode 100644
index 00000000..b8f0731f
--- /dev/null
+++ b/man/deprecate.Rd
@@ -0,0 +1,43 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/deprecate.R
+\name{deprecate}
+\alias{deprecate}
+\alias{sink.reset}
+\alias{add_lins}
+\alias{acc2lin}
+\alias{efetch_ipg}
+\alias{ipg2lin}
+\title{These functions will be deprecated. Please use other functions instead.}
+\usage{
+sink.reset()
+
+add_lins(
+  df,
+  acc_col = "AccNum",
+  assembly_path,
+  lineagelookup_path,
+  ipgout_path = NULL,
+  plan = "multicore"
+)
+
+acc2lin(
+  accessions,
+  assembly_path,
+  lineagelookup_path,
+  ipgout_path = NULL,
+  plan = "multicore"
+)
+
+efetch_ipg(accessions, out_path, plan = "multicore")
+
+ipg2lin(
+  accessions,
+  ipg_file,
+  refseq_assembly_path,
+  genbank_assembly_path,
+  lineagelookup_path
+)
+}
+\description{
+These functions will be deprecated. Please use other functions instead.
+}
diff --git a/man/efetch_ipg.Rd b/man/efetch_ipg.Rd
index ec5b6bcb..efe1e8c5 100644
--- a/man/efetch_ipg.Rd
+++ b/man/efetch_ipg.Rd
@@ -1,11 +1,9 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/acc2lin.R, R/lineage.R
+% Please edit documentation in R/lineage.R
 \name{efetch_ipg}
 \alias{efetch_ipg}
 \title{efetch_ipg}
 \usage{
-efetch_ipg(accessions, out_path, plan = "multicore")
-
 efetch_ipg(accessions, out_path, plan = "multicore")
 }
 \arguments{
@@ -15,22 +13,9 @@ the ipg database}
 \item{out_path}{Path to write the efetch results to}
 
 \item{plan}{}
-
-\item{accnums}{Character vector containing the accession numbers to query on
-the ipg database}
-}
-\value{
-Describe return, in detail
 }
 \description{
 Perform efetch on the ipg database and write the results to out_path
-
-Perform efetch on the ipg database and write the results to out_path
-}
-\examples{
-\dontrun{
-efetch_ipg()
-}
 }
 \author{
 Samuel Chen, Janani Ravi
diff --git a/man/ipg2lin.Rd b/man/ipg2lin.Rd
index 3a14eada..6e2b4c6f 100644
--- a/man/ipg2lin.Rd
+++ b/man/ipg2lin.Rd
@@ -1,17 +1,9 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/acc2lin.R, R/lineage.R
+% Please edit documentation in R/lineage.R
 \name{ipg2lin}
 \alias{ipg2lin}
 \title{ipg2lin}
 \usage{
-ipg2lin(
-  accessions,
-  ipg_file,
-  refseq_assembly_path,
-  genbank_assembly_path,
-  lineagelookup_path
-)
-
 ipg2lin(
   accessions,
   ipg_file,
@@ -30,24 +22,10 @@ file}
 \item{lineagelookup_path}{String of the path to the lineage lookup file
 (taxid to lineage mapping). This file can be generated using the
 "create_lineage_lookup()" function}
-
-\item{assembly_path}{String of the path to the assembly_summary path
-This file can be generated using the "DownloadAssemblySummary()" function}
-}
-\value{
-Describe return, in detail
 }
 \description{
-Takes the resulting file of an efetch run on the ipg database and
-
 Takes the resulting file of an efetch run on the ipg database and
 append lineage, and taxid columns
-}
-\examples{
-\dontrun{
-ipg2lin()
-}
-
 }
 \author{
 Samuel Chen, Janani Ravi
diff --git a/man/sink.reset.Rd b/man/sink.reset.Rd
deleted file mode 100644
index a31b841d..00000000
--- a/man/sink.reset.Rd
+++ /dev/null
@@ -1,19 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/acc2lin.R
-\name{sink.reset}
-\alias{sink.reset}
-\title{Sink Reset}
-\usage{
-sink.reset()
-}
-\value{
-No return, but run to close all outstanding \code{sink()}s
-}
-\description{
-Sink Reset
-}
-\examples{
-\dontrun{
-sink.reset()
-}
-}

From 3804b9969edc1a6966740829e711056d8e9a05f3 Mon Sep 17 00:00:00 2001
From: teddyCodex <samted.uche@gmail.com>
Date: Mon, 7 Oct 2024 09:26:13 +0100
Subject: [PATCH 08/33] update NAMESPACE with roxygen2

---
 NAMESPACE                                     | 22 +++++++++----------
 ..._element.Rd => createWordCloud2Element.Rd} |  8 +++----
 ...d_element.Rd => createWordCloudElement.Rd} |  8 +++----
 man/{lineage.DA.plot.Rd => plotLineageDA.Rd}  |  8 +++----
 ...ts.plot.Rd => plotLineageDomainRepeats.Rd} |  8 +++----
 man/{LineagePlot.Rd => plotLineageHeatmap.Rd} | 10 ++++-----
 ...ghbors.plot.Rd => plotLineageNeighbors.Rd} |  8 +++----
 ...eage.Query.plot.Rd => plotLineageQuery.Rd} |  8 +++----
 ...age_sunburst.Rd => plotLineageSunburst.Rd} |  8 +++----
 ...cked_lin_plot.Rd => plotStackedLineage.Rd} |  8 +++----
 man/{upset.plot.Rd => plotUpSet.Rd}           |  8 +++----
 man/{shorten_lineage.Rd => shortenLineage.Rd} |  8 +++----
 man/wordcloud3.Rd                             |  4 ++--
 13 files changed, 58 insertions(+), 58 deletions(-)
 rename man/{wordcloud2_element.Rd => createWordCloud2Element.Rd} (87%)
 rename man/{wordcloud_element.Rd => createWordCloudElement.Rd} (88%)
 rename man/{lineage.DA.plot.Rd => plotLineageDA.Rd} (90%)
 rename man/{lineage.domain_repeats.plot.Rd => plotLineageDomainRepeats.Rd} (61%)
 rename man/{LineagePlot.Rd => plotLineageHeatmap.Rd} (84%)
 rename man/{lineage.neighbors.plot.Rd => plotLineageNeighbors.Rd} (87%)
 rename man/{lineage.Query.plot.Rd => plotLineageQuery.Rd} (84%)
 rename man/{lineage_sunburst.Rd => plotLineageSunburst.Rd} (92%)
 rename man/{stacked_lin_plot.Rd => plotStackedLineage.Rd} (84%)
 rename man/{upset.plot.Rd => plotUpSet.Rd} (94%)
 rename man/{shorten_lineage.Rd => shortenLineage.Rd} (63%)

diff --git a/NAMESPACE b/NAMESPACE
index 16cf0813..59de0ad0 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -4,7 +4,6 @@ export(BinaryDomainNetwork)
 export(DownloadAssemblySummary)
 export(GCA2lin)
 export(GenContextNetwork)
-export(LineagePlot)
 export(RepresentativeAccNums)
 export(acc2fa)
 export(acc2lin)
@@ -30,6 +29,8 @@ export(convert_fa2tre)
 export(count_bycol)
 export(count_to_sunburst)
 export(count_to_treemap)
+export(createWordCloud2Element)
+export(createWordCloudElement)
 export(create_all_col_params)
 export(create_lineage_lookup)
 export(create_one_col_params)
@@ -52,17 +53,20 @@ export(get_proc_weights)
 export(ipg2lin)
 export(ipr2viz)
 export(ipr2viz_web)
-export(lineage.DA.plot)
-export(lineage.Query.plot)
-export(lineage.domain_repeats.plot)
-export(lineage.neighbors.plot)
-export(lineage_sunburst)
 export(make_job_results_url)
 export(make_opts2procs)
 export(map_acc2name)
 export(map_advanced_opts2procs)
 export(msa_pdf)
 export(pick_longer_duplicate)
+export(plotLineageDA)
+export(plotLineageDomainRepeats)
+export(plotLineageHeatmap)
+export(plotLineageNeighbors)
+export(plotLineageQuery)
+export(plotLineageSunburst)
+export(plotStackedLineage)
+export(plotUpSet)
 export(plot_estimated_walltimes)
 export(prot2tax)
 export(prot2tax_old)
@@ -77,9 +81,8 @@ export(reverse_operon)
 export(run_deltablast)
 export(run_rpsblast)
 export(send_job_status_email)
-export(shorten_lineage)
+export(shortenLineage)
 export(sink.reset)
-export(stacked_lin_plot)
 export(string2accnum)
 export(summ.DA)
 export(summ.DA.byLin)
@@ -90,10 +93,7 @@ export(summarize_bylin)
 export(theme_genes2)
 export(to_titlecase)
 export(total_counts)
-export(upset.plot)
-export(wordcloud2_element)
 export(wordcloud3)
-export(wordcloud_element)
 export(write.MsaAAMultipleAlignment)
 export(write_proc_medians_table)
 export(write_proc_medians_yml)
diff --git a/man/wordcloud2_element.Rd b/man/createWordCloud2Element.Rd
similarity index 87%
rename from man/wordcloud2_element.Rd
rename to man/createWordCloud2Element.Rd
index fbd97b60..a6279e2f 100644
--- a/man/wordcloud2_element.Rd
+++ b/man/createWordCloud2Element.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/plotting.R
-\name{wordcloud2_element}
-\alias{wordcloud2_element}
+\name{createWordCloud2Element}
+\alias{createWordCloud2Element}
 \title{Wordclouds for the predominant domains, domain architectures.}
 \usage{
-wordcloud2_element(
+createWordCloud2Element(
   query_data = "prot",
   colname = "DomArch",
   cutoff = 70,
@@ -33,7 +33,7 @@ column names.
 }
 \examples{
 \dontrun{
-wordcloud_element(prot, "da2doms", 10)
+createWordCloudElement(prot, "da2doms", 10)
 }
 }
 \author{
diff --git a/man/wordcloud_element.Rd b/man/createWordCloudElement.Rd
similarity index 88%
rename from man/wordcloud_element.Rd
rename to man/createWordCloudElement.Rd
index bffddb28..7f27ef41 100644
--- a/man/wordcloud_element.Rd
+++ b/man/createWordCloudElement.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/plotting.R
-\name{wordcloud_element}
-\alias{wordcloud_element}
+\name{createWordCloudElement}
+\alias{createWordCloudElement}
 \title{Wordclouds for the predominant domains, domain architectures}
 \usage{
-wordcloud_element(
+createWordCloudElement(
   query_data = "prot",
   colname = "DomArch",
   cutoff = 70,
@@ -33,7 +33,7 @@ column names.
 }
 \examples{
 \dontrun{
-wordcloud_element(prot, "da2doms", 10)
+createWordCloudElement(prot, "da2doms", 10)
 }
 }
 \author{
diff --git a/man/lineage.DA.plot.Rd b/man/plotLineageDA.Rd
similarity index 90%
rename from man/lineage.DA.plot.Rd
rename to man/plotLineageDA.Rd
index f938d70d..7e84bcfd 100644
--- a/man/lineage.DA.plot.Rd
+++ b/man/plotLineageDA.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/plotting.R
-\name{lineage.DA.plot}
-\alias{lineage.DA.plot}
+\name{plotLineageDA}
+\alias{plotLineageDA}
 \title{Lineage Plot: Heatmap of Domains/DAs/GCs vs Lineages}
 \usage{
-lineage.DA.plot(
+plotLineageDA(
   query_data = "prot",
   colname = "DomArch",
   cutoff = 90,
@@ -40,7 +40,7 @@ column names.
 }
 \examples{
 \dontrun{
-lineage.DA.plot(toast_rack_data, 10, "DomArch.norep", "da2doms")
+plotLineageDA(toast_rack_data, 10, "DomArch.norep", "da2doms")
 }
 }
 \author{
diff --git a/man/lineage.domain_repeats.plot.Rd b/man/plotLineageDomainRepeats.Rd
similarity index 61%
rename from man/lineage.domain_repeats.plot.Rd
rename to man/plotLineageDomainRepeats.Rd
index 6cee0cac..8ccfba41 100644
--- a/man/lineage.domain_repeats.plot.Rd
+++ b/man/plotLineageDomainRepeats.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/plotting.R
-\name{lineage.domain_repeats.plot}
-\alias{lineage.domain_repeats.plot}
+\name{plotLineageDomainRepeats}
+\alias{plotLineageDomainRepeats}
 \title{Lineage Domain Repeats Plot}
 \usage{
-lineage.domain_repeats.plot(query_data, colname)
+plotLineageDomainRepeats(query_data, colname)
 }
 \arguments{
 \item{colname}{}
@@ -14,6 +14,6 @@ Lineage Domain Repeats Plot
 }
 \examples{
 \dontrun{
-lineage.domain_repeats.plot()
+plotLineageDomainRepeats()
 }
 }
diff --git a/man/LineagePlot.Rd b/man/plotLineageHeatmap.Rd
similarity index 84%
rename from man/LineagePlot.Rd
rename to man/plotLineageHeatmap.Rd
index 6aed2fb9..5449f8ec 100644
--- a/man/LineagePlot.Rd
+++ b/man/plotLineageHeatmap.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/plotting.R
-\name{LineagePlot}
-\alias{LineagePlot}
-\title{LineagePlot}
+\name{plotLineageHeatmap}
+\alias{plotLineageHeatmap}
+\title{plotLineageHeatmap}
 \usage{
-LineagePlot(prot, domains_of_interest, level = 3, label.size = 8)
+plotLineageHeatmap(prot, domains_of_interest, level = 3, label.size = 8)
 }
 \arguments{
 \item{prot}{Data frame containing DomArch and Lineage Columns}
@@ -20,7 +20,7 @@ Generate a lineage plot
 }
 \examples{
 \dontrun{
-LineagePlot(psp_data,
+plotLineageHeatmap(psp_data,
     c(
         "PspA", "Snf7", "Classical-AAA", "PspF", "PspB", "PspC", "ClgR", "PspM",
         "Thioredoxin", "PspN_N", "DUF3046", "LiaI-LiaF-TM", "Toast_rack", "REC",
diff --git a/man/lineage.neighbors.plot.Rd b/man/plotLineageNeighbors.Rd
similarity index 87%
rename from man/lineage.neighbors.plot.Rd
rename to man/plotLineageNeighbors.Rd
index b8394838..85adf175 100644
--- a/man/lineage.neighbors.plot.Rd
+++ b/man/plotLineageNeighbors.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/plotting.R
-\name{lineage.neighbors.plot}
-\alias{lineage.neighbors.plot}
+\name{plotLineageNeighbors}
+\alias{plotLineageNeighbors}
 \title{Lineage Plot for top neighbors}
 \usage{
-lineage.neighbors.plot(
+plotLineageNeighbors(
   query_data = "prot",
   query = "pspa",
   colname = "GenContext.norep"
@@ -35,7 +35,7 @@ column names.
 }
 \examples{
 \dontrun{
-lineage.neighbors.plot(pspa_data, pspa, "GenContext.norep", "da2doms")
+plotLineageNeighbors(pspa_data, pspa, "GenContext.norep", "da2doms")
 }
 
 }
diff --git a/man/lineage.Query.plot.Rd b/man/plotLineageQuery.Rd
similarity index 84%
rename from man/lineage.Query.plot.Rd
rename to man/plotLineageQuery.Rd
index 84ceb683..ad52a4d2 100644
--- a/man/lineage.Query.plot.Rd
+++ b/man/plotLineageQuery.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/plotting.R
-\name{lineage.Query.plot}
-\alias{lineage.Query.plot}
+\name{plotLineageQuery}
+\alias{plotLineageQuery}
 \title{Lineage Plot: Heatmap of Queries vs Lineages}
 \usage{
-lineage.Query.plot(
+plotLineageQuery(
   query_data = all,
   queries,
   colname = "ClustName",
@@ -30,7 +30,7 @@ column names.
 }
 \examples{
 \dontrun{
-lineage.Query.plot(prot, c("PspA", "PspB", "PspC", "PspM", "PspN"), 95)
+plotLineageQuery(prot, c("PspA", "PspB", "PspC", "PspM", "PspN"), 95)
 }
 }
 \keyword{Architectures,}
diff --git a/man/lineage_sunburst.Rd b/man/plotLineageSunburst.Rd
similarity index 92%
rename from man/lineage_sunburst.Rd
rename to man/plotLineageSunburst.Rd
index 38872bf5..972bbe5d 100644
--- a/man/lineage_sunburst.Rd
+++ b/man/plotLineageSunburst.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/plotting.R
-\name{lineage_sunburst}
-\alias{lineage_sunburst}
+\name{plotLineageSunburst}
+\alias{plotLineageSunburst}
 \title{Lineage Sunburst}
 \usage{
-lineage_sunburst(
+plotLineageSunburst(
   prot,
   lineage_column = "Lineage",
   type = "sunburst",
@@ -37,6 +37,6 @@ Lineage Sunburst
 }
 \examples{
 \dontrun{
-lineage_sunburst()
+plotLineageSunburst()
 }
 }
diff --git a/man/stacked_lin_plot.Rd b/man/plotStackedLineage.Rd
similarity index 84%
rename from man/stacked_lin_plot.Rd
rename to man/plotStackedLineage.Rd
index de97cd7f..9d1cde6d 100644
--- a/man/stacked_lin_plot.Rd
+++ b/man/plotStackedLineage.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/plotting.R
-\name{stacked_lin_plot}
-\alias{stacked_lin_plot}
+\name{plotStackedLineage}
+\alias{plotStackedLineage}
 \title{Stacked Lineage Plot}
 \usage{
-stacked_lin_plot(
+plotStackedLineage(
   prot,
   column = "DomArch",
   cutoff,
@@ -28,6 +28,6 @@ Stacked Lineage Plot
 }
 \examples{
 \dontrun{
-stacked_lin_plot()
+plotStackedLineage()
 }
 }
diff --git a/man/upset.plot.Rd b/man/plotUpSet.Rd
similarity index 94%
rename from man/upset.plot.Rd
rename to man/plotUpSet.Rd
index 3537f849..84169987 100644
--- a/man/upset.plot.Rd
+++ b/man/plotUpSet.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/plotting.R
-\name{upset.plot}
-\alias{upset.plot}
+\name{plotUpSet}
+\alias{plotUpSet}
 \title{UpSet Plot}
 \usage{
-upset.plot(
+plotUpSet(
   query_data = "toast_rack.sub",
   colname = "DomArch",
   cutoff = 90,
@@ -45,7 +45,7 @@ column names.
 }
 \examples{
 \dontrun{
-upset.plot(pspa.sub, 10, "da2doms")
+plotUpSet(pspa.sub, 10, "da2doms")
 }
 }
 \author{
diff --git a/man/shorten_lineage.Rd b/man/shortenLineage.Rd
similarity index 63%
rename from man/shorten_lineage.Rd
rename to man/shortenLineage.Rd
index db86271e..f495fb32 100644
--- a/man/shorten_lineage.Rd
+++ b/man/shortenLineage.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/plotting.R
-\name{shorten_lineage}
-\alias{shorten_lineage}
+\name{shortenLineage}
+\alias{shortenLineage}
 \title{Shorten Lineage}
 \usage{
-shorten_lineage(data, colname = "Lineage", abr_len = 1)
+shortenLineage(data, colname = "Lineage", abr_len = 1)
 }
 \arguments{
 \item{abr_len}{}
@@ -14,6 +14,6 @@ Shorten Lineage
 }
 \examples{
 \dontrun{
-shorten_lineage()
+shortenLineage()
 }
 }
diff --git a/man/wordcloud3.Rd b/man/wordcloud3.Rd
index 62eed454..cce07a82 100644
--- a/man/wordcloud3.Rd
+++ b/man/wordcloud3.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/plotting.R
 \name{wordcloud3}
 \alias{wordcloud3}
-\title{Wordcloud3}
+\title{plotWordCloud3}
 \usage{
 wordcloud3(
   data,
@@ -28,5 +28,5 @@ wordcloud3(
 \item{hoverFunction}{}
 }
 \description{
-Wordcloud3
+plotWordCloud3
 }

From 2c1ce1a213d7b36395acbd76d11cb4bf6b8a89f7 Mon Sep 17 00:00:00 2001
From: teddyCodex <samted.uche@gmail.com>
Date: Mon, 7 Oct 2024 09:33:02 +0100
Subject: [PATCH 09/33] update NAMESPACE using roxygen2

---
 NAMESPACE                                     | 26 +++++++++----------
 man/{cleanup_clust.Rd => cleanClusters.Rd}    | 20 +++++++-------
 ..._domarch.Rd => cleanDomainArchitecture.Rd} | 20 +++++++-------
 ...anup_fasta_header.Rd => cleanFAHeaders.Rd} |  8 +++---
 ...up_GeneDesc.Rd => cleanGeneDescription.Rd} |  8 +++---
 ...p_gencontext.Rd => cleanGenomicContext.Rd} | 12 ++++-----
 man/{cleanup_lineage.Rd => cleanLineage.Rd}   |  8 +++---
 man/{cleanup_species.Rd => cleanSpecies.Rd}   | 10 +++----
 man/{clean_string.Rd => cleanString.Rd}       |  8 +++---
 ...repeat2s.Rd => condenseRepeatedDomains.Rd} | 12 ++++-----
 ..._accnums_unique.Rd => ensureUniqAccNum.Rd} |  8 +++---
 man/{string2accnum.Rd => extractAccNum.Rd}    | 12 ++++-----
 man/{remove_astrk.Rd => removeAsterisks.Rd}   |  8 +++---
 man/{remove_empty.Rd => removeEmptyRows.Rd}   |  8 +++---
 man/{remove_tails.Rd => removeTails.Rd}       |  8 +++---
 ...{replaceQMs.Rd => replaceQuestionMarks.Rd} |  8 +++---
 ...duplicate.Rd => selectLongestDuplicate.Rd} |  8 +++---
 17 files changed, 96 insertions(+), 96 deletions(-)
 rename man/{cleanup_clust.Rd => cleanClusters.Rd} (59%)
 rename man/{cleanup_domarch.Rd => cleanDomainArchitecture.Rd} (66%)
 rename man/{cleanup_fasta_header.Rd => cleanFAHeaders.Rd} (78%)
 rename man/{cleanup_GeneDesc.Rd => cleanGeneDescription.Rd} (70%)
 rename man/{cleanup_gencontext.Rd => cleanGenomicContext.Rd} (78%)
 rename man/{cleanup_lineage.Rd => cleanLineage.Rd} (71%)
 rename man/{cleanup_species.Rd => cleanSpecies.Rd} (70%)
 rename man/{clean_string.Rd => cleanString.Rd} (84%)
 rename man/{repeat2s.Rd => condenseRepeatedDomains.Rd} (67%)
 rename man/{make_accnums_unique.Rd => ensureUniqAccNum.Rd} (80%)
 rename man/{string2accnum.Rd => extractAccNum.Rd} (63%)
 rename man/{remove_astrk.Rd => removeAsterisks.Rd} (72%)
 rename man/{remove_empty.Rd => removeEmptyRows.Rd} (84%)
 rename man/{remove_tails.Rd => removeTails.Rd} (83%)
 rename man/{replaceQMs.Rd => replaceQuestionMarks.Rd} (73%)
 rename man/{pick_longer_duplicate.Rd => selectLongestDuplicate.Rd} (67%)

diff --git a/NAMESPACE b/NAMESPACE
index 16cf0813..9724f0dd 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -16,15 +16,16 @@ export(advanced_opts2est_walltime)
 export(alignFasta)
 export(assert_count_df)
 export(assign_job_queue)
-export(cleanup_GeneDesc)
-export(cleanup_clust)
-export(cleanup_domarch)
-export(cleanup_gencontext)
-export(cleanup_lineage)
-export(cleanup_species)
+export(cleanClusters)
+export(cleanDomainArchitecture)
+export(cleanGeneDescription)
+export(cleanGenomicContext)
+export(cleanLineage)
+export(cleanSpecies)
 export(combine_files)
 export(combine_full)
 export(combine_ipr)
+export(condenseRepeatedDomains)
 export(convert_aln2fa)
 export(convert_fa2tre)
 export(count_bycol)
@@ -35,6 +36,7 @@ export(create_lineage_lookup)
 export(create_one_col_params)
 export(domain_network)
 export(efetch_ipg)
+export(extractAccNum)
 export(filter_by_doms)
 export(filter_freq)
 export(find_paralogs)
@@ -62,25 +64,23 @@ export(make_opts2procs)
 export(map_acc2name)
 export(map_advanced_opts2procs)
 export(msa_pdf)
-export(pick_longer_duplicate)
 export(plot_estimated_walltimes)
 export(prot2tax)
 export(prot2tax_old)
-export(remove_astrk)
-export(remove_empty)
-export(remove_tails)
+export(removeAsterisks)
+export(removeEmptyRows)
+export(removeTails)
 export(rename_fasta)
-export(repeat2s)
-export(replaceQMs)
+export(replaceQuestionMarks)
 export(reveql)
 export(reverse_operon)
 export(run_deltablast)
 export(run_rpsblast)
+export(selectLongestDuplicate)
 export(send_job_status_email)
 export(shorten_lineage)
 export(sink.reset)
 export(stacked_lin_plot)
-export(string2accnum)
 export(summ.DA)
 export(summ.DA.byLin)
 export(summ.GC)
diff --git a/man/cleanup_clust.Rd b/man/cleanClusters.Rd
similarity index 59%
rename from man/cleanup_clust.Rd
rename to man/cleanClusters.Rd
index 4eed8be8..7ef4f3b9 100644
--- a/man/cleanup_clust.Rd
+++ b/man/cleanClusters.Rd
@@ -1,16 +1,16 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/cleanup.R
-\name{cleanup_clust}
-\alias{cleanup_clust}
+\name{cleanClusters}
+\alias{cleanClusters}
 \title{Cleanup Clust}
 \usage{
-cleanup_clust(
+cleanClusters(
   prot,
   domains_rename,
   domains_keep,
-  repeat2s = TRUE,
-  remove_tails = FALSE,
-  remove_empty = FALSE
+  condenseRepeatedDomains = TRUE,
+  removeTails = FALSE,
+  removeEmptyRows = FALSE
 )
 }
 \arguments{
@@ -20,11 +20,11 @@ cleanup_clust(
 
 \item{domains_keep}{A data frame containing the domain names to be retained.}
 
-\item{repeat2s}{Boolean. If TRUE, repeated domains in 'ClustName' are condensed. Default is TRUE.}
+\item{condenseRepeatedDomains}{Boolean. If TRUE, repeated domains in 'ClustName' are condensed. Default is TRUE.}
 
-\item{remove_tails}{Boolean. If TRUE, 'ClustName' will be filtered based on domains to keep/remove. Default is FALSE.}
+\item{removeTails}{Boolean. If TRUE, 'ClustName' will be filtered based on domains to keep/remove. Default is FALSE.}
 
-\item{remove_empty}{Boolean. If TRUE, rows with empty/unnecessary values in 'ClustName' are removed. Default is FALSE.}
+\item{removeEmptyRows}{Boolean. If TRUE, rows with empty/unnecessary values in 'ClustName' are removed. Default is FALSE.}
 }
 \value{
 Cleaned up data frame
@@ -39,6 +39,6 @@ The return value is the cleaned up data frame.
 }
 \examples{
 \dontrun{
-cleanup_clust(prot, TRUE, FALSE, domains_keep, domains_rename)
+cleanClusters(prot, domains_rename = domains_rename, domains_keep = domains_keep)
 }
 }
diff --git a/man/cleanup_domarch.Rd b/man/cleanDomainArchitecture.Rd
similarity index 66%
rename from man/cleanup_domarch.Rd
rename to man/cleanDomainArchitecture.Rd
index 21955509..887b5388 100644
--- a/man/cleanup_domarch.Rd
+++ b/man/cleanDomainArchitecture.Rd
@@ -1,18 +1,18 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/cleanup.R
-\name{cleanup_domarch}
-\alias{cleanup_domarch}
+\name{cleanDomainArchitecture}
+\alias{cleanDomainArchitecture}
 \title{Cleanup DomArch}
 \usage{
-cleanup_domarch(
+cleanDomainArchitecture(
   prot,
   old = "DomArch.orig",
   new = "DomArch",
   domains_keep,
   domains_rename,
-  repeat2s = TRUE,
-  remove_tails = FALSE,
-  remove_empty = F,
+  condenseRepeatedDomains = TRUE,
+  removeTails = FALSE,
+  removeEmptyRows = F,
   domains_ignore = NULL
 )
 }
@@ -24,11 +24,11 @@ cleanup_domarch(
 \item{domains_rename}{A data frame containing the domain names to be replaced in a column 'old' and the
 corresponding replacement values in a column 'new'.}
 
-\item{repeat2s}{Boolean. If TRUE, repeated domains in 'DomArch' are condensed. Default is TRUE.}
+\item{condenseRepeatedDomains}{Boolean. If TRUE, repeated domains in 'DomArch' are condensed. Default is TRUE.}
 
-\item{remove_tails}{Boolean. If TRUE, 'ClustName' will be filtered based on domains to keep/remove. Default is FALSE.}
+\item{removeTails}{Boolean. If TRUE, 'DomArch' will be filtered based on domains to keep/remove. Default is FALSE.}
 
-\item{remove_empty}{Boolean. If TRUE, rows with empty/unnecessary values in 'DomArch' are removed. Default is FALSE.}
+\item{removeEmptyRows}{Boolean. If TRUE, rows with empty/unnecessary values in 'DomArch' are removed. Default is FALSE.}
 
 \item{domains_ignore}{A data frame containing the domain names to be removed in a column called 'domains'}
 }
@@ -46,6 +46,6 @@ The original data frame is returned with the clean DomArchs column and the old d
 }
 \examples{
 \dontrun{
-cleanup_domarch(prot, TRUE, FALSE, domains_keep, domains_rename, domains_ignore = NULL)
+cleanDomainArchitecture(prot, domains_keep = domains_keep, domains_rename = domains_rename)
 }
 }
diff --git a/man/cleanup_fasta_header.Rd b/man/cleanFAHeaders.Rd
similarity index 78%
rename from man/cleanup_fasta_header.Rd
rename to man/cleanFAHeaders.Rd
index 416f6be2..e9ad9b30 100644
--- a/man/cleanup_fasta_header.Rd
+++ b/man/cleanFAHeaders.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/cleanup.R
-\name{cleanup_fasta_header}
-\alias{cleanup_fasta_header}
+\name{cleanFAHeaders}
+\alias{cleanFAHeaders}
 \title{Cleanup FASTA Header}
 \usage{
-cleanup_fasta_header(fasta)
+cleanFAHeaders(fasta)
 }
 \arguments{
 \item{fasta}{}
@@ -19,6 +19,6 @@ suffix of the ith occurence to handle duplicates
 \examples{
 \dontrun{
 AAStringSet(c("xxx" = "ATCG", "xxx" = "GGGC")) |>
-    cleanup_fasta_header()
+    cleanFAHeaders()
 }
 }
diff --git a/man/cleanup_GeneDesc.Rd b/man/cleanGeneDescription.Rd
similarity index 70%
rename from man/cleanup_GeneDesc.Rd
rename to man/cleanGeneDescription.Rd
index 3068fe49..f98a25d4 100644
--- a/man/cleanup_GeneDesc.Rd
+++ b/man/cleanGeneDescription.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/cleanup.R
-\name{cleanup_GeneDesc}
-\alias{cleanup_GeneDesc}
+\name{cleanGeneDescription}
+\alias{cleanGeneDescription}
 \title{Cleanup GeneDesc}
 \usage{
-cleanup_GeneDesc(prot, column)
+cleanGeneDescription(prot, column)
 }
 \arguments{
 \item{column}{}
@@ -17,6 +17,6 @@ Cleanup GeneDesc
 }
 \examples{
 \dontrun{
-cleanup_GeneDesc()
+cleanGeneDescription()
 }
 }
diff --git a/man/cleanup_gencontext.Rd b/man/cleanGenomicContext.Rd
similarity index 78%
rename from man/cleanup_gencontext.Rd
rename to man/cleanGenomicContext.Rd
index 8e26a447..2c2dcc18 100644
--- a/man/cleanup_gencontext.Rd
+++ b/man/cleanGenomicContext.Rd
@@ -1,14 +1,14 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/cleanup.R
-\name{cleanup_gencontext}
-\alias{cleanup_gencontext}
+\name{cleanGenomicContext}
+\alias{cleanGenomicContext}
 \title{Cleanup Genomic Contexts}
 \usage{
-cleanup_gencontext(
+cleanGenomicContext(
   prot,
   domains_rename = data.frame(old = character(0), new = character(0), stringsAsFactors =
     F),
-  repeat2s = TRUE,
+  condenseRepeatedDomains = TRUE,
   remove_asterisk = TRUE
 )
 }
@@ -18,7 +18,7 @@ cleanup_gencontext(
 \item{domains_rename}{A data frame containing the domain names to be replaced in a column 'old' and the replacement in a column 'new'.
 Defaults to an empty data frame with a new and old column such that non of the domains will be renamed}
 
-\item{repeat2s}{Boolean. If TRUE, repeated domains in 'GenContext' are condensed. Default is TRUE.}
+\item{condenseRepeatedDomains}{Boolean. If TRUE, repeated domains in 'GenContext' are condensed. Default is TRUE.}
 
 \item{remove_asterisk}{Boolean. If TRUE, asterisks in 'ClustName' are removed. Default is TRUE.}
 }
@@ -33,7 +33,7 @@ A cleaned up version of the data table is returned.
 }
 \examples{
 \dontrun{
-cleanup_gencontext(prot, domains_rename, T, F)
+cleanGenomicContext(prot, domains_rename, TRUE, FALSE)
 }
 
 }
diff --git a/man/cleanup_lineage.Rd b/man/cleanLineage.Rd
similarity index 71%
rename from man/cleanup_lineage.Rd
rename to man/cleanLineage.Rd
index 35669f4e..adcea312 100644
--- a/man/cleanup_lineage.Rd
+++ b/man/cleanLineage.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/cleanup.R
-\name{cleanup_lineage}
-\alias{cleanup_lineage}
+\name{cleanLineage}
+\alias{cleanLineage}
 \title{Cleanup Lineage}
 \usage{
-cleanup_lineage(prot, lins_rename)
+cleanLineage(prot, lins_rename)
 }
 \arguments{
 \item{lins_rename}{}
@@ -17,6 +17,6 @@ Cleanup Lineage
 }
 \examples{
 \dontrun{
-cleanup_lineage()
+cleanLineage()
 }
 }
diff --git a/man/cleanup_species.Rd b/man/cleanSpecies.Rd
similarity index 70%
rename from man/cleanup_species.Rd
rename to man/cleanSpecies.Rd
index beedb23c..82b5444c 100644
--- a/man/cleanup_species.Rd
+++ b/man/cleanSpecies.Rd
@@ -1,15 +1,15 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/cleanup.R
-\name{cleanup_species}
-\alias{cleanup_species}
+\name{cleanSpecies}
+\alias{cleanSpecies}
 \title{Cleanup Species}
 \usage{
-cleanup_species(prot, remove_empty = FALSE)
+cleanSpecies(prot, removeEmptyRows = FALSE)
 }
 \arguments{
 \item{prot}{A data frame that contains columns 'Species'.}
 
-\item{remove_empty}{Boolean. If TRUE, rows with empty/unnecessary values in 'Species' are removed.
+\item{removeEmptyRows}{Boolean. If TRUE, rows with empty/unnecessary values in 'Species' are removed.
 Default is false.}
 }
 \value{
@@ -25,6 +25,6 @@ A cleaned up version of the data table is returned.
 }
 \examples{
 \dontrun{
-cleanup_species(prot, TRUE)
+cleanSpecies(prot, TRUE)
 }
 }
diff --git a/man/clean_string.Rd b/man/cleanString.Rd
similarity index 84%
rename from man/clean_string.Rd
rename to man/cleanString.Rd
index a17a95bb..0dc2937e 100644
--- a/man/clean_string.Rd
+++ b/man/cleanString.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/cleanup.R
-\name{clean_string}
-\alias{clean_string}
+\name{cleanString}
+\alias{cleanString}
 \title{Clean String}
 \usage{
-clean_string(string)
+cleanString(string)
 }
 \arguments{
 \item{string}{}
@@ -19,7 +19,7 @@ cleanup domain architecture values
 }
 \examples{
 \dontrun{
-clean_string()
+cleanString()
 }
 
 }
diff --git a/man/repeat2s.Rd b/man/condenseRepeatedDomains.Rd
similarity index 67%
rename from man/repeat2s.Rd
rename to man/condenseRepeatedDomains.Rd
index 30a09cc6..3b239129 100644
--- a/man/repeat2s.Rd
+++ b/man/condenseRepeatedDomains.Rd
@@ -1,17 +1,17 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/cleanup.R
-\name{repeat2s}
-\alias{repeat2s}
-\title{repeat2s}
+\name{condenseRepeatedDomains}
+\alias{condenseRepeatedDomains}
+\title{condenseRepeatedDomains}
 \usage{
-repeat2s(prot, by_column = "DomArch", excluded_prots = c())
+condenseRepeatedDomains(prot, by_column = "DomArch", excluded_prots = c())
 }
 \arguments{
 \item{prot}{A data frame containing 'DomArch', 'GenContext', 'ClustName' columns.}
 
 \item{by_column}{Column in which repeats are condensed to domain+domain -> domain(s).}
 
-\item{excluded_prots}{Vector of strings that repeat2s should not reduce to (s). Defaults to c()}
+\item{excluded_prots}{Vector of strings that condenseRepeatedDomains should not reduce to (s). Defaults to c()}
 }
 \value{
 Describe return, in detail
@@ -27,6 +27,6 @@ The original data frame is returned with the corresponding cleaned up column.
 }
 \examples{
 \dontrun{
-repeat2s(prot, "DomArch")
+condenseRepeatedDomains(prot, "DomArch")
 }
 }
diff --git a/man/make_accnums_unique.Rd b/man/ensureUniqAccNum.Rd
similarity index 80%
rename from man/make_accnums_unique.Rd
rename to man/ensureUniqAccNum.Rd
index 62866a24..ddb4a70d 100644
--- a/man/make_accnums_unique.Rd
+++ b/man/ensureUniqAccNum.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/cleanup.R
-\name{make_accnums_unique}
-\alias{make_accnums_unique}
+\name{ensureUniqAccNum}
+\alias{ensureUniqAccNum}
 \title{make accnums unique}
 \usage{
-make_accnums_unique(accnums)
+ensureUniqAccNum(accnums)
 }
 \arguments{
 \item{accnums}{\link{chr} a vector of accession numbers}
@@ -19,6 +19,6 @@ character vector) making them unique
 \examples{
 \dontrun{
 c("xxx", "xxx", "xxx", "yyy", "yyy") |>
-    make_accnums_unique()
+    ensureUniqAccNum()
 }
 }
diff --git a/man/string2accnum.Rd b/man/extractAccNum.Rd
similarity index 63%
rename from man/string2accnum.Rd
rename to man/extractAccNum.Rd
index dd7de249..15870f3f 100644
--- a/man/string2accnum.Rd
+++ b/man/extractAccNum.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/cleanup.R
-\name{string2accnum}
-\alias{string2accnum}
-\title{string2accnum}
+\name{extractAccNum}
+\alias{extractAccNum}
+\title{extractAccNum}
 \usage{
-string2accnum(string)
+extractAccNum(string)
 }
 \arguments{
 \item{string}{}
@@ -13,10 +13,10 @@ string2accnum(string)
 Describe return, in detail
 }
 \description{
-string2accnum
+extractAccNum
 }
 \examples{
 \dontrun{
-string2accnum()
+extractAccNum()
 }
 }
diff --git a/man/remove_astrk.Rd b/man/removeAsterisks.Rd
similarity index 72%
rename from man/remove_astrk.Rd
rename to man/removeAsterisks.Rd
index 3562521d..691a7adf 100644
--- a/man/remove_astrk.Rd
+++ b/man/removeAsterisks.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/cleanup.R
-\name{remove_astrk}
-\alias{remove_astrk}
+\name{removeAsterisks}
+\alias{removeAsterisks}
 \title{Remove Astrk}
 \usage{
-remove_astrk(query_data, colname = "GenContext")
+removeAsterisks(query_data, colname = "GenContext")
 }
 \arguments{
 \item{colname}{}
@@ -18,6 +18,6 @@ Used for removing * from GenContext columns
 }
 \examples{
 \dontrun{
-remove_astrk()
+removeAsterisks()
 }
 }
diff --git a/man/remove_empty.Rd b/man/removeEmptyRows.Rd
similarity index 84%
rename from man/remove_empty.Rd
rename to man/removeEmptyRows.Rd
index cfbf707b..66551810 100644
--- a/man/remove_empty.Rd
+++ b/man/removeEmptyRows.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/cleanup.R
-\name{remove_empty}
-\alias{remove_empty}
+\name{removeEmptyRows}
+\alias{removeEmptyRows}
 \title{Remove Empty}
 \usage{
-remove_empty(prot, by_column = "DomArch")
+removeEmptyRows(prot, by_column = "DomArch")
 }
 \arguments{
 \item{prot}{A data frame containing 'DomArch', 'Species', 'GenContext', 'ClustName' columns.}
@@ -25,6 +25,6 @@ The original data frame is returned with the corresponding cleaned up column.
 }
 \examples{
 \dontrun{
-remove_empty(prot, "DomArch")
+removeEmptyRows(prot, "DomArch")
 }
 }
diff --git a/man/remove_tails.Rd b/man/removeTails.Rd
similarity index 83%
rename from man/remove_tails.Rd
rename to man/removeTails.Rd
index 1cd20861..76d1e18a 100644
--- a/man/remove_tails.Rd
+++ b/man/removeTails.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/cleanup.R
-\name{remove_tails}
-\alias{remove_tails}
+\name{removeTails}
+\alias{removeTails}
 \title{Remove Tails}
 \usage{
-remove_tails(prot, by_column = "DomArch", keep_domains = FALSE)
+removeTails(prot, by_column = "DomArch", keep_domains = FALSE)
 }
 \arguments{
 \item{prot}{A data frame containing 'DomArch', 'GenContext', 'ClustName' columns.}
@@ -25,6 +25,6 @@ The original data frame is returned with the corresponding cleaned up column.
 }
 \examples{
 \dontrun{
-remove_tails(prot, "DomArch")
+removeTails(prot, "DomArch")
 }
 }
diff --git a/man/replaceQMs.Rd b/man/replaceQuestionMarks.Rd
similarity index 73%
rename from man/replaceQMs.Rd
rename to man/replaceQuestionMarks.Rd
index 604a8ece..0949568f 100644
--- a/man/replaceQMs.Rd
+++ b/man/replaceQuestionMarks.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/cleanup.R
-\name{replaceQMs}
-\alias{replaceQMs}
+\name{replaceQuestionMarks}
+\alias{replaceQuestionMarks}
 \title{Replace QMs}
 \usage{
-replaceQMs(prot, by_column = "GenContext")
+replaceQuestionMarks(prot, by_column = "GenContext")
 }
 \arguments{
 \item{prot}{DataTable to operate on}
@@ -20,7 +20,7 @@ Replace '?' with 'X'
 }
 \examples{
 \dontrun{
-replaceQMs()
+replaceQuestionMarks()
 }
 
 }
diff --git a/man/pick_longer_duplicate.Rd b/man/selectLongestDuplicate.Rd
similarity index 67%
rename from man/pick_longer_duplicate.Rd
rename to man/selectLongestDuplicate.Rd
index d7858da7..c177d289 100644
--- a/man/pick_longer_duplicate.Rd
+++ b/man/selectLongestDuplicate.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/cleanup.R
-\name{pick_longer_duplicate}
-\alias{pick_longer_duplicate}
+\name{selectLongestDuplicate}
+\alias{selectLongestDuplicate}
 \title{Pick Longer Duplicate}
 \usage{
-pick_longer_duplicate(prot, column)
+selectLongestDuplicate(prot, column)
 }
 \arguments{
 \item{column}{}
@@ -17,6 +17,6 @@ Pick Longer Duplicate
 }
 \examples{
 \dontrun{
-pick_longer_duplicate()
+selectLongestDuplicate()
 }
 }

From 96d0ddf982d6d472c5ffba2bac4f8e641cd81176 Mon Sep 17 00:00:00 2001
From: teddyCodex <samted.uche@gmail.com>
Date: Mon, 7 Oct 2024 10:04:49 +0100
Subject: [PATCH 10/33] refactor function names in R/tree.R

---
 NAMESPACE                                        |  6 +++---
 R/tree.R                                         | 16 ++++++++--------
 ...nerate_trees.Rd => convertAlignment2Trees.Rd} |  8 ++++----
 man/{convert_fa2tre.Rd => convertFA2Tree.Rd}     | 10 +++++-----
 man/{generate_fa2tre.Rd => createFA2Tree.Rd}     |  8 ++++----
 5 files changed, 24 insertions(+), 24 deletions(-)
 rename man/{generate_trees.Rd => convertAlignment2Trees.Rd} (56%)
 rename man/{convert_fa2tre.Rd => convertFA2Tree.Rd} (73%)
 rename man/{generate_fa2tre.Rd => createFA2Tree.Rd} (89%)

diff --git a/NAMESPACE b/NAMESPACE
index 16cf0813..49c9d02f 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -25,11 +25,13 @@ export(cleanup_species)
 export(combine_files)
 export(combine_full)
 export(combine_ipr)
+export(convertAlignment2Trees)
+export(convertFA2Tree)
 export(convert_aln2fa)
-export(convert_fa2tre)
 export(count_bycol)
 export(count_to_sunburst)
 export(count_to_treemap)
+export(createFA2Tree)
 export(create_all_col_params)
 export(create_lineage_lookup)
 export(create_one_col_params)
@@ -42,9 +44,7 @@ export(find_top_acc)
 export(format_job_args)
 export(gc_undirected_network)
 export(generate_all_aln2fa)
-export(generate_fa2tre)
 export(generate_msa)
-export(generate_trees)
 export(get_accnums_from_fasta_file)
 export(get_job_message)
 export(get_proc_medians)
diff --git a/R/tree.R b/R/tree.R
index 01e9ead5..8eb641d9 100755
--- a/R/tree.R
+++ b/R/tree.R
@@ -1,6 +1,6 @@
 ## Generating Phylogenetic Trees from Alignment Fasta files
 ## Includes the following functions:
-## generate_trees, convert_fa2tre, generate_fa2tre
+## convertAlignment2Trees, convertFA2Tree, createFA2Tree
 ## Modified: Jan, 2020
 ## Janani Ravi (@jananiravi), Molecular Ecologist (@molecologist)
 
@@ -35,7 +35,7 @@
 ## Approach 0 | FastTree2.0
 ###########################
 ## !! FastTree will only work if there are unique sequence names!!
-#' convert_fa2tre
+#' convertFA2Tree
 #'
 #' @param fa_path
 #' @param tre_path
@@ -45,7 +45,7 @@
 #' @export
 #'
 #' @examples
-convert_fa2tre <- function(fa_path = here("data/alns/pspa_snf7.fa"),
+convertFA2Tree <- function(fa_path = here("data/alns/pspa_snf7.fa"),
     tre_path = here("data/alns/pspa_snf7.tre"),
     fasttree_path = here("src/FastTree")) {
     # fa_path=here("data/alns/pspa_snf7.fa")
@@ -67,7 +67,7 @@ convert_fa2tre <- function(fa_path = here("data/alns/pspa_snf7.fa"),
     #              here("src/FastTree.c"), "-lm", collapse=" "))
 }
 ## Generate Trees for ALL fasta files in "data/alns"
-#' generate_trees
+#' convertAlignment2Trees
 #'
 #' @description
 #' Generate Trees for ALL fasta files in "data/alns"
@@ -82,7 +82,7 @@ convert_fa2tre <- function(fa_path = here("data/alns/pspa_snf7.fa"),
 #' @export
 #'
 #' @examples
-generate_trees <- function(aln_path = here("data/alns/")) {
+convertAlignment2Trees <- function(aln_path = here("data/alns/")) {
     # finding all fasta alignment files
     fa_filenames <- list.files(path = aln_path, pattern = "*.fa")
     fa_paths <- paste0(aln_path, fa_filenames)
@@ -96,7 +96,7 @@ generate_trees <- function(aln_path = here("data/alns/")) {
         tre_path = paste0(aln_path, variable, ".tre")
     )
     pmap(
-        .l = fa2tre_args, .f = convert_fa2tre,
+        .l = fa2tre_args, .f = convertFA2Tree,
         fasttree_path = here("src/FastTree")
     )
 }
@@ -104,7 +104,7 @@ generate_trees <- function(aln_path = here("data/alns/")) {
 ##############################
 ## REFS: 1-4
 ############
-#' generate_fa2tre
+#' createFA2Tree
 #'
 #' @author Janani Ravi, MolEcologist
 #' @keywords phylogenetic tree, alignment, fasta
@@ -134,7 +134,7 @@ generate_trees <- function(aln_path = here("data/alns/")) {
 #' \dontrun{
 #' generate_aln2tree("pspa_snf7.fa")
 #' }
-generate_fa2tre <- function(fa_file = "data/alns/pspa_snf7.fa",
+createFA2Tree <- function(fa_file = "data/alns/pspa_snf7.fa",
     out_file = "data/alns/pspa_snf7.tre") {
     ## SAMPLE ARGS
     # fa_file="data/alns/pspa_snf7.fa"
diff --git a/man/generate_trees.Rd b/man/convertAlignment2Trees.Rd
similarity index 56%
rename from man/generate_trees.Rd
rename to man/convertAlignment2Trees.Rd
index 43bd7243..002f5203 100644
--- a/man/generate_trees.Rd
+++ b/man/convertAlignment2Trees.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/tree.R
-\name{generate_trees}
-\alias{generate_trees}
-\title{generate_trees}
+\name{convertAlignment2Trees}
+\alias{convertAlignment2Trees}
+\title{convertAlignment2Trees}
 \usage{
-generate_trees(aln_path = here("data/alns/"))
+convertAlignment2Trees(aln_path = here("data/alns/"))
 }
 \arguments{
 \item{aln_path}{}
diff --git a/man/convert_fa2tre.Rd b/man/convertFA2Tree.Rd
similarity index 73%
rename from man/convert_fa2tre.Rd
rename to man/convertFA2Tree.Rd
index 87c59d67..b2fb93de 100644
--- a/man/convert_fa2tre.Rd
+++ b/man/convertFA2Tree.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/tree.R
-\name{convert_fa2tre}
-\alias{convert_fa2tre}
-\title{convert_fa2tre}
+\name{convertFA2Tree}
+\alias{convertFA2Tree}
+\title{convertFA2Tree}
 \usage{
-convert_fa2tre(
+convertFA2Tree(
   fa_path = here("data/alns/pspa_snf7.fa"),
   tre_path = here("data/alns/pspa_snf7.tre"),
   fasttree_path = here("src/FastTree")
@@ -14,5 +14,5 @@ convert_fa2tre(
 \item{fasttree_path}{}
 }
 \description{
-convert_fa2tre
+convertFA2Tree
 }
diff --git a/man/generate_fa2tre.Rd b/man/createFA2Tree.Rd
similarity index 89%
rename from man/generate_fa2tre.Rd
rename to man/createFA2Tree.Rd
index b70848bb..76da7807 100644
--- a/man/generate_fa2tre.Rd
+++ b/man/createFA2Tree.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/tree.R
-\name{generate_fa2tre}
-\alias{generate_fa2tre}
-\title{generate_fa2tre}
+\name{createFA2Tree}
+\alias{createFA2Tree}
+\title{createFA2Tree}
 \usage{
-generate_fa2tre(
+createFA2Tree(
   fa_file = "data/alns/pspa_snf7.fa",
   out_file = "data/alns/pspa_snf7.tre"
 )

From 0e8c24536070806557f7139dfd08b072e2bada5d Mon Sep 17 00:00:00 2001
From: teddyCodex <samted.uche@gmail.com>
Date: Mon, 7 Oct 2024 10:17:08 +0100
Subject: [PATCH 11/33] update namespace file with roxygen2

---
 NAMESPACE                       |  8 +++++
 man/acc2fa.Rd                   | 13 ++------
 man/addLeaves2Alignment.Rd      | 50 ++++++++++++++++++++++++++++++
 man/addName.Rd                  | 39 +++++++++++++++++++++++
 man/add_leaves.Rd               | 21 +------------
 man/add_name.Rd                 | 18 ++---------
 man/convert2TitleCase.Rd        | 24 ++++++++++++++
 man/convertAlignment2FA.Rd      | 55 +++++++++++++++++++++++++++++++++
 man/convert_aln2fa.Rd           | 21 +------------
 man/generateAllAlignments2FA.Rd | 45 +++++++++++++++++++++++++++
 man/generate_all_aln2fa.Rd      | 18 +----------
 man/mapAcc2Name.Rd              | 21 +++++++++++++
 man/map_acc2name.Rd             |  6 +---
 man/renameFA.Rd                 | 20 ++++++++++++
 man/rename_fasta.Rd             |  6 +---
 man/to_titlecase.Rd             | 10 ++----
 16 files changed, 273 insertions(+), 102 deletions(-)
 create mode 100644 man/addLeaves2Alignment.Rd
 create mode 100644 man/addName.Rd
 create mode 100644 man/convert2TitleCase.Rd
 create mode 100644 man/convertAlignment2FA.Rd
 create mode 100644 man/generateAllAlignments2FA.Rd
 create mode 100644 man/mapAcc2Name.Rd
 create mode 100644 man/renameFA.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 16cf0813..94423e66 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -6,8 +6,11 @@ export(GCA2lin)
 export(GenContextNetwork)
 export(LineagePlot)
 export(RepresentativeAccNums)
+export(acc2FA)
 export(acc2fa)
 export(acc2lin)
+export(addLeaves2Alignment)
+export(addName)
 export(add_leaves)
 export(add_lins)
 export(add_name)
@@ -25,6 +28,8 @@ export(cleanup_species)
 export(combine_files)
 export(combine_full)
 export(combine_ipr)
+export(convert2TitleCase)
+export(convertAlignment2FA)
 export(convert_aln2fa)
 export(convert_fa2tre)
 export(count_bycol)
@@ -41,6 +46,7 @@ export(find_paralogs)
 export(find_top_acc)
 export(format_job_args)
 export(gc_undirected_network)
+export(generateAllAlignments2FA)
 export(generate_all_aln2fa)
 export(generate_fa2tre)
 export(generate_msa)
@@ -59,6 +65,7 @@ export(lineage.neighbors.plot)
 export(lineage_sunburst)
 export(make_job_results_url)
 export(make_opts2procs)
+export(mapAcc2Name)
 export(map_acc2name)
 export(map_advanced_opts2procs)
 export(msa_pdf)
@@ -69,6 +76,7 @@ export(prot2tax_old)
 export(remove_astrk)
 export(remove_empty)
 export(remove_tails)
+export(renameFA)
 export(rename_fasta)
 export(repeat2s)
 export(replaceQMs)
diff --git a/man/acc2fa.Rd b/man/acc2fa.Rd
index d4e4ee71..158b2d51 100644
--- a/man/acc2fa.Rd
+++ b/man/acc2fa.Rd
@@ -1,11 +1,9 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/CHANGED-pre-msa-tree.R, R/pre-msa-tree.R
+% Please edit documentation in R/pre-msa-tree.R
 \name{acc2fa}
 \alias{acc2fa}
-\title{acc2fa converts protein accession numbers to a fasta format.}
+\title{acc2fa}
 \usage{
-acc2fa(accessions, outpath, plan = "sequential")
-
 acc2fa(accessions, outpath, plan = "sequential")
 }
 \arguments{
@@ -17,8 +15,6 @@ Function may not work for vectors of length > 10,000}
 \item{plan}{}
 }
 \description{
-Resulting fasta file is written to the outpath.
-
 acc2fa converts protein accession numbers to a fasta format.
 Resulting fasta file is written to the outpath.
 }
@@ -28,11 +24,6 @@ acc2fa(accessions = c("ACU53894.1", "APJ14606.1", "ABK37082.1"), outpath = "my_p
 Entrez:accessions <- rep("ANY95992.1", 201) |> acc2fa(outpath = "entrez.fa")
 EBI:accessions <- c("P12345", "Q9UHC1", "O15530", "Q14624", "P0DTD1") |> acc2fa(outpath = "ebi.fa")
 }
-\dontrun{
-acc2fa(accessions = c("ACU53894.1", "APJ14606.1", "ABK37082.1"), outpath = "my_proteins.fasta")
-Entrez:accessions <- rep("ANY95992.1", 201) |> acc2fa(outpath = "entrez.fa")
-EBI:accessions <- c("P12345", "Q9UHC1", "O15530", "Q14624", "P0DTD1") |> acc2fa(outpath = "ebi.fa")
-}
 }
 \author{
 Samuel Chen, Janani Ravi
diff --git a/man/addLeaves2Alignment.Rd b/man/addLeaves2Alignment.Rd
new file mode 100644
index 00000000..a758ebd5
--- /dev/null
+++ b/man/addLeaves2Alignment.Rd
@@ -0,0 +1,50 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/CHANGED-pre-msa-tree.R
+\name{addLeaves2Alignment}
+\alias{addLeaves2Alignment}
+\title{Adding Leaves to an alignment file w/ accessions}
+\usage{
+addLeaves2Alignment(
+  aln_file = "",
+  lin_file = "data/rawdata_tsv/all_semiclean.txt",
+  reduced = FALSE
+)
+}
+\arguments{
+\item{aln_file}{Character. Path to file. Input tab-delimited file +
+alignment file accnum & alignment.
+Default is 'pspa_snf7.aln'}
+
+\item{lin_file}{Character. Path to file. Protein file with accession +
+number to lineage mapping.
+Default is 'pspa.txt'}
+
+\item{reduced}{Boolean. If TRUE, a reduced data frame will be generated with
+only one sequence per lineage. Default is FALSE.}
+}
+\description{
+Adding Leaves to an alignment file w/ accessions
+Genomic Contexts vs Domain Architectures.
+}
+\details{
+The alignment file would need two columns: 1. accession +
+number and 2. alignment. The protein homolog accession to lineage mapping +
+file should have
+}
+\note{
+Please refer to the source code if you have alternate
+file formats and/or column names.
+}
+\examples{
+\dontrun{
+addLeaves2Alignment("pspa_snf7.aln", "pspa.txt")
+}
+}
+\author{
+Janani Ravi
+}
+\keyword{accnum}
+\keyword{alignment}
+\keyword{leaves}
+\keyword{lineage}
+\keyword{species}
diff --git a/man/addName.Rd b/man/addName.Rd
new file mode 100644
index 00000000..e04f9849
--- /dev/null
+++ b/man/addName.Rd
@@ -0,0 +1,39 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/CHANGED-pre-msa-tree.R
+\name{addName}
+\alias{addName}
+\title{Add Name}
+\usage{
+addName(
+  data,
+  accnum_col = "AccNum",
+  spec_col = "Species",
+  lin_col = "Lineage",
+  lin_sep = ">",
+  out_col = "Name"
+)
+}
+\arguments{
+\item{data}{Data to add name column to}
+
+\item{accnum_col}{Column containing accession numbers}
+
+\item{spec_col}{Column containing species}
+
+\item{lin_col}{Column containing lineage}
+
+\item{lin_sep}{Character separating lineage levels}
+
+\item{out_col}{Column that contains the new 'Name' derived from Species,
+Lineage, and AccNum info}
+}
+\value{
+Original data with a 'Name' column
+}
+\description{
+This function adds a new 'Name' column that is comprised of components from
+Kingdom, Phylum, Genus, and species, as well as the accession
+}
+\author{
+Samuel Chen, Janani Ravi
+}
diff --git a/man/add_leaves.Rd b/man/add_leaves.Rd
index 6d3b3f91..f1eeed10 100644
--- a/man/add_leaves.Rd
+++ b/man/add_leaves.Rd
@@ -1,15 +1,9 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/CHANGED-pre-msa-tree.R, R/pre-msa-tree.R
+% Please edit documentation in R/pre-msa-tree.R
 \name{add_leaves}
 \alias{add_leaves}
 \title{Adding Leaves to an alignment file w/ accessions}
 \usage{
-add_leaves(
-  aln_file = "",
-  lin_file = "data/rawdata_tsv/all_semiclean.txt",
-  reduced = FALSE
-)
-
 add_leaves(
   aln_file = "",
   lin_file = "data/rawdata_tsv/all_semiclean.txt",
@@ -29,25 +23,15 @@ Default is 'pspa.txt'}
 only one sequence per lineage. Default is FALSE.}
 }
 \description{
-Adding Leaves to an alignment file w/ accessions
-Genomic Contexts vs Domain Architectures.
-
 Adding Leaves to an alignment file w/ accessions
 Genomic Contexts vs Domain Architectures.
 }
 \details{
-The alignment file would need two columns: 1. accession +
-number and 2. alignment. The protein homolog accession to lineage mapping +
-file should have
-
 The alignment file would need two columns: 1. accession +
 number and 2. alignment. The protein homolog accession to lineage mapping +
 file should have
 }
 \note{
-Please refer to the source code if you have alternate +
-file formats and/or column names.
-
 Please refer to the source code if you have alternate +
 file formats and/or column names.
 }
@@ -55,9 +39,6 @@ file formats and/or column names.
 \dontrun{
 add_leaves("pspa_snf7.aln", "pspa.txt")
 }
-\dontrun{
-add_leaves("pspa_snf7.aln", "pspa.txt")
-}
 }
 \author{
 Janani Ravi
diff --git a/man/add_name.Rd b/man/add_name.Rd
index c4fce392..f19139e1 100644
--- a/man/add_name.Rd
+++ b/man/add_name.Rd
@@ -1,18 +1,9 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/CHANGED-pre-msa-tree.R, R/pre-msa-tree.R
+% Please edit documentation in R/pre-msa-tree.R
 \name{add_name}
 \alias{add_name}
-\title{Add Name}
+\title{Title}
 \usage{
-add_name(
-  data,
-  accnum_col = "AccNum",
-  spec_col = "Species",
-  lin_col = "Lineage",
-  lin_sep = ">",
-  out_col = "Name"
-)
-
 add_name(
   data,
   accnum_col = "AccNum",
@@ -37,14 +28,9 @@ add_name(
 Lineage, and AccNum info}
 }
 \value{
-Original data with a 'Name' column
-
 Original data with a 'Name' column
 }
 \description{
-This function adds a new 'Name' column that is comprised of components from
-Kingdom, Phylum, Genus, and species, as well as the accession
-
 This function adds a new 'Name' column that is comprised of components from
 Kingdom, Phylum, Genus, and species, as well as the accession
 }
diff --git a/man/convert2TitleCase.Rd b/man/convert2TitleCase.Rd
new file mode 100644
index 00000000..84e7fa00
--- /dev/null
+++ b/man/convert2TitleCase.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/CHANGED-pre-msa-tree.R
+\name{convert2TitleCase}
+\alias{convert2TitleCase}
+\alias{totitle}
+\alias{to_title}
+\title{Changing case to 'Title Case'}
+\usage{
+convert2TitleCase(x, y = " ")
+}
+\arguments{
+\item{x}{Character vector.}
+
+\item{y}{Delimiter. Default is space (" ").}
+}
+\description{
+Translate string to Title Case w/ delimiter.
+}
+\seealso{
+chartr, toupper, and tolower.
+}
+\author{
+Andrie, Janani Ravi
+}
diff --git a/man/convertAlignment2FA.Rd b/man/convertAlignment2FA.Rd
new file mode 100644
index 00000000..d6b4dc56
--- /dev/null
+++ b/man/convertAlignment2FA.Rd
@@ -0,0 +1,55 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/CHANGED-pre-msa-tree.R
+\name{convertAlignment2FA}
+\alias{convertAlignment2FA}
+\title{Adding Leaves to an alignment file w/ accessions}
+\usage{
+convertAlignment2FA(
+  aln_file = "",
+  lin_file = "data/rawdata_tsv/all_semiclean.txt",
+  fa_outpath = "",
+  reduced = FALSE
+)
+}
+\arguments{
+\item{aln_file}{Character. Path to file. Input tab-delimited file +
+alignment file accnum & alignment.
+Default is 'pspa_snf7.aln'}
+
+\item{lin_file}{Character. Path to file. Protein file with accession +
+number to lineage mapping.
+Default is 'pspa.txt'}
+
+\item{fa_outpath}{Character. Path to the written fasta file.
+Default is '' (empty string).}
+
+\item{reduced}{Boolean. If TRUE, the fasta file will contain only one sequence per lineage.
+Default is 'FALSE'}
+}
+\description{
+Adding Leaves to an alignment file w/ accessions
+Genomic Contexts vs Domain Architectures.
+}
+\details{
+The alignment file would need two columns: 1. accession +
+number and 2. alignment. The protein homolog accession to lineage mapping +
+file should have
+}
+\note{
+Please refer to the source code if you have alternate
+file formats and/or column names.
+}
+\examples{
+\dontrun{
+convertAlignment2FA("pspa_snf7.aln", "pspa.txt")
+}
+
+}
+\author{
+Janani Ravi
+}
+\keyword{accnum}
+\keyword{alignment}
+\keyword{leaves}
+\keyword{lineage}
+\keyword{species}
diff --git a/man/convert_aln2fa.Rd b/man/convert_aln2fa.Rd
index 3e9812df..8bebe31d 100644
--- a/man/convert_aln2fa.Rd
+++ b/man/convert_aln2fa.Rd
@@ -1,16 +1,9 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/CHANGED-pre-msa-tree.R, R/pre-msa-tree.R
+% Please edit documentation in R/pre-msa-tree.R
 \name{convert_aln2fa}
 \alias{convert_aln2fa}
 \title{Adding Leaves to an alignment file w/ accessions}
 \usage{
-convert_aln2fa(
-  aln_file = "",
-  lin_file = "data/rawdata_tsv/all_semiclean.txt",
-  fa_outpath = "",
-  reduced = FALSE
-)
-
 convert_aln2fa(
   aln_file = "",
   lin_file = "data/rawdata_tsv/all_semiclean.txt",
@@ -35,29 +28,17 @@ Default is 'FALSE'}
 }
 \description{
 Adding Leaves to an alignment file w/ accessions
-Genomic Contexts vs Domain Architectures.
 }
 \details{
-The alignment file would need two columns: 1. accession +
-number and 2. alignment. The protein homolog accession to lineage mapping +
-file should have
-
 The alignment file would need two columns: 1. accession +
 number and 2. alignment. The protein homolog accession to lineage mapping +
 file should have
 }
 \note{
-Please refer to the source code if you have alternate +
-file formats and/or column names.
-
 Please refer to the source code if you have alternate +
 file formats and/or column names.
 }
 \examples{
-\dontrun{
-add_leaves("pspa_snf7.aln", "pspa.txt")
-}
-
 \dontrun{
 add_leaves("pspa_snf7.aln", "pspa.txt")
 }
diff --git a/man/generateAllAlignments2FA.Rd b/man/generateAllAlignments2FA.Rd
new file mode 100644
index 00000000..3bf9938a
--- /dev/null
+++ b/man/generateAllAlignments2FA.Rd
@@ -0,0 +1,45 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/CHANGED-pre-msa-tree.R
+\name{generateAllAlignments2FA}
+\alias{generateAllAlignments2FA}
+\title{Adding Leaves to an alignment file w/ accessions}
+\usage{
+generateAllAlignments2FA(
+  aln_path = here("data/rawdata_aln/"),
+  fa_outpath = here("data/alns/"),
+  lin_file = here("data/rawdata_tsv/all_semiclean.txt"),
+  reduced = F
+)
+}
+\arguments{
+\item{aln_path}{Character. Path to alignment files.
+Default is 'here("data/rawdata_aln/")'}
+
+\item{fa_outpath}{Character. Path to the written fasta file.
+Default is 'here("data/alns/")'.}
+
+\item{lin_file}{Character. Path to file. Master protein file with AccNum & lineages.
+Default is 'here("data/rawdata_tsv/all_semiclean.txt")'}
+
+\item{reduced}{Boolean. If TRUE, the fasta file will contain only one sequence per lineage.
+Default is 'FALSE'.}
+}
+\description{
+Adding Leaves to all alignment files w/ accessions & DAs?
+}
+\details{
+The alignment files would need two columns separated by spaces: 1. AccNum and 2. alignment. The protein homolog file should have AccNum, Species, Lineages.
+}
+\note{
+Please refer to the source code if you have alternate file formats and/or column names.
+}
+\examples{
+\dontrun{
+generateAllAlignments2FA()
+}
+}
+\keyword{accnum}
+\keyword{alignment}
+\keyword{leaves}
+\keyword{lineage}
+\keyword{species}
diff --git a/man/generate_all_aln2fa.Rd b/man/generate_all_aln2fa.Rd
index 7f99c981..ad6b7136 100644
--- a/man/generate_all_aln2fa.Rd
+++ b/man/generate_all_aln2fa.Rd
@@ -1,16 +1,9 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/CHANGED-pre-msa-tree.R, R/pre-msa-tree.R
+% Please edit documentation in R/pre-msa-tree.R
 \name{generate_all_aln2fa}
 \alias{generate_all_aln2fa}
 \title{Adding Leaves to an alignment file w/ accessions}
 \usage{
-generate_all_aln2fa(
-  aln_path = here("data/rawdata_aln/"),
-  fa_outpath = here("data/alns/"),
-  lin_file = here("data/rawdata_tsv/all_semiclean.txt"),
-  reduced = F
-)
-
 generate_all_aln2fa(
   aln_path = here("data/rawdata_aln/"),
   fa_outpath = here("data/alns/"),
@@ -32,27 +25,18 @@ Default is 'here("data/rawdata_tsv/all_semiclean.txt")'}
 Default is 'FALSE'.}
 }
 \description{
-Adding Leaves to all alignment files w/ accessions & DAs?
-
 Adding Leaves to all alignment files w/ accessions & DAs?
 }
 \details{
-The alignment files would need two columns separated by spaces: 1. AccNum and 2. alignment. The protein homolog file should have AccNum, Species, Lineages.
-
 The alignment files would need two columns separated by spaces: 1. AccNum and 2. alignment. The protein homolog file should have AccNum, Species, Lineages.
 }
 \note{
-Please refer to the source code if you have alternate + file formats and/or column names.
-
 Please refer to the source code if you have alternate + file formats and/or column names.
 }
 \examples{
 \dontrun{
 generate_all_aln2fa()
 }
-\dontrun{
-generate_all_aln2fa()
-}
 }
 \author{
 Janani Ravi
diff --git a/man/mapAcc2Name.Rd b/man/mapAcc2Name.Rd
new file mode 100644
index 00000000..0f5d447d
--- /dev/null
+++ b/man/mapAcc2Name.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/CHANGED-pre-msa-tree.R
+\name{mapAcc2Name}
+\alias{mapAcc2Name}
+\title{Default renameFA() replacement function. Maps an accession number to its name}
+\usage{
+mapAcc2Name(line, acc2name, acc_col = "AccNum", name_col = "Name")
+}
+\arguments{
+\item{line}{The line of a fasta file starting with '>'}
+
+\item{acc2name}{Data Table containing a column of accession numbers and a name column}
+
+\item{acc_col}{Name of the column containing Accession numbers}
+
+\item{name_col}{Name of the column containing the names that the accession numbers
+are mapped to}
+}
+\description{
+Default renameFA() replacement function. Maps an accession number to its name
+}
diff --git a/man/map_acc2name.Rd b/man/map_acc2name.Rd
index 846145ee..fcdb3023 100644
--- a/man/map_acc2name.Rd
+++ b/man/map_acc2name.Rd
@@ -1,11 +1,9 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/CHANGED-pre-msa-tree.R, R/pre-msa-tree.R
+% Please edit documentation in R/pre-msa-tree.R
 \name{map_acc2name}
 \alias{map_acc2name}
 \title{Default rename_fasta() replacement function. Maps an accession number to its name}
 \usage{
-map_acc2name(line, acc2name, acc_col = "AccNum", name_col = "Name")
-
 map_acc2name(line, acc2name, acc_col = "AccNum", name_col = "Name")
 }
 \arguments{
@@ -19,7 +17,5 @@ map_acc2name(line, acc2name, acc_col = "AccNum", name_col = "Name")
 are mapped to}
 }
 \description{
-Default rename_fasta() replacement function. Maps an accession number to its name
-
 Default rename_fasta() replacement function. Maps an accession number to its name
 }
diff --git a/man/renameFA.Rd b/man/renameFA.Rd
new file mode 100644
index 00000000..7b6fd579
--- /dev/null
+++ b/man/renameFA.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/CHANGED-pre-msa-tree.R
+\name{renameFA}
+\alias{renameFA}
+\title{Rename the labels of fasta files}
+\usage{
+renameFA(fa_path, outpath, replacement_function = mapAcc2Name, ...)
+}
+\arguments{
+\item{fa_path}{Path to fasta file}
+
+\item{outpath}{Path to write altered fasta file to}
+
+\item{replacement_function}{Function to apply to lines starting with '>'}
+
+\item{...}{Additional arguments to pass to replacement_function}
+}
+\description{
+Rename the labels of fasta files
+}
diff --git a/man/rename_fasta.Rd b/man/rename_fasta.Rd
index 120b65e8..6b4e5dd7 100644
--- a/man/rename_fasta.Rd
+++ b/man/rename_fasta.Rd
@@ -1,11 +1,9 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/CHANGED-pre-msa-tree.R, R/pre-msa-tree.R
+% Please edit documentation in R/pre-msa-tree.R
 \name{rename_fasta}
 \alias{rename_fasta}
 \title{Rename the labels of fasta files}
 \usage{
-rename_fasta(fa_path, outpath, replacement_function = map_acc2name, ...)
-
 rename_fasta(fa_path, outpath, replacement_function = map_acc2name, ...)
 }
 \arguments{
@@ -18,7 +16,5 @@ rename_fasta(fa_path, outpath, replacement_function = map_acc2name, ...)
 \item{...}{Additional arguments to pass to replacement_function}
 }
 \description{
-Rename the labels of fasta files
-
 Rename the labels of fasta files
 }
diff --git a/man/to_titlecase.Rd b/man/to_titlecase.Rd
index dc093a97..45139d3b 100644
--- a/man/to_titlecase.Rd
+++ b/man/to_titlecase.Rd
@@ -1,13 +1,11 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/CHANGED-pre-msa-tree.R, R/pre-msa-tree.R
+% Please edit documentation in R/pre-msa-tree.R
 \name{to_titlecase}
 \alias{to_titlecase}
 \alias{totitle,}
 \alias{to_title}
-\title{Changing case to 'Title Case'}
+\title{To Titlecase}
 \usage{
-to_titlecase(text, delimitter)
-
 to_titlecase(text, delimitter)
 }
 \arguments{
@@ -16,14 +14,10 @@ to_titlecase(text, delimitter)
 \item{y}{Delimitter. Default is space (" ").}
 }
 \description{
-Translate string to Title Case w/ delimitter.
-
 Translate string to Title Case w/ delimitter.
 Changing case to 'Title Case'
 }
 \seealso{
-chartr, toupper, and tolower.
-
 chartr, toupper, and tolower.
 }
 \author{

From 950e6b6cc4c519561011baf6e79b4673e323456d Mon Sep 17 00:00:00 2001
From: teddyCodex <samted.uche@gmail.com>
Date: Mon, 7 Oct 2024 11:30:11 +0100
Subject: [PATCH 12/33] refactor function names in R/plotme.R

---
 NAMESPACE                                     | 10 ++---
 R/plotme.R                                    | 44 +++++++++----------
 man/create_all_col_params.Rd                  | 14 ------
 man/create_one_col_params.Rd                  | 14 ------
 man/{count_to_sunburst.Rd => plotSunburst.Rd} | 29 +++++-------
 man/prepareColumnParams.Rd                    | 14 ++++++
 man/prepareSingleColumnParams.Rd              | 14 ++++++
 ...{assert_count_df.Rd => validateCountDF.Rd} | 10 ++---
 8 files changed, 72 insertions(+), 77 deletions(-)
 delete mode 100644 man/create_all_col_params.Rd
 delete mode 100644 man/create_one_col_params.Rd
 rename man/{count_to_sunburst.Rd => plotSunburst.Rd} (61%)
 create mode 100644 man/prepareColumnParams.Rd
 create mode 100644 man/prepareSingleColumnParams.Rd
 rename man/{assert_count_df.Rd => validateCountDF.Rd} (56%)

diff --git a/NAMESPACE b/NAMESPACE
index 16cf0813..1a5f4e4f 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -14,7 +14,6 @@ export(add_name)
 export(add_tax)
 export(advanced_opts2est_walltime)
 export(alignFasta)
-export(assert_count_df)
 export(assign_job_queue)
 export(cleanup_GeneDesc)
 export(cleanup_clust)
@@ -28,11 +27,7 @@ export(combine_ipr)
 export(convert_aln2fa)
 export(convert_fa2tre)
 export(count_bycol)
-export(count_to_sunburst)
-export(count_to_treemap)
-export(create_all_col_params)
 export(create_lineage_lookup)
-export(create_one_col_params)
 export(domain_network)
 export(efetch_ipg)
 export(filter_by_doms)
@@ -63,7 +58,11 @@ export(map_acc2name)
 export(map_advanced_opts2procs)
 export(msa_pdf)
 export(pick_longer_duplicate)
+export(plotSunburst)
+export(plotTreemap)
 export(plot_estimated_walltimes)
+export(prepareColumnParams)
+export(prepareSingleColumnParams)
 export(prot2tax)
 export(prot2tax_old)
 export(remove_astrk)
@@ -91,6 +90,7 @@ export(theme_genes2)
 export(to_titlecase)
 export(total_counts)
 export(upset.plot)
+export(validateCountDF)
 export(wordcloud2_element)
 export(wordcloud3)
 export(wordcloud_element)
diff --git a/R/plotme.R b/R/plotme.R
index ba9de53e..6cbeb3d0 100644
--- a/R/plotme.R
+++ b/R/plotme.R
@@ -1,11 +1,11 @@
-# Taken from https://github.com/yogevherz/plotme/blob/master/R/count_to_sunburst_treemap.R
+# Taken from https://github.com/yogevherz/plotme/blob/master/R/count_to_sunburst_treemap.R
 #' Create an interactive plotly from count data
 #'
 #' @description
 #' These functions help you quickly create interactive hierarchical plots
 #' from categorical data. They expect the summary of the data created by
-#' `dplyr::count()` and produce either a sunburst plot (`count_to_sunburst()`) or
-#' a treemap plot (`count_to_treemap()`)
+#' `dplyr::count()` and produce either a sunburst plot (`plotSunburst()`) or
+#' a treemap plot (`plotTreemap()`)
 #'
 #' @param count_data An output of dplyr::count(), tibble or data frame
 #' @param fill_by_n If TRUE, uses a continuous scale to fill plot by group size
@@ -20,21 +20,21 @@
 #' starwars_count <- count(starwars, species, eye_color, name)
 #'
 #' # sunburst plot
-#' count_to_sunburst(starwars_count)
+#' plotSunburst(starwars_count)
 #'
 #' # fill by group size
-#' count_to_sunburst(starwars_count, fill_by_n = TRUE)
+#' plotSunburst(starwars_count, fill_by_n = TRUE)
 #'
 #' # treemap plot, ordered by group size
-#' count_to_treemap(starwars_count, sort_by_n = TRUE)
+#' plotTreemap(starwars_count, sort_by_n = TRUE)
 #'
 #' # display al charchaters by homeworld
 #' starwars %>%
 #'     count(homeworld, name) %>%
-#'     count_to_treemap(sort_by_n = TRUE)
+#'     plotTreemap(sort_by_n = TRUE)
 #'
-count_to_sunburst <- function(count_data, fill_by_n = FALSE, sort_by_n = FALSE, maxdepth = 2) {
-    params <- create_all_col_params(count_data, fill_by_n, sort_by_n)
+plotSunburst <- function(count_data, fill_by_n = FALSE, sort_by_n = FALSE, maxdepth = 2) {
+    params <- prepareColumnParams(count_data, fill_by_n, sort_by_n)
 
     purrr::exec(plotly::plot_ly,
         !!!params,
@@ -53,9 +53,9 @@ count_to_sunburst <- function(count_data, fill_by_n = FALSE, sort_by_n = FALSE,
 #' @importFrom purrr exec
 #'
 #' @export
-#' @rdname count_to_sunburst
-count_to_treemap <- function(count_data, fill_by_n = FALSE, sort_by_n = FALSE) {
-    params <- create_all_col_params(count_data, fill_by_n, sort_by_n)
+#' @rdname plotSunburst
+plotTreemap <- function(count_data, fill_by_n = FALSE, sort_by_n = FALSE) {
+    params <- prepareColumnParams(count_data, fill_by_n, sort_by_n)
 
     purrr::exec(plotly::plot_ly,
         !!!params,
@@ -66,7 +66,7 @@ count_to_treemap <- function(count_data, fill_by_n = FALSE, sort_by_n = FALSE) {
 }
 
 
-#' create_all_col_params
+#' prepareColumnParams
 #'
 #' @param count_data
 #' @param fill_by_n
@@ -80,8 +80,8 @@ count_to_treemap <- function(count_data, fill_by_n = FALSE, sort_by_n = FALSE) {
 #' @export
 #'
 #' @examples
-create_all_col_params <- function(count_data, fill_by_n, sort_by_n) {
-    assert_count_df(count_data)
+prepareColumnParams <- function(count_data, fill_by_n, sort_by_n) {
+    validateCountDF(count_data)
     assertthat::assert_that(is.logical(fill_by_n),
         length(fill_by_n) == 1,
         msg = "fill_by_n must be either TRUE or FALSE"
@@ -91,12 +91,12 @@ create_all_col_params <- function(count_data, fill_by_n, sort_by_n) {
         msg = "sort_by_n must be either TRUE or FALSE"
     )
 
-    count_data <- all_non_n_cols_to_char(count_data)
+    count_data <- .all_non_n_cols_to_char(count_data)
 
     category_num <- ncol(count_data) - 1
 
     params <- purrr::map(1:category_num,
-        create_one_col_params,
+        prepareSingleColumnParams,
         df = count_data,
         root = ""
     ) %>%
@@ -114,7 +114,7 @@ create_all_col_params <- function(count_data, fill_by_n, sort_by_n) {
     params
 }
 
-#' create_one_col_params
+#' prepareSingleColumnParams
 #'
 #' @param df
 #' @param col_num
@@ -127,7 +127,7 @@ create_all_col_params <- function(count_data, fill_by_n, sort_by_n) {
 #' @export
 #'
 #' @examples
-create_one_col_params <- function(df,
+prepareSingleColumnParams <- function(df,
     col_num,
     root) {
     col_name <- names(df)[col_num]
@@ -156,7 +156,7 @@ create_one_col_params <- function(df,
         ) %>%
         dplyr::select(ids, parents, labels, values, hovertext)
 }
-#' assert_count_df
+#' validateCountDF
 #'
 #' @param var
 #'
@@ -167,7 +167,7 @@ create_one_col_params <- function(df,
 #' @export
 #'
 #' @examples
-assert_count_df <- function(var) {
+validateCountDF <- function(var) {
     msg <- paste(substitute(var), "must be a count dataframe (output of dplyr::count)")
     assertthat::assert_that(is.data.frame(var),
         assertthat::has_name(var, "n"),
@@ -178,7 +178,7 @@ assert_count_df <- function(var) {
     assertthat::assert_that(is.numeric(n_col), msg = msg)
 }
 
-all_non_n_cols_to_char <- function(df) {
+.all_non_n_cols_to_char <- function(df) {
     df %>%
         dplyr::mutate(dplyr::across(!matches("^n$"), as.character))
 }
diff --git a/man/create_all_col_params.Rd b/man/create_all_col_params.Rd
deleted file mode 100644
index 5234dfeb..00000000
--- a/man/create_all_col_params.Rd
+++ /dev/null
@@ -1,14 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/plotme.R
-\name{create_all_col_params}
-\alias{create_all_col_params}
-\title{create_all_col_params}
-\usage{
-create_all_col_params(count_data, fill_by_n, sort_by_n)
-}
-\arguments{
-\item{sort_by_n}{}
-}
-\description{
-create_all_col_params
-}
diff --git a/man/create_one_col_params.Rd b/man/create_one_col_params.Rd
deleted file mode 100644
index 0e3cdd10..00000000
--- a/man/create_one_col_params.Rd
+++ /dev/null
@@ -1,14 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/plotme.R
-\name{create_one_col_params}
-\alias{create_one_col_params}
-\title{create_one_col_params}
-\usage{
-create_one_col_params(df, col_num, root)
-}
-\arguments{
-\item{root}{}
-}
-\description{
-create_one_col_params
-}
diff --git a/man/count_to_sunburst.Rd b/man/plotSunburst.Rd
similarity index 61%
rename from man/count_to_sunburst.Rd
rename to man/plotSunburst.Rd
index ae292772..5ee465a6 100644
--- a/man/count_to_sunburst.Rd
+++ b/man/plotSunburst.Rd
@@ -1,18 +1,13 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/plotme.R
-\name{count_to_sunburst}
-\alias{count_to_sunburst}
-\alias{count_to_treemap}
+\name{plotSunburst}
+\alias{plotSunburst}
+\alias{plotTreemap}
 \title{Create an interactive plotly from count data}
 \usage{
-count_to_sunburst(
-  count_data,
-  fill_by_n = FALSE,
-  sort_by_n = FALSE,
-  maxdepth = 2
-)
-
-count_to_treemap(count_data, fill_by_n = FALSE, sort_by_n = FALSE)
+plotSunburst(count_data, fill_by_n = FALSE, sort_by_n = FALSE, maxdepth = 2)
+
+plotTreemap(count_data, fill_by_n = FALSE, sort_by_n = FALSE)
 }
 \arguments{
 \item{count_data}{}
@@ -24,25 +19,25 @@ count_to_treemap(count_data, fill_by_n = FALSE, sort_by_n = FALSE)
 \description{
 These functions help you quickly create interactive hierarchical plots
 from categorical data. They expect the summary of the data created by
-\code{dplyr::count()} and produce either a sunburst plot (\code{count_to_sunburst()}) or
-a treemap plot (\code{count_to_treemap()})
+\code{dplyr::count()} and produce either a sunburst plot (\code{plotSunburst()}) or
+a treemap plot (\code{plotTreemap()})
 }
 \examples{
 library(dplyr)
 starwars_count <- count(starwars, species, eye_color, name)
 
 # sunburst plot
-count_to_sunburst(starwars_count)
+plotSunburst(starwars_count)
 
 # fill by group size
-count_to_sunburst(starwars_count, fill_by_n = TRUE)
+plotSunburst(starwars_count, fill_by_n = TRUE)
 
 # treemap plot, ordered by group size
-count_to_treemap(starwars_count, sort_by_n = TRUE)
+plotTreemap(starwars_count, sort_by_n = TRUE)
 
 # display al charchaters by homeworld
 starwars \%>\%
     count(homeworld, name) \%>\%
-    count_to_treemap(sort_by_n = TRUE)
+    plotTreemap(sort_by_n = TRUE)
 
 }
diff --git a/man/prepareColumnParams.Rd b/man/prepareColumnParams.Rd
new file mode 100644
index 00000000..bb0b9a29
--- /dev/null
+++ b/man/prepareColumnParams.Rd
@@ -0,0 +1,14 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plotme.R
+\name{prepareColumnParams}
+\alias{prepareColumnParams}
+\title{prepareColumnParams}
+\usage{
+prepareColumnParams(count_data, fill_by_n, sort_by_n)
+}
+\arguments{
+\item{sort_by_n}{}
+}
+\description{
+prepareColumnParams
+}
diff --git a/man/prepareSingleColumnParams.Rd b/man/prepareSingleColumnParams.Rd
new file mode 100644
index 00000000..d823852b
--- /dev/null
+++ b/man/prepareSingleColumnParams.Rd
@@ -0,0 +1,14 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plotme.R
+\name{prepareSingleColumnParams}
+\alias{prepareSingleColumnParams}
+\title{prepareSingleColumnParams}
+\usage{
+prepareSingleColumnParams(df, col_num, root)
+}
+\arguments{
+\item{root}{}
+}
+\description{
+prepareSingleColumnParams
+}
diff --git a/man/assert_count_df.Rd b/man/validateCountDF.Rd
similarity index 56%
rename from man/assert_count_df.Rd
rename to man/validateCountDF.Rd
index 3591d692..fc4aefa2 100644
--- a/man/assert_count_df.Rd
+++ b/man/validateCountDF.Rd
@@ -1,14 +1,14 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/plotme.R
-\name{assert_count_df}
-\alias{assert_count_df}
-\title{assert_count_df}
+\name{validateCountDF}
+\alias{validateCountDF}
+\title{validateCountDF}
 \usage{
-assert_count_df(var)
+validateCountDF(var)
 }
 \arguments{
 \item{var}{}
 }
 \description{
-assert_count_df
+validateCountDF
 }

From 7e2454330665f6dff0fa74d993dcc3bbc9523eca Mon Sep 17 00:00:00 2001
From: teddyCodex <samted.uche@gmail.com>
Date: Mon, 7 Oct 2024 13:37:45 +0100
Subject: [PATCH 13/33] refactor function names in R/lineage.R

---
 NAMESPACE                                     | 10 +++---
 R/lineage.R                                   | 32 +++++++++----------
 man/DownloadAssemblySummary.Rd                | 22 -------------
 man/{GCA2lin.Rd => GCA2Lineage.Rd}            |  8 ++---
 man/acc2lin.Rd                                |  2 +-
 man/{add_tax.Rd => addTaxID.Rd}               | 10 +++---
 man/prot2tax.Rd                               | 14 --------
 man/proteinAcc2TaxID.Rd                       | 14 ++++++++
 ...rot2tax_old.Rd => proteinAcc2TaxID_old.Rd} |  8 ++---
 9 files changed, 49 insertions(+), 71 deletions(-)
 delete mode 100644 man/DownloadAssemblySummary.Rd
 rename man/{GCA2lin.Rd => GCA2Lineage.Rd} (88%)
 rename man/{add_tax.Rd => addTaxID.Rd} (57%)
 delete mode 100644 man/prot2tax.Rd
 create mode 100644 man/proteinAcc2TaxID.Rd
 rename man/{prot2tax_old.Rd => proteinAcc2TaxID_old.Rd} (75%)

diff --git a/NAMESPACE b/NAMESPACE
index 16cf0813..17738278 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,17 +1,16 @@
 # Generated by roxygen2: do not edit by hand
 
 export(BinaryDomainNetwork)
-export(DownloadAssemblySummary)
-export(GCA2lin)
+export(GCA2Lineage)
 export(GenContextNetwork)
 export(LineagePlot)
 export(RepresentativeAccNums)
 export(acc2fa)
 export(acc2lin)
+export(addTaxID)
 export(add_leaves)
 export(add_lins)
 export(add_name)
-export(add_tax)
 export(advanced_opts2est_walltime)
 export(alignFasta)
 export(assert_count_df)
@@ -34,6 +33,7 @@ export(create_all_col_params)
 export(create_lineage_lookup)
 export(create_one_col_params)
 export(domain_network)
+export(downloadAssemblySummary)
 export(efetch_ipg)
 export(filter_by_doms)
 export(filter_freq)
@@ -64,8 +64,8 @@ export(map_advanced_opts2procs)
 export(msa_pdf)
 export(pick_longer_duplicate)
 export(plot_estimated_walltimes)
-export(prot2tax)
-export(prot2tax_old)
+export(proteinAcc2TaxID)
+export(proteinAcc2TaxID_old)
 export(remove_astrk)
 export(remove_empty)
 export(remove_tails)
diff --git a/R/lineage.R b/R/lineage.R
index 20acec04..f034739b 100644
--- a/R/lineage.R
+++ b/R/lineage.R
@@ -22,7 +22,7 @@
 #' @export
 #'
 #' @examples
-DownloadAssemblySummary <- function(outpath,
+downloadAssemblySummary <- function(outpath,
     keep = c(
         "assembly_accession", "taxid",
         "species_taxid", "organism_name"
@@ -74,7 +74,7 @@ DownloadAssemblySummary <- function(outpath,
 #'
 #' @param prot_data Dataframe containing a column `GCA_ID`
 #' @param assembly_path String of the path to the assembly_summary path
-#' This file can be generated using the "DownloadAssemblySummary()" function
+#' This file can be generated using the "downloadAssemblySummary()" function
 #' @param lineagelookup_path String of the path to the lineage lookup file
 #' (taxid to lineage mapping). This file can be generated using the
 #' "create_lineage_lookup()" function
@@ -87,7 +87,7 @@ DownloadAssemblySummary <- function(outpath,
 #' @export
 #'
 #' @examples
-GCA2lin <- function(prot_data,
+GCA2Lineage <- function(prot_data,
     assembly_path = "/data/research/jravilab/common_data/assembly_summary_genbank.txt",
     lineagelookup_path = "/data/research/jravilab/common_data/lineage_lookup.tsv",
     acc_col = "AccNum") {
@@ -189,7 +189,7 @@ add_lins <- function(df, acc_col = "AccNum", assembly_path,
 #'
 #' @param accessions Character vector of protein accessions
 #' @param assembly_path String of the path to the assembly_summary path
-#' This file can be generated using the "DownloadAssemblySummary()" function
+#' This file can be generated using the "downloadAssemblySummary()" function
 #' @param lineagelookup_path String of the path to the lineage lookup file
 #' (taxid to lineage mapping). This file can be generated using the
 #' @param ipgout_path Path to write the results of the efetch run of the accessions
@@ -353,25 +353,25 @@ ipg2lin <- function(accessions, ipg_file,
     refseq_rows <- refseq_rows[which(refseq_rows != 0)]
     genbank_rows <- genbank_rows[which(genbank_rows != 0)]
 
-    # Call GCA2lins using refseq
+    # Call GCA2Lineage() using refseq
     ### Possible to run these in parallel if it takes a while
     if (length(refseq_rows) != 0) {
         refseq_ipg_dt <- ipg_dt[refseq_rows, ]
-        refseq_lins <- GCA2lin(refseq_ipg_dt,
+        refseq_lins <- GCA2Lineage(refseq_ipg_dt,
             assembly_path = refseq_assembly_path,
             lineagelookup_path
         )
     }
     if (length(genbank_rows) != 0) {
         genbank_ipg_dt <- ipg_dt[genbank_rows, ]
-        genbank_lins <- GCA2lin(gca_ipg_dt,
+        genbank_lins <- GCA2Lineage(gca_ipg_dt,
             assembly_path = genbank_assembly_path,
             lineagelookup_path
         )
     }
 
 
-    lins <- GCA2lin(prot_data = ipg_dt, assembly_path, lineagelookup_path)
+    lins <- GCA2Lineage(prot_data = ipg_dt, assembly_path, lineagelookup_path)
     lins <- lins[!is.na(Lineage)] %>% unique()
 
     return(lins)
@@ -381,7 +381,7 @@ ipg2lin <- function(accessions, ipg_file,
 #########################################
 ## !! @SAM: Add TaxID based on AccNum? ##
 #########################################
-#' add_tax
+#' addTaxID
 #'
 #' @param data
 #' @param acc_col
@@ -393,7 +393,7 @@ ipg2lin <- function(accessions, ipg_file,
 #' @export
 #'
 #' @examples
-add_tax <- function(data, acc_col = "AccNum", version = T) {
+addTaxID <- function(data, acc_col = "AccNum", version = T) {
     if (!is.data.table(data)) {
         data <- as.data.table(data)
     }
@@ -408,7 +408,7 @@ add_tax <- function(data, acc_col = "AccNum", version = T) {
     }
 
     out_path <- tempdir()
-    tax <- prot2tax(accessions, "TEMPTAX", out_path, return_dt = TRUE)
+    tax <- proteinAcc2TaxID(accessions, "TEMPTAX", out_path, return_dt = TRUE)
 
     data <- merge.data.table(data, tax,
         by.x = acc_col, by.y = "AccNum.noV", all.x = T
@@ -419,7 +419,7 @@ add_tax <- function(data, acc_col = "AccNum", version = T) {
 ##################################
 ## Maps Protein AccNum to TaxID ##
 ##################################
-#' prot2tax
+#' proteinAcc2TaxID
 #'
 #' @param accnums
 #' @param suffix
@@ -432,7 +432,7 @@ add_tax <- function(data, acc_col = "AccNum", version = T) {
 #' @export
 #'
 #' @examples
-prot2tax <- function(accnums, suffix, out_path, return_dt = FALSE) {
+proteinAcc2TaxID <- function(accnums, suffix, out_path, return_dt = FALSE) {
     # Write accnums to a file
     acc_file <- tempfile()
     write(paste(accnums, collapse = "\n"), acc_file)
@@ -450,7 +450,7 @@ prot2tax <- function(accnums, suffix, out_path, return_dt = FALSE) {
 #######################################
 ## OLD: Maps Protein AccNum to TaxID ##
 #######################################
-#' prot2tax_old
+#' proteinAcc2TaxID_old
 #'
 #' @author Samuel Chen, Janani Ravi
 #' @description Perform elink to go from protein database to taxonomy database
@@ -468,7 +468,7 @@ prot2tax <- function(accnums, suffix, out_path, return_dt = FALSE) {
 #' @export
 #'
 #' @examples
-prot2tax_old <- function(accessions, out_path, plan = "multicore") {
+proteinAcc2TaxID_old <- function(accessions, out_path, plan = "multicore") {
     if (length(accessions) > 0) {
         partition <- function(v, groups) {
             # Partition data to limit number of queries per second for rentrez fetch:
@@ -500,7 +500,7 @@ prot2tax_old <- function(accessions, out_path, plan = "multicore") {
             }
             print(x)
             script <- "/data/research/jravilab/molevol_scripts/upstream_scripts/acc2info.sh"
-            # script <- "/data/research/jravilab/molevol_scripts/upstream_scripts/prot2tax.sh"
+            # script <- "/data/research/jravilab/molevol_scripts/upstream_scripts/prot2tax.sh"
 
             # accnum_in <- paste(partitioned_acc[[x]], collapse=",")
             accnum_in <- tempfile()
diff --git a/man/DownloadAssemblySummary.Rd b/man/DownloadAssemblySummary.Rd
deleted file mode 100644
index 2d724793..00000000
--- a/man/DownloadAssemblySummary.Rd
+++ /dev/null
@@ -1,22 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/lineage.R
-\name{DownloadAssemblySummary}
-\alias{DownloadAssemblySummary}
-\title{Download the combined assembly summaries of genbank and refseq}
-\usage{
-DownloadAssemblySummary(
-  outpath,
-  keep = c("assembly_accession", "taxid", "species_taxid", "organism_name")
-)
-}
-\arguments{
-\item{outpath}{String of path where the assembly summary file should be written}
-
-\item{keep}{Character vector containing which columns should be retained and downloaded}
-}
-\description{
-Download the combined assembly summaries of genbank and refseq
-}
-\author{
-Samuel Chen, Janani Ravi
-}
diff --git a/man/GCA2lin.Rd b/man/GCA2Lineage.Rd
similarity index 88%
rename from man/GCA2lin.Rd
rename to man/GCA2Lineage.Rd
index ad83ca39..9ec0ce56 100644
--- a/man/GCA2lin.Rd
+++ b/man/GCA2Lineage.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/lineage.R
-\name{GCA2lin}
-\alias{GCA2lin}
+\name{GCA2Lineage}
+\alias{GCA2Lineage}
 \title{Function to map GCA_ID to TaxID, and TaxID to Lineage}
 \usage{
-GCA2lin(
+GCA2Lineage(
   prot_data,
   assembly_path = "/data/research/jravilab/common_data/assembly_summary_genbank.txt",
   lineagelookup_path = "/data/research/jravilab/common_data/lineage_lookup.tsv",
@@ -15,7 +15,7 @@ GCA2lin(
 \item{prot_data}{Dataframe containing a column \code{GCA_ID}}
 
 \item{assembly_path}{String of the path to the assembly_summary path
-This file can be generated using the "DownloadAssemblySummary()" function}
+This file can be generated using the "downloadAssemblySummary()" function}
 
 \item{lineagelookup_path}{String of the path to the lineage lookup file
 (taxid to lineage mapping). This file can be generated using the
diff --git a/man/acc2lin.Rd b/man/acc2lin.Rd
index 6255b290..fd393d43 100644
--- a/man/acc2lin.Rd
+++ b/man/acc2lin.Rd
@@ -24,7 +24,7 @@ acc2lin(
 \item{accessions}{Character vector of protein accessions}
 
 \item{assembly_path}{String of the path to the assembly_summary path
-This file can be generated using the "DownloadAssemblySummary()" function}
+This file can be generated using the "downloadAssemblySummary()" function}
 
 \item{lineagelookup_path}{String of the path to the lineage lookup file
 (taxid to lineage mapping). This file can be generated using the}
diff --git a/man/add_tax.Rd b/man/addTaxID.Rd
similarity index 57%
rename from man/add_tax.Rd
rename to man/addTaxID.Rd
index fec859c3..d2fe139d 100644
--- a/man/add_tax.Rd
+++ b/man/addTaxID.Rd
@@ -1,14 +1,14 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/lineage.R
-\name{add_tax}
-\alias{add_tax}
-\title{add_tax}
+\name{addTaxID}
+\alias{addTaxID}
+\title{addTaxID}
 \usage{
-add_tax(data, acc_col = "AccNum", version = T)
+addTaxID(data, acc_col = "AccNum", version = T)
 }
 \arguments{
 \item{version}{}
 }
 \description{
-add_tax
+addTaxID
 }
diff --git a/man/prot2tax.Rd b/man/prot2tax.Rd
deleted file mode 100644
index 3631287e..00000000
--- a/man/prot2tax.Rd
+++ /dev/null
@@ -1,14 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/lineage.R
-\name{prot2tax}
-\alias{prot2tax}
-\title{prot2tax}
-\usage{
-prot2tax(accnums, suffix, out_path, return_dt = FALSE)
-}
-\arguments{
-\item{return_dt}{}
-}
-\description{
-prot2tax
-}
diff --git a/man/proteinAcc2TaxID.Rd b/man/proteinAcc2TaxID.Rd
new file mode 100644
index 00000000..c0317bba
--- /dev/null
+++ b/man/proteinAcc2TaxID.Rd
@@ -0,0 +1,14 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lineage.R
+\name{proteinAcc2TaxID}
+\alias{proteinAcc2TaxID}
+\title{proteinAcc2TaxID}
+\usage{
+proteinAcc2TaxID(accnums, suffix, out_path, return_dt = FALSE)
+}
+\arguments{
+\item{return_dt}{}
+}
+\description{
+proteinAcc2TaxID
+}
diff --git a/man/prot2tax_old.Rd b/man/proteinAcc2TaxID_old.Rd
similarity index 75%
rename from man/prot2tax_old.Rd
rename to man/proteinAcc2TaxID_old.Rd
index 22541131..0c2a85ba 100644
--- a/man/prot2tax_old.Rd
+++ b/man/proteinAcc2TaxID_old.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/lineage.R
-\name{prot2tax_old}
-\alias{prot2tax_old}
-\title{prot2tax_old}
+\name{proteinAcc2TaxID_old}
+\alias{proteinAcc2TaxID_old}
+\title{proteinAcc2TaxID_old}
 \usage{
-prot2tax_old(accessions, out_path, plan = "multicore")
+proteinAcc2TaxID_old(accessions, out_path, plan = "multicore")
 }
 \arguments{
 \item{accessions}{Character vector containing the accession numbers to query on

From ec96cf1b4192343716fa074f8d1e48d2af5f33e4 Mon Sep 17 00:00:00 2001
From: Awa Synthia <ndahili14@gmail.com>
Date: Mon, 7 Oct 2024 22:20:13 +0300
Subject: [PATCH 14/33] rename functions

Signed-off-by: Awa Synthia <ndahili14@gmail.com>
---
 NAMESPACE                           |  5 ---
 R/acc2lin.R                         | 28 ++++++++---------
 R/deprecate.R                       | 40 -----------------------
 R/lineage.R                         | 24 +++++++-------
 man/{ipg2lin.Rd => IPG2Lineage.Rd}  |  8 ++---
 man/acc2Lineage.Rd                  | 37 ++++++++++++++++++++++
 man/acc2lin.Rd                      | 49 ++++++++++++-----------------
 man/{add_lins.Rd => addlineage.Rd}  | 10 +++---
 man/deprecate.Rd                    | 43 -------------------------
 man/{efetch_ipg.Rd => efetchIPG.Rd} |  8 ++---
 10 files changed, 96 insertions(+), 156 deletions(-)
 delete mode 100644 R/deprecate.R
 rename man/{ipg2lin.Rd => IPG2Lineage.Rd} (91%)
 create mode 100644 man/acc2Lineage.Rd
 rename man/{add_lins.Rd => addlineage.Rd} (76%)
 delete mode 100644 man/deprecate.Rd
 rename man/{efetch_ipg.Rd => efetchIPG.Rd} (78%)

diff --git a/NAMESPACE b/NAMESPACE
index 4dbb858b..a526b959 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -9,9 +9,7 @@ export(LineagePlot)
 export(RepresentativeAccNums)
 export(acc2Lineage)
 export(acc2fa)
-export(acc2lin)
 export(add_leaves)
-export(add_lins)
 export(add_name)
 export(add_tax)
 export(addlineage)
@@ -38,7 +36,6 @@ export(create_lineage_lookup)
 export(create_one_col_params)
 export(domain_network)
 export(efetchIPG)
-export(efetch_ipg)
 export(filter_by_doms)
 export(filter_freq)
 export(find_paralogs)
@@ -53,7 +50,6 @@ export(get_accnums_from_fasta_file)
 export(get_job_message)
 export(get_proc_medians)
 export(get_proc_weights)
-export(ipg2lin)
 export(ipr2viz)
 export(ipr2viz_web)
 export(lineage.DA.plot)
@@ -82,7 +78,6 @@ export(run_deltablast)
 export(run_rpsblast)
 export(send_job_status_email)
 export(shorten_lineage)
-export(sink.reset)
 export(sinkReset)
 export(stacked_lin_plot)
 export(string2accnum)
diff --git a/R/acc2lin.R b/R/acc2lin.R
index dca24140..71e7ae07 100644
--- a/R/acc2lin.R
+++ b/R/acc2lin.R
@@ -15,7 +15,7 @@
 #'
 #' @examples
 #' \dontrun{
-#' sink.reset()
+#' sinkReset()
 #' }
 sinkReset <- function() {
     for (i in seq_len(sink.number())) {
@@ -43,13 +43,13 @@ sinkReset <- function() {
 #'
 #' @examples
 #' \dontrun{
-#' add_lins()
+#' addlineage()
 #' }
 addlineage <- function(df, acc_col = "AccNum", assembly_path,
     lineagelookup_path, ipgout_path = NULL, plan = "sequential", ...) {
     s_acc_col <- sym(acc_col)
     accessions <- df %>% pull(acc_col)
-    lins <- acc2lin(accessions, assembly_path, lineagelookup_path, ipgout_path, plan)
+    lins <- acc2Lineage(accessions, assembly_path, lineagelookup_path, ipgout_path, plan)
 
     # Drop a lot of the unimportant columns for now? will make merging much easier
     lins <- lins[, c(
@@ -66,11 +66,11 @@ addlineage <- function(df, acc_col = "AccNum", assembly_path,
 }
 
 
-#' acc2lin
+#' acc2Lineage
 #'
 #' @author Samuel Chen, Janani Ravi
 #'
-#' @description This function combines 'efetch_ipg()' and 'ipg2lin()' to map a set
+#' @description This function combines 'efetchIPG()' and 'IPG2Lineage()' to map a set
 #' of protein accessions to their assembly (GCA_ID), tax ID, and lineage.
 #'
 #' @param accessions Character vector of protein accessions
@@ -88,7 +88,7 @@ addlineage <- function(df, acc_col = "AccNum", assembly_path,
 #'
 #' @examples
 #' \dontrun{
-#' acc2lin()
+#' acc2Lineage()
 #' }
 acc2Lineage <- function(accessions, assembly_path, lineagelookup_path, ipgout_path = NULL, plan = "sequential", ...) {
     tmp_ipg <- F
@@ -96,9 +96,9 @@ acc2Lineage <- function(accessions, assembly_path, lineagelookup_path, ipgout_pa
         tmp_ipg <- T
         ipgout_path <- tempfile("ipg", fileext = ".txt")
     }
-    efetch_ipg(accessions, out_path = ipgout_path, plan)
+    efetchIPG(accessions, out_path = ipgout_path, plan)
 
-    lins <- ipg2lin(accessions, ipgout_path, assembly_path, lineagelookup_path)
+    lins <- IPG2Lineage(accessions, ipgout_path, assembly_path, lineagelookup_path)
 
     if (tmp_ipg) {
         unlink(tempdir(), recursive = T)
@@ -106,7 +106,7 @@ acc2Lineage <- function(accessions, assembly_path, lineagelookup_path, ipgout_pa
     return(lins)
 }
 
-#' efetch_ipg
+#' efetchIPG
 #'
 #' @author Samuel Chen, Janani Ravi
 #'
@@ -127,12 +127,12 @@ acc2Lineage <- function(accessions, assembly_path, lineagelookup_path, ipgout_pa
 #'
 #' @examples
 #' \dontrun{
-#' efetch_ipg()
+#' efetchIPG()
 #' }
 efetchIPG <- function(accnums, out_path, plan = "sequential", ...) {
     if (length(accnums) > 0) {
         partition <- function(in_data, groups) {
-            # \\TODO This function should be defined outside of efetch_ipg(). It can be non-exported/internal
+            # \\TODO This function should be defined outside of efetchIPG(). It can be non-exported/internal
             # Partition data to limit number of queries per second for rentrez fetch:
             # limit of 10/second w/ key
             l <- length(in_data)
@@ -172,7 +172,7 @@ efetchIPG <- function(accnums, out_path, plan = "sequential", ...) {
     }
 }
 
-#' ipg2lin
+#' IPG2Lineage
 #'
 #' @author Samuel Chen, Janani Ravi
 #'
@@ -196,7 +196,7 @@ efetchIPG <- function(accnums, out_path, plan = "sequential", ...) {
 #'
 #' @examples
 #' \dontrun{
-#' ipg2lin()
+#' IPG2Lineage()
 #' }
 #'
 IPG2Lineage <- function(accessions, ipg_file, assembly_path, lineagelookup_path, ...) {
@@ -216,7 +216,7 @@ IPG2Lineage <- function(accessions, ipg_file, assembly_path, lineagelookup_path,
 
 
 
-# efetch_ipg <- function(accnums, outpath)
+# efetchIPG <- function(accnums, outpath)
 # {
 #   SIZE = 250
 #   lower_bound = 1
diff --git a/R/deprecate.R b/R/deprecate.R
deleted file mode 100644
index 2de0bbcd..00000000
--- a/R/deprecate.R
+++ /dev/null
@@ -1,40 +0,0 @@
-#' These functions will be deprecated. Please use other functions instead.
-#' 
-#' @name deprecate
-#' 
-NULL
-
-#' @rdname deprecate
-#' @export
-sink.reset <- function() {
-    warning("'sink.reset' is deprecated. Use 'sinkReset' instead.")
-    sinkReset() 
-}
-
-#' @rdname deprecate
-#' @export
-add_lins <- function(df, ...) {
-    warning("'add_lins' is deprecated. Use 'addlineage' instead.")
-    addlineage(df, ...) 
-}
-
-#' @rdname deprecate
-#' @export
-acc2lin <- function(accessions, ...) {
-    warning("'acc2lin' is deprecated. Use 'acc2Lineage' instead.")
-    acc2Lineage(accessions, ...)
-}
-
-#' @rdname deprecate
-#' @export
-efetch_ipg <- function(accnums, ...) {
-    warning("'efetch_ipg' is deprecated. Use 'efetchIPG' instead.")
-    efetchIPG(accnums, ...) 
-}
-
-#' @rdname deprecate
-#' @export
-ipg2lin <- function(accessions, ...) {
-    warning("'ipg2lin' is deprecated. Use 'IPG32Lineage' instead.")
-    IPG32Lineage(accessions, ...)  
-}
\ No newline at end of file
diff --git a/R/lineage.R b/R/lineage.R
index 20acec04..3775b63b 100644
--- a/R/lineage.R
+++ b/R/lineage.R
@@ -133,7 +133,7 @@ GCA2lin <- function(prot_data,
 ###################################
 ## !! @SAM why is this called lins?
 ###################################
-#' add_lins
+#' addlineage
 #'
 #' @param df
 #' @param acc_col
@@ -149,11 +149,11 @@ GCA2lin <- function(prot_data,
 #' @export
 #'
 #' @examples
-add_lins <- function(df, acc_col = "AccNum", assembly_path,
+addlineage <- function(df, acc_col = "AccNum", assembly_path,
     lineagelookup_path, ipgout_path = NULL, plan = "multicore") {
     acc_sym <- sym(acc_col)
     accessions <- df %>% pull(acc_sym)
-    lins <- acc2lin(accessions, assembly_path,
+    lins <- acc2Lineage(accessions, assembly_path,
         lineagelookup_path, ipgout_path,
         plan = plan
     )
@@ -178,13 +178,13 @@ add_lins <- function(df, acc_col = "AccNum", assembly_path,
 #######################################
 ## Map Protein Accessions to Lineage ##
 #######################################
-#' acc2lin
+#' acc2Lineage
 #'
 #' @description
 #' Function to map protein accession numbers to lineage
 #'
 #' @author Samuel Chen, Janani Ravi
-#' @description This function combines 'efetch_ipg()' and 'ipg2lin()' to map a set
+#' @description This function combines 'efetchIPG()' and 'IPG2Lineage()' to map a set
 #' of protein accessions to their assembly (GCA_ID), tax ID, and lineage.
 #'
 #' @param accessions Character vector of protein accessions
@@ -200,7 +200,7 @@ add_lins <- function(df, acc_col = "AccNum", assembly_path,
 #' @export
 #'
 #' @examples
-acc2lin <- function(accessions, assembly_path, lineagelookup_path,
+acc2Lineage <- function(accessions, assembly_path, lineagelookup_path,
     ipgout_path = NULL, plan = "multicore") {
     tmp_ipg <- F
 
@@ -208,9 +208,9 @@ acc2lin <- function(accessions, assembly_path, lineagelookup_path,
         tmp_ipg <- T
         ipgout_path <- tempfile("ipg", fileext = ".txt")
     }
-    efetch_ipg(accessions, out_path = ipgout_path, plan = plan)
+    efetchIPG(accessions, out_path = ipgout_path, plan = plan)
 
-    lins <- ipg2lin(accessions, ipgout_path, assembly_path, lineagelookup_path)
+    lins <- IPG2Lineage(accessions, ipgout_path, assembly_path, lineagelookup_path)
 
     # if(tmp_ipg)
     # {
@@ -227,7 +227,7 @@ acc2lin <- function(accessions, assembly_path, lineagelookup_path,
 #########################################
 ## Download IPG results for Accessions ##
 #########################################
-#' efetch_ipg
+#' efetchIPG
 #'
 #' @author Samuel Chen, Janani Ravi
 #' @description Perform efetch on the ipg database and write the results to out_path
@@ -245,7 +245,7 @@ acc2lin <- function(accessions, assembly_path, lineagelookup_path,
 #' @export
 #'
 #' @examples
-efetch_ipg <- function(accessions, out_path, plan = "multicore") {
+efetchIPG <- function(accessions, out_path, plan = "multicore") {
     if (length(accessions) > 0) {
         partition <- function(v, groups) {
             # Partition data to limit number of queries per second for rentrez fetch:
@@ -295,7 +295,7 @@ efetch_ipg <- function(accessions, out_path, plan = "multicore") {
 #########################################
 ## Maps IPG results to TaxID + Lineage ##
 #########################################
-#' ipg2lin
+#' IPG2Lineage
 #'
 #' @author Samuel Chen, Janani Ravi
 #' @description Takes the resulting file of an efetch run on the ipg database and
@@ -317,7 +317,7 @@ efetch_ipg <- function(accessions, out_path, plan = "multicore") {
 #' @export
 #'
 #' @examples
-ipg2lin <- function(accessions, ipg_file,
+IPG2Lineage <- function(accessions, ipg_file,
     refseq_assembly_path, genbank_assembly_path,
     lineagelookup_path) {
     ipg_dt <- fread(ipg_file, sep = "\t", fill = T)
diff --git a/man/ipg2lin.Rd b/man/IPG2Lineage.Rd
similarity index 91%
rename from man/ipg2lin.Rd
rename to man/IPG2Lineage.Rd
index 6e2b4c6f..43a920b9 100644
--- a/man/ipg2lin.Rd
+++ b/man/IPG2Lineage.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/lineage.R
-\name{ipg2lin}
-\alias{ipg2lin}
-\title{ipg2lin}
+\name{IPG2Lineage}
+\alias{IPG2Lineage}
+\title{IPG2Lineage}
 \usage{
-ipg2lin(
+IPG2Lineage(
   accessions,
   ipg_file,
   refseq_assembly_path,
diff --git a/man/acc2Lineage.Rd b/man/acc2Lineage.Rd
new file mode 100644
index 00000000..5ab5931a
--- /dev/null
+++ b/man/acc2Lineage.Rd
@@ -0,0 +1,37 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lineage.R
+\name{acc2Lineage}
+\alias{acc2Lineage}
+\title{acc2Lineage}
+\usage{
+acc2Lineage(
+  accessions,
+  assembly_path,
+  lineagelookup_path,
+  ipgout_path = NULL,
+  plan = "multicore"
+)
+}
+\arguments{
+\item{accessions}{Character vector of protein accessions}
+
+\item{assembly_path}{String of the path to the assembly_summary path
+This file can be generated using the "DownloadAssemblySummary()" function}
+
+\item{lineagelookup_path}{String of the path to the lineage lookup file
+(taxid to lineage mapping). This file can be generated using the}
+
+\item{ipgout_path}{Path to write the results of the efetch run of the accessions
+on the ipg database. If NULL, the file will not be written. Defaults to NULL}
+
+\item{plan}{}
+}
+\description{
+Function to map protein accession numbers to lineage
+
+This function combines 'efetchIPG()' and 'IPG2Lineage()' to map a set
+of protein accessions to their assembly (GCA_ID), tax ID, and lineage.
+}
+\author{
+Samuel Chen, Janani Ravi
+}
diff --git a/man/acc2lin.Rd b/man/acc2lin.Rd
index f008be5f..92b2887b 100644
--- a/man/acc2lin.Rd
+++ b/man/acc2lin.Rd
@@ -1,12 +1,11 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/acc2lin.R, R/lineage.R
+% Please edit documentation in R/acc2lin.R
 \name{sinkReset}
 \alias{sinkReset}
 \alias{addlineage}
 \alias{acc2Lineage}
 \alias{efetchIPG}
 \alias{IPG2Lineage}
-\alias{acc2lin}
 \title{Sink Reset}
 \usage{
 sinkReset()
@@ -17,8 +16,7 @@ addlineage(
   assembly_path,
   lineagelookup_path,
   ipgout_path = NULL,
-  plan = "sequential",
-  ...
+  plan = "multicore"
 )
 
 acc2Lineage(
@@ -26,20 +24,17 @@ acc2Lineage(
   assembly_path,
   lineagelookup_path,
   ipgout_path = NULL,
-  plan = "sequential",
-  ...
+  plan = "multicore"
 )
 
-efetchIPG(accnums, out_path, plan = "sequential", ...)
-
-IPG2Lineage(accessions, ipg_file, assembly_path, lineagelookup_path, ...)
+efetchIPG(accessions, out_path, plan = "multicore")
 
-acc2lin(
+IPG2Lineage(
   accessions,
-  assembly_path,
-  lineagelookup_path,
-  ipgout_path = NULL,
-  plan = "multicore"
+  ipg_file,
+  refseq_assembly_path,
+  genbank_assembly_path,
+  lineagelookup_path
 )
 }
 \arguments{
@@ -47,7 +42,8 @@ acc2lin(
 This file can be generated using the "DownloadAssemblySummary()" function}
 
 \item{lineagelookup_path}{String of the path to the lineage lookup file
-(taxid to lineage mapping). This file can be generated using the}
+(taxid to lineage mapping). This file can be generated using the
+"create_lineage_lookup()" function}
 
 \item{ipgout_path}{Path to write the results of the efetch run of the accessions
 on the ipg database. If NULL, the file will not be written. Defaults to NULL}
@@ -56,14 +52,14 @@ on the ipg database. If NULL, the file will not be written. Defaults to NULL}
 
 \item{accessions}{Character vector of protein accessions}
 
-\item{accnums}{Character vector containing the accession numbers to query on
-the ipg database}
-
 \item{out_path}{Path to write the efetch results to}
 
 \item{ipg_file}{Filepath to the file containing results of an efetch run on the
 ipg database. The protein accession in 'accessions' should be contained in this
 file}
+
+\item{accnums}{Character vector containing the accession numbers to query on
+the ipg database}
 }
 \value{
 No return, but run to close all outstanding \code{sink()}s
@@ -77,33 +73,28 @@ Describe return, in detail
 Describe return, in detail
 }
 \description{
-This function combines 'efetch_ipg()' and 'ipg2lin()' to map a set
+This function combines 'efetchIPG()' and 'IPG2Lineage()' to map a set
 of protein accessions to their assembly (GCA_ID), tax ID, and lineage.
 
 Perform efetch on the ipg database and write the results to out_path
 
 Takes the resulting file of an efetch run on the ipg database and
-
-Function to map protein accession numbers to lineage
-
-This function combines 'efetch_ipg()' and 'ipg2lin()' to map a set
-of protein accessions to their assembly (GCA_ID), tax ID, and lineage.
 }
 \examples{
 \dontrun{
-sink.reset()
+sinkReset()
 }
 \dontrun{
-add_lins()
+addlineage()
 }
 \dontrun{
-acc2lin()
+acc2Lineage()
 }
 \dontrun{
-efetch_ipg()
+efetchIPG()
 }
 \dontrun{
-ipg2lin()
+IPG2Lineage()
 }
 
 }
diff --git a/man/add_lins.Rd b/man/addlineage.Rd
similarity index 76%
rename from man/add_lins.Rd
rename to man/addlineage.Rd
index 9ac343ea..7f34dc9f 100644
--- a/man/add_lins.Rd
+++ b/man/addlineage.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/lineage.R
-\name{add_lins}
-\alias{add_lins}
-\title{add_lins}
+\name{addlineage}
+\alias{addlineage}
+\title{addlineage}
 \usage{
-add_lins(
+addlineage(
   df,
   acc_col = "AccNum",
   assembly_path,
@@ -17,5 +17,5 @@ add_lins(
 \item{plan}{}
 }
 \description{
-add_lins
+addlineage
 }
diff --git a/man/deprecate.Rd b/man/deprecate.Rd
deleted file mode 100644
index b8f0731f..00000000
--- a/man/deprecate.Rd
+++ /dev/null
@@ -1,43 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/deprecate.R
-\name{deprecate}
-\alias{deprecate}
-\alias{sink.reset}
-\alias{add_lins}
-\alias{acc2lin}
-\alias{efetch_ipg}
-\alias{ipg2lin}
-\title{These functions will be deprecated. Please use other functions instead.}
-\usage{
-sink.reset()
-
-add_lins(
-  df,
-  acc_col = "AccNum",
-  assembly_path,
-  lineagelookup_path,
-  ipgout_path = NULL,
-  plan = "multicore"
-)
-
-acc2lin(
-  accessions,
-  assembly_path,
-  lineagelookup_path,
-  ipgout_path = NULL,
-  plan = "multicore"
-)
-
-efetch_ipg(accessions, out_path, plan = "multicore")
-
-ipg2lin(
-  accessions,
-  ipg_file,
-  refseq_assembly_path,
-  genbank_assembly_path,
-  lineagelookup_path
-)
-}
-\description{
-These functions will be deprecated. Please use other functions instead.
-}
diff --git a/man/efetch_ipg.Rd b/man/efetchIPG.Rd
similarity index 78%
rename from man/efetch_ipg.Rd
rename to man/efetchIPG.Rd
index efe1e8c5..157ceb75 100644
--- a/man/efetch_ipg.Rd
+++ b/man/efetchIPG.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/lineage.R
-\name{efetch_ipg}
-\alias{efetch_ipg}
-\title{efetch_ipg}
+\name{efetchIPG}
+\alias{efetchIPG}
+\title{efetchIPG}
 \usage{
-efetch_ipg(accessions, out_path, plan = "multicore")
+efetchIPG(accessions, out_path, plan = "multicore")
 }
 \arguments{
 \item{accessions}{Character vector containing the accession numbers to query on

From 542092f245a99ff1e75d9e0ed3f1c226384ece25 Mon Sep 17 00:00:00 2001
From: David Mayer <david.mayer@cuanschutz.edu>
Date: Mon, 7 Oct 2024 14:45:22 -0600
Subject: [PATCH 15/33] update .Rd files to reflect function rename

---
 man/convertIPRScanDomainTable2FA.Rd           | 35 +++++++++++++++++++
 ...domains.Rd => createIPRScanDomainTable.Rd} | 12 +++----
 man/df_iprscan_domains2fasta.Rd               | 35 -------------------
 man/exec_interproscan.Rd                      | 14 --------
 ...ta2fasta_domain.Rd => getDomainsFromFA.Rd} | 14 ++++----
 ...ipr_col_names.Rd => getIPRScanColNames.Rd} |  6 ++--
 ...ipr_col_types.Rd => getIPRScanColTypes.Rd} |  6 ++--
 ...{read_iprscan_tsv.Rd => readIPRScanTSV.Rd} |  6 ++--
 man/runIPRScan.Rd                             | 14 ++++++++
 9 files changed, 71 insertions(+), 71 deletions(-)
 create mode 100644 man/convertIPRScanDomainTable2FA.Rd
 rename man/{make_df_iprscan_domains.Rd => createIPRScanDomainTable.Rd} (83%)
 delete mode 100644 man/df_iprscan_domains2fasta.Rd
 delete mode 100644 man/exec_interproscan.Rd
 rename man/{fasta2fasta_domain.Rd => getDomainsFromFA.Rd} (76%)
 rename man/{get_df_ipr_col_names.Rd => getIPRScanColNames.Rd} (85%)
 rename man/{get_df_ipr_col_types.Rd => getIPRScanColTypes.Rd} (86%)
 rename man/{read_iprscan_tsv.Rd => readIPRScanTSV.Rd} (83%)
 create mode 100644 man/runIPRScan.Rd

diff --git a/man/convertIPRScanDomainTable2FA.Rd b/man/convertIPRScanDomainTable2FA.Rd
new file mode 100644
index 00000000..7b8b9d24
--- /dev/null
+++ b/man/convertIPRScanDomainTable2FA.Rd
@@ -0,0 +1,35 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/fa2domain.R
+\name{convertIPRScanDomainTable2FA}
+\alias{convertIPRScanDomainTable2FA}
+\title{Using the table returned from createIPRScanDomainTable, construct a
+domain fasta for a single accession number in the original fasta
+(i.e., the original fasta argument to createIPRScanDomainTable())}
+\usage{
+convertIPRScanDomainTable2FA(df_iprscan_domains)
+}
+\arguments{
+\item{df_iprscan_domains}{\link{tbl_df} return value from createIPRScanDomainTable}
+}
+\value{
+\link{AAStringSet} A domain fasta containing all the domains for a
+single protein in the original fasta passed as an argument to createIPRScanDomainTable()
+}
+\description{
+Using the table returned from createIPRScanDomainTable, construct a
+domain fasta for a single accession number in the original fasta
+(i.e., the original fasta argument to createIPRScanDomainTable())
+}
+\examples{
+\dontrun{
+path_molevol_scripts <- file.path(Sys.getenv("DEV", unset = "/data/molevolvr_transfer/molevolvr_dev"), "molevol_scripts")
+setwd(path_molevol_scripts)
+source("R/fa2domain.R")
+fasta <- Biostrings::readAAStringSet("./tests/example_protein.fa")
+df_iprscan <- readIPRScanTSV("./tests/example_iprscan_valid.tsv")
+accnum <- df_iprscan$AccNum[1]
+df_iprscan_domains <- createIPRScanDomainTable(accnum, fasta, df_iprscan)
+fasta_domains <- df_iprscan_domains |> convertIPRScanDomainTable2FA()
+}
+
+}
diff --git a/man/make_df_iprscan_domains.Rd b/man/createIPRScanDomainTable.Rd
similarity index 83%
rename from man/make_df_iprscan_domains.Rd
rename to man/createIPRScanDomainTable.Rd
index 2f1871e2..b5d4abf7 100644
--- a/man/make_df_iprscan_domains.Rd
+++ b/man/createIPRScanDomainTable.Rd
@@ -1,11 +1,11 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/fa2domain.R
-\name{make_df_iprscan_domains}
-\alias{make_df_iprscan_domains}
+\name{createIPRScanDomainTable}
+\alias{createIPRScanDomainTable}
 \title{For a given accession number, get the domain sequences using a interproscan
 output table & the original FASTA file}
 \usage{
-make_df_iprscan_domains(
+createIPRScanDomainTable(
   accnum,
   fasta,
   df_iprscan,
@@ -19,7 +19,7 @@ which will be used to search for its sequence's domains (df_iprscan param)}
 \item{fasta}{\link{AAStringSet} original fasta file which was fed into interproscan}
 
 \item{df_iprscan}{\link{tbl_df} the output TSV of interproscan, read as a tibble with
-read_iprscan_tsv()}
+readIPRScanTSV()}
 
 \item{analysis}{\link{chr} the domain databases to extract sequences from}
 }
@@ -36,9 +36,9 @@ path_molevol_scripts <- file.path(Sys.getenv("DEV", unset = "/data/molevolvr_tra
 setwd(path_molevol_scripts)
 source("R/fa2domain.R")
 fasta <- Biostrings::readAAStringSet("./tests/example_protein.fa")
-df_iprscan <- read_iprscan_tsv("./tests/example_iprscan_valid.tsv")
+df_iprscan <- readIPRScanTSV("./tests/example_iprscan_valid.tsv")
 accnum <- df_iprscan$AccNum[1]
-df_iprscan_domains <- make_df_iprscan_domains(accnum, fasta, df_iprscan)
+df_iprscan_domains <- createIPRScanDomainTable(accnum, fasta, df_iprscan)
 }
 
 }
diff --git a/man/df_iprscan_domains2fasta.Rd b/man/df_iprscan_domains2fasta.Rd
deleted file mode 100644
index 595b3310..00000000
--- a/man/df_iprscan_domains2fasta.Rd
+++ /dev/null
@@ -1,35 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/fa2domain.R
-\name{df_iprscan_domains2fasta}
-\alias{df_iprscan_domains2fasta}
-\title{Using the table returned from make_df_iprscan_domains, construct a
-domain fasta for a single accession number in the original fasta
-(i.e., the original fasta argument to make_df_iprscan_domains())}
-\usage{
-df_iprscan_domains2fasta(df_iprscan_domains)
-}
-\arguments{
-\item{df_iprscan_domains}{\link{tbl_df} return value from make_df_iprscan_domains}
-}
-\value{
-\link{AAStringSet} A domain fasta containing all the domains for a
-single protein in the original fasta passed as an argument to make_df_iprscan_domains()
-}
-\description{
-Using the table returned from make_df_iprscan_domains, construct a
-domain fasta for a single accession number in the original fasta
-(i.e., the original fasta argument to make_df_iprscan_domains())
-}
-\examples{
-\dontrun{
-path_molevol_scripts <- file.path(Sys.getenv("DEV", unset = "/data/molevolvr_transfer/molevolvr_dev"), "molevol_scripts")
-setwd(path_molevol_scripts)
-source("R/fa2domain.R")
-fasta <- Biostrings::readAAStringSet("./tests/example_protein.fa")
-df_iprscan <- read_iprscan_tsv("./tests/example_iprscan_valid.tsv")
-accnum <- df_iprscan$AccNum[1]
-df_iprscan_domains <- make_df_iprscan_domains(accnum, fasta, df_iprscan)
-fasta_domains <- df_iprscan_domains |> df_iprscan_domains2fasta()
-}
-
-}
diff --git a/man/exec_interproscan.Rd b/man/exec_interproscan.Rd
deleted file mode 100644
index b18ab579..00000000
--- a/man/exec_interproscan.Rd
+++ /dev/null
@@ -1,14 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/fa2domain.R
-\name{exec_interproscan}
-\alias{exec_interproscan}
-\title{exec_interproscan}
-\usage{
-exec_interproscan(filepath_fasta, filepath_out, appl = c("Pfam", "Gene3D"))
-}
-\arguments{
-\item{appl}{}
-}
-\description{
-exec_interproscan
-}
diff --git a/man/fasta2fasta_domain.Rd b/man/getDomainsFromFA.Rd
similarity index 76%
rename from man/fasta2fasta_domain.Rd
rename to man/getDomainsFromFA.Rd
index 18169172..08ff911d 100644
--- a/man/fasta2fasta_domain.Rd
+++ b/man/getDomainsFromFA.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/fa2domain.R
-\name{fasta2fasta_domain}
-\alias{fasta2fasta_domain}
-\title{fasta2fasta_domain}
+\name{getDomainsFromFA}
+\alias{getDomainsFromFA}
+\title{getDomainsFromFA}
 \usage{
-fasta2fasta_domain(
+getDomainsFromFA(
   fasta,
   df_iprscan,
   analysis = c("Pfam", "Gene3D"),
@@ -22,7 +22,7 @@ fasta2fasta_domain(
 fasta_domains \link{AAStringSet} fasta of domains
 }
 \description{
-fasta2fasta_domain
+getDomainsFromFA
 }
 \examples{
 \dontrun{
@@ -30,8 +30,8 @@ path_molevol_scripts <- file.path(Sys.getenv("DEV", unset = "/data/molevolvr_tra
 setwd(path_molevol_scripts)
 source("R/fa2domain.R")
 fasta <- Biostrings::readAAStringSet("./tests/example_protein.fa")
-df_iprscan <- read_iprscan_tsv("./tests/example_iprscan_valid.tsv")
-fasta2fasta_domain(fasta, df_iprscan)
+df_iprscan <- readIPRScanTSV("./tests/example_iprscan_valid.tsv")
+getDomainsFromFA(fasta, df_iprscan)
 }
 
 }
diff --git a/man/get_df_ipr_col_names.Rd b/man/getIPRScanColNames.Rd
similarity index 85%
rename from man/get_df_ipr_col_names.Rd
rename to man/getIPRScanColNames.Rd
index 56ce908f..7518081f 100644
--- a/man/get_df_ipr_col_names.Rd
+++ b/man/getIPRScanColNames.Rd
@@ -1,12 +1,12 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/fa2domain.R
-\name{get_df_ipr_col_names}
-\alias{get_df_ipr_col_names}
+\name{getIPRScanColNames}
+\alias{getIPRScanColNames}
 \title{Constructor function for interproscan column names
 (based upon the global variable written in
 molevol_scripts/R/colnames_molevol.R)}
 \usage{
-get_df_ipr_col_names()
+getIPRScanColNames()
 }
 \value{
 \link{chr} interproscan column names used throughout molevolvr
diff --git a/man/get_df_ipr_col_types.Rd b/man/getIPRScanColTypes.Rd
similarity index 86%
rename from man/get_df_ipr_col_types.Rd
rename to man/getIPRScanColTypes.Rd
index 6f0f6f7d..f7abadce 100644
--- a/man/get_df_ipr_col_types.Rd
+++ b/man/getIPRScanColTypes.Rd
@@ -1,12 +1,12 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/fa2domain.R
-\name{get_df_ipr_col_types}
-\alias{get_df_ipr_col_types}
+\name{getIPRScanColTypes}
+\alias{getIPRScanColTypes}
 \title{construct column types for reading interproscan output TSVs
 (based upon the global variable written in
 molevol_scripts/R/colnames_molevol.R)}
 \usage{
-get_df_ipr_col_types()
+getIPRScanColTypes()
 }
 \value{
 \link{collector} a named vector of type expecatations
diff --git a/man/read_iprscan_tsv.Rd b/man/readIPRScanTSV.Rd
similarity index 83%
rename from man/read_iprscan_tsv.Rd
rename to man/readIPRScanTSV.Rd
index e7a314a6..0c646c48 100644
--- a/man/read_iprscan_tsv.Rd
+++ b/man/readIPRScanTSV.Rd
@@ -1,11 +1,11 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/fa2domain.R
-\name{read_iprscan_tsv}
-\alias{read_iprscan_tsv}
+\name{readIPRScanTSV}
+\alias{readIPRScanTSV}
 \title{Read an interproscan output TSV with standardized
 column names and types}
 \usage{
-read_iprscan_tsv(filepath)
+readIPRScanTSV(filepath)
 }
 \arguments{
 \item{filepath}{\link{chr} path to interproscan output TSV}
diff --git a/man/runIPRScan.Rd b/man/runIPRScan.Rd
new file mode 100644
index 00000000..678d8652
--- /dev/null
+++ b/man/runIPRScan.Rd
@@ -0,0 +1,14 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/fa2domain.R
+\name{runIPRScan}
+\alias{runIPRScan}
+\title{runIPRScan}
+\usage{
+runIPRScan(filepath_fasta, filepath_out, appl = c("Pfam", "Gene3D"))
+}
+\arguments{
+\item{appl}{}
+}
+\description{
+runIPRScan
+}

From 843ecda71722bd4d152c43075d7f49567e46b0b6 Mon Sep 17 00:00:00 2001
From: David Mayer <david.mayer@cuanschutz.edu>
Date: Mon, 7 Oct 2024 15:40:15 -0600
Subject: [PATCH 16/33] use new function name

---
 R/networks_domarch.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/networks_domarch.R b/R/networks_domarch.R
index 66385a74..010b7619 100755
--- a/R/networks_domarch.R
+++ b/R/networks_domarch.R
@@ -74,11 +74,11 @@ domain_network <- function(prot, column = "DomArch", domains_of_interest, cutoff
 
             # string clean up all of the Domain Architecture columns
             prot <- prot |>
-                mutate(DomArch.ntwrk = clean_string(DomArch.ntwrk)) |>
+                mutate(DomArch.ntwrk = cleanString(DomArch.ntwrk)) |>
                 mutate(
                     across(
                         all_of(column),
-                        clean_string
+                        cleanString
                     )
                 )
             domains_of_interest_regex <- paste(domains_of_interest, collapse = "|")

From 3e36c7200ad49e8afabfecffd1547e798c4105cc Mon Sep 17 00:00:00 2001
From: David Mayer <david.mayer@cuanschutz.edu>
Date: Tue, 8 Oct 2024 07:50:03 -0600
Subject: [PATCH 17/33] adjust casing - addLineage()

---
 NAMESPACE         |  2 +-
 R/acc2lin.R       |  6 +++---
 R/lineage.R       |  4 ++--
 man/acc2lin.Rd    |  6 +++---
 man/addlineage.Rd | 10 +++++-----
 5 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index a526b959..726d1423 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -9,10 +9,10 @@ export(LineagePlot)
 export(RepresentativeAccNums)
 export(acc2Lineage)
 export(acc2fa)
+export(addLineage)
 export(add_leaves)
 export(add_name)
 export(add_tax)
-export(addlineage)
 export(advanced_opts2est_walltime)
 export(alignFasta)
 export(assert_count_df)
diff --git a/R/acc2lin.R b/R/acc2lin.R
index 71e7ae07..2b1e7078 100644
--- a/R/acc2lin.R
+++ b/R/acc2lin.R
@@ -24,7 +24,7 @@ sinkReset <- function() {
 }
 
 
-#' Add Lineages
+#' addLineage
 #'
 #' @param df
 #' @param acc_col
@@ -43,9 +43,9 @@ sinkReset <- function() {
 #'
 #' @examples
 #' \dontrun{
-#' addlineage()
+#' addLineage()
 #' }
-addlineage <- function(df, acc_col = "AccNum", assembly_path,
+addLineage <- function(df, acc_col = "AccNum", assembly_path,
     lineagelookup_path, ipgout_path = NULL, plan = "sequential", ...) {
     s_acc_col <- sym(acc_col)
     accessions <- df %>% pull(acc_col)
diff --git a/R/lineage.R b/R/lineage.R
index 3775b63b..f136c719 100644
--- a/R/lineage.R
+++ b/R/lineage.R
@@ -133,7 +133,7 @@ GCA2lin <- function(prot_data,
 ###################################
 ## !! @SAM why is this called lins?
 ###################################
-#' addlineage
+#' addLineage
 #'
 #' @param df
 #' @param acc_col
@@ -149,7 +149,7 @@ GCA2lin <- function(prot_data,
 #' @export
 #'
 #' @examples
-addlineage <- function(df, acc_col = "AccNum", assembly_path,
+addLineage <- function(df, acc_col = "AccNum", assembly_path,
     lineagelookup_path, ipgout_path = NULL, plan = "multicore") {
     acc_sym <- sym(acc_col)
     accessions <- df %>% pull(acc_sym)
diff --git a/man/acc2lin.Rd b/man/acc2lin.Rd
index 92b2887b..88663260 100644
--- a/man/acc2lin.Rd
+++ b/man/acc2lin.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/acc2lin.R
 \name{sinkReset}
 \alias{sinkReset}
-\alias{addlineage}
+\alias{addLineage}
 \alias{acc2Lineage}
 \alias{efetchIPG}
 \alias{IPG2Lineage}
@@ -10,7 +10,7 @@
 \usage{
 sinkReset()
 
-addlineage(
+addLineage(
   df,
   acc_col = "AccNum",
   assembly_path,
@@ -85,7 +85,7 @@ Takes the resulting file of an efetch run on the ipg database and
 sinkReset()
 }
 \dontrun{
-addlineage()
+addLineage()
 }
 \dontrun{
 acc2Lineage()
diff --git a/man/addlineage.Rd b/man/addlineage.Rd
index 7f34dc9f..f13259fa 100644
--- a/man/addlineage.Rd
+++ b/man/addlineage.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/lineage.R
-\name{addlineage}
-\alias{addlineage}
-\title{addlineage}
+\name{addLineage}
+\alias{addLineage}
+\title{addLineage}
 \usage{
-addlineage(
+addLineage(
   df,
   acc_col = "AccNum",
   assembly_path,
@@ -17,5 +17,5 @@ addlineage(
 \item{plan}{}
 }
 \description{
-addlineage
+addLineage
 }

From 6ce981d2922889987212dad321e6fd89210af5f6 Mon Sep 17 00:00:00 2001
From: David Mayer <david.mayer@cuanschutz.edu>
Date: Tue, 8 Oct 2024 08:06:11 -0600
Subject: [PATCH 18/33] adjust namespace based on upstream

---
 NAMESPACE | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index fa961be9..da443880 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -7,11 +7,11 @@ export(GenContextNetwork)
 export(IPG2Lineage)
 export(LineagePlot)
 export(RepresentativeAccNums)
-export(acc2Lineage)
 export(acc2FA)
+export(acc2Lineage)
 export(acc2fa)
-export(addLineage)
 export(addLeaves2Alignment)
+export(addLineage)
 export(addName)
 export(add_leaves)
 export(add_name)
@@ -73,6 +73,7 @@ export(map_advanced_opts2procs)
 export(msa_pdf)
 export(plot_estimated_walltimes)
 export(prot2tax)
+export(prot2tax_old)
 export(removeAsterisks)
 export(removeEmptyRows)
 export(removeTails)

From 2d47952d4e53b04bf925632ea9c222dd3a83b347 Mon Sep 17 00:00:00 2001
From: David Mayer <david.mayer@cuanschutz.edu>
Date: Tue, 8 Oct 2024 08:13:37 -0600
Subject: [PATCH 19/33] remove old rd tag for acc2lin

---
 R/acc2lin.R        |   5 ---
 man/IPG2Lineage.Rd |  24 ++++++++++-
 man/acc2Lineage.Rd |  21 ++++++++-
 man/acc2lin.Rd     | 103 ---------------------------------------------
 man/addlineage.Rd  |  21 ++++++++-
 man/efetchIPG.Rd   |  17 +++++++-
 man/sinkReset.Rd   |  19 +++++++++
 7 files changed, 98 insertions(+), 112 deletions(-)
 delete mode 100644 man/acc2lin.Rd
 create mode 100644 man/sinkReset.Rd

diff --git a/R/acc2lin.R b/R/acc2lin.R
index 2b1e7078..73aca0f4 100644
--- a/R/acc2lin.R
+++ b/R/acc2lin.R
@@ -10,7 +10,6 @@
 #' Sink Reset
 #'
 #' @return No return, but run to close all outstanding `sink()`s
-#' @rdname acc2lin
 #' @export
 #'
 #' @examples
@@ -38,7 +37,6 @@ sinkReset <- function() {
 #' @importFrom rlang sym
 #'
 #' @return Describe return, in detail
-#' @rdname acc2lin
 #' @export
 #'
 #' @examples
@@ -83,7 +81,6 @@ addLineage <- function(df, acc_col = "AccNum", assembly_path,
 #' @param plan
 #'
 #' @return Describe return, in detail
-#' @rdname acc2lin
 #' @export
 #'
 #' @examples
@@ -122,7 +119,6 @@ acc2Lineage <- function(accessions, assembly_path, lineagelookup_path, ipgout_pa
 #' @importFrom rentrez entrez_fetch
 #'
 #' @return Describe return, in detail
-#' @rdname acc2lin
 #' @export
 #'
 #' @examples
@@ -191,7 +187,6 @@ efetchIPG <- function(accnums, out_path, plan = "sequential", ...) {
 #' @importFrom data.table fread
 #'
 #' @return Describe return, in detail
-#' @rdname acc2lin
 #' @export
 #'
 #' @examples
diff --git a/man/IPG2Lineage.Rd b/man/IPG2Lineage.Rd
index 43a920b9..cf3e635e 100644
--- a/man/IPG2Lineage.Rd
+++ b/man/IPG2Lineage.Rd
@@ -1,9 +1,17 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/lineage.R
+% Please edit documentation in R/acc2lin.R, R/lineage.R
 \name{IPG2Lineage}
 \alias{IPG2Lineage}
 \title{IPG2Lineage}
 \usage{
+IPG2Lineage(
+  accessions,
+  ipg_file,
+  refseq_assembly_path,
+  genbank_assembly_path,
+  lineagelookup_path
+)
+
 IPG2Lineage(
   accessions,
   ipg_file,
@@ -22,10 +30,24 @@ file}
 \item{lineagelookup_path}{String of the path to the lineage lookup file
 (taxid to lineage mapping). This file can be generated using the
 "create_lineage_lookup()" function}
+
+\item{assembly_path}{String of the path to the assembly_summary path
+This file can be generated using the "DownloadAssemblySummary()" function}
+}
+\value{
+Describe return, in detail
 }
 \description{
+Takes the resulting file of an efetch run on the ipg database and appends lineage and taxid columns
+
 Takes the resulting file of an efetch run on the ipg database and
 append lineage, and taxid columns
+}
+\examples{
+\dontrun{
+IPG2Lineage()
+}
+
 }
 \author{
 Samuel Chen, Janani Ravi
diff --git a/man/acc2Lineage.Rd b/man/acc2Lineage.Rd
index 5ab5931a..d632c52e 100644
--- a/man/acc2Lineage.Rd
+++ b/man/acc2Lineage.Rd
@@ -1,9 +1,17 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/lineage.R
+% Please edit documentation in R/acc2lin.R, R/lineage.R
 \name{acc2Lineage}
 \alias{acc2Lineage}
 \title{acc2Lineage}
 \usage{
+acc2Lineage(
+  accessions,
+  assembly_path,
+  lineagelookup_path,
+  ipgout_path = NULL,
+  plan = "multicore"
+)
+
 acc2Lineage(
   accessions,
   assembly_path,
@@ -26,12 +34,23 @@ on the ipg database. If NULL, the file will not be written. Defaults to NULL}
 
 \item{plan}{}
 }
+\value{
+Describe return, in detail
+}
 \description{
+This function combines 'efetchIPG()' and 'IPG2Lineage()' to map a set
+of protein accessions to their assembly (GCA_ID), tax ID, and lineage.
+
 Function to map protein accession numbers to lineage
 
 This function combines 'efetchIPG()' and 'IPG2Lineage()' to map a set
 of protein accessions to their assembly (GCA_ID), tax ID, and lineage.
 }
+\examples{
+\dontrun{
+acc2Lineage()
+}
+}
 \author{
 Samuel Chen, Janani Ravi
 }
diff --git a/man/acc2lin.Rd b/man/acc2lin.Rd
deleted file mode 100644
index 88663260..00000000
--- a/man/acc2lin.Rd
+++ /dev/null
@@ -1,103 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/acc2lin.R
-\name{sinkReset}
-\alias{sinkReset}
-\alias{addLineage}
-\alias{acc2Lineage}
-\alias{efetchIPG}
-\alias{IPG2Lineage}
-\title{Sink Reset}
-\usage{
-sinkReset()
-
-addLineage(
-  df,
-  acc_col = "AccNum",
-  assembly_path,
-  lineagelookup_path,
-  ipgout_path = NULL,
-  plan = "multicore"
-)
-
-acc2Lineage(
-  accessions,
-  assembly_path,
-  lineagelookup_path,
-  ipgout_path = NULL,
-  plan = "multicore"
-)
-
-efetchIPG(accessions, out_path, plan = "multicore")
-
-IPG2Lineage(
-  accessions,
-  ipg_file,
-  refseq_assembly_path,
-  genbank_assembly_path,
-  lineagelookup_path
-)
-}
-\arguments{
-\item{assembly_path}{String of the path to the assembly_summary path
-This file can be generated using the "DownloadAssemblySummary()" function}
-
-\item{lineagelookup_path}{String of the path to the lineage lookup file
-(taxid to lineage mapping). This file can be generated using the
-"create_lineage_lookup()" function}
-
-\item{ipgout_path}{Path to write the results of the efetch run of the accessions
-on the ipg database. If NULL, the file will not be written. Defaults to NULL}
-
-\item{plan}{}
-
-\item{accessions}{Character vector of protein accessions}
-
-\item{out_path}{Path to write the efetch results to}
-
-\item{ipg_file}{Filepath to the file containing results of an efetch run on the
-ipg database. The protein accession in 'accessions' should be contained in this
-file}
-
-\item{accnums}{Character vector containing the accession numbers to query on
-the ipg database}
-}
-\value{
-No return, but run to close all outstanding \code{sink()}s
-
-Describe return, in detail
-
-Describe return, in detail
-
-Describe return, in detail
-
-Describe return, in detail
-}
-\description{
-This function combines 'efetchIPG()' and 'IPG2Lineage()' to map a set
-of protein accessions to their assembly (GCA_ID), tax ID, and lineage.
-
-Perform efetch on the ipg database and write the results to out_path
-
-Takes the resulting file of an efetch run on the ipg database and
-}
-\examples{
-\dontrun{
-sinkReset()
-}
-\dontrun{
-addLineage()
-}
-\dontrun{
-acc2Lineage()
-}
-\dontrun{
-efetchIPG()
-}
-\dontrun{
-IPG2Lineage()
-}
-
-}
-\author{
-Samuel Chen, Janani Ravi
-}
diff --git a/man/addlineage.Rd b/man/addlineage.Rd
index f13259fa..6694e94c 100644
--- a/man/addlineage.Rd
+++ b/man/addlineage.Rd
@@ -1,9 +1,18 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/lineage.R
+% Please edit documentation in R/acc2lin.R, R/lineage.R
 \name{addLineage}
 \alias{addLineage}
 \title{addLineage}
 \usage{
+addLineage(
+  df,
+  acc_col = "AccNum",
+  assembly_path,
+  lineagelookup_path,
+  ipgout_path = NULL,
+  plan = "multicore"
+)
+
 addLineage(
   df,
   acc_col = "AccNum",
@@ -16,6 +25,16 @@ addLineage(
 \arguments{
 \item{plan}{}
 }
+\value{
+Describe return, in detail
+}
 \description{
 addLineage
+
+addLineage
+}
+\examples{
+\dontrun{
+addLineage()
+}
 }
diff --git a/man/efetchIPG.Rd b/man/efetchIPG.Rd
index 157ceb75..6a5d85a4 100644
--- a/man/efetchIPG.Rd
+++ b/man/efetchIPG.Rd
@@ -1,9 +1,11 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/lineage.R
+% Please edit documentation in R/acc2lin.R, R/lineage.R
 \name{efetchIPG}
 \alias{efetchIPG}
 \title{efetchIPG}
 \usage{
+efetchIPG(accessions, out_path, plan = "multicore")
+
 efetchIPG(accessions, out_path, plan = "multicore")
 }
 \arguments{
@@ -13,9 +15,22 @@ the ipg database}
 \item{out_path}{Path to write the efetch results to}
 
 \item{plan}{}
+
+\item{accnums}{Character vector containing the accession numbers to query on
+the ipg database}
+}
+\value{
+Describe return, in detail
 }
 \description{
 Perform efetch on the ipg database and write the results to out_path
+
+Perform efetch on the ipg database and write the results to out_path
+}
+\examples{
+\dontrun{
+efetchIPG()
+}
 }
 \author{
 Samuel Chen, Janani Ravi
diff --git a/man/sinkReset.Rd b/man/sinkReset.Rd
new file mode 100644
index 00000000..0285c0b2
--- /dev/null
+++ b/man/sinkReset.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/acc2lin.R
+\name{sinkReset}
+\alias{sinkReset}
+\title{Sink Reset}
+\usage{
+sinkReset()
+}
+\value{
+No return, but run to close all outstanding \code{sink()}s
+}
+\description{
+Sink Reset
+}
+\examples{
+\dontrun{
+sinkReset()
+}
+}

From 331b6515f4c000cc0d30a9ba3749d27ef5a97339 Mon Sep 17 00:00:00 2001
From: David Mayer <david.mayer@cuanschutz.edu>
Date: Tue, 8 Oct 2024 08:28:44 -0600
Subject: [PATCH 20/33] namespace conflict resolution for other PRs

---
 NAMESPACE | 1 -
 1 file changed, 1 deletion(-)

diff --git a/NAMESPACE b/NAMESPACE
index a309f180..157a53e5 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -5,7 +5,6 @@ export(DownloadAssemblySummary)
 export(GCA2lin)
 export(GenContextNetwork)
 export(IPG2Lineage)
-export(LineagePlot)
 export(RepresentativeAccNums)
 export(acc2FA)
 export(acc2Lineage)

From 8a577797c05aad8aab61b2eef67299cdc2b90f88 Mon Sep 17 00:00:00 2001
From: teddyCodex <samted.uche@gmail.com>
Date: Tue, 8 Oct 2024 15:46:05 +0100
Subject: [PATCH 21/33] refactor function names in R/summarize.R

---
 NAMESPACE                                     | 23 +++---
 R/summarize.R                                 | 78 +++++++++----------
 man/{count_bycol.Rd => countbycolumn.Rd}      |  8 +-
 man/elements2words.Rd                         | 29 -------
 man/{filter_by_doms.Rd => filterbydomains.Rd} | 10 +--
 man/{filter_freq.Rd => filterbyfrequency.Rd}  |  8 +-
 man/{find_paralogs.Rd => findparalogs.Rd}     |  8 +-
 man/summ.GC.byDALin.Rd                        | 22 ------
 man/summ.GC.byLin.Rd                          | 22 ------
 man/{summ.DA.Rd => summarizeDomArch.Rd}       | 10 +--
 ...byLin.Rd => summarizeDomArch_ByLineage.Rd} | 10 +--
 man/{summ.GC.Rd => summarizeGenContext.Rd}    | 12 +--
 man/summarizeGenContext_ByDomArchLineage.Rd   | 22 ++++++
 man/summarizeGenContext_ByLineage.Rd          | 22 ++++++
 ...mmarize_bylin.Rd => summarizebylineage.Rd} |  8 +-
 ...s.Rd => totalgencontextordomarchcounts.Rd} |  8 +-
 man/{words2wc.Rd => words2wordcounts.Rd}      | 10 +--
 17 files changed, 141 insertions(+), 169 deletions(-)
 rename man/{count_bycol.Rd => countbycolumn.Rd} (66%)
 delete mode 100644 man/elements2words.Rd
 rename man/{filter_by_doms.Rd => filterbydomains.Rd} (84%)
 rename man/{filter_freq.Rd => filterbyfrequency.Rd} (71%)
 rename man/{find_paralogs.Rd => findparalogs.Rd} (84%)
 delete mode 100644 man/summ.GC.byDALin.Rd
 delete mode 100644 man/summ.GC.byLin.Rd
 rename man/{summ.DA.Rd => summarizeDomArch.Rd} (70%)
 rename man/{summ.DA.byLin.Rd => summarizeDomArch_ByLineage.Rd} (62%)
 rename man/{summ.GC.Rd => summarizeGenContext.Rd} (57%)
 create mode 100644 man/summarizeGenContext_ByDomArchLineage.Rd
 create mode 100644 man/summarizeGenContext_ByLineage.Rd
 rename man/{summarize_bylin.Rd => summarizebylineage.Rd} (68%)
 rename man/{total_counts.Rd => totalgencontextordomarchcounts.Rd} (84%)
 rename man/{words2wc.Rd => words2wordcounts.Rd} (77%)

diff --git a/NAMESPACE b/NAMESPACE
index af1d0ba4..a8632305 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -33,19 +33,19 @@ export(convert2TitleCase)
 export(convertAlignment2FA)
 export(convert_aln2fa)
 export(convert_fa2tre)
-export(count_bycol)
 export(count_to_sunburst)
 export(count_to_treemap)
+export(countbycolumn)
 export(create_all_col_params)
 export(create_lineage_lookup)
 export(create_one_col_params)
 export(domain_network)
 export(efetch_ipg)
 export(extractAccNum)
-export(filter_by_doms)
-export(filter_freq)
-export(find_paralogs)
+export(filterbydomains)
+export(filterbyfrequency)
 export(find_top_acc)
+export(findparalogs)
 export(format_job_args)
 export(gc_undirected_network)
 export(generateAllAlignments2FA)
@@ -73,6 +73,7 @@ export(map_advanced_opts2procs)
 export(msa_pdf)
 export(plot_estimated_walltimes)
 export(prot2tax)
+export(prot2tax_old)
 export(removeAsterisks)
 export(removeEmptyRows)
 export(removeTails)
@@ -88,15 +89,15 @@ export(send_job_status_email)
 export(shorten_lineage)
 export(sink.reset)
 export(stacked_lin_plot)
-export(summ.DA)
-export(summ.DA.byLin)
-export(summ.GC)
-export(summ.GC.byDALin)
-export(summ.GC.byLin)
-export(summarize_bylin)
+export(summarizeDomArch)
+export(summarizeDomArch_ByLineage)
+export(summarizeGenContext)
+export(summarizeGenContext_ByDomArchLineage)
+export(summarizeGenContext_ByLineage)
+export(summarizebylineage)
 export(theme_genes2)
 export(to_titlecase)
-export(total_counts)
+export(totalgencontextordomarchcounts)
 export(upset.plot)
 export(wordcloud2_element)
 export(wordcloud3)
diff --git a/R/summarize.R b/R/summarize.R
index a9b13e43..d2cef471 100644
--- a/R/summarize.R
+++ b/R/summarize.R
@@ -13,7 +13,7 @@
 #' Filter by Domains
 #'
 #' @author Samuel Chen, Janani Ravi
-#' @description filter_by_doms filters a data frame by identifying exact domain matches
+#' @description filterbydomains filters a data frame by identifying exact domain matches
 #' and either keeping or removing rows with the identified domain
 #'
 #' @param prot Dataframe to filter
@@ -33,9 +33,9 @@
 #'
 #' @examples
 #' \dontrun{
-#' filter_by_doms()
+#' filterbydomains()
 #' }
-filter_by_doms <- function(prot, column = "DomArch", doms_keep = c(), doms_remove = c(),
+filterbydomains <- function(prot, column = "DomArch", doms_keep = c(), doms_remove = c(),
     ignore.case = FALSE) {
     # Only rows with a domain in doms_keep will be kept
     # Any row containing a domain in doms_remove will be removed
@@ -102,9 +102,9 @@ filter_by_doms <- function(prot, column = "DomArch", doms_keep = c(), doms_remov
 #'
 #' @examples
 #' \dontrun{
-#' count_bycol()
+#' countbycolumn()
 #' }
-count_bycol <- function(prot = prot, column = "DomArch", min.freq = 1) {
+countbycolumn <- function(prot = prot, column = "DomArch", min.freq = 1) {
     counts <- prot %>%
         select(column) %>%
         table() %>%
@@ -135,10 +135,10 @@ count_bycol <- function(prot = prot, column = "DomArch", min.freq = 1) {
 #'
 #' @examples
 #' \dontrun{
-#' tibble::tibble(DomArch = c("aaa+bbb", "a+b", "b+c", "b-c")) |> elements2words()
+#' tibble::tibble(DomArch = c("aaa+bbb", "a+b", "b+c", "b-c")) |> elements2Words()
 #' }
 #'
-elements2words <- function(prot, column = "DomArch", conversion_type = "da2doms") {
+elements2Words <- function(prot, column = "DomArch", conversion_type = "da2doms") {
     z1 <- prot %>%
         dplyr::pull(column) %>%
         str_replace_all("\\,", " ") %>%
@@ -184,11 +184,11 @@ elements2words <- function(prot, column = "DomArch", conversion_type = "da2doms"
 #' @examples
 #' \dontrun{
 #' tibble::tibble(DomArch = c("aaa+bbb", "a+b", "b+c", "b-c")) |>
-#'     elements2words() |>
-#'     words2wc()
+#'     elements2Words() |>
+#'     words2wordcounts()
 #' }
 #'
-words2wc <- function(string) {
+words2wordcounts <- function(string) {
     df_word_count <- string %>%
         # reduce spaces with length 2 or greater to a single space
         str_replace_all("\\s{2,}", " ") %>%
@@ -227,9 +227,9 @@ words2wc <- function(string) {
 #'
 #' @examples
 #' \dontrun{
-#' filter_freq()
+#' filterbyfrequency()
 #' }
-filter_freq <- function(x, min.freq) {
+filterbyfrequency <- function(x, min.freq) {
     x %>%
         filter(freq >= min.freq)
 }
@@ -254,10 +254,10 @@ filter_freq <- function(x, min.freq) {
 #' \dontrun{
 #' library(tidyverse)
 #' tibble(DomArch = c("a+b", "a+b", "b+c", "a+b"), Lineage = c("l1", "l1", "l1", "l2")) |>
-#'     summarize_bylin(query = "all")
+#'     summarizebylineage(query = "all")
 #' }
 #'
-summarize_bylin <- function(prot = "prot", column = "DomArch", by = "Lineage",
+summarizebylineage <- function(prot = "prot", column = "DomArch", by = "Lineage",
     query) {
     column <- sym(column)
     by <- sym(by)
@@ -277,7 +277,7 @@ summarize_bylin <- function(prot = "prot", column = "DomArch", by = "Lineage",
 }
 
 
-#' summ.DA.byLin
+#' summarizeDomArch_ByLineage
 #'
 #' @description
 #' Function to summarize and retrieve counts by Domains & Domains+Lineage
@@ -292,9 +292,9 @@ summarize_bylin <- function(prot = "prot", column = "DomArch", by = "Lineage",
 #'
 #' @examples
 #' \dontrun{
-#' summ.DA.byLin()
+#' summarizeDomArch_ByLineage()
 #' }
-summ.DA.byLin <- function(x) {
+summarizeDomArch_ByLineage <- function(x) {
     ## Note: it is better to reserve dots for S3 Objects. Consider replacing '.' with '_'
     x %>%
         filter(!grepl("^-$", DomArch)) %>%
@@ -304,7 +304,7 @@ summ.DA.byLin <- function(x) {
 }
 
 ## Function to retrieve counts of how many lineages a DomArch appears in
-#' summ.DA
+#' summarizeDomArch
 #'
 #' @description
 #' Function to retrieve counts of how many lineages a DomArch appears in
@@ -318,9 +318,9 @@ summ.DA.byLin <- function(x) {
 #'
 #' @examples
 #' \dontrun{
-#' summ.DA()
+#' summarizeDomArch()
 #' }
-summ.DA <- function(x) {
+summarizeDomArch <- function(x) {
     ## Note: it is better to reserve dots for S3 Objects. Consider replacing '.' with '_'
     x %>%
         group_by(DomArch) %>%
@@ -330,7 +330,7 @@ summ.DA <- function(x) {
         filter(!grepl("^-$", DomArch))
 }
 
-#' summ.GC.byDALin
+#' summarizeGenContext_ByDomArchLineage
 #'
 #' @param x
 #'
@@ -341,9 +341,9 @@ summ.DA <- function(x) {
 #'
 #' @examples
 #' \dontrun{
-#' summ.GC.byDALin
+#' summarizeGenContext_ByDomArchLineage()
 #' }
-summ.GC.byDALin <- function(x) {
+summarizeGenContext_ByDomArchLineage <- function(x) {
     ## Note: it is better to reserve dots for S3 Objects. Consider replacing '.' with '_'
     x %>%
         filter(!grepl("^-$", GenContext)) %>%
@@ -355,7 +355,7 @@ summ.GC.byDALin <- function(x) {
         arrange(desc(count))
 }
 
-#' summ.GC.byLin
+#' summarizeGenContext_ByLineage
 #'
 #' @param x
 #'
@@ -366,9 +366,9 @@ summ.GC.byDALin <- function(x) {
 #'
 #' @examples
 #' \dontrun{
-#' summ.GC.byLin()
+#' summarizeGenContext_ByLineage()
 #' }
-summ.GC.byLin <- function(x) {
+summarizeGenContext_ByLineage <- function(x) {
     ## Note: it is better to reserve dots for S3 Objects. Consider replacing '.' with '_'
     x %>%
         filter(!grepl("^-$", GenContext)) %>%
@@ -380,7 +380,7 @@ summ.GC.byLin <- function(x) {
         arrange(desc(count))
 }
 
-#' summ.GC
+#' summarizeGenContext
 #'
 #' @param x
 #'
@@ -391,9 +391,9 @@ summ.GC.byLin <- function(x) {
 #'
 #' @examples
 #' \dontrun{
-#' summ.GC()
+#' summarizeGenContext()
 #' }
-summ.GC <- function(x) {
+summarizeGenContext <- function(x) {
     ## Note: it is better to reserve dots for S3 Objects. Consider replacing '.' with '_'
     x %>%
         group_by(GenContext) %>%
@@ -436,9 +436,9 @@ summ.GC <- function(x) {
 #'
 #' @examples
 #' \dontrun{
-#' total_counts(pspa - gc_lin_counts, 0, "GC")
+#' totalgencontextordomarchcounts(pspa - gc_lin_counts, 0, "GC")
 #' }
-total_counts <- function(prot, column = "DomArch", lineage_col = "Lineage",
+totalgencontextordomarchcounts <- function(prot, column = "DomArch", lineage_col = "Lineage",
     cutoff = 90, RowsCutoff = FALSE, digits = 2
     # type = "GC"
 ) {
@@ -448,7 +448,7 @@ total_counts <- function(prot, column = "DomArch", lineage_col = "Lineage",
         filter(!is.na({{ column }}) & !is.na({{ lineage_col }})) %>%
         filter({{ column }} != "")
 
-    prot <- summarize_bylin(prot, column, by = lineage_col, query = "all")
+    prot <- summarizebylineage(prot, column, by = lineage_col, query = "all")
     col_count <- prot %>%
         group_by({{ column }}) %>%
         summarise(totalcount = sum(count))
@@ -598,9 +598,9 @@ total_counts <- function(prot, column = "DomArch", lineage_col = "Lineage",
 #'
 #' @examples
 #' \dontrun{
-#' find_paralogs(pspa)
+#' findparalogs(pspa)
 #' }
-find_paralogs <- function(prot) {
+findparalogs <- function(prot) {
     # Remove eukaryotes
     prot <- prot %>% filter(!grepl("^eukaryota", Lineage))
     paralogTable <- prot %>%
@@ -635,17 +635,17 @@ find_paralogs <- function(prot) {
 # query.sub$GenContext %>%
 # counts(n)")
 
-# ## elements2words: Function to break up ELEMENTS to WORDS for DA and GC
+# ## elements2Words: Function to break up ELEMENTS to WORDS for DA and GC
 # cat("Converting DA to domains and GC to DAs.\n2 switches: da2doms and gc2da
 # \nFor e.g.:
 # query.sub$DA.doms <- query.sub$DomArch.norep %>%
-#   elements2words(\"da2doms\")
+#   elements2Words(\"da2doms\")
 # query.sub$GC.da <- query.sub$GenContext %>%
-# 	elements2words(\"gc2da\")")
+# 	elements2Words(\"gc2da\")")
 
 
-# ## words2wc: Function to get WORD COUNTS [DOMAINS (DA) or DOMAIN ARCHITECTURES (GC)]
+# ## words2wordcounts: Function to get WORD COUNTS [DOMAINS (DA) or DOMAIN ARCHITECTURES (GC)]
 # cat("Word counts for broken up domains from DAs and DAs from GCs.
 # \nFor e.g.:
 # DA.doms.wc <- query.sub$DA.doms %>%
-#   words2wc()")
+#   words2wordcounts()")
diff --git a/man/count_bycol.Rd b/man/countbycolumn.Rd
similarity index 66%
rename from man/count_bycol.Rd
rename to man/countbycolumn.Rd
index 884c0f0f..e669a4a3 100644
--- a/man/count_bycol.Rd
+++ b/man/countbycolumn.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/summarize.R
-\name{count_bycol}
-\alias{count_bycol}
+\name{countbycolumn}
+\alias{countbycolumn}
 \title{Count Bycol}
 \usage{
-count_bycol(prot = prot, column = "DomArch", min.freq = 1)
+countbycolumn(prot = prot, column = "DomArch", min.freq = 1)
 }
 \arguments{
 \item{min.freq}{}
@@ -17,6 +17,6 @@ Count Bycol
 }
 \examples{
 \dontrun{
-count_bycol()
+countbycolumn()
 }
 }
diff --git a/man/elements2words.Rd b/man/elements2words.Rd
deleted file mode 100644
index 80fcbafb..00000000
--- a/man/elements2words.Rd
+++ /dev/null
@@ -1,29 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/summarize.R
-\name{elements2words}
-\alias{elements2words}
-\title{Elements 2 Words}
-\usage{
-elements2words(prot, column = "DomArch", conversion_type = "da2doms")
-}
-\arguments{
-\item{prot}{\link{dataframe}}
-
-\item{column}{\link{string} column name}
-
-\item{conversion_type}{\link{string} type of conversion: 'da2doms': domain architectures to
-domains. 'gc2da' genomic context to domain architectures}
-}
-\value{
-\link{string} with words delimited by spaces
-}
-\description{
-Break string ELEMENTS into WORDS for domain architecture (DA) and genomic
-context (GC)
-}
-\examples{
-\dontrun{
-tibble::tibble(DomArch = c("aaa+bbb", "a+b", "b+c", "b-c")) |> elements2words()
-}
-
-}
diff --git a/man/filter_by_doms.Rd b/man/filterbydomains.Rd
similarity index 84%
rename from man/filter_by_doms.Rd
rename to man/filterbydomains.Rd
index cfe255ca..7fd148e7 100644
--- a/man/filter_by_doms.Rd
+++ b/man/filterbydomains.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/summarize.R
-\name{filter_by_doms}
-\alias{filter_by_doms}
+\name{filterbydomains}
+\alias{filterbydomains}
 \title{Filter by Domains}
 \usage{
-filter_by_doms(
+filterbydomains(
   prot,
   column = "DomArch",
   doms_keep = c(),
@@ -28,7 +28,7 @@ observation to be kept}
 Filtered data frame
 }
 \description{
-filter_by_doms filters a data frame by identifying exact domain matches
+filterbydomains filters a data frame by identifying exact domain matches
 and either keeping or removing rows with the identified domain
 }
 \note{
@@ -36,7 +36,7 @@ There is no need to make the domains 'regex safe', that will be handled by this
 }
 \examples{
 \dontrun{
-filter_by_doms()
+filterbydomains()
 }
 }
 \author{
diff --git a/man/filter_freq.Rd b/man/filterbyfrequency.Rd
similarity index 71%
rename from man/filter_freq.Rd
rename to man/filterbyfrequency.Rd
index ce4db5ac..d219a100 100644
--- a/man/filter_freq.Rd
+++ b/man/filterbyfrequency.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/summarize.R
-\name{filter_freq}
-\alias{filter_freq}
+\name{filterbyfrequency}
+\alias{filterbyfrequency}
 \title{Filter Frequency}
 \usage{
-filter_freq(x, min.freq)
+filterbyfrequency(x, min.freq)
 }
 \arguments{
 \item{min.freq}{}
@@ -17,6 +17,6 @@ Filter Frequency
 }
 \examples{
 \dontrun{
-filter_freq()
+filterbyfrequency()
 }
 }
diff --git a/man/find_paralogs.Rd b/man/findparalogs.Rd
similarity index 84%
rename from man/find_paralogs.Rd
rename to man/findparalogs.Rd
index fbf14384..7e985fe5 100644
--- a/man/find_paralogs.Rd
+++ b/man/findparalogs.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/summarize.R
-\name{find_paralogs}
-\alias{find_paralogs}
+\name{findparalogs}
+\alias{findparalogs}
 \title{Find Paralogs}
 \usage{
-find_paralogs(prot)
+findparalogs(prot)
 }
 \arguments{
 \item{prot}{A data frame filtered by a Query, containing columns Species and Lineage}
@@ -21,6 +21,6 @@ column names.
 }
 \examples{
 \dontrun{
-find_paralogs(pspa)
+findparalogs(pspa)
 }
 }
diff --git a/man/summ.GC.byDALin.Rd b/man/summ.GC.byDALin.Rd
deleted file mode 100644
index 34c9f84d..00000000
--- a/man/summ.GC.byDALin.Rd
+++ /dev/null
@@ -1,22 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/summarize.R
-\name{summ.GC.byDALin}
-\alias{summ.GC.byDALin}
-\title{summ.GC.byDALin}
-\usage{
-summ.GC.byDALin(x)
-}
-\arguments{
-\item{x}{}
-}
-\value{
-Define return, in detail
-}
-\description{
-summ.GC.byDALin
-}
-\examples{
-\dontrun{
-summ.GC.byDALin
-}
-}
diff --git a/man/summ.GC.byLin.Rd b/man/summ.GC.byLin.Rd
deleted file mode 100644
index df2a8fb8..00000000
--- a/man/summ.GC.byLin.Rd
+++ /dev/null
@@ -1,22 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/summarize.R
-\name{summ.GC.byLin}
-\alias{summ.GC.byLin}
-\title{summ.GC.byLin}
-\usage{
-summ.GC.byLin(x)
-}
-\arguments{
-\item{x}{}
-}
-\value{
-Describe return, in detail
-}
-\description{
-summ.GC.byLin
-}
-\examples{
-\dontrun{
-summ.GC.byLin()
-}
-}
diff --git a/man/summ.DA.Rd b/man/summarizeDomArch.Rd
similarity index 70%
rename from man/summ.DA.Rd
rename to man/summarizeDomArch.Rd
index 13717140..11db1afa 100644
--- a/man/summ.DA.Rd
+++ b/man/summarizeDomArch.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/summarize.R
-\name{summ.DA}
-\alias{summ.DA}
-\title{summ.DA}
+\name{summarizeDomArch}
+\alias{summarizeDomArch}
+\title{summarizeDomArch}
 \usage{
-summ.DA(x)
+summarizeDomArch(x)
 }
 \arguments{
 \item{x}{}
@@ -17,6 +17,6 @@ Function to retrieve counts of how many lineages a DomArch appears in
 }
 \examples{
 \dontrun{
-summ.DA()
+summarizeDomArch()
 }
 }
diff --git a/man/summ.DA.byLin.Rd b/man/summarizeDomArch_ByLineage.Rd
similarity index 62%
rename from man/summ.DA.byLin.Rd
rename to man/summarizeDomArch_ByLineage.Rd
index 66555fd5..cf5fac22 100644
--- a/man/summ.DA.byLin.Rd
+++ b/man/summarizeDomArch_ByLineage.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/summarize.R
-\name{summ.DA.byLin}
-\alias{summ.DA.byLin}
-\title{summ.DA.byLin}
+\name{summarizeDomArch_ByLineage}
+\alias{summarizeDomArch_ByLineage}
+\title{summarizeDomArch_ByLineage}
 \usage{
-summ.DA.byLin(x)
+summarizeDomArch_ByLineage(x)
 }
 \arguments{
 \item{x}{}
@@ -17,6 +17,6 @@ Function to summarize and retrieve counts by Domains & Domains+Lineage
 }
 \examples{
 \dontrun{
-summ.DA.byLin()
+summarizeDomArch_ByLineage()
 }
 }
diff --git a/man/summ.GC.Rd b/man/summarizeGenContext.Rd
similarity index 57%
rename from man/summ.GC.Rd
rename to man/summarizeGenContext.Rd
index fa52a6bf..5a40811b 100644
--- a/man/summ.GC.Rd
+++ b/man/summarizeGenContext.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/summarize.R
-\name{summ.GC}
-\alias{summ.GC}
-\title{summ.GC}
+\name{summarizeGenContext}
+\alias{summarizeGenContext}
+\title{summarizeGenContext}
 \usage{
-summ.GC(x)
+summarizeGenContext(x)
 }
 \arguments{
 \item{x}{}
@@ -13,10 +13,10 @@ summ.GC(x)
 Describe return, in detail
 }
 \description{
-summ.GC
+summarizeGenContext
 }
 \examples{
 \dontrun{
-summ.GC()
+summarizeGenContext()
 }
 }
diff --git a/man/summarizeGenContext_ByDomArchLineage.Rd b/man/summarizeGenContext_ByDomArchLineage.Rd
new file mode 100644
index 00000000..59e0376e
--- /dev/null
+++ b/man/summarizeGenContext_ByDomArchLineage.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/summarize.R
+\name{summarizeGenContext_ByDomArchLineage}
+\alias{summarizeGenContext_ByDomArchLineage}
+\title{summarizeGenContext_ByDomArchLineage}
+\usage{
+summarizeGenContext_ByDomArchLineage(x)
+}
+\arguments{
+\item{x}{}
+}
+\value{
+Define return, in detail
+}
+\description{
+summarizeGenContext_ByDomArchLineage
+}
+\examples{
+\dontrun{
+summarizeGenContext_ByDomArchLineage()
+}
+}
diff --git a/man/summarizeGenContext_ByLineage.Rd b/man/summarizeGenContext_ByLineage.Rd
new file mode 100644
index 00000000..932fe6a7
--- /dev/null
+++ b/man/summarizeGenContext_ByLineage.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/summarize.R
+\name{summarizeGenContext_ByLineage}
+\alias{summarizeGenContext_ByLineage}
+\title{summarizeGenContext_ByLineage}
+\usage{
+summarizeGenContext_ByLineage(x)
+}
+\arguments{
+\item{x}{}
+}
+\value{
+Describe return, in detail
+}
+\description{
+summarizeGenContext_ByLineage
+}
+\examples{
+\dontrun{
+summarizeGenContext_ByLineage()
+}
+}
diff --git a/man/summarize_bylin.Rd b/man/summarizebylineage.Rd
similarity index 68%
rename from man/summarize_bylin.Rd
rename to man/summarizebylineage.Rd
index a94c54c1..34d72c37 100644
--- a/man/summarize_bylin.Rd
+++ b/man/summarizebylineage.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/summarize.R
-\name{summarize_bylin}
-\alias{summarize_bylin}
+\name{summarizebylineage}
+\alias{summarizebylineage}
 \title{Summarize by Lineage}
 \usage{
-summarize_bylin(prot = "prot", column = "DomArch", by = "Lineage", query)
+summarizebylineage(prot = "prot", column = "DomArch", by = "Lineage", query)
 }
 \arguments{
 \item{query}{}
@@ -19,7 +19,7 @@ Summarize by Lineage
 \dontrun{
 library(tidyverse)
 tibble(DomArch = c("a+b", "a+b", "b+c", "a+b"), Lineage = c("l1", "l1", "l1", "l2")) |>
-    summarize_bylin(query = "all")
+    summarizebylineage(query = "all")
 }
 
 }
diff --git a/man/total_counts.Rd b/man/totalgencontextordomarchcounts.Rd
similarity index 84%
rename from man/total_counts.Rd
rename to man/totalgencontextordomarchcounts.Rd
index 49db8822..aa8697ee 100644
--- a/man/total_counts.Rd
+++ b/man/totalgencontextordomarchcounts.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/summarize.R
-\name{total_counts}
-\alias{total_counts}
+\name{totalgencontextordomarchcounts}
+\alias{totalgencontextordomarchcounts}
 \title{Total Counts}
 \usage{
-total_counts(
+totalgencontextordomarchcounts(
   prot,
   column = "DomArch",
   lineage_col = "Lineage",
@@ -37,6 +37,6 @@ column names.
 }
 \examples{
 \dontrun{
-total_counts(pspa - gc_lin_counts, 0, "GC")
+totalgencontextordomarchcounts(pspa - gc_lin_counts, 0, "GC")
 }
 }
diff --git a/man/words2wc.Rd b/man/words2wordcounts.Rd
similarity index 77%
rename from man/words2wc.Rd
rename to man/words2wordcounts.Rd
index 1eba5dc4..69f30c5d 100644
--- a/man/words2wc.Rd
+++ b/man/words2wordcounts.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/summarize.R
-\name{words2wc}
-\alias{words2wc}
+\name{words2wordcounts}
+\alias{words2wordcounts}
 \title{Words 2 Word Counts}
 \usage{
-words2wc(string)
+words2wordcounts(string)
 }
 \arguments{
 \item{string}{}
@@ -18,8 +18,8 @@ Get word counts (wc) \link{DOMAINS (DA) or DOMAIN ARCHITECTURES (GC)}
 \examples{
 \dontrun{
 tibble::tibble(DomArch = c("aaa+bbb", "a+b", "b+c", "b-c")) |>
-    elements2words() |>
-    words2wc()
+    elements2Words() |>
+    words2wordcounts()
 }
 
 }

From adf6903e71517a48540f4c360df45b0a4067455d Mon Sep 17 00:00:00 2001
From: teddyCodex <samted.uche@gmail.com>
Date: Tue, 8 Oct 2024 15:59:04 +0100
Subject: [PATCH 22/33] refactor function names in R/job_status_emails.R

---
 NAMESPACE                                     |  8 ++--
 R/job_status_emails.R                         | 20 +++++-----
 man/addlineage.Rd                             | 40 -------------------
 ..._results_url.Rd => createJobResultsURL.Rd} |  6 +--
 ...sage.Rd => createJobStatusEmailMessage.Rd} | 16 +++++---
 ..._job_args.Rd => formatJobArgumentsHTML.Rd} |  8 ++--
 ..._status_email.Rd => sendJobStatusEmail.Rd} | 12 ++----
 7 files changed, 35 insertions(+), 75 deletions(-)
 delete mode 100644 man/addlineage.Rd
 rename man/{make_job_results_url.Rd => createJobResultsURL.Rd} (89%)
 rename man/{get_job_message.Rd => createJobStatusEmailMessage.Rd} (75%)
 rename man/{format_job_args.Rd => formatJobArgumentsHTML.Rd} (73%)
 rename man/{send_job_status_email.Rd => sendJobStatusEmail.Rd} (84%)

diff --git a/NAMESPACE b/NAMESPACE
index da443880..dd547990 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -37,6 +37,8 @@ export(convert_fa2tre)
 export(count_bycol)
 export(count_to_sunburst)
 export(count_to_treemap)
+export(createJobResultsURL)
+export(createJobStatusEmailMessage)
 export(create_all_col_params)
 export(create_lineage_lookup)
 export(create_one_col_params)
@@ -47,7 +49,7 @@ export(filter_by_doms)
 export(filter_freq)
 export(find_paralogs)
 export(find_top_acc)
-export(format_job_args)
+export(formatJobArgumentsHTML)
 export(gc_undirected_network)
 export(generateAllAlignments2FA)
 export(generate_all_aln2fa)
@@ -55,7 +57,6 @@ export(generate_fa2tre)
 export(generate_msa)
 export(generate_trees)
 export(get_accnums_from_fasta_file)
-export(get_job_message)
 export(get_proc_medians)
 export(get_proc_weights)
 export(ipr2viz)
@@ -65,7 +66,6 @@ export(lineage.Query.plot)
 export(lineage.domain_repeats.plot)
 export(lineage.neighbors.plot)
 export(lineage_sunburst)
-export(make_job_results_url)
 export(make_opts2procs)
 export(mapAcc2Name)
 export(map_acc2name)
@@ -85,7 +85,7 @@ export(reverse_operon)
 export(run_deltablast)
 export(run_rpsblast)
 export(selectLongestDuplicate)
-export(send_job_status_email)
+export(sendJobStatusEmail)
 export(shorten_lineage)
 export(sinkReset)
 export(stacked_lin_plot)
diff --git a/R/job_status_emails.R b/R/job_status_emails.R
index 4a96459a..6b1412e9 100644
--- a/R/job_status_emails.R
+++ b/R/job_status_emails.R
@@ -7,7 +7,7 @@
 # 1. source("job_status_emails.R")
 # 2. call:
 #     # event_type can be 'start' or 'end'
-#     send_job_status_email(notify_email, job_dir, pin_id, event_type)
+#     sendJobStatusEmail(notify_email, job_dir, pin_id, event_type)
 # Return
 #   unfortunately, there is no return value for the underlying sendmailR methods
 #
@@ -25,7 +25,7 @@
 #' @return the URL where the user can check the status of their job
 #' @export
 #'
-make_job_results_url <- function(
+createJobResultsURL <- function(
         pin_id,
         base_url = Sys.getenv("BASE_URL", unset = "http://jravilab.org/molevolvr/")) {
     return(paste0(base_url, "?r=", pin_id, "&p=home"))
@@ -91,9 +91,9 @@ make_job_results_url <- function(
 #'
 #' @examples
 #' \dontrun{
-#' format_job_args("/data/scratch/janani/molevolvr_out/Ba5sV1_full")
+#' formatJobArgumentsHTML(yaml::read_yaml("/data/scratch/janani/molevolvr_out/Ba5sV1_full/job_args.yml"))
 #' }
-format_job_args <- function(job_args) {
+formatJobArgumentsHTML <- function(job_args) {
     # format job arguments into html-formatted key/value pairs
     job_args_list <- tags$ul(lapply(names(job_args), function(key) {
         # look up human labels for field names, values, if available
@@ -146,7 +146,7 @@ format_job_args <- function(job_args) {
 }
 
 #' Produces a mail message that can be sent to a user when their job is accepted.
-#' Used by the send_job_status_email() method.
+#' Used by the sendJobStatusEmail() method.
 #'
 #' @param job_dir
 #' the directory where the job's arguments are stored, in job_args.yml
@@ -166,11 +166,11 @@ format_job_args <- function(job_args) {
 #' @return
 #' the result of the sendmailR::sendmail() call
 #' @export
-get_job_message <- function(job_dir, pin_id, job_results_url, event_type, context) {
+createJobStatusEmailMessage <- function(job_dir, pin_id, job_results_url, event_type, context) {
     # pull the set of args written to dir/job_args.yml, so we
     # can send it in the email
     job_args <- yaml::read_yaml(file.path(job_dir, "job_args.yml"))
-    job_args_list <- format_job_args(job_args)
+    job_args_list <- formatJobArgumentsHTML(job_args)
 
     # determine which template to use based on the event type
     if (event_type == "start") {
@@ -217,7 +217,7 @@ get_job_message <- function(job_dir, pin_id, job_results_url, event_type, contex
 #' @return
 #' the result of the sendmailR::sendmail() call
 #' @export
-send_job_status_email <- function(notify_email, job_dir, pin_id, event_type, context = NULL) {
+sendJobStatusEmail <- function(notify_email, job_dir, pin_id, event_type, context = NULL) {
     # -------------------------------------------------
     # --- step 1. build the email subject and contents
     # -------------------------------------------------
@@ -232,10 +232,10 @@ send_job_status_email <- function(notify_email, job_dir, pin_id, event_type, con
     }
 
     # construct the job results URL from the pin_id
-    job_results_url <- make_job_results_url(pin_id)
+    job_results_url <- createJobResultsURL(pin_id)
 
     # produce a formatted email message from the arguments and template
-    message <- get_job_message(
+    message <- createJobStatusEmailMessage(
         job_dir, pin_id, job_results_url, event_type, context
     )
 
diff --git a/man/addlineage.Rd b/man/addlineage.Rd
deleted file mode 100644
index 6694e94c..00000000
--- a/man/addlineage.Rd
+++ /dev/null
@@ -1,40 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/acc2lin.R, R/lineage.R
-\name{addLineage}
-\alias{addLineage}
-\title{addLineage}
-\usage{
-addLineage(
-  df,
-  acc_col = "AccNum",
-  assembly_path,
-  lineagelookup_path,
-  ipgout_path = NULL,
-  plan = "multicore"
-)
-
-addLineage(
-  df,
-  acc_col = "AccNum",
-  assembly_path,
-  lineagelookup_path,
-  ipgout_path = NULL,
-  plan = "multicore"
-)
-}
-\arguments{
-\item{plan}{}
-}
-\value{
-Describe return, in detail
-}
-\description{
-addLineage
-
-addLineage
-}
-\examples{
-\dontrun{
-addLineage()
-}
-}
diff --git a/man/make_job_results_url.Rd b/man/createJobResultsURL.Rd
similarity index 89%
rename from man/make_job_results_url.Rd
rename to man/createJobResultsURL.Rd
index 77f7bda5..02407f15 100644
--- a/man/make_job_results_url.Rd
+++ b/man/createJobResultsURL.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/job_status_emails.R
-\name{make_job_results_url}
-\alias{make_job_results_url}
+\name{createJobResultsURL}
+\alias{createJobResultsURL}
 \title{Given a pin_id, returns the URL where the user can check the status of their job}
 \usage{
-make_job_results_url(
+createJobResultsURL(
   pin_id,
   base_url = Sys.getenv("BASE_URL", unset = "http://jravilab.org/molevolvr/")
 )
diff --git a/man/get_job_message.Rd b/man/createJobStatusEmailMessage.Rd
similarity index 75%
rename from man/get_job_message.Rd
rename to man/createJobStatusEmailMessage.Rd
index 0c7ee8f2..1f779793 100644
--- a/man/get_job_message.Rd
+++ b/man/createJobStatusEmailMessage.Rd
@@ -1,11 +1,17 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/job_status_emails.R
-\name{get_job_message}
-\alias{get_job_message}
+\name{createJobStatusEmailMessage}
+\alias{createJobStatusEmailMessage}
 \title{Produces a mail message that can be sent to a user when their job is accepted.
-Used by the send_job_status_email() method.}
+Used by the sendJobStatusEmail() method.}
 \usage{
-get_job_message(job_dir, pin_id, job_results_url, event_type, context)
+createJobStatusEmailMessage(
+  job_dir,
+  pin_id,
+  job_results_url,
+  event_type,
+  context
+)
 }
 \arguments{
 \item{job_dir}{the directory where the job's arguments are stored, in job_args.yml}
@@ -23,5 +29,5 @@ the result of the sendmailR::sendmail() call
 }
 \description{
 Produces a mail message that can be sent to a user when their job is accepted.
-Used by the send_job_status_email() method.
+Used by the sendJobStatusEmail() method.
 }
diff --git a/man/format_job_args.Rd b/man/formatJobArgumentsHTML.Rd
similarity index 73%
rename from man/format_job_args.Rd
rename to man/formatJobArgumentsHTML.Rd
index 7af96750..371fb6f1 100644
--- a/man/format_job_args.Rd
+++ b/man/formatJobArgumentsHTML.Rd
@@ -1,11 +1,11 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/job_status_emails.R
-\name{format_job_args}
-\alias{format_job_args}
+\name{formatJobArgumentsHTML}
+\alias{formatJobArgumentsHTML}
 \title{Format job arguments into html-formatted key/value pairs, for including
 in an email}
 \usage{
-format_job_args(job_args)
+formatJobArgumentsHTML(job_args)
 }
 \arguments{
 \item{job_args}{a list of job arguments, e.g. as read from the job_args.yml file}
@@ -19,6 +19,6 @@ in an email
 }
 \examples{
 \dontrun{
-format_job_args("/data/scratch/janani/molevolvr_out/Ba5sV1_full")
+formatJobArgumentsHTML(yaml::read_yaml("/data/scratch/janani/molevolvr_out/Ba5sV1_full/job_args.yml"))
 }
 }
diff --git a/man/send_job_status_email.Rd b/man/sendJobStatusEmail.Rd
similarity index 84%
rename from man/send_job_status_email.Rd
rename to man/sendJobStatusEmail.Rd
index 4357687a..b53f652a 100644
--- a/man/send_job_status_email.Rd
+++ b/man/sendJobStatusEmail.Rd
@@ -1,17 +1,11 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/job_status_emails.R
-\name{send_job_status_email}
-\alias{send_job_status_email}
+\name{sendJobStatusEmail}
+\alias{sendJobStatusEmail}
 \title{Sends a "job accepted" email to a user when their job is accepted,
 including details about the job submission and how to check its status.}
 \usage{
-send_job_status_email(
-  notify_email,
-  job_dir,
-  pin_id,
-  event_type,
-  context = NULL
-)
+sendJobStatusEmail(notify_email, job_dir, pin_id, event_type, context = NULL)
 }
 \arguments{
 \item{notify_email}{the email address to send the notification to}

From acd7f8ca1477b7a9d1bb54bf8615eb6947baf80d Mon Sep 17 00:00:00 2001
From: David Mayer <david.mayer@cuanschutz.edu>
Date: Tue, 8 Oct 2024 08:59:13 -0600
Subject: [PATCH 23/33] adjust NAMESPACE order

---
 NAMESPACE | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/NAMESPACE b/NAMESPACE
index 25c4acc5..bbc2bff2 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -37,9 +37,9 @@ export(convert_aln2fa)
 export(count_bycol)
 export(count_to_sunburst)
 export(count_to_treemap)
+export(createFA2Tree)
 export(createWordCloud2Element)
 export(createWordCloudElement)
-export(createFA2Tree)
 export(create_all_col_params)
 export(create_lineage_lookup)
 export(create_one_col_params)

From 04859e588d3e7fe0ec7e34f5c26b71ebd44ea002 Mon Sep 17 00:00:00 2001
From: David Mayer <david.mayer@cuanschutz.edu>
Date: Tue, 8 Oct 2024 09:42:06 -0600
Subject: [PATCH 24/33] update NAMESPACE

---
 NAMESPACE | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 9bfd643a..2cffa30e 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -34,12 +34,9 @@ export(convertAlignment2Trees)
 export(convertFA2Tree)
 export(convert_aln2fa)
 export(count_bycol)
-export(count_to_sunburst)
-export(count_to_treemap)
 export(createFA2Tree)
 export(createWordCloud2Element)
 export(createWordCloudElement)
-export(create_all_col_params)
 export(create_lineage_lookup)
 export(domain_network)
 export(efetchIPG)
@@ -72,9 +69,9 @@ export(plotLineageNeighbors)
 export(plotLineageQuery)
 export(plotLineageSunburst)
 export(plotStackedLineage)
-export(plotUpSet)
 export(plotSunburst)
 export(plotTreemap)
+export(plotUpSet)
 export(plot_estimated_walltimes)
 export(prepareColumnParams)
 export(prepareSingleColumnParams)
@@ -103,9 +100,7 @@ export(summarize_bylin)
 export(theme_genes2)
 export(to_titlecase)
 export(total_counts)
-export(upset.plot)
 export(validateCountDF)
-export(wordcloud2_element)
 export(wordcloud3)
 export(write.MsaAAMultipleAlignment)
 export(write_proc_medians_table)

From c4c8cfb16b7b2b79444bdcfcc4d05e591393649a Mon Sep 17 00:00:00 2001
From: David Mayer <david.mayer@cuanschutz.edu>
Date: Tue, 8 Oct 2024 09:49:39 -0600
Subject: [PATCH 25/33] keep original ref

---
 R/plotme.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/plotme.R b/R/plotme.R
index 6cbeb3d0..906e85ec 100644
--- a/R/plotme.R
+++ b/R/plotme.R
@@ -1,4 +1,4 @@
-# Taken from https://github.com/yogevherz/plotme/blob/master/R/plotSunburst_treemap.R
+# Taken from https://github.com/yogevherz/plotme/blob/master/R/count_to_sunburst_treemap.R
 #' Create an interactive plotly from count data
 #'
 #' @description

From 00b376b9a0bfc613b98470e89be7222c4781378f Mon Sep 17 00:00:00 2001
From: David Mayer <david.mayer@cuanschutz.edu>
Date: Tue, 8 Oct 2024 09:58:56 -0600
Subject: [PATCH 26/33] add missing .Rd

---
 man/downloadAssemblySummary.Rd | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)
 create mode 100644 man/downloadAssemblySummary.Rd

diff --git a/man/downloadAssemblySummary.Rd b/man/downloadAssemblySummary.Rd
new file mode 100644
index 00000000..636af878
--- /dev/null
+++ b/man/downloadAssemblySummary.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lineage.R
+\name{downloadAssemblySummary}
+\alias{downloadAssemblySummary}
+\title{Download the combined assembly summaries of genbank and refseq}
+\usage{
+downloadAssemblySummary(
+  outpath,
+  keep = c("assembly_accession", "taxid", "species_taxid", "organism_name")
+)
+}
+\arguments{
+\item{outpath}{String of path where the assembly summary file should be written}
+
+\item{keep}{Character vector containing which columns should be retained and downloaded}
+}
+\description{
+Download the combined assembly summaries of genbank and refseq
+}
+\author{
+Samuel Chen, Janani Ravi
+}

From acc5c6dd8045afc6691958132e029cf3e7c04f3b Mon Sep 17 00:00:00 2001
From: David Mayer <david.mayer@cuanschutz.edu>
Date: Tue, 8 Oct 2024 10:03:20 -0600
Subject: [PATCH 27/33] resolve merge conflicts from incoming PRs

---
 man/acc2Lineage.Rd |  2 +-
 man/acc2lin.Rd     | 56 ----------------------------------------------
 man/addlineage.Rd  | 40 ---------------------------------
 3 files changed, 1 insertion(+), 97 deletions(-)
 delete mode 100644 man/acc2lin.Rd
 delete mode 100644 man/addlineage.Rd

diff --git a/man/acc2Lineage.Rd b/man/acc2Lineage.Rd
index d632c52e..a24bdc9a 100644
--- a/man/acc2Lineage.Rd
+++ b/man/acc2Lineage.Rd
@@ -24,7 +24,7 @@ acc2Lineage(
 \item{accessions}{Character vector of protein accessions}
 
 \item{assembly_path}{String of the path to the assembly_summary path
-This file can be generated using the "DownloadAssemblySummary()" function}
+This file can be generated using the "downloadAssemblySummary()" function}
 
 \item{lineagelookup_path}{String of the path to the lineage lookup file
 (taxid to lineage mapping). This file can be generated using the}
diff --git a/man/acc2lin.Rd b/man/acc2lin.Rd
deleted file mode 100644
index fd393d43..00000000
--- a/man/acc2lin.Rd
+++ /dev/null
@@ -1,56 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/acc2lin.R, R/lineage.R
-\name{acc2lin}
-\alias{acc2lin}
-\title{acc2lin}
-\usage{
-acc2lin(
-  accessions,
-  assembly_path,
-  lineagelookup_path,
-  ipgout_path = NULL,
-  plan = "multicore"
-)
-
-acc2lin(
-  accessions,
-  assembly_path,
-  lineagelookup_path,
-  ipgout_path = NULL,
-  plan = "multicore"
-)
-}
-\arguments{
-\item{accessions}{Character vector of protein accessions}
-
-\item{assembly_path}{String of the path to the assembly_summary path
-This file can be generated using the "downloadAssemblySummary()" function}
-
-\item{lineagelookup_path}{String of the path to the lineage lookup file
-(taxid to lineage mapping). This file can be generated using the}
-
-\item{ipgout_path}{Path to write the results of the efetch run of the accessions
-on the ipg database. If NULL, the file will not be written. Defaults to NULL}
-
-\item{plan}{}
-}
-\value{
-Describe return, in detail
-}
-\description{
-This function combines 'efetch_ipg()' and 'ipg2lin()' to map a set
-of protein accessions to their assembly (GCA_ID), tax ID, and lineage.
-
-Function to map protein accession numbers to lineage
-
-This function combines 'efetch_ipg()' and 'ipg2lin()' to map a set
-of protein accessions to their assembly (GCA_ID), tax ID, and lineage.
-}
-\examples{
-\dontrun{
-acc2lin()
-}
-}
-\author{
-Samuel Chen, Janani Ravi
-}
diff --git a/man/addlineage.Rd b/man/addlineage.Rd
deleted file mode 100644
index 6694e94c..00000000
--- a/man/addlineage.Rd
+++ /dev/null
@@ -1,40 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/acc2lin.R, R/lineage.R
-\name{addLineage}
-\alias{addLineage}
-\title{addLineage}
-\usage{
-addLineage(
-  df,
-  acc_col = "AccNum",
-  assembly_path,
-  lineagelookup_path,
-  ipgout_path = NULL,
-  plan = "multicore"
-)
-
-addLineage(
-  df,
-  acc_col = "AccNum",
-  assembly_path,
-  lineagelookup_path,
-  ipgout_path = NULL,
-  plan = "multicore"
-)
-}
-\arguments{
-\item{plan}{}
-}
-\value{
-Describe return, in detail
-}
-\description{
-addLineage
-
-addLineage
-}
-\examples{
-\dontrun{
-addLineage()
-}
-}

From 919d60bbc075c81e3ba136768d8f7b98a79a84cc Mon Sep 17 00:00:00 2001
From: David Mayer <david.mayer@cuanschutz.edu>
Date: Tue, 8 Oct 2024 10:17:52 -0600
Subject: [PATCH 28/33] update dependent .Rd referencing
 downloadAssemblySummary()

---
 R/acc2lin.R        |  4 ++--
 man/IPG2Lineage.Rd |  2 +-
 man/addLineage.Rd  | 40 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 43 insertions(+), 3 deletions(-)
 create mode 100644 man/addLineage.Rd

diff --git a/R/acc2lin.R b/R/acc2lin.R
index 73aca0f4..40d190a6 100644
--- a/R/acc2lin.R
+++ b/R/acc2lin.R
@@ -73,7 +73,7 @@ addLineage <- function(df, acc_col = "AccNum", assembly_path,
 #'
 #' @param accessions Character vector of protein accessions
 #' @param assembly_path String of the path to the assembly_summary path
-#' This file can be generated using the "DownloadAssemblySummary()" function
+#' This file can be generated using the \link[MolEvolvR]{downloadAssemblySummary} function
 #' @param lineagelookup_path String of the path to the lineage lookup file
 #' (taxid to lineage mapping). This file can be generated using the
 #' @param ipgout_path Path to write the results of the efetch run of the accessions
@@ -179,7 +179,7 @@ efetchIPG <- function(accnums, out_path, plan = "sequential", ...) {
 #' ipg database. The protein accession in 'accessions' should be contained in this
 #' file
 #' @param assembly_path String of the path to the assembly_summary path
-#' This file can be generated using the "DownloadAssemblySummary()" function
+#' This file can be generated using the \link[MolEvolvR]{downloadAssemblySummary} function
 #' @param lineagelookup_path String of the path to the lineage lookup file
 #' (taxid to lineage mapping). This file can be generated using the
 #' "create_lineage_lookup()" function
diff --git a/man/IPG2Lineage.Rd b/man/IPG2Lineage.Rd
index cf3e635e..e24ab617 100644
--- a/man/IPG2Lineage.Rd
+++ b/man/IPG2Lineage.Rd
@@ -32,7 +32,7 @@ file}
 "create_lineage_lookup()" function}
 
 \item{assembly_path}{String of the path to the assembly_summary path
-This file can be generated using the "DownloadAssemblySummary()" function}
+This file can be generated using the \link[MolEvolvR]{downloadAssemblySummary} function}
 }
 \value{
 Describe return, in detail
diff --git a/man/addLineage.Rd b/man/addLineage.Rd
new file mode 100644
index 00000000..6694e94c
--- /dev/null
+++ b/man/addLineage.Rd
@@ -0,0 +1,40 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/acc2lin.R, R/lineage.R
+\name{addLineage}
+\alias{addLineage}
+\title{addLineage}
+\usage{
+addLineage(
+  df,
+  acc_col = "AccNum",
+  assembly_path,
+  lineagelookup_path,
+  ipgout_path = NULL,
+  plan = "multicore"
+)
+
+addLineage(
+  df,
+  acc_col = "AccNum",
+  assembly_path,
+  lineagelookup_path,
+  ipgout_path = NULL,
+  plan = "multicore"
+)
+}
+\arguments{
+\item{plan}{}
+}
+\value{
+Describe return, in detail
+}
+\description{
+addLineage
+
+addLineage
+}
+\examples{
+\dontrun{
+addLineage()
+}
+}

From 6d3c23b06e04c5c135ce4d021f31dc6477a06bf5 Mon Sep 17 00:00:00 2001
From: David Mayer <david.mayer@cuanschutz.edu>
Date: Tue, 8 Oct 2024 10:22:31 -0600
Subject: [PATCH 29/33] reference new function name in acc2lin.R

---
 R/acc2lin.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/acc2lin.R b/R/acc2lin.R
index 40d190a6..1984ec3c 100644
--- a/R/acc2lin.R
+++ b/R/acc2lin.R
@@ -201,7 +201,7 @@ IPG2Lineage <- function(accessions, ipg_file, assembly_path, lineagelookup_path,
 
     ipg_dt <- setnames(ipg_dt, "Assembly", "GCA_ID")
 
-    lins <- GCA2Lins(prot_data = ipg_dt, assembly_path, lineagelookup_path)
+    lins <- GCA2Lineage(prot_data = ipg_dt, assembly_path, lineagelookup_path)
     lins <- lins[!is.na(Lineage)] %>% unique()
 
     return(lins)

From 9045b7ca78631f61523bd003b430aff7394362fd Mon Sep 17 00:00:00 2001
From: David Mayer <david.mayer@cuanschutz.edu>
Date: Tue, 8 Oct 2024 10:38:33 -0600
Subject: [PATCH 30/33] add missing .Rd

---
 man/elements2Words.Rd | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100644 man/elements2Words.Rd

diff --git a/man/elements2Words.Rd b/man/elements2Words.Rd
new file mode 100644
index 00000000..1094d363
--- /dev/null
+++ b/man/elements2Words.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/summarize.R
+\name{elements2Words}
+\alias{elements2Words}
+\title{Elements 2 Words}
+\usage{
+elements2Words(prot, column = "DomArch", conversion_type = "da2doms")
+}
+\arguments{
+\item{prot}{[dataframe]}
+
+\item{column}{[string] column name}
+
+\item{conversion_type}{[string] type of conversion: 'da2doms': domain architectures to
+domains. 'gc2da' genomic context to domain architectures}
+}
+\value{
+[string] with words delimited by spaces
+}
+\description{
+Break string ELEMENTS into WORDS for domain architecture (DA) and genomic
+context (GC)
+}
+\examples{
+\dontrun{
+tibble::tibble(DomArch = c("aaa+bbb", "a+b", "b+c", "b-c")) |> elements2Words()
+}
+
+}

From c83e89c7f7e0e2bd7821ab7bc199a4ffa791d246 Mon Sep 17 00:00:00 2001
From: David Mayer <david.mayer@cuanschutz.edu>
Date: Tue, 8 Oct 2024 10:41:47 -0600
Subject: [PATCH 31/33] let R sort NAMESPACE

---
 NAMESPACE | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index de6af7c0..51ef791b 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -32,9 +32,9 @@ export(convertAlignment2FA)
 export(convertAlignment2Trees)
 export(convertFA2Tree)
 export(convert_aln2fa)
+export(countbycolumn)
 export(createFA2Tree)
 export(createWordCloud2Element)
-export(countbycolumn)
 export(createWordCloudElement)
 export(create_lineage_lookup)
 export(domain_network)
@@ -77,7 +77,6 @@ export(prepareColumnParams)
 export(prepareSingleColumnParams)
 export(proteinAcc2TaxID)
 export(proteinAcc2TaxID_old)
-export(prot2tax_old)
 export(removeAsterisks)
 export(removeEmptyRows)
 export(removeTails)

From 490e542b2857a44b1aa82d6f6c696ed7a20ecd2e Mon Sep 17 00:00:00 2001
From: David Mayer <david.mayer@cuanschutz.edu>
Date: Tue, 8 Oct 2024 11:08:27 -0600
Subject: [PATCH 32/33] updates - adjust casing, RE #53   - match with .Rd -
 update references throughout MolEvolvR pkg

---
 NAMESPACE                             | 12 ++++----
 R/cleanup.R                           |  2 +-
 R/networks_domarch.R                  |  8 ++---
 R/networks_gencontext.R               | 12 ++++----
 R/plotting.R                          | 22 +++++++-------
 R/summarize.R                         | 43 ++++++++++++---------------
 man/countbycolumn.Rd                  | 12 ++++----
 man/filterbydomains.Rd                | 10 +++----
 man/filterbyfrequency.Rd              |  8 ++---
 man/findparalogs.Rd                   |  8 ++---
 man/summarizebylineage.Rd             |  8 ++---
 man/totalgencontextordomarchcounts.Rd |  8 ++---
 man/words2wordcounts.Rd               |  8 ++---
 13 files changed, 78 insertions(+), 83 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 51ef791b..d37a2a60 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -32,7 +32,7 @@ export(convertAlignment2FA)
 export(convertAlignment2Trees)
 export(convertFA2Tree)
 export(convert_aln2fa)
-export(countbycolumn)
+export(countByColumn)
 export(createFA2Tree)
 export(createWordCloud2Element)
 export(createWordCloudElement)
@@ -41,10 +41,10 @@ export(domain_network)
 export(downloadAssemblySummary)
 export(efetchIPG)
 export(extractAccNum)
-export(filterbydomains)
-export(filterbyfrequency)
+export(filterByDomains)
+export(filterByFrequency)
+export(findParalogs)
 export(find_top_acc)
-export(findparalogs)
 export(format_job_args)
 export(gc_undirected_network)
 export(generateAllAlignments2FA)
@@ -91,15 +91,15 @@ export(selectLongestDuplicate)
 export(send_job_status_email)
 export(shortenLineage)
 export(sinkReset)
+export(summarizeByLineage)
 export(summarizeDomArch)
 export(summarizeDomArch_ByLineage)
 export(summarizeGenContext)
 export(summarizeGenContext_ByDomArchLineage)
 export(summarizeGenContext_ByLineage)
-export(summarizebylineage)
 export(theme_genes2)
 export(to_titlecase)
-export(totalgencontextordomarchcounts)
+export(totalGenContextOrDomArchCounts)
 export(validateCountDF)
 export(wordcloud3)
 export(write.MsaAAMultipleAlignment)
diff --git a/R/cleanup.R b/R/cleanup.R
index f82722f2..39b4b8d2 100755
--- a/R/cleanup.R
+++ b/R/cleanup.R
@@ -561,7 +561,7 @@ cleanDomainArchitecture <- function(prot, old = "DomArch.orig", new = "DomArch",
     # Remove rows with no domains contained within domains_keep
     # filter(grepl(domains_for_grep, DomArch))
     if (!is.null(domains_keep)) {
-        prot <- prot %>% filter_by_doms(column = new, doms_keep = domains_keep$domains)
+        prot <- prot %>% filterByDomains(column = new, doms_keep = domains_keep$domains)
     }
 
     # ##!! NOT RUN !!
diff --git a/R/networks_domarch.R b/R/networks_domarch.R
index 010b7619..fea0a195 100755
--- a/R/networks_domarch.R
+++ b/R/networks_domarch.R
@@ -54,7 +54,7 @@ domain_network <- function(prot, column = "DomArch", domains_of_interest, cutoff
         {
             column_name <- sym(column)
 
-            prot_tc <- prot %>% total_counts(column = column, cutoff = cutoff, RowsCutoff = F, digits = 5)
+            prot_tc <- prot %>% totalGenContextOrDomArchCounts(column = column, cutoff = cutoff, RowsCutoff = F, digits = 5)
 
             # ensure  only Domains that are in the tc cutoff range are kept
             within_list <- prot_tc %>%
@@ -95,7 +95,7 @@ domain_network <- function(prot, column = "DomArch", domains_of_interest, cutoff
             # cleanup domain list
             domain.list <- domain.list$DomArch.ntwrk %>% str_split(pattern = "\\+")
             # Get a table of domain counts
-            wc <- elements2words(prot = prot, column = column, conversion_type = "da2doms") %>% words2wc()
+            wc <- elements2Words(prot = prot, column = column, conversion_type = "da2doms") %>% words2WordCounts()
             wc <- pivot_wider(wc, names_from = words, values_from = freq)
 
             # Remove all isolated domarchs, such that an adjacency list can easily be constructed
@@ -262,7 +262,7 @@ BinaryDomainNetwork <- function(prot, column = "DomArch", domains_of_interest, c
 
     column_name <- sym(column)
 
-    prot_tc <- prot %>% total_counts(column = column, cutoff = cutoff, RowsCutoff = F, digits = 5)
+    prot_tc <- prot %>% totalGenContextOrDomArchCounts(column = column, cutoff = cutoff, RowsCutoff = F, digits = 5)
 
     within_list <- prot_tc %>%
         select({{ column_name }}) %>%
@@ -286,7 +286,7 @@ BinaryDomainNetwork <- function(prot, column = "DomArch", domains_of_interest, c
     domain.list <- domain.list$DomArch.ntwrk %>% str_split(pattern = "\\+")
 
     # Get domain counts before eliminating domarchs with no edges
-    wc <- elements2words(prot = prot, column = column, conversion_type = "da2doms") %>% words2wc()
+    wc <- elements2Words(prot = prot, column = column, conversion_type = "da2doms") %>% words2WordCounts()
 
     nodes <- data.frame(id = wc$words, label = wc$words, size = wc$freq) %>%
         mutate(group = purrr::map(
diff --git a/R/networks_gencontext.R b/R/networks_gencontext.R
index 6b703146..e0dd63da 100755
--- a/R/networks_gencontext.R
+++ b/R/networks_gencontext.R
@@ -46,11 +46,11 @@ gc_undirected_network <- function(prot, column = "GenContext", domains_of_intere
     column_name <- sym(column)
     if (cutoff_type == "Lineage") {
         lin_summary <- prot %>%
-            summ.DA.byLin() %>%
-            summ.DA()
+            summarizeDomArch_ByLineage() %>%
+            summarizeDomArch()
         doms_above_cutoff <- (lin_summary %>% filter(totallin >= cutoff))[[column]]
     } else if (cutoff_type == "Total Count") { # Change this type?
-        GC_above_cutoff <- (prot %>% total_counts(column = column, cutoff = cutoff))[[column]]
+        GC_above_cutoff <- (prot %>% totalGenContextOrDomArchCounts(column = column, cutoff = cutoff))[[column]]
     }
 
     prot <- prot[which(prot[[as_string(column_name)]] %in% GC_above_cutoff), ]
@@ -153,8 +153,8 @@ GenContextNetwork <- function(prot, domains_of_interest, column = "GenContext",
     column_name <- sym(column)
 
 
-    # Perform cutoff through total_counts
-    prot_tc <- prot %>% total_counts(column = column, cutoff = cutoff)
+    # Perform cutoff through totalGenContextOrDomArchCounts
+    prot_tc <- prot %>% totalGenContextOrDomArchCounts(column = column, cutoff = cutoff)
 
     within_list <- prot_tc %>%
         select({{ column_name }}) %>%
@@ -218,7 +218,7 @@ GenContextNetwork <- function(prot, domains_of_interest, column = "GenContext",
     }
 
     # Get domain counts before eliminating domarchs with no edges
-    wc <- elements2words(prot = prot, column = column, conversion_type = "gc2da") %>% words2wc()
+    wc <- elements2Words(prot = prot, column = column, conversion_type = "gc2da") %>% words2WordCounts()
     nodes <- data.frame(id = wc$words, label = wc$words, size = wc$freq)
 
     max_size <- max(nodes$size)
diff --git a/R/plotting.R b/R/plotting.R
index da95ea5f..5c8de823 100644
--- a/R/plotting.R
+++ b/R/plotting.R
@@ -108,7 +108,7 @@ plotUpSet <- function(query_data = "toast_rack.sub",
     # colname = string(colname)
     tryCatch(
         {
-            tc <- query_data %>% total_counts(column = colname, cutoff = cutoff, RowsCutoff = RowsCutoff, digits = 5)
+            tc <- query_data %>% totalGenContextOrDomArchCounts(column = colname, cutoff = cutoff, RowsCutoff = RowsCutoff, digits = 5)
             ##### Remove Tails ####
             # tails comprise of less than 1% of data each
             # ie) individual percent is less than 1
@@ -128,7 +128,7 @@ plotUpSet <- function(query_data = "toast_rack.sub",
             words.tc <- tc %>%
                 select({{ column }}) %>%
                 distinct() %>%
-                elements2words(column = colname, conversion_type = type)
+                elements2Words(column = colname, conversion_type = type)
             # names(words.tc)[1] <- "words"
             words.tc <- words.tc %>% str_split(pattern = " ")
             words.tc <- as.data.frame(words.tc, col.names = "Words", stringsAsFactors = F) %>%
@@ -273,7 +273,7 @@ plotLineageDA <- function(query_data = "prot",
 
     query_data <- shortenLineage(query_data, "Lineage", abr_len = 1)
 
-    query.summ.byLin <- query_data %>% total_counts(cutoff = cutoff, column = colname, RowsCutoff = RowsCutoff)
+    query.summ.byLin <- query_data %>% totalGenContextOrDomArchCounts(cutoff = cutoff, column = colname, RowsCutoff = RowsCutoff)
 
     query.summ.byLin$Lineage <- map(query.summ.byLin$Lineage, function(x) str_replace_all(string = x, pattern = ">", replacement = "_")) %>%
         unlist()
@@ -390,7 +390,7 @@ plotLineageQuery <- function(query_data = all,
     }
     col <- sym(colname)
 
-    query_data <- query_data %>% total_counts(column = colname, cutoff = cutoff)
+    query_data <- query_data %>% totalGenContextOrDomArchCounts(column = colname, cutoff = cutoff)
     # query_data contains all rows that possess a lineage
     query_data <- query_data %>% filter(grepl("a", Lineage))
 
@@ -831,7 +831,7 @@ plotStackedLineage <- function(prot, column = "DomArch", cutoff, Lineage_col = "
         prot <- shortenLineage(prot, Lineage_col, abr_len = 3)
     }
 
-    total_count <- total_counts(prot, column, cutoff, lineage_col = Lineage_col)
+    total_count <- totalGenContextOrDomArchCounts(prot, column, cutoff, lineage_col = Lineage_col)
     # total_count = prot
 
     # Order bars by descending freq
@@ -1054,7 +1054,7 @@ createWordCloudElement <- function(query_data = "prot",
     colname = "DomArch",
     cutoff = 70,
     UsingRowsCutoff = FALSE) {
-    tc <- query_data %>% total_counts(column = colname, cutoff = cutoff, RowsCutoff = UsingRowsCutoff, digits = 5)
+    tc <- query_data %>% totalGenContextOrDomArchCounts(column = colname, cutoff = cutoff, RowsCutoff = UsingRowsCutoff, digits = 5)
 
     column <- sym(colname)
     # Get words from filter
@@ -1069,11 +1069,11 @@ createWordCloudElement <- function(query_data = "prot",
     }
 
     words.tc <- query_data %>%
-        elements2words(
+        elements2Words(
             column = colname,
             conversion_type = type
         ) %>%
-        words2wc()
+        words2WordCounts()
 
     # names(words.tc) <- c("words", "freq")
 
@@ -1134,7 +1134,7 @@ createWordCloud2Element <- function(query_data = "prot",
     # @param type Character. Default is "da2doms" for Domain Architectures.
     # Other alternative: "gc2da" for Genomic Contexts.
 
-    tc <- query_data %>% total_counts(column = colname, cutoff = cutoff, RowsCutoff = UsingRowsCutoff, digits = 5)
+    tc <- query_data %>% totalGenContextOrDomArchCounts(column = colname, cutoff = cutoff, RowsCutoff = UsingRowsCutoff, digits = 5)
 
     column <- sym(colname)
     query_data <- query_data %>% filter({{ column }} %in% pull(tc, {{ colname }}))
@@ -1146,11 +1146,11 @@ createWordCloud2Element <- function(query_data = "prot",
     }
 
     words.tc <- query_data %>%
-        elements2words(
+        elements2Words(
             column = colname,
             conversion_type = type
         ) %>%
-        words2wc()
+        words2WordCounts()
 
     names(words.tc) <- c("words", "freq")
 
diff --git a/R/summarize.R b/R/summarize.R
index d2cef471..e0dae1c4 100644
--- a/R/summarize.R
+++ b/R/summarize.R
@@ -13,7 +13,7 @@
 #' Filter by Domains
 #'
 #' @author Samuel Chen, Janani Ravi
-#' @description filterbydomains filters a data frame by identifying exact domain matches
+#' @description filterByDomains filters a data frame by identifying exact domain matches
 #' and either keeping or removing rows with the identified domain
 #'
 #' @param prot Dataframe to filter
@@ -33,9 +33,9 @@
 #'
 #' @examples
 #' \dontrun{
-#' filterbydomains()
+#' filterByDomains()
 #' }
-filterbydomains <- function(prot, column = "DomArch", doms_keep = c(), doms_remove = c(),
+filterByDomains <- function(prot, column = "DomArch", doms_keep = c(), doms_remove = c(),
     ignore.case = FALSE) {
     # Only rows with a domain in doms_keep will be kept
     # Any row containing a domain in doms_remove will be removed
@@ -89,7 +89,7 @@ filterbydomains <- function(prot, column = "DomArch", doms_keep = c(), doms_remo
 ## Before/after break up ##
 ###########################
 ## Function to obtain element counts (DA, GC)
-#' Count Bycol
+#' Count By Column
 #'
 #' @param prot
 #' @param column
@@ -102,9 +102,9 @@ filterbydomains <- function(prot, column = "DomArch", doms_keep = c(), doms_remo
 #'
 #' @examples
 #' \dontrun{
-#' countbycolumn()
+#' countByColumn()
 #' }
-countbycolumn <- function(prot = prot, column = "DomArch", min.freq = 1) {
+countByColumn <- function(prot = prot, column = "DomArch", min.freq = 1) {
     counts <- prot %>%
         select(column) %>%
         table() %>%
@@ -185,10 +185,10 @@ elements2Words <- function(prot, column = "DomArch", conversion_type = "da2doms"
 #' \dontrun{
 #' tibble::tibble(DomArch = c("aaa+bbb", "a+b", "b+c", "b-c")) |>
 #'     elements2Words() |>
-#'     words2wordcounts()
+#'     words2WordCounts()
 #' }
 #'
-words2wordcounts <- function(string) {
+words2WordCounts <- function(string) {
     df_word_count <- string %>%
         # reduce spaces with length 2 or greater to a single space
         str_replace_all("\\s{2,}", " ") %>%
@@ -227,9 +227,9 @@ words2wordcounts <- function(string) {
 #'
 #' @examples
 #' \dontrun{
-#' filterbyfrequency()
+#' filterByFrequency()
 #' }
-filterbyfrequency <- function(x, min.freq) {
+filterByFrequency <- function(x, min.freq) {
     x %>%
         filter(freq >= min.freq)
 }
@@ -254,10 +254,10 @@ filterbyfrequency <- function(x, min.freq) {
 #' \dontrun{
 #' library(tidyverse)
 #' tibble(DomArch = c("a+b", "a+b", "b+c", "a+b"), Lineage = c("l1", "l1", "l1", "l2")) |>
-#'     summarizebylineage(query = "all")
+#'     summarizeByLineage(query = "all")
 #' }
 #'
-summarizebylineage <- function(prot = "prot", column = "DomArch", by = "Lineage",
+summarizeByLineage <- function(prot = "prot", column = "DomArch", by = "Lineage",
     query) {
     column <- sym(column)
     by <- sym(by)
@@ -295,7 +295,6 @@ summarizebylineage <- function(prot = "prot", column = "DomArch", by = "Lineage"
 #' summarizeDomArch_ByLineage()
 #' }
 summarizeDomArch_ByLineage <- function(x) {
-    ## Note: it is better to reserve dots for S3 Objects. Consider replacing '.' with '_'
     x %>%
         filter(!grepl("^-$", DomArch)) %>%
         group_by(DomArch, Lineage) %>%
@@ -321,7 +320,6 @@ summarizeDomArch_ByLineage <- function(x) {
 #' summarizeDomArch()
 #' }
 summarizeDomArch <- function(x) {
-    ## Note: it is better to reserve dots for S3 Objects. Consider replacing '.' with '_'
     x %>%
         group_by(DomArch) %>%
         summarise(totalcount = sum(count), totallin = n()) %>% # totallin=n_distinct(Lineage),
@@ -344,7 +342,6 @@ summarizeDomArch <- function(x) {
 #' summarizeGenContext_ByDomArchLineage
 #' }
 summarizeGenContext_ByDomArchLineage <- function(x) {
-    ## Note: it is better to reserve dots for S3 Objects. Consider replacing '.' with '_'
     x %>%
         filter(!grepl("^-$", GenContext)) %>%
         filter(!grepl("^-$", DomArch)) %>%
@@ -369,7 +366,6 @@ summarizeGenContext_ByDomArchLineage <- function(x) {
 #' summarizeGenContext_ByLineage()
 #' }
 summarizeGenContext_ByLineage <- function(x) {
-    ## Note: it is better to reserve dots for S3 Objects. Consider replacing '.' with '_'
     x %>%
         filter(!grepl("^-$", GenContext)) %>%
         filter(!grepl("^-$", DomArch)) %>%
@@ -394,7 +390,6 @@ summarizeGenContext_ByLineage <- function(x) {
 #' summarizeGenContext()
 #' }
 summarizeGenContext <- function(x) {
-    ## Note: it is better to reserve dots for S3 Objects. Consider replacing '.' with '_'
     x %>%
         group_by(GenContext) %>%
         summarise(
@@ -436,9 +431,9 @@ summarizeGenContext <- function(x) {
 #'
 #' @examples
 #' \dontrun{
-#' totalgencontextordomarchcounts(pspa - gc_lin_counts, 0, "GC")
+#' totalGenContextOrDomArchCounts(pspa - gc_lin_counts, 0, "GC")
 #' }
-totalgencontextordomarchcounts <- function(prot, column = "DomArch", lineage_col = "Lineage",
+totalGenContextOrDomArchCounts <- function(prot, column = "DomArch", lineage_col = "Lineage",
     cutoff = 90, RowsCutoff = FALSE, digits = 2
     # type = "GC"
 ) {
@@ -448,7 +443,7 @@ totalgencontextordomarchcounts <- function(prot, column = "DomArch", lineage_col
         filter(!is.na({{ column }}) & !is.na({{ lineage_col }})) %>%
         filter({{ column }} != "")
 
-    prot <- summarizebylineage(prot, column, by = lineage_col, query = "all")
+    prot <- summarizeByLineage(prot, column, by = lineage_col, query = "all")
     col_count <- prot %>%
         group_by({{ column }}) %>%
         summarise(totalcount = sum(count))
@@ -598,9 +593,9 @@ totalgencontextordomarchcounts <- function(prot, column = "DomArch", lineage_col
 #'
 #' @examples
 #' \dontrun{
-#' findparalogs(pspa)
+#' findParalogs(pspa)
 #' }
-findparalogs <- function(prot) {
+findParalogs <- function(prot) {
     # Remove eukaryotes
     prot <- prot %>% filter(!grepl("^eukaryota", Lineage))
     paralogTable <- prot %>%
@@ -644,8 +639,8 @@ findparalogs <- function(prot) {
 # 	elements2Words(\"gc2da\")")
 
 
-# ## words2wordcounts: Function to get WORD COUNTS [DOMAINS (DA) or DOMAIN ARCHITECTURES (GC)]
+# ## words2WordCounts: Function to get WORD COUNTS [DOMAINS (DA) or DOMAIN ARCHITECTURES (GC)]
 # cat("Word counts for broken up domains from DAs and DAs from GCs.
 # \nFor e.g.:
 # DA.doms.wc <- query.sub$DA.doms %>%
-#   words2wordcounts()")
+#   words2WordCounts()")
diff --git a/man/countbycolumn.Rd b/man/countbycolumn.Rd
index e669a4a3..34fcc3e0 100644
--- a/man/countbycolumn.Rd
+++ b/man/countbycolumn.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/summarize.R
-\name{countbycolumn}
-\alias{countbycolumn}
-\title{Count Bycol}
+\name{countByColumn}
+\alias{countByColumn}
+\title{Count By Column}
 \usage{
-countbycolumn(prot = prot, column = "DomArch", min.freq = 1)
+countByColumn(prot = prot, column = "DomArch", min.freq = 1)
 }
 \arguments{
 \item{min.freq}{}
@@ -13,10 +13,10 @@ countbycolumn(prot = prot, column = "DomArch", min.freq = 1)
 Describe return, in detail
 }
 \description{
-Count Bycol
+Count By Column
 }
 \examples{
 \dontrun{
-countbycolumn()
+countByColumn()
 }
 }
diff --git a/man/filterbydomains.Rd b/man/filterbydomains.Rd
index 7fd148e7..8c885363 100644
--- a/man/filterbydomains.Rd
+++ b/man/filterbydomains.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/summarize.R
-\name{filterbydomains}
-\alias{filterbydomains}
+\name{filterByDomains}
+\alias{filterByDomains}
 \title{Filter by Domains}
 \usage{
-filterbydomains(
+filterByDomains(
   prot,
   column = "DomArch",
   doms_keep = c(),
@@ -28,7 +28,7 @@ observation to be kept}
 Filtered data frame
 }
 \description{
-filterbydomains filters a data frame by identifying exact domain matches
+filterByDomains filters a data frame by identifying exact domain matches
 and either keeping or removing rows with the identified domain
 }
 \note{
@@ -36,7 +36,7 @@ There is no need to make the domains 'regex safe', that will be handled by this
 }
 \examples{
 \dontrun{
-filterbydomains()
+filterByDomains()
 }
 }
 \author{
diff --git a/man/filterbyfrequency.Rd b/man/filterbyfrequency.Rd
index d219a100..d2c5f9cd 100644
--- a/man/filterbyfrequency.Rd
+++ b/man/filterbyfrequency.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/summarize.R
-\name{filterbyfrequency}
-\alias{filterbyfrequency}
+\name{filterByFrequency}
+\alias{filterByFrequency}
 \title{Filter Frequency}
 \usage{
-filterbyfrequency(x, min.freq)
+filterByFrequency(x, min.freq)
 }
 \arguments{
 \item{min.freq}{}
@@ -17,6 +17,6 @@ Filter Frequency
 }
 \examples{
 \dontrun{
-filterbyfrequency()
+filterByFrequency()
 }
 }
diff --git a/man/findparalogs.Rd b/man/findparalogs.Rd
index 7e985fe5..4b5edbcf 100644
--- a/man/findparalogs.Rd
+++ b/man/findparalogs.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/summarize.R
-\name{findparalogs}
-\alias{findparalogs}
+\name{findParalogs}
+\alias{findParalogs}
 \title{Find Paralogs}
 \usage{
-findparalogs(prot)
+findParalogs(prot)
 }
 \arguments{
 \item{prot}{A data frame filtered by a Query, containing columns Species and Lineage}
@@ -21,6 +21,6 @@ column names.
 }
 \examples{
 \dontrun{
-findparalogs(pspa)
+findParalogs(pspa)
 }
 }
diff --git a/man/summarizebylineage.Rd b/man/summarizebylineage.Rd
index 34d72c37..2e445913 100644
--- a/man/summarizebylineage.Rd
+++ b/man/summarizebylineage.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/summarize.R
-\name{summarizebylineage}
-\alias{summarizebylineage}
+\name{summarizeByLineage}
+\alias{summarizeByLineage}
 \title{Summarize by Lineage}
 \usage{
-summarizebylineage(prot = "prot", column = "DomArch", by = "Lineage", query)
+summarizeByLineage(prot = "prot", column = "DomArch", by = "Lineage", query)
 }
 \arguments{
 \item{query}{}
@@ -19,7 +19,7 @@ Summarize by Lineage
 \dontrun{
 library(tidyverse)
 tibble(DomArch = c("a+b", "a+b", "b+c", "a+b"), Lineage = c("l1", "l1", "l1", "l2")) |>
-    summarizebylineage(query = "all")
+    summarizeByLineage(query = "all")
 }
 
 }
diff --git a/man/totalgencontextordomarchcounts.Rd b/man/totalgencontextordomarchcounts.Rd
index aa8697ee..f457cb6a 100644
--- a/man/totalgencontextordomarchcounts.Rd
+++ b/man/totalgencontextordomarchcounts.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/summarize.R
-\name{totalgencontextordomarchcounts}
-\alias{totalgencontextordomarchcounts}
+\name{totalGenContextOrDomArchCounts}
+\alias{totalGenContextOrDomArchCounts}
 \title{Total Counts}
 \usage{
-totalgencontextordomarchcounts(
+totalGenContextOrDomArchCounts(
   prot,
   column = "DomArch",
   lineage_col = "Lineage",
@@ -37,6 +37,6 @@ column names.
 }
 \examples{
 \dontrun{
-totalgencontextordomarchcounts(pspa - gc_lin_counts, 0, "GC")
+totalGenContextOrDomArchCounts(pspa - gc_lin_counts, 0, "GC")
 }
 }
diff --git a/man/words2wordcounts.Rd b/man/words2wordcounts.Rd
index 69f30c5d..7f60f226 100644
--- a/man/words2wordcounts.Rd
+++ b/man/words2wordcounts.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/summarize.R
-\name{words2wordcounts}
-\alias{words2wordcounts}
+\name{words2WordCounts}
+\alias{words2WordCounts}
 \title{Words 2 Word Counts}
 \usage{
-words2wordcounts(string)
+words2WordCounts(string)
 }
 \arguments{
 \item{string}{}
@@ -19,7 +19,7 @@ Get word counts (wc) \link{DOMAINS (DA) or DOMAIN ARCHITECTURES (GC)}
 \dontrun{
 tibble::tibble(DomArch = c("aaa+bbb", "a+b", "b+c", "b-c")) |>
     elements2Words() |>
-    words2wordcounts()
+    words2WordCounts()
 }
 
 }

From 6ec0b13b9236714726ec0f0a7c60d0963220ef7c Mon Sep 17 00:00:00 2001
From: David Mayer <david.mayer@cuanschutz.edu>
Date: Tue, 8 Oct 2024 11:18:20 -0600
Subject: [PATCH 33/33] update NAMESPACE

---
 NAMESPACE | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 42ce58f9..53332439 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -34,9 +34,9 @@ export(convertFA2Tree)
 export(convert_aln2fa)
 export(countByColumn)
 export(createFA2Tree)
-export(createWordCloud2Element)
 export(createJobResultsURL)
 export(createJobStatusEmailMessage)
+export(createWordCloud2Element)
 export(createWordCloudElement)
 export(create_lineage_lookup)
 export(domain_network)
@@ -57,7 +57,6 @@ export(get_proc_medians)
 export(get_proc_weights)
 export(ipr2viz)
 export(ipr2viz_web)
-export(make_job_results_url)
 export(make_opts2procs)
 export(mapAcc2Name)
 export(map_acc2name)