From f5dbe30045a7a066daec77fc3e3962030d535e9f Mon Sep 17 00:00:00 2001 From: teddyCodex Date: Sun, 6 Oct 2024 17:16:26 +0100 Subject: [PATCH 01/33] refactor function names in CHANGED-pre-msa-tree.R --- R/CHANGED-pre-msa-tree.R | 60 ++++++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/R/CHANGED-pre-msa-tree.R b/R/CHANGED-pre-msa-tree.R index 998e8fbd..ef334232 100644 --- a/R/CHANGED-pre-msa-tree.R +++ b/R/CHANGED-pre-msa-tree.R @@ -1,9 +1,9 @@ ## Pre-requisites to generate MSA and Phylogenetic Tree ## Includes the following functions: -## convert_aln2fa, to_titlecase, add_leaves -## generate_all_aln2fa -## convert_aln2tsv??, convert_accnum2fa?? -## Created from add_leaves.R, convert_aln2fa.R, all_aln2fa.R +## convertAlignment2FA, convert2TitleCase, addLeaves2Alignment +## generateAllAlignments2FA +## convertAlignment2TSV??, convertAccNumber2FA?? +## Created from addLeaves2Alignment.R, convertAlignment2FA.R, all_aln2fa.R ## Modified: Dec 24, 2019 | Jan 2021 ## Janani Ravi (@jananiravi) & Samuel Chen (@samuelzornchen) @@ -35,7 +35,7 @@ api_key <- Sys.getenv("ENTREZ_API_KEY", unset = "YOUR_KEY_HERE") #' @author Andrie, Janani Ravi #' @description Translate string to Title Case w/ delimitter. #' @aliases totitle, to_title -#' @usage to_titlecase(text, delimitter) +#' @usage convert2TitleCase(text, delimitter) #' @param x Character vector. #' @param y Delimitter. Default is space (" "). #' @seealso chartr, toupper, and tolower. 
@@ -44,7 +44,7 @@ api_key <- Sys.getenv("ENTREZ_API_KEY", unset = "YOUR_KEY_HERE") #' @export #' #' @examples -to_titlecase <- function(x, y = " ") { +convert2TitleCase <- function(x, y = " ") { s <- strsplit(x, y)[[1]] paste(toupper(substring(s, 1, 1)), substring(s, 2), sep = "", collapse = y @@ -89,9 +89,9 @@ to_titlecase <- function(x, y = " ") { #' #' @examples #' \dontrun{ -#' add_leaves("pspa_snf7.aln", "pspa.txt") +#' addLeaves2Alignment("pspa_snf7.aln", "pspa.txt") #' } -add_leaves <- function(aln_file = "", +addLeaves2Alignment <- function(aln_file = "", lin_file = "data/rawdata_tsv/all_semiclean.txt", # !! finally change to all_clean.txt!! # lin_file="data/rawdata_tsv/PspA.txt", reduced = FALSE) { @@ -164,7 +164,7 @@ add_leaves <- function(aln_file = "", # AccNum, sep = "_" )) - temp$Leaf <- map(temp$Leaf, to_titlecase) + temp$Leaf <- map(temp$Leaf, convert2TitleCase) temp <- temp %>% mutate(Leaf_Acc = (paste(Leaf, AccNum, sep = "_"))) @@ -203,7 +203,7 @@ add_leaves <- function(aln_file = "", #' @export #' #' @examples -add_name <- function(data, +addName <- function(data, accnum_col = "AccNum", spec_col = "Species", lin_col = "Lineage", lin_sep = ">", out_col = "Name") { cols <- c(accnum_col, "Kingdom", "Phylum", "Genus", "Spp") @@ -283,10 +283,10 @@ add_name <- function(data, #' #' @examples #' \dontrun{ -#' add_leaves("pspa_snf7.aln", "pspa.txt") +#' addLeaves2Alignment("pspa_snf7.aln", "pspa.txt") #' } #' -convert_aln2fa <- function(aln_file = "", +convertAlignment2FA <- function(aln_file = "", lin_file = "data/rawdata_tsv/all_semiclean.txt", # !! finally change to all_clean.txt!! fa_outpath = "", reduced = FALSE) { @@ -297,7 +297,7 @@ convert_aln2fa <- function(aln_file = "", # fa_outpath="data/alns/pspc.fasta" ## Add leaves - aln <- add_leaves( + aln <- addLeaves2Alignment( aln = aln_file, lin = lin_file, reduced = reduced @@ -320,7 +320,7 @@ convert_aln2fa <- function(aln_file = "", return(fasta) } -#' Default rename_fasta() replacement function. 
Maps an accession number to its name +#' Default renameFA() replacement function. Maps an accession number to its name #' #' @param line The line of a fasta file starting with '>' #' @param acc2name Data Table containing a column of accession numbers and a name column @@ -335,8 +335,8 @@ convert_aln2fa <- function(aln_file = "", #' @export #' #' @examples -map_acc2name <- function(line, acc2name, acc_col = "AccNum", name_col = "Name") { - # change to be the name equivalent to an add_names column +mapAccession2Name <- function(line, acc2name, acc_col = "AccNum", name_col = "Name") { + # change to be the name equivalent to an addNames column # Find the first ' ' end_acc <- str_locate(line, " ")[[1]] @@ -364,8 +364,8 @@ map_acc2name <- function(line, acc2name, acc_col = "AccNum", name_col = "Name") #' @export #' #' @examples -rename_fasta <- function(fa_path, outpath, - replacement_function = map_acc2name, ...) { +renameFA <- function(fa_path, outpath, + replacement_function = mapAccession2Name, ...) 
{ lines <- read_lines(fa_path) res <- map(lines, function(x) { if (strtrim(x, 1) == ">") { @@ -381,7 +381,7 @@ rename_fasta <- function(fa_path, outpath, } ################################ -## generate_all_aln2fa +## generateAllAlignments2FA #' Adding Leaves to an alignment file w/ accessions #' #' @keywords alignment, accnum, leaves, lineage, species @@ -408,9 +408,9 @@ rename_fasta <- function(fa_path, outpath, #' #' @examples #' \dontrun{ -#' generate_all_aln2fa() +#' generateAllAlignments2FA() #' } -generate_all_aln2fa <- function(aln_path = here("data/rawdata_aln/"), +generateAllAlignments2FA <- function(aln_path = here("data/rawdata_aln/"), fa_outpath = here("data/alns/"), lin_file = here("data/rawdata_tsv/all_semiclean.txt"), reduced = F) { @@ -432,7 +432,7 @@ generate_all_aln2fa <- function(aln_path = here("data/rawdata_aln/"), fa_outpath = paste0(fa_outpath, "/", variable, ".fa") ) pmap( - .l = aln2fa_args, .f = convert_aln2fa, + .l = aln2fa_args, .f = convertAlignment2FA, lin_file = lin_file, reduced = reduced ) @@ -441,7 +441,7 @@ generate_all_aln2fa <- function(aln_path = here("data/rawdata_aln/"), # accessions <- c("P12345","Q9UHC1","O15530","Q14624","P0DTD1") # accessions <- rep("ANY95992.1", 201) -#' acc2fa converts protein accession numbers to a fasta format. +#' acc2FA converts protein accession numbers to a fasta format. #' #' @description #' Resulting fasta file is written to the outpath. 
@@ -464,11 +464,11 @@ generate_all_aln2fa <- function(aln_path = here("data/rawdata_aln/"), #' #' @examples #' \dontrun{ -#' acc2fa(accessions = c("ACU53894.1", "APJ14606.1", "ABK37082.1"), outpath = "my_proteins.fasta") -#' Entrez:accessions <- rep("ANY95992.1", 201) |> acc2fa(outpath = "entrez.fa") -#' EBI:accessions <- c("P12345", "Q9UHC1", "O15530", "Q14624", "P0DTD1") |> acc2fa(outpath = "ebi.fa") +#' acc2FA(accessions = c("ACU53894.1", "APJ14606.1", "ABK37082.1"), outpath = "my_proteins.fasta") +#' Entrez:accessions <- rep("ANY95992.1", 201) |> acc2FA(outpath = "entrez.fa") +#' EBI:accessions <- c("P12345", "Q9UHC1", "O15530", "Q14624", "P0DTD1") |> acc2FA(outpath = "ebi.fa") #' } -acc2fa <- function(accessions, outpath, plan = "sequential") { +acc2FA <- function(accessions, outpath, plan = "sequential") { # validation stopifnot(length(accessions) > 0) @@ -663,7 +663,7 @@ get_accnums_from_fasta_file <- function(fasta_file) { ################################ -## convert_accnum2fa +## convertAccNumber2FA ####### ## 1 ## ####### @@ -706,9 +706,9 @@ get_accnums_from_fasta_file <- function(fasta_file) { # seqs <- retrieveseqs(seqnames,"swissprot") ################################ -## convert_aln2tsv +## convertAlignment2TSV ## NEEDS FIXING! 
-# convert_aln2tsv <- function(file_path){ +# convertAlignment2TSV <- function(file_path){ # cfile <- read_delim("data/alignments/pspc.gismo.aln", delim=" ") # cfile <- as.data.frame(map(cfile,function(x) gsub("\\s+", "",x))) # colnames(cfile) <- c("AccNum", "Alignment") From a58057115e99bce9ab62beb02585837282498934 Mon Sep 17 00:00:00 2001 From: teddyCodex Date: Sun, 6 Oct 2024 19:21:23 +0100 Subject: [PATCH 02/33] refactor function names in R/fa2domain.R --- R/fa2domain.R | 52 +++++++++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/R/fa2domain.R b/R/fa2domain.R index 672d0856..ea926cb4 100644 --- a/R/fa2domain.R +++ b/R/fa2domain.R @@ -4,7 +4,7 @@ # - a protein with no domains (unlikely) found from # interproscan CLI will return a completely empty file (0Bytes) -#' exec_interproscan +#' runInterProScan #' #' @param filepath_fasta #' @param filepath_out @@ -15,7 +15,7 @@ #' @return #' #' @examples -exec_interproscan <- function( +runInterProScan <- function( filepath_fasta, filepath_out, # do not inlucde file extension since ipr handles this appl = c("Pfam", "Gene3D") @@ -34,7 +34,7 @@ exec_interproscan <- function( return(NULL) } # read and return results - df_iprscan <- read_iprscan_tsv(paste0(filepath_out, ".tsv")) + df_iprscan <- readIPRScanTSV(paste0(filepath_out, ".tsv")) return(df_iprscan) } @@ -43,7 +43,7 @@ exec_interproscan <- function( #' molevol_scripts/R/colnames_molevol.R) #' #' @return [chr] interproscan column names used throughout molevolvr -get_df_ipr_col_names <- function() { +getIPRScanColnames <- function() { column_names <- c( "AccNum", "SeqMD5Digest", "SLength", "Analysis", "DB.ID", "SignDesc", "StartLoc", "StopLoc", "Score", @@ -58,7 +58,7 @@ get_df_ipr_col_names <- function() { #' @return [collector] a named vector of type expecatations #' for interproscan columns #' -get_df_ipr_col_types <- function() { +getIPRScanColtypes <- function() { column_types <- readr::cols( "AccNum" = 
readr::col_character(), "SeqMD5Digest" = readr::col_character(), @@ -85,10 +85,10 @@ get_df_ipr_col_types <- function() { #' @importFrom readr read_tsv #' #' @return [tbl_df] interproscan output table -read_iprscan_tsv <- function(filepath) { +readIPRScanTSV <- function(filepath) { df_ipr <- readr::read_tsv(filepath, - col_types = get_df_ipr_col_types(), - col_names = get_df_ipr_col_names() + col_types = getIPRScanColtypes(), + col_names = getIPRScanColnames() ) return(df_ipr) } @@ -100,7 +100,7 @@ read_iprscan_tsv <- function(filepath) { #' which will be used to search for its sequence's domains (df_iprscan param) #' @param fasta [AAStringSet] original fasta file which was fed into interproscan #' @param df_iprscan [tbl_df] the output TSV of interproscan, read as a tibble with -#' read_iprscan_tsv() +#' readIPRScanTSV() #' @param analysis [chr] the domain databases to extract sequences from #' #' @importFrom dplyr arrange filter mutate rowwise relocate select ungroup @@ -115,12 +115,12 @@ read_iprscan_tsv <- function(filepath) { #' setwd(path_molevol_scripts) #' source("R/fa2domain.R") #' fasta <- Biostrings::readAAStringSet("./tests/example_protein.fa") -#' df_iprscan <- read_iprscan_tsv("./tests/example_iprscan_valid.tsv") +#' df_iprscan <- readIPRScanTSV("./tests/example_iprscan_valid.tsv") #' accnum <- df_iprscan$AccNum[1] -#' df_iprscan_domains <- make_df_iprscan_domains(accnum, fasta, df_iprscan) +#' df_iprscan_domains <- createIPRScanDomainTable(accnum, fasta, df_iprscan) #' } #' -make_df_iprscan_domains <- function( +createIPRScanDomainTable <- function( accnum, fasta, df_iprscan, @@ -170,17 +170,17 @@ make_df_iprscan_domains <- function( return(df_iprscan_domains) } -#' Using the table returned from make_df_iprscan_domains, construct a +#' Using the table returned from createIPRScanDomainTable, construct a #' domain fasta for a single accession number in the original fasta -#' (i.e., the original fasta argument to make_df_iprscan_domains()) +#' (i.e., the 
original fasta argument to createIPRScanDomainTable()) #' -#' @param df_iprscan_domains [tbl_df] return value from make_df_iprscan_domains +#' @param df_iprscan_domains [tbl_df] return value from createIPRScanDomainTable #' #' @importFrom Biostrings AAStringSet #' @importFrom dplyr mutate rowwise #' #' @return [AAStringSet] A domain fasta containing all the domains for a -#' single protein in the original fasta passed as an argument to make_df_iprscan_domains() +#' single protein in the original fasta passed as an argument to createIPRScanDomainTable() #' #' @examples #' \dontrun{ @@ -188,13 +188,13 @@ make_df_iprscan_domains <- function( #' setwd(path_molevol_scripts) #' source("R/fa2domain.R") #' fasta <- Biostrings::readAAStringSet("./tests/example_protein.fa") -#' df_iprscan <- read_iprscan_tsv("./tests/example_iprscan_valid.tsv") +#' df_iprscan <- readIPRScanTSV("./tests/example_iprscan_valid.tsv") #' accnum <- df_iprscan$AccNum[1] -#' df_iprscan_domains <- make_df_iprscan_domains(accnum, fasta, df_iprscan) -#' fasta_domains <- df_iprscan_domains |> df_iprscan_domains2fasta() +#' df_iprscan_domains <- createIPRScanDomainTable(accnum, fasta, df_iprscan) +#' fasta_domains <- df_iprscan_domains |> convertIPRScanDomainTable2FA() #' } #' -df_iprscan_domains2fasta <- function(df_iprscan_domains) { +convertIPRScanDomainTable2FA <- function(df_iprscan_domains) { # if there are no records (e.g., after filtering for Pfam analysis only) # the quickly return an empty AAStringSet object if (nrow(df_iprscan_domains) < 1) { @@ -228,7 +228,7 @@ df_iprscan_domains2fasta <- function(df_iprscan_domains) { return(fasta_domains) } -#' fasta2fasta_domain +#' getDomainsFromFA #' #' @param fasta [AAStringSet] a protein (AA) fasta #' @param df_iprscan [tbl_df] the interproscan results from the original fasta @@ -245,11 +245,11 @@ df_iprscan_domains2fasta <- function(df_iprscan_domains) { #' setwd(path_molevol_scripts) #' source("R/fa2domain.R") #' fasta <- 
Biostrings::readAAStringSet("./tests/example_protein.fa") -#' df_iprscan <- read_iprscan_tsv("./tests/example_iprscan_valid.tsv") -#' fasta2fasta_domain(fasta, df_iprscan) +#' df_iprscan <- readIPRScanTSV("./tests/example_iprscan_valid.tsv") +#' getDomainsFromFA(fasta, df_iprscan) #' } #' -fasta2fasta_domain <- function( +getDomainsFromFA <- function( fasta, df_iprscan, analysis = c("Pfam", "Gene3D"), @@ -270,7 +270,7 @@ fasta2fasta_domain <- function( X = names(fasta), FUN = function(header) { # parse the accession number from header - df_iprscan_domains <- make_df_iprscan_domains( + df_iprscan_domains <- createIPRScanDomainTable( header, fasta, df_iprscan, @@ -289,7 +289,7 @@ fasta2fasta_domain <- function( } return(FALSE) } - fasta_domains <- df_iprscan_domains2fasta(df_iprscan_domains) + fasta_domains <- convertIPRScanDomainTable2FA(df_iprscan_domains) parent_fasta_domains <<- c(parent_fasta_domains, fasta_domains) return(TRUE) }, From e004cd4091ae3cb111873076365619ce0ba42430 Mon Sep 17 00:00:00 2001 From: teddyCodex Date: Sun, 6 Oct 2024 20:10:30 +0100 Subject: [PATCH 03/33] refactor function names in R/cleanup.R --- R/cleanup.R | 142 ++++++++++++++++++++++++++-------------------------- 1 file changed, 71 insertions(+), 71 deletions(-) diff --git a/R/cleanup.R b/R/cleanup.R index 3a708415..f82722f2 100755 --- a/R/cleanup.R +++ b/R/cleanup.R @@ -31,10 +31,10 @@ #' @return [string] string with only alphanumerics, "_", "+", and "." #' @examples #' \dontrun{ -#' clean_string() +#' cleanString() #' } #' -clean_string <- function(string) { +cleanString <- function(string) { # replace spaces with "_" string <- stringr::str_replace_all(string, "\\s+", "_") # keep only alphanumeric characters, "_", and "." 
@@ -44,7 +44,7 @@ clean_string <- function(string) { # use the same code as upstream_scripts/00_submit_full.R's # get_sequences() function to extract accession numbers -#' string2accnum +#' extractAccNum #' #' @param string #' @@ -53,9 +53,9 @@ clean_string <- function(string) { #' #' @examples #' \dontrun{ -#' string2accnum() +#' extractAccNum() #' } -string2accnum <- function(string) { +extractAccNum <- function(string) { if (grepl("\\|", string)) { accnum <- strsplit(string, "\\|")[[1]][2] accnum <- strsplit(accnum, " ")[[1]][1] @@ -81,9 +81,9 @@ string2accnum <- function(string) { #' @examples #' \dontrun{ #' c("xxx", "xxx", "xxx", "yyy", "yyy") |> -#' make_accnums_unique() +#' ensureUniqAccNum() #' } -make_accnums_unique <- function(accnums) { +ensureUniqAccNum <- function(accnums) { # group by accnums then use the row count as a proxy # for the index of occurence for each accession number df_accnums <- tibble::tibble("accnum" = accnums) @@ -113,14 +113,14 @@ make_accnums_unique <- function(accnums) { #' @examples #' \dontrun{ #' AAStringSet(c("xxx" = "ATCG", "xxx" = "GGGC")) |> -#' cleanup_fasta_header() +#' cleanFAHeaders() #' } -cleanup_fasta_header <- function(fasta) { +cleanFAHeaders <- function(fasta) { headers <- names(fasta) # try parsing accession numbers from header headers <- purrr::map_chr( headers, - string2accnum + extractAccNum ) # sanitize string for pathing (file read/write-ing) headers <- purrr::map_chr( @@ -128,7 +128,7 @@ cleanup_fasta_header <- function(fasta) { fs::path_sanitize ) # append an index suffix for the ith occurence of each accnum - headers <- make_accnums_unique(headers) + headers <- ensureUniqAccNum(headers) names(fasta) <- headers return(fasta) } @@ -153,9 +153,9 @@ cleanup_fasta_header <- function(fasta) { #' #' @examples #' \dontrun{ -#' remove_empty(prot, "DomArch") +#' removeEmptyRows(prot, "DomArch") #' } -remove_empty <- function(prot, by_column = "DomArch") { +removeEmptyRows <- function(prot, by_column = "DomArch") { 
# ?? Don't call other psp functions within these functions prot <- prot %>% as_tibble() %>% @@ -168,7 +168,7 @@ remove_empty <- function(prot, by_column = "DomArch") { } ########################### -#' repeat2s +#' condenseRepeatedDomains #' #' @description #' Condense repeated domains @@ -181,7 +181,7 @@ remove_empty <- function(prot, by_column = "DomArch") { #' #' @param prot A data frame containing 'DomArch', 'GenContext', 'ClustName' columns. #' @param by_column Column in which repeats are condensed to domain+domain -> domain(s). -#' @param excluded_prots Vector of strings that repeat2s should not reduce to (s). Defaults to c() +#' @param excluded_prots Vector of strings that condenseRepeatedDomains should not reduce to (s). Defaults to c() #' #' @return Describe return, in detail #' @export @@ -191,10 +191,10 @@ remove_empty <- function(prot, by_column = "DomArch") { #' #' @examples #' \dontrun{ -#' repeat2s(prot, "DomArch") +#' condenseRepeatedDomains(prot, "DomArch") #' } -repeat2s <- function(prot, by_column = "DomArch", excluded_prots = c()) { - # If there are strings that repeat2s should not affect, the pattern to search +condenseRepeatedDomains <- function(prot, by_column = "DomArch", excluded_prots = c()) { + # If there are strings that condenseRepeatedDomains should not affect, the pattern to search # for must be changed to exclude a search for those desired strings collapsed_prots <- paste0(excluded_prots, collapse = "\\s|") @@ -253,10 +253,10 @@ repeat2s <- function(prot, by_column = "DomArch", excluded_prots = c()) { #' #' @examples #' \dontrun{ -#' replaceQMs() +#' replaceQuestionMarks() #' } #' -replaceQMs <- function(prot, by_column = "GenContext") { +replaceQuestionMarks <- function(prot, by_column = "GenContext") { by <- sym(by_column) # Regex for finding repeated `?` @@ -290,9 +290,9 @@ replaceQMs <- function(prot, by_column = "GenContext") { #' #' @examples #' \dontrun{ -#' remove_astrk() +#' removeAsterisks() #' } -remove_astrk <- 
function(query_data, colname = "GenContext") { +removeAsterisks <- function(query_data, colname = "GenContext") { query_data[, colname] <- map(query_data[, colname], function(x) str_remove_all(x, pattern = "\\*")) return(query_data) @@ -323,9 +323,9 @@ remove_astrk <- function(query_data, colname = "GenContext") { #' #' @examples #' \dontrun{ -#' remove_tails(prot, "DomArch") +#' removeTails(prot, "DomArch") #' } -remove_tails <- function(prot, by_column = "DomArch", +removeTails <- function(prot, by_column = "DomArch", keep_domains = FALSE) { # !! currently redundant by_column <- sym(by_column) @@ -369,7 +369,7 @@ remove_tails <- function(prot, by_column = "DomArch", #' A cleaned up version of the data table is returned. #' #' @param prot A data frame that contains columns 'Species'. -#' @param remove_empty Boolean. If TRUE, rows with empty/unnecessary values in 'Species' are removed. +#' @param removeEmptyRows Boolean. If TRUE, rows with empty/unnecessary values in 'Species' are removed. #' Default is false. #' #' @importFrom stringr coll str_replace_all @@ -379,9 +379,9 @@ remove_tails <- function(prot, by_column = "DomArch", #' #' @examples #' \dontrun{ -#' cleanup_species(prot, TRUE) +#' cleanSpecies(prot, TRUE) #' } -cleanup_species <- function(prot, remove_empty = FALSE) { +cleanSpecies <- function(prot, removeEmptyRows = FALSE) { # FUNCTIONS CALLED HERE, if else might be better since only two options, T and F # Create cleaned up Species column @@ -404,8 +404,8 @@ cleanup_species <- function(prot, remove_empty = FALSE) { str_replace_all(coll(" ", TRUE), " ") # !! CHECK !! Species vs Species_old - if (remove_empty) { - prot <- remove_empty(prot = prot, by_column = "Species") + if (removeEmptyRows) { + prot <- removeEmptyRows(prot = prot, by_column = "Species") } return(prot) @@ -425,9 +425,9 @@ cleanup_species <- function(prot, remove_empty = FALSE) { #' @param prot A data frame that must contain columns Query and ClustName. 
#' @param domains_rename A data frame containing the domain names to be replaced in a column 'old' and the corresponding replacement values in a column 'new'. #' @param domains_keep A data frame containing the domain names to be retained. -#' @param repeat2s Boolean. If TRUE, repeated domains in 'ClustName' are condensed. Default is TRUE. -#' @param remove_tails Boolean. If TRUE, 'ClustName' will be filtered based on domains to keep/remove. Default is FALSE. -#' @param remove_empty Boolean. If TRUE, rows with empty/unnecessary values in 'ClustName' are removed. Default is FALSE. +#' @param condenseRepeatedDomains Boolean. If TRUE, repeated domains in 'ClustName' are condensed. Default is TRUE. +#' @param removeTails Boolean. If TRUE, 'ClustName' will be filtered based on domains to keep/remove. Default is FALSE. +#' @param removeEmptyRows Boolean. If TRUE, rows with empty/unnecessary values in 'ClustName' are removed. Default is FALSE. #' #' @importFrom dplyr filter #' @importFrom stringr coll str_replace_all @@ -437,12 +437,12 @@ cleanup_species <- function(prot, remove_empty = FALSE) { #' #' @examples #' \dontrun{ -#' cleanup_clust(prot, TRUE, FALSE, domains_keep, domains_rename) +#' cleanClusters(prot, TRUE, FALSE, domains_keep, domains_rename) #' } -cleanup_clust <- function(prot, +cleanClusters <- function(prot, domains_rename, domains_keep, - repeat2s = TRUE, remove_tails = FALSE, - remove_empty = FALSE) { + condenseRepeatedDomains = TRUE, removeTails = FALSE, + removeEmptyRows = FALSE) { # Create cleaned up ClustName column prot$ClustName <- prot$ClustName.orig @@ -469,19 +469,19 @@ cleanup_clust <- function(prot, ## Optional parameters # Condense repeats - if (repeat2s) { - prot <- repeat2s(prot, by_column = "ClustName") + if (condenseRepeatedDomains) { + prot <- condenseRepeatedDomains(prot, by_column = "ClustName") } # Remove singletons - # if(remove_tails){ + # if(removeTails){ # prot <- prot %>% filter(!grepl(".1$", ClustID)) # } - if (remove_tails) { - 
prot <- remove_tails(prot, by_column = "ClustName") + if (removeTails) { + prot <- removeTails(prot, by_column = "ClustName") } # Remove empty rows - if (remove_empty) { - prot <- remove_empty(prot = prot, by_column = "ClustName") + if (removeEmptyRows) { + prot <- removeEmptyRows(prot = prot, by_column = "ClustName") } @@ -509,9 +509,9 @@ cleanup_clust <- function(prot, #' @param domains_keep A data frame containing the domain names to be retained. #' @param domains_rename A data frame containing the domain names to be replaced in a column 'old' and the #' corresponding replacement values in a column 'new'. -#' @param repeat2s Boolean. If TRUE, repeated domains in 'DomArch' are condensed. Default is TRUE. -#' @param remove_tails Boolean. If TRUE, 'ClustName' will be filtered based on domains to keep/remove. Default is FALSE. -#' @param remove_empty Boolean. If TRUE, rows with empty/unnecessary values in 'DomArch' are removed. Default is FALSE. +#' @param condenseRepeatedDomains Boolean. If TRUE, repeated domains in 'DomArch' are condensed. Default is TRUE. +#' @param removeTails Boolean. If TRUE, 'ClustName' will be filtered based on domains to keep/remove. Default is FALSE. +#' @param removeEmptyRows Boolean. If TRUE, rows with empty/unnecessary values in 'DomArch' are removed. Default is FALSE. 
#' @param domains_ignore A data frame containing the domain names to be removed in a column called 'domains' #' #' @importFrom dplyr pull @@ -522,12 +522,12 @@ cleanup_clust <- function(prot, #' #' @examples #' \dontrun{ -#' cleanup_domarch(prot, TRUE, FALSE, domains_keep, domains_rename, domains_ignore = NULL) +#' cleanDomainArchitecture(prot, TRUE, FALSE, domains_keep, domains_rename, domains_ignore = NULL) #' } -cleanup_domarch <- function(prot, old = "DomArch.orig", new = "DomArch", +cleanDomainArchitecture <- function(prot, old = "DomArch.orig", new = "DomArch", domains_keep, domains_rename, - repeat2s = TRUE, remove_tails = FALSE, - remove_empty = F, + condenseRepeatedDomains = TRUE, removeTails = FALSE, + removeEmptyRows = F, domains_ignore = NULL) { old_sym <- sym(old) new_sym <- sym(new) @@ -577,22 +577,22 @@ cleanup_domarch <- function(prot, old = "DomArch.orig", new = "DomArch", ## Optional parameters # Remove singletons - if (remove_tails) { - prot <- remove_tails(prot = prot, by_column = new) + if (removeTails) { + prot <- removeTails(prot = prot, by_column = new) } # Condense repeats - if (repeat2s) { + if (condenseRepeatedDomains) { ## Error in UseMethod("tbl_vars") : no applicable method for 'tbl_vars' applied to an object of class "character" - prot <- repeat2s(prot = prot, by_column = new) + prot <- condenseRepeatedDomains(prot = prot, by_column = new) } # Remove empty rows # ! FUNCTIONS CALLED HERE, if else might be better since only two options, T and F # ! Make a separate function of out of this? 
- if (remove_empty) { - prot <- remove_empty(prot = prot, by_column = new) + if (removeEmptyRows) { + prot <- removeEmptyRows(prot = prot, by_column = new) } - prot <- replaceQMs(prot, new) + prot <- replaceQuestionMarks(prot, new) return(prot) } @@ -610,7 +610,7 @@ cleanup_domarch <- function(prot, old = "DomArch.orig", new = "DomArch", #' @param prot A data frame that contains columns 'GenContext.orig' #' @param domains_rename A data frame containing the domain names to be replaced in a column 'old' and the replacement in a column 'new'. #' Defaults to an empty data frame with a new and old column such that non of the domains will be renamed -#' @param repeat2s Boolean. If TRUE, repeated domains in 'GenContext' are condensed. Default is TRUE. +#' @param condenseRepeatedDomains Boolean. If TRUE, repeated domains in 'GenContext' are condensed. Default is TRUE. #' @param remove_asterisk Boolean. If TRUE, asterisks in 'ClustName' are removed. Default is TRUE. #' #' @importFrom stringr str_replace_all @@ -620,11 +620,11 @@ cleanup_domarch <- function(prot, old = "DomArch.orig", new = "DomArch", #' #' @examples #' \dontrun{ -#' cleanup_gencontext(prot, domains_rename, T, F) +#' cleanGenomicContext(prot, domains_rename, T, F) #' } #' -cleanup_gencontext <- function(prot, domains_rename = data.frame("old" = character(0), "new" = character(0), stringsAsFactors = F), - repeat2s = TRUE, remove_asterisk = TRUE) { +cleanGenomicContext <- function(prot, domains_rename = data.frame("old" = character(0), "new" = character(0), stringsAsFactors = F), + condenseRepeatedDomains = TRUE, remove_asterisk = TRUE) { # Create cleaned up GenContext column prot$GenContext <- prot$GenContext.orig @@ -641,16 +641,16 @@ cleanup_gencontext <- function(prot, domains_rename = data.frame("old" = charact ## Reverse operons | Straighten them out! 
prot <- reverse_operon(prot) - prot <- replaceQMs(prot, "GenContext") + prot <- replaceQuestionMarks(prot, "GenContext") ## Optional parameters # Condense repeats - if (repeat2s) { - prot <- repeat2s(prot, "GenContext") + if (condenseRepeatedDomains) { + prot <- condenseRepeatedDomains(prot, "GenContext") } # Remove the Asterisks if (remove_asterisk) { - prot <- remove_astrk(prot, colname = "GenContext") + prot <- removeAsterisks(prot, colname = "GenContext") } return(prot) @@ -666,9 +666,9 @@ cleanup_gencontext <- function(prot, domains_rename = data.frame("old" = charact #' #' @examples #' \dontrun{ -#' cleanup_GeneDesc() +#' cleanGeneDescription() #' } -cleanup_GeneDesc <- function(prot, column) { +cleanGeneDescription <- function(prot, column) { prot[, "GeneDesc"] <- gsub("\\.$", "", prot %>% pull(column)) prot[, "GeneDesc"] <- gsub("%2C", ",", prot %>% pull(column)) return(prot) @@ -688,9 +688,9 @@ cleanup_GeneDesc <- function(prot, column) { #' #' @examples #' \dontrun{ -#' pick_longer_duplicate() +#' selectLongestDuplicate() #' } -pick_longer_duplicate <- function(prot, column) { +selectLongestDuplicate <- function(prot, column) { col <- sym(column) prot$row.orig <- 1:nrow(prot) @@ -736,9 +736,9 @@ pick_longer_duplicate <- function(prot, column) { #' #' @examples #' \dontrun{ -#' cleanup_lineage() +#' cleanLineage() #' } -cleanup_lineage <- function(prot, lins_rename) { +cleanLineage <- function(prot, lins_rename) { for (i in 1:nrow(lins_rename)) { prot$Lineage <- gsub(lins_rename$old[i], lins_rename$new[i], x = prot$Lineage, From a8669a444696333e862525f85bc9775cee67e539 Mon Sep 17 00:00:00 2001 From: teddyCodex Date: Mon, 7 Oct 2024 01:23:07 +0100 Subject: [PATCH 04/33] refactor function names in R/CHANGED-pre-msa-tree.R --- R/CHANGED-pre-msa-tree.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/CHANGED-pre-msa-tree.R b/R/CHANGED-pre-msa-tree.R index ef334232..c4a97589 100644 --- a/R/CHANGED-pre-msa-tree.R +++ 
b/R/CHANGED-pre-msa-tree.R @@ -2,7 +2,7 @@ ## Includes the following functions: ## convertAlignment2FA, convert2TitleCase, addLeaves2Alignment ## generateAllAlignments2FA -## convertAlignment2TSV??, convertAccNumber2FA?? +## convertAlignment2TSV??, convertAccNum2FA?? ## Created from addLeaves2Alignment.R, convertAlignment2FA.R, all_aln2fa.R ## Modified: Dec 24, 2019 | Jan 2021 ## Janani Ravi (@jananiravi) & Samuel Chen (@samuelzornchen) @@ -335,7 +335,7 @@ convertAlignment2FA <- function(aln_file = "", #' @export #' #' @examples -mapAccession2Name <- function(line, acc2name, acc_col = "AccNum", name_col = "Name") { +mapAcc2Name <- function(line, acc2name, acc_col = "AccNum", name_col = "Name") { # change to be the name equivalent to an addNames column # Find the first ' ' end_acc <- str_locate(line, " ")[[1]] @@ -365,7 +365,7 @@ mapAccession2Name <- function(line, acc2name, acc_col = "AccNum", name_col = "Na #' #' @examples renameFA <- function(fa_path, outpath, - replacement_function = mapAccession2Name, ...) { + replacement_function = mapAcc2Name, ...) 
{ lines <- read_lines(fa_path) res <- map(lines, function(x) { if (strtrim(x, 1) == ">") { @@ -663,7 +663,7 @@ get_accnums_from_fasta_file <- function(fasta_file) { ################################ -## convertAccNumber2FA +## convertAccNum2FA ####### ## 1 ## ####### From b15e294ba6321fca22c64732ed13703de9f82784 Mon Sep 17 00:00:00 2001 From: teddyCodex Date: Mon, 7 Oct 2024 01:40:30 +0100 Subject: [PATCH 05/33] refactor function names in R/fa2domain.R --- R/fa2domain.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/R/fa2domain.R b/R/fa2domain.R index ea926cb4..01a56918 100644 --- a/R/fa2domain.R +++ b/R/fa2domain.R @@ -4,7 +4,7 @@ # - a protein with no domains (unlikely) found from # interproscan CLI will return a completely empty file (0Bytes) -#' runInterProScan +#' runIPRScan #' #' @param filepath_fasta #' @param filepath_out @@ -15,7 +15,7 @@ #' @return #' #' @examples -runInterProScan <- function( +runIPRScan <- function( filepath_fasta, filepath_out, # do not inlucde file extension since ipr handles this appl = c("Pfam", "Gene3D") @@ -43,7 +43,7 @@ runInterProScan <- function( #' molevol_scripts/R/colnames_molevol.R) #' #' @return [chr] interproscan column names used throughout molevolvr -getIPRScanColnames <- function() { +getIPRScanColNames <- function() { column_names <- c( "AccNum", "SeqMD5Digest", "SLength", "Analysis", "DB.ID", "SignDesc", "StartLoc", "StopLoc", "Score", @@ -58,7 +58,7 @@ getIPRScanColnames <- function() { #' @return [collector] a named vector of type expecatations #' for interproscan columns #' -getIPRScanColtypes <- function() { +getIPRScanColTypes <- function() { column_types <- readr::cols( "AccNum" = readr::col_character(), "SeqMD5Digest" = readr::col_character(), @@ -87,8 +87,8 @@ getIPRScanColtypes <- function() { #' @return [tbl_df] interproscan output table readIPRScanTSV <- function(filepath) { df_ipr <- readr::read_tsv(filepath, - col_types = getIPRScanColtypes(), - col_names = 
getIPRScanColnames() + col_types = getIPRScanColTypes(), + col_names = getIPRScanColNames() ) return(df_ipr) } From be3966d004d5c91db4018ab7a5d2c9aac7a06ced Mon Sep 17 00:00:00 2001 From: teddyCodex Date: Mon, 7 Oct 2024 02:25:34 +0100 Subject: [PATCH 06/33] refactored function names in R/plotting.R --- R/plotting.R | 64 ++++++++++++++++++++++++++-------------------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/R/plotting.R b/R/plotting.R index 7abd06d4..da95ea5f 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -31,9 +31,9 @@ #' #' @examples #' \dontrun{ -#' shorten_lineage() +#' shortenLineage() #' } -shorten_lineage <- function(data, colname = "Lineage", abr_len = 1) { +shortenLineage <- function(data, colname = "Lineage", abr_len = 1) { abbrv <- function(x) { pos_gt <- str_locate(x, ">") pos_gt <- pos_gt[1] @@ -94,9 +94,9 @@ shorten_lineage <- function(data, colname = "Lineage", abr_len = 1) { #' #' @examples #' \dontrun{ -#' upset.plot(pspa.sub, 10, "da2doms") +#' plotUpSet(pspa.sub, 10, "da2doms") #' } -upset.plot <- function(query_data = "toast_rack.sub", +plotUpSet <- function(query_data = "toast_rack.sub", colname = "DomArch", cutoff = 90, RowsCutoff = FALSE, text.scale = 1.5, point.size = 2.2, line.size = 0.8) { @@ -259,9 +259,9 @@ upset.plot <- function(query_data = "toast_rack.sub", #' #' @examples #' \dontrun{ -#' lineage.DA.plot(toast_rack_data, 10, "DomArch.norep", "da2doms") +#' plotLineageDA(toast_rack_data, 10, "DomArch.norep", "da2doms") #' } -lineage.DA.plot <- function(query_data = "prot", +plotLineageDA <- function(query_data = "prot", colname = "DomArch", cutoff = 90, RowsCutoff = FALSE, @@ -271,7 +271,7 @@ lineage.DA.plot <- function(query_data = "prot", # @param type Character. Default is "da2doms" for Domain Architectures. # Other alternative: "gc2da" for Genomic Contexts. 
-- unused parameter - query_data <- shorten_lineage(query_data, "Lineage", abr_len = 1) + query_data <- shortenLineage(query_data, "Lineage", abr_len = 1) query.summ.byLin <- query_data %>% total_counts(cutoff = cutoff, column = colname, RowsCutoff = RowsCutoff) @@ -355,9 +355,9 @@ lineage.DA.plot <- function(query_data = "prot", #' #' @examples #' \dontrun{ -#' lineage.Query.plot(prot, c("PspA", "PspB", "PspC", "PspM", "PspN"), 95) +#' plotLineageQuery(prot, c("PspA", "PspB", "PspC", "PspM", "PspN"), 95) #' } -lineage.Query.plot <- function(query_data = all, +plotLineageQuery <- function(query_data = all, queries, colname = "ClustName", cutoff, color = "default") { @@ -394,7 +394,7 @@ lineage.Query.plot <- function(query_data = all, # query_data contains all rows that possess a lineage query_data <- query_data %>% filter(grepl("a", Lineage)) - query_data <- shorten_lineage(query_data, "Lineage", abr_len = 1) + query_data <- shortenLineage(query_data, "Lineage", abr_len = 1) query_lin_counts <- data.frame("Query" = character(0), "Lineage" = character(0), "count" = integer()) for (q in queries) { query_lin <- query_by_lineage(data = query_data, query = q, column = {{ col }}, by = "Lineage") @@ -492,10 +492,10 @@ lineage.Query.plot <- function(query_data = all, #' #' @examples #' \dontrun{ -#' lineage.neighbors.plot(pspa_data, pspa, "GenContext.norep", "da2doms") +#' plotLineageNeighbors(pspa_data, pspa, "GenContext.norep", "da2doms") #' } #' -lineage.neighbors.plot <- function(query_data = "prot", query = "pspa", +plotLineageNeighbors <- function(query_data = "prot", query = "pspa", colname = "GenContext.norep") { query_data <- query_data %>% filter(grepl("a", Lineage)) query.GCDA <- read_delim(paste0("Top-", query, "-neighbors.txt"), @@ -567,9 +567,9 @@ lineage.neighbors.plot <- function(query_data = "prot", query = "pspa", #' #' @examples #' \dontrun{ -#' lineage.domain_repeats.plot() +#' plotLineageDomainRepeats() #' } -lineage.domain_repeats.plot <- 
function(query_data, colname) { +plotLineageDomainRepeats <- function(query_data, colname) { # query_data <- pspa_data # colname <- "SIG.TM.LADB" @@ -629,7 +629,7 @@ lineage.domain_repeats.plot <- function(query_data, colname) { } -#' LineagePlot +#' plotLineageHeatmap #' #' @description #' Generate a lineage plot @@ -651,7 +651,7 @@ lineage.domain_repeats.plot <- function(query_data, colname) { #' #' @examples #' \dontrun{ -#' LineagePlot(psp_data, +#' plotLineageHeatmap(psp_data, #' c( #' "PspA", "Snf7", "Classical-AAA", "PspF", "PspB", "PspC", "ClgR", "PspM", #' "Thioredoxin", "PspN_N", "DUF3046", "LiaI-LiaF-TM", "Toast_rack", "REC", @@ -664,8 +664,8 @@ lineage.domain_repeats.plot <- function(query_data, colname) { #' ) #' } #' -LineagePlot <- function(prot, domains_of_interest, level = 3, label.size = 8) { - LevelReduction <- function(lin) { +plotLineageHeatmap <- function(prot, domains_of_interest, level = 3, label.size = 8) { + .LevelReduction <- function(lin) { if (level == 1) { gt_loc <- str_locate(lin, ">")[[1]] if (is.na(gt_loc)) { @@ -703,7 +703,7 @@ LineagePlot <- function(prot, domains_of_interest, level = 3, label.size = 8) { all_grouped <- dplyr::union(all_grouped, domSub) } - GetKingdom <- function(lin) { + .GetKingdom <- function(lin) { gt_loc <- str_locate(lin, ">")[, "start"] if (is.na(gt_loc)) { @@ -715,12 +715,12 @@ LineagePlot <- function(prot, domains_of_interest, level = 3, label.size = 8) { } } - all_grouped <- all_grouped %>% mutate(ReducedLin = unlist(purrr::map(Lineage, LevelReduction))) + all_grouped <- all_grouped %>% mutate(ReducedLin = unlist(purrr::map(Lineage, .LevelReduction))) all_grouped_reduced <- all_grouped %>% group_by(Query, ReducedLin) %>% summarize("count" = sum(count)) %>% - mutate(Kingdom = unlist(purrr::map(ReducedLin, GetKingdom))) + mutate(Kingdom = unlist(purrr::map(ReducedLin, .GetKingdom))) lin_counts <- all_grouped_reduced %>% group_by(Kingdom, ReducedLin) %>% @@ -814,9 +814,9 @@ LineagePlot <- function(prot, 
domains_of_interest, level = 3, label.size = 8) { #' #' @examples #' \dontrun{ -#' stacked_lin_plot() +#' plotStackedLineage() #' } -stacked_lin_plot <- function(prot, column = "DomArch", cutoff, Lineage_col = "Lineage", +plotStackedLineage <- function(prot, column = "DomArch", cutoff, Lineage_col = "Lineage", xlabel = "Domain Architecture", reduce_lineage = TRUE, label.size = 8, @@ -828,7 +828,7 @@ stacked_lin_plot <- function(prot, column = "DomArch", cutoff, Lineage_col = "Li col <- sym(column) if (reduce_lineage) { - prot <- shorten_lineage(prot, Lineage_col, abr_len = 3) + prot <- shortenLineage(prot, Lineage_col, abr_len = 3) } total_count <- total_counts(prot, column, cutoff, lineage_col = Lineage_col) @@ -935,7 +935,7 @@ stacked_lin_plot <- function(prot, column = "DomArch", cutoff, Lineage_col = "Li ################ #### NEEDS SOME WORK -#' Wordcloud3 +#' plotWordCloud3 #' #' @param data #' @param size @@ -1048,9 +1048,9 @@ wordcloud3 <- function(data, size = 1, minSize = 0, gridSize = 0, fontFamily = " #' #' @examples #' \dontrun{ -#' wordcloud_element(prot, "da2doms", 10) +#' createWordCloudElement(prot, "da2doms", 10) #' } -wordcloud_element <- function(query_data = "prot", +createWordCloudElement <- function(query_data = "prot", colname = "DomArch", cutoff = 70, UsingRowsCutoff = FALSE) { @@ -1125,9 +1125,9 @@ wordcloud_element <- function(query_data = "prot", #' #' @examples #' \dontrun{ -#' wordcloud_element(prot, "da2doms", 10) +#' createWordCloudElement(prot, "da2doms", 10) #' } -wordcloud2_element <- function(query_data = "prot", +createWordCloud2Element <- function(query_data = "prot", colname = "DomArch", cutoff = 70, UsingRowsCutoff = FALSE) { @@ -1194,9 +1194,9 @@ wordcloud2_element <- function(query_data = "prot", #' #' @examples #' \dontrun{ -#' lineage_sunburst() +#' plotLineageSunburst() #' } -lineage_sunburst <- function(prot, lineage_column = "Lineage", +plotLineageSunburst <- function(prot, lineage_column = "Lineage", type = "sunburst", 
levels = 2, colors = NULL, legendOrder = NULL, showLegend = TRUE, maxLevels = 5) { lin_col <- sym(lineage_column) @@ -1278,7 +1278,7 @@ lineage_sunburst <- function(prot, lineage_column = "Lineage", ## COMMENTED LINEAGE.DA.PLOT -# lineage.plot <- function(query_data, cutoff, type) { +# plotLineage <- function(query_data, cutoff, type) { # switch(type, # da2doms={wc <- DA.doms.wc; words <- toast_rack.DAdoms; colname <- "DomArch.norep"; toast_rack.summ.byLin <- toast_rack.DA.summ.byLin}, # elements <- toast_rack.DA; # gc2da={wc <- GC.DA.wc; words <- toast_rack.GCDA; colname <- "GenContext.norep"; toast_rack.summ.byLin <- toast_rack.GC.summ.byLin} # elements <- toast_rack.GC; From 8d4da8efe6a0119f55452f43a7d65d84ed2f3115 Mon Sep 17 00:00:00 2001 From: Awa Synthia Date: Mon, 7 Oct 2024 08:13:27 +0300 Subject: [PATCH 07/33] defunct functions in acc2lin Signed-off-by: Awa Synthia --- NAMESPACE | 5 ++++ R/acc2lin.R | 17 +++++++----- R/deprecate.R | 40 ++++++++++++++++++++++++++++ man/acc2lin.Rd | 68 ++++++++++++++++++++++++++++++++++++++++++----- man/add_lins.Rd | 23 ++-------------- man/deprecate.Rd | 43 ++++++++++++++++++++++++++++++ man/efetch_ipg.Rd | 17 +----------- man/ipg2lin.Rd | 24 +---------------- man/sink.reset.Rd | 19 ------------- 9 files changed, 165 insertions(+), 91 deletions(-) create mode 100644 R/deprecate.R create mode 100644 man/deprecate.Rd delete mode 100644 man/sink.reset.Rd diff --git a/NAMESPACE b/NAMESPACE index 16cf0813..4dbb858b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,14 +4,17 @@ export(BinaryDomainNetwork) export(DownloadAssemblySummary) export(GCA2lin) export(GenContextNetwork) +export(IPG2Lineage) export(LineagePlot) export(RepresentativeAccNums) +export(acc2Lineage) export(acc2fa) export(acc2lin) export(add_leaves) export(add_lins) export(add_name) export(add_tax) +export(addlineage) export(advanced_opts2est_walltime) export(alignFasta) export(assert_count_df) @@ -34,6 +37,7 @@ export(create_all_col_params) 
export(create_lineage_lookup) export(create_one_col_params) export(domain_network) +export(efetchIPG) export(efetch_ipg) export(filter_by_doms) export(filter_freq) @@ -79,6 +83,7 @@ export(run_rpsblast) export(send_job_status_email) export(shorten_lineage) export(sink.reset) +export(sinkReset) export(stacked_lin_plot) export(string2accnum) export(summ.DA) diff --git a/R/acc2lin.R b/R/acc2lin.R index f8d71949..dca24140 100644 --- a/R/acc2lin.R +++ b/R/acc2lin.R @@ -10,13 +10,14 @@ #' Sink Reset #' #' @return No return, but run to close all outstanding `sink()`s +#' @rdname acc2lin #' @export #' #' @examples #' \dontrun{ #' sink.reset() #' } -sink.reset <- function() { +sinkReset <- function() { for (i in seq_len(sink.number())) { sink(NULL) } @@ -37,14 +38,15 @@ sink.reset <- function() { #' @importFrom rlang sym #' #' @return Describe return, in detail +#' @rdname acc2lin #' @export #' #' @examples #' \dontrun{ #' add_lins() #' } -add_lins <- function(df, acc_col = "AccNum", assembly_path, - lineagelookup_path, ipgout_path = NULL, plan = "sequential") { +addlineage <- function(df, acc_col = "AccNum", assembly_path, + lineagelookup_path, ipgout_path = NULL, plan = "sequential", ...) { s_acc_col <- sym(acc_col) accessions <- df %>% pull(acc_col) lins <- acc2lin(accessions, assembly_path, lineagelookup_path, ipgout_path, plan) @@ -81,13 +83,14 @@ add_lins <- function(df, acc_col = "AccNum", assembly_path, #' @param plan #' #' @return Describe return, in detail +#' @rdname acc2lin #' @export #' #' @examples #' \dontrun{ #' acc2lin() #' } -acc2lin <- function(accessions, assembly_path, lineagelookup_path, ipgout_path = NULL, plan = "sequential") { +acc2Lineage <- function(accessions, assembly_path, lineagelookup_path, ipgout_path = NULL, plan = "sequential", ...) 
{ tmp_ipg <- F if (is.null(ipgout_path)) { tmp_ipg <- T @@ -119,13 +122,14 @@ acc2lin <- function(accessions, assembly_path, lineagelookup_path, ipgout_path = #' @importFrom rentrez entrez_fetch #' #' @return Describe return, in detail +#' @rdname acc2lin #' @export #' #' @examples #' \dontrun{ #' efetch_ipg() #' } -efetch_ipg <- function(accnums, out_path, plan = "sequential") { +efetchIPG <- function(accnums, out_path, plan = "sequential", ...) { if (length(accnums) > 0) { partition <- function(in_data, groups) { # \\TODO This function should be defined outside of efetch_ipg(). It can be non-exported/internal @@ -187,6 +191,7 @@ efetch_ipg <- function(accnums, out_path, plan = "sequential") { #' @importFrom data.table fread #' #' @return Describe return, in detail +#' @rdname acc2lin #' @export #' #' @examples @@ -194,7 +199,7 @@ efetch_ipg <- function(accnums, out_path, plan = "sequential") { #' ipg2lin() #' } #' -ipg2lin <- function(accessions, ipg_file, assembly_path, lineagelookup_path) { +IPG2Lineage <- function(accessions, ipg_file, assembly_path, lineagelookup_path, ...) { ipg_dt <- fread(ipg_file, sep = "\t", fill = T) ipg_dt <- ipg_dt[Protein %in% accessions] diff --git a/R/deprecate.R b/R/deprecate.R new file mode 100644 index 00000000..2de0bbcd --- /dev/null +++ b/R/deprecate.R @@ -0,0 +1,40 @@ +#' These functions will be deprecated. Please use other functions instead. +#' +#' @name deprecate +#' +NULL + +#' @rdname deprecate +#' @export +sink.reset <- function() { + warning("'sink.reset' is deprecated. Use 'sinkReset' instead.") + sinkReset() +} + +#' @rdname deprecate +#' @export +add_lins <- function(df, ...) { + warning("'add_lins' is deprecated. Use 'addlineage' instead.") + addlineage(df, ...) +} + +#' @rdname deprecate +#' @export +acc2lin <- function(accessions, ...) { + warning("'acc2lin' is deprecated. Use 'acc2Lineage' instead.") + acc2Lineage(accessions, ...) +} + +#' @rdname deprecate +#' @export +efetch_ipg <- function(accnums, ...) 
{ + warning("'efetch_ipg' is deprecated. Use 'efetchIPG' instead.") + efetchIPG(accnums, ...) +} + +#' @rdname deprecate +#' @export +ipg2lin <- function(accessions, ...) { + warning("'ipg2lin' is deprecated. Use 'IPG2Lineage' instead.") + IPG2Lineage(accessions, ...) +} \ No newline at end of file diff --git a/man/acc2lin.Rd b/man/acc2lin.Rd index 6255b290..f008be5f 100644 --- a/man/acc2lin.Rd +++ b/man/acc2lin.Rd @@ -1,17 +1,39 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/acc2lin.R, R/lineage.R -\name{acc2lin} +\name{sinkReset} +\alias{sinkReset} +\alias{addlineage} +\alias{acc2Lineage} +\alias{efetchIPG} +\alias{IPG2Lineage} \alias{acc2lin} -\title{acc2lin} +\title{Sink Reset} \usage{ -acc2lin( +sinkReset() + +addlineage( + df, + acc_col = "AccNum", + assembly_path, + lineagelookup_path, + ipgout_path = NULL, + plan = "sequential", + ... +) + +acc2Lineage( accessions, assembly_path, lineagelookup_path, ipgout_path = NULL, - plan = "multicore" + plan = "sequential", + ... ) +efetchIPG(accnums, out_path, plan = "sequential", ...) + +IPG2Lineage(accessions, ipg_file, assembly_path, lineagelookup_path, ...) + acc2lin( accessions, assembly_path, @@ -21,8 +43,6 @@ acc2lin( ) } \arguments{ -\item{accessions}{Character vector of protein accessions} - \item{assembly_path}{String of the path to the assembly_summary path This file can be generated using the "DownloadAssemblySummary()" function} @@ -33,14 +53,37 @@ This file can be generated using the "DownloadAssemblySummary()" function} on the ipg database. If NULL, the file will not be written. Defaults to NULL} \item{plan}{} + +\item{accessions}{Character vector of protein accessions} + +\item{accnums}{Character vector containing the accession numbers to query on +the ipg database} + +\item{out_path}{Path to write the efetch results to} + +\item{ipg_file}{Filepath to the file containing results of an efetch run on the +ipg database. 
The protein accession in 'accessions' should be contained in this +file} } \value{ +No return, but run to close all outstanding \code{sink()}s + +Describe return, in detail + +Describe return, in detail + +Describe return, in detail + Describe return, in detail } \description{ This function combines 'efetch_ipg()' and 'ipg2lin()' to map a set of protein accessions to their assembly (GCA_ID), tax ID, and lineage. +Perform efetch on the ipg database and write the results to out_path + +Takes the resulting file of an efetch run on the ipg database and + Function to map protein accession numbers to lineage This function combines 'efetch_ipg()' and 'ipg2lin()' to map a set @@ -48,8 +91,21 @@ of protein accessions to their assembly (GCA_ID), tax ID, and lineage. } \examples{ \dontrun{ +sink.reset() +} +\dontrun{ +add_lins() +} +\dontrun{ acc2lin() } +\dontrun{ +efetch_ipg() +} +\dontrun{ +ipg2lin() +} + } \author{ Samuel Chen, Janani Ravi diff --git a/man/add_lins.Rd b/man/add_lins.Rd index 226e428d..9ac343ea 100644 --- a/man/add_lins.Rd +++ b/man/add_lins.Rd @@ -1,18 +1,9 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/acc2lin.R, R/lineage.R +% Please edit documentation in R/lineage.R \name{add_lins} \alias{add_lins} -\title{Add Lineages} +\title{add_lins} \usage{ -add_lins( - df, - acc_col = "AccNum", - assembly_path, - lineagelookup_path, - ipgout_path = NULL, - plan = "multicore" -) - add_lins( df, acc_col = "AccNum", @@ -25,16 +16,6 @@ add_lins( \arguments{ \item{plan}{} } -\value{ -Describe return, in detail -} \description{ -Add Lineages - add_lins } -\examples{ -\dontrun{ -add_lins() -} -} diff --git a/man/deprecate.Rd b/man/deprecate.Rd new file mode 100644 index 00000000..b8f0731f --- /dev/null +++ b/man/deprecate.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/deprecate.R +\name{deprecate} +\alias{deprecate} +\alias{sink.reset} +\alias{add_lins} +\alias{acc2lin} 
+\alias{efetch_ipg} +\alias{ipg2lin} +\title{These functions will be deprecated. Please use other functions instead.} +\usage{ +sink.reset() + +add_lins( + df, + acc_col = "AccNum", + assembly_path, + lineagelookup_path, + ipgout_path = NULL, + plan = "multicore" +) + +acc2lin( + accessions, + assembly_path, + lineagelookup_path, + ipgout_path = NULL, + plan = "multicore" +) + +efetch_ipg(accessions, out_path, plan = "multicore") + +ipg2lin( + accessions, + ipg_file, + refseq_assembly_path, + genbank_assembly_path, + lineagelookup_path +) +} +\description{ +These functions will be deprecated. Please use other functions instead. +} diff --git a/man/efetch_ipg.Rd b/man/efetch_ipg.Rd index ec5b6bcb..efe1e8c5 100644 --- a/man/efetch_ipg.Rd +++ b/man/efetch_ipg.Rd @@ -1,11 +1,9 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/acc2lin.R, R/lineage.R +% Please edit documentation in R/lineage.R \name{efetch_ipg} \alias{efetch_ipg} \title{efetch_ipg} \usage{ -efetch_ipg(accessions, out_path, plan = "multicore") - efetch_ipg(accessions, out_path, plan = "multicore") } \arguments{ @@ -15,22 +13,9 @@ the ipg database} \item{out_path}{Path to write the efetch results to} \item{plan}{} - -\item{accnums}{Character vector containing the accession numbers to query on -the ipg database} -} -\value{ -Describe return, in detail } \description{ Perform efetch on the ipg database and write the results to out_path - -Perform efetch on the ipg database and write the results to out_path -} -\examples{ -\dontrun{ -efetch_ipg() -} } \author{ Samuel Chen, Janani Ravi diff --git a/man/ipg2lin.Rd b/man/ipg2lin.Rd index 3a14eada..6e2b4c6f 100644 --- a/man/ipg2lin.Rd +++ b/man/ipg2lin.Rd @@ -1,17 +1,9 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/acc2lin.R, R/lineage.R +% Please edit documentation in R/lineage.R \name{ipg2lin} \alias{ipg2lin} \title{ipg2lin} \usage{ -ipg2lin( - accessions, - ipg_file, - refseq_assembly_path, - 
genbank_assembly_path, - lineagelookup_path -) - ipg2lin( accessions, ipg_file, @@ -30,24 +22,10 @@ file} \item{lineagelookup_path}{String of the path to the lineage lookup file (taxid to lineage mapping). This file can be generated using the "create_lineage_lookup()" function} - -\item{assembly_path}{String of the path to the assembly_summary path -This file can be generated using the "DownloadAssemblySummary()" function} -} -\value{ -Describe return, in detail } \description{ -Takes the resulting file of an efetch run on the ipg database and - Takes the resulting file of an efetch run on the ipg database and append lineage, and taxid columns -} -\examples{ -\dontrun{ -ipg2lin() -} - } \author{ Samuel Chen, Janani Ravi diff --git a/man/sink.reset.Rd b/man/sink.reset.Rd deleted file mode 100644 index a31b841d..00000000 --- a/man/sink.reset.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/acc2lin.R -\name{sink.reset} -\alias{sink.reset} -\title{Sink Reset} -\usage{ -sink.reset() -} -\value{ -No return, but run to close all outstanding \code{sink()}s -} -\description{ -Sink Reset -} -\examples{ -\dontrun{ -sink.reset() -} -} From 3804b9969edc1a6966740829e711056d8e9a05f3 Mon Sep 17 00:00:00 2001 From: teddyCodex Date: Mon, 7 Oct 2024 09:26:13 +0100 Subject: [PATCH 08/33] update NAMESPACE with roxygen2 --- NAMESPACE | 22 +++++++++---------- ..._element.Rd => createWordCloud2Element.Rd} | 8 +++---- ...d_element.Rd => createWordCloudElement.Rd} | 8 +++---- man/{lineage.DA.plot.Rd => plotLineageDA.Rd} | 8 +++---- ...ts.plot.Rd => plotLineageDomainRepeats.Rd} | 8 +++---- man/{LineagePlot.Rd => plotLineageHeatmap.Rd} | 10 ++++----- ...ghbors.plot.Rd => plotLineageNeighbors.Rd} | 8 +++---- ...eage.Query.plot.Rd => plotLineageQuery.Rd} | 8 +++---- ...age_sunburst.Rd => plotLineageSunburst.Rd} | 8 +++---- ...cked_lin_plot.Rd => plotStackedLineage.Rd} | 8 +++---- man/{upset.plot.Rd => plotUpSet.Rd} | 8 +++---- 
man/{shorten_lineage.Rd => shortenLineage.Rd} | 8 +++---- man/wordcloud3.Rd | 4 ++-- 13 files changed, 58 insertions(+), 58 deletions(-) rename man/{wordcloud2_element.Rd => createWordCloud2Element.Rd} (87%) rename man/{wordcloud_element.Rd => createWordCloudElement.Rd} (88%) rename man/{lineage.DA.plot.Rd => plotLineageDA.Rd} (90%) rename man/{lineage.domain_repeats.plot.Rd => plotLineageDomainRepeats.Rd} (61%) rename man/{LineagePlot.Rd => plotLineageHeatmap.Rd} (84%) rename man/{lineage.neighbors.plot.Rd => plotLineageNeighbors.Rd} (87%) rename man/{lineage.Query.plot.Rd => plotLineageQuery.Rd} (84%) rename man/{lineage_sunburst.Rd => plotLineageSunburst.Rd} (92%) rename man/{stacked_lin_plot.Rd => plotStackedLineage.Rd} (84%) rename man/{upset.plot.Rd => plotUpSet.Rd} (94%) rename man/{shorten_lineage.Rd => shortenLineage.Rd} (63%) diff --git a/NAMESPACE b/NAMESPACE index 16cf0813..59de0ad0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,7 +4,6 @@ export(BinaryDomainNetwork) export(DownloadAssemblySummary) export(GCA2lin) export(GenContextNetwork) -export(LineagePlot) export(RepresentativeAccNums) export(acc2fa) export(acc2lin) @@ -30,6 +29,8 @@ export(convert_fa2tre) export(count_bycol) export(count_to_sunburst) export(count_to_treemap) +export(createWordCloud2Element) +export(createWordCloudElement) export(create_all_col_params) export(create_lineage_lookup) export(create_one_col_params) @@ -52,17 +53,20 @@ export(get_proc_weights) export(ipg2lin) export(ipr2viz) export(ipr2viz_web) -export(lineage.DA.plot) -export(lineage.Query.plot) -export(lineage.domain_repeats.plot) -export(lineage.neighbors.plot) -export(lineage_sunburst) export(make_job_results_url) export(make_opts2procs) export(map_acc2name) export(map_advanced_opts2procs) export(msa_pdf) export(pick_longer_duplicate) +export(plotLineageDA) +export(plotLineageDomainRepeats) +export(plotLineageHeatmap) +export(plotLineageNeighbors) +export(plotLineageQuery) +export(plotLineageSunburst) 
+export(plotStackedLineage) +export(plotUpSet) export(plot_estimated_walltimes) export(prot2tax) export(prot2tax_old) @@ -77,9 +81,8 @@ export(reverse_operon) export(run_deltablast) export(run_rpsblast) export(send_job_status_email) -export(shorten_lineage) +export(shortenLineage) export(sink.reset) -export(stacked_lin_plot) export(string2accnum) export(summ.DA) export(summ.DA.byLin) @@ -90,10 +93,7 @@ export(summarize_bylin) export(theme_genes2) export(to_titlecase) export(total_counts) -export(upset.plot) -export(wordcloud2_element) export(wordcloud3) -export(wordcloud_element) export(write.MsaAAMultipleAlignment) export(write_proc_medians_table) export(write_proc_medians_yml) diff --git a/man/wordcloud2_element.Rd b/man/createWordCloud2Element.Rd similarity index 87% rename from man/wordcloud2_element.Rd rename to man/createWordCloud2Element.Rd index fbd97b60..a6279e2f 100644 --- a/man/wordcloud2_element.Rd +++ b/man/createWordCloud2Element.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/plotting.R -\name{wordcloud2_element} -\alias{wordcloud2_element} +\name{createWordCloud2Element} +\alias{createWordCloud2Element} \title{Wordclouds for the predominant domains, domain architectures.} \usage{ -wordcloud2_element( +createWordCloud2Element( query_data = "prot", colname = "DomArch", cutoff = 70, @@ -33,7 +33,7 @@ column names. 
} \examples{ \dontrun{ -wordcloud_element(prot, "da2doms", 10) +createWordCloudElement(prot, "da2doms", 10) } } \author{ diff --git a/man/wordcloud_element.Rd b/man/createWordCloudElement.Rd similarity index 88% rename from man/wordcloud_element.Rd rename to man/createWordCloudElement.Rd index bffddb28..7f27ef41 100644 --- a/man/wordcloud_element.Rd +++ b/man/createWordCloudElement.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/plotting.R -\name{wordcloud_element} -\alias{wordcloud_element} +\name{createWordCloudElement} +\alias{createWordCloudElement} \title{Wordclouds for the predominant domains, domain architectures} \usage{ -wordcloud_element( +createWordCloudElement( query_data = "prot", colname = "DomArch", cutoff = 70, @@ -33,7 +33,7 @@ column names. } \examples{ \dontrun{ -wordcloud_element(prot, "da2doms", 10) +createWordCloudElement(prot, "da2doms", 10) } } \author{ diff --git a/man/lineage.DA.plot.Rd b/man/plotLineageDA.Rd similarity index 90% rename from man/lineage.DA.plot.Rd rename to man/plotLineageDA.Rd index f938d70d..7e84bcfd 100644 --- a/man/lineage.DA.plot.Rd +++ b/man/plotLineageDA.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/plotting.R -\name{lineage.DA.plot} -\alias{lineage.DA.plot} +\name{plotLineageDA} +\alias{plotLineageDA} \title{Lineage Plot: Heatmap of Domains/DAs/GCs vs Lineages} \usage{ -lineage.DA.plot( +plotLineageDA( query_data = "prot", colname = "DomArch", cutoff = 90, @@ -40,7 +40,7 @@ column names. 
} \examples{ \dontrun{ -lineage.DA.plot(toast_rack_data, 10, "DomArch.norep", "da2doms") +plotLineageDA(toast_rack_data, 10, "DomArch.norep", "da2doms") } } \author{ diff --git a/man/lineage.domain_repeats.plot.Rd b/man/plotLineageDomainRepeats.Rd similarity index 61% rename from man/lineage.domain_repeats.plot.Rd rename to man/plotLineageDomainRepeats.Rd index 6cee0cac..8ccfba41 100644 --- a/man/lineage.domain_repeats.plot.Rd +++ b/man/plotLineageDomainRepeats.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/plotting.R -\name{lineage.domain_repeats.plot} -\alias{lineage.domain_repeats.plot} +\name{plotLineageDomainRepeats} +\alias{plotLineageDomainRepeats} \title{Lineage Domain Repeats Plot} \usage{ -lineage.domain_repeats.plot(query_data, colname) +plotLineageDomainRepeats(query_data, colname) } \arguments{ \item{colname}{} @@ -14,6 +14,6 @@ Lineage Domain Repeats Plot } \examples{ \dontrun{ -lineage.domain_repeats.plot() +plotLineageDomainRepeats() } } diff --git a/man/LineagePlot.Rd b/man/plotLineageHeatmap.Rd similarity index 84% rename from man/LineagePlot.Rd rename to man/plotLineageHeatmap.Rd index 6aed2fb9..5449f8ec 100644 --- a/man/LineagePlot.Rd +++ b/man/plotLineageHeatmap.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/plotting.R -\name{LineagePlot} -\alias{LineagePlot} -\title{LineagePlot} +\name{plotLineageHeatmap} +\alias{plotLineageHeatmap} +\title{plotLineageHeatmap} \usage{ -LineagePlot(prot, domains_of_interest, level = 3, label.size = 8) +plotLineageHeatmap(prot, domains_of_interest, level = 3, label.size = 8) } \arguments{ \item{prot}{Data frame containing DomArch and Lineage Columns} @@ -20,7 +20,7 @@ Generate a lineage plot } \examples{ \dontrun{ -LineagePlot(psp_data, +plotLineageHeatmap(psp_data, c( "PspA", "Snf7", "Classical-AAA", "PspF", "PspB", "PspC", "ClgR", "PspM", "Thioredoxin", "PspN_N", "DUF3046", "LiaI-LiaF-TM", "Toast_rack", 
"REC", diff --git a/man/lineage.neighbors.plot.Rd b/man/plotLineageNeighbors.Rd similarity index 87% rename from man/lineage.neighbors.plot.Rd rename to man/plotLineageNeighbors.Rd index b8394838..85adf175 100644 --- a/man/lineage.neighbors.plot.Rd +++ b/man/plotLineageNeighbors.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/plotting.R -\name{lineage.neighbors.plot} -\alias{lineage.neighbors.plot} +\name{plotLineageNeighbors} +\alias{plotLineageNeighbors} \title{Lineage Plot for top neighbors} \usage{ -lineage.neighbors.plot( +plotLineageNeighbors( query_data = "prot", query = "pspa", colname = "GenContext.norep" @@ -35,7 +35,7 @@ column names. } \examples{ \dontrun{ -lineage.neighbors.plot(pspa_data, pspa, "GenContext.norep", "da2doms") +plotLineageNeighbors(pspa_data, pspa, "GenContext.norep", "da2doms") } } diff --git a/man/lineage.Query.plot.Rd b/man/plotLineageQuery.Rd similarity index 84% rename from man/lineage.Query.plot.Rd rename to man/plotLineageQuery.Rd index 84ceb683..ad52a4d2 100644 --- a/man/lineage.Query.plot.Rd +++ b/man/plotLineageQuery.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/plotting.R -\name{lineage.Query.plot} -\alias{lineage.Query.plot} +\name{plotLineageQuery} +\alias{plotLineageQuery} \title{Lineage Plot: Heatmap of Queries vs Lineages} \usage{ -lineage.Query.plot( +plotLineageQuery( query_data = all, queries, colname = "ClustName", @@ -30,7 +30,7 @@ column names. 
} \examples{ \dontrun{ -lineage.Query.plot(prot, c("PspA", "PspB", "PspC", "PspM", "PspN"), 95) +plotLineageQuery(prot, c("PspA", "PspB", "PspC", "PspM", "PspN"), 95) } } \keyword{Architectures,} diff --git a/man/lineage_sunburst.Rd b/man/plotLineageSunburst.Rd similarity index 92% rename from man/lineage_sunburst.Rd rename to man/plotLineageSunburst.Rd index 38872bf5..972bbe5d 100644 --- a/man/lineage_sunburst.Rd +++ b/man/plotLineageSunburst.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/plotting.R -\name{lineage_sunburst} -\alias{lineage_sunburst} +\name{plotLineageSunburst} +\alias{plotLineageSunburst} \title{Lineage Sunburst} \usage{ -lineage_sunburst( +plotLineageSunburst( prot, lineage_column = "Lineage", type = "sunburst", @@ -37,6 +37,6 @@ Lineage Sunburst } \examples{ \dontrun{ -lineage_sunburst() +plotLineageSunburst() } } diff --git a/man/stacked_lin_plot.Rd b/man/plotStackedLineage.Rd similarity index 84% rename from man/stacked_lin_plot.Rd rename to man/plotStackedLineage.Rd index de97cd7f..9d1cde6d 100644 --- a/man/stacked_lin_plot.Rd +++ b/man/plotStackedLineage.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/plotting.R -\name{stacked_lin_plot} -\alias{stacked_lin_plot} +\name{plotStackedLineage} +\alias{plotStackedLineage} \title{Stacked Lineage Plot} \usage{ -stacked_lin_plot( +plotStackedLineage( prot, column = "DomArch", cutoff, @@ -28,6 +28,6 @@ Stacked Lineage Plot } \examples{ \dontrun{ -stacked_lin_plot() +plotStackedLineage() } } diff --git a/man/upset.plot.Rd b/man/plotUpSet.Rd similarity index 94% rename from man/upset.plot.Rd rename to man/plotUpSet.Rd index 3537f849..84169987 100644 --- a/man/upset.plot.Rd +++ b/man/plotUpSet.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/plotting.R -\name{upset.plot} -\alias{upset.plot} +\name{plotUpSet} +\alias{plotUpSet} \title{UpSet Plot} \usage{ 
-upset.plot( +plotUpSet( query_data = "toast_rack.sub", colname = "DomArch", cutoff = 90, @@ -45,7 +45,7 @@ column names. } \examples{ \dontrun{ -upset.plot(pspa.sub, 10, "da2doms") +plotUpSet(pspa.sub, 10, "da2doms") } } \author{ diff --git a/man/shorten_lineage.Rd b/man/shortenLineage.Rd similarity index 63% rename from man/shorten_lineage.Rd rename to man/shortenLineage.Rd index db86271e..f495fb32 100644 --- a/man/shorten_lineage.Rd +++ b/man/shortenLineage.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/plotting.R -\name{shorten_lineage} -\alias{shorten_lineage} +\name{shortenLineage} +\alias{shortenLineage} \title{Shorten Lineage} \usage{ -shorten_lineage(data, colname = "Lineage", abr_len = 1) +shortenLineage(data, colname = "Lineage", abr_len = 1) } \arguments{ \item{abr_len}{} @@ -14,6 +14,6 @@ Shorten Lineage } \examples{ \dontrun{ -shorten_lineage() +shortenLineage() } } diff --git a/man/wordcloud3.Rd b/man/wordcloud3.Rd index 62eed454..cce07a82 100644 --- a/man/wordcloud3.Rd +++ b/man/wordcloud3.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/plotting.R \name{wordcloud3} \alias{wordcloud3} -\title{Wordcloud3} +\title{plotWordCloud3} \usage{ wordcloud3( data, @@ -28,5 +28,5 @@ wordcloud3( \item{hoverFunction}{} } \description{ -Wordcloud3 +plotWordCloud3 } From 2c1ce1a213d7b36395acbd76d11cb4bf6b8a89f7 Mon Sep 17 00:00:00 2001 From: teddyCodex Date: Mon, 7 Oct 2024 09:33:02 +0100 Subject: [PATCH 09/33] update NAMESPACE using roxygen2 --- NAMESPACE | 26 +++++++++---------- man/{cleanup_clust.Rd => cleanClusters.Rd} | 20 +++++++------- ..._domarch.Rd => cleanDomainArchitecture.Rd} | 20 +++++++------- ...anup_fasta_header.Rd => cleanFAHeaders.Rd} | 8 +++--- ...up_GeneDesc.Rd => cleanGeneDescription.Rd} | 8 +++--- ...p_gencontext.Rd => cleanGenomicContext.Rd} | 12 ++++----- man/{cleanup_lineage.Rd => cleanLineage.Rd} | 8 +++--- man/{cleanup_species.Rd => cleanSpecies.Rd} | 10 +++---- man/{clean_string.Rd 
=> cleanString.Rd} | 8 +++--- ...repeat2s.Rd => condenseRepeatedDomains.Rd} | 12 ++++----- ..._accnums_unique.Rd => ensureUniqAccNum.Rd} | 8 +++--- man/{string2accnum.Rd => extractAccNum.Rd} | 12 ++++----- man/{remove_astrk.Rd => removeAsterisks.Rd} | 8 +++--- man/{remove_empty.Rd => removeEmptyRows.Rd} | 8 +++--- man/{remove_tails.Rd => removeTails.Rd} | 8 +++--- ...{replaceQMs.Rd => replaceQuestionMarks.Rd} | 8 +++--- ...duplicate.Rd => selectLongestDuplicate.Rd} | 8 +++--- 17 files changed, 96 insertions(+), 96 deletions(-) rename man/{cleanup_clust.Rd => cleanClusters.Rd} (59%) rename man/{cleanup_domarch.Rd => cleanDomainArchitecture.Rd} (66%) rename man/{cleanup_fasta_header.Rd => cleanFAHeaders.Rd} (78%) rename man/{cleanup_GeneDesc.Rd => cleanGeneDescription.Rd} (70%) rename man/{cleanup_gencontext.Rd => cleanGenomicContext.Rd} (78%) rename man/{cleanup_lineage.Rd => cleanLineage.Rd} (71%) rename man/{cleanup_species.Rd => cleanSpecies.Rd} (70%) rename man/{clean_string.Rd => cleanString.Rd} (84%) rename man/{repeat2s.Rd => condenseRepeatedDomains.Rd} (67%) rename man/{make_accnums_unique.Rd => ensureUniqAccNum.Rd} (80%) rename man/{string2accnum.Rd => extractAccNum.Rd} (63%) rename man/{remove_astrk.Rd => removeAsterisks.Rd} (72%) rename man/{remove_empty.Rd => removeEmptyRows.Rd} (84%) rename man/{remove_tails.Rd => removeTails.Rd} (83%) rename man/{replaceQMs.Rd => replaceQuestionMarks.Rd} (73%) rename man/{pick_longer_duplicate.Rd => selectLongestDuplicate.Rd} (67%) diff --git a/NAMESPACE b/NAMESPACE index 16cf0813..9724f0dd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -16,15 +16,16 @@ export(advanced_opts2est_walltime) export(alignFasta) export(assert_count_df) export(assign_job_queue) -export(cleanup_GeneDesc) -export(cleanup_clust) -export(cleanup_domarch) -export(cleanup_gencontext) -export(cleanup_lineage) -export(cleanup_species) +export(cleanClusters) +export(cleanDomainArchitecture) +export(cleanGeneDescription) +export(cleanGenomicContext) 
+export(cleanLineage) +export(cleanSpecies) export(combine_files) export(combine_full) export(combine_ipr) +export(condenseRepeatedDomains) export(convert_aln2fa) export(convert_fa2tre) export(count_bycol) @@ -35,6 +36,7 @@ export(create_lineage_lookup) export(create_one_col_params) export(domain_network) export(efetch_ipg) +export(extractAccNum) export(filter_by_doms) export(filter_freq) export(find_paralogs) @@ -62,25 +64,23 @@ export(make_opts2procs) export(map_acc2name) export(map_advanced_opts2procs) export(msa_pdf) -export(pick_longer_duplicate) export(plot_estimated_walltimes) export(prot2tax) export(prot2tax_old) -export(remove_astrk) -export(remove_empty) -export(remove_tails) +export(removeAsterisks) +export(removeEmptyRows) +export(removeTails) export(rename_fasta) -export(repeat2s) -export(replaceQMs) +export(replaceQuestionMarks) export(reveql) export(reverse_operon) export(run_deltablast) export(run_rpsblast) +export(selectLongestDuplicate) export(send_job_status_email) export(shorten_lineage) export(sink.reset) export(stacked_lin_plot) -export(string2accnum) export(summ.DA) export(summ.DA.byLin) export(summ.GC) diff --git a/man/cleanup_clust.Rd b/man/cleanClusters.Rd similarity index 59% rename from man/cleanup_clust.Rd rename to man/cleanClusters.Rd index 4eed8be8..7ef4f3b9 100644 --- a/man/cleanup_clust.Rd +++ b/man/cleanClusters.Rd @@ -1,16 +1,16 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cleanup.R -\name{cleanup_clust} -\alias{cleanup_clust} +\name{cleanClusters} +\alias{cleanClusters} \title{Cleanup Clust} \usage{ -cleanup_clust( +cleanClusters( prot, domains_rename, domains_keep, - repeat2s = TRUE, - remove_tails = FALSE, - remove_empty = FALSE + condenseRepeatedDomains = TRUE, + removeTails = FALSE, + removeEmptyRows = FALSE ) } \arguments{ @@ -20,11 +20,11 @@ cleanup_clust( \item{domains_keep}{A data frame containing the domain names to be retained.} -\item{repeat2s}{Boolean. 
If TRUE, repeated domains in 'ClustName' are condensed. Default is TRUE.} +\item{condenseRepeatedDomains}{Boolean. If TRUE, repeated domains in 'ClustName' are condensed. Default is TRUE.} -\item{remove_tails}{Boolean. If TRUE, 'ClustName' will be filtered based on domains to keep/remove. Default is FALSE.} +\item{removeTails}{Boolean. If TRUE, 'ClustName' will be filtered based on domains to keep/remove. Default is FALSE.} -\item{remove_empty}{Boolean. If TRUE, rows with empty/unnecessary values in 'ClustName' are removed. Default is FALSE.} +\item{removeEmptyRows}{Boolean. If TRUE, rows with empty/unnecessary values in 'ClustName' are removed. Default is FALSE.} } \value{ Cleaned up data frame @@ -39,6 +39,6 @@ The return value is the cleaned up data frame. } \examples{ \dontrun{ -cleanup_clust(prot, TRUE, FALSE, domains_keep, domains_rename) +cleanClusters(prot, TRUE, FALSE, domains_keep, domains_rename) } } diff --git a/man/cleanup_domarch.Rd b/man/cleanDomainArchitecture.Rd similarity index 66% rename from man/cleanup_domarch.Rd rename to man/cleanDomainArchitecture.Rd index 21955509..887b5388 100644 --- a/man/cleanup_domarch.Rd +++ b/man/cleanDomainArchitecture.Rd @@ -1,18 +1,18 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cleanup.R -\name{cleanup_domarch} -\alias{cleanup_domarch} +\name{cleanDomainArchitecture} +\alias{cleanDomainArchitecture} \title{Cleanup DomArch} \usage{ -cleanup_domarch( +cleanDomainArchitecture( prot, old = "DomArch.orig", new = "DomArch", domains_keep, domains_rename, - repeat2s = TRUE, - remove_tails = FALSE, - remove_empty = F, + condenseRepeatedDomains = TRUE, + removeTails = FALSE, + removeEmptyRows = F, domains_ignore = NULL ) } @@ -24,11 +24,11 @@ cleanup_domarch( \item{domains_rename}{A data frame containing the domain names to be replaced in a column 'old' and the corresponding replacement values in a column 'new'.} -\item{repeat2s}{Boolean. 
If TRUE, repeated domains in 'DomArch' are condensed. Default is TRUE.} +\item{condenseRepeatedDomains}{Boolean. If TRUE, repeated domains in 'DomArch' are condensed. Default is TRUE.} -\item{remove_tails}{Boolean. If TRUE, 'ClustName' will be filtered based on domains to keep/remove. Default is FALSE.} +\item{removeTails}{Boolean. If TRUE, 'ClustName' will be filtered based on domains to keep/remove. Default is FALSE.} -\item{remove_empty}{Boolean. If TRUE, rows with empty/unnecessary values in 'DomArch' are removed. Default is FALSE.} +\item{removeEmptyRows}{Boolean. If TRUE, rows with empty/unnecessary values in 'DomArch' are removed. Default is FALSE.} \item{domains_ignore}{A data frame containing the domain names to be removed in a column called 'domains'} } @@ -46,6 +46,6 @@ The original data frame is returned with the clean DomArchs column and the old d } \examples{ \dontrun{ -cleanup_domarch(prot, TRUE, FALSE, domains_keep, domains_rename, domains_ignore = NULL) +cleanDomainArchitecture(prot, TRUE, FALSE, domains_keep, domains_rename, domains_ignore = NULL) } } diff --git a/man/cleanup_fasta_header.Rd b/man/cleanFAHeaders.Rd similarity index 78% rename from man/cleanup_fasta_header.Rd rename to man/cleanFAHeaders.Rd index 416f6be2..e9ad9b30 100644 --- a/man/cleanup_fasta_header.Rd +++ b/man/cleanFAHeaders.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cleanup.R -\name{cleanup_fasta_header} -\alias{cleanup_fasta_header} +\name{cleanFAHeaders} +\alias{cleanFAHeaders} \title{Cleanup FASTA Header} \usage{ -cleanup_fasta_header(fasta) +cleanFAHeaders(fasta) } \arguments{ \item{fasta}{} @@ -19,6 +19,6 @@ suffix of the ith occurence to handle duplicates \examples{ \dontrun{ AAStringSet(c("xxx" = "ATCG", "xxx" = "GGGC")) |> - cleanup_fasta_header() + cleanFAHeaders() } } diff --git a/man/cleanup_GeneDesc.Rd b/man/cleanGeneDescription.Rd similarity index 70% rename from man/cleanup_GeneDesc.Rd rename to 
man/cleanGeneDescription.Rd index 3068fe49..f98a25d4 100644 --- a/man/cleanup_GeneDesc.Rd +++ b/man/cleanGeneDescription.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cleanup.R -\name{cleanup_GeneDesc} -\alias{cleanup_GeneDesc} +\name{cleanGeneDescription} +\alias{cleanGeneDescription} \title{Cleanup GeneDesc} \usage{ -cleanup_GeneDesc(prot, column) +cleanGeneDescription(prot, column) } \arguments{ \item{column}{} @@ -17,6 +17,6 @@ Cleanup GeneDesc } \examples{ \dontrun{ -cleanup_GeneDesc() +cleanGeneDescription() } } diff --git a/man/cleanup_gencontext.Rd b/man/cleanGenomicContext.Rd similarity index 78% rename from man/cleanup_gencontext.Rd rename to man/cleanGenomicContext.Rd index 8e26a447..2c2dcc18 100644 --- a/man/cleanup_gencontext.Rd +++ b/man/cleanGenomicContext.Rd @@ -1,14 +1,14 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cleanup.R -\name{cleanup_gencontext} -\alias{cleanup_gencontext} +\name{cleanGenomicContext} +\alias{cleanGenomicContext} \title{Cleanup Genomic Contexts} \usage{ -cleanup_gencontext( +cleanGenomicContext( prot, domains_rename = data.frame(old = character(0), new = character(0), stringsAsFactors = F), - repeat2s = TRUE, + condenseRepeatedDomains = TRUE, remove_asterisk = TRUE ) } @@ -18,7 +18,7 @@ cleanup_gencontext( \item{domains_rename}{A data frame containing the domain names to be replaced in a column 'old' and the replacement in a column 'new'. Defaults to an empty data frame with a new and old column such that non of the domains will be renamed} -\item{repeat2s}{Boolean. If TRUE, repeated domains in 'GenContext' are condensed. Default is TRUE.} +\item{condenseRepeatedDomains}{Boolean. If TRUE, repeated domains in 'GenContext' are condensed. Default is TRUE.} \item{remove_asterisk}{Boolean. If TRUE, asterisks in 'ClustName' are removed. Default is TRUE.} } @@ -33,7 +33,7 @@ A cleaned up version of the data table is returned. 
} \examples{ \dontrun{ -cleanup_gencontext(prot, domains_rename, T, F) +cleanGenomicContext(prot, domains_rename, T, F) } } diff --git a/man/cleanup_lineage.Rd b/man/cleanLineage.Rd similarity index 71% rename from man/cleanup_lineage.Rd rename to man/cleanLineage.Rd index 35669f4e..adcea312 100644 --- a/man/cleanup_lineage.Rd +++ b/man/cleanLineage.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cleanup.R -\name{cleanup_lineage} -\alias{cleanup_lineage} +\name{cleanLineage} +\alias{cleanLineage} \title{Cleanup Lineage} \usage{ -cleanup_lineage(prot, lins_rename) +cleanLineage(prot, lins_rename) } \arguments{ \item{lins_rename}{} @@ -17,6 +17,6 @@ Cleanup Lineage } \examples{ \dontrun{ -cleanup_lineage() +cleanLineage() } } diff --git a/man/cleanup_species.Rd b/man/cleanSpecies.Rd similarity index 70% rename from man/cleanup_species.Rd rename to man/cleanSpecies.Rd index beedb23c..82b5444c 100644 --- a/man/cleanup_species.Rd +++ b/man/cleanSpecies.Rd @@ -1,15 +1,15 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cleanup.R -\name{cleanup_species} -\alias{cleanup_species} +\name{cleanSpecies} +\alias{cleanSpecies} \title{Cleanup Species} \usage{ -cleanup_species(prot, remove_empty = FALSE) +cleanSpecies(prot, removeEmptyRows = FALSE) } \arguments{ \item{prot}{A data frame that contains columns 'Species'.} -\item{remove_empty}{Boolean. If TRUE, rows with empty/unnecessary values in 'Species' are removed. +\item{removeEmptyRows}{Boolean. If TRUE, rows with empty/unnecessary values in 'Species' are removed. Default is false.} } \value{ @@ -25,6 +25,6 @@ A cleaned up version of the data table is returned. 
} \examples{ \dontrun{ -cleanup_species(prot, TRUE) +cleanSpecies(prot, TRUE) } } diff --git a/man/clean_string.Rd b/man/cleanString.Rd similarity index 84% rename from man/clean_string.Rd rename to man/cleanString.Rd index a17a95bb..0dc2937e 100644 --- a/man/clean_string.Rd +++ b/man/cleanString.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cleanup.R -\name{clean_string} -\alias{clean_string} +\name{cleanString} +\alias{cleanString} \title{Clean String} \usage{ -clean_string(string) +cleanString(string) } \arguments{ \item{string}{} @@ -19,7 +19,7 @@ cleanup domain architecture values } \examples{ \dontrun{ -clean_string() +cleanString() } } diff --git a/man/repeat2s.Rd b/man/condenseRepeatedDomains.Rd similarity index 67% rename from man/repeat2s.Rd rename to man/condenseRepeatedDomains.Rd index 30a09cc6..3b239129 100644 --- a/man/repeat2s.Rd +++ b/man/condenseRepeatedDomains.Rd @@ -1,17 +1,17 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cleanup.R -\name{repeat2s} -\alias{repeat2s} -\title{repeat2s} +\name{condenseRepeatedDomains} +\alias{condenseRepeatedDomains} +\title{condenseRepeatedDomains} \usage{ -repeat2s(prot, by_column = "DomArch", excluded_prots = c()) +condenseRepeatedDomains(prot, by_column = "DomArch", excluded_prots = c()) } \arguments{ \item{prot}{A data frame containing 'DomArch', 'GenContext', 'ClustName' columns.} \item{by_column}{Column in which repeats are condensed to domain+domain -> domain(s).} -\item{excluded_prots}{Vector of strings that repeat2s should not reduce to (s). Defaults to c()} +\item{excluded_prots}{Vector of strings that condenseRepeatedDomains should not reduce to (s). Defaults to c()} } \value{ Describe return, in detail @@ -27,6 +27,6 @@ The original data frame is returned with the corresponding cleaned up column. 
} \examples{ \dontrun{ -repeat2s(prot, "DomArch") +condenseRepeatedDomains(prot, "DomArch") } } diff --git a/man/make_accnums_unique.Rd b/man/ensureUniqAccNum.Rd similarity index 80% rename from man/make_accnums_unique.Rd rename to man/ensureUniqAccNum.Rd index 62866a24..ddb4a70d 100644 --- a/man/make_accnums_unique.Rd +++ b/man/ensureUniqAccNum.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cleanup.R -\name{make_accnums_unique} -\alias{make_accnums_unique} +\name{ensureUniqAccNum} +\alias{ensureUniqAccNum} \title{make accnums unique} \usage{ -make_accnums_unique(accnums) +ensureUniqAccNum(accnums) } \arguments{ \item{accnums}{\link{chr} a vector of accession numbers} @@ -19,6 +19,6 @@ character vector) making them unique \examples{ \dontrun{ c("xxx", "xxx", "xxx", "yyy", "yyy") |> - make_accnums_unique() + ensureUniqAccNum() } } diff --git a/man/string2accnum.Rd b/man/extractAccNum.Rd similarity index 63% rename from man/string2accnum.Rd rename to man/extractAccNum.Rd index dd7de249..15870f3f 100644 --- a/man/string2accnum.Rd +++ b/man/extractAccNum.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cleanup.R -\name{string2accnum} -\alias{string2accnum} -\title{string2accnum} +\name{extractAccNum} +\alias{extractAccNum} +\title{extractAccNum} \usage{ -string2accnum(string) +extractAccNum(string) } \arguments{ \item{string}{} @@ -13,10 +13,10 @@ string2accnum(string) Describe return, in detail } \description{ -string2accnum +extractAccNum } \examples{ \dontrun{ -string2accnum() +extractAccNum() } } diff --git a/man/remove_astrk.Rd b/man/removeAsterisks.Rd similarity index 72% rename from man/remove_astrk.Rd rename to man/removeAsterisks.Rd index 3562521d..691a7adf 100644 --- a/man/remove_astrk.Rd +++ b/man/removeAsterisks.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cleanup.R -\name{remove_astrk} 
-\alias{remove_astrk} +\name{removeAsterisks} +\alias{removeAsterisks} \title{Remove Astrk} \usage{ -remove_astrk(query_data, colname = "GenContext") +removeAsterisks(query_data, colname = "GenContext") } \arguments{ \item{colname}{} @@ -18,6 +18,6 @@ Used for removing * from GenContext columns } \examples{ \dontrun{ -remove_astrk() +removeAsterisks() } } diff --git a/man/remove_empty.Rd b/man/removeEmptyRows.Rd similarity index 84% rename from man/remove_empty.Rd rename to man/removeEmptyRows.Rd index cfbf707b..66551810 100644 --- a/man/remove_empty.Rd +++ b/man/removeEmptyRows.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cleanup.R -\name{remove_empty} -\alias{remove_empty} +\name{removeEmptyRows} +\alias{removeEmptyRows} \title{Remove Empty} \usage{ -remove_empty(prot, by_column = "DomArch") +removeEmptyRows(prot, by_column = "DomArch") } \arguments{ \item{prot}{A data frame containing 'DomArch', 'Species', 'GenContext', 'ClustName' columns.} @@ -25,6 +25,6 @@ The original data frame is returned with the corresponding cleaned up column. } \examples{ \dontrun{ -remove_empty(prot, "DomArch") +removeEmptyRows(prot, "DomArch") } } diff --git a/man/remove_tails.Rd b/man/removeTails.Rd similarity index 83% rename from man/remove_tails.Rd rename to man/removeTails.Rd index 1cd20861..76d1e18a 100644 --- a/man/remove_tails.Rd +++ b/man/removeTails.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cleanup.R -\name{remove_tails} -\alias{remove_tails} +\name{removeTails} +\alias{removeTails} \title{Remove Tails} \usage{ -remove_tails(prot, by_column = "DomArch", keep_domains = FALSE) +removeTails(prot, by_column = "DomArch", keep_domains = FALSE) } \arguments{ \item{prot}{A data frame containing 'DomArch', 'GenContext', 'ClustName' columns.} @@ -25,6 +25,6 @@ The original data frame is returned with the corresponding cleaned up column. 
} \examples{ \dontrun{ -remove_tails(prot, "DomArch") +removeTails(prot, "DomArch") } } diff --git a/man/replaceQMs.Rd b/man/replaceQuestionMarks.Rd similarity index 73% rename from man/replaceQMs.Rd rename to man/replaceQuestionMarks.Rd index 604a8ece..0949568f 100644 --- a/man/replaceQMs.Rd +++ b/man/replaceQuestionMarks.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cleanup.R -\name{replaceQMs} -\alias{replaceQMs} +\name{replaceQuestionMarks} +\alias{replaceQuestionMarks} \title{Replace QMs} \usage{ -replaceQMs(prot, by_column = "GenContext") +replaceQuestionMarks(prot, by_column = "GenContext") } \arguments{ \item{prot}{DataTable to operate on} @@ -20,7 +20,7 @@ Replace '?' with 'X' } \examples{ \dontrun{ -replaceQMs() +replaceQuestionMarks() } } diff --git a/man/pick_longer_duplicate.Rd b/man/selectLongestDuplicate.Rd similarity index 67% rename from man/pick_longer_duplicate.Rd rename to man/selectLongestDuplicate.Rd index d7858da7..c177d289 100644 --- a/man/pick_longer_duplicate.Rd +++ b/man/selectLongestDuplicate.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cleanup.R -\name{pick_longer_duplicate} -\alias{pick_longer_duplicate} +\name{selectLongestDuplicate} +\alias{selectLongestDuplicate} \title{Pick Longer Duplicate} \usage{ -pick_longer_duplicate(prot, column) +selectLongestDuplicate(prot, column) } \arguments{ \item{column}{} @@ -17,6 +17,6 @@ Pick Longer Duplicate } \examples{ \dontrun{ -pick_longer_duplicate() +selectLongestDuplicate() } } From 96d0ddf982d6d472c5ffba2bac4f8e641cd81176 Mon Sep 17 00:00:00 2001 From: teddyCodex Date: Mon, 7 Oct 2024 10:04:49 +0100 Subject: [PATCH 10/33] refactor function names in R/tree.R --- NAMESPACE | 6 +++--- R/tree.R | 16 ++++++++-------- ...nerate_trees.Rd => convertAlignment2Trees.Rd} | 8 ++++---- man/{convert_fa2tre.Rd => convertFA2Tree.Rd} | 10 +++++----- man/{generate_fa2tre.Rd => createFA2Tree.Rd} | 8 
++++---- 5 files changed, 24 insertions(+), 24 deletions(-) rename man/{generate_trees.Rd => convertAlignment2Trees.Rd} (56%) rename man/{convert_fa2tre.Rd => convertFA2Tree.Rd} (73%) rename man/{generate_fa2tre.Rd => createFA2Tree.Rd} (89%) diff --git a/NAMESPACE b/NAMESPACE index 16cf0813..49c9d02f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -25,11 +25,13 @@ export(cleanup_species) export(combine_files) export(combine_full) export(combine_ipr) +export(convertAlignment2Trees) +export(convertFA2Tree) export(convert_aln2fa) -export(convert_fa2tre) export(count_bycol) export(count_to_sunburst) export(count_to_treemap) +export(createFA2Tree) export(create_all_col_params) export(create_lineage_lookup) export(create_one_col_params) @@ -42,9 +44,7 @@ export(find_top_acc) export(format_job_args) export(gc_undirected_network) export(generate_all_aln2fa) -export(generate_fa2tre) export(generate_msa) -export(generate_trees) export(get_accnums_from_fasta_file) export(get_job_message) export(get_proc_medians) diff --git a/R/tree.R b/R/tree.R index 01e9ead5..8eb641d9 100755 --- a/R/tree.R +++ b/R/tree.R @@ -1,6 +1,6 @@ ## Generating Phylogenetic Trees from Alignment Fasta files ## Includes the following functions: -## generate_trees, convert_fa2tre, generate_fa2tre +## convertAlignment2Trees, convertFA2Tree, createFA2Tree ## Modified: Jan, 2020 ## Janani Ravi (@jananiravi), Molecular Ecologist (@molecologist) @@ -35,7 +35,7 @@ ## Approach 0 | FastTree2.0 ########################### ## !! FastTree will only work if there are unique sequence names!! 
-#' convert_fa2tre +#' convertFA2Tree #' #' @param fa_path #' @param tre_path @@ -45,7 +45,7 @@ #' @export #' #' @examples -convert_fa2tre <- function(fa_path = here("data/alns/pspa_snf7.fa"), +convertFA2Tree <- function(fa_path = here("data/alns/pspa_snf7.fa"), tre_path = here("data/alns/pspa_snf7.tre"), fasttree_path = here("src/FastTree")) { # fa_path=here("data/alns/pspa_snf7.fa") @@ -67,7 +67,7 @@ convert_fa2tre <- function(fa_path = here("data/alns/pspa_snf7.fa"), # here("src/FastTree.c"), "-lm", collapse=" ")) } ## Generate Trees for ALL fasta files in "data/alns" -#' generate_trees +#' convertAlignment2Trees #' #' @description #' Generate Trees for ALL fasta files in "data/alns" @@ -82,7 +82,7 @@ convert_fa2tre <- function(fa_path = here("data/alns/pspa_snf7.fa"), #' @export #' #' @examples -generate_trees <- function(aln_path = here("data/alns/")) { +convertAlignment2Trees <- function(aln_path = here("data/alns/")) { # finding all fasta alignment files fa_filenames <- list.files(path = aln_path, pattern = "*.fa") fa_paths <- paste0(aln_path, fa_filenames) @@ -96,7 +96,7 @@ generate_trees <- function(aln_path = here("data/alns/")) { tre_path = paste0(aln_path, variable, ".tre") ) pmap( - .l = fa2tre_args, .f = convert_fa2tre, + .l = fa2tre_args, .f = convertFA2Tree, fasttree_path = here("src/FastTree") ) } @@ -104,7 +104,7 @@ generate_trees <- function(aln_path = here("data/alns/")) { ############################## ## REFS: 1-4 ############ -#' generate_fa2tre +#' createFA2Tree #' #' @author Janani Ravi, MolEcologist #' @keywords phylogenetic tree, alignment, fasta @@ -134,7 +134,7 @@ generate_trees <- function(aln_path = here("data/alns/")) { #' \dontrun{ #' generate_aln2tree("pspa_snf7.fa") #' } -generate_fa2tre <- function(fa_file = "data/alns/pspa_snf7.fa", +createFA2Tree <- function(fa_file = "data/alns/pspa_snf7.fa", out_file = "data/alns/pspa_snf7.tre") { ## SAMPLE ARGS # fa_file="data/alns/pspa_snf7.fa" diff --git a/man/generate_trees.Rd 
b/man/convertAlignment2Trees.Rd similarity index 56% rename from man/generate_trees.Rd rename to man/convertAlignment2Trees.Rd index 43bd7243..002f5203 100644 --- a/man/generate_trees.Rd +++ b/man/convertAlignment2Trees.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/tree.R -\name{generate_trees} -\alias{generate_trees} -\title{generate_trees} +\name{convertAlignment2Trees} +\alias{convertAlignment2Trees} +\title{convertAlignment2Trees} \usage{ -generate_trees(aln_path = here("data/alns/")) +convertAlignment2Trees(aln_path = here("data/alns/")) } \arguments{ \item{aln_path}{} diff --git a/man/convert_fa2tre.Rd b/man/convertFA2Tree.Rd similarity index 73% rename from man/convert_fa2tre.Rd rename to man/convertFA2Tree.Rd index 87c59d67..b2fb93de 100644 --- a/man/convert_fa2tre.Rd +++ b/man/convertFA2Tree.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/tree.R -\name{convert_fa2tre} -\alias{convert_fa2tre} -\title{convert_fa2tre} +\name{convertFA2Tree} +\alias{convertFA2Tree} +\title{convertFA2Tree} \usage{ -convert_fa2tre( +convertFA2Tree( fa_path = here("data/alns/pspa_snf7.fa"), tre_path = here("data/alns/pspa_snf7.tre"), fasttree_path = here("src/FastTree") @@ -14,5 +14,5 @@ convert_fa2tre( \item{fasttree_path}{} } \description{ -convert_fa2tre +convertFA2Tree } diff --git a/man/generate_fa2tre.Rd b/man/createFA2Tree.Rd similarity index 89% rename from man/generate_fa2tre.Rd rename to man/createFA2Tree.Rd index b70848bb..76da7807 100644 --- a/man/generate_fa2tre.Rd +++ b/man/createFA2Tree.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/tree.R -\name{generate_fa2tre} -\alias{generate_fa2tre} -\title{generate_fa2tre} +\name{createFA2Tree} +\alias{createFA2Tree} +\title{createFA2Tree} \usage{ -generate_fa2tre( +createFA2Tree( fa_file = "data/alns/pspa_snf7.fa", out_file = "data/alns/pspa_snf7.tre" ) From 
0e8c24536070806557f7139dfd08b072e2bada5d Mon Sep 17 00:00:00 2001 From: teddyCodex Date: Mon, 7 Oct 2024 10:17:08 +0100 Subject: [PATCH 11/33] update namespace file with roxygen2 --- NAMESPACE | 8 +++++ man/acc2fa.Rd | 13 ++------ man/addLeaves2Alignment.Rd | 50 ++++++++++++++++++++++++++++++ man/addName.Rd | 39 +++++++++++++++++++++++ man/add_leaves.Rd | 21 +------------ man/add_name.Rd | 18 ++--------- man/convert2TitleCase.Rd | 24 ++++++++++++++ man/convertAlignment2FA.Rd | 55 +++++++++++++++++++++++++++++++++ man/convert_aln2fa.Rd | 21 +------------ man/generateAllAlignments2FA.Rd | 45 +++++++++++++++++++++++++++ man/generate_all_aln2fa.Rd | 18 +---------- man/mapAcc2Name.Rd | 21 +++++++++++++ man/map_acc2name.Rd | 6 +--- man/renameFA.Rd | 20 ++++++++++++ man/rename_fasta.Rd | 6 +--- man/to_titlecase.Rd | 10 ++---- 16 files changed, 273 insertions(+), 102 deletions(-) create mode 100644 man/addLeaves2Alignment.Rd create mode 100644 man/addName.Rd create mode 100644 man/convert2TitleCase.Rd create mode 100644 man/convertAlignment2FA.Rd create mode 100644 man/generateAllAlignments2FA.Rd create mode 100644 man/mapAcc2Name.Rd create mode 100644 man/renameFA.Rd diff --git a/NAMESPACE b/NAMESPACE index 16cf0813..94423e66 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,8 +6,11 @@ export(GCA2lin) export(GenContextNetwork) export(LineagePlot) export(RepresentativeAccNums) +export(acc2FA) export(acc2fa) export(acc2lin) +export(addLeaves2Alignment) +export(addName) export(add_leaves) export(add_lins) export(add_name) @@ -25,6 +28,8 @@ export(cleanup_species) export(combine_files) export(combine_full) export(combine_ipr) +export(convert2TitleCase) +export(convertAlignment2FA) export(convert_aln2fa) export(convert_fa2tre) export(count_bycol) @@ -41,6 +46,7 @@ export(find_paralogs) export(find_top_acc) export(format_job_args) export(gc_undirected_network) +export(generateAllAlignments2FA) export(generate_all_aln2fa) export(generate_fa2tre) export(generate_msa) @@ -59,6 +65,7 @@ 
export(lineage.neighbors.plot) export(lineage_sunburst) export(make_job_results_url) export(make_opts2procs) +export(mapAcc2Name) export(map_acc2name) export(map_advanced_opts2procs) export(msa_pdf) @@ -69,6 +76,7 @@ export(prot2tax_old) export(remove_astrk) export(remove_empty) export(remove_tails) +export(renameFA) export(rename_fasta) export(repeat2s) export(replaceQMs) diff --git a/man/acc2fa.Rd b/man/acc2fa.Rd index d4e4ee71..158b2d51 100644 --- a/man/acc2fa.Rd +++ b/man/acc2fa.Rd @@ -1,11 +1,9 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/CHANGED-pre-msa-tree.R, R/pre-msa-tree.R +% Please edit documentation in R/pre-msa-tree.R \name{acc2fa} \alias{acc2fa} -\title{acc2fa converts protein accession numbers to a fasta format.} +\title{acc2fa} \usage{ -acc2fa(accessions, outpath, plan = "sequential") - acc2fa(accessions, outpath, plan = "sequential") } \arguments{ @@ -17,8 +15,6 @@ Function may not work for vectors of length > 10,000} \item{plan}{} } \description{ -Resulting fasta file is written to the outpath. - acc2fa converts protein accession numbers to a fasta format. Resulting fasta file is written to the outpath. 
} @@ -28,11 +24,6 @@ acc2fa(accessions = c("ACU53894.1", "APJ14606.1", "ABK37082.1"), outpath = "my_p Entrez:accessions <- rep("ANY95992.1", 201) |> acc2fa(outpath = "entrez.fa") EBI:accessions <- c("P12345", "Q9UHC1", "O15530", "Q14624", "P0DTD1") |> acc2fa(outpath = "ebi.fa") } -\dontrun{ -acc2fa(accessions = c("ACU53894.1", "APJ14606.1", "ABK37082.1"), outpath = "my_proteins.fasta") -Entrez:accessions <- rep("ANY95992.1", 201) |> acc2fa(outpath = "entrez.fa") -EBI:accessions <- c("P12345", "Q9UHC1", "O15530", "Q14624", "P0DTD1") |> acc2fa(outpath = "ebi.fa") -} } \author{ Samuel Chen, Janani Ravi diff --git a/man/addLeaves2Alignment.Rd b/man/addLeaves2Alignment.Rd new file mode 100644 index 00000000..a758ebd5 --- /dev/null +++ b/man/addLeaves2Alignment.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CHANGED-pre-msa-tree.R +\name{addLeaves2Alignment} +\alias{addLeaves2Alignment} +\title{Adding Leaves to an alignment file w/ accessions} +\usage{ +addLeaves2Alignment( + aln_file = "", + lin_file = "data/rawdata_tsv/all_semiclean.txt", + reduced = FALSE +) +} +\arguments{ +\item{aln_file}{haracter. Path to file. Input tab-delimited file + +alignment file accnum & alignment. +Default is 'pspa_snf7.aln'} + +\item{lin_file}{Character. Path to file. Protein file with accession + +number to lineage mapping. +Default is 'pspa.txt'} + +\item{reduced}{Boolean. If TRUE, a reduced data frame will be generated with +only one sequence per lineage. Default is FALSE.} +} +\description{ +Adding Leaves to an alignment file w/ accessions +Genomic Contexts vs Domain Architectures. +} +\details{ +The alignment file would need two columns: 1. accession + +number and 2. alignment. The protein homolog accession to lineage mapping + +file should have +} +\note{ +Please refer to the source code if you have alternate + +file formats and/or column names. 
+} +\examples{ +\dontrun{ +addLeaves2Alignment("pspa_snf7.aln", "pspa.txt") +} +} +\author{ +Janani Ravi +} +\keyword{accnum,} +\keyword{alignment,} +\keyword{leaves,} +\keyword{lineage,} +\keyword{species} diff --git a/man/addName.Rd b/man/addName.Rd new file mode 100644 index 00000000..e04f9849 --- /dev/null +++ b/man/addName.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CHANGED-pre-msa-tree.R +\name{addName} +\alias{addName} +\title{Add Name} +\usage{ +addName( + data, + accnum_col = "AccNum", + spec_col = "Species", + lin_col = "Lineage", + lin_sep = ">", + out_col = "Name" +) +} +\arguments{ +\item{data}{Data to add name column to} + +\item{accnum_col}{Column containing accession numbers} + +\item{spec_col}{Column containing species} + +\item{lin_col}{Column containing lineage} + +\item{lin_sep}{Character separating lineage levels} + +\item{out_col}{Column that contains the new 'Name' derived from Species, +Lineage, and AccNum info} +} +\value{ +Original data with a 'Name' column +} +\description{ +This function adds a new 'Name' column that is comprised of components from +Kingdom, Phylum, Genus, and species, as well as the accession +} +\author{ +Samuel Chen, Janani Ravi +} diff --git a/man/add_leaves.Rd b/man/add_leaves.Rd index 6d3b3f91..f1eeed10 100644 --- a/man/add_leaves.Rd +++ b/man/add_leaves.Rd @@ -1,15 +1,9 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/CHANGED-pre-msa-tree.R, R/pre-msa-tree.R +% Please edit documentation in R/pre-msa-tree.R \name{add_leaves} \alias{add_leaves} \title{Adding Leaves to an alignment file w/ accessions} \usage{ -add_leaves( - aln_file = "", - lin_file = "data/rawdata_tsv/all_semiclean.txt", - reduced = FALSE -) - add_leaves( aln_file = "", lin_file = "data/rawdata_tsv/all_semiclean.txt", @@ -29,25 +23,15 @@ Default is 'pspa.txt'} only one sequence per lineage. 
Default is FALSE.} } \description{ -Adding Leaves to an alignment file w/ accessions -Genomic Contexts vs Domain Architectures. - Adding Leaves to an alignment file w/ accessions Genomic Contexts vs Domain Architectures. } \details{ -The alignment file would need two columns: 1. accession + -number and 2. alignment. The protein homolog accession to lineage mapping + -file should have - The alignment file would need two columns: 1. accession + number and 2. alignment. The protein homolog accession to lineage mapping + file should have } \note{ -Please refer to the source code if you have alternate + -file formats and/or column names. - Please refer to the source code if you have alternate + file formats and/or column names. } @@ -55,9 +39,6 @@ file formats and/or column names. \dontrun{ add_leaves("pspa_snf7.aln", "pspa.txt") } -\dontrun{ -add_leaves("pspa_snf7.aln", "pspa.txt") -} } \author{ Janani Ravi diff --git a/man/add_name.Rd b/man/add_name.Rd index c4fce392..f19139e1 100644 --- a/man/add_name.Rd +++ b/man/add_name.Rd @@ -1,18 +1,9 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/CHANGED-pre-msa-tree.R, R/pre-msa-tree.R +% Please edit documentation in R/pre-msa-tree.R \name{add_name} \alias{add_name} -\title{Add Name} +\title{Title} \usage{ -add_name( - data, - accnum_col = "AccNum", - spec_col = "Species", - lin_col = "Lineage", - lin_sep = ">", - out_col = "Name" -) - add_name( data, accnum_col = "AccNum", @@ -37,14 +28,9 @@ add_name( Lineage, and AccNum info} } \value{ -Original data with a 'Name' column - Original data with a 'Name' column } \description{ -This function adds a new 'Name' column that is comprised of components from -Kingdom, Phylum, Genus, and species, as well as the accession - This function adds a new 'Name' column that is comprised of components from Kingdom, Phylum, Genus, and species, as well as the accession } diff --git a/man/convert2TitleCase.Rd b/man/convert2TitleCase.Rd new file mode 100644 index 
00000000..84e7fa00 --- /dev/null +++ b/man/convert2TitleCase.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CHANGED-pre-msa-tree.R +\name{convert2TitleCase} +\alias{convert2TitleCase} +\alias{totitle} +\alias{to_title} +\title{Changing case to 'Title Case'} +\usage{ +convert2TitleCase(x, y = " ") +} +\arguments{ +\item{x}{Character vector.} + +\item{y}{Delimiter. Default is space (" ").} +} +\description{ +Translate string to Title Case w/ delimiter. +} +\seealso{ +chartr, toupper, and tolower. +} +\author{ +Andrie, Janani Ravi +} diff --git a/man/convertAlignment2FA.Rd b/man/convertAlignment2FA.Rd new file mode 100644 index 00000000..d6b4dc56 --- /dev/null +++ b/man/convertAlignment2FA.Rd @@ -0,0 +1,55 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CHANGED-pre-msa-tree.R +\name{convertAlignment2FA} +\alias{convertAlignment2FA} +\title{Adding Leaves to an alignment file w/ accessions} +\usage{ +convertAlignment2FA( + aln_file = "", + lin_file = "data/rawdata_tsv/all_semiclean.txt", + fa_outpath = "", + reduced = FALSE +) +} +\arguments{ +\item{aln_file}{Character. Path to file. Input tab-delimited file + +alignment file accnum & alignment. +Default is 'pspa_snf7.aln'} + +\item{lin_file}{Character. Path to file. Protein file with accession + +number to lineage mapping. +Default is 'pspa.txt'} + +\item{fa_outpath}{Character. Path to the written fasta file. +Default is 'NULL'} + +\item{reduced}{Boolean. If TRUE, the fasta file will contain only one sequence per lineage. +Default is 'FALSE'} +} +\description{ +Adding Leaves to an alignment file w/ accessions +Genomic Contexts vs Domain Architectures. +} +\details{ +The alignment file would need two columns: 1. accession + +number and 2. alignment. The protein homolog accession to lineage mapping + +file should have +} +\note{ +Please refer to the source code if you have alternate + +file formats and/or column names.
+} +\examples{ +\dontrun{ +addLeaves2Alignment("pspa_snf7.aln", "pspa.txt") +} + +} +\author{ +Janani Ravi +} +\keyword{accnum,} +\keyword{alignment,} +\keyword{leaves,} +\keyword{lineage,} +\keyword{species} diff --git a/man/convert_aln2fa.Rd b/man/convert_aln2fa.Rd index 3e9812df..8bebe31d 100644 --- a/man/convert_aln2fa.Rd +++ b/man/convert_aln2fa.Rd @@ -1,16 +1,9 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/CHANGED-pre-msa-tree.R, R/pre-msa-tree.R +% Please edit documentation in R/pre-msa-tree.R \name{convert_aln2fa} \alias{convert_aln2fa} \title{Adding Leaves to an alignment file w/ accessions} \usage{ -convert_aln2fa( - aln_file = "", - lin_file = "data/rawdata_tsv/all_semiclean.txt", - fa_outpath = "", - reduced = FALSE -) - convert_aln2fa( aln_file = "", lin_file = "data/rawdata_tsv/all_semiclean.txt", @@ -35,29 +28,17 @@ Default is 'FALSE'} } \description{ Adding Leaves to an alignment file w/ accessions -Genomic Contexts vs Domain Architectures. } \details{ -The alignment file would need two columns: 1. accession + -number and 2. alignment. The protein homolog accession to lineage mapping + -file should have - The alignment file would need two columns: 1. accession + number and 2. alignment. The protein homolog accession to lineage mapping + file should have } \note{ -Please refer to the source code if you have alternate + -file formats and/or column names. - Please refer to the source code if you have alternate + file formats and/or column names. 
} \examples{ -\dontrun{ -add_leaves("pspa_snf7.aln", "pspa.txt") -} - \dontrun{ add_leaves("pspa_snf7.aln", "pspa.txt") } diff --git a/man/generateAllAlignments2FA.Rd b/man/generateAllAlignments2FA.Rd new file mode 100644 index 00000000..3bf9938a --- /dev/null +++ b/man/generateAllAlignments2FA.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CHANGED-pre-msa-tree.R +\name{generateAllAlignments2FA} +\alias{generateAllAlignments2FA} +\title{Adding Leaves to an alignment file w/ accessions} +\usage{ +generateAllAlignments2FA( + aln_path = here("data/rawdata_aln/"), + fa_outpath = here("data/alns/"), + lin_file = here("data/rawdata_tsv/all_semiclean.txt"), + reduced = F +) +} +\arguments{ +\item{aln_path}{Character. Path to alignment files. +Default is 'here("data/rawdata_aln/")'} + +\item{fa_outpath}{Character. Path to the written fasta file. +Default is 'here("data/alns/")'.} + +\item{lin_file}{Character. Path to file. Master protein file with AccNum & lineages. +Default is 'here("data/rawdata_tsv/all_semiclean.txt")'} + +\item{reduced}{Boolean. If TRUE, the fasta file will contain only one sequence per lineage. +Default is 'FALSE'.} +} +\description{ +Adding Leaves to all alignment files w/ accessions & DAs? +} +\details{ +The alignment files would need two columns separated by spaces: 1. AccNum and 2. alignment. The protein homolog file should have AccNum, Species, Lineages. +} +\note{ +Please refer to the source code if you have alternate + file formats and/or column names.
+} +\examples{ +\dontrun{ +generateAllAlignments2FA() +} +} +\keyword{accnum,} +\keyword{alignment,} +\keyword{leaves,} +\keyword{lineage,} +\keyword{species} diff --git a/man/generate_all_aln2fa.Rd b/man/generate_all_aln2fa.Rd index 7f99c981..ad6b7136 100644 --- a/man/generate_all_aln2fa.Rd +++ b/man/generate_all_aln2fa.Rd @@ -1,16 +1,9 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/CHANGED-pre-msa-tree.R, R/pre-msa-tree.R +% Please edit documentation in R/pre-msa-tree.R \name{generate_all_aln2fa} \alias{generate_all_aln2fa} \title{Adding Leaves to an alignment file w/ accessions} \usage{ -generate_all_aln2fa( - aln_path = here("data/rawdata_aln/"), - fa_outpath = here("data/alns/"), - lin_file = here("data/rawdata_tsv/all_semiclean.txt"), - reduced = F -) - generate_all_aln2fa( aln_path = here("data/rawdata_aln/"), fa_outpath = here("data/alns/"), @@ -32,27 +25,18 @@ Default is 'here("data/rawdata_tsv/all_semiclean.txt")'} Default is 'FALSE'.} } \description{ -Adding Leaves to all alignment files w/ accessions & DAs? - Adding Leaves to all alignment files w/ accessions & DAs? } \details{ -The alignment files would need two columns separated by spaces: 1. AccNum and 2. alignment. The protein homolog file should have AccNum, Species, Lineages. - The alignment files would need two columns separated by spaces: 1. AccNum and 2. alignment. The protein homolog file should have AccNum, Species, Lineages. } \note{ -Please refer to the source code if you have alternate + file formats and/or column names. - Please refer to the source code if you have alternate + file formats and/or column names. 
} \examples{ \dontrun{ generate_all_aln2fa() } -\dontrun{ -generate_all_aln2fa() -} } \author{ Janani Ravi diff --git a/man/mapAcc2Name.Rd b/man/mapAcc2Name.Rd new file mode 100644 index 00000000..0f5d447d --- /dev/null +++ b/man/mapAcc2Name.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CHANGED-pre-msa-tree.R +\name{mapAcc2Name} +\alias{mapAcc2Name} +\title{Default renameFA() replacement function. Maps an accession number to its name} +\usage{ +mapAcc2Name(line, acc2name, acc_col = "AccNum", name_col = "Name") +} +\arguments{ +\item{line}{The line of a fasta file starting with '>'} + +\item{acc2name}{Data Table containing a column of accession numbers and a name column} + +\item{acc_col}{Name of the column containing Accession numbers} + +\item{name_col}{Name of the column containing the names that the accession numbers +are mapped to} +} +\description{ +Default renameFA() replacement function. Maps an accession number to its name +} diff --git a/man/map_acc2name.Rd b/man/map_acc2name.Rd index 846145ee..fcdb3023 100644 --- a/man/map_acc2name.Rd +++ b/man/map_acc2name.Rd @@ -1,11 +1,9 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/CHANGED-pre-msa-tree.R, R/pre-msa-tree.R +% Please edit documentation in R/pre-msa-tree.R \name{map_acc2name} \alias{map_acc2name} \title{Default rename_fasta() replacement function. Maps an accession number to its name} \usage{ -map_acc2name(line, acc2name, acc_col = "AccNum", name_col = "Name") - map_acc2name(line, acc2name, acc_col = "AccNum", name_col = "Name") } \arguments{ @@ -19,7 +17,5 @@ map_acc2name(line, acc2name, acc_col = "AccNum", name_col = "Name") are mapped to} } \description{ -Default rename_fasta() replacement function. Maps an accession number to its name - Default rename_fasta() replacement function. 
Maps an accession number to its name } diff --git a/man/renameFA.Rd b/man/renameFA.Rd new file mode 100644 index 00000000..7b6fd579 --- /dev/null +++ b/man/renameFA.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CHANGED-pre-msa-tree.R +\name{renameFA} +\alias{renameFA} +\title{Rename the labels of fasta files} +\usage{ +renameFA(fa_path, outpath, replacement_function = mapAcc2Name, ...) +} +\arguments{ +\item{fa_path}{Path to fasta file} + +\item{outpath}{Path to write altered fasta file to} + +\item{replacement_function}{Function to apply to lines starting with '>'} + +\item{...}{Additional arguments to pass to replacement_function} +} +\description{ +Rename the labels of fasta files +} diff --git a/man/rename_fasta.Rd b/man/rename_fasta.Rd index 120b65e8..6b4e5dd7 100644 --- a/man/rename_fasta.Rd +++ b/man/rename_fasta.Rd @@ -1,11 +1,9 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/CHANGED-pre-msa-tree.R, R/pre-msa-tree.R +% Please edit documentation in R/pre-msa-tree.R \name{rename_fasta} \alias{rename_fasta} \title{Rename the labels of fasta files} \usage{ -rename_fasta(fa_path, outpath, replacement_function = map_acc2name, ...) - rename_fasta(fa_path, outpath, replacement_function = map_acc2name, ...) } \arguments{ @@ -18,7 +16,5 @@ rename_fasta(fa_path, outpath, replacement_function = map_acc2name, ...) 
\item{...}{Additional arguments to pass to replacement_function} } \description{ -Rename the labels of fasta files - Rename the labels of fasta files } diff --git a/man/to_titlecase.Rd b/man/to_titlecase.Rd index dc093a97..45139d3b 100644 --- a/man/to_titlecase.Rd +++ b/man/to_titlecase.Rd @@ -1,13 +1,11 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/CHANGED-pre-msa-tree.R, R/pre-msa-tree.R +% Please edit documentation in R/pre-msa-tree.R \name{to_titlecase} \alias{to_titlecase} \alias{totitle,} \alias{to_title} -\title{Changing case to 'Title Case'} +\title{To Titlecase} \usage{ -to_titlecase(text, delimitter) - to_titlecase(text, delimitter) } \arguments{ @@ -16,14 +14,10 @@ to_titlecase(text, delimitter) \item{y}{Delimitter. Default is space (" ").} } \description{ -Translate string to Title Case w/ delimitter. - Translate string to Title Case w/ delimitter. Changing case to 'Title Case' } \seealso{ -chartr, toupper, and tolower. - chartr, toupper, and tolower. 
} \author{ From 950e6b6cc4c519561011baf6e79b4673e323456d Mon Sep 17 00:00:00 2001 From: teddyCodex Date: Mon, 7 Oct 2024 11:30:11 +0100 Subject: [PATCH 12/33] refactor function names in R/plotme.R --- NAMESPACE | 10 ++--- R/plotme.R | 44 +++++++++---------- man/create_all_col_params.Rd | 14 ------ man/create_one_col_params.Rd | 14 ------ man/{count_to_sunburst.Rd => plotSunburst.Rd} | 29 +++++------- man/prepareColumnParams.Rd | 14 ++++++ man/prepareSingleColumnParams.Rd | 14 ++++++ ...{assert_count_df.Rd => validateCountDF.Rd} | 10 ++--- 8 files changed, 72 insertions(+), 77 deletions(-) delete mode 100644 man/create_all_col_params.Rd delete mode 100644 man/create_one_col_params.Rd rename man/{count_to_sunburst.Rd => plotSunburst.Rd} (61%) create mode 100644 man/prepareColumnParams.Rd create mode 100644 man/prepareSingleColumnParams.Rd rename man/{assert_count_df.Rd => validateCountDF.Rd} (56%) diff --git a/NAMESPACE b/NAMESPACE index 16cf0813..1a5f4e4f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -14,7 +14,6 @@ export(add_name) export(add_tax) export(advanced_opts2est_walltime) export(alignFasta) -export(assert_count_df) export(assign_job_queue) export(cleanup_GeneDesc) export(cleanup_clust) @@ -28,11 +27,7 @@ export(combine_ipr) export(convert_aln2fa) export(convert_fa2tre) export(count_bycol) -export(count_to_sunburst) -export(count_to_treemap) -export(create_all_col_params) export(create_lineage_lookup) -export(create_one_col_params) export(domain_network) export(efetch_ipg) export(filter_by_doms) @@ -63,7 +58,11 @@ export(map_acc2name) export(map_advanced_opts2procs) export(msa_pdf) export(pick_longer_duplicate) +export(plotSunburst) +export(plotTreemap) export(plot_estimated_walltimes) +export(prepareColumnParams) +export(prepareSingleColumnParams) export(prot2tax) export(prot2tax_old) export(remove_astrk) @@ -91,6 +90,7 @@ export(theme_genes2) export(to_titlecase) export(total_counts) export(upset.plot) +export(validateCountDF) export(wordcloud2_element) 
export(wordcloud3) export(wordcloud_element) diff --git a/R/plotme.R b/R/plotme.R index ba9de53e..6cbeb3d0 100644 --- a/R/plotme.R +++ b/R/plotme.R @@ -1,11 +1,11 @@ -# Taken from https://github.com/yogevherz/plotme/blob/master/R/count_to_sunburst_treemap.R +# Taken from https://github.com/yogevherz/plotme/blob/master/R/count_to_sunburst_treemap.R #' Create an interactive plotly from count data #' #' @description #' These functions help you quickly create interactive hierarchical plots #' from categorical data. They expect the summary of the data created by -#' `dplyr::count()` and produce either a sunburst plot (`count_to_sunburst()`) or -#' a treemap plot (`count_to_treemap()`) +#' `dplyr::count()` and produce either a sunburst plot (`plotSunburst()`) or +#' a treemap plot (`plotTreemap()`) #' #' @param count_data An output of dplyr::count(), tibble or data frame #' @param fill_by_n If TRUE, uses a continuous scale to fill plot by group size @@ -20,21 +20,21 @@ #' starwars_count <- count(starwars, species, eye_color, name) #' #' # sunburst plot -#' count_to_sunburst(starwars_count) +#' plotSunburst(starwars_count) #' #' # fill by group size -#' count_to_sunburst(starwars_count, fill_by_n = TRUE) +#' plotSunburst(starwars_count, fill_by_n = TRUE) #' #' # treemap plot, ordered by group size -#' count_to_treemap(starwars_count, sort_by_n = TRUE) +#' plotTreemap(starwars_count, sort_by_n = TRUE) #' #' # display al charchaters by homeworld #' starwars %>% #' count(homeworld, name) %>% -#' count_to_treemap(sort_by_n = TRUE) +#' plotTreemap(sort_by_n = TRUE) #' -count_to_sunburst <- function(count_data, fill_by_n = FALSE, sort_by_n = FALSE, maxdepth = 2) { - params <- create_all_col_params(count_data, fill_by_n, sort_by_n) +plotSunburst <- function(count_data, fill_by_n = FALSE, sort_by_n = FALSE, maxdepth = 2) { + params <- prepareColumnParams(count_data, fill_by_n, sort_by_n) purrr::exec(plotly::plot_ly, !!!params, @@ -53,9 +53,9 @@ count_to_sunburst <-
function(count_data, fill_by_n = FALSE, sort_by_n = FALSE, #' @importFrom purrr exec #' #' @export -#' @rdname count_to_sunburst -count_to_treemap <- function(count_data, fill_by_n = FALSE, sort_by_n = FALSE) { - params <- create_all_col_params(count_data, fill_by_n, sort_by_n) +#' @rdname plotSunburst +plotTreemap <- function(count_data, fill_by_n = FALSE, sort_by_n = FALSE) { + params <- prepareColumnParams(count_data, fill_by_n, sort_by_n) purrr::exec(plotly::plot_ly, !!!params, @@ -66,7 +66,7 @@ count_to_treemap <- function(count_data, fill_by_n = FALSE, sort_by_n = FALSE) { } -#' create_all_col_params +#' prepareColumnParams #' #' @param count_data #' @param fill_by_n @@ -80,8 +80,8 @@ count_to_treemap <- function(count_data, fill_by_n = FALSE, sort_by_n = FALSE) { #' @export #' #' @examples -create_all_col_params <- function(count_data, fill_by_n, sort_by_n) { - assert_count_df(count_data) +prepareColumnParams <- function(count_data, fill_by_n, sort_by_n) { + validateCountDF(count_data) assertthat::assert_that(is.logical(fill_by_n), length(fill_by_n) == 1, msg = "fill_by_n must be either TRUE or FALSE" @@ -91,12 +91,12 @@ create_all_col_params <- function(count_data, fill_by_n, sort_by_n) { msg = "sort_by_n must be either TRUE or FALSE" ) - count_data <- all_non_n_cols_to_char(count_data) + count_data <- .all_non_n_cols_to_char(count_data) category_num <- ncol(count_data) - 1 params <- purrr::map(1:category_num, - create_one_col_params, + prepareSingleColumnParams, df = count_data, root = "" ) %>% @@ -114,7 +114,7 @@ create_all_col_params <- function(count_data, fill_by_n, sort_by_n) { params } -#' create_one_col_params +#' prepareSingleColumnParams #' #' @param df #' @param col_num @@ -127,7 +127,7 @@ create_all_col_params <- function(count_data, fill_by_n, sort_by_n) { #' @export #' #' @examples -create_one_col_params <- function(df, +prepareSingleColumnParams <- function(df, col_num, root) { col_name <- names(df)[col_num] @@ -156,7 +156,7 @@ 
create_one_col_params <- function(df, ) %>% dplyr::select(ids, parents, labels, values, hovertext) } -#' assert_count_df +#' validateCountDF #' #' @param var #' @@ -167,7 +167,7 @@ create_one_col_params <- function(df, #' @export #' #' @examples -assert_count_df <- function(var) { +validateCountDF <- function(var) { msg <- paste(substitute(var), "must be a count dataframe (output of dplyr::count)") assertthat::assert_that(is.data.frame(var), assertthat::has_name(var, "n"), @@ -178,7 +178,7 @@ assert_count_df <- function(var) { assertthat::assert_that(is.numeric(n_col), msg = msg) } -all_non_n_cols_to_char <- function(df) { +.all_non_n_cols_to_char <- function(df) { df %>% dplyr::mutate(dplyr::across(!matches("^n$"), as.character)) } diff --git a/man/create_all_col_params.Rd b/man/create_all_col_params.Rd deleted file mode 100644 index 5234dfeb..00000000 --- a/man/create_all_col_params.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/plotme.R -\name{create_all_col_params} -\alias{create_all_col_params} -\title{create_all_col_params} -\usage{ -create_all_col_params(count_data, fill_by_n, sort_by_n) -} -\arguments{ -\item{sort_by_n}{} -} -\description{ -create_all_col_params -} diff --git a/man/create_one_col_params.Rd b/man/create_one_col_params.Rd deleted file mode 100644 index 0e3cdd10..00000000 --- a/man/create_one_col_params.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/plotme.R -\name{create_one_col_params} -\alias{create_one_col_params} -\title{create_one_col_params} -\usage{ -create_one_col_params(df, col_num, root) -} -\arguments{ -\item{root}{} -} -\description{ -create_one_col_params -} diff --git a/man/count_to_sunburst.Rd b/man/plotSunburst.Rd similarity index 61% rename from man/count_to_sunburst.Rd rename to man/plotSunburst.Rd index ae292772..5ee465a6 100644 --- a/man/count_to_sunburst.Rd +++ b/man/plotSunburst.Rd @@ 
-1,18 +1,13 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/plotme.R -\name{count_to_sunburst} -\alias{count_to_sunburst} -\alias{count_to_treemap} +\name{plotSunburst} +\alias{plotSunburst} +\alias{plotTreemap} \title{Create an interactive plotly from count data} \usage{ -count_to_sunburst( - count_data, - fill_by_n = FALSE, - sort_by_n = FALSE, - maxdepth = 2 -) - -count_to_treemap(count_data, fill_by_n = FALSE, sort_by_n = FALSE) +plotSunburst(count_data, fill_by_n = FALSE, sort_by_n = FALSE, maxdepth = 2) + +plotTreemap(count_data, fill_by_n = FALSE, sort_by_n = FALSE) } \arguments{ \item{count_data}{} @@ -24,25 +19,25 @@ count_to_treemap(count_data, fill_by_n = FALSE, sort_by_n = FALSE) \description{ These functions help you quickly create interactive hierarchical plots from categorical data. They expect the summary of the data created by -\code{dplyr::count()} and produce either a sunburst plot (\code{count_to_sunburst()}) or -a treemap plot (\code{count_to_treemap()}) +\code{dplyr::count()} and produce either a sunburst plot (\code{plotSunburst()}) or +a treemap plot (\code{plotTreemap()}) } \examples{ library(dplyr) starwars_count <- count(starwars, species, eye_color, name) # sunburst plot -count_to_sunburst(starwars_count) +plotSunburst(starwars_count) # fill by group size -count_to_sunburst(starwars_count, fill_by_n = TRUE) +plotSunburst(starwars_count, fill_by_n = TRUE) # treemap plot, ordered by group size -count_to_treemap(starwars_count, sort_by_n = TRUE) +plotTreemap(starwars_count, sort_by_n = TRUE) # display al charchaters by homeworld starwars \%>\% count(homeworld, name) \%>\% - count_to_treemap(sort_by_n = TRUE) + plotTreemap(sort_by_n = TRUE) } diff --git a/man/prepareColumnParams.Rd b/man/prepareColumnParams.Rd new file mode 100644 index 00000000..bb0b9a29 --- /dev/null +++ b/man/prepareColumnParams.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotme.R 
+\name{prepareColumnParams} +\alias{prepareColumnParams} +\title{prepareColumnParams} +\usage{ +prepareColumnParams(count_data, fill_by_n, sort_by_n) +} +\arguments{ +\item{sort_by_n}{} +} +\description{ +prepareColumnParams +} diff --git a/man/prepareSingleColumnParams.Rd b/man/prepareSingleColumnParams.Rd new file mode 100644 index 00000000..d823852b --- /dev/null +++ b/man/prepareSingleColumnParams.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotme.R +\name{prepareSingleColumnParams} +\alias{prepareSingleColumnParams} +\title{prepareSingleColumnParams} +\usage{ +prepareSingleColumnParams(df, col_num, root) +} +\arguments{ +\item{root}{} +} +\description{ +prepareSingleColumnParams +} diff --git a/man/assert_count_df.Rd b/man/validateCountDF.Rd similarity index 56% rename from man/assert_count_df.Rd rename to man/validateCountDF.Rd index 3591d692..fc4aefa2 100644 --- a/man/assert_count_df.Rd +++ b/man/validateCountDF.Rd @@ -1,14 +1,14 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/plotme.R -\name{assert_count_df} -\alias{assert_count_df} -\title{assert_count_df} +\name{validateCountDF} +\alias{validateCountDF} +\title{validateCountDF} \usage{ -assert_count_df(var) +validateCountDF(var) } \arguments{ \item{var}{} } \description{ -assert_count_df +validateCountDF } From 7e2454330665f6dff0fa74d993dcc3bbc9523eca Mon Sep 17 00:00:00 2001 From: teddyCodex Date: Mon, 7 Oct 2024 13:37:45 +0100 Subject: [PATCH 13/33] refactor function names in R/lineage.R --- NAMESPACE | 10 +++--- R/lineage.R | 32 +++++++++---------- man/DownloadAssemblySummary.Rd | 22 ------------- man/{GCA2lin.Rd => GCA2Lineage.Rd} | 8 ++--- man/acc2lin.Rd | 2 +- man/{add_tax.Rd => addTaxID.Rd} | 10 +++--- man/prot2tax.Rd | 14 -------- man/proteinAcc2TaxID.Rd | 14 ++++++++ ...rot2tax_old.Rd => proteinAcc2TaxID_old.Rd} | 8 ++--- 9 files changed, 49 insertions(+), 71 deletions(-) delete mode 100644 
man/DownloadAssemblySummary.Rd rename man/{GCA2lin.Rd => GCA2Lineage.Rd} (88%) rename man/{add_tax.Rd => addTaxID.Rd} (57%) delete mode 100644 man/prot2tax.Rd create mode 100644 man/proteinAcc2TaxID.Rd rename man/{prot2tax_old.Rd => proteinAcc2TaxID_old.Rd} (75%) diff --git a/NAMESPACE b/NAMESPACE index 16cf0813..17738278 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,17 +1,16 @@ # Generated by roxygen2: do not edit by hand export(BinaryDomainNetwork) -export(DownloadAssemblySummary) -export(GCA2lin) +export(GCA2Lineage) export(GenContextNetwork) export(LineagePlot) export(RepresentativeAccNums) export(acc2fa) export(acc2lin) +export(addTaxID) export(add_leaves) export(add_lins) export(add_name) -export(add_tax) export(advanced_opts2est_walltime) export(alignFasta) export(assert_count_df) @@ -34,6 +33,7 @@ export(create_all_col_params) export(create_lineage_lookup) export(create_one_col_params) export(domain_network) +export(downloadAssemblySummary) export(efetch_ipg) export(filter_by_doms) export(filter_freq) @@ -64,8 +64,8 @@ export(map_advanced_opts2procs) export(msa_pdf) export(pick_longer_duplicate) export(plot_estimated_walltimes) -export(prot2tax) -export(prot2tax_old) +export(proteinAcc2TaxID) +export(proteinAcc2TaxID_old) export(remove_astrk) export(remove_empty) export(remove_tails) diff --git a/R/lineage.R b/R/lineage.R index 20acec04..f034739b 100644 --- a/R/lineage.R +++ b/R/lineage.R @@ -22,7 +22,7 @@ #' @export #' #' @examples -DownloadAssemblySummary <- function(outpath, +downloadAssemblySummary <- function(outpath, keep = c( "assembly_accession", "taxid", "species_taxid", "organism_name" @@ -74,7 +74,7 @@ DownloadAssemblySummary <- function(outpath, #' #' @param prot_data Dataframe containing a column `GCA_ID` #' @param assembly_path String of the path to the assembly_summary path -#' This file can be generated using the "DownloadAssemblySummary()" function +#' This file can be generated using the "downloadAssemblySummary()" function #' @param 
lineagelookup_path String of the path to the lineage lookup file #' (taxid to lineage mapping). This file can be generated using the #' "create_lineage_lookup()" function @@ -87,7 +87,7 @@ DownloadAssemblySummary <- function(outpath, #' @export #' #' @examples -GCA2lin <- function(prot_data, +GCA2Lineage <- function(prot_data, assembly_path = "/data/research/jravilab/common_data/assembly_summary_genbank.txt", lineagelookup_path = "/data/research/jravilab/common_data/lineage_lookup.tsv", acc_col = "AccNum") { @@ -189,7 +189,7 @@ add_lins <- function(df, acc_col = "AccNum", assembly_path, #' #' @param accessions Character vector of protein accessions #' @param assembly_path String of the path to the assembly_summary path -#' This file can be generated using the "DownloadAssemblySummary()" function +#' This file can be generated using the "downloadAssemblySummary()" function #' @param lineagelookup_path String of the path to the lineage lookup file #' (taxid to lineage mapping). This file can be generated using the #' @param ipgout_path Path to write the results of the efetch run of the accessions @@ -353,25 +353,25 @@ ipg2lin <- function(accessions, ipg_file, refseq_rows <- refseq_rows[which(refseq_rows != 0)] genbank_rows <- genbank_rows[which(genbank_rows != 0)] - # Call GCA2lins using refseq + # Call GCA2Lineages using refseq ### Possible to run these in parallel if it takes a while if (length(refseq_rows) != 0) { refseq_ipg_dt <- ipg_dt[refseq_rows, ] - refseq_lins <- GCA2lin(refseq_ipg_dt, + refseq_lins <- GCA2Lineage(refseq_ipg_dt, assembly_path = refseq_assembly_path, lineagelookup_path ) } if (length(genbank_rows) != 0) { genbank_ipg_dt <- ipg_dt[genbank_rows, ] - genbank_lins <- GCA2lin(gca_ipg_dt, + genbank_lins <- GCA2Lineage(gca_ipg_dt, assembly_path = genbank_assembly_path, lineagelookup_path ) } - lins <- GCA2lin(prot_data = ipg_dt, assembly_path, lineagelookup_path) + lins <- GCA2Lineage(prot_data = ipg_dt, assembly_path, lineagelookup_path) lins <- 
lins[!is.na(Lineage)] %>% unique() return(lins) @@ -381,7 +381,7 @@ ipg2lin <- function(accessions, ipg_file, ######################################### ## !! @SAM: Add TaxID based on AccNum? ## ######################################### -#' add_tax +#' addTaxID #' #' @param data #' @param acc_col @@ -393,7 +393,7 @@ ipg2lin <- function(accessions, ipg_file, #' @export #' #' @examples -add_tax <- function(data, acc_col = "AccNum", version = T) { +addTaxID <- function(data, acc_col = "AccNum", version = T) { if (!is.data.table(data)) { data <- as.data.table(data) } @@ -408,7 +408,7 @@ add_tax <- function(data, acc_col = "AccNum", version = T) { } out_path <- tempdir() - tax <- prot2tax(accessions, "TEMPTAX", out_path, return_dt = TRUE) + tax <- proteinAcc2TaxID(accessions, "TEMPTAX", out_path, return_dt = TRUE) data <- merge.data.table(data, tax, by.x = acc_col, by.y = "AccNum.noV", all.x = T @@ -419,7 +419,7 @@ add_tax <- function(data, acc_col = "AccNum", version = T) { ################################## ## Maps Protein AccNum to TaxID ## ################################## -#' prot2tax +#' proteinAcc2TaxID #' #' @param accnums #' @param suffix @@ -432,7 +432,7 @@ add_tax <- function(data, acc_col = "AccNum", version = T) { #' @export #' #' @examples -prot2tax <- function(accnums, suffix, out_path, return_dt = FALSE) { +proteinAcc2TaxID <- function(accnums, suffix, out_path, return_dt = FALSE) { # Write accnums to a file acc_file <- tempfile() write(paste(accnums, collapse = "\n"), acc_file) @@ -450,7 +450,7 @@ prot2tax <- function(accnums, suffix, out_path, return_dt = FALSE) { ####################################### ## OLD: Maps Protein AccNum to TaxID ## ####################################### -#' prot2tax_old +#' proteinAcc2TaxID_old #' #' @author Samuel Chen, Janani Ravi #' @description Perform elink to go from protein database to taxonomy database @@ -468,7 +468,7 @@ prot2tax <- function(accnums, suffix, out_path, return_dt = FALSE) { #' @export #' #' @examples 
-prot2tax_old <- function(accessions, out_path, plan = "multicore") { +proteinAcc2TaxID_old <- function(accessions, out_path, plan = "multicore") { if (length(accessions) > 0) { partition <- function(v, groups) { # Partition data to limit number of queries per second for rentrez fetch: @@ -500,7 +500,7 @@ prot2tax_old <- function(accessions, out_path, plan = "multicore") { } print(x) script <- "/data/research/jravilab/molevol_scripts/upstream_scripts/acc2info.sh" - # script <- "/data/research/jravilab/molevol_scripts/upstream_scripts/prot2tax.sh" + # script <- "/data/research/jravilab/molevol_scripts/upstream_scripts/proteinAcc2TaxID.sh" # accnum_in <- paste(partitioned_acc[[x]], collapse=",") accnum_in <- tempfile() diff --git a/man/DownloadAssemblySummary.Rd b/man/DownloadAssemblySummary.Rd deleted file mode 100644 index 2d724793..00000000 --- a/man/DownloadAssemblySummary.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/lineage.R -\name{DownloadAssemblySummary} -\alias{DownloadAssemblySummary} -\title{Download the combined assembly summaries of genbank and refseq} -\usage{ -DownloadAssemblySummary( - outpath, - keep = c("assembly_accession", "taxid", "species_taxid", "organism_name") -) -} -\arguments{ -\item{outpath}{String of path where the assembly summary file should be written} - -\item{keep}{Character vector containing which columns should be retained and downloaded} -} -\description{ -Download the combined assembly summaries of genbank and refseq -} -\author{ -Samuel Chen, Janani Ravi -} diff --git a/man/GCA2lin.Rd b/man/GCA2Lineage.Rd similarity index 88% rename from man/GCA2lin.Rd rename to man/GCA2Lineage.Rd index ad83ca39..9ec0ce56 100644 --- a/man/GCA2lin.Rd +++ b/man/GCA2Lineage.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/lineage.R -\name{GCA2lin} -\alias{GCA2lin} +\name{GCA2Lineage} +\alias{GCA2Lineage} \title{Function to map 
GCA_ID to TaxID, and TaxID to Lineage} \usage{ -GCA2lin( +GCA2Lineage( prot_data, assembly_path = "/data/research/jravilab/common_data/assembly_summary_genbank.txt", lineagelookup_path = "/data/research/jravilab/common_data/lineage_lookup.tsv", @@ -15,7 +15,7 @@ GCA2lin( \item{prot_data}{Dataframe containing a column \code{GCA_ID}} \item{assembly_path}{String of the path to the assembly_summary path -This file can be generated using the "DownloadAssemblySummary()" function} +This file can be generated using the "downloadAssemblySummary()" function} \item{lineagelookup_path}{String of the path to the lineage lookup file (taxid to lineage mapping). This file can be generated using the diff --git a/man/acc2lin.Rd b/man/acc2lin.Rd index 6255b290..fd393d43 100644 --- a/man/acc2lin.Rd +++ b/man/acc2lin.Rd @@ -24,7 +24,7 @@ acc2lin( \item{accessions}{Character vector of protein accessions} \item{assembly_path}{String of the path to the assembly_summary path -This file can be generated using the "DownloadAssemblySummary()" function} +This file can be generated using the "downloadAssemblySummary()" function} \item{lineagelookup_path}{String of the path to the lineage lookup file (taxid to lineage mapping). 
This file can be generated using the} diff --git a/man/add_tax.Rd b/man/addTaxID.Rd similarity index 57% rename from man/add_tax.Rd rename to man/addTaxID.Rd index fec859c3..d2fe139d 100644 --- a/man/add_tax.Rd +++ b/man/addTaxID.Rd @@ -1,14 +1,14 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/lineage.R -\name{add_tax} -\alias{add_tax} -\title{add_tax} +\name{addTaxID} +\alias{addTaxID} +\title{addTaxID} \usage{ -add_tax(data, acc_col = "AccNum", version = T) +addTaxID(data, acc_col = "AccNum", version = T) } \arguments{ \item{version}{} } \description{ -add_tax +addTaxID } diff --git a/man/prot2tax.Rd b/man/prot2tax.Rd deleted file mode 100644 index 3631287e..00000000 --- a/man/prot2tax.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/lineage.R -\name{prot2tax} -\alias{prot2tax} -\title{prot2tax} -\usage{ -prot2tax(accnums, suffix, out_path, return_dt = FALSE) -} -\arguments{ -\item{return_dt}{} -} -\description{ -prot2tax -} diff --git a/man/proteinAcc2TaxID.Rd b/man/proteinAcc2TaxID.Rd new file mode 100644 index 00000000..c0317bba --- /dev/null +++ b/man/proteinAcc2TaxID.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lineage.R +\name{proteinAcc2TaxID} +\alias{proteinAcc2TaxID} +\title{proteinAcc2TaxID} +\usage{ +proteinAcc2TaxID(accnums, suffix, out_path, return_dt = FALSE) +} +\arguments{ +\item{return_dt}{} +} +\description{ +proteinAcc2TaxID +} diff --git a/man/prot2tax_old.Rd b/man/proteinAcc2TaxID_old.Rd similarity index 75% rename from man/prot2tax_old.Rd rename to man/proteinAcc2TaxID_old.Rd index 22541131..0c2a85ba 100644 --- a/man/prot2tax_old.Rd +++ b/man/proteinAcc2TaxID_old.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/lineage.R -\name{prot2tax_old} -\alias{prot2tax_old} -\title{prot2tax_old} +\name{proteinAcc2TaxID_old} 
+\alias{proteinAcc2TaxID_old} +\title{proteinAcc2TaxID_old} \usage{ -prot2tax_old(accessions, out_path, plan = "multicore") +proteinAcc2TaxID_old(accessions, out_path, plan = "multicore") } \arguments{ \item{accessions}{Character vector containing the accession numbers to query on From ec96cf1b4192343716fa074f8d1e48d2af5f33e4 Mon Sep 17 00:00:00 2001 From: Awa Synthia Date: Mon, 7 Oct 2024 22:20:13 +0300 Subject: [PATCH 14/33] rename functions Signed-off-by: Awa Synthia --- NAMESPACE | 5 --- R/acc2lin.R | 28 ++++++++--------- R/deprecate.R | 40 ----------------------- R/lineage.R | 24 +++++++------- man/{ipg2lin.Rd => IPG2Lineage.Rd} | 8 ++--- man/acc2Lineage.Rd | 37 ++++++++++++++++++++++ man/acc2lin.Rd | 49 ++++++++++++----------------- man/{add_lins.Rd => addlineage.Rd} | 10 +++--- man/deprecate.Rd | 43 ------------------------- man/{efetch_ipg.Rd => efetchIPG.Rd} | 8 ++--- 10 files changed, 96 insertions(+), 156 deletions(-) delete mode 100644 R/deprecate.R rename man/{ipg2lin.Rd => IPG2Lineage.Rd} (91%) create mode 100644 man/acc2Lineage.Rd rename man/{add_lins.Rd => addlineage.Rd} (76%) delete mode 100644 man/deprecate.Rd rename man/{efetch_ipg.Rd => efetchIPG.Rd} (78%) diff --git a/NAMESPACE b/NAMESPACE index 4dbb858b..a526b959 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,9 +9,7 @@ export(LineagePlot) export(RepresentativeAccNums) export(acc2Lineage) export(acc2fa) -export(acc2lin) export(add_leaves) -export(add_lins) export(add_name) export(add_tax) export(addlineage) @@ -38,7 +36,6 @@ export(create_lineage_lookup) export(create_one_col_params) export(domain_network) export(efetchIPG) -export(efetch_ipg) export(filter_by_doms) export(filter_freq) export(find_paralogs) @@ -53,7 +50,6 @@ export(get_accnums_from_fasta_file) export(get_job_message) export(get_proc_medians) export(get_proc_weights) -export(ipg2lin) export(ipr2viz) export(ipr2viz_web) export(lineage.DA.plot) @@ -82,7 +78,6 @@ export(run_deltablast) export(run_rpsblast) 
export(send_job_status_email) export(shorten_lineage) -export(sink.reset) export(sinkReset) export(stacked_lin_plot) export(string2accnum) diff --git a/R/acc2lin.R b/R/acc2lin.R index dca24140..71e7ae07 100644 --- a/R/acc2lin.R +++ b/R/acc2lin.R @@ -15,7 +15,7 @@ #' #' @examples #' \dontrun{ -#' sink.reset() +#' sinkReset() #' } sinkReset <- function() { for (i in seq_len(sink.number())) { @@ -43,13 +43,13 @@ sinkReset <- function() { #' #' @examples #' \dontrun{ -#' add_lins() +#' addlineage() #' } addlineage <- function(df, acc_col = "AccNum", assembly_path, lineagelookup_path, ipgout_path = NULL, plan = "sequential", ...) { s_acc_col <- sym(acc_col) accessions <- df %>% pull(acc_col) - lins <- acc2lin(accessions, assembly_path, lineagelookup_path, ipgout_path, plan) + lins <- acc2Lineage(accessions, assembly_path, lineagelookup_path, ipgout_path, plan) # Drop a lot of the unimportant columns for now? will make merging much easier lins <- lins[, c( @@ -66,11 +66,11 @@ addlineage <- function(df, acc_col = "AccNum", assembly_path, } -#' acc2lin +#' acc2Lineage #' #' @author Samuel Chen, Janani Ravi #' -#' @description This function combines 'efetch_ipg()' and 'ipg2lin()' to map a set +#' @description This function combines 'efetchIPG()' and 'IPG2Lineage()' to map a set #' of protein accessions to their assembly (GCA_ID), tax ID, and lineage. #' #' @param accessions Character vector of protein accessions @@ -88,7 +88,7 @@ addlineage <- function(df, acc_col = "AccNum", assembly_path, #' #' @examples #' \dontrun{ -#' acc2lin() +#' acc2Lineage() #' } acc2Lineage <- function(accessions, assembly_path, lineagelookup_path, ipgout_path = NULL, plan = "sequential", ...) 
{ tmp_ipg <- F @@ -96,9 +96,9 @@ acc2Lineage <- function(accessions, assembly_path, lineagelookup_path, ipgout_pa tmp_ipg <- T ipgout_path <- tempfile("ipg", fileext = ".txt") } - efetch_ipg(accessions, out_path = ipgout_path, plan) + efetchIPG(accessions, out_path = ipgout_path, plan) - lins <- ipg2lin(accessions, ipgout_path, assembly_path, lineagelookup_path) + lins <- IPG2Lineage(accessions, ipgout_path, assembly_path, lineagelookup_path) if (tmp_ipg) { unlink(tempdir(), recursive = T) @@ -106,7 +106,7 @@ acc2Lineage <- function(accessions, assembly_path, lineagelookup_path, ipgout_pa return(lins) } -#' efetch_ipg +#' efetchIPG #' #' @author Samuel Chen, Janani Ravi #' @@ -127,12 +127,12 @@ acc2Lineage <- function(accessions, assembly_path, lineagelookup_path, ipgout_pa #' #' @examples #' \dontrun{ -#' efetch_ipg() +#' efetchIPG() #' } efetchIPG <- function(accnums, out_path, plan = "sequential", ...) { if (length(accnums) > 0) { partition <- function(in_data, groups) { - # \\TODO This function should be defined outside of efetch_ipg(). It can be non-exported/internal + # \\TODO This function should be defined outside of efetchIPG(). It can be non-exported/internal # Partition data to limit number of queries per second for rentrez fetch: # limit of 10/second w/ key l <- length(in_data) @@ -172,7 +172,7 @@ efetchIPG <- function(accnums, out_path, plan = "sequential", ...) { } } -#' ipg2lin +#' IPG2Lineage #' #' @author Samuel Chen, Janani Ravi #' @@ -196,7 +196,7 @@ efetchIPG <- function(accnums, out_path, plan = "sequential", ...) { #' #' @examples #' \dontrun{ -#' ipg2lin() +#' IPG2Lineage() #' } #' IPG2Lineage <- function(accessions, ipg_file, assembly_path, lineagelookup_path, ...) 
{ @@ -216,7 +216,7 @@ IPG2Lineage <- function(accessions, ipg_file, assembly_path, lineagelookup_path, -# efetch_ipg <- function(accnums, outpath) +# efetchIPG <- function(accnums, outpath) # { # SIZE = 250 # lower_bound = 1 diff --git a/R/deprecate.R b/R/deprecate.R deleted file mode 100644 index 2de0bbcd..00000000 --- a/R/deprecate.R +++ /dev/null @@ -1,40 +0,0 @@ -#' These functions will be deprecated. Please use other functions instead. -#' -#' @name deprecate -#' -NULL - -#' @rdname deprecate -#' @export -sink.reset <- function() { - warning("'sink.reset' is deprecated. Use 'sinkReset' instead.") - sinkReset() -} - -#' @rdname deprecate -#' @export -add_lins <- function(df, ...) { - warning("'add_lins' is deprecated. Use 'addlineage' instead.") - addlineage(df, ...) -} - -#' @rdname deprecate -#' @export -acc2lin <- function(accessions, ...) { - warning("'acc2lin' is deprecated. Use 'acc2Lineage' instead.") - acc2Lineage(accessions, ...) -} - -#' @rdname deprecate -#' @export -efetch_ipg <- function(accnums, ...) { - warning("'efetch_ipg' is deprecated. Use 'efetchIPG' instead.") - efetchIPG(accnums, ...) -} - -#' @rdname deprecate -#' @export -ipg2lin <- function(accessions, ...) { - warning("'ipg2lin' is deprecated. Use 'IPG32Lineage' instead.") - IPG32Lineage(accessions, ...) -} \ No newline at end of file diff --git a/R/lineage.R b/R/lineage.R index 20acec04..3775b63b 100644 --- a/R/lineage.R +++ b/R/lineage.R @@ -133,7 +133,7 @@ GCA2lin <- function(prot_data, ################################### ## !! @SAM why is this called lins? 
################################### -#' add_lins +#' addlineage #' #' @param df #' @param acc_col @@ -149,11 +149,11 @@ GCA2lin <- function(prot_data, #' @export #' #' @examples -add_lins <- function(df, acc_col = "AccNum", assembly_path, +addlineage <- function(df, acc_col = "AccNum", assembly_path, lineagelookup_path, ipgout_path = NULL, plan = "multicore") { acc_sym <- sym(acc_col) accessions <- df %>% pull(acc_sym) - lins <- acc2lin(accessions, assembly_path, + lins <- acc2Lineage(accessions, assembly_path, lineagelookup_path, ipgout_path, plan = plan ) @@ -178,13 +178,13 @@ add_lins <- function(df, acc_col = "AccNum", assembly_path, ####################################### ## Map Protein Accessions to Lineage ## ####################################### -#' acc2lin +#' acc2Lineage #' #' @description #' Function to map protein accession numbers to lineage #' #' @author Samuel Chen, Janani Ravi -#' @description This function combines 'efetch_ipg()' and 'ipg2lin()' to map a set +#' @description This function combines 'efetchIPG()' and 'IPG2Lineage()' to map a set #' of protein accessions to their assembly (GCA_ID), tax ID, and lineage. 
#' #' @param accessions Character vector of protein accessions @@ -200,7 +200,7 @@ add_lins <- function(df, acc_col = "AccNum", assembly_path, #' @export #' #' @examples -acc2lin <- function(accessions, assembly_path, lineagelookup_path, +acc2Lineage <- function(accessions, assembly_path, lineagelookup_path, ipgout_path = NULL, plan = "multicore") { tmp_ipg <- F @@ -208,9 +208,9 @@ acc2lin <- function(accessions, assembly_path, lineagelookup_path, tmp_ipg <- T ipgout_path <- tempfile("ipg", fileext = ".txt") } - efetch_ipg(accessions, out_path = ipgout_path, plan = plan) + efetchIPG(accessions, out_path = ipgout_path, plan = plan) - lins <- ipg2lin(accessions, ipgout_path, assembly_path, lineagelookup_path) + lins <- IPG2Lineage(accessions, ipgout_path, assembly_path, lineagelookup_path) # if(tmp_ipg) # { @@ -227,7 +227,7 @@ acc2lin <- function(accessions, assembly_path, lineagelookup_path, ######################################### ## Download IPG results for Accessions ## ######################################### -#' efetch_ipg +#' efetchIPG #' #' @author Samuel Chen, Janani Ravi #' @description Perform efetch on the ipg database and write the results to out_path @@ -245,7 +245,7 @@ acc2lin <- function(accessions, assembly_path, lineagelookup_path, #' @export #' #' @examples -efetch_ipg <- function(accessions, out_path, plan = "multicore") { +efetchIPG <- function(accessions, out_path, plan = "multicore") { if (length(accessions) > 0) { partition <- function(v, groups) { # Partition data to limit number of queries per second for rentrez fetch: @@ -295,7 +295,7 @@ efetch_ipg <- function(accessions, out_path, plan = "multicore") { ######################################### ## Maps IPG results to TaxID + Lineage ## ######################################### -#' ipg2lin +#' IPG2Lineage #' #' @author Samuel Chen, Janani Ravi #' @description Takes the resulting file of an efetch run on the ipg database and @@ -317,7 +317,7 @@ efetch_ipg <- function(accessions, out_path, 
plan = "multicore") { #' @export #' #' @examples -ipg2lin <- function(accessions, ipg_file, +IPG2Lineage <- function(accessions, ipg_file, refseq_assembly_path, genbank_assembly_path, lineagelookup_path) { ipg_dt <- fread(ipg_file, sep = "\t", fill = T) diff --git a/man/ipg2lin.Rd b/man/IPG2Lineage.Rd similarity index 91% rename from man/ipg2lin.Rd rename to man/IPG2Lineage.Rd index 6e2b4c6f..43a920b9 100644 --- a/man/ipg2lin.Rd +++ b/man/IPG2Lineage.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/lineage.R -\name{ipg2lin} -\alias{ipg2lin} -\title{ipg2lin} +\name{IPG2Lineage} +\alias{IPG2Lineage} +\title{IPG2Lineage} \usage{ -ipg2lin( +IPG2Lineage( accessions, ipg_file, refseq_assembly_path, diff --git a/man/acc2Lineage.Rd b/man/acc2Lineage.Rd new file mode 100644 index 00000000..5ab5931a --- /dev/null +++ b/man/acc2Lineage.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lineage.R +\name{acc2Lineage} +\alias{acc2Lineage} +\title{acc2Lineage} +\usage{ +acc2Lineage( + accessions, + assembly_path, + lineagelookup_path, + ipgout_path = NULL, + plan = "multicore" +) +} +\arguments{ +\item{accessions}{Character vector of protein accessions} + +\item{assembly_path}{String of the path to the assembly_summary path +This file can be generated using the "DownloadAssemblySummary()" function} + +\item{lineagelookup_path}{String of the path to the lineage lookup file +(taxid to lineage mapping). This file can be generated using the} + +\item{ipgout_path}{Path to write the results of the efetch run of the accessions +on the ipg database. If NULL, the file will not be written. Defaults to NULL} + +\item{plan}{} +} +\description{ +Function to map protein accession numbers to lineage + +This function combines 'efetchIPG()' and 'IPG2Lineage()' to map a set +of protein accessions to their assembly (GCA_ID), tax ID, and lineage. 
+} +\author{ +Samuel Chen, Janani Ravi +} diff --git a/man/acc2lin.Rd b/man/acc2lin.Rd index f008be5f..92b2887b 100644 --- a/man/acc2lin.Rd +++ b/man/acc2lin.Rd @@ -1,12 +1,11 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/acc2lin.R, R/lineage.R +% Please edit documentation in R/acc2lin.R \name{sinkReset} \alias{sinkReset} \alias{addlineage} \alias{acc2Lineage} \alias{efetchIPG} \alias{IPG2Lineage} -\alias{acc2lin} \title{Sink Reset} \usage{ sinkReset() @@ -17,8 +16,7 @@ addlineage( assembly_path, lineagelookup_path, ipgout_path = NULL, - plan = "sequential", - ... + plan = "multicore" ) acc2Lineage( @@ -26,20 +24,17 @@ acc2Lineage( assembly_path, lineagelookup_path, ipgout_path = NULL, - plan = "sequential", - ... + plan = "multicore" ) -efetchIPG(accnums, out_path, plan = "sequential", ...) - -IPG2Lineage(accessions, ipg_file, assembly_path, lineagelookup_path, ...) +efetchIPG(accessions, out_path, plan = "multicore") -acc2lin( +IPG2Lineage( accessions, - assembly_path, - lineagelookup_path, - ipgout_path = NULL, - plan = "multicore" + ipg_file, + refseq_assembly_path, + genbank_assembly_path, + lineagelookup_path ) } \arguments{ @@ -47,7 +42,8 @@ acc2lin( This file can be generated using the "DownloadAssemblySummary()" function} \item{lineagelookup_path}{String of the path to the lineage lookup file -(taxid to lineage mapping). This file can be generated using the} +(taxid to lineage mapping). This file can be generated using the +"create_lineage_lookup()" function} \item{ipgout_path}{Path to write the results of the efetch run of the accessions on the ipg database. If NULL, the file will not be written. Defaults to NULL} @@ -56,14 +52,14 @@ on the ipg database. If NULL, the file will not be written. 
Defaults to NULL} \item{accessions}{Character vector of protein accessions} -\item{accnums}{Character vector containing the accession numbers to query on -the ipg database} - \item{out_path}{Path to write the efetch results to} \item{ipg_file}{Filepath to the file containing results of an efetch run on the ipg database. The protein accession in 'accessions' should be contained in this file} + +\item{accnums}{Character vector containing the accession numbers to query on +the ipg database} } \value{ No return, but run to close all outstanding \code{sink()}s @@ -77,33 +73,28 @@ Describe return, in detail Describe return, in detail } \description{ -This function combines 'efetch_ipg()' and 'ipg2lin()' to map a set +This function combines 'efetchIPG()' and 'IPG2Lineage()' to map a set of protein accessions to their assembly (GCA_ID), tax ID, and lineage. Perform efetch on the ipg database and write the results to out_path Takes the resulting file of an efetch run on the ipg database and - -Function to map protein accession numbers to lineage - -This function combines 'efetch_ipg()' and 'ipg2lin()' to map a set -of protein accessions to their assembly (GCA_ID), tax ID, and lineage. 
} \examples{ \dontrun{ -sink.reset() +sinkReset() } \dontrun{ -add_lins() +addlineage() } \dontrun{ -acc2lin() +acc2Lineage() } \dontrun{ -efetch_ipg() +efetchIPG() } \dontrun{ -ipg2lin() +IPG2Lineage() } } diff --git a/man/add_lins.Rd b/man/addlineage.Rd similarity index 76% rename from man/add_lins.Rd rename to man/addlineage.Rd index 9ac343ea..7f34dc9f 100644 --- a/man/add_lins.Rd +++ b/man/addlineage.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/lineage.R -\name{add_lins} -\alias{add_lins} -\title{add_lins} +\name{addlineage} +\alias{addlineage} +\title{addlineage} \usage{ -add_lins( +addlineage( df, acc_col = "AccNum", assembly_path, @@ -17,5 +17,5 @@ add_lins( \item{plan}{} } \description{ -add_lins +addlineage } diff --git a/man/deprecate.Rd b/man/deprecate.Rd deleted file mode 100644 index b8f0731f..00000000 --- a/man/deprecate.Rd +++ /dev/null @@ -1,43 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/deprecate.R -\name{deprecate} -\alias{deprecate} -\alias{sink.reset} -\alias{add_lins} -\alias{acc2lin} -\alias{efetch_ipg} -\alias{ipg2lin} -\title{These functions will be deprecated. Please use other functions instead.} -\usage{ -sink.reset() - -add_lins( - df, - acc_col = "AccNum", - assembly_path, - lineagelookup_path, - ipgout_path = NULL, - plan = "multicore" -) - -acc2lin( - accessions, - assembly_path, - lineagelookup_path, - ipgout_path = NULL, - plan = "multicore" -) - -efetch_ipg(accessions, out_path, plan = "multicore") - -ipg2lin( - accessions, - ipg_file, - refseq_assembly_path, - genbank_assembly_path, - lineagelookup_path -) -} -\description{ -These functions will be deprecated. Please use other functions instead. 
-} diff --git a/man/efetch_ipg.Rd b/man/efetchIPG.Rd similarity index 78% rename from man/efetch_ipg.Rd rename to man/efetchIPG.Rd index efe1e8c5..157ceb75 100644 --- a/man/efetch_ipg.Rd +++ b/man/efetchIPG.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/lineage.R -\name{efetch_ipg} -\alias{efetch_ipg} -\title{efetch_ipg} +\name{efetchIPG} +\alias{efetchIPG} +\title{efetchIPG} \usage{ -efetch_ipg(accessions, out_path, plan = "multicore") +efetchIPG(accessions, out_path, plan = "multicore") } \arguments{ \item{accessions}{Character vector containing the accession numbers to query on From 542092f245a99ff1e75d9e0ed3f1c226384ece25 Mon Sep 17 00:00:00 2001 From: David Mayer Date: Mon, 7 Oct 2024 14:45:22 -0600 Subject: [PATCH 15/33] update .Rd files to reflect function rename --- man/convertIPRScanDomainTable2FA.Rd | 35 +++++++++++++++++++ ...domains.Rd => createIPRScanDomainTable.Rd} | 12 +++---- man/df_iprscan_domains2fasta.Rd | 35 ------------------- man/exec_interproscan.Rd | 14 -------- ...ta2fasta_domain.Rd => getDomainsFromFA.Rd} | 14 ++++---- ...ipr_col_names.Rd => getIPRScanColNames.Rd} | 6 ++-- ...ipr_col_types.Rd => getIPRScanColTypes.Rd} | 6 ++-- ...{read_iprscan_tsv.Rd => readIPRScanTSV.Rd} | 6 ++-- man/runIPRScan.Rd | 14 ++++++++ 9 files changed, 71 insertions(+), 71 deletions(-) create mode 100644 man/convertIPRScanDomainTable2FA.Rd rename man/{make_df_iprscan_domains.Rd => createIPRScanDomainTable.Rd} (83%) delete mode 100644 man/df_iprscan_domains2fasta.Rd delete mode 100644 man/exec_interproscan.Rd rename man/{fasta2fasta_domain.Rd => getDomainsFromFA.Rd} (76%) rename man/{get_df_ipr_col_names.Rd => getIPRScanColNames.Rd} (85%) rename man/{get_df_ipr_col_types.Rd => getIPRScanColTypes.Rd} (86%) rename man/{read_iprscan_tsv.Rd => readIPRScanTSV.Rd} (83%) create mode 100644 man/runIPRScan.Rd diff --git a/man/convertIPRScanDomainTable2FA.Rd b/man/convertIPRScanDomainTable2FA.Rd new file mode 100644 index 
00000000..7b8b9d24 --- /dev/null +++ b/man/convertIPRScanDomainTable2FA.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fa2domain.R +\name{convertIPRScanDomainTable2FA} +\alias{convertIPRScanDomainTable2FA} +\title{Using the table returned from createIPRScanDomainTable, construct a +domain fasta for a single accession number in the original fasta +(i.e., the original fasta argument to createIPRScanDomainTable())} +\usage{ +convertIPRScanDomainTable2FA(df_iprscan_domains) +} +\arguments{ +\item{df_iprscan_domains}{\link{tbl_df} return value from createIPRScanDomainTable} +} +\value{ +\link{AAStringSet} A domain fasta containing all the domains for a +single protein in the original fasta passed as an argument to createIPRScanDomainTable() +} +\description{ +Using the table returned from createIPRScanDomainTable, construct a +domain fasta for a single accession number in the original fasta +(i.e., the original fasta argument to createIPRScanDomainTable()) +} +\examples{ +\dontrun{ +path_molevol_scripts <- file.path(Sys.getenv("DEV", unset = "/data/molevolvr_transfer/molevolvr_dev"), "molevol_scripts") +setwd(path_molevol_scripts) +source("R/fa2domain.R") +fasta <- Biostrings::readAAStringSet("./tests/example_protein.fa") +df_iprscan <- readIPRScanTSV("./tests/example_iprscan_valid.tsv") +accnum <- df_iprscan$AccNum[1] +df_iprscan_domains <- createIPRScanDomainTable(accnum, fasta, df_iprscan) +fasta_domains <- df_iprscan_domains |> convertIPRScanDomainTable2FA() +} + +} diff --git a/man/make_df_iprscan_domains.Rd b/man/createIPRScanDomainTable.Rd similarity index 83% rename from man/make_df_iprscan_domains.Rd rename to man/createIPRScanDomainTable.Rd index 2f1871e2..b5d4abf7 100644 --- a/man/make_df_iprscan_domains.Rd +++ b/man/createIPRScanDomainTable.Rd @@ -1,11 +1,11 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/fa2domain.R -\name{make_df_iprscan_domains} 
-\alias{make_df_iprscan_domains} +\name{createIPRScanDomainTable} +\alias{createIPRScanDomainTable} \title{For a given accession number, get the domain sequences using a interproscan output table & the original FASTA file} \usage{ -make_df_iprscan_domains( +createIPRScanDomainTable( accnum, fasta, df_iprscan, @@ -19,7 +19,7 @@ which will be used to search for its sequence's domains (df_iprscan param)} \item{fasta}{\link{AAStringSet} original fasta file which was fed into interproscan} \item{df_iprscan}{\link{tbl_df} the output TSV of interproscan, read as a tibble with -read_iprscan_tsv()} +readIPRScanTSV()} \item{analysis}{\link{chr} the domain databases to extract sequences from} } @@ -36,9 +36,9 @@ path_molevol_scripts <- file.path(Sys.getenv("DEV", unset = "/data/molevolvr_tra setwd(path_molevol_scripts) source("R/fa2domain.R") fasta <- Biostrings::readAAStringSet("./tests/example_protein.fa") -df_iprscan <- read_iprscan_tsv("./tests/example_iprscan_valid.tsv") +df_iprscan <- readIPRScanTSV("./tests/example_iprscan_valid.tsv") accnum <- df_iprscan$AccNum[1] -df_iprscan_domains <- make_df_iprscan_domains(accnum, fasta, df_iprscan) +df_iprscan_domains <- createIPRScanDomainTable(accnum, fasta, df_iprscan) } } diff --git a/man/df_iprscan_domains2fasta.Rd b/man/df_iprscan_domains2fasta.Rd deleted file mode 100644 index 595b3310..00000000 --- a/man/df_iprscan_domains2fasta.Rd +++ /dev/null @@ -1,35 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/fa2domain.R -\name{df_iprscan_domains2fasta} -\alias{df_iprscan_domains2fasta} -\title{Using the table returned from make_df_iprscan_domains, construct a -domain fasta for a single accession number in the original fasta -(i.e., the original fasta argument to make_df_iprscan_domains())} -\usage{ -df_iprscan_domains2fasta(df_iprscan_domains) -} -\arguments{ -\item{df_iprscan_domains}{\link{tbl_df} return value from make_df_iprscan_domains} -} -\value{ -\link{AAStringSet} A domain fasta 
containing all the domains for a -single protein in the original fasta passed as an argument to make_df_iprscan_domains() -} -\description{ -Using the table returned from make_df_iprscan_domains, construct a -domain fasta for a single accession number in the original fasta -(i.e., the original fasta argument to make_df_iprscan_domains()) -} -\examples{ -\dontrun{ -path_molevol_scripts <- file.path(Sys.getenv("DEV", unset = "/data/molevolvr_transfer/molevolvr_dev"), "molevol_scripts") -setwd(path_molevol_scripts) -source("R/fa2domain.R") -fasta <- Biostrings::readAAStringSet("./tests/example_protein.fa") -df_iprscan <- read_iprscan_tsv("./tests/example_iprscan_valid.tsv") -accnum <- df_iprscan$AccNum[1] -df_iprscan_domains <- make_df_iprscan_domains(accnum, fasta, df_iprscan) -fasta_domains <- df_iprscan_domains |> df_iprscan_domains2fasta() -} - -} diff --git a/man/exec_interproscan.Rd b/man/exec_interproscan.Rd deleted file mode 100644 index b18ab579..00000000 --- a/man/exec_interproscan.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/fa2domain.R -\name{exec_interproscan} -\alias{exec_interproscan} -\title{exec_interproscan} -\usage{ -exec_interproscan(filepath_fasta, filepath_out, appl = c("Pfam", "Gene3D")) -} -\arguments{ -\item{appl}{} -} -\description{ -exec_interproscan -} diff --git a/man/fasta2fasta_domain.Rd b/man/getDomainsFromFA.Rd similarity index 76% rename from man/fasta2fasta_domain.Rd rename to man/getDomainsFromFA.Rd index 18169172..08ff911d 100644 --- a/man/fasta2fasta_domain.Rd +++ b/man/getDomainsFromFA.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/fa2domain.R -\name{fasta2fasta_domain} -\alias{fasta2fasta_domain} -\title{fasta2fasta_domain} +\name{getDomainsFromFA} +\alias{getDomainsFromFA} +\title{getDomainsFromFA} \usage{ -fasta2fasta_domain( +getDomainsFromFA( fasta, df_iprscan, analysis = c("Pfam", "Gene3D"), @@ -22,7 
+22,7 @@ fasta2fasta_domain( fasta_domains \link{AAStringSet} fasta of domains } \description{ -fasta2fasta_domain +getDomainsFromFA } \examples{ \dontrun{ @@ -30,8 +30,8 @@ path_molevol_scripts <- file.path(Sys.getenv("DEV", unset = "/data/molevolvr_tra setwd(path_molevol_scripts) source("R/fa2domain.R") fasta <- Biostrings::readAAStringSet("./tests/example_protein.fa") -df_iprscan <- read_iprscan_tsv("./tests/example_iprscan_valid.tsv") -fasta2fasta_domain(fasta, df_iprscan) +df_iprscan <- readIPRScanTSV("./tests/example_iprscan_valid.tsv") +getDomainsFromFA(fasta, df_iprscan) } } diff --git a/man/get_df_ipr_col_names.Rd b/man/getIPRScanColNames.Rd similarity index 85% rename from man/get_df_ipr_col_names.Rd rename to man/getIPRScanColNames.Rd index 56ce908f..7518081f 100644 --- a/man/get_df_ipr_col_names.Rd +++ b/man/getIPRScanColNames.Rd @@ -1,12 +1,12 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/fa2domain.R -\name{get_df_ipr_col_names} -\alias{get_df_ipr_col_names} +\name{getIPRScanColNames} +\alias{getIPRScanColNames} \title{Constructor function for interproscan column names (based upon the global variable written in molevol_scripts/R/colnames_molevol.R)} \usage{ -get_df_ipr_col_names() +getIPRScanColNames() } \value{ \link{chr} interproscan column names used throughout molevolvr diff --git a/man/get_df_ipr_col_types.Rd b/man/getIPRScanColTypes.Rd similarity index 86% rename from man/get_df_ipr_col_types.Rd rename to man/getIPRScanColTypes.Rd index 6f0f6f7d..f7abadce 100644 --- a/man/get_df_ipr_col_types.Rd +++ b/man/getIPRScanColTypes.Rd @@ -1,12 +1,12 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/fa2domain.R -\name{get_df_ipr_col_types} -\alias{get_df_ipr_col_types} +\name{getIPRScanColTypes} +\alias{getIPRScanColTypes} \title{construct column types for reading interproscan output TSVs (based upon the global variable written in molevol_scripts/R/colnames_molevol.R)} \usage{ 
-get_df_ipr_col_types() +getIPRScanColTypes() } \value{ \link{collector} a named vector of type expecatations diff --git a/man/read_iprscan_tsv.Rd b/man/readIPRScanTSV.Rd similarity index 83% rename from man/read_iprscan_tsv.Rd rename to man/readIPRScanTSV.Rd index e7a314a6..0c646c48 100644 --- a/man/read_iprscan_tsv.Rd +++ b/man/readIPRScanTSV.Rd @@ -1,11 +1,11 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/fa2domain.R -\name{read_iprscan_tsv} -\alias{read_iprscan_tsv} +\name{readIPRScanTSV} +\alias{readIPRScanTSV} \title{Read an interproscan output TSV with standardized column names and types} \usage{ -read_iprscan_tsv(filepath) +readIPRScanTSV(filepath) } \arguments{ \item{filepath}{\link{chr} path to interproscan output TSV} diff --git a/man/runIPRScan.Rd b/man/runIPRScan.Rd new file mode 100644 index 00000000..678d8652 --- /dev/null +++ b/man/runIPRScan.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fa2domain.R +\name{runIPRScan} +\alias{runIPRScan} +\title{runIPRScan} +\usage{ +runIPRScan(filepath_fasta, filepath_out, appl = c("Pfam", "Gene3D")) +} +\arguments{ +\item{appl}{} +} +\description{ +runIPRScan +} From 843ecda71722bd4d152c43075d7f49567e46b0b6 Mon Sep 17 00:00:00 2001 From: David Mayer Date: Mon, 7 Oct 2024 15:40:15 -0600 Subject: [PATCH 16/33] use new function name --- R/networks_domarch.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/networks_domarch.R b/R/networks_domarch.R index 66385a74..010b7619 100755 --- a/R/networks_domarch.R +++ b/R/networks_domarch.R @@ -74,11 +74,11 @@ domain_network <- function(prot, column = "DomArch", domains_of_interest, cutoff # string clean up all of the Domain Architecture columns prot <- prot |> - mutate(DomArch.ntwrk = clean_string(DomArch.ntwrk)) |> + mutate(DomArch.ntwrk = cleanString(DomArch.ntwrk)) |> mutate( across( all_of(column), - clean_string + cleanString ) ) domains_of_interest_regex <- 
paste(domains_of_interest, collapse = "|") From 3e36c7200ad49e8afabfecffd1547e798c4105cc Mon Sep 17 00:00:00 2001 From: David Mayer Date: Tue, 8 Oct 2024 07:50:03 -0600 Subject: [PATCH 17/33] adjust casing - addLineage() --- NAMESPACE | 2 +- R/acc2lin.R | 6 +++--- R/lineage.R | 4 ++-- man/acc2lin.Rd | 6 +++--- man/addlineage.Rd | 10 +++++----- 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index a526b959..726d1423 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,10 +9,10 @@ export(LineagePlot) export(RepresentativeAccNums) export(acc2Lineage) export(acc2fa) +export(addLineage) export(add_leaves) export(add_name) export(add_tax) -export(addlineage) export(advanced_opts2est_walltime) export(alignFasta) export(assert_count_df) diff --git a/R/acc2lin.R b/R/acc2lin.R index 71e7ae07..2b1e7078 100644 --- a/R/acc2lin.R +++ b/R/acc2lin.R @@ -24,7 +24,7 @@ sinkReset <- function() { } -#' Add Lineages +#' addLineage #' #' @param df #' @param acc_col @@ -43,9 +43,9 @@ sinkReset <- function() { #' #' @examples #' \dontrun{ -#' addlineage() +#' addLineage() #' } -addlineage <- function(df, acc_col = "AccNum", assembly_path, +addLineage <- function(df, acc_col = "AccNum", assembly_path, lineagelookup_path, ipgout_path = NULL, plan = "sequential", ...) { s_acc_col <- sym(acc_col) accessions <- df %>% pull(acc_col) diff --git a/R/lineage.R b/R/lineage.R index 3775b63b..f136c719 100644 --- a/R/lineage.R +++ b/R/lineage.R @@ -133,7 +133,7 @@ GCA2lin <- function(prot_data, ################################### ## !! @SAM why is this called lins? 
################################### -#' addlineage +#' addLineage #' #' @param df #' @param acc_col @@ -149,7 +149,7 @@ GCA2lin <- function(prot_data, #' @export #' #' @examples -addlineage <- function(df, acc_col = "AccNum", assembly_path, +addLineage <- function(df, acc_col = "AccNum", assembly_path, lineagelookup_path, ipgout_path = NULL, plan = "multicore") { acc_sym <- sym(acc_col) accessions <- df %>% pull(acc_sym) diff --git a/man/acc2lin.Rd b/man/acc2lin.Rd index 92b2887b..88663260 100644 --- a/man/acc2lin.Rd +++ b/man/acc2lin.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/acc2lin.R \name{sinkReset} \alias{sinkReset} -\alias{addlineage} +\alias{addLineage} \alias{acc2Lineage} \alias{efetchIPG} \alias{IPG2Lineage} @@ -10,7 +10,7 @@ \usage{ sinkReset() -addlineage( +addLineage( df, acc_col = "AccNum", assembly_path, @@ -85,7 +85,7 @@ Takes the resulting file of an efetch run on the ipg database and sinkReset() } \dontrun{ -addlineage() +addLineage() } \dontrun{ acc2Lineage() diff --git a/man/addlineage.Rd b/man/addlineage.Rd index 7f34dc9f..f13259fa 100644 --- a/man/addlineage.Rd +++ b/man/addlineage.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/lineage.R -\name{addlineage} -\alias{addlineage} -\title{addlineage} +\name{addLineage} +\alias{addLineage} +\title{addLineage} \usage{ -addlineage( +addLineage( df, acc_col = "AccNum", assembly_path, @@ -17,5 +17,5 @@ addlineage( \item{plan}{} } \description{ -addlineage +addLineage } From 6ce981d2922889987212dad321e6fd89210af5f6 Mon Sep 17 00:00:00 2001 From: David Mayer Date: Tue, 8 Oct 2024 08:06:11 -0600 Subject: [PATCH 18/33] adjust namespace based on upstream --- NAMESPACE | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index fa961be9..da443880 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,11 +7,11 @@ export(GenContextNetwork) export(IPG2Lineage) export(LineagePlot) export(RepresentativeAccNums) 
-export(acc2Lineage) export(acc2FA) +export(acc2Lineage) export(acc2fa) -export(addLineage) export(addLeaves2Alignment) +export(addLineage) export(addName) export(add_leaves) export(add_name) @@ -73,6 +73,7 @@ export(map_advanced_opts2procs) export(msa_pdf) export(plot_estimated_walltimes) export(prot2tax) +export(prot2tax_old) export(removeAsterisks) export(removeEmptyRows) export(removeTails) From 2d47952d4e53b04bf925632ea9c222dd3a83b347 Mon Sep 17 00:00:00 2001 From: David Mayer Date: Tue, 8 Oct 2024 08:13:37 -0600 Subject: [PATCH 19/33] remove old rd tag for acc2lin --- R/acc2lin.R | 5 --- man/IPG2Lineage.Rd | 24 ++++++++++- man/acc2Lineage.Rd | 21 ++++++++- man/acc2lin.Rd | 103 --------------------------------------------- man/addlineage.Rd | 21 ++++++++- man/efetchIPG.Rd | 17 +++++++- man/sinkReset.Rd | 19 +++++++++ 7 files changed, 98 insertions(+), 112 deletions(-) delete mode 100644 man/acc2lin.Rd create mode 100644 man/sinkReset.Rd diff --git a/R/acc2lin.R b/R/acc2lin.R index 2b1e7078..73aca0f4 100644 --- a/R/acc2lin.R +++ b/R/acc2lin.R @@ -10,7 +10,6 @@ #' Sink Reset #' #' @return No return, but run to close all outstanding `sink()`s -#' @rdname acc2lin #' @export #' #' @examples @@ -38,7 +37,6 @@ sinkReset <- function() { #' @importFrom rlang sym #' #' @return Describe return, in detail -#' @rdname acc2lin #' @export #' #' @examples @@ -83,7 +81,6 @@ addLineage <- function(df, acc_col = "AccNum", assembly_path, #' @param plan #' #' @return Describe return, in detail -#' @rdname acc2lin #' @export #' #' @examples @@ -122,7 +119,6 @@ acc2Lineage <- function(accessions, assembly_path, lineagelookup_path, ipgout_pa #' @importFrom rentrez entrez_fetch #' #' @return Describe return, in detail -#' @rdname acc2lin #' @export #' #' @examples @@ -191,7 +187,6 @@ efetchIPG <- function(accnums, out_path, plan = "sequential", ...) 
{ #' @importFrom data.table fread #' #' @return Describe return, in detail -#' @rdname acc2lin #' @export #' #' @examples diff --git a/man/IPG2Lineage.Rd b/man/IPG2Lineage.Rd index 43a920b9..cf3e635e 100644 --- a/man/IPG2Lineage.Rd +++ b/man/IPG2Lineage.Rd @@ -1,9 +1,17 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/lineage.R +% Please edit documentation in R/acc2lin.R, R/lineage.R \name{IPG2Lineage} \alias{IPG2Lineage} \title{IPG2Lineage} \usage{ +IPG2Lineage( + accessions, + ipg_file, + refseq_assembly_path, + genbank_assembly_path, + lineagelookup_path +) + IPG2Lineage( accessions, ipg_file, @@ -22,10 +30,24 @@ file} \item{lineagelookup_path}{String of the path to the lineage lookup file (taxid to lineage mapping). This file can be generated using the "create_lineage_lookup()" function} + +\item{assembly_path}{String of the path to the assembly_summary path +This file can be generated using the "DownloadAssemblySummary()" function} +} +\value{ +Describe return, in detail } \description{ +Takes the resulting file of an efetch run on the ipg database and + Takes the resulting file of an efetch run on the ipg database and append lineage, and taxid columns +} +\examples{ +\dontrun{ +IPG2Lineage() +} + } \author{ Samuel Chen, Janani Ravi diff --git a/man/acc2Lineage.Rd b/man/acc2Lineage.Rd index 5ab5931a..d632c52e 100644 --- a/man/acc2Lineage.Rd +++ b/man/acc2Lineage.Rd @@ -1,9 +1,17 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/lineage.R +% Please edit documentation in R/acc2lin.R, R/lineage.R \name{acc2Lineage} \alias{acc2Lineage} \title{acc2Lineage} \usage{ +acc2Lineage( + accessions, + assembly_path, + lineagelookup_path, + ipgout_path = NULL, + plan = "multicore" +) + acc2Lineage( accessions, assembly_path, @@ -26,12 +34,23 @@ on the ipg database. If NULL, the file will not be written. 
Defaults to NULL} \item{plan}{} } +\value{ +Describe return, in detail +} \description{ +This function combines 'efetchIPG()' and 'IPG2Lineage()' to map a set +of protein accessions to their assembly (GCA_ID), tax ID, and lineage. + Function to map protein accession numbers to lineage This function combines 'efetchIPG()' and 'IPG2Lineage()' to map a set of protein accessions to their assembly (GCA_ID), tax ID, and lineage. } +\examples{ +\dontrun{ +acc2Lineage() +} +} \author{ Samuel Chen, Janani Ravi } diff --git a/man/acc2lin.Rd b/man/acc2lin.Rd deleted file mode 100644 index 88663260..00000000 --- a/man/acc2lin.Rd +++ /dev/null @@ -1,103 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/acc2lin.R -\name{sinkReset} -\alias{sinkReset} -\alias{addLineage} -\alias{acc2Lineage} -\alias{efetchIPG} -\alias{IPG2Lineage} -\title{Sink Reset} -\usage{ -sinkReset() - -addLineage( - df, - acc_col = "AccNum", - assembly_path, - lineagelookup_path, - ipgout_path = NULL, - plan = "multicore" -) - -acc2Lineage( - accessions, - assembly_path, - lineagelookup_path, - ipgout_path = NULL, - plan = "multicore" -) - -efetchIPG(accessions, out_path, plan = "multicore") - -IPG2Lineage( - accessions, - ipg_file, - refseq_assembly_path, - genbank_assembly_path, - lineagelookup_path -) -} -\arguments{ -\item{assembly_path}{String of the path to the assembly_summary path -This file can be generated using the "DownloadAssemblySummary()" function} - -\item{lineagelookup_path}{String of the path to the lineage lookup file -(taxid to lineage mapping). This file can be generated using the -"create_lineage_lookup()" function} - -\item{ipgout_path}{Path to write the results of the efetch run of the accessions -on the ipg database. If NULL, the file will not be written. 
Defaults to NULL} - -\item{plan}{} - -\item{accessions}{Character vector of protein accessions} - -\item{out_path}{Path to write the efetch results to} - -\item{ipg_file}{Filepath to the file containing results of an efetch run on the -ipg database. The protein accession in 'accessions' should be contained in this -file} - -\item{accnums}{Character vector containing the accession numbers to query on -the ipg database} -} -\value{ -No return, but run to close all outstanding \code{sink()}s - -Describe return, in detail - -Describe return, in detail - -Describe return, in detail - -Describe return, in detail -} -\description{ -This function combines 'efetchIPG()' and 'IPG2Lineage()' to map a set -of protein accessions to their assembly (GCA_ID), tax ID, and lineage. - -Perform efetch on the ipg database and write the results to out_path - -Takes the resulting file of an efetch run on the ipg database and -} -\examples{ -\dontrun{ -sinkReset() -} -\dontrun{ -addLineage() -} -\dontrun{ -acc2Lineage() -} -\dontrun{ -efetchIPG() -} -\dontrun{ -IPG2Lineage() -} - -} -\author{ -Samuel Chen, Janani Ravi -} diff --git a/man/addlineage.Rd b/man/addlineage.Rd index f13259fa..6694e94c 100644 --- a/man/addlineage.Rd +++ b/man/addlineage.Rd @@ -1,9 +1,18 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/lineage.R +% Please edit documentation in R/acc2lin.R, R/lineage.R \name{addLineage} \alias{addLineage} \title{addLineage} \usage{ +addLineage( + df, + acc_col = "AccNum", + assembly_path, + lineagelookup_path, + ipgout_path = NULL, + plan = "multicore" +) + addLineage( df, acc_col = "AccNum", @@ -16,6 +25,16 @@ addLineage( \arguments{ \item{plan}{} } +\value{ +Describe return, in detail +} \description{ addLineage + +addLineage +} +\examples{ +\dontrun{ +addLineage() +} } diff --git a/man/efetchIPG.Rd b/man/efetchIPG.Rd index 157ceb75..6a5d85a4 100644 --- a/man/efetchIPG.Rd +++ b/man/efetchIPG.Rd @@ -1,9 +1,11 @@ % Generated by roxygen2: do not 
edit by hand -% Please edit documentation in R/lineage.R +% Please edit documentation in R/acc2lin.R, R/lineage.R \name{efetchIPG} \alias{efetchIPG} \title{efetchIPG} \usage{ +efetchIPG(accessions, out_path, plan = "multicore") + efetchIPG(accessions, out_path, plan = "multicore") } \arguments{ @@ -13,9 +15,22 @@ the ipg database} \item{out_path}{Path to write the efetch results to} \item{plan}{} + +\item{accnums}{Character vector containing the accession numbers to query on +the ipg database} +} +\value{ +Describe return, in detail } \description{ Perform efetch on the ipg database and write the results to out_path + +Perform efetch on the ipg database and write the results to out_path +} +\examples{ +\dontrun{ +efetchIPG() +} } \author{ Samuel Chen, Janani Ravi diff --git a/man/sinkReset.Rd b/man/sinkReset.Rd new file mode 100644 index 00000000..0285c0b2 --- /dev/null +++ b/man/sinkReset.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/acc2lin.R +\name{sinkReset} +\alias{sinkReset} +\title{Sink Reset} +\usage{ +sinkReset() +} +\value{ +No return, but run to close all outstanding \code{sink()}s +} +\description{ +Sink Reset +} +\examples{ +\dontrun{ +sinkReset() +} +} From 331b6515f4c000cc0d30a9ba3749d27ef5a97339 Mon Sep 17 00:00:00 2001 From: David Mayer Date: Tue, 8 Oct 2024 08:28:44 -0600 Subject: [PATCH 20/33] namespace conflict resolution for other PRs --- NAMESPACE | 1 - 1 file changed, 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index a309f180..157a53e5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,7 +5,6 @@ export(DownloadAssemblySummary) export(GCA2lin) export(GenContextNetwork) export(IPG2Lineage) -export(LineagePlot) export(RepresentativeAccNums) export(acc2FA) export(acc2Lineage) From 8a577797c05aad8aab61b2eef67299cdc2b90f88 Mon Sep 17 00:00:00 2001 From: teddyCodex Date: Tue, 8 Oct 2024 15:46:05 +0100 Subject: [PATCH 21/33] refactor function names in R/summarize.R --- NAMESPACE | 23 +++--- 
R/summarize.R | 78 +++++++++---------- man/{count_bycol.Rd => countbycolumn.Rd} | 8 +- man/elements2words.Rd | 29 ------- man/{filter_by_doms.Rd => filterbydomains.Rd} | 10 +-- man/{filter_freq.Rd => filterbyfrequency.Rd} | 8 +- man/{find_paralogs.Rd => findparalogs.Rd} | 8 +- man/summ.GC.byDALin.Rd | 22 ------ man/summ.GC.byLin.Rd | 22 ------ man/{summ.DA.Rd => summarizeDomArch.Rd} | 10 +-- ...byLin.Rd => summarizeDomArch_ByLineage.Rd} | 10 +-- man/{summ.GC.Rd => summarizeGenContext.Rd} | 12 +-- man/summarizeGenContext_ByDomArchLineage.Rd | 22 ++++++ man/summarizeGenContext_ByLineage.Rd | 22 ++++++ ...mmarize_bylin.Rd => summarizebylineage.Rd} | 8 +- ...s.Rd => totalgencontextordomarchcounts.Rd} | 8 +- man/{words2wc.Rd => words2wordcounts.Rd} | 10 +-- 17 files changed, 141 insertions(+), 169 deletions(-) rename man/{count_bycol.Rd => countbycolumn.Rd} (66%) delete mode 100644 man/elements2words.Rd rename man/{filter_by_doms.Rd => filterbydomains.Rd} (84%) rename man/{filter_freq.Rd => filterbyfrequency.Rd} (71%) rename man/{find_paralogs.Rd => findparalogs.Rd} (84%) delete mode 100644 man/summ.GC.byDALin.Rd delete mode 100644 man/summ.GC.byLin.Rd rename man/{summ.DA.Rd => summarizeDomArch.Rd} (70%) rename man/{summ.DA.byLin.Rd => summarizeDomArch_ByLineage.Rd} (62%) rename man/{summ.GC.Rd => summarizeGenContext.Rd} (57%) create mode 100644 man/summarizeGenContext_ByDomArchLineage.Rd create mode 100644 man/summarizeGenContext_ByLineage.Rd rename man/{summarize_bylin.Rd => summarizebylineage.Rd} (68%) rename man/{total_counts.Rd => totalgencontextordomarchcounts.Rd} (84%) rename man/{words2wc.Rd => words2wordcounts.Rd} (77%) diff --git a/NAMESPACE b/NAMESPACE index af1d0ba4..a8632305 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -33,19 +33,19 @@ export(convert2TitleCase) export(convertAlignment2FA) export(convert_aln2fa) export(convert_fa2tre) -export(count_bycol) export(count_to_sunburst) export(count_to_treemap) +export(countbycolumn) export(create_all_col_params) 
export(create_lineage_lookup) export(create_one_col_params) export(domain_network) export(efetch_ipg) export(extractAccNum) -export(filter_by_doms) -export(filter_freq) -export(find_paralogs) +export(filterbydomains) +export(filterbyfrequency) export(find_top_acc) +export(findparalogs) export(format_job_args) export(gc_undirected_network) export(generateAllAlignments2FA) @@ -73,6 +73,7 @@ export(map_advanced_opts2procs) export(msa_pdf) export(plot_estimated_walltimes) export(prot2tax) +export(prot2tax_old) export(removeAsterisks) export(removeEmptyRows) export(removeTails) @@ -88,15 +89,15 @@ export(send_job_status_email) export(shorten_lineage) export(sink.reset) export(stacked_lin_plot) -export(summ.DA) -export(summ.DA.byLin) -export(summ.GC) -export(summ.GC.byDALin) -export(summ.GC.byLin) -export(summarize_bylin) +export(summarizeDomArch) +export(summarizeDomArch_ByLineage) +export(summarizeGenContext) +export(summarizeGenContext_ByDomArchLineage) +export(summarizeGenContext_ByLineage) +export(summarizebylineage) export(theme_genes2) export(to_titlecase) -export(total_counts) +export(totalgencontextordomarchcounts) export(upset.plot) export(wordcloud2_element) export(wordcloud3) diff --git a/R/summarize.R b/R/summarize.R index a9b13e43..d2cef471 100644 --- a/R/summarize.R +++ b/R/summarize.R @@ -13,7 +13,7 @@ #' Filter by Domains #' #' @author Samuel Chen, Janani Ravi -#' @description filter_by_doms filters a data frame by identifying exact domain matches +#' @description filterbydomains filters a data frame by identifying exact domain matches #' and either keeping or removing rows with the identified domain #' #' @param prot Dataframe to filter @@ -33,9 +33,9 @@ #' #' @examples #' \dontrun{ -#' filter_by_doms() +#' filterbydomains() #' } -filter_by_doms <- function(prot, column = "DomArch", doms_keep = c(), doms_remove = c(), +filterbydomains <- function(prot, column = "DomArch", doms_keep = c(), doms_remove = c(), ignore.case = FALSE) { # Only rows with a 
domain in doms_keep will be kept # Any row containing a domain in doms_remove will be removed @@ -102,9 +102,9 @@ filter_by_doms <- function(prot, column = "DomArch", doms_keep = c(), doms_remov #' #' @examples #' \dontrun{ -#' count_bycol() +#' countbycolumn() #' } -count_bycol <- function(prot = prot, column = "DomArch", min.freq = 1) { +countbycolumn <- function(prot = prot, column = "DomArch", min.freq = 1) { counts <- prot %>% select(column) %>% table() %>% @@ -135,10 +135,10 @@ count_bycol <- function(prot = prot, column = "DomArch", min.freq = 1) { #' #' @examples #' \dontrun{ -#' tibble::tibble(DomArch = c("aaa+bbb", "a+b", "b+c", "b-c")) |> elements2words() +#' tibble::tibble(DomArch = c("aaa+bbb", "a+b", "b+c", "b-c")) |> elements2Words() #' } #' -elements2words <- function(prot, column = "DomArch", conversion_type = "da2doms") { +elements2Words <- function(prot, column = "DomArch", conversion_type = "da2doms") { z1 <- prot %>% dplyr::pull(column) %>% str_replace_all("\\,", " ") %>% @@ -184,11 +184,11 @@ elements2words <- function(prot, column = "DomArch", conversion_type = "da2doms" #' @examples #' \dontrun{ #' tibble::tibble(DomArch = c("aaa+bbb", "a+b", "b+c", "b-c")) |> -#' elements2words() |> -#' words2wc() +#' elements2Words() |> +#' words2wordcounts() #' } #' -words2wc <- function(string) { +words2wordcounts <- function(string) { df_word_count <- string %>% # reduce spaces with length 2 or greater to a single space str_replace_all("\\s{2,}", " ") %>% @@ -227,9 +227,9 @@ words2wc <- function(string) { #' #' @examples #' \dontrun{ -#' filter_freq() +#' filterbyfrequency() #' } -filter_freq <- function(x, min.freq) { +filterbyfrequency <- function(x, min.freq) { x %>% filter(freq >= min.freq) } @@ -254,10 +254,10 @@ filter_freq <- function(x, min.freq) { #' \dontrun{ #' library(tidyverse) #' tibble(DomArch = c("a+b", "a+b", "b+c", "a+b"), Lineage = c("l1", "l1", "l1", "l2")) |> -#' summarize_bylin(query = "all") +#' summarizebylineage(query = "all") 
#' } #' -summarize_bylin <- function(prot = "prot", column = "DomArch", by = "Lineage", +summarizebylineage <- function(prot = "prot", column = "DomArch", by = "Lineage", query) { column <- sym(column) by <- sym(by) @@ -277,7 +277,7 @@ summarize_bylin <- function(prot = "prot", column = "DomArch", by = "Lineage", } -#' summ.DA.byLin +#' summarizeDomArch_ByLineage #' #' @description #' Function to summarize and retrieve counts by Domains & Domains+Lineage @@ -292,9 +292,9 @@ summarize_bylin <- function(prot = "prot", column = "DomArch", by = "Lineage", #' #' @examples #' \dontrun{ -#' summ.DA.byLin() +#' summarizeDomArch_ByLineage() #' } -summ.DA.byLin <- function(x) { +summarizeDomArch_ByLineage <- function(x) { ## Note: it is better to reserve dots for S3 Objects. Consider replacing '.' with '_' x %>% filter(!grepl("^-$", DomArch)) %>% @@ -304,7 +304,7 @@ summ.DA.byLin <- function(x) { } ## Function to retrieve counts of how many lineages a DomArch appears in -#' summ.DA +#' summarizeDomArch #' #' @description #' Function to retrieve counts of how many lineages a DomArch appears in @@ -318,9 +318,9 @@ summ.DA.byLin <- function(x) { #' #' @examples #' \dontrun{ -#' summ.DA() +#' summarizeDomArch() #' } -summ.DA <- function(x) { +summarizeDomArch <- function(x) { ## Note: it is better to reserve dots for S3 Objects. Consider replacing '.' with '_' x %>% group_by(DomArch) %>% @@ -330,7 +330,7 @@ summ.DA <- function(x) { filter(!grepl("^-$", DomArch)) } -#' summ.GC.byDALin +#' summarizeGenContext_ByDomArchLineage #' #' @param x #' @@ -341,9 +341,9 @@ summ.DA <- function(x) { #' #' @examples #' \dontrun{ -#' summ.GC.byDALin +#' summarizeGenContext_ByDomArchLineage #' } -summ.GC.byDALin <- function(x) { +summarizeGenContext_ByDomArchLineage <- function(x) { ## Note: it is better to reserve dots for S3 Objects. Consider replacing '.' 
with '_' x %>% filter(!grepl("^-$", GenContext)) %>% @@ -355,7 +355,7 @@ summ.GC.byDALin <- function(x) { arrange(desc(count)) } -#' summ.GC.byLin +#' summarizeGenContext_ByLineage #' #' @param x #' @@ -366,9 +366,9 @@ summ.GC.byDALin <- function(x) { #' #' @examples #' \dontrun{ -#' summ.GC.byLin() +#' summarizeGenContext_ByLineage() #' } -summ.GC.byLin <- function(x) { +summarizeGenContext_ByLineage <- function(x) { ## Note: it is better to reserve dots for S3 Objects. Consider replacing '.' with '_' x %>% filter(!grepl("^-$", GenContext)) %>% @@ -380,7 +380,7 @@ summ.GC.byLin <- function(x) { arrange(desc(count)) } -#' summ.GC +#' summarizeGenContext #' #' @param x #' @@ -391,9 +391,9 @@ summ.GC.byLin <- function(x) { #' #' @examples #' \dontrun{ -#' summ.GC() +#' summarizeGenContext() #' } -summ.GC <- function(x) { +summarizeGenContext <- function(x) { ## Note: it is better to reserve dots for S3 Objects. Consider replacing '.' with '_' x %>% group_by(GenContext) %>% @@ -436,9 +436,9 @@ summ.GC <- function(x) { #' #' @examples #' \dontrun{ -#' total_counts(pspa - gc_lin_counts, 0, "GC") +#' totalgencontextordomarchcounts(pspa - gc_lin_counts, 0, "GC") #' } -total_counts <- function(prot, column = "DomArch", lineage_col = "Lineage", +totalgencontextordomarchcounts <- function(prot, column = "DomArch", lineage_col = "Lineage", cutoff = 90, RowsCutoff = FALSE, digits = 2 # type = "GC" ) { @@ -448,7 +448,7 @@ total_counts <- function(prot, column = "DomArch", lineage_col = "Lineage", filter(!is.na({{ column }}) & !is.na({{ lineage_col }})) %>% filter({{ column }} != "") - prot <- summarize_bylin(prot, column, by = lineage_col, query = "all") + prot <- summarizebylineage(prot, column, by = lineage_col, query = "all") col_count <- prot %>% group_by({{ column }}) %>% summarise(totalcount = sum(count)) @@ -598,9 +598,9 @@ total_counts <- function(prot, column = "DomArch", lineage_col = "Lineage", #' #' @examples #' \dontrun{ -#' find_paralogs(pspa) +#' 
findparalogs(pspa) #' } -find_paralogs <- function(prot) { +findparalogs <- function(prot) { # Remove eukaryotes prot <- prot %>% filter(!grepl("^eukaryota", Lineage)) paralogTable <- prot %>% @@ -635,17 +635,17 @@ find_paralogs <- function(prot) { # query.sub$GenContext %>% # counts(n)") -# ## elements2words: Function to break up ELEMENTS to WORDS for DA and GC +# ## elements2Words: Function to break up ELEMENTS to WORDS for DA and GC # cat("Converting DA to domains and GC to DAs.\n2 switches: da2doms and gc2da # \nFor e.g.: # query.sub$DA.doms <- query.sub$DomArch.norep %>% -# elements2words(\"da2doms\") +# elements2Words(\"da2doms\") # query.sub$GC.da <- query.sub$GenContext %>% -# elements2words(\"gc2da\")") +# elements2Words(\"gc2da\")") -# ## words2wc: Function to get WORD COUNTS [DOMAINS (DA) or DOMAIN ARCHITECTURES (GC)] +# ## words2wordcounts: Function to get WORD COUNTS [DOMAINS (DA) or DOMAIN ARCHITECTURES (GC)] # cat("Word counts for broken up domains from DAs and DAs from GCs. 
# \nFor e.g.: # DA.doms.wc <- query.sub$DA.doms %>% -# words2wc()") +# words2wordcounts()") diff --git a/man/count_bycol.Rd b/man/countbycolumn.Rd similarity index 66% rename from man/count_bycol.Rd rename to man/countbycolumn.Rd index 884c0f0f..e669a4a3 100644 --- a/man/count_bycol.Rd +++ b/man/countbycolumn.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/summarize.R -\name{count_bycol} -\alias{count_bycol} +\name{countbycolumn} +\alias{countbycolumn} \title{Count Bycol} \usage{ -count_bycol(prot = prot, column = "DomArch", min.freq = 1) +countbycolumn(prot = prot, column = "DomArch", min.freq = 1) } \arguments{ \item{min.freq}{} @@ -17,6 +17,6 @@ Count Bycol } \examples{ \dontrun{ -count_bycol() +countbycolumn() } } diff --git a/man/elements2words.Rd b/man/elements2words.Rd deleted file mode 100644 index 80fcbafb..00000000 --- a/man/elements2words.Rd +++ /dev/null @@ -1,29 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarize.R -\name{elements2words} -\alias{elements2words} -\title{Elements 2 Words} -\usage{ -elements2words(prot, column = "DomArch", conversion_type = "da2doms") -} -\arguments{ -\item{prot}{\link{dataframe}} - -\item{column}{\link{string} column name} - -\item{conversion_type}{\link{string} type of conversion: 'da2doms': domain architectures to -domains. 
'gc2da' genomic context to domain architectures} -} -\value{ -\link{string} with words delimited by spaces -} -\description{ -Break string ELEMENTS into WORDS for domain architecture (DA) and genomic -context (GC) -} -\examples{ -\dontrun{ -tibble::tibble(DomArch = c("aaa+bbb", "a+b", "b+c", "b-c")) |> elements2words() -} - -} diff --git a/man/filter_by_doms.Rd b/man/filterbydomains.Rd similarity index 84% rename from man/filter_by_doms.Rd rename to man/filterbydomains.Rd index cfe255ca..7fd148e7 100644 --- a/man/filter_by_doms.Rd +++ b/man/filterbydomains.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/summarize.R -\name{filter_by_doms} -\alias{filter_by_doms} +\name{filterbydomains} +\alias{filterbydomains} \title{Filter by Domains} \usage{ -filter_by_doms( +filterbydomains( prot, column = "DomArch", doms_keep = c(), @@ -28,7 +28,7 @@ observation to be kept} Filtered data frame } \description{ -filter_by_doms filters a data frame by identifying exact domain matches +filterbydomains filters a data frame by identifying exact domain matches and either keeping or removing rows with the identified domain } \note{ @@ -36,7 +36,7 @@ There is no need to make the domains 'regex safe', that will be handled by this } \examples{ \dontrun{ -filter_by_doms() +filterbydomains() } } \author{ diff --git a/man/filter_freq.Rd b/man/filterbyfrequency.Rd similarity index 71% rename from man/filter_freq.Rd rename to man/filterbyfrequency.Rd index ce4db5ac..d219a100 100644 --- a/man/filter_freq.Rd +++ b/man/filterbyfrequency.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/summarize.R -\name{filter_freq} -\alias{filter_freq} +\name{filterbyfrequency} +\alias{filterbyfrequency} \title{Filter Frequency} \usage{ -filter_freq(x, min.freq) +filterbyfrequency(x, min.freq) } \arguments{ \item{min.freq}{} @@ -17,6 +17,6 @@ Filter Frequency } \examples{ \dontrun{ -filter_freq() 
+filterbyfrequency() } } diff --git a/man/find_paralogs.Rd b/man/findparalogs.Rd similarity index 84% rename from man/find_paralogs.Rd rename to man/findparalogs.Rd index fbf14384..7e985fe5 100644 --- a/man/find_paralogs.Rd +++ b/man/findparalogs.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/summarize.R -\name{find_paralogs} -\alias{find_paralogs} +\name{findparalogs} +\alias{findparalogs} \title{Find Paralogs} \usage{ -find_paralogs(prot) +findparalogs(prot) } \arguments{ \item{prot}{A data frame filtered by a Query, containing columns Species and Lineage} @@ -21,6 +21,6 @@ column names. } \examples{ \dontrun{ -find_paralogs(pspa) +findparalogs(pspa) } } diff --git a/man/summ.GC.byDALin.Rd b/man/summ.GC.byDALin.Rd deleted file mode 100644 index 34c9f84d..00000000 --- a/man/summ.GC.byDALin.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarize.R -\name{summ.GC.byDALin} -\alias{summ.GC.byDALin} -\title{summ.GC.byDALin} -\usage{ -summ.GC.byDALin(x) -} -\arguments{ -\item{x}{} -} -\value{ -Define return, in detail -} -\description{ -summ.GC.byDALin -} -\examples{ -\dontrun{ -summ.GC.byDALin -} -} diff --git a/man/summ.GC.byLin.Rd b/man/summ.GC.byLin.Rd deleted file mode 100644 index df2a8fb8..00000000 --- a/man/summ.GC.byLin.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarize.R -\name{summ.GC.byLin} -\alias{summ.GC.byLin} -\title{summ.GC.byLin} -\usage{ -summ.GC.byLin(x) -} -\arguments{ -\item{x}{} -} -\value{ -Describe return, in detail -} -\description{ -summ.GC.byLin -} -\examples{ -\dontrun{ -summ.GC.byLin() -} -} diff --git a/man/summ.DA.Rd b/man/summarizeDomArch.Rd similarity index 70% rename from man/summ.DA.Rd rename to man/summarizeDomArch.Rd index 13717140..11db1afa 100644 --- a/man/summ.DA.Rd +++ b/man/summarizeDomArch.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do 
not edit by hand % Please edit documentation in R/summarize.R -\name{summ.DA} -\alias{summ.DA} -\title{summ.DA} +\name{summarizeDomArch} +\alias{summarizeDomArch} +\title{summarizeDomArch} \usage{ -summ.DA(x) +summarizeDomArch(x) } \arguments{ \item{x}{} @@ -17,6 +17,6 @@ Function to retrieve counts of how many lineages a DomArch appears in } \examples{ \dontrun{ -summ.DA() +summarizeDomArch() } } diff --git a/man/summ.DA.byLin.Rd b/man/summarizeDomArch_ByLineage.Rd similarity index 62% rename from man/summ.DA.byLin.Rd rename to man/summarizeDomArch_ByLineage.Rd index 66555fd5..cf5fac22 100644 --- a/man/summ.DA.byLin.Rd +++ b/man/summarizeDomArch_ByLineage.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/summarize.R -\name{summ.DA.byLin} -\alias{summ.DA.byLin} -\title{summ.DA.byLin} +\name{summarizeDomArch_ByLineage} +\alias{summarizeDomArch_ByLineage} +\title{summarizeDomArch_ByLineage} \usage{ -summ.DA.byLin(x) +summarizeDomArch_ByLineage(x) } \arguments{ \item{x}{} @@ -17,6 +17,6 @@ Function to summarize and retrieve counts by Domains & Domains+Lineage } \examples{ \dontrun{ -summ.DA.byLin() +summarizeDomArch_ByLineage() } } diff --git a/man/summ.GC.Rd b/man/summarizeGenContext.Rd similarity index 57% rename from man/summ.GC.Rd rename to man/summarizeGenContext.Rd index fa52a6bf..5a40811b 100644 --- a/man/summ.GC.Rd +++ b/man/summarizeGenContext.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/summarize.R -\name{summ.GC} -\alias{summ.GC} -\title{summ.GC} +\name{summarizeGenContext} +\alias{summarizeGenContext} +\title{summarizeGenContext} \usage{ -summ.GC(x) +summarizeGenContext(x) } \arguments{ \item{x}{} @@ -13,10 +13,10 @@ summ.GC(x) Describe return, in detail } \description{ -summ.GC +summarizeGenContext } \examples{ \dontrun{ -summ.GC() +summarizeGenContext() } } diff --git a/man/summarizeGenContext_ByDomArchLineage.Rd 
b/man/summarizeGenContext_ByDomArchLineage.Rd new file mode 100644 index 00000000..59e0376e --- /dev/null +++ b/man/summarizeGenContext_ByDomArchLineage.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/summarize.R +\name{summarizeGenContext_ByDomArchLineage} +\alias{summarizeGenContext_ByDomArchLineage} +\title{summarizeGenContext_ByDomArchLineage} +\usage{ +summarizeGenContext_ByDomArchLineage(x) +} +\arguments{ +\item{x}{} +} +\value{ +Define return, in detail +} +\description{ +summarizeGenContext_ByDomArchLineage +} +\examples{ +\dontrun{ +summarizeGenContext_ByDomArchLineage +} +} diff --git a/man/summarizeGenContext_ByLineage.Rd b/man/summarizeGenContext_ByLineage.Rd new file mode 100644 index 00000000..932fe6a7 --- /dev/null +++ b/man/summarizeGenContext_ByLineage.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/summarize.R +\name{summarizeGenContext_ByLineage} +\alias{summarizeGenContext_ByLineage} +\title{summarizeGenContext_ByLineage} +\usage{ +summarizeGenContext_ByLineage(x) +} +\arguments{ +\item{x}{} +} +\value{ +Describe return, in detail +} +\description{ +summarizeGenContext_ByLineage +} +\examples{ +\dontrun{ +summarizeGenContext_ByLineage() +} +} diff --git a/man/summarize_bylin.Rd b/man/summarizebylineage.Rd similarity index 68% rename from man/summarize_bylin.Rd rename to man/summarizebylineage.Rd index a94c54c1..34d72c37 100644 --- a/man/summarize_bylin.Rd +++ b/man/summarizebylineage.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/summarize.R -\name{summarize_bylin} -\alias{summarize_bylin} +\name{summarizebylineage} +\alias{summarizebylineage} \title{Summarize by Lineage} \usage{ -summarize_bylin(prot = "prot", column = "DomArch", by = "Lineage", query) +summarizebylineage(prot = "prot", column = "DomArch", by = "Lineage", query) } \arguments{ \item{query}{} @@ -19,7 +19,7 @@ Summarize by 
Lineage \dontrun{ library(tidyverse) tibble(DomArch = c("a+b", "a+b", "b+c", "a+b"), Lineage = c("l1", "l1", "l1", "l2")) |> - summarize_bylin(query = "all") + summarizebylineage(query = "all") } } diff --git a/man/total_counts.Rd b/man/totalgencontextordomarchcounts.Rd similarity index 84% rename from man/total_counts.Rd rename to man/totalgencontextordomarchcounts.Rd index 49db8822..aa8697ee 100644 --- a/man/total_counts.Rd +++ b/man/totalgencontextordomarchcounts.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/summarize.R -\name{total_counts} -\alias{total_counts} +\name{totalgencontextordomarchcounts} +\alias{totalgencontextordomarchcounts} \title{Total Counts} \usage{ -total_counts( +totalgencontextordomarchcounts( prot, column = "DomArch", lineage_col = "Lineage", @@ -37,6 +37,6 @@ column names. } \examples{ \dontrun{ -total_counts(pspa - gc_lin_counts, 0, "GC") +totalgencontextordomarchcounts(pspa - gc_lin_counts, 0, "GC") } } diff --git a/man/words2wc.Rd b/man/words2wordcounts.Rd similarity index 77% rename from man/words2wc.Rd rename to man/words2wordcounts.Rd index 1eba5dc4..69f30c5d 100644 --- a/man/words2wc.Rd +++ b/man/words2wordcounts.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/summarize.R -\name{words2wc} -\alias{words2wc} +\name{words2wordcounts} +\alias{words2wordcounts} \title{Words 2 Word Counts} \usage{ -words2wc(string) +words2wordcounts(string) } \arguments{ \item{string}{} @@ -18,8 +18,8 @@ Get word counts (wc) \link{DOMAINS (DA) or DOMAIN ARCHITECTURES (GC)} \examples{ \dontrun{ tibble::tibble(DomArch = c("aaa+bbb", "a+b", "b+c", "b-c")) |> - elements2words() |> - words2wc() + elements2Words() |> + words2wordcounts() } } From adf6903e71517a48540f4c360df45b0a4067455d Mon Sep 17 00:00:00 2001 From: teddyCodex Date: Tue, 8 Oct 2024 15:59:04 +0100 Subject: [PATCH 22/33] refactor function names in R/job_status_emails.R --- NAMESPACE | 8 ++-- 
R/job_status_emails.R | 20 +++++----- man/addlineage.Rd | 40 ------------------- ..._results_url.Rd => createJobResultsURL.Rd} | 6 +-- ...sage.Rd => createJobStatusEmailMessage.Rd} | 16 +++++--- ..._job_args.Rd => formatJobArgumentsHTML.Rd} | 8 ++-- ..._status_email.Rd => sendJobStatusEmail.Rd} | 12 ++---- 7 files changed, 35 insertions(+), 75 deletions(-) delete mode 100644 man/addlineage.Rd rename man/{make_job_results_url.Rd => createJobResultsURL.Rd} (89%) rename man/{get_job_message.Rd => createJobStatusEmailMessage.Rd} (75%) rename man/{format_job_args.Rd => formatJobArgumentsHTML.Rd} (73%) rename man/{send_job_status_email.Rd => sendJobStatusEmail.Rd} (84%) diff --git a/NAMESPACE b/NAMESPACE index da443880..dd547990 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -37,6 +37,8 @@ export(convert_fa2tre) export(count_bycol) export(count_to_sunburst) export(count_to_treemap) +export(createJobResultsURL) +export(createJobStatusEmailMessage) export(create_all_col_params) export(create_lineage_lookup) export(create_one_col_params) @@ -47,7 +49,7 @@ export(filter_by_doms) export(filter_freq) export(find_paralogs) export(find_top_acc) -export(format_job_args) +export(formatJobArgumentsHTML) export(gc_undirected_network) export(generateAllAlignments2FA) export(generate_all_aln2fa) @@ -55,7 +57,6 @@ export(generate_fa2tre) export(generate_msa) export(generate_trees) export(get_accnums_from_fasta_file) -export(get_job_message) export(get_proc_medians) export(get_proc_weights) export(ipr2viz) @@ -65,7 +66,6 @@ export(lineage.Query.plot) export(lineage.domain_repeats.plot) export(lineage.neighbors.plot) export(lineage_sunburst) -export(make_job_results_url) export(make_opts2procs) export(mapAcc2Name) export(map_acc2name) @@ -85,7 +85,7 @@ export(reverse_operon) export(run_deltablast) export(run_rpsblast) export(selectLongestDuplicate) -export(send_job_status_email) +export(sendJobStatusEmail) export(shorten_lineage) export(sinkReset) export(stacked_lin_plot) diff --git 
a/R/job_status_emails.R b/R/job_status_emails.R index 4a96459a..6b1412e9 100644 --- a/R/job_status_emails.R +++ b/R/job_status_emails.R @@ -7,7 +7,7 @@ # 1. source("job_status_emails.R") # 2. call: # # event_type can be 'start' or 'end' -# send_job_status_email(notify_email, job_dir, pin_id, event_type) +# sendJobStatusEmail(notify_email, job_dir, pin_id, event_type) # Return # unfortunately, there is no return value for the underlying sendmailR methods # @@ -25,7 +25,7 @@ #' @return the URL where the user can check the status of their job #' @export #' -make_job_results_url <- function( +createJobResultsURL <- function( pin_id, base_url = Sys.getenv("BASE_URL", unset = "http://jravilab.org/molevolvr/")) { return(paste0(base_url, "?r=", pin_id, "&p=home")) @@ -91,9 +91,9 @@ make_job_results_url <- function( #' #' @examples #' \dontrun{ -#' format_job_args("/data/scratch/janani/molevolvr_out/Ba5sV1_full") +#' formatJobArgumentsHTML("/data/scratch/janani/molevolvr_out/Ba5sV1_full") #' } -format_job_args <- function(job_args) { +formatJobArgumentsHTML <- function(job_args) { # format job arguments into html-formatted key/value pairs job_args_list <- tags$ul(lapply(names(job_args), function(key) { # look up human labels for field names, values, if available @@ -146,7 +146,7 @@ format_job_args <- function(job_args) { } #' Produces a mail message that can be sent to a user when their job is accepted. -#' Used by the send_job_status_email() method. +#' Used by the sendJobStatusEmail() method. 
#' #' @param job_dir #' the directory where the job's arguments are stored, in job_args.yml @@ -166,11 +166,11 @@ format_job_args <- function(job_args) { #' @return #' the result of the sendmailR::sendmail() call #' @export -get_job_message <- function(job_dir, pin_id, job_results_url, event_type, context) { +createJobStatusEmailMessage <- function(job_dir, pin_id, job_results_url, event_type, context) { # pull the set of args written to dir/job_args.yml, so we # can send it in the email job_args <- yaml::read_yaml(file.path(job_dir, "job_args.yml")) - job_args_list <- format_job_args(job_args) + job_args_list <- formatJobArgumentsHTML(job_args) # determine which template to use based on the event type if (event_type == "start") { @@ -217,7 +217,7 @@ get_job_message <- function(job_dir, pin_id, job_results_url, event_type, contex #' @return #' the result of the sendmailR::sendmail() call #' @export -send_job_status_email <- function(notify_email, job_dir, pin_id, event_type, context = NULL) { +sendJobStatusEmail <- function(notify_email, job_dir, pin_id, event_type, context = NULL) { # ------------------------------------------------- # --- step 1. 
build the email subject and contents # ------------------------------------------------- @@ -232,10 +232,10 @@ send_job_status_email <- function(notify_email, job_dir, pin_id, event_type, con } # construct the job results URL from the pin_id - job_results_url <- make_job_results_url(pin_id) + job_results_url <- createJobResultsURL(pin_id) # produce a formatted email message from the arguments and template - message <- get_job_message( + message <- createJobStatusEmailMessage( job_dir, pin_id, job_results_url, event_type, context ) diff --git a/man/addlineage.Rd b/man/addlineage.Rd deleted file mode 100644 index 6694e94c..00000000 --- a/man/addlineage.Rd +++ /dev/null @@ -1,40 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/acc2lin.R, R/lineage.R -\name{addLineage} -\alias{addLineage} -\title{addLineage} -\usage{ -addLineage( - df, - acc_col = "AccNum", - assembly_path, - lineagelookup_path, - ipgout_path = NULL, - plan = "multicore" -) - -addLineage( - df, - acc_col = "AccNum", - assembly_path, - lineagelookup_path, - ipgout_path = NULL, - plan = "multicore" -) -} -\arguments{ -\item{plan}{} -} -\value{ -Describe return, in detail -} -\description{ -addLineage - -addLineage -} -\examples{ -\dontrun{ -addLineage() -} -} diff --git a/man/make_job_results_url.Rd b/man/createJobResultsURL.Rd similarity index 89% rename from man/make_job_results_url.Rd rename to man/createJobResultsURL.Rd index 77f7bda5..02407f15 100644 --- a/man/make_job_results_url.Rd +++ b/man/createJobResultsURL.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/job_status_emails.R -\name{make_job_results_url} -\alias{make_job_results_url} +\name{createJobResultsURL} +\alias{createJobResultsURL} \title{Given a pin_id, returns the URL where the user can check the status of their job} \usage{ -make_job_results_url( +createJobResultsURL( pin_id, base_url = Sys.getenv("BASE_URL", unset = 
"http://jravilab.org/molevolvr/") ) diff --git a/man/get_job_message.Rd b/man/createJobStatusEmailMessage.Rd similarity index 75% rename from man/get_job_message.Rd rename to man/createJobStatusEmailMessage.Rd index 0c7ee8f2..1f779793 100644 --- a/man/get_job_message.Rd +++ b/man/createJobStatusEmailMessage.Rd @@ -1,11 +1,17 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/job_status_emails.R -\name{get_job_message} -\alias{get_job_message} +\name{createJobStatusEmailMessage} +\alias{createJobStatusEmailMessage} \title{Produces a mail message that can be sent to a user when their job is accepted. -Used by the send_job_status_email() method.} +Used by the sendJobStatusEmail() method.} \usage{ -get_job_message(job_dir, pin_id, job_results_url, event_type, context) +createJobStatusEmailMessage( + job_dir, + pin_id, + job_results_url, + event_type, + context +) } \arguments{ \item{job_dir}{the directory where the job's arguments are stored, in job_args.yml} @@ -23,5 +29,5 @@ the result of the sendmailR::sendmail() call } \description{ Produces a mail message that can be sent to a user when their job is accepted. -Used by the send_job_status_email() method. +Used by the sendJobStatusEmail() method. } diff --git a/man/format_job_args.Rd b/man/formatJobArgumentsHTML.Rd similarity index 73% rename from man/format_job_args.Rd rename to man/formatJobArgumentsHTML.Rd index 7af96750..371fb6f1 100644 --- a/man/format_job_args.Rd +++ b/man/formatJobArgumentsHTML.Rd @@ -1,11 +1,11 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/job_status_emails.R -\name{format_job_args} -\alias{format_job_args} +\name{formatJobArgumentsHTML} +\alias{formatJobArgumentsHTML} \title{Format job arguments into html-formatted key/value pairs, for including in an email} \usage{ -format_job_args(job_args) +formatJobArgumentsHTML(job_args) } \arguments{ \item{job_args}{a list of job arguments, e.g. 
as read from the job_args.yml file} @@ -19,6 +19,6 @@ in an email } \examples{ \dontrun{ -format_job_args("/data/scratch/janani/molevolvr_out/Ba5sV1_full") +formatJobArgumentsHTML("/data/scratch/janani/molevolvr_out/Ba5sV1_full") } } diff --git a/man/send_job_status_email.Rd b/man/sendJobStatusEmail.Rd similarity index 84% rename from man/send_job_status_email.Rd rename to man/sendJobStatusEmail.Rd index 4357687a..b53f652a 100644 --- a/man/send_job_status_email.Rd +++ b/man/sendJobStatusEmail.Rd @@ -1,17 +1,11 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/job_status_emails.R -\name{send_job_status_email} -\alias{send_job_status_email} +\name{sendJobStatusEmail} +\alias{sendJobStatusEmail} \title{Sends a "job accepted" email to a user when their job is accepted, including details about the job submission and how to check its status.} \usage{ -send_job_status_email( - notify_email, - job_dir, - pin_id, - event_type, - context = NULL -) +sendJobStatusEmail(notify_email, job_dir, pin_id, event_type, context = NULL) } \arguments{ \item{notify_email}{the email address to send the notification to} From acd7f8ca1477b7a9d1bb54bf8615eb6947baf80d Mon Sep 17 00:00:00 2001 From: David Mayer Date: Tue, 8 Oct 2024 08:59:13 -0600 Subject: [PATCH 23/33] adjust NAMESPACE order --- NAMESPACE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index 25c4acc5..bbc2bff2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -37,9 +37,9 @@ export(convert_aln2fa) export(count_bycol) export(count_to_sunburst) export(count_to_treemap) +export(createFA2Tree) export(createWordCloud2Element) export(createWordCloudElement) -export(createFA2Tree) export(create_all_col_params) export(create_lineage_lookup) export(create_one_col_params) From 04859e588d3e7fe0ec7e34f5c26b71ebd44ea002 Mon Sep 17 00:00:00 2001 From: David Mayer Date: Tue, 8 Oct 2024 09:42:06 -0600 Subject: [PATCH 24/33] update NAMESPACE --- NAMESPACE | 7 +------ 1 file changed, 
1 insertion(+), 6 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 9bfd643a..2cffa30e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -34,12 +34,9 @@ export(convertAlignment2Trees) export(convertFA2Tree) export(convert_aln2fa) export(count_bycol) -export(count_to_sunburst) -export(count_to_treemap) export(createFA2Tree) export(createWordCloud2Element) export(createWordCloudElement) -export(create_all_col_params) export(create_lineage_lookup) export(domain_network) export(efetchIPG) @@ -72,9 +69,9 @@ export(plotLineageNeighbors) export(plotLineageQuery) export(plotLineageSunburst) export(plotStackedLineage) -export(plotUpSet) export(plotSunburst) export(plotTreemap) +export(plotUpSet) export(plot_estimated_walltimes) export(prepareColumnParams) export(prepareSingleColumnParams) @@ -103,9 +100,7 @@ export(summarize_bylin) export(theme_genes2) export(to_titlecase) export(total_counts) -export(upset.plot) export(validateCountDF) -export(wordcloud2_element) export(wordcloud3) export(write.MsaAAMultipleAlignment) export(write_proc_medians_table) From c4c8cfb16b7b2b79444bdcfcc4d05e591393649a Mon Sep 17 00:00:00 2001 From: David Mayer Date: Tue, 8 Oct 2024 09:49:39 -0600 Subject: [PATCH 25/33] keep original ref --- R/plotme.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/plotme.R b/R/plotme.R index 6cbeb3d0..906e85ec 100644 --- a/R/plotme.R +++ b/R/plotme.R @@ -1,4 +1,4 @@ -# Taken from https://github.com/yogevherz/plotme/blob/master/R/plotSunburst_treemap.R +# Taken from https://github.com/yogevherz/plotme/blob/master/R/count_to_sunburst_treemap.R #' Create an interactive plotly from count data #' #' @description From 00b376b9a0bfc613b98470e89be7222c4781378f Mon Sep 17 00:00:00 2001 From: David Mayer Date: Tue, 8 Oct 2024 09:58:56 -0600 Subject: [PATCH 26/33] add missing .Rd --- man/downloadAssemblySummary.Rd | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 man/downloadAssemblySummary.Rd diff --git 
a/man/downloadAssemblySummary.Rd b/man/downloadAssemblySummary.Rd new file mode 100644 index 00000000..636af878 --- /dev/null +++ b/man/downloadAssemblySummary.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lineage.R +\name{downloadAssemblySummary} +\alias{downloadAssemblySummary} +\title{Download the combined assembly summaries of genbank and refseq} +\usage{ +downloadAssemblySummary( + outpath, + keep = c("assembly_accession", "taxid", "species_taxid", "organism_name") +) +} +\arguments{ +\item{outpath}{String of path where the assembly summary file should be written} + +\item{keep}{Character vector containing which columns should be retained and downloaded} +} +\description{ +Download the combined assembly summaries of genbank and refseq +} +\author{ +Samuel Chen, Janani Ravi +} From acc5c6dd8045afc6691958132e029cf3e7c04f3b Mon Sep 17 00:00:00 2001 From: David Mayer Date: Tue, 8 Oct 2024 10:03:20 -0600 Subject: [PATCH 27/33] resolve merge conflicts from incoming PRs --- man/acc2Lineage.Rd | 2 +- man/acc2lin.Rd | 56 ---------------------------------------------- man/addlineage.Rd | 40 --------------------------------- 3 files changed, 1 insertion(+), 97 deletions(-) delete mode 100644 man/acc2lin.Rd delete mode 100644 man/addlineage.Rd diff --git a/man/acc2Lineage.Rd b/man/acc2Lineage.Rd index d632c52e..a24bdc9a 100644 --- a/man/acc2Lineage.Rd +++ b/man/acc2Lineage.Rd @@ -24,7 +24,7 @@ acc2Lineage( \item{accessions}{Character vector of protein accessions} \item{assembly_path}{String of the path to the assembly_summary path -This file can be generated using the "DownloadAssemblySummary()" function} +This file can be generated using the "downloadAssemblySummary()" function} \item{lineagelookup_path}{String of the path to the lineage lookup file (taxid to lineage mapping). 
This file can be generated using the} diff --git a/man/acc2lin.Rd b/man/acc2lin.Rd deleted file mode 100644 index fd393d43..00000000 --- a/man/acc2lin.Rd +++ /dev/null @@ -1,56 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/acc2lin.R, R/lineage.R -\name{acc2lin} -\alias{acc2lin} -\title{acc2lin} -\usage{ -acc2lin( - accessions, - assembly_path, - lineagelookup_path, - ipgout_path = NULL, - plan = "multicore" -) - -acc2lin( - accessions, - assembly_path, - lineagelookup_path, - ipgout_path = NULL, - plan = "multicore" -) -} -\arguments{ -\item{accessions}{Character vector of protein accessions} - -\item{assembly_path}{String of the path to the assembly_summary path -This file can be generated using the "downloadAssemblySummary()" function} - -\item{lineagelookup_path}{String of the path to the lineage lookup file -(taxid to lineage mapping). This file can be generated using the} - -\item{ipgout_path}{Path to write the results of the efetch run of the accessions -on the ipg database. If NULL, the file will not be written. Defaults to NULL} - -\item{plan}{} -} -\value{ -Describe return, in detail -} -\description{ -This function combines 'efetch_ipg()' and 'ipg2lin()' to map a set -of protein accessions to their assembly (GCA_ID), tax ID, and lineage. - -Function to map protein accession numbers to lineage - -This function combines 'efetch_ipg()' and 'ipg2lin()' to map a set -of protein accessions to their assembly (GCA_ID), tax ID, and lineage. 
-} -\examples{ -\dontrun{ -acc2lin() -} -} -\author{ -Samuel Chen, Janani Ravi -} diff --git a/man/addlineage.Rd b/man/addlineage.Rd deleted file mode 100644 index 6694e94c..00000000 --- a/man/addlineage.Rd +++ /dev/null @@ -1,40 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/acc2lin.R, R/lineage.R -\name{addLineage} -\alias{addLineage} -\title{addLineage} -\usage{ -addLineage( - df, - acc_col = "AccNum", - assembly_path, - lineagelookup_path, - ipgout_path = NULL, - plan = "multicore" -) - -addLineage( - df, - acc_col = "AccNum", - assembly_path, - lineagelookup_path, - ipgout_path = NULL, - plan = "multicore" -) -} -\arguments{ -\item{plan}{} -} -\value{ -Describe return, in detail -} -\description{ -addLineage - -addLineage -} -\examples{ -\dontrun{ -addLineage() -} -} From 919d60bbc075c81e3ba136768d8f7b98a79a84cc Mon Sep 17 00:00:00 2001 From: David Mayer Date: Tue, 8 Oct 2024 10:17:52 -0600 Subject: [PATCH 28/33] update dependent .Rd referencing downloadAssemblySummary() --- R/acc2lin.R | 4 ++-- man/IPG2Lineage.Rd | 2 +- man/addLineage.Rd | 40 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 43 insertions(+), 3 deletions(-) create mode 100644 man/addLineage.Rd diff --git a/R/acc2lin.R b/R/acc2lin.R index 73aca0f4..40d190a6 100644 --- a/R/acc2lin.R +++ b/R/acc2lin.R @@ -73,7 +73,7 @@ addLineage <- function(df, acc_col = "AccNum", assembly_path, #' #' @param accessions Character vector of protein accessions #' @param assembly_path String of the path to the assembly_summary path -#' This file can be generated using the "DownloadAssemblySummary()" function +#' This file can be generated using the \link[MolEvolvR]{downloadAssemblySummary} function #' @param lineagelookup_path String of the path to the lineage lookup file #' (taxid to lineage mapping). 
This file can be generated using the #' @param ipgout_path Path to write the results of the efetch run of the accessions @@ -179,7 +179,7 @@ efetchIPG <- function(accnums, out_path, plan = "sequential", ...) { #' ipg database. The protein accession in 'accessions' should be contained in this #' file #' @param assembly_path String of the path to the assembly_summary path -#' This file can be generated using the "DownloadAssemblySummary()" function +#' This file can be generated using the \link[MolEvolvR]{downloadAssemblySummary} function #' @param lineagelookup_path String of the path to the lineage lookup file #' (taxid to lineage mapping). This file can be generated using the #' "create_lineage_lookup()" function diff --git a/man/IPG2Lineage.Rd b/man/IPG2Lineage.Rd index cf3e635e..e24ab617 100644 --- a/man/IPG2Lineage.Rd +++ b/man/IPG2Lineage.Rd @@ -32,7 +32,7 @@ file} "create_lineage_lookup()" function} \item{assembly_path}{String of the path to the assembly_summary path -This file can be generated using the "DownloadAssemblySummary()" function} +This file can be generated using the \link[MolEvolvR]{downloadAssemblySummary} function} } \value{ Describe return, in detail diff --git a/man/addLineage.Rd b/man/addLineage.Rd new file mode 100644 index 00000000..6694e94c --- /dev/null +++ b/man/addLineage.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/acc2lin.R, R/lineage.R +\name{addLineage} +\alias{addLineage} +\title{addLineage} +\usage{ +addLineage( + df, + acc_col = "AccNum", + assembly_path, + lineagelookup_path, + ipgout_path = NULL, + plan = "multicore" +) + +addLineage( + df, + acc_col = "AccNum", + assembly_path, + lineagelookup_path, + ipgout_path = NULL, + plan = "multicore" +) +} +\arguments{ +\item{plan}{} +} +\value{ +Describe return, in detail +} +\description{ +addLineage + +addLineage +} +\examples{ +\dontrun{ +addLineage() +} +} From 6d3c23b06e04c5c135ce4d021f31dc6477a06bf5 Mon Sep 17 00:00:00 2001 
From: David Mayer Date: Tue, 8 Oct 2024 10:22:31 -0600 Subject: [PATCH 29/33] reference new function name in acc2lin.R --- R/acc2lin.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/acc2lin.R b/R/acc2lin.R index 40d190a6..1984ec3c 100644 --- a/R/acc2lin.R +++ b/R/acc2lin.R @@ -201,7 +201,7 @@ IPG2Lineage <- function(accessions, ipg_file, assembly_path, lineagelookup_path, ipg_dt <- setnames(ipg_dt, "Assembly", "GCA_ID") - lins <- GCA2Lins(prot_data = ipg_dt, assembly_path, lineagelookup_path) + lins <- GCA2Lineage(prot_data = ipg_dt, assembly_path, lineagelookup_path) lins <- lins[!is.na(Lineage)] %>% unique() return(lins) From 9045b7ca78631f61523bd003b430aff7394362fd Mon Sep 17 00:00:00 2001 From: David Mayer Date: Tue, 8 Oct 2024 10:38:33 -0600 Subject: [PATCH 30/33] add missing .Rd --- man/elements2Words.Rd | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 man/elements2Words.Rd diff --git a/man/elements2Words.Rd b/man/elements2Words.Rd new file mode 100644 index 00000000..1094d363 --- /dev/null +++ b/man/elements2Words.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/summarize.R +\name{elements2Words} +\alias{elements2Words} +\title{Elements 2 Words} +\usage{ +elements2Words(prot, column = "DomArch", conversion_type = "da2doms") +} +\arguments{ +\item{prot}{\link{dataframe}} + +\item{column}{\link{string} column name} + +\item{conversion_type}{\link{string} type of conversion: 'da2doms': domain architectures to +domains. 
'gc2da' genomic context to domain architectures} +} +\value{ +\link{string} with words delimited by spaces +} +\description{ +Break string ELEMENTS into WORDS for domain architecture (DA) and genomic +context (GC) +} +\examples{ +\dontrun{ +tibble::tibble(DomArch = c("aaa+bbb", "a+b", "b+c", "b-c")) |> elements2Words() +} + +} From c83e89c7f7e0e2bd7821ab7bc199a4ffa791d246 Mon Sep 17 00:00:00 2001 From: David Mayer Date: Tue, 8 Oct 2024 10:41:47 -0600 Subject: [PATCH 31/33] let R sort NAMESPACE --- NAMESPACE | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index de6af7c0..51ef791b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -32,9 +32,9 @@ export(convertAlignment2FA) export(convertAlignment2Trees) export(convertFA2Tree) export(convert_aln2fa) +export(countbycolumn) export(createFA2Tree) export(createWordCloud2Element) -export(countbycolumn) export(createWordCloudElement) export(create_lineage_lookup) export(domain_network) @@ -77,7 +77,6 @@ export(prepareColumnParams) export(prepareSingleColumnParams) export(proteinAcc2TaxID) export(proteinAcc2TaxID_old) -export(prot2tax_old) export(removeAsterisks) export(removeEmptyRows) export(removeTails) From 490e542b2857a44b1aa82d6f6c696ed7a20ecd2e Mon Sep 17 00:00:00 2001 From: David Mayer Date: Tue, 8 Oct 2024 11:08:27 -0600 Subject: [PATCH 32/33] updates - adjust casing, RE #53 - match with .Rd - update references throughout MolEvolvR pkg --- NAMESPACE | 12 ++++---- R/cleanup.R | 2 +- R/networks_domarch.R | 8 ++--- R/networks_gencontext.R | 12 ++++---- R/plotting.R | 22 +++++++------- R/summarize.R | 43 ++++++++++++--------------- man/countbycolumn.Rd | 12 ++++---- man/filterbydomains.Rd | 10 +++---- man/filterbyfrequency.Rd | 8 ++--- man/findparalogs.Rd | 8 ++--- man/summarizebylineage.Rd | 8 ++--- man/totalgencontextordomarchcounts.Rd | 8 ++--- man/words2wordcounts.Rd | 8 ++--- 13 files changed, 78 insertions(+), 83 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 
51ef791b..d37a2a60 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -32,7 +32,7 @@ export(convertAlignment2FA) export(convertAlignment2Trees) export(convertFA2Tree) export(convert_aln2fa) -export(countbycolumn) +export(countByColumn) export(createFA2Tree) export(createWordCloud2Element) export(createWordCloudElement) @@ -41,10 +41,10 @@ export(domain_network) export(downloadAssemblySummary) export(efetchIPG) export(extractAccNum) -export(filterbydomains) -export(filterbyfrequency) +export(filterByDomains) +export(filterByFrequency) +export(findParalogs) export(find_top_acc) -export(findparalogs) export(format_job_args) export(gc_undirected_network) export(generateAllAlignments2FA) @@ -91,15 +91,15 @@ export(selectLongestDuplicate) export(send_job_status_email) export(shortenLineage) export(sinkReset) +export(summarizeByLineage) export(summarizeDomArch) export(summarizeDomArch_ByLineage) export(summarizeGenContext) export(summarizeGenContext_ByDomArchLineage) export(summarizeGenContext_ByLineage) -export(summarizebylineage) export(theme_genes2) export(to_titlecase) -export(totalgencontextordomarchcounts) +export(totalGenContextOrDomArchCounts) export(validateCountDF) export(wordcloud3) export(write.MsaAAMultipleAlignment) diff --git a/R/cleanup.R b/R/cleanup.R index f82722f2..39b4b8d2 100755 --- a/R/cleanup.R +++ b/R/cleanup.R @@ -561,7 +561,7 @@ cleanDomainArchitecture <- function(prot, old = "DomArch.orig", new = "DomArch", # Remove rows with no domains contained within domains_keep # filter(grepl(domains_for_grep, DomArch)) if (!is.null(domains_keep)) { - prot <- prot %>% filter_by_doms(column = new, doms_keep = domains_keep$domains) + prot <- prot %>% filterByDomains(column = new, doms_keep = domains_keep$domains) } # ##!! NOT RUN !! 
diff --git a/R/networks_domarch.R b/R/networks_domarch.R index 010b7619..fea0a195 100755 --- a/R/networks_domarch.R +++ b/R/networks_domarch.R @@ -54,7 +54,7 @@ domain_network <- function(prot, column = "DomArch", domains_of_interest, cutoff { column_name <- sym(column) - prot_tc <- prot %>% total_counts(column = column, cutoff = cutoff, RowsCutoff = F, digits = 5) + prot_tc <- prot %>% totalGenContextOrDomArchCounts(column = column, cutoff = cutoff, RowsCutoff = F, digits = 5) # ensure only Domains that are in the tc cutoff range are kept within_list <- prot_tc %>% @@ -95,7 +95,7 @@ domain_network <- function(prot, column = "DomArch", domains_of_interest, cutoff # cleanup domain list domain.list <- domain.list$DomArch.ntwrk %>% str_split(pattern = "\\+") # Get a table of domain counts - wc <- elements2words(prot = prot, column = column, conversion_type = "da2doms") %>% words2wc() + wc <- elements2Words(prot = prot, column = column, conversion_type = "da2doms") %>% words2WordCounts() wc <- pivot_wider(wc, names_from = words, values_from = freq) # Remove all isolated domarchs, such that an adjacency list can easily be constructed @@ -262,7 +262,7 @@ BinaryDomainNetwork <- function(prot, column = "DomArch", domains_of_interest, c column_name <- sym(column) - prot_tc <- prot %>% total_counts(column = column, cutoff = cutoff, RowsCutoff = F, digits = 5) + prot_tc <- prot %>% totalGenContextOrDomArchCounts(column = column, cutoff = cutoff, RowsCutoff = F, digits = 5) within_list <- prot_tc %>% select({{ column_name }}) %>% @@ -286,7 +286,7 @@ BinaryDomainNetwork <- function(prot, column = "DomArch", domains_of_interest, c domain.list <- domain.list$DomArch.ntwrk %>% str_split(pattern = "\\+") # Get domain counts before eliminating domarchs with no edges - wc <- elements2words(prot = prot, column = column, conversion_type = "da2doms") %>% words2wc() + wc <- elements2Words(prot = prot, column = column, conversion_type = "da2doms") %>% words2WordCounts() nodes <- 
data.frame(id = wc$words, label = wc$words, size = wc$freq) %>% mutate(group = purrr::map( diff --git a/R/networks_gencontext.R b/R/networks_gencontext.R index 6b703146..e0dd63da 100755 --- a/R/networks_gencontext.R +++ b/R/networks_gencontext.R @@ -46,11 +46,11 @@ gc_undirected_network <- function(prot, column = "GenContext", domains_of_intere column_name <- sym(column) if (cutoff_type == "Lineage") { lin_summary <- prot %>% - summ.DA.byLin() %>% - summ.DA() + summarizeDomArch_ByLineage() %>% + summarizeDomArch() doms_above_cutoff <- (lin_summary %>% filter(totallin >= cutoff))[[column]] } else if (cutoff_type == "Total Count") { # Change this type? - GC_above_cutoff <- (prot %>% total_counts(column = column, cutoff = cutoff))[[column]] + GC_above_cutoff <- (prot %>% totalGenContextOrDomArchCounts(column = column, cutoff = cutoff))[[column]] } prot <- prot[which(prot[[as_string(column_name)]] %in% GC_above_cutoff), ] @@ -153,8 +153,8 @@ GenContextNetwork <- function(prot, domains_of_interest, column = "GenContext", column_name <- sym(column) - # Perform cutoff through total_counts - prot_tc <- prot %>% total_counts(column = column, cutoff = cutoff) + # Perform cutoff through totalGenContextOrDomArchCounts + prot_tc <- prot %>% totalGenContextOrDomArchCounts(column = column, cutoff = cutoff) within_list <- prot_tc %>% select({{ column_name }}) %>% @@ -218,7 +218,7 @@ GenContextNetwork <- function(prot, domains_of_interest, column = "GenContext", } # Get domain counts before eliminating domarchs with no edges - wc <- elements2words(prot = prot, column = column, conversion_type = "gc2da") %>% words2wc() + wc <- elements2Words(prot = prot, column = column, conversion_type = "gc2da") %>% words2WordCounts() nodes <- data.frame(id = wc$words, label = wc$words, size = wc$freq) max_size <- max(nodes$size) diff --git a/R/plotting.R b/R/plotting.R index da95ea5f..5c8de823 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -108,7 +108,7 @@ plotUpSet <- function(query_data = 
"toast_rack.sub", # colname = string(colname) tryCatch( { - tc <- query_data %>% total_counts(column = colname, cutoff = cutoff, RowsCutoff = RowsCutoff, digits = 5) + tc <- query_data %>% totalGenContextOrDomArchCounts(column = colname, cutoff = cutoff, RowsCutoff = RowsCutoff, digits = 5) ##### Remove Tails #### # tails comprise of less than 1% of data each # ie) individual percent is less than 1 @@ -128,7 +128,7 @@ plotUpSet <- function(query_data = "toast_rack.sub", words.tc <- tc %>% select({{ column }}) %>% distinct() %>% - elements2words(column = colname, conversion_type = type) + elements2Words(column = colname, conversion_type = type) # names(words.tc)[1] <- "words" words.tc <- words.tc %>% str_split(pattern = " ") words.tc <- as.data.frame(words.tc, col.names = "Words", stringsAsFactors = F) %>% @@ -273,7 +273,7 @@ plotLineageDA <- function(query_data = "prot", query_data <- shortenLineage(query_data, "Lineage", abr_len = 1) - query.summ.byLin <- query_data %>% total_counts(cutoff = cutoff, column = colname, RowsCutoff = RowsCutoff) + query.summ.byLin <- query_data %>% totalGenContextOrDomArchCounts(cutoff = cutoff, column = colname, RowsCutoff = RowsCutoff) query.summ.byLin$Lineage <- map(query.summ.byLin$Lineage, function(x) str_replace_all(string = x, pattern = ">", replacement = "_")) %>% unlist() @@ -390,7 +390,7 @@ plotLineageQuery <- function(query_data = all, } col <- sym(colname) - query_data <- query_data %>% total_counts(column = colname, cutoff = cutoff) + query_data <- query_data %>% totalGenContextOrDomArchCounts(column = colname, cutoff = cutoff) # query_data contains all rows that possess a lineage query_data <- query_data %>% filter(grepl("a", Lineage)) @@ -831,7 +831,7 @@ plotStackedLineage <- function(prot, column = "DomArch", cutoff, Lineage_col = " prot <- shortenLineage(prot, Lineage_col, abr_len = 3) } - total_count <- total_counts(prot, column, cutoff, lineage_col = Lineage_col) + total_count <- totalGenContextOrDomArchCounts(prot, 
column, cutoff, lineage_col = Lineage_col) # total_count = prot # Order bars by descending freq @@ -1054,7 +1054,7 @@ createWordCloudElement <- function(query_data = "prot", colname = "DomArch", cutoff = 70, UsingRowsCutoff = FALSE) { - tc <- query_data %>% total_counts(column = colname, cutoff = cutoff, RowsCutoff = UsingRowsCutoff, digits = 5) + tc <- query_data %>% totalGenContextOrDomArchCounts(column = colname, cutoff = cutoff, RowsCutoff = UsingRowsCutoff, digits = 5) column <- sym(colname) # Get words from filter @@ -1069,11 +1069,11 @@ createWordCloudElement <- function(query_data = "prot", } words.tc <- query_data %>% - elements2words( + elements2Words( column = colname, conversion_type = type ) %>% - words2wc() + words2WordCounts() # names(words.tc) <- c("words", "freq") @@ -1134,7 +1134,7 @@ createWordCloud2Element <- function(query_data = "prot", # @param type Character. Default is "da2doms" for Domain Architectures. # Other alternative: "gc2da" for Genomic Contexts. - tc <- query_data %>% total_counts(column = colname, cutoff = cutoff, RowsCutoff = UsingRowsCutoff, digits = 5) + tc <- query_data %>% totalGenContextOrDomArchCounts(column = colname, cutoff = cutoff, RowsCutoff = UsingRowsCutoff, digits = 5) column <- sym(colname) query_data <- query_data %>% filter({{ column }} %in% pull(tc, {{ colname }})) @@ -1146,11 +1146,11 @@ createWordCloud2Element <- function(query_data = "prot", } words.tc <- query_data %>% - elements2words( + elements2Words( column = colname, conversion_type = type ) %>% - words2wc() + words2WordCounts() names(words.tc) <- c("words", "freq") diff --git a/R/summarize.R b/R/summarize.R index d2cef471..e0dae1c4 100644 --- a/R/summarize.R +++ b/R/summarize.R @@ -13,7 +13,7 @@ #' Filter by Domains #' #' @author Samuel Chen, Janani Ravi -#' @description filterbydomains filters a data frame by identifying exact domain matches +#' @description filterByDomains filters a data frame by identifying exact domain matches #' and either keeping 
or removing rows with the identified domain #' #' @param prot Dataframe to filter @@ -33,9 +33,9 @@ #' #' @examples #' \dontrun{ -#' filterbydomains() +#' filterByDomains() #' } -filterbydomains <- function(prot, column = "DomArch", doms_keep = c(), doms_remove = c(), +filterByDomains <- function(prot, column = "DomArch", doms_keep = c(), doms_remove = c(), ignore.case = FALSE) { # Only rows with a domain in doms_keep will be kept # Any row containing a domain in doms_remove will be removed @@ -89,7 +89,7 @@ filterbydomains <- function(prot, column = "DomArch", doms_keep = c(), doms_remo ## Before/after break up ## ########################### ## Function to obtain element counts (DA, GC) -#' Count Bycol +#' Count By Column #' #' @param prot #' @param column @@ -102,9 +102,9 @@ filterbydomains <- function(prot, column = "DomArch", doms_keep = c(), doms_remo #' #' @examples #' \dontrun{ -#' countbycolumn() +#' countByColumn() #' } -countbycolumn <- function(prot = prot, column = "DomArch", min.freq = 1) { +countByColumn <- function(prot = prot, column = "DomArch", min.freq = 1) { counts <- prot %>% select(column) %>% table() %>% @@ -185,10 +185,10 @@ elements2Words <- function(prot, column = "DomArch", conversion_type = "da2doms" #' \dontrun{ #' tibble::tibble(DomArch = c("aaa+bbb", "a+b", "b+c", "b-c")) |> #' elements2Words() |> -#' words2wordcounts() +#' words2WordCounts() #' } #' -words2wordcounts <- function(string) { +words2WordCounts <- function(string) { df_word_count <- string %>% # reduce spaces with length 2 or greater to a single space str_replace_all("\\s{2,}", " ") %>% @@ -227,9 +227,9 @@ words2wordcounts <- function(string) { #' #' @examples #' \dontrun{ -#' filterbyfrequency() +#' filterByFrequency() #' } -filterbyfrequency <- function(x, min.freq) { +filterByFrequency <- function(x, min.freq) { x %>% filter(freq >= min.freq) } @@ -254,10 +254,10 @@ filterbyfrequency <- function(x, min.freq) { #' \dontrun{ #' library(tidyverse) #' tibble(DomArch = 
c("a+b", "a+b", "b+c", "a+b"), Lineage = c("l1", "l1", "l1", "l2")) |> -#' summarizebylineage(query = "all") +#' summarizeByLineage(query = "all") #' } #' -summarizebylineage <- function(prot = "prot", column = "DomArch", by = "Lineage", +summarizeByLineage <- function(prot = "prot", column = "DomArch", by = "Lineage", query) { column <- sym(column) by <- sym(by) @@ -295,7 +295,6 @@ summarizebylineage <- function(prot = "prot", column = "DomArch", by = "Lineage" #' summarizeDomArch_ByLineage() #' } summarizeDomArch_ByLineage <- function(x) { - ## Note: it is better to reserve dots for S3 Objects. Consider replacing '.' with '_' x %>% filter(!grepl("^-$", DomArch)) %>% group_by(DomArch, Lineage) %>% @@ -321,7 +320,6 @@ summarizeDomArch_ByLineage <- function(x) { #' summarizeDomArch() #' } summarizeDomArch <- function(x) { - ## Note: it is better to reserve dots for S3 Objects. Consider replacing '.' with '_' x %>% group_by(DomArch) %>% summarise(totalcount = sum(count), totallin = n()) %>% # totallin=n_distinct(Lineage), @@ -344,7 +342,6 @@ summarizeDomArch <- function(x) { #' summarizeGenContext_ByDomArchLineage #' } summarizeGenContext_ByDomArchLineage <- function(x) { - ## Note: it is better to reserve dots for S3 Objects. Consider replacing '.' with '_' x %>% filter(!grepl("^-$", GenContext)) %>% filter(!grepl("^-$", DomArch)) %>% @@ -369,7 +366,6 @@ summarizeGenContext_ByDomArchLineage <- function(x) { #' summarizeGenContext_ByLineage() #' } summarizeGenContext_ByLineage <- function(x) { - ## Note: it is better to reserve dots for S3 Objects. Consider replacing '.' with '_' x %>% filter(!grepl("^-$", GenContext)) %>% filter(!grepl("^-$", DomArch)) %>% @@ -394,7 +390,6 @@ summarizeGenContext_ByLineage <- function(x) { #' summarizeGenContext() #' } summarizeGenContext <- function(x) { - ## Note: it is better to reserve dots for S3 Objects. Consider replacing '.' 
with '_' x %>% group_by(GenContext) %>% summarise( @@ -436,9 +431,9 @@ summarizeGenContext <- function(x) { #' #' @examples #' \dontrun{ -#' totalgencontextordomarchcounts(pspa - gc_lin_counts, 0, "GC") +#' totalGenContextOrDomArchCounts(pspa - gc_lin_counts, 0, "GC") #' } -totalgencontextordomarchcounts <- function(prot, column = "DomArch", lineage_col = "Lineage", +totalGenContextOrDomArchCounts <- function(prot, column = "DomArch", lineage_col = "Lineage", cutoff = 90, RowsCutoff = FALSE, digits = 2 # type = "GC" ) { @@ -448,7 +443,7 @@ totalgencontextordomarchcounts <- function(prot, column = "DomArch", lineage_col filter(!is.na({{ column }}) & !is.na({{ lineage_col }})) %>% filter({{ column }} != "") - prot <- summarizebylineage(prot, column, by = lineage_col, query = "all") + prot <- summarizeByLineage(prot, column, by = lineage_col, query = "all") col_count <- prot %>% group_by({{ column }}) %>% summarise(totalcount = sum(count)) @@ -598,9 +593,9 @@ totalgencontextordomarchcounts <- function(prot, column = "DomArch", lineage_col #' #' @examples #' \dontrun{ -#' findparalogs(pspa) +#' findParalogs(pspa) #' } -findparalogs <- function(prot) { +findParalogs <- function(prot) { # Remove eukaryotes prot <- prot %>% filter(!grepl("^eukaryota", Lineage)) paralogTable <- prot %>% @@ -644,8 +639,8 @@ findparalogs <- function(prot) { # elements2Words(\"gc2da\")") -# ## words2wordcounts: Function to get WORD COUNTS [DOMAINS (DA) or DOMAIN ARCHITECTURES (GC)] +# ## words2WordCounts: Function to get WORD COUNTS [DOMAINS (DA) or DOMAIN ARCHITECTURES (GC)] # cat("Word counts for broken up domains from DAs and DAs from GCs. 
# \nFor e.g.: # DA.doms.wc <- query.sub$DA.doms %>% -# words2wordcounts()") +# words2WordCounts()") diff --git a/man/countbycolumn.Rd b/man/countbycolumn.Rd index e669a4a3..34fcc3e0 100644 --- a/man/countbycolumn.Rd +++ b/man/countbycolumn.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/summarize.R -\name{countbycolumn} -\alias{countbycolumn} -\title{Count Bycol} +\name{countByColumn} +\alias{countByColumn} +\title{Count By Column} \usage{ -countbycolumn(prot = prot, column = "DomArch", min.freq = 1) +countByColumn(prot = prot, column = "DomArch", min.freq = 1) } \arguments{ \item{min.freq}{} @@ -13,10 +13,10 @@ countbycolumn(prot = prot, column = "DomArch", min.freq = 1) Describe return, in detail } \description{ -Count Bycol +Count By Column } \examples{ \dontrun{ -countbycolumn() +countByColumn() } } diff --git a/man/filterbydomains.Rd b/man/filterbydomains.Rd index 7fd148e7..8c885363 100644 --- a/man/filterbydomains.Rd +++ b/man/filterbydomains.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/summarize.R -\name{filterbydomains} -\alias{filterbydomains} +\name{filterByDomains} +\alias{filterByDomains} \title{Filter by Domains} \usage{ -filterbydomains( +filterByDomains( prot, column = "DomArch", doms_keep = c(), @@ -28,7 +28,7 @@ observation to be kept} Filtered data frame } \description{ -filterbydomains filters a data frame by identifying exact domain matches +filterByDomains filters a data frame by identifying exact domain matches and either keeping or removing rows with the identified domain } \note{ @@ -36,7 +36,7 @@ There is no need to make the domains 'regex safe', that will be handled by this } \examples{ \dontrun{ -filterbydomains() +filterByDomains() } } \author{ diff --git a/man/filterbyfrequency.Rd b/man/filterbyfrequency.Rd index d219a100..d2c5f9cd 100644 --- a/man/filterbyfrequency.Rd +++ b/man/filterbyfrequency.Rd @@ -1,10 +1,10 @@ % Generated by 
roxygen2: do not edit by hand % Please edit documentation in R/summarize.R -\name{filterbyfrequency} -\alias{filterbyfrequency} +\name{filterByFrequency} +\alias{filterByFrequency} \title{Filter Frequency} \usage{ -filterbyfrequency(x, min.freq) +filterByFrequency(x, min.freq) } \arguments{ \item{min.freq}{} @@ -17,6 +17,6 @@ Filter Frequency } \examples{ \dontrun{ -filterbyfrequency() +filterByFrequency() } } diff --git a/man/findparalogs.Rd b/man/findparalogs.Rd index 7e985fe5..4b5edbcf 100644 --- a/man/findparalogs.Rd +++ b/man/findparalogs.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/summarize.R -\name{findparalogs} -\alias{findparalogs} +\name{findParalogs} +\alias{findParalogs} \title{Find Paralogs} \usage{ -findparalogs(prot) +findParalogs(prot) } \arguments{ \item{prot}{A data frame filtered by a Query, containing columns Species and Lineage} @@ -21,6 +21,6 @@ column names. } \examples{ \dontrun{ -findparalogs(pspa) +findParalogs(pspa) } } diff --git a/man/summarizebylineage.Rd b/man/summarizebylineage.Rd index 34d72c37..2e445913 100644 --- a/man/summarizebylineage.Rd +++ b/man/summarizebylineage.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/summarize.R -\name{summarizebylineage} -\alias{summarizebylineage} +\name{summarizeByLineage} +\alias{summarizeByLineage} \title{Summarize by Lineage} \usage{ -summarizebylineage(prot = "prot", column = "DomArch", by = "Lineage", query) +summarizeByLineage(prot = "prot", column = "DomArch", by = "Lineage", query) } \arguments{ \item{query}{} @@ -19,7 +19,7 @@ Summarize by Lineage \dontrun{ library(tidyverse) tibble(DomArch = c("a+b", "a+b", "b+c", "a+b"), Lineage = c("l1", "l1", "l1", "l2")) |> - summarizebylineage(query = "all") + summarizeByLineage(query = "all") } } diff --git a/man/totalgencontextordomarchcounts.Rd b/man/totalgencontextordomarchcounts.Rd index aa8697ee..f457cb6a 100644 --- 
a/man/totalgencontextordomarchcounts.Rd +++ b/man/totalgencontextordomarchcounts.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/summarize.R -\name{totalgencontextordomarchcounts} -\alias{totalgencontextordomarchcounts} +\name{totalGenContextOrDomArchCounts} +\alias{totalGenContextOrDomArchCounts} \title{Total Counts} \usage{ -totalgencontextordomarchcounts( +totalGenContextOrDomArchCounts( prot, column = "DomArch", lineage_col = "Lineage", @@ -37,6 +37,6 @@ column names. } \examples{ \dontrun{ -totalgencontextordomarchcounts(pspa - gc_lin_counts, 0, "GC") +totalGenContextOrDomArchCounts(pspa - gc_lin_counts, 0, "GC") } } diff --git a/man/words2wordcounts.Rd b/man/words2wordcounts.Rd index 69f30c5d..7f60f226 100644 --- a/man/words2wordcounts.Rd +++ b/man/words2wordcounts.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/summarize.R -\name{words2wordcounts} -\alias{words2wordcounts} +\name{words2WordCounts} +\alias{words2WordCounts} \title{Words 2 Word Counts} \usage{ -words2wordcounts(string) +words2WordCounts(string) } \arguments{ \item{string}{} @@ -19,7 +19,7 @@ Get word counts (wc) \link{DOMAINS (DA) or DOMAIN ARCHITECTURES (GC)} \dontrun{ tibble::tibble(DomArch = c("aaa+bbb", "a+b", "b+c", "b-c")) |> elements2Words() |> - words2wordcounts() + words2WordCounts() } } From 6ec0b13b9236714726ec0f0a7c60d0963220ef7c Mon Sep 17 00:00:00 2001 From: David Mayer Date: Tue, 8 Oct 2024 11:18:20 -0600 Subject: [PATCH 33/33] update NAMESPACE --- NAMESPACE | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 42ce58f9..53332439 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -34,9 +34,9 @@ export(convertFA2Tree) export(convert_aln2fa) export(countByColumn) export(createFA2Tree) -export(createWordCloud2Element) export(createJobResultsURL) export(createJobStatusEmailMessage) +export(createWordCloud2Element) export(createWordCloudElement) 
export(create_lineage_lookup) export(domain_network) @@ -57,7 +57,6 @@ export(get_proc_medians) export(get_proc_weights) export(ipr2viz) export(ipr2viz_web) -export(make_job_results_url) export(make_opts2procs) export(mapAcc2Name) export(map_acc2name)