Skip to content

Commit

Permalink
Merge branch 'main' into docssummarize
Browse files Browse the repository at this point in the history
  • Loading branch information
awasyn committed Oct 8, 2024
2 parents d9fa04b + e62a71c commit 0b79f89
Show file tree
Hide file tree
Showing 102 changed files with 1,311 additions and 831 deletions.
122 changes: 65 additions & 57 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,99 +1,107 @@
# Generated by roxygen2: do not edit by hand

export(BinaryDomainNetwork)
export(DownloadAssemblySummary)
export(GCA2lin)
export(GCA2Lineage)
export(GenContextNetwork)
export(LineagePlot)
export(IPG2Lineage)
export(RepresentativeAccNums)
export(acc2FA)
export(acc2Lineage)
export(acc2fa)
export(acc2lin)
export(addLeaves2Alignment)
export(addLineage)
export(addName)
export(addTaxID)
export(add_leaves)
export(add_lins)
export(add_name)
export(add_tax)
export(advanced_opts2est_walltime)
export(alignFasta)
export(assert_count_df)
export(assign_job_queue)
export(cleanup_GeneDesc)
export(cleanup_clust)
export(cleanup_domarch)
export(cleanup_gencontext)
export(cleanup_lineage)
export(cleanup_species)
export(cleanClusters)
export(cleanDomainArchitecture)
export(cleanGeneDescription)
export(cleanGenomicContext)
export(cleanLineage)
export(cleanSpecies)
export(combine_files)
export(combine_full)
export(combine_ipr)
export(condenseRepeatedDomains)
export(convert2TitleCase)
export(convertAlignment2FA)
export(convertAlignment2Trees)
export(convertFA2Tree)
export(convert_aln2fa)
export(convert_fa2tre)
export(count_bycol)
export(count_to_sunburst)
export(count_to_treemap)
export(create_all_col_params)
export(countByColumn)
export(createFA2Tree)
export(createJobResultsURL)
export(createJobStatusEmailMessage)
export(createWordCloud2Element)
export(createWordCloudElement)
export(create_lineage_lookup)
export(create_one_col_params)
export(domain_network)
export(efetch_ipg)
export(filter_by_doms)
export(filter_freq)
export(find_paralogs)
export(downloadAssemblySummary)
export(efetchIPG)
export(extractAccNum)
export(filterByDomains)
export(filterByFrequency)
export(findParalogs)
export(find_top_acc)
export(format_job_args)
export(formatJobArgumentsHTML)
export(gc_undirected_network)
export(generateAllAlignments2FA)
export(generate_all_aln2fa)
export(generate_fa2tre)
export(generate_msa)
export(generate_trees)
export(get_accnums_from_fasta_file)
export(get_job_message)
export(get_proc_medians)
export(get_proc_weights)
export(ipg2lin)
export(ipr2viz)
export(ipr2viz_web)
export(lineage.DA.plot)
export(lineage.Query.plot)
export(lineage.domain_repeats.plot)
export(lineage.neighbors.plot)
export(lineage_sunburst)
export(make_job_results_url)
export(make_opts2procs)
export(mapAcc2Name)
export(map_acc2name)
export(map_advanced_opts2procs)
export(msa_pdf)
export(pick_longer_duplicate)
export(plotLineageDA)
export(plotLineageDomainRepeats)
export(plotLineageHeatmap)
export(plotLineageNeighbors)
export(plotLineageQuery)
export(plotLineageSunburst)
export(plotStackedLineage)
export(plotSunburst)
export(plotTreemap)
export(plotUpSet)
export(plot_estimated_walltimes)
export(prot2tax)
export(prot2tax_old)
export(remove_astrk)
export(remove_empty)
export(remove_tails)
export(prepareColumnParams)
export(prepareSingleColumnParams)
export(proteinAcc2TaxID)
export(proteinAcc2TaxID_old)
export(removeAsterisks)
export(removeEmptyRows)
export(removeTails)
export(renameFA)
export(rename_fasta)
export(repeat2s)
export(replaceQMs)
export(replaceQuestionMarks)
export(reveql)
export(reverse_operon)
export(run_deltablast)
export(run_rpsblast)
export(send_job_status_email)
export(shorten_lineage)
export(sink.reset)
export(stacked_lin_plot)
export(string2accnum)
export(summ.DA)
export(summ.DA.byLin)
export(summ.GC)
export(summ.GC.byDALin)
export(summ.GC.byLin)
export(summarize_bylin)
export(selectLongestDuplicate)
export(sendJobStatusEmail)
export(shortenLineage)
export(sinkReset)
export(summarizeByLineage)
export(summarizeDomArch)
export(summarizeDomArch_ByLineage)
export(summarizeGenContext)
export(summarizeGenContext_ByDomArchLineage)
export(summarizeGenContext_ByLineage)
export(theme_genes2)
export(to_titlecase)
export(total_counts)
export(upset.plot)
export(wordcloud2_element)
export(totalGenContextOrDomArchCounts)
export(validateCountDF)
export(wordcloud3)
export(wordcloud_element)
export(write.MsaAAMultipleAlignment)
export(write_proc_medians_table)
export(write_proc_medians_yml)
Expand Down
60 changes: 30 additions & 30 deletions R/CHANGED-pre-msa-tree.R
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
## Pre-requisites to generate MSA and Phylogenetic Tree
## Includes the following functions:
## convert_aln2fa, to_titlecase, add_leaves
## generate_all_aln2fa
## convert_aln2tsv??, convert_accnum2fa??
## Created from add_leaves.R, convert_aln2fa.R, all_aln2fa.R
## convertAlignment2FA, convert2TitleCase, addLeaves2Alignment
## generateAllAlignments2FA
## convertAlignment2TSV??, convertAccNum2FA??
## Created from addLeaves2Alignment.R, convertAlignment2FA.R, all_aln2fa.R
## Modified: Dec 24, 2019 | Jan 2021
## Janani Ravi (@jananiravi) & Samuel Chen (@samuelzornchen)

Expand Down Expand Up @@ -35,7 +35,7 @@ api_key <- Sys.getenv("ENTREZ_API_KEY", unset = "YOUR_KEY_HERE")
#' @author Andrie, Janani Ravi
#' @description Translate string to Title Case w/ delimiter.
#' @aliases totitle, to_title
#' @usage to_titlecase(text, delimitter)
#' @usage convert2TitleCase(x, y = " ")
#' @param x Character vector.
#' @param y Delimiter. Default is space (" ").
#' @seealso chartr, toupper, and tolower.
Expand All @@ -44,7 +44,7 @@ api_key <- Sys.getenv("ENTREZ_API_KEY", unset = "YOUR_KEY_HERE")
#' @export
#'
#' @examples
to_titlecase <- function(x, y = " ") {
convert2TitleCase <- function(x, y = " ") {
s <- strsplit(x, y)[[1]]
paste(toupper(substring(s, 1, 1)), substring(s, 2),
sep = "", collapse = y
Expand Down Expand Up @@ -89,9 +89,9 @@ to_titlecase <- function(x, y = " ") {
#'
#' @examples
#' \dontrun{
#' add_leaves("pspa_snf7.aln", "pspa.txt")
#' addLeaves2Alignment("pspa_snf7.aln", "pspa.txt")
#' }
add_leaves <- function(aln_file = "",
addLeaves2Alignment <- function(aln_file = "",
lin_file = "data/rawdata_tsv/all_semiclean.txt", # !! finally change to all_clean.txt!!
# lin_file="data/rawdata_tsv/PspA.txt",
reduced = FALSE) {
Expand Down Expand Up @@ -164,7 +164,7 @@ add_leaves <- function(aln_file = "",
# AccNum,
sep = "_"
))
temp$Leaf <- map(temp$Leaf, to_titlecase)
temp$Leaf <- map(temp$Leaf, convert2TitleCase)
temp <- temp %>%
mutate(Leaf_Acc = (paste(Leaf, AccNum, sep = "_")))

Expand Down Expand Up @@ -203,7 +203,7 @@ add_leaves <- function(aln_file = "",
#' @export
#'
#' @examples
add_name <- function(data,
addName <- function(data,
accnum_col = "AccNum", spec_col = "Species", lin_col = "Lineage",
lin_sep = ">", out_col = "Name") {
cols <- c(accnum_col, "Kingdom", "Phylum", "Genus", "Spp")
Expand Down Expand Up @@ -283,10 +283,10 @@ add_name <- function(data,
#'
#' @examples
#' \dontrun{
#' add_leaves("pspa_snf7.aln", "pspa.txt")
#' convertAlignment2FA("pspa_snf7.aln", "pspa.txt")
#' }
#'
convert_aln2fa <- function(aln_file = "",
convertAlignment2FA <- function(aln_file = "",
lin_file = "data/rawdata_tsv/all_semiclean.txt", # !! finally change to all_clean.txt!!
fa_outpath = "",
reduced = FALSE) {
Expand All @@ -297,7 +297,7 @@ convert_aln2fa <- function(aln_file = "",
# fa_outpath="data/alns/pspc.fasta"

## Add leaves
aln <- add_leaves(
aln <- addLeaves2Alignment(
aln = aln_file,
lin = lin_file,
reduced = reduced
Expand All @@ -320,7 +320,7 @@ convert_aln2fa <- function(aln_file = "",
return(fasta)
}

#' Default rename_fasta() replacement function. Maps an accession number to its name
#' Default renameFA() replacement function. Maps an accession number to its name
#'
#' @param line The line of a fasta file starting with '>'
#' @param acc2name Data Table containing a column of accession numbers and a name column
Expand All @@ -335,8 +335,8 @@ convert_aln2fa <- function(aln_file = "",
#' @export
#'
#' @examples
map_acc2name <- function(line, acc2name, acc_col = "AccNum", name_col = "Name") {
# change to be the name equivalent to an add_names column
mapAcc2Name <- function(line, acc2name, acc_col = "AccNum", name_col = "Name") {
# change to be the name equivalent to an addName() column
# Find the first ' '
end_acc <- str_locate(line, " ")[[1]]

Expand Down Expand Up @@ -364,8 +364,8 @@ map_acc2name <- function(line, acc2name, acc_col = "AccNum", name_col = "Name")
#' @export
#'
#' @examples
rename_fasta <- function(fa_path, outpath,
replacement_function = map_acc2name, ...) {
renameFA <- function(fa_path, outpath,
replacement_function = mapAcc2Name, ...) {
lines <- read_lines(fa_path)
res <- map(lines, function(x) {
if (strtrim(x, 1) == ">") {
Expand All @@ -381,7 +381,7 @@ rename_fasta <- function(fa_path, outpath,
}

################################
## generate_all_aln2fa
## generateAllAlignments2FA
#' Adding Leaves to an alignment file w/ accessions
#'
#' @keywords alignment, accnum, leaves, lineage, species
Expand All @@ -408,9 +408,9 @@ rename_fasta <- function(fa_path, outpath,
#'
#' @examples
#' \dontrun{
#' generate_all_aln2fa()
#' generateAllAlignments2FA()
#' }
generate_all_aln2fa <- function(aln_path = here("data/rawdata_aln/"),
generateAllAlignments2FA <- function(aln_path = here("data/rawdata_aln/"),
fa_outpath = here("data/alns/"),
lin_file = here("data/rawdata_tsv/all_semiclean.txt"),
reduced = F) {
Expand All @@ -432,7 +432,7 @@ generate_all_aln2fa <- function(aln_path = here("data/rawdata_aln/"),
fa_outpath = paste0(fa_outpath, "/", variable, ".fa")
)
pmap(
.l = aln2fa_args, .f = convert_aln2fa,
.l = aln2fa_args, .f = convertAlignment2FA,
lin_file = lin_file,
reduced = reduced
)
Expand All @@ -441,7 +441,7 @@ generate_all_aln2fa <- function(aln_path = here("data/rawdata_aln/"),

# accessions <- c("P12345","Q9UHC1","O15530","Q14624","P0DTD1")
# accessions <- rep("ANY95992.1", 201)
#' acc2fa converts protein accession numbers to a fasta format.
#' acc2FA converts protein accession numbers to a fasta format.
#'
#' @description
#' Resulting fasta file is written to the outpath.
Expand All @@ -464,11 +464,11 @@ generate_all_aln2fa <- function(aln_path = here("data/rawdata_aln/"),
#'
#' @examples
#' \dontrun{
#' acc2fa(accessions = c("ACU53894.1", "APJ14606.1", "ABK37082.1"), outpath = "my_proteins.fasta")
#' Entrez:accessions <- rep("ANY95992.1", 201) |> acc2fa(outpath = "entrez.fa")
#' EBI:accessions <- c("P12345", "Q9UHC1", "O15530", "Q14624", "P0DTD1") |> acc2fa(outpath = "ebi.fa")
#' acc2FA(accessions = c("ACU53894.1", "APJ14606.1", "ABK37082.1"), outpath = "my_proteins.fasta")
#' # Entrez
#' accessions <- rep("ANY95992.1", 201)
#' acc2FA(accessions, outpath = "entrez.fa")
#' # EBI
#' accessions <- c("P12345", "Q9UHC1", "O15530", "Q14624", "P0DTD1")
#' acc2FA(accessions, outpath = "ebi.fa")
#' }
acc2fa <- function(accessions, outpath, plan = "sequential") {
acc2FA <- function(accessions, outpath, plan = "sequential") {
# validation
stopifnot(length(accessions) > 0)

Expand Down Expand Up @@ -663,7 +663,7 @@ get_accnums_from_fasta_file <- function(fasta_file) {


################################
## convert_accnum2fa
## convertAccNum2FA
#######
## 1 ##
#######
Expand Down Expand Up @@ -706,9 +706,9 @@ get_accnums_from_fasta_file <- function(fasta_file) {
# seqs <- retrieveseqs(seqnames,"swissprot")

################################
## convert_aln2tsv
## convertAlignment2TSV
## NEEDS FIXING!
# convert_aln2tsv <- function(file_path){
# convertAlignment2TSV <- function(file_path){
# cfile <- read_delim("data/alignments/pspc.gismo.aln", delim=" ")
# cfile <- as.data.frame(map(cfile,function(x) gsub("\\s+", "",x)))
# colnames(cfile) <- c("AccNum", "Alignment")
Expand Down
Loading

0 comments on commit 0b79f89

Please sign in to comment.