Skip to content

Commit

Permalink
Merge pull request JRaviLab#87 from teddyCodex/issue-52-rename-functions
Browse files Browse the repository at this point in the history
refactor function names in R/pre-msa-tree and R/reverse-operons.R
- fixes JRaviLab#52
  • Loading branch information
the-mayer authored Oct 12, 2024
2 parents cffeb7a + d544f7e commit 1c43150
Show file tree
Hide file tree
Showing 24 changed files with 222 additions and 342 deletions.
16 changes: 5 additions & 11 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,13 @@ export(BinaryDomainNetwork)
export(GCA2Lineage)
export(GenContextNetwork)
export(IPG2Lineage)
export(RepresentativeAccNums)
export(acc2FA)
export(acc2Lineage)
export(acc2fa)
export(addLeaves2Alignment)
export(addLineage)
export(addName)
export(addTaxID)
export(add_leaves)
export(add_name)
export(advanced_opts2est_walltime)
export(alignFasta)
export(assign_job_queue)
Expand All @@ -31,11 +28,11 @@ export(convert2TitleCase)
export(convertAlignment2FA)
export(convertAlignment2Trees)
export(convertFA2Tree)
export(convert_aln2fa)
export(countByColumn)
export(createFA2Tree)
export(createJobResultsURL)
export(createJobStatusEmailMessage)
export(createRepresentativeAccNum)
export(createWordCloud2Element)
export(createWordCloudElement)
export(create_lineage_lookup)
Expand All @@ -49,15 +46,13 @@ export(findParalogs)
export(formatJobArgumentsHTML)
export(gc_undirected_network)
export(generateAllAlignments2FA)
export(generate_all_aln2fa)
export(generate_msa)
export(getAccNumFromFA)
export(getTopAccByLinDomArch)
export(get_accnums_from_fasta_file)
export(get_proc_medians)
export(get_proc_weights)
export(make_opts2procs)
export(mapAcc2Name)
export(map_acc2name)
export(map_advanced_opts2procs)
export(msa_pdf)
export(plotIPR2Viz)
Expand All @@ -83,26 +78,25 @@ export(removeTails)
export(renameFA)
export(rename_fasta)
export(replaceQuestionMarks)
export(reveql)
export(reverse_operon)
export(reverseOperonSeq)
export(run_deltablast)
export(run_rpsblast)
export(selectLongestDuplicate)
export(sendJobStatusEmail)
export(shortenLineage)
export(sinkReset)
export(straightenOperonSeq)
export(summarizeByLineage)
export(summarizeDomArch)
export(summarizeDomArch_ByLineage)
export(summarizeGenContext)
export(summarizeGenContext_ByDomArchLineage)
export(summarizeGenContext_ByLineage)
export(themeGenes2)
export(to_titlecase)
export(totalGenContextOrDomArchCounts)
export(validateCountDF)
export(wordcloud3)
export(write.MsaAAMultipleAlignment)
export(writeMSA_AA2FA)
export(write_proc_medians_table)
export(write_proc_medians_yml)
importFrom(Biostrings,AAStringSet)
Expand Down
35 changes: 24 additions & 11 deletions R/CHANGED-pre-msa-tree.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ convert2TitleCase <- function(x, y = " ") {
################################
## Function to add leaves to an alignment file
## !! Add DA to leaves?
#' Adding Leaves to an alignment file w/ accessions
#' addLeaves2Alignment
#'
#' @author Janani Ravi
#' @keywords alignment, accnum, leaves, lineage, species
Expand Down Expand Up @@ -178,7 +178,7 @@ addLeaves2Alignment <- function(aln_file = "",
}


#' Add Name
#' addName
#'
#' @author Samuel Chen, Janani Ravi
#' @description This function adds a new 'Name' column that is comprised of components from
Expand Down Expand Up @@ -252,7 +252,7 @@ addName <- function(data,

################################
## Function to convert alignment 'aln' to fasta format for MSA + Tree
#' Adding Leaves to an alignment file w/ accessions
#' convertAlignment2FA
#'
#' @author Janani Ravi
#' @keywords alignment, accnum, leaves, lineage, species
Expand Down Expand Up @@ -320,6 +320,9 @@ convertAlignment2FA <- function(aln_file = "",
return(fasta)
}

#' mapAcc2Name
#'
#' @description
#' Default renameFA() replacement function. Maps an accession number to its name
#'
#' @param line The line of a fasta file starting with '>'
Expand Down Expand Up @@ -382,6 +385,9 @@ renameFA <- function(fa_path, outpath,

################################
## generateAllAlignments2FA
#' generateAllAlignments2FA
#'
#' @description
#' Adding Leaves to an alignment file w/ accessions
#'
#' @keywords alignment, accnum, leaves, lineage, species
Expand Down Expand Up @@ -441,10 +447,11 @@ generateAllAlignments2FA <- function(aln_path = here("data/rawdata_aln/"),

# accessions <- c("P12345","Q9UHC1","O15530","Q14624","P0DTD1")
# accessions <- rep("ANY95992.1", 201)
#' acc2FA converts protein accession numbers to a fasta format.
#' acc2FA
#'
#' @description
#' Resulting fasta file is written to the outpath.
#' Converts protein accession numbers to a fasta format. The resulting
#' fasta file is written to the outpath.
#'
#' @author Samuel Chen, Janani Ravi
#' @keywords accnum, fasta
Expand Down Expand Up @@ -539,6 +546,9 @@ acc2FA <- function(accessions, outpath, plan = "sequential") {
return(result)
}

#' createRepresentativeAccNum
#'
#' @description
#' Function to generate a vector of one Accession number per distinct observation from 'reduced' column
#'
#' @author Samuel Chen, Janani Ravi
Expand All @@ -556,7 +566,7 @@ acc2FA <- function(accessions, outpath, plan = "sequential") {
#' @export
#'
#' @examples
RepresentativeAccNums <- function(prot_data,
createRepresentativeAccNum <- function(prot_data,
reduced = "Lineage",
accnum_col = "AccNum") {
# Get Unique reduced column and then bind the AccNums back to get one AccNum per reduced column
Expand Down Expand Up @@ -585,6 +595,9 @@ RepresentativeAccNums <- function(prot_data,
return(accessions)
}

#' alignFasta
#'
#' @description
#' Perform a Multiple Sequence Alignment on a FASTA file.
#'
#' @author Samuel Chen, Janani Ravi
Expand All @@ -610,12 +623,12 @@ alignFasta <- function(fasta_file, tool = "Muscle", outpath = NULL) {
)

if (typeof(outpath) == "character") {
write.MsaAAMultipleAlignment(aligned, outpath)
writeMSA_AA2FA(aligned, outpath)
}
return(aligned)
}

#' Write MsaAAMultpleAlignment Objects as algined fasta sequence
#' writeMSA_AA2FA
#'
#' @description
#' MsaAAMultipleAlignment Objects are generated from calls to msaClustalOmega
Expand All @@ -632,7 +645,7 @@ alignFasta <- function(fasta_file, tool = "Muscle", outpath = NULL) {
#' @export
#'
#' @examples
write.MsaAAMultipleAlignment <- function(alignment, outpath) {
writeMSA_AA2FA <- function(alignment, outpath) {
l <- length(rownames(alignment))
fasta <- ""
for (i in 1:l)
Expand All @@ -645,7 +658,7 @@ write.MsaAAMultipleAlignment <- function(alignment, outpath) {
return(fasta)
}

#' Get accnums from fasta file
#' getAccNumFromFA
#'
#' @param fasta_file
#'
Expand All @@ -655,7 +668,7 @@ write.MsaAAMultipleAlignment <- function(alignment, outpath) {
#' @export
#'
#' @examples
get_accnums_from_fasta_file <- function(fasta_file) {
getAccNumFromFA <- function(fasta_file) {
txt <- read_file(fasta_file)
accnums <- stringi::stri_extract_all_regex(fasta_file, "(?<=>)[\\w,.]+")[[1]]
return(accnums)
Expand Down
52 changes: 31 additions & 21 deletions R/pre-msa-tree.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ api_key <- Sys.getenv("ENTREZ_API_KEY", unset = "YOUR_KEY_HERE")
#' @export
#'
#' @examples
to_titlecase <- function(x, y = " ") {
convert2TitleCase <- function(x, y = " ") {
s <- strsplit(x, y)[[1]]
paste(toupper(substring(s, 1, 1)), substring(s, 2),
sep = "", collapse = y
Expand All @@ -59,7 +59,7 @@ to_titlecase <- function(x, y = " ") {
################################
## Function to add leaves to an alignment file
## !! Add DA to leaves?
#' Adding Leaves to an alignment file w/ accessions
#' addLeaves2Alignment
#'
#' @author Janani Ravi
#'
Expand Down Expand Up @@ -95,9 +95,9 @@ to_titlecase <- function(x, y = " ") {
#'
#' @examples
#' \dontrun{
#' add_leaves("pspa_snf7.aln", "pspa.txt")
#' addLeaves2Alignment("pspa_snf7.aln", "pspa.txt")
#' }
add_leaves <- function(aln_file = "",
addLeaves2Alignment <- function(aln_file = "",
lin_file = "data/rawdata_tsv/all_semiclean.txt", # !! finally change to all_clean.txt!!
# lin_file="data/rawdata_tsv/PspA.txt",
reduced = FALSE) {
Expand Down Expand Up @@ -184,7 +184,7 @@ add_leaves <- function(aln_file = "",
}


#' Title
#' addName
#'
#' @author Samuel Chen, Janani Ravi
#'
Expand All @@ -209,7 +209,7 @@ add_leaves <- function(aln_file = "",
#' @export
#'
#' @examples
add_name <- function(data,
addName <- function(data,
accnum_col = "AccNum", spec_col = "Species", lin_col = "Lineage",
lin_sep = ">", out_col = "Name") {
cols <- c(accnum_col, "Kingdom", "Phylum", "Genus", "Spp")
Expand Down Expand Up @@ -258,7 +258,7 @@ add_name <- function(data,

################################
## Function to convert alignment 'aln' to fasta format for MSA + Tree
#' Adding Leaves to an alignment file w/ accessions
#' convertAlignment2FA
#'
#' @author Janani Ravi
#'
Expand Down Expand Up @@ -288,9 +288,9 @@ add_name <- function(data,
#'
#' @examples
#' \dontrun{
#' add_leaves("pspa_snf7.aln", "pspa.txt")
#' convertAlignment2FA("pspa_snf7.aln", "pspa.txt")
#' }
convert_aln2fa <- function(aln_file = "",
convertAlignment2FA <- function(aln_file = "",
lin_file = "data/rawdata_tsv/all_semiclean.txt", # !! finally change to all_clean.txt!!
fa_outpath = "",
reduced = FALSE) {
Expand Down Expand Up @@ -324,6 +324,9 @@ convert_aln2fa <- function(aln_file = "",
return(fasta)
}

#' mapAcc2Name
#'
#' @description
#' Default rename_fasta() replacement function. Maps an accession number to its name
#'
#' @param line The line of a fasta file starting with '>'
Expand All @@ -340,7 +343,7 @@ convert_aln2fa <- function(aln_file = "",
#' @export
#'
#' @examples
map_acc2name <- function(line, acc2name, acc_col = "AccNum", name_col = "Name") {
mapAcc2Name <- function(line, acc2name, acc_col = "AccNum", name_col = "Name") {
# change to be the name equivalent to an add_names column
# Find the first ' '
end_acc <- str_locate(line, " ")[[1]]
Expand Down Expand Up @@ -386,7 +389,10 @@ rename_fasta <- function(fa_path, outpath,
}

################################
## generate_all_aln2fa
## generateAllAlignments2FA
#' generateAllAlignments2FA
#'
#' @description
#' Adding Leaves to an alignment file w/ accessions
#'
#' @author Janani Ravi
Expand All @@ -413,9 +419,9 @@ rename_fasta <- function(fa_path, outpath,
#'
#' @examples
#' \dontrun{
#' generate_all_aln2fa()
#' generateAllAlignments2FA()
#' }
generate_all_aln2fa <- function(aln_path = here("data/rawdata_aln/"),
generateAllAlignments2FA <- function(aln_path = here("data/rawdata_aln/"),
fa_outpath = here("data/alns/"),
lin_file = here("data/rawdata_tsv/all_semiclean.txt"),
reduced = F) {
Expand Down Expand Up @@ -448,6 +454,10 @@ generate_all_aln2fa <- function(aln_path = here("data/rawdata_aln/"),
# accessions <- rep("ANY95992.1", 201)
#' acc2fa
#'
#' @description
#' Converts protein accession numbers to a fasta format. The resulting
#' fasta file is written to the outpath.
#'
#' @author Samuel Chen, Janani Ravi
#' @keywords accnum, fasta
#'
Expand Down Expand Up @@ -546,7 +556,7 @@ acc2fa <- function(accessions, outpath, plan = "sequential") {
return(result)
}

#' RepresentativeAccNums
#' createRepresentativeAccNum
#'
#' @description
#' Function to generate a vector of one Accession number per distinct observation from 'reduced' column
Expand All @@ -566,7 +576,7 @@ acc2fa <- function(accessions, outpath, plan = "sequential") {
#' @export
#'
#' @examples
RepresentativeAccNums <- function(prot_data,
createRepresentativeAccNum <- function(prot_data,
reduced = "Lineage",
accnum_col = "AccNum") {
# Get Unique reduced column and then bind the AccNums back to get one AccNum per reduced column
Expand Down Expand Up @@ -623,15 +633,15 @@ alignFasta <- function(fasta_file, tool = "Muscle", outpath = NULL) {
)

if (typeof(outpath) == "character") {
write.MsaAAMultipleAlignment(aligned, outpath)
writeMSA_AA2FA(aligned, outpath)
}
return(aligned)
}

#' write.MsaAAMultipleAlignment
#' writeMSA_AA2FA
#'
#' @description
#' Write MsaAAMultpleAlignment Objects as algined fasta sequence
#' Write MsaAAMultipleAlignment Objects as aligned fasta sequence.
#' MsaAAMultipleAlignment Objects are generated from calls to msaClustalOmega
#' and msaMuscle from the 'msa' package
#'
Expand All @@ -647,7 +657,7 @@ alignFasta <- function(fasta_file, tool = "Muscle", outpath = NULL) {
#' @export
#'
#' @examples
write.MsaAAMultipleAlignment <- function(alignment, outpath) {
writeMSA_AA2FA <- function(alignment, outpath) {
l <- length(rownames(alignment))
fasta <- ""
for (i in 1:l)
Expand All @@ -660,7 +670,7 @@ write.MsaAAMultipleAlignment <- function(alignment, outpath) {
return(fasta)
}

#' get_accnums_from_fasta_file
#' getAccNumFromFA
#'
#' @param fasta_file
#'
Expand All @@ -671,7 +681,7 @@ write.MsaAAMultipleAlignment <- function(alignment, outpath) {
#' @export
#'
#' @examples
get_accnums_from_fasta_file <- function(fasta_file) {
getAccNumFromFA <- function(fasta_file) {
txt <- read_file(fasta_file)
accnums <- stringi::stri_extract_all_regex(fasta_file, "(?<=>)[\\w,.]+")[[1]]
return(accnums)
Expand Down
Loading

0 comments on commit 1c43150

Please sign in to comment.