Skip to content

Commit

Permalink
refactor function names in summarize.R
Browse files Browse the repository at this point in the history
  • Loading branch information
teddyCodex committed Oct 7, 2024
1 parent 94369a2 commit 216da18
Show file tree
Hide file tree
Showing 17 changed files with 148 additions and 177 deletions.
22 changes: 11 additions & 11 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,17 @@ export(combine_full)
export(combine_ipr)
export(convert_aln2fa)
export(convert_fa2tre)
export(count_bycol)
export(countByColumn)
export(count_to_sunburst)
export(count_to_treemap)
export(create_all_col_params)
export(create_lineage_lookup)
export(create_one_col_params)
export(domain_network)
export(efetch_ipg)
export(filter_by_doms)
export(filter_freq)
export(find_paralogs)
export(filterByDomains)
export(filterByFrequency)
export(findParalogs)
export(find_top_acc)
export(format_job_args)
export(gc_undirected_network)
Expand Down Expand Up @@ -81,15 +81,15 @@ export(shorten_lineage)
export(sink.reset)
export(stacked_lin_plot)
export(string2accnum)
export(summ.DA)
export(summ.DA.byLin)
export(summ.GC)
export(summ.GC.byDALin)
export(summ.GC.byLin)
export(summarize_bylin)
export(summarizeByLineage)
export(summarizeDomArch)
export(summarizeDomArch_ByLineage)
export(summarizeGenContext)
export(summarizeGenContext_ByDomArchLineage)
export(summarizeGenContext_ByLineage)
export(theme_genes2)
export(to_titlecase)
export(total_counts)
export(totalGenContextOrDomArchCounts)
export(upset.plot)
export(wordcloud2_element)
export(wordcloud3)
Expand Down
94 changes: 47 additions & 47 deletions R/summarize.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#' Filter by Domains
#'
#' @author Samuel Chen, Janani Ravi
#' @description filter_by_doms filters a data frame by identifying exact domain matches
#' @description filterByDomains filters a data frame by identifying exact domain matches
#' and either keeping or removing rows with the identified domain
#'
#' @param prot Dataframe to filter
Expand All @@ -33,9 +33,9 @@
#'
#' @examples
#' \dontrun{
#' filter_by_doms()
#' filterByDomains()
#' }
filter_by_doms <- function(prot, column = "DomArch", doms_keep = c(), doms_remove = c(),
filterByDomains <- function(prot, column = "DomArch", doms_keep = c(), doms_remove = c(),
ignore.case = FALSE) {
# Only rows with a domain in doms_keep will be kept
# Any row containing a domain in doms_remove will be removed
Expand Down Expand Up @@ -102,9 +102,9 @@ filter_by_doms <- function(prot, column = "DomArch", doms_keep = c(), doms_remov
#'
#' @examples
#' \dontrun{
#' count_bycol()
#' countByColumn()
#' }
count_bycol <- function(prot = prot, column = "DomArch", min.freq = 1) {
countByColumn <- function(prot = prot, column = "DomArch", min.freq = 1) {
counts <- prot %>%
select(column) %>%
table() %>%
Expand Down Expand Up @@ -135,10 +135,10 @@ count_bycol <- function(prot = prot, column = "DomArch", min.freq = 1) {
#'
#' @examples
#' \dontrun{
#' tibble::tibble(DomArch = c("aaa+bbb", "a+b", "b+c", "b-c")) |> elements2words()
#' tibble::tibble(DomArch = c("aaa+bbb", "a+b", "b+c", "b-c")) |> elements2Words()
#' }
#'
elements2words <- function(prot, column = "DomArch", conversion_type = "da2doms") {
elements2Words <- function(prot, column = "DomArch", conversion_type = "da2doms") {
z1 <- prot %>%
dplyr::pull(column) %>%
str_replace_all("\\,", " ") %>%
Expand Down Expand Up @@ -184,11 +184,11 @@ elements2words <- function(prot, column = "DomArch", conversion_type = "da2doms"
#' @examples
#' \dontrun{
#' tibble::tibble(DomArch = c("aaa+bbb", "a+b", "b+c", "b-c")) |>
#' elements2words() |>
#' words2wc()
#' elements2Words() |>
#' words2WordCounts()
#' }
#'
words2wc <- function(string) {
words2WordCounts <- function(string) {
df_word_count <- string %>%
# reduce spaces with length 2 or greater to a single space
str_replace_all("\\s{2,}", " ") %>%
Expand Down Expand Up @@ -227,9 +227,9 @@ words2wc <- function(string) {
#'
#' @examples
#' \dontrun{
#' filter_freq()
#' filterByFrequency()
#' }
filter_freq <- function(x, min.freq) {
filterByFrequency <- function(x, min.freq) {
  # Keep only the rows whose `freq` column is at least `min.freq`.
  filter(x, freq >= min.freq)
}
Expand All @@ -254,10 +254,10 @@ filter_freq <- function(x, min.freq) {
#' \dontrun{
#' library(tidyverse)
#' tibble(DomArch = c("a+b", "a+b", "b+c", "a+b"), Lineage = c("l1", "l1", "l1", "l2")) |>
#' summarize_bylin(query = "all")
#' summarizeByLineage(query = "all")
#' }
#'
summarize_bylin <- function(prot = "prot", column = "DomArch", by = "Lineage",
summarizeByLineage <- function(prot = "prot", column = "DomArch", by = "Lineage",
query) {
column <- sym(column)
by <- sym(by)
Expand All @@ -277,7 +277,7 @@ summarize_bylin <- function(prot = "prot", column = "DomArch", by = "Lineage",
}


#' summ.DA.byLin
#' summarizeDomArch_ByLineage
#'
#' @description
#' Function to summarize and retrieve counts by Domains & Domains+Lineage
Expand All @@ -292,9 +292,9 @@ summarize_bylin <- function(prot = "prot", column = "DomArch", by = "Lineage",
#'
#' @examples
#' \dontrun{
#' summ.DA.byLin()
#' summarizeDomArch_ByLineage()
#' }
summ.DA.byLin <- function(x) {
summarizeDomArch_ByLineage <- function(x) {
## Note: it is better to reserve dots for S3 Objects. Consider replacing '.' with '_'
x %>%
filter(!grepl("^-$", DomArch)) %>%
Expand All @@ -304,7 +304,7 @@ summ.DA.byLin <- function(x) {
}

## Function to retrieve counts of how many lineages a DomArch appears in
#' summ.DA
#' summarizeDomArch
#'
#' @description
#' Function to retrieve counts of how many lineages a DomArch appears in
Expand All @@ -318,9 +318,9 @@ summ.DA.byLin <- function(x) {
#'
#' @examples
#' \dontrun{
#' summ.DA()
#' summarizeDomArch()
#' }
summ.DA <- function(x) {
summarizeDomArch <- function(x) {
## Note: it is better to reserve dots for S3 Objects. Consider replacing '.' with '_'
x %>%
group_by(DomArch) %>%
Expand All @@ -330,7 +330,7 @@ summ.DA <- function(x) {
filter(!grepl("^-$", DomArch))
}

#' summ.GC.byDALin
#' summarizeGenContext_ByDomArchLineage
#'
#' @param x
#'
Expand All @@ -341,9 +341,9 @@ summ.DA <- function(x) {
#'
#' @examples
#' \dontrun{
#' summ.GC.byDALin
#' summarizeGenContext_ByDomArchLineage()
#' }
summ.GC.byDALin <- function(x) {
summarizeGenContext_ByDomArchLineage <- function(x) {
## Note: it is better to reserve dots for S3 Objects. Consider replacing '.' with '_'
x %>%
filter(!grepl("^-$", GenContext)) %>%
Expand All @@ -355,7 +355,7 @@ summ.GC.byDALin <- function(x) {
arrange(desc(count))
}

#' summ.GC.byLin
#' summarizeGenContext_ByLineage
#'
#' @param x
#'
Expand All @@ -366,9 +366,9 @@ summ.GC.byDALin <- function(x) {
#'
#' @examples
#' \dontrun{
#' summ.GC.byLin()
#' summarizeGenContext_ByLineage()
#' }
summ.GC.byLin <- function(x) {
summarizeGenContext_ByLineage <- function(x) {
## Note: it is better to reserve dots for S3 Objects. Consider replacing '.' with '_'
x %>%
filter(!grepl("^-$", GenContext)) %>%
Expand All @@ -380,7 +380,7 @@ summ.GC.byLin <- function(x) {
arrange(desc(count))
}

#' summ.GC
#' summarizeGenContext
#'
#' @param x
#'
Expand All @@ -391,9 +391,9 @@ summ.GC.byLin <- function(x) {
#'
#' @examples
#' \dontrun{
#' summ.GC()
#' summarizeGenContext()
#' }
summ.GC <- function(x) {
summarizeGenContext <- function(x) {
## Note: it is better to reserve dots for S3 Objects. Consider replacing '.' with '_'
x %>%
group_by(GenContext) %>%
Expand Down Expand Up @@ -436,9 +436,9 @@ summ.GC <- function(x) {
#'
#' @examples
#' \dontrun{
#' total_counts(pspa - gc_lin_counts, 0, "GC")
#' totalGenContextOrDomArchCounts(pspa - gc_lin_counts, 0, "GC")
#' }
total_counts <- function(prot, column = "DomArch", lineage_col = "Lineage",
totalGenContextOrDomArchCounts <- function(prot, column = "DomArch", lineage_col = "Lineage",
cutoff = 90, RowsCutoff = FALSE, digits = 2
# type = "GC"
) {
Expand All @@ -448,7 +448,7 @@ total_counts <- function(prot, column = "DomArch", lineage_col = "Lineage",
filter(!is.na({{ column }}) & !is.na({{ lineage_col }})) %>%
filter({{ column }} != "")

prot <- summarize_bylin(prot, column, by = lineage_col, query = "all")
prot <- summarizeByLineage(prot, column, by = lineage_col, query = "all")
col_count <- prot %>%
group_by({{ column }}) %>%
summarise(totalcount = sum(count))
Expand Down Expand Up @@ -504,7 +504,7 @@ total_counts <- function(prot, column = "DomArch", lineage_col = "Lineage",



# total_counts_by_query <- function(query_data, queries, colname,cutoff, RemoveAstrk = F)
# totalGenContextOrDomArchCounts_by_query <- function(query_data, queries, colname,cutoff, RemoveAstrk = F)
# {
# ## Get the total counts by the Queries.
#
Expand Down Expand Up @@ -560,19 +560,19 @@ total_counts <- function(prot, column = "DomArch", lineage_col = "Lineage",
# lin_count_totals$CumulativePercent[x] = total_counter
# }
#
# query_total_counts <- left_join(query_lin_counts, lin_count_totals, by = "Lineage")
# query_totalGenContextOrDomArchCounts <- left_join(query_lin_counts, lin_count_totals, by = "Lineage")
#
# query_total_counts <- query_total_counts%>% group_by(total_c,count) %>% arrange(-total_c, -count)
# query_totalGenContextOrDomArchCounts <- query_totalGenContextOrDomArchCounts%>% group_by(total_c,count) %>% arrange(-total_c, -count)
# # Get lineages that are above the cutoff percentage value
# count_cutoff <- (query_total_counts %>% filter(CumulativePercent >= (100-cutoff)))
# count_cutoff <- (query_totalGenContextOrDomArchCounts %>% filter(CumulativePercent >= (100-cutoff)))
#
# query_total_counts <- (query_total_counts %>% filter(total_c >= count_cutoff$total_c[nrow(count_cutoff)]))
# query_totalGenContextOrDomArchCounts <- (query_totalGenContextOrDomArchCounts %>% filter(total_c >= count_cutoff$total_c[nrow(count_cutoff)]))
#
# # Round to 3 digits
# query_total_counts$CumulativePercent <- query_total_counts$CumulativePercent %>% round(digits = 3)
# query_total_counts$IndividualPercent <- query_total_counts$IndividualPercent %>% round(digits = 3)
# query_totalGenContextOrDomArchCounts$CumulativePercent <- query_totalGenContextOrDomArchCounts$CumulativePercent %>% round(digits = 3)
# query_totalGenContextOrDomArchCounts$IndividualPercent <- query_totalGenContextOrDomArchCounts$IndividualPercent %>% round(digits = 3)
#
# return(query_total_counts)
# return(query_totalGenContextOrDomArchCounts)
# }


Expand All @@ -598,9 +598,9 @@ total_counts <- function(prot, column = "DomArch", lineage_col = "Lineage",
#'
#' @examples
#' \dontrun{
#' find_paralogs(pspa)
#' findParalogs(pspa)
#' }
find_paralogs <- function(prot) {
findParalogs <- function(prot) {
# Remove eukaryotes
prot <- prot %>% filter(!grepl("^eukaryota", Lineage))
paralogTable <- prot %>%
Expand Down Expand Up @@ -635,17 +635,17 @@ find_paralogs <- function(prot) {
# query.sub$GenContext %>%
# counts(n)")

# ## elements2words: Function to break up ELEMENTS to WORDS for DA and GC
# ## elements2Words: Function to break up ELEMENTS to WORDS for DA and GC
# cat("Converting DA to domains and GC to DAs.\n2 switches: da2doms and gc2da
# \nFor e.g.:
# query.sub$DA.doms <- query.sub$DomArch.norep %>%
# elements2words(\"da2doms\")
# elements2Words(\"da2doms\")
# query.sub$GC.da <- query.sub$GenContext %>%
# elements2words(\"gc2da\")")
# elements2Words(\"gc2da\")")


# ## words2wc: Function to get WORD COUNTS [DOMAINS (DA) or DOMAIN ARCHITECTURES (GC)]
# ## words2WordCounts: Function to get WORD COUNTS [DOMAINS (DA) or DOMAIN ARCHITECTURES (GC)]
# cat("Word counts for broken up domains from DAs and DAs from GCs.
# \nFor e.g.:
# DA.doms.wc <- query.sub$DA.doms %>%
# words2wc()")
# words2WordCounts()")
8 changes: 4 additions & 4 deletions man/count_bycol.Rd → man/countByColumn.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

29 changes: 0 additions & 29 deletions man/elements2words.Rd

This file was deleted.

Loading

0 comments on commit 216da18

Please sign in to comment.