From 2d9acb8b335a9dc281b7a4b460706e90e1a3d218 Mon Sep 17 00:00:00 2001
From: Awa Synthia <ndahili14@gmail.com>
Date: Sat, 5 Oct 2024 08:58:46 +0300
Subject: [PATCH 1/3] Add parameter definitions to summarize.R

Signed-off-by: Awa Synthia <ndahili14@gmail.com>
---
 NAMESPACE              |   1 -
 R/summarize.R          | 157 +++++++++++++++++++++++++++++++----------
 man/count_bycol.Rd     |  22 +++++-
 man/elements2words.Rd  |  23 ++++--
 man/filter_freq.Rd     |  10 ++-
 man/summ.DA.Rd         |  13 +++-
 man/summ.DA.byLin.Rd   |   9 ++-
 man/summ.GC.Rd         |  14 +++-
 man/summ.GC.byDALin.Rd |  15 +++-
 man/summarize_bylin.Rd |  15 +++-
 man/total_counts.Rd    |  24 +++++--
 man/words2wc.Rd        |  11 ++-
 12 files changed, 249 insertions(+), 65 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 16cf0813..9d73120a 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -131,7 +131,6 @@ importFrom(dplyr,if_else)
 importFrom(dplyr,left_join)
 importFrom(dplyr,mutate)
 importFrom(dplyr,n)
-importFrom(dplyr,n_distinct)
 importFrom(dplyr,pull)
 importFrom(dplyr,relocate)
 importFrom(dplyr,right_join)
diff --git a/R/summarize.R b/R/summarize.R
index a9b13e43..e03ca463 100644
--- a/R/summarize.R
+++ b/R/summarize.R
@@ -91,18 +91,31 @@ filter_by_doms <- function(prot, column = "DomArch", doms_keep = c(), doms_remov
 ## Function to obtain element counts (DA, GC)
 #' Count Bycol
 #'
-#' @param prot
-#' @param column
-#' @param min.freq
+#' @param prot A data frame containing the dataset to analyze, typically with 
+#' multiple columns including the one specified by the `column` parameter.
+#' @param column A character string specifying the name of the column to analyze. 
+#' The default is "DomArch".
+#' @param min.freq An integer specifying the minimum frequency an element must 
+#' have to be included in the output. Default is 1.
 #'
 #' @importFrom dplyr arrange as_tibble filter select
 #'
-#' @return Describe return, in detail
+#' @return A tibble with two columns:
+#' \describe{
+#'   \item{`column`}{The unique elements from the specified column 
+#'   (e.g., "DomArch").}
+#'   \item{`freq`}{The frequency of each element, i.e., the number of times 
+#'   each element appears in the specified column.}
+#' }
+#' The tibble is filtered to only include elements that have a frequency 
+#' greater than or equal to `min.freq` and does not include elements with `NA` 
+#' values or those starting with a hyphen ("-").
+#'
 #' @export
 #'
 #' @examples
 #' \dontrun{
-#' count_bycol()
+#' count_bycol(prot = my_data, column = "DomArch", min.freq = 10)
 #' }
 count_bycol <- function(prot = prot, column = "DomArch", min.freq = 1) {
     counts <- prot %>%
@@ -123,19 +136,30 @@ count_bycol <- function(prot = prot, column = "DomArch", min.freq = 1) {
 #' Break string ELEMENTS into WORDS for domain architecture (DA) and genomic
 #' context (GC)
 #'
-#' @param prot [dataframe]
-#' @param column [string] column name
-#' @param conversion_type [string] type of conversion: 'da2doms': domain architectures to
-#' domains. 'gc2da' genomic context to domain architectures
+#' @param prot A dataframe containing the dataset to analyze. The specified 
+#' `column` contains the string elements to be processed.
+#' @param column A character string specifying the name of the column to analyze. 
+#' Default is "DomArch".
+#' @param conversion_type A character string specifying the type of conversion. 
+#' Two options are available:
+#' \describe{
+#'   \item{`da2doms`}{Convert domain architectures into individual domains by 
+#'   replacing `+` symbols with spaces.}
+#'   \item{`gc2da`}{Convert genomic context into domain architectures by
+#'    replacing directional symbols (`<-`, `->`, and `|`) with spaces.}
+#' }
 #'
 #' @importFrom dplyr pull
 #' @importFrom stringr str_replace_all
 #'
-#' @return [string] with words delimited by spaces
+#' @return A single string where elements are delimited by spaces. The function 
+#' performs necessary substitutions based on the `conversion_type` and cleans up 
+#' extraneous characters like newlines, tabs, and multiple spaces.
 #'
 #' @examples
 #' \dontrun{
-#' tibble::tibble(DomArch = c("aaa+bbb", "a+b", "b+c", "b-c")) |> elements2words()
+#' tibble::tibble(DomArch = c("aaa+bbb", "a+b", "b+c", "b-c")) |>
+#'   elements2words()
 #' }
 #'
 elements2words <- function(prot, column = "DomArch", conversion_type = "da2doms") {
@@ -175,11 +199,19 @@ elements2words <- function(prot, column = "DomArch", conversion_type = "da2doms"
 #' @description
 #' Get word counts (wc) [DOMAINS (DA) or DOMAIN ARCHITECTURES (GC)]
 #'
-#' @param string
+#' @param string A character string containing the elements (words) to count. 
+#' This would typically be a space-delimited string representing domain 
+#' architectures or genomic contexts.
 #'
-#' @importFrom dplyr as_tibble filter
+#' @importFrom dplyr as_tibble filter arrange
+#' @importFrom stringr str_replace_all
 #'
-#' @return [tbl_df] table with 2 columns: 1) words & 2) counts/frequency
+#' @return A tibble (tbl_df) with two columns: 
+#' \describe{
+#'   \item{`words`}{A column containing the individual words 
+#'   (domains or domain architectures).}
+#'   \item{`freq`}{A column containing the frequency counts for each word.}
+#' }
 #'
 #' @examples
 #' \dontrun{
@@ -219,10 +251,15 @@ words2wc <- function(string) {
 ## Function to filter based on frequencies
 #' Filter Frequency
 #'
-#' @param x
-#' @param min.freq
+#' @param x A tibble (tbl_df) containing at least two columns: one for 
+#' elements (e.g., `words`) and one for their frequency (e.g., `freq`).
+#' @param min.freq A numeric value specifying the minimum frequency threshold. 
+#' Only elements with frequencies greater than or equal to this value will be 
+#' retained.
+#'
+#' @return A tibble with the same structure as `x`, but filtered to include 
+#' only rows where the frequency is greater than or equal to `min.freq`.
 #'
-#' @return Describe return, in detail
 #' @export
 #'
 #' @examples
@@ -239,15 +276,20 @@ filter_freq <- function(x, min.freq) {
 #########################
 #' Summarize by Lineage
 #'
-#' @param prot
-#' @param column
-#' @param by
-#' @param query
+#' @param prot A dataframe or tibble containing the data.
+#' @param column A string representing the column to be summarized 
+#' (e.g., `DomArch`). Default is "DomArch".
+#' @param by A string representing the grouping column (e.g., `Lineage`). 
+#' Default is "Lineage".
+#' @param query A string specifying the query pattern for filtering the target 
+#' column. Use "all" to skip filtering and include all rows.
 #'
 #' @importFrom dplyr arrange filter group_by summarise
 #' @importFrom rlang sym
 #'
-#' @return Describe return, in detail
+#' @return A tibble summarizing the counts of occurrences of elements in 
+#' the `column`, grouped by the `by` column. The result includes the number 
+#' of occurrences (`count`) and is arranged in descending order of count.
 #' @export
 #'
 #' @examples
@@ -283,11 +325,17 @@ summarize_bylin <- function(prot = "prot", column = "DomArch", by = "Lineage",
 #' Function to summarize and retrieve counts by Domains & Domains+Lineage
 #'
 #'
-#' @param x
+#' @param x A dataframe or tibble containing the data. It must have columns 
+#' named `DomArch` and `Lineage`.
 #'
 #' @importFrom dplyr arrange count desc filter group_by summarise
 #'
-#' @return Describe return, in detail
+#' @return A tibble summarizing the counts of unique domain architectures 
+#' (`DomArch`) per lineage (`Lineage`). The resulting table contains three 
+#' columns: `DomArch`, `Lineage`, and `count`, which indicates the frequency 
+#' of each domain architecture for each lineage. The results are arranged in 
+#' descending order of `count`.
+#'
 #' @export
 #'
 #' @examples
@@ -309,11 +357,18 @@ summ.DA.byLin <- function(x) {
 #' @description
 #' Function to retrieve counts of how many lineages a DomArch appears in
 #'
-#' @param x
+#' @param x A dataframe or tibble containing the data. It must have a column 
+#' named `DomArch` and a count column, such as `count`, which represents the 
+#' occurrences of each architecture in various lineages.
 #'
 #' @importFrom dplyr arrange group_by filter summarise
 #'
-#' @return Describe return, in detail
+#' @return A tibble summarizing each unique `DomArch`, along with the following 
+#' columns:
+#' - `totalcount`: The total occurrences of each `DomArch` across all lineages.
+#' - `totallin`: The total number of unique lineages in which each `DomArch` 
+#' appears.
+#' The results are arranged in descending order of `totallin` and `totalcount`.
 #' @export
 #'
 #' @examples
@@ -332,11 +387,20 @@ summ.DA <- function(x) {
 
 #' summ.GC.byDALin
 #'
-#' @param x
+#' @param x A dataframe or tibble containing the data. It must have columns 
+#' named `GenContext`, `DomArch`, and `Lineage`.
 #'
 #' @importFrom dplyr arrange desc filter group_by n summarise
 #'
-#' @return Define return, in detail
+#' @return A tibble summarizing each unique combination of `GenContext`, 
+#' `DomArch`, and `Lineage`, along with the following columns:
+#' - `GenContext`: The genomic context for each entry.
+#' - `DomArch`: The domain architecture for each entry.
+#' - `Lineage`: The lineage associated with each entry.
+#' - `count`: The total number of occurrences for each combination of 
+#' `GenContext`, `DomArch`, and `Lineage`.
+#'
+#' The results are arranged in descending order of `count`.
 #' @export
 #'
 #' @examples
@@ -382,11 +446,19 @@ summ.GC.byLin <- function(x) {
 
 #' summ.GC
 #'
-#' @param x
+#' @param x A dataframe or tibble containing the data. It must have columns 
+#' named `GenContext`, `DomArch`, and `Lineage`.
 #'
-#' @importFrom dplyr arrange desc filter group_by n_distinct summarise
+#' @importFrom dplyr arrange desc filter group_by n summarise
 #'
-#' @return Describe return, in detail
+#' @return A tibble summarizing each unique combination of `GenContext` and 
+#' `Lineage`, along with the following columns:
+#' - `GenContext`: The genomic context for each entry.
+#' - `Lineage`: The lineage associated with each entry.
+#' - `count`: The total number of occurrences for each combination of
+#'  `GenContext` and `Lineage`.
+#'
+#' The results are arranged in descending order of `count`.
 #' @export
 #'
 #' @examples
@@ -419,16 +491,27 @@ summ.GC <- function(x) {
 #'
 #' @param prot  A data frame that must contain columns:
 #' \itemize{\item Either 'GenContext' or 'DomArch.norep' \item count}
-#' @param column Character. The column to summarize
-#' @param lineage_col
-#' @param cutoff Numeric. Cutoff for total count. Counts below cutoff value will not be shown. Default is 0.
-#' @param RowsCutoff
-#' @param digits
+#' @param column Character. The column to summarize, default is "DomArch".
+#' @param lineage_col Character. The name of the lineage column, default is 
+#' "Lineage".
+#' @param cutoff Numeric. Cutoff for total count. Counts below this cutoff value 
+#' will not be shown. Default is 0.
+#' @param RowsCutoff Logical. If TRUE, filters based on cumulative percentage 
+#' cutoff. Default is FALSE.
+#' @param digits Numeric. Number of decimal places for percentage columns. 
+#' Default is 2.
+#'
 #'
 #' @importFrom dplyr arrange distinct filter group_by left_join mutate select summarise ungroup
 #' @importFrom rlang as_string sym
 #'
-#' @return Define return, in detail
+#' @return A data frame with the following columns:
+#' - The column named by `column`: Unique values from that column.
+#' - `totalcount`: The total count of occurrences for each unique value in 
+#' the specified column.
+#' - `IndividualCountPercent`: The percentage of each `totalcount` relative to 
+#' the overall count.
+#' - `CumulativePercent`: The cumulative percentage of total counts.
 #' @export
 #'
 #' @note Please refer to the source code if you have alternate file formats and/or
diff --git a/man/count_bycol.Rd b/man/count_bycol.Rd
index 884c0f0f..946a7ea2 100644
--- a/man/count_bycol.Rd
+++ b/man/count_bycol.Rd
@@ -7,16 +7,32 @@
 count_bycol(prot = prot, column = "DomArch", min.freq = 1)
 }
 \arguments{
-\item{min.freq}{}
+\item{prot}{A data frame containing the dataset to analyze, typically with
+multiple columns including the one specified by the \code{column} parameter.}
+
+\item{column}{A character string specifying the name of the column to analyze.
+The default is "DomArch".}
+
+\item{min.freq}{An integer specifying the minimum frequency an element must
+have to be included in the output. Default is 1.}
 }
 \value{
-Describe return, in detail
+A tibble with two columns:
+\describe{
+\item{\code{column}}{The unique elements from the specified column
+(e.g., "DomArch").}
+\item{\code{freq}}{The frequency of each element, i.e., the number of times
+each element appears in the specified column.}
+}
+The tibble is filtered to only include elements that have a frequency
+greater than or equal to \code{min.freq} and does not include elements with \code{NA}
+values or those starting with a hyphen ("-").
 }
 \description{
 Count Bycol
 }
 \examples{
 \dontrun{
-count_bycol()
+count_bycol(prot = my_data, column = "DomArch", min.freq = 10)
 }
 }
diff --git a/man/elements2words.Rd b/man/elements2words.Rd
index 80fcbafb..bda447db 100644
--- a/man/elements2words.Rd
+++ b/man/elements2words.Rd
@@ -7,15 +7,25 @@
 elements2words(prot, column = "DomArch", conversion_type = "da2doms")
 }
 \arguments{
-\item{prot}{\link{dataframe}}
+\item{prot}{A dataframe containing the dataset to analyze. The specified
+\code{column} contains the string elements to be processed.}
 
-\item{column}{\link{string} column name}
+\item{column}{A character string specifying the name of the column to analyze.
+Default is "DomArch".}
 
-\item{conversion_type}{\link{string} type of conversion: 'da2doms': domain architectures to
-domains. 'gc2da' genomic context to domain architectures}
+\item{conversion_type}{A character string specifying the type of conversion.
+Two options are available:
+\describe{
+\item{\code{da2doms}}{Convert domain architectures into individual domains by
+replacing \code{+} symbols with spaces.}
+\item{\code{gc2da}}{Convert genomic context into domain architectures by
+replacing directional symbols (\verb{<-}, \verb{->}, and \code{|}) with spaces.}
+}}
 }
 \value{
-\link{string} with words delimited by spaces
+A single string where elements are delimited by spaces. The function
+performs necessary substitutions based on the \code{conversion_type} and cleans up
+extraneous characters like newlines, tabs, and multiple spaces.
 }
 \description{
 Break string ELEMENTS into WORDS for domain architecture (DA) and genomic
@@ -23,7 +33,8 @@ context (GC)
 }
 \examples{
 \dontrun{
-tibble::tibble(DomArch = c("aaa+bbb", "a+b", "b+c", "b-c")) |> elements2words()
+tibble::tibble(DomArch = c("aaa+bbb", 
+"a+b", "b+c", "b-c")) |> elements2words()
 }
 
 }
diff --git a/man/filter_freq.Rd b/man/filter_freq.Rd
index ce4db5ac..9dfba73b 100644
--- a/man/filter_freq.Rd
+++ b/man/filter_freq.Rd
@@ -7,10 +7,16 @@
 filter_freq(x, min.freq)
 }
 \arguments{
-\item{min.freq}{}
+\item{x}{A tibble (tbl_df) containing at least two columns: one for
+elements (e.g., \code{words}) and one for their frequency (e.g., \code{freq}).}
+
+\item{min.freq}{A numeric value specifying the minimum frequency threshold.
+Only elements with frequencies greater than or equal to this value will be
+retained.}
 }
 \value{
-Describe return, in detail
+A tibble with the same structure as \code{x}, but filtered to include
+only rows where the frequency is greater than or equal to \code{min.freq}.
 }
 \description{
 Filter Frequency
diff --git a/man/summ.DA.Rd b/man/summ.DA.Rd
index 13717140..01d15b3c 100644
--- a/man/summ.DA.Rd
+++ b/man/summ.DA.Rd
@@ -7,10 +7,19 @@
 summ.DA(x)
 }
 \arguments{
-\item{x}{}
+\item{x}{A dataframe or tibble containing the data. It must have a column
+named \code{DomArch} and a count column, such as \code{count}, which represents the
+occurrences of each architecture in various lineages.}
 }
 \value{
-Describe return, in detail
+A tibble summarizing each unique \code{DomArch}, along with the following
+columns:
+\itemize{
+\item \code{totalcount}: The total occurrences of each \code{DomArch} across all lineages.
+\item \code{totallin}: The total number of unique lineages in which each \code{DomArch}
+appears.
+The results are arranged in descending order of \code{totallin} and \code{totalcount}.
+}
 }
 \description{
 Function to retrieve counts of how many lineages a DomArch appears in
diff --git a/man/summ.DA.byLin.Rd b/man/summ.DA.byLin.Rd
index 66555fd5..d88e5d37 100644
--- a/man/summ.DA.byLin.Rd
+++ b/man/summ.DA.byLin.Rd
@@ -7,10 +7,15 @@
 summ.DA.byLin(x)
 }
 \arguments{
-\item{x}{}
+\item{x}{A dataframe or tibble containing the data. It must have columns
+named \code{DomArch} and \code{Lineage}.}
 }
 \value{
-Describe return, in detail
+A tibble summarizing the counts of unique domain architectures
+(\code{DomArch}) per lineage (\code{Lineage}). The resulting table contains three
+columns: \code{DomArch}, \code{Lineage}, and \code{count}, which indicates the frequency
+of each domain architecture for each lineage. The results are arranged in
+descending order of \code{count}.
 }
 \description{
 Function to summarize and retrieve counts by Domains & Domains+Lineage
diff --git a/man/summ.GC.Rd b/man/summ.GC.Rd
index fa52a6bf..2ec4d651 100644
--- a/man/summ.GC.Rd
+++ b/man/summ.GC.Rd
@@ -7,10 +7,20 @@
 summ.GC(x)
 }
 \arguments{
-\item{x}{}
+\item{x}{A dataframe or tibble containing the data. It must have columns
+named \code{GenContext}, \code{DomArch}, and \code{Lineage}.}
 }
 \value{
-Describe return, in detail
+A tibble summarizing each unique combination of \code{GenContext} and
+\code{Lineage}, along with the following columns:
+\itemize{
+\item \code{GenContext}: The genomic context for each entry.
+\item \code{Lineage}: The lineage associated with each entry.
+\item \code{count}: The total number of occurrences for each combination of
+\code{GenContext} and \code{Lineage}.
+}
+
+The results are arranged in descending order of \code{count}.
 }
 \description{
 summ.GC
diff --git a/man/summ.GC.byDALin.Rd b/man/summ.GC.byDALin.Rd
index 34c9f84d..7fc8d443 100644
--- a/man/summ.GC.byDALin.Rd
+++ b/man/summ.GC.byDALin.Rd
@@ -7,10 +7,21 @@
 summ.GC.byDALin(x)
 }
 \arguments{
-\item{x}{}
+\item{x}{A dataframe or tibble containing the data. It must have columns
+named \code{GenContext}, \code{DomArch}, and \code{Lineage}.}
 }
 \value{
-Define return, in detail
+A tibble summarizing each unique combination of \code{GenContext},
+\code{DomArch}, and \code{Lineage}, along with the following columns:
+\itemize{
+\item \code{GenContext}: The genomic context for each entry.
+\item \code{DomArch}: The domain architecture for each entry.
+\item \code{Lineage}: The lineage associated with each entry.
+\item \code{count}: The total number of occurrences for each combination of
+\code{GenContext}, \code{DomArch}, and \code{Lineage}.
+}
+
+The results are arranged in descending order of \code{count}.
 }
 \description{
 summ.GC.byDALin
diff --git a/man/summarize_bylin.Rd b/man/summarize_bylin.Rd
index a94c54c1..92b93652 100644
--- a/man/summarize_bylin.Rd
+++ b/man/summarize_bylin.Rd
@@ -7,10 +7,21 @@
 summarize_bylin(prot = "prot", column = "DomArch", by = "Lineage", query)
 }
 \arguments{
-\item{query}{}
+\item{prot}{A dataframe or tibble containing the data.}
+
+\item{column}{A string representing the column to be summarized
+(e.g., \code{DomArch}). Default is "DomArch".}
+
+\item{by}{A string representing the grouping column (e.g., \code{Lineage}).
+Default is "Lineage".}
+
+\item{query}{A string specifying the query pattern for filtering the target
+column. Use "all" to skip filtering and include all rows.}
 }
 \value{
-Describe return, in detail
+A tibble summarizing the counts of occurrences of elements in
+the \code{column}, grouped by the \code{by} column. The result includes the number
+of occurrences (\code{count}) and is arranged in descending order of count.
 }
 \description{
 Summarize by Lineage
diff --git a/man/total_counts.Rd b/man/total_counts.Rd
index 49db8822..53d70096 100644
--- a/man/total_counts.Rd
+++ b/man/total_counts.Rd
@@ -17,14 +17,30 @@ total_counts(
 \item{prot}{A data frame that must contain columns:
 \itemize{\item Either 'GenContext' or 'DomArch.norep' \item count}}
 
-\item{column}{Character. The column to summarize}
+\item{column}{Character. The column to summarize, default is "DomArch".}
 
-\item{cutoff}{Numeric. Cutoff for total count. Counts below cutoff value will not be shown. Default is 0.}
+\item{lineage_col}{Character. The name of the lineage column, default is
+"Lineage".}
 
-\item{digits}{}
+\item{cutoff}{Numeric. Cutoff for total count. Counts below this cutoff value
+will not be shown. Default is 0.}
+
+\item{RowsCutoff}{Logical. If TRUE, filters based on cumulative percentage
+cutoff. Default is FALSE.}
+
+\item{digits}{Numeric. Number of decimal places for percentage columns.
+Default is 2.}
 }
 \value{
-Define return, in detail
+A data frame with the following columns:
+\itemize{
+\item \code{{{ column }}}: Unique values from the specified column.
+\item \code{totalcount}: The total count of occurrences for each unique value in
+the specified column.
+\item \code{IndividualCountPercent}: The percentage of each \code{totalcount} relative to
+the overall count.
+\item \code{CumulativePercent}: The cumulative percentage of total counts.
+}
 }
 \description{
 Creates a data frame with a totalcount column
diff --git a/man/words2wc.Rd b/man/words2wc.Rd
index 1eba5dc4..69d006d5 100644
--- a/man/words2wc.Rd
+++ b/man/words2wc.Rd
@@ -7,10 +7,17 @@
 words2wc(string)
 }
 \arguments{
-\item{string}{}
+\item{string}{A character string containing the elements (words) to count.
+This would typically be a space-delimited string representing domain
+architectures or genomic contexts.}
 }
 \value{
-\link{tbl_df} table with 2 columns: 1) words & 2) counts/frequency
+A tibble (tbl_df) with two columns:
+\describe{
+\item{\code{words}}{A column containing the individual words
+(domains or domain architectures).}
+\item{\code{freq}}{A column containing the frequency counts for each word.}
+}
 }
 \description{
 Get word counts (wc) \link{DOMAINS (DA) or DOMAIN ARCHITECTURES (GC)}

From d9fa04bc729586ab336275083d67fb75420ac138 Mon Sep 17 00:00:00 2001
From: Awa Synthia <ndahili14@gmail.com>
Date: Mon, 7 Oct 2024 07:42:08 +0300
Subject: [PATCH 2/3] use one documentation file

Signed-off-by: Awa Synthia <ndahili14@gmail.com>
---
 R/summarize.R          |  16 ++-
 man/count_bycol.Rd     |  38 ------
 man/elements2words.Rd  |  40 -------
 man/filter_by_doms.Rd  |  44 -------
 man/filter_freq.Rd     |  28 -----
 man/summ.DA.Rd         |  31 -----
 man/summ.DA.byLin.Rd   |  27 -----
 man/summ.GC.Rd         |  32 -----
 man/summ.GC.byDALin.Rd |  33 ------
 man/summ.GC.byLin.Rd   |  22 ----
 man/summarize.Rd       | 260 +++++++++++++++++++++++++++++++++++++++++
 man/summarize_bylin.Rd |  36 ------
 man/total_counts.Rd    |  58 ---------
 man/words2wc.Rd        |  32 -----
 14 files changed, 274 insertions(+), 423 deletions(-)
 delete mode 100644 man/count_bycol.Rd
 delete mode 100644 man/elements2words.Rd
 delete mode 100644 man/filter_by_doms.Rd
 delete mode 100644 man/filter_freq.Rd
 delete mode 100644 man/summ.DA.Rd
 delete mode 100644 man/summ.DA.byLin.Rd
 delete mode 100644 man/summ.GC.Rd
 delete mode 100644 man/summ.GC.byDALin.Rd
 delete mode 100644 man/summ.GC.byLin.Rd
 create mode 100644 man/summarize.Rd
 delete mode 100644 man/summarize_bylin.Rd
 delete mode 100644 man/total_counts.Rd
 delete mode 100644 man/words2wc.Rd

diff --git a/R/summarize.R b/R/summarize.R
index e03ca463..0580c15d 100644
--- a/R/summarize.R
+++ b/R/summarize.R
@@ -29,6 +29,7 @@
 #'
 #' @return Filtered data frame
 #' @note There is no need to make the domains 'regex safe', that will be handled by this function
+#' @name summarize
 #' @export
 #'
 #' @examples
@@ -110,7 +111,7 @@ filter_by_doms <- function(prot, column = "DomArch", doms_keep = c(), doms_remov
 #' The tibble is filtered to only include elements that have a frequency 
 #' greater than or equal to `min.freq` and does not include elements with `NA` 
 #' values or those starting with a hyphen ("-").
-#'
+#' @name summarize
 #' @export
 #'
 #' @examples
@@ -155,6 +156,7 @@ count_bycol <- function(prot = prot, column = "DomArch", min.freq = 1) {
 #' @return A single string where elements are delimited by spaces. The function 
 #' performs necessary substitutions based on the `conversion_type` and cleans up 
 #' extraneous characters like newlines, tabs, and multiple spaces.
+#' @name summarize
 #'
 #' @examples
 #' \dontrun{
@@ -212,6 +214,8 @@ elements2words <- function(prot, column = "DomArch", conversion_type = "da2doms"
 #'   (domains or domain architectures).}
 #'   \item{`freq`}{A column containing the frequency counts for each word.}
 #' }
+#' 
+#' @name summarize
 #'
 #' @examples
 #' \dontrun{
@@ -259,6 +263,7 @@ words2wc <- function(string) {
 #'
 #' @return A tibble with the same structure as `x`, but filtered to include 
 #' only rows where the frequency is greater than or equal to `min.freq`.
+#' @name summarize
 #'
 #' @export
 #'
@@ -290,6 +295,7 @@ filter_freq <- function(x, min.freq) {
 #' @return A tibble summarizing the counts of occurrences of elements in 
 #' the `column`, grouped by the `by` column. The result includes the number 
 #' of occurrences (`count`) and is arranged in descending order of count.
+#' @name summarize
 #' @export
 #'
 #' @examples
@@ -335,6 +341,7 @@ summarize_bylin <- function(prot = "prot", column = "DomArch", by = "Lineage",
 #' columns: `DomArch`, `Lineage`, and `count`, which indicates the frequency 
 #' of each domain architecture for each lineage. The results are arranged in 
 #' descending order of `count`.
+#' @name summarize
 #'
 #' @export
 #'
@@ -369,6 +376,7 @@ summ.DA.byLin <- function(x) {
 #' - `totallin`: The total number of unique lineages in which each `DomArch` 
 #' appears.
 #' The results are arranged in descending order of `totallin` and `totalcount`.
+#' @name summarize
 #' @export
 #'
 #' @examples
@@ -401,6 +409,7 @@ summ.DA <- function(x) {
 #' `GenContext`, `DomArch`, and `Lineage`.
 #'
 #' The results are arranged in descending order of `count`.
+#' @name summarize
 #' @export
 #'
 #' @examples
@@ -421,11 +430,12 @@ summ.GC.byDALin <- function(x) {
 
 #' summ.GC.byLin
 #'
-#' @param x
+#' @param x A dataframe or tibble containing the data.
 #'
 #' @importFrom dplyr arrange desc filter group_by n summarise
 #'
 #' @return Describe return, in detail
+#' @name summarize
 #' @export
 #'
 #' @examples
@@ -459,6 +469,7 @@ summ.GC.byLin <- function(x) {
 #'  `GenContext` and `Lineage`.
 #'
 #' The results are arranged in descending order of `count`.
+#' @name summarize
 #' @export
 #'
 #' @examples
@@ -512,6 +523,7 @@ summ.GC <- function(x) {
 #' - `IndividualCountPercent`: The percentage of each `totalcount` relative to 
 #' the overall count.
 #' - `CumulativePercent`: The cumulative percentage of total counts.
+#' @name summarize
 #' @export
 #'
 #' @note Please refer to the source code if you have alternate file formats and/or
diff --git a/man/count_bycol.Rd b/man/count_bycol.Rd
deleted file mode 100644
index 946a7ea2..00000000
--- a/man/count_bycol.Rd
+++ /dev/null
@@ -1,38 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/summarize.R
-\name{count_bycol}
-\alias{count_bycol}
-\title{Count Bycol}
-\usage{
-count_bycol(prot = prot, column = "DomArch", min.freq = 1)
-}
-\arguments{
-\item{prot}{A data frame containing the dataset to analyze, typically with
-multiple columns including the one specified by the \code{column} parameter.}
-
-\item{column}{A character string specifying the name of the column to analyze.
-The default is "DomArch".}
-
-\item{min.freq}{An integer specifying the minimum frequency an element must
-have to be included in the output. Default is 1.}
-}
-\value{
-A tibble with two columns:
-\describe{
-\item{\code{column}}{The unique elements from the specified column
-(e.g., "DomArch").}
-\item{\code{freq}}{The frequency of each element, i.e., the number of times
-each element appears in the specified column.}
-}
-The tibble is filtered to only include elements that have a frequency
-greater than or equal to \code{min.freq} and does not include elements with \code{NA}
-values or those starting with a hyphen ("-").
-}
-\description{
-Count Bycol
-}
-\examples{
-\dontrun{
-count_bycol(prot = my_data, column = "DomArch", min.freq = 10)
-}
-}
diff --git a/man/elements2words.Rd b/man/elements2words.Rd
deleted file mode 100644
index bda447db..00000000
--- a/man/elements2words.Rd
+++ /dev/null
@@ -1,40 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/summarize.R
-\name{elements2words}
-\alias{elements2words}
-\title{Elements 2 Words}
-\usage{
-elements2words(prot, column = "DomArch", conversion_type = "da2doms")
-}
-\arguments{
-\item{prot}{A dataframe containing the dataset to analyze. The specified
-\code{column} contains the string elements to be processed.}
-
-\item{column}{A character string specifying the name of the column to analyze.
-Default is "DomArch".}
-
-\item{conversion_type}{A character string specifying the type of conversion.
-Two options are available:
-\describe{
-\item{\code{da2doms}}{Convert domain architectures into individual domains by
-replacing \code{+} symbols with spaces.}
-\item{\code{gc2da}}{Convert genomic context into domain architectures by
-replacing directional symbols (\verb{<-}, \verb{->}, and \code{|}) with spaces.}
-}}
-}
-\value{
-A single string where elements are delimited by spaces. The function
-performs necessary substitutions based on the \code{conversion_type} and cleans up
-extraneous characters like newlines, tabs, and multiple spaces.
-}
-\description{
-Break string ELEMENTS into WORDS for domain architecture (DA) and genomic
-context (GC)
-}
-\examples{
-\dontrun{
-tibble::tibble(DomArch = c("aaa+bbb", 
-"a+b", "b+c", "b-c")) |> elements2words()
-}
-
-}
diff --git a/man/filter_by_doms.Rd b/man/filter_by_doms.Rd
deleted file mode 100644
index cfe255ca..00000000
--- a/man/filter_by_doms.Rd
+++ /dev/null
@@ -1,44 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/summarize.R
-\name{filter_by_doms}
-\alias{filter_by_doms}
-\title{Filter by Domains}
-\usage{
-filter_by_doms(
-  prot,
-  column = "DomArch",
-  doms_keep = c(),
-  doms_remove = c(),
-  ignore.case = FALSE
-)
-}
-\arguments{
-\item{prot}{Dataframe to filter}
-
-\item{column}{Column to search for domains in (DomArch column)}
-
-\item{doms_keep}{Vector of domains that must be identified within column in order for
-observation to be kept}
-
-\item{doms_remove}{Vector of domains that, if found within an observation, will be removed}
-
-\item{ignore.case}{Should the matching be non case sensitive}
-}
-\value{
-Filtered data frame
-}
-\description{
-filter_by_doms filters a data frame by identifying exact domain matches
-and either keeping or removing rows with the identified domain
-}
-\note{
-There is no need to make the domains 'regex safe', that will be handled by this function
-}
-\examples{
-\dontrun{
-filter_by_doms()
-}
-}
-\author{
-Samuel Chen, Janani Ravi
-}
diff --git a/man/filter_freq.Rd b/man/filter_freq.Rd
deleted file mode 100644
index 9dfba73b..00000000
--- a/man/filter_freq.Rd
+++ /dev/null
@@ -1,28 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/summarize.R
-\name{filter_freq}
-\alias{filter_freq}
-\title{Filter Frequency}
-\usage{
-filter_freq(x, min.freq)
-}
-\arguments{
-\item{x}{A tibble (tbl_df) containing at least two columns: one for
-elements (e.g., \code{words}) and one for their frequency (e.g., \code{freq}).}
-
-\item{min.freq}{A numeric value specifying the minimum frequency threshold.
-Only elements with frequencies greater than or equal to this value will be
-retained.}
-}
-\value{
-A tibble with the same structure as \code{x}, but filtered to include
-only rows where the frequency is greater than or equal to \code{min.freq}.
-}
-\description{
-Filter Frequency
-}
-\examples{
-\dontrun{
-filter_freq()
-}
-}
diff --git a/man/summ.DA.Rd b/man/summ.DA.Rd
deleted file mode 100644
index 01d15b3c..00000000
--- a/man/summ.DA.Rd
+++ /dev/null
@@ -1,31 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/summarize.R
-\name{summ.DA}
-\alias{summ.DA}
-\title{summ.DA}
-\usage{
-summ.DA(x)
-}
-\arguments{
-\item{x}{A dataframe or tibble containing the data. It must have a column
-named \code{DomArch} and a count column, such as \code{count}, which represents the
-occurrences of each architecture in various lineages.}
-}
-\value{
-A tibble summarizing each unique \code{DomArch}, along with the following
-columns:
-\itemize{
-\item \code{totalcount}: The total occurrences of each \code{DomArch} across all lineages.
-\item \code{totallin}: The total number of unique lineages in which each \code{DomArch}
-appears.
-The results are arranged in descending order of \code{totallin} and \code{totalcount}.
-}
-}
-\description{
-Function to retrieve counts of how many lineages a DomArch appears in
-}
-\examples{
-\dontrun{
-summ.DA()
-}
-}
diff --git a/man/summ.DA.byLin.Rd b/man/summ.DA.byLin.Rd
deleted file mode 100644
index d88e5d37..00000000
--- a/man/summ.DA.byLin.Rd
+++ /dev/null
@@ -1,27 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/summarize.R
-\name{summ.DA.byLin}
-\alias{summ.DA.byLin}
-\title{summ.DA.byLin}
-\usage{
-summ.DA.byLin(x)
-}
-\arguments{
-\item{x}{A dataframe or tibble containing the data. It must have columns
-named \code{DomArch} and \code{Lineage}.}
-}
-\value{
-A tibble summarizing the counts of unique domain architectures
-(\code{DomArch}) per lineage (\code{Lineage}). The resulting table contains three
-columns: \code{DomArch}, \code{Lineage}, and \code{count}, which indicates the frequency
-of each domain architecture for each lineage. The results are arranged in
-descending order of \code{count}.
-}
-\description{
-Function to summarize and retrieve counts by Domains & Domains+Lineage
-}
-\examples{
-\dontrun{
-summ.DA.byLin()
-}
-}
diff --git a/man/summ.GC.Rd b/man/summ.GC.Rd
deleted file mode 100644
index 2ec4d651..00000000
--- a/man/summ.GC.Rd
+++ /dev/null
@@ -1,32 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/summarize.R
-\name{summ.GC}
-\alias{summ.GC}
-\title{summ.GC}
-\usage{
-summ.GC(x)
-}
-\arguments{
-\item{x}{A dataframe or tibble containing the data. It must have columns
-named \code{GenContext}, \code{DomArch}, and \code{Lineage}.}
-}
-\value{
-A tibble summarizing each unique combination of \code{GenContext} and
-\code{Lineage}, along with the following columns:
-\itemize{
-\item \code{GenContext}: The genomic context for each entry.
-\item \code{Lineage}: The lineage associated with each entry.
-\item \code{count}: The total number of occurrences for each combination of
-\code{GenContext} and \code{Lineage}.
-}
-
-The results are arranged in descending order of \code{count}.
-}
-\description{
-summ.GC
-}
-\examples{
-\dontrun{
-summ.GC()
-}
-}
diff --git a/man/summ.GC.byDALin.Rd b/man/summ.GC.byDALin.Rd
deleted file mode 100644
index 7fc8d443..00000000
--- a/man/summ.GC.byDALin.Rd
+++ /dev/null
@@ -1,33 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/summarize.R
-\name{summ.GC.byDALin}
-\alias{summ.GC.byDALin}
-\title{summ.GC.byDALin}
-\usage{
-summ.GC.byDALin(x)
-}
-\arguments{
-\item{x}{A dataframe or tibble containing the data. It must have columns
-named \code{GenContext}, \code{DomArch}, and \code{Lineage}.}
-}
-\value{
-A tibble summarizing each unique combination of \code{GenContext},
-\code{DomArch}, and \code{Lineage}, along with the following columns:
-\itemize{
-\item \code{GenContext}: The genomic context for each entry.
-\item \code{DomArch}: The domain architecture for each entry.
-\item \code{Lineage}: The lineage associated with each entry.
-\item \code{count}: The total number of occurrences for each combination of
-\code{GenContext}, \code{DomArch}, and \code{Lineage}.
-}
-
-The results are arranged in descending order of \code{count}.
-}
-\description{
-summ.GC.byDALin
-}
-\examples{
-\dontrun{
-summ.GC.byDALin
-}
-}
diff --git a/man/summ.GC.byLin.Rd b/man/summ.GC.byLin.Rd
deleted file mode 100644
index df2a8fb8..00000000
--- a/man/summ.GC.byLin.Rd
+++ /dev/null
@@ -1,22 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/summarize.R
-\name{summ.GC.byLin}
-\alias{summ.GC.byLin}
-\title{summ.GC.byLin}
-\usage{
-summ.GC.byLin(x)
-}
-\arguments{
-\item{x}{}
-}
-\value{
-Describe return, in detail
-}
-\description{
-summ.GC.byLin
-}
-\examples{
-\dontrun{
-summ.GC.byLin()
-}
-}
diff --git a/man/summarize.Rd b/man/summarize.Rd
new file mode 100644
index 00000000..f149f686
--- /dev/null
+++ b/man/summarize.Rd
@@ -0,0 +1,260 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/summarize.R
+\name{summarize}
+\alias{summarize}
+\alias{filter_by_doms}
+\alias{count_bycol}
+\alias{elements2words}
+\alias{words2wc}
+\alias{filter_freq}
+\alias{summarize_bylin}
+\alias{summ.DA.byLin}
+\alias{summ.DA}
+\alias{summ.GC.byDALin}
+\alias{summ.GC.byLin}
+\alias{summ.GC}
+\alias{total_counts}
+\title{Filter by Domains}
+\usage{
+filter_by_doms(
+  prot,
+  column = "DomArch",
+  doms_keep = c(),
+  doms_remove = c(),
+  ignore.case = FALSE
+)
+
+count_bycol(prot = prot, column = "DomArch", min.freq = 1)
+
+elements2words(prot, column = "DomArch", conversion_type = "da2doms")
+
+words2wc(string)
+
+filter_freq(x, min.freq)
+
+summarize_bylin(prot = "prot", column = "DomArch", by = "Lineage", query)
+
+summ.DA.byLin(x)
+
+summ.DA(x)
+
+summ.GC.byDALin(x)
+
+summ.GC.byLin(x)
+
+summ.GC(x)
+
+total_counts(
+  prot,
+  column = "DomArch",
+  lineage_col = "Lineage",
+  cutoff = 90,
+  RowsCutoff = FALSE,
+  digits = 2
+)
+}
+\arguments{
+\item{prot}{A data frame that must contain columns:
+\itemize{\item Either 'GenContext' or 'DomArch.norep' \item count}}
+
+\item{column}{Character. The column to summarize, default is "DomArch".}
+
+\item{doms_keep}{Vector of domains that must be identified within column in order for
+observation to be kept}
+
+\item{doms_remove}{Vector of domains that, if found within an observation, will be removed}
+
+\item{ignore.case}{Should the matching be non case sensitive}
+
+\item{min.freq}{A numeric value specifying the minimum frequency threshold.
+Only elements with frequencies greater than or equal to this value will be
+retained.}
+
+\item{conversion_type}{A character string specifying the type of conversion.
+Two options are available:
+\describe{
+\item{\code{da2doms}}{Convert domain architectures into individual domains by
+replacing \code{+} symbols with spaces.}
+\item{\code{gc2da}}{Convert genomic context into domain architectures by
+replacing directional symbols (\verb{<-}, \verb{->}, and \code{|}) with spaces.}
+}}
+
+\item{string}{A character string containing the elements (words) to count.
+This would typically be a space-delimited string representing domain
+architectures or genomic contexts.}
+
+\item{x}{A dataframe or tibble containing the data. It must have columns
+named \code{GenContext}, \code{DomArch}, and \code{Lineage}.}
+
+\item{by}{A string representing the grouping column (e.g., \code{Lineage}).
+Default is "Lineage".}
+
+\item{query}{A string specifying the query pattern for filtering the target
+column. Use "all" to skip filtering and include all rows.}
+
+\item{lineage_col}{Character. The name of the lineage column, default is
+"Lineage".}
+
+\item{cutoff}{Numeric. Cutoff for total count. Counts below this cutoff value
+will not be shown. Default is 90.}
+
+\item{RowsCutoff}{Logical. If TRUE, filters based on cumulative percentage
+cutoff. Default is FALSE.}
+
+\item{digits}{Numeric. Number of decimal places for percentage columns.
+Default is 2.}
+}
+\value{
+Filtered data frame
+
+A tibble with two columns:
+\describe{
+\item{\code{column}}{The unique elements from the specified column
+(e.g., "DomArch").}
+\item{\code{freq}}{The frequency of each element, i.e., the number of times
+each element appears in the specified column.}
+}
+The tibble is filtered to only include elements that have a frequency
+greater than or equal to \code{min.freq} and does not include elements with \code{NA}
+values or those starting with a hyphen ("-").
+
+A single string where elements are delimited by spaces. The function
+performs necessary substitutions based on the \code{conversion_type} and cleans up
+extraneous characters like newlines, tabs, and multiple spaces.
+
+A tibble (tbl_df) with two columns:
+\describe{
+\item{\code{words}}{A column containing the individual words
+(domains or domain architectures).}
+\item{\code{freq}}{A column containing the frequency counts for each word.}
+}
+
+A tibble with the same structure as \code{x}, but filtered to include
+only rows where the frequency is greater than or equal to \code{min.freq}.
+
+A tibble summarizing the counts of occurrences of elements in
+the \code{column}, grouped by the \code{by} column. The result includes the number
+of occurrences (\code{count}) and is arranged in descending order of count.
+
+A tibble summarizing the counts of unique domain architectures
+(\code{DomArch}) per lineage (\code{Lineage}). The resulting table contains three
+columns: \code{DomArch}, \code{Lineage}, and \code{count}, which indicates the frequency
+of each domain architecture for each lineage. The results are arranged in
+descending order of \code{count}.
+
+A tibble summarizing each unique \code{DomArch}, along with the following
+columns:
+\itemize{
+\item \code{totalcount}: The total occurrences of each \code{DomArch} across all lineages.
+\item \code{totallin}: The total number of unique lineages in which each \code{DomArch}
+appears.
+}
+The results are arranged in descending order of \code{totallin} and \code{totalcount}.
+
+A tibble summarizing each unique combination of \code{GenContext},
+\code{DomArch}, and \code{Lineage}, along with the following columns:
+\itemize{
+\item \code{GenContext}: The genomic context for each entry.
+\item \code{DomArch}: The domain architecture for each entry.
+\item \code{Lineage}: The lineage associated with each entry.
+\item \code{count}: The total number of occurrences for each combination of
+\code{GenContext}, \code{DomArch}, and \code{Lineage}.
+}
+
+The results are arranged in descending order of \code{count}.
+
+Describe return, in detail
+
+A tibble summarizing each unique combination of \code{GenContext} and
+\code{Lineage}, along with the following columns:
+\itemize{
+\item \code{GenContext}: The genomic context for each entry.
+\item \code{Lineage}: The lineage associated with each entry.
+\item \code{count}: The total number of occurrences for each combination of
+\code{GenContext} and \code{Lineage}.
+}
+
+The results are arranged in descending order of \code{count}.
+
+A data frame with the following columns:
+\itemize{
+\item \code{{{ column }}}: Unique values from the specified column.
+\item \code{totalcount}: The total count of occurrences for each unique value in
+the specified column.
+\item \code{IndividualCountPercent}: The percentage of each \code{totalcount} relative to
+the overall count.
+\item \code{CumulativePercent}: The cumulative percentage of total counts.
+}
+}
+\description{
+filter_by_doms filters a data frame by identifying exact domain matches
+and either keeping or removing rows with the identified domain
+
+Break string ELEMENTS into WORDS for domain architecture (DA) and genomic
+context (GC)
+
+Get word counts (wc) \link{DOMAINS (DA) or DOMAIN ARCHITECTURES (GC)}
+
+Function to summarize and retrieve counts by Domains & Domains+Lineage
+
+Function to retrieve counts of how many lineages a DomArch appears in
+
+Creates a data frame with a totalcount column
+
+This function is designed to sum the counts column by either Genomic Context or Domain Architecture and creates a totalcount column from those sums.
+}
+\note{
+There is no need to make the domains 'regex safe', that will be handled by this function
+
+Please refer to the source code if you have alternate file formats and/or
+column names.
+}
+\examples{
+\dontrun{
+filter_by_doms()
+}
+\dontrun{
+count_bycol(prot = my_data, column = "DomArch", min.freq = 10)
+}
+\dontrun{
+tibble::tibble(DomArch = c("aaa+bbb", 
+"a+b", "b+c", "b-c")) |> elements2words()
+}
+
+\dontrun{
+tibble::tibble(DomArch = c("aaa+bbb", "a+b", "b+c", "b-c")) |>
+    elements2words() |>
+    words2wc()
+}
+
+\dontrun{
+filter_freq()
+}
+\dontrun{
+library(tidyverse)
+tibble(DomArch = c("a+b", "a+b", "b+c", "a+b"), Lineage = c("l1", "l1", "l1", "l2")) |>
+    summarize_bylin(query = "all")
+}
+
+\dontrun{
+summ.DA.byLin()
+}
+\dontrun{
+summ.DA()
+}
+\dontrun{
+summ.GC.byDALin
+}
+\dontrun{
+summ.GC.byLin()
+}
+\dontrun{
+summ.GC()
+}
+\dontrun{
+total_counts(pspa - gc_lin_counts, 0, "GC")
+}
+}
+\author{
+Samuel Chen, Janani Ravi
+}
diff --git a/man/summarize_bylin.Rd b/man/summarize_bylin.Rd
deleted file mode 100644
index 92b93652..00000000
--- a/man/summarize_bylin.Rd
+++ /dev/null
@@ -1,36 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/summarize.R
-\name{summarize_bylin}
-\alias{summarize_bylin}
-\title{Summarize by Lineage}
-\usage{
-summarize_bylin(prot = "prot", column = "DomArch", by = "Lineage", query)
-}
-\arguments{
-\item{prot}{A dataframe or tibble containing the data.}
-
-\item{column}{A string representing the column to be summarized
-(e.g., \code{DomArch}). Default is "DomArch".}
-
-\item{by}{A string representing the grouping column (e.g., \code{Lineage}).
-Default is "Lineage".}
-
-\item{query}{A string specifying the query pattern for filtering the target
-column. Use "all" to skip filtering and include all rows.}
-}
-\value{
-A tibble summarizing the counts of occurrences of elements in
-the \code{column}, grouped by the \code{by} column. The result includes the number
-of occurrences (\code{count}) and is arranged in descending order of count.
-}
-\description{
-Summarize by Lineage
-}
-\examples{
-\dontrun{
-library(tidyverse)
-tibble(DomArch = c("a+b", "a+b", "b+c", "a+b"), Lineage = c("l1", "l1", "l1", "l2")) |>
-    summarize_bylin(query = "all")
-}
-
-}
diff --git a/man/total_counts.Rd b/man/total_counts.Rd
deleted file mode 100644
index 53d70096..00000000
--- a/man/total_counts.Rd
+++ /dev/null
@@ -1,58 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/summarize.R
-\name{total_counts}
-\alias{total_counts}
-\title{Total Counts}
-\usage{
-total_counts(
-  prot,
-  column = "DomArch",
-  lineage_col = "Lineage",
-  cutoff = 90,
-  RowsCutoff = FALSE,
-  digits = 2
-)
-}
-\arguments{
-\item{prot}{A data frame that must contain columns:
-\itemize{\item Either 'GenContext' or 'DomArch.norep' \item count}}
-
-\item{column}{Character. The column to summarize, default is "DomArch".}
-
-\item{lineage_col}{Character. The name of the lineage column, default is
-"Lineage".}
-
-\item{cutoff}{Numeric. Cutoff for total count. Counts below this cutoff value
-will not be shown. Default is 0.}
-
-\item{RowsCutoff}{Logical. If TRUE, filters based on cumulative percentage
-cutoff. Default is FALSE.}
-
-\item{digits}{Numeric. Number of decimal places for percentage columns.
-Default is 2.}
-}
-\value{
-A data frame with the following columns:
-\itemize{
-\item \code{{{ column }}}: Unique values from the specified column.
-\item \code{totalcount}: The total count of occurrences for each unique value in
-the specified column.
-\item \code{IndividualCountPercent}: The percentage of each \code{totalcount} relative to
-the overall count.
-\item \code{CumulativePercent}: The cumulative percentage of total counts.
-}
-}
-\description{
-Creates a data frame with a totalcount column
-
-This function is designed to sum the counts column by either Genomic Context or Domain Architecture and creates a totalcount column from those sums.
-}
-\note{
-Please refer to the source code if you have alternate file formats and/or
-column names.
-}
-\examples{
-\dontrun{
-total_counts(pspa - gc_lin_counts, 0, "GC")
-}
-}
diff --git a/man/words2wc.Rd b/man/words2wc.Rd
deleted file mode 100644
index 69d006d5..00000000
--- a/man/words2wc.Rd
+++ /dev/null
@@ -1,32 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/summarize.R
-\name{words2wc}
-\alias{words2wc}
-\title{Words 2 Word Counts}
-\usage{
-words2wc(string)
-}
-\arguments{
-\item{string}{A character string containing the elements (words) to count.
-This would typically be a space-delimited string representing domain
-architectures or genomic contexts.}
-}
-\value{
-A tibble (tbl_df) with two columns:
-\describe{
-\item{\code{words}}{A column containing the individual words
-(domains or domain architectures).}
-\item{\code{freq}}{A column containing the frequency counts for each word.}
-}
-}
-\description{
-Get word counts (wc) \link{DOMAINS (DA) or DOMAIN ARCHITECTURES (GC)}
-}
-\examples{
-\dontrun{
-tibble::tibble(DomArch = c("aaa+bbb", "a+b", "b+c", "b-c")) |>
-    elements2words() |>
-    words2wc()
-}
-
-}

From 2da3d1a1eadb1c3d6f140700444e15db46c341d2 Mon Sep 17 00:00:00 2001
From: David Mayer <david.mayer@cuanschutz.edu>
Date: Fri, 11 Oct 2024 08:40:17 -0600
Subject: [PATCH 3/3] summarize.R adjustments - add back importFrom
 n_distinct() as it appears to be used by summarizeGenContext() - use function
 call as title -- may specify this in MolEvolvR style guide for consistency -
 adjust Rd grouping with MolEvolvR_summary @rdname tag for functions that had
 a clear summary element. This will hopefully avoid confusion with the rather
 ubiquitous dplyr::summarize - converted some code comments to placeholder
 descriptions

---
 NAMESPACE                                   |   1 +
 R/summarize.R                               |  58 +++----
 man/{summarize.Rd => MolEvolvR_summary.Rd}  | 159 ++++----------------
 man/countbycolumn.Rd                        |  26 +++-
 man/elements2Words.Rd                       |  25 ++-
 man/filterbydomains.Rd                      |   2 +-
 man/filterbyfrequency.Rd                    |  14 +-
 man/findparalogs.Rd                         |   2 +-
 man/summarizeDomArch.Rd                     |  22 ---
 man/summarizeDomArch_ByLineage.Rd           |  22 ---
 man/summarizeGenContext.Rd                  |  22 ---
 man/summarizeGenContext_ByDomArchLineage.Rd |  22 ---
 man/summarizeGenContext_ByLineage.Rd        |  22 ---
 man/summarizebylineage.Rd                   |  25 ---
 man/totalgencontextordomarchcounts.Rd       |  42 ------
 man/words2wordcounts.Rd                     |  13 +-
 16 files changed, 122 insertions(+), 355 deletions(-)
 rename man/{summarize.Rd => MolEvolvR_summary.Rd} (52%)
 delete mode 100644 man/summarizeDomArch.Rd
 delete mode 100644 man/summarizeDomArch_ByLineage.Rd
 delete mode 100644 man/summarizeGenContext.Rd
 delete mode 100644 man/summarizeGenContext_ByDomArchLineage.Rd
 delete mode 100644 man/summarizeGenContext_ByLineage.Rd
 delete mode 100644 man/summarizebylineage.Rd
 delete mode 100644 man/totalgencontextordomarchcounts.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 2326fc1f..53332439 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -139,6 +139,7 @@ importFrom(dplyr,if_else)
 importFrom(dplyr,left_join)
 importFrom(dplyr,mutate)
 importFrom(dplyr,n)
+importFrom(dplyr,n_distinct)
 importFrom(dplyr,pull)
 importFrom(dplyr,relocate)
 importFrom(dplyr,right_join)
diff --git a/R/summarize.R b/R/summarize.R
index 321a0488..2816f174 100644
--- a/R/summarize.R
+++ b/R/summarize.R
@@ -10,7 +10,7 @@
 # suppressPackageStartupMessages(library(rlang))
 # conflicted::conflict_prefer("filter", "dplyr")
 
-#' Filter by Domains
+#' filterByDomains
 #'
 #' @author Samuel Chen, Janani Ravi
 #' @description filterByDomains filters a data frame by identifying exact domain matches
@@ -29,7 +29,6 @@
 #'
 #' @return Filtered data frame
 #' @note There is no need to make the domains 'regex safe', that will be handled by this function
-#' @name summarize
 #' @export
 #'
 #' @examples
@@ -89,9 +88,11 @@ filterByDomains <- function(prot, column = "DomArch", doms_keep = c(), doms_remo
 ## COUNTS of DAs and GCs ##
 ## Before/after break up ##
 ###########################
-## Function to obtain element counts (DA, GC)
-#' Count By Column
-#'
+
+#' countByColumn
+#' @description
+#' Function to obtain element counts (DA, GC)
+#' 
 #' @param prot A data frame containing the dataset to analyze, typically with 
 #' multiple columns including the one specified by the `column` parameter.
 #' @param column A character string specifying the name of the column to analyze. 
@@ -111,7 +112,6 @@ filterByDomains <- function(prot, column = "DomArch", doms_keep = c(), doms_remo
 #' The tibble is filtered to only include elements that have a frequency 
 #' greater than or equal to `min.freq` and does not include elements with `NA` 
 #' values or those starting with a hyphen ("-").
-#' @name summarize
 #' @export
 #'
 #' @examples
@@ -131,7 +131,7 @@ countByColumn <- function(prot = prot, column = "DomArch", min.freq = 1) {
     return(counts)
 }
 
-#' Elements 2 Words
+#' elements2Words
 #'
 #' @description
 #' Break string ELEMENTS into WORDS for domain architecture (DA) and genomic
@@ -156,7 +156,6 @@ countByColumn <- function(prot = prot, column = "DomArch", min.freq = 1) {
 #' @return A single string where elements are delimited by spaces. The function 
 #' performs necessary substitutions based on the `conversion_type` and cleans up 
 #' extraneous characters like newlines, tabs, and multiple spaces.
-#' @name summarize
 #'
 #' @examples
 #' \dontrun{
@@ -196,7 +195,7 @@ elements2Words <- function(prot, column = "DomArch", conversion_type = "da2doms"
     return(z3)
 }
 
-#' Words 2 Word Counts
+#' words2WordCounts
 #'
 #' @description
 #' Get word counts (wc) [DOMAINS (DA) or DOMAIN ARCHITECTURES (GC)]
@@ -215,7 +214,6 @@ elements2Words <- function(prot, column = "DomArch", conversion_type = "da2doms"
 #'   \item{`freq`}{A column containing the frequency counts for each word.}
 #' }
 #' 
-#' @name summarize
 #'
 #' @examples
 #' \dontrun{
@@ -252,9 +250,11 @@ words2WordCounts <- function(string) {
         arrange(-freq)
     return(df_word_count)
 }
-## Function to filter based on frequencies
-#' Filter Frequency
-#'
+
+#' filterByFrequency
+#' @description
+#' Function to filter based on frequencies
+#' 
 #' @param x A tibble (tbl_df) containing at least two columns: one for 
 #' elements (e.g., `words`) and one for their frequency (e.g., `freq`).
 #' @param min.freq A numeric value specifying the minimum frequency threshold. 
@@ -263,7 +263,6 @@ words2WordCounts <- function(string) {
 #'
 #' @return A tibble with the same structure as `x`, but filtered to include 
 #' only rows where the frequency is greater than or equal to `min.freq`.
-#' @name summarize
 #'
 #' @export
 #'
@@ -279,7 +278,14 @@ filterByFrequency <- function(x, min.freq) {
 #########################
 ## SUMMARY FUNCTIONS ####
 #########################
-#' Summarize by Lineage
+#' MolEvolvR Summary
+#' @name MolEvolvR_summary
+#' @description
+#' A collection of summary functions for the MolEvolvR package.
+#' 
+NULL
+
+#' summarizeByLineage
 #'
 #' @param prot A dataframe or tibble containing the data.
 #' @param column A string representing the column to be summarized 
@@ -295,7 +301,7 @@ filterByFrequency <- function(x, min.freq) {
 #' @return A tibble summarizing the counts of occurrences of elements in 
 #' the `column`, grouped by the `by` column. The result includes the number 
 #' of occurrences (`count`) and is arranged in descending order of count.
-#' @name summarize
+#' @rdname MolEvolvR_summary
 #' @export
 #'
 #' @examples
@@ -341,7 +347,7 @@ summarizeByLineage <- function(prot = "prot", column = "DomArch", by = "Lineage"
 #' columns: `DomArch`, `Lineage`, and `count`, which indicates the frequency 
 #' of each domain architecture for each lineage. The results are arranged in 
 #' descending order of `count`.
-#' @name summarize
+#' @rdname MolEvolvR_summary
 #'
 #' @export
 #'
@@ -357,7 +363,7 @@ summarizeDomArch_ByLineage <- function(x) {
         arrange(desc(count))
 }
 
-## Function to retrieve counts of how many lineages a DomArch appears in
+
 #' summarizeDomArch
 #'
 #' @description
@@ -375,7 +381,7 @@ summarizeDomArch_ByLineage <- function(x) {
 #' - `totallin`: The total number of unique lineages in which each `DomArch` 
 #' appears.
 #' The results are arranged in descending order of `totallin` and `totalcount`.
-#' @name summarize
+#' @rdname MolEvolvR_summary
 #' @export
 #'
 #' @examples
@@ -407,7 +413,7 @@ summarizeDomArch <- function(x) {
 #' `GenContext`, `DomArch`, and `Lineage`.
 #'
 #' The results are arranged in descending order of `count`.
-#' @name summarize
+#' @rdname MolEvolvR_summary
 #' @export
 #'
 #' @examples
@@ -432,7 +438,7 @@ summarizeGenContext_ByDomArchLineage <- function(x) {
 #' @importFrom dplyr arrange desc filter group_by n summarise
 #'
 #' @return Describe return, in detail
-#' @name summarize
+#' @rdname MolEvolvR_summary
 #' @export
 #'
 #' @examples
@@ -455,7 +461,7 @@ summarizeGenContext_ByLineage <- function(x) {
 #' @param x A dataframe or tibble containing the data. It must have columns 
 #' named `GenContext`, `DomArch`, and `Lineage`.
 #'
-#' @importFrom dplyr arrange desc filter group_by n summarise
+#' @importFrom dplyr arrange desc filter group_by n n_distinct summarise
 #'
 #' @return A tibble summarizing each unique combination of `GenContext` and 
 #' `Lineage`, along with the following columns:
@@ -465,7 +471,7 @@ summarizeGenContext_ByLineage <- function(x) {
 #'  `GenContext` and `Lineage`.
 #'
 #' The results are arranged in descending order of `count`.
-#' @name summarize
+#' @rdname MolEvolvR_summary
 #' @export
 #'
 #' @examples
@@ -487,7 +493,7 @@ summarizeGenContext <- function(x) {
 
 
 ##################
-#' Total Counts
+#' totalGenContextOrDomArchCounts
 #'
 #' @description
 #' Creates a data frame with a totalcount column
@@ -518,7 +524,7 @@ summarizeGenContext <- function(x) {
 #' - `IndividualCountPercent`: The percentage of each `totalcount` relative to 
 #' the overall count.
 #' - `CumulativePercent`: The cumulative percentage of total counts.
-#' @name summarize
+#' @rdname MolEvolvR_summary
 #' @export
 #'
 #' @note Please refer to the source code if you have alternate file formats and/or
@@ -670,7 +676,7 @@ totalGenContextOrDomArchCounts <- function(prot, column = "DomArch", lineage_col
 
 
 
-#' Find Paralogs
+#' findParalogs
 #'
 #' @description
 #' Creates a data frame of paralogs.
diff --git a/man/summarize.Rd b/man/MolEvolvR_summary.Rd
similarity index 52%
rename from man/summarize.Rd
rename to man/MolEvolvR_summary.Rd
index f149f686..262c4719 100644
--- a/man/summarize.Rd
+++ b/man/MolEvolvR_summary.Rd
@@ -1,50 +1,29 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/summarize.R
-\name{summarize}
-\alias{summarize}
-\alias{filter_by_doms}
-\alias{count_bycol}
-\alias{elements2words}
-\alias{words2wc}
-\alias{filter_freq}
-\alias{summarize_bylin}
-\alias{summ.DA.byLin}
-\alias{summ.DA}
-\alias{summ.GC.byDALin}
-\alias{summ.GC.byLin}
-\alias{summ.GC}
-\alias{total_counts}
-\title{Filter by Domains}
+\name{MolEvolvR_summary}
+\alias{MolEvolvR_summary}
+\alias{summarizeByLineage}
+\alias{summarizeDomArch_ByLineage}
+\alias{summarizeDomArch}
+\alias{summarizeGenContext_ByDomArchLineage}
+\alias{summarizeGenContext_ByLineage}
+\alias{summarizeGenContext}
+\alias{totalGenContextOrDomArchCounts}
+\title{MolEvolvR Summary}
 \usage{
-filter_by_doms(
-  prot,
-  column = "DomArch",
-  doms_keep = c(),
-  doms_remove = c(),
-  ignore.case = FALSE
-)
-
-count_bycol(prot = prot, column = "DomArch", min.freq = 1)
-
-elements2words(prot, column = "DomArch", conversion_type = "da2doms")
-
-words2wc(string)
+summarizeByLineage(prot = "prot", column = "DomArch", by = "Lineage", query)
 
-filter_freq(x, min.freq)
+summarizeDomArch_ByLineage(x)
 
-summarize_bylin(prot = "prot", column = "DomArch", by = "Lineage", query)
+summarizeDomArch(x)
 
-summ.DA.byLin(x)
+summarizeGenContext_ByDomArchLineage(x)
 
-summ.DA(x)
+summarizeGenContext_ByLineage(x)
 
-summ.GC.byDALin(x)
+summarizeGenContext(x)
 
-summ.GC.byLin(x)
-
-summ.GC(x)
-
-total_counts(
+totalGenContextOrDomArchCounts(
   prot,
   column = "DomArch",
   lineage_col = "Lineage",
@@ -59,39 +38,15 @@ total_counts(
 
 \item{column}{Character. The column to summarize, default is "DomArch".}
 
-\item{doms_keep}{Vector of domains that must be identified within column in order for
-observation to be kept}
-
-\item{doms_remove}{Vector of domains that, if found within an observation, will be removed}
-
-\item{ignore.case}{Should the matching be non case sensitive}
-
-\item{min.freq}{A numeric value specifying the minimum frequency threshold.
-Only elements with frequencies greater than or equal to this value will be
-retained.}
-
-\item{conversion_type}{A character string specifying the type of conversion.
-Two options are available:
-\describe{
-\item{\code{da2doms}}{Convert domain architectures into individual domains by
-replacing \code{+} symbols with spaces.}
-\item{\code{gc2da}}{Convert genomic context into domain architectures by
-replacing directional symbols (\verb{<-}, \verb{->}, and \code{|}) with spaces.}
-}}
-
-\item{string}{A character string containing the elements (words) to count.
-This would typically be a space-delimited string representing domain
-architectures or genomic contexts.}
-
-\item{x}{A dataframe or tibble containing the data. It must have columns
-named \code{GenContext}, \code{DomArch}, and \code{Lineage}.}
-
 \item{by}{A string representing the grouping column (e.g., \code{Lineage}).
 Default is "Lineage".}
 
 \item{query}{A string specifying the query pattern for filtering the target
 column. Use "all" to skip filtering and include all rows.}
 
+\item{x}{A dataframe or tibble containing the data. It must have columns
+named \code{GenContext}, \code{DomArch}, and \code{Lineage}.}
+
 \item{lineage_col}{Character. The name of the lineage column, default is
 "Lineage".}
 
@@ -105,33 +60,6 @@ cutoff. Default is FALSE.}
 Default is 2.}
 }
 \value{
-Filtered data frame
-
-A tibble with two columns:
-\describe{
-\item{\code{column}}{The unique elements from the specified column
-(e.g., "DomArch").}
-\item{\code{freq}}{The frequency of each element, i.e., the number of times
-each element appears in the specified column.}
-}
-The tibble is filtered to only include elements that have a frequency
-greater than or equal to \code{min.freq} and does not include elements with \code{NA}
-values or those starting with a hyphen ("-").
-
-A single string where elements are delimited by spaces. The function
-performs necessary substitutions based on the \code{conversion_type} and cleans up
-extraneous characters like newlines, tabs, and multiple spaces.
-
-A tibble (tbl_df) with two columns:
-\describe{
-\item{\code{words}}{A column containing the individual words
-(domains or domain architectures).}
-\item{\code{freq}}{A column containing the frequency counts for each word.}
-}
-
-A tibble with the same structure as \code{x}, but filtered to include
-only rows where the frequency is greater than or equal to \code{min.freq}.
-
 A tibble summarizing the counts of occurrences of elements in
 the \code{column}, grouped by the \code{by} column. The result includes the number
 of occurrences (\code{count}) and is arranged in descending order of count.
@@ -187,13 +115,7 @@ the overall count.
 }
 }
 \description{
-filter_by_doms filters a data frame by identifying exact domain matches
-and either keeping or removing rows with the identified domain
-
-Break string ELEMENTS into WORDS for domain architecture (DA) and genomic
-context (GC)
-
-Get word counts (wc) \link{DOMAINS (DA) or DOMAIN ARCHITECTURES (GC)}
+A collection of summary functions for the MolEvolvR package.
 
 Function to summarize and retrieve counts by Domains & Domains+Lineage
 
@@ -204,57 +126,32 @@ Creates a data frame with a totalcount column
 This function is designed to sum the counts column by either Genomic Context or Domain Architecture and creates a totalcount column from those sums.
 }
 \note{
-There is no need to make the domains 'regex safe', that will be handled by this function
-
 Please refer to the source code if you have alternate file formats and/or
 column names.
 }
 \examples{
 \dontrun{
-filter_by_doms()
-}
-\dontrun{
-count_bycol(prot = my_data, column = "DomArch", min.freq = 10)
-}
-\dontrun{
-tibble::tibble(DomArch = c("aaa+bbb", 
-"a+b", "b+c", "b-c")) |> elements2words()
-}
-
-\dontrun{
-tibble::tibble(DomArch = c("aaa+bbb", "a+b", "b+c", "b-c")) |>
-    elements2words() |>
-    words2wc()
-}
-
-\dontrun{
-filter_freq()
-}
-\dontrun{
 library(tidyverse)
 tibble(DomArch = c("a+b", "a+b", "b+c", "a+b"), Lineage = c("l1", "l1", "l1", "l2")) |>
-    summarize_bylin(query = "all")
+    summarizeByLineage(query = "all")
 }
 
 \dontrun{
-summ.DA.byLin()
+summarizeDomArch_ByLineage()
 }
 \dontrun{
-summ.DA()
+summarizeDomArch()
 }
 \dontrun{
-summ.GC.byDALin
+summarizeGenContext_ByDomArchLineage()
 }
 \dontrun{
-summ.GC.byLin()
+summarizeGenContext_ByLineage()
 }
 \dontrun{
-summ.GC()
+summarizeGenContext()
 }
 \dontrun{
-total_counts(pspa - gc_lin_counts, 0, "GC")
-}
+totalGenContextOrDomArchCounts(pspa_gc_lin_counts, column = "GenContext", cutoff = 0)
 }
-\author{
-Samuel Chen, Janani Ravi
 }
diff --git a/man/countbycolumn.Rd b/man/countbycolumn.Rd
index 34fcc3e0..57ff9ac4 100644
--- a/man/countbycolumn.Rd
+++ b/man/countbycolumn.Rd
@@ -2,21 +2,37 @@
 % Please edit documentation in R/summarize.R
 \name{countByColumn}
 \alias{countByColumn}
-\title{Count By Column}
+\title{countByColumn}
 \usage{
 countByColumn(prot = prot, column = "DomArch", min.freq = 1)
 }
 \arguments{
-\item{min.freq}{}
+\item{prot}{A data frame containing the dataset to analyze, typically with
+multiple columns including the one specified by the \code{column} parameter.}
+
+\item{column}{A character string specifying the name of the column to analyze.
+The default is "DomArch".}
+
+\item{min.freq}{An integer specifying the minimum frequency an element must
+have to be included in the output. Default is 1.}
 }
 \value{
-Describe return, in detail
+A tibble with two columns:
+\describe{
+\item{\code{column}}{The unique elements from the specified column
+(e.g., "DomArch").}
+\item{\code{freq}}{The frequency of each element, i.e., the number of times
+each element appears in the specified column.}
+}
+The tibble is filtered to only include elements that have a frequency
+greater than or equal to \code{min.freq} and does not include elements with \code{NA}
+values or those starting with a hyphen ("-").
 }
 \description{
-Count By Column
+Function to obtain element counts (DA, GC)
 }
 \examples{
 \dontrun{
-countByColumn()
+countByColumn(prot = my_data, column = "DomArch", min.freq = 10)
 }
 }
diff --git a/man/elements2Words.Rd b/man/elements2Words.Rd
index 1094d363..bfd3071b 100644
--- a/man/elements2Words.Rd
+++ b/man/elements2Words.Rd
@@ -2,20 +2,30 @@
 % Please edit documentation in R/summarize.R
 \name{elements2Words}
 \alias{elements2Words}
-\title{Elements 2 Words}
+\title{elements2Words}
 \usage{
 elements2Words(prot, column = "DomArch", conversion_type = "da2doms")
 }
 \arguments{
-\item{prot}{\link{dataframe}}
+\item{prot}{A dataframe containing the dataset to analyze. The specified
+\code{column} contains the string elements to be processed.}
 
-\item{column}{\link{string} column name}
+\item{column}{A character string specifying the name of the column to analyze.
+Default is "DomArch".}
 
-\item{conversion_type}{\link{string} type of conversion: 'da2doms': domain architectures to
-domains. 'gc2da' genomic context to domain architectures}
+\item{conversion_type}{A character string specifying the type of conversion.
+Two options are available:
+\describe{
+\item{\code{da2doms}}{Convert domain architectures into individual domains by
+replacing \code{+} symbols with spaces.}
+\item{\code{gc2da}}{Convert genomic context into domain architectures by
+replacing directional symbols (\verb{<-}, \verb{->}, and \code{|}) with spaces.}
+}}
 }
 \value{
-\link{string} with words delimited by spaces
+A single string where elements are delimited by spaces. The function
+performs necessary substitutions based on the \code{conversion_type} and cleans up
+extraneous characters like newlines, tabs, and multiple spaces.
 }
 \description{
 Break string ELEMENTS into WORDS for domain architecture (DA) and genomic
@@ -23,7 +33,8 @@ context (GC)
 }
 \examples{
 \dontrun{
-tibble::tibble(DomArch = c("aaa+bbb", "a+b", "b+c", "b-c")) |> elements2Words()
+tibble::tibble(DomArch = c("aaa+bbb",
+"a+b", "b+c", "b-c")) |> elements2Words()
 }
 
 }
diff --git a/man/filterbydomains.Rd b/man/filterbydomains.Rd
index 8c885363..afb3e5cb 100644
--- a/man/filterbydomains.Rd
+++ b/man/filterbydomains.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/summarize.R
 \name{filterByDomains}
 \alias{filterByDomains}
-\title{Filter by Domains}
+\title{filterByDomains}
 \usage{
 filterByDomains(
   prot,
diff --git a/man/filterbyfrequency.Rd b/man/filterbyfrequency.Rd
index d2c5f9cd..15d06d67 100644
--- a/man/filterbyfrequency.Rd
+++ b/man/filterbyfrequency.Rd
@@ -2,18 +2,24 @@
 % Please edit documentation in R/summarize.R
 \name{filterByFrequency}
 \alias{filterByFrequency}
-\title{Filter Frequency}
+\title{filterByFrequency}
 \usage{
 filterByFrequency(x, min.freq)
 }
 \arguments{
-\item{min.freq}{}
+\item{x}{A tibble (tbl_df) containing at least two columns: one for
+elements (e.g., \code{words}) and one for their frequency (e.g., \code{freq}).}
+
+\item{min.freq}{A numeric value specifying the minimum frequency threshold.
+Only elements with frequencies greater than or equal to this value will be
+retained.}
 }
 \value{
-Describe return, in detail
+A tibble with the same structure as \code{x}, but filtered to include
+only rows where the frequency is greater than or equal to \code{min.freq}.
 }
 \description{
-Filter Frequency
+Function to filter based on frequencies
 }
 \examples{
 \dontrun{
diff --git a/man/findparalogs.Rd b/man/findparalogs.Rd
index 4b5edbcf..d92edf71 100644
--- a/man/findparalogs.Rd
+++ b/man/findparalogs.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/summarize.R
 \name{findParalogs}
 \alias{findParalogs}
-\title{Find Paralogs}
+\title{findParalogs}
 \usage{
 findParalogs(prot)
 }
diff --git a/man/summarizeDomArch.Rd b/man/summarizeDomArch.Rd
deleted file mode 100644
index 11db1afa..00000000
--- a/man/summarizeDomArch.Rd
+++ /dev/null
@@ -1,22 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/summarize.R
-\name{summarizeDomArch}
-\alias{summarizeDomArch}
-\title{summarizeDomArch}
-\usage{
-summarizeDomArch(x)
-}
-\arguments{
-\item{x}{}
-}
-\value{
-Describe return, in detail
-}
-\description{
-Function to retrieve counts of how many lineages a DomArch appears in
-}
-\examples{
-\dontrun{
-summarizeDomArch()
-}
-}
diff --git a/man/summarizeDomArch_ByLineage.Rd b/man/summarizeDomArch_ByLineage.Rd
deleted file mode 100644
index cf5fac22..00000000
--- a/man/summarizeDomArch_ByLineage.Rd
+++ /dev/null
@@ -1,22 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/summarize.R
-\name{summarizeDomArch_ByLineage}
-\alias{summarizeDomArch_ByLineage}
-\title{summarizeDomArch_ByLineage}
-\usage{
-summarizeDomArch_ByLineage(x)
-}
-\arguments{
-\item{x}{}
-}
-\value{
-Describe return, in detail
-}
-\description{
-Function to summarize and retrieve counts by Domains & Domains+Lineage
-}
-\examples{
-\dontrun{
-summarizeDomArch_ByLineage()
-}
-}
diff --git a/man/summarizeGenContext.Rd b/man/summarizeGenContext.Rd
deleted file mode 100644
index 5a40811b..00000000
--- a/man/summarizeGenContext.Rd
+++ /dev/null
@@ -1,22 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/summarize.R
-\name{summarizeGenContext}
-\alias{summarizeGenContext}
-\title{summarizeGenContext}
-\usage{
-summarizeGenContext(x)
-}
-\arguments{
-\item{x}{}
-}
-\value{
-Describe return, in detail
-}
-\description{
-summarizeGenContext
-}
-\examples{
-\dontrun{
-summarizeGenContext()
-}
-}
diff --git a/man/summarizeGenContext_ByDomArchLineage.Rd b/man/summarizeGenContext_ByDomArchLineage.Rd
deleted file mode 100644
index 59e0376e..00000000
--- a/man/summarizeGenContext_ByDomArchLineage.Rd
+++ /dev/null
@@ -1,22 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/summarize.R
-\name{summarizeGenContext_ByDomArchLineage}
-\alias{summarizeGenContext_ByDomArchLineage}
-\title{summarizeGenContext_ByDomArchLineage}
-\usage{
-summarizeGenContext_ByDomArchLineage(x)
-}
-\arguments{
-\item{x}{}
-}
-\value{
-Define return, in detail
-}
-\description{
-summarizeGenContext_ByDomArchLineage
-}
-\examples{
-\dontrun{
-summarizeGenContext_ByDomArchLineage
-}
-}
diff --git a/man/summarizeGenContext_ByLineage.Rd b/man/summarizeGenContext_ByLineage.Rd
deleted file mode 100644
index 932fe6a7..00000000
--- a/man/summarizeGenContext_ByLineage.Rd
+++ /dev/null
@@ -1,22 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/summarize.R
-\name{summarizeGenContext_ByLineage}
-\alias{summarizeGenContext_ByLineage}
-\title{summarizeGenContext_ByLineage}
-\usage{
-summarizeGenContext_ByLineage(x)
-}
-\arguments{
-\item{x}{}
-}
-\value{
-Describe return, in detail
-}
-\description{
-summarizeGenContext_ByLineage
-}
-\examples{
-\dontrun{
-summarizeGenContext_ByLineage()
-}
-}
diff --git a/man/summarizebylineage.Rd b/man/summarizebylineage.Rd
deleted file mode 100644
index 2e445913..00000000
--- a/man/summarizebylineage.Rd
+++ /dev/null
@@ -1,25 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/summarize.R
-\name{summarizeByLineage}
-\alias{summarizeByLineage}
-\title{Summarize by Lineage}
-\usage{
-summarizeByLineage(prot = "prot", column = "DomArch", by = "Lineage", query)
-}
-\arguments{
-\item{query}{}
-}
-\value{
-Describe return, in detail
-}
-\description{
-Summarize by Lineage
-}
-\examples{
-\dontrun{
-library(tidyverse)
-tibble(DomArch = c("a+b", "a+b", "b+c", "a+b"), Lineage = c("l1", "l1", "l1", "l2")) |>
-    summarizeByLineage(query = "all")
-}
-
-}
diff --git a/man/totalgencontextordomarchcounts.Rd b/man/totalgencontextordomarchcounts.Rd
deleted file mode 100644
index f457cb6a..00000000
--- a/man/totalgencontextordomarchcounts.Rd
+++ /dev/null
@@ -1,42 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/summarize.R
-\name{totalGenContextOrDomArchCounts}
-\alias{totalGenContextOrDomArchCounts}
-\title{Total Counts}
-\usage{
-totalGenContextOrDomArchCounts(
-  prot,
-  column = "DomArch",
-  lineage_col = "Lineage",
-  cutoff = 90,
-  RowsCutoff = FALSE,
-  digits = 2
-)
-}
-\arguments{
-\item{prot}{A data frame that must contain columns:
-\itemize{\item Either 'GenContext' or 'DomArch.norep' \item count}}
-
-\item{column}{Character. The column to summarize}
-
-\item{cutoff}{Numeric. Cutoff for total count. Counts below cutoff value will not be shown. Default is 0.}
-
-\item{digits}{}
-}
-\value{
-Define return, in detail
-}
-\description{
-Creates a data frame with a totalcount column
-
-This function is designed to sum the counts column by either Genomic Context or Domain Architecture and creates a totalcount column from those sums.
-}
-\note{
-Please refer to the source code if you have alternate file formats and/or
-column names.
-}
-\examples{
-\dontrun{
-totalGenContextOrDomArchCounts(pspa - gc_lin_counts, 0, "GC")
-}
-}
diff --git a/man/words2wordcounts.Rd b/man/words2wordcounts.Rd
index 7f60f226..370dec7f 100644
--- a/man/words2wordcounts.Rd
+++ b/man/words2wordcounts.Rd
@@ -2,15 +2,22 @@
 % Please edit documentation in R/summarize.R
 \name{words2WordCounts}
 \alias{words2WordCounts}
-\title{Words 2 Word Counts}
+\title{words2WordCounts}
 \usage{
 words2WordCounts(string)
 }
 \arguments{
-\item{string}{}
+\item{string}{A character string containing the elements (words) to count.
+This would typically be a space-delimited string representing domain
+architectures or genomic contexts.}
 }
 \value{
-\link{tbl_df} table with 2 columns: 1) words & 2) counts/frequency
+A tibble (tbl_df) with two columns:
+\describe{
+\item{\code{words}}{A column containing the individual words
+(domains or domain architectures).}
+\item{\code{freq}}{A column containing the frequency counts for each word.}
+}
 }
 \description{
 Get word counts (wc) \link{DOMAINS (DA) or DOMAIN ARCHITECTURES (GC)}