diff --git a/DESCRIPTION b/DESCRIPTION
index eeff2a4e9..38c4c3c37 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: mia
 Type: Package
-Version: 1.15.4
+Version: 1.15.5
 Authors@R:
     c(person(given = "Tuomas", family = "Borman", role = c("aut", "cre"),
              email = "tuomas.v.borman@utu.fi",
diff --git a/NAMESPACE b/NAMESPACE
index b8c444f21..43c48df48 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -289,7 +289,9 @@ importFrom(DelayedArray,DelayedArray)
 importFrom(DelayedArray,colSums)
 importFrom(DelayedArray,getAutoBPPARAM)
 importFrom(DelayedArray,rowSums)
+importFrom(DelayedArray,rowsum)
 importFrom(DelayedArray,setAutoBPPARAM)
+importFrom(DelayedArray,type)
 importFrom(DelayedMatrixStats,colMeans2)
 importFrom(DelayedMatrixStats,colQuantiles)
 importFrom(DelayedMatrixStats,colSums2)
@@ -375,7 +377,6 @@ importFrom(rbiom,unifrac)
 importFrom(rlang,":=")
 importFrom(rlang,sym)
 importFrom(scuttle,sumCountsAcrossFeatures)
-importFrom(scuttle,summarizeAssayByGroup)
 importFrom(stats,TukeyHSD)
 importFrom(stats,anova)
 importFrom(stats,as.dist)
diff --git a/NEWS b/NEWS
index 053a02399..17936409b 100644
--- a/NEWS
+++ b/NEWS
@@ -159,3 +159,4 @@ computation
 
 Changes in version 1.15.x
 + subsetBy*: added update.tree argument
++ agglomerateBy*: Add na.rm option for excluding NA counts
diff --git a/R/agglomerate.R b/R/agglomerate.R
index bb95f9fe5..78f183af7 100644
--- a/R/agglomerate.R
+++ b/R/agglomerate.R
@@ -1,5 +1,10 @@
-#' Agglomerate or merge data using taxonomic information
-#'
+#' @name
+#' agglomerate-methods
+#' 
+#' @title
+#' Agglomerate data using taxonomic information or other grouping
+#' 
+#' @description
 #' Agglomeration functions can be used to sum-up data based on specific criteria
 #' such as taxonomic ranks, variables or prevalence.
 #'
@@ -16,12 +21,38 @@
 #' \code{\link[SummarizedExperiment:SummarizedExperiment-class]{assay}} are
 #' agglomerated, i.e. summed up. If the assay contains values other than counts
 #' or absolute values, this can lead to meaningless values being produced.
+#' 
+#' @details
+#' Agglomeration sums up the values of assays at the specified taxonomic level.
+#' With certain assays, e.g. those that include binary or negative values, this
+#' summing can produce meaningless values. In those cases, consider performing
+#' agglomeration first, and then applying the transformation afterwards.
+#'
+#' \code{agglomerateByVariable} works similarly to
+#' \code{\link[scuttle:sumCountsAcrossFeatures]{sumCountsAcrossFeatures}}.
+#' However, additional support for \code{TreeSummarizedExperiment} was added and
+#' science field agnostic names were used. In addition the \code{archetype}
+#' argument lets the user select how to preserve row or column data.
+#'
+#' For merging the assay data, functions from \code{scuttle} are used.
+#'
+#' @return
+#' \code{agglomerateByRank} returns a taxonomically-agglomerated,
+#' optionally-pruned object of the same class as \code{x}.
+#' \code{agglomerateByVariable} returns an object of the same class as \code{x}
+#' with the specified entries merged into one entry in all relevant components.
+#' \code{agglomerateByRank} returns a taxonomically-agglomerated,
+#' optionally-pruned object of the same class as \code{x}.
 #'  
 #' @inheritParams getPrevalence
 #'
 #' @param empty.fields \code{Character vector}. Defines which values should be
 #'   regarded as empty. (Default: \code{c(NA, "", " ", "\t")}). They will be
 #'   removed if \code{na.rm = TRUE} before agglomeration.
+#'   
+#' @param empty.rm \code{Logical scalar}. Defines whether rows including
+#' \code{empty.fields} in specified \code{rank} will be excluded.
+#' (Default: \code{TRUE})
 #'
 #' @param agglomerateTree Deprecated. Use \code{update.tree} instead.
 #' 
@@ -37,28 +68,36 @@
 #'        \item \code{empty.ranks.rm}: \code{Logical scalar}. Determines
 #'        whether to remove those columns of rowData that include only NAs after
 #'        agglomeration. (Default: \code{FALSE})
+#'        
+#'        \item \code{empty.rm}: \code{Logical scalar}. Determines
+#'        whether to remove rows that do not belong to any group, i.e., that
+#'        have \code{NA} value. (Default: \code{FALSE})
+#'        
 #'        \item \code{make.unique}: \code{Logical scalar}. Determines
 #'        whether to make rownames unique. (Default: \code{TRUE})
+#'        
 #'        \item \code{detection}: The threshold value for determining presence
 #'        or absence. A value in \code{x} must exceed this threshold to be
 #'        considered present.
+#'        
 #'        \item \code{assay.type}: \code{Character scalar}. Specifies the assay
-#'        used to
-#'        calculate prevalence. (Default: \code{"counts"})
+#'        used to calculate prevalence. (Default: \code{"counts"})
+#'        
 #'        \item \code{prevalence}: Prevalence threshold (in 0 to 1). The
 #'        required prevalence is strictly greater by default. To include the
 #'        limit, set \code{include.lowest} to \code{TRUE}.
+#'        
 #'        \item \code{update.refseq}: \code{Logical scalar}. Should a
 #'        consensus sequence be calculated? If set to \code{FALSE}, the result
 #'        from \code{archetype} is returned; If set to \code{TRUE} the result
 #'        from
 #'        \code{\link[DECIPHER:ConsensusSequence]{DECIPHER::ConsensusSequence}}
 #'        is returned. (Default: \code{FALSE})
+#'        
 #'        \item \code{archetype} Of each level of \code{group}, which element
-#'        should
-#'        be regarded as the archetype and metadata in the columns or rows kept,
-#'        while merging? This can be single integer value or an integer vector
-#'        of the same length as \code{levels(group)}. (Default:
+#'        should be regarded as the archetype and metadata in the columns or
+#'        rows kept, while merging? This can be a single integer value or an
+#'        integer vector of the same length as \code{levels(group)}. (Default:
 #'        \code{1L}, which means the first element encountered per
 #'        factor level will be kept)
 #'    }
@@ -92,43 +131,6 @@
 #' 
 #' @param mergeTree Deprecated. Use \code{update.tree} instead.
 #'
-#' @details
-#' 
-#' Agglomeration sums up the values of assays at the specified taxonomic level.
-#' With
-#' certain assays, e.g. those that include binary or negative values, this
-#' summing
-#' can produce meaningless values. In those cases, consider performing
-#' agglomeration
-#' first, and then applying the transformation afterwards.
-#'
-#' \code{agglomerateByVariable} works similarly to
-#' \code{\link[scuttle:sumCountsAcrossFeatures]{sumCountsAcrossFeatures}}.
-#' However, additional support for \code{TreeSummarizedExperiment} was added and
-#' science field agnostic names were used. In addition the \code{archetype}
-#' argument lets the user select how to preserve row or column data.
-#'
-#' For merge data of assays the function from \code{scuttle} are used.
-#'
-#' @return
-#' \code{agglomerateByRank} returns a taxonomically-agglomerated,
-#' optionally-pruned object of the same class as \code{x}.
-#' \code{agglomerateByVariable} returns an object of the same class as \code{x}
-#' with the specified entries merged into one entry in all relevant components.
-#' \code{agglomerateByRank} returns a taxonomically-agglomerated,
-#' optionally-pruned object of the same class as \code{x}.
-#'
-#' @name agglomerate-methods
-#'
-#' @seealso
-#' \code{\link[=splitOn]{splitOn}}
-#' \code{\link[=unsplitOn]{unsplitOn}}
-#' \code{\link[=agglomerate-methods]{agglomerateByVariable}},
-#' \code{\link[scuttle:sumCountsAcrossFeatures]{sumCountsAcrossFeatures}},
-#' \code{\link[=agglomerate-methods]{agglomerateByRank}},
-#' \code{\link[SingleCellExperiment:altExps]{altExps}},
-#' \code{\link[SingleCellExperiment:splitAltExps]{splitAltExps}}
-#'
 #' @examples
 #'
 #' ### Agglomerate data based on taxonomic information
@@ -158,9 +160,9 @@
 #' tse <- agglomerateByRank(tse, rank = "Genus")
 #' tse <- transformAssay(tse, method = "pa")
 #'
-#' # removing empty labels by setting na.rm = TRUE
+#' # Removing empty labels by setting empty.rm = TRUE
 #' sum(is.na(rowData(GlobalPatterns)$Family))
-#' x3 <- agglomerateByRank(GlobalPatterns, rank="Family", na.rm = TRUE)
+#' x3 <- agglomerateByRank(GlobalPatterns, rank="Family", empty.rm = TRUE)
 #' nrow(x3) # different from x2
 #'
 #' # Because all the rownames are from the same rank, rownames do not include
@@ -172,20 +174,19 @@
 #' print(rownames(x3[1:3,]))
 #'
 #' # use 'empty.ranks.rm' to remove columns that include only NAs
-#' x4 <- agglomerateByRank(GlobalPatterns, rank="Phylum",
-#'                         empty.ranks.rm = TRUE)
+#' x4 <- agglomerateByRank(
+#'     GlobalPatterns, rank="Phylum", empty.ranks.rm = TRUE)
 #' head(rowData(x4))
 #'
-#' # If the assay contains NAs, you might want to consider replacing them,
+#' # If the assay contains NAs, you might want to specify na.rm=TRUE,
 #' # since summing-up NAs lead to NA
 #' x5 <- GlobalPatterns
 #' # Replace first value with NA
 #' assay(x5)[1,1] <- NA
 #' x6 <- agglomerateByRank(x5, "Kingdom")
 #' head( assay(x6) )
-#' # Replace NAs with 0. This is justified when we are summing-up counts.
-#' assay(x5)[ is.na(assay(x5)) ] <- 0
-#' x6 <- agglomerateByRank(x5, "Kingdom")
+#' # Use na.rm=TRUE
+#' x6 <- agglomerateByRank(x5, "Kingdom", na.rm = TRUE)
 #' head( assay(x6) )
 #'
 #' ## Look at enterotype dataset...
@@ -199,44 +200,94 @@
 #' data(esophagus)
 #' esophagus
 #' plot(rowTree(esophagus))
-#' # get a factor for merging
+#' # Get a factor for merging
 #' f <- factor(regmatches(rownames(esophagus),
-#'                        regexpr("^[0-9]*_[0-9]*",rownames(esophagus))))
-#' merged <- agglomerateByVariable(esophagus, by = "rows", f,
-#'                                 update.tree = TRUE)
+#'     regexpr("^[0-9]*_[0-9]*",rownames(esophagus))))
+#' merged <- agglomerateByVariable(
+#'     esophagus, by = "rows", f, update.tree = TRUE)
 #' plot(rowTree(merged))
 #' #
 #' data(GlobalPatterns)
 #' GlobalPatterns
-#' merged <- agglomerateByVariable(GlobalPatterns, by = "cols",
-#'                                 colData(GlobalPatterns)$SampleType)
+#' merged <- agglomerateByVariable(
+#'     GlobalPatterns, by = "cols", colData(GlobalPatterns)$SampleType)
 #' merged
+#' 
+#' @seealso
+#' \code{\link[=splitOn]{splitOn}}
+#' \code{\link[=unsplitOn]{unsplitOn}}
+#' \code{\link[=agglomerate-methods]{agglomerateByVariable}},
+#' \code{\link[scuttle:sumCountsAcrossFeatures]{sumCountsAcrossFeatures}},
+#' \code{\link[=agglomerate-methods]{agglomerateByRank}},
+#' \code{\link[SingleCellExperiment:altExps]{altExps}},
+#' \code{\link[SingleCellExperiment:splitAltExps]{splitAltExps}}
+#' 
 NULL
 
 #' @rdname agglomerate-methods
 #' @export
-setGeneric("agglomerateByRank",
-            signature = "x",
-            function(x, ...)
-                standardGeneric("agglomerateByRank"))
+setGeneric("agglomerateByRank", signature = "x", function(x, ...)
+    standardGeneric("agglomerateByRank"))
 
 #' @rdname agglomerate-methods
-#' @aliases agglomerateByVariable
 #' @export
-setGeneric("agglomerateByVariable",
-            signature = "x",
-            function(x, ...)
-                standardGeneric("agglomerateByVariable"))
+setMethod(
+    "agglomerateByRank", signature = c(x = "TreeSummarizedExperiment"),
+    function(x, rank = taxonomyRanks(x)[1], update.tree = agglomerateTree,
+        agglomerate.tree = agglomerateTree, agglomerateTree = FALSE, ...){
+        # Input check
+        if(!.is_a_bool(update.tree)){
+            stop("'update.tree' must be TRUE or FALSE.", call. = FALSE)
+        }
+        #
+        # If there are multiple rowTrees, it might be that multiple
+        # trees are preserved after agglomeration even though the
+        # dataset could be presented with one tree.
+        # --> order the data so that the taxa are searched from one tree
+        # first.
+        if( length(rowTreeNames(x)) > 1 ){
+            x <- .order_based_on_trees(x)
+        }
+        # Agglomerate data by using SCE method
+        x <- callNextMethod(x, rank = rank, update.tree = update.tree, ...)
+        return(x)
+    }
+)
+
+#' @rdname agglomerate-methods
+#' @importFrom SingleCellExperiment altExp altExp<- altExps<-
+#' @export
+setMethod(
+    "agglomerateByRank", signature = c(x = "SingleCellExperiment"),
+    function(x, rank = taxonomyRanks(x)[1], altexp = NULL,
+        altexp.rm = strip_altexp, strip_altexp = TRUE, ...){
+        # Input check
+        if(!.is_a_bool(altexp.rm)){
+            stop("'altexp.rm' must be TRUE or FALSE.", call. = FALSE)
+        }
+        #
+        # Get altexp if specified
+        x <- .check_and_get_altExp(x, altexp)
+        # Remove altexps if user specified so. As we agglomerate data, they do
+        # not necessarily represent the "high-level" data anymore. I.e., usually
+        # altExp includes subsets of TreeSE, but that is not the case anymore.
+        # That is why we clear the altexp slot.
+        if( altexp.rm ){
+            altExps(x) <- NULL
+        }
+        # Agglomerate the data by using SE method
+        x <- callNextMethod(x, rank = rank, ...)
+        return(x)
+    }
+)
 
 #' @rdname agglomerate-methods
-#'
 #' @importFrom SummarizedExperiment rowData rowData<-
-#'
 #' @export
 setMethod("agglomerateByRank", signature = c(x = "SummarizedExperiment"),
-    function(x, rank = taxonomyRanks(x)[1], na.rm = TRUE,
+    function(x, rank = taxonomyRanks(x)[1], empty.rm = TRUE,
         empty.fields = c(NA, "", " ", "\t", "-", "_"), ...){
-        # input check
+        # Input check
         if(nrow(x) == 0L){
             stop("No data available in `x` ('x' has nrow(x) == 0L.)",
                 call. = FALSE)
@@ -245,8 +296,8 @@ setMethod("agglomerateByRank", signature = c(x = "SummarizedExperiment"),
             stop("'rank' must be a non-empty single character value",
                 call. = FALSE)
         }
-        if(!.is_a_bool(na.rm)){
-            stop("'na.rm' must be TRUE or FALSE.", call. = FALSE)
+        if(!.is_a_bool(empty.rm)){
+            stop("'empty.rm' must be TRUE or FALSE.", call. = FALSE)
         }
         if(ncol(rowData(x)) == 0L){
             stop("taxonomyData needs to be populated.", call. = FALSE)
@@ -254,146 +305,110 @@ setMethod("agglomerateByRank", signature = c(x = "SummarizedExperiment"),
         .check_taxonomic_rank(rank, x)
         .check_for_taxonomic_data_order(x)
         #
-
-        # Make a vector from the taxonomic data.
-        col <- which( taxonomyRanks(x) %in% rank )
+        # Get the index of which taxonomy rank is detected and used for
+        # agglomeration
+        col_idx <- which( taxonomyRanks(x) %in% rank )
+        # Get the indices of detected rank columns from rowData
         tax_cols <- .get_tax_cols_from_se(x)
-
-        # if na.rm is TRUE, remove the empty, white-space, NA values from
-        # tree will be pruned later, if update.tree = TRUE
-        if( na.rm ){
-            x <- .remove_with_empty_taxonomic_info(x, tax_cols[col],
-                                                    empty.fields)
+        
+        # if empty.rm is TRUE, remove those rows that have empty,
+        # white-space, NA values in rank information. I.e., they do not have
+        # taxonomy information in specified taxonomy level.
+        if( empty.rm ){
+            x <- .remove_with_empty_taxonomic_info(
+                x, tax_cols[col_idx], empty.fields)
         }
         # If rank is the only rank that is available and this data is unique,
         # then the data is already 'aggregated' and no further operations
         # are needed.
-        if (length(taxonomyRanks(x)) == 1L &&
-            !anyDuplicated(rowData(x)[,taxonomyRanks(x)])) {
+        if( length(taxonomyRanks(x)) == 1L &&
+                !anyDuplicated(rowData(x)[,taxonomyRanks(x)]) ){
             return(x)
         }
-
-        # get groups of taxonomy entries
-        tax_factors <- .get_tax_groups(x, col = col, ...)
-        # Convert to factors. Use na.rm so that NA values are not preserved.
-        # i.e. they are not converted into character values.
+        
+        # Get groups of taxonomy entries, i.e., get the specified rank
+        # column from rowData
+        tax_factors <- .get_tax_groups(x, col = col_idx, ...)
+        # Convert to factors. Use empty.rm so that NA values are not
+        # preserved. i.e. they are not converted into character values.
         # NA values are handled earlier in this function.
-        tax_factors <- .norm_f(nrow(x), tax_factors, na.rm = TRUE)
-
-        # merge taxa
-        x <- agglomerateByVariable(
-            x, by = "rows", group = tax_factors, na.rm = TRUE, ...)
-
-        # "Empty" the values to the right of the rank, using NA_character_.
-        if( col < length(taxonomyRanks(x)) ){
-            badcolumns <- tax_cols[seq_along(tax_cols) > col]
-            if(length(badcolumns) > 0L){
-                row_data <- rowData(x)
-                row_data[, badcolumns] <- NA_character_
-                rowData(x) <- row_data
-            }
+        tax_factors <- .norm_f(nrow(x), tax_factors, empty.rm = TRUE)
+        
+        # Agglomerate data by utilizing agglomerateByVariable
+        args <- c(list(
+            x, by = "rows", group = tax_factors, empty.rm = TRUE), list(...))
+        x <- do.call(agglomerateByVariable, args)
+        
+        # Replace the values to the right of the rank with NA_character_.
+        # These columns no longer represent the agglomerated data, as they
+        # previously corresponded to specific lower taxonomic ranks that are
+        # now aggregated at the current level.
+        badcolumns <- tax_cols[seq_along(tax_cols) > col_idx]
+        if( length(badcolumns) > 0L ){
+            rowData(x)[, badcolumns] <- NA_character_
         }
-        # adjust rownames
-        rownames(x) <- getTaxonomyLabels(x, empty.fields, ...,
-                                        with.rank = FALSE,
-                                        resolve.loops = FALSE)
+        # Adjust rownames
+        rownames(x) <- getTaxonomyLabels(
+            x, empty.fields, with.rank = FALSE, resolve.loops = FALSE, ...)
         # Remove those columns from rowData that include only NAs
         x <- .remove_NA_cols_from_rowdata(x, ...)
+        # Add agglomeration info to metadata
         x <- .add_values_to_metadata(x, "agglomerated_by_rank", rank)
-
         # Order the data in alphabetical order
         x <- x[ order(rownames(x)), ]
+        return(x)
     }
 )
 
 #' @rdname agglomerate-methods
 #' @aliases agglomerateByVariable
 #' @export
-setMethod("agglomerateByVariable", signature = c(x = "SummarizedExperiment"),
-            function(x, by, group = f, f, ...){
-                by <- .check_MARGIN(by)
-                FUN <- switch(by, .merge_rows, .merge_cols)
-                x <- FUN(x, group, ...)
-                return(x)
-            }
-)
+setGeneric("agglomerateByVariable", signature = "x", function(x, ...)
+    standardGeneric("agglomerateByVariable"))
 
 #' @rdname agglomerate-methods
 #' @aliases agglomerateByVariable
 #' @export
 setMethod("agglomerateByVariable",
-            signature = c(x = "TreeSummarizedExperiment"),
-            function(x, by, group = f, f, update.tree = mergeTree,
-                    mergeTree = FALSE, ...){
-                # Check by
-                by <- .check_MARGIN(by)
-                # Get function based on by
-                FUN <- switch(by, .merge_rows_TSE, .merge_cols_TSE)
-                # Agglomerate
-                x <- FUN(x, group, update.tree = update.tree, ...)
-                return(x)
-            }
-)
-
-#' @rdname agglomerate-methods
-#' @importFrom SingleCellExperiment altExp altExp<- altExps<-
-#' @export
-setMethod("agglomerateByRank", signature = c(x = "SingleCellExperiment"),
-    function(x, ..., altexp = NULL, altexp.rm = strip_altexp,
-            strip_altexp = TRUE){
-        # input check
-        if(!.is_a_bool(altexp.rm)){
-            stop("'altexp.rm' mus be TRUE or FALSE.", call. = FALSE)
-        }
-        #
-        if (!is.null(altexp)) {
-            x <- altExp(x, altexp)
-        }
-        if(altexp.rm && is(x, "SingleCellExperiment")){
-            altExps(x) <- NULL
-        }
-        callNextMethod(x, ...)
+    signature = c(x = "TreeSummarizedExperiment"),
+    function(x, by, group = f, f, update.tree = mergeTree, mergeTree = FALSE,
+        ...){
+        # Check by
+        by <- .check_MARGIN(by)
+        # Get function based on by
+        FUN <- switch(by, .merge_rows_TSE, .merge_cols_TSE)
+        # Agglomerate
+        x <- FUN(x, group, update.tree = update.tree, ...)
+        return(x)
     }
 )
 
 #' @rdname agglomerate-methods
+#' @aliases agglomerateByVariable
 #' @export
-setMethod(
-    "agglomerateByRank", signature = c(x = "TreeSummarizedExperiment"),
-    function(
-        x, ..., update.tree = agglomerateTree,
-        agglomerate.tree = agglomerateTree, agglomerateTree = FALSE){
-                # input check
-                if(!.is_a_bool(update.tree)){
-                    stop("'update.tree' must be TRUE or FALSE.",
-                        call. = FALSE)
-                }
-                # If there are multipe rowTrees, it might be that multiple
-                # trees are preserved after agglomeration even though the
-                # dataset could be presented with one tree.
-                # --> order the data so that the taxa are searched from one tree
-                # first.
-                if( length(rowTreeNames(x)) > 1 ){
-                    x <- .order_based_on_trees(x)
-                }
-                # Agglomerate data
-                x <- callNextMethod(x, update.tree = update.tree, ...)
-                return(x)
-            }
+setMethod("agglomerateByVariable", signature = c(x = "SummarizedExperiment"),
+    function(x, by, group = f, f, ...){
+        # Check by
+        by <- .check_MARGIN(by)
+        # Agglomerate the data
+        x <- .merge_rows_or_cols(x, group, by, ...)
+        return(x)
+    }
 )
 
 ################################ HELP FUNCTIONS ################################
 
-.remove_with_empty_taxonomic_info <-
-    function(x, column, empty.fields = c(NA,""," ","\t","-","_"))
-        {
-        tax <- as.character(rowData(x)[,column])
-        f <- !(tax %in% empty.fields)
-        if(any(!f)){
-            x <- x[f, , drop=FALSE]
-        }
-        x
+# This function subsets the data so that rows that do not have taxonomy
+# information in the specified rank are removed.
+.remove_with_empty_taxonomic_info <- function(
+        x, column, empty.fields = c(NA,""," ","\t","-","_")){
+    tax <- as.character(rowData(x)[,column])
+    f <- !(tax %in% empty.fields)
+    if(any(!f)){
+        x <- x[f, , drop=FALSE]
     }
+    return(x)
+}
 
 # This function removes empty rank columns from rowdata. (Those that include
 # only NA values)
diff --git a/R/getPrevalence.R b/R/getPrevalence.R
index fca790187..6ccf3a850 100644
--- a/R/getPrevalence.R
+++ b/R/getPrevalence.R
@@ -9,11 +9,11 @@
 #' 
 #' @param assay_name Deprecated. Use \code{assay.type} instead.
 #'
-#' @param detection \code{Numeric scalar}. Detection threshold for absence/presence. 
-#'    If \code{as_relative = FALSE},
-#'    it sets the counts threshold for a taxon to be considered present.
-#'    If \code{as_relative = TRUE}, it sets the relative abundance threshold
-#'    for a taxon to be considered present. (Default: \code{0})
+#' @param detection \code{Numeric scalar}. Detection threshold for
+#' absence/presence. If \code{as_relative = FALSE},
+#' it sets the counts threshold for a taxon to be considered present.
+#' If \code{as_relative = TRUE}, it sets the relative abundance threshold
+#' for a taxon to be considered present. (Default: \code{0})
 #'
 #' @param include.lowest \code{Logical scalar}. Should the lower boundary of the
 #'   detection and prevalence cutoffs be included? (Default: \code{FALSE})
@@ -23,11 +23,11 @@
 #' @param sort \code{Logical scalar}. Should the result be sorted by prevalence?
 #'   (Default: \code{FALSE})
 #'
-#' @param rank \code{Character scalar}. Defines a taxonomic rank. Must be a value of
-#'   \code{taxonomyRanks()} function.
+#' @param rank \code{Character scalar}. Defines a taxonomic rank. Must be a
+#' value of \code{taxonomyRanks()} function.
 #'
-#' @param na.rm \code{Logical scalar}. Should NA values be omitted when calculating
-#' prevalence? (Default: \code{TRUE})
+#' @param na.rm \code{Logical scalar}. Should NA values be omitted?
+#' (Default: \code{TRUE})
 #' 
 #' @param update.tree \code{Logical scalar}. Should
 #' \code{rowTree()} also be agglomerated? (Default: \code{FALSE})
@@ -37,8 +37,6 @@
 #'   \item If \code{!is.null(rank)} arguments are passed on to
 #'   \code{\link[=agglomerate-methods]{agglomerateByRank}}. See
 #'   \code{\link[=agglomerate-methods]{?agglomerateByRank}} for more details.
-#'   Note that you can specify whether to remove empty ranks with
-#'   \code{agg.na.rm} instead of \code{na.rm}. (default: \code{FALSE})
 #'
 #'   \item for \code{getPrevalent}, \code{getRare}, \code{subsetByPrevalent}
 #'   and \code{subsetByRare} additional parameters passed to
@@ -175,8 +173,8 @@ NULL
 #' @rdname getPrevalence
 #' @export
 setGeneric("getPrevalence", signature = "x",
-           function(x, ...)
-               standardGeneric("getPrevalence"))
+    function(x, ...)
+    standardGeneric("getPrevalence"))
 
 #' @rdname getPrevalence
 #' @export
@@ -186,8 +184,7 @@ setMethod("getPrevalence", signature = c(x = "ANY"), function(
         # input check
         if (!.is_numeric_string(detection)) {
             stop("'detection' must be a single numeric value or coercible to ",
-                 "one.",
-                 call. = FALSE)
+                "one.", call. = FALSE)
         }
         #
         if(!.is_a_bool(na.rm)){
@@ -227,36 +224,6 @@ setMethod("getPrevalence", signature = c(x = "ANY"), function(
     }
 )
 
-.agg_for_prevalence <- function(
-        x, rank, relabel = FALSE, make.unique = TRUE, na.rm = FALSE,
-        agg.na.rm = TRUE, ...){
-    # Check na.rm. It is not used in this function, it is only caught so that
-    # it can be passed to getPrevalence(matrix) and not use it here in
-    # agglomerateByRank function.
-    if(!.is_a_bool(na.rm)){
-        stop("'na.rm' must be TRUE or FALSE.", call. = FALSE)
-    }
-    #
-    # Check drop.empty.rank
-    if(!.is_a_bool(agg.na.rm)){
-        stop("'agg.na.rm' must be TRUE or FALSE.", call. = FALSE)
-    }
-    #
-    if(!is.null(rank)){
-        .check_taxonomic_rank(rank, x)
-        args <- c(list(x = x, rank = rank, na.rm = agg.na.rm), list(...))
-        argNames <- c(
-            "x","rank","ignore.taxonomy","na.rm","empty.fields", "archetype",
-            "update.tree","average","BPPARAM", "update.refseq")
-        args <- args[names(args) %in% argNames]
-        x <- do.call(agglomerateByRank, args)
-        if(relabel){
-            rownames(x) <- getTaxonomyLabels(x, make.unique = make.unique)
-        }
-    }
-    x
-}
-
 #' @rdname getPrevalence
 #' @export
 setMethod("getPrevalence", signature = c(x = "SummarizedExperiment"),
@@ -264,7 +231,7 @@ setMethod("getPrevalence", signature = c(x = "SummarizedExperiment"),
             rank = NULL, ...){
         # check assay
         .check_assay_present(assay.type, x)
-        x <- .agg_for_prevalence(x, rank = rank, ...)
+        x <- .merge_features(x, rank = rank, ...)
         mat <- assay(x, assay.type)
         # Calculate abundance
         mat <- .to_rel_abund(mat, ...)
@@ -286,8 +253,8 @@ setMethod("getPrevalence", signature = c(x = "SummarizedExperiment"),
 #'
 #' @export
 setGeneric("getPrevalent", signature = "x",
-           function(x, ...)
-               standardGeneric("getPrevalent"))
+    function(x, ...)
+    standardGeneric("getPrevalent"))
 
 .norm_rownames <- function(x){
     if(is.null(rownames(x))){
@@ -303,8 +270,7 @@ setGeneric("getPrevalent", signature = "x",
     # input check
     if (!.is_numeric_string(prevalence)) {
         stop("'prevalence' must be a single numeric value or coercible to ",
-             "one.",
-             call. = FALSE)
+            "one.", call. = FALSE)
     }
 
     prevalence <- as.numeric(prevalence)
@@ -337,7 +303,7 @@ setGeneric("getPrevalent", signature = "x",
 
 .get_prevalent_taxa <- function(x, rank = NULL, ...){
     if(is(x,"SummarizedExperiment")){
-        x <- .agg_for_prevalence(x, rank = rank, ...)
+        x <- .merge_features(x, rank = rank, ...)
     }
     indices <- .get_prevalent_indices(x, ...)
     # If named input return named output
@@ -381,8 +347,8 @@ setMethod("getPrevalent", signature = c(x = "SummarizedExperiment"),
 #'
 #' @export
 setGeneric("getRare", signature = "x",
-           function(x, ...)
-               standardGeneric("getRare"))
+    function(x, ...)
+    standardGeneric("getRare"))
 
 .get_rare_indices <- function(x, ...){
     indices <- .get_prevalent_indices(x = x, ...)
@@ -395,7 +361,7 @@ setGeneric("getRare", signature = "x",
 
 .get_rare_taxa <- function(x, rank = NULL, ...){
     if(is(x,"SummarizedExperiment")){
-        x <- .agg_for_prevalence(x, rank = rank, ...)
+        x <- .merge_features(x, rank = rank, ...)
     }
     indices <- .get_rare_indices(x, ...)
     #
@@ -434,14 +400,14 @@ setMethod("getRare", signature = c(x = "SummarizedExperiment"),
 #' @rdname getPrevalence
 #' @export
 setGeneric("subsetByPrevalent", signature = "x",
-           function(x, ...)
-               standardGeneric("subsetByPrevalent"))
+    function(x, ...)
+    standardGeneric("subsetByPrevalent"))
 
 #' @rdname getPrevalence
 #' @export
 setMethod("subsetByPrevalent", signature = c(x = "SummarizedExperiment"),
     function(x, rank = NULL, ...){
-        x <- .agg_for_prevalence(x, rank = rank, ...)
+        x <- .merge_features(x, rank = rank, ...)
         prevalent_indices <- .get_prevalent_indices(x, ...)
         x[prevalent_indices, ]
     }
@@ -470,14 +436,14 @@ setMethod("subsetByPrevalent", signature = c(x = "TreeSummarizedExperiment"),
 #' @rdname getPrevalence
 #' @export
 setGeneric("subsetByRare", signature = "x",
-           function(x, ...)
-               standardGeneric("subsetByRare"))
+    function(x, ...)
+    standardGeneric("subsetByRare"))
 
 #' @rdname getPrevalence
 #' @export
 setMethod("subsetByRare", signature = c(x = "SummarizedExperiment"),
     function(x, rank = NULL, ...){
-        x <- .agg_for_prevalence(x, rank = rank, ...)
+        x <- .merge_features(x, rank = rank, ...)
         rare_indices <- .get_rare_indices(x, ...)
         x[rare_indices, ]
     }
@@ -506,8 +472,8 @@ setMethod("subsetByRare", signature = c(x = "TreeSummarizedExperiment"),
 #' @rdname getPrevalence
 #' @export
 setGeneric("getPrevalentAbundance", signature = "x",
-           function(x, assay.type = assay_name, assay_name = "relabundance", ...)
-               standardGeneric("getPrevalentAbundance"))
+    function(x, assay.type = assay_name, assay_name = "relabundance", ...)
+    standardGeneric("getPrevalentAbundance"))
 
 #' @rdname getPrevalence
 #' @export
@@ -517,9 +483,8 @@ setMethod("getPrevalentAbundance", signature = c(x = "ANY"),
         cm <- getPrevalent(x, ...)
         if (length(cm) == 0) {
             stop("With the given abundance and prevalence thresholds, no taxa ",
-                 "were found. Try to change detection and prevalence ",
-                 "parameters.",
-                 call. = FALSE)
+                "were found. Try to change detection and prevalence ",
+                "parameters.", call. = FALSE)
         }
         colSums(x[cm, ,drop=FALSE])
     }
@@ -586,23 +551,24 @@ setMethod("getPrevalentAbundance", signature = c(x = "SummarizedExperiment"),
 #'
 #' @export
 setGeneric("agglomerateByPrevalence", signature = "x",
-           function(x, ...)
-               standardGeneric("agglomerateByPrevalence"))
+    function(x, ...)
+    standardGeneric("agglomerateByPrevalence"))
 
 #' @rdname agglomerateByPrevalence
 #' @export
 setMethod("agglomerateByPrevalence", signature = c(x = "SummarizedExperiment"),
-    function(x, rank = NULL, other.name = other_label, other_label = "Other", ...){
+    function(x, rank = NULL, other.name = other_label, other_label = "Other",
+        ...){
         # input check
         if(!.is_a_string(other.name)){
             stop("'other.name' must be a single character value.",
-                 call. = FALSE)
+                call. = FALSE)
         }
         #
         # Check assays that they can be merged safely
-        mapply(.check_assays_for_merge, assayNames(x), assays(x))
+        temp <- mapply(.check_assays_for_merge, assayNames(x), assays(x))
         #
-        x <- .agg_for_prevalence(x, rank, check.assays = FALSE, ...)
+        x <- .merge_features(x, rank, check.assays = FALSE, ...)
         pr <- getPrevalent(x, rank = NULL, ...)
         f <- rownames(x) %in% pr
         if(any(!f)){
@@ -624,12 +590,12 @@ setMethod("agglomerateByPrevalence", signature = c(x = "SummarizedExperiment"),
 #' @rdname agglomerateByPrevalence
 #' @export
 setMethod("agglomerateByPrevalence", 
-          signature = c(x = "TreeSummarizedExperiment"),
+    signature = c(x = "TreeSummarizedExperiment"),
     function(x, rank = NULL, other.name = other_label, other_label = "Other",
             update.tree = FALSE, ...){
         # input check
         if(!.is_a_bool(update.tree)){
-          stop("'update.tree' must be TRUE or FALSE.", call. = FALSE)
+            stop("'update.tree' must be TRUE or FALSE.", call. = FALSE)
         }
         # update.refseq is a hidden parameter as for all other agglomeration
         # methods from the agglomerate-methods man page.
@@ -647,7 +613,7 @@ setMethod("agglomerateByPrevalence",
         # sequences are only subsetted without finding consensus sequences.
         if( merge_refseq && !is.null(referenceSeq(x))  ){
             # If user wants to agglomerate based on rank
-            x <- .agg_for_prevalence(x, rank, check.assays = FALSE, ...)
+            x <- .merge_features(x, rank, check.assays = FALSE, ...)
             # Find groups that will be used to agglomerate the data
             f <- rownames(x)[ match(rownames(x), rownames(res)) ]
             f[ is.na(f) ] <- other.name
@@ -661,7 +627,7 @@ setMethod("agglomerateByPrevalence",
             res <- .agglomerate_trees(res, 1)
         }
         return(res)
-      }
+    }
 )
 
 # Get abundance. Determines if relative abundance is calculated or not.
diff --git a/R/merge.R b/R/merge.R
index 2c1dde0ac..6151106ef 100644
--- a/R/merge.R
+++ b/R/merge.R
@@ -1,6 +1,9 @@
-.norm_f <- function(i, f, dim.type = c("rows","columns"), na.rm = FALSE, ...){
-    if(!.is_a_bool(na.rm)){
-        stop("'na.rm' must be TRUE or FALSE.", call. = FALSE)
+# This function can be used to unify the group id vector. It can be any
+# kind of vector, but this converts it to factor.
+.norm_f <- function(
+        i, f, dim.type = c("rows","columns"), empty.rm = FALSE, ...){
+    if(!.is_a_bool(empty.rm)){
+        stop("'empty.rm' must be TRUE or FALSE.", call. = FALSE)
     }
     dim.type <- match.arg(dim.type)
     if(!is.character(f) && !is.factor(f)){
@@ -13,7 +16,7 @@
             call. = FALSE)
     }
     # This is done otherwise we lose NA values
-    if( !na.rm && any(is.na(f)) ){
+    if( !empty.rm && any(is.na(f)) ){
         f <- as.character(f)
         f[ is.na(f) ] <- "NA"
     }
@@ -23,6 +26,9 @@
     f
 }
 
+# When rows or columns are merged, the first member of each group is kept by
+# default (in colData or rowData). This function controls this and allows the
+# user to specify some other element than the first one.
 .norm_archetype <- function(f, archetype){
     if(length(archetype) > 1L){
         if(length(levels(f)) != length(archetype)){
@@ -49,6 +55,8 @@
     archetype
 }
 
+# This function returns the index/position of rows/columns that are kept
+# after merging.
 #' @importFrom S4Vectors splitAsList
 .get_element_pos <- function(f, archetype){
     archetype <- as.list(archetype)
@@ -58,13 +66,12 @@
     f_pos
 }
 
+# This function merges assays and row/colData.
 #' @importFrom S4Vectors SimpleList
 #' @importFrom scuttle sumCountsAcrossFeatures
-.merge_rows <- function(x, f, archetype = 1L,
-                        average = FALSE,
-                        BPPARAM = SerialParam(),
-                        check.assays = TRUE,
-                        ...){
+.merge_rows_or_cols <- function(
+        x, f, by, archetype = 1L, average = FALSE, BPPARAM = SerialParam(),
+        check.assays = TRUE, na.rm = FALSE, ...){
     # input check
     if( !.is_a_bool(average) ){
         stop("'average' must be TRUE or FALSE.", call. = FALSE)
@@ -72,95 +79,109 @@
     if( !.is_a_bool(check.assays) ){
         stop("'check.assays' must be TRUE or FALSE.", call. = FALSE)
     }
-    if( .is_a_string(f) && f %in% colnames(rowData(x)) ){
-        f <- rowData(x)[[ f ]]
+    if( !.is_a_bool(na.rm) ){
+        stop("'na.rm' must be TRUE or FALSE.", call. = FALSE)
     }
-    f <- .norm_f(nrow(x), f, ...)
-    if(length(levels(f)) == nrow(x)){
+    #
+    # Get correct functions based on whether we agglomerate rows or cols
+    rowData_FUN <- switch(by, rowData, colData)
+    nrow_FUN <- switch(by, nrow, ncol)
+    rownames_FUN <- switch(by, rownames, colnames)
+    rownames_ass_FUN <- switch(by, `rownames<-`, `colnames<-`)
+    # If user specified column name from row/colData, get the values
+    if( .is_a_string(f) && f %in% colnames(rowData_FUN(x)) ){
+        f <- rowData_FUN(x)[[ f ]]
+    }
+    # Check that the group ID vector is specifying groups for each element
+    f <- .norm_f(nrow_FUN(x), f, ...)
+    # If the data is already agglomerated at each group
+    if(length(levels(f)) == nrow_FUN(x)){
         return(x)
     }
-
+    # In merging, the first element of each group is kept by default. The
+    # 'archetype' argument controls this; it gives the position of the element
+    # to preserve, either a single index or one index for every group.
     archetype <- .norm_archetype(f, archetype)
-    # merge assays
+    
+    # Get assays
     assays <- assays(x)
+    # We check whether the assays include values that cannot be summed. For
+    # instance, summing negative values does not make sense.
     if( check.assays ){
-        mapply(.check_assays_for_merge, names(assays), assays)
-    }
-    assays <- S4Vectors::SimpleList(lapply(assays,
-                                            scuttle::sumCountsAcrossFeatures,
-                                            ids = f,
-                                            subset.row = NULL,
-                                            subset.col = NULL,
-                                            average = average,
-                                            BPPARAM = BPPARAM))
-    names(assays) <- names(assays(x))
-    # merge to result
-    x <- x[.get_element_pos(f, archetype = archetype),]
+        temp <- lapply(seq_len(length(assays)), function(i)
+            .check_assays_for_merge(names(assays)[[i]], assays[[i]]))
+    }
+    
+    # Transpose if we are merging columns
+    if( by == 2L ){
+        assays <- lapply(assays, function(mat) t(mat))
+    }
+    # Get the aggregation function based on whether user wants to exclude NAs
+    # and whether the first assay has NAs. scuttle::sumCountsAcrossFeatures
+    # cannot handle NAs, so to exclude them we use our own implementation.
+    FUN <- if( na.rm && anyNA(assays[[1]])) .sum_counts_accross_features_na else
+        sumCountsAcrossFeatures
+    # Agglomerate assays
+    assays <- lapply(assays, FUN, average = average, ids = f, BPPARAM = BPPARAM)
+    # Transpose back to original orientation
+    if( by == 2L ){
+        assays <- lapply(assays, function(mat) t(mat))
+    }
+    # Convert to SimpleList
+    assays <- assays |> SimpleList()
+    
+    # Now we have agglomerated assays, but the TreeSE still has its original
+    # form. We take the specified rows/columns from the TreeSE.
+    idx <- .get_element_pos(f, archetype = archetype)
+    if( by == 1L ){
+        x <- x[idx, ]
+    } else{
+        x <- x[ , idx]
+    }
+    
+    # Add assays back to TreeSE
     assays(x, withDimnames = FALSE) <- assays
-    # Change rownames to group names
-    rownames(x) <- rownames(assays[[1]])
-    x
+    # Change row/colnames. Currently, they are the names of the kept elements
+    # of the original data. Change them to represent the groups instead.
+    x <- rownames_ass_FUN(x, rownames_FUN(assays[[1]]))
+    return(x)
 }
 
-#' @importFrom scuttle sumCountsAcrossFeatures
+# This function works similarly to scuttle::sumCountsAcrossFeatures but this
+# excludes NAs from the data. The scuttle function cannot handle NAs.
+#' @importFrom DelayedArray DelayedArray type rowsum
+.sum_counts_accross_features_na <- function(x, average, ids, ...){
+    # Which cell is not NA?
+    is_not_na <- !is.na(x)
+    type(is_not_na) <- "integer"
+    # Aggregate data to certain groups
+    x <- rowsum(x, ids, na.rm = TRUE)
+    # Calculate average if specified
+    if( average ){
+        x <- x/rowsum(is_not_na, ids)
+    }
+    return(x)
+}
+
+# This function checks if an assay has negative or binary values. It does not
+# make sense to sum them, so we give a warning to the user.
 .check_assays_for_merge <- function(assay.type, assay){
     # Check if assays include binary or negative values
     if( all(assay == 0 | assay == 1) ){
-        warning("'",assay.type,"'", " includes binary values.",
+        warning("'", assay.type, "'", " includes binary values.",
                 "\nAgglomeration of it might lead to meaningless values.",
                 "\nCheck the assay, and consider doing transformation again",
                 "manually with agglomerated data.",
                 call. = FALSE)
     }
     if( !all( assay >= 0 | is.na(assay) ) ){
-        warning("'",assay.type,"'", " includes negative values.",
+        warning("'", assay.type, "'", " includes negative values.",
                 "\nAgglomeration of it might lead to meaningless values.",
                 "\nCheck the assay, and consider doing transformation again",
                 "manually with agglomerated data.",
                 call. = FALSE)
     }
-}
-
-#' @importFrom S4Vectors SimpleList
-#' @importFrom scuttle summarizeAssayByGroup
-.merge_cols <- function(x, f, archetype = 1L, ...){
-    # input check
-    if( .is_a_string(f) && f %in% colnames(colData(x)) ){
-      f <- colData(x)[[ f ]]
-    }
-    f <- .norm_f(ncol(x), f, "columns", ...)
-    
-    if(length(levels(f)) == ncol(x)){
-        return(x)
-    }
-    archetype <- .norm_archetype(f, archetype)
-    # merge col data
-    element_pos <- .get_element_pos(f, archetype = archetype)
-    col_data <- colData(x)[element_pos,,drop=FALSE]
-    # merge assays
-    assays <- assays(x)
-    mapply(.check_assays_for_merge, names(assays), assays)
-    FUN <- function(mat, ...){
-        temp <- scuttle::summarizeAssayByGroup(mat,
-                                                statistics = "sum",
-                                                ...)
-        # "sum" includes agglomerated (summed up) data
-        mat <- assay(temp, "sum")
-        return(mat)
-    }
-    assays <- S4Vectors::SimpleList(lapply(assays,
-                                            FUN = FUN,
-                                            ids = f,
-                                            subset.row = NULL,
-                                            subset.col = NULL,
-                                            ...))
-    names(assays) <- names(assays(x))
-    # merge to result
-    x <- x[,.get_element_pos(f, archetype = archetype)]
-    assays(x, withDimnames = FALSE) <- assays
-    # Change colnames to group names
-    colnames(x) <- colnames(assays[[1]])
-    x
+    return(assay)
 }
 
 #' @importFrom Biostrings DNAStringSetList
@@ -203,7 +224,7 @@
         refSeq <- referenceSeq(x)
     }
     #
-    x <- .merge_rows(x, f, archetype = 1L, ...)
+    x <- .merge_rows_or_cols(x, f, by = 1L, archetype = 1L, ...)
     # optionally merge rowTree
     if( update.tree ){
         x <- .agglomerate_trees(x, 1, ...)
@@ -221,7 +242,7 @@
         stop("'update.tree' must be TRUE or FALSE.", call. = FALSE)
     }
     #
-    x <- .merge_cols(x, f, archetype = 1L, ...)
+    x <- .merge_rows_or_cols(x, f, by = 2L, archetype = 1L, ...)
     # optionally merge colTree
     if( update.tree ){
         x <- .agglomerate_trees(x, 2, ...)
diff --git a/R/utils.R b/R/utils.R
index fd7c52b3d..dee2dd391 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -521,13 +521,12 @@
 
 ################################################################################
 # internal wrappers for agglomerateByRank/agglomerateByVariable
-.merge_features <- function(x, merge.by, ...) {
+.merge_features <- function(x, merge.by = rank, rank = NULL, ...) {
     # Check if merge.by parameter belongs to taxonomyRanks
-    if (is.character(merge.by) && length(merge.by) == 1 &&
-        merge.by %in% taxonomyRanks(x)) {
+    if( .is_a_string(merge.by) && merge.by %in% taxonomyRanks(x) ){
         # Merge using agglomerateByRank
         x <- agglomerateByRank(x, rank = merge.by, ...)
-    } else {
+    } else if( !is.null(merge.by) ){
         # Merge using agglomerateByVariable
         x <- agglomerateByVariable(x, by = "rows", group = merge.by, ...)
     }
diff --git a/man/agglomerate-methods.Rd b/man/agglomerate-methods.Rd
index 462928742..78e3f47db 100644
--- a/man/agglomerate-methods.Rd
+++ b/man/agglomerate-methods.Rd
@@ -3,12 +3,12 @@
 \name{agglomerate-methods}
 \alias{agglomerate-methods}
 \alias{agglomerateByRank}
-\alias{agglomerateByVariable}
+\alias{agglomerateByRank,TreeSummarizedExperiment-method}
+\alias{agglomerateByRank,SingleCellExperiment-method}
 \alias{agglomerateByRank,SummarizedExperiment-method}
-\alias{agglomerateByVariable,SummarizedExperiment-method}
+\alias{agglomerateByVariable}
 \alias{agglomerateByVariable,TreeSummarizedExperiment-method}
-\alias{agglomerateByRank,SingleCellExperiment-method}
-\alias{agglomerateByRank,TreeSummarizedExperiment-method}
+\alias{agglomerateByVariable,SummarizedExperiment-method}
 \alias{agglomerateByRanks}
 \alias{agglomerateByRanks,SummarizedExperiment-method}
 \alias{agglomerateByRanks,SingleCellExperiment-method}
@@ -17,21 +17,37 @@
 \alias{unsplitByRanks}
 \alias{unsplitByRanks,SingleCellExperiment-method}
 \alias{unsplitByRanks,TreeSummarizedExperiment-method}
-\title{Agglomerate or merge data using taxonomic information}
+\title{Agglomerate data using taxonomic information or other grouping}
 \usage{
 agglomerateByRank(x, ...)
 
-agglomerateByVariable(x, ...)
+\S4method{agglomerateByRank}{TreeSummarizedExperiment}(
+  x,
+  rank = taxonomyRanks(x)[1],
+  update.tree = agglomerateTree,
+  agglomerate.tree = agglomerateTree,
+  agglomerateTree = FALSE,
+  ...
+)
+
+\S4method{agglomerateByRank}{SingleCellExperiment}(
+  x,
+  rank = taxonomyRanks(x)[1],
+  altexp = NULL,
+  altexp.rm = strip_altexp,
+  strip_altexp = TRUE,
+  ...
+)
 
 \S4method{agglomerateByRank}{SummarizedExperiment}(
   x,
   rank = taxonomyRanks(x)[1],
-  na.rm = TRUE,
+  empty.rm = TRUE,
   empty.fields = c(NA, "", " ", "\\t", "-", "_"),
   ...
 )
 
-\S4method{agglomerateByVariable}{SummarizedExperiment}(x, by, group = f, f, ...)
+agglomerateByVariable(x, ...)
 
 \S4method{agglomerateByVariable}{TreeSummarizedExperiment}(
   x,
@@ -43,21 +59,7 @@ agglomerateByVariable(x, ...)
   ...
 )
 
-\S4method{agglomerateByRank}{SingleCellExperiment}(
-  x,
-  ...,
-  altexp = NULL,
-  altexp.rm = strip_altexp,
-  strip_altexp = TRUE
-)
-
-\S4method{agglomerateByRank}{TreeSummarizedExperiment}(
-  x,
-  ...,
-  update.tree = agglomerateTree,
-  agglomerate.tree = agglomerateTree,
-  agglomerateTree = FALSE
-)
+\S4method{agglomerateByVariable}{SummarizedExperiment}(x, by, group = f, f, ...)
 
 agglomerateByRanks(x, ...)
 
@@ -112,11 +114,29 @@ unsplitByRanks(x, ...)
 \code{SummarizedExperiment} objects and other functions.
 See \code{\link[=agglomerate-methods]{agglomerateByRank}} for more details.}
 
-\item{rank}{\code{Character scalar}. Defines a taxonomic rank. Must be a value of
-\code{taxonomyRanks()} function.}
+\item{rank}{\code{Character scalar}. Defines a taxonomic rank. Must be a
+value of \code{taxonomyRanks()} function.}
+
+\item{update.tree}{\code{Logical scalar}. Should
+\code{rowTree()} also be merged? (Default: \code{FALSE})}
+
+\item{agglomerate.tree}{Deprecated. Use \code{update.tree} instead.}
+
+\item{agglomerateTree}{Deprecated. Use \code{update.tree} instead.}
+
+\item{altexp}{\code{Character scalar} or \code{integer scalar}.
+Specifies an alternative experiment containing the input data.}
+
+\item{altexp.rm}{\code{Logical scalar}. Should alternative
+experiments be removed prior to agglomeration? This prevents too many
+nested alternative experiments by default. (Default:
+\code{TRUE})}
+
+\item{strip_altexp}{Deprecated. Use \code{altexp.rm} instead.}
 
-\item{na.rm}{\code{Logical scalar}. Should NA values be omitted when calculating
-prevalence? (Default: \code{TRUE})}
+\item{empty.rm}{\code{Logical scalar}. Defines whether rows including
+\code{empty.fields} in specified \code{rank} will be excluded.
+(Default: \code{TRUE})}
 
 \item{empty.fields}{\code{Character vector}. Defines which values should be
 regarded as empty. (Default: \code{c(NA, "", " ", "\t")}). They will be
@@ -136,28 +156,14 @@ returned unchanged.}
 
 \item{f}{Deprecated. Use \code{group} instead.}
 
-\item{update.tree}{\code{Logical scalar}. Should
-\code{rowTree()} also be merged? (Default: \code{FALSE})}
-
 \item{mergeTree}{Deprecated. Use \code{update.tree} instead.}
 
-\item{altexp}{\code{Character scalar} or \code{integer scalar}.
-Specifies an alternative experiment containing the input data.}
-
-\item{altexp.rm}{\code{Logical scalar}. Should alternative
-experiments be removed prior to agglomeration? This prevents too many
-nested alternative experiments by default. (Default:
-\code{TRUE})}
-
-\item{strip_altexp}{Deprecated. Use \code{altexp.rm} instead.}
-
-\item{agglomerate.tree}{Deprecated. Use \code{update.tree} instead.}
-
-\item{agglomerateTree}{Deprecated. Use \code{update.tree} instead.}
-
 \item{ranks}{\code{Character vector}. Defines taxonomic ranks. Must all be values
 of \code{taxonomyRanks()} function.}
 
+\item{na.rm}{\code{Logical scalar}. Should NA values be omitted?
+(Default: \code{TRUE})}
+
 \item{as.list}{\code{Logical scalar}. Should the list of
 \code{SummarizedExperiment} objects be returned by the function
 \code{agglomerateByRanks} as a SimpleList or stored in altExps?
@@ -194,14 +200,6 @@ and any existing \code{rowTree} is dropped as well, since existing
 Agglomeration functions can be used to sum-up data based on specific criteria
 such as taxonomic ranks, variables or prevalence.
 
-\code{agglomerateByRanks} takes a \code{SummarizedExperiment}, splits it along the
-taxonomic ranks, aggregates the data per rank, converts the input to a
-\code{SingleCellExperiment} objects and stores the aggregated data as
-alternative experiments. \code{unsplitByRanks} takes these alternative
-experiments and flattens them again into a single
-\code{SummarizedExperiment}.
-}
-\details{
 \code{agglomerateByRank} can be used to sum up data based on associations
 with certain taxonomic ranks, as defined in \code{rowData}. Only available
 \code{\link{taxonomyRanks}} can be used.
@@ -216,13 +214,18 @@ retained as defined by \code{archetype}.
 agglomerated, i.e. summed up. If the assay contains values other than counts
 or absolute values, this can lead to meaningless values being produced.
 
+\code{agglomerateByRanks} takes a \code{SummarizedExperiment}, splits it along the
+taxonomic ranks, aggregates the data per rank, converts the input to a
+\code{SingleCellExperiment} objects and stores the aggregated data as
+alternative experiments. \code{unsplitByRanks} takes these alternative
+experiments and flattens them again into a single
+\code{SummarizedExperiment}.
+}
+\details{
 Agglomeration sums up the values of assays at the specified taxonomic level.
-With
-certain assays, e.g. those that include binary or negative values, this
-summing
-can produce meaningless values. In those cases, consider performing
-agglomeration
-first, and then applying the transformation afterwards.
+With certain assays, e.g. those that include binary or negative values, this
+summing can produce meaningless values. In those cases, consider performing
+agglomeration first, and then applying the transformation afterwards.
 
 \code{agglomerateByVariable} works similarly to
 \code{\link[scuttle:sumCountsAcrossFeatures]{sumCountsAcrossFeatures}}.
@@ -277,9 +280,9 @@ tse <- transformAssay(GlobalPatterns, method = "pa")
 tse <- agglomerateByRank(tse, rank = "Genus")
 tse <- transformAssay(tse, method = "pa")
 
-# removing empty labels by setting na.rm = TRUE
+# Removing empty labels by setting empty.rm = TRUE
 sum(is.na(rowData(GlobalPatterns)$Family))
-x3 <- agglomerateByRank(GlobalPatterns, rank="Family", na.rm = TRUE)
+x3 <- agglomerateByRank(GlobalPatterns, rank="Family", empty.rm = TRUE)
 nrow(x3) # different from x2
 
 # Because all the rownames are from the same rank, rownames do not include
@@ -291,20 +294,19 @@ rownames(x3) <- getTaxonomyLabels(x3, with.rank = TRUE)
 print(rownames(x3[1:3,]))
 
 # use 'empty.ranks.rm' to remove columns that include only NAs
-x4 <- agglomerateByRank(GlobalPatterns, rank="Phylum",
-                        empty.ranks.rm = TRUE)
+x4 <- agglomerateByRank(
+    GlobalPatterns, rank="Phylum", empty.ranks.rm = TRUE)
 head(rowData(x4))
 
-# If the assay contains NAs, you might want to consider replacing them,
+# If the assay contains NAs, you might want to specify na.rm=TRUE,
 # since summing-up NAs lead to NA
 x5 <- GlobalPatterns
 # Replace first value with NA
 assay(x5)[1,1] <- NA
 x6 <- agglomerateByRank(x5, "Kingdom")
 head( assay(x6) )
-# Replace NAs with 0. This is justified when we are summing-up counts.
-assay(x5)[ is.na(assay(x5)) ] <- 0
-x6 <- agglomerateByRank(x5, "Kingdom")
+# Use na.rm=TRUE
+x6 <- agglomerateByRank(x5, "Kingdom", na.rm = TRUE)
 head( assay(x6) )
 
 ## Look at enterotype dataset...
@@ -318,18 +320,19 @@ taxonomyRanks(enterotype)
 data(esophagus)
 esophagus
 plot(rowTree(esophagus))
-# get a factor for merging
+# Get a factor for merging
 f <- factor(regmatches(rownames(esophagus),
-                       regexpr("^[0-9]*_[0-9]*",rownames(esophagus))))
-merged <- agglomerateByVariable(esophagus, by = "rows", f,
-                                update.tree = TRUE)
+    regexpr("^[0-9]*_[0-9]*",rownames(esophagus))))
+merged <- agglomerateByVariable(
+    esophagus, by = "rows", f, update.tree = TRUE)
 plot(rowTree(merged))
 #
 data(GlobalPatterns)
 GlobalPatterns
-merged <- agglomerateByVariable(GlobalPatterns, by = "cols",
-                                colData(GlobalPatterns)$SampleType)
+merged <- agglomerateByVariable(
+    GlobalPatterns, by = "cols", colData(GlobalPatterns)$SampleType)
 merged
+
 data(GlobalPatterns)
 # print the available taxonomic ranks
 taxonomyRanks(GlobalPatterns)
diff --git a/man/agglomerateByPrevalence.Rd b/man/agglomerateByPrevalence.Rd
index ec3247212..26495b8af 100644
--- a/man/agglomerateByPrevalence.Rd
+++ b/man/agglomerateByPrevalence.Rd
@@ -32,8 +32,8 @@ agglomerateByPrevalence(x, ...)
 \code{SummarizedExperiment} objects and other functions.
 See \code{\link[=agglomerate-methods]{agglomerateByRank}} for more details.}
 
-\item{rank}{\code{Character scalar}. Defines a taxonomic rank. Must be a value of
-\code{taxonomyRanks()} function.}
+\item{rank}{\code{Character scalar}. Defines a taxonomic rank. Must be a
+value of \code{taxonomyRanks()} function.}
 
 \item{other.name}{\code{Character scalar}. Used as the label for the
 summary of non-prevalent taxa. (default: \code{"Other"})}
diff --git a/man/getPrevalence.Rd b/man/getPrevalence.Rd
index 370cb3a6d..a7414df2f 100644
--- a/man/getPrevalence.Rd
+++ b/man/getPrevalence.Rd
@@ -115,8 +115,6 @@ getPrevalentAbundance(
 \item If \code{!is.null(rank)} arguments are passed on to
 \code{\link[=agglomerate-methods]{agglomerateByRank}}. See
 \code{\link[=agglomerate-methods]{?agglomerateByRank}} for more details.
-Note that you can specify whether to remove empty ranks with
-\code{agg.na.rm} instead of \code{na.rm}. (default: \code{FALSE})
 
 \item for \code{getPrevalent}, \code{getRare}, \code{subsetByPrevalent}
 and \code{subsetByRare} additional parameters passed to
@@ -126,8 +124,8 @@ and \code{subsetByRare} additional parameters passed to
 \code{getPrevalent}
 }}
 
-\item{detection}{\code{Numeric scalar}. Detection threshold for absence/presence.
-If \code{as_relative = FALSE},
+\item{detection}{\code{Numeric scalar}. Detection threshold for
+absence/presence. If \code{as_relative = FALSE},
 it sets the counts threshold for a taxon to be considered present.
 If \code{as_relative = TRUE}, it sets the relative abundance threshold
 for a taxon to be considered present. (Default: \code{0})}
@@ -140,16 +138,16 @@ detection and prevalence cutoffs be included? (Default: \code{FALSE})}
 \item{sort}{\code{Logical scalar}. Should the result be sorted by prevalence?
 (Default: \code{FALSE})}
 
-\item{na.rm}{\code{Logical scalar}. Should NA values be omitted when calculating
-prevalence? (Default: \code{TRUE})}
+\item{na.rm}{\code{Logical scalar}. Should NA values be omitted?
+(Default: \code{TRUE})}
 
 \item{assay.type}{\code{Character scalar}. Specifies which assay to use for
 calculation. (Default: \code{"counts"})}
 
 \item{assay_name}{Deprecated. Use \code{assay.type} instead.}
 
-\item{rank}{\code{Character scalar}. Defines a taxonomic rank. Must be a value of
-\code{taxonomyRanks()} function.}
+\item{rank}{\code{Character scalar}. Defines a taxonomic rank. Must be a
+value of \code{taxonomyRanks()} function.}
 
 \item{prevalence}{Prevalence threshold (in 0 to 1). The
 required prevalence is strictly greater by default. To include the
diff --git a/man/summaries.Rd b/man/summaries.Rd
index 5919a0436..7874cd78a 100644
--- a/man/summaries.Rd
+++ b/man/summaries.Rd
@@ -55,8 +55,8 @@ assay used in calculation. (Default: \code{"counts"})}
 
 \item{assay_name}{Deprecated. Use \code{assay.type} instead.}
 
-\item{na.rm}{\code{Logical scalar}. Should NA values be omitted when calculating
-prevalence? (Default: \code{TRUE})}
+\item{na.rm}{\code{Logical scalar}. Should NA values be omitted?
+(Default: \code{TRUE})}
 
 \item{...}{Additional arguments passed on to \code{agglomerateByRank()} when
 \code{rank} is specified for \code{summarizeDominance}.}
diff --git a/man/taxonomy-methods.Rd b/man/taxonomy-methods.Rd
index f125981fa..3617c6e2b 100644
--- a/man/taxonomy-methods.Rd
+++ b/man/taxonomy-methods.Rd
@@ -71,8 +71,8 @@ IdTaxaToDataFrame(from)
 \arguments{
 \item{x}{\code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}}.}
 
-\item{rank}{\code{Character scalar}. Defines a taxonomic rank. Must be a value of
-\code{taxonomyRanks()} function.}
+\item{rank}{\code{Character scalar}. Defines a taxonomic rank. Must be a
+value of \code{taxonomyRanks()} function.}
 
 \item{empty.fields}{\code{Character vector}. Defines which values should be
 regarded as empty. (Default: \code{c(NA, "", " ", "\t")}). They will be
diff --git a/tests/testthat/test-2merge.R b/tests/testthat/test-2merge.R
index 7189d4ec3..48e49139b 100644
--- a/tests/testthat/test-2merge.R
+++ b/tests/testthat/test-2merge.R
@@ -45,20 +45,20 @@ test_that("merge", {
     actual <- mia:::.get_element_pos(f, archetype = c(2,1))
     expect_equal(actual,c(a = 2, b = 4))
 
-    # .merge_rows
+    # .merge_rows_or_cols
     mat <- matrix(1:60, nrow = 6)
     gr <- GRanges("chr1",rep("1-6",6))
     df <- DataFrame(n = c(1:6))
     mcols(gr) <- df
     grl <- splitAsList(gr,1:6)
-    expect_error(mia:::.merge_rows(),
+    expect_error(mia:::.merge_rows_or_cols(by = 1L),
                  'argument "f" is missing')
     x <- SummarizedExperiment(assays = list(mat = mat))
     xr <- SummarizedExperiment(assays = list(mat = mat),
                                rowRanges = gr)
     xrl <- SummarizedExperiment(assays = list(mat = mat),
                                 rowRanges = unname(grl))
-    expect_error(mia:::.merge_rows(x),
+    expect_error(mia:::.merge_rows_or_cols(x, by = 1L),
                  'argument "f" is missing')
     FUN_check_x <- function(x,archetype=1){
         actual <- agglomerateByVariable(x, by = "rows", f, archetype)
@@ -84,9 +84,124 @@ test_that("merge", {
     }
     lapply(list(xtse),FUN_check_x)
     lapply(list(xtse),FUN_check_x,archetype=2)
+    
+    # Check that average works as expected. average parameter controls whether
+    # to calculate mean or sum. Check that mean is correctly calculated when
+    # there are NAs
+    #
+    # Calculate average and sum for each row group
+    summary_FUN_rows <- function(x, col.var){
+        # Row grouping variable; groups are visited in sorted order
+        grouping <- rowData(x)[[col.var]]
+        stat_names <- c("sum", "sum_na", "mean", "mean_na")
+        # Per-group column sums and means, with and without NA removal
+        per_group <- lapply(sort(unique(grouping)), function(lvl){
+            sub_mat <- assay(x[grouping == lvl, ])
+            list(
+                sum = colSums(sub_mat, na.rm = FALSE),
+                sum_na = colSums(sub_mat, na.rm = TRUE),
+                mean = colMeans(sub_mat, na.rm = FALSE),
+                mean_na = colMeans(sub_mat, na.rm = TRUE))
+        })
+        # One matrix per statistic: one row per group
+        combined <- lapply(stat_names, function(stat_name){
+            do.call(rbind, lapply(per_group, function(g) g[[stat_name]]))
+        })
+        return(stats::setNames(combined, stat_names))
+    }
+    # Generate data; seed the RNG so the random groups are reproducible
+    set.seed(1234)
+    tse <- mockSCE()
+    rowData(tse)[["group"]] <- sample(LETTERS, nrow(tse), replace = TRUE)
+    colData(tse)[["group"]] <- sample(LETTERS, ncol(tse), replace = TRUE)
+    # Put NAs into the assay (linear indices, column-major order)
+    assay(tse)[c(1, 5, 3, 6)] <- NA
+    # Test with NAs
+    res_sum <- agglomerateByVariable(tse, by = 1, group = "group", average = FALSE, na.rm = FALSE)
+    res_sum_na <- agglomerateByVariable(tse, by = 1, group = "group", average = FALSE, na.rm = TRUE)
+    res_mean <- agglomerateByVariable(tse, by = 1, group = "group", average = TRUE, na.rm = FALSE)
+    res_mean_na <- agglomerateByVariable(tse, by = 1, group = "group", average = TRUE, na.rm = TRUE)
+    ref <- summary_FUN_rows(tse, "group")
+    #
+    expect_equal(assay(res_sum), ref[["sum"]], check.attributes = FALSE)
+    expect_equal(assay(res_sum_na), ref[["sum_na"]], check.attributes = FALSE)
+    expect_equal(assay(res_mean), ref[["mean"]], check.attributes = FALSE)
+    expect_equal(assay(res_mean_na), ref[["mean_na"]], check.attributes = FALSE)
+    # Calculate average and sum for each column group
+    summary_FUN_cols <- function(x, col.var){
+        # Column grouping variable; groups are visited in sorted order
+        grouping <- colData(x)[[col.var]]
+        stat_names <- c("sum", "sum_na", "mean", "mean_na")
+        # Per-group row sums and means, with and without NA removal
+        per_group <- lapply(sort(unique(grouping)), function(lvl){
+            sub_mat <- assay(x[, grouping == lvl])
+            list(
+                sum = rowSums(sub_mat, na.rm = FALSE),
+                sum_na = rowSums(sub_mat, na.rm = TRUE),
+                mean = rowMeans(sub_mat, na.rm = FALSE),
+                mean_na = rowMeans(sub_mat, na.rm = TRUE))
+        })
+        # One matrix per statistic: one column per group
+        combined <- lapply(stat_names, function(stat_name){
+            do.call(cbind, lapply(per_group, function(g) g[[stat_name]]))
+        })
+        return(stats::setNames(combined, stat_names))
+    }
+    # Test with NAs
+    res_sum <- agglomerateByVariable(tse, by = 2, group = "group", average = FALSE, na.rm = FALSE)
+    res_sum_na <- agglomerateByVariable(tse, by = 2, group = "group", average = FALSE, na.rm = TRUE)
+    res_mean <- agglomerateByVariable(tse, by = 2, group = "group", average = TRUE, na.rm = FALSE)
+    res_mean_na <- agglomerateByVariable(tse, by = 2, group = "group", average = TRUE, na.rm = TRUE)
+    ref <- summary_FUN_cols(tse, "group")
+    #
+    expect_equal(assay(res_sum), ref[["sum"]], check.attributes = FALSE)
+    expect_equal(assay(res_sum_na), ref[["sum_na"]], check.attributes = FALSE)
+    expect_equal(assay(res_mean), ref[["mean"]], check.attributes = FALSE)
+    expect_equal(assay(res_mean_na), ref[["mean_na"]], check.attributes = FALSE)
+    
+    # Check that agglomerateByRank and agglomerateByVariable work correctly
+    # with na.rm
+    data(GlobalPatterns, package="mia")
+    tse <- GlobalPatterns
+    col_idx <- sample(seq_len(ncol(tse)), 1)
+    row_idx <- sample(seq_len(nrow(tse)), 1)
+    tse_mod <- tse
+    assay(tse_mod)[row_idx, col_idx] <- NA
+    group <- colData(tse)[col_idx, "SampleType"]
+    # na.rm = FALSE
+    tse_sub <- agglomerateByVariable(tse_mod, by = "cols", group = "SampleType", na.rm = FALSE)
+    test_mat <- tse_sub |> assay()
+    # na.rm = TRUE
+    tse_sub <- agglomerateByVariable(tse_mod, by = "cols", group = "SampleType", na.rm = TRUE)
+    ref_mat <- tse_sub |> assay()
+    group_idx <- which(colnames(tse_sub) == group)
+    expect_true( is.na(test_mat[row_idx, group_idx]) )
+    expect_true( !is.na(ref_mat[row_idx, group_idx]) )
+    expect_equal(test_mat[-row_idx, -group_idx], ref_mat[-row_idx, -group_idx])
+    #
+    # na.rm = FALSE
+    group <- rowData(tse)[row_idx, "Kingdom"]
+    tse_sub <- agglomerateByVariable(tse_mod, by = "rows", group = "Kingdom", na.rm = FALSE)
+    test_mat <- tse_sub |> assay()
+    # na.rm = TRUE
+    tse_sub <- agglomerateByVariable(tse_mod, by = "rows", group = "Kingdom", na.rm = TRUE)
+    ref_mat <- tse_sub |> assay()
+    group_idx <- which(rownames(tse_sub) == group)
+    expect_true( is.na(test_mat[group_idx, col_idx]) )
+    expect_true( !is.na(ref_mat[group_idx, col_idx]) )
+    expect_equal(test_mat[-group_idx, -col_idx], ref_mat[-group_idx, -col_idx])
+    #
+    # na.rm = FALSE
+    tse_sub <- agglomerateByRank(tse_mod, rank = "Kingdom", na.rm = FALSE)
+    test_mat2 <- tse_sub |> assay()
+    # na.rm = TRUE
+    tse_sub <- agglomerateByRank(tse_mod, rank = "Kingdom", na.rm = TRUE)
+    ref_mat2 <- tse_sub |> assay()
+    expect_equal(test_mat, test_mat2)
+    expect_equal(ref_mat, ref_mat2)
+    
     # Check multiple rowTrees
     data(esophagus, package="mia")
-    data(GlobalPatterns, package="mia")
     # Add arbitrary groups
     rowData(esophagus)$group <- c(rep(c("A", "B", "C"), each = nrow(esophagus)/3),
                                   rep("A", nrow(esophagus)-round(nrow(esophagus)/3)*3) )
diff --git a/tests/testthat/test-3agglomerate.R b/tests/testthat/test-3agglomerate.R
index cf029ab06..3c03151a6 100644
--- a/tests/testthat/test-3agglomerate.R
+++ b/tests/testthat/test-3agglomerate.R
@@ -27,27 +27,27 @@ test_that("agglomerate", {
     expect_equal(assays(actual)$mat[2,1],c(b = 36))
     expect_equal(assays(actual)$mat[3,1],c(c = 24))
     #
-    expect_error(agglomerateByRank(xtse,"",na.rm=FALSE),
+    expect_error(agglomerateByRank(xtse,"",empty.rm=FALSE),
                  "'rank' must be a non-empty single character value")
-    expect_error(agglomerateByRank(xtse,"Family",na.rm=""),
-                 "'na.rm' must be TRUE or FALSE")
+    expect_error(agglomerateByRank(xtse,"Family",empty.rm=""),
+                 "'empty.rm' must be TRUE or FALSE")
     expect_error(
-        agglomerateByRank(xtse,"Family",na.rm=FALSE,update.tree=""),
+        agglomerateByRank(xtse,"Family",empty.rm=FALSE,update.tree=""),
         "'update.tree' must be TRUE or FALSE")
     xtse2 <- xtse
     rowData(xtse2) <- NULL
-    expect_error(agglomerateByRank(xtse2,"Family",na.rm=FALSE),
+    expect_error(agglomerateByRank(xtse2,"Family",empty.rm=FALSE),
                  "taxonomyData needs to be populated")
     #
-    actual <- agglomerateByRank(xtse,"Family",na.rm=FALSE)
+    actual <- agglomerateByRank(xtse,"Family",empty.rm=FALSE)
     expect_equivalent(rowData(actual),rowData(actual_family))
-    actual <- agglomerateByRank(xtse,"Phylum",na.rm=FALSE)
+    actual <- agglomerateByRank(xtse,"Phylum",empty.rm=FALSE)
     expect_equivalent(rowData(actual),rowData(actual_phylum))
     #
-    actual <- agglomerateByRank(xtse,"Family", ignore.taxonomy = FALSE, na.rm = TRUE)
+    actual <- agglomerateByRank(xtse,"Family", ignore.taxonomy = FALSE, empty.rm = TRUE)
     expect_equal(dim(actual),c(6,10))
     expect_equal(rowData(actual)$Family,c("c","d","e","f","g","h"))
-    actual <- agglomerateByRank(xtse,"Family", ignore.taxonomy = FALSE, na.rm = FALSE) # the default
+    actual <- agglomerateByRank(xtse,"Family", ignore.taxonomy = FALSE, empty.rm = FALSE)
     expect_equal(dim(actual),c(8,10))
     expect_equal(rowData(actual)$Family,c("c","d","e","f","g","h",NA,NA))
     actual <- agglomerateByRank(xtse,"Phylum")
@@ -63,22 +63,22 @@ test_that("agglomerate", {
     data(enterotype, package="mia")
     expect_equal(length(unique(rowData(enterotype)[,"Genus"])),
                  nrow(agglomerateByRank(enterotype,"Genus", ignore.taxonomy = FALSE, 
-                 na.rm = FALSE)))
+                 empty.rm = FALSE)))
 
     # agglomeration in all its forms
     data(GlobalPatterns, package="mia")
     se <- GlobalPatterns
     actual <- agglomerateByRank(se, rank = "Family", 
-        ignore.taxonomy = FALSE, na.rm = FALSE)
+        ignore.taxonomy = FALSE, empty.rm = FALSE)
     expect_equal(dim(actual),c(603,26))
     expect_equal(length(rowTree(actual)$tip.label),
                  length(rowTree(se)$tip.label))
     actual <- agglomerateByRank(se, rank = "Family", 
-        ignore.taxonomy = FALSE, na.rm = FALSE, update.tree = TRUE)
+        ignore.taxonomy = FALSE, empty.rm = FALSE, update.tree = TRUE)
     expect_equal(dim(actual),c(603,26))
     expect_equal(length(rowTree(actual)$tip.label), 603)
     actual <- agglomerateByRank(se, rank = "Family", 
-        ignore.taxonomy = FALSE, na.rm = FALSE, update.tree = TRUE)
+        ignore.taxonomy = FALSE, empty.rm = FALSE, update.tree = TRUE)
     expect_equal(dim(actual),c(603,26))
     expect_equal(length(rowTree(actual)$tip.label), nrow(actual))
     # Test that warning occurs when assay contian binary or negative values
@@ -92,30 +92,30 @@ test_that("agglomerate", {
     data(GlobalPatterns, package="mia")
     tse <- GlobalPatterns
 
-    # Check that na.rm works
+    # Check that empty.rm works
     # Get all phyla
     all_phyla <- unique( rowData(tse)$Phylum )
     
-    # When na.rm = FALSE, then phyla should also include NA --> one extra row
-    test0 <- agglomerateByVariable(tse, by = 1, group = "Phylum", na.rm = FALSE)
-    test1 <- agglomerateByRank(tse, rank = "Phylum", na.rm = FALSE)
+    # When empty.rm = FALSE, then phyla should also include NA --> one extra row
+    test0 <- agglomerateByVariable(tse, by = 1, group = "Phylum", empty.rm = FALSE)
+    test1 <- agglomerateByRank(tse, rank = "Phylum", empty.rm = FALSE)
     
     # Test that dimentionality is the same for merging object by agglomerateByRank
     # and agglomerateByVariable.
     expect_equal(nrow(test0), length(all_phyla))
     expect_equal(nrow(test1), length(all_phyla))
     
-    # When na.rm = TRUE, there should be as many rows as there are non-NA phyla
-    test0 <- agglomerateByVariable(tse, by = 1, group = "Phylum", na.rm = TRUE)
-    test1 <- agglomerateByRank(tse, rank = "Phylum", na.rm = TRUE)
+    # When empty.rm = TRUE, there should be as many rows as there are non-NA phyla
+    test0 <- agglomerateByVariable(tse, by = 1, group = "Phylum", empty.rm = TRUE)
+    test1 <- agglomerateByRank(tse, rank = "Phylum", empty.rm = TRUE)
     
-    # Test that dimentionality is the same when NA values are removed.
+    # Test that dimensionality is the same when NA values are removed.
     expect_equal(nrow(test0), length( all_phyla[!is.na(all_phyla)] ))
     expect_equal(nrow(test1), length( all_phyla[!is.na(all_phyla)] ))
     
     # Check that there are more taxa when agglomeration is to "Species" level
-    test0 <- agglomerateByVariable(tse, by = 1, group = "Species", na.rm = FALSE)
-    test1 <- agglomerateByRank(tse, rank = "Species", na.rm = FALSE)
+    test0 <- agglomerateByVariable(tse, by = 1, group = "Species", empty.rm = FALSE)
+    test1 <- agglomerateByRank(tse, rank = "Species", empty.rm = FALSE)
     expect_equal(nrow(test0), 945)
     expect_equal(nrow(test1), 2307)
     
@@ -143,9 +143,9 @@ test_that("agglomerate", {
     expect_equal(rd1[, cols], rd2[, cols])
     expect_true( ncol(rd1) > ncol(rd2) )
     # Test that make.unique work
-    uniq <- agglomerateByRank(tse, rank = "Species", na.rm = FALSE)
+    uniq <- agglomerateByRank(tse, rank = "Species", empty.rm = FALSE)
     not_uniq <- agglomerateByRank(
-        tse, rank = "Species", make.unique = FALSE, na.rm = FALSE)
+        tse, rank = "Species", make.unique = FALSE, empty.rm = FALSE)
     expect_true( !any( duplicated(rownames(uniq)) ) )
     expect_true( any( duplicated(rownames(not_uniq)) ) )
     
diff --git a/tests/testthat/test-5dominantTaxa.R b/tests/testthat/test-5dominantTaxa.R
index f415cd6e9..ee47df59b 100644
--- a/tests/testthat/test-5dominantTaxa.R
+++ b/tests/testthat/test-5dominantTaxa.R
@@ -21,8 +21,6 @@ test_that("getDominant", {
         expect_equal(getDominant(tse)[1:15], exp.vals.one)
 
         # Test at taxonomic level for values are passed to agglomerateRanks
-        getDominant(tse, rank = "Genus", na.rm = FALSE)
-
         exp.vals.two <- c("Genus:CandidatusSolibacter", "Genus:MC18",
                           "Class:Chloracidobacteria", "Genus:Bacteroides",
                           "Genus:Bacteroides", "Genus:Streptococcus",
@@ -31,21 +29,17 @@ test_that("getDominant", {
                           "Genus:Dolichospermum", "Family:ACK-M1",
                           "Order:Stramenopiles","Order:Stramenopiles","Order:Stramenopiles")
         names(exp.vals.two) <- exp.names.one
-        expect_equal(getDominant(tse,
-                                           rank = "Genus",
-                                           ignore.taxonomy = FALSE,
-                                           na.rm = FALSE)[1:15],
-                     exp.vals.two)
+        expect_equal(
+            getDominant(tse, rank = "Genus", ignore.taxonomy = FALSE, empty.rm = FALSE)[1:15],
+            exp.vals.two)
 
         # Check if DominantTaxa is added to coldata
-        expect_equal(colData(addDominant(tse,
-                                            name="dominant"))$dominant[1:15],
-                     exp.vals.one)
-        expect_equal(colData(addDominant(tse,
-                                            rank = "Genus",
-                                            na.rm = FALSE,
-                                            name="dominant"))$dominant[1:15],
-                     exp.vals.two)
+        expect_equal(
+            colData(addDominant(tse, name="dominant"))$dominant[1:15],
+            exp.vals.one)
+        expect_equal(
+            colData(addDominant(tse,rank = "Genus", empty.rm = FALSE, name="dominant"))$dominant[1:15],
+            exp.vals.two)
         
         # Check if DominantTaxa is added when factor is passed
         exp.vals.three <- c(
@@ -57,10 +51,9 @@ test_that("getDominant", {
         names(exp.vals.three) <- exp.names.one
         test <- tse
         rowData(test)$group <- rowData(tse)$Genus
-        expect_equal(colData(addDominant(test, rank = "group", na.rm = TRUE,
-                                         name="dominant"))$dominant[1:15],
-                     exp.vals.three)
-        
+        expect_equal(
+            colData(addDominant(test, rank = "group", empty.rm = TRUE, name = "dominant"))$dominant[1:15],
+            exp.vals.three)
         
         tse1 <- tse
         # Now data contains 2 dominant taxa in one sample
diff --git a/tests/testthat/test-5prevalence.R b/tests/testthat/test-5prevalence.R
index a77ea05d2..bec4a4185 100644
--- a/tests/testthat/test-5prevalence.R
+++ b/tests/testthat/test-5prevalence.R
@@ -106,16 +106,16 @@ test_that("getPrevalence", {
     remove <- c(15, 200)
     assay(tse, "counts")[remove, ] <- NA
     # Check that agglomeration works
-    tse_agg <- agglomerateByRank(tse, ignore.taxonomy = FALSE, na.rm = FALSE, rank = rank)
+    tse_agg <- agglomerateByRank(tse, ignore.taxonomy = FALSE, empty.rm = FALSE, rank = rank)
     expect_warning(ref <- getPrevalence(tse_agg, na.rm = FALSE))
-    expect_warning(res <- getPrevalence(tse, rank = "Genus", agg.na.rm = FALSE))
+    expect_warning(res <- getPrevalence(tse, rank = "Genus", empty.rm = FALSE))
     expect_true( all(res == ref, na.rm = TRUE) )
     #
     tse_agg <- agglomerateByRank(
-        tse, ignore.taxonomy = FALSE, na.rm = TRUE, rank = rank)
+        tse, ignore.taxonomy = FALSE, empty.rm = TRUE, rank = rank)
     ref <- getPrevalence(tse_agg, na.rm = TRUE)
     res <- getPrevalence(
-        tse, na.rm = TRUE, rank = "Genus", agg.na.rm = TRUE)
+        tse, na.rm = TRUE, rank = "Genus", empty.rm = TRUE)
     expect_true( all(res == ref, na.rm = TRUE) )
 })
 
diff --git a/vignettes/mia.Rmd b/vignettes/mia.Rmd
index a4db08db9..da7bfd655 100644
--- a/vignettes/mia.Rmd
+++ b/vignettes/mia.Rmd
@@ -137,13 +137,13 @@ becomes very easy.
 altExp(tse, "family") <- x2
 ```
 
-Keep in mind, that if you set `na.rm = TRUE`, rows with `NA` or similar value
+Keep in mind that if you set `empty.rm = TRUE`, rows with `NA` or a similar value
 (defined via the `empty.fields` argument) will be removed. Depending on these
 settings different number of rows will be returned.
 
 ```{r}
-x1 <- agglomerateByRank(tse, rank = "Species", na.rm = TRUE)
-altExp(tse,"species") <- agglomerateByRank(tse, rank = "Species", na.rm = FALSE)
+x1 <- agglomerateByRank(tse, rank = "Species", empty.rm = TRUE)
+altExp(tse,"species") <- agglomerateByRank(tse, rank = "Species", empty.rm = FALSE)
 dim(x1)
 dim(altExp(tse,"species"))
 ```