From ac07ba9dd387bb60730a5b164977cb08a024c695 Mon Sep 17 00:00:00 2001 From: TuomasBorman Date: Wed, 20 Nov 2024 19:28:06 +0200 Subject: [PATCH] up --- DESCRIPTION | 5 +- NAMESPACE | 1 + NEWS | 1 + R/plotScree.R | 103 ++++++++++++++++++++------------ man/plotScree.Rd | 71 ++++++++++++++-------- tests/testthat/test-plotScree.R | 66 +++++++++----------- 6 files changed, 144 insertions(+), 103 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 3987868..26ba84c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: miaViz Title: Microbiome Analysis Plotting and Visualization -Version: 1.15.2 +Version: 1.15.3 Authors@R: c(person(given = "Tuomas", family = "Borman", role = c("aut", "cre"), email = "tuomas.v.borman@utu.fi", @@ -52,8 +52,9 @@ Imports: ggtree, methods, rlang, - scater, S4Vectors, + scales, + scater, SingleCellExperiment, stats, tibble, diff --git a/NAMESPACE b/NAMESPACE index 4101ba2..12132f4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -135,6 +135,7 @@ importFrom(mia,meltSE) importFrom(rlang,"!!") importFrom(rlang,":=") importFrom(rlang,sym) +importFrom(scales,pretty_breaks) importFrom(scater,plotReducedDim) importFrom(scater,retrieveCellInfo) importFrom(scater,retrieveFeatureInfo) diff --git a/NEWS b/NEWS index 36e02f4..74b4a4a 100644 --- a/NEWS +++ b/NEWS @@ -36,3 +36,4 @@ Changes in version 1.13.x Changes in version 1.15.x + plotAbundance: Improved visualization of sample metadata ++ plotScree: Method for creating scree plots diff --git a/R/plotScree.R b/R/plotScree.R index 03f9734..66bb329 100644 --- a/R/plotScree.R +++ b/R/plotScree.R @@ -5,35 +5,41 @@ #' Create a scree plot #' #' @description -#' \code{plotScree} creates a scree plot or eigenvalues plot starting from a -#' TreeSummarizedExperiment object or a vector of eigenvalues. This visualization -#' shows how the eigenvalues decrease across components. +#' \code{plotScree} generates a scree plot to visualize the eigenvalues. 
+#' The eigenvalues can be provided either as a part of a +#' \code{TreeSummarizedExperiment} object or as a separate \code{vector}. +#' This plot illustrates the decline in eigenvalues across components, +#' helping to assess the importance of each component. #' #' @details -#' \code{plotScree} creates a scree plot or eigenvalues plot, which is useful -#' for visualizing the relative importance of components in dimensionality -#' reduction techniques like PCA, RDA, or CCA. When the input is a -#' TreeSummarizedExperiment, the function extracts eigenvalues from the specified -#' reduced dimension slot. When the input is a vector, it directly uses these -#' values as eigenvalues. +#' \code{plotScree} generates a scree plot to visualize the relative importance +#' of components in dimensionality reduction techniques such as Principal +#' Component Analysis (PCA) or Principal Coordinate Analysis (PCoA). If the +#' input is a \code{TreeSummarizedExperiment} object, the function extracts +#' eigenvalues from the specified reduced dimension slot, which requires that +#' dimensionality reduction has been performed beforehand using a dedicated +#' function. Alternatively, if the input is a \code{vector} or an +#' \code{eigenvals} object, these values are directly used as eigenvalues for +#' the plot. #' #' The plot can include a combination of barplot, points, connecting lines, #' and labels, which can be controlled using the \code{show.*} parameters. #' #' An option to show cumulative explained variance is also available by setting -#' \code{cumulative = TRUE}. +#' \code{add.cumulative = TRUE}. #' #' @return #' A \code{ggplot2} object #' #' @param x a #' \code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-constructor]{TreeSummarizedExperiment}} -#' or a vector of eigenvalues. +#' \code{\link[vegan:eigenvals]{eigenvals}} or a vector. +#' #' @param dimred \code{Character scalar} or \code{integer scalar}. Determines -#' the reduced dimension to plot. 
This is used when x is a TreeSummarizedExperiment -#' to extract the eigenvalues from \code{reducedDim(x, dimred)}. -#' @param cumulative \code{Logical scalar}. Whether to show cumulative explained -#' variance. (Default: \code{FALSE}). +#' the reduced dimension to plot. This is used when \code{x} is a +#' \code{TreeSummarizedExperiment} to extract the eigenvalues from +#' \code{reducedDim(x, dimred)}. +#' #' @param ... additional parameters for plotting #' \itemize{ #' \item \code{show.barplot}: \code{Logical scalar}. Whether to show a @@ -47,20 +53,41 @@ #' #' \item \code{show.labels}: \code{Logical scalar}. Whether to show a #' labels for each point. (Default: \code{FALSE}) +#' +#' \item \code{add.proportion}: \code{Logical scalar}. Whether to show +#' proportion of explained variance, i.e., raw eigenvalues. +#' (Default: \code{TRUE}) +#' +#' \item \code{add.cumulative}: \code{Logical scalar}. Whether to show +#' cumulative explained variance calculated from eigenvalues. +#' (Default: \code{FALSE}) +#' +#' \item \code{n}: \code{Integer scalar}. Number of eigenvalues to plot. +#' If \code{NULL}, all eigenvalues are plotted. (Default: \code{NULL}) +#' +#' \item \code{show.names}: \code{Logical scalar}. Whether to show names of +#' components in x-axis. If \code{FALSE}, the index of component is shown +#' instead of names. (Default: \code{FALSE}) +#' +#' \item \code{eig.name}: \code{Character scalar}. The name of the attribute +#' in \code{reducedDim(x, dimred)} that contains the eigenvalues. 
+#' (Default: \code{c("eig", "varExplained")}) #' } #' #' @examples #' #' library(miaViz) #' library(scater) +#' #' data("enterotype", package = "mia") #' tse <- enterotype #' #' # Run PCA and store results into TreeSE -#' tse <- runPCA(tse, assay.type = "counts") +#' tse <- transformAssay(tse, method = "clr", pseudocount = TRUE) +#' tse <- runPCA(tse, assay.type = "clr") #' #' # Plot scree plot -#' plotScree(tse, "PCA") +#' plotScree(tse, "PCA", add.cumulative = TRUE) #' NULL @@ -87,8 +114,8 @@ setMethod("plotScree", signature = c(x = "ANY"), is_correct <- length(x) > 0L && ((is.vector(x) && is.numeric(x)) || is(x, "eigenvals")) if( !is_correct ){ - stop("Eigenvalues must be either numeric vector or class ", - "'eigenvals'.", call. = FALSE) + stop("'x' must be a numeric vector or class 'eigenvals'.", + call. = FALSE) } # Prepare data for plotting plot_data <- .prepare_data(x, ...) @@ -101,13 +128,14 @@ setMethod("plotScree", signature = c(x = "ANY"), ################################ HELP FUNCTIONS ################################ # This function retrieves the eigenvalues from reducedDim. The ordination must -# be calculated with +# be calculated with dedicated function in mia or scater so that the eigenvalues +# are stored in the correct place. .get_eigenvalues <- function( x, dimred, eig.name = c("eig", "varExplained"), ...){ # Get reducedDim if( !((.is_a_string(dimred) && dimred %in% reducedDimNames(x)) || - (.is_an_integer(dimred) && dimred > 0 && - dimred <= length(reducedDims(x)))) ){ + (.is_an_integer(dimred) && dimred > 0 && + dimred <= length(reducedDims(x)))) ){ stop("'dimred' must specify a valid reducedDim.", call. 
= FALSE) } reduced_dim <- reducedDim(x, dimred) @@ -141,13 +169,13 @@ setMethod("plotScree", signature = c(x = "ANY"), } # # Create a data.frame with eigenvalues - df <- data.frame(value = x) - df[["pc"]] <- factor(rownames(df), levels = unique(rownames(df))) + df <- data.frame(y = x) + df[["x"]] <- factor(rownames(df), levels = unique(rownames(df))) df[["type"]] <- "proportion" # Calculate cumulative proportion df_cum <- df - df_cum[["value"]] <- cumsum(df_cum[["value"]]) / - sum(df_cum[["value"]], na.rm = TRUE) + df_cum[["y"]] <- cumsum(df_cum[["y"]]) / + sum(df_cum[["y"]], na.rm = TRUE) df_cum[["type"]] <- "cumulative" df <- rbind(df, df_cum) @@ -160,18 +188,19 @@ setMethod("plotScree", signature = c(x = "ANY"), } # If user has specified, take only n first eigenvalues if( !is.null(n) ){ - n <- levels(df[["pc"]])[ seq_len(n) ] - df <- df[ df[["pc"]] %in% n, ] + n <- levels(df[["x"]])[ seq_len(n) ] + df <- df[ df[["x"]] %in% n, ] } - # Replace names with integers + # Replace names with integers to keep the x-axis of plot tidier if( !show.names ){ - df[["pc"]] <- as.integer(df[["pc"]]) + df[["x"]] <- as.integer(df[["x"]]) } return(df) } # This function creates a scree plot. The input is data.frame that includes # 2 columns: one for eigenvalues and other for principal component name. +#' @importFrom scales pretty_breaks .scree_plotter <- function( df, show.points = TRUE, show.line = TRUE, show.barplot = FALSE, show.labels = FALSE, ...){ @@ -193,13 +222,13 @@ setMethod("plotScree", signature = c(x = "ANY"), # scale cumulative values into same scale as proportion. 
if( length(unique(df[["type"]])) > 1L && !(show.labels || show.barplot) ){ ind <- df[["type"]] == "cumulative" - df[ind, "value"] <- df[ind, "value"] * max(df[!ind, "value"]) # Scale + df[ind, "y"] <- df[ind, "y"] * max(df[!ind, "y"]) # Scale } # Create base plot p <- ggplot(df, aes( - x = pc, - y = value, + x = x, + y = y, group = type, colour = if(length(unique(.data[["type"]])) > 1L && !(show.labels || show.barplot) ) type @@ -215,7 +244,7 @@ setMethod("plotScree", signature = c(x = "ANY"), p <- p + geom_col(width = 0.5) } if( show.labels ){ - p <- p + geom_label(aes(label = round(value, 2))) + p <- p + geom_label(aes(label = round(y, 2))) } # If user wants to add barplots or labels with both cumulative and @@ -231,7 +260,7 @@ setMethod("plotScree", signature = c(x = "ANY"), p <- p + scale_y_continuous( name = "Proportion", sec.axis = sec_axis( - ~ . / max(df[["value"]]), name = "Cumulative proportion")) + ~ . / max(df[["y"]]), name = "Cumulative proportion")) } # Adjust labels in a case where either proportion or cumulative was plotted if( length(unique(df[["type"]])) == 1L ){ @@ -239,9 +268,9 @@ setMethod("plotScree", signature = c(x = "ANY"), } # Adjust the x-axis to display a subset of evenly spaced values for # improved readability - if( is.numeric(df[["pc"]]) ){ + if( is.numeric(df[["x"]]) ){ p <- p + - scale_x_continuous(breaks = scales::pretty_breaks()) + scale_x_continuous(breaks = pretty_breaks()) } # Adjust theme and remove legend p <- p + theme_classic() + diff --git a/man/plotScree.Rd b/man/plotScree.Rd index 943c46e..d68d623 100644 --- a/man/plotScree.Rd +++ b/man/plotScree.Rd @@ -10,12 +10,12 @@ plotScree(x, ...) \S4method{plotScree}{SingleCellExperiment}(x, dimred, ...) -\S4method{plotScree}{ANY}(x, cumulative = FALSE, ...) +\S4method{plotScree}{ANY}(x, ...) 
} \arguments{ \item{x}{a \code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-constructor]{TreeSummarizedExperiment}} -or a vector of eigenvalues.} +\code{\link[vegan:eigenvals]{eigenvals}} or a vector.} \item{...}{additional parameters for plotting \itemize{ @@ -30,53 +30,72 @@ line. (Default: \code{TRUE}) \item \code{show.labels}: \code{Logical scalar}. Whether to show a labels for each point. (Default: \code{FALSE}) + +\item \code{add.proportion}: \code{Logical scalar}. Whether to show +proportion of explained variance, i.e., raw eigenvalues. +(Default: \code{TRUE}) + +\item \code{add.cumulative}: \code{Logical scalar}. Whether to show +cumulative explained variance calculated from eigenvalues. +(Default: \code{FALSE}) + +\item \code{n}: \code{Integer scalar}. Number of eigenvalues to plot. +If \code{NULL}, all eigenvalues are plotted. (Default: \code{NULL}) + +\item \code{show.names}: \code{Logical scalar}. Whether to show names of +components in x-axis. If \code{FALSE}, the index of component is shown +instead of names. (Default: \code{FALSE}) + +\item \code{eig.name}: \code{Character scalar}. The name of the attribute +in \code{reducedDim(x, dimred)} that contains the eigenvalues. +(Default: \code{c("eig", "varExplained")}) }} \item{dimred}{\code{Character scalar} or \code{integer scalar}. Determines -the reduced dimension to plot. This is used when x is a TreeSummarizedExperiment -to extract the eigenvalues from \code{reducedDim(x, dimred)}.} - -\item{cumulative}{\code{Logical scalar}. Whether to show cumulative explained -variance. (Default: \code{FALSE}).} +the reduced dimension to plot. This is used when \code{x} is a +\code{TreeSummarizedExperiment} to extract the eigenvalues from +\code{reducedDim(x, dimred)}.} } \value{ A \code{ggplot2} object } \description{ -\code{plotScree} creates a scree plot or eigenvalues plot starting from a -TreeSummarizedExperiment object or a vector of eigenvalues. 
This visualization -shows how the eigenvalues decrease across components. +\code{plotScree} generates a scree plot to visualize the eigenvalues. +The eigenvalues can be provided either as a part of a +\code{TreeSummarizedExperiment} object or as a separate \code{vector}. +This plot illustrates the decline in eigenvalues across components, +helping to assess the importance of each component. } \details{ -\code{plotScree} creates a scree plot or eigenvalues plot, which is useful -for visualizing the relative importance of components in dimensionality -reduction techniques like PCA, RDA, or CCA. When the input is a -TreeSummarizedExperiment, the function extracts eigenvalues from the specified -reduced dimension slot. When the input is a vector, it directly uses these -values as eigenvalues. +\code{plotScree} generates a scree plot to visualize the relative importance +of components in dimensionality reduction techniques such as Principal +Component Analysis (PCA) or Principal Coordinate Analysis (PCoA). If the +input is a \code{TreeSummarizedExperiment} object, the function extracts +eigenvalues from the specified reduced dimension slot, which requires that +dimensionality reduction has been performed beforehand using a dedicated +function. Alternatively, if the input is a \code{vector} or an +\code{eigenvals} object, these values are directly used as eigenvalues for +the plot. The plot can include a combination of barplot, points, connecting lines, and labels, which can be controlled using the \code{show.*} parameters. An option to show cumulative explained variance is also available by setting -\code{cumulative = TRUE}. +\code{add.cumulative = TRUE}. 
} \examples{ library(miaViz) +library(scater) + data("enterotype", package = "mia") tse <- enterotype -# Run RDA and store results into TreeSE -tse <- addRDA( - tse, - formula = assay ~ ClinicalStatus + Gender + Age, - FUN = getDissimilarity, - distance = "bray", - na.action = na.exclude - ) +# Run PCA and store results into TreeSE +tse <- transformAssay(tse, method = "clr", pseudocount = TRUE) +tse <- runPCA(tse, assay.type = "clr") # Plot scree plot -plotScree(tse, "RDA") +plotScree(tse, "PCA", add.cumulative = TRUE) } diff --git a/tests/testthat/test-plotScree.R b/tests/testthat/test-plotScree.R index f282849..872468d 100644 --- a/tests/testthat/test-plotScree.R +++ b/tests/testthat/test-plotScree.R @@ -1,44 +1,34 @@ test_that("plot Eigenvalues", { - data("enterotype", package = "mia") - tse <- enterotype - - tse <- addRDA( - tse, - formula = assay ~ ClinicalStatus + Gender + Age, - FUN = getDissimilarity, - distance = "bray", - na.action = na.exclude - ) - - # Define some eigenvalues for vector-based tests - eigenvalues <- sort(runif(10), decreasing = TRUE) - # plotScree handles non-numeric eigenvalues in vector - expect_error(plotScree(c("a", "b", "c")), - "'x' must be a numeric vector.") - - # missing eigenvalues in SingleCellExperiment - sce <- SingleCellExperiment(assays = list(counts = matrix(rpois(1000, 5), - ncol = 10))) - + expect_error( + plotScree(c("a", "b", "c")), + "'x' must be a numeric vector or class 'eigenvals'.") + # Missing eigenvalues in TreeSummarizedExperiment + tse <- TreeSummarizedExperiment( + assays = list(counts = matrix(rpois(1000, 5), ncol = 10))) # Add reducedDim without eigenvalues - reducedDim(sce, "PCA") <- matrix(rnorm(100), ncol = 10) - - expect_error(plotScree(sce, "PCA"), - "No eigenvalues found in the specified reducedDim.") - - # invalid dimred input in SingleCellExperiment - expect_error(plotScree(tse, "invalid_dimred"), - "'dimred' must specify a valid reducedDim.") - - p <- plotScree(eigenvalues) - - # Check if a ggplot 
object is returned - expect_s3_class(p, "ggplot") - - - p <- plotScree(tse, "RDA") + reducedDim(tse, "PCA") <- matrix(rnorm(100), ncol = 10) + expect_error(plotScree(tse, "PCA")) + # Invalid dimred input + expect_error( + plotScree(tse, "invalid_dimred"), + "'dimred' must specify a valid reducedDim.") + # Define some eigenvalues for vector-based tests + eigenvalues <- sort(runif(10), decreasing = TRUE) + # Check that eigenvalues are plotted from TreeSE or from vector + attr(reducedDim(tse, "PCA"), "test") <- eigenvalues + expect_error(plotScree(tse, "PCA")) + p1 <- plotScree(tse, "PCA", eig.name = "test") + p2 <- plotScree(eigenvalues) # Check if a ggplot object is returned + expect_s3_class(p1, "ggplot") + # Check if the plots are equal + df1 <- ggplot_build(p1)[[1]][[1]] + df2 <- ggplot_build(p2)[[1]][[1]] + expect_equal(df1, df2) + + # Test with different options + p <- plotScree(tse, 1, eig.name = "test", add.cumulative = TRUE, n = 10000) expect_s3_class(p, "ggplot") - }) \ No newline at end of file +})