From e06581eeb7aaa29fc1dfcf6468b445b03f8cbe56 Mon Sep 17 00:00:00 2001 From: "eric.archer" Date: Mon, 23 Sep 2024 13:26:05 -0700 Subject: [PATCH] added 2-D density and % variance to plotPCs() --- mambo/DESCRIPTION | 3 +- mambo/R/mambo.R | 41 ++++++++++++--------- mambo/R/mambo_package.R | 1 - mambo/R/plotPCs.R | 73 +++++++++++++++++++++++++++++--------- mambo/man/mambo_package.Rd | 1 - mambo/man/plotPCs.Rd | 15 +++++++- 6 files changed, 96 insertions(+), 38 deletions(-) diff --git a/mambo/DESCRIPTION b/mambo/DESCRIPTION index 338bf32..557c953 100644 --- a/mambo/DESCRIPTION +++ b/mambo/DESCRIPTION @@ -18,7 +18,7 @@ Authors@R: c( LazyData: TRUE License: GPL (>= 2) Encoding: UTF-8 -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 Depends: R (>= 4.3) Imports: @@ -27,5 +27,6 @@ Imports: dplyr, car, purrr, + tidyr, ggplot2, swfscMisc diff --git a/mambo/R/mambo.R b/mambo/R/mambo.R index 8306342..d7b9b12 100644 --- a/mambo/R/mambo.R +++ b/mambo/R/mambo.R @@ -60,12 +60,14 @@ mambo <- function( cat('\n--------', format(Sys.time()), 'Starting MAMBO --------\n') cat(' Number of replicates:', nrep, '\n') - cat(' MCMC parameters:\n') - cat(' Chains:', chains, '\n') - cat(' Adapt:', adapt, '\n') - cat(' Burnin:', burnin, '\n') - cat(' Total Samples:', total.samples, '\n') - cat(' Thinning:', thin, '\n') + if(bayesian) { + cat(' MCMC parameters:\n') + cat(' Chains:', chains, '\n') + cat(' Adapt:', adapt, '\n') + cat(' Burnin:', burnin, '\n') + cat(' Total Samples:', total.samples, '\n') + cat(' Thinning:', thin, '\n') + } if(output.log) cat(' Log file:', log.fname, '\n') cat('\n--------', format(Sys.time()), 'Occurrence Beta parameters --------\n') @@ -84,12 +86,13 @@ mambo <- function( # do nrep iterations, save results to list, and write to RDS file reps <- lapply(1:nrep, function(i) { - cat('\n--------', format(Sys.time()), 'Replicate ') + start.time <- Sys.time() + cat('\n-------- Replicate ') cat(i, '/', nrep, sep = '') cat(' --------\n') # Extract PCs ------------------------------------------------------------- - cat(' PCA...\n') + cat(' ', format(Sys.time()), 'PCA...\n') pca <- stats::setNames( list(ranPCA(resp.beta), ranPCA(pred.beta)), c(resp.label, pred.label) @@ -101,7 +104,7 @@ mambo <- function( if(bayesian) { # Run Bayesian model ------------------------------------------------------ - cat(' Bayesian model...\n') + cat(' ', format(Sys.time()), 'Bayesian model...\n') utils::capture.output(post <- jagsPClm( pc.resp = pca[[resp.label]]$x[, 1:pca[[resp.label]]$num.pcs], pc.preds = pca[[pred.label]]$x[, 1:pca[[pred.label]]$num.pcs], @@ -113,7 +116,7 @@ mambo <- function( )) # Compute posterior summary statistics ------------------------------------ - cat(' Summarize posterior...\n') + cat(' ', format(Sys.time()), 'Summarize posterior...\n') utils::capture.output(post.smry <- summary(post, silent.jags = TRUE)) # Extract posterior and label dimensions ----------------------------------- @@ -126,9 +129,11 @@ mambo <- function( dimnames(p$w)[[2]] <- paste0(pred.label, '.PC', 1:pca[[pred.label]]$num.pcs) } + end.time <- Sys.time() + elapsed <- difftime(end.time, start.time) cat( ' End replicate:', - format(round(swfscMisc::autoUnits(post$timetaken))), + format(round(swfscMisc::autoUnits(elapsed))), '\n' ) list(pca = pca, post.smry = post.smry, post.list = p) @@ -153,12 +158,14 @@ mambo <- function( cat('\n--------', format(Sys.time()), 'End MAMBO --------\n') cat(' Number of replicates:', nrep, '\n') - cat(' MCMC parameters:\n') - cat(' Chains:', chains, '\n') - cat(' Adapt:', adapt, '\n') - cat(' Burnin:', burnin, '\n') - cat(' Total Samples:', total.samples, '\n') - cat(' Thinning:', thin, '\n') + if(bayesian) { + cat(' MCMC parameters:\n') + cat(' Chains:', chains, '\n') + cat(' Adapt:', adapt, '\n') + cat(' Burnin:', burnin, '\n') + cat(' Total Samples:', total.samples, '\n') + cat(' Thinning:', thin, '\n') + } cat( ' Total elapsed time: ', format(round(swfscMisc::autoUnits(res$run.time$elapsed), 1)), diff --git a/mambo/R/mambo_package.R b/mambo/R/mambo_package.R index b2ef08b..241c3d6 100644 --- a/mambo/R/mambo_package.R +++ b/mambo/R/mambo_package.R @@ -2,7 +2,6 @@ #' #' Metabarcoding Analysis using Modeled Bayesian Occurrences #' -#' @aliases mambo-package #' @docType package #' @name mambo_package #' diff --git a/mambo/R/plotPCs.R b/mambo/R/plotPCs.R index de33902..234c194 100644 --- a/mambo/R/plotPCs.R +++ b/mambo/R/plotPCs.R @@ -5,7 +5,9 @@ #' @param locus label name of response or predictor locus. #' @param pc.x number of x-axis principal component. #' @param pc.y number of y-axis principal component. +#' @param type plot as ellipse of samples or 2-D density. #' @param ellipse.p probability density level of ellipse. +#' @param num.bins number of bins for each axis if 2-D density is plotted. #' @param plot display plot? #' #' @return PCA biplot of scores with confidence ellipses for each sample. @@ -14,32 +16,69 @@ #' #' @export #' -plotPCs <- function(results, locus, pc.x = 1, pc.y = 2, ellipse.p = 0.95, plot = TRUE) { +plotPCs <- function(results, locus, pc.x = 1, pc.y = 2, + type = c('ellipse', 'density'), ellipse.p = 0.95, + num.bins = 50, plot = TRUE) { + if(missing(locus)) stop("'locus' must be specified.") scores <- extractPCA(results)$scores[[locus]] - gg <- purrr::imap(split(scores, scores$sample), function(df, i) { - x <- dplyr::filter(df, pc == pc.x)$score - y <- dplyr::filter(df, pc == pc.y)$score - car::dataEllipse(x, y, levels = ellipse.p, draw = FALSE) |> - as.data.frame() |> - dplyr::mutate(sample = i) - }) |> - dplyr::bind_rows() |> - ggplot2::ggplot() + + prop.var <- sapply( + results$reps, + function(r) r$pca[[locus]]$importance['Proportion of Variance', c(pc.x, pc.y)] + ) |> + t() |> + apply(2, median) + + type <- match.arg(type) + df <- if(type == 'ellipse') { + scores |> + split(scores$sample) |> + purrr::imap(function(df, i) { + x <- dplyr::filter(df, pc == pc.x)$score + y <- dplyr::filter(df, pc == pc.y)$score + car::dataEllipse(x, y, levels = ellipse.p, draw = FALSE) |> + as.data.frame() |> + dplyr::mutate(sample = i) + }) |> + dplyr::bind_rows() + } else { + scores |> + dplyr::mutate(axis = ifelse(pc == pc.x, 'x', 'y')) |> + dplyr::filter(pc %in% c(pc.x, pc.y)) |> + dplyr::select(-dplyr::all_of('pc')) |> + tidyr::pivot_wider( + id_cols = c('sample', 'rep'), + names_from = 'axis', + values_from = 'score' + ) + } + + gg <- df |> + ggplot2::ggplot(mapping = ggplot2::aes(x = x, y = y)) + ggplot2::geom_hline(yintercept = 0, color = 'darkred') + ggplot2::geom_vline(xintercept = 0, color = 'darkred') + - ggplot2::geom_polygon( - ggplot2::aes(x, y, group = sample), - fill = NA, - color = 'black' - ) + ggplot2::labs( - x = paste0('PC', pc.x), - y = paste0('PC', pc.y), + x = paste0('PC', pc.x, ' (', round(prop.var[1] * 100, 1), '%)'), + y = paste0('PC', pc.y, ' (', round(prop.var[2] * 100, 1), '%)'), title = locus ) + ggplot2::theme_minimal() + gg <- if(type == 'ellipse') { + gg + + ggplot2::geom_polygon( + ggplot2::aes(group = sample), + fill = NA, + color = 'black' + ) + } else { + gg + + ggplot2::geom_bin_2d(bins = num.bins) + + ggplot2::scale_fill_viridis_c(option = 'viridis') + + ggplot2::theme(legend.position = 'none') + } + print(gg) + if(plot) print(gg) invisible(gg) } diff --git a/mambo/man/mambo_package.Rd b/mambo/man/mambo_package.Rd index d378e88..d3dbbfb 100644 --- a/mambo/man/mambo_package.Rd +++ b/mambo/man/mambo_package.Rd @@ -5,7 +5,6 @@ \alias{-package} \alias{mambo_package} \alias{mamboTutorial} -\alias{mambo-package} \title{\code{mambo} package} \usage{ mamboTutorial() diff --git a/mambo/man/plotPCs.Rd b/mambo/man/plotPCs.Rd index f288c19..bd9e3bf 100644 --- a/mambo/man/plotPCs.Rd +++ b/mambo/man/plotPCs.Rd @@ -4,7 +4,16 @@ \alias{plotPCs} \title{Plot principal component confidence ellipses} \usage{ -plotPCs(results, locus, pc.x = 1, pc.y = 2, ellipse.p = 0.95, plot = TRUE) +plotPCs( + results, + locus, + pc.x = 1, + pc.y = 2, + type = c("ellipse", "density"), + ellipse.p = 0.95, + num.bins = 50, + plot = TRUE +) } \arguments{ \item{results}{output of a \code{mambo} run.} @@ -15,8 +24,12 @@ plotPCs(results, locus, pc.x = 1, pc.y = 2, ellipse.p = 0.95, plot = TRUE) \item{pc.y}{number of y-axis principal component.} +\item{type}{plot as ellipse of samples or 2-D density.} + \item{ellipse.p}{probability density level of ellipse.} +\item{num.bins}{number of bins for each axis if 2-D density is plotted.} + \item{plot}{display plot?} } \value{