diff --git a/DESCRIPTION b/DESCRIPTION index 6ed76d4c..cad1ea7a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -56,6 +56,7 @@ Suggests: glue, grid, gsEasy, + gt, gtools, igraph, IntegratedLearner, @@ -105,6 +106,7 @@ Remotes: github::microbiome/mia, github::microbiome/miaViz, github::microbiome/miaTime, + github::joey711/phyloseq, github::stefpeschel/NetCoMi, github::zdk123/SpiecEasi, github::GraceYoon/SPRING, diff --git a/inst/assets/bibliography.bib b/inst/assets/bibliography.bib index 62b1b07a..44edbb2b 100644 --- a/inst/assets/bibliography.bib +++ b/inst/assets/bibliography.bib @@ -1,3 +1,66 @@ +@article{Hill1973, + title = {{Diversity and Evenness: A Unifying Notation and Its Consequences}}, + volume = {54}, + ISSN = {1939-9170}, + url = {http://dx.doi.org/10.2307/1934352}, + DOI = {10.2307/1934352}, + number = {2}, + journal = {{Ecology}}, + publisher = {Wiley}, + author = {Hill, M. O.}, + year = {1973}, + month = mar, + pages = {427–432} +} + +@article{Ma2019, + title = {Diversity-disease relationships and shared species analyses for human microbiome-associated diseases}, + volume = {13}, + ISSN = {1751-7370}, + url = {http://dx.doi.org/10.1038/s41396-019-0395-y}, + DOI = {10.1038/s41396-019-0395-y}, + number = {8}, + journal = {{The ISME Journal}}, + publisher = {Oxford University Press (OUP)}, + author = {Ma, Zhanshan (Sam) and Li, Lianwei and Gotelli, Nicholas J}, + year = {2019}, + month = {mar}, + pages = {1911–1919} +} + +@article{Valles-Colomer2019GBMs, + author = {Valles-Colomer, Mireia and Falony, Gwen and Darzi, Youssef and Tigchelaar, Ettje F. and Wang, Jun and Tito, Raul Y. 
and Schiweck, Carmen and Kurilshikov, Alexander and Joossens, Marie and Wijmenga, Cisca and Claes, Stephan and Van Oudenhove, Lukas and Zhernakova, Alexandra and Vieira-Silva, Sara and Raes, Jeroen}, + title = {The neuroactive potential of the human gut microbiota in quality of life and depression}, + journal = {{Nature Microbiology}}, + ISSN = {2058-5276}, + DOI = {10.1038/s41564-018-0337-x}, + url = {https://doi.org/10.1038/s41564-018-0337-x}, + year = {2019}, + type = {Journal Article} +} + +@article{vandeputte2017quantitative, + title={Quantitative microbiome profiling links gut community variation to microbial load}, + author={Vandeputte, Doris and Kathagen, Gunter and D’hoe, Kevin and Vieira-Silva, Sara and Valles-Colomer, Mireia and Sabino, Jo{\~a}o and Wang, Jun and Tito, Raul Y and De Commer, Lindsey and Darzi, Youssef and Vermeire, Séverine and Falony, Gwen and Raes, Jeroen}, + journal={{Nature}}, + volume={551}, + number={7681}, + pages={507--511}, + year={2017}, + publisher={{Nature Publishing Group UK London}} +} + +@article{bastiaanssen2023bugs1, + title={Bugs as features (part 1): concepts and foundations for the compositional data analysis of the microbiome--gut--brain axis}, + author={Bastiaanssen, Thomaz FS and Quinn, Thomas P and Loughman, Amy}, + journal={{Nature Mental Health}}, + volume={1}, + number={12}, + pages={930--938}, + doi = {10.1038/s44220-023-00148-3}, + year={2023}, + publisher={{Nature Publishing Group US New York}} +} @Article{Xu2023, author = {Shuangbin Xu and Li Zhan and Wenli Tang and Qianwen Wang and Zehan Dai and Lang Zhou and Tingze Feng and Meijun Chen and Tianzhi Wu and Erqiang Hu and Guangchuang Yu}, @@ -462,7 +525,7 @@ @Article{Faith1992 volume = {61}, number = {1}, pages = {10}, - doi = {https://doi.org/10.1016/0006-3207(92)91201-3}, + doi = {10.1016/0006-3207(92)91201-3}, url = {https://www.sciencedirect.com/science/article/pii/0006320792912013?via%3Dihub}, year = {1992} } diff --git a/inst/pages/alpha_diversity.qmd 
b/inst/pages/alpha_diversity.qmd index 2d71c575..97d932d6 100644 --- a/inst/pages/alpha_diversity.qmd +++ b/inst/pages/alpha_diversity.qmd @@ -1,129 +1,162 @@ -# Community diversity {#sec-community-diversity} +# Alpha Diversity {#sec-alpha-diversity} ```{r setup, echo=FALSE, results="asis"} library(rebook) chapterPreamble() ``` -Community diversity is a central concept in microbiome research. Several -diversity indices are available in the ecological literature. - -The main categories of diversity indices include species richness, -evenness, and diversity: each of these emphasizes different aspects of -the community heterogeneity [@Whittaker1960], [@Willis2019]. The _Hill -coefficient_ combines many standard indices into a single equation -that provides observed richness, inverse Simpson, Shannon diversity, -and generalized diversity as special cases, with varying levels of -emphasis on species abundance values. Thus, the term _alpha diversity_ -is often used to collectively refer to all these variants. - -**Diversity** summarizes the distribution of - species abundances in a given sample into a single number that - depends on both species richness and evenness (see below). Diversity - indices measure the overall community heterogeneity that considers - both of these aspects simultaneously. A number of ecological - diversity measures are available. In general, diversity increases - together with increasing richness and evenness. **Phylogenetic - diversity** (PD), [@Faith1992], is a variant that incorporates - information from phylogenetic relationships between species, unlike - most other commonly used diversity indices. The `addAlpha()` - function uses a faster reimplementation of the widely used function - in _`picante`_ [@R_picante, @Kembel2010]. The method uses the - default rowTree from the `TreeSummarizedExperiment` object (`tse`). - -**Richness** refers to the total number of species in a community - (sample). 
The simplest richness index is the number of species - observed in a sample (observed richness). Assuming limited sampling - from the community, however, this may underestimate the true species - richness. Several estimators have been developed to address this, - including for instance ACE [@Chao1992] and Chao1 [@Chao1984] - indices. Richness estimates do not aim to characterize variations in - species abundances. +## Background + +Alpha diversity, or within-sample diversity, is a central concept in microbiome +research. In ecological literature, several distinct but related alpha diversity +indices, often referring to **richness** and **evenness** - the number of taxa +and how they are distributed, respectively - are commonly used +[@Willis2019;@Whittaker1960]. The term **diversity** can be used to collectively +refer to all these indices. + +### Applications + +Alpha diversity is predominantly used to quantify complexity in the microbiome. +In the general adult population, lower alpha diversity and lower bacterial load +have been associated with worse overall physical and mental health +[@Valles-Colomer2019GBMs;@vandeputte2017quantitative]. However, this principle +may not generalize to other populations, most notably in early life +and in patient cohorts [@Ma2019]. + +### Approaches + +The majority of alpha diversity metrics are closely related, though this is not +evident from their names. Bastiaanssen et al. [-@bastiaanssen2023bugs1] lay out +this relationship across two factors (see table below). First, alpha diversity +metrics can be defined as special cases of a unifying equation of **diversity**, +where the **Hill number** determines the specific index captured. Lower Hill +numbers favour **richness**, the number of distinct taxa, whereas higher numbers +favour **evenness**, how the taxa are distributed over the sample [@Hill1973]. +Second, some alpha diversity metrics are weighted based on phylogeny, like +Faith's PD [-@Faith1992] and PhILR [@Silverman2017].
+ +```{r} +#| label: alpha_table +#| echo: false + +library(gt) + +# Descriptions +neutral_div <- "**Neutral Diversity** + +--- +*Weighs each taxon equally, no assumptions about phylogeny*" +phyl_div <- "**Phylogenetic Diversity** + +--- +*Indices are scaled based on taxonomic closeness with a phylogenetic tree*" + +hill_0 <- 'Dependent on presence and absence of taxa, not abundance' +hill_1 <- 'Dependent on how evenly taxa are distributed in a sample' +hill_2 <- 'The probability of two randomly picked taxa not being the same' + +table_colnames <- c("phyl_desc", hill_0, hill_1, hill_2) + +footnote_md <- md( +"The equation for general diversity can be defined as follows: + \\${{}^qD = (\\sum_{i=1}^{R}p^q_i})^{\\frac{1}{(1-q)}}\\$, + with *q* = Hill number, *R* = number of features, + *p* = relative feature abundance.") + +# Generate data.frame for table +cbind.data.frame( + c(neutral_div, phyl_div), + c("Richness (Chao1)", "Faith's Phylogenetic Diversity"), + c("Shannon Entropy","Phylogenetic Entropy"), + c("Simpson's Index", "Rao's Quadratic Diversity")) |> + `colnames<-`(value = table_colnames) |> -Nonparametric richness estimators such as Chao1 and ACE, however, must not be -used with amplicon sequence variant (ASV) data. Algorithms that generate ASVs, -like DADA2 and Deblur, typically remove singletons, which are essential for -these richness calculations. This removal leads to meaningless results. -Although ASVs offer higher resolution than operational taxonomic units (OTUs) -and are increasingly used, the removal of singletons invalidates the application -of Chao1 and ACE. Therefore, alternative alpha diversity metrics that do not -depend on singletons or doubletons should be considered, or OTUs could be -used specifically for alpha diversity analysis to retain low-abundance taxa. 
-Additionally, the inability of denoising algorithms to distinguish true -singleton sequences from artifacts further complicates the issue, making -traditional richness estimators unsuitable for ASV datasets, which are often -standardized for sequencing depth.[@Deng2024] - -**Evenness** focuses on the distribution of species abundances, and it - can thus complement the number of species. Pielou's evenness is a - commonly used index, obtained by normalizing Shannon diversity by - (the natural logarithm of) observed richness. - -These main classes of alpha diversity are sometimes complemented with -indices of dominance or rarity: - -**Dominance** indices are in general negatively correlated with alpha - diversity. A high dominance is obtained when one or a few species have - a high share of the total species abundance in the community. Note - that dominance indices are generally inversely correlated with other - alpha diversity indices. - -**Rarity** indices characterize the concentration of species at low - abundance. Prevalence and detection thresholds determine rare - species whose total concentration will determine the value of a - rarity index. - -## Alpha diversity estimation in practice - -### Calculate diversity measures {#sec-estimate-diversity} - -Alpha diversity can be estimated with `addAlpha()` wrapper function that interact -with other packages implementing the calculation, such as `vegan` -[@R_vegan]. - -These functions calculate the given indices, and add them to the -`colData` slot of the `SummarizedExperiment` object with the given -`name`. - -The estimated values can then be retrieved and analyzed directly from -the `colData`, for example, by plotting them using `plotColData()` from -the `scater` package [@R_scater]. Here, we use the `observed` -species as a measure of richness. 
+ # Pipe into gt + gt(rowname_col = "phyl_desc") |> + fmt_markdown() |> + + tab_spanner(label = md("**Hill number 0**"), columns = 2) |> + tab_spanner(label = md("**Hill number 1**"), columns = 3) |> + tab_spanner(label = md("**Hill number 2**"), columns = 4) |> + + # Formatting + cols_align(align = "center") |> + opt_table_lines() |> + tab_style(style = list(cell_text(v_align = "middle", align = "center")), + locations = cells_column_spanners()) |> + tab_style(style = cell_text(style = "italic"), + locations = cells_column_labels()) |> + tab_style(style = list(cell_text(weight = "bolder")), + locations = cells_body(2:4)) |> + cols_width(1 ~ pct(32.5), 2:4 ~ pct(22.5)) |> + tab_footnote(footnote = footnote_md, placement = "left") +``` -Certain indices have additional options, here observed has `detection` parameter -that control the detection threshold. Species over this threshold is considered -as detected. See full list of options from from `help(addAlpha)`. +::: {.callout-note} +## Note: Richness estimators and denoising + +Several estimators have been developed to address the confounding effect of +limited sampling size on observed richness, most notably ACE [@Chao1992] and +Chao1 [@Chao1984]. However, these approaches may yield misleading results for +modern 16S data, which commonly features denoising and removal of singletons +[@Deng2024]. +::: -```{r plot-richness, message=FALSE, cache=TRUE} -#| context: setup +## Examples + +### Calculate alpha diversity measures {#sec-estimate-diversity} + +Alpha diversity can be estimated with the `addAlpha()` function, which interacts +with other packages implementing the calculation, such as `vegan` [@R_vegan] and +_`picante`_ [@R_picante; @Kembel2010]. +This function calculates the given indices and adds them to the `colData` slot +of the `SummarizedExperiment` object with the given `name`. + +```{r} +#| label: calc-diversity +#| message: false # First, let's load some example data.
library(mia) data("GlobalPatterns", package="mia") tse <- GlobalPatterns -# Estimate (observed) richness +# Compute one or multiple indices simultaneously through the 'index' parameter. tse <- addAlpha( - tse, assay.type = "counts", index = "observed", name = "observed", + tse, assay.type = "counts", index = c("observed", "shannon", "faith"), detection = 10) # Check some of the first values in colData tse$observed |> head() +tse$shannon |> head() ``` -::: {.callout-tip} -## Tip +Certain indices have additional options; here, `observed` has a `detection` +parameter that controls the detection threshold. Species above this threshold +are considered detected. See the full list of options in `help(addAlpha)`. -You can calculate multiple indices simultaneously by specifying multiple indices -in the `index` parameter. +::: {.callout-note} +## Note: Phylogenetic diversity requires a tree -For example: `index = c("observed", "shannon")` +Because `tse` is a `TreeSummarizedExperiment` object, its phylogenetic tree is +used by default. However, the optional argument `tree` must be provided if `tse` +does not contain a rowTree. ::: -Let's visualize the results against selected `colData` variables (sample -type and final barcode). +### Visualize alpha diversity measures {#sec-plot-diversity} + +As alpha diversity metrics typically summarize high-dimensional samples into +single values, many visualization approaches are available. Once calculated, +these metrics can be analyzed directly from the `colData`, for example, by +plotting them using `plotColData()` from the `scater` package [@R_scater]. Here, +we use the `observed` species as a measure of richness. Let's visualize the +results against selected `colData` variables (sample type and final barcode). + +```{r} +#| label: plot-richness +#| message: false +#| fig-cap: "Observed richness plotted grouped by sample type with colour-labeled barcode."
-```{r plot-div-obs, message=FALSE, fig.cap="Shannon diversity estimates plotted grouped by sample type with colour-labeled barcode.", cache=TRUE} library(scater) plotColData( tse, @@ -134,42 +167,16 @@ plotColData( labs(x = "Sample types", y = expression(Richness[Observed])) ``` -We can then analyze the statistical significance. We use the non-parametric -Wilcoxon or Mann-Whitney test, as it is more flexible than the commonly used -Student's t-Test, since it does not assume normality. - -```{r} -#| label: test_alpha1 - -pairwise.wilcox.test( - tse[["observed"]], tse[["SampleType"]], p.adjust.method = "fdr") -``` - -### Faith phylogenetic diversity {#sec-faith-diversity} - -The Faith index is returned by the function `addAlpha()`. It utilizes the widely -used function in _`picante`_ [@R_picante, @Kembel2010]. - -```{r phylo-div-1} -tse <- addAlpha(tse, assay.type = "counts", index = "faith") -tse$faith |> head() -``` - -::: {.callout-note} -## Note - -Because `tse` is a `TreeSummarizedExperiment` object, its phylogenetic tree is -used by default. However, the optional argument `tree` must be provided if -`tse` does not contain one. -::: - -## Alpha diversity measure comparisons {#sec-compare-alpha} +#### Alpha diversity measure comparisons {#sec-compare-alpha} We can compare alpha diversities for example by calculating correlation between them. Below, a visual comparison between shannon and faith indices is shown with a scatter plot. -```{r compare-diversities, fig.width = 6.5} +```{r} +#| label: compare-diversities +#| fig-width: 6.5 + tse <- addAlpha(tse, assay.type = "counts", index = "shannon") plotColData(tse, x = "shannon", y = "faith") + @@ -188,7 +195,10 @@ against a given sample grouping available in `colData` (here, sample type). These have been readily stored in the `colData` slot, and they are thus directly available for plotting. 
-```{r plot-all-diversities, fig.width = 6.5} +```{r} +#| label: plot-all-diversities +#| fig-width: 6.5 + library(patchwork) # Create the plots @@ -211,7 +221,20 @@ wrap_plots(plots, ncol = 1) + plot_layout(guides = "collect") ``` -## Visualizing significance in group-wise comparisons +### Statistical analysis of alpha diversity measures {#sec-stats-diversity} + +We can then analyze the statistical significance. We use the non-parametric +Wilcoxon or Mann-Whitney test, as it is more flexible than the commonly used +Student's t-Test, since it does not assume normality. + +```{r} +#| label: test_alpha1 + +pairwise.wilcox.test( + tse[["observed"]], tse[["SampleType"]], p.adjust.method = "fdr") +``` + +#### Visualizing significance in group-wise comparisons Next, let's compare the Shannon index between sample groups and visualize the statistical significance. Using the `stat_compare_means` function from the @@ -219,7 +242,9 @@ statistical significance. Using the `stat_compare_means` function from the To add adjusted p-values, we have to first calculate them. -```{r visualize-shannon} +```{r} +#| label: visualize-shannon + library(ggpubr) library(tidyverse) @@ -274,6 +299,7 @@ p <- plotColData( p ``` +## Further reading Article on [`ggpubr` package](http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/76-add-p-values-and-significance-levels-to-ggplots/) provides further examples for estimating and highlighting significances. diff --git a/inst/pages/introductory_workflow.qmd b/inst/pages/introductory_workflow.qmd index 63938baa..b64e131e 100644 --- a/inst/pages/introductory_workflow.qmd +++ b/inst/pages/introductory_workflow.qmd @@ -238,7 +238,7 @@ Next, we will calculate Faith's phylogenetic diversity index. What sets this ind apart is its incorporation of phylogeny into the diversity calculation. This index considers both the number and the relatedness of different taxa, using branch lengths on a phylogenetic tree. 
For -more information on diversity, see [@sec-community-diversity]. +more information on diversity, see [@sec-alpha-diversity]. ```{r} #| label: calculateRichness diff --git a/inst/pages/introductory_workflow_french_version.qmd b/inst/pages/introductory_workflow_french_version.qmd index 0b4fd8de..f94d2b1f 100644 --- a/inst/pages/introductory_workflow_french_version.qmd +++ b/inst/pages/introductory_workflow_french_version.qmd @@ -289,7 +289,7 @@ plots <- lapply(plots, "+", Il est très important de faire toutes ces comparaisons afin de quantifier la diversité et de comparer les échantillons dans nos données en utilisant différentes mesures. - Vous pouvez trouver d'autres types de comparaisons -directement dans le livre @sec-community-diversity. +directement dans le livre @sec-alpha-diversity.
diff --git a/inst/pages/visualization.qmd b/inst/pages/visualization.qmd index 4daf3775..478db247 100644 --- a/inst/pages/visualization.qmd +++ b/inst/pages/visualization.qmd @@ -121,7 +121,7 @@ throughout chapter [@sec-quality-control]. ## Diversity estimation Alpha diversity is commonly measured as one of the diversity indices -explained in chapter [@sec-community-diversity]. Because the focus +explained in chapter [@sec-alpha-diversity]. Because the focus lies on each sample separately, one-dimensional plots, such as **scatter**, **violin** and **box plots**, are suitable. @@ -177,11 +177,11 @@ plots[[1]] + plots[[2]] ``` The analogous output in the form of a violin plot is obtained in -chapter [@sec-faith-diversity]. In addition, box plots that group +chapter [@sec-plot-diversity]. In addition, box plots that group samples according to certain information, such as origin, sex, age and health condition, can be labeled with p-values for significant differences with the package `ggpubr` package, as shown in chapter -[@sec-community-diversity]. +[@sec-alpha-diversity]. ### Beta diversity with Shepard and coordination plots