aif #06/02/25

stangandaho · Feb 6, 2025 · 0d33970 · 0d33970
1 parent 4ee9a92
commit 0d33970
Show file tree

Hide file tree

Showing 17 changed files with 754 additions and 97 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -31,5 +31,5 @@ Encoding: UTF-8
 LazyData: true
 RoxygenNote: 7.3.2
 Roxygen: list(markdown = TRUE)
-Suggests: testthat (>= 3.0.0)
+Suggests: testthat (>= 3.0.0), vegan
 Config/testthat/edition: 3
diff --git a/NAMESPACE b/NAMESPACE
@@ -27,6 +27,8 @@ export(mm_plot_overlap_coef)
 export(mm_overlap_matrix)
 export(mm_temporal_shift)
 export(mm_alpha_diversity)
+#export(mm_beta_diversity)
+export(mm_dissimilarity)
 
 ## General
 export(`%>%`)

diff --git a/R/from_vegan.R b/R/from_vegan.R
@@ -53,3 +53,15 @@
   xx
 }
 
+veganMahatrans <- function (x, s2, tol = sqrt(.Machine$double.eps), na.rm = FALSE)
+{
+  if (missing(s2))
+    s2 <- cov(x, use = if (na.rm)
+      "pairwise.complete.obs"
+      else "all.obs")
+  e <- eigen(s2, symmetric = TRUE)
+  k <- e$values > max(tol, tol * e$values[1L])
+  sisqr <- e$vectors[, k, drop = FALSE] %*% (sqrt(1/e$values[k]) *
+                                               t(e$vectors[, k, drop = FALSE]))
+  x %*% sisqr
+}
diff --git a/R/mm_alpha_diversity.R b/R/mm_alpha_diversity.R
@@ -127,10 +127,10 @@
 #' @export
 #'
 mm_alpha_diversity <- function(data,
-                             site_column,
+                             to_community = TRUE,
                              index = "shannon",
+                             site_column,
                              species_column,
-                             to_community = TRUE,
                              size_column = NULL,
                              margin = 1) {
 
@@ -173,34 +173,34 @@ mm_alpha_diversity <- function(data,
     if (i == "simpson") {
       # simpson
       simpson = apply(data, margin, function(x){
-        round(sum((x/sum(x, na.rm = TRUE))**2), 3)
+        sum((x/sum(x, na.rm = TRUE))**2)
       })
       ind_value <- 1 - simpson
-      index_list[[i]] <- ind_value
+      index_list[[i]] <- round(ind_value, 3)
 
     }else if(i == "invsimpson"){
       # simpson inverse
       simpson = apply(data, margin, function(x){
         round(sum((x/sum(x, na.rm = TRUE))**2), 3)
       })
       ind_value <- 1/simpson
-      index_list[[i]] <- ind_value
+      index_list[[i]] <- round(ind_value, 3)
 
     }else if (i == "shannon") {
       # shannon
       ind_value <- apply(data, margin, function(x){
         pi <- (x/sum(x, na.rm = TRUE))
         -sum(pi*log(pi), na.rm = TRUE)
       })
-      index_list[[i]] <- ind_value
+      index_list[[i]] <- round(ind_value, 3)
 
     }else if (i %in% c("evenness", "pielou")) {
       shannon <- apply(data, margin, function(x){
         pi <- (x/sum(x, na.rm = TRUE))
         -sum(pi*log(pi), na.rm = TRUE)
       })
       ind_value <- shannon/log(ncol(data))
-      index_list[[i]] <- ind_value
+      index_list[[i]] <- round(ind_value, 3)
     }
   }
 

diff --git a/R/mm_beta_diversity.R b/R/mm_beta_diversity.R
diff --git a/R/mm_dissimilarity.R b/R/mm_dissimilarity.R
@@ -0,0 +1,181 @@
+#' @title Calculate dissimilarity between communities
+#'
+#' @description
+#' The function computes dissimilarity indices that are useful for or popular with
+#' community ecologists. All indices use quantitative data, although they would be
+#' named by the corresponding binary index, but you can calculate the binary index
+#' using an appropriate argument. If you do not find your favourite index here, you
+#' can see if it can be implemented using designdist. Gower, Bray–Curtis, Jaccard
+#' and Kulczynski indices are good in detecting underlying ecological gradients
+#' (Faith et al. 1987). Morisita, Horn–Morisita, Binomial, Cao and Chao indices
+#' should be able to handle different sample sizes
+#' (Wolda 1981, Krebs 1999, Anderson & Millar 2004), and Mountford (1962) and
+#' Raup-Crick indices for presence–absence data should be able to handle unknown
+#' (and variable) sample sizes. Most of these indices are discussed by Krebs (1999)
+#' and Legendre & Legendre (2012), and their properties further compared by Wolda
+#' (1981) and Legendre & De Cáceres (2012). Aitchison (1986) distance is equivalent
+#' to Euclidean distance between CLR-transformed samples ("clr") and deals with
+#' positive compositional data. Robust Aitchison distance by Martino et al. (2019)
+#' uses robust CLR ("rlcr"), making it applicable to non-negative data including
+#' zeroes (unlike the standard Aitchison).
+#'
+#' @param data A data frame or matrix containing the species abundance data. The
+#'   rows represent sites (or samples), and the columns represent species.
+#'   The data can be in raw or transformed format (if `to_community = TRUE`).
+#' @param to_community A logical indicating whether the input data should be
+#'   transformed into community data (site in row and species in column).
+#'   Default is `FALSE`.
+#' @param site_column The name of the column representing the site/sample identifiers
+#'   (only used if `to_community = TRUE`).
+#' @param species_column The name of the column representing species identifiers
+#'   (only used if `to_community = TRUE`).
+#' @param size_column The name of the column representing size or abundance counts
+#'   of each species at each site (optional, used if `to_community = TRUE`).
+#' @param method A character string indicating the distance measure to use for
+#'   calculating beta diversity. The available methods are:
+#'   \code{"manhattan"}, \code{"euclidean"}, \code{"canberra"}, \code{"bray"},
+#'   \code{"kulczynski"}, \code{"gower"}, \code{"morisita"}, \code{"horn"},
+#'   \code{"mountford"}, \code{"jaccard"}, \code{"raup"}, \code{"binomial"},
+#'   \code{"chao"}, \code{"altGower"}, \code{"cao"}, \code{"mahalanobis"},
+#'   \code{"clark"}, \code{"chisq"}, \code{"chord"}, \code{"hellinger"},
+#'   \code{"aitchison"}, \code{"robust.aitchison"}. The default is \code{"bray"}.
+#' @param binary A logical indicating whether to transform the data to presence/absence
+#'   (binary data) before calculating dissimilarities. Default is `FALSE`.
+#' @param diag A logical indicating whether to include the diagonal in the output
+#'   dissimilarity matrix. Default is `FALSE` (diagonal values are omitted).
+#' @param upper A logical indicating whether to return only the upper triangular
+#'   part of the dissimilarity matrix. Default is `FALSE`.
+#' @param na.rm A logical indicating whether to remove `NA` values from the data
+#'   before calculating dissimilarities. Default is `FALSE`. If `FALSE`, an error is raised
+#'   if there are any missing values in the data.
+#' @param ... Additional arguments passed to other functions, such as transformation
+#'   functions for data scaling or standardization.
+#'
+#' @return A distance matrix (of class `dist`) containing the pairwise dissimilarities
+#'   between sites. The dissimilarities are calculated according to the chosen distance
+#'   metric, and various attributes (e.g., method, size, labels) are attached to the
+#'   result.
+#'
+#' @inherit vegan::vegdist details
+#'
+#' @inherit vegan::vegdist note
+#'
+#' @inherit vegan::vegdist author
+#'
+#' @inherit vegan::vegdist references
+#'
+#'
+#' @export
+
+mm_dissimilarity <- function(data,
+                              to_community = FALSE,
+                              site_column,
+                              species_column,
+                              size_column = NULL,
+                              method = "bray",
+                              binary = FALSE,
+                              diag = FALSE,
+                              upper = FALSE,
+                              na.rm = FALSE,
+                              ...){
+
+  if (to_community) {
+
+    site_column <- rlang::ensym(site_column)
+    species_column <- rlang::enquo(species_column)
+    size_column <- tryCatch({ rlang::ensym(size_column) }, error = function(e)NULL)
+
+    if (!is.null(size_column)) {
+      data <- transform_index_data(data = data,
+                                   site_column = !!site_column,
+                                   species_column = !!species_column,
+                                   to_community = to_community,
+                                   size_column = size_column
+      )
+
+    }else{
+      data <- transform_index_data(data = data,
+                                   site_column = !!site_column,
+                                   species_column = !!species_column,
+                                   to_community = to_community
+      )
+    }
+
+
+    site_name <- data %>% dplyr::pull(site_column)
+    data <- data %>% dplyr::select(-site_column) %>%
+      as.matrix()
+    rownames(data) <- site_name
+  }else{
+    data <- as.matrix(data)
+  }
+
+
+  ZAP <- 1e-15
+  if (!is.na(pmatch(method, "euclidian"))){
+    method <- "euclidean"
+  }
+  METHODS <- c("manhattan", "euclidean", "canberra", "bray",
+               "kulczynski", "gower", "morisita", "horn", "mountford",
+               "jaccard", "raup", "binomial", "chao", "altGower", "cao",
+               "mahalanobis", "clark", "chisq", "chord", "hellinger",
+               "aitchison", "robust.aitchison")
+  method <- pmatch(method, METHODS)
+  inm <- METHODS[method]
+  if (is.na(method))
+    rlang::abort("Invalid distance method", call = NULL)
+  if (method == -1)
+    rlang::abort("Ambiguous distance method", call = NULL)
+
+  if (!na.rm && anyNA(data))
+    rlang::abort("Missing values are not allowed if 'na.rm = FALSE'", call = NULL)
+  if (!(is.numeric(data) || is.logical(data)))
+    rlang::abort("Input data must be numeric", call = NULL)
+  if (!method %in% c(1, 2, 6, 16, 18) && any(rowSums(data, na.rm = TRUE) == 0))
+    rlang::warn(sprintf("Empty rows: their dissimilarities may be meaningless in method %s", dQuote(inm)))
+  if (!method %in% c(1, 2, 3, 6, 16, 19, 20) && any(data < 0, na.rm = TRUE))
+    rlang::warn(sprintf("Results may be meaningless because data have negative
+                        entries in method %s", dQuote(inm)))
+  if (method %in% c(11, 18) && any(colSums(data) == 0, na.rm = TRUE))
+    rlang::warn(sprintf("Data have empty species which influence the results in method %s",
+                        dQuote(inm)))
+  if (method == 6)
+    data <- mm_standardize(data, "range", 2, na.rm = TRUE, ...)
+  if (method == 16)
+    data <- veganMahatrans(scale(data, scale = FALSE), na.rm = na.rm)
+  if (method == 18)
+    data <- mm_standardize(data, "chi.square", na.rm = na.rm)
+  if (method == 21)
+    data <- mm_standardize(data, "clr", ...)
+  if (method == 22)
+    data <- mm_standardize(data, "rclr")
+  if (binary)
+    data <- mm_standardize(data, "pa")
+  N <- nrow(data)
+  if (method %in% c(7, 13, 15) && !identical(all.equal(data, round(data)), TRUE))
+    rlang::warn(sprintf("Results may be meaningless with non-integer data
+                        in method %s", dQuote(inm)))
+  d <- .Call(vegan:::do_vegdist, data, as.integer(method))
+  d[d < ZAP] <- 0
+  if (any(is.na(d)))
+    rlang::warn("Missing values in results")
+  attr(d, "maxdist") <- if (method %in% c(3, 4, 5, 7, 8, 10, 11, 13, 17))
+    1
+  else if (method %in% c(19, 20))
+    sqrt(2)
+  else if (method == 9)
+    log(2)
+  else NA
+
+  attr(d, "Size") <- N
+  attr(d, "Labels") <- dimnames(data)[[1]]
+  attr(d, "Diag") <- diag
+  attr(d, "Upper") <- upper
+  attr(d, "method") <- paste(if (binary)
+    "binary ", METHODS[method], sep = "")
+  attr(d, "call") <- match.call()
+  class(d) <- "dist"
+
+
+  return(d)
+}