Skip to content

Commit

Permalink
aif #06/02/25
Browse files Browse the repository at this point in the history
  • Loading branch information
stangandaho committed Feb 6, 2025
1 parent 4ee9a92 commit 0d33970
Show file tree
Hide file tree
Showing 17 changed files with 754 additions and 97 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,5 @@ Encoding: UTF-8
LazyData: true
RoxygenNote: 7.3.2
Roxygen: list(markdown = TRUE)
Suggests: testthat (>= 3.0.0)
Suggests: testthat (>= 3.0.0), vegan
Config/testthat/edition: 3
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ export(mm_plot_overlap_coef)
export(mm_overlap_matrix)
export(mm_temporal_shift)
export(mm_alpha_diversity)
#export(mm_beta_diversity)
export(mm_dissimilarity)

## General
export(`%>%`)
Expand Down
12 changes: 12 additions & 0 deletions R/from_vegan.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,15 @@
xx
}

veganMahatrans <- function (x, s2, tol = sqrt(.Machine$double.eps), na.rm = FALSE)
{
if (missing(s2))
s2 <- cov(x, use = if (na.rm)
"pairwise.complete.obs"
else "all.obs")
e <- eigen(s2, symmetric = TRUE)
k <- e$values > max(tol, tol * e$values[1L])
sisqr <- e$vectors[, k, drop = FALSE] %*% (sqrt(1/e$values[k]) *
t(e$vectors[, k, drop = FALSE]))
x %*% sisqr
}
14 changes: 7 additions & 7 deletions R/mm_alpha_diversity.R
Original file line number Diff line number Diff line change
Expand Up @@ -127,10 +127,10 @@
#' @export
#'
mm_alpha_diversity <- function(data,
site_column,
to_community = TRUE,
index = "shannon",
site_column,
species_column,
to_community = TRUE,
size_column = NULL,
margin = 1) {

Expand Down Expand Up @@ -173,34 +173,34 @@ mm_alpha_diversity <- function(data,
if (i == "simpson") {
# simpson
simpson = apply(data, margin, function(x){
round(sum((x/sum(x, na.rm = TRUE))**2), 3)
sum((x/sum(x, na.rm = TRUE))**2)
})
ind_value <- 1 - simpson
index_list[[i]] <- ind_value
index_list[[i]] <- round(ind_value, 3)

}else if(i == "invsimpson"){
# simpson inverse
simpson = apply(data, margin, function(x){
round(sum((x/sum(x, na.rm = TRUE))**2), 3)
})
ind_value <- 1/simpson
index_list[[i]] <- ind_value
index_list[[i]] <- round(ind_value, 3)

}else if (i == "shannon") {
# shannon
ind_value <- apply(data, margin, function(x){
pi <- (x/sum(x, na.rm = TRUE))
-sum(pi*log(pi), na.rm = TRUE)
})
index_list[[i]] <- ind_value
index_list[[i]] <- round(ind_value, 3)

}else if (i %in% c("evenness", "pielou")) {
shannon <- apply(data, margin, function(x){
pi <- (x/sum(x, na.rm = TRUE))
-sum(pi*log(pi), na.rm = TRUE)
})
ind_value <- shannon/log(ncol(data))
index_list[[i]] <- ind_value
index_list[[i]] <- round(ind_value, 3)
}
}

Expand Down
Empty file added R/mm_beta_diversity.R
Empty file.
181 changes: 181 additions & 0 deletions R/mm_dissimilarity.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
#' @title Calculate dissimilarity between communities
#'
#' @description
#' The function computes dissimilarity indices that are useful for or popular with
#' community ecologists. All indices use quantitative data, although they would be
#' named by the corresponding binary index, but you can calculate the binary index
#' using an appropriate argument. If you do not find your favourite index here, you
#' can see if it can be implemented using designdist. Gower, Bray–Curtis, Jaccard
#' and Kulczynski indices are good in detecting underlying ecological gradients
#' (Faith et al. 1987). Morisita, Horn–Morisita, Binomial, Cao and Chao indices
#' should be able to handle different sample sizes
#' (Wolda 1981, Krebs 1999, Anderson & Millar 2004), and Mountford (1962) and
#' Raup-Crick indices for presence–absence data should be able to handle unknown
#' (and variable) sample sizes. Most of these indices are discussed by Krebs (1999)
#' and Legendre & Legendre (2012), and their properties further compared by Wolda
#' (1981) and Legendre & De Cáceres (2012). Aitchison (1986) distance is equivalent
#' to Euclidean distance between CLR-transformed samples ("clr") and deals with
#' positive compositional data. Robust Aitchison distance by Martino et al. (2019)
#' uses robust CLR ("rlcr"), making it applicable to non-negative data including
#' zeroes (unlike the standard Aitchison).
#'
#' @param data A data frame or matrix containing the species abundance data. The
#' rows represent sites (or samples), and the columns represent species.
#' The data can be in raw or transformed format (if `to_community = TRUE`).
#' @param to_community A logical indicating whether the input data should be
#' transformed into community data (site in row and species in column).
#' Default is `FALSE`.
#' @param site_column The name of the column representing the site/sample identifiers
#' (only used if `to_community = TRUE`).
#' @param species_column The name of the column representing species identifiers
#' (only used if `to_community = TRUE`).
#' @param size_column The name of the column representing size or abundance counts
#' of each species at each site (optional, used if `to_community = TRUE`).
#' @param method A character string indicating the distance measure to use for
#' calculating beta diversity. The available methods are:
#' \code{"manhattan"}, \code{"euclidean"}, \code{"canberra"}, \code{"bray"},
#' \code{"kulczynski"}, \code{"gower"}, \code{"morisita"}, \code{"horn"},
#' \code{"mountford"}, \code{"jaccard"}, \code{"raup"}, \code{"binomial"},
#' \code{"chao"}, \code{"altGower"}, \code{"cao"}, \code{"mahalanobis"},
#' \code{"clark"}, \code{"chisq"}, \code{"chord"}, \code{"hellinger"},
#' \code{"aitchison"}, \code{"robust.aitchison"}. The default is \code{"bray"}.
#' @param binary A logical indicating whether to transform the data to presence/absence
#' (binary data) before calculating dissimilarities. Default is `FALSE`.
#' @param diag A logical indicating whether to include the diagonal in the output
#' dissimilarity matrix. Default is `FALSE` (diagonal values are omitted).
#' @param upper A logical indicating whether to return only the upper triangular
#' part of the dissimilarity matrix. Default is `FALSE`.
#' @param na.rm A logical indicating whether to remove `NA` values from the data
#' before calculating dissimilarities. Default is `FALSE`. If `FALSE`, an error is raised
#' if there are any missing values in the data.
#' @param ... Additional arguments passed to other functions, such as transformation
#' functions for data scaling or standardization.
#'
#' @return A distance matrix (of class `dist`) containing the pairwise dissimilarities
#' between sites. The dissimilarities are calculated according to the chosen distance
#' metric, and various attributes (e.g., method, size, labels) are attached to the
#' result.
#'
#' @inherit vegan::vegdist details
#'
#' @inherit vegan::vegdist note
#'
#' @inherit vegan::vegdist author
#'
#' @inherit vegan::vegdist references
#'
#'
#' @export

mm_dissimilarity <- function(data,
to_community = FALSE,
site_column,
species_column,
size_column = NULL,
method = "bray",
binary = FALSE,
diag = FALSE,
upper = FALSE,
na.rm = FALSE,
...){

if (to_community) {

site_column <- rlang::ensym(site_column)
species_column <- rlang::enquo(species_column)
size_column <- tryCatch({ rlang::ensym(size_column) }, error = function(e)NULL)

if (!is.null(size_column)) {
data <- transform_index_data(data = data,
site_column = !!site_column,
species_column = !!species_column,
to_community = to_community,
size_column = size_column
)

}else{
data <- transform_index_data(data = data,
site_column = !!site_column,
species_column = !!species_column,
to_community = to_community
)
}


site_name <- data %>% dplyr::pull(site_column)
data <- data %>% dplyr::select(-site_column) %>%
as.matrix()
rownames(data) <- site_name
}else{
data <- as.matrix(data)
}


ZAP <- 1e-15
if (!is.na(pmatch(method, "euclidian"))){
method <- "euclidean"
}
METHODS <- c("manhattan", "euclidean", "canberra", "bray",
"kulczynski", "gower", "morisita", "horn", "mountford",
"jaccard", "raup", "binomial", "chao", "altGower", "cao",
"mahalanobis", "clark", "chisq", "chord", "hellinger",
"aitchison", "robust.aitchison")
method <- pmatch(method, METHODS)
inm <- METHODS[method]
if (is.na(method))
rlang::abort("Invalid distance method", call = NULL)
if (method == -1)
rlang::abort("Ambiguous distance method", call = NULL)

if (!na.rm && anyNA(data))
rlang::abort("Missing values are not allowed if 'na.rm = FALSE'", call = NULL)
if (!(is.numeric(data) || is.logical(data)))
rlang::abort("Input data must be numeric", call = NULL)
if (!method %in% c(1, 2, 6, 16, 18) && any(rowSums(data, na.rm = TRUE) == 0))
rlang::warn(sprintf("Empty rows: their dissimilarities may be meaningless in method %s", dQuote(inm)))
if (!method %in% c(1, 2, 3, 6, 16, 19, 20) && any(data < 0, na.rm = TRUE))
rlang::warn(sprintf("Results may be meaningless because data have negative
entries in method %s", dQuote(inm)))
if (method %in% c(11, 18) && any(colSums(data) == 0, na.rm = TRUE))
rlang::warn(sprintf("Data have empty species which influence the results in method %s",
dQuote(inm)))
if (method == 6)
data <- mm_standardize(data, "range", 2, na.rm = TRUE, ...)
if (method == 16)
data <- veganMahatrans(scale(data, scale = FALSE), na.rm = na.rm)
if (method == 18)
data <- mm_standardize(data, "chi.square", na.rm = na.rm)
if (method == 21)
data <- mm_standardize(data, "clr", ...)
if (method == 22)
data <- mm_standardize(data, "rclr")
if (binary)
data <- mm_standardize(data, "pa")
N <- nrow(data)
if (method %in% c(7, 13, 15) && !identical(all.equal(data, round(data)), TRUE))
rlang::warn(sprintf("Results may be meaningless with non-integer data
in method %s", dQuote(inm)))
d <- .Call(vegan:::do_vegdist, data, as.integer(method))
d[d < ZAP] <- 0
if (any(is.na(d)))
rlang::warn("Missing values in results")
attr(d, "maxdist") <- if (method %in% c(3, 4, 5, 7, 8, 10, 11, 13, 17))
1
else if (method %in% c(19, 20))
sqrt(2)
else if (method == 9)
log(2)
else NA

attr(d, "Size") <- N
attr(d, "Labels") <- dimnames(data)[[1]]
attr(d, "Diag") <- diag
attr(d, "Upper") <- upper
attr(d, "method") <- paste(if (binary)
"binary ", METHODS[method], sep = "")
attr(d, "call") <- match.call()
class(d) <- "dist"


return(d)
}
Loading

0 comments on commit 0d33970

Please sign in to comment.