Skip to content

Commit

Permalink
Merge pull request #460 from antagomir/aitchison
Browse files Browse the repository at this point in the history
Aitchison
  • Loading branch information
jarioksa authored Feb 24, 2022
2 parents 47e9fc3 + 734ed6f commit 710877a
Show file tree
Hide file tree
Showing 5 changed files with 281 additions and 36 deletions.
112 changes: 81 additions & 31 deletions R/decostand.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@
x <- as.matrix(x)
METHODS <- c("total", "max", "frequency", "normalize", "range", "rank",
"rrank", "standardize", "pa", "chi.square", "hellinger",
"log", "clr", "rclr")
"log", "clr", "rclr", "alr", "ilr")
method <- match.arg(method, METHODS)
if (any(x < 0, na.rm = na.rm)) {
k <- min(x, na.rm = na.rm)
if (method %in% c("total", "frequency", "pa", "chi.square", "rank",
"rrank", "clr", "rclr")) {
"rrank", "clr", "rclr", "alr", "ilr")) {
warning("input data contains negative entries: result may be non-sense\n")
}
}
Expand Down Expand Up @@ -85,18 +85,31 @@
call. = FALSE)
}
x[x > 0 & !is.na(x)] <- log(x[x > 0 & !is.na(x)], base = logbase) + 1

}, alr = {
if (missing(MARGIN))
MARGIN <- 1
if (MARGIN == 1)
x <- .calc_alr(x, ...)
else x <- t(.calc_alr(t(x), ...))
}, ilr = {
if (missing(MARGIN))
MARGIN <- 1
if (MARGIN == 1)
x <- .calc_ilr(x, ...)
else x <- t(.calc_ilr(t(x), ...))
}, clr = {
if (missing(MARGIN))
MARGIN <- 1
if (MARGIN == 1)
x <- t(.calc_clr(t(x), ...))
else x <- .calc_clr(x, ...)
x <- .calc_clr(x, ...)
else x <- t(.calc_clr(t(x), ...))
}, rclr = {
if (missing(MARGIN))
MARGIN <- 1
if (MARGIN == 1)
x <- t(.calc_rclr(t(x)))
else x <- .calc_rclr(x)
x <- .calc_rclr(x, ...)
else x <- t(.calc_rclr(t(x), ...))
})
if (any(is.nan(x)))
warning("result contains NaN, perhaps due to impossible mathematical
Expand All @@ -109,56 +122,93 @@


# Modified from the original version in mia R package
.calc_clr <- function(x, pseudocount=0){
.calc_clr <- function(x, pseudocount=0, na.rm=TRUE){
# Add pseudocount
x <- x + pseudocount
# Calculate relative abundance
x <- .calc_rel_abund(x)
# If there is negative values, gives an error.
# If there are negative values, gives an error.
if (any(x <= 0, na.rm = TRUE)) {
stop("Abundance table contains zero or negative values and ",
"clr-transformation is being applied without (suitable) ",
"pseudocount. \n",
"Try to add pseudocount (default choice pseudocount = 1 for ",
"count assay; or pseudocount = min(x[x>0]) with relabundance ",
"assay).",
call. = FALSE)
"pseudocount. \n")
}
# In every sample, calculates the log of individual entries.
# After that calculates
# the sample-specific mean value and subtracts every entries'
# value with that.
#clog <- t(log(x))
#t(clog - rowMeans(clog))

clog <- log(x)
clogm <- colMeans(clog)
return(t(t(clog) - clogm))
clog - rowMeans(clog)

}

# Modified from the original version in mia R package
.calc_rclr <- function(x){
.calc_rclr <- function(x, na.rm=TRUE){
# If there are negative values, gives an error.
if (any(x < 0, na.rm = na.rm)) {
stop("Abundance table contains negative values. The
rclr transformation assumes non-negative values.\n")
}
# Log transform
log_x <- log(x)
clog <- log(x)
# zeros are converted into infinite values in clr
# They are converted to NAs for now
log_x[is.infinite(log_x)] <- NA
clog[is.infinite(clog)] <- NA
# Calculates means for every sample, does not take NAs into account
mean_log_x <- colMeans(log_x, na.rm = TRUE)
mean_clog <- rowMeans(clog, na.rm = TRUE)
# Calculates exponential values from means, i.e., geometric means
geometric_means_of_samples <- exp(mean_log_x)
geometric_means_of_samples <- exp(mean_clog)
# Divides all values by their sample-wide geometric means
values_divided_by_geom_mean <- t(x)/geometric_means_of_samples
# Does logarithmic transform and transposes the table back to its original
# Then does logarithmic transform and transposes the table back to its original
# form
return_x <- t(log(values_divided_by_geom_mean))
xx <- log(x/geometric_means_of_samples)
# If there were zeros, there are infinite values after logarithmic transform.
# They are converted to zero.
return_x[is.infinite(return_x)] <- 0
return_x
xx[is.infinite(xx)] <- 0
xx
}

# Modified from the original version in mia R package
# Same as decostand method "total" but faster
.calc_rel_abund <- function(x){
sweep(x, 2, colSums(x, na.rm = TRUE), "/")

# Additive log-ratio (alr) transformation applied to the rows of a matrix.
#
# Arguments:
#   x           numeric matrix; every row is transformed separately.
#   reference   the column used as the denominator, given either as a
#               single column index or as a single column name of x.
#   na.rm       accepted for interface compatibility; missing values are
#               always ignored when checking for negative entries.
#   pseudocount constant added to every entry before taking logarithms.
#
# Returns a matrix with the reference column removed, where each remaining
# column j holds log(x[, j]) - log(x[, reference]).
.calc_alr <- function (x, reference = 1, na.rm=TRUE, pseudocount=0) {
    # Add pseudocount
    x <- x + pseudocount
    # If there are negative values, give an error. NA entries are skipped
    # here so that the condition can never evaluate to NA inside if().
    if (any(x < 0, na.rm = TRUE)) {
        stop("Abundance table contains negative values and ",
             "alr-transformation is being applied without (suitable) ",
             "pseudocount. \n")
    }
    if (length(reference) != 1)
        stop("The reference should be a single feature name or index")
    # A feature name is translated into its column index
    if (is.character(reference)) {
        idx <- match(reference, colnames(x))
        if (is.na(idx))
            stop("The reference feature '", reference,
                 "' was not found in the column names")
        reference <- idx
    }
    # The reference selects a column, so it is validated against ncol(x)
    if (is.na(reference) || reference < 1 || reference > ncol(x))
        stop("The reference should be a feature name, or an index between 1 and ",
             ncol(x))
    clog <- log(x)
    # drop = FALSE keeps the matrix shape when only one column remains;
    # the reference vector (one value per row) is recycled down the columns
    clog[, -reference, drop = FALSE] - clog[, reference]
}



# Isometric log-ratio (ilr) transformation applied to the rows of a matrix.
#
# Arguments:
#   x           numeric matrix; every row is transformed separately.
#   pseudocount constant added to every entry before taking logarithms.
#
# Returns a matrix with nrow(x) rows and ncol(x) - 1 columns, carrying the
# row names of x. Column j holds
#   -sqrt(j / (j + 1)) * log(g_j / x[, j + 1])
# where g_j is the geometric mean of the first j columns of the row.
.calc_ilr <- function (x, pseudocount=0) {
    # Add pseudocount
    x <- x + pseudocount
    # If there are negative values, give an error.
    if (any(x < 0, na.rm = TRUE)) {
        stop("Abundance table contains negative values and ",
             "ilr-transformation is being applied without (suitable) ",
             "pseudocount. \n")
    }

    # Work in log space: log(g_j) is the mean of the first j log values.
    # This avoids the overflow/underflow of prod(x[i, 1:j]) that occurs
    # with many features, and needs only one pass over the columns.
    logx <- log(x)
    x.ilr <- matrix(NA_real_, nrow(x), ncol(x) - 1)
    rownames(x.ilr) <- rownames(x)
    logsum <- numeric(nrow(x))   # running row sums of log(x[, 1:j])
    for (j in seq_len(ncol(x.ilr))) {
        logsum <- logsum + logx[, j]
        x.ilr[, j] <- -sqrt(j/(j + 1)) * (logsum/j - logx[, j + 1])
    }

    x.ilr

}


6 changes: 3 additions & 3 deletions R/vegdist.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
"kulczynski", "gower", "morisita", "horn", #8
"mountford", "jaccard", "raup", "binomial", "chao", #13
"altGower", "cao", "mahalanobis", "clark", "chisq", "chord", #19
"aitchison", "aitchison_robust") # 21
"Aitchison", "rAitchison") # 21
method <- pmatch(method, METHODS)
inm <- METHODS[method]
if (is.na(method))
Expand Down Expand Up @@ -47,9 +47,9 @@
x <- decostand(x, "chi.square")
if (method == 19) # chord
x <- decostand(x, "normalize")
if (method == 20) # aitchison
if (method == 20) # Aitchison
x <- decostand(x, "clr", ...) # dots to pass possible pseudocount
if (method == 21) # aitchison_robust
if (method == 21) # rAitchison
x <- decostand(x, "rclr") # No pseudocount for rclr
if (binary)
x <- decostand(x, "pa")
Expand Down
39 changes: 37 additions & 2 deletions man/decostand.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,30 @@ wisconsin(x)
\code{\link{vegdist}}), but the standardization can be used
independently of distance indices.

\item \code{alr}: Additive log ratio ("alr") transformation
(Aitchison 1986) reduces data skewness and compositionality
bias. The transformation assumes positive values; pseudocounts
can be added with the argument \code{pseudocount}. One of the
features is used as a reference; this feature (name or index) can
be given by \code{reference}. The first feature is used by default
(\code{reference=1}).
Note that this transformation drops one
feature from the transformed output data. The \code{alr}
transformation is defined formally as follows: \deqn{alr =
[log\frac{x_1}{x_D}, ..., log\frac{x_{D-1}}{x_D}],} where the
denominator feature \eqn{x_D} can be chosen arbitrarily. This
transformation is often used with pH and other chemistry
measurements. It is also commonly used in multinomial logistic
regression.

\item \code{ilr}: Isometric log ratio ("ilr") transformation
(Aitchison 1986) reduces data skewness and compositionality
bias. The transformation assumes positive values, pseudocounts
can be added with the argument \code{pseudocount}. Note that this
transformation drops one feature from the transformed output
data. For a formal definition, see e.g. Egozcue et al. (2003). The
calculation may be slow for data sets with many samples or features.

\item \code{clr}: centered log ratio ("clr") transformation proposed by
Aitchison (1986) reduces data skewness and compositionality bias.
This transformation has frequent applications in microbial ecology
Expand Down Expand Up @@ -98,7 +122,8 @@ wisconsin(x)
\deqn{rclr = log_{10}\frac{x_{r}}{g(x_{r} > 0)}}{%
rclr = log10(x_r/g(x_r > 0))}
where \eqn{x_{r}} is a single relative value, and \eqn{g(x_{r} > 0)} is geometric
mean of sample-wide relative values that are positive (over 0).
mean of sample-wide relative values that are positive (over 0).

}

Standardization, as contrasted to transformation, means that the
Expand All @@ -125,7 +150,9 @@ wisconsin(x)
\code{"method"}.
}
\author{Jari Oksanen, Etienne \enc{Laliberté}{Laliberte}
(\code{method = "log"}), Leo Lahti (\code{"clr"} and \code{"rclr"}).}
(\code{method = "log"}), Leo Lahti (\code{alr}, \code{ilr},
\code{"clr"} and \code{"rclr"}).}

\note{Common transformations can be made with standard \R functions.}

\references{
Expand All @@ -137,6 +164,11 @@ wisconsin(x)
dispersion as a measure of beta diversity. \emph{Ecology Letters}
\strong{9}, 683--693.

Egozcue, J.J., Pawlowsky-Glahn, V., Mateu-Figueras, G. &
\enc{Barceló-Vidal}{Barcelo-Vidal}, C. (2003) Isometric logratio transformations for
compositional data analysis. \emph{Mathematical Geology}
\strong{35}, 279--300.

Gloor, G.B., Macklaim, J.M., Pawlowsky-Glahn, V. & Egozcue, J.J. (2017)
Microbiome Datasets Are Compositional: And This Is Not Optional.
\emph{Frontiers in Microbiology} \strong{8}, 2224.
Expand Down Expand Up @@ -164,6 +196,9 @@ sptrans <- wisconsin(varespec)
# CLR transformation for rows, with pseudocount
varespec.clr <- decostand(varespec, "clr", pseudocount=1)
# ALR transformation for rows, with pseudocount and reference feature
varespec.alr <- decostand(varespec, "alr", pseudocount=1, reference=1)
## Chi-square: PCA similar but not identical to CA.
## Use wcmdscale for weighted analysis and identical results.
sptrans <- decostand(varespec, "chi.square")
Expand Down
63 changes: 63 additions & 0 deletions tests/aitchison-tests.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Test data: a 20 x 50 matrix of random integer counts.
# No random seed is set here, so the values differ between runs; the
# checks below compare two computations on the same data.
# data(varespec)
testdata <- matrix(round(runif(1000, 0, 100)), nrow=20)
# Shift the values down by 50 and replace the negative ones by zero
testdata <- testdata - 50
testdata[testdata < 0] <- 0
rownames(testdata) <- paste0("row", seq_len(nrow(testdata)))
colnames(testdata) <- paste0("col", seq_len(ncol(testdata)))

# Calculates relative abundance table
# NOTE(review): 'relative' is not used by the checks below
relative <- vegan::decostand(testdata, "total")

# Count and relative data with a pseudocount of 1
# NOTE(review): 'relative.with.pseudo' is not used by the checks below
testdata.with.pseudo <- testdata + 1
relative.with.pseudo <- vegan::decostand(testdata+1, "total")

# Aitchison distance should equal Euclidean distance on clr-transformed data
# (the pseudocount is added because the test data contain zeros)
a1 <- vegan::vegdist(testdata+1, method = "Aitchison")
a2 <- vegan::vegdist(vegan::decostand(testdata+1, "clr"), method = "euclidean")
# Top-level comparison: the result is printed, it does not stop the script
max(abs(a1-a2)) < 1e-6 # Largest absolute difference is within tolerance

# Robust Aitchison distance should equal Euclidean distance on
# rclr-transformed data, and it works without a pseudocount
a1 <- vegan::vegdist(testdata, method = "rAitchison")
a2 <- vegan::vegdist(vegan::decostand(testdata, "rclr"), method = "euclidean")
max(abs(a1-a2)) < 1e-6 # Largest absolute difference is within tolerance

# Robust Aitchison and Aitchison should be equal when there are no zeros
a1 <- vegan::vegdist(testdata.with.pseudo, method = "rAitchison")
a2 <- vegan::vegdist(testdata.with.pseudo, method = "Aitchison")
max(abs(a1-a2)) < 1e-6 # Largest absolute difference is within tolerance

# The pseudocount can be passed as an argument to vegan::vegdist instead of
# being added to the data beforehand
a1 <- vegan::vegdist(testdata, method = "Aitchison", pseudocount=1)
a2 <- vegan::vegdist(testdata+1, method = "Aitchison")
max(abs(a1-a2)) < 1e-6 # Largest absolute difference is within tolerance


# Compare the outcomes with an external package that also provides compositional transformations
# Adding these would demand adding Suggested packages in DESCRIPTION; skipped for now but can be
# useful for manual testing.
#skip <- TRUE
#if (!skip) {
#
# sum(compositions::ilr(testdata.with.pseudo) - vegan::decostand(testdata.with.pseudo, "ilr")) < 1e-6
# sum(compositions::ilr(testdata.with.pseudo) - vegan::decostand(testdata.with.pseudo, "ilr", MARGIN=1)) < 1e-6
# # rgr and compositions packages differ in sign; vegan::decostand is aligned with the "compositions" package
# sum(t(compositions::ilr(t(testdata.with.pseudo))) - (+vegan::decostand(testdata.with.pseudo, "ilr", MARGIN=2))) < 1e-6
# sum(t(rgr::ilr(t(testdata.with.pseudo))) - (-vegan::decostand(testdata.with.pseudo, "ilr", MARGIN=2))) < 1e-6 #
#
# sum(compositions::clr(testdata.with.pseudo) - vegan::decostand(testdata.with.pseudo, "clr")) < 1e-6
# sum(compositions::clr(testdata.with.pseudo) - vegan::decostand(testdata.with.pseudo, "clr", MARGIN=1)) < 1e-6
# sum(t(compositions::clr(t(testdata.with.pseudo))) - vegan::decostand(testdata.with.pseudo, "clr", MARGIN=2)) < 1e-6
# sum(rgr::clr(testdata.with.pseudo) - vegan::decostand(testdata.with.pseudo, "clr"))<1e-6#
#
# sum(compositions::alr(testdata.with.pseudo, ivar=1) - vegan::decostand(testdata.with.pseudo, "alr")) < 1e-6
# sum(compositions::alr(testdata.with.pseudo, ivar=1) - vegan::decostand(testdata.with.pseudo, "alr", MARGIN=1)) < 1e-6
# sum(t(compositions::alr(t(testdata.with.pseudo), ivar=1)) - vegan::decostand(testdata.with.pseudo, "alr", MARGIN=2)) < 1e-6
# sum(rgr::alr(testdata.with.pseudo, j=1) - vegan::decostand(testdata.with.pseudo, "alr", reference=1))<1e-6#
#
#}

# --------------------------------------------------------------------------------------------------------------

Loading

0 comments on commit 710877a

Please sign in to comment.