diff --git a/.gitignore b/.gitignore index ded0e78..b08a346 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +# R test files +Rplots.pdf + # History files .Rhistory .Rapp.history diff --git a/DESCRIPTION b/DESCRIPTION index f75d634..64dc885 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: spectratrait Title: A simple add-on package to aid in the fitting of leaf-level spectra-trait PLSR models -Version: 1.0.5 +Version: 1.1.0 Authors@R: c(person(given = "Julien", family = "Lamour", @@ -33,12 +33,13 @@ License: MIT + file LICENSE Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.1.1 +RoxygenNote: 7.1.2 Imports: httr (>= 1.4.2), readr (>= 1.3.1), pls (>= 2.7-2), dplyr (>= 1.0.1), + magrittr (>= 2.0.1), reshape2 (>= 1.4.4), here (>= 0.1), plotrix (>= 3.7-8), diff --git a/NAMESPACE b/NAMESPACE index abc135e..ff9a101 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,13 +7,36 @@ export(create_data_split) export(f.coef.valid) export(f.plot.coef) export(f.plot.spec) +export(find_optimal_comp_by_groups) export(find_optimal_components) export(get_ecosis_data) export(percent_rmse) export(pls_permutation) +export(pls_permutation_by_groups) export(source_GitHubData) +import(ggplot2) import(httr) +importFrom(dplyr,all_of) +importFrom(dplyr,group_by_at) +importFrom(dplyr,mutate) +importFrom(dplyr,n) +importFrom(dplyr,row_number) +importFrom(dplyr,slice) +importFrom(dplyr,vars) +importFrom(graphics,box) +importFrom(graphics,legend) +importFrom(graphics,lines) +importFrom(graphics,polygon) +importFrom(magrittr,"%>%") importFrom(pls,plsr) +importFrom(pls,selectNcomp) +importFrom(readr,read_csv) +importFrom(reshape2,melt) +importFrom(stats,as.formula) +importFrom(stats,coef) +importFrom(stats,predict) +importFrom(stats,quantile) +importFrom(stats,t.test) importFrom(utils,flush.console) importFrom(utils,read.table) importFrom(utils,setTxtProgressBar) diff --git a/R/create_data_split.R b/R/create_data_split.R index b84efc0..d558af7 100644 
--- a/R/create_data_split.R +++ b/R/create_data_split.R @@ -5,15 +5,19 @@ ##' @param split_seed random seed to use for splitting data ##' @param prop the proportion of data to preserve for calibration (e.g. 0.8) and validation (0.2). ##' This sets the calibration proportion -##' @param group_variables Use factor variables to conduct a stratfied sampling for cal/val +##' @param group_variables Use factor variables to conduct a stratified sampling for cal/val ##' ##' @return output_list A list containing the calibration dataset (cal_data) ##' and validation dataset (val_data) ##' +##' @importFrom magrittr %>% +##' @importFrom dplyr mutate group_by_at slice n vars all_of +##' ##' @author Julien Lamour, Jeremiah Anderson, Shawn P. Serbin ##' @export create_data_split <- function(dataset=NULL, approach=NULL, split_seed=123456789, prop=0.8, group_variables=NULL) { + # TODO: import only required functions from dplyr set.seed(split_seed) # outer if/else to stop if approach set to NULL diff --git a/R/f.plot.coef.R b/R/f.plot.coef.R index 0b92d31..9ea0629 100644 --- a/R/f.plot.coef.R +++ b/R/f.plot.coef.R @@ -7,6 +7,9 @@ ##' @param type Name of the y axis and of the legend ##' @param plot_label optional plot label to include with the figure ##' +##' @importFrom stats quantile +##' @importFrom graphics polygon lines legend box +##' ##' @author Julien Lamour ##' @export f.plot.coef <- function( diff --git a/R/f.plot.spec.R b/R/f.plot.spec.R index 264af77..f0798d1 100644 --- a/R/f.plot.spec.R +++ b/R/f.plot.spec.R @@ -7,6 +7,9 @@ ##' @param type Name of the y axis and of the legend. E.g. Reflectance, Transmittance ##' @param plot_label optional plot label to include with the figure ##' +##' @importFrom stats quantile +##' @importFrom graphics polygon lines legend box +##' ##' @author Julien Lamour, Shawn P. 
Serbin ##' @export f.plot.spec <- function( diff --git a/R/find_optimal_components.R b/R/find_optimal_components.R index e53e544..92a49b4 100644 --- a/R/find_optimal_components.R +++ b/R/find_optimal_components.R @@ -1,6 +1,8 @@ -##' Apply different methods to determing the optimal number of PLSR model components +##' Applies different methods for the determination of the optimal number of PLSR model components ##' ##' @param dataset input full PLSR dataset. Usually just the calibration dataset +##' @param targetVariable What object or variable to use as the Y (predictand) in the PLSR model? +##' Usually the "inVar" variable set at the beginning of a PLS script ##' @param method Which approach to use to find optimal components. Options: pls, firstPlateau, firstMin ##' @param maxComps maximum number of components to consider ##' @param iterations how many different permutations to run @@ -8,23 +10,32 @@ ##' @param prop proportion of data to preserve for each permutation ##' @param random_seed random seed to use for splitting data ##' +##' @importFrom stats as.formula coef predict quantile t.test +##' @importFrom pls plsr selectNcomp +##' @importFrom reshape2 melt +##' @import ggplot2 +##' ##' @return nComps the optimal number of PLSR components ##' ##' @author Julien Lamour, Jeremiah Anderson, Shawn P. 
Serbin ##' @export -find_optimal_components <- function(dataset=NULL, method="pls", maxComps=20, iterations=20, seg=100, - prop=0.70, random_seed=123456789) { +find_optimal_components <- function(dataset=NULL, targetVariable=NULL, method="pls", maxComps=20, + iterations=20, seg=100, prop=0.70, random_seed=123456789) { + set.seed(random_seed) + inVar <- targetVariable + print("*** Identifying optimal number of PLSR components ***") + if(method=="pls") { print("*** Running PLS permutation test ***") plsr.out <- pls::plsr(as.formula(paste(inVar,"~","Spectra")), scale=FALSE, center=TRUE, ncomp=maxComps, validation="CV", segments = seg, segment.type="interleaved", trace=FALSE, - jackknife=TRUE, data=cal.plsr.data) - nComps <- selectNcomp(plsr.out, method = "onesigma", plot = TRUE) + jackknife=TRUE, data=dataset) + nComps <- pls::selectNcomp(plsr.out, method = "onesigma", plot = TRUE) } if(method=="firstPlateau") { - press.out <- spectratrait::pls_permutation(dataset=dataset, maxComps=maxComps, + press.out <- spectratrait::pls_permutation(dataset=dataset, targetVariable=inVar, maxComps=maxComps, iterations=iterations, prop=prop) # PRESS plot pressDF <- as.data.frame(press.out$PRESS) @@ -50,7 +61,7 @@ find_optimal_components <- function(dataset=NULL, method="pls", maxComps=20, ite print(bp) } if(method=="firstMin") { - press.out <- spectratrait::pls_permutation(dataset=dataset, maxComps=maxComps, + press.out <- spectratrait::pls_permutation(dataset=dataset, targetVariable=inVar, maxComps=maxComps, iterations=iterations, prop=prop) # PRESS plot pressDF <- as.data.frame(press.out$PRESS) @@ -84,4 +95,103 @@ find_optimal_components <- function(dataset=NULL, method="pls", maxComps=20, ite print(bp) } return(nComps) +} + +##' Uses the firstMin and firstPlateau methods for the determination of the optimal number of PLSR model components, +##' by group (i.e. optimal selection by stratification) +##' +##' @param dataset input full PLSR dataset. 
Usually just the calibration dataset +##' @param targetVariable What object or variable to use as the Y (predictand) in the PLSR model? +##' Usually the "inVar" variable set at the beginning of a PLS script +##' @param method Which approach to use to find optimal components. Options: firstPlateau, firstMin +##' @param maxComps maximum number of components to consider +##' @param iterations how many different permutations to run +##' @param prop proportion of data to preserve for each permutation +##' @param random_seed random seed to use for splitting data +##' @param group_variables Character vector of the form c("var1", "var2"..."varn") +##' providing the factors used for stratified sampling. +##' +##' @importFrom stats as.formula coef predict quantile t.test +##' @import ggplot2 +##' @importFrom reshape2 melt +##' +##' @return nComps the optimal number of PLSR components +##' +##' @author asierrl, Shawn P. Serbin +##' @export +find_optimal_comp_by_groups <- function (dataset = NULL, targetVariable = NULL, method = "firstPlateau", + maxComps = 20, iterations = 20, prop = 0.7, random_seed = 123456789, + group_variables=NULL) { + set.seed(random_seed) + inVar <- targetVariable + # TODO - really should merge this with the original and have an if/else if not NULL and select either + # pls_permutation OR pls_permutation_by_groups. + print("*** Identifying optimal number of PLSR components using stratified resampling by group_variables ***") + if (method == "pls") { + stop("*** Please select either the firstMin and firstPlateau. 
The pls package approach is not compatible ***") + } + if (method == "firstPlateau") { + press.out <- spectratrait::pls_permutation_by_groups(dataset=dataset, targetVariable=inVar, + maxComps=maxComps, iterations=iterations, + prop=prop, group_variables=group_variables) + pressDF <- as.data.frame(press.out$PRESS) + names(pressDF) <- as.character(seq(maxComps)) + pressDFres <- reshape2::melt(pressDF) + results <- NULL + for (i in 1:(maxComps - 1)) { + p_value <- t.test(press.out$PRESS[, i], press.out$PRESS[, (i + 1)])$p.value + temp_results <- data.frame(Component = (i + 1), P.value = round(p_value, 6)) + results <- rbind(results, temp_results) + } + nComps <- min(results[results$P.value > 0.05, "Component"]) + print(paste0("*** Optimal number of components based on t.test: ", nComps)) + bp <- ggplot(pressDFres, aes(x = variable, y = value)) + + theme_bw() + geom_boxplot(notch = FALSE) + labs(x = "Number of Components", + y = "PRESS") + + stat_boxplot(geom = "errorbar", width = 0.2) + + geom_vline(xintercept = nComps, linetype = "dashed", + color = "blue", size = 1) + theme(axis.text = element_text(size = 18), legend.position = "none", + axis.title = element_text(size = 20, face = "bold"), + axis.text.x = element_text(angle = 0, vjust = 0.5), + panel.border = element_rect(linetype = "solid", + fill = NA, size = 1.5)) + print(bp) + } + if (method == "firstMin") { + press.out <- spectratrait::pls_permutation_by_groups(dataset = dataset, targetVariable=inVar, + maxComps=maxComps, iterations=iterations, + prop=prop, group_variables=group_variables) + pressDF <- as.data.frame(press.out$PRESS) + names(pressDF) <- as.character(seq(maxComps)) + pressDFres <- reshape2::melt(pressDF) + mean_PRESS_comp <- apply(X = pressDF, MARGIN = 2, FUN = mean) + lowest_PRESS <- which.min(mean_PRESS_comp) + results <- as.vector(array(data = "NA", dim = c(lowest_PRESS - 1, 1))) + for (i in seq_along(1:(lowest_PRESS - 1))) { + comp1 <- i + comp2 <- lowest_PRESS + ttest <- 
t.test(pressDFres$value[which(pressDFres$variable == comp1)], + pressDFres$value[which(pressDFres$variable == comp2)]) + results[i] <- round(unlist(ttest$p.value), 8) + } + results <- data.frame(seq(1, lowest_PRESS - 1, 1), results) + names(results) <- c("Component", "P.value") + first <- min(which(as.numeric(as.character(results$P.value)) > 0.05)) + nComps <- results$Component[first] + print(paste0("*** Optimal number of components based on t.test: ", nComps)) + bp <- ggplot(pressDFres, aes(x = variable, y = value)) + + theme_bw() + geom_boxplot(notch = FALSE) + labs(x = "Number of Components", + y = "PRESS") + + stat_boxplot(geom = "errorbar", width = 0.2) + + geom_vline(xintercept = nComps, linetype = "dashed", + color = "blue", size = 1) + theme(axis.text = element_text(size = 18), legend.position = "none", + axis.title = element_text(size = 20, face = "bold"), + axis.text.x = element_text(angle = 0, vjust = 0.5), + panel.border = element_rect(linetype = "solid", + fill = NA, size = 1.5)) + print(bp) + } + return(nComps) } \ No newline at end of file diff --git a/R/get_ecosis_data.R b/R/get_ecosis_data.R index 5cff000..2973a04 100644 --- a/R/get_ecosis_data.R +++ b/R/get_ecosis_data.R @@ -10,6 +10,8 @@ ##' names(dat_raw)[1:40] ##' } ##' +##' @importFrom readr read_csv +##' ##' @return EcoSIS spectral dataset object ##' ##' @author Shawn P. Serbin, Alexey Shiklomanov diff --git a/R/pls_permutation.R b/R/pls_permutation.R index 8427e67..7ebc79b 100644 --- a/R/pls_permutation.R +++ b/R/pls_permutation.R @@ -4,6 +4,8 @@ ##' See Serbin et al. (2019). DOI: https://doi.org/10.1111/nph.16123 ##' ##' @param dataset input full PLSR dataset. Usually just the calibration dataset +##' @param targetVariable What object or variable to use as the Y (predictand) in the PLSR model? 
+##' Usually the "inVar" variable set at the beginning of a PLS script ##' @param maxComps maximum number of components to use for each PLSR fit ##' @param iterations how many different permutations to run ##' @param prop proportion of data to preserve for each permutation @@ -17,8 +19,9 @@ ##' ##' @author Julien Lamour, Shawn P. Serbin ##' @export -pls_permutation <- function(dataset=NULL, maxComps=20, iterations=20, prop=0.70, - verbose=FALSE) { +pls_permutation <- function(dataset=NULL, targetVariable=NULL, maxComps=20, iterations=20, + prop=0.70, verbose=FALSE) { + inVar <- targetVariable coefs <- array(0,dim=c((ncol(dataset$Spectra)+1),iterations,maxComps)) press.out <- array(data=NA, dim=c(iterations,maxComps)) print("*** Running permutation test. Please hang tight, this can take awhile ***") @@ -27,7 +30,7 @@ pls_permutation <- function(dataset=NULL, maxComps=20, iterations=20, prop=0.70, "Data Proportion (percent):", prop*100, sep=" ")) if (verbose) { - j <- 1 # <--- Numeric counter for progress bar + j <- 1 pb <- txtProgressBar(min = 0, max = iterations, char="*",width=70,style = 3) } @@ -47,9 +50,9 @@ pls_permutation <- function(dataset=NULL, maxComps=20, iterations=20, prop=0.70, ### Display progress to console if (verbose) { - setTxtProgressBar(pb, j) # show progress bar - j <- j+1 # <--- increase counter by 1 - flush.console() #<--- show output in real-time + setTxtProgressBar(pb, j) + j <- j+1 + flush.console() } } if (verbose) { @@ -60,4 +63,82 @@ pls_permutation <- function(dataset=NULL, maxComps=20, iterations=20, prop=0.70, print("*** Providing PRESS and coefficient array output ***") output <- list(PRESS=press.out, coef_array=coefs) return(output) +} + + +##' Run a PLSR model permutation analysis stratified by selected "groups". Can be used to +##' determine the optimal number of components or conduct a bootstrap uncertainty analysis +##' +##' @param dataset input full PLSR dataset. 
Usually just the calibration dataset +##' @param targetVariable What object or variable to use as the Y (predictand) in the PLSR model? +##' Usually the "inVar" variable set at the beginning of a PLS script +##' @param maxComps maximum number of components to use for each PLSR fit +##' @param iterations how many different permutations to run +##' @param prop proportion of data to preserve for each permutation +##' @param verbose Should the function report the current iteration status/progress to the terminal +##' or run silently? TRUE/FALSE. Default FALSE +##' @param group_variables Character vector of the form c("var1", "var2"..."varn") +##' providing the factors used for stratified sampling in the PLSR permutation analysis +##' +##' @return output a list containing the PRESS and coef_array. +##' output <- list(PRESS=press.out, coef_array=coefs) +##' +##' @importFrom magrittr %>% +##' @importFrom dplyr mutate group_by_at slice n row_number +##' @importFrom pls plsr +##' @importFrom utils flush.console read.table setTxtProgressBar txtProgressBar +##' +##' @author asierrl, Shawn P. Serbin, Julien Lamour +##' @export +##' +pls_permutation_by_groups <- function (dataset = NULL, targetVariable=NULL, maxComps = 20, + iterations = 20, prop = 0.7, group_variables=NULL, + verbose = FALSE) { + inVar <- targetVariable + coefs <- array(0, dim = c((ncol(dataset$Spectra) + 1), iterations, maxComps)) + press.out <- array(data = NA, dim = c(iterations, maxComps)) + print("*** Running permutation test. 
Please hang tight, this can take awhile ***") + print("Options:") + print(paste("Max Components:", maxComps, "Iterations:", iterations, + "Data Proportion (percent):", prop * 100, sep = " ")) + if (verbose) { + j <- 1 + pb <- utils::txtProgressBar(min = 0, max = iterations, + char = "*", width = 70, style = 3) + } + for (i in seq_along(1:iterations)) { + if (!is.null(group_variables)) { + trainset <- dataset %>% + mutate(int_id=row_number()) %>% + group_by_at(group_variables) %>% + slice(sample(1:n(), prop * n())) + rows <- trainset$int_id + } else { + rows <- sample(1:nrow(dataset), floor(prop * nrow(dataset))) + } + sub.data <- dataset[rows, ] + val.sub.data <- dataset[-rows, ] + plsr.out <- plsr(as.formula(paste(inVar, "~", "Spectra")), + scale = FALSE, center = TRUE, ncomp = maxComps, + validation = "none", + data = sub.data) + pred_val <- predict(plsr.out, newdata = val.sub.data) + sq_resid <- (pred_val[, , ] - val.sub.data[, inVar])^2 + press <- apply(X = sq_resid, MARGIN = 2, FUN = sum) + press.out[i, ] <- press + coefs[, i, ] <- coef(plsr.out, intercept = TRUE, ncomp = 1:maxComps) + rm(rows, sub.data, val.sub.data, plsr.out, pred_val, sq_resid, press) + if (verbose) { + setTxtProgressBar(pb, j) + j <- j + 1 + flush.console() + } + } + if (verbose) { + close(pb) + } + # create a new list with PRESS and permuted coefficients x wavelength x component number + print("*** Providing PRESS and coefficient array output ***") + output <- list(PRESS = press.out, coef_array = coefs) + return(output) } \ No newline at end of file diff --git a/README.md b/README.md index c4c2284..318b2bb 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ https://ecosml.org/package/github/TESTgroup-BNL/spectratrait ### Depends: ggplot2 (>= 3.3.2), remotes (>= 2.2.0), devtools (>= 2.3.1), readr (>= 1.3.1), RCurl (>= 1.98-1.2), -httr (>= 1.4.2), pls (>= 2.7-2), dplyr (>= 1.0.1), reshape2 (>= 1.4.4), here (>= 0.1), +httr (>= 1.4.2), pls (>= 2.7-2), magrittr (>= 2.0.1), dplyr (>= 
1.0.1), reshape2 (>= 1.4.4), here (>= 0.1), plotrix (>= 3.7-8), gridExtra (>= 2.3), scales (>= 1.1.1), knitr ### INSTALL diff --git a/inst/scripts/simple_spectra-trait_plsr_example.R b/inst/scripts/simple_spectra-trait_plsr_example.R index 2b7005a..34a9ed8 100644 --- a/inst/scripts/simple_spectra-trait_plsr_example.R +++ b/inst/scripts/simple_spectra-trait_plsr_example.R @@ -20,8 +20,7 @@ #--------------------------------------------------------------------------------------------------# ### Load libraries -list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra", - "spectratrait") +list.of.packages <- c("pls","here","dplyr","plotrix","ggplot2","gridExtra","spectratrait") invisible(lapply(list.of.packages, library, character.only = TRUE)) #--------------------------------------------------------------------------------------------------# @@ -139,13 +138,14 @@ maxComps <- 20 iterations <- 40 prop <- 0.70 if (method=="pls") { - nComps <- spectratrait::find_optimal_components(dataset=plsr_data, method=method, maxComps=maxComps, - seg=seg, random_seed=random_seed) + nComps <- spectratrait::find_optimal_components(dataset=plsr_data, targetVariable=inVar, + method=method, maxComps=maxComps, seg=seg, + random_seed=random_seed) print(paste0("*** Optimal number of components: ", nComps)) } else { - nComps <- spectratrait::find_optimal_components(dataset=plsr_data, method=method, maxComps=maxComps, - iterations=iterations, seg=seg, prop=prop, - random_seed=random_seed) + nComps <- spectratrait::find_optimal_components(dataset=plsr_data, targetVariable=inVar, + method=method, maxComps=maxComps, iterations=iterations, + seg=seg, prop=prop, random_seed=random_seed) } dev.copy(png,file.path(outdir,paste0(paste0(inVar,"_PLSR_Component_Selection.png"))), height=2800, width=3400, res=340) diff --git a/inst/scripts/spectra-trait_ely_leafN_plsr_bootstrap_example.R b/inst/scripts/spectra-trait_ely_leafN_plsr_bootstrap_example.R index 1f23607..6d3d3b4 
100644 --- a/inst/scripts/spectra-trait_ely_leafN_plsr_bootstrap_example.R +++ b/inst/scripts/spectra-trait_ely_leafN_plsr_bootstrap_example.R @@ -15,8 +15,7 @@ #--------------------------------------------------------------------------------------------------# ### Load libraries -list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra", - "spectratrait") +list.of.packages <- c("pls","dplyr","here","plotrix","ggplot2","gridExtra","spectratrait") invisible(lapply(list.of.packages, library, character.only = TRUE)) #--------------------------------------------------------------------------------------------------# @@ -159,16 +158,15 @@ maxComps <- 16 iterations <- 80 prop <- 0.70 if (method=="pls") { - # pls package approach - faster but estimates more components.... - nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, - maxComps=maxComps, seg=seg, + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, seg=seg, random_seed=random_seed) print(paste0("*** Optimal number of components: ", nComps)) } else { - nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, - maxComps=maxComps, iterations=iterations, - seg=seg, prop=prop, - random_seed=random_seed) + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, + iterations=iterations, + seg=seg, prop=prop, random_seed=random_seed) } dev.copy(png,file.path(outdir,paste0(paste0(inVar,"_PLSR_Component_Selection.png"))), height=2800, width=3400, res=340) @@ -304,7 +302,7 @@ if(grepl("Windows", sessionInfo()$running)){ ### PLSR bootstrap permutation uncertainty analysis iterations <- 500 # how many permutation iterations to run prop <- 0.70 # fraction of training data to keep for each iteration -plsr_permutation <- spectratrait::pls_permutation(dataset=cal.plsr.data, +plsr_permutation 
<- spectratrait::pls_permutation(dataset=cal.plsr.data, targetVariable=inVar, maxComps=nComps, iterations=iterations, prop=prop, verbose=TRUE) diff --git a/inst/scripts/spectra-trait_ely_leafN_plsr_bootstrap_grp_example.R b/inst/scripts/spectra-trait_ely_leafN_plsr_bootstrap_grp_example.R new file mode 100644 index 0000000..be6cb7d --- /dev/null +++ b/inst/scripts/spectra-trait_ely_leafN_plsr_bootstrap_grp_example.R @@ -0,0 +1,411 @@ +#################################################################################################### +# +# Example "how-to" script illustrating the use of PLSR modeling to develop a +# spectra-trait algorithm to estimate leaf nitrogen content with leaf-level spectroscopy data. +# The example is built from published data source (DOI: https://doi.org/10.1093/jxb/erz061) +# This example illustrates how to select the optimal number of components and quantify model +# prediction uncertainty using bootstrap permutation +# +# Notes: +# * Questions, comments, or concerns can be sent to sserbin@bnl.gov +# * Code is provided under GNU General Public License v3.0 +# +#################################################################################################### + + +#--------------------------------------------------------------------------------------------------# +### Load libraries +list.of.packages <- c("pls","dplyr","here","plotrix","ggplot2","gridExtra","spectratrait") +invisible(lapply(list.of.packages, library, character.only = TRUE)) +#--------------------------------------------------------------------------------------------------# + + +#--------------------------------------------------------------------------------------------------# +### Setup other functions and options +# not in +`%notin%` <- Negate(`%in%`) + +# Script options +pls::pls.options(plsralg = "oscorespls") +pls::pls.options("plsralg") + +# Default par options +opar <- par(no.readonly = T) + +# Specify output directory, output_dir +# Options: +# tempdir - use a 
OS-specified temporary directory +# user defined PATH - e.g. "~/scratch/PLSR" +output_dir <- "tempdir" +#--------------------------------------------------------------------------------------------------# + + +#--------------------------------------------------------------------------------------------------# +### Load Ely et al 2019 dataset +data("ely_plsr_data") +head(ely_plsr_data)[,1:8] + +# What is the target variable? +inVar <- "N_g_m2" +#--------------------------------------------------------------------------------------------------# + + +#--------------------------------------------------------------------------------------------------# +### Set working directory +if (output_dir=="tempdir") { + outdir <- tempdir() +} else { + if (! file.exists(output_dir)) dir.create(output_dir,recursive=TRUE) + outdir <- file.path(path.expand(output_dir)) +} +setwd(outdir) # set working directory +getwd() # check wd +#--------------------------------------------------------------------------------------------------# + + +#--------------------------------------------------------------------------------------------------# +### PLSR data +Start.wave <- 500 +End.wave <- 2400 +wv <- seq(Start.wave,End.wave,1) +plsr_data <- ely_plsr_data +head(ely_plsr_data)[1:20] +#--------------------------------------------------------------------------------------------------# + + +#--------------------------------------------------------------------------------------------------# +### Create cal/val datasets +## Make a stratified random sampling using the Species_Code strata + +method <- "dplyr" #base/dplyr +# base R - a bit slow +# dplyr - much faster +split_data <- spectratrait::create_data_split(dataset=plsr_data, approach=method, + split_seed=23452135, prop=0.7, + group_variables="Species_Code") +names(split_data) +cal.plsr.data <- split_data$cal_data +head(cal.plsr.data)[1:8] +val.plsr.data <- split_data$val_data +head(val.plsr.data)[1:8] +rm(split_data) + +# Datasets: 
+print(paste("Cal observations: ",dim(cal.plsr.data)[1],sep="")) +print(paste("Val observations: ",dim(val.plsr.data)[1],sep="")) + +cal_hist_plot <- qplot(cal.plsr.data[,paste0(inVar)],geom="histogram", + main = paste0("Cal. Histogram for ",inVar), + xlab = paste0(inVar),ylab = "Count",fill=I("grey50"),col=I("black"), + alpha=I(.7)) +val_hist_plot <- qplot(val.plsr.data[,paste0(inVar)],geom="histogram", + main = paste0("Val. Histogram for ",inVar), + xlab = paste0(inVar),ylab = "Count",fill=I("grey50"),col=I("black"), + alpha=I(.7)) +histograms <- grid.arrange(cal_hist_plot, val_hist_plot, ncol=2) +ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Histograms.png")), plot = histograms, + device="png", width = 30, + height = 12, units = "cm", + dpi = 300) +# output cal/val data +write.csv(cal.plsr.data,file=file.path(outdir,paste0(inVar,'_Cal_PLSR_Dataset.csv')), + row.names=FALSE) +write.csv(val.plsr.data,file=file.path(outdir,paste0(inVar,'_Val_PLSR_Dataset.csv')), + row.names=FALSE) +#--------------------------------------------------------------------------------------------------# + + +#--------------------------------------------------------------------------------------------------# +### Format PLSR data for model fitting +cal_spec <- as.matrix(cal.plsr.data[, which(names(cal.plsr.data) %in% paste0("Wave_",wv))]) +cal.plsr.data <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% paste0("Wave_",wv))], + Spectra=I(cal_spec)) +head(cal.plsr.data)[1:7] + +val_spec <- as.matrix(val.plsr.data[, which(names(val.plsr.data) %in% paste0("Wave_",wv))]) +val.plsr.data <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% paste0("Wave_",wv))], + Spectra=I(val_spec)) +head(val.plsr.data)[1:7] +#--------------------------------------------------------------------------------------------------# + + +#--------------------------------------------------------------------------------------------------# +### plot cal and val spectra 
+par(mfrow=c(1,2)) # B, L, T, R +spectratrait::f.plot.spec(Z=cal.plsr.data$Spectra,wv=seq(Start.wave,End.wave,1), + plot_label="Calibration") +spectratrait::f.plot.spec(Z=val.plsr.data$Spectra,wv=seq(Start.wave,End.wave,1), + plot_label="Validation") + +dev.copy(png,file.path(outdir,paste0(inVar,'_Cal_Val_Spectra.png')), + height=2500,width=4900, res=340) +dev.off(); +par(mfrow=c(1,1)) +#--------------------------------------------------------------------------------------------------# + + +#--------------------------------------------------------------------------------------------------# +### Use permutation to determine the optimal number of components +if(grepl("Windows", sessionInfo()$running)){ + pls.options(parallel = NULL) +} else { + pls.options(parallel = parallel::detectCores()-1) +} + +method <- "firstMin" #firstPlateau, firstMin +random_seed <- 1245565 +seg <- 50 +maxComps <- 20 +iterations <- 80 +prop <- 0.70 +nComps <- spectratrait::find_optimal_comp_by_groups(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, + iterations=iterations, prop=prop, + random_seed=random_seed, + group_variables="Species_Code") +dev.copy(png,file.path(outdir,paste0(paste0(inVar,"_PLSR_Component_Selection.png"))), + height=2800, width=3400, res=340) +dev.off(); +#--------------------------------------------------------------------------------------------------# + + +#--------------------------------------------------------------------------------------------------# +### Fit final model - using leave-one-out cross validation +plsr.out <- plsr(as.formula(paste(inVar,"~","Spectra")),scale=FALSE,ncomp=nComps,validation="LOO", + trace=FALSE,data=cal.plsr.data) +fit <- plsr.out$fitted.values[,1,nComps] +pls.options(parallel = NULL) + +# External validation fit stats +par(mfrow=c(1,2)) # B, L, T, R +pls::RMSEP(plsr.out, newdata = val.plsr.data) +plot(pls::RMSEP(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL RMSEP", + xlab="Number of 
Components",ylab="Model Validation RMSEP",lty=1,col="black",cex=1.5,lwd=2) +box(lwd=2.2) + +R2(plsr.out, newdata = val.plsr.data) +plot(pls::R2(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL R2", + xlab="Number of Components",ylab="Model Validation R2",lty=1,col="black",cex=1.5,lwd=2) +box(lwd=2.2) +dev.copy(png,file.path(outdir,paste0(paste0(inVar,"_Validation_RMSEP_R2_by_Component.png"))), + height=2800, width=4800, res=340) +dev.off(); +par(opar) +#--------------------------------------------------------------------------------------------------# + + +#--------------------------------------------------------------------------------------------------# +### PLSR fit observed vs. predicted plot data +#calibration +cal.plsr.output <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% "Spectra")], PLSR_Predicted=fit, + PLSR_CV_Predicted=as.vector(plsr.out$validation$pred[,,nComps])) +cal.plsr.output <- cal.plsr.output %>% + mutate(PLSR_CV_Residuals = PLSR_CV_Predicted-get(inVar)) +head(cal.plsr.output) +cal.R2 <- round(pls::R2(plsr.out,intercept=F)[[1]][nComps],2) +cal.RMSEP <- round(sqrt(mean(cal.plsr.output$PLSR_CV_Residuals^2)),2) + +val.plsr.output <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% "Spectra")], + PLSR_Predicted=as.vector(predict(plsr.out, + newdata = val.plsr.data, + ncomp=nComps, type="response")[,,1])) +val.plsr.output <- val.plsr.output %>% + mutate(PLSR_Residuals = PLSR_Predicted-get(inVar)) +head(val.plsr.output) +val.R2 <- round(pls::R2(plsr.out,newdata=val.plsr.data,intercept=F)[[1]][nComps],2) +val.RMSEP <- round(sqrt(mean(val.plsr.output$PLSR_Residuals^2)),2) + +rng_quant <- quantile(cal.plsr.output[,inVar], probs = c(0.001, 0.999)) +cal_scatter_plot <- ggplot(cal.plsr.output, aes(x=PLSR_CV_Predicted, y=get(inVar))) + + theme_bw() + geom_point() + geom_abline(intercept = 0, slope = 1, color="dark grey", + linetype="dashed", size=1.5) + xlim(rng_quant[1], rng_quant[2]) + + ylim(rng_quant[1], 
rng_quant[2]) + + labs(x=paste0("Predicted ", paste(inVar), " (units)"), + y=paste0("Observed ", paste(inVar), " (units)"), + title=paste0("Calibration: ", paste0("Rsq = ", cal.R2), "; ", paste0("RMSEP = ", cal.RMSEP))) + + theme(axis.text=element_text(size=18), legend.position="none", + axis.title=element_text(size=20, face="bold"), + axis.text.x = element_text(angle = 0,vjust = 0.5), + panel.border = element_rect(linetype = "solid", fill = NA, size=1.5)) + +cal_resid_histogram <- ggplot(cal.plsr.output, aes(x=PLSR_CV_Residuals)) + + geom_histogram(alpha=.5, position="identity") + + geom_vline(xintercept = 0, color="black", + linetype="dashed", size=1) + theme_bw() + + theme(axis.text=element_text(size=18), legend.position="none", + axis.title=element_text(size=20, face="bold"), + axis.text.x = element_text(angle = 0,vjust = 0.5), + panel.border = element_rect(linetype = "solid", fill = NA, size=1.5)) + +rng_quant <- quantile(val.plsr.output[,inVar], probs = c(0.001, 0.999)) +val_scatter_plot <- ggplot(val.plsr.output, aes(x=PLSR_Predicted, y=get(inVar))) + + theme_bw() + geom_point() + geom_abline(intercept = 0, slope = 1, color="dark grey", + linetype="dashed", size=1.5) + xlim(rng_quant[1], rng_quant[2]) + + ylim(rng_quant[1], rng_quant[2]) + + labs(x=paste0("Predicted ", paste(inVar), " (units)"), + y=paste0("Observed ", paste(inVar), " (units)"), + title=paste0("Validation: ", paste0("Rsq = ", val.R2), "; ", paste0("RMSEP = ", val.RMSEP))) + + theme(axis.text=element_text(size=18), legend.position="none", + axis.title=element_text(size=20, face="bold"), + axis.text.x = element_text(angle = 0,vjust = 0.5), + panel.border = element_rect(linetype = "solid", fill = NA, size=1.5)) + +val_resid_histogram <- ggplot(val.plsr.output, aes(x=PLSR_Residuals)) + + geom_histogram(alpha=.5, position="identity") + + geom_vline(xintercept = 0, color="black", + linetype="dashed", size=1) + theme_bw() + + theme(axis.text=element_text(size=18), legend.position="none", + 
axis.title=element_text(size=20, face="bold"), + axis.text.x = element_text(angle = 0,vjust = 0.5), + panel.border = element_rect(linetype = "solid", fill = NA, size=1.5)) + +# plot cal/val side-by-side +scatterplots <- grid.arrange(cal_scatter_plot, val_scatter_plot, cal_resid_histogram, + val_resid_histogram, nrow=2,ncol=2) +ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Scatterplots.png")), + plot = scatterplots, device="png", width = 32, height = 30, units = "cm", + dpi = 300) +#--------------------------------------------------------------------------------------------------# + + +#--------------------------------------------------------------------------------------------------# +### Generate Coefficient and VIP plots +vips <- spectratrait::VIP(plsr.out)[nComps,] + +par(mfrow=c(2,1)) +plot(plsr.out$coefficients[,,nComps], x=wv,xlab="Wavelength (nm)", + ylab="Regression coefficients",lwd=2,type='l') +box(lwd=2.2) +plot(seq(Start.wave,End.wave,1),vips,xlab="Wavelength (nm)",ylab="VIP",cex=0.01) +lines(seq(Start.wave,End.wave,1),vips,lwd=3) +abline(h=0.8,lty=2,col="dark grey") +box(lwd=2.2) +dev.copy(png,file.path(outdir,paste0(inVar,'_Coefficient_VIP_plot.png')), + height=3100, width=4100, res=340) +dev.off(); +par(opar) +#--------------------------------------------------------------------------------------------------# + + +#--------------------------------------------------------------------------------------------------# +if(grepl("Windows", sessionInfo()$running)){ + pls.options(parallel =NULL) +} else { + pls.options(parallel = parallel::detectCores()-1) +} + +### PLSR bootstrap permutation uncertainty analysis +iterations <- 500 # how many permutation iterations to run +prop <- 0.70 # fraction of training data to keep for each iteration +plsr_permutation <- spectratrait::pls_permutation_by_groups(dataset=cal.plsr.data, targetVariable=inVar, + maxComps=nComps, + iterations=iterations, + prop=prop, group_variables="Species_Code", + verbose=TRUE) 
+bootstrap_intercept <- plsr_permutation$coef_array[1,,nComps] +bootstrap_coef <- plsr_permutation$coef_array[2:length(plsr_permutation$coef_array[,1,nComps]), + ,nComps] +rm(plsr_permutation) + +# apply coefficients to left-out validation data +interval <- c(0.025,0.975) +Bootstrap_Pred <- val.plsr.data$Spectra %*% bootstrap_coef + + matrix(rep(bootstrap_intercept, length(val.plsr.data[,inVar])), byrow=TRUE, + ncol=length(bootstrap_intercept)) +Interval_Conf <- apply(X = Bootstrap_Pred, MARGIN = 1, FUN = quantile, + probs=c(interval[1], interval[2])) +sd_mean <- apply(X = Bootstrap_Pred, MARGIN = 1, FUN = sd) +sd_res <- sd(val.plsr.output$PLSR_Residuals) +sd_tot <- sqrt(sd_mean^2+sd_res^2) +val.plsr.output$LCI <- Interval_Conf[1,] +val.plsr.output$UCI <- Interval_Conf[2,] +val.plsr.output$LPI <- val.plsr.output$PLSR_Predicted-1.96*sd_tot +val.plsr.output$UPI <- val.plsr.output$PLSR_Predicted+1.96*sd_tot +head(val.plsr.output) + +# Bootstrap regression coefficient plot +spectratrait::f.plot.coef(Z = t(bootstrap_coef), wv = seq(Start.wave,End.wave,1), + plot_label="Bootstrap regression coefficients",position = 'bottomleft') +abline(h=0,lty=2,col="grey50") +box(lwd=2.2) +dev.copy(png,file.path(outdir,paste0(inVar,'_Bootstrap_Regression_Coefficients.png')), + height=2100, width=3800, res=340) +dev.off(); + +# validation plot +rmsep_percrmsep <- spectratrait::percent_rmse(plsr_dataset = val.plsr.output, + inVar = inVar, + residuals = val.plsr.output$PLSR_Residuals, + range="full") +RMSEP <- rmsep_percrmsep$rmse +perc_RMSEP <- rmsep_percrmsep$perc_rmse +r2 <- round(pls::R2(plsr.out, newdata = val.plsr.data, intercept=F)$val[nComps],2) +expr <- vector("expression", 3) +expr[[1]] <- bquote(R^2==.(r2)) +expr[[2]] <- bquote(RMSEP==.(round(RMSEP,2))) +expr[[3]] <- bquote("%RMSEP"==.(round(perc_RMSEP,2))) +rng_vals <- c(min(val.plsr.output$LPI), max(val.plsr.output$UPI)) +par(mfrow=c(1,1), mar=c(4.2,5.3,1,0.4), oma=c(0, 0.1, 0, 0.2)) 
+plotrix::plotCI(val.plsr.output$PLSR_Predicted,val.plsr.output[,inVar], + li=val.plsr.output$LPI, ui=val.plsr.output$UPI, gap=0.009,sfrac=0.000, + lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), + err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="grey80", + cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"), + ylab=paste0("Observed ", paste(inVar), " (units)"), + cex.axis=1.5,cex.lab=1.8) +abline(0,1,lty=2,lw=2) +plotrix::plotCI(val.plsr.output$PLSR_Predicted,val.plsr.output[,inVar], + li=val.plsr.output$LCI, ui=val.plsr.output$UCI, gap=0.009,sfrac=0.004, + lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), + err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="black", + cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"), + ylab=paste0("Observed ", paste(inVar), " (units)"), + cex.axis=1.5,cex.lab=1.8, add=T) +legend("topleft", legend=expr, bty="n", cex=1.5) +legend("bottomright", legend=c("Prediction Interval","Confidence Interval"), + lty=c(1,1), col = c("grey80","black"), lwd=3, bty="n", cex=1.5) +box(lwd=2.2) +dev.copy(png,file.path(outdir,paste0(inVar,"_PLSR_Validation_Scatterplot.png")), + height=2800, width=3200, res=340) +dev.off(); +#--------------------------------------------------------------------------------------------------# + + +#---------------- Output jackknife results --------------------------------------------------------# +# Bootstrap Coefficients +out.jk.coefs <- data.frame(Iteration=seq(1,length(bootstrap_intercept),1), + Intercept=bootstrap_intercept,t(bootstrap_coef)) +names(out.jk.coefs) <- c("Iteration","Intercept",paste0("Wave_",wv)) +head(out.jk.coefs)[1:6] +write.csv(out.jk.coefs,file=file.path(outdir,paste0(inVar,'_Bootstrap_PLSR_Coefficients.csv')), + row.names=FALSE) +#--------------------------------------------------------------------------------------------------# + + +#---------------- Export Model Output 
-------------------------------------------------------------# +print(paste("Output directory: ", getwd())) + +# Observed versus predicted +write.csv(cal.plsr.output,file=file.path(outdir,paste0(inVar,'_Observed_PLSR_CV_Pred_',nComps, + 'comp.csv')),row.names=FALSE) + +# Validation data +write.csv(val.plsr.output,file=file.path(outdir,paste0(inVar,'_Validation_PLSR_Pred_',nComps, + 'comp.csv')),row.names=FALSE) + +# Model coefficients +coefs <- coef(plsr.out,ncomp=nComps,intercept=TRUE) +write.csv(coefs,file=file.path(outdir,paste0(inVar,'_PLSR_Coefficients_',nComps,'comp.csv')), + row.names=TRUE) + +# PLSR VIP +write.csv(vips,file=file.path(outdir,paste0(inVar,'_PLSR_VIPs_',nComps,'comp.csv'))) + +# confirm files were written to temp space. display a list of the files generated +print("**** PLSR output files: ") +print(list.files(getwd())[grep(pattern = inVar, list.files(getwd()))]) +#--------------------------------------------------------------------------------------------------# + + +#--------------------------------------------------------------------------------------------------# +### EOF \ No newline at end of file diff --git a/inst/scripts/spectra-trait_kit_sla_plsr_example.R b/inst/scripts/spectra-trait_kit_sla_plsr_example.R index ee9a530..c7ef1a3 100644 --- a/inst/scripts/spectra-trait_kit_sla_plsr_example.R +++ b/inst/scripts/spectra-trait_kit_sla_plsr_example.R @@ -20,8 +20,7 @@ #--------------------------------------------------------------------------------------------------# ### Load libraries -list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra", - "spectratrait") +list.of.packages <- c("pls","dplyr","here","plotrix","ggplot2","gridExtra","spectratrait") invisible(lapply(list.of.packages, library, character.only = TRUE)) #--------------------------------------------------------------------------------------------------# @@ -186,12 +185,14 @@ maxComps <- 18 iterations <- 50 prop <- 0.70 if (method=="pls") { - 
nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, seg=seg, random_seed=random_seed) print(paste0("*** Optimal number of components: ", nComps)) } else { - nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, iterations=iterations, seg=seg, prop=prop, random_seed=random_seed) diff --git a/inst/scripts/spectra-trait_neon_leafN_canopy_plsr_example.R b/inst/scripts/spectra-trait_neon_leafN_canopy_plsr_example.R index 9a6709a..4394b7b 100644 --- a/inst/scripts/spectra-trait_neon_leafN_canopy_plsr_example.R +++ b/inst/scripts/spectra-trait_neon_leafN_canopy_plsr_example.R @@ -222,12 +222,14 @@ maxComps <- 16 iterations <- 80 prop <- 0.70 if (method=="pls") { - nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, seg=seg, random_seed=random_seed) print(paste0("*** Optimal number of components: ", nComps)) } else { - nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, iterations=iterations, seg=seg, prop=prop, random_seed=random_seed) @@ -371,7 +373,8 @@ if(grepl("Windows", sessionInfo()$running)){ ### PLSR bootstrap permutation uncertainty analysis iterations <- 500 # how many permutation iterations to run prop <- 0.70 # fraction of training data to keep for each iteration -plsr_permutation <- spectratrait::pls_permutation(dataset=cal.plsr.data, maxComps=nComps, +plsr_permutation <- 
spectratrait::pls_permutation(dataset=cal.plsr.data, targetVariable=inVar, + maxComps=nComps, iterations=iterations, prop=prop, verbose=TRUE) bootstrap_intercept <- plsr_permutation$coef_array[1,,nComps] diff --git a/inst/scripts/spectra-trait_neon_lma_plsr_example.R b/inst/scripts/spectra-trait_neon_lma_plsr_example.R index b697d09..805c503 100644 --- a/inst/scripts/spectra-trait_neon_lma_plsr_example.R +++ b/inst/scripts/spectra-trait_neon_lma_plsr_example.R @@ -186,12 +186,14 @@ maxComps <- 20 iterations <- 40 prop <- 0.70 if (method=="pls") { - nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, seg=seg, random_seed=random_seed) print(paste0("*** Optimal number of components: ", nComps)) } else { - nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, iterations=iterations, seg=seg, prop=prop, random_seed=random_seed) } diff --git a/inst/scripts/spectra-trait_reseco_leafN_plsr_bootstrap_example.R b/inst/scripts/spectra-trait_reseco_leafN_plsr_bootstrap_example.R index 12a7348..6c94335 100644 --- a/inst/scripts/spectra-trait_reseco_leafN_plsr_bootstrap_example.R +++ b/inst/scripts/spectra-trait_reseco_leafN_plsr_bootstrap_example.R @@ -189,14 +189,15 @@ maxComps <- 16 iterations <- 80 prop <- 0.70 if (method=="pls") { - # pls package approach - faster but estimates more components.... 
- nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, maxComps=maxComps, + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, seg=seg, random_seed=random_seed) print(paste0("*** Optimal number of components: ", nComps)) } else { - nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, maxComps=maxComps, - iterations=iterations, seg=seg, prop=prop, - random_seed=random_seed) + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, + iterations=iterations, seg=seg, prop=prop, + random_seed=random_seed) } dev.copy(png,file.path(outdir,paste0(paste0(inVar,"_PLSR_Component_Selection.png"))), height=2800, width=3400, res=340) @@ -332,7 +333,8 @@ if(grepl("Windows", sessionInfo()$running)){ ### PLSR bootstrap permutation uncertainty analysis iterations <- 500 # how many permutation iterations to run prop <- 0.70 # fraction of training data to keep for each iteration -plsr_permutation <- spectratrait::pls_permutation(dataset=cal.plsr.data, maxComps=nComps, +plsr_permutation <- spectratrait::pls_permutation(dataset=cal.plsr.data, targetVariable=inVar, + maxComps=nComps, iterations=iterations, prop=prop, verbose=TRUE) bootstrap_intercept <- plsr_permutation$coef_array[1,,nComps] diff --git a/inst/scripts/spectra-trait_reseco_leafN_plsr_example.R b/inst/scripts/spectra-trait_reseco_leafN_plsr_example.R index c8262b3..b878342 100644 --- a/inst/scripts/spectra-trait_reseco_leafN_plsr_example.R +++ b/inst/scripts/spectra-trait_reseco_leafN_plsr_example.R @@ -19,8 +19,7 @@ #--------------------------------------------------------------------------------------------------# ### Load libraries -list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra", - "spectratrait") +list.of.packages <- 
c("pls","dplyr","here","plotrix","ggplot2","gridExtra","spectratrait") invisible(lapply(list.of.packages, library, character.only = TRUE)) #--------------------------------------------------------------------------------------------------# @@ -188,13 +187,14 @@ maxComps <- 16 iterations <- 80 prop <- 0.70 if (method=="pls") { - # pls package approach - faster but estimates more components.... - nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, seg=seg, random_seed=random_seed) print(paste0("*** Optimal number of components: ", nComps)) } else { - nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, iterations=iterations, seg=seg, prop=prop, random_seed=random_seed) diff --git a/inst/scripts/spectra-trait_reseco_lma_plsr_example.R b/inst/scripts/spectra-trait_reseco_lma_plsr_example.R index d28063f..a602af9 100644 --- a/inst/scripts/spectra-trait_reseco_lma_plsr_example.R +++ b/inst/scripts/spectra-trait_reseco_lma_plsr_example.R @@ -23,8 +23,7 @@ #--------------------------------------------------------------------------------------------------# ### Step 1. 
# Load required libraries & spectratrait package -list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra", - "spectratrait") +list.of.packages <- c("pls","dplyr","here","plotrix","ggplot2","gridExtra","spectratrait") invisible(lapply(list.of.packages, library, character.only = TRUE)) #--------------------------------------------------------------------------------------------------# @@ -215,12 +214,14 @@ maxComps <- 16 iterations <- 50 prop <- 0.70 if (method=="pls") { - nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, seg=seg, random_seed=random_seed) print(paste0("*** Optimal number of components: ", nComps)) } else { - nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, iterations=iterations, seg=seg, prop=prop, random_seed=random_seed) } diff --git a/man/create_data_split.Rd b/man/create_data_split.Rd index cf8a212..287be8e 100644 --- a/man/create_data_split.Rd +++ b/man/create_data_split.Rd @@ -22,7 +22,7 @@ create_data_split( \item{prop}{the proportion of data to preserve for calibration (e.g. 0.8) and validation (0.2). 
This sets the calibration proportion} -\item{group_variables}{Use factor variables to conduct a stratfied sampling for cal/val} +\item{group_variables}{Use factor variables to conduct a stratified sampling for cal/val} } \value{ output_list A list containing the calibration dataset (cal_data) diff --git a/man/find_optimal_comp_by_groups.Rd b/man/find_optimal_comp_by_groups.Rd new file mode 100644 index 0000000..a2dfef5 --- /dev/null +++ b/man/find_optimal_comp_by_groups.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/find_optimal_components.R +\name{find_optimal_comp_by_groups} +\alias{find_optimal_comp_by_groups} +\title{Uses the firstMin and firstPlateau methods for the determination of the optimal number of PLSR model components, +by group (i.e. optimal selection by stratification)} +\usage{ +find_optimal_comp_by_groups( + dataset = NULL, + targetVariable = NULL, + method = "firstPlateau", + maxComps = 20, + iterations = 20, + prop = 0.7, + random_seed = 123456789, + group_variables = NULL +) +} +\arguments{ +\item{dataset}{input full PLSR dataset. Usually just the calibration dataset} + +\item{targetVariable}{What object or variable to use as the Y (predictand) in the PLSR model? +Usually the "inVar" variable set at the beginning of a PLS script} + +\item{method}{Which approach to use to find optimal components. 
Options: firstPlateau, firstMin} + +\item{maxComps}{maximum number of components to consider} + +\item{iterations}{how many different permutations to run} + +\item{prop}{proportion of data to preserve for each permutation} + +\item{random_seed}{random seed to use for splitting data} + +\item{group_variables}{group_variables character vector of the form c("var1", "var2"..."varn") +providing the factors used for stratified sampling.} +} +\value{ +nComps the optimal number of PLSR components +} +\description{ +Uses the firstMin and firstPlateau methods for the determination of the optimal number of PLSR model components, +by group (i.e. optimal selection by stratification) +} +\author{ +asierrl, Shawn P. Serbin +} diff --git a/man/find_optimal_components.Rd b/man/find_optimal_components.Rd index aabec0a..f66114f 100644 --- a/man/find_optimal_components.Rd +++ b/man/find_optimal_components.Rd @@ -2,10 +2,11 @@ % Please edit documentation in R/find_optimal_components.R \name{find_optimal_components} \alias{find_optimal_components} -\title{Apply different methods to determing the optimal number of PLSR model components} +\title{Applies different methods for the determination of the optimal number of PLSR model components} \usage{ find_optimal_components( dataset = NULL, + targetVariable = NULL, method = "pls", maxComps = 20, iterations = 20, @@ -17,6 +18,9 @@ find_optimal_components( \arguments{ \item{dataset}{input full PLSR dataset. Usually just the calibration dataset} +\item{targetVariable}{What object or variable to use as the Y (predictand) in the PLSR model? +Usually the "inVar" variable set at the beginning of a PLS script} + \item{method}{Which approach to use to find optimal components. 
Options: pls, firstPlateau, firstMin} \item{maxComps}{maximum number of components to consider} @@ -33,7 +37,7 @@ find_optimal_components( nComps the optimal number of PLSR components } \description{ -Apply different methods to determing the optimal number of PLSR model components +Applies different methods for the determination of the optimal number of PLSR model components } \author{ Julien Lamour, Jeremiah Anderson, Shawn P. Serbin diff --git a/man/pls_permutation.Rd b/man/pls_permutation.Rd index d44bc06..88252cd 100644 --- a/man/pls_permutation.Rd +++ b/man/pls_permutation.Rd @@ -7,6 +7,7 @@ or conduct a boostrap uncertainty analysis} \usage{ pls_permutation( dataset = NULL, + targetVariable = NULL, maxComps = 20, iterations = 20, prop = 0.7, @@ -16,6 +17,9 @@ pls_permutation( \arguments{ \item{dataset}{input full PLSR dataset. Usually just the calibration dataset} +\item{targetVariable}{What object or variable to use as the Y (predictand) in the PLSR model? +Usually the "inVar" variable set at the beginning of a PLS script} + \item{maxComps}{maximum number of components to use for each PLSR fit} \item{iterations}{how many different permutations to run} diff --git a/man/pls_permutation_by_groups.Rd b/man/pls_permutation_by_groups.Rd new file mode 100644 index 0000000..1736b3c --- /dev/null +++ b/man/pls_permutation_by_groups.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pls_permutation.R +\name{pls_permutation_by_groups} +\alias{pls_permutation_by_groups} +\title{Run a PLSR model permutation analysis stratified by selected "groups". Can be used to +determine the optimal number of components or conduct a boostrap uncertainty analysis} +\usage{ +pls_permutation_by_groups( + dataset = NULL, + targetVariable = NULL, + maxComps = 20, + iterations = 20, + prop = 0.7, + group_variables = NULL, + verbose = FALSE +) +} +\arguments{ +\item{dataset}{input full PLSR dataset. 
Usually just the calibration dataset} + +\item{targetVariable}{What object or variable to use as the Y (predictand) in the PLSR model? +Usually the "inVar" variable set at the beginning of a PLS script} + +\item{maxComps}{maximum number of components to use for each PLSR fit} + +\item{iterations}{how many different permutations to run} + +\item{prop}{proportion of data to preserve for each permutation} + +\item{group_variables}{Character vector of the form c("var1", "var2"..."varn") +providing the factors used for stratified sampling in the PLSR permutation analysis} + +\item{verbose}{Should the function report the current iteration status/progress to the terminal +or run silently? TRUE/FALSE. Default FALSE} +} +\value{ +output a list containing the PRESS and coef_array. +output <- list(PRESS=press.out, coef_array=coefs) +} +\description{ +Run a PLSR model permutation analysis stratified by selected "groups". Can be used to +determine the optimal number of components or conduct a boostrap uncertainty analysis +} +\author{ +asierrl, Shawn P. 
Serbin, Julien Lamour +} diff --git a/spectratrait_1.0.5.pdf b/spectratrait_1.0.5.pdf deleted file mode 100644 index 206eae0..0000000 Binary files a/spectratrait_1.0.5.pdf and /dev/null differ diff --git a/spectratrait_1.1.0.pdf b/spectratrait_1.1.0.pdf new file mode 100644 index 0000000..45a9a52 Binary files /dev/null and b/spectratrait_1.1.0.pdf differ diff --git a/tests/testthat.R b/tests/testthat.R index d7e4be9..2a9c3f1 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -1,5 +1,4 @@ library(testthat) -library(dplyr) library(spectratrait) -test_check("spectratrait") \ No newline at end of file +testthat::test_check("spectratrait") diff --git a/tests/testthat/test.create_data_split.R b/tests/testthat/test.create_data_split.R index 751e467..2ef3baf 100644 --- a/tests/testthat/test.create_data_split.R +++ b/tests/testthat/test.create_data_split.R @@ -1,4 +1,4 @@ -context("Test that the create data split function has the expected behavior") +context("*** Test that the create data split function has the expected behavior *** ") test_that("Generating a data split using the dplyr approach doesn't throw an error or generate duplicates between cal. and val. 
data", { plot<- rep(c("plot1", "plot2", "plot3"),each=42) diff --git a/tests/testthat/test.optimal_components.R b/tests/testthat/test.optimal_components.R new file mode 100644 index 0000000..5cccd4f --- /dev/null +++ b/tests/testthat/test.optimal_components.R @@ -0,0 +1,60 @@ +context("*** Test methods for finding optimal number of PLSR components ***") + +### Setup data for tests +#Load Ely et al 2019 dataset +data("ely_plsr_data") +inVar <- "N_g_m2" +Start.wave <- 500 +End.wave <- 2400 +wv <- seq(Start.wave,End.wave,1) +plsr_data <- ely_plsr_data +spec <- as.matrix(plsr_data[, which(names(plsr_data) %in% paste0("Wave_",wv))]) +plsr_data <- data.frame(plsr_data[, which(names(plsr_data) %notin% paste0("Wave_",wv))], + Spectra=I(spec)) +### + +test_that("Finding optimal components using the built-in PLS package approach", { + method <- "pls" + random_seed <- 1245565 + seg <- 50 + maxComps <- 20 + iterations <- 80 + prop <- 0.70 + + nComps <- spectratrait::find_optimal_components(dataset=plsr_data, targetVariable=inVar, + method=method, maxComps=maxComps, seg=seg, + random_seed=random_seed) + expect_gte(nComps, 12) +}) + +test_that("Finding optimal components using the firstMin approach", { + method <- "firstMin" + random_seed <- 1245565 + seg <- 50 + maxComps <- 20 + iterations <- 80 + prop <- 0.70 + + nComps <- spectratrait::find_optimal_components(dataset=plsr_data, targetVariable=inVar, + method=method, maxComps=maxComps, + iterations=iterations, seg=seg, prop=prop, + random_seed=random_seed) + expect_gte(nComps, 12) + +}) + +test_that("Finding optimal components using the firstPlateau approach", { + method <- "firstPlateau" + random_seed <- 1245565 + seg <- 50 + maxComps <- 20 + iterations <- 80 + prop <- 0.70 + + nComps <- spectratrait::find_optimal_components(dataset=plsr_data, targetVariable=inVar, + method=method, maxComps=maxComps, + iterations=iterations, seg=seg, prop=prop, + random_seed=random_seed) + expect_gte(nComps, 12) + +}) \ No newline at end of 
file diff --git a/vignettes/ely_leafN_bootstrap_plsr_example.Rmd b/vignettes/ely_leafN_bootstrap_plsr_example.Rmd index 17a4d0a..d8e60c4 100644 --- a/vignettes/ely_leafN_bootstrap_plsr_example.Rmd +++ b/vignettes/ely_leafN_bootstrap_plsr_example.Rmd @@ -2,9 +2,9 @@ title: Spectra-trait PLSR example using leaf-level spectra and leaf nitrogen content (Narea, g/m2) data from eight different crop species growing in a glasshouse at Brookhaven National Laboratory. author: "Shawn P. Serbin, Julien Lamour, & Jeremiah Anderson" output: - github_document: default - html_notebook: default pdf_document: default + html_notebook: default + github_document: default html_document: df_print: paged params: @@ -23,8 +23,7 @@ and fit a plsr model for leaf nitrogen content (Narea, g/m2) ### Getting Started ### Load libraries ```{r, eval=TRUE, echo=TRUE} -list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra", - "spectratrait") +list.of.packages <- c("pls","dplyr","here","plotrix","ggplot2","gridExtra","spectratrait") invisible(lapply(list.of.packages, library, character.only = TRUE)) ``` @@ -162,13 +161,14 @@ maxComps <- 16 iterations <- 80 prop <- 0.70 if (method=="pls") { - # pls package approach - faster but estimates more components.... 
- nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, seg=seg, random_seed=random_seed) print(paste0("*** Optimal number of components: ", nComps)) } else { - nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, iterations=iterations, seg=seg, prop=prop, random_seed=random_seed) @@ -310,7 +310,8 @@ if(grepl("Windows", sessionInfo()$running)){ ### PLSR bootstrap permutation uncertainty analysis iterations <- 500 # how many permutation iterations to run prop <- 0.70 # fraction of training data to keep for each iteration -plsr_permutation <- spectratrait::pls_permutation(dataset=cal.plsr.data, maxComps=nComps, +plsr_permutation <- spectratrait::pls_permutation(dataset=cal.plsr.data, targetVariable=inVar, + maxComps=nComps, iterations=iterations, prop=prop, verbose = FALSE) bootstrap_intercept <- plsr_permutation$coef_array[1,,nComps] diff --git a/vignettes/ely_leafN_bootstrap_plsr_example.md b/vignettes/ely_leafN_bootstrap_plsr_example.md index 9910579..264df01 100644 --- a/vignettes/ely_leafN_bootstrap_plsr_example.md +++ b/vignettes/ely_leafN_bootstrap_plsr_example.md @@ -16,8 +16,7 @@ nitrogen content (Narea, g/m2) ### Load libraries ``` r -list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra", - "spectratrait") +list.of.packages <- c("pls","dplyr","here","plotrix","ggplot2","gridExtra","spectratrait") invisible(lapply(list.of.packages, library, character.only = TRUE)) ``` @@ -103,7 +102,7 @@ inVar <- "N_g_m2" ### Set working directory (scratch space) - ## [1] "/private/var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T/RtmpTADBVi" + ## [1] "/private/var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T/RtmpDzC9vA" ### 
Full PLSR dataset @@ -139,19 +138,19 @@ split_data <- spectratrait::create_data_split(dataset=plsr_data, approach=method ## HEAN3 Cal: 70% - ## CUSA4 Cal: 68.1818181818182% + ## CUSA4 Cal: 68.182% - ## CUPE Cal: 70.5882352941177% + ## CUPE Cal: 70.588% ## SOLYL Cal: 70% - ## OCBA Cal: 68.4210526315789% + ## OCBA Cal: 68.421% - ## POPUL Cal: 71.4285714285714% + ## POPUL Cal: 71.429% - ## GLMA4 Cal: 70.5882352941177% + ## GLMA4 Cal: 70.588% - ## PHVU Cal: 66.6666666666667% + ## PHVU Cal: 66.667% ``` r names(split_data) @@ -324,19 +323,21 @@ maxComps <- 16 iterations <- 80 prop <- 0.70 if (method=="pls") { - # pls package approach - faster but estimates more components.... - nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, seg=seg, random_seed=random_seed) print(paste0("*** Optimal number of components: ", nComps)) } else { - nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, iterations=iterations, seg=seg, prop=prop, random_seed=random_seed) } ``` + ## [1] "*** Identifying optimal number of PLSR components ***" ## [1] "*** Running PLS permutation test ***" ![](ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-9-1.png) @@ -439,13 +440,13 @@ head(cal.plsr.output) ## 6 CUSA4 garden cucumber 7.43 8.06035 114.36 18.40 1.117704 ## 7 CUPE field pumpkin 7.20 11.43007 128.42 25.83 1.215333 ## 10 SOLYL garden tomato 7.89 11.61918 142.23 27.40 1.304110 - ## CalVal PLSR_Predicted PLSR_CV_Predicted PLSR_CV_Residuals - ## 1 Cal 1.820666 1.702501 -0.40119317 - ## 2 Cal 1.609632 1.711772 0.48005882 - ## 4 Cal 1.364985 1.275526 -0.01243687 - ## 6 Cal 1.126062 1.060119 -0.05758587 - ## 7 Cal 1.227538 1.226708 0.01137583 - ## 10 Cal 1.358638 
1.365181 0.06107105 + ## PLSR_Predicted PLSR_CV_Predicted PLSR_CV_Residuals + ## 1 1.820666 1.702501 -0.40119317 + ## 2 1.609632 1.711772 0.48005882 + ## 4 1.364985 1.275526 -0.01243687 + ## 6 1.126062 1.060119 -0.05758587 + ## 7 1.227538 1.226708 0.01137583 + ## 10 1.358638 1.365181 0.06107105 ``` r cal.R2 <- round(pls::R2(plsr.out,intercept=F)[[1]][nComps],2) @@ -467,13 +468,13 @@ head(val.plsr.output) ## 9 CUPE field pumpkin 7.64 17.100448 142.85 43.39 1.1390174 ## 13 SOLYL garden tomato 7.73 7.938866 129.95 17.96 0.9483533 ## 15 OCBA sweet basil 8.13 16.975969 173.30 38.65 1.1246459 - ## CalVal PLSR_Predicted PLSR_Residuals - ## 3 Val 1.7125176 -0.052233917 - ## 5 Val 1.4618447 0.050483171 - ## 8 Val 1.0951891 -0.051652168 - ## 9 Val 1.2152379 0.076220509 - ## 13 Val 0.7992342 -0.149119020 - ## 15 Val 1.1267054 0.002059572 + ## PLSR_Predicted PLSR_Residuals + ## 3 1.7125176 -0.052233917 + ## 5 1.4618447 0.050483171 + ## 8 1.0951891 -0.051652168 + ## 9 1.2152379 0.076220509 + ## 13 0.7992342 -0.149119020 + ## 15 1.1267054 0.002059572 ``` r val.R2 <- round(pls::R2(plsr.out,newdata=val.plsr.data,intercept=F)[[1]][nComps],2) @@ -593,7 +594,8 @@ if(grepl("Windows", sessionInfo()$running)){ ### PLSR bootstrap permutation uncertainty analysis iterations <- 500 # how many permutation iterations to run prop <- 0.70 # fraction of training data to keep for each iteration -plsr_permutation <- spectratrait::pls_permutation(dataset=cal.plsr.data, maxComps=nComps, +plsr_permutation <- spectratrait::pls_permutation(dataset=cal.plsr.data, targetVariable=inVar, + maxComps=nComps, iterations=iterations, prop=prop, verbose = FALSE) ``` @@ -633,13 +635,13 @@ head(val.plsr.output) ## 9 CUPE field pumpkin 7.64 17.100448 142.85 43.39 1.1390174 ## 13 SOLYL garden tomato 7.73 7.938866 129.95 17.96 0.9483533 ## 15 OCBA sweet basil 8.13 16.975969 173.30 38.65 1.1246459 - ## CalVal PLSR_Predicted PLSR_Residuals LCI UCI LPI UPI - ## 3 Val 1.7125176 -0.052233917 1.5070086 1.8760564 1.2810247 
2.144011 - ## 5 Val 1.4618447 0.050483171 1.2909822 1.5475356 1.0541359 1.869553 - ## 8 Val 1.0951891 -0.051652168 0.9595488 1.2335912 0.6846083 1.505770 - ## 9 Val 1.2152379 0.076220509 1.0746965 1.3367675 0.8068229 1.623653 - ## 13 Val 0.7992342 -0.149119020 0.6820207 0.9451323 0.3899050 1.208563 - ## 15 Val 1.1267054 0.002059572 1.0316572 1.2737521 0.7209233 1.532488 + ## PLSR_Predicted PLSR_Residuals LCI UCI LPI UPI + ## 3 1.7125176 -0.052233917 1.5070086 1.8760564 1.2810247 2.144011 + ## 5 1.4618447 0.050483171 1.2909822 1.5475356 1.0541359 1.869553 + ## 8 1.0951891 -0.051652168 0.9595488 1.2335912 0.6846083 1.505770 + ## 9 1.2152379 0.076220509 1.0746965 1.3367675 0.8068229 1.623653 + ## 13 0.7992342 -0.149119020 0.6820207 0.9451323 0.3899050 1.208563 + ## 15 1.1267054 0.002059572 1.0316572 1.2737521 0.7209233 1.532488 ### Jackknife coefficient plot @@ -752,7 +754,7 @@ write.csv(out.jk.coefs,file=file.path(outdir,paste0(inVar, print(paste("Output directory: ", outdir)) ``` - ## [1] "Output directory: /var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T//RtmpTADBVi" + ## [1] "Output directory: /var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T//RtmpDzC9vA" ``` r # Observed versus predicted diff --git a/vignettes/ely_leafN_bootstrap_plsr_example.pdf b/vignettes/ely_leafN_bootstrap_plsr_example.pdf index e1963d1..9b41518 100644 Binary files a/vignettes/ely_leafN_bootstrap_plsr_example.pdf and b/vignettes/ely_leafN_bootstrap_plsr_example.pdf differ diff --git a/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png b/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png index 0257f75..615bc51 100644 Binary files a/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png and b/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png differ diff --git a/vignettes/ely_leafN_bootstrap_plsr_grp_example.Rmd b/vignettes/ely_leafN_bootstrap_plsr_grp_example.Rmd new file 
mode 100644 index 0000000..de22963 --- /dev/null +++ b/vignettes/ely_leafN_bootstrap_plsr_grp_example.Rmd @@ -0,0 +1,429 @@ +--- +title: Spectra-trait PLSR example using leaf-level spectra and leaf nitrogen content (Narea, g/m2) data from eight different crop species growing in a glasshouse at Brookhaven National Laboratory. +author: "Shawn P. Serbin, Julien Lamour, & Jeremiah Anderson" +output: + github_document: default + html_notebook: default + pdf_document: default + html_document: + df_print: paged +params: + date: !r Sys.Date() +--- + +```{r setup, include=FALSE, echo=FALSE} +knitr::opts_chunk$set(echo = TRUE) +``` + +### Overview +This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook to illustrate how to load an +internal dataset ("ely_plsr_data"), choose the "optimal" number of PLSR components using +resampling stratified by species, and fit a PLSR model for leaf nitrogen content (Narea, g/m2). + +### Getting Started +### Load libraries +```{r, eval=TRUE, echo=TRUE} +list.of.packages <- c("pls","dplyr","here","plotrix","ggplot2","gridExtra","spectratrait") +invisible(lapply(list.of.packages, library, character.only = TRUE)) +``` + +### Setup other functions and options +```{r, echo=TRUE} +### Setup other functions and options +# not in +`%notin%` <- Negate(`%in%`) + +# Script options +pls::pls.options(plsralg = "oscorespls") +pls::pls.options("plsralg") + +# Default par options +opar <- par(no.readonly = T) + +# Specify output directory, output_dir +# Options: +# tempdir - use an OS-specified temporary directory +# user defined PATH - e.g. "~/scratch/PLSR" +output_dir <- "tempdir" +``` + +### Load internal Ely et al 2019 dataset +```{r, echo=TRUE} +data("ely_plsr_data") +head(ely_plsr_data)[,1:8] + +# What is the target variable? +inVar <- "N_g_m2" +``` + +### Set working directory (scratch space) +```{r, echo=FALSE} +if (output_dir=="tempdir") { + outdir <- tempdir() +} else { + if (!
file.exists(output_dir)) dir.create(output_dir,recursive=TRUE) + outdir <- file.path(path.expand(output_dir)) +} +setwd(outdir) # set working directory +getwd() # check wd +``` + +### Full PLSR dataset +```{r, echo=TRUE} +Start.wave <- 500 +End.wave <- 2400 +wv <- seq(Start.wave,End.wave,1) +plsr_data <- ely_plsr_data +head(plsr_data)[,1:6] +``` +### Create cal/val datasets +```{r, fig.height = 5, fig.width = 12, echo=TRUE} +### Create cal/val datasets +## Make a stratified random split of the data, stratified by Species_Code + +method <- "base" #base/dplyr +# base R - a bit slow +# dplyr - much faster +split_data <- spectratrait::create_data_split(dataset=plsr_data, approach=method, + split_seed=23452135, prop=0.7, + group_variables="Species_Code") +names(split_data) +cal.plsr.data <- split_data$cal_data +head(cal.plsr.data)[1:8] +val.plsr.data <- split_data$val_data +head(val.plsr.data)[1:8] +rm(split_data) + +# Datasets: +print(paste("Cal observations: ",dim(cal.plsr.data)[1],sep="")) +print(paste("Val observations: ",dim(val.plsr.data)[1],sep="")) + +cal_hist_plot <- qplot(cal.plsr.data[,paste0(inVar)],geom="histogram", + main = paste0("Cal. Histogram for ",inVar), + xlab = paste0(inVar),ylab = "Count",fill=I("grey50"),col=I("black"), + alpha=I(.7)) +val_hist_plot <- qplot(val.plsr.data[,paste0(inVar)],geom="histogram", + main = paste0("Val.
Histogram for ",inVar), + xlab = paste0(inVar),ylab = "Count",fill=I("grey50"),col=I("black"), + alpha=I(.7)) +histograms <- grid.arrange(cal_hist_plot, val_hist_plot, ncol=2) +ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Histograms.png")), + plot = histograms, + device="png", width = 30, + height = 12, units = "cm", + dpi = 300) +# output cal/val data +write.csv(cal.plsr.data,file=file.path(outdir,paste0(inVar,'_Cal_PLSR_Dataset.csv')), + row.names=FALSE) +write.csv(val.plsr.data,file=file.path(outdir,paste0(inVar,'_Val_PLSR_Dataset.csv')), + row.names=FALSE) +``` + +### Create calibration and validation PLSR datasets +```{r, echo=TRUE} +### Format PLSR data for model fitting +cal_spec <- as.matrix(cal.plsr.data[, which(names(cal.plsr.data) %in% paste0("Wave_",wv))]) +cal.plsr.data <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% paste0("Wave_",wv))], + Spectra=I(cal_spec)) +head(cal.plsr.data)[1:5] + +val_spec <- as.matrix(val.plsr.data[, which(names(val.plsr.data) %in% paste0("Wave_",wv))]) +val.plsr.data <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% paste0("Wave_",wv))], + Spectra=I(val_spec)) +head(val.plsr.data)[1:5] +``` + +### plot cal and val spectra +```{r, fig.height = 5, fig.width = 12, echo=TRUE} +par(mfrow=c(1,2)) # B, L, T, R +spectratrait::f.plot.spec(Z=cal.plsr.data$Spectra,wv=wv,plot_label="Calibration") +spectratrait::f.plot.spec(Z=val.plsr.data$Spectra,wv=wv,plot_label="Validation") + +dev.copy(png,file.path(outdir,paste0(inVar,'_Cal_Val_Spectra.png')), + height=2500,width=4900, res=340) +dev.off(); +par(mfrow=c(1,1)) +``` + +### Use permutation to determine optimal number of components +```{r, fig.height = 6, fig.width = 10, echo=TRUE} +### Use permutation to determine the optimal number of components +if(grepl("Windows", sessionInfo()$running)){ + pls.options(parallel = NULL) +} else { + pls.options(parallel = parallel::detectCores()-1) +} + +method <- "firstMin" #firstPlateau, firstMin 
+random_seed <- 1245565 +seg <- 50 +maxComps <- 16 +iterations <- 80 +prop <- 0.70 +nComps <- spectratrait::find_optimal_comp_by_groups(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, + iterations=iterations, prop=prop, + random_seed=random_seed, + group_variables="Species_Code") +dev.copy(png,file.path(outdir,paste0(paste0(inVar,"_PLSR_Component_Selection.png"))), + height=2800, width=3400, res=340) +dev.off(); +``` + +### Fit final model +```{r, fig.height = 5, fig.width = 12, echo=TRUE} +plsr.out <- plsr(as.formula(paste(inVar,"~","Spectra")),scale=FALSE,ncomp=nComps,validation="LOO", + trace=FALSE,data=cal.plsr.data) +fit <- plsr.out$fitted.values[,1,nComps] +pls.options(parallel = NULL) + +# External validation fit stats +par(mfrow=c(1,2)) # B, L, T, R +pls::RMSEP(plsr.out, newdata = val.plsr.data) +plot(pls::RMSEP(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL RMSEP", + xlab="Number of Components",ylab="Model Validation RMSEP",lty=1,col="black",cex=1.5,lwd=2) +box(lwd=2.2) + +pls::R2(plsr.out, newdata = val.plsr.data) +plot(pls::R2(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL R2", + xlab="Number of Components",ylab="Model Validation R2",lty=1,col="black",cex=1.5,lwd=2) +box(lwd=2.2) +dev.copy(png,file.path(outdir,paste0(paste0(inVar,"_Validation_RMSEP_R2_by_Component.png"))), + height=2800, width=4800, res=340) +dev.off(); +par(opar) +``` + +### PLSR fit observed vs. 
predicted plot data +```{r, fig.height = 15, fig.width = 15, echo=TRUE} +#calibration +cal.plsr.output <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% "Spectra")], + PLSR_Predicted=fit, + PLSR_CV_Predicted=as.vector(plsr.out$validation$pred[,,nComps])) +cal.plsr.output <- cal.plsr.output %>% + mutate(PLSR_CV_Residuals = PLSR_CV_Predicted-get(inVar)) +head(cal.plsr.output) +cal.R2 <- round(pls::R2(plsr.out,intercept=F)[[1]][nComps],2) +cal.RMSEP <- round(sqrt(mean(cal.plsr.output$PLSR_CV_Residuals^2)),2) + +val.plsr.output <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% "Spectra")], + PLSR_Predicted=as.vector(predict(plsr.out, + newdata = val.plsr.data, + ncomp=nComps, type="response")[,,1])) +val.plsr.output <- val.plsr.output %>% + mutate(PLSR_Residuals = PLSR_Predicted-get(inVar)) +head(val.plsr.output) +val.R2 <- round(pls::R2(plsr.out,newdata=val.plsr.data,intercept=F)[[1]][nComps],2) +val.RMSEP <- round(sqrt(mean(val.plsr.output$PLSR_Residuals^2)),2) + +rng_quant <- quantile(cal.plsr.output[,inVar], probs = c(0.001, 0.999)) +cal_scatter_plot <- ggplot(cal.plsr.output, aes(x=PLSR_CV_Predicted, y=get(inVar))) + + theme_bw() + geom_point() + geom_abline(intercept = 0, slope = 1, color="dark grey", + linetype="dashed", size=1.5) + xlim(rng_quant[1], + rng_quant[2]) + + ylim(rng_quant[1], rng_quant[2]) + + labs(x=paste0("Predicted ", paste(inVar), " (units)"), + y=paste0("Observed ", paste(inVar), " (units)"), + title=paste0("Calibration: ", paste0("Rsq = ", cal.R2), "; ", paste0("RMSEP = ", + cal.RMSEP))) + + theme(axis.text=element_text(size=18), legend.position="none", + axis.title=element_text(size=20, face="bold"), + axis.text.x = element_text(angle = 0,vjust = 0.5), + panel.border = element_rect(linetype = "solid", fill = NA, size=1.5)) + +cal_resid_histogram <- ggplot(cal.plsr.output, aes(x=PLSR_CV_Residuals)) + + geom_histogram(alpha=.5, position="identity") + + geom_vline(xintercept = 0, color="black", + linetype="dashed", 
size=1) + theme_bw() + + theme(axis.text=element_text(size=18), legend.position="none", + axis.title=element_text(size=20, face="bold"), + axis.text.x = element_text(angle = 0,vjust = 0.5), + panel.border = element_rect(linetype = "solid", fill = NA, size=1.5)) + +rng_quant <- quantile(val.plsr.output[,inVar], probs = c(0.001, 0.999)) +val_scatter_plot <- ggplot(val.plsr.output, aes(x=PLSR_Predicted, y=get(inVar))) + + theme_bw() + geom_point() + geom_abline(intercept = 0, slope = 1, color="dark grey", + linetype="dashed", size=1.5) + xlim(rng_quant[1], + rng_quant[2]) + + ylim(rng_quant[1], rng_quant[2]) + + labs(x=paste0("Predicted ", paste(inVar), " (units)"), + y=paste0("Observed ", paste(inVar), " (units)"), + title=paste0("Validation: ", paste0("Rsq = ", val.R2), "; ", paste0("RMSEP = ", + val.RMSEP))) + + theme(axis.text=element_text(size=18), legend.position="none", + axis.title=element_text(size=20, face="bold"), + axis.text.x = element_text(angle = 0,vjust = 0.5), + panel.border = element_rect(linetype = "solid", fill = NA, size=1.5)) + +val_resid_histogram <- ggplot(val.plsr.output, aes(x=PLSR_Residuals)) + + geom_histogram(alpha=.5, position="identity") + + geom_vline(xintercept = 0, color="black", + linetype="dashed", size=1) + theme_bw() + + theme(axis.text=element_text(size=18), legend.position="none", + axis.title=element_text(size=20, face="bold"), + axis.text.x = element_text(angle = 0,vjust = 0.5), + panel.border = element_rect(linetype = "solid", fill = NA, size=1.5)) + +# plot cal/val side-by-side +scatterplots <- grid.arrange(cal_scatter_plot, val_scatter_plot, cal_resid_histogram, + val_resid_histogram, nrow=2,ncol=2) +ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Scatterplots.png")), + plot = scatterplots, device="png", + width = 32, + height = 30, units = "cm", + dpi = 300) +``` + +### Generate Coefficient and VIP plots +```{r, fig.height = 9, fig.width = 10, echo=TRUE} +vips <- spectratrait::VIP(plsr.out)[nComps,] 
+par(mfrow=c(2,1)) +plot(plsr.out, plottype = "coef",xlab="Wavelength (nm)", + ylab="Regression coefficients",legendpos = "bottomright", + ncomp=nComps,lwd=2) +box(lwd=2.2) +plot(seq(Start.wave,End.wave,1),vips,xlab="Wavelength (nm)",ylab="VIP",cex=0.01) +lines(seq(Start.wave,End.wave,1),vips,lwd=3) +abline(h=0.8,lty=2,col="dark grey") +box(lwd=2.2) +dev.copy(png,file.path(outdir,paste0(inVar,'_Coefficient_VIP_plot.png')), + height=3100, width=4100, res=340) +dev.off(); +``` + +### Bootstrap validation +```{r, echo=TRUE} +if(grepl("Windows", sessionInfo()$running)){ + pls.options(parallel =NULL) +} else { + pls.options(parallel = parallel::detectCores()-1) +} + +### PLSR bootstrap permutation uncertainty analysis +iterations <- 500 # how many permutation iterations to run +prop <- 0.70 # fraction of training data to keep for each iteration +plsr_permutation <- spectratrait::pls_permutation_by_groups(dataset=cal.plsr.data, + targetVariable=inVar, + maxComps=nComps, + iterations=iterations, + prop=prop, group_variables="Species_Code", + verbose=FALSE) +bootstrap_intercept <- plsr_permutation$coef_array[1,,nComps] +bootstrap_coef <- plsr_permutation$coef_array[2:length(plsr_permutation$coef_array[,1,nComps]), + ,nComps] +rm(plsr_permutation) + +# apply coefficients to left-out validation data +interval <- c(0.025,0.975) +Bootstrap_Pred <- val.plsr.data$Spectra %*% bootstrap_coef + + matrix(rep(bootstrap_intercept, length(val.plsr.data[,inVar])), byrow=TRUE, + ncol=length(bootstrap_intercept)) +Interval_Conf <- apply(X = Bootstrap_Pred, MARGIN = 1, FUN = quantile, + probs=c(interval[1], interval[2])) +sd_mean <- apply(X = Bootstrap_Pred, MARGIN = 1, FUN = sd) +sd_res <- sd(val.plsr.output$PLSR_Residuals) +sd_tot <- sqrt(sd_mean^2+sd_res^2) +val.plsr.output$LCI <- Interval_Conf[1,] +val.plsr.output$UCI <- Interval_Conf[2,] +val.plsr.output$LPI <- val.plsr.output$PLSR_Predicted-1.96*sd_tot +val.plsr.output$UPI <- val.plsr.output$PLSR_Predicted+1.96*sd_tot 
+head(val.plsr.output) +``` + +### Bootstrap regression coefficient plot +```{r, fig.height = 6, fig.width = 10, echo=TRUE} +# Bootstrap regression coefficient plot +spectratrait::f.plot.coef(Z = t(bootstrap_coef), wv = wv, + plot_label="Bootstrap regression coefficients",position = 'bottomleft') +abline(h=0,lty=2,col="grey50") +box(lwd=2.2) +dev.copy(png,file.path(outdir,paste0(inVar,'_Bootstrap_Regression_Coefficients.png')), + height=2100, width=3800, res=340) +dev.off(); +``` + +### Bootstrap validation plot +```{r, fig.height = 7, fig.width = 8, echo=TRUE} +rmsep_percrmsep <- spectratrait::percent_rmse(plsr_dataset = val.plsr.output, + inVar = inVar, + residuals = val.plsr.output$PLSR_Residuals, + range="full") +RMSEP <- rmsep_percrmsep$rmse +perc_RMSEP <- rmsep_percrmsep$perc_rmse +r2 <- round(pls::R2(plsr.out, newdata = val.plsr.data, intercept=F)$val[nComps],2) +expr <- vector("expression", 3) +expr[[1]] <- bquote(R^2==.(r2)) +expr[[2]] <- bquote(RMSEP==.(round(RMSEP,2))) +expr[[3]] <- bquote("%RMSEP"==.(round(perc_RMSEP,2))) +rng_vals <- c(min(val.plsr.output$LPI), max(val.plsr.output$UPI)) +par(mfrow=c(1,1), mar=c(4.2,5.3,1,0.4), oma=c(0, 0.1, 0, 0.2)) +plotrix::plotCI(val.plsr.output$PLSR_Predicted,val.plsr.output[,inVar], + li=val.plsr.output$LPI, ui=val.plsr.output$UPI, gap=0.009,sfrac=0.000, + lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), + err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="grey80", + cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"), + ylab=paste0("Observed ", paste(inVar), " (units)"), + cex.axis=1.5,cex.lab=1.8) +abline(0,1,lty=2,lw=2) +plotrix::plotCI(val.plsr.output$PLSR_Predicted,val.plsr.output[,inVar], + li=val.plsr.output$LCI, ui=val.plsr.output$UCI, gap=0.009,sfrac=0.004, + lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), + err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="black", + cex=2, xlab=paste0("Predicted ", paste(inVar),
" (units)"), + ylab=paste0("Observed ", paste(inVar), " (units)"), + cex.axis=1.5,cex.lab=1.8, add=T) +legend("topleft", legend=expr, bty="n", cex=1.5) +legend("bottomright", legend=c("Prediction Interval","Confidence Interval"), + lty=c(1,1), col = c("grey80","black"), lwd=3, bty="n", cex=1.5) +box(lwd=2.2) +dev.copy(png,file.path(outdir,paste0(inVar,"_PLSR_Validation_Scatterplot.png")), + height=2800, width=3200, res=340) +dev.off(); +``` + +### Output bootstrap results +```{r, echo=TRUE} +# Bootstrap Coefficients +out.jk.coefs <- data.frame(Iteration=seq(1,length(bootstrap_intercept),1), + Intercept=bootstrap_intercept,t(bootstrap_coef)) +names(out.jk.coefs) <- c("Iteration","Intercept",paste0("Wave_",wv)) +head(out.jk.coefs)[1:6] +write.csv(out.jk.coefs,file=file.path(outdir,paste0(inVar, + '_Bootstrap_PLSR_Coefficients.csv')), + row.names=FALSE) +``` + +### Create core PLSR outputs +```{r, echo=TRUE} +print(paste("Output directory: ", outdir)) + +# Observed versus predicted +write.csv(cal.plsr.output,file=file.path(outdir, + paste0(inVar,'_Observed_PLSR_CV_Pred_', + nComps,'comp.csv')), + row.names=FALSE) + +# Validation data +write.csv(val.plsr.output,file=file.path(outdir, + paste0(inVar,'_Validation_PLSR_Pred_', + nComps,'comp.csv')), + row.names=FALSE) + +# Model coefficients +coefs <- coef(plsr.out,ncomp=nComps,intercept=TRUE) +write.csv(coefs,file=file.path(outdir, + paste0(inVar,'_PLSR_Coefficients_', + nComps,'comp.csv')), + row.names=TRUE) + +# PLSR VIP +write.csv(vips,file=file.path(outdir, + paste0(inVar,'_PLSR_VIPs_', + nComps,'comp.csv'))) +``` + +### Confirm files were written to temp space +```{r, echo=TRUE} +print("**** PLSR output files: ") +print(list.files(outdir)[grep(pattern = inVar, list.files(outdir))]) +``` diff --git a/vignettes/ely_leafN_bootstrap_plsr_grp_example.md b/vignettes/ely_leafN_bootstrap_plsr_grp_example.md new file mode 100644 index 0000000..b94b86f --- /dev/null +++ b/vignettes/ely_leafN_bootstrap_plsr_grp_example.md @@ 
-0,0 +1,810 @@ +Spectra-trait PLSR example using leaf-level spectra and leaf nitrogen +content (Narea, g/m2) data from eight different crop species growing in +a glasshouse at Brookhaven National Laboratory. +================ +Shawn P. Serbin, Julien Lamour, & Jeremiah Anderson + +### Overview + +This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook to +illustrate how to load an internal dataset (“ely\_plsr\_data”), choose +the “optimal” number of plsr components, and fit a plsr model for leaf +nitrogen content (Narea, g/m2) + +### Getting Started + +### Load libraries + +``` r +list.of.packages <- c("pls","dplyr","here","plotrix","ggplot2","gridExtra","spectratrait") +invisible(lapply(list.of.packages, library, character.only = TRUE)) +``` + + ## + ## Attaching package: 'pls' + + ## The following object is masked from 'package:stats': + ## + ## loadings + + ## + ## Attaching package: 'dplyr' + + ## The following objects are masked from 'package:stats': + ## + ## filter, lag + + ## The following objects are masked from 'package:base': + ## + ## intersect, setdiff, setequal, union + + ## here() starts at /Users/sserbin/Data/GitHub/spectratrait + + ## + ## Attaching package: 'gridExtra' + + ## The following object is masked from 'package:dplyr': + ## + ## combine + +### Setup other functions and options + +``` r +### Setup other functions and options +# not in +`%notin%` <- Negate(`%in%`) + +# Script options +pls::pls.options(plsralg = "oscorespls") +pls::pls.options("plsralg") +``` + + ## $plsralg + ## [1] "oscorespls" + +``` r +# Default par options +opar <- par(no.readonly = T) + +# Specify output directory, output_dir +# Options: +# tempdir - use a OS-specified temporary directory +# user defined PATH - e.g. 
"~/scratch/PLSR" +output_dir <- "tempdir" +``` + +### Load internal Ely et al 2019 dataset + +``` r +data("ely_plsr_data") +head(ely_plsr_data)[,1:8] +``` + + ## Species_Code Common_Name C_N_mass C_g_m2 H20_g_m2 LMA_g_m2 N_g_m2 + ## 1 HEAN3 common sunflower 7.58 15.61210 167.63 36.40 2.103694 + ## 2 HEAN3 common sunflower 8.33 14.73724 164.68 34.65 1.231713 + ## 3 HEAN3 common sunflower 7.70 15.02495 156.95 35.08 1.764752 + ## 4 CUSA4 garden cucumber 7.40 11.14835 111.52 26.23 1.287963 + ## 5 CUSA4 garden cucumber 7.47 11.60735 123.58 26.71 1.411361 + ## 6 CUSA4 garden cucumber 7.43 8.06035 114.36 18.40 1.117704 + ## Wave_500 + ## 1 4.782000 + ## 2 4.341714 + ## 3 4.502857 + ## 4 3.333429 + ## 5 3.313571 + ## 6 3.272286 + +``` r +# What is the target variable? +inVar <- "N_g_m2" +``` + +### Set working directory (scratch space) + + ## [1] "/private/var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T/Rtmp1HGXY2" + +### Full PLSR dataset + +``` r +Start.wave <- 500 +End.wave <- 2400 +wv <- seq(Start.wave,End.wave,1) +plsr_data <- ely_plsr_data +head(plsr_data)[,1:6] +``` + + ## Species_Code Common_Name C_N_mass C_g_m2 H20_g_m2 LMA_g_m2 + ## 1 HEAN3 common sunflower 7.58 15.61210 167.63 36.40 + ## 2 HEAN3 common sunflower 8.33 14.73724 164.68 34.65 + ## 3 HEAN3 common sunflower 7.70 15.02495 156.95 35.08 + ## 4 CUSA4 garden cucumber 7.40 11.14835 111.52 26.23 + ## 5 CUSA4 garden cucumber 7.47 11.60735 123.58 26.71 + ## 6 CUSA4 garden cucumber 7.43 8.06035 114.36 18.40 + +### Create cal/val datasets + +``` r +### Create cal/val datasets +## Make a stratified random sampling in the strata USDA_Species_Code and Domain + +method <- "base" #base/dplyr +# base R - a bit slow +# dplyr - much faster +split_data <- spectratrait::create_data_split(dataset=plsr_data, approach=method, + split_seed=23452135, prop=0.7, + group_variables="Species_Code") +``` + + ## HEAN3 Cal: 70% + + ## CUSA4 Cal: 68.182% + + ## CUPE Cal: 70.588% + + ## SOLYL Cal: 70% + + ## OCBA Cal: 68.421% + + ## 
POPUL Cal: 71.429% + + ## GLMA4 Cal: 70.588% + + ## PHVU Cal: 66.667% + +``` r +names(split_data) +``` + + ## [1] "cal_data" "val_data" + +``` r +cal.plsr.data <- split_data$cal_data +head(cal.plsr.data)[1:8] +``` + + ## Species_Code Common_Name C_N_mass C_g_m2 H20_g_m2 LMA_g_m2 N_g_m2 + ## 1 HEAN3 common sunflower 7.58 15.61210 167.63 36.40 2.103694 + ## 2 HEAN3 common sunflower 8.33 14.73724 164.68 34.65 1.231713 + ## 4 CUSA4 garden cucumber 7.40 11.14835 111.52 26.23 1.287963 + ## 6 CUSA4 garden cucumber 7.43 8.06035 114.36 18.40 1.117704 + ## 7 CUPE field pumpkin 7.20 11.43007 128.42 25.83 1.215333 + ## 10 SOLYL garden tomato 7.89 11.61918 142.23 27.40 1.304110 + ## Wave_500 + ## 1 4.782000 + ## 2 4.341714 + ## 4 3.333429 + ## 6 3.272286 + ## 7 2.943143 + ## 10 4.145714 + +``` r +val.plsr.data <- split_data$val_data +head(val.plsr.data)[1:8] +``` + + ## Species_Code Common_Name C_N_mass C_g_m2 H20_g_m2 LMA_g_m2 N_g_m2 + ## 3 HEAN3 common sunflower 7.70 15.024947 156.95 35.08 1.7647515 + ## 5 CUSA4 garden cucumber 7.47 11.607347 123.58 26.71 1.4113615 + ## 8 CUPE field pumpkin 7.67 12.466238 124.67 29.22 1.1468413 + ## 9 CUPE field pumpkin 7.64 17.100448 142.85 43.39 1.1390174 + ## 13 SOLYL garden tomato 7.73 7.938866 129.95 17.96 0.9483533 + ## 15 OCBA sweet basil 8.13 16.975969 173.30 38.65 1.1246459 + ## Wave_500 + ## 3 4.502857 + ## 5 3.313571 + ## 8 2.868000 + ## 9 3.338286 + ## 13 3.960286 + ## 15 3.744000 + +``` r +rm(split_data) + +# Datasets: +print(paste("Cal observations: ",dim(cal.plsr.data)[1],sep="")) +``` + + ## [1] "Cal observations: 124" + +``` r +print(paste("Val observations: ",dim(val.plsr.data)[1],sep="")) +``` + + ## [1] "Val observations: 54" + +``` r +cal_hist_plot <- qplot(cal.plsr.data[,paste0(inVar)],geom="histogram", + main = paste0("Cal. 
Histogram for ",inVar), + xlab = paste0(inVar),ylab = "Count",fill=I("grey50"),col=I("black"), + alpha=I(.7)) +val_hist_plot <- qplot(val.plsr.data[,paste0(inVar)],geom="histogram", + main = paste0("Val. Histogram for ",inVar), + xlab = paste0(inVar),ylab = "Count",fill=I("grey50"),col=I("black"), + alpha=I(.7)) +histograms <- grid.arrange(cal_hist_plot, val_hist_plot, ncol=2) +``` + + ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`. + + ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`. + +![](ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-6-1.png) + +``` r +ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Histograms.png")), + plot = histograms, + device="png", width = 30, + height = 12, units = "cm", + dpi = 300) +# output cal/val data +write.csv(cal.plsr.data,file=file.path(outdir,paste0(inVar,'_Cal_PLSR_Dataset.csv')), + row.names=FALSE) +write.csv(val.plsr.data,file=file.path(outdir,paste0(inVar,'_Val_PLSR_Dataset.csv')), + row.names=FALSE) +``` + +### Create calibration and validation PLSR datasets + +``` r +### Format PLSR data for model fitting +cal_spec <- as.matrix(cal.plsr.data[, which(names(cal.plsr.data) %in% paste0("Wave_",wv))]) +cal.plsr.data <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% paste0("Wave_",wv))], + Spectra=I(cal_spec)) +head(cal.plsr.data)[1:5] +``` + + ## Species_Code Common_Name C_N_mass C_g_m2 H20_g_m2 + ## 1 HEAN3 common sunflower 7.58 15.61210 167.63 + ## 2 HEAN3 common sunflower 8.33 14.73724 164.68 + ## 4 CUSA4 garden cucumber 7.40 11.14835 111.52 + ## 6 CUSA4 garden cucumber 7.43 8.06035 114.36 + ## 7 CUPE field pumpkin 7.20 11.43007 128.42 + ## 10 SOLYL garden tomato 7.89 11.61918 142.23 + +``` r +val_spec <- as.matrix(val.plsr.data[, which(names(val.plsr.data) %in% paste0("Wave_",wv))]) +val.plsr.data <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% paste0("Wave_",wv))], + Spectra=I(val_spec)) +head(val.plsr.data)[1:5] +``` 
+ + ## Species_Code Common_Name C_N_mass C_g_m2 H20_g_m2 + ## 3 HEAN3 common sunflower 7.70 15.024947 156.95 + ## 5 CUSA4 garden cucumber 7.47 11.607347 123.58 + ## 8 CUPE field pumpkin 7.67 12.466238 124.67 + ## 9 CUPE field pumpkin 7.64 17.100448 142.85 + ## 13 SOLYL garden tomato 7.73 7.938866 129.95 + ## 15 OCBA sweet basil 8.13 16.975969 173.30 + +### plot cal and val spectra + +``` r +par(mfrow=c(1,2)) # B, L, T, R +spectratrait::f.plot.spec(Z=cal.plsr.data$Spectra,wv=wv,plot_label="Calibration") +spectratrait::f.plot.spec(Z=val.plsr.data$Spectra,wv=wv,plot_label="Validation") +``` + +![](ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-8-1.png) + +``` r +dev.copy(png,file.path(outdir,paste0(inVar,'_Cal_Val_Spectra.png')), + height=2500,width=4900, res=340) +``` + + ## quartz_off_screen + ## 3 + +``` r +dev.off(); +``` + + ## quartz_off_screen + ## 2 + +``` r +par(mfrow=c(1,1)) +``` + +### Use permutation to determine optimal number of components + +``` r +### Use permutation to determine the optimal number of components +if(grepl("Windows", sessionInfo()$running)){ + pls.options(parallel = NULL) +} else { + pls.options(parallel = parallel::detectCores()-1) +} + +method <- "firstMin" #firstPlateau, firstMin +random_seed <- 1245565 +seg <- 50 +maxComps <- 16 +iterations <- 80 +prop <- 0.70 +nComps <- spectratrait::find_optimal_comp_by_groups(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, + iterations=iterations, prop=prop, + random_seed=random_seed, + group_variables="Species_Code") +``` + + ## [1] "*** Identifying optimal number of PLSR components using stratified resampling by group_variables ***" + ## [1] "*** Running permutation test. 
Please hang tight, this can take awhile ***" + ## [1] "Options:" + ## [1] "Max Components: 16 Iterations: 80 Data Proportion (percent): 70" + ## [1] "*** Providing PRESS and coefficient array output ***" + + ## No id variables; using all as measure variables + + ## [1] "*** Optimal number of components based on t.test: 15" + +![](ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-9-1.png) + +``` r +dev.copy(png,file.path(outdir,paste0(paste0(inVar,"_PLSR_Component_Selection.png"))), + height=2800, width=3400, res=340) +``` + + ## quartz_off_screen + ## 3 + +``` r +dev.off(); +``` + + ## quartz_off_screen + ## 2 + +### Fit final model + +``` r +plsr.out <- plsr(as.formula(paste(inVar,"~","Spectra")),scale=FALSE,ncomp=nComps,validation="LOO", + trace=FALSE,data=cal.plsr.data) +fit <- plsr.out$fitted.values[,1,nComps] +pls.options(parallel = NULL) + +# External validation fit stats +par(mfrow=c(1,2)) # B, L, T, R +pls::RMSEP(plsr.out, newdata = val.plsr.data) +``` + + ## (Intercept) 1 comps 2 comps 3 comps 4 comps 5 comps + ## 0.5908 0.4735 0.4162 0.4037 0.3347 0.3023 + ## 6 comps 7 comps 8 comps 9 comps 10 comps 11 comps + ## 0.2993 0.3081 0.2814 0.2445 0.2276 0.2104 + ## 12 comps 13 comps 14 comps 15 comps + ## 0.1954 0.2003 0.1973 0.2108 + +``` r +plot(pls::RMSEP(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL RMSEP", + xlab="Number of Components",ylab="Model Validation RMSEP",lty=1,col="black",cex=1.5,lwd=2) +box(lwd=2.2) + +pls::R2(plsr.out, newdata = val.plsr.data) +``` + + ## (Intercept) 1 comps 2 comps 3 comps 4 comps 5 comps + ## -0.004079 0.355010 0.501632 0.531088 0.677620 0.737143 + ## 6 comps 7 comps 8 comps 9 comps 10 comps 11 comps + ## 0.742224 0.726835 0.772115 0.827942 0.850962 0.872685 + ## 12 comps 13 comps 14 comps 15 comps + ## 0.890124 0.884529 0.887961 0.872129 + +``` r +plot(pls::R2(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL R2", + xlab="Number of Components",ylab="Model Validation 
R2",lty=1,col="black",cex=1.5,lwd=2) +box(lwd=2.2) +``` + +![](ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-10-1.png) + +``` r +dev.copy(png,file.path(outdir,paste0(paste0(inVar,"_Validation_RMSEP_R2_by_Component.png"))), + height=2800, width=4800, res=340) +``` + + ## quartz_off_screen + ## 3 + +``` r +dev.off(); +``` + + ## quartz_off_screen + ## 2 + +``` r +par(opar) +``` + +### PLSR fit observed vs. predicted plot data + +``` r +#calibration +cal.plsr.output <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% "Spectra")], + PLSR_Predicted=fit, + PLSR_CV_Predicted=as.vector(plsr.out$validation$pred[,,nComps])) +cal.plsr.output <- cal.plsr.output %>% + mutate(PLSR_CV_Residuals = PLSR_CV_Predicted-get(inVar)) +head(cal.plsr.output) +``` + + ## Species_Code Common_Name C_N_mass C_g_m2 H20_g_m2 LMA_g_m2 N_g_m2 + ## 1 HEAN3 common sunflower 7.58 15.61210 167.63 36.40 2.103694 + ## 2 HEAN3 common sunflower 8.33 14.73724 164.68 34.65 1.231713 + ## 4 CUSA4 garden cucumber 7.40 11.14835 111.52 26.23 1.287963 + ## 6 CUSA4 garden cucumber 7.43 8.06035 114.36 18.40 1.117704 + ## 7 CUPE field pumpkin 7.20 11.43007 128.42 25.83 1.215333 + ## 10 SOLYL garden tomato 7.89 11.61918 142.23 27.40 1.304110 + ## PLSR_Predicted PLSR_CV_Predicted PLSR_CV_Residuals + ## 1 1.836047 1.714086 -0.38960842 + ## 2 1.530813 1.685388 0.45367526 + ## 4 1.254794 1.262835 -0.02512724 + ## 6 1.127053 1.129340 0.01163542 + ## 7 1.196259 1.188471 -0.02686200 + ## 10 1.276380 1.281683 -0.02242624 + +``` r +cal.R2 <- round(pls::R2(plsr.out,intercept=F)[[1]][nComps],2) +cal.RMSEP <- round(sqrt(mean(cal.plsr.output$PLSR_CV_Residuals^2)),2) + +val.plsr.output <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% "Spectra")], + PLSR_Predicted=as.vector(predict(plsr.out, + newdata = val.plsr.data, + ncomp=nComps, type="response")[,,1])) +val.plsr.output <- val.plsr.output %>% + mutate(PLSR_Residuals = PLSR_Predicted-get(inVar)) +head(val.plsr.output) +``` + + 
## Species_Code Common_Name C_N_mass C_g_m2 H20_g_m2 LMA_g_m2 N_g_m2 + ## 3 HEAN3 common sunflower 7.70 15.024947 156.95 35.08 1.7647515 + ## 5 CUSA4 garden cucumber 7.47 11.607347 123.58 26.71 1.4113615 + ## 8 CUPE field pumpkin 7.67 12.466238 124.67 29.22 1.1468413 + ## 9 CUPE field pumpkin 7.64 17.100448 142.85 43.39 1.1390174 + ## 13 SOLYL garden tomato 7.73 7.938866 129.95 17.96 0.9483533 + ## 15 OCBA sweet basil 8.13 16.975969 173.30 38.65 1.1246459 + ## PLSR_Predicted PLSR_Residuals + ## 3 1.7624701 -0.002281391 + ## 5 1.2947218 -0.116639722 + ## 8 0.9934199 -0.153421396 + ## 9 1.1345273 -0.004490078 + ## 13 0.7432855 -0.205067758 + ## 15 1.1613789 0.036733007 + +``` r +val.R2 <- round(pls::R2(plsr.out,newdata=val.plsr.data,intercept=F)[[1]][nComps],2) +val.RMSEP <- round(sqrt(mean(val.plsr.output$PLSR_Residuals^2)),2) + +rng_quant <- quantile(cal.plsr.output[,inVar], probs = c(0.001, 0.999)) +cal_scatter_plot <- ggplot(cal.plsr.output, aes(x=PLSR_CV_Predicted, y=get(inVar))) + + theme_bw() + geom_point() + geom_abline(intercept = 0, slope = 1, color="dark grey", + linetype="dashed", size=1.5) + xlim(rng_quant[1], + rng_quant[2]) + + ylim(rng_quant[1], rng_quant[2]) + + labs(x=paste0("Predicted ", paste(inVar), " (units)"), + y=paste0("Observed ", paste(inVar), " (units)"), + title=paste0("Calibration: ", paste0("Rsq = ", cal.R2), "; ", paste0("RMSEP = ", + cal.RMSEP))) + + theme(axis.text=element_text(size=18), legend.position="none", + axis.title=element_text(size=20, face="bold"), + axis.text.x = element_text(angle = 0,vjust = 0.5), + panel.border = element_rect(linetype = "solid", fill = NA, size=1.5)) + +cal_resid_histogram <- ggplot(cal.plsr.output, aes(x=PLSR_CV_Residuals)) + + geom_histogram(alpha=.5, position="identity") + + geom_vline(xintercept = 0, color="black", + linetype="dashed", size=1) + theme_bw() + + theme(axis.text=element_text(size=18), legend.position="none", + axis.title=element_text(size=20, face="bold"), + axis.text.x = 
element_text(angle = 0,vjust = 0.5), + panel.border = element_rect(linetype = "solid", fill = NA, size=1.5)) + +rng_quant <- quantile(val.plsr.output[,inVar], probs = c(0.001, 0.999)) +val_scatter_plot <- ggplot(val.plsr.output, aes(x=PLSR_Predicted, y=get(inVar))) + + theme_bw() + geom_point() + geom_abline(intercept = 0, slope = 1, color="dark grey", + linetype="dashed", size=1.5) + xlim(rng_quant[1], + rng_quant[2]) + + ylim(rng_quant[1], rng_quant[2]) + + labs(x=paste0("Predicted ", paste(inVar), " (units)"), + y=paste0("Observed ", paste(inVar), " (units)"), + title=paste0("Validation: ", paste0("Rsq = ", val.R2), "; ", paste0("RMSEP = ", + val.RMSEP))) + + theme(axis.text=element_text(size=18), legend.position="none", + axis.title=element_text(size=20, face="bold"), + axis.text.x = element_text(angle = 0,vjust = 0.5), + panel.border = element_rect(linetype = "solid", fill = NA, size=1.5)) + +val_resid_histogram <- ggplot(val.plsr.output, aes(x=PLSR_Residuals)) + + geom_histogram(alpha=.5, position="identity") + + geom_vline(xintercept = 0, color="black", + linetype="dashed", size=1) + theme_bw() + + theme(axis.text=element_text(size=18), legend.position="none", + axis.title=element_text(size=20, face="bold"), + axis.text.x = element_text(angle = 0,vjust = 0.5), + panel.border = element_rect(linetype = "solid", fill = NA, size=1.5)) + +# plot cal/val side-by-side +scatterplots <- grid.arrange(cal_scatter_plot, val_scatter_plot, cal_resid_histogram, + val_resid_histogram, nrow=2,ncol=2) +``` + + ## Warning: Removed 5 rows containing missing values (geom_point). + + ## Warning: Removed 4 rows containing missing values (geom_point). + + ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`. + ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`. 
+ +![](ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-11-1.png) + +``` r +ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Scatterplots.png")), + plot = scatterplots, device="png", + width = 32, + height = 30, units = "cm", + dpi = 300) +``` + +### Generate Coefficient and VIP plots + +``` r +vips <- spectratrait::VIP(plsr.out)[nComps,] +par(mfrow=c(2,1)) +plot(plsr.out, plottype = "coef",xlab="Wavelength (nm)", + ylab="Regression coefficients",legendpos = "bottomright", + ncomp=nComps,lwd=2) +box(lwd=2.2) +plot(seq(Start.wave,End.wave,1),vips,xlab="Wavelength (nm)",ylab="VIP",cex=0.01) +lines(seq(Start.wave,End.wave,1),vips,lwd=3) +abline(h=0.8,lty=2,col="dark grey") +box(lwd=2.2) +``` + +![](ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-12-1.png) + +``` r +dev.copy(png,file.path(outdir,paste0(inVar,'_Coefficient_VIP_plot.png')), + height=3100, width=4100, res=340) +``` + + ## quartz_off_screen + ## 3 + +``` r +dev.off(); +``` + + ## quartz_off_screen + ## 2 + +### Bootstrap validation + +``` r +if(grepl("Windows", sessionInfo()$running)){ + pls.options(parallel =NULL) +} else { + pls.options(parallel = parallel::detectCores()-1) +} + +### PLSR bootstrap permutation uncertainty analysis +iterations <- 500 # how many permutation iterations to run +prop <- 0.70 # fraction of training data to keep for each iteration +plsr_permutation <- spectratrait::pls_permutation_by_groups(dataset=cal.plsr.data, + targetVariable=inVar, + maxComps=nComps, + iterations=iterations, + prop=prop, group_variables="Species_Code", + verbose=FALSE) +``` + + ## [1] "*** Running permutation test. 
Please hang tight, this can take awhile ***" + ## [1] "Options:" + ## [1] "Max Components: 15 Iterations: 500 Data Proportion (percent): 70" + ## [1] "*** Providing PRESS and coefficient array output ***" + +``` r +bootstrap_intercept <- plsr_permutation$coef_array[1,,nComps] +bootstrap_coef <- plsr_permutation$coef_array[2:length(plsr_permutation$coef_array[,1,nComps]), + ,nComps] +rm(plsr_permutation) + +# apply coefficients to left-out validation data +interval <- c(0.025,0.975) +Bootstrap_Pred <- val.plsr.data$Spectra %*% bootstrap_coef + + matrix(rep(bootstrap_intercept, length(val.plsr.data[,inVar])), byrow=TRUE, + ncol=length(bootstrap_intercept)) +Interval_Conf <- apply(X = Bootstrap_Pred, MARGIN = 1, FUN = quantile, + probs=c(interval[1], interval[2])) +sd_mean <- apply(X = Bootstrap_Pred, MARGIN = 1, FUN = sd) +sd_res <- sd(val.plsr.output$PLSR_Residuals) +sd_tot <- sqrt(sd_mean^2+sd_res^2) +val.plsr.output$LCI <- Interval_Conf[1,] +val.plsr.output$UCI <- Interval_Conf[2,] +val.plsr.output$LPI <- val.plsr.output$PLSR_Predicted-1.96*sd_tot +val.plsr.output$UPI <- val.plsr.output$PLSR_Predicted+1.96*sd_tot +head(val.plsr.output) +``` + + ## Species_Code Common_Name C_N_mass C_g_m2 H20_g_m2 LMA_g_m2 N_g_m2 + ## 3 HEAN3 common sunflower 7.70 15.024947 156.95 35.08 1.7647515 + ## 5 CUSA4 garden cucumber 7.47 11.607347 123.58 26.71 1.4113615 + ## 8 CUPE field pumpkin 7.67 12.466238 124.67 29.22 1.1468413 + ## 9 CUPE field pumpkin 7.64 17.100448 142.85 43.39 1.1390174 + ## 13 SOLYL garden tomato 7.73 7.938866 129.95 17.96 0.9483533 + ## 15 OCBA sweet basil 8.13 16.975969 173.30 38.65 1.1246459 + ## PLSR_Predicted PLSR_Residuals LCI UCI LPI UPI + ## 3 1.7624701 -0.002281391 1.5710330 1.9443661 1.3151243 2.209816 + ## 5 1.2947218 -0.116639722 1.2019841 1.4531979 0.8688563 1.720587 + ## 8 0.9934199 -0.153421396 0.8544582 1.1646561 0.5564158 1.430424 + ## 9 1.1345273 -0.004490078 0.9954061 1.2824287 0.7007745 1.568280 + ## 13 0.7432855 -0.205067758 0.5836738 
0.9094675 0.3042086 1.182362 + ## 15 1.1613789 0.036733007 1.0021191 1.2849671 0.7291004 1.593657 + +### Bootstrap coefficient plot + +``` r +# Bootstrap regression coefficient plot +spectratrait::f.plot.coef(Z = t(bootstrap_coef), wv = wv, + plot_label="Bootstrap regression coefficients",position = 'bottomleft') +abline(h=0,lty=2,col="grey50") +box(lwd=2.2) +``` + +![](ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-14-1.png) + +``` r +dev.copy(png,file.path(outdir,paste0(inVar,'_Bootstrap_Regression_Coefficients.png')), + height=2100, width=3800, res=340) +``` + + ## quartz_off_screen + ## 3 + +``` r +dev.off(); +``` + + ## quartz_off_screen + ## 2 + +### Bootstrap validation plot + +``` r +rmsep_percrmsep <- spectratrait::percent_rmse(plsr_dataset = val.plsr.output, + inVar = inVar, + residuals = val.plsr.output$PLSR_Residuals, + range="full") +RMSEP <- rmsep_percrmsep$rmse +perc_RMSEP <- rmsep_percrmsep$perc_rmse +r2 <- round(pls::R2(plsr.out, newdata = val.plsr.data, intercept=F)$val[nComps],2) +expr <- vector("expression", 3) +expr[[1]] <- bquote(R^2==.(r2)) +expr[[2]] <- bquote(RMSEP==.(round(RMSEP,2))) +expr[[3]] <- bquote("%RMSEP"==.(round(perc_RMSEP,2))) +rng_vals <- c(min(val.plsr.output$LPI), max(val.plsr.output$UPI)) +par(mfrow=c(1,1), mar=c(4.2,5.3,1,0.4), oma=c(0, 0.1, 0, 0.2)) +plotrix::plotCI(val.plsr.output$PLSR_Predicted,val.plsr.output[,inVar], + li=val.plsr.output$LPI, ui=val.plsr.output$UPI, gap=0.009,sfrac=0.000, + lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), + err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="grey80", + cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"), + ylab=paste0("Observed ", paste(inVar), " (units)"), + cex.axis=1.5,cex.lab=1.8) +abline(0,1,lty=2,lw=2) +plotrix::plotCI(val.plsr.output$PLSR_Predicted,val.plsr.output[,inVar], + li=val.plsr.output$LCI, ui=val.plsr.output$UCI, gap=0.009,sfrac=0.004, + lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]),
ylim=c(rng_vals[1], rng_vals[2]), + err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="black", + cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"), + ylab=paste0("Observed ", paste(inVar), " (units)"), + cex.axis=1.5,cex.lab=1.8, add=T) +legend("topleft", legend=expr, bty="n", cex=1.5) +legend("bottomright", legend=c("Prediction Interval","Confidence Interval"), + lty=c(1,1), col = c("grey80","black"), lwd=3, bty="n", cex=1.5) +box(lwd=2.2) +``` + +![](ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-15-1.png) + +``` r +dev.copy(png,file.path(outdir,paste0(inVar,"_PLSR_Validation_Scatterplot.png")), + height=2800, width=3200, res=340) +``` + + ## quartz_off_screen + ## 3 + +``` r +dev.off(); +``` + + ## quartz_off_screen + ## 2 + +### Output bootstrap results + +``` r +# Bootstrap Coefficients +out.jk.coefs <- data.frame(Iteration=seq(1,length(bootstrap_intercept),1), + Intercept=bootstrap_intercept,t(bootstrap_coef)) +names(out.jk.coefs) <- c("Iteration","Intercept",paste0("Wave_",wv)) +head(out.jk.coefs)[1:6] +``` + + ## Iteration Intercept Wave_500 Wave_501 Wave_502 Wave_503 + ## 1 1 0.4731951 0.0236618987 0.021719096 0.023063691 0.02187741 + ## 2 2 0.5415203 -0.0007012397 0.001892634 0.008241293 0.01105366 + ## 3 3 0.6512533 0.0123054098 0.013428257 0.015824665 0.01772586 + ## 4 4 -0.9976728 0.0145306759 0.016119715 0.018834952 0.01959049 + ## 5 5 0.1267626 0.0076041315 0.007329090 0.009971693 0.01339406 + ## 6 6 0.8509641 0.0139793124 0.015195593 0.015170417 0.01434085 + +``` r +write.csv(out.jk.coefs,file=file.path(outdir,paste0(inVar, + '_Bootstrap_PLSR_Coefficients.csv')), + row.names=FALSE) +``` + +### Create core PLSR outputs + +``` r +print(paste("Output directory: ", outdir)) +``` + + ## [1] "Output directory: /var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T//Rtmp1HGXY2" + +``` r +# Observed versus predicted +write.csv(cal.plsr.output,file=file.path(outdir, + paste0(inVar,'_Observed_PLSR_CV_Pred_', + 
nComps,'comp.csv')), + row.names=FALSE) + +# Validation data +write.csv(val.plsr.output,file=file.path(outdir, + paste0(inVar,'_Validation_PLSR_Pred_', + nComps,'comp.csv')), + row.names=FALSE) + +# Model coefficients +coefs <- coef(plsr.out,ncomp=nComps,intercept=TRUE) +write.csv(coefs,file=file.path(outdir, + paste0(inVar,'_PLSR_Coefficients_', + nComps,'comp.csv')), + row.names=TRUE) + +# PLSR VIP +write.csv(vips,file=file.path(outdir, + paste0(inVar,'_PLSR_VIPs_', + nComps,'comp.csv'))) +``` + +### Confirm files were written to temp space + +``` r +print("**** PLSR output files: ") +``` + + ## [1] "**** PLSR output files: " + +``` r +print(list.files(outdir)[grep(pattern = inVar, list.files(outdir))]) +``` + + ## [1] "N_g_m2_Bootstrap_PLSR_Coefficients.csv" + ## [2] "N_g_m2_Bootstrap_Regression_Coefficients.png" + ## [3] "N_g_m2_Cal_PLSR_Dataset.csv" + ## [4] "N_g_m2_Cal_Val_Histograms.png" + ## [5] "N_g_m2_Cal_Val_Scatterplots.png" + ## [6] "N_g_m2_Cal_Val_Spectra.png" + ## [7] "N_g_m2_Coefficient_VIP_plot.png" + ## [8] "N_g_m2_Observed_PLSR_CV_Pred_15comp.csv" + ## [9] "N_g_m2_PLSR_Coefficients_15comp.csv" + ## [10] "N_g_m2_PLSR_Component_Selection.png" + ## [11] "N_g_m2_PLSR_Validation_Scatterplot.png" + ## [12] "N_g_m2_PLSR_VIPs_15comp.csv" + ## [13] "N_g_m2_Val_PLSR_Dataset.csv" + ## [14] "N_g_m2_Validation_PLSR_Pred_15comp.csv" + ## [15] "N_g_m2_Validation_RMSEP_R2_by_Component.png" diff --git a/vignettes/ely_leafN_bootstrap_plsr_grp_example.pdf b/vignettes/ely_leafN_bootstrap_plsr_grp_example.pdf new file mode 100644 index 0000000..e8f358f Binary files /dev/null and b/vignettes/ely_leafN_bootstrap_plsr_grp_example.pdf differ diff --git a/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-10-1.png b/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-10-1.png new file mode 100644 index 0000000..bab716c Binary files /dev/null and 
b/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-10-1.png differ diff --git a/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-11-1.png b/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-11-1.png new file mode 100644 index 0000000..3aa89fe Binary files /dev/null and b/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-11-1.png differ diff --git a/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-12-1.png b/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-12-1.png new file mode 100644 index 0000000..cc008ff Binary files /dev/null and b/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-12-1.png differ diff --git a/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-14-1.png b/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-14-1.png new file mode 100644 index 0000000..8d09d49 Binary files /dev/null and b/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-14-1.png differ diff --git a/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-15-1.png b/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-15-1.png new file mode 100644 index 0000000..12162fd Binary files /dev/null and b/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-15-1.png differ diff --git a/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-6-1.png b/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-6-1.png new file mode 100644 index 0000000..6d79827 Binary files /dev/null and b/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-6-1.png differ diff --git a/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-8-1.png 
b/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-8-1.png new file mode 100644 index 0000000..4452988 Binary files /dev/null and b/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-8-1.png differ diff --git a/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-9-1.png b/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-9-1.png new file mode 100644 index 0000000..e435ae9 Binary files /dev/null and b/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-9-1.png differ diff --git a/vignettes/kit_sla_plsr_example.Rmd b/vignettes/kit_sla_plsr_example.Rmd index 6151db8..81851b0 100644 --- a/vignettes/kit_sla_plsr_example.Rmd +++ b/vignettes/kit_sla_plsr_example.Rmd @@ -2,9 +2,9 @@ title: Spectra-trait PLSR example using leaf-level spectra and specific leaf area (SLA) data from more than 40 species grassland species comprising both herbs and graminoids. author: "Shawn P. Serbin, Julien Lamour, & Jeremiah Anderson" output: - github_document: default - html_notebook: default pdf_document: default + html_notebook: default + github_document: default html_document: df_print: paged params: @@ -110,9 +110,7 @@ split_data <- spectratrait::create_data_split(dataset=plsr_data, approach=method prop=0.8, group_variables="Plant_Species") names(split_data) cal.plsr.data <- split_data$cal_data -head(cal.plsr.data)[1:8] val.plsr.data <- split_data$val_data -head(val.plsr.data)[1:8] rm(split_data) # Datasets: @@ -181,12 +179,14 @@ iterations <- 50 prop <- 0.70 if (method=="pls") { # pls package approach - faster but estimates more components.... 
- nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data,method=method, + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, seg=seg, random_seed=random_seed) print(paste0("*** Optimal number of components: ", nComps)) } else { - nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, iterations=iterations, seg=seg, prop=prop, diff --git a/vignettes/kit_sla_plsr_example.md b/vignettes/kit_sla_plsr_example.md index 11c7cf8..aa72fc2 100644 --- a/vignettes/kit_sla_plsr_example.md +++ b/vignettes/kit_sla_plsr_example.md @@ -85,7 +85,7 @@ output_dir <- "tempdir" ### Set working directory (scratch space) - ## [1] "Output directory: /private/var/folders/6h/r2g9xpxj2xq5xt1dn3cn5g800000gn/T/Rtmp0jgwQR" + ## [1] "Output directory: /private/var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T/Rtmp952NtZ" ### Grab data from EcoSIS @@ -104,16 +104,15 @@ dat_raw <- spectratrait::get_ecosis_data(ecosis_id = ecosis_id) ## Downloading data... - ## Rows: 739 Columns: 2114 - - ## ── Column specification ──────────────────────────────────────────────────────── - ## Delimiter: "," - ## chr (13): Anthocyanin concentration (mg/g), Anthocyanin content ( g/cm ), ... - ## dbl (2101): 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412,... - ## - ## ℹ Use `spec()` to retrieve the full column specification for this data. - ## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message. + ## ── Column specification ──────────────────────────────────────────────────────── + ## cols( + ## .default = col_double(), + ## `growth form` = col_character(), + ## species = col_character(), + ## timestamp = col_character() + ## ) + ## ℹ Use `spec()` for the full column specifications. ## Download complete! 
@@ -123,32 +122,20 @@ head(dat_raw) ## # A tibble: 6 × 2,114 ## `Anthocyanin concen… `Anthocyanin cont… `Carotenoid concen… `Carotenoid conte… - ## - ## 1 0.00106305 0.996765974 0.00799017 7.491957938 - ## 2 0.003571021 1.217297195 0.022081567 7.527211759 - ## 3 0.002517379 1.142859188 0.018826449 8.546976036 - ## 4 0.003102353 2.262477235 0.015835418 11.54841829 - ## 5 0.004116414 1.733783943 0.021558342 9.080114754 - ## 6 0.003965355 1.021371167 0.033632402 8.662821832 - ## # … with 2,110 more variables: Chlorophyll concentration (mg/g) , - ## # Chlorophyll content ( g/cm ) , LDMC (g/g) , LFA (mg/cm ) , - ## # LWC (mg/cm ) , SLA (g/cm ) , growth form , species , + ## + ## 1 0.00106 0.997 0.00799 7.49 + ## 2 0.00357 1.22 0.0221 7.53 + ## 3 0.00252 1.14 0.0188 8.55 + ## 4 0.00310 2.26 0.0158 11.5 + ## 5 0.00412 1.73 0.0216 9.08 + ## 6 0.00397 1.02 0.0336 8.66 + ## # … with 2,110 more variables: Chlorophyll concentration (mg/g) , + ## # Chlorophyll content ( g/cm ) , LDMC (g/g) , LFA (mg/cm ) , + ## # LWC (mg/cm ) , SLA (g/cm ) , growth form , species , ## # timestamp , 400 , 401 , 402 , 403 , 404 , ## # 405 , 406 , 407 , 408 , 409 , 410 , ## # 411 , 412 , 413 , 414 , 415 , 416 , - ## # 417 , 418 , 419 , 420 , 421 , 422 , - ## # 423 , 424 , 425 , 426 , 427 , 428 , - ## # 429 , 430 , 431 , 432 , 433 , 434 , - ## # 435 , 436 , 437 , 438 , 439 , 440 , - ## # 441 , 442 , 443 , 444 , 445 , 446 , - ## # 447 , 448 , 449 , 450 , 451 , 452 , - ## # 453 , 454 , 455 , 456 , 457 , 458 , - ## # 459 , 460 , 461 , 462 , 463 , 464 , - ## # 465 , 466 , 467 , 468 , 469 , 470 , - ## # 471 , 472 , 473 , 474 , 475 , 476 , - ## # 477 , 478 , 479 , 480 , 481 , 482 , - ## # 483 , 484 , 485 , 486 , 487 , 488 , - ## # 489 , 490 , … + ## # 417 , 418 , 419 , 420 , 421 , 422 , … ``` r names(dat_raw)[1:40] @@ -190,16 +177,16 @@ head(sample_info) ## # A tibble: 6 × 13 ## `Anthocyanin concen… `Anthocyanin cont… `Carotenoid concen… `Carotenoid conte… - ## - ## 1 0.00106305 0.996765974 0.00799017 
7.491957938 - ## 2 0.003571021 1.217297195 0.022081567 7.527211759 - ## 3 0.002517379 1.142859188 0.018826449 8.546976036 - ## 4 0.003102353 2.262477235 0.015835418 11.54841829 - ## 5 0.004116414 1.733783943 0.021558342 9.080114754 - ## 6 0.003965355 1.021371167 0.033632402 8.662821832 - ## # … with 9 more variables: Chlorophyll concentration (mg/g) , - ## # Chlorophyll content ( g/cm ) , LDMC (g/g) , LFA (mg/cm ) , - ## # LWC (mg/cm ) , SLA (g/cm ) , growth form , species , + ## + ## 1 0.00106 0.997 0.00799 7.49 + ## 2 0.00357 1.22 0.0221 7.53 + ## 3 0.00252 1.14 0.0188 8.55 + ## 4 0.00310 2.26 0.0158 11.5 + ## 5 0.00412 1.73 0.0216 9.08 + ## 6 0.00397 1.02 0.0336 8.66 + ## # … with 9 more variables: Chlorophyll concentration (mg/g) , + ## # Chlorophyll content ( g/cm ) , LDMC (g/g) , LFA (mg/cm ) , + ## # LWC (mg/cm ) , SLA (g/cm ) , growth form , species , ## # timestamp ``` r @@ -346,45 +333,7 @@ names(split_data) ``` r cal.plsr.data <- split_data$cal_data -head(cal.plsr.data)[1:8] -``` - - ## Plant_Species Growth_Form timestamp SLA_g_cm Wave_500 - ## 1 Calamagrostis epigejos graminoid 5/25/2016 12:20 106.6500 0.09180559 - ## 2 Anthoxanthum odoratum graminoid 5/27/2016 8:40 293.3565 0.09022668 - ## 3 Alopecurus pratensis graminoid 5/27/2016 9:23 220.2703 0.07998340 - ## 4 Festuca ovina graminoid 5/27/2016 9:23 137.1220 0.05205080 - ## 5 Agrostis capillaris graminoid 5/27/2016 9:42 237.4237 0.06695127 - ## 6 Aegopodium podagraria forb 5/25/2016 12:20 388.2384 0.04091566 - ## Wave_501 Wave_502 Wave_503 - ## 1 0.09293251 0.09417092 0.09552863 - ## 2 0.09125158 0.09237300 0.09359694 - ## 3 0.08109460 0.08231389 0.08365015 - ## 4 0.05256869 0.05314560 0.05378355 - ## 5 0.06766205 0.06845248 0.06932220 - ## 6 0.04169865 0.04257613 0.04355737 - -``` r val.plsr.data <- split_data$val_data -head(val.plsr.data)[1:8] -``` - - ## Plant_Species Growth_Form timestamp SLA_g_cm Wave_500 - ## 9 Urtica dioica forb 5/25/2016 12:37 284.6788 0.04716736 - ## 15 Stellaria media forb 
5/25/2016 13:21 418.4284 0.05694278 - ## 23 Alopecurus pratensis graminoid 6/1/2016 11:32 218.2117 0.08135086 - ## 44 Alopecurus pratensis graminoid 6/8/2016 8:37 216.7568 0.10062342 - ## 46 Agrostis capillaris graminoid 6/8/2016 9:05 231.5292 0.08099724 - ## 47 Aegopodium podagraria forb 6/7/2016 9:05 311.4018 0.03778815 - ## Wave_501 Wave_502 Wave_503 - ## 9 0.04781633 0.04854276 0.04935320 - ## 15 0.05811729 0.05940497 0.06080936 - ## 23 0.08249180 0.08373915 0.08509719 - ## 44 0.10190706 0.10330054 0.10480538 - ## 46 0.08178586 0.08265099 0.08360108 - ## 47 0.03845043 0.03919155 0.04001581 - -``` r rm(split_data) # Datasets: @@ -6493,7 +6442,7 @@ head(val.plsr.data)[1:5] ## 23 0.13114422 0.13173930 0.13233132 0.13292035 ## 44 0.14035330 0.14102638 0.14169735 0.14236617 ## 46 0.13576063 0.13641384 0.13706527 0.13771485 - ## 47 0.08674581 0.08727845 0.08781028 0.08834134 + ## 47 0.08674581 0.08727845 0.08781028 0.08834133 ## Spectra.Wave_2049 Spectra.Wave_2050 Spectra.Wave_2051 Spectra.Wave_2052 ## 9 0.11252288 0.11318057 0.11383750 0.11449413 ## 15 0.05869478 0.05920211 0.05971230 0.06022535 @@ -6743,7 +6692,7 @@ head(val.plsr.data)[1:5] ## 9 0.17459234 0.17471639 0.17483767 0.17495602 ## 15 0.11900502 0.11910486 0.11920045 0.11929147 ## 23 0.18956770 0.18964676 0.18972298 0.18979607 - ## 44 0.20551218 0.20558308 0.20565096 0.20571562 + ## 44 0.20551219 0.20558308 0.20565096 0.20571562 ## 46 0.19599448 0.19604878 0.19610115 0.19615136 ## 47 0.14005093 0.14013975 0.14022655 0.14031084 ## Spectra.Wave_2193 Spectra.Wave_2194 Spectra.Wave_2195 Spectra.Wave_2196 @@ -7158,12 +7107,14 @@ iterations <- 50 prop <- 0.70 if (method=="pls") { # pls package approach - faster but estimates more components.... 
- nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data,method=method, + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, seg=seg, random_seed=random_seed) print(paste0("*** Optimal number of components: ", nComps)) } else { - nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, iterations=iterations, seg=seg, prop=prop, @@ -7171,6 +7122,7 @@ if (method=="pls") { } ``` + ## [1] "*** Identifying optimal number of PLSR components ***" ## [1] "*** Running PLS permutation test ***" ![](kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png) diff --git a/vignettes/kit_sla_plsr_example.pdf b/vignettes/kit_sla_plsr_example.pdf index 30559bc..3c7f319 100644 Binary files a/vignettes/kit_sla_plsr_example.pdf and b/vignettes/kit_sla_plsr_example.pdf differ diff --git a/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png b/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png index d51916d..fb5956a 100644 Binary files a/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png and b/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png differ diff --git a/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png b/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png index a55ae5f..b498413 100644 Binary files a/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png and b/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png differ diff --git a/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png b/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png index 7ab9c40..c571fa7 100644 Binary files 
a/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png and b/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png differ diff --git a/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-13-1.png b/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-13-1.png index 64b085d..e9f1deb 100644 Binary files a/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-13-1.png and b/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-13-1.png differ diff --git a/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-15-1.png b/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-15-1.png index 195c4cc..ee63a04 100644 Binary files a/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-15-1.png and b/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-15-1.png differ diff --git a/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-16-1.png b/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-16-1.png index 97ee767..542b475 100644 Binary files a/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-16-1.png and b/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-16-1.png differ diff --git a/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-7-1.png b/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-7-1.png index 9fb1a0a..f0da7cf 100644 Binary files a/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-7-1.png and b/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-7-1.png differ diff --git a/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-9-1.png b/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-9-1.png index 70eee78..e6a5935 100644 Binary files a/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-9-1.png and b/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-9-1.png differ diff --git a/vignettes/neon_leafN_canopy_plsr_example.Rmd 
b/vignettes/neon_leafN_canopy_plsr_example.Rmd index 34ef6f1..e79c2ba 100644 --- a/vignettes/neon_leafN_canopy_plsr_example.Rmd +++ b/vignettes/neon_leafN_canopy_plsr_example.Rmd @@ -2,11 +2,11 @@ title: Spectra-trait PLSR example using NEON AOP pixel spectra and field-sampled leaf nitrogen content from CONUS NEON sites author: "Shawn P. Serbin, Julien Lamour, & Jeremiah Anderson" output: - pdf_document: default + github_document: default html_notebook: default html_document: df_print: paged - github_document: default + pdf_document: default params: date: !r Sys.Date() --- @@ -23,8 +23,7 @@ https://ecosis.org/package/canopy-spectra-to-map-foliar-functional-traits-over-n ### Getting Started ### Load libraries ```{r, eval=TRUE, echo=TRUE} -list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra", - "spectratrait") +list.of.packages <- c("pls","dplyr","here","plotrix","ggplot2","gridExtra","spectratrait") invisible(lapply(list.of.packages, library, character.only = TRUE)) ``` @@ -215,12 +214,14 @@ iterations <- 80 prop <- 0.70 if (method=="pls") { # pls package approach - faster but estimates more components.... 
- nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, seg=seg, random_seed=random_seed) print(paste0("*** Optimal number of components: ", nComps)) } else { - nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, iterations=iterations, seg=seg, prop=prop, random_seed=random_seed) @@ -361,7 +362,8 @@ if(grepl("Windows", sessionInfo()$running)){ ### PLSR bootstrap permutation uncertainty analysis iterations <- 500 # how many permutation iterations to run prop <- 0.70 # fraction of training data to keep for each iteration -plsr_permutation <- spectratrait::pls_permutation(dataset=cal.plsr.data, maxComps=nComps, +plsr_permutation <- spectratrait::pls_permutation(dataset=cal.plsr.data, targetVariable=inVar, + maxComps=nComps, iterations=iterations, prop=prop, verbose = FALSE) bootstrap_intercept <- plsr_permutation$coef_array[1,,nComps] diff --git a/vignettes/neon_leafN_canopy_plsr_example.md b/vignettes/neon_leafN_canopy_plsr_example.md index e5c134a..e1ad4c7 100644 --- a/vignettes/neon_leafN_canopy_plsr_example.md +++ b/vignettes/neon_leafN_canopy_plsr_example.md @@ -17,8 +17,7 @@ For more information refer to the dataset EcoSIS page: ### Load libraries ``` r -list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra", - "spectratrait") +list.of.packages <- c("pls","dplyr","here","plotrix","ggplot2","gridExtra","spectratrait") invisible(lapply(list.of.packages, library, character.only = TRUE)) ``` @@ -83,7 +82,7 @@ output_dir <- "tempdir" ### Set working directory (scratch space) - ## [1] "/private/var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T/RtmpvNSAkI" + ## [1] 
"/private/var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T/RtmpeLrBIP" ### Grab data from EcoSIS @@ -118,7 +117,7 @@ dat_raw <- spectratrait::get_ecosis_data(ecosis_id = ecosis_id) head(dat_raw) ``` - ## # A tibble: 6 x 459 + ## # A tibble: 6 × 459 ## Affiliation Boron Calcium Carbon Carotenoids_area Carotenoids_mass Cellulose ## ## 1 University … 0.0420 24.2 463. 9.19 1.18 221. @@ -133,19 +132,7 @@ head(dat_raw) ## # PI , Phenolics , Phosphorus , Plot_ID , ## # Potassium , Project , SLA , Sample_Year , Starch , ## # Sugar , Sulfur , Water , d13C , d15N , 384 , - ## # 389 , 394 , 399 , 404 , 409 , 414 , - ## # 419 , 424 , 429 , 434 , 439 , 444 , - ## # 449 , 454 , 459 , 464 , 469 , 474 , - ## # 479 , 484 , 489 , 494 , 499 , 504 , - ## # 509 , 514 , 519 , 524 , 529 , 534 , - ## # 539 , 544 , 549 , 554 , 559 , 564 , - ## # 569 , 574 , 579 , 584 , 589 , 594 , - ## # 599 , 604 , 609 , 614 , 619 , 624 , - ## # 629 , 634 , 639 , 644 , 649 , 654 , - ## # 659 , 664 , 669 , 674 , 679 , 684 , - ## # 689 , 694 , 699 , 704 , 709 , 714 , - ## # 719 , 724 , 729 , 734 , 739 , 744 , - ## # 749 , … + ## # 389 , 394 , 399 , 404 , 409 , 414 , … ``` r names(dat_raw)[1:40] @@ -174,7 +161,7 @@ sample_info <- dat_raw[,names(dat_raw) %notin% seq(300,2600,1)] head(sample_info) ``` - ## # A tibble: 6 x 33 + ## # A tibble: 6 × 33 ## Affiliation Boron Calcium Carbon Carotenoids_area Carotenoids_mass Cellulose ## ## 1 University … 0.0420 24.2 463. 9.19 1.18 221. 
@@ -258,19 +245,19 @@ split_data <- spectratrait::create_data_split(dataset=plsr_data, approach=method prop=0.8, group_variables="Plot_Num") ``` - ## D02 Cal: 80.4597701149425% + ## D02 Cal: 80.46% - ## D03 Cal: 80.327868852459% + ## D03 Cal: 80.328% ## D05 Cal: 80% - ## D06 Cal: 79.7297297297297% + ## D06 Cal: 79.73% - ## D07 Cal: 79.2452830188679% + ## D07 Cal: 79.245% - ## D08 Cal: 79.8165137614679% + ## D08 Cal: 79.817% - ## D09 Cal: 79.6296296296296% + ## D09 Cal: 79.63% ``` r names(split_data) @@ -427,18 +414,21 @@ iterations <- 80 prop <- 0.70 if (method=="pls") { # pls package approach - faster but estimates more components.... - nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, seg=seg, random_seed=random_seed) print(paste0("*** Optimal number of components: ", nComps)) } else { - nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, iterations=iterations, seg=seg, prop=prop, random_seed=random_seed) } ``` + ## [1] "*** Identifying optimal number of PLSR components ***" ## [1] "*** Running PLS permutation test ***" ![](neon_leafN_canopy_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png) @@ -534,20 +524,20 @@ cal.plsr.output <- cal.plsr.output %>% head(cal.plsr.output) ``` - ## Plot_Num SampleID Plot_ID Sample_Year SLA Nitrogen CalVal - ## 2 D02 0002 D02_0002 2017 10.77861 27.70598 Cal - ## 3 D02 0003 D02_0003 2017 12.46154 34.63999 Cal - ## 5 D02 0005 D02_0005 2017 17.27620 26.64623 Cal - ## 6 D02 0006 D02_0006 2017 12.92806 20.69437 Cal - ## 7 D02 0007 D02_0007 2017 10.21521 28.87526 Cal - ## 8 D02 0008 D02_0008 2017 20.87397 33.63137 Cal - ## PLSR_Predicted PLSR_CV_Predicted PLSR_CV_Residuals - ## 2 24.65561 24.59452 -3.1114612 
- ## 3 27.85223 27.64033 -6.9996606 - ## 5 29.36467 29.54595 2.8997194 - ## 6 21.66448 21.68116 0.9867955 - ## 7 23.04393 22.78554 -6.0897138 - ## 8 25.56637 25.29798 -8.3333884 + ## Plot_Num SampleID Plot_ID Sample_Year SLA Nitrogen PLSR_Predicted + ## 2 D02 0002 D02_0002 2017 10.77861 27.70598 24.65561 + ## 3 D02 0003 D02_0003 2017 12.46154 34.63999 27.85223 + ## 5 D02 0005 D02_0005 2017 17.27620 26.64623 29.36467 + ## 6 D02 0006 D02_0006 2017 12.92806 20.69437 21.66448 + ## 7 D02 0007 D02_0007 2017 10.21521 28.87526 23.04393 + ## 8 D02 0008 D02_0008 2017 20.87397 33.63137 25.56637 + ## PLSR_CV_Predicted PLSR_CV_Residuals + ## 2 24.59452 -3.1114612 + ## 3 27.64033 -6.9996606 + ## 5 29.54595 2.8997194 + ## 6 21.68116 0.9867955 + ## 7 22.78554 -6.0897138 + ## 8 25.29798 -8.3333884 ``` r cal.R2 <- round(pls::R2(plsr.out,intercept=F)[[1]][nComps],2) @@ -562,20 +552,20 @@ val.plsr.output <- val.plsr.output %>% head(val.plsr.output) ``` - ## Plot_Num SampleID Plot_ID Sample_Year SLA Nitrogen CalVal - ## 1 D02 0001 D02_0001 2017 13.66366 31.18030 Val - ## 4 D02 0004 D02_0004 2017 16.63205 34.54034 Val - ## 16 D02 0016 D02_0016 2017 14.44765 22.87740 Val - ## 18 D02 0019 D02_0019 2017 14.47103 17.73126 Val - ## 19 D02 0020 D02_0020 2017 18.98522 21.32929 Val - ## 20 D02 0021 D02_0021 2017 12.12731 29.50256 Val - ## PLSR_Predicted PLSR_Residuals - ## 1 22.55166 -8.628643 - ## 4 30.79494 -3.745399 - ## 16 29.14446 6.267060 - ## 18 23.47518 5.743923 - ## 19 23.00736 1.678070 - ## 20 31.93483 2.432274 + ## Plot_Num SampleID Plot_ID Sample_Year SLA Nitrogen PLSR_Predicted + ## 1 D02 0001 D02_0001 2017 13.66366 31.18030 22.55166 + ## 4 D02 0004 D02_0004 2017 16.63205 34.54034 30.79494 + ## 16 D02 0016 D02_0016 2017 14.44765 22.87740 29.14446 + ## 18 D02 0019 D02_0019 2017 14.47103 17.73126 23.47518 + ## 19 D02 0020 D02_0020 2017 18.98522 21.32929 23.00736 + ## 20 D02 0021 D02_0021 2017 12.12731 29.50256 31.93483 + ## PLSR_Residuals + ## 1 -8.628643 + ## 4 -3.745399 + ## 16 
6.267060 + ## 18 5.743923 + ## 19 1.678070 + ## 20 2.432274 ``` r val.R2 <- round(pls::R2(plsr.out,newdata=val.plsr.data,intercept=F)[[1]][nComps],2) @@ -692,20 +682,20 @@ par(opar) ## [1] "Max Components: 12 Iterations: 500 Data Proportion (percent): 70" ## [1] "*** Providing PRESS and coefficient array output ***" - ## Plot_Num SampleID Plot_ID Sample_Year SLA Nitrogen CalVal - ## 1 D02 0001 D02_0001 2017 13.66366 31.18030 Val - ## 4 D02 0004 D02_0004 2017 16.63205 34.54034 Val - ## 16 D02 0016 D02_0016 2017 14.44765 22.87740 Val - ## 18 D02 0019 D02_0019 2017 14.47103 17.73126 Val - ## 19 D02 0020 D02_0020 2017 18.98522 21.32929 Val - ## 20 D02 0021 D02_0021 2017 12.12731 29.50256 Val - ## PLSR_Predicted PLSR_Residuals LCI UCI LPI UPI - ## 1 22.55166 -8.628643 21.75139 23.67919 13.44246 31.66086 - ## 4 30.79494 -3.745399 29.24737 32.37867 21.60577 39.98412 - ## 16 29.14446 6.267060 27.57462 30.82609 19.93270 38.35621 - ## 18 23.47518 5.743923 21.73808 24.49326 14.31158 32.63878 - ## 19 23.00736 1.678070 20.70321 24.57934 13.73687 32.27785 - ## 20 31.93483 2.432274 30.75996 34.32739 22.69357 41.17610 + ## Plot_Num SampleID Plot_ID Sample_Year SLA Nitrogen PLSR_Predicted + ## 1 D02 0001 D02_0001 2017 13.66366 31.18030 22.55166 + ## 4 D02 0004 D02_0004 2017 16.63205 34.54034 30.79494 + ## 16 D02 0016 D02_0016 2017 14.44765 22.87740 29.14446 + ## 18 D02 0019 D02_0019 2017 14.47103 17.73126 23.47518 + ## 19 D02 0020 D02_0020 2017 18.98522 21.32929 23.00736 + ## 20 D02 0021 D02_0021 2017 12.12731 29.50256 31.93483 + ## PLSR_Residuals LCI UCI LPI UPI + ## 1 -8.628643 21.75139 23.67919 13.44246 31.66086 + ## 4 -3.745399 29.24737 32.37867 21.60577 39.98412 + ## 16 6.267060 27.57462 30.82609 19.93270 38.35621 + ## 18 5.743923 21.73808 24.49326 14.31158 32.63878 + ## 19 1.678070 20.70321 24.57934 13.73687 32.27785 + ## 20 2.432274 30.75996 34.32739 22.69357 41.17610 ### Jackknife coefficient plot diff --git a/vignettes/neon_leafN_canopy_plsr_example.pdf 
b/vignettes/neon_leafN_canopy_plsr_example.pdf index 9ac17a6..837c5af 100644 Binary files a/vignettes/neon_leafN_canopy_plsr_example.pdf and b/vignettes/neon_leafN_canopy_plsr_example.pdf differ diff --git a/vignettes/neon_leafN_canopy_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png b/vignettes/neon_leafN_canopy_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png index aca3992..5f805e2 100644 Binary files a/vignettes/neon_leafN_canopy_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png and b/vignettes/neon_leafN_canopy_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png differ diff --git a/vignettes/neon_lma_plsr_example.Rmd b/vignettes/neon_lma_plsr_example.Rmd index 34f3a22..035e9a6 100644 --- a/vignettes/neon_lma_plsr_example.Rmd +++ b/vignettes/neon_lma_plsr_example.Rmd @@ -2,11 +2,11 @@ title: Spectra-trait PLSR example using leaf-level spectra and leaf mass per area (LMA) data from CONUS NEON sites author: "Shawn P. Serbin, Julien Lamour, & Jeremiah Anderson" output: - pdf_document: default + github_document: default html_document: df_print: paged html_notebook: default - github_document: default + pdf_document: default params: date: !r Sys.Date() --- @@ -21,8 +21,7 @@ This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook to illustrate how ### Getting Started ### Load libraries ```{r, eval=TRUE, echo=TRUE} -list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra", - "spectratrait") +list.of.packages <- c("pls","dplyr","here","plotrix","ggplot2","gridExtra","spectratrait") invisible(lapply(list.of.packages, library, character.only = TRUE)) ``` @@ -175,12 +174,14 @@ maxComps <- 20 iterations <- 40 prop <- 0.70 if (method=="pls") { - nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, seg=seg, random_seed=random_seed) print(paste0("*** Optimal number of 
components: ", nComps)) } else { - nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, iterations=iterations, seg=seg, prop=prop, random_seed=random_seed) diff --git a/vignettes/neon_lma_plsr_example.md b/vignettes/neon_lma_plsr_example.md index a11b0d4..439d780 100644 --- a/vignettes/neon_lma_plsr_example.md +++ b/vignettes/neon_lma_plsr_example.md @@ -15,8 +15,7 @@ leaf-mass area (LMA) ### Load libraries ``` r -list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra", - "spectratrait") +list.of.packages <- c("pls","dplyr","here","plotrix","ggplot2","gridExtra","spectratrait") invisible(lapply(list.of.packages, library, character.only = TRUE)) ``` @@ -81,7 +80,7 @@ output_dir <- "tempdir" ### Set working directory (scratch space) - ## [1] "/private/var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T/RtmpRBdgMm" + ## [1] "/private/var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T/Rtmp1VUVAg" ### Grab data from EcoSIS @@ -125,7 +124,7 @@ dat_raw <- spectratrait::get_ecosis_data(ecosis_id = ecosis_id) head(dat_raw) ``` - ## # A tibble: 6 x 2,162 + ## # A tibble: 6 × 2,162 ## Affiliation `Common Name` Domain Functional_type LMA `Latin Genus` ## ## 1 University of Wiscon… black walnut D02 broadleaf 72.9 Juglans @@ -140,18 +139,7 @@ head(dat_raw) ## # 359 , 360 , 361 , 362 , 363 , 364 , ## # 365 , 366 , 367 , 368 , 369 , 370 , ## # 371 , 372 , 373 , 374 , 375 , 376 , - ## # 377 , 378 , 379 , 380 , 381 , 382 , - ## # 383 , 384 , 385 , 386 , 387 , 388 , - ## # 389 , 390 , 391 , 392 , 393 , 394 , - ## # 395 , 396 , 397 , 398 , 399 , 400 , - ## # 401 , 402 , 403 , 404 , 405 , 406 , - ## # 407 , 408 , 409 , 410 , 411 , 412 , - ## # 413 , 414 , 415 , 416 , 417 , 418 , - ## # 419 , 420 , 421 , 422 , 423 , 424 , - ## # 425 , 426 , 427 , 428 , 429 , 430 , - ## # 431 , 432 , 433 , 434 , 435 , 
436 , - ## # 437 , 438 , 439 , 440 , 441 , 442 , - ## # 443 , 444 , … + ## # 377 , 378 , 379 , 380 , 381 , 382 , … ``` r names(dat_raw)[1:40] @@ -181,7 +169,7 @@ sample_info <- dat_raw[,names(dat_raw) %notin% seq(350,2500,1)] head(sample_info) ``` - ## # A tibble: 6 x 11 + ## # A tibble: 6 × 11 ## Affiliation `Common Name` Domain Functional_type LMA `Latin Genus` ## ## 1 University of Wiscon… black walnut D02 broadleaf 72.9 Juglans @@ -199,7 +187,7 @@ sample_info2 <- sample_info %>% head(sample_info2) ``` - ## # A tibble: 6 x 5 + ## # A tibble: 6 × 5 ## Domain Functional_type Sample_ID USDA_Species_Code LMA_gDW_m2 ## ## 1 D02 broadleaf P0001 JUNI 72.9 @@ -255,20 +243,20 @@ val.plsr.data <- split_data$val_data head(val.plsr.data)[1:8] ``` - ## Domain Functional_type Sample_ID USDA_Species_Code LMA_gDW_m2 Wave_500 - ## 4923 D08 broadleaf P2462 21.10 0.044964 - ## 4924 D08 broadleaf L2462 SANI 100.72 0.068921 - ## 4925 D08 broadleaf P2463 29.59 0.036254 - ## 4926 D08 broadleaf L2463 SANI 96.48 0.051810 - ## 4927 D08 broadleaf P2464 31.08 0.056587 - ## 4928 D08 broadleaf L2464 SANI 61.40 0.037310 - ## Wave_501 Wave_502 - ## 4923 0.045854 0.046911 - ## 4924 0.069633 0.070254 - ## 4925 0.036999 0.037671 - ## 4926 0.052113 0.052896 - ## 4927 0.057006 0.057734 - ## 4928 0.037223 0.037671 + ## Domain Functional_type Sample_ID USDA_Species_Code LMA_gDW_m2 Wave_500 + ## 3 D02 broadleaf P0002 JUNI 60.77 0.043758 + ## 12 D02 broadleaf L0006 JUNI 42.54 0.044338 + ## 13 D02 broadleaf P0007 QUVE 106.57 0.015643 + ## 19 D02 broadleaf P0010 PRSE 78.82 0.033019 + ## 21 D02 broadleaf P0011 PRSE 86.09 0.024819 + ## 28 D02 broadleaf L0014 PRSE 67.11 0.040095 + ## Wave_501 Wave_502 + ## 3 0.044171 0.044869 + ## 12 0.044748 0.045294 + ## 13 0.015579 0.015431 + ## 19 0.033102 0.033245 + ## 21 0.024826 0.025045 + ## 28 0.040397 0.040864 ``` r rm(split_data) @@ -338,13 +326,13 @@ val.plsr.data <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% p head(val.plsr.data)[1:5] ``` - 
## Domain Functional_type Sample_ID USDA_Species_Code LMA_gDW_m2 - ## 4923 D08 broadleaf P2462 21.10 - ## 4924 D08 broadleaf L2462 SANI 100.72 - ## 4925 D08 broadleaf P2463 29.59 - ## 4926 D08 broadleaf L2463 SANI 96.48 - ## 4927 D08 broadleaf P2464 31.08 - ## 4928 D08 broadleaf L2464 SANI 61.40 + ## Domain Functional_type Sample_ID USDA_Species_Code LMA_gDW_m2 + ## 3 D02 broadleaf P0002 JUNI 60.77 + ## 12 D02 broadleaf L0006 JUNI 42.54 + ## 13 D02 broadleaf P0007 QUVE 106.57 + ## 19 D02 broadleaf P0010 PRSE 78.82 + ## 21 D02 broadleaf P0011 PRSE 86.09 + ## 28 D02 broadleaf L0014 PRSE 67.11 ### plot cal and val spectra @@ -392,18 +380,21 @@ maxComps <- 20 iterations <- 40 prop <- 0.70 if (method=="pls") { - nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, seg=seg, random_seed=random_seed) print(paste0("*** Optimal number of components: ", nComps)) } else { - nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, iterations=iterations, seg=seg, prop=prop, random_seed=random_seed) } ``` + ## [1] "*** Identifying optimal number of PLSR components ***" ## [1] "*** Running permutation test. 
Please hang tight, this can take awhile ***" ## [1] "Options:" ## [1] "Max Components: 20 Iterations: 40 Data Proportion (percent): 70" @@ -448,11 +439,11 @@ pls::RMSEP(plsr.out, newdata = val.plsr.data) ``` ## (Intercept) 1 comps 2 comps 3 comps 4 comps 5 comps - ## 27.155 17.610 16.595 15.483 13.235 12.374 + ## 29.372 18.664 18.166 16.187 12.760 12.149 ## 6 comps 7 comps 8 comps 9 comps 10 comps 11 comps - ## 11.499 10.722 10.269 9.647 9.197 9.319 + ## 12.004 11.465 11.144 10.389 10.063 9.732 ## 12 comps - ## 9.515 + ## 9.633 ``` r plot(pls::RMSEP(plsr.out,estimate=c("test"),newdata = val.plsr.data), @@ -465,11 +456,11 @@ pls::R2(plsr.out, newdata = val.plsr.data) ``` ## (Intercept) 1 comps 2 comps 3 comps 4 comps 5 comps - ## -0.006901 0.576543 0.623949 0.672643 0.760799 0.790906 + ## -0.001908 0.595475 0.616770 0.695732 0.810908 0.828593 ## 6 comps 7 comps 8 comps 9 comps 10 comps 11 comps - ## 0.819456 0.843031 0.856001 0.872913 0.884511 0.881406 + ## 0.832656 0.847338 0.855775 0.874647 0.882410 0.890000 ## 12 comps - ## 0.876368 + ## 0.892247 ``` r plot(pls::R2(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL R2", @@ -524,20 +515,20 @@ val.plsr.output <- val.plsr.output %>% head(val.plsr.output) ``` - ## Domain Functional_type Sample_ID USDA_Species_Code LMA_gDW_m2 - ## 4923 D08 broadleaf P2462 21.10 - ## 4924 D08 broadleaf L2462 SANI 100.72 - ## 4925 D08 broadleaf P2463 29.59 - ## 4926 D08 broadleaf L2463 SANI 96.48 - ## 4927 D08 broadleaf P2464 31.08 - ## 4928 D08 broadleaf L2464 SANI 61.40 - ## PLSR_Predicted PLSR_Residuals - ## 4923 21.14155 0.04155041 - ## 4924 89.65467 -11.06533484 - ## 4925 27.94765 -1.64234512 - ## 4926 92.46121 -4.01879017 - ## 4927 40.73367 9.65367301 - ## 4928 65.94687 4.54686556 + ## Domain Functional_type Sample_ID USDA_Species_Code LMA_gDW_m2 PLSR_Predicted + ## 3 D02 broadleaf P0002 JUNI 60.77 63.90905 + ## 12 D02 broadleaf L0006 JUNI 42.54 41.54133 + ## 13 D02 broadleaf P0007 QUVE 106.57 99.99662 + ## 19 D02 
broadleaf P0010 PRSE 78.82 89.03078 + ## 21 D02 broadleaf P0011 PRSE 86.09 85.17273 + ## 28 D02 broadleaf L0014 PRSE 67.11 67.95549 + ## PLSR_Residuals + ## 3 3.1390459 + ## 12 -0.9986720 + ## 13 -6.5733831 + ## 19 10.2107788 + ## 21 -0.9172668 + ## 28 0.8454930 ``` r val.R2 <- round(pls::R2(plsr.out,newdata=val.plsr.data,intercept=F)[[1]][nComps],2) @@ -611,7 +602,7 @@ scatterplots <- grid.arrange(cal_scatter_plot, val_scatter_plot, cal_resid_histo ## Warning: Removed 21 rows containing missing values (geom_point). - ## Warning: Removed 5 rows containing missing values (geom_point). + ## Warning: Removed 8 rows containing missing values (geom_point). ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`. ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`. @@ -699,20 +690,20 @@ val.plsr.output$UPI <- val.plsr.output$PLSR_Predicted+1.96*sd_tot head(val.plsr.output) ``` - ## Domain Functional_type Sample_ID USDA_Species_Code LMA_gDW_m2 - ## 4923 D08 broadleaf P2462 21.10 - ## 4924 D08 broadleaf L2462 SANI 100.72 - ## 4925 D08 broadleaf P2463 29.59 - ## 4926 D08 broadleaf L2463 SANI 96.48 - ## 4927 D08 broadleaf P2464 31.08 - ## 4928 D08 broadleaf L2464 SANI 61.40 - ## PLSR_Predicted PLSR_Residuals LCI UCI LPI UPI - ## 4923 21.14155 0.04155041 20.94847 21.30604 2.789108 39.49399 - ## 4924 89.65467 -11.06533484 89.45021 89.97246 71.301221 108.00811 - ## 4925 27.94765 -1.64234512 27.73172 28.16789 9.594597 46.30071 - ## 4926 92.46121 -4.01879017 92.26489 92.73225 74.107998 110.81442 - ## 4927 40.73367 9.65367301 40.50065 40.92588 22.380204 59.08714 - ## 4928 65.94687 4.54686556 65.77618 66.17712 47.594178 84.29955 + ## Domain Functional_type Sample_ID USDA_Species_Code LMA_gDW_m2 PLSR_Predicted + ## 3 D02 broadleaf P0002 JUNI 60.77 63.90905 + ## 12 D02 broadleaf L0006 JUNI 42.54 41.54133 + ## 13 D02 broadleaf P0007 QUVE 106.57 99.99662 + ## 19 D02 broadleaf P0010 PRSE 78.82 89.03078 + ## 21 D02 broadleaf P0011 PRSE 86.09 85.17273 + ## 28 
D02 broadleaf L0014 PRSE 67.11 67.95549 + ## PLSR_Residuals LCI UCI LPI UPI + ## 3 3.1390459 63.75673 64.12043 45.02836 82.78973 + ## 12 -0.9986720 41.42248 41.69728 22.66069 60.42196 + ## 13 -6.5733831 99.88029 100.11962 81.11612 118.87712 + ## 19 10.2107788 88.83274 89.21623 70.14949 107.91207 + ## 21 -0.9172668 85.02330 85.32067 66.29194 104.05353 + ## 28 0.8454930 67.82558 68.15298 49.07457 86.83642 ### Jackknife coefficient plot diff --git a/vignettes/neon_lma_plsr_example.pdf b/vignettes/neon_lma_plsr_example.pdf index e03264f..dafd579 100644 Binary files a/vignettes/neon_lma_plsr_example.pdf and b/vignettes/neon_lma_plsr_example.pdf differ diff --git a/vignettes/neon_lma_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png b/vignettes/neon_lma_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png index 477fd1f..72e4e57 100644 Binary files a/vignettes/neon_lma_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png and b/vignettes/neon_lma_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png differ diff --git a/vignettes/neon_lma_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png b/vignettes/neon_lma_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png index bbd51e7..5d4c0de 100644 Binary files a/vignettes/neon_lma_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png and b/vignettes/neon_lma_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png differ diff --git a/vignettes/neon_lma_plsr_example_files/figure-gfm/unnamed-chunk-15-1.png b/vignettes/neon_lma_plsr_example_files/figure-gfm/unnamed-chunk-15-1.png index 817d4c2..17efef7 100644 Binary files a/vignettes/neon_lma_plsr_example_files/figure-gfm/unnamed-chunk-15-1.png and b/vignettes/neon_lma_plsr_example_files/figure-gfm/unnamed-chunk-15-1.png differ diff --git a/vignettes/neon_lma_plsr_example_files/figure-gfm/unnamed-chunk-6-1.png b/vignettes/neon_lma_plsr_example_files/figure-gfm/unnamed-chunk-6-1.png index b0dee53..d1cc56a 100644 Binary files 
a/vignettes/neon_lma_plsr_example_files/figure-gfm/unnamed-chunk-6-1.png and b/vignettes/neon_lma_plsr_example_files/figure-gfm/unnamed-chunk-6-1.png differ diff --git a/vignettes/neon_lma_plsr_example_files/figure-gfm/unnamed-chunk-8-1.png b/vignettes/neon_lma_plsr_example_files/figure-gfm/unnamed-chunk-8-1.png index f5bdf2a..bfac6a2 100644 Binary files a/vignettes/neon_lma_plsr_example_files/figure-gfm/unnamed-chunk-8-1.png and b/vignettes/neon_lma_plsr_example_files/figure-gfm/unnamed-chunk-8-1.png differ diff --git a/vignettes/reseco_leafN_bootstrap_plsr_example.Rmd b/vignettes/reseco_leafN_bootstrap_plsr_example.Rmd index 89af664..1f2e0fd 100644 --- a/vignettes/reseco_leafN_bootstrap_plsr_example.Rmd +++ b/vignettes/reseco_leafN_bootstrap_plsr_example.Rmd @@ -2,11 +2,11 @@ title: Spectra-trait PLSR example using leaf-level spectra and leaf nitrogen content (Narea, g/m2) data from 36 species growing in Rosa rugosa invaded coastal grassland communities in Belgium author: "Shawn P. Serbin, Julien Lamour, & Jeremiah Anderson" output: - github_document: default + pdf_document: default html_notebook: default html_document: df_print: paged - pdf_document: default + github_document: default params: date: !r Sys.Date() --- @@ -21,8 +21,7 @@ This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook to illustrate how ### Getting Started ### Load libraries ```{r, eval=TRUE, echo=TRUE} -list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra", - "spectratrait") +list.of.packages <- c("pls","dplyr","here","plotrix","ggplot2","gridExtra","spectratrait") invisible(lapply(list.of.packages, library, character.only = TRUE)) ``` @@ -188,12 +187,14 @@ iterations <- 80 prop <- 0.70 if (method=="pls") { # pls package approach - faster but estimates more components.... 
- nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, seg=seg, random_seed=random_seed) print(paste0("*** Optimal number of components: ", nComps)) } else { - nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, iterations=iterations, seg=seg, prop=prop, random_seed=random_seed) @@ -335,7 +336,8 @@ if(grepl("Windows", sessionInfo()$running)){ ### PLSR bootstrap permutation uncertainty analysis iterations <- 500 # how many permutation iterations to run prop <- 0.70 # fraction of training data to keep for each iteration -plsr_permutation <- spectratrait::pls_permutation(dataset=cal.plsr.data, maxComps=nComps, +plsr_permutation <- spectratrait::pls_permutation(dataset=cal.plsr.data, targetVariable=inVar, + maxComps=nComps, iterations=iterations, prop=prop, verbose = FALSE) bootstrap_intercept <- plsr_permutation$coef_array[1,,nComps] diff --git a/vignettes/reseco_leafN_bootstrap_plsr_example.md b/vignettes/reseco_leafN_bootstrap_plsr_example.md index 8deb809..5e8c5ef 100644 --- a/vignettes/reseco_leafN_bootstrap_plsr_example.md +++ b/vignettes/reseco_leafN_bootstrap_plsr_example.md @@ -16,8 +16,7 @@ leaf nitrogen content (Narea, g/m2) ### Load libraries ``` r -list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra", - "spectratrait") +list.of.packages <- c("pls","dplyr","here","plotrix","ggplot2","gridExtra","spectratrait") invisible(lapply(list.of.packages, library, character.only = TRUE)) ``` @@ -82,7 +81,7 @@ output_dir <- "tempdir" ### Set working directory (scratch space) - ## [1] "/private/var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T/RtmpmNliia" + ## [1] 
"/private/var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T/RtmpYVNyVT" ### Grab data from EcoSIS @@ -117,38 +116,22 @@ dat_raw <- spectratrait::get_ecosis_data(ecosis_id = ecosis_id) head(dat_raw) ``` - ## # A tibble: 6 x 2,164 - ## `Cw/EWT (cm3/cm2… `Latin Species` `Leaf area (mm2… `Leaf calcium content pe… - ## - ## 1 0.00887 Arrhenatherum el… 696. 0.0291 - ## 2 0.00824 Bromus sterilis 447. 0.0230 - ## 3 0.0280 Jacobaea vulgaris 2418. 0.0950 - ## 4 0.0106 Rubus caesius 5719. 0.0700 - ## 5 0.00851 Arrhenatherum el… 671. 0.0286 - ## 6 0.0153 Crepis capillaris 1401. 0.0470 + ## # A tibble: 6 × 2,164 + ## `Cw/EWT (cm3/cm2)` `Latin Species` `Leaf area (mm2)` `Leaf calcium cont… + ## + ## 1 0.00887 Arrhenatherum elatius 696. 0.0291 + ## 2 0.00824 Bromus sterilis 447. 0.0230 + ## 3 0.0280 Jacobaea vulgaris 2418. 0.0950 + ## 4 0.0106 Rubus caesius 5719. 0.0700 + ## 5 0.00851 Arrhenatherum elatius 671. 0.0286 + ## 6 0.0153 Crepis capillaris 1401. 0.0470 ## # … with 2,160 more variables: ## # Leaf magnesium content per leaf area (mg/mm2) , ## # Leaf mass per area (g/cm2) , ## # Leaf nitrogen content per leaf area (mg/mm2) , ## # Leaf phosphorus content per leaf area (mg/mm2) , ## # Leaf potassium content per leaf area (mg/mm2) , - ## # Plant height vegetative (cm) , ids , plot code , - ## # species code , 350 , 351 , 352 , 353 , 354 , - ## # 355 , 356 , 357 , 358 , 359 , 360 , - ## # 361 , 362 , 363 , 364 , 365 , 366 , - ## # 367 , 368 , 369 , 370 , 371 , 372 , - ## # 373 , 374 , 375 , 376 , 377 , 378 , - ## # 379 , 380 , 381 , 382 , 383 , 384 , - ## # 385 , 386 , 387 , 388 , 389 , 390 , - ## # 391 , 392 , 393 , 394 , 395 , 396 , - ## # 397 , 398 , 399 , 400 , 401 , 402 , - ## # 403 , 404 , 405 , 406 , 407 , 408 , - ## # 409 , 410 , 411 , 412 , 413 , 414 , - ## # 415 , 416 , 417 , 418 , 419 , 420 , - ## # 421 , 422 , 423 , 424 , 425 , 426 , - ## # 427 , 428 , 429 , 430 , 431 , 432 , - ## # 433 , 434 , 435 , 436 , 437 , 438 , - ## # 439 , 440 , … + ## # Plant height 
vegetative (cm) , ids , plot code , … ``` r names(dat_raw)[1:40] @@ -208,15 +191,15 @@ sample_info <- dat_raw[,names(dat_raw) %notin% seq(350,2500,1)] head(sample_info) ``` - ## # A tibble: 6 x 13 - ## `Cw/EWT (cm3/cm2… `Latin Species` `Leaf area (mm2… `Leaf calcium content pe… - ## - ## 1 0.00887 Arrhenatherum el… 696. 0.0291 - ## 2 0.00824 Bromus sterilis 447. 0.0230 - ## 3 0.0280 Jacobaea vulgaris 2418. 0.0950 - ## 4 0.0106 Rubus caesius 5719. 0.0700 - ## 5 0.00851 Arrhenatherum el… 671. 0.0286 - ## 6 0.0153 Crepis capillaris 1401. 0.0470 + ## # A tibble: 6 × 13 + ## `Cw/EWT (cm3/cm2)` `Latin Species` `Leaf area (mm2)` `Leaf calcium cont… + ## + ## 1 0.00887 Arrhenatherum elatius 696. 0.0291 + ## 2 0.00824 Bromus sterilis 447. 0.0230 + ## 3 0.0280 Jacobaea vulgaris 2418. 0.0950 + ## 4 0.0106 Rubus caesius 5719. 0.0700 + ## 5 0.00851 Arrhenatherum elatius 671. 0.0286 + ## 6 0.0153 Crepis capillaris 1401. 0.0470 ## # … with 9 more variables: Leaf magnesium content per leaf area (mg/mm2) , ## # Leaf mass per area (g/cm2) , ## # Leaf nitrogen content per leaf area (mg/mm2) , @@ -235,7 +218,7 @@ sample_info2 <- sample_info2 %>% head(sample_info2) ``` - ## # A tibble: 6 x 5 + ## # A tibble: 6 × 5 ## Plant_Species Species_Code Plot Narea_mg_mm2 Narea_g_m2 ## ## 1 Arrhenatherum elatius Arrela DC1 0.0126 1.26 @@ -301,20 +284,20 @@ val.plsr.data <- split_data$val_data head(val.plsr.data)[1:8] ``` - ## Plant_Species Species_Code Plot Narea_mg_mm2 Narea_g_m2 Wave_500 - ## 184 Jacobaea vulgaris Jacvul WC2 0.008756996 0.8756996 0.06736887 - ## 185 Potentilla reptans Potrep WC2 0.010313464 1.0313464 0.07125000 - ## 186 Rubus caesius Rubcae WC2 0.007968454 0.7968454 0.05993560 - ## 187 Urtica dioica Urtdio WC2 0.012737560 1.2737560 0.06508300 - ## 188 Ammophila arenaria Ammare WC3 0.028072806 2.8072806 0.15175000 - ## 189 Jacobaea vulgaris Jacvul WC3 0.010251687 1.0251687 0.06805547 - ## Wave_501 Wave_502 - ## 184 0.06870667 0.07014220 - ## 185 0.07235000 0.07368350 - ## 186 
0.06162000 0.06352233 - ## 187 0.06625000 0.06758350 - ## 188 0.15275000 0.15415000 - ## 189 0.06938000 0.07093553 + ## Plant_Species Species_Code Plot Narea_mg_mm2 Narea_g_m2 Wave_500 + ## 1 Arrhenatherum elatius Arrela DC1 0.01261440 1.261440 0.07066700 + ## 4 Rubus caesius Rubcae DC1 0.01208978 1.208978 0.04144907 + ## 8 Jacobaea vulgaris Jacvul DC2 0.01185197 1.185197 0.05563100 + ## 11 Carex arenaria Carare DC3 0.02103830 2.103830 0.11588500 + ## 14 Jacobaea vulgaris Jacvul DC3 0.01121247 1.121247 0.06029327 + ## 19 Oenothera glazioviana Oengla DC4 0.01444293 1.444293 0.07391700 + ## Wave_501 Wave_502 + ## 1 0.07160000 0.0725330 + ## 4 0.04197333 0.0426356 + ## 8 0.05622143 0.0569690 + ## 11 0.11705000 0.1184500 + ## 14 0.06112000 0.0620312 + ## 19 0.07515000 0.0765500 ``` r rm(split_data) @@ -385,13 +368,13 @@ val.plsr.data <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% p head(val.plsr.data)[1:5] ``` - ## Plant_Species Species_Code Plot Narea_mg_mm2 Narea_g_m2 - ## 184 Jacobaea vulgaris Jacvul WC2 0.008756996 0.8756996 - ## 185 Potentilla reptans Potrep WC2 0.010313464 1.0313464 - ## 186 Rubus caesius Rubcae WC2 0.007968454 0.7968454 - ## 187 Urtica dioica Urtdio WC2 0.012737560 1.2737560 - ## 188 Ammophila arenaria Ammare WC3 0.028072806 2.8072806 - ## 189 Jacobaea vulgaris Jacvul WC3 0.010251687 1.0251687 + ## Plant_Species Species_Code Plot Narea_mg_mm2 Narea_g_m2 + ## 1 Arrhenatherum elatius Arrela DC1 0.01261440 1.261440 + ## 4 Rubus caesius Rubcae DC1 0.01208978 1.208978 + ## 8 Jacobaea vulgaris Jacvul DC2 0.01185197 1.185197 + ## 11 Carex arenaria Carare DC3 0.02103830 2.103830 + ## 14 Jacobaea vulgaris Jacvul DC3 0.01121247 1.121247 + ## 19 Oenothera glazioviana Oengla DC4 0.01444293 1.444293 ### plot cal and val spectra @@ -440,18 +423,21 @@ iterations <- 80 prop <- 0.70 if (method=="pls") { # pls package approach - faster but estimates more components.... 
- nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, seg=seg, random_seed=random_seed) print(paste0("*** Optimal number of components: ", nComps)) } else { - nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, iterations=iterations, seg=seg, prop=prop, random_seed=random_seed) } ``` + ## [1] "*** Identifying optimal number of PLSR components ***" ## [1] "*** Running PLS permutation test ***" ![](reseco_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png) @@ -487,9 +473,9 @@ pls::RMSEP(plsr.out, newdata = val.plsr.data) ``` ## (Intercept) 1 comps 2 comps 3 comps 4 comps 5 comps - ## 0.6346 0.5045 0.4645 0.3415 0.3296 0.3037 + ## 0.5594 0.6034 0.5448 0.3842 0.3481 0.3027 ## 6 comps 7 comps 8 comps 9 comps 10 comps - ## 0.2703 0.2659 0.2524 0.2450 0.2452 + ## 0.2429 0.2268 0.2852 0.2818 0.2780 ``` r plot(pls::RMSEP(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL RMSEP", @@ -500,9 +486,9 @@ pls::R2(plsr.out, newdata = val.plsr.data) ``` ## (Intercept) 1 comps 2 comps 3 comps 4 comps 5 comps - ## -0.05977 0.33000 0.43217 0.69298 0.71415 0.75732 + ## -0.007544 -0.172296 0.044153 0.524579 0.609920 0.704963 ## 6 comps 7 comps 8 comps 9 comps 10 comps - ## 0.80776 0.81389 0.83228 0.84198 0.84176 + ## 0.809962 0.834383 0.738093 0.744325 0.751224 ``` r plot(pls::R2(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL R2", @@ -571,20 +557,20 @@ val.plsr.output <- val.plsr.output %>% head(val.plsr.output) ``` - ## Plant_Species Species_Code Plot Narea_mg_mm2 Narea_g_m2 PLSR_Predicted - ## 184 Jacobaea vulgaris Jacvul WC2 0.008756996 0.8756996 0.9462916 - ## 185 Potentilla reptans Potrep WC2 0.010313464 
1.0313464 1.5386676 - ## 186 Rubus caesius Rubcae WC2 0.007968454 0.7968454 0.8790482 - ## 187 Urtica dioica Urtdio WC2 0.012737560 1.2737560 1.1241560 - ## 188 Ammophila arenaria Ammare WC3 0.028072806 2.8072806 2.4527108 - ## 189 Jacobaea vulgaris Jacvul WC3 0.010251687 1.0251687 1.1553688 - ## PLSR_Residuals - ## 184 0.07059201 - ## 185 0.50732119 - ## 186 0.08220284 - ## 187 -0.14959995 - ## 188 -0.35456980 - ## 189 0.13020008 + ## Plant_Species Species_Code Plot Narea_mg_mm2 Narea_g_m2 + ## 1 Arrhenatherum elatius Arrela DC1 0.01261440 1.261440 + ## 4 Rubus caesius Rubcae DC1 0.01208978 1.208978 + ## 8 Jacobaea vulgaris Jacvul DC2 0.01185197 1.185197 + ## 11 Carex arenaria Carare DC3 0.02103830 2.103830 + ## 14 Jacobaea vulgaris Jacvul DC3 0.01121247 1.121247 + ## 19 Oenothera glazioviana Oengla DC4 0.01444293 1.444293 + ## PLSR_Predicted PLSR_Residuals + ## 1 1.340135 0.07869548 + ## 4 1.288026 0.07904830 + ## 8 1.155840 -0.02935675 + ## 11 2.014712 -0.08911757 + ## 14 1.328742 0.20749565 + ## 19 1.534162 0.08986811 ``` r val.R2 <- round(pls::R2(plsr.out,newdata=val.plsr.data,intercept=F)[[1]][nComps],2) @@ -645,7 +631,7 @@ scatterplots <- grid.arrange(cal_scatter_plot, val_scatter_plot, cal_resid_histo ## Warning: Removed 2 rows containing missing values (geom_point). - ## Warning: Removed 3 rows containing missing values (geom_point). + ## Warning: Removed 2 rows containing missing values (geom_point). ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`. ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`. 
@@ -704,7 +690,8 @@ if(grepl("Windows", sessionInfo()$running)){ ### PLSR bootstrap permutation uncertainty analysis iterations <- 500 # how many permutation iterations to run prop <- 0.70 # fraction of training data to keep for each iteration -plsr_permutation <- spectratrait::pls_permutation(dataset=cal.plsr.data, maxComps=nComps, +plsr_permutation <- spectratrait::pls_permutation(dataset=cal.plsr.data, targetVariable=inVar, + maxComps=nComps, iterations=iterations, prop=prop, verbose = FALSE) ``` @@ -737,20 +724,20 @@ val.plsr.output$UPI <- val.plsr.output$PLSR_Predicted+1.96*sd_tot head(val.plsr.output) ``` - ## Plant_Species Species_Code Plot Narea_mg_mm2 Narea_g_m2 PLSR_Predicted - ## 184 Jacobaea vulgaris Jacvul WC2 0.008756996 0.8756996 0.9462916 - ## 185 Potentilla reptans Potrep WC2 0.010313464 1.0313464 1.5386676 - ## 186 Rubus caesius Rubcae WC2 0.007968454 0.7968454 0.8790482 - ## 187 Urtica dioica Urtdio WC2 0.012737560 1.2737560 1.1241560 - ## 188 Ammophila arenaria Ammare WC3 0.028072806 2.8072806 2.4527108 - ## 189 Jacobaea vulgaris Jacvul WC3 0.010251687 1.0251687 1.1553688 - ## PLSR_Residuals LCI UCI LPI UPI - ## 184 0.07059201 0.8915898 1.008806 0.4588988 1.433684 - ## 185 0.50732119 1.4007173 1.636452 1.0403747 2.036960 - ## 186 0.08220284 0.6861219 1.160030 0.3405908 1.417506 - ## 187 -0.14959995 0.9651982 1.245464 0.6198291 1.628483 - ## 188 -0.35456980 2.1911406 2.619696 1.9245720 2.980850 - ## 189 0.13020008 1.0735154 1.233082 0.6651011 1.645636 + ## Plant_Species Species_Code Plot Narea_mg_mm2 Narea_g_m2 + ## 1 Arrhenatherum elatius Arrela DC1 0.01261440 1.261440 + ## 4 Rubus caesius Rubcae DC1 0.01208978 1.208978 + ## 8 Jacobaea vulgaris Jacvul DC2 0.01185197 1.185197 + ## 11 Carex arenaria Carare DC3 0.02103830 2.103830 + ## 14 Jacobaea vulgaris Jacvul DC3 0.01121247 1.121247 + ## 19 Oenothera glazioviana Oengla DC4 0.01444293 1.444293 + ## PLSR_Predicted PLSR_Residuals LCI UCI LPI UPI + ## 1 1.340135 0.07869548 1.226834 1.429328 
0.7824784 1.897793 + ## 4 1.288026 0.07904830 1.182535 1.382687 0.7308093 1.845242 + ## 8 1.155840 -0.02935675 1.094629 1.236994 0.6033307 1.708349 + ## 11 2.014712 -0.08911757 1.887670 2.098661 1.4570138 2.572411 + ## 14 1.328742 0.20749565 1.278652 1.359115 0.7791767 1.878308 + ## 19 1.534162 0.08986811 1.437618 1.642761 0.9768182 2.091505 ### Jackknife coefficient plot @@ -863,7 +850,7 @@ write.csv(out.jk.coefs,file=file.path(outdir,paste0(inVar, print(paste("Output directory: ", outdir)) ``` - ## [1] "Output directory: /var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T//RtmpmNliia" + ## [1] "Output directory: /var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T//RtmpYVNyVT" ``` r # Observed versus predicted diff --git a/vignettes/reseco_leafN_bootstrap_plsr_example.pdf b/vignettes/reseco_leafN_bootstrap_plsr_example.pdf index 55d71c5..35d5a0f 100644 Binary files a/vignettes/reseco_leafN_bootstrap_plsr_example.pdf and b/vignettes/reseco_leafN_bootstrap_plsr_example.pdf differ diff --git a/vignettes/reseco_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png b/vignettes/reseco_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png index 46da6e4..b2bff86 100644 Binary files a/vignettes/reseco_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png and b/vignettes/reseco_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png differ diff --git a/vignettes/reseco_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png b/vignettes/reseco_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png index 1475940..6b2141a 100644 Binary files a/vignettes/reseco_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png and b/vignettes/reseco_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png differ diff --git a/vignettes/reseco_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-16-1.png 
b/vignettes/reseco_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-16-1.png index 1153721..3fda55c 100644 Binary files a/vignettes/reseco_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-16-1.png and b/vignettes/reseco_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-16-1.png differ diff --git a/vignettes/reseco_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-7-1.png b/vignettes/reseco_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-7-1.png index d19d327..9faeaef 100644 Binary files a/vignettes/reseco_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-7-1.png and b/vignettes/reseco_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-7-1.png differ diff --git a/vignettes/reseco_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-9-1.png b/vignettes/reseco_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-9-1.png index ce44beb..0f05c9b 100644 Binary files a/vignettes/reseco_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-9-1.png and b/vignettes/reseco_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-9-1.png differ diff --git a/vignettes/reseco_leafN_plsr_example.Rmd b/vignettes/reseco_leafN_plsr_example.Rmd index a9f27d8..a78ceea 100644 --- a/vignettes/reseco_leafN_plsr_example.Rmd +++ b/vignettes/reseco_leafN_plsr_example.Rmd @@ -3,10 +3,10 @@ title: Spectra-trait PLSR example using leaf-level spectra and leaf nitrogen con author: "Shawn P. 
Serbin, Julien Lamour, & Jeremiah Anderson" output: pdf_document: default - html_notebook: default + github_document: default html_document: df_print: paged - github_document: default + html_notebook: default params: date: !r Sys.Date() --- @@ -21,8 +21,7 @@ This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook to illustrate how ### Getting Started ### Load libraries ```{r, eval=TRUE, echo=TRUE} -list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra", - "spectratrait") +list.of.packages <- c("pls","dplyr","here","plotrix","ggplot2","gridExtra","spectratrait") invisible(lapply(list.of.packages, library, character.only = TRUE)) ``` @@ -187,12 +186,14 @@ iterations <- 80 prop <- 0.70 if (method=="pls") { # pls package approach - faster but estimates more components.... - nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, seg=seg, random_seed=random_seed) print(paste0("*** Optimal number of components: ", nComps)) } else { - nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, iterations=iterations, seg=seg, prop=prop, random_seed=random_seed) diff --git a/vignettes/reseco_leafN_plsr_example.md b/vignettes/reseco_leafN_plsr_example.md index f50aa76..e9e7896 100644 --- a/vignettes/reseco_leafN_plsr_example.md +++ b/vignettes/reseco_leafN_plsr_example.md @@ -16,8 +16,7 @@ leaf nitrogen content (Narea, g/m2) ### Load libraries ``` r -list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra", - "spectratrait") +list.of.packages <- c("pls","dplyr","here","plotrix","ggplot2","gridExtra","spectratrait") invisible(lapply(list.of.packages, library, character.only = TRUE)) ``` @@ -82,7 
+81,7 @@ output_dir <- "tempdir" ### Set working directory (scratch space) - ## [1] "/private/var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T/RtmpJ6W1sB" + ## [1] "/private/var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T/RtmpYxPllO" ### Grab data from EcoSIS @@ -117,38 +116,22 @@ dat_raw <- spectratrait::get_ecosis_data(ecosis_id = ecosis_id) head(dat_raw) ``` - ## # A tibble: 6 x 2,164 - ## `Cw/EWT (cm3/cm2… `Latin Species` `Leaf area (mm2… `Leaf calcium content pe… - ## - ## 1 0.00887 Arrhenatherum el… 696. 0.0291 - ## 2 0.00824 Bromus sterilis 447. 0.0230 - ## 3 0.0280 Jacobaea vulgaris 2418. 0.0950 - ## 4 0.0106 Rubus caesius 5719. 0.0700 - ## 5 0.00851 Arrhenatherum el… 671. 0.0286 - ## 6 0.0153 Crepis capillaris 1401. 0.0470 + ## # A tibble: 6 × 2,164 + ## `Cw/EWT (cm3/cm2)` `Latin Species` `Leaf area (mm2)` `Leaf calcium cont… + ## + ## 1 0.00887 Arrhenatherum elatius 696. 0.0291 + ## 2 0.00824 Bromus sterilis 447. 0.0230 + ## 3 0.0280 Jacobaea vulgaris 2418. 0.0950 + ## 4 0.0106 Rubus caesius 5719. 0.0700 + ## 5 0.00851 Arrhenatherum elatius 671. 0.0286 + ## 6 0.0153 Crepis capillaris 1401. 
0.0470 ## # … with 2,160 more variables: ## # Leaf magnesium content per leaf area (mg/mm2) , ## # Leaf mass per area (g/cm2) , ## # Leaf nitrogen content per leaf area (mg/mm2) , ## # Leaf phosphorus content per leaf area (mg/mm2) , ## # Leaf potassium content per leaf area (mg/mm2) , - ## # Plant height vegetative (cm) , ids , plot code , - ## # species code , 350 , 351 , 352 , 353 , 354 , - ## # 355 , 356 , 357 , 358 , 359 , 360 , - ## # 361 , 362 , 363 , 364 , 365 , 366 , - ## # 367 , 368 , 369 , 370 , 371 , 372 , - ## # 373 , 374 , 375 , 376 , 377 , 378 , - ## # 379 , 380 , 381 , 382 , 383 , 384 , - ## # 385 , 386 , 387 , 388 , 389 , 390 , - ## # 391 , 392 , 393 , 394 , 395 , 396 , - ## # 397 , 398 , 399 , 400 , 401 , 402 , - ## # 403 , 404 , 405 , 406 , 407 , 408 , - ## # 409 , 410 , 411 , 412 , 413 , 414 , - ## # 415 , 416 , 417 , 418 , 419 , 420 , - ## # 421 , 422 , 423 , 424 , 425 , 426 , - ## # 427 , 428 , 429 , 430 , 431 , 432 , - ## # 433 , 434 , 435 , 436 , 437 , 438 , - ## # 439 , 440 , … + ## # Plant height vegetative (cm) , ids , plot code , … ``` r names(dat_raw)[1:40] @@ -208,15 +191,15 @@ sample_info <- dat_raw[,names(dat_raw) %notin% seq(350,2500,1)] head(sample_info) ``` - ## # A tibble: 6 x 13 - ## `Cw/EWT (cm3/cm2… `Latin Species` `Leaf area (mm2… `Leaf calcium content pe… - ## - ## 1 0.00887 Arrhenatherum el… 696. 0.0291 - ## 2 0.00824 Bromus sterilis 447. 0.0230 - ## 3 0.0280 Jacobaea vulgaris 2418. 0.0950 - ## 4 0.0106 Rubus caesius 5719. 0.0700 - ## 5 0.00851 Arrhenatherum el… 671. 0.0286 - ## 6 0.0153 Crepis capillaris 1401. 0.0470 + ## # A tibble: 6 × 13 + ## `Cw/EWT (cm3/cm2)` `Latin Species` `Leaf area (mm2)` `Leaf calcium cont… + ## + ## 1 0.00887 Arrhenatherum elatius 696. 0.0291 + ## 2 0.00824 Bromus sterilis 447. 0.0230 + ## 3 0.0280 Jacobaea vulgaris 2418. 0.0950 + ## 4 0.0106 Rubus caesius 5719. 0.0700 + ## 5 0.00851 Arrhenatherum elatius 671. 0.0286 + ## 6 0.0153 Crepis capillaris 1401. 
0.0470 ## # … with 9 more variables: Leaf magnesium content per leaf area (mg/mm2) , ## # Leaf mass per area (g/cm2) , ## # Leaf nitrogen content per leaf area (mg/mm2) , @@ -235,7 +218,7 @@ sample_info2 <- sample_info2 %>% head(sample_info2) ``` - ## # A tibble: 6 x 5 + ## # A tibble: 6 × 5 ## Plant_Species Species_Code Plot Narea_mg_mm2 Narea_g_m2 ## ## 1 Arrhenatherum elatius Arrela DC1 0.0126 1.26 @@ -300,20 +283,20 @@ val.plsr.data <- split_data$val_data head(val.plsr.data)[1:8] ``` - ## Plant_Species Species_Code Plot Narea_mg_mm2 Narea_g_m2 Wave_500 - ## 184 Jacobaea vulgaris Jacvul WC2 0.008756996 0.8756996 0.06736887 - ## 185 Potentilla reptans Potrep WC2 0.010313464 1.0313464 0.07125000 - ## 186 Rubus caesius Rubcae WC2 0.007968454 0.7968454 0.05993560 - ## 187 Urtica dioica Urtdio WC2 0.012737560 1.2737560 0.06508300 - ## 188 Ammophila arenaria Ammare WC3 0.028072806 2.8072806 0.15175000 - ## 189 Jacobaea vulgaris Jacvul WC3 0.010251687 1.0251687 0.06805547 - ## Wave_501 Wave_502 - ## 184 0.06870667 0.07014220 - ## 185 0.07235000 0.07368350 - ## 186 0.06162000 0.06352233 - ## 187 0.06625000 0.06758350 - ## 188 0.15275000 0.15415000 - ## 189 0.06938000 0.07093553 + ## Plant_Species Species_Code Plot Narea_mg_mm2 Narea_g_m2 Wave_500 + ## 1 Arrhenatherum elatius Arrela DC1 0.01261440 1.261440 0.07066700 + ## 4 Rubus caesius Rubcae DC1 0.01208978 1.208978 0.04144907 + ## 8 Jacobaea vulgaris Jacvul DC2 0.01185197 1.185197 0.05563100 + ## 11 Carex arenaria Carare DC3 0.02103830 2.103830 0.11588500 + ## 14 Jacobaea vulgaris Jacvul DC3 0.01121247 1.121247 0.06029327 + ## 19 Oenothera glazioviana Oengla DC4 0.01444293 1.444293 0.07391700 + ## Wave_501 Wave_502 + ## 1 0.07160000 0.0725330 + ## 4 0.04197333 0.0426356 + ## 8 0.05622143 0.0569690 + ## 11 0.11705000 0.1184500 + ## 14 0.06112000 0.0620312 + ## 19 0.07515000 0.0765500 ``` r rm(split_data) @@ -384,13 +367,13 @@ val.plsr.data <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% p 
head(val.plsr.data)[1:5] ``` - ## Plant_Species Species_Code Plot Narea_mg_mm2 Narea_g_m2 - ## 184 Jacobaea vulgaris Jacvul WC2 0.008756996 0.8756996 - ## 185 Potentilla reptans Potrep WC2 0.010313464 1.0313464 - ## 186 Rubus caesius Rubcae WC2 0.007968454 0.7968454 - ## 187 Urtica dioica Urtdio WC2 0.012737560 1.2737560 - ## 188 Ammophila arenaria Ammare WC3 0.028072806 2.8072806 - ## 189 Jacobaea vulgaris Jacvul WC3 0.010251687 1.0251687 + ## Plant_Species Species_Code Plot Narea_mg_mm2 Narea_g_m2 + ## 1 Arrhenatherum elatius Arrela DC1 0.01261440 1.261440 + ## 4 Rubus caesius Rubcae DC1 0.01208978 1.208978 + ## 8 Jacobaea vulgaris Jacvul DC2 0.01185197 1.185197 + ## 11 Carex arenaria Carare DC3 0.02103830 2.103830 + ## 14 Jacobaea vulgaris Jacvul DC3 0.01121247 1.121247 + ## 19 Oenothera glazioviana Oengla DC4 0.01444293 1.444293 ### plot cal and val spectra @@ -439,18 +422,21 @@ iterations <- 80 prop <- 0.70 if (method=="pls") { # pls package approach - faster but estimates more components.... 
- nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, seg=seg, random_seed=random_seed) print(paste0("*** Optimal number of components: ", nComps)) } else { - nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, iterations=iterations, seg=seg, prop=prop, random_seed=random_seed) } ``` + ## [1] "*** Identifying optimal number of PLSR components ***" ## [1] "*** Running PLS permutation test ***" ![](reseco_leafN_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png) @@ -486,9 +472,9 @@ pls::RMSEP(plsr.out, newdata = val.plsr.data) ``` ## (Intercept) 1 comps 2 comps 3 comps 4 comps 5 comps - ## 0.6346 0.5045 0.4645 0.3415 0.3296 0.3037 + ## 0.5594 0.6034 0.5448 0.3842 0.3481 0.3027 ## 6 comps 7 comps 8 comps 9 comps 10 comps - ## 0.2703 0.2659 0.2524 0.2450 0.2452 + ## 0.2429 0.2268 0.2852 0.2818 0.2780 ``` r plot(pls::RMSEP(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL RMSEP", @@ -499,9 +485,9 @@ pls::R2(plsr.out, newdata = val.plsr.data) ``` ## (Intercept) 1 comps 2 comps 3 comps 4 comps 5 comps - ## -0.05977 0.33000 0.43217 0.69298 0.71415 0.75732 + ## -0.007544 -0.172296 0.044153 0.524579 0.609920 0.704963 ## 6 comps 7 comps 8 comps 9 comps 10 comps - ## 0.80776 0.81389 0.83228 0.84198 0.84176 + ## 0.809962 0.834383 0.738093 0.744325 0.751224 ``` r plot(pls::R2(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL R2", @@ -570,20 +556,20 @@ val.plsr.output <- val.plsr.output %>% head(val.plsr.output) ``` - ## Plant_Species Species_Code Plot Narea_mg_mm2 Narea_g_m2 PLSR_Predicted - ## 184 Jacobaea vulgaris Jacvul WC2 0.008756996 0.8756996 0.9462916 - ## 185 Potentilla reptans Potrep WC2 0.010313464 1.0313464 
1.5386676 - ## 186 Rubus caesius Rubcae WC2 0.007968454 0.7968454 0.8790482 - ## 187 Urtica dioica Urtdio WC2 0.012737560 1.2737560 1.1241560 - ## 188 Ammophila arenaria Ammare WC3 0.028072806 2.8072806 2.4527108 - ## 189 Jacobaea vulgaris Jacvul WC3 0.010251687 1.0251687 1.1553688 - ## PLSR_Residuals - ## 184 0.07059201 - ## 185 0.50732119 - ## 186 0.08220284 - ## 187 -0.14959995 - ## 188 -0.35456980 - ## 189 0.13020008 + ## Plant_Species Species_Code Plot Narea_mg_mm2 Narea_g_m2 + ## 1 Arrhenatherum elatius Arrela DC1 0.01261440 1.261440 + ## 4 Rubus caesius Rubcae DC1 0.01208978 1.208978 + ## 8 Jacobaea vulgaris Jacvul DC2 0.01185197 1.185197 + ## 11 Carex arenaria Carare DC3 0.02103830 2.103830 + ## 14 Jacobaea vulgaris Jacvul DC3 0.01121247 1.121247 + ## 19 Oenothera glazioviana Oengla DC4 0.01444293 1.444293 + ## PLSR_Predicted PLSR_Residuals + ## 1 1.340135 0.07869548 + ## 4 1.288026 0.07904830 + ## 8 1.155840 -0.02935675 + ## 11 2.014712 -0.08911757 + ## 14 1.328742 0.20749565 + ## 19 1.534162 0.08986811 ``` r val.R2 <- round(pls::R2(plsr.out,newdata=val.plsr.data,intercept=F)[[1]][nComps],2) @@ -644,7 +630,7 @@ scatterplots <- grid.arrange(cal_scatter_plot, val_scatter_plot, cal_resid_histo ## Warning: Removed 2 rows containing missing values (geom_point). - ## Warning: Removed 3 rows containing missing values (geom_point). + ## Warning: Removed 2 rows containing missing values (geom_point). ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`. ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`. 
@@ -727,20 +713,20 @@ val.plsr.output$UPI <- val.plsr.output$PLSR_Predicted+1.96*sd_tot head(val.plsr.output) ``` - ## Plant_Species Species_Code Plot Narea_mg_mm2 Narea_g_m2 PLSR_Predicted - ## 184 Jacobaea vulgaris Jacvul WC2 0.008756996 0.8756996 0.9462916 - ## 185 Potentilla reptans Potrep WC2 0.010313464 1.0313464 1.5386676 - ## 186 Rubus caesius Rubcae WC2 0.007968454 0.7968454 0.8790482 - ## 187 Urtica dioica Urtdio WC2 0.012737560 1.2737560 1.1241560 - ## 188 Ammophila arenaria Ammare WC3 0.028072806 2.8072806 2.4527108 - ## 189 Jacobaea vulgaris Jacvul WC3 0.010251687 1.0251687 1.1553688 - ## PLSR_Residuals LCI UCI LPI UPI - ## 184 0.07059201 0.9154961 0.9532972 0.4623162 1.430267 - ## 185 0.50732119 1.4875834 1.5528063 1.0540777 2.023258 - ## 186 0.08220284 0.8472007 0.9329303 0.3936085 1.364488 - ## 187 -0.14959995 1.1075928 1.1743800 0.6395189 1.608793 - ## 188 -0.35456980 2.4248448 2.5638131 1.9651833 2.940238 - ## 189 0.13020008 1.1262731 1.1615642 0.6713762 1.639361 + ## Plant_Species Species_Code Plot Narea_mg_mm2 Narea_g_m2 + ## 1 Arrhenatherum elatius Arrela DC1 0.01261440 1.261440 + ## 4 Rubus caesius Rubcae DC1 0.01208978 1.208978 + ## 8 Jacobaea vulgaris Jacvul DC2 0.01185197 1.185197 + ## 11 Carex arenaria Carare DC3 0.02103830 2.103830 + ## 14 Jacobaea vulgaris Jacvul DC3 0.01121247 1.121247 + ## 19 Oenothera glazioviana Oengla DC4 0.01444293 1.444293 + ## PLSR_Predicted PLSR_Residuals LCI UCI LPI UPI + ## 1 1.340135 0.07869548 1.298260 1.346986 0.7916762 1.888595 + ## 4 1.288026 0.07904830 1.262110 1.297939 0.7397937 1.836258 + ## 8 1.155840 -0.02935675 1.113678 1.172006 0.6072413 1.704439 + ## 11 2.014712 -0.08911757 1.936508 2.020049 1.4654399 2.563985 + ## 14 1.328742 0.20749565 1.298485 1.333454 0.7804978 1.876987 + ## 19 1.534162 0.08986811 1.522672 1.550848 0.9859820 2.082341 ``` r val.plsr.output$LPI <- val.plsr.output$PLSR_Predicted-1.96*sd_tot @@ -748,20 +734,20 @@ val.plsr.output$UPI <- val.plsr.output$PLSR_Predicted+1.96*sd_tot 
head(val.plsr.output) ``` - ## Plant_Species Species_Code Plot Narea_mg_mm2 Narea_g_m2 PLSR_Predicted - ## 184 Jacobaea vulgaris Jacvul WC2 0.008756996 0.8756996 0.9462916 - ## 185 Potentilla reptans Potrep WC2 0.010313464 1.0313464 1.5386676 - ## 186 Rubus caesius Rubcae WC2 0.007968454 0.7968454 0.8790482 - ## 187 Urtica dioica Urtdio WC2 0.012737560 1.2737560 1.1241560 - ## 188 Ammophila arenaria Ammare WC3 0.028072806 2.8072806 2.4527108 - ## 189 Jacobaea vulgaris Jacvul WC3 0.010251687 1.0251687 1.1553688 - ## PLSR_Residuals LCI UCI LPI UPI - ## 184 0.07059201 0.9154961 0.9532972 0.4623162 1.430267 - ## 185 0.50732119 1.4875834 1.5528063 1.0540777 2.023258 - ## 186 0.08220284 0.8472007 0.9329303 0.3936085 1.364488 - ## 187 -0.14959995 1.1075928 1.1743800 0.6395189 1.608793 - ## 188 -0.35456980 2.4248448 2.5638131 1.9651833 2.940238 - ## 189 0.13020008 1.1262731 1.1615642 0.6713762 1.639361 + ## Plant_Species Species_Code Plot Narea_mg_mm2 Narea_g_m2 + ## 1 Arrhenatherum elatius Arrela DC1 0.01261440 1.261440 + ## 4 Rubus caesius Rubcae DC1 0.01208978 1.208978 + ## 8 Jacobaea vulgaris Jacvul DC2 0.01185197 1.185197 + ## 11 Carex arenaria Carare DC3 0.02103830 2.103830 + ## 14 Jacobaea vulgaris Jacvul DC3 0.01121247 1.121247 + ## 19 Oenothera glazioviana Oengla DC4 0.01444293 1.444293 + ## PLSR_Predicted PLSR_Residuals LCI UCI LPI UPI + ## 1 1.340135 0.07869548 1.298260 1.346986 0.7916762 1.888595 + ## 4 1.288026 0.07904830 1.262110 1.297939 0.7397937 1.836258 + ## 8 1.155840 -0.02935675 1.113678 1.172006 0.6072413 1.704439 + ## 11 2.014712 -0.08911757 1.936508 2.020049 1.4654399 2.563985 + ## 14 1.328742 0.20749565 1.298485 1.333454 0.7804978 1.876987 + ## 19 1.534162 0.08986811 1.522672 1.550848 0.9859820 2.082341 ### Jackknife coefficient plot @@ -863,7 +849,7 @@ write.csv(out.jk.coefs,file=file.path(outdir, print(paste("Output directory: ", outdir)) ``` - ## [1] "Output directory: /var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T//RtmpJ6W1sB" + ## [1] "Output 
directory: /var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T//RtmpYxPllO" ``` r # Observed versus predicted diff --git a/vignettes/reseco_leafN_plsr_example.pdf b/vignettes/reseco_leafN_plsr_example.pdf index 2b2f343..7723e0b 100644 Binary files a/vignettes/reseco_leafN_plsr_example.pdf and b/vignettes/reseco_leafN_plsr_example.pdf differ diff --git a/vignettes/reseco_leafN_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png b/vignettes/reseco_leafN_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png index 46da6e4..b2bff86 100644 Binary files a/vignettes/reseco_leafN_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png and b/vignettes/reseco_leafN_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png differ diff --git a/vignettes/reseco_leafN_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png b/vignettes/reseco_leafN_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png index 1475940..6b2141a 100644 Binary files a/vignettes/reseco_leafN_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png and b/vignettes/reseco_leafN_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png differ diff --git a/vignettes/reseco_leafN_plsr_example_files/figure-gfm/unnamed-chunk-16-1.png b/vignettes/reseco_leafN_plsr_example_files/figure-gfm/unnamed-chunk-16-1.png index e071cc6..fcb2b20 100644 Binary files a/vignettes/reseco_leafN_plsr_example_files/figure-gfm/unnamed-chunk-16-1.png and b/vignettes/reseco_leafN_plsr_example_files/figure-gfm/unnamed-chunk-16-1.png differ diff --git a/vignettes/reseco_leafN_plsr_example_files/figure-gfm/unnamed-chunk-7-1.png b/vignettes/reseco_leafN_plsr_example_files/figure-gfm/unnamed-chunk-7-1.png index d19d327..9faeaef 100644 Binary files a/vignettes/reseco_leafN_plsr_example_files/figure-gfm/unnamed-chunk-7-1.png and b/vignettes/reseco_leafN_plsr_example_files/figure-gfm/unnamed-chunk-7-1.png differ diff --git a/vignettes/reseco_leafN_plsr_example_files/figure-gfm/unnamed-chunk-9-1.png 
b/vignettes/reseco_leafN_plsr_example_files/figure-gfm/unnamed-chunk-9-1.png index ce44beb..0f05c9b 100644 Binary files a/vignettes/reseco_leafN_plsr_example_files/figure-gfm/unnamed-chunk-9-1.png and b/vignettes/reseco_leafN_plsr_example_files/figure-gfm/unnamed-chunk-9-1.png differ diff --git a/vignettes/reseco_lma_plsr_example.Rmd b/vignettes/reseco_lma_plsr_example.Rmd index 2aa25b9..de2485a 100644 --- a/vignettes/reseco_lma_plsr_example.Rmd +++ b/vignettes/reseco_lma_plsr_example.Rmd @@ -21,8 +21,7 @@ This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook to illustrate how ### Getting Started ### Step 1. Load libraries needed to run example script ```{r, eval=TRUE, echo=TRUE} -list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra", - "spectratrait") +list.of.packages <- c("pls","dplyr","here","plotrix","ggplot2","gridExtra","spectratrait") invisible(lapply(list.of.packages, library, character.only = TRUE)) ``` @@ -205,12 +204,14 @@ maxComps <- 16 iterations <- 50 prop <- 0.70 if (method=="pls") { - nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, seg=seg, random_seed=random_seed) print(paste0("*** Optimal number of components: ", nComps)) } else { - nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, iterations=iterations, seg=seg, prop=prop, random_seed=random_seed) diff --git a/vignettes/reseco_lma_plsr_example.md b/vignettes/reseco_lma_plsr_example.md index 3015be4..35abdab 100644 --- a/vignettes/reseco_lma_plsr_example.md +++ b/vignettes/reseco_lma_plsr_example.md @@ -16,8 +16,7 @@ leaf-mass area (LMA) ### Step 1. 
Load libraries needed to run example script ``` r -list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra", - "spectratrait") +list.of.packages <- c("pls","dplyr","here","plotrix","ggplot2","gridExtra","spectratrait") invisible(lapply(list.of.packages, library, character.only = TRUE)) ``` @@ -82,7 +81,7 @@ output_dir <- "tempdir" ### Step 3. Set working directory (scratch space) - ## [1] "/private/var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T/RtmpoqfeI6" + ## [1] "/private/var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T/Rtmpfxi2vB" ### Step 4. Pull example dataset from EcoSIS (ecosis.org) @@ -118,38 +117,22 @@ dat_raw <- spectratrait::get_ecosis_data(ecosis_id = ecosis_id) head(dat_raw) ``` - ## # A tibble: 6 x 2,164 - ## `Cw/EWT (cm3/cm2… `Latin Species` `Leaf area (mm2… `Leaf calcium content pe… - ## - ## 1 0.00887 Arrhenatherum el… 696. 0.0291 - ## 2 0.00824 Bromus sterilis 447. 0.0230 - ## 3 0.0280 Jacobaea vulgaris 2418. 0.0950 - ## 4 0.0106 Rubus caesius 5719. 0.0700 - ## 5 0.00851 Arrhenatherum el… 671. 0.0286 - ## 6 0.0153 Crepis capillaris 1401. 0.0470 + ## # A tibble: 6 × 2,164 + ## `Cw/EWT (cm3/cm2)` `Latin Species` `Leaf area (mm2)` `Leaf calcium cont… + ## + ## 1 0.00887 Arrhenatherum elatius 696. 0.0291 + ## 2 0.00824 Bromus sterilis 447. 0.0230 + ## 3 0.0280 Jacobaea vulgaris 2418. 0.0950 + ## 4 0.0106 Rubus caesius 5719. 0.0700 + ## 5 0.00851 Arrhenatherum elatius 671. 0.0286 + ## 6 0.0153 Crepis capillaris 1401. 
0.0470 ## # … with 2,160 more variables: ## # Leaf magnesium content per leaf area (mg/mm2) , ## # Leaf mass per area (g/cm2) , ## # Leaf nitrogen content per leaf area (mg/mm2) , ## # Leaf phosphorus content per leaf area (mg/mm2) , ## # Leaf potassium content per leaf area (mg/mm2) , - ## # Plant height vegetative (cm) , ids , plot code , - ## # species code , 350 , 351 , 352 , 353 , 354 , - ## # 355 , 356 , 357 , 358 , 359 , 360 , - ## # 361 , 362 , 363 , 364 , 365 , 366 , - ## # 367 , 368 , 369 , 370 , 371 , 372 , - ## # 373 , 374 , 375 , 376 , 377 , 378 , - ## # 379 , 380 , 381 , 382 , 383 , 384 , - ## # 385 , 386 , 387 , 388 , 389 , 390 , - ## # 391 , 392 , 393 , 394 , 395 , 396 , - ## # 397 , 398 , 399 , 400 , 401 , 402 , - ## # 403 , 404 , 405 , 406 , 407 , 408 , - ## # 409 , 410 , 411 , 412 , 413 , 414 , - ## # 415 , 416 , 417 , 418 , 419 , 420 , - ## # 421 , 422 , 423 , 424 , 425 , 426 , - ## # 427 , 428 , 429 , 430 , 431 , 432 , - ## # 433 , 434 , 435 , 436 , 437 , 438 , - ## # 439 , 440 , … + ## # Plant height vegetative (cm) , ids , plot code , … ``` r names(dat_raw)[1:40] @@ -209,15 +192,15 @@ sample_info <- dat_raw[,names(dat_raw) %notin% seq(350,2500,1)] head(sample_info) ``` - ## # A tibble: 6 x 13 - ## `Cw/EWT (cm3/cm2… `Latin Species` `Leaf area (mm2… `Leaf calcium content pe… - ## - ## 1 0.00887 Arrhenatherum el… 696. 0.0291 - ## 2 0.00824 Bromus sterilis 447. 0.0230 - ## 3 0.0280 Jacobaea vulgaris 2418. 0.0950 - ## 4 0.0106 Rubus caesius 5719. 0.0700 - ## 5 0.00851 Arrhenatherum el… 671. 0.0286 - ## 6 0.0153 Crepis capillaris 1401. 0.0470 + ## # A tibble: 6 × 13 + ## `Cw/EWT (cm3/cm2)` `Latin Species` `Leaf area (mm2)` `Leaf calcium cont… + ## + ## 1 0.00887 Arrhenatherum elatius 696. 0.0291 + ## 2 0.00824 Bromus sterilis 447. 0.0230 + ## 3 0.0280 Jacobaea vulgaris 2418. 0.0950 + ## 4 0.0106 Rubus caesius 5719. 0.0700 + ## 5 0.00851 Arrhenatherum elatius 671. 0.0286 + ## 6 0.0153 Crepis capillaris 1401. 
0.0470 ## # … with 9 more variables: Leaf magnesium content per leaf area (mg/mm2) , ## # Leaf mass per area (g/cm2) , ## # Leaf nitrogen content per leaf area (mg/mm2) , @@ -235,7 +218,7 @@ sample_info2 <- sample_info2 %>% head(sample_info2) ``` - ## # A tibble: 6 x 5 + ## # A tibble: 6 × 5 ## Plant_Species Species_Code Plot LMA_g_cm2 LMA_g_m2 ## ## 1 Arrhenatherum elatius Arrela DC1 0.00342 34.2 @@ -299,20 +282,20 @@ val.plsr.data <- split_data$val_data head(val.plsr.data)[1:8] ``` - ## Plant_Species Species_Code Plot LMA_g_cm2 LMA_g_m2 Wave_500 - ## 184 Jacobaea vulgaris Jacvul WC2 0.003551614 35.51614 0.06736887 - ## 185 Potentilla reptans Potrep WC2 0.005586320 55.86320 0.07125000 - ## 186 Rubus caesius Rubcae WC2 0.005803902 58.03902 0.05993560 - ## 187 Urtica dioica Urtdio WC2 0.005215705 52.15705 0.06508300 - ## 188 Ammophila arenaria Ammare WC3 0.018443757 184.43757 0.15175000 - ## 189 Jacobaea vulgaris Jacvul WC3 0.004980002 49.80002 0.06805547 - ## Wave_501 Wave_502 - ## 184 0.06870667 0.07014220 - ## 185 0.07235000 0.07368350 - ## 186 0.06162000 0.06352233 - ## 187 0.06625000 0.06758350 - ## 188 0.15275000 0.15415000 - ## 189 0.06938000 0.07093553 + ## Plant_Species Species_Code Plot LMA_g_cm2 LMA_g_m2 Wave_500 + ## 1 Arrhenatherum elatius Arrela DC1 0.003420518 34.20518 0.070667 + ## 2 Bromus sterilis Broste DC1 0.002816940 28.16940 0.105300 + ## 5 Arrhenatherum elatius Arrela DC2 0.003611619 36.11619 0.076300 + ## 6 Crepis capillaris Creves DC2 0.002828699 28.28699 0.062717 + ## 11 Carex arenaria Carare DC3 0.010579908 105.79908 0.115885 + ## 16 Elytrigia juncea Elyjun DC4 0.012400353 124.00353 0.116320 + ## Wave_501 Wave_502 + ## 1 0.07160 0.072533 + ## 2 0.10710 0.109030 + ## 5 0.07670 0.077300 + ## 6 0.06365 0.064850 + ## 11 0.11705 0.118450 + ## 16 0.11745 0.118850 ``` r rm(split_data) @@ -397,13 +380,13 @@ val.plsr.data <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% head(val.plsr.data)[1:5] ``` - ## Plant_Species Species_Code 
Plot LMA_g_cm2 LMA_g_m2 - ## 184 Jacobaea vulgaris Jacvul WC2 0.003551614 35.51614 - ## 185 Potentilla reptans Potrep WC2 0.005586320 55.86320 - ## 186 Rubus caesius Rubcae WC2 0.005803902 58.03902 - ## 187 Urtica dioica Urtdio WC2 0.005215705 52.15705 - ## 188 Ammophila arenaria Ammare WC3 0.018443757 184.43757 - ## 189 Jacobaea vulgaris Jacvul WC3 0.004980002 49.80002 + ## Plant_Species Species_Code Plot LMA_g_cm2 LMA_g_m2 + ## 1 Arrhenatherum elatius Arrela DC1 0.003420518 34.20518 + ## 2 Bromus sterilis Broste DC1 0.002816940 28.16940 + ## 5 Arrhenatherum elatius Arrela DC2 0.003611619 36.11619 + ## 6 Crepis capillaris Creves DC2 0.002828699 28.28699 + ## 11 Carex arenaria Carare DC3 0.010579908 105.79908 + ## 16 Elytrigia juncea Elyjun DC4 0.012400353 124.00353 ### Step 9. Calibration and Validation spectra plot @@ -458,18 +441,21 @@ maxComps <- 16 iterations <- 50 prop <- 0.70 if (method=="pls") { - nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, seg=seg, random_seed=random_seed) print(paste0("*** Optimal number of components: ", nComps)) } else { - nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, + method=method, maxComps=maxComps, iterations=iterations, seg=seg, prop=prop, random_seed=random_seed) } ``` + ## [1] "*** Identifying optimal number of PLSR components ***" ## [1] "*** Running permutation test. 
Please hang tight, this can take awhile ***" ## [1] "Options:" ## [1] "Max Components: 16 Iterations: 50 Data Proportion (percent): 70" @@ -519,9 +505,9 @@ pls::RMSEP(plsr.out, newdata = val.plsr.data) ``` ## (Intercept) 1 comps 2 comps 3 comps 4 comps 5 comps - ## 37.79 32.71 30.36 23.51 21.58 18.46 + ## 30.50 38.30 35.20 22.78 20.14 17.39 ## 6 comps 7 comps 8 comps 9 comps 10 comps 11 comps - ## 15.89 15.44 15.52 15.19 15.14 13.68 + ## 13.10 12.56 14.13 17.45 15.61 12.70 ``` r plot(pls::RMSEP(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL RMSEP", @@ -533,9 +519,9 @@ pls::R2(plsr.out, newdata = val.plsr.data) ``` ## (Intercept) 1 comps 2 comps 3 comps 4 comps 5 comps - ## -0.06195 0.20461 0.31467 0.58911 0.65365 0.74649 + ## -0.02137 -0.60981 -0.36001 0.43050 0.55467 0.66818 ## 6 comps 7 comps 8 comps 9 comps 10 comps 11 comps - ## 0.81222 0.82276 0.82084 0.82841 0.82945 0.86090 + ## 0.81156 0.82673 0.78088 0.66593 0.73244 0.82292 ``` r plot(pls::R2(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL R2", @@ -611,20 +597,20 @@ val.plsr.output <- val.plsr.output %>% head(val.plsr.output) ``` - ## Plant_Species Species_Code Plot LMA_g_cm2 LMA_g_m2 PLSR_Predicted - ## 184 Jacobaea vulgaris Jacvul WC2 0.003551614 35.51614 43.51586 - ## 185 Potentilla reptans Potrep WC2 0.005586320 55.86320 61.41726 - ## 186 Rubus caesius Rubcae WC2 0.005803902 58.03902 45.55789 - ## 187 Urtica dioica Urtdio WC2 0.005215705 52.15705 46.65139 - ## 188 Ammophila arenaria Ammare WC3 0.018443757 184.43757 147.08781 - ## 189 Jacobaea vulgaris Jacvul WC3 0.004980002 49.80002 53.09532 - ## PLSR_Residuals - ## 184 7.999719 - ## 185 5.554059 - ## 186 -12.481126 - ## 187 -5.505664 - ## 188 -37.349758 - ## 189 3.295298 + ## Plant_Species Species_Code Plot LMA_g_cm2 LMA_g_m2 PLSR_Predicted + ## 1 Arrhenatherum elatius Arrela DC1 0.003420518 34.20518 36.09345 + ## 2 Bromus sterilis Broste DC1 0.002816940 28.16940 42.52977 + ## 5 Arrhenatherum elatius Arrela DC2 
0.003611619 36.11619 21.87053 + ## 6 Crepis capillaris Creves DC2 0.002828699 28.28699 20.66219 + ## 11 Carex arenaria Carare DC3 0.010579908 105.79908 99.79501 + ## 16 Elytrigia juncea Elyjun DC4 0.012400353 124.00353 105.16400 + ## PLSR_Residuals + ## 1 1.888268 + ## 2 14.360370 + ## 5 -14.245663 + ## 6 -7.624796 + ## 11 -6.004066 + ## 16 -18.839527 ``` r val.R2 <- round(pls::R2(plsr.out,newdata=val.plsr.data,intercept=F)[[1]][nComps],2) @@ -686,7 +672,7 @@ scatterplots <- grid.arrange(cal_scatter_plot, val_scatter_plot, cal_resid_histo ## Warning: Removed 6 rows containing missing values (geom_point). - ## Warning: Removed 6 rows containing missing values (geom_point). + ## Warning: Removed 3 rows containing missing values (geom_point). ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`. ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`. @@ -775,20 +761,20 @@ val.plsr.output$UPI <- val.plsr.output$PLSR_Predicted+1.96*sd_tot head(val.plsr.output) ``` - ## Plant_Species Species_Code Plot LMA_g_cm2 LMA_g_m2 PLSR_Predicted - ## 184 Jacobaea vulgaris Jacvul WC2 0.003551614 35.51614 43.51586 - ## 185 Potentilla reptans Potrep WC2 0.005586320 55.86320 61.41726 - ## 186 Rubus caesius Rubcae WC2 0.005803902 58.03902 45.55789 - ## 187 Urtica dioica Urtdio WC2 0.005215705 52.15705 46.65139 - ## 188 Ammophila arenaria Ammare WC3 0.018443757 184.43757 147.08781 - ## 189 Jacobaea vulgaris Jacvul WC3 0.004980002 49.80002 53.09532 - ## PLSR_Residuals LCI UCI LPI UPI - ## 184 7.999719 42.58086 44.15724 16.70642 70.32530 - ## 185 5.554059 60.10507 62.52674 34.59536 88.23916 - ## 186 -12.481126 44.66849 48.22967 18.70489 72.41090 - ## 187 -5.505664 45.70375 47.84938 19.82512 73.47765 - ## 188 -37.349758 145.09309 148.61694 120.18052 173.99510 - ## 189 3.295298 52.40880 53.97806 26.28498 79.90565 + ## Plant_Species Species_Code Plot LMA_g_cm2 LMA_g_m2 PLSR_Predicted + ## 1 Arrhenatherum elatius Arrela DC1 0.003420518 34.20518 36.09345 + ## 2 
Bromus sterilis Broste DC1 0.002816940 28.16940 42.52977 + ## 5 Arrhenatherum elatius Arrela DC2 0.003611619 36.11619 21.87053 + ## 6 Crepis capillaris Creves DC2 0.002828699 28.28699 20.66219 + ## 11 Carex arenaria Carare DC3 0.010579908 105.79908 99.79501 + ## 16 Elytrigia juncea Elyjun DC4 0.012400353 124.00353 105.16400 + ## PLSR_Residuals LCI UCI LPI UPI + ## 1 1.888268 35.22975 36.83681 11.182998 61.00390 + ## 2 14.360370 41.61622 43.52851 17.617164 67.44238 + ## 5 -14.245663 20.07042 23.96996 -3.085793 46.82685 + ## 6 -7.624796 20.27384 21.15353 -4.234964 45.55935 + ## 11 -6.004066 98.52166 100.58017 74.888636 124.70139 + ## 16 -18.839527 104.18470 105.69273 80.260059 130.06795 ``` r ### Permutation coefficient plot @@ -897,7 +883,7 @@ write.csv(out.jk.coefs,file=file.path(outdir, print(paste("Output directory: ", outdir)) ``` - ## [1] "Output directory: /var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T//RtmpoqfeI6" + ## [1] "Output directory: /var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T//Rtmpfxi2vB" ``` r # Observed versus predicted diff --git a/vignettes/reseco_lma_plsr_example.pdf b/vignettes/reseco_lma_plsr_example.pdf index b600447..6df708b 100644 Binary files a/vignettes/reseco_lma_plsr_example.pdf and b/vignettes/reseco_lma_plsr_example.pdf differ diff --git a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png index 5e9c7db..dc16377 100644 Binary files a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png and b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png differ diff --git a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png index 0aa5976..ab426cc 100644 Binary files a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png and 
b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png differ diff --git a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-13-1.png b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-13-1.png index e4d311c..04cce02 100644 Binary files a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-13-1.png and b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-13-1.png differ diff --git a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-18-1.png b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-18-1.png index 084496d..cfba8ed 100644 Binary files a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-18-1.png and b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-18-1.png differ diff --git a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-7-1.png b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-7-1.png index c8de2b7..8261b84 100644 Binary files a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-7-1.png and b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-7-1.png differ