diff --git a/README.md b/README.md
index 776caa3..754a0f4 100644
--- a/README.md
+++ b/README.md
@@ -88,6 +88,7 @@ Rpubs LeafN bootstrap example output: https://rpubs.com/sserbin/721908
 EcoSIS URL: https://ecosis.org/package/leaf-spectra--structural-and-biochemical-leaf-traits-of-eight-crop-species <br>
 EcoSIS ID: 25770ad9-d47c-428b-bf99-d1543a4b0ec9 <br>
 DOI: https://doi.org/doi:10.21232/C2GM2Z <br>
+Rpubs LeafN bootstrap example output: https://rpubs.com/sserbin/736689 <br>
 
 5) Canopy spectra to map foliar functional traits over NEON domains in eastern United States <br>
 Target variable: leaf nitrogen <br>
diff --git a/inst/scripts/spectra-trait_reseco_lma_plsr_example.R b/inst/scripts/spectra-trait_reseco_lma_plsr_example.R
index bdd298f..aaaf4a2 100644
--- a/inst/scripts/spectra-trait_reseco_lma_plsr_example.R
+++ b/inst/scripts/spectra-trait_reseco_lma_plsr_example.R
@@ -121,8 +121,9 @@ plsr_data <- plsr_data[complete.cases(plsr_data[,names(plsr_data) %in%
 method <- "dplyr" #base/dplyr
 # base R - a bit slow
 # dplyr - much faster
-split_data <- spectratrait::create_data_split(dataset=plsr_data, approach=method, split_seed=7529075,
-                                              prop=0.8, group_variables="Species_Code")
+split_data <- spectratrait::create_data_split(dataset=plsr_data, approach=method, 
+                                              split_seed=7529075,prop=0.8, 
+                                              group_variables="Species_Code")
 names(split_data)
 cal.plsr.data <- split_data$cal_data
 head(cal.plsr.data)[1:8]
@@ -138,11 +139,13 @@ text_loc <- c(max(hist(cal.plsr.data[,paste0(inVar)])$counts),
               max(hist(cal.plsr.data[,paste0(inVar)])$mids))
 cal_hist_plot <- qplot(cal.plsr.data[,paste0(inVar)],geom="histogram",
                        main = paste0("Calibration Histogram for ",inVar),
-                       xlab = paste0(inVar),ylab = "Count",fill=I("grey50"),col=I("black"),alpha=I(.7)) +
+                       xlab = paste0(inVar),ylab = "Count",fill=I("grey50"),col=I("black"),
+                       alpha=I(.7)) +
   annotate("text", x=text_loc[2], y=text_loc[1], label= "1.",size=10)
 val_hist_plot <- qplot(val.plsr.data[,paste0(inVar)],geom="histogram",
                        main = paste0("Validation Histogram for ",inVar),
-                       xlab = paste0(inVar),ylab = "Count",fill=I("grey50"),col=I("black"),alpha=I(.7))
+                       xlab = paste0(inVar),ylab = "Count",fill=I("grey50"),col=I("black"),
+                       alpha=I(.7))
 histograms <- grid.arrange(cal_hist_plot, val_hist_plot, ncol=2)
 ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Histograms.png")), plot = histograms, 
        device="png", width = 30, height = 12, units = "cm", dpi = 300)
@@ -152,6 +155,14 @@ write.csv(val.plsr.data,file=file.path(outdir,paste0(inVar,'_Val_PLSR_Dataset.cs
 #--------------------------------------------------------------------------------------------------#
 
 
+#--------------------------------------------------------------------------------------------------#
+# Figure S1. The resulting leaf mass area (LMA, g/m2) distribution (histogram) for the 
+# calibration (i.e. model training) and validation datasets. The data was split using the 
+# spectratrait::create_data_split() function using "Species_Code" as the group_variable and
+# using a data split proportion per group of 80% to calibration and 20% to validation
+#--------------------------------------------------------------------------------------------------#
+
+
 #--------------------------------------------------------------------------------------------------#
 ### Step 8.
 ### Format PLSR data for model fitting 
@@ -181,6 +192,13 @@ par(mfrow=c(1,1))
 #--------------------------------------------------------------------------------------------------#
 
 
+#--------------------------------------------------------------------------------------------------#
+# Figure S2. The resulting calibration and validation spectral reflectance distribution by
+# wavelength. The spectra split was done at the same time as LMA, as described in
+# Supplemental Figure S1. 
+#--------------------------------------------------------------------------------------------------#
+
+
 #--------------------------------------------------------------------------------------------------#
 ### Step 10.
 ### Use permutation to determine the optimal number of components
@@ -206,13 +224,26 @@ if (method=="pls") {
                                                   maxComps=maxComps, iterations=iterations, 
                                                   seg=seg, prop=prop, random_seed=random_seed)
 }
-print("*** Figure 3. Optimal PLSR component selection ***")
+print("*** Figure S3. Optimal PLSR component selection ***")
 dev.copy(png,file.path(outdir,paste0(paste0("Figure_3_",inVar,"_PLSR_Component_Selection.png"))), 
          height=2800, width=3400,  res=340)
 dev.off();
 #--------------------------------------------------------------------------------------------------#
 
 
+#--------------------------------------------------------------------------------------------------#
+# Figure S3. A key challenge in building robust and parsimonious PLSR models is determining the
+# optimal number of PLSR components. A good definition is the minimum number of components that 
+# minimizes the PRESS statistic and where the next higher component doesn't produce a meaningful
+# increase in model performance (i.e. lower PRESS). We provide three methods in the
+# find_optimal_components() function to determine the optimal number of components statistically 
+# using the internal pls package jackknife method or our custom methods that are better in some 
+# conditions, including for large datasets. In this example we show the "firstMin" option that 
+# selects the number of components corresponding to the first statistical minimum PRESS value
+# (vertical broken blue line).
+#--------------------------------------------------------------------------------------------------#
+
+
 #--------------------------------------------------------------------------------------------------#
 ### Step 11.
 ### Fit final model - using leave-one-out cross validation
@@ -242,6 +273,12 @@ par(opar)
 #--------------------------------------------------------------------------------------------------#
 
 
+#--------------------------------------------------------------------------------------------------#
+# Figure S4. A plot of the validation root mean square error of prediction (RMSEP, left) and 
+# coefficient of determination (right) for the 0 to optimal number of components
+#--------------------------------------------------------------------------------------------------#
+
+
 #--------------------------------------------------------------------------------------------------#
 ### Step 12.
 ### PLSR fit observed vs. predicted plot data
@@ -319,6 +356,14 @@ ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Scatterplots.png")),
 #--------------------------------------------------------------------------------------------------#
 
 
+#--------------------------------------------------------------------------------------------------#
+# Figure S5. The calibration model and independent validation scatter plot results for the example
+# LMA PLSR model (top row). Also shown are the calibration model and validation PLSR
+# residuals, where the calibration results are based on the internal model cross-validation
+# and the validation residuals are the predicted minus observed values of LMA.
+#--------------------------------------------------------------------------------------------------#
+
+
 #--------------------------------------------------------------------------------------------------#
 ### Step 13.
 ### Generate Coefficient and VIP plots
@@ -341,6 +386,12 @@ par(opar)
 #--------------------------------------------------------------------------------------------------#
 
 
+#--------------------------------------------------------------------------------------------------#
+# Figure S6. The calibration model PLSR regression coefficient (top) and variable importance of 
+# projection (bottom) plots
+#--------------------------------------------------------------------------------------------------#
+
+
 #--------------------------------------------------------------------------------------------------#
 ### Step 14.
 ### Permutation analysis to derive uncertainty estimates
@@ -386,6 +437,10 @@ dev.copy(png,file.path(outdir,paste0(inVar,'_Jackknife_Regression_Coefficients.p
          height=2100, width=3800, res=340)
 dev.off();
 
+#--------------------------------------------------------------------------------------------------#
+# Figure S7. The calibration model jackknife PLSR regression coefficients 
+#--------------------------------------------------------------------------------------------------#
+
 # JK validation plot
 RMSEP <- sqrt(mean(val.plsr.output$PLSR_Residuals^2))
 pecr_RMSEP <- RMSEP/mean(val.plsr.output[,inVar])*100
@@ -413,9 +468,17 @@ dev.off();
 #--------------------------------------------------------------------------------------------------#
 
 
+#--------------------------------------------------------------------------------------------------#
+# Figure S8. Independent validation results for the LMA PLSR model with associated jackknife
+# uncertainty estimate 95% prediction intervals for each estimated LMA value. The %RMSEP is the 
+# model prediction RMSEP expressed as a percentage of the mean of the observed response, in this
+# case the mean of the validation LMA values
+#--------------------------------------------------------------------------------------------------#
+
+
 #---------------- Output jackknife results --------------------------------------------------------#
-### Step 15.
-# JK Coefficents
+### Step 15. Outputs the final PLSR model jackknife coefficients
+# JK Coefficients
 out.jk.coefs <- data.frame(Iteration=seq(1,length(Jackknife_intercept),1),
                            Intercept=Jackknife_intercept,t(Jackknife_coef))
 head(out.jk.coefs)[1:6]
@@ -425,7 +488,7 @@ write.csv(out.jk.coefs,file=file.path(outdir,paste0(inVar,'_Jackkife_PLSR_Coeffi
 
 
 #---------------- Export Model Output -------------------------------------------------------------#
-### Step 16.
+### Step 16. Create and write all relevant PLSR model output to disk in .csv format
 print(paste("Output directory: ", getwd()))
 
 # Observed versus predicted
diff --git a/vignettes/ely_leafN_bootstrap_plsr_example.Rmd b/vignettes/ely_leafN_bootstrap_plsr_example.Rmd
new file mode 100644
index 0000000..6710d63
--- /dev/null
+++ b/vignettes/ely_leafN_bootstrap_plsr_example.Rmd
@@ -0,0 +1,428 @@
+---
+title: Spectra-trait PLSR example using leaf-level spectra and leaf nitrogen content (Narea, g/m2) data from eight different crop species growing in a glasshouse at Brookhaven National Laboratory.
+author: "Shawn P. Serbin, Julien Lamour, & Jeremiah Anderson"
+output:
+  github_document: default
+  html_notebook: default
+  pdf_document: default
+  html_document:
+    df_print: paged
+params:
+  date: !r Sys.Date()
+---
+
+```{r setup, include=FALSE, echo=FALSE}
+knitr::opts_chunk$set(echo = TRUE)
+```
+
+### Overview
+This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook to illustrate how to load an
+internal dataset ("ely_plsr_data"), choose the "optimal" number of PLSR components, 
+and fit a PLSR model for leaf nitrogen content (Narea, g/m2).
+
+### Getting Started
+### Load libraries
+```{r, eval=TRUE, echo=TRUE}
+list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra",
+                      "spectratrait")
+invisible(lapply(list.of.packages, library, character.only = TRUE))
+```
+
+### Setup other functions and options
+```{r, echo=TRUE}
+### Setup other functions and options
+# not in
+`%notin%` <- Negate(`%in%`)
+
+# Script options
+pls::pls.options(plsralg = "oscorespls")
+pls::pls.options("plsralg")
+
+# Default par options
+opar <- par(no.readonly = T)
+
+# Specify output directory, output_dir 
+# Options: 
+# tempdir - use a OS-specified temporary directory 
+# user defined PATH - e.g. "~/scratch/PLSR"
+output_dir <- "tempdir"
+```
+
+### Load internal Ely et al 2019 dataset
+```{r, echo=TRUE}
+data("ely_plsr_data")
+head(ely_plsr_data)[,1:8]
+
+# What is the target variable?
+inVar <- "N_g_m2"
+```
+
+### Set working directory (scratch space)
+```{r, echo=FALSE}
+if (output_dir=="tempdir") {
+  outdir <- tempdir()
+} else {
+  if (! file.exists(output_dir)) dir.create(output_dir,recursive=TRUE)
+  outdir <- file.path(path.expand(output_dir))
+}
+setwd(outdir) # set working directory
+getwd()  # check wd
+```
+
+### Full PLSR dataset
+```{r, echo=TRUE}
+Start.wave <- 500
+End.wave <- 2400
+wv <- seq(Start.wave,End.wave,1)
+plsr_data <- ely_plsr_data
+head(plsr_data)[,1:6]
+```
+### Create cal/val datasets
+```{r, fig.height = 5, fig.width = 12, echo=TRUE}
+### Create cal/val datasets
+## Make a stratified random sample, using Species_Code as the grouping variable
+
+method <- "base" #base/dplyr
+# base R - a bit slow
+# dplyr - much faster
+split_data <- spectratrait::create_data_split(dataset=plsr_data, approach=method, 
+                                              split_seed=23452135, prop=0.7, 
+                                              group_variables="Species_Code")
+names(split_data)
+cal.plsr.data <- split_data$cal_data
+head(cal.plsr.data)[1:8]
+val.plsr.data <- split_data$val_data
+head(val.plsr.data)[1:8]
+rm(split_data)
+
+# Datasets:
+print(paste("Cal observations: ",dim(cal.plsr.data)[1],sep=""))
+print(paste("Val observations: ",dim(val.plsr.data)[1],sep=""))
+
+cal_hist_plot <- qplot(cal.plsr.data[,paste0(inVar)],geom="histogram",
+                       main = paste0("Cal. Histogram for ",inVar),
+                       xlab = paste0(inVar),ylab = "Count",fill=I("grey50"),col=I("black"),
+                       alpha=I(.7))
+val_hist_plot <- qplot(val.plsr.data[,paste0(inVar)],geom="histogram",
+                       main = paste0("Val. Histogram for ",inVar),
+                       xlab = paste0(inVar),ylab = "Count",fill=I("grey50"),col=I("black"),
+                       alpha=I(.7))
+histograms <- grid.arrange(cal_hist_plot, val_hist_plot, ncol=2)
+ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Histograms.png")), 
+       plot = histograms, 
+       device="png", width = 30, 
+       height = 12, units = "cm",
+       dpi = 300)
+# output cal/val data
+write.csv(cal.plsr.data,file=file.path(outdir,paste0(inVar,'_Cal_PLSR_Dataset.csv')),
+          row.names=FALSE)
+write.csv(val.plsr.data,file=file.path(outdir,paste0(inVar,'_Val_PLSR_Dataset.csv')),
+          row.names=FALSE)
+```
+
+### Create calibration and validation PLSR datasets
+```{r, echo=TRUE}
+### Format PLSR data for model fitting 
+cal_spec <- as.matrix(cal.plsr.data[, which(names(cal.plsr.data) %in% paste0("Wave_",wv))])
+cal.plsr.data <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% paste0("Wave_",wv))],
+                            Spectra=I(cal_spec))
+head(cal.plsr.data)[1:5]
+
+val_spec <- as.matrix(val.plsr.data[, which(names(val.plsr.data) %in% paste0("Wave_",wv))])
+val.plsr.data <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% paste0("Wave_",wv))],
+                            Spectra=I(val_spec))
+head(val.plsr.data)[1:5]
+```
+
+### plot cal and val spectra
+```{r, fig.height = 5, fig.width = 12, echo=TRUE}
+par(mfrow=c(1,2)) # B, L, T, R
+spectratrait::f.plot.spec(Z=cal.plsr.data$Spectra,wv=wv,plot_label="Calibration")
+spectratrait::f.plot.spec(Z=val.plsr.data$Spectra,wv=wv,plot_label="Validation")
+
+dev.copy(png,file.path(outdir,paste0(inVar,'_Cal_Val_Spectra.png')), 
+         height=2500,width=4900, res=340)
+dev.off();
+par(mfrow=c(1,1))
+```
+
+### Use permutation to determine optimal number of components
+```{r, fig.height = 6, fig.width = 10, echo=TRUE}
+### Use permutation to determine the optimal number of components
+if(grepl("Windows", sessionInfo()$running)){
+  pls.options(parallel = NULL)
+} else {
+  pls.options(parallel = parallel::detectCores()-1)
+}
+
+method <- "pls" #pls, firstPlateau, firstMin
+random_seed <- 1245565
+seg <- 50
+maxComps <- 16
+iterations <- 80
+prop <- 0.70
+if (method=="pls") {
+  # pls package approach - faster but estimates more components....
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, 
+                                                  maxComps=maxComps, seg=seg, 
+                                                  random_seed=random_seed)
+  print(paste0("*** Optimal number of components: ", nComps))
+} else {
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method,
+                                                  maxComps=maxComps, iterations=iterations, 
+                                                  seg=seg, prop=prop, 
+                                                  random_seed=random_seed)
+}
+dev.copy(png,file.path(outdir,paste0(paste0(inVar,"_PLSR_Component_Selection.png"))), 
+         height=2800, width=3400,  res=340)
+dev.off();
+```
+
+### Fit final model
+```{r, fig.height = 5, fig.width = 12, echo=TRUE}
+plsr.out <- plsr(as.formula(paste(inVar,"~","Spectra")),scale=FALSE,ncomp=nComps,validation="LOO",
+                 trace=FALSE,data=cal.plsr.data)
+fit <- plsr.out$fitted.values[,1,nComps]
+pls.options(parallel = NULL)
+
+# External validation fit stats
+par(mfrow=c(1,2)) # B, L, T, R
+pls::RMSEP(plsr.out, newdata = val.plsr.data)
+plot(pls::RMSEP(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL RMSEP",
+     xlab="Number of Components",ylab="Model Validation RMSEP",lty=1,col="black",cex=1.5,lwd=2)
+box(lwd=2.2)
+
+pls::R2(plsr.out, newdata = val.plsr.data)
+plot(pls::R2(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL R2",
+     xlab="Number of Components",ylab="Model Validation R2",lty=1,col="black",cex=1.5,lwd=2)
+box(lwd=2.2)
+dev.copy(png,file.path(outdir,paste0(paste0(inVar,"_Validation_RMSEP_R2_by_Component.png"))), 
+         height=2800, width=4800,  res=340)
+dev.off();
+par(opar)
+```
+
+### PLSR fit observed vs. predicted plot data
+```{r, fig.height = 15, fig.width = 15, echo=TRUE}  
+# Calibration: fitted and leave-one-out CV predictions, CV residuals and fit statistics.
+# pls::R2() lists the intercept-only (0 component) model first, so the value for the
+# nComps-component model sits at position nComps+1 (as in the bootstrap validation plot).
+cal.plsr.output <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% "Spectra")],
+                              PLSR_Predicted=fit,
+                              PLSR_CV_Predicted=as.vector(plsr.out$validation$pred[,,nComps]))
+cal.plsr.output <- cal.plsr.output %>% mutate(PLSR_CV_Residuals = PLSR_CV_Predicted-get(inVar))
+head(cal.plsr.output)
+cal.R2 <- round(pls::R2(plsr.out)[[1]][nComps+1],2)
+cal.RMSEP <- round(sqrt(mean(cal.plsr.output$PLSR_CV_Residuals^2)),2)
+
+# Validation: predictions for the held-out data, residuals and fit statistics
+val.plsr.output <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% "Spectra")],
+                              PLSR_Predicted=as.vector(predict(plsr.out, newdata = val.plsr.data,
+                                                               ncomp=nComps, type="response")[,,1]))
+val.plsr.output <- val.plsr.output %>% mutate(PLSR_Residuals = PLSR_Predicted-get(inVar))
+head(val.plsr.output)
+val.R2 <- round(pls::R2(plsr.out,newdata=val.plsr.data)[[1]][nComps+1],2)
+val.RMSEP <- round(sqrt(mean(val.plsr.output$PLSR_Residuals^2)),2)
+
+rng_quant <- quantile(cal.plsr.output[,inVar], probs = c(0.001, 0.999))
+cal_scatter_plot <- ggplot(cal.plsr.output, aes(x=PLSR_CV_Predicted, y=get(inVar))) + 
+  theme_bw() + geom_point() + geom_abline(intercept = 0, slope = 1, color="dark grey", 
+                                          linetype="dashed", size=1.5) + xlim(rng_quant[1], 
+                                                                              rng_quant[2]) + 
+  ylim(rng_quant[1], rng_quant[2]) +
+  labs(x=paste0("Predicted ", paste(inVar), " (units)"),
+       y=paste0("Observed ", paste(inVar), " (units)"),
+       title=paste0("Calibration: ", paste0("Rsq = ", cal.R2), "; ", paste0("RMSEP = ", 
+                                                                            cal.RMSEP))) +
+  theme(axis.text=element_text(size=18), legend.position="none",
+        axis.title=element_text(size=20, face="bold"), 
+        axis.text.x = element_text(angle = 0,vjust = 0.5),
+        panel.border = element_rect(linetype = "solid", fill = NA, size=1.5))
+
+cal_resid_histogram <- ggplot(cal.plsr.output, aes(x=PLSR_CV_Residuals)) +
+  geom_histogram(alpha=.5, position="identity") + 
+  geom_vline(xintercept = 0, color="black", 
+             linetype="dashed", size=1) + theme_bw() + 
+  theme(axis.text=element_text(size=18), legend.position="none",
+        axis.title=element_text(size=20, face="bold"), 
+        axis.text.x = element_text(angle = 0,vjust = 0.5),
+        panel.border = element_rect(linetype = "solid", fill = NA, size=1.5))
+
+rng_quant <- quantile(val.plsr.output[,inVar], probs = c(0.001, 0.999))
+val_scatter_plot <- ggplot(val.plsr.output, aes(x=PLSR_Predicted, y=get(inVar))) + 
+  theme_bw() + geom_point() + geom_abline(intercept = 0, slope = 1, color="dark grey", 
+                                          linetype="dashed", size=1.5) + xlim(rng_quant[1], 
+                                                                              rng_quant[2]) + 
+  ylim(rng_quant[1], rng_quant[2]) +
+  labs(x=paste0("Predicted ", paste(inVar), " (units)"),
+       y=paste0("Observed ", paste(inVar), " (units)"),
+       title=paste0("Validation: ", paste0("Rsq = ", val.R2), "; ", paste0("RMSEP = ", 
+                                                                           val.RMSEP))) +
+  theme(axis.text=element_text(size=18), legend.position="none",
+        axis.title=element_text(size=20, face="bold"), 
+        axis.text.x = element_text(angle = 0,vjust = 0.5),
+        panel.border = element_rect(linetype = "solid", fill = NA, size=1.5))
+
+val_resid_histogram <- ggplot(val.plsr.output, aes(x=PLSR_Residuals)) +
+  geom_histogram(alpha=.5, position="identity") + 
+  geom_vline(xintercept = 0, color="black", 
+             linetype="dashed", size=1) + theme_bw() + 
+  theme(axis.text=element_text(size=18), legend.position="none",
+        axis.title=element_text(size=20, face="bold"), 
+        axis.text.x = element_text(angle = 0,vjust = 0.5),
+        panel.border = element_rect(linetype = "solid", fill = NA, size=1.5))
+
+# plot cal/val side-by-side
+scatterplots <- grid.arrange(cal_scatter_plot, val_scatter_plot, cal_resid_histogram, 
+                             val_resid_histogram, nrow=2,ncol=2)
+ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Scatterplots.png")), 
+       plot = scatterplots, device="png", 
+       width = 32, 
+       height = 30, units = "cm",
+       dpi = 300)
+```
+
+### Generate Coefficient and VIP plots
+```{r, fig.height = 9, fig.width = 10, echo=TRUE}
+vips <- spectratrait::VIP(plsr.out)[nComps,]
+par(mfrow=c(2,1))
+plot(plsr.out, plottype = "coef",xlab="Wavelength (nm)",
+     ylab="Regression coefficients",legendpos = "bottomright",
+     ncomp=nComps,lwd=2)
+box(lwd=2.2)
+plot(seq(Start.wave,End.wave,1),vips,xlab="Wavelength (nm)",ylab="VIP",cex=0.01)
+lines(seq(Start.wave,End.wave,1),vips,lwd=3)
+abline(h=0.8,lty=2,col="dark grey")
+box(lwd=2.2)
+dev.copy(png,file.path(outdir,paste0(inVar,'_Coefficient_VIP_plot.png')), 
+         height=3100, width=4100, res=340)
+dev.off();
+```
+
+### Bootstrap validation
+```{r, echo=TRUE}
+if(grepl("Windows", sessionInfo()$running)){
+  pls.options(parallel =NULL)
+} else {
+  pls.options(parallel = parallel::detectCores()-1)
+}
+
+### PLSR bootstrap permutation uncertainty analysis
+iterations <- 500    # how many permutation iterations to run
+prop <- 0.70          # fraction of training data to keep for each iteration
+plsr_permutation <- spectratrait::pls_permutation(dataset=cal.plsr.data, maxComps=nComps, 
+                                                  iterations=iterations, prop=prop,
+                                                  verbose = FALSE)
+bootstrap_intercept <- plsr_permutation$coef_array[1,,nComps]
+bootstrap_coef <- plsr_permutation$coef_array[2:length(plsr_permutation$coef_array[,1,nComps]),
+                                              ,nComps]
+rm(plsr_permutation)
+
+# apply coefficients to left-out validation data
+interval <- c(0.025,0.975)
+Bootstrap_Pred <- val.plsr.data$Spectra %*% bootstrap_coef + 
+  matrix(rep(bootstrap_intercept, length(val.plsr.data[,inVar])), byrow=TRUE, 
+         ncol=length(bootstrap_intercept))
+Interval_Conf <- apply(X = Bootstrap_Pred, MARGIN = 1, FUN = quantile, 
+                       probs=c(interval[1], interval[2]))
+sd_mean <- apply(X = Bootstrap_Pred, MARGIN = 1, FUN = sd)
+sd_res <- sd(val.plsr.output$PLSR_Residuals)
+sd_tot <- sqrt(sd_mean^2+sd_res^2)
+val.plsr.output$LCI <- Interval_Conf[1,]
+val.plsr.output$UCI <- Interval_Conf[2,]
+val.plsr.output$LPI <- val.plsr.output$PLSR_Predicted-1.96*sd_tot
+val.plsr.output$UPI <- val.plsr.output$PLSR_Predicted+1.96*sd_tot
+head(val.plsr.output)
+```
+
+### Bootstrap regression coefficient plot
+```{r, fig.height = 6, fig.width = 10, echo=TRUE}
+# Bootstrap regression coefficient plot
+spectratrait::f.plot.coef(Z = t(bootstrap_coef), wv = wv, 
+            plot_label="Bootstrap regression coefficients",position = 'bottomleft')
+abline(h=0,lty=2,col="grey50")
+box(lwd=2.2)
+dev.copy(png,file.path(outdir,paste0(inVar,'_Bootstrap_Regression_Coefficients.png')), 
+         height=2100, width=3800, res=340)
+dev.off();
+```
+
+### Bootstrap validation plot
+```{r, fig.height = 7, fig.width = 8, echo=TRUE}
+RMSEP <- sqrt(mean(val.plsr.output$PLSR_Residuals^2))  # validation RMSEP
+pecr_RMSEP <- RMSEP/mean(val.plsr.output[,inVar])*100  # RMSEP as a % of the mean observed value
+r2 <- round(pls::R2(plsr.out, newdata = val.plsr.data)$val[nComps+1],2)
+expr <- vector("expression", 3)
+expr[[1]] <- bquote(R^2==.(r2))
+expr[[2]] <- bquote(RMSEP==.(round(RMSEP,2)))
+expr[[3]] <- bquote("%RMSEP"==.(round(pecr_RMSEP,2)))
+rng_vals <- c(min(val.plsr.output$LPI), max(val.plsr.output$UPI))
+par(mfrow=c(1,1), mar=c(4.2,5.3,1,0.4), oma=c(0, 0.1, 0, 0.2))
+# First pass: horizontal LPI/UPI bars (predicted +/- 1.96*sd_tot) in light grey, no end caps
+plotrix::plotCI(val.plsr.output$PLSR_Predicted,val.plsr.output[,inVar], 
+       li=val.plsr.output$LPI, ui=val.plsr.output$UPI, gap=0.009,sfrac=0.000, 
+       lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), 
+       err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="grey80",
+       cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"),
+       ylab=paste0("Observed ", paste(inVar), " (units)"), cex.axis=1.5,cex.lab=1.8)
+abline(0,1,lty=2,lwd=2)  # 1:1 line
+# Second pass: overlay the LCI/UCI bars (2.5%/97.5% bootstrap quantiles) in black
+plotrix::plotCI(val.plsr.output$PLSR_Predicted,val.plsr.output[,inVar], 
+       li=val.plsr.output$LCI, ui=val.plsr.output$UCI, gap=0.009,sfrac=0.004, 
+       lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), 
+       err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="black",
+       cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"),
+       ylab=paste0("Observed ", paste(inVar), " (units)"), cex.axis=1.5,cex.lab=1.8, add=TRUE)
+legend("topleft", legend=expr, bty="n", cex=1.5)
+box(lwd=2.2)
+dev.copy(png,file.path(outdir,paste0(inVar,"_PLSR_Validation_Scatterplot.png")), 
+         height=2800, width=3200,  res=340)
+dev.off();
+```
+
+### Output bootstrap results
+```{r, echo=TRUE}
+# Bootstrap Coefficients
+out.jk.coefs <- data.frame(Iteration=seq(1,length(bootstrap_intercept),1),
+                           Intercept=bootstrap_intercept,t(bootstrap_coef))
+names(out.jk.coefs) <- c("Iteration","Intercept",paste0("Wave_",wv))
+head(out.jk.coefs)[1:6]
+write.csv(out.jk.coefs,file=file.path(outdir,paste0(inVar,
+                                                    '_Bootstrap_PLSR_Coefficients.csv')),
+          row.names=FALSE)
+```
+
+### Create core PLSR outputs
+```{r, echo=TRUE}
+print(paste("Output directory: ", outdir))
+
+# Observed versus predicted
+write.csv(cal.plsr.output,file=file.path(outdir,
+                                         paste0(inVar,'_Observed_PLSR_CV_Pred_',
+                                                nComps,'comp.csv')),
+          row.names=FALSE)
+
+# Validation data
+write.csv(val.plsr.output,file=file.path(outdir,
+                                         paste0(inVar,'_Validation_PLSR_Pred_',
+                                                nComps,'comp.csv')),
+          row.names=FALSE)
+
+# Model coefficients
+coefs <- coef(plsr.out,ncomp=nComps,intercept=TRUE)
+write.csv(coefs,file=file.path(outdir,
+                               paste0(inVar,'_PLSR_Coefficients_',
+                                      nComps,'comp.csv')),
+          row.names=TRUE)
+
+# PLSR VIP
+write.csv(vips,file=file.path(outdir,
+                              paste0(inVar,'_PLSR_VIPs_',
+                                     nComps,'comp.csv')))
+```
+
+### Confirm files were written to temp space
+```{r, echo=TRUE}
+print("**** PLSR output files: ")
+print(list.files(outdir)[grep(pattern = inVar, list.files(outdir))])
+```
diff --git a/vignettes/ely_leafN_bootstrap_plsr_example.md b/vignettes/ely_leafN_bootstrap_plsr_example.md
new file mode 100644
index 0000000..24ed1fb
--- /dev/null
+++ b/vignettes/ely_leafN_bootstrap_plsr_example.md
@@ -0,0 +1,803 @@
+Spectra-trait PLSR example using leaf-level spectra and leaf nitrogen
+content (Narea, g/m2) data from eight different crop species growing in
+a glasshouse at Brookhaven National Laboratory.
+================
+Shawn P. Serbin, Julien Lamour, & Jeremiah Anderson
+
+### Overview
+
+This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook to
+illustrate how to load an internal dataset (“ely\_plsr\_data”), choose
+the “optimal” number of PLSR components, and fit a PLSR model for leaf
+nitrogen content (Narea, g/m2).
+
+### Getting Started
+
+### Load libraries
+
+``` r
+list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra",
+                      "spectratrait")
+invisible(lapply(list.of.packages, library, character.only = TRUE))
+```
+
+    ## 
+    ## Attaching package: 'pls'
+
+    ## The following object is masked from 'package:stats':
+    ## 
+    ##     loadings
+
+    ## 
+    ## Attaching package: 'dplyr'
+
+    ## The following objects are masked from 'package:stats':
+    ## 
+    ##     filter, lag
+
+    ## The following objects are masked from 'package:base':
+    ## 
+    ##     intersect, setdiff, setequal, union
+
+    ## here() starts at /Users/sserbin/Data/GitHub/PLSR_for_plant_trait_prediction
+
+    ## 
+    ## Attaching package: 'gridExtra'
+
+    ## The following object is masked from 'package:dplyr':
+    ## 
+    ##     combine
+
+### Setup other functions and options
+
+``` r
+### Setup other functions and options
+# not in
+`%notin%` <- Negate(`%in%`)
+
+# Script options
+pls::pls.options(plsralg = "oscorespls")
+pls::pls.options("plsralg")
+```
+
+    ## $plsralg
+    ## [1] "oscorespls"
+
+``` r
+# Save the default par options so they can be restored later
+opar <- par(no.readonly = T)
+
+# Specify output directory, output_dir
+# Options:
+# tempdir - use an OS-specified temporary directory
+# user defined PATH - e.g. "~/scratch/PLSR"
+output_dir <- "tempdir"
+```
+
+### Load internal Ely et al 2019 dataset
+
+``` r
+data("ely_plsr_data")
+head(ely_plsr_data)[,1:8]
+```
+
+    ##   Species_Code      Common_Name C_N_mass   C_g_m2 H20_g_m2 LMA_g_m2   N_g_m2
+    ## 1        HEAN3 common sunflower     7.58 15.61210   167.63    36.40 2.103694
+    ## 2        HEAN3 common sunflower     8.33 14.73724   164.68    34.65 1.231713
+    ## 3        HEAN3 common sunflower     7.70 15.02495   156.95    35.08 1.764752
+    ## 4        CUSA4  garden cucumber     7.40 11.14835   111.52    26.23 1.287963
+    ## 5        CUSA4  garden cucumber     7.47 11.60735   123.58    26.71 1.411361
+    ## 6        CUSA4  garden cucumber     7.43  8.06035   114.36    18.40 1.117704
+    ##   Wave_500
+    ## 1 4.782000
+    ## 2 4.341714
+    ## 3 4.502857
+    ## 4 3.333429
+    ## 5 3.313571
+    ## 6 3.272286
+
+``` r
+# What is the target variable?
+inVar <- "N_g_m2"
+```
+
+### Set working directory (scratch space)
+
+    ## [1] "/private/var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T/RtmpSup6Vk"
+
+### Full PLSR dataset
+
+``` r
+Start.wave <- 500
+End.wave <- 2400
+wv <- seq(Start.wave,End.wave,1)
+plsr_data <- ely_plsr_data
+head(plsr_data)[,1:6]
+```
+
+    ##   Species_Code      Common_Name C_N_mass   C_g_m2 H20_g_m2 LMA_g_m2
+    ## 1        HEAN3 common sunflower     7.58 15.61210   167.63    36.40
+    ## 2        HEAN3 common sunflower     8.33 14.73724   164.68    34.65
+    ## 3        HEAN3 common sunflower     7.70 15.02495   156.95    35.08
+    ## 4        CUSA4  garden cucumber     7.40 11.14835   111.52    26.23
+    ## 5        CUSA4  garden cucumber     7.47 11.60735   123.58    26.71
+    ## 6        CUSA4  garden cucumber     7.43  8.06035   114.36    18.40
+
+### Create cal/val datasets
+
+``` r
+### Create cal/val datasets
+## Stratified random sampling into cal/val sets, grouped by Species_Code
+
+method <- "base" # options: base/dplyr
+# base R - a bit slow
+# dplyr - much faster
+split_data <- spectratrait::create_data_split(dataset=plsr_data, approach=method, 
+                                              split_seed=23452135, prop=0.7, 
+                                              group_variables="Species_Code")
+```
+
+    ## HEAN3   Cal: 70%
+
+    ## CUSA4   Cal: 68.1818181818182%
+
+    ## CUPE   Cal: 70.5882352941177%
+
+    ## SOLYL   Cal: 70%
+
+    ## OCBA   Cal: 68.4210526315789%
+
+    ## POPUL   Cal: 71.4285714285714%
+
+    ## GLMA4   Cal: 70.5882352941177%
+
+    ## PHVU   Cal: 66.6666666666667%
+
+``` r
+names(split_data)
+```
+
+    ## [1] "cal_data" "val_data"
+
+``` r
+cal.plsr.data <- split_data$cal_data
+head(cal.plsr.data)[1:8]
+```
+
+    ##    Species_Code      Common_Name C_N_mass   C_g_m2 H20_g_m2 LMA_g_m2   N_g_m2
+    ## 1         HEAN3 common sunflower     7.58 15.61210   167.63    36.40 2.103694
+    ## 2         HEAN3 common sunflower     8.33 14.73724   164.68    34.65 1.231713
+    ## 4         CUSA4  garden cucumber     7.40 11.14835   111.52    26.23 1.287963
+    ## 6         CUSA4  garden cucumber     7.43  8.06035   114.36    18.40 1.117704
+    ## 7          CUPE    field pumpkin     7.20 11.43007   128.42    25.83 1.215333
+    ## 10        SOLYL    garden tomato     7.89 11.61918   142.23    27.40 1.304110
+    ##    Wave_500
+    ## 1  4.782000
+    ## 2  4.341714
+    ## 4  3.333429
+    ## 6  3.272286
+    ## 7  2.943143
+    ## 10 4.145714
+
+``` r
+val.plsr.data <- split_data$val_data
+head(val.plsr.data)[1:8]
+```
+
+    ##    Species_Code      Common_Name C_N_mass    C_g_m2 H20_g_m2 LMA_g_m2    N_g_m2
+    ## 3         HEAN3 common sunflower     7.70 15.024947   156.95    35.08 1.7647515
+    ## 5         CUSA4  garden cucumber     7.47 11.607347   123.58    26.71 1.4113615
+    ## 8          CUPE    field pumpkin     7.67 12.466238   124.67    29.22 1.1468413
+    ## 9          CUPE    field pumpkin     7.64 17.100448   142.85    43.39 1.1390174
+    ## 13        SOLYL    garden tomato     7.73  7.938866   129.95    17.96 0.9483533
+    ## 15         OCBA      sweet basil     8.13 16.975969   173.30    38.65 1.1246459
+    ##    Wave_500
+    ## 3  4.502857
+    ## 5  3.313571
+    ## 8  2.868000
+    ## 9  3.338286
+    ## 13 3.960286
+    ## 15 3.744000
+
+``` r
+rm(split_data)
+
+# Datasets:
+print(paste("Cal observations: ",dim(cal.plsr.data)[1],sep=""))
+```
+
+    ## [1] "Cal observations: 124"
+
+``` r
+print(paste("Val observations: ",dim(val.plsr.data)[1],sep=""))
+```
+
+    ## [1] "Val observations: 54"
+
+``` r
+cal_hist_plot <- qplot(cal.plsr.data[,paste0(inVar)],geom="histogram",
+                       main = paste0("Cal. Histogram for ",inVar),
+                       xlab = paste0(inVar),ylab = "Count",fill=I("grey50"),col=I("black"),
+                       alpha=I(.7))
+val_hist_plot <- qplot(val.plsr.data[,paste0(inVar)],geom="histogram",
+                       main = paste0("Val. Histogram for ",inVar),
+                       xlab = paste0(inVar),ylab = "Count",fill=I("grey50"),col=I("black"),
+                       alpha=I(.7))
+histograms <- grid.arrange(cal_hist_plot, val_hist_plot, ncol=2)
+```
+
+    ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
+
+    ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
+
+![](ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-6-1.png)<!-- -->
+
+``` r
+ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Histograms.png")), 
+       plot = histograms, 
+       device="png", width = 30, 
+       height = 12, units = "cm",
+       dpi = 300)
+# output cal/val data
+write.csv(cal.plsr.data,file=file.path(outdir,paste0(inVar,'_Cal_PLSR_Dataset.csv')),
+          row.names=FALSE)
+write.csv(val.plsr.data,file=file.path(outdir,paste0(inVar,'_Val_PLSR_Dataset.csv')),
+          row.names=FALSE)
+```
+
+### Create calibration and validation PLSR datasets
+
+``` r
+### Format PLSR data for model fitting 
+cal_spec <- as.matrix(cal.plsr.data[, which(names(cal.plsr.data) %in% paste0("Wave_",wv))])
+cal.plsr.data <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% paste0("Wave_",wv))],
+                            Spectra=I(cal_spec))
+head(cal.plsr.data)[1:5]
+```
+
+    ##    Species_Code      Common_Name C_N_mass   C_g_m2 H20_g_m2
+    ## 1         HEAN3 common sunflower     7.58 15.61210   167.63
+    ## 2         HEAN3 common sunflower     8.33 14.73724   164.68
+    ## 4         CUSA4  garden cucumber     7.40 11.14835   111.52
+    ## 6         CUSA4  garden cucumber     7.43  8.06035   114.36
+    ## 7          CUPE    field pumpkin     7.20 11.43007   128.42
+    ## 10        SOLYL    garden tomato     7.89 11.61918   142.23
+
+``` r
+val_spec <- as.matrix(val.plsr.data[, which(names(val.plsr.data) %in% paste0("Wave_",wv))])
+val.plsr.data <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% paste0("Wave_",wv))],
+                            Spectra=I(val_spec))
+head(val.plsr.data)[1:5]
+```
+
+    ##    Species_Code      Common_Name C_N_mass    C_g_m2 H20_g_m2
+    ## 3         HEAN3 common sunflower     7.70 15.024947   156.95
+    ## 5         CUSA4  garden cucumber     7.47 11.607347   123.58
+    ## 8          CUPE    field pumpkin     7.67 12.466238   124.67
+    ## 9          CUPE    field pumpkin     7.64 17.100448   142.85
+    ## 13        SOLYL    garden tomato     7.73  7.938866   129.95
+    ## 15         OCBA      sweet basil     8.13 16.975969   173.30
+
+### plot cal and val spectra
+
+``` r
+par(mfrow=c(1,2)) # 1 row x 2 columns: calibration (left), validation (right)
+spectratrait::f.plot.spec(Z=cal.plsr.data$Spectra,wv=wv,plot_label="Calibration")
+spectratrait::f.plot.spec(Z=val.plsr.data$Spectra,wv=wv,plot_label="Validation")
+```
+
+![](ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-8-1.png)<!-- -->
+
+``` r
+dev.copy(png,file.path(outdir,paste0(inVar,'_Cal_Val_Spectra.png')), 
+         height=2500,width=4900, res=340)
+```
+
+    ## quartz_off_screen 
+    ##                 3
+
+``` r
+dev.off();
+```
+
+    ## quartz_off_screen 
+    ##                 2
+
+``` r
+par(mfrow=c(1,1))
+```
+
+### Use permutation to determine optimal number of components
+
+``` r
+### Use permutation to determine the optimal number of components
+if(grepl("Windows", sessionInfo()$running)){
+  pls.options(parallel = NULL)
+} else {
+  pls.options(parallel = parallel::detectCores()-1)
+}
+
+method <- "pls" # options: pls, firstPlateau, firstMin
+random_seed <- 1245565
+seg <- 50
+maxComps <- 16
+iterations <- 80 # only passed on when method is not "pls"
+prop <- 0.70 # only passed on when method is not "pls"
+if (method=="pls") {
+  # pls package approach - faster but estimates more components....
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, 
+                                                  maxComps=maxComps, seg=seg, 
+                                                  random_seed=random_seed)
+  print(paste0("*** Optimal number of components: ", nComps))
+} else {
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method,
+                                                  maxComps=maxComps, iterations=iterations, 
+                                                  seg=seg, prop=prop, 
+                                                  random_seed=random_seed)
+}
+```
+
+    ## [1] "*** Running PLS permutation test ***"
+
+![](ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-9-1.png)<!-- -->
+
+    ## [1] "*** Optimal number of components: 13"
+
+``` r
+dev.copy(png,file.path(outdir,paste0(paste0(inVar,"_PLSR_Component_Selection.png"))), 
+         height=2800, width=3400,  res=340)
+```
+
+    ## quartz_off_screen 
+    ##                 3
+
+``` r
+dev.off();
+```
+
+    ## quartz_off_screen 
+    ##                 2
+
+### Fit final model
+
+``` r
+plsr.out <- plsr(as.formula(paste(inVar,"~","Spectra")),scale=FALSE,ncomp=nComps,validation="LOO",
+                 trace=FALSE,data=cal.plsr.data)
+fit <- plsr.out$fitted.values[,1,nComps] # fitted values for the nComps-component model
+pls.options(parallel = NULL) # disable parallel processing
+
+# External validation fit stats
+par(mfrow=c(1,2)) # 1 row x 2 columns: RMSEP (left), R2 (right)
+pls::RMSEP(plsr.out, newdata = val.plsr.data)
+```
+
+    ## (Intercept)      1 comps      2 comps      3 comps      4 comps      5 comps  
+    ##      0.5908       0.4735       0.4162       0.4037       0.3347       0.3023  
+    ##     6 comps      7 comps      8 comps      9 comps     10 comps     11 comps  
+    ##      0.2993       0.3081       0.2814       0.2445       0.2276       0.2104  
+    ##    12 comps     13 comps  
+    ##      0.1954       0.2003
+
+``` r
+plot(pls::RMSEP(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL RMSEP",
+     xlab="Number of Components",ylab="Model Validation RMSEP",lty=1,col="black",cex=1.5,lwd=2)
+box(lwd=2.2)
+
+pls::R2(plsr.out, newdata = val.plsr.data)
+```
+
+    ## (Intercept)      1 comps      2 comps      3 comps      4 comps      5 comps  
+    ##   -0.004079     0.355010     0.501632     0.531088     0.677620     0.737143  
+    ##     6 comps      7 comps      8 comps      9 comps     10 comps     11 comps  
+    ##    0.742224     0.726835     0.772115     0.827942     0.850962     0.872685  
+    ##    12 comps     13 comps  
+    ##    0.890124     0.884529
+
+``` r
+plot(pls::R2(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL R2",
+     xlab="Number of Components",ylab="Model Validation R2",lty=1,col="black",cex=1.5,lwd=2)
+box(lwd=2.2)
+```
+
+![](ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png)<!-- -->
+
+``` r
+dev.copy(png,file.path(outdir,paste0(paste0(inVar,"_Validation_RMSEP_R2_by_Component.png"))), 
+         height=2800, width=4800,  res=340)
+```
+
+    ## quartz_off_screen 
+    ##                 3
+
+``` r
+dev.off();
+```
+
+    ## quartz_off_screen 
+    ##                 2
+
+``` r
+par(opar)
+```
+
+### PLSR fit observed vs. predicted plot data
+
+``` r
+#calibration
+cal.plsr.output <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% "Spectra")],
+                              PLSR_Predicted=fit,
+                              PLSR_CV_Predicted=as.vector(plsr.out$validation$pred[,,nComps]))
+cal.plsr.output <- cal.plsr.output %>%
+  mutate(PLSR_CV_Residuals = PLSR_CV_Predicted-get(inVar))
+head(cal.plsr.output)
+```
+
+    ##    Species_Code      Common_Name C_N_mass   C_g_m2 H20_g_m2 LMA_g_m2   N_g_m2
+    ## 1         HEAN3 common sunflower     7.58 15.61210   167.63    36.40 2.103694
+    ## 2         HEAN3 common sunflower     8.33 14.73724   164.68    34.65 1.231713
+    ## 4         CUSA4  garden cucumber     7.40 11.14835   111.52    26.23 1.287963
+    ## 6         CUSA4  garden cucumber     7.43  8.06035   114.36    18.40 1.117704
+    ## 7          CUPE    field pumpkin     7.20 11.43007   128.42    25.83 1.215333
+    ## 10        SOLYL    garden tomato     7.89 11.61918   142.23    27.40 1.304110
+    ##    CalVal PLSR_Predicted PLSR_CV_Predicted PLSR_CV_Residuals
+    ## 1     Cal       1.820666          1.702501       -0.40119317
+    ## 2     Cal       1.609632          1.711772        0.48005882
+    ## 4     Cal       1.364985          1.275526       -0.01243687
+    ## 6     Cal       1.126062          1.060119       -0.05758587
+    ## 7     Cal       1.227538          1.226708        0.01137583
+    ## 10    Cal       1.358638          1.365181        0.06107105
+
+``` r
+cal.R2 <- round(pls::R2(plsr.out)$val[nComps+1],2) # +1 skips the intercept-only entry
+cal.RMSEP <- round(sqrt(mean(cal.plsr.output$PLSR_CV_Residuals^2)),2) # RMSE of the CV residuals
+
+val.plsr.output <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% "Spectra")],
+                              PLSR_Predicted=as.vector(predict(plsr.out, 
+                                                               newdata = val.plsr.data, 
+                                                               ncomp=nComps, type="response")[,,1]))
+val.plsr.output <- val.plsr.output %>%
+  mutate(PLSR_Residuals = PLSR_Predicted-get(inVar))
+head(val.plsr.output)
+```
+
+    ##    Species_Code      Common_Name C_N_mass    C_g_m2 H20_g_m2 LMA_g_m2    N_g_m2
+    ## 3         HEAN3 common sunflower     7.70 15.024947   156.95    35.08 1.7647515
+    ## 5         CUSA4  garden cucumber     7.47 11.607347   123.58    26.71 1.4113615
+    ## 8          CUPE    field pumpkin     7.67 12.466238   124.67    29.22 1.1468413
+    ## 9          CUPE    field pumpkin     7.64 17.100448   142.85    43.39 1.1390174
+    ## 13        SOLYL    garden tomato     7.73  7.938866   129.95    17.96 0.9483533
+    ## 15         OCBA      sweet basil     8.13 16.975969   173.30    38.65 1.1246459
+    ##    CalVal PLSR_Predicted PLSR_Residuals
+    ## 3     Val      1.7125176   -0.052233917
+    ## 5     Val      1.4618447    0.050483171
+    ## 8     Val      1.0951891   -0.051652168
+    ## 9     Val      1.2152379    0.076220509
+    ## 13    Val      0.7992342   -0.149119020
+    ## 15    Val      1.1267054    0.002059572
+
+``` r
+val.R2 <- round(pls::R2(plsr.out,newdata=val.plsr.data)$val[nComps+1],2) # +1 skips the intercept-only entry
+val.RMSEP <- round(sqrt(mean(val.plsr.output$PLSR_Residuals^2)),2) # RMSE of the validation residuals
+
+rng_quant <- quantile(cal.plsr.output[,inVar], probs = c(0.001, 0.999))
+cal_scatter_plot <- ggplot(cal.plsr.output, aes(x=PLSR_CV_Predicted, y=get(inVar))) + 
+  theme_bw() + geom_point() + geom_abline(intercept = 0, slope = 1, color="dark grey", 
+                                          linetype="dashed", size=1.5) + xlim(rng_quant[1], 
+                                                                              rng_quant[2]) + 
+  ylim(rng_quant[1], rng_quant[2]) +
+  labs(x=paste0("Predicted ", paste(inVar), " (units)"),
+       y=paste0("Observed ", paste(inVar), " (units)"),
+       title=paste0("Calibration: ", paste0("Rsq = ", cal.R2), "; ", paste0("RMSEP = ", 
+                                                                            cal.RMSEP))) +
+  theme(axis.text=element_text(size=18), legend.position="none",
+        axis.title=element_text(size=20, face="bold"), 
+        axis.text.x = element_text(angle = 0,vjust = 0.5),
+        panel.border = element_rect(linetype = "solid", fill = NA, size=1.5))
+
+cal_resid_histogram <- ggplot(cal.plsr.output, aes(x=PLSR_CV_Residuals)) +
+  geom_histogram(alpha=.5, position="identity") + 
+  geom_vline(xintercept = 0, color="black", 
+             linetype="dashed", size=1) + theme_bw() + 
+  theme(axis.text=element_text(size=18), legend.position="none",
+        axis.title=element_text(size=20, face="bold"), 
+        axis.text.x = element_text(angle = 0,vjust = 0.5),
+        panel.border = element_rect(linetype = "solid", fill = NA, size=1.5))
+
+rng_quant <- quantile(val.plsr.output[,inVar], probs = c(0.001, 0.999))
+val_scatter_plot <- ggplot(val.plsr.output, aes(x=PLSR_Predicted, y=get(inVar))) + 
+  theme_bw() + geom_point() + geom_abline(intercept = 0, slope = 1, color="dark grey", 
+                                          linetype="dashed", size=1.5) + xlim(rng_quant[1], 
+                                                                              rng_quant[2]) + 
+  ylim(rng_quant[1], rng_quant[2]) +
+  labs(x=paste0("Predicted ", paste(inVar), " (units)"),
+       y=paste0("Observed ", paste(inVar), " (units)"),
+       title=paste0("Validation: ", paste0("Rsq = ", val.R2), "; ", paste0("RMSEP = ", 
+                                                                           val.RMSEP))) +
+  theme(axis.text=element_text(size=18), legend.position="none",
+        axis.title=element_text(size=20, face="bold"), 
+        axis.text.x = element_text(angle = 0,vjust = 0.5),
+        panel.border = element_rect(linetype = "solid", fill = NA, size=1.5))
+
+val_resid_histogram <- ggplot(val.plsr.output, aes(x=PLSR_Residuals)) +
+  geom_histogram(alpha=.5, position="identity") + 
+  geom_vline(xintercept = 0, color="black", 
+             linetype="dashed", size=1) + theme_bw() + 
+  theme(axis.text=element_text(size=18), legend.position="none",
+        axis.title=element_text(size=20, face="bold"), 
+        axis.text.x = element_text(angle = 0,vjust = 0.5),
+        panel.border = element_rect(linetype = "solid", fill = NA, size=1.5))
+
+# plot cal/val side-by-side
+scatterplots <- grid.arrange(cal_scatter_plot, val_scatter_plot, cal_resid_histogram, 
+                             val_resid_histogram, nrow=2,ncol=2)
+```
+
+    ## Warning: Removed 3 rows containing missing values (geom_point).
+
+    ## Warning: Removed 3 rows containing missing values (geom_point).
+
+    ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
+    ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
+
+![](ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png)<!-- -->
+
+``` r
+ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Scatterplots.png")), 
+       plot = scatterplots, device="png", 
+       width = 32, 
+       height = 30, units = "cm",
+       dpi = 300)
+```
+
+### Generate Coefficient and VIP plots
+
+``` r
+vips <- spectratrait::VIP(plsr.out)[nComps,]
+par(mfrow=c(2,1))
+plot(plsr.out, plottype = "coef",xlab="Wavelength (nm)",
+     ylab="Regression coefficients",legendpos = "bottomright",
+     ncomp=nComps,lwd=2)
+box(lwd=2.2)
+plot(seq(Start.wave,End.wave,1),vips,xlab="Wavelength (nm)",ylab="VIP",cex=0.01)
+lines(seq(Start.wave,End.wave,1),vips,lwd=3)
+abline(h=0.8,lty=2,col="dark grey")
+box(lwd=2.2)
+```
+
+![](ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png)<!-- -->
+
+``` r
+dev.copy(png,file.path(outdir,paste0(inVar,'_Coefficient_VIP_plot.png')), 
+         height=3100, width=4100, res=340)
+```
+
+    ## quartz_off_screen 
+    ##                 3
+
+``` r
+dev.off();
+```
+
+    ## quartz_off_screen 
+    ##                 2
+
+### Bootstrap validation
+
+``` r
+if(grepl("Windows", sessionInfo()$running)){
+  pls.options(parallel =NULL) # no parallel processing on Windows
+} else {
+  pls.options(parallel = parallel::detectCores()-1) # all detected cores but one
+}
+
+### PLSR bootstrap permutation uncertainty analysis
+iterations <- 500    # how many permutation iterations to run
+prop <- 0.70          # fraction of training data to keep for each iteration
+plsr_permutation <- spectratrait::pls_permutation(dataset=cal.plsr.data, maxComps=nComps, 
+                                                  iterations=iterations, prop=prop,
+                                                  verbose = FALSE)
+```
+
+    ## [1] "*** Running permutation test.  Please hang tight, this can take awhile ***"
+    ## [1] "Options:"
+    ## [1] "Max Components: 13 Iterations: 500 Data Proportion (percent): 70"
+    ## [1] "*** Providing PRESS and coefficient array output ***"
+
+``` r
+bootstrap_intercept <- plsr_permutation$coef_array[1,,nComps] # row 1 is taken as the intercept
+bootstrap_coef <- plsr_permutation$coef_array[2:length(plsr_permutation$coef_array[,1,nComps]),
+                                              ,nComps]
+rm(plsr_permutation)
+
+# apply each bootstrap model's coefficients to the left-out validation data
+interval <- c(0.025,0.975) # lower/upper quantiles, i.e. a 95% interval
+Bootstrap_Pred <- val.plsr.data$Spectra %*% bootstrap_coef + 
+  matrix(rep(bootstrap_intercept, length(val.plsr.data[,inVar])), byrow=TRUE, 
+         ncol=length(bootstrap_intercept))
+Interval_Conf <- apply(X = Bootstrap_Pred, MARGIN = 1, FUN = quantile, 
+                       probs=c(interval[1], interval[2]))
+sd_mean <- apply(X = Bootstrap_Pred, MARGIN = 1, FUN = sd) # per-sample SD across bootstrap predictions
+sd_res <- sd(val.plsr.output$PLSR_Residuals) # SD of the validation residuals
+sd_tot <- sqrt(sd_mean^2+sd_res^2) # combined SD (bootstrap + residual)
+val.plsr.output$LCI <- Interval_Conf[1,] # lower confidence bound (2.5% quantile)
+val.plsr.output$UCI <- Interval_Conf[2,] # upper confidence bound (97.5% quantile)
+val.plsr.output$LPI <- val.plsr.output$PLSR_Predicted-1.96*sd_tot # lower prediction bound
+val.plsr.output$UPI <- val.plsr.output$PLSR_Predicted+1.96*sd_tot # upper prediction bound
+head(val.plsr.output)
+```
+
+    ##    Species_Code      Common_Name C_N_mass    C_g_m2 H20_g_m2 LMA_g_m2    N_g_m2
+    ## 3         HEAN3 common sunflower     7.70 15.024947   156.95    35.08 1.7647515
+    ## 5         CUSA4  garden cucumber     7.47 11.607347   123.58    26.71 1.4113615
+    ## 8          CUPE    field pumpkin     7.67 12.466238   124.67    29.22 1.1468413
+    ## 9          CUPE    field pumpkin     7.64 17.100448   142.85    43.39 1.1390174
+    ## 13        SOLYL    garden tomato     7.73  7.938866   129.95    17.96 0.9483533
+    ## 15         OCBA      sweet basil     8.13 16.975969   173.30    38.65 1.1246459
+    ##    CalVal PLSR_Predicted PLSR_Residuals       LCI       UCI       LPI      UPI
+    ## 3     Val      1.7125176   -0.052233917 1.5070086 1.8760564 1.2810247 2.144011
+    ## 5     Val      1.4618447    0.050483171 1.2909822 1.5475356 1.0541359 1.869553
+    ## 8     Val      1.0951891   -0.051652168 0.9595488 1.2335912 0.6846083 1.505770
+    ## 9     Val      1.2152379    0.076220509 1.0746965 1.3367675 0.8068229 1.623653
+    ## 13    Val      0.7992342   -0.149119020 0.6820207 0.9451323 0.3899050 1.208563
+    ## 15    Val      1.1267054    0.002059572 1.0316572 1.2737521 0.7209233 1.532488
+
+### Bootstrap coefficient plot
+
+``` r
+# Bootstrap regression coefficient plot
+spectratrait::f.plot.coef(Z = t(bootstrap_coef), wv = wv, 
+            plot_label="Bootstrap regression coefficients",position = 'bottomleft')
+abline(h=0,lty=2,col="grey50")
+box(lwd=2.2)
+```
+
+![](ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-14-1.png)<!-- -->
+
+``` r
+dev.copy(png,file.path(outdir,paste0(inVar,'_Bootstrap_Regression_Coefficients.png')), 
+         height=2100, width=3800, res=340)
+```
+
+    ## quartz_off_screen 
+    ##                 3
+
+``` r
+dev.off();
+```
+
+    ## quartz_off_screen 
+    ##                 2
+
+### Bootstrap validation plot
+
+``` r
+RMSEP <- sqrt(mean(val.plsr.output$PLSR_Residuals^2)) # RMSE of the validation residuals
+pecr_RMSEP <- RMSEP/mean(val.plsr.output[,inVar])*100 # RMSEP as a percentage of the mean observed value
+r2 <- round(pls::R2(plsr.out, newdata = val.plsr.data)$val[nComps+1],2) # +1 skips the intercept-only entry
+expr <- vector("expression", 3) # legend entries: R^2, RMSEP, %RMSEP
+expr[[1]] <- bquote(R^2==.(r2))
+expr[[2]] <- bquote(RMSEP==.(round(RMSEP,2)))
+expr[[3]] <- bquote("%RMSEP"==.(round(pecr_RMSEP,2)))
+rng_vals <- c(min(val.plsr.output$LPI), max(val.plsr.output$UPI)) # shared axis limits spanning the prediction bounds
+par(mfrow=c(1,1), mar=c(4.2,5.3,1,0.4), oma=c(0, 0.1, 0, 0.2))
+plotrix::plotCI(val.plsr.output$PLSR_Predicted,val.plsr.output[,inVar], 
+       li=val.plsr.output$LPI, ui=val.plsr.output$UPI, gap=0.009,sfrac=0.000, 
+       lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), 
+       err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="grey80",
+       cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"),
+       ylab=paste0("Observed ", paste(inVar), " (units)"),
+       cex.axis=1.5,cex.lab=1.8)
+abline(0,1,lty=2,lw=2) # dashed 1:1 line; NOTE(review): "lw" is probably meant to be "lwd" - confirm
+plotrix::plotCI(val.plsr.output$PLSR_Predicted,val.plsr.output[,inVar], 
+       li=val.plsr.output$LCI, ui=val.plsr.output$UCI, gap=0.009,sfrac=0.004, 
+       lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), 
+       err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="black",
+       cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"),
+       ylab=paste0("Observed ", paste(inVar), " (units)"),
+       cex.axis=1.5,cex.lab=1.8, add=T)
+legend("topleft", legend=expr, bty="n", cex=1.5)
+box(lwd=2.2)
+```
+
+![](ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-15-1.png)<!-- -->
+
+``` r
+dev.copy(png,file.path(outdir,paste0(inVar,"_PLSR_Validation_Scatterplot.png")), 
+         height=2800, width=3200,  res=340)
+```
+
+    ## quartz_off_screen 
+    ##                 3
+
+``` r
+dev.off();
+```
+
+    ## quartz_off_screen 
+    ##                 2
+
+### Output bootstrap results
+
+``` r
+# Bootstrap coefficients: one row per iteration (intercept + one column per wavelength)
+out.jk.coefs <- data.frame(Iteration=seq(1,length(bootstrap_intercept),1),
+                           Intercept=bootstrap_intercept,t(bootstrap_coef))
+names(out.jk.coefs) <- c("Iteration","Intercept",paste0("Wave_",wv))
+head(out.jk.coefs)[1:6]
+```
+
+    ##   Iteration  Intercept      Wave_500     Wave_501     Wave_502     Wave_503
+    ## 1         1 -0.6617899 -0.0067918917 -0.006451152 -0.005571355 -0.004909648
+    ## 2         2 -0.4636504 -0.0040384348 -0.001804902  0.001375426  0.002477500
+    ## 3         3 -0.8146267  0.0031055624  0.003529288  0.005078394  0.005883173
+    ## 4         4  0.7030872  0.0003883207  0.002887701  0.003961071  0.003223096
+    ## 5         5  0.4765138  0.0045652557  0.005822813  0.005979498  0.004861387
+    ## 6         6  0.4146289  0.0085296345  0.009692141  0.010451131  0.009300204
+
+``` r
+write.csv(out.jk.coefs,file=file.path(outdir,paste0(inVar,
+                                                    '_Bootstrap_PLSR_Coefficients.csv')),
+          row.names=FALSE)
+```
+
+### Create core PLSR outputs
+
+``` r
+print(paste("Output directory: ", outdir))
+```
+
+    ## [1] "Output directory:  /var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T//RtmpSup6Vk"
+
+``` r
+# Observed versus predicted
+write.csv(cal.plsr.output,file=file.path(outdir,
+                                         paste0(inVar,'_Observed_PLSR_CV_Pred_',
+                                                nComps,'comp.csv')),
+          row.names=FALSE)
+
+# Validation data
+write.csv(val.plsr.output,file=file.path(outdir,
+                                         paste0(inVar,'_Validation_PLSR_Pred_',
+                                                nComps,'comp.csv')),
+          row.names=FALSE)
+
+# Model coefficients
+coefs <- coef(plsr.out,ncomp=nComps,intercept=TRUE)
+write.csv(coefs,file=file.path(outdir,
+                               paste0(inVar,'_PLSR_Coefficients_',
+                                      nComps,'comp.csv')),
+          row.names=TRUE)
+
+# PLSR VIP
+write.csv(vips,file=file.path(outdir,
+                              paste0(inVar,'_PLSR_VIPs_',
+                                     nComps,'comp.csv')))
+```
+
+### Confirm files were written to temp space
+
+``` r
+print("**** PLSR output files: ")
+```
+
+    ## [1] "**** PLSR output files: "
+
+``` r
+print(list.files(outdir)[grep(pattern = inVar, list.files(outdir))])
+```
+
+    ##  [1] "N_g_m2_Bootstrap_PLSR_Coefficients.csv"      
+    ##  [2] "N_g_m2_Bootstrap_Regression_Coefficients.png"
+    ##  [3] "N_g_m2_Cal_PLSR_Dataset.csv"                 
+    ##  [4] "N_g_m2_Cal_Val_Histograms.png"               
+    ##  [5] "N_g_m2_Cal_Val_Scatterplots.png"             
+    ##  [6] "N_g_m2_Cal_Val_Spectra.png"                  
+    ##  [7] "N_g_m2_Coefficient_VIP_plot.png"             
+    ##  [8] "N_g_m2_Observed_PLSR_CV_Pred_13comp.csv"     
+    ##  [9] "N_g_m2_PLSR_Coefficients_13comp.csv"         
+    ## [10] "N_g_m2_PLSR_Component_Selection.png"         
+    ## [11] "N_g_m2_PLSR_Validation_Scatterplot.png"      
+    ## [12] "N_g_m2_PLSR_VIPs_13comp.csv"                 
+    ## [13] "N_g_m2_Val_PLSR_Dataset.csv"                 
+    ## [14] "N_g_m2_Validation_PLSR_Pred_13comp.csv"      
+    ## [15] "N_g_m2_Validation_RMSEP_R2_by_Component.png"
diff --git a/vignettes/ely_leafN_bootstrap_plsr_example.pdf b/vignettes/ely_leafN_bootstrap_plsr_example.pdf
new file mode 100644
index 0000000..5e09ade
Binary files /dev/null and b/vignettes/ely_leafN_bootstrap_plsr_example.pdf differ
diff --git a/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png b/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png
new file mode 100644
index 0000000..33f1435
Binary files /dev/null and b/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png differ
diff --git a/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png b/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png
new file mode 100644
index 0000000..5bce084
Binary files /dev/null and b/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png differ
diff --git a/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png b/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png
new file mode 100644
index 0000000..cdc8cab
Binary files /dev/null and b/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png differ
diff --git a/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-14-1.png b/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-14-1.png
new file mode 100644
index 0000000..a61b13f
Binary files /dev/null and b/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-14-1.png differ
diff --git a/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-15-1.png b/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-15-1.png
new file mode 100644
index 0000000..ca2deef
Binary files /dev/null and b/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-15-1.png differ
diff --git a/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-6-1.png b/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-6-1.png
new file mode 100644
index 0000000..6d79827
Binary files /dev/null and b/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-6-1.png differ
diff --git a/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-8-1.png b/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-8-1.png
new file mode 100644
index 0000000..4452988
Binary files /dev/null and b/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-8-1.png differ
diff --git a/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-9-1.png b/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-9-1.png
new file mode 100644
index 0000000..9e5c2df
Binary files /dev/null and b/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-9-1.png differ
diff --git a/vignettes/reseco_lma_plsr_example.Rmd b/vignettes/reseco_lma_plsr_example.Rmd
index 738e25e..6c79b03 100644
--- a/vignettes/reseco_lma_plsr_example.Rmd
+++ b/vignettes/reseco_lma_plsr_example.Rmd
@@ -4,9 +4,9 @@ author: "Shawn P. Serbin, Julien Lamour, & Jeremiah Anderson"
 output:
   github_document: default
   html_notebook: default
-  pdf_document: default
   html_document:
     df_print: paged
+  pdf_document: default
 params:
   date: !r Sys.Date()
 ---
@@ -19,7 +19,7 @@ knitr::opts_chunk$set(echo = TRUE)
 This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook to illustrate how to retrieve a dataset from the EcoSIS spectral database, choose the "optimal" number of plsr components, and fit a plsr model for leaf-mass area (LMA)
 
 ### Getting Started
-### Step 1. Load libraries to run example script
+### Step 1. Load libraries needed to run example script
 ```{r, eval=TRUE, echo=TRUE}
 list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra",
                       "spectratrait")
@@ -109,8 +109,9 @@ plsr_data <- plsr_data[complete.cases(plsr_data[,names(plsr_data) %in%
 method <- "dplyr" #base/dplyr
 # base R - a bit slow
 # dplyr - much faster
-split_data <- spectratrait::create_data_split(dataset=plsr_data, approach=method, split_seed=7529075, 
-                                prop=0.8, group_variables="Species_Code")
+split_data <- spectratrait::create_data_split(dataset=plsr_data, approach=method, 
+                                              split_seed=7529075, prop=0.8, 
+                                              group_variables="Species_Code")
 names(split_data)
 cal.plsr.data <- split_data$cal_data
 head(cal.plsr.data)[1:8]
@@ -134,8 +135,17 @@ val_hist_plot <- qplot(val.plsr.data[,paste0(inVar)],geom="histogram",
                        xlab = paste0(inVar),ylab = "Count",fill=I("grey50"),col=I("black"),
                        alpha=I(.7))
 histograms <- grid.arrange(cal_hist_plot, val_hist_plot, ncol=2)
-ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Histograms.png")), plot = histograms, 
-       device="png", width = 30, height = 12, units = "cm", dpi = 300)
+
+# Figure S1. The resulting leaf mass area (LMA, g/m2) distribution (histogram) for the 
+# calibration (i.e. model training) and validation datasets. The data was split using 
+# the spectratrait::create_data_split() function using "Species_Code" as the 
+# group_variables and using a data split proportion per group of 80% to calibration 
+# and 20% to validation
+``` 
+```{r, echo=TRUE}
+ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Histograms.png")), 
+       plot = histograms, device="png", width = 30, height = 12, units = "cm", 
+       dpi = 300)
 # output cal/val data
 write.csv(cal.plsr.data,file=file.path(outdir,paste0(inVar,'_Cal_PLSR_Dataset.csv')),
           row.names=FALSE)
@@ -146,13 +156,17 @@ write.csv(val.plsr.data,file=file.path(outdir,paste0(inVar,'_Val_PLSR_Dataset.cs
 ### Step 8. Create calibration and validation PLSR datasets
 ```{r, echo=TRUE}
 ### Format PLSR data for model fitting 
-cal_spec <- as.matrix(cal.plsr.data[, which(names(cal.plsr.data) %in% paste0("Wave_",wv))])
-cal.plsr.data <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% paste0("Wave_",wv))],
+cal_spec <- as.matrix(cal.plsr.data[, which(names(cal.plsr.data) %in% 
+                                              paste0("Wave_",wv))])
+cal.plsr.data <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% 
+                                                    paste0("Wave_",wv))], 
                             Spectra=I(cal_spec))
 head(cal.plsr.data)[1:5]
 
-val_spec <- as.matrix(val.plsr.data[, which(names(val.plsr.data) %in% paste0("Wave_",wv))])
-val.plsr.data <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% paste0("Wave_",wv))],
+val_spec <- as.matrix(val.plsr.data[, which(names(val.plsr.data) %in% 
+                                              paste0("Wave_",wv))])
+val.plsr.data <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% 
+                                                    paste0("Wave_",wv))],
                             Spectra=I(val_spec))
 head(val.plsr.data)[1:5]
 ```
@@ -166,12 +180,15 @@ text(550,95,labels = "2.",cex=3)
 spectratrait::f.plot.spec(Z=val.plsr.data$Spectra,wv=wv,
             plot_label="Validation")
 
+# Figure S2. The resulting calibration and validation spectral reflectance distribution by
+# wavelength. The spectra split was done at the same time as LMA, as described in
+# Supplemental Figure S1.
+
 dev.copy(png,file.path(outdir,paste0(inVar,'_Cal_Val_Spectra.png')), 
          height=2500,width=4900, res=340)
 dev.off();
 par(mfrow=c(1,1))
 ```
-
 ### Step 10. Use permutation to determine the optimal number of components
 ```{r, fig.height = 6, fig.width = 10, echo=TRUE}
 ### Use permutation to determine the optimal number of components
@@ -198,7 +215,11 @@ if (method=="pls") {
                                                   seg=seg, prop=prop, 
                                                   random_seed=random_seed)
 }
-print("*** Figure 3. Optimal PLSR component selection ***")
+# Figure S3. Selection of the optimal number of components based on the 
+# minimization of the PRESS statistic.  In this example we show the "firstMin" 
+# option that selects the number of components corresponding to the first 
+# statistical minimum PRESS value (vertical broken blue line).
+
 dev.copy(png,file.path(outdir,paste0(paste0("Figure_3_",inVar,
                                             "_PLSR_Component_Selection.png"))), 
          height=2800, width=3400,  res=340)
@@ -208,8 +229,8 @@ dev.off();
 ### Step 11. Fit final model
 ```{r, fig.height = 5, fig.width = 12, echo=TRUE}
 ### Fit final model - using leave-one-out cross validation
-plsr.out <- plsr(as.formula(paste(inVar,"~","Spectra")),scale=FALSE,ncomp=nComps,validation="LOO",
-                 trace=FALSE,data=cal.plsr.data)
+plsr.out <- plsr(as.formula(paste(inVar,"~","Spectra")),scale=FALSE,ncomp=nComps,
+                 validation="LOO",trace=FALSE,data=cal.plsr.data)
 fit <- plsr.out$fitted.values[,1,nComps]
 pls.options(parallel = NULL)
 
@@ -227,6 +248,9 @@ pls::R2(plsr.out, newdata = val.plsr.data)
 plot(pls::R2(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL R2",
      xlab="Number of Components",ylab="Model Validation R2",lty=1,col="black",cex=1.5,lwd=2)
 box(lwd=2.2)
+# Figure S4. A plot of the validation root mean square error of prediction (RMSEP, left) 
+# and coefficient of determination (right) for 0 up to the optimal number of components
+
 dev.copy(png,file.path(outdir,paste0(paste0(inVar,"_Validation_RMSEP_R2_by_Component.png"))), 
          height=2800, width=4800,  res=340)
 dev.off();
@@ -236,19 +260,23 @@ par(opar)
 ### Step 12. PLSR fit observed vs. predicted plot data
 ```{r, fig.height = 15, fig.width = 15, echo=TRUE}  
 #calibration
-cal.plsr.output <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% "Spectra")],
+cal.plsr.output <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% 
+                                                      "Spectra")],
                               PLSR_Predicted=fit,
-                              PLSR_CV_Predicted=as.vector(plsr.out$validation$pred[,,nComps]))
+                              PLSR_CV_Predicted=as.vector(plsr.out$validation$pred[,,
+                                                                                   nComps]))
 cal.plsr.output <- cal.plsr.output %>%
   mutate(PLSR_CV_Residuals = PLSR_CV_Predicted-get(inVar))
 head(cal.plsr.output)
 cal.R2 <- round(pls::R2(plsr.out)[[1]][nComps],2)
 cal.RMSEP <- round(sqrt(mean(cal.plsr.output$PLSR_CV_Residuals^2)),2)
 
-val.plsr.output <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% "Spectra")],
+val.plsr.output <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% 
+                                                      "Spectra")],
                               PLSR_Predicted=as.vector(predict(plsr.out, 
                                                                newdata = val.plsr.data, 
-                                                               ncomp=nComps, type="response")[,,1]))
+                                                               ncomp=nComps, 
+                                                               type="response")[,,1]))
 val.plsr.output <- val.plsr.output %>%
   mutate(PLSR_Residuals = PLSR_Predicted-get(inVar))
 head(val.plsr.output)
@@ -258,13 +286,13 @@ val.RMSEP <- round(sqrt(mean(val.plsr.output$PLSR_Residuals^2)),2)
 rng_quant <- quantile(cal.plsr.output[,inVar], probs = c(0.001, 0.999))
 cal_scatter_plot <- ggplot(cal.plsr.output, aes(x=PLSR_CV_Predicted, y=get(inVar))) + 
   theme_bw() + geom_point() + geom_abline(intercept = 0, slope = 1, color="dark grey", 
-                                          linetype="dashed", size=1.5) + xlim(rng_quant[1], 
-                                                                              rng_quant[2]) + 
+                                          linetype="dashed", size=1.5) + 
+  xlim(rng_quant[1], rng_quant[2]) + 
   ylim(rng_quant[1], rng_quant[2]) +
   labs(x=paste0("Predicted ", paste(inVar), " (units)"),
        y=paste0("Observed ", paste(inVar), " (units)"),
-       title=paste0("Calibration: ", paste0("Rsq = ", cal.R2), "; ", paste0("RMSEP = ", 
-                                                                            cal.RMSEP))) +
+       title=paste0("Calibration: ", paste0("Rsq = ", cal.R2), "; ", 
+                    paste0("RMSEP = ", cal.RMSEP))) +
   theme(axis.text=element_text(size=18), legend.position="none",
         axis.title=element_text(size=20, face="bold"), 
         axis.text.x = element_text(angle = 0,vjust = 0.5),
@@ -283,13 +311,13 @@ cal_resid_histogram <- ggplot(cal.plsr.output, aes(x=PLSR_CV_Residuals)) +
 rng_quant <- quantile(val.plsr.output[,inVar], probs = c(0.001, 0.999))
 val_scatter_plot <- ggplot(val.plsr.output, aes(x=PLSR_Predicted, y=get(inVar))) + 
   theme_bw() + geom_point() + geom_abline(intercept = 0, slope = 1, color="dark grey", 
-                                          linetype="dashed", size=1.5) + xlim(rng_quant[1], 
-                                                                              rng_quant[2]) + 
+                                          linetype="dashed", size=1.5) + 
+  xlim(rng_quant[1], rng_quant[2]) + 
   ylim(rng_quant[1], rng_quant[2]) +
   labs(x=paste0("Predicted ", paste(inVar), " (units)"),
        y=paste0("Observed ", paste(inVar), " (units)"),
-       title=paste0("Validation: ", paste0("Rsq = ", val.R2), "; ", paste0("RMSEP = ", 
-                                                                           val.RMSEP))) +
+       title=paste0("Validation: ", paste0("Rsq = ", val.R2), "; ", 
+                    paste0("RMSEP = ", val.RMSEP))) +
   theme(axis.text=element_text(size=18), legend.position="none",
         axis.title=element_text(size=20, face="bold"), 
         axis.text.x = element_text(angle = 0,vjust = 0.5),
@@ -307,6 +335,13 @@ val_resid_histogram <- ggplot(val.plsr.output, aes(x=PLSR_Residuals)) +
 # plot cal/val side-by-side
 scatterplots <- grid.arrange(cal_scatter_plot, val_scatter_plot, cal_resid_histogram, 
                              val_resid_histogram, nrow=2, ncol=2)
+# Figure S5. The calibration model and independent validation scatter plot results for 
+# the example LMA PLSR model (top row). Also shown are the calibration model and 
+# validation PLSR residuals, where the calibration results are based on the internal 
+# model cross-validation and the validation residuals are the predicted minus observed 
+# values of LMA.
+```
+```{r, echo=FALSE}  
 ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Scatterplots.png")), 
        plot = scatterplots, device="png", width = 32, height = 30, units = "cm",
        dpi = 300)
@@ -326,6 +361,9 @@ plot(seq(Start.wave,End.wave,1),vips,xlab="Wavelength (nm)",ylab="VIP",cex=0.01)
 lines(seq(Start.wave,End.wave,1),vips,lwd=3)
 abline(h=0.8,lty=2,col="dark grey")
 box(lwd=2.2)
+# Figure S6. The calibration model PLSR regression coefficient (top) and variable 
+# importance in projection (bottom) plots
+
 dev.copy(png,file.path(outdir,paste0(inVar,'_Coefficient_VIP_plot.png')), 
          height=3100, width=4100, res=340)
 dev.off();
@@ -345,8 +383,9 @@ jk.plsr.out <- pls::plsr(as.formula(paste(inVar,"~","Spectra")), scale=FALSE,
                          data=cal.plsr.data)
 pls.options(parallel = NULL)
 
-Jackknife_coef <- spectratrait::f.coef.valid(plsr.out = jk.plsr.out, data_plsr = cal.plsr.data, 
-                               ncomp = nComps, inVar=inVar)
+Jackknife_coef <- spectratrait::f.coef.valid(plsr.out = jk.plsr.out, 
+                                             data_plsr = cal.plsr.data, 
+                                             ncomp = nComps, inVar=inVar)
 Jackknife_intercept <- Jackknife_coef[1,,,]
 Jackknife_coef <- Jackknife_coef[2:dim(Jackknife_coef)[1],,,]
 
@@ -367,18 +406,22 @@ head(val.plsr.output)
 ```
 
 ```{r, fig.height = 6, fig.width = 10, echo=TRUE}
+
 ### Permutation coefficient plot
 spectratrait::f.plot.coef(Z = t(Jackknife_coef), wv = wv, 
             plot_label="Jackknife regression coefficients",position = 'bottomleft')
 abline(h=0,lty=2,col="grey50")
 legend("topleft",legend = "7.", cex=2, bty="n")
 box(lwd=2.2)
+# Figure S7. The calibration model jackknife PLSR regression coefficients 
+
 dev.copy(png,file.path(outdir,paste0(inVar,'_Jackknife_Regression_Coefficients.png')), 
          height=2100, width=3800, res=340)
 dev.off();
 ```
 
 ```{r, fig.height = 7, fig.width = 8, echo=TRUE}
+
 ### Permutation validation plot
 RMSEP <- sqrt(mean(val.plsr.output$PLSR_Residuals^2))
 pecr_RMSEP <- RMSEP/mean(val.plsr.output[,inVar])*100
@@ -400,6 +443,11 @@ abline(0,1,lty=2,lw=2)
 legend("topleft", legend=expr, bty="n", cex=1.5)
 legend("bottomright", legend="8.", bty="n", cex=2.2)
 box(lwd=2.2)
+# Figure S8. Independent validation results for the LMA PLSR model with associated 
+# jackknife uncertainty estimate 95% prediction intervals for each estimated LMA 
+# value. The %RMSEP is the validation RMSEP expressed as a percentage of the 
+# mean observed response, in this case the mean of the validation LMA values
+
 dev.copy(png,file.path(outdir,paste0(inVar,"_PLSR_Validation_Scatterplot.png")), 
          height=2800, width=3200,  res=340)
 dev.off();
@@ -411,7 +459,8 @@ out.jk.coefs <- data.frame(Iteration=seq(1,length(Jackknife_intercept),1),
                            Intercept=Jackknife_intercept,t(Jackknife_coef))
 head(out.jk.coefs)[1:6]
 write.csv(out.jk.coefs,file=file.path(outdir,
-                                      paste0(inVar,'_Jackkife_PLSR_Coefficients.csv')),
+                                      paste0(inVar,
+                                             '_Jackkife_PLSR_Coefficients.csv')),
           row.names=FALSE)
 ```
 
diff --git a/vignettes/reseco_lma_plsr_example.md b/vignettes/reseco_lma_plsr_example.md
index 9f30f63..e10e879 100644
--- a/vignettes/reseco_lma_plsr_example.md
+++ b/vignettes/reseco_lma_plsr_example.md
@@ -13,7 +13,7 @@ leaf-mass area (LMA)
 
 ### Getting Started
 
-### Step 1. Load libraries to run example script
+### Step 1. Load libraries needed to run example script
 
 ``` r
 list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra",
@@ -82,7 +82,7 @@ output_dir <- "tempdir"
 
 ### Step 3. Set working directory (scratch space)
 
-    ## [1] "/private/var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T/Rtmp1Hsn79"
+    ## [1] "/private/var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T/RtmpiBKiLO"
 
 ### Step 4. Pull example dataset from EcoSIS (ecosis.org)
 
@@ -266,8 +266,9 @@ plsr_data <- plsr_data[complete.cases(plsr_data[,names(plsr_data) %in%
 method <- "dplyr" #base/dplyr
 # base R - a bit slow
 # dplyr - much faster
-split_data <- spectratrait::create_data_split(dataset=plsr_data, approach=method, split_seed=7529075, 
-                                prop=0.8, group_variables="Species_Code")
+split_data <- spectratrait::create_data_split(dataset=plsr_data, approach=method, 
+                                              split_seed=7529075, prop=0.8, 
+                                              group_variables="Species_Code")
 names(split_data)
 ```
 
@@ -349,8 +350,17 @@ histograms <- grid.arrange(cal_hist_plot, val_hist_plot, ncol=2)
 ![](reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-7-1.png)<!-- -->
 
 ``` r
-ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Histograms.png")), plot = histograms, 
-       device="png", width = 30, height = 12, units = "cm", dpi = 300)
+# Figure S1. The resulting leaf mass area (LMA, g/m2) distribution (histogram) for the 
+# calibration (i.e. model training) and validation datasets. The data was split using 
+# the spectratrait::create_data_split() function using "Species_Code" as the 
+# group_variables and using a data split proportion per group of 80% to calibration 
+# and 20% to validation
+```
+
+``` r
+ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Histograms.png")), 
+       plot = histograms, device="png", width = 30, height = 12, units = "cm", 
+       dpi = 300)
 # output cal/val data
 write.csv(cal.plsr.data,file=file.path(outdir,paste0(inVar,'_Cal_PLSR_Dataset.csv')),
           row.names=FALSE)
@@ -362,8 +372,10 @@ write.csv(val.plsr.data,file=file.path(outdir,paste0(inVar,'_Val_PLSR_Dataset.cs
 
 ``` r
 ### Format PLSR data for model fitting 
-cal_spec <- as.matrix(cal.plsr.data[, which(names(cal.plsr.data) %in% paste0("Wave_",wv))])
-cal.plsr.data <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% paste0("Wave_",wv))],
+cal_spec <- as.matrix(cal.plsr.data[, which(names(cal.plsr.data) %in% 
+                                              paste0("Wave_",wv))])
+cal.plsr.data <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% 
+                                                    paste0("Wave_",wv))], 
                             Spectra=I(cal_spec))
 head(cal.plsr.data)[1:5]
 ```
@@ -377,8 +389,10 @@ head(cal.plsr.data)[1:5]
     ## 6 Ammophila arenaria       Ammare  ZC3 0.01802409 180.2409
 
 ``` r
-val_spec <- as.matrix(val.plsr.data[, which(names(val.plsr.data) %in% paste0("Wave_",wv))])
-val.plsr.data <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% paste0("Wave_",wv))],
+val_spec <- as.matrix(val.plsr.data[, which(names(val.plsr.data) %in% 
+                                              paste0("Wave_",wv))])
+val.plsr.data <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% 
+                                                    paste0("Wave_",wv))],
                             Spectra=I(val_spec))
 head(val.plsr.data)[1:5]
 ```
@@ -402,9 +416,13 @@ spectratrait::f.plot.spec(Z=val.plsr.data$Spectra,wv=wv,
             plot_label="Validation")
 ```
 
-![](reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-9-1.png)<!-- -->
+![](reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png)<!-- -->
 
 ``` r
+# Figure S2. The resulting calibration and validation spectral reflectance distribution by
+# wavelength. The spectra split was done at the same time as LMA, as described in
+# Supplemental Figure S1.
+
 dev.copy(png,file.path(outdir,paste0(inVar,'_Cal_Val_Spectra.png')), 
          height=2500,width=4900, res=340)
 ```
@@ -461,15 +479,14 @@ if (method=="pls") {
 
     ## [1] "*** Optimal number of components based on t.test: 11"
 
-![](reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png)<!-- -->
+![](reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png)<!-- -->
 
 ``` r
-print("*** Figure 3. Optimal PLSR component selection ***")
-```
+# Figure S3. Selection of the optimal number of components based on the 
+# minimization of the PRESS statistic.  In this example we show the "firstMin" 
+# option that selects the number of components corresponding to the first 
+# statistical minimum PRESS value (vertical broken blue line).
 
-    ## [1] "*** Figure 3. Optimal PLSR component selection ***"
-
-``` r
 dev.copy(png,file.path(outdir,paste0(paste0("Figure_3_",inVar,
                                             "_PLSR_Component_Selection.png"))), 
          height=2800, width=3400,  res=340)
@@ -489,8 +506,8 @@ dev.off();
 
 ``` r
 ### Fit final model - using leave-one-out cross validation
-plsr.out <- plsr(as.formula(paste(inVar,"~","Spectra")),scale=FALSE,ncomp=nComps,validation="LOO",
-                 trace=FALSE,data=cal.plsr.data)
+plsr.out <- plsr(as.formula(paste(inVar,"~","Spectra")),scale=FALSE,ncomp=nComps,
+                 validation="LOO",trace=FALSE,data=cal.plsr.data)
 fit <- plsr.out$fitted.values[,1,nComps]
 pls.options(parallel = NULL)
 
@@ -526,9 +543,12 @@ plot(pls::R2(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL R
 box(lwd=2.2)
 ```
 
-![](reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png)<!-- -->
+![](reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png)<!-- -->
 
 ``` r
+# Figure S4. A plot of the validation root mean square error of prediction (RMSEP, left) 
+# and coefficient of determination (right) for 0 up to the optimal number of components
+
 dev.copy(png,file.path(outdir,paste0(paste0(inVar,"_Validation_RMSEP_R2_by_Component.png"))), 
          height=2800, width=4800,  res=340)
 ```
@@ -551,9 +571,11 @@ par(opar)
 
 ``` r
 #calibration
-cal.plsr.output <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% "Spectra")],
+cal.plsr.output <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% 
+                                                      "Spectra")],
                               PLSR_Predicted=fit,
-                              PLSR_CV_Predicted=as.vector(plsr.out$validation$pred[,,nComps]))
+                              PLSR_CV_Predicted=as.vector(plsr.out$validation$pred[,,
+                                                                                   nComps]))
 cal.plsr.output <- cal.plsr.output %>%
   mutate(PLSR_CV_Residuals = PLSR_CV_Predicted-get(inVar))
 head(cal.plsr.output)
@@ -578,10 +600,12 @@ head(cal.plsr.output)
 cal.R2 <- round(pls::R2(plsr.out)[[1]][nComps],2)
 cal.RMSEP <- round(sqrt(mean(cal.plsr.output$PLSR_CV_Residuals^2)),2)
 
-val.plsr.output <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% "Spectra")],
+val.plsr.output <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% 
+                                                      "Spectra")],
                               PLSR_Predicted=as.vector(predict(plsr.out, 
                                                                newdata = val.plsr.data, 
-                                                               ncomp=nComps, type="response")[,,1]))
+                                                               ncomp=nComps, 
+                                                               type="response")[,,1]))
 val.plsr.output <- val.plsr.output %>%
   mutate(PLSR_Residuals = PLSR_Predicted-get(inVar))
 head(val.plsr.output)
@@ -609,13 +633,13 @@ val.RMSEP <- round(sqrt(mean(val.plsr.output$PLSR_Residuals^2)),2)
 rng_quant <- quantile(cal.plsr.output[,inVar], probs = c(0.001, 0.999))
 cal_scatter_plot <- ggplot(cal.plsr.output, aes(x=PLSR_CV_Predicted, y=get(inVar))) + 
   theme_bw() + geom_point() + geom_abline(intercept = 0, slope = 1, color="dark grey", 
-                                          linetype="dashed", size=1.5) + xlim(rng_quant[1], 
-                                                                              rng_quant[2]) + 
+                                          linetype="dashed", size=1.5) + 
+  xlim(rng_quant[1], rng_quant[2]) + 
   ylim(rng_quant[1], rng_quant[2]) +
   labs(x=paste0("Predicted ", paste(inVar), " (units)"),
        y=paste0("Observed ", paste(inVar), " (units)"),
-       title=paste0("Calibration: ", paste0("Rsq = ", cal.R2), "; ", paste0("RMSEP = ", 
-                                                                            cal.RMSEP))) +
+       title=paste0("Calibration: ", paste0("Rsq = ", cal.R2), "; ", 
+                    paste0("RMSEP = ", cal.RMSEP))) +
   theme(axis.text=element_text(size=18), legend.position="none",
         axis.title=element_text(size=20, face="bold"), 
         axis.text.x = element_text(angle = 0,vjust = 0.5),
@@ -634,13 +658,13 @@ cal_resid_histogram <- ggplot(cal.plsr.output, aes(x=PLSR_CV_Residuals)) +
 rng_quant <- quantile(val.plsr.output[,inVar], probs = c(0.001, 0.999))
 val_scatter_plot <- ggplot(val.plsr.output, aes(x=PLSR_Predicted, y=get(inVar))) + 
   theme_bw() + geom_point() + geom_abline(intercept = 0, slope = 1, color="dark grey", 
-                                          linetype="dashed", size=1.5) + xlim(rng_quant[1], 
-                                                                              rng_quant[2]) + 
+                                          linetype="dashed", size=1.5) + 
+  xlim(rng_quant[1], rng_quant[2]) + 
   ylim(rng_quant[1], rng_quant[2]) +
   labs(x=paste0("Predicted ", paste(inVar), " (units)"),
        y=paste0("Observed ", paste(inVar), " (units)"),
-       title=paste0("Validation: ", paste0("Rsq = ", val.R2), "; ", paste0("RMSEP = ", 
-                                                                           val.RMSEP))) +
+       title=paste0("Validation: ", paste0("Rsq = ", val.R2), "; ", 
+                    paste0("RMSEP = ", val.RMSEP))) +
   theme(axis.text=element_text(size=18), legend.position="none",
         axis.title=element_text(size=20, face="bold"), 
         axis.text.x = element_text(angle = 0,vjust = 0.5),
@@ -667,12 +691,14 @@ scatterplots <- grid.arrange(cal_scatter_plot, val_scatter_plot, cal_resid_histo
     ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
     ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
 
-![](reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png)<!-- -->
+![](reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-13-1.png)<!-- -->
 
 ``` r
-ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Scatterplots.png")), 
-       plot = scatterplots, device="png", width = 32, height = 30, units = "cm",
-       dpi = 300)
+# Figure S5. The calibration model and independent validation scatter plot results for 
+# the example LMA PLSR model (top row). Also shown are the calibration model and 
+# validation PLSR residuals, where the calibration results are based on the internal 
+# model cross-validation and the validation residuals are the predicted minus observed 
+# values of LMA.
 ```
 
 ### Step 13. Generate Coefficient and VIP plots
@@ -692,9 +718,12 @@ abline(h=0.8,lty=2,col="dark grey")
 box(lwd=2.2)
 ```
 
-![](reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-13-1.png)<!-- -->
+![](reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-15-1.png)<!-- -->
 
 ``` r
+# Figure S6. The calibration model PLSR regression coefficient (top) and variable 
+# importance in projection (bottom) plots
+
 dev.copy(png,file.path(outdir,paste0(inVar,'_Coefficient_VIP_plot.png')), 
          height=3100, width=4100, res=340)
 ```
@@ -724,8 +753,9 @@ jk.plsr.out <- pls::plsr(as.formula(paste(inVar,"~","Spectra")), scale=FALSE,
                          data=cal.plsr.data)
 pls.options(parallel = NULL)
 
-Jackknife_coef <- spectratrait::f.coef.valid(plsr.out = jk.plsr.out, data_plsr = cal.plsr.data, 
-                               ncomp = nComps, inVar=inVar)
+Jackknife_coef <- spectratrait::f.coef.valid(plsr.out = jk.plsr.out, 
+                                             data_plsr = cal.plsr.data, 
+                                             ncomp = nComps, inVar=inVar)
 Jackknife_intercept <- Jackknife_coef[1,,,]
 Jackknife_coef <- Jackknife_coef[2:dim(Jackknife_coef)[1],,,]
 
@@ -769,9 +799,11 @@ legend("topleft",legend = "7.", cex=2, bty="n")
 box(lwd=2.2)
 ```
 
-![](reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-15-1.png)<!-- -->
+![](reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-17-1.png)<!-- -->
 
 ``` r
+# Figure S7. The calibration model jackknife PLSR regression coefficients 
+
 dev.copy(png,file.path(outdir,paste0(inVar,'_Jackknife_Regression_Coefficients.png')), 
          height=2100, width=3800, res=340)
 ```
@@ -810,9 +842,14 @@ legend("bottomright", legend="8.", bty="n", cex=2.2)
 box(lwd=2.2)
 ```
 
-![](reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-16-1.png)<!-- -->
+![](reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-18-1.png)<!-- -->
 
 ``` r
+# Figure S8. Independent validation results for the LMA PLSR model with associated 
+# jackknife uncertainty estimate 95% prediction intervals for each estimated LMA 
+# value. The %RMSEP is the validation RMSEP expressed as a percentage of the 
+# mean observed response, in this case the mean of the validation LMA values
+
 dev.copy(png,file.path(outdir,paste0(inVar,"_PLSR_Validation_Scatterplot.png")), 
          height=2800, width=3200,  res=340)
 ```
@@ -845,7 +882,8 @@ head(out.jk.coefs)[1:6]
 
 ``` r
 write.csv(out.jk.coefs,file=file.path(outdir,
-                                      paste0(inVar,'_Jackkife_PLSR_Coefficients.csv')),
+                                      paste0(inVar,
+                                             '_Jackkife_PLSR_Coefficients.csv')),
           row.names=FALSE)
 ```
 
@@ -855,7 +893,7 @@ write.csv(out.jk.coefs,file=file.path(outdir,
 print(paste("Output directory: ", outdir))
 ```
 
-    ## [1] "Output directory:  /var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T//Rtmp1Hsn79"
+    ## [1] "Output directory:  /var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T//RtmpiBKiLO"
 
 ``` r
 # Observed versus predicted
diff --git a/vignettes/reseco_lma_plsr_example.pdf b/vignettes/reseco_lma_plsr_example.pdf
index 54fe0e3..2f2940f 100644
Binary files a/vignettes/reseco_lma_plsr_example.pdf and b/vignettes/reseco_lma_plsr_example.pdf differ
diff --git a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png
index 46b86ae..5e9c7db 100644
Binary files a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png and b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png differ
diff --git a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png
index 0aa5976..46b86ae 100644
Binary files a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png and b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png differ
diff --git a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png
index 43a32eb..0aa5976 100644
Binary files a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png and b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png differ
diff --git a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-13-1.png b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-13-1.png
index 9c6878f..43a32eb 100644
Binary files a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-13-1.png and b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-13-1.png differ
diff --git a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-15-1.png b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-15-1.png
index 2360ecc..9c6878f 100644
Binary files a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-15-1.png and b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-15-1.png differ
diff --git a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-17-1.png b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-17-1.png
new file mode 100644
index 0000000..2360ecc
Binary files /dev/null and b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-17-1.png differ
diff --git a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-18-1.png b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-18-1.png
new file mode 100644
index 0000000..c8e9e03
Binary files /dev/null and b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-18-1.png differ