diff --git a/.gitignore b/.gitignore
index ded0e78..b08a346 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,6 @@
+# R test files
+Rplots.pdf
+
 # History files
 .Rhistory
 .Rapp.history
diff --git a/DESCRIPTION b/DESCRIPTION
index f75d634..64dc885 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: spectratrait
 Title: A simple add-on package to aid in the fitting of leaf-level spectra-trait PLSR models
-Version: 1.0.5
+Version: 1.1.0
 Authors@R:
   c(person(given = "Julien",
            family = "Lamour",
@@ -33,12 +33,13 @@ License: MIT + file LICENSE
 Encoding: UTF-8
 LazyData: true
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.1.1
+RoxygenNote: 7.1.2
 Imports: 
     httr (>= 1.4.2),
     readr (>= 1.3.1),
     pls (>= 2.7-2),
     dplyr (>= 1.0.1),
+    magrittr (>= 2.0.1),
     reshape2 (>= 1.4.4),
     here (>= 0.1),
     plotrix (>= 3.7-8),
diff --git a/NAMESPACE b/NAMESPACE
index abc135e..ff9a101 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -7,13 +7,36 @@ export(create_data_split)
 export(f.coef.valid)
 export(f.plot.coef)
 export(f.plot.spec)
+export(find_optimal_comp_by_groups)
 export(find_optimal_components)
 export(get_ecosis_data)
 export(percent_rmse)
 export(pls_permutation)
+export(pls_permutation_by_groups)
 export(source_GitHubData)
+import(ggplot2)
 import(httr)
+importFrom(dplyr,all_of)
+importFrom(dplyr,group_by_at)
+importFrom(dplyr,mutate)
+importFrom(dplyr,n)
+importFrom(dplyr,row_number)
+importFrom(dplyr,slice)
+importFrom(dplyr,vars)
+importFrom(graphics,box)
+importFrom(graphics,legend)
+importFrom(graphics,lines)
+importFrom(graphics,polygon)
+importFrom(magrittr,"%>%")
 importFrom(pls,plsr)
+importFrom(pls,selectNcomp)
+importFrom(readr,read_csv)
+importFrom(reshape2,melt)
+importFrom(stats,as.formula)
+importFrom(stats,coef)
+importFrom(stats,predict)
+importFrom(stats,quantile)
+importFrom(stats,t.test)
 importFrom(utils,flush.console)
 importFrom(utils,read.table)
 importFrom(utils,setTxtProgressBar)
diff --git a/R/create_data_split.R b/R/create_data_split.R
index b84efc0..d558af7 100644
--- a/R/create_data_split.R
+++ b/R/create_data_split.R
@@ -5,15 +5,19 @@
 ##' @param split_seed random seed to use for splitting data
 ##' @param prop the proportion of data to preserve for calibration (e.g. 0.8) and validation (0.2). 
 ##' This sets the calibration proportion
-##' @param group_variables Use factor variables to conduct a stratfied sampling for cal/val
+##' @param group_variables Use factor variables to conduct a stratified sampling for cal/val
 ##' 
 ##' @return output_list A list containing the calibration dataset (cal_data)
 ##' and validation dataset (val_data)
 ##' 
+##' @importFrom magrittr %>%
+##' @importFrom dplyr mutate group_by_at slice n vars all_of 
+##' 
 ##' @author Julien Lamour, Jeremiah Anderson, Shawn P. Serbin
 ##' @export
 create_data_split <- function(dataset=NULL, approach=NULL, split_seed=123456789, prop=0.8,
                               group_variables=NULL) {
+  # TODO: import only required functions from dplyr
   set.seed(split_seed)
   
   # outer if/else to stop if approach set to NULL
diff --git a/R/f.plot.coef.R b/R/f.plot.coef.R
index 0b92d31..9ea0629 100644
--- a/R/f.plot.coef.R
+++ b/R/f.plot.coef.R
@@ -7,6 +7,9 @@
 ##' @param type Name of the y axis and of the legend
 ##' @param plot_label optional plot label to include with the figure
 ##' 
+##' @importFrom stats quantile
+##' @importFrom graphics polygon lines legend box
+##' 
 ##' @author Julien Lamour
 ##' @export
 f.plot.coef <- function(
diff --git a/R/f.plot.spec.R b/R/f.plot.spec.R
index 264af77..f0798d1 100644
--- a/R/f.plot.spec.R
+++ b/R/f.plot.spec.R
@@ -7,6 +7,9 @@
 ##' @param type Name of the y axis and of the legend. E.g. Reflectance, Transmittance
 ##' @param plot_label optional plot label to include with the figure
 ##' 
+##' @importFrom stats quantile
+##' @importFrom graphics polygon lines legend box
+##' 
 ##' @author Julien Lamour, Shawn P. Serbin
 ##' @export
 f.plot.spec <- function(
diff --git a/R/find_optimal_components.R b/R/find_optimal_components.R
index e53e544..92a49b4 100644
--- a/R/find_optimal_components.R
+++ b/R/find_optimal_components.R
@@ -1,6 +1,8 @@
-##' Apply different methods to determing the optimal number of PLSR model components
+##' Applies different methods for the determination of the optimal number of PLSR model components
 ##' 
 ##' @param dataset input full PLSR dataset. Usually just the calibration dataset
+##' @param targetVariable What object or variable to use as the Y (predictand) in the PLSR model? 
+##' Usually the "inVar" variable set at the beginning of a PLS script
 ##' @param method Which approach to use to find optimal components. Options: pls, firstPlateau, firstMin
 ##' @param maxComps maximum number of components to consider
 ##' @param iterations how many different permutations to run
@@ -8,23 +10,32 @@
 ##' @param prop proportion of data to preserve for each permutation
 ##' @param random_seed random seed to use for splitting data
 ##' 
+##' @importFrom stats as.formula coef predict quantile t.test
+##' @importFrom pls plsr selectNcomp
+##' @importFrom reshape2 melt
+##' @import ggplot2
+##' 
 ##' @return nComps the optimal number of PLSR components
 ##' 
 ##' @author Julien Lamour, Jeremiah Anderson, Shawn P. Serbin
 ##' @export 
-find_optimal_components <- function(dataset=NULL, method="pls", maxComps=20, iterations=20, seg=100, 
-                                    prop=0.70, random_seed=123456789) {
+find_optimal_components <- function(dataset=NULL, targetVariable=NULL, method="pls", maxComps=20, 
+                                    iterations=20, seg=100, prop=0.70, random_seed=123456789) {
+  
   set.seed(random_seed)
+  inVar <- targetVariable
+  print("*** Identifying optimal number of PLSR components ***")
+  
   if(method=="pls") {
     print("*** Running PLS permutation test ***")
     
     plsr.out <- pls::plsr(as.formula(paste(inVar,"~","Spectra")), scale=FALSE, center=TRUE, ncomp=maxComps, 
                           validation="CV", segments = seg, segment.type="interleaved", trace=FALSE, 
-                          jackknife=TRUE, data=cal.plsr.data)
-    nComps <- selectNcomp(plsr.out, method = "onesigma", plot = TRUE)
+                          jackknife=TRUE, data=dataset)
+    nComps <- pls::selectNcomp(plsr.out, method = "onesigma", plot = TRUE)
   }
   if(method=="firstPlateau") {
-    press.out <- spectratrait::pls_permutation(dataset=dataset, maxComps=maxComps, 
+    press.out <- spectratrait::pls_permutation(dataset=dataset, targetVariable=inVar, maxComps=maxComps, 
                                                iterations=iterations, prop=prop)
     # PRESS plot
     pressDF <- as.data.frame(press.out$PRESS)
@@ -50,7 +61,7 @@ find_optimal_components <- function(dataset=NULL, method="pls", maxComps=20, ite
     print(bp)
   }
   if(method=="firstMin") {
-    press.out <- spectratrait::pls_permutation(dataset=dataset, maxComps=maxComps, 
+    press.out <- spectratrait::pls_permutation(dataset=dataset, targetVariable=inVar, maxComps=maxComps, 
                                                iterations=iterations, prop=prop)
     # PRESS plot
     pressDF <- as.data.frame(press.out$PRESS)
@@ -84,4 +95,103 @@ find_optimal_components <- function(dataset=NULL, method="pls", maxComps=20, ite
     print(bp)
   }
   return(nComps)
+}
+
+##' Uses the firstMin and firstPlateau methods for the determination of the optimal number of PLSR model components,
+##' by group (i.e. optimal selection by stratification)
+##' 
+##' @param dataset input full PLSR dataset. Usually just the calibration dataset
+##' @param targetVariable What object or variable to use as the Y (predictand) in the PLSR model? 
+##' Usually the "inVar" variable set at the beginning of a PLS script
+##' @param method Which approach to use to find optimal components. Options: firstPlateau, firstMin
+##' @param maxComps maximum number of components to consider
+##' @param iterations how many different permutations to run
+##' @param prop proportion of data to preserve for each permutation
+##' @param random_seed random seed to use for splitting data
+##' @param group_variables character vector of the form c("var1", "var2"..."varn") 
+##' providing the factors used for stratified sampling. 
+##' 
+##' @importFrom stats as.formula coef predict quantile t.test
+##' @import ggplot2
+##' @importFrom reshape2 melt
+##' 
+##' @return nComps the optimal number of PLSR components
+##' 
+##' @author asierrl, Shawn P. Serbin
+##' @export 
+find_optimal_comp_by_groups <- function (dataset = NULL, targetVariable = NULL, method = "firstPlateau",
+                                         maxComps = 20, iterations = 20, prop = 0.7, random_seed = 123456789, 
+                                         group_variables=NULL) {
+  set.seed(random_seed)
+  inVar <- targetVariable
+  # TODO - really should merge this with the original and have an if/else if not NULL and select either
+  # pls_permutation OR pls_permutation_by_groups.
+  print("*** Identifying optimal number of PLSR components using stratified resampling by group_variables ***")
+  if (method == "pls") {
+    stop("*** Please select either the firstMin and firstPlateau. The pls package approach is not compatible ***")
+  }
+  if (method == "firstPlateau") {
+    # firstPlateau: smallest component whose PRESS does not differ (t-test, p > 0.05) from the previous one
+    press.out <- spectratrait::pls_permutation_by_groups(dataset=dataset, targetVariable=inVar,
+                                                    maxComps=maxComps, iterations=iterations, 
+                                                    prop=prop, group_variables=group_variables)
+    pressDF <- as.data.frame(press.out$PRESS)
+    names(pressDF) <- as.character(seq(maxComps))
+    pressDFres <- reshape2::melt(pressDF)
+    results <- NULL
+    for (i in 1:(maxComps - 1)) {
+      p_value <- t.test(press.out$PRESS[, i], press.out$PRESS[, (i + 1)])$p.value
+      temp_results <- data.frame(Component = (i + 1), P.value = round(p_value, 6))
+      results <- rbind(results, temp_results)
+    }
+    nComps <- min(results[results$P.value > 0.05, "Component"])
+    print(paste0("*** Optimal number of components based on t.test: ", nComps))
+    bp <- ggplot(pressDFres, aes(x = variable, y = value)) + 
+      theme_bw() + geom_boxplot(notch = FALSE) + 
+      labs(x = "Number of Components", y = "PRESS") + 
+      stat_boxplot(geom = "errorbar", width = 0.2) + 
+      geom_vline(xintercept = nComps, linetype = "dashed", color = "blue", size = 1) + 
+      # theme() must be chained with "+"; as a separate statement it is evaluated and discarded
+      theme(axis.text = element_text(size = 18), legend.position = "none", 
+            axis.title = element_text(size = 20, face = "bold"), 
+            axis.text.x = element_text(angle = 0, vjust = 0.5), 
+            panel.border = element_rect(linetype = "solid", fill = NA, size = 1.5))
+    print(bp)
+  }
+  if (method == "firstMin") {
+    # firstMin: smallest component whose PRESS does not differ (t-test, p > 0.05) from the lowest-mean-PRESS one
+    press.out <- spectratrait::pls_permutation_by_groups(dataset = dataset, targetVariable=inVar,
+                                                   maxComps=maxComps, iterations=iterations, 
+                                                   prop=prop, group_variables=group_variables)
+    pressDF <- as.data.frame(press.out$PRESS)
+    names(pressDF) <- as.character(seq(maxComps))
+    pressDFres <- reshape2::melt(pressDF)
+    mean_PRESS_comp <- apply(X = pressDF, MARGIN = 2, FUN = mean)
+    lowest_PRESS <- which.min(mean_PRESS_comp)
+    results <- as.vector(array(data = "NA", dim = c(lowest_PRESS - 1, 1)))
+    for (i in seq_along(1:(lowest_PRESS - 1))) {
+      comp1 <- i
+      comp2 <- lowest_PRESS
+      ttest <- t.test(pressDFres$value[which(pressDFres$variable == comp1)], 
+                      pressDFres$value[which(pressDFres$variable == comp2)])
+      results[i] <- round(unlist(ttest$p.value), 8)
+    }
+    results <- data.frame(seq(1, lowest_PRESS - 1, 1), results)
+    names(results) <- c("Component", "P.value")
+    first <- min(which(as.numeric(as.character(results$P.value)) > 0.05))
+    nComps <- results$Component[first]
+    print(paste0("*** Optimal number of components based on t.test: ", nComps))
+    bp <- ggplot(pressDFres, aes(x = variable, y = value)) + 
+      theme_bw() + geom_boxplot(notch = FALSE) + 
+      labs(x = "Number of Components", y = "PRESS") + 
+      stat_boxplot(geom = "errorbar", width = 0.2) + 
+      geom_vline(xintercept = nComps, linetype = "dashed", color = "blue", size = 1) + 
+      # theme() must be chained with "+"; as a separate statement it is evaluated and discarded
+      theme(axis.text = element_text(size = 18), legend.position = "none", 
+            axis.title = element_text(size = 20, face = "bold"), 
+            axis.text.x = element_text(angle = 0, vjust = 0.5), 
+            panel.border = element_rect(linetype = "solid", fill = NA, size = 1.5))
+    print(bp)
+  }
+  return(nComps)
 }
\ No newline at end of file
diff --git a/R/get_ecosis_data.R b/R/get_ecosis_data.R
index 5cff000..2973a04 100644
--- a/R/get_ecosis_data.R
+++ b/R/get_ecosis_data.R
@@ -10,6 +10,8 @@
 ##' names(dat_raw)[1:40]
 ##' }
 ##' 
+##' @importFrom readr read_csv
+##' 
 ##' @return EcoSIS spectral dataset object
 ##' 
 ##' @author Shawn P. Serbin, Alexey Shiklomanov
diff --git a/R/pls_permutation.R b/R/pls_permutation.R
index 8427e67..7ebc79b 100644
--- a/R/pls_permutation.R
+++ b/R/pls_permutation.R
@@ -4,6 +4,8 @@
 ##' See Serbin et al. (2019). DOI: https://doi.org/10.1111/nph.16123
 ##'
 ##' @param dataset input full PLSR dataset. Usually just the calibration dataset
+##' @param targetVariable What object or variable to use as the Y (predictand) in the PLSR model? 
+##' Usually the "inVar" variable set at the beginning of a PLS script
 ##' @param maxComps maximum number of components to use for each PLSR fit
 ##' @param iterations how many different permutations to run
 ##' @param prop proportion of data to preserve for each permutation
@@ -17,8 +19,9 @@
 ##' 
 ##' @author Julien Lamour, Shawn P. Serbin
 ##' @export
-pls_permutation <- function(dataset=NULL, maxComps=20, iterations=20, prop=0.70,
-                            verbose=FALSE) {
+pls_permutation <- function(dataset=NULL, targetVariable=NULL, maxComps=20, iterations=20, 
+                            prop=0.70, verbose=FALSE) {
+  inVar <- targetVariable
   coefs <- array(0,dim=c((ncol(dataset$Spectra)+1),iterations,maxComps))
   press.out <- array(data=NA, dim=c(iterations,maxComps))
   print("*** Running permutation test.  Please hang tight, this can take awhile ***")
@@ -27,7 +30,7 @@ pls_permutation <- function(dataset=NULL, maxComps=20, iterations=20, prop=0.70,
               "Data Proportion (percent):", prop*100, sep=" "))
   
   if (verbose) {
-    j <- 1 # <--- Numeric counter for progress bar
+    j <- 1
     pb <- txtProgressBar(min = 0, max = iterations, 
                          char="*",width=70,style = 3)
   }
@@ -47,9 +50,9 @@ pls_permutation <- function(dataset=NULL, maxComps=20, iterations=20, prop=0.70,
     
     ### Display progress to console
     if (verbose) {
-      setTxtProgressBar(pb, j)    # show progress bar
-      j <- j+1                    # <--- increase counter by 1
-      flush.console()             #<--- show output in real-time
+      setTxtProgressBar(pb, j)
+      j <- j+1
+      flush.console()
     }
   }
   if (verbose) {
@@ -60,4 +63,82 @@ pls_permutation <- function(dataset=NULL, maxComps=20, iterations=20, prop=0.70,
   print("*** Providing PRESS and coefficient array output ***")
   output <- list(PRESS=press.out, coef_array=coefs)
   return(output)
+}
+
+
+##' Run a PLSR model permutation analysis stratified by selected "groups". Can be used to 
+##' determine the optimal number of components or conduct a bootstrap uncertainty analysis
+##' 
+##' @param dataset input full PLSR dataset. Usually just the calibration dataset
+##' @param targetVariable What object or variable to use as the Y (predictand) in the PLSR model? 
+##' Usually the "inVar" variable set at the beginning of a PLS script
+##' @param maxComps maximum number of components to use for each PLSR fit
+##' @param iterations how many different permutations to run
+##' @param prop proportion of data to preserve for each permutation
+##' @param verbose Should the function report the current iteration status/progress to the terminal
+##' or run silently? TRUE/FALSE. Default FALSE
+##' @param group_variables Character vector of the form c("var1", "var2"..."varn") 
+##' providing the factors used for stratified sampling in the PLSR permutation analysis
+##' 
+##' @return output a list containing the PRESS and coef_array.
+##' output <- list(PRESS=press.out, coef_array=coefs)
+##' 
+##' @importFrom magrittr %>%
+##' @importFrom dplyr mutate group_by_at slice n row_number
+##' @importFrom pls plsr 
+##' @importFrom utils flush.console read.table setTxtProgressBar txtProgressBar
+##' 
+##' @author asierrl, Shawn P. Serbin, Julien Lamour
+##' @export
+##' 
+pls_permutation_by_groups <- function (dataset = NULL, targetVariable=NULL, maxComps = 20, 
+                                       iterations = 20, prop = 0.7, group_variables=NULL,
+                                       verbose = FALSE) {
+  inVar <- targetVariable
+  coefs <- array(0, dim = c((ncol(dataset$Spectra) + 1), iterations, maxComps))
+  press.out <- array(data = NA, dim = c(iterations, maxComps))
+  print("*** Running permutation test.  Please hang tight, this can take awhile ***")
+  print("Options:")
+  print(paste("Max Components:", maxComps, "Iterations:", iterations, 
+              "Data Proportion (percent):", prop * 100, sep = " "))
+  if (verbose) {
+    j <- 1
+    pb <- utils::txtProgressBar(min = 0, max = iterations, char = "*", width = 70, style = 3)
+  }
+  # seq_len() gives an empty loop when iterations is 0
+  for (i in seq_len(iterations)) {
+    # training rows: sampled within each group when group_variables is given, otherwise from all rows
+    if (!is.null(group_variables)) {
+      trainset <- dataset %>%
+        mutate(int_id=row_number()) %>%
+        group_by_at(group_variables) %>%
+        slice(sample(seq_len(n()), floor(prop * n())))
+      rows <- trainset$int_id
+    } else {
+      rows <- sample(seq_len(nrow(dataset)), floor(prop * nrow(dataset)))
+    }
+    sub.data <- dataset[rows, ]
+    val.sub.data <- dataset[-rows, ]
+    plsr.out <- plsr(as.formula(paste(inVar, "~", "Spectra")), scale = FALSE, center = TRUE, 
+                     ncomp = maxComps, validation = "none", data = sub.data)
+    pred_val <- predict(plsr.out, newdata = val.sub.data)
+    # PRESS per component: squared residuals on the held-out rows, summed over rows
+    sq_resid <- (pred_val[, , ] - val.sub.data[, inVar])^2
+    press <- apply(X = sq_resid, MARGIN = 2, FUN = sum)
+    press.out[i, ] <- press
+    coefs[, i, ] <- coef(plsr.out, intercept = TRUE, ncomp = 1:maxComps)
+    rm(rows, sub.data, val.sub.data, plsr.out, pred_val, sq_resid, press)
+    if (verbose) {
+      setTxtProgressBar(pb, j)
+      j <- j + 1
+      flush.console()
+    }
+  }
+  if (verbose) {
+    close(pb)
+  }
+  # create a new list with PRESS and permuted coefficients x wavelength x component number
+  print("*** Providing PRESS and coefficient array output ***")
+  output <- list(PRESS = press.out, coef_array = coefs)
+  return(output)
 }
\ No newline at end of file
diff --git a/README.md b/README.md
index c4c2284..318b2bb 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,7 @@ https://ecosml.org/package/github/TESTgroup-BNL/spectratrait
 
 ### Depends: 
 ggplot2 (>= 3.3.2), remotes (>= 2.2.0), devtools (>= 2.3.1), readr (>= 1.3.1), RCurl (>= 1.98-1.2), 
-httr (>= 1.4.2), pls (>= 2.7-2), dplyr (>= 1.0.1), reshape2 (>= 1.4.4), here (>= 0.1), 
+httr (>= 1.4.2), pls (>= 2.7-2), magrittr (>= 2.0.1), dplyr (>= 1.0.1), reshape2 (>= 1.4.4), here (>= 0.1), 
 plotrix (>= 3.7-8), gridExtra (>= 2.3), scales (>= 1.1.1), knitr
 
 ### INSTALL
diff --git a/inst/scripts/simple_spectra-trait_plsr_example.R b/inst/scripts/simple_spectra-trait_plsr_example.R
index 2b7005a..34a9ed8 100644
--- a/inst/scripts/simple_spectra-trait_plsr_example.R
+++ b/inst/scripts/simple_spectra-trait_plsr_example.R
@@ -20,8 +20,7 @@
 
 #--------------------------------------------------------------------------------------------------#
 ### Load libraries
-list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra",
-                      "spectratrait")
+list.of.packages <- c("pls","here","dplyr","plotrix","ggplot2","gridExtra","spectratrait")
 invisible(lapply(list.of.packages, library, character.only = TRUE))
 #--------------------------------------------------------------------------------------------------#
 
@@ -139,13 +138,14 @@ maxComps <- 20
 iterations <- 40
 prop <- 0.70
 if (method=="pls") {
-  nComps <- spectratrait::find_optimal_components(dataset=plsr_data, method=method, maxComps=maxComps, 
-                                    seg=seg, random_seed=random_seed)
+  nComps <- spectratrait::find_optimal_components(dataset=plsr_data, targetVariable=inVar, 
+                                                  method=method, maxComps=maxComps, seg=seg, 
+                                                  random_seed=random_seed)
   print(paste0("*** Optimal number of components: ", nComps))
 } else {
-  nComps <- spectratrait::find_optimal_components(dataset=plsr_data, method=method, maxComps=maxComps, 
-                                    iterations=iterations, seg=seg, prop=prop, 
-                                    random_seed=random_seed)
+  nComps <- spectratrait::find_optimal_components(dataset=plsr_data, targetVariable=inVar, 
+                                                  method=method, maxComps=maxComps, iterations=iterations, 
+                                                  seg=seg, prop=prop, random_seed=random_seed)
 }
 dev.copy(png,file.path(outdir,paste0(paste0(inVar,"_PLSR_Component_Selection.png"))), 
          height=2800, width=3400,  res=340)
diff --git a/inst/scripts/spectra-trait_ely_leafN_plsr_bootstrap_example.R b/inst/scripts/spectra-trait_ely_leafN_plsr_bootstrap_example.R
index 1f23607..6d3d3b4 100644
--- a/inst/scripts/spectra-trait_ely_leafN_plsr_bootstrap_example.R
+++ b/inst/scripts/spectra-trait_ely_leafN_plsr_bootstrap_example.R
@@ -15,8 +15,7 @@
 
 #--------------------------------------------------------------------------------------------------#
 ### Load libraries
-list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra",
-                      "spectratrait")
+list.of.packages <- c("pls","dplyr","here","plotrix","ggplot2","gridExtra","spectratrait")
 invisible(lapply(list.of.packages, library, character.only = TRUE))
 #--------------------------------------------------------------------------------------------------#
 
@@ -159,16 +158,15 @@ maxComps <- 16
 iterations <- 80
 prop <- 0.70
 if (method=="pls") {
-  # pls package approach - faster but estimates more components....
-  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, 
-                                                  maxComps=maxComps, seg=seg, 
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, 
+                                                  method=method, maxComps=maxComps, seg=seg, 
                                                   random_seed=random_seed)
   print(paste0("*** Optimal number of components: ", nComps))
 } else {
-  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, 
-                                                  maxComps=maxComps, iterations=iterations, 
-                                                  seg=seg, prop=prop, 
-                                                  random_seed=random_seed)
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, 
+                                                  method=method, maxComps=maxComps, 
+                                                  iterations=iterations, 
+                                                  seg=seg, prop=prop, random_seed=random_seed)
 }
 dev.copy(png,file.path(outdir,paste0(paste0(inVar,"_PLSR_Component_Selection.png"))), 
          height=2800, width=3400, res=340)
@@ -304,7 +302,7 @@ if(grepl("Windows", sessionInfo()$running)){
 ### PLSR bootstrap permutation uncertainty analysis
 iterations <- 500    # how many permutation iterations to run
 prop <- 0.70          # fraction of training data to keep for each iteration
-plsr_permutation <- spectratrait::pls_permutation(dataset=cal.plsr.data, 
+plsr_permutation <- spectratrait::pls_permutation(dataset=cal.plsr.data, targetVariable=inVar,
                                                             maxComps=nComps, 
                                                             iterations=iterations, 
                                                             prop=prop, verbose=TRUE)
diff --git a/inst/scripts/spectra-trait_ely_leafN_plsr_bootstrap_grp_example.R b/inst/scripts/spectra-trait_ely_leafN_plsr_bootstrap_grp_example.R
new file mode 100644
index 0000000..be6cb7d
--- /dev/null
+++ b/inst/scripts/spectra-trait_ely_leafN_plsr_bootstrap_grp_example.R
@@ -0,0 +1,411 @@
+####################################################################################################
+#
+#    Example "how-to" script illustrating the use of PLSR modeling to develop a 
+#    spectra-trait algorithm to estimate leaf nitrogen content with leaf-level spectroscopy data.
+#    The example is built from published data source (DOI: https://doi.org/10.1093/jxb/erz061)
+#    This example illustrates how to select the optimal number of components and quantify model 
+#    prediction uncertainty using bootstrap permutation
+#
+#    Notes:
+#    * Questions, comments, or concerns can be sent to sserbin@bnl.gov
+#    * Code is provided under GNU General Public License v3.0 
+#
+####################################################################################################
+
+
+#--------------------------------------------------------------------------------------------------#
+### Load libraries
+list.of.packages <- c("pls","dplyr","here","plotrix","ggplot2","gridExtra","spectratrait")
+invisible(lapply(list.of.packages, library, character.only = TRUE))
+#--------------------------------------------------------------------------------------------------#
+
+
+#--------------------------------------------------------------------------------------------------#
+### Setup other functions and options
+# not in
+`%notin%` <- Negate(`%in%`)
+
+# Script options
+pls::pls.options(plsralg = "oscorespls")
+pls::pls.options("plsralg")
+
+# Default par options
+opar <- par(no.readonly = T)
+
+# Specify output directory, output_dir 
+# Options: 
+# tempdir - use an OS-specified temporary directory 
+# user defined PATH - e.g. "~/scratch/PLSR"
+output_dir <- "tempdir"
+#--------------------------------------------------------------------------------------------------#
+
+
+#--------------------------------------------------------------------------------------------------#
+### Load Ely et al 2019 dataset
+data("ely_plsr_data")
+head(ely_plsr_data)[,1:8]
+
+# What is the target variable?
+inVar <- "N_g_m2"
+#--------------------------------------------------------------------------------------------------#
+
+
+#--------------------------------------------------------------------------------------------------#
+### Set working directory
+if (output_dir=="tempdir") {
+  outdir <- tempdir()
+} else {
+  if (! file.exists(output_dir)) dir.create(output_dir,recursive=TRUE)
+  outdir <- file.path(path.expand(output_dir))
+}
+setwd(outdir) # set working directory
+getwd()  # check wd
+#--------------------------------------------------------------------------------------------------#
+
+
+#--------------------------------------------------------------------------------------------------#
+### PLSR data
+Start.wave <- 500
+End.wave <- 2400
+wv <- seq(Start.wave,End.wave,1)
+plsr_data <- ely_plsr_data
+head(ely_plsr_data)[1:20]
+#--------------------------------------------------------------------------------------------------#
+
+
+#--------------------------------------------------------------------------------------------------#
+### Create cal/val datasets
+## Make a stratified random sampling in the strata Species_Code
+
+method <- "dplyr" #base/dplyr
+# base R - a bit slow
+# dplyr - much faster
+split_data <- spectratrait::create_data_split(dataset=plsr_data, approach=method, 
+                                              split_seed=23452135, prop=0.7, 
+                                              group_variables="Species_Code")
+names(split_data)
+cal.plsr.data <- split_data$cal_data
+head(cal.plsr.data)[1:8]
+val.plsr.data <- split_data$val_data
+head(val.plsr.data)[1:8]
+rm(split_data)
+
+# Datasets:
+print(paste("Cal observations: ",dim(cal.plsr.data)[1],sep=""))
+print(paste("Val observations: ",dim(val.plsr.data)[1],sep=""))
+
+cal_hist_plot <- qplot(cal.plsr.data[,paste0(inVar)],geom="histogram",
+                       main = paste0("Cal. Histogram for ",inVar),
+                       xlab = paste0(inVar),ylab = "Count",fill=I("grey50"),col=I("black"),
+                       alpha=I(.7))
+val_hist_plot <- qplot(val.plsr.data[,paste0(inVar)],geom="histogram",
+                       main = paste0("Val. Histogram for ",inVar),
+                       xlab = paste0(inVar),ylab = "Count",fill=I("grey50"),col=I("black"),
+                       alpha=I(.7))
+histograms <- grid.arrange(cal_hist_plot, val_hist_plot, ncol=2)
+ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Histograms.png")), plot = histograms, 
+       device="png", width = 30, 
+       height = 12, units = "cm",
+       dpi = 300)
+# output cal/val data
+write.csv(cal.plsr.data,file=file.path(outdir,paste0(inVar,'_Cal_PLSR_Dataset.csv')),
+          row.names=FALSE)
+write.csv(val.plsr.data,file=file.path(outdir,paste0(inVar,'_Val_PLSR_Dataset.csv')),
+          row.names=FALSE)
+#--------------------------------------------------------------------------------------------------#
+
+
+#--------------------------------------------------------------------------------------------------#
+### Format PLSR data for model fitting 
+cal_spec <- as.matrix(cal.plsr.data[, which(names(cal.plsr.data) %in% paste0("Wave_",wv))])
+cal.plsr.data <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% paste0("Wave_",wv))],
+                            Spectra=I(cal_spec))
+head(cal.plsr.data)[1:7]
+
+val_spec <- as.matrix(val.plsr.data[, which(names(val.plsr.data) %in% paste0("Wave_",wv))])
+val.plsr.data <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% paste0("Wave_",wv))],
+                            Spectra=I(val_spec))
+head(val.plsr.data)[1:7]
+#--------------------------------------------------------------------------------------------------#
+
+
+#--------------------------------------------------------------------------------------------------#
+### plot cal and val spectra
+par(mfrow=c(1,2)) # B, L, T, R
+spectratrait::f.plot.spec(Z=cal.plsr.data$Spectra,wv=seq(Start.wave,End.wave,1),
+                          plot_label="Calibration")
+spectratrait::f.plot.spec(Z=val.plsr.data$Spectra,wv=seq(Start.wave,End.wave,1),
+                          plot_label="Validation")
+
+dev.copy(png,file.path(outdir,paste0(inVar,'_Cal_Val_Spectra.png')), 
+         height=2500,width=4900, res=340)
+dev.off();
+par(mfrow=c(1,1))
+#--------------------------------------------------------------------------------------------------#
+
+
+#--------------------------------------------------------------------------------------------------#
+### Use permutation to determine the optimal number of components
+if(grepl("Windows", sessionInfo()$running)){
+  pls.options(parallel = NULL) # Windows: keep the pls default (NULL)
+} else {
+  pls.options(parallel = max(1, parallel::detectCores()-1, na.rm=TRUE)) # spare one core; never below 1, even if detectCores() is 1 or NA
+}
+
+method <- "firstMin" #firstPlateau, firstMin
+random_seed <- 1245565
+seg <- 50
+maxComps <- 20
+iterations <- 80
+prop <- 0.70
+nComps <- spectratrait::find_optimal_comp_by_groups(dataset=cal.plsr.data, targetVariable=inVar, 
+                                                    method=method, maxComps=maxComps, 
+                                                    iterations=iterations, prop=prop, 
+                                                    random_seed=random_seed,
+                                                    group_variables="Species_Code")
+dev.copy(png,file.path(outdir,paste0(paste0(inVar,"_PLSR_Component_Selection.png"))), 
+         height=2800, width=3400, res=340)
+dev.off();
+#--------------------------------------------------------------------------------------------------#
+
+
+#--------------------------------------------------------------------------------------------------#
+### Fit final model - using leave-one-out cross validation
+plsr.out <- plsr(as.formula(paste(inVar,"~","Spectra")),scale=FALSE,ncomp=nComps,validation="LOO",
+                 trace=FALSE,data=cal.plsr.data)
+fit <- plsr.out$fitted.values[,1,nComps]
+pls.options(parallel = NULL)
+
+# External validation fit stats: RMSEP and R2 on the validation data, by number of components
+par(mfrow=c(1,2)) # panel layout: 1 row x 2 columns (RMSEP | R2)
+pls::RMSEP(plsr.out, newdata = val.plsr.data)
+plot(pls::RMSEP(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL RMSEP",
+     xlab="Number of Components",ylab="Model Validation RMSEP",lty=1,col="black",cex=1.5,lwd=2)
+box(lwd=2.2)
+
+R2(plsr.out, newdata = val.plsr.data)
+plot(pls::R2(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL R2",
+     xlab="Number of Components",ylab="Model Validation R2",lty=1,col="black",cex=1.5,lwd=2)
+box(lwd=2.2)
+dev.copy(png,file.path(outdir,paste0(paste0(inVar,"_Validation_RMSEP_R2_by_Component.png"))), 
+         height=2800, width=4800,  res=340)
+dev.off(); # close the PNG device that dev.copy() made current
+par(opar) # restore the graphics settings held in opar (set outside this section -- presumably the original par())
+#--------------------------------------------------------------------------------------------------#
+
+
+#--------------------------------------------------------------------------------------------------#
+### PLSR fit observed vs. predicted plot data
+#calibration
+cal.plsr.output <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% "Spectra")], PLSR_Predicted=fit,
+                              PLSR_CV_Predicted=as.vector(plsr.out$validation$pred[,,nComps]))
+cal.plsr.output <- cal.plsr.output %>%
+  mutate(PLSR_CV_Residuals = PLSR_CV_Predicted-get(inVar))
+head(cal.plsr.output)
+cal.R2 <- round(pls::R2(plsr.out,intercept=FALSE)[[1]][nComps],2) # R2 at nComps; no newdata, so presumably the cross-validated estimate
+cal.RMSEP <- round(sqrt(mean(cal.plsr.output$PLSR_CV_Residuals^2)),2) # root mean square of the CV residuals
+
+val.plsr.output <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% "Spectra")],
+                              PLSR_Predicted=as.vector(predict(plsr.out, 
+                                                               newdata = val.plsr.data, 
+                                                               ncomp=nComps, type="response")[,,1]))
+val.plsr.output <- val.plsr.output %>%
+  mutate(PLSR_Residuals = PLSR_Predicted-get(inVar))
+head(val.plsr.output)
+val.R2 <- round(pls::R2(plsr.out,newdata=val.plsr.data,intercept=FALSE)[[1]][nComps],2) # R2 on the validation data at nComps
+val.RMSEP <- round(sqrt(mean(val.plsr.output$PLSR_Residuals^2)),2) # root mean square of the validation residuals
+
+rng_quant <- quantile(cal.plsr.output[,inVar], probs = c(0.001, 0.999))
+cal_scatter_plot <- ggplot(cal.plsr.output, aes(x=PLSR_CV_Predicted, y=get(inVar))) + 
+  theme_bw() + geom_point() + geom_abline(intercept = 0, slope = 1, color="dark grey", 
+                                          linetype="dashed", size=1.5) + xlim(rng_quant[1], rng_quant[2]) + 
+  ylim(rng_quant[1], rng_quant[2]) +
+  labs(x=paste0("Predicted ", paste(inVar), " (units)"),
+       y=paste0("Observed ", paste(inVar), " (units)"),
+       title=paste0("Calibration: ", paste0("Rsq = ", cal.R2), "; ", paste0("RMSEP = ", cal.RMSEP))) +
+  theme(axis.text=element_text(size=18), legend.position="none",
+        axis.title=element_text(size=20, face="bold"), 
+        axis.text.x = element_text(angle = 0,vjust = 0.5),
+        panel.border = element_rect(linetype = "solid", fill = NA, size=1.5))
+
+cal_resid_histogram <- ggplot(cal.plsr.output, aes(x=PLSR_CV_Residuals)) +
+  geom_histogram(alpha=.5, position="identity") + 
+  geom_vline(xintercept = 0, color="black", 
+             linetype="dashed", size=1) + theme_bw() + 
+  theme(axis.text=element_text(size=18), legend.position="none",
+        axis.title=element_text(size=20, face="bold"), 
+        axis.text.x = element_text(angle = 0,vjust = 0.5),
+        panel.border = element_rect(linetype = "solid", fill = NA, size=1.5))
+
+rng_quant <- quantile(val.plsr.output[,inVar], probs = c(0.001, 0.999))
+val_scatter_plot <- ggplot(val.plsr.output, aes(x=PLSR_Predicted, y=get(inVar))) + 
+  theme_bw() + geom_point() + geom_abline(intercept = 0, slope = 1, color="dark grey", 
+                                          linetype="dashed", size=1.5) + xlim(rng_quant[1], rng_quant[2]) + 
+  ylim(rng_quant[1], rng_quant[2]) +
+  labs(x=paste0("Predicted ", paste(inVar), " (units)"),
+       y=paste0("Observed ", paste(inVar), " (units)"),
+       title=paste0("Validation: ", paste0("Rsq = ", val.R2), "; ", paste0("RMSEP = ", val.RMSEP))) +
+  theme(axis.text=element_text(size=18), legend.position="none",
+        axis.title=element_text(size=20, face="bold"), 
+        axis.text.x = element_text(angle = 0,vjust = 0.5),
+        panel.border = element_rect(linetype = "solid", fill = NA, size=1.5))
+
+val_resid_histogram <- ggplot(val.plsr.output, aes(x=PLSR_Residuals)) +
+  geom_histogram(alpha=.5, position="identity") + 
+  geom_vline(xintercept = 0, color="black", 
+             linetype="dashed", size=1) + theme_bw() + 
+  theme(axis.text=element_text(size=18), legend.position="none",
+        axis.title=element_text(size=20, face="bold"), 
+        axis.text.x = element_text(angle = 0,vjust = 0.5),
+        panel.border = element_rect(linetype = "solid", fill = NA, size=1.5))
+
+# plot cal/val side-by-side
+scatterplots <- grid.arrange(cal_scatter_plot, val_scatter_plot, cal_resid_histogram, 
+                             val_resid_histogram, nrow=2,ncol=2)
+ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Scatterplots.png")), 
+       plot = scatterplots, device="png", width = 32, height = 30, units = "cm",
+       dpi = 300)
+#--------------------------------------------------------------------------------------------------#
+
+
+#--------------------------------------------------------------------------------------------------#
+### Generate Coefficient and VIP plots
+vips <- spectratrait::VIP(plsr.out)[nComps,]
+
+par(mfrow=c(2,1))
+plot(plsr.out$coefficients[,,nComps], x=wv,xlab="Wavelength (nm)",
+     ylab="Regression coefficients",lwd=2,type='l')
+box(lwd=2.2)
+plot(seq(Start.wave,End.wave,1),vips,xlab="Wavelength (nm)",ylab="VIP",cex=0.01)
+lines(seq(Start.wave,End.wave,1),vips,lwd=3)
+abline(h=0.8,lty=2,col="dark grey")
+box(lwd=2.2)
+dev.copy(png,file.path(outdir,paste0(inVar,'_Coefficient_VIP_plot.png')), 
+         height=3100, width=4100, res=340)
+dev.off();
+par(opar)
+#--------------------------------------------------------------------------------------------------#
+
+
+#--------------------------------------------------------------------------------------------------#
+if(grepl("Windows", sessionInfo()$running)){
+  pls.options(parallel = NULL) # Windows: keep the pls default (NULL)
+} else {
+  pls.options(parallel = max(1, parallel::detectCores()-1, na.rm=TRUE)) # spare one core; never below 1, even if detectCores() is 1 or NA
+}
+
+### PLSR bootstrap permutation uncertainty analysis
+iterations <- 500    # how many permutation iterations to run
+prop <- 0.70          # fraction of training data to keep for each iteration
+plsr_permutation <- spectratrait::pls_permutation_by_groups(dataset=cal.plsr.data, targetVariable=inVar,
+                                                  maxComps=nComps, 
+                                                  iterations=iterations, 
+                                                  prop=prop, group_variables="Species_Code", 
+                                                  verbose=TRUE)
+bootstrap_intercept <- plsr_permutation$coef_array[1,,nComps]
+bootstrap_coef <- plsr_permutation$coef_array[2:length(plsr_permutation$coef_array[,1,nComps]),
+                                              ,nComps]
+rm(plsr_permutation)
+
+# apply coefficients to left-out validation data
+interval <- c(0.025,0.975)
+Bootstrap_Pred <- val.plsr.data$Spectra %*% bootstrap_coef + 
+  matrix(rep(bootstrap_intercept, length(val.plsr.data[,inVar])), byrow=TRUE, 
+         ncol=length(bootstrap_intercept))
+Interval_Conf <- apply(X = Bootstrap_Pred, MARGIN = 1, FUN = quantile, 
+                       probs=c(interval[1], interval[2]))
+sd_mean <- apply(X = Bootstrap_Pred, MARGIN = 1, FUN = sd)
+sd_res <- sd(val.plsr.output$PLSR_Residuals)
+sd_tot <- sqrt(sd_mean^2+sd_res^2)
+val.plsr.output$LCI <- Interval_Conf[1,]
+val.plsr.output$UCI <- Interval_Conf[2,]
+val.plsr.output$LPI <- val.plsr.output$PLSR_Predicted-1.96*sd_tot
+val.plsr.output$UPI <- val.plsr.output$PLSR_Predicted+1.96*sd_tot
+head(val.plsr.output)
+
+# Bootstrap regression coefficient plot
+spectratrait::f.plot.coef(Z = t(bootstrap_coef), wv = seq(Start.wave,End.wave,1), 
+                          plot_label="Bootstrap regression coefficients",position = 'bottomleft')
+abline(h=0,lty=2,col="grey50")
+box(lwd=2.2)
+dev.copy(png,file.path(outdir,paste0(inVar,'_Bootstrap_Regression_Coefficients.png')), 
+         height=2100, width=3800, res=340)
+dev.off();
+
+# validation plot
+rmsep_percrmsep <- spectratrait::percent_rmse(plsr_dataset = val.plsr.output, 
+                                              inVar = inVar, 
+                                              residuals = val.plsr.output$PLSR_Residuals, 
+                                              range="full")
+RMSEP <- rmsep_percrmsep$rmse
+perc_RMSEP <- rmsep_percrmsep$perc_rmse
+r2 <- round(pls::R2(plsr.out, newdata = val.plsr.data, intercept=FALSE)$val[nComps],2) # validation R2 at nComps
+expr <- vector("expression", 3) # three legend entries: R2, RMSEP, %RMSEP
+expr[[1]] <- bquote(R^2==.(r2))
+expr[[2]] <- bquote(RMSEP==.(round(RMSEP,2)))
+expr[[3]] <- bquote("%RMSEP"==.(round(perc_RMSEP,2)))
+rng_vals <- c(min(val.plsr.output$LPI), max(val.plsr.output$UPI))
+par(mfrow=c(1,1), mar=c(4.2,5.3,1,0.4), oma=c(0, 0.1, 0, 0.2))
+plotrix::plotCI(val.plsr.output$PLSR_Predicted,val.plsr.output[,inVar], 
+                li=val.plsr.output$LPI, ui=val.plsr.output$UPI, gap=0.009,sfrac=0.000, 
+                lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), 
+                err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="grey80",
+                cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"),
+                ylab=paste0("Observed ", paste(inVar), " (units)"),
+                cex.axis=1.5,cex.lab=1.8)
+abline(0,1,lty=2,lwd=2) # dashed 1:1 reference line; lwd spelled out rather than the abbreviated lw
+plotrix::plotCI(val.plsr.output$PLSR_Predicted,val.plsr.output[,inVar], 
+                li=val.plsr.output$LCI, ui=val.plsr.output$UCI, gap=0.009,sfrac=0.004, 
+                lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), 
+                err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="black",
+                cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"),
+                ylab=paste0("Observed ", paste(inVar), " (units)"),
+                cex.axis=1.5,cex.lab=1.8, add=TRUE) # overlay the LCI/UCI bars on the existing plot
+legend("topleft", legend=expr, bty="n", cex=1.5)
+legend("bottomright", legend=c("Prediction Interval","Confidence Interval"), 
+       lty=c(1,1), col = c("grey80","black"), lwd=3, bty="n", cex=1.5)
+box(lwd=2.2)
+dev.copy(png,file.path(outdir,paste0(inVar,"_PLSR_Validation_Scatterplot.png")), 
+         height=2800, width=3200,  res=340)
+dev.off();
+#--------------------------------------------------------------------------------------------------#
+
+
+#---------------- Output bootstrap results --------------------------------------------------------#
+# Bootstrap Coefficients
+out.jk.coefs <- data.frame(Iteration=seq(1,length(bootstrap_intercept),1),
+                           Intercept=bootstrap_intercept,t(bootstrap_coef))
+names(out.jk.coefs) <- c("Iteration","Intercept",paste0("Wave_",wv))
+head(out.jk.coefs)[1:6]
+write.csv(out.jk.coefs,file=file.path(outdir,paste0(inVar,'_Bootstrap_PLSR_Coefficients.csv')),
+          row.names=FALSE)
+#--------------------------------------------------------------------------------------------------#
+
+
+#---------------- Export Model Output -------------------------------------------------------------#
+print(paste("Output directory: ", outdir)) # report the directory the files below are written to
+
+# Observed versus predicted
+write.csv(cal.plsr.output,file=file.path(outdir,paste0(inVar,'_Observed_PLSR_CV_Pred_',nComps,
+                                                       'comp.csv')),row.names=FALSE)
+
+# Validation data
+write.csv(val.plsr.output,file=file.path(outdir,paste0(inVar,'_Validation_PLSR_Pred_',nComps,
+                                                       'comp.csv')),row.names=FALSE)
+
+# Model coefficients
+coefs <- coef(plsr.out,ncomp=nComps,intercept=TRUE)
+write.csv(coefs,file=file.path(outdir,paste0(inVar,'_PLSR_Coefficients_',nComps,'comp.csv')),
+          row.names=TRUE)
+
+# PLSR VIP
+write.csv(vips,file=file.path(outdir,paste0(inVar,'_PLSR_VIPs_',nComps,'comp.csv')))
+
+# confirm the files were written: list the files in outdir whose names match inVar
+print("**** PLSR output files: ")
+print(list.files(outdir, pattern = inVar))
+#--------------------------------------------------------------------------------------------------#
+
+
+#--------------------------------------------------------------------------------------------------#
+### EOF
\ No newline at end of file
diff --git a/inst/scripts/spectra-trait_kit_sla_plsr_example.R b/inst/scripts/spectra-trait_kit_sla_plsr_example.R
index ee9a530..c7ef1a3 100644
--- a/inst/scripts/spectra-trait_kit_sla_plsr_example.R
+++ b/inst/scripts/spectra-trait_kit_sla_plsr_example.R
@@ -20,8 +20,7 @@
 
 #--------------------------------------------------------------------------------------------------#
 ### Load libraries
-list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra",
-                      "spectratrait")
+list.of.packages <- c("pls","dplyr","here","plotrix","ggplot2","gridExtra","spectratrait")
 invisible(lapply(list.of.packages, library, character.only = TRUE))
 #--------------------------------------------------------------------------------------------------#
 
@@ -186,12 +185,14 @@ maxComps <- 18
 iterations <- 50
 prop <- 0.70
 if (method=="pls") {
-  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, 
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar,
+                                                  method=method, 
                                                   maxComps=maxComps, seg=seg, 
                                                   random_seed=random_seed)
   print(paste0("*** Optimal number of components: ", nComps))
 } else {
-  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, 
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar,
+                                                  method=method, 
                                                   maxComps=maxComps, iterations=iterations, 
                                                   seg=seg, prop=prop, 
                                                   random_seed=random_seed)
diff --git a/inst/scripts/spectra-trait_neon_leafN_canopy_plsr_example.R b/inst/scripts/spectra-trait_neon_leafN_canopy_plsr_example.R
index 9a6709a..4394b7b 100644
--- a/inst/scripts/spectra-trait_neon_leafN_canopy_plsr_example.R
+++ b/inst/scripts/spectra-trait_neon_leafN_canopy_plsr_example.R
@@ -222,12 +222,14 @@ maxComps <- 16
 iterations <- 80
 prop <- 0.70
 if (method=="pls") {
-  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, 
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, 
+                                                  method=method, 
                                                   maxComps=maxComps, seg=seg, 
                                                   random_seed=random_seed)
   print(paste0("*** Optimal number of components: ", nComps))
 } else {
-  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, 
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, 
+                                                  method=method, 
                                                   maxComps=maxComps, iterations=iterations, 
                                                   seg=seg, prop=prop, 
                                                   random_seed=random_seed)
@@ -371,7 +373,8 @@ if(grepl("Windows", sessionInfo()$running)){
 ### PLSR bootstrap permutation uncertainty analysis
 iterations <- 500    # how many permutation iterations to run
 prop <- 0.70          # fraction of training data to keep for each iteration
-plsr_permutation <- spectratrait::pls_permutation(dataset=cal.plsr.data, maxComps=nComps, 
+plsr_permutation <- spectratrait::pls_permutation(dataset=cal.plsr.data, targetVariable=inVar, 
+                                                  maxComps=nComps, 
                                                   iterations=iterations, prop=prop, 
                                                   verbose=TRUE)
 bootstrap_intercept <- plsr_permutation$coef_array[1,,nComps]
diff --git a/inst/scripts/spectra-trait_neon_lma_plsr_example.R b/inst/scripts/spectra-trait_neon_lma_plsr_example.R
index b697d09..805c503 100644
--- a/inst/scripts/spectra-trait_neon_lma_plsr_example.R
+++ b/inst/scripts/spectra-trait_neon_lma_plsr_example.R
@@ -186,12 +186,14 @@ maxComps <- 20
 iterations <- 40
 prop <- 0.70
 if (method=="pls") {
-  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, 
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar,
+                                                  method=method, 
                                                   maxComps=maxComps, seg=seg, 
                                                   random_seed=random_seed)
   print(paste0("*** Optimal number of components: ", nComps))
 } else {
-  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, 
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar,
+                                                  method=method, 
                                                   maxComps=maxComps, iterations=iterations, 
                                                   seg=seg, prop=prop, random_seed=random_seed)
 }
diff --git a/inst/scripts/spectra-trait_reseco_leafN_plsr_bootstrap_example.R b/inst/scripts/spectra-trait_reseco_leafN_plsr_bootstrap_example.R
index 12a7348..6c94335 100644
--- a/inst/scripts/spectra-trait_reseco_leafN_plsr_bootstrap_example.R
+++ b/inst/scripts/spectra-trait_reseco_leafN_plsr_bootstrap_example.R
@@ -189,14 +189,15 @@ maxComps <- 16
 iterations <- 80
 prop <- 0.70
 if (method=="pls") {
-  # pls package approach - faster but estimates more components....
-  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, maxComps=maxComps, 
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar,
+                                                  method=method, maxComps=maxComps, 
                                                   seg=seg, random_seed=random_seed)
   print(paste0("*** Optimal number of components: ", nComps))
 } else {
-  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, maxComps=maxComps, 
-                                    iterations=iterations, seg=seg, prop=prop, 
-                                    random_seed=random_seed)
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar,
+                                                  method=method, maxComps=maxComps, 
+                                                  iterations=iterations, seg=seg, prop=prop, 
+                                                  random_seed=random_seed)
 }
 dev.copy(png,file.path(outdir,paste0(paste0(inVar,"_PLSR_Component_Selection.png"))), 
          height=2800, width=3400,  res=340)
@@ -332,7 +333,8 @@ if(grepl("Windows", sessionInfo()$running)){
 ### PLSR bootstrap permutation uncertainty analysis
 iterations <- 500    # how many permutation iterations to run
 prop <- 0.70          # fraction of training data to keep for each iteration
-plsr_permutation <- spectratrait::pls_permutation(dataset=cal.plsr.data, maxComps=nComps, 
+plsr_permutation <- spectratrait::pls_permutation(dataset=cal.plsr.data, targetVariable=inVar,
+                                                  maxComps=nComps, 
                                                   iterations=iterations, prop=prop, 
                                                   verbose=TRUE)
 bootstrap_intercept <- plsr_permutation$coef_array[1,,nComps]
diff --git a/inst/scripts/spectra-trait_reseco_leafN_plsr_example.R b/inst/scripts/spectra-trait_reseco_leafN_plsr_example.R
index c8262b3..b878342 100644
--- a/inst/scripts/spectra-trait_reseco_leafN_plsr_example.R
+++ b/inst/scripts/spectra-trait_reseco_leafN_plsr_example.R
@@ -19,8 +19,7 @@
 
 #--------------------------------------------------------------------------------------------------#
 ### Load libraries
-list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra",
-                      "spectratrait")
+list.of.packages <- c("pls","dplyr","here","plotrix","ggplot2","gridExtra","spectratrait")
 invisible(lapply(list.of.packages, library, character.only = TRUE))
 #--------------------------------------------------------------------------------------------------#
 
@@ -188,13 +187,14 @@ maxComps <- 16
 iterations <- 80
 prop <- 0.70
 if (method=="pls") {
-  # pls package approach - faster but estimates more components....
-  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, 
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, 
+                                                  method=method, 
                                                   maxComps=maxComps, seg=seg, 
                                                   random_seed=random_seed)
   print(paste0("*** Optimal number of components: ", nComps))
 } else {
-  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, 
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar,
+                                                  method=method, 
                                                   maxComps=maxComps, iterations=iterations, 
                                                   seg=seg, prop=prop, 
                                                   random_seed=random_seed)
diff --git a/inst/scripts/spectra-trait_reseco_lma_plsr_example.R b/inst/scripts/spectra-trait_reseco_lma_plsr_example.R
index d28063f..a602af9 100644
--- a/inst/scripts/spectra-trait_reseco_lma_plsr_example.R
+++ b/inst/scripts/spectra-trait_reseco_lma_plsr_example.R
@@ -23,8 +23,7 @@
 #--------------------------------------------------------------------------------------------------#
 ### Step 1.
 # Load required libraries & spectratrait package
-list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra",
-                      "spectratrait")
+list.of.packages <- c("pls","dplyr","here","plotrix","ggplot2","gridExtra","spectratrait")
 invisible(lapply(list.of.packages, library, character.only = TRUE))
 #--------------------------------------------------------------------------------------------------#
 
@@ -215,12 +214,14 @@ maxComps <- 16
 iterations <- 50
 prop <- 0.70
 if (method=="pls") {
-  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, 
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar,
+                                                  method=method, 
                                                   maxComps=maxComps, seg=seg, 
                                                   random_seed=random_seed)
   print(paste0("*** Optimal number of components: ", nComps))
 } else {
-  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, 
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar,
+                                                  method=method, 
                                                   maxComps=maxComps, iterations=iterations, 
                                                   seg=seg, prop=prop, random_seed=random_seed)
 }
diff --git a/man/create_data_split.Rd b/man/create_data_split.Rd
index cf8a212..287be8e 100644
--- a/man/create_data_split.Rd
+++ b/man/create_data_split.Rd
@@ -22,7 +22,7 @@ create_data_split(
 \item{prop}{the proportion of data to preserve for calibration (e.g. 0.8) and validation (0.2).
 This sets the calibration proportion}
 
-\item{group_variables}{Use factor variables to conduct a stratfied sampling for cal/val}
+\item{group_variables}{Use factor variables to conduct a stratified sampling for cal/val}
 }
 \value{
 output_list A list containing the calibration dataset (cal_data)
diff --git a/man/find_optimal_comp_by_groups.Rd b/man/find_optimal_comp_by_groups.Rd
new file mode 100644
index 0000000..a2dfef5
--- /dev/null
+++ b/man/find_optimal_comp_by_groups.Rd
@@ -0,0 +1,47 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/find_optimal_components.R
+\name{find_optimal_comp_by_groups}
+\alias{find_optimal_comp_by_groups}
+\title{Uses the firstMin and firstPlateau methods for the determination of the optimal number of PLSR model components,
+by group (i.e. optimal selection by stratification)}
+\usage{
+find_optimal_comp_by_groups(
+  dataset = NULL,
+  targetVariable = NULL,
+  method = "firstPlateau",
+  maxComps = 20,
+  iterations = 20,
+  prop = 0.7,
+  random_seed = 123456789,
+  group_variables = NULL
+)
+}
+\arguments{
+\item{dataset}{input full PLSR dataset. Usually just the calibration dataset}
+
+\item{targetVariable}{What object or variable to use as the Y (predictand) in the PLSR model?
+Usually the "inVar" variable set at the beginning of a PLS script}
+
+\item{method}{Which approach to use to find optimal components. Options: firstPlateau, firstMin}
+
+\item{maxComps}{maximum number of components to consider}
+
+\item{iterations}{how many different permutations to run}
+
+\item{prop}{proportion of data to preserve for each permutation}
+
+\item{random_seed}{random seed to use for splitting data}
+
+\item{group_variables}{Character vector of the form c("var1", "var2"..."varn")
+providing the factors used for stratified sampling.}
+}
+\value{
+nComps the optimal number of PLSR components
+}
+\description{
+Uses the firstMin and firstPlateau methods for the determination of the optimal number of PLSR model components,
+by group (i.e. optimal selection by stratification)
+}
+\author{
+asierrl, Shawn P. Serbin
+}
diff --git a/man/find_optimal_components.Rd b/man/find_optimal_components.Rd
index aabec0a..f66114f 100644
--- a/man/find_optimal_components.Rd
+++ b/man/find_optimal_components.Rd
@@ -2,10 +2,11 @@
 % Please edit documentation in R/find_optimal_components.R
 \name{find_optimal_components}
 \alias{find_optimal_components}
-\title{Apply different methods to determing the optimal number of PLSR model components}
+\title{Applies different methods for the determination of the optimal number of PLSR model components}
 \usage{
 find_optimal_components(
   dataset = NULL,
+  targetVariable = NULL,
   method = "pls",
   maxComps = 20,
   iterations = 20,
@@ -17,6 +18,9 @@ find_optimal_components(
 \arguments{
 \item{dataset}{input full PLSR dataset. Usually just the calibration dataset}
 
+\item{targetVariable}{What object or variable to use as the Y (predictand) in the PLSR model?
+Usually the "inVar" variable set at the beginning of a PLS script}
+
 \item{method}{Which approach to use to find optimal components. Options: pls, firstPlateau, firstMin}
 
 \item{maxComps}{maximum number of components to consider}
@@ -33,7 +37,7 @@ find_optimal_components(
 nComps the optimal number of PLSR components
 }
 \description{
-Apply different methods to determing the optimal number of PLSR model components
+Applies different methods for the determination of the optimal number of PLSR model components
 }
 \author{
 Julien Lamour, Jeremiah Anderson, Shawn P. Serbin
diff --git a/man/pls_permutation.Rd b/man/pls_permutation.Rd
index d44bc06..88252cd 100644
--- a/man/pls_permutation.Rd
+++ b/man/pls_permutation.Rd
@@ -7,6 +7,7 @@ or conduct a boostrap uncertainty analysis}
 \usage{
 pls_permutation(
   dataset = NULL,
+  targetVariable = NULL,
   maxComps = 20,
   iterations = 20,
   prop = 0.7,
@@ -16,6 +17,9 @@ pls_permutation(
 \arguments{
 \item{dataset}{input full PLSR dataset. Usually just the calibration dataset}
 
+\item{targetVariable}{What object or variable to use as the Y (predictand) in the PLSR model?
+Usually the "inVar" variable set at the beginning of a PLS script}
+
 \item{maxComps}{maximum number of components to use for each PLSR fit}
 
 \item{iterations}{how many different permutations to run}
diff --git a/man/pls_permutation_by_groups.Rd b/man/pls_permutation_by_groups.Rd
new file mode 100644
index 0000000..1736b3c
--- /dev/null
+++ b/man/pls_permutation_by_groups.Rd
@@ -0,0 +1,46 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pls_permutation.R
+\name{pls_permutation_by_groups}
+\alias{pls_permutation_by_groups}
+\title{Run a PLSR model permutation analysis stratified by selected "groups". Can be used to
+determine the optimal number of components or conduct a bootstrap uncertainty analysis}
+\usage{
+pls_permutation_by_groups(
+  dataset = NULL,
+  targetVariable = NULL,
+  maxComps = 20,
+  iterations = 20,
+  prop = 0.7,
+  group_variables = NULL,
+  verbose = FALSE
+)
+}
+\arguments{
+\item{dataset}{input full PLSR dataset. Usually just the calibration dataset}
+
+\item{targetVariable}{What object or variable to use as the Y (predictand) in the PLSR model?
+Usually the "inVar" variable set at the beginning of a PLS script}
+
+\item{maxComps}{maximum number of components to use for each PLSR fit}
+
+\item{iterations}{how many different permutations to run}
+
+\item{prop}{proportion of data to preserve for each permutation}
+
+\item{group_variables}{Character vector of the form c("var1", "var2"..."varn")
+providing the factors used for stratified sampling in the PLSR permutation analysis}
+
+\item{verbose}{Should the function report the current iteration status/progress to the terminal
+or run silently? TRUE/FALSE. Default FALSE}
+}
+\value{
+output a list containing the PRESS and coef_array.
+output <- list(PRESS=press.out, coef_array=coefs)
+}
+\description{
+Run a PLSR model permutation analysis stratified by selected "groups". Can be used to
+determine the optimal number of components or conduct a bootstrap uncertainty analysis
+}
+\author{
+asierrl, Shawn P. Serbin, Julien Lamour
+}
diff --git a/spectratrait_1.0.5.pdf b/spectratrait_1.0.5.pdf
deleted file mode 100644
index 206eae0..0000000
Binary files a/spectratrait_1.0.5.pdf and /dev/null differ
diff --git a/spectratrait_1.1.0.pdf b/spectratrait_1.1.0.pdf
new file mode 100644
index 0000000..45a9a52
Binary files /dev/null and b/spectratrait_1.1.0.pdf differ
diff --git a/tests/testthat.R b/tests/testthat.R
index d7e4be9..2a9c3f1 100644
--- a/tests/testthat.R
+++ b/tests/testthat.R
@@ -1,5 +1,4 @@
 library(testthat)
-library(dplyr)
 library(spectratrait)
 
-test_check("spectratrait")
\ No newline at end of file
+testthat::test_check("spectratrait")
diff --git a/tests/testthat/test.create_data_split.R b/tests/testthat/test.create_data_split.R
index 751e467..2ef3baf 100644
--- a/tests/testthat/test.create_data_split.R
+++ b/tests/testthat/test.create_data_split.R
@@ -1,4 +1,4 @@
-context("Test that the create data split function has the expected behavior")
+context("*** Test that the create data split function has the expected behavior *** ")
 
 test_that("Generating a data split using the dplyr approach doesn't throw an error or generate duplicates between cal. and val. data", {
   plot<- rep(c("plot1", "plot2", "plot3"),each=42)
diff --git a/tests/testthat/test.optimal_components.R b/tests/testthat/test.optimal_components.R
new file mode 100644
index 0000000..5cccd4f
--- /dev/null
+++ b/tests/testthat/test.optimal_components.R
@@ -0,0 +1,60 @@
+context("*** Test methods for finding optimal number of PLSR components ***")
+
+### Setup data for tests
+#Load Ely et al 2019 dataset
+data("ely_plsr_data")
+inVar <- "N_g_m2"
+Start.wave <- 500
+End.wave <- 2400
+wv <- seq(Start.wave,End.wave,1)
+plsr_data <- ely_plsr_data
+spec <- as.matrix(plsr_data[, which(names(plsr_data) %in% paste0("Wave_",wv))])
+plsr_data <- data.frame(plsr_data[, which(names(plsr_data) %notin% paste0("Wave_",wv))],
+                        Spectra=I(spec))
+###
+
+test_that("Finding optimal components using the built-in PLS package approach", {
+  method <- "pls"
+  random_seed <- 1245565
+  seg <- 50
+  maxComps <- 20
+  iterations <- 80
+  prop <- 0.70
+  
+  nComps <- spectratrait::find_optimal_components(dataset=plsr_data, targetVariable=inVar,
+                                                  method=method, maxComps=maxComps, seg=seg, 
+                                                  random_seed=random_seed)
+  expect_gte(nComps, 12)
+})
+
+test_that("Finding optimal components using the firstMin approach", {
+  method <- "firstMin"
+  random_seed <- 1245565
+  seg <- 50
+  maxComps <- 20
+  iterations <- 80
+  prop <- 0.70
+  
+  nComps <- spectratrait::find_optimal_components(dataset=plsr_data, targetVariable=inVar, 
+                                                  method=method, maxComps=maxComps, 
+                                                  iterations=iterations, seg=seg, prop=prop, 
+                                                  random_seed=random_seed)
+  expect_gte(nComps, 12)
+  
+})
+
+test_that("Finding optimal components using the firstPlateau approach", {
+  method <- "firstPlateau"
+  random_seed <- 1245565
+  seg <- 50
+  maxComps <- 20
+  iterations <- 80
+  prop <- 0.70
+  
+  nComps <- spectratrait::find_optimal_components(dataset=plsr_data, targetVariable=inVar, 
+                                                  method=method, maxComps=maxComps, 
+                                                  iterations=iterations, seg=seg, prop=prop, 
+                                                  random_seed=random_seed)
+  expect_gte(nComps, 12)
+  
+})
\ No newline at end of file
diff --git a/vignettes/ely_leafN_bootstrap_plsr_example.Rmd b/vignettes/ely_leafN_bootstrap_plsr_example.Rmd
index 17a4d0a..d8e60c4 100644
--- a/vignettes/ely_leafN_bootstrap_plsr_example.Rmd
+++ b/vignettes/ely_leafN_bootstrap_plsr_example.Rmd
@@ -2,9 +2,9 @@
 title: Spectra-trait PLSR example using leaf-level spectra and leaf nitrogen content (Narea, g/m2) data from eight different crop species growing in a glasshouse at Brookhaven National Laboratory.
 author: "Shawn P. Serbin, Julien Lamour, & Jeremiah Anderson"
 output:
-  github_document: default
-  html_notebook: default
   pdf_document: default
+  html_notebook: default
+  github_document: default
   html_document:
     df_print: paged
 params:
@@ -23,8 +23,7 @@ and fit a plsr model for leaf nitrogen content (Narea, g/m2)
 ### Getting Started
 ### Load libraries
 ```{r, eval=TRUE, echo=TRUE}
-list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra",
-                      "spectratrait")
+list.of.packages <- c("pls","dplyr","here","plotrix","ggplot2","gridExtra","spectratrait")
 invisible(lapply(list.of.packages, library, character.only = TRUE))
 ```
 
@@ -162,13 +161,14 @@ maxComps <- 16
 iterations <- 80
 prop <- 0.70
 if (method=="pls") {
-  # pls package approach - faster but estimates more components....
-  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, 
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar,
+                                                  method=method, 
                                                   maxComps=maxComps, seg=seg, 
                                                   random_seed=random_seed)
   print(paste0("*** Optimal number of components: ", nComps))
 } else {
-  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method,
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar,
+                                                  method=method,
                                                   maxComps=maxComps, iterations=iterations, 
                                                   seg=seg, prop=prop, 
                                                   random_seed=random_seed)
@@ -310,7 +310,8 @@ if(grepl("Windows", sessionInfo()$running)){
 ### PLSR bootstrap permutation uncertainty analysis
 iterations <- 500    # how many permutation iterations to run
 prop <- 0.70          # fraction of training data to keep for each iteration
-plsr_permutation <- spectratrait::pls_permutation(dataset=cal.plsr.data, maxComps=nComps, 
+plsr_permutation <- spectratrait::pls_permutation(dataset=cal.plsr.data, targetVariable=inVar,
+                                                  maxComps=nComps, 
                                                   iterations=iterations, prop=prop,
                                                   verbose = FALSE)
 bootstrap_intercept <- plsr_permutation$coef_array[1,,nComps]
diff --git a/vignettes/ely_leafN_bootstrap_plsr_example.md b/vignettes/ely_leafN_bootstrap_plsr_example.md
index 9910579..264df01 100644
--- a/vignettes/ely_leafN_bootstrap_plsr_example.md
+++ b/vignettes/ely_leafN_bootstrap_plsr_example.md
@@ -16,8 +16,7 @@ nitrogen content (Narea, g/m2)
 ### Load libraries
 
 ``` r
-list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra",
-                      "spectratrait")
+list.of.packages <- c("pls","dplyr","here","plotrix","ggplot2","gridExtra","spectratrait")
 invisible(lapply(list.of.packages, library, character.only = TRUE))
 ```
 
@@ -103,7 +102,7 @@ inVar <- "N_g_m2"
 
 ### Set working directory (scratch space)
 
-    ## [1] "/private/var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T/RtmpTADBVi"
+    ## [1] "/private/var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T/RtmpDzC9vA"
 
 ### Full PLSR dataset
 
@@ -139,19 +138,19 @@ split_data <- spectratrait::create_data_split(dataset=plsr_data, approach=method
 
     ## HEAN3   Cal: 70%
 
-    ## CUSA4   Cal: 68.1818181818182%
+    ## CUSA4   Cal: 68.182%
 
-    ## CUPE   Cal: 70.5882352941177%
+    ## CUPE   Cal: 70.588%
 
     ## SOLYL   Cal: 70%
 
-    ## OCBA   Cal: 68.4210526315789%
+    ## OCBA   Cal: 68.421%
 
-    ## POPUL   Cal: 71.4285714285714%
+    ## POPUL   Cal: 71.429%
 
-    ## GLMA4   Cal: 70.5882352941177%
+    ## GLMA4   Cal: 70.588%
 
-    ## PHVU   Cal: 66.6666666666667%
+    ## PHVU   Cal: 66.667%
 
 ``` r
 names(split_data)
@@ -324,19 +323,21 @@ maxComps <- 16
 iterations <- 80
 prop <- 0.70
 if (method=="pls") {
-  # pls package approach - faster but estimates more components....
-  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, 
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar,
+                                                  method=method, 
                                                   maxComps=maxComps, seg=seg, 
                                                   random_seed=random_seed)
   print(paste0("*** Optimal number of components: ", nComps))
 } else {
-  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method,
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar,
+                                                  method=method,
                                                   maxComps=maxComps, iterations=iterations, 
                                                   seg=seg, prop=prop, 
                                                   random_seed=random_seed)
 }
 ```
 
+    ## [1] "*** Identifying optimal number of PLSR components ***"
     ## [1] "*** Running PLS permutation test ***"
 
 ![](ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-9-1.png)<!-- -->
@@ -439,13 +440,13 @@ head(cal.plsr.output)
     ## 6         CUSA4  garden cucumber     7.43  8.06035   114.36    18.40 1.117704
     ## 7          CUPE    field pumpkin     7.20 11.43007   128.42    25.83 1.215333
     ## 10        SOLYL    garden tomato     7.89 11.61918   142.23    27.40 1.304110
-    ##    CalVal PLSR_Predicted PLSR_CV_Predicted PLSR_CV_Residuals
-    ## 1     Cal       1.820666          1.702501       -0.40119317
-    ## 2     Cal       1.609632          1.711772        0.48005882
-    ## 4     Cal       1.364985          1.275526       -0.01243687
-    ## 6     Cal       1.126062          1.060119       -0.05758587
-    ## 7     Cal       1.227538          1.226708        0.01137583
-    ## 10    Cal       1.358638          1.365181        0.06107105
+    ##    PLSR_Predicted PLSR_CV_Predicted PLSR_CV_Residuals
+    ## 1        1.820666          1.702501       -0.40119317
+    ## 2        1.609632          1.711772        0.48005882
+    ## 4        1.364985          1.275526       -0.01243687
+    ## 6        1.126062          1.060119       -0.05758587
+    ## 7        1.227538          1.226708        0.01137583
+    ## 10       1.358638          1.365181        0.06107105
 
 ``` r
 cal.R2 <- round(pls::R2(plsr.out,intercept=F)[[1]][nComps],2)
@@ -467,13 +468,13 @@ head(val.plsr.output)
     ## 9          CUPE    field pumpkin     7.64 17.100448   142.85    43.39 1.1390174
     ## 13        SOLYL    garden tomato     7.73  7.938866   129.95    17.96 0.9483533
     ## 15         OCBA      sweet basil     8.13 16.975969   173.30    38.65 1.1246459
-    ##    CalVal PLSR_Predicted PLSR_Residuals
-    ## 3     Val      1.7125176   -0.052233917
-    ## 5     Val      1.4618447    0.050483171
-    ## 8     Val      1.0951891   -0.051652168
-    ## 9     Val      1.2152379    0.076220509
-    ## 13    Val      0.7992342   -0.149119020
-    ## 15    Val      1.1267054    0.002059572
+    ##    PLSR_Predicted PLSR_Residuals
+    ## 3       1.7125176   -0.052233917
+    ## 5       1.4618447    0.050483171
+    ## 8       1.0951891   -0.051652168
+    ## 9       1.2152379    0.076220509
+    ## 13      0.7992342   -0.149119020
+    ## 15      1.1267054    0.002059572
 
 ``` r
 val.R2 <- round(pls::R2(plsr.out,newdata=val.plsr.data,intercept=F)[[1]][nComps],2)
@@ -593,7 +594,8 @@ if(grepl("Windows", sessionInfo()$running)){
 ### PLSR bootstrap permutation uncertainty analysis
 iterations <- 500    # how many permutation iterations to run
 prop <- 0.70          # fraction of training data to keep for each iteration
-plsr_permutation <- spectratrait::pls_permutation(dataset=cal.plsr.data, maxComps=nComps, 
+plsr_permutation <- spectratrait::pls_permutation(dataset=cal.plsr.data, targetVariable=inVar,
+                                                  maxComps=nComps, 
                                                   iterations=iterations, prop=prop,
                                                   verbose = FALSE)
 ```
@@ -633,13 +635,13 @@ head(val.plsr.output)
     ## 9          CUPE    field pumpkin     7.64 17.100448   142.85    43.39 1.1390174
     ## 13        SOLYL    garden tomato     7.73  7.938866   129.95    17.96 0.9483533
     ## 15         OCBA      sweet basil     8.13 16.975969   173.30    38.65 1.1246459
-    ##    CalVal PLSR_Predicted PLSR_Residuals       LCI       UCI       LPI      UPI
-    ## 3     Val      1.7125176   -0.052233917 1.5070086 1.8760564 1.2810247 2.144011
-    ## 5     Val      1.4618447    0.050483171 1.2909822 1.5475356 1.0541359 1.869553
-    ## 8     Val      1.0951891   -0.051652168 0.9595488 1.2335912 0.6846083 1.505770
-    ## 9     Val      1.2152379    0.076220509 1.0746965 1.3367675 0.8068229 1.623653
-    ## 13    Val      0.7992342   -0.149119020 0.6820207 0.9451323 0.3899050 1.208563
-    ## 15    Val      1.1267054    0.002059572 1.0316572 1.2737521 0.7209233 1.532488
+    ##    PLSR_Predicted PLSR_Residuals       LCI       UCI       LPI      UPI
+    ## 3       1.7125176   -0.052233917 1.5070086 1.8760564 1.2810247 2.144011
+    ## 5       1.4618447    0.050483171 1.2909822 1.5475356 1.0541359 1.869553
+    ## 8       1.0951891   -0.051652168 0.9595488 1.2335912 0.6846083 1.505770
+    ## 9       1.2152379    0.076220509 1.0746965 1.3367675 0.8068229 1.623653
+    ## 13      0.7992342   -0.149119020 0.6820207 0.9451323 0.3899050 1.208563
+    ## 15      1.1267054    0.002059572 1.0316572 1.2737521 0.7209233 1.532488
 
 ### Jackknife coefficient plot
 
@@ -752,7 +754,7 @@ write.csv(out.jk.coefs,file=file.path(outdir,paste0(inVar,
 print(paste("Output directory: ", outdir))
 ```
 
-    ## [1] "Output directory:  /var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T//RtmpTADBVi"
+    ## [1] "Output directory:  /var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T//RtmpDzC9vA"
 
 ``` r
 # Observed versus predicted
diff --git a/vignettes/ely_leafN_bootstrap_plsr_example.pdf b/vignettes/ely_leafN_bootstrap_plsr_example.pdf
index e1963d1..9b41518 100644
Binary files a/vignettes/ely_leafN_bootstrap_plsr_example.pdf and b/vignettes/ely_leafN_bootstrap_plsr_example.pdf differ
diff --git a/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png b/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png
index 0257f75..615bc51 100644
Binary files a/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png and b/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png differ
diff --git a/vignettes/ely_leafN_bootstrap_plsr_grp_example.Rmd b/vignettes/ely_leafN_bootstrap_plsr_grp_example.Rmd
new file mode 100644
index 0000000..de22963
--- /dev/null
+++ b/vignettes/ely_leafN_bootstrap_plsr_grp_example.Rmd
@@ -0,0 +1,429 @@
+---
+title: Spectra-trait PLSR example using leaf-level spectra and leaf nitrogen content (Narea, g/m2) data from eight different crop species growing in a glasshouse at Brookhaven National Laboratory.
+author: "Shawn P. Serbin, Julien Lamour, & Jeremiah Anderson"
+output:
+  github_document: default
+  html_notebook: default
+  pdf_document: default
+  html_document:
+    df_print: paged
+params:
+  date: !r Sys.Date()
+---
+
+```{r setup, include=FALSE, echo=FALSE}
+knitr::opts_chunk$set(echo = TRUE)
+```
+
+### Overview
+This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook to illustrate how to load an
+internal dataset ("ely_plsr_data"), choose the "optimal" number of plsr components, 
+and fit a plsr model for leaf nitrogen content (Narea, g/m2)
+
+### Getting Started
+### Load libraries
+```{r, eval=TRUE, echo=TRUE}
+list.of.packages <- c("pls","dplyr","here","plotrix","ggplot2","gridExtra","spectratrait")
+invisible(lapply(list.of.packages, library, character.only = TRUE))
+```
+
+### Setup other functions and options
+```{r, echo=TRUE}
+### Setup other functions and options
+# not in
+`%notin%` <- Negate(`%in%`)
+
+# Script options
+pls::pls.options(plsralg = "oscorespls")
+pls::pls.options("plsralg")
+
+# Default par options
+opar <- par(no.readonly = T)
+
+# Specify output directory, output_dir 
+# Options: 
+# tempdir - use a OS-specified temporary directory 
+# user defined PATH - e.g. "~/scratch/PLSR"
+output_dir <- "tempdir"
+```
+
+### Load internal Ely et al 2019 dataset
+```{r, echo=TRUE}
+data("ely_plsr_data")
+head(ely_plsr_data)[,1:8]
+
+# What is the target variable?
+inVar <- "N_g_m2"
+```
+
+### Set working directory (scratch space)
+```{r, echo=FALSE}
+if (output_dir=="tempdir") {
+  outdir <- tempdir()
+} else {
+  if (! file.exists(output_dir)) dir.create(output_dir,recursive=TRUE)
+  outdir <- file.path(path.expand(output_dir))
+}
+setwd(outdir) # set working directory
+getwd()  # check wd
+```
+
+### Full PLSR dataset
+```{r, echo=TRUE}
+Start.wave <- 500
+End.wave <- 2400
+wv <- seq(Start.wave,End.wave,1)
+plsr_data <- ely_plsr_data
+head(plsr_data)[,1:6]
+```
+### Create cal/val datasets
+```{r, fig.height = 5, fig.width = 12, echo=TRUE}
+### Create cal/val datasets
+## Make a stratified random sampling within the strata defined by Species_Code
+
+method <- "base" #base/dplyr
+# base R - a bit slow
+# dplyr - much faster
+split_data <- spectratrait::create_data_split(dataset=plsr_data, approach=method, 
+                                              split_seed=23452135, prop=0.7, 
+                                              group_variables="Species_Code")
+names(split_data)
+cal.plsr.data <- split_data$cal_data
+head(cal.plsr.data)[1:8]
+val.plsr.data <- split_data$val_data
+head(val.plsr.data)[1:8]
+rm(split_data)
+
+# Datasets:
+print(paste("Cal observations: ",dim(cal.plsr.data)[1],sep=""))
+print(paste("Val observations: ",dim(val.plsr.data)[1],sep=""))
+
+cal_hist_plot <- qplot(cal.plsr.data[,paste0(inVar)],geom="histogram",
+                       main = paste0("Cal. Histogram for ",inVar),
+                       xlab = paste0(inVar),ylab = "Count",fill=I("grey50"),col=I("black"),
+                       alpha=I(.7))
+val_hist_plot <- qplot(val.plsr.data[,paste0(inVar)],geom="histogram",
+                       main = paste0("Val. Histogram for ",inVar),
+                       xlab = paste0(inVar),ylab = "Count",fill=I("grey50"),col=I("black"),
+                       alpha=I(.7))
+histograms <- grid.arrange(cal_hist_plot, val_hist_plot, ncol=2)
+ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Histograms.png")), 
+       plot = histograms, 
+       device="png", width = 30, 
+       height = 12, units = "cm",
+       dpi = 300)
+# output cal/val data
+write.csv(cal.plsr.data,file=file.path(outdir,paste0(inVar,'_Cal_PLSR_Dataset.csv')),
+          row.names=FALSE)
+write.csv(val.plsr.data,file=file.path(outdir,paste0(inVar,'_Val_PLSR_Dataset.csv')),
+          row.names=FALSE)
+```
+
+### Create calibration and validation PLSR datasets
+```{r, echo=TRUE}
+### Format PLSR data for model fitting 
+cal_spec <- as.matrix(cal.plsr.data[, which(names(cal.plsr.data) %in% paste0("Wave_",wv))])
+cal.plsr.data <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% paste0("Wave_",wv))],
+                            Spectra=I(cal_spec))
+head(cal.plsr.data)[1:5]
+
+val_spec <- as.matrix(val.plsr.data[, which(names(val.plsr.data) %in% paste0("Wave_",wv))])
+val.plsr.data <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% paste0("Wave_",wv))],
+                            Spectra=I(val_spec))
+head(val.plsr.data)[1:5]
+```
+
+### plot cal and val spectra
+```{r, fig.height = 5, fig.width = 12, echo=TRUE}
+par(mfrow=c(1,2)) # B, L, T, R
+spectratrait::f.plot.spec(Z=cal.plsr.data$Spectra,wv=wv,plot_label="Calibration")
+spectratrait::f.plot.spec(Z=val.plsr.data$Spectra,wv=wv,plot_label="Validation")
+
+dev.copy(png,file.path(outdir,paste0(inVar,'_Cal_Val_Spectra.png')), 
+         height=2500,width=4900, res=340)
+dev.off();
+par(mfrow=c(1,1))
+```
+
+### Use permutation to determine optimal number of components
+```{r, fig.height = 6, fig.width = 10, echo=TRUE}
+### Use permutation to determine the optimal number of components
+if(grepl("Windows", sessionInfo()$running)){
+  pls.options(parallel = NULL)
+} else {
+  pls.options(parallel = parallel::detectCores()-1)
+}
+
+method <- "firstMin" #firstPlateau, firstMin
+random_seed <- 1245565
+seg <- 50
+maxComps <- 16
+iterations <- 80
+prop <- 0.70
+nComps <- spectratrait::find_optimal_comp_by_groups(dataset=cal.plsr.data, targetVariable=inVar, 
+                                                    method=method, maxComps=maxComps, 
+                                                    iterations=iterations, prop=prop, 
+                                                    random_seed=random_seed,
+                                                    group_variables="Species_Code")
+dev.copy(png,file.path(outdir,paste0(paste0(inVar,"_PLSR_Component_Selection.png"))), 
+         height=2800, width=3400, res=340)
+dev.off();
+```
+
+### Fit final model
+```{r, fig.height = 5, fig.width = 12, echo=TRUE}
+plsr.out <- plsr(as.formula(paste(inVar,"~","Spectra")),scale=FALSE,ncomp=nComps,validation="LOO",
+                 trace=FALSE,data=cal.plsr.data)
+fit <- plsr.out$fitted.values[,1,nComps]
+pls.options(parallel = NULL)
+
+# External validation fit stats
+par(mfrow=c(1,2)) # B, L, T, R
+pls::RMSEP(plsr.out, newdata = val.plsr.data)
+plot(pls::RMSEP(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL RMSEP",
+     xlab="Number of Components",ylab="Model Validation RMSEP",lty=1,col="black",cex=1.5,lwd=2)
+box(lwd=2.2)
+
+pls::R2(plsr.out, newdata = val.plsr.data)
+plot(pls::R2(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL R2",
+     xlab="Number of Components",ylab="Model Validation R2",lty=1,col="black",cex=1.5,lwd=2)
+box(lwd=2.2)
+dev.copy(png,file.path(outdir,paste0(paste0(inVar,"_Validation_RMSEP_R2_by_Component.png"))), 
+         height=2800, width=4800,  res=340)
+dev.off();
+par(opar)
+```
+
+### PLSR fit observed vs. predicted plot data
+```{r, fig.height = 15, fig.width = 15, echo=TRUE}  
+#calibration
+cal.plsr.output <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% "Spectra")],
+                              PLSR_Predicted=fit,
+                              PLSR_CV_Predicted=as.vector(plsr.out$validation$pred[,,nComps]))
+cal.plsr.output <- cal.plsr.output %>%
+  mutate(PLSR_CV_Residuals = PLSR_CV_Predicted-get(inVar))
+head(cal.plsr.output)
+cal.R2 <- round(pls::R2(plsr.out,intercept=F)[[1]][nComps],2)
+cal.RMSEP <- round(sqrt(mean(cal.plsr.output$PLSR_CV_Residuals^2)),2)
+
+val.plsr.output <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% "Spectra")],
+                              PLSR_Predicted=as.vector(predict(plsr.out, 
+                                                               newdata = val.plsr.data, 
+                                                               ncomp=nComps, type="response")[,,1]))
+val.plsr.output <- val.plsr.output %>%
+  mutate(PLSR_Residuals = PLSR_Predicted-get(inVar))
+head(val.plsr.output)
+val.R2 <- round(pls::R2(plsr.out,newdata=val.plsr.data,intercept=F)[[1]][nComps],2)
+val.RMSEP <- round(sqrt(mean(val.plsr.output$PLSR_Residuals^2)),2)
+
+rng_quant <- quantile(cal.plsr.output[,inVar], probs = c(0.001, 0.999))
+cal_scatter_plot <- ggplot(cal.plsr.output, aes(x=PLSR_CV_Predicted, y=get(inVar))) + 
+  theme_bw() + geom_point() + geom_abline(intercept = 0, slope = 1, color="dark grey", 
+                                          linetype="dashed", size=1.5) + xlim(rng_quant[1], 
+                                                                              rng_quant[2]) + 
+  ylim(rng_quant[1], rng_quant[2]) +
+  labs(x=paste0("Predicted ", paste(inVar), " (units)"),
+       y=paste0("Observed ", paste(inVar), " (units)"),
+       title=paste0("Calibration: ", paste0("Rsq = ", cal.R2), "; ", paste0("RMSEP = ", 
+                                                                            cal.RMSEP))) +
+  theme(axis.text=element_text(size=18), legend.position="none",
+        axis.title=element_text(size=20, face="bold"), 
+        axis.text.x = element_text(angle = 0,vjust = 0.5),
+        panel.border = element_rect(linetype = "solid", fill = NA, size=1.5))
+
+cal_resid_histogram <- ggplot(cal.plsr.output, aes(x=PLSR_CV_Residuals)) +
+  geom_histogram(alpha=.5, position="identity") + 
+  geom_vline(xintercept = 0, color="black", 
+             linetype="dashed", size=1) + theme_bw() + 
+  theme(axis.text=element_text(size=18), legend.position="none",
+        axis.title=element_text(size=20, face="bold"), 
+        axis.text.x = element_text(angle = 0,vjust = 0.5),
+        panel.border = element_rect(linetype = "solid", fill = NA, size=1.5))
+
+rng_quant <- quantile(val.plsr.output[,inVar], probs = c(0.001, 0.999))
+val_scatter_plot <- ggplot(val.plsr.output, aes(x=PLSR_Predicted, y=get(inVar))) + 
+  theme_bw() + geom_point() + geom_abline(intercept = 0, slope = 1, color="dark grey", 
+                                          linetype="dashed", size=1.5) + xlim(rng_quant[1], 
+                                                                              rng_quant[2]) + 
+  ylim(rng_quant[1], rng_quant[2]) +
+  labs(x=paste0("Predicted ", paste(inVar), " (units)"),
+       y=paste0("Observed ", paste(inVar), " (units)"),
+       title=paste0("Validation: ", paste0("Rsq = ", val.R2), "; ", paste0("RMSEP = ", 
+                                                                           val.RMSEP))) +
+  theme(axis.text=element_text(size=18), legend.position="none",
+        axis.title=element_text(size=20, face="bold"), 
+        axis.text.x = element_text(angle = 0,vjust = 0.5),
+        panel.border = element_rect(linetype = "solid", fill = NA, size=1.5))
+
+val_resid_histogram <- ggplot(val.plsr.output, aes(x=PLSR_Residuals)) +
+  geom_histogram(alpha=.5, position="identity") + 
+  geom_vline(xintercept = 0, color="black", 
+             linetype="dashed", size=1) + theme_bw() + 
+  theme(axis.text=element_text(size=18), legend.position="none",
+        axis.title=element_text(size=20, face="bold"), 
+        axis.text.x = element_text(angle = 0,vjust = 0.5),
+        panel.border = element_rect(linetype = "solid", fill = NA, size=1.5))
+
+# plot cal/val side-by-side
+scatterplots <- grid.arrange(cal_scatter_plot, val_scatter_plot, cal_resid_histogram, 
+                             val_resid_histogram, nrow=2,ncol=2)
+ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Scatterplots.png")), 
+       plot = scatterplots, device="png", 
+       width = 32, 
+       height = 30, units = "cm",
+       dpi = 300)
+```
+
+### Generate Coefficient and VIP plots
+```{r, fig.height = 9, fig.width = 10, echo=TRUE}
+vips <- spectratrait::VIP(plsr.out)[nComps,]
+par(mfrow=c(2,1))
+plot(plsr.out, plottype = "coef",xlab="Wavelength (nm)",
+     ylab="Regression coefficients",legendpos = "bottomright",
+     ncomp=nComps,lwd=2)
+box(lwd=2.2)
+plot(seq(Start.wave,End.wave,1),vips,xlab="Wavelength (nm)",ylab="VIP",cex=0.01)
+lines(seq(Start.wave,End.wave,1),vips,lwd=3)
+abline(h=0.8,lty=2,col="dark grey")
+box(lwd=2.2)
+dev.copy(png,file.path(outdir,paste0(inVar,'_Coefficient_VIP_plot.png')), 
+         height=3100, width=4100, res=340)
+dev.off();
+```
+
+### Bootstrap validation
+```{r, echo=TRUE}
+if(grepl("Windows", sessionInfo()$running)){
+  pls.options(parallel =NULL)
+} else {
+  pls.options(parallel = parallel::detectCores()-1)
+}
+
+### PLSR bootstrap permutation uncertainty analysis
+iterations <- 500    # how many permutation iterations to run
+prop <- 0.70          # fraction of training data to keep for each iteration
+plsr_permutation <- spectratrait::pls_permutation_by_groups(dataset=cal.plsr.data, 
+                                                            targetVariable=inVar,
+                                                            maxComps=nComps, 
+                                                            iterations=iterations, 
+                                                            prop=prop, group_variables="Species_Code", 
+                                                            verbose=FALSE)
+bootstrap_intercept <- plsr_permutation$coef_array[1,,nComps]
+bootstrap_coef <- plsr_permutation$coef_array[2:length(plsr_permutation$coef_array[,1,nComps]),
+                                              ,nComps]
+rm(plsr_permutation)
+
+# apply coefficients to left-out validation data
+interval <- c(0.025,0.975)
+Bootstrap_Pred <- val.plsr.data$Spectra %*% bootstrap_coef + 
+  matrix(rep(bootstrap_intercept, length(val.plsr.data[,inVar])), byrow=TRUE, 
+         ncol=length(bootstrap_intercept))
+Interval_Conf <- apply(X = Bootstrap_Pred, MARGIN = 1, FUN = quantile, 
+                       probs=c(interval[1], interval[2]))
+sd_mean <- apply(X = Bootstrap_Pred, MARGIN = 1, FUN = sd)
+sd_res <- sd(val.plsr.output$PLSR_Residuals)
+sd_tot <- sqrt(sd_mean^2+sd_res^2)
+val.plsr.output$LCI <- Interval_Conf[1,]
+val.plsr.output$UCI <- Interval_Conf[2,]
+val.plsr.output$LPI <- val.plsr.output$PLSR_Predicted-1.96*sd_tot
+val.plsr.output$UPI <- val.plsr.output$PLSR_Predicted+1.96*sd_tot
+head(val.plsr.output)
+```
+
+### Bootstrap regression coefficient plot
+```{r, fig.height = 6, fig.width = 10, echo=TRUE}
+# Bootstrap regression coefficient plot
+spectratrait::f.plot.coef(Z = t(bootstrap_coef), wv = wv, 
+            plot_label="Bootstrap regression coefficients",position = 'bottomleft')
+abline(h=0,lty=2,col="grey50")
+box(lwd=2.2)
+dev.copy(png,file.path(outdir,paste0(inVar,'_Bootstrap_Regression_Coefficients.png')), 
+         height=2100, width=3800, res=340)
+dev.off();
+```
+
+### Bootstrap validation plot
+```{r, fig.height = 7, fig.width = 8, echo=TRUE}
+rmsep_percrmsep <- spectratrait::percent_rmse(plsr_dataset = val.plsr.output, 
+                                              inVar = inVar, 
+                                              residuals = val.plsr.output$PLSR_Residuals, 
+                                              range="full")
+RMSEP <- rmsep_percrmsep$rmse
+perc_RMSEP <- rmsep_percrmsep$perc_rmse
+r2 <- round(pls::R2(plsr.out, newdata = val.plsr.data, intercept=F)$val[nComps],2)
+expr <- vector("expression", 3)
+expr[[1]] <- bquote(R^2==.(r2))
+expr[[2]] <- bquote(RMSEP==.(round(RMSEP,2)))
+expr[[3]] <- bquote("%RMSEP"==.(round(perc_RMSEP,2)))
+rng_vals <- c(min(val.plsr.output$LPI), max(val.plsr.output$UPI))
+par(mfrow=c(1,1), mar=c(4.2,5.3,1,0.4), oma=c(0, 0.1, 0, 0.2))
+plotrix::plotCI(val.plsr.output$PLSR_Predicted,val.plsr.output[,inVar], 
+       li=val.plsr.output$LPI, ui=val.plsr.output$UPI, gap=0.009,sfrac=0.000, 
+       lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), 
+       err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="grey80",
+       cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"),
+       ylab=paste0("Observed ", paste(inVar), " (units)"),
+       cex.axis=1.5,cex.lab=1.8)
+abline(0,1,lty=2,lw=2)
+plotrix::plotCI(val.plsr.output$PLSR_Predicted,val.plsr.output[,inVar], 
+       li=val.plsr.output$LCI, ui=val.plsr.output$UCI, gap=0.009,sfrac=0.004, 
+       lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), 
+       err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="black",
+       cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"),
+       ylab=paste0("Observed ", paste(inVar), " (units)"),
+       cex.axis=1.5,cex.lab=1.8, add=T)
+legend("topleft", legend=expr, bty="n", cex=1.5)
+legend("bottomright", legend=c("Prediction Interval","Confidence Interval"), 
+       lty=c(1,1), col = c("grey80","black"), lwd=3, bty="n", cex=1.5)
+box(lwd=2.2)
+dev.copy(png,file.path(outdir,paste0(inVar,"_PLSR_Validation_Scatterplot.png")), 
+         height=2800, width=3200,  res=340)
+dev.off();
+```
+
+### Output bootstrap results
+```{r, echo=TRUE}
+# Bootstrap Coefficients
+out.jk.coefs <- data.frame(Iteration=seq(1,length(bootstrap_intercept),1),
+                           Intercept=bootstrap_intercept,t(bootstrap_coef))
+names(out.jk.coefs) <- c("Iteration","Intercept",paste0("Wave_",wv))
+head(out.jk.coefs)[1:6]
+write.csv(out.jk.coefs,file=file.path(outdir,paste0(inVar,
+                                                    '_Bootstrap_PLSR_Coefficients.csv')),
+          row.names=FALSE)
+```
+
+### Create core PLSR outputs
+```{r, echo=TRUE}
+print(paste("Output directory: ", outdir))
+
+# Observed versus predicted
+write.csv(cal.plsr.output,file=file.path(outdir,
+                                         paste0(inVar,'_Observed_PLSR_CV_Pred_',
+                                                nComps,'comp.csv')),
+          row.names=FALSE)
+
+# Validation data
+write.csv(val.plsr.output,file=file.path(outdir,
+                                         paste0(inVar,'_Validation_PLSR_Pred_',
+                                                nComps,'comp.csv')),
+          row.names=FALSE)
+
+# Model coefficients
+coefs <- coef(plsr.out,ncomp=nComps,intercept=TRUE)
+write.csv(coefs,file=file.path(outdir,
+                               paste0(inVar,'_PLSR_Coefficients_',
+                                      nComps,'comp.csv')),
+          row.names=TRUE)
+
+# PLSR VIP
+write.csv(vips,file=file.path(outdir,
+                              paste0(inVar,'_PLSR_VIPs_',
+                                     nComps,'comp.csv')))
+```
+
+### Confirm files were written to temp space
+```{r, echo=TRUE}
+print("**** PLSR output files: ")
+print(list.files(outdir)[grep(pattern = inVar, list.files(outdir))])
+```
diff --git a/vignettes/ely_leafN_bootstrap_plsr_grp_example.md b/vignettes/ely_leafN_bootstrap_plsr_grp_example.md
new file mode 100644
index 0000000..b94b86f
--- /dev/null
+++ b/vignettes/ely_leafN_bootstrap_plsr_grp_example.md
@@ -0,0 +1,810 @@
+Spectra-trait PLSR example using leaf-level spectra and leaf nitrogen
+content (Narea, g/m2) data from eight different crop species growing in
+a glasshouse at Brookhaven National Laboratory.
+================
+Shawn P. Serbin, Julien Lamour, & Jeremiah Anderson
+
+### Overview
+
+This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook to
+illustrate how to load an internal dataset (“ely\_plsr\_data”), choose
+the “optimal” number of PLSR components, and fit a PLSR model for leaf
+nitrogen content (Narea, g/m2).
+
+### Getting Started
+
+### Load libraries
+
+``` r
+list.of.packages <- c("pls","dplyr","here","plotrix","ggplot2","gridExtra","spectratrait")
+invisible(lapply(list.of.packages, library, character.only = TRUE))
+```
+
+    ## 
+    ## Attaching package: 'pls'
+
+    ## The following object is masked from 'package:stats':
+    ## 
+    ##     loadings
+
+    ## 
+    ## Attaching package: 'dplyr'
+
+    ## The following objects are masked from 'package:stats':
+    ## 
+    ##     filter, lag
+
+    ## The following objects are masked from 'package:base':
+    ## 
+    ##     intersect, setdiff, setequal, union
+
+    ## here() starts at /Users/sserbin/Data/GitHub/spectratrait
+
+    ## 
+    ## Attaching package: 'gridExtra'
+
+    ## The following object is masked from 'package:dplyr':
+    ## 
+    ##     combine
+
+### Setup other functions and options
+
+``` r
+### Setup other functions and options
+# not in
+`%notin%` <- Negate(`%in%`)
+
+# Script options
+pls::pls.options(plsralg = "oscorespls")
+pls::pls.options("plsralg")
+```
+
+    ## $plsralg
+    ## [1] "oscorespls"
+
+``` r
+# Default par options (saved so they can be restored later with par(opar))
+opar <- par(no.readonly = TRUE)
+
+# Specify output directory, output_dir
+# Options:
+# tempdir - use an OS-specified temporary directory
+# user defined PATH - e.g. "~/scratch/PLSR"
+output_dir <- "tempdir"
+```
+
+### Load internal Ely et al 2019 dataset
+
+``` r
+data("ely_plsr_data")
+head(ely_plsr_data)[,1:8]
+```
+
+    ##   Species_Code      Common_Name C_N_mass   C_g_m2 H20_g_m2 LMA_g_m2   N_g_m2
+    ## 1        HEAN3 common sunflower     7.58 15.61210   167.63    36.40 2.103694
+    ## 2        HEAN3 common sunflower     8.33 14.73724   164.68    34.65 1.231713
+    ## 3        HEAN3 common sunflower     7.70 15.02495   156.95    35.08 1.764752
+    ## 4        CUSA4  garden cucumber     7.40 11.14835   111.52    26.23 1.287963
+    ## 5        CUSA4  garden cucumber     7.47 11.60735   123.58    26.71 1.411361
+    ## 6        CUSA4  garden cucumber     7.43  8.06035   114.36    18.40 1.117704
+    ##   Wave_500
+    ## 1 4.782000
+    ## 2 4.341714
+    ## 3 4.502857
+    ## 4 3.333429
+    ## 5 3.313571
+    ## 6 3.272286
+
+``` r
+# What is the target variable?
+inVar <- "N_g_m2"
+```
+
+### Set working directory (scratch space)
+
+    ## [1] "/private/var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T/Rtmp1HGXY2"
+
+### Full PLSR dataset
+
+``` r
+Start.wave <- 500
+End.wave <- 2400
+wv <- seq(Start.wave,End.wave,1)
+plsr_data <- ely_plsr_data
+head(plsr_data)[,1:6]
+```
+
+    ##   Species_Code      Common_Name C_N_mass   C_g_m2 H20_g_m2 LMA_g_m2
+    ## 1        HEAN3 common sunflower     7.58 15.61210   167.63    36.40
+    ## 2        HEAN3 common sunflower     8.33 14.73724   164.68    34.65
+    ## 3        HEAN3 common sunflower     7.70 15.02495   156.95    35.08
+    ## 4        CUSA4  garden cucumber     7.40 11.14835   111.52    26.23
+    ## 5        CUSA4  garden cucumber     7.47 11.60735   123.58    26.71
+    ## 6        CUSA4  garden cucumber     7.43  8.06035   114.36    18.40
+
+### Create cal/val datasets
+
+``` r
+### Create cal/val datasets
+## Stratified random cal/val split, sampling within each Species_Code group
+
+method <- "base" # approach passed to create_data_split(): "base" or "dplyr"
+# base R - a bit slow
+# dplyr - much faster
+split_data <- spectratrait::create_data_split(dataset=plsr_data, approach=method, 
+                                              split_seed=23452135, prop=0.7, 
+                                              group_variables="Species_Code")
+```
+
+    ## HEAN3   Cal: 70%
+
+    ## CUSA4   Cal: 68.182%
+
+    ## CUPE   Cal: 70.588%
+
+    ## SOLYL   Cal: 70%
+
+    ## OCBA   Cal: 68.421%
+
+    ## POPUL   Cal: 71.429%
+
+    ## GLMA4   Cal: 70.588%
+
+    ## PHVU   Cal: 66.667%
+
+``` r
+names(split_data)
+```
+
+    ## [1] "cal_data" "val_data"
+
+``` r
+cal.plsr.data <- split_data$cal_data
+head(cal.plsr.data)[1:8]
+```
+
+    ##    Species_Code      Common_Name C_N_mass   C_g_m2 H20_g_m2 LMA_g_m2   N_g_m2
+    ## 1         HEAN3 common sunflower     7.58 15.61210   167.63    36.40 2.103694
+    ## 2         HEAN3 common sunflower     8.33 14.73724   164.68    34.65 1.231713
+    ## 4         CUSA4  garden cucumber     7.40 11.14835   111.52    26.23 1.287963
+    ## 6         CUSA4  garden cucumber     7.43  8.06035   114.36    18.40 1.117704
+    ## 7          CUPE    field pumpkin     7.20 11.43007   128.42    25.83 1.215333
+    ## 10        SOLYL    garden tomato     7.89 11.61918   142.23    27.40 1.304110
+    ##    Wave_500
+    ## 1  4.782000
+    ## 2  4.341714
+    ## 4  3.333429
+    ## 6  3.272286
+    ## 7  2.943143
+    ## 10 4.145714
+
+``` r
+val.plsr.data <- split_data$val_data
+head(val.plsr.data)[1:8]
+```
+
+    ##    Species_Code      Common_Name C_N_mass    C_g_m2 H20_g_m2 LMA_g_m2    N_g_m2
+    ## 3         HEAN3 common sunflower     7.70 15.024947   156.95    35.08 1.7647515
+    ## 5         CUSA4  garden cucumber     7.47 11.607347   123.58    26.71 1.4113615
+    ## 8          CUPE    field pumpkin     7.67 12.466238   124.67    29.22 1.1468413
+    ## 9          CUPE    field pumpkin     7.64 17.100448   142.85    43.39 1.1390174
+    ## 13        SOLYL    garden tomato     7.73  7.938866   129.95    17.96 0.9483533
+    ## 15         OCBA      sweet basil     8.13 16.975969   173.30    38.65 1.1246459
+    ##    Wave_500
+    ## 3  4.502857
+    ## 5  3.313571
+    ## 8  2.868000
+    ## 9  3.338286
+    ## 13 3.960286
+    ## 15 3.744000
+
+``` r
+rm(split_data)
+
+# Datasets:
+print(paste("Cal observations: ",dim(cal.plsr.data)[1],sep=""))
+```
+
+    ## [1] "Cal observations: 124"
+
+``` r
+print(paste("Val observations: ",dim(val.plsr.data)[1],sep=""))
+```
+
+    ## [1] "Val observations: 54"
+
+``` r
+cal_hist_plot <- qplot(cal.plsr.data[,paste0(inVar)],geom="histogram",
+                       main = paste0("Cal. Histogram for ",inVar),
+                       xlab = paste0(inVar),ylab = "Count",fill=I("grey50"),col=I("black"),
+                       alpha=I(.7))
+val_hist_plot <- qplot(val.plsr.data[,paste0(inVar)],geom="histogram",
+                       main = paste0("Val. Histogram for ",inVar),
+                       xlab = paste0(inVar),ylab = "Count",fill=I("grey50"),col=I("black"),
+                       alpha=I(.7))
+histograms <- grid.arrange(cal_hist_plot, val_hist_plot, ncol=2)
+```
+
+    ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
+
+    ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
+
+![](ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-6-1.png)<!-- -->
+
+``` r
+ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Histograms.png")), 
+       plot = histograms, 
+       device="png", width = 30, 
+       height = 12, units = "cm",
+       dpi = 300)
+# output cal/val data
+write.csv(cal.plsr.data,file=file.path(outdir,paste0(inVar,'_Cal_PLSR_Dataset.csv')),
+          row.names=FALSE)
+write.csv(val.plsr.data,file=file.path(outdir,paste0(inVar,'_Val_PLSR_Dataset.csv')),
+          row.names=FALSE)
+```
+
+### Create calibration and validation PLSR datasets
+
+``` r
+### Format PLSR data for model fitting 
+cal_spec <- as.matrix(cal.plsr.data[, which(names(cal.plsr.data) %in% paste0("Wave_",wv))])
+cal.plsr.data <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% paste0("Wave_",wv))],
+                            Spectra=I(cal_spec))
+head(cal.plsr.data)[1:5]
+```
+
+    ##    Species_Code      Common_Name C_N_mass   C_g_m2 H20_g_m2
+    ## 1         HEAN3 common sunflower     7.58 15.61210   167.63
+    ## 2         HEAN3 common sunflower     8.33 14.73724   164.68
+    ## 4         CUSA4  garden cucumber     7.40 11.14835   111.52
+    ## 6         CUSA4  garden cucumber     7.43  8.06035   114.36
+    ## 7          CUPE    field pumpkin     7.20 11.43007   128.42
+    ## 10        SOLYL    garden tomato     7.89 11.61918   142.23
+
+``` r
+val_spec <- as.matrix(val.plsr.data[, which(names(val.plsr.data) %in% paste0("Wave_",wv))])
+val.plsr.data <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% paste0("Wave_",wv))],
+                            Spectra=I(val_spec))
+head(val.plsr.data)[1:5]
+```
+
+    ##    Species_Code      Common_Name C_N_mass    C_g_m2 H20_g_m2
+    ## 3         HEAN3 common sunflower     7.70 15.024947   156.95
+    ## 5         CUSA4  garden cucumber     7.47 11.607347   123.58
+    ## 8          CUPE    field pumpkin     7.67 12.466238   124.67
+    ## 9          CUPE    field pumpkin     7.64 17.100448   142.85
+    ## 13        SOLYL    garden tomato     7.73  7.938866   129.95
+    ## 15         OCBA      sweet basil     8.13 16.975969   173.30
+
+### plot cal and val spectra
+
+``` r
+par(mfrow=c(1,2)) # 1 row x 2 columns: calibration (left) and validation (right) panels
+spectratrait::f.plot.spec(Z=cal.plsr.data$Spectra,wv=wv,plot_label="Calibration")
+spectratrait::f.plot.spec(Z=val.plsr.data$Spectra,wv=wv,plot_label="Validation")
+```
+
+![](ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-8-1.png)<!-- -->
+
+``` r
+dev.copy(png,file.path(outdir,paste0(inVar,'_Cal_Val_Spectra.png')), 
+         height=2500,width=4900, res=340)
+```
+
+    ## quartz_off_screen 
+    ##                 3
+
+``` r
+dev.off();
+```
+
+    ## quartz_off_screen 
+    ##                 2
+
+``` r
+par(mfrow=c(1,1))
+```
+
+### Use permutation to determine optimal number of components
+
+``` r
+### Use permutation to determine the optimal number of components
+if(grepl("Windows", sessionInfo()$running)){
+  pls.options(parallel = NULL)  # Windows: leave the pls parallel option unset
+} else {
+  pls.options(parallel = parallel::detectCores()-1)  # otherwise: all detected cores but one
+}
+
+method <- "firstMin" #firstPlateau, firstMin
+random_seed <- 1245565
+seg <- 50  # NOTE(review): not passed to the call below - confirm it is still needed
+maxComps <- 16
+iterations <- 80
+prop <- 0.70
+nComps <- spectratrait::find_optimal_comp_by_groups(dataset=cal.plsr.data, targetVariable=inVar, 
+                                                    method=method, maxComps=maxComps, 
+                                                    iterations=iterations, prop=prop, 
+                                                    random_seed=random_seed,
+                                                    group_variables="Species_Code")
+```
+
+    ## [1] "*** Identifying optimal number of PLSR components using stratified resampling by group_variables ***"
+    ## [1] "*** Running permutation test.  Please hang tight, this can take awhile ***"
+    ## [1] "Options:"
+    ## [1] "Max Components: 16 Iterations: 80 Data Proportion (percent): 70"
+    ## [1] "*** Providing PRESS and coefficient array output ***"
+
+    ## No id variables; using all as measure variables
+
+    ## [1] "*** Optimal number of components based on t.test: 15"
+
+![](ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-9-1.png)<!-- -->
+
+``` r
+dev.copy(png,file.path(outdir,paste0(paste0(inVar,"_PLSR_Component_Selection.png"))), 
+         height=2800, width=3400, res=340)
+```
+
+    ## quartz_off_screen 
+    ##                 3
+
+``` r
+dev.off();
+```
+
+    ## quartz_off_screen 
+    ##                 2
+
+### Fit final model
+
+``` r
+plsr.out <- plsr(as.formula(paste(inVar,"~","Spectra")),scale=FALSE,ncomp=nComps,validation="LOO",
+                 trace=FALSE,data=cal.plsr.data)  # fit on the calibration data with validation="LOO"
+fit <- plsr.out$fitted.values[,1,nComps]  # fitted values at nComps components
+pls.options(parallel = NULL)  # reset the parallel option set before component selection
+
+# External validation fit stats
+par(mfrow=c(1,2)) # 1 row x 2 columns: RMSEP panel, then R2 panel
+pls::RMSEP(plsr.out, newdata = val.plsr.data)
+```
+
+    ## (Intercept)      1 comps      2 comps      3 comps      4 comps      5 comps  
+    ##      0.5908       0.4735       0.4162       0.4037       0.3347       0.3023  
+    ##     6 comps      7 comps      8 comps      9 comps     10 comps     11 comps  
+    ##      0.2993       0.3081       0.2814       0.2445       0.2276       0.2104  
+    ##    12 comps     13 comps     14 comps     15 comps  
+    ##      0.1954       0.2003       0.1973       0.2108
+
+``` r
+plot(pls::RMSEP(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL RMSEP",
+     xlab="Number of Components",ylab="Model Validation RMSEP",lty=1,col="black",cex=1.5,lwd=2)
+box(lwd=2.2)
+
+pls::R2(plsr.out, newdata = val.plsr.data)
+```
+
+    ## (Intercept)      1 comps      2 comps      3 comps      4 comps      5 comps  
+    ##   -0.004079     0.355010     0.501632     0.531088     0.677620     0.737143  
+    ##     6 comps      7 comps      8 comps      9 comps     10 comps     11 comps  
+    ##    0.742224     0.726835     0.772115     0.827942     0.850962     0.872685  
+    ##    12 comps     13 comps     14 comps     15 comps  
+    ##    0.890124     0.884529     0.887961     0.872129
+
+``` r
+plot(pls::R2(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL R2",
+     xlab="Number of Components",ylab="Model Validation R2",lty=1,col="black",cex=1.5,lwd=2)
+box(lwd=2.2)
+```
+
+![](ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-10-1.png)<!-- -->
+
+``` r
+dev.copy(png,file.path(outdir,paste0(paste0(inVar,"_Validation_RMSEP_R2_by_Component.png"))), 
+         height=2800, width=4800,  res=340)
+```
+
+    ## quartz_off_screen 
+    ##                 3
+
+``` r
+dev.off();
+```
+
+    ## quartz_off_screen 
+    ##                 2
+
+``` r
+par(opar)
+```
+
+### PLSR fit observed vs. predicted plot data
+
+``` r
+#calibration
+cal.plsr.output <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% "Spectra")],
+                              PLSR_Predicted=fit,
+                              PLSR_CV_Predicted=as.vector(plsr.out$validation$pred[,,nComps]))
+cal.plsr.output <- cal.plsr.output %>%
+  mutate(PLSR_CV_Residuals = PLSR_CV_Predicted-get(inVar))
+head(cal.plsr.output)
+```
+
+    ##    Species_Code      Common_Name C_N_mass   C_g_m2 H20_g_m2 LMA_g_m2   N_g_m2
+    ## 1         HEAN3 common sunflower     7.58 15.61210   167.63    36.40 2.103694
+    ## 2         HEAN3 common sunflower     8.33 14.73724   164.68    34.65 1.231713
+    ## 4         CUSA4  garden cucumber     7.40 11.14835   111.52    26.23 1.287963
+    ## 6         CUSA4  garden cucumber     7.43  8.06035   114.36    18.40 1.117704
+    ## 7          CUPE    field pumpkin     7.20 11.43007   128.42    25.83 1.215333
+    ## 10        SOLYL    garden tomato     7.89 11.61918   142.23    27.40 1.304110
+    ##    PLSR_Predicted PLSR_CV_Predicted PLSR_CV_Residuals
+    ## 1        1.836047          1.714086       -0.38960842
+    ## 2        1.530813          1.685388        0.45367526
+    ## 4        1.254794          1.262835       -0.02512724
+    ## 6        1.127053          1.129340        0.01163542
+    ## 7        1.196259          1.188471       -0.02686200
+    ## 10       1.276380          1.281683       -0.02242624
+
+``` r
+cal.R2 <- round(pls::R2(plsr.out,intercept=FALSE)[[1]][nComps],2)  # R2 at nComps components (no newdata supplied)
+cal.RMSEP <- round(sqrt(mean(cal.plsr.output$PLSR_CV_Residuals^2)),2)  # RMSE of the CV residuals
+
+val.plsr.output <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% "Spectra")],
+                              PLSR_Predicted=as.vector(predict(plsr.out, 
+                                                               newdata = val.plsr.data, 
+                                                               ncomp=nComps, type="response")[,,1]))
+val.plsr.output <- val.plsr.output %>%
+  mutate(PLSR_Residuals = PLSR_Predicted-get(inVar))  # residual = predicted minus observed
+head(val.plsr.output)
+```
+
+    ##    Species_Code      Common_Name C_N_mass    C_g_m2 H20_g_m2 LMA_g_m2    N_g_m2
+    ## 3         HEAN3 common sunflower     7.70 15.024947   156.95    35.08 1.7647515
+    ## 5         CUSA4  garden cucumber     7.47 11.607347   123.58    26.71 1.4113615
+    ## 8          CUPE    field pumpkin     7.67 12.466238   124.67    29.22 1.1468413
+    ## 9          CUPE    field pumpkin     7.64 17.100448   142.85    43.39 1.1390174
+    ## 13        SOLYL    garden tomato     7.73  7.938866   129.95    17.96 0.9483533
+    ## 15         OCBA      sweet basil     8.13 16.975969   173.30    38.65 1.1246459
+    ##    PLSR_Predicted PLSR_Residuals
+    ## 3       1.7624701   -0.002281391
+    ## 5       1.2947218   -0.116639722
+    ## 8       0.9934199   -0.153421396
+    ## 9       1.1345273   -0.004490078
+    ## 13      0.7432855   -0.205067758
+    ## 15      1.1613789    0.036733007
+
+``` r
+val.R2 <- round(pls::R2(plsr.out,newdata=val.plsr.data,intercept=FALSE)[[1]][nComps],2)
+val.RMSEP <- round(sqrt(mean(val.plsr.output$PLSR_Residuals^2)),2)  # RMSE of the validation residuals
+
+rng_quant <- quantile(cal.plsr.output[,inVar], probs = c(0.001, 0.999))
+cal_scatter_plot <- ggplot(cal.plsr.output, aes(x=PLSR_CV_Predicted, y=get(inVar))) + 
+  theme_bw() + geom_point() + geom_abline(intercept = 0, slope = 1, color="dark grey", 
+                                          linetype="dashed", size=1.5) + xlim(rng_quant[1], 
+                                                                              rng_quant[2]) + 
+  ylim(rng_quant[1], rng_quant[2]) +
+  labs(x=paste0("Predicted ", paste(inVar), " (units)"),
+       y=paste0("Observed ", paste(inVar), " (units)"),
+       title=paste0("Calibration: ", paste0("Rsq = ", cal.R2), "; ", paste0("RMSEP = ", 
+                                                                            cal.RMSEP))) +
+  theme(axis.text=element_text(size=18), legend.position="none",
+        axis.title=element_text(size=20, face="bold"), 
+        axis.text.x = element_text(angle = 0,vjust = 0.5),
+        panel.border = element_rect(linetype = "solid", fill = NA, size=1.5))
+
+cal_resid_histogram <- ggplot(cal.plsr.output, aes(x=PLSR_CV_Residuals)) +
+  geom_histogram(alpha=.5, position="identity") + 
+  geom_vline(xintercept = 0, color="black", 
+             linetype="dashed", size=1) + theme_bw() + 
+  theme(axis.text=element_text(size=18), legend.position="none",
+        axis.title=element_text(size=20, face="bold"), 
+        axis.text.x = element_text(angle = 0,vjust = 0.5),
+        panel.border = element_rect(linetype = "solid", fill = NA, size=1.5))
+
+rng_quant <- quantile(val.plsr.output[,inVar], probs = c(0.001, 0.999))
+val_scatter_plot <- ggplot(val.plsr.output, aes(x=PLSR_Predicted, y=get(inVar))) + 
+  theme_bw() + geom_point() + geom_abline(intercept = 0, slope = 1, color="dark grey", 
+                                          linetype="dashed", size=1.5) + xlim(rng_quant[1], 
+                                                                              rng_quant[2]) + 
+  ylim(rng_quant[1], rng_quant[2]) +
+  labs(x=paste0("Predicted ", paste(inVar), " (units)"),
+       y=paste0("Observed ", paste(inVar), " (units)"),
+       title=paste0("Validation: ", paste0("Rsq = ", val.R2), "; ", paste0("RMSEP = ", 
+                                                                           val.RMSEP))) +
+  theme(axis.text=element_text(size=18), legend.position="none",
+        axis.title=element_text(size=20, face="bold"), 
+        axis.text.x = element_text(angle = 0,vjust = 0.5),
+        panel.border = element_rect(linetype = "solid", fill = NA, size=1.5))
+
+val_resid_histogram <- ggplot(val.plsr.output, aes(x=PLSR_Residuals)) +
+  geom_histogram(alpha=.5, position="identity") + 
+  geom_vline(xintercept = 0, color="black", 
+             linetype="dashed", size=1) + theme_bw() + 
+  theme(axis.text=element_text(size=18), legend.position="none",
+        axis.title=element_text(size=20, face="bold"), 
+        axis.text.x = element_text(angle = 0,vjust = 0.5),
+        panel.border = element_rect(linetype = "solid", fill = NA, size=1.5))
+
+# plot cal/val side-by-side
+scatterplots <- grid.arrange(cal_scatter_plot, val_scatter_plot, cal_resid_histogram, 
+                             val_resid_histogram, nrow=2,ncol=2)
+```
+
+    ## Warning: Removed 5 rows containing missing values (geom_point).
+
+    ## Warning: Removed 4 rows containing missing values (geom_point).
+
+    ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
+    ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
+
+![](ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-11-1.png)<!-- -->
+
+``` r
+ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Scatterplots.png")), 
+       plot = scatterplots, device="png", 
+       width = 32, 
+       height = 30, units = "cm",
+       dpi = 300)
+```
+
+### Generate Coefficient and VIP plots
+
+``` r
+vips <- spectratrait::VIP(plsr.out)[nComps,]
+par(mfrow=c(2,1))
+plot(plsr.out, plottype = "coef",xlab="Wavelength (nm)",
+     ylab="Regression coefficients",legendpos = "bottomright",
+     ncomp=nComps,lwd=2)
+box(lwd=2.2)
+plot(seq(Start.wave,End.wave,1),vips,xlab="Wavelength (nm)",ylab="VIP",cex=0.01)
+lines(seq(Start.wave,End.wave,1),vips,lwd=3)
+abline(h=0.8,lty=2,col="dark grey")
+box(lwd=2.2)
+```
+
+![](ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-12-1.png)<!-- -->
+
+``` r
+dev.copy(png,file.path(outdir,paste0(inVar,'_Coefficient_VIP_plot.png')), 
+         height=3100, width=4100, res=340)
+```
+
+    ## quartz_off_screen 
+    ##                 3
+
+``` r
+dev.off();
+```
+
+    ## quartz_off_screen 
+    ##                 2
+
+### Bootstrap validation
+
+``` r
+if(grepl("Windows", sessionInfo()$running)){
+  pls.options(parallel =NULL)
+} else {
+  pls.options(parallel = parallel::detectCores()-1)
+}
+
+### PLSR bootstrap permutation uncertainty analysis
+iterations <- 500    # how many permutation iterations to run
+prop <- 0.70          # fraction of training data to keep for each iteration
+plsr_permutation <- spectratrait::pls_permutation_by_groups(dataset=cal.plsr.data, 
+                                                            targetVariable=inVar,
+                                                            maxComps=nComps, 
+                                                            iterations=iterations, 
+                                                            prop=prop, group_variables="Species_Code", 
+                                                            verbose=FALSE)
+```
+
+    ## [1] "*** Running permutation test.  Please hang tight, this can take awhile ***"
+    ## [1] "Options:"
+    ## [1] "Max Components: 15 Iterations: 500 Data Proportion (percent): 70"
+    ## [1] "*** Providing PRESS and coefficient array output ***"
+
+``` r
+bootstrap_intercept <- plsr_permutation$coef_array[1,,nComps]  # row 1 of coef_array is used as the intercept
+bootstrap_coef <- plsr_permutation$coef_array[2:length(plsr_permutation$coef_array[,1,nComps]),
+                                              ,nComps]  # remaining rows: the spectral coefficients
+rm(plsr_permutation)
+
+# apply each iteration's coefficients and intercept to the left-out validation spectra
+interval <- c(0.025,0.975)  # quantile probabilities used for LCI/UCI
+Bootstrap_Pred <- val.plsr.data$Spectra %*% bootstrap_coef + 
+  matrix(rep(bootstrap_intercept, length(val.plsr.data[,inVar])), byrow=TRUE, 
+         ncol=length(bootstrap_intercept))
+Interval_Conf <- apply(X = Bootstrap_Pred, MARGIN = 1, FUN = quantile, 
+                       probs=c(interval[1], interval[2]))
+sd_mean <- apply(X = Bootstrap_Pred, MARGIN = 1, FUN = sd)  # per-row SD across the bootstrap predictions
+sd_res <- sd(val.plsr.output$PLSR_Residuals)  # SD of the validation residuals
+sd_tot <- sqrt(sd_mean^2+sd_res^2)  # the two SDs combined in quadrature
+val.plsr.output$LCI <- Interval_Conf[1,]  # lower quantile (interval[1]) of the bootstrap predictions
+val.plsr.output$UCI <- Interval_Conf[2,]  # upper quantile (interval[2]) of the bootstrap predictions
+val.plsr.output$LPI <- val.plsr.output$PLSR_Predicted-1.96*sd_tot  # PLSR prediction - 1.96 * sd_tot
+val.plsr.output$UPI <- val.plsr.output$PLSR_Predicted+1.96*sd_tot  # PLSR prediction + 1.96 * sd_tot
+head(val.plsr.output)
+```
+
+    ##    Species_Code      Common_Name C_N_mass    C_g_m2 H20_g_m2 LMA_g_m2    N_g_m2
+    ## 3         HEAN3 common sunflower     7.70 15.024947   156.95    35.08 1.7647515
+    ## 5         CUSA4  garden cucumber     7.47 11.607347   123.58    26.71 1.4113615
+    ## 8          CUPE    field pumpkin     7.67 12.466238   124.67    29.22 1.1468413
+    ## 9          CUPE    field pumpkin     7.64 17.100448   142.85    43.39 1.1390174
+    ## 13        SOLYL    garden tomato     7.73  7.938866   129.95    17.96 0.9483533
+    ## 15         OCBA      sweet basil     8.13 16.975969   173.30    38.65 1.1246459
+    ##    PLSR_Predicted PLSR_Residuals       LCI       UCI       LPI      UPI
+    ## 3       1.7624701   -0.002281391 1.5710330 1.9443661 1.3151243 2.209816
+    ## 5       1.2947218   -0.116639722 1.2019841 1.4531979 0.8688563 1.720587
+    ## 8       0.9934199   -0.153421396 0.8544582 1.1646561 0.5564158 1.430424
+    ## 9       1.1345273   -0.004490078 0.9954061 1.2824287 0.7007745 1.568280
+    ## 13      0.7432855   -0.205067758 0.5836738 0.9094675 0.3042086 1.182362
+    ## 15      1.1613789    0.036733007 1.0021191 1.2849671 0.7291004 1.593657
+
+### Bootstrap coefficient plot
+
+``` r
+# Bootstrap regression coefficient plot
+spectratrait::f.plot.coef(Z = t(bootstrap_coef), wv = wv, 
+            plot_label="Bootstrap regression coefficients",position = 'bottomleft')
+abline(h=0,lty=2,col="grey50")
+box(lwd=2.2)
+```
+
+![](ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-14-1.png)<!-- -->
+
+``` r
+dev.copy(png,file.path(outdir,paste0(inVar,'_Bootstrap_Regression_Coefficients.png')), 
+         height=2100, width=3800, res=340)
+```
+
+    ## quartz_off_screen 
+    ##                 3
+
+``` r
+dev.off();
+```
+
+    ## quartz_off_screen 
+    ##                 2
+
+### Bootstrap validation plot
+
+``` r
+rmsep_percrmsep <- spectratrait::percent_rmse(plsr_dataset = val.plsr.output, 
+                                              inVar = inVar, 
+                                              residuals = val.plsr.output$PLSR_Residuals, 
+                                              range="full")
+RMSEP <- rmsep_percrmsep$rmse
+perc_RMSEP <- rmsep_percrmsep$perc_rmse
+r2 <- round(pls::R2(plsr.out, newdata = val.plsr.data, intercept=FALSE)$val[nComps],2)
+expr <- vector("expression", 3)  # legend labels: R^2, RMSEP, %RMSEP
+expr[[1]] <- bquote(R^2==.(r2))
+expr[[2]] <- bquote(RMSEP==.(round(RMSEP,2)))
+expr[[3]] <- bquote("%RMSEP"==.(round(perc_RMSEP,2)))
+rng_vals <- c(min(val.plsr.output$LPI), max(val.plsr.output$UPI))  # shared x/y limits from LPI/UPI
+par(mfrow=c(1,1), mar=c(4.2,5.3,1,0.4), oma=c(0, 0.1, 0, 0.2))
+plotrix::plotCI(val.plsr.output$PLSR_Predicted,val.plsr.output[,inVar], 
+       li=val.plsr.output$LPI, ui=val.plsr.output$UPI, gap=0.009,sfrac=0.000, 
+       lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), 
+       err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="grey80",
+       cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"),
+       ylab=paste0("Observed ", paste(inVar), " (units)"),
+       cex.axis=1.5,cex.lab=1.8)  # first pass: LPI/UPI bars in grey80
+abline(0,1,lty=2,lwd=2)  # dashed 1:1 line
+plotrix::plotCI(val.plsr.output$PLSR_Predicted,val.plsr.output[,inVar], 
+       li=val.plsr.output$LCI, ui=val.plsr.output$UCI, gap=0.009,sfrac=0.004, 
+       lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), 
+       err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="black",
+       cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"),
+       ylab=paste0("Observed ", paste(inVar), " (units)"),
+       cex.axis=1.5,cex.lab=1.8, add=TRUE)  # second pass: LCI/UCI bars in black, overlaid
+legend("topleft", legend=expr, bty="n", cex=1.5)
+legend("bottomright", legend=c("Prediction Interval","Confidence Interval"), 
+       lty=c(1,1), col = c("grey80","black"), lwd=3, bty="n", cex=1.5)
+box(lwd=2.2)
+```
+
+![](ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-15-1.png)<!-- -->
+
+``` r
+dev.copy(png,file.path(outdir,paste0(inVar,"_PLSR_Validation_Scatterplot.png")), 
+         height=2800, width=3200,  res=340)
+```
+
+    ## quartz_off_screen 
+    ##                 3
+
+``` r
+dev.off();
+```
+
+    ## quartz_off_screen 
+    ##                 2
+
+### Output bootstrap results
+
+``` r
+# Bootstrap Coefficients
+out.jk.coefs <- data.frame(Iteration=seq(1,length(bootstrap_intercept),1),
+                           Intercept=bootstrap_intercept,t(bootstrap_coef))
+names(out.jk.coefs) <- c("Iteration","Intercept",paste0("Wave_",wv))
+head(out.jk.coefs)[1:6]
+```
+
+    ##   Iteration  Intercept      Wave_500    Wave_501    Wave_502   Wave_503
+    ## 1         1  0.4731951  0.0236618987 0.021719096 0.023063691 0.02187741
+    ## 2         2  0.5415203 -0.0007012397 0.001892634 0.008241293 0.01105366
+    ## 3         3  0.6512533  0.0123054098 0.013428257 0.015824665 0.01772586
+    ## 4         4 -0.9976728  0.0145306759 0.016119715 0.018834952 0.01959049
+    ## 5         5  0.1267626  0.0076041315 0.007329090 0.009971693 0.01339406
+    ## 6         6  0.8509641  0.0139793124 0.015195593 0.015170417 0.01434085
+
+``` r
+write.csv(out.jk.coefs,file=file.path(outdir,paste0(inVar,
+                                                    '_Bootstrap_PLSR_Coefficients.csv')),
+          row.names=FALSE)
+```
+
+### Create core PLSR outputs
+
+``` r
+print(paste("Output directory: ", outdir))
+```
+
+    ## [1] "Output directory:  /var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T//Rtmp1HGXY2"
+
+``` r
+# Observed versus predicted
+write.csv(cal.plsr.output,file=file.path(outdir,
+                                         paste0(inVar,'_Observed_PLSR_CV_Pred_',
+                                                nComps,'comp.csv')),
+          row.names=FALSE)
+
+# Validation data
+write.csv(val.plsr.output,file=file.path(outdir,
+                                         paste0(inVar,'_Validation_PLSR_Pred_',
+                                                nComps,'comp.csv')),
+          row.names=FALSE)
+
+# Model coefficients
+coefs <- coef(plsr.out,ncomp=nComps,intercept=TRUE)
+write.csv(coefs,file=file.path(outdir,
+                               paste0(inVar,'_PLSR_Coefficients_',
+                                      nComps,'comp.csv')),
+          row.names=TRUE)
+
+# PLSR VIP
+write.csv(vips,file=file.path(outdir,
+                              paste0(inVar,'_PLSR_VIPs_',
+                                     nComps,'comp.csv')))
+```
+
+### Confirm files were written to temp space
+
+``` r
+print("**** PLSR output files: ")
+```
+
+    ## [1] "**** PLSR output files: "
+
+``` r
+print(list.files(outdir, pattern = inVar))  # files in outdir whose names match inVar
+```
+
+    ##  [1] "N_g_m2_Bootstrap_PLSR_Coefficients.csv"      
+    ##  [2] "N_g_m2_Bootstrap_Regression_Coefficients.png"
+    ##  [3] "N_g_m2_Cal_PLSR_Dataset.csv"                 
+    ##  [4] "N_g_m2_Cal_Val_Histograms.png"               
+    ##  [5] "N_g_m2_Cal_Val_Scatterplots.png"             
+    ##  [6] "N_g_m2_Cal_Val_Spectra.png"                  
+    ##  [7] "N_g_m2_Coefficient_VIP_plot.png"             
+    ##  [8] "N_g_m2_Observed_PLSR_CV_Pred_15comp.csv"     
+    ##  [9] "N_g_m2_PLSR_Coefficients_15comp.csv"         
+    ## [10] "N_g_m2_PLSR_Component_Selection.png"         
+    ## [11] "N_g_m2_PLSR_Validation_Scatterplot.png"      
+    ## [12] "N_g_m2_PLSR_VIPs_15comp.csv"                 
+    ## [13] "N_g_m2_Val_PLSR_Dataset.csv"                 
+    ## [14] "N_g_m2_Validation_PLSR_Pred_15comp.csv"      
+    ## [15] "N_g_m2_Validation_RMSEP_R2_by_Component.png"
diff --git a/vignettes/ely_leafN_bootstrap_plsr_grp_example.pdf b/vignettes/ely_leafN_bootstrap_plsr_grp_example.pdf
new file mode 100644
index 0000000..e8f358f
Binary files /dev/null and b/vignettes/ely_leafN_bootstrap_plsr_grp_example.pdf differ
diff --git a/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-10-1.png b/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-10-1.png
new file mode 100644
index 0000000..bab716c
Binary files /dev/null and b/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-10-1.png differ
diff --git a/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-11-1.png b/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-11-1.png
new file mode 100644
index 0000000..3aa89fe
Binary files /dev/null and b/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-11-1.png differ
diff --git a/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-12-1.png b/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-12-1.png
new file mode 100644
index 0000000..cc008ff
Binary files /dev/null and b/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-12-1.png differ
diff --git a/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-14-1.png b/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-14-1.png
new file mode 100644
index 0000000..8d09d49
Binary files /dev/null and b/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-14-1.png differ
diff --git a/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-15-1.png b/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-15-1.png
new file mode 100644
index 0000000..12162fd
Binary files /dev/null and b/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-15-1.png differ
diff --git a/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-6-1.png b/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-6-1.png
new file mode 100644
index 0000000..6d79827
Binary files /dev/null and b/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-6-1.png differ
diff --git a/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-8-1.png b/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-8-1.png
new file mode 100644
index 0000000..4452988
Binary files /dev/null and b/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-8-1.png differ
diff --git a/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-9-1.png b/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-9-1.png
new file mode 100644
index 0000000..e435ae9
Binary files /dev/null and b/vignettes/ely_leafN_bootstrap_plsr_grp_example_files/figure-gfm/unnamed-chunk-9-1.png differ
diff --git a/vignettes/kit_sla_plsr_example.Rmd b/vignettes/kit_sla_plsr_example.Rmd
index 6151db8..81851b0 100644
--- a/vignettes/kit_sla_plsr_example.Rmd
+++ b/vignettes/kit_sla_plsr_example.Rmd
@@ -2,9 +2,9 @@
 title: Spectra-trait PLSR example using leaf-level spectra and specific leaf area (SLA) data from more than 40 species grassland species comprising both herbs and graminoids. 
 author: "Shawn P. Serbin, Julien Lamour, & Jeremiah Anderson"
 output:
-  github_document: default
-  html_notebook: default
   pdf_document: default
+  html_notebook: default
+  github_document: default
   html_document:
     df_print: paged
 params:
@@ -110,9 +110,7 @@ split_data <- spectratrait::create_data_split(dataset=plsr_data, approach=method
                                               prop=0.8, group_variables="Plant_Species")
 names(split_data)
 cal.plsr.data <- split_data$cal_data
-head(cal.plsr.data)[1:8]
 val.plsr.data <- split_data$val_data
-head(val.plsr.data)[1:8]
 rm(split_data)
 
 # Datasets:
@@ -181,12 +179,14 @@ iterations <- 50
 prop <- 0.70
 if (method=="pls") {
   # pls package approach - faster but estimates more components....
-  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data,method=method, 
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, 
+                                                  method=method, 
                                                   maxComps=maxComps, seg=seg, 
                                                   random_seed=random_seed)
   print(paste0("*** Optimal number of components: ", nComps))
 } else {
-  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, 
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, 
+                                                  method=method, 
                                                   maxComps=maxComps, 
                                                   iterations=iterations, 
                                                   seg=seg, prop=prop, 
diff --git a/vignettes/kit_sla_plsr_example.md b/vignettes/kit_sla_plsr_example.md
index 11c7cf8..aa72fc2 100644
--- a/vignettes/kit_sla_plsr_example.md
+++ b/vignettes/kit_sla_plsr_example.md
@@ -85,7 +85,7 @@ output_dir <- "tempdir"
 
 ### Set working directory (scratch space)
 
-    ## [1] "Output directory: /private/var/folders/6h/r2g9xpxj2xq5xt1dn3cn5g800000gn/T/Rtmp0jgwQR"
+    ## [1] "Output directory: /private/var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T/Rtmp952NtZ"
 
 ### Grab data from EcoSIS
 
@@ -104,16 +104,15 @@ dat_raw <- spectratrait::get_ecosis_data(ecosis_id = ecosis_id)
 
     ## Downloading data...
 
-    ## Rows: 739 Columns: 2114
-
-    ## ── Column specification ────────────────────────────────────────────────────────
-    ## Delimiter: ","
-    ## chr   (13): Anthocyanin concentration (mg/g), Anthocyanin content ( g/cm ), ...
-    ## dbl (2101): 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412,...
-
     ## 
-    ## ℹ Use `spec()` to retrieve the full column specification for this data.
-    ## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
+    ## ── Column specification ────────────────────────────────────────────────────────
+    ## cols(
+    ##   .default = col_double(),
+    ##   `growth form` = col_character(),
+    ##   species = col_character(),
+    ##   timestamp = col_character()
+    ## )
+    ## ℹ Use `spec()` for the full column specifications.
 
     ## Download complete!
 
@@ -123,32 +122,20 @@ head(dat_raw)
 
     ## # A tibble: 6 × 2,114
     ##   `Anthocyanin concen… `Anthocyanin cont… `Carotenoid concen… `Carotenoid conte…
-    ##   <chr>                <chr>              <chr>               <chr>             
-    ## 1 0.00106305           0.996765974        0.00799017          7.491957938       
-    ## 2 0.003571021          1.217297195        0.022081567         7.527211759       
-    ## 3 0.002517379          1.142859188        0.018826449         8.546976036       
-    ## 4 0.003102353          2.262477235        0.015835418         11.54841829       
-    ## 5 0.004116414          1.733783943        0.021558342         9.080114754       
-    ## 6 0.003965355          1.021371167        0.033632402         8.662821832       
-    ## # … with 2,110 more variables: Chlorophyll concentration (mg/g) <chr>,
-    ## #   Chlorophyll content ( g/cm ) <chr>, LDMC (g/g) <chr>, LFA (mg/cm ) <chr>,
-    ## #   LWC (mg/cm ) <chr>, SLA (g/cm ) <chr>, growth form <chr>, species <chr>,
+    ##                  <dbl>              <dbl>               <dbl>              <dbl>
+    ## 1              0.00106              0.997             0.00799               7.49
+    ## 2              0.00357              1.22              0.0221                7.53
+    ## 3              0.00252              1.14              0.0188                8.55
+    ## 4              0.00310              2.26              0.0158               11.5 
+    ## 5              0.00412              1.73              0.0216                9.08
+    ## 6              0.00397              1.02              0.0336                8.66
+    ## # … with 2,110 more variables: Chlorophyll concentration (mg/g) <dbl>,
+    ## #   Chlorophyll content ( g/cm ) <dbl>, LDMC (g/g) <dbl>, LFA (mg/cm ) <dbl>,
+    ## #   LWC (mg/cm ) <dbl>, SLA (g/cm ) <dbl>, growth form <chr>, species <chr>,
     ## #   timestamp <chr>, 400 <dbl>, 401 <dbl>, 402 <dbl>, 403 <dbl>, 404 <dbl>,
     ## #   405 <dbl>, 406 <dbl>, 407 <dbl>, 408 <dbl>, 409 <dbl>, 410 <dbl>,
     ## #   411 <dbl>, 412 <dbl>, 413 <dbl>, 414 <dbl>, 415 <dbl>, 416 <dbl>,
-    ## #   417 <dbl>, 418 <dbl>, 419 <dbl>, 420 <dbl>, 421 <dbl>, 422 <dbl>,
-    ## #   423 <dbl>, 424 <dbl>, 425 <dbl>, 426 <dbl>, 427 <dbl>, 428 <dbl>,
-    ## #   429 <dbl>, 430 <dbl>, 431 <dbl>, 432 <dbl>, 433 <dbl>, 434 <dbl>,
-    ## #   435 <dbl>, 436 <dbl>, 437 <dbl>, 438 <dbl>, 439 <dbl>, 440 <dbl>,
-    ## #   441 <dbl>, 442 <dbl>, 443 <dbl>, 444 <dbl>, 445 <dbl>, 446 <dbl>,
-    ## #   447 <dbl>, 448 <dbl>, 449 <dbl>, 450 <dbl>, 451 <dbl>, 452 <dbl>,
-    ## #   453 <dbl>, 454 <dbl>, 455 <dbl>, 456 <dbl>, 457 <dbl>, 458 <dbl>,
-    ## #   459 <dbl>, 460 <dbl>, 461 <dbl>, 462 <dbl>, 463 <dbl>, 464 <dbl>,
-    ## #   465 <dbl>, 466 <dbl>, 467 <dbl>, 468 <dbl>, 469 <dbl>, 470 <dbl>,
-    ## #   471 <dbl>, 472 <dbl>, 473 <dbl>, 474 <dbl>, 475 <dbl>, 476 <dbl>,
-    ## #   477 <dbl>, 478 <dbl>, 479 <dbl>, 480 <dbl>, 481 <dbl>, 482 <dbl>,
-    ## #   483 <dbl>, 484 <dbl>, 485 <dbl>, 486 <dbl>, 487 <dbl>, 488 <dbl>,
-    ## #   489 <dbl>, 490 <dbl>, …
+    ## #   417 <dbl>, 418 <dbl>, 419 <dbl>, 420 <dbl>, 421 <dbl>, 422 <dbl>, …
 
 ``` r
 names(dat_raw)[1:40]
@@ -190,16 +177,16 @@ head(sample_info)
 
     ## # A tibble: 6 × 13
     ##   `Anthocyanin concen… `Anthocyanin cont… `Carotenoid concen… `Carotenoid conte…
-    ##   <chr>                <chr>              <chr>               <chr>             
-    ## 1 0.00106305           0.996765974        0.00799017          7.491957938       
-    ## 2 0.003571021          1.217297195        0.022081567         7.527211759       
-    ## 3 0.002517379          1.142859188        0.018826449         8.546976036       
-    ## 4 0.003102353          2.262477235        0.015835418         11.54841829       
-    ## 5 0.004116414          1.733783943        0.021558342         9.080114754       
-    ## 6 0.003965355          1.021371167        0.033632402         8.662821832       
-    ## # … with 9 more variables: Chlorophyll concentration (mg/g) <chr>,
-    ## #   Chlorophyll content ( g/cm ) <chr>, LDMC (g/g) <chr>, LFA (mg/cm ) <chr>,
-    ## #   LWC (mg/cm ) <chr>, SLA (g/cm ) <chr>, growth form <chr>, species <chr>,
+    ##                  <dbl>              <dbl>               <dbl>              <dbl>
+    ## 1              0.00106              0.997             0.00799               7.49
+    ## 2              0.00357              1.22              0.0221                7.53
+    ## 3              0.00252              1.14              0.0188                8.55
+    ## 4              0.00310              2.26              0.0158               11.5 
+    ## 5              0.00412              1.73              0.0216                9.08
+    ## 6              0.00397              1.02              0.0336                8.66
+    ## # … with 9 more variables: Chlorophyll concentration (mg/g) <dbl>,
+    ## #   Chlorophyll content ( g/cm ) <dbl>, LDMC (g/g) <dbl>, LFA (mg/cm ) <dbl>,
+    ## #   LWC (mg/cm ) <dbl>, SLA (g/cm ) <dbl>, growth form <chr>, species <chr>,
     ## #   timestamp <chr>
 
 ``` r
@@ -346,45 +333,7 @@ names(split_data)
 
 ``` r
 cal.plsr.data <- split_data$cal_data
-head(cal.plsr.data)[1:8]
-```
-
-    ##            Plant_Species Growth_Form       timestamp SLA_g_cm   Wave_500
-    ## 1 Calamagrostis epigejos   graminoid 5/25/2016 12:20 106.6500 0.09180559
-    ## 2  Anthoxanthum odoratum   graminoid  5/27/2016 8:40 293.3565 0.09022668
-    ## 3   Alopecurus pratensis   graminoid  5/27/2016 9:23 220.2703 0.07998340
-    ## 4          Festuca ovina   graminoid  5/27/2016 9:23 137.1220 0.05205080
-    ## 5    Agrostis capillaris   graminoid  5/27/2016 9:42 237.4237 0.06695127
-    ## 6  Aegopodium podagraria        forb 5/25/2016 12:20 388.2384 0.04091566
-    ##     Wave_501   Wave_502   Wave_503
-    ## 1 0.09293251 0.09417092 0.09552863
-    ## 2 0.09125158 0.09237300 0.09359694
-    ## 3 0.08109460 0.08231389 0.08365015
-    ## 4 0.05256869 0.05314560 0.05378355
-    ## 5 0.06766205 0.06845248 0.06932220
-    ## 6 0.04169865 0.04257613 0.04355737
-
-``` r
 val.plsr.data <- split_data$val_data
-head(val.plsr.data)[1:8]
-```
-
-    ##            Plant_Species Growth_Form       timestamp SLA_g_cm   Wave_500
-    ## 9          Urtica dioica        forb 5/25/2016 12:37 284.6788 0.04716736
-    ## 15       Stellaria media        forb 5/25/2016 13:21 418.4284 0.05694278
-    ## 23  Alopecurus pratensis   graminoid  6/1/2016 11:32 218.2117 0.08135086
-    ## 44  Alopecurus pratensis   graminoid   6/8/2016 8:37 216.7568 0.10062342
-    ## 46   Agrostis capillaris   graminoid   6/8/2016 9:05 231.5292 0.08099724
-    ## 47 Aegopodium podagraria        forb   6/7/2016 9:05 311.4018 0.03778815
-    ##      Wave_501   Wave_502   Wave_503
-    ## 9  0.04781633 0.04854276 0.04935320
-    ## 15 0.05811729 0.05940497 0.06080936
-    ## 23 0.08249180 0.08373915 0.08509719
-    ## 44 0.10190706 0.10330054 0.10480538
-    ## 46 0.08178586 0.08265099 0.08360108
-    ## 47 0.03845043 0.03919155 0.04001581
-
-``` r
 rm(split_data)
 
 # Datasets:
@@ -6493,7 +6442,7 @@ head(val.plsr.data)[1:5]
     ## 23        0.13114422        0.13173930        0.13233132        0.13292035
     ## 44        0.14035330        0.14102638        0.14169735        0.14236617
     ## 46        0.13576063        0.13641384        0.13706527        0.13771485
-    ## 47        0.08674581        0.08727845        0.08781028        0.08834134
+    ## 47        0.08674581        0.08727845        0.08781028        0.08834133
     ##    Spectra.Wave_2049 Spectra.Wave_2050 Spectra.Wave_2051 Spectra.Wave_2052
     ## 9         0.11252288        0.11318057        0.11383750        0.11449413
     ## 15        0.05869478        0.05920211        0.05971230        0.06022535
@@ -6743,7 +6692,7 @@ head(val.plsr.data)[1:5]
     ## 9         0.17459234        0.17471639        0.17483767        0.17495602
     ## 15        0.11900502        0.11910486        0.11920045        0.11929147
     ## 23        0.18956770        0.18964676        0.18972298        0.18979607
-    ## 44        0.20551218        0.20558308        0.20565096        0.20571562
+    ## 44        0.20551219        0.20558308        0.20565096        0.20571562
     ## 46        0.19599448        0.19604878        0.19610115        0.19615136
     ## 47        0.14005093        0.14013975        0.14022655        0.14031084
     ##    Spectra.Wave_2193 Spectra.Wave_2194 Spectra.Wave_2195 Spectra.Wave_2196
@@ -7158,12 +7107,14 @@ iterations <- 50
 prop <- 0.70
 if (method=="pls") {
   # pls package approach - faster but estimates more components....
-  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data,method=method, 
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, 
+                                                  method=method, 
                                                   maxComps=maxComps, seg=seg, 
                                                   random_seed=random_seed)
   print(paste0("*** Optimal number of components: ", nComps))
 } else {
-  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, 
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, 
+                                                  method=method, 
                                                   maxComps=maxComps, 
                                                   iterations=iterations, 
                                                   seg=seg, prop=prop, 
@@ -7171,6 +7122,7 @@ if (method=="pls") {
 }
 ```
 
+    ## [1] "*** Identifying optimal number of PLSR components ***"
     ## [1] "*** Running PLS permutation test ***"
 
 ![](kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png)<!-- -->
diff --git a/vignettes/kit_sla_plsr_example.pdf b/vignettes/kit_sla_plsr_example.pdf
index 30559bc..3c7f319 100644
Binary files a/vignettes/kit_sla_plsr_example.pdf and b/vignettes/kit_sla_plsr_example.pdf differ
diff --git a/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png b/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png
index d51916d..fb5956a 100644
Binary files a/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png and b/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png differ
diff --git a/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png b/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png
index a55ae5f..b498413 100644
Binary files a/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png and b/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png differ
diff --git a/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png b/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png
index 7ab9c40..c571fa7 100644
Binary files a/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png and b/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png differ
diff --git a/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-13-1.png b/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-13-1.png
index 64b085d..e9f1deb 100644
Binary files a/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-13-1.png and b/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-13-1.png differ
diff --git a/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-15-1.png b/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-15-1.png
index 195c4cc..ee63a04 100644
Binary files a/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-15-1.png and b/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-15-1.png differ
diff --git a/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-16-1.png b/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-16-1.png
index 97ee767..542b475 100644
Binary files a/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-16-1.png and b/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-16-1.png differ
diff --git a/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-7-1.png b/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-7-1.png
index 9fb1a0a..f0da7cf 100644
Binary files a/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-7-1.png and b/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-7-1.png differ
diff --git a/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-9-1.png b/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-9-1.png
index 70eee78..e6a5935 100644
Binary files a/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-9-1.png and b/vignettes/kit_sla_plsr_example_files/figure-gfm/unnamed-chunk-9-1.png differ
diff --git a/vignettes/neon_leafN_canopy_plsr_example.Rmd b/vignettes/neon_leafN_canopy_plsr_example.Rmd
index 34ef6f1..e79c2ba 100644
--- a/vignettes/neon_leafN_canopy_plsr_example.Rmd
+++ b/vignettes/neon_leafN_canopy_plsr_example.Rmd
@@ -2,11 +2,11 @@
 title: Spectra-trait PLSR example using NEON AOP pixel spectra and field-sampled leaf nitrogen content from CONUS NEON sites
 author: "Shawn P. Serbin, Julien Lamour, & Jeremiah Anderson"
 output:
-  pdf_document: default
+  github_document: default
   html_notebook: default
   html_document:
     df_print: paged
-  github_document: default
+  pdf_document: default
 params:
   date: !r Sys.Date()
 ---
@@ -23,8 +23,7 @@ https://ecosis.org/package/canopy-spectra-to-map-foliar-functional-traits-over-n
 ### Getting Started
 ### Load libraries
 ```{r, eval=TRUE, echo=TRUE}
-list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra",
-                      "spectratrait")
+list.of.packages <- c("pls","dplyr","here","plotrix","ggplot2","gridExtra","spectratrait")
 invisible(lapply(list.of.packages, library, character.only = TRUE))
 ```
 
@@ -215,12 +214,14 @@ iterations <- 80
 prop <- 0.70
 if (method=="pls") {
   # pls package approach - faster but estimates more components....
-  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, 
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar,
+                                                  method=method, 
                                                   maxComps=maxComps, seg=seg, 
                                                   random_seed=random_seed)
   print(paste0("*** Optimal number of components: ", nComps))
 } else {
-  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, 
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar,
+                                                  method=method, 
                                                   maxComps=maxComps, iterations=iterations, 
                                                   seg=seg, prop=prop, 
                                                   random_seed=random_seed)
@@ -361,7 +362,8 @@ if(grepl("Windows", sessionInfo()$running)){
 ### PLSR bootstrap permutation uncertainty analysis
 iterations <- 500    # how many permutation iterations to run
 prop <- 0.70          # fraction of training data to keep for each iteration
-plsr_permutation <- spectratrait::pls_permutation(dataset=cal.plsr.data, maxComps=nComps, 
+plsr_permutation <- spectratrait::pls_permutation(dataset=cal.plsr.data, targetVariable=inVar,
+                                                  maxComps=nComps, 
                                                   iterations=iterations, prop=prop,
                                                   verbose = FALSE)
 bootstrap_intercept <- plsr_permutation$coef_array[1,,nComps]
diff --git a/vignettes/neon_leafN_canopy_plsr_example.md b/vignettes/neon_leafN_canopy_plsr_example.md
index e5c134a..e1ad4c7 100644
--- a/vignettes/neon_leafN_canopy_plsr_example.md
+++ b/vignettes/neon_leafN_canopy_plsr_example.md
@@ -17,8 +17,7 @@ For more information refer to the dataset EcoSIS page:
 ### Load libraries
 
 ``` r
-list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra",
-                      "spectratrait")
+list.of.packages <- c("pls","dplyr","here","plotrix","ggplot2","gridExtra","spectratrait")
 invisible(lapply(list.of.packages, library, character.only = TRUE))
 ```
 
@@ -83,7 +82,7 @@ output_dir <- "tempdir"
 
 ### Set working directory (scratch space)
 
-    ## [1] "/private/var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T/RtmpvNSAkI"
+    ## [1] "/private/var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T/RtmpeLrBIP"
 
 ### Grab data from EcoSIS
 
@@ -118,7 +117,7 @@ dat_raw <- spectratrait::get_ecosis_data(ecosis_id = ecosis_id)
 head(dat_raw)
 ```
 
-    ## # A tibble: 6 x 459
+    ## # A tibble: 6 × 459
     ##   Affiliation   Boron Calcium Carbon Carotenoids_area Carotenoids_mass Cellulose
     ##   <chr>         <dbl>   <dbl>  <dbl>            <dbl>            <dbl>     <dbl>
     ## 1 University … 0.0420   24.2    463.             9.19             1.18      221.
@@ -133,19 +132,7 @@ head(dat_raw)
     ## #   PI <chr>, Phenolics <dbl>, Phosphorus <dbl>, Plot_ID <chr>,
     ## #   Potassium <dbl>, Project <chr>, SLA <dbl>, Sample_Year <dbl>, Starch <dbl>,
     ## #   Sugar <dbl>, Sulfur <dbl>, Water <dbl>, d13C <dbl>, d15N <dbl>, 384 <dbl>,
-    ## #   389 <dbl>, 394 <dbl>, 399 <dbl>, 404 <dbl>, 409 <dbl>, 414 <dbl>,
-    ## #   419 <dbl>, 424 <dbl>, 429 <dbl>, 434 <dbl>, 439 <dbl>, 444 <dbl>,
-    ## #   449 <dbl>, 454 <dbl>, 459 <dbl>, 464 <dbl>, 469 <dbl>, 474 <dbl>,
-    ## #   479 <dbl>, 484 <dbl>, 489 <dbl>, 494 <dbl>, 499 <dbl>, 504 <dbl>,
-    ## #   509 <dbl>, 514 <dbl>, 519 <dbl>, 524 <dbl>, 529 <dbl>, 534 <dbl>,
-    ## #   539 <dbl>, 544 <dbl>, 549 <dbl>, 554 <dbl>, 559 <dbl>, 564 <dbl>,
-    ## #   569 <dbl>, 574 <dbl>, 579 <dbl>, 584 <dbl>, 589 <dbl>, 594 <dbl>,
-    ## #   599 <dbl>, 604 <dbl>, 609 <dbl>, 614 <dbl>, 619 <dbl>, 624 <dbl>,
-    ## #   629 <dbl>, 634 <dbl>, 639 <dbl>, 644 <dbl>, 649 <dbl>, 654 <dbl>,
-    ## #   659 <dbl>, 664 <dbl>, 669 <dbl>, 674 <dbl>, 679 <dbl>, 684 <dbl>,
-    ## #   689 <dbl>, 694 <dbl>, 699 <dbl>, 704 <dbl>, 709 <dbl>, 714 <dbl>,
-    ## #   719 <dbl>, 724 <dbl>, 729 <dbl>, 734 <dbl>, 739 <dbl>, 744 <dbl>,
-    ## #   749 <dbl>, …
+    ## #   389 <dbl>, 394 <dbl>, 399 <dbl>, 404 <dbl>, 409 <dbl>, 414 <dbl>, …
 
 ``` r
 names(dat_raw)[1:40]
@@ -174,7 +161,7 @@ sample_info <- dat_raw[,names(dat_raw) %notin% seq(300,2600,1)]
 head(sample_info)
 ```
 
-    ## # A tibble: 6 x 33
+    ## # A tibble: 6 × 33
     ##   Affiliation   Boron Calcium Carbon Carotenoids_area Carotenoids_mass Cellulose
     ##   <chr>         <dbl>   <dbl>  <dbl>            <dbl>            <dbl>     <dbl>
     ## 1 University … 0.0420   24.2    463.             9.19             1.18      221.
@@ -258,19 +245,19 @@ split_data <- spectratrait::create_data_split(dataset=plsr_data, approach=method
                                               prop=0.8, group_variables="Plot_Num")
 ```
 
-    ## D02   Cal: 80.4597701149425%
+    ## D02   Cal: 80.46%
 
-    ## D03   Cal: 80.327868852459%
+    ## D03   Cal: 80.328%
 
     ## D05   Cal: 80%
 
-    ## D06   Cal: 79.7297297297297%
+    ## D06   Cal: 79.73%
 
-    ## D07   Cal: 79.2452830188679%
+    ## D07   Cal: 79.245%
 
-    ## D08   Cal: 79.8165137614679%
+    ## D08   Cal: 79.817%
 
-    ## D09   Cal: 79.6296296296296%
+    ## D09   Cal: 79.63%
 
 ``` r
 names(split_data)
@@ -427,18 +414,21 @@ iterations <- 80
 prop <- 0.70
 if (method=="pls") {
   # pls package approach - faster but estimates more components....
-  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, 
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar,
+                                                  method=method, 
                                                   maxComps=maxComps, seg=seg, 
                                                   random_seed=random_seed)
   print(paste0("*** Optimal number of components: ", nComps))
 } else {
-  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, 
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar,
+                                                  method=method, 
                                                   maxComps=maxComps, iterations=iterations, 
                                                   seg=seg, prop=prop, 
                                                   random_seed=random_seed)
 }
 ```
 
+    ## [1] "*** Identifying optimal number of PLSR components ***"
     ## [1] "*** Running PLS permutation test ***"
 
 ![](neon_leafN_canopy_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png)<!-- -->
@@ -534,20 +524,20 @@ cal.plsr.output <- cal.plsr.output %>%
 head(cal.plsr.output)
 ```
 
-    ##   Plot_Num SampleID  Plot_ID Sample_Year      SLA Nitrogen CalVal
-    ## 2      D02     0002 D02_0002        2017 10.77861 27.70598    Cal
-    ## 3      D02     0003 D02_0003        2017 12.46154 34.63999    Cal
-    ## 5      D02     0005 D02_0005        2017 17.27620 26.64623    Cal
-    ## 6      D02     0006 D02_0006        2017 12.92806 20.69437    Cal
-    ## 7      D02     0007 D02_0007        2017 10.21521 28.87526    Cal
-    ## 8      D02     0008 D02_0008        2017 20.87397 33.63137    Cal
-    ##   PLSR_Predicted PLSR_CV_Predicted PLSR_CV_Residuals
-    ## 2       24.65561          24.59452        -3.1114612
-    ## 3       27.85223          27.64033        -6.9996606
-    ## 5       29.36467          29.54595         2.8997194
-    ## 6       21.66448          21.68116         0.9867955
-    ## 7       23.04393          22.78554        -6.0897138
-    ## 8       25.56637          25.29798        -8.3333884
+    ##   Plot_Num SampleID  Plot_ID Sample_Year      SLA Nitrogen PLSR_Predicted
+    ## 2      D02     0002 D02_0002        2017 10.77861 27.70598       24.65561
+    ## 3      D02     0003 D02_0003        2017 12.46154 34.63999       27.85223
+    ## 5      D02     0005 D02_0005        2017 17.27620 26.64623       29.36467
+    ## 6      D02     0006 D02_0006        2017 12.92806 20.69437       21.66448
+    ## 7      D02     0007 D02_0007        2017 10.21521 28.87526       23.04393
+    ## 8      D02     0008 D02_0008        2017 20.87397 33.63137       25.56637
+    ##   PLSR_CV_Predicted PLSR_CV_Residuals
+    ## 2          24.59452        -3.1114612
+    ## 3          27.64033        -6.9996606
+    ## 5          29.54595         2.8997194
+    ## 6          21.68116         0.9867955
+    ## 7          22.78554        -6.0897138
+    ## 8          25.29798        -8.3333884
 
 ``` r
 cal.R2 <- round(pls::R2(plsr.out,intercept=F)[[1]][nComps],2)
@@ -562,20 +552,20 @@ val.plsr.output <- val.plsr.output %>%
 head(val.plsr.output)
 ```
 
-    ##    Plot_Num SampleID  Plot_ID Sample_Year      SLA Nitrogen CalVal
-    ## 1       D02     0001 D02_0001        2017 13.66366 31.18030    Val
-    ## 4       D02     0004 D02_0004        2017 16.63205 34.54034    Val
-    ## 16      D02     0016 D02_0016        2017 14.44765 22.87740    Val
-    ## 18      D02     0019 D02_0019        2017 14.47103 17.73126    Val
-    ## 19      D02     0020 D02_0020        2017 18.98522 21.32929    Val
-    ## 20      D02     0021 D02_0021        2017 12.12731 29.50256    Val
-    ##    PLSR_Predicted PLSR_Residuals
-    ## 1        22.55166      -8.628643
-    ## 4        30.79494      -3.745399
-    ## 16       29.14446       6.267060
-    ## 18       23.47518       5.743923
-    ## 19       23.00736       1.678070
-    ## 20       31.93483       2.432274
+    ##    Plot_Num SampleID  Plot_ID Sample_Year      SLA Nitrogen PLSR_Predicted
+    ## 1       D02     0001 D02_0001        2017 13.66366 31.18030       22.55166
+    ## 4       D02     0004 D02_0004        2017 16.63205 34.54034       30.79494
+    ## 16      D02     0016 D02_0016        2017 14.44765 22.87740       29.14446
+    ## 18      D02     0019 D02_0019        2017 14.47103 17.73126       23.47518
+    ## 19      D02     0020 D02_0020        2017 18.98522 21.32929       23.00736
+    ## 20      D02     0021 D02_0021        2017 12.12731 29.50256       31.93483
+    ##    PLSR_Residuals
+    ## 1       -8.628643
+    ## 4       -3.745399
+    ## 16       6.267060
+    ## 18       5.743923
+    ## 19       1.678070
+    ## 20       2.432274
 
 ``` r
 val.R2 <- round(pls::R2(plsr.out,newdata=val.plsr.data,intercept=F)[[1]][nComps],2)
@@ -692,20 +682,20 @@ par(opar)
     ## [1] "Max Components: 12 Iterations: 500 Data Proportion (percent): 70"
     ## [1] "*** Providing PRESS and coefficient array output ***"
 
-    ##    Plot_Num SampleID  Plot_ID Sample_Year      SLA Nitrogen CalVal
-    ## 1       D02     0001 D02_0001        2017 13.66366 31.18030    Val
-    ## 4       D02     0004 D02_0004        2017 16.63205 34.54034    Val
-    ## 16      D02     0016 D02_0016        2017 14.44765 22.87740    Val
-    ## 18      D02     0019 D02_0019        2017 14.47103 17.73126    Val
-    ## 19      D02     0020 D02_0020        2017 18.98522 21.32929    Val
-    ## 20      D02     0021 D02_0021        2017 12.12731 29.50256    Val
-    ##    PLSR_Predicted PLSR_Residuals      LCI      UCI      LPI      UPI
-    ## 1        22.55166      -8.628643 21.75139 23.67919 13.44246 31.66086
-    ## 4        30.79494      -3.745399 29.24737 32.37867 21.60577 39.98412
-    ## 16       29.14446       6.267060 27.57462 30.82609 19.93270 38.35621
-    ## 18       23.47518       5.743923 21.73808 24.49326 14.31158 32.63878
-    ## 19       23.00736       1.678070 20.70321 24.57934 13.73687 32.27785
-    ## 20       31.93483       2.432274 30.75996 34.32739 22.69357 41.17610
+    ##    Plot_Num SampleID  Plot_ID Sample_Year      SLA Nitrogen PLSR_Predicted
+    ## 1       D02     0001 D02_0001        2017 13.66366 31.18030       22.55166
+    ## 4       D02     0004 D02_0004        2017 16.63205 34.54034       30.79494
+    ## 16      D02     0016 D02_0016        2017 14.44765 22.87740       29.14446
+    ## 18      D02     0019 D02_0019        2017 14.47103 17.73126       23.47518
+    ## 19      D02     0020 D02_0020        2017 18.98522 21.32929       23.00736
+    ## 20      D02     0021 D02_0021        2017 12.12731 29.50256       31.93483
+    ##    PLSR_Residuals      LCI      UCI      LPI      UPI
+    ## 1       -8.628643 21.75139 23.67919 13.44246 31.66086
+    ## 4       -3.745399 29.24737 32.37867 21.60577 39.98412
+    ## 16       6.267060 27.57462 30.82609 19.93270 38.35621
+    ## 18       5.743923 21.73808 24.49326 14.31158 32.63878
+    ## 19       1.678070 20.70321 24.57934 13.73687 32.27785
+    ## 20       2.432274 30.75996 34.32739 22.69357 41.17610
 
 ### Jackknife coefficient plot
 
diff --git a/vignettes/neon_leafN_canopy_plsr_example.pdf b/vignettes/neon_leafN_canopy_plsr_example.pdf
index 9ac17a6..837c5af 100644
Binary files a/vignettes/neon_leafN_canopy_plsr_example.pdf and b/vignettes/neon_leafN_canopy_plsr_example.pdf differ
diff --git a/vignettes/neon_leafN_canopy_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png b/vignettes/neon_leafN_canopy_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png
index aca3992..5f805e2 100644
Binary files a/vignettes/neon_leafN_canopy_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png and b/vignettes/neon_leafN_canopy_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png differ
diff --git a/vignettes/neon_lma_plsr_example.Rmd b/vignettes/neon_lma_plsr_example.Rmd
index 34f3a22..035e9a6 100644
--- a/vignettes/neon_lma_plsr_example.Rmd
+++ b/vignettes/neon_lma_plsr_example.Rmd
@@ -2,11 +2,11 @@
 title: Spectra-trait PLSR example using leaf-level spectra and leaf mass per area (LMA) data from CONUS NEON sites
 author: "Shawn P. Serbin, Julien Lamour, & Jeremiah Anderson"
 output:
-  pdf_document: default
+  github_document: default
   html_document:
     df_print: paged
   html_notebook: default
-  github_document: default
+  pdf_document: default
 params:
   date: !r Sys.Date()
 ---
@@ -21,8 +21,7 @@ This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook to illustrate how
 ### Getting Started
 ### Load libraries
 ```{r, eval=TRUE, echo=TRUE}
-list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra",
-                      "spectratrait")
+list.of.packages <- c("pls","dplyr","here","plotrix","ggplot2","gridExtra","spectratrait")
 invisible(lapply(list.of.packages, library, character.only = TRUE))
 ```
 
@@ -175,12 +174,14 @@ maxComps <- 20
 iterations <- 40
 prop <- 0.70
 if (method=="pls") {
-  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, 
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, 
+                                                  method=method, 
                                                   maxComps=maxComps, seg=seg, 
                                                   random_seed=random_seed)
   print(paste0("*** Optimal number of components: ", nComps))
 } else {
-  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, 
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, 
+                                                  method=method, 
                                                   maxComps=maxComps, iterations=iterations, 
                                                   seg=seg, prop=prop, 
                                                   random_seed=random_seed)
diff --git a/vignettes/neon_lma_plsr_example.md b/vignettes/neon_lma_plsr_example.md
index a11b0d4..439d780 100644
--- a/vignettes/neon_lma_plsr_example.md
+++ b/vignettes/neon_lma_plsr_example.md
@@ -15,8 +15,7 @@ leaf-mass area (LMA)
 ### Load libraries
 
 ``` r
-list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra",
-                      "spectratrait")
+list.of.packages <- c("pls","dplyr","here","plotrix","ggplot2","gridExtra","spectratrait")
 invisible(lapply(list.of.packages, library, character.only = TRUE))
 ```
 
@@ -81,7 +80,7 @@ output_dir <- "tempdir"
 
 ### Set working directory (scratch space)
 
-    ## [1] "/private/var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T/RtmpRBdgMm"
+    ## [1] "/private/var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T/Rtmp1VUVAg"
 
 ### Grab data from EcoSIS
 
@@ -125,7 +124,7 @@ dat_raw <- spectratrait::get_ecosis_data(ecosis_id = ecosis_id)
 head(dat_raw)
 ```
 
-    ## # A tibble: 6 x 2,162
+    ## # A tibble: 6 × 2,162
     ##   Affiliation           `Common Name` Domain Functional_type   LMA `Latin Genus`
     ##   <chr>                 <chr>         <chr>  <chr>           <dbl> <chr>        
     ## 1 University of Wiscon… black walnut  D02    broadleaf        72.9 Juglans      
@@ -140,18 +139,7 @@ head(dat_raw)
     ## #   359 <dbl>, 360 <dbl>, 361 <dbl>, 362 <dbl>, 363 <dbl>, 364 <dbl>,
     ## #   365 <dbl>, 366 <dbl>, 367 <dbl>, 368 <dbl>, 369 <dbl>, 370 <dbl>,
     ## #   371 <dbl>, 372 <dbl>, 373 <dbl>, 374 <dbl>, 375 <dbl>, 376 <dbl>,
-    ## #   377 <dbl>, 378 <dbl>, 379 <dbl>, 380 <dbl>, 381 <dbl>, 382 <dbl>,
-    ## #   383 <dbl>, 384 <dbl>, 385 <dbl>, 386 <dbl>, 387 <dbl>, 388 <dbl>,
-    ## #   389 <dbl>, 390 <dbl>, 391 <dbl>, 392 <dbl>, 393 <dbl>, 394 <dbl>,
-    ## #   395 <dbl>, 396 <dbl>, 397 <dbl>, 398 <dbl>, 399 <dbl>, 400 <dbl>,
-    ## #   401 <dbl>, 402 <dbl>, 403 <dbl>, 404 <dbl>, 405 <dbl>, 406 <dbl>,
-    ## #   407 <dbl>, 408 <dbl>, 409 <dbl>, 410 <dbl>, 411 <dbl>, 412 <dbl>,
-    ## #   413 <dbl>, 414 <dbl>, 415 <dbl>, 416 <dbl>, 417 <dbl>, 418 <dbl>,
-    ## #   419 <dbl>, 420 <dbl>, 421 <dbl>, 422 <dbl>, 423 <dbl>, 424 <dbl>,
-    ## #   425 <dbl>, 426 <dbl>, 427 <dbl>, 428 <dbl>, 429 <dbl>, 430 <dbl>,
-    ## #   431 <dbl>, 432 <dbl>, 433 <dbl>, 434 <dbl>, 435 <dbl>, 436 <dbl>,
-    ## #   437 <dbl>, 438 <dbl>, 439 <dbl>, 440 <dbl>, 441 <dbl>, 442 <dbl>,
-    ## #   443 <dbl>, 444 <dbl>, …
+    ## #   377 <dbl>, 378 <dbl>, 379 <dbl>, 380 <dbl>, 381 <dbl>, 382 <dbl>, …
 
 ``` r
 names(dat_raw)[1:40]
@@ -181,7 +169,7 @@ sample_info <- dat_raw[,names(dat_raw) %notin% seq(350,2500,1)]
 head(sample_info)
 ```
 
-    ## # A tibble: 6 x 11
+    ## # A tibble: 6 × 11
     ##   Affiliation           `Common Name` Domain Functional_type   LMA `Latin Genus`
     ##   <chr>                 <chr>         <chr>  <chr>           <dbl> <chr>        
     ## 1 University of Wiscon… black walnut  D02    broadleaf        72.9 Juglans      
@@ -199,7 +187,7 @@ sample_info2 <- sample_info %>%
 head(sample_info2)
 ```
 
-    ## # A tibble: 6 x 5
+    ## # A tibble: 6 × 5
     ##   Domain Functional_type Sample_ID USDA_Species_Code LMA_gDW_m2
     ##   <chr>  <chr>           <chr>     <chr>                  <dbl>
     ## 1 D02    broadleaf       P0001     JUNI                    72.9
@@ -255,20 +243,20 @@ val.plsr.data <- split_data$val_data
 head(val.plsr.data)[1:8]
 ```
 
-    ##      Domain Functional_type Sample_ID USDA_Species_Code LMA_gDW_m2 Wave_500
-    ## 4923    D08       broadleaf     P2462              <NA>      21.10 0.044964
-    ## 4924    D08       broadleaf     L2462              SANI     100.72 0.068921
-    ## 4925    D08       broadleaf     P2463              <NA>      29.59 0.036254
-    ## 4926    D08       broadleaf     L2463              SANI      96.48 0.051810
-    ## 4927    D08       broadleaf     P2464              <NA>      31.08 0.056587
-    ## 4928    D08       broadleaf     L2464              SANI      61.40 0.037310
-    ##      Wave_501 Wave_502
-    ## 4923 0.045854 0.046911
-    ## 4924 0.069633 0.070254
-    ## 4925 0.036999 0.037671
-    ## 4926 0.052113 0.052896
-    ## 4927 0.057006 0.057734
-    ## 4928 0.037223 0.037671
+    ##    Domain Functional_type Sample_ID USDA_Species_Code LMA_gDW_m2 Wave_500
+    ## 3     D02       broadleaf     P0002              JUNI      60.77 0.043758
+    ## 12    D02       broadleaf     L0006              JUNI      42.54 0.044338
+    ## 13    D02       broadleaf     P0007              QUVE     106.57 0.015643
+    ## 19    D02       broadleaf     P0010              PRSE      78.82 0.033019
+    ## 21    D02       broadleaf     P0011              PRSE      86.09 0.024819
+    ## 28    D02       broadleaf     L0014              PRSE      67.11 0.040095
+    ##    Wave_501 Wave_502
+    ## 3  0.044171 0.044869
+    ## 12 0.044748 0.045294
+    ## 13 0.015579 0.015431
+    ## 19 0.033102 0.033245
+    ## 21 0.024826 0.025045
+    ## 28 0.040397 0.040864
 
 ``` r
 rm(split_data)
@@ -338,13 +326,13 @@ val.plsr.data <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% p
 head(val.plsr.data)[1:5]
 ```
 
-    ##      Domain Functional_type Sample_ID USDA_Species_Code LMA_gDW_m2
-    ## 4923    D08       broadleaf     P2462              <NA>      21.10
-    ## 4924    D08       broadleaf     L2462              SANI     100.72
-    ## 4925    D08       broadleaf     P2463              <NA>      29.59
-    ## 4926    D08       broadleaf     L2463              SANI      96.48
-    ## 4927    D08       broadleaf     P2464              <NA>      31.08
-    ## 4928    D08       broadleaf     L2464              SANI      61.40
+    ##    Domain Functional_type Sample_ID USDA_Species_Code LMA_gDW_m2
+    ## 3     D02       broadleaf     P0002              JUNI      60.77
+    ## 12    D02       broadleaf     L0006              JUNI      42.54
+    ## 13    D02       broadleaf     P0007              QUVE     106.57
+    ## 19    D02       broadleaf     P0010              PRSE      78.82
+    ## 21    D02       broadleaf     P0011              PRSE      86.09
+    ## 28    D02       broadleaf     L0014              PRSE      67.11
 
 ### plot cal and val spectra
 
@@ -392,18 +380,21 @@ maxComps <- 20
 iterations <- 40
 prop <- 0.70
 if (method=="pls") {
-  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, 
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, 
+                                                  method=method, 
                                                   maxComps=maxComps, seg=seg, 
                                                   random_seed=random_seed)
   print(paste0("*** Optimal number of components: ", nComps))
 } else {
-  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, 
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, 
+                                                  method=method, 
                                                   maxComps=maxComps, iterations=iterations, 
                                                   seg=seg, prop=prop, 
                                                   random_seed=random_seed)
 }
 ```
 
+    ## [1] "*** Identifying optimal number of PLSR components ***"
     ## [1] "*** Running permutation test.  Please hang tight, this can take awhile ***"
     ## [1] "Options:"
     ## [1] "Max Components: 20 Iterations: 40 Data Proportion (percent): 70"
@@ -448,11 +439,11 @@ pls::RMSEP(plsr.out, newdata = val.plsr.data)
 ```
 
     ## (Intercept)      1 comps      2 comps      3 comps      4 comps      5 comps  
-    ##      27.155       17.610       16.595       15.483       13.235       12.374  
+    ##      29.372       18.664       18.166       16.187       12.760       12.149  
     ##     6 comps      7 comps      8 comps      9 comps     10 comps     11 comps  
-    ##      11.499       10.722       10.269        9.647        9.197        9.319  
+    ##      12.004       11.465       11.144       10.389       10.063        9.732  
     ##    12 comps  
-    ##       9.515
+    ##       9.633
 
 ``` r
 plot(pls::RMSEP(plsr.out,estimate=c("test"),newdata = val.plsr.data), 
@@ -465,11 +456,11 @@ pls::R2(plsr.out, newdata = val.plsr.data)
 ```
 
     ## (Intercept)      1 comps      2 comps      3 comps      4 comps      5 comps  
-    ##   -0.006901     0.576543     0.623949     0.672643     0.760799     0.790906  
+    ##   -0.001908     0.595475     0.616770     0.695732     0.810908     0.828593  
     ##     6 comps      7 comps      8 comps      9 comps     10 comps     11 comps  
-    ##    0.819456     0.843031     0.856001     0.872913     0.884511     0.881406  
+    ##    0.832656     0.847338     0.855775     0.874647     0.882410     0.890000  
     ##    12 comps  
-    ##    0.876368
+    ##    0.892247
 
 ``` r
 plot(pls::R2(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL R2",
@@ -524,20 +515,20 @@ val.plsr.output <- val.plsr.output %>%
 head(val.plsr.output)
 ```
 
-    ##      Domain Functional_type Sample_ID USDA_Species_Code LMA_gDW_m2
-    ## 4923    D08       broadleaf     P2462              <NA>      21.10
-    ## 4924    D08       broadleaf     L2462              SANI     100.72
-    ## 4925    D08       broadleaf     P2463              <NA>      29.59
-    ## 4926    D08       broadleaf     L2463              SANI      96.48
-    ## 4927    D08       broadleaf     P2464              <NA>      31.08
-    ## 4928    D08       broadleaf     L2464              SANI      61.40
-    ##      PLSR_Predicted PLSR_Residuals
-    ## 4923       21.14155     0.04155041
-    ## 4924       89.65467   -11.06533484
-    ## 4925       27.94765    -1.64234512
-    ## 4926       92.46121    -4.01879017
-    ## 4927       40.73367     9.65367301
-    ## 4928       65.94687     4.54686556
+    ##    Domain Functional_type Sample_ID USDA_Species_Code LMA_gDW_m2 PLSR_Predicted
+    ## 3     D02       broadleaf     P0002              JUNI      60.77       63.90905
+    ## 12    D02       broadleaf     L0006              JUNI      42.54       41.54133
+    ## 13    D02       broadleaf     P0007              QUVE     106.57       99.99662
+    ## 19    D02       broadleaf     P0010              PRSE      78.82       89.03078
+    ## 21    D02       broadleaf     P0011              PRSE      86.09       85.17273
+    ## 28    D02       broadleaf     L0014              PRSE      67.11       67.95549
+    ##    PLSR_Residuals
+    ## 3       3.1390459
+    ## 12     -0.9986720
+    ## 13     -6.5733831
+    ## 19     10.2107788
+    ## 21     -0.9172668
+    ## 28      0.8454930
 
 ``` r
 val.R2 <- round(pls::R2(plsr.out,newdata=val.plsr.data,intercept=F)[[1]][nComps],2)
@@ -611,7 +602,7 @@ scatterplots <- grid.arrange(cal_scatter_plot, val_scatter_plot, cal_resid_histo
 
     ## Warning: Removed 21 rows containing missing values (geom_point).
 
-    ## Warning: Removed 5 rows containing missing values (geom_point).
+    ## Warning: Removed 8 rows containing missing values (geom_point).
 
     ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
     ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
@@ -699,20 +690,20 @@ val.plsr.output$UPI <- val.plsr.output$PLSR_Predicted+1.96*sd_tot
 head(val.plsr.output)
 ```
 
-    ##      Domain Functional_type Sample_ID USDA_Species_Code LMA_gDW_m2
-    ## 4923    D08       broadleaf     P2462              <NA>      21.10
-    ## 4924    D08       broadleaf     L2462              SANI     100.72
-    ## 4925    D08       broadleaf     P2463              <NA>      29.59
-    ## 4926    D08       broadleaf     L2463              SANI      96.48
-    ## 4927    D08       broadleaf     P2464              <NA>      31.08
-    ## 4928    D08       broadleaf     L2464              SANI      61.40
-    ##      PLSR_Predicted PLSR_Residuals      LCI      UCI       LPI       UPI
-    ## 4923       21.14155     0.04155041 20.94847 21.30604  2.789108  39.49399
-    ## 4924       89.65467   -11.06533484 89.45021 89.97246 71.301221 108.00811
-    ## 4925       27.94765    -1.64234512 27.73172 28.16789  9.594597  46.30071
-    ## 4926       92.46121    -4.01879017 92.26489 92.73225 74.107998 110.81442
-    ## 4927       40.73367     9.65367301 40.50065 40.92588 22.380204  59.08714
-    ## 4928       65.94687     4.54686556 65.77618 66.17712 47.594178  84.29955
+    ##    Domain Functional_type Sample_ID USDA_Species_Code LMA_gDW_m2 PLSR_Predicted
+    ## 3     D02       broadleaf     P0002              JUNI      60.77       63.90905
+    ## 12    D02       broadleaf     L0006              JUNI      42.54       41.54133
+    ## 13    D02       broadleaf     P0007              QUVE     106.57       99.99662
+    ## 19    D02       broadleaf     P0010              PRSE      78.82       89.03078
+    ## 21    D02       broadleaf     P0011              PRSE      86.09       85.17273
+    ## 28    D02       broadleaf     L0014              PRSE      67.11       67.95549
+    ##    PLSR_Residuals      LCI       UCI      LPI       UPI
+    ## 3       3.1390459 63.75673  64.12043 45.02836  82.78973
+    ## 12     -0.9986720 41.42248  41.69728 22.66069  60.42196
+    ## 13     -6.5733831 99.88029 100.11962 81.11612 118.87712
+    ## 19     10.2107788 88.83274  89.21623 70.14949 107.91207
+    ## 21     -0.9172668 85.02330  85.32067 66.29194 104.05353
+    ## 28      0.8454930 67.82558  68.15298 49.07457  86.83642
 
 ### Jackknife coefficient plot
 
diff --git a/vignettes/neon_lma_plsr_example.pdf b/vignettes/neon_lma_plsr_example.pdf
index e03264f..dafd579 100644
Binary files a/vignettes/neon_lma_plsr_example.pdf and b/vignettes/neon_lma_plsr_example.pdf differ
diff --git a/vignettes/neon_lma_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png b/vignettes/neon_lma_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png
index 477fd1f..72e4e57 100644
Binary files a/vignettes/neon_lma_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png and b/vignettes/neon_lma_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png differ
diff --git a/vignettes/neon_lma_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png b/vignettes/neon_lma_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png
index bbd51e7..5d4c0de 100644
Binary files a/vignettes/neon_lma_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png and b/vignettes/neon_lma_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png differ
diff --git a/vignettes/neon_lma_plsr_example_files/figure-gfm/unnamed-chunk-15-1.png b/vignettes/neon_lma_plsr_example_files/figure-gfm/unnamed-chunk-15-1.png
index 817d4c2..17efef7 100644
Binary files a/vignettes/neon_lma_plsr_example_files/figure-gfm/unnamed-chunk-15-1.png and b/vignettes/neon_lma_plsr_example_files/figure-gfm/unnamed-chunk-15-1.png differ
diff --git a/vignettes/neon_lma_plsr_example_files/figure-gfm/unnamed-chunk-6-1.png b/vignettes/neon_lma_plsr_example_files/figure-gfm/unnamed-chunk-6-1.png
index b0dee53..d1cc56a 100644
Binary files a/vignettes/neon_lma_plsr_example_files/figure-gfm/unnamed-chunk-6-1.png and b/vignettes/neon_lma_plsr_example_files/figure-gfm/unnamed-chunk-6-1.png differ
diff --git a/vignettes/neon_lma_plsr_example_files/figure-gfm/unnamed-chunk-8-1.png b/vignettes/neon_lma_plsr_example_files/figure-gfm/unnamed-chunk-8-1.png
index f5bdf2a..bfac6a2 100644
Binary files a/vignettes/neon_lma_plsr_example_files/figure-gfm/unnamed-chunk-8-1.png and b/vignettes/neon_lma_plsr_example_files/figure-gfm/unnamed-chunk-8-1.png differ
diff --git a/vignettes/reseco_leafN_bootstrap_plsr_example.Rmd b/vignettes/reseco_leafN_bootstrap_plsr_example.Rmd
index 89af664..1f2e0fd 100644
--- a/vignettes/reseco_leafN_bootstrap_plsr_example.Rmd
+++ b/vignettes/reseco_leafN_bootstrap_plsr_example.Rmd
@@ -2,11 +2,11 @@
 title: Spectra-trait PLSR example using leaf-level spectra and leaf nitrogen content (Narea, g/m2) data from 36 species growing in Rosa rugosa invaded coastal grassland communities in Belgium
 author: "Shawn P. Serbin, Julien Lamour, & Jeremiah Anderson"
 output:
-  github_document: default
+  pdf_document: default
   html_notebook: default
   html_document:
     df_print: paged
-  pdf_document: default
+  github_document: default
 params:
   date: !r Sys.Date()
 ---
@@ -21,8 +21,7 @@ This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook to illustrate how
 ### Getting Started
 ### Load libraries
 ```{r, eval=TRUE, echo=TRUE}
-list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra",
-                      "spectratrait")
+list.of.packages <- c("pls","dplyr","here","plotrix","ggplot2","gridExtra","spectratrait")
 invisible(lapply(list.of.packages, library, character.only = TRUE))
 ```
 
@@ -188,12 +187,14 @@ iterations <- 80
 prop <- 0.70
 if (method=="pls") {
   # pls package approach - faster but estimates more components....
-  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, 
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, 
+                                                  method=method, 
                                                   maxComps=maxComps, seg=seg, 
                                                   random_seed=random_seed)
   print(paste0("*** Optimal number of components: ", nComps))
 } else {
-  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method,
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar,
+                                                  method=method,
                                                   maxComps=maxComps, iterations=iterations, 
                                                   seg=seg, prop=prop, 
                                                   random_seed=random_seed)
@@ -335,7 +336,8 @@ if(grepl("Windows", sessionInfo()$running)){
 ### PLSR bootstrap permutation uncertainty analysis
 iterations <- 500    # how many permutation iterations to run
 prop <- 0.70          # fraction of training data to keep for each iteration
-plsr_permutation <- spectratrait::pls_permutation(dataset=cal.plsr.data, maxComps=nComps, 
+plsr_permutation <- spectratrait::pls_permutation(dataset=cal.plsr.data, targetVariable=inVar,
+                                                  maxComps=nComps, 
                                                   iterations=iterations, prop=prop,
                                                   verbose = FALSE)
 bootstrap_intercept <- plsr_permutation$coef_array[1,,nComps]
diff --git a/vignettes/reseco_leafN_bootstrap_plsr_example.md b/vignettes/reseco_leafN_bootstrap_plsr_example.md
index 8deb809..5e8c5ef 100644
--- a/vignettes/reseco_leafN_bootstrap_plsr_example.md
+++ b/vignettes/reseco_leafN_bootstrap_plsr_example.md
@@ -16,8 +16,7 @@ leaf nitrogen content (Narea, g/m2)
 ### Load libraries
 
 ``` r
-list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra",
-                      "spectratrait")
+list.of.packages <- c("pls","dplyr","here","plotrix","ggplot2","gridExtra","spectratrait")
 invisible(lapply(list.of.packages, library, character.only = TRUE))
 ```
 
@@ -82,7 +81,7 @@ output_dir <- "tempdir"
 
 ### Set working directory (scratch space)
 
-    ## [1] "/private/var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T/RtmpmNliia"
+    ## [1] "/private/var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T/RtmpYVNyVT"
 
 ### Grab data from EcoSIS
 
@@ -117,38 +116,22 @@ dat_raw <- spectratrait::get_ecosis_data(ecosis_id = ecosis_id)
 head(dat_raw)
 ```
 
-    ## # A tibble: 6 x 2,164
-    ##   `Cw/EWT (cm3/cm2… `Latin Species`   `Leaf area (mm2… `Leaf calcium content pe…
-    ##               <dbl> <chr>                        <dbl>                     <dbl>
-    ## 1           0.00887 Arrhenatherum el…             696.                    0.0291
-    ## 2           0.00824 Bromus sterilis               447.                    0.0230
-    ## 3           0.0280  Jacobaea vulgaris            2418.                    0.0950
-    ## 4           0.0106  Rubus caesius                5719.                    0.0700
-    ## 5           0.00851 Arrhenatherum el…             671.                    0.0286
-    ## 6           0.0153  Crepis capillaris            1401.                    0.0470
+    ## # A tibble: 6 × 2,164
+    ##   `Cw/EWT (cm3/cm2)` `Latin Species`       `Leaf area (mm2)` `Leaf calcium cont…
+    ##                <dbl> <chr>                             <dbl>               <dbl>
+    ## 1            0.00887 Arrhenatherum elatius              696.              0.0291
+    ## 2            0.00824 Bromus sterilis                    447.              0.0230
+    ## 3            0.0280  Jacobaea vulgaris                 2418.              0.0950
+    ## 4            0.0106  Rubus caesius                     5719.              0.0700
+    ## 5            0.00851 Arrhenatherum elatius              671.              0.0286
+    ## 6            0.0153  Crepis capillaris                 1401.              0.0470
     ## # … with 2,160 more variables:
     ## #   Leaf magnesium content per leaf area (mg/mm2) <dbl>,
     ## #   Leaf mass per area (g/cm2) <dbl>,
     ## #   Leaf nitrogen content per leaf area (mg/mm2) <dbl>,
     ## #   Leaf phosphorus content per leaf area (mg/mm2) <dbl>,
     ## #   Leaf potassium content per leaf area (mg/mm2) <dbl>,
-    ## #   Plant height vegetative (cm) <dbl>, ids <chr>, plot code <chr>,
-    ## #   species code <chr>, 350 <dbl>, 351 <dbl>, 352 <dbl>, 353 <dbl>, 354 <dbl>,
-    ## #   355 <dbl>, 356 <dbl>, 357 <dbl>, 358 <dbl>, 359 <dbl>, 360 <dbl>,
-    ## #   361 <dbl>, 362 <dbl>, 363 <dbl>, 364 <dbl>, 365 <dbl>, 366 <dbl>,
-    ## #   367 <dbl>, 368 <dbl>, 369 <dbl>, 370 <dbl>, 371 <dbl>, 372 <dbl>,
-    ## #   373 <dbl>, 374 <dbl>, 375 <dbl>, 376 <dbl>, 377 <dbl>, 378 <dbl>,
-    ## #   379 <dbl>, 380 <dbl>, 381 <dbl>, 382 <dbl>, 383 <dbl>, 384 <dbl>,
-    ## #   385 <dbl>, 386 <dbl>, 387 <dbl>, 388 <dbl>, 389 <dbl>, 390 <dbl>,
-    ## #   391 <dbl>, 392 <dbl>, 393 <dbl>, 394 <dbl>, 395 <dbl>, 396 <dbl>,
-    ## #   397 <dbl>, 398 <dbl>, 399 <dbl>, 400 <dbl>, 401 <dbl>, 402 <dbl>,
-    ## #   403 <dbl>, 404 <dbl>, 405 <dbl>, 406 <dbl>, 407 <dbl>, 408 <dbl>,
-    ## #   409 <dbl>, 410 <dbl>, 411 <dbl>, 412 <dbl>, 413 <dbl>, 414 <dbl>,
-    ## #   415 <dbl>, 416 <dbl>, 417 <dbl>, 418 <dbl>, 419 <dbl>, 420 <dbl>,
-    ## #   421 <dbl>, 422 <dbl>, 423 <dbl>, 424 <dbl>, 425 <dbl>, 426 <dbl>,
-    ## #   427 <dbl>, 428 <dbl>, 429 <dbl>, 430 <dbl>, 431 <dbl>, 432 <dbl>,
-    ## #   433 <dbl>, 434 <dbl>, 435 <dbl>, 436 <dbl>, 437 <dbl>, 438 <dbl>,
-    ## #   439 <dbl>, 440 <dbl>, …
+    ## #   Plant height vegetative (cm) <dbl>, ids <chr>, plot code <chr>, …
 
 ``` r
 names(dat_raw)[1:40]
@@ -208,15 +191,15 @@ sample_info <- dat_raw[,names(dat_raw) %notin% seq(350,2500,1)]
 head(sample_info)
 ```
 
-    ## # A tibble: 6 x 13
-    ##   `Cw/EWT (cm3/cm2… `Latin Species`   `Leaf area (mm2… `Leaf calcium content pe…
-    ##               <dbl> <chr>                        <dbl>                     <dbl>
-    ## 1           0.00887 Arrhenatherum el…             696.                    0.0291
-    ## 2           0.00824 Bromus sterilis               447.                    0.0230
-    ## 3           0.0280  Jacobaea vulgaris            2418.                    0.0950
-    ## 4           0.0106  Rubus caesius                5719.                    0.0700
-    ## 5           0.00851 Arrhenatherum el…             671.                    0.0286
-    ## 6           0.0153  Crepis capillaris            1401.                    0.0470
+    ## # A tibble: 6 × 13
+    ##   `Cw/EWT (cm3/cm2)` `Latin Species`       `Leaf area (mm2)` `Leaf calcium cont…
+    ##                <dbl> <chr>                             <dbl>               <dbl>
+    ## 1            0.00887 Arrhenatherum elatius              696.              0.0291
+    ## 2            0.00824 Bromus sterilis                    447.              0.0230
+    ## 3            0.0280  Jacobaea vulgaris                 2418.              0.0950
+    ## 4            0.0106  Rubus caesius                     5719.              0.0700
+    ## 5            0.00851 Arrhenatherum elatius              671.              0.0286
+    ## 6            0.0153  Crepis capillaris                 1401.              0.0470
     ## # … with 9 more variables: Leaf magnesium content per leaf area (mg/mm2) <dbl>,
     ## #   Leaf mass per area (g/cm2) <dbl>,
     ## #   Leaf nitrogen content per leaf area (mg/mm2) <dbl>,
@@ -235,7 +218,7 @@ sample_info2 <- sample_info2 %>%
 head(sample_info2)
 ```
 
-    ## # A tibble: 6 x 5
+    ## # A tibble: 6 × 5
     ##   Plant_Species         Species_Code Plot  Narea_mg_mm2 Narea_g_m2
     ##   <chr>                 <chr>        <chr>        <dbl>      <dbl>
     ## 1 Arrhenatherum elatius Arrela       DC1        0.0126       1.26 
@@ -301,20 +284,20 @@ val.plsr.data <- split_data$val_data
 head(val.plsr.data)[1:8]
 ```
 
-    ##          Plant_Species Species_Code Plot Narea_mg_mm2 Narea_g_m2   Wave_500
-    ## 184  Jacobaea vulgaris       Jacvul  WC2  0.008756996  0.8756996 0.06736887
-    ## 185 Potentilla reptans       Potrep  WC2  0.010313464  1.0313464 0.07125000
-    ## 186      Rubus caesius       Rubcae  WC2  0.007968454  0.7968454 0.05993560
-    ## 187      Urtica dioica       Urtdio  WC2  0.012737560  1.2737560 0.06508300
-    ## 188 Ammophila arenaria       Ammare  WC3  0.028072806  2.8072806 0.15175000
-    ## 189  Jacobaea vulgaris       Jacvul  WC3  0.010251687  1.0251687 0.06805547
-    ##       Wave_501   Wave_502
-    ## 184 0.06870667 0.07014220
-    ## 185 0.07235000 0.07368350
-    ## 186 0.06162000 0.06352233
-    ## 187 0.06625000 0.06758350
-    ## 188 0.15275000 0.15415000
-    ## 189 0.06938000 0.07093553
+    ##            Plant_Species Species_Code Plot Narea_mg_mm2 Narea_g_m2   Wave_500
+    ## 1  Arrhenatherum elatius       Arrela  DC1   0.01261440   1.261440 0.07066700
+    ## 4          Rubus caesius       Rubcae  DC1   0.01208978   1.208978 0.04144907
+    ## 8      Jacobaea vulgaris       Jacvul  DC2   0.01185197   1.185197 0.05563100
+    ## 11        Carex arenaria       Carare  DC3   0.02103830   2.103830 0.11588500
+    ## 14     Jacobaea vulgaris       Jacvul  DC3   0.01121247   1.121247 0.06029327
+    ## 19 Oenothera glazioviana       Oengla  DC4   0.01444293   1.444293 0.07391700
+    ##      Wave_501  Wave_502
+    ## 1  0.07160000 0.0725330
+    ## 4  0.04197333 0.0426356
+    ## 8  0.05622143 0.0569690
+    ## 11 0.11705000 0.1184500
+    ## 14 0.06112000 0.0620312
+    ## 19 0.07515000 0.0765500
 
 ``` r
 rm(split_data)
@@ -385,13 +368,13 @@ val.plsr.data <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% p
 head(val.plsr.data)[1:5]
 ```
 
-    ##          Plant_Species Species_Code Plot Narea_mg_mm2 Narea_g_m2
-    ## 184  Jacobaea vulgaris       Jacvul  WC2  0.008756996  0.8756996
-    ## 185 Potentilla reptans       Potrep  WC2  0.010313464  1.0313464
-    ## 186      Rubus caesius       Rubcae  WC2  0.007968454  0.7968454
-    ## 187      Urtica dioica       Urtdio  WC2  0.012737560  1.2737560
-    ## 188 Ammophila arenaria       Ammare  WC3  0.028072806  2.8072806
-    ## 189  Jacobaea vulgaris       Jacvul  WC3  0.010251687  1.0251687
+    ##            Plant_Species Species_Code Plot Narea_mg_mm2 Narea_g_m2
+    ## 1  Arrhenatherum elatius       Arrela  DC1   0.01261440   1.261440
+    ## 4          Rubus caesius       Rubcae  DC1   0.01208978   1.208978
+    ## 8      Jacobaea vulgaris       Jacvul  DC2   0.01185197   1.185197
+    ## 11        Carex arenaria       Carare  DC3   0.02103830   2.103830
+    ## 14     Jacobaea vulgaris       Jacvul  DC3   0.01121247   1.121247
+    ## 19 Oenothera glazioviana       Oengla  DC4   0.01444293   1.444293
 
 ### plot cal and val spectra
 
@@ -440,18 +423,21 @@ iterations <- 80
 prop <- 0.70
 if (method=="pls") {
   # pls package approach - faster but estimates more components....
-  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, 
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, 
+                                                  method=method, 
                                                   maxComps=maxComps, seg=seg, 
                                                   random_seed=random_seed)
   print(paste0("*** Optimal number of components: ", nComps))
 } else {
-  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method,
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar,
+                                                  method=method,
                                                   maxComps=maxComps, iterations=iterations, 
                                                   seg=seg, prop=prop, 
                                                   random_seed=random_seed)
 }
 ```
 
+    ## [1] "*** Identifying optimal number of PLSR components ***"
     ## [1] "*** Running PLS permutation test ***"
 
 ![](reseco_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png)<!-- -->
@@ -487,9 +473,9 @@ pls::RMSEP(plsr.out, newdata = val.plsr.data)
 ```
 
     ## (Intercept)      1 comps      2 comps      3 comps      4 comps      5 comps  
-    ##      0.6346       0.5045       0.4645       0.3415       0.3296       0.3037  
+    ##      0.5594       0.6034       0.5448       0.3842       0.3481       0.3027  
     ##     6 comps      7 comps      8 comps      9 comps     10 comps  
-    ##      0.2703       0.2659       0.2524       0.2450       0.2452
+    ##      0.2429       0.2268       0.2852       0.2818       0.2780
 
 ``` r
 plot(pls::RMSEP(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL RMSEP",
@@ -500,9 +486,9 @@ pls::R2(plsr.out, newdata = val.plsr.data)
 ```
 
     ## (Intercept)      1 comps      2 comps      3 comps      4 comps      5 comps  
-    ##    -0.05977      0.33000      0.43217      0.69298      0.71415      0.75732  
+    ##   -0.007544    -0.172296     0.044153     0.524579     0.609920     0.704963  
     ##     6 comps      7 comps      8 comps      9 comps     10 comps  
-    ##     0.80776      0.81389      0.83228      0.84198      0.84176
+    ##    0.809962     0.834383     0.738093     0.744325     0.751224
 
 ``` r
 plot(pls::R2(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL R2",
@@ -571,20 +557,20 @@ val.plsr.output <- val.plsr.output %>%
 head(val.plsr.output)
 ```
 
-    ##          Plant_Species Species_Code Plot Narea_mg_mm2 Narea_g_m2 PLSR_Predicted
-    ## 184  Jacobaea vulgaris       Jacvul  WC2  0.008756996  0.8756996      0.9462916
-    ## 185 Potentilla reptans       Potrep  WC2  0.010313464  1.0313464      1.5386676
-    ## 186      Rubus caesius       Rubcae  WC2  0.007968454  0.7968454      0.8790482
-    ## 187      Urtica dioica       Urtdio  WC2  0.012737560  1.2737560      1.1241560
-    ## 188 Ammophila arenaria       Ammare  WC3  0.028072806  2.8072806      2.4527108
-    ## 189  Jacobaea vulgaris       Jacvul  WC3  0.010251687  1.0251687      1.1553688
-    ##     PLSR_Residuals
-    ## 184     0.07059201
-    ## 185     0.50732119
-    ## 186     0.08220284
-    ## 187    -0.14959995
-    ## 188    -0.35456980
-    ## 189     0.13020008
+    ##            Plant_Species Species_Code Plot Narea_mg_mm2 Narea_g_m2
+    ## 1  Arrhenatherum elatius       Arrela  DC1   0.01261440   1.261440
+    ## 4          Rubus caesius       Rubcae  DC1   0.01208978   1.208978
+    ## 8      Jacobaea vulgaris       Jacvul  DC2   0.01185197   1.185197
+    ## 11        Carex arenaria       Carare  DC3   0.02103830   2.103830
+    ## 14     Jacobaea vulgaris       Jacvul  DC3   0.01121247   1.121247
+    ## 19 Oenothera glazioviana       Oengla  DC4   0.01444293   1.444293
+    ##    PLSR_Predicted PLSR_Residuals
+    ## 1        1.340135     0.07869548
+    ## 4        1.288026     0.07904830
+    ## 8        1.155840    -0.02935675
+    ## 11       2.014712    -0.08911757
+    ## 14       1.328742     0.20749565
+    ## 19       1.534162     0.08986811
 
 ``` r
 val.R2 <- round(pls::R2(plsr.out,newdata=val.plsr.data,intercept=F)[[1]][nComps],2)
@@ -645,7 +631,7 @@ scatterplots <- grid.arrange(cal_scatter_plot, val_scatter_plot, cal_resid_histo
 
     ## Warning: Removed 2 rows containing missing values (geom_point).
 
-    ## Warning: Removed 3 rows containing missing values (geom_point).
+    ## Warning: Removed 2 rows containing missing values (geom_point).
 
     ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
     ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
@@ -704,7 +690,8 @@ if(grepl("Windows", sessionInfo()$running)){
 ### PLSR bootstrap permutation uncertainty analysis
 iterations <- 500    # how many permutation iterations to run
 prop <- 0.70          # fraction of training data to keep for each iteration
-plsr_permutation <- spectratrait::pls_permutation(dataset=cal.plsr.data, maxComps=nComps, 
+plsr_permutation <- spectratrait::pls_permutation(dataset=cal.plsr.data, targetVariable=inVar,
+                                                  maxComps=nComps, 
                                                   iterations=iterations, prop=prop,
                                                   verbose = FALSE)
 ```
@@ -737,20 +724,20 @@ val.plsr.output$UPI <- val.plsr.output$PLSR_Predicted+1.96*sd_tot
 head(val.plsr.output)
 ```
 
-    ##          Plant_Species Species_Code Plot Narea_mg_mm2 Narea_g_m2 PLSR_Predicted
-    ## 184  Jacobaea vulgaris       Jacvul  WC2  0.008756996  0.8756996      0.9462916
-    ## 185 Potentilla reptans       Potrep  WC2  0.010313464  1.0313464      1.5386676
-    ## 186      Rubus caesius       Rubcae  WC2  0.007968454  0.7968454      0.8790482
-    ## 187      Urtica dioica       Urtdio  WC2  0.012737560  1.2737560      1.1241560
-    ## 188 Ammophila arenaria       Ammare  WC3  0.028072806  2.8072806      2.4527108
-    ## 189  Jacobaea vulgaris       Jacvul  WC3  0.010251687  1.0251687      1.1553688
-    ##     PLSR_Residuals       LCI      UCI       LPI      UPI
-    ## 184     0.07059201 0.8915898 1.008806 0.4588988 1.433684
-    ## 185     0.50732119 1.4007173 1.636452 1.0403747 2.036960
-    ## 186     0.08220284 0.6861219 1.160030 0.3405908 1.417506
-    ## 187    -0.14959995 0.9651982 1.245464 0.6198291 1.628483
-    ## 188    -0.35456980 2.1911406 2.619696 1.9245720 2.980850
-    ## 189     0.13020008 1.0735154 1.233082 0.6651011 1.645636
+    ##            Plant_Species Species_Code Plot Narea_mg_mm2 Narea_g_m2
+    ## 1  Arrhenatherum elatius       Arrela  DC1   0.01261440   1.261440
+    ## 4          Rubus caesius       Rubcae  DC1   0.01208978   1.208978
+    ## 8      Jacobaea vulgaris       Jacvul  DC2   0.01185197   1.185197
+    ## 11        Carex arenaria       Carare  DC3   0.02103830   2.103830
+    ## 14     Jacobaea vulgaris       Jacvul  DC3   0.01121247   1.121247
+    ## 19 Oenothera glazioviana       Oengla  DC4   0.01444293   1.444293
+    ##    PLSR_Predicted PLSR_Residuals      LCI      UCI       LPI      UPI
+    ## 1        1.340135     0.07869548 1.226834 1.429328 0.7824784 1.897793
+    ## 4        1.288026     0.07904830 1.182535 1.382687 0.7308093 1.845242
+    ## 8        1.155840    -0.02935675 1.094629 1.236994 0.6033307 1.708349
+    ## 11       2.014712    -0.08911757 1.887670 2.098661 1.4570138 2.572411
+    ## 14       1.328742     0.20749565 1.278652 1.359115 0.7791767 1.878308
+    ## 19       1.534162     0.08986811 1.437618 1.642761 0.9768182 2.091505
 
 ### Jackknife coefficient plot
 
@@ -863,7 +850,7 @@ write.csv(out.jk.coefs,file=file.path(outdir,paste0(inVar,
 print(paste("Output directory: ", outdir))
 ```
 
-    ## [1] "Output directory:  /var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T//RtmpmNliia"
+    ## [1] "Output directory:  /var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T//RtmpYVNyVT"
 
 ``` r
 # Observed versus predicted
diff --git a/vignettes/reseco_leafN_bootstrap_plsr_example.pdf b/vignettes/reseco_leafN_bootstrap_plsr_example.pdf
index 55d71c5..35d5a0f 100644
Binary files a/vignettes/reseco_leafN_bootstrap_plsr_example.pdf and b/vignettes/reseco_leafN_bootstrap_plsr_example.pdf differ
diff --git a/vignettes/reseco_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png b/vignettes/reseco_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png
index 46da6e4..b2bff86 100644
Binary files a/vignettes/reseco_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png and b/vignettes/reseco_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png differ
diff --git a/vignettes/reseco_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png b/vignettes/reseco_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png
index 1475940..6b2141a 100644
Binary files a/vignettes/reseco_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png and b/vignettes/reseco_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png differ
diff --git a/vignettes/reseco_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-16-1.png b/vignettes/reseco_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-16-1.png
index 1153721..3fda55c 100644
Binary files a/vignettes/reseco_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-16-1.png and b/vignettes/reseco_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-16-1.png differ
diff --git a/vignettes/reseco_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-7-1.png b/vignettes/reseco_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-7-1.png
index d19d327..9faeaef 100644
Binary files a/vignettes/reseco_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-7-1.png and b/vignettes/reseco_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-7-1.png differ
diff --git a/vignettes/reseco_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-9-1.png b/vignettes/reseco_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-9-1.png
index ce44beb..0f05c9b 100644
Binary files a/vignettes/reseco_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-9-1.png and b/vignettes/reseco_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-9-1.png differ
diff --git a/vignettes/reseco_leafN_plsr_example.Rmd b/vignettes/reseco_leafN_plsr_example.Rmd
index a9f27d8..a78ceea 100644
--- a/vignettes/reseco_leafN_plsr_example.Rmd
+++ b/vignettes/reseco_leafN_plsr_example.Rmd
@@ -3,10 +3,10 @@ title: Spectra-trait PLSR example using leaf-level spectra and leaf nitrogen con
 author: "Shawn P. Serbin, Julien Lamour, & Jeremiah Anderson"
 output:
   pdf_document: default
-  html_notebook: default
+  github_document: default
   html_document:
     df_print: paged
-  github_document: default
+  html_notebook: default
 params:
   date: !r Sys.Date()
 ---
@@ -21,8 +21,7 @@ This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook to illustrate how
 ### Getting Started
 ### Load libraries
 ```{r, eval=TRUE, echo=TRUE}
-list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra",
-                      "spectratrait")
+list.of.packages <- c("pls","dplyr","here","plotrix","ggplot2","gridExtra","spectratrait")
 invisible(lapply(list.of.packages, library, character.only = TRUE))
 ```
 
@@ -187,12 +186,14 @@ iterations <- 80
 prop <- 0.70
 if (method=="pls") {
   # pls package approach - faster but estimates more components....
-  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, 
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, 
+                                                  method=method, 
                                                   maxComps=maxComps, seg=seg, 
                                                   random_seed=random_seed)
   print(paste0("*** Optimal number of components: ", nComps))
 } else {
-  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, 
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar,
+                                                  method=method, 
                                                   maxComps=maxComps, iterations=iterations, 
                                                   seg=seg, prop=prop, 
                                                   random_seed=random_seed)
diff --git a/vignettes/reseco_leafN_plsr_example.md b/vignettes/reseco_leafN_plsr_example.md
index f50aa76..e9e7896 100644
--- a/vignettes/reseco_leafN_plsr_example.md
+++ b/vignettes/reseco_leafN_plsr_example.md
@@ -16,8 +16,7 @@ leaf nitrogen content (Narea, g/m2)
 ### Load libraries
 
 ``` r
-list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra",
-                      "spectratrait")
+list.of.packages <- c("pls","dplyr","here","plotrix","ggplot2","gridExtra","spectratrait")
 invisible(lapply(list.of.packages, library, character.only = TRUE))
 ```
 
@@ -82,7 +81,7 @@ output_dir <- "tempdir"
 
 ### Set working directory (scratch space)
 
-    ## [1] "/private/var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T/RtmpJ6W1sB"
+    ## [1] "/private/var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T/RtmpYxPllO"
 
 ### Grab data from EcoSIS
 
@@ -117,38 +116,22 @@ dat_raw <- spectratrait::get_ecosis_data(ecosis_id = ecosis_id)
 head(dat_raw)
 ```
 
-    ## # A tibble: 6 x 2,164
-    ##   `Cw/EWT (cm3/cm2… `Latin Species`   `Leaf area (mm2… `Leaf calcium content pe…
-    ##               <dbl> <chr>                        <dbl>                     <dbl>
-    ## 1           0.00887 Arrhenatherum el…             696.                    0.0291
-    ## 2           0.00824 Bromus sterilis               447.                    0.0230
-    ## 3           0.0280  Jacobaea vulgaris            2418.                    0.0950
-    ## 4           0.0106  Rubus caesius                5719.                    0.0700
-    ## 5           0.00851 Arrhenatherum el…             671.                    0.0286
-    ## 6           0.0153  Crepis capillaris            1401.                    0.0470
+    ## # A tibble: 6 × 2,164
+    ##   `Cw/EWT (cm3/cm2)` `Latin Species`       `Leaf area (mm2)` `Leaf calcium cont…
+    ##                <dbl> <chr>                             <dbl>               <dbl>
+    ## 1            0.00887 Arrhenatherum elatius              696.              0.0291
+    ## 2            0.00824 Bromus sterilis                    447.              0.0230
+    ## 3            0.0280  Jacobaea vulgaris                 2418.              0.0950
+    ## 4            0.0106  Rubus caesius                     5719.              0.0700
+    ## 5            0.00851 Arrhenatherum elatius              671.              0.0286
+    ## 6            0.0153  Crepis capillaris                 1401.              0.0470
     ## # … with 2,160 more variables:
     ## #   Leaf magnesium content per leaf area (mg/mm2) <dbl>,
     ## #   Leaf mass per area (g/cm2) <dbl>,
     ## #   Leaf nitrogen content per leaf area (mg/mm2) <dbl>,
     ## #   Leaf phosphorus content per leaf area (mg/mm2) <dbl>,
     ## #   Leaf potassium content per leaf area (mg/mm2) <dbl>,
-    ## #   Plant height vegetative (cm) <dbl>, ids <chr>, plot code <chr>,
-    ## #   species code <chr>, 350 <dbl>, 351 <dbl>, 352 <dbl>, 353 <dbl>, 354 <dbl>,
-    ## #   355 <dbl>, 356 <dbl>, 357 <dbl>, 358 <dbl>, 359 <dbl>, 360 <dbl>,
-    ## #   361 <dbl>, 362 <dbl>, 363 <dbl>, 364 <dbl>, 365 <dbl>, 366 <dbl>,
-    ## #   367 <dbl>, 368 <dbl>, 369 <dbl>, 370 <dbl>, 371 <dbl>, 372 <dbl>,
-    ## #   373 <dbl>, 374 <dbl>, 375 <dbl>, 376 <dbl>, 377 <dbl>, 378 <dbl>,
-    ## #   379 <dbl>, 380 <dbl>, 381 <dbl>, 382 <dbl>, 383 <dbl>, 384 <dbl>,
-    ## #   385 <dbl>, 386 <dbl>, 387 <dbl>, 388 <dbl>, 389 <dbl>, 390 <dbl>,
-    ## #   391 <dbl>, 392 <dbl>, 393 <dbl>, 394 <dbl>, 395 <dbl>, 396 <dbl>,
-    ## #   397 <dbl>, 398 <dbl>, 399 <dbl>, 400 <dbl>, 401 <dbl>, 402 <dbl>,
-    ## #   403 <dbl>, 404 <dbl>, 405 <dbl>, 406 <dbl>, 407 <dbl>, 408 <dbl>,
-    ## #   409 <dbl>, 410 <dbl>, 411 <dbl>, 412 <dbl>, 413 <dbl>, 414 <dbl>,
-    ## #   415 <dbl>, 416 <dbl>, 417 <dbl>, 418 <dbl>, 419 <dbl>, 420 <dbl>,
-    ## #   421 <dbl>, 422 <dbl>, 423 <dbl>, 424 <dbl>, 425 <dbl>, 426 <dbl>,
-    ## #   427 <dbl>, 428 <dbl>, 429 <dbl>, 430 <dbl>, 431 <dbl>, 432 <dbl>,
-    ## #   433 <dbl>, 434 <dbl>, 435 <dbl>, 436 <dbl>, 437 <dbl>, 438 <dbl>,
-    ## #   439 <dbl>, 440 <dbl>, …
+    ## #   Plant height vegetative (cm) <dbl>, ids <chr>, plot code <chr>, …
 
 ``` r
 names(dat_raw)[1:40]
@@ -208,15 +191,15 @@ sample_info <- dat_raw[,names(dat_raw) %notin% seq(350,2500,1)]
 head(sample_info)
 ```
 
-    ## # A tibble: 6 x 13
-    ##   `Cw/EWT (cm3/cm2… `Latin Species`   `Leaf area (mm2… `Leaf calcium content pe…
-    ##               <dbl> <chr>                        <dbl>                     <dbl>
-    ## 1           0.00887 Arrhenatherum el…             696.                    0.0291
-    ## 2           0.00824 Bromus sterilis               447.                    0.0230
-    ## 3           0.0280  Jacobaea vulgaris            2418.                    0.0950
-    ## 4           0.0106  Rubus caesius                5719.                    0.0700
-    ## 5           0.00851 Arrhenatherum el…             671.                    0.0286
-    ## 6           0.0153  Crepis capillaris            1401.                    0.0470
+    ## # A tibble: 6 × 13
+    ##   `Cw/EWT (cm3/cm2)` `Latin Species`       `Leaf area (mm2)` `Leaf calcium cont…
+    ##                <dbl> <chr>                             <dbl>               <dbl>
+    ## 1            0.00887 Arrhenatherum elatius              696.              0.0291
+    ## 2            0.00824 Bromus sterilis                    447.              0.0230
+    ## 3            0.0280  Jacobaea vulgaris                 2418.              0.0950
+    ## 4            0.0106  Rubus caesius                     5719.              0.0700
+    ## 5            0.00851 Arrhenatherum elatius              671.              0.0286
+    ## 6            0.0153  Crepis capillaris                 1401.              0.0470
     ## # … with 9 more variables: Leaf magnesium content per leaf area (mg/mm2) <dbl>,
     ## #   Leaf mass per area (g/cm2) <dbl>,
     ## #   Leaf nitrogen content per leaf area (mg/mm2) <dbl>,
@@ -235,7 +218,7 @@ sample_info2 <- sample_info2 %>%
 head(sample_info2)
 ```
 
-    ## # A tibble: 6 x 5
+    ## # A tibble: 6 × 5
     ##   Plant_Species         Species_Code Plot  Narea_mg_mm2 Narea_g_m2
     ##   <chr>                 <chr>        <chr>        <dbl>      <dbl>
     ## 1 Arrhenatherum elatius Arrela       DC1        0.0126       1.26 
@@ -300,20 +283,20 @@ val.plsr.data <- split_data$val_data
 head(val.plsr.data)[1:8]
 ```
 
-    ##          Plant_Species Species_Code Plot Narea_mg_mm2 Narea_g_m2   Wave_500
-    ## 184  Jacobaea vulgaris       Jacvul  WC2  0.008756996  0.8756996 0.06736887
-    ## 185 Potentilla reptans       Potrep  WC2  0.010313464  1.0313464 0.07125000
-    ## 186      Rubus caesius       Rubcae  WC2  0.007968454  0.7968454 0.05993560
-    ## 187      Urtica dioica       Urtdio  WC2  0.012737560  1.2737560 0.06508300
-    ## 188 Ammophila arenaria       Ammare  WC3  0.028072806  2.8072806 0.15175000
-    ## 189  Jacobaea vulgaris       Jacvul  WC3  0.010251687  1.0251687 0.06805547
-    ##       Wave_501   Wave_502
-    ## 184 0.06870667 0.07014220
-    ## 185 0.07235000 0.07368350
-    ## 186 0.06162000 0.06352233
-    ## 187 0.06625000 0.06758350
-    ## 188 0.15275000 0.15415000
-    ## 189 0.06938000 0.07093553
+    ##            Plant_Species Species_Code Plot Narea_mg_mm2 Narea_g_m2   Wave_500
+    ## 1  Arrhenatherum elatius       Arrela  DC1   0.01261440   1.261440 0.07066700
+    ## 4          Rubus caesius       Rubcae  DC1   0.01208978   1.208978 0.04144907
+    ## 8      Jacobaea vulgaris       Jacvul  DC2   0.01185197   1.185197 0.05563100
+    ## 11        Carex arenaria       Carare  DC3   0.02103830   2.103830 0.11588500
+    ## 14     Jacobaea vulgaris       Jacvul  DC3   0.01121247   1.121247 0.06029327
+    ## 19 Oenothera glazioviana       Oengla  DC4   0.01444293   1.444293 0.07391700
+    ##      Wave_501  Wave_502
+    ## 1  0.07160000 0.0725330
+    ## 4  0.04197333 0.0426356
+    ## 8  0.05622143 0.0569690
+    ## 11 0.11705000 0.1184500
+    ## 14 0.06112000 0.0620312
+    ## 19 0.07515000 0.0765500
 
 ``` r
 rm(split_data)
@@ -384,13 +367,13 @@ val.plsr.data <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% p
 head(val.plsr.data)[1:5]
 ```
 
-    ##          Plant_Species Species_Code Plot Narea_mg_mm2 Narea_g_m2
-    ## 184  Jacobaea vulgaris       Jacvul  WC2  0.008756996  0.8756996
-    ## 185 Potentilla reptans       Potrep  WC2  0.010313464  1.0313464
-    ## 186      Rubus caesius       Rubcae  WC2  0.007968454  0.7968454
-    ## 187      Urtica dioica       Urtdio  WC2  0.012737560  1.2737560
-    ## 188 Ammophila arenaria       Ammare  WC3  0.028072806  2.8072806
-    ## 189  Jacobaea vulgaris       Jacvul  WC3  0.010251687  1.0251687
+    ##            Plant_Species Species_Code Plot Narea_mg_mm2 Narea_g_m2
+    ## 1  Arrhenatherum elatius       Arrela  DC1   0.01261440   1.261440
+    ## 4          Rubus caesius       Rubcae  DC1   0.01208978   1.208978
+    ## 8      Jacobaea vulgaris       Jacvul  DC2   0.01185197   1.185197
+    ## 11        Carex arenaria       Carare  DC3   0.02103830   2.103830
+    ## 14     Jacobaea vulgaris       Jacvul  DC3   0.01121247   1.121247
+    ## 19 Oenothera glazioviana       Oengla  DC4   0.01444293   1.444293
 
 ### plot cal and val spectra
 
@@ -439,18 +422,21 @@ iterations <- 80
 prop <- 0.70
 if (method=="pls") {
   # pls package approach - faster but estimates more components....
-  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, 
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, 
+                                                  method=method, 
                                                   maxComps=maxComps, seg=seg, 
                                                   random_seed=random_seed)
   print(paste0("*** Optimal number of components: ", nComps))
 } else {
-  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, 
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar,
+                                                  method=method, 
                                                   maxComps=maxComps, iterations=iterations, 
                                                   seg=seg, prop=prop, 
                                                   random_seed=random_seed)
 }
 ```
 
+    ## [1] "*** Identifying optimal number of PLSR components ***"
     ## [1] "*** Running PLS permutation test ***"
 
 ![](reseco_leafN_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png)<!-- -->
@@ -486,9 +472,9 @@ pls::RMSEP(plsr.out, newdata = val.plsr.data)
 ```
 
     ## (Intercept)      1 comps      2 comps      3 comps      4 comps      5 comps  
-    ##      0.6346       0.5045       0.4645       0.3415       0.3296       0.3037  
+    ##      0.5594       0.6034       0.5448       0.3842       0.3481       0.3027  
     ##     6 comps      7 comps      8 comps      9 comps     10 comps  
-    ##      0.2703       0.2659       0.2524       0.2450       0.2452
+    ##      0.2429       0.2268       0.2852       0.2818       0.2780
 
 ``` r
 plot(pls::RMSEP(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL RMSEP",
@@ -499,9 +485,9 @@ pls::R2(plsr.out, newdata = val.plsr.data)
 ```
 
     ## (Intercept)      1 comps      2 comps      3 comps      4 comps      5 comps  
-    ##    -0.05977      0.33000      0.43217      0.69298      0.71415      0.75732  
+    ##   -0.007544    -0.172296     0.044153     0.524579     0.609920     0.704963  
     ##     6 comps      7 comps      8 comps      9 comps     10 comps  
-    ##     0.80776      0.81389      0.83228      0.84198      0.84176
+    ##    0.809962     0.834383     0.738093     0.744325     0.751224
 
 ``` r
 plot(pls::R2(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL R2",
@@ -570,20 +556,20 @@ val.plsr.output <- val.plsr.output %>%
 head(val.plsr.output)
 ```
 
-    ##          Plant_Species Species_Code Plot Narea_mg_mm2 Narea_g_m2 PLSR_Predicted
-    ## 184  Jacobaea vulgaris       Jacvul  WC2  0.008756996  0.8756996      0.9462916
-    ## 185 Potentilla reptans       Potrep  WC2  0.010313464  1.0313464      1.5386676
-    ## 186      Rubus caesius       Rubcae  WC2  0.007968454  0.7968454      0.8790482
-    ## 187      Urtica dioica       Urtdio  WC2  0.012737560  1.2737560      1.1241560
-    ## 188 Ammophila arenaria       Ammare  WC3  0.028072806  2.8072806      2.4527108
-    ## 189  Jacobaea vulgaris       Jacvul  WC3  0.010251687  1.0251687      1.1553688
-    ##     PLSR_Residuals
-    ## 184     0.07059201
-    ## 185     0.50732119
-    ## 186     0.08220284
-    ## 187    -0.14959995
-    ## 188    -0.35456980
-    ## 189     0.13020008
+    ##            Plant_Species Species_Code Plot Narea_mg_mm2 Narea_g_m2
+    ## 1  Arrhenatherum elatius       Arrela  DC1   0.01261440   1.261440
+    ## 4          Rubus caesius       Rubcae  DC1   0.01208978   1.208978
+    ## 8      Jacobaea vulgaris       Jacvul  DC2   0.01185197   1.185197
+    ## 11        Carex arenaria       Carare  DC3   0.02103830   2.103830
+    ## 14     Jacobaea vulgaris       Jacvul  DC3   0.01121247   1.121247
+    ## 19 Oenothera glazioviana       Oengla  DC4   0.01444293   1.444293
+    ##    PLSR_Predicted PLSR_Residuals
+    ## 1        1.340135     0.07869548
+    ## 4        1.288026     0.07904830
+    ## 8        1.155840    -0.02935675
+    ## 11       2.014712    -0.08911757
+    ## 14       1.328742     0.20749565
+    ## 19       1.534162     0.08986811
 
 ``` r
 val.R2 <- round(pls::R2(plsr.out,newdata=val.plsr.data,intercept=F)[[1]][nComps],2)
@@ -644,7 +630,7 @@ scatterplots <- grid.arrange(cal_scatter_plot, val_scatter_plot, cal_resid_histo
 
     ## Warning: Removed 2 rows containing missing values (geom_point).
 
-    ## Warning: Removed 3 rows containing missing values (geom_point).
+    ## Warning: Removed 2 rows containing missing values (geom_point).
 
     ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
     ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
@@ -727,20 +713,20 @@ val.plsr.output$UPI <- val.plsr.output$PLSR_Predicted+1.96*sd_tot
 head(val.plsr.output)
 ```
 
-    ##          Plant_Species Species_Code Plot Narea_mg_mm2 Narea_g_m2 PLSR_Predicted
-    ## 184  Jacobaea vulgaris       Jacvul  WC2  0.008756996  0.8756996      0.9462916
-    ## 185 Potentilla reptans       Potrep  WC2  0.010313464  1.0313464      1.5386676
-    ## 186      Rubus caesius       Rubcae  WC2  0.007968454  0.7968454      0.8790482
-    ## 187      Urtica dioica       Urtdio  WC2  0.012737560  1.2737560      1.1241560
-    ## 188 Ammophila arenaria       Ammare  WC3  0.028072806  2.8072806      2.4527108
-    ## 189  Jacobaea vulgaris       Jacvul  WC3  0.010251687  1.0251687      1.1553688
-    ##     PLSR_Residuals       LCI       UCI       LPI      UPI
-    ## 184     0.07059201 0.9154961 0.9532972 0.4623162 1.430267
-    ## 185     0.50732119 1.4875834 1.5528063 1.0540777 2.023258
-    ## 186     0.08220284 0.8472007 0.9329303 0.3936085 1.364488
-    ## 187    -0.14959995 1.1075928 1.1743800 0.6395189 1.608793
-    ## 188    -0.35456980 2.4248448 2.5638131 1.9651833 2.940238
-    ## 189     0.13020008 1.1262731 1.1615642 0.6713762 1.639361
+    ##            Plant_Species Species_Code Plot Narea_mg_mm2 Narea_g_m2
+    ## 1  Arrhenatherum elatius       Arrela  DC1   0.01261440   1.261440
+    ## 4          Rubus caesius       Rubcae  DC1   0.01208978   1.208978
+    ## 8      Jacobaea vulgaris       Jacvul  DC2   0.01185197   1.185197
+    ## 11        Carex arenaria       Carare  DC3   0.02103830   2.103830
+    ## 14     Jacobaea vulgaris       Jacvul  DC3   0.01121247   1.121247
+    ## 19 Oenothera glazioviana       Oengla  DC4   0.01444293   1.444293
+    ##    PLSR_Predicted PLSR_Residuals      LCI      UCI       LPI      UPI
+    ## 1        1.340135     0.07869548 1.298260 1.346986 0.7916762 1.888595
+    ## 4        1.288026     0.07904830 1.262110 1.297939 0.7397937 1.836258
+    ## 8        1.155840    -0.02935675 1.113678 1.172006 0.6072413 1.704439
+    ## 11       2.014712    -0.08911757 1.936508 2.020049 1.4654399 2.563985
+    ## 14       1.328742     0.20749565 1.298485 1.333454 0.7804978 1.876987
+    ## 19       1.534162     0.08986811 1.522672 1.550848 0.9859820 2.082341
 
 ``` r
 val.plsr.output$LPI <- val.plsr.output$PLSR_Predicted-1.96*sd_tot
@@ -748,20 +734,20 @@ val.plsr.output$UPI <- val.plsr.output$PLSR_Predicted+1.96*sd_tot
 head(val.plsr.output)
 ```
 
-    ##          Plant_Species Species_Code Plot Narea_mg_mm2 Narea_g_m2 PLSR_Predicted
-    ## 184  Jacobaea vulgaris       Jacvul  WC2  0.008756996  0.8756996      0.9462916
-    ## 185 Potentilla reptans       Potrep  WC2  0.010313464  1.0313464      1.5386676
-    ## 186      Rubus caesius       Rubcae  WC2  0.007968454  0.7968454      0.8790482
-    ## 187      Urtica dioica       Urtdio  WC2  0.012737560  1.2737560      1.1241560
-    ## 188 Ammophila arenaria       Ammare  WC3  0.028072806  2.8072806      2.4527108
-    ## 189  Jacobaea vulgaris       Jacvul  WC3  0.010251687  1.0251687      1.1553688
-    ##     PLSR_Residuals       LCI       UCI       LPI      UPI
-    ## 184     0.07059201 0.9154961 0.9532972 0.4623162 1.430267
-    ## 185     0.50732119 1.4875834 1.5528063 1.0540777 2.023258
-    ## 186     0.08220284 0.8472007 0.9329303 0.3936085 1.364488
-    ## 187    -0.14959995 1.1075928 1.1743800 0.6395189 1.608793
-    ## 188    -0.35456980 2.4248448 2.5638131 1.9651833 2.940238
-    ## 189     0.13020008 1.1262731 1.1615642 0.6713762 1.639361
+    ##            Plant_Species Species_Code Plot Narea_mg_mm2 Narea_g_m2
+    ## 1  Arrhenatherum elatius       Arrela  DC1   0.01261440   1.261440
+    ## 4          Rubus caesius       Rubcae  DC1   0.01208978   1.208978
+    ## 8      Jacobaea vulgaris       Jacvul  DC2   0.01185197   1.185197
+    ## 11        Carex arenaria       Carare  DC3   0.02103830   2.103830
+    ## 14     Jacobaea vulgaris       Jacvul  DC3   0.01121247   1.121247
+    ## 19 Oenothera glazioviana       Oengla  DC4   0.01444293   1.444293
+    ##    PLSR_Predicted PLSR_Residuals      LCI      UCI       LPI      UPI
+    ## 1        1.340135     0.07869548 1.298260 1.346986 0.7916762 1.888595
+    ## 4        1.288026     0.07904830 1.262110 1.297939 0.7397937 1.836258
+    ## 8        1.155840    -0.02935675 1.113678 1.172006 0.6072413 1.704439
+    ## 11       2.014712    -0.08911757 1.936508 2.020049 1.4654399 2.563985
+    ## 14       1.328742     0.20749565 1.298485 1.333454 0.7804978 1.876987
+    ## 19       1.534162     0.08986811 1.522672 1.550848 0.9859820 2.082341
 
 ### Jackknife coefficient plot
 
@@ -863,7 +849,7 @@ write.csv(out.jk.coefs,file=file.path(outdir,
 print(paste("Output directory: ", outdir))
 ```
 
-    ## [1] "Output directory:  /var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T//RtmpJ6W1sB"
+    ## [1] "Output directory:  /var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T//RtmpYxPllO"
 
 ``` r
 # Observed versus predicted
diff --git a/vignettes/reseco_leafN_plsr_example.pdf b/vignettes/reseco_leafN_plsr_example.pdf
index 2b2f343..7723e0b 100644
Binary files a/vignettes/reseco_leafN_plsr_example.pdf and b/vignettes/reseco_leafN_plsr_example.pdf differ
diff --git a/vignettes/reseco_leafN_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png b/vignettes/reseco_leafN_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png
index 46da6e4..b2bff86 100644
Binary files a/vignettes/reseco_leafN_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png and b/vignettes/reseco_leafN_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png differ
diff --git a/vignettes/reseco_leafN_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png b/vignettes/reseco_leafN_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png
index 1475940..6b2141a 100644
Binary files a/vignettes/reseco_leafN_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png and b/vignettes/reseco_leafN_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png differ
diff --git a/vignettes/reseco_leafN_plsr_example_files/figure-gfm/unnamed-chunk-16-1.png b/vignettes/reseco_leafN_plsr_example_files/figure-gfm/unnamed-chunk-16-1.png
index e071cc6..fcb2b20 100644
Binary files a/vignettes/reseco_leafN_plsr_example_files/figure-gfm/unnamed-chunk-16-1.png and b/vignettes/reseco_leafN_plsr_example_files/figure-gfm/unnamed-chunk-16-1.png differ
diff --git a/vignettes/reseco_leafN_plsr_example_files/figure-gfm/unnamed-chunk-7-1.png b/vignettes/reseco_leafN_plsr_example_files/figure-gfm/unnamed-chunk-7-1.png
index d19d327..9faeaef 100644
Binary files a/vignettes/reseco_leafN_plsr_example_files/figure-gfm/unnamed-chunk-7-1.png and b/vignettes/reseco_leafN_plsr_example_files/figure-gfm/unnamed-chunk-7-1.png differ
diff --git a/vignettes/reseco_leafN_plsr_example_files/figure-gfm/unnamed-chunk-9-1.png b/vignettes/reseco_leafN_plsr_example_files/figure-gfm/unnamed-chunk-9-1.png
index ce44beb..0f05c9b 100644
Binary files a/vignettes/reseco_leafN_plsr_example_files/figure-gfm/unnamed-chunk-9-1.png and b/vignettes/reseco_leafN_plsr_example_files/figure-gfm/unnamed-chunk-9-1.png differ
diff --git a/vignettes/reseco_lma_plsr_example.Rmd b/vignettes/reseco_lma_plsr_example.Rmd
index 2aa25b9..de2485a 100644
--- a/vignettes/reseco_lma_plsr_example.Rmd
+++ b/vignettes/reseco_lma_plsr_example.Rmd
@@ -21,8 +21,7 @@ This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook to illustrate how
 ### Getting Started
 ### Step 1. Load libraries needed to run example script
 ```{r, eval=TRUE, echo=TRUE}
-list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra",
-                      "spectratrait")
+list.of.packages <- c("pls","dplyr","here","plotrix","ggplot2","gridExtra","spectratrait")
 invisible(lapply(list.of.packages, library, character.only = TRUE))
 ```
 
@@ -205,12 +204,14 @@ maxComps <- 16
 iterations <- 50
 prop <- 0.70
 if (method=="pls") {
-  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, 
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar,
+                                                  method=method, 
                                                   maxComps=maxComps, seg=seg, 
                                                   random_seed=random_seed)
   print(paste0("*** Optimal number of components: ", nComps))
 } else {
-  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, 
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar,
+                                                  method=method, 
                                                   maxComps=maxComps, iterations=iterations, 
                                                   seg=seg, prop=prop, 
                                                   random_seed=random_seed)
diff --git a/vignettes/reseco_lma_plsr_example.md b/vignettes/reseco_lma_plsr_example.md
index 3015be4..35abdab 100644
--- a/vignettes/reseco_lma_plsr_example.md
+++ b/vignettes/reseco_lma_plsr_example.md
@@ -16,8 +16,7 @@ leaf-mass area (LMA)
 ### Step 1. Load libraries needed to run example script
 
 ``` r
-list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra",
-                      "spectratrait")
+list.of.packages <- c("pls","dplyr","here","plotrix","ggplot2","gridExtra","spectratrait")
 invisible(lapply(list.of.packages, library, character.only = TRUE))
 ```
 
@@ -82,7 +81,7 @@ output_dir <- "tempdir"
 
 ### Step 3. Set working directory (scratch space)
 
-    ## [1] "/private/var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T/RtmpoqfeI6"
+    ## [1] "/private/var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T/Rtmpfxi2vB"
 
 ### Step 4. Pull example dataset from EcoSIS (ecosis.org)
 
@@ -118,38 +117,22 @@ dat_raw <- spectratrait::get_ecosis_data(ecosis_id = ecosis_id)
 head(dat_raw)
 ```
 
-    ## # A tibble: 6 x 2,164
-    ##   `Cw/EWT (cm3/cm2… `Latin Species`   `Leaf area (mm2… `Leaf calcium content pe…
-    ##               <dbl> <chr>                        <dbl>                     <dbl>
-    ## 1           0.00887 Arrhenatherum el…             696.                    0.0291
-    ## 2           0.00824 Bromus sterilis               447.                    0.0230
-    ## 3           0.0280  Jacobaea vulgaris            2418.                    0.0950
-    ## 4           0.0106  Rubus caesius                5719.                    0.0700
-    ## 5           0.00851 Arrhenatherum el…             671.                    0.0286
-    ## 6           0.0153  Crepis capillaris            1401.                    0.0470
+    ## # A tibble: 6 × 2,164
+    ##   `Cw/EWT (cm3/cm2)` `Latin Species`       `Leaf area (mm2)` `Leaf calcium cont…
+    ##                <dbl> <chr>                             <dbl>               <dbl>
+    ## 1            0.00887 Arrhenatherum elatius              696.              0.0291
+    ## 2            0.00824 Bromus sterilis                    447.              0.0230
+    ## 3            0.0280  Jacobaea vulgaris                 2418.              0.0950
+    ## 4            0.0106  Rubus caesius                     5719.              0.0700
+    ## 5            0.00851 Arrhenatherum elatius              671.              0.0286
+    ## 6            0.0153  Crepis capillaris                 1401.              0.0470
     ## # … with 2,160 more variables:
     ## #   Leaf magnesium content per leaf area (mg/mm2) <dbl>,
     ## #   Leaf mass per area (g/cm2) <dbl>,
     ## #   Leaf nitrogen content per leaf area (mg/mm2) <dbl>,
     ## #   Leaf phosphorus content per leaf area (mg/mm2) <dbl>,
     ## #   Leaf potassium content per leaf area (mg/mm2) <dbl>,
-    ## #   Plant height vegetative (cm) <dbl>, ids <chr>, plot code <chr>,
-    ## #   species code <chr>, 350 <dbl>, 351 <dbl>, 352 <dbl>, 353 <dbl>, 354 <dbl>,
-    ## #   355 <dbl>, 356 <dbl>, 357 <dbl>, 358 <dbl>, 359 <dbl>, 360 <dbl>,
-    ## #   361 <dbl>, 362 <dbl>, 363 <dbl>, 364 <dbl>, 365 <dbl>, 366 <dbl>,
-    ## #   367 <dbl>, 368 <dbl>, 369 <dbl>, 370 <dbl>, 371 <dbl>, 372 <dbl>,
-    ## #   373 <dbl>, 374 <dbl>, 375 <dbl>, 376 <dbl>, 377 <dbl>, 378 <dbl>,
-    ## #   379 <dbl>, 380 <dbl>, 381 <dbl>, 382 <dbl>, 383 <dbl>, 384 <dbl>,
-    ## #   385 <dbl>, 386 <dbl>, 387 <dbl>, 388 <dbl>, 389 <dbl>, 390 <dbl>,
-    ## #   391 <dbl>, 392 <dbl>, 393 <dbl>, 394 <dbl>, 395 <dbl>, 396 <dbl>,
-    ## #   397 <dbl>, 398 <dbl>, 399 <dbl>, 400 <dbl>, 401 <dbl>, 402 <dbl>,
-    ## #   403 <dbl>, 404 <dbl>, 405 <dbl>, 406 <dbl>, 407 <dbl>, 408 <dbl>,
-    ## #   409 <dbl>, 410 <dbl>, 411 <dbl>, 412 <dbl>, 413 <dbl>, 414 <dbl>,
-    ## #   415 <dbl>, 416 <dbl>, 417 <dbl>, 418 <dbl>, 419 <dbl>, 420 <dbl>,
-    ## #   421 <dbl>, 422 <dbl>, 423 <dbl>, 424 <dbl>, 425 <dbl>, 426 <dbl>,
-    ## #   427 <dbl>, 428 <dbl>, 429 <dbl>, 430 <dbl>, 431 <dbl>, 432 <dbl>,
-    ## #   433 <dbl>, 434 <dbl>, 435 <dbl>, 436 <dbl>, 437 <dbl>, 438 <dbl>,
-    ## #   439 <dbl>, 440 <dbl>, …
+    ## #   Plant height vegetative (cm) <dbl>, ids <chr>, plot code <chr>, …
 
 ``` r
 names(dat_raw)[1:40]
@@ -209,15 +192,15 @@ sample_info <- dat_raw[,names(dat_raw) %notin% seq(350,2500,1)]
 head(sample_info)
 ```
 
-    ## # A tibble: 6 x 13
-    ##   `Cw/EWT (cm3/cm2… `Latin Species`   `Leaf area (mm2… `Leaf calcium content pe…
-    ##               <dbl> <chr>                        <dbl>                     <dbl>
-    ## 1           0.00887 Arrhenatherum el…             696.                    0.0291
-    ## 2           0.00824 Bromus sterilis               447.                    0.0230
-    ## 3           0.0280  Jacobaea vulgaris            2418.                    0.0950
-    ## 4           0.0106  Rubus caesius                5719.                    0.0700
-    ## 5           0.00851 Arrhenatherum el…             671.                    0.0286
-    ## 6           0.0153  Crepis capillaris            1401.                    0.0470
+    ## # A tibble: 6 × 13
+    ##   `Cw/EWT (cm3/cm2)` `Latin Species`       `Leaf area (mm2)` `Leaf calcium cont…
+    ##                <dbl> <chr>                             <dbl>               <dbl>
+    ## 1            0.00887 Arrhenatherum elatius              696.              0.0291
+    ## 2            0.00824 Bromus sterilis                    447.              0.0230
+    ## 3            0.0280  Jacobaea vulgaris                 2418.              0.0950
+    ## 4            0.0106  Rubus caesius                     5719.              0.0700
+    ## 5            0.00851 Arrhenatherum elatius              671.              0.0286
+    ## 6            0.0153  Crepis capillaris                 1401.              0.0470
     ## # … with 9 more variables: Leaf magnesium content per leaf area (mg/mm2) <dbl>,
     ## #   Leaf mass per area (g/cm2) <dbl>,
     ## #   Leaf nitrogen content per leaf area (mg/mm2) <dbl>,
@@ -235,7 +218,7 @@ sample_info2 <- sample_info2 %>%
 head(sample_info2)
 ```
 
-    ## # A tibble: 6 x 5
+    ## # A tibble: 6 × 5
     ##   Plant_Species         Species_Code Plot  LMA_g_cm2 LMA_g_m2
     ##   <chr>                 <chr>        <chr>     <dbl>    <dbl>
     ## 1 Arrhenatherum elatius Arrela       DC1     0.00342     34.2
@@ -299,20 +282,20 @@ val.plsr.data <- split_data$val_data
 head(val.plsr.data)[1:8]
 ```
 
-    ##          Plant_Species Species_Code Plot   LMA_g_cm2  LMA_g_m2   Wave_500
-    ## 184  Jacobaea vulgaris       Jacvul  WC2 0.003551614  35.51614 0.06736887
-    ## 185 Potentilla reptans       Potrep  WC2 0.005586320  55.86320 0.07125000
-    ## 186      Rubus caesius       Rubcae  WC2 0.005803902  58.03902 0.05993560
-    ## 187      Urtica dioica       Urtdio  WC2 0.005215705  52.15705 0.06508300
-    ## 188 Ammophila arenaria       Ammare  WC3 0.018443757 184.43757 0.15175000
-    ## 189  Jacobaea vulgaris       Jacvul  WC3 0.004980002  49.80002 0.06805547
-    ##       Wave_501   Wave_502
-    ## 184 0.06870667 0.07014220
-    ## 185 0.07235000 0.07368350
-    ## 186 0.06162000 0.06352233
-    ## 187 0.06625000 0.06758350
-    ## 188 0.15275000 0.15415000
-    ## 189 0.06938000 0.07093553
+    ##            Plant_Species Species_Code Plot   LMA_g_cm2  LMA_g_m2 Wave_500
+    ## 1  Arrhenatherum elatius       Arrela  DC1 0.003420518  34.20518 0.070667
+    ## 2        Bromus sterilis       Broste  DC1 0.002816940  28.16940 0.105300
+    ## 5  Arrhenatherum elatius       Arrela  DC2 0.003611619  36.11619 0.076300
+    ## 6      Crepis capillaris       Creves  DC2 0.002828699  28.28699 0.062717
+    ## 11        Carex arenaria       Carare  DC3 0.010579908 105.79908 0.115885
+    ## 16      Elytrigia juncea       Elyjun  DC4 0.012400353 124.00353 0.116320
+    ##    Wave_501 Wave_502
+    ## 1   0.07160 0.072533
+    ## 2   0.10710 0.109030
+    ## 5   0.07670 0.077300
+    ## 6   0.06365 0.064850
+    ## 11  0.11705 0.118450
+    ## 16  0.11745 0.118850
 
 ``` r
 rm(split_data)
@@ -397,13 +380,13 @@ val.plsr.data <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin%
 head(val.plsr.data)[1:5]
 ```
 
-    ##          Plant_Species Species_Code Plot   LMA_g_cm2  LMA_g_m2
-    ## 184  Jacobaea vulgaris       Jacvul  WC2 0.003551614  35.51614
-    ## 185 Potentilla reptans       Potrep  WC2 0.005586320  55.86320
-    ## 186      Rubus caesius       Rubcae  WC2 0.005803902  58.03902
-    ## 187      Urtica dioica       Urtdio  WC2 0.005215705  52.15705
-    ## 188 Ammophila arenaria       Ammare  WC3 0.018443757 184.43757
-    ## 189  Jacobaea vulgaris       Jacvul  WC3 0.004980002  49.80002
+    ##            Plant_Species Species_Code Plot   LMA_g_cm2  LMA_g_m2
+    ## 1  Arrhenatherum elatius       Arrela  DC1 0.003420518  34.20518
+    ## 2        Bromus sterilis       Broste  DC1 0.002816940  28.16940
+    ## 5  Arrhenatherum elatius       Arrela  DC2 0.003611619  36.11619
+    ## 6      Crepis capillaris       Creves  DC2 0.002828699  28.28699
+    ## 11        Carex arenaria       Carare  DC3 0.010579908 105.79908
+    ## 16      Elytrigia juncea       Elyjun  DC4 0.012400353 124.00353
 
 ### Step 9. Calibration and Validation spectra plot
 
@@ -458,18 +441,21 @@ maxComps <- 16
 iterations <- 50
 prop <- 0.70
 if (method=="pls") {
-  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, 
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar,
+                                                  method=method, 
                                                   maxComps=maxComps, seg=seg, 
                                                   random_seed=random_seed)
   print(paste0("*** Optimal number of components: ", nComps))
 } else {
-  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, 
+  nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar,
+                                                  method=method, 
                                                   maxComps=maxComps, iterations=iterations, 
                                                   seg=seg, prop=prop, 
                                                   random_seed=random_seed)
 }
 ```
 
+    ## [1] "*** Identifying optimal number of PLSR components ***"
     ## [1] "*** Running permutation test.  Please hang tight, this can take awhile ***"
     ## [1] "Options:"
     ## [1] "Max Components: 16 Iterations: 50 Data Proportion (percent): 70"
@@ -519,9 +505,9 @@ pls::RMSEP(plsr.out, newdata = val.plsr.data)
 ```
 
     ## (Intercept)      1 comps      2 comps      3 comps      4 comps      5 comps  
-    ##       37.79        32.71        30.36        23.51        21.58        18.46  
+    ##       30.50        38.30        35.20        22.78        20.14        17.39  
     ##     6 comps      7 comps      8 comps      9 comps     10 comps     11 comps  
-    ##       15.89        15.44        15.52        15.19        15.14        13.68
+    ##       13.10        12.56        14.13        17.45        15.61        12.70
 
 ``` r
 plot(pls::RMSEP(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL RMSEP",
@@ -533,9 +519,9 @@ pls::R2(plsr.out, newdata = val.plsr.data)
 ```
 
     ## (Intercept)      1 comps      2 comps      3 comps      4 comps      5 comps  
-    ##    -0.06195      0.20461      0.31467      0.58911      0.65365      0.74649  
+    ##    -0.02137     -0.60981     -0.36001      0.43050      0.55467      0.66818  
     ##     6 comps      7 comps      8 comps      9 comps     10 comps     11 comps  
-    ##     0.81222      0.82276      0.82084      0.82841      0.82945      0.86090
+    ##     0.81156      0.82673      0.78088      0.66593      0.73244      0.82292
 
 ``` r
 plot(pls::R2(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL R2",
@@ -611,20 +597,20 @@ val.plsr.output <- val.plsr.output %>%
 head(val.plsr.output)
 ```
 
-    ##          Plant_Species Species_Code Plot   LMA_g_cm2  LMA_g_m2 PLSR_Predicted
-    ## 184  Jacobaea vulgaris       Jacvul  WC2 0.003551614  35.51614       43.51586
-    ## 185 Potentilla reptans       Potrep  WC2 0.005586320  55.86320       61.41726
-    ## 186      Rubus caesius       Rubcae  WC2 0.005803902  58.03902       45.55789
-    ## 187      Urtica dioica       Urtdio  WC2 0.005215705  52.15705       46.65139
-    ## 188 Ammophila arenaria       Ammare  WC3 0.018443757 184.43757      147.08781
-    ## 189  Jacobaea vulgaris       Jacvul  WC3 0.004980002  49.80002       53.09532
-    ##     PLSR_Residuals
-    ## 184       7.999719
-    ## 185       5.554059
-    ## 186     -12.481126
-    ## 187      -5.505664
-    ## 188     -37.349758
-    ## 189       3.295298
+    ##            Plant_Species Species_Code Plot   LMA_g_cm2  LMA_g_m2 PLSR_Predicted
+    ## 1  Arrhenatherum elatius       Arrela  DC1 0.003420518  34.20518       36.09345
+    ## 2        Bromus sterilis       Broste  DC1 0.002816940  28.16940       42.52977
+    ## 5  Arrhenatherum elatius       Arrela  DC2 0.003611619  36.11619       21.87053
+    ## 6      Crepis capillaris       Creves  DC2 0.002828699  28.28699       20.66219
+    ## 11        Carex arenaria       Carare  DC3 0.010579908 105.79908       99.79501
+    ## 16      Elytrigia juncea       Elyjun  DC4 0.012400353 124.00353      105.16400
+    ##    PLSR_Residuals
+    ## 1        1.888268
+    ## 2       14.360370
+    ## 5      -14.245663
+    ## 6       -7.624796
+    ## 11      -6.004066
+    ## 16     -18.839527
 
 ``` r
 val.R2 <- round(pls::R2(plsr.out,newdata=val.plsr.data,intercept=F)[[1]][nComps],2)
@@ -686,7 +672,7 @@ scatterplots <- grid.arrange(cal_scatter_plot, val_scatter_plot, cal_resid_histo
 
     ## Warning: Removed 6 rows containing missing values (geom_point).
 
-    ## Warning: Removed 6 rows containing missing values (geom_point).
+    ## Warning: Removed 3 rows containing missing values (geom_point).
 
     ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
     ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
@@ -775,20 +761,20 @@ val.plsr.output$UPI <- val.plsr.output$PLSR_Predicted+1.96*sd_tot
 head(val.plsr.output)
 ```
 
-    ##          Plant_Species Species_Code Plot   LMA_g_cm2  LMA_g_m2 PLSR_Predicted
-    ## 184  Jacobaea vulgaris       Jacvul  WC2 0.003551614  35.51614       43.51586
-    ## 185 Potentilla reptans       Potrep  WC2 0.005586320  55.86320       61.41726
-    ## 186      Rubus caesius       Rubcae  WC2 0.005803902  58.03902       45.55789
-    ## 187      Urtica dioica       Urtdio  WC2 0.005215705  52.15705       46.65139
-    ## 188 Ammophila arenaria       Ammare  WC3 0.018443757 184.43757      147.08781
-    ## 189  Jacobaea vulgaris       Jacvul  WC3 0.004980002  49.80002       53.09532
-    ##     PLSR_Residuals       LCI       UCI       LPI       UPI
-    ## 184       7.999719  42.58086  44.15724  16.70642  70.32530
-    ## 185       5.554059  60.10507  62.52674  34.59536  88.23916
-    ## 186     -12.481126  44.66849  48.22967  18.70489  72.41090
-    ## 187      -5.505664  45.70375  47.84938  19.82512  73.47765
-    ## 188     -37.349758 145.09309 148.61694 120.18052 173.99510
-    ## 189       3.295298  52.40880  53.97806  26.28498  79.90565
+    ##            Plant_Species Species_Code Plot   LMA_g_cm2  LMA_g_m2 PLSR_Predicted
+    ## 1  Arrhenatherum elatius       Arrela  DC1 0.003420518  34.20518       36.09345
+    ## 2        Bromus sterilis       Broste  DC1 0.002816940  28.16940       42.52977
+    ## 5  Arrhenatherum elatius       Arrela  DC2 0.003611619  36.11619       21.87053
+    ## 6      Crepis capillaris       Creves  DC2 0.002828699  28.28699       20.66219
+    ## 11        Carex arenaria       Carare  DC3 0.010579908 105.79908       99.79501
+    ## 16      Elytrigia juncea       Elyjun  DC4 0.012400353 124.00353      105.16400
+    ##    PLSR_Residuals       LCI       UCI       LPI       UPI
+    ## 1        1.888268  35.22975  36.83681 11.182998  61.00390
+    ## 2       14.360370  41.61622  43.52851 17.617164  67.44238
+    ## 5      -14.245663  20.07042  23.96996 -3.085793  46.82685
+    ## 6       -7.624796  20.27384  21.15353 -4.234964  45.55935
+    ## 11      -6.004066  98.52166 100.58017 74.888636 124.70139
+    ## 16     -18.839527 104.18470 105.69273 80.260059 130.06795
 
 ``` r
 ### Permutation coefficient plot
@@ -897,7 +883,7 @@ write.csv(out.jk.coefs,file=file.path(outdir,
 print(paste("Output directory: ", outdir))
 ```
 
-    ## [1] "Output directory:  /var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T//RtmpoqfeI6"
+    ## [1] "Output directory:  /var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T//Rtmpfxi2vB"
 
 ``` r
 # Observed versus predicted
diff --git a/vignettes/reseco_lma_plsr_example.pdf b/vignettes/reseco_lma_plsr_example.pdf
index b600447..6df708b 100644
Binary files a/vignettes/reseco_lma_plsr_example.pdf and b/vignettes/reseco_lma_plsr_example.pdf differ
diff --git a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png
index 5e9c7db..dc16377 100644
Binary files a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png and b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png differ
diff --git a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png
index 0aa5976..ab426cc 100644
Binary files a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png and b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png differ
diff --git a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-13-1.png b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-13-1.png
index e4d311c..04cce02 100644
Binary files a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-13-1.png and b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-13-1.png differ
diff --git a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-18-1.png b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-18-1.png
index 084496d..cfba8ed 100644
Binary files a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-18-1.png and b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-18-1.png differ
diff --git a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-7-1.png b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-7-1.png
index c8de2b7..8261b84 100644
Binary files a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-7-1.png and b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-7-1.png differ