diff --git a/README.md b/README.md index 776caa3..754a0f4 100644 --- a/README.md +++ b/README.md @@ -88,6 +88,7 @@ Rpubs LeafN bootstrap example output: https://rpubs.com/sserbin/721908 EcoSIS URL: https://ecosis.org/package/leaf-spectra--structural-and-biochemical-leaf-traits-of-eight-crop-species
EcoSIS ID: 25770ad9-d47c-428b-bf99-d1543a4b0ec9
DOI: https://doi.org/doi:10.21232/C2GM2Z
+Rpubs LeafN bootstrap example output: https://rpubs.com/sserbin/736689
5) Canopy spectra to map foliar functional traits over NEON domains in eastern United States
Target variable: leaf nitrogen
diff --git a/inst/scripts/spectra-trait_reseco_lma_plsr_example.R b/inst/scripts/spectra-trait_reseco_lma_plsr_example.R index bdd298f..aaaf4a2 100644 --- a/inst/scripts/spectra-trait_reseco_lma_plsr_example.R +++ b/inst/scripts/spectra-trait_reseco_lma_plsr_example.R @@ -121,8 +121,9 @@ plsr_data <- plsr_data[complete.cases(plsr_data[,names(plsr_data) %in% method <- "dplyr" #base/dplyr # base R - a bit slow # dplyr - much faster -split_data <- spectratrait::create_data_split(dataset=plsr_data, approach=method, split_seed=7529075, - prop=0.8, group_variables="Species_Code") +split_data <- spectratrait::create_data_split(dataset=plsr_data, approach=method, + split_seed=7529075,prop=0.8, + group_variables="Species_Code") names(split_data) cal.plsr.data <- split_data$cal_data head(cal.plsr.data)[1:8] @@ -138,11 +139,13 @@ text_loc <- c(max(hist(cal.plsr.data[,paste0(inVar)])$counts), max(hist(cal.plsr.data[,paste0(inVar)])$mids)) cal_hist_plot <- qplot(cal.plsr.data[,paste0(inVar)],geom="histogram", main = paste0("Calibration Histogram for ",inVar), - xlab = paste0(inVar),ylab = "Count",fill=I("grey50"),col=I("black"),alpha=I(.7)) + + xlab = paste0(inVar),ylab = "Count",fill=I("grey50"),col=I("black"), + alpha=I(.7)) + annotate("text", x=text_loc[2], y=text_loc[1], label= "1.",size=10) val_hist_plot <- qplot(val.plsr.data[,paste0(inVar)],geom="histogram", main = paste0("Validation Histogram for ",inVar), - xlab = paste0(inVar),ylab = "Count",fill=I("grey50"),col=I("black"),alpha=I(.7)) + xlab = paste0(inVar),ylab = "Count",fill=I("grey50"),col=I("black"), + alpha=I(.7)) histograms <- grid.arrange(cal_hist_plot, val_hist_plot, ncol=2) ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Histograms.png")), plot = histograms, device="png", width = 30, height = 12, units = "cm", dpi = 300) @@ -152,6 +155,14 @@ write.csv(val.plsr.data,file=file.path(outdir,paste0(inVar,'_Val_PLSR_Dataset.cs 
#--------------------------------------------------------------------------------------------------# +#--------------------------------------------------------------------------------------------------# +# Figure S1. The resulting leaf mass area (LMA, g/m2) distribution (histogram) for the +# calibration (i.e. model training) and validation datasets. The data was split using the +# spectratrait::create_data_split() function using "Species_Code" as the group_variable and +# using a data split proportion per group of 80% to calibration and 20% to validation +#--------------------------------------------------------------------------------------------------# + + #--------------------------------------------------------------------------------------------------# ### Step 8. ### Format PLSR data for model fitting @@ -181,6 +192,13 @@ par(mfrow=c(1,1)) #--------------------------------------------------------------------------------------------------# +#--------------------------------------------------------------------------------------------------# +# Figure S2. The resulting calibration and validation spectral reflectance distribution by +# wavelength. The spectra split was done at the same time as LMA, as described in +# Supplemental Figure S1. +#--------------------------------------------------------------------------------------------------# + + #--------------------------------------------------------------------------------------------------# ### Step 10. ### Use permutation to determine the optimal number of components @@ -206,13 +224,26 @@ if (method=="pls") { maxComps=maxComps, iterations=iterations, seg=seg, prop=prop, random_seed=random_seed) } -print("*** Figure 3. Optimal PLSR component selection ***") +print("*** Figure S3. 
Optimal PLSR component selection ***") dev.copy(png,file.path(outdir,paste0(paste0("Figure_3_",inVar,"_PLSR_Component_Selection.png"))), height=2800, width=3400, res=340) dev.off(); #--------------------------------------------------------------------------------------------------# +#--------------------------------------------------------------------------------------------------# +# Figure S3. A key challenge in building robust and parsimonious PLSR models is determining the +# optimal number of PLSR components. A good definition is the minimum number of components that +# minimizes the PRESS statistic and where the next higher component doesn't produce a meaningful +# increase in model performance (i.e. lower PRESS). We provide three methods in the +# find_optimal_components() function to determine the optimal number of components statistically +# using the internal pls package jackknife method or our custom methods that are better in some +# conditions, including for large datasets. In this example we show "firstMin" option that +# selects the number of components corresponding to the first statistical minimum PRESS value +# (vertical broken blue line). +#--------------------------------------------------------------------------------------------------# + + #--------------------------------------------------------------------------------------------------# ### Step 11. ### Fit final model - using leave-one-out cross validation @@ -242,6 +273,12 @@ par(opar) #--------------------------------------------------------------------------------------------------# +#--------------------------------------------------------------------------------------------------# +# Figure S4. 
A plot of the validation root mean square error of prediction (RMSEP, left) and +# coefficient of determination (right) for the 0 to optimal number of components +#--------------------------------------------------------------------------------------------------# + + #--------------------------------------------------------------------------------------------------# ### Step 12. ### PLSR fit observed vs. predicted plot data @@ -319,6 +356,14 @@ ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Scatterplots.png")), #--------------------------------------------------------------------------------------------------# +#--------------------------------------------------------------------------------------------------# +# Figure S5. The calibration model and independent validation scatter plot results for the example +# LMA PLSR model (top row). Also shown are the calibration model and validation PLSR +# residuals, where the calibration results are based on the internal model cross-validation +# and the validation residuals are the predicted minus observed values of LMA. +#--------------------------------------------------------------------------------------------------# + + #--------------------------------------------------------------------------------------------------# ### Step 13. ### Generate Coefficient and VIP plots @@ -341,6 +386,12 @@ par(opar) #--------------------------------------------------------------------------------------------------# +#--------------------------------------------------------------------------------------------------# +# Figure S6. The calibration model PLSR regression coefficient (top) and variable importance of +# projection (bottom) plots +#--------------------------------------------------------------------------------------------------# + + #--------------------------------------------------------------------------------------------------# ### Step 14. 
### Permutation analysis to derive uncertainty estimates @@ -386,6 +437,10 @@ dev.copy(png,file.path(outdir,paste0(inVar,'_Jackknife_Regression_Coefficients.p height=2100, width=3800, res=340) dev.off(); +#--------------------------------------------------------------------------------------------------# +# Figure S7. The calibration model jackknife PLSR regression coefficients +#--------------------------------------------------------------------------------------------------# + # JK validation plot RMSEP <- sqrt(mean(val.plsr.output$PLSR_Residuals^2)) pecr_RMSEP <- RMSEP/mean(val.plsr.output[,inVar])*100 @@ -413,9 +468,17 @@ dev.off(); #--------------------------------------------------------------------------------------------------# +#--------------------------------------------------------------------------------------------------# +# Figure S8. Independent validation results for the LMA PLSR model with associated jackknife +# uncertainty estimate 95% prediction intervals for each estimated LMA value. The %RMSEP is the +# model prediction performance (RMSEP) expressed as a percentage of the mean observed response, +# in this case the mean of the validation LMA values +#--------------------------------------------------------------------------------------------------# + + #---------------- Output jackknife results --------------------------------------------------------# -### Step 15. -# JK Coefficents +### Step 15. Outputs the final PLSR model jackknife coefficients +# JK Coefficients out.jk.coefs <- data.frame(Iteration=seq(1,length(Jackknife_intercept),1), Intercept=Jackknife_intercept,t(Jackknife_coef)) head(out.jk.coefs)[1:6] @@ -425,7 +488,7 @@ write.csv(out.jk.coefs,file=file.path(outdir,paste0(inVar,'_Jackkife_PLSR_Coeffi #---------------- Export Model Output -------------------------------------------------------------# -### Step 16. +### Step 16. 
Create and write all relevant PLSR model output to disk in .csv format print(paste("Output directory: ", getwd())) # Observed versus predicted diff --git a/vignettes/ely_leafN_bootstrap_plsr_example.Rmd b/vignettes/ely_leafN_bootstrap_plsr_example.Rmd new file mode 100644 index 0000000..6710d63 --- /dev/null +++ b/vignettes/ely_leafN_bootstrap_plsr_example.Rmd @@ -0,0 +1,428 @@ +--- +title: Spectra-trait PLSR example using leaf-level spectra and leaf nitrogen content (Narea, g/m2) data from eight different crop species growing in a glasshouse at Brookhaven National Laboratory. +author: "Shawn P. Serbin, Julien Lamour, & Jeremiah Anderson" +output: + github_document: default + html_notebook: default + pdf_document: default + html_document: + df_print: paged +params: + date: !r Sys.Date() +--- + +```{r setup, include=FALSE, echo=FALSE} +knitr::opts_chunk$set(echo = TRUE) +``` + +### Overview +This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook to illustrate how to load an +internal dataset ("ely_plsr_data"), choose the "optimal" number of plsr components, +and fit a plsr model for leaf nitrogen content (Narea, g/m2) + +### Getting Started +### Load libraries +```{r, eval=TRUE, echo=TRUE} +list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra", + "spectratrait") +invisible(lapply(list.of.packages, library, character.only = TRUE)) +``` + +### Setup other functions and options +```{r, echo=TRUE} +### Setup other functions and options +# not in +`%notin%` <- Negate(`%in%`) + +# Script options +pls::pls.options(plsralg = "oscorespls") +pls::pls.options("plsralg") + +# Default par options +opar <- par(no.readonly = T) + +# Specify output directory, output_dir +# Options: +# tempdir - use a OS-specified temporary directory +# user defined PATH - e.g. 
"~/scratch/PLSR" +output_dir <- "tempdir" +``` + +### Load internal Ely et al 2019 dataset +```{r, echo=TRUE} +data("ely_plsr_data") +head(ely_plsr_data)[,1:8] + +# What is the target variable? +inVar <- "N_g_m2" +``` + +### Set working directory (scratch space) +```{r, echo=FALSE} +if (output_dir=="tempdir") { + outdir <- tempdir() +} else { + if (! file.exists(output_dir)) dir.create(output_dir,recursive=TRUE) + outdir <- file.path(path.expand(output_dir)) +} +setwd(outdir) # set working directory +getwd() # check wd +``` + +### Full PLSR dataset +```{r, echo=TRUE} +Start.wave <- 500 +End.wave <- 2400 +wv <- seq(Start.wave,End.wave,1) +plsr_data <- ely_plsr_data +head(plsr_data)[,1:6] +``` +### Create cal/val datasets +```{r, fig.height = 5, fig.width = 12, echo=TRUE} +### Create cal/val datasets +## Make a stratified random sampling in the strata USDA_Species_Code and Domain + +method <- "base" #base/dplyr +# base R - a bit slow +# dplyr - much faster +split_data <- spectratrait::create_data_split(dataset=plsr_data, approach=method, + split_seed=23452135, prop=0.7, + group_variables="Species_Code") +names(split_data) +cal.plsr.data <- split_data$cal_data +head(cal.plsr.data)[1:8] +val.plsr.data <- split_data$val_data +head(val.plsr.data)[1:8] +rm(split_data) + +# Datasets: +print(paste("Cal observations: ",dim(cal.plsr.data)[1],sep="")) +print(paste("Val observations: ",dim(val.plsr.data)[1],sep="")) + +cal_hist_plot <- qplot(cal.plsr.data[,paste0(inVar)],geom="histogram", + main = paste0("Cal. Histogram for ",inVar), + xlab = paste0(inVar),ylab = "Count",fill=I("grey50"),col=I("black"), + alpha=I(.7)) +val_hist_plot <- qplot(val.plsr.data[,paste0(inVar)],geom="histogram", + main = paste0("Val. 
Histogram for ",inVar), + xlab = paste0(inVar),ylab = "Count",fill=I("grey50"),col=I("black"), + alpha=I(.7)) +histograms <- grid.arrange(cal_hist_plot, val_hist_plot, ncol=2) +ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Histograms.png")), + plot = histograms, + device="png", width = 30, + height = 12, units = "cm", + dpi = 300) +# output cal/val data +write.csv(cal.plsr.data,file=file.path(outdir,paste0(inVar,'_Cal_PLSR_Dataset.csv')), + row.names=FALSE) +write.csv(val.plsr.data,file=file.path(outdir,paste0(inVar,'_Val_PLSR_Dataset.csv')), + row.names=FALSE) +``` + +### Create calibration and validation PLSR datasets +```{r, echo=TRUE} +### Format PLSR data for model fitting +cal_spec <- as.matrix(cal.plsr.data[, which(names(cal.plsr.data) %in% paste0("Wave_",wv))]) +cal.plsr.data <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% paste0("Wave_",wv))], + Spectra=I(cal_spec)) +head(cal.plsr.data)[1:5] + +val_spec <- as.matrix(val.plsr.data[, which(names(val.plsr.data) %in% paste0("Wave_",wv))]) +val.plsr.data <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% paste0("Wave_",wv))], + Spectra=I(val_spec)) +head(val.plsr.data)[1:5] +``` + +### plot cal and val spectra +```{r, fig.height = 5, fig.width = 12, echo=TRUE} +par(mfrow=c(1,2)) # B, L, T, R +spectratrait::f.plot.spec(Z=cal.plsr.data$Spectra,wv=wv,plot_label="Calibration") +spectratrait::f.plot.spec(Z=val.plsr.data$Spectra,wv=wv,plot_label="Validation") + +dev.copy(png,file.path(outdir,paste0(inVar,'_Cal_Val_Spectra.png')), + height=2500,width=4900, res=340) +dev.off(); +par(mfrow=c(1,1)) +``` + +### Use permutation to determine optimal number of components +```{r, fig.height = 6, fig.width = 10, echo=TRUE} +### Use permutation to determine the optimal number of components +if(grepl("Windows", sessionInfo()$running)){ + pls.options(parallel = NULL) +} else { + pls.options(parallel = parallel::detectCores()-1) +} + +method <- "pls" #pls, firstPlateau, firstMin 
+random_seed <- 1245565 +seg <- 50 +maxComps <- 16 +iterations <- 80 +prop <- 0.70 +if (method=="pls") { + # pls package approach - faster but estimates more components.... + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, + maxComps=maxComps, seg=seg, + random_seed=random_seed) + print(paste0("*** Optimal number of components: ", nComps)) +} else { + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, + maxComps=maxComps, iterations=iterations, + seg=seg, prop=prop, + random_seed=random_seed) +} +dev.copy(png,file.path(outdir,paste0(paste0(inVar,"_PLSR_Component_Selection.png"))), + height=2800, width=3400, res=340) +dev.off(); +``` + +### Fit final model +```{r, fig.height = 5, fig.width = 12, echo=TRUE} +plsr.out <- plsr(as.formula(paste(inVar,"~","Spectra")),scale=FALSE,ncomp=nComps,validation="LOO", + trace=FALSE,data=cal.plsr.data) +fit <- plsr.out$fitted.values[,1,nComps] +pls.options(parallel = NULL) + +# External validation fit stats +par(mfrow=c(1,2)) # B, L, T, R +pls::RMSEP(plsr.out, newdata = val.plsr.data) +plot(pls::RMSEP(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL RMSEP", + xlab="Number of Components",ylab="Model Validation RMSEP",lty=1,col="black",cex=1.5,lwd=2) +box(lwd=2.2) + +pls::R2(plsr.out, newdata = val.plsr.data) +plot(pls::R2(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL R2", + xlab="Number of Components",ylab="Model Validation R2",lty=1,col="black",cex=1.5,lwd=2) +box(lwd=2.2) +dev.copy(png,file.path(outdir,paste0(paste0(inVar,"_Validation_RMSEP_R2_by_Component.png"))), + height=2800, width=4800, res=340) +dev.off(); +par(opar) +``` + +### PLSR fit observed vs. 
predicted plot data +```{r, fig.height = 15, fig.width = 15, echo=TRUE} +#calibration +cal.plsr.output <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% "Spectra")], + PLSR_Predicted=fit, + PLSR_CV_Predicted=as.vector(plsr.out$validation$pred[,,nComps])) +cal.plsr.output <- cal.plsr.output %>% + mutate(PLSR_CV_Residuals = PLSR_CV_Predicted-get(inVar)) +head(cal.plsr.output) +cal.R2 <- round(pls::R2(plsr.out)[[1]][nComps+1],2) # offset by one: the first R2 entry is the intercept-only (0 comps) model +cal.RMSEP <- round(sqrt(mean(cal.plsr.output$PLSR_CV_Residuals^2)),2) + +val.plsr.output <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% "Spectra")], + PLSR_Predicted=as.vector(predict(plsr.out, + newdata = val.plsr.data, + ncomp=nComps, type="response")[,,1])) +val.plsr.output <- val.plsr.output %>% + mutate(PLSR_Residuals = PLSR_Predicted-get(inVar)) +head(val.plsr.output) +val.R2 <- round(pls::R2(plsr.out,newdata=val.plsr.data)[[1]][nComps+1],2) # same offset as cal.R2: entry 1 is the 0 comps model +val.RMSEP <- round(sqrt(mean(val.plsr.output$PLSR_Residuals^2)),2) + +rng_quant <- quantile(cal.plsr.output[,inVar], probs = c(0.001, 0.999)) +cal_scatter_plot <- ggplot(cal.plsr.output, aes(x=PLSR_CV_Predicted, y=get(inVar))) + + theme_bw() + geom_point() + geom_abline(intercept = 0, slope = 1, color="dark grey", + linetype="dashed", size=1.5) + xlim(rng_quant[1], + rng_quant[2]) + + ylim(rng_quant[1], rng_quant[2]) + + labs(x=paste0("Predicted ", paste(inVar), " (units)"), + y=paste0("Observed ", paste(inVar), " (units)"), + title=paste0("Calibration: ", paste0("Rsq = ", cal.R2), "; ", paste0("RMSEP = ", + cal.RMSEP))) + + theme(axis.text=element_text(size=18), legend.position="none", + axis.title=element_text(size=20, face="bold"), + axis.text.x = element_text(angle = 0,vjust = 0.5), + panel.border = element_rect(linetype = "solid", fill = NA, size=1.5)) + +cal_resid_histogram <- ggplot(cal.plsr.output, aes(x=PLSR_CV_Residuals)) + + geom_histogram(alpha=.5, position="identity") + + geom_vline(xintercept = 0, color="black", + linetype="dashed", size=1) + theme_bw() + 
+ theme(axis.text=element_text(size=18), legend.position="none", + axis.title=element_text(size=20, face="bold"), + axis.text.x = element_text(angle = 0,vjust = 0.5), + panel.border = element_rect(linetype = "solid", fill = NA, size=1.5)) + +rng_quant <- quantile(val.plsr.output[,inVar], probs = c(0.001, 0.999)) +val_scatter_plot <- ggplot(val.plsr.output, aes(x=PLSR_Predicted, y=get(inVar))) + + theme_bw() + geom_point() + geom_abline(intercept = 0, slope = 1, color="dark grey", + linetype="dashed", size=1.5) + xlim(rng_quant[1], + rng_quant[2]) + + ylim(rng_quant[1], rng_quant[2]) + + labs(x=paste0("Predicted ", paste(inVar), " (units)"), + y=paste0("Observed ", paste(inVar), " (units)"), + title=paste0("Validation: ", paste0("Rsq = ", val.R2), "; ", paste0("RMSEP = ", + val.RMSEP))) + + theme(axis.text=element_text(size=18), legend.position="none", + axis.title=element_text(size=20, face="bold"), + axis.text.x = element_text(angle = 0,vjust = 0.5), + panel.border = element_rect(linetype = "solid", fill = NA, size=1.5)) + +val_resid_histogram <- ggplot(val.plsr.output, aes(x=PLSR_Residuals)) + + geom_histogram(alpha=.5, position="identity") + + geom_vline(xintercept = 0, color="black", + linetype="dashed", size=1) + theme_bw() + + theme(axis.text=element_text(size=18), legend.position="none", + axis.title=element_text(size=20, face="bold"), + axis.text.x = element_text(angle = 0,vjust = 0.5), + panel.border = element_rect(linetype = "solid", fill = NA, size=1.5)) + +# plot cal/val side-by-side +scatterplots <- grid.arrange(cal_scatter_plot, val_scatter_plot, cal_resid_histogram, + val_resid_histogram, nrow=2,ncol=2) +ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Scatterplots.png")), + plot = scatterplots, device="png", + width = 32, + height = 30, units = "cm", + dpi = 300) +``` + +### Generate Coefficient and VIP plots +```{r, fig.height = 9, fig.width = 10, echo=TRUE} +vips <- spectratrait::VIP(plsr.out)[nComps,] +par(mfrow=c(2,1)) +plot(plsr.out, 
plottype = "coef",xlab="Wavelength (nm)", + ylab="Regression coefficients",legendpos = "bottomright", + ncomp=nComps,lwd=2) +box(lwd=2.2) +plot(seq(Start.wave,End.wave,1),vips,xlab="Wavelength (nm)",ylab="VIP",cex=0.01) +lines(seq(Start.wave,End.wave,1),vips,lwd=3) +abline(h=0.8,lty=2,col="dark grey") +box(lwd=2.2) +dev.copy(png,file.path(outdir,paste0(inVar,'_Coefficient_VIP_plot.png')), + height=3100, width=4100, res=340) +dev.off(); +``` + +### Bootstrap validation +```{r, echo=TRUE} +if(grepl("Windows", sessionInfo()$running)){ + pls.options(parallel =NULL) +} else { + pls.options(parallel = parallel::detectCores()-1) +} + +### PLSR bootstrap permutation uncertainty analysis +iterations <- 500 # how many permutation iterations to run +prop <- 0.70 # fraction of training data to keep for each iteration +plsr_permutation <- spectratrait::pls_permutation(dataset=cal.plsr.data, maxComps=nComps, + iterations=iterations, prop=prop, + verbose = FALSE) +bootstrap_intercept <- plsr_permutation$coef_array[1,,nComps] +bootstrap_coef <- plsr_permutation$coef_array[2:length(plsr_permutation$coef_array[,1,nComps]), + ,nComps] +rm(plsr_permutation) + +# apply coefficients to left-out validation data +interval <- c(0.025,0.975) +Bootstrap_Pred <- val.plsr.data$Spectra %*% bootstrap_coef + + matrix(rep(bootstrap_intercept, length(val.plsr.data[,inVar])), byrow=TRUE, + ncol=length(bootstrap_intercept)) +Interval_Conf <- apply(X = Bootstrap_Pred, MARGIN = 1, FUN = quantile, + probs=c(interval[1], interval[2])) +sd_mean <- apply(X = Bootstrap_Pred, MARGIN = 1, FUN = sd) +sd_res <- sd(val.plsr.output$PLSR_Residuals) +sd_tot <- sqrt(sd_mean^2+sd_res^2) +val.plsr.output$LCI <- Interval_Conf[1,] +val.plsr.output$UCI <- Interval_Conf[2,] +val.plsr.output$LPI <- val.plsr.output$PLSR_Predicted-1.96*sd_tot +val.plsr.output$UPI <- val.plsr.output$PLSR_Predicted+1.96*sd_tot +head(val.plsr.output) +``` + +### Bootstrap coefficient plot +```{r, fig.height = 6, fig.width = 10, echo=TRUE} +# 
Bootstrap regression coefficient plot +spectratrait::f.plot.coef(Z = t(bootstrap_coef), wv = wv, + plot_label="Bootstrap regression coefficients",position = 'bottomleft') +abline(h=0,lty=2,col="grey50") +box(lwd=2.2) +dev.copy(png,file.path(outdir,paste0(inVar,'_Bootstrap_Regression_Coefficients.png')), + height=2100, width=3800, res=340) +dev.off(); +``` + +### Bootstrap validation plot +```{r, fig.height = 7, fig.width = 8, echo=TRUE} +RMSEP <- sqrt(mean(val.plsr.output$PLSR_Residuals^2)) +pecr_RMSEP <- RMSEP/mean(val.plsr.output[,inVar])*100 +r2 <- round(pls::R2(plsr.out, newdata = val.plsr.data)$val[nComps+1],2) +expr <- vector("expression", 3) +expr[[1]] <- bquote(R^2==.(r2)) +expr[[2]] <- bquote(RMSEP==.(round(RMSEP,2))) +expr[[3]] <- bquote("%RMSEP"==.(round(pecr_RMSEP,2))) +rng_vals <- c(min(val.plsr.output$LPI), max(val.plsr.output$UPI)) +par(mfrow=c(1,1), mar=c(4.2,5.3,1,0.4), oma=c(0, 0.1, 0, 0.2)) +plotrix::plotCI(val.plsr.output$PLSR_Predicted,val.plsr.output[,inVar], + li=val.plsr.output$LPI, ui=val.plsr.output$UPI, gap=0.009,sfrac=0.000, + lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), + err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="grey80", + cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"), + ylab=paste0("Observed ", paste(inVar), " (units)"), + cex.axis=1.5,cex.lab=1.8) +abline(0,1,lty=2,lw=2) +plotrix::plotCI(val.plsr.output$PLSR_Predicted,val.plsr.output[,inVar], + li=val.plsr.output$LCI, ui=val.plsr.output$UCI, gap=0.009,sfrac=0.004, + lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), + err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="black", + cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"), + ylab=paste0("Observed ", paste(inVar), " (units)"), + cex.axis=1.5,cex.lab=1.8, add=T) +legend("topleft", legend=expr, bty="n", cex=1.5) +box(lwd=2.2) 
+dev.copy(png,file.path(outdir,paste0(inVar,"_PLSR_Validation_Scatterplot.png")), + height=2800, width=3200, res=340) +dev.off(); +``` + +### Output bootstrap results +```{r, echo=TRUE} +# Bootstrap Coefficients +out.jk.coefs <- data.frame(Iteration=seq(1,length(bootstrap_intercept),1), + Intercept=bootstrap_intercept,t(bootstrap_coef)) +names(out.jk.coefs) <- c("Iteration","Intercept",paste0("Wave_",wv)) +head(out.jk.coefs)[1:6] +write.csv(out.jk.coefs,file=file.path(outdir,paste0(inVar, + '_Bootstrap_PLSR_Coefficients.csv')), + row.names=FALSE) +``` + +### Create core PLSR outputs +```{r, echo=TRUE} +print(paste("Output directory: ", outdir)) + +# Observed versus predicted +write.csv(cal.plsr.output,file=file.path(outdir, + paste0(inVar,'_Observed_PLSR_CV_Pred_', + nComps,'comp.csv')), + row.names=FALSE) + +# Validation data +write.csv(val.plsr.output,file=file.path(outdir, + paste0(inVar,'_Validation_PLSR_Pred_', + nComps,'comp.csv')), + row.names=FALSE) + +# Model coefficients +coefs <- coef(plsr.out,ncomp=nComps,intercept=TRUE) +write.csv(coefs,file=file.path(outdir, + paste0(inVar,'_PLSR_Coefficients_', + nComps,'comp.csv')), + row.names=TRUE) + +# PLSR VIP +write.csv(vips,file=file.path(outdir, + paste0(inVar,'_PLSR_VIPs_', + nComps,'comp.csv'))) +``` + +### Confirm files were written to temp space +```{r, echo=TRUE} +print("**** PLSR output files: ") +print(list.files(outdir)[grep(pattern = inVar, list.files(outdir))]) +``` diff --git a/vignettes/ely_leafN_bootstrap_plsr_example.md b/vignettes/ely_leafN_bootstrap_plsr_example.md new file mode 100644 index 0000000..24ed1fb --- /dev/null +++ b/vignettes/ely_leafN_bootstrap_plsr_example.md @@ -0,0 +1,803 @@ +Spectra-trait PLSR example using leaf-level spectra and leaf nitrogen +content (Narea, g/m2) data from eight different crop species growing in +a glasshouse at Brookhaven National Laboratory. +================ +Shawn P. 
Serbin, Julien Lamour, & Jeremiah Anderson + +### Overview + +This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook to +illustrate how to load an internal dataset (“ely\_plsr\_data”), choose +the “optimal” number of plsr components, and fit a plsr model for leaf +nitrogen content (Narea, g/m2) + +### Getting Started + +### Load libraries + +``` r +list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra", + "spectratrait") +invisible(lapply(list.of.packages, library, character.only = TRUE)) +``` + + ## + ## Attaching package: 'pls' + + ## The following object is masked from 'package:stats': + ## + ## loadings + + ## + ## Attaching package: 'dplyr' + + ## The following objects are masked from 'package:stats': + ## + ## filter, lag + + ## The following objects are masked from 'package:base': + ## + ## intersect, setdiff, setequal, union + + ## here() starts at /Users/sserbin/Data/GitHub/PLSR_for_plant_trait_prediction + + ## + ## Attaching package: 'gridExtra' + + ## The following object is masked from 'package:dplyr': + ## + ## combine + +### Setup other functions and options + +``` r +### Setup other functions and options +# not in +`%notin%` <- Negate(`%in%`) + +# Script options +pls::pls.options(plsralg = "oscorespls") +pls::pls.options("plsralg") +``` + + ## $plsralg + ## [1] "oscorespls" + +``` r +# Default par options +opar <- par(no.readonly = T) + +# Specify output directory, output_dir +# Options: +# tempdir - use a OS-specified temporary directory +# user defined PATH - e.g. 
"~/scratch/PLSR" +output_dir <- "tempdir" +``` + +### Load internal Ely et al 2019 dataset + +``` r +data("ely_plsr_data") +head(ely_plsr_data)[,1:8] +``` + + ## Species_Code Common_Name C_N_mass C_g_m2 H20_g_m2 LMA_g_m2 N_g_m2 + ## 1 HEAN3 common sunflower 7.58 15.61210 167.63 36.40 2.103694 + ## 2 HEAN3 common sunflower 8.33 14.73724 164.68 34.65 1.231713 + ## 3 HEAN3 common sunflower 7.70 15.02495 156.95 35.08 1.764752 + ## 4 CUSA4 garden cucumber 7.40 11.14835 111.52 26.23 1.287963 + ## 5 CUSA4 garden cucumber 7.47 11.60735 123.58 26.71 1.411361 + ## 6 CUSA4 garden cucumber 7.43 8.06035 114.36 18.40 1.117704 + ## Wave_500 + ## 1 4.782000 + ## 2 4.341714 + ## 3 4.502857 + ## 4 3.333429 + ## 5 3.313571 + ## 6 3.272286 + +``` r +# What is the target variable? +inVar <- "N_g_m2" +``` + +### Set working directory (scratch space) + + ## [1] "/private/var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T/RtmpSup6Vk" + +### Full PLSR dataset + +``` r +Start.wave <- 500 +End.wave <- 2400 +wv <- seq(Start.wave,End.wave,1) +plsr_data <- ely_plsr_data +head(plsr_data)[,1:6] +``` + + ## Species_Code Common_Name C_N_mass C_g_m2 H20_g_m2 LMA_g_m2 + ## 1 HEAN3 common sunflower 7.58 15.61210 167.63 36.40 + ## 2 HEAN3 common sunflower 8.33 14.73724 164.68 34.65 + ## 3 HEAN3 common sunflower 7.70 15.02495 156.95 35.08 + ## 4 CUSA4 garden cucumber 7.40 11.14835 111.52 26.23 + ## 5 CUSA4 garden cucumber 7.47 11.60735 123.58 26.71 + ## 6 CUSA4 garden cucumber 7.43 8.06035 114.36 18.40 + +### Create cal/val datasets + +``` r +### Create cal/val datasets +## Make a stratified random sampling in the strata USDA_Species_Code and Domain + +method <- "base" #base/dplyr +# base R - a bit slow +# dplyr - much faster +split_data <- spectratrait::create_data_split(dataset=plsr_data, approach=method, + split_seed=23452135, prop=0.7, + group_variables="Species_Code") +``` + + ## HEAN3 Cal: 70% + + ## CUSA4 Cal: 68.1818181818182% + + ## CUPE Cal: 70.5882352941177% + + ## SOLYL Cal: 70% + + ## OCBA 
Cal: 68.4210526315789% + + ## POPUL Cal: 71.4285714285714% + + ## GLMA4 Cal: 70.5882352941177% + + ## PHVU Cal: 66.6666666666667% + +``` r +names(split_data) +``` + + ## [1] "cal_data" "val_data" + +``` r +cal.plsr.data <- split_data$cal_data +head(cal.plsr.data)[1:8] +``` + + ## Species_Code Common_Name C_N_mass C_g_m2 H20_g_m2 LMA_g_m2 N_g_m2 + ## 1 HEAN3 common sunflower 7.58 15.61210 167.63 36.40 2.103694 + ## 2 HEAN3 common sunflower 8.33 14.73724 164.68 34.65 1.231713 + ## 4 CUSA4 garden cucumber 7.40 11.14835 111.52 26.23 1.287963 + ## 6 CUSA4 garden cucumber 7.43 8.06035 114.36 18.40 1.117704 + ## 7 CUPE field pumpkin 7.20 11.43007 128.42 25.83 1.215333 + ## 10 SOLYL garden tomato 7.89 11.61918 142.23 27.40 1.304110 + ## Wave_500 + ## 1 4.782000 + ## 2 4.341714 + ## 4 3.333429 + ## 6 3.272286 + ## 7 2.943143 + ## 10 4.145714 + +``` r +val.plsr.data <- split_data$val_data +head(val.plsr.data)[1:8] +``` + + ## Species_Code Common_Name C_N_mass C_g_m2 H20_g_m2 LMA_g_m2 N_g_m2 + ## 3 HEAN3 common sunflower 7.70 15.024947 156.95 35.08 1.7647515 + ## 5 CUSA4 garden cucumber 7.47 11.607347 123.58 26.71 1.4113615 + ## 8 CUPE field pumpkin 7.67 12.466238 124.67 29.22 1.1468413 + ## 9 CUPE field pumpkin 7.64 17.100448 142.85 43.39 1.1390174 + ## 13 SOLYL garden tomato 7.73 7.938866 129.95 17.96 0.9483533 + ## 15 OCBA sweet basil 8.13 16.975969 173.30 38.65 1.1246459 + ## Wave_500 + ## 3 4.502857 + ## 5 3.313571 + ## 8 2.868000 + ## 9 3.338286 + ## 13 3.960286 + ## 15 3.744000 + +``` r +rm(split_data) + +# Datasets: +print(paste("Cal observations: ",dim(cal.plsr.data)[1],sep="")) +``` + + ## [1] "Cal observations: 124" + +``` r +print(paste("Val observations: ",dim(val.plsr.data)[1],sep="")) +``` + + ## [1] "Val observations: 54" + +``` r +cal_hist_plot <- qplot(cal.plsr.data[,paste0(inVar)],geom="histogram", + main = paste0("Cal. 
Histogram for ",inVar), + xlab = paste0(inVar),ylab = "Count",fill=I("grey50"),col=I("black"), + alpha=I(.7)) +val_hist_plot <- qplot(val.plsr.data[,paste0(inVar)],geom="histogram", + main = paste0("Val. Histogram for ",inVar), + xlab = paste0(inVar),ylab = "Count",fill=I("grey50"),col=I("black"), + alpha=I(.7)) +histograms <- grid.arrange(cal_hist_plot, val_hist_plot, ncol=2) +``` + + ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`. + + ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`. + +![](ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-6-1.png) + +``` r +ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Histograms.png")), + plot = histograms, + device="png", width = 30, + height = 12, units = "cm", + dpi = 300) +# output cal/val data +write.csv(cal.plsr.data,file=file.path(outdir,paste0(inVar,'_Cal_PLSR_Dataset.csv')), + row.names=FALSE) +write.csv(val.plsr.data,file=file.path(outdir,paste0(inVar,'_Val_PLSR_Dataset.csv')), + row.names=FALSE) +``` + +### Create calibration and validation PLSR datasets + +``` r +### Format PLSR data for model fitting +cal_spec <- as.matrix(cal.plsr.data[, which(names(cal.plsr.data) %in% paste0("Wave_",wv))]) +cal.plsr.data <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% paste0("Wave_",wv))], + Spectra=I(cal_spec)) +head(cal.plsr.data)[1:5] +``` + + ## Species_Code Common_Name C_N_mass C_g_m2 H20_g_m2 + ## 1 HEAN3 common sunflower 7.58 15.61210 167.63 + ## 2 HEAN3 common sunflower 8.33 14.73724 164.68 + ## 4 CUSA4 garden cucumber 7.40 11.14835 111.52 + ## 6 CUSA4 garden cucumber 7.43 8.06035 114.36 + ## 7 CUPE field pumpkin 7.20 11.43007 128.42 + ## 10 SOLYL garden tomato 7.89 11.61918 142.23 + +``` r +val_spec <- as.matrix(val.plsr.data[, which(names(val.plsr.data) %in% paste0("Wave_",wv))]) +val.plsr.data <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% paste0("Wave_",wv))], + Spectra=I(val_spec)) +head(val.plsr.data)[1:5] +``` + + 
## Species_Code Common_Name C_N_mass C_g_m2 H20_g_m2 + ## 3 HEAN3 common sunflower 7.70 15.024947 156.95 + ## 5 CUSA4 garden cucumber 7.47 11.607347 123.58 + ## 8 CUPE field pumpkin 7.67 12.466238 124.67 + ## 9 CUPE field pumpkin 7.64 17.100448 142.85 + ## 13 SOLYL garden tomato 7.73 7.938866 129.95 + ## 15 OCBA sweet basil 8.13 16.975969 173.30 + +### plot cal and val spectra + +``` r +par(mfrow=c(1,2)) # B, L, T, R +spectratrait::f.plot.spec(Z=cal.plsr.data$Spectra,wv=wv,plot_label="Calibration") +spectratrait::f.plot.spec(Z=val.plsr.data$Spectra,wv=wv,plot_label="Validation") +``` + +![](ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-8-1.png) + +``` r +dev.copy(png,file.path(outdir,paste0(inVar,'_Cal_Val_Spectra.png')), + height=2500,width=4900, res=340) +``` + + ## quartz_off_screen + ## 3 + +``` r +dev.off(); +``` + + ## quartz_off_screen + ## 2 + +``` r +par(mfrow=c(1,1)) +``` + +### Use permutation to determine optimal number of components + +``` r +### Use permutation to determine the optimal number of components +if(grepl("Windows", sessionInfo()$running)){ + pls.options(parallel = NULL) +} else { + pls.options(parallel = parallel::detectCores()-1) +} + +method <- "pls" #pls, firstPlateau, firstMin +random_seed <- 1245565 +seg <- 50 +maxComps <- 16 +iterations <- 80 +prop <- 0.70 +if (method=="pls") { + # pls package approach - faster but estimates more components.... 
+ nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, + maxComps=maxComps, seg=seg, + random_seed=random_seed) + print(paste0("*** Optimal number of components: ", nComps)) +} else { + nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, method=method, + maxComps=maxComps, iterations=iterations, + seg=seg, prop=prop, + random_seed=random_seed) +} +``` + + ## [1] "*** Running PLS permutation test ***" + +![](ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-9-1.png) + + ## [1] "*** Optimal number of components: 13" + +``` r +dev.copy(png,file.path(outdir,paste0(paste0(inVar,"_PLSR_Component_Selection.png"))), + height=2800, width=3400, res=340) +``` + + ## quartz_off_screen + ## 3 + +``` r +dev.off(); +``` + + ## quartz_off_screen + ## 2 + +### Fit final model + +``` r +plsr.out <- plsr(as.formula(paste(inVar,"~","Spectra")),scale=FALSE,ncomp=nComps,validation="LOO", + trace=FALSE,data=cal.plsr.data) +fit <- plsr.out$fitted.values[,1,nComps] +pls.options(parallel = NULL) + +# External validation fit stats +par(mfrow=c(1,2)) # B, L, T, R +pls::RMSEP(plsr.out, newdata = val.plsr.data) +``` + + ## (Intercept) 1 comps 2 comps 3 comps 4 comps 5 comps + ## 0.5908 0.4735 0.4162 0.4037 0.3347 0.3023 + ## 6 comps 7 comps 8 comps 9 comps 10 comps 11 comps + ## 0.2993 0.3081 0.2814 0.2445 0.2276 0.2104 + ## 12 comps 13 comps + ## 0.1954 0.2003 + +``` r +plot(pls::RMSEP(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL RMSEP", + xlab="Number of Components",ylab="Model Validation RMSEP",lty=1,col="black",cex=1.5,lwd=2) +box(lwd=2.2) + +pls::R2(plsr.out, newdata = val.plsr.data) +``` + + ## (Intercept) 1 comps 2 comps 3 comps 4 comps 5 comps + ## -0.004079 0.355010 0.501632 0.531088 0.677620 0.737143 + ## 6 comps 7 comps 8 comps 9 comps 10 comps 11 comps + ## 0.742224 0.726835 0.772115 0.827942 0.850962 0.872685 + ## 12 comps 13 comps + ## 0.890124 0.884529 + +``` r 
+plot(pls::R2(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL R2", + xlab="Number of Components",ylab="Model Validation R2",lty=1,col="black",cex=1.5,lwd=2) +box(lwd=2.2) +``` + +![](ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png) + +``` r +dev.copy(png,file.path(outdir,paste0(paste0(inVar,"_Validation_RMSEP_R2_by_Component.png"))), + height=2800, width=4800, res=340) +``` + + ## quartz_off_screen + ## 3 + +``` r +dev.off(); +``` + + ## quartz_off_screen + ## 2 + +``` r +par(opar) +``` + +### PLSR fit observed vs. predicted plot data + +``` r +#calibration +cal.plsr.output <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% "Spectra")], + PLSR_Predicted=fit, + PLSR_CV_Predicted=as.vector(plsr.out$validation$pred[,,nComps])) +cal.plsr.output <- cal.plsr.output %>% + mutate(PLSR_CV_Residuals = PLSR_CV_Predicted-get(inVar)) +head(cal.plsr.output) +``` + + ## Species_Code Common_Name C_N_mass C_g_m2 H20_g_m2 LMA_g_m2 N_g_m2 + ## 1 HEAN3 common sunflower 7.58 15.61210 167.63 36.40 2.103694 + ## 2 HEAN3 common sunflower 8.33 14.73724 164.68 34.65 1.231713 + ## 4 CUSA4 garden cucumber 7.40 11.14835 111.52 26.23 1.287963 + ## 6 CUSA4 garden cucumber 7.43 8.06035 114.36 18.40 1.117704 + ## 7 CUPE field pumpkin 7.20 11.43007 128.42 25.83 1.215333 + ## 10 SOLYL garden tomato 7.89 11.61918 142.23 27.40 1.304110 + ## CalVal PLSR_Predicted PLSR_CV_Predicted PLSR_CV_Residuals + ## 1 Cal 1.820666 1.702501 -0.40119317 + ## 2 Cal 1.609632 1.711772 0.48005882 + ## 4 Cal 1.364985 1.275526 -0.01243687 + ## 6 Cal 1.126062 1.060119 -0.05758587 + ## 7 Cal 1.227538 1.226708 0.01137583 + ## 10 Cal 1.358638 1.365181 0.06107105 + +``` r +cal.R2 <- round(pls::R2(plsr.out)[[1]][nComps],2) +cal.RMSEP <- round(sqrt(mean(cal.plsr.output$PLSR_CV_Residuals^2)),2) + +val.plsr.output <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% "Spectra")], + PLSR_Predicted=as.vector(predict(plsr.out, + newdata = val.plsr.data, + 
ncomp=nComps, type="response")[,,1])) +val.plsr.output <- val.plsr.output %>% + mutate(PLSR_Residuals = PLSR_Predicted-get(inVar)) +head(val.plsr.output) +``` + + ## Species_Code Common_Name C_N_mass C_g_m2 H20_g_m2 LMA_g_m2 N_g_m2 + ## 3 HEAN3 common sunflower 7.70 15.024947 156.95 35.08 1.7647515 + ## 5 CUSA4 garden cucumber 7.47 11.607347 123.58 26.71 1.4113615 + ## 8 CUPE field pumpkin 7.67 12.466238 124.67 29.22 1.1468413 + ## 9 CUPE field pumpkin 7.64 17.100448 142.85 43.39 1.1390174 + ## 13 SOLYL garden tomato 7.73 7.938866 129.95 17.96 0.9483533 + ## 15 OCBA sweet basil 8.13 16.975969 173.30 38.65 1.1246459 + ## CalVal PLSR_Predicted PLSR_Residuals + ## 3 Val 1.7125176 -0.052233917 + ## 5 Val 1.4618447 0.050483171 + ## 8 Val 1.0951891 -0.051652168 + ## 9 Val 1.2152379 0.076220509 + ## 13 Val 0.7992342 -0.149119020 + ## 15 Val 1.1267054 0.002059572 + +``` r +val.R2 <- round(pls::R2(plsr.out,newdata=val.plsr.data)[[1]][nComps],2) +val.RMSEP <- round(sqrt(mean(val.plsr.output$PLSR_Residuals^2)),2) + +rng_quant <- quantile(cal.plsr.output[,inVar], probs = c(0.001, 0.999)) +cal_scatter_plot <- ggplot(cal.plsr.output, aes(x=PLSR_CV_Predicted, y=get(inVar))) + + theme_bw() + geom_point() + geom_abline(intercept = 0, slope = 1, color="dark grey", + linetype="dashed", size=1.5) + xlim(rng_quant[1], + rng_quant[2]) + + ylim(rng_quant[1], rng_quant[2]) + + labs(x=paste0("Predicted ", paste(inVar), " (units)"), + y=paste0("Observed ", paste(inVar), " (units)"), + title=paste0("Calibration: ", paste0("Rsq = ", cal.R2), "; ", paste0("RMSEP = ", + cal.RMSEP))) + + theme(axis.text=element_text(size=18), legend.position="none", + axis.title=element_text(size=20, face="bold"), + axis.text.x = element_text(angle = 0,vjust = 0.5), + panel.border = element_rect(linetype = "solid", fill = NA, size=1.5)) + +cal_resid_histogram <- ggplot(cal.plsr.output, aes(x=PLSR_CV_Residuals)) + + geom_histogram(alpha=.5, position="identity") + + geom_vline(xintercept = 0, color="black", + 
linetype="dashed", size=1) + theme_bw() + + theme(axis.text=element_text(size=18), legend.position="none", + axis.title=element_text(size=20, face="bold"), + axis.text.x = element_text(angle = 0,vjust = 0.5), + panel.border = element_rect(linetype = "solid", fill = NA, size=1.5)) + +rng_quant <- quantile(val.plsr.output[,inVar], probs = c(0.001, 0.999)) +val_scatter_plot <- ggplot(val.plsr.output, aes(x=PLSR_Predicted, y=get(inVar))) + + theme_bw() + geom_point() + geom_abline(intercept = 0, slope = 1, color="dark grey", + linetype="dashed", size=1.5) + xlim(rng_quant[1], + rng_quant[2]) + + ylim(rng_quant[1], rng_quant[2]) + + labs(x=paste0("Predicted ", paste(inVar), " (units)"), + y=paste0("Observed ", paste(inVar), " (units)"), + title=paste0("Validation: ", paste0("Rsq = ", val.R2), "; ", paste0("RMSEP = ", + val.RMSEP))) + + theme(axis.text=element_text(size=18), legend.position="none", + axis.title=element_text(size=20, face="bold"), + axis.text.x = element_text(angle = 0,vjust = 0.5), + panel.border = element_rect(linetype = "solid", fill = NA, size=1.5)) + +val_resid_histogram <- ggplot(val.plsr.output, aes(x=PLSR_Residuals)) + + geom_histogram(alpha=.5, position="identity") + + geom_vline(xintercept = 0, color="black", + linetype="dashed", size=1) + theme_bw() + + theme(axis.text=element_text(size=18), legend.position="none", + axis.title=element_text(size=20, face="bold"), + axis.text.x = element_text(angle = 0,vjust = 0.5), + panel.border = element_rect(linetype = "solid", fill = NA, size=1.5)) + +# plot cal/val side-by-side +scatterplots <- grid.arrange(cal_scatter_plot, val_scatter_plot, cal_resid_histogram, + val_resid_histogram, nrow=2,ncol=2) +``` + + ## Warning: Removed 3 rows containing missing values (geom_point). + + ## Warning: Removed 3 rows containing missing values (geom_point). + + ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`. + ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`. 
+ +![](ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png) + +``` r +ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Scatterplots.png")), + plot = scatterplots, device="png", + width = 32, + height = 30, units = "cm", + dpi = 300) +``` + +### Generate Coefficient and VIP plots + +``` r +vips <- spectratrait::VIP(plsr.out)[nComps,] +par(mfrow=c(2,1)) +plot(plsr.out, plottype = "coef",xlab="Wavelength (nm)", + ylab="Regression coefficients",legendpos = "bottomright", + ncomp=nComps,lwd=2) +box(lwd=2.2) +plot(seq(Start.wave,End.wave,1),vips,xlab="Wavelength (nm)",ylab="VIP",cex=0.01) +lines(seq(Start.wave,End.wave,1),vips,lwd=3) +abline(h=0.8,lty=2,col="dark grey") +box(lwd=2.2) +``` + +![](ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png) + +``` r +dev.copy(png,file.path(outdir,paste0(inVar,'_Coefficient_VIP_plot.png')), + height=3100, width=4100, res=340) +``` + + ## quartz_off_screen + ## 3 + +``` r +dev.off(); +``` + + ## quartz_off_screen + ## 2 + +### Bootstrap validation + +``` r +if(grepl("Windows", sessionInfo()$running)){ + pls.options(parallel =NULL) +} else { + pls.options(parallel = parallel::detectCores()-1) +} + +### PLSR bootstrap permutation uncertainty analysis +iterations <- 500 # how many permutation iterations to run +prop <- 0.70 # fraction of training data to keep for each iteration +plsr_permutation <- spectratrait::pls_permutation(dataset=cal.plsr.data, maxComps=nComps, + iterations=iterations, prop=prop, + verbose = FALSE) +``` + + ## [1] "*** Running permutation test. 
Please hang tight, this can take awhile ***" + ## [1] "Options:" + ## [1] "Max Components: 13 Iterations: 500 Data Proportion (percent): 70" + ## [1] "*** Providing PRESS and coefficient array output ***" + +``` r +bootstrap_intercept <- plsr_permutation$coef_array[1,,nComps] +bootstrap_coef <- plsr_permutation$coef_array[2:length(plsr_permutation$coef_array[,1,nComps]), + ,nComps] +rm(plsr_permutation) + +# apply coefficients to left-out validation data +interval <- c(0.025,0.975) +Bootstrap_Pred <- val.plsr.data$Spectra %*% bootstrap_coef + + matrix(rep(bootstrap_intercept, length(val.plsr.data[,inVar])), byrow=TRUE, + ncol=length(bootstrap_intercept)) +Interval_Conf <- apply(X = Bootstrap_Pred, MARGIN = 1, FUN = quantile, + probs=c(interval[1], interval[2])) +sd_mean <- apply(X = Bootstrap_Pred, MARGIN = 1, FUN = sd) +sd_res <- sd(val.plsr.output$PLSR_Residuals) +sd_tot <- sqrt(sd_mean^2+sd_res^2) +val.plsr.output$LCI <- Interval_Conf[1,] +val.plsr.output$UCI <- Interval_Conf[2,] +val.plsr.output$LPI <- val.plsr.output$PLSR_Predicted-1.96*sd_tot +val.plsr.output$UPI <- val.plsr.output$PLSR_Predicted+1.96*sd_tot +head(val.plsr.output) +``` + + ## Species_Code Common_Name C_N_mass C_g_m2 H20_g_m2 LMA_g_m2 N_g_m2 + ## 3 HEAN3 common sunflower 7.70 15.024947 156.95 35.08 1.7647515 + ## 5 CUSA4 garden cucumber 7.47 11.607347 123.58 26.71 1.4113615 + ## 8 CUPE field pumpkin 7.67 12.466238 124.67 29.22 1.1468413 + ## 9 CUPE field pumpkin 7.64 17.100448 142.85 43.39 1.1390174 + ## 13 SOLYL garden tomato 7.73 7.938866 129.95 17.96 0.9483533 + ## 15 OCBA sweet basil 8.13 16.975969 173.30 38.65 1.1246459 + ## CalVal PLSR_Predicted PLSR_Residuals LCI UCI LPI UPI + ## 3 Val 1.7125176 -0.052233917 1.5070086 1.8760564 1.2810247 2.144011 + ## 5 Val 1.4618447 0.050483171 1.2909822 1.5475356 1.0541359 1.869553 + ## 8 Val 1.0951891 -0.051652168 0.9595488 1.2335912 0.6846083 1.505770 + ## 9 Val 1.2152379 0.076220509 1.0746965 1.3367675 0.8068229 1.623653 + ## 13 Val 0.7992342 
-0.149119020 0.6820207 0.9451323 0.3899050 1.208563 + ## 15 Val 1.1267054 0.002059572 1.0316572 1.2737521 0.7209233 1.532488 + +### Jackknife coefficient plot + +``` r +# Bootstrap regression coefficient plot +spectratrait::f.plot.coef(Z = t(bootstrap_coef), wv = wv, + plot_label="Bootstrap regression coefficients",position = 'bottomleft') +abline(h=0,lty=2,col="grey50") +box(lwd=2.2) +``` + +![](ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-14-1.png) + +``` r +dev.copy(png,file.path(outdir,paste0(inVar,'_Bootstrap_Regression_Coefficients.png')), + height=2100, width=3800, res=340) +``` + + ## quartz_off_screen + ## 3 + +``` r +dev.off(); +``` + + ## quartz_off_screen + ## 2 + +### Bootstrap validation plot + +``` r +RMSEP <- sqrt(mean(val.plsr.output$PLSR_Residuals^2)) +pecr_RMSEP <- RMSEP/mean(val.plsr.output[,inVar])*100 +r2 <- round(pls::R2(plsr.out, newdata = val.plsr.data)$val[nComps+1],2) +expr <- vector("expression", 3) +expr[[1]] <- bquote(R^2==.(r2)) +expr[[2]] <- bquote(RMSEP==.(round(RMSEP,2))) +expr[[3]] <- bquote("%RMSEP"==.(round(pecr_RMSEP,2))) +rng_vals <- c(min(val.plsr.output$LPI), max(val.plsr.output$UPI)) +par(mfrow=c(1,1), mar=c(4.2,5.3,1,0.4), oma=c(0, 0.1, 0, 0.2)) +plotrix::plotCI(val.plsr.output$PLSR_Predicted,val.plsr.output[,inVar], + li=val.plsr.output$LPI, ui=val.plsr.output$UPI, gap=0.009,sfrac=0.000, + lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), + err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="grey80", + cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"), + ylab=paste0("Observed ", paste(inVar), " (units)"), + cex.axis=1.5,cex.lab=1.8) +abline(0,1,lty=2,lw=2) +plotrix::plotCI(val.plsr.output$PLSR_Predicted,val.plsr.output[,inVar], + li=val.plsr.output$LCI, ui=val.plsr.output$UCI, gap=0.009,sfrac=0.004, + lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), + err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), 
scol="black", + cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"), + ylab=paste0("Observed ", paste(inVar), " (units)"), + cex.axis=1.5,cex.lab=1.8, add=T) +legend("topleft", legend=expr, bty="n", cex=1.5) +box(lwd=2.2) +``` + +![](ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-15-1.png) + +``` r +dev.copy(png,file.path(outdir,paste0(inVar,"_PLSR_Validation_Scatterplot.png")), + height=2800, width=3200, res=340) +``` + + ## quartz_off_screen + ## 3 + +``` r +dev.off(); +``` + + ## quartz_off_screen + ## 2 + +### Output bootstrap results + +``` r +# Bootstrap Coefficients +out.jk.coefs <- data.frame(Iteration=seq(1,length(bootstrap_intercept),1), + Intercept=bootstrap_intercept,t(bootstrap_coef)) +names(out.jk.coefs) <- c("Iteration","Intercept",paste0("Wave_",wv)) +head(out.jk.coefs)[1:6] +``` + + ## Iteration Intercept Wave_500 Wave_501 Wave_502 Wave_503 + ## 1 1 -0.6617899 -0.0067918917 -0.006451152 -0.005571355 -0.004909648 + ## 2 2 -0.4636504 -0.0040384348 -0.001804902 0.001375426 0.002477500 + ## 3 3 -0.8146267 0.0031055624 0.003529288 0.005078394 0.005883173 + ## 4 4 0.7030872 0.0003883207 0.002887701 0.003961071 0.003223096 + ## 5 5 0.4765138 0.0045652557 0.005822813 0.005979498 0.004861387 + ## 6 6 0.4146289 0.0085296345 0.009692141 0.010451131 0.009300204 + +``` r +write.csv(out.jk.coefs,file=file.path(outdir,paste0(inVar, + '_Bootstrap_PLSR_Coefficients.csv')), + row.names=FALSE) +``` + +### Create core PLSR outputs + +``` r +print(paste("Output directory: ", outdir)) +``` + + ## [1] "Output directory: /var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T//RtmpSup6Vk" + +``` r +# Observed versus predicted +write.csv(cal.plsr.output,file=file.path(outdir, + paste0(inVar,'_Observed_PLSR_CV_Pred_', + nComps,'comp.csv')), + row.names=FALSE) + +# Validation data +write.csv(val.plsr.output,file=file.path(outdir, + paste0(inVar,'_Validation_PLSR_Pred_', + nComps,'comp.csv')), + row.names=FALSE) + +# Model coefficients +coefs <- 
coef(plsr.out,ncomp=nComps,intercept=TRUE) +write.csv(coefs,file=file.path(outdir, + paste0(inVar,'_PLSR_Coefficients_', + nComps,'comp.csv')), + row.names=TRUE) + +# PLSR VIP +write.csv(vips,file=file.path(outdir, + paste0(inVar,'_PLSR_VIPs_', + nComps,'comp.csv'))) +``` + +### Confirm files were written to temp space + +``` r +print("**** PLSR output files: ") +``` + + ## [1] "**** PLSR output files: " + +``` r +print(list.files(outdir)[grep(pattern = inVar, list.files(outdir))]) +``` + + ## [1] "N_g_m2_Bootstrap_PLSR_Coefficients.csv" + ## [2] "N_g_m2_Bootstrap_Regression_Coefficients.png" + ## [3] "N_g_m2_Cal_PLSR_Dataset.csv" + ## [4] "N_g_m2_Cal_Val_Histograms.png" + ## [5] "N_g_m2_Cal_Val_Scatterplots.png" + ## [6] "N_g_m2_Cal_Val_Spectra.png" + ## [7] "N_g_m2_Coefficient_VIP_plot.png" + ## [8] "N_g_m2_Observed_PLSR_CV_Pred_13comp.csv" + ## [9] "N_g_m2_PLSR_Coefficients_13comp.csv" + ## [10] "N_g_m2_PLSR_Component_Selection.png" + ## [11] "N_g_m2_PLSR_Validation_Scatterplot.png" + ## [12] "N_g_m2_PLSR_VIPs_13comp.csv" + ## [13] "N_g_m2_Val_PLSR_Dataset.csv" + ## [14] "N_g_m2_Validation_PLSR_Pred_13comp.csv" + ## [15] "N_g_m2_Validation_RMSEP_R2_by_Component.png" diff --git a/vignettes/ely_leafN_bootstrap_plsr_example.pdf b/vignettes/ely_leafN_bootstrap_plsr_example.pdf new file mode 100644 index 0000000..5e09ade Binary files /dev/null and b/vignettes/ely_leafN_bootstrap_plsr_example.pdf differ diff --git a/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png b/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png new file mode 100644 index 0000000..33f1435 Binary files /dev/null and b/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png differ diff --git a/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png b/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png new file mode 100644 index 0000000..5bce084 Binary 
files /dev/null and b/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png differ diff --git a/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png b/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png new file mode 100644 index 0000000..cdc8cab Binary files /dev/null and b/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png differ diff --git a/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-14-1.png b/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-14-1.png new file mode 100644 index 0000000..a61b13f Binary files /dev/null and b/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-14-1.png differ diff --git a/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-15-1.png b/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-15-1.png new file mode 100644 index 0000000..ca2deef Binary files /dev/null and b/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-15-1.png differ diff --git a/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-6-1.png b/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-6-1.png new file mode 100644 index 0000000..6d79827 Binary files /dev/null and b/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-6-1.png differ diff --git a/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-8-1.png b/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-8-1.png new file mode 100644 index 0000000..4452988 Binary files /dev/null and b/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-8-1.png differ diff --git a/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-9-1.png 
b/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-9-1.png new file mode 100644 index 0000000..9e5c2df Binary files /dev/null and b/vignettes/ely_leafN_bootstrap_plsr_example_files/figure-gfm/unnamed-chunk-9-1.png differ diff --git a/vignettes/reseco_lma_plsr_example.Rmd b/vignettes/reseco_lma_plsr_example.Rmd index 738e25e..6c79b03 100644 --- a/vignettes/reseco_lma_plsr_example.Rmd +++ b/vignettes/reseco_lma_plsr_example.Rmd @@ -4,9 +4,9 @@ author: "Shawn P. Serbin, Julien Lamour, & Jeremiah Anderson" output: github_document: default html_notebook: default - pdf_document: default html_document: df_print: paged + pdf_document: default params: date: !r Sys.Date() --- @@ -19,7 +19,7 @@ knitr::opts_chunk$set(echo = TRUE) This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook to illustrate how to retrieve a dataset from the EcoSIS spectral database, choose the "optimal" number of plsr components, and fit a plsr model for leaf-mass area (LMA) ### Getting Started -### Step 1. Load libraries to run example script +### Step 1. 
Load libraries needed to run example script ```{r, eval=TRUE, echo=TRUE} list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra", "spectratrait") @@ -109,8 +109,9 @@ plsr_data <- plsr_data[complete.cases(plsr_data[,names(plsr_data) %in% method <- "dplyr" #base/dplyr # base R - a bit slow # dplyr - much faster -split_data <- spectratrait::create_data_split(dataset=plsr_data, approach=method, split_seed=7529075, - prop=0.8, group_variables="Species_Code") +split_data <- spectratrait::create_data_split(dataset=plsr_data, approach=method, + split_seed=7529075, prop=0.8, + group_variables="Species_Code") names(split_data) cal.plsr.data <- split_data$cal_data head(cal.plsr.data)[1:8] @@ -134,8 +135,17 @@ val_hist_plot <- qplot(val.plsr.data[,paste0(inVar)],geom="histogram", xlab = paste0(inVar),ylab = "Count",fill=I("grey50"),col=I("black"), alpha=I(.7)) histograms <- grid.arrange(cal_hist_plot, val_hist_plot, ncol=2) -ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Histograms.png")), plot = histograms, - device="png", width = 30, height = 12, units = "cm", dpi = 300) + +# Figure S1. The resulting leaf mass area (LMA, g/m2) distribution (histogram) for the +# calibration (i.e. model training) and validation datasets. The data was split using +# the spectratrait::create_data_split() function using "Species_Code" as the +# group_variable and using a data split proportion per group of 80% to calibration +# and 20% to validation +``` +```{r, echo=TRUE} +ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Histograms.png")), + plot = histograms, device="png", width = 30, height = 12, units = "cm", + dpi = 300) # output cal/val data write.csv(cal.plsr.data,file=file.path(outdir,paste0(inVar,'_Cal_PLSR_Dataset.csv')), row.names=FALSE) @@ -146,13 +156,17 @@ write.csv(val.plsr.data,file=file.path(outdir,paste0(inVar,'_Val_PLSR_Dataset.cs ### Step 8. 
Create calibration and validation PLSR datasets ```{r, echo=TRUE} ### Format PLSR data for model fitting -cal_spec <- as.matrix(cal.plsr.data[, which(names(cal.plsr.data) %in% paste0("Wave_",wv))]) -cal.plsr.data <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% paste0("Wave_",wv))], +cal_spec <- as.matrix(cal.plsr.data[, which(names(cal.plsr.data) %in% + paste0("Wave_",wv))]) +cal.plsr.data <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% + paste0("Wave_",wv))], Spectra=I(cal_spec)) head(cal.plsr.data)[1:5] -val_spec <- as.matrix(val.plsr.data[, which(names(val.plsr.data) %in% paste0("Wave_",wv))]) -val.plsr.data <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% paste0("Wave_",wv))], +val_spec <- as.matrix(val.plsr.data[, which(names(val.plsr.data) %in% + paste0("Wave_",wv))]) +val.plsr.data <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% + paste0("Wave_",wv))], Spectra=I(val_spec)) head(val.plsr.data)[1:5] ``` @@ -166,12 +180,15 @@ text(550,95,labels = "2.",cex=3) spectratrait::f.plot.spec(Z=val.plsr.data$Spectra,wv=wv, plot_label="Validation") +# Figure S2. The resulting calibration and validation spectral reflectance distribution by +# wavelength. The spectra split was done at the same time as LMA, as described in +# Supplemental Figure S1. + dev.copy(png,file.path(outdir,paste0(inVar,'_Cal_Val_Spectra.png')), height=2500,width=4900, res=340) dev.off(); par(mfrow=c(1,1)) ``` - ### Step 10. Use permutation to determine the optimal number of components ```{r, fig.height = 6, fig.width = 10, echo=TRUE} ### Use permutation to determine the optimal number of components @@ -198,7 +215,11 @@ if (method=="pls") { seg=seg, prop=prop, random_seed=random_seed) } -print("*** Figure 3. Optimal PLSR component selection ***") +# Figure S3. Selection of the optimal number of components based on the +# minimization of the PRESS statistic. 
In this example we show the "firstMin" +# option that selects the number of components corresponding to the first +# statistical minimum PRESS value (vertical broken blue line). + dev.copy(png,file.path(outdir,paste0(paste0("Figure_3_",inVar, "_PLSR_Component_Selection.png"))), height=2800, width=3400, res=340) @@ -208,8 +229,8 @@ dev.off(); ### Step 11. Fit final model ```{r, fig.height = 5, fig.width = 12, echo=TRUE} ### Fit final model - using leave-one-out cross validation -plsr.out <- plsr(as.formula(paste(inVar,"~","Spectra")),scale=FALSE,ncomp=nComps,validation="LOO", - trace=FALSE,data=cal.plsr.data) +plsr.out <- plsr(as.formula(paste(inVar,"~","Spectra")),scale=FALSE,ncomp=nComps, + validation="LOO",trace=FALSE,data=cal.plsr.data) fit <- plsr.out$fitted.values[,1,nComps] pls.options(parallel = NULL) @@ -227,6 +248,9 @@ pls::R2(plsr.out, newdata = val.plsr.data) plot(pls::R2(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL R2", xlab="Number of Components",ylab="Model Validation R2",lty=1,col="black",cex=1.5,lwd=2) box(lwd=2.2) +# Figure S4. A plot of the validation root mean square error of prediction (RMSEP, left) +# and coefficient of determination (right) for 0 through the optimal number of components + dev.copy(png,file.path(outdir,paste0(paste0(inVar,"_Validation_RMSEP_R2_by_Component.png"))), height=2800, width=4800, res=340) dev.off(); @@ -236,19 +260,23 @@ par(opar) ### Step 12. PLSR fit observed vs. 
predicted plot data ```{r, fig.height = 15, fig.width = 15, echo=TRUE} #calibration -cal.plsr.output <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% "Spectra")], +cal.plsr.output <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% + "Spectra")], PLSR_Predicted=fit, - PLSR_CV_Predicted=as.vector(plsr.out$validation$pred[,,nComps])) + PLSR_CV_Predicted=as.vector(plsr.out$validation$pred[,, + nComps])) cal.plsr.output <- cal.plsr.output %>% mutate(PLSR_CV_Residuals = PLSR_CV_Predicted-get(inVar)) head(cal.plsr.output) cal.R2 <- round(pls::R2(plsr.out)[[1]][nComps],2) cal.RMSEP <- round(sqrt(mean(cal.plsr.output$PLSR_CV_Residuals^2)),2) -val.plsr.output <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% "Spectra")], +val.plsr.output <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% + "Spectra")], PLSR_Predicted=as.vector(predict(plsr.out, newdata = val.plsr.data, - ncomp=nComps, type="response")[,,1])) + ncomp=nComps, + type="response")[,,1])) val.plsr.output <- val.plsr.output %>% mutate(PLSR_Residuals = PLSR_Predicted-get(inVar)) head(val.plsr.output) @@ -258,13 +286,13 @@ val.RMSEP <- round(sqrt(mean(val.plsr.output$PLSR_Residuals^2)),2) rng_quant <- quantile(cal.plsr.output[,inVar], probs = c(0.001, 0.999)) cal_scatter_plot <- ggplot(cal.plsr.output, aes(x=PLSR_CV_Predicted, y=get(inVar))) + theme_bw() + geom_point() + geom_abline(intercept = 0, slope = 1, color="dark grey", - linetype="dashed", size=1.5) + xlim(rng_quant[1], - rng_quant[2]) + + linetype="dashed", size=1.5) + + xlim(rng_quant[1], rng_quant[2]) + ylim(rng_quant[1], rng_quant[2]) + labs(x=paste0("Predicted ", paste(inVar), " (units)"), y=paste0("Observed ", paste(inVar), " (units)"), - title=paste0("Calibration: ", paste0("Rsq = ", cal.R2), "; ", paste0("RMSEP = ", - cal.RMSEP))) + + title=paste0("Calibration: ", paste0("Rsq = ", cal.R2), "; ", + paste0("RMSEP = ", cal.RMSEP))) + theme(axis.text=element_text(size=18), legend.position="none", 
axis.title=element_text(size=20, face="bold"), axis.text.x = element_text(angle = 0,vjust = 0.5), @@ -283,13 +311,13 @@ cal_resid_histogram <- ggplot(cal.plsr.output, aes(x=PLSR_CV_Residuals)) + rng_quant <- quantile(val.plsr.output[,inVar], probs = c(0.001, 0.999)) val_scatter_plot <- ggplot(val.plsr.output, aes(x=PLSR_Predicted, y=get(inVar))) + theme_bw() + geom_point() + geom_abline(intercept = 0, slope = 1, color="dark grey", - linetype="dashed", size=1.5) + xlim(rng_quant[1], - rng_quant[2]) + + linetype="dashed", size=1.5) + + xlim(rng_quant[1], rng_quant[2]) + ylim(rng_quant[1], rng_quant[2]) + labs(x=paste0("Predicted ", paste(inVar), " (units)"), y=paste0("Observed ", paste(inVar), " (units)"), - title=paste0("Validation: ", paste0("Rsq = ", val.R2), "; ", paste0("RMSEP = ", - val.RMSEP))) + + title=paste0("Validation: ", paste0("Rsq = ", val.R2), "; ", + paste0("RMSEP = ", val.RMSEP))) + theme(axis.text=element_text(size=18), legend.position="none", axis.title=element_text(size=20, face="bold"), axis.text.x = element_text(angle = 0,vjust = 0.5), @@ -307,6 +335,13 @@ val_resid_histogram <- ggplot(val.plsr.output, aes(x=PLSR_Residuals)) + # plot cal/val side-by-side scatterplots <- grid.arrange(cal_scatter_plot, val_scatter_plot, cal_resid_histogram, val_resid_histogram, nrow=2, ncol=2) +# Figure S5. The calibration model and independent validation scatter plot results for +# the example LMA PLSR model (top row). Also shown are the calibration model and +# validation PLSR residuals, where the calibration results are based on the internal +# model cross-validation and the validation residuals are the predicted minus observed +# values of LMA. 
+``` +```{r, echo=FALSE} ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Scatterplots.png")), plot = scatterplots, device="png", width = 32, height = 30, units = "cm", dpi = 300) @@ -326,6 +361,9 @@ plot(seq(Start.wave,End.wave,1),vips,xlab="Wavelength (nm)",ylab="VIP",cex=0.01) lines(seq(Start.wave,End.wave,1),vips,lwd=3) abline(h=0.8,lty=2,col="dark grey") box(lwd=2.2) +# Figure S6. The calibration model PLSR regression coefficient (top) and variable +# importance of projection (bottom) plots + dev.copy(png,file.path(outdir,paste0(inVar,'_Coefficient_VIP_plot.png')), height=3100, width=4100, res=340) dev.off(); @@ -345,8 +383,9 @@ jk.plsr.out <- pls::plsr(as.formula(paste(inVar,"~","Spectra")), scale=FALSE, data=cal.plsr.data) pls.options(parallel = NULL) -Jackknife_coef <- spectratrait::f.coef.valid(plsr.out = jk.plsr.out, data_plsr = cal.plsr.data, - ncomp = nComps, inVar=inVar) +Jackknife_coef <- spectratrait::f.coef.valid(plsr.out = jk.plsr.out, + data_plsr = cal.plsr.data, + ncomp = nComps, inVar=inVar) Jackknife_intercept <- Jackknife_coef[1,,,] Jackknife_coef <- Jackknife_coef[2:dim(Jackknife_coef)[1],,,] @@ -367,18 +406,22 @@ head(val.plsr.output) ``` ```{r, fig.height = 6, fig.width = 10, echo=TRUE} + ### Permutation coefficient plot spectratrait::f.plot.coef(Z = t(Jackknife_coef), wv = wv, plot_label="Jackknife regression coefficients",position = 'bottomleft') abline(h=0,lty=2,col="grey50") legend("topleft",legend = "7.", cex=2, bty="n") box(lwd=2.2) +# Figure S7. 
The calibration model jackknife PLSR regression coefficients + dev.copy(png,file.path(outdir,paste0(inVar,'_Jackknife_Regression_Coefficients.png')), height=2100, width=3800, res=340) dev.off(); ``` ```{r, fig.height = 7, fig.width = 8, echo=TRUE} + ### Permutation validation plot RMSEP <- sqrt(mean(val.plsr.output$PLSR_Residuals^2)) pecr_RMSEP <- RMSEP/mean(val.plsr.output[,inVar])*100 @@ -400,6 +443,11 @@ abline(0,1,lty=2,lw=2) legend("topleft", legend=expr, bty="n", cex=1.5) legend("bottomright", legend="8.", bty="n", cex=2.2) box(lwd=2.2) +# Figure S8. Independent validation results for the LMA PLSR model with associated +# jackknife uncertainty estimate 95% prediction intervals for each estimate LMA +# value. The %RMSEP is the model prediction performance standardized to the +# percentage of the response range, in this case the range of LMA values + dev.copy(png,file.path(outdir,paste0(inVar,"_PLSR_Validation_Scatterplot.png")), height=2800, width=3200, res=340) dev.off(); @@ -411,7 +459,8 @@ out.jk.coefs <- data.frame(Iteration=seq(1,length(Jackknife_intercept),1), Intercept=Jackknife_intercept,t(Jackknife_coef)) head(out.jk.coefs)[1:6] write.csv(out.jk.coefs,file=file.path(outdir, - paste0(inVar,'_Jackkife_PLSR_Coefficients.csv')), + paste0(inVar, + '_Jackkife_PLSR_Coefficients.csv')), row.names=FALSE) ``` diff --git a/vignettes/reseco_lma_plsr_example.md b/vignettes/reseco_lma_plsr_example.md index 9f30f63..e10e879 100644 --- a/vignettes/reseco_lma_plsr_example.md +++ b/vignettes/reseco_lma_plsr_example.md @@ -13,7 +13,7 @@ leaf-mass area (LMA) ### Getting Started -### Step 1. Load libraries to run example script +### Step 1. Load libraries needed to run example script ``` r list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra", @@ -82,7 +82,7 @@ output_dir <- "tempdir" ### Step 3. 
Set working directory (scratch space) - ## [1] "/private/var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T/Rtmp1Hsn79" + ## [1] "/private/var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T/RtmpiBKiLO" ### Step 4. Pull example dataset from EcoSIS (ecosis.org) @@ -266,8 +266,9 @@ plsr_data <- plsr_data[complete.cases(plsr_data[,names(plsr_data) %in% method <- "dplyr" #base/dplyr # base R - a bit slow # dplyr - much faster -split_data <- spectratrait::create_data_split(dataset=plsr_data, approach=method, split_seed=7529075, - prop=0.8, group_variables="Species_Code") +split_data <- spectratrait::create_data_split(dataset=plsr_data, approach=method, + split_seed=7529075, prop=0.8, + group_variables="Species_Code") names(split_data) ``` @@ -349,8 +350,17 @@ histograms <- grid.arrange(cal_hist_plot, val_hist_plot, ncol=2) ![](reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-7-1.png) ``` r -ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Histograms.png")), plot = histograms, - device="png", width = 30, height = 12, units = "cm", dpi = 300) +# Figure S1. The resulting leaf mass area (LMA, g/m2) distribution (histogram) for the +# calibration (i.e. model training) and validation datasets. 
The data was split using +# the spectratrait::create_data_split() function using "Species_Code" as the +# group_variable and using a data split proportion per group of 80% to calibration +# and 20% to validation +``` + +``` r +ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Histograms.png")), + plot = histograms, device="png", width = 30, height = 12, units = "cm", + dpi = 300) # output cal/val data write.csv(cal.plsr.data,file=file.path(outdir,paste0(inVar,'_Cal_PLSR_Dataset.csv')), row.names=FALSE) @@ -362,8 +372,10 @@ write.csv(val.plsr.data,file=file.path(outdir,paste0(inVar,'_Val_PLSR_Dataset.cs ``` r ### Format PLSR data for model fitting -cal_spec <- as.matrix(cal.plsr.data[, which(names(cal.plsr.data) %in% paste0("Wave_",wv))]) -cal.plsr.data <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% paste0("Wave_",wv))], +cal_spec <- as.matrix(cal.plsr.data[, which(names(cal.plsr.data) %in% + paste0("Wave_",wv))]) +cal.plsr.data <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% + paste0("Wave_",wv))], Spectra=I(cal_spec)) head(cal.plsr.data)[1:5] ``` @@ -377,8 +389,10 @@ head(cal.plsr.data)[1:5] ## 6 Ammophila arenaria Ammare ZC3 0.01802409 180.2409 ``` r -val_spec <- as.matrix(val.plsr.data[, which(names(val.plsr.data) %in% paste0("Wave_",wv))]) -val.plsr.data <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% paste0("Wave_",wv))], +val_spec <- as.matrix(val.plsr.data[, which(names(val.plsr.data) %in% + paste0("Wave_",wv))]) +val.plsr.data <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% + paste0("Wave_",wv))], Spectra=I(val_spec)) head(val.plsr.data)[1:5] ``` @@ -402,9 +416,13 @@ spectratrait::f.plot.spec(Z=val.plsr.data$Spectra,wv=wv, plot_label="Validation") ``` -![](reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-9-1.png) +![](reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png) ``` r +# Figure S2. 
The resulting calibration and validation spectral reflectance distribution by +# wavelength. The spectra split was done at the same time as LMA, as described in +# Supplemental Figure S1. + dev.copy(png,file.path(outdir,paste0(inVar,'_Cal_Val_Spectra.png')), height=2500,width=4900, res=340) ``` @@ -461,15 +479,14 @@ if (method=="pls") { ## [1] "*** Optimal number of components based on t.test: 11" -![](reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png) +![](reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png) ``` r -print("*** Figure 3. Optimal PLSR component selection ***") -``` +# Figure S3. Selection of the optimal number of components based on the +# minimization of the PRESS statistic. In this example we show "firstMin" +# option that selects the number of components corresponding to the first +# statistical minimum PRESS value (vertical broken blue line). - ## [1] "*** Figure 3. Optimal PLSR component selection ***" - -``` r dev.copy(png,file.path(outdir,paste0(paste0("Figure_3_",inVar, "_PLSR_Component_Selection.png"))), height=2800, width=3400, res=340) @@ -489,8 +506,8 @@ dev.off(); ``` r ### Fit final model - using leave-one-out cross validation -plsr.out <- plsr(as.formula(paste(inVar,"~","Spectra")),scale=FALSE,ncomp=nComps,validation="LOO", - trace=FALSE,data=cal.plsr.data) +plsr.out <- plsr(as.formula(paste(inVar,"~","Spectra")),scale=FALSE,ncomp=nComps, + validation="LOO",trace=FALSE,data=cal.plsr.data) fit <- plsr.out$fitted.values[,1,nComps] pls.options(parallel = NULL) @@ -526,9 +543,12 @@ plot(pls::R2(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL R box(lwd=2.2) ``` -![](reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png) +![](reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png) ``` r +# Figure S4. 
A plot of the validation root mean square error of prediction (RMSEP, left) +# and coefficient of determination (right) for the 0 to optimal number of components + dev.copy(png,file.path(outdir,paste0(paste0(inVar,"_Validation_RMSEP_R2_by_Component.png"))), height=2800, width=4800, res=340) ``` @@ -551,9 +571,11 @@ par(opar) ``` r #calibration -cal.plsr.output <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% "Spectra")], +cal.plsr.output <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% + "Spectra")], PLSR_Predicted=fit, - PLSR_CV_Predicted=as.vector(plsr.out$validation$pred[,,nComps])) + PLSR_CV_Predicted=as.vector(plsr.out$validation$pred[,, + nComps])) cal.plsr.output <- cal.plsr.output %>% mutate(PLSR_CV_Residuals = PLSR_CV_Predicted-get(inVar)) head(cal.plsr.output) @@ -578,10 +600,12 @@ head(cal.plsr.output) cal.R2 <- round(pls::R2(plsr.out)[[1]][nComps],2) cal.RMSEP <- round(sqrt(mean(cal.plsr.output$PLSR_CV_Residuals^2)),2) -val.plsr.output <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% "Spectra")], +val.plsr.output <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% + "Spectra")], PLSR_Predicted=as.vector(predict(plsr.out, newdata = val.plsr.data, - ncomp=nComps, type="response")[,,1])) + ncomp=nComps, + type="response")[,,1])) val.plsr.output <- val.plsr.output %>% mutate(PLSR_Residuals = PLSR_Predicted-get(inVar)) head(val.plsr.output) @@ -609,13 +633,13 @@ val.RMSEP <- round(sqrt(mean(val.plsr.output$PLSR_Residuals^2)),2) rng_quant <- quantile(cal.plsr.output[,inVar], probs = c(0.001, 0.999)) cal_scatter_plot <- ggplot(cal.plsr.output, aes(x=PLSR_CV_Predicted, y=get(inVar))) + theme_bw() + geom_point() + geom_abline(intercept = 0, slope = 1, color="dark grey", - linetype="dashed", size=1.5) + xlim(rng_quant[1], - rng_quant[2]) + + linetype="dashed", size=1.5) + + xlim(rng_quant[1], rng_quant[2]) + ylim(rng_quant[1], rng_quant[2]) + labs(x=paste0("Predicted ", paste(inVar), " (units)"), 
y=paste0("Observed ", paste(inVar), " (units)"), - title=paste0("Calibration: ", paste0("Rsq = ", cal.R2), "; ", paste0("RMSEP = ", - cal.RMSEP))) + + title=paste0("Calibration: ", paste0("Rsq = ", cal.R2), "; ", + paste0("RMSEP = ", cal.RMSEP))) + theme(axis.text=element_text(size=18), legend.position="none", axis.title=element_text(size=20, face="bold"), axis.text.x = element_text(angle = 0,vjust = 0.5), @@ -634,13 +658,13 @@ cal_resid_histogram <- ggplot(cal.plsr.output, aes(x=PLSR_CV_Residuals)) + rng_quant <- quantile(val.plsr.output[,inVar], probs = c(0.001, 0.999)) val_scatter_plot <- ggplot(val.plsr.output, aes(x=PLSR_Predicted, y=get(inVar))) + theme_bw() + geom_point() + geom_abline(intercept = 0, slope = 1, color="dark grey", - linetype="dashed", size=1.5) + xlim(rng_quant[1], - rng_quant[2]) + + linetype="dashed", size=1.5) + + xlim(rng_quant[1], rng_quant[2]) + ylim(rng_quant[1], rng_quant[2]) + labs(x=paste0("Predicted ", paste(inVar), " (units)"), y=paste0("Observed ", paste(inVar), " (units)"), - title=paste0("Validation: ", paste0("Rsq = ", val.R2), "; ", paste0("RMSEP = ", - val.RMSEP))) + + title=paste0("Validation: ", paste0("Rsq = ", val.R2), "; ", + paste0("RMSEP = ", val.RMSEP))) + theme(axis.text=element_text(size=18), legend.position="none", axis.title=element_text(size=20, face="bold"), axis.text.x = element_text(angle = 0,vjust = 0.5), @@ -667,12 +691,14 @@ scatterplots <- grid.arrange(cal_scatter_plot, val_scatter_plot, cal_resid_histo ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`. ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`. -![](reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png) +![](reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-13-1.png) ``` r -ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Scatterplots.png")), - plot = scatterplots, device="png", width = 32, height = 30, units = "cm", - dpi = 300) +# Figure S5. 
The calibration model and independent validation scatter plot results for +# the example LMA PLSR model (top row). Also shown are the calibration model and +# validation PLSR residuals, where the calibration results are based on the internal +# model cross-validation and the validation residuals are the predicted minus observed +# values of LMA. ``` ### Step 13. Generate Coefficient and VIP plots @@ -692,9 +718,12 @@ abline(h=0.8,lty=2,col="dark grey") box(lwd=2.2) ``` -![](reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-13-1.png) +![](reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-15-1.png) ``` r +# Figure S6. The calibration model PLSR regression coefficient (top) and variable +# importance of projection (bottom) plots + dev.copy(png,file.path(outdir,paste0(inVar,'_Coefficient_VIP_plot.png')), height=3100, width=4100, res=340) ``` @@ -724,8 +753,9 @@ jk.plsr.out <- pls::plsr(as.formula(paste(inVar,"~","Spectra")), scale=FALSE, data=cal.plsr.data) pls.options(parallel = NULL) -Jackknife_coef <- spectratrait::f.coef.valid(plsr.out = jk.plsr.out, data_plsr = cal.plsr.data, - ncomp = nComps, inVar=inVar) +Jackknife_coef <- spectratrait::f.coef.valid(plsr.out = jk.plsr.out, + data_plsr = cal.plsr.data, + ncomp = nComps, inVar=inVar) Jackknife_intercept <- Jackknife_coef[1,,,] Jackknife_coef <- Jackknife_coef[2:dim(Jackknife_coef)[1],,,] @@ -769,9 +799,11 @@ legend("topleft",legend = "7.", cex=2, bty="n") box(lwd=2.2) ``` -![](reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-15-1.png) +![](reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-17-1.png) ``` r +# Figure S7. 
The calibration model jackknife PLSR regression coefficients + dev.copy(png,file.path(outdir,paste0(inVar,'_Jackknife_Regression_Coefficients.png')), height=2100, width=3800, res=340) ``` @@ -810,9 +842,14 @@ legend("bottomright", legend="8.", bty="n", cex=2.2) box(lwd=2.2) ``` -![](reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-16-1.png) +![](reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-18-1.png) ``` r +# Figure S8. Independent validation results for the LMA PLSR model with associated +# jackknife uncertainty estimate 95% prediction intervals for each estimate LMA +# value. The %RMSEP is the model prediction performance standardized to the +# percentage of the response range, in this case the range of LMA values + dev.copy(png,file.path(outdir,paste0(inVar,"_PLSR_Validation_Scatterplot.png")), height=2800, width=3200, res=340) ``` @@ -845,7 +882,8 @@ head(out.jk.coefs)[1:6] ``` r write.csv(out.jk.coefs,file=file.path(outdir, - paste0(inVar,'_Jackkife_PLSR_Coefficients.csv')), + paste0(inVar, + '_Jackkife_PLSR_Coefficients.csv')), row.names=FALSE) ``` @@ -855,7 +893,7 @@ write.csv(out.jk.coefs,file=file.path(outdir, print(paste("Output directory: ", outdir)) ``` - ## [1] "Output directory: /var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T//Rtmp1Hsn79" + ## [1] "Output directory: /var/folders/xp/h3k9vf3n2jx181ts786_yjrn9c2gjq/T//RtmpiBKiLO" ``` r # Observed versus predicted diff --git a/vignettes/reseco_lma_plsr_example.pdf b/vignettes/reseco_lma_plsr_example.pdf index 54fe0e3..2f2940f 100644 Binary files a/vignettes/reseco_lma_plsr_example.pdf and b/vignettes/reseco_lma_plsr_example.pdf differ diff --git a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png index 46b86ae..5e9c7db 100644 Binary files a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png and b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-10-1.png 
differ diff --git a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png index 0aa5976..46b86ae 100644 Binary files a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png and b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-11-1.png differ diff --git a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png index 43a32eb..0aa5976 100644 Binary files a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png and b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-12-1.png differ diff --git a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-13-1.png b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-13-1.png index 9c6878f..43a32eb 100644 Binary files a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-13-1.png and b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-13-1.png differ diff --git a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-15-1.png b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-15-1.png index 2360ecc..9c6878f 100644 Binary files a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-15-1.png and b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-15-1.png differ diff --git a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-17-1.png b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-17-1.png new file mode 100644 index 0000000..2360ecc Binary files /dev/null and b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-17-1.png differ diff --git a/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-18-1.png b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-18-1.png new file mode 100644 index 
0000000..c8e9e03 Binary files /dev/null and b/vignettes/reseco_lma_plsr_example_files/figure-gfm/unnamed-chunk-18-1.png differ