Skip to content

Commit

Permalink
Merge pull request #76 from TESTgroup-BNL/updating_manuscript_example
Browse files Browse the repository at this point in the history
Adding figure captions
  • Loading branch information
Shawn P. Serbin authored Mar 10, 2021
2 parents 1e2216a + a6fc608 commit 005a08f
Show file tree
Hide file tree
Showing 23 changed files with 1,462 additions and 80 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ Rpubs LeafN bootstrap example output: https://rpubs.com/sserbin/721908
EcoSIS URL: https://ecosis.org/package/leaf-spectra--structural-and-biochemical-leaf-traits-of-eight-crop-species <br>
EcoSIS ID: 25770ad9-d47c-428b-bf99-d1543a4b0ec9 <br>
DOI: https://doi.org/doi:10.21232/C2GM2Z <br>
Rpubs LeafN bootstrap example output: https://rpubs.com/sserbin/736689 <br>

5) Canopy spectra to map foliar functional traits over NEON domains in eastern United States <br>
Target variable: leaf nitrogen <br>
Expand Down
79 changes: 71 additions & 8 deletions inst/scripts/spectra-trait_reseco_lma_plsr_example.R
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,9 @@ plsr_data <- plsr_data[complete.cases(plsr_data[,names(plsr_data) %in%
method <- "dplyr" #base/dplyr
# base R - a bit slow
# dplyr - much faster
split_data <- spectratrait::create_data_split(dataset=plsr_data, approach=method, split_seed=7529075,
prop=0.8, group_variables="Species_Code")
split_data <- spectratrait::create_data_split(dataset=plsr_data, approach=method,
split_seed=7529075,prop=0.8,
group_variables="Species_Code")
names(split_data)
cal.plsr.data <- split_data$cal_data
head(cal.plsr.data)[1:8]
Expand All @@ -138,11 +139,13 @@ text_loc <- c(max(hist(cal.plsr.data[,paste0(inVar)])$counts),
max(hist(cal.plsr.data[,paste0(inVar)])$mids))
cal_hist_plot <- qplot(cal.plsr.data[,paste0(inVar)],geom="histogram",
main = paste0("Calibration Histogram for ",inVar),
xlab = paste0(inVar),ylab = "Count",fill=I("grey50"),col=I("black"),alpha=I(.7)) +
xlab = paste0(inVar),ylab = "Count",fill=I("grey50"),col=I("black"),
alpha=I(.7)) +
annotate("text", x=text_loc[2], y=text_loc[1], label= "1.",size=10)
val_hist_plot <- qplot(val.plsr.data[,paste0(inVar)],geom="histogram",
main = paste0("Validation Histogram for ",inVar),
xlab = paste0(inVar),ylab = "Count",fill=I("grey50"),col=I("black"),alpha=I(.7))
xlab = paste0(inVar),ylab = "Count",fill=I("grey50"),col=I("black"),
alpha=I(.7))
histograms <- grid.arrange(cal_hist_plot, val_hist_plot, ncol=2)
ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Histograms.png")), plot = histograms,
device="png", width = 30, height = 12, units = "cm", dpi = 300)
Expand All @@ -152,6 +155,14 @@ write.csv(val.plsr.data,file=file.path(outdir,paste0(inVar,'_Val_PLSR_Dataset.cs
#--------------------------------------------------------------------------------------------------#


#--------------------------------------------------------------------------------------------------#
# Figure S1. The resulting leaf mass area (LMA, g/m2) distribution (histogram) for the
# calibration (i.e. model training) and validation datasets. The data was split using the
# spectratrait::create_data_split() function with "Species_Code" as the group_variables
# argument and a per-group split proportion of 80% to calibration and 20% to validation.
#--------------------------------------------------------------------------------------------------#


#--------------------------------------------------------------------------------------------------#
### Step 8.
### Format PLSR data for model fitting
Expand Down Expand Up @@ -181,6 +192,13 @@ par(mfrow=c(1,1))
#--------------------------------------------------------------------------------------------------#


#--------------------------------------------------------------------------------------------------#
# Figure S2. The resulting calibration and validation spectral reflectance distribution by
# wavelength. The spectra split was done at the same time as LMA, as described in
# Supplemental Figure S1.
#--------------------------------------------------------------------------------------------------#


#--------------------------------------------------------------------------------------------------#
### Step 10.
### Use permutation to determine the optimal number of components
Expand All @@ -206,13 +224,26 @@ if (method=="pls") {
maxComps=maxComps, iterations=iterations,
seg=seg, prop=prop, random_seed=random_seed)
}
print("*** Figure 3. Optimal PLSR component selection ***")
print("*** Figure S3. Optimal PLSR component selection ***")
dev.copy(png,file.path(outdir,paste0(paste0("Figure_3_",inVar,"_PLSR_Component_Selection.png"))),
height=2800, width=3400, res=340)
dev.off();
#--------------------------------------------------------------------------------------------------#


#--------------------------------------------------------------------------------------------------#
# Figure S3. A key challenge in building robust and parsimonious PLSR models is determining the
# optimal number of PLSR components. A good definition is the minimum number of components that
# minimizes the PRESS statistic and where the next higher component doesn't produce a meaningful
# increase in model performance (i.e. lower PRESS). We provide three methods in the
# find_optimal_components() function to determine the optimal number of components statistically
# using the internal pls package jackknife method or our custom methods that are better in some
# conditions, including for large datasets. In this example we show the "firstMin" option that
# selects the number of components corresponding to the first statistical minimum PRESS value
# (vertical broken blue line).
#--------------------------------------------------------------------------------------------------#


#--------------------------------------------------------------------------------------------------#
### Step 11.
### Fit final model - using leave-one-out cross validation
Expand Down Expand Up @@ -242,6 +273,12 @@ par(opar)
#--------------------------------------------------------------------------------------------------#


#--------------------------------------------------------------------------------------------------#
# Figure S4. A plot of the validation root mean square error of prediction (RMSEP, left) and
# coefficient of determination (right) for models with 0 up to the optimal number of components.
#--------------------------------------------------------------------------------------------------#


#--------------------------------------------------------------------------------------------------#
### Step 12.
### PLSR fit observed vs. predicted plot data
Expand Down Expand Up @@ -319,6 +356,14 @@ ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Scatterplots.png")),
#--------------------------------------------------------------------------------------------------#


#--------------------------------------------------------------------------------------------------#
# Figure S5. The calibration model and independent validation scatter plot results for the example
# LMA PLSR model (top row). Also shown are the calibration model and validation PLSR
# residuals, where the calibration results are based on the internal model cross-validation
# and the validation residuals are the predicted minus observed values of LMA.
#--------------------------------------------------------------------------------------------------#


#--------------------------------------------------------------------------------------------------#
### Step 13.
### Generate Coefficient and VIP plots
Expand All @@ -341,6 +386,12 @@ par(opar)
#--------------------------------------------------------------------------------------------------#


#--------------------------------------------------------------------------------------------------#
# Figure S6. The calibration model PLSR regression coefficient (top) and variable importance in
# projection (VIP, bottom) plots.
#--------------------------------------------------------------------------------------------------#


#--------------------------------------------------------------------------------------------------#
### Step 14.
### Permutation analysis to derive uncertainty estimates
Expand Down Expand Up @@ -386,6 +437,10 @@ dev.copy(png,file.path(outdir,paste0(inVar,'_Jackknife_Regression_Coefficients.p
height=2100, width=3800, res=340)
dev.off();

#--------------------------------------------------------------------------------------------------#
# Figure S7. The calibration model jackknife PLSR regression coefficients
#--------------------------------------------------------------------------------------------------#

# JK validation plot
RMSEP <- sqrt(mean(val.plsr.output$PLSR_Residuals^2))
pecr_RMSEP <- RMSEP/mean(val.plsr.output[,inVar])*100
Expand Down Expand Up @@ -413,9 +468,17 @@ dev.off();
#--------------------------------------------------------------------------------------------------#


#--------------------------------------------------------------------------------------------------#
# Figure S8. Independent validation results for the LMA PLSR model with associated jackknife
# uncertainty estimate 95% prediction intervals for each estimated LMA value. The %RMSEP is the
# model prediction performance standardized as a percentage of the mean observed response, in
# this case the mean of the validation LMA values (RMSEP / mean * 100).
#--------------------------------------------------------------------------------------------------#


#---------------- Output jackknife results --------------------------------------------------------#
### Step 15.
# JK Coefficents
### Step 15. Outputs the final PLSR model jackknife coefficients
# JK Coefficients
out.jk.coefs <- data.frame(Iteration=seq(1,length(Jackknife_intercept),1),
Intercept=Jackknife_intercept,t(Jackknife_coef))
head(out.jk.coefs)[1:6]
Expand All @@ -425,7 +488,7 @@ write.csv(out.jk.coefs,file=file.path(outdir,paste0(inVar,'_Jackkife_PLSR_Coeffi


#---------------- Export Model Output -------------------------------------------------------------#
### Step 16.
### Step 16. Create and write all relevant PLSR model output to disk in .csv format
print(paste("Output directory: ", getwd()))

# Observed versus predicted
Expand Down
Loading

0 comments on commit 005a08f

Please sign in to comment.