Skip to content

Commit

Permalink
Merge pull request #74 from TESTgroup-BNL/cleaning_package_load
Browse files Browse the repository at this point in the history
initial cleaning up
  • Loading branch information
Shawn P. Serbin authored Mar 9, 2021
2 parents a07a419 + e3d6359 commit 1e2216a
Show file tree
Hide file tree
Showing 37 changed files with 551 additions and 2,739 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: spectratrait
Title: A simple add-on package to aid in the fitting of leaf-level spectra-trait PLSR models
Version: 0.9.9
Version: 1.0.1
Authors@R:
c(person(given = "Julien",
family = "Lamour",
Expand Down
8 changes: 4 additions & 4 deletions R/find_optimal_components.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ find_optimal_components <- function(dataset=NULL, method="pls", maxComps=20, ite
nComps <- selectNcomp(plsr.out, method = "onesigma", plot = TRUE)
}
if(method=="firstPlateau") {
press.out <- pls_permutation(dataset=dataset, maxComps=maxComps, iterations=iterations,
seg=seg, prop=prop)
press.out <- spectratrait::pls_permutation(dataset=dataset, maxComps=maxComps,
iterations=iterations, prop=prop)
# PRESS plot
pressDF <- as.data.frame(press.out$PRESS)
names(pressDF) <- as.character(seq(maxComps))
Expand All @@ -50,8 +50,8 @@ find_optimal_components <- function(dataset=NULL, method="pls", maxComps=20, ite
print(bp)
}
if(method=="firstMin") {
press.out <- pls_permutation(dataset=dataset, maxComps=maxComps, iterations=iterations,
seg=seg, prop=prop)
press.out <- spectratrait::pls_permutation(dataset=dataset, maxComps=maxComps,
iterations=iterations, prop=prop)
# PRESS plot
pressDF <- as.data.frame(press.out$PRESS)
names(pressDF) <- as.character(seq(maxComps))
Expand Down
34 changes: 28 additions & 6 deletions R/pls_permutation.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,31 +6,53 @@
##' @param dataset input full PLSR dataset. Usually just the calibration dataset
##' @param maxComps maximum number of components to use for each PLSR fit
##' @param iterations how many different permutations to run
##' @param seg currently unused - should be removed from this function call
##' @param prop proportion of data to preserve for each permutation
##' @param verbose Should the function report the current iteration status/progress to the terminal
##' or run silently? TRUE/FALSE. Default FALSE
##'
##' @author Julien Lamour, Shawn P. Serbin
##' @export
pls_permutation <- function(dataset=NULL, maxComps=20, iterations=20, seg=100, prop=0.70) {
pls_permutation <- function(dataset=NULL, maxComps=20, iterations=20, prop=0.70,
verbose=FALSE) {
coefs <- array(0,dim=c((ncol(dataset$Spectra)+1),iterations,maxComps))
press.out <- array(data=NA, dim=c(iterations,maxComps))
print("*** Running permutation test. Please hang tight, this can take awhile ***")
print(paste("Options:", maxComps, iterations, seg, prop, sep=" "))
print("Options:")
print(paste("Max Components:",maxComps, "Iterations:", iterations,
"Data Proportion (percent):", prop*100, sep=" "))

if (verbose) {
j <- 1 # <--- Numeric counter for progress bar
pb <- utils::txtProgressBar(min = 0, max = iterations,
char="*",width=70,style = 3)
}

for (i in seq_along(1:iterations)) {
message(paste("Running interation", i))
rows <- sample(1:nrow(dataset),floor(prop*nrow(dataset)))
sub.data <- dataset[rows,]
val.sub.data <- dataset[-rows,]
plsr.out <- plsr(as.formula(paste(inVar,"~","Spectra")), scale=FALSE, center=TRUE, ncomp=maxComps,
validation="none", data=sub.data)
plsr.out <- plsr(as.formula(paste(inVar,"~","Spectra")), scale=FALSE, center=TRUE,
ncomp=maxComps, validation="none", data=sub.data)
pred_val <- predict(plsr.out,newdata=val.sub.data)
sq_resid <- (pred_val[,,]-val.sub.data[,inVar])^2
press <- apply(X = sq_resid, MARGIN = 2, FUN = sum)
press.out[i,] <- press
coefs[,i,] <- coef(plsr.out, intercept = TRUE, ncomp = 1:maxComps)
rm(rows,sub.data,val.sub.data,plsr.out,pred_val,sq_resid,press)

### Display progress to console
if (verbose) {
setTxtProgressBar(pb, j) # show progress bar
j <- j+1 # <--- increase counter by 1
flush.console() #<--- show output in real-time
}
}
if (verbose) {
close(pb)
}

# create a new list with PRESS and permuted coefficients x wavelength x component number
print("*** Providing PRESS and coefficient array output ***")
output <- list(PRESS=press.out, coef_array=coefs)
return(output)
}
76 changes: 41 additions & 35 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,59 +1,65 @@
# PLSR modeling for the estimation of plant functional traits
This repository contains example scripts illustrating "best-practices" for fitting, evaluating, and reporting leaf-level spectra-trait PLSR models. These scripts encompass several possibilities that you may encounter doing PLSR. Start by reading *Burnett et al. 2020*, then work through the scripts or vignettes.
This repository contains example scripts illustrating best practices for fitting, evaluating, and reporting leaf-level spectra-trait PLSR models. These scripts encompass several possibilities that you may encounter doing PLSR. Start by reading *Burnett et al. in review*, then work through the scripts or vignettes.

### Article citation:
Burnett AC, Anderson J, Davidson KD, Ely KS, Lamour J, Li Q, Morrison BD, Yang D, Rogers A, Serbin SP (2020) A best-practice guide to predicting plant traits from leaf-level hyperspectral data using partial least squares regression. Journal of Experimental Botany. In Review.
Burnett AC, Anderson J, Davidson KD, Ely KS, Lamour J, Li Q, Morrison BD, Yang D, Rogers A, Serbin SP (in review) A best-practice guide to predicting plant traits from leaf-level hyperspectral data using partial least squares regression. Journal of Experimental Botany. In Review.

### Source code citation:
[![DOI](https://zenodo.org/badge/222699149.svg)](https://zenodo.org/badge/latestdoi/222699149)

### EcoSML
https://ecosml.org/package/github/TESTgroup-BNL/PLSR_for_plant_trait_prediction

### Package authors:
Julien Lamour, Jeremiah Anderson, Ken Davidson, Shawn P. Serbin

### Getting started, tips and tricks:
* If you are new to R you should start by reading https://support.rstudio.com/hc/en-us/articles/201141096-Getting-Started-with-R && https://www.dataquest.io/blog/tutorial-getting-started-with-r-and-rstudio/
* If you are new to R you should start by reading https://support.rstudio.com/hc/en-us/articles/201141096-Getting-Started-with-R & https://www.dataquest.io/blog/tutorial-getting-started-with-r-and-rstudio/
* Software requirements: R software (version 4.0 or above) and preferred operating environment (e.g. RStudio).
* Load script into operating environment: Using the green "Code" button at the top of this page, Download ZIP. Extract the contents of the ZIP in your preferred location. Use R Studio to open your selected example script from the inst/scripts folder.
* Initial run to install packages: Step 1 in the example code will check for tools and packages and install what is required. An internet connection is required to run this step. The user will be required to respond to several prompts. (Suggested responses for standard set up; 1, Yes).
* Install package dependencies and the spectratrait package: See the Depends and INSTALL sections below
* To work with the repository locally, clone the repository to your local machine (https://docs.github.com/en/github/creating-cloning-and-archiving-repositories/cloning-a-repository). Once you have the repository on your local machine you can run the scripts in inst/scripts or vignettes folders. You can also start editing the code yourself or contributing to the development of the package through new pull requests (https://guides.github.com/activities/hello-world/)
* Or if you don't want to obtain the code via cloning the repository, you can click the green "Code" button at the top of this page and select "Download ZIP". Extract the contents of the ZIP in your preferred location. Use RStudio to open your selected example script from the inst/scripts folder and then run or source the code.

### Depends:
ggplot2 (>= 3.3.2), remotes (>= 2.2.0), devtools (>= 2.3.1), readr (>= 1.3.1), RCurl (>= 1.98-1.2), httr (>= 1.4.2), pls (>= 2.7-2),
dplyr (>= 1.0.1), reshape2 (>= 1.4.4), here (>= 0.1), plotrix (>= 3.7-8), gridExtra (>= 2.3), scales (>= 1.1.1)
ggplot2 (>= 3.3.2), remotes (>= 2.2.0), devtools (>= 2.3.1), readr (>= 1.3.1), RCurl (>= 1.98-1.2),
httr (>= 1.4.2), pls (>= 2.7-2), dplyr (>= 1.0.1), reshape2 (>= 1.4.4), here (>= 0.1),
plotrix (>= 3.7-8), gridExtra (>= 2.3), scales (>= 1.1.1)

### INSTALL
spectratrait is not currently on CRAN, but you can install from GitHub using devtools():
spectratrait is not currently on CRAN, but you can install it from GitHub using the devtools package. First, make sure you have all of the package dependencies installed. You can do this either by 1) installing the packages individually using install.packages(), for example:

``` r
# install.packages("devtools")
devtools::install_github(repo = "TESTgroup-BNL/PLSR_for_plant_trait_prediction", dependencies=TRUE)
install.packages("pls")
install.packages("ggplot2")
...
```

# or a specific branch, e.g. a branch named devbranch
devtools::install_github(repo = "TESTgroup-BNL/PLSR_for_plant_trait_prediction", ref = "devbranch",
and so forth until all of the dependencies (listed above in the "Depends" section) are installed. **Note** - you should pay careful attention at this stage to any R messages in your terminal alerting you that you need to update existing or install new R packages. These messages usually show up after you attempt to run install.packages() and require you
to respond in your terminal to a y/n or multiple choice question before the install can continue.

Or 2) you can also run or source the "install_dependencies.R" script located in inst/scripts which should also install all of the required dependencies. **Note** - again you will need to watch for any R prompts to update packages in order for the install to proceed correctly.

Finally, to complete the installation you will also need to install the spectratrait package itself. You can do this by copying and pasting the command below into your R or RStudio (preferred) terminal.

``` r
# to install the master branch version
devtools::install_github(repo = "TESTgroup-BNL/PLSR_for_plant_trait_prediction",
dependencies=TRUE)

# to install a specific release, for example release 1.0.0
devtools::install_github(repo = "TESTgroup-BNL/PLSR_for_plant_trait_prediction@1.0.0",
dependencies=TRUE)

# or a specific branch, e.g. a branch named devbranch
devtools::install_github(repo = "TESTgroup-BNL/PLSR_for_plant_trait_prediction",
ref = "devbranch", dependencies=TRUE)
```

## Contains:
1. Example R script files that illustrate the "best-practices" of PLSR model fitting for the estimation of leaf functional traits with reflectance spectroscopy
* _spectra-trait_kit_sla_plsr_example.R_ Small dataset looking at SLA with some data cleaning (removal of NA's and suspect high values)
* _spectra-trait_neon_lma_plsr_example.R_ Large dataset looking at LMA with multiple grouping variables, very slow (>6000 observations)
* _spectra-trait_reseco_leafN_plsr_example.R_ Small dataset looking at leaf nitrogen content
* _spectra-trait_reseco_lma_plsr_example.R_ Small dataset looking at LMA
* _simple_spectra-trait_plsr_example.R_ Basic PLSR example using a large dataset
* _pull_data_from_ecosis_ Quick example of how to pull data from EcoSIS and plot it

2. Non-CRAN or external library R functions used in the example PLSR model fitting scripts provided in the "functions.R" file
* _get_ecosis_data()_ Function to pull data from the EcoSIS database (ecosis.org) using their application programmer interface (API)
* _create_data_split()_ Randomly splits data into calibration and validation datasets based on grouping variables. 'base' option is slow but verbose. 'dplyr' is fast and quiet.
* _f.plot.spec()_ Function to generate spectral plot with mean, min/max and 95% confidence intervals
* _find_optimal_components()_ Finds optimum number of components for PLSR. 'pls' chooses the model with fewest components that is still less than one standard error away from the overall best model. 'firstPlateau' chooses the first component that gives statistically (t-test) the same result as the following component. 'firstMin' finds the first component that gives statistically (t-test) the same result as the overall best model.
* _pls_permutation()_ Generates PLSR model permutation analysis ensembles for optimal component selection and uncertainty analysis. Currently called by _find_optimal_components()_
* _f.plot.coef()_ Plots PLSR model coefficients with uncertainty envelope
* _f.coef.valid()_ Returns the intercept and the coefficients of the jackknife permutation analysis.

3. Example Rmarkdown vignettes illustrating the various PLSR model fitting examples
1. Core package functions are located in the main "R" folder
2. inst/scripts contains example PLSR workflows for fitting example leaf and canopy spectra-trait PLSR models for different leaf traits, including LMA and foliar nitrogen
3. Example datasets that can be loaded in your R environment using the base load() function can be found in the data/ folder
4. man - the manual pages that are accessible in R
5. tests - package tests to check that functions are still operational and produce the expected results
6. vignettes - example Rmarkdown and github markdown vignettes illustrating the various PLSR model fitting examples. These can be used to learn how to use the PLSR workflow and associated functions for new applications
7. spectratrait_X.X.X.pdf (where X.X.X is the current release number) is the pdf documentation

### Linked dataset citations, DOIs, and EcoSIS IDs/URLs: <br>
1) Leaf reflectance plant functional gradient IFGG/KIT <br>
Expand All @@ -62,7 +68,7 @@ EcoSIS URL: https://ecosis.org/package/leaf-reflectance-plant-functional-gradien
EcoSIS ID: 3cf6b27e-d80e-4bc7-b214-c95506e46daa <br>
Rpubs example output: https://rpubs.com/sserbin/722040

2) Fresh Leaf Spectra to Estimate LMA over NEON domains in eastern United States <br>
2) Fresh leaf spectra to estimate LMA over NEON domains in eastern United States <br>
Target variable: LMA <br>
EcoSIS URL: https://ecosis.org/package/fresh-leaf-spectra-to-estimate-lma-over-neon-domains-in-eastern-united-states <br>
EcoSIS ID: 5617da17-c925-49fb-b395-45a51291bd2d <br>
Expand All @@ -83,7 +89,7 @@ EcoSIS URL: https://ecosis.org/package/leaf-spectra--structural-and-biochemical-
EcoSIS ID: 25770ad9-d47c-428b-bf99-d1543a4b0ec9 <br>
DOI: https://doi.org/doi:10.21232/C2GM2Z <br>

5) Canopy Spectra to Map Foliar Functional Traits over NEON domains in eastern United States <br>
5) Canopy spectra to map foliar functional traits over NEON domains in eastern United States <br>
Target variable: leaf nitrogen <br>
EcoSIS URL: https://ecosis.org/package/canopy-spectra-to-map-foliar-functional-traits-over-neon-domains-in-eastern-united-states <br>
EcoSIS ID: b9dbf3db-5b9c-4ab2-88c2-26c8b39d0903 <br>
Expand Down
10 changes: 2 additions & 8 deletions inst/scripts/apply_sserbin2019_lma_plsr_to_ely_example.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,8 @@

#--------------------------------------------------------------------------------------------------#
### Load libraries
# make sure required tools are available
req.packages <- c("devtools")
new.packages <- req.packages[!(req.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages, dependencies=c("Depends", "Imports",
"LinkingTo"))
# install spectratrait package
devtools::install_github(repo = "TESTgroup-BNL/PLSR_for_plant_trait_prediction", dependencies=TRUE)
list.of.packages <- c("httr","dplyr","here","plotrix","spectratrait")
list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra",
"spectratrait")
invisible(lapply(list.of.packages, library, character.only = TRUE))
#--------------------------------------------------------------------------------------------------#

Expand Down
10 changes: 2 additions & 8 deletions inst/scripts/apply_sserbin2019_lma_plsr_to_neon_example.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,8 @@

#--------------------------------------------------------------------------------------------------#
### Load libraries
# make sure required tools are available
req.packages <- c("devtools")
new.packages <- req.packages[!(req.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages, dependencies=c("Depends", "Imports",
"LinkingTo"))
# install spectratrait package
devtools::install_github(repo = "TESTgroup-BNL/PLSR_for_plant_trait_prediction", dependencies=TRUE)
list.of.packages <- c("httr","dplyr","here","plotrix","spectratrait")
list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra",
"spectratrait")
invisible(lapply(list.of.packages, library, character.only = TRUE))
#--------------------------------------------------------------------------------------------------#

Expand Down
12 changes: 12 additions & 0 deletions inst/scripts/install_dependencies.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
####################################################################################################
# Install dependencies
#
####################################################################################################


#--------------------------------------------------------------------------------------------------#
req.packages <- c("devtools","remotes","readr","RCurl","httr","pls","dplyr","reshape2","here",
"plotrix","scales","ggplot2","gridExtra")
new.packages <- req.packages[!(req.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages, dependencies=TRUE)
#--------------------------------------------------------------------------------------------------#
10 changes: 2 additions & 8 deletions inst/scripts/pull_data_from_ecosis.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,8 @@

#--------------------------------------------------------------------------------------------------#
### Load libraries
# make sure required tools are available
req.packages <- c("devtools")
new.packages <- req.packages[!(req.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages, dependencies=c("Depends", "Imports",
"LinkingTo"))
# install spectratrait package
devtools::install_github(repo = "TESTgroup-BNL/PLSR_for_plant_trait_prediction", dependencies=TRUE)
list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra","spectratrait")
list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra",
"spectratrait")
invisible(lapply(list.of.packages, library, character.only = TRUE))
#--------------------------------------------------------------------------------------------------#

Expand Down
10 changes: 2 additions & 8 deletions inst/scripts/simple_spectra-trait_plsr_example.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,8 @@

#--------------------------------------------------------------------------------------------------#
### Load libraries
# make sure required tools are available
req.packages <- c("devtools")
new.packages <- req.packages[!(req.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages, dependencies=c("Depends", "Imports",
"LinkingTo"))
# install spectratrait package
devtools::install_github(repo = "TESTgroup-BNL/PLSR_for_plant_trait_prediction", dependencies=TRUE)
list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra","spectratrait")
list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra",
"spectratrait")
invisible(lapply(list.of.packages, library, character.only = TRUE))
#--------------------------------------------------------------------------------------------------#

Expand Down
Loading

0 comments on commit 1e2216a

Please sign in to comment.