diff --git a/NEWS.md b/NEWS.md index e465f789..c5185236 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,7 +1,8 @@ # flexsdm 1.3.5 +- it is now possible to restrict the cells used to perform collinearity reduction analysis to a geographical area smaller than the full extent of the environmental variables in [`correct_colinvar()`](https://sjevelazco.github.io/flexsdm/reference/correct_colinvar.html) - esm_ family function was improved and debugged -- occfilt_geo has a new argument "rep" to control number o repetition to filter occurrences +- `occfilt_geo` has a new argument "rep" to control the number of repetitions to filter occurrences # flexsdm 1.3.4 diff --git a/R/correct_colinvar.R b/R/correct_colinvar.R index 8f371858..f4da346a 100644 --- a/R/correct_colinvar.R +++ b/R/correct_colinvar.R @@ -16,6 +16,8 @@ #' Usage method = c('pca'). #' \item fa: Perform a Factorial Analysis and select, from the original predictors, the number of factors is defined by Broken-Stick and variables with the highest correlation to the factors are selected. Usage method = c('fa'). #' } +#' @param restric_to_region SpatVector. Area used to restrict the cells of env_layer when performing collinearity reduction. Default: NULL. +#' @param restric_pca_proj logical. Whether to geographically restrict the PCA projection to the SpatVector supplied in restric_to_region. Only used for PCA analysis. Default: FALSE. #' @param proj character. Only used for pca method. Path to a folder that contains sub-folders for the different projection #' scenarios. Variables names must have the same names as in the raster used in env_layer argument. Usage proj = "C:/User/Desktop/Projections" (see in Details more about the use of this argument) #' @param maxcell numeric. Number of raster cells to be randomly sampled. 
Taking a sample could be @@ -157,11 +159,56 @@ #' var$removed_variables #' var$uniqueness #' var$loadings +#' +#' ##%######################################################%## +#' # # +#' #### Other option to perform PCA #### +#' #### considering cell restricted to a region #### +#' # # +#' ##%######################################################%## +#' +#' # Define a calibration area +#' abies2 <- abies %>% +#' dplyr::select(x, y, pr_ab) %>% +#' dplyr::filter(pr_ab==1) +#' +#' plot(somevar[[1]]) +#' points(abies2[-3]) +#' ca <- calib_area(abies2, x = "x", y = "y", method = c("mcp"), crs=crs(somevar)) +#' plot(ca, add=T) +#' +#' # Full geographical range to perform PCA +#' pca_fr <- correct_colinvar(env_layer = somevar , +#' method = c("pca"), +#' maxcell = NULL, +#' restric_to_region = NULL, +#' restric_pca_proj = FALSE) +#' +#' # Perform PCA only with cell delimited by polygon used in restric_to_region +#' pca_rr <- correct_colinvar(env_layer = somevar , +#' method = c("pca"), +#' maxcell = NULL, +#' restric_to_region = ca, +#' restric_pca_proj = FALSE) +#' +#' # Perform and predicted PCA only with cell delimited by polygon used in restric_to_region +#' pca_rrp <- correct_colinvar(env_layer = somevar , +#' method = c("pca"), +#' maxcell = NULL, +#' restric_to_region = ca, +#' restric_pca_proj = TRUE) +#' +#' plot(pca_fr$env_layer) # PCA with all cells +#' plot(pca_rr$env_layer) # PCA with calibration area cell but predicted for entire region +#' plot(pca_rrp$env_layer) # PCA performed and predicted for cells within calibration area (ca) +#' #' } #' correct_colinvar <- function(env_layer, method, proj = NULL, + restric_to_region = NULL, + restric_pca_proj = FALSE, maxcell = NULL) { . 
<- NULL if (!any(c("pearson", "vif", "pca", "fa") %in% method)) { @@ -173,8 +220,18 @@ correct_colinvar <- function(env_layer, if (class(env_layer)[1] != "SpatRaster") { env_layer <- terra::rast(env_layer) } + if (!is.null(restric_to_region)) { + if(any(method %in% c("pca", "fa"))){ + env_layer_constr <- env_layer %>% + terra::crop(., restric_to_region) %>% + terra::mask(., restric_to_region) + } else { + env_layer <- env_layer_constr + } + } if (any(method %in% "pearson")) { + if (is.na(method["th"])) { th <- 0.7 } else { @@ -208,6 +265,7 @@ correct_colinvar <- function(env_layer, } if (any(method %in% "vif")) { + if (is.null(method["th"])) { th <- 10 } else { @@ -274,6 +332,14 @@ correct_colinvar <- function(env_layer, if (any(method %in% "pca")) { + # Restrict cells if required + if (!is.null(restric_to_region)) { + env_layer_original <- env_layer + env_layer <- env_layer_constr + } else { + env_layer_original <- env_layer + } + # mean means <- t(terra::global(env_layer, 'mean', na.rm=T)) %>% c() names(means) <- names(env_layer) @@ -314,7 +380,20 @@ correct_colinvar <- function(env_layer, # p <- terra::as.data.frame(env_layer, xy = FALSE, na.rm = TRUE) p <- stats::prcomp(p0, retx = TRUE, scale. = FALSE, center = FALSE, rank. 
= naxis) - env_layer <- terra::predict(env_layer, p) + + # env_layer <- terra::predict(env_layer, p) + if(restric_pca_proj & is.null(restric_to_region)){ + message("No data was provided to 'restric_to_region' argument, so no geographical restriction will be applied to PCA projections") + restric_pca_proj = FALSE + } + if(restric_pca_proj){ + env_layer <- terra::predict(env_layer, p) + } else { + env_layer_original <- terra::scale(env_layer_original, + center = means, + scale = stds) + env_layer <- terra::predict(env_layer_original, p) + } rm(p0) @@ -337,6 +416,13 @@ correct_colinvar <- function(env_layer, for (i in 1:length(proj)) { scen <- terra::rast(list.files(proj[i], full.names = TRUE)) scen <- scen[[names(means)]] + + if(restric_pca_proj){ + scen <- scen %>% + terra::crop(., restric_to_region) %>% + terra::mask(., restric_to_region) + } + scen <- terra::scale(scen, center = means, scale = stds) scen <- terra::predict(scen, p) terra::writeRaster( @@ -351,6 +437,7 @@ correct_colinvar <- function(env_layer, } if (any(method %in% "fa")) { + p <- terra::scale(env_layer, center = TRUE, scale = TRUE) if(is.null(maxcell)){ diff --git a/docs/articles/v01_pre_modeling.html b/docs/articles/v01_pre_modeling.html index 642c12f7..7bc71ac9 100644 --- a/docs/articles/v01_pre_modeling.html +++ b/docs/articles/v01_pre_modeling.html @@ -161,7 +161,7 @@

Installation#> #> intersect, setdiff, setequal, union library(terra) -#> terra 1.7.55 +#> terra 1.7.71 #> #> Attaching package: 'terra' #> The following object is masked from 'package:knitr': @@ -233,11 +233,11 @@

Geographic region
 regions <- system.file("external/regions.tif", package = "flexsdm")
-regions <- terra::rast(regions)
+regions <- terra::rast(regions)

How are the points distributed across our study area?

-try(plot(regions), silent=TRUE)
-points(spp[, 2:3], pch = 19, cex = 0.5, col = as.factor(spp$species))
+try(plot(regions), silent=TRUE) +points(spp[, 2:3], pch = 19, cex = 0.5, col = as.factor(spp$species))

@@ -271,7 +271,7 @@

1. Buffer
-crs(regions, proj=TRUE)
+crs(regions, proj=TRUE)
 #> [1] "+proj=aea +lat_0=0 +lon_0=-120 +lat_1=34 +lat_2=40.5 +x_0=0 +y_0=-4000000 +datum=NAD83 +units=m +no_defs"
 
 ca_1 <- calib_area(
@@ -279,11 +279,11 @@ 

1. Buffer x = "x", y = "y", method = c("buffer", width = 40000), - crs = crs(regions) + crs = crs(regions) ) -plot(regions, main = "Buffer method") -plot(ca_1, add = TRUE) -points(spp1[, 2:3], pch = 19, cex = 0.5)

+plot(regions, main = "Buffer method") +plot(ca_1, add = TRUE) +points(spp1[, 2:3], pch = 19, cex = 0.5)

@@ -297,12 +297,12 @@

2. Minimum convex polygon x = "x", y = "y", method = c("mcp"), - crs = crs(regions) + crs = crs(regions) ) -plot(regions, main = "Minimum convex polygon method") -plot(ca_2, add = TRUE) -points(spp1[, 2:3], pch = 19, cex = 0.5)

+plot(regions, main = "Minimum convex polygon method") +plot(ca_2, add = TRUE) +points(spp1[, 2:3], pch = 19, cex = 0.5)

@@ -315,12 +315,12 @@

3. Buffered minimum convex polygon x = "x", y = "y", method = c("bmcp", width = 40000), - crs = crs(regions) + crs = crs(regions) ) -plot(regions, main = "Buffered minimum convex polygon") -plot(ca_3, add = TRUE) -points(spp1[, 2:3], pch = 19, cex = 0.5)

+plot(regions, main = "Buffered minimum convex polygon") +plot(ca_3, add = TRUE) +points(spp1[, 2:3], pch = 19, cex = 0.5)

@@ -336,20 +336,20 @@

4. Mask area) are on the right.

 clusters <- system.file("external/clusters.shp", package = "flexsdm")
-clusters <- terra::vect(clusters)
+clusters <- terra::vect(clusters)
 
 ca_4 <- calib_area(
   data = spp1,
   x = "x",
   y = "y",
   method = c("mask", clusters, "clusters"), 
-  crs = crs(regions)
+  crs = crs(regions)
 )
 
 par(mfrow = c(1, 2))
-plot(clusters, main = "Original polygons")
-plot(ca_4, main = "Polygons with points (mask)")
-points(spp1[, 2:3], pch = 19, cex = 0.5)
+plot(clusters, main = "Original polygons") +plot(ca_4, main = "Polygons with points (mask)") +points(spp1[, 2:3], pch = 19, cex = 0.5)

@@ -369,18 +369,18 @@

Environmental predictors
 somevar <- system.file("external/somevar.tif", package = "flexsdm")
 
-somevar <- terra::rast(somevar)
+somevar <- terra::rast(somevar)
 
-names(somevar) <- c("aet", "cwd", "tmx", "tmn")
+names(somevar) <- c("aet", "cwd", "tmx", "tmn")
 
-plot(somevar)
+plot(somevar)

The relationship between different environmental variables can be visualized with the pairs() function from the terra package. Several of our variables are highly correlated (.89 for predictors tmx and tmn).

-terra::pairs(somevar)
+terra::pairs(somevar)

So how can we correct for or reduce this collinearity? The function correct_colinvar() has four methods to deal with collinearity: pearson, @@ -535,6 +535,15 @@

4. Factorial analysisfa_var$uniqueness fa_var$loadings +
+

5. Comments +

+

In flexsdm it is also possible to restrict the cells used to +perform the collinearity reduction analysis to a geographical area smaller +than the full extent of the environmental variables. See the +‘restric_to_region’ and ‘restric_pca_proj’ arguments of correct_colinvar +and the alternative PCA examples given in that function's help.

+

Data filtering @@ -564,7 +573,7 @@

Environmental filtering
 
-spp1$idd <- 1:nrow(spp1)
+spp1$idd <- 1:nrow(spp1)
 
 filt_env5 <- occfilt_env(
   data = spp1,
@@ -607,14 +616,14 @@ 

Environmental filtering par(mfrow = c(2, 2)) -somevar[[1]] %>% plot(main = "Original occurrence data") -points(spp1 %>% select(x, y)) -somevar[[1]] %>% plot(main = "Filtering with 5 bins") -points(filt_env5 %>% select(x, y)) -somevar[[1]] %>% plot(main = "Filtering with 8 bins") -points(filt_env8 %>% select(x, y)) -somevar[[1]] %>% plot(main = "Filtering with 12 bins") -points(filt_env12 %>% select(x, y))

+somevar[[1]] %>% plot(main = "Original occurrence data") +points(spp1 %>% select(x, y)) +somevar[[1]] %>% plot(main = "Filtering with 5 bins") +points(filt_env5 %>% select(x, y)) +somevar[[1]] %>% plot(main = "Filtering with 8 bins") +points(filt_env8 %>% select(x, y)) +somevar[[1]] %>% plot(main = "Filtering with 12 bins") +points(filt_env12 %>% select(x, y))

@@ -635,7 +644,7 @@

Geographical filtering y = "y", env_layer = somevar, method = c("moran"), - prj = crs(somevar) + prj = crs(somevar) ) #> Extracting values from raster ... #> 16 records were removed because they have NAs for some variables @@ -650,7 +659,7 @@

Geographical filtering y = "y", env_layer = somevar, method = c("cellsize", factor = "3"), # coarser resolution than the provided raster - prj = crs(somevar) + prj = crs(somevar) ) #> Extracting values from raster ... #> 16 records were removed because they have NAs for some variables @@ -664,23 +673,23 @@

Geographical filtering y = "y", env_layer = somevar, method = c("defined", d = "30"), - prj = crs(somevar) + prj = crs(somevar) ) #> Extracting values from raster ... #> 16 records were removed because they have NAs for some variables #> Number of unfiltered records: 234 #> Distance threshold(km): 30 -#> Number of filtered records: 78 +#> Number of filtered records: 0 par(mfrow = c(2, 2)) -somevar[[1]] %>% plot(main = "Original occurrence data") -points(spp1 %>% select(x, y)) -somevar[[1]] %>% plot(main = "Filtering with Moran's I") -points(filt_geo1 %>% select(x, y)) -somevar[[1]] %>% plot(main = "Filtering with cell size") -points(filt_geo2 %>% select(x, y)) -somevar[[1]] %>% plot(main = "Filtering with defined distance (30km)") -points(filt_geo3 %>% select(x, y))

+somevar[[1]] %>% plot(main = "Original occurrence data") +points(spp1 %>% select(x, y)) +somevar[[1]] %>% plot(main = "Filtering with Moran's I") +points(filt_geo1 %>% select(x, y)) +somevar[[1]] %>% plot(main = "Filtering with cell size") +points(filt_geo2 %>% select(x, y)) +somevar[[1]] %>% plot(main = "Filtering with defined distance (30km)") +points(filt_geo3 %>% select(x, y))

@@ -750,8 +759,8 @@

2. Spatial band cross-validati #> Creating basic raster mask... #> Searching for the optimal number of bands...
-plot(sp_part2$grid, col = gray.colors(20))
-points(sp_part2$part[c("x", "y")],
+plot(sp_part2$grid, col = gray.colors(20))
+points(sp_part2$part[c("x", "y")],
   col = rainbow(8)[sp_part2$part$.part],
   cex = 0.9,
   pch = c(1, 19)[sp_part2$part$pr_ab + 1]
@@ -786,8 +795,8 @@ 

3. Spatial block cross-valid #> Creating basic raster mask... #> Searching for the optimal grid size... -plot(sp_part3$grid) -points(sp_part3$part[c("x", "y")], +plot(sp_part3$grid) +points(sp_part3$part[c("x", "y")], col = c("blue", "red")[sp_part3$part$.part], cex = 0.5, pch = 19 @@ -801,16 +810,16 @@

3. Spatial block cross-valid really useful for generating pseudo-absence or background sample points, which we will explore in the next section.

-terra::res(sp_part3$grid)
+terra::res(sp_part3$grid)
 #> [1] 881131 881131
-terra::res(somevar)
+terra::res(somevar)
 #> [1] 1890 1890
 
 grid_env <- get_block(env_layer = somevar, best_grid = sp_part3$grid)
 
-plot(grid_env) # this is a block layer with the same layer
+plot(grid_env) # this is a block layer with the same layer
 # properties as environmental variables.
-points(sp_part3$part[c("x", "y")],
+points(sp_part3$part[c("x", "y")],
   col = c("blue", "red")[sp_part3$part$.part],
   cex = 0.5,
   pch = 19
@@ -844,9 +853,9 @@ 

4. Environmental a #> 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 #> Searching best partition... -plot(regions, col = gray.colors(9)) -points(sp_part4$part[c("x", "y")], - col = hcl.colors(length(unique(sp_part4$part)))[sp_part4$part$.part], +plot(regions, col = gray.colors(9)) +points(sp_part4$part[c("x", "y")], + col = hcl.colors(length(unique(sp_part4$part)))[sp_part4$part$.part], cex = 1, pch = 19 )

@@ -898,13 +907,13 @@

1. Sample backgroundpar(mfrow = c(2, 1)) -plot(grid_env, main = "Presence points") -plot(ca_1, add = TRUE) -points(p_data, cex = .7, pch = 19) +plot(grid_env, main = "Presence points") +plot(ca_1, add = TRUE) +points(p_data, cex = .7, pch = 19) -plot(grid_env, main = "Background points") -plot(ca_1, add = TRUE) -points(bg, cex = .1, pch = 19)

+plot(grid_env, main = "Background points") +plot(ca_1, add = TRUE) +points(bg, cex = .1, pch = 19)

@@ -946,13 +955,13 @@

2. Sample pseudo-absences par(mfrow = c(2, 1)) -plot(grid_env, main = "Presence points") -plot(ca_1, add = TRUE) -points(p_data, cex = .7, pch = 19) +plot(grid_env, main = "Presence points") +plot(ca_1, add = TRUE) +points(p_data, cex = .7, pch = 19) -plot(grid_env, main = "Pseudo-absence points") -plot(ca_1, add = TRUE) -points(psa, cex = .7, pch = 19)

+plot(grid_env, main = "Pseudo-absence points") +plot(ca_1, add = TRUE) +points(psa, cex = .7, pch = 19)

diff --git a/docs/articles/v01_pre_modeling_files/figure-html/geo occurrence filtering-1.png b/docs/articles/v01_pre_modeling_files/figure-html/geo occurrence filtering-1.png index 9bfc8346..1915a634 100644 Binary files a/docs/articles/v01_pre_modeling_files/figure-html/geo occurrence filtering-1.png and b/docs/articles/v01_pre_modeling_files/figure-html/geo occurrence filtering-1.png differ diff --git a/docs/news/index.html b/docs/news/index.html index f551346f..718fc53b 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -70,8 +70,11 @@

flexsdm 1.3.5

-

flexsdm 1.3.4

diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 359e0b3e..856b2d5e 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -8,7 +8,7 @@ articles: v04_Red_fir_example: v04_Red_fir_example.html v05_Rare_species_example: v05_Rare_species_example.html v06_Extrapolation_example: v06_Extrapolation_example.html -last_built: 2024-03-13T13:14Z +last_built: 2024-04-26T16:17Z urls: reference: https://sjevelazco.github.io/flexsdm/reference article: https://sjevelazco.github.io/flexsdm/articles diff --git a/docs/reference/correct_colinvar.html b/docs/reference/correct_colinvar.html index ff9ce18d..32b222a0 100644 --- a/docs/reference/correct_colinvar.html +++ b/docs/reference/correct_colinvar.html @@ -75,7 +75,14 @@

Usage

-
correct_colinvar(env_layer, method, proj = NULL, maxcell = NULL)
+
correct_colinvar(
+  env_layer,
+  method,
+  proj = NULL,
+  restric_to_region = NULL,
+  restric_pca_proj = FALSE,
+  maxcell = NULL
+)
@@ -104,6 +111,14 @@

Arguments +
restric_to_region
+

SpatVector. Area used to restrict the cells of env_layer when performing collinearity reduction. Default: NULL.

+ + +
restric_pca_proj
+

logical. Whether to geographically restrict the PCA projection to the SpatVector supplied in restric_to_region. Only used for PCA analysis. Default: FALSE.

+ +
maxcell

numeric. Number of raster cells to be randomly sampled. Taking a sample could be useful to reduce memory usage for large rasters. If NULL, the function will use all @@ -170,7 +185,7 @@

Examplesrequire(dplyr) somevar <- system.file("external/somevar.tif", package = "flexsdm") -somevar <- terra::rast(somevar) +somevar <- terra::rast(somevar) # Perform pearson collinearity control var <- correct_colinvar(env_layer = somevar, method = c("pearson", th = "0.7")) @@ -190,7 +205,7 @@

Examples # Perform pca collinearity control var <- correct_colinvar(env_layer = somevar, method = c("pca")) -plot(var$env_layer) +plot(var$env_layer) var$env_layer var$coefficients var$cumulative_variance @@ -206,10 +221,10 @@

Examples somevar <- system.file("external/somevar.tif", package = "flexsdm") -somevar <- terra::rast(somevar) +somevar <- terra::rast(somevar) -terra::writeRaster(somevar, file.path(dir_sc[1], "somevar.tif"), overwrite=TRUE) -terra::writeRaster(somevar, file.path(dir_sc[2], "somevar.tif"), overwrite=TRUE) +terra::writeRaster(somevar, file.path(dir_sc[1], "somevar.tif"), overwrite=TRUE) +terra::writeRaster(somevar, file.path(dir_sc[2], "somevar.tif"), overwrite=TRUE) ## Perform pca with projections dir_w_proj <- dirname(dir_sc[1]) @@ -228,6 +243,49 @@

Examplesvar$removed_variables var$uniqueness var$loadings + +##%######################################################%## +# # +#### Other option to perform PCA #### +#### considering cell restricted to a region #### +# # +##%######################################################%## + +# Define a calibration area +abies2 <- abies %>% + dplyr::select(x, y, pr_ab) %>% + dplyr::filter(pr_ab==1) + +plot(somevar[[1]]) +points(abies2[-3]) +ca <- calib_area(abies2, x = "x", y = "y", method = c("mcp"), crs=crs(somevar)) +plot(ca, add=T) + +# Full geographical range to perform PCA +pca_fr <- correct_colinvar(env_layer = somevar , + method = c("pca"), + maxcell = NULL, + restric_to_region = NULL, + restric_pca_proj = FALSE) + +# Perform PCA only with cell delimited by polygon used in restric_to_region +pca_rr <- correct_colinvar(env_layer = somevar , + method = c("pca"), + maxcell = NULL, + restric_to_region = ca, + restric_pca_proj = FALSE) + +# Perform and predicted PCA only with cell delimited by polygon used in restric_to_region +pca_rrp <- correct_colinvar(env_layer = somevar , + method = c("pca"), + maxcell = NULL, + restric_to_region = ca, + restric_pca_proj = TRUE) + +plot(pca_fr$env_layer) # PCA with all cells +plot(pca_rr$env_layer) # PCA with calibration area cell but predicted for entire region +plot(pca_rrp$env_layer) # PCA performed and predicted for cells within calibration area (ca) + }

diff --git a/docs/search.json b/docs/search.json index a1a12bb7..fa324ca3 100644 --- a/docs/search.json +++ b/docs/search.json @@ -1 +1 @@ -[{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"introduction","dir":"Articles","previous_headings":"","what":"Introduction","title":"flexsdm: Overview of Pre-modeling functions","text":"Species distribution modeling (SDM) become standard tool many research areas, including ecology, conservation biology, biogeography, paleobiogeography, epidemiology. SDM active area theoretical methodological research. flexsdm package provides users ability manipulate parameterize models variety ways meet unique research needs. flexibility enables users define complete partial modeling procedure specific modeling situation (e.g., number variables, number records, different algorithms ensemble methods, algorithms tuning, etc.). vignette, users learn first set functions flexsdm package fall “pre-modeling” umbrella (see full list). pre-modeling functions calib_area() Delimit calibration area constructing species distribution models correct_colinvar() Collinearity reduction predictors env_outliers() Integration outliers detection methods environmental space part_random() Data partitioning training testing models part_sblock() Spatial block cross-validation part_sband() Spatial band cross-validation part_senv() Environmental cross-validation plot_res() Plot different resolutions used part_sblock get_block() Transform spatial partition layer spatial properties environmental variables sample_background() Sample background points sample_pseudoabs() Sample pseudo-absence sdm_directory() Create directories saving outputs flexsdm sdm_extract() Extract environmental data based x y coordinates occfilt_env() Perform environmental filtering species occurrences occfilt_geo() Perform geographical filtering species 
occurrences","code":""},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"installation","dir":"Articles","previous_headings":"","what":"Installation","title":"flexsdm: Overview of Pre-modeling functions","text":"First, install flexsdm package. can install released version flexsdm github :","code":"# devtools::install_github('sjevelazco/flexsdm') library(flexsdm) library(dplyr) #> #> Attaching package: 'dplyr' #> The following objects are masked from 'package:stats': #> #> filter, lag #> The following objects are masked from 'package:base': #> #> intersect, setdiff, setequal, union library(terra) #> terra 1.7.55 #> #> Attaching package: 'terra' #> The following object is masked from 'package:knitr': #> #> spin"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"project-directory-setup","dir":"Articles","previous_headings":"","what":"Project Directory Setup","title":"flexsdm: Overview of Pre-modeling functions","text":"building SDM’s, organizing folders (directories) project save time confusion. project directory main project folder store relevant data results current project. Now, let’s create project directory initial data model results stored. function sdm_directory() can , based types model algorithms want use /types projections like make. First decide computer like store inputs outputs project (main directory) use dir.create() create main directory. 
Next, specify whether want include folders projections, calibration areas, algorithms, ensembles, thresholds.","code":"my_project <- file.path(file.path(tempdir(), \"flex_sdm_project\")) dir.create(my_project) project_directory <- sdm_directory( main_dir = my_project, projections = NULL, calibration_area = TRUE, algorithm = c(\"fit_max\", \"tune_raf\"), ensemble = c(\"mean\"), threshold = TRUE, return_vector = TRUE )"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"data-species-occurrence-and-background-data","dir":"Articles","previous_headings":"","what":"Data, species occurrence and background data","title":"flexsdm: Overview of Pre-modeling functions","text":"tutorial, using species occurrences available flexsdm package. “spp” example dataset includes pr_ab column (presence = 1, absence = 0), location columns (x, y). can load “spp” data local R environment using code :","code":"data(\"spp\") spp #> # A tibble: 1,150 × 4 #> species x y pr_ab #> #> 1 sp1 -5541. -145138. 0 #> 2 sp1 -51981. 16322. 0 #> 3 sp1 -269871. 69512. 1 #> 4 sp1 -96261. -32008. 0 #> 5 sp1 269589. -566338. 0 #> 6 sp1 29829. -328468. 0 #> 7 sp1 -152691. 393782. 0 #> 8 sp1 -195081. 253652. 0 #> 9 sp1 -951. -277978. 0 #> 10 sp1 145929. -271498. 0 #> # ℹ 1,140 more rows"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"geographic-region","dir":"Articles","previous_headings":"","what":"Geographic region","title":"flexsdm: Overview of Pre-modeling functions","text":"species occurrences located California Floristic Province (far western USA). “regions” dataset can used visualize study area geographic space. 
points distributed across study area?","code":"regions <- system.file(\"external/regions.tif\", package = \"flexsdm\") regions <- terra::rast(regions) try(plot(regions), silent=TRUE) points(spp[, 2:3], pch = 19, cex = 0.5, col = as.factor(spp$species))"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"calibration-area","dir":"Articles","previous_headings":"","what":"Calibration area","title":"flexsdm: Overview of Pre-modeling functions","text":"important decision SDM delimit model’s calibration area, geographic space use train model(s). Choice calibration area affects modeling steps, including sampling pseudo-absence background points, performance metrics, geographic patterns habitat suitability. want train SDM using entire extent United States interested geographic distribution environmental controls rare plant species found mountaintops Sierra Nevada, California! Let’s use presence locations one species exercise. calib_area() function offers three methods defining calibration area: buffer, mcp, bmcp, mask. briefly go .","code":"spp1 <- spp %>% dplyr::filter(species == \"sp1\") %>% dplyr::filter(pr_ab == 1) %>% dplyr::select(-pr_ab)"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"buffer","dir":"Articles","previous_headings":"Calibration area","what":"1. Buffer","title":"flexsdm: Overview of Pre-modeling functions","text":"calibration area defined using buffers around presence points. User’s can specify distance around points using “width” argument. 
buffer width value interpreted m CRS longitude/latitude, map units cases.","code":"crs(regions, proj=TRUE) #> [1] \"+proj=aea +lat_0=0 +lon_0=-120 +lat_1=34 +lat_2=40.5 +x_0=0 +y_0=-4000000 +datum=NAD83 +units=m +no_defs\" ca_1 <- calib_area( data = spp1, x = \"x\", y = \"y\", method = c(\"buffer\", width = 40000), crs = crs(regions) ) plot(regions, main = \"Buffer method\") plot(ca_1, add = TRUE) points(spp1[, 2:3], pch = 19, cex = 0.5)"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"minimum-convex-polygon","dir":"Articles","previous_headings":"Calibration area","what":"2. Minimum convex polygon","title":"flexsdm: Overview of Pre-modeling functions","text":"minimum convex polygon (mcp) method produces much simpler shape.","code":"ca_2 <- calib_area( data = spp1, x = \"x\", y = \"y\", method = c(\"mcp\"), crs = crs(regions) ) plot(regions, main = \"Minimum convex polygon method\") plot(ca_2, add = TRUE) points(spp1[, 2:3], pch = 19, cex = 0.5)"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"buffered-minimum-convex-polygon","dir":"Articles","previous_headings":"Calibration area","what":"3. Buffered minimum convex polygon","title":"flexsdm: Overview of Pre-modeling functions","text":"can also create buffer around minimum convex polygon.","code":"ca_3 <- calib_area( data = spp1, x = \"x\", y = \"y\", method = c(\"bmcp\", width = 40000), crs = crs(regions) ) plot(regions, main = \"Buffered minimum convex polygon\") plot(ca_3, add = TRUE) points(spp1[, 2:3], pch = 19, cex = 0.5)"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"mask","dir":"Articles","previous_headings":"Calibration area","what":"4. Mask","title":"flexsdm: Overview of Pre-modeling functions","text":"mask method allows polygons selected intersect species locations delineate calibration area. 
useful expect species distributions associated ecologically significant (mapped) ecoregions, interested distributions within political boundaries. use random set polygons named “clusters” illustrate mask method. original polygons left polygons contain points (“mask” calibration area) right.","code":"clusters <- system.file(\"external/clusters.shp\", package = \"flexsdm\") clusters <- terra::vect(clusters) ca_4 <- calib_area( data = spp1, x = \"x\", y = \"y\", method = c(\"mask\", clusters, \"clusters\"), crs = crs(regions) ) par(mfrow = c(1, 2)) plot(clusters, main = \"Original polygons\") plot(ca_4, main = \"Polygons with points (mask)\") points(spp1[, 2:3], pch = 19, cex = 0.5)"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"reducing-collinearity-among-the-predictors","dir":"Articles","previous_headings":"","what":"Reducing collinearity among the predictors","title":"flexsdm: Overview of Pre-modeling functions","text":"Predictor collinearity common issue SDMs, can lead model overfitting inaccurate tests significance predictors (De Marco & Nóbrega, 2018; Dormann et al., 2013).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"environmental-predictors","dir":"Articles","previous_headings":"Reducing collinearity among the predictors","what":"Environmental predictors","title":"flexsdm: Overview of Pre-modeling functions","text":"use four climatic variables available flexsdm package: actual evapotranspiration (CFP_1), climatic water deficit (CFP_2), maximum temperature warmest month (CFP_3), minimum temperature coldest month (CFP_4). relationship different environmental variables can visualized pairs() function terra package. Several variables highly correlated (.89 predictors tmx tmn). can correct reduce collinearity? function correct_colinvar() four methods deal collinearity: pearson, vif, pca, fa. 
method returns 1) raster object (SpatRaster) selected predictors 2) useful outputs relevant method. functions used supplementary tools, predictor selection SDMs complicated ultimately based relationship environment species’ biology. said, functions offer options exploring relationships predictor variables can aid predictor selection process. Let’s look method:","code":"somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) names(somevar) <- c(\"aet\", \"cwd\", \"tmx\", \"tmn\") plot(somevar) terra::pairs(somevar)"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"pearson-correlation","dir":"Articles","previous_headings":"Reducing collinearity among the predictors","what":"1. Pearson correlation","title":"flexsdm: Overview of Pre-modeling functions","text":"method returns three objects 1) SpatRaster environmental variables correlation given threshold (default 0.7), 2) names variables correlation given threshold “removed” environmental data, 3) correlation matrix environmental variables. However, strongly urge users use information along knowledge specific species-environment relationships select ecologically-relevant predictors SDMs. example, , modeling distribution plant species water-limited Mediterranean-type ecosystem, may want include climatic water deficit (cwd) actual evapotranspiration (aet). Despite highly correlated, variables capture water availability evaporative demand, respectively (Stephenson 1998). Additionally, minimum absolute temperature strongly controls vegetation distributions (Woodward, Lomas, Kelly 2004), select tmn (minimum temperature coldest month) example. 
references, see:","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"woodward-f--i--m--r--lomas-and-c--k--kelly--2004--global-climate-and-the-distribution-of-plant-biomes--philosophical-transactions-of-the-royal-society-of-london--series-b-biological-sciences-35914651476-","dir":"Articles","previous_headings":"Reducing collinearity among the predictors > 1. Pearson correlation","what":"2. Woodward, F. I., M. R. Lomas, and C. K. Kelly. 2004. Global climate and the distribution of plant biomes. Philosophical transactions of the Royal Society of London. Series B, Biological sciences 359:1465–1476.","title":"flexsdm: Overview of Pre-modeling functions","text":"","code":"pearson_var <- correct_colinvar(somevar, method = c(\"pearson\", th = \"0.7\")) pearson_var$cor_table #> aet cwd tmx tmn #> aet 0.0000000 0.7689893 0.7924813 0.7845401 #> cwd 0.7689893 0.0000000 0.4168956 0.5881831 #> tmx 0.7924813 0.4168956 0.0000000 0.7323259 #> tmn 0.7845401 0.5881831 0.7323259 0.0000000 pearson_var$cor_variables #> $aet #> [1] \"cwd\" \"tmx\" \"tmn\" #> #> $cwd #> [1] \"aet\" #> #> $tmx #> [1] \"aet\" \"tmn\" #> #> $tmn #> [1] \"aet\" \"tmx\" chosen_variables <- somevar[[c('cwd','aet','tmn')]]"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"variance-inflation-factor","dir":"Articles","previous_headings":"Reducing collinearity among the predictors","what":"2. Variance inflation factor","title":"flexsdm: Overview of Pre-modeling functions","text":"method removes predictors variance inflation factor higher chosen threshold. , users can specify threshold (default 10). method retains predictors aet, tmx, tmn removes cwd. 
output method matches produced pearson method: 1) environmental layer retained variables, 2) list removed variables, 3) correlation matrix variables.","code":"vif_var <- correct_colinvar(somevar, method = c(\"vif\", th = \"10\")) vif_var$env_layer #> class : SpatRaster #> dimensions : 558, 394, 4 (nrow, ncol, nlyr) #> resolution : 1890, 1890 (x, y) #> extent : -373685.8, 370974.2, -604813.3, 449806.7 (xmin, xmax, ymin, ymax) #> coord. ref. : +proj=aea +lat_0=0 +lon_0=-120 +lat_1=34 +lat_2=40.5 +x_0=0 +y_0=-4000000 +datum=NAD83 +units=m +no_defs #> source : somevar.tif #> names : aet, cwd, tmx, tmn #> min values : 0.000, -9.39489, 22.44685, 0.2591429 #> max values : 1357.865, 14.20047, 614.69125, 64.3747588 vif_var$removed_variables #> NULL vif_var$vif_table #> # A tibble: 4 × 2 #> Variables VIF #> #> 1 aet 7.62 #> 2 cwd 3.29 #> 3 tmx 3.95 #> 4 tmn 2.89"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"principal-component-analysis","dir":"Articles","previous_headings":"Reducing collinearity among the predictors","what":"3. Principal component analysis","title":"flexsdm: Overview of Pre-modeling functions","text":"Finally, “pca” method performs principal components analysis predictors returns axis accounts 95% total variance system. method returns 1) SpatRaster object selected environmental variables, 2) matrix coefficients principal components predictors, 3) tibble cumulative variance explained selected principal components.","code":"pca_var <- correct_colinvar(somevar, method = c(\"pca\")) pca_var$env_layer #> class : SpatRaster #> dimensions : 558, 394, 3 (nrow, ncol, nlyr) #> resolution : 1890, 1890 (x, y) #> extent : -373685.8, 370974.2, -604813.3, 449806.7 (xmin, xmax, ymin, ymax) #> coord. ref. 
: +proj=aea +lat_0=0 +lon_0=-120 +lat_1=34 +lat_2=40.5 +x_0=0 +y_0=-4000000 +datum=NAD83 +units=m +no_defs #> source(s) : memory #> names : PC1, PC2, PC3 #> min values : -8.453273, -4.260147, -1.525085 #> max values : 2.827164, 3.337545, 4.342864 pca_var$coefficients #> # A tibble: 4 × 5 #> variable PC1 PC2 PC3 PC4 #> #> 1 aet 0.550 -0.0722 0.296 -0.778 #> 2 cwd 0.450 -0.777 0.103 0.429 #> 3 tmx -0.485 -0.594 -0.450 -0.459 #> 4 tmn -0.511 -0.198 0.836 -0.0241 pca_var$cumulative_variance #> # A tibble: 4 × 2 #> PC cvar #> #> 1 1 0.764 #> 2 2 0.915 #> 3 3 0.979 #> 4 4 1"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"factorial-analysis","dir":"Articles","previous_headings":"Reducing collinearity among the predictors","what":"4. Factorial analysis","title":"flexsdm: Overview of Pre-modeling functions","text":"Selecting “fa” method performs factorial analysis reduce dimensionality selects predictor(s) highest correlation axis. outputs method similar produced ‘pca’ method.","code":"fa_var <- correct_colinvar(env_layer = somevar, method = c(\"fa\")) fa_var$env_layer fa_var$number_factors fa_var$removed_variables fa_var$uniqueness fa_var$loadings"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"data-filtering","dir":"Articles","previous_headings":"","what":"Data filtering","title":"flexsdm: Overview of Pre-modeling functions","text":"Sample bias species occurrence data common issue ecological studies filtering occurrence data can reduce bias. flexsdm provides two functions different types filtering, based geographical environmental “thinning”, randomly removing points dense (oversampling) geographical environmental space. 
can improve model performance reduce redundancy data.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"environmental-filtering","dir":"Articles","previous_headings":"Data filtering","what":"Environmental filtering","title":"flexsdm: Overview of Pre-modeling functions","text":"function occfilt_env(), performs environmental filtering species occurrence data. method basically reduces environmental redundancy data based methods outlined Valera et al. (2014). However, function unique flexsdm, able use number environmental dimensions perform PCA filtering. example, use original environmental data (somevar) occurrence data single species (spp1). filtering occurrences, important row species data unique code (example: idd). function also gives user option specifying number classes used split environmental condition. explore results using 5, 8, 12 bins. Increasing number bins increases number occurrence points retained.","code":"spp1$idd <- 1:nrow(spp1) filt_env5 <- occfilt_env( data = spp1, x = \"x\", y = \"y\", id = \"idd\", env_layer = somevar, nbins = 5 ) #> Extracting values from raster ... #> 12 records were removed because they have NAs for some variables #> Number of unfiltered records: 238 #> Number of filtered records: 57 filt_env8 <- occfilt_env( data = spp1, x = \"x\", y = \"y\", id = \"idd\", env_layer = somevar, nbins = 8 ) #> Extracting values from raster ... #> 12 records were removed because they have NAs for some variables #> Number of unfiltered records: 238 #> Number of filtered records: 112 filt_env12 <- occfilt_env( data = spp1, x = \"x\", y = \"y\", id = \"idd\", env_layer = somevar, nbins = 12 ) #> Extracting values from raster ... 
#> 12 records were removed because they have NAs for some variables #> Number of unfiltered records: 238 #> Number of filtered records: 173 par(mfrow = c(2, 2)) somevar[[1]] %>% plot(main = \"Original occurrence data\") points(spp1 %>% select(x, y)) somevar[[1]] %>% plot(main = \"Filtering with 5 bins\") points(filt_env5 %>% select(x, y)) somevar[[1]] %>% plot(main = \"Filtering with 8 bins\") points(filt_env8 %>% select(x, y)) somevar[[1]] %>% plot(main = \"Filtering with 12 bins\") points(filt_env12 %>% select(x, y))"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"geographical-filtering","dir":"Articles","previous_headings":"Data filtering","what":"Geographical filtering","title":"flexsdm: Overview of Pre-modeling functions","text":"Next, look occfilt_geo(), three alternatives determine distance threshold pair points: “moran” determines threshold distance points minimizes spatial autocorrelation occurrence data; “cellsize” filters occurrences based resolution predictors (specified coarser resolution); finally, “determined” allows users manually determine distance threshold.","code":"filt_geo1 <- occfilt_geo( data = spp1, x = \"x\", y = \"y\", env_layer = somevar, method = c(\"moran\"), prj = crs(somevar) ) #> Extracting values from raster ... #> 16 records were removed because they have NAs for some variables #> Number of unfiltered records: 234 #> Threshold for Moran: 0.1 #> Distance threshold(km): 345.859 #> Number of filtered records: 4 filt_geo2 <- occfilt_geo( data = spp1, x = \"x\", y = \"y\", env_layer = somevar, method = c(\"cellsize\", factor = \"3\"), # coarser resolution than the provided raster prj = crs(somevar) ) #> Extracting values from raster ... 
#> 16 records were removed because they have NAs for some variables #> Number of unfiltered records: 234 #> Distance threshold(km): 4.617 #> Number of filtered records: 212 filt_geo3 <- occfilt_geo( data = spp1, x = \"x\", y = \"y\", env_layer = somevar, method = c(\"defined\", d = \"30\"), prj = crs(somevar) ) #> Extracting values from raster ... #> 16 records were removed because they have NAs for some variables #> Number of unfiltered records: 234 #> Distance threshold(km): 30 #> Number of filtered records: 78 par(mfrow = c(2, 2)) somevar[[1]] %>% plot(main = \"Original occurrence data\") points(spp1 %>% select(x, y)) somevar[[1]] %>% plot(main = \"Filtering with Moran's I\") points(filt_geo1 %>% select(x, y)) somevar[[1]] %>% plot(main = \"Filtering with cell size\") points(filt_geo2 %>% select(x, y)) somevar[[1]] %>% plot(main = \"Filtering with defined distance (30km)\") points(filt_geo3 %>% select(x, y))"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"data-partitioning","dir":"Articles","previous_headings":"","what":"Data partitioning","title":"flexsdm: Overview of Pre-modeling functions","text":"Data partitioning, splitting data testing training groups, key step building SDMs. flexsdm offers multiple options data partitioning, including part_random(), part_sband(), part_sblock(), part_senv(). Let’s explore methods.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"conventional-data-partitioning-methods-part_random","dir":"Articles","previous_headings":"Data partitioning","what":"1. Conventional data partitioning methods (part_random)","title":"flexsdm: Overview of Pre-modeling functions","text":"part_random() function provides users ability divide species occurrence data based conventional partition methods including k-folds, repeated k-folds, leave-one-cross-validation, bootstrap partitioning. , use “kfold” method 10 folds divide data. 
results 10 folds occurrence data 25 observations fold.","code":"spp1$pr_ab <- 1 # Add a column with 1 to denote that this is presences only data sp_part1 <- part_random( data = spp1, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 10) ) sp_part1$.part %>% table() #> . #> 1 2 3 4 5 6 7 8 9 10 #> 25 25 25 25 25 25 25 25 25 25"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"spatial-band-cross-validation-part_sband","dir":"Articles","previous_headings":"Data partitioning","what":"2. Spatial band cross-validation (part_sband)","title":"flexsdm: Overview of Pre-modeling functions","text":"part_sband() part_sblock() partition data based position geographic space. Geographically structured data partitioning methods especially useful users want evaluate model transferability different regions time periods. function part_sband tests different numbers spatial partitions using latitudinal longitudinal bands selects best number bands given presence, presence-absence, presence-background dataset. procedure based spatial autocorrelation, environmental similarity, number presence/absence records band partition. function’s output includes 1) tibble presence/absence locations assigned partition number, 2) tibble information best partition, 3) SpatRaster showing selected grid.","code":"set.seed(1) sp_part2 <- part_sband( env_layer = somevar, data = spp1, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", type = \"lat\", # specify bands across different degrees of longitude 'lon' or latitude 'lat'. min_bands = 2, # minimum number of spatial bands to be tested max_bands = 20, # maximum number of spatial bands to be tested n_part = 2, prop = 0.5 ) #> 12 rows were excluded from database because NAs were found #> The following number of bands will be tested: #> 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 #> Creating basic raster mask... #> Searching for the optimal number of bands... 
plot(sp_part2$grid, col = gray.colors(20)) points(sp_part2$part[c(\"x\", \"y\")], col = rainbow(8)[sp_part2$part$.part], cex = 0.9, pch = c(1, 19)[sp_part2$part$pr_ab + 1] )"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"spatial-block-cross-validation-part_sblock","dir":"Articles","previous_headings":"Data partitioning","what":"3. Spatial block cross-validation (part_sblock)","title":"flexsdm: Overview of Pre-modeling functions","text":"part_sblock() function similar part_sband() instead bands explores spatial blocks different raster cells sizes returns one best suited input dataset. , can see data divided different “blocks” training testing. However, notice grid partition produced part_sblock different resolution original environmental variables. want map layer properties (.e. resolution, extent, NAs) original environmental variables, apply get_block() function grid resulting part_sblock(). layer can really useful generating pseudo-absence background sample points, explore next section.","code":"sp_part3 <- part_sblock( env_layer = somevar, data = spp1, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", min_res_mult = 10, # Minimum value used for multiplying raster resolution and define the finest resolution to be tested max_res_mult = 500, # Maximum value used for multiplying raster resolution and define the coarsest resolution to be tested num_grids = 30, # Number of grid to be tested between min_res_mult X (raster resolution) and max_res_mult X (raster resolution) n_part = 2, # Number of partitions prop = 0.5 # Proportion of points used for testing autocorrelation between groupds (0-1) ) #> 12 rows were excluded from database because NAs were found #> The following grid cell sizes will be tested: #> 18900 | 50834.48 | 82768.97 | 114703.45 | 146637.93 | 178572.41 | 210506.9 | 242441.38 | 274375.86 | 306310.34 | 338244.83 | 370179.31 | 402113.79 | 434048.28 | 465982.76 | 497917.24 | 529851.72 | 561786.21 | 593720.69 | 625655.17 | 
657589.66 | 689524.14 | 721458.62 | 753393.1 | 785327.59 | 817262.07 | 849196.55 | 881131.03 | 913065.52 | 945000 #> Creating basic raster mask... #> Searching for the optimal grid size... plot(sp_part3$grid) points(sp_part3$part[c(\"x\", \"y\")], col = c(\"blue\", \"red\")[sp_part3$part$.part], cex = 0.5, pch = 19 ) terra::res(sp_part3$grid) #> [1] 881131 881131 terra::res(somevar) #> [1] 1890 1890 grid_env <- get_block(env_layer = somevar, best_grid = sp_part3$grid) plot(grid_env) # this is a block layer with the same layer # properties as environmental variables. points(sp_part3$part[c(\"x\", \"y\")], col = c(\"blue\", \"red\")[sp_part3$part$.part], cex = 0.5, pch = 19 )"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"environmental-and-spatial-cross-validation-part_senv","dir":"Articles","previous_headings":"Data partitioning","what":"4. Environmental and spatial cross-validation (part_senv)","title":"flexsdm: Overview of Pre-modeling functions","text":"final partitioning function flexsdm part_senv(), explores different numbers environmental partitions based K-means clustering algorithm returns one best-suited particular dataset, considering spatial autocorrelation, environmental similarity, number presence /absence records partition. map shows partitioning based environmental spatial factors.","code":"sp_part4 <- part_senv( env_layer = somevar, data = spp1, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", min_n_groups = 2, # Minimum number of groups to be tested max_n_groups = 10, # Maximum number of groups to be tested prop = 0.5 # Proportion of points used for testing autocorrelation between groups (0-1) ) #> 12 rows were excluded from database because NAs were found #> The following grid cell sizes will be tested: #> 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 #> Searching best partition... 
plot(regions, col = gray.colors(9)) points(sp_part4$part[c(\"x\", \"y\")], col = hcl.colors(length(unique(sp_part4$part)))[sp_part4$part$.part], cex = 1, pch = 19 )"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"background-and-pseudo-absence-sampling","dir":"Articles","previous_headings":"","what":"Background and pseudo-absence sampling","title":"flexsdm: Overview of Pre-modeling functions","text":"Presence-occurrence data quite common ecology researchers may adequate “absence” data species interest. Sometimes building species distribution models, need able generate background pseudo-absence points modeling goals. flexsdm package allows users using sample_background() sample_pseudoabs().","code":""},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"sample-background","dir":"Articles","previous_headings":"Background and pseudo-absence sampling","what":"1. Sample background","title":"flexsdm: Overview of Pre-modeling functions","text":"function sample_background() allows slection background sample points based different geographic restrictions sampling methods. , sample set background points based earlier spatial block partitioning using “random” method. Using lapply() case ensures generate background points spatial blocks (n = 2). 
also specifying want ten times amount background points original occurrences calibration area buffer area around presence points (see section “Calibration area”).","code":"p_data <- sp_part3$part # presence data from spatial block partition example set.seed(10) bg <- lapply(1:2, function(x) { sample_background( data = p_data, x = \"x\", y = \"y\", n = sum(p_data == x) * 10, # number of background points to be sampled method = \"random\", rlayer = grid_env, maskval = x, calibarea = ca_1 # A SpatVector which delimit the calibration area used for a given species ) }) %>% bind_rows() %>% mutate(pr_ab = 0) par(mfrow = c(2, 1)) plot(grid_env, main = \"Presence points\") plot(ca_1, add = TRUE) points(p_data, cex = .7, pch = 19) plot(grid_env, main = \"Background points\") plot(ca_1, add = TRUE) points(bg, cex = .1, pch = 19)"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"sample-pseudo-absences","dir":"Articles","previous_headings":"Background and pseudo-absence sampling","what":"2. Sample pseudo-absences","title":"flexsdm: Overview of Pre-modeling functions","text":"Similarly, function sample_pseudoabs allows random pseudo-absence sampling based environmental /geographical constraints. example, specifying method = “env_const” selects pseudo-absences environmentally constrained regions lower suitability values predicted Bioclim model. Additionally, function allows users specify calibration area generate pseudo-absence points. , use buffer area around presence points (ca_1) show might look like. can see, generated pseudo-absence points general vicinity presence points, concentrated areas lower environmental suitability. 
specific method chosen sampling background /pseudo-absence points vary depending research goals.","code":"set.seed(10) psa <- lapply(1:2, function(x) { sample_pseudoabs( data = p_data, x = \"x\", y = \"y\", n = sum(p_data == x), # number of pseudo-absence points to be sampled method = c(\"env_const\", env = somevar), rlayer = grid_env, maskval = x, calibarea = ca_1 ) }) %>% bind_rows() %>% mutate(pr_ab = 0) #> Extents do not match, raster layers used were croped to minimum extent #> Extents do not match, raster layers used were croped to minimum extent par(mfrow = c(2, 1)) plot(grid_env, main = \"Presence points\") plot(ca_1, add = TRUE) points(p_data, cex = .7, pch = 19) plot(grid_env, main = \"Pseudo-absence points\") plot(ca_1, add = TRUE) points(psa, cex = .7, pch = 19)"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"extracting-environmental-values","dir":"Articles","previous_headings":"","what":"Extracting environmental values","title":"flexsdm: Overview of Pre-modeling functions","text":"Finally, modeling species geographic distributions, must extract environmental data presences + absences/pseudo-absences/background point locations. function sdm_extract() extracts environmental data values based x y coordinates returns tibble original data + additional columns extracted environmental variables locations. Let’s original presence points (spp1) background locations (bg). #=========#=========#=========#=========#=========#=========#=========# Vignette still construction changes #=========#=========#=========#=========#=========#=========#=========#","code":"all_points <- bind_rows(spp1 %>% dplyr::select(-idd), bg) ex_spp <- sdm_extract( data = all_points, x = \"x\", y = \"y\", env_layer = somevar, # Raster with environmental variables variables = NULL, # Vector with the variable names of predictor variables Usage variables. = c(\"aet\", \"cwd\", \"tmin\"). 
If no variable is specified, function will return data for all layers. filter_na = TRUE ) ex_spp"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v02_modeling.html","id":"introduction","dir":"Articles","previous_headings":"","what":"Introduction","title":"flexsdm: Overview of Modeling functions","text":"Species distribution modeling (SDM) become standard tool multiple research areas, including ecology, conservation biology, biogeography, paleobiogeography, epidemiology. SDM area active theoretical methodological research. flexsdm package provides users ability manipulate parameterize models variety ways meet unique research needs. flexibility enables users define complete partial modeling procedure specific modeling situations (e.g., number variables, number records, different algorithms ensemble methods, algorithms tuning, etc.). vignette, users learn second set functions flexsdm package fall “modeling” umbrella. functions designed construct validate different types models can grouped fit_* , tune_* , esm_* family functions. addition function perform ensemble modeling. fit_* functions construct validate models default hyper-parameter values. tune_* functions construct validate models searching best combination hyper-parameter values, esm_ functions can used constructing validating Ensemble Small Models. Finally, fit_ensemble() function fitting validating ensemble models. 
functions model construction validation: fit_* functions family fit_gam() Fit validate Generalized Additive Models fit_gau() Fit validate Gaussian Process models fit_gbm() Fit validate Generalized Boosted Regression models fit_glm() Fit validate Generalized Linear Models fit_max() Fit validate Maximum Entropy models fit_net() Fit validate Neural Networks models fit_raf() Fit validate Random Forest models fit_svm() Fit validate Support Vector Machine models tune_* functions family tune_gbm() Fit validate Generalized Boosted Regression models exploration hyper-parameters tune_max() Fit validate Maximum Entropy models exploration hyper-parameters tune_net() Fit validate Neural Networks models exploration hyper-parameters tune_raf() Fit validate Random Forest models exploration hyper-parameters tune_svm() Fit validate Support Vector Machine models exploration hyper-parameters model ensemble fit_ensemble() Fit validate ensemble models different ensemble methods esm_* functions family esm_gam() Fit validate Generalized Additive Models Ensemble Small Model approach esm_gau() Fit validate Gaussian Process models Models Ensemble Small Model approach esm_gbm() Fit validate Generalized Boosted Regression models Ensemble Small Model approach esm_glm() Fit validate Generalized Linear Models Ensemble Small Model approach esm_max() Fit validate Maximum Entropy models Ensemble Small Model approach esm_net() Fit validate Neural Networks models Ensemble Small Model approach esm_svm() Fit validate Support Vector Machine models Ensemble Small Model approach","code":""},{"path":"https://sjevelazco.github.io/flexsdm/articles/v02_modeling.html","id":"installation","dir":"Articles","previous_headings":"","what":"Installation","title":"flexsdm: Overview of Modeling functions","text":"First, install flexsdm package. 
can install released version flexsdm github :","code":"# devtools::install_github('sjevelazco/flexsdm') require(flexsdm) #> Loading required package: flexsdm require(terra) #> Loading required package: terra #> terra 1.7.55 #> #> Attaching package: 'terra' #> The following object is masked from 'package:knitr': #> #> spin require(dplyr) #> Loading required package: dplyr #> #> Attaching package: 'dplyr' #> The following objects are masked from 'package:terra': #> #> intersect, union #> The following objects are masked from 'package:stats': #> #> filter, lag #> The following objects are masked from 'package:base': #> #> intersect, setdiff, setequal, union"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v02_modeling.html","id":"project-directory-setup","dir":"Articles","previous_headings":"","what":"Project directory setup","title":"flexsdm: Overview of Modeling functions","text":"Decide computer like store inputs outputs project (main directory). Use existing one use dir.create() create main directory. specify whether include folders projections, calibration areas, algorithms, ensembles, thresholds. details see Vignette 01_pre_modeling","code":""},{"path":"https://sjevelazco.github.io/flexsdm/articles/v02_modeling.html","id":"data-species-occurrence-and-background-data","dir":"Articles","previous_headings":"","what":"Data, species occurrence and background data","title":"flexsdm: Overview of Modeling functions","text":"tutorial, using species occurrences environmental data available flexsdm package. “abies” example dataset includes pr_ab column (presence = 1, absence = 0), location columns (x, y) environmental data. can load “abies” data local R environment using code : (EXAMPLE LOOKS LITTLE STRANGE ALSO USING BACKGROUND DATA, ABIES DATASET CLEARLY ABSENCES…) want replace abies dataset data, make sure dataset contains environmental conditions related presence-absence data. use pre-modeling family function k-fold partition method (used cross-validation). 
partition method number folds replications must presence-absence background points datasets. Now, abies2 object new column called “.part” 5 k-folds (1, 2, 3, 4, 5), indicating partition record (row) member . Next, apply partition method number folds environmental conditions background points. backg2 object new column called “.part” 5 k-folds (1, 2, 3, 4, 5).","code":"data(\"abies\") data(\"backg\") dplyr::glimpse(abies) #> Rows: 1,400 #> Columns: 13 #> $ id 715, 5680, 7907, 1850, 1702, 10036, 12384, 6513, 9884, 8651, … #> $ pr_ab 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0… #> $ x -95417.134, 98986.536, 121474.257, -39976.221, 111372.261, -2… #> $ y 314240.13, -159415.18, -99463.44, -17456.11, -91404.05, 39222… #> $ aet 323.1133, 447.5567, 182.2833, 372.3867, 209.4567, 308.3000, 5… #> $ cwd 546.1400, 815.4033, 271.1800, 946.2933, 398.5500, 534.9533, 3… #> $ tmin 1.2433, 9.4267, -4.9500, 8.7767, -4.0333, 4.6600, 4.3800, 4.9… #> $ ppt_djf 62.7257, 129.6406, 150.7003, 116.0236, 164.9327, 166.2220, 48… #> $ ppt_jja 17.7941, 6.4317, 11.2294, 2.7020, 9.2686, 16.5310, 41.2494, 8… #> $ pH 5.773341, 5.600000, 0.000000, 6.411796, 0.000000, 5.700000, 5… #> $ awc 0.10837019, 0.16000000, 0.00000000, 0.09719457, 0.00000000, 0… #> $ depth 152.000000, 201.000000, 0.000000, 59.759930, 0.000000, 112.99… #> $ landform 7, 11, 15, 14, 15, 15, 7, 15, 4, 10, 6, 10, 10, 15, 10, 11, 1… dplyr::glimpse(backg) #> Rows: 5,000 #> Columns: 13 #> $ pr_ab 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, … #> $ x 160779.16, 36849.16, -240170.84, -152420.84, -193190.84, … #> $ y -449968.33, 24151.67, 90031.67, -143518.33, 24151.67, 223… #> $ aet 280.4567, 259.7800, 400.1767, 367.4833, 397.3667, 385.263… #> $ cwd 1137.2433, 381.5367, 699.6500, 843.4467, 842.3833, 637.35… #> $ tmin 13.5100, -3.1733, 8.6800, 9.0133, 8.9700, 4.9333, 6.2933,… #> $ ppt_djf 71.2741, 171.4537, 285.0893, 72.0309, 125.2467, 226.1534,… #> $ ppt_jja 1.1920, 17.5193, 5.0158, 1.2047, 1.9778, 8.1554, 
18.4182,… #> $ pH 0.0000000, 0.2122687, 5.7222223, 7.5350823, 6.1963525, 5.… #> $ awc 0.000000000, 0.003473487, 0.080370426, 0.170000002, 0.131… #> $ depth 0.00000, 201.00000, 50.07409, 154.39426, 122.39575, 56.17… #> $ percent_clay 0.0000000, 0.4438345, 18.4111176, 46.9751244, 37.1873169,… #> $ landform 13, 10, 6, 6, 10, 14, 8, 14, 6, 7, 11, 14, 14, 10, 6, 6, … abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) dplyr::glimpse(abies2) #> Rows: 1,400 #> Columns: 14 #> $ id 715, 5680, 7907, 1850, 1702, 10036, 12384, 6513, 9884, 8651, … #> $ pr_ab 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0… #> $ x -95417.134, 98986.536, 121474.257, -39976.221, 111372.261, -2… #> $ y 314240.13, -159415.18, -99463.44, -17456.11, -91404.05, 39222… #> $ aet 323.1133, 447.5567, 182.2833, 372.3867, 209.4567, 308.3000, 5… #> $ cwd 546.1400, 815.4033, 271.1800, 946.2933, 398.5500, 534.9533, 3… #> $ tmin 1.2433, 9.4267, -4.9500, 8.7767, -4.0333, 4.6600, 4.3800, 4.9… #> $ ppt_djf 62.7257, 129.6406, 150.7003, 116.0236, 164.9327, 166.2220, 48… #> $ ppt_jja 17.7941, 6.4317, 11.2294, 2.7020, 9.2686, 16.5310, 41.2494, 8… #> $ pH 5.773341, 5.600000, 0.000000, 6.411796, 0.000000, 5.700000, 5… #> $ awc 0.10837019, 0.16000000, 0.00000000, 0.09719457, 0.00000000, 0… #> $ depth 152.000000, 201.000000, 0.000000, 59.759930, 0.000000, 112.99… #> $ landform 7, 11, 15, 14, 15, 15, 7, 15, 4, 10, 6, 10, 10, 15, 10, 11, 1… #> $ .part 2, 2, 3, 4, 2, 1, 5, 5, 2, 2, 4, 4, 1, 5, 4, 5, 5, 5, 1, 3, 1… backg2 <- part_random( data = backg, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) dplyr::glimpse(backg2) #> Rows: 5,000 #> Columns: 14 #> $ pr_ab 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, … #> $ x 160779.16, 36849.16, -240170.84, -152420.84, -193190.84, … #> $ y -449968.33, 24151.67, 90031.67, -143518.33, 24151.67, 223… #> $ aet 280.4567, 259.7800, 400.1767, 367.4833, 397.3667, 385.263… #> $ cwd 1137.2433, 381.5367, 
699.6500, 843.4467, 842.3833, 637.35… #> $ tmin 13.5100, -3.1733, 8.6800, 9.0133, 8.9700, 4.9333, 6.2933,… #> $ ppt_djf 71.2741, 171.4537, 285.0893, 72.0309, 125.2467, 226.1534,… #> $ ppt_jja 1.1920, 17.5193, 5.0158, 1.2047, 1.9778, 8.1554, 18.4182,… #> $ pH 0.0000000, 0.2122687, 5.7222223, 7.5350823, 6.1963525, 5.… #> $ awc 0.000000000, 0.003473487, 0.080370426, 0.170000002, 0.131… #> $ depth 0.00000, 201.00000, 50.07409, 154.39426, 122.39575, 56.17… #> $ percent_clay 0.0000000, 0.4438345, 18.4111176, 46.9751244, 37.1873169,… #> $ landform 13, 10, 6, 6, 10, 14, 8, 14, 6, 7, 11, 14, 14, 10, 6, 6, … #> $ .part 2, 3, 4, 4, 1, 4, 5, 4, 3, 1, 5, 1, 4, 2, 5, 4, 2, 5, 1, …"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v02_modeling.html","id":"fit-and-validate-models","dir":"Articles","previous_headings":"Data, species occurrence and background data","what":"1. Fit and validate models","title":"flexsdm: Overview of Modeling functions","text":"fit validate models: . maximum entropy model default hyper-parameter values (flexsdm::fit_max) II. random forest model exploration hyper-parameters (flexsdm::tune_raf). . Maximum Entropy models default hyper-parameter values. function returns list object following elements: model: “MaxEnt” class object. object can used predicting. predictors: tibble quantitative (c column names) qualitative (f column names) variables use modeling. performance: performance metric (see sdm_eval). metrics threshold dependent calculated based threshold specified argument. can see selected threshold values. Predicted suitability test partition (row) based best model. database used fit_ensemble. II- Random forest models exploration hyper-parameters. First, create data.frame provides hyper-parameters values tested. recommended generate data.frame. Hyper-parameter needed tuning ‘mtry’. maximum mtry must equal total number predictors. use data object abies2, k-fold partition method: Let’s see output object contains. 
function returns list object following elements: model: “randomForest” class object. object can used see formula details, basic summary o fthe model, predicting. predictors: tibble quantitative (c column names) qualitative (f column names) variables use modeling. performance: performance metric (see sdm_eval). metrics threshold dependent calculated based threshold specified argument. can see selected threshold values. Predicted suitability test partition (row) based best model. database used fit_ensemble. model objects can used flexsdm::fit_ensemble().","code":"max_t1 <- fit_max( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), predictors_f = c(\"landform\"), partition = \".part\", background = backg2, thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\"), clamp = TRUE, classes = \"default\", pred_type = \"cloglog\", regmult = 1 ) #> Formula used for model fitting: #> ~aet + ppt_jja + pH + awc + depth + I(aet^2) + I(ppt_jja^2) + I(pH^2) + I(awc^2) + I(depth^2) + hinge(aet) + hinge(ppt_jja) + hinge(pH) + hinge(awc) + hinge(depth) + ppt_jja:aet + pH:aet + awc:aet + depth:aet + pH:ppt_jja + awc:ppt_jja + depth:ppt_jja + awc:pH + depth:pH + depth:awc + categorical(landform) - 1 #> Replica number: 1/1 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 names(max_t1) #> [1] \"model\" \"predictors\" \"performance\" \"data_ens\" options(max.print = 20) max_t1$model #> #> Call: glmnet::glmnet(x = mm, y = as.factor(p), family = \"binomial\", weights = weights, lambda = 10^(seq(4, 0, length.out = 200)) * sum(reg)/length(reg) * sum(p)/sum(weights), standardize = F, penalty.factor = reg) #> #> Df %Dev Lambda #> 1 0 0.00 21.3700 #> 2 0 0.00 20.4100 #> 3 0 0.00 19.4800 #> 4 0 0.00 18.6000 #> 5 0 0.00 17.7600 #> 6 0 0.00 16.9600 #> [ reached getOption(\"max.print\") -- omitted 194 rows ] max_t1$predictors #> # A tibble: 1 × 6 #> c1 c2 c3 c4 c5 
f #> #> 1 aet ppt_jja pH awc depth landform max_t1$performance #> # A tibble: 3 × 25 #> model threshold thr_value n_presences n_absences TPR_mean TPR_sd TNR_mean #> #> 1 max equal_sens_sp… 0.573 700 700 0.669 0.0288 0.669 #> 2 max max_sens_spec 0.416 700 700 0.877 0.0609 0.56 #> 3 max max_sorensen 0.335 700 700 0.951 0.0362 0.457 #> # ℹ 17 more variables: TNR_sd , SORENSEN_mean , SORENSEN_sd , #> # JACCARD_mean , JACCARD_sd , FPB_mean , FPB_sd , #> # OR_mean , OR_sd , TSS_mean , TSS_sd , AUC_mean , #> # AUC_sd , BOYCE_mean , BOYCE_sd , IMAE_mean , #> # IMAE_sd max_t1$data_ens #> # A tibble: 1,400 × 5 #> rnames replicates part pr_ab pred #> #> 1 6 .part 1 0 0.656 #> 2 13 .part 1 0 0.0405 #> 3 19 .part 1 0 0.779 #> 4 21 .part 1 0 0.407 #> 5 25 .part 1 0 0.851 #> 6 27 .part 1 0 0.706 #> 7 31 .part 1 0 0.395 #> 8 33 .part 1 0 0.0456 #> 9 35 .part 1 0 0.412 #> 10 36 .part 1 0 0.130 #> # ℹ 1,390 more rows tune_grid <- expand.grid(mtry = seq(1, 7, 1)) rf_t <- tune_raf( data = abies2, response = \"pr_ab\", predictors = c( \"aet\", \"cwd\", \"tmin\", \"ppt_djf\", \"ppt_jja\", \"pH\", \"awc\", \"depth\" ), predictors_f = c(\"landform\"), partition = \".part\", grid = tune_grid, thr = \"max_sens_spec\", metric = \"TSS\", ) #> Formula used for model fitting: #> pr_ab ~ aet + cwd + tmin + ppt_djf + ppt_jja + pH + awc + depth + landform #> Tuning model... #> Replica number: 1/1 #> Formula used for model fitting: #> pr_ab ~ aet + cwd + tmin + ppt_djf + ppt_jja + pH + awc + depth + landform #> Replica number: 1/1 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 names(rf_t) #> [1] \"model\" \"predictors\" \"performance\" #> [4] \"hyper_performance\" \"data_ens\" rf_t$model #> #> Call: #> randomForest(formula = formula1, data = data, mtry = mtry, ntree = 500, importance = FALSE, ) #> Type of random forest: classification #> Number of trees: 500 #> No. 
of variables tried at each split: 1 #> #> OOB estimate of error rate: 11.64% #> Confusion matrix: #> 0 1 class.error #> 0 588 112 0.16000000 #> 1 51 649 0.07285714 rf_t$predictors #> # A tibble: 1 × 9 #> c1 c2 c3 c4 c5 c6 c7 c8 f #> #> 1 aet cwd tmin ppt_djf ppt_jja pH awc depth landform rf_t$performance #> # A tibble: 1 × 26 #> mtry model threshold thr_value n_presences n_absences TPR_mean TPR_sd #> #> 1 1 raf max_sens_spec 0.606 700 700 0.93 0.0333 #> # ℹ 18 more variables: TNR_mean , TNR_sd , SORENSEN_mean , #> # SORENSEN_sd , JACCARD_mean , JACCARD_sd , FPB_mean , #> # FPB_sd , OR_mean , OR_sd , TSS_mean , TSS_sd , #> # AUC_mean , AUC_sd , BOYCE_mean , BOYCE_sd , #> # IMAE_mean , IMAE_sd rf_t$data_ens #> # A tibble: 1,400 × 5 #> rnames replicates part pr_ab pred #> #> 1 6 .part 1 0 0.27 #> 2 13 .part 1 0 0.032 #> 3 19 .part 1 0 0.09 #> 4 21 .part 1 0 0.09 #> 5 25 .part 1 0 0.24 #> 6 27 .part 1 0 0.27 #> 7 31 .part 1 0 0.272 #> 8 33 .part 1 0 0.02 #> 9 35 .part 1 0 0.156 #> 10 36 .part 1 0 0.018 #> # ℹ 1,390 more rows"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v02_modeling.html","id":"model-ensemble","dir":"Articles","previous_headings":"Data, species occurrence and background data","what":"2. 
Model Ensemble","title":"flexsdm: Overview of Modeling functions","text":"example fit validate ensemble model using two model objects just created.","code":"# Fit and validate ensemble model an_ensemble <- fit_ensemble( models = list(max_t1, rf_t), ens_method = \"meansup\", thr = NULL, thr_model = \"max_sens_spec\", metric = \"TSS\" ) #> | | | 0% | |======================================================================| 100% # Outputs names(an_ensemble) #> [1] \"models\" \"thr_metric\" \"predictors\" \"performance\" an_ensemble$thr_metric #> [1] \"max_sens_spec\" \"TSS_mean\" an_ensemble$predictors #> # A tibble: 2 × 9 #> c1 c2 c3 c4 c5 f c6 c7 c8 #> #> 1 aet ppt_jja pH awc depth landform NA NA NA #> 2 aet cwd tmin ppt_djf ppt_jja landform pH awc depth an_ensemble$performance #> # A tibble: 7 × 25 #> model threshold thr_value n_presences n_absences TPR_mean TPR_sd TNR_mean #> #> 1 meansup equal_sens_… 0.596 700 700 0.879 0.0220 0.88 #> 2 meansup lpt 0.05 700 700 1 0 0.414 #> 3 meansup max_fpb 0.568 700 700 0.931 0.0322 0.86 #> 4 meansup max_jaccard 0.568 700 700 0.931 0.0322 0.86 #> 5 meansup max_sens_sp… 0.568 700 700 0.93 0.0333 0.861 #> 6 meansup max_sorensen 0.568 700 700 0.931 0.0322 0.86 #> 7 meansup sensitivity 0.55 700 700 0.9 0 0.861 #> # ℹ 17 more variables: TNR_sd , SORENSEN_mean , SORENSEN_sd , #> # JACCARD_mean , JACCARD_sd , FPB_mean , FPB_sd , #> # OR_mean , OR_sd , TSS_mean , TSS_sd , AUC_mean , #> # AUC_sd , BOYCE_mean , BOYCE_sd , IMAE_mean , #> # IMAE_sd "},{"path":"https://sjevelazco.github.io/flexsdm/articles/v02_modeling.html","id":"fit-and-validate-models-with-ensemble-of-small-model-approach","dir":"Articles","previous_headings":"Data, species occurrence and background data","what":"3. 
Fit and validate models with Ensemble of Small Model approach","title":"flexsdm: Overview of Modeling functions","text":"method consists creating bivariate models pair-wise combinations predictors perform ensemble based average suitability weighted Somers’ D metric (D = 2 x (AUC -0.5)). ESM recommended modeling species occurrences. function allow categorical variables use types variables problematic applied species occurrences. detail see Breiner et al. (2015, 2018) can use different methods flexsdm::part_random function according data. See part_random details. function constructs Generalized Additive Models using Ensembles Small Models (ESM) approach (Breiner et al., 2015, 2018). function returns list object following elements: esm_model: list “GAM” class object bivariate model. object can used predicting using ESM approachwith sdm_predict function. predictors: tibble variables use modeling. performance: Performance metric (see sdm_eval). threshold dependent metrics calculated based threshold specified argument. Now, test rep_kfold partition method. method ‘folds’ refers number partitions data partitioning ‘replicate’ refers number replicates. assume values >=1. use new rep_kfold partition gam model Test random bootstrap partitioning. method ‘replicate’ refers number replicates (assumes value >=1), ‘proportion’ refers proportion occurrences used model fitting (assumes value >0 <=1). method can configure proportion training testing data according species occurrences. example, proportion=‘0.7’ indicates 70% data used model training, 30% used model testing. method, function return .partX columns “train” “test” words entries. 
Use new rep_kfold partition gam model #=========#=========#=========#=========#=========#=========#=========# Vignette still construction changes #=========#=========#=========#=========#=========#=========#=========#","code":"data(\"abies\") library(dplyr) # Create a smaller subset of occurrences set.seed(10) abies2 <- abies %>% na.omit() %>% group_by(pr_ab) %>% dplyr::slice_sample(n = 10) %>% group_by() # Using k-fold partition method for model cross validation abies2 <- part_random( data = abies2, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 3) ) abies2 #> # A tibble: 20 × 14 #> id pr_ab x y aet cwd tmin ppt_djf ppt_jja pH awc #> #> 1 12040 0 -308909. 384248. 573. 332. 4.84 521. 48.8 5.63 0.108 #> 2 10361 0 -254286. 417158. 260. 469. 2.93 151. 15.1 6.20 0.0950 #> 3 9402 0 -286979. 386206. 587. 376. 6.45 333. 15.7 5.5 0.160 #> 4 9815 0 -291849. 445595. 443. 455. 4.39 332. 19.1 6 0.0700 #> 5 10524 0 -256658. 184438. 355. 568. 5.87 303. 10.6 5.20 0.0800 #> 6 8860 0 121343. -164170. 354. 733. 3.97 182. 9.83 0 0 #> 7 6431 0 107903. -122968. 461. 578. 4.87 161. 7.66 5.90 0.0900 #> 8 11730 0 -333903. 431238. 561. 364. 6.73 387. 25.2 5.80 0.130 #> 9 808 0 -150163. 357180. 339. 564. 2.64 220. 15.3 6.40 0.100 #> 10 11054 0 -293663. 340981. 477. 396. 3.89 332. 26.4 4.60 0.0634 #> 11 2960 1 -49273. 181752. 512. 275. 0.920 319. 17.3 5.92 0.0900 #> 12 3065 1 126907. -198892. 322. 544. 0.700 203. 10.6 5.60 0.110 #> 13 5527 1 116751. -181089. 261. 537. 0.363 178. 7.43 0 0 #> 14 4035 1 -31777. 115940. 394. 440. 2.07 298. 11.2 6.01 0.0769 #> 15 4081 1 -5158. 90159. 301. 502. 0.703 203. 14.6 6.11 0.0633 #> 16 3087 1 102151. -143976. 299. 425. -2.08 205. 13.4 3.88 0.110 #> 17 3495 1 -19586. 89803. 438. 419. 2.13 189. 15.2 6.19 0.0959 #> 18 4441 1 49405. -60502. 362. 582. 2.42 218. 7.84 5.64 0.0786 #> 19 301 1 -132516. 270845. 367. 196. -2.56 422. 26.3 6.70 0.0300 #> 20 3162 1 59905. -53634. 319. 626. 1.99 212. 
4.50 4.51 0.0396 #> # ℹ 3 more variables: depth , landform , .part # We set the model without threshold specification and with the kfold created above esm_gam_t1 <- esm_gam( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmin\", \"ppt_djf\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), partition = \".part\", thr = NULL ) #> #> Model has more coefficients than data used for training it. Try to reduce k names(esm_gam_t1) #> NULL options(max.print = 10) # If you don't want to see printed all the output esm_gam_t1$esm_model #> NULL esm_gam_t1$predictors #> NULL esm_gam_t1$performance #> NULL # Remove the previous k-fold partition abies2 <- abies2 %>% select(-starts_with(\".\")) # Test with rep_kfold partition using 3 folds and 5 replicates set.seed(10) abies2 <- part_random( data = abies2, pr_ab = \"pr_ab\", method = c(method = \"rep_kfold\", folds = 3, replicates = 5) ) abies2 #> # A tibble: 20 × 18 #> id pr_ab x y aet cwd tmin ppt_djf ppt_jja pH awc #> #> 1 12040 0 -308909. 384248. 573. 332. 4.84 521. 48.8 5.63 0.108 #> 2 10361 0 -254286. 417158. 260. 469. 2.93 151. 15.1 6.20 0.0950 #> 3 9402 0 -286979. 386206. 587. 376. 6.45 333. 15.7 5.5 0.160 #> 4 9815 0 -291849. 445595. 443. 455. 4.39 332. 19.1 6 0.0700 #> 5 10524 0 -256658. 184438. 355. 568. 5.87 303. 10.6 5.20 0.0800 #> 6 8860 0 121343. -164170. 354. 733. 3.97 182. 9.83 0 0 #> 7 6431 0 107903. -122968. 461. 578. 4.87 161. 7.66 5.90 0.0900 #> 8 11730 0 -333903. 431238. 561. 364. 6.73 387. 25.2 5.80 0.130 #> 9 808 0 -150163. 357180. 339. 564. 2.64 220. 15.3 6.40 0.100 #> 10 11054 0 -293663. 340981. 477. 396. 3.89 332. 26.4 4.60 0.0634 #> 11 2960 1 -49273. 181752. 512. 275. 0.920 319. 17.3 5.92 0.0900 #> 12 3065 1 126907. -198892. 322. 544. 0.700 203. 10.6 5.60 0.110 #> 13 5527 1 116751. -181089. 261. 537. 0.363 178. 7.43 0 0 #> 14 4035 1 -31777. 115940. 394. 440. 2.07 298. 11.2 6.01 0.0769 #> 15 4081 1 -5158. 90159. 301. 502. 0.703 203. 14.6 6.11 0.0633 #> 16 3087 1 102151. -143976. 299. 
425. -2.08 205. 13.4 3.88 0.110 #> 17 3495 1 -19586. 89803. 438. 419. 2.13 189. 15.2 6.19 0.0959 #> 18 4441 1 49405. -60502. 362. 582. 2.42 218. 7.84 5.64 0.0786 #> 19 301 1 -132516. 270845. 367. 196. -2.56 422. 26.3 6.70 0.0300 #> 20 3162 1 59905. -53634. 319. 626. 1.99 212. 4.50 4.51 0.0396 #> # ℹ 7 more variables: depth , landform , .part1 , .part2 , #> # .part3 , .part4 , .part5 esm_gam_t2 <- esm_gam( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmin\", \"ppt_djf\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), partition = \".part\", thr = NULL ) #> #> Model has more coefficients than data used for training it. Try to reduce k # Remove the previous k-fold partition abies2 <- abies2 %>% select(-starts_with(\".\")) # Test with bootstrap partition using 10 replicates set.seed(10) abies2 <- part_random( data = abies2, pr_ab = \"pr_ab\", method = c(method = \"boot\", replicates = 10, proportion = 0.7) ) abies2 #> # A tibble: 20 × 23 #> id pr_ab x y aet cwd tmin ppt_djf ppt_jja pH awc #> #> 1 12040 0 -308909. 384248. 573. 332. 4.84 521. 48.8 5.63 0.108 #> 2 10361 0 -254286. 417158. 260. 469. 2.93 151. 15.1 6.20 0.0950 #> 3 9402 0 -286979. 386206. 587. 376. 6.45 333. 15.7 5.5 0.160 #> 4 9815 0 -291849. 445595. 443. 455. 4.39 332. 19.1 6 0.0700 #> 5 10524 0 -256658. 184438. 355. 568. 5.87 303. 10.6 5.20 0.0800 #> 6 8860 0 121343. -164170. 354. 733. 3.97 182. 9.83 0 0 #> 7 6431 0 107903. -122968. 461. 578. 4.87 161. 7.66 5.90 0.0900 #> 8 11730 0 -333903. 431238. 561. 364. 6.73 387. 25.2 5.80 0.130 #> 9 808 0 -150163. 357180. 339. 564. 2.64 220. 15.3 6.40 0.100 #> 10 11054 0 -293663. 340981. 477. 396. 3.89 332. 26.4 4.60 0.0634 #> 11 2960 1 -49273. 181752. 512. 275. 0.920 319. 17.3 5.92 0.0900 #> 12 3065 1 126907. -198892. 322. 544. 0.700 203. 10.6 5.60 0.110 #> 13 5527 1 116751. -181089. 261. 537. 0.363 178. 7.43 0 0 #> 14 4035 1 -31777. 115940. 394. 440. 2.07 298. 11.2 6.01 0.0769 #> 15 4081 1 -5158. 90159. 301. 502. 0.703 203. 
14.6 6.11 0.0633 #> 16 3087 1 102151. -143976. 299. 425. -2.08 205. 13.4 3.88 0.110 #> 17 3495 1 -19586. 89803. 438. 419. 2.13 189. 15.2 6.19 0.0959 #> 18 4441 1 49405. -60502. 362. 582. 2.42 218. 7.84 5.64 0.0786 #> 19 301 1 -132516. 270845. 367. 196. -2.56 422. 26.3 6.70 0.0300 #> 20 3162 1 59905. -53634. 319. 626. 1.99 212. 4.50 4.51 0.0396 #> # ℹ 12 more variables: depth , landform , .part1 , .part2 , #> # .part3 , .part4 , .part5 , .part6 , .part7 , #> # .part8 , .part9 , .part10 esm_gam_t3 <- esm_gam( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmin\", \"ppt_djf\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), partition = \".part\", thr = NULL ) #> #> Model has more coefficients than data used for training it. Try to reduce k"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v03_post_modeling.html","id":"introduction","dir":"Articles","previous_headings":"","what":"Introduction","title":"flexsdm: Overview of Post-modeling functions","text":"Species distribution modeling (SDM) become standard tool multiple research areas, including ecology, conservation biology, biogeography, paleobiogeography, epidemiology. SDM area active theoretical methodological research flexsdm package provides users ability manipulate parameterize models variety ways meet unique research needs. flexibility enables users define complete partial modeling procedure specific modeling situations (e.g., number variables, number records, different algorithms ensemble methods, algorithms tuning, etc.). vignette, users learn post-modeling set functions flexsdm package. functions designed aim assisting flexsdm user predicting, evaluating, correcting SDMs. 
functions created model prediction, evaluation correction: Post-modeling functions sdm_predict() Spatial predictions individual ensemble models sdm_summarize() Merge model performance tables interp() Raster interpolation SDM predictions two time periods extra_eval() Measure model extrapolation extra_correct() Constraint suitability values given extrapolation value msdm_priori() Create spatial predictor variables reduce overprediction species distribution models msdm_posteriori() Methods correct overprediction species distribution models based occurrences suitability patterns","code":""},{"path":"https://sjevelazco.github.io/flexsdm/articles/v03_post_modeling.html","id":"installation","dir":"Articles","previous_headings":"","what":"Installation","title":"flexsdm: Overview of Post-modeling functions","text":"Install flexsdm package. can install released version flexsdm github :","code":"# devtools::install_github('sjevelazco/flexsdm') library(flexsdm) library(dplyr) #> #> Attaching package: 'dplyr' #> The following objects are masked from 'package:stats': #> #> filter, lag #> The following objects are masked from 'package:base': #> #> intersect, setdiff, setequal, union library(terra) #> terra 1.7.55 #> #> Attaching package: 'terra' #> The following object is masked from 'package:knitr': #> #> spin"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v03_post_modeling.html","id":"project-directory-setup","dir":"Articles","previous_headings":"","what":"Project directory setup","title":"flexsdm: Overview of Post-modeling functions","text":"Decide computer like store inputs outputs project (main directory). Use existing one use dir.create() create main directory. specify whether include folders projections, calibration areas, algorithms, ensembles, thresholds. 
details see Vignette 01_pre_modeling","code":""},{"path":"https://sjevelazco.github.io/flexsdm/articles/v03_post_modeling.html","id":"species-occurrence-presenceabsense-and-environmental-data","dir":"Articles","previous_headings":"","what":"Species occurrence, presence/absense and environmental data","title":"flexsdm: Overview of Post-modeling functions","text":"tutorial, using “spp” example dataset includes pr_ab (presence = 1, absence = 0), location (x, y) data 3 plant species found California raster environmental data. can load data local R environment using code : want replace spp dataset data, make sure contains coordinates, species presence = 1 / absence = 0 raster environmental data. First, prepare occurrences, environmental conditions partitions Next, fit different models","code":"data(\"spp\") somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) # Select only one species some_sp <- spp %>% filter(species == \"sp3\") # Extract the environmental condition from the rsater for sp3 some_sp <- sdm_extract( data = some_sp, x = \"x\", y = \"y\", env_layer = somevar ) #> 4 rows were excluded from database because NAs were found # Make a partition defining the method, folds and replicates some_sp <- part_random( data = some_sp, pr_ab = \"pr_ab\", method = c(method = \"rep_kfold\", folds = 3, replicates = 5) ) # Fit and validate a [generalized linear model](https://sjevelazco.github.io/flexsdm/reference/fit_glm.html) mglm <- fit_glm( data = some_sp, response = \"pr_ab\", predictors = c(\"CFP_1\", \"CFP_2\", \"CFP_3\", \"CFP_4\"), partition = \".part\", poly = 2 ) #> Formula used for model fitting: #> pr_ab ~ CFP_1 + CFP_2 + CFP_3 + CFP_4 + I(CFP_1^2) + I(CFP_2^2) + I(CFP_3^2) + I(CFP_4^2) #> Replica number: 1/5 #> Partition number: 1/3 #> Partition number: 2/3 #> Partition number: 3/3 #> Replica number: 2/5 #> Partition number: 1/3 #> Partition number: 2/3 #> Partition number: 3/3 #> Replica number: 3/5 #> Partition 
number: 1/3 #> Partition number: 2/3 #> Partition number: 3/3 #> Replica number: 4/5 #> Partition number: 1/3 #> Partition number: 2/3 #> Partition number: 3/3 #> Replica number: 5/5 #> Partition number: 1/3 #> Partition number: 2/3 #> Partition number: 3/3 # Fit and validate a [random forest model](https://sjevelazco.github.io/flexsdm/reference/fit_raf.html) mraf <- fit_raf( data = some_sp, response = \"pr_ab\", predictors = c(\"CFP_1\", \"CFP_2\", \"CFP_3\", \"CFP_4\"), partition = \".part\", ) #> Formula used for model fitting: #> pr_ab ~ CFP_1 + CFP_2 + CFP_3 + CFP_4 #> Replica number: 1/5 #> Partition number: 1/3 #> Partition number: 2/3 #> Partition number: 3/3 #> Replica number: 2/5 #> Partition number: 1/3 #> Partition number: 2/3 #> Partition number: 3/3 #> Replica number: 3/5 #> Partition number: 1/3 #> Partition number: 2/3 #> Partition number: 3/3 #> Replica number: 4/5 #> Partition number: 1/3 #> Partition number: 2/3 #> Partition number: 3/3 #> Replica number: 5/5 #> Partition number: 1/3 #> Partition number: 2/3 #> Partition number: 3/3 # Fit and validate a [general boosted regression model](https://sjevelazco.github.io/flexsdm/reference/fit_gbm.html) mgbm <- fit_gbm( data = some_sp, response = \"pr_ab\", predictors = c(\"CFP_1\", \"CFP_2\", \"CFP_3\", \"CFP_4\"), partition = \".part\" ) #> Formula used for model fitting: #> pr_ab ~ CFP_1 + CFP_2 + CFP_3 + CFP_4 #> Replica number: 1/5 #> Partition number: 1/3 #> Partition number: 2/3 #> Partition number: 3/3 #> Replica number: 2/5 #> Partition number: 1/3 #> Partition number: 2/3 #> Partition number: 3/3 #> Replica number: 3/5 #> Partition number: 1/3 #> Partition number: 2/3 #> Partition number: 3/3 #> Replica number: 4/5 #> Partition number: 1/3 #> Partition number: 2/3 #> Partition number: 3/3 #> Replica number: 5/5 #> Partition number: 1/3 #> Partition number: 2/3 #> Partition number: 
3/3"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v03_post_modeling.html","id":"fit-and-ensemble-the-models-above","dir":"Articles","previous_headings":"Species occurrence, presence/absense and environmental data","what":"1. Fit and ensemble the models above","title":"flexsdm: Overview of Post-modeling functions","text":"can also fit model using Ensembles Small Models approach. example, fit without threshold specification k-fold cross-validation. Finally, can predict different kinds models data (some_sp). sdm_predict can used predicting one models fitted fit_ tune_ functions. output list SpatRaster continuous /binary predictions.","code":"# Fit and ensemble the models. To choose the arguments that best fit your own data, see all options available in [fit_ensemble](https://sjevelazco.github.io/flexsdm/reference/fit_ensemble.html) mensemble <- fit_ensemble( models = list(mglm, mraf, mgbm), ens_method = \"meansup\", thr = NULL, thr_model = \"max_sens_spec\", metric = \"TSS\" ) #> | | | 0% | |======================================================================| 100% msmall <- esm_gam( data = some_sp, response = \"pr_ab\", predictors = c(\"CFP_1\", \"CFP_2\", \"CFP_3\", \"CFP_4\"), partition = \".part\", thr = NULL ) #> | | | 0% | |============ | 17% | |======================= | 33% | |=================================== | 50% | |=============================================== | 67% | |========================================================== | 83% | |======================================================================| 100% # Predict using a single model, which is an mglm model in this example, # and a threshold type for binary predictions ind_p <- sdm_predict( models = mglm, pred = somevar, thr = \"max_fpb\", con_thr = FALSE, predict_area = NULL ) #> Predicting individual models # Inspect the object. 
It's a SpatRaster with 2 layers: glm, max_fpb # These are the continuous and binary prediction from the model ind_p #> $glm #> class : SpatRaster #> dimensions : 558, 394, 2 (nrow, ncol, nlyr) #> resolution : 1890, 1890 (x, y) #> extent : -373685.8, 370974.2, -604813.3, 449806.7 (xmin, xmax, ymin, ymax) #> coord. ref. : +proj=aea +lat_0=0 +lon_0=-120 +lat_1=34 +lat_2=40.5 +x_0=0 +y_0=-4000000 +datum=NAD83 +units=m +no_defs #> source(s) : memory #> varnames : somevar #> somevar #> names : glm, max_fpb #> min values : 2.220446e-16, TRUE #> max values : 1.000000e+00, TRUE # Plot to see this layers ind_p_rst <- terra::rast(ind_p) plot(ind_p_rst) # Predict a list of more than one model, specifying a threshold type list_p <- sdm_predict( models = list(mglm, mraf, mgbm), pred = somevar, thr = \"max_fpb\", con_thr = FALSE, predict_area = NULL ) #> Predicting list of individual models # Inspect the object. It's a list with 3 SpatRaster, one for each model, # each of which contains 2 layers, for the continuous and thresholded binary predictions. list_p #> $glm #> class : SpatRaster #> dimensions : 558, 394, 2 (nrow, ncol, nlyr) #> resolution : 1890, 1890 (x, y) #> extent : -373685.8, 370974.2, -604813.3, 449806.7 (xmin, xmax, ymin, ymax) #> coord. ref. : +proj=aea +lat_0=0 +lon_0=-120 +lat_1=34 +lat_2=40.5 +x_0=0 +y_0=-4000000 +datum=NAD83 +units=m +no_defs #> source(s) : memory #> varnames : somevar #> somevar #> names : glm, max_fpb #> min values : 2.220446e-16, TRUE #> max values : 1.000000e+00, TRUE #> #> $raf #> class : SpatRaster #> dimensions : 558, 394, 2 (nrow, ncol, nlyr) #> resolution : 1890, 1890 (x, y) #> extent : -373685.8, 370974.2, -604813.3, 449806.7 (xmin, xmax, ymin, ymax) #> coord. ref. 
: +proj=aea +lat_0=0 +lon_0=-120 +lat_1=34 +lat_2=40.5 +x_0=0 +y_0=-4000000 +datum=NAD83 +units=m +no_defs #> source(s) : memory #> varnames : somevar #> somevar #> names : raf, max_fpb #> min values : 0, FALSE #> max values : 1, TRUE #> #> $gbm #> class : SpatRaster #> dimensions : 558, 394, 2 (nrow, ncol, nlyr) #> resolution : 1890, 1890 (x, y) #> extent : -373685.8, 370974.2, -604813.3, 449806.7 (xmin, xmax, ymin, ymax) #> coord. ref. : +proj=aea +lat_0=0 +lon_0=-120 +lat_1=34 +lat_2=40.5 +x_0=0 +y_0=-4000000 +datum=NAD83 +units=m +no_defs #> source(s) : memory #> varnames : somevar #> somevar #> names : gbm, max_fpb #> min values : 0.0002949323, FALSE #> max values : 0.9986537352, TRUE # Plot to see this layers list_p_rst <- terra::rast(list_p) plot(list_p_rst) # Predict an ensemble model. This is only possible using one fit_ensemble object. It's not possible to include e.g., list(fit_ensemble1, fit_ensemble2) in the model argument. ensemble_p <- sdm_predict( models = mensemble, pred = somevar, thr = \"max_fpb\", con_thr = FALSE, predict_area = NULL ) #> Predicting ensembles # Inspect the object. It's a SpatRaster with 2 layers, mensemble and max_fpb # These are the continuous and binary prediction from the ensemble model ensemble_p #> $meansup #> class : SpatRaster #> dimensions : 558, 394, 2 (nrow, ncol, nlyr) #> resolution : 1890, 1890 (x, y) #> extent : -373685.8, 370974.2, -604813.3, 449806.7 (xmin, xmax, ymin, ymax) #> coord. ref. : +proj=aea +lat_0=0 +lon_0=-120 +lat_1=34 +lat_2=40.5 +x_0=0 +y_0=-4000000 +datum=NAD83 +units=m +no_defs #> source(s) : memory #> names : meansup, max_fpb #> min values : 0.0001474662, FALSE #> max values : 0.9972242977, TRUE # Plot to see this layers ensemble_p_rst <- terra::rast(ensemble_p) plot(ensemble_p_rst) # Predict an ensembles of small models. 
small_p <- sdm_predict( models = msmall, pred = somevar, thr = \"max_fpb\", con_thr = FALSE, predict_area = NULL ) #> Predicting ensemble of small models # Inspect the object It's a SpatRaster with 2 layers, msmall and max_fpb # These are the continuous and binary prediction from the ESM model small_p #> $esm_gam #> class : SpatRaster #> dimensions : 558, 394, 2 (nrow, ncol, nlyr) #> resolution : 1890, 1890 (x, y) #> extent : -373685.8, 370974.2, -604813.3, 449806.7 (xmin, xmax, ymin, ymax) #> coord. ref. : +proj=aea +lat_0=0 +lon_0=-120 +lat_1=34 +lat_2=40.5 +x_0=0 +y_0=-4000000 +datum=NAD83 +units=m +no_defs #> source(s) : memory #> names : esm_gam, max_fpb #> min values : 1.961046e-05, FALSE #> max values : 8.644150e-01, TRUE # Plot to see this layers small_p_rst <- terra::rast(small_p) plot(small_p_rst)"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v03_post_modeling.html","id":"merge-model-performance-tables","dir":"Articles","previous_headings":"Species occurrence, presence/absense and environmental data","what":"2. Merge model performance tables","title":"flexsdm: Overview of Post-modeling functions","text":"function combines model performance tables input models. function requires list one models fitted fit_ tune_ functions, fit_ensemble output, esm_ family function output. Build models use performance table merge Finally, merge three sdm performance tables.","code":"# Load abies data data(abies) abies #> # A tibble: 1,400 × 13 #> id pr_ab x y aet cwd tmin ppt_djf ppt_jja pH awc #> #> 1 715 0 -95417. 314240. 323. 546. 1.24 62.7 17.8 5.77 0.108 #> 2 5680 0 98987. -159415. 448. 815. 9.43 130. 6.43 5.60 0.160 #> 3 7907 0 121474. -99463. 182. 271. -4.95 151. 11.2 0 0 #> 4 1850 0 -39976. -17456. 372. 946. 8.78 116. 2.70 6.41 0.0972 #> 5 1702 0 111372. -91404. 209. 399. -4.03 165. 9.27 0 0 #> 6 10036 0 -255715. 392229. 308. 535. 4.66 166. 16.5 5.70 0.0777 #> 7 12384 0 -311765. 380213. 568. 352. 4.38 480. 41.2 5.80 0.110 #> 8 6513 0 111360. -120229. 327. 
633. 4.93 163. 8.91 1.18 0.0116 #> 9 9884 0 -284326. 442136. 377. 446. 3.99 296. 16.8 5.96 0.0900 #> 10 8651 0 137640. -110538. 215. 265. -4.62 180. 9.57 0 0 #> # ℹ 1,390 more rows #> # ℹ 2 more variables: depth , landform # We will partition the data with the k-fold method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) # Build a generalized additive model, and a generalized linear model using fit_ family functions gam_t1 <- fit_gam( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), predictors_f = c(\"landform\"), partition = \".part\", thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\") ) #> Formula used for model fitting: #> pr_ab ~ s(aet, k = -1) + s(ppt_jja, k = -1) + s(pH, k = -1) + s(awc, k = -1) + s(depth, k = -1) + landform #> Replica number: 1/1 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 glm_t1 <- fit_glm( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), predictors_f = c(\"landform\"), partition = \".part\", thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\"), poly = 0, inter_order = 0 ) #> Formula used for model fitting: #> pr_ab ~ aet + ppt_jja + pH + awc + depth + landform #> Replica number: 1/1 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 # Build a tuned model using tune_ family functions # Prepare the grid object to use in grid argument tune_grid <- expand.grid(mtry = seq(1, 7, 1)) # Build a tuned random forest model rf_t1 <- tune_raf( data = abies2, response = \"pr_ab\", predictors = c( \"aet\", \"cwd\", \"tmin\", \"ppt_djf\", \"ppt_jja\", \"pH\", \"awc\", \"depth\" ), predictors_f = c(\"landform\"), partition = \".part\", grid = tune_grid, thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\"), 
metric = \"TSS\", ) #> Formula used for model fitting: #> pr_ab ~ aet + cwd + tmin + ppt_djf + ppt_jja + pH + awc + depth + landform #> Tuning model... #> Replica number: 1/1 #> Formula used for model fitting: #> pr_ab ~ aet + cwd + tmin + ppt_djf + ppt_jja + pH + awc + depth + landform #> Replica number: 1/1 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 rf_t1$performance #> # A tibble: 1 × 26 #> mtry model threshold thr_value n_presences n_absences TPR_mean TPR_sd #> #> 1 3 raf max_sens_spec 0.638 700 700 0.916 0.0430 #> # ℹ 18 more variables: TNR_mean , TNR_sd , SORENSEN_mean , #> # SORENSEN_sd , JACCARD_mean , JACCARD_sd , FPB_mean , #> # FPB_sd , OR_mean , OR_sd , TSS_mean , TSS_sd , #> # AUC_mean , AUC_sd , BOYCE_mean , BOYCE_sd , #> # IMAE_mean , IMAE_sd # Note in rf_t1$performance the best model was the one with mtry = 3 and threshold = 'max_sens_spec' merge_df <- sdm_summarize(models = list(gam_t1, glm_t1, rf_t1)) merge_df #> # A tibble: 7 × 27 #> model_ID model threshold thr_value n_presences n_absences TPR_mean TPR_sd #> #> 1 1 gam equal_sens_sp… 0.540 700 700 0.737 0.0366 #> 2 1 gam max_sens_spec 0.530 700 700 0.75 0.0460 #> 3 1 gam max_sorensen 0.359 700 700 0.863 0.0601 #> 4 2 glm equal_sens_sp… 0.523 700 700 0.663 0.0583 #> 5 2 glm max_sens_spec 0.463 700 700 0.803 0.111 #> 6 2 glm max_sorensen 0.356 700 700 0.876 0.0436 #> 7 3 raf max_sens_spec 0.638 700 700 0.916 0.0430 #> # ℹ 19 more variables: TNR_mean , TNR_sd , SORENSEN_mean , #> # SORENSEN_sd , JACCARD_mean , JACCARD_sd , FPB_mean , #> # FPB_sd , OR_mean , OR_sd , TSS_mean , TSS_sd , #> # AUC_mean , AUC_sd , BOYCE_mean , BOYCE_sd , #> # IMAE_mean , IMAE_sd , mtry "},{"path":"https://sjevelazco.github.io/flexsdm/articles/v03_post_modeling.html","id":"raster-interpolation-between-two-time-periods","dir":"Articles","previous_headings":"Species occurrence, presence/absense and environmental data","what":"3. 
Raster interpolation between two time periods","title":"flexsdm: Overview of Post-modeling functions","text":"function useful calculating projected suitability values two time periods simple interpolation using two raster objects suitability values. useful , example, SDM projected future past time period (using maps predictor variables, climate variables, different time periods), user requires estimate suitability intermediate time periods. example may needed input types models risk analysis. function returns SpatRaster dir_save used NULL. However, user specifies dir_save, function save interpolated raster files given directory. function create object interpolated values n annual layers ranging initial final year. resolution dimensions result object remain initial final maps. example, nine annual (2011-2019) interpolated maps generated initial (2010) final (2020) prediction maps. cell starting value 1 ending value 0 changed increments (1-0)/((2020-2010)-1), given interpolated values 0.9, 0.8, 0.7…0.1","code":"library(terra) library(dplyr) f <- system.file(\"external/suit_time_step.tif\", package = \"flexsdm\") abma <- terra::rast(f) plot(abma) int <- interp( r1 = abma[[1]], # set the raster of initial year r2 = abma[[2]], # set the raster of final year y1 = 2010, # set the numeric initial year y2 = 2020, # set the numeric final year rastername = \"Abies\", dir_save = NULL ) # Layers in the abma SpatRaster names(abma) #> [1] \"current\" \"future\" # plot(abma) # Layers in the int SpatRaster int #> class : SpatRaster #> dimensions : 558, 394, 11 (nrow, ncol, nlyr) #> resolution : 1890, 1890 (x, y) #> extent : -373685.8, 370974.2, -604813.3, 449806.7 (xmin, xmax, ymin, ymax) #> coord. ref. : +proj=aea +lat_0=0 +lon_0=-120 +lat_1=34 +lat_2=40.5 +x_0=0 +y_0=-4000000 +datum=NAD83 +units=m +no_defs #> source(s) : memory #> varnames : suit_time_step #> suit_time_step #> suit_time_step #> ... 
#> names : Abies_2010, Abies_2011, Abies_2012, Abies_2013, Abies_2014, Abies_2015, ... #> min values : 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, ... #> max values : 0.9756107, 0.9606077, 0.9504615, 0.9440073, 0.9442941, 0.9463548, ... plot(int)"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v03_post_modeling.html","id":"measure-model-extrapolation","dir":"Articles","previous_headings":"Species occurrence, presence/absense and environmental data","what":"4. Measure model extrapolation","title":"flexsdm: Overview of Post-modeling functions","text":"function measures extent model extrapolation comparing data used modeling calibration area model projection using approach proposed Velazco et al., prep. accessible area defines calibration area used extract environmental conditions","code":"library(dplyr) library(terra) data(spp) f <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(f) # Inspect the unique values for species spp$species %>% unique() #> [1] \"sp1\" \"sp2\" \"sp3\" # Subset spp data into a tibble only with coordinates for sp3 and pr_ab == 1 sp <- spp %>% dplyr::filter(species == \"sp3\", pr_ab == 1) %>% dplyr::select(x, y) # Define accessible area for sp3 based on a buffer with around each point that is related to dispersal ability or some other ecological criterion ca <- calib_area( sp, x = \"x\", y = \"y\", method = c(\"buffer\", width = 30000), crs = crs(somevar) ) # Plot the SpatRaster, occurrences and accessible area plot(somevar$CFP_1) points(sp) plot(ca, add = T) somevar_ca <- somevar %>% crop(., ca) %>% mask(., ca) # Plot environmental conditions of the calibration area plot(somevar_ca) xp <- extra_eval( training_data = somevar_ca, projection_data = somevar, n_cores = 1, aggreg_factor = 3 ) # Plot the SpatRaster object with the extrapolation values measured in percentage 
plot(xp)"},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/articles/v03_post_modeling.html","id":"create-spatial-predictor-variables-to-reduce-overprediction-of-species-distribution-models","dir":"Articles","previous_headings":"Species occurrence, presence/absense and environmental data","what":"6. Create spatial predictor variables to reduce overprediction of species distribution models","title":"flexsdm: Overview of Post-modeling functions","text":"function creates geographical predictor variables , together environmental variables, can used construct constrained species distribution models. function returns SpatRaster object, used together environmental variables construct species distribution models. ‘xy’ approach creates single pair raster layers can used species share study region. Otherwise, ‘cml’, ‘min’, ‘ker’ create species-specific raster layer. Next, use different methods according data.","code":"library(dplyr) library(terra) data(\"spp\") somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) # Select the presences of one species (sp3) occ <- spp %>% dplyr::filter(species == \"sp3\", pr_ab == 1) # Select a raster layer to be used as a basic raster a_variable <- somevar[[1]] plot(a_variable) points(occ %>% dplyr::select(x, y)) # Use xy method m_xy <- msdm_priori( data = occ, x = \"x\", y = \"y\", method = \"xy\", env_layer = a_variable ) plot(m_xy) # Explore the object. This method assumes that spatial structure can partially explain species distribution (Bahn & Mcgill, 2007). Therefore, the result are two raster layers containing the latitude and longitude of pixels, respectively. This method could be used for all species set that share the same study area region. m_xy #> class : SpatRaster #> dimensions : 558, 394, 2 (nrow, ncol, nlyr) #> resolution : 1890, 1890 (x, y) #> extent : -373685.8, 370974.2, -604813.3, 449806.7 (xmin, xmax, ymin, ymax) #> coord. ref. 
: +proj=aea +lat_0=0 +lon_0=-120 +lat_1=34 +lat_2=40.5 +x_0=0 +y_0=-4000000 +datum=NAD83 +units=m +no_defs #> source(s) : memory #> varnames : somevar #> somevar #> names : msdm_lon, msdm_lat #> min values : -370850.8, -601978.3 #> max values : 368139.2, 448861.7 m_cml <- msdm_priori( data = occ, x = \"x\", y = \"y\", method = \"cml\", env_layer = a_variable ) plot(m_cml) # Explore the object. This method assumes that pixels closer to presences are likely included in species distributions. The results is a raster layer containing the sum of euclidean geographic distances from each pixel to all occurrences of a species. m_cml #> class : SpatRaster #> dimensions : 558, 394, 1 (nrow, ncol, nlyr) #> resolution : 1890, 1890 (x, y) #> extent : -373685.8, 370974.2, -604813.3, 449806.7 (xmin, xmax, ymin, ymax) #> coord. ref. : +proj=aea +lat_0=0 +lon_0=-120 +lat_1=34 +lat_2=40.5 +x_0=0 +y_0=-4000000 +datum=NAD83 +units=m +no_defs #> source(s) : memory #> varname : somevar #> name : msdm_cml #> min value : 0 #> max value : 1"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v03_post_modeling.html","id":"methods-to-correct-overprediction-of-species-distribution-models-based-on-occurrences-and-suitability-patterns","dir":"Articles","previous_headings":"Species occurrence, presence/absense and environmental data","what":"7. Methods to correct overprediction of species distribution models based on occurrences and suitability patterns","title":"flexsdm: Overview of Post-modeling functions","text":"methods designed reduce overprediction species distribution models based posteriori method (see Mendes et al 2020), .e., combination patterns species occurrences predicted suitability. First, prepare data Next, fit predict model Next, let’s predict model plot map Finally, perform correction avoid models overpredictions. 
#=========#=========#=========#=========#=========#=========#=========# Vignette still construction changes #=========#=========#=========#=========#=========#=========#=========#","code":"library(dplyr) library(terra) data(\"spp\") somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) # Prepare data for modeling a species set.seed(10) occ <- spp %>% dplyr::filter(species == \"sp2\") %>% # filter using only sp2 sdm_extract( data = ., x = \"x\", y = \"y\", env_layer = somevar, filter_na = TRUE ) %>% # extract variables values from the raster layer part_random(., pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 10) ) # add columns with partition #> 6 rows were excluded from database because NAs were found m_glm <- fit_glm( data = occ, response = \"pr_ab\", predictors = names(somevar), partition = \".part\", thr = \"equal_sens_spec\", ) #> Formula used for model fitting: #> pr_ab ~ CFP_1 + CFP_2 + CFP_3 + CFP_4 + I(CFP_1^2) + I(CFP_2^2) + I(CFP_3^2) + I(CFP_4^2) #> Replica number: 1/1 #> Partition number: 1/10 #> Partition number: 2/10 #> Partition number: 3/10 #> Partition number: 4/10 #> Partition number: 5/10 #> Partition number: 6/10 #> Partition number: 7/10 #> Partition number: 8/10 #> Partition number: 9/10 #> Partition number: 10/10 # Predict this model m_pred <- sdm_predict(models = m_glm, pred = somevar, thr = NULL, con_thr = FALSE) #> Predicting individual models # Predicting individual models plot(m_pred[[1]]) # Using mcp method. The Minimum Convex Polygon (mcp) method excludes from SDMs climate suitable pixels that do not intercept a minimum convex polygon, with interior angles smaller than 180, enclosing all occurrences of a species. m_mcp <- msdm_posteriori( records = occ, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", method = \"mcp\", cont_suit = m_pred[[1]], thr = \"equal_sens_spec\", buffer = NULL ) plot(m_mcp) # Using bmcp method. 
The Buffered Minimum Convex Polygon (bmcp) method is similar to the 'mcp' except by the inclusion of a buffer zone surrounding minimum convex polygons. m_bmcp <- msdm_posteriori( records = occ, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", method = \"bmcp\", cont_suit = m_pred[[1]], thr = \"equal_sens_spec\", buffer = 30000, crs = crs(m_pred[[1]]) ) plot(m_bmcp) # Using obr method. The Occurrences Based Restriction (obr) method assumes that suitable patches intercepting species occurrences are more likely a part of species distributions than suitable patches that do not intercept any occurrence. m_obr <- msdm_posteriori( records = occ, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", method = \"obr\", cont_suit = m_pred[[1]], thr = \"equal_sens_spec\", buffer = NULL ) plot(m_obr) # Using pres method. The only occurrences based restriction (pres) method only retains those pixels in suitability patches intercepting occurrences. m_pres <- msdm_posteriori( records = occ, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", method = \"pres\", cont_suit = m_pred[[1]], thr = \"equal_sens_spec\", buffer = NULL ) plot(m_pres) # Using lq method. The Lower Quantile (lq) method works whenever a suitable pixel is within a k patch, i.e., not within this lower quartile, the suitability of the pixel is reduced to zero. This means that 75% of k patches were withdrawn from the model. m_lq <- msdm_posteriori( records = occ, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", method = \"lq\", cont_suit = m_pred[[1]], thr = \"equal_sens_spec\", buffer = NULL ) plot(m_lq)"},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/articles/v04_Red_fir_example.html","id":"study-species-overview-of-methods","dir":"Articles","previous_headings":"Example of full modeling process","what":"Study species & overview of methods","title":"flexsdm: Red Fir example","text":", used flexsdm package model current distribution California red fir (Abies magnifica). 
Red fir high-elevation conifer species ’s geographic range extends Sierra Nevada California, USA, southern portion Cascade Range Oregon. species, used presence data compiled several public datasets curated natural resources agencies. built distribution models using four hydro-climatic variables: actual evapotranspiration, climatic water deficit, maximum temperature warmest month, minimum temperature coldest month. variables resampled (aggregated) 1890 m spatial resolution improve processing time.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/articles/v04_Red_fir_example.html","id":"delimit-of-a-calibration-area","dir":"Articles","previous_headings":"Example of full modeling process","what":"Delimit of a calibration area","title":"flexsdm: Red Fir example","text":"Delimiting calibration area (aka accessible area) essential step SDMs methodological theoretical terms. calibration area affect several characteristics SDM like range environmental variables, number absences, distribution background points pseudo-absences, unfortunately, performance metrics like AUC TSS. several ways delimit calibration area. calib_area(). 
used method calibration area delimited 100-km buffer around presences (shown figure ).","code":"# devtools::install_github('sjevelazco/flexsdm') library(flexsdm) library(terra) library(dplyr) somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) # environmental data names(somevar) <- c(\"aet\", \"cwd\", \"tmx\", \"tmn\") data(abies) abies_p <- abies %>% select(x, y, pr_ab) %>% filter(pr_ab == 1) # filter only for presence locations ca <- calib_area( data = abies_p, x = 'x', y = 'y', method = c('buffer', width = 100000), crs = crs(somevar) ) # create a calibration area with 100 km buffer around occurrence points # visualize the species occurrences layer1 <- somevar[[1]] layer1[!is.na(layer1)] <- 1 plot(layer1, col=\"gray80\", legend=FALSE, axes=FALSE) plot(crop(ca, layer1), add=TRUE) points(abies_p[,c(\"x\", \"y\")], col = \"#00000480\")"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v04_Red_fir_example.html","id":"occurrence-filtering","dir":"Articles","previous_headings":"Example of full modeling process","what":"Occurrence filtering","title":"flexsdm: Red Fir example","text":"Sample bias species occurrence data long recognized issue SDM. However, environmental filtering observation data can improve model predictions reducing redundancy environmental (e.g. climatic) hyper-space (Varela et al. 2014). use function occfilt_env() thin red fir occurrences based environmental space. function unique flexsdm, contrast packages able use number environmental dimensions perform PCA filtering. Next apply environmental occurrence filtering using 8 bins display resulting filtered occurrence data","code":"abies_p$id <- 1:nrow(abies_p) # adding unique id to each row abies_pf <- abies_p %>% occfilt_env( data = ., x = \"x\", y = \"y\", id = \"id\", nbins = 8, env_layer = somevar ) %>% left_join(abies_p, by = c(\"id\", \"x\", \"y\")) #> Extracting values from raster ... 
#> 27 records were removed because they have NAs for some variables #> Number of unfiltered records: 673 #> Number of filtered records: 216 plot(layer1, col=\"gray80\", legend=FALSE, axes=FALSE) plot(crop(ca, layer1), add=TRUE) points(abies_p[,c(\"x\", \"y\")], col = \"#00000480\") points(abies_pf[,c(\"x\", \"y\")], col = \"#5DC86180\")"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v04_Red_fir_example.html","id":"block-partition-with-4-folds","dir":"Articles","previous_headings":"Example of full modeling process","what":"Block partition with 4 folds","title":"flexsdm: Red Fir example","text":"Data partitioning, splitting data testing training groups, key step building SDMs. flexsdm offers multiple options data partitioning use spatial block method. Geographically structured data partitioning methods especially useful users want evaluate model transferability different regions time periods. part_sblock() function explores spatial blocks different raster cells sizes returns one best suited input datset based spatial autocorrelation, environmental similarity, number presence/absence records block partition. function’s output provides users 1) tibble presence/absence locations assigned partition number, 2) tibble information best partition, 3) SpatRaster showing selected grid. 
want divide data 4 different partitions using spatial block method.","code":"set.seed(10) occ_part <- abies_pf %>% part_sblock( data = ., env_layer = somevar, pr_ab = \"pr_ab\", x = \"x\", y = \"y\", n_part = 4, min_res_mult = 3, max_res_mult = 200, num_grids = 30, prop = 1 ) #> The following grid cell sizes will be tested: #> 5670 | 18508.97 | 31347.93 | 44186.9 | 57025.86 | 69864.83 | 82703.79 | 95542.76 | 108381.72 | 121220.69 | 134059.66 | 146898.62 | 159737.59 | 172576.55 | 185415.52 | 198254.48 | 211093.45 | 223932.41 | 236771.38 | 249610.34 | 262449.31 | 275288.28 | 288127.24 | 300966.21 | 313805.17 | 326644.14 | 339483.1 | 352322.07 | 365161.03 | 378000 #> Creating basic raster mask... #> Searching for the optimal grid size... abies_pf <- occ_part$part # Transform best block partition to a raster layer with same resolution and extent than # predictor variables block_layer <- get_block(env_layer = somevar, best_grid = occ_part$grid) cl <- c(\"#64146D\", \"#9E2962\", \"#F47C15\", \"#FCFFA4\") plot(block_layer, col=cl, legend=FALSE, axes=FALSE) points(abies_pf[,c(\"x\", \"y\")]) # Number of presences per block abies_pf %>% dplyr::group_by(.part) %>% dplyr::count() #> # A tibble: 4 × 2 #> # Groups: .part [4] #> .part n #> #> 1 1 38 #> 2 2 59 #> 3 3 33 #> 4 4 86 # Additional information of the best block occ_part$best_part_info #> # A tibble: 1 × 5 #> n_grid cell_size spa_auto env_sim sd_p #> #> 1 14 172577. 0.5 173. 24.1"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v04_Red_fir_example.html","id":"pseudo-absencebackground-points-using-partition-previously-created-as-a-mask","dir":"Articles","previous_headings":"Example of full modeling process","what":"Pseudo-absence/background points (using partition previously created as a mask)","title":"flexsdm: Red Fir example","text":"example, species presence data. However, SDM methods require either pseudo-absence background data. 
, use spatial block partition just created generate pseudo-absence background points. Extract environmental data presence-absence background data . View distributions present points, pseudo-absence points, background points using blocks reference map.","code":"# Spatial blocks where species occurs # Sample background points throughout study area with random method, allocating 10X the number of presences a background set.seed(10) bg <- lapply(1:4, function(x) { sample_background( data = abies_pf, x = \"x\", y = \"y\", n = sum(abies_pf$.part == x) * 10, method = \"random\", rlayer = block_layer, maskval = x, calibarea = ca ) }) %>% bind_rows() bg <- sdm_extract(data = bg, x = \"x\", y = \"y\", env_layer = block_layer) # Sample a number of pseudo-absences equal to the presence in each partition set.seed(10) psa <- lapply(1:4, function(x) { sample_pseudoabs( data = abies_pf, x = \"x\", y = \"y\", n = sum(abies_pf$.part == x), method = \"random\", rlayer = block_layer, maskval = x, calibarea = ca ) }) %>% bind_rows() psa <- sdm_extract(data = psa, x = \"x\", y = \"y\", env_layer = block_layer) cl <- c(\"#280B50\", \"#9E2962\", \"#F47C15\", \"#FCFFA4\") plot(block_layer, col=\"gray80\", legend=FALSE, axes=FALSE) points(bg[,c(\"x\", \"y\")], col=cl[bg$.part], cex=0.8) # Background points points(psa[,c(\"x\", \"y\")], bg=cl[psa$.part], cex=0.8, pch=21) # Pseudo-absences # Bind a presences and pseudo-absences abies_pa <- bind_rows(abies_pf, psa) abies_pa # Presence-Pseudo-absence database #> # A tibble: 432 × 4 #> x y pr_ab .part #> #> 1 -12558. 68530. 1 2 #> 2 115217. -145937. 1 4 #> 3 3634. 22501. 1 2 #> 4 44972. -60781. 1 2 #> 5 -34463. 160313. 1 3 #> 6 83108. -27300. 1 2 #> 7 124877. -176319. 1 4 #> 8 118707. -179991. 1 4 #> 9 126141. -176302. 1 4 #> 10 -49722. 141124. 1 3 #> # ℹ 422 more rows bg # Background points #> # A tibble: 2,160 × 4 #> x y pr_ab .part #> #> 1 -153501. 392162. 0 1 #> 2 -89241. 263642. 0 1 #> 3 -89241. 27392. 0 1 #> 4 -130821. 331682. 
0 1 #> 5 -132711. 339242. 0 1 #> 6 -51441. -63328. 0 1 #> 7 -59001. 67082. 0 1 #> 8 -32541. -51988. 0 1 #> 9 -96801. 932. 0 1 #> 10 -47661. -31198. 0 1 #> # ℹ 2,150 more rows abies_pa <- abies_pa %>% sdm_extract( data = ., x = \"x\", y = \"y\", env_layer = somevar, filter_na = TRUE ) bg <- bg %>% sdm_extract( data = ., x = \"x\", y = \"y\", env_layer = somevar, filter_na = TRUE )"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v04_Red_fir_example.html","id":"fit-models-with-tune_max-fit_gau-and-fit_glm","dir":"Articles","previous_headings":"Example of full modeling process","what":"Fit models with tune_max, fit_gau, and fit_glm","title":"flexsdm: Red Fir example","text":"Now, fit models. flexsdm package offers wide range modeling options, traditional statistical methods like GLMs GAMs, machine learning methods like random forests support vector machines. modeling method, flexsdm provides fit_ tune_ functions, allow users use default settings adjust hyperparameters depending research goals. , test tune_max() (tuned Maximum Entropy model), fit_gau() (fit Guassian Process model), fit_glm (fit Generalized Linear Model). model, selected three threshold values generate binary suitability predictions: threshold maximizes TSS (max_sens_spec), threshold sensitivity specificity equal (equal_sens_spec), threshold Sorenson index highest (max_sorenson). example, selected TSS performance metric used selecting best combination hyper-parameter values tuned Maximum Entropy model.","code":"t_max <- tune_max( data = abies_pa, response = \"pr_ab\", predictors = names(somevar), background = bg, partition = \".part\", grid = expand.grid( regmult = seq(0.1, 3, 0.5), classes = c(\"l\", \"lq\", \"lqhpt\") ), thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\"), metric = \"TSS\", clamp = TRUE, pred_type = \"cloglog\" ) #> Tuning model... 
#> Replica number: 1/1 #> Partition number: 1/4 #> Partition number: 2/4 #> Partition number: 3/4 #> Partition number: 4/4 #> Fitting best model #> Formula used for model fitting: #> ~aet + cwd + tmx + tmn + I(aet^2) + I(cwd^2) + I(tmx^2) + I(tmn^2) + hinge(aet) + hinge(cwd) + hinge(tmx) + hinge(tmn) + thresholds(aet) + thresholds(cwd) + thresholds(tmx) + thresholds(tmn) + cwd:aet + tmx:aet + tmn:aet + tmx:cwd + tmn:cwd + tmn:tmx - 1 #> Replica number: 1/1 #> Partition number: 1/4 #> Partition number: 2/4 #> Partition number: 3/4 #> Partition number: 4/4 f_gau <- fit_gau( data = abies_pa, response = \"pr_ab\", predictors = names(somevar), partition = \".part\", thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\") ) #> Replica number: 1/1 #> Partition number: 1/4 #> Partition number: 2/4 #> Partition number: 3/4 #> Partition number: 4/4 f_glm <- fit_glm( data = abies_pa, response = \"pr_ab\", predictors = names(somevar), partition = \".part\", thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\"), poly = 2 ) #> Formula used for model fitting: #> pr_ab ~ aet + cwd + tmx + tmn + I(aet^2) + I(cwd^2) + I(tmx^2) + I(tmn^2) #> Replica number: 1/1 #> Partition number: 1/4 #> Partition number: 2/4 #> Partition number: 3/4 #> Partition number: 4/4"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v04_Red_fir_example.html","id":"fit-an-ensemble-model","dir":"Articles","previous_headings":"Example of full modeling process","what":"Fit an ensemble model","title":"flexsdm: Red Fir example","text":"Spatial predictions different SDM algorithms can vary substantially, ensemble modeling become increasingly popular. fit_ensemble() function, users can easily produce ensemble SDM based individual fit_ tune_ models included package. example, fit ensemble model red fir based weighted average three individual models. used threshold values performance metric implemented individual models. 
output flexsdm model objects allows easily compare metrics across models, AUC TSS. example, can use sdm_summarize() function merge model performance tables.","code":"ens_m <- fit_ensemble( models = list(t_max, f_gau, f_glm), ens_method = \"meanw\", thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\"), thr_model = \"max_sens_spec\", metric = \"TSS\" ) #> | | | 0% | |======================================================================| 100% ens_m$performance #> 
[38;5;246m# A tibble: 3 × 25
[39m #> model threshold thr_value n_presences n_absences TPR_mean TPR_sd TNR_mean #> 
[3m
[38;5;246m
[39m
[23m 
[3m
[38;5;246m
[39m
[23m 
[3m
[38;5;246m
[39m
[23m 
[3m
[38;5;246m
[39m
[23m 
[3m
[38;5;246m
[39m
[23m 
[3m
[38;5;246m
[39m
[23m 
[3m
[38;5;246m
[39m
[23m 
[3m
[38;5;246m
[39m
[23m #> 
[38;5;250m1
[39m meanw equal_sens_sp… 0.582 216 216 0.787 0.079
[4m5
[24m 0.808 #> 
[38;5;250m2
[39m meanw max_sens_spec 0.470 216 216 0.949 0.016
[4m2
[24m 0.752 #> 
[38;5;250m3
[39m meanw max_sorensen 0.449 216 216 0.963 0.014
[4m3
[24m 0.738 #> 
[38;5;246m# ℹ 17 more variables: TNR_sd , SORENSEN_mean , SORENSEN_sd ,
[39m #> 
[38;5;246m# JACCARD_mean , JACCARD_sd , FPB_mean , FPB_sd ,
[39m #> 
[38;5;246m# OR_mean , OR_sd , TSS_mean , TSS_sd , AUC_mean ,
[39m #> 
[38;5;246m# AUC_sd , BOYCE_mean , BOYCE_sd , IMAE_mean ,
[39m #> 
[38;5;246m# IMAE_sd 
[39m model_perf <- sdm_summarize(list(t_max, f_gau, f_glm, ens_m)) model_perf #> # A tibble: 10 × 28 #> model_ID model threshold thr_value n_presences n_absences TPR_mean TPR_sd #> #> 1 1 max max_sens_spec 0.364 216 216 0.954 0.0316 #> 2 2 gau equal_sens_s… 0.643 216 216 0.784 0.0890 #> 3 2 gau max_sens_spec 0.471 216 216 0.952 0.0122 #> 4 2 gau max_sorensen 0.471 216 216 0.964 0.0108 #> 5 3 glm equal_sens_s… 0.649 216 216 0.800 0.0851 #> 6 3 glm max_sens_spec 0.554 216 216 0.954 0.0493 #> 7 3 glm max_sorensen 0.423 216 216 0.977 0.0379 #> 8 4 meanw equal_sens_s… 0.582 216 216 0.787 0.0795 #> 9 4 meanw max_sens_spec 0.470 216 216 0.949 0.0162 #> 10 4 meanw max_sorensen 0.449 216 216 0.963 0.0143 #> # ℹ 20 more variables: TNR_mean , TNR_sd , SORENSEN_mean , #> # SORENSEN_sd , JACCARD_mean , JACCARD_sd , FPB_mean , #> # FPB_sd , OR_mean , OR_sd , TSS_mean , TSS_sd , #> # AUC_mean , AUC_sd , BOYCE_mean , BOYCE_sd , #> # IMAE_mean , IMAE_sd , regmult , classes "},{"path":"https://sjevelazco.github.io/flexsdm/articles/v04_Red_fir_example.html","id":"project-the-ensemble-model","dir":"Articles","previous_headings":"Example of full modeling process","what":"Project the ensemble model","title":"flexsdm: Red Fir example","text":"Next project ensemble model space across entire extent environmental layer, California Floristic Province, using sdm_predict() function. function can use predict species suitability across area species’ current future suitability. example, project ensemble model one threshold, though users option project multiple models multiple threshold values. 
, also specify want function return SpatRast continuous suitability values threshold (con_thr = TRUE).","code":"pr_1 <- sdm_predict( models = ens_m, pred = somevar, thr = \"max_sens_spec\", con_thr = TRUE, predict_area = NULL ) #> Predicting ensembles unconstrained <- pr_1$meanw[[1]] names(unconstrained) <- \"unconstrained\" cl <- c(\"#FDE725\", \"#B3DC2B\", \"#6DCC57\", \"#36B677\", \"#1F9D87\", \"#25818E\", \"#30678D\", \"#3D4988\", \"#462777\", \"#440154\") plot(unconstrained, col=cl, legend=FALSE, axes=FALSE)"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v04_Red_fir_example.html","id":"constrain-the-model-with-msdm_posterior","dir":"Articles","previous_headings":"Example of full modeling process","what":"Constrain the model with msdm_posterior","title":"flexsdm: Red Fir example","text":"Finally, flexsdm offers users function help correct overprediction SDM based occurrence records suitability patterns. example constrained ensemble model using method “occurrence based restriction”, assumes suitable patches intercept species occurrences likely part species distributions suitable patches intercept occurrences. methods msdm_posteriori() function work presences important always use original database (.e., presences spatially environmentally filtered). methods available msdm_posteriori() function based Mendes et al. (2020). 
#=========#=========#=========#=========#=========#=========#=========# Vignette still construction changes #=========#=========#=========#=========#=========#=========#=========#","code":"thr_val <- ens_m$performance %>% dplyr::filter(threshold == \"max_sens_spec\") %>% pull(thr_value) m_pres <- msdm_posteriori( records = abies_p, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", cont_suit = pr_1$meanw[[1]], method = c(\"obr\"), thr = c(\"sensitivity\", sens = thr_val), buffer = NULL ) constrained <- m_pres$meanw[[1]] names(constrained) <- \"constrained\" cl <- c(\"#FDE725\", \"#B3DC2B\", \"#6DCC57\", \"#36B677\", \"#1F9D87\", \"#25818E\", \"#30678D\", \"#3D4988\", \"#462777\", \"#440154\") plot(constrained, col=cl, legend=FALSE, axes=FALSE)"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v05_Rare_species_example.html","id":"intro","dir":"Articles","previous_headings":"","what":"Intro","title":"flexsdm: Modeling a rare species","text":"Creating SDMs rare poorly known species can difficult task. Occurrence data often limited observation, can lead model overfitting, especially using many predictor variables build models. However, researchers often interested building SDMs rare species, often threatened need conservation action. address issues associated modeling spatial distributions rare species, Lomba et al. (2010) Breiner et al. (2015) proposed method “ensemble small models” ESM. ESM, many bivariate models pairwise combinations predictor variable, ensemble performed. flexsdm, ensemble created using average suitability across “small models”, weighted Somers’ D (D = 2 * (AUC-.5)). important note method allow use categorical variables (soil type). practical applications ESMs include identifying areas reintroduction rare species areas establishing new populations, especially face climate change. example, Dubos et al. 
(2021) used variation ESM identify areas may remain suitable climate change two rare species Madagascar: golden mantella frog (Mantella aurantiaca) Manapany day gecko (Phelsuma inexpectata). example, walk process comparing ESM traditional modeling approaches Hesperocyparis stephensonii (Cuyamaca cypress), conifer tree species endemic southern California. species listed Critically Endangered IUCN found headwaters King Creek San Diego County. Cedar Fire 2003 left 30-40 surviving trees. hypothetical example, searching suitable areas might possible establish new populations species, hopes decreasing species’ future extinction risk.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/articles/v05_Rare_species_example.html","id":"data","dir":"Articles","previous_headings":"Intro","what":"Data","title":"flexsdm: Modeling a rare species","text":"models, use four environmental variables influence plant distributions California: available evapotranspiration (aet), climatic water deficit (cwd), maximum temperature warmest month (tmx), minimum temperature coldest month (tmn). 
occurrence data include 21 geo-referenced observations downloaded online database Calflora.","code":"# devtools::install_github('sjevelazco/flexsdm') library(flexsdm) library(terra) library(dplyr) # environmental data somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) names(somevar) <- c(\"aet\", \"cwd\", \"tmx\", \"tmn\") # species occurence data (presence-only) data(hespero) hespero <- hespero %>% dplyr::select(-id) # California ecoregions regions <- system.file(\"external/regions.tif\", package = \"flexsdm\") regions <- terra::rast(regions) regions <- as.polygons(regions) sp_region <- terra::subset(regions, regions$category == \"SCR\") # ecoregion where *Hesperocyparis stephensonii* is found # visualize the species occurrences plot( sp_region, col = \"gray80\", legend = FALSE, axes = FALSE, main = \"Hesperocyparis stephensonii occurrences\" ) points(hespero[, c(\"x\", \"y\")], col = \"black\", pch = 16) cols <- rep(\"gray80\", 8) cols[regions$category == \"SCR\"] <- \"yellow\" terra::inset( regions, loc = \"bottomleft\", scale = .3, col = cols )"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v05_Rare_species_example.html","id":"delimit-calibration-area","dir":"Articles","previous_headings":"Intro","what":"Delimit calibration area","title":"flexsdm: Modeling a rare species","text":"First, must define model’s calibration area. flexsdm package offers several methods defining model calibration area. 
, use 25-km buffer areas around presence points select pseudo-absence locations.","code":"ca <- calib_area( data = hespero, x = \"x\", y = \"y\", method = c('buffer', width=25000), crs = crs(somevar) ) # visualize the species occurrences & calibration area plot( sp_region, col = \"gray80\", legend = FALSE, axes = FALSE, main = \"Calibration area and occurrences\") plot(ca, add=TRUE) points(hespero[,c(\"x\", \"y\")], col = \"black\", pch = 16)"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v05_Rare_species_example.html","id":"create-pseudo-absence-data","dir":"Articles","previous_headings":"Intro","what":"Create pseudo-absence data","title":"flexsdm: Modeling a rare species","text":"often case rare species, species presence data. However, SDM methods require either pseudo-absence background data. , use calibration area produce pseudo-absence data can used SDMs.","code":"# Sample the same number of species presences set.seed(10) psa <- sample_pseudoabs( data = hespero, x = \"x\", y = \"y\", n = sum(hespero$pr_ab), # selecting number of pseudo-absence points that is equal to number of presences method = \"random\", rlayer = somevar, calibarea = ca ) # Visualize species presences and pseudo-absences plot( sp_region, col = \"gray80\", legend = FALSE, axes = FALSE, xlim = c(289347, 353284), ylim = c(-598052, -520709), main = \"Presence = yellow, Pseudo-absence = black\") plot(ca, add=TRUE) points(psa[,c(\"x\", \"y\")], cex=0.8, pch=16, col = \"black\") # Pseudo-absences points(hespero[,c(\"x\", \"y\")], col = \"yellow\", pch = 16, cex = 1.5) # Presences # Bind a presences and pseudo-absences hespero_pa <- bind_rows(hespero, psa) hespero_pa # Presence-Pseudo-absence database #> # A tibble: 42 × 3 #> x y pr_ab #> #> 1 316923. -557843. 1 #> 2 317155. -559234. 1 #> 3 316960. -558186. 1 #> 4 314347. -559648. 1 #> 5 317348. -557349. 1 #> 6 316753. -559679. 1 #> 7 316777. -558644. 1 #> 8 317050. -559043. 1 #> 9 316655. -559928. 1 #> 10 316418. -567439. 
1 #> # ℹ 32 more rows"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v05_Rare_species_example.html","id":"partition-data-for-evaluating-models","dir":"Articles","previous_headings":"Intro","what":"Partition data for evaluating models","title":"flexsdm: Modeling a rare species","text":"evaluate model performance, need specify data testing training. flexsdm offers range random spatial random data partition methods evaluating SDMs. use repeated K-fold cross-validation, suitable partition approach performing ESM.","code":"set.seed(10) # Repeated K-fold method hespero_pa2 <- part_random( data = hespero_pa, pr_ab = \"pr_ab\", method = c(method = \"rep_kfold\", folds = 5, replicates = 10) )"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v05_Rare_species_example.html","id":"extracting-environmental-values","dir":"Articles","previous_headings":"Intro","what":"Extracting environmental values","title":"flexsdm: Modeling a rare species","text":"Next, extract values four environmental predictors presence pseudo-absence locations.","code":"hespero_pa3 <- sdm_extract( data = hespero_pa2, x = 'x', y = 'y', env_layer = somevar, variables = c('aet', 'cwd', 'tmx', 'tmn') )"},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/articles/v05_Rare_species_example.html","id":"standard-models","dir":"Articles","previous_headings":"Intro > Modeling","what":"Standard models","title":"flexsdm: Modeling a rare species","text":"First, let’s use three standard algorithms model distribution Hesperocyparis stephensonii: GLM, GBM, SVM. 
case, use calibration area making predictions.","code":"mglm <- fit_glm( data = hespero_pa3, response = 'pr_ab', predictors = c('aet', 'cwd', 'tmx', 'tmn'), partition = '.part', thr = 'max_sens_spec' ) #> Formula used for model fitting: #> pr_ab ~ aet + cwd + tmx + tmn + I(aet^2) + I(cwd^2) + I(tmx^2) + I(tmn^2) #> Replica number: 1/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 2/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 3/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 4/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 5/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 6/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 7/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 8/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 9/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 10/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 mgbm <- fit_gbm( data = hespero_pa3, response = 'pr_ab', predictors = c('aet', 'cwd', 'tmx', 'tmn'), partition = '.part', thr = 'max_sens_spec' ) #> Formula used for model fitting: #> pr_ab ~ 
aet + cwd + tmx + tmn #> Replica number: 1/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 2/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 3/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 4/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 5/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 6/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 7/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 8/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 9/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 10/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 msvm <- fit_svm( data = hespero_pa3, response = 'pr_ab', predictors = c('aet', 'cwd', 'tmx', 'tmn'), partition = '.part', thr = 'max_sens_spec' ) #> Formula used for model fitting: #> pr_ab ~ aet + cwd + tmx + tmn #> Replica number: 1/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 2/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 
#> Partition number: 5/5 #> Replica number: 3/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 4/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 5/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 6/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 7/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 8/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 9/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 10/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 mpred <- sdm_predict( models = list(mglm, mgbm, msvm), pred = somevar, con_thr = TRUE, predict_area = ca ) #> Predicting list of individual models"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v05_Rare_species_example.html","id":"ensemble-of-small-models","dir":"Articles","previous_headings":"Intro > Modeling","what":"Ensemble of small models","title":"flexsdm: Modeling a rare species","text":"Now let’s try algorithms ESM approach. 
Note predicting ESM, possible process one time.","code":"eglm <- esm_glm( data = hespero_pa3, response = 'pr_ab', predictors = c('aet', 'cwd', 'tmx', 'tmn'), partition = '.part', thr = 'max_sens_spec' ) #> | | | 0% | |============ | 17% | |======================= | 33% | |=================================== | 50% | |=============================================== | 67% | |========================================================== | 83% | |======================================================================| 100% egbm <- esm_gbm( data = hespero_pa3, response = 'pr_ab', predictors = c('aet', 'cwd', 'tmx', 'tmn'), partition = '.part', thr = 'max_sens_spec' ) #> | | | 0% | |============ | 17% | |======================= | 33% | |=================================== | 50% | |=============================================== | 67% | |========================================================== | 83% | |======================================================================| 100% esvm <- esm_svm( data = hespero_pa3, response = 'pr_ab', predictors = c('aet', 'cwd', 'tmx', 'tmn'), partition = '.part', thr = 'max_sens_spec' ) #> | | | 0% | |============ | 17% | |======================= | 33% | |=================================== | 50% | |=============================================== | 67% | |========================================================== | 83% | |======================================================================| 100% eglm_pred <- sdm_predict( models = eglm , pred = somevar, con_thr = TRUE, predict_area = ca ) #> Predicting ensemble of small models egbm_pred <- sdm_predict( models = egbm , pred = somevar, con_thr = TRUE, predict_area = ca ) #> Predicting ensemble of small models esvm_pred <- sdm_predict( models = esvm, pred = somevar, con_thr = TRUE, predict_area = ca ) #> Predicting ensemble of small 
models"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v05_Rare_species_example.html","id":"comparing-our-models","dir":"Articles","previous_headings":"Intro","what":"Comparing our models","title":"flexsdm: Modeling a rare species","text":"First, let’s take look spatial predictions models. spatial outputs suggest standard models tend predict broader areas high suitability values ESMs. Next, look performance metrics models, based repeated k-folds cross-validation partition method. can easily done using “sdm_summarize()” function flexsdm. , can see AUC, TSS, Jaccard index higher ESMs corresponding standard model. However, Boyce index Inverse Mean Absolute Error slightly higher standard models.","code":"par(mfrow = c(3, 2)) plot(mpred$glm, main = 'Standard GLM') #points(hespero$x, hespero$y, pch = 19) plot(eglm_pred[[1]], main = 'ESM GLM') #points(hespero$x, hespero$y, pch = 19) plot(mpred$gbm, main = 'Standard GBM') #points(hespero$x, hespero$y, pch = 19) plot(egbm_pred[[1]], main = 'ESM GBM') #points(hespero$x, hespero$y, pch = 19) plot(mpred$svm, main = 'Standard SVM') #points(hespero$x, hespero$y, pch = 19) plot(esvm_pred[[1]], main = 'ESM SVM') #points(hespero$x, hespero$y, pch = 19) merge_df <- sdm_summarize(models = list(mglm, mgbm, msvm, eglm, egbm, esvm)) knitr::kable( merge_df %>% dplyr::select( model, AUC = AUC_mean, TSS = TSS_mean, JACCARD = JACCARD_mean, BOYCE = BOYCE_mean, IMAE = IMAE_mean ) )"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v05_Rare_species_example.html","id":"conclusions","dir":"Articles","previous_headings":"Intro","what":"Conclusions","title":"flexsdm: Modeling a rare species","text":"Modeling decisions context-dependent must made case--case basis. However, ESM useful approach practitioners interested modeling rare species want avoid common model overfitting issues. 
always producing SDMs “real-world” applications, important consider spatial prediction patterns along multiple model performance metrics.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/articles/v05_Rare_species_example.html","id":"references","dir":"Articles","previous_headings":"Intro","what":"References","title":"flexsdm: Modeling a rare species","text":"Lomba, ., L. Pellissier, C. Randin, J. Vicente, F. Moreira, J. Honrado, . Guisan. 2010. Overcoming rare species modelling paradox: novel hierarchical framework applied Iberian endemic plant. Biological conservation 143:2647–2657. https://doi.org/10.1016/j.biocon.2010.07.007 Breiner, F. T., Guisan, ., Bergamini, ., & Nobis, M. P. (2015). Overcoming limitations modelling rare species using ensembles small models. Methods Ecology Evolution, 6(10), 1210–1218. https://doi.org/10.1111/2041-210X.12403 Breiner, F. T., Nobis, M. P., Bergamini, ., & Guisan, . (2018). Optimizing ensembles small models predicting distribution species occurrences. Methods Ecology Evolution, 9(4), 802–808. https://doi.org/10.1111/2041-210X.12957 Dubos, N., Montfort, F., Grinand, C., Nourtier, M., Deso, G., Probst, J.-M., Razafimanahaka, J. H., Andriantsimanarilafy, R. R., Rakotondrasoa, E. F., Razafindraibe, P., Jenkins, R., & Crottini, . (2021). narrow-ranging species doomed extinction? Projected dramatic decline future climate suitability two highly threatened species. Perspectives Ecology Conservation, S2530064421000894. https://doi.org/10.1016/j.pecon.2021.10.002","code":""},{"path":"https://sjevelazco.github.io/flexsdm/articles/v06_Extrapolation_example.html","id":"introduction","dir":"Articles","previous_headings":"","what":"Introduction","title":"flexsdm: Tools to explore extrapolation in SDMs","text":"Many SDM applications require model extrapolation, e.g., predictions beyond range data set used fit model. 
example, models often must extrapolate predicting habitat suitability novel environmental conditions induced climate change predicting spread invasive species outside native range based species-environment relationship observed native range. flexsdm, offer new approach (known Shape) evaluating extrapolation truncating spatial predictions based degree extrapolation measured. Shape model-agnostic approach calculating degree extrapolation given projection data point multivariate distance nearest training data point – capturing often complex shape data within environmental space. distances relativized factor reflects dispersion training data environmental space. implemented flexsdm, Shape approach also incorporates adjustable threshold allow binary discrimination acceptable unacceptable degrees extrapolation, based user’s needs applications. information Shape metric, recommend reading article Velazco et al., 2023. vignette, walk evaluate model extrapolation Hesperocyparis stephensonii (Cuyamaca cypress), conifer tree species endemic southern California. species listed Critically Endangered IUCN extremely restricted distribution, found headwaters King Creek San Diego County. Note: tutorial follows generally workflow vignette modeling distribution rare species using ensemble small models (ESM). However, instead constructing ESMs, evaluate model extrapolation predict models extent California Floristic Province (CFP).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/articles/v06_Extrapolation_example.html","id":"data","dir":"Articles","previous_headings":"","what":"Data","title":"flexsdm: Tools to explore extrapolation in SDMs","text":"models, use four environmental variables influence plant distributions California: available evapotranspiration (aet), climatic water deficit (cwd), maximum temperature warmest month (tmx), minimum temperature coldest month (tmn). 
occurrence data include 21 geo-referenced observations downloaded online database Calflora.","code":"library(flexsdm) library(terra) library(dplyr) library(patchwork) # environmental data somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) names(somevar) <- c(\"cwd\", \"tmn\", \"aet\", \"ppt_jja\") # species occurence data (presence-only) data(hespero) hespero <- hespero %>% dplyr::select(-id) # California ecoregions regions <- system.file(\"external/regions.tif\", package = \"flexsdm\") regions <- terra::rast(regions) regions <- terra::as.polygons(regions) sp_region <- terra::subset(regions, regions$category == \"SCR\") # ecoregion where *Hesperocyparis stephensonii* is found # visualize the species occurrences plot( sp_region, col = \"gray80\", legend = FALSE, axes = FALSE, main = \"Hesperocyparis stephensonii occurrences\" ) points(hespero[, c(\"x\", \"y\")], col = \"black\", pch = 16) cols <- rep(\"gray80\", 8) cols[regions$category == \"SCR\"] <- \"yellow\" terra::inset( regions, loc = \"bottomleft\", scale = .3, col = cols )"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v06_Extrapolation_example.html","id":"delimit-calibration-area","dir":"Articles","previous_headings":"","what":"Delimit calibration area","title":"flexsdm: Tools to explore extrapolation in SDMs","text":"First, must define model’s calibration area. flexsdm package offers several methods defining model calibration area. 
, use 25-km buffer areas around presence points select pseudo-absence locations.","code":"ca <- calib_area( data = hespero, x = \"x\", y = \"y\", method = c(\"buffer\", width = 25000), crs = crs(somevar) ) # visualize the species occurrences & calibration area plot( sp_region, col = \"gray80\", legend = FALSE, axes = FALSE, main = \"Calibration area and occurrences\" ) plot(ca, add = TRUE) points(hespero[, c(\"x\", \"y\")], col = \"black\", pch = 16)"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v06_Extrapolation_example.html","id":"create-pseudo-absence-data","dir":"Articles","previous_headings":"","what":"Create pseudo-absence data","title":"flexsdm: Tools to explore extrapolation in SDMs","text":"often case rare species, species presence data. However, SDM methods require either pseudo-absence background point data. , use calibration area produce pseudo-absence data can used SDMs.","code":"# Sample the same number of species presences set.seed(10) psa <- sample_pseudoabs( data = hespero, x = \"x\", y = \"y\", n = sum(hespero$pr_ab), # number of pseudo-absence points equal to number of presences method = \"random\", rlayer = somevar, calibarea = ca ) # Visualize species presences and pseudo-absences plot( sp_region, col = \"gray80\", legend = FALSE, axes = FALSE, xlim = c(289347, 353284), ylim = c(-598052, -520709), main = \"Presence = yellow, Pseudo-absence = black\" ) plot(ca, add = TRUE) points(psa[, c(\"x\", \"y\")], cex = 0.8, pch = 16, col = \"black\") # Pseudo-absences points(hespero[, c(\"x\", \"y\")], col = \"yellow\", pch = 16, cex = 1.5) # Presences # Bind a presences and pseudo-absences hespero_pa <- bind_rows(hespero, psa) hespero_pa # Presence-Pseudo-absence database #> # A tibble: 42 × 3 #> x y pr_ab #> #> 1 316923. -557843. 1 #> 2 317155. -559234. 1 #> 3 316960. -558186. 1 #> 4 314347. -559648. 1 #> 5 317348. -557349. 1 #> 6 316753. -559679. 1 #> 7 316777. -558644. 1 #> 8 317050. -559043. 1 #> 9 316655. -559928. 1 #> 10 316418. -567439. 
1 #> # ℹ 32 more rows"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v06_Extrapolation_example.html","id":"partition-data-for-evaluating-models","dir":"Articles","previous_headings":"","what":"Partition data for evaluating models","title":"flexsdm: Tools to explore extrapolation in SDMs","text":"evaluate model performance, need specify data testing training. flexsdm offers range random spatial random data partition methods evaluating SDMs. use repeated K-fold cross-validation, suitable partition approach validating SDM data.","code":"set.seed(10) # Repeated K-fold method hespero_pa2 <- part_random( data = hespero_pa, pr_ab = \"pr_ab\", method = c(method = \"rep_kfold\", folds = 5, replicates = 10) )"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v06_Extrapolation_example.html","id":"extracting-environmental-values","dir":"Articles","previous_headings":"","what":"Extracting environmental values","title":"flexsdm: Tools to explore extrapolation in SDMs","text":"Next, extract values four environmental predictors presence pseudo-absence locations.","code":"hespero_pa3 <- sdm_extract( data = hespero_pa2, x = \"x\", y = \"y\", env_layer = somevar, variables = c(\"cwd\", \"tmn\", \"aet\", \"ppt_jja\") )"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v06_Extrapolation_example.html","id":"modeling","dir":"Articles","previous_headings":"","what":"Modeling","title":"flexsdm: Tools to explore extrapolation in SDMs","text":"Let’s use three standard algorithms model distribution Hesperocyparis stephensonii: GLM, GBM, SVM. 
case, use extent CFP prediction area can evaluate model extrapolation across broad geographic area.","code":"mglm <- fit_glm( data = hespero_pa3, response = \"pr_ab\", predictors = c(\"cwd\", \"tmn\", \"aet\", \"ppt_jja\"), partition = \".part\", thr = \"max_sens_spec\" ) mgbm <- fit_gbm( data = hespero_pa3, response = \"pr_ab\", predictors = c(\"cwd\", \"tmn\", \"aet\", \"ppt_jja\"), partition = \".part\", thr = \"max_sens_spec\" ) msvm <- fit_svm( data = hespero_pa3, response = \"pr_ab\", predictors = c(\"cwd\", \"tmn\", \"aet\", \"ppt_jja\"), partition = \".part\", thr = \"max_sens_spec\" ) mpred <- sdm_predict( models = list(mglm, mgbm, msvm), pred = somevar, con_thr = TRUE, predict_area = NULL )"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v06_Extrapolation_example.html","id":"comparing-our-models","dir":"Articles","previous_headings":"","what":"Comparing our models","title":"flexsdm: Tools to explore extrapolation in SDMs","text":"First, let’s take look spatial predictions models. GLM GBM predict lot suitable habitat far species found!","code":"par(mfrow = c(1, 3)) plot(mpred$glm, main = \"GLM\") # points(hespero$x, hespero$y, pch = 19) plot(mpred$gbm, main = \"GBM\") # points(hespero$x, hespero$y, pch = 19) plot(mpred$svm, main = \"SVM\") # points(hespero$x, hespero$y, pch = 19)"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v06_Extrapolation_example.html","id":"partial-dependence-plots-to-explore-the-impact-of-predictor-conditions-on-suitability","dir":"Articles","previous_headings":"","what":"Partial dependence plots to explore the impact of predictor conditions on suitability","title":"flexsdm: Tools to explore extrapolation in SDMs","text":"Extrapolation reflects issue model handles novel data. , see three algorithms explored tutorial predict pretty different geographic patterns habitat suitability based occurrence/pseudo-absence data environmental predictors. 
Let’s take look partial dependence plots see marginal effect environmental predictors suitability looks like test models. function allows visualize model may extrapolate outside environmental conditions used training, visualizing “projection” data different color. case, environmental predictors cover extent CFP. flexsdm allows users plot univariate partial dependence plots (p_pdp) bivariate partial dependence plots (p_bpdp); shown model. Note: p_bpdp function allows users option show boundaries training data using either rectangle convex hull approach. use convex hull approach. Uni bivariate partial dependence plots GLM: Uni bivariate partial dependence plots GBM: Uni bivariate partial dependence plots SVM: plots show really interesting story! notably, GLM GBM show consistently high habitat suitability areas much higher actual evapotranspiration narrow range values used train model. However, SVM seems best job estimating high habitat suitability environmental values outside training data. Importantly, models can behave differently depending modeling situation context.","code":"p_pdp(model = mglm$model, training_data = hespero_pa3, projection_data = somevar) p_bpdp(model = mglm$model, training_data = hespero_pa3, training_boundaries = \"convexh\") p_pdp(model = mgbm$model, training_data = hespero_pa3, projection_data = somevar) p_bpdp(model = mgbm$model, training_data = hespero_pa3, training_boundaries = \"convexh\", resolution = 100) p_pdp(model = msvm$model, training_data = hespero_pa3, projection_data = somevar) p_bpdp(model = msvm$model, training_data = hespero_pa3, training_boundaries = \"convexh\")"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v06_Extrapolation_example.html","id":"extrapolation-evaluation","dir":"Articles","previous_headings":"","what":"Extrapolation evaluation","title":"flexsdm: Tools to explore extrapolation in SDMs","text":"Remember species highly restricted southern California! 
However, two models (GLM GBM) predict high habitat suitability throughout parts CFP, SVM provides conservative predictions. see GLM GBM tend predict high habitat suitability areas environmentally different training conditions. models extrapolating environmental space? Let’s find using “extra_eval” function SDM. function requires input model training data, column specifying presence vs. absence locations, projection data (can SpatRaster tibble containing data used model projection – can reflect larger region, separate region, different time period used model training), metric calculating degree extrapolation (default Mahalanobis distance, though euclidean also option- explore ), number cores parallel processing, aggregation factor, case want measure extrapolation large data set. First look degree extrapolation geographic space using Shape method based Mahalanobis distance. Also distinguish univariate combinatorial extrapolation. Using Mahalanobis distance: output extra_eval function SpatRaster, showing degree extrapolation across projection area, estimated Shape method. can also explore extrapolation suitability patterns environmental geographic space, using just one function. , use p_extra function. function plots ggplot object. Let’s start extrapolation evaluation. plots show areas high extrapolation (dark blue) far training data (shown black) environmental geographic space. higher extrapolation values extrapolation area northwestern portion CFP. Let’s explore univariate combinatorial extrapolation. 
former defined projecting data outside range training conditions, combinatorial extrapolation area projecting data within range training conditions.","code":"xp_m <- extra_eval( training_data = hespero_pa3, pr_ab = \"pr_ab\", projection_data = somevar, metric = \"mahalanobis\", univar_comb = TRUE, n_cores = 1, aggreg_factor = 1 ) xp_m #> class : SpatRaster #> dimensions : 558, 394, 2 (nrow, ncol, nlyr) #> resolution : 1890, 1890 (x, y) #> extent : -373685.8, 370974.2, -604813.3, 449806.7 (xmin, xmax, ymin, ymax) #> coord. ref. : +proj=aea +lat_0=0 +lon_0=-120 +lat_1=34 +lat_2=40.5 +x_0=0 +y_0=-4000000 +datum=NAD83 +units=m +no_defs #> source(s) : memory #> names : extrapolation, uni_comb #> min values : 0.000, 1 #> max values : 3730.677, 2 cl <- c(\"#FDE725\", \"#B3DC2B\", \"#6DCC57\", \"#36B677\", \"#1F9D87\", \"#25818E\", \"#30678D\", \"#3D4988\", \"#462777\", \"#440154\") par(mfrow = c(1, 2)) plot(xp_m$extrapolation, main = \"Shape metric\", col = cl) plot(xp_m$uni_comb, main = \"Univariate (1) and \\n combinatorial (2) extrapolation\", col = cl) p_extra( training_data = hespero_pa3, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", color_p = \"black\", extra_suit_data = xp_m, projection_data = somevar, geo_space = TRUE, prop_points = 0.05 ) #> Number of cell used to plot 3642 (5%) p_extra( training_data = hespero_pa3, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", color_p = \"black\", extra_suit_data = xp_m$uni_comb, projection_data = somevar, geo_space = TRUE, prop_points = 0.05, color_gradient = c(\"#B3DC2B\", \"#30678D\"), alpha_p = 0.2 ) #> Number of cell used to plot 3642 (5%)"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v06_Extrapolation_example.html","id":"truncating-sdms-predictions-based-on-extrapolation-thresholds","dir":"Articles","previous_headings":"","what":"Truncating SDMs predictions based on extrapolation thresholds","title":"flexsdm: Tools to explore extrapolation in SDMs","text":"Depending user’s end goal, may want exclude suitability values 
environmentally “” far modeling training data. Shape method allows select extrapolation threshold exclude suitability values. truncating models can use p_extra function explore binary extrapolation patter environmental geographical space. test values 50, 100, 500, comparison. Values 1 (yellow one) depict environmental geographical regions constraint models suitability (truncate). Note lower threshold, restrictive environmental geographic regions used constrain model. Now use function extra_truncate truncate suitability predictions made GLM, GBM, SVM based extrapolation thresholds explored previously. note, threshold selection user-dependent, function allows select multiple thresholds one time compare outputs. Users can also select “trunc_value” within extra_truncate function, specifies value assigned cells exceed extrapolation threshold (also specified function). default 0 users also choose another value reduce suitability. Based maps, can see lower extrapolation threshold, restricted habitat suitability patterns, higher values retain greater amount suitable habitat. Selecting best threshold depend modeling goals objectives, . Want learn Shape extrapolation metrics? Read article “Velazco, S. J. E., Brooke, M. R., De Marco Jr., P., Regan, H. M., & Franklin, J. (2023). far can extrapolate species distribution model? Exploring Shape, novel method. Ecography, 11, e06992. 
https://doi.org/10.1111/ecog.06992”","code":"p_extra( training_data = hespero_pa3, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", color_p = \"black\", extra_suit_data = as.numeric(xp_m$extrapolation < 50), projection_data = somevar, geo_space = TRUE, prop_points = 0.05, color_gradient = c(\"gray\", \"#FDE725\"), alpha_p = 0.5 ) + plot_annotation(subtitle = \"Binary extrapolation pattern with using a threshold of 50\") #> Number of cell used to plot 3642 (5%) p_extra( training_data = hespero_pa3, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", color_p = \"black\", extra_suit_data = as.numeric(xp_m$extrapolation < 100), projection_data = somevar, geo_space = TRUE, prop_points = 0.05, color_gradient = c(\"gray\", \"#FDE725\"), alpha_p = 0.5 ) + plot_annotation(subtitle = \"Binary extrapolation pattern with using a threshold of 100\") #> Number of cell used to plot 3642 (5%) p_extra( training_data = hespero_pa3, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", color_p = \"black\", extra_suit_data = as.numeric(xp_m$extrapolation < 500), projection_data = somevar, geo_space = TRUE, prop_points = 0.05, color_gradient = c(\"gray\", \"#FDE725\"), alpha_p = 0.5 ) + plot_annotation(subtitle = \"Binary extrapolation pattern with using a threshold of 500\") #> Number of cell used to plot 3642 (5%) glm_trunc <- extra_truncate( suit = mpred$glm, extra = xp_m, threshold = c(50, 100, 500), trunc_value = 0 ) gbm_trunc <- extra_truncate( suit = mpred$gbm, extra = xp_m, threshold = c(50, 100, 500), trunc_value = 0 ) svm_trunc <- extra_truncate( suit = mpred$svm, extra = xp_m, threshold = c(50, 100, 500), trunc_value = 0 ) par(mfrow = c(3, 3)) plot(glm_trunc$`50`, main = \"GLM; extra threshold = 50\", col = cl) plot(glm_trunc$`100`, main = \"GLM; extra threshold = 100\", col = cl) plot(glm_trunc$`500`, main = \"GLM; extra threshold = 500\", col = cl) plot(gbm_trunc$`50`, main = \"GBM; extra threshold = 50\", col = cl) plot(gbm_trunc$`100`, main = \"GBM; extra threshold = 100\", col = cl) 
plot(gbm_trunc$`500`, main = \"GBM; extra threshold = 500\", col = cl) plot(svm_trunc$`50`, main = \"SVM; extra threshold = 50\", col = cl) plot(svm_trunc$`100`, main = \"SVM; extra threshold = 100\", col = cl) plot(svm_trunc$`500`, main = \"SVM; extra threshold = 500\", col = cl)"},{"path":"https://sjevelazco.github.io/flexsdm/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Santiago J.E. Velazco. Author, maintainer. Brooke Rose. Author. André F.. Andrade. Author. Ignacio Minoli. Author. Janet Franklin. Author.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Velazco, S.J.E., Rose, M.B., Andrade, .F.., Minoli, ., Franklin, J. (2022). flexsdm: R package supporting comprehensive flexible species distribution modelling workflow. Methods Ecology Evolution, 13(8) 1661-1669. https://doi.org/10.1111/2041-210X.13874","code":"@Article{, title = {flexsdm: An R package for supporting a comprehensive and flexible species distribution modelling workflow}, author = {Santiago J.E. Velazco and Brooke Rose and André F.A. Andrade and Ignacio Minoli and Janet Franklin}, journal = {Methods in Ecology and Evolution}, year = {2022}, volume = {13}, number = {8}, pages = {1661-1669}, url = {https://onlinelibrary.wiley.com/doi/10.1111/2041-210X.13874}, }"},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/index.html","id":"overview","dir":"","previous_headings":"","what":"Overview","title":"Tools for Data Preparation, Fitting, Prediction, Evaluation, and Post-Processing of Species Distribution Models","text":"Species distribution modeling become standard tool several research areas ecology, conservation biology, biogeography, paleobiogeography, epidemiology. Species distribution modeling area active research theoretical methodological aspects. 
One exciting features flexsdm high manipulation parametrization capacity based different functions arguments. attributes enable users define complete partial modeling workflow specific modeling situation (e.g., number variables, number records, different algorithms, algorithms tuning, ensemble methods).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/index.html","id":"structure-of-flexsdm","dir":"","previous_headings":"","what":"Structure of flexsdm","title":"Tools for Data Preparation, Fitting, Prediction, Evaluation, and Post-Processing of Species Distribution Models","text":"function flexsdm package organized three major modeling steps","code":""},{"path":"https://sjevelazco.github.io/flexsdm/index.html","id":"id_1-pre-modeling-functions","dir":"","previous_headings":"","what":"1. Pre-modeling functions","title":"Tools for Data Preparation, Fitting, Prediction, Evaluation, and Post-Processing of Species Distribution Models","text":"Set tools prepare modeling input data (e.g., species occurrences thinning, sample pseudo-absences background points, delimitation calibration area). 
calib_area() Delimit calibration area constructing species distribution models correct_colinvar() Collinearity reduction predictors env_outliers() Integration outliers detection methods environmental space part_random() Data partitioning training testing models part_sblock() Spatial block cross validation part_sband() Spatial band cross validation part_senv() Environmental cross-validation plot_res() Plot different resolutions used part_sblock get_block() Transform spatial partition layer spatial properties environmental variables sample_background() Sample background points sample_pseudoabs() Sampel pseudo-absence sdm_directory() Create directories saving outputs flexsdm sdm_extract() Extract environmental data based x y coordinates occfilt_env() Perform environmental filtering species occurrences occfilt_geo() Perform geographical filtering species occurrences","code":""},{"path":"https://sjevelazco.github.io/flexsdm/index.html","id":"id_2-modeling-functions","dir":"","previous_headings":"","what":"2. Modeling functions","title":"Tools for Data Preparation, Fitting, Prediction, Evaluation, and Post-Processing of Species Distribution Models","text":"includes functions related modeling construction validation. Several can grouped fit_*, tune_*, esm_* family functions. fit_* construct validate models default hyper-parameter values. tune_* construct validate models searching best hyper-parameter values combination. esm_ construct validate Ensemble Small Models.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/index.html","id":"model-evaluation","dir":"","previous_headings":"2. Modeling functions","what":"Model evaluation","title":"Tools for Data Preparation, Fitting, Prediction, Evaluation, and Post-Processing of Species Distribution Models","text":"sdm_eval() Calculate different model performance metrics","code":""},{"path":"https://sjevelazco.github.io/flexsdm/index.html","id":"fit_-functions-family","dir":"","previous_headings":"2. 
Modeling functions","what":"fit_* functions family","title":"Tools for Data Preparation, Fitting, Prediction, Evaluation, and Post-Processing of Species Distribution Models","text":"fit_gam() Fit validate Generalized Additive Models fit_gau() Fit validate Gaussian Process models fit_gbm() Fit validate Generalized Boosted Regression models fit_glm() Fit validate Generalized Linear Models fit_max() Fit validate Maximum Entropy models fit_net() Fit validate Neural Networks models fit_raf() Fit validate Random Forest models fit_svm() Fit validate Support Vector Machine models","code":""},{"path":"https://sjevelazco.github.io/flexsdm/index.html","id":"tune_-functions-family","dir":"","previous_headings":"2. Modeling functions","what":"tune_* functions family","title":"Tools for Data Preparation, Fitting, Prediction, Evaluation, and Post-Processing of Species Distribution Models","text":"tune_gbm() Fit validate Generalized Boosted Regression models exploration hyper-parameters tune_max() Fit validate Maximum Entropy models exploration hyper-parameters tune_net() Fit validate Neural Networks models exploration hyper-parameters tune_raf() Fit validate Random Forest models exploration hyper-parameters tune_svm() Fit validate Support Vector Machine models exploration hyper-parameters","code":""},{"path":"https://sjevelazco.github.io/flexsdm/index.html","id":"model-ensemble","dir":"","previous_headings":"2. Modeling functions","what":"Model ensemble","title":"Tools for Data Preparation, Fitting, Prediction, Evaluation, and Post-Processing of Species Distribution Models","text":"fit_ensemble() Fit validate ensemble models different ensemble methods","code":""},{"path":"https://sjevelazco.github.io/flexsdm/index.html","id":"esm_-functions-family","dir":"","previous_headings":"2. 
Modeling functions","what":"esm_* functions family","title":"Tools for Data Preparation, Fitting, Prediction, Evaluation, and Post-Processing of Species Distribution Models","text":"esm_gam() Fit validate Generalized Additive Models Ensemble Small Model approach esm_gau() Fit validate Gaussian Process models Models Ensemble Small Model approach esm_gbm() Fit validate Generalized Boosted Regression models Ensemble Small Model approach esm_glm() Fit validate Generalized Linear Models Ensemble Small Model approach esm_max() Fit validate Maximum Entropy models Ensemble Small Model approach esm_net() Fit validate Neural Networks models Ensemble Small Model approach esm_svm() Fit validate Support Vector Machine models Ensemble Small Model approach","code":""},{"path":"https://sjevelazco.github.io/flexsdm/index.html","id":"id_3-post-modeling-functions","dir":"","previous_headings":"","what":"3. Post-modeling functions","title":"Tools for Data Preparation, Fitting, Prediction, Evaluation, and Post-Processing of Species Distribution Models","text":"Tools related models’ geographical predictions, evaluation, correction. sdm_predict() Spatial predictions individual ensemble model sdm_summarize() Merge model performance tables interp() Raster interpolation two time periods extra_eval() Measure model extrapolation extra_truncate() Constraint suitability values given extrapolation value msdm_priori() Create spatial predictor variables reduce overprediction species distribution models msdm_posteriori() Methods correct overprediction species distribution models based occurrences suitability patterns.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/index.html","id":"id_4-graphical-model-exploration","dir":"","previous_headings":"","what":"4. 
Graphical model exploration","title":"Tools for Data Preparation, Fitting, Prediction, Evaluation, and Post-Processing of Species Distribution Models","text":"Useful tools visually explore models’ geographical environemtal predictions, model extrapolation, partial depnendece plot. p_pdp() Create partial dependence plot(s) explore marginal effect predictors suitability p_bpdp() Create partial dependence surface plot(s) explore bivariate marginal effect predictors suitability p_extra() Graphical exploration extrapolation suitability pattern environmental geographical space data_pdp() Calculate data construct partial dependence plots data_bpdp() Calculate data construct partial dependence surface plots","code":""},{"path":"https://sjevelazco.github.io/flexsdm/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"Tools for Data Preparation, Fitting, Prediction, Evaluation, and Post-Processing of Species Distribution Models","text":"can install development version flexsdm github ⚠️ NOTE: version 1.4-22 terra package causing errors trying instal flexsdm. 
Please, first install version ≥ 1.5-12 terra package available CRAN development version terra flexsdm.","code":"# install.packages(\"remotes\") # For Windows and Mac OS operating systems remotes::install_github(\"sjevelazco/flexsdm\") # For Linux operating system remotes::install_github(\"sjevelazco/flexsdm@HEAD\")"},{"path":"https://sjevelazco.github.io/flexsdm/index.html","id":"package-website","dir":"","previous_headings":"","what":"Package website","title":"Tools for Data Preparation, Fitting, Prediction, Evaluation, and Post-Processing of Species Distribution Models","text":"See package website (https://sjevelazco.github.io/flexsdm/) functions explanation vignettes.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/index.html","id":"package-citation","dir":"","previous_headings":"","what":"Package citation","title":"Tools for Data Preparation, Fitting, Prediction, Evaluation, and Post-Processing of Species Distribution Models","text":"Velazco, S.J.E., Rose, M.B., Andrade, .F.., Minoli, ., Franklin, J. (2022). flexsdm: R package supporting comprehensive flexible species distribution modelling workflow. Methods Ecology Evolution, 13(8) 1661–1669. 
https://doi.org/10.1111/2041-210X.13874 Test package give us feedback send e-mail sjevelazco@gmail.com.","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/pkg_citation/index.html","id":null,"dir":"Pkg_citation","previous_headings":"","what":"Function reference","title":"Function reference","text":"abies data set containing localities environmental condition Abies (fir tree) species California, USA backg data set containing environmental conditions background points calib_area() Delimit calibration area constructing species distribution models correct_colinvar() Collinearity reduction predictor variables data_bpdp() Calculate data construct partial dependence surface plots data_pdp() Calculate data construct partial dependence plots env_outliers() Integration outliers detection methods environmental space esm_gam() Fit validate Generalized Additive Models based Ensembles Small Models approach esm_gau() Fit validate Gaussian Process models based Ensembles Small Models approach esm_gbm() Fit validate Generalized Boosted Regression models based Ensembles Small Models approach esm_glm() Fit validate Generalized Linear Models based Ensembles Small Models approach esm_max() Fit validate Maximum Entropy Models based Ensemble Small Model approach esm_net() Fit validate Neural Networks based Ensembles Small Models approach esm_svm() Fit validate Support Vector Machine models based Ensembles Small Models approach extra_eval() Measure model extrapolation based Shape extrapolation metric extra_truncate() Truncate suitability predictions based extrapolation value fit_ensemble() Ensemble model fitting validation fit_gam() Fit validate Generalized Additive Models fit_gau() Fit validate Gaussian Process models fit_gbm() Fit validate Generalized Boosted Regression models fit_glm() Fit validate Generalized Linear Models fit_max() Fit validate Maximum Entropy models fit_net() Fit validate Neural Networks models fit_raf() Fit validate Random Forests models fit_svm() 
Fit validate Support Vector Machine models get_block() Transform spatial partition layer spatial properties environmental variables hespero data set containing localities Hesperocyparis stephensonii species California, USA homogenize_na() Homogenize cells NAs across layers interp() Raster interpolation two time periods msdm_posteriori() Methods correct overprediction species distribution models based occurrences suitability patterns. msdm_priori() Create spatial predictor variables reduce overprediction species distribution models occfilt_env() Perform environmental filtering species occurrences occfilt_geo() Perform geographical filtering species occurrences part_random() Conventional data partitioning methods part_sband() Spatial band cross-validation part_sblock() Spatial block cross-validation part_senv() Environmental spatial cross-validation plot_res() Plot different resolutions used part_sblock p_bpdp() Bivariate partial dependence plot p_extra() Graphical exploration extrapolation suitability pattern environmental geographical space p_pdp() Partial Dependent Plot sample_background() Sample background points sample_pseudoabs() Sample pseudo-absences sdm_directory() Create directories saving outputs flexsdm sdm_eval() Calculate different model performance metrics sdm_extract() Extract environmental data values spatial raster based x y coordinates sdm_predict() Spatial predictions individual ensemble models sdm_summarize() Merge model performance tables spp data set containing presences absences three virtual species tune_gbm() Fit validate Generalized Boosted Regression models exploration hyper-parameters optimize performance tune_max() Fit validate Maximum Entropy models exploration hyper-parameters optimize performance tune_net() Fit validate Neural Networks models exploration hyper-parameters tune_raf() Fit validate Random Forest models exploration hyper-parameters optimize performance tune_svm() Fit validate Support Vector Machine models exploration 
hyper-parameters optimize performance","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/abies.html","id":null,"dir":"Reference","previous_headings":"","what":"A data set containing localities and environmental condition of an Abies (fir tree) species in California, USA — abies","title":"A data set containing localities and environmental condition of an Abies (fir tree) species in California, USA — abies","text":"data set containing localities environmental condition Abies (fir tree) species California, USA","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/abies.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"A data set containing localities and environmental condition of an Abies (fir tree) species in California, USA — abies","text":"","code":"abies"},{"path":"https://sjevelazco.github.io/flexsdm/reference/abies.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"A data set containing localities and environmental condition of an Abies (fir tree) species in California, USA — abies","text":"tibble object 5000 rows 10 variables: ID presences absences records ID pr_ab presence absences denoted 1 0 respectively x y columns coordinates Albers Equal Area Conic coordinate system column aet landform columns values environmental variables locality","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/abies.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"A data set containing localities and environmental condition of an Abies (fir tree) species in California, USA — abies","text":"","code":"if (FALSE) { require(dplyr) data(\"abies\") abies }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/backg.html","id":null,"dir":"Reference","previous_headings":"","what":"A data set containing environmental conditions of background points — backg","title":"A data set containing environmental conditions of 
background points — backg","text":"data set containing environmental conditions background points","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/backg.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"A data set containing environmental conditions of background points — backg","text":"","code":"backg"},{"path":"https://sjevelazco.github.io/flexsdm/reference/backg.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"A data set containing environmental conditions of background points — backg","text":"tibble object 5000 rows 10 variables: pr_ab background point denoted 0 x y columns geographical coordinates column aet landform columns values environmental variables coordinate locations","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/backg.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"A data set containing environmental conditions of background points — backg","text":"","code":"if (FALSE) { require(dplyr) data(\"backg\") backg }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/calib_area.html","id":null,"dir":"Reference","previous_headings":"","what":"Delimit calibration area for constructing species distribution models — calib_area","title":"Delimit calibration area for constructing species distribution models — calib_area","text":"function offers different methods define calibration area. 
output used flexsdm functions like sample_backgroud, sample_pseudoabs, sdm_predict, among others","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/calib_area.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Delimit calibration area for constructing species distribution models — calib_area","text":"","code":"calib_area(data, x, y, method, groups = NULL, crs = NULL)"},{"path":"https://sjevelazco.github.io/flexsdm/reference/calib_area.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Delimit calibration area for constructing species distribution models — calib_area","text":"data data.frame tibble. Database presences x character. Column name longitude data y character. Column name latitude data method character. Method used delimiting calibration area. necessary concatenate (c()) different objects argument. following methods implemented: buffer: calibration area defined buffer around presences. Usage method = c('buffer', width=40000). value buffer width m must provided CRS longitude/latitude, map units cases mcp: calibration area defined minimum convex polygon. Usage method = 'mcp'. bmcp: calibration area defined buffered minimum convex polygon buffer width. Usage method = c('bmcp', width=40000). value buffer width m must provided CRS longitude/latitude, map units cases mask: calibration area defined selected polygons spatial vector object intersected presences. Usage method = c(\"mask\", clusters, \"DN\"). second concatenated element must SpatVector, third element character column name SpatVector used filtering polygons. groups character. Column name indicating differentiated subsets points. used mcp bmcp method. Default NULL crs character. Coordinate reference system used transforming occurrences outputs. set NULL, result mask method crs SpatVector used. 
Define crs mandatory buffer, mcp bmcp method.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/calib_area.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Delimit calibration area for constructing species distribution models — calib_area","text":"SpatVector","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/calib_area.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Delimit calibration area for constructing species distribution models — calib_area","text":"","code":"if (FALSE) { require(terra) require(dplyr) data(\"spp\") clusters <- system.file(\"external/clusters.shp\", package = \"flexsdm\") clusters <- terra::vect(clusters) single_spp <- spp %>% dplyr::filter(species == \"sp1\") %>% dplyr::filter(pr_ab == 1) %>% dplyr::select(-pr_ab) plot(clusters) points(single_spp[-1], col=\"red\") crs(clusters, proj=TRUE) # coordinate reference system (CRS) used for this points database # note that the unit of this CRS is in m, consequently the buffer width # will be interpreted in m too # buffer method ca_1 <- calib_area( data = single_spp, x = \"x\", y = \"y\", method = c(\"buffer\", width = 40000), crs = crs(clusters) ) plot(ca_1) points(single_spp[, 2:3], pch = 19, cex = 0.5) # mcp method ca_2 <- calib_area( data = single_spp, x = \"x\", y = \"y\", method = \"mcp\", crs = crs(clusters) ) plot(ca_2) points(single_spp[, 2:3], pch = 19, cex = 0.5) # mcp method for different groups single_spp <- single_spp %>% mutate(groups = ifelse(x > 150000, \"a\", \"b\")) plot(single_spp[, 2:3], pch = 19, col = \"blue\") points(single_spp[single_spp$groups == \"a\", 2:3], col = \"red\", pch = 19) points(single_spp[, 2:3]) ca_2.1 <- calib_area( data = single_spp, x = \"x\", y = \"y\", method = c(\"mcp\"), crs = crs(clusters), groups = \"groups\" ) plot(ca_2.1) points(single_spp[, 2:3], pch = 19, cex = 0.5) # bmcp method ca_3 <- calib_area( data = single_spp, x = \"x\", y = 
\"y\", method = c(\"bmcp\", width = 30000), crs = crs(clusters) ) plot(ca_3) points(single_spp[, 2:3], pch = 19, cex = 0.5) # bmcp method for different groups ca_3.1 <- calib_area( data = single_spp, x = \"x\", y = \"y\", method = c(\"bmcp\", width = 30000), crs = crs(clusters), groups = \"groups\" ) plot(ca_3.1) points(single_spp[, 2:3], pch = 19, cex = 0.5) # mask method plot(clusters) names(clusters) ca_3.1 <- calib_area( data = single_spp, x = \"x\", y = \"y\", method = c(\"mask\", clusters, \"clusters\"), ) plot(ca_3.1) points(single_spp[, 2:3], pch = 19, cex = 0.5, col = \"red\") }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/correct_colinvar.html","id":null,"dir":"Reference","previous_headings":"","what":"Collinearity reduction of predictor variables — correct_colinvar","title":"Collinearity reduction of predictor variables — correct_colinvar","text":"Collinearity reduction predictor variables","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/correct_colinvar.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Collinearity reduction of predictor variables — correct_colinvar","text":"","code":"correct_colinvar(env_layer, method, proj = NULL, maxcell = NULL)"},{"path":"https://sjevelazco.github.io/flexsdm/reference/correct_colinvar.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Collinearity reduction of predictor variables — correct_colinvar","text":"env_layer SpatRaster object class SpatRaster containing predictors. function allow categorical variables method character. Collinearity reduction method. necessary provide vector argument. next methods implemented: pearson: Highlights correlated variables according Pearson correlation. threshold maximum correlation must specified. Otherwise, threshold 0.7 defined default. Usage method = c('pearson', th='0.7'). vif: Select variables Variance Inflation Factor, threshold can specified user. 
Otherwise, threshold 10 defined default.Usage method = c('vif', th = '10'). pca: Perform Principal Component Analysis use principal components new predictors. selected components account 95% whole variation system. Usage method = c('pca'). fa: Perform Factorial Analysis select, original predictors, number factors defined Broken-Stick variables highest correlation factors selected. Usage method = c('fa'). proj character. used pca method. Path folder contains sub-folders different projection scenarios. Variables names must names raster used env_layer argument. Usage proj = \"C:/User/Desktop/Projections\" (see Details use argument) maxcell numeric. Number raster cells randomly sampled. Taking sample useful reduce memory usage large rasters. NULL, function use raster cells. Default NULL. Usage maxcell = 50000.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/correct_colinvar.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Collinearity reduction of predictor variables — correct_colinvar","text":"#' 'pearson', returns list following elements: cor_table: matrix object pairwise correlation values environmental variables cor_variables: list object length number environmental values containing pairwise relations exceeded correlation threshold one environmental variables 'vif' method, returns list following elements: env_layer: SpatRaster object selected environmental variables removed_variables: character vector removed environmental variables vif_table: data frame VIF values environmental variables 'pca' method, returns list following elements: env_layer: SpatRaster scores selected principal component (PC) sum 95% whole variation original environmental variables coefficients: matrix coefficient principal component (PC) predictors cumulative_variance: tibble cumulative variance explained selected principal component (PC) 'fa' method, returns list following elements: env_layer: SpatRaster scores selected variables due 
correlation factors. number_factors: number factors selected according Broken-Stick criteria, removed_variables: removed variables, uniqueness: uniqueness environmental variable according factorial analysis, loadings: environmental variables loadings chosen factors","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/correct_colinvar.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Collinearity reduction of predictor variables — correct_colinvar","text":"case environmental variables current conditions time periods (future present), recommended perform PCA analysis current environmental condition project PCA time periods. , necessary use “proj” argument. Path folder (e.g., projections) contains sub-folders different projection scenarios (e.g., years emissions). Within sub-folder must stored single multiband rasters environmental variables. example: C:/Users/my_pc/projections/ ├── MRIESM_2050_ssp126 │ └── var1.tif │ └── var2.tif │ └── var3.tif ├── MRIESM_2080_ssp585 │ └── var1.tif │ └── var2.tif │ └── var3.tif ├── UKESM_2050_ssp370 │ └── var1.tif │ └── var2.tif │ └── var3.tif pca method run time projections, correct_colinvar function create Projection_PCA (exact path path object returned function) system sub-folders multiband raster principal components (pcs.tif) C:/Users/my_pc/Projection_PCA/ ├── MRIESM_2050_ssp126 │ └── pcs.tif # multiband tif principal components ├── MRIESM_2080_ssp585 │ └── pcs.tif ├── UKESM_2050_ssp370 │ └── pcs.tif","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/correct_colinvar.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Collinearity reduction of predictor variables — correct_colinvar","text":"","code":"if (FALSE) { require(terra) require(dplyr) somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) # Perform pearson collinearity control var <- correct_colinvar(env_layer = 
somevar, method = c(\"pearson\", th = \"0.7\")) var$cor_table var$cor_variables # For all correct_colinvar methods it is possible to take a sample or raster to reduce memory var <- correct_colinvar(env_layer = somevar, method = c(\"pearson\", th = \"0.7\"), maxcell = 10000) var$cor_table var$cor_variables # Perform vif collinearity control var <- correct_colinvar(env_layer = somevar, method = c(\"vif\", th = \"8\")) var$env_layer var$removed_variables var$vif_table # Perform pca collinearity control var <- correct_colinvar(env_layer = somevar, method = c(\"pca\")) plot(var$env_layer) var$env_layer var$coefficients var$cumulative_variance # Perform pca collinearity control with different projections ## Below will be created a set of folders to simulate the structure of the directory where ## environmental variables are stored for different scenarios dir_sc <- file.path(tempdir(), \"projections\") dir.create(dir_sc) dir_sc <- file.path(dir_sc, c('scenario_1', 'scenario_2')) sapply(dir_sc, dir.create) somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) terra::writeRaster(somevar, file.path(dir_sc[1], \"somevar.tif\"), overwrite=TRUE) terra::writeRaster(somevar, file.path(dir_sc[2], \"somevar.tif\"), overwrite=TRUE) ## Perform pca with projections dir_w_proj <- dirname(dir_sc[1]) dir_w_proj var <- correct_colinvar(env_layer = somevar, method = \"pca\", proj = dir_w_proj) var$env_layer var$coefficients var$cumulative_variance var$proj # Perform fa colinearity control var <- correct_colinvar(env_layer = somevar, method = c(\"fa\")) var$env_layer var$number_factors var$removed_variables var$uniqueness var$loadings }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/data_bpdp.html","id":null,"dir":"Reference","previous_headings":"","what":"Calculate data to construct partial dependence surface plots — data_bpdp","title":"Calculate data to construct partial dependence surface plots — data_bpdp","text":"Calculate 
data construct Partial dependence surface plot (.e., bivariate dependence plot) two predictor set","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/data_bpdp.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Calculate data to construct partial dependence surface plots — data_bpdp","text":"","code":"data_bpdp( model, predictors, resolution = 50, training_data = NULL, training_boundaries = NULL, projection_data = NULL, clamping = FALSE )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/data_bpdp.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Calculate data to construct partial dependence surface plots — data_bpdp","text":"model model object class \"gam\", \"gbm\", \"glm\", \"graf\", \"ksvm\", \"ksvm\", \"maxnet”, “nnet\", \"randomForest\" model can found first element list returned function fit_, tune_, esm_ function families predictors character. Vector two predictor name(s) plot. NULL predictors plotted. Default NULL resolution numeric. Number equally spaced points predict continuous predictors. Default 50 training_data data.frame. Database response (0,1) predictor values used fit model. Default NULL training_boundaries character. Plot training conditions boundaries based training data (.e., presences, presences absences, etc). training_boundaries = \"convexh\", function delimit training environmental region based convex-hull. training_boundaries = \"rectangle\", function delimit training environmental region based four straight lines. used methods necessary provide data training_data argument. NULL predictors used. Default NULL. projection_data SpatRaster. Raster layer environmental variables used model projection. Default NULL clamping logical. Perform clamping. maxent models. 
Default FALSE","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/data_bpdp.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Calculate data to construct partial dependence surface plots — data_bpdp","text":"list two tibbles \"pdpdata\" \"resid\". pspdata: data construct partial dependence surface plot, first two column includes values selected environmental variables, third column predicted suitability. training_boundaries: data plot boundaries training data.","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/data_bpdp.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Calculate data to construct partial dependence surface plots — data_bpdp","text":"","code":"if (FALSE) { library(terra) library(dplyr) somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) # environmental data names(somevar) <- c(\"aet\", \"cwd\", \"tmx\", \"tmn\") data(abies) abies2 <- abies %>% select(x, y, pr_ab) abies2 <- sdm_extract(abies2, x = \"x\", y = \"y\", env_layer = somevar ) abies2 <- part_random(abies2, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) m <- fit_svm( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmx\", \"tmn\"), partition = \".part\", thr = c(\"max_sens_spec\") ) df <- data_bpdp( model = m$model, predictors = c(\"aet\", \"cwd\"), resolution = 50, projection_data = somevar, training_boundaries = \"rectangle\", training_data = abies2, clamping = TRUE ) df names(df) df$pspdata df$training_boundaries # see p_bpdp to construct partial dependence plot with ggplot2 }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/data_pdp.html","id":null,"dir":"Reference","previous_headings":"","what":"Calculate data to construct partial dependence plots — data_pdp","title":"Calculate data to construct partial dependence plots — data_pdp","text":"Calculate data construct 
partial dependence plots given predictor","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/data_pdp.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Calculate data to construct partial dependence plots — data_pdp","text":"","code":"data_pdp( model, predictors, resolution = 50, resid = FALSE, training_data = NULL, projection_data = NULL, clamping = FALSE )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/data_pdp.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Calculate data to construct partial dependence plots — data_pdp","text":"model model object class \"gam\", \"gbm\", \"glm\", \"graf\", \"ksvm\", \"ksvm\", \"maxnet”, “nnet\", \"randomForest\" model can found first element list returned function fit_, tune_, esm_ function families predictors character. Vector predictor name. resolution numeric. Number equally spaced points predict continuous predictors. Default 50 resid logical. Calculate residuals based training data. Default FALSE training_data data.frame. Database response (0,1) predictor values used fit model. Default NULL projection_data SpatRaster. Raster layer environmental variables used model projection. argument used, function calculate partial dependence curves distinguishing conditions used training projection conditions (.e., projection data present projection area training). Default NULL clamping logical. Perform clamping. maxent models. Default FALSE","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/data_pdp.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Calculate data to construct partial dependence plots — data_pdp","text":"list two tibbles \"pdpdata\" \"resid\". 
pdpdata: data construct partial dependence plots, first column includes values selected environmental variable, second column predicted suitability, third column range type, two values Training Projecting, referring suitability calculated within outside range training conditions. Third column returned \"projection_data\" argument used resid: data plot residuals. first column includes values selected environmental variable second column predicted suitability.","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/data_pdp.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Calculate data to construct partial dependence plots — data_pdp","text":"","code":"if (FALSE) { library(terra) library(dplyr) somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) # environmental data names(somevar) <- c(\"aet\", \"cwd\", \"tmx\", \"tmn\") data(abies) abies2 <- abies %>% select(x, y, pr_ab) abies2 <- sdm_extract(abies2, x = \"x\", y = \"y\", env_layer = somevar ) abies2 <- part_random(abies2, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) svm_t1 <- fit_svm( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmx\", \"tmn\"), partition = \".part\", thr = c(\"max_sens_spec\") ) df <- data_pdp( model = svm_t1$model, predictors = c(\"aet\"), resolution = 100, resid = TRUE, projection_data = somevar, training_data = abies2, clamping = FALSE ) df names(df) df$pdpdata df$resid plot(df$pdpdata[1:2], type = \"l\") points(df$resid[1:2], cex = 0.5) # see p_pdp to construct partial dependence plot with ggplot2 }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/data_psp.html","id":null,"dir":"Reference","previous_headings":"","what":"Calculate data to construct partial dependence surface plots — data_psp","title":"Calculate data to construct partial dependence surface plots — data_psp","text":"Calculate data construct Partial 
dependence surface plot (.e., bivariate dependence plot) two predictor set","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/data_psp.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Calculate data to construct partial dependence surface plots — data_psp","text":"","code":"data_psp( model, predictors, resolution = 50, training_data = NULL, pchull = FALSE, projection_data = NULL, clamping = FALSE )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/data_psp.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Calculate data to construct partial dependence surface plots — data_psp","text":"model model object class \"gam\", \"gbm\", \"glm\", \"graf\", \"ksvm\", \"ksvm\", \"maxnet”, “nnet\", \"randomForest\" model can found first element list returned function fit_, tune_, esm_ function families predictors character. Vector two predictor name(s) plot. NULL predictors plotted. Default NULL resolution numeric. Number equally spaced points predict continuous predictors. Default 50 training_data data.frame. Database response (0,1) predictor values used fit model. Default NULL pchull logical. Extract convex-hull limit training data. Default FALSE projection_data SpatRaster. Raster layer environmental variables used model projection. Default NULL clamping logical. Perform clamping. maxent models. Default FALSE","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/data_psp.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Calculate data to construct partial dependence surface plots — data_psp","text":"list two tibbles \"pdpdata\" \"resid\". pspdata: data construct partial dependence surface plot, first two column includes values selected environmental variables, third column predicted suitability. 
pchull: data plot residuals convex hull polygon bounding calibration data.","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/data_psp.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Calculate data to construct partial dependence surface plots — data_psp","text":"","code":"if (FALSE) { library(terra) library(dplyr) somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) # environmental data names(somevar) <- c(\"aet\", \"cwd\", \"tmx\", \"tmn\") data(abies) abies2 <- abies %>% select(x, y, pr_ab) abies2 <- sdm_extract(abies2, x = \"x\", y = \"y\", env_layer = somevar ) abies2 <- part_random(abies2, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) m <- fit_svm( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmx\", \"tmn\"), partition = \".part\", thr = c(\"max_sens_spec\") ) df <- data_psp( model = m$model, predictors = c(\"aet\", \"cwd\"), resolution = 50, projection_data = somevar, pchull = TRUE, training_data = abies2, clamping = TRUE ) df names(df) df$pspdata df$pchull # see p_psp to construct partial dependence plot with ggplot2 }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/env_outliers.html","id":null,"dir":"Reference","previous_headings":"","what":"Integration of outliers detection methods in environmental space — env_outliers","title":"Integration of outliers detection methods in environmental space — env_outliers","text":"function performs different methods detecting outliers species distribution data based environmental conditions occurrences. methods need presence absence data (e.g. Two-class Support Vector Machine Random Forest) use presences (e.g. Reverse Jackknife, Box-plot, Random Forest outliers) . 
Outlier detection can useful procedure occurrence data cleaning (Chapman 2005, Liu et al., 2018).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/env_outliers.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Integration of outliers detection methods in environmental space — env_outliers","text":"","code":"env_outliers(data, x, y, pr_ab, id, env_layer)"},{"path":"https://sjevelazco.github.io/flexsdm/reference/env_outliers.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Integration of outliers detection methods in environmental space — env_outliers","text":"data data.frame tibble presence (presence-absence) records, coordinates x character. Column name longitude data. y character. Column name latitude data. pr_ab character. Column name presence absence data (.e. 1 0) id character. Column name row id. row (record) must unique code. env_layer SpatRaster. Raster environmental variables","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/env_outliers.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Integration of outliers detection methods in environmental space — env_outliers","text":"tibble object database used 'data' argument seven additional columns, 1 0 denote presence detected outliers .out_bxpt: outliers detected Box-plot method .out_jack: outliers detected Reverse Jackknife method .out_svm: outliers detected Support Vector Machine method .out_rf: outliers detected Random Forest method .out_rfout: outliers detected Random Forest Outliers method .out_sum: frequency presences records detected outliers based previews methods (values 0 6).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/env_outliers.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Integration of outliers detection methods in environmental space — env_outliers","text":"function apply outliers 
detection methods occurrence data. Box-plot Reverse Jackknife method test outliers variable individually, occurrence behaves outlier least one variable highlighted outlier. user uses presence data, Support Vector Machine Random Forest Methods performed. Support Vector Machine Random Forest performed default hyper-parameter values. case species < 7 occurrences, function perform methods (.e. additional columns 0 values); nonetheless, return tibble additional columns 0 1. information methods, see Chapman (2005), Liu et al. (2018), Velazco et al. (2022).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/env_outliers.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Integration of outliers detection methods in environmental space — env_outliers","text":"Chapman, . D. (2005). Principles methods data cleaning: Primary Species Species- Occurrence Data. version 1.0. Report Global Biodiversity Information Facility, Copenhagen. p72. http://www.gbif.org/document/80528 Liu, C., White, M., & Newell, G. (2018). Detecting outliers species distribution data. Journal Biogeography, 45(1), 164 - 176. https://doi.org/10.1111/jbi.13122 Velazco, S.J.E.; Bedrij, N..; Keller, H..; Rojas, J.L.; Ribeiro, B.R.; De Marco, P. (2022) Quantifying role protected areas safeguarding uses biodiversity. Biological Conservation, xx(xx) xx-xx. 
https://doi.org/10.1016/j.biocon.2022.109525","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/env_outliers.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Integration of outliers detection methods in environmental space — env_outliers","text":"","code":"if (FALSE) { require(dplyr) require(terra) require(ggplot2) # Environmental variables somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) # Species occurrences data(\"spp\") spp spp1 <- spp %>% dplyr::filter(species == \"sp1\") somevar[[1]] %>% plot() points(spp1 %>% filter(pr_ab == 1) %>% select(x, y), col = \"blue\", pch = 19) points(spp1 %>% filter(pr_ab == 0) %>% select(x, y), col = \"red\", cex = 0.5) spp1 <- spp1 %>% mutate(idd = 1:nrow(spp1)) # Detect outliers outs_1 <- env_outliers( data = spp1, pr_ab = \"pr_ab\", x = \"x\", y = \"y\", id = \"idd\", env_layer = somevar ) # How many outliers were detected by different methods? out_pa <- outs_1 %>% dplyr::select(starts_with(\".\"), -.out_sum) %>% apply(., 2, function(x) sum(x, na.rm = T)) out_pa # How many outliers were detected by the sum of different methods? outs_1 %>% dplyr::group_by(.out_sum) %>% dplyr::count() # Let explor where are locate records highlighted as outliers outs_1 %>% dplyr::filter(pr_ab == 1, .out_sum > 0) %>% ggplot(aes(x, y)) + geom_point(aes(col = factor(.out_sum))) + facet_wrap(. ~ factor(.out_sum)) # Detect outliers only with presences outs_2 <- env_outliers( data = spp1 %>% dplyr::filter(pr_ab == 1), pr_ab = \"pr_ab\", x = \"x\", y = \"y\", id = \"idd\", env_layer = somevar ) # How many outliers were detected by different methods out_p <- outs_2 %>% dplyr::select(starts_with(\".\"), -.out_sum) %>% apply(., 2, function(x) sum(x, na.rm = T)) # How many outliers were detected by the sum of different methods? 
outs_2 %>% dplyr::group_by(.out_sum) %>% dplyr::count() # Let explor where are locate records highlighted as outliers outs_2 %>% dplyr::filter(pr_ab == 1, .out_sum > 0) %>% ggplot(aes(x, y)) + geom_point(aes(col = factor(.out_sum))) + facet_wrap(. ~ factor(.out_sum)) # Comparison of function outputs when using it with # presences-absences or only presences data. bind_rows(out_p, out_pa) # Because the second case only were used presences, outliers methods # based in Random Forest (.out_rf) and Support Vector Machines (.out_svm) # were not performed. }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_gam.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit and validate Generalized Additive Models based on Ensembles of Small Models approach — esm_gam","title":"Fit and validate Generalized Additive Models based on Ensembles of Small Models approach — esm_gam","text":"function constructs Generalized Additive Models using Ensembles Small Models (ESM) approach (Breiner et al., 2015, 2018).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_gam.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit and validate Generalized Additive Models based on Ensembles of Small Models approach — esm_gam","text":"","code":"esm_gam(data, response, predictors, partition, thr = NULL, k = 3)"},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_gam.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit and validate Generalized Additive Models based on Ensembles of Small Models approach — esm_gam","text":"data data.frame. Database response (0,1) predictors values. response character. Column name species absence-presence data (0,1) predictors character. Vector column names quantitative predictor variables (.e. continuous variables). function allow categorical variables can construct models continuous variables. 
Usage predictors = c(\"aet\", \"cwd\", \"tmin\"). partition character. Column name training validation partition groups. thr character. Threshold used get binary suitability values (.e. 0,1). useful threshold-dependent performance metrics. possible use one threshold type. necessary provide vector argument. following threshold criteria available: equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard index highest. max_sorensen: threshold Sorensen index highest. max_fpb: threshold FPB (F-measure presence-background data) highest. sensitivity: Threshold based specified sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers sensitivity value. sensitivity value specified, default value 0.9. user wants include one threshold type, necessary concatenate threshold types, e.g., thr=c('max_sens_spec', 'max_jaccard'), thr=c('max_sens_spec', 'sensitivity', sens='0.8'), thr=c('max_sens_spec', 'sensitivity'). Function use thresholds threshold specified k integer. dimension basis used represent smooth term. Default 3. ESM proposed fit models little data, recommend using small values parameter.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_gam.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit and validate Generalized Additive Models based on Ensembles of Small Models approach — esm_gam","text":"list object : esm_model: list \"gam\" class object mgcv package bivariate model. object can used predicting ensemble small models sdm_predict function. predictors: tibble variables use modeling. performance: Performance metrics (see sdm_eval). 
Threshold dependent metrics calculated based threshold specified argument.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_gam.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Fit and validate Generalized Additive Models based on Ensembles of Small Models approach — esm_gam","text":"method consists creating bivariate models pair-wise combinations predictors perform ensemble based average suitability weighted Somers' D metric (D = 2 x (AUC -0.5)). ESM recommended modeling species occurrences. function allow categorical variables use types variables problematic using occurrences. detail see Breiner et al. (2015, 2018). function fits GAM using mgvc package, Binomial distribution family thin plate regression spline smoothing basis (see ?mgvc::s).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_gam.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Fit and validate Generalized Additive Models based on Ensembles of Small Models approach — esm_gam","text":"Breiner, F. T., Guisan, ., Bergamini, ., & Nobis, M. P. (2015). Overcoming limitations modelling rare species using ensembles small models. Methods Ecology Evolution, 6(10), 1210-218. https://doi.org/10.1111/2041-210X.12403 Breiner, F. T., Nobis, M. P., Bergamini, ., & Guisan, . (2018). Optimizing ensembles small models predicting distribution species occurrences. Methods Ecology Evolution, 9(4), 802-808. 
https://doi.org/10.1111/2041-210X.12957","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_gam.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit and validate Generalized Additive Models based on Ensembles of Small Models approach — esm_gam","text":"","code":"if (FALSE) { data(\"abies\") require(dplyr) # Using k-fold partition method set.seed(10) abies2 <- abies %>% na.omit() %>% group_by(pr_ab) %>% dplyr::slice_sample(n = 10) %>% group_by() abies2 <- part_random( data = abies2, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 3) ) abies2 # Without threshold specification and with kfold esm_gam_t1 <- esm_gam( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmin\", \"ppt_djf\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), partition = \".part\", thr = NULL ) esm_gam_t1$esm_model # bivariate model esm_gam_t1$predictors esm_gam_t1$performance # Test with rep_kfold partition abies2 <- abies2 %>% select(-starts_with(\".\")) set.seed(10) abies2 <- part_random( data = abies2, pr_ab = \"pr_ab\", method = c(method = \"rep_kfold\", folds = 3, replicates = 5) ) abies2 esm_gam_t2 <- esm_gam( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmin\", \"ppt_djf\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), partition = \".part\", thr = NULL ) esm_gam_t2$esm_model # bivariate model esm_gam_t2$predictors esm_gam_t2$performance # Test with other bootstrap abies2 <- abies2 %>% select(-starts_with(\".\")) set.seed(10) abies2 <- part_random( data = abies2, pr_ab = \"pr_ab\", method = c(method = \"boot\", replicates = 10, proportion = 0.7) ) abies2 esm_gam_t3 <- esm_gam( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmin\", \"ppt_djf\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), partition = \".part\", thr = NULL ) esm_gam_t3$esm_model # bivariate model esm_gam_t3$predictors esm_gam_t3$performance 
}"},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_gau.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit and validate Gaussian Process models based on Ensembles of Small Models approach — esm_gau","title":"Fit and validate Gaussian Process models based on Ensembles of Small Models approach — esm_gau","text":"function constructs Gaussian Process models using Ensembles Small Models (ESM) approach (Breiner et al., 2015, 2018).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_gau.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit and validate Gaussian Process models based on Ensembles of Small Models approach — esm_gau","text":"","code":"esm_gau(data, response, predictors, partition, thr = NULL, background = NULL)"},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_gau.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit and validate Gaussian Process models based on Ensembles of Small Models approach — esm_gau","text":"data data.frame. Database response (0,1) predictors values. response character. Column name species absence-presence data (0,1) predictors character. Vector column names quantitative predictor variables (.e. continuous variables). function allow categorical variables can construct models continuous variables. Usage predictors = c(\"aet\", \"cwd\", \"tmin\") partition character. Column name training validation partition groups. thr character. Threshold used get binary suitability values (.e. 0,1). useful threshold-dependent performance metrics. possible use one threshold type. necessary provide vector argument. following threshold criteria available: equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard index highest. max_sorensen: threshold Sorensen index highest. 
max_fpb: threshold FPB (F-measure presence-background data) highest. sensitivity: Threshold based specified sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers sensitivity value. sensitivity value specified, default value 0.9. user wants include one threshold type, necessary concatenate threshold types, e.g., thr=c('max_sens_spec', 'max_jaccard'), thr=c('max_sens_spec', 'sensitivity', sens='0.8'), thr=c('max_sens_spec', 'sensitivity'). Function use thresholds threshold specified background data.frame. Database response column 0 predictors variables. column names must consistent data. Default NULL","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_gau.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit and validate Gaussian Process models based on Ensembles of Small Models approach — esm_gau","text":"list object : esm_model: list \"graf\" class object bivariate model. object can used predicting ensembles small models sdm_predict function. predictors: tibble variables use modeling. performance: Performance metric (see sdm_eval). threshold dependent metric calculated based threshold specified argument.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_gau.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Fit and validate Gaussian Process models based on Ensembles of Small Models approach — esm_gau","text":"method consists creating bivariate models pair-wise combinations predictors performs ensemble based average suitability weighted Somers' D metric (D = 2 x (AUC -0.5)). ESM recommended modeling species occurrences. function allow categorical variables use types variables problematic using occurrences. detail see Breiner et al. 
(2015, 2018).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_gau.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Fit and validate Gaussian Process models based on Ensembles of Small Models approach — esm_gau","text":"Breiner, F. T., Guisan, ., Bergamini, ., & Nobis, M. P. (2015). Overcoming limitations modelling rare species using ensembles small models. Methods Ecology Evolution, 6(10), 1210-218. https://doi.org/10.1111/2041-210X.12403 Breiner, F. T., Nobis, M. P., Bergamini, ., & Guisan, . (2018). Optimizing ensembles small models predicting distribution species occurrences. Methods Ecology Evolution, 9(4), 802-808. https://doi.org/10.1111/2041-210X.12957","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_gau.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit and validate Gaussian Process models based on Ensembles of Small Models approach — esm_gau","text":"","code":"if (FALSE) { data(\"abies\") require(dplyr) # Using k-fold partition method set.seed(10) abies2 <- abies %>% na.omit() %>% group_by(pr_ab) %>% dplyr::slice_sample(n = 10) %>% group_by() abies2 <- part_random( data = abies2, pr_ab = \"pr_ab\", method = c(method = \"rep_kfold\", folds = 3, replicates = 5) ) abies2 # Without threshold specification and with kfold esm_gau_t1 <- esm_gau( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmin\", \"ppt_djf\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), partition = \".part\", thr = NULL ) esm_gau_t1$esm_model # bivariate model esm_gau_t1$predictors esm_gau_t1$performance }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_gbm.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit and validate Generalized Boosted Regression models based on Ensembles of Small Models approach — esm_gbm","title":"Fit and validate Generalized Boosted Regression models based on Ensembles 
of Small Models approach — esm_gbm","text":"function constructs Generalized Boosted Regression using Ensembles Small Models (ESM) approach (Breiner et al., 2015, 2018).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_gbm.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit and validate Generalized Boosted Regression models based on Ensembles of Small Models approach — esm_gbm","text":"","code":"esm_gbm( data, response, predictors, partition, thr = NULL, n_trees = 100, n_minobsinnode = NULL, shrinkage = 0.1 )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_gbm.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit and validate Generalized Boosted Regression models based on Ensembles of Small Models approach — esm_gbm","text":"data data.frame. Database response (0,1) predictors values. response character. Column name species absence-presence data (0,1) predictors character. Vector column names quantitative predictor variables (.e. continuous variables). can construct models continuous variables allow categorical variables. Usage predictors = c(\"aet\", \"cwd\", \"tmin\") partition character. Column name training validation partition groups. thr character. Threshold used get binary suitability values (.e. 0,1). useful threshold-dependent performance metrics. possible use one threshold type. necessary provide vector argument. following threshold criteria available: equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard index highest. max_sorensen: threshold Sorensen index highest. max_fpb: threshold FPB (F-measure presence-background data) highest. sensitivity: Threshold based specified sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers sensitivity value. 
sensitivity value specified, default value 0.9. case use one threshold type necessary concatenate threshold types, e.g., thr=c('max_sens_spec', 'max_jaccard'), thr=c('max_sens_spec', 'sensitivity', sens='0.8'), thr=c('max_sens_spec', 'sensitivity'). Function use thresholds threshold specified. n_trees Integer specifying total number trees fit. equivalent number iterations number basis functions additive expansion. Default 100. n_minobsinnode Integer specifying minimum number observations terminal nodes trees. Note actual number observations, total weight. n_minobsinnode NULL, parameter assume value equal nrow(data)*0.5/4. Default NULL. shrinkage Numeric. parameter applied tree expansion. Also known learning rate step-size reduction; 0.001 0.1 usually works, smaller learning rate typically requires trees. Default 0.1.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_gbm.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit and validate Generalized Boosted Regression models based on Ensembles of Small Models approach — esm_gbm","text":"list object : esm_model: list \"gbm\" class object gbm package bivariate model. object can used predicting ensembles small models sdm_predict function. predictors: tibble variables use modeling. performance: Performance metrics (see sdm_eval). Threshold dependent metrics calculated based threshold specified thr argument.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_gbm.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Fit and validate Generalized Boosted Regression models based on Ensembles of Small Models approach — esm_gbm","text":"method consists creating bivariate models pair-wise combinations predictors perform ensemble based average suitability weighted Somers' D metric (D = 2 x (AUC -0.5)). ESM recommended modeling species occurrences. 
function allow categorical variables use types variables problematic using occurrences. detail see Breiner et al. (2015, 2018).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_gbm.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Fit and validate Generalized Boosted Regression models based on Ensembles of Small Models approach — esm_gbm","text":"Breiner, F. T., Guisan, ., Bergamini, ., & Nobis, M. P. (2015). Overcoming limitations modelling rare species using ensembles small models. Methods Ecology Evolution, 6(10), 1210-218. https://doi.org/10.1111/2041-210X.12403 Breiner, F. T., Nobis, M. P., Bergamini, ., & Guisan, . (2018). Optimizing ensembles small models predicting distribution species occurrences. Methods Ecology Evolution, 9(4), 802-808. https://doi.org/10.1111/2041-210X.12957","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_gbm.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit and validate Generalized Boosted Regression models based on Ensembles of Small Models approach — esm_gbm","text":"","code":"if (FALSE) { data(\"abies\") require(dplyr) # Using k-fold partition method set.seed(10) abies2 <- abies %>% na.omit() %>% group_by(pr_ab) %>% dplyr::slice_sample(n = 10) %>% group_by() abies2 <- part_random( data = abies2, pr_ab = \"pr_ab\", method = c(method = \"rep_kfold\", folds = 3, replicates = 5) ) abies2 # Without threshold specification and with kfold esm_gbm_t1 <- esm_gbm( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmin\", \"ppt_djf\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), partition = \".part\", thr = NULL, n_trees = 100, n_minobsinnode = NULL, shrinkage = 0.1 ) esm_gbm_t1$esm_model # bivariate model esm_gbm_t1$predictors esm_gbm_t1$performance 
}"},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_glm.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit and validate Generalized Linear Models based on Ensembles of Small Models approach — esm_glm","title":"Fit and validate Generalized Linear Models based on Ensembles of Small Models approach — esm_glm","text":"function constructs Generalized Linear Models using Ensembles Small Models (ESM) approach (Breiner et al., 2015, 2018).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_glm.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit and validate Generalized Linear Models based on Ensembles of Small Models approach — esm_glm","text":"","code":"esm_glm( data, response, predictors, partition, thr = NULL, poly = 0, inter_order = 0 )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_glm.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit and validate Generalized Linear Models based on Ensembles of Small Models approach — esm_glm","text":"data data.frame. Database response (0,1) predictors values. response character. Column name species absence-presence data (0,1). predictors character. Vector column names quantitative predictor variables (.e. continuous variables). can construct models continuous variables allow categorical variables. Usage predictors = c(\"aet\", \"cwd\", \"tmin\"). partition character. Column name training validation partition groups. thr character. Threshold used get binary suitability values (.e. 0,1). useful threshold-dependent performance metrics. possible use one threshold type. necessary provide vector argument. following threshold criteria available: equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard highest. max_sorensen: threshold Sorensen highest. 
max_fpb: threshold FPB (F-measure presence-background data) highest. sensitivity: Threshold based specified sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers sensitivity value. sensitivity value specified, default value 0.9. user wants include one threshold type, necessary concatenate threshold types, e.g., thr=c('max_sens_spec', 'max_jaccard'), thr=c('max_sens_spec', 'sensitivity', sens='0.8'), thr=c('max_sens_spec', 'sensitivity'). Function use thresholds threshold specified poly interger >= 2. used values >= 2 model use polynomials continuous variables (.e. used predictors argument). Default 0. ESM constructed occurrences recommended use polynomials avoid overfitting. inter_order interger >= 0. interaction order explanatory variables. Default 0. ESM constructed occurrences recommended use interaction terms.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_glm.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit and validate Generalized Linear Models based on Ensembles of Small Models approach — esm_glm","text":"list object : esm_model: list \"glm\" class object stats package bivariate model. object can used predicting ensembles small models sdm_predict function. predictors: tibble variables use modeling. performance: Performance metric (see sdm_eval). threshold dependent metric calculated based threshold specified thr argument.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_glm.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Fit and validate Generalized Linear Models based on Ensembles of Small Models approach — esm_glm","text":"method consists creating bivariate models pair-wise combinations predictors perform ensemble based average suitability weighted Somers' D metric (D = 2 x (AUC -0.5)). ESM recommended modeling species occurrences. 
function allow categorical variables use types variables problematic using occurrences. detail see Breiner et al. (2015, 2018).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_glm.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Fit and validate Generalized Linear Models based on Ensembles of Small Models approach — esm_glm","text":"Breiner, F. T., Guisan, ., Bergamini, ., & Nobis, M. P. (2015). Overcoming limitations modelling rare species using ensembles small models. Methods Ecology Evolution, 6(10), 1210-218. https://doi.org/10.1111/2041-210X.12403 Breiner, F. T., Nobis, M. P., Bergamini, ., & Guisan, . (2018). Optimizing ensembles small models predicting distribution species occurrences. Methods Ecology Evolution, 9(4), 802-808. https://doi.org/10.1111/2041-210X.12957","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_glm.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit and validate Generalized Linear Models based on Ensembles of Small Models approach — esm_glm","text":"","code":"if (FALSE) { data(\"abies\") require(dplyr) # Using k-fold partition method set.seed(10) abies2 <- abies %>% na.omit() %>% group_by(pr_ab) %>% dplyr::slice_sample(n = 10) %>% group_by() abies2 <- part_random( data = abies2, pr_ab = \"pr_ab\", method = c(method = \"rep_kfold\", folds = 3, replicates = 5) ) abies2 # Without threshold specification and with kfold esm_glm_t1 <- esm_glm( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmin\", \"ppt_djf\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), partition = \".part\", thr = NULL, poly = 0, inter_order = 0 ) esm_glm_t1$esm_model # bivariate model esm_glm_t1$predictors esm_glm_t1$performance }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_max.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit and validate Maximum Entropy Models based 
on Ensemble of Small of Model approach — esm_max","title":"Fit and validate Maximum Entropy Models based on Ensemble of Small of Model approach — esm_max","text":"function constructs Maxent Models using Ensemble Small Model (ESM) approach (Breiner et al., 2015, 2018).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_max.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit and validate Maximum Entropy Models based on Ensemble of Small of Model approach — esm_max","text":"","code":"esm_max( data, response, predictors, partition, thr = NULL, background = NULL, clamp = TRUE, classes = \"default\", pred_type = \"cloglog\", regmult = 2.5 )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_max.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit and validate Maximum Entropy Models based on Ensemble of Small of Model approach — esm_max","text":"data data.frame. Database response (0,1) predictors values. response character. Column name species absence-presence data (0,1) predictors character. Vector column names quantitative predictor variables (.e. continuous variables). function can construct models continuous variables, allow categorical variables Usage predictors = c(\"aet\", \"cwd\", \"tmin\"). partition character. Column name training validation partition groups. thr character. Threshold used get binary suitability values (.e. 0,1). useful threshold-dependent performance metrics. possible use one threshold type. necessary provide vector argument. following threshold criteria available: equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard highest. max_sorensen: threshold Sorensen highest. max_fpb: threshold FPB (F-measure presence-background data) highest. sensitivity: Threshold based specified sensitivity value. 
Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers sensitivity value. sensitivity value specified, default 0.9 user wants include one threshold type, necessary concatenate threshold types, e.g., thr=c('max_sens_spec', 'max_jaccard'), thr=c('max_sens_spec', 'sensitivity', sens='0.8'), thr=c('max_sens_spec', 'sensitivity'). Function use thresholds threshold specified. background data.frame. Database response column 0 predictors variables. column names must consistent data. Default NULL clamp logical. set TRUE, predictors features restricted range seen model training. classes character. single feature combinations . Features symbolized letters: l (linear), q (quadratic), h (hinge), p (product), t (threshold). Usage classes = \"lpq\". Default \"default\" (see details). pred_type character. Type response required available \"link\", \"exponential\", \"cloglog\" \"logistic\". Default \"cloglog\" regmult numeric. constant adjust regularization. ESM used modeling species records default value 2.5","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_max.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit and validate Maximum Entropy Models based on Ensemble of Small of Model approach — esm_max","text":"list object : esm_model: list \"maxnet\" class object maxnet package bivariate model. object can used predicting ensembles small models sdm_predict function. predictors: tibble variables use modeling. performance: Performance metrics (see sdm_eval). 
threshold dependent metric calculated based threshold specified argument.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_max.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Fit and validate Maximum Entropy Models based on Ensemble of Small of Model approach — esm_max","text":"method consists creating bivariate models pair-wise combinations predictors perform ensemble based average suitability weighted Somers' D metric (D = 2 x (AUC -0.5)). ESM recommended modeling species occurrences. function allow categorical variables use types variables problematic using occurrences. detail see Breiner et al. (2015, 2018). function use default regularization multiplier equal 2.5 (see Breiner et al., 2018) argument “classes” set default MaxEnt use different features combination depending number presences (np) follow rule: np < 10 classes = \"l\", np 10 15 classes = \"lq\", np 15 80 classes = \"lqh\", np >= 80 classes = \"lqph\" presence-absence (presence-pseudo-absence) data used data argument addition background points, function fit models presences background points validate presences absences. procedure makes maxent comparable presences-absences models (e.g., random forest, support vector machine). presences background points data used, function fit validate model presences background data. presence-absences used data argument without background, function fit model specified data (recommended).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_max.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Fit and validate Maximum Entropy Models based on Ensemble of Small of Model approach — esm_max","text":"Breiner, F. T., Guisan, ., Bergamini, ., & Nobis, M. P. (2015). Overcoming limitations modelling rare species using ensembles small models. Methods Ecology Evolution, 6(10), 1210-218. https://doi.org/10.1111/2041-210X.12403 Breiner, F. T., Nobis, M. 
P., Bergamini, ., & Guisan, . (2018). Optimizing ensembles small models predicting distribution species occurrences. Methods Ecology Evolution, 9(4), 802-808. https://doi.org/10.1111/2041-210X.12957","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_max.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit and validate Maximum Entropy Models based on Ensemble of Small of Model approach — esm_max","text":"","code":"if (FALSE) { data(\"abies\") data(\"backg\") require(dplyr) # Using k-fold partition method set.seed(10) abies2 <- abies %>% na.omit() %>% group_by(pr_ab) %>% dplyr::slice_sample(n = 10) %>% group_by() abies2 <- part_random( data = abies2, pr_ab = \"pr_ab\", method = c(method = \"rep_kfold\", folds = 5, replicates = 5) ) abies2 set.seed(10) backg2 <- backg %>% na.omit() %>% group_by(pr_ab) %>% dplyr::slice_sample(n = 100) %>% group_by() backg2 <- part_random( data = backg2, pr_ab = \"pr_ab\", method = c(method = \"rep_kfold\", folds = 5, replicates = 5) ) backg2 # Without threshold specification and with kfold esm_max_t1 <- esm_max( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmin\", \"ppt_djf\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), partition = \".part\", thr = NULL, background = backg2, clamp = TRUE, classes = \"default\", pred_type = \"cloglog\", regmult = 1 ) esm_max_t1$esm_model # bivariate model esm_max_t1$predictors esm_max_t1$performance }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_net.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit and validate Neural Networks based on Ensembles of Small of Models approach — esm_net","title":"Fit and validate Neural Networks based on Ensembles of Small of Models approach — esm_net","text":"function constructs Neural Networks using Ensembles Small Models (ESM) approach (Breiner et al., 2015, 
2018).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_net.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit and validate Neural Networks based on Ensembles of Small of Models approach — esm_net","text":"","code":"esm_net(data, response, predictors, partition, thr = NULL, size = 2, decay = 0)"},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_net.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit and validate Neural Networks based on Ensembles of Small of Models approach — esm_net","text":"data data.frame. Database response (0,1) predictors values. response character. Column name species absence-presence data (0,1) predictors character. Vector column names quantitative predictor variables (.e. continuous variables). function can construct models continuous variables allow categorical variables. Usage predictors = c(\"aet\", \"cwd\", \"tmin\"). partition character. Column name training validation partition groups. thr character. Threshold used get binary suitability values (.e. 0,1). useful threshold-dependent performance metrics. possible use one threshold type. necessary provide vector argument. following threshold criteria available: equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard index highest. max_sorensen: threshold Sorensen index highest. max_fpb: threshold FPB (F-measure presence-background data) highest. sensitivity: Threshold based specified sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers sensitivity value. 
sensitivity values specified, default used 0.9 user wants include one threshold type, necessary concatenate threshold types, e.g., thr=c('max_sens_spec', 'max_jaccard'), thr=c('max_sens_spec', 'sensitivity', sens='0.8'), thr=c('max_sens_spec', 'sensitivity'). Function use thresholds threshold specified size numeric. Number units hidden layer. Can zero skip-layer units. Default 2. decay numeric. Parameter weight decay. Default 0.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_net.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit and validate Neural Networks based on Ensembles of Small of Models approach — esm_net","text":"list object : esm_model: list \"nnet\" class object nnet package bivariate model. object can used predicting ensemble small model sdm_predict function. predictors: tibble variables use modeling. performance: Performance metric (see sdm_eval). threshold dependent metric calculated based threshold specified thr argument.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_net.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Fit and validate Neural Networks based on Ensembles of Small of Models approach — esm_net","text":"method consists creating bivariate models pair-wise combinations predictors perform ensemble based average suitability weighted Somers' D metric (D = 2 x (AUC -0.5)). ESM recommended modeling species occurrences. function allow categorical variables use types variables problematic using occurrences. detail see Breiner et al. (2015, 2018).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_net.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Fit and validate Neural Networks based on Ensembles of Small of Models approach — esm_net","text":"Breiner, F. T., Guisan, ., Bergamini, ., & Nobis, M. P. (2015). 
Overcoming limitations modelling rare species using ensembles small models. Methods Ecology Evolution, 6(10), 1210-218. https://doi.org/10.1111/2041-210X.12403 Breiner, F. T., Nobis, M. P., Bergamini, ., & Guisan, . (2018). Optimizing ensembles small models predicting distribution species occurrences. Methods Ecology Evolution, 9(4), 802-808. https://doi.org/10.1111/2041-210X.12957","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_net.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit and validate Neural Networks based on Ensembles of Small of Models approach — esm_net","text":"","code":"if (FALSE) { data(\"abies\") require(dplyr) # Using k-fold partition method set.seed(10) abies2 <- abies %>% na.omit() %>% group_by(pr_ab) %>% dplyr::slice_sample(n = 10) %>% group_by() abies2 <- part_random( data = abies2, pr_ab = \"pr_ab\", method = c(method = \"rep_kfold\", folds = 3, replicates = 5) ) abies2 # Without threshold specification and with kfold esm_net_t1 <- esm_net( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmin\", \"ppt_djf\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), partition = \".part\", thr = NULL ) esm_net_t1$esm_model # bivariate model esm_net_t1$predictors esm_net_t1$performance }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_svm.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit and validate Support Vector Machine models based on Ensembles of Small of Models approach — esm_svm","title":"Fit and validate Support Vector Machine models based on Ensembles of Small of Models approach — esm_svm","text":"function constructs Support Vector Machine models using Ensembles Small Models (ESM) approach (Breiner et al., 2015, 2018).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_svm.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit and validate Support Vector 
Machine models based on Ensembles of Small of Models approach — esm_svm","text":"","code":"esm_svm( data, response, predictors, partition, thr = NULL, sigma = \"automatic\", C = 1 )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_svm.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit and validate Support Vector Machine models based on Ensembles of Small of Models approach — esm_svm","text":"data data.frame. Database response (0,1) predictors values. response character. Column name species absence-presence data (0,1). predictors character. Vector column names quantitative predictor variables (.e. continuous variables). function can construct models continuous variables allow categorical variables. Usage predictors = c(\"aet\", \"cwd\", \"tmin\"). partition character. Column name training validation partition groups. thr character. Threshold used get binary suitability values (.e. 0,1). useful threshold-dependent performance metrics. possible use one threshold type. necessary provide vector argument. following threshold criteria available: equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard index highest. max_sorensen: threshold Sorensen index highest. max_fpb: threshold FPB (F-measure presence-background data) highest. sensitivity: Threshold based specified sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers sensitivity value. sensitivity value specified, default 0.9 user wants include one threshold type, necessary concatenate threshold types, e.g., thr=c('max_sens_spec', 'max_jaccard'), thr=c('max_sens_spec', 'sensitivity', sens='0.8'), thr=c('max_sens_spec', 'sensitivity'). Function use thresholds threshold specified sigma numeric. Inverse kernel width Radial Basis kernel function \"rbfdot\". Default \"automatic\". C numeric. 
Cost constraints violation, 'C' constant regularization term Lagrange formulation. Default 1","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_svm.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit and validate Support Vector Machine models based on Ensembles of Small of Models approach — esm_svm","text":"list object : esm_model: list \"ksvm\" class object ksvm package bivariate model. object can used predicting ensemble small model sdm_predict function. predictors: tibble variables use modeling. performance: Performance metric (see sdm_eval). threshold dependent metric calculated based threshold specified thr argument.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_svm.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Fit and validate Support Vector Machine models based on Ensembles of Small of Models approach — esm_svm","text":"method consists creating bivariate models pair-wise combinations predictors perform ensemble based average suitability weighted Somers' D metric (D = 2 x (AUC -0.5)). ESM recommended modeling species occurrences. function allow categorical variables use types variables problematic using occurrences. detail see Breiner et al. (2015, 2018). function constructs 'C-svc' classification type uses Radial Basis kernel \"Gaussian\" function (rbfdot). See details ksvm","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_svm.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Fit and validate Support Vector Machine models based on Ensembles of Small of Models approach — esm_svm","text":"Breiner, F. T., Guisan, ., Bergamini, ., & Nobis, M. P. (2015). Overcoming limitations modelling rare species using ensembles small models. Methods Ecology Evolution, 6(10), 1210-218. https://doi.org/10.1111/2041-210X.12403 Breiner, F. T., Nobis, M. P., Bergamini, ., & Guisan, . (2018). 
Optimizing ensembles small models predicting distribution species occurrences. Methods Ecology Evolution, 9(4), 802-808. https://doi.org/10.1111/2041-210X.12957","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_svm.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit and validate Support Vector Machine models based on Ensembles of Small of Models approach — esm_svm","text":"","code":"if (FALSE) { data(\"abies\") require(dplyr) # Using k-fold partition method set.seed(10) abies2 <- abies %>% na.omit() %>% group_by(pr_ab) %>% dplyr::slice_sample(n = 10) %>% group_by() abies2 <- part_random( data = abies2, pr_ab = \"pr_ab\", method = c(method = \"rep_kfold\", folds = 3, replicates = 5) ) abies2 # Without threshold specification and with kfold esm_svm_t1 <- esm_svm( data = abies2, response = \"pr_ab\", predictors = c( \"aet\", \"cwd\", \"tmin\", \"ppt_djf\", \"ppt_jja\", \"pH\", \"awc\", \"depth\" ), partition = \".part\", thr = NULL ) esm_svm_t1$esm_model # bivariate model esm_svm_t1$predictors esm_svm_t1$performance }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/extra_eval.html","id":null,"dir":"Reference","previous_headings":"","what":"Measure model extrapolation based on Shape extrapolation metric — extra_eval","title":"Measure model extrapolation based on Shape extrapolation metric — extra_eval","text":"Measure extrapolation comparing environmental data used modeling calibration area model projection. 
function use Shape metric proposed Velazco et al., 2023","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/extra_eval.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Measure model extrapolation based on Shape extrapolation metric — extra_eval","text":"","code":"extra_eval( training_data, pr_ab, projection_data, metric = \"mahalanobis\", univar_comb = FALSE, n_cores = 1, aggreg_factor = 1 )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/extra_eval.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Measure model extrapolation based on Shape extrapolation metric — extra_eval","text":"training_data data.frame tibble environmental conditions presence absence (background points pseudo-absences) used constructing models pr_ab character. Column name presence absence (background points pseudo-absences) data (.e., 1 0) projection_data SpatRaster, data.frame tibble environmental condition used projecting model (e.g., larger, encompassing region, spatially separate region, different time period). data.frame tibble used function return tibble object. Otherwise, SpatRaster object. metric character. Metric used measure degree extrapolation. Default = mahalanobis. mahalanobis: Degree extrapolation calculated based Mahalanobis distance. euclidean: Degree extrapolation calculated based Euclidean distance. univar_comb logical. true, function add layer column distinguish univariate (.e., projection data outside range training conditions) combinatorial extrapolation (.e., projection data within range training conditions) using values 1 2, respectively. Default FALSE n_cores numeric. Number cores use parallelization. Default 1 aggreg_factor positive integer. Aggregation factor expressed number cells direction reduce raster resolution. Use value higher 1 useful measuring extrapolation using raster high number cells. resolution output raster object used 'projection_data' argument. 
Default 1, .e., default, changes made resolution environmental variables.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/extra_eval.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Measure model extrapolation based on Shape extrapolation metric — extra_eval","text":"SpatRaster tibble object extrapolation values measured Shape metric. Also possible estimate univariate combinatorial extrapolation metric (see `univar_comb` argument).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/extra_eval.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Measure model extrapolation based on Shape extrapolation metric — extra_eval","text":"function measure model extrapolation base Shape metric (Velazco et al., 2023). Shape model-agnostic approach calculates extrapolation degree given projection data point multivariate distance nearest training data point. distances relativized factor reflects dispersion training data environmental space. Distinct approaches (e.g., MESS-Multivariate Environmental Similarity Surfaces, EO-Environmental Overlap, MOP-Mobility-Oriented Parity, EXDET-Extrapolation Detection, AOA-Area Applicability), Shape incorporates adjustable threshold control binary discrimination acceptable unacceptable extrapolation degrees (see extra_truncate). See vignette flexsdm website details Shape metric, model truncation, tools explore model extrapolation.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/extra_eval.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Measure model extrapolation based on Shape extrapolation metric — extra_eval","text":"Velazco, S.J.E., Brooke, M.R., De Marco Jr., P., Regan, H.M. Franklin, J. 2023. far can extrapolate species distribution model? Exploring Shape, novel method. Ecography: e06992. 
https://doi.org/10.1111/ecog.06992","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/extra_eval.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Measure model extrapolation based on Shape extrapolation metric — extra_eval","text":"","code":"if (FALSE) { require(dplyr) require(terra) data(spp) f <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(f) names(somevar) <- c(\"aet\", \"cwd\", \"tmx\", \"tmn\") spp$species %>% unique() sp <- spp %>% dplyr::filter(species == \"sp3\", pr_ab == 1) %>% dplyr::select(x, y, pr_ab) # Calibration area based on some criterion such as dispersal ability ca <- calib_area(sp, x = \"x\", y = \"y\", method = c(\"bmcp\", width = 50000), crs = crs(somevar)) plot(somevar[[1]]) points(sp) plot(ca, add = T) # Sampling pseudo-absences set.seed(10) psa <- sample_pseudoabs( data = sp, x = \"x\", y = \"y\", n = nrow(sp) * 2, method = \"random\", rlayer = somevar, calibarea = ca ) # Merge presences and absences databases to get a complete calibration data sp_pa <- dplyr::bind_rows(sp, psa) sp_pa # Get environmental condition of calibration area sp_pa_2 <- sdm_extract(data = sp_pa, x = \"x\", y = \"y\", env_layer = somevar) sp_pa_2 # Measure degree of extrapolation based on Mahalanobis and # for a projection area based on a SpatRaster object extr <- extra_eval( training_data = sp_pa_2, projection_data = somevar, pr_ab = \"pr_ab\", n_cores = 1, aggreg_factor = 1, metric = \"mahalanobis\" ) plot(extr, main = \"Extrapolation pattern\") # Let's fit, predict and truncate a model with extra_truncate sp_pa_2 <- part_random( data = sp_pa_2, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) a_model <- fit_glm( data = sp_pa_2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmx\", \"tmn\"), partition = \".part\", thr = c(\"max_sorensen\") ) predsuit <- sdm_predict(models = a_model, pred = somevar, thr = \"max_sorensen\") 
predsuit # list with a raster with two layer plot(predsuit[[1]]) # Truncate a model based on a given value of extrapolation # using 'extra_truncate' function par(mfrow = c(1, 2)) plot(extr, main = \"Extrapolation\") plot(predsuit[[1]][[1]], main = \"Suitability\") par(mfrow = c(1, 1)) predsuit_2 <- extra_truncate( suit = predsuit[[1]], extra = extr, threshold = c(50, 100, 200) ) predsuit_2 # a list of continuous and binary models with # different truncated at different extrapolation thresholds plot(predsuit_2$`50`) plot(predsuit_2$`100`) plot(predsuit_2$`200`) ##%######################################################%## #### Measure degree of extrapolation for #### #### projection area based on data.frame #### ##%######################################################%## extr_df <- extra_eval( training_data = sp_pa_2, projection_data = as.data.frame(somevar, xy=TRUE), pr_ab = \"pr_ab\", n_cores = 1, aggreg_factor = 1, metric = \"mahalanobis\" ) extr_df # see 'p_extra()' to explore extrapolation or suitability pattern in the # environmental and/or geographical space ##%######################################################%## #### Explore Shape metric with #### #### univariate and combinatorial extrapolation #### ##%######################################################%## extr <- extra_eval( training_data = sp_pa_2, projection_data = somevar, pr_ab = \"pr_ab\", n_cores = 1, aggreg_factor = 1, metric = \"mahalanobis\", univar_comb = TRUE ) extr plot(extr) # In the second layer, values equal to 1 and 2 # depict univariate and combinatorial extrapolation, respectively }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/extra_exclude.html","id":null,"dir":"Reference","previous_headings":"","what":"Constraint of suitability based on extrapolation — extra_exclude","title":"Constraint of suitability based on extrapolation — extra_exclude","text":"Exclusion suitability values less given extrapolation value 
(EXPERIMENTAL)","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/extra_exclude.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Constraint of suitability based on extrapolation — extra_exclude","text":"","code":"extra_exclude(suit, extra, threshold = 50)"},{"path":"https://sjevelazco.github.io/flexsdm/reference/extra_exclude.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Constraint of suitability based on extrapolation — extra_exclude","text":"suit SpatRaster suitability values extra SpatRaster extrapolation values measured percentage (output extra_eval function) threshold numeric. Vector one values used correct extrapolation. Default 50% (FUNCTION SET PROJECTED SUITABILITY VALUES LESS THRESHOLD ZERO? UNCLEAR. PLEASE EXPLICIT)","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/extra_exclude.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Constraint of suitability based on extrapolation — extra_exclude","text":"SpatRaster object corrected suitability values","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/extra_exclude.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Constraint of suitability based on extrapolation — extra_exclude","text":"","code":"if (FALSE) { # see examples in extra_eval function }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/extra_truncate.html","id":null,"dir":"Reference","previous_headings":"","what":"Truncate suitability predictions based on an extrapolation value — extra_truncate","title":"Truncate suitability predictions based on an extrapolation value — extra_truncate","text":"Exclusion suitability predictions environmental conditions assumed 
extrapolative.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/extra_truncate.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Truncate suitability predictions based on an extrapolation value — extra_truncate","text":"","code":"extra_truncate(suit, extra, threshold = 50, trunc_value = 0)"},{"path":"https://sjevelazco.github.io/flexsdm/reference/extra_truncate.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Truncate suitability predictions based on an extrapolation value — extra_truncate","text":"suit SpatRaster suitability values extra SpatRaster extrapolation values preferable measured extra_eval function threshold numeric. Vector one extrapolation values used truncate suitability Default 50% trunc_value numeric. Numeric value used cells assumed extrapolative","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/extra_truncate.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Truncate suitability predictions based on an extrapolation value — extra_truncate","text":"SpatRaster object truncated suitability values","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/extra_truncate.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Truncate suitability predictions based on an extrapolation value — extra_truncate","text":"Exclusion suitability predictions environmental conditions assumed extrapolative. function possible use metric measuring degree extrapolation (e.g., MESS-Multivariate Environmental Similarity Surfaces, EO-Environmental Overlap, MOP-Mobility-Oriented Parity, EXDET-Extrapolation Detection, AOA-Area Applicability). However, recommend use Shape approach (see extra_eval, Velazco et al., 2023). function truncates suitability predictions assigning given value, generally 0 NA. Usage trunc_value = NA. Default 0. 
cells assumed extrapolative, .e., higher given threshold given extrapolation metric. See vignette flexsdm website details Shape metric, model truncation, tools explore model extrapolation.","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/extra_truncate.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Truncate suitability predictions based on an extrapolation value — extra_truncate","text":"","code":"if (FALSE) { # see examples in extra_eval function }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_ensemble.html","id":null,"dir":"Reference","previous_headings":"","what":"Ensemble model fitting and validation — fit_ensemble","title":"Ensemble model fitting and validation — fit_ensemble","text":"Ensemble model fitting validation","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_ensemble.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Ensemble model fitting and validation — fit_ensemble","text":"","code":"fit_ensemble( models, ens_method = c(\"mean\", \"meanw\", \"meansup\", \"meanthr\", \"median\"), thr = NULL, thr_model = NULL, metric = NULL )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_ensemble.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Ensemble model fitting and validation — fit_ensemble","text":"models list. list models fitted fit_ tune_ function family. Models used ensemble must presences-absences records, partition methods, threshold types. ens_method character. Method used create ensemble different models. vector must provided argument. meansup, meanw pcasup method, necessary provide evaluation metric threshold 'metric' 'thr_model' arguments respectively. default following ensemble methods performed: mean: Simple average different models. meanw: Weighted average models based performance. evaluation metric threshold type must provided. 
meansup: Average best models (evaluation metric average). evaluation metric must provided. meanthr: Averaging performed cells suitability values selected threshold. median: Median different models. Usage ensemble = \"meanthr\". several ensemble methods implemented necessary concatenate , e.g., ensemble = c(\"meanw\", \"meanthr\", \"median\") thr character. Threshold used get binary suitability values (.e. 0,1). useful threshold-dependent performance metrics. possible use one threshold criterion. vector must provided argument. following threshold criteria available: lpt: highest threshold omission. equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard highest. max_sorensen: threshold Sorensen highest. max_fpb: threshold FPB (F-measure presence-background data) highest. sensitivity: Threshold based specified sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers sensitivity value. sensitivity values specified, default 0.9. case using one threshold type necessary concatenate threshold types, e.g., thr=c('lpt', 'max_sens_spec', 'max_jaccard'), thr=c('lpt', 'max_sens_spec', 'sensitivity', sens='0.8'), thr=c('lpt', 'max_sens_spec', 'sensitivity'). Function use thresholds threshold specified. thr_model character. threshold needed conduct meanw, meandsup, meanthr ensemble methods. mandatory use one threshold, must threshold used fit models used \"models\" argument. Usage thr_model = 'equal_sens_spec' metric character. Performance metric used selecting best combination hyper-parameter values. One following metrics can used: SORENSEN, JACCARD, FPB, TSS, KAPPA, AUC, IMAE, BOYCE. Default TSS. 
Usage metric = BOYCE","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_ensemble.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Ensemble model fitting and validation — fit_ensemble","text":"list object : models: list models used performing ensemble. thr_metric: Threshold metric specified function. predictors: tibble quantitative (column names c) qualitative (column names f) variables used models. performance: tibble performance metrics (see sdm_eval). metrics threshold-dependent calculated based threshold specified argument.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_ensemble.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Ensemble model fitting and validation — fit_ensemble","text":"","code":"if (FALSE) { require(dplyr) require(terra) # Environmental variables somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) # Species occurrences data(\"spp\") set.seed(1) some_sp <- spp %>% dplyr::filter(species == \"sp2\") %>% sdm_extract( data = ., x = \"x\", y = \"y\", env_layer = somevar, variables = names(somevar), filter_na = TRUE ) %>% part_random( data = ., pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 3) ) # gam mglm <- fit_glm( data = some_sp, response = \"pr_ab\", predictors = c(\"CFP_1\", \"CFP_2\", \"CFP_3\", \"CFP_4\"), partition = \".part\", poly = 2 ) mraf <- fit_raf( data = some_sp, response = \"pr_ab\", predictors = c(\"CFP_1\", \"CFP_2\", \"CFP_3\", \"CFP_4\"), partition = \".part\", ) mgbm <- fit_gbm( data = some_sp, response = \"pr_ab\", predictors = c(\"CFP_1\", \"CFP_2\", \"CFP_3\", \"CFP_4\"), partition = \".part\" ) # Fit and validate ensemble model mensemble <- fit_ensemble( models = list(mglm, mraf, mgbm), ens_method = \"meansup\", thr = NULL, thr_model = \"max_sens_spec\", metric = \"TSS\" ) 
}"},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_gam.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit and validate Generalized Additive Models — fit_gam","title":"Fit and validate Generalized Additive Models — fit_gam","text":"Fit validate Generalized Additive Models","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_gam.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit and validate Generalized Additive Models — fit_gam","text":"","code":"fit_gam( data, response, predictors, predictors_f = NULL, select_pred = FALSE, partition, thr = NULL, fit_formula = NULL, k = -1 )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_gam.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit and validate Generalized Additive Models — fit_gam","text":"data data.frame. Database response (0,1) predictors values. response character. Column name species absence-presence data (0,1). predictors character. Vector column names quantitative predictor variables (.e. continuous variables). Usage predictors = c(\"aet\", \"cwd\", \"tmin\") predictors_f character. Vector column names qualitative predictor variables (.e. ordinal nominal variables; factors). Usage predictors_f = c(\"landform\") select_pred logical. Perform predictor selection. Default FALSE. partition character. Column name training validation partition groups. thr character. Threshold used get binary suitability values (.e. 0,1). useful threshold-dependent performance metrics. possible use one threshold type. necessary provide vector argument. following threshold criteria available: lpt: highest threshold omission. equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard index highest. max_sorensen: threshold Sorensen index highest. 
max_fpb: threshold FPB (F-measure presence-background data) highest. sensitivity: Threshold based specified sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers sensitivity value. sensitivity value specified, default used 0.9. one threshold type used must concatenated, e.g., thr=c('lpt', 'max_sens_spec', 'max_jaccard'), thr=c('lpt', 'max_sens_spec', 'sensitivity', sens='0.8'), thr=c('lpt', 'max_sens_spec', 'sensitivity'). Function use threshold types none specified. fit_formula formula. formula object response predictor variables (e.g. formula(pr_ab ~ aet + ppt_jja + pH + awc + depth + landform)). Note variables used must consistent used response, predictors, predictors_f arguments k integer. dimension basis used represent smooth term. Default -1 (.e., k=10). See help ?mgcv::s.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_gam.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit and validate Generalized Additive Models — fit_gam","text":"list object : model: \"gam\" class object mgcv package. object can used predicting. predictors: tibble quantitative (c column names) qualitative (f column names) variables use modeling. performance: Performance metric (see sdm_eval). Threshold dependent metrics calculated based threshold specified argument. data_ens: Predicted suitability test partition. 
database used fit_ensemble","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_gam.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Fit and validate Generalized Additive Models — fit_gam","text":"function fits GAM using mgvc package, Binomial distribution family thin plate regression spline smoothing basis (see ?mgvc::s).","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_gam.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit and validate Generalized Additive Models — fit_gam","text":"","code":"if (FALSE) { data(\"abies\") # Using k-fold partition method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 10) ) abies2 gam_t1 <- fit_gam( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), predictors_f = c(\"landform\"), select_pred = FALSE, partition = \".part\", thr = \"max_sens_spec\" ) gam_t1$model gam_t1$predictors gam_t1$performance # Specifying the formula explicitly require(mgcv) gam_t2 <- fit_gam( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), predictors_f = c(\"landform\"), select_pred = FALSE, partition = \".part\", thr = \"max_sens_spec\", fit_formula = stats::formula(pr_ab ~ s(aet) + s(ppt_jja) + s(pH) + landform) ) gam_t2$model gam_t2$predictors gam_t2$performance %>% dplyr::select(ends_with(\"_mean\")) # Using repeated k-fold partition method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"rep_kfold\", folds = 5, replicates = 5) ) abies2 gam_t3 <- fit_gam( data = abies2, response = \"pr_ab\", predictors = c(\"ppt_jja\", \"pH\", \"awc\"), predictors_f = c(\"landform\"), select_pred = FALSE, partition = \".part\", thr = \"max_sens_spec\" ) gam_t3 
}"},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_gau.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit and validate Gaussian Process models — fit_gau","title":"Fit and validate Gaussian Process models — fit_gau","text":"Fit validate Gaussian Process models","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_gau.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit and validate Gaussian Process models — fit_gau","text":"","code":"fit_gau( data, response, predictors, predictors_f = NULL, background = NULL, partition, thr = NULL )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_gau.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit and validate Gaussian Process models — fit_gau","text":"data data.frame. Database response (0,1) predictors values. response character. Column name species absence-presence data (0,1). predictors character. Vector column names quantitative predictor variables (.e. continuous variables). Usage predictors = c(\"aet\", \"cwd\", \"tmin\") predictors_f character. Vector column names qualitative predictor variables (.e. ordinal nominal variables type). Usage predictors_f = c(\"landform\") background data.frame. Database response column 0 predictors variables. column names must consistent data partition character. Column name training validation partition groups. thr character. Threshold used get binary suitability values (.e. 0,1), useful threshold-dependent performance metrics. possible use one threshold type. vector must provided argument. following threshold criteria available: lpt: highest threshold omission. equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard index highest. max_sorensen: threshold Sorensen index highest. 
max_fpb: threshold FPB (F-measure presence-background data) highest. sensitivity: Threshold based specified sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers sensitivity value. sensitivity value specified, default used 0.9. one threshold type used must concatenated, e.g., thr=c('lpt', 'max_sens_spec', 'max_jaccard'), thr=c('lpt', 'max_sens_spec', 'sensitivity', sens='0.8'), thr=c('lpt', 'max_sens_spec', 'sensitivity'). Function use threshold criteria none specified.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_gau.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit and validate Gaussian Process models — fit_gau","text":"list object : model: \"graf\" class object. object can used predicting. predictors: tibble quantitative (c column names) qualitative (f column names) variables use modeling. performance: Performance metrics (see sdm_eval). Threshold dependent metrics calculated based threshold criteria specified argument. data_ens: Predicted suitability test partition. 
database used fit_ensemble","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_gau.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit and validate Gaussian Process models — fit_gau","text":"","code":"if (FALSE) { data(\"abies\") # Using k-fold partition method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 3) ) abies2 bg <- abies2 bg$pr_ab <- 0 gaup_t1 <- fit_gau( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), predictors_f = c(\"landform\"), partition = \".part\", background = bg, thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\") ) gaup_t1$model gaup_t1$predictors gaup_t1$performance gaup_t1$data_ens # Using bootstrap partition method only with presence-absence abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"boot\", replicates = 5, proportion = 0.7) ) abies2 gaup_t2 <- fit_gau( data = abies2, response = \"pr_ab\", predictors = c(\"ppt_jja\", \"pH\", \"awc\"), predictors_f = c(\"landform\"), partition = \".part\", thr = c(type = c(\"lpt\", \"max_sens_spec\", \"sensitivity\"), sens = \"0.8\") ) gaup_t2 }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_gbm.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit and validate Generalized Boosted Regression models — fit_gbm","title":"Fit and validate Generalized Boosted Regression models — fit_gbm","text":"Fit validate Generalized Boosted Regression models","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_gbm.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit and validate Generalized Boosted Regression models — fit_gbm","text":"","code":"fit_gbm( data, response, predictors, predictors_f = NULL, fit_formula = NULL, partition, thr = NULL, n_trees = 100, n_minobsinnode = as.integer(nrow(data) * 0.5/4), 
shrinkage = 0.1 )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_gbm.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit and validate Generalized Boosted Regression models — fit_gbm","text":"data data.frame. Database response (0,1) predictors values. response character. Column name species absence-presence data (0,1). predictors character. Vector column names quantitative predictor variables (.e. continuous variables). Usage predictors = c(\"aet\", \"cwd\", \"tmin\") predictors_f character. Vector column names qualitative predictor variables (.e. ordinal nominal variables type). Usage predictors_f = c(\"landform\") fit_formula formula. formula object response predictor variables (e.g. formula(pr_ab ~ aet + ppt_jja + pH + awc + depth + landform)). Note variables used must consistent used response, predictors, predictors_f arguments. Default NULL. partition character. Column name training validation partition groups. thr character. Threshold used get binary suitability values (.e. 0,1) needed threshold-dependent performance metrics. possible use one threshold type. necessary provide vector argument. following threshold criteria available: lpt: highest threshold omission. equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard index highest. max_sorensen: threshold Sorensen index highest. max_fpb: threshold FPB (F-measure presence-background data) highest. sensitivity: Threshold based specified sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers sensitivity value. sensitivity value specified, default used 0.9 one threshold type used must concatenated, e.g., thr=c('lpt', 'max_sens_spec', 'max_jaccard'), thr=c('lpt', 'max_sens_spec', 'sensitivity', sens='0.8'), thr=c('lpt', 'max_sens_spec', 'sensitivity'). 
Function use thresholds threshold specified. n_trees Integer specifying total number trees fit. equivalent number iterations number basis functions additive expansion. Default 100. n_minobsinnode Integer specifying minimum number observations terminal nodes trees. Note actual number observations, total weight. default value used nrow(data)*0.5/4 shrinkage Numeric. parameter applied tree expansion. Also known learning rate step-size reduction; 0.001 0.1 usually works, smaller learning rate typically requires trees. Default 0.1.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_gbm.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit and validate Generalized Boosted Regression models — fit_gbm","text":"list object : model: \"gbm\" class object gbm package. object can used predicting. predictors: tibble quantitative (c column names) qualitative (f column names) variables use modeling. performance: Performance metric (see sdm_eval). Threshold dependent metrics calculated based threshold specified thr argument. data_ens: Predicted suitability test partition based best model. 
database used fit_ensemble","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_gbm.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit and validate Generalized Boosted Regression models — fit_gbm","text":"","code":"if (FALSE) { data(\"abies\") # Using k-fold partition method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 10) ) abies2 gbm_t1 <- fit_gbm( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), predictors_f = c(\"landform\"), partition = \".part\", thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\") ) gbm_t1$model gbm_t1$predictors gbm_t1$performance gbm_t1$data_ens # Using bootstrap partition method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"boot\", replicates = 10, proportion = 0.7) ) abies2 gbm_t2 <- fit_gbm( data = abies2, response = \"pr_ab\", predictors = c(\"ppt_jja\", \"pH\", \"awc\"), predictors_f = c(\"landform\"), partition = \".part\", thr = \"max_sens_spec\" ) gbm_t2 }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_glm.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit and validate Generalized Linear Models — fit_glm","title":"Fit and validate Generalized Linear Models — fit_glm","text":"Fit validate Generalized Linear Models","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_glm.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit and validate Generalized Linear Models — fit_glm","text":"","code":"fit_glm( data, response, predictors, predictors_f = NULL, select_pred = FALSE, partition, thr = NULL, fit_formula = NULL, poly = 2, inter_order = 0 )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_glm.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit and validate Generalized 
Linear Models — fit_glm","text":"data data.frame. Database response (0,1) predictors values. response character. Column name species absence-presence data (0,1). predictors character. Vector column names quantitative predictor variables (.e. continuous variables). Usage predictors = c(\"aet\", \"cwd\", \"tmin\") predictors_f character. Vector column names qualitative predictor variables (.e. ordinal nominal variables type). Usage predictors_f = c(\"landform\") select_pred logical. Perform predictor selection. TRUE predictors selected based backward step wise approach. Default FALSE. partition character. Column name training validation partition groups. thr character. Threshold used get binary suitability values (.e. 0,1), needed threshold-dependent performance metrics. one threshold type can used. necessary provide vector argument. following threshold criteria available: lpt: highest threshold omission. equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard index highest. max_sorensen: threshold Sorensen index highest. max_fpb: threshold FPB (F-measure presence-background data) highest. sensitivity: Threshold based specified sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers sensitivity value. specified sensitivity values, function use default 0.9 one threshold type used must concatenated, e.g., thr=c('lpt', 'max_sens_spec', 'max_jaccard'), thr=c('lpt', 'max_sens_spec', 'sensitivity', sens='0.8'), thr=c('lpt', 'max_sens_spec', 'sensitivity'). Function use thresholds threshold specified. fit_formula formula. formula object response predictor variables (e.g. formula(pr_ab ~ aet + ppt_jja + pH + awc + depth + landform)). Note variables used must consistent used response, predictors, predictors_f arguments poly interger >= 2. used values >= 2 model use polynomials continuous variables (.e. 
used predictors argument). Default 0. inter_order interger >= 0. interaction order explanatory variables. Default 0.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_glm.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit and validate Generalized Linear Models — fit_glm","text":"list object : model: \"glm\" class object stats package. object can used predicting. predictors: tibble quantitative (c column names) qualitative (f column names) variables use modeling. performance: Performance metrics (see sdm_eval). Threshold dependent metric calculated based threshold specified thr argument . data_ens: Predicted suitability test partition. database used fit_ensemble","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_glm.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit and validate Generalized Linear Models — fit_glm","text":"","code":"if (FALSE) { data(\"abies\") abies # Using k-fold partition method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) abies2 glm_t1 <- fit_glm( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), predictors_f = c(\"landform\"), select_pred = FALSE, partition = \".part\", thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\"), poly = 0, inter_order = 0 ) glm_t1$model glm_t1$predictors glm_t1$performance glm_t1$data_ens # Using second order polynomial terms and first-order interaction terms glm_t2 <- fit_glm( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), predictors_f = c(\"landform\"), select_pred = FALSE, partition = \".part\", thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\"), poly = 2, inter_order = 1 ) # Using repeated k-fold partition method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = 
\"rep_kfold\", folds = 3, replicates = 5) ) abies2 # Using third order polynomial terms and second-order interaction terms glm_t3 <- fit_glm( data = abies2, response = \"pr_ab\", predictors = c(\"ppt_jja\", \"pH\", \"awc\"), predictors_f = c(\"landform\"), select_pred = FALSE, partition = \".part\", thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\"), poly = 3, inter_order = 2 ) }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_max.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit and validate Maximum Entropy models — fit_max","title":"Fit and validate Maximum Entropy models — fit_max","text":"Fit validate Maximum Entropy models","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_max.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit and validate Maximum Entropy models — fit_max","text":"","code":"fit_max( data, response, predictors, predictors_f = NULL, fit_formula = NULL, partition, background = NULL, thr = NULL, clamp = TRUE, classes = \"default\", pred_type = \"cloglog\", regmult = 1 )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_max.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit and validate Maximum Entropy models — fit_max","text":"data data.frame. Database response (0,1) predictors values. response character. Column name species absence-presence data (0,1). predictors character. Vector column names quantitative predictor variables (.e. continuous variables). Usage predictors = c(\"aet\", \"cwd\", \"tmin\") predictors_f character. Vector column names qualitative predictor variables (.e. ordinal nominal variables type). Usage predictors_f = c(\"landform\") fit_formula formula. formula object response predictor variables. See maxnet.formula function maxnet package. Note variables used must consistent used response, predictors, predictors_f arguments. Default NULL. partition character. 
Column name training validation partition groups. background data.frame. Database including rows 0 values response column predictors variables. column names must consistent data. Default NULL thr character. Threshold used get binary suitability values (.e. 0,1), needed threshold-dependent performance metrics. one threshold type can used. necessary provide vector argument. following threshold criteria available: lpt: highest threshold omission. equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard index highest. max_sorensen: threshold Sorensen index highest. max_fpb: threshold FPB (F-measure presence-background data) highest. sensitivity: Threshold based specified sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers sensitivity value. sensitivity values specified default used 0.9. one threshold type used must concatenated, e.g., thr=c('lpt', 'max_sens_spec', 'max_jaccard'), thr=c('lpt', 'max_sens_spec', 'sensitivity', sens='0.8'), thr=c('lpt', 'max_sens_spec', 'sensitivity'). Function use thresholds threshold specified. clamp logical. TRUE, predictors features restricted range seen model training. classes character. single feature combinations . Features symbolized letters: l (linear), q (quadratic), h (hinge), p (product), t (threshold). Usage classes = \"lpq\". Default \"default\" (see details). pred_type character. Type response required available \"link\", \"exponential\", \"cloglog\" \"logistic\". Default \"cloglog\" regmult numeric. constant adjust regularization. Default 1.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_max.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit and validate Maximum Entropy models — fit_max","text":"list object : model: \"maxnet\" class object maxnet package. object can used predicting. 
predictors: tibble quantitative (c column names) qualitative (f column names) variables use modeling. performance: Performance metrics (see sdm_eval). Threshold dependent metrics calculated based threshold specified thr argument. data_ens: Predicted suitability test partition based best model. database used fit_ensemble","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_max.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Fit and validate Maximum Entropy models — fit_max","text":"argument “classes” set default MaxEnt use different features combination depending number presences (np) follow rule: np < 10 classes = \"l\", np 10 15 classes = \"lq\", np 15 80 classes = \"lqh\", np >= 80 classes = \"lqph\" presence-absence (presence-pseudo-absence) data used data argument addition background points, function fit models presences background points validate presences absences. procedure makes maxent comparable presences-absences models (e.g., random forest, support vector machine). presences background points data used, function fit validate model presences background data. 
presence-absences used data argument without background, function fit model specified data (recommended).","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_max.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit and validate Maximum Entropy models — fit_max","text":"","code":"if (FALSE) { data(\"abies\") data(\"backg\") abies # environmental conditions of presence-absence data backg # environmental conditions of background points # Using k-fold partition method # Note that the partition method, number of folds or replications must # be the same for presence-absence and background points datasets abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) abies2 backg2 <- part_random( data = backg, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) backg2 max_t1 <- fit_max( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), predictors_f = c(\"landform\"), partition = \".part\", background = backg2, thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\"), clamp = TRUE, classes = \"default\", pred_type = \"cloglog\", regmult = 1 ) length(max_t1) max_t1$model max_t1$predictors max_t1$performance max_t1$data_ens }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_net.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit and validate Neural Networks models — fit_net","title":"Fit and validate Neural Networks models — fit_net","text":"Fit validate Neural Networks models","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_net.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit and validate Neural Networks models — fit_net","text":"","code":"fit_net( data, response, predictors, predictors_f = NULL, fit_formula = NULL, partition, thr = NULL, size = 2, decay = 0.1 
)"},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_net.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit and validate Neural Networks models — fit_net","text":"data data.frame. Database response (0,1) predictors values. response character. Column name species absence-presence data (0,1). predictors character. Vector column names quantitative predictor variables (.e. continuous variables). Usage predictors = c(\"aet\", \"cwd\", \"tmin\") predictors_f character. Vector column names qualitative predictor variables (.e. ordinal nominal variables type). Usage predictors_f = c(\"landform\") fit_formula formula. formula object response predictor variables (e.g. formula(pr_ab ~ aet + ppt_jja + pH + awc + depth + landform)). Note variables used must consistent used response, predictors, predictors_f arguments. Defaul NULL. partition character. Column name training validation partition groups. thr character. Threshold used get binary suitability values (.e. 0,1)., needed threshold-dependent performance metrics. one threshold type can specified. necessary provide vector argument. following threshold criteria available: lpt: highest threshold omission. equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard index highest. max_sorensen: threshold Sorensen index highest. max_fpb: threshold FPB (F-measure presence-background data) highest. sensitivity: Threshold based specified sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers sensitivity value. sensitivity value specified, default 0.9 one threshold type used must concatenated, e.g., thr=c('lpt', 'max_sens_spec', 'max_jaccard'), thr=c('lpt', 'max_sens_spec', 'sensitivity', sens='0.8'), thr=c('lpt', 'max_sens_spec', 'sensitivity'). Function use thresholds threshold specified. size numeric. 
Number units hidden layer. Can zero skip-layer units. Default 2. decay numeric. Parameter weight decay. Default 0.1.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_net.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit and validate Neural Networks models — fit_net","text":"list object : model: \"nnet.formula\" \"nnet\" class object nnet package. object can used predicting. predictors: tibble quantitative (c column names) qualitative (f column names) variables use modeling. performance: Performance metrics (see sdm_eval). Threshold dependent metric calculated based threshold specified argument. data_ens: Predicted suitability test partition. database used fit_ensemble","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_net.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit and validate Neural Networks models — fit_net","text":"","code":"if (FALSE) { data(\"abies\") # Using k-fold partition method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 10) ) abies2 nnet_t1 <- fit_net( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), predictors_f = c(\"landform\"), partition = \".part\", thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\"), fit_formula = NULL ) nnet_t1$model nnet_t1$predictors nnet_t1$performance nnet_t1$data_ens # Using bootstrap partition method and only with presence-absence # and get performance for several method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"boot\", replicates = 10, proportion = 0.7) ) abies2 nnet_t2 <- fit_net( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), predictors_f = c(\"landform\"), partition = \".part\", thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\"), fit_formula = NULL ) 
nnet_t2 }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_raf.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit and validate Random Forests models — fit_raf","title":"Fit and validate Random Forests models — fit_raf","text":"Fit validate Random Forests models","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_raf.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit and validate Random Forests models — fit_raf","text":"","code":"fit_raf( data, response, predictors, predictors_f = NULL, fit_formula = NULL, partition, thr = NULL, mtry = sqrt(length(c(predictors, predictors_f))) )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_raf.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit and validate Random Forests models — fit_raf","text":"data data.frame. Database response (0,1) predictors values. response character. Column name species absence-presence data (0,1). predictors character. Vector column names quantitative predictor variables (.e. continuous variables). Usage predictors = c(\"aet\", \"cwd\", \"tmin\") predictors_f character. Vector column names qualitative predictor variables (.e. ordinal nominal variables type). Usage predictors_f = c(\"landform\") fit_formula formula. formula object response predictor variables (e.g. formula(pr_ab ~ aet + ppt_jja + pH + awc + depth + landform)). Note variables used must consistent used response, predictors, predictors_f arguments. Default NULL partition character. Column name training validation partition groups. thr character. Threshold used get binary suitability values (.e. 0,1), needed threshold-dependent performance metrics. one threshold type can used. necessary provide vector argument. following threshold criteria available: lpt: highest threshold omission. equal_sens_spec: Threshold sensitivity specificity equal. 
max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard highest. max_sorensen: threshold Sorensen highest. max_fpb: threshold FPB (F-measure presence-background data) highest. sensitivity: Threshold based specified sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers sensitivity value. specified sensitivity values, function use default 0.9 one threshold type used must concatenated, e.g., thr=c('lpt', 'max_sens_spec', 'max_jaccard'), thr=c('lpt', 'max_sens_spec', 'sensitivity', sens='0.8'), thr=c('lpt', 'max_sens_spec', 'sensitivity'). Function use thresholds threshold specified. mtry numeric. Number variables randomly sampled candidates split. Default sqrt(length(c(predictors, predictors_f)))","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_raf.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit and validate Random Forests models — fit_raf","text":"list object : model: \"randomForest\" class object randomForest package. object can used predicting. predictors: tibble quantitative (c column names) qualitative (f column names) variables use modeling. performance: Performance metrics (see sdm_eval). Threshold dependent metrics calculated based threshold specified argument. data_ens: Predicted suitability test partition. 
database used fit_ensemble","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_raf.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit and validate Random Forests models — fit_raf","text":"","code":"if (FALSE) { data(\"abies\") # Using k-fold partition method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 10) ) abies2 rf_t1 <- fit_raf( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), predictors_f = c(\"landform\"), partition = \".part\", thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\"), fit_formula = NULL ) rf_t1$model rf_t1$predictors rf_t1$performance rf_t1$data_ens # Using bootstrap partition method and only with presence-absence # and get performance for several method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"boot\", replicates = 10, proportion = 0.7) ) abies2 rf_t2 <- fit_raf( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), predictors_f = c(\"landform\"), partition = \".part\", thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\"), fit_formula = NULL ) rf_t2 }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_svm.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit and validate Support Vector Machine models — fit_svm","title":"Fit and validate Support Vector Machine models — fit_svm","text":"Fit validate Support Vector Machine models","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_svm.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit and validate Support Vector Machine models — fit_svm","text":"","code":"fit_svm( data, response, predictors, predictors_f = NULL, fit_formula = NULL, partition, thr = NULL, sigma = \"automatic\", C = 1 
)"},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_svm.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit and validate Support Vector Machine models — fit_svm","text":"data data.frame. Database response (0,1) predictors values. response character. Column name species absence-presence data (0,1). predictors character. Vector column names quantitative predictor variables (.e. continuous variables). Usage predictors = c(\"aet\", \"cwd\", \"tmin\") predictors_f character. Vector column names qualitative predictor variables (.e. ordinal nominal variables type). Usage predictors_f = c(\"landform\") fit_formula formula. formula object response predictor variables (e.g. formula(pr_ab ~ aet + ppt_jja + pH + awc + depth + landform)). Note variables used must consistent used response, predictors, predictors_f arguments partition character. Column name training validation partition groups. thr character. Threshold used get binary suitability values (.e. 0,1) needed threshold-dependent performance metrics. one threshold type can used. necessary provide vector argument. following threshold criteria available: lpt: highest threshold omission. equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard index highest. max_sorensen: threshold Sorensen index highest. max_fpb: threshold FPB (F-measure presence-background data) highest. sensitivity: Threshold based specified sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers sensitivity value. sensitivity value specified, default used 0.9 one threshold type used must concatenated, e.g., thr=c('lpt', 'max_sens_spec', 'max_jaccard'), thr=c('lpt', 'max_sens_spec', 'sensitivity', sens='0.8'), thr=c('lpt', 'max_sens_spec', 'sensitivity'). Function use thresholds threshold specified. sigma numeric. 
Inverse kernel width Radial Basis kernel function \"rbfdot\". Default \"automatic\". C numeric. Cost constraints violation, 'C'-constant regularization term Lagrange formulation. Default 1","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_svm.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit and validate Support Vector Machine models — fit_svm","text":"list object : model: \"ksvm\" class object kernlab package. object can used predicting. predictors: tibble quantitative (c column names) qualitative (f column names) variables use modeling. performance: Performance metric (see sdm_eval). Threshold dependent metrics calculated based threshold specified argument. data_ens: Predicted suitability test partition based best model. database used fit_ensemble","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_svm.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Fit and validate Support Vector Machine models — fit_svm","text":"function constructs 'C-svc' classification type uses Radial Basis kernel \"Gaussian\" function (rbfdot). 
See details details ksvm.","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_svm.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit and validate Support Vector Machine models — fit_svm","text":"","code":"if (FALSE) { data(\"abies\") # Using k-fold partition method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) abies2 svm_t1 <- fit_svm( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), predictors_f = c(\"landform\"), partition = \".part\", thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\"), fit_formula = NULL ) names(svm_t1) svm_t1$model svm_t1$predictors svm_t1$performance svm_t1$data_ens # Using bootstrap partition method and only with presence-absence # and get performance for several method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"boot\", replicates = 10, proportion = 0.7) ) abies2 svm_t2 <- fit_svm( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), predictors_f = c(\"landform\"), partition = \".part\", thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\"), fit_formula = NULL ) svm_t2 }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/get_block.html","id":null,"dir":"Reference","previous_headings":"","what":"Transform a spatial partition layer to the same spatial properties as environmental variables — get_block","title":"Transform a spatial partition layer to the same spatial properties as environmental variables — get_block","text":"Transform spatial partition layer spatial properties environmental variables","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/get_block.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Transform a spatial partition layer to the same spatial properties as environmental 
variables — get_block","text":"","code":"get_block(env_layer, best_grid)"},{"path":"https://sjevelazco.github.io/flexsdm/reference/get_block.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Transform a spatial partition layer to the same spatial properties as environmental variables — get_block","text":"env_layer SpatRaster object environmental variables used block_partition band_partition function. Function always select first layer best_grid SpatRaster object returned block_partition band_partition","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/get_block.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Transform a spatial partition layer to the same spatial properties as environmental variables — get_block","text":"SpatRaster layer resolution extent environmental variables","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/get_block.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Transform a spatial partition layer to the same spatial properties as environmental variables — get_block","text":"Transform layer originating function block_partition band_partition spatial properties environmental variables","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/get_block.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Transform a spatial partition layer to the same spatial properties as environmental variables — get_block","text":"","code":"if (FALSE) { require(dplyr) require(terra) data(spp) f <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(f) # Example for a single species single_spp <- spp %>% dplyr::filter(species == \"sp3\") part <- part_sblock( env_layer = somevar, data = single_spp, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", min_res_mult = 100, max_res_mult = 500, num_grids = 10, min_occ = 5, n_part = 2 ) grid_env 
<- get_block(env_layer = somevar, best_grid = part$grid) grid_env part$grid plot(part$grid) plot(grid_env) }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/hespero.html","id":null,"dir":"Reference","previous_headings":"","what":"A data set containing localities of Hesperocyparis stephensonii species in California, USA — hespero","title":"A data set containing localities of Hesperocyparis stephensonii species in California, USA — hespero","text":"data set containing localities Hesperocyparis stephensonii species California, USA","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/hespero.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"A data set containing localities of Hesperocyparis stephensonii species in California, USA — hespero","text":"","code":"hespero"},{"path":"https://sjevelazco.github.io/flexsdm/reference/hespero.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"A data set containing localities of Hesperocyparis stephensonii species in California, USA — hespero","text":"tibble object 14 rows 4 variables: ID presences records ID x y columns coordinates Albers Equal Area Conic coordinate system pr_ab presence denoted 1","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/hespero.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"A data set containing localities of Hesperocyparis stephensonii species in California, USA — hespero","text":"","code":"if (FALSE) { require(dplyr) data(\"hespero\") hespero }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/homogenize_na.html","id":null,"dir":"Reference","previous_headings":"","what":"Homogenize cells with NAs across all layers — homogenize_na","title":"Homogenize cells with NAs across all layers — homogenize_na","text":"Homogenize cells NAs across 
layers","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/homogenize_na.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Homogenize cells with NAs across all layers — homogenize_na","text":"","code":"homogenize_na(x)"},{"path":"https://sjevelazco.github.io/flexsdm/reference/homogenize_na.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Homogenize cells with NAs across all layers — homogenize_na","text":"x SpatRaster.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/homogenize_na.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Homogenize cells with NAs across all layers — homogenize_na","text":"SpatRaster","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/homogenize_na.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Homogenize cells with NAs across all layers — homogenize_na","text":"Homogenize cells NAs across layers SpatRaster resulting SpatRaster layers cells NAa","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/homogenize_na.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Homogenize cells with NAs across all layers — homogenize_na","text":"","code":"if (FALSE) { #' require(terra) somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) somevar2 <- homogenize_na(somevar) par(mfrow = c(2, 1)) plot(somevar$CFP_4) plot(somevar2$CFP_4) par(mfrow = c(1, 1)) # In somevar2 all layers have the same cells with NAs }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/interp.html","id":null,"dir":"Reference","previous_headings":"","what":"Raster interpolation between two time periods — interp","title":"Raster interpolation between two time periods — interp","text":"function interpolates values year two specified years simple interpolation using two 
raster objects containing e.g. habitat suitability values predicted using species distribution model.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/interp.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Raster interpolation between two time periods — interp","text":"","code":"interp(r1, r2, y1, y2, rastername = NULL, dir_save = NULL)"},{"path":"https://sjevelazco.github.io/flexsdm/reference/interp.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Raster interpolation between two time periods — interp","text":"r1 SpatRaster. Raster object initial year r2 SpatRaster. Raster object final year y1 numeric. Initial year y2 numeric. Final year rastername character. Word used prefix raster file name. Default NULL dir_save character. Directory path name folder raster files saved. NULL, function return SpatRaster object, else, save raster given directory. Default NULL","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/interp.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Raster interpolation between two time periods — interp","text":"dir_save NULL, function returns SpatRaster suitability interpolation year. dir_save used, function outputs saved directory specified dir_save.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/interp.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Raster interpolation between two time periods — interp","text":"function interpolates suitability values assuming annual changes suitability linear. 
function useful linking SDM output based averaged climate data climate change scenarios models require suitability values disaggregated time periods, population dynamics (Keith et al., 2008; Conlisk et al., 2013; Syphard et al., 2013).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/interp.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Raster interpolation between two time periods — interp","text":"Keith, D.., Akçakaya, H.R., Thuiller, W., Midgley, G.F., Pearson, R.G., Phillips, S.J., Regan, H.M., Araujo, M.B. & Rebelo, T.G. (2008) Predicting extinction risks climate change: coupling stochastic population models dynamic bioclimatic habitat models. Biology Letters, 4, 560-563. Conlisk, E., Syphard, .D., Franklin, J., Flint, L., Flint, . & Regan, H.M. (2013) Management implications uncertainty assessing impacts multiple landscape-scale threats species persistence using linked modeling approach. Global Change Biology 3, 858-869. Syphard, .D., Regan, H.M., Franklin, J. & Swab, R. (2013) functional type vulnerability multiple threats depend spatial context Mediterranean-climate regions? Diversity Distributions, 19, 1263-1274.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/interp.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Raster interpolation between two time periods — interp","text":"","code":"if (FALSE) { require(terra) require(dplyr) f <- system.file(\"external/suit_time_step.tif\", package = \"flexsdm\") abma <- terra::rast(f) plot(abma) int <- interp( r1 = abma[[1]], r2 = abma[[2]], y1 = 2010, y2 = 2020, rastername = \"Abies\", dir_save = NULL ) int }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/msdm_posteriori.html","id":null,"dir":"Reference","previous_headings":"","what":"Methods to correct overprediction of species distribution models based on occurrences and suitability patterns. 
— msdm_posteriori","title":"Methods to correct overprediction of species distribution models based on occurrences and suitability patterns. — msdm_posteriori","text":"methods reduce overprediction species distribution models based posteriori methods (see Mendes et al 2020), .e., combination patterns species occurrences predicted suitability","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/msdm_posteriori.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Methods to correct overprediction of species distribution models based on occurrences and suitability patterns. — msdm_posteriori","text":"","code":"msdm_posteriori( records, x, y, pr_ab, cont_suit, method = c(\"obr\", \"pres\", \"lq\", \"mcp\", \"bmcp\"), thr = \"equal_sens_spec\", buffer = NULL, crs = NULL )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/msdm_posteriori.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Methods to correct overprediction of species distribution models based on occurrences and suitability patterns. — msdm_posteriori","text":"records tibble data.frame. database spatial coordinates species presences absences (pseudo-absence) used create species distribution models. x character. Column name spatial x coordinates. y character. Column name spatial y coordinates. pr_ab character. Column name presence absence data (.e. 1 0) cont_suit SpatRaster. Raster continuous suitability predictions \"species_specific\" type calculates minimum pairwise-distances occurrences selects maximum distance, .e., value buffer maximum distance minimum distance. procedure depends spatial pattern species' occurrences; thus, species, value buffer width calculated (usage buffer=\"species_specific\"). method character. character string indicating constraint method used. thr character numeric. Threshold used get binary suitability values (.e. 0,1), needed threshold-dependent performance metrics. 
one threshold type can specified. necessary provide vector argument. following threshold criteria available: lpt: highest threshold omission. equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard index highest. max_sorensen: threshold Sorensen index highest. max_fpb: threshold FPB highest. sensitivity: Threshold based specified sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers sensitivity value. specified sensitivity values, function use default 0.9 Also, possible specifying threshold value using numeric values (thr = 0.623). Default \"equal_sens_spec\". buffer numeric. Buffer width use 'bmcp' approach. buffer width interpreted m Coordinate reference system used \"crs\" argument longitude/latitude, map units cases. Usage buffer=50000. Default NULL crs character. Coordinate reference system used calculating buffer \"bmcp\" method.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/msdm_posteriori.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Methods to correct overprediction of species distribution models based on occurrences and suitability patterns. — msdm_posteriori","text":"function return SpatRaster continuous binary prediction.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/msdm_posteriori.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Methods to correct overprediction of species distribution models based on occurrences and suitability patterns. — msdm_posteriori","text":"function help reduce overprediction species distribution models based combination patterns species occurrences predicted suitability. recommended use approaches current distribution models projected different time periods (past future). 
Five methods implemented: Abbreviation list SDM: species distribution model l: suitability patches intercept species occurrences k: suitability patches intercept species occurrences T: threshold distances used select suitability patches methods reduce overprediction species distribution models already fitted based occurrences suitability patterns species (see 'thr' arguments) Method 'obr' (Occurrences Based Restriction). method assumes suitable patches intercepting species occurrences (l) likely part species distributions suitable patches intercept occurrence (k). Distance k patches closest l patch calculated, k patches removed exceed species-specific distance threshold SDMs models. threshold (T) calculated maximum distance vector minimum pairwise distances occurrences. Whenever suitable pixel within k patch distance T closest l patch, suitability pixel reduced zero. assumed simple threshold surrogate species-specific dispersal ability. T low, either species sampled throughout distribution, species geographically restricted, justifying narrow inclusion k patches (Mendes et al., 2020). Method 'pres' (occurrences based restriction). restrictive variant 'obr' method. retains pixels suitability patches intercepting occurrences (k) (Mendes et al., 2020). Method 'lq' (Lower Quantile). method similar 'obr' method, except procedure define distance threshold withdrawn k patches, lower quartile distance k patches closest l patch. Whenever suitable pixel within k patch, .e., within lower quartile, suitability pixel reduced zero. means 75% k patches withdrawn model (Mendes et al., 2020). Method 'mcp' (Minimum Convex Polygon). Compiled adapted Kremen et al. (2008), method excludes SDM predictions suitable pixels intercept minimum convex polygon, interior angles smaller 180, enclosing occurrences species. Method 'bmcp' (Buffered Minimum Convex Polygon). Compiled adapted Kremen et al. (2008), similar 'mcp' method except inclusion buffer zone surrounding minimum convex polygons. 
method buffer width value must provided \"buffer\" argument CRS \"crs\" argument. methodological performance information methods see Mendes et al. (2020). using one constraining methods, cite Mendes et al (2020).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/msdm_posteriori.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Methods to correct overprediction of species distribution models based on occurrences and suitability patterns. — msdm_posteriori","text":"Mendes, P.; Velazco S.J.E.; Andrade, .F..; De Marco, P. (2020) Dealing overprediction species distribution models: adding distance constraints can improve model accuracy, Ecological Modelling, press. https://doi.org/10.1016/j.ecolmodel.2020.109180 Kremen, C., Cameron, ., Moilanen, ., Phillips, S. J., Thomas, C. D., Beentje, H., . Zjhra, M. L. (2008). Aligning Conservation Priorities Across Taxa Madagascar High-Resolution Planning Tools. Science, 320(5873), 222-226. doi:10.1126/science.1155193","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/msdm_posteriori.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Methods to correct overprediction of species distribution models based on occurrences and suitability patterns. 
— msdm_posteriori","text":"","code":"if (FALSE) { require(dplyr) require(terra) data(\"spp\") somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) # Preparing data for modeling a species set.seed(10) occ <- spp %>% dplyr::filter(species == \"sp2\") %>% # filter a species sdm_extract( data = ., x = \"x\", y = \"y\", env_layer = somevar, filter_na = TRUE ) %>% # extrac variables values part_random(., pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 10) ) # add columns with partition # Fit a model m_glm <- fit_glm( data = occ, response = \"pr_ab\", predictors = names(somevar), partition = \".part\", thr = \"equal_sens_spec\", ) # Lets predict this model m_pred <- sdm_predict(models = m_glm, pred = somevar, thr = NULL, con_thr = FALSE) plot(m_pred[[1]]) m_pred[[1]] %>% plot() # Lets extract the raster from this list m_pred <- m_pred[[1]] ### bmcp method m_bmcp <- msdm_posteriori( records = occ, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", method = \"bmcp\", cont_suit = m_pred, thr = \"equal_sens_spec\", buffer = 30000, crs=crs(m_pred) ) plot(m_bmcp) ### mcp method m_mcp <- msdm_posteriori( records = occ, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", method = \"mcp\", cont_suit = m_pred, thr = \"equal_sens_spec\", buffer = NULL ) plot(m_mcp) ### pres method m_pres <- msdm_posteriori( records = occ, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", method = \"pres\", cont_suit = m_pred, thr = \"equal_sens_spec\", buffer = NULL ) plot(m_pres) ### lq method m_lq <- msdm_posteriori( records = occ, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", method = \"lq\", cont_suit = m_pred, thr = \"equal_sens_spec\", buffer = NULL ) plot(m_lq) ### obr method m_obr <- msdm_posteriori( records = occ, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", method = \"obr\", cont_suit = m_pred, thr = \"equal_sens_spec\", buffer = NULL ) plot(m_obr) 
}"},{"path":"https://sjevelazco.github.io/flexsdm/reference/msdm_priori.html","id":null,"dir":"Reference","previous_headings":"","what":"Create spatial predictor variables to reduce overprediction of species distribution models — msdm_priori","title":"Create spatial predictor variables to reduce overprediction of species distribution models — msdm_priori","text":"function creates geographical predictor variables , together environmental variables, can used construct constrained species distribution models.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/msdm_priori.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create spatial predictor variables to reduce overprediction of species distribution models — msdm_priori","text":"","code":"msdm_priori(data, x, y, method = c(\"xy\", \"min\", \"cml\", \"ker\"), env_layer)"},{"path":"https://sjevelazco.github.io/flexsdm/reference/msdm_priori.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create spatial predictor variables to reduce overprediction of species distribution models — msdm_priori","text":"data tibble data.frame. database geographical coordinates species presences. x character. Column name spatial x coordinates. y character. Column name spatial y coordinates. method character. character string indicating MSDM method used. following methods available: 'xy', 'min', 'cml', 'ker'. Usage method = 'cml' env_layer raster layer used construct species distribution models. object used create constraining variables resolution, extent, pattern empty cells environmental variables. advisable use raster environmental layer used create species distribution models avoid mismatch (e.g. 
resolution, extent, cells NA) environmental constraining variables.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/msdm_priori.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create spatial predictor variables to reduce overprediction of species distribution models — msdm_priori","text":"function returns SpatRaster object. raster/s used together environmental variables construct species distribution models. 'xy' approach creates single pair raster layers can used species share study region. Otherwise, 'cml', 'min', 'ker' create species-specific raster layer.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/msdm_priori.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Create spatial predictor variables to reduce overprediction of species distribution models — msdm_priori","text":"function creates geographical predictor variables , together environmental variables, can used construct constrained species distribution models. recommended use approaches create models projected current conditions different time periods (past future). Four methods implemented: xy (Latlong method). method assumes spatial structure can partially explain species distribution (Bahn & McGill, 2007). Therefore, two raster layers created, containing latitude longitude pixels, respectively. raster layers included covariates environmental layers construct species distribution models. method interact species occurrence generic given study region; reason, possible use method species set share study region. min (Nearest neighbor distance method). Compiled adapted Allouche et al. (2008), method calculates cell Euclidean geographic distance nearest presence point. cml (Cumulative distance method). Compiled adapted Allouche et al. (2008), method assumes pixels closer presences likely included species distributions. 
Therefore, raster layer created containing sum Euclidean geographic distances pixel occurrences species. Obtained values normalized vary zero one. raster layer included environmental layers construct species distribution models. ker (Kernel method). Compiled adapted Allouche et al. (2008), method, like cml, assumes pixels located areas higher density occurrences likely included actual species distribution. Thus, raster layer created containing Gaussian values based density occurrences species. standard deviation Gaussian distribution maximum value vector minimum distances pairs occurrences species. Gaussian values normalized vary zero one. raster layer included environmental layers construct species distribution models. See Mendes et al. (2020) methodological performance details. used one constraining method cite Mendes et al 2020.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/msdm_priori.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Create spatial predictor variables to reduce overprediction of species distribution models — msdm_priori","text":"Mendes, P.; Velazco S.J.E.; Andrade, .F..; De Marco, P. (2020) Dealing overprediction species distribution models: adding distance constraints can improve model accuracy, Ecological Modelling, press. https://doi.org/10.1016/j.ecolmodel.2020.109180 Allouche, O.; Steinitz, O.; Rotem, D.; Rosenfeld, .; Kadmon, R. (2008). Incorporating distance constraints species distribution models. Journal Applied Ecology, 45(2), 599-609. doi:10.1111/j.1365-2664.2007.01445.x Bahn, V.; McGill, B. J. (2007). Can niche-based distribution models outperform spatial interpolation? Global Ecology Biogeography, 16(6), 733-742. 
doi:10.1111/j.1466-8238.2007.00331.x","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/msdm_priori.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Create spatial predictor variables to reduce overprediction of species distribution models — msdm_priori","text":"","code":"if (FALSE) { require(dplyr) require(terra) data(\"spp\") somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) # Select the presences of a species occ <- spp %>% dplyr::filter(species == \"sp3\", pr_ab == 1) # Select a raster layer to be used as a basic raster a_variable <- somevar[[1]] plot(a_variable) points(occ %>% dplyr::select(x, y)) ### xy method m_xy <- msdm_priori( data = occ, x = \"x\", y = \"y\", method = \"xy\", env_layer = a_variable ) plot(m_xy) ### min method m_min <- msdm_priori( data = occ, x = \"x\", y = \"y\", method = \"min\", env_layer = a_variable ) plot(m_min) ### cml method m_cml <- msdm_priori( data = occ, x = \"x\", y = \"y\", method = \"cml\", env_layer = a_variable ) plot(m_cml) ### ker method m_ker <- msdm_priori( data = occ, x = \"x\", y = \"y\", method = \"ker\", env_layer = a_variable ) plot(m_ker) }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/occfilt_env.html","id":null,"dir":"Reference","previous_headings":"","what":"Perform environmental filtering on species occurrences — occfilt_env","title":"Perform environmental filtering on species occurrences — occfilt_env","text":"function perform filtering species occurrences based environmental conditions.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/occfilt_env.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Perform environmental filtering on species occurrences — occfilt_env","text":"","code":"occfilt_env(data, x, y, id, env_layer, 
nbins)"},{"path":"https://sjevelazco.github.io/flexsdm/reference/occfilt_env.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Perform environmental filtering on species occurrences — occfilt_env","text":"data data.frame. Data.frame tibble object presences (presence-absence) records, coordinates x character. Column name spatial x coordinates y character. Column name spatial y coordinates id character. Column names rows id. important row unique code. env_layer SpatRaster. Rasters environmental conditions nbins integer. number classes used split environmental condition","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/occfilt_env.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Perform environmental filtering on species occurrences — occfilt_env","text":"tibble object data environmentally filtered","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/occfilt_env.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Perform environmental filtering on species occurrences — occfilt_env","text":"function uses approach adapted approach proposed Varela et al. (2014). consists filtering occurrences environmental space. First, regular multidimensional grid created environmental space. multidimensional grid determined environmental variables (always use continuous variables) grid cell size defined number bins, used dividing variable range interval classes (Varela et al. 2014; Castellanos et al., 2019). number bins set \"nbins\" argument. , single occurrence randomly selected within cell multidimensional grid. Consider trade-number bins number filtered records number bins decreases, cell size grids increases, number filtered records decreases (Castellanos et al., 2019). occfilt_env works number dimensions (variables) original variables without performing PCA beforehand. 
greater number predictor variables (.e., number dimensions multidimensional environmental grid) greater number bins, greater time processing computer memory used. Therefore, recommended use small number bins 2-5 ten variables used. Environmental filters sensitive number bins. procedure selecting number bins used Velazco et al. (2020). selection consists testing different numbers bins, calculating average spatial autocorrelation among variables (based Moran’s index), selecting lowest average spatial autocorrelation highest number occurrences. Note greater number bins, greater records retained","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/occfilt_env.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Perform environmental filtering on species occurrences — occfilt_env","text":"Castellanos, . ., Huntley, J. W., Voelker, G., & Lawing, . M. (2019). Environmental filtering improves ecological niche models across multiple scales. Methods Ecology Evolution, 10(4), 481-492. https://doi.org/10.1111/2041-210X.13142 Varela, S., Anderson, R. P., Garcia-Valdes, R., & Fernandez-Gonzalez, F. (2014). Environmental filters reduce effects sampling bias improve predictions ecological niche models. Ecography, 37, 1084-1091. https://doi.org/10.1111/j.1600-0587.2013.00441.x Velazco, S. J. E., Svenning, J-C., Ribeiro, B. R., & Laureto, L. M. O. (2020). opportunities threats conserve phylogenetic diversity Neotropical palms. Diversity Distributions, 27, 512–523. 
https://doi.org/10.1111/ddi.13215","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/occfilt_env.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Perform environmental filtering on species occurrences — occfilt_env","text":"","code":"if (FALSE) { require(terra) require(dplyr) # Environmental variables somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) plot(somevar) # Species occurrences data(\"spp\") spp spp1 <- spp %>% dplyr::filter(species == \"sp1\", pr_ab == 1) somevar[[1]] %>% plot() points(spp1 %>% select(x, y)) spp1$idd <- 1:nrow(spp1) # split environmental variables into 5 bins filtered_1 <- occfilt_env( data = spp1, x = \"x\", y = \"y\", id = \"idd\", env_layer = somevar, nbins = 5 ) # split into 8 bins filtered_2 <- occfilt_env( data = spp1, x = \"x\", y = \"y\", id = \"idd\", env_layer = somevar, nbins = 8 ) # split into 12 bins filtered_3 <- occfilt_env( data = spp1, x = \"x\", y = \"y\", id = \"idd\", env_layer = somevar, nbins = 12 ) # note that the higher the nbins parameter the more # classes must be processed (4 variables, 30 bins = 923521 classes) # While the greater the greater the number of bins, the greater records retained }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/occfilt_geo.html","id":null,"dir":"Reference","previous_headings":"","what":"Perform geographical filtering on species occurrences — occfilt_geo","title":"Perform geographical filtering on species occurrences — occfilt_geo","text":"function perform geographical filtering species occurrences based different approach define minimum nearest-neighbor distance points.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/occfilt_geo.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Perform geographical filtering on species occurrences — occfilt_geo","text":"","code":"occfilt_geo( data, x, y, 
env_layer, method, prj = \"+proj=longlat +datum=WGS84\", reps = 20 )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/occfilt_geo.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Perform geographical filtering on species occurrences — occfilt_geo","text":"data data.frame. Data.frame tibble object presences (presence-absence) records, coordinates x character. Column name longitude data y character. Column name latitude data env_layer SpatRaster. Raster variables used fit model method character. Method perform geographical thinning. Pairs points filtered based geographical distance criteria.following methods available: moran: records filtered based smallest distance reduces Moran's values lower 0.1. Latlong = TRUE occurrences geographical projection. Usage method: method = c('moran'). cellsize: records filtered based resolution environmental variables can aggregated coarser resolution defined factor. Usage method: method = c('cellsize', factor = '2'). defined: records filtered based distance value (d) provided km. Usage method: method = c('defined', d = 300). prj character. Projection string (PROJ4) occurrences. necessary projection used WGS84 (\"+proj=longlat +datum=WGS84\"). Default \"+proj=longlat +datum=WGS84\" reps integer. Number times repeat thinning process. 
Default 20","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/occfilt_geo.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Perform geographical filtering on species occurrences — occfilt_geo","text":"tibble object data filtered geographically","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/occfilt_geo.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Perform geographical filtering on species occurrences — occfilt_geo","text":"function three alternatives implemented determine distance threshold pair points: 1-\"moran\" determines minimum nearest-neighbor distance minimizes spatial autocorrelation occurrence data, following Moran's semivariogram. Principal Component Analysis environmental variables performed first Principal Component used calculate semivariograms. , method allow use continuous variables. Sometimes, method can () greatly reduce number presences. 2-\"cellsize\" filters occurrences based predictors' resolution. method calculate distance first two cells environmental variable use distance minimum nearest-neighbor distance filter occurrences. resolution raster aggregated based values used \"factor\". Thus, distance used filtering can adjusted represent larger grid size. 3-\"determined\" method uses minimum nearest-neighbor distance specified km. third method \"thin\" function spThin package used (Aiello-Lammens et al., 2015) following argument settings reps = 20, write.files = FALSE, locs.thinned.list.return = TRUE, write.log.file = FALSE.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/occfilt_geo.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Perform geographical filtering on species occurrences — occfilt_geo","text":"Aiello-Lammens, M. E., Boria, R. ., Radosavljevic, ., Vilela, B., & Anderson, R. P. (2015). 
spThin: R package spatial thinning species occurrence records use ecological niche models. Ecography, 38(5), 541-545. https://doi.org/10.1111/ecog.01132","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/occfilt_geo.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Perform geographical filtering on species occurrences — occfilt_geo","text":"","code":"if (FALSE) { require(terra) require(dplyr) # Environmental variables somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) plot(somevar) # Species occurrences data(\"spp\") spp spp1 <- spp %>% dplyr::filter(species == \"sp1\", pr_ab == 1) somevar[[1]] %>% plot() points(spp1 %>% select(x, y)) # Using Moran method filtered_1 <- occfilt_geo( data = spp1, x = \"x\", y = \"y\", env_layer = somevar, method = c(\"moran\"), prj = crs(somevar) ) somevar[[1]] %>% plot(col = gray.colors(10)) points(spp1 %>% select(x, y)) # raw data points(filtered_1 %>% select(x, y), pch = 19, col = \"yellow\") # filtered data # Using cellsize method filtered_2 <- occfilt_geo( data = spp1, x = \"x\", y = \"y\", env_layer = somevar, method = c(\"cellsize\", factor = \"3\"), prj = crs(somevar) ) somevar[[1]] %>% plot(col = gray.colors(10)) points(spp1 %>% select(x, y)) # raw data points(filtered_2 %>% select(x, y), pch = 19, col = \"yellow\") # filtered data # Using defined method filtered_3 <- occfilt_geo( data = spp1, x = \"x\", y = \"y\", env_layer = somevar, method = c(\"defined\", d = \"30\"), prj = crs(somevar) ) somevar[[1]] %>% plot(col = gray.colors(10)) points(spp1 %>% select(x, y)) # raw data points(filtered_3 %>% select(x, y), pch = 19, col = \"yellow\") # filtered data }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_random.html","id":null,"dir":"Reference","previous_headings":"","what":"Conventional data partitioning methods — part_random","title":"Conventional data partitioning methods — 
part_random","text":"function provides different conventional (randomized, non-spatial) partitioning methods based cross validation folds (kfold, rep_kfold, loocv), well bootstrap (boot)","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_random.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Conventional data partitioning methods — part_random","text":"","code":"part_random(data, pr_ab, method = NULL)"},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_random.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Conventional data partitioning methods — part_random","text":"data data.frame. Database presences, presence-absence, pseudo-absence, records given species pr_ab character. Column name \"data\" presences, presence-absence, pseudo-absence. Presences must represented 1 absences 0 method character. Vector data partitioning method used. Usage part=c(method= 'kfold', folds='5'). Methods include: kfold: Random partitioning k-folds cross-validation. 'folds' refers number folds data partitioning, assumes value >=1. Usage method = c(method = \"kfold\", folds = 10). rep_kfold: Random partitioning repeated k-folds cross-validation. Usage method = c(method = \"rep_kfold\", folds = 10, replicates=10). 'folds' refers number folds data partitioning, assumes value >=1. 'replicate' refers number replicates, assumes value >=1. loocv: Leave-one-cross-validation (.k.. Jackknife). special case k-fold cross validation number partitions equal number records. Usage method = c(method = \"loocv\"). boot: Random bootstrap partitioning. Usage method=c(method='boot', replicates='2', proportion='0.7'). 'replicate' refers number replicates, assumes value >=1. 'proportion' refers proportion occurrences used model fitting, assumes value >0 <=1. 
example proportion='0.7' mean 70% data used model training, 30% used model testing.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_random.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Conventional data partitioning methods — part_random","text":"tibble object information used 'data' argument additional columns named .part containing partition groups. rep_kfold boot method return many \".part\" columns replicated defined. rest methods, single .part column returned. kfold, rep_kfold, loocv partition methods, groups defined integers. contrast, boot method, partition groups defined characters 'train' 'test'.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_random.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Conventional data partitioning methods — part_random","text":"Fielding, . H., & Bell, J. F. (1997). review methods assessment prediction errors conservation presence/absence models. Environmental Conservation, 24(1), 38-49. 
https://doi.org/10.1017/S0376892997000088","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_random.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Conventional data partitioning methods — part_random","text":"","code":"if (FALSE) { data(\"abies\") abies$partition <- NULL abies <- tibble(abies) # K-fold method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 10) ) abies2 # Repeated K-fold method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"rep_kfold\", folds = 10, replicates = 10) ) abies2 # Leave-one-out cross-validation (loocv) method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"loocv\") ) abies2 # Bootstrap method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"boot\", replicates = 50, proportion = 0.7) ) abies2 abies2$.part1 %>% table() # Note that for this method .partX columns have train and test words. }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_sband.html","id":null,"dir":"Reference","previous_headings":"","what":"Spatial band cross-validation — part_sband","title":"Spatial band cross-validation — part_sband","text":"function explores different numbers spatial bands returns suitable value given presence presence-absence database. 
selection best number bands performed automatically considering spatial autocorrelation, environmental similarity, number presence absence records partition.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_sband.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Spatial band cross-validation — part_sband","text":"","code":"part_sband( env_layer, data, x, y, pr_ab, type = \"lon\", n_part = 2, min_bands = 2, max_bands = 20, min_occ = 10, prop = 0.5 )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_sband.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Spatial band cross-validation — part_sband","text":"env_layer SpatRaster. Raster environmental variable. Used evaluate spatial autocorrelation environmental similarity training testing partitions. function calculate dissimilarity based Euclidean distances, can used continuous environmental variables data data.frame. Data.frame tibble object presences (presence-absence, presence-pseudo-absence) records, coordinates x character. Column name spatial x coordinates y character. Column name spatial y coordinates pr_ab character. Column presences, presence-absence, -pseudo-absence. Presences must represented 1 absences 0 type character. Specify bands across different degrees longitude 'lon' latitude 'lat'. Default 'lon'. n_part integer. Number partition. Default 2, values 2 yet implemented. min_bands integer. Minimum number spatial bands tested, default 2. max_bands integer. Maximum number spatial bands tested, default 20. min_occ numeric. Minimum number presences absences partition fold. min_occ value base number predictors order avoid -fitting error fitting models given fold. Default 10. prop numeric. Proportion points used testing autocorrelation groups (values > 0 <=1). smaller number , faster function work. 
Default 0.5","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_sband.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Spatial band cross-validation — part_sband","text":"list : part: tibble object information used 'data' arguments additional column .part partition group. best_part_info: tibble information best partition. contains number best partition (n_grid), number bands (n_bands), standard deviation presences (sd_p), standard deviation absences (sd_a), Moran's spatial autocorrelation (spa_auto), environmental similarity based Euclidean distance (env_sim). grid: SpatRaster object bands","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_sband.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Spatial band cross-validation — part_sband","text":"part_sbands function allows testing different numbers partitions using range latitudinal longitudinal bands. function explores range numbers bands given number partitions automatically selects best number bands given presence, presence-absences, presence-pseudo-absences dataset. Selection number bands based optimization procedure explores partitions three dimensions determined spatial autocorrelation (measured Moran's ), environmental similarity (Euclidean distance), difference amount data among partition groups (Standard Deviation - SD; Velazco et al., 2019). procedure iterative; first select partitions autocorrelation values less lowest quartile Morans , environmental similarity values greater third quartile Euclidean distances, difference amount data less lowest quartile SD. selection repeated one partition retained (Velazco et al., 2019). main benefits partition selection ) subjective, ii) balances environmental similarity special autocorrelation partitions groups, iii) controls selection partitions little data may problematic model fitting (\"min_occ\" argument). 
Partitions geographically structured tend evaluate model transferability directly conventional ones (e.g., performed part_random) (Roberts et al., 2017; Santini et al., 2021), relevant models used projections regions outside calibration area time periods. Band partitions can option species best partition found part_sblock species distributed linearly (e.g., species inhabit coastlines). function can interact get_block, sample_background, sample_pseudoabs sampling background points pseudo-absences within spatial partition broups","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_sband.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Spatial band cross-validation — part_sband","text":"Roberts, D. R., Bahn, V., Ciuti, S., Boyce, M. S., Elith, J., Guillera-Arroita, G., Hauenstein, S., Lahoz-Monfort, J. J., Schroder, B., Thuiller, W., Warton, D. ., Wintle, B. ., Hartig, F., & Dormann, C. F. (2017). Cross-validation strategies data temporal, spatial, hierarchical, phylogenetic structure. Ecography, 40, 913-929. https://doi.org/10.1111/ecog.02881 Santini, L., Benitez-Lopez, ., Maiorano, L., Cengic, M., & Huijbregts, M. . J. (2021). Assessing reliability species distribution projections climate change research. Diversity Distributions, ddi.13252. https://doi.org/10.1111/ddi.13252 Velazco, S. J. E., Villalobos, F., Galvao, F., & De Marco Junior, P. (2019). dark scenario Cerrado plant species: Effects future climate, land use protected areas ineffectiveness. Diversity Distributions, 25(4), 660-673. 
https://doi.org/10.1111/ddi.12886","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_sband.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Spatial band cross-validation — part_sband","text":"","code":"if (FALSE) { require(terra) require(dplyr) # Load datasets data(spp) f <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(f) # Example of two longitudinal partitions with presences and absences single_spp <- spp %>% dplyr::filter(species == \"sp1\") part_1 <- part_sband( env_layer = somevar, data = single_spp, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", type = \"lon\", min_bands = 2, max_bands = 20, n_part = 2, min_occ = 10, prop = 0.5 ) part_1$part # database with partition fold (.part) part_1$part %>% group_by(pr_ab, .part) %>% count() # number of presences and absences in each fold part_1$best_part_info # information of the best partition part_1$grid # raster with folds # Explore grid object and presences and absences points plot(part_1$grid, col = gray.colors(20)) points(part_1$part[c(\"x\", \"y\")], col = rainbow(8)[part_1$part$.part], cex = 0.9, pch = c(1, 19)[part_1$part$pr_ab + 1] ) # Example of four latitudinal partition and only presences single_spp <- spp %>% dplyr::filter(species == \"sp1\", pr_ab == 1) part_2 <- part_sband( env_layer = somevar, data = single_spp, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", type = \"lat\", min_bands = 8, max_bands = 40, n_part = 8, min_occ = 10, prop = 0.5 ) part_2$part part_2$best_part_info part_2$grid # Explore Grid object and presences points plot(part_2$grid, col = gray.colors(20)) points(part_2$part[c(\"x\", \"y\")], col = rainbow(8)[part_2$part$.part], cex = 0.5, pch = 19 ) }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_sblock.html","id":null,"dir":"Reference","previous_headings":"","what":"Spatial block cross-validation — part_sblock","title":"Spatial block cross-validation — 
part_sblock","text":"function explores spatial blocks different cell sizes returns suitable size given presence presence-absence database. selection best grid size performed automatically considering spatial autocorrelation, environmental similarity, number presence absence records partition.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_sblock.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Spatial block cross-validation — part_sblock","text":"","code":"part_sblock( env_layer, data, x, y, pr_ab, n_part = 3, min_res_mult = 3, max_res_mult = 200, num_grids = 30, min_occ = 10, prop = 0.5 )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_sblock.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Spatial block cross-validation — part_sblock","text":"env_layer SpatRaster. Raster environmental variable. Used evaluate spatial autocorrelation environmental similarity training testing partitions. function calculate dissimilarity based Euclidean distances, can used continuous environmental variables data data.frame. Data.frame tibble object presence (presence-absence, presences-pseudo-absence) records, coordinates x character. Column name spatial x coordinates y character. Column name spatial y coordinates pr_ab character. Column presence, presence-absence, pseudo-absence records. Presences must represented 1 absences 0 n_part integer. Number partition. Default 2. min_res_mult integer. Minimum value used multiplying raster resolution define finest resolution tested, default 3. max_res_mult integer. Maximum value used multiplying raster resolution define coarsest resolution tested, default 200. num_grids integer. Number grid tested min_res_mult X (raster resolution) max_res_mult X (raster resolution), default 30 min_occ numeric. Minimum number presences absences partition fold. 
min_occ value base amount predictors order avoid -fitting error fitting models given fold. Default 10. prop numeric. Proportion point used testing autocorrelation groups (values > 0 <=1). smaller proportion , faster function work. Default 0.5","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_sblock.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Spatial block cross-validation — part_sblock","text":"list : part: tibble object information used 'data' arguments additional column .part partition group. best_part_info: tibble information best partition. contains number best partition (n_grid), cell size (cell_size), standard deviation presences (sd_p), standard deviation absences (sd_a), Moran's spatial autocorrelation (spa_auto), environmental similarity based Euclidean distance (env_sim). grid: SpatRaster object blocks","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_sblock.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Spatial block cross-validation — part_sblock","text":"part_sblock allows test different numbers partitions using square blocks (like checkerboard). function explores range block sizes automatically selects best size given given presence, presence-absences, presence-pseudo-absences dataset. Number partition selection based optimization procedure explores partition size three dimensions determined spatial autocorrelation (measured Moran's ), environmental similarity (Euclidean distance), difference amount data among partition groups (Standard Deviation - SD; Velazco et al., 2019). procedure iteratively select partitions, first partitions autocorrelation values less lowest quartile Morans , environmental similarity values greater third quartile Euclidean distances difference amount data less lowest quartile SD. selection repeated one partition retained (Velazco et al., 2019). 
main benefit partition selection ) subjective, ii) balances environmental similarity special autocorrelation partitions, iii) controls selection partitions data may problematic model fitting (\"min_occ\" argument). Geographically structured partitions tend evaluate model transferability directly conventional ones (e.g., performed part_random) (Roberts et al., 2017; Santini et al., 2021), relevant models used projections regions outside calibration area time periods. function can interact get_block, sample_background, sample_pseudoabs sampling background points pseudo-absences within spatial partition broups","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_sblock.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Spatial block cross-validation — part_sblock","text":"Roberts, D. R., Bahn, V., Ciuti, S., Boyce, M. S., Elith, J., Guillera-Arroita, G., Hauenstein, S., Lahoz-Monfort, J. J., Schroder, B., Thuiller, W., Warton, D. ., Wintle, B. ., Hartig, F., & Dormann, C. F. (2017). Cross-validation strategies data temporal, spatial, hierarchical, phylogenetic structure. Ecography, 40, 913-929. https://doi.org/10.1111/ecog.02881 Santini, L., Benitez-Lopez, ., Maiorano, L., Cengic, M., & Huijbregts, M. . J. (2021). Assessing reliability species distribution projections climate change research. Diversity Distributions, ddi.13252. https://doi.org/10.1111/ddi.13252 Velazco, S. J. E., Villalobos, F., Galvao, F., & De Marco Junior, P. (2019). dark scenario Cerrado plant species: Effects future climate, land use protected areas ineffectiveness. Diversity Distributions, 25(4), 660-673. 
https://doi.org/10.1111/ddi.12886","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_sblock.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Spatial block cross-validation — part_sblock","text":"","code":"if (FALSE) { require(terra) require(dplyr) # Load datasets data(spp) f <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(f) # Example for one single species single_spp <- spp %>% dplyr::filter(species == \"sp3\") part <- part_sblock( env_layer = somevar, data = single_spp, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", min_res_mult = 10, max_res_mult = 500, num_grids = 30, n_part = 2, min_occ = 5, prop = 0.5 ) part part$part # database with partition fold (.part) part$part %>% group_by(pr_ab, .part) %>% count() # number of presences and absences in each fold part$best_part_info # information of the best partition part$grid # raster with folds # Explore the Grid object plot(part$grid) points(part$part[c(\"x\", \"y\")], col = c(\"blue\", \"red\")[part$part$.part], cex = 0.5, pch = 19 ) terra::res(part$grid) terra::res(somevar) # Note that this is a layer with block partition, but it has a # different resolution than the original environmental variables. # If you wish have a layer with the same properties # (i.e. resolution, extent, NAs) as your original environmental # variables you can use the \\code{\\link{get_block}} function. grid_env <- get_block(env_layer = somevar, best_grid = part$grid) plot(grid_env) # this is a block layer with the same layer # properties as environmental variables. 
points(part$part[c(\"x\", \"y\")], col = c(\"blue\", \"red\")[part$part$.part], cex = 0.5, pch = 19 ) # This layer is very useful if you need to sample # pseudo_absence or background point # See examples in \\code{\\link{backgroudp}} and \\code{\\link{pseudoabs}} # Example of a higher number of partitions part <- part_sblock( env_layer = somevar, data = single_spp, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", min_res_mult = 10, max_res_mult = 500, num_grids = 30, n_part = 4, min_occ = 2, prop = 0.5 ) # Explore the Grid object plot(part$grid, col = gray.colors(4)) points(part$part[c(\"x\", \"y\")], col = rainbow(n = 4)[part$part$.part], cex = 0.5, pch = 19 ) # Using these functions with several species spp2 <- split(spp, spp$species) class(spp2) length(spp2) names(spp2) part_list <- lapply(spp2, function(x) { result <- part_sblock( env_layer = somevar, data = x, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", min_res_mult = 10, max_res_mult = 500, num_grids = 30, n_part = 2, min_occ = 5, prop = 0.5 ) result }) part_list$sp3 # For this dataset a suitable partition was not found # Create a single database for all species occ_part <- lapply(part_list, function(x) { if (!length(x) > 0) { x[[1]] } }) %>% dplyr::bind_rows(.id = \"species\") occ_part # Get the best grid info for all species grid_info <- dplyr::bind_rows(lapply( part_list, function(x) x[[2]] ), .id = \"species\") # Get the best grid layer for all species grid_layer <- lapply(part_list, function(x) x$grid) grid_layer2 <- lapply(grid_layer, function(x) { get_block(env_layer = somevar[[1]], best_grid = x) }) grid_layer2 <- terra::rast(grid_layer2) grid_layer2 plot(grid_layer2) # Block partition for presences-only database single_spp <- spp %>% dplyr::filter(species == \"sp1\", pr_ab == 1) single_spp single_spp$pr_ab %>% unique() # only presences part <- part_sblock( env_layer = somevar, data = single_spp, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", min_res_mult = 10, max_res_mult = 500, num_grids = 30, n_part = 4, min_occ = 
10, prop = 0.5 ) part$part %>% dim() part$best_part_info part$grid plot(part$grid) points( part$part[c(\"x\", \"y\")], col = c(\"blue\", \"red\", \"green\", \"black\")[part$part$.part], cex = 0.5, ) }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_senv.html","id":null,"dir":"Reference","previous_headings":"","what":"Environmental and spatial cross-validation — part_senv","title":"Environmental and spatial cross-validation — part_senv","text":"function explores different numbers environmental partitions (clusters) based K-means clustering algorithm returns number partitions best suited given presence, presence-absences, presence-pseudo-absences database. Selection best number partitions performed automatically considering spatial autocorrelation, environmental similarity, number presence /absence records partition.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_senv.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Environmental and spatial cross-validation — part_senv","text":"","code":"part_senv( env_layer, data, x, y, pr_ab, min_n_groups = 2, max_n_groups = 10, min_occ = 10, prop = 0.5 )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_senv.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Environmental and spatial cross-validation — part_senv","text":"env_layer SpatRaster. Raster environmental variable. used evaluate spatial autocorrelation environmental similarity training testing partitions. function calculate dissimilarity based Euclidean distances, can used continuous variables data data.frame. Data.frame tibble object presence (presence-absence, presences-pseudo-absence) records, coordinates x character. Column name spatial x coordinates y character. Column name spatial y coordinates pr_ab character. Column presences, presence-absence, pseudo-absence. Presences must represented 1 absences 0 min_n_groups integer. 
Minimum number groups tested. Default 2. max_n_groups integer. Maximum number groups tested. Default 10. min_occ numeric. Minimum number presences absences partition fold. min_occ value base amount predictors order avoid -fitting error fitting models given fold. Default 10. prop numeric. Proportion point used testing autocorrelation groups (values > 0 <=1). smaller number , faster function work. Default 0.5","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_senv.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Environmental and spatial cross-validation — part_senv","text":"list : part: tibble object information used 'data' arguments additional column .part partition group. best_part_info: tibble information best partition. contains number partition (n_groups), standard deviation presences (sd_p), standard deviation absences (sd_a), Moran's spatial autocorrelation (spa_auto) environmental similarity based Euclidean distance (env_sim)","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_senv.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Environmental and spatial cross-validation — part_senv","text":"part_sblock allows test different numbers partitions defined envirnomental clusters delimited K-mean cluster algorithm. function explores range environmental clusters automatically selects best number cluster given given presence, presence-absences, presence-pseudo-absences dataset. selection number clusters based optimization procedure explores partition size three dimensions determined spatial autocorrelation (measured Moran's ), environmental similarity (Euclidean distance), difference amount data among clusters (Standard Deviation - SD; Velazco et al., 2019). 
procedure cyclically select partitions autocorrelation values less lowest quartile Morans , environmental similarity values greater third quartile Euclidean distances difference amount data less lowest quartile SD. selection repeated one partition retained (Velazco et al., 2019). main benefit partition selection ) subjective, ii) balances environmental similarity special autocorrelation partitions, iii) controls partition selection data may problematic model fitting (\"min_occ\" argument).. Partitions geographically structured tend evaluate model transferability directly conventional ones (e.g., performed part_random) (Roberts et al., 2017; Santini et al., 2021), relevant models want used projections regions outside calibration area periods. function can interact get_block, sample_background, sample_pseudoabs sampling background points pseudo-absences within spatial partition broups","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_senv.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Environmental and spatial cross-validation — part_senv","text":"Roberts, D. R., Bahn, V., Ciuti, S., Boyce, M. S., Elith, J., Guillera-Arroita, G., Hauenstein, S., Lahoz-Monfort, J. J., Schroder, B., Thuiller, W., Warton, D. ., Wintle, B. ., Hartig, F., & Dormann, C. F. (2017). Cross-validation strategies data temporal, spatial, hierarchical, phylogenetic structure. Ecography, 40, 913-929. https://doi.org/10.1111/ecog.02881 Santini, L., Benitez-Lopez, ., Maiorano, L., Cengic, M., & Huijbregts, M. . J. (2021). Assessing reliability species distribution projections climate change research. Diversity Distributions, ddi.13252. https://doi.org/10.1111/ddi.13252 Velazco, S. J. E., Villalobos, F., Galvao, F., & De Marco Junior, P. (2019). dark scenario Cerrado plant species: Effects future climate, land use protected areas ineffectiveness. Diversity Distributions, 25(4), 660-673. 
https://doi.org/10.1111/ddi.12886","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_senv.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Environmental and spatial cross-validation — part_senv","text":"","code":"if (FALSE) { require(terra) require(ggplot2) f <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(f) # Select a species spp1 <- spp %>% dplyr::filter(species == \"sp1\") part1 <- part_senv( env_layer = somevar, data = spp1, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", min_n_groups = 2, max_n_groups = 10, min_occ = 10, prop = 0.2 ) part1 ggplot(part1$part, aes(x, y, col = factor(.part))) + geom_point(aes(shape = factor(pr_ab))) ggplot(part1$part, aes(x, y, col = factor(.part))) + geom_point(aes(shape = factor(pr_ab))) + facet_wrap(. ~ .part) ggplot(part1$part, aes(x, y, col = factor(.part))) + geom_point(aes(shape = factor(pr_ab))) + facet_wrap(. ~ pr_ab) }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/pdp_data.html","id":null,"dir":"Reference","previous_headings":"","what":"Calculate data to construct partial dependence plots — pdp_data","title":"Calculate data to construct partial dependence plots — pdp_data","text":"Calculate data construct partial dependence plots given predictor","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/pdp_data.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Calculate data to construct partial dependence plots — pdp_data","text":"","code":"pdp_data( model, predictors, resolution = 50, resid = FALSE, training_data = NULL, projection_data = NULL, clamping = FALSE )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/pdp_data.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Calculate data to construct partial dependence plots — pdp_data","text":"model model object class \"gam\", \"gbm\", \"glm\", 
\"graf\", \"ksvm\", \"ksvm\", \"maxnet”, “nnet\", \"randomForest\" model can found first element list returned function fit_, tune_, esm_ function families predictors character. Vector predictor name(s) plot. NULL predictors plotted. Default NULL resolution numeric. Number equally spaced points predict continuous predictors. Default 50 resid logical. Calculate residuals based training data. Default FALSE training_data data.frame. Database response (0,1) predictor values used fit model. Default NULL projection_data SpatRaster. Raster layer environmental variables used model projection. argument used, function calculate partial dependence curves distinguishing conditions used training projection conditions (.e., projection data present projection area training). Default NULL clamping logical. Perform clamping. maxent models. Default FALSE","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/pdp_data.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Calculate data to construct partial dependence plots — pdp_data","text":"list two tibbles \"pdpdata\" \"resid\". #' pdpdata: data construct partial dependence plots, first column includes values selected environmental variable, second column predicted suitability, third column range type, two values Training Projecting, referring suitability calculated within outside range training conditions. Third column returned \"projection_data\" argument used resid: data plot residuals. 
first column includes values selected environmental variable second column predicted suitability.","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/pdp_data.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Calculate data to construct partial dependence plots — pdp_data","text":"","code":"if (FALSE) { library(terra) library(dplyr) somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) # environmental data names(somevar) <- c(\"aet\", \"cwd\", \"tmx\", \"tmn\") data(abies) abies2 <- abies %>% select(x, y, pr_ab) abies2 <- sdm_extract(abies2, x = \"x\", y = \"y\", env_layer = somevar ) abies2 <- part_random(abies2, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) svm_t1 <- fit_svm( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmx\", \"tmn\"), partition = \".part\", thr = c(\"max_sens_spec\") ) df <- pdp_data( model = svm_t1$model, predictors = c(\"aet\"), resolution = 100, resid = TRUE, projection_data = somevar, training_data = abies2, clamping = FALSE ) df names(df) df$pdpdata df$resid plot(df$pdpdata[1:2], type = \"l\") points(df$resid[1:2], cex = 0.5) # see p_pdp to construct partial dependence plot with ggplot2 }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/plot_res.html","id":null,"dir":"Reference","previous_headings":"","what":"Plot different resolutions to be used in part_sblock — plot_res","title":"Plot different resolutions to be used in part_sblock — plot_res","text":"function useful display maximum minimum resolution want test block_partition function. 
Note resolution tested fine, plot display may take long time.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/plot_res.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Plot different resolutions to be used in part_sblock — plot_res","text":"","code":"plot_res(r, res_mult)"},{"path":"https://sjevelazco.github.io/flexsdm/reference/plot_res.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Plot different resolutions to be used in part_sblock — plot_res","text":"r SpatRaster. raster layer, preferably layer environmental variables used res_mult numeric. Maximum minimum resolution tested.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/plot_res.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Plot different resolutions to be used in part_sblock — plot_res","text":"plot original raster overlapped grid resolution used","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/plot_res.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Plot different resolutions to be used in part_sblock — plot_res","text":"","code":"if (FALSE) { f <- system.file(\"external/somevar.tif\", package = \"flexsdm\") r <- terra::rast(f) r <- r$CFP_1 plot_res(r, res_mult = 100) plot_res(r, res_mult = 200) }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/p_bpdp.html","id":null,"dir":"Reference","previous_headings":"","what":"Bivariate partial dependence plot — p_bpdp","title":"Bivariate partial dependence plot — p_bpdp","text":"Create bivariate partial dependence plot(s) explore bivariate marginal effect predictors suitability","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/p_bpdp.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Bivariate partial dependence plot — p_bpdp","text":"","code":"p_bpdp( model, predictors = NULL, 
resolution = 50, training_data = NULL, training_boundaries = NULL, projection_data = NULL, clamping = FALSE, color_gradient = c(\"#000004\", \"#1B0A40\", \"#4A0C69\", \"#781B6C\", \"#A42C5F\", \"#CD4345\", \"#EC6824\", \"#FA990B\", \"#F7CF3D\", \"#FCFFA4\"), color_training_boundaries = \"white\", theme = ggplot2::theme_classic() )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/p_bpdp.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Bivariate partial dependence plot — p_bpdp","text":"model model object class \"gam\", \"gbm\", \"glm\", \"graf\", \"ksvm\", \"ksvm\", \"maxnet”, “nnet\", \"randomForest\" model can found first element list returned function fit_, tune_, esm_ function families predictors character. Vector predictor names calculate partial dependence plots. NULL predictors used. Default NULL resolution numeric. Number equally spaced points predict suitability values continuous predictors. Default 50 training_data data.frame. Database response (0,1) predictor values used fit model. Default NULL training_boundaries character. Plot training conditions boundaries based training data (.e., presences, presences absences, etc). training_boundaries = \"convexh\", function delimit training environmental region based convex-hull. training_boundaries = \"rectangle\", function delimit training environmental region based four straight lines. used methods necessary provide data training_data argument. NULL predictors used. Default NULL. projection_data SpatRaster. Raster layer environmental variables used model projection. Default NULL clamping logical. Perform clamping. maxent models. Default FALSE color_gradient character. vector range colors plot. Default c(\"#FDE725\", \"#B3DC2B\", \"#6DCC57\", \"#36B677\", \"#1F9D87\", \"#25818E\", \"#30678D\", \"#3D4988\", \"#462777\", \"#440154\") color_training_boundaries character. vector one color used color points residuals, Default \"white\" theme ggplot2 theme. 
Default ggplot2::theme_classic()","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/p_bpdp.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Bivariate partial dependence plot — p_bpdp","text":"function creates partial dependent surface plots explore bivariate marginal effect predictors suitability. projection_data used, function extract minimum maximum values found region time period model projected. Partial dependence surface plot used interpret model explore model extrapolate outside environmental conditions used train model (convex hull polygon).","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/p_bpdp.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Bivariate partial dependence plot — p_bpdp","text":"","code":"if (FALSE) { library(terra) library(dplyr) somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) # environmental data names(somevar) <- c(\"aet\", \"cwd\", \"tmx\", \"tmn\") data(abies) # set seed abies2 <- abies %>% dplyr::select(x, y, pr_ab) %>% dplyr::group_by(pr_ab) %>% dplyr::slice_sample(prop = 0.5) abies2 <- sdm_extract(abies2, x = \"x\", y = \"y\", env_layer = somevar ) abies2 <- part_random(abies2, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) svm_t1 <- fit_svm( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmx\", \"tmn\"), partition = \".part\", thr = c(\"max_sens_spec\") ) # Partial depence surface plot p_bpdp(model = svm_t1$model, training_data = abies2) p_bpdp(model = svm_t1$model, training_data = abies2, predictors = c(\"aet\", \"cwd\")) p_bpdp(model = svm_t1$model, training_data = abies2, resolution = 10) p_bpdp(model = svm_t1$model, training_data = abies2, resolution = 70) # With training condition boundaires p_bpdp(model = svm_t1$model, training_data = abies2, training_boundaries = \"convexh\") p_bpdp(model = 
svm_t1$model, training_data = abies2, training_boundaries = \"rectangle\", color_training_boundaries = \"yellow\") p_bpdp( model = svm_t1$model, training_data = abies2, training_boundaries = \"convexh\", color_training_boundaries = \"orange\", color_gradient = c(\"#00007F\", \"#007FFF\", \"#7FFF7F\", \"#FF7F00\", \"#7F0000\") ) # With projection data p_bpdp( model = svm_t1$model, training_data = abies2, training_boundaries = \"rectangle\", projection_data = somevar, # a SpatRaster used to predict or project the model color_training_boundaries = \"white\", color_gradient = c(\"#00007F\", \"#007FFF\", \"#7FFF7F\", \"#FF7F00\", \"#7F0000\") ) # Bivariate partial dependence plot for training and projection condition plot(somevar[[1]], main = \"Projection area\") p_bpdp(model = svm_t1$model, training_data = abies2, projection_data = somevar, # a SpatRaster used to predict or project the model training_boundaries = \"convexh\") # Bivariate partial dependece plot with categorical variables somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) # environmental data names(somevar) <- c(\"aet\", \"cwd\", \"tmx\", \"tmn\") cat <- system.file(\"external/clusters.shp\", package = \"flexsdm\") cat <- terra::vect(cat) cat$clusters <- paste0(\"c\", cat$clusters) cat <- terra::rasterize(cat, somevar, field = \"clusters\") somevar <- c(somevar, cat) plot(somevar) # set seed abies2 <- abies %>% dplyr::select(x, y, pr_ab) %>% dplyr::group_by(pr_ab) %>% dplyr::slice_sample(prop = 0.5) abies2 <- sdm_extract( data = abies2, x = \"x\", y = \"y\", env_layer = somevar ) abies2 <- part_random(abies2, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) svm_t1 <- fit_svm( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmx\", \"tmn\"), predictors_f = \"clusters\", partition = \".part\", thr = c(\"max_sens_spec\") ) p_bpdp(model = svm_t1$model, training_data = abies2, training_boundaries = \"convexh\") 
}"},{"path":"https://sjevelazco.github.io/flexsdm/reference/p_extra.html","id":null,"dir":"Reference","previous_headings":"","what":"Graphical exploration of extrapolation or suitability pattern in the environmental and geographical space — p_extra","title":"Graphical exploration of extrapolation or suitability pattern in the environmental and geographical space — p_extra","text":"Graphical exploration extrapolation suitability pattern environmental geographical space","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/p_extra.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Graphical exploration of extrapolation or suitability pattern in the environmental and geographical space — p_extra","text":"","code":"p_extra( training_data, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", extra_suit_data, projection_data, predictors = NULL, geo_space = TRUE, geo_position = \"right\", prop_points = 0.2, maxcells = 1e+05, alpha_p = 0.5, color_p = \"black\", alpha_gradient = 0.5, color_gradient = c(\"#FDE725\", \"#B3DC2B\", \"#6DCC57\", \"#36B677\", \"#1F9D87\", \"#25818E\", \"#30678D\", \"#3D4988\", \"#462777\", \"#440154\"), theme = ggplot2::theme_classic() )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/p_extra.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Graphical exploration of extrapolation or suitability pattern in the environmental and geographical space — p_extra","text":"training_data data.frame. Database response (0,1) predictor values used fit model. x character. Column name spatial x coordinates y character. Column name spatial y coordinates pr_ab character. Column name species absence-presence, pseudo-absence-presence, background-presence data (0,1). extra_suit_data SpatRaster. Raster layer extrapolation suitability values. extra_suit_data must resolution extent projection_data projection_data SpatRaster. 
Raster layer environmental variables used model projection. projection_data must resolution extent extra_suit_data predictors character. Vector predictor name(s) calculate partial dependence plots. NULL predictors used. Default NULL. geo_space logical. TRUE produced map. Default TRUE geo_position character. Map position regarding plot environmental space, right, left, bottom, upper. Default \"right\" prop_points numeric. Proportion cells extra_suit_data projection_data select plotting. default. 0.5. maxcells integer. Maximum number cells used plot geographical space. Default 100000 alpha_p numeric. value 0 1 control transparency presence-absence points. Lower values corresponding transparent colors. Default 0.5 color_p character. vector color used color presence-absence points. Default \"black\" alpha_gradient numeric. value 0 1 control transparency projection data Lower values corresponding transparent colors. Default 0.5 color_gradient character. vector colors used color projection data. Default c( \"#FDE725\", \"#B3DC2B\", \"#6DCC57\", \"#36B677\", \"#1F9D87\", \"#25818E\", \"#30678D\", \"#3D4988\", \"#462777\", \"#440154\") theme ggplot2 theme. 
Default ggplot2::theme_classic()","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/p_extra.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Graphical exploration of extrapolation or suitability pattern in the environmental and geographical space — p_extra","text":"plot","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/p_extra.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Graphical exploration of extrapolation or suitability pattern in the environmental and geographical space — p_extra","text":"","code":"if (FALSE) { require(dplyr) require(terra) require(ggplot2) data(spp) f <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(f) names(somevar) <- c(\"aet\", \"cwd\", \"tmx\", \"tmn\") spp$species %>% unique() sp <- spp %>% dplyr::filter(species == \"sp2\", pr_ab == 1) %>% dplyr::select(x, y, pr_ab) # Calibration area based on some criterion such as dispersal ability ca <- calib_area(sp, x = \"x\", y = \"y\", method = c(\"buffer\", width = 50000), crs = crs(somevar)) plot(somevar[[1]]) points(sp) plot(ca, add = T) # Sampling pseudo-absences set.seed(10) psa <- sample_pseudoabs( data = sp, x = \"x\", y = \"y\", n = nrow(sp) * 2, method = \"random\", rlayer = somevar, calibarea = ca ) # Merge presences and abasences databases to get a complete calibration data sp_pa <- dplyr::bind_rows(sp, psa) sp_pa # Get environmental condition of calibration area sp_pa_2 <- sdm_extract(data = sp_pa, x = \"x\", y = \"y\", env_layer = somevar) sp_pa_2 # Measure extrapolation based on calibration data (presence and pseudo-absences) # using SHAPE metric extr <- extra_eval( training_data = sp_pa_2, pr_ab = \"pr_ab\", projection_data = somevar, metric = \"mahalanobis\", univar_comb = FALSE, n_cores = 1, aggreg_factor = 1 ) plot(extr) ## %######################################################%## #### Explore extrapolation in the #### #### 
environmental and geographical space #### ## %######################################################%## p_extra( training_data = sp_pa_2, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", extra_suit_data = extr, projection_data = somevar, geo_space = TRUE, prop_points = 0.05 ) p_extra( training_data = sp_pa_2, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", extra_suit_data = extr, projection_data = somevar, predictors = c(\"tmn\", \"cwd\"), geo_space = TRUE, prop_points = 0.05 ) p_extra( training_data = sp_pa_2, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", extra_suit_data = extr, projection_data = somevar, predictors = c(\"cwd\", \"tmx\", \"aet\"), geo_space = TRUE, geo_position = \"left\", prop_points = 0.05, color_p = \"white\", alpha_p = 0.5, alpha_gradient = 0.2, color_gradient = c(\"#404096\", \"#529DB7\", \"#7DB874\", \"#E39C37\", \"#D92120\"), theme = ggplot2::theme_dark() ) p_extra( training_data = sp_pa_2, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", extra_suit_data = extr, projection_data = somevar, geo_space = TRUE, prop_points = 0.05, color_p = \"white\", alpha_p = 0.5, alpha_gradient = 0.2, color_gradient = c(\"#404096\", \"#529DB7\", \"#7DB874\", \"#E39C37\", \"#D92120\"), theme = ggplot2::theme_dark() ) # Explore extrapolation only in the environmental space p_extra( training_data = sp_pa_2, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", extra_suit_data = extr, projection_data = somevar, geo_space = FALSE, prop_points = 0.05, color_p = \"black\", color_gradient = c(\"#085CF8\", \"#65AF1E\", \"#F3CC1D\", \"#FC6A9B\", \"#D70500\"), theme = ggplot2::theme_minimal() ) ##%######################################################%## #### Explore univariate #### #### and combinatorial extrapolation #### ##%######################################################%## extr <- extra_eval( training_data = sp_pa_2, pr_ab = \"pr_ab\", projection_data = somevar, metric = \"mahalanobis\", univar_comb = TRUE, n_cores = 1, aggreg_factor = 1 ) plot(extr) p_extra( training_data = sp_pa_2, x = \"x\", y = 
\"y\", pr_ab = \"pr_ab\", extra_suit_data = extr$uni_comb, # use uni_comb layer projection_data = somevar, geo_space = TRUE, prop_points = 0.05, color_gradient = c(\"#B3DC2B\",\"#25818E\") ) ## %######################################################%## #### With p_extra also is possible #### #### to explore the patterns of suitability #### ## %######################################################%## sp_pa_2 <- part_random( data = sp_pa_2, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) rf_m1 <- fit_raf( data = sp_pa_2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmx\", \"tmn\"), partition = \".part\", thr = c(\"max_sorensen\") ) suit <- sdm_predict(models = rf_m1, pred = somevar) plot(suit$raf) suit <- suit$raf # Pasterns of suitability in geographical and environmental space p_extra( training_data = sp_pa_2, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", extra_suit_data = suit, projection_data = somevar, geo_space = TRUE, prop_points = 0.05, ) # Pasterns of suitability plotting as points only presences p_extra( training_data = sp_pa_2 %>% dplyr::filter(pr_ab == 1), x = \"x\", y = \"y\", pr_ab = \"pr_ab\", extra_suit_data = suit, projection_data = somevar, geo_space = TRUE, prop_points = 0.05, ) # Pasterns of suitability in the environmental space only # and plotting as points only presences p_extra( training_data = sp_pa_2 %>% dplyr::filter(pr_ab == 1), x = \"x\", y = \"y\", pr_ab = \"pr_ab\", extra_suit_data = suit, projection_data = somevar, geo_space = FALSE, prop_points = 0.05, ) }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/p_pdp.html","id":null,"dir":"Reference","previous_headings":"","what":"Partial Dependent Plot — p_pdp","title":"Partial Dependent Plot — p_pdp","text":"Create partial dependence plot(s) explore marginal effect predictors suitability","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/p_pdp.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Partial 
Dependent Plot — p_pdp","text":"","code":"p_pdp( model, predictors = NULL, resolution = 100, resid = FALSE, training_data = NULL, projection_data = NULL, clamping = FALSE, rug = FALSE, colorl = c(\"#462777\", \"#6DCC57\"), colorp = \"black\", alpha = 0.2, theme = ggplot2::theme_classic() )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/p_pdp.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Partial Dependent Plot — p_pdp","text":"model model object class \"gam\", \"gbm\", \"glm\", \"graf\", \"ksvm\", \"ksvm\", \"maxnet”, “nnet\", \"randomForest\" model can found first element list returned function fit_, tune_, esm_ function families predictors character. Vector predictor name(s) calculate partial dependence plots. NULL predictors used. Default NULL resolution numeric. Number equally spaced points predict suitability values continuous predictors. Default 50 resid logical. Calculate residuals based training data. Default FALSE training_data data.frame. Database response (0,1) predictor values used fit model. Default NULL projection_data SpatRaster. Raster layer environmental variables used model projection. argument used, function calculate partial dependence curves distinguishing conditions used training projection conditions (.e., projection data present projection area training). Default NULL clamping logical. Perform clamping. maxent models. Default FALSE rug logical. Display training data rug plot x-axis. Note: time-consuming large databases. Default FALSE colorl character. vector one two colors used color lines. projection_data argument used necessary provide two colors. Default c(\"#462777\", \"#6DCC57\") colorp character. vector one color used color points residuals, Default \"black\" alpha numeric. value 0 1 control transparency residual points. Lower values corresponding transparent colors. Default 0.2 theme ggplot2 theme. 
Default ggplot2::theme_classic()","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/p_pdp.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Partial Dependent Plot — p_pdp","text":"function creates partial dependent plots explore marginal effect predictors suitability. projection_data used, function extract minimum maximum values found region time period model projected. range projection data greater training data plotted different color. Partial dependence curves used interpret model explore model may extrapolate outside environmental conditions used train model.","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/p_pdp.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Partial Dependent Plot — p_pdp","text":"","code":"if (FALSE) { library(terra) library(dplyr) somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) # environmental data names(somevar) <- c(\"aet\", \"cwd\", \"tmx\", \"tmn\") data(abies) # set seed abies2 <- abies %>% dplyr::select(x, y, pr_ab) %>% dplyr::group_by(pr_ab) %>% dplyr::slice_sample(prop = 0.5) abies2 <- sdm_extract(abies2, x = \"x\", y = \"y\", env_layer = somevar ) abies2 <- part_random(abies2, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) svm_t1 <- fit_svm( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmx\", \"tmn\"), partition = \".part\", thr = c(\"max_sens_spec\") ) # Partial depence plot p_pdp(model = svm_t1$model, training_data = abies2) p_pdp(model = svm_t1$model, training_data = abies2, predictors = c(\"aet\", \"cwd\")) p_pdp(model = svm_t1$model, training_data = abies2, resolution = 5) p_pdp(model = svm_t1$model, training_data = abies2, resolution = 50) p_pdp(model = svm_t1$model, training_data = abies2, resid = TRUE) p_pdp( model = svm_t1$model, training_data = abies2, resid = TRUE, colorl = \"black\", colorp = 
\"red\", alpha = 0.1 ) p_pdp( model = svm_t1$model, training_data = abies2, resid = TRUE, colorl = \"black\", colorp = \"red\", alpha = 0.1, rug = TRUE ) # Partial depence plot for training and projection condition found in a projection area plot(somevar[[1]], main = \"Projection area\") p_pdp(model = svm_t1$model, training_data = abies2, projection_data = somevar) p_pdp( model = svm_t1$model, training_data = abies2, projection_data = somevar, colorl = c(\"#CC00FF\", \"#CCFF00\") ) p_pdp( model = svm_t1$model, training_data = abies2, projection_data = somevar, colorl = c(\"#CC00FF\", \"#CCFF00\"), resid = TRUE, colorp = \"gray\" ) p_pdp( model = svm_t1$model, training_data = abies2, projection_data = somevar, colorl = c(\"#CC00FF\", \"#CCFF00\"), resid = TRUE, colorp = \"gray\", rug = TRUE, theme = ggplot2::theme_dark() ) }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/p_psp.html","id":null,"dir":"Reference","previous_headings":"","what":"Partial Dependent Suface Plot — p_psp","title":"Partial Dependent Suface Plot — p_psp","text":"Create partial dependence surface plot(s) explore bivariate marginal effect predictors suitability","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/p_psp.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Partial Dependent Suface Plot — p_psp","text":"","code":"p_psp( model, predictors = NULL, resolution = 50, training_data = NULL, pchull = FALSE, projection_data = NULL, clamping = FALSE, color_gradient = c(\"#000004\", \"#1B0A40\", \"#4A0C69\", \"#781B6C\", \"#A42C5F\", \"#CD4345\", \"#EC6824\", \"#FA990B\", \"#F7CF3D\", \"#FCFFA4\"), color_chull = \"white\", theme = ggplot2::theme_classic() )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/p_psp.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Partial Dependent Suface Plot — p_psp","text":"model model object class \"gam\", \"gbm\", \"glm\", \"graf\", \"ksvm\", 
\"ksvm\", \"maxnet”, “nnet\", \"randomForest\" model can found first element list returned function fit_, tune_, esm_ function families predictors character. Vector predictor names calculate partial dependence plots. NULL predictors used. Default NULL resolution numeric. Number equally spaced points predict suitability values continuous predictors. Default 50 training_data data.frame. Database response (0,1) predictor values used fit model. Default NULL pchull logical. Plot convex-hull limit training data. Default FALSE. TRUE necessary provide data training_data argument projection_data SpatRaster. Raster layer environmental variables used model projection. Default NULL clamping logical. Perform clamping. maxent models. Default FALSE color_gradient character. vector range colors plot. Default c(\"#FDE725\", \"#B3DC2B\", \"#6DCC57\", \"#36B677\", \"#1F9D87\", \"#25818E\", \"#30678D\", \"#3D4988\", \"#462777\", \"#440154\") color_chull character. vector one color used color points residuals, Default \"white\" theme ggplot2 theme. Default ggplot2::theme_classic()","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/p_psp.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Partial Dependent Suface Plot — p_psp","text":"function creates partial dependent surface plots explore bivariate marginal effect predictors suitability. projection_data used, function extract minimum maximum values found region time period model projected. 
Partial dependence surface plot used interpret model explore model extrapolate outside environmental conditions used train model (convex hull polygon).","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/p_psp.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Partial Dependent Suface Plot — p_psp","text":"","code":"if (FALSE) { library(terra) library(dplyr) somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) # environmental data names(somevar) <- c(\"aet\", \"cwd\", \"tmx\", \"tmn\") data(abies) # set seed abies2 <- abies %>% dplyr::select(x, y, pr_ab) %>% dplyr::group_by(pr_ab) %>% dplyr::slice_sample(prop = 0.5) abies2 <- sdm_extract(abies2, x = \"x\", y = \"y\", env_layer = somevar ) abies2 <- part_random(abies2, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) svm_t1 <- fit_svm( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmx\", \"tmn\"), partition = \".part\", thr = c(\"max_sens_spec\") ) # Partial depence surface plot p_psp(model = svm_t1$model, training_data = abies2) p_psp(model = svm_t1$model, training_data = abies2, predictors = c(\"aet\", \"cwd\")) p_psp(model = svm_t1$model, training_data = abies2, resolution = 10) p_psp(model = svm_t1$model, training_data = abies2, resolution = 70) p_psp(model = svm_t1$model, training_data = abies2, pchull = TRUE) p_psp( model = svm_t1$model, training_data = abies2, pchull = TRUE, color_chull = \"orange\", color_gradient = c(\"#00007F\", \"#007FFF\", \"#7FFF7F\", \"#FF7F00\", \"#7F0000\") ) # Partial depence surface plot for training and projection condition plot(somevar[[1]], main = \"Projection area\") p_psp(model = svm_t1$model, training_data = abies2, projection_data = somevar, pchull = TRUE) # PSP with categorical variables somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) # environmental data 
names(somevar) <- c(\"aet\", \"cwd\", \"tmx\", \"tmn\") cat <- system.file(\"external/clusters.shp\", package = \"flexsdm\") cat <- terra::vect(cat) cat$clusters <- paste0(\"c\", cat$clusters) cat <- terra::rasterize(cat, somevar, field = \"clusters\") somevar <- c(somevar, cat) plot(somevar) # set seed abies2 <- abies %>% dplyr::select(x, y, pr_ab) %>% dplyr::group_by(pr_ab) %>% dplyr::slice_sample(prop = 0.5) abies2 <- sdm_extract( data = abies2, x = \"x\", y = \"y\", env_layer = somevar ) abies2 <- part_random(abies2, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) svm_t1 <- fit_svm( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmx\", \"tmn\"), predictors_f = \"clusters\", partition = \".part\", thr = c(\"max_sens_spec\") ) p_psp(model = svm_t1$model, training_data = abies2) }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/sample_background.html","id":null,"dir":"Reference","previous_headings":"","what":"Sample background points — sample_background","title":"Sample background points — sample_background","text":"Sampling background points options using different geographical restrictions sampling methods.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/sample_background.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Sample background points — sample_background","text":"","code":"sample_background( data, x, y, n, method = \"random\", rlayer, maskval = NULL, calibarea = NULL, rbias = NULL, sp_name = NULL )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/sample_background.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Sample background points — sample_background","text":"data data.frame tibble. Database presences records, coordinates x character. Column name spatial x coordinates y character. Column name spatial y coordinates n integer. Number background point sampled method character. 
Background allocation method. methods implemented : random: Random allocation background points. Usage method = 'random' thickening: Thickening background points based Vollering et al. (2019) method. method, buffer width must defined used around presences points. buffer can defined using argument method = c(\"thickening\", width = 20000). Buffer width must m raster (used rlayer) longitude/latitude CRS, map units cases. buffer width provided function use width value equal mean pair-wise presence distances. width value provided, argument must used method = 'thickening'. biased: method, similar \"thickening\", sample background biased bias presences. However, background points sampled used presences probability throughout entire study area, restricting bias within buffers “thickening” approach. using method, necessary provide layer presences bias \"rbias\" argument (Phillips et al., 2009). Usage method='thickening' method = c(\"thickening\", width = 20000). Default 'random' rlayer SpatRaster used sampling background points. best use layer resolution extent environmental variables used modeling. using maskval argument, raster layer must contain values constrain sampling maskval integer, character, factor. Values raster layer used constraining sampling background points calibarea SpatVect delimits calibration area used given species (see calib_area function). rbias SpatRaster used choosing background points using bias method. raster bias data must provided. recommended rbias match resolution extent rlayer. sp_name character. Species name output used. argument used, first output column species name. 
Default NULL.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/sample_background.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Sample background points — sample_background","text":"tibble object x y coordinates sampled background points","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/sample_background.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Sample background points — sample_background","text":"Phillips, S. J., Dudík, M., Elith, J., Graham, C. H., Lehmann, ., Leathwick, J., & Ferrier, S. (2009). Sample selection bias presence-distribution models: Implications background pseudo-absence data. Ecological Applications, 19(1), 181-197. Vollering, J., Halvorsen, R., Auestad, ., & Rydgren, K. (2019). Bunching background betters bias species distribution models. Ecography, 42(10), 1717-1727. https://doi.org/10.1111/ecog.04503","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/sample_background.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Sample background points — sample_background","text":"","code":"if (FALSE) { require(terra) require(dplyr) data(spp) somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) # Example for a single species spp_pa <- spp %>% dplyr::filter(species == \"sp3\") # Spatially structured partition part <- part_sblock( env_layer = somevar, data = spp_pa, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", min_res_mult = 100, max_res_mult = 500, num_grids = 30, min_occ = 5, n_part = 2 ) grid_env <- get_block(env_layer = somevar, best_grid = part$grid) plot(grid_env) ## %######################################################%## # # #### Random background method #### # # ## %######################################################%## # Sample background points throughout study area with random sampling method spp_p 
<- spp_pa %>% dplyr::filter(pr_ab == 1) bg <- sample_background( data = spp_p, x = \"x\", y = \"y\", n = 1000, method = \"random\", rlayer = grid_env, sp_name = \"sp3\" ) bg plot(grid_env) points(bg[-1]) # Sample random background points constrained to a region with a give set of values plot(grid_env) sample_background( data = spp_p, x = \"x\", y = \"y\", n = 1000, method = \"random\", rlayer = grid_env, maskval = 1 ) %>% points() plot(grid_env) sample_background( data = spp_p, x = \"x\", y = \"y\", n = 1000, method = \"random\", rlayer = grid_env, maskval = 2 ) %>% points() plot(grid_env) sample_background( data = spp_p, x = \"x\", y = \"y\", n = 1000, method = \"random\", rlayer = grid_env, maskval = c(1, 2) ) %>% points() # Sample random background within a calibration area and constrained to a region ca_ps1 <- calib_area( data = spp_pa, x = \"x\", y = \"y\", method = c(\"buffer\", width = 50000), crs = crs(somevar) ) plot(grid_env) plot(ca_ps1, add = T) points(spp_pa[-1], col = \"blue\", cex = 0.7, pch = 19) sample_background( data = spp_p, x = \"x\", y = \"y\", n = 1000, method = \"random\", rlayer = grid_env, maskval = 1, calibarea = ca_ps1 ) %>% points(col = \"red\") ## %######################################################%## # # #### Thickening background method #### # # ## %######################################################%## # Thickening background without constraining them spp_p # presences database of a species grid_env # The raster layer used for sampling background bg <- sample_background( data = spp_p, x = \"x\", y = \"y\", n = 5000, method = \"thickening\", rlayer = grid_env, ) plot(grid_env) bg %>% points(col = \"red\") # Thickening background spp_p # presences database of a species grid_env # The raster layer used for sampling background bg <- sample_background( data = spp_p, x = \"x\", y = \"y\", n = 5000, method = c(\"thickening\", width = 150000), rlayer = grid_env ) plot(grid_env) bg %>% points(col = \"red\") # Sample thickening 
background within a calibration area and constrained to a region bg <- sample_background( data = spp_p, x = \"x\", y = \"y\", n = 3000, method = \"thickening\", rlayer = grid_env, maskval = 2, calibarea = ca_ps1 ) plot(grid_env) plot(ca_ps1, add = T) bg %>% points(col = \"red\", cex = 0.3) points(spp_p[c(\"x\", \"y\")], pch = 19) ## %######################################################%## # # #### Biased background method #### # # ## %######################################################%## require(dplyr) require(terra) data(spp) # Select the presences of a species spp_p <- spp %>% dplyr::filter(species == \"sp1\", pr_ab == 1) # Raster layer with density of points to obtain a biased sampling background occ_density <- system.file(\"external/occ_density.tif\", package = \"flexsdm\") occ_density <- terra::rast(occ_density) plot(occ_density) points(spp_p %>% dplyr::select(x, y), cex = 0.5) # A layer with region used to contrain background sampling area regions <- system.file(\"external/regions.tif\", package = \"flexsdm\") regions <- terra::rast(regions) plot(regions) points(spp_p %>% dplyr::select(x, y), cex = 0.5) # Biased background points spp_p # presences database of a species bg <- sample_background( data = spp_p, x = \"x\", y = \"y\", n = 3000, method = \"biased\", rlayer = regions, rbias = occ_density ) plot(occ_density) bg %>% points(col = \"red\", cex = 0.1) spp_p %>% dplyr::select(x, y) %>% points(., col = \"black\", pch = 19, cex = 0.5) # Biased background points constrained to a region # It will be selected region 6 plot(regions) plot(regions %in% c(1, 6)) bg <- sample_background( data = spp_p, x = \"x\", y = \"y\", n = 500, method = \"biased\", rlayer = regions, rbias = occ_density, maskval = c(1, 2) ) plot(occ_density) bg %>% points(col = \"red\", cex = 0.5) spp_p %>% dplyr::select(x, y) %>% points(., col = \"black\", pch = 19, cex = 0.5) 
}"},{"path":"https://sjevelazco.github.io/flexsdm/reference/sample_pseudoabs.html","id":null,"dir":"Reference","previous_headings":"","what":"Sample pseudo-absences — sample_pseudoabs","title":"Sample pseudo-absences — sample_pseudoabs","text":"function provide several methods sampling pseudo-absences, instance totally random sampling method, options using different environmental geographical constraints.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/sample_pseudoabs.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Sample pseudo-absences — sample_pseudoabs","text":"","code":"sample_pseudoabs( data, x, y, n, method, rlayer, maskval = NULL, calibarea = NULL, sp_name = NULL )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/sample_pseudoabs.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Sample pseudo-absences — sample_pseudoabs","text":"data data.frame tibble. Database presences (presence-absence, presences-pseudo-absence) records, coordinates x character. Column name spatial x coordinates y character. Column name spatial y coordinates n integer. Number pseudo-absences sampled method character. Pseudo-absence allocation method. necessary provide vector argument. methods implemented : random: Random allocation pseudo-absences throughout area used model fitting. Usage method='random'. env_const: Pseudo-absences environmentally constrained regions lower suitability values predicted Bioclim model. method, necessary provide raster stack brick object environmental variables Usage method=c(method='env_const', env = somevar). geo_const: Pseudo-absences allocated far occurrences based geographical buffer. value buffer width m must provided raster (used rlayer) longitude/latitude CRS, map units cases. Usage method=c('geo_const', width='50000'). 
geo_env_const: Pseudo-absences constrained environmentally (based Bioclim model) distributed geographically far occurrences based geographical buffer. method, raster environmental variables stored SpatRaster object provided. value buffer width m must provided raster (used rlayer) longitude/latitude CRS, map units cases. Usage method=c('geo_env_const', width='50000', env = somevar). geo_env_km_const: Pseudo-absences constrained using three-level procedure; similar geo_env_const additional step distributes pseudo-absences environmental space using k-means cluster analysis. method, necessary provide raster stack brick object environmental variables value buffer width m raster (used rlayer) longitude/latitude CRS, map units cases. Usage method=c('geo_env_km_const', width='50000', env = somevar). rlayer SpatRaster. raster layer used sampling pseudo-absence layer resolution extent environmental variables used modeling recommended. case use maskval argument, raster layer must contain values used constrain sampling maskval integer, character, factor. Values raster layer used constraining pseudo-absence sampling calibarea SpatVector SpatVector delimit calibration area used given species (see calib_area function). sp_name character. Species name output used. argument used, first output column species name. 
Default NULL.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/sample_pseudoabs.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Sample pseudo-absences — sample_pseudoabs","text":"tibble object x y coordinates sampled pseudo-absence points","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/sample_pseudoabs.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Sample pseudo-absences — sample_pseudoabs","text":"","code":"if (FALSE) { require(terra) require(dplyr) data(\"spp\") somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) regions <- system.file(\"external/regions.tif\", package = \"flexsdm\") regions <- terra::rast(regions) plot(regions) single_spp <- spp %>% dplyr::filter(species == \"sp3\") %>% dplyr::filter(pr_ab == 1) %>% dplyr::select(-pr_ab) # Pseudo-absences randomly sampled throughout study area ps1 <- sample_pseudoabs( data = single_spp, x = \"x\", y = \"y\", n = nrow(single_spp) * 10, method = \"random\", rlayer = regions, maskval = NULL, sp_name = \"sp3\" ) plot(regions, col = gray.colors(9)) points(single_spp[-1], col = \"blue\", cex = 0.7, pch = 19) # presences points(ps1[-1], col = \"red\", cex = 0.7, pch = 19) # absences # Pseudo-absences randomly sampled within a regions where a species occurs ## Regions where this species occurrs samp_here <- terra::extract(regions, single_spp[2:3])[, 2] %>% unique() %>% na.exclude() ps1 <- sample_pseudoabs( data = single_spp, x = \"x\", y = \"y\", n = nrow(single_spp) * 10, method = \"random\", rlayer = regions, maskval = samp_here ) plot(regions, col = gray.colors(9)) points(single_spp[-1], col = \"blue\", cex = 0.7, pch = 19) points(ps1, col = \"red\", cex = 0.7, pch = 19) # Pseudo-absences sampled with geographical constraint ps1 <- sample_pseudoabs( data = single_spp, x = \"x\", y = \"y\", n = nrow(single_spp) * 10, method = 
c(\"geo_const\", width = \"30000\"), rlayer = regions, maskval = samp_here ) plot(regions, col = gray.colors(9)) points(single_spp[-1], col = \"blue\", cex = 0.7, pch = 19) points(ps1, col = \"red\", cex = 0.7, pch = 19) # Pseudo-absences sampled with environmental constraint ps1 <- sample_pseudoabs( data = single_spp, x = \"x\", y = \"y\", n = nrow(single_spp) * 10, method = c(\"env_const\", env = somevar), rlayer = regions, maskval = samp_here ) plot(regions, col = gray.colors(9)) points(single_spp[-1], col = \"blue\", cex = 0.7, pch = 19) points(ps1, col = \"red\", cex = 0.7, pch = 19) # Pseudo-absences sampled with environmental and geographical constraint ps1 <- sample_pseudoabs( data = single_spp, x = \"x\", y = \"y\", n = nrow(single_spp) * 10, method = c(\"geo_env_const\", width = \"50000\", env = somevar), rlayer = regions, maskval = samp_here ) plot(regions, col = gray.colors(9)) points(single_spp[-1], col = \"blue\", cex = 0.7, pch = 19) points(ps1, col = \"red\", cex = 0.7, pch = 19) # Pseudo-absences sampled with environmental and geographical constraint and with k-mean clustering ps1 <- sample_pseudoabs( data = single_spp, x = \"x\", y = \"y\", n = nrow(single_spp) * 10, method = c(\"geo_env_km_const\", width = \"50000\", env = somevar), rlayer = regions, maskval = samp_here ) plot(regions, col = gray.colors(9)) points(single_spp[-1], col = \"blue\", cex = 0.7, pch = 19) points(ps1, col = \"red\", cex = 0.7, pch = 19) # Sampling pseudo-absence using a calibration area ca_ps1 <- calib_area( data = single_spp, x = \"x\", y = \"y\", method = c(\"buffer\", width = 50000), crs=crs(somevar) ) plot(regions, col = gray.colors(9)) plot(ca_ps1, add = T) points(single_spp[-1], col = \"blue\", cex = 0.7, pch = 19) ps1 <- sample_pseudoabs( data = single_spp, x = \"x\", y = \"y\", n = nrow(single_spp) * 50, method = \"random\", rlayer = regions, maskval = NULL, calibarea = ca_ps1 ) plot(regions, col = gray.colors(9)) plot(ca_ps1, add = T) points(ps1, col = \"red\", 
cex = 0.7, pch = 19) points(single_spp[-1], col = \"blue\", cex = 0.7, pch = 19) ps1 <- sample_pseudoabs( data = single_spp, x = \"x\", y = \"y\", n = nrow(single_spp) * 50, method = \"random\", rlayer = regions, maskval = samp_here, calibarea = ca_ps1 ) plot(regions, col = gray.colors(9)) plot(ca_ps1, add = T) points(ps1, col = \"red\", cex = 0.7, pch = 19) points(single_spp[-1], col = \"blue\", cex = 0.7, pch = 19) }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_directory.html","id":null,"dir":"Reference","previous_headings":"","what":"Create directories for saving the outputs of the flexsdm — sdm_directory","title":"Create directories for saving the outputs of the flexsdm — sdm_directory","text":"function assists creating directory system different sub-folders assist organisation modelling process outputs.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_directory.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create directories for saving the outputs of the flexsdm — sdm_directory","text":"","code":"sdm_directory( main_dir = NULL, projections = NULL, calibration_area = TRUE, algorithm = NULL, ensemble = NULL, threshold = FALSE, return_vector = TRUE )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_directory.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create directories for saving the outputs of the flexsdm — sdm_directory","text":"main_dir character. Directory path containing main folder saving model inputs outputs. NULL function assumes directory path current working R session creates sub-folder name 'flexsdm_results'. Default NULL projections vector. Vector folder names future scenarios/different regions/time periods save model projections output. calibration_area logical. TRUE, function creates folder 1_Inputs storing calibration area. Default TRUE algorithm vector. Vector model names used. 
Usage algorithm = c(gam, tune_max, tune_net, esm_glm). \"\" used function creates folders algorithms available flexsdm . .e. 'gam', 'gau', 'gbm', 'glm', 'max', 'net', 'raf', 'svm'. Default NULL ensemble vector. Vector methods used ensemble different models. Usage ensemble = c(\"mean\", \"meanthr\"). Default NULL threshold logical. TRUE sub-folders \"/1_con\", \"/2_bin\" created within algorithm /ensemble folder. Used storing continuous binarized models separately. Default FALSE return_vector logical. TRUE function returns vector path folders. Default TRUE","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_directory.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create directories for saving the outputs of the flexsdm — sdm_directory","text":"character vector paths created folders","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_directory.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Create directories for saving the outputs of the flexsdm — sdm_directory","text":"sdm_directory function assists saving workflow outputs creating folders (directories) based user specifications, choice algorithms, ensemble methods, model projections new geographic regions periods. function first creates two folders within user-specified project folder, one model inputs (1_Inputs) one model outputs (2_Outputs). Within 1_Inputs, three sub-folders users store model inputs: 1_Occurrences, 2_Predictors, 3_Calibration_area. user chooses include projections modeling framework, 2_Projections subfolder created within 2_Predictors folder store environmental data projection scenarios provided \"projections\" argument. 
Additionally, sdm_directory offers users enhanced flexibility saving modeling outputs, giving offers users option save results modeling ensemble technique presented flexsdm","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_directory.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Create directories for saving the outputs of the flexsdm — sdm_directory","text":"","code":"if (FALSE) { require(dplyr) # require(sf) # Implement sdm_directory without specific path and project name dirs_1 <- sdm_directory( main_dir = NULL, projections = NULL, calibration_area = TRUE, algorithm = c(\"gam\", \"tune_max\"), ensemble = c(\"mean\", \"meanthr\"), threshold = FALSE, return_vector = TRUE ) dirs_1 dirs_1[1] %>% fs::dir_tree(., recurse = TRUE) unlink(dirs_1[1], recursive = TRUE) # this directory and sub-folder will be removed # Implement sdm_directory with specific path and project name getwd() %>% dirname() dirs_2 <- sdm_directory( main_dir = getwd() %>% dirname() %>% file.path(., \"my_project_name\"), projections = c( \"cnrm_rpc8.5_2050\", \"cnrm_rpc4.5_2050\" ), calibration_area = TRUE, algorithm = \"all\", ensemble = c(\"mean\", \"meanthr\"), threshold = TRUE ) dirs_2[1] %>% fs::dir_tree(., recurse = TRUE) }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_eval.html","id":null,"dir":"Reference","previous_headings":"","what":"Calculate different model performance metrics — sdm_eval","title":"Calculate different model performance metrics — sdm_eval","text":"function calculates threshold dependent independent model performance metrics.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_eval.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Calculate different model performance metrics — sdm_eval","text":"","code":"sdm_eval(p, a, bg = NULL, thr = 
NULL)"},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_eval.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Calculate different model performance metrics — sdm_eval","text":"p numeric. Predicted suitability presences numeric. Predicted suitability absences bg numeric. Predicted suitability background points, used BOYCE metric. bg set NULL, BOYCE metric calculated presences absences suitabilities values thr character. Threshold criterion used get binary suitability values (.e. 0,1). Used threshold-dependent performance metrics. possible use one threshold type. vector must provided argument. following threshold criteria available: lpt: highest threshold omission. equal_sens_spec: Threshold Sensitivity Specificity equal. max_sens_spec: Threshold sum Sensitivity Specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard index highest. max_sorensen: threshold Sorensen index highest. max_fpb: threshold FPB (F-measure presence-background data) highest. sensitivity: Threshold based specified Sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers Sensitivity value. sensitivity value specified, default value 0.9 one threshold type used, concatenate threshold types, e.g., thr=c('lpt', 'max_sens_spec', 'max_jaccard'), thr=c('lpt', 'max_sens_spec', 'sensitivity', sens='0.8'), thr=c('lpt', 'max_sens_spec', 'sensitivity'). 
Function use thresholds threshold type specified","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_eval.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Calculate different model performance metrics — sdm_eval","text":"tibble next columns threshold: threshold names thr_value: threshold values n_presences: number presences n_absences: number absences TPR IMAE: performance metrics","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_eval.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Calculate different model performance metrics — sdm_eval","text":"function used evaluating different models approaches base combination presence-absences presence-pseudo-absences background point data suitability predicted model flexsdm modeling function families (fit_, esm_, tune_.) calculates next performance metric: \\* BOYCE calculated based presences background points, case background points provided calculated using presences absences. 
codes calculating metric adaptation enmSdm package (https://github.com/adamlilith/enmSdm) \\** IMAE calculated 1-(Mean Absolute Error) order consistent metrics higher value given performance metric, greater model's accuracy","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_eval.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Calculate different model performance metrics — sdm_eval","text":"","code":"if (FALSE) { require(dplyr) set.seed(0) p <- rnorm(50, mean = 0.7, sd = 0.3) %>% abs() p[p > 1] <- 1 p[p < 0] <- 0 set.seed(0) a <- rnorm(50, mean = 0.3, sd = 0.2) %>% abs() a[a > 1] <- 1 a[a < 0] <- 0 set.seed(0) backg <- rnorm(1000, mean = 0.4, sd = 0.4) %>% abs() backg[backg > 1] <- 1 backg[backg < 0] <- 0 # Function use without threshold specification e <- sdm_eval(p, a) e # Function use with threshold specification sdm_eval(p, a, thr = \"max_sorensen\") sdm_eval(p, a, thr = c(\"lpt\", \"max_sens_spec\", \"max_jaccard\")) sdm_eval(p, a, thr = c(\"lpt\", \"max_sens_spec\", \"sensitivity\")) sdm_eval(p, a, thr = c(\"lpt\", \"max_sens_spec\", \"sensitivity\", sens = \"0.95\")) # Use of bg argument (it will only be used for calculating BOYCE index) sdm_eval(p, a, thr = \"max_sens_spec\") sdm_eval(p, a, thr = c(\"max_sens_spec\"), bg = backg) # If background will be used to calculate all other metrics # background values can be used in \"a\" argument sdm_eval(p, backg, thr = \"max_sens_spec\") }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_extract.html","id":null,"dir":"Reference","previous_headings":"","what":"Extract environmental data values from a spatial raster based on x and y coordinates — sdm_extract","title":"Extract environmental data values from a spatial raster based on x and y coordinates — sdm_extract","text":"Extract environmental data values spatial raster based x y 
coordinates","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_extract.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Extract environmental data values from a spatial raster based on x and y coordinates — sdm_extract","text":"","code":"sdm_extract(data, x, y, env_layer, variables = NULL, filter_na = TRUE)"},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_extract.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Extract environmental data values from a spatial raster based on x and y coordinates — sdm_extract","text":"data data.frame. Database species presence, presence-absence, pseudo-absence records x y coordinates x character. Column name spatial x coordinates y character. Column name spatial y coordinates env_layer SpatRaster. Raster raster stack environmental variables. variables character. Vector variable names predictor (environmental) variables Usage variables. = c(\"aet\", \"cwd\", \"tmin\"). variable specified, function return data layers. Default NULL filter_na logical. 
filter_na = TRUE (default), rows NA values environmental variables removed returned tibble.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_extract.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Extract environmental data values from a spatial raster based on x and y coordinates — sdm_extract","text":"tibble returns original data base additional columns extracted environmental variables xy location SpatRaster object used 'env_layer'","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_extract.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Extract environmental data values from a spatial raster based on x and y coordinates — sdm_extract","text":"","code":"if (FALSE) { require(terra) # Load datasets data(spp) f <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(f) # Extract environmental data from somevar for all locations in spp ex_spp <- sdm_extract( data = spp, x = \"x\", y = \"y\", env_layer = somevar, variables = NULL, filter_na = FALSE ) # Extract environmental for two variables and remove rows with NAs ex_spp2 <- sdm_extract( data = spp, x = \"x\", y = \"y\", env_layer = somevar, variables = c(\"CFP_3\", \"CFP_4\"), filter_na = TRUE ) ex_spp ex_spp2 }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_predict.html","id":null,"dir":"Reference","previous_headings":"","what":"Spatial predictions from individual and ensemble models — sdm_predict","title":"Spatial predictions from individual and ensemble models — sdm_predict","text":"function allows geographical prediction one models constructed fit_ tune_ function set, models fitted esm_ function set (.e., ensemble small models approach), models constructed fit_ensemble function. 
can return continuous continuous binary predictions one thresholds","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_predict.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Spatial predictions from individual and ensemble models — sdm_predict","text":"","code":"sdm_predict( models, pred, nchunk = 1, thr = NULL, con_thr = FALSE, predict_area = NULL, clamp = TRUE, pred_type = \"cloglog\" )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_predict.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Spatial predictions from individual and ensemble models — sdm_predict","text":"models list one models fitted fit_ tune_ functions. case use models fitted fit_ensemble esm_ family function one model used. Usage models = mglm models = list(mglm, mraf, mgbm) pred SpatRaster. Raster layer predictor variables. Names layers must exactly match used model fitting. nchunk interger. Number chunks split data used predict models (.e., SpatRaster used pred argument). Predicting models chunks helps reduce memory requirements cases models predicted large scales high resolution. Default = 1 thr character. Threshold used get binary suitability values (.e., 0,1). possible use one threshold type. mandatory use threshold/s used fit models. following threshold types available: lpt: highest threshold omission. equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard index highest. max_sorensen: threshold Sorensen index highest. max_fpb: threshold FPB highest. sensitivity: Threshold based specified sensitivity value used fit models. : threshold used model outputs used 'models' argument used. Usage thr = c('lpt', 'max_sens_spec', 'max_jaccard'), thr=c('lpt', 'max_sens_spec', 'sensitivity'), thr=''. 
threshold specified (.e., thr = NULL) function return continuous prediction . Default NULL con_thr logical. true predictions suitability values threshold/s returned. Default = FALSE predict_area SpatVector, SpatialPolygon, SpatialPolygonDataFrame. Spatial polygon used restring prediction given region. Default = NULL clamp logical. set TRUE, predictors features restricted range seen model training. valid Maxent model (see tune_mx fit_mx). Default TRUE. pred_type character. Type response required available \"link\", \"exponential\", \"cloglog\" \"logistic\". valid Maxent model (see tune_mx fit_mx). Default \"cloglog\".","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_predict.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Spatial predictions from individual and ensemble models — sdm_predict","text":"list SpatRaster continuous /binary predictions","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_predict.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Spatial predictions from individual and ensemble models — sdm_predict","text":"","code":"if (FALSE) { require(dplyr) require(terra) data(\"spp\") somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) # Extract data some_sp <- spp %>% filter(species == \"sp3\") some_sp <- sdm_extract( data = some_sp, x = \"x\", y = \"y\", env_layer = somevar ) # Partition some_sp <- part_random( data = some_sp, pr_ab = \"pr_ab\", method = c(method = \"rep_kfold\", folds = 3, replicates = 5) ) ## %######################################################%## # # #### Create different type of models #### # # ## %######################################################%## # Fit some models mglm <- fit_glm( data = some_sp, response = \"pr_ab\", predictors = c(\"CFP_1\", \"CFP_2\", \"CFP_3\", \"CFP_4\"), partition = \".part\", poly = 2 ) mraf <- fit_raf( data = 
some_sp, response = \"pr_ab\", predictors = c(\"CFP_1\", \"CFP_2\", \"CFP_3\", \"CFP_4\"), partition = \".part\", ) mgbm <- fit_gbm( data = some_sp, response = \"pr_ab\", predictors = c(\"CFP_1\", \"CFP_2\", \"CFP_3\", \"CFP_4\"), partition = \".part\" ) # Fit an ensemble model mensemble <- fit_ensemble( models = list(mglm, mraf, mgbm), ens_method = \"meansup\", thr = NULL, thr_model = \"max_sens_spec\", metric = \"TSS\" ) # Fit a model with the Ensembles of Small Models approach # Without threshold specification and with kfold msmall <- esm_gam( data = some_sp, response = \"pr_ab\", predictors = c(\"CFP_1\", \"CFP_2\", \"CFP_3\", \"CFP_4\"), partition = \".part\", thr = NULL ) ## %######################################################%## # # #### Predict different kind of models #### # # ## %######################################################%## # sdm_predict can be used for predict one or more models fitted with fit_ or tune_ functions # a single model ind_p <- sdm_predict( models = mglm, pred = somevar, thr = \"max_fpb\", con_thr = FALSE, predict_area = NULL ) # a list of models list_p <- sdm_predict( models = list(mglm, mraf, mgbm), pred = somevar, thr = \"max_fpb\", con_thr = FALSE, predict_area = NULL ) # Predict an ensemble model # (only is possilbe use one fit_ensemble) ensemble_p <- sdm_predict( models = mensemble, pred = somevar, thr = \"max_fpb\", con_thr = FALSE, predict_area = NULL ) # Predict an ensemble of small models # (only is possible to use one ensemble of small models) small_p <- sdm_predict( models = msmall, pred = somevar, thr = \"max_fpb\", con_thr = FALSE, predict_area = NULL ) ##%######################################################%## # # #### Predict model using chunks #### # # ##%######################################################%## # Predicting models in chunks helps reduce memory requirements in # cases where models are predicted for large scales and high resolution ind_p <- sdm_predict( models = mglm, pred = somevar, thr = 
\"max_fpb\", con_thr = FALSE, predict_area = NULL, nchunk = 4 ) }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_summarize.html","id":null,"dir":"Reference","previous_headings":"","what":"Merge model performance tables — sdm_summarize","title":"Merge model performance tables — sdm_summarize","text":"Merge model performance tables","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_summarize.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Merge model performance tables — sdm_summarize","text":"","code":"sdm_summarize(models)"},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_summarize.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Merge model performance tables — sdm_summarize","text":"models list one models fitted fit_ tune_ functions, fit_ensemble output, esm_ family function output. list single several models fitted fit_ tune_ functions object returned fit_ensemble function. Usage models = list(mod1, mod2, mod3)","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_summarize.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Merge model performance tables — sdm_summarize","text":"Combined model performance table input models. 
Models fit tune include model performance best hyperparameters.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_summarize.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Merge model performance tables — sdm_summarize","text":"","code":"if (FALSE) { data(abies) abies # In this example we will partition the data using the k-fold method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) # Build a generalized additive model using fit_gam gam_t1 <- fit_gam( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), predictors_f = c(\"landform\"), partition = \".part\", thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\") ) gam_t1$performance # Build a generalized linear model using fit_glm glm_t1 <- fit_glm( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), predictors_f = c(\"landform\"), partition = \".part\", thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\"), poly = 0, inter_order = 0 ) glm_t1$performance # Build a tuned random forest model using tune_raf tune_grid <- expand.grid(mtry = seq(1, 7, 1)) rf_t1 <- tune_raf( data = abies2, response = \"pr_ab\", predictors = c( \"aet\", \"cwd\", \"tmin\", \"ppt_djf\", \"ppt_jja\", \"pH\", \"awc\", \"depth\" ), predictors_f = c(\"landform\"), partition = \".part\", grid = tune_grid, thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\"), metric = \"TSS\", ) rf_t1$performance # Merge sdm performance tables merge_df <- sdm_summarize(models = list(gam_t1, glm_t1, rf_t1)) merge_df }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/spp.html","id":null,"dir":"Reference","previous_headings":"","what":"A data set containing presences and absences of three virtual species — spp","title":"A data set containing presences and absences of three virtual species — 
spp","text":"data set containing presences absences three virtual species","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/spp.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"A data set containing presences and absences of three virtual species — spp","text":"","code":"spp"},{"path":"https://sjevelazco.github.io/flexsdm/reference/spp.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"A data set containing presences and absences of three virtual species — spp","text":"tibble 1150 rows 3 variables: species virtual species names x longitude species occurrences y latitude species occurrences pr_ab presences absences denoted 1 0 respectively","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/spp.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"A data set containing presences and absences of three virtual species — spp","text":"","code":"if (FALSE) { require(dplyr) data(\"spp\") spp }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_gbm.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit and validate Generalized Boosted Regression models with exploration of\r\nhyper-parameters that optimize performance — tune_gbm","title":"Fit and validate Generalized Boosted Regression models with exploration of\r\nhyper-parameters that optimize performance — tune_gbm","text":"Fit validate Generalized Boosted Regression models exploration hyper-parameters optimize performance","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_gbm.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit and validate Generalized Boosted Regression models with exploration of\r\nhyper-parameters that optimize performance — tune_gbm","text":"","code":"tune_gbm( data, response, predictors, predictors_f = NULL, fit_formula = NULL, partition, grid = NULL, thr = NULL, 
metric = \"TSS\", n_cores = 1 )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_gbm.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit and validate Generalized Boosted Regression models with exploration of\r\nhyper-parameters that optimize performance — tune_gbm","text":"data data.frame. Database response (0,1) predictors values. response character. Column name species absence-presence data (0,1). predictors character. Vector column names quantitative predictor variables (.e. continuous variables). Usage predictors = c(\"aet\", \"cwd\", \"tmin\") predictors_f character. Vector column names qualitative predictor variables (.e. ordinal nominal variables type). Usage predictors_f = c(\"landform\") fit_formula formula. formula object response predictor variables (e.g. formula(pr_ab ~ aet + ppt_jja + pH + awc + depth + landform)). Note variables used must consistent used response, predictors, predictors_f arguments. Default NULL. partition character. Column name training validation partition groups. grid data.frame. data frame object algorithm hyper-parameter values tested. recommended generate data.frame grid() function. Hyper-parameters needed tuning 'n.trees', 'shrinkage', 'n.minobsinnode'. thr character. Threshold used get binary suitability values (.e. 0,1) needed threshold-dependent performance metrics. possible use one threshold type. Provide vector argument. following threshold types available: lpt: highest threshold omission. equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard index highest. max_sorensen: threshold Sorensen index highest. max_fpb: threshold FPB (F-measure presence-background data) highest. sensitivity: Threshold based specified sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers sensitivity value. 
sensitivity value specified, default used 0.9 one threshold type used must concatenate, e.g., thr=c('lpt', 'max_sens_spec', 'max_jaccard'), thr=c('lpt', 'max_sens_spec', 'sensitivity', sens='0.8'), thr=c('lpt', 'max_sens_spec', 'sensitivity'). Function use threshold types threshold specified. metric character. Performance metric used selecting best combination hyper-parameter values. following metrics can used: SORENSEN, JACCARD, FPB, TSS, KAPPA, AUC, BOYCE. TSS used default. n_cores numeric. Number cores use parallelization. Default 1","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_gbm.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit and validate Generalized Boosted Regression models with exploration of\r\nhyper-parameters that optimize performance — tune_gbm","text":"list object : model: \"gbm\" class object gbm package. object can used predicting. predictors: tibble quantitative (c column names) qualitative (f column names) variables use modeling. performance: Hyper-parameter values performance metric (see sdm_eval) best hyper-parameter combination. hyper_performance: Performance metric (see sdm_eval) combination hyper-parameters. data_ens: Predicted suitability test partition based best model. database used fit_ensemble","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_gbm.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit and validate Generalized Boosted Regression models with exploration of\r\nhyper-parameters that optimize performance — tune_gbm","text":"","code":"if (FALSE) { data(abies) abies # Partition the data with the k-fold method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) # pr_ab is the name of the column with species presence and absences (i.e. 
the response variable) # from aet to landform are the predictors variables (landform is a qualitative variable) # Hyper-parameter values for tuning tune_grid <- expand.grid( n.trees = c(20, 50, 100), shrinkage = c(0.1, 0.5, 1), n.minobsinnode = c(1, 3, 5, 7, 9) ) gbm_t <- tune_gbm( data = abies2, response = \"pr_ab\", predictors = c( \"aet\", \"cwd\", \"tmin\", \"ppt_djf\", \"ppt_jja\", \"ppt_jja\", \"pH\", \"awc\", \"depth\" ), predictors_f = c(\"landform\"), partition = \".part\", grid = tune_grid, thr = \"max_sens_spec\", metric = \"TSS\", n_cores = 1 ) # Outputs gbm_t$model gbm_t$predictors gbm_t$performance gbm_t$data_ens gbm_t$hyper_performance # Graphical exploration of performance of each hyper-parameter setting require(ggplot2) pg <- position_dodge(width = 0.5) ggplot(gbm_t$hyper_performance, aes(factor(n.minobsinnode), TSS_mean, col = factor(shrinkage) )) + geom_errorbar(aes(ymin = TSS_mean - TSS_sd, ymax = TSS_mean + TSS_sd), width = 0.2, position = pg ) + geom_point(position = pg) + geom_line( data = gbm_t$tune_performance, aes(as.numeric(factor(n.minobsinnode)), TSS_mean, col = factor(shrinkage) ), position = pg ) + facet_wrap(. 
~ n.trees) + theme(legend.position = \"bottom\") }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_max.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit and validate Maximum Entropy models with exploration of hyper-parameters that optimize performance — tune_max","title":"Fit and validate Maximum Entropy models with exploration of hyper-parameters that optimize performance — tune_max","text":"Fit validate Maximum Entropy models exploration hyper-parameters optimize performance","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_max.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit and validate Maximum Entropy models with exploration of hyper-parameters that optimize performance — tune_max","text":"","code":"tune_max( data, response, predictors, predictors_f = NULL, background = NULL, partition, grid = NULL, thr = NULL, metric = \"TSS\", clamp = TRUE, pred_type = \"cloglog\", n_cores = 1 )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_max.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit and validate Maximum Entropy models with exploration of hyper-parameters that optimize performance — tune_max","text":"data data.frame. Database response (0,1) predictors values. response character. Column name species absence-presence data (0,1). predictors character. Vector column names quantitative predictor variables (.e. continuous variables). Usage predictors = c(\"aet\", \"cwd\", \"tmin\") predictors_f character. Vector column names qualitative predictor variables (.e. ordinal nominal variables type). Usage predictors_f = c(\"landform\") background data.frame. Database response variable column containing 0 values, predictors variables. column names must consistent data partition character. Column name training validation partition groups. grid data.frame. data frame object algorithm hyper-parameters values tested. 
recommended generate data.frame grid() function. Hyper-parameters needed tuning 'regmult' 'classes' (combination following letters l -linear-, q -quadratic-, h -hinge-, p -product-, t -threshold-). thr character. Threshold used get binary suitability values (.e. 0,1)., needed threshold-dependent performance metrics. one threshold type can used. necessary provide vector argument. following threshold types available: lpt: highest threshold omission. equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard index highest. max_sorensen: threshold Sorensen index highest. max_fpb: threshold # FPB (F-measure presence-background data) highest. sensitivity: Threshold based specified sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers sensitivity value. sensitivity value specified, default 0.9 used. one threshold type used, concatenate , e.g., thr=c('lpt', 'max_sens_spec', 'max_jaccard'), thr=c('lpt', 'max_sens_spec', 'sensitivity', sens='0.8'), thr=c('lpt', 'max_sens_spec', 'sensitivity'). Function use thresholds threshold specified. metric character. Performance metric used selecting best combination hyper -parameter values. One following metrics can used: SORENSEN, JACCARD, FPB, TSS, KAPPA, AUC, BOYCE. TSS used default. clamp logical. TRUE, predictors features restricted range seen model training. pred_type character. Type response required available \"link\", \"exponential\", \"cloglog\" \"logistic\". Default \"cloglog\" n_cores numeric. Number cores use parallelization. 
Default 1","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_max.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit and validate Maximum Entropy models with exploration of hyper-parameters that optimize performance — tune_max","text":"list object : model: \"maxnet\" class object maxnet package. object can used predicting. predictors: tibble quantitative (c column names) qualitative (f column names) variables use modeling. performance: Hyper-parameters values performance metrics (see sdm_eval) best hyper-parameters combination. hyper_performance: Performance metrics (see sdm_eval) combination hyper-parameters. data_ens: Predicted suitability test partition based best model. database used fit_ensemble","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_max.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Fit and validate Maximum Entropy models with exploration of hyper-parameters that optimize performance — tune_max","text":"presence-absence (presence-pseudo-absence) data used data argument addition background points, function fit models presences background points validate presences absences. procedure makes maxent comparable presences-absences models (e.g., random forest, support vector machine). presences background points data used, function fit validate model presences background data. 
presence-absences used data argument without background, function fit model specified data (recommended).","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_max.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit and validate Maximum Entropy models with exploration of hyper-parameters that optimize performance — tune_max","text":"","code":"if (FALSE) { data(\"abies\") data(\"backg\") abies # environmental conditions of presence-absence data backg # environmental conditions of background points # Using k-fold partition method # Remember that the partition method, number of folds or replications must # be the same for presence-absence and background points datasets abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 3) ) abies2 set.seed(1) backg <- dplyr::sample_n(backg, size = 2000, replace = FALSE) backg2 <- part_random( data = backg, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 3) ) backg gridtest <- expand.grid( regmult = seq(0.1, 3, 0.5), classes = c(\"l\", \"lq\", \"lqh\") ) max_t1 <- tune_max( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"pH\", \"awc\", \"depth\"), predictors_f = c(\"landform\"), partition = \".part\", background = backg2, grid = gridtest, thr = \"max_sens_spec\", metric = \"TSS\", clamp = TRUE, pred_type = \"cloglog\", n_cores = 2 # activate two cores to speed up this process ) length(max_t1) max_t1$model max_t1$predictors max_t1$performance max_t1$data_ens }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_net.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit and validate Neural Networks models with exploration of hyper-parameters — tune_net","title":"Fit and validate Neural Networks models with exploration of hyper-parameters — tune_net","text":"Fit validate Neural Networks models exploration 
hyper-parameters","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_net.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit and validate Neural Networks models with exploration of hyper-parameters — tune_net","text":"","code":"tune_net( data, response, predictors, predictors_f = NULL, fit_formula = NULL, partition, grid = NULL, thr = NULL, metric = \"TSS\", n_cores = 1 )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_net.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit and validate Neural Networks models with exploration of hyper-parameters — tune_net","text":"data data.frame. Database response (0,1) predictors values. response character. Column name species absence-presence data (0,1). predictors character. Vector column names quantitative predictor variables (.e. continuous variables). Usage predictors = c(\"aet\", \"cwd\", \"tmin\") predictors_f character. Vector column names qualitative predictor variables (.e. ordinal nominal variables type). Usage predictors_f = c(\"landform\") fit_formula formula. formula object response predictor variables (e.g. formula(pr_ab ~ aet + ppt_jja + pH + awc + depth + landform)). Note variable names used must consistent used response, predictors, predictors_f arguments. Defaul NULL. partition character. Column name training validation partition groups. grid data.frame. data frame object algorithm hyper-parameters values tested. recommended generate data.frame grid() function. thr character. Threshold used get binary suitability values (.e. 0,1), needed threshold-dependent performance metrics. possible use one threshold type. necessary provide vector argument. following threshold types available: lpt: highest threshold omission. equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). 
max_jaccard: threshold Jaccard index highest. max_sorensen: threshold Sorensen index highest. max_fpb: threshold FPB (F-measure presence-background data) highest. sensitivity: Threshold based specified sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers sensitivity value. specified sensitivity values, function use default 0.9. using one threshold type concatenate , e.g., thr=c('lpt', 'max_sens_spec', 'max_jaccard'), thr=c('lpt', 'max_sens_spec', 'sensitivity', sens='0.8'), thr=c('lpt', 'max_sens_spec', 'sensitivity'). Function use thresholds threshold specified. metric character. Performance metric used selecting best combination hyper-parameter values. One following metrics can used: SORENSEN, JACCARD, FPB, TSS, KAPPA, AUC, BOYCE. TSS used default. n_cores numeric. Number cores use parallelization. Default 1","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_net.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit and validate Neural Networks models with exploration of hyper-parameters — tune_net","text":"list object : model: \"nnet\" class object nnet package. object can used predicting. predictors: tibble quantitative (c column names) qualitative (f column names) variables use modeling. performance: Hyper-parameters values performance metric (see sdm_eval) best hyper-parameters combination. hyper_performance: Performance metric (see sdm_eval) combination hyper-parameters. data_ens: Predicted suitability test partition based best model. 
database used fit_ensemble","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_net.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit and validate Neural Networks models with exploration of hyper-parameters — tune_net","text":"","code":"if (FALSE) { data(abies) abies # Partitioning the data with the k-fold method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) # pr_ab columns is species presence and absences (i.e. the response variable) # from aet to landform are the predictors variables (landform is a qualitative variable) # Hyper-parameter values for tuning tune_grid <- expand.grid( size = c(2, 4, 6, 8, 10), decay = c(0.001, 0.05, 0.1, 1, 3, 4, 5, 10) ) net_t <- tune_net( data = abies2, response = \"pr_ab\", predictors = c( \"aet\", \"cwd\", \"tmin\", \"ppt_djf\", \"ppt_jja\", \"pH\", \"awc\", \"depth\" ), predictors_f = c(\"landform\"), partition = \".part\", grid = tune_grid, thr = \"max_sens_spec\", metric = \"TSS\", n_cores = 1 ) # Outputs net_t$model net_t$predictors net_t$performance net_t$hyper_performance net_t$data_ens }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_raf.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit and validate Random Forest models with exploration of hyper-parameters that optimize performance — tune_raf","title":"Fit and validate Random Forest models with exploration of hyper-parameters that optimize performance — tune_raf","text":"Fit validate Random Forest models exploration hyper-parameters optimize performance","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_raf.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit and validate Random Forest models with exploration of hyper-parameters that optimize performance — tune_raf","text":"","code":"tune_raf( data, response, predictors, predictors_f = NULL, 
fit_formula = NULL, partition, grid = NULL, thr = NULL, metric = \"TSS\", n_cores = 1 )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_raf.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit and validate Random Forest models with exploration of hyper-parameters that optimize performance — tune_raf","text":"data data.frame. Database response (0,1) predictors values. response character. Column name species absence-presence data (0,1). predictors character. Vector column names quantitative predictor variables (.e. continuous variables). Usage predictors = c(\"aet\", \"cwd\", \"tmin\") predictors_f character. Vector column names qualitative predictor variables (.e. ordinal nominal variables type). Usage predictors_f = c(\"landform\") fit_formula formula. formula object response predictor variables (e.g. formula(pr_ab ~ aet + ppt_jja + pH + awc + depth + landform)). Note variables used must consistent used response, predictors, predictors_f arguments. Default NULL partition character. Column name training validation partition groups. grid data.frame. data frame object algorithm hyper-parameters values tested. recommended generate data.frame grid() function. Hyper-parameter needed tuning 'mtry'. maximum mtry exceed total number predictors. thr character. Threshold used get binary suitability values (.e. 0,1), needed threshold-dependent performance metrics. possible use one threshold type. necessary provide vector argument. following threshold types available: lpt: highest threshold omission. equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard index highest. max_sorensen: threshold Sorensen index highest. max_fpb: threshold FPB highest. sensitivity: Threshold based specified sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 
'sens' refers sensitivity value. specified sensitivity values, function use default 0.9 using one threshold type concatenate , e.g., thr=c('lpt', 'max_sens_spec', 'max_jaccard'), thr=c('lpt', 'max_sens_spec', 'sensitivity', sens='0.8'), thr=c('lpt', 'max_sens_spec', 'sensitivity'). Function use thresholds threshold specified. metric character. Performance metric used selecting best combination hyper -parameter values. One following metrics can used: SORENSEN, JACCARD, FPB, TSS, KAPPA, AUC, BOYCE. TSS used default. n_cores numeric. Number cores use parallelization. Default 1","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_raf.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit and validate Random Forest models with exploration of hyper-parameters that optimize performance — tune_raf","text":"list object : model: \"randomForest\" class object randomForest package. object can used predicting. predictors: tibble quantitative (c column names) qualitative (f column names) variables use modeling. performance: Hyper-parameters values performance metric (see sdm_eval) best hyper-parameters combination. hyper_performance: Performance metric (see sdm_eval) combination hyper-parameters. data_ens: Predicted suitability test partition based best model. 
database used fit_ensemble","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_raf.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit and validate Random Forest models with exploration of hyper-parameters that optimize performance — tune_raf","text":"","code":"if (FALSE) { data(abies) abies # Partition the data with the k-fold method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) tune_grid <- expand.grid(mtry = seq(1, 7, 1)) rf_t <- tune_raf( data = abies2, response = \"pr_ab\", predictors = c( \"aet\", \"cwd\", \"tmin\", \"ppt_djf\", \"ppt_jja\", \"pH\", \"awc\", \"depth\" ), predictors_f = c(\"landform\"), partition = \".part\", grid = tune_grid, thr = \"max_sens_spec\", metric = \"TSS\", n_cores = 1 ) # Outputs rf_t$model rf_t$predictors rf_t$performance rf_t$hyper_performance rf_t$data_ens }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_svm.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit and validate Support Vector Machine models with exploration of hyper-parameters that optimize performance — tune_svm","title":"Fit and validate Support Vector Machine models with exploration of hyper-parameters that optimize performance — tune_svm","text":"Fit validate Support Vector Machine models exploration hyper-parameters optimize performance","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_svm.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit and validate Support Vector Machine models with exploration of hyper-parameters that optimize performance — tune_svm","text":"","code":"tune_svm( data, response, predictors, predictors_f = NULL, fit_formula = NULL, partition, grid = NULL, thr = NULL, metric = \"TSS\", n_cores = 1 
)"},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_svm.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit and validate Support Vector Machine models with exploration of hyper-parameters that optimize performance — tune_svm","text":"data data.frame. Database response (0,1) predictors values. response character. Column name species absence-presence data (0,1). predictors character. Vector column names quantitative predictor variables (.e. continuous variables). Usage predictors = c(\"aet\", \"cwd\", \"tmin\") predictors_f character. Vector column names qualitative predictor variables (.e. ordinal nominal variables type). Usage predictors_f = c(\"landform\") fit_formula formula. formula object response predictor variables (e.g. formula(pr_ab ~ aet + ppt_jja + pH + awc + depth + landform)). Note variable names used must consistent used response, predictors, predictors_f arguments. Default NULL partition character. Column name training validation partition groups. grid data.frame. Provide data frame object algorithm hyper-parameters values tested. recommended generate data.frame grid() function. Hyper-parameters needed tuning 'size' 'decay'. thr character. Threshold used get binary suitability values (.e. 0,1). useful threshold-dependent performance metrics. possible use one threshold type. necessary provide vector argument. next threshold area available: lpt: highest threshold omission. equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard index highest. max_sorensen: threshold Sorensen index highest. max_fpb: threshold # FPB (F-measure presence-background data) highest. sensitivity: Threshold based specified sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers sensitivity value. sensitivity value specified, default used 0.9. 
case use one threshold type necessary concatenate threshold types, e.g., thr=c('lpt', 'max_sens_spec', 'max_jaccard'), thr=c('lpt', 'max_sens_spec', 'sensitivity', sens='0.8'), thr=c('lpt', 'max_sens_spec', 'sensitivity'). Function use thresholds threshold specified metric character. Performance metric used selecting best combination hyper-parameter values. One following metrics can used: SORENSEN, JACCARD, FPB, TSS, KAPPA, AUC, BOYCE. TSS used default. n_cores numeric. Number cores use parallelization. Default 1","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_svm.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit and validate Support Vector Machine models with exploration of hyper-parameters that optimize performance — tune_svm","text":"list object : model: \"ksvm\" class object kernlab package. object can used predicting. predictors: tibble quantitative (c column names) qualitative (f column names) variables use modeling. performance: Hyper-parameters values performance metric (see sdm_eval) best hyper-parameters combination. hyper_performance: Performance metrics (see sdm_eval) combination hyper-parameters. data_ens: Predicted suitability test partition based best model. database used fit_ensemble","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_svm.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit and validate Support Vector Machine models with exploration of hyper-parameters that optimize performance — tune_svm","text":"","code":"if (FALSE) { data(abies) abies # Partition the data with the k-fold method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) # pr_ab column is species presence and absences (i.e. 
the response variable) # from aet to landform are the predictors variables (landform is a qualitative variable) # Hyper-parameter values for tuning tune_grid <- expand.grid( C = c(2, 4, 8, 16, 20), sigma = c(0.01, 0.1, 0.2, 0.3, 0.4) ) svm_t <- tune_svm( data = abies2, response = \"pr_ab\", predictors = c( \"aet\", \"cwd\", \"tmin\", \"ppt_djf\", \"ppt_jja\", \"pH\", \"awc\", \"depth\" ), predictors_f = c(\"landform\"), partition = \".part\", grid = tune_grid, thr = \"max_sens_spec\", metric = \"TSS\", n_cores = 1 ) # Outputs svm_t$model svm_t$predictors svm_t$performance svm_t$hyper_performance svm_t$data_ens }"},{"path":"https://sjevelazco.github.io/flexsdm/news/index.html","id":"flexsdm-135","dir":"Changelog","previous_headings":"","what":"flexsdm 1.3.5","title":"flexsdm 1.3.5","text":"occfilt_geo new argument “rep” control number o repetition filter occurrences","code":""},{"path":"https://sjevelazco.github.io/flexsdm/news/index.html","id":"flexsdm-134","dir":"Changelog","previous_headings":"","what":"flexsdm 1.3.4","title":"flexsdm 1.3.4","text":"rgeos removed dependencies #356 New vignette use different tools explore model extrapolation truncate models added #352 Univariate combinatorial extrapolation metric added extra_eval. 
Minor bugs fixed project PCA time periods #351 Best grid raster names changed .part part_sblock part_sband Improvements correct_colinvar speed function using maxcell argument Improvements correct_colinvar project PCA time periods","code":""},{"path":"https://sjevelazco.github.io/flexsdm/news/index.html","id":"flexsdm-133","dir":"Changelog","previous_headings":"","what":"flexsdm 1.3.3","title":"flexsdm 1.3.3","text":"Improvements correct_colinvar now possible sample rasters reduce machine memory speed process Improvements sdm_predict possible predict model chunks reduce machine memory p_extra, p_pdp, p_bpdp fixed New function p_bpdp Bivariate Partial Dependent Plot New function data_bpdp Calculate data construct bivariate partial dependence plots Improvements p_dpd Calculate data construct partial dependence plots","code":""},{"path":"https://sjevelazco.github.io/flexsdm/news/index.html","id":"flexsdm-132","dir":"Changelog","previous_headings":"","what":"flexsdm 1.3.2","title":"flexsdm 1.3.2","text":"New function p_extra Graphical exploration extrapolation suitability pattern environmental geographical space New function p_pdp Partial Dependent Plot New function data_pdp Calculate data construct partial dependence plots","code":""},{"path":"https://sjevelazco.github.io/flexsdm/news/index.html","id":"flexsdm-131","dir":"Changelog","previous_headings":"","what":"flexsdm 1.3.1","title":"flexsdm 1.3.1","text":"New argument “crs” added function msdm_posteriori New argument “sp_name” sample_background sample_pseudoabs raster, flexclust, ape, sp removed dependencies Functions using CRS data improved codes possible use numeric value specify threshold msdm_posteriori extra_eval can use tibble SpatRaster object env_calib argument extra_truncate new argument define values used model truncation documentation improved. 
#","code":""}] +[{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"introduction","dir":"Articles","previous_headings":"","what":"Introduction","title":"flexsdm: Overview of Pre-modeling functions","text":"Species distribution modeling (SDM) become standard tool many research areas, including ecology, conservation biology, biogeography, paleobiogeography, epidemiology. SDM active area theoretical methodological research. flexsdm package provides users ability manipulate parameterize models variety ways meet unique research needs. flexibility enables users define complete partial modeling procedure specific modeling situation (e.g., number variables, number records, different algorithms ensemble methods, algorithms tuning, etc.). vignette, users learn first set functions flexsdm package fall “pre-modeling” umbrella (see full list). pre-modeling functions calib_area() Delimit calibration area constructing species distribution models correct_colinvar() Collinearity reduction predictors env_outliers() Integration outliers detection methods environmental space part_random() Data partitioning training testing models part_sblock() Spatial block cross-validation part_sband() Spatial band cross-validation part_senv() Environmental cross-validation plot_res() Plot different resolutions used part_sblock get_block() Transform spatial partition layer spatial properties environmental variables sample_background() Sample background points sample_pseudoabs() Sample pseudo-absence sdm_directory() Create directories saving outputs flexsdm sdm_extract() Extract environmental data based x y coordinates occfilt_env() Perform environmental filtering species occurrences occfilt_geo() Perform geographical filtering species occurrences","code":""},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"installation","dir":"Articles","previous_headings":"","what":"Installation","title":"flexsdm: Overview of Pre-modeling 
functions","text":"First, install flexsdm package. can install released version flexsdm github :","code":"# devtools::install_github('sjevelazco/flexsdm') library(flexsdm) library(dplyr) #> #> Attaching package: 'dplyr' #> The following objects are masked from 'package:stats': #> #> filter, lag #> The following objects are masked from 'package:base': #> #> intersect, setdiff, setequal, union library(terra) #> terra 1.7.71 #> #> Attaching package: 'terra' #> The following object is masked from 'package:knitr': #> #> spin"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"project-directory-setup","dir":"Articles","previous_headings":"","what":"Project Directory Setup","title":"flexsdm: Overview of Pre-modeling functions","text":"building SDM’s, organizing folders (directories) project save time confusion. project directory main project folder store relevant data results current project. Now, let’s create project directory initial data model results stored. function sdm_directory() can , based types model algorithms want use /types projections like make. First decide computer like store inputs outputs project (main directory) use dir.create() create main directory. Next, specify whether want include folders projections, calibration areas, algorithms, ensembles, thresholds.","code":"my_project <- file.path(file.path(tempdir(), \"flex_sdm_project\")) dir.create(my_project) project_directory <- sdm_directory( main_dir = my_project, projections = NULL, calibration_area = TRUE, algorithm = c(\"fit_max\", \"tune_raf\"), ensemble = c(\"mean\"), threshold = TRUE, return_vector = TRUE )"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"data-species-occurrence-and-background-data","dir":"Articles","previous_headings":"","what":"Data, species occurrence and background data","title":"flexsdm: Overview of Pre-modeling functions","text":"tutorial, using species occurrences available flexsdm package. 
“spp” example dataset includes pr_ab column (presence = 1, absence = 0), location columns (x, y). can load “spp” data local R environment using code :","code":"data(\"spp\") spp #> # A tibble: 1,150 × 4 #> species x y pr_ab #> #> 1 sp1 -5541. -145138. 0 #> 2 sp1 -51981. 16322. 0 #> 3 sp1 -269871. 69512. 1 #> 4 sp1 -96261. -32008. 0 #> 5 sp1 269589. -566338. 0 #> 6 sp1 29829. -328468. 0 #> 7 sp1 -152691. 393782. 0 #> 8 sp1 -195081. 253652. 0 #> 9 sp1 -951. -277978. 0 #> 10 sp1 145929. -271498. 0 #> # ℹ 1,140 more rows"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"geographic-region","dir":"Articles","previous_headings":"","what":"Geographic region","title":"flexsdm: Overview of Pre-modeling functions","text":"species occurrences located California Floristic Province (far western USA). “regions” dataset can used visualize study area geographic space. points distributed across study area?","code":"regions <- system.file(\"external/regions.tif\", package = \"flexsdm\") regions <- terra::rast(regions) try(plot(regions), silent=TRUE) points(spp[, 2:3], pch = 19, cex = 0.5, col = as.factor(spp$species))"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"calibration-area","dir":"Articles","previous_headings":"","what":"Calibration area","title":"flexsdm: Overview of Pre-modeling functions","text":"important decision SDM delimit model’s calibration area, geographic space use train model(s). Choice calibration area affects modeling steps, including sampling pseudo-absence background points, performance metrics, geographic patterns habitat suitability. want train SDM using entire extent United States interested geographic distribution environmental controls rare plant species found mountaintops Sierra Nevada, California! Let’s use presence locations one species exercise. calib_area() function offers three methods defining calibration area: buffer, mcp, bmcp, mask. 
briefly go .","code":"spp1 <- spp %>% dplyr::filter(species == \"sp1\") %>% dplyr::filter(pr_ab == 1) %>% dplyr::select(-pr_ab)"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"buffer","dir":"Articles","previous_headings":"Calibration area","what":"1. Buffer","title":"flexsdm: Overview of Pre-modeling functions","text":"calibration area defined using buffers around presence points. User’s can specify distance around points using “width” argument. buffer width value interpreted m CRS longitude/latitude, map units cases.","code":"crs(regions, proj=TRUE) #> [1] \"+proj=aea +lat_0=0 +lon_0=-120 +lat_1=34 +lat_2=40.5 +x_0=0 +y_0=-4000000 +datum=NAD83 +units=m +no_defs\" ca_1 <- calib_area( data = spp1, x = \"x\", y = \"y\", method = c(\"buffer\", width = 40000), crs = crs(regions) ) plot(regions, main = \"Buffer method\") plot(ca_1, add = TRUE) points(spp1[, 2:3], pch = 19, cex = 0.5)"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"minimum-convex-polygon","dir":"Articles","previous_headings":"Calibration area","what":"2. Minimum convex polygon","title":"flexsdm: Overview of Pre-modeling functions","text":"minimum convex polygon (mcp) method produces much simpler shape.","code":"ca_2 <- calib_area( data = spp1, x = \"x\", y = \"y\", method = c(\"mcp\"), crs = crs(regions) ) plot(regions, main = \"Minimum convex polygon method\") plot(ca_2, add = TRUE) points(spp1[, 2:3], pch = 19, cex = 0.5)"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"buffered-minimum-convex-polygon","dir":"Articles","previous_headings":"Calibration area","what":"3. 
Buffered minimum convex polygon","title":"flexsdm: Overview of Pre-modeling functions","text":"can also create buffer around minimum convex polygon.","code":"ca_3 <- calib_area( data = spp1, x = \"x\", y = \"y\", method = c(\"bmcp\", width = 40000), crs = crs(regions) ) plot(regions, main = \"Buffered minimum convex polygon\") plot(ca_3, add = TRUE) points(spp1[, 2:3], pch = 19, cex = 0.5)"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"mask","dir":"Articles","previous_headings":"Calibration area","what":"4. Mask","title":"flexsdm: Overview of Pre-modeling functions","text":"mask method allows polygons selected intersect species locations delineate calibration area. useful expect species distributions associated ecologically significant (mapped) ecoregions, interested distributions within political boundaries. use random set polygons named “clusters” illustrate mask method. original polygons left polygons contain points (“mask” calibration area) right.","code":"clusters <- system.file(\"external/clusters.shp\", package = \"flexsdm\") clusters <- terra::vect(clusters) ca_4 <- calib_area( data = spp1, x = \"x\", y = \"y\", method = c(\"mask\", clusters, \"clusters\"), crs = crs(regions) ) par(mfrow = c(1, 2)) plot(clusters, main = \"Original polygons\") plot(ca_4, main = \"Polygons with points (mask)\") points(spp1[, 2:3], pch = 19, cex = 0.5)"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"reducing-collinearity-among-the-predictors","dir":"Articles","previous_headings":"","what":"Reducing collinearity among the predictors","title":"flexsdm: Overview of Pre-modeling functions","text":"Predictor collinearity common issue SDMs, can lead model overfitting inaccurate tests significance predictors (De Marco & Nóbrega, 2018; Dormann et al., 
2013).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"environmental-predictors","dir":"Articles","previous_headings":"Reducing collinearity among the predictors","what":"Environmental predictors","title":"flexsdm: Overview of Pre-modeling functions","text":"use four climatic variables available flexsdm package: actual evapotranspiration (CFP_1), climatic water deficit (CFP_2), maximum temperature warmest month (CFP_3), minimum temperature coldest month (CFP_4). relationship different environmental variables can visualized pairs() function terra package. Several variables highly correlated (.89 predictors tmx tmn). can correct reduce collinearity? function correct_colinvar() four methods deal collinearity: pearson, vif, pca, fa. method returns 1) raster object (SpatRaster) selected predictors 2) useful outputs relevant method. functions used supplementary tools, predictor selection SDMs complicated ultimately based relationship environment species’ biology. said, functions offer options exploring relationships predictor variables can aid predictor selection process. Let’s look method:","code":"somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) names(somevar) <- c(\"aet\", \"cwd\", \"tmx\", \"tmn\") plot(somevar) terra::pairs(somevar)"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"pearson-correlation","dir":"Articles","previous_headings":"Reducing collinearity among the predictors","what":"1. Pearson correlation","title":"flexsdm: Overview of Pre-modeling functions","text":"method returns three objects 1) SpatRaster environmental variables correlation given threshold (default 0.7), 2) names variables correlation given threshold “removed” environmental data, 3) correlation matrix environmental variables. 
However, strongly urge users use information along knowledge specific species-environment relationships select ecologically-relevant predictors SDMs. example, , modeling distribution plant species water-limited Mediterranean-type ecosystem, may want include climatic water deficit (cwd) actual evapotranspiration (aet). Despite highly correlated, variables capture water availability evaporative demand, respectively (Stephenson 1998). Additionally, minimum absolute temperature strongly controls vegetation distributions (Woodward, Lomas, Kelly 2004), select tmn (minimum temperature coldest month) example. references, see:","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"woodward-f--i--m--r--lomas-and-c--k--kelly--2004--global-climate-and-the-distribution-of-plant-biomes--philosophical-transactions-of-the-royal-society-of-london--series-b-biological-sciences-35914651476-","dir":"Articles","previous_headings":"Reducing collinearity among the predictors > 1. Pearson correlation","what":"2. Woodward, F. I., M. R. Lomas, and C. K. Kelly. 2004. Global climate and the distribution of plant biomes. Philosophical transactions of the Royal Society of London. 
Series B, Biological sciences 359:1465–1476.","title":"flexsdm: Overview of Pre-modeling functions","text":"","code":"pearson_var <- correct_colinvar(somevar, method = c(\"pearson\", th = \"0.7\")) pearson_var$cor_table #> aet cwd tmx tmn #> aet 0.0000000 0.7689893 0.7924813 0.7845401 #> cwd 0.7689893 0.0000000 0.4168956 0.5881831 #> tmx 0.7924813 0.4168956 0.0000000 0.7323259 #> tmn 0.7845401 0.5881831 0.7323259 0.0000000 pearson_var$cor_variables #> $aet #> [1] \"cwd\" \"tmx\" \"tmn\" #> #> $cwd #> [1] \"aet\" #> #> $tmx #> [1] \"aet\" \"tmn\" #> #> $tmn #> [1] \"aet\" \"tmx\" chosen_variables <- somevar[[c('cwd','aet','tmn')]]"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"variance-inflation-factor","dir":"Articles","previous_headings":"Reducing collinearity among the predictors","what":"2. Variance inflation factor","title":"flexsdm: Overview of Pre-modeling functions","text":"method removes predictors variance inflation factor higher chosen threshold. , users can specify threshold (default 10). method retains predictors aet, tmx, tmn removes cwd. output method matches produced pearson method: 1) environmental layer retained variables, 2) list removed variables, 3) correlation matrix variables.","code":"vif_var <- correct_colinvar(somevar, method = c(\"vif\", th = \"10\")) vif_var$env_layer #> class : SpatRaster #> dimensions : 558, 394, 4 (nrow, ncol, nlyr) #> resolution : 1890, 1890 (x, y) #> extent : -373685.8, 370974.2, -604813.3, 449806.7 (xmin, xmax, ymin, ymax) #> coord. ref. 
: +proj=aea +lat_0=0 +lon_0=-120 +lat_1=34 +lat_2=40.5 +x_0=0 +y_0=-4000000 +datum=NAD83 +units=m +no_defs #> source : somevar.tif #> names : aet, cwd, tmx, tmn #> min values : 0.000, -9.39489, 22.44685, 0.2591429 #> max values : 1357.865, 14.20047, 614.69125, 64.3747588 vif_var$removed_variables #> NULL vif_var$vif_table #> # A tibble: 4 × 2 #> Variables VIF #> #> 1 aet 7.62 #> 2 cwd 3.29 #> 3 tmx 3.95 #> 4 tmn 2.89"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"principal-component-analysis","dir":"Articles","previous_headings":"Reducing collinearity among the predictors","what":"3. Principal component analysis","title":"flexsdm: Overview of Pre-modeling functions","text":"Finally, “pca” method performs principal components analysis predictors returns axis accounts 95% total variance system. method returns 1) SpatRaster object selected environmental variables, 2) matrix coefficients principal components predictors, 3) tibble cumulative variance explained selected principal components.","code":"pca_var <- correct_colinvar(somevar, method = c(\"pca\")) pca_var$env_layer #> class : SpatRaster #> dimensions : 558, 394, 3 (nrow, ncol, nlyr) #> resolution : 1890, 1890 (x, y) #> extent : -373685.8, 370974.2, -604813.3, 449806.7 (xmin, xmax, ymin, ymax) #> coord. ref. 
: +proj=aea +lat_0=0 +lon_0=-120 +lat_1=34 +lat_2=40.5 +x_0=0 +y_0=-4000000 +datum=NAD83 +units=m +no_defs #> source(s) : memory #> names : PC1, PC2, PC3 #> min values : -8.453273, -4.260147, -1.525085 #> max values : 2.827164, 3.337545, 4.342864 pca_var$coefficients #> # A tibble: 4 × 5 #> variable PC1 PC2 PC3 PC4 #> #> 1 aet 0.550 -0.0722 0.296 -0.778 #> 2 cwd 0.450 -0.777 0.103 0.429 #> 3 tmx -0.485 -0.594 -0.450 -0.459 #> 4 tmn -0.511 -0.198 0.836 -0.0241 pca_var$cumulative_variance #> # A tibble: 4 × 2 #> PC cvar #> #> 1 1 0.764 #> 2 2 0.915 #> 3 3 0.979 #> 4 4 1"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"factorial-analysis","dir":"Articles","previous_headings":"Reducing collinearity among the predictors","what":"4. Factorial analysis","title":"flexsdm: Overview of Pre-modeling functions","text":"Selecting “fa” method performs factorial analysis reduce dimensionality selects predictor(s) highest correlation axis. outputs method similar produced ‘pca’ method.","code":"fa_var <- correct_colinvar(env_layer = somevar, method = c(\"fa\")) fa_var$env_layer fa_var$number_factors fa_var$removed_variables fa_var$uniqueness fa_var$loadings"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"comments","dir":"Articles","previous_headings":"Reducing collinearity among the predictors","what":"5. Comments","title":"flexsdm: Overview of Pre-modeling functions","text":"flexsdm also possible restrict cell used perform collinearity reduction analysis geographical area smaller full extent environmental variables. 
See ‘restric_to_region’ ‘restric_pca_proj’ correct_colinvar examples alternative PCA given function help.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"data-filtering","dir":"Articles","previous_headings":"","what":"Data filtering","title":"flexsdm: Overview of Pre-modeling functions","text":"Sample bias species occurrence data common issue ecological studies filtering occurrence data can reduce bias. flexsdm provides two functions different types filtering, based geographical environmental “thinning”, randomly removing points dense (oversampling) geographical environmental space. can improve model performance reduce redundancy data.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"environmental-filtering","dir":"Articles","previous_headings":"Data filtering","what":"Environmental filtering","title":"flexsdm: Overview of Pre-modeling functions","text":"function occfilt_env(), performs environmental filtering species occurrence data. method basically reduces environmental redundancy data based methods outlined Valera et al. (2014). However, function unique flexsdm, able use number environmental dimensions perform PCA filtering. example, use original environmental data (somevar) occurrence data single species (spp1). filtering occurrences, important row species data unique code (example: idd). function also gives user option specifying number classes used split environmental condition. explore results using 5, 8, 12 bins. Increasing number bins increases number occurrence points retained.","code":"spp1$idd <- 1:nrow(spp1) filt_env5 <- occfilt_env( data = spp1, x = \"x\", y = \"y\", id = \"idd\", env_layer = somevar, nbins = 5 ) #> Extracting values from raster ... 
#> 12 records were removed because they have NAs for some variables #> Number of unfiltered records: 238 #> Number of filtered records: 57 filt_env8 <- occfilt_env( data = spp1, x = \"x\", y = \"y\", id = \"idd\", env_layer = somevar, nbins = 8 ) #> Extracting values from raster ... #> 12 records were removed because they have NAs for some variables #> Number of unfiltered records: 238 #> Number of filtered records: 112 filt_env12 <- occfilt_env( data = spp1, x = \"x\", y = \"y\", id = \"idd\", env_layer = somevar, nbins = 12 ) #> Extracting values from raster ... #> 12 records were removed because they have NAs for some variables #> Number of unfiltered records: 238 #> Number of filtered records: 173 par(mfrow = c(2, 2)) somevar[[1]] %>% plot(main = \"Original occurrence data\") points(spp1 %>% select(x, y)) somevar[[1]] %>% plot(main = \"Filtering with 5 bins\") points(filt_env5 %>% select(x, y)) somevar[[1]] %>% plot(main = \"Filtering with 8 bins\") points(filt_env8 %>% select(x, y)) somevar[[1]] %>% plot(main = \"Filtering with 12 bins\") points(filt_env12 %>% select(x, y))"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"geographical-filtering","dir":"Articles","previous_headings":"Data filtering","what":"Geographical filtering","title":"flexsdm: Overview of Pre-modeling functions","text":"Next, look occfilt_geo(), three alternatives determine distance threshold pair points: “moran” determines threshold distance points minimizes spatial autocorrelation occurrence data; “cellsize” filters occurrences based resolution predictors (specified coarser resolution); finally, “defined” allows users manually determine distance threshold.","code":"filt_geo1 <- occfilt_geo( data = spp1, x = \"x\", y = \"y\", env_layer = somevar, method = c(\"moran\"), prj = crs(somevar) ) #> Extracting values from raster ... 
#> 16 records were removed because they have NAs for some variables #> Number of unfiltered records: 234 #> Threshold for Moran: 0.1 #> Distance threshold(km): 345.859 #> Number of filtered records: 4 filt_geo2 <- occfilt_geo( data = spp1, x = \"x\", y = \"y\", env_layer = somevar, method = c(\"cellsize\", factor = \"3\"), # coarser resolution than the provided raster prj = crs(somevar) ) #> Extracting values from raster ... #> 16 records were removed because they have NAs for some variables #> Number of unfiltered records: 234 #> Distance threshold(km): 4.617 #> Number of filtered records: 212 filt_geo3 <- occfilt_geo( data = spp1, x = \"x\", y = \"y\", env_layer = somevar, method = c(\"defined\", d = \"30\"), prj = crs(somevar) ) #> Extracting values from raster ... #> 16 records were removed because they have NAs for some variables #> Number of unfiltered records: 234 #> Distance threshold(km): 30 #> Number of filtered records: 0 par(mfrow = c(2, 2)) somevar[[1]] %>% plot(main = \"Original occurrence data\") points(spp1 %>% select(x, y)) somevar[[1]] %>% plot(main = \"Filtering with Moran's I\") points(filt_geo1 %>% select(x, y)) somevar[[1]] %>% plot(main = \"Filtering with cell size\") points(filt_geo2 %>% select(x, y)) somevar[[1]] %>% plot(main = \"Filtering with defined distance (30km)\") points(filt_geo3 %>% select(x, y))"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"data-partitioning","dir":"Articles","previous_headings":"","what":"Data partitioning","title":"flexsdm: Overview of Pre-modeling functions","text":"Data partitioning, splitting data testing training groups, key step building SDMs. flexsdm offers multiple options data partitioning, including part_random(), part_sband(), part_sblock(), part_senv(). 
Let’s explore methods.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"conventional-data-partitioning-methods-part_random","dir":"Articles","previous_headings":"Data partitioning","what":"1. Conventional data partitioning methods (part_random)","title":"flexsdm: Overview of Pre-modeling functions","text":"part_random() function provides users ability divide species occurrence data based conventional partition methods including k-folds, repeated k-folds, leave-one-cross-validation, bootstrap partitioning. , use “kfold” method 10 folds divide data. results 10 folds occurrence data 25 observations fold.","code":"spp1$pr_ab <- 1 # Add a column with 1 to denote that this is presences only data sp_part1 <- part_random( data = spp1, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 10) ) sp_part1$.part %>% table() #> . #> 1 2 3 4 5 6 7 8 9 10 #> 25 25 25 25 25 25 25 25 25 25"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"spatial-band-cross-validation-part_sband","dir":"Articles","previous_headings":"Data partitioning","what":"2. Spatial band cross-validation (part_sband)","title":"flexsdm: Overview of Pre-modeling functions","text":"part_sband() part_sblock() partition data based position geographic space. Geographically structured data partitioning methods especially useful users want evaluate model transferability different regions time periods. function part_sband tests different numbers spatial partitions using latitudinal longitudinal bands selects best number bands given presence, presence-absence, presence-background dataset. procedure based spatial autocorrelation, environmental similarity, number presence/absence records band partition. 
function’s output includes 1) tibble presence/absence locations assigned partition number, 2) tibble information best partition, 3) SpatRaster showing selected grid.","code":"set.seed(1) sp_part2 <- part_sband( env_layer = somevar, data = spp1, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", type = \"lat\", # specify bands across different degrees of longitude 'lon' or latitude 'lat'. min_bands = 2, # minimum number of spatial bands to be tested max_bands = 20, # maximum number of spatial bands to be tested n_part = 2, prop = 0.5 ) #> 12 rows were excluded from database because NAs were found #> The following number of bands will be tested: #> 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 #> Creating basic raster mask... #> Searching for the optimal number of bands... plot(sp_part2$grid, col = gray.colors(20)) points(sp_part2$part[c(\"x\", \"y\")], col = rainbow(8)[sp_part2$part$.part], cex = 0.9, pch = c(1, 19)[sp_part2$part$pr_ab + 1] )"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"spatial-block-cross-validation-part_sblock","dir":"Articles","previous_headings":"Data partitioning","what":"3. Spatial block cross-validation (part_sblock)","title":"flexsdm: Overview of Pre-modeling functions","text":"part_sblock() function similar part_sband() instead bands explores spatial blocks different raster cells sizes returns one best suited input dataset. , can see data divided different “blocks” training testing. However, notice grid partition produced part_sblock different resolution original environmental variables. want map layer properties (.e. resolution, extent, NAs) original environmental variables, apply get_block() function grid resulting part_sblock(). 
layer can really useful generating pseudo-absence background sample points, explore next section.","code":"sp_part3 <- part_sblock( env_layer = somevar, data = spp1, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", min_res_mult = 10, # Minimum value used for multiplying raster resolution and define the finest resolution to be tested max_res_mult = 500, # Maximum value used for multiplying raster resolution and define the coarsest resolution to be tested num_grids = 30, # Number of grid to be tested between min_res_mult X (raster resolution) and max_res_mult X (raster resolution) n_part = 2, # Number of partitions prop = 0.5 # Proportion of points used for testing autocorrelation between groupds (0-1) ) #> 12 rows were excluded from database because NAs were found #> The following grid cell sizes will be tested: #> 18900 | 50834.48 | 82768.97 | 114703.45 | 146637.93 | 178572.41 | 210506.9 | 242441.38 | 274375.86 | 306310.34 | 338244.83 | 370179.31 | 402113.79 | 434048.28 | 465982.76 | 497917.24 | 529851.72 | 561786.21 | 593720.69 | 625655.17 | 657589.66 | 689524.14 | 721458.62 | 753393.1 | 785327.59 | 817262.07 | 849196.55 | 881131.03 | 913065.52 | 945000 #> Creating basic raster mask... #> Searching for the optimal grid size... plot(sp_part3$grid) points(sp_part3$part[c(\"x\", \"y\")], col = c(\"blue\", \"red\")[sp_part3$part$.part], cex = 0.5, pch = 19 ) terra::res(sp_part3$grid) #> [1] 881131 881131 terra::res(somevar) #> [1] 1890 1890 grid_env <- get_block(env_layer = somevar, best_grid = sp_part3$grid) plot(grid_env) # this is a block layer with the same layer # properties as environmental variables. points(sp_part3$part[c(\"x\", \"y\")], col = c(\"blue\", \"red\")[sp_part3$part$.part], cex = 0.5, pch = 19 )"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"environmental-and-spatial-cross-validation-part_senv","dir":"Articles","previous_headings":"Data partitioning","what":"4. 
Environmental and spatial cross-validation (part_senv)","title":"flexsdm: Overview of Pre-modeling functions","text":"final partitioning function flexsdm part_senv(), explores different numbers environmental partitions based K-means clustering algorithm returns one best-suited particular dataset, considering spatial autocorrelation, environmental similarity, number presence /absence records partition. map shows partitioning based environmental spatial factors.","code":"sp_part4 <- part_senv( env_layer = somevar, data = spp1, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", min_n_groups = 2, # Minimum number of groups to be tested max_n_groups = 10, # Maximum number of groups to be tested prop = 0.5 # Proportion of points used for testing autocorrelation between groups (0-1) ) #> 12 rows were excluded from database because NAs were found #> The following grid cell sizes will be tested: #> 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 #> Searching best partition... plot(regions, col = gray.colors(9)) points(sp_part4$part[c(\"x\", \"y\")], col = hcl.colors(length(unique(sp_part4$part)))[sp_part4$part$.part], cex = 1, pch = 19 )"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"background-and-pseudo-absence-sampling","dir":"Articles","previous_headings":"","what":"Background and pseudo-absence sampling","title":"flexsdm: Overview of Pre-modeling functions","text":"Presence-occurrence data quite common ecology researchers may adequate “absence” data species interest. Sometimes building species distribution models, need able generate background pseudo-absence points modeling goals. flexsdm package allows users using sample_background() sample_pseudoabs().","code":""},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"sample-background","dir":"Articles","previous_headings":"Background and pseudo-absence sampling","what":"1. 
Sample background","title":"flexsdm: Overview of Pre-modeling functions","text":"function sample_background() allows selection background sample points based different geographic restrictions sampling methods. , sample set background points based earlier spatial block partitioning using “random” method. Using lapply() case ensures generate background points spatial blocks (n = 2). also specifying want ten times amount background points original occurrences calibration area buffer area around presence points (see section “Calibration area”).","code":"p_data <- sp_part3$part # presence data from spatial block partition example set.seed(10) bg <- lapply(1:2, function(x) { sample_background( data = p_data, x = \"x\", y = \"y\", n = sum(p_data == x) * 10, # number of background points to be sampled method = \"random\", rlayer = grid_env, maskval = x, calibarea = ca_1 # A SpatVector which delimit the calibration area used for a given species ) }) %>% bind_rows() %>% mutate(pr_ab = 0) par(mfrow = c(2, 1)) plot(grid_env, main = \"Presence points\") plot(ca_1, add = TRUE) points(p_data, cex = .7, pch = 19) plot(grid_env, main = \"Background points\") plot(ca_1, add = TRUE) points(bg, cex = .1, pch = 19)"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"sample-pseudo-absences","dir":"Articles","previous_headings":"Background and pseudo-absence sampling","what":"2. Sample pseudo-absences","title":"flexsdm: Overview of Pre-modeling functions","text":"Similarly, function sample_pseudoabs allows random pseudo-absence sampling based environmental /geographical constraints. example, specifying method = “env_const” selects pseudo-absences environmentally constrained regions lower suitability values predicted Bioclim model. Additionally, function allows users specify calibration area generate pseudo-absence points. , use buffer area around presence points (ca_1) show might look like. 
can see, generated pseudo-absence points general vicinity presence points, concentrated areas lower environmental suitability. specific method chosen sampling background /pseudo-absence points vary depending research goals.","code":"set.seed(10) psa <- lapply(1:2, function(x) { sample_pseudoabs( data = p_data, x = \"x\", y = \"y\", n = sum(p_data == x), # number of pseudo-absence points to be sampled method = c(\"env_const\", env = somevar), rlayer = grid_env, maskval = x, calibarea = ca_1 ) }) %>% bind_rows() %>% mutate(pr_ab = 0) #> Extents do not match, raster layers used were croped to minimum extent #> Extents do not match, raster layers used were croped to minimum extent par(mfrow = c(2, 1)) plot(grid_env, main = \"Presence points\") plot(ca_1, add = TRUE) points(p_data, cex = .7, pch = 19) plot(grid_env, main = \"Pseudo-absence points\") plot(ca_1, add = TRUE) points(psa, cex = .7, pch = 19)"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v01_pre_modeling.html","id":"extracting-environmental-values","dir":"Articles","previous_headings":"","what":"Extracting environmental values","title":"flexsdm: Overview of Pre-modeling functions","text":"Finally, modeling species geographic distributions, must extract environmental data presences + absences/pseudo-absences/background point locations. function sdm_extract() extracts environmental data values based x y coordinates returns tibble original data + additional columns extracted environmental variables locations. Let’s original presence points (spp1) background locations (bg). 
#=========#=========#=========#=========#=========#=========#=========# Vignette still construction changes #=========#=========#=========#=========#=========#=========#=========#","code":"all_points <- bind_rows(spp1 %>% dplyr::select(-idd), bg) ex_spp <- sdm_extract( data = all_points, x = \"x\", y = \"y\", env_layer = somevar, # Raster with environmental variables variables = NULL, # Vector with the variable names of predictor variables Usage variables. = c(\"aet\", \"cwd\", \"tmin\"). If no variable is specified, function will return data for all layers. filter_na = TRUE ) ex_spp"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v02_modeling.html","id":"introduction","dir":"Articles","previous_headings":"","what":"Introduction","title":"flexsdm: Overview of Modeling functions","text":"Species distribution modeling (SDM) become standard tool multiple research areas, including ecology, conservation biology, biogeography, paleobiogeography, epidemiology. SDM area active theoretical methodological research. flexsdm package provides users ability manipulate parameterize models variety ways meet unique research needs. flexibility enables users define complete partial modeling procedure specific modeling situations (e.g., number variables, number records, different algorithms ensemble methods, algorithms tuning, etc.). vignette, users learn second set functions flexsdm package fall “modeling” umbrella. functions designed construct validate different types models can grouped fit_* , tune_* , esm_* family functions. addition function perform ensemble modeling. fit_* functions construct validate models default hyper-parameter values. tune_* functions construct validate models searching best combination hyper-parameter values, esm_ functions can used constructing validating Ensemble Small Models. Finally, fit_ensemble() function fitting validating ensemble models. 
functions model construction validation: fit_* functions family fit_gam() Fit validate Generalized Additive Models fit_gau() Fit validate Gaussian Process models fit_gbm() Fit validate Generalized Boosted Regression models fit_glm() Fit validate Generalized Linear Models fit_max() Fit validate Maximum Entropy models fit_net() Fit validate Neural Networks models fit_raf() Fit validate Random Forest models fit_svm() Fit validate Support Vector Machine models tune_* functions family tune_gbm() Fit validate Generalized Boosted Regression models exploration hyper-parameters tune_max() Fit validate Maximum Entropy models exploration hyper-parameters tune_net() Fit validate Neural Networks models exploration hyper-parameters tune_raf() Fit validate Random Forest models exploration hyper-parameters tune_svm() Fit validate Support Vector Machine models exploration hyper-parameters model ensemble fit_ensemble() Fit validate ensemble models different ensemble methods esm_* functions family esm_gam() Fit validate Generalized Additive Models Ensemble Small Model approach esm_gau() Fit validate Gaussian Process models Models Ensemble Small Model approach esm_gbm() Fit validate Generalized Boosted Regression models Ensemble Small Model approach esm_glm() Fit validate Generalized Linear Models Ensemble Small Model approach esm_max() Fit validate Maximum Entropy models Ensemble Small Model approach esm_net() Fit validate Neural Networks models Ensemble Small Model approach esm_svm() Fit validate Support Vector Machine models Ensemble Small Model approach","code":""},{"path":"https://sjevelazco.github.io/flexsdm/articles/v02_modeling.html","id":"installation","dir":"Articles","previous_headings":"","what":"Installation","title":"flexsdm: Overview of Modeling functions","text":"First, install flexsdm package. 
can install released version flexsdm github :","code":"# devtools::install_github('sjevelazco/flexsdm') require(flexsdm) #> Loading required package: flexsdm require(terra) #> Loading required package: terra #> terra 1.7.55 #> #> Attaching package: 'terra' #> The following object is masked from 'package:knitr': #> #> spin require(dplyr) #> Loading required package: dplyr #> #> Attaching package: 'dplyr' #> The following objects are masked from 'package:terra': #> #> intersect, union #> The following objects are masked from 'package:stats': #> #> filter, lag #> The following objects are masked from 'package:base': #> #> intersect, setdiff, setequal, union"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v02_modeling.html","id":"project-directory-setup","dir":"Articles","previous_headings":"","what":"Project directory setup","title":"flexsdm: Overview of Modeling functions","text":"Decide computer like store inputs outputs project (main directory). Use existing one use dir.create() create main directory. specify whether include folders projections, calibration areas, algorithms, ensembles, thresholds. details see Vignette 01_pre_modeling","code":""},{"path":"https://sjevelazco.github.io/flexsdm/articles/v02_modeling.html","id":"data-species-occurrence-and-background-data","dir":"Articles","previous_headings":"","what":"Data, species occurrence and background data","title":"flexsdm: Overview of Modeling functions","text":"tutorial, using species occurrences environmental data available flexsdm package. “abies” example dataset includes pr_ab column (presence = 1, absence = 0), location columns (x, y) environmental data. can load “abies” data local R environment using code : (EXAMPLE LOOKS LITTLE STRANGE ALSO USING BACKGROUND DATA, ABIES DATASET CLEARLY ABSENCES…) want replace abies dataset data, make sure dataset contains environmental conditions related presence-absence data. use pre-modeling family function k-fold partition method (used cross-validation). 
partition method number folds replications must presence-absence background points datasets. Now, abies2 object new column called “.part” 5 k-folds (1, 2, 3, 4, 5), indicating partition record (row) member . Next, apply partition method number folds environmental conditions background points. backg2 object new column called “.part” 5 k-folds (1, 2, 3, 4, 5).","code":"data(\"abies\") data(\"backg\") dplyr::glimpse(abies) #> Rows: 1,400 #> Columns: 13 #> $ id 715, 5680, 7907, 1850, 1702, 10036, 12384, 6513, 9884, 8651, … #> $ pr_ab 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0… #> $ x -95417.134, 98986.536, 121474.257, -39976.221, 111372.261, -2… #> $ y 314240.13, -159415.18, -99463.44, -17456.11, -91404.05, 39222… #> $ aet 323.1133, 447.5567, 182.2833, 372.3867, 209.4567, 308.3000, 5… #> $ cwd 546.1400, 815.4033, 271.1800, 946.2933, 398.5500, 534.9533, 3… #> $ tmin 1.2433, 9.4267, -4.9500, 8.7767, -4.0333, 4.6600, 4.3800, 4.9… #> $ ppt_djf 62.7257, 129.6406, 150.7003, 116.0236, 164.9327, 166.2220, 48… #> $ ppt_jja 17.7941, 6.4317, 11.2294, 2.7020, 9.2686, 16.5310, 41.2494, 8… #> $ pH 5.773341, 5.600000, 0.000000, 6.411796, 0.000000, 5.700000, 5… #> $ awc 0.10837019, 0.16000000, 0.00000000, 0.09719457, 0.00000000, 0… #> $ depth 152.000000, 201.000000, 0.000000, 59.759930, 0.000000, 112.99… #> $ landform 7, 11, 15, 14, 15, 15, 7, 15, 4, 10, 6, 10, 10, 15, 10, 11, 1… dplyr::glimpse(backg) #> Rows: 5,000 #> Columns: 13 #> $ pr_ab 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, … #> $ x 160779.16, 36849.16, -240170.84, -152420.84, -193190.84, … #> $ y -449968.33, 24151.67, 90031.67, -143518.33, 24151.67, 223… #> $ aet 280.4567, 259.7800, 400.1767, 367.4833, 397.3667, 385.263… #> $ cwd 1137.2433, 381.5367, 699.6500, 843.4467, 842.3833, 637.35… #> $ tmin 13.5100, -3.1733, 8.6800, 9.0133, 8.9700, 4.9333, 6.2933,… #> $ ppt_djf 71.2741, 171.4537, 285.0893, 72.0309, 125.2467, 226.1534,… #> $ ppt_jja 1.1920, 17.5193, 5.0158, 1.2047, 1.9778, 8.1554, 
18.4182,… #> $ pH 0.0000000, 0.2122687, 5.7222223, 7.5350823, 6.1963525, 5.… #> $ awc 0.000000000, 0.003473487, 0.080370426, 0.170000002, 0.131… #> $ depth 0.00000, 201.00000, 50.07409, 154.39426, 122.39575, 56.17… #> $ percent_clay 0.0000000, 0.4438345, 18.4111176, 46.9751244, 37.1873169,… #> $ landform 13, 10, 6, 6, 10, 14, 8, 14, 6, 7, 11, 14, 14, 10, 6, 6, … abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) dplyr::glimpse(abies2) #> Rows: 1,400 #> Columns: 14 #> $ id 715, 5680, 7907, 1850, 1702, 10036, 12384, 6513, 9884, 8651, … #> $ pr_ab 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0… #> $ x -95417.134, 98986.536, 121474.257, -39976.221, 111372.261, -2… #> $ y 314240.13, -159415.18, -99463.44, -17456.11, -91404.05, 39222… #> $ aet 323.1133, 447.5567, 182.2833, 372.3867, 209.4567, 308.3000, 5… #> $ cwd 546.1400, 815.4033, 271.1800, 946.2933, 398.5500, 534.9533, 3… #> $ tmin 1.2433, 9.4267, -4.9500, 8.7767, -4.0333, 4.6600, 4.3800, 4.9… #> $ ppt_djf 62.7257, 129.6406, 150.7003, 116.0236, 164.9327, 166.2220, 48… #> $ ppt_jja 17.7941, 6.4317, 11.2294, 2.7020, 9.2686, 16.5310, 41.2494, 8… #> $ pH 5.773341, 5.600000, 0.000000, 6.411796, 0.000000, 5.700000, 5… #> $ awc 0.10837019, 0.16000000, 0.00000000, 0.09719457, 0.00000000, 0… #> $ depth 152.000000, 201.000000, 0.000000, 59.759930, 0.000000, 112.99… #> $ landform 7, 11, 15, 14, 15, 15, 7, 15, 4, 10, 6, 10, 10, 15, 10, 11, 1… #> $ .part 2, 2, 3, 4, 2, 1, 5, 5, 2, 2, 4, 4, 1, 5, 4, 5, 5, 5, 1, 3, 1… backg2 <- part_random( data = backg, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) dplyr::glimpse(backg2) #> Rows: 5,000 #> Columns: 14 #> $ pr_ab 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, … #> $ x 160779.16, 36849.16, -240170.84, -152420.84, -193190.84, … #> $ y -449968.33, 24151.67, 90031.67, -143518.33, 24151.67, 223… #> $ aet 280.4567, 259.7800, 400.1767, 367.4833, 397.3667, 385.263… #> $ cwd 1137.2433, 381.5367, 
699.6500, 843.4467, 842.3833, 637.35… #> $ tmin 13.5100, -3.1733, 8.6800, 9.0133, 8.9700, 4.9333, 6.2933,… #> $ ppt_djf 71.2741, 171.4537, 285.0893, 72.0309, 125.2467, 226.1534,… #> $ ppt_jja 1.1920, 17.5193, 5.0158, 1.2047, 1.9778, 8.1554, 18.4182,… #> $ pH 0.0000000, 0.2122687, 5.7222223, 7.5350823, 6.1963525, 5.… #> $ awc 0.000000000, 0.003473487, 0.080370426, 0.170000002, 0.131… #> $ depth 0.00000, 201.00000, 50.07409, 154.39426, 122.39575, 56.17… #> $ percent_clay 0.0000000, 0.4438345, 18.4111176, 46.9751244, 37.1873169,… #> $ landform 13, 10, 6, 6, 10, 14, 8, 14, 6, 7, 11, 14, 14, 10, 6, 6, … #> $ .part 2, 3, 4, 4, 1, 4, 5, 4, 3, 1, 5, 1, 4, 2, 5, 4, 2, 5, 1, …"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v02_modeling.html","id":"fit-and-validate-models","dir":"Articles","previous_headings":"Data, species occurrence and background data","what":"1. Fit and validate models","title":"flexsdm: Overview of Modeling functions","text":"fit validate models: . maximum entropy model default hyper-parameter values (flexsdm::fit_max) II. random forest model exploration hyper-parameters (flexsdm::tune_raf). . Maximum Entropy models default hyper-parameter values. function returns list object following elements: model: “MaxEnt” class object. object can used predicting. predictors: tibble quantitative (c column names) qualitative (f column names) variables use modeling. performance: performance metric (see sdm_eval). metrics threshold dependent calculated based threshold specified argument. can see selected threshold values. Predicted suitability test partition (row) based best model. database used fit_ensemble. II- Random forest models exploration hyper-parameters. First, create data.frame provides hyper-parameters values tested. recommended generate data.frame. Hyper-parameter needed tuning ‘mtry’. maximum mtry must equal total number predictors. use data object abies2, k-fold partition method: Let’s see output object contains. 
function returns list object following elements: model: “randomForest” class object. object can used see formula details, basic summary o fthe model, predicting. predictors: tibble quantitative (c column names) qualitative (f column names) variables use modeling. performance: performance metric (see sdm_eval). metrics threshold dependent calculated based threshold specified argument. can see selected threshold values. Predicted suitability test partition (row) based best model. database used fit_ensemble. model objects can used flexsdm::fit_ensemble().","code":"max_t1 <- fit_max( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), predictors_f = c(\"landform\"), partition = \".part\", background = backg2, thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\"), clamp = TRUE, classes = \"default\", pred_type = \"cloglog\", regmult = 1 ) #> Formula used for model fitting: #> ~aet + ppt_jja + pH + awc + depth + I(aet^2) + I(ppt_jja^2) + I(pH^2) + I(awc^2) + I(depth^2) + hinge(aet) + hinge(ppt_jja) + hinge(pH) + hinge(awc) + hinge(depth) + ppt_jja:aet + pH:aet + awc:aet + depth:aet + pH:ppt_jja + awc:ppt_jja + depth:ppt_jja + awc:pH + depth:pH + depth:awc + categorical(landform) - 1 #> Replica number: 1/1 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 names(max_t1) #> [1] \"model\" \"predictors\" \"performance\" \"data_ens\" options(max.print = 20) max_t1$model #> #> Call: glmnet::glmnet(x = mm, y = as.factor(p), family = \"binomial\", weights = weights, lambda = 10^(seq(4, 0, length.out = 200)) * sum(reg)/length(reg) * sum(p)/sum(weights), standardize = F, penalty.factor = reg) #> #> Df %Dev Lambda #> 1 0 0.00 21.3700 #> 2 0 0.00 20.4100 #> 3 0 0.00 19.4800 #> 4 0 0.00 18.6000 #> 5 0 0.00 17.7600 #> 6 0 0.00 16.9600 #> [ reached getOption(\"max.print\") -- omitted 194 rows ] max_t1$predictors #> # A tibble: 1 × 6 #> c1 c2 c3 c4 c5 
f #> #> 1 aet ppt_jja pH awc depth landform max_t1$performance #> # A tibble: 3 × 25 #> model threshold thr_value n_presences n_absences TPR_mean TPR_sd TNR_mean #> #> 1 max equal_sens_sp… 0.573 700 700 0.669 0.0288 0.669 #> 2 max max_sens_spec 0.416 700 700 0.877 0.0609 0.56 #> 3 max max_sorensen 0.335 700 700 0.951 0.0362 0.457 #> # ℹ 17 more variables: TNR_sd , SORENSEN_mean , SORENSEN_sd , #> # JACCARD_mean , JACCARD_sd , FPB_mean , FPB_sd , #> # OR_mean , OR_sd , TSS_mean , TSS_sd , AUC_mean , #> # AUC_sd , BOYCE_mean , BOYCE_sd , IMAE_mean , #> # IMAE_sd max_t1$data_ens #> # A tibble: 1,400 × 5 #> rnames replicates part pr_ab pred #> #> 1 6 .part 1 0 0.656 #> 2 13 .part 1 0 0.0405 #> 3 19 .part 1 0 0.779 #> 4 21 .part 1 0 0.407 #> 5 25 .part 1 0 0.851 #> 6 27 .part 1 0 0.706 #> 7 31 .part 1 0 0.395 #> 8 33 .part 1 0 0.0456 #> 9 35 .part 1 0 0.412 #> 10 36 .part 1 0 0.130 #> # ℹ 1,390 more rows tune_grid <- expand.grid(mtry = seq(1, 7, 1)) rf_t <- tune_raf( data = abies2, response = \"pr_ab\", predictors = c( \"aet\", \"cwd\", \"tmin\", \"ppt_djf\", \"ppt_jja\", \"pH\", \"awc\", \"depth\" ), predictors_f = c(\"landform\"), partition = \".part\", grid = tune_grid, thr = \"max_sens_spec\", metric = \"TSS\", ) #> Formula used for model fitting: #> pr_ab ~ aet + cwd + tmin + ppt_djf + ppt_jja + pH + awc + depth + landform #> Tuning model... #> Replica number: 1/1 #> Formula used for model fitting: #> pr_ab ~ aet + cwd + tmin + ppt_djf + ppt_jja + pH + awc + depth + landform #> Replica number: 1/1 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 names(rf_t) #> [1] \"model\" \"predictors\" \"performance\" #> [4] \"hyper_performance\" \"data_ens\" rf_t$model #> #> Call: #> randomForest(formula = formula1, data = data, mtry = mtry, ntree = 500, importance = FALSE, ) #> Type of random forest: classification #> Number of trees: 500 #> No. 
of variables tried at each split: 1 #> #> OOB estimate of error rate: 11.64% #> Confusion matrix: #> 0 1 class.error #> 0 588 112 0.16000000 #> 1 51 649 0.07285714 rf_t$predictors #> # A tibble: 1 × 9 #> c1 c2 c3 c4 c5 c6 c7 c8 f #> #> 1 aet cwd tmin ppt_djf ppt_jja pH awc depth landform rf_t$performance #> # A tibble: 1 × 26 #> mtry model threshold thr_value n_presences n_absences TPR_mean TPR_sd #> #> 1 1 raf max_sens_spec 0.606 700 700 0.93 0.0333 #> # ℹ 18 more variables: TNR_mean , TNR_sd , SORENSEN_mean , #> # SORENSEN_sd , JACCARD_mean , JACCARD_sd , FPB_mean , #> # FPB_sd , OR_mean , OR_sd , TSS_mean , TSS_sd , #> # AUC_mean , AUC_sd , BOYCE_mean , BOYCE_sd , #> # IMAE_mean , IMAE_sd rf_t$data_ens #> # A tibble: 1,400 × 5 #> rnames replicates part pr_ab pred #> #> 1 6 .part 1 0 0.27 #> 2 13 .part 1 0 0.032 #> 3 19 .part 1 0 0.09 #> 4 21 .part 1 0 0.09 #> 5 25 .part 1 0 0.24 #> 6 27 .part 1 0 0.27 #> 7 31 .part 1 0 0.272 #> 8 33 .part 1 0 0.02 #> 9 35 .part 1 0 0.156 #> 10 36 .part 1 0 0.018 #> # ℹ 1,390 more rows"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v02_modeling.html","id":"model-ensemble","dir":"Articles","previous_headings":"Data, species occurrence and background data","what":"2. 
Model Ensemble","title":"flexsdm: Overview of Modeling functions","text":"example fit validate ensemble model using two model objects just created.","code":"# Fit and validate ensemble model an_ensemble <- fit_ensemble( models = list(max_t1, rf_t), ens_method = \"meansup\", thr = NULL, thr_model = \"max_sens_spec\", metric = \"TSS\" ) #> | | | 0% | |======================================================================| 100% # Outputs names(an_ensemble) #> [1] \"models\" \"thr_metric\" \"predictors\" \"performance\" an_ensemble$thr_metric #> [1] \"max_sens_spec\" \"TSS_mean\" an_ensemble$predictors #> # A tibble: 2 × 9 #> c1 c2 c3 c4 c5 f c6 c7 c8 #> #> 1 aet ppt_jja pH awc depth landform NA NA NA #> 2 aet cwd tmin ppt_djf ppt_jja landform pH awc depth an_ensemble$performance #> # A tibble: 7 × 25 #> model threshold thr_value n_presences n_absences TPR_mean TPR_sd TNR_mean #> #> 1 meansup equal_sens_… 0.596 700 700 0.879 0.0220 0.88 #> 2 meansup lpt 0.05 700 700 1 0 0.414 #> 3 meansup max_fpb 0.568 700 700 0.931 0.0322 0.86 #> 4 meansup max_jaccard 0.568 700 700 0.931 0.0322 0.86 #> 5 meansup max_sens_sp… 0.568 700 700 0.93 0.0333 0.861 #> 6 meansup max_sorensen 0.568 700 700 0.931 0.0322 0.86 #> 7 meansup sensitivity 0.55 700 700 0.9 0 0.861 #> # ℹ 17 more variables: TNR_sd , SORENSEN_mean , SORENSEN_sd , #> # JACCARD_mean , JACCARD_sd , FPB_mean , FPB_sd , #> # OR_mean , OR_sd , TSS_mean , TSS_sd , AUC_mean , #> # AUC_sd , BOYCE_mean , BOYCE_sd , IMAE_mean , #> # IMAE_sd "},{"path":"https://sjevelazco.github.io/flexsdm/articles/v02_modeling.html","id":"fit-and-validate-models-with-ensemble-of-small-model-approach","dir":"Articles","previous_headings":"Data, species occurrence and background data","what":"3. 
Fit and validate models with Ensemble of Small Model approach","title":"flexsdm: Overview of Modeling functions","text":"method consists creating bivariate models pair-wise combinations predictors perform ensemble based average suitability weighted Somers’ D metric (D = 2 x (AUC -0.5)). ESM recommended modeling species occurrences. function allow categorical variables use types variables problematic applied species occurrences. detail see Breiner et al. (2015, 2018) can use different methods flexsdm::part_random function according data. See part_random details. function constructs Generalized Additive Models using Ensembles Small Models (ESM) approach (Breiner et al., 2015, 2018). function returns list object following elements: esm_model: list “GAM” class object bivariate model. object can used predicting using ESM approachwith sdm_predict function. predictors: tibble variables use modeling. performance: Performance metric (see sdm_eval). threshold dependent metrics calculated based threshold specified argument. Now, test rep_kfold partition method. method ‘folds’ refers number partitions data partitioning ‘replicate’ refers number replicates. assume values >=1. use new rep_kfold partition gam model Test random bootstrap partitioning. method ‘replicate’ refers number replicates (assumes value >=1), ‘proportion’ refers proportion occurrences used model fitting (assumes value >0 <=1). method can configure proportion training testing data according species occurrences. example, proportion=‘0.7’ indicates 70% data used model training, 30% used model testing. method, function return .partX columns “train” “test” words entries. 
Use new rep_kfold partition gam model #=========#=========#=========#=========#=========#=========#=========# Vignette still construction changes #=========#=========#=========#=========#=========#=========#=========#","code":"data(\"abies\") library(dplyr) # Create a smaller subset of occurrences set.seed(10) abies2 <- abies %>% na.omit() %>% group_by(pr_ab) %>% dplyr::slice_sample(n = 10) %>% group_by() # Using k-fold partition method for model cross validation abies2 <- part_random( data = abies2, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 3) ) abies2 #> # A tibble: 20 × 14 #> id pr_ab x y aet cwd tmin ppt_djf ppt_jja pH awc #> #> 1 12040 0 -308909. 384248. 573. 332. 4.84 521. 48.8 5.63 0.108 #> 2 10361 0 -254286. 417158. 260. 469. 2.93 151. 15.1 6.20 0.0950 #> 3 9402 0 -286979. 386206. 587. 376. 6.45 333. 15.7 5.5 0.160 #> 4 9815 0 -291849. 445595. 443. 455. 4.39 332. 19.1 6 0.0700 #> 5 10524 0 -256658. 184438. 355. 568. 5.87 303. 10.6 5.20 0.0800 #> 6 8860 0 121343. -164170. 354. 733. 3.97 182. 9.83 0 0 #> 7 6431 0 107903. -122968. 461. 578. 4.87 161. 7.66 5.90 0.0900 #> 8 11730 0 -333903. 431238. 561. 364. 6.73 387. 25.2 5.80 0.130 #> 9 808 0 -150163. 357180. 339. 564. 2.64 220. 15.3 6.40 0.100 #> 10 11054 0 -293663. 340981. 477. 396. 3.89 332. 26.4 4.60 0.0634 #> 11 2960 1 -49273. 181752. 512. 275. 0.920 319. 17.3 5.92 0.0900 #> 12 3065 1 126907. -198892. 322. 544. 0.700 203. 10.6 5.60 0.110 #> 13 5527 1 116751. -181089. 261. 537. 0.363 178. 7.43 0 0 #> 14 4035 1 -31777. 115940. 394. 440. 2.07 298. 11.2 6.01 0.0769 #> 15 4081 1 -5158. 90159. 301. 502. 0.703 203. 14.6 6.11 0.0633 #> 16 3087 1 102151. -143976. 299. 425. -2.08 205. 13.4 3.88 0.110 #> 17 3495 1 -19586. 89803. 438. 419. 2.13 189. 15.2 6.19 0.0959 #> 18 4441 1 49405. -60502. 362. 582. 2.42 218. 7.84 5.64 0.0786 #> 19 301 1 -132516. 270845. 367. 196. -2.56 422. 26.3 6.70 0.0300 #> 20 3162 1 59905. -53634. 319. 626. 1.99 212. 
4.50 4.51 0.0396 #> # ℹ 3 more variables: depth , landform , .part # We set the model without threshold specification and with the kfold created above esm_gam_t1 <- esm_gam( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmin\", \"ppt_djf\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), partition = \".part\", thr = NULL ) #> #> Model has more coefficients than data used for training it. Try to reduce k names(esm_gam_t1) #> NULL options(max.print = 10) # If you don't want to see printed all the output esm_gam_t1$esm_model #> NULL esm_gam_t1$predictors #> NULL esm_gam_t1$performance #> NULL # Remove the previous k-fold partition abies2 <- abies2 %>% select(-starts_with(\".\")) # Test with rep_kfold partition using 3 folds and 5 replicates set.seed(10) abies2 <- part_random( data = abies2, pr_ab = \"pr_ab\", method = c(method = \"rep_kfold\", folds = 3, replicates = 5) ) abies2 #> # A tibble: 20 × 18 #> id pr_ab x y aet cwd tmin ppt_djf ppt_jja pH awc #> #> 1 12040 0 -308909. 384248. 573. 332. 4.84 521. 48.8 5.63 0.108 #> 2 10361 0 -254286. 417158. 260. 469. 2.93 151. 15.1 6.20 0.0950 #> 3 9402 0 -286979. 386206. 587. 376. 6.45 333. 15.7 5.5 0.160 #> 4 9815 0 -291849. 445595. 443. 455. 4.39 332. 19.1 6 0.0700 #> 5 10524 0 -256658. 184438. 355. 568. 5.87 303. 10.6 5.20 0.0800 #> 6 8860 0 121343. -164170. 354. 733. 3.97 182. 9.83 0 0 #> 7 6431 0 107903. -122968. 461. 578. 4.87 161. 7.66 5.90 0.0900 #> 8 11730 0 -333903. 431238. 561. 364. 6.73 387. 25.2 5.80 0.130 #> 9 808 0 -150163. 357180. 339. 564. 2.64 220. 15.3 6.40 0.100 #> 10 11054 0 -293663. 340981. 477. 396. 3.89 332. 26.4 4.60 0.0634 #> 11 2960 1 -49273. 181752. 512. 275. 0.920 319. 17.3 5.92 0.0900 #> 12 3065 1 126907. -198892. 322. 544. 0.700 203. 10.6 5.60 0.110 #> 13 5527 1 116751. -181089. 261. 537. 0.363 178. 7.43 0 0 #> 14 4035 1 -31777. 115940. 394. 440. 2.07 298. 11.2 6.01 0.0769 #> 15 4081 1 -5158. 90159. 301. 502. 0.703 203. 14.6 6.11 0.0633 #> 16 3087 1 102151. -143976. 299. 
425. -2.08 205. 13.4 3.88 0.110 #> 17 3495 1 -19586. 89803. 438. 419. 2.13 189. 15.2 6.19 0.0959 #> 18 4441 1 49405. -60502. 362. 582. 2.42 218. 7.84 5.64 0.0786 #> 19 301 1 -132516. 270845. 367. 196. -2.56 422. 26.3 6.70 0.0300 #> 20 3162 1 59905. -53634. 319. 626. 1.99 212. 4.50 4.51 0.0396 #> # ℹ 7 more variables: depth , landform , .part1 , .part2 , #> # .part3 , .part4 , .part5 esm_gam_t2 <- esm_gam( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmin\", \"ppt_djf\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), partition = \".part\", thr = NULL ) #> #> Model has more coefficients than data used for training it. Try to reduce k # Remove the previous k-fold partition abies2 <- abies2 %>% select(-starts_with(\".\")) # Test with bootstrap partition using 10 replicates set.seed(10) abies2 <- part_random( data = abies2, pr_ab = \"pr_ab\", method = c(method = \"boot\", replicates = 10, proportion = 0.7) ) abies2 #> # A tibble: 20 × 23 #> id pr_ab x y aet cwd tmin ppt_djf ppt_jja pH awc #> #> 1 12040 0 -308909. 384248. 573. 332. 4.84 521. 48.8 5.63 0.108 #> 2 10361 0 -254286. 417158. 260. 469. 2.93 151. 15.1 6.20 0.0950 #> 3 9402 0 -286979. 386206. 587. 376. 6.45 333. 15.7 5.5 0.160 #> 4 9815 0 -291849. 445595. 443. 455. 4.39 332. 19.1 6 0.0700 #> 5 10524 0 -256658. 184438. 355. 568. 5.87 303. 10.6 5.20 0.0800 #> 6 8860 0 121343. -164170. 354. 733. 3.97 182. 9.83 0 0 #> 7 6431 0 107903. -122968. 461. 578. 4.87 161. 7.66 5.90 0.0900 #> 8 11730 0 -333903. 431238. 561. 364. 6.73 387. 25.2 5.80 0.130 #> 9 808 0 -150163. 357180. 339. 564. 2.64 220. 15.3 6.40 0.100 #> 10 11054 0 -293663. 340981. 477. 396. 3.89 332. 26.4 4.60 0.0634 #> 11 2960 1 -49273. 181752. 512. 275. 0.920 319. 17.3 5.92 0.0900 #> 12 3065 1 126907. -198892. 322. 544. 0.700 203. 10.6 5.60 0.110 #> 13 5527 1 116751. -181089. 261. 537. 0.363 178. 7.43 0 0 #> 14 4035 1 -31777. 115940. 394. 440. 2.07 298. 11.2 6.01 0.0769 #> 15 4081 1 -5158. 90159. 301. 502. 0.703 203. 
14.6 6.11 0.0633 #> 16 3087 1 102151. -143976. 299. 425. -2.08 205. 13.4 3.88 0.110 #> 17 3495 1 -19586. 89803. 438. 419. 2.13 189. 15.2 6.19 0.0959 #> 18 4441 1 49405. -60502. 362. 582. 2.42 218. 7.84 5.64 0.0786 #> 19 301 1 -132516. 270845. 367. 196. -2.56 422. 26.3 6.70 0.0300 #> 20 3162 1 59905. -53634. 319. 626. 1.99 212. 4.50 4.51 0.0396 #> # ℹ 12 more variables: depth , landform , .part1 , .part2 , #> # .part3 , .part4 , .part5 , .part6 , .part7 , #> # .part8 , .part9 , .part10 esm_gam_t3 <- esm_gam( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmin\", \"ppt_djf\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), partition = \".part\", thr = NULL ) #> #> Model has more coefficients than data used for training it. Try to reduce k"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v03_post_modeling.html","id":"introduction","dir":"Articles","previous_headings":"","what":"Introduction","title":"flexsdm: Overview of Post-modeling functions","text":"Species distribution modeling (SDM) become standard tool multiple research areas, including ecology, conservation biology, biogeography, paleobiogeography, epidemiology. SDM area active theoretical methodological research flexsdm package provides users ability manipulate parameterize models variety ways meet unique research needs. flexibility enables users define complete partial modeling procedure specific modeling situations (e.g., number variables, number records, different algorithms ensemble methods, algorithms tuning, etc.). vignette, users learn post-modeling set functions flexsdm package. functions designed aim assisting flexsdm user predicting, evaluating, correcting SDMs. 
functions created model prediction, evaluation correction: Post-modeling functions sdm_predict() Spatial predictions individual ensemble models sdm_summarize() Merge model performance tables interp() Raster interpolation SDM predictions two time periods extra_eval() Measure model extrapolation extra_correct() Constraint suitability values given extrapolation value msdm_priori() Create spatial predictor variables reduce overprediction species distribution models msdm_posteriori() Methods correct overprediction species distribution models based occurrences suitability patterns","code":""},{"path":"https://sjevelazco.github.io/flexsdm/articles/v03_post_modeling.html","id":"installation","dir":"Articles","previous_headings":"","what":"Installation","title":"flexsdm: Overview of Post-modeling functions","text":"Install flexsdm package. can install released version flexsdm github :","code":"# devtools::install_github('sjevelazco/flexsdm') library(flexsdm) library(dplyr) #> #> Attaching package: 'dplyr' #> The following objects are masked from 'package:stats': #> #> filter, lag #> The following objects are masked from 'package:base': #> #> intersect, setdiff, setequal, union library(terra) #> terra 1.7.55 #> #> Attaching package: 'terra' #> The following object is masked from 'package:knitr': #> #> spin"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v03_post_modeling.html","id":"project-directory-setup","dir":"Articles","previous_headings":"","what":"Project directory setup","title":"flexsdm: Overview of Post-modeling functions","text":"Decide computer like store inputs outputs project (main directory). Use existing one use dir.create() create main directory. specify whether include folders projections, calibration areas, algorithms, ensembles, thresholds. 
details see Vignette 01_pre_modeling","code":""},{"path":"https://sjevelazco.github.io/flexsdm/articles/v03_post_modeling.html","id":"species-occurrence-presenceabsense-and-environmental-data","dir":"Articles","previous_headings":"","what":"Species occurrence, presence/absense and environmental data","title":"flexsdm: Overview of Post-modeling functions","text":"tutorial, using “spp” example dataset includes pr_ab (presence = 1, absence = 0), location (x, y) data 3 plant species found California raster environmental data. can load data local R environment using code : want replace spp dataset data, make sure contains coordinates, species presence = 1 / absence = 0 raster environmental data. First, prepare occurrences, environmental conditions partitions Next, fit different models","code":"data(\"spp\") somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) # Select only one species some_sp <- spp %>% filter(species == \"sp3\") # Extract the environmental condition from the rsater for sp3 some_sp <- sdm_extract( data = some_sp, x = \"x\", y = \"y\", env_layer = somevar ) #> 4 rows were excluded from database because NAs were found # Make a partition defining the method, folds and replicates some_sp <- part_random( data = some_sp, pr_ab = \"pr_ab\", method = c(method = \"rep_kfold\", folds = 3, replicates = 5) ) # Fit and validate a [generalized linear model](https://sjevelazco.github.io/flexsdm/reference/fit_glm.html) mglm <- fit_glm( data = some_sp, response = \"pr_ab\", predictors = c(\"CFP_1\", \"CFP_2\", \"CFP_3\", \"CFP_4\"), partition = \".part\", poly = 2 ) #> Formula used for model fitting: #> pr_ab ~ CFP_1 + CFP_2 + CFP_3 + CFP_4 + I(CFP_1^2) + I(CFP_2^2) + I(CFP_3^2) + I(CFP_4^2) #> Replica number: 1/5 #> Partition number: 1/3 #> Partition number: 2/3 #> Partition number: 3/3 #> Replica number: 2/5 #> Partition number: 1/3 #> Partition number: 2/3 #> Partition number: 3/3 #> Replica number: 3/5 #> Partition 
number: 1/3 #> Partition number: 2/3 #> Partition number: 3/3 #> Replica number: 4/5 #> Partition number: 1/3 #> Partition number: 2/3 #> Partition number: 3/3 #> Replica number: 5/5 #> Partition number: 1/3 #> Partition number: 2/3 #> Partition number: 3/3 # Fit and validate a [random forest model](https://sjevelazco.github.io/flexsdm/reference/fit_raf.html) mraf <- fit_raf( data = some_sp, response = \"pr_ab\", predictors = c(\"CFP_1\", \"CFP_2\", \"CFP_3\", \"CFP_4\"), partition = \".part\", ) #> Formula used for model fitting: #> pr_ab ~ CFP_1 + CFP_2 + CFP_3 + CFP_4 #> Replica number: 1/5 #> Partition number: 1/3 #> Partition number: 2/3 #> Partition number: 3/3 #> Replica number: 2/5 #> Partition number: 1/3 #> Partition number: 2/3 #> Partition number: 3/3 #> Replica number: 3/5 #> Partition number: 1/3 #> Partition number: 2/3 #> Partition number: 3/3 #> Replica number: 4/5 #> Partition number: 1/3 #> Partition number: 2/3 #> Partition number: 3/3 #> Replica number: 5/5 #> Partition number: 1/3 #> Partition number: 2/3 #> Partition number: 3/3 # Fit and validate a [general boosted regression model](https://sjevelazco.github.io/flexsdm/reference/fit_gbm.html) mgbm <- fit_gbm( data = some_sp, response = \"pr_ab\", predictors = c(\"CFP_1\", \"CFP_2\", \"CFP_3\", \"CFP_4\"), partition = \".part\" ) #> Formula used for model fitting: #> pr_ab ~ CFP_1 + CFP_2 + CFP_3 + CFP_4 #> Replica number: 1/5 #> Partition number: 1/3 #> Partition number: 2/3 #> Partition number: 3/3 #> Replica number: 2/5 #> Partition number: 1/3 #> Partition number: 2/3 #> Partition number: 3/3 #> Replica number: 3/5 #> Partition number: 1/3 #> Partition number: 2/3 #> Partition number: 3/3 #> Replica number: 4/5 #> Partition number: 1/3 #> Partition number: 2/3 #> Partition number: 3/3 #> Replica number: 5/5 #> Partition number: 1/3 #> Partition number: 2/3 #> Partition number: 
3/3"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v03_post_modeling.html","id":"fit-and-ensemble-the-models-above","dir":"Articles","previous_headings":"Species occurrence, presence/absense and environmental data","what":"1. Fit and ensemble the models above","title":"flexsdm: Overview of Post-modeling functions","text":"can also fit model using Ensembles Small Models approach. example, fit without threshold specification k-fold cross-validation. Finally, can predict different kinds models data (some_sp). sdm_predict can used predicting one models fitted fit_ tune_ functions. output list SpatRaster continuous /binary predictions.","code":"# Fit and ensemble the models. To choose the arguments that best fit your own data, see all options available in [fit_ensemble](https://sjevelazco.github.io/flexsdm/reference/fit_ensemble.html) mensemble <- fit_ensemble( models = list(mglm, mraf, mgbm), ens_method = \"meansup\", thr = NULL, thr_model = \"max_sens_spec\", metric = \"TSS\" ) #> | | | 0% | |======================================================================| 100% msmall <- esm_gam( data = some_sp, response = \"pr_ab\", predictors = c(\"CFP_1\", \"CFP_2\", \"CFP_3\", \"CFP_4\"), partition = \".part\", thr = NULL ) #> | | | 0% | |============ | 17% | |======================= | 33% | |=================================== | 50% | |=============================================== | 67% | |========================================================== | 83% | |======================================================================| 100% # Predict using a single model, which is an mglm model in this example, # and a threshold type for binary predictions ind_p <- sdm_predict( models = mglm, pred = somevar, thr = \"max_fpb\", con_thr = FALSE, predict_area = NULL ) #> Predicting individual models # Inspect the object. 
It's a SpatRaster with 2 layers: glm, max_fpb # These are the continuous and binary prediction from the model ind_p #> $glm #> class : SpatRaster #> dimensions : 558, 394, 2 (nrow, ncol, nlyr) #> resolution : 1890, 1890 (x, y) #> extent : -373685.8, 370974.2, -604813.3, 449806.7 (xmin, xmax, ymin, ymax) #> coord. ref. : +proj=aea +lat_0=0 +lon_0=-120 +lat_1=34 +lat_2=40.5 +x_0=0 +y_0=-4000000 +datum=NAD83 +units=m +no_defs #> source(s) : memory #> varnames : somevar #> somevar #> names : glm, max_fpb #> min values : 2.220446e-16, TRUE #> max values : 1.000000e+00, TRUE # Plot to see this layers ind_p_rst <- terra::rast(ind_p) plot(ind_p_rst) # Predict a list of more than one model, specifying a threshold type list_p <- sdm_predict( models = list(mglm, mraf, mgbm), pred = somevar, thr = \"max_fpb\", con_thr = FALSE, predict_area = NULL ) #> Predicting list of individual models # Inspect the object. It's a list with 3 SpatRaster, one for each model, # each of which contains 2 layers, for the continuous and thresholded binary predictions. list_p #> $glm #> class : SpatRaster #> dimensions : 558, 394, 2 (nrow, ncol, nlyr) #> resolution : 1890, 1890 (x, y) #> extent : -373685.8, 370974.2, -604813.3, 449806.7 (xmin, xmax, ymin, ymax) #> coord. ref. : +proj=aea +lat_0=0 +lon_0=-120 +lat_1=34 +lat_2=40.5 +x_0=0 +y_0=-4000000 +datum=NAD83 +units=m +no_defs #> source(s) : memory #> varnames : somevar #> somevar #> names : glm, max_fpb #> min values : 2.220446e-16, TRUE #> max values : 1.000000e+00, TRUE #> #> $raf #> class : SpatRaster #> dimensions : 558, 394, 2 (nrow, ncol, nlyr) #> resolution : 1890, 1890 (x, y) #> extent : -373685.8, 370974.2, -604813.3, 449806.7 (xmin, xmax, ymin, ymax) #> coord. ref. 
: +proj=aea +lat_0=0 +lon_0=-120 +lat_1=34 +lat_2=40.5 +x_0=0 +y_0=-4000000 +datum=NAD83 +units=m +no_defs #> source(s) : memory #> varnames : somevar #> somevar #> names : raf, max_fpb #> min values : 0, FALSE #> max values : 1, TRUE #> #> $gbm #> class : SpatRaster #> dimensions : 558, 394, 2 (nrow, ncol, nlyr) #> resolution : 1890, 1890 (x, y) #> extent : -373685.8, 370974.2, -604813.3, 449806.7 (xmin, xmax, ymin, ymax) #> coord. ref. : +proj=aea +lat_0=0 +lon_0=-120 +lat_1=34 +lat_2=40.5 +x_0=0 +y_0=-4000000 +datum=NAD83 +units=m +no_defs #> source(s) : memory #> varnames : somevar #> somevar #> names : gbm, max_fpb #> min values : 0.0002949323, FALSE #> max values : 0.9986537352, TRUE # Plot to see this layers list_p_rst <- terra::rast(list_p) plot(list_p_rst) # Predict an ensemble model. This is only possible using one fit_ensemble object. It's not possible to include e.g., list(fit_ensemble1, fit_ensemble2) in the model argument. ensemble_p <- sdm_predict( models = mensemble, pred = somevar, thr = \"max_fpb\", con_thr = FALSE, predict_area = NULL ) #> Predicting ensembles # Inspect the object. It's a SpatRaster with 2 layers, mensemble and max_fpb # These are the continuous and binary prediction from the ensemble model ensemble_p #> $meansup #> class : SpatRaster #> dimensions : 558, 394, 2 (nrow, ncol, nlyr) #> resolution : 1890, 1890 (x, y) #> extent : -373685.8, 370974.2, -604813.3, 449806.7 (xmin, xmax, ymin, ymax) #> coord. ref. : +proj=aea +lat_0=0 +lon_0=-120 +lat_1=34 +lat_2=40.5 +x_0=0 +y_0=-4000000 +datum=NAD83 +units=m +no_defs #> source(s) : memory #> names : meansup, max_fpb #> min values : 0.0001474662, FALSE #> max values : 0.9972242977, TRUE # Plot to see this layers ensemble_p_rst <- terra::rast(ensemble_p) plot(ensemble_p_rst) # Predict an ensembles of small models. 
small_p <- sdm_predict( models = msmall, pred = somevar, thr = \"max_fpb\", con_thr = FALSE, predict_area = NULL ) #> Predicting ensemble of small models # Inspect the object It's a SpatRaster with 2 layers, msmall and max_fpb # These are the continuous and binary prediction from the ESM model small_p #> $esm_gam #> class : SpatRaster #> dimensions : 558, 394, 2 (nrow, ncol, nlyr) #> resolution : 1890, 1890 (x, y) #> extent : -373685.8, 370974.2, -604813.3, 449806.7 (xmin, xmax, ymin, ymax) #> coord. ref. : +proj=aea +lat_0=0 +lon_0=-120 +lat_1=34 +lat_2=40.5 +x_0=0 +y_0=-4000000 +datum=NAD83 +units=m +no_defs #> source(s) : memory #> names : esm_gam, max_fpb #> min values : 1.961046e-05, FALSE #> max values : 8.644150e-01, TRUE # Plot to see this layers small_p_rst <- terra::rast(small_p) plot(small_p_rst)"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v03_post_modeling.html","id":"merge-model-performance-tables","dir":"Articles","previous_headings":"Species occurrence, presence/absense and environmental data","what":"2. Merge model performance tables","title":"flexsdm: Overview of Post-modeling functions","text":"function combines model performance tables input models. function requires list one models fitted fit_ tune_ functions, fit_ensemble output, esm_ family function output. Build models use performance table merge Finally, merge three sdm performance tables.","code":"# Load abies data data(abies) abies #> # A tibble: 1,400 × 13 #> id pr_ab x y aet cwd tmin ppt_djf ppt_jja pH awc #> #> 1 715 0 -95417. 314240. 323. 546. 1.24 62.7 17.8 5.77 0.108 #> 2 5680 0 98987. -159415. 448. 815. 9.43 130. 6.43 5.60 0.160 #> 3 7907 0 121474. -99463. 182. 271. -4.95 151. 11.2 0 0 #> 4 1850 0 -39976. -17456. 372. 946. 8.78 116. 2.70 6.41 0.0972 #> 5 1702 0 111372. -91404. 209. 399. -4.03 165. 9.27 0 0 #> 6 10036 0 -255715. 392229. 308. 535. 4.66 166. 16.5 5.70 0.0777 #> 7 12384 0 -311765. 380213. 568. 352. 4.38 480. 41.2 5.80 0.110 #> 8 6513 0 111360. -120229. 327. 
633. 4.93 163. 8.91 1.18 0.0116 #> 9 9884 0 -284326. 442136. 377. 446. 3.99 296. 16.8 5.96 0.0900 #> 10 8651 0 137640. -110538. 215. 265. -4.62 180. 9.57 0 0 #> # ℹ 1,390 more rows #> # ℹ 2 more variables: depth , landform # We will partition the data with the k-fold method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) # Build a generalized additive model, and a generalized linear model using fit_ family functions gam_t1 <- fit_gam( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), predictors_f = c(\"landform\"), partition = \".part\", thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\") ) #> Formula used for model fitting: #> pr_ab ~ s(aet, k = -1) + s(ppt_jja, k = -1) + s(pH, k = -1) + s(awc, k = -1) + s(depth, k = -1) + landform #> Replica number: 1/1 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 glm_t1 <- fit_glm( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), predictors_f = c(\"landform\"), partition = \".part\", thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\"), poly = 0, inter_order = 0 ) #> Formula used for model fitting: #> pr_ab ~ aet + ppt_jja + pH + awc + depth + landform #> Replica number: 1/1 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 # Build a tuned model using tune_ family functions # Prepare the grid object to use in grid argument tune_grid <- expand.grid(mtry = seq(1, 7, 1)) # Build a tuned random forest model rf_t1 <- tune_raf( data = abies2, response = \"pr_ab\", predictors = c( \"aet\", \"cwd\", \"tmin\", \"ppt_djf\", \"ppt_jja\", \"pH\", \"awc\", \"depth\" ), predictors_f = c(\"landform\"), partition = \".part\", grid = tune_grid, thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\"), 
metric = \"TSS\", ) #> Formula used for model fitting: #> pr_ab ~ aet + cwd + tmin + ppt_djf + ppt_jja + pH + awc + depth + landform #> Tuning model... #> Replica number: 1/1 #> Formula used for model fitting: #> pr_ab ~ aet + cwd + tmin + ppt_djf + ppt_jja + pH + awc + depth + landform #> Replica number: 1/1 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 rf_t1$performance #> # A tibble: 1 × 26 #> mtry model threshold thr_value n_presences n_absences TPR_mean TPR_sd #> #> 1 3 raf max_sens_spec 0.638 700 700 0.916 0.0430 #> # ℹ 18 more variables: TNR_mean , TNR_sd , SORENSEN_mean , #> # SORENSEN_sd , JACCARD_mean , JACCARD_sd , FPB_mean , #> # FPB_sd , OR_mean , OR_sd , TSS_mean , TSS_sd , #> # AUC_mean , AUC_sd , BOYCE_mean , BOYCE_sd , #> # IMAE_mean , IMAE_sd # Note in rf_t1$performance the best model was the one with mtry = 3 and threshold = 'max_sens_spec' merge_df <- sdm_summarize(models = list(gam_t1, glm_t1, rf_t1)) merge_df #> # A tibble: 7 × 27 #> model_ID model threshold thr_value n_presences n_absences TPR_mean TPR_sd #> #> 1 1 gam equal_sens_sp… 0.540 700 700 0.737 0.0366 #> 2 1 gam max_sens_spec 0.530 700 700 0.75 0.0460 #> 3 1 gam max_sorensen 0.359 700 700 0.863 0.0601 #> 4 2 glm equal_sens_sp… 0.523 700 700 0.663 0.0583 #> 5 2 glm max_sens_spec 0.463 700 700 0.803 0.111 #> 6 2 glm max_sorensen 0.356 700 700 0.876 0.0436 #> 7 3 raf max_sens_spec 0.638 700 700 0.916 0.0430 #> # ℹ 19 more variables: TNR_mean , TNR_sd , SORENSEN_mean , #> # SORENSEN_sd , JACCARD_mean , JACCARD_sd , FPB_mean , #> # FPB_sd , OR_mean , OR_sd , TSS_mean , TSS_sd , #> # AUC_mean , AUC_sd , BOYCE_mean , BOYCE_sd , #> # IMAE_mean , IMAE_sd , mtry "},{"path":"https://sjevelazco.github.io/flexsdm/articles/v03_post_modeling.html","id":"raster-interpolation-between-two-time-periods","dir":"Articles","previous_headings":"Species occurrence, presence/absense and environmental data","what":"3. 
Raster interpolation between two time periods","title":"flexsdm: Overview of Post-modeling functions","text":"function useful calculating projected suitability values two time periods simple interpolation using two raster objects suitability values. useful , example, SDM projected future past time period (using maps predictor variables, climate variables, different time periods), user requires estimate suitability intermediate time periods. example may needed input types models risk analysis. function returns SpatRaster dir_save used NULL. However, user specifies dir_save, function save interpolated raster files given directory. function create object interpolated values n annual layers ranging initial final year. resolution dimensions result object remain initial final maps. example, nine annual (2011-2019) interpolated maps generated initial (2010) final (2020) prediction maps. cell starting value 1 ending value 0 changed increments (1-0)/((2020-2010)-1), given interpolated values 0.9, 0.8, 0.7…0.1","code":"library(terra) library(dplyr) f <- system.file(\"external/suit_time_step.tif\", package = \"flexsdm\") abma <- terra::rast(f) plot(abma) int <- interp( r1 = abma[[1]], # set the raster of initial year r2 = abma[[2]], # set the raster of final year y1 = 2010, # set the numeric initial year y2 = 2020, # set the numeric final year rastername = \"Abies\", dir_save = NULL ) # Layers in the abma SpatRaster names(abma) #> [1] \"current\" \"future\" # plot(abma) # Layers in the int SpatRaster int #> class : SpatRaster #> dimensions : 558, 394, 11 (nrow, ncol, nlyr) #> resolution : 1890, 1890 (x, y) #> extent : -373685.8, 370974.2, -604813.3, 449806.7 (xmin, xmax, ymin, ymax) #> coord. ref. : +proj=aea +lat_0=0 +lon_0=-120 +lat_1=34 +lat_2=40.5 +x_0=0 +y_0=-4000000 +datum=NAD83 +units=m +no_defs #> source(s) : memory #> varnames : suit_time_step #> suit_time_step #> suit_time_step #> ... 
#> names : Abies_2010, Abies_2011, Abies_2012, Abies_2013, Abies_2014, Abies_2015, ... #> min values : 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, ... #> max values : 0.9756107, 0.9606077, 0.9504615, 0.9440073, 0.9442941, 0.9463548, ... plot(int)"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v03_post_modeling.html","id":"measure-model-extrapolation","dir":"Articles","previous_headings":"Species occurrence, presence/absense and environmental data","what":"4. Measure model extrapolation","title":"flexsdm: Overview of Post-modeling functions","text":"function measures extent model extrapolation comparing data used modeling calibration area model projection using approach proposed Velazco et al., prep. accessible area defines calibration area used extract environmental conditions","code":"library(dplyr) library(terra) data(spp) f <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(f) # Inspect the unique values for species spp$species %>% unique() #> [1] \"sp1\" \"sp2\" \"sp3\" # Subset spp data into a tibble only with coordinates for sp3 and pr_ab == 1 sp <- spp %>% dplyr::filter(species == \"sp3\", pr_ab == 1) %>% dplyr::select(x, y) # Define accessible area for sp3 based on a buffer with around each point that is related to dispersal ability or some other ecological criterion ca <- calib_area( sp, x = \"x\", y = \"y\", method = c(\"buffer\", width = 30000), crs = crs(somevar) ) # Plot the SpatRaster, occurrences and accessible area plot(somevar$CFP_1) points(sp) plot(ca, add = T) somevar_ca <- somevar %>% crop(., ca) %>% mask(., ca) # Plot environmental conditions of the calibration area plot(somevar_ca) xp <- extra_eval( training_data = somevar_ca, projection_data = somevar, n_cores = 1, aggreg_factor = 3 ) # Plot the SpatRaster object with the extrapolation values measured in percentage 
plot(xp)"},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/articles/v03_post_modeling.html","id":"create-spatial-predictor-variables-to-reduce-overprediction-of-species-distribution-models","dir":"Articles","previous_headings":"Species occurrence, presence/absense and environmental data","what":"6. Create spatial predictor variables to reduce overprediction of species distribution models","title":"flexsdm: Overview of Post-modeling functions","text":"function creates geographical predictor variables , together environmental variables, can used construct constrained species distribution models. function returns SpatRaster object, used together environmental variables construct species distribution models. ‘xy’ approach creates single pair raster layers can used species share study region. Otherwise, ‘cml’, ‘min’, ‘ker’ create species-specific raster layer. Next, use different methods according data.","code":"library(dplyr) library(terra) data(\"spp\") somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) # Select the presences of one species (sp3) occ <- spp %>% dplyr::filter(species == \"sp3\", pr_ab == 1) # Select a raster layer to be used as a basic raster a_variable <- somevar[[1]] plot(a_variable) points(occ %>% dplyr::select(x, y)) # Use xy method m_xy <- msdm_priori( data = occ, x = \"x\", y = \"y\", method = \"xy\", env_layer = a_variable ) plot(m_xy) # Explore the object. This method assumes that spatial structure can partially explain species distribution (Bahn & Mcgill, 2007). Therefore, the result are two raster layers containing the latitude and longitude of pixels, respectively. This method could be used for all species set that share the same study area region. m_xy #> class : SpatRaster #> dimensions : 558, 394, 2 (nrow, ncol, nlyr) #> resolution : 1890, 1890 (x, y) #> extent : -373685.8, 370974.2, -604813.3, 449806.7 (xmin, xmax, ymin, ymax) #> coord. ref. 
: +proj=aea +lat_0=0 +lon_0=-120 +lat_1=34 +lat_2=40.5 +x_0=0 +y_0=-4000000 +datum=NAD83 +units=m +no_defs #> source(s) : memory #> varnames : somevar #> somevar #> names : msdm_lon, msdm_lat #> min values : -370850.8, -601978.3 #> max values : 368139.2, 448861.7 m_cml <- msdm_priori( data = occ, x = \"x\", y = \"y\", method = \"cml\", env_layer = a_variable ) plot(m_cml) # Explore the object. This method assumes that pixels closer to presences are likely included in species distributions. The results is a raster layer containing the sum of euclidean geographic distances from each pixel to all occurrences of a species. m_cml #> class : SpatRaster #> dimensions : 558, 394, 1 (nrow, ncol, nlyr) #> resolution : 1890, 1890 (x, y) #> extent : -373685.8, 370974.2, -604813.3, 449806.7 (xmin, xmax, ymin, ymax) #> coord. ref. : +proj=aea +lat_0=0 +lon_0=-120 +lat_1=34 +lat_2=40.5 +x_0=0 +y_0=-4000000 +datum=NAD83 +units=m +no_defs #> source(s) : memory #> varname : somevar #> name : msdm_cml #> min value : 0 #> max value : 1"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v03_post_modeling.html","id":"methods-to-correct-overprediction-of-species-distribution-models-based-on-occurrences-and-suitability-patterns","dir":"Articles","previous_headings":"Species occurrence, presence/absense and environmental data","what":"7. Methods to correct overprediction of species distribution models based on occurrences and suitability patterns","title":"flexsdm: Overview of Post-modeling functions","text":"methods designed reduce overprediction species distribution models based posteriori method (see Mendes et al 2020), .e., combination patterns species occurrences predicted suitability. First, prepare data Next, fit predict model Next, let’s predict model plot map Finally, perform correction avoid models overpredictions. 
#=========#=========#=========#=========#=========#=========#=========# Vignette still construction changes #=========#=========#=========#=========#=========#=========#=========#","code":"library(dplyr) library(terra) data(\"spp\") somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) # Prepare data for modeling a species set.seed(10) occ <- spp %>% dplyr::filter(species == \"sp2\") %>% # filter using only sp2 sdm_extract( data = ., x = \"x\", y = \"y\", env_layer = somevar, filter_na = TRUE ) %>% # extract variables values from the raster layer part_random(., pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 10) ) # add columns with partition #> 6 rows were excluded from database because NAs were found m_glm <- fit_glm( data = occ, response = \"pr_ab\", predictors = names(somevar), partition = \".part\", thr = \"equal_sens_spec\", ) #> Formula used for model fitting: #> pr_ab ~ CFP_1 + CFP_2 + CFP_3 + CFP_4 + I(CFP_1^2) + I(CFP_2^2) + I(CFP_3^2) + I(CFP_4^2) #> Replica number: 1/1 #> Partition number: 1/10 #> Partition number: 2/10 #> Partition number: 3/10 #> Partition number: 4/10 #> Partition number: 5/10 #> Partition number: 6/10 #> Partition number: 7/10 #> Partition number: 8/10 #> Partition number: 9/10 #> Partition number: 10/10 # Predict this model m_pred <- sdm_predict(models = m_glm, pred = somevar, thr = NULL, con_thr = FALSE) #> Predicting individual models # Predicting individual models plot(m_pred[[1]]) # Using mcp method. The Minimum Convex Polygon (mcp) method excludes from SDMs climate suitable pixels that do not intercept a minimum convex polygon, with interior angles smaller than 180, enclosing all occurrences of a species. m_mcp <- msdm_posteriori( records = occ, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", method = \"mcp\", cont_suit = m_pred[[1]], thr = \"equal_sens_spec\", buffer = NULL ) plot(m_mcp) # Using bmcp method. 
The Buffered Minimum Convex Polygon (bmcp) method is similar to the 'mcp' except by the inclusion of a buffer zone surrounding minimum convex polygons. m_bmcp <- msdm_posteriori( records = occ, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", method = \"bmcp\", cont_suit = m_pred[[1]], thr = \"equal_sens_spec\", buffer = 30000, crs = crs(m_pred[[1]]) ) plot(m_bmcp) # Using obr method. The Occurrences Based Restriction (obr) method assumes that suitable patches intercepting species occurrences are more likely a part of species distributions than suitable patches that do not intercept any occurrence. m_obr <- msdm_posteriori( records = occ, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", method = \"obr\", cont_suit = m_pred[[1]], thr = \"equal_sens_spec\", buffer = NULL ) plot(m_obr) # Using pres method. The only occurrences based restriction (pres) method only retains those pixels in suitability patches intercepting occurrences. m_pres <- msdm_posteriori( records = occ, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", method = \"pres\", cont_suit = m_pred[[1]], thr = \"equal_sens_spec\", buffer = NULL ) plot(m_pres) # Using lq method. The Lower Quantile (lq) method works whenever a suitable pixel is within a k patch, i.e., not within this lower quartile, the suitability of the pixel is reduced to zero. This means that 75% of k patches were withdrawn from the model. m_lq <- msdm_posteriori( records = occ, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", method = \"lq\", cont_suit = m_pred[[1]], thr = \"equal_sens_spec\", buffer = NULL ) plot(m_lq)"},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/articles/v04_Red_fir_example.html","id":"study-species-overview-of-methods","dir":"Articles","previous_headings":"Example of full modeling process","what":"Study species & overview of methods","title":"flexsdm: Red Fir example","text":", used flexsdm package model current distribution California red fir (Abies magnifica). 
Red fir high-elevation conifer species ’s geographic range extends Sierra Nevada California, USA, southern portion Cascade Range Oregon. species, used presence data compiled several public datasets curated natural resources agencies. built distribution models using four hydro-climatic variables: actual evapotranspiration, climatic water deficit, maximum temperature warmest month, minimum temperature coldest month. variables resampled (aggregated) 1890 m spatial resolution improve processing time.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/articles/v04_Red_fir_example.html","id":"delimit-of-a-calibration-area","dir":"Articles","previous_headings":"Example of full modeling process","what":"Delimit of a calibration area","title":"flexsdm: Red Fir example","text":"Delimiting calibration area (aka accessible area) essential step SDMs methodological theoretical terms. calibration area affect several characteristics SDM like range environmental variables, number absences, distribution background points pseudo-absences, unfortunately, performance metrics like AUC TSS. several ways delimit calibration area. calib_area(). 
used method calibration area delimited 100-km buffer around presences (shown figure ).","code":"# devtools::install_github('sjevelazco/flexsdm') library(flexsdm) library(terra) library(dplyr) somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) # environmental data names(somevar) <- c(\"aet\", \"cwd\", \"tmx\", \"tmn\") data(abies) abies_p <- abies %>% select(x, y, pr_ab) %>% filter(pr_ab == 1) # filter only for presence locations ca <- calib_area( data = abies_p, x = 'x', y = 'y', method = c('buffer', width = 100000), crs = crs(somevar) ) # create a calibration area with 100 km buffer around occurrence points # visualize the species occurrences layer1 <- somevar[[1]] layer1[!is.na(layer1)] <- 1 plot(layer1, col=\"gray80\", legend=FALSE, axes=FALSE) plot(crop(ca, layer1), add=TRUE) points(abies_p[,c(\"x\", \"y\")], col = \"#00000480\")"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v04_Red_fir_example.html","id":"occurrence-filtering","dir":"Articles","previous_headings":"Example of full modeling process","what":"Occurrence filtering","title":"flexsdm: Red Fir example","text":"Sample bias species occurrence data long recognized issue SDM. However, environmental filtering observation data can improve model predictions reducing redundancy environmental (e.g. climatic) hyper-space (Varela et al. 2014). use function occfilt_env() thin red fir occurrences based environmental space. function unique flexsdm, contrast packages able use number environmental dimensions perform PCA filtering. Next apply environmental occurrence filtering using 8 bins display resulting filtered occurrence data","code":"abies_p$id <- 1:nrow(abies_p) # adding unique id to each row abies_pf <- abies_p %>% occfilt_env( data = ., x = \"x\", y = \"y\", id = \"id\", nbins = 8, env_layer = somevar ) %>% left_join(abies_p, by = c(\"id\", \"x\", \"y\")) #> Extracting values from raster ... 
#> 27 records were removed because they have NAs for some variables #> Number of unfiltered records: 673 #> Number of filtered records: 216 plot(layer1, col=\"gray80\", legend=FALSE, axes=FALSE) plot(crop(ca, layer1), add=TRUE) points(abies_p[,c(\"x\", \"y\")], col = \"#00000480\") points(abies_pf[,c(\"x\", \"y\")], col = \"#5DC86180\")"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v04_Red_fir_example.html","id":"block-partition-with-4-folds","dir":"Articles","previous_headings":"Example of full modeling process","what":"Block partition with 4 folds","title":"flexsdm: Red Fir example","text":"Data partitioning, splitting data testing training groups, key step building SDMs. flexsdm offers multiple options data partitioning use spatial block method. Geographically structured data partitioning methods especially useful users want evaluate model transferability different regions time periods. part_sblock() function explores spatial blocks different raster cells sizes returns one best suited input datset based spatial autocorrelation, environmental similarity, number presence/absence records block partition. function’s output provides users 1) tibble presence/absence locations assigned partition number, 2) tibble information best partition, 3) SpatRaster showing selected grid. 
want divide data 4 different partitions using spatial block method.","code":"set.seed(10) occ_part <- abies_pf %>% part_sblock( data = ., env_layer = somevar, pr_ab = \"pr_ab\", x = \"x\", y = \"y\", n_part = 4, min_res_mult = 3, max_res_mult = 200, num_grids = 30, prop = 1 ) #> The following grid cell sizes will be tested: #> 5670 | 18508.97 | 31347.93 | 44186.9 | 57025.86 | 69864.83 | 82703.79 | 95542.76 | 108381.72 | 121220.69 | 134059.66 | 146898.62 | 159737.59 | 172576.55 | 185415.52 | 198254.48 | 211093.45 | 223932.41 | 236771.38 | 249610.34 | 262449.31 | 275288.28 | 288127.24 | 300966.21 | 313805.17 | 326644.14 | 339483.1 | 352322.07 | 365161.03 | 378000 #> Creating basic raster mask... #> Searching for the optimal grid size... abies_pf <- occ_part$part # Transform best block partition to a raster layer with same resolution and extent than # predictor variables block_layer <- get_block(env_layer = somevar, best_grid = occ_part$grid) cl <- c(\"#64146D\", \"#9E2962\", \"#F47C15\", \"#FCFFA4\") plot(block_layer, col=cl, legend=FALSE, axes=FALSE) points(abies_pf[,c(\"x\", \"y\")]) # Number of presences per block abies_pf %>% dplyr::group_by(.part) %>% dplyr::count() #> # A tibble: 4 × 2 #> # Groups: .part [4] #> .part n #> #> 1 1 38 #> 2 2 59 #> 3 3 33 #> 4 4 86 # Additional information of the best block occ_part$best_part_info #> # A tibble: 1 × 5 #> n_grid cell_size spa_auto env_sim sd_p #> #> 1 14 172577. 0.5 173. 24.1"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v04_Red_fir_example.html","id":"pseudo-absencebackground-points-using-partition-previously-created-as-a-mask","dir":"Articles","previous_headings":"Example of full modeling process","what":"Pseudo-absence/background points (using partition previously created as a mask)","title":"flexsdm: Red Fir example","text":"example, species presence data. However, SDM methods require either pseudo-absence background data. 
, use spatial block partition just created generate pseudo-absence background points. Extract environmental data presence-absence background data . View distributions present points, pseudo-absence points, background points using blocks reference map.","code":"# Spatial blocks where species occurs # Sample background points throughout study area with random method, allocating 10X the number of presences a background set.seed(10) bg <- lapply(1:4, function(x) { sample_background( data = abies_pf, x = \"x\", y = \"y\", n = sum(abies_pf$.part == x) * 10, method = \"random\", rlayer = block_layer, maskval = x, calibarea = ca ) }) %>% bind_rows() bg <- sdm_extract(data = bg, x = \"x\", y = \"y\", env_layer = block_layer) # Sample a number of pseudo-absences equal to the presence in each partition set.seed(10) psa <- lapply(1:4, function(x) { sample_pseudoabs( data = abies_pf, x = \"x\", y = \"y\", n = sum(abies_pf$.part == x), method = \"random\", rlayer = block_layer, maskval = x, calibarea = ca ) }) %>% bind_rows() psa <- sdm_extract(data = psa, x = \"x\", y = \"y\", env_layer = block_layer) cl <- c(\"#280B50\", \"#9E2962\", \"#F47C15\", \"#FCFFA4\") plot(block_layer, col=\"gray80\", legend=FALSE, axes=FALSE) points(bg[,c(\"x\", \"y\")], col=cl[bg$.part], cex=0.8) # Background points points(psa[,c(\"x\", \"y\")], bg=cl[psa$.part], cex=0.8, pch=21) # Pseudo-absences # Bind a presences and pseudo-absences abies_pa <- bind_rows(abies_pf, psa) abies_pa # Presence-Pseudo-absence database #> # A tibble: 432 × 4 #> x y pr_ab .part #> #> 1 -12558. 68530. 1 2 #> 2 115217. -145937. 1 4 #> 3 3634. 22501. 1 2 #> 4 44972. -60781. 1 2 #> 5 -34463. 160313. 1 3 #> 6 83108. -27300. 1 2 #> 7 124877. -176319. 1 4 #> 8 118707. -179991. 1 4 #> 9 126141. -176302. 1 4 #> 10 -49722. 141124. 1 3 #> # ℹ 422 more rows bg # Background points #> # A tibble: 2,160 × 4 #> x y pr_ab .part #> #> 1 -153501. 392162. 0 1 #> 2 -89241. 263642. 0 1 #> 3 -89241. 27392. 0 1 #> 4 -130821. 331682. 
0 1 #> 5 -132711. 339242. 0 1 #> 6 -51441. -63328. 0 1 #> 7 -59001. 67082. 0 1 #> 8 -32541. -51988. 0 1 #> 9 -96801. 932. 0 1 #> 10 -47661. -31198. 0 1 #> # ℹ 2,150 more rows abies_pa <- abies_pa %>% sdm_extract( data = ., x = \"x\", y = \"y\", env_layer = somevar, filter_na = TRUE ) bg <- bg %>% sdm_extract( data = ., x = \"x\", y = \"y\", env_layer = somevar, filter_na = TRUE )"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v04_Red_fir_example.html","id":"fit-models-with-tune_max-fit_gau-and-fit_glm","dir":"Articles","previous_headings":"Example of full modeling process","what":"Fit models with tune_max, fit_gau, and fit_glm","title":"flexsdm: Red Fir example","text":"Now, fit models. flexsdm package offers wide range modeling options, traditional statistical methods like GLMs GAMs, machine learning methods like random forests support vector machines. modeling method, flexsdm provides fit_ tune_ functions, allow users use default settings adjust hyperparameters depending research goals. , test tune_max() (tuned Maximum Entropy model), fit_gau() (fit Guassian Process model), fit_glm (fit Generalized Linear Model). model, selected three threshold values generate binary suitability predictions: threshold maximizes TSS (max_sens_spec), threshold sensitivity specificity equal (equal_sens_spec), threshold Sorenson index highest (max_sorenson). example, selected TSS performance metric used selecting best combination hyper-parameter values tuned Maximum Entropy model.","code":"t_max <- tune_max( data = abies_pa, response = \"pr_ab\", predictors = names(somevar), background = bg, partition = \".part\", grid = expand.grid( regmult = seq(0.1, 3, 0.5), classes = c(\"l\", \"lq\", \"lqhpt\") ), thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\"), metric = \"TSS\", clamp = TRUE, pred_type = \"cloglog\" ) #> Tuning model... 
#> Replica number: 1/1 #> Partition number: 1/4 #> Partition number: 2/4 #> Partition number: 3/4 #> Partition number: 4/4 #> Fitting best model #> Formula used for model fitting: #> ~aet + cwd + tmx + tmn + I(aet^2) + I(cwd^2) + I(tmx^2) + I(tmn^2) + hinge(aet) + hinge(cwd) + hinge(tmx) + hinge(tmn) + thresholds(aet) + thresholds(cwd) + thresholds(tmx) + thresholds(tmn) + cwd:aet + tmx:aet + tmn:aet + tmx:cwd + tmn:cwd + tmn:tmx - 1 #> Replica number: 1/1 #> Partition number: 1/4 #> Partition number: 2/4 #> Partition number: 3/4 #> Partition number: 4/4 f_gau <- fit_gau( data = abies_pa, response = \"pr_ab\", predictors = names(somevar), partition = \".part\", thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\") ) #> Replica number: 1/1 #> Partition number: 1/4 #> Partition number: 2/4 #> Partition number: 3/4 #> Partition number: 4/4 f_glm <- fit_glm( data = abies_pa, response = \"pr_ab\", predictors = names(somevar), partition = \".part\", thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\"), poly = 2 ) #> Formula used for model fitting: #> pr_ab ~ aet + cwd + tmx + tmn + I(aet^2) + I(cwd^2) + I(tmx^2) + I(tmn^2) #> Replica number: 1/1 #> Partition number: 1/4 #> Partition number: 2/4 #> Partition number: 3/4 #> Partition number: 4/4"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v04_Red_fir_example.html","id":"fit-an-ensemble-model","dir":"Articles","previous_headings":"Example of full modeling process","what":"Fit an ensemble model","title":"flexsdm: Red Fir example","text":"Spatial predictions different SDM algorithms can vary substantially, ensemble modeling become increasingly popular. fit_ensemble() function, users can easily produce ensemble SDM based individual fit_ tune_ models included package. example, fit ensemble model red fir based weighted average three individual models. used threshold values performance metric implemented individual models. 
output flexsdm model objects allows easily compare metrics across models, AUC TSS. example, can use sdm_summarize() function merge model performance tables.","code":"ens_m <- fit_ensemble( models = list(t_max, f_gau, f_glm), ens_method = \"meanw\", thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\"), thr_model = \"max_sens_spec\", metric = \"TSS\" ) #> | | | 0% | |======================================================================| 100% ens_m$performance #> 
[38;5;246m# A tibble: 3 × 25
[39m #> model threshold thr_value n_presences n_absences TPR_mean TPR_sd TNR_mean #> 
[3m
[38;5;246m
[39m
[23m 
[3m
[38;5;246m
[39m
[23m 
[3m
[38;5;246m
[39m
[23m 
[3m
[38;5;246m
[39m
[23m 
[3m
[38;5;246m
[39m
[23m 
[3m
[38;5;246m
[39m
[23m 
[3m
[38;5;246m
[39m
[23m 
[3m
[38;5;246m
[39m
[23m #> 
[38;5;250m1
[39m meanw equal_sens_sp… 0.582 216 216 0.787 0.079
[4m5
[24m 0.808 #> 
[38;5;250m2
[39m meanw max_sens_spec 0.470 216 216 0.949 0.016
[4m2
[24m 0.752 #> 
[38;5;250m3
[39m meanw max_sorensen 0.449 216 216 0.963 0.014
[4m3
[24m 0.738 #> 
[38;5;246m# ℹ 17 more variables: TNR_sd , SORENSEN_mean , SORENSEN_sd ,
[39m #> 
[38;5;246m# JACCARD_mean , JACCARD_sd , FPB_mean , FPB_sd ,
[39m #> 
[38;5;246m# OR_mean , OR_sd , TSS_mean , TSS_sd , AUC_mean ,
[39m #> 
[38;5;246m# AUC_sd , BOYCE_mean , BOYCE_sd , IMAE_mean ,
[39m #> 
[38;5;246m# IMAE_sd 
[39m model_perf <- sdm_summarize(list(t_max, f_gau, f_glm, ens_m)) model_perf #> # A tibble: 10 × 28 #> model_ID model threshold thr_value n_presences n_absences TPR_mean TPR_sd #> #> 1 1 max max_sens_spec 0.364 216 216 0.954 0.0316 #> 2 2 gau equal_sens_s… 0.643 216 216 0.784 0.0890 #> 3 2 gau max_sens_spec 0.471 216 216 0.952 0.0122 #> 4 2 gau max_sorensen 0.471 216 216 0.964 0.0108 #> 5 3 glm equal_sens_s… 0.649 216 216 0.800 0.0851 #> 6 3 glm max_sens_spec 0.554 216 216 0.954 0.0493 #> 7 3 glm max_sorensen 0.423 216 216 0.977 0.0379 #> 8 4 meanw equal_sens_s… 0.582 216 216 0.787 0.0795 #> 9 4 meanw max_sens_spec 0.470 216 216 0.949 0.0162 #> 10 4 meanw max_sorensen 0.449 216 216 0.963 0.0143 #> # ℹ 20 more variables: TNR_mean , TNR_sd , SORENSEN_mean , #> # SORENSEN_sd , JACCARD_mean , JACCARD_sd , FPB_mean , #> # FPB_sd , OR_mean , OR_sd , TSS_mean , TSS_sd , #> # AUC_mean , AUC_sd , BOYCE_mean , BOYCE_sd , #> # IMAE_mean , IMAE_sd , regmult , classes "},{"path":"https://sjevelazco.github.io/flexsdm/articles/v04_Red_fir_example.html","id":"project-the-ensemble-model","dir":"Articles","previous_headings":"Example of full modeling process","what":"Project the ensemble model","title":"flexsdm: Red Fir example","text":"Next project ensemble model space across entire extent environmental layer, California Floristic Province, using sdm_predict() function. function can use predict species suitability across area species’ current future suitability. example, project ensemble model one threshold, though users option project multiple models multiple threshold values. 
, also specify want function return SpatRast continuous suitability values threshold (con_thr = TRUE).","code":"pr_1 <- sdm_predict( models = ens_m, pred = somevar, thr = \"max_sens_spec\", con_thr = TRUE, predict_area = NULL ) #> Predicting ensembles unconstrained <- pr_1$meanw[[1]] names(unconstrained) <- \"unconstrained\" cl <- c(\"#FDE725\", \"#B3DC2B\", \"#6DCC57\", \"#36B677\", \"#1F9D87\", \"#25818E\", \"#30678D\", \"#3D4988\", \"#462777\", \"#440154\") plot(unconstrained, col=cl, legend=FALSE, axes=FALSE)"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v04_Red_fir_example.html","id":"constrain-the-model-with-msdm_posterior","dir":"Articles","previous_headings":"Example of full modeling process","what":"Constrain the model with msdm_posterior","title":"flexsdm: Red Fir example","text":"Finally, flexsdm offers users function help correct overprediction SDM based occurrence records suitability patterns. example constrained ensemble model using method “occurrence based restriction”, assumes suitable patches intercept species occurrences likely part species distributions suitable patches intercept occurrences. methods msdm_posteriori() function work presences important always use original database (.e., presences spatially environmentally filtered). methods available msdm_posteriori() function based Mendes et al. (2020). 
#=========#=========#=========#=========#=========#=========#=========# Vignette still construction changes #=========#=========#=========#=========#=========#=========#=========#","code":"thr_val <- ens_m$performance %>% dplyr::filter(threshold == \"max_sens_spec\") %>% pull(thr_value) m_pres <- msdm_posteriori( records = abies_p, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", cont_suit = pr_1$meanw[[1]], method = c(\"obr\"), thr = c(\"sensitivity\", sens = thr_val), buffer = NULL ) constrained <- m_pres$meanw[[1]] names(constrained) <- \"constrained\" cl <- c(\"#FDE725\", \"#B3DC2B\", \"#6DCC57\", \"#36B677\", \"#1F9D87\", \"#25818E\", \"#30678D\", \"#3D4988\", \"#462777\", \"#440154\") plot(constrained, col=cl, legend=FALSE, axes=FALSE)"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v05_Rare_species_example.html","id":"intro","dir":"Articles","previous_headings":"","what":"Intro","title":"flexsdm: Modeling a rare species","text":"Creating SDMs rare poorly known species can difficult task. Occurrence data often limited observation, can lead model overfitting, especially using many predictor variables build models. However, researchers often interested building SDMs rare species, often threatened need conservation action. address issues associated modeling spatial distributions rare species, Lomba et al. (2010) Breiner et al. (2015) proposed method “ensemble small models” ESM. ESM, many bivariate models pairwise combinations predictor variable, ensemble performed. flexsdm, ensemble created using average suitability across “small models”, weighted Somers’ D (D = 2 * (AUC-.5)). important note method allow use categorical variables (soil type). practical applications ESMs include identifying areas reintroduction rare species areas establishing new populations, especially face climate change. example, Dubos et al. 
(2021) used variation ESM identify areas may remain suitable climate change two rare species Madagascar: golden mantella frog (Mantella aurantiaca) Manapany day gecko (Phelsuma inexpectata). example, walk process comparing ESM traditional modeling approaches Hesperocyparis stephensonii (Cuyamaca cypress), conifer tree species endemic southern California. species listed Critically Endangered IUCN found headwaters King Creek San Diego County. Cedar Fire 2003 left 30-40 surviving trees. hypothetical example, searching suitable areas might possible establish new populations species, hopes decreasing species’ future extinction risk.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/articles/v05_Rare_species_example.html","id":"data","dir":"Articles","previous_headings":"Intro","what":"Data","title":"flexsdm: Modeling a rare species","text":"models, use four environmental variables influence plant distributions California: available evapotranspiration (aet), climatic water deficit (cwd), maximum temperature warmest month (tmx), minimum temperature coldest month (tmn). 
occurrence data include 21 geo-referenced observations downloaded online database Calflora.","code":"# devtools::install_github('sjevelazco/flexsdm') library(flexsdm) library(terra) library(dplyr) # environmental data somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) names(somevar) <- c(\"aet\", \"cwd\", \"tmx\", \"tmn\") # species occurence data (presence-only) data(hespero) hespero <- hespero %>% dplyr::select(-id) # California ecoregions regions <- system.file(\"external/regions.tif\", package = \"flexsdm\") regions <- terra::rast(regions) regions <- as.polygons(regions) sp_region <- terra::subset(regions, regions$category == \"SCR\") # ecoregion where *Hesperocyparis stephensonii* is found # visualize the species occurrences plot( sp_region, col = \"gray80\", legend = FALSE, axes = FALSE, main = \"Hesperocyparis stephensonii occurrences\" ) points(hespero[, c(\"x\", \"y\")], col = \"black\", pch = 16) cols <- rep(\"gray80\", 8) cols[regions$category == \"SCR\"] <- \"yellow\" terra::inset( regions, loc = \"bottomleft\", scale = .3, col = cols )"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v05_Rare_species_example.html","id":"delimit-calibration-area","dir":"Articles","previous_headings":"Intro","what":"Delimit calibration area","title":"flexsdm: Modeling a rare species","text":"First, must define model’s calibration area. flexsdm package offers several methods defining model calibration area. 
, use 25-km buffer areas around presence points select pseudo-absence locations.","code":"ca <- calib_area( data = hespero, x = \"x\", y = \"y\", method = c('buffer', width=25000), crs = crs(somevar) ) # visualize the species occurrences & calibration area plot( sp_region, col = \"gray80\", legend = FALSE, axes = FALSE, main = \"Calibration area and occurrences\") plot(ca, add=TRUE) points(hespero[,c(\"x\", \"y\")], col = \"black\", pch = 16)"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v05_Rare_species_example.html","id":"create-pseudo-absence-data","dir":"Articles","previous_headings":"Intro","what":"Create pseudo-absence data","title":"flexsdm: Modeling a rare species","text":"often case rare species, species presence data. However, SDM methods require either pseudo-absence background data. , use calibration area produce pseudo-absence data can used SDMs.","code":"# Sample the same number of species presences set.seed(10) psa <- sample_pseudoabs( data = hespero, x = \"x\", y = \"y\", n = sum(hespero$pr_ab), # selecting number of pseudo-absence points that is equal to number of presences method = \"random\", rlayer = somevar, calibarea = ca ) # Visualize species presences and pseudo-absences plot( sp_region, col = \"gray80\", legend = FALSE, axes = FALSE, xlim = c(289347, 353284), ylim = c(-598052, -520709), main = \"Presence = yellow, Pseudo-absence = black\") plot(ca, add=TRUE) points(psa[,c(\"x\", \"y\")], cex=0.8, pch=16, col = \"black\") # Pseudo-absences points(hespero[,c(\"x\", \"y\")], col = \"yellow\", pch = 16, cex = 1.5) # Presences # Bind a presences and pseudo-absences hespero_pa <- bind_rows(hespero, psa) hespero_pa # Presence-Pseudo-absence database #> # A tibble: 42 × 3 #> x y pr_ab #> #> 1 316923. -557843. 1 #> 2 317155. -559234. 1 #> 3 316960. -558186. 1 #> 4 314347. -559648. 1 #> 5 317348. -557349. 1 #> 6 316753. -559679. 1 #> 7 316777. -558644. 1 #> 8 317050. -559043. 1 #> 9 316655. -559928. 1 #> 10 316418. -567439. 
1 #> # ℹ 32 more rows"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v05_Rare_species_example.html","id":"partition-data-for-evaluating-models","dir":"Articles","previous_headings":"Intro","what":"Partition data for evaluating models","title":"flexsdm: Modeling a rare species","text":"evaluate model performance, need specify data testing training. flexsdm offers range random spatial random data partition methods evaluating SDMs. use repeated K-fold cross-validation, suitable partition approach performing ESM.","code":"set.seed(10) # Repeated K-fold method hespero_pa2 <- part_random( data = hespero_pa, pr_ab = \"pr_ab\", method = c(method = \"rep_kfold\", folds = 5, replicates = 10) )"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v05_Rare_species_example.html","id":"extracting-environmental-values","dir":"Articles","previous_headings":"Intro","what":"Extracting environmental values","title":"flexsdm: Modeling a rare species","text":"Next, extract values four environmental predictors presence pseudo-absence locations.","code":"hespero_pa3 <- sdm_extract( data = hespero_pa2, x = 'x', y = 'y', env_layer = somevar, variables = c('aet', 'cwd', 'tmx', 'tmn') )"},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/articles/v05_Rare_species_example.html","id":"standard-models","dir":"Articles","previous_headings":"Intro > Modeling","what":"Standard models","title":"flexsdm: Modeling a rare species","text":"First, let’s use three standard algorithms model distribution Hesperocyparis stephensonii: GLM, GBM, SVM. 
case, use calibration area making predictions.","code":"mglm <- fit_glm( data = hespero_pa3, response = 'pr_ab', predictors = c('aet', 'cwd', 'tmx', 'tmn'), partition = '.part', thr = 'max_sens_spec' ) #> Formula used for model fitting: #> pr_ab ~ aet + cwd + tmx + tmn + I(aet^2) + I(cwd^2) + I(tmx^2) + I(tmn^2) #> Replica number: 1/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 2/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 3/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 4/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 5/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 6/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 7/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 8/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 9/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 10/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 mgbm <- fit_gbm( data = hespero_pa3, response = 'pr_ab', predictors = c('aet', 'cwd', 'tmx', 'tmn'), partition = '.part', thr = 'max_sens_spec' ) #> Formula used for model fitting: #> pr_ab ~ 
aet + cwd + tmx + tmn #> Replica number: 1/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 2/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 3/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 4/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 5/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 6/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 7/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 8/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 9/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 10/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 msvm <- fit_svm( data = hespero_pa3, response = 'pr_ab', predictors = c('aet', 'cwd', 'tmx', 'tmn'), partition = '.part', thr = 'max_sens_spec' ) #> Formula used for model fitting: #> pr_ab ~ aet + cwd + tmx + tmn #> Replica number: 1/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 2/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 
#> Partition number: 5/5 #> Replica number: 3/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 4/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 5/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 6/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 7/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 8/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 9/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 #> Replica number: 10/10 #> Partition number: 1/5 #> Partition number: 2/5 #> Partition number: 3/5 #> Partition number: 4/5 #> Partition number: 5/5 mpred <- sdm_predict( models = list(mglm, mgbm, msvm), pred = somevar, con_thr = TRUE, predict_area = ca ) #> Predicting list of individual models"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v05_Rare_species_example.html","id":"ensemble-of-small-models","dir":"Articles","previous_headings":"Intro > Modeling","what":"Ensemble of small models","title":"flexsdm: Modeling a rare species","text":"Now let’s try algorithms ESM approach. 
Note predicting ESM, possible process one time.","code":"eglm <- esm_glm( data = hespero_pa3, response = 'pr_ab', predictors = c('aet', 'cwd', 'tmx', 'tmn'), partition = '.part', thr = 'max_sens_spec' ) #> | | | 0% | |============ | 17% | |======================= | 33% | |=================================== | 50% | |=============================================== | 67% | |========================================================== | 83% | |======================================================================| 100% egbm <- esm_gbm( data = hespero_pa3, response = 'pr_ab', predictors = c('aet', 'cwd', 'tmx', 'tmn'), partition = '.part', thr = 'max_sens_spec' ) #> | | | 0% | |============ | 17% | |======================= | 33% | |=================================== | 50% | |=============================================== | 67% | |========================================================== | 83% | |======================================================================| 100% esvm <- esm_svm( data = hespero_pa3, response = 'pr_ab', predictors = c('aet', 'cwd', 'tmx', 'tmn'), partition = '.part', thr = 'max_sens_spec' ) #> | | | 0% | |============ | 17% | |======================= | 33% | |=================================== | 50% | |=============================================== | 67% | |========================================================== | 83% | |======================================================================| 100% eglm_pred <- sdm_predict( models = eglm , pred = somevar, con_thr = TRUE, predict_area = ca ) #> Predicting ensemble of small models egbm_pred <- sdm_predict( models = egbm , pred = somevar, con_thr = TRUE, predict_area = ca ) #> Predicting ensemble of small models esvm_pred <- sdm_predict( models = esvm, pred = somevar, con_thr = TRUE, predict_area = ca ) #> Predicting ensemble of small 
models"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v05_Rare_species_example.html","id":"comparing-our-models","dir":"Articles","previous_headings":"Intro","what":"Comparing our models","title":"flexsdm: Modeling a rare species","text":"First, let’s take look spatial predictions models. spatial outputs suggest standard models tend predict broader areas high suitability values ESMs. Next, look performance metrics models, based repeated k-folds cross-validation partition method. can easily done using “sdm_summarize()” function flexsdm. , can see AUC, TSS, Jaccard index higher ESMs corresponding standard model. However, Boyce index Inverse Mean Absolute Error slightly higher standard models.","code":"par(mfrow = c(3, 2)) plot(mpred$glm, main = 'Standard GLM') #points(hespero$x, hespero$y, pch = 19) plot(eglm_pred[[1]], main = 'ESM GLM') #points(hespero$x, hespero$y, pch = 19) plot(mpred$gbm, main = 'Standard GBM') #points(hespero$x, hespero$y, pch = 19) plot(egbm_pred[[1]], main = 'ESM GBM') #points(hespero$x, hespero$y, pch = 19) plot(mpred$svm, main = 'Standard SVM') #points(hespero$x, hespero$y, pch = 19) plot(esvm_pred[[1]], main = 'ESM SVM') #points(hespero$x, hespero$y, pch = 19) merge_df <- sdm_summarize(models = list(mglm, mgbm, msvm, eglm, egbm, esvm)) knitr::kable( merge_df %>% dplyr::select( model, AUC = AUC_mean, TSS = TSS_mean, JACCARD = JACCARD_mean, BOYCE = BOYCE_mean, IMAE = IMAE_mean ) )"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v05_Rare_species_example.html","id":"conclusions","dir":"Articles","previous_headings":"Intro","what":"Conclusions","title":"flexsdm: Modeling a rare species","text":"Modeling decisions context-dependent must made case--case basis. However, ESM useful approach practitioners interested modeling rare species want avoid common model overfitting issues. 
always producing SDMs “real-world” applications, important consider spatial prediction patterns along multiple model performance metrics.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/articles/v05_Rare_species_example.html","id":"references","dir":"Articles","previous_headings":"Intro","what":"References","title":"flexsdm: Modeling a rare species","text":"Lomba, ., L. Pellissier, C. Randin, J. Vicente, F. Moreira, J. Honrado, . Guisan. 2010. Overcoming rare species modelling paradox: novel hierarchical framework applied Iberian endemic plant. Biological conservation 143:2647–2657. https://doi.org/10.1016/j.biocon.2010.07.007 Breiner, F. T., Guisan, ., Bergamini, ., & Nobis, M. P. (2015). Overcoming limitations modelling rare species using ensembles small models. Methods Ecology Evolution, 6(10), 1210–1218. https://doi.org/10.1111/2041-210X.12403 Breiner, F. T., Nobis, M. P., Bergamini, ., & Guisan, . (2018). Optimizing ensembles small models predicting distribution species occurrences. Methods Ecology Evolution, 9(4), 802–808. https://doi.org/10.1111/2041-210X.12957 Dubos, N., Montfort, F., Grinand, C., Nourtier, M., Deso, G., Probst, J.-M., Razafimanahaka, J. H., Andriantsimanarilafy, R. R., Rakotondrasoa, E. F., Razafindraibe, P., Jenkins, R., & Crottini, . (2021). narrow-ranging species doomed extinction? Projected dramatic decline future climate suitability two highly threatened species. Perspectives Ecology Conservation, S2530064421000894. https://doi.org/10.1016/j.pecon.2021.10.002","code":""},{"path":"https://sjevelazco.github.io/flexsdm/articles/v06_Extrapolation_example.html","id":"introduction","dir":"Articles","previous_headings":"","what":"Introduction","title":"flexsdm: Tools to explore extrapolation in SDMs","text":"Many SDM applications require model extrapolation, e.g., predictions beyond range data set used fit model. 
example, models often must extrapolate predicting habitat suitability novel environmental conditions induced climate change predicting spread invasive species outside native range based species-environment relationship observed native range. flexsdm, offer new approach (known Shape) evaluating extrapolation truncating spatial predictions based degree extrapolation measured. Shape model-agnostic approach calculating degree extrapolation given projection data point multivariate distance nearest training data point – capturing often complex shape data within environmental space. distances relativized factor reflects dispersion training data environmental space. implemented flexsdm, Shape approach also incorporates adjustable threshold allow binary discrimination acceptable unacceptable degrees extrapolation, based user’s needs applications. information Shape metric, recommend reading article Velazco et al., 2023. vignette, walk evaluate model extrapolation Hesperocyparis stephensonii (Cuyamaca cypress), conifer tree species endemic southern California. species listed Critically Endangered IUCN extremely restricted distribution, found headwaters King Creek San Diego County. Note: tutorial follows generally workflow vignette modeling distribution rare species using ensemble small models (ESM). However, instead constructing ESMs, evaluate model extrapolation predict models extent California Floristic Province (CFP).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/articles/v06_Extrapolation_example.html","id":"data","dir":"Articles","previous_headings":"","what":"Data","title":"flexsdm: Tools to explore extrapolation in SDMs","text":"models, use four environmental variables influence plant distributions California: available evapotranspiration (aet), climatic water deficit (cwd), maximum temperature warmest month (tmx), minimum temperature coldest month (tmn). 
occurrence data include 21 geo-referenced observations downloaded online database Calflora.","code":"library(flexsdm) library(terra) library(dplyr) library(patchwork) # environmental data somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) names(somevar) <- c(\"cwd\", \"tmn\", \"aet\", \"ppt_jja\") # species occurence data (presence-only) data(hespero) hespero <- hespero %>% dplyr::select(-id) # California ecoregions regions <- system.file(\"external/regions.tif\", package = \"flexsdm\") regions <- terra::rast(regions) regions <- terra::as.polygons(regions) sp_region <- terra::subset(regions, regions$category == \"SCR\") # ecoregion where *Hesperocyparis stephensonii* is found # visualize the species occurrences plot( sp_region, col = \"gray80\", legend = FALSE, axes = FALSE, main = \"Hesperocyparis stephensonii occurrences\" ) points(hespero[, c(\"x\", \"y\")], col = \"black\", pch = 16) cols <- rep(\"gray80\", 8) cols[regions$category == \"SCR\"] <- \"yellow\" terra::inset( regions, loc = \"bottomleft\", scale = .3, col = cols )"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v06_Extrapolation_example.html","id":"delimit-calibration-area","dir":"Articles","previous_headings":"","what":"Delimit calibration area","title":"flexsdm: Tools to explore extrapolation in SDMs","text":"First, must define model’s calibration area. flexsdm package offers several methods defining model calibration area. 
, use 25-km buffer areas around presence points select pseudo-absence locations.","code":"ca <- calib_area( data = hespero, x = \"x\", y = \"y\", method = c(\"buffer\", width = 25000), crs = crs(somevar) ) # visualize the species occurrences & calibration area plot( sp_region, col = \"gray80\", legend = FALSE, axes = FALSE, main = \"Calibration area and occurrences\" ) plot(ca, add = TRUE) points(hespero[, c(\"x\", \"y\")], col = \"black\", pch = 16)"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v06_Extrapolation_example.html","id":"create-pseudo-absence-data","dir":"Articles","previous_headings":"","what":"Create pseudo-absence data","title":"flexsdm: Tools to explore extrapolation in SDMs","text":"often case rare species, species presence data. However, SDM methods require either pseudo-absence background point data. , use calibration area produce pseudo-absence data can used SDMs.","code":"# Sample the same number of species presences set.seed(10) psa <- sample_pseudoabs( data = hespero, x = \"x\", y = \"y\", n = sum(hespero$pr_ab), # number of pseudo-absence points equal to number of presences method = \"random\", rlayer = somevar, calibarea = ca ) # Visualize species presences and pseudo-absences plot( sp_region, col = \"gray80\", legend = FALSE, axes = FALSE, xlim = c(289347, 353284), ylim = c(-598052, -520709), main = \"Presence = yellow, Pseudo-absence = black\" ) plot(ca, add = TRUE) points(psa[, c(\"x\", \"y\")], cex = 0.8, pch = 16, col = \"black\") # Pseudo-absences points(hespero[, c(\"x\", \"y\")], col = \"yellow\", pch = 16, cex = 1.5) # Presences # Bind a presences and pseudo-absences hespero_pa <- bind_rows(hespero, psa) hespero_pa # Presence-Pseudo-absence database #> # A tibble: 42 × 3 #> x y pr_ab #> #> 1 316923. -557843. 1 #> 2 317155. -559234. 1 #> 3 316960. -558186. 1 #> 4 314347. -559648. 1 #> 5 317348. -557349. 1 #> 6 316753. -559679. 1 #> 7 316777. -558644. 1 #> 8 317050. -559043. 1 #> 9 316655. -559928. 1 #> 10 316418. -567439. 
1 #> # ℹ 32 more rows"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v06_Extrapolation_example.html","id":"partition-data-for-evaluating-models","dir":"Articles","previous_headings":"","what":"Partition data for evaluating models","title":"flexsdm: Tools to explore extrapolation in SDMs","text":"evaluate model performance, need specify data testing training. flexsdm offers range random spatial random data partition methods evaluating SDMs. use repeated K-fold cross-validation, suitable partition approach validating SDM data.","code":"set.seed(10) # Repeated K-fold method hespero_pa2 <- part_random( data = hespero_pa, pr_ab = \"pr_ab\", method = c(method = \"rep_kfold\", folds = 5, replicates = 10) )"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v06_Extrapolation_example.html","id":"extracting-environmental-values","dir":"Articles","previous_headings":"","what":"Extracting environmental values","title":"flexsdm: Tools to explore extrapolation in SDMs","text":"Next, extract values four environmental predictors presence pseudo-absence locations.","code":"hespero_pa3 <- sdm_extract( data = hespero_pa2, x = \"x\", y = \"y\", env_layer = somevar, variables = c(\"cwd\", \"tmn\", \"aet\", \"ppt_jja\") )"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v06_Extrapolation_example.html","id":"modeling","dir":"Articles","previous_headings":"","what":"Modeling","title":"flexsdm: Tools to explore extrapolation in SDMs","text":"Let’s use three standard algorithms model distribution Hesperocyparis stephensonii: GLM, GBM, SVM. 
case, use extent CFP prediction area can evaluate model extrapolation across broad geographic area.","code":"mglm <- fit_glm( data = hespero_pa3, response = \"pr_ab\", predictors = c(\"cwd\", \"tmn\", \"aet\", \"ppt_jja\"), partition = \".part\", thr = \"max_sens_spec\" ) mgbm <- fit_gbm( data = hespero_pa3, response = \"pr_ab\", predictors = c(\"cwd\", \"tmn\", \"aet\", \"ppt_jja\"), partition = \".part\", thr = \"max_sens_spec\" ) msvm <- fit_svm( data = hespero_pa3, response = \"pr_ab\", predictors = c(\"cwd\", \"tmn\", \"aet\", \"ppt_jja\"), partition = \".part\", thr = \"max_sens_spec\" ) mpred <- sdm_predict( models = list(mglm, mgbm, msvm), pred = somevar, con_thr = TRUE, predict_area = NULL )"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v06_Extrapolation_example.html","id":"comparing-our-models","dir":"Articles","previous_headings":"","what":"Comparing our models","title":"flexsdm: Tools to explore extrapolation in SDMs","text":"First, let’s take look spatial predictions models. GLM GBM predict lot suitable habitat far species found!","code":"par(mfrow = c(1, 3)) plot(mpred$glm, main = \"GLM\") # points(hespero$x, hespero$y, pch = 19) plot(mpred$gbm, main = \"GBM\") # points(hespero$x, hespero$y, pch = 19) plot(mpred$svm, main = \"SVM\") # points(hespero$x, hespero$y, pch = 19)"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v06_Extrapolation_example.html","id":"partial-dependence-plots-to-explore-the-impact-of-predictor-conditions-on-suitability","dir":"Articles","previous_headings":"","what":"Partial dependence plots to explore the impact of predictor conditions on suitability","title":"flexsdm: Tools to explore extrapolation in SDMs","text":"Extrapolation reflects issue model handles novel data. , see three algorithms explored tutorial predict pretty different geographic patterns habitat suitability based occurrence/pseudo-absence data environmental predictors. 
Let’s take look partial dependence plots see marginal effect environmental predictors suitability looks like test models. function allows visualize model may extrapolate outside environmental conditions used training, visualizing “projection” data different color. case, environmental predictors cover extent CFP. flexsdm allows users plot univariate partial dependence plots (p_pdp) bivariate partial dependence plots (p_bpdp); shown model. Note: p_bpdp function allows users option show boundaries training data using either rectangle convex hull approach. use convex hull approach. Uni bivariate partial dependence plots GLM: Uni bivariate partial dependence plots GBM: Uni bivariate partial dependence plots SVM: plots show really interesting story! notably, GLM GBM show consistently high habitat suitability areas much higher actual evapotranspiration narrow range values used train model. However, SVM seems best job estimating high habitat suitability environmental values outside training data. Importantly, models can behave differently depending modeling situation context.","code":"p_pdp(model = mglm$model, training_data = hespero_pa3, projection_data = somevar) p_bpdp(model = mglm$model, training_data = hespero_pa3, training_boundaries = \"convexh\") p_pdp(model = mgbm$model, training_data = hespero_pa3, projection_data = somevar) p_bpdp(model = mgbm$model, training_data = hespero_pa3, training_boundaries = \"convexh\", resolution = 100) p_pdp(model = msvm$model, training_data = hespero_pa3, projection_data = somevar) p_bpdp(model = msvm$model, training_data = hespero_pa3, training_boundaries = \"convexh\")"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v06_Extrapolation_example.html","id":"extrapolation-evaluation","dir":"Articles","previous_headings":"","what":"Extrapolation evaluation","title":"flexsdm: Tools to explore extrapolation in SDMs","text":"Remember species highly restricted southern California! 
However, two models (GLM GBM) predict high habitat suitability throughout parts CFP, SVM provides conservative predictions. see GLM GBM tend predict high habitat suitability areas environmentally different training conditions. models extrapolating environmental space? Let’s find using “extra_eval” function SDM. function requires input model training data, column specifying presence vs. absence locations, projection data (can SpatRaster tibble containing data used model projection – can reflect larger region, separate region, different time period used model training), metric calculating degree extrapolation (default Mahalanobis distance, though euclidean also option- explore ), number cores parallel processing, aggregation factor, case want measure extrapolation large data set. First look degree extrapolation geographic space using Shape method based Mahalanobis distance. Also distinguish univariate combinatorial extrapolation. Using Mahalanobis distance: output extra_eval function SpatRaster, showing degree extrapolation across projection area, estimated Shape method. can also explore extrapolation suitability patterns environmental geographic space, using just one function. , use p_extra function. function plots ggplot object. Let’s start extrapolation evaluation. plots show areas high extrapolation (dark blue) far training data (shown black) environmental geographic space. higher extrapolation values extrapolation area northwestern portion CFP. Let’s explore univariate combinatorial extrapolation. 
former defined projecting data outside range training conditions, combinatorial extrapolation area projecting data within range training conditions.","code":"xp_m <- extra_eval( training_data = hespero_pa3, pr_ab = \"pr_ab\", projection_data = somevar, metric = \"mahalanobis\", univar_comb = TRUE, n_cores = 1, aggreg_factor = 1 ) xp_m #> class : SpatRaster #> dimensions : 558, 394, 2 (nrow, ncol, nlyr) #> resolution : 1890, 1890 (x, y) #> extent : -373685.8, 370974.2, -604813.3, 449806.7 (xmin, xmax, ymin, ymax) #> coord. ref. : +proj=aea +lat_0=0 +lon_0=-120 +lat_1=34 +lat_2=40.5 +x_0=0 +y_0=-4000000 +datum=NAD83 +units=m +no_defs #> source(s) : memory #> names : extrapolation, uni_comb #> min values : 0.000, 1 #> max values : 3730.677, 2 cl <- c(\"#FDE725\", \"#B3DC2B\", \"#6DCC57\", \"#36B677\", \"#1F9D87\", \"#25818E\", \"#30678D\", \"#3D4988\", \"#462777\", \"#440154\") par(mfrow = c(1, 2)) plot(xp_m$extrapolation, main = \"Shape metric\", col = cl) plot(xp_m$uni_comb, main = \"Univariate (1) and \\n combinatorial (2) extrapolation\", col = cl) p_extra( training_data = hespero_pa3, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", color_p = \"black\", extra_suit_data = xp_m, projection_data = somevar, geo_space = TRUE, prop_points = 0.05 ) #> Number of cell used to plot 3642 (5%) p_extra( training_data = hespero_pa3, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", color_p = \"black\", extra_suit_data = xp_m$uni_comb, projection_data = somevar, geo_space = TRUE, prop_points = 0.05, color_gradient = c(\"#B3DC2B\", \"#30678D\"), alpha_p = 0.2 ) #> Number of cell used to plot 3642 (5%)"},{"path":"https://sjevelazco.github.io/flexsdm/articles/v06_Extrapolation_example.html","id":"truncating-sdms-predictions-based-on-extrapolation-thresholds","dir":"Articles","previous_headings":"","what":"Truncating SDMs predictions based on extrapolation thresholds","title":"flexsdm: Tools to explore extrapolation in SDMs","text":"Depending user’s end goal, may want exclude suitability values 
environmentally “” far modeling training data. Shape method allows select extrapolation threshold exclude suitability values. truncating models can use p_extra function explore binary extrapolation patter environmental geographical space. test values 50, 100, 500, comparison. Values 1 (yellow one) depict environmental geographical regions constraint models suitability (truncate). Note lower threshold, restrictive environmental geographic regions used constrain model. Now use function extra_truncate truncate suitability predictions made GLM, GBM, SVM based extrapolation thresholds explored previously. note, threshold selection user-dependent, function allows select multiple thresholds one time compare outputs. Users can also select “trunc_value” within extra_truncate function, specifies value assigned cells exceed extrapolation threshold (also specified function). default 0 users also choose another value reduce suitability. Based maps, can see lower extrapolation threshold, restricted habitat suitability patterns, higher values retain greater amount suitable habitat. Selecting best threshold depend modeling goals objectives, . Want learn Shape extrapolation metrics? Read article “Velazco, S. J. E., Brooke, M. R., De Marco Jr., P., Regan, H. M., & Franklin, J. (2023). far can extrapolate species distribution model? Exploring Shape, novel method. Ecography, 11, e06992. 
https://doi.org/10.1111/ecog.06992”","code":"p_extra( training_data = hespero_pa3, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", color_p = \"black\", extra_suit_data = as.numeric(xp_m$extrapolation < 50), projection_data = somevar, geo_space = TRUE, prop_points = 0.05, color_gradient = c(\"gray\", \"#FDE725\"), alpha_p = 0.5 ) + plot_annotation(subtitle = \"Binary extrapolation pattern with using a threshold of 50\") #> Number of cell used to plot 3642 (5%) p_extra( training_data = hespero_pa3, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", color_p = \"black\", extra_suit_data = as.numeric(xp_m$extrapolation < 100), projection_data = somevar, geo_space = TRUE, prop_points = 0.05, color_gradient = c(\"gray\", \"#FDE725\"), alpha_p = 0.5 ) + plot_annotation(subtitle = \"Binary extrapolation pattern with using a threshold of 100\") #> Number of cell used to plot 3642 (5%) p_extra( training_data = hespero_pa3, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", color_p = \"black\", extra_suit_data = as.numeric(xp_m$extrapolation < 500), projection_data = somevar, geo_space = TRUE, prop_points = 0.05, color_gradient = c(\"gray\", \"#FDE725\"), alpha_p = 0.5 ) + plot_annotation(subtitle = \"Binary extrapolation pattern with using a threshold of 500\") #> Number of cell used to plot 3642 (5%) glm_trunc <- extra_truncate( suit = mpred$glm, extra = xp_m, threshold = c(50, 100, 500), trunc_value = 0 ) gbm_trunc <- extra_truncate( suit = mpred$gbm, extra = xp_m, threshold = c(50, 100, 500), trunc_value = 0 ) svm_trunc <- extra_truncate( suit = mpred$svm, extra = xp_m, threshold = c(50, 100, 500), trunc_value = 0 ) par(mfrow = c(3, 3)) plot(glm_trunc$`50`, main = \"GLM; extra threshold = 50\", col = cl) plot(glm_trunc$`100`, main = \"GLM; extra threshold = 100\", col = cl) plot(glm_trunc$`500`, main = \"GLM; extra threshold = 500\", col = cl) plot(gbm_trunc$`50`, main = \"GBM; extra threshold = 50\", col = cl) plot(gbm_trunc$`100`, main = \"GBM; extra threshold = 100\", col = cl) 
plot(gbm_trunc$`500`, main = \"GBM; extra threshold = 500\", col = cl) plot(svm_trunc$`50`, main = \"SVM; extra threshold = 50\", col = cl) plot(svm_trunc$`100`, main = \"SVM; extra threshold = 100\", col = cl) plot(svm_trunc$`500`, main = \"SVM; extra threshold = 500\", col = cl)"},{"path":"https://sjevelazco.github.io/flexsdm/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Santiago J.E. Velazco. Author, maintainer. Brooke Rose. Author. André F.. Andrade. Author. Ignacio Minoli. Author. Janet Franklin. Author.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Velazco, S.J.E., Rose, M.B., Andrade, .F.., Minoli, ., Franklin, J. (2022). flexsdm: R package supporting comprehensive flexible species distribution modelling workflow. Methods Ecology Evolution, 13(8) 1661-1669. https://doi.org/10.1111/2041-210X.13874","code":"@Article{, title = {flexsdm: An R package for supporting a comprehensive and flexible species distribution modelling workflow}, author = {Santiago J.E. Velazco and Brooke Rose and André F.A. Andrade and Ignacio Minoli and Janet Franklin}, journal = {Methods in Ecology and Evolution}, year = {2022}, volume = {13}, number = {8}, pages = {1661-1669}, url = {https://onlinelibrary.wiley.com/doi/10.1111/2041-210X.13874}, }"},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/index.html","id":"overview","dir":"","previous_headings":"","what":"Overview","title":"Tools for Data Preparation, Fitting, Prediction, Evaluation, and Post-Processing of Species Distribution Models","text":"Species distribution modeling become standard tool several research areas ecology, conservation biology, biogeography, paleobiogeography, epidemiology. Species distribution modeling area active research theoretical methodological aspects. 
One exciting features flexsdm high manipulation parametrization capacity based different functions arguments. attributes enable users define complete partial modeling workflow specific modeling situation (e.g., number variables, number records, different algorithms, algorithms tuning, ensemble methods).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/index.html","id":"structure-of-flexsdm","dir":"","previous_headings":"","what":"Structure of flexsdm","title":"Tools for Data Preparation, Fitting, Prediction, Evaluation, and Post-Processing of Species Distribution Models","text":"function flexsdm package organized three major modeling steps","code":""},{"path":"https://sjevelazco.github.io/flexsdm/index.html","id":"id_1-pre-modeling-functions","dir":"","previous_headings":"","what":"1. Pre-modeling functions","title":"Tools for Data Preparation, Fitting, Prediction, Evaluation, and Post-Processing of Species Distribution Models","text":"Set tools prepare modeling input data (e.g., species occurrences thinning, sample pseudo-absences background points, delimitation calibration area). 
calib_area() Delimit calibration area constructing species distribution models correct_colinvar() Collinearity reduction predictors env_outliers() Integration outliers detection methods environmental space part_random() Data partitioning training testing models part_sblock() Spatial block cross validation part_sband() Spatial band cross validation part_senv() Environmental cross-validation plot_res() Plot different resolutions used part_sblock get_block() Transform spatial partition layer spatial properties environmental variables sample_background() Sample background points sample_pseudoabs() Sampel pseudo-absence sdm_directory() Create directories saving outputs flexsdm sdm_extract() Extract environmental data based x y coordinates occfilt_env() Perform environmental filtering species occurrences occfilt_geo() Perform geographical filtering species occurrences","code":""},{"path":"https://sjevelazco.github.io/flexsdm/index.html","id":"id_2-modeling-functions","dir":"","previous_headings":"","what":"2. Modeling functions","title":"Tools for Data Preparation, Fitting, Prediction, Evaluation, and Post-Processing of Species Distribution Models","text":"includes functions related modeling construction validation. Several can grouped fit_*, tune_*, esm_* family functions. fit_* construct validate models default hyper-parameter values. tune_* construct validate models searching best hyper-parameter values combination. esm_ construct validate Ensemble Small Models.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/index.html","id":"model-evaluation","dir":"","previous_headings":"2. Modeling functions","what":"Model evaluation","title":"Tools for Data Preparation, Fitting, Prediction, Evaluation, and Post-Processing of Species Distribution Models","text":"sdm_eval() Calculate different model performance metrics","code":""},{"path":"https://sjevelazco.github.io/flexsdm/index.html","id":"fit_-functions-family","dir":"","previous_headings":"2. 
Modeling functions","what":"fit_* functions family","title":"Tools for Data Preparation, Fitting, Prediction, Evaluation, and Post-Processing of Species Distribution Models","text":"fit_gam() Fit validate Generalized Additive Models fit_gau() Fit validate Gaussian Process models fit_gbm() Fit validate Generalized Boosted Regression models fit_glm() Fit validate Generalized Linear Models fit_max() Fit validate Maximum Entropy models fit_net() Fit validate Neural Networks models fit_raf() Fit validate Random Forest models fit_svm() Fit validate Support Vector Machine models","code":""},{"path":"https://sjevelazco.github.io/flexsdm/index.html","id":"tune_-functions-family","dir":"","previous_headings":"2. Modeling functions","what":"tune_* functions family","title":"Tools for Data Preparation, Fitting, Prediction, Evaluation, and Post-Processing of Species Distribution Models","text":"tune_gbm() Fit validate Generalized Boosted Regression models exploration hyper-parameters tune_max() Fit validate Maximum Entropy models exploration hyper-parameters tune_net() Fit validate Neural Networks models exploration hyper-parameters tune_raf() Fit validate Random Forest models exploration hyper-parameters tune_svm() Fit validate Support Vector Machine models exploration hyper-parameters","code":""},{"path":"https://sjevelazco.github.io/flexsdm/index.html","id":"model-ensemble","dir":"","previous_headings":"2. Modeling functions","what":"Model ensemble","title":"Tools for Data Preparation, Fitting, Prediction, Evaluation, and Post-Processing of Species Distribution Models","text":"fit_ensemble() Fit validate ensemble models different ensemble methods","code":""},{"path":"https://sjevelazco.github.io/flexsdm/index.html","id":"esm_-functions-family","dir":"","previous_headings":"2. 
Modeling functions","what":"esm_* functions family","title":"Tools for Data Preparation, Fitting, Prediction, Evaluation, and Post-Processing of Species Distribution Models","text":"esm_gam() Fit validate Generalized Additive Models Ensemble Small Model approach esm_gau() Fit validate Gaussian Process models Models Ensemble Small Model approach esm_gbm() Fit validate Generalized Boosted Regression models Ensemble Small Model approach esm_glm() Fit validate Generalized Linear Models Ensemble Small Model approach esm_max() Fit validate Maximum Entropy models Ensemble Small Model approach esm_net() Fit validate Neural Networks models Ensemble Small Model approach esm_svm() Fit validate Support Vector Machine models Ensemble Small Model approach","code":""},{"path":"https://sjevelazco.github.io/flexsdm/index.html","id":"id_3-post-modeling-functions","dir":"","previous_headings":"","what":"3. Post-modeling functions","title":"Tools for Data Preparation, Fitting, Prediction, Evaluation, and Post-Processing of Species Distribution Models","text":"Tools related models’ geographical predictions, evaluation, correction. sdm_predict() Spatial predictions individual ensemble model sdm_summarize() Merge model performance tables interp() Raster interpolation two time periods extra_eval() Measure model extrapolation extra_truncate() Constraint suitability values given extrapolation value msdm_priori() Create spatial predictor variables reduce overprediction species distribution models msdm_posteriori() Methods correct overprediction species distribution models based occurrences suitability patterns.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/index.html","id":"id_4-graphical-model-exploration","dir":"","previous_headings":"","what":"4. 
Graphical model exploration","title":"Tools for Data Preparation, Fitting, Prediction, Evaluation, and Post-Processing of Species Distribution Models","text":"Useful tools visually explore models’ geographical environemtal predictions, model extrapolation, partial depnendece plot. p_pdp() Create partial dependence plot(s) explore marginal effect predictors suitability p_bpdp() Create partial dependence surface plot(s) explore bivariate marginal effect predictors suitability p_extra() Graphical exploration extrapolation suitability pattern environmental geographical space data_pdp() Calculate data construct partial dependence plots data_bpdp() Calculate data construct partial dependence surface plots","code":""},{"path":"https://sjevelazco.github.io/flexsdm/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"Tools for Data Preparation, Fitting, Prediction, Evaluation, and Post-Processing of Species Distribution Models","text":"can install development version flexsdm github ⚠️ NOTE: version 1.4-22 terra package causing errors trying instal flexsdm. 
Please, first install version ≥ 1.5-12 terra package available CRAN development version terra flexsdm.","code":"# install.packages(\"remotes\") # For Windows and Mac OS operating systems remotes::install_github(\"sjevelazco/flexsdm\") # For Linux operating system remotes::install_github(\"sjevelazco/flexsdm@HEAD\")"},{"path":"https://sjevelazco.github.io/flexsdm/index.html","id":"package-website","dir":"","previous_headings":"","what":"Package website","title":"Tools for Data Preparation, Fitting, Prediction, Evaluation, and Post-Processing of Species Distribution Models","text":"See package website (https://sjevelazco.github.io/flexsdm/) functions explanation vignettes.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/index.html","id":"package-citation","dir":"","previous_headings":"","what":"Package citation","title":"Tools for Data Preparation, Fitting, Prediction, Evaluation, and Post-Processing of Species Distribution Models","text":"Velazco, S.J.E., Rose, M.B., Andrade, .F.., Minoli, ., Franklin, J. (2022). flexsdm: R package supporting comprehensive flexible species distribution modelling workflow. Methods Ecology Evolution, 13(8) 1661–1669. 
https://doi.org/10.1111/2041-210X.13874 Test package give us feedback send e-mail sjevelazco@gmail.com.","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/pkg_citation/index.html","id":null,"dir":"Pkg_citation","previous_headings":"","what":"Function reference","title":"Function reference","text":"abies data set containing localities environmental condition Abies (fir tree) species California, USA backg data set containing environmental conditions background points calib_area() Delimit calibration area constructing species distribution models correct_colinvar() Collinearity reduction predictor variables data_bpdp() Calculate data construct partial dependence surface plots data_pdp() Calculate data construct partial dependence plots env_outliers() Integration outliers detection methods environmental space esm_gam() Fit validate Generalized Additive Models based Ensembles Small Models approach esm_gau() Fit validate Gaussian Process models based Ensembles Small Models approach esm_gbm() Fit validate Generalized Boosted Regression models based Ensembles Small Models approach esm_glm() Fit validate Generalized Linear Models based Ensembles Small Models approach esm_max() Fit validate Maximum Entropy Models based Ensemble Small Model approach esm_net() Fit validate Neural Networks based Ensembles Small Models approach esm_svm() Fit validate Support Vector Machine models based Ensembles Small Models approach extra_eval() Measure model extrapolation based Shape extrapolation metric extra_truncate() Truncate suitability predictions based extrapolation value fit_ensemble() Ensemble model fitting validation fit_gam() Fit validate Generalized Additive Models fit_gau() Fit validate Gaussian Process models fit_gbm() Fit validate Generalized Boosted Regression models fit_glm() Fit validate Generalized Linear Models fit_max() Fit validate Maximum Entropy models fit_net() Fit validate Neural Networks models fit_raf() Fit validate Random Forests models fit_svm() 
Fit validate Support Vector Machine models get_block() Transform spatial partition layer spatial properties environmental variables hespero data set containing localities Hesperocyparis stephensonii species California, USA homogenize_na() Homogenize cells NAs across layers interp() Raster interpolation two time periods msdm_posteriori() Methods correct overprediction species distribution models based occurrences suitability patterns. msdm_priori() Create spatial predictor variables reduce overprediction species distribution models occfilt_env() Perform environmental filtering species occurrences occfilt_geo() Perform geographical filtering species occurrences part_random() Conventional data partitioning methods part_sband() Spatial band cross-validation part_sblock() Spatial block cross-validation part_senv() Environmental spatial cross-validation plot_res() Plot different resolutions used part_sblock p_bpdp() Bivariate partial dependence plot p_extra() Graphical exploration extrapolation suitability pattern environmental geographical space p_pdp() Partial Dependent Plot sample_background() Sample background points sample_pseudoabs() Sample pseudo-absences sdm_directory() Create directories saving outputs flexsdm sdm_eval() Calculate different model performance metrics sdm_extract() Extract environmental data values spatial raster based x y coordinates sdm_predict() Spatial predictions individual ensemble models sdm_summarize() Merge model performance tables spp data set containing presences absences three virtual species tune_gbm() Fit validate Generalized Boosted Regression models exploration hyper-parameters optimize performance tune_max() Fit validate Maximum Entropy models exploration hyper-parameters optimize performance tune_net() Fit validate Neural Networks models exploration hyper-parameters tune_raf() Fit validate Random Forest models exploration hyper-parameters optimize performance tune_svm() Fit validate Support Vector Machine models exploration 
hyper-parameters optimize performance","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/abies.html","id":null,"dir":"Reference","previous_headings":"","what":"A data set containing localities and environmental condition of an Abies (fir tree) species in California, USA — abies","title":"A data set containing localities and environmental condition of an Abies (fir tree) species in California, USA — abies","text":"data set containing localities environmental condition Abies (fir tree) species California, USA","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/abies.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"A data set containing localities and environmental condition of an Abies (fir tree) species in California, USA — abies","text":"","code":"abies"},{"path":"https://sjevelazco.github.io/flexsdm/reference/abies.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"A data set containing localities and environmental condition of an Abies (fir tree) species in California, USA — abies","text":"tibble object 5000 rows 10 variables: ID presences absences records ID pr_ab presence absences denoted 1 0 respectively x y columns coordinates Albers Equal Area Conic coordinate system column aet landform columns values environmental variables locality","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/abies.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"A data set containing localities and environmental condition of an Abies (fir tree) species in California, USA — abies","text":"","code":"if (FALSE) { require(dplyr) data(\"abies\") abies }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/backg.html","id":null,"dir":"Reference","previous_headings":"","what":"A data set containing environmental conditions of background points — backg","title":"A data set containing environmental conditions of 
background points — backg","text":"data set containing environmental conditions background points","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/backg.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"A data set containing environmental conditions of background points — backg","text":"","code":"backg"},{"path":"https://sjevelazco.github.io/flexsdm/reference/backg.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"A data set containing environmental conditions of background points — backg","text":"tibble object 5000 rows 10 variables: pr_ab background point denoted 0 x y columns geographical coordinates column aet landform columns values environmental variables coordinate locations","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/backg.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"A data set containing environmental conditions of background points — backg","text":"","code":"if (FALSE) { require(dplyr) data(\"backg\") backg }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/calib_area.html","id":null,"dir":"Reference","previous_headings":"","what":"Delimit calibration area for constructing species distribution models — calib_area","title":"Delimit calibration area for constructing species distribution models — calib_area","text":"function offers different methods define calibration area. 
output used flexsdm functions like sample_backgroud, sample_pseudoabs, sdm_predict, among others","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/calib_area.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Delimit calibration area for constructing species distribution models — calib_area","text":"","code":"calib_area(data, x, y, method, groups = NULL, crs = NULL)"},{"path":"https://sjevelazco.github.io/flexsdm/reference/calib_area.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Delimit calibration area for constructing species distribution models — calib_area","text":"data data.frame tibble. Database presences x character. Column name longitude data y character. Column name latitude data method character. Method used delimiting calibration area. necessary concatenate (c()) different objects argument. following methods implemented: buffer: calibration area defined buffer around presences. Usage method = c('buffer', width=40000). value buffer width m must provided CRS longitude/latitude, map units cases mcp: calibration area defined minimum convex polygon. Usage method = 'mcp'. bmcp: calibration area defined buffered minimum convex polygon buffer width. Usage method = c('bmcp', width=40000). value buffer width m must provided CRS longitude/latitude, map units cases mask: calibration area defined selected polygons spatial vector object intersected presences. Usage method = c(\"mask\", clusters, \"DN\"). second concatenated element must SpatVector, third element character column name SpatVector used filtering polygons. groups character. Column name indicating differentiated subsets points. used mcp bmcp method. Default NULL crs character. Coordinate reference system used transforming occurrences outputs. set NULL, result mask method crs SpatVector used. 
Define crs mandatory buffer, mcp bmcp method.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/calib_area.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Delimit calibration area for constructing species distribution models — calib_area","text":"SpatVector","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/calib_area.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Delimit calibration area for constructing species distribution models — calib_area","text":"","code":"if (FALSE) { require(terra) require(dplyr) data(\"spp\") clusters <- system.file(\"external/clusters.shp\", package = \"flexsdm\") clusters <- terra::vect(clusters) single_spp <- spp %>% dplyr::filter(species == \"sp1\") %>% dplyr::filter(pr_ab == 1) %>% dplyr::select(-pr_ab) plot(clusters) points(single_spp[-1], col=\"red\") crs(clusters, proj=TRUE) # coordinate reference system (CRS) used for this points database # note that the unit of this CRS is in m, consequently the buffer width # will be interpreted in m too # buffer method ca_1 <- calib_area( data = single_spp, x = \"x\", y = \"y\", method = c(\"buffer\", width = 40000), crs = crs(clusters) ) plot(ca_1) points(single_spp[, 2:3], pch = 19, cex = 0.5) # mcp method ca_2 <- calib_area( data = single_spp, x = \"x\", y = \"y\", method = \"mcp\", crs = crs(clusters) ) plot(ca_2) points(single_spp[, 2:3], pch = 19, cex = 0.5) # mcp method for different groups single_spp <- single_spp %>% mutate(groups = ifelse(x > 150000, \"a\", \"b\")) plot(single_spp[, 2:3], pch = 19, col = \"blue\") points(single_spp[single_spp$groups == \"a\", 2:3], col = \"red\", pch = 19) points(single_spp[, 2:3]) ca_2.1 <- calib_area( data = single_spp, x = \"x\", y = \"y\", method = c(\"mcp\"), crs = crs(clusters), groups = \"groups\" ) plot(ca_2.1) points(single_spp[, 2:3], pch = 19, cex = 0.5) # bmcp method ca_3 <- calib_area( data = single_spp, x = \"x\", y = 
\"y\", method = c(\"bmcp\", width = 30000), crs = crs(clusters) ) plot(ca_3) points(single_spp[, 2:3], pch = 19, cex = 0.5) # bmcp method for different groups ca_3.1 <- calib_area( data = single_spp, x = \"x\", y = \"y\", method = c(\"bmcp\", width = 30000), crs = crs(clusters), groups = \"groups\" ) plot(ca_3.1) points(single_spp[, 2:3], pch = 19, cex = 0.5) # mask method plot(clusters) names(clusters) ca_3.1 <- calib_area( data = single_spp, x = \"x\", y = \"y\", method = c(\"mask\", clusters, \"clusters\"), ) plot(ca_3.1) points(single_spp[, 2:3], pch = 19, cex = 0.5, col = \"red\") }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/correct_colinvar.html","id":null,"dir":"Reference","previous_headings":"","what":"Collinearity reduction of predictor variables — correct_colinvar","title":"Collinearity reduction of predictor variables — correct_colinvar","text":"Collinearity reduction predictor variables","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/correct_colinvar.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Collinearity reduction of predictor variables — correct_colinvar","text":"","code":"correct_colinvar( env_layer, method, proj = NULL, restric_to_region = NULL, restric_pca_proj = FALSE, maxcell = NULL )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/correct_colinvar.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Collinearity reduction of predictor variables — correct_colinvar","text":"env_layer SpatRaster object class SpatRaster containing predictors. function allow categorical variables method character. Collinearity reduction method. necessary provide vector argument. next methods implemented: pearson: Highlights correlated variables according Pearson correlation. threshold maximum correlation must specified. Otherwise, threshold 0.7 defined default. Usage method = c('pearson', th='0.7'). 
vif: Select variables Variance Inflation Factor, threshold can specified user. Otherwise, threshold 10 defined default.Usage method = c('vif', th = '10'). pca: Perform Principal Component Analysis use principal components new predictors. selected components account 95% whole variation system. Usage method = c('pca'). fa: Perform Factorial Analysis select, original predictors, number factors defined Broken-Stick variables highest correlation factors selected. Usage method = c('fa'). proj character. used pca method. Path folder contains sub-folders different projection scenarios. Variables names must names raster used env_layer argument. Usage proj = \"C:/User/Desktop/Projections\" (see Details use argument) restric_to_region SpatVector. Area used restrict cells env_layer moment perform collinearity reduction. restric_pca_proj logical. Area used restrict geographically PCA projection within SpatVector used restric_to_region. use PCA analysis. default: FALSE. maxcell numeric. Number raster cells randomly sampled. Taking sample useful reduce memory usage large rasters. NULL, function use raster cells. Default NULL. 
Usage maxcell = 50000.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/correct_colinvar.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Collinearity reduction of predictor variables — correct_colinvar","text":"#' 'pearson', returns list following elements: cor_table: matrix object pairwise correlation values environmental variables cor_variables: list object length number environmental values containing pairwise relations exceeded correlation threshold one environmental variables 'vif' method, returns list following elements: env_layer: SpatRaster object selected environmental variables removed_variables: character vector removed environmental variables vif_table: data frame VIF values environmental variables 'pca' method, returns list following elements: env_layer: SpatRaster scores selected principal component (PC) sum 95% whole variation original environmental variables coefficients: matrix coefficient principal component (PC) predictors cumulative_variance: tibble cumulative variance explained selected principal component (PC) 'fa' method, returns list following elements: env_layer: SpatRaster scores selected variables due correlation factors. number_factors: number factors selected according Broken-Stick criteria, removed_variables: removed variables, uniqueness: uniqueness environmental variable according factorial analysis, loadings: environmental variables loadings chosen factors","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/correct_colinvar.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Collinearity reduction of predictor variables — correct_colinvar","text":"case environmental variables current conditions time periods (future present), recommended perform PCA analysis current environmental condition project PCA time periods. , necessary use “proj” argument. 
Path folder (e.g., projections) contains sub-folders different projection scenarios (e.g., years emissions). Within sub-folder must stored single multiband rasters environmental variables. example: C:/Users/my_pc/projections/ ├── MRIESM_2050_ssp126 │ └── var1.tif │ └── var2.tif │ └── var3.tif ├── MRIESM_2080_ssp585 │ └── var1.tif │ └── var2.tif │ └── var3.tif ├── UKESM_2050_ssp370 │ └── var1.tif │ └── var2.tif │ └── var3.tif pca method run time projections, correct_colinvar function create Projection_PCA (exact path path object returned function) system sub-folders multiband raster principal components (pcs.tif) C:/Users/my_pc/Projection_PCA/ ├── MRIESM_2050_ssp126 │ └── pcs.tif # multiband tif principal components ├── MRIESM_2080_ssp585 │ └── pcs.tif ├── UKESM_2050_ssp370 │ └── pcs.tif","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/correct_colinvar.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Collinearity reduction of predictor variables — correct_colinvar","text":"","code":"if (FALSE) { require(terra) require(dplyr) somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) # Perform pearson collinearity control var <- correct_colinvar(env_layer = somevar, method = c(\"pearson\", th = \"0.7\")) var$cor_table var$cor_variables # For all correct_colinvar methods it is possible to take a sample or raster to reduce memory var <- correct_colinvar(env_layer = somevar, method = c(\"pearson\", th = \"0.7\"), maxcell = 10000) var$cor_table var$cor_variables # Perform vif collinearity control var <- correct_colinvar(env_layer = somevar, method = c(\"vif\", th = \"8\")) var$env_layer var$removed_variables var$vif_table # Perform pca collinearity control var <- correct_colinvar(env_layer = somevar, method = c(\"pca\")) plot(var$env_layer) var$env_layer var$coefficients var$cumulative_variance # Perform pca collinearity control with different projections ## Below 
will be created a set of folders to simulate the structure of the directory where ## environmental variables are stored for different scenarios dir_sc <- file.path(tempdir(), \"projections\") dir.create(dir_sc) dir_sc <- file.path(dir_sc, c('scenario_1', 'scenario_2')) sapply(dir_sc, dir.create) somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) terra::writeRaster(somevar, file.path(dir_sc[1], \"somevar.tif\"), overwrite=TRUE) terra::writeRaster(somevar, file.path(dir_sc[2], \"somevar.tif\"), overwrite=TRUE) ## Perform pca with projections dir_w_proj <- dirname(dir_sc[1]) dir_w_proj var <- correct_colinvar(env_layer = somevar, method = \"pca\", proj = dir_w_proj) var$env_layer var$coefficients var$cumulative_variance var$proj # Perform fa colinearity control var <- correct_colinvar(env_layer = somevar, method = c(\"fa\")) var$env_layer var$number_factors var$removed_variables var$uniqueness var$loadings ##%######################################################%## # # #### Other option to perform PCA #### #### considering cell restricted to a region #### # # ##%######################################################%## # Define a calibration area abies2 <- abies %>% dplyr::select(x, y, pr_ab) %>% dplyr::filter(pr_ab==1) plot(somevar[[1]]) points(abies2[-3]) ca <- calib_area(abies2, x = \"x\", y = \"y\", method = c(\"mcp\"), crs=crs(somevar)) plot(ca, add=T) # Full geographical range to perform PCA pca_fr <- correct_colinvar(env_layer = somevar , method = c(\"pca\"), maxcell = NULL, restric_to_region = NULL, restric_pca_proj = FALSE) # Perform PCA only with cell delimited by polygon used in restric_to_region pca_rr <- correct_colinvar(env_layer = somevar , method = c(\"pca\"), maxcell = NULL, restric_to_region = ca, restric_pca_proj = FALSE) # Perform and predicted PCA only with cell delimited by polygon used in restric_to_region pca_rrp <- correct_colinvar(env_layer = somevar , method = c(\"pca\"), maxcell = NULL, 
restric_to_region = ca, restric_pca_proj = TRUE) plot(pca_fr$env_layer) # PCA with all cells plot(pca_rr$env_layer) # PCA with calibration area cell but predicted for entire region plot(pca_rrp$env_layer) # PCA performed and predicted for cells within calibration area (ca) }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/data_bpdp.html","id":null,"dir":"Reference","previous_headings":"","what":"Calculate data to construct partial dependence surface plots — data_bpdp","title":"Calculate data to construct partial dependence surface plots — data_bpdp","text":"Calculate data construct Partial dependence surface plot (.e., bivariate dependence plot) two predictor set","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/data_bpdp.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Calculate data to construct partial dependence surface plots — data_bpdp","text":"","code":"data_bpdp( model, predictors, resolution = 50, training_data = NULL, training_boundaries = NULL, projection_data = NULL, clamping = FALSE )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/data_bpdp.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Calculate data to construct partial dependence surface plots — data_bpdp","text":"model model object class \"gam\", \"gbm\", \"glm\", \"graf\", \"ksvm\", \"ksvm\", \"maxnet”, “nnet\", \"randomForest\" model can found first element list returned function fit_, tune_, esm_ function families predictors character. Vector two predictor name(s) plot. NULL predictors plotted. Default NULL resolution numeric. Number equally spaced points predict continuous predictors. Default 50 training_data data.frame. Database response (0,1) predictor values used fit model. Default NULL training_boundaries character. Plot training conditions boundaries based training data (.e., presences, presences absences, etc). 
training_boundaries = \"convexh\", function delimit training environmental region based convex-hull. training_boundaries = \"rectangle\", function delimit training environmental region based four straight lines. used methods necessary provide data training_data argument. NULL predictors used. Default NULL. projection_data SpatRaster. Raster layer environmental variables used model projection. Default NULL clamping logical. Perform clamping. maxent models. Default FALSE","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/data_bpdp.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Calculate data to construct partial dependence surface plots — data_bpdp","text":"list two tibbles \"pspdata\" \"training_boundaries\". pspdata: data construct partial dependence surface plot, first two column includes values selected environmental variables, third column predicted suitability. training_boundaries: data plot boundaries training data.","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/data_bpdp.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Calculate data to construct partial dependence surface plots — data_bpdp","text":"","code":"if (FALSE) { library(terra) library(dplyr) somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) # environmental data names(somevar) <- c(\"aet\", \"cwd\", \"tmx\", \"tmn\") data(abies) abies2 <- abies %>% select(x, y, pr_ab) abies2 <- sdm_extract(abies2, x = \"x\", y = \"y\", env_layer = somevar ) abies2 <- part_random(abies2, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) m <- fit_svm( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmx\", \"tmn\"), partition = \".part\", thr = c(\"max_sens_spec\") ) df <- data_bpdp( model = m$model, predictors = c(\"aet\", \"cwd\"), resolution = 50, projection_data = somevar, training_boundaries = \"rectangle\", 
training_data = abies2, clamping = TRUE ) df names(df) df$pspdata df$training_boundaries # see p_bpdp to construct partial dependence plot with ggplot2 }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/data_pdp.html","id":null,"dir":"Reference","previous_headings":"","what":"Calculate data to construct partial dependence plots — data_pdp","title":"Calculate data to construct partial dependence plots — data_pdp","text":"Calculate data construct partial dependence plots given predictor","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/data_pdp.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Calculate data to construct partial dependence plots — data_pdp","text":"","code":"data_pdp( model, predictors, resolution = 50, resid = FALSE, training_data = NULL, projection_data = NULL, clamping = FALSE )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/data_pdp.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Calculate data to construct partial dependence plots — data_pdp","text":"model model object class \"gam\", \"gbm\", \"glm\", \"graf\", \"ksvm\", \"ksvm\", \"maxnet”, “nnet\", \"randomForest\" model can found first element list returned function fit_, tune_, esm_ function families predictors character. Vector predictor name. resolution numeric. Number equally spaced points predict continuous predictors. Default 50 resid logical. Calculate residuals based training data. Default FALSE training_data data.frame. Database response (0,1) predictor values used fit model. Default NULL projection_data SpatRaster. Raster layer environmental variables used model projection. argument used, function calculate partial dependence curves distinguishing conditions used training projection conditions (.e., projection data present projection area training). Default NULL clamping logical. Perform clamping. maxent models. 
Default FALSE","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/data_pdp.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Calculate data to construct partial dependence plots — data_pdp","text":"list two tibbles \"pdpdata\" \"resid\". pdpdata: data construct partial dependence plots, first column includes values selected environmental variable, second column predicted suitability, third column range type, two values Training Projecting, referring suitability calculated within outside range training conditions. Third column returned \"projection_data\" argument used resid: data plot residuals. first column includes values selected environmental variable second column predicted suitability.","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/data_pdp.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Calculate data to construct partial dependence plots — data_pdp","text":"","code":"if (FALSE) { library(terra) library(dplyr) somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) # environmental data names(somevar) <- c(\"aet\", \"cwd\", \"tmx\", \"tmn\") data(abies) abies2 <- abies %>% select(x, y, pr_ab) abies2 <- sdm_extract(abies2, x = \"x\", y = \"y\", env_layer = somevar ) abies2 <- part_random(abies2, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) svm_t1 <- fit_svm( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmx\", \"tmn\"), partition = \".part\", thr = c(\"max_sens_spec\") ) df <- data_pdp( model = svm_t1$model, predictors = c(\"aet\"), resolution = 100, resid = TRUE, projection_data = somevar, training_data = abies2, clamping = FALSE ) df names(df) df$pdpdata df$resid plot(df$pdpdata[1:2], type = \"l\") points(df$resid[1:2], cex = 0.5) # see p_pdp to construct partial dependence plot with ggplot2 
}"},{"path":"https://sjevelazco.github.io/flexsdm/reference/data_psp.html","id":null,"dir":"Reference","previous_headings":"","what":"Calculate data to construct partial dependence surface plots — data_psp","title":"Calculate data to construct partial dependence surface plots — data_psp","text":"Calculate data construct Partial dependence surface plot (.e., bivariate dependence plot) two predictor set","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/data_psp.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Calculate data to construct partial dependence surface plots — data_psp","text":"","code":"data_psp( model, predictors, resolution = 50, training_data = NULL, pchull = FALSE, projection_data = NULL, clamping = FALSE )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/data_psp.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Calculate data to construct partial dependence surface plots — data_psp","text":"model model object class \"gam\", \"gbm\", \"glm\", \"graf\", \"ksvm\", \"ksvm\", \"maxnet”, “nnet\", \"randomForest\" model can found first element list returned function fit_, tune_, esm_ function families predictors character. Vector two predictor name(s) plot. NULL predictors plotted. Default NULL resolution numeric. Number equally spaced points predict continuous predictors. Default 50 training_data data.frame. Database response (0,1) predictor values used fit model. Default NULL pchull logical. Extract convex-hull limit training data. Default FALSE projection_data SpatRaster. Raster layer environmental variables used model projection. Default NULL clamping logical. Perform clamping. maxent models. 
Default FALSE","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/data_psp.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Calculate data to construct partial dependence surface plots — data_psp","text":"list two tibbles \"pdpdata\" \"resid\". pspdata: data construct partial dependence surface plot, first two column includes values selected environmental variables, third column predicted suitability. pchull: data plot residuals convex hull polygon bounding calibration data.","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/data_psp.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Calculate data to construct partial dependence surface plots — data_psp","text":"","code":"if (FALSE) { library(terra) library(dplyr) somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) # environmental data names(somevar) <- c(\"aet\", \"cwd\", \"tmx\", \"tmn\") data(abies) abies2 <- abies %>% select(x, y, pr_ab) abies2 <- sdm_extract(abies2, x = \"x\", y = \"y\", env_layer = somevar ) abies2 <- part_random(abies2, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) m <- fit_svm( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmx\", \"tmn\"), partition = \".part\", thr = c(\"max_sens_spec\") ) df <- data_psp( model = m$model, predictors = c(\"aet\", \"cwd\"), resolution = 50, projection_data = somevar, pchull = TRUE, training_data = abies2, clamping = TRUE ) df names(df) df$pspdata df$pchull # see p_psp to construct partial dependence plot with ggplot2 }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/env_outliers.html","id":null,"dir":"Reference","previous_headings":"","what":"Integration of outliers detection methods in environmental space — env_outliers","title":"Integration of outliers detection methods in environmental space — env_outliers","text":"function 
performs different methods detecting outliers species distribution data based environmental conditions occurrences. methods need presence absence data (e.g. Two-class Support Vector Machine Random Forest) use presences (e.g. Reverse Jackknife, Box-plot, Random Forest outliers) . Outlier detection can useful procedure occurrence data cleaning (Chapman 2005, Liu et al., 2018).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/env_outliers.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Integration of outliers detection methods in environmental space — env_outliers","text":"","code":"env_outliers(data, x, y, pr_ab, id, env_layer)"},{"path":"https://sjevelazco.github.io/flexsdm/reference/env_outliers.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Integration of outliers detection methods in environmental space — env_outliers","text":"data data.frame tibble presence (presence-absence) records, coordinates x character. Column name longitude data. y character. Column name latitude data. pr_ab character. Column name presence absence data (.e. 1 0) id character. Column name row id. row (record) must unique code. env_layer SpatRaster. 
Raster environmental variables","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/env_outliers.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Integration of outliers detection methods in environmental space — env_outliers","text":"tibble object database used 'data' argument seven additional columns, 1 0 denote presence detected outliers .out_bxpt: outliers detected Box-plot method .out_jack: outliers detected Reverse Jackknife method .out_svm: outliers detected Support Vector Machine method .out_rf: outliers detected Random Forest method .out_rfout: outliers detected Random Forest Outliers method .out_sum: frequency presences records detected outliers based previous methods (values 0 6).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/env_outliers.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Integration of outliers detection methods in environmental space — env_outliers","text":"function apply outliers detection methods occurrence data. Box-plot Reverse Jackknife method test outliers variable individually, occurrence behaves outlier least one variable highlighted outlier. user uses presence data, Support Vector Machine Random Forest Methods performed. Support Vector Machine Random Forest performed default hyper-parameter values. case species < 7 occurrences, function perform methods (.e. additional columns 0 values); nonetheless, return tibble additional columns 0 1. information methods, see Chapman (2005), Liu et al. (2018), Velazco et al. (2022).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/env_outliers.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Integration of outliers detection methods in environmental space — env_outliers","text":"Chapman, . D. (2005). Principles methods data cleaning: Primary Species Species-Occurrence Data. version 1.0. 
Report Global Biodiversity Information Facility, Copenhagen. p72. http://www.gbif.org/document/80528 Liu, C., White, M., & Newell, G. (2018). Detecting outliers species distribution data. Journal Biogeography, 45(1), 164 - 176. https://doi.org/10.1111/jbi.13122 Velazco, S.J.E.; Bedrij, N..; Keller, H..; Rojas, J.L.; Ribeiro, B.R.; De Marco, P. (2022) Quantifying role protected areas safeguarding uses biodiversity. Biological Conservation, xx(xx) xx-xx. https://doi.org/10.1016/j.biocon.2022.109525","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/env_outliers.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Integration of outliers detection methods in environmental space — env_outliers","text":"","code":"if (FALSE) { require(dplyr) require(terra) require(ggplot2) # Environmental variables somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) # Species occurrences data(\"spp\") spp spp1 <- spp %>% dplyr::filter(species == \"sp1\") somevar[[1]] %>% plot() points(spp1 %>% filter(pr_ab == 1) %>% select(x, y), col = \"blue\", pch = 19) points(spp1 %>% filter(pr_ab == 0) %>% select(x, y), col = \"red\", cex = 0.5) spp1 <- spp1 %>% mutate(idd = 1:nrow(spp1)) # Detect outliers outs_1 <- env_outliers( data = spp1, pr_ab = \"pr_ab\", x = \"x\", y = \"y\", id = \"idd\", env_layer = somevar ) # How many outliers were detected by different methods? out_pa <- outs_1 %>% dplyr::select(starts_with(\".\"), -.out_sum) %>% apply(., 2, function(x) sum(x, na.rm = T)) out_pa # How many outliers were detected by the sum of different methods? outs_1 %>% dplyr::group_by(.out_sum) %>% dplyr::count() # Let explor where are locate records highlighted as outliers outs_1 %>% dplyr::filter(pr_ab == 1, .out_sum > 0) %>% ggplot(aes(x, y)) + geom_point(aes(col = factor(.out_sum))) + facet_wrap(. 
~ factor(.out_sum)) # Detect outliers only with presences outs_2 <- env_outliers( data = spp1 %>% dplyr::filter(pr_ab == 1), pr_ab = \"pr_ab\", x = \"x\", y = \"y\", id = \"idd\", env_layer = somevar ) # How many outliers were detected by different methods out_p <- outs_2 %>% dplyr::select(starts_with(\".\"), -.out_sum) %>% apply(., 2, function(x) sum(x, na.rm = T)) # How many outliers were detected by the sum of different methods? outs_2 %>% dplyr::group_by(.out_sum) %>% dplyr::count() # Let explor where are locate records highlighted as outliers outs_2 %>% dplyr::filter(pr_ab == 1, .out_sum > 0) %>% ggplot(aes(x, y)) + geom_point(aes(col = factor(.out_sum))) + facet_wrap(. ~ factor(.out_sum)) # Comparison of function outputs when using it with # presences-absences or only presences data. bind_rows(out_p, out_pa) # Because the second case only were used presences, outliers methods # based in Random Forest (.out_rf) and Support Vector Machines (.out_svm) # were not performed. }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_gam.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit and validate Generalized Additive Models based on Ensembles of Small Models approach — esm_gam","title":"Fit and validate Generalized Additive Models based on Ensembles of Small Models approach — esm_gam","text":"function constructs Generalized Additive Models using Ensembles Small Models (ESM) approach (Breiner et al., 2015, 2018).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_gam.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit and validate Generalized Additive Models based on Ensembles of Small Models approach — esm_gam","text":"","code":"esm_gam(data, response, predictors, partition, thr = NULL, k = 3)"},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_gam.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit and validate Generalized 
Additive Models based on Ensembles of Small Models approach — esm_gam","text":"data data.frame. Database response (0,1) predictors values. response character. Column name species absence-presence data (0,1) predictors character. Vector column names quantitative predictor variables (.e. continuous variables). function allow categorical variables can construct models continuous variables. Usage predictors = c(\"aet\", \"cwd\", \"tmin\"). partition character. Column name training validation partition groups. thr character. Threshold used get binary suitability values (.e. 0,1). useful threshold-dependent performance metrics. possible use one threshold type. necessary provide vector argument. following threshold criteria available: equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard index highest. max_sorensen: threshold Sorensen index highest. max_fpb: threshold FPB (F-measure presence-background data) highest. sensitivity: Threshold based specified sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers sensitivity value. sensitivity value specified, default value 0.9. user wants include one threshold type, necessary concatenate threshold types, e.g., thr=c('max_sens_spec', 'max_jaccard'), thr=c('max_sens_spec', 'sensitivity', sens='0.8'), thr=c('max_sens_spec', 'sensitivity'). Function use thresholds threshold specified k integer. dimension basis used represent smooth term. Default 3. 
ESM proposed fit models little data, recommend using small values parameter.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_gam.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit and validate Generalized Additive Models based on Ensembles of Small Models approach — esm_gam","text":"list object : esm_model: list \"gam\" class object mgcv package bivariate model. object can used predicting ensemble small models sdm_predict function. predictors: tibble variables use modeling. performance: Performance metrics (see sdm_eval). Threshold dependent metrics calculated based threshold specified argument.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_gam.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Fit and validate Generalized Additive Models based on Ensembles of Small Models approach — esm_gam","text":"method consists creating bivariate models pair-wise combinations predictors perform ensemble based average suitability weighted Somers' D metric (D = 2 x (AUC -0.5)). ESM recommended modeling species occurrences. function allow categorical variables use types variables problematic using occurrences. detail see Breiner et al. (2015, 2018). function fits GAM using mgcv package, Binomial distribution family thin plate regression spline smoothing basis (see ?mgcv::s).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_gam.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Fit and validate Generalized Additive Models based on Ensembles of Small Models approach — esm_gam","text":"Breiner, F. T., Guisan, ., Bergamini, ., & Nobis, M. P. (2015). Overcoming limitations modelling rare species using ensembles small models. Methods Ecology Evolution, 6(10), 1210-1218. https://doi.org/10.1111/2041-210X.12403 Breiner, F. T., Nobis, M. P., Bergamini, ., & Guisan, . (2018). 
Optimizing ensembles small models predicting distribution species occurrences. Methods Ecology Evolution, 9(4), 802-808. https://doi.org/10.1111/2041-210X.12957","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_gam.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit and validate Generalized Additive Models based on Ensembles of Small Models approach — esm_gam","text":"","code":"if (FALSE) { data(\"abies\") require(dplyr) # Using k-fold partition method set.seed(10) abies2 <- abies %>% na.omit() %>% group_by(pr_ab) %>% dplyr::slice_sample(n = 10) %>% group_by() abies2 <- part_random( data = abies2, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 3) ) abies2 # Without threshold specification and with kfold esm_gam_t1 <- esm_gam( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmin\", \"ppt_djf\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), partition = \".part\", thr = NULL ) esm_gam_t1$esm_model # bivariate model esm_gam_t1$predictors esm_gam_t1$performance # Test with rep_kfold partition abies2 <- abies2 %>% select(-starts_with(\".\")) set.seed(10) abies2 <- part_random( data = abies2, pr_ab = \"pr_ab\", method = c(method = \"rep_kfold\", folds = 3, replicates = 5) ) abies2 esm_gam_t2 <- esm_gam( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmin\", \"ppt_djf\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), partition = \".part\", thr = NULL ) esm_gam_t2$esm_model # bivariate model esm_gam_t2$predictors esm_gam_t2$performance # Test with other bootstrap abies2 <- abies2 %>% select(-starts_with(\".\")) set.seed(10) abies2 <- part_random( data = abies2, pr_ab = \"pr_ab\", method = c(method = \"boot\", replicates = 10, proportion = 0.7) ) abies2 esm_gam_t3 <- esm_gam( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmin\", \"ppt_djf\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), partition = \".part\", thr = NULL ) 
esm_gam_t3$esm_model # bivariate model esm_gam_t3$predictors esm_gam_t3$performance }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_gau.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit and validate Gaussian Process models based on Ensembles of Small Models approach — esm_gau","title":"Fit and validate Gaussian Process models based on Ensembles of Small Models approach — esm_gau","text":"function constructs Gaussian Process models using Ensembles Small Models (ESM) approach (Breiner et al., 2015, 2018).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_gau.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit and validate Gaussian Process models based on Ensembles of Small Models approach — esm_gau","text":"","code":"esm_gau(data, response, predictors, partition, thr = NULL, background = NULL)"},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_gau.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit and validate Gaussian Process models based on Ensembles of Small Models approach — esm_gau","text":"data data.frame. Database response (0,1) predictors values. response character. Column name species absence-presence data (0,1) predictors character. Vector column names quantitative predictor variables (.e. continuous variables). function allow categorical variables can construct models continuous variables. Usage predictors = c(\"aet\", \"cwd\", \"tmin\") partition character. Column name training validation partition groups. thr character. Threshold used get binary suitability values (.e. 0,1). useful threshold-dependent performance metrics. possible use one threshold type. necessary provide vector argument. following threshold criteria available: equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). 
max_jaccard: threshold Jaccard index highest. max_sorensen: threshold Sorensen index highest. max_fpb: threshold FPB (F-measure presence-background data) highest. sensitivity: Threshold based specified sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers sensitivity value. sensitivity value specified, default value 0.9. user wants include one threshold type, necessary concatenate threshold types, e.g., thr=c('max_sens_spec', 'max_jaccard'), thr=c('max_sens_spec', 'sensitivity', sens='0.8'), thr=c('max_sens_spec', 'sensitivity'). Function use thresholds threshold specified background data.frame. Database response column 0 predictors variables. column names must consistent data. Default NULL","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_gau.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit and validate Gaussian Process models based on Ensembles of Small Models approach — esm_gau","text":"list object : esm_model: list \"graf\" class object bivariate model. object can used predicting ensembles small models sdm_predict function. predictors: tibble variables use modeling. performance: Performance metric (see sdm_eval). threshold dependent metric calculated based threshold specified argument.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_gau.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Fit and validate Gaussian Process models based on Ensembles of Small Models approach — esm_gau","text":"method consists creating bivariate models pair-wise combinations predictors performs ensemble based average suitability weighted Somers' D metric (D = 2 x (AUC -0.5)). ESM recommended modeling species occurrences. function allow categorical variables use types variables problematic using occurrences. detail see Breiner et al. 
(2015, 2018).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_gau.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Fit and validate Gaussian Process models based on Ensembles of Small Models approach — esm_gau","text":"Breiner, F. T., Guisan, ., Bergamini, ., & Nobis, M. P. (2015). Overcoming limitations modelling rare species using ensembles small models. Methods Ecology Evolution, 6(10), 1210-1218. https://doi.org/10.1111/2041-210X.12403 Breiner, F. T., Nobis, M. P., Bergamini, ., & Guisan, . (2018). Optimizing ensembles small models predicting distribution species occurrences. Methods Ecology Evolution, 9(4), 802-808. https://doi.org/10.1111/2041-210X.12957","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_gau.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit and validate Gaussian Process models based on Ensembles of Small Models approach — esm_gau","text":"","code":"if (FALSE) { data(\"abies\") require(dplyr) # Using k-fold partition method set.seed(10) abies2 <- abies %>% na.omit() %>% group_by(pr_ab) %>% dplyr::slice_sample(n = 10) %>% group_by() abies2 <- part_random( data = abies2, pr_ab = \"pr_ab\", method = c(method = \"rep_kfold\", folds = 3, replicates = 5) ) abies2 # Without threshold specification and with kfold esm_gau_t1 <- esm_gau( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmin\", \"ppt_djf\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), partition = \".part\", thr = NULL ) esm_gau_t1$esm_model # bivariate model esm_gau_t1$predictors esm_gau_t1$performance }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_gbm.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit and validate Generalized Boosted Regression models based on Ensembles of Small Models approach — esm_gbm","title":"Fit and validate Generalized Boosted Regression models based on Ensembles 
of Small Models approach — esm_gbm","text":"function constructs Generalized Boosted Regression using Ensembles Small Models (ESM) approach (Breiner et al., 2015, 2018).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_gbm.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit and validate Generalized Boosted Regression models based on Ensembles of Small Models approach — esm_gbm","text":"","code":"esm_gbm( data, response, predictors, partition, thr = NULL, n_trees = 100, n_minobsinnode = NULL, shrinkage = 0.1 )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_gbm.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit and validate Generalized Boosted Regression models based on Ensembles of Small Models approach — esm_gbm","text":"data data.frame. Database response (0,1) predictors values. response character. Column name species absence-presence data (0,1) predictors character. Vector column names quantitative predictor variables (.e. continuous variables). can construct models continuous variables allow categorical variables. Usage predictors = c(\"aet\", \"cwd\", \"tmin\") partition character. Column name training validation partition groups. thr character. Threshold used get binary suitability values (.e. 0,1). useful threshold-dependent performance metrics. possible use one threshold type. necessary provide vector argument. following threshold criteria available: equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard index highest. max_sorensen: threshold Sorensen index highest. max_fpb: threshold FPB (F-measure presence-background data) highest. sensitivity: Threshold based specified sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers sensitivity value. 
sensitivity value specified, default value 0.9. case use one threshold type necessary concatenate threshold types, e.g., thr=c('max_sens_spec', 'max_jaccard'), thr=c('max_sens_spec', 'sensitivity', sens='0.8'), thr=c('max_sens_spec', 'sensitivity'). Function use thresholds threshold specified. n_trees Integer specifying total number trees fit. equivalent number iterations number basis functions additive expansion. Default 100. n_minobsinnode Integer specifying minimum number observations terminal nodes trees. Note actual number observations, total weight. n_minobsinnode NULL, parameter assume value equal nrow(data)*0.5/4. Default NULL. shrinkage Numeric. parameter applied tree expansion. Also known learning rate step-size reduction; 0.001 0.1 usually works, smaller learning rate typically requires trees. Default 0.1.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_gbm.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit and validate Generalized Boosted Regression models based on Ensembles of Small Models approach — esm_gbm","text":"list object : esm_model: list \"gbm\" class object gbm package bivariate model. object can used predicting ensembles small models sdm_predict function. predictors: tibble variables use modeling. performance: Performance metrics (see sdm_eval). Threshold dependent metrics calculated based threshold specified thr argument.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_gbm.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Fit and validate Generalized Boosted Regression models based on Ensembles of Small Models approach — esm_gbm","text":"method consists creating bivariate models pair-wise combinations predictors perform ensemble based average suitability weighted Somers' D metric (D = 2 x (AUC -0.5)). ESM recommended modeling species occurrences. 
function allow categorical variables use types variables problematic using occurrences. detail see Breiner et al. (2015, 2018).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_gbm.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Fit and validate Generalized Boosted Regression models based on Ensembles of Small Models approach — esm_gbm","text":"Breiner, F. T., Guisan, ., Bergamini, ., & Nobis, M. P. (2015). Overcoming limitations modelling rare species using ensembles small models. Methods Ecology Evolution, 6(10), 1210-218. https://doi.org/10.1111/2041-210X.12403 Breiner, F. T., Nobis, M. P., Bergamini, ., & Guisan, . (2018). Optimizing ensembles small models predicting distribution species occurrences. Methods Ecology Evolution, 9(4), 802-808. https://doi.org/10.1111/2041-210X.12957","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_gbm.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit and validate Generalized Boosted Regression models based on Ensembles of Small Models approach — esm_gbm","text":"","code":"if (FALSE) { data(\"abies\") require(dplyr) # Using k-fold partition method set.seed(10) abies2 <- abies %>% na.omit() %>% group_by(pr_ab) %>% dplyr::slice_sample(n = 10) %>% group_by() abies2 <- part_random( data = abies2, pr_ab = \"pr_ab\", method = c(method = \"rep_kfold\", folds = 3, replicates = 5) ) abies2 # Without threshold specification and with kfold esm_gbm_t1 <- esm_gbm( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmin\", \"ppt_djf\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), partition = \".part\", thr = NULL, n_trees = 100, n_minobsinnode = NULL, shrinkage = 0.1 ) esm_gbm_t1$esm_model # bivariate model esm_gbm_t1$predictors esm_gbm_t1$performance 
}"},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_glm.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit and validate Generalized Linear Models based on Ensembles of Small Models approach — esm_glm","title":"Fit and validate Generalized Linear Models based on Ensembles of Small Models approach — esm_glm","text":"function constructs Generalized Linear Models using Ensembles Small Models (ESM) approach (Breiner et al., 2015, 2018).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_glm.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit and validate Generalized Linear Models based on Ensembles of Small Models approach — esm_glm","text":"","code":"esm_glm( data, response, predictors, partition, thr = NULL, poly = 0, inter_order = 0 )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_glm.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit and validate Generalized Linear Models based on Ensembles of Small Models approach — esm_glm","text":"data data.frame. Database response (0,1) predictors values. response character. Column name species absence-presence data (0,1). predictors character. Vector column names quantitative predictor variables (.e. continuous variables). can construct models continuous variables allow categorical variables. Usage predictors = c(\"aet\", \"cwd\", \"tmin\"). partition character. Column name training validation partition groups. thr character. Threshold used get binary suitability values (.e. 0,1). useful threshold-dependent performance metrics. possible use one threshold type. necessary provide vector argument. following threshold criteria available: equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard highest. max_sorensen: threshold Sorensen highest. 
max_fpb: threshold FPB (F-measure presence-background data) highest. sensitivity: Threshold based specified sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers sensitivity value. sensitivity value specified, default value 0.9. user wants include one threshold type, necessary concatenate threshold types, e.g., thr=c('max_sens_spec', 'max_jaccard'), thr=c('max_sens_spec', 'sensitivity', sens='0.8'), thr=c('max_sens_spec', 'sensitivity'). Function use thresholds threshold specified poly interger >= 2. used values >= 2 model use polynomials continuous variables (.e. used predictors argument). Default 0. ESM constructed occurrences recommended use polynomials avoid overfitting. inter_order interger >= 0. interaction order explanatory variables. Default 0. ESM constructed occurrences recommended use interaction terms.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_glm.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit and validate Generalized Linear Models based on Ensembles of Small Models approach — esm_glm","text":"list object : esm_model: list \"glm\" class object stats package bivariate model. object can used predicting ensembles small models sdm_predict function. predictors: tibble variables use modeling. performance: Performance metric (see sdm_eval). threshold dependent metric calculated based threshold specified thr argument.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_glm.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Fit and validate Generalized Linear Models based on Ensembles of Small Models approach — esm_glm","text":"method consists creating bivariate models pair-wise combinations predictors perform ensemble based average suitability weighted Somers' D metric (D = 2 x (AUC -0.5)). ESM recommended modeling species occurrences. 
function allow categorical variables use types variables problematic using occurrences. detail see Breiner et al. (2015, 2018).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_glm.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Fit and validate Generalized Linear Models based on Ensembles of Small Models approach — esm_glm","text":"Breiner, F. T., Guisan, ., Bergamini, ., & Nobis, M. P. (2015). Overcoming limitations modelling rare species using ensembles small models. Methods Ecology Evolution, 6(10), 1210-218. https://doi.org/10.1111/2041-210X.12403 Breiner, F. T., Nobis, M. P., Bergamini, ., & Guisan, . (2018). Optimizing ensembles small models predicting distribution species occurrences. Methods Ecology Evolution, 9(4), 802-808. https://doi.org/10.1111/2041-210X.12957","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_glm.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit and validate Generalized Linear Models based on Ensembles of Small Models approach — esm_glm","text":"","code":"if (FALSE) { data(\"abies\") require(dplyr) # Using k-fold partition method set.seed(10) abies2 <- abies %>% na.omit() %>% group_by(pr_ab) %>% dplyr::slice_sample(n = 10) %>% group_by() abies2 <- part_random( data = abies2, pr_ab = \"pr_ab\", method = c(method = \"rep_kfold\", folds = 3, replicates = 5) ) abies2 # Without threshold specification and with kfold esm_glm_t1 <- esm_glm( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmin\", \"ppt_djf\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), partition = \".part\", thr = NULL, poly = 0, inter_order = 0 ) esm_glm_t1$esm_model # bivariate model esm_glm_t1$predictors esm_glm_t1$performance }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_max.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit and validate Maximum Entropy Models based 
on Ensemble of Small of Model approach — esm_max","title":"Fit and validate Maximum Entropy Models based on Ensemble of Small of Model approach — esm_max","text":"function constructs Maxent Models using Ensemble Small Model (ESM) approach (Breiner et al., 2015, 2018).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_max.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit and validate Maximum Entropy Models based on Ensemble of Small of Model approach — esm_max","text":"","code":"esm_max( data, response, predictors, partition, thr = NULL, background = NULL, clamp = TRUE, classes = \"default\", pred_type = \"cloglog\", regmult = 2.5 )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_max.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit and validate Maximum Entropy Models based on Ensemble of Small of Model approach — esm_max","text":"data data.frame. Database response (0,1) predictors values. response character. Column name species absence-presence data (0,1) predictors character. Vector column names quantitative predictor variables (.e. continuous variables). function can construct models continuous variables, allow categorical variables Usage predictors = c(\"aet\", \"cwd\", \"tmin\"). partition character. Column name training validation partition groups. thr character. Threshold used get binary suitability values (.e. 0,1). useful threshold-dependent performance metrics. possible use one threshold type. necessary provide vector argument. following threshold criteria available: equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard highest. max_sorensen: threshold Sorensen highest. max_fpb: threshold FPB (F-measure presence-background data) highest. sensitivity: Threshold based specified sensitivity value. 
Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers sensitivity value. sensitivity value specified, default 0.9 user wants include one threshold type, necessary concatenate threshold types, e.g., thr=c('max_sens_spec', 'max_jaccard'), thr=c('max_sens_spec', 'sensitivity', sens='0.8'), thr=c('max_sens_spec', 'sensitivity'). Function use thresholds threshold specified. background data.frame. Database response column 0 predictors variables. column names must consistent data. Default NULL clamp logical. set TRUE, predictors features restricted range seen model training. classes character. single feature combinations . Features symbolized letters: l (linear), q (quadratic), h (hinge), p (product), t (threshold). Usage classes = \"lpq\". Default \"default\" (see details). pred_type character. Type response required available \"link\", \"exponential\", \"cloglog\" \"logistic\". Default \"cloglog\" regmult numeric. constant adjust regularization. ESM used modeling species records default value 2.5","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_max.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit and validate Maximum Entropy Models based on Ensemble of Small of Model approach — esm_max","text":"list object : esm_model: list \"maxnet\" class object maxnet package bivariate model. object can used predicting ensembles small models sdm_predict function. predictors: tibble variables use modeling. performance: Performance metrics (see sdm_eval). 
threshold dependent metric calculated based threshold specified argument.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_max.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Fit and validate Maximum Entropy Models based on Ensemble of Small of Model approach — esm_max","text":"method consists creating bivariate models pair-wise combinations predictors perform ensemble based average suitability weighted Somers' D metric (D = 2 x (AUC -0.5)). ESM recommended modeling species occurrences. function allow categorical variables use types variables problematic using occurrences. detail see Breiner et al. (2015, 2018). function use default regularization multiplier equal 2.5 (see Breiner et al., 2018) argument “classes” set default MaxEnt use different features combination depending number presences (np) follow rule: np < 10 classes = \"l\", np 10 15 classes = \"lq\", np 15 80 classes = \"lqh\", np >= 80 classes = \"lqph\" presence-absence (presence-pseudo-absence) data used data argument addition background points, function fit models presences background points validate presences absences. procedure makes maxent comparable presences-absences models (e.g., random forest, support vector machine). presences background points data used, function fit validate model presences background data. presence-absences used data argument without background, function fit model specified data (recommended).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_max.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Fit and validate Maximum Entropy Models based on Ensemble of Small of Model approach — esm_max","text":"Breiner, F. T., Guisan, ., Bergamini, ., & Nobis, M. P. (2015). Overcoming limitations modelling rare species using ensembles small models. Methods Ecology Evolution, 6(10), 1210-218. https://doi.org/10.1111/2041-210X.12403 Breiner, F. T., Nobis, M. 
P., Bergamini, ., & Guisan, . (2018). Optimizing ensembles small models predicting distribution species occurrences. Methods Ecology Evolution, 9(4), 802-808. https://doi.org/10.1111/2041-210X.12957","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_max.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit and validate Maximum Entropy Models based on Ensemble of Small of Model approach — esm_max","text":"","code":"if (FALSE) { data(\"abies\") data(\"backg\") require(dplyr) # Using k-fold partition method set.seed(10) abies2 <- abies %>% na.omit() %>% group_by(pr_ab) %>% dplyr::slice_sample(n = 10) %>% group_by() abies2 <- part_random( data = abies2, pr_ab = \"pr_ab\", method = c(method = \"rep_kfold\", folds = 5, replicates = 5) ) abies2 set.seed(10) backg2 <- backg %>% na.omit() %>% group_by(pr_ab) %>% dplyr::slice_sample(n = 100) %>% group_by() backg2 <- part_random( data = backg2, pr_ab = \"pr_ab\", method = c(method = \"rep_kfold\", folds = 5, replicates = 5) ) backg2 # Without threshold specification and with kfold esm_max_t1 <- esm_max( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmin\", \"ppt_djf\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), partition = \".part\", thr = NULL, background = backg2, clamp = TRUE, classes = \"default\", pred_type = \"cloglog\", regmult = 1 ) esm_max_t1$esm_model # bivariate model esm_max_t1$predictors esm_max_t1$performance }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_net.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit and validate Neural Networks based on Ensembles of Small of Models approach — esm_net","title":"Fit and validate Neural Networks based on Ensembles of Small of Models approach — esm_net","text":"function constructs Neural Networks using Ensembles Small Models (ESM) approach (Breiner et al., 2015, 
2018).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_net.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit and validate Neural Networks based on Ensembles of Small of Models approach — esm_net","text":"","code":"esm_net(data, response, predictors, partition, thr = NULL, size = 2, decay = 0)"},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_net.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit and validate Neural Networks based on Ensembles of Small of Models approach — esm_net","text":"data data.frame. Database response (0,1) predictors values. response character. Column name species absence-presence data (0,1) predictors character. Vector column names quantitative predictor variables (.e. continuous variables). function can construct models continuous variables allow categorical variables. Usage predictors = c(\"aet\", \"cwd\", \"tmin\"). partition character. Column name training validation partition groups. thr character. Threshold used get binary suitability values (.e. 0,1). useful threshold-dependent performance metrics. possible use one threshold type. necessary provide vector argument. following threshold criteria available: equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard index highest. max_sorensen: threshold Sorensen index highest. max_fpb: threshold FPB (F-measure presence-background data) highest. sensitivity: Threshold based specified sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers sensitivity value. 
sensitivity values specified, default used 0.9 user wants include one threshold type, necessary concatenate threshold types, e.g., thr=c('max_sens_spec', 'max_jaccard'), thr=c('max_sens_spec', 'sensitivity', sens='0.8'), thr=c('max_sens_spec', 'sensitivity'). Function use thresholds threshold specified size numeric. Number units hidden layer. Can zero skip-layer units. Default 2. decay numeric. Parameter weight decay. Default 0.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_net.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit and validate Neural Networks based on Ensembles of Small of Models approach — esm_net","text":"list object : esm_model: list \"nnet\" class object nnet package bivariate model. object can used predicting ensemble small model sdm_predict function. predictors: tibble variables use modeling. performance: Performance metric (see sdm_eval). threshold dependent metric calculated based threshold specified thr argument.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_net.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Fit and validate Neural Networks based on Ensembles of Small of Models approach — esm_net","text":"method consists creating bivariate models pair-wise combinations predictors perform ensemble based average suitability weighted Somers' D metric (D = 2 x (AUC -0.5)). ESM recommended modeling species occurrences. function allow categorical variables use types variables problematic using occurrences. detail see Breiner et al. (2015, 2018).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_net.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Fit and validate Neural Networks based on Ensembles of Small of Models approach — esm_net","text":"Breiner, F. T., Guisan, ., Bergamini, ., & Nobis, M. P. (2015). 
Overcoming limitations modelling rare species using ensembles small models. Methods Ecology Evolution, 6(10), 1210-218. https://doi.org/10.1111/2041-210X.12403 Breiner, F. T., Nobis, M. P., Bergamini, ., & Guisan, . (2018). Optimizing ensembles small models predicting distribution species occurrences. Methods Ecology Evolution, 9(4), 802-808. https://doi.org/10.1111/2041-210X.12957","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_net.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit and validate Neural Networks based on Ensembles of Small of Models approach — esm_net","text":"","code":"if (FALSE) { data(\"abies\") require(dplyr) # Using k-fold partition method set.seed(10) abies2 <- abies %>% na.omit() %>% group_by(pr_ab) %>% dplyr::slice_sample(n = 10) %>% group_by() abies2 <- part_random( data = abies2, pr_ab = \"pr_ab\", method = c(method = \"rep_kfold\", folds = 3, replicates = 5) ) abies2 # Without threshold specification and with kfold esm_net_t1 <- esm_net( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmin\", \"ppt_djf\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), partition = \".part\", thr = NULL ) esm_net_t1$esm_model # bivariate model esm_net_t1$predictors esm_net_t1$performance }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_svm.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit and validate Support Vector Machine models based on Ensembles of Small of Models approach — esm_svm","title":"Fit and validate Support Vector Machine models based on Ensembles of Small of Models approach — esm_svm","text":"function constructs Support Vector Machine models using Ensembles Small Models (ESM) approach (Breiner et al., 2015, 2018).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_svm.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit and validate Support Vector 
Machine models based on Ensembles of Small of Models approach — esm_svm","text":"","code":"esm_svm( data, response, predictors, partition, thr = NULL, sigma = \"automatic\", C = 1 )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_svm.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit and validate Support Vector Machine models based on Ensembles of Small of Models approach — esm_svm","text":"data data.frame. Database response (0,1) predictors values. response character. Column name species absence-presence data (0,1). predictors character. Vector column names quantitative predictor variables (.e. continuous variables). function can construct models continuous variables allow categorical variables. Usage predictors = c(\"aet\", \"cwd\", \"tmin\"). partition character. Column name training validation partition groups. thr character. Threshold used get binary suitability values (.e. 0,1). useful threshold-dependent performance metrics. possible use one threshold type. necessary provide vector argument. following threshold criteria available: equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard index highest. max_sorensen: threshold Sorensen index highest. max_fpb: threshold FPB (F-measure presence-background data) highest. sensitivity: Threshold based specified sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers sensitivity value. sensitivity value specified, default 0.9 user wants include one threshold type, necessary concatenate threshold types, e.g., thr=c('max_sens_spec', 'max_jaccard'), thr=c('max_sens_spec', 'sensitivity', sens='0.8'), thr=c('max_sens_spec', 'sensitivity'). Function use thresholds threshold specified sigma numeric. Inverse kernel width Radial Basis kernel function \"rbfdot\". Default \"automatic\". C numeric. 
Cost constraints violation, 'C' constant regularization term Lagrange formulation. Default 1","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_svm.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit and validate Support Vector Machine models based on Ensembles of Small of Models approach — esm_svm","text":"list object : esm_model: list \"ksvm\" class object ksvm package bivariate model. object can used predicting ensemble small model sdm_predict function. predictors: tibble variables use modeling. performance: Performance metric (see sdm_eval). threshold dependent metric calculated based threshold specified thr argument.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_svm.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Fit and validate Support Vector Machine models based on Ensembles of Small of Models approach — esm_svm","text":"method consists creating bivariate models pair-wise combinations predictors perform ensemble based average suitability weighted Somers' D metric (D = 2 x (AUC -0.5)). ESM recommended modeling species occurrences. function allow categorical variables use types variables problematic using occurrences. detail see Breiner et al. (2015, 2018). function constructs 'C-svc' classification type uses Radial Basis kernel \"Gaussian\" function (rbfdot). See details ksvm","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_svm.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Fit and validate Support Vector Machine models based on Ensembles of Small of Models approach — esm_svm","text":"Breiner, F. T., Guisan, ., Bergamini, ., & Nobis, M. P. (2015). Overcoming limitations modelling rare species using ensembles small models. Methods Ecology Evolution, 6(10), 1210-218. https://doi.org/10.1111/2041-210X.12403 Breiner, F. T., Nobis, M. P., Bergamini, ., & Guisan, . (2018). 
Optimizing ensembles small models predicting distribution species occurrences. Methods Ecology Evolution, 9(4), 802-808. https://doi.org/10.1111/2041-210X.12957","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/esm_svm.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit and validate Support Vector Machine models based on Ensembles of Small of Models approach — esm_svm","text":"","code":"if (FALSE) { data(\"abies\") require(dplyr) # Using k-fold partition method set.seed(10) abies2 <- abies %>% na.omit() %>% group_by(pr_ab) %>% dplyr::slice_sample(n = 10) %>% group_by() abies2 <- part_random( data = abies2, pr_ab = \"pr_ab\", method = c(method = \"rep_kfold\", folds = 3, replicates = 5) ) abies2 # Without threshold specification and with kfold esm_svm_t1 <- esm_svm( data = abies2, response = \"pr_ab\", predictors = c( \"aet\", \"cwd\", \"tmin\", \"ppt_djf\", \"ppt_jja\", \"pH\", \"awc\", \"depth\" ), partition = \".part\", thr = NULL ) esm_svm_t1$esm_model # bivariate model esm_svm_t1$predictors esm_svm_t1$performance }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/extra_eval.html","id":null,"dir":"Reference","previous_headings":"","what":"Measure model extrapolation based on Shape extrapolation metric — extra_eval","title":"Measure model extrapolation based on Shape extrapolation metric — extra_eval","text":"Measure extrapolation comparing environmental data used modeling calibration area model projection. 
function use Shape metric proposed Velazco et al., 2023","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/extra_eval.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Measure model extrapolation based on Shape extrapolation metric — extra_eval","text":"","code":"extra_eval( training_data, pr_ab, projection_data, metric = \"mahalanobis\", univar_comb = FALSE, n_cores = 1, aggreg_factor = 1 )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/extra_eval.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Measure model extrapolation based on Shape extrapolation metric — extra_eval","text":"training_data data.frame tibble environmental conditions presence absence (background points pseudo-absences) used constructing models pr_ab character. Column name presence absence (background points pseudo-absences) data (.e., 1 0) projection_data SpatRaster, data.frame tibble environmental condition used projecting model (e.g., larger, encompassing region, spatially separate region, different time period). data.frame tibble used function return tibble object. Otherwise, SpatRaster object. metric character. Metric used measure degree extrapolation. Default = mahalanobis. mahalanobis: Degree extrapolation calculated based Mahalanobis distance. euclidean: Degree extrapolation calculated based Euclidean distance. univar_comb logical. true, function add layer column distinguish univariate (.e., projection data outside range training conditions) combinatorial extrapolation (.e., projection data within range training conditions) using values 1 2, respectively. Default FALSE n_cores numeric. Number cores use parallelization. Default 1 aggreg_factor positive integer. Aggregation factor expressed number cells direction reduce raster resolution. Use value higher 1 useful measuring extrapolation using raster high number cells. resolution output raster object used 'projection_data' argument. 
Default 1, .e., default, changes made resolution environmental variables.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/extra_eval.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Measure model extrapolation based on Shape extrapolation metric — extra_eval","text":"SpatRaster tibble object extrapolation values measured Shape metric. Also possible estimate univariate combinatorial extrapolation metric (see `univar_comb` argument).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/extra_eval.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Measure model extrapolation based on Shape extrapolation metric — extra_eval","text":"function measure model extrapolation base Shape metric (Velazco et al., 2023). Shape model-agnostic approach calculates extrapolation degree given projection data point multivariate distance nearest training data point. distances relativized factor reflects dispersion training data environmental space. Distinct approaches (e.g., MESS-Multivariate Environmental Similarity Surfaces, EO-Environmental Overlap, MOP-Mobility-Oriented Parity, EXDET-Extrapolation Detection, AOA-Area Applicability), Shape incorporates adjustable threshold control binary discrimination acceptable unacceptable extrapolation degrees (see extra_truncate). See vignette flexsdm website details Shape metric, model truncation, tools explore model extrapolation.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/extra_eval.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Measure model extrapolation based on Shape extrapolation metric — extra_eval","text":"Velazco, S.J.E., Brooke, M.R., De Marco Jr., P., Regan, H.M. Franklin, J. 2023. far can extrapolate species distribution model? Exploring Shape, novel method. Ecography: e06992. 
https://doi.org/10.1111/ecog.06992","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/extra_eval.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Measure model extrapolation based on Shape extrapolation metric — extra_eval","text":"","code":"if (FALSE) { require(dplyr) require(terra) data(spp) f <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(f) names(somevar) <- c(\"aet\", \"cwd\", \"tmx\", \"tmn\") spp$species %>% unique() sp <- spp %>% dplyr::filter(species == \"sp3\", pr_ab == 1) %>% dplyr::select(x, y, pr_ab) # Calibration area based on some criterion such as dispersal ability ca <- calib_area(sp, x = \"x\", y = \"y\", method = c(\"bmcp\", width = 50000), crs = crs(somevar)) plot(somevar[[1]]) points(sp) plot(ca, add = T) # Sampling pseudo-absences set.seed(10) psa <- sample_pseudoabs( data = sp, x = \"x\", y = \"y\", n = nrow(sp) * 2, method = \"random\", rlayer = somevar, calibarea = ca ) # Merge presences and absences databases to get a complete calibration data sp_pa <- dplyr::bind_rows(sp, psa) sp_pa # Get environmental condition of calibration area sp_pa_2 <- sdm_extract(data = sp_pa, x = \"x\", y = \"y\", env_layer = somevar) sp_pa_2 # Measure degree of extrapolation based on Mahalanobis and # for a projection area based on a SpatRaster object extr <- extra_eval( training_data = sp_pa_2, projection_data = somevar, pr_ab = \"pr_ab\", n_cores = 1, aggreg_factor = 1, metric = \"mahalanobis\" ) plot(extr, main = \"Extrapolation pattern\") # Let's fit, predict and truncate a model with extra_truncate sp_pa_2 <- part_random( data = sp_pa_2, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) a_model <- fit_glm( data = sp_pa_2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmx\", \"tmn\"), partition = \".part\", thr = c(\"max_sorensen\") ) predsuit <- sdm_predict(models = a_model, pred = somevar, thr = \"max_sorensen\") 
predsuit # list with a raster with two layer plot(predsuit[[1]]) # Truncate a model based on a given value of extrapolation # using 'extra_truncate' function par(mfrow = c(1, 2)) plot(extr, main = \"Extrapolation\") plot(predsuit[[1]][[1]], main = \"Suitability\") par(mfrow = c(1, 1)) predsuit_2 <- extra_truncate( suit = predsuit[[1]], extra = extr, threshold = c(50, 100, 200) ) predsuit_2 # a list of continuous and binary models with # different truncated at different extrapolation thresholds plot(predsuit_2$`50`) plot(predsuit_2$`100`) plot(predsuit_2$`200`) ##%######################################################%## #### Measure degree of extrapolation for #### #### projection area based on data.frame #### ##%######################################################%## extr_df <- extra_eval( training_data = sp_pa_2, projection_data = as.data.frame(somevar, xy=TRUE), pr_ab = \"pr_ab\", n_cores = 1, aggreg_factor = 1, metric = \"mahalanobis\" ) extr_df # see 'p_extra()' to explore extrapolation or suitability pattern in the # environmental and/or geographical space ##%######################################################%## #### Explore Shape metric with #### #### univariate and combinatorial extrapolation #### ##%######################################################%## extr <- extra_eval( training_data = sp_pa_2, projection_data = somevar, pr_ab = \"pr_ab\", n_cores = 1, aggreg_factor = 1, metric = \"mahalanobis\", univar_comb = TRUE ) extr plot(extr) # In the second layer, values equal to 1 and 2 # depict univariate and combinatorial extrapolation, respectively }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/extra_exclude.html","id":null,"dir":"Reference","previous_headings":"","what":"Constraint of suitability based on extrapolation — extra_exclude","title":"Constraint of suitability based on extrapolation — extra_exclude","text":"Exclusion suitability values less given extrapolation value 
(EXPERIMENTAL)","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/extra_exclude.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Constraint of suitability based on extrapolation — extra_exclude","text":"","code":"extra_exclude(suit, extra, threshold = 50)"},{"path":"https://sjevelazco.github.io/flexsdm/reference/extra_exclude.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Constraint of suitability based on extrapolation — extra_exclude","text":"suit SpatRaster suitability values extra SpatRaster extrapolation values measured percentage (output extra_eval function) threshold numeric. Vector one values used correct extrapolation. Default 50% (FUNCTION SET PROJECTED SUITABILITY VALUES LESS THRESHOLD ZERO? UNCLEAR. PLEASE EXPLICIT)","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/extra_exclude.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Constraint of suitability based on extrapolation — extra_exclude","text":"SpatRaster object corrected suitability values","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/extra_exclude.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Constraint of suitability based on extrapolation — extra_exclude","text":"","code":"if (FALSE) { # see examples in extra_eval function }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/extra_truncate.html","id":null,"dir":"Reference","previous_headings":"","what":"Truncate suitability predictions based on an extrapolation value — extra_truncate","title":"Truncate suitability predictions based on an extrapolation value — extra_truncate","text":"Exclusion suitability predictions environmental conditions assumed 
extrapolative.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/extra_truncate.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Truncate suitability predictions based on an extrapolation value — extra_truncate","text":"","code":"extra_truncate(suit, extra, threshold = 50, trunc_value = 0)"},{"path":"https://sjevelazco.github.io/flexsdm/reference/extra_truncate.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Truncate suitability predictions based on an extrapolation value — extra_truncate","text":"suit SpatRaster suitability values extra SpatRaster extrapolation values preferable measured extra_eval function threshold numeric. Vector one extrapolation values used truncate suitability Default 50% trunc_value numeric. Numeric value used cells assumed extrapolative","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/extra_truncate.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Truncate suitability predictions based on an extrapolation value — extra_truncate","text":"SpatRaster object truncated suitability values","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/extra_truncate.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Truncate suitability predictions based on an extrapolation value — extra_truncate","text":"Exclusion suitability predictions environmental conditions assumed extrapolative. function possible use metric measuring degree extrapolation (e.g., MESS-Multivariate Environmental Similarity Surfaces, EO-Environmental Overlap, MOP-Mobility-Oriented Parity, EXDET-Extrapolation Detection, AOA-Area Applicability). However, recommend use Shape approach (see extra_eval, Velazco et al., 2023). function truncates suitability predictions assigning given value, generally 0 NA. Usage trunc_value = NA. Default 0. 
cells assumed extrapolative, .e., higher given threshold given extrapolation metric. See vignette flexsdm website details Shape metric, model truncation, tools explore model extrapolation.","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/extra_truncate.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Truncate suitability predictions based on an extrapolation value — extra_truncate","text":"","code":"if (FALSE) { # see examples in extra_eval function }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_ensemble.html","id":null,"dir":"Reference","previous_headings":"","what":"Ensemble model fitting and validation — fit_ensemble","title":"Ensemble model fitting and validation — fit_ensemble","text":"Ensemble model fitting validation","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_ensemble.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Ensemble model fitting and validation — fit_ensemble","text":"","code":"fit_ensemble( models, ens_method = c(\"mean\", \"meanw\", \"meansup\", \"meanthr\", \"median\"), thr = NULL, thr_model = NULL, metric = NULL )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_ensemble.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Ensemble model fitting and validation — fit_ensemble","text":"models list. list models fitted fit_ tune_ function family. Models used ensemble must presences-absences records, partition methods, threshold types. ens_method character. Method used create ensemble different models. vector must provided argument. meansup, meanw pcasup method, necessary provide evaluation metric threshold 'metric' 'thr_model' arguments respectively. default following ensemble methods performed: mean: Simple average different models. meanw: Weighted average models based performance. evaluation metric threshold type must provided. 
meansup: Average best models (evaluation metric average). evaluation metric must provided. meanthr: Averaging performed cells suitability values selected threshold. median: Median different models. Usage ensemble = \"meanthr\". several ensemble methods implemented necessary concatenate , e.g., ensemble = c(\"meanw\", \"meanthr\", \"median\") thr character. Threshold used get binary suitability values (.e. 0,1). useful threshold-dependent performance metrics. possible use one threshold criterion. vector must provided argument. following threshold criteria available: lpt: highest threshold omission. equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard highest. max_sorensen: threshold Sorensen highest. max_fpb: threshold FPB (F-measure presence-background data) highest. sensitivity: Threshold based specified sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers sensitivity value. sensitivity values specified, default 0.9. case using one threshold type necessary concatenate threshold types, e.g., thr=c('lpt', 'max_sens_spec', 'max_jaccard'), thr=c('lpt', 'max_sens_spec', 'sensitivity', sens='0.8'), thr=c('lpt', 'max_sens_spec', 'sensitivity'). Function use thresholds threshold specified. thr_model character. threshold needed conduct meanw, meandsup, meanthr ensemble methods. mandatory use one threshold, must threshold used fit models used \"models\" argument. Usage thr_model = 'equal_sens_spec' metric character. Performance metric used selecting best combination hyper-parameter values. One following metrics can used: SORENSEN, JACCARD, FPB, TSS, KAPPA, AUC, IMAE, BOYCE. Default TSS. 
Usage metric = BOYCE","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_ensemble.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Ensemble model fitting and validation — fit_ensemble","text":"list object : models: list models used performing ensemble. thr_metric: Threshold metric specified function. predictors: tibble quantitative (column names c) qualitative (column names f) variables used models. performance: tibble performance metrics (see sdm_eval). metrics threshold-dependent calculated based threshold specified argument.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_ensemble.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Ensemble model fitting and validation — fit_ensemble","text":"","code":"if (FALSE) { require(dplyr) require(terra) # Environmental variables somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) # Species occurrences data(\"spp\") set.seed(1) some_sp <- spp %>% dplyr::filter(species == \"sp2\") %>% sdm_extract( data = ., x = \"x\", y = \"y\", env_layer = somevar, variables = names(somevar), filter_na = TRUE ) %>% part_random( data = ., pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 3) ) # gam mglm <- fit_glm( data = some_sp, response = \"pr_ab\", predictors = c(\"CFP_1\", \"CFP_2\", \"CFP_3\", \"CFP_4\"), partition = \".part\", poly = 2 ) mraf <- fit_raf( data = some_sp, response = \"pr_ab\", predictors = c(\"CFP_1\", \"CFP_2\", \"CFP_3\", \"CFP_4\"), partition = \".part\", ) mgbm <- fit_gbm( data = some_sp, response = \"pr_ab\", predictors = c(\"CFP_1\", \"CFP_2\", \"CFP_3\", \"CFP_4\"), partition = \".part\" ) # Fit and validate ensemble model mensemble <- fit_ensemble( models = list(mglm, mraf, mgbm), ens_method = \"meansup\", thr = NULL, thr_model = \"max_sens_spec\", metric = \"TSS\" ) 
}"},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_gam.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit and validate Generalized Additive Models — fit_gam","title":"Fit and validate Generalized Additive Models — fit_gam","text":"Fit validate Generalized Additive Models","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_gam.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit and validate Generalized Additive Models — fit_gam","text":"","code":"fit_gam( data, response, predictors, predictors_f = NULL, select_pred = FALSE, partition, thr = NULL, fit_formula = NULL, k = -1 )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_gam.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit and validate Generalized Additive Models — fit_gam","text":"data data.frame. Database response (0,1) predictors values. response character. Column name species absence-presence data (0,1). predictors character. Vector column names quantitative predictor variables (.e. continuous variables). Usage predictors = c(\"aet\", \"cwd\", \"tmin\") predictors_f character. Vector column names qualitative predictor variables (.e. ordinal nominal variables; factors). Usage predictors_f = c(\"landform\") select_pred logical. Perform predictor selection. Default FALSE. partition character. Column name training validation partition groups. thr character. Threshold used get binary suitability values (.e. 0,1). useful threshold-dependent performance metrics. possible use one threshold type. necessary provide vector argument. following threshold criteria available: lpt: highest threshold omission. equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard index highest. max_sorensen: threshold Sorensen index highest. 
max_fpb: threshold FPB (F-measure presence-background data) highest. sensitivity: Threshold based specified sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers sensitivity value. sensitivity value specified, default used 0.9. one threshold type used must concatenated, e.g., thr=c('lpt', 'max_sens_spec', 'max_jaccard'), thr=c('lpt', 'max_sens_spec', 'sensitivity', sens='0.8'), thr=c('lpt', 'max_sens_spec', 'sensitivity'). Function use threshold types none specified. fit_formula formula. formula object response predictor variables (e.g. formula(pr_ab ~ aet + ppt_jja + pH + awc + depth + landform)). Note variables used must consistent used response, predictors, predictors_f arguments k integer. dimension basis used represent smooth term. Default -1 (.e., k=10). See help ?mgcv::s.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_gam.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit and validate Generalized Additive Models — fit_gam","text":"list object : model: \"gam\" class object mgcv package. object can used predicting. predictors: tibble quantitative (c column names) qualitative (f column names) variables use modeling. performance: Performance metric (see sdm_eval). Threshold dependent metrics calculated based threshold specified argument. data_ens: Predicted suitability test partition. 
database used fit_ensemble","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_gam.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Fit and validate Generalized Additive Models — fit_gam","text":"function fits GAM using mgvc package, Binomial distribution family thin plate regression spline smoothing basis (see ?mgvc::s).","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_gam.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit and validate Generalized Additive Models — fit_gam","text":"","code":"if (FALSE) { data(\"abies\") # Using k-fold partition method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 10) ) abies2 gam_t1 <- fit_gam( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), predictors_f = c(\"landform\"), select_pred = FALSE, partition = \".part\", thr = \"max_sens_spec\" ) gam_t1$model gam_t1$predictors gam_t1$performance # Specifying the formula explicitly require(mgcv) gam_t2 <- fit_gam( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), predictors_f = c(\"landform\"), select_pred = FALSE, partition = \".part\", thr = \"max_sens_spec\", fit_formula = stats::formula(pr_ab ~ s(aet) + s(ppt_jja) + s(pH) + landform) ) gam_t2$model gam_t2$predictors gam_t2$performance %>% dplyr::select(ends_with(\"_mean\")) # Using repeated k-fold partition method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"rep_kfold\", folds = 5, replicates = 5) ) abies2 gam_t3 <- fit_gam( data = abies2, response = \"pr_ab\", predictors = c(\"ppt_jja\", \"pH\", \"awc\"), predictors_f = c(\"landform\"), select_pred = FALSE, partition = \".part\", thr = \"max_sens_spec\" ) gam_t3 
}"},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_gau.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit and validate Gaussian Process models — fit_gau","title":"Fit and validate Gaussian Process models — fit_gau","text":"Fit validate Gaussian Process models","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_gau.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit and validate Gaussian Process models — fit_gau","text":"","code":"fit_gau( data, response, predictors, predictors_f = NULL, background = NULL, partition, thr = NULL )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_gau.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit and validate Gaussian Process models — fit_gau","text":"data data.frame. Database response (0,1) predictors values. response character. Column name species absence-presence data (0,1). predictors character. Vector column names quantitative predictor variables (.e. continuous variables). Usage predictors = c(\"aet\", \"cwd\", \"tmin\") predictors_f character. Vector column names qualitative predictor variables (.e. ordinal nominal variables type). Usage predictors_f = c(\"landform\") background data.frame. Database response column 0 predictors variables. column names must consistent data partition character. Column name training validation partition groups. thr character. Threshold used get binary suitability values (.e. 0,1), useful threshold-dependent performance metrics. possible use one threshold type. vector must provided argument. following threshold criteria available: lpt: highest threshold omission. equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard index highest. max_sorensen: threshold Sorensen index highest. 
max_fpb: threshold FPB (F-measure presence-background data) highest. sensitivity: Threshold based specified sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers sensitivity value. sensitivity value specified, default used 0.9. one threshold type used must concatenated, e.g., thr=c('lpt', 'max_sens_spec', 'max_jaccard'), thr=c('lpt', 'max_sens_spec', 'sensitivity', sens='0.8'), thr=c('lpt', 'max_sens_spec', 'sensitivity'). Function use threshold criteria none specified.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_gau.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit and validate Gaussian Process models — fit_gau","text":"list object : model: \"graf\" class object. object can used predicting. predictors: tibble quantitative (c column names) qualitative (f column names) variables use modeling. performance: Performance metrics (see sdm_eval). Threshold dependent metrics calculated based threshold criteria specified argument. data_ens: Predicted suitability test partition. 
database used fit_ensemble","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_gau.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit and validate Gaussian Process models — fit_gau","text":"","code":"if (FALSE) { data(\"abies\") # Using k-fold partition method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 3) ) abies2 bg <- abies2 bg$pr_ab <- 0 gaup_t1 <- fit_gau( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), predictors_f = c(\"landform\"), partition = \".part\", background = bg, thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\") ) gaup_t1$model gaup_t1$predictors gaup_t1$performance gaup_t1$data_ens # Using bootstrap partition method only with presence-absence abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"boot\", replicates = 5, proportion = 0.7) ) abies2 gaup_t2 <- fit_gau( data = abies2, response = \"pr_ab\", predictors = c(\"ppt_jja\", \"pH\", \"awc\"), predictors_f = c(\"landform\"), partition = \".part\", thr = c(type = c(\"lpt\", \"max_sens_spec\", \"sensitivity\"), sens = \"0.8\") ) gaup_t2 }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_gbm.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit and validate Generalized Boosted Regression models — fit_gbm","title":"Fit and validate Generalized Boosted Regression models — fit_gbm","text":"Fit validate Generalized Boosted Regression models","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_gbm.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit and validate Generalized Boosted Regression models — fit_gbm","text":"","code":"fit_gbm( data, response, predictors, predictors_f = NULL, fit_formula = NULL, partition, thr = NULL, n_trees = 100, n_minobsinnode = as.integer(nrow(data) * 0.5/4), 
shrinkage = 0.1 )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_gbm.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit and validate Generalized Boosted Regression models — fit_gbm","text":"data data.frame. Database response (0,1) predictors values. response character. Column name species absence-presence data (0,1). predictors character. Vector column names quantitative predictor variables (.e. continuous variables). Usage predictors = c(\"aet\", \"cwd\", \"tmin\") predictors_f character. Vector column names qualitative predictor variables (.e. ordinal nominal variables type). Usage predictors_f = c(\"landform\") fit_formula formula. formula object response predictor variables (e.g. formula(pr_ab ~ aet + ppt_jja + pH + awc + depth + landform)). Note variables used must consistent used response, predictors, predictors_f arguments. Default NULL. partition character. Column name training validation partition groups. thr character. Threshold used get binary suitability values (.e. 0,1) needed threshold-dependent performance metrics. possible use one threshold type. necessary provide vector argument. following threshold criteria available: lpt: highest threshold omission. equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard index highest. max_sorensen: threshold Sorensen index highest. max_fpb: threshold FPB (F-measure presence-background data) highest. sensitivity: Threshold based specified sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers sensitivity value. sensitivity value specified, default used 0.9 one threshold type used must concatenated, e.g., thr=c('lpt', 'max_sens_spec', 'max_jaccard'), thr=c('lpt', 'max_sens_spec', 'sensitivity', sens='0.8'), thr=c('lpt', 'max_sens_spec', 'sensitivity'). 
Function use thresholds threshold specified. n_trees Integer specifying total number trees fit. equivalent number iterations number basis functions additive expansion. Default 100. n_minobsinnode Integer specifying minimum number observations terminal nodes trees. Note actual number observations, total weight. default value used nrow(data)*0.5/4 shrinkage Numeric. parameter applied tree expansion. Also known learning rate step-size reduction; 0.001 0.1 usually works, smaller learning rate typically requires trees. Default 0.1.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_gbm.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit and validate Generalized Boosted Regression models — fit_gbm","text":"list object : model: \"gbm\" class object gbm package. object can used predicting. predictors: tibble quantitative (c column names) qualitative (f column names) variables use modeling. performance: Performance metric (see sdm_eval). Threshold dependent metrics calculated based threshold specified thr argument. data_ens: Predicted suitability test partition based best model. 
database used fit_ensemble","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_gbm.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit and validate Generalized Boosted Regression models — fit_gbm","text":"","code":"if (FALSE) { data(\"abies\") # Using k-fold partition method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 10) ) abies2 gbm_t1 <- fit_gbm( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), predictors_f = c(\"landform\"), partition = \".part\", thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\") ) gbm_t1$model gbm_t1$predictors gbm_t1$performance gbm_t1$data_ens # Using bootstrap partition method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"boot\", replicates = 10, proportion = 0.7) ) abies2 gbm_t2 <- fit_gbm( data = abies2, response = \"pr_ab\", predictors = c(\"ppt_jja\", \"pH\", \"awc\"), predictors_f = c(\"landform\"), partition = \".part\", thr = \"max_sens_spec\" ) gbm_t2 }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_glm.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit and validate Generalized Linear Models — fit_glm","title":"Fit and validate Generalized Linear Models — fit_glm","text":"Fit validate Generalized Linear Models","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_glm.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit and validate Generalized Linear Models — fit_glm","text":"","code":"fit_glm( data, response, predictors, predictors_f = NULL, select_pred = FALSE, partition, thr = NULL, fit_formula = NULL, poly = 2, inter_order = 0 )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_glm.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit and validate Generalized 
Linear Models — fit_glm","text":"data data.frame. Database response (0,1) predictors values. response character. Column name species absence-presence data (0,1). predictors character. Vector column names quantitative predictor variables (.e. continuous variables). Usage predictors = c(\"aet\", \"cwd\", \"tmin\") predictors_f character. Vector column names qualitative predictor variables (.e. ordinal nominal variables type). Usage predictors_f = c(\"landform\") select_pred logical. Perform predictor selection. TRUE predictors selected based backward step wise approach. Default FALSE. partition character. Column name training validation partition groups. thr character. Threshold used get binary suitability values (.e. 0,1), needed threshold-dependent performance metrics. one threshold type can used. necessary provide vector argument. following threshold criteria available: lpt: highest threshold omission. equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard index highest. max_sorensen: threshold Sorensen index highest. max_fpb: threshold FPB (F-measure presence-background data) highest. sensitivity: Threshold based specified sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers sensitivity value. specified sensitivity values, function use default 0.9 one threshold type used must concatenated, e.g., thr=c('lpt', 'max_sens_spec', 'max_jaccard'), thr=c('lpt', 'max_sens_spec', 'sensitivity', sens='0.8'), thr=c('lpt', 'max_sens_spec', 'sensitivity'). Function use thresholds threshold specified. fit_formula formula. formula object response predictor variables (e.g. formula(pr_ab ~ aet + ppt_jja + pH + awc + depth + landform)). Note variables used must consistent used response, predictors, predictors_f arguments poly interger >= 2. used values >= 2 model use polynomials continuous variables (.e. 
used predictors argument). Default 0. inter_order interger >= 0. interaction order explanatory variables. Default 0.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_glm.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit and validate Generalized Linear Models — fit_glm","text":"list object : model: \"glm\" class object stats package. object can used predicting. predictors: tibble quantitative (c column names) qualitative (f column names) variables use modeling. performance: Performance metrics (see sdm_eval). Threshold dependent metric calculated based threshold specified thr argument . data_ens: Predicted suitability test partition. database used fit_ensemble","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_glm.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit and validate Generalized Linear Models — fit_glm","text":"","code":"if (FALSE) { data(\"abies\") abies # Using k-fold partition method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) abies2 glm_t1 <- fit_glm( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), predictors_f = c(\"landform\"), select_pred = FALSE, partition = \".part\", thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\"), poly = 0, inter_order = 0 ) glm_t1$model glm_t1$predictors glm_t1$performance glm_t1$data_ens # Using second order polynomial terms and first-order interaction terms glm_t2 <- fit_glm( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), predictors_f = c(\"landform\"), select_pred = FALSE, partition = \".part\", thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\"), poly = 2, inter_order = 1 ) # Using repeated k-fold partition method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = 
\"rep_kfold\", folds = 3, replicates = 5) ) abies2 # Using third order polynomial terms and second-order interaction terms glm_t3 <- fit_glm( data = abies2, response = \"pr_ab\", predictors = c(\"ppt_jja\", \"pH\", \"awc\"), predictors_f = c(\"landform\"), select_pred = FALSE, partition = \".part\", thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\"), poly = 3, inter_order = 2 ) }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_max.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit and validate Maximum Entropy models — fit_max","title":"Fit and validate Maximum Entropy models — fit_max","text":"Fit validate Maximum Entropy models","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_max.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit and validate Maximum Entropy models — fit_max","text":"","code":"fit_max( data, response, predictors, predictors_f = NULL, fit_formula = NULL, partition, background = NULL, thr = NULL, clamp = TRUE, classes = \"default\", pred_type = \"cloglog\", regmult = 1 )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_max.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit and validate Maximum Entropy models — fit_max","text":"data data.frame. Database response (0,1) predictors values. response character. Column name species absence-presence data (0,1). predictors character. Vector column names quantitative predictor variables (.e. continuous variables). Usage predictors = c(\"aet\", \"cwd\", \"tmin\") predictors_f character. Vector column names qualitative predictor variables (.e. ordinal nominal variables type). Usage predictors_f = c(\"landform\") fit_formula formula. formula object response predictor variables. See maxnet.formula function maxnet package. Note variables used must consistent used response, predictors, predictors_f arguments. Default NULL. partition character. 
Column name training validation partition groups. background data.frame. Database including rows 0 values response column predictors variables. column names must consistent data. Default NULL thr character. Threshold used get binary suitability values (.e. 0,1), needed threshold-dependent performance metrics. one threshold type can used. necessary provide vector argument. following threshold criteria available: lpt: highest threshold omission. equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard index highest. max_sorensen: threshold Sorensen index highest. max_fpb: threshold FPB (F-measure presence-background data) highest. sensitivity: Threshold based specified sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers sensitivity value. sensitivity values specified default used 0.9. one threshold type used must concatenated, e.g., thr=c('lpt', 'max_sens_spec', 'max_jaccard'), thr=c('lpt', 'max_sens_spec', 'sensitivity', sens='0.8'), thr=c('lpt', 'max_sens_spec', 'sensitivity'). Function use thresholds threshold specified. clamp logical. TRUE, predictors features restricted range seen model training. classes character. single feature combinations . Features symbolized letters: l (linear), q (quadratic), h (hinge), p (product), t (threshold). Usage classes = \"lpq\". Default \"default\" (see details). pred_type character. Type response required available \"link\", \"exponential\", \"cloglog\" \"logistic\". Default \"cloglog\" regmult numeric. constant adjust regularization. Default 1.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_max.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit and validate Maximum Entropy models — fit_max","text":"list object : model: \"maxnet\" class object maxnet package. object can used predicting. 
predictors: tibble quantitative (c column names) qualitative (f column names) variables use modeling. performance: Performance metrics (see sdm_eval). Threshold dependent metrics calculated based threshold specified thr argument. data_ens: Predicted suitability test partition based best model. database used fit_ensemble","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_max.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Fit and validate Maximum Entropy models — fit_max","text":"argument “classes” set default MaxEnt use different features combination depending number presences (np) follow rule: np < 10 classes = \"l\", np 10 15 classes = \"lq\", np 15 80 classes = \"lqh\", np >= 80 classes = \"lqph\" presence-absence (presence-pseudo-absence) data used data argument addition background points, function fit models presences background points validate presences absences. procedure makes maxent comparable presences-absences models (e.g., random forest, support vector machine). presences background points data used, function fit validate model presences background data. 
presence-absences used data argument without background, function fit model specified data (recommended).","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_max.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit and validate Maximum Entropy models — fit_max","text":"","code":"if (FALSE) { data(\"abies\") data(\"backg\") abies # environmental conditions of presence-absence data backg # environmental conditions of background points # Using k-fold partition method # Note that the partition method, number of folds or replications must # be the same for presence-absence and background points datasets abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) abies2 backg2 <- part_random( data = backg, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) backg2 max_t1 <- fit_max( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), predictors_f = c(\"landform\"), partition = \".part\", background = backg2, thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\"), clamp = TRUE, classes = \"default\", pred_type = \"cloglog\", regmult = 1 ) length(max_t1) max_t1$model max_t1$predictors max_t1$performance max_t1$data_ens }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_net.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit and validate Neural Networks models — fit_net","title":"Fit and validate Neural Networks models — fit_net","text":"Fit validate Neural Networks models","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_net.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit and validate Neural Networks models — fit_net","text":"","code":"fit_net( data, response, predictors, predictors_f = NULL, fit_formula = NULL, partition, thr = NULL, size = 2, decay = 0.1 
)"},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_net.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit and validate Neural Networks models — fit_net","text":"data data.frame. Database response (0,1) predictors values. response character. Column name species absence-presence data (0,1). predictors character. Vector column names quantitative predictor variables (.e. continuous variables). Usage predictors = c(\"aet\", \"cwd\", \"tmin\") predictors_f character. Vector column names qualitative predictor variables (.e. ordinal nominal variables type). Usage predictors_f = c(\"landform\") fit_formula formula. formula object response predictor variables (e.g. formula(pr_ab ~ aet + ppt_jja + pH + awc + depth + landform)). Note variables used must consistent used response, predictors, predictors_f arguments. Defaul NULL. partition character. Column name training validation partition groups. thr character. Threshold used get binary suitability values (.e. 0,1)., needed threshold-dependent performance metrics. one threshold type can specified. necessary provide vector argument. following threshold criteria available: lpt: highest threshold omission. equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard index highest. max_sorensen: threshold Sorensen index highest. max_fpb: threshold FPB (F-measure presence-background data) highest. sensitivity: Threshold based specified sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers sensitivity value. sensitivity value specified, default 0.9 one threshold type used must concatenated, e.g., thr=c('lpt', 'max_sens_spec', 'max_jaccard'), thr=c('lpt', 'max_sens_spec', 'sensitivity', sens='0.8'), thr=c('lpt', 'max_sens_spec', 'sensitivity'). Function use thresholds threshold specified. size numeric. 
Number units hidden layer. Can zero skip-layer units. Default 2. decay numeric. Parameter weight decay. Default 0.1.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_net.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit and validate Neural Networks models — fit_net","text":"list object : model: \"nnet.formula\" \"nnet\" class object nnet package. object can used predicting. predictors: tibble quantitative (c column names) qualitative (f column names) variables use modeling. performance: Performance metrics (see sdm_eval). Threshold dependent metric calculated based threshold specified argument. data_ens: Predicted suitability test partition. database used fit_ensemble","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_net.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit and validate Neural Networks models — fit_net","text":"","code":"if (FALSE) { data(\"abies\") # Using k-fold partition method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 10) ) abies2 nnet_t1 <- fit_net( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), predictors_f = c(\"landform\"), partition = \".part\", thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\"), fit_formula = NULL ) nnet_t1$model nnet_t1$predictors nnet_t1$performance nnet_t1$data_ens # Using bootstrap partition method and only with presence-absence # and get performance for several method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"boot\", replicates = 10, proportion = 0.7) ) abies2 nnet_t2 <- fit_net( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), predictors_f = c(\"landform\"), partition = \".part\", thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\"), fit_formula = NULL ) 
nnet_t2 }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_raf.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit and validate Random Forests models — fit_raf","title":"Fit and validate Random Forests models — fit_raf","text":"Fit validate Random Forests models","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_raf.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit and validate Random Forests models — fit_raf","text":"","code":"fit_raf( data, response, predictors, predictors_f = NULL, fit_formula = NULL, partition, thr = NULL, mtry = sqrt(length(c(predictors, predictors_f))) )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_raf.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit and validate Random Forests models — fit_raf","text":"data data.frame. Database response (0,1) predictors values. response character. Column name species absence-presence data (0,1). predictors character. Vector column names quantitative predictor variables (.e. continuous variables). Usage predictors = c(\"aet\", \"cwd\", \"tmin\") predictors_f character. Vector column names qualitative predictor variables (.e. ordinal nominal variables type). Usage predictors_f = c(\"landform\") fit_formula formula. formula object response predictor variables (e.g. formula(pr_ab ~ aet + ppt_jja + pH + awc + depth + landform)). Note variables used must consistent used response, predictors, predictors_f arguments. Default NULL partition character. Column name training validation partition groups. thr character. Threshold used get binary suitability values (.e. 0,1), needed threshold-dependent performance metrics. one threshold type can used. necessary provide vector argument. following threshold criteria available: lpt: highest threshold omission. equal_sens_spec: Threshold sensitivity specificity equal. 
max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard highest. max_sorensen: threshold Sorensen highest. max_fpb: threshold FPB (F-measure presence-background data) highest. sensitivity: Threshold based specified sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers sensitivity value. specified sensitivity values, function use default 0.9 one threshold type used must concatenated, e.g., thr=c('lpt', 'max_sens_spec', 'max_jaccard'), thr=c('lpt', 'max_sens_spec', 'sensitivity', sens='0.8'), thr=c('lpt', 'max_sens_spec', 'sensitivity'). Function use thresholds threshold specified. mtry numeric. Number variables randomly sampled candidates split. Default sqrt(length(c(predictors, predictors_f)))","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_raf.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit and validate Random Forests models — fit_raf","text":"list object : model: \"randomForest\" class object randomForest package. object can used predicting. predictors: tibble quantitative (c column names) qualitative (f column names) variables use modeling. performance: Performance metrics (see sdm_eval). Threshold dependent metrics calculated based threshold specified argument. data_ens: Predicted suitability test partition. 
database used fit_ensemble","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_raf.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit and validate Random Forests models — fit_raf","text":"","code":"if (FALSE) { data(\"abies\") # Using k-fold partition method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 10) ) abies2 rf_t1 <- fit_raf( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), predictors_f = c(\"landform\"), partition = \".part\", thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\"), fit_formula = NULL ) rf_t1$model rf_t1$predictors rf_t1$performance rf_t1$data_ens # Using bootstrap partition method and only with presence-absence # and get performance for several method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"boot\", replicates = 10, proportion = 0.7) ) abies2 rf_t2 <- fit_raf( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), predictors_f = c(\"landform\"), partition = \".part\", thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\"), fit_formula = NULL ) rf_t2 }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_svm.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit and validate Support Vector Machine models — fit_svm","title":"Fit and validate Support Vector Machine models — fit_svm","text":"Fit validate Support Vector Machine models","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_svm.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit and validate Support Vector Machine models — fit_svm","text":"","code":"fit_svm( data, response, predictors, predictors_f = NULL, fit_formula = NULL, partition, thr = NULL, sigma = \"automatic\", C = 1 
)"},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_svm.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit and validate Support Vector Machine models — fit_svm","text":"data data.frame. Database response (0,1) predictors values. response character. Column name species absence-presence data (0,1). predictors character. Vector column names quantitative predictor variables (.e. continuous variables). Usage predictors = c(\"aet\", \"cwd\", \"tmin\") predictors_f character. Vector column names qualitative predictor variables (.e. ordinal nominal variables type). Usage predictors_f = c(\"landform\") fit_formula formula. formula object response predictor variables (e.g. formula(pr_ab ~ aet + ppt_jja + pH + awc + depth + landform)). Note variables used must consistent used response, predictors, predictors_f arguments partition character. Column name training validation partition groups. thr character. Threshold used get binary suitability values (.e. 0,1) needed threshold-dependent performance metrics. one threshold type can used. necessary provide vector argument. following threshold criteria available: lpt: highest threshold omission. equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard index highest. max_sorensen: threshold Sorensen index highest. max_fpb: threshold FPB (F-measure presence-background data) highest. sensitivity: Threshold based specified sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers sensitivity value. sensitivity value specified, default used 0.9 one threshold type used must concatenated, e.g., thr=c('lpt', 'max_sens_spec', 'max_jaccard'), thr=c('lpt', 'max_sens_spec', 'sensitivity', sens='0.8'), thr=c('lpt', 'max_sens_spec', 'sensitivity'). Function use thresholds threshold specified. sigma numeric. 
Inverse kernel width Radial Basis kernel function \"rbfdot\". Default \"automatic\". C numeric. Cost constraints violation, 'C'-constant regularization term Lagrange formulation. Default 1","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_svm.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit and validate Support Vector Machine models — fit_svm","text":"list object : model: \"ksvm\" class object kernlab package. object can used predicting. predictors: tibble quantitative (c column names) qualitative (f column names) variables use modeling. performance: Performance metric (see sdm_eval). Threshold dependent metrics calculated based threshold specified argument. data_ens: Predicted suitability test partition based best model. database used fit_ensemble","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_svm.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Fit and validate Support Vector Machine models — fit_svm","text":"function constructs 'C-svc' classification type uses Radial Basis kernel \"Gaussian\" function (rbfdot). 
See details details ksvm.","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/fit_svm.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit and validate Support Vector Machine models — fit_svm","text":"","code":"if (FALSE) { data(\"abies\") # Using k-fold partition method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) abies2 svm_t1 <- fit_svm( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), predictors_f = c(\"landform\"), partition = \".part\", thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\"), fit_formula = NULL ) names(svm_t1) svm_t1$model svm_t1$predictors svm_t1$performance svm_t1$data_ens # Using bootstrap partition method and only with presence-absence # and get performance for several method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"boot\", replicates = 10, proportion = 0.7) ) abies2 svm_t2 <- fit_svm( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), predictors_f = c(\"landform\"), partition = \".part\", thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\"), fit_formula = NULL ) svm_t2 }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/get_block.html","id":null,"dir":"Reference","previous_headings":"","what":"Transform a spatial partition layer to the same spatial properties as environmental variables — get_block","title":"Transform a spatial partition layer to the same spatial properties as environmental variables — get_block","text":"Transform spatial partition layer spatial properties environmental variables","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/get_block.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Transform a spatial partition layer to the same spatial properties as environmental 
variables — get_block","text":"","code":"get_block(env_layer, best_grid)"},{"path":"https://sjevelazco.github.io/flexsdm/reference/get_block.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Transform a spatial partition layer to the same spatial properties as environmental variables — get_block","text":"env_layer SpatRaster object environmental variables used block_partition band_partition function. Function always select first layer best_grid SpatRaster object returned block_partition band_partition","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/get_block.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Transform a spatial partition layer to the same spatial properties as environmental variables — get_block","text":"SpatRaster layer resolution extent environmental variables","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/get_block.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Transform a spatial partition layer to the same spatial properties as environmental variables — get_block","text":"Transform layer originating function block_partition band_partition spatial properties environmental variables","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/get_block.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Transform a spatial partition layer to the same spatial properties as environmental variables — get_block","text":"","code":"if (FALSE) { require(dplyr) require(terra) data(spp) f <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(f) # Example for a single species single_spp <- spp %>% dplyr::filter(species == \"sp3\") part <- part_sblock( env_layer = somevar, data = single_spp, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", min_res_mult = 100, max_res_mult = 500, num_grids = 10, min_occ = 5, n_part = 2 ) grid_env 
<- get_block(env_layer = somevar, best_grid = part$grid) grid_env part$grid plot(part$grid) plot(grid_env) }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/hespero.html","id":null,"dir":"Reference","previous_headings":"","what":"A data set containing localities of Hesperocyparis stephensonii species in California, USA — hespero","title":"A data set containing localities of Hesperocyparis stephensonii species in California, USA — hespero","text":"data set containing localities Hesperocyparis stephensonii species California, USA","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/hespero.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"A data set containing localities of Hesperocyparis stephensonii species in California, USA — hespero","text":"","code":"hespero"},{"path":"https://sjevelazco.github.io/flexsdm/reference/hespero.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"A data set containing localities of Hesperocyparis stephensonii species in California, USA — hespero","text":"tibble object 14 rows 4 variables: ID presences records ID x y columns coordinates Albers Equal Area Conic coordinate system pr_ab presence denoted 1","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/hespero.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"A data set containing localities of Hesperocyparis stephensonii species in California, USA — hespero","text":"","code":"if (FALSE) { require(dplyr) data(\"hespero\") hespero }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/homogenize_na.html","id":null,"dir":"Reference","previous_headings":"","what":"Homogenize cells with NAs across all layers — homogenize_na","title":"Homogenize cells with NAs across all layers — homogenize_na","text":"Homogenize cells NAs across 
layers","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/homogenize_na.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Homogenize cells with NAs across all layers — homogenize_na","text":"","code":"homogenize_na(x)"},{"path":"https://sjevelazco.github.io/flexsdm/reference/homogenize_na.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Homogenize cells with NAs across all layers — homogenize_na","text":"x SpatRaster.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/homogenize_na.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Homogenize cells with NAs across all layers — homogenize_na","text":"SpatRaster","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/homogenize_na.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Homogenize cells with NAs across all layers — homogenize_na","text":"Homogenize cells NAs across layers SpatRaster resulting SpatRaster layers cells NAa","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/homogenize_na.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Homogenize cells with NAs across all layers — homogenize_na","text":"","code":"if (FALSE) { #' require(terra) somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) somevar2 <- homogenize_na(somevar) par(mfrow = c(2, 1)) plot(somevar$CFP_4) plot(somevar2$CFP_4) par(mfrow = c(1, 1)) # In somevar2 all layers have the same cells with NAs }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/interp.html","id":null,"dir":"Reference","previous_headings":"","what":"Raster interpolation between two time periods — interp","title":"Raster interpolation between two time periods — interp","text":"function interpolates values year two specified years simple interpolation using two 
raster objects containing e.g. habitat suitability values predicted using species distribution model.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/interp.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Raster interpolation between two time periods — interp","text":"","code":"interp(r1, r2, y1, y2, rastername = NULL, dir_save = NULL)"},{"path":"https://sjevelazco.github.io/flexsdm/reference/interp.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Raster interpolation between two time periods — interp","text":"r1 SpatRaster. Raster object initial year r2 SpatRaster. Raster object final year y1 numeric. Initial year y2 numeric. Final year rastername character. Word used prefix raster file name. Default NULL dir_save character. Directory path name folder raster files saved. NULL, function return SpatRaster object, else, save raster given directory. Default NULL","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/interp.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Raster interpolation between two time periods — interp","text":"dir_save NULL, function returns SpatRaster suitability interpolation year. dir_save used, function outputs saved directory specified dir_save.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/interp.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Raster interpolation between two time periods — interp","text":"function interpolates suitability values assuming annual changes suitability linear. 
function useful linking SDM output based averaged climate data climate change scenarios models require suitability values disaggregated time periods, population dynamics (Keith et al., 2008; Conlisk et al., 2013; Syphard et al., 2013).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/interp.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Raster interpolation between two time periods — interp","text":"Keith, D.., Akçakaya, H.R., Thuiller, W., Midgley, G.F., Pearson, R.G., Phillips, S.J., Regan, H.M., Araujo, M.B. & Rebelo, T.G. (2008) Predicting extinction risks climate change: coupling stochastic population models dynamic bioclimatic habitat models. Biology Letters, 4, 560-563. Conlisk, E., Syphard, .D., Franklin, J., Flint, L., Flint, . & Regan, H.M. (2013) Management implications uncertainty assessing impacts multiple landscape-scale threats species persistence using linked modeling approach. Global Change Biology 3, 858-869. Syphard, .D., Regan, H.M., Franklin, J. & Swab, R. (2013) functional type vulnerability multiple threats depend spatial context Mediterranean-climate regions? Diversity Distributions, 19, 1263-1274.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/interp.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Raster interpolation between two time periods — interp","text":"","code":"if (FALSE) { require(terra) require(dplyr) f <- system.file(\"external/suit_time_step.tif\", package = \"flexsdm\") abma <- terra::rast(f) plot(abma) int <- interp( r1 = abma[[1]], r2 = abma[[2]], y1 = 2010, y2 = 2020, rastername = \"Abies\", dir_save = NULL ) int }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/msdm_posteriori.html","id":null,"dir":"Reference","previous_headings":"","what":"Methods to correct overprediction of species distribution models based on occurrences and suitability patterns. 
— msdm_posteriori","title":"Methods to correct overprediction of species distribution models based on occurrences and suitability patterns. — msdm_posteriori","text":"methods reduce overprediction species distribution models based posteriori methods (see Mendes et al 2020), .e., combination patterns species occurrences predicted suitability","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/msdm_posteriori.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Methods to correct overprediction of species distribution models based on occurrences and suitability patterns. — msdm_posteriori","text":"","code":"msdm_posteriori( records, x, y, pr_ab, cont_suit, method = c(\"obr\", \"pres\", \"lq\", \"mcp\", \"bmcp\"), thr = \"equal_sens_spec\", buffer = NULL, crs = NULL )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/msdm_posteriori.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Methods to correct overprediction of species distribution models based on occurrences and suitability patterns. — msdm_posteriori","text":"records tibble data.frame. database spatial coordinates species presences absences (pseudo-absence) used create species distribution models. x character. Column name spatial x coordinates. y character. Column name spatial y coordinates. pr_ab character. Column name presence absence data (.e. 1 0) cont_suit SpatRaster. Raster continuous suitability predictions \"species_specific\" type calculates minimum pairwise-distances occurrences selects maximum distance, .e., value buffer maximum distance minimum distance. procedure depends spatial pattern species' occurrences; thus, species, value buffer width calculated (usage buffer=\"species_specific\"). method character. character string indicating constraint method used. thr character numeric. Threshold used get binary suitability values (.e. 0,1), needed threshold-dependent performance metrics. 
one threshold type can specified. necessary provide vector argument. following threshold criteria available: lpt: highest threshold omission. equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard index highest. max_sorensen: threshold Sorensen index highest. max_fpb: threshold FPB highest. sensitivity: Threshold based specified sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers sensitivity value. specified sensitivity values, function use default 0.9 Also, possible specifying threshold value using numeric values (thr = 0.623). Default \"equal_sens_spec\". buffer numeric. Buffer width use 'bmcp' approach. buffer width interpreted m Coordinate reference system used \"crs\" argument longitude/latitude, map units cases. Usage buffer=50000. Default NULL crs character. Coordinate reference system used calculating buffer \"bmcp\" method.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/msdm_posteriori.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Methods to correct overprediction of species distribution models based on occurrences and suitability patterns. — msdm_posteriori","text":"function return SpatRaster continuous binary prediction.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/msdm_posteriori.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Methods to correct overprediction of species distribution models based on occurrences and suitability patterns. — msdm_posteriori","text":"function help reduce overprediction species distribution models based combination patterns species occurrences predicted suitability. recommended use approaches current distribution models projected different time periods (past future). 
Five methods implemented: Abbreviation list SDM: species distribution model l: suitability patches intercept species occurrences k: suitability patches intercept species occurrences T: threshold distances used select suitability patches methods reduce overprediction species distribution models already fitted based occurrences suitability patterns species (see 'thr' arguments) Method 'obr' (Occurrences Based Restriction). method assumes suitable patches intercepting species occurrences (l) likely part species distributions suitable patches intercept occurrence (k). Distance k patches closest l patch calculated, k patches removed exceed species-specific distance threshold SDMs models. threshold (T) calculated maximum distance vector minimum pairwise distances occurrences. Whenever suitable pixel within k patch distance T closest l patch, suitability pixel reduced zero. assumed simple threshold surrogate species-specific dispersal ability. T low, either species sampled throughout distribution, species geographically restricted, justifying narrow inclusion k patches (Mendes et al., 2020). Method 'pres' (occurrences based restriction). restrictive variant 'obr' method. retains pixels suitability patches intercepting occurrences (k) (Mendes et al., 2020). Method 'lq' (Lower Quantile). method similar 'obr' method, except procedure define distance threshold withdrawn k patches, lower quartile distance k patches closest l patch. Whenever suitable pixel within k patch, .e., within lower quartile, suitability pixel reduced zero. means 75% k patches withdrawn model (Mendes et al., 2020). Method 'mcp' (Minimum Convex Polygon). Compiled adapted Kremen et al. (2008), method excludes SDM predictions suitable pixels intercept minimum convex polygon, interior angles smaller 180, enclosing occurrences species. Method 'bmcp' (Buffered Minimum Convex Polygon). Compiled adapted Kremen et al. (2008), similar 'mcp' method except inclusion buffer zone surrounding minimum convex polygons. 
method buffer width value must provided \"buffer\" argument CRS \"crs\" argument. methodological performance information methods see Mendes et al. (2020). using one constraining methods, cite Mendes et al (2020).","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/msdm_posteriori.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Methods to correct overprediction of species distribution models based on occurrences and suitability patterns. — msdm_posteriori","text":"Mendes, P.; Velazco S.J.E.; Andrade, .F..; De Marco, P. (2020) Dealing overprediction species distribution models: adding distance constraints can improve model accuracy, Ecological Modelling, press. https://doi.org/10.1016/j.ecolmodel.2020.109180 Kremen, C., Cameron, ., Moilanen, ., Phillips, S. J., Thomas, C. D., Beentje, H., . Zjhra, M. L. (2008). Aligning Conservation Priorities Across Taxa Madagascar High-Resolution Planning Tools. Science, 320(5873), 222-226. doi:10.1126/science.1155193","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/msdm_posteriori.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Methods to correct overprediction of species distribution models based on occurrences and suitability patterns. 
— msdm_posteriori","text":"","code":"if (FALSE) { require(dplyr) require(terra) data(\"spp\") somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) # Preparing data for modeling a species set.seed(10) occ <- spp %>% dplyr::filter(species == \"sp2\") %>% # filter a species sdm_extract( data = ., x = \"x\", y = \"y\", env_layer = somevar, filter_na = TRUE ) %>% # extrac variables values part_random(., pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 10) ) # add columns with partition # Fit a model m_glm <- fit_glm( data = occ, response = \"pr_ab\", predictors = names(somevar), partition = \".part\", thr = \"equal_sens_spec\", ) # Lets predict this model m_pred <- sdm_predict(models = m_glm, pred = somevar, thr = NULL, con_thr = FALSE) plot(m_pred[[1]]) m_pred[[1]] %>% plot() # Lets extract the raster from this list m_pred <- m_pred[[1]] ### bmcp method m_bmcp <- msdm_posteriori( records = occ, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", method = \"bmcp\", cont_suit = m_pred, thr = \"equal_sens_spec\", buffer = 30000, crs=crs(m_pred) ) plot(m_bmcp) ### mcp method m_mcp <- msdm_posteriori( records = occ, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", method = \"mcp\", cont_suit = m_pred, thr = \"equal_sens_spec\", buffer = NULL ) plot(m_mcp) ### pres method m_pres <- msdm_posteriori( records = occ, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", method = \"pres\", cont_suit = m_pred, thr = \"equal_sens_spec\", buffer = NULL ) plot(m_pres) ### lq method m_lq <- msdm_posteriori( records = occ, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", method = \"lq\", cont_suit = m_pred, thr = \"equal_sens_spec\", buffer = NULL ) plot(m_lq) ### obr method m_obr <- msdm_posteriori( records = occ, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", method = \"obr\", cont_suit = m_pred, thr = \"equal_sens_spec\", buffer = NULL ) plot(m_obr) 
}"},{"path":"https://sjevelazco.github.io/flexsdm/reference/msdm_priori.html","id":null,"dir":"Reference","previous_headings":"","what":"Create spatial predictor variables to reduce overprediction of species distribution models — msdm_priori","title":"Create spatial predictor variables to reduce overprediction of species distribution models — msdm_priori","text":"function creates geographical predictor variables , together environmental variables, can used construct constrained species distribution models.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/msdm_priori.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create spatial predictor variables to reduce overprediction of species distribution models — msdm_priori","text":"","code":"msdm_priori(data, x, y, method = c(\"xy\", \"min\", \"cml\", \"ker\"), env_layer)"},{"path":"https://sjevelazco.github.io/flexsdm/reference/msdm_priori.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create spatial predictor variables to reduce overprediction of species distribution models — msdm_priori","text":"data tibble data.frame. database geographical coordinates species presences. x character. Column name spatial x coordinates. y character. Column name spatial y coordinates. method character. character string indicating MSDM method used. following methods available: 'xy', 'min', 'cml', 'ker'. Usage method = 'cml' env_layer raster layer used construct species distribution models. object used create constraining variables resolution, extent, pattern empty cells environmental variables. advisable use raster environmental layer used create species distribution models avoid mismatch (e.g. 
resolution, extent, cells NA) environmental constraining variables.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/msdm_priori.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create spatial predictor variables to reduce overprediction of species distribution models — msdm_priori","text":"function returns SpatRaster object. raster/s used together environmental variables construct species distribution models. 'xy' approach creates single pair raster layers can used species share study region. Otherwise, 'cml', 'min', 'ker' create species-specific raster layer.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/msdm_priori.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Create spatial predictor variables to reduce overprediction of species distribution models — msdm_priori","text":"function creates geographical predictor variables , together environmental variables, can used construct constrained species distribution models. recommended use approaches create models projected current conditions different time periods (past future). Four methods implemented: xy (Latlong method). method assumes spatial structure can partially explain species distribution (Bahn & McGill, 2007). Therefore, two raster layers created, containing latitude longitude pixels, respectively. raster layers included covariates environmental layers construct species distribution models. method interact species occurrence generic given study region; reason, possible use method species set share study region. min (Nearest neighbor distance method). Compiled adapted Allouche et al. (2008), method calculates cell Euclidean geographic distance nearest presence point. cml (Cumulative distance method). Compiled adapted Allouche et al. (2008), method assumes pixels closer presences likely included species distributions. 
Therefore, raster layer created containing sum Euclidean geographic distances pixel occurrences species. Obtained values normalized vary zero one. raster layer included environmental layers construct species distribution models. ker (Kernel method). Compiled adapted Allouche et al. (2008), method, like cml, assumes pixels located areas higher density occurrences likely included actual species distribution. Thus, raster layer created containing Gaussian values based density occurrences species. standard deviation Gaussian distribution maximum value vector minimum distances pairs occurrences species. Gaussian values normalized vary zero one. raster layer included environmental layers construct species distribution models. See Mendes et al. (2020) methodological performance details. used one constraining method cite Mendes et al 2020.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/msdm_priori.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Create spatial predictor variables to reduce overprediction of species distribution models — msdm_priori","text":"Mendes, P.; Velazco S.J.E.; Andrade, .F..; De Marco, P. (2020) Dealing overprediction species distribution models: adding distance constraints can improve model accuracy, Ecological Modelling, press. https://doi.org/10.1016/j.ecolmodel.2020.109180 Allouche, O.; Steinitz, O.; Rotem, D.; Rosenfeld, .; Kadmon, R. (2008). Incorporating distance constraints species distribution models. Journal Applied Ecology, 45(2), 599-609. doi:10.1111/j.1365-2664.2007.01445.x Bahn, V.; McGill, B. J. (2007). Can niche-based distribution models outperform spatial interpolation? Global Ecology Biogeography, 16(6), 733-742. 
doi:10.1111/j.1466-8238.2007.00331.x","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/msdm_priori.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Create spatial predictor variables to reduce overprediction of species distribution models — msdm_priori","text":"","code":"if (FALSE) { require(dplyr) require(terra) data(\"spp\") somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) # Select the presences of a species occ <- spp %>% dplyr::filter(species == \"sp3\", pr_ab == 1) # Select a raster layer to be used as a basic raster a_variable <- somevar[[1]] plot(a_variable) points(occ %>% dplyr::select(x, y)) ### xy method m_xy <- msdm_priori( data = occ, x = \"x\", y = \"y\", method = \"xy\", env_layer = a_variable ) plot(m_xy) ### min method m_min <- msdm_priori( data = occ, x = \"x\", y = \"y\", method = \"min\", env_layer = a_variable ) plot(m_min) ### cml method m_cml <- msdm_priori( data = occ, x = \"x\", y = \"y\", method = \"cml\", env_layer = a_variable ) plot(m_cml) ### ker method m_ker <- msdm_priori( data = occ, x = \"x\", y = \"y\", method = \"ker\", env_layer = a_variable ) plot(m_ker) }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/occfilt_env.html","id":null,"dir":"Reference","previous_headings":"","what":"Perform environmental filtering on species occurrences — occfilt_env","title":"Perform environmental filtering on species occurrences — occfilt_env","text":"function perform filtering species occurrences based environmental conditions.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/occfilt_env.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Perform environmental filtering on species occurrences — occfilt_env","text":"","code":"occfilt_env(data, x, y, id, env_layer, 
nbins)"},{"path":"https://sjevelazco.github.io/flexsdm/reference/occfilt_env.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Perform environmental filtering on species occurrences — occfilt_env","text":"data data.frame. Data.frame tibble object presences (presence-absence) records, coordinates x character. Column name spatial x coordinates y character. Column name spatial y coordinates id character. Column names rows id. important row unique code. env_layer SpatRaster. Rasters environmental conditions nbins integer. number classes used split environmental condition","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/occfilt_env.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Perform environmental filtering on species occurrences — occfilt_env","text":"tibble object data environmentally filtered","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/occfilt_env.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Perform environmental filtering on species occurrences — occfilt_env","text":"function uses approach adapted approach proposed Varela et al. (2014). consists filtering occurrences environmental space. First, regular multidimensional grid created environmental space. multidimensional grid determined environmental variables (always use continuous variables) grid cell size defined number bins, used dividing variable range interval classes (Varela et al. 2014; Castellanos et al., 2019). number bins set \"nbins\" argument. , single occurrence randomly selected within cell multidimensional grid. Consider trade-number bins number filtered records number bins decreases, cell size grids increases, number filtered records decreases (Castellanos et al., 2019). occfilt_env works number dimensions (variables) original variables without performing PCA beforehand. 
greater number predictor variables (.e., number dimensions multidimensional environmental grid) greater number bins, greater time processing computer memory used. Therefore, recommended use small number bins 2-5 ten variables used. Environmental filters sensitive number bins. procedure selecting number bins used Velazco et al. (2020). selection consists testing different numbers bins, calculating average spatial autocorrelation among variables (based Moran’s index), selecting lowest average spatial autocorrelation highest number occurrences. Note greater number bins, greater records retained","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/occfilt_env.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Perform environmental filtering on species occurrences — occfilt_env","text":"Castellanos, . ., Huntley, J. W., Voelker, G., & Lawing, . M. (2019). Environmental filtering improves ecological niche models across multiple scales. Methods Ecology Evolution, 10(4), 481-492. https://doi.org/10.1111/2041-210X.13142 Varela, S., Anderson, R. P., Garcia-Valdes, R., & Fernandez-Gonzalez, F. (2014). Environmental filters reduce effects sampling bias improve predictions ecological niche models. Ecography, 37, 1084-1091. https://doi.org/10.1111/j.1600-0587.2013.00441.x Velazco, S. J. E., Svenning, J-C., Ribeiro, B. R., & Laureto, L. M. O. (2020). opportunities threats conserve phylogenetic diversity Neotropical palms. Diversity Distributions, 27, 512–523. 
https://doi.org/10.1111/ddi.13215","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/occfilt_env.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Perform environmental filtering on species occurrences — occfilt_env","text":"","code":"if (FALSE) { require(terra) require(dplyr) # Environmental variables somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) plot(somevar) # Species occurrences data(\"spp\") spp spp1 <- spp %>% dplyr::filter(species == \"sp1\", pr_ab == 1) somevar[[1]] %>% plot() points(spp1 %>% select(x, y)) spp1$idd <- 1:nrow(spp1) # split environmental variables into 5 bins filtered_1 <- occfilt_env( data = spp1, x = \"x\", y = \"y\", id = \"idd\", env_layer = somevar, nbins = 5 ) # split into 8 bins filtered_2 <- occfilt_env( data = spp1, x = \"x\", y = \"y\", id = \"idd\", env_layer = somevar, nbins = 8 ) # split into 12 bins filtered_3 <- occfilt_env( data = spp1, x = \"x\", y = \"y\", id = \"idd\", env_layer = somevar, nbins = 12 ) # note that the higher the nbins parameter the more # classes must be processed (4 variables, 30 bins = 923521 classes) # While the greater the greater the number of bins, the greater records retained }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/occfilt_geo.html","id":null,"dir":"Reference","previous_headings":"","what":"Perform geographical filtering on species occurrences — occfilt_geo","title":"Perform geographical filtering on species occurrences — occfilt_geo","text":"function perform geographical filtering species occurrences based different approach define minimum nearest-neighbor distance points.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/occfilt_geo.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Perform geographical filtering on species occurrences — occfilt_geo","text":"","code":"occfilt_geo( data, x, y, 
env_layer, method, prj = \"+proj=longlat +datum=WGS84\", reps = 20 )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/occfilt_geo.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Perform geographical filtering on species occurrences — occfilt_geo","text":"data data.frame. Data.frame tibble object presences (presence-absence) records, coordinates x character. Column name longitude data y character. Column name latitude data env_layer SpatRaster. Raster variables used fit model method character. Method perform geographical thinning. Pairs points filtered based geographical distance criteria.following methods available: moran: records filtered based smallest distance reduces Moran's values lower 0.1. Latlong = TRUE occurrences geographical projection. Usage method: method = c('moran'). cellsize: records filtered based resolution environmental variables can aggregated coarser resolution defined factor. Usage method: method = c('cellsize', factor = '2'). defined: records filtered based distance value (d) provided km. Usage method: method = c('defined', d = 300). prj character. Projection string (PROJ4) occurrences. necessary projection used WGS84 (\"+proj=longlat +datum=WGS84\"). Default \"+proj=longlat +datum=WGS84\" reps integer. Number times repeat thinning process. 
Default 20","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/occfilt_geo.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Perform geographical filtering on species occurrences — occfilt_geo","text":"tibble object data filtered geographically","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/occfilt_geo.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Perform geographical filtering on species occurrences — occfilt_geo","text":"function three alternatives implemented determine distance threshold pair points: 1-\"moran\" determines minimum nearest-neighbor distance minimizes spatial autocorrelation occurrence data, following Moran's semivariogram. Principal Component Analysis environmental variables performed first Principal Component used calculate semivariograms. , method allow use continuous variables. Sometimes, method can () greatly reduce number presences. 2-\"cellsize\" filters occurrences based predictors' resolution. method calculate distance first two cells environmental variable use distance minimum nearest-neighbor distance filter occurrences. resolution raster aggregated based values used \"factor\". Thus, distance used filtering can adjusted represent larger grid size. 3-\"determined\" method uses minimum nearest-neighbor distance specified km. third method \"thin\" function spThin package used (Aiello-Lammens et al., 2015) following argument settings reps = 20, write.files = FALSE, locs.thinned.list.return = TRUE, write.log.file = FALSE.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/occfilt_geo.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Perform geographical filtering on species occurrences — occfilt_geo","text":"Aiello-Lammens, M. E., Boria, R. ., Radosavljevic, ., Vilela, B., & Anderson, R. P. (2015). 
spThin: R package spatial thinning species occurrence records use ecological niche models. Ecography, 38(5), 541-545. https://doi.org/10.1111/ecog.01132","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/occfilt_geo.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Perform geographical filtering on species occurrences — occfilt_geo","text":"","code":"if (FALSE) { require(terra) require(dplyr) # Environmental variables somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) plot(somevar) # Species occurrences data(\"spp\") spp spp1 <- spp %>% dplyr::filter(species == \"sp1\", pr_ab == 1) somevar[[1]] %>% plot() points(spp1 %>% select(x, y)) # Using Moran method filtered_1 <- occfilt_geo( data = spp1, x = \"x\", y = \"y\", env_layer = somevar, method = c(\"moran\"), prj = crs(somevar) ) somevar[[1]] %>% plot(col = gray.colors(10)) points(spp1 %>% select(x, y)) # raw data points(filtered_1 %>% select(x, y), pch = 19, col = \"yellow\") # filtered data # Using cellsize method filtered_2 <- occfilt_geo( data = spp1, x = \"x\", y = \"y\", env_layer = somevar, method = c(\"cellsize\", factor = \"3\"), prj = crs(somevar) ) somevar[[1]] %>% plot(col = gray.colors(10)) points(spp1 %>% select(x, y)) # raw data points(filtered_2 %>% select(x, y), pch = 19, col = \"yellow\") # filtered data # Using defined method filtered_3 <- occfilt_geo( data = spp1, x = \"x\", y = \"y\", env_layer = somevar, method = c(\"defined\", d = \"30\"), prj = crs(somevar) ) somevar[[1]] %>% plot(col = gray.colors(10)) points(spp1 %>% select(x, y)) # raw data points(filtered_3 %>% select(x, y), pch = 19, col = \"yellow\") # filtered data }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_random.html","id":null,"dir":"Reference","previous_headings":"","what":"Conventional data partitioning methods — part_random","title":"Conventional data partitioning methods — 
part_random","text":"function provides different conventional (randomized, non-spatial) partitioning methods based cross validation folds (kfold, rep_kfold, loocv), well bootstrap (boot)","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_random.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Conventional data partitioning methods — part_random","text":"","code":"part_random(data, pr_ab, method = NULL)"},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_random.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Conventional data partitioning methods — part_random","text":"data data.frame. Database presences, presence-absence, pseudo-absence, records given species pr_ab character. Column name \"data\" presences, presence-absence, pseudo-absence. Presences must represented 1 absences 0 method character. Vector data partitioning method used. Usage part=c(method= 'kfold', folds='5'). Methods include: kfold: Random partitioning k-folds cross-validation. 'folds' refers number folds data partitioning, assumes value >=1. Usage method = c(method = \"kfold\", folds = 10). rep_kfold: Random partitioning repeated k-folds cross-validation. Usage method = c(method = \"rep_kfold\", folds = 10, replicates=10). 'folds' refers number folds data partitioning, assumes value >=1. 'replicate' refers number replicates, assumes value >=1. loocv: Leave-one-cross-validation (.k.. Jackknife). special case k-fold cross validation number partitions equal number records. Usage method = c(method = \"loocv\"). boot: Random bootstrap partitioning. Usage method=c(method='boot', replicates='2', proportion='0.7'). 'replicate' refers number replicates, assumes value >=1. 'proportion' refers proportion occurrences used model fitting, assumes value >0 <=1. 
example proportion='0.7' mean 70% data used model training, 30% used model testing.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_random.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Conventional data partitioning methods — part_random","text":"tibble object information used 'data' argument additional columns named .part containing partition groups. rep_kfold boot method return many \".part\" columns replicated defined. rest methods, single .part column returned. kfold, rep_kfold, loocv partition methods, groups defined integers. contrast, boot method, partition groups defined characters 'train' 'test'.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_random.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Conventional data partitioning methods — part_random","text":"Fielding, . H., & Bell, J. F. (1997). review methods assessment prediction errors conservation presence/absence models. Environmental Conservation, 24(1), 38-49. 
https://doi.org/10.1017/S0376892997000088","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_random.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Conventional data partitioning methods — part_random","text":"","code":"if (FALSE) { data(\"abies\") abies$partition <- NULL abies <- tibble(abies) # K-fold method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 10) ) abies2 # Repeated K-fold method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"rep_kfold\", folds = 10, replicates = 10) ) abies2 # Leave-one-out cross-validation (loocv) method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"loocv\") ) abies2 # Bootstrap method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"boot\", replicates = 50, proportion = 0.7) ) abies2 abies2$.part1 %>% table() # Note that for this method .partX columns have train and test words. }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_sband.html","id":null,"dir":"Reference","previous_headings":"","what":"Spatial band cross-validation — part_sband","title":"Spatial band cross-validation — part_sband","text":"function explores different numbers spatial bands returns suitable value given presence presence-absence database. 
selection best number bands performed automatically considering spatial autocorrelation, environmental similarity, number presence absence records partition.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_sband.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Spatial band cross-validation — part_sband","text":"","code":"part_sband( env_layer, data, x, y, pr_ab, type = \"lon\", n_part = 2, min_bands = 2, max_bands = 20, min_occ = 10, prop = 0.5 )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_sband.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Spatial band cross-validation — part_sband","text":"env_layer SpatRaster. Raster environmental variable. Used evaluate spatial autocorrelation environmental similarity training testing partitions. function calculate dissimilarity based Euclidean distances, can used continuous environmental variables data data.frame. Data.frame tibble object presences (presence-absence, presence-pseudo-absence) records, coordinates x character. Column name spatial x coordinates y character. Column name spatial y coordinates pr_ab character. Column presences, presence-absence, -pseudo-absence. Presences must represented 1 absences 0 type character. Specify bands across different degrees longitude 'lon' latitude 'lat'. Default 'lon'. n_part integer. Number partition. Default 2, values 2 yet implemented. min_bands integer. Minimum number spatial bands tested, default 2. max_bands integer. Maximum number spatial bands tested, default 20. min_occ numeric. Minimum number presences absences partition fold. min_occ value base number predictors order avoid -fitting error fitting models given fold. Default 10. prop numeric. Proportion points used testing autocorrelation groups (values > 0 <=1). smaller number , faster function work. 
Default 0.5","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_sband.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Spatial band cross-validation — part_sband","text":"list : part: tibble object information used 'data' arguments additional column .part partition group. best_part_info: tibble information best partition. contains number best partition (n_grid), number bands (n_bands), standard deviation presences (sd_p), standard deviation absences (sd_a), Moran's spatial autocorrelation (spa_auto), environmental similarity based Euclidean distance (env_sim). grid: SpatRaster object bands","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_sband.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Spatial band cross-validation — part_sband","text":"part_sbands function allows testing different numbers partitions using range latitudinal longitudinal bands. function explores range numbers bands given number partitions automatically selects best number bands given presence, presence-absences, presence-pseudo-absences dataset. Selection number bands based optimization procedure explores partitions three dimensions determined spatial autocorrelation (measured Moran's ), environmental similarity (Euclidean distance), difference amount data among partition groups (Standard Deviation - SD; Velazco et al., 2019). procedure iterative; first select partitions autocorrelation values less lowest quartile Morans , environmental similarity values greater third quartile Euclidean distances, difference amount data less lowest quartile SD. selection repeated one partition retained (Velazco et al., 2019). main benefits partition selection ) subjective, ii) balances environmental similarity special autocorrelation partitions groups, iii) controls selection partitions little data may problematic model fitting (\"min_occ\" argument). 
Partitions geographically structured tend evaluate model transferability directly conventional ones (e.g., performed part_random) (Roberts et al., 2017; Santini et al., 2021), relevant models used projections regions outside calibration area time periods. Band partitions can option species best partition found part_sblock species distributed linearly (e.g., species inhabit coastlines). function can interact get_block, sample_background, sample_pseudoabs sampling background points pseudo-absences within spatial partition broups","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_sband.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Spatial band cross-validation — part_sband","text":"Roberts, D. R., Bahn, V., Ciuti, S., Boyce, M. S., Elith, J., Guillera-Arroita, G., Hauenstein, S., Lahoz-Monfort, J. J., Schroder, B., Thuiller, W., Warton, D. ., Wintle, B. ., Hartig, F., & Dormann, C. F. (2017). Cross-validation strategies data temporal, spatial, hierarchical, phylogenetic structure. Ecography, 40, 913-929. https://doi.org/10.1111/ecog.02881 Santini, L., Benitez-Lopez, ., Maiorano, L., Cengic, M., & Huijbregts, M. . J. (2021). Assessing reliability species distribution projections climate change research. Diversity Distributions, ddi.13252. https://doi.org/10.1111/ddi.13252 Velazco, S. J. E., Villalobos, F., Galvao, F., & De Marco Junior, P. (2019). dark scenario Cerrado plant species: Effects future climate, land use protected areas ineffectiveness. Diversity Distributions, 25(4), 660-673. 
https://doi.org/10.1111/ddi.12886","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_sband.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Spatial band cross-validation — part_sband","text":"","code":"if (FALSE) { require(terra) require(dplyr) # Load datasets data(spp) f <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(f) # Example of two longitudinal partitions with presences and absences single_spp <- spp %>% dplyr::filter(species == \"sp1\") part_1 <- part_sband( env_layer = somevar, data = single_spp, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", type = \"lon\", min_bands = 2, max_bands = 20, n_part = 2, min_occ = 10, prop = 0.5 ) part_1$part # database with partition fold (.part) part_1$part %>% group_by(pr_ab, .part) %>% count() # number of presences and absences in each fold part_1$best_part_info # information of the best partition part_1$grid # raster with folds # Explore grid object and presences and absences points plot(part_1$grid, col = gray.colors(20)) points(part_1$part[c(\"x\", \"y\")], col = rainbow(8)[part_1$part$.part], cex = 0.9, pch = c(1, 19)[part_1$part$pr_ab + 1] ) # Example of four latitudinal partition and only presences single_spp <- spp %>% dplyr::filter(species == \"sp1\", pr_ab == 1) part_2 <- part_sband( env_layer = somevar, data = single_spp, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", type = \"lat\", min_bands = 8, max_bands = 40, n_part = 8, min_occ = 10, prop = 0.5 ) part_2$part part_2$best_part_info part_2$grid # Explore Grid object and presences points plot(part_2$grid, col = gray.colors(20)) points(part_2$part[c(\"x\", \"y\")], col = rainbow(8)[part_2$part$.part], cex = 0.5, pch = 19 ) }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_sblock.html","id":null,"dir":"Reference","previous_headings":"","what":"Spatial block cross-validation — part_sblock","title":"Spatial block cross-validation — 
part_sblock","text":"function explores spatial blocks different cell sizes returns suitable size given presence presence-absence database. selection best grid size performed automatically considering spatial autocorrelation, environmental similarity, number presence absence records partition.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_sblock.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Spatial block cross-validation — part_sblock","text":"","code":"part_sblock( env_layer, data, x, y, pr_ab, n_part = 3, min_res_mult = 3, max_res_mult = 200, num_grids = 30, min_occ = 10, prop = 0.5 )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_sblock.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Spatial block cross-validation — part_sblock","text":"env_layer SpatRaster. Raster environmental variable. Used evaluate spatial autocorrelation environmental similarity training testing partitions. function calculate dissimilarity based Euclidean distances, can used continuous environmental variables data data.frame. Data.frame tibble object presence (presence-absence, presences-pseudo-absence) records, coordinates x character. Column name spatial x coordinates y character. Column name spatial y coordinates pr_ab character. Column presence, presence-absence, pseudo-absence records. Presences must represented 1 absences 0 n_part integer. Number partition. Default 2. min_res_mult integer. Minimum value used multiplying raster resolution define finest resolution tested, default 3. max_res_mult integer. Maximum value used multiplying raster resolution define coarsest resolution tested, default 200. num_grids integer. Number grid tested min_res_mult X (raster resolution) max_res_mult X (raster resolution), default 30 min_occ numeric. Minimum number presences absences partition fold. 
min_occ value base amount predictors order avoid -fitting error fitting models given fold. Default 10. prop numeric. Proportion point used testing autocorrelation groups (values > 0 <=1). smaller proportion , faster function work. Default 0.5","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_sblock.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Spatial block cross-validation — part_sblock","text":"list : part: tibble object information used 'data' arguments additional column .part partition group. best_part_info: tibble information best partition. contains number best partition (n_grid), cell size (cell_size), standard deviation presences (sd_p), standard deviation absences (sd_a), Moran's spatial autocorrelation (spa_auto), environmental similarity based Euclidean distance (env_sim). grid: SpatRaster object blocks","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_sblock.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Spatial block cross-validation — part_sblock","text":"part_sblock allows test different numbers partitions using square blocks (like checkerboard). function explores range block sizes automatically selects best size given given presence, presence-absences, presence-pseudo-absences dataset. Number partition selection based optimization procedure explores partition size three dimensions determined spatial autocorrelation (measured Moran's ), environmental similarity (Euclidean distance), difference amount data among partition groups (Standard Deviation - SD; Velazco et al., 2019). procedure iteratively select partitions, first partitions autocorrelation values less lowest quartile Morans , environmental similarity values greater third quartile Euclidean distances difference amount data less lowest quartile SD. selection repeated one partition retained (Velazco et al., 2019). 
main benefit partition selection ) subjective, ii) balances environmental similarity special autocorrelation partitions, iii) controls selection partitions data may problematic model fitting (\"min_occ\" argument). Geographically structured partitions tend evaluate model transferability directly conventional ones (e.g., performed part_random) (Roberts et al., 2017; Santini et al., 2021), relevant models used projections regions outside calibration area time periods. function can interact get_block, sample_background, sample_pseudoabs sampling background points pseudo-absences within spatial partition broups","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_sblock.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Spatial block cross-validation — part_sblock","text":"Roberts, D. R., Bahn, V., Ciuti, S., Boyce, M. S., Elith, J., Guillera-Arroita, G., Hauenstein, S., Lahoz-Monfort, J. J., Schroder, B., Thuiller, W., Warton, D. ., Wintle, B. ., Hartig, F., & Dormann, C. F. (2017). Cross-validation strategies data temporal, spatial, hierarchical, phylogenetic structure. Ecography, 40, 913-929. https://doi.org/10.1111/ecog.02881 Santini, L., Benitez-Lopez, ., Maiorano, L., Cengic, M., & Huijbregts, M. . J. (2021). Assessing reliability species distribution projections climate change research. Diversity Distributions, ddi.13252. https://doi.org/10.1111/ddi.13252 Velazco, S. J. E., Villalobos, F., Galvao, F., & De Marco Junior, P. (2019). dark scenario Cerrado plant species: Effects future climate, land use protected areas ineffectiveness. Diversity Distributions, 25(4), 660-673. 
https://doi.org/10.1111/ddi.12886","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_sblock.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Spatial block cross-validation — part_sblock","text":"","code":"if (FALSE) { require(terra) require(dplyr) # Load datasets data(spp) f <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(f) # Example for one single species single_spp <- spp %>% dplyr::filter(species == \"sp3\") part <- part_sblock( env_layer = somevar, data = single_spp, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", min_res_mult = 10, max_res_mult = 500, num_grids = 30, n_part = 2, min_occ = 5, prop = 0.5 ) part part$part # database with partition fold (.part) part$part %>% group_by(pr_ab, .part) %>% count() # number of presences and absences in each fold part$best_part_info # information of the best partition part$grid # raster with folds # Explore the Grid object plot(part$grid) points(part$part[c(\"x\", \"y\")], col = c(\"blue\", \"red\")[part$part$.part], cex = 0.5, pch = 19 ) terra::res(part$grid) terra::res(somevar) # Note that this is a layer with block partition, but it has a # different resolution than the original environmental variables. # If you wish have a layer with the same properties # (i.e. resolution, extent, NAs) as your original environmental # variables you can use the \\code{\\link{get_block}} function. grid_env <- get_block(env_layer = somevar, best_grid = part$grid) plot(grid_env) # this is a block layer with the same layer # properties as environmental variables. 
points(part$part[c(\"x\", \"y\")], col = c(\"blue\", \"red\")[part$part$.part], cex = 0.5, pch = 19 ) # This layer is very useful if you need to sample # pseudo_absence or background point # See examples in \\code{\\link{backgroudp}} and \\code{\\link{pseudoabs}} # Example of a higher number of partitions part <- part_sblock( env_layer = somevar, data = single_spp, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", min_res_mult = 10, max_res_mult = 500, num_grids = 30, n_part = 4, min_occ = 2, prop = 0.5 ) # Explore the Grid object plot(part$grid, col = gray.colors(4)) points(part$part[c(\"x\", \"y\")], col = rainbow(n = 4)[part$part$.part], cex = 0.5, pch = 19 ) # Using these functions with several species spp2 <- split(spp, spp$species) class(spp2) length(spp2) names(spp2) part_list <- lapply(spp2, function(x) { result <- part_sblock( env_layer = somevar, data = x, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", min_res_mult = 10, max_res_mult = 500, num_grids = 30, n_part = 2, min_occ = 5, prop = 0.5 ) result }) part_list$sp3 # For this dataset a suitable partition was not found # Create a single database for all species occ_part <- lapply(part_list, function(x) { if (!length(x) > 0) { x[[1]] } }) %>% dplyr::bind_rows(.id = \"species\") occ_part # Get the best grid info for all species grid_info <- dplyr::bind_rows(lapply( part_list, function(x) x[[2]] ), .id = \"species\") # Get the best grid layer for all species grid_layer <- lapply(part_list, function(x) x$grid) grid_layer2 <- lapply(grid_layer, function(x) { get_block(env_layer = somevar[[1]], best_grid = x) }) grid_layer2 <- terra::rast(grid_layer2) grid_layer2 plot(grid_layer2) # Block partition for presences-only database single_spp <- spp %>% dplyr::filter(species == \"sp1\", pr_ab == 1) single_spp single_spp$pr_ab %>% unique() # only presences part <- part_sblock( env_layer = somevar, data = single_spp, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", min_res_mult = 10, max_res_mult = 500, num_grids = 30, n_part = 4, min_occ = 
10, prop = 0.5 ) part$part %>% dim() part$best_part_info part$grid plot(part$grid) points( part$part[c(\"x\", \"y\")], col = c(\"blue\", \"red\", \"green\", \"black\")[part$part$.part], cex = 0.5, ) }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_senv.html","id":null,"dir":"Reference","previous_headings":"","what":"Environmental and spatial cross-validation — part_senv","title":"Environmental and spatial cross-validation — part_senv","text":"function explores different numbers environmental partitions (clusters) based K-means clustering algorithm returns number partitions best suited given presence, presence-absences, presence-pseudo-absences database. Selection best number partitions performed automatically considering spatial autocorrelation, environmental similarity, number presence /absence records partition.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_senv.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Environmental and spatial cross-validation — part_senv","text":"","code":"part_senv( env_layer, data, x, y, pr_ab, min_n_groups = 2, max_n_groups = 10, min_occ = 10, prop = 0.5 )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_senv.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Environmental and spatial cross-validation — part_senv","text":"env_layer SpatRaster. Raster environmental variable. used evaluate spatial autocorrelation environmental similarity training testing partitions. function calculate dissimilarity based Euclidean distances, can used continuous variables data data.frame. Data.frame tibble object presence (presence-absence, presences-pseudo-absence) records, coordinates x character. Column name spatial x coordinates y character. Column name spatial y coordinates pr_ab character. Column presences, presence-absence, pseudo-absence. Presences must represented 1 absences 0 min_n_groups integer. 
Minimum number groups tested. Default 2. max_n_groups integer. Maximum number groups tested. Default 10. min_occ numeric. Minimum number presences absences partition fold. min_occ value base amount predictors order avoid -fitting error fitting models given fold. Default 10. prop numeric. Proportion point used testing autocorrelation groups (values > 0 <=1). smaller number , faster function work. Default 0.5","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_senv.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Environmental and spatial cross-validation — part_senv","text":"list : part: tibble object information used 'data' arguments additional column .part partition group. best_part_info: tibble information best partition. contains number partition (n_groups), standard deviation presences (sd_p), standard deviation absences (sd_a), Moran's spatial autocorrelation (spa_auto) environmental similarity based Euclidean distance (env_sim)","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_senv.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Environmental and spatial cross-validation — part_senv","text":"part_sblock allows test different numbers partitions defined envirnomental clusters delimited K-mean cluster algorithm. function explores range environmental clusters automatically selects best number cluster given given presence, presence-absences, presence-pseudo-absences dataset. selection number clusters based optimization procedure explores partition size three dimensions determined spatial autocorrelation (measured Moran's ), environmental similarity (Euclidean distance), difference amount data among clusters (Standard Deviation - SD; Velazco et al., 2019). 
procedure cyclically select partitions autocorrelation values less lowest quartile Morans , environmental similarity values greater third quartile Euclidean distances difference amount data less lowest quartile SD. selection repeated one partition retained (Velazco et al., 2019). main benefit partition selection ) subjective, ii) balances environmental similarity special autocorrelation partitions, iii) controls partition selection data may problematic model fitting (\"min_occ\" argument).. Partitions geographically structured tend evaluate model transferability directly conventional ones (e.g., performed part_random) (Roberts et al., 2017; Santini et al., 2021), relevant models want used projections regions outside calibration area periods. function can interact get_block, sample_background, sample_pseudoabs sampling background points pseudo-absences within spatial partition broups","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_senv.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Environmental and spatial cross-validation — part_senv","text":"Roberts, D. R., Bahn, V., Ciuti, S., Boyce, M. S., Elith, J., Guillera-Arroita, G., Hauenstein, S., Lahoz-Monfort, J. J., Schroder, B., Thuiller, W., Warton, D. ., Wintle, B. ., Hartig, F., & Dormann, C. F. (2017). Cross-validation strategies data temporal, spatial, hierarchical, phylogenetic structure. Ecography, 40, 913-929. https://doi.org/10.1111/ecog.02881 Santini, L., Benitez-Lopez, ., Maiorano, L., Cengic, M., & Huijbregts, M. . J. (2021). Assessing reliability species distribution projections climate change research. Diversity Distributions, ddi.13252. https://doi.org/10.1111/ddi.13252 Velazco, S. J. E., Villalobos, F., Galvao, F., & De Marco Junior, P. (2019). dark scenario Cerrado plant species: Effects future climate, land use protected areas ineffectiveness. Diversity Distributions, 25(4), 660-673. 
https://doi.org/10.1111/ddi.12886","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/part_senv.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Environmental and spatial cross-validation — part_senv","text":"","code":"if (FALSE) { require(terra) require(ggplot2) f <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(f) # Select a species spp1 <- spp %>% dplyr::filter(species == \"sp1\") part1 <- part_senv( env_layer = somevar, data = spp1, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", min_n_groups = 2, max_n_groups = 10, min_occ = 10, prop = 0.2 ) part1 ggplot(part1$part, aes(x, y, col = factor(.part))) + geom_point(aes(shape = factor(pr_ab))) ggplot(part1$part, aes(x, y, col = factor(.part))) + geom_point(aes(shape = factor(pr_ab))) + facet_wrap(. ~ .part) ggplot(part1$part, aes(x, y, col = factor(.part))) + geom_point(aes(shape = factor(pr_ab))) + facet_wrap(. ~ pr_ab) }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/pdp_data.html","id":null,"dir":"Reference","previous_headings":"","what":"Calculate data to construct partial dependence plots — pdp_data","title":"Calculate data to construct partial dependence plots — pdp_data","text":"Calculate data construct partial dependence plots given predictor","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/pdp_data.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Calculate data to construct partial dependence plots — pdp_data","text":"","code":"pdp_data( model, predictors, resolution = 50, resid = FALSE, training_data = NULL, projection_data = NULL, clamping = FALSE )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/pdp_data.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Calculate data to construct partial dependence plots — pdp_data","text":"model model object class \"gam\", \"gbm\", \"glm\", 
\"graf\", \"ksvm\", \"ksvm\", \"maxnet”, “nnet\", \"randomForest\" model can found first element list returned function fit_, tune_, esm_ function families predictors character. Vector predictor name(s) plot. NULL predictors plotted. Default NULL resolution numeric. Number equally spaced points predict continuous predictors. Default 50 resid logical. Calculate residuals based training data. Default FALSE training_data data.frame. Database response (0,1) predictor values used fit model. Default NULL projection_data SpatRaster. Raster layer environmental variables used model projection. argument used, function calculate partial dependence curves distinguishing conditions used training projection conditions (.e., projection data present projection area training). Default NULL clamping logical. Perform clamping. maxent models. Default FALSE","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/pdp_data.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Calculate data to construct partial dependence plots — pdp_data","text":"list two tibbles \"pdpdata\" \"resid\". #' pdpdata: data construct partial dependence plots, first column includes values selected environmental variable, second column predicted suitability, third column range type, two values Training Projecting, referring suitability calculated within outside range training conditions. Third column returned \"projection_data\" argument used resid: data plot residuals. 
first column includes values selected environmental variable second column predicted suitability.","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/pdp_data.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Calculate data to construct partial dependence plots — pdp_data","text":"","code":"if (FALSE) { library(terra) library(dplyr) somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) # environmental data names(somevar) <- c(\"aet\", \"cwd\", \"tmx\", \"tmn\") data(abies) abies2 <- abies %>% select(x, y, pr_ab) abies2 <- sdm_extract(abies2, x = \"x\", y = \"y\", env_layer = somevar ) abies2 <- part_random(abies2, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) svm_t1 <- fit_svm( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmx\", \"tmn\"), partition = \".part\", thr = c(\"max_sens_spec\") ) df <- pdp_data( model = svm_t1$model, predictors = c(\"aet\"), resolution = 100, resid = TRUE, projection_data = somevar, training_data = abies2, clamping = FALSE ) df names(df) df$pdpdata df$resid plot(df$pdpdata[1:2], type = \"l\") points(df$resid[1:2], cex = 0.5) # see p_pdp to construct partial dependence plot with ggplot2 }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/plot_res.html","id":null,"dir":"Reference","previous_headings":"","what":"Plot different resolutions to be used in part_sblock — plot_res","title":"Plot different resolutions to be used in part_sblock — plot_res","text":"function useful display maximum minimum resolution want test block_partition function. 
Note resolution tested fine, plot display may take long time.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/plot_res.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Plot different resolutions to be used in part_sblock — plot_res","text":"","code":"plot_res(r, res_mult)"},{"path":"https://sjevelazco.github.io/flexsdm/reference/plot_res.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Plot different resolutions to be used in part_sblock — plot_res","text":"r SpatRaster. raster layer, preferably layer environmental variables used res_mult numeric. Maximum minimum resolution tested.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/plot_res.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Plot different resolutions to be used in part_sblock — plot_res","text":"plot original raster overlapped grid resolution used","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/plot_res.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Plot different resolutions to be used in part_sblock — plot_res","text":"","code":"if (FALSE) { f <- system.file(\"external/somevar.tif\", package = \"flexsdm\") r <- terra::rast(f) r <- r$CFP_1 plot_res(r, res_mult = 100) plot_res(r, res_mult = 200) }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/p_bpdp.html","id":null,"dir":"Reference","previous_headings":"","what":"Bivariate partial dependence plot — p_bpdp","title":"Bivariate partial dependence plot — p_bpdp","text":"Create bivariate partial dependence plot(s) explore bivariate marginal effect predictors suitability","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/p_bpdp.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Bivariate partial dependence plot — p_bpdp","text":"","code":"p_bpdp( model, predictors = NULL, 
resolution = 50, training_data = NULL, training_boundaries = NULL, projection_data = NULL, clamping = FALSE, color_gradient = c(\"#000004\", \"#1B0A40\", \"#4A0C69\", \"#781B6C\", \"#A42C5F\", \"#CD4345\", \"#EC6824\", \"#FA990B\", \"#F7CF3D\", \"#FCFFA4\"), color_training_boundaries = \"white\", theme = ggplot2::theme_classic() )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/p_bpdp.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Bivariate partial dependence plot — p_bpdp","text":"model model object class \"gam\", \"gbm\", \"glm\", \"graf\", \"ksvm\", \"ksvm\", \"maxnet”, “nnet\", \"randomForest\" model can found first element list returned function fit_, tune_, esm_ function families predictors character. Vector predictor names calculate partial dependence plots. NULL predictors used. Default NULL resolution numeric. Number equally spaced points predict suitability values continuous predictors. Default 50 training_data data.frame. Database response (0,1) predictor values used fit model. Default NULL training_boundaries character. Plot training conditions boundaries based training data (.e., presences, presences absences, etc). training_boundaries = \"convexh\", function delimit training environmental region based convex-hull. training_boundaries = \"rectangle\", function delimit training environmental region based four straight lines. used methods necessary provide data training_data argument. NULL predictors used. Default NULL. projection_data SpatRaster. Raster layer environmental variables used model projection. Default NULL clamping logical. Perform clamping. maxent models. Default FALSE color_gradient character. vector range colors plot. Default c(\"#FDE725\", \"#B3DC2B\", \"#6DCC57\", \"#36B677\", \"#1F9D87\", \"#25818E\", \"#30678D\", \"#3D4988\", \"#462777\", \"#440154\") color_training_boundaries character. vector one color used color points residuals, Default \"white\" theme ggplot2 theme. 
Default ggplot2::theme_classic()","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/p_bpdp.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Bivariate partial dependence plot — p_bpdp","text":"function creates partial dependent surface plots explore bivariate marginal effect predictors suitability. projection_data used, function extract minimum maximum values found region time period model projected. Partial dependence surface plot used interpret model explore model extrapolate outside environmental conditions used train model (convex hull polygon).","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/p_bpdp.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Bivariate partial dependence plot — p_bpdp","text":"","code":"if (FALSE) { library(terra) library(dplyr) somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) # environmental data names(somevar) <- c(\"aet\", \"cwd\", \"tmx\", \"tmn\") data(abies) # set seed abies2 <- abies %>% dplyr::select(x, y, pr_ab) %>% dplyr::group_by(pr_ab) %>% dplyr::slice_sample(prop = 0.5) abies2 <- sdm_extract(abies2, x = \"x\", y = \"y\", env_layer = somevar ) abies2 <- part_random(abies2, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) svm_t1 <- fit_svm( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmx\", \"tmn\"), partition = \".part\", thr = c(\"max_sens_spec\") ) # Partial depence surface plot p_bpdp(model = svm_t1$model, training_data = abies2) p_bpdp(model = svm_t1$model, training_data = abies2, predictors = c(\"aet\", \"cwd\")) p_bpdp(model = svm_t1$model, training_data = abies2, resolution = 10) p_bpdp(model = svm_t1$model, training_data = abies2, resolution = 70) # With training condition boundaires p_bpdp(model = svm_t1$model, training_data = abies2, training_boundaries = \"convexh\") p_bpdp(model = 
svm_t1$model, training_data = abies2, training_boundaries = \"rectangle\", color_training_boundaries = \"yellow\") p_bpdp( model = svm_t1$model, training_data = abies2, training_boundaries = \"convexh\", color_training_boundaries = \"orange\", color_gradient = c(\"#00007F\", \"#007FFF\", \"#7FFF7F\", \"#FF7F00\", \"#7F0000\") ) # With projection data p_bpdp( model = svm_t1$model, training_data = abies2, training_boundaries = \"rectangle\", projection_data = somevar, # a SpatRaster used to predict or project the model color_training_boundaries = \"white\", color_gradient = c(\"#00007F\", \"#007FFF\", \"#7FFF7F\", \"#FF7F00\", \"#7F0000\") ) # Bivariate partial dependence plot for training and projection condition plot(somevar[[1]], main = \"Projection area\") p_bpdp(model = svm_t1$model, training_data = abies2, projection_data = somevar, # a SpatRaster used to predict or project the model training_boundaries = \"convexh\") # Bivariate partial dependece plot with categorical variables somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) # environmental data names(somevar) <- c(\"aet\", \"cwd\", \"tmx\", \"tmn\") cat <- system.file(\"external/clusters.shp\", package = \"flexsdm\") cat <- terra::vect(cat) cat$clusters <- paste0(\"c\", cat$clusters) cat <- terra::rasterize(cat, somevar, field = \"clusters\") somevar <- c(somevar, cat) plot(somevar) # set seed abies2 <- abies %>% dplyr::select(x, y, pr_ab) %>% dplyr::group_by(pr_ab) %>% dplyr::slice_sample(prop = 0.5) abies2 <- sdm_extract( data = abies2, x = \"x\", y = \"y\", env_layer = somevar ) abies2 <- part_random(abies2, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) svm_t1 <- fit_svm( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmx\", \"tmn\"), predictors_f = \"clusters\", partition = \".part\", thr = c(\"max_sens_spec\") ) p_bpdp(model = svm_t1$model, training_data = abies2, training_boundaries = \"convexh\") 
}"},{"path":"https://sjevelazco.github.io/flexsdm/reference/p_extra.html","id":null,"dir":"Reference","previous_headings":"","what":"Graphical exploration of extrapolation or suitability pattern in the environmental and geographical space — p_extra","title":"Graphical exploration of extrapolation or suitability pattern in the environmental and geographical space — p_extra","text":"Graphical exploration extrapolation suitability pattern environmental geographical space","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/p_extra.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Graphical exploration of extrapolation or suitability pattern in the environmental and geographical space — p_extra","text":"","code":"p_extra( training_data, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", extra_suit_data, projection_data, predictors = NULL, geo_space = TRUE, geo_position = \"right\", prop_points = 0.2, maxcells = 1e+05, alpha_p = 0.5, color_p = \"black\", alpha_gradient = 0.5, color_gradient = c(\"#FDE725\", \"#B3DC2B\", \"#6DCC57\", \"#36B677\", \"#1F9D87\", \"#25818E\", \"#30678D\", \"#3D4988\", \"#462777\", \"#440154\"), theme = ggplot2::theme_classic() )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/p_extra.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Graphical exploration of extrapolation or suitability pattern in the environmental and geographical space — p_extra","text":"training_data data.frame. Database response (0,1) predictor values used fit model. x character. Column name spatial x coordinates y character. Column name spatial y coordinates pr_ab character. Column name species absence-presence, pseudo-absence-presence, background-presence data (0,1). extra_suit_data SpatRaster. Raster layer extrapolation suitability values. extra_suit_data must resolution extent projection_data projection_data SpatRaster. 
Raster layer environmental variables used model projection. projection_data must resolution extent extra_suit_data predictors character. Vector predictor name(s) calculate partial dependence plots. NULL predictors used. Default NULL. geo_space logical. TRUE produced map. Default TRUE geo_position character. Map position regarding plot environmental space, right, left, bottom, upper. Default \"right\" prop_points numeric. Proportion cells extra_suit_data projection_data select plotting. default. 0.5. maxcells integer. Maximum number cells used plot geographical space. Default 100000 alpha_p numeric. value 0 1 control transparency presence-absence points. Lower values corresponding transparent colors. Default 0.5 color_p character. vector color used color presence-absence points. Default \"black\" alpha_gradient numeric. value 0 1 control transparency projection data Lower values corresponding transparent colors. Default 0.5 color_gradient character. vector colors used color projection data. Default c( \"#FDE725\", \"#B3DC2B\", \"#6DCC57\", \"#36B677\", \"#1F9D87\", \"#25818E\", \"#30678D\", \"#3D4988\", \"#462777\", \"#440154\") theme ggplot2 theme. 
Default ggplot2::theme_classic()","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/p_extra.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Graphical exploration of extrapolation or suitability pattern in the environmental and geographical space — p_extra","text":"plot","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/p_extra.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Graphical exploration of extrapolation or suitability pattern in the environmental and geographical space — p_extra","text":"","code":"if (FALSE) { require(dplyr) require(terra) require(ggplot2) data(spp) f <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(f) names(somevar) <- c(\"aet\", \"cwd\", \"tmx\", \"tmn\") spp$species %>% unique() sp <- spp %>% dplyr::filter(species == \"sp2\", pr_ab == 1) %>% dplyr::select(x, y, pr_ab) # Calibration area based on some criterion such as dispersal ability ca <- calib_area(sp, x = \"x\", y = \"y\", method = c(\"buffer\", width = 50000), crs = crs(somevar)) plot(somevar[[1]]) points(sp) plot(ca, add = T) # Sampling pseudo-absences set.seed(10) psa <- sample_pseudoabs( data = sp, x = \"x\", y = \"y\", n = nrow(sp) * 2, method = \"random\", rlayer = somevar, calibarea = ca ) # Merge presences and abasences databases to get a complete calibration data sp_pa <- dplyr::bind_rows(sp, psa) sp_pa # Get environmental condition of calibration area sp_pa_2 <- sdm_extract(data = sp_pa, x = \"x\", y = \"y\", env_layer = somevar) sp_pa_2 # Measure extrapolation based on calibration data (presence and pseudo-absences) # using SHAPE metric extr <- extra_eval( training_data = sp_pa_2, pr_ab = \"pr_ab\", projection_data = somevar, metric = \"mahalanobis\", univar_comb = FALSE, n_cores = 1, aggreg_factor = 1 ) plot(extr) ## %######################################################%## #### Explore extrapolation in the #### #### 
environmental and geographical space #### ## %######################################################%## p_extra( training_data = sp_pa_2, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", extra_suit_data = extr, projection_data = somevar, geo_space = TRUE, prop_points = 0.05 ) p_extra( training_data = sp_pa_2, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", extra_suit_data = extr, projection_data = somevar, predictors = c(\"tmn\", \"cwd\"), geo_space = TRUE, prop_points = 0.05 ) p_extra( training_data = sp_pa_2, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", extra_suit_data = extr, projection_data = somevar, predictors = c(\"cwd\", \"tmx\", \"aet\"), geo_space = TRUE, geo_position = \"left\", prop_points = 0.05, color_p = \"white\", alpha_p = 0.5, alpha_gradient = 0.2, color_gradient = c(\"#404096\", \"#529DB7\", \"#7DB874\", \"#E39C37\", \"#D92120\"), theme = ggplot2::theme_dark() ) p_extra( training_data = sp_pa_2, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", extra_suit_data = extr, projection_data = somevar, geo_space = TRUE, prop_points = 0.05, color_p = \"white\", alpha_p = 0.5, alpha_gradient = 0.2, color_gradient = c(\"#404096\", \"#529DB7\", \"#7DB874\", \"#E39C37\", \"#D92120\"), theme = ggplot2::theme_dark() ) # Explore extrapolation only in the environmental space p_extra( training_data = sp_pa_2, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", extra_suit_data = extr, projection_data = somevar, geo_space = FALSE, prop_points = 0.05, color_p = \"black\", color_gradient = c(\"#085CF8\", \"#65AF1E\", \"#F3CC1D\", \"#FC6A9B\", \"#D70500\"), theme = ggplot2::theme_minimal() ) ##%######################################################%## #### Explore univariate #### #### and combinatorial extrapolation #### ##%######################################################%## extr <- extra_eval( training_data = sp_pa_2, pr_ab = \"pr_ab\", projection_data = somevar, metric = \"mahalanobis\", univar_comb = TRUE, n_cores = 1, aggreg_factor = 1 ) plot(extr) p_extra( training_data = sp_pa_2, x = \"x\", y = 
\"y\", pr_ab = \"pr_ab\", extra_suit_data = extr$uni_comb, # use uni_comb layer projection_data = somevar, geo_space = TRUE, prop_points = 0.05, color_gradient = c(\"#B3DC2B\",\"#25818E\") ) ## %######################################################%## #### With p_extra also is possible #### #### to explore the patterns of suitability #### ## %######################################################%## sp_pa_2 <- part_random( data = sp_pa_2, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) rf_m1 <- fit_raf( data = sp_pa_2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmx\", \"tmn\"), partition = \".part\", thr = c(\"max_sorensen\") ) suit <- sdm_predict(models = rf_m1, pred = somevar) plot(suit$raf) suit <- suit$raf # Pasterns of suitability in geographical and environmental space p_extra( training_data = sp_pa_2, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", extra_suit_data = suit, projection_data = somevar, geo_space = TRUE, prop_points = 0.05, ) # Pasterns of suitability plotting as points only presences p_extra( training_data = sp_pa_2 %>% dplyr::filter(pr_ab == 1), x = \"x\", y = \"y\", pr_ab = \"pr_ab\", extra_suit_data = suit, projection_data = somevar, geo_space = TRUE, prop_points = 0.05, ) # Pasterns of suitability in the environmental space only # and plotting as points only presences p_extra( training_data = sp_pa_2 %>% dplyr::filter(pr_ab == 1), x = \"x\", y = \"y\", pr_ab = \"pr_ab\", extra_suit_data = suit, projection_data = somevar, geo_space = FALSE, prop_points = 0.05, ) }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/p_pdp.html","id":null,"dir":"Reference","previous_headings":"","what":"Partial Dependent Plot — p_pdp","title":"Partial Dependent Plot — p_pdp","text":"Create partial dependence plot(s) explore marginal effect predictors suitability","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/p_pdp.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Partial 
Dependent Plot — p_pdp","text":"","code":"p_pdp( model, predictors = NULL, resolution = 100, resid = FALSE, training_data = NULL, projection_data = NULL, clamping = FALSE, rug = FALSE, colorl = c(\"#462777\", \"#6DCC57\"), colorp = \"black\", alpha = 0.2, theme = ggplot2::theme_classic() )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/p_pdp.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Partial Dependent Plot — p_pdp","text":"model model object class \"gam\", \"gbm\", \"glm\", \"graf\", \"ksvm\", \"ksvm\", \"maxnet”, “nnet\", \"randomForest\" model can found first element list returned function fit_, tune_, esm_ function families predictors character. Vector predictor name(s) calculate partial dependence plots. NULL predictors used. Default NULL resolution numeric. Number equally spaced points predict suitability values continuous predictors. Default 50 resid logical. Calculate residuals based training data. Default FALSE training_data data.frame. Database response (0,1) predictor values used fit model. Default NULL projection_data SpatRaster. Raster layer environmental variables used model projection. argument used, function calculate partial dependence curves distinguishing conditions used training projection conditions (.e., projection data present projection area training). Default NULL clamping logical. Perform clamping. maxent models. Default FALSE rug logical. Display training data rug plot x-axis. Note: time-consuming large databases. Default FALSE colorl character. vector one two colors used color lines. projection_data argument used necessary provide two colors. Default c(\"#462777\", \"#6DCC57\") colorp character. vector one color used color points residuals, Default \"black\" alpha numeric. value 0 1 control transparency residual points. Lower values corresponding transparent colors. Default 0.2 theme ggplot2 theme. 
Default ggplot2::theme_classic()","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/p_pdp.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Partial Dependent Plot — p_pdp","text":"function creates partial dependent plots explore marginal effect predictors suitability. projection_data used, function extract minimum maximum values found region time period model projected. range projection data greater training data plotted different color. Partial dependence curves used interpret model explore model may extrapolate outside environmental conditions used train model.","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/p_pdp.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Partial Dependent Plot — p_pdp","text":"","code":"if (FALSE) { library(terra) library(dplyr) somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) # environmental data names(somevar) <- c(\"aet\", \"cwd\", \"tmx\", \"tmn\") data(abies) # set seed abies2 <- abies %>% dplyr::select(x, y, pr_ab) %>% dplyr::group_by(pr_ab) %>% dplyr::slice_sample(prop = 0.5) abies2 <- sdm_extract(abies2, x = \"x\", y = \"y\", env_layer = somevar ) abies2 <- part_random(abies2, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) svm_t1 <- fit_svm( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmx\", \"tmn\"), partition = \".part\", thr = c(\"max_sens_spec\") ) # Partial depence plot p_pdp(model = svm_t1$model, training_data = abies2) p_pdp(model = svm_t1$model, training_data = abies2, predictors = c(\"aet\", \"cwd\")) p_pdp(model = svm_t1$model, training_data = abies2, resolution = 5) p_pdp(model = svm_t1$model, training_data = abies2, resolution = 50) p_pdp(model = svm_t1$model, training_data = abies2, resid = TRUE) p_pdp( model = svm_t1$model, training_data = abies2, resid = TRUE, colorl = \"black\", colorp = 
\"red\", alpha = 0.1 ) p_pdp( model = svm_t1$model, training_data = abies2, resid = TRUE, colorl = \"black\", colorp = \"red\", alpha = 0.1, rug = TRUE ) # Partial depence plot for training and projection condition found in a projection area plot(somevar[[1]], main = \"Projection area\") p_pdp(model = svm_t1$model, training_data = abies2, projection_data = somevar) p_pdp( model = svm_t1$model, training_data = abies2, projection_data = somevar, colorl = c(\"#CC00FF\", \"#CCFF00\") ) p_pdp( model = svm_t1$model, training_data = abies2, projection_data = somevar, colorl = c(\"#CC00FF\", \"#CCFF00\"), resid = TRUE, colorp = \"gray\" ) p_pdp( model = svm_t1$model, training_data = abies2, projection_data = somevar, colorl = c(\"#CC00FF\", \"#CCFF00\"), resid = TRUE, colorp = \"gray\", rug = TRUE, theme = ggplot2::theme_dark() ) }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/p_psp.html","id":null,"dir":"Reference","previous_headings":"","what":"Partial Dependent Suface Plot — p_psp","title":"Partial Dependent Suface Plot — p_psp","text":"Create partial dependence surface plot(s) explore bivariate marginal effect predictors suitability","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/p_psp.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Partial Dependent Suface Plot — p_psp","text":"","code":"p_psp( model, predictors = NULL, resolution = 50, training_data = NULL, pchull = FALSE, projection_data = NULL, clamping = FALSE, color_gradient = c(\"#000004\", \"#1B0A40\", \"#4A0C69\", \"#781B6C\", \"#A42C5F\", \"#CD4345\", \"#EC6824\", \"#FA990B\", \"#F7CF3D\", \"#FCFFA4\"), color_chull = \"white\", theme = ggplot2::theme_classic() )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/p_psp.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Partial Dependent Suface Plot — p_psp","text":"model model object class \"gam\", \"gbm\", \"glm\", \"graf\", \"ksvm\", 
\"ksvm\", \"maxnet”, “nnet\", \"randomForest\" model can found first element list returned function fit_, tune_, esm_ function families predictors character. Vector predictor names calculate partial dependence plots. NULL predictors used. Default NULL resolution numeric. Number equally spaced points predict suitability values continuous predictors. Default 50 training_data data.frame. Database response (0,1) predictor values used fit model. Default NULL pchull logical. Plot convex-hull limit training data. Default FALSE. TRUE necessary provide data training_data argument projection_data SpatRaster. Raster layer environmental variables used model projection. Default NULL clamping logical. Perform clamping. maxent models. Default FALSE color_gradient character. vector range colors plot. Default c(\"#FDE725\", \"#B3DC2B\", \"#6DCC57\", \"#36B677\", \"#1F9D87\", \"#25818E\", \"#30678D\", \"#3D4988\", \"#462777\", \"#440154\") color_chull character. vector one color used color points residuals, Default \"white\" theme ggplot2 theme. Default ggplot2::theme_classic()","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/p_psp.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Partial Dependent Suface Plot — p_psp","text":"function creates partial dependent surface plots explore bivariate marginal effect predictors suitability. projection_data used, function extract minimum maximum values found region time period model projected. 
Partial dependence surface plot used interpret model explore model extrapolate outside environmental conditions used train model (convex hull polygon).","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/p_psp.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Partial Dependent Suface Plot — p_psp","text":"","code":"if (FALSE) { library(terra) library(dplyr) somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) # environmental data names(somevar) <- c(\"aet\", \"cwd\", \"tmx\", \"tmn\") data(abies) # set seed abies2 <- abies %>% dplyr::select(x, y, pr_ab) %>% dplyr::group_by(pr_ab) %>% dplyr::slice_sample(prop = 0.5) abies2 <- sdm_extract(abies2, x = \"x\", y = \"y\", env_layer = somevar ) abies2 <- part_random(abies2, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) svm_t1 <- fit_svm( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmx\", \"tmn\"), partition = \".part\", thr = c(\"max_sens_spec\") ) # Partial depence surface plot p_psp(model = svm_t1$model, training_data = abies2) p_psp(model = svm_t1$model, training_data = abies2, predictors = c(\"aet\", \"cwd\")) p_psp(model = svm_t1$model, training_data = abies2, resolution = 10) p_psp(model = svm_t1$model, training_data = abies2, resolution = 70) p_psp(model = svm_t1$model, training_data = abies2, pchull = TRUE) p_psp( model = svm_t1$model, training_data = abies2, pchull = TRUE, color_chull = \"orange\", color_gradient = c(\"#00007F\", \"#007FFF\", \"#7FFF7F\", \"#FF7F00\", \"#7F0000\") ) # Partial depence surface plot for training and projection condition plot(somevar[[1]], main = \"Projection area\") p_psp(model = svm_t1$model, training_data = abies2, projection_data = somevar, pchull = TRUE) # PSP with categorical variables somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) # environmental data 
names(somevar) <- c(\"aet\", \"cwd\", \"tmx\", \"tmn\") cat <- system.file(\"external/clusters.shp\", package = \"flexsdm\") cat <- terra::vect(cat) cat$clusters <- paste0(\"c\", cat$clusters) cat <- terra::rasterize(cat, somevar, field = \"clusters\") somevar <- c(somevar, cat) plot(somevar) # set seed abies2 <- abies %>% dplyr::select(x, y, pr_ab) %>% dplyr::group_by(pr_ab) %>% dplyr::slice_sample(prop = 0.5) abies2 <- sdm_extract( data = abies2, x = \"x\", y = \"y\", env_layer = somevar ) abies2 <- part_random(abies2, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) svm_t1 <- fit_svm( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"cwd\", \"tmx\", \"tmn\"), predictors_f = \"clusters\", partition = \".part\", thr = c(\"max_sens_spec\") ) p_psp(model = svm_t1$model, training_data = abies2) }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/sample_background.html","id":null,"dir":"Reference","previous_headings":"","what":"Sample background points — sample_background","title":"Sample background points — sample_background","text":"Sampling background points options using different geographical restrictions sampling methods.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/sample_background.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Sample background points — sample_background","text":"","code":"sample_background( data, x, y, n, method = \"random\", rlayer, maskval = NULL, calibarea = NULL, rbias = NULL, sp_name = NULL )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/sample_background.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Sample background points — sample_background","text":"data data.frame tibble. Database presences records, coordinates x character. Column name spatial x coordinates y character. Column name spatial y coordinates n integer. Number background point sampled method character. 
Background allocation method. methods implemented : random: Random allocation background points. Usage method = 'random' thickening: Thickening background points based Vollering et al. (2019) method. method, buffer width must defined used around presences points. buffer can defined using argument method = c(\"thickening\", width = 20000). Buffer width must m raster (used rlayer) longitude/latitude CRS, map units cases. buffer width provided function use width value equal mean pair-wise presence distances. width value provided, argument must used method = 'thickening'. biased: method, similar \"thickening\", sample background biased bias presences. However, background points sampled used presences probability throughout entire study area, restricting bias within buffers “thickening” approach. using method, necessary provide layer presences bias \"rbias\" argument (Phillips et al., 2009). Usage method='thickening' method = c(\"thickening\", width = 20000). Default 'random' rlayer SpatRaster used sampling background points. best use layer resolution extent environmental variables used modeling. using maskval argument, raster layer must contain values constrain sampling maskval integer, character, factor. Values raster layer used constraining sampling background points calibarea SpatVect delimits calibration area used given species (see calib_area function). rbias SpatRaster used choosing background points using bias method. raster bias data must provided. recommended rbias match resolution extent rlayer. sp_name character. Species name output used. argument used, first output column species name. 
Default NULL.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/sample_background.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Sample background points — sample_background","text":"tibble object x y coordinates sampled background points","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/sample_background.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Sample background points — sample_background","text":"Phillips, S. J., Dudík, M., Elith, J., Graham, C. H., Lehmann, ., Leathwick, J., & Ferrier, S. (2009). Sample selection bias presence-distribution models: Implications background pseudo-absence data. Ecological Applications, 19(1), 181-197. Vollering, J., Halvorsen, R., Auestad, ., & Rydgren, K. (2019). Bunching background betters bias species distribution models. Ecography, 42(10), 1717-1727. https://doi.org/10.1111/ecog.04503","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/sample_background.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Sample background points — sample_background","text":"","code":"if (FALSE) { require(terra) require(dplyr) data(spp) somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) # Example for a single species spp_pa <- spp %>% dplyr::filter(species == \"sp3\") # Spatially structured partition part <- part_sblock( env_layer = somevar, data = spp_pa, x = \"x\", y = \"y\", pr_ab = \"pr_ab\", min_res_mult = 100, max_res_mult = 500, num_grids = 30, min_occ = 5, n_part = 2 ) grid_env <- get_block(env_layer = somevar, best_grid = part$grid) plot(grid_env) ## %######################################################%## # # #### Random background method #### # # ## %######################################################%## # Sample background points throughout study area with random sampling method spp_p 
<- spp_pa %>% dplyr::filter(pr_ab == 1) bg <- sample_background( data = spp_p, x = \"x\", y = \"y\", n = 1000, method = \"random\", rlayer = grid_env, sp_name = \"sp3\" ) bg plot(grid_env) points(bg[-1]) # Sample random background points constrained to a region with a give set of values plot(grid_env) sample_background( data = spp_p, x = \"x\", y = \"y\", n = 1000, method = \"random\", rlayer = grid_env, maskval = 1 ) %>% points() plot(grid_env) sample_background( data = spp_p, x = \"x\", y = \"y\", n = 1000, method = \"random\", rlayer = grid_env, maskval = 2 ) %>% points() plot(grid_env) sample_background( data = spp_p, x = \"x\", y = \"y\", n = 1000, method = \"random\", rlayer = grid_env, maskval = c(1, 2) ) %>% points() # Sample random background within a calibration area and constrained to a region ca_ps1 <- calib_area( data = spp_pa, x = \"x\", y = \"y\", method = c(\"buffer\", width = 50000), crs = crs(somevar) ) plot(grid_env) plot(ca_ps1, add = T) points(spp_pa[-1], col = \"blue\", cex = 0.7, pch = 19) sample_background( data = spp_p, x = \"x\", y = \"y\", n = 1000, method = \"random\", rlayer = grid_env, maskval = 1, calibarea = ca_ps1 ) %>% points(col = \"red\") ## %######################################################%## # # #### Thickening background method #### # # ## %######################################################%## # Thickening background without constraining them spp_p # presences database of a species grid_env # The raster layer used for sampling background bg <- sample_background( data = spp_p, x = \"x\", y = \"y\", n = 5000, method = \"thickening\", rlayer = grid_env, ) plot(grid_env) bg %>% points(col = \"red\") # Thickening background spp_p # presences database of a species grid_env # The raster layer used for sampling background bg <- sample_background( data = spp_p, x = \"x\", y = \"y\", n = 5000, method = c(\"thickening\", width = 150000), rlayer = grid_env ) plot(grid_env) bg %>% points(col = \"red\") # Sample thickening 
background within a calibration area and constrained to a region bg <- sample_background( data = spp_p, x = \"x\", y = \"y\", n = 3000, method = \"thickening\", rlayer = grid_env, maskval = 2, calibarea = ca_ps1 ) plot(grid_env) plot(ca_ps1, add = T) bg %>% points(col = \"red\", cex = 0.3) points(spp_p[c(\"x\", \"y\")], pch = 19) ## %######################################################%## # # #### Biased background method #### # # ## %######################################################%## require(dplyr) require(terra) data(spp) # Select the presences of a species spp_p <- spp %>% dplyr::filter(species == \"sp1\", pr_ab == 1) # Raster layer with density of points to obtain a biased sampling background occ_density <- system.file(\"external/occ_density.tif\", package = \"flexsdm\") occ_density <- terra::rast(occ_density) plot(occ_density) points(spp_p %>% dplyr::select(x, y), cex = 0.5) # A layer with region used to contrain background sampling area regions <- system.file(\"external/regions.tif\", package = \"flexsdm\") regions <- terra::rast(regions) plot(regions) points(spp_p %>% dplyr::select(x, y), cex = 0.5) # Biased background points spp_p # presences database of a species bg <- sample_background( data = spp_p, x = \"x\", y = \"y\", n = 3000, method = \"biased\", rlayer = regions, rbias = occ_density ) plot(occ_density) bg %>% points(col = \"red\", cex = 0.1) spp_p %>% dplyr::select(x, y) %>% points(., col = \"black\", pch = 19, cex = 0.5) # Biased background points constrained to a region # It will be selected region 6 plot(regions) plot(regions %in% c(1, 6)) bg <- sample_background( data = spp_p, x = \"x\", y = \"y\", n = 500, method = \"biased\", rlayer = regions, rbias = occ_density, maskval = c(1, 2) ) plot(occ_density) bg %>% points(col = \"red\", cex = 0.5) spp_p %>% dplyr::select(x, y) %>% points(., col = \"black\", pch = 19, cex = 0.5) 
}"},{"path":"https://sjevelazco.github.io/flexsdm/reference/sample_pseudoabs.html","id":null,"dir":"Reference","previous_headings":"","what":"Sample pseudo-absences — sample_pseudoabs","title":"Sample pseudo-absences — sample_pseudoabs","text":"function provide several methods sampling pseudo-absences, instance totally random sampling method, options using different environmental geographical constraints.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/sample_pseudoabs.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Sample pseudo-absences — sample_pseudoabs","text":"","code":"sample_pseudoabs( data, x, y, n, method, rlayer, maskval = NULL, calibarea = NULL, sp_name = NULL )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/sample_pseudoabs.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Sample pseudo-absences — sample_pseudoabs","text":"data data.frame tibble. Database presences (presence-absence, presences-pseudo-absence) records, coordinates x character. Column name spatial x coordinates y character. Column name spatial y coordinates n integer. Number pseudo-absences sampled method character. Pseudo-absence allocation method. necessary provide vector argument. methods implemented : random: Random allocation pseudo-absences throughout area used model fitting. Usage method='random'. env_const: Pseudo-absences environmentally constrained regions lower suitability values predicted Bioclim model. method, necessary provide raster stack brick object environmental variables Usage method=c(method='env_const', env = somevar). geo_const: Pseudo-absences allocated far occurrences based geographical buffer. value buffer width m must provided raster (used rlayer) longitude/latitude CRS, map units cases. Usage method=c('geo_const', width='50000'). 
geo_env_const: Pseudo-absences constrained environmentally (based Bioclim model) distributed geographically far occurrences based geographical buffer. method, raster environmental variables stored SpatRaster object provided. value buffer width m must provided raster (used rlayer) longitude/latitude CRS, map units cases. Usage method=c('geo_env_const', width='50000', env = somevar). geo_env_km_const: Pseudo-absences constrained using three-level procedure; similar geo_env_const additional step distributes pseudo-absences environmental space using k-means cluster analysis. method, necessary provide raster stack brick object environmental variables value buffer width m raster (used rlayer) longitude/latitude CRS, map units cases. Usage method=c('geo_env_km_const', width='50000', env = somevar). rlayer SpatRaster. raster layer used sampling pseudo-absence layer resolution extent environmental variables used modeling recommended. case use maskval argument, raster layer must contain values used constrain sampling maskval integer, character, factor. Values raster layer used constraining pseudo-absence sampling calibarea SpatVector SpatVector delimit calibration area used given species (see calib_area function). sp_name character. Species name output used. argument used, first output column species name. 
Default NULL.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/sample_pseudoabs.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Sample pseudo-absences — sample_pseudoabs","text":"tibble object x y coordinates sampled pseudo-absence points","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/sample_pseudoabs.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Sample pseudo-absences — sample_pseudoabs","text":"","code":"if (FALSE) { require(terra) require(dplyr) data(\"spp\") somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) regions <- system.file(\"external/regions.tif\", package = \"flexsdm\") regions <- terra::rast(regions) plot(regions) single_spp <- spp %>% dplyr::filter(species == \"sp3\") %>% dplyr::filter(pr_ab == 1) %>% dplyr::select(-pr_ab) # Pseudo-absences randomly sampled throughout study area ps1 <- sample_pseudoabs( data = single_spp, x = \"x\", y = \"y\", n = nrow(single_spp) * 10, method = \"random\", rlayer = regions, maskval = NULL, sp_name = \"sp3\" ) plot(regions, col = gray.colors(9)) points(single_spp[-1], col = \"blue\", cex = 0.7, pch = 19) # presences points(ps1[-1], col = \"red\", cex = 0.7, pch = 19) # absences # Pseudo-absences randomly sampled within a regions where a species occurs ## Regions where this species occurrs samp_here <- terra::extract(regions, single_spp[2:3])[, 2] %>% unique() %>% na.exclude() ps1 <- sample_pseudoabs( data = single_spp, x = \"x\", y = \"y\", n = nrow(single_spp) * 10, method = \"random\", rlayer = regions, maskval = samp_here ) plot(regions, col = gray.colors(9)) points(single_spp[-1], col = \"blue\", cex = 0.7, pch = 19) points(ps1, col = \"red\", cex = 0.7, pch = 19) # Pseudo-absences sampled with geographical constraint ps1 <- sample_pseudoabs( data = single_spp, x = \"x\", y = \"y\", n = nrow(single_spp) * 10, method = 
c(\"geo_const\", width = \"30000\"), rlayer = regions, maskval = samp_here ) plot(regions, col = gray.colors(9)) points(single_spp[-1], col = \"blue\", cex = 0.7, pch = 19) points(ps1, col = \"red\", cex = 0.7, pch = 19) # Pseudo-absences sampled with environmental constraint ps1 <- sample_pseudoabs( data = single_spp, x = \"x\", y = \"y\", n = nrow(single_spp) * 10, method = c(\"env_const\", env = somevar), rlayer = regions, maskval = samp_here ) plot(regions, col = gray.colors(9)) points(single_spp[-1], col = \"blue\", cex = 0.7, pch = 19) points(ps1, col = \"red\", cex = 0.7, pch = 19) # Pseudo-absences sampled with environmental and geographical constraint ps1 <- sample_pseudoabs( data = single_spp, x = \"x\", y = \"y\", n = nrow(single_spp) * 10, method = c(\"geo_env_const\", width = \"50000\", env = somevar), rlayer = regions, maskval = samp_here ) plot(regions, col = gray.colors(9)) points(single_spp[-1], col = \"blue\", cex = 0.7, pch = 19) points(ps1, col = \"red\", cex = 0.7, pch = 19) # Pseudo-absences sampled with environmental and geographical constraint and with k-mean clustering ps1 <- sample_pseudoabs( data = single_spp, x = \"x\", y = \"y\", n = nrow(single_spp) * 10, method = c(\"geo_env_km_const\", width = \"50000\", env = somevar), rlayer = regions, maskval = samp_here ) plot(regions, col = gray.colors(9)) points(single_spp[-1], col = \"blue\", cex = 0.7, pch = 19) points(ps1, col = \"red\", cex = 0.7, pch = 19) # Sampling pseudo-absence using a calibration area ca_ps1 <- calib_area( data = single_spp, x = \"x\", y = \"y\", method = c(\"buffer\", width = 50000), crs=crs(somevar) ) plot(regions, col = gray.colors(9)) plot(ca_ps1, add = T) points(single_spp[-1], col = \"blue\", cex = 0.7, pch = 19) ps1 <- sample_pseudoabs( data = single_spp, x = \"x\", y = \"y\", n = nrow(single_spp) * 50, method = \"random\", rlayer = regions, maskval = NULL, calibarea = ca_ps1 ) plot(regions, col = gray.colors(9)) plot(ca_ps1, add = T) points(ps1, col = \"red\", 
cex = 0.7, pch = 19) points(single_spp[-1], col = \"blue\", cex = 0.7, pch = 19) ps1 <- sample_pseudoabs( data = single_spp, x = \"x\", y = \"y\", n = nrow(single_spp) * 50, method = \"random\", rlayer = regions, maskval = samp_here, calibarea = ca_ps1 ) plot(regions, col = gray.colors(9)) plot(ca_ps1, add = T) points(ps1, col = \"red\", cex = 0.7, pch = 19) points(single_spp[-1], col = \"blue\", cex = 0.7, pch = 19) }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_directory.html","id":null,"dir":"Reference","previous_headings":"","what":"Create directories for saving the outputs of the flexsdm — sdm_directory","title":"Create directories for saving the outputs of the flexsdm — sdm_directory","text":"function assists creating directory system different sub-folders assist organisation modelling process outputs.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_directory.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create directories for saving the outputs of the flexsdm — sdm_directory","text":"","code":"sdm_directory( main_dir = NULL, projections = NULL, calibration_area = TRUE, algorithm = NULL, ensemble = NULL, threshold = FALSE, return_vector = TRUE )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_directory.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create directories for saving the outputs of the flexsdm — sdm_directory","text":"main_dir character. Directory path containing main folder saving model inputs outputs. NULL function assumes directory path current working R session creates sub-folder name 'flexsdm_results'. Default NULL projections vector. Vector folder names future scenarios/different regions/time periods save model projections output. calibration_area logical. TRUE, function creates folder 1_Inputs storing calibration area. Default TRUE algorithm vector. Vector model names used. 
Usage algorithm = c(gam, tune_max, tune_net, esm_glm). \"\" used function creates folders algorithms available flexsdm . .e. 'gam', 'gau', 'gbm', 'glm', 'max', 'net', 'raf', 'svm'. Default NULL ensemble vector. Vector methods used ensemble different models. Usage ensemble = c(\"mean\", \"meanthr\"). Default NULL threshold logical. TRUE sub-folders \"/1_con\", \"/2_bin\" created within algorithm /ensemble folder. Used storing continuous binarized models separately. Default FALSE return_vector logical. TRUE function returns vector path folders. Default TRUE","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_directory.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create directories for saving the outputs of the flexsdm — sdm_directory","text":"character vector paths created folders","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_directory.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Create directories for saving the outputs of the flexsdm — sdm_directory","text":"sdm_directory function assists saving workflow outputs creating folders (directories) based user specifications, choice algorithms, ensemble methods, model projections new geographic regions periods. function first creates two folders within user-specified project folder, one model inputs (1_Inputs) one model outputs (2_Outputs). Within 1_Inputs, three sub-folders users store model inputs: 1_Occurrences, 2_Predictors, 3_Calibration_area. user chooses include projections modeling framework, 2_Projections subfolder created within 2_Predictors folder store environmental data projection scenarios provided \"projections\" argument. 
Additionally, sdm_directory offers users enhanced flexibility saving modeling outputs, giving offers users option save results modeling ensemble technique presented flexsdm","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_directory.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Create directories for saving the outputs of the flexsdm — sdm_directory","text":"","code":"if (FALSE) { require(dplyr) # require(sf) # Implement sdm_directory without specific path and project name dirs_1 <- sdm_directory( main_dir = NULL, projections = NULL, calibration_area = TRUE, algorithm = c(\"gam\", \"tune_max\"), ensemble = c(\"mean\", \"meanthr\"), threshold = FALSE, return_vector = TRUE ) dirs_1 dirs_1[1] %>% fs::dir_tree(., recurse = TRUE) unlink(dirs_1[1], recursive = TRUE) # this directory and sub-folder will be removed # Implement sdm_directory with specific path and project name getwd() %>% dirname() dirs_2 <- sdm_directory( main_dir = getwd() %>% dirname() %>% file.path(., \"my_project_name\"), projections = c( \"cnrm_rpc8.5_2050\", \"cnrm_rpc4.5_2050\" ), calibration_area = TRUE, algorithm = \"all\", ensemble = c(\"mean\", \"meanthr\"), threshold = TRUE ) dirs_2[1] %>% fs::dir_tree(., recurse = TRUE) }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_eval.html","id":null,"dir":"Reference","previous_headings":"","what":"Calculate different model performance metrics — sdm_eval","title":"Calculate different model performance metrics — sdm_eval","text":"function calculates threshold dependent independent model performance metrics.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_eval.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Calculate different model performance metrics — sdm_eval","text":"","code":"sdm_eval(p, a, bg = NULL, thr = 
NULL)"},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_eval.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Calculate different model performance metrics — sdm_eval","text":"p numeric. Predicted suitability presences numeric. Predicted suitability absences bg numeric. Predicted suitability background points, used BOYCE metric. bg set NULL, BOYCE metric calculated presences absences suitabilities values thr character. Threshold criterion used get binary suitability values (.e. 0,1). Used threshold-dependent performance metrics. possible use one threshold type. vector must provided argument. following threshold criteria available: lpt: highest threshold omission. equal_sens_spec: Threshold Sensitivity Specificity equal. max_sens_spec: Threshold sum Sensitivity Specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard index highest. max_sorensen: threshold Sorensen index highest. max_fpb: threshold FPB (F-measure presence-background data) highest. sensitivity: Threshold based specified Sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers Sensitivity value. sensitivity value specified, default value 0.9 one threshold type used, concatenate threshold types, e.g., thr=c('lpt', 'max_sens_spec', 'max_jaccard'), thr=c('lpt', 'max_sens_spec', 'sensitivity', sens='0.8'), thr=c('lpt', 'max_sens_spec', 'sensitivity'). 
Function use thresholds threshold type specified","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_eval.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Calculate different model performance metrics — sdm_eval","text":"tibble next columns threshold: threshold names thr_value: threshold values n_presences: number presences n_absences: number absences TPR IMAE: performance metrics","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_eval.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Calculate different model performance metrics — sdm_eval","text":"function used evaluating different models approaches base combination presence-absences presence-pseudo-absences background point data suitability predicted model flexsdm modeling function families (fit_, esm_, tune_.) calculates next performance metric: \\* BOYCE calculated based presences background points, case background points provided calculated using presences absences. 
codes calculating metric adaptation enmSdm package (https://github.com/adamlilith/enmSdm) \\** IMAE calculated 1-(Mean Absolute Error) order consistent metrics higher value given performance metric, greater model's accuracy","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_eval.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Calculate different model performance metrics — sdm_eval","text":"","code":"if (FALSE) { require(dplyr) set.seed(0) p <- rnorm(50, mean = 0.7, sd = 0.3) %>% abs() p[p > 1] <- 1 p[p < 0] <- 0 set.seed(0) a <- rnorm(50, mean = 0.3, sd = 0.2) %>% abs() a[a > 1] <- 1 a[a < 0] <- 0 set.seed(0) backg <- rnorm(1000, mean = 0.4, sd = 0.4) %>% abs() backg[backg > 1] <- 1 backg[backg < 0] <- 0 # Function use without threshold specification e <- sdm_eval(p, a) e # Function use with threshold specification sdm_eval(p, a, thr = \"max_sorensen\") sdm_eval(p, a, thr = c(\"lpt\", \"max_sens_spec\", \"max_jaccard\")) sdm_eval(p, a, thr = c(\"lpt\", \"max_sens_spec\", \"sensitivity\")) sdm_eval(p, a, thr = c(\"lpt\", \"max_sens_spec\", \"sensitivity\", sens = \"0.95\")) # Use of bg argument (it will only be used for calculating BOYCE index) sdm_eval(p, a, thr = \"max_sens_spec\") sdm_eval(p, a, thr = c(\"max_sens_spec\"), bg = backg) # If background will be used to calculate all other metrics # background values can be used in \"a\" argument sdm_eval(p, backg, thr = \"max_sens_spec\") }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_extract.html","id":null,"dir":"Reference","previous_headings":"","what":"Extract environmental data values from a spatial raster based on x and y coordinates — sdm_extract","title":"Extract environmental data values from a spatial raster based on x and y coordinates — sdm_extract","text":"Extract environmental data values spatial raster based x y 
coordinates","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_extract.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Extract environmental data values from a spatial raster based on x and y coordinates — sdm_extract","text":"","code":"sdm_extract(data, x, y, env_layer, variables = NULL, filter_na = TRUE)"},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_extract.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Extract environmental data values from a spatial raster based on x and y coordinates — sdm_extract","text":"data data.frame. Database species presence, presence-absence, pseudo-absence records x y coordinates x character. Column name spatial x coordinates y character. Column name spatial y coordinates env_layer SpatRaster. Raster raster stack environmental variables. variables character. Vector variable names predictor (environmental) variables Usage variables. = c(\"aet\", \"cwd\", \"tmin\"). variable specified, function return data layers. Default NULL filter_na logical. 
filter_na = TRUE (default), rows NA values environmental variables removed returned tibble.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_extract.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Extract environmental data values from a spatial raster based on x and y coordinates — sdm_extract","text":"tibble returns original data base additional columns extracted environmental variables xy location SpatRaster object used 'env_layer'","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_extract.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Extract environmental data values from a spatial raster based on x and y coordinates — sdm_extract","text":"","code":"if (FALSE) { require(terra) # Load datasets data(spp) f <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(f) # Extract environmental data from somevar for all locations in spp ex_spp <- sdm_extract( data = spp, x = \"x\", y = \"y\", env_layer = somevar, variables = NULL, filter_na = FALSE ) # Extract environmental for two variables and remove rows with NAs ex_spp2 <- sdm_extract( data = spp, x = \"x\", y = \"y\", env_layer = somevar, variables = c(\"CFP_3\", \"CFP_4\"), filter_na = TRUE ) ex_spp ex_spp2 }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_predict.html","id":null,"dir":"Reference","previous_headings":"","what":"Spatial predictions from individual and ensemble models — sdm_predict","title":"Spatial predictions from individual and ensemble models — sdm_predict","text":"function allows geographical prediction one models constructed fit_ tune_ function set, models fitted esm_ function set (.e., ensemble small models approach), models constructed fit_ensemble function. 
can return continuous continuous binary predictions one thresholds","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_predict.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Spatial predictions from individual and ensemble models — sdm_predict","text":"","code":"sdm_predict( models, pred, nchunk = 1, thr = NULL, con_thr = FALSE, predict_area = NULL, clamp = TRUE, pred_type = \"cloglog\" )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_predict.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Spatial predictions from individual and ensemble models — sdm_predict","text":"models list one models fitted fit_ tune_ functions. case use models fitted fit_ensemble esm_ family function one model used. Usage models = mglm models = list(mglm, mraf, mgbm) pred SpatRaster. Raster layer predictor variables. Names layers must exactly match used model fitting. nchunk interger. Number chunks split data used predict models (.e., SpatRaster used pred argument). Predicting models chunks helps reduce memory requirements cases models predicted large scales high resolution. Default = 1 thr character. Threshold used get binary suitability values (.e., 0,1). possible use one threshold type. mandatory use threshold/s used fit models. following threshold types available: lpt: highest threshold omission. equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard index highest. max_sorensen: threshold Sorensen index highest. max_fpb: threshold FPB highest. sensitivity: Threshold based specified sensitivity value used fit models. : threshold used model outputs used 'models' argument used. Usage thr = c('lpt', 'max_sens_spec', 'max_jaccard'), thr=c('lpt', 'max_sens_spec', 'sensitivity'), thr=''. 
threshold specified (.e., thr = NULL) function return continuous prediction . Default NULL con_thr logical. true predictions suitability values threshold/s returned. Default = FALSE predict_area SpatVector, SpatialPolygon, SpatialPolygonDataFrame. Spatial polygon used restring prediction given region. Default = NULL clamp logical. set TRUE, predictors features restricted range seen model training. valid Maxent model (see tune_mx fit_mx). Default TRUE. pred_type character. Type response required available \"link\", \"exponential\", \"cloglog\" \"logistic\". valid Maxent model (see tune_mx fit_mx). Default \"cloglog\".","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_predict.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Spatial predictions from individual and ensemble models — sdm_predict","text":"list SpatRaster continuous /binary predictions","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_predict.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Spatial predictions from individual and ensemble models — sdm_predict","text":"","code":"if (FALSE) { require(dplyr) require(terra) data(\"spp\") somevar <- system.file(\"external/somevar.tif\", package = \"flexsdm\") somevar <- terra::rast(somevar) # Extract data some_sp <- spp %>% filter(species == \"sp3\") some_sp <- sdm_extract( data = some_sp, x = \"x\", y = \"y\", env_layer = somevar ) # Partition some_sp <- part_random( data = some_sp, pr_ab = \"pr_ab\", method = c(method = \"rep_kfold\", folds = 3, replicates = 5) ) ## %######################################################%## # # #### Create different type of models #### # # ## %######################################################%## # Fit some models mglm <- fit_glm( data = some_sp, response = \"pr_ab\", predictors = c(\"CFP_1\", \"CFP_2\", \"CFP_3\", \"CFP_4\"), partition = \".part\", poly = 2 ) mraf <- fit_raf( data = 
some_sp, response = \"pr_ab\", predictors = c(\"CFP_1\", \"CFP_2\", \"CFP_3\", \"CFP_4\"), partition = \".part\", ) mgbm <- fit_gbm( data = some_sp, response = \"pr_ab\", predictors = c(\"CFP_1\", \"CFP_2\", \"CFP_3\", \"CFP_4\"), partition = \".part\" ) # Fit an ensemble model mensemble <- fit_ensemble( models = list(mglm, mraf, mgbm), ens_method = \"meansup\", thr = NULL, thr_model = \"max_sens_spec\", metric = \"TSS\" ) # Fit a model with the Ensembles of Small Models approach # Without threshold specification and with kfold msmall <- esm_gam( data = some_sp, response = \"pr_ab\", predictors = c(\"CFP_1\", \"CFP_2\", \"CFP_3\", \"CFP_4\"), partition = \".part\", thr = NULL ) ## %######################################################%## # # #### Predict different kind of models #### # # ## %######################################################%## # sdm_predict can be used for predict one or more models fitted with fit_ or tune_ functions # a single model ind_p <- sdm_predict( models = mglm, pred = somevar, thr = \"max_fpb\", con_thr = FALSE, predict_area = NULL ) # a list of models list_p <- sdm_predict( models = list(mglm, mraf, mgbm), pred = somevar, thr = \"max_fpb\", con_thr = FALSE, predict_area = NULL ) # Predict an ensemble model # (only is possilbe use one fit_ensemble) ensemble_p <- sdm_predict( models = mensemble, pred = somevar, thr = \"max_fpb\", con_thr = FALSE, predict_area = NULL ) # Predict an ensemble of small models # (only is possible to use one ensemble of small models) small_p <- sdm_predict( models = msmall, pred = somevar, thr = \"max_fpb\", con_thr = FALSE, predict_area = NULL ) ##%######################################################%## # # #### Predict model using chunks #### # # ##%######################################################%## # Predicting models in chunks helps reduce memory requirements in # cases where models are predicted for large scales and high resolution ind_p <- sdm_predict( models = mglm, pred = somevar, thr = 
\"max_fpb\", con_thr = FALSE, predict_area = NULL, nchunk = 4 ) }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_summarize.html","id":null,"dir":"Reference","previous_headings":"","what":"Merge model performance tables — sdm_summarize","title":"Merge model performance tables — sdm_summarize","text":"Merge model performance tables","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_summarize.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Merge model performance tables — sdm_summarize","text":"","code":"sdm_summarize(models)"},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_summarize.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Merge model performance tables — sdm_summarize","text":"models list one models fitted fit_ tune_ functions, fit_ensemble output, esm_ family function output. list single several models fitted fit_ tune_ functions object returned fit_ensemble function. Usage models = list(mod1, mod2, mod3)","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_summarize.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Merge model performance tables — sdm_summarize","text":"Combined model performance table input models. 
Models fit tune include model performance best hyperparameters.","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/sdm_summarize.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Merge model performance tables — sdm_summarize","text":"","code":"if (FALSE) { data(abies) abies # In this example we will partition the data using the k-fold method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) # Build a generalized additive model using fit_gam gam_t1 <- fit_gam( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), predictors_f = c(\"landform\"), partition = \".part\", thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\") ) gam_t1$performance # Build a generalized linear model using fit_glm glm_t1 <- fit_glm( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"ppt_jja\", \"pH\", \"awc\", \"depth\"), predictors_f = c(\"landform\"), partition = \".part\", thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\"), poly = 0, inter_order = 0 ) glm_t1$performance # Build a tuned random forest model using tune_raf tune_grid <- expand.grid(mtry = seq(1, 7, 1)) rf_t1 <- tune_raf( data = abies2, response = \"pr_ab\", predictors = c( \"aet\", \"cwd\", \"tmin\", \"ppt_djf\", \"ppt_jja\", \"pH\", \"awc\", \"depth\" ), predictors_f = c(\"landform\"), partition = \".part\", grid = tune_grid, thr = c(\"max_sens_spec\", \"equal_sens_spec\", \"max_sorensen\"), metric = \"TSS\", ) rf_t1$performance # Merge sdm performance tables merge_df <- sdm_summarize(models = list(gam_t1, glm_t1, rf_t1)) merge_df }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/spp.html","id":null,"dir":"Reference","previous_headings":"","what":"A data set containing presences and absences of three virtual species — spp","title":"A data set containing presences and absences of three virtual species — 
spp","text":"data set containing presences absences three virtual species","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/spp.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"A data set containing presences and absences of three virtual species — spp","text":"","code":"spp"},{"path":"https://sjevelazco.github.io/flexsdm/reference/spp.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"A data set containing presences and absences of three virtual species — spp","text":"tibble 1150 rows 3 variables: species virtual species names x longitude species occurrences y latitude species occurrences pr_ab presences absences denoted 1 0 respectively","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/spp.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"A data set containing presences and absences of three virtual species — spp","text":"","code":"if (FALSE) { require(dplyr) data(\"spp\") spp }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_gbm.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit and validate Generalized Boosted Regression models with exploration of\r\nhyper-parameters that optimize performance — tune_gbm","title":"Fit and validate Generalized Boosted Regression models with exploration of\r\nhyper-parameters that optimize performance — tune_gbm","text":"Fit validate Generalized Boosted Regression models exploration hyper-parameters optimize performance","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_gbm.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit and validate Generalized Boosted Regression models with exploration of\r\nhyper-parameters that optimize performance — tune_gbm","text":"","code":"tune_gbm( data, response, predictors, predictors_f = NULL, fit_formula = NULL, partition, grid = NULL, thr = NULL, 
metric = \"TSS\", n_cores = 1 )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_gbm.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit and validate Generalized Boosted Regression models with exploration of\r\nhyper-parameters that optimize performance — tune_gbm","text":"data data.frame. Database response (0,1) predictors values. response character. Column name species absence-presence data (0,1). predictors character. Vector column names quantitative predictor variables (.e. continuous variables). Usage predictors = c(\"aet\", \"cwd\", \"tmin\") predictors_f character. Vector column names qualitative predictor variables (.e. ordinal nominal variables type). Usage predictors_f = c(\"landform\") fit_formula formula. formula object response predictor variables (e.g. formula(pr_ab ~ aet + ppt_jja + pH + awc + depth + landform)). Note variables used must consistent used response, predictors, predictors_f arguments. Default NULL. partition character. Column name training validation partition groups. grid data.frame. data frame object algorithm hyper-parameter values tested. recommended generate data.frame grid() function. Hyper-parameters needed tuning 'n.trees', 'shrinkage', 'n.minobsinnode'. thr character. Threshold used get binary suitability values (.e. 0,1) needed threshold-dependent performance metrics. possible use one threshold type. Provide vector argument. following threshold types available: lpt: highest threshold omission. equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard index highest. max_sorensen: threshold Sorensen index highest. max_fpb: threshold FPB (F-measure presence-background data) highest. sensitivity: Threshold based specified sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers sensitivity value. 
sensitivity value specified, default used 0.9 one threshold type used must concatenate, e.g., thr=c('lpt', 'max_sens_spec', 'max_jaccard'), thr=c('lpt', 'max_sens_spec', 'sensitivity', sens='0.8'), thr=c('lpt', 'max_sens_spec', 'sensitivity'). Function use threshold types threshold specified. metric character. Performance metric used selecting best combination hyper-parameter values. following metrics can used: SORENSEN, JACCARD, FPB, TSS, KAPPA, AUC, BOYCE. TSS used default. n_cores numeric. Number cores use parallelization. Default 1","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_gbm.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit and validate Generalized Boosted Regression models with exploration of\r\nhyper-parameters that optimize performance — tune_gbm","text":"list object : model: \"gbm\" class object gbm package. object can used predicting. predictors: tibble quantitative (c column names) qualitative (f column names) variables use modeling. performance: Hyper-parameter values performance metric (see sdm_eval) best hyper-parameter combination. hyper_performance: Performance metric (see sdm_eval) combination hyper-parameters. data_ens: Predicted suitability test partition based best model. database used fit_ensemble","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_gbm.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit and validate Generalized Boosted Regression models with exploration of\r\nhyper-parameters that optimize performance — tune_gbm","text":"","code":"if (FALSE) { data(abies) abies # Partition the data with the k-fold method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) # pr_ab is the name of the column with species presence and absences (i.e. 
the response variable) # from aet to landform are the predictors variables (landform is a qualitative variable) # Hyper-parameter values for tuning tune_grid <- expand.grid( n.trees = c(20, 50, 100), shrinkage = c(0.1, 0.5, 1), n.minobsinnode = c(1, 3, 5, 7, 9) ) gbm_t <- tune_gbm( data = abies2, response = \"pr_ab\", predictors = c( \"aet\", \"cwd\", \"tmin\", \"ppt_djf\", \"ppt_jja\", \"ppt_jja\", \"pH\", \"awc\", \"depth\" ), predictors_f = c(\"landform\"), partition = \".part\", grid = tune_grid, thr = \"max_sens_spec\", metric = \"TSS\", n_cores = 1 ) # Outputs gbm_t$model gbm_t$predictors gbm_t$performance gbm_t$data_ens gbm_t$hyper_performance # Graphical exploration of performance of each hyper-parameter setting require(ggplot2) pg <- position_dodge(width = 0.5) ggplot(gbm_t$hyper_performance, aes(factor(n.minobsinnode), TSS_mean, col = factor(shrinkage) )) + geom_errorbar(aes(ymin = TSS_mean - TSS_sd, ymax = TSS_mean + TSS_sd), width = 0.2, position = pg ) + geom_point(position = pg) + geom_line( data = gbm_t$tune_performance, aes(as.numeric(factor(n.minobsinnode)), TSS_mean, col = factor(shrinkage) ), position = pg ) + facet_wrap(. 
~ n.trees) + theme(legend.position = \"bottom\") }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_max.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit and validate Maximum Entropy models with exploration of hyper-parameters that optimize performance — tune_max","title":"Fit and validate Maximum Entropy models with exploration of hyper-parameters that optimize performance — tune_max","text":"Fit validate Maximum Entropy models exploration hyper-parameters optimize performance","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_max.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit and validate Maximum Entropy models with exploration of hyper-parameters that optimize performance — tune_max","text":"","code":"tune_max( data, response, predictors, predictors_f = NULL, background = NULL, partition, grid = NULL, thr = NULL, metric = \"TSS\", clamp = TRUE, pred_type = \"cloglog\", n_cores = 1 )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_max.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit and validate Maximum Entropy models with exploration of hyper-parameters that optimize performance — tune_max","text":"data data.frame. Database response (0,1) predictors values. response character. Column name species absence-presence data (0,1). predictors character. Vector column names quantitative predictor variables (.e. continuous variables). Usage predictors = c(\"aet\", \"cwd\", \"tmin\") predictors_f character. Vector column names qualitative predictor variables (.e. ordinal nominal variables type). Usage predictors_f = c(\"landform\") background data.frame. Database response variable column containing 0 values, predictors variables. column names must consistent data partition character. Column name training validation partition groups. grid data.frame. data frame object algorithm hyper-parameters values tested. 
recommended generate data.frame grid() function. Hyper-parameters needed tuning 'regmult' 'classes' (combination following letters l -linear-, q -quadratic-, h -hinge-, p -product-, t -threshold-). thr character. Threshold used get binary suitability values (.e. 0,1)., needed threshold-dependent performance metrics. one threshold type can used. necessary provide vector argument. following threshold types available: lpt: highest threshold omission. equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard index highest. max_sorensen: threshold Sorensen index highest. max_fpb: threshold # FPB (F-measure presence-background data) highest. sensitivity: Threshold based specified sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers sensitivity value. sensitivity value specified, default 0.9 used. one threshold type used, concatenate , e.g., thr=c('lpt', 'max_sens_spec', 'max_jaccard'), thr=c('lpt', 'max_sens_spec', 'sensitivity', sens='0.8'), thr=c('lpt', 'max_sens_spec', 'sensitivity'). Function use thresholds threshold specified. metric character. Performance metric used selecting best combination hyper -parameter values. One following metrics can used: SORENSEN, JACCARD, FPB, TSS, KAPPA, AUC, BOYCE. TSS used default. clamp logical. TRUE, predictors features restricted range seen model training. pred_type character. Type response required available \"link\", \"exponential\", \"cloglog\" \"logistic\". Default \"cloglog\" n_cores numeric. Number cores use parallelization. 
Default 1","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_max.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit and validate Maximum Entropy models with exploration of hyper-parameters that optimize performance — tune_max","text":"list object : model: \"maxnet\" class object maxnet package. object can used predicting. predictors: tibble quantitative (c column names) qualitative (f column names) variables use modeling. performance: Hyper-parameters values performance metrics (see sdm_eval) best hyper-parameters combination. hyper_performance: Performance metrics (see sdm_eval) combination hyper-parameters. data_ens: Predicted suitability test partition based best model. database used fit_ensemble","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_max.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Fit and validate Maximum Entropy models with exploration of hyper-parameters that optimize performance — tune_max","text":"presence-absence (presence-pseudo-absence) data used data argument addition background points, function fit models presences background points validate presences absences. procedure makes maxent comparable presences-absences models (e.g., random forest, support vector machine). presences background points data used, function fit validate model presences background data. 
presence-absences used data argument without background, function fit model specified data (recommended).","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_max.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit and validate Maximum Entropy models with exploration of hyper-parameters that optimize performance — tune_max","text":"","code":"if (FALSE) { data(\"abies\") data(\"backg\") abies # environmental conditions of presence-absence data backg # environmental conditions of background points # Using k-fold partition method # Remember that the partition method, number of folds or replications must # be the same for presence-absence and background points datasets abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 3) ) abies2 set.seed(1) backg <- dplyr::sample_n(backg, size = 2000, replace = FALSE) backg2 <- part_random( data = backg, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 3) ) backg gridtest <- expand.grid( regmult = seq(0.1, 3, 0.5), classes = c(\"l\", \"lq\", \"lqh\") ) max_t1 <- tune_max( data = abies2, response = \"pr_ab\", predictors = c(\"aet\", \"pH\", \"awc\", \"depth\"), predictors_f = c(\"landform\"), partition = \".part\", background = backg2, grid = gridtest, thr = \"max_sens_spec\", metric = \"TSS\", clamp = TRUE, pred_type = \"cloglog\", n_cores = 2 # activate two cores to speed up this process ) length(max_t1) max_t1$model max_t1$predictors max_t1$performance max_t1$data_ens }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_net.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit and validate Neural Networks models with exploration of hyper-parameters — tune_net","title":"Fit and validate Neural Networks models with exploration of hyper-parameters — tune_net","text":"Fit validate Neural Networks models exploration 
hyper-parameters","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_net.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit and validate Neural Networks models with exploration of hyper-parameters — tune_net","text":"","code":"tune_net( data, response, predictors, predictors_f = NULL, fit_formula = NULL, partition, grid = NULL, thr = NULL, metric = \"TSS\", n_cores = 1 )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_net.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit and validate Neural Networks models with exploration of hyper-parameters — tune_net","text":"data data.frame. Database response (0,1) predictors values. response character. Column name species absence-presence data (0,1). predictors character. Vector column names quantitative predictor variables (.e. continuous variables). Usage predictors = c(\"aet\", \"cwd\", \"tmin\") predictors_f character. Vector column names qualitative predictor variables (.e. ordinal nominal variables type). Usage predictors_f = c(\"landform\") fit_formula formula. formula object response predictor variables (e.g. formula(pr_ab ~ aet + ppt_jja + pH + awc + depth + landform)). Note variable names used must consistent used response, predictors, predictors_f arguments. Defaul NULL. partition character. Column name training validation partition groups. grid data.frame. data frame object algorithm hyper-parameters values tested. recommended generate data.frame grid() function. thr character. Threshold used get binary suitability values (.e. 0,1), needed threshold-dependent performance metrics. possible use one threshold type. necessary provide vector argument. following threshold types available: lpt: highest threshold omission. equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). 
max_jaccard: threshold Jaccard index highest. max_sorensen: threshold Sorensen index highest. max_fpb: threshold FPB (F-measure presence-background data) highest. sensitivity: Threshold based specified sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers sensitivity value. specified sensitivity values, function use default 0.9. using one threshold type concatenate , e.g., thr=c('lpt', 'max_sens_spec', 'max_jaccard'), thr=c('lpt', 'max_sens_spec', 'sensitivity', sens='0.8'), thr=c('lpt', 'max_sens_spec', 'sensitivity'). Function use thresholds threshold specified. metric character. Performance metric used selecting best combination hyper-parameter values. One following metrics can used: SORENSEN, JACCARD, FPB, TSS, KAPPA, AUC, BOYCE. TSS used default. n_cores numeric. Number cores use parallelization. Default 1","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_net.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit and validate Neural Networks models with exploration of hyper-parameters — tune_net","text":"list object : model: \"nnet\" class object nnet package. object can used predicting. predictors: tibble quantitative (c column names) qualitative (f column names) variables use modeling. performance: Hyper-parameters values performance metric (see sdm_eval) best hyper-parameters combination. hyper_performance: Performance metric (see sdm_eval) combination hyper-parameters. data_ens: Predicted suitability test partition based best model. 
database used fit_ensemble","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_net.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit and validate Neural Networks models with exploration of hyper-parameters — tune_net","text":"","code":"if (FALSE) { data(abies) abies # Partitioning the data with the k-fold method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) # pr_ab columns is species presence and absences (i.e. the response variable) # from aet to landform are the predictors variables (landform is a qualitative variable) # Hyper-parameter values for tuning tune_grid <- expand.grid( size = c(2, 4, 6, 8, 10), decay = c(0.001, 0.05, 0.1, 1, 3, 4, 5, 10) ) net_t <- tune_net( data = abies2, response = \"pr_ab\", predictors = c( \"aet\", \"cwd\", \"tmin\", \"ppt_djf\", \"ppt_jja\", \"pH\", \"awc\", \"depth\" ), predictors_f = c(\"landform\"), partition = \".part\", grid = tune_grid, thr = \"max_sens_spec\", metric = \"TSS\", n_cores = 1 ) # Outputs net_t$model net_t$predictors net_t$performance net_t$hyper_performance net_t$data_ens }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_raf.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit and validate Random Forest models with exploration of hyper-parameters that optimize performance — tune_raf","title":"Fit and validate Random Forest models with exploration of hyper-parameters that optimize performance — tune_raf","text":"Fit validate Random Forest models exploration hyper-parameters optimize performance","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_raf.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit and validate Random Forest models with exploration of hyper-parameters that optimize performance — tune_raf","text":"","code":"tune_raf( data, response, predictors, predictors_f = NULL, 
fit_formula = NULL, partition, grid = NULL, thr = NULL, metric = \"TSS\", n_cores = 1 )"},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_raf.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit and validate Random Forest models with exploration of hyper-parameters that optimize performance — tune_raf","text":"data data.frame. Database response (0,1) predictors values. response character. Column name species absence-presence data (0,1). predictors character. Vector column names quantitative predictor variables (.e. continuous variables). Usage predictors = c(\"aet\", \"cwd\", \"tmin\") predictors_f character. Vector column names qualitative predictor variables (.e. ordinal nominal variables type). Usage predictors_f = c(\"landform\") fit_formula formula. formula object response predictor variables (e.g. formula(pr_ab ~ aet + ppt_jja + pH + awc + depth + landform)). Note variables used must consistent used response, predictors, predictors_f arguments. Default NULL partition character. Column name training validation partition groups. grid data.frame. data frame object algorithm hyper-parameters values tested. recommended generate data.frame grid() function. Hyper-parameter needed tuning 'mtry'. maximum mtry exceed total number predictors. thr character. Threshold used get binary suitability values (.e. 0,1), needed threshold-dependent performance metrics. possible use one threshold type. necessary provide vector argument. following threshold types available: lpt: highest threshold omission. equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard index highest. max_sorensen: threshold Sorensen index highest. max_fpb: threshold FPB highest. sensitivity: Threshold based specified sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 
'sens' refers sensitivity value. specified sensitivity values, function use default 0.9 using one threshold type concatenate , e.g., thr=c('lpt', 'max_sens_spec', 'max_jaccard'), thr=c('lpt', 'max_sens_spec', 'sensitivity', sens='0.8'), thr=c('lpt', 'max_sens_spec', 'sensitivity'). Function use thresholds threshold specified. metric character. Performance metric used selecting best combination hyper -parameter values. One following metrics can used: SORENSEN, JACCARD, FPB, TSS, KAPPA, AUC, BOYCE. TSS used default. n_cores numeric. Number cores use parallelization. Default 1","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_raf.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit and validate Random Forest models with exploration of hyper-parameters that optimize performance — tune_raf","text":"list object : model: \"randomForest\" class object randomForest package. object can used predicting. predictors: tibble quantitative (c column names) qualitative (f column names) variables use modeling. performance: Hyper-parameters values performance metric (see sdm_eval) best hyper-parameters combination. hyper_performance: Performance metric (see sdm_eval) combination hyper-parameters. data_ens: Predicted suitability test partition based best model. 
database used fit_ensemble","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_raf.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit and validate Random Forest models with exploration of hyper-parameters that optimize performance — tune_raf","text":"","code":"if (FALSE) { data(abies) abies # Partition the data with the k-fold method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) tune_grid <- expand.grid(mtry = seq(1, 7, 1)) rf_t <- tune_raf( data = abies2, response = \"pr_ab\", predictors = c( \"aet\", \"cwd\", \"tmin\", \"ppt_djf\", \"ppt_jja\", \"pH\", \"awc\", \"depth\" ), predictors_f = c(\"landform\"), partition = \".part\", grid = tune_grid, thr = \"max_sens_spec\", metric = \"TSS\", n_cores = 1 ) # Outputs rf_t$model rf_t$predictors rf_t$performance rf_t$hyper_performance rf_t$data_ens }"},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_svm.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit and validate Support Vector Machine models with exploration of hyper-parameters that optimize performance — tune_svm","title":"Fit and validate Support Vector Machine models with exploration of hyper-parameters that optimize performance — tune_svm","text":"Fit validate Support Vector Machine models exploration hyper-parameters optimize performance","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_svm.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit and validate Support Vector Machine models with exploration of hyper-parameters that optimize performance — tune_svm","text":"","code":"tune_svm( data, response, predictors, predictors_f = NULL, fit_formula = NULL, partition, grid = NULL, thr = NULL, metric = \"TSS\", n_cores = 1 
)"},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_svm.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit and validate Support Vector Machine models with exploration of hyper-parameters that optimize performance — tune_svm","text":"data data.frame. Database response (0,1) predictors values. response character. Column name species absence-presence data (0,1). predictors character. Vector column names quantitative predictor variables (.e. continuous variables). Usage predictors = c(\"aet\", \"cwd\", \"tmin\") predictors_f character. Vector column names qualitative predictor variables (.e. ordinal nominal variables type). Usage predictors_f = c(\"landform\") fit_formula formula. formula object response predictor variables (e.g. formula(pr_ab ~ aet + ppt_jja + pH + awc + depth + landform)). Note variable names used must consistent used response, predictors, predictors_f arguments. Default NULL partition character. Column name training validation partition groups. grid data.frame. Provide data frame object algorithm hyper-parameters values tested. recommended generate data.frame grid() function. Hyper-parameters needed tuning 'size' 'decay'. thr character. Threshold used get binary suitability values (.e. 0,1). useful threshold-dependent performance metrics. possible use one threshold type. necessary provide vector argument. next threshold area available: lpt: highest threshold omission. equal_sens_spec: Threshold sensitivity specificity equal. max_sens_spec: Threshold sum sensitivity specificity highest (aka threshold maximizes TSS). max_jaccard: threshold Jaccard index highest. max_sorensen: threshold Sorensen index highest. max_fpb: threshold # FPB (F-measure presence-background data) highest. sensitivity: Threshold based specified sensitivity value. Usage thr = c('sensitivity', sens='0.6') thr = c('sensitivity'). 'sens' refers sensitivity value. sensitivity value specified, default used 0.9. 
case use one threshold type necessary concatenate threshold types, e.g., thr=c('lpt', 'max_sens_spec', 'max_jaccard'), thr=c('lpt', 'max_sens_spec', 'sensitivity', sens='0.8'), thr=c('lpt', 'max_sens_spec', 'sensitivity'). Function use thresholds threshold specified metric character. Performance metric used selecting best combination hyper-parameter values. One following metrics can used: SORENSEN, JACCARD, FPB, TSS, KAPPA, AUC, BOYCE. TSS used default. n_cores numeric. Number cores use parallelization. Default 1","code":""},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_svm.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit and validate Support Vector Machine models with exploration of hyper-parameters that optimize performance — tune_svm","text":"list object : model: \"ksvm\" class object kernlab package. object can used predicting. predictors: tibble quantitative (c column names) qualitative (f column names) variables use modeling. performance: Hyper-parameters values performance metric (see sdm_eval) best hyper-parameters combination. hyper_performance: Performance metrics (see sdm_eval) combination hyper-parameters. data_ens: Predicted suitability test partition based best model. database used fit_ensemble","code":""},{"path":[]},{"path":"https://sjevelazco.github.io/flexsdm/reference/tune_svm.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit and validate Support Vector Machine models with exploration of hyper-parameters that optimize performance — tune_svm","text":"","code":"if (FALSE) { data(abies) abies # Partition the data with the k-fold method abies2 <- part_random( data = abies, pr_ab = \"pr_ab\", method = c(method = \"kfold\", folds = 5) ) # pr_ab column is species presence and absences (i.e. 
the response variable) # from aet to landform are the predictors variables (landform is a qualitative variable) # Hyper-parameter values for tuning tune_grid <- expand.grid( C = c(2, 4, 8, 16, 20), sigma = c(0.01, 0.1, 0.2, 0.3, 0.4) ) svm_t <- tune_svm( data = abies2, response = \"pr_ab\", predictors = c( \"aet\", \"cwd\", \"tmin\", \"ppt_djf\", \"ppt_jja\", \"pH\", \"awc\", \"depth\" ), predictors_f = c(\"landform\"), partition = \".part\", grid = tune_grid, thr = \"max_sens_spec\", metric = \"TSS\", n_cores = 1 ) # Outputs svm_t$model svm_t$predictors svm_t$performance svm_t$hyper_performance svm_t$data_ens }"},{"path":"https://sjevelazco.github.io/flexsdm/news/index.html","id":"flexsdm-135","dir":"Changelog","previous_headings":"","what":"flexsdm 1.3.5","title":"flexsdm 1.3.5","text":"possible restrict cell used perform collinearity reduction analysis geographical area smaller full extent environmental variables correct_clinvar() esm_ family function improved debugged occfilt_geo new argument “rep” control number o repetition filter occurrences","code":""},{"path":"https://sjevelazco.github.io/flexsdm/news/index.html","id":"flexsdm-134","dir":"Changelog","previous_headings":"","what":"flexsdm 1.3.4","title":"flexsdm 1.3.4","text":"rgeos removed dependencies #356 New vignette use different tools explore model extrapolation truncate models added #352 Univariate combinatorial extrapolation metric added extra_eval. 
Minor bugs fixed project PCA time periods #351 Best grid raster names changed .part part_sblock part_sband Improvements correct_colinvar speed function using maxcell argument Improvements correct_colinvar project PCA time periods","code":""},{"path":"https://sjevelazco.github.io/flexsdm/news/index.html","id":"flexsdm-133","dir":"Changelog","previous_headings":"","what":"flexsdm 1.3.3","title":"flexsdm 1.3.3","text":"Improvements correct_colinvar now possible sample rasters reduce machine memory speed process Improvements sdm_predict possible predict model chunks reduce machine memory p_extra, p_pdp, p_bpdp fixed New function p_bpdp Bivariate Partial Dependent Plot New function data_bpdp Calculate data construct bivariate partial dependence plots Improvements p_dpd Calculate data construct partial dependence plots","code":""},{"path":"https://sjevelazco.github.io/flexsdm/news/index.html","id":"flexsdm-132","dir":"Changelog","previous_headings":"","what":"flexsdm 1.3.2","title":"flexsdm 1.3.2","text":"New function p_extra Graphical exploration extrapolation suitability pattern environmental geographical space New function p_pdp Partial Dependent Plot New function data_pdp Calculate data construct partial dependence plots","code":""},{"path":"https://sjevelazco.github.io/flexsdm/news/index.html","id":"flexsdm-131","dir":"Changelog","previous_headings":"","what":"flexsdm 1.3.1","title":"flexsdm 1.3.1","text":"New argument “crs” added function msdm_posteriori New argument “sp_name” sample_background sample_pseudoabs raster, flexclust, ape, sp removed dependencies Functions using CRS data improved codes possible use numeric value specify threshold msdm_posteriori extra_eval can use tibble SpatRaster object env_calib argument extra_truncate new argument define values used model truncation documentation improved. 
#","code":""}] diff --git a/man/correct_colinvar.Rd b/man/correct_colinvar.Rd index f882c88e..b63a0527 100644 --- a/man/correct_colinvar.Rd +++ b/man/correct_colinvar.Rd @@ -4,7 +4,14 @@ \alias{correct_colinvar} \title{Collinearity reduction of predictor variables} \usage{ -correct_colinvar(env_layer, method, proj = NULL, maxcell = NULL) +correct_colinvar( + env_layer, + method, + proj = NULL, + restric_to_region = NULL, + restric_pca_proj = FALSE, + maxcell = NULL +) } \arguments{ \item{env_layer}{SpatRaster An object of class SpatRaster containing the predictors. @@ -27,6 +34,10 @@ provide a vector for this argument. The next methods are implemented: \item{proj}{character. Only used for pca method. Path to a folder that contains sub-folders for the different projection scenarios. Variables names must have the same names as in the raster used in env_layer argument. Usage proj = "C:/User/Desktop/Projections" (see in Details more about the use of this argument)} +\item{restric_to_region}{SpatVector. Area used to restrict cells of env_layer at moment to perform collinearity reduction.} + +\item{restric_pca_proj}{logical. Area used to restrict geographically PCA projection within SpatVector used in restric_to_region. Only use for PCA analysis. default: FALSE.} + \item{maxcell}{numeric. Number of raster cells to be randomly sampled. Taking a sample could be useful to reduce memory usage for large rasters. If NULL, the function will use all raster cells. Default NULL. 
Usage maxcell = 50000.} @@ -165,6 +176,49 @@ var$number_factors var$removed_variables var$uniqueness var$loadings + +##\%######################################################\%## +# # +#### Other option to perform PCA #### +#### considering cell restricted to a region #### +# # +##\%######################################################\%## + +# Define a calibration area +abies2 <- abies \%>\% + dplyr::select(x, y, pr_ab) \%>\% + dplyr::filter(pr_ab==1) + +plot(somevar[[1]]) +points(abies2[-3]) +ca <- calib_area(abies2, x = "x", y = "y", method = c("mcp"), crs=crs(somevar)) +plot(ca, add=T) + +# Full geographical range to perform PCA +pca_fr <- correct_colinvar(env_layer = somevar , + method = c("pca"), + maxcell = NULL, + restric_to_region = NULL, + restric_pca_proj = FALSE) + +# Perform PCA only with cell delimited by polygon used in restric_to_region +pca_rr <- correct_colinvar(env_layer = somevar , + method = c("pca"), + maxcell = NULL, + restric_to_region = ca, + restric_pca_proj = FALSE) + +# Perform and predicted PCA only with cell delimited by polygon used in restric_to_region +pca_rrp <- correct_colinvar(env_layer = somevar , + method = c("pca"), + maxcell = NULL, + restric_to_region = ca, + restric_pca_proj = TRUE) + +plot(pca_fr$env_layer) # PCA with all cells +plot(pca_rr$env_layer) # PCA with calibration area cell but predicted for entire region +plot(pca_rrp$env_layer) # PCA performed and predicted for cells within calibration area (ca) + } } diff --git a/tests/testthat/test-correct_colinvar.R b/tests/testthat/test-correct_colinvar.R index b087dc66..23ba0382 100644 --- a/tests/testthat/test-correct_colinvar.R +++ b/tests/testthat/test-correct_colinvar.R @@ -108,7 +108,7 @@ test_that("correct_colinvar PCA with projections", { terra::writeRaster(somevar, file.path(dir_sc[1], "somevar.tif"), overwrite=TRUE) terra::writeRaster(somevar, file.path(dir_sc[2], "somevar.tif"), overwrite=TRUE) - # Perform pearson collinearity control + # Perform PCA 
collinearity control var <- correct_colinvar(env_layer = somevar, method = "pca", proj = dirname(dir_sc[1])) @@ -117,6 +117,58 @@ test_that("correct_colinvar PCA with projections", { unlink(gsub("projections", "Projection_PCA", dirname(dir_sc[1])), recursive = TRUE) }) + +test_that("correct_colinvar PCA with different projection area", { + require(terra) + require(dplyr) + somevar <- + system.file("external/somevar.tif", package = "flexsdm") + somevar <- terra::rast(somevar) + + # set seed + abies2 <- abies %>% + dplyr::select(x, y, pr_ab) %>% + dplyr::filter(pr_ab==1) + + ca <- calib_area(abies2, x = "x", y = "y", method = c("mcp"), crs=crs(somevar)) + + # Perform PCA only with cell delimited by polygon used in restric_to_region + var <- correct_colinvar(env_layer = somevar , + method = c("pca"), + maxcell = NULL, + restric_to_region = ca, + restric_pca_proj = FALSE) + expect_equal(length(var), 3) + expect_equal(class(var$env_layer)[1], "SpatRaster") + expect_equal(nrow(var$coefficients), 4) + expect_equal(nrow(var$cumulative_variance), 4) + expect_true(all(names(var) %in% c("env_layer", "coefficients", "cumulative_variance"))) + + # Perform and predicted PCA only with cell delimited by polygon used in restric_to_region + var <- correct_colinvar(env_layer = somevar , + method = c("pca"), + maxcell = NULL, + restric_to_region = ca, + restric_pca_proj = TRUE) + expect_equal(length(var), 3) + expect_equal(class(var$env_layer)[1], "SpatRaster") + expect_true(ext(var$env_layer)[1]> (-310000)) + expect_equal(nrow(var$coefficients), 4) + expect_equal(nrow(var$cumulative_variance), 4) + expect_true(all(names(var) %in% c("env_layer", "coefficients", "cumulative_variance"))) + + + # Perform pearson collinearity control + var <- + correct_colinvar(env_layer = somevar, method = "pca") + + expect_equal(length(var), 3) + expect_equal(class(var$env_layer)[1], "SpatRaster") + expect_equal(nrow(var$coefficients), 4) + expect_equal(nrow(var$cumulative_variance), 4) + 
expect_true(all(names(var) %in% c("env_layer", "coefficients", "cumulative_variance"))) +}) + test_that("correct_colinvar FA", { require(terra) require(dplyr) diff --git a/vignettes/v01_pre_modeling.Rmd b/vignettes/v01_pre_modeling.Rmd index be4066c0..ba8bd82e 100644 --- a/vignettes/v01_pre_modeling.Rmd +++ b/vignettes/v01_pre_modeling.Rmd @@ -29,44 +29,43 @@ library(knitr) ## Introduction -Species distribution modeling (SDM) has become a standard tool in many research areas, including ecology, conservation biology, biogeography, paleobiogeography, and epidemiology. SDM is an active area of theoretical and methodological research. The *flexsdm* package provides users the ability to manipulate and parameterize models in a variety of ways that meet their unique research needs. +Species distribution modeling (SDM) has become a standard tool in many research areas, including ecology, conservation biology, biogeography, paleobiogeography, and epidemiology. SDM is an active area of theoretical and methodological research. The *flexsdm* package provides users the ability to manipulate and parameterize models in a variety of ways that meet their unique research needs. This flexibility enables users to define their own complete or partial modeling procedure specific for their modeling situation (e.g., number of variables, number of records, different algorithms and ensemble methods, algorithms tuning, etc.). -In this vignette, users will learn about the first set of functions in the *flexsdm* package that fall under the "pre-modeling" umbrella (see below for full list). +In this vignette, users will learn about the first set of functions in the *flexsdm* package that fall under the "pre-modeling" umbrella (see below for full list). 
**pre-modeling functions** - -+ calib_area() Delimit calibration area for constructing species distribution models -+ correct_colinvar() Collinearity reduction for predictors +- calib_area() Delimit calibration area for constructing species distribution models -+ env_outliers() Integration of outliers detection methods in environmental space +- correct_colinvar() Collinearity reduction for predictors -+ part_random() Data partitioning for training and testing models +- env_outliers() Integration of outliers detection methods in environmental space -+ part_sblock() Spatial block cross-validation +- part_random() Data partitioning for training and testing models -+ part_sband() Spatial band cross-validation +- part_sblock() Spatial block cross-validation -+ part_senv() Environmental cross-validation +- part_sband() Spatial band cross-validation -+ plot_res() Plot different resolutions to be used in part_sblock +- part_senv() Environmental cross-validation -+ get_block() Transform a spatial partition layer to the same spatial properties as the environmental variables +- plot_res() Plot different resolutions to be used in part_sblock -+ sample_background() Sample background points +- get_block() Transform a spatial partition layer to the same spatial properties as the environmental variables -+ sample_pseudoabs() Sample pseudo-absence +- sample_background() Sample background points -+ sdm_directory() Create directories for saving the outputs of flexsdm +- sample_pseudoabs() Sample pseudo-absence -+ sdm_extract() Extract environmental data based on x and y coordinates +- sdm_directory() Create directories for saving the outputs of flexsdm -+ occfilt_env() Perform environmental filtering on species occurrences +- sdm_extract() Extract environmental data based on x and y coordinates -+ occfilt_geo() Perform geographical filtering on species occurrences +- occfilt_env() Perform environmental filtering on species occurrences +- occfilt_geo() Perform geographical filtering 
on species occurrences ## Installation @@ -81,7 +80,7 @@ library(terra) ## Project Directory Setup -When building SDM's, organizing folders (directories) for a project will save time and confusion. The project directory is the main project folder where you will store all of the relevant data and results for your current project. Now, let's create a project directory where your initial data and the model results will be stored. The function sdm_directory() can do this for you, based on the types of model algorithms you want to use and/or the types of projections you would like to make. First decide where on your computer you would like to store the inputs and outputs of the project (this will be the main directory) and then use dir.create() to create that main directory. Next, specify whether or not you want to include folders for projections, calibration areas, algorithms, ensembles, and thresholds. +When building SDM's, organizing folders (directories) for a project will save time and confusion. The project directory is the main project folder where you will store all of the relevant data and results for your current project. Now, let's create a project directory where your initial data and the model results will be stored. The function sdm_directory() can do this for you, based on the types of model algorithms you want to use and/or the types of projections you would like to make. First decide where on your computer you would like to store the inputs and outputs of the project (this will be the main directory) and then use dir.create() to create that main directory. Next, specify whether or not you want to include folders for projections, calibration areas, algorithms, ensembles, and thresholds. 
```{r sdm_directory, eval = FALSE, warning=FALSE} my_project <- file.path(file.path(tempdir(), "flex_sdm_project")) @@ -98,7 +97,6 @@ project_directory <- sdm_directory( ) ``` - ## Data, species occurrence and background data In this tutorial, we will be using species occurrences that are available through the *flexsdm* package. The "spp" example dataset includes pr_ab column (presence = 1, and absence = 0), and location columns (x, y). You can load the "spp" data into your local R environment by using the code below: @@ -111,7 +109,7 @@ spp ## Geographic region -Our species occurrences are located in the California Floristic Province (far western USA). The "regions" dataset can be used to visualize the study area in geographic space. +Our species occurrences are located in the California Floristic Province (far western USA). The "regions" dataset can be used to visualize the study area in geographic space. ```{r region data} regions <- system.file("external/regions.tif", package = "flexsdm") @@ -119,6 +117,7 @@ regions <- terra::rast(regions) ``` How are the points distributed across our study area? + ```{r map} try(plot(regions), silent=TRUE) points(spp[, 2:3], pch = 19, cex = 0.5, col = as.factor(spp$species)) @@ -126,8 +125,8 @@ points(spp[, 2:3], pch = 19, cex = 0.5, col = as.factor(spp$species)) ## Calibration area -An important decision in SDM is how to delimit your model's calibration area, or the geographic space you will use to train your model(s). Choice of calibration area affects other modeling steps, including sampling pseudo-absence and background points, performance metrics, and the geographic patterns of habitat suitability. You would not want to train an SDM using the entire extent of the United States if you are interested in the geographic distribution and environmental controls of a rare plant species that is only found on mountaintops in the Sierra Nevada, California! 
- +An important decision in SDM is how to delimit your model's calibration area, or the geographic space you will use to train your model(s). Choice of calibration area affects other modeling steps, including sampling pseudo-absence and background points, performance metrics, and the geographic patterns of habitat suitability. You would not want to train an SDM using the entire extent of the United States if you are interested in the geographic distribution and environmental controls of a rare plant species that is only found on mountaintops in the Sierra Nevada, California! + Let's use presence locations for one species in this exercise. ```{r spp1} @@ -140,12 +139,11 @@ spp1 <- ``` - -The calib_area() function offers three methods for defining a calibration area: buffer, mcp, bmcp, and mask. We will briefly go over each. +The calib_area() function offers four methods for defining a calibration area: buffer, mcp, bmcp, and mask. We will briefly go over each. ### 1. Buffer -Here the calibration area is defined using buffers around presence points. User's can specify the distance around points using the "width" argument. The buffer width value is interpreted in m if the CRS has a longitude/latitude, or in map units in other cases. +Here the calibration area is defined using buffers around presence points. Users can specify the distance around points using the "width" argument. The buffer width value is interpreted in meters if the CRS is longitude/latitude, or in map units in other cases. ```{r buffer method} crs(regions, proj=TRUE) @@ -162,9 +160,9 @@ plot(ca_1, add = TRUE) points(spp1[, 2:3], pch = 19, cex = 0.5) ``` - ### 2. Minimum convex polygon -The minimum convex polygon (mcp) method produces a much simpler shape. + +The minimum convex polygon (mcp) method produces a much simpler shape. ```{r mcp method} ca_2 <- calib_area( @@ -181,7 +179,8 @@ points(spp1[, 2:3], pch = 19, cex = 0.5) ``` ### 3.
Buffered minimum convex polygon -You can also create a buffer around the minimum convex polygon. + +You can also create a buffer around the minimum convex polygon. ```{r bmcp method} ca_3 <- calib_area( @@ -197,9 +196,9 @@ plot(ca_3, add = TRUE) points(spp1[, 2:3], pch = 19, cex = 0.5) ``` -### 4. Mask +### 4. Mask -The mask method allows polygons to be selected that intersect with your species locations to delineate the calibration area. This is useful if you expect species distributions to be associated with ecologically significant (and mapped) ecoregions, or are interested in distributions within political boundaries. We will use a random set of polygons named "clusters" to illustrate the mask method. The original polygons are on the left and the polygons that contain points (our "mask" calibration area) are on the right. +The mask method allows polygons to be selected that intersect with your species locations to delineate the calibration area. This is useful if you expect species distributions to be associated with ecologically significant (and mapped) ecoregions, or are interested in distributions within political boundaries. We will use a random set of polygons named "clusters" to illustrate the mask method. The original polygons are on the left and the polygons that contain points (our "mask" calibration area) are on the right. ```{r mask method} clusters <- system.file("external/clusters.shp", package = "flexsdm") @@ -221,7 +220,7 @@ points(spp1[, 2:3], pch = 19, cex = 0.5) ## Reducing collinearity among the predictors -Predictor collinearity is a common issue for SDMs, which can lead to model overfitting and inaccurate tests of significance for predictors (De Marco & Nóbrega, 2018; Dormann et al., 2013). +Predictor collinearity is a common issue for SDMs, which can lead to model overfitting and inaccurate tests of significance for predictors (De Marco & Nóbrega, 2018; Dormann et al., 2013). 
### Environmental predictors @@ -238,17 +237,18 @@ plot(somevar) ``` The relationship between different environmental variables can be visualized with the pairs() function from the *terra* package. Several of our variables are highly correlated (.89 for predictors tmx and tmn). - + ```{r pairs plot} terra::pairs(somevar) ``` So how can we correct for or reduce this collinearity? The function correct_colinvar() has four methods to deal with collinearity: pearson, vif, pca, and fa. Each method returns 1) a raster object (SpatRaster) with the selected predictors and 2) other useful outputs relevant to each method. These functions should be used as supplementary tools, as predictor selection in SDMs is complicated and ultimately should be based on the relationship between the environment and species' biology. With that being said, these functions offer options for exploring the relationships between predictor variables that can aid in the predictor selection process. Let's look at each method: -### 1. Pearson correlation -This method returns three objects 1) SpatRaster with environmental variables with a correlation below a given threshold (the default is 0.7), 2) the names of the variables that had a correlation above the given threshold and were "removed" from the environmental data, and 3) a correlation matrix of all of the environmental variables. However, we strongly urge users to use this information along with knowledge about specific species-environment relationships to select ecologically-relevant predictors for their SDMs. For example, here, we are modeling the distribution of a plant species in a water-limited Mediterranean-type ecosystem, so we may want to include BOTH climatic water deficit (cwd) and actual evapotranspiration (aet). Despite being highly correlated, these variables capture water availability and evaporative demand, respectively (Stephenson 1998). 
Additionally, minimum absolute temperature strongly controls vegetation distributions (Woodward, Lomas, and Kelly 2004), so we would select tmn (minimum temperature of the coldest month) in this example. +### 1. Pearson correlation + +This method returns three objects 1) SpatRaster with environmental variables with a correlation below a given threshold (the default is 0.7), 2) the names of the variables that had a correlation above the given threshold and were "removed" from the environmental data, and 3) a correlation matrix of all of the environmental variables. However, we strongly urge users to use this information along with knowledge about specific species-environment relationships to select ecologically-relevant predictors for their SDMs. For example, here, we are modeling the distribution of a plant species in a water-limited Mediterranean-type ecosystem, so we may want to include BOTH climatic water deficit (cwd) and actual evapotranspiration (aet). Despite being highly correlated, these variables capture water availability and evaporative demand, respectively (Stephenson 1998). Additionally, minimum absolute temperature strongly controls vegetation distributions (Woodward, Lomas, and Kelly 2004), so we would select tmn (minimum temperature of the coldest month) in this example. -For references, see: +For references, see: #### 1. Stephenson, N. 1998. Actual evapotranspiration and deficit: biologically meaningful correlates of vegetation distribution across spatial scales. Journal of biogeography 25:855–870. @@ -263,7 +263,8 @@ chosen_variables <- somevar[[c('cwd','aet','tmn')]] ``` ### 2. Variance inflation factor -This method removes the predictors with a variance inflation factor higher than the chosen threshold. Again, users can specify a threshold (the default is 10). This method retains the predictors aet, tmx, and tmn and removes cwd. 
The output for this method matches what is produced by the pearson method: 1) environmental layer of retained variables, 2) a list of removed variables, and 3) a correlation matrix of all variables. + +This method removes the predictors with a variance inflation factor higher than the chosen threshold. Again, users can specify a threshold (the default is 10). This method retains the predictors aet, tmx, and tmn and removes cwd. The output for this method matches what is produced by the pearson method: 1) environmental layer of retained variables, 2) a list of removed variables, and 3) a correlation matrix of all variables. ```{r vif collinearity reduction} vif_var <- correct_colinvar(somevar, method = c("vif", th = "10")) @@ -272,8 +273,9 @@ vif_var$removed_variables vif_var$vif_table ``` -### 3. Principal component analysis -Finally, the “pca” method performs a principal components analysis on the predictors and returns the axis that accounts for 95% of the total variance in the system. This method returns 1) a SpatRaster object with selected environmental variables, 2) a matrix with the coefficients of principal components for predictors, and 3) a tibble with the cumulative variance explained in selected principal components. +### 3. Principal component analysis + +Finally, the “pca” method performs a principal components analysis on the predictors and returns the axis that accounts for 95% of the total variance in the system. This method returns 1) a SpatRaster object with selected environmental variables, 2) a matrix with the coefficients of principal components for predictors, and 3) a tibble with the cumulative variance explained in selected principal components. ```{r pca collinearity reduction} pca_var <- correct_colinvar(somevar, method = c("pca")) @@ -282,7 +284,8 @@ pca_var$coefficients pca_var$cumulative_variance ``` -### 4. Factorial analysis +### 4. 
Factorial analysis + Selecting the "fa" method performs a factorial analysis to reduce dimensionality and selects the predictor(s) with the highest correlation to each axis. The outputs for this method are similar to those produced by the 'pca' method. ```{r fa collinearity reduction, eval = FALSE} @@ -294,13 +297,17 @@ fa_var$uniqueness fa_var$loadings ``` +### 5. Comments + +In *flexsdm* it is also possible to restrict the cells used to perform the collinearity reduction analysis to a geographical area smaller than the full extent of the environmental variables. See the ‘restric_to_region’ and ‘restric_pca_proj’ arguments of [correct_colinvar](https://sjevelazco.github.io/flexsdm/reference/correct_colinvar.html) and the alternative PCA examples given in that function's help page. + ## Data filtering -Sample bias in species occurrence data is a common issue in ecological studies and filtering the occurrence data can reduce some of this bias. *flexsdm* provides two functions for different types of filtering, based on geographical or environmental "thinning", randomly removing points where they are dense (oversampling) in geographical or environmental space. This can improve model performance and reduce redundancy in your data. +Sample bias in species occurrence data is a common issue in ecological studies and filtering the occurrence data can reduce some of this bias. *flexsdm* provides two functions for different types of filtering, based on geographical or environmental "thinning", randomly removing points where they are dense (oversampling) in geographical or environmental space. This can improve model performance and reduce redundancy in your data. ### Environmental filtering -The function occfilt_env(), which performs environmental filtering on species occurrence data. This method basically reduces environmental redundancy in your data and is based on methods outlined in Valera et al. (2014).
However, this function is unique to *flexsdm*, as it is able to use any number of environmental dimensions and does not perform a PCA before filtering. In this example, we will use our original environmental data (somevar) and occurrence data for a single species (spp1). For filtering occurrences, it is important that each row in the species data has its own unique code (example: idd). This function also gives the user the option of specifying the number classes used to split each environmental condition. Here we will explore the results using 5, 8, and 12 bins. Increasing the number of bins increases the number of occurrence points retained. +The function occfilt_env() performs environmental filtering on species occurrence data. This method basically reduces environmental redundancy in your data and is based on methods outlined in Varela et al. (2014). However, this function is unique to *flexsdm*, as it is able to use any number of environmental dimensions and does not perform a PCA before filtering. In this example, we will use our original environmental data (somevar) and occurrence data for a single species (spp1). For filtering occurrences, it is important that each row in the species data has its own unique code (example: idd). This function also gives the user the option of specifying the number of classes (bins) used to split each environmental condition. Here we will explore the results using 5, 8, and 12 bins. Increasing the number of bins increases the number of occurrence points retained.
```{r env occurrence filtering} @@ -347,7 +354,7 @@ points(filt_env12 %>% select(x, y)) ### Geographical filtering -Next, we will look at occfilt_geo(), which has three alternatives to determine the distance threshold between a pair of points: “moran” determines the threshold as the distance between points that minimizes the spatial autocorrelation in occurrence data; “cellsize” filters occurrences based on the resolution of the predictors (or a specified coarser resolution); finally, “determined” allows users to manually determine the distance threshold. +Next, we will look at occfilt_geo(), which has three alternatives to determine the distance threshold between a pair of points: “moran” determines the threshold as the distance between points that minimizes the spatial autocorrelation in occurrence data; “cellsize” filters occurrences based on the resolution of the predictors (or a specified coarser resolution); finally, “determined” allows users to manually determine the distance threshold. ```{r geo occurrence filtering} @@ -391,12 +398,14 @@ points(filt_geo3 %>% select(x, y)) ``` ## Data partitioning -Data partitioning, or splitting data into testing and training groups, is a key step in building SDMs. *flexsdm* offers multiple options for data partitioning, including part_random(), part_sband(), part_sblock(), and part_senv(). Let's explore each of these methods. + +Data partitioning, or splitting data into testing and training groups, is a key step in building SDMs. *flexsdm* offers multiple options for data partitioning, including part_random(), part_sband(), part_sblock(), and part_senv(). Let's explore each of these methods. ### 1. Conventional data partitioning methods (part_random) -The part_random() function provides users the ability to divide species occurrence data based on conventional partition methods including k-folds, repeated k-folds, leave-one-out cross-validation, and bootstrap partitioning. 
-Here, we use the "kfold" method with 10 folds to divide our data. This results in 10 folds of occurrence data with 25 observations in each fold. +The part_random() function provides users the ability to divide species occurrence data based on conventional partition methods including k-folds, repeated k-folds, leave-one-out cross-validation, and bootstrap partitioning. + +Here, we use the "kfold" method with 10 folds to divide our data. This results in 10 folds of occurrence data with 25 observations in each fold. ```{r random partitioning kfolds} spp1$pr_ab <- 1 # Add a column with 1 to denote that this is presences only data @@ -412,7 +421,7 @@ sp_part1$.part %>% table() ### 2. Spatial band cross-validation (part_sband) -Both part_sband() and part_sblock() partition data based on their position in geographic space. Geographically structured data partitioning methods are especially useful if users want to evaluate model transferability to different regions or time periods. The function part_sband tests for different numbers of spatial partitions using latitudinal or longitudinal bands and selects the best number of bands for a given presence, presence-absence, or presence-background dataset. This procedure is based on spatial autocorrelation, environmental similarity, and the number of presence/absence records in each band partition. The function's output includes 1) a tibble with presence/absence locations and the assigned partition number, 2) a tibble with information about the best partition, and 3) a SpatRaster showing the selected grid. +Both part_sband() and part_sblock() partition data based on their position in geographic space. Geographically structured data partitioning methods are especially useful if users want to evaluate model transferability to different regions or time periods. 
The function part_sband tests for different numbers of spatial partitions using latitudinal or longitudinal bands and selects the best number of bands for a given presence, presence-absence, or presence-background dataset. This procedure is based on spatial autocorrelation, environmental similarity, and the number of presence/absence records in each band partition. The function's output includes 1) a tibble with presence/absence locations and the assigned partition number, 2) a tibble with information about the best partition, and 3) a SpatRaster showing the selected grid. ```{r spatial band partition} set.seed(1) @@ -441,7 +450,7 @@ points(sp_part2$part[c("x", "y")], ### 3. Spatial block cross-validation (part_sblock) -The part_sblock() function is very similar to part_sband() but instead of bands it explores spatial blocks with different raster cells sizes and returns the one that is best suited for the input dataset. Here, we can see the data divided into different "blocks" for training and testing. +The part_sblock() function is very similar to part_sband() but instead of bands it explores spatial blocks with different raster cells sizes and returns the one that is best suited for the input dataset. Here, we can see the data divided into different "blocks" for training and testing. ```{r spatial block partition} @@ -466,8 +475,7 @@ points(sp_part3$part[c("x", "y")], ) ``` -However, we notice that the grid partition produced by part_sblock has a different resolution than the original environmental variables. If you want a map layer with the same properties (i.e. resolution, extent, NAs) as your original environmental -variables, apply the get_block() function to the grid resulting from part_sblock(). This layer can be really useful for generating pseudo-absence or background sample points, which we will explore in the next section. 
+However, we notice that the grid partition produced by part_sblock has a different resolution than the original environmental variables. If you want a map layer with the same properties (i.e. resolution, extent, NAs) as your original environmental variables, apply the get_block() function to the grid resulting from part_sblock(). This layer can be really useful for generating pseudo-absence or background sample points, which we will explore in the next section. ```{r get block function} terra::res(sp_part3$grid) @@ -511,12 +519,11 @@ points(sp_part4$part[c("x", "y")], ## Background and pseudo-absence sampling -Presence-only occurrence data are quite common in ecology and researchers may not have adequate "absence" data for their species of interest. Sometimes in building species distribution models, we need to be able to generate background or pseudo-absence points for the modeling goals. The *flexsdm* package allows users to do this using sample_background() and sample_pseudoabs(). - +Presence-only occurrence data are quite common in ecology and researchers may not have adequate "absence" data for their species of interest. Sometimes in building species distribution models, we need to be able to generate background or pseudo-absence points for the modeling goals. The *flexsdm* package allows users to do this using sample_background() and sample_pseudoabs(). ### 1. Sample background -The function sample_background() allows slection of background sample points based on different geographic restrictions and sampling methods. Here, we sample a set of background points based on our earlier spatial block partitioning using the "random" method. Using lapply() in this case ensures that we generate background points in each of our spatial blocks (n = 2). We are also specifying that we want ten times the amount of background points as our original occurrences and that our calibration area will be the buffer area around presence points (see section on "Calibration area"). 
+The function sample_background() allows selection of background sample points based on different geographic restrictions and sampling methods. Here, we sample a set of background points based on our earlier spatial block partitioning using the "random" method. Using lapply() in this case ensures that we generate background points in each of our spatial blocks (n = 2). We are also specifying that we want ten times as many background points as original occurrences and that our calibration area will be the buffer area around presence points (see section on "Calibration area"). ```{r sample background} @@ -553,7 +560,7 @@ points(bg, cex = .1, pch = 19) ### 2. Sample pseudo-absences -Similarly, the function sample_pseudoabs allows random pseudo-absence sampling or based on environmental and/or geographical constraints. For example, specifying method = "env_const" selects pseudo-absences that are environmentally constrained to regions with lower suitability values as predicted by a Bioclim model. Additionally, this function allows users to specify a calibration area from which to generate pseudo-absence points. Here, we will use the buffer area around presence points (ca_1) to show what this might look like. As you can see, we have generated pseudo-absence points that are in the general vicinity of our presence points, but are concentrated in areas that have lower environmental suitability. The specific method chosen for sampling background and/or pseudo-absence points will vary depending on research goals. +Similarly, the function sample_pseudoabs() allows pseudo-absence sampling either at random or based on environmental and/or geographical constraints. For example, specifying method = "env_const" selects pseudo-absences that are environmentally constrained to regions with lower suitability values as predicted by a Bioclim model. Additionally, this function allows users to specify a calibration area from which to generate pseudo-absence points.
Here, we will use the buffer area around presence points (ca_1) to show what this might look like. As you can see, we have generated pseudo-absence points that are in the general vicinity of our presence points, but are concentrated in areas that have lower environmental suitability. The specific method chosen for sampling background and/or pseudo-absence points will vary depending on research goals. ```{r sample pseudo-absences} @@ -585,9 +592,9 @@ plot(ca_1, add = TRUE) points(psa, cex = .7, pch = 19) ``` -## Extracting environmental values +## Extracting environmental values -Finally, before modeling species geographic distributions, we must extract environmental data at the presences + absences/pseudo-absences/background point locations. The function sdm_extract() extracts environmental data values based on x and y coordinates and returns a tibble with the original data + additional columns for the extracted environmental variables at those locations. Let's do this for our original presence points (spp1) and our background locations (bg). +Finally, before modeling species geographic distributions, we must extract environmental data at the presences + absences/pseudo-absences/background point locations. The function sdm_extract() extracts environmental data values based on x and y coordinates and returns a tibble with the original data + additional columns for the extracted environmental variables at those locations. Let's do this for our original presence points (spp1) and our background locations (bg). ```{r sdm extract, eval = FALSE}