switching to native pipe

ebird · Dec 1, 2023 · b0ef1cb · b0ef1cb
1 parent 420d893
commit b0ef1cb
Show file tree

Hide file tree

Showing 47 changed files with 177,018 additions and 18,195 deletions.
diff --git a/.gitignore b/.gitignore
@@ -11,3 +11,6 @@
 *cache/
 data-figures-prep/
 data-raw/ebd_*
+workshop/data-raw/ebd_*
+workshop/ebird-downloads/
+workshop/*.zip
diff --git a/abundance.qmd b/abundance.qmd
@@ -32,14 +32,14 @@ library(sf)
 library(terra)
 library(tidyr)
 
-# set random number seed to insure fully repeatable results
+# set random number seed for reproducibility
 set.seed(1)
 
 # environmental variables: landcover and elevation
 env_vars <- read_csv("data/environmental-variables_checklists_jun_us-ga.csv")
 
 # zero-filled ebird data combined with environmental data
-checklists <- read_csv("data/checklists-zf_woothr_jun_us-ga.csv") %>% 
+checklists <- read_csv("data/checklists-zf_woothr_jun_us-ga.csv") |> 
   inner_join(env_vars, by = "checklist_id")
 
 # prediction grid
@@ -50,18 +50,18 @@ r <- rast("data/prediction-grid_us-ga.tif")
 crs <- st_crs(r)
 
 # load gis data for making maps
-study_region <- read_sf("data/gis-data.gpkg", "ne_states") %>% 
-  filter(state_code == "US-GA") %>% 
-  st_transform(crs = crs) %>% 
+study_region <- read_sf("data/gis-data.gpkg", "ne_states") |> 
+  filter(state_code == "US-GA") |> 
+  st_transform(crs = crs) |> 
   st_geometry()
-ne_land <- read_sf("data/gis-data.gpkg", "ne_land") %>% 
-  st_transform(crs = crs) %>% 
+ne_land <- read_sf("data/gis-data.gpkg", "ne_land") |> 
+  st_transform(crs = crs) |> 
   st_geometry()
-ne_country_lines <- read_sf("data/gis-data.gpkg", "ne_country_lines") %>% 
-  st_transform(crs = crs) %>% 
+ne_country_lines <- read_sf("data/gis-data.gpkg", "ne_country_lines") |> 
+  st_transform(crs = crs) |> 
   st_geometry()
-ne_state_lines <- read_sf("data/gis-data.gpkg", "ne_state_lines") %>% 
-  st_transform(crs = crs) %>% 
+ne_state_lines <- read_sf("data/gis-data.gpkg", "ne_state_lines") |> 
+  st_transform(crs = crs) |> 
   st_geometry()
 ```
 
@@ -80,8 +80,8 @@ Finally, we'll remove the 20% of checklists held aside for testing and select on
 
 ```{r}
 #| label: encounter-data-train
-checklists_train <- checklists_ss %>% 
-  filter(type == "train") %>% 
+checklists_train <- checklists_ss |> 
+  filter(type == "train") |> 
   # select only the columns to be used in the model
   select(species_observed, observation_count,
          year, day_of_year, hours_of_day,
@@ -138,9 +138,9 @@ For the second step, we train a random forests model to estimate the expected co
 train_count <- checklists_train
 train_count$pred_er <- er_model$predictions[, 2]
 # subset to only observed or predicted detections
-train_count <- train_count %>% 
+train_count <- train_count |> 
   filter(!is.na(observation_count),
-         observation_count > 0 | pred_er > threshold) %>% 
+         observation_count > 0 | pred_er > threshold) |> 
   select(-species_observed, -pred_er)
 ```
 
@@ -170,8 +170,8 @@ In the @sec-encounter-rf-assess we calculated a suite of predictive performance
 ```{r}
 #| label: abundance-assess-estimate
 # get the test set held out from training
-checklists_test <- filter(checklists_ss, type == "test") %>% 
-  mutate(species_observed = as.integer(species_observed)) %>% 
+checklists_test <- filter(checklists_ss, type == "test") |> 
+  mutate(species_observed = as.integer(species_observed)) |> 
   filter(!is.na(observation_count))
 
 # estimate encounter rate for test data
@@ -183,7 +183,7 @@ pred_binary <- as.integer(pred_er > threshold)
 # calibrate
 pred_calibrated <- predict(calibration_model, 
                            newdata = data.frame(pred = pred_er), 
-                           type = "response") %>% 
+                           type = "response") |> 
   as.numeric()
 # constrain probabilities to 0-1
 pred_calibrated[pred_calibrated < 0] <- 0
@@ -253,7 +253,7 @@ Just as we did in the @sec-encounter-predict for encounter rate, we can estimate
 
 ```{r}
 #| label: abundance-predict-grid
-pred_grid_eff <- pred_grid %>% 
+pred_grid_eff <- pred_grid |> 
   mutate(observation_date = ymd("2023-06-15"),
          year = year(observation_date),
          day_of_year = yday(observation_date),
@@ -277,7 +277,7 @@ pred_binary <- as.integer(pred_er > threshold)
 # apply calibration
 pred_er_cal <- predict(calibration_model, 
                        data.frame(pred = pred_er), 
-                       type = "response") %>% 
+                       type = "response") |> 
   as.numeric()
 # constrain to 0-1
 pred_er_cal[pred_er_cal < 0] <- 0
@@ -307,13 +307,12 @@ predictions$abundance <- predictions$encounter_rate * predictions$count
 
 # rasterize
 layers <- c("in_range", "encounter_rate", "count", "abundance")
-r_pred <- predictions %>% 
+r_pred <- predictions |> 
   # convert to spatial features
-  st_as_sf(coords = c("x", "y"), crs = crs) %>% 
-  select(all_of(layers)) %>% 
+  st_as_sf(coords = c("x", "y"), crs = crs) |> 
+  select(all_of(layers)) |> 
   # rasterize
-  rasterize(r, field = layers, fun = "mean") %>% 
-  setNames(layers)
+  rasterize(r, field = layers)
 print(r_pred)
 ```
 
@@ -324,27 +323,24 @@ Prior to mapping the relative abundance, we'll multiply by the `in_range` layer,
 ```{r}
 #| label: abundance-predict-map
 #| fig.asp: 1.15
-# in range abundance
-r_plot <- r_pred[["abundance"]] * r_pred[["in_range"]]
-
 par(mar = c(4, 0.25, 0.25, 0.25))
 # set up plot area
 plot(study_region, col = NA, border = NA)
 plot(ne_land, col = "#cfcfcf", border = "#888888", lwd = 0.5, add = TRUE)
 
 # define quantile breaks, excluding zeros
-brks <- ifel(r_plot > 0, r_plot, NA) %>% 
+brks <- ifel(r_pred[["abundance"]] > 0, r_pred[["abundance"]], NA) |> 
   global(fun = quantile, 
-         probs = seq(0, 1, 0.1), na.rm = TRUE) %>% 
-  as.numeric() %>% 
+         probs = seq(0, 1, 0.1), na.rm = TRUE) |> 
+  as.numeric() |> 
   unique()
 # label the bottom, middle, and top value
 lbls <- round(c(min(brks), median(brks), max(brks)), 2)
 # ebird status and trends color palette
 pal <- ebirdst_palettes(length(brks) - 1)
-plot(r_plot, 
+plot(r_pred[["abundance"]], 
      col = c("#e6e6e6", pal), breaks = c(0, brks), 
-     maxpixels = ncell(r_plot),
+     maxpixels = ncell(r_pred),
      legend = FALSE, axes = FALSE, bty = "n",
      add = TRUE)
 

diff --git a/data-raw/ebird-best-practices-data.zip b/data-raw/ebird-best-practices-data.zip