Skip to content

Commit

Permalink
switching to native pipe
Browse files Browse the repository at this point in the history
  • Loading branch information
mstrimas committed Dec 1, 2023
1 parent 420d893 commit b0ef1cb
Show file tree
Hide file tree
Showing 47 changed files with 177,018 additions and 18,195 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,6 @@
*cache/
data-figures-prep/
data-raw/ebd_*
workshop/data-raw/ebd_*
workshop/ebird-downloads/
workshop/*.zip
62 changes: 29 additions & 33 deletions abundance.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,14 @@ library(sf)
library(terra)
library(tidyr)
# set random number seed to insure fully repeatable results
# set random number seed for reproducibility
set.seed(1)
# environmental variables: landcover and elevation
env_vars <- read_csv("data/environmental-variables_checklists_jun_us-ga.csv")
# zero-filled ebird data combined with environmental data
checklists <- read_csv("data/checklists-zf_woothr_jun_us-ga.csv") %>%
checklists <- read_csv("data/checklists-zf_woothr_jun_us-ga.csv") |>
inner_join(env_vars, by = "checklist_id")
# prediction grid
Expand All @@ -50,18 +50,18 @@ r <- rast("data/prediction-grid_us-ga.tif")
crs <- st_crs(r)
# load gis data for making maps
study_region <- read_sf("data/gis-data.gpkg", "ne_states") %>%
filter(state_code == "US-GA") %>%
st_transform(crs = crs) %>%
study_region <- read_sf("data/gis-data.gpkg", "ne_states") |>
filter(state_code == "US-GA") |>
st_transform(crs = crs) |>
st_geometry()
ne_land <- read_sf("data/gis-data.gpkg", "ne_land") %>%
st_transform(crs = crs) %>%
ne_land <- read_sf("data/gis-data.gpkg", "ne_land") |>
st_transform(crs = crs) |>
st_geometry()
ne_country_lines <- read_sf("data/gis-data.gpkg", "ne_country_lines") %>%
st_transform(crs = crs) %>%
ne_country_lines <- read_sf("data/gis-data.gpkg", "ne_country_lines") |>
st_transform(crs = crs) |>
st_geometry()
ne_state_lines <- read_sf("data/gis-data.gpkg", "ne_state_lines") %>%
st_transform(crs = crs) %>%
ne_state_lines <- read_sf("data/gis-data.gpkg", "ne_state_lines") |>
st_transform(crs = crs) |>
st_geometry()
```

Expand All @@ -80,8 +80,8 @@ Finally, we'll remove the 20% of checklists held aside for testing and select on

```{r}
#| label: encounter-data-train
checklists_train <- checklists_ss %>%
filter(type == "train") %>%
checklists_train <- checklists_ss |>
filter(type == "train") |>
# select only the columns to be used in the model
select(species_observed, observation_count,
year, day_of_year, hours_of_day,
Expand Down Expand Up @@ -138,9 +138,9 @@ For the second step, we train a random forests model to estimate the expected co
train_count <- checklists_train
train_count$pred_er <- er_model$predictions[, 2]
# subset to only observed or predicted detections
train_count <- train_count %>%
train_count <- train_count |>
filter(!is.na(observation_count),
observation_count > 0 | pred_er > threshold) %>%
observation_count > 0 | pred_er > threshold) |>
select(-species_observed, -pred_er)
```

Expand Down Expand Up @@ -170,8 +170,8 @@ In @sec-encounter-rf-assess we calculated a suite of predictive performance
```{r}
#| label: abundance-assess-estimate
# get the test set held out from training
checklists_test <- filter(checklists_ss, type == "test") %>%
mutate(species_observed = as.integer(species_observed)) %>%
checklists_test <- filter(checklists_ss, type == "test") |>
mutate(species_observed = as.integer(species_observed)) |>
filter(!is.na(observation_count))
# estimate encounter rate for test data
Expand All @@ -183,7 +183,7 @@ pred_binary <- as.integer(pred_er > threshold)
# calibrate
pred_calibrated <- predict(calibration_model,
newdata = data.frame(pred = pred_er),
type = "response") %>%
type = "response") |>
as.numeric()
# constrain probabilities to 0-1
pred_calibrated[pred_calibrated < 0] <- 0
Expand Down Expand Up @@ -253,7 +253,7 @@ Just as we did in @sec-encounter-predict for encounter rate, we can estimate

```{r}
#| label: abundance-predict-grid
pred_grid_eff <- pred_grid %>%
pred_grid_eff <- pred_grid |>
mutate(observation_date = ymd("2023-06-15"),
year = year(observation_date),
day_of_year = yday(observation_date),
Expand All @@ -277,7 +277,7 @@ pred_binary <- as.integer(pred_er > threshold)
# apply calibration
pred_er_cal <- predict(calibration_model,
data.frame(pred = pred_er),
type = "response") %>%
type = "response") |>
as.numeric()
# constrain to 0-1
pred_er_cal[pred_er_cal < 0] <- 0
Expand Down Expand Up @@ -307,13 +307,12 @@ predictions$abundance <- predictions$encounter_rate * predictions$count
# rasterize
layers <- c("in_range", "encounter_rate", "count", "abundance")
r_pred <- predictions %>%
r_pred <- predictions |>
# convert to spatial features
st_as_sf(coords = c("x", "y"), crs = crs) %>%
select(all_of(layers)) %>%
st_as_sf(coords = c("x", "y"), crs = crs) |>
select(all_of(layers)) |>
# rasterize
rasterize(r, field = layers, fun = "mean") %>%
setNames(layers)
rasterize(r, field = layers)
print(r_pred)
```

Expand All @@ -324,27 +323,24 @@ Prior to mapping the relative abundance, we'll multiply by the `in_range` layer,
```{r}
#| label: abundance-predict-map
#| fig.asp: 1.15
# in range abundance
r_plot <- r_pred[["abundance"]] * r_pred[["in_range"]]
par(mar = c(4, 0.25, 0.25, 0.25))
# set up plot area
plot(study_region, col = NA, border = NA)
plot(ne_land, col = "#cfcfcf", border = "#888888", lwd = 0.5, add = TRUE)
# define quantile breaks, excluding zeros
brks <- ifel(r_plot > 0, r_plot, NA) %>%
brks <- ifel(r_pred[["abundance"]] > 0, r_pred[["abundance"]], NA) |>
global(fun = quantile,
probs = seq(0, 1, 0.1), na.rm = TRUE) %>%
as.numeric() %>%
probs = seq(0, 1, 0.1), na.rm = TRUE) |>
as.numeric() |>
unique()
# label the bottom, middle, and top value
lbls <- round(c(min(brks), median(brks), max(brks)), 2)
# ebird status and trends color palette
pal <- ebirdst_palettes(length(brks) - 1)
plot(r_plot,
plot(r_pred[["abundance"]],
col = c("#e6e6e6", pal), breaks = c(0, brks),
maxpixels = ncell(r_plot),
maxpixels = ncell(r_pred),
legend = FALSE, axes = FALSE, bty = "n",
add = TRUE)
Expand Down
Binary file modified data-raw/ebird-best-practices-data.zip
Binary file not shown.
Loading

0 comments on commit b0ef1cb

Please sign in to comment.