############################################################
### CORAL DASHBOARD DATA IMPORT, FORMAT AND CALCULATIONS ###
############################################################
### LOAD PACKAGES ----
# Look for packages on local machine, install if necessary, then load all
pkgList_pre <- c("magrittr",
"vroom",
"plyr",
"lubridate",
"ggpubr", # to get legends
"tidyselect", # for where() function
"purrr", # to map functions to elements
"tidyverse") # broom, units
inst_pre <- pkgList_pre %in% rownames(installed.packages())
if (length(pkgList_pre[!inst_pre]) > 0) install.packages(pkgList_pre[!inst_pre], dependencies = TRUE)
lapply(pkgList_pre, library, character.only = TRUE)
### FUNCTIONS ----
FuncWarnings <- function(dat, disease_dat, colcounts_dat) {
warn_list <- sapply(c("CertLevel", "AltPurp", "UNKTaxon", "BleachCode", "TransCount", "TaxonCountDiv", "TaxonCount", "EquipShadowCount", "UNKCount", "UNKDisease", "DiseaseColLT4CM", "UnmatchedDiseaseID", "UnmatchedColCountsID"), function(x) NULL)
dat_accepted <- round(100* sum(dat$VideoCertificationLevel == "Accepted")/ nrow(dat))
disease_accepted <- round(100* sum(disease_dat$DiseaseCertificationLevel == "Accepted")/nrow(disease_dat))
colcounts_accepted <- round(100* sum(colcounts_dat$ColCountsCertificationLevel == "Accepted")/nrow(colcounts_dat))
warn_list$CertLevel <- data.frame(Data = c("coral video summary", "disease", "colony counts"), Accepted = c(dat_accepted, disease_accepted, colcounts_accepted))
warn_list$AltPurp <- dat %>%
dplyr::arrange(SurvDate) %>%
dplyr::select(TripName, Purpose) %>%
dplyr::filter(!Purpose %in% c("Annual", "Episodic")) %>%
distinct() %>%
dplyr::rename("Trip" = TripName)
warn_list$UNKTaxon <- dat %>%
dplyr::select(TransectSurveyID, Taxon) %>%
dplyr::filter(Taxon == "UNK") %>%
dplyr::rename("Site_Transect_SurveyDate" = "TransectSurveyID")
warn_list$BleachCode <- dat %>%
dplyr::select(TransectSurveyID, Category, BleachingCode) %>%
dplyr::filter(Category != "CORAL" & !is.na(BleachingCode)) %>%
dplyr::rename("Site_Transect_SurveyDate" = "TransectSurveyID")
warn_list$TransCount <- dat %>%
dplyr::select(Site, Transect, SurvDate, Purpose) %>%
distinct() %>%
dplyr::group_by(Site, SurvDate, Purpose) %>%
dplyr::arrange(Site, SurvDate, Purpose) %>%
dplyr::summarize(NumTransects = n(), .groups = "drop") %>%
dplyr::filter(Purpose %in% c("Annual", "Episodic") & !NumTransects %in% c(4, 20)) %>% # 'off' transect counts only matter for annual or episodic surveys
dplyr::mutate(SurvDate = as.character(SurvDate)) %>% # renderTable does not play nicely with Date columns--format as character
dplyr::rename("Survey Date" = "SurvDate")
Err_counts <- dat %>%
dplyr::group_by(TransectSurveyID) %>%
dplyr::summarize(TotPoints = sum(CountOfTaxon, na.rm=TRUE),
EquipShadowPoints = sum(CountOfTaxon[Category %in% c("EQUIP", "SHADOW")]),
UNKPoints = sum(CountOfTaxon[Category %in% c("UNK", "UNKNOWN") | is.na(Category)]), .groups = "drop")
warn_list$TaxonCountDiv <- Err_counts %>%
dplyr::mutate(ModOut = TotPoints %% 10) %>%
dplyr::filter(ModOut != 0) %>%
dplyr::select(TransectSurveyID, TotPoints) %>%
dplyr::rename("Site_Transect_SurveyDate" = "TransectSurveyID", "Total Points" = TotPoints)
warn_list$TaxonCount <- Err_counts %>%
dplyr::filter(TotPoints < 200 | TotPoints > 480) %>%
dplyr::select(TransectSurveyID, TotPoints) %>%
dplyr::rename("Site_Transect_SurveyDate" = "TransectSurveyID", "Total Points" = TotPoints)
warn_list$EquipShadowCount <- Err_counts %>%
dplyr::mutate(PercEquipShadow = (EquipShadowPoints / TotPoints)*100) %>%
dplyr::filter(PercEquipShadow > 5.0) %>%
dplyr::select(TransectSurveyID, PercEquipShadow) %>%
dplyr::arrange(desc(PercEquipShadow)) %>%
dplyr::rename("Site_Transect_SurveyDate" = "TransectSurveyID", "% Equip or Shadow" = PercEquipShadow)
warn_list$UNKCount <- Err_counts %>%
dplyr::mutate(PercUNK = (UNKPoints / TotPoints)*100) %>%
dplyr::filter(PercUNK > 5.0) %>%
dplyr::select(TransectSurveyID, PercUNK) %>%
dplyr::arrange(desc(PercUNK)) %>%
dplyr::rename("Site_Transect_SurveyDate" = "TransectSurveyID", "% Unknown" = PercUNK)
warn_list$UNKDisease <- disease_dat %>%
dplyr::select(TransectSurveyID, DiseaseCode, Taxon, DiseaseCertificationLevel) %>%
dplyr::filter(DiseaseCode == "UNK") %>%
dplyr::arrange(TransectSurveyID) %>%
dplyr::rename("Site_Transect_SurveyDate" = "TransectSurveyID")
warn_list$DiseaseColLT4CM <- disease_dat %>%
dplyr::select(TransectSurveyID, DiseaseCode, Taxon, ColonyLT4CM, DiseaseCertificationLevel) %>%
dplyr::filter(ColonyLT4CM == TRUE)
warn_list$UnmatchedDiseaseID <- data.frame(
`Unmatched_Site_Transect_SurveyDate` = setdiff(unique(disease_dat$TransectSurveyID), unique(dat$TransectSurveyID))
)
warn_list$UnmatchedColCountsID <- data.frame(
`Unmatched_Site_Transect_SurveyDate` = setdiff(unique(colcounts_dat$TransectSurveyID), unique(dat$TransectSurveyID))
)
return(warn_list)
}
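# A quick sketch of how the warnings list can be inspected interactively (the
# actual call happens inside FuncCorals() below; object names assume that context):
# warn_list <- FuncWarnings(dat = coral, disease_dat = disease, colcounts_dat = colcounts)
# warn_list$CertLevel  # % of records with "Accepted" certification, by dataset
# warn_list$TransCount # annual/episodic site-surveys with an unexpected number of transects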
FuncPullQuant <- function() {
# Helper that returns a named list of quantile functions, so that dplyr::summarize_at() outputs one column per quantile (rather than one row per quantile)
purrr::map(
c(0.25, 0.75, 0.16, 0.84, 0.05, 0.95),
~purrr::partial(quantile, probs = .x, na.rm = TRUE)) %>% # apply quantile function to each specified probability, resulting format has a separate col for each quantile (instead of a separate row for each quantile)
set_names(nm = c("50%CI_low", "50%CI_high", "68%CI_low(1SE)", "68%CI_high(1SE)", "90%CI_low", "90%CI_high"))
}
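# Minimal illustration (made-up numbers, not project data): because FuncPullQuant()
# returns a named list of quantile functions, summarize_at() spreads the quantiles
# across one column per probability.
# tibble(EstimCov = rnorm(1000, mean = 20, sd = 2)) %>%
#   dplyr::summarize_at(vars(EstimCov), FuncPullQuant())
# # -> one row with columns "50%CI_low", "50%CI_high", "68%CI_low(1SE)", "68%CI_high(1SE)", "90%CI_low", "90%CI_high"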
FuncBootSamples <- function(dat, start_col, n_bootsamples = 100000) {
# Function to generate parametric bootstrap samples for each row of dat
#
# Args:
# dat = data frame of the original data to bootstrap, with the counts spread across cols
# start_col = starting column number for the count data
# n_bootsamples = number of parametric bootstrap samples to generate per row of dat
#
# Returns:
# Data frame of samples from parametric bootstrapping
# Convert counts to row-wise proportions (the multinomial probabilities)
dat[, start_col:ncol(dat)] <- dat[, start_col:ncol(dat)]/dat$AdjTot
# Create list of parametric bootstrap samples, each list element is a data frame of samples for one row of dat
boot_samples_list <- apply(data.frame(dat), 1, FUN = function(z) { # for each row
prob_vec <- z[start_col:length(z)][!is.na(z[start_col:length(z)])] # probability vector w/o NA's
boot_out <- rmultinom(n = n_bootsamples, size = as.numeric(z["AdjTot"]), prob = prob_vec) # generate parametric bootstrap samples of "hits" for all the taxa
boot_df <- as.data.frame(cbind(data.frame(t(z[1:(start_col-1)]), row.names = NULL), t(boot_out)))
boot_df
})
boot_samples_df <- plyr::rbind.fill(boot_samples_list) # combine list elements in a single data frame
boot_samples_df[start_col:ncol(boot_samples_df)] <- sapply(boot_samples_df[start_col:ncol(boot_samples_df)], as.numeric) # convert cols to numeric
return(boot_samples_df)
}
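# Toy example of the parametric bootstrap idea used above (hypothetical counts):
# the observed points on a transect define multinomial probabilities, and
# rmultinom() redraws whole count vectors with the same adjusted total.
# obs_counts <- c(ORBI = 30, ALGAE = 150, SAND = 20) # hypothetical taxa and counts
# adj_tot    <- sum(obs_counts)                      # 200 points
# t(rmultinom(n = 5, size = adj_tot, prob = obs_counts / adj_tot)) # 5 bootstrap transects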
FuncBootDraws <- function(boot_dat, actual_dat, start_col, groups_df, combos_df, key_col, n_bootreps = 5000) {
# Function to calculate bootstrap CI's for % cover and relative % cover, by site-survey (for intensive sites)
#
# Args:
# boot_dat = data frame of the parametric bootstrap samples for all transects of a single site-survey.
# actual_dat = data frame of the original count data used to generate parametric bootstrap samples.
# start_col = starting column number for the count data
# groups_df = data frame assigning each item to higher-level groups
# combos_df = data frame of the numerator and denominator group combinations to calculate CI's for
# key_col = name of column of smallest grouping level
# n_bootreps = number of bootstrap reps to calculate CI's over
#
# Returns:
# A data frame with bootstrapped CI's for % cover and relative % cover.
#
# NOTE: Not estimating CI's for trend -- just individually by site-survey.
n_transects <- nrow(actual_dat) # number of transects surveyed in that site-survey
samps <- boot_dat %>%
dplyr::slice_sample(n = n_bootreps * n_transects, replace = TRUE) %>% # randomly sample from the parametric bootstrap samples
as.data.frame() # can't add repeating vector column to tibble
samps[, start_col:ncol(samps)][is.na(samps[, start_col:ncol(samps)])] <- 0 # replace NA's with zero
samps$BootRep = rep(1:n_bootreps, each = n_transects) # add col of bootstrap replicate number; each replicate has same number of transects as original data for the site-survey
# Add the actual data so actual % cover and relative % cover can be simultaneously calculated for each combination of numerator and denominator groups/levels. BootRep 0 is the actual data and will be excluded for CI estimates
samps_df <- plyr::rbind.fill(samps, actual_dat %>% dplyr::mutate(BootRep = 0)) %>%
tibble::rowid_to_column(var = "RowID") # after converting to long format, still need to be able to group data by row at times
# This is the master data frame from which to calculate % cover at different grouping levels.
samps_master <- samps_df %>%
tidyr::gather(key = !!key_col, value = "Count", -Site, -SurvDate, -AdjTot, -BootRep, -RowID, -TransectSurveyID) %>% # convert to long format
dplyr::left_join(groups_df, by = key_col)
# Calculate % cover and RELATIVE % cover for certain combinations of numerator and denominator groups/levels
boot_CIs_list <- apply(combos_df, 1, FUN = function(x) {
tmp <- samps_master %>%
dplyr::mutate(NumerLevel = get(x[["NumerGroup"]])) %>%
dplyr::filter(if(any(unlist(x[["DenomGroup"]]) %in% unique(samps_master$Category))) Category %in% unlist(x[["DenomGroup"]]) else TRUE) %>% # filter for specified Categories unless DenomGroup == "AdjTot"
dplyr::group_by(BootRep, TransectSurveyID, RowID, Category, NumerLevel) %>%
dplyr::summarize(GroupCount = sum(Count), .groups = "drop")
# For % cover, calculate per transect (RowID) and then take mean across all transects in the BootRep. The adjusted total count (denominator value) for any bootstrapped transect is the same as for the original data used to generate parametric bootstrap samples
tmp2 <- tmp %>%
dplyr::left_join(actual_dat[c("TransectSurveyID", "AdjTot")], by = "TransectSurveyID") %>%
dplyr::mutate(PercCov = 100*(GroupCount / AdjTot)) %>%
dplyr::group_by(BootRep, Category, NumerLevel) %>% # grouping by Category just to keep the column in the output
dplyr::summarize(EstimCov = mean(PercCov), .groups = "drop")
# Extract the means calculated for the original data (BootRep = 0) so can be added to the final data frame
actual_df <- subset(tmp2, BootRep == 0) %>%
dplyr::select(-BootRep)
# Calculate % cover CI's
out_df <- tmp2 %>%
dplyr::filter(BootRep != 0) %>% # remove the estimates from the actual data
dplyr::group_by(Category, NumerLevel) %>%
dplyr::summarize_at(vars(EstimCov), FuncPullQuant(), .groups = "drop") %>%
dplyr::mutate(NumerGroup = x[["NumerGroup"]], DenomGroup = "TransectCount") %>%
dplyr::left_join(actual_df, by = c("Category","NumerLevel")) # add in the mean % cov from actual data
# # If denominator includes categories, add any missing zero-items
# if(any(unlist(x[["DenomGroup"]]) %in% unique(samps_master$Category))) {
# perccov_template <- groups_df %>%
# dplyr::select(NumerLevel = x[["NumerGroup"]], Category) %>%
# dplyr::filter(Category %in% x[["DenomGroup"]]) %>%
# dplyr::mutate(NumerGroup = x[["NumerGroup"]], DenomGroup = "TransectCount") %>%
# dplyr::distinct()
#
# out_df %<>%
# right_join(perccov_template, by = c("Category", "NumerLevel", "NumerGroup", "DenomGroup"))
#
# }
# For relative % cover, sum(numerator across all transects)/sum(denom across all transects)
if(any(unlist(x[["DenomGroup"]]) %in% unique(samps_master$Category))) {
categtot_df <- tmp %>%
dplyr::group_by(BootRep, Category) %>%
dplyr::summarize(CategTot = sum(GroupCount), .groups = "drop")
relcov_df <- tmp %>%
dplyr::group_by(BootRep, Category, NumerLevel) %>%
dplyr::summarize(SampGroupCount = sum(GroupCount), .groups = "drop") %>%
dplyr::left_join(categtot_df, by = c("BootRep", "Category")) %>%
dplyr::mutate(EstimCov = 100*(SampGroupCount/CategTot))
# Extract the means calculated for the original data (BootRep = 0) so can be added to the final data frame
actual_relcov_df <- subset(relcov_df, BootRep == 0) %>%
dplyr::select(Category, NumerLevel, EstimCov)
# Calculate % cover CI's
relcov_df %<>%
dplyr::filter(BootRep != 0) %>% # remove the estimates from the actual data
dplyr::group_by(Category, NumerLevel) %>%
dplyr::summarize_at(vars(EstimCov), FuncPullQuant(), .groups = "drop") %>%
dplyr::mutate(NumerGroup = x[["NumerGroup"]], DenomGroup = "CategoryCount") %>%
dplyr::left_join(actual_relcov_df, by = c("Category", "NumerLevel")) # add in the mean % cov from actual data
# # Add any missing zero-items
# relcov_template <- groups_df %>%
# dplyr::select(NumerLevel = x[["NumerGroup"]], Category) %>%
# dplyr::filter(Category %in% x[["DenomGroup"]]) %>%
# dplyr::mutate(NumerGroup = x[["NumerGroup"]], DenomGroup = "CategoryCount") %>%
# dplyr::distinct()
#
# relcov_df %<>%
# right_join(relcov_template, by = c("Category", "NumerLevel", "NumerGroup", "DenomGroup"))
out_df <- rbind(out_df, relcov_df)
}
out_df %<>%
dplyr::mutate_if(is.numeric, round, 2) %>% # round numeric output for display
dplyr::mutate(N = n_transects) %>%
dplyr::select(Category, NumerGroup, NumerLevel, DenomGroup, N, EstimCov, everything())
out_df$EstimCov[is.na(out_df$EstimCov)] <- 0
# out_df[, 5:ncol(out_df)][is.na(out_df[, 5:ncol(out_df)])] <- 0
out_df
})
boot_CIs_df <- as.data.frame(do.call("rbind", boot_CIs_list)) # combine list elements in a single data frame
return(boot_CIs_df)
}
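# FuncBootDraws() is called once per site-survey inside FuncCorals() below, e.g.:
# FuncBootDraws(boot_dat = boot_samples, actual_dat = actual_dat, start_col = 5,
#               groups_df = grps_df, combos_df = combos_df, key_col = "Taxon")
# Each output row pairs the observed estimate (EstimCov, from BootRep 0) with the
# bootstrapped 50%, 68%, and 90% intervals for one Category/NumerLevel combination.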
FuncBootDrawsRS <- function(boot_dat, actual_dat, start_col, groups_df, combos_df, key_col, n_bootreps = 5000) {
# Function to calculate bootstrap CI's for % cover and relative % cover, by site-survey, for reporting site (RS) sites. Largely parallels FuncBootDraws()
#
# Args:
# boot_dat = named list of parametric bootstrap sample data frames, one list element per subsite (with all of its transects)
# actual_dat = data frame of the original count data used to generate parametric bootstrap samples.
# start_col = starting column number for the count data
# groups_df = data frame assigning each item to higher-level groups
# combos_df = data frame of the numerator and denominator group combinations to calculate CI's for
# key_col = name of column of smallest grouping level
# n_bootreps = number of bootstrap reps to calculate CI's over
#
# Returns:
# A data frame with bootstrapped CI's for RS % cover and relative % cover.
#
# NOTE: Not estimating CI's for trend -- just individually by site-survey.
n_transects <- 4 # NOTE: hard-coded number of transects surveyed in each subsite-survey
subsites <- names(boot_dat)
shuffle_samples <- lapply(boot_dat, FUN = function(x) {
shuffle_dat <- x[sample(nrow(x), replace = F),] # shuffle the bootstrap samples
shuffle_dat$BootSetID = rep(1:(nrow(shuffle_dat)/n_transects), each = n_transects)
grouped_shuffle <- shuffle_dat %>% tidyr::nest(BootTransects = c(-Site, -SurvDate, -BootSetID)) %>%
dplyr::select(-BootSetID) # the list-col now has 4 random parametric bootstraps of transects for that site-survey
}) %>%
plyr::rbind.fill()
samps <- shuffle_samples %>%
dplyr::slice_sample(n = n_bootreps * length(subsites), replace = TRUE) %>% # randomly sample from the parametric bootstrap samples
as.data.frame() # can't add repeating vector column to tibble
samps$BootRep = rep(1:n_bootreps, each = length(subsites)) # add col of bootstrap replicate number; each replicate has same number of subsites as original data for the RSS-survey
samps %<>%
tidyr::unnest(cols = BootTransects) %>%
dplyr::select(Site, SurvDate, BootRep, TransectSurveyID, AdjTot, everything())
samps[, (start_col+1):ncol(samps)][is.na(samps[, (start_col+1):ncol(samps)])] <- 0 # replace NA's with zero
# Add the actual data so actual % cover and relative % cover can be simultaneously calculated for each combination of numerator and denominator groups/levels. BootRep 0 is the actual data and will be excluded for CI estimates
samps_df <- plyr::rbind.fill(samps, actual_dat %>% dplyr::mutate(BootRep = 0)) %>%
tibble::rowid_to_column(var = "RowID") # after converting to long format, still need to be able to group data by row at times
# This is the master data frame from which to calculate % cover at different grouping levels.
samps_master <- samps_df %>%
tidyr::gather(key = !!key_col, value = "Count", -Site, -SurvDate, -AdjTot, -BootRep, -RowID, -TransectSurveyID) %>% # convert to long format
dplyr::left_join(groups_df, by = key_col)
# Calculate % cover and RELATIVE % cover for certain combinations of numerator and denominator groups/levels
boot_CIs_list <- apply(combos_df, 1, FUN = function(x) {
tmp <- samps_master %>%
dplyr::mutate(NumerLevel = get(x[["NumerGroup"]])) %>%
dplyr::filter(if(any(unlist(x[["DenomGroup"]]) %in% unique(samps_master$Category))) Category %in% unlist(x[["DenomGroup"]]) else TRUE) %>% # filter for specified Categories unless DenomGroup == "AdjTot"
dplyr::group_by(BootRep, Site, SurvDate, TransectSurveyID, RowID, Category, NumerLevel) %>%
dplyr::summarize(GroupCount = sum(Count), .groups = "drop")
# For % cover, calculate per RowID and then take mean across all RowID in the BootRep. The adjusted total count (denominator value) for any bootstrapped transect is the same as for the original data used to generate parametric bootstrap samples
tmp2 <- tmp %>%
dplyr::left_join(actual_dat[c("TransectSurveyID", "AdjTot")], by = "TransectSurveyID") %>%
dplyr::mutate(PercCov = 100*(GroupCount / AdjTot)) %>%
dplyr::group_by(BootRep, Category, NumerLevel) %>% # grouping by Category just to keep the column in the output
dplyr::summarize(EstimCov = mean(PercCov), .groups = "drop")
# Extract the means calculated for the original data (BootRep = 0) so can be added to the final data frame
actual_df <- subset(tmp2, BootRep == 0) %>%
dplyr::select(-BootRep)
# Calculate % cover CI's
out_df <- tmp2 %>%
dplyr::filter(BootRep != 0) %>% # remove the estimates from the actual data
dplyr::group_by(Category, NumerLevel) %>%
dplyr::summarize_at(vars(EstimCov), FuncPullQuant(), .groups = "drop") %>%
dplyr::mutate(NumerGroup = x[["NumerGroup"]], DenomGroup = "TransectCount") %>%
dplyr::left_join(actual_df, by = c("Category","NumerLevel")) # add in the mean % cov from actual data
# # If denominator includes categories, add any missing zero-items
# if(any(unlist(x[["DenomGroup"]]) %in% unique(samps_master$Category))) {
# perccov_template <- groups_df %>%
# dplyr::select(NumerLevel = x[["NumerGroup"]], Category) %>%
# dplyr::filter(Category %in% x[["DenomGroup"]]) %>%
# dplyr::mutate(NumerGroup = x[["NumerGroup"]], DenomGroup = "TransectCount") %>%
# dplyr::distinct()
#
# out_df %<>%
# right_join(perccov_template, by = c("Category", "NumerLevel", "NumerGroup", "DenomGroup"))
# }
# For relative % cover, sum(numerator across all transects)/sum(denom across all transects)
if(any(unlist(x[["DenomGroup"]]) %in% unique(samps_master$Category))) {
categtot_df <- tmp %>%
dplyr::group_by(BootRep, Category) %>%
dplyr::summarize(CategTot = sum(GroupCount), .groups = "drop")
relcov_df <- tmp %>%
dplyr::group_by(BootRep, Category, NumerLevel) %>%
dplyr::summarize(SampGroupCount = sum(GroupCount), .groups = "drop") %>%
dplyr::left_join(categtot_df, by = c("BootRep", "Category")) %>%
dplyr::mutate(EstimCov = 100*(SampGroupCount/CategTot)) # express as a percentage, matching FuncBootDraws()
# Extract the means calculated for the original data (BootRep = 0) so can be added to the final data frame
actual_relcov_df <- subset(relcov_df, BootRep == 0) %>%
dplyr::select(Category, NumerLevel, EstimCov)
# Calculate % cover CI's
relcov_df %<>%
dplyr::filter(BootRep != 0) %>% # remove the estimates from the actual data
dplyr::group_by(Category, NumerLevel) %>%
dplyr::summarize_at(vars(EstimCov), FuncPullQuant(), .groups = "drop") %>%
dplyr::mutate(NumerGroup = x[["NumerGroup"]], DenomGroup = "CategoryCount") %>%
dplyr::left_join(actual_relcov_df, by = c("Category", "NumerLevel")) # add in the mean % cov from actual data
# # Add any missing zero-items
# relcov_template <- groups_df %>%
# dplyr::select(NumerLevel = x[["NumerGroup"]], Category) %>%
# dplyr::filter(Category %in% x[["DenomGroup"]]) %>%
# dplyr::mutate(NumerGroup = x[["NumerGroup"]], DenomGroup = "CategoryCount") %>%
# dplyr::distinct()
#
# relcov_df %<>%
# right_join(relcov_template, by = c("Category", "NumerLevel", "NumerGroup", "DenomGroup"))
out_df <- rbind(out_df, relcov_df)
}
out_df %<>%
dplyr::mutate_if(is.numeric, round, 2) %>% # round numeric output for display
dplyr::mutate(N = length(subsites)) %>%
dplyr::select(Category, NumerGroup, NumerLevel, DenomGroup, N, EstimCov, everything())
out_df$EstimCov[is.na(out_df$EstimCov)] <- 0
# out_df[, 5:ncol(out_df)][is.na(out_df[, 5:ncol(out_df)])] <- 0
out_df
})
boot_CIs_df <- as.data.frame(do.call("rbind", boot_CIs_list)) # combine list elements in a single data frame
return(boot_CIs_df)
}
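# Unlike FuncBootDraws(), the unit resampled here is a nested set of 4
# parametric-bootstrap transects; each bootstrap replicate draws as many sets
# (with replacement, pooled across subsites) as there are subsites, then
# averages % cover over all transects in the replicate.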
FuncCorals <- function(filenam, sitesfilenam = NULL, diseasefilenam, colcountsfilenam, out_prefix) {
### IMPORT AND FORMAT DATA ----
if(!is.null(filenam)) tryCatch(coral <- vroom::vroom(filenam), error = function(e) print("Cannot import the coral video summary file"))
if(!is.null(sitesfilenam)) tryCatch(tbl_link <- vroom::vroom(sitesfilenam), error = function(e) print("Cannot import the coral sites link file"))
if(!is.null(diseasefilenam)) tryCatch(disease <- vroom::vroom(diseasefilenam), error = function(e) print("Cannot import the disease file"))
if(!is.null(colcountsfilenam)) tryCatch(colcounts <- vroom::vroom(colcountsfilenam), error = function(e) print("Cannot import the colony counts file"))
# # <<<<<<<< TESTING >>>>>>>>>>>>>> ----
# coral <- vroom::vroom("VIIS_test_data/VIIS_CoralVideo Summary by Transect.csv")
# tbl_link <- vroom::vroom("VIIS_test_data/SFCN_CoralSites.csv")
# disease <- vroom::vroom("VIIS_test_data/SFCN_Coral_VIIS_Disease.csv")
# colcounts <- vroom::vroom("VIIS_test_data/VIIS_ColonyCount.csv")
# Format columns ----
coral$Date <- lubridate::mdy(coral$Date)
coral %<>%
dplyr::rename(
SurvDate = Date,
Subcategory = SubCategory,
Taxon = TaxonCode,
CountOfTaxon = CountOfTaxonCode,
VideoCertificationLevel = CertificationLevel) %>%
dplyr::mutate(TransectSurveyID = paste0(Site, "_", Transect, "_", SurvDate)) # create unique transect-specific EventID
# If the file doesn't already have a Reporting Site column, get the information from the link table
if(!"ReportingSite" %in% colnames(coral) & exists("tbl_link")) {
coral %<>% dplyr::left_join(subset(tbl_link, select=-c(IsActive)), by = c("ParkCode", "Site"))
}
# If the file doesn't already have an Activity Status column, get the information from the link table
if(!"IsActive" %in% colnames(coral) & exists("tbl_link")) {
coral %<>% dplyr::left_join(subset(tbl_link, select=c(ParkCode, Site, IsActive)), by = c("ParkCode", "Site"))
}
# Rename unknown group classifications to "UNK" or (for Functional Group) "UNGROUPED"--add to list as necessary
coral$Category[coral$Category %in% c("UNKNOWN") | is.na(coral$Category)] <- "UNK"
coral$Taxon[coral$Taxon %in% c("No Taxon") | is.na(coral$Taxon)] <- "UNK"
coral$FunctionalGroup[is.na(coral$FunctionalGroup)] <- paste(coral$Category[is.na(coral$FunctionalGroup)], "UNGROUPED", sep = "_")
# Format disease data
disease %<>%
dplyr::rename(SurvDate = Date, Taxon = CoralTaxonCode, DiseaseCertificationLevel = CertificationLevel) %>%
dplyr::mutate(SurvDate = lubridate::mdy(SurvDate),
NumOfBlemishes = as.integer(NumOfBlemishes),
DiscoloredTissue = as.integer(DiscoloredTissue),
TransectSurveyID = paste0(Site, "_", Transect, "_", SurvDate)) %>%
dplyr::filter(is.na(TransectDiseaseFree) | TransectDiseaseFree == FALSE)
disease[, c("DiseaseCode", "Taxon")][is.na(disease[, c("DiseaseCode", "Taxon")])] <- "UNK"
disease$DiseaseCode[grepl("DS", disease$DiseaseCode)] <- "DS"
# If the file doesn't already have a Reporting Site column, get the information from the link table
if(!"ReportingSite" %in% colnames(disease) & exists("tbl_link")) {
disease %<>% dplyr::left_join(tbl_link, by = c("ParkCode", "Site"))
}
# Format colony counts data
colcounts %<>%
dplyr::rename(SurvDate = Date, ColCountsCertificationLevel = CertificationLevel) %>%
dplyr::mutate(SurvDate = lubridate::mdy(SurvDate),
TransectSurveyID = paste0(Site, "_", Transect, "_", SurvDate)) %>%
dplyr::mutate_at(c("StonyCoralCount1", "StonyCoralCount2", "FireCoralCount1", "FireCoralCount2"), as.numeric)
# If the file doesn't already have a Reporting Site column, get the information from the link table
if(!"ReportingSite" %in% colnames(colcounts) & exists("tbl_link")) {
colcounts %<>% dplyr::left_join(tbl_link, by = c("ParkCode", "Site"))
}
# Populate list of warnings ----
warn_list <- FuncWarnings(dat = coral, disease_dat = disease, colcounts_dat = colcounts)
# Final formatting steps ----
# Account for different classification of bleaching prior to 10/1/05
coral$BleachingCode[is.na(coral$BleachingCode) & coral$Category =="CORAL" & coral$SurvDate < "2005-10-01"] <- "NoData"
coral$BleachingCode[is.na(coral$BleachingCode) & coral$Category =="CORAL" & coral$SurvDate >= "2005-10-01"] <- "UNBL"
coral$BleachingCode[coral$BleachingCode == "BL" & coral$Category =="CORAL"] <- "BL1"
coral$BleachingCode[coral$Category != "CORAL"] <- NA
coral%<>%
dplyr::filter(Purpose %in% c("Annual", "Episodic")) %>% # only include annual and episodic data
dplyr::select(ParkCode, ReportingSite, ReportingSiteName, Site, Transect, Latitude, Longitude, Year, SurvDate, TransectSurveyID, TripName, Purpose, IsActive, Category, Subcategory, FunctionalGroup, Taxon, BleachingCode, CountOfTaxon, VideoCertificationLevel) %>%
dplyr::arrange(Site, SurvDate, Transect)
# Disease data --remove records with unknown disease code or colonies <4cm
disease %<>%
dplyr::filter(is.na(ColonyLT4CM) | ColonyLT4CM == FALSE, # get rid of records where colony < 4cm. Most of these are NA (so keeping those)
DiseaseCode != "UNK") %>%
dplyr::select(ParkCode, ReportingSite, ReportingSiteName, Site, IsActive, Transect, SurvDate, TransectSurveyID, Purpose, ProtocolVersion, ColonyNumber, LengthAlongTransect, DistanceFromTransect, DiseaseCode, DiseaseDescription, Taxon, CoralScientificName, LesionLength, LesionWidth, DiscoloredTissue, NumOfBlemishes, DiseaseCertificationLevel)
# Colony counts data
colcounts %<>%
dplyr::select(ParkCode, ReportingSite, ReportingSiteName, Site, IsActive, Transect, SurvDate, TransectSurveyID, Purpose, ProtocolVersion, StonyCoralCount1, StonyCoralCount2, FireCoralCount1, FireCoralCount2, ColCountsCertificationLevel)
# STOP-Error check--make sure all records have associated Reporting Site and Activity Status
missing_RS <- coral[is.na(coral$ReportingSite), ] %>%
distinct()
if(nrow(missing_RS) > 0) stop("These data records do not have reporting site information:\n", paste(capture.output(print(missing_RS)), collapse = "\n")) else cat("OK >>> All records have reporting site info")
missing_IsActive <- coral[is.na(coral$IsActive), ] %>%
distinct()
if(nrow(missing_IsActive) > 0) stop("These data records do not have activity status information:\n", paste(capture.output(print(missing_IsActive)), collapse = "\n")) else cat("OK >>> All records have activity status info")
# Uncomment (i.e., remove the '#') the line below to output a CSV of the cleaned data
# write.csv(coral, paste0(out_prefix, "_cleandat.csv"), row.names = FALSE)
### DATA FOR MAP ----
mapdat <- coral %>%
dplyr::select(ReportingSiteName, ReportingSite, Site, Transect, Latitude, Longitude, Year, IsActive) %>% # pull only the data relevant for mapping by Site
dplyr::group_by(Site) %>%
dplyr::summarize( # site-level summary stats for mapping
ReportingSiteName = unique(ReportingSiteName),
ReportingSite = unique(ReportingSite),
IsActive = unique(IsActive),
NumTransects = length(unique(Transect)),
MinYr = min(Year, na.rm = TRUE),
MaxYr = max(Year, na.rm = TRUE),
MedLat = median(Latitude, na.rm = TRUE),
MedLong = median(Longitude, na.rm = TRUE), .groups = "drop") %>%
dplyr::mutate(
PopText = paste0(Site, " (", MinYr, " - ", MaxYr, ")"))
### BOOTSTRAPPING ESTIMATES ----
# Format data for bootstrapping
boot_master_dat <- dplyr::filter(coral, !Category %in% c("EQUIP", "SHADOW")) %>% # remove EQUIP & SHADOW data, for all analyses
dplyr::select(ReportingSite, Site, Transect, SurvDate, TransectSurveyID, Category, FunctionalGroup, Taxon, CountOfTaxon, BleachingCode)
# Make sure each taxon belongs to only one Functional Group and only one Category
grps_df <- unique(boot_master_dat[c("Category", "FunctionalGroup", "Taxon")])
if(length(unique(grps_df$Taxon)) != nrow(grps_df)) stop("EACH TAXON CAN ONLY BE ASSIGNED TO ONE FUNCTIONAL GROUP AND TO ONE CATEGORY")
# Create data frame with one row per TransectSurveyID and with Taxa as columns. Values are the point counts for each taxon on that TransectSurveyID (FuncBootSamples converts these to proportions)
boot_site_dat <- boot_master_dat %>%
dplyr::group_by(Site, SurvDate, TransectSurveyID, Taxon) %>%
dplyr::summarize(Count = sum(CountOfTaxon), .groups = "drop") %>% # for each transect-survey, count hits per taxon
tidyr::spread(key = Taxon, value = Count) %>% # each taxon becomes a column
dplyr::mutate(AdjTot = rowSums(across(4:ncol(.)), na.rm = T)) %>% # calculate total hits per transect-survey (already excludes shadow and equipment points)
dplyr::select(Site, SurvDate, TransectSurveyID, AdjTot, everything()) # rearrange cols
# Calculate bootstrapped cover CI's for these combinations.
combos_df <- tibble(
NumerGroup = c("Category", "FunctionalGroup", "Taxon"),
DenomGroup = list(c("AdjTot"), c("ALGAE", "CORAL"), c("ALGAE", "CORAL", "GORGO", "SPONGE")))
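# Row 1: % cover of each Category relative to all counted points (AdjTot);
# Row 2: % cover and relative % cover of each FunctionalGroup within the ALGAE and CORAL categories;
# Row 3: % cover and relative % cover of each Taxon within the ALGAE, CORAL, GORGO, and SPONGE categories.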
# Calculate bootstrapped bleaching CI's for these combinations.
bleach_grps_df <- data.frame (Category = c("CORAL", "CORAL", "CORAL", "CORAL", "CORAL", "CORAL", "NONCORAL"), BleachingCode = c("UNBL", "BL1", "BL2", "BL3", "BL4", "NoData", "NONCORAL"))
bleach_combos_df <- tibble(
NumerGroup = c("BleachingCode"),
DenomGroup = c("CORAL"))
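# The single bleaching combination yields the % of all transect points in each
# coral BleachingCode (DenomGroup "TransectCount") and the % of CORAL points in
# each BleachingCode (DenomGroup "CategoryCount").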
## SITE-LEVEL ESTIMATES ----
# Site-level cover estimates ----
incProgress(1/5, detail = paste0("...site-level % cover estimates"))
# First, work with taxon count data
# For each transect-survey, generate parametric bootstrap samples (default is N = 100,000) using the probabilities from the collected data. Run one site-survey at a time to avoid computer memory problems
site_CIs_list <- apply(unique(boot_site_dat[c( "Site", "SurvDate")]), 1, FUN = function(x) {
incProgress(1/5, detail = paste0("...site-level % cover estimates for ", x[["Site"]], " ", x[["SurvDate"]]))
cat("site-level % cover estimates:", x[["Site"]], x[["SurvDate"]], "\n")
subdat <- boot_site_dat %>%
dplyr::filter(Site == x[["Site"]] & SurvDate == x[["SurvDate"]]) %>% # pull data for the specific site and survey date
dplyr::select(where(~!all(is.na(.x)))) # get rid of cols with no data
start_col = 5
# Generate parametric bootstrap samples
boot_samples <- FuncBootSamples(dat = subdat, start_col = start_col)
boot_samples %<>% dplyr::mutate(across(where(is.numeric), as.numeric)) # this addresses an unusual problem of columns being nested matrices
# Grab the actual data to send to boot draws function
actual_dat <- subdat
actual_dat[, start_col:ncol(actual_dat)][is.na(actual_dat[, start_col:ncol(actual_dat)])] <- 0 # and then replace the remaining NA's with 0
# For various combinations of numerator groups and denominator groups, calculate quantiles from bootstrap distribution
boot_site_estimates <- FuncBootDraws(boot_dat = boot_samples, start_col = start_col, actual_dat = actual_dat, groups_df = grps_df, combos_df = combos_df, key_col = "Taxon")
cbind("SiteScale" = "Site", "ReportingSite" = mapdat$ReportingSite[mapdat$Site == x[["Site"]]], "Site" = x[["Site"]], "RSS" = x[["Site"]], "SurvDate" = x[["SurvDate"]], boot_site_estimates)
})
site_CIs_df <- as.data.frame(do.call("rbind", site_CIs_list))
site_CIs_df$SurvDate <- lubridate::ymd(site_CIs_df$SurvDate)
# Site-level coral bleaching estimates ----
incProgress(2/5, detail = paste0("...site-level coral bleaching estimates"))
# Most of the bleaching appears to be in the Orbicella functional group; within that group, OFAV is the least affected and three taxa are highly affected. The raw data plots will show this.
# Now work with coral bleach data
boot_bleach_dat <- boot_master_dat %>%
dplyr::group_by(Site, SurvDate, TransectSurveyID, BleachingCode) %>%
dplyr::summarize(Count = sum(CountOfTaxon), .groups = "drop") %>%
tidyr::replace_na(list(BleachingCode = "NONCORAL")) %>%
tidyr::spread(key = BleachingCode, value = Count) %>%
dplyr::mutate(AdjTot = rowSums(across(4:ncol(.)), na.rm = T)) %>%
dplyr::select(Site, SurvDate, TransectSurveyID, AdjTot, everything())
# For each transect-survey, generate parametric bootstrap samples
site_bleach_CIs_list <- apply(unique(boot_bleach_dat[c("Site", "SurvDate")]), 1, FUN = function(x) {
incProgress(2/5, detail = paste0("...site-level coral bleaching estimates for ", x[["Site"]], " ", x[["SurvDate"]]))
cat("site-level coral bleaching estimates:", x[["Site"]], x[["SurvDate"]], "\n")
bleach_subdat <- boot_bleach_dat %>%
dplyr::filter(Site == x[["Site"]] & SurvDate == x[["SurvDate"]]) %>% # pull data for the specific site and survey date
dplyr::select(where(~!all(is.na(.x)))) # get rid of cols with no data
bleach_start_col = 5
# Generate parametric bootstrap samples
boot_bleach_samples <- FuncBootSamples(dat = bleach_subdat, start_col = bleach_start_col)
boot_bleach_samples %<>% mutate(across(where(is.numeric), as.numeric)) # this addresses an unusual problem of columns being nested matrices
# Grab the actual data to send to boot draws function
actual_bleach_dat <- bleach_subdat
actual_bleach_dat[, bleach_start_col:ncol(actual_bleach_dat)][is.na(actual_bleach_dat[, bleach_start_col:ncol(actual_bleach_dat)])] <- 0 # and then replace the remaining NA's with 0
# For various combinations of numerator groups and denominator groups, calculate quantiles from bootstrap distribution
boot_bleach_estimates <- FuncBootDraws(boot_dat = boot_bleach_samples, start_col = bleach_start_col, actual_dat = actual_bleach_dat, groups_df = bleach_grps_df, combos_df = bleach_combos_df, key_col = "BleachingCode")
cbind("SiteScale" = "Site", "ReportingSite" = mapdat$ReportingSite[mapdat$Site == x[["Site"]]], "Site" = x[["Site"]], "RSS" = x[["Site"]], "SurvDate" = x[["SurvDate"]], boot_bleach_estimates)
})
site_bleach_CIs_df <- as.data.frame(do.call("rbind", site_bleach_CIs_list))
site_bleach_CIs_df$SurvDate <- lubridate::ymd(site_bleach_CIs_df$SurvDate)
## REPORTING SITE-LEVEL ESTIMATES ----
# For each reporting site, keep only the survey dates on which all of that reporting site's subsites were surveyed; reporting site-level estimates are calculated for these dates only
RS_estim_dates <- boot_master_dat %>%
dplyr::select(ReportingSite, Site, SurvDate) %>%
dplyr::filter(ReportingSite != Site) %>%
dplyr::distinct() %>%
dplyr::group_by(ReportingSite) %>%
dplyr::mutate(NumSub = length(unique(Site))) %>%
dplyr::group_by(ReportingSite, SurvDate) %>%
dplyr::mutate(NumSubSurveyed = length(unique(Site))) %>%
dplyr::filter(NumSub == NumSubSurveyed) %>%
dplyr::select(-NumSub, -NumSubSurveyed)
# Reporting site-level cover estimates ----
incProgress(3/5, detail = paste0("...`reporting site`-level % cover estimates"))
if(nrow(RS_estim_dates) > 0) {
# For each combination of reporting site and survey date
RS_CIs_list <- apply(unique(RS_estim_dates[c( "ReportingSite", "SurvDate")]), 1, FUN = function(x) {
incProgress(3/5, detail = paste0("...`reporting site`-level % cover estimates for ", x[["ReportingSite"]], " ", x[["SurvDate"]]))
cat("reporting site-level % cover estimates:", x[["ReportingSite"]], x[["SurvDate"]], "\n")
subsites <- RS_estim_dates %>% dplyr::filter(ReportingSite == x[["ReportingSite"]] & SurvDate == x[["SurvDate"]]) %>% dplyr::pull(Site) # these are the sites in that RS with that survey date
RS_subdat <- boot_site_dat %>%
dplyr::filter(Site %in% subsites & SurvDate == x[["SurvDate"]]) %>% # pull data for the specific site and survey date
dplyr::select(where(~!all(is.na(.x)))) # get rid of cols with no data
RS_start_col = 5
# Generate parametric bootstrap samples
boot_RS_samples <- FuncBootSamples(dat = RS_subdat, start_col = RS_start_col)
boot_RS_samples %<>% mutate(across(where(is.numeric), as.numeric)) # this addresses an unusual problem of columns being nested matrices
boot_RS_list <- split(boot_RS_samples, f = boot_RS_samples$Site) # split samples so each list element is a site
# Grab the actual data to send to boot draws function
actual_RS_dat <- RS_subdat
actual_RS_dat[, RS_start_col:ncol(actual_RS_dat)][is.na(actual_RS_dat[, RS_start_col:ncol(actual_RS_dat)])] <- 0 # and then replace the remaining NA's with 0
# For various combinations of numerator groups and denominator groups, calculate RS quantiles from bootstrap distribution
boot_RS_estimates <- FuncBootDrawsRS(boot_dat = boot_RS_list, start_col = RS_start_col, actual_dat = actual_RS_dat, groups_df = grps_df, combos_df = combos_df, key_col = "Taxon")
cbind("SiteScale" = "ReportingSite", "ReportingSite" = x[["ReportingSite"]], "Site" = NA, "RSS" = x[["ReportingSite"]], "SurvDate" = x[["SurvDate"]], boot_RS_estimates)
})
RS_CIs_df <- as.data.frame(do.call("rbind", RS_CIs_list))
RS_CIs_df$SurvDate <- lubridate::ymd(RS_CIs_df$SurvDate)
# Reporting site-level coral bleaching estimates ----
incProgress(4/5, detail = paste0("...`reporting site`-level coral bleaching estimates"))
# For each reporting site-survey, generate parametric bootstrap samples
RS_bleach_CIs_list <- apply(unique(RS_estim_dates[c( "ReportingSite", "SurvDate")]), 1, FUN = function(x) {
incProgress(4/5, detail = paste0("...`reporting site`-level coral bleaching estimates for ", x[["ReportingSite"]], " ", x[["SurvDate"]]))
cat("reporting site-level coral bleaching estimates:", x[["ReportingSite"]], x[["SurvDate"]], "\n")
subsites <- RS_estim_dates %>% dplyr::filter(ReportingSite == x[["ReportingSite"]] & SurvDate == x[["SurvDate"]]) %>% dplyr::pull(Site) # these are the sites in that RS with that survey date
RS_bleach_subdat <- boot_bleach_dat %>%
dplyr::filter(Site %in% subsites & SurvDate == x[["SurvDate"]]) %>% # pull data for the specific site and survey date
dplyr::select(where(~!all(is.na(.x)))) # get rid of cols with no data
RS_bleach_start_col = 5
# Generate parametric bootstrap samples
boot_RS_bleach_samples <- FuncBootSamples(dat = RS_bleach_subdat, start_col = RS_bleach_start_col)
boot_RS_bleach_samples %<>% mutate(across(where(is.numeric), as.numeric)) # this addresses an unusual problem of columns being nested matrices
boot_RS_bleach_list <- split(boot_RS_bleach_samples, f = boot_RS_bleach_samples$Site) # split samples so each list element is a site
# Grab the actual data to send to boot draws function
actual_RS_bleach_dat <- RS_bleach_subdat
actual_RS_bleach_dat[, RS_bleach_start_col:ncol(actual_RS_bleach_dat)][is.na(actual_RS_bleach_dat[, RS_bleach_start_col:ncol(actual_RS_bleach_dat)])] <- 0 # and then replace the remaining NA's with 0
# For various combinations of numerator groups and denominator groups, calculate quantiles from bootstrap distribution
boot_RS_bleach_estimates <- FuncBootDrawsRS(boot_dat = boot_RS_bleach_list, start_col = RS_bleach_start_col, actual_dat = actual_RS_bleach_dat, groups_df = bleach_grps_df, combos_df = bleach_combos_df, key_col = "BleachingCode")
cbind("SiteScale" = "ReportingSite", "ReportingSite" = x[["ReportingSite"]], "Site" = NA, "RSS" = x[["ReportingSite"]], "SurvDate" = x[["SurvDate"]], boot_RS_bleach_estimates)
})
RS_bleach_CIs_df <- as.data.frame(do.call("rbind", RS_bleach_CIs_list))
RS_bleach_CIs_df$SurvDate <- lubridate::ymd(RS_bleach_CIs_df$SurvDate)
} else {
RS_CIs_df <- RS_bleach_CIs_df <- NULL
}
### ADD CORAL NUMBERS TO MAPDAT ----
# For each site and survey, the mean % coral
CoralPercCov <- site_CIs_df %>%
dplyr::filter(NumerGroup == "Category" & NumerLevel == "CORAL" & DenomGroup == "TransectCount") %>%
dplyr::select(Site, SurvDate, EstimCov) %>%
dplyr::mutate('%Coral' = EstimCov,
'%NonCoral' = 100 - EstimCov) %>%
dplyr::select(-EstimCov)
BleachRelCov <- site_bleach_CIs_df %>%
dplyr::filter(NumerGroup == "BleachingCode" & DenomGroup == "CategoryCount") %>%
dplyr::select(Site, SurvDate, NumerLevel, EstimCov) %>%
# dplyr::mutate(EstimCov = round(EstimCov * 100)) %>%
tidyr::spread(key = NumerLevel, value = EstimCov, fill = 0) %>%
full_join(CoralPercCov, by = c("Site", "SurvDate"))
mapdat2 <- mapdat %>%
full_join(BleachRelCov, by = "Site") %>%
left_join(coral[c("Site", "SurvDate", "Purpose")] %>% distinct(), by = c("Site", "SurvDate")) %>% # add Purpose info
mutate(PopText = paste0(PopText, ": ", SurvDate, "(", Purpose, ")"))
col_order <- c("ReportingSite", "ReportingSiteName", "Site", "IsActive", "NumTransects", "MinYr", "MaxYr", "MedLat", "MedLong", "SurvDate", "Purpose", "PopText", "%Coral", "%NonCoral", "UNBL", "BL1", "BL2", "BL3", "BL4", "NoData")
missing_cols <- setdiff(col_order, names(mapdat2)) # Find names of missing columns
mapdat2[missing_cols] <- NA
mapdat2 %<>%
dplyr::select(dplyr::all_of(col_order)) # order columns using col_order defined above
cover_CIs_df <- site_CIs_df %>%
left_join(coral[c("Site", "SurvDate", "IsActive", "Purpose")] %>% distinct(), by = c("Site", "SurvDate")) %>%
dplyr::select(SiteScale, ReportingSite, Site, RSS, IsActive, SurvDate, Purpose, N, everything())
if (!is.null(RS_CIs_df)) {
cover_CIs_df <- rbind(
cover_CIs_df,
RS_CIs_df %>%
left_join(coral[c("ReportingSite", "SurvDate", "IsActive", "Purpose")] %>% distinct(), by = c("ReportingSite", "SurvDate")) %>%
dplyr::select(SiteScale, ReportingSite, Site, RSS, IsActive, SurvDate, Purpose, N, everything())
)
}
bleach_CIs_df <- site_bleach_CIs_df %>%
left_join(coral[c("Site", "SurvDate", "IsActive", "Purpose")] %>% distinct(), by = c("Site", "SurvDate")) %>%
dplyr::select(SiteScale, ReportingSite, Site, RSS, IsActive, SurvDate, Purpose, N, everything())
if(!is.null(RS_bleach_CIs_df)) {
bleach_CIs_df <- rbind(
bleach_CIs_df,
RS_bleach_CIs_df %>%
left_join(coral[c("ReportingSite", "SurvDate", "IsActive", "Purpose")] %>% distinct(), by = c("ReportingSite", "SurvDate")) %>%
dplyr::select(SiteScale, ReportingSite, Site, RSS, IsActive, SurvDate, Purpose, N, everything())
)
}
# Calculate % cover by category for each transect
transect_counts_df <- boot_master_dat %>%
tidyr::expand(TransectSurveyID, Category) %>%
left_join(boot_master_dat[c("TransectSurveyID", "Category", "CountOfTaxon")], by = c("TransectSurveyID", "Category")) %>%
dplyr::mutate(CountOfTaxon = replace_na(CountOfTaxon, 0)) %>%
group_by(TransectSurveyID) %>%
dplyr::mutate(AdjTot = sum(CountOfTaxon)) %>%
group_by(TransectSurveyID, Category) %>%
dplyr::mutate(TransectCount = sum(CountOfTaxon),
PercCov = round(100*(TransectCount/AdjTot), 1)) %>%
dplyr::select(-CountOfTaxon) %>%
dplyr::distinct()
out_list <- list(raw_dat = coral, # the raw data include equipment and shadow counts; only annual and episodic surveys included
map_dat = mapdat2,
groups_df = grps_df,
warn_list = warn_list,
transect_counts_df = transect_counts_df,
cover_CIs_df = cover_CIs_df,
bleach_CIs_df = bleach_CIs_df,
disease_df = disease,
colcounts_df = colcounts)
saveRDS(out_list, paste0(out_prefix, "_coralsummary.RDS"))
}
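# Example call (hypothetical file paths -- adjust to the exported CSVs). Note that
# the incProgress() calls above assume FuncCorals() runs inside a Shiny
# withProgress() context (i.e., from the dashboard app), not from a plain R session.
# FuncCorals(filenam = "VIIS_test_data/VIIS_CoralVideo Summary by Transect.csv",
#            sitesfilenam = "VIIS_test_data/SFCN_CoralSites.csv",
#            diseasefilenam = "VIIS_test_data/SFCN_Coral_VIIS_Disease.csv",
#            colcountsfilenam = "VIIS_test_data/VIIS_ColonyCount.csv",
#            out_prefix = "VIIS")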