-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMW_Modelfitting(byyearspeciesplot).Rmd
737 lines (582 loc) · 28.5 KB
/
MW_Modelfitting(byyearspeciesplot).Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
---
title: "Maximum likelihood models: per year/plot/species"
author: "Janneke Hille Ris Lambers, Aji John, Meera Sethi, Elli Theobald"
date: "12/15/2020"
output: html_document
---
# Setup for R script and R markdown
1. Load all libraries
2. Specify string behavior
3. Load packages (should we add all packages here?)
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
options(stringsAsFactors = FALSE)
library(tidyverse)
#library(leaflet)
#library(lubridate)
library(boot)
#library(readr)
```
# Read in clean phenology data
This chunk of code:
1. Reads in the cleaned up data (from data wrangling script), and defines the years, species, etc considered.
2. Reads in SDD data, merges it with PhenoSite data
```{r}
# Load the cleaned phenology observations and the snow disappearance date
# (SDD) table produced by the data wrangling script
PhenoSite_0 <- read.csv("cleandata/PhenoSite_Clean.csv", header = TRUE)
SDDdat_0 <- read.csv("cleandata/MW_SDDall.csv", header = TRUE)

# Final SDD: the observed value where there is one, otherwise the predicted
# value rounded to a whole day (observed SDD is NA where sensors failed)
sensor_failed <- is.na(SDDdat_0$SDD)
SDDfin <- SDDdat_0$SDD
SDDfin[sensor_failed] <- round(SDDdat_0$predSDD[sensor_failed], 0)

# Drop the columns that are not needed downstream (this includes the raw SDD
# and predSDD columns), then attach the combined SDD under the name SDD
SDDdat <- subset(
  SDDdat_0,
  select = -c(
    Site_Num, calibration,
    snow_appearance_date,
    snow_disappearance_date,
    snow_cover_duration,
    minimum_soil_temp, SDD,
    notes, predSDD
  )
)
SDDdat$SDD <- SDDfin

# Join SDD onto the phenology observations by year, plot and transect
PhenoSite <- merge(
  PhenoSite_0, SDDdat,
  by = c("Year", "Site_Code", "Transect")
)

# Years and species present in the merged data, plus the largest distyes
# value found among the flowering (Flower == 1) records
yrs <- unique(PhenoSite$Year)
nyrs <- length(yrs)
species <- unique(PhenoSite$Species)
nspp <- length(species)
out_thresh <- max(PhenoSite$distyes[PhenoSite$Flower == 1])

# Print a short summary as a reminder of what was loaded
print("Number of flowering observations, columns")
print(dim(PhenoSite))
print("Number of years, species, outlier threshold")
print(c(nyrs, nspp, out_thresh))
```
# Model fitting: per plot / year / species
This code does the following:
1. Loads phenology functions (see HRL_phenology_functions.R).
2. For each plot / year / species data, fits null model and phenology curve to yes / no observations of flowering phenology. Model fit parameters (peak, range and maximum parameters) are written to object pars_yrpltsp, and saved.
3. For each plot / year / species combo, calculates AIC's. This is written to aics_yrpltsp, and saved.
```{r}
# Fit a null model and a phenology curve to every year / plot / species
# combination, then save the fitted parameters and the AIC values.

# Load the model-fitting helpers. nullfit() and curvefit_perplot() are not
# defined in this file and are called below without any data arguments, so
# they presumably read the global vectors `days` and `phenophase` -- keep
# those two names unchanged (TODO confirm in HRL_phenology_functions.R).
source("./HRL_phenology_functions.R")

# One data-frame row per fit is collected in these lists and bound together
# once after the loops. This keeps numeric columns numeric instead of passing
# every value through a character matrix that is grown with rbind().
pars_rows <- list()
aic_rows <- list()

for (i in seq_len(nyrs)) { # First loop: years
  # extract data for that year
  PhenoSite_Year <- PhenoSite[PhenoSite$Year == yrs[i], ]

  # unique plots observed in that year
  plots <- unique(PhenoSite_Year$Site_Code)

  for (j in seq_along(plots)) { # Second loop: plots
    PhenoSite_YearPlot <- PhenoSite_Year[
      PhenoSite_Year$Site_Code == plots[j],
    ]

    # species observed in this plot; seq_along() skips the inner loop
    # cleanly when there are none
    spinplt <- unique(PhenoSite_YearPlot$Species)

    for (k in seq_along(spinplt)) { # Third loop: species in the plot
      # extract data for the species in question
      PhenoSite_YearPlotSpecies <- PhenoSite_YearPlot[
        PhenoSite_YearPlot$Species == spinplt[k],
      ]

      # identifiers stored alongside the fitted parameters
      trsct <- PhenoSite_YearPlotSpecies$Transect[1]
      stcd <- PhenoSite_YearPlotSpecies$Site_Loc[1]

      # data for curve fitting
      days <- PhenoSite_YearPlotSpecies$DOY # explanatory variable: DOY
      phenophase <- PhenoSite_YearPlotSpecies$Flower # yes / no flowering

      # add three weekly "not flowering" records before the earliest SDD
      SDDplt <- min(PhenoSite_YearPlotSpecies$SDD)
      days <- c(SDDplt - 21, SDDplt - 14, SDDplt - 7, days)
      phenophase <- c(0, 0, 0, phenophase)

      # null model: a single parameter searched on (0, 1)
      model0 <- optimize(nullfit, c(0.000001, 0.999999))

      # alternative model: three-parameter curve, started at the mean day
      # of the flowering observations
      param <- c(mean(days[phenophase == 1]), -0.001, 0)
      model1 <- optim(param, curvefit_perplot, control = list(maxit = 50000))

      # optim() returns 0 on success; any other code (1 = iteration limit,
      # 10 = degenerate simplex) means the fit did not converge
      if (model1$convergence != 0) {
        print(paste(spinplt[k], "no convergence", sep = "-"))
      }

      # AIC = 2 * (objective + number of parameters): 1 for null, 3 for curve
      AICnull <- round(2 * (model0$objective + 1), 1)
      AICalt <- round(2 * (model1$value + 3), 1)

      # Likelihood-ratio p-value for curve vs. null (2 extra parameters).
      # Assumes both objectives are negative log-likelihoods, which is what
      # the AIC formulas above imply -- TODO confirm. Not saved to file.
      pcurve <- signif(
        pchisq(2 * (model0$objective - model1$value), df = 2,
               lower.tail = FALSE),
        3
      )

      # save AIC values
      aic_rows[[length(aic_rows) + 1L]] <- data.frame(
        year = yrs[i],
        plot = as.character(plots[j]),
        species = as.character(spinplt[k]),
        AICnull = AICnull,
        AICalt = AICalt,
        stringsAsFactors = FALSE
      )

      # save fitted parameters
      pars_rows[[length(pars_rows) + 1L]] <- data.frame(
        year = yrs[i],
        transect = as.character(trsct),
        site_code = as.character(stcd),
        plot = as.character(plots[j]),
        SDD = SDDplt,
        species = as.character(spinplt[k]),
        peak = model1$par[[1]],
        duration = model1$par[[2]],
        max = model1$par[[3]],
        stringsAsFactors = FALSE
      )
    }
  }
}

# bind the parameter rows into one data frame
pars_yrpltsp <- do.call(rbind, pars_rows)
rownames(pars_yrpltsp) <- NULL

# set storage types - plot stays character (since a few plots have a, b)
pars_yrpltsp$year <- as.numeric(pars_yrpltsp$year)
pars_yrpltsp$transect <- as.factor(pars_yrpltsp$transect)
pars_yrpltsp$site_code <- as.factor(pars_yrpltsp$site_code)
pars_yrpltsp$species <- as.factor(pars_yrpltsp$species)
pars_yrpltsp$SDD <- as.numeric(pars_yrpltsp$SDD)
pars_yrpltsp$peak <- as.numeric(pars_yrpltsp$peak)
pars_yrpltsp$duration <- as.numeric(pars_yrpltsp$duration)
pars_yrpltsp$max <- as.numeric(pars_yrpltsp$max)

# bind the AIC rows into one data frame
aics_yrpltsp <- do.call(rbind, aic_rows)
rownames(aics_yrpltsp) <- NULL
aics_yrpltsp$year <- as.numeric(aics_yrpltsp$year)
aics_yrpltsp$species <- as.factor(aics_yrpltsp$species)
aics_yrpltsp$AICnull <- as.numeric(aics_yrpltsp$AICnull)
aics_yrpltsp$AICalt <- as.numeric(aics_yrpltsp$AICalt)

# examine pars, aics
head(pars_yrpltsp)
head(aics_yrpltsp)

# Write parameter output to the output folder
write.csv(pars_yrpltsp, "output/Parameters_plotmodel.csv", quote = FALSE,
          row.names = FALSE)

# Write AIC output to the output folder
write.csv(aics_yrpltsp, "output/AIC_plotmodel.csv", quote = FALSE,
          row.names = FALSE)
```
# Graph to visualize peak flowering by year & trail
This code takes estimates of peak flowering from year-plot-species specific model fits (in pars_yrpltsp) and plots those estimates by year (year on the x-axis, DOY on the y axis) and by trail.
```{r}
## Plot peak flowering estimates of all species, trails, plots on one graph
par(mfrow = c(1, 1), omi = c(0, 0, 0, 0), mai = c(0.5, 0.4, 0.4, 0.2),
    tck = -0.01, mgp = c(1.25, 0.25, 0), xpd = TRUE)

# Plotting colors - 17 total to accommodate all species if needed. Every
# entry is distinct so that no two species share a color (the palette
# previously listed "purple" twice).
plotcol <- c("yellowgreen", "magenta", "orange", "purple", "yellow",
             "springgreen", "pink", "sienna", "navyblue", "azure4",
             "yellow4", "orchid", "turquoise", "salmon", "maroon",
             "black", "grey")

# Create an empty plot to add points to. The y-axis limits are fixed; to use
# the data range instead:
# earlypk <- min(pars_yrpltsp$peak); latepk <- max(pars_yrpltsp$peak)
earlypk <- 135
latepk <- 255
plot(2016, 175, xlim = c(2012, 2020), ylim = c(earlypk, latepk), type = "n",
     xaxp = c(2012, 2020, 8), yaxt = "n", xlab = "Year", ylab = "Flowering")

# Month names drawn in place of the suppressed y-axis, just left of the plot
# region (xpd = TRUE allows drawing outside it)
text(x = 2011.5, y = c(135, 165, 195, 225, 255),
     labels = c("May", "Jun", "Jul", "Aug", "Sept"), adj = -0.1, srt = 90)

# Per-trail settings: plot-name prefix, point shape and horizontal offset
trail_prefix <- c("RL", "GB")
trail_pch <- c(21, 24)
trail_shift <- c(0, 0.333)

for (trail in seq_along(trail_prefix)) {
  # fits for the plots on this trail (first two characters of the plot name)
  pars_yrpltsp2 <- pars_yrpltsp[
    substr(pars_yrpltsp$plot, 1, 2) == trail_prefix[trail],
  ]

  # how many years? differs per trail
  yrs2 <- unique(pars_yrpltsp2$year)

  for (i in seq_along(yrs2)) {
    paryear <- pars_yrpltsp2[pars_yrpltsp2$year == yrs2[i], ]

    # now pull out all data for a species
    for (j in seq_along(species)) {
      paryearsp <- paryear[paryear$species == species[j], ]
      if (nrow(paryearsp) == 0) {
        next
      }
      pks <- paryearsp$peak

      # jitter horizontally around the year (plus the trail offset) so
      # overlapping estimates stay visible
      xpos <- rep(yrs2[i], length(pks)) +
        jitter(rep(trail_shift[trail], length(pks)), 2.5)
      points(xpos, pks, pch = trail_pch[trail], bg = plotcol[j], cex = 1.25)
    }
  }
}

legend(x = "topleft", legend = c("RL", "GB"), pch = c(21, 24),
       pt.bg = "gray", cex = 0.75, pt.cex = 1.25)
legend(x = "bottomleft", legend = species, pch = 21,
       pt.bg = plotcol[seq_along(species)], cex = 0.75, pt.cex = 1.25)
```
# AIC graph
This graph plots the AIC for the null model (flowering probability does not vary over time) vs. the alternative model (flowering probability is a unimodal curve over time). Plotted also is the 1:1 line. Each point represents a year / plot / species fit.
```{r}
# Scatter the null-model AIC (x) against the curve-model AIC (y); each point
# is one year / plot / species fit
plot(
  x = aics_yrpltsp[["AICnull"]],
  y = aics_yrpltsp[["AICalt"]],
  xlab = "AIC Null",
  ylab = "AIC Curve",
  bg = "grey",
  pch = 21
)
title(main = "Compare Null to Curve")
# 1:1 reference line (intercept 0, slope 1)
abline(a = 0, b = 1)
```
## Graph of plot / Species / Year-specific model fits
Graph of all year-plot specific flowering curves for each species, on the same plot, with different colored lines per year.
```{r}
# Draw every fitted flowering curve for each species on one panel per
# species, colored by year.

# extract species, in sorted order
species <- unique(pars_yrpltsp$species)
species <- species[order(species)]

# One color per year; a year's color index is (year - 2012), so 2013 uses
# the first color.
yrcols <- c("green", "lightblue", "orange", "purple", "pink", "red", "grey")

# Years shown in the legend. Each legend color is looked up with the same
# (year - 2012) rule used for the lines, so the key always matches the plot
# regardless of the order in which years appear in the data.
yrs_fit <- sort(unique(pars_yrpltsp$year))

# days of year over which the curves are drawn
xx_DOY <- seq(120, 270)

for (i in seq_along(species)) {
  sp_pars <- pars_yrpltsp[pars_yrpltsp$species == species[i], ]
  yrcolind <- sp_pars$year - 2012

  # make a dummy plot to add lines to
  par(mfrow = c(1, 1), omi = c(0, 0, 0, 0), mai = c(0.5, 0.6, 0.5, 0.5),
      tck = -0.02, mgp = c(1.1, 0.5, 0))
  plot(1, 1, type = "n", xlim = c(min(xx_DOY), max(xx_DOY)),
       ylim = c(0, 1), xlab = "", ylab = "P(flowering)", pin = c(6, 4),
       xaxp = c(min(xx_DOY), max(xx_DOY), (max(xx_DOY) - min(xx_DOY)) / 30))
  title(species[i])
  legend(x = "topleft", legend = yrs_fit, col = yrcols[yrs_fit - 2012],
         lwd = 2)

  # Now go through all estimates, add one line per fit
  for (j in seq_len(nrow(sp_pars))) {
    pars <- c(sp_pars$peak[j], sp_pars$duration[j], sp_pars$max[j])
    # predflower() is not defined in this file (presumably it comes from
    # HRL_phenology_functions.R, sourced in an earlier chunk)
    predf <- predflower(xx_DOY, pars)
    lines(xx_DOY, predf, col = yrcols[yrcolind[j]], lwd = 2)
  }
}
```
## Graph of SDD vs peak flowering
Graph of SDD vs. peak flowering for each species, different colored lines per year
```{r}
# Plot snow disappearance date (SDD) against fitted peak flowering for each
# species, with points colored by year and labelled by plot.

# extract species, in sorted order
species <- unique(pars_yrpltsp$species)
species <- species[order(species)]

# One color per year; a year's color index is (year - 2012)
yrcols <- c("green", "lightblue", "orange", "purple", "pink", "red", "grey")

# Legend years and colors use the same (year - 2012) rule as the points, so
# the key cannot drift out of step with the plotted colors
yrs_fit <- sort(unique(pars_yrpltsp$year))

## Plot SDD vs. peak per species
for (i in seq_along(species)) {
  sp_pars <- pars_yrpltsp[pars_yrpltsp$species == species[i], ]
  yrcolind <- sp_pars$year - 2012

  # correlate SDD with peak flowering
  SDDtest <- cor.test(sp_pars$SDD, sp_pars$peak)

  par(mfrow = c(1, 1), omi = c(0, 0, 0, 0), mai = c(0.5, 0.6, 0.5, 0.5),
      tck = -0.02, mgp = c(1.1, 0.5, 0))

  # 5% of the peak range: pads the y-axis and offsets the plot labels
  tiny <- 0.05 * (max(sp_pars$peak) - min(sp_pars$peak))
  ylms <- c(min(sp_pars$peak) - tiny, max(sp_pars$peak) + tiny)

  plot(sp_pars$SDD, sp_pars$peak, ylim = ylms, pty = "s",
       pch = 21, bg = yrcols[yrcolind], cex = 1.5,
       xlab = "Snow Disappearance Date", ylab = "Peak Flowering")
  text(sp_pars$SDD, (sp_pars$peak + tiny), labels = sp_pars$plot, cex = 0.5)
  abline(0, 1) # 1:1 line: peak flowering on the day of snow disappearance
  title(species[i])
  legend(x = "topleft", legend = yrs_fit, pt.bg = yrcols[yrs_fit - 2012],
         pch = 21)

  # add cor.test results
  mtext(paste0("r=", round(SDDtest$estimate, 3),
               ", p=", round(SDDtest$p.value, 3)),
        side = 3, line = -1, cex = 0.75)

  # Average number of days AFTER snow disappearance at which the species is
  # at peak flower: peak minus SDD, so a positive value means that peak
  # flowering follows snowmelt
  daysafter <- mean(sp_pars$peak - sp_pars$SDD)
  print(paste(species[i], "days after SDD in peak flower",
              round(daysafter, 2), sep = "-"))
}
```
# Plot-level parameters related to SDD, year
A quick lm analysis examining the relationship between the three sets of parameters and SDD and year. On the whole, peak clearly varies with SDD, and for several species that relationship also varies by year. The max and duration parameters generally don't vary by SDD, but in a few cases vary by year.
```{r}
# Linear models relating the fitted parameters to SDD and year, per species.

# First test peak flowering - per species
species <- species[order(species)]

# One row per species (intercept, slope and adjusted R-squared of peak ~ SDD).
# Rows are collected in a preallocated list and bound once, so the numeric
# columns never pass through a character matrix.
peak_rows <- vector("list", length(species))
for (i in seq_along(species)) {
  sp_dat <- pars_yrpltsp[pars_yrpltsp$species == species[i], ]

  # full model: does the peak ~ SDD relationship differ among years?
  peak_test_all <- lm(peak ~ SDD * as.factor(year), data = sp_dat)
  print(species[i])
  print(anova(peak_test_all))

  # simple model: coefficients and fit of peak ~ SDD across all years
  peak_test <- lm(peak ~ SDD, data = sp_dat)
  peak_coef <- coef(peak_test)
  peak_rows[[i]] <- data.frame(
    species = as.character(species[i]),
    intercept = peak_coef[[1]],
    slope = peak_coef[[2]],
    Rsquared = summary(peak_test)$adj.r.squared,
    stringsAsFactors = FALSE
  )
}
peak_pars <- do.call(rbind, peak_rows)
rownames(peak_pars) <- NULL
head(peak_pars)

# range and duration parameters - test whether affected by SDD, year
for (i in seq_along(species)) {
  sp_dat <- pars_yrpltsp[pars_yrpltsp$species == species[i], ]
  range_test <- lm(duration ~ as.factor(year) * SDD, data = sp_dat)
  max_test <- lm(max ~ as.factor(year) * SDD, data = sp_dat)
  print(species[i])
  print("range")
  print(anova(range_test))
  print("max")
  print(anova(max_test))
  print("")
  print("")
}
```
# Model fit to QA/QC vs. non QA/QC data
This code checks to see whether model fits of volunteers vs. professional scientists vary.
```{r}
# Fit the phenology curve separately to QA/QC records (QA.QC == 1) and to the
# remaining records for every year / plot / species combination, and store
# both parameter sets side by side.
#
# NOTE: curvefit_perplot() is not defined in this file and is called without
# data arguments, so it presumably reads the global vectors `days` and
# `phenophase` -- keep those names unchanged (TODO confirm in
# HRL_phenology_functions.R).

# One data-frame row per fitted pair, bound together after the loops
pars_rows <- list()

# set # years, species
yrs <- unique(PhenoSite$Year)
nyrs <- length(yrs)
species <- unique(PhenoSite$Species)

for (i in seq_len(nyrs)) { # First loop: years
  # extract data for that year
  PhenoSite_Year <- PhenoSite[PhenoSite$Year == yrs[i], ]

  # unique plots observed in that year
  plots <- unique(PhenoSite_Year$Site_Code)

  for (j in seq_along(plots)) { # Second loop: plots
    PhenoSite_YearPlot <- PhenoSite_Year[
      PhenoSite_Year$Site_Code == plots[j],
    ]

    # species observed in this plot; seq_along() skips the inner loop
    # cleanly when there are none
    spinplt <- unique(PhenoSite_YearPlot$Species)

    for (k in seq_along(spinplt)) { # Third loop: species in the plot
      # split this species' records into QA/QC and non-QA/QC observations
      PhenoSite_yps <- PhenoSite_YearPlot[
        PhenoSite_YearPlot$Species == spinplt[k],
      ]
      PhenoSite_QAQC <- PhenoSite_yps[PhenoSite_yps$QA.QC == 1, ]
      PhenoSite_vol <- PhenoSite_yps[PhenoSite_yps$QA.QC != 1, ]

      # need at least 9 QA/QC records to attempt a fit
      if (nrow(PhenoSite_QAQC) < 9) {
        next
      }

      # identifiers stored alongside the fitted parameters
      trsct <- PhenoSite_QAQC$Transect[1]
      stcd <- PhenoSite_QAQC$Site_Loc[1]

      # --- QA/QC observations ---
      days <- PhenoSite_QAQC$DOY # explanatory variable: DOY
      phenophase <- PhenoSite_QAQC$Flower # yes / no flowering

      # add three weekly "not flowering" records before the earliest SDD
      SDDplt <- min(PhenoSite_QAQC$SDD)
      days <- c(SDDplt - 21, SDDplt - 14, SDDplt - 7, days)
      phenophase <- c(0, 0, 0, phenophase)

      # skip when fewer than two flowering records are available
      if (sum(phenophase) < 2) {
        next
      }

      # fit phenological model
      param <- c(mean(days[phenophase == 1]), -0.001, 0)
      model1 <- optim(param, curvefit_perplot, control = list(maxit = 50000))

      # optim() returns 0 on success; any other code means no convergence
      if (model1$convergence != 0) {
        print(paste(spinplt[k], "no convergence", sep = "-"))
      }

      # --- non-QA/QC (volunteer) observations ---
      days <- PhenoSite_vol$DOY # explanatory variable: DOY
      phenophase <- PhenoSite_vol$Flower # yes / no flowering

      # add three weekly "not flowering" records before the earliest SDD;
      # this SDDplt is the value that is saved with the parameters
      SDDplt <- min(PhenoSite_vol$SDD)
      days <- c(SDDplt - 21, SDDplt - 14, SDDplt - 7, days)
      phenophase <- c(0, 0, 0, phenophase)

      # skip when fewer than two flowering records are available
      if (sum(phenophase) < 2) {
        next
      }

      # fit phenological model
      param <- c(mean(days[phenophase == 1]), -0.001, 0)
      model2 <- optim(param, curvefit_perplot, control = list(maxit = 50000))
      if (model2$convergence != 0) {
        print(paste(spinplt[k], "no convergence", sep = "-"))
      }

      # save parameters: _Q columns from the QA/QC fit, _v from the other fit
      pars_rows[[length(pars_rows) + 1L]] <- data.frame(
        year = yrs[i],
        transect = as.character(trsct),
        site_code = as.character(stcd),
        plot = as.character(plots[j]),
        SDD = SDDplt,
        species = as.character(spinplt[k]),
        peak_Q = model1$par[[1]],
        duration_Q = model1$par[[2]],
        max_Q = model1$par[[3]],
        peak_v = model2$par[[1]],
        duration_v = model2$par[[2]],
        max_v = model2$par[[3]],
        stringsAsFactors = FALSE
      )
    }
  }
}

# bind the rows into one data frame
pars_QAQC <- do.call(rbind, pars_rows)
rownames(pars_QAQC) <- NULL

# set storage types - plot stays character (since a few plots have a, b)
pars_QAQC$year <- as.numeric(pars_QAQC$year)
pars_QAQC$transect <- as.factor(pars_QAQC$transect)
pars_QAQC$site_code <- as.factor(pars_QAQC$site_code)
pars_QAQC$species <- as.factor(pars_QAQC$species)
pars_QAQC$SDD <- as.numeric(pars_QAQC$SDD)
pars_QAQC$peak_Q <- as.numeric(pars_QAQC$peak_Q)
pars_QAQC$duration_Q <- as.numeric(pars_QAQC$duration_Q)
pars_QAQC$max_Q <- as.numeric(pars_QAQC$max_Q)
pars_QAQC$peak_v <- as.numeric(pars_QAQC$peak_v)
pars_QAQC$duration_v <- as.numeric(pars_QAQC$duration_v)
pars_QAQC$max_v <- as.numeric(pars_QAQC$max_v)

# show pars_QAQC
head(pars_QAQC)
```
# Species-level comparison: parameters
These graphs show fitted peak, max and duration parameters as fit to data collected by lab members vs those collected by volunteers.
Note - peak parameters are very well correlated (with one exception, LUAR in 2018 GB plot 9 - see note below), but max and duration parameters are not. The max parameter represents the overall probability of observing flowers across all observations, and volunteers and HRL lab members visit plots at different frequencies, so this is unsurprising. The duration parameter is correlated with the max parameter, which may also influence the comparison.
* LUAR in GB plot 9 in 2018 seems to have flowered twice (potentially b/c the plant was herbivorized, potentially because there were 2 plants in the plot on a very different schedule, potentially b/c it rained mid summer providing a second flush of flowers). Because the second batch of flowering occurred very near the last day volunteers collected data, the estimate of peak flowering for volunteers in that plot / year is unrealistically late.
```{r}
# For each species, draw three side-by-side scatter plots comparing the
# parameters fitted to QA/QC data (x-axis, "prof") with those fitted to the
# remaining data (y-axis, "vol"): peak, duration and max, each with a 1:1 line.
for (i in seq_along(species)) {
  # extract data for species in question
  pars_QAQC_sp <- pars_QAQC[pars_QAQC$species == species[i], ]

  # A species without any paired fits has nothing to draw, and plot() stops
  # with an error on empty data, so move on to the next species
  if (nrow(pars_QAQC_sp) == 0) {
    next
  }

  # set up a 1 x 3 panel of square plots
  par(mfrow = c(1, 3), omi = c(0, 0, 0, 0), mai = c(0.5, 0.5, 0.4, 0.3),
      mgp = c(1.2, 0.5, 0), tck = -0.02, pty = "s")

  # peak
  plot(pars_QAQC_sp$peak_Q, pars_QAQC_sp$peak_v, pch = 21, bg = "grey",
       cex = 1.25, xlab = "peak-prof", ylab = "peak-vol")
  abline(0, 1)

  # duration (the middle panel carries the species name as its title)
  plot(pars_QAQC_sp$duration_Q, pars_QAQC_sp$duration_v, pch = 21,
       bg = "grey", cex = 1.25, xlab = "duration-prof",
       ylab = "duration-vol", main = species[i])
  abline(0, 1)

  # max
  plot(pars_QAQC_sp$max_Q, pars_QAQC_sp$max_v, pch = 21, bg = "grey",
       cex = 1.25, xlab = "max-prof", ylab = "max-vol")
  abline(0, 1)
}
```
# QAQC graph - peak flowering comparison volunteer vs. professional. This is Supplemental Figure E2.
```{r}
# Supplemental Figure E2: peak flowering estimated from QA/QC data (x-axis)
# vs. the remaining data (y-axis), one point per year / plot / species.

# Write the figure to a TIFF file. To draw it in the document instead,
# comment out this line and the dev.off() at the end of the chunk.
tiff(file = "output/figures/FigE2.tif", width = 5, height = 5, units = "in",
     res = 600)

# set up a single square plot
par(mfrow = c(1, 1), omi = c(0, 0, 0, 0), mai = c(0.5, 0.5, 0.5, 0.5),
    mgp = c(1.35, 0.45, 0), tck = -0.02, pty = "s")

# remove species not considered: ANAR, LICA, MEPA and MIAL
excluded_spp <- c("ANAR", "LICA", "MEPA", "MIAL")
pars_QAQC2 <- pars_QAQC[!(pars_QAQC$species %in% excluded_spp), ]
species2 <- unique(pars_QAQC2$species)
species2 <- species2[order(species2)]

# one color per species, assigned in sorted species order
spcols <- c("yellowgreen", "gold3", "pink", "orange", "magenta", "yellow",
            "lightyellow2", "plum1", "lightsteelblue4", "purple",
            "goldenrod4", "greenyellow", "navyblue", "grey")

# make dummy plot with the 1:1 line and the species key
plot(180, 180, type = "n", xlim = c(160, 275), ylim = c(160, 275),
     xlab = "Estimate - Ecologist", ylab = "Estimate -Volunteers")
abline(0, 1)
legend(x = "bottomright", legend = species2,
       pt.bg = spcols[seq_along(species2)], pch = 21,
       pt.cex = 1.25, cex = 0.65)

# correlation between the two sets of peak estimates; rows whose peak_v is
# 280 or later are left out of the test
pars_QAQC3 <- pars_QAQC2[pars_QAQC2$peak_v < 280, ]
ctest <- cor.test(pars_QAQC3$peak_Q, pars_QAQC3$peak_v)
Qcor <- round(ctest$estimate, 3)
Qp0 <- round(ctest$p.value, 3)

# Label for the p-value. A value that rounds to zero is reported as
# "p<0.001". (The previous paste() call passed `sep==""`, a comparison with
# an undefined object, and failed whenever the rounded p-value was not zero.)
Qp <- if (Qp0 == 0) "p<0.001" else paste("p=", Qp0, sep = "")

mtext(paste("cor=", Qcor, sep = ""), side = 3, adj = 0.025, line = -0.9,
      cex = 0.8)
mtext(Qp, side = 3, adj = 0.025, line = -1.8, cex = 0.8)

# add the points, one species (color) at a time
for (i in seq_along(species2)) {
  # extract data for species in question
  pars_QAQC_sp <- pars_QAQC2[pars_QAQC2$species == species2[i], ]
  points(pars_QAQC_sp$peak_Q, pars_QAQC_sp$peak_v,
         pch = 21, bg = spcols[i], cex = 1.25)
}

# close the TIFF device so that the file is written
dev.off()
```
###########
ODDS AND ENDS BELOW (mostly figures for other purposes)
################
```{r}
# Per-plot curve parameters saved by an earlier run
plot_pars_years <- read_csv("data/PerPlotCurves.csv")

# Add a site column holding the first two characters of the plot ID
# (str_sub), which per the plot naming used elsewhere is either RL or GB.
peaks_with_site <- plot_pars_years %>%
  mutate(site = str_sub(plot, 1, 2))

# Build the figure layer by layer:
#  - points: year (as a factor) vs. fitted peak, colored by species
#  - facet_grid(. ~ plot): one column of panels per plot
#  - a horizontal line and a rotated text label at DOY 152, 183, 213 and 244
sanity_plot <- ggplot(peaks_with_site) +
  geom_point(aes(as.factor(year), peak, color = species)) +
  facet_grid(. ~ plot) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 5)) +
  geom_hline(aes(yintercept = 152)) +
  geom_text(mapping = aes(label = 'June 1st', y = 152, x = 0.1),
            angle = 90, alpha = .4, hjust = 0) +
  geom_hline(aes(yintercept = 183)) +
  geom_text(mapping = aes(label = 'July 1st', y = 183, x = 0.1),
            angle = 90, alpha = .4, hjust = 0) +
  geom_hline(aes(yintercept = 213)) +
  geom_text(mapping = aes(label = 'Aug 1st', y = 213, x = 0.1),
            angle = 90, alpha = .4, hjust = 0) +
  geom_hline(aes(yintercept = 244)) +
  geom_text(mapping = aes(label = 'Sep 1st', y = 244, x = 0.1),
            angle = 90, alpha = .4, hjust = 0) +
  # facet_grid(.~year) +
  labs(x = " Year", y = "Predicted Peak flowering (DOY)", color = "Species")

# Show the figure in the document ...
print(sanity_plot)

# ... and save it as well: it is a wide, comprehensive plot, so it is easier
# to inspect as a separate image.
ggsave("figs/sanity_check.png", plot = sanity_plot,
       width = 70, height = 20, units = "cm")
```
# Alternative graph of model fits
Alternative plot to visualize outliers, Site level (RL or GB) is on alternative y-axis, X-axis DOY, and Y-axis the focal species. We see the outliers are closer to start of the y-axis scale or towards the end. Columns here are years. SDD gradient can be ignored ( no variation)
```{r}
# Alternative view of the fitted peaks: peak DOY on the x-axis, species on the
# y-axis, one row of panels per site (first two characters of the plot ID)
# and one column per year. Points are filled by SDD.
#
# The fill aesthetic is only drawn by the fillable point shapes (21-25); with
# the default shape it is silently ignored, so shape 21 is set explicitly to
# make the SDD gradient visible.
plot_pars_years %>%
  mutate(site = str_sub(plot, 1, 2)) %>%
  ggplot(aes(x = peak, y = species, fill = SDD)) +
  geom_jitter(shape = 21) +
  facet_grid(site ~ year) +
  scale_fill_viridis_c(name = "Snow Melt Date") +
  labs(title = 'Peaks across all the years',
       subtitle = 'Modelled peak flowering across 10 focal species',
       x = "Day of the year") +
  theme_minimal(base_size = 14) +
  theme(axis.title.y = element_blank())

# save the plot that was just drawn
ggsave("figs/sanity_check_revised.png", width = 35, height = 20, units = "cm")
```
# Observation Effort
This code calculates the total number of observations for each plot / year / species combination.
```{r}
# Count the observations in every transect / year / plot / species group
obs_counts <- PhenoSite %>%
  group_by(Transect, Year, Site_Code, Species) %>%
  tally()

# Show the counts (printed as a tibble)
obs_counts

# Keep the same counts as a plain data frame for use further down
tallyObservations_typs <- as.data.frame(obs_counts)
```
# Graph showing species / year / transect observer effort
Notice the tight cluster in the number of observations for Reflection Lakes in 2015. The graph also shows the low number of observations for some of the plot / species combinations.
```{r}
# Number of observations per year, jittered and colored by species, with one
# panel per transect. The counts were converted from a tibble to a data
# frame in the previous chunk.
ggplot(
  tallyObservations_typs,
  aes(x = as.factor(Year), y = n, color = Species)
) +
  geom_jitter() +
  facet_grid(. ~ Transect) +
  labs(x = "Year", y = "Number of observations") +
  theme_minimal(base_size = 14)
```
# Calculate outlier 'yes' observations
This code calculates all pairwise differences in dates between all yes observations in each trail / year / plot / species combo, and identifies any observation where the minimum value is > 7
For example, imagine a specific query : Species =='ANOC' & Year=='2013' & Site_Code =='RL9'
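1. *group_by* groups the rows by the given set of fields, so that later steps are computed separately within each Transect / Year / Site_Code / Species combination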
2. *lag* gets the previous row
Returns
# Groups: Transect, Year, Site_Code, Species [1]
Transect Year Site_Code Species Date Observer QA.QC
<chr> <int> <chr> <chr> <chr> <chr> <int>
1 Reflection Lakes 2013 RL9 ANOC 7/21/2013 Dave Purdon 0
2 Reflection Lakes 2013 RL9 ANOC 7/23/2013 Anna Wilson; Cherry Chen 1
3 Reflection Lakes 2013 RL9 ANOC 7/23/2013 Weedy McCauley 0
4 Reflection Lakes 2013 RL9 ANOC 7/24/2013 Brooke Upton 0
5 Reflection Lakes 2013 RL9 ANOC 7/24/2013 Rita Moore; Dan Paquette 0
6 Reflection Lakes 2013 RL9 ANOC 7/25/2013 Carol Miltimore 0
7 Reflection Lakes 2013 RL9 ANOC 7/26/2013 Carol Clingan 0
8 Reflection Lakes 2013 RL9 ANOC 8/4/2013 Johndavid Hascup; Kristalyn Hascup 0
9 Reflection Lakes 2013 RL9 ANOC 8/10/2013 Bonnie Scott 0
Row # 7 and 8 is what we use to find the difference which is greater than 7
#TODO I suggest making pdif a variable we can play around with
#Question - does this only look at the difference between each observation and a previous flowering observation, or also the difference between previous and next? In the latter case, what happens to the first observation of flowering (no previous observation?)
```{r}
# Gap (in days) between each flowering observation and the previous flowering
# observation of the same transect / year / plot / species; only gaps of more
# than 7 days are plotted.
PhenoSite %>%
  filter(Flower == 1) %>%
  mutate(doy = lubridate::yday(as.Date(Date, "%m/%d/%Y"))) %>%
  # lag() takes the previous ROW, so the rows must be in date order within
  # each group for the difference to be the gap since the previous date
  arrange(Transect, Year, Site_Code, Species, doy) %>%
  group_by(Transect, Year, Site_Code, Species) %>%
  # the first observation of a group is compared with itself (gap of 0)
  mutate(pdiff = doy - dplyr::lag(doy, default = doy[1])) %>%
  ungroup() %>%
  filter(pdiff > 7) %>%
  ggplot(aes(x = Site_Code, y = Species, fill = pdiff)) +
  # shape 21 is a fillable point; the default shape ignores `fill`
  geom_jitter(shape = 21) +
  facet_grid(Transect ~ Year) +
  scale_fill_viridis_c(name = "Difference in DOY") +
  labs(title = 'Pairwise differences',
       subtitle = 'All species',
       x = "Site Code") +
  theme_minimal(base_size = 14) +
  theme(axis.title.y = element_blank(),
        axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
```
# Pairwise differences but filter it only by focal species
#I got rid of this snippet of code, as I changed code above to only look at focal species
# Plot pairwise for RL only, looks like 128 observations for RL (no jitter)
```{r}
# Same gap calculation as for all transects, restricted to the Reflection
# Lakes transect and to the species listed in `species`; points are not
# jittered.
PhenoSite %>%
  filter(Transect == 'Reflection Lakes' & Flower == 1 &
           Species %in% species) %>%
  mutate(doy = lubridate::yday(as.Date(Date, "%m/%d/%Y"))) %>%
  # lag() takes the previous ROW, so the rows must be in date order within
  # each group for the difference to be the gap since the previous date
  arrange(Transect, Year, Site_Code, Species, doy) %>%
  group_by(Transect, Year, Site_Code, Species) %>%
  # the first observation of a group is compared with itself (gap of 0)
  mutate(pdiff = doy - dplyr::lag(doy, default = doy[1])) %>%
  ungroup() %>%
  filter(pdiff > 7) %>%
  ggplot(aes(x = Site_Code, y = Species, fill = pdiff)) +
  # shape 21 is a fillable point; the default shape ignores `fill`
  geom_point(shape = 21) +
  facet_grid(Transect ~ Year) +
  scale_fill_viridis_c(name = "Difference in DOY") +
  labs(title = 'Pairwise differences',
       subtitle = 'All species',
       x = "Site Code") +
  theme_minimal(base_size = 14) +
  theme(axis.title.y = element_blank(),
        axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
```
# Plot pairwise for GB only
```{r}
# Same gap calculation as for all transects, restricted to the Glacier Basin
# transect and to the species listed in `species`; points are jittered.
PhenoSite %>%
  filter(Transect == 'Glacier Basin' & Flower == 1 &
           Species %in% species) %>%
  mutate(doy = lubridate::yday(as.Date(Date, "%m/%d/%Y"))) %>%
  # lag() takes the previous ROW, so the rows must be in date order within
  # each group for the difference to be the gap since the previous date
  arrange(Transect, Year, Site_Code, Species, doy) %>%
  group_by(Transect, Year, Site_Code, Species) %>%
  # the first observation of a group is compared with itself (gap of 0)
  mutate(pdiff = doy - dplyr::lag(doy, default = doy[1])) %>%
  ungroup() %>%
  filter(pdiff > 7) %>%
  ggplot(aes(x = Site_Code, y = Species, fill = pdiff)) +
  # shape 21 is a fillable point; the default shape ignores `fill`
  geom_jitter(shape = 21) +
  facet_grid(Transect ~ Year) +
  scale_fill_viridis_c(name = "Difference in DOY") +
  labs(title = 'Pairwise differences',
       subtitle = 'All species (jittered)',
       x = "Site Code") +
  theme_minimal(base_size = 14) +
  theme(axis.title.y = element_blank(),
        axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
```