diff --git a/R/crossValidation.R b/R/crossValidation.R index 00c10de..aeced1b 100644 --- a/R/crossValidation.R +++ b/R/crossValidation.R @@ -1,25 +1,14 @@ # Run LOO cross-validation, removing each year sequentially and predicting that year -# library(ncdf4) -# library(RColorBrewer) -# library(sp) -# library(maptools) -# library(reshape2) -# library(ggplot2) -# library(sdmTMB) -# library(ggeffects) -# library(visreg) -# library(doBy) - - #************************************************************* # Create a large loop right here for cross-validation +# The only years that should be sent here are the fit years #************************************************************* LOO_CV <- function(response = response, oceanData = oceanData, loocvYears = 5, min.lon = min.lon, max.lon = max.lon, min.lat = min.lat, max.lat = max.lat, - years = years, months = months, + years = years.fit, months = months, includePDO = FALSE, includePC1 = FALSE) { # Verify that the response is what we expect @@ -75,10 +64,10 @@ LOO_CV <- function(response = response, oceanData.s4.scl <- createSeasonalData_LOOCV(oceanData = oceanData, years = years, months = months, year_mo=year_mo, season = 4) # Get covariance between each cell's temperature and survival - covs1<-apply(oceanData.s1.scl[,,as.character(eval(years.fit))], 1:2, function(x) cov(x,response$val[years %in% years.fit], use="pairwise.complete.obs")) - covs2<-apply(oceanData.s2.scl[,,as.character(eval(years.fit))], 1:2, function(x) cov(x,response$val[years %in% years.fit], use="pairwise.complete.obs")) - covs3<-apply(oceanData.s3.scl[,,as.character(eval(years.fit))], 1:2, function(x) cov(x,response$val[years %in% years.fit], use="pairwise.complete.obs")) - covs4<-apply(oceanData.s4.scl[,,as.character(eval(years.fit))], 1:2, function(x) cov(x,response$val[years %in% years.fit], use="pairwise.complete.obs")) + covs1<-apply(oceanData.s1.scl[,,as.character(eval(years.fit))], 1:2, function(x) cov(x,response$val[response$year %in% years.fit], 
use="pairwise.complete.obs")) + covs2<-apply(oceanData.s2.scl[,,as.character(eval(years.fit))], 1:2, function(x) cov(x,response$val[response$year %in% years.fit], use="pairwise.complete.obs")) + covs3<-apply(oceanData.s3.scl[,,as.character(eval(years.fit))], 1:2, function(x) cov(x,response$val[response$year %in% years.fit], use="pairwise.complete.obs")) + covs4<-apply(oceanData.s4.scl[,,as.character(eval(years.fit))], 1:2, function(x) cov(x,response$val[response$year %in% years.fit], use="pairwise.complete.obs")) #******************************************************************** # Create the index (how similar is each year to the covariance map) @@ -94,7 +83,7 @@ LOO_CV <- function(response = response, lm(as.vector(oceanData.s4.scl[,,tt]) ~ -1 + as.vector(covs4))$coef)) coefs_cov<-data.frame(coefs_cov) coefs_cov$year<-years - index_cov<-cbind(coefs_cov,response$val) + index_cov<-cbind(coefs_cov,response$val[response$year %in% years]) colnames(index_cov)<-c("win.cov","spr.cov","sum.cov","aut.cov","year","val") #***************************************** diff --git a/R/get_index.R b/R/get_index.R index 8c41356..74b204a 100644 --- a/R/get_index.R +++ b/R/get_index.R @@ -4,7 +4,8 @@ #source("create_OceanData_Object.R") get_CMISST_index <- function(response, oceanData=oceanData_ERSST, - years=NA, months=1:12, years.pred=NA, + years=NA, years.fit=years, # years used to fit the covariance; defaults to every requested year + months=1:12, years.pred=NA, min.lon=158, max.lon=246, min.lat=10, max.lat=62, returnDataType='anom', @@ -14,10 +15,6 @@ get_CMISST_index <- function(response, oceanData=oceanData_ERSST, if (ncol(response)!=2) { print("incorrect data - requires a 2-column data frame with year and the response"); return(NA) } colnames(response)<-c("year","val") - if (is.na(years)[1]) years=response$year - if (!is.na(years.pred[1])) { - years.fit<-years[!years %in% years.pred] # will be needed to calculate the covariance - } else years.fit <- years # 'years' will be considered 'all years' If we need fit or pred, we can access them
year_mo<-data.frame(year=rep(years, each=length(months)), month=rep(months, length(years)), label=paste(rep(years, each=length(months)), rep(months, length(years)), sep = "_")) @@ -31,7 +28,7 @@ get_CMISST_index <- function(response, oceanData=oceanData_ERSST, lats <- as.numeric(dimnames(oceanData)[[2]]) yr_mo <- dimnames(oceanData)[[3]] lon.index<-which(lons >= min.lon & lons <= max.lon) - lat.index<-which(lats >= min.lat & lats <= max.lat) + lat.index<-which(lats >= min.lat & lats <= max.lat) yr_mo.index<-which(yr_mo %in% year_mo$label) # Subset the ocean data with user-defined extent oceanData <- oceanData[lon.index, lat.index, yr_mo.index] diff --git a/R/makePlots.R b/R/makePlots.R new file mode 100644 index 0000000..72e1d57 --- /dev/null +++ b/R/makePlots.R @@ -0,0 +1,160 @@ +# Make a few plots from the results + +# To get back to normal space +reverse_scale <- function(x, center = NULL, scale = NULL) { + if (!is.null(attr(x, "scaled:scale"))) { + x <- x * attr(x, "scaled:scale") + } else { x <- x * scale } + if (!is.null(attr(x, "scaled:center"))) { + x <- x + attr(x, "scaled:center") + } else { x <- x + center } + x +} + + +makeCovarianceMap <- function(input.season = input.season, cmisst = cmisst) { + # Covariance Map + myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")), space="Lab") + season <- switch(input.season, + win = 2, + spr = 3, + sum = 4, + aut = 5) + myTitle <- switch(input.season, + win = "Winter", + spr = "Spring", + sum = "Summer", + aut = "Autumn") + covMap<-cmisst[[season]] + lmt<-max(abs(covMap), na.rm=TRUE) + limits<-c(-lmt, lmt) + extent <- cmisst[[6]] # min, max of lat, long + + gg <- ggplot() + ggtitle(myTitle) + + geom_raster(data = melt(covMap), aes(x = Var1, y = Var2, fill=value)) + + geom_sf(data=land, color="black", fill="grey", linewidth=0.25) + + xlim(extent[3], extent[4]) + ylim(extent[1], extent[2]) + + scale_fill_gradientn(colours = myPalette(100),limits=limits,name="Covariance", na.value = "white") + + 
theme_classic() + theme(panel.border = element_rect(colour = "grey", fill=NA)) + + labs(x = "Longitude", y = "Latitude") + gg +} + + +makeBiplot <- function(input.season = input.season, cmisst = cmisst) { + # Biplot with response + index <- cmisst[[1]] + season <- switch(input.season, + win = 1, + spr = 2, + sum = 3, + aut = 4) + index$ind <- index[,season] + myTitle <- switch(input.season, + win = "Winter", + spr = "Spring", + sum = "Summer", + aut = "Autumn") + plot(index$ind, index$val, pch=20, cex=2, xlab=paste(myTitle, "CMISST Index"), + ylab="Scaled (Z-score) Response", main=myTitle) + lm1 <- lm(index$val~index$ind) + abline(lm1) + text(bquote(~ R^2 == .(round(summary(lm1)$adj.r.squared, 2))), + x = par("usr")[1]*0.8, y=par("usr")[4]*0.80, cex=1.6, col="blue") + if (input.loocv) { + mae <- cmisst[[7]] + text(paste("MAE =", round(mae[season,"mae"], 2)), + x = par("usr")[1]*0.75, y=par("usr")[4]*0.60, cex=1.6, col="blue") + } +} + +makeTimeSeriesPlot <- function(input.season = input.season, cmisst = cmisst, + ylab="", yaxis_scaler=1) { + # Time series plot in normal space + response.tmp <- response + response.tmp$year <- response.tmp$year - as.numeric(input.lag) + response.tmp <- response.tmp[response.tmp$year %in% seq(input.years[1], input.years[2], 1), c('year', input.stock)] + colnames(response.tmp) <- c('year','val') + if(input.log) response.tmp$val <- log(response.tmp$val) + response.tmp$val.scl <- scale(response.tmp$val) + #reverse_scale(response.tmp$val.scl) + + index <- cmisst[[1]] + season <- switch(input.season, # name of the seasonal index column in cmisst[[1]] (not the list position used for the covariance maps) + win = "win.cov", spr = "spr.cov", sum = "sum.cov", aut = "aut.cov") + index$ind <- index[,season] + index$counts <- reverse_scale(index$val, attr(response.tmp$val.scl, "scaled:center"), attr(response.tmp$val.scl, "scaled:scale")) + if (input.log) index$counts <- exp(index$counts) + myTitle <- switch(input.season, + win = "Winter", spr = "Spring", sum = "Summer", aut = "Autumn") + lm1 <- lm(index$val~index$ind) + preds<-predict(lm1, newdata = index, interval = "confidence")
+ preds<-reverse_scale(preds, attr(response.tmp$val.scl, "scaled:center"), attr(response.tmp$val.scl, "scaled:scale")) + if (input.log) preds<-exp(preds) + # Use prediction interval for predicted points + preds_new<-predict(lm1, newdata = index, interval = "prediction") + preds_new<-reverse_scale(preds_new, attr(response.tmp$val.scl, "scaled:center"), attr(response.tmp$val.scl, "scaled:scale")) + if (input.log) preds_new<- exp(preds_new) + # replace just the ones that were not used during fitting + preds[index$year %in% input.years.pred,]<-preds_new[index$year %in% input.years.pred,] + + preds<-data.frame(preds) + # unlag the year to show the plot in return year + index$year_return <- index$year + as.numeric(input.lag) # same coercion as the lag subtraction above + preds$year_return <- index$year_return + # Plot for SOEM talk in 2024 + ggplot() + + geom_line(data = index, aes(x=year_return, y=counts/yaxis_scaler)) + + geom_point(data = index, aes(x=year_return, y=counts/yaxis_scaler)) + + theme_classic() + + ylab(label = ylab) + xlab("Response Year") + + geom_line(data=preds, aes(x=year_return, y=fit/yaxis_scaler), color="deepskyblue2", linewidth=1.3) + + geom_point(data=preds, aes(x=year_return, y=fit/yaxis_scaler), color="deepskyblue2") + + geom_ribbon(data=preds, aes(x=year_return, ymin = lwr/yaxis_scaler, ymax = upr/yaxis_scaler), fill = "deepskyblue2", alpha = 0.2) +} + +makeIndexPlot <- function(cmisst = cmisst) { + # Output: Index time series + index <- cmisst[[1]] + plot(index$year, index$win.cov, type='b', pch=20, col="red4", + xlab="", ylab="CMISST Index", + ylim=c(min(index[,c("win.cov","spr.cov","sum.cov","aut.cov")], na.rm=TRUE), + max(index[,c("win.cov","spr.cov","sum.cov","aut.cov")], na.rm=TRUE))) + points(index$year, index$spr.cov, type='b', pch=20, col="blue") + points(index$year, index$sum.cov, type='b', pch=20, col="green3") + points(index$year, index$aut.cov, type='b', pch=20, col="purple") + legend("topleft", legend = c("Win","Spr","Sum","Aut"), bty='n', + col = c("red4","blue","green3","purple"),
pch = 20, lty=1) +} + + +makeLOOplot <- function(cmisst = cmisst, season = "spr") { + # Output: Observed and predicted time series from the LOO + index <- cmisst[[1]] # This gets us the whole time series + plot(index$year, index$val, type='b', pch=20, cex=2, col="black", xlab="", ylab="Scaled Response", main = input.stock) + abline(0,0, lty=2) + index <- cmisst[[7]] # this is just the loo results + index2<-index[index$season==season & index$model=="cmisst",] + lines(index2$year, index2$pred, lwd=3, col="deepskyblue2") + text(labels = paste("LOO MAE CMISST =", round(mean(index2$mae),2)), + x = par("usr")[1]+9, y=par("usr")[4]*0.80, cex=1.0, col="deepskyblue2") +} + +makeTable <- function(cmisst = cmisst) { + # Time series plot in normal space + response.tmp <- response + response.tmp$year <- response.tmp$year - as.numeric(input.lag) + response.tmp <- response.tmp[response.tmp$year %in% seq(input.years[1], input.years[2], 1), c('year', input.stock)] + colnames(response.tmp) <- c('year','val') + if(input.log) response.tmp$val <- log(response.tmp$val) + response.tmp$val.scl <- scale(response.tmp$val) + index <- cmisst[[1]] + index$response <- reverse_scale(index$val, attr(response.tmp$val.scl, "scaled:center"), attr(response.tmp$val.scl, "scaled:scale")) + if (input.log) index$response <- exp(index$response) + + # Output: Table + out<-cmisst[[1]] + out$year <- as.integer(out$year)#out <- out[,c(5,6,1:4)] + out$response <- index$response + out +} \ No newline at end of file diff --git a/R/run_CMISST.R b/R/run_CMISST.R index 1ca5937..ff2bf9a 100644 --- a/R/run_CMISST.R +++ b/R/run_CMISST.R @@ -61,8 +61,8 @@ input.years= c(1980, 2023) # Prediction years (ocean years) # These years will not be included in calculating the CMISST index, # but will be in the index output for use in a predictive model -#input.years.pred=c(2022,2023) -input.years.pred=NA +input.years.pred=c(2020) +#input.years.pred=NA #************************************ # For Leave One Out Cross-validation @@ 
-70,7 +70,7 @@ input.years.pred=NA # The script will leave only the most recent years out, # emulating a forecasting scenario. How many years should be included? -# E.g., 5 will only test the 5 most recent years, and using teh full +# E.g., 5 will only test the 5 most recent years, and using the full # time series length will remove every data point (one at a time) loocvYears=5 # the most recent X years to include in the LOO CV @@ -97,20 +97,27 @@ updateCMISST <- function() { response.tmp$year <- response.tmp$year - as.numeric(input.lag) # refine to just the years asked for and just the requested response variable response.tmp <- response.tmp[response.tmp$year %in% years, c('year', input.stock)] - colnames(response.tmp) <- c('year','val') - # sometimes the response starts after the input.years, so limit the years in the ocean data too - years <- years[years %in% response.tmp$year] + colnames(response.tmp) <- c('year', 'val') + # sometimes the response starts after the input.years, so limit the years in the ocean data + #years <- years[years %in% response.tmp$year] + years <- years[years >= min(response.tmp$year)] # Log (if requested) and scale the response variable if(input.log) response.tmp$val <- log(response.tmp$val) response.tmp$val.scl <- scale(response.tmp$val) + # Which years are being fit to? 
+ if (!is.na(input.years.pred[1])) { + years.fit<-years[!years %in% input.years.pred & years %in% response.tmp$year] # will be needed to calculate the covariance + } else years.fit <- years[years %in% response.tmp$year] + # Calculate the CMISST index cmisst <- get_CMISST_index(response = response.tmp[,c("year","val.scl")], oceanData = oceanData, years.pred = input.years.pred, min.lon = min.lon, max.lon = max.lon, min.lat = min.lat, max.lat = max.lat, - years = years, months = months, + years = years, years.fit = years.fit, + months = months, returnDataType = returnDataType, removeBering = removeBering) @@ -119,7 +126,8 @@ updateCMISST <- function() { oceanData = oceanData, loocvYears = loocvYears, min.lon = min.lon, max.lon = max.lon, min.lat = min.lat, max.lat = max.lat, - years = years, months = months) + years = years.fit, + months = months) return(append(cmisst, loocv)) } else return(cmisst) # Returns index as a list @@ -139,162 +147,26 @@ cmisst <- updateCMISST() # Things below here should be moved to a new script for plotting #************************************ +source("R/makePlots.R") # Input: What map to plot input.season = "spr" - - -# For the manuscript Table 1 -#cmisst[[7]] - -# Covariance Map -myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")), space="Lab") -season <- switch(input.season, - win = 2, - spr = 3, - sum = 4, - aut = 5) -myTitle <- switch(input.season, - win = "Winter", - spr = "Spring", - sum = "Summer", - aut = "Autumn") -covMap<-cmisst[[season]] -lmt<-max(abs(covMap), na.rm=TRUE) -limits<-c(-lmt, lmt) -extent <- cmisst[[6]] # min, max of lat, long - -gg <- ggplot() + ggtitle(myTitle) + - geom_raster(data = melt(covMap), aes(x = Var1, y = Var2, fill=value)) + - geom_sf(data=land, color="black", fill="grey", linewidth=0.25) + - xlim(extent[3], extent[4]) + ylim(extent[1], extent[2]) + - scale_fill_gradientn(colours = myPalette(100),limits=limits,name="Covariance", na.value = "white") + - theme_classic() + theme(panel.border = 
element_rect(colour = "grey", fill=NA)) + - labs(x = "Longitude", y = "Latitude") -gg - +# Make the covariance map +makeCovarianceMap(input.season = input.season, cmisst = cmisst) # Biplot with response -index <- cmisst[[1]] -season <- switch(input.season, - win = 1, - spr = 2, - sum = 3, - aut = 4) -index$ind <- index[,season] -myTitle <- switch(input.season, - win = "Winter", - spr = "Spring", - sum = "Summer", - aut = "Autumn") -plot(index$ind, index$val, pch=20, cex=2, xlab=paste(myTitle, "CMISST Index"), - ylab="Scaled (Z-score) Response", main=myTitle) -lm1 <- lm(index$val~index$ind) -abline(lm1) -text(bquote(~ R^2 == .(round(summary(lm1)$adj.r.squared, 2))), - x = par("usr")[1]*0.8, y=par("usr")[4]*0.80, cex=1.6, col="blue") -if (input.loocv) { - mae <- cmisst[[7]] - text(paste("MAE =", round(mae[season,"mae"], 2)), - x = par("usr")[1]*0.75, y=par("usr")[4]*0.60, cex=1.6, col="blue") -} - -# cmisst[[7]][cmisst[[7]]$season=="spr",] +makeBiplot(input.season = input.season, cmisst = cmisst) -# Time series plot in normal space -# To get back to normal space -reverse_scale <- function(x, center = NULL, scale = NULL) { - if (!is.null(attr(x, "scaled:scale"))) { - x <- x * attr(x, "scaled:scale") - } else { x <- x * scale } - if (!is.null(attr(x, "scaled:center"))) { - x <- x + attr(x, "scaled:center") - } else { x <- x + center } - x -} -response.tmp <- response -response.tmp$year <- response.tmp$year - as.numeric(input.lag) -response.tmp <- response.tmp[response.tmp$year %in% seq(input.years[1], input.years[2], 1), c('year', input.stock)] -colnames(response.tmp) <- c('year','val') -if(input.log) response.tmp$val <- log(response.tmp$val) -response.tmp$val.scl <- scale(response.tmp$val) -reverse_scale(response.tmp$val.scl) - -index <- cmisst[[1]] -season <- switch(input.season, - win = 1, spr = 2, sum = 3, aut = 4) -index$ind <- index[,season] -index$counts <- reverse_scale(index$val, attr(response.tmp$val.scl, "scaled:center"), attr(response.tmp$val.scl, 
"scaled:scale")) -if (input.log) index$counts <- exp(index$counts) -myTitle <- switch(input.season, - win = "Winter", spr = "Spring", sum = "Summer", aut = "Autumn") -lm1 <- lm(index$val~index$ind) -preds<-predict(lm1, newdata = index, interval = "confidence") -preds<-reverse_scale(preds, attr(response.tmp$val.scl, "scaled:center"), attr(response.tmp$val.scl, "scaled:scale")) -if (input.log) preds<-exp(preds) -# Use prediction interval for last point -preds_last<-predict(lm1, newdata = index, interval = "prediction") -preds_last<-reverse_scale(preds_last, attr(response.tmp$val.scl, "scaled:center"), attr(response.tmp$val.scl, "scaled:scale")) -if (input.log) preds_last<- exp(preds_last) -preds[nrow(preds),]<-preds_last[nrow(preds),] - -preds<-data.frame(preds) -index$year_return <- index$year + input.lag -preds$year_return<-index$year_return -# Plot for SOEM talk in 2024 -scaler<-1 -ggplot() + - geom_line(data = index, aes(x=year_return, y=counts/scaler)) + - geom_point(data = index, aes(x=year_return, y=counts/scaler)) + - theme_classic() + - #ylab("Spring Chinook Counts at Bonneville Dam") + xlab("") + - #ylab("Bongo Biomass") + xlab("") + - ylab("IGF") + xlab("") + - geom_line(data=preds, aes(x=year_return, y=fit/scaler), color="deepskyblue2", linewidth=1.3) + - geom_point(data=preds, aes(x=year_return, y=fit/scaler), color="deepskyblue2") + - geom_ribbon(data=preds, aes(x=year_return, ymin = lwr/scaler, ymax = upr/scaler), fill = "deepskyblue2", alpha = 0.2) +# Make Time series Plot +makeTimeSeriesPlot(input.season = 'spr', cmisst = cmisst, + ylab = "Counts", yaxis_scaler = 1000) # Output: Index time series -index <- cmisst[[1]] -plot(index$year, index$win.cov, type='b', pch=20, col="red4", - xlab="", ylab="CMISST Index", - ylim=c(min(index[,c("win.cov","spr.cov","sum.cov","aut.cov")], na.rm=TRUE), - max(index[,c("win.cov","spr.cov","sum.cov","aut.cov")], na.rm=TRUE))) -points(index$year, index$spr.cov, type='b', pch=20, col="blue") -points(index$year, 
index$sum.cov, type='b', pch=20, col="green3") -points(index$year, index$aut.cov, type='b', pch=20, col="purple") -legend("topleft", legend = c("Win","Spr","Sum","Aut"), bty='n', - col = c("red4","blue","green3","purple"), pch = 20, lty=1) - +makeIndexPlot(cmisst = cmisst) # Output: Observed and predicted time series from the LOO -index <- cmisst[[7]] -index<-index[index$season=="spr" & index$model=="cmisst",] -plot(index$year, index$mae, type='b', pch=20, col="red4", xlab="", ylab="LOO") - -#index <- cmisst[[1]] # This gets us the whole time series, whereas item 7 is only the LOO results -#plot(index$year, index$val, type='b', pch=20, cex=2, col="black", xlab="", ylab="Scaled Response", main = input.stock) -index <- cmisst[[7]] -index<-index[index$season=="spr" & index$model=="cmisst",] -plot(index$year, index$response, type='b', pch=20, cex=2, col="black", xlab="", ylab="Scaled Response", main = input.stock) -abline(0,0, lty=2) -index <- cmisst[[7]] -index2<-index[index$season=="spr" & index$model=="cmisst",] -lines(index2$year, index2$pred, lwd=3, col="deepskyblue2") -text(labels = paste("LOO MAE CMISST =", round(mean(index2$mae),2)), - x = par("usr")[1]+9, y=par("usr")[4]*0.80, cex=1.2, col="deepskyblue2") -index3<-index[index$season=="spr" & index$model=="pdo",] -lines(index3$year, index3$pred, lwd=3, col="tomato3") -text(labels = paste("LOO MAE PDO =", round(mean(index3$mae),2)), -x = par("usr")[1]+8, y=par("usr")[4]*0.65, cex=1.2, col="tomato3") - - - +makeLOOplot(cmisst = cmisst, season = "spr") -# Output: Table -out<-cmisst[[1]] -out$year <- as.integer(out$year) -#out <- out[,c(5,6,1:4)] -colnames(out)[colnames(out)=="val"] <- "response" -cbind(out) +# Print the seasonal indices and the scaled response variable +makeTable(cmisst = cmisst)