Put plots in their own file and fixed a bunch of errors.

noaa-nwfsc · May 5, 2024 · fbe6013 · fbe6013
1 parent 1b7ac24
commit fbe6013
Show file tree

Hide file tree

Showing 4 changed files with 197 additions and 179 deletions.
diff --git a/R/crossValidation.R b/R/crossValidation.R
@@ -1,25 +1,14 @@
 # Run LOO cross-validation, removing each year sequentially and predicting that year
 
-# library(ncdf4)
-# library(RColorBrewer)
-# library(sp)
-# library(maptools)
-# library(reshape2)
-# library(ggplot2)
-# library(sdmTMB)
-# library(ggeffects)
-# library(visreg)
-# library(doBy)
-
-
 #*************************************************************
 #  Create a large loop right here for cross-validation
+#   The only years that should be sent here are the fit years
 #*************************************************************
 LOO_CV <- function(response = response,
                    oceanData = oceanData, loocvYears = 5,
                    min.lon = min.lon, max.lon = max.lon,
                    min.lat = min.lat, max.lat = max.lat,
-                   years = years, months = months,
+                   years = years.fit, months = months,
                    includePDO = FALSE, includePC1 = FALSE) {
 
   # Verify that the response is what we expect
@@ -75,10 +64,10 @@ LOO_CV <- function(response = response,
     oceanData.s4.scl <- createSeasonalData_LOOCV(oceanData = oceanData, years = years, months = months, year_mo=year_mo, season = 4)
 
     # Get covariance between each cell's temperature and survival
-    covs1<-apply(oceanData.s1.scl[,,as.character(eval(years.fit))], 1:2, function(x) cov(x,response$val[years %in% years.fit], use="pairwise.complete.obs"))
-    covs2<-apply(oceanData.s2.scl[,,as.character(eval(years.fit))], 1:2, function(x) cov(x,response$val[years %in% years.fit], use="pairwise.complete.obs"))
-    covs3<-apply(oceanData.s3.scl[,,as.character(eval(years.fit))], 1:2, function(x) cov(x,response$val[years %in% years.fit], use="pairwise.complete.obs"))
-    covs4<-apply(oceanData.s4.scl[,,as.character(eval(years.fit))], 1:2, function(x) cov(x,response$val[years %in% years.fit], use="pairwise.complete.obs"))
+    covs1<-apply(oceanData.s1.scl[,,as.character(eval(years.fit))], 1:2, function(x) cov(x,response$val[response$year %in% years.fit], use="pairwise.complete.obs"))
+    covs2<-apply(oceanData.s2.scl[,,as.character(eval(years.fit))], 1:2, function(x) cov(x,response$val[response$year %in% years.fit], use="pairwise.complete.obs"))
+    covs3<-apply(oceanData.s3.scl[,,as.character(eval(years.fit))], 1:2, function(x) cov(x,response$val[response$year %in% years.fit], use="pairwise.complete.obs"))
+    covs4<-apply(oceanData.s4.scl[,,as.character(eval(years.fit))], 1:2, function(x) cov(x,response$val[response$year %in% years.fit], use="pairwise.complete.obs"))
 
     #********************************************************************
     # Create the index (how similar is each year to the covariance map)
@@ -94,7 +83,7 @@ LOO_CV <- function(response = response,
                                     lm(as.vector(oceanData.s4.scl[,,tt]) ~ -1 + as.vector(covs4))$coef))
     coefs_cov<-data.frame(coefs_cov)
     coefs_cov$year<-years
-    index_cov<-cbind(coefs_cov,response$val)
+    index_cov<-cbind(coefs_cov,response$val[response$year %in% years])
     colnames(index_cov)<-c("win.cov","spr.cov","sum.cov","aut.cov","year","val")
 
     #*****************************************

diff --git a/R/get_index.R b/R/get_index.R
@@ -4,7 +4,8 @@
 #source("create_OceanData_Object.R")
 
 get_CMISST_index <- function(response, oceanData=oceanData_ERSST,
-                             years=NA, months=1:12, years.pred=NA,
+                             years=NA, years.fit=year.fit,
+                             months=1:12, years.pred=NA,
                              min.lon=158, max.lon=246,
                              min.lat=10, max.lat=62,
                              returnDataType='anom',
@@ -14,10 +15,6 @@ get_CMISST_index <- function(response, oceanData=oceanData_ERSST,
   if (ncol(response)!=2) { print("incorrect data - requires a 2-column data frame with year and the response"); return(NA) }
   colnames(response)<-c("year","val")
 
-  if (is.na(years)[1]) years=response$year
-  if (!is.na(years.pred[1])) {
-    years.fit<-years[!years %in% years.pred] # will be needed to calculate the covariance
-  } else years.fit <- years
   # 'years' will be considered 'all years'  If we need fit or pred, we can access them
   year_mo<-data.frame(year=rep(years, each=length(months)), month=rep(months, length(years)),
                       label=paste(rep(years, each=length(months)), rep(months, length(years)), sep = "_"))
@@ -31,7 +28,7 @@ get_CMISST_index <- function(response, oceanData=oceanData_ERSST,
   lats <- as.numeric(dimnames(oceanData)[[2]])
   yr_mo <- dimnames(oceanData)[[3]]
   lon.index<-which(lons >= min.lon & lons <= max.lon) 
-    lat.index<-which(lats >= min.lat & lats <= max.lat)
+  lat.index<-which(lats >= min.lat & lats <= max.lat)
   yr_mo.index<-which(yr_mo %in% year_mo$label)
   # Subset the ocean data with user-defined extent
   oceanData <- oceanData[lon.index, lat.index, yr_mo.index]

diff --git a/R/makePlots.R b/R/makePlots.R
@@ -0,0 +1,160 @@
+# Make a few plots from the results
+
+# To get back to normal space
+# Undo the transformation applied by scale(): multiply by the scale, then add
+# back the center, so the values return to their original units.
+#
+# Args:
+#   x:      numeric vector or matrix, typically the output of scale().
+#   center: value(s) to add back; used only when x carries no
+#           "scaled:center" attribute.
+#   scale:  value(s) to multiply by; used only when x carries no
+#           "scaled:scale" attribute.
+#
+# Returns x in original units. The "scaled:*" attributes are removed from the
+# result so it cannot be mistaken for scaled data and un-scaled a second time.
+# Stops with an error when a center or scale cannot be found in either place
+# (previously this silently produced a zero-length result).
+reverse_scale <- function(x, center = NULL, scale = NULL) {
+  # Attributes stored on x by scale() take precedence over the arguments
+  attr_scale <- attr(x, "scaled:scale")
+  attr_center <- attr(x, "scaled:center")
+  if (!is.null(attr_scale)) scale <- attr_scale
+  if (!is.null(attr_center)) center <- attr_center
+  if (is.null(scale) || is.null(center)) {
+    stop("reverse_scale() needs both 'center' and 'scale', either as ",
+         "arguments or as scale() attributes on 'x'", call. = FALSE)
+  }
+
+  # Per-column values must be applied column-wise; plain arithmetic would
+  # recycle them down the rows of a matrix instead
+  apply_by_column <- function(values, stats, op) {
+    if (is.matrix(values) && length(stats) > 1) {
+      sweep(values, 2, stats, op)
+    } else {
+      op(values, stats)
+    }
+  }
+  x <- apply_by_column(x, scale, `*`)
+  x <- apply_by_column(x, center, `+`)
+
+  attr(x, "scaled:scale") <- NULL
+  attr(x, "scaled:center") <- NULL
+  x
+}
+
+
+# Draw the covariance map for one season as a ggplot raster with the land
+# polygons drawn on top.
+#
+# Args:
+#   input.season: one of "win", "spr", "sum", "aut".
+#   cmisst:       results list; elements 2-5 are read as the winter, spring,
+#                 summer and autumn covariance maps, element 6 as the extent.
+#
+# Uses the object `land` from the calling environment for the geom_sf layer.
+# Returns the ggplot object.
+makeCovarianceMap <- function(input.season = input.season, cmisst = cmisst) {
+  spectralRamp <- colorRampPalette(rev(brewer.pal(11, "Spectral")), space="Lab")
+
+  # Position of the requested season's map within the cmisst list
+  listPos <- switch(input.season, win = 2, spr = 3, sum = 4, aut = 5)
+  seasonTitle <- switch(input.season,
+                        win = "Winter", spr = "Spring",
+                        sum = "Summer", aut = "Autumn")
+
+  seasonCov <- cmisst[[listPos]]
+  bounds <- cmisst[[6]] # min, max of lat, long
+
+  # Symmetric colour limits so that zero covariance falls mid-palette
+  maxAbs <- max(abs(seasonCov), na.rm=TRUE)
+  fillLimits <- c(-maxAbs, maxAbs)
+
+  ggplot() + ggtitle(seasonTitle) +
+    geom_raster(data = melt(seasonCov), aes(x = Var1, y = Var2, fill=value)) +
+    geom_sf(data=land, color="black", fill="grey", linewidth=0.25) +
+    xlim(bounds[3], bounds[4]) + ylim(bounds[1], bounds[2]) +
+    scale_fill_gradientn(colours = spectralRamp(100), limits=fillLimits,
+                         name="Covariance", na.value = "white") +
+    theme_classic() +
+    theme(panel.border = element_rect(colour = "grey", fill=NA)) +
+    labs(x = "Longitude", y = "Latitude")
+}
+
+
+# Scatter plot of the scaled response against one season's CMISST index, with
+# the fitted regression line and its adjusted R-squared printed on the plot.
+#
+# Args:
+#   input.season: one of "win", "spr", "sum", "aut".
+#   cmisst:       results list; element 1 is the index data frame (season
+#                 columns in positions 1-4, plus val), element 7 the LOO
+#                 cross-validation results.
+#
+# Reads the flag `input.loocv` from the calling environment; when it is TRUE
+# the MAE is printed as well.
+makeBiplot <- function(input.season = input.season, cmisst = cmisst) {
+  indexData <- cmisst[[1]]
+  seasonCol <- switch(input.season, win = 1, spr = 2, sum = 3, aut = 4)
+  seasonName <- switch(input.season,
+                       win = "Winter", spr = "Spring",
+                       sum = "Summer", aut = "Autumn")
+  indexData$ind <- indexData[,seasonCol]
+
+  plot(indexData$ind, indexData$val, pch=20, cex=2,
+       xlab=paste(seasonName, "CMISST Index"),
+       ylab="Scaled (Z-score) Response", main=seasonName)
+  fit <- lm(indexData$val~indexData$ind)
+  abline(fit)
+
+  # Annotations are placed relative to the plot's user coordinates
+  usr <- par("usr")
+  adjR2 <- round(summary(fit)$adj.r.squared, 2)
+  text(bquote(~ R^2 == .(adjR2)),
+       x = usr[1]*0.8, y=usr[4]*0.80, cex=1.6, col="blue")
+  if (input.loocv) {
+    looResults <- cmisst[[7]]
+    # NOTE(review): this picks row number `seasonCol` of the LOO results;
+    # confirm that rows 1-4 really correspond to win/spr/sum/aut
+    text(paste("MAE =", round(looResults[seasonCol,"mae"], 2)),
+         x = usr[1]*0.75, y=usr[4]*0.60, cex=1.6, col="blue")
+  }
+}
+
+# Plot the observed response and the values predicted from one season's CMISST
+# index as time series on the original (un-scaled, un-logged) response scale.
+#
+# Args:
+#   input.season: one of "win", "spr", "sum", "aut".
+#   cmisst:       results list; element 1 is the index data frame, expected
+#                 to hold the season columns win.cov, spr.cov, sum.cov and
+#                 aut.cov together with year and val.
+#   ylab:         y-axis label.
+#   yaxis_scaler: divisor applied to every plotted value.
+#
+# Reads these objects from the calling environment rather than its arguments:
+# response, input.lag, input.years, input.stock, input.log, input.years.pred.
+#
+# Returns a ggplot object.
+makeTimeSeriesPlot <- function(input.season = input.season, cmisst = cmisst,
+                               ylab="", yaxis_scaler=1) {
+  # Coerce the lag once so that every use below is numeric
+  lag <- as.numeric(input.lag)
+
+  # Rebuild the scaled response only to recover the center and scale that are
+  # needed to take the index values back to response units
+  response.tmp <- response
+  response.tmp$year <- response.tmp$year - lag
+  response.tmp <- response.tmp[response.tmp$year %in% seq(input.years[1], input.years[2], 1), c('year', input.stock)]
+  colnames(response.tmp) <- c('year','val')
+  if (input.log) response.tmp$val <- log(response.tmp$val)
+  val.scl <- scale(response.tmp$val)
+  val.center <- attr(val.scl, "scaled:center")
+  val.scale <- attr(val.scl, "scaled:scale")
+
+  # Back-transform scaled values to response units (and out of log space)
+  to_response_units <- function(v) {
+    v <- reverse_scale(v, val.center, val.scale)
+    if (input.log) v <- exp(v)
+    v
+  }
+
+  index <- cmisst[[1]]
+  # Look the season column up by name. Positions 2-5 are the covariance maps
+  # in the cmisst list, not the season columns of this data frame, so using
+  # them here would select the wrong season (and `year` for autumn).
+  season.col <- switch(input.season,
+                       win = "win.cov", spr = "spr.cov",
+                       sum = "sum.cov", aut = "aut.cov")
+  if (is.null(season.col)) {
+    stop("input.season must be one of 'win', 'spr', 'sum', 'aut'", call. = FALSE)
+  }
+  index$ind <- index[[season.col]]
+  index$counts <- to_response_units(index$val)
+
+  lm1 <- lm(val ~ ind, data = index)
+  preds <- to_response_units(predict(lm1, newdata = index, interval = "confidence"))
+  # Use prediction interval for predicted points
+  preds_new <- to_response_units(predict(lm1, newdata = index, interval = "prediction"))
+  # replace just the ones that were not used during fitting
+  is.pred <- index$year %in% input.years.pred
+  preds[is.pred,] <- preds_new[is.pred,]
+
+  preds <- data.frame(preds)
+  # unlag the year to show the plot in return year
+  index$year_return <- index$year + lag
+  preds$year_return <- index$year_return
+  # Plot for SOEM talk in 2024
+  ggplot() +
+    geom_line(data = index, aes(x=year_return, y=counts/yaxis_scaler)) +
+    geom_point(data = index, aes(x=year_return, y=counts/yaxis_scaler)) +
+    theme_classic() +
+    ylab(label = ylab) + xlab("Response Year") +
+    geom_line(data=preds, aes(x=year_return, y=fit/yaxis_scaler), color="deepskyblue2", linewidth=1.3) +
+    geom_point(data=preds, aes(x=year_return, y=fit/yaxis_scaler), color="deepskyblue2") +
+    geom_ribbon(data=preds, aes(x=year_return, ymin = lwr/yaxis_scaler, ymax = upr/yaxis_scaler), fill = "deepskyblue2", alpha = 0.2)
+}
+
+# Plot the four seasonal CMISST index time series on one set of axes.
+#
+# Args:
+#   cmisst: results list; element 1 is the index data frame with the columns
+#           year, win.cov, spr.cov, sum.cov and aut.cov.
+makeIndexPlot <- function(cmisst = cmisst) {
+  # Output: Index time series
+  indexData <- cmisst[[1]]
+  seasonCols <- c("win.cov","spr.cov","sum.cov","aut.cov")
+  seasonColours <- c("red4","blue","green3","purple")
+
+  # Winter sets up the axes; the y range spans all four seasons
+  plot(indexData$year, indexData[[seasonCols[1]]], type='b', pch=20,
+       col=seasonColours[1], xlab="", ylab="CMISST Index",
+       ylim=range(indexData[,seasonCols], na.rm=TRUE))
+  # The remaining seasons are overlaid in order
+  for (i in 2:4) {
+    points(indexData$year, indexData[[seasonCols[i]]], type='b', pch=20,
+           col=seasonColours[i])
+  }
+  legend("topleft", legend = c("Win","Spr","Sum","Aut"), bty='n',
+         col = seasonColours, pch = 20, lty=1)
+}
+
+
+# Plot the observed scaled response with the leave-one-out (LOO) predictions
+# of the CMISST model for one season drawn over it.
+#
+# Args:
+#   cmisst: results list; element 1 is the full index time series (year, val),
+#           element 7 the LOO results (season, model, year, pred, mae).
+#   season: season code used to filter the LOO results.
+#
+# Reads `input.stock` from the calling environment for the plot title.
+makeLOOplot <- function(cmisst = cmisst, season = "spr") {
+  # Output: Observed and predicted time series from the LOO
+  observed <- cmisst[[1]] # This gets us the whole time series
+  plot(observed$year, observed$val, type='b', pch=20, cex=2, col="black",
+       xlab="", ylab="Scaled Response", main = input.stock)
+  abline(a = 0, b = 0, lty=2)
+
+  looResults <- cmisst[[7]] # this is just the loo results
+  isSeason <- looResults$season==season
+  isCmisst <- looResults$model=="cmisst"
+  looSeason <- looResults[isSeason & isCmisst,]
+  lines(looSeason$year, looSeason$pred, lwd=3, col="deepskyblue2")
+
+  maeLabel <- paste("LOO MAE CMISST =", round(mean(looSeason$mae),2))
+  usr <- par("usr")
+  text(labels = maeLabel, x = usr[1]+9, y=usr[4]*0.80, cex=1.0,
+       col="deepskyblue2")
+}
+
+# Build the output table: the index data frame with the year as an integer and
+# an added `response` column holding the response in its original units.
+#
+# Args:
+#   cmisst: results list; element 1 is the index data frame (with year, val).
+#
+# Reads these objects from the calling environment rather than its arguments:
+# response, input.lag, input.years, input.stock, input.log.
+makeTable <- function(cmisst = cmisst) {
+  # Rebuild the scaled response to recover the center and scale used to take
+  # the values back to response units
+  lagged <- response
+  lagged$year <- lagged$year - as.numeric(input.lag)
+  keepYears <- seq(input.years[1], input.years[2], 1)
+  lagged <- lagged[lagged$year %in% keepYears, c('year', input.stock)]
+  colnames(lagged) <- c('year','val')
+  if (input.log) lagged$val <- log(lagged$val)
+  lagged$val.scl <- scale(lagged$val)
+
+  # Output: Table
+  out <- cmisst[[1]]
+  unscaled <- reverse_scale(out$val,
+                            attr(lagged$val.scl, "scaled:center"),
+                            attr(lagged$val.scl, "scaled:scale"))
+  if (input.log) unscaled <- exp(unscaled)
+  out$year <- as.integer(out$year)#out <- out[,c(5,6,1:4)]
+  out$response <- unscaled
+  out
+}