Merge pull request #132 from pfmc-assessments/get_raw_comps

Add new function to calculate raw/unexpanded composition data
pfmc-assessments · May 17, 2024 · b3c5cd2 · b3c5cd2
2 parents bbf3a70 + ba88e91
commit b3c5cd2
Show file tree

Hide file tree

Showing 23 changed files with 519 additions and 51 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -45,6 +45,7 @@ export(est_growth)
 export(estimate_weight_length)
 export(fit_vbgrowth)
 export(get_json)
+export(get_raw_comps)
 export(get_url)
 export(plot_age_length_sampling)
 export(plot_bio_patterns)

diff --git a/R/Biomass.fn.R b/R/Biomass.fn.R
@@ -56,7 +56,7 @@ Biomass.fn <- function(
   dat,
   strat.vars = c("Depth_m", "Latitude_dd"),
   strat.df,
-  printfolder = "forSS",
+  printfolder = "forSS3",
   outputMedian = TRUE,
   month = "Enter month",
   fleet = "Enter fleet",

diff --git a/R/GetN.fn.R b/R/GetN.fn.R
@@ -54,7 +54,7 @@ GetN.fn <- function(
     "thorny",
     "others"
   ),
-  printfolder = "forSS",
+  printfolder = "forSS3",
   output = NULL,
   verbose = TRUE) {
 

diff --git a/R/SurveyAFs.fn.R b/R/SurveyAFs.fn.R
@@ -37,7 +37,7 @@
 #' @template printfolder
 #' @param remove999 The output object by the function will have the 999 column combined with the first age bin.
 #'   Default TRUE.
-#' @param outputStage1 return the first stage expanded data without compiling it for SS
+#' @param outputStage1 return the first stage expanded data without compiling it for SS3
 #' @template verbose
 #'
 #' @author Allan Hicks and Chantel Wetzel
@@ -66,7 +66,7 @@ SurveyAFs.fn <- function(
   ageErr = "Enter",
   nSamps = "Enter Samps",
   month = "Enter Month",
-  printfolder = "forSS",
+  printfolder = "forSS3",
   remove999 = TRUE,
   outputStage1 = FALSE,
   verbose = TRUE) {

diff --git a/R/SurveyAgeAtLen.fn.R b/R/SurveyAgeAtLen.fn.R
@@ -56,7 +56,7 @@ SurveyAgeAtLen.fn <- function(
   partition = 0,
   ageErr = "Enter Age Error",
   returnSamps = FALSE,
-  printfolder = "forSS",
+  printfolder = "forSS3",
   verbose = TRUE) {
 
   plotdir <- file.path(dir, printfolder)

diff --git a/R/SurveyLFs.fn.R b/R/SurveyLFs.fn.R
@@ -71,7 +71,7 @@ SurveyLFs.fn <- function(
   ageErr = "Enter",
   nSamps = "Enter Samps",
   month = "Enter Month",
-  printfolder = "forSS",
+  printfolder = "forSS3",
   remove999 = TRUE,
   outputStage1 = FALSE,
   sum100 = TRUE,
@@ -597,7 +597,7 @@ SurveyLFs.fn <- function(
     cat("\nNOTE: Files have been saved the the printfolder directory.
         The first file has the 999 column showing fish smaller or younger than the initial bin.
         Check to make sure there is not a large number of fish smaller or younger than the initial bin.
-        The second file has combined the 999 with the first bin and is ready for use in SS.\n")
+        The second file has combined the 999 with the first bin and is ready for use in SS3.\n")
   }
 
   if (!remove999) {

diff --git a/R/UnexpandedAF.fn.R b/R/UnexpandedAF.fn.R
@@ -29,7 +29,7 @@ UnexpandedAFs.fn <- function(
   agelow = -1,
   agehigh = -1,
   month = "Enter Month",
-  printfolder = "forSS",
+  printfolder = "forSS3",
   verbose = TRUE) {
 
   # Overwrite inputs to use the same code for lengths as ages
@@ -48,7 +48,7 @@ UnexpandedAFs.fn <- function(
     ageErr = ageErr,
     agelow = agelow,
     agehigh = agelow,
-    printfolder = "forSS",
+    printfolder = "forSS3",
     verbose = TRUE
   )
 

diff --git a/R/checkStrata.fn.R b/R/checkStrata.fn.R
@@ -27,7 +27,7 @@ CheckStrata.fn <- function(
   dat,
   strat.vars = c("Depth_m", "Latitude_dd"),
   strat.df,
-  printfolder = "forSS",
+  printfolder = "forSS3",
   verbose = TRUE) {
 
   # Grab the strata  rownmaes to index later

diff --git a/R/get_raw_comps.R b/R/get_raw_comps.R
@@ -0,0 +1,274 @@
+#' Calculate unexpanded/raw length or marginal age compositions
+#'
+#' @details
+#' Creates a matrix of unexpanded (or raw) marginal length or age composition
+#' data formatted for Stock Synthesis. The code will return composition data
+#' for all sexes present in the data frame and no sex assignment is done for
+#' unsexed fish. The function will create composition data for either
+#' lengths or ages based on the comp_column_name. The function will return a
+#' list of composition data based upon the sexes present in the data for a
+#' two-sex model or all length/ages for single-sex model.
+#'
+#' @param data A data frame that includes columns of year, sex, and length/ages. The data
+#' frame can be survey data pulled using pull_bio from the data warehouse or any data frame
+#' that includes column names of sex, year, and the comp_column_name.  The sex column is
+#' expected to have sexes denoted by F, M, and U.
+#' @param comp_bins A vector on length bins or age bins to create compositions across. The
+#' composition data is formatted for Stock Synthesis.
+#' @param comp_column_name The column name to create composition data for. This column can be
+#' is used to determine whether to format the composition data for length or age
+#' compositions by looking for either age (e.g., age_years, Age, best_age) or length
+#' (e.g., Length, length, Length_cm) in the comp_column_name. Default Length_cm.
+#' @param two_sex_comps Default TRUE. If TRUE composition data will be formatted for a
+#' Stock Synthesis two-sex model and if FALSE composition data will be formatted for a
+#' single-sex model.
+#' @param fleet A fleet number to assign the composition data to based on the expected
+#' format for Stock Synthesis. Default "Enter Fleet".
+#' @param month Month the samples were collected based on the expected format for
+#' Stock Synthesis to determine the length/age estimate to compare to. Default
+#' "Enter Month".
+#' @param partition Partition to assign the composition data based on the expected
+#' format for Stock Synthesis. Partition of 0 indicates that the composition data
+#' include all composition data, 1 for discarded composition data, and 2 for retained
+#' fish only. Default of 0.
+#' @param age_error Number of ageing error vector to apply to the age data based on
+#' Stock Synthesis. Default "Enter Age Error Vector".
+#' @param age_low Lower age bin for all age composition data based on the expected
+#' format for Stock Synthesis. Default value of -1 which translates to the lowest age
+#' bin.
+#' @param age_high Upper age bin for all age composition data based on the expected
+#' format for Stock Synthesis. Default value of -1 which translates to the highest
+#  age bin.
+#' @template dir
+#' @template printfolder
+#' @template verbose
+#'
+#' @returns A list of length or marginal age compositions for sexed and
+#' unsexed fish formatted for Stock Synthesis.
+#'
+#' @author Chantel Wetzel
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#' bio <- pull_bio(
+#'   common_name = "lingcod",
+#'   survey = "NWFSC.Combo"
+#' )
+#'
+#' length_comps <- get_raw_comps(
+#'   data = bio,
+#'   comp_bins = seq(20, 70, 4)
+#' )
+#'
+#' age_comps <- get_raw_comps(
+#'   data = bio,
+#'   comp_bins = 1:20,
+#'   comp_column_name = "Age"
+#' )
+#'
+#' }
+#'
+get_raw_comps <- function(
+  data,
+  comp_bins,
+  comp_column_name = "Length_cm",
+  two_sex_comps = TRUE,
+  fleet = "Enter Fleet",
+  month = "Enter Month",
+  partition = 0,
+  age_error = "Enter Age Error Vector",
+  age_low = -1,
+  age_high = -1,
+  dir = NULL,
+  printfolder = "forSS3",
+  verbose = TRUE)
+{
+
+  plotdir <- file.path(dir, printfolder)
+  check_dir(dir = plotdir, verbose = verbose)
+
+  colnames(data) <- tolower(colnames(data))
+  comp_column_name <- tolower(comp_column_name)
+
+  vars <- c("year", "sex")
+  if(sum(vars %in% colnames(data)) != 2){
+    stop("Data frame does not contain a column name year and/or sex.
+         \n The columns names can be either upper or lower case.")
+  }
+
+  if(!comp_column_name %in% colnames(data)){
+    stop("Data frame does not contain a column name of comp_column_name.
+         \n The columns names can be either upper or lower case. ")
+  }
+
+  if (!two_sex_comps){
+    data[, "sex"] <- "U"
+  }
+
+  # Check to see if user is doing ages or lengths
+  if(length(grep("age", comp_column_name)) > 0) {
+    comp_type <- "age"
+  } else {
+    comp_type <- "length"
+  }
+
+  keep <- !is.na(data[, comp_column_name])
+  data <- data[keep, ]
+  bins <- c(comp_bins, Inf)
+  data$bin <- bins[findInterval(data[, comp_column_name], bins, all.inside = T)]
+
+  # if there are NA sexes replace them with U
+  if (sum(is.na(data[, "sex"])) > 0) {
+    data[is.na(data[, "sex"]), "sex"] <- "U"
+  }
+
+  # Create the comps
+  Results <- out <- NULL
+  for (y in sort(unique(data[, "year"]))) {
+    # Identify relevant rows
+    Which <- which(data[, "year"] == y & data[, "sex"] %in% c("F", "M"))
+    # Skip this year unless there are rows
+    if (length(Which) > 0) {
+      Row <- c(y, length(Which))
+      # Loop across F then M
+      for (s in c("F", "M")) {
+        # Loop across length bins
+        for (l in comp_bins)
+        {
+          # Subset to relevant rows
+          if (l == min(bins)) {
+            Which2 <- Which[which(data[Which, "bin"] %in% l & data[Which, "sex"] == s)]
+          }
+          if (l != min(bins)) {
+            Which2 <- Which[which(data[Which, "bin"] == l & data[Which, "sex"] == s)]
+          }
+          if (l == max(bins)) {
+            Which2 <- Which[which(data[Which, "bin"] %in% c(l, Inf) & data[Which, "sex"] == s)]
+          }
+          # Sum to effective sample size by length_bin x Sex x Fleet x Year
+          if (length(Which2) == 0) Row <- c(Row, 0)
+          if (length(Which2) >= 1) Row <- c(Row, length(Which2))
+        }
+      }
+      # Add to results matrix
+      Results <- rbind(Results, Row)
+    } # end Which loop
+  } # end year loop
+
+  if(!is.null(Results)){
+    Results <- as.data.frame(Results)
+    tmp <- data.frame(
+      year = Results[, 1],
+      month = month,
+      fleet = fleet,
+      sex = 3,
+      partition = partition,
+      nsamp = Results[, 2]
+    )
+    out <- cbind(tmp, Results[, -c(1:2)])
+    colnames(out)[-c(1:6)] <- c(
+      paste(rep("F", each = length(comp_bins)), comp_bins, sep = ""),
+      paste(rep("M", each = length(comp_bins)), comp_bins, sep = ""))
+  }
+
+  # Create unsexed comps if present in the data
+  out_u <- NULL
+  if (length(data[data[, "sex"] == "U", "sex"]) > 0) {
+    Results <- NULL
+    for (y in sort(unique(data[, "year"]))) {
+      # Identify relevant rows
+      Which <- which(data[, "year"] == y & data[, "sex"] == "U")
+      # Skip this year unless there are rows
+      if (length(Which) > 0) {
+        # Format reference stuff
+        Row <- c(y, length(Which))
+        # Loop across length bins
+        for (l in comp_bins)
+        {
+          # Subset to relevant rows
+          if (l == min(bins)) {
+            Which2 <- Which[which(data[Which, "bin"] %in% l)]
+          }
+          if (l != min(bins)) {
+            Which2 <- Which[which(data[Which, "bin"] == l)]
+          }
+          if (l == max(bins)) {
+            Which2 <- Which[which(data[Which, "bin"] %in% c(l, Inf))]
+          }
+          # Sum to effective sample size by length_bin x Sex x Fleet x Year
+          if (length(Which2) == 0) Row <- c(Row, 0)
+          if (length(Which2) >= 1) Row <- c(Row, length(Which2))
+        }
+        # Add to results matrix
+        Results <- rbind(Results, Row)
+      } # end Which loop
+    } # end year loop
+    Results <- as.data.frame(Results)
+    tmp <- data.frame(
+      year = Results[, 1],
+      month = month,
+      fleet = fleet,
+      sex = 0,
+      partition = partition,
+      nsamp = Results[, 2]
+    )
+
+    if (two_sex_comps){
+      out_u <- cbind(tmp, Results[, -c(1:2)], 0 * Results[, -c(1:2)])
+    } else {
+      out_u <- cbind(tmp, Results[, -c(1:2)])
+    }
+    colnames(out_u)[-c(1:6)] <- paste(rep("U", each = length(comp_bins)), comp_bins, sep = "")
+  }
+
+  if (comp_type == "length") {
+    if (!is.null(out)) {
+      out_comps <- out
+    } else {
+      out_comps <- NULL
+    }
+    if (!is.null(out_u)) {
+      out_u_comps <- out_u
+    }
+  }
+
+  if (comp_type == "age") {
+    if (!is.null(out)) {
+      out_comps <- cbind(out[, 1:5], age_error, age_low, age_high, out[, 6:dim(out)[2]])
+    } else {
+      out_comps <- NULL
+    }
+    if (!is.null(out_u)) {
+      out_u_comps <- cbind(out_u[, 1:5], age_error, age_low, age_high, out_u[, 6:dim(out_u)[2]])
+    }
+  }
+
+  if (!is.null(dir)) {
+    if (!is.null(out_comps)) {
+      write.csv(out_comps,
+                file = file.path(plotdir, paste0(comp_type, "_raw_comps_sex_3_", comp_type, "_bins_", comp_bins[1], "-", max(comp_bins), ".csv")),
+                row.names = FALSE
+      )
+    }
+    if (!is.null(out_u)) {
+      write.csv(out_u_comps,
+                file = file.path(plotdir, paste0(comp_type, "_raw_comps_sex_0_", comp_type, "_bins_", comp_bins[1], "-", max(comp_bins), ".csv")),
+                row.names = FALSE
+      )
+    }
+  }
+
+  comps <- list()
+  if (!is.null(out_comps)) {
+    rownames(out_comps) <- NULL
+    comps$sexed <- out_comps
+  }
+  if (!is.null(out_u)) {
+    rownames(out_u_comps) <- NULL
+    comps$unsexed <- out_u_comps
+  }
+
+  return(comps)
+
+}