Skip to content

Commit

Permalink
Merge pull request #132 from pfmc-assessments/get_raw_comps
Browse files Browse the repository at this point in the history
Add new function to calculate raw/unexpanded composition data
  • Loading branch information
chantelwetzel-noaa authored May 17, 2024
2 parents bbf3a70 + ba88e91 commit b3c5cd2
Show file tree
Hide file tree
Showing 23 changed files with 519 additions and 51 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ export(est_growth)
export(estimate_weight_length)
export(fit_vbgrowth)
export(get_json)
export(get_raw_comps)
export(get_url)
export(plot_age_length_sampling)
export(plot_bio_patterns)
Expand Down
2 changes: 1 addition & 1 deletion R/Biomass.fn.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ Biomass.fn <- function(
dat,
strat.vars = c("Depth_m", "Latitude_dd"),
strat.df,
printfolder = "forSS",
printfolder = "forSS3",
outputMedian = TRUE,
month = "Enter month",
fleet = "Enter fleet",
Expand Down
2 changes: 1 addition & 1 deletion R/GetN.fn.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ GetN.fn <- function(
"thorny",
"others"
),
printfolder = "forSS",
printfolder = "forSS3",
output = NULL,
verbose = TRUE) {

Expand Down
4 changes: 2 additions & 2 deletions R/SurveyAFs.fn.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
#' @template printfolder
#' @param remove999 The output object by the function will have the 999 column combined with the first age bin.
#' Default TRUE.
#' @param outputStage1 return the first stage expanded data without compiling it for SS
#' @param outputStage1 return the first stage expanded data without compiling it for SS3
#' @template verbose
#'
#' @author Allan Hicks and Chantel Wetzel
Expand Down Expand Up @@ -66,7 +66,7 @@ SurveyAFs.fn <- function(
ageErr = "Enter",
nSamps = "Enter Samps",
month = "Enter Month",
printfolder = "forSS",
printfolder = "forSS3",
remove999 = TRUE,
outputStage1 = FALSE,
verbose = TRUE) {
Expand Down
2 changes: 1 addition & 1 deletion R/SurveyAgeAtLen.fn.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ SurveyAgeAtLen.fn <- function(
partition = 0,
ageErr = "Enter Age Error",
returnSamps = FALSE,
printfolder = "forSS",
printfolder = "forSS3",
verbose = TRUE) {

plotdir <- file.path(dir, printfolder)
Expand Down
4 changes: 2 additions & 2 deletions R/SurveyLFs.fn.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ SurveyLFs.fn <- function(
ageErr = "Enter",
nSamps = "Enter Samps",
month = "Enter Month",
printfolder = "forSS",
printfolder = "forSS3",
remove999 = TRUE,
outputStage1 = FALSE,
sum100 = TRUE,
Expand Down Expand Up @@ -597,7 +597,7 @@ SurveyLFs.fn <- function(
cat("\nNOTE: Files have been saved the the printfolder directory.
The first file has the 999 column showing fish smaller or younger than the initial bin.
Check to make sure there is not a large number of fish smaller or younger than the initial bin.
The second file has combined the 999 with the first bin and is ready for use in SS.\n")
The second file has combined the 999 with the first bin and is ready for use in SS3.\n")
}

if (!remove999) {
Expand Down
4 changes: 2 additions & 2 deletions R/UnexpandedAF.fn.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ UnexpandedAFs.fn <- function(
agelow = -1,
agehigh = -1,
month = "Enter Month",
printfolder = "forSS",
printfolder = "forSS3",
verbose = TRUE) {

# Overwrite inputs to use the same code for lengths as ages
Expand All @@ -48,7 +48,7 @@ UnexpandedAFs.fn <- function(
ageErr = ageErr,
agelow = agelow,
agehigh = agelow,
printfolder = "forSS",
printfolder = "forSS3",
verbose = TRUE
)

Expand Down
2 changes: 1 addition & 1 deletion R/checkStrata.fn.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ CheckStrata.fn <- function(
dat,
strat.vars = c("Depth_m", "Latitude_dd"),
strat.df,
printfolder = "forSS",
printfolder = "forSS3",
verbose = TRUE) {

# Grab the strata rownmaes to index later
Expand Down
274 changes: 274 additions & 0 deletions R/get_raw_comps.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,274 @@
#' Calculate unexpanded/raw length or marginal age compositions
#'
#' @details
#' Creates a matrix of unexpanded (or raw) marginal length or age composition
#' data formatted for Stock Synthesis. The code will return composition data
#' for all sexes present in the data frame and no sex assignment is done for
#' unsexed fish. The function will create composition data for either
#' lengths or ages based on the comp_column_name. The function will return a
#' list of composition data based upon the sexes present in the data for a
#' two-sex model or all length/ages for single-sex model.
#'
#' @param data A data frame that includes columns of year, sex, and length/ages. The data
#' frame can be survey data pulled using pull_bio from the data warehouse or any data frame
#' that includes column names of sex, year, and the comp_column_name. The sex column is
#' expected to have sexes denoted by F, M, and U.
#' @param comp_bins A vector on length bins or age bins to create compositions across. The
#' composition data is formatted for Stock Synthesis.
#' @param comp_column_name The column name to create composition data for. This column can be
#' is used to determine whether to format the composition data for length or age
#' compositions by looking for either age (e.g., age_years, Age, best_age) or length
#' (e.g., Length, length, Length_cm) in the comp_column_name. Default Length_cm.
#' @param two_sex_comps Default TRUE. If TRUE composition data will be formatted for a
#' Stock Synthesis two-sex model and if FALSE composition data will be formatted for a
#' single-sex model.
#' @param fleet A fleet number to assign the composition data to based on the expected
#' format for Stock Synthesis. Default "Enter Fleet".
#' @param month Month the samples were collected based on the expected format for
#' Stock Synthesis to determine the length/age estimate to compare to. Default
#' "Enter Month".
#' @param partition Partition to assign the composition data based on the expected
#' format for Stock Synthesis. Partition of 0 indicates that the composition data
#' include all composition data, 1 for discarded composition data, and 2 for retained
#' fish only. Default of 0.
#' @param age_error Number of ageing error vector to apply to the age data based on
#' Stock Synthesis. Default "Enter Age Error Vector".
#' @param age_low Lower age bin for all age composition data based on the expected
#' format for Stock Synthesis. Default value of -1 which translates to the lowest age
#' bin.
#' @param age_high Upper age bin for all age composition data based on the expected
#' format for Stock Synthesis. Default value of -1 which translates to the highest
# age bin.
#' @template dir
#' @template printfolder
#' @template verbose
#'
#' @returns A list of length or marginal age compositions for sexed and
#' unsexed fish formatted for Stock Synthesis.
#'
#' @author Chantel Wetzel
#' @export
#'
#' @examples
#' \dontrun{
#' bio <- pull_bio(
#' common_name = "lingcod",
#' survey = "NWFSC.Combo"
#' )
#'
#' length_comps <- get_raw_comps(
#' data = bio,
#' comp_bins = seq(20, 70, 4)
#' )
#'
#' age_comps <- get_raw_comps(
#' data = bio,
#' comp_bins = 1:20,
#' comp_column_name = "Age"
#' )
#'
#' }
#'
get_raw_comps <- function(
data,
comp_bins,
comp_column_name = "Length_cm",
two_sex_comps = TRUE,
fleet = "Enter Fleet",
month = "Enter Month",
partition = 0,
age_error = "Enter Age Error Vector",
age_low = -1,
age_high = -1,
dir = NULL,
printfolder = "forSS3",
verbose = TRUE)
{

plotdir <- file.path(dir, printfolder)
check_dir(dir = plotdir, verbose = verbose)

colnames(data) <- tolower(colnames(data))
comp_column_name <- tolower(comp_column_name)

vars <- c("year", "sex")
if(sum(vars %in% colnames(data)) != 2){
stop("Data frame does not contain a column name year and/or sex.
\n The columns names can be either upper or lower case.")
}

if(!comp_column_name %in% colnames(data)){
stop("Data frame does not contain a column name of comp_column_name.
\n The columns names can be either upper or lower case. ")
}

if (!two_sex_comps){
data[, "sex"] <- "U"
}

# Check to see if user is doing ages or lengths
if(length(grep("age", comp_column_name)) > 0) {
comp_type <- "age"
} else {
comp_type <- "length"
}

keep <- !is.na(data[, comp_column_name])
data <- data[keep, ]
bins <- c(comp_bins, Inf)
data$bin <- bins[findInterval(data[, comp_column_name], bins, all.inside = T)]

# if there are NA sexes replace them with U
if (sum(is.na(data[, "sex"])) > 0) {
data[is.na(data[, "sex"]), "sex"] <- "U"
}

# Create the comps
Results <- out <- NULL
for (y in sort(unique(data[, "year"]))) {
# Identify relevant rows
Which <- which(data[, "year"] == y & data[, "sex"] %in% c("F", "M"))
# Skip this year unless there are rows
if (length(Which) > 0) {
Row <- c(y, length(Which))
# Loop across F then M
for (s in c("F", "M")) {
# Loop across length bins
for (l in comp_bins)
{
# Subset to relevant rows
if (l == min(bins)) {
Which2 <- Which[which(data[Which, "bin"] %in% l & data[Which, "sex"] == s)]
}
if (l != min(bins)) {
Which2 <- Which[which(data[Which, "bin"] == l & data[Which, "sex"] == s)]
}
if (l == max(bins)) {
Which2 <- Which[which(data[Which, "bin"] %in% c(l, Inf) & data[Which, "sex"] == s)]
}
# Sum to effective sample size by length_bin x Sex x Fleet x Year
if (length(Which2) == 0) Row <- c(Row, 0)
if (length(Which2) >= 1) Row <- c(Row, length(Which2))
}
}
# Add to results matrix
Results <- rbind(Results, Row)
} # end Which loop
} # end year loop

if(!is.null(Results)){
Results <- as.data.frame(Results)
tmp <- data.frame(
year = Results[, 1],
month = month,
fleet = fleet,
sex = 3,
partition = partition,
nsamp = Results[, 2]
)
out <- cbind(tmp, Results[, -c(1:2)])
colnames(out)[-c(1:6)] <- c(
paste(rep("F", each = length(comp_bins)), comp_bins, sep = ""),
paste(rep("M", each = length(comp_bins)), comp_bins, sep = ""))
}

# Create unsexed comps if present in the data
out_u <- NULL
if (length(data[data[, "sex"] == "U", "sex"]) > 0) {
Results <- NULL
for (y in sort(unique(data[, "year"]))) {
# Identify relevant rows
Which <- which(data[, "year"] == y & data[, "sex"] == "U")
# Skip this year unless there are rows
if (length(Which) > 0) {
# Format reference stuff
Row <- c(y, length(Which))
# Loop across length bins
for (l in comp_bins)
{
# Subset to relevant rows
if (l == min(bins)) {
Which2 <- Which[which(data[Which, "bin"] %in% l)]
}
if (l != min(bins)) {
Which2 <- Which[which(data[Which, "bin"] == l)]
}
if (l == max(bins)) {
Which2 <- Which[which(data[Which, "bin"] %in% c(l, Inf))]
}
# Sum to effective sample size by length_bin x Sex x Fleet x Year
if (length(Which2) == 0) Row <- c(Row, 0)
if (length(Which2) >= 1) Row <- c(Row, length(Which2))
}
# Add to results matrix
Results <- rbind(Results, Row)
} # end Which loop
} # end year loop
Results <- as.data.frame(Results)
tmp <- data.frame(
year = Results[, 1],
month = month,
fleet = fleet,
sex = 0,
partition = partition,
nsamp = Results[, 2]
)

if (two_sex_comps){
out_u <- cbind(tmp, Results[, -c(1:2)], 0 * Results[, -c(1:2)])
} else {
out_u <- cbind(tmp, Results[, -c(1:2)])
}
colnames(out_u)[-c(1:6)] <- paste(rep("U", each = length(comp_bins)), comp_bins, sep = "")
}

if (comp_type == "length") {
if (!is.null(out)) {
out_comps <- out
} else {
out_comps <- NULL
}
if (!is.null(out_u)) {
out_u_comps <- out_u
}
}

if (comp_type == "age") {
if (!is.null(out)) {
out_comps <- cbind(out[, 1:5], age_error, age_low, age_high, out[, 6:dim(out)[2]])
} else {
out_comps <- NULL
}
if (!is.null(out_u)) {
out_u_comps <- cbind(out_u[, 1:5], age_error, age_low, age_high, out_u[, 6:dim(out_u)[2]])
}
}

if (!is.null(dir)) {
if (!is.null(out_comps)) {
write.csv(out_comps,
file = file.path(plotdir, paste0(comp_type, "_raw_comps_sex_3_", comp_type, "_bins_", comp_bins[1], "-", max(comp_bins), ".csv")),
row.names = FALSE
)
}
if (!is.null(out_u)) {
write.csv(out_u_comps,
file = file.path(plotdir, paste0(comp_type, "_raw_comps_sex_0_", comp_type, "_bins_", comp_bins[1], "-", max(comp_bins), ".csv")),
row.names = FALSE
)
}
}

comps <- list()
if (!is.null(out_comps)) {
rownames(out_comps) <- NULL
comps$sexed <- out_comps
}
if (!is.null(out_u)) {
rownames(out_u_comps) <- NULL
comps$unsexed <- out_u_comps
}

return(comps)

}
Loading

0 comments on commit b3c5cd2

Please sign in to comment.