Skip to content

Commit

Permalink
Update preprocessing step, prepare for validation step
Browse files Browse the repository at this point in the history
  • Loading branch information
langbart committed Dec 6, 2023
1 parent 35c3551 commit db1a8fe
Show file tree
Hide file tree
Showing 9 changed files with 157 additions and 4 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ Imports:
readr,
stringr,
tidyr,
rlang
rlang,
lubridate
Suggests:
covr,
pkgdown,
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ export(download_cloud_file)
export(enquo)
export(enquos)
export(expr)
export(get_preprocessed_surveys)
export(ingest_wcs_surveys)
export(preprocess_wcs_surveys)
export(pt_nest_attachments)
Expand All @@ -25,6 +26,7 @@ export(retrieve_wcs_surveys)
export(sym)
export(syms)
export(upload_cloud_file)
export(validate_surveys_time)
importFrom(magrittr,"%<>%")
importFrom(magrittr,"%>%")
importFrom(magrittr,"%T>%")
Expand Down
28 changes: 28 additions & 0 deletions R/get-cloud-files.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#' Download WCS preprocessed surveys
#'
#' Builds the name of the preprocessed WCS survey object with
#' `cloud_object_name()`, fetches it with `download_cloud_file()` and reads the
#' rds file found under that name.
#'
#' The object version is taken from
#' `pars$surveys$preprocessed_surveys$version`. When that entry is missing the
#' function falls back to `pars$surveys$wcs_surveys$version$preprocess`, which
#' is the setting used before `preprocessed_surveys` had its own version key.
#'
#' @param pars The configuration list. The entries read here are
#'   `surveys$preprocessed_surveys` (`file_prefix`, `version`),
#'   `surveys$wcs_surveys$version$preprocess` (fallback only) and
#'   `storage$google` (`key`, `options`).
#'
#' @return The object stored in the downloaded rds file, i.e. the data frame
#'   of preprocessed survey landings.
#' @export
#'
get_preprocessed_surveys <- function(pars) {
  # Prefer the version configured for the preprocessed surveys themselves;
  # keep the raw-survey setting as a fallback so older configs still work.
  preprocessed_version <- pars$surveys$preprocessed_surveys$version
  if (is.null(preprocessed_version)) {
    preprocessed_version <- pars$surveys$wcs_surveys$version$preprocess
  }

  wcs_preprocessed_surveys <-
    cloud_object_name(
      prefix = pars$surveys$preprocessed_surveys$file_prefix,
      provider = pars$storage$google$key,
      extension = "rds",
      version = preprocessed_version,
      options = pars$storage$google$options
    )

  logger::log_info("Retrieving {wcs_preprocessed_surveys}")
  download_cloud_file(
    name = wcs_preprocessed_surveys,
    provider = pars$storage$google$key,
    options = pars$storage$google$options
  )

  # The object name doubles as the local file path of the downloaded file.
  readr::read_rds(wcs_preprocessed_surveys)
}
16 changes: 16 additions & 0 deletions R/preprocess-wcs-surveys.R
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,26 @@ preprocess_wcs_surveys <- function(log_threshold = logger::DEBUG) {
col_types = readr::cols(.default = readr::col_character())
)

other_info <-
catch_surveys_raw %>%
dplyr::select(
.data$`_id`,
.data$today,
.data$start,
.data$end,
.data$survey_real,
.data$survey_type,
.data$landing_site,
.data$trip_info,
.data$people,
.data$boats_landed
)

logger::log_info("Nesting survey groups' fields")
group_surveys <-
list(
survey_trip = pt_nest_trip(catch_surveys_raw),
other_info = other_info,
survey_catch = pt_nest_catch(catch_surveys_raw),
survey_length = pt_nest_length(catch_surveys_raw),
survey_market = pt_nest_market(catch_surveys_raw),
Expand Down
14 changes: 11 additions & 3 deletions R/pt_nest_survey.R
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,16 @@ pt_nest_catch <- function(x) {
tidyr::pivot_wider(names_from = .data$name, values_from = .data$value) %>%
dplyr::mutate(content = dplyr::coalesce(!!!.[3:ncol(.)])) %>%
dplyr::filter(.data$n == 0 | !is.na(.data$content)) %>%
dplyr::select(-.data$content) %>%
tidyr::nest("catch" = c(.data$group_catch, .data$species_catch, .data$weight_catch, .data$wgt_ind_catch, .data$type_measure), .by = .data$`_id`)
dplyr::mutate(weight_kg = dplyr::coalesce(.data$weight_catch, .data$wgt_ind_catch, .data$wgt_buckets_catch)) %>%
dplyr::select(-c(.data$content, .data$weight_catch, .data$wgt_ind_catch, .data$wgt_buckets_catch)) %>%
tidyr::nest(
"catch" = c(
.data$type_measure, .data$All_catch_in_boat, .data$group_catch,
.data$species_catch, .data$nb_ind_catch, .data$nb_buckets_catch,
.data$weight_kg
),
.by = .data$`_id`
)
}

#' Nest trip catch columns
Expand All @@ -108,7 +116,7 @@ pt_nest_trip <- function(x) {
) %>%
tidyr::pivot_wider(names_from = .data$name, values_from = .data$value) %>%
dplyr::mutate(content = dplyr::coalesce(!!!.[3:ncol(.)])) %>%
dplyr::filter(.data$n == 0 | !is.na(.data$content)) %>%
# dplyr::filter(!is.na(.data$content)) %>%
dplyr::select(-.data$content, -.data$n)
}

Expand Down
48 changes: 48 additions & 0 deletions R/validation-functions.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#' Validate surveys' temporal parameters
#'
#' Takes a preprocessed landings data frame and validates the temporal
#' information associated with each survey: the survey date and the fishing
#' trip duration.
#'
#' Dates are expressed in the "Africa/Dar_es_Salaam" time zone. Durations are
#' converted to their absolute numeric value; any duration above `hrs_max` or
#' below `hrs_min` is replaced by `NA` and flagged with alert number 5.
#'
#' @param data A preprocessed data frame. The columns `_id`, `today` and
#'   `fishing_duration` are read.
#' @param hrs_max Upper threshold of fishing trip duration. `NULL` (the
#'   default) disables the upper bound.
#' @param hrs_min Lower threshold of fishing trip duration. `NULL` (the
#'   default) disables the lower bound.
#'
#' @return A list with two data frames: `validated_dates` (columns `_id` and
#'   `date`) and `validated_duration` (columns `trip_duration`, `alert_number`
#'   and `submission_id`).
#'
#' @importFrom rlang .data
#' @export
#'
#' @examples
#' \dontrun{
#' pars <- read_config()
#' landings <- get_preprocessed_surveys(pars)
#' validate_surveys_time(landings, hrs_max = 72, hrs_min = 1)
#' }
validate_surveys_time <- function(data, hrs_max = NULL, hrs_min = NULL) {
  local_tz <- "Africa/Dar_es_Salaam"

  # A missing threshold means "no bound" on that side. Comparing against NULL
  # would yield a zero-length condition and break case_when().
  if (is.null(hrs_max)) {
    hrs_max <- Inf
  }
  if (is.null(hrs_min)) {
    hrs_min <- -Inf
  }

  validated_dates <- data %>%
    dplyr::select("_id", date = "today") %>%
    dplyr::mutate(
      date = lubridate::with_tz(.data$date, local_tz),
      # Pass the time zone explicitly so the calendar date is taken in local
      # time rather than in as.Date()'s default time zone.
      date = as.Date(.data$date, tz = local_tz)
    )

  validated_duration <- data %>%
    dplyr::select("_id", "fishing_duration") %>%
    dplyr::mutate(
      fishing_duration = abs(as.numeric(.data$fishing_duration)),
      # Trip is longer than hrs_max or shorter than hrs_min. An NA duration
      # gives an NA condition, which case_when() treats as "not matched".
      out_of_range = .data$fishing_duration > hrs_max |
        .data$fishing_duration < hrs_min
    ) %>%
    dplyr::transmute(
      trip_duration = dplyr::case_when(
        .data$out_of_range ~ NA_real_,
        TRUE ~ .data$fishing_duration
      ),
      # Alert number 5 marks a trip duration outside the accepted range.
      alert_number = dplyr::case_when(
        .data$out_of_range ~ 5,
        TRUE ~ NA_real_
      ),
      submission_id = as.integer(.data$`_id`)
    )

  list(
    validated_dates = validated_dates,
    validated_duration = validated_duration
  )
}
3 changes: 3 additions & 0 deletions inst/conf.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ default:
file_prefix: wcs_surveys
version:
preprocess: latest
preprocessed_surveys:
file_prefix: wcs_surveys_preprocessed
version: latest
storage:
google:
key: gcs
Expand Down
17 changes: 17 additions & 0 deletions man/get_preprocessed_surveys.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 30 additions & 0 deletions man/validate_surveys_time.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit db1a8fe

Please sign in to comment.