From 37466c4fa466d9613717008f9a543a5f6b4cc432 Mon Sep 17 00:00:00 2001 From: damianooldoni Date: Fri, 1 Jul 2022 10:39:19 +0200 Subject: [PATCH] Improve checks and use pkg::func See #86 --- R/visualize_pathways_level1.R | 165 ++++++++++++++++--------------- man/visualize_pathways_level1.Rd | 2 +- 2 files changed, 85 insertions(+), 82 deletions(-) diff --git a/R/visualize_pathways_level1.R b/R/visualize_pathways_level1.R index a6a16a86..1807c123 100644 --- a/R/visualize_pathways_level1.R +++ b/R/visualize_pathways_level1.R @@ -5,7 +5,7 @@ #' vertebrates/invertebrates) and temporal (lower limit year). Facets can be #' added (see argument `facet_column`). #' -#' @param df df. +#' @param df A data frame. #' @param category NULL or character. One of the kingdoms as given in GBIF and #' `Chordata` (the phylum), `Not Chordata` (all other phyla of `Animalia`): 1. #' `Plantae` 2. `Animalia` 3. `Fungi` 4. `Chromista` 5. `Archaea` 6. @@ -46,16 +46,8 @@ #' are no data to plot. #' #' @export -#' @importFrom assertthat assert_that -#' @importFrom assertable assert_colnames -#' @importFrom dplyr %>% anti_join distinct filter if_else mutate pull rename_at -#' sym -#' @importFrom ggplot2 ggplot geom_bar theme ggtitle xlab ylab coord_flip -#' facet_wrap -#' @importFrom tidyselect all_of -#' @importFrom forcats fct_rev +#' @importFrom dplyr %>% #' @importFrom rlang !! -#' @importFrom egg ggarrange #' #' @examples #' \dontrun{ @@ -128,24 +120,24 @@ visualize_pathways_level1 <- function(df, y_lab = "Pathways") { # initial input checks # Check df - assert_that(is.data.frame(df), msg = "`df` must be a data frame.") + assertthat::assert_that(is.data.frame(df), msg = "`df` must be a data frame.") # Check pathway_level1_names - assert_that(is.character(pathway_level1_names), + assertthat::assert_that(is.character(pathway_level1_names), msg = "`pathway_level1_names` must be a character." 
) - assert_colnames(df, pathway_level1_names, only_colnames = FALSE) + assertable::assert_colnames(df, pathway_level1_names, only_colnames = FALSE) # Check category if (!is.null(category)) { - assert_that(is.character(category), + assertthat::assert_that(is.character(category), msg = paste0( "`category` must be a character. One of: ", paste(categories, collapse = ", "), "." ) ) - assert_that(category %in% categories, + assertthat::assert_that(category %in% categories, msg = paste0( "`category` is not correct. Choose one of: ", paste0(categories, collapse = ", "), @@ -153,11 +145,14 @@ visualize_pathways_level1 <- function(df, ) ) } - assert_that(is.null(facet_column) | is.character(facet_column), + assertthat::assert_that(is.null(facet_column) | is.character(facet_column), msg = "Argument facet_column has to be NULL or a character." ) + if (!is.null(facet_column)) { + assertthat::assert_that(length(facet_column) == 1) + } if (is.character(facet_column)) { - assert_colnames(df, facet_column, only_colnames = FALSE) + assertable::assert_colnames(df, facet_column, only_colnames = FALSE) } # check for valid facet options valid_facet_options <- c( @@ -166,19 +161,19 @@ visualize_pathways_level1 <- function(df, ) if (is.character(facet_column)) { facet_column <- match.arg(facet_column, valid_facet_options) - assert_that(is.null(category) || !(category == "Chordata" & + assertthat::assert_that(is.null(category) || !(category == "Chordata" & facet_column == "phylum"), msg = "You cannot use phylum as facet with category Chordata." ) } # Check pathways if (!is.null(pathways)) { - assert_that(is.character(pathways), + assertthat::assert_that(is.character(pathways), msg = "`pathways` must be a vector of characters." 
) invalid_pathways <- pathways[!pathways %in% df[[pathway_level1_names]]] - assert_that(length(invalid_pathways) == 0, + assertthat::assert_that(length(invalid_pathways) == 0, msg = paste0( "Pathways in `pathways` not present in ", "data.frame: ", @@ -188,32 +183,36 @@ visualize_pathways_level1 <- function(df, ) } # Check taxon_names - assert_that(is.character(taxon_names), + assertthat::assert_that(is.character(taxon_names), msg = "`taxon_names` must be a character." ) - assert_colnames(df, taxon_names, only_colnames = FALSE) + assertthat::assert_that(length(taxon_names) == 1) + assertable::assert_colnames(df, taxon_names, only_colnames = FALSE) # Check kingdom_names - assert_that(is.character(kingdom_names), + assertthat::assert_that(is.character(kingdom_names), msg = "`kingdom_names` must be a character." ) - assert_colnames(df, kingdom_names, only_colnames = FALSE) + assertthat::assert_that(length(kingdom_names) == 1) + assertable::assert_colnames(df, kingdom_names, only_colnames = FALSE) # check parameter phylum - assert_that(is.character(phylum_names), + assertthat::assert_that(is.character(phylum_names), msg = "`phylum_names` must be a character." ) - assert_colnames(df, phylum_names, only_colnames = FALSE) + assertthat::assert_that(length(phylum_names) == 1) + assertable::assert_colnames(df, phylum_names, only_colnames = FALSE) # Check from if (!is.null(from)) { - assert_that(is.numeric(from), + assertthat::assert_that(is.numeric(from), msg = "`from` must be a number (year)." ) - assert_that(from > 0, + assertthat::assert_that(from > 0, msg = "`from` must be a positive number." ) - assert_that(from == as.integer(from), + assertthat::assert_that(from == as.integer(from), msg = "`from` must be an integer." 
) - assert_that(from <= as.numeric(substr(Sys.Date(), start = 1, stop = 4)), + assertthat::assert_that( + from <= as.numeric(substr(Sys.Date(), start = 1, stop = 4)), msg = paste0( "`from` must be less than ", format(Sys.Date(), "%Y"), @@ -222,48 +221,52 @@ visualize_pathways_level1 <- function(df, ) } # Check first_observed - assert_that(is.character(first_observed), + assertthat::assert_that(is.character(first_observed), msg = "`first_observed` must be a character." ) - assert_colnames(df, first_observed, only_colnames = FALSE) + assertthat::assert_that(length(first_observed) == 1) + assertable::assert_colnames(df, first_observed, only_colnames = FALSE) # Check title and labels + assertthat::assert_that(is.null(title) | is.character(title), + msg = "`title` must be a character or NULL." + ) if (!is.null(title)) { - assert_that(is.character(title), - msg = "`title` must be a character or NULL." - ) + assertthat::assert_that(length(title) == 1) } + assertthat::assert_that(is.character(x_lab), + msg = "`x_lab` must be a character or NULL." + ) if (!is.null(x_lab)) { - assert_that(is.character(x_lab), - msg = "`x_lab` must be a character or NULL." - ) + assertthat::assert_that(length(x_lab) == 1) } + assertthat::assert_that(is.character(y_lab), + msg = "`y_lab` must be a character or NULL." + ) if (!is.null(y_lab)) { - assert_that(is.character(y_lab), - msg = "`y_lab` must be a character or NULL." 
- ) + assertthat::assert_that(length(y_lab) == 1) } # rename to default column name df <- df %>% - rename_at(vars(all_of(kingdom_names)), ~"group") %>% - rename_at(vars(all_of(taxon_names)), ~"taxonKey") %>% - rename_at(vars(all_of(first_observed)), ~"first_observed") %>% - rename_at(vars(all_of(pathway_level1_names)), ~"pathway_level1") + dplyr::rename_at(vars(tidyselect::all_of(kingdom_names)), ~"group") %>% + dplyr::rename_at(vars(tidyselect::all_of(taxon_names)), ~"taxonKey") %>% + dplyr::rename_at(vars(tidyselect::all_of(first_observed)), ~"first_observed") %>% + dplyr::rename_at(vars(tidyselect::all_of(pathway_level1_names)), ~"pathway_level1") # handle asymmetric category system (Chordata, Not Chordta are not kingdoms) if (!is.null(category)) { if (!category %in% c("Chordata", "Not Chordata")) { - df <- df %>% filter(.data$group == category) + df <- df %>% dplyr::filter(.data$group == category) } else { df <- df %>% - rename_at(vars(phylum_names), ~"phylum_group") + dplyr::rename_at(vars(phylum_names), ~"phylum_group") if (category == "Chordata") { - df <- df %>% filter(.data$phylum_group == category) + df <- df %>% dplyr::filter(.data$phylum_group == category) } else { df <- df %>% - filter(.data$group == "Animalia") %>% - filter(.data$phylum_group != "Chordata") + dplyr::filter(.data$group == "Animalia") %>% + dplyr::filter(.data$phylum_group != "Chordata") } } } @@ -271,14 +274,14 @@ visualize_pathways_level1 <- function(df, if (!is.null(from)) { df <- df %>% - filter(.data$first_observed >= from) + dplyr::filter(.data$first_observed >= from) } # Handle NAs and "" nas_or_empty_pathway_level1 <- df %>% - filter(is.na(.data$pathway_level1) | + dplyr::filter(is.na(.data$pathway_level1) | .data$pathway_level1 == "") %>% - distinct(.data$taxonKey) + dplyr::distinct(.data$taxonKey) if (nrow(nas_or_empty_pathway_level1) > 0) { message_warning <- paste( nrow(nas_or_empty_pathway_level1), @@ -290,7 +293,7 @@ visualize_pathways_level1 <- function(df, df <- df %>% # 
Handle NAs and "unknown" - mutate(pathway_level1 = if_else(is.na(.data$pathway_level1) | + dplyr::mutate(pathway_level1 = dplyr::if_else(is.na(.data$pathway_level1) | .data$pathway_level1 == "", "unknown", .data$pathway_level1 @@ -298,17 +301,17 @@ visualize_pathways_level1 <- function(df, # Import all CBD pathways level 1 pathways_level1_all <- pathways_cbd() %>% - distinct(.data$pathway_level1) + dplyr::distinct(.data$pathway_level1) # Select pathways if (!is.null(pathways)) { pathways <- replace(pathways, is.na(pathways) | pathways == "", "unknown") pathways <- unique(pathways) df <- df %>% - filter(.data$pathway_level1 %in% pathways) + dplyr::filter(.data$pathway_level1 %in% pathways) } else { if (cbd_standard == TRUE) { - pathways <- pathways_level1_all %>% pull() + pathways <- pathways_level1_all %>% dplyr::pull() } else { pathways <- unique(df$pathway_level1) } @@ -316,11 +319,11 @@ visualize_pathways_level1 <- function(df, # Check values in column with pathways level 1 invalid_pathways <- df %>% - anti_join(pathways_level1_all, + dplyr::anti_join(pathways_level1_all, by = "pathway_level1" ) %>% - distinct(.data$pathway_level1) %>% - pull() + dplyr::distinct(.data$pathway_level1) %>% + dplyr::pull() message_invalid_pathways <- paste0( "No CBD standard pathways level 1 value(s) in column `", @@ -332,38 +335,38 @@ visualize_pathways_level1 <- function(df, "." 
) if (cbd_standard == TRUE) { - assert_that(length(invalid_pathways) == 0, + assertthat::assert_that(length(invalid_pathways) == 0, msg = message_invalid_pathways ) } else { warning(message_invalid_pathways) } - # Distinct taxa + # Distinct taxa if (!is.null(facet_column)) { df <- df %>% - distinct(.data$taxonKey, .data$pathway_level1, !!sym(facet_column)) + dplyr::distinct(.data$taxonKey, .data$pathway_level1, !!dplyr::sym(facet_column)) } # Transform pathway level 1 column to factor to make ordering in graph easily df <- df %>% - mutate(pathway_level1 = factor(.data$pathway_level1, levels = pathways)) - # Distinct taxa without facet + dplyr::mutate(pathway_level1 = factor(.data$pathway_level1, levels = pathways)) + # Distinct taxa without facet df_top_graph <- df %>% - distinct(.data$taxonKey, .data$pathway_level1) + dplyr::distinct(.data$taxonKey, .data$pathway_level1) # Plot number of taxa per pathway_level1 top_graph <- NULL if (nrow(df_top_graph) > 0) { top_graph <- - ggplot( + ggplot2::ggplot( df_top_graph ) + - geom_bar(aes(x = fct_rev(.data$pathway_level1))) + - xlab(y_lab) + - ylab(x_lab) + - coord_flip() + - ggtitle(title) + ggplot2::geom_bar(aes(x = forcats::fct_rev(.data$pathway_level1))) + + ggplot2::xlab(y_lab) + + ggplot2::ylab(x_lab) + + ggplot2::coord_flip() + + ggplot2::ggtitle(title) } if (is.null(facet_column)) { return(top_graph) @@ -371,19 +374,19 @@ visualize_pathways_level1 <- function(df, facet_graph <- NULL if (nrow(df) > 0) { facet_graph <- - ggplot( + ggplot2::ggplot( df, - aes(x = fct_rev(.data$pathway_level1)) + aes(x = forcats::fct_rev(.data$pathway_level1)) ) + - geom_bar() + - xlab(y_lab) + - ylab(x_lab) + - coord_flip() + - ggtitle(title) + - facet_wrap(facet_column) + ggplot2::geom_bar() + + ggplot2::xlab(y_lab) + + ggplot2::ylab(x_lab) + + ggplot2::coord_flip() + + ggplot2::ggtitle(title) + + ggplot2::facet_wrap(facet_column) } if (all(!is.null(top_graph), !is.null(facet_graph))) { - ggarrange(top_graph, 
facet_graph) + egg::ggarrange(top_graph, facet_graph) } else { NULL diff --git a/man/visualize_pathways_level1.Rd b/man/visualize_pathways_level1.Rd index d6692897..94b412a3 100644 --- a/man/visualize_pathways_level1.Rd +++ b/man/visualize_pathways_level1.Rd @@ -22,7 +22,7 @@ visualize_pathways_level1( ) } \arguments{ -\item{df}{df.} +\item{df}{A data frame.} \item{category}{NULL or character. One of the kingdoms as given in GBIF and \code{Chordata} (the phylum), \verb{Not Chordata} (all other phyla of \code{Animalia}): 1.