From 15de9be9bcb9088301887d0446fbde2f225e0c4f Mon Sep 17 00:00:00 2001 From: damianooldoni Date: Fri, 1 Jul 2022 12:31:38 +0200 Subject: [PATCH] Apply "84 and #86 to vpath year level2 --- NAMESPACE | 5 - R/visualize_pathways_level1.R | 1 + R/visualize_pathways_level2.R | 9 +- R/visualize_pathways_year_level1.R | 2 +- R/visualize_pathways_year_level2.R | 348 ++++++++++++++------------ man/visualize_pathways_year_level1.Rd | 8 +- man/visualize_pathways_year_level2.Rd | 76 +++--- 7 files changed, 244 insertions(+), 205 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 7cd2f204..67d7d2dd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -41,7 +41,6 @@ importFrom(dplyr,filter) importFrom(dplyr,filter_at) importFrom(dplyr,full_join) importFrom(dplyr,group_by) -importFrom(dplyr,if_else) importFrom(dplyr,inner_join) importFrom(dplyr,left_join) importFrom(dplyr,mutate) @@ -61,10 +60,8 @@ importFrom(dplyr,tally) importFrom(dplyr,tibble) importFrom(dplyr,ungroup) importFrom(dplyr,vars) -importFrom(egg,ggarrange) importFrom(ggplot2,aes) importFrom(ggplot2,element_text) -importFrom(ggplot2,facet_wrap) importFrom(ggplot2,geom_bar) importFrom(ggplot2,geom_line) importFrom(ggplot2,geom_point) @@ -77,7 +74,6 @@ importFrom(ggplot2,scale_y_continuous) importFrom(ggplot2,theme) importFrom(ggplot2,xlab) importFrom(ggplot2,ylab) -importFrom(ggplot2,ylim) importFrom(grDevices,grey) importFrom(graphics,legend) importFrom(gratia,derivatives) @@ -143,7 +139,6 @@ importFrom(stringr,str_to_lower) importFrom(svDialogs,dlgInput) importFrom(tibble,as_tibble) importFrom(tibble,tibble) -importFrom(tidyselect,all_of) importFrom(tidyselect,ends_with) importFrom(tidyselect,one_of) importFrom(tidyselect,vars_pull) diff --git a/R/visualize_pathways_level1.R b/R/visualize_pathways_level1.R index 1a208f98..63bc8c54 100644 --- a/R/visualize_pathways_level1.R +++ b/R/visualize_pathways_level1.R @@ -141,6 +141,7 @@ visualize_pathways_level1 <- function(df, "." ) ) + assertthat::assert_that(length(category) == 1) assertthat::assert_that(category %in% categories, msg = paste0( "`category` is not correct. Choose one of: ", diff --git a/R/visualize_pathways_level2.R b/R/visualize_pathways_level2.R index ff6f8def..cddf2573 100644 --- a/R/visualize_pathways_level2.R +++ b/R/visualize_pathways_level2.R @@ -176,21 +176,18 @@ visualize_pathways_level2 <- function(df, assertable::assert_colnames(df, pathway_level2_names, only_colnames = FALSE) # Check category if (!is.null(category)) { - valid_categories <- c("Chordata", "Not Chordata", "Plantae", "Animalia", - "Fungi", "Chromista", "Archaea", "Bacteria", - "Protozoa", "Viruses", "incertae sedis") assertthat::assert_that(is.character(category), msg = paste0( "`category` must be NULL or a character. One of: ", - paste(valid_categories, collapse = ", "), + paste(categories, collapse = ", "), "." ) ) assertthat::assert_that(length(category) == 1) - assertthat::assert_that(category %in% valid_categories, + assertthat::assert_that(category %in% categories, msg = paste0( "`category` is not correct. Choose one of: ", - paste0(valid_categories, collapse = ", "), + paste0(categories, collapse = ", "), "." ) ) diff --git a/R/visualize_pathways_year_level1.R b/R/visualize_pathways_year_level1.R index 870a340d..807a6ae8 100644 --- a/R/visualize_pathways_year_level1.R +++ b/R/visualize_pathways_year_level1.R @@ -49,7 +49,7 @@ #' @param x_lab `NULL` or character. x-axis label. Default: `"Number of #' introduced taxa"`. #' @param y_lab `NULL` or character. Title of the graph. Default: `"Pathways"`. -#' #' @return A list with three slots: +#' @return A list with three slots: #' - `plot`: ggplot2 object (or egg object if facets are used). `NULL` if there #' are no data to plot. #' - `data_top_graph`: data.frame (tibble) with data used for the main plot (top diff --git a/R/visualize_pathways_year_level2.R b/R/visualize_pathways_year_level2.R index f90945bf..f4360267 100644 --- a/R/visualize_pathways_year_level2.R +++ b/R/visualize_pathways_year_level2.R @@ -4,58 +4,66 @@ #' through different CBD pathways level 2 for a specific CBD pathway level 1. #' Time expressed in years. Possible breakpoints: taxonomic (kingdoms + #' vertebrates/invertebrates). -#' @param df df. +#' +#' @param df A data frame. #' @param chosen_pathway_level1 character. Selected pathway level 1. -#' @param bin numeric. Time span in years to use for agggregation. Default: 10. +#' @param bin numeric. Time span in years to use for agggregation. Default: +#' `10`. #' @param from numeric. Year trade-off: taxa introduced before this year are -#' grouped all together. Default: 1950. -#' @param category NULL or character. One of the kingdoms as given in GBIF and -#' `Chordata` (the phylum), `Not Chordata` (all other phyla of `Animalia`): 1. -#' `Plantae` 2. `Animalia` 3. `Fungi` 4. `Chromista` 5. `Archaea` 6. -#' `Bacteria` 7. `Protozoa` 8. `Viruses` 9. `incertae sedis` 10. `Chordata` -#' 11. `Not Chordata` Default: `NULL`. -#' @param facet_column NULL or character. The column to use to create additional -#' facet wrap bar graphs underneath the main graph. When NULL, no facet graph -#' are created. One of `family`, `order`, `class`, `phylum`, `locality`, -#' `native_range`, `habitat`. If column has another name, rename it before -#' calling this function. Default: `NULL`. +#' grouped all together. Default: `1950`. +#' @param category `NULL` (default) or character. One of the kingdoms as given +#' in GBIF or `Chordata` (the phylum) or `Not Chordata` (all other phyla of +#' `Animalia`): +#' 1. `Plantae` +#' 2. `Animalia` +#' 3. `Fungi` +#' 4. `Chromista` +#' 5. `Archaea` +#' 6. `Bacteria` +#' 7. `Protozoa` +#' 8. `Viruses` +#' 9. `incertae sedis` +#' 10. `Chordata` +#' 11. `Not Chordata` +#' @param facet_column `NULL` or character. The column to use to create +#' additional facet wrap bar graphs underneath the main graph. When `NULL`, no +#' facet graph are created. One of `family`, `order`, `class`, `phylum`, +#' `locality`, `native_range`, `habitat`. If column has another name, rename +#' it before calling this function. Default: `NULL`. #' @param pathway_level1_names character. Name of the column of `df` containing -#' information about pathways at level 1. Default: `pathway_level1`. +#' information about pathways at level 1. Default: `"pathway_level1"`. #' @param pathway_level2_names character. Name of the column of `df` containing -#' information about pathways at level 2. Default: `pathway_level2`. +#' information about pathways at level 2. Default: `"pathway_level2"`. #' @param pathways character. Vector with pathways level 1 to visualize. The #' pathways are displayed following the order as in this vector. -#' @param taxon_names character. Name of the column of \code{df} containing +#' @param taxon_names character. Name of the column of `df` containing #' information about taxa. This parameter is used to uniquely identify taxa. -#' @param kingdom_names character. Name of the column of \code{df} containing -#' information about kingdom. Default: \code{"kingdom"}. -#' @param phylum_names character. Name of the column of \code{df} containing -#' information about phylum. This parameter is used only if \code{category} is -#' one of: \code{"Chordata"}, \code{"Not Chordata"}. Default: -#' \code{"phylum"}. -#' @param first_observed character. Name of the column of \code{df} containing -#' information about year of introduction. Default: \code{"first_observed"}. -#' @param cbd_standard logical. If TRUE the values of pathway level 1 are +#' @param kingdom_names character. Name of the column of `df` containing +#' information about kingdom. Default: `"kingdom"`. +#' @param phylum_names character. Name of the column of `df` containing +#' information about phylum. This parameter is used only if `category` is +#' one of: `"Chordata"`, `"Not Chordata"`. Default: `"phylum"`. +#' @param first_observed character. Name of the column of `df` containing +#' information about year of introduction. Default: `"first_observed"`. +#' @param cbd_standard logical. If `TRUE` the values of pathway level 1 are #' checked based on CBD standard as returned by `pathways_cbd()`. Error is -#' returned if unmatched values are found. If FALSE, a warning is returned. -#' Default: TRUE. -#' @param title NULL or character. Title of the graph. Default: NULL. -#' @param x_lab NULL or character. x-axis label. Default: "Number of introduced -#' taxa". -#' @param y_lab NULL or character. Title of the graph. Default: "Pathways". -#' @return A ggplot2 object (or egg object if facets are used). NULL if there are -#' no data to plot. +#' returned if unmatched values are found. If `FALSE`, a warning is returned. +#' Default: `TRUE`. +#' @param title `NULL` or character. Title of the graph. Default: `NULL`. +#' @param x_lab `NULL` or character. x-axis label. Default: `"Number of +#' introduced taxa"`. +#' @param y_lab `NULL` or character. Title of the graph. Default: `"Pathways"`. +#' @return A list with three slots: +#' - `plot`: ggplot2 object (or egg object if facets are used). `NULL` if there +#' are no data to plot. +#' - `data_top_graph`: data.frame (tibble) with data used for the main plot (top +#' graph) in `plot`. +#' - `data_facet_graph`: data.frame (tibble) with data used for the faceting +#' plot in `plot`. `NULL` is returned if `facet_column` is `NULL`. #' #' @export -#' @importFrom assertthat assert_that -#' @importFrom assertable assert_colnames -#' @importFrom dplyr %>% .data anti_join count distinct filter group_by if_else -#' mutate pull rename_at sym ungroup -#' @importFrom egg ggarrange -#' @importFrom ggplot2 facet_wrap geom_line geom_point ggplot ggtitle xlab ylab -#' ylim +#' @importFrom dplyr %>% .data #' @importFrom rlang !! -#' @importFrom tidyselect all_of #' #' @examples #' \dontrun{ @@ -71,7 +79,6 @@ #' key = col_double(), #' nubKey = col_double(), #' speciesKey = col_double(), -#' acceptedKey = col_double(), #' first_observed = col_double(), #' last_observed = col_double() #' ) @@ -146,53 +153,60 @@ #' data, #' chosen_pathway_level1 = "escape", #' x_lab = "Jaar", -#' y_lab = "Aantal geïntroduceerde taxa" +#' y_lab = "Aantal geintroduceerde taxa" #' ) #' } visualize_pathways_year_level2 <- function( - df, - chosen_pathway_level1, - bin = 10, - from = 1950, - category = NULL, - facet_column = NULL, - pathways = NULL, - pathway_level1_names = "pathway_level1", - pathway_level2_names = "pathway_level2", - taxon_names = "key", - kingdom_names = "kingdom", - phylum_names = "phylum", - first_observed = "first_observed", - cbd_standard = TRUE, - title = NULL, - x_lab = "Time period", - y_lab = "Number of introduced taxa") { + df, + chosen_pathway_level1, + bin = 10, + from = 1950, + category = NULL, + facet_column = NULL, + pathways = NULL, + pathway_level1_names = "pathway_level1", + pathway_level2_names = "pathway_level2", + taxon_names = "key", + kingdom_names = "kingdom", + phylum_names = "phylum", + first_observed = "first_observed", + cbd_standard = TRUE, + title = NULL, + x_lab = "Time period", + y_lab = "Number of introduced taxa") { # initial input checks # Check df - assert_that(is.data.frame(df), msg = "`df` must be a data frame.") + assertthat::assert_that(is.data.frame(df), msg = "`df` must be a data frame.") # Check bin - assert_that(is.numeric(bin), msg = "`bin` must be a number.") - assert_that(bin == as.integer(bin), msg = "`bin` must be an integer.") + assertthat::assert_that(is.numeric(bin), msg = "`bin` must be a number.") + assertthat::assert_that(length(bin) == 1) + assertthat::assert_that( + bin == as.integer(bin), + msg = "`bin` must be an integer." + ) # Check pathway_level1_names - assert_that(is.character(pathway_level1_names), + assertthat::assert_that(is.character(pathway_level1_names), msg = "`pathway_level1_names` must be a character." ) - assert_colnames(df, pathway_level1_names, only_colnames = FALSE) + assertthat::assert_that(length(pathway_level1_names) == 1) + assertable::assert_colnames(df, pathway_level1_names, only_colnames = FALSE) # Check pathway_level2_names - assert_that(is.character(pathway_level2_names), + assertthat::assert_that(is.character(pathway_level2_names), msg = "`pathway_level2_names` must be a character." ) - assert_colnames(df, pathway_level2_names, only_colnames = FALSE) + assertthat::assert_that(length(pathway_level2_names) == 1) + assertable::assert_colnames(df, pathway_level2_names, only_colnames = FALSE) # Check category if (!is.null(category)) { - assert_that(is.character(category), + assertthat::assert_that(is.character(category), msg = paste0( "`category` must be a character. One of: ", paste(categories, collapse = ", "), "." ) ) - assert_that(category %in% categories, + assertthat::assert_that(length(category) == 1) + assertthat::assert_that(category %in% categories, msg = paste0( "`category` is not correct. Choose one of: ", paste0(categories, collapse = ", "), @@ -200,29 +214,29 @@ visualize_pathways_year_level2 <- function( ) ) } - assert_that(is.null(facet_column) | is.character(facet_column), + assertthat::assert_that(is.null(facet_column) | is.character(facet_column), msg = "Argument facet_column has to be NULL or a character." ) if (is.character(facet_column)) { - assert_colnames(df, facet_column, only_colnames = FALSE) - } - # Check for valid facet options - valid_facet_options <- c( - "family", "order", "class", "phylum", - "locality", "native_range", "habitat" - ) - if (is.character(facet_column)) { + assertthat::assert_that(length(facet_column) == 1) + assertable::assert_colnames(df, facet_column, only_colnames = FALSE) + # Check for valid facet options + valid_facet_options <- c( + "family", "order", "class", "phylum", + "locality", "native_range", "habitat" + ) facet_column <- match.arg(facet_column, valid_facet_options) - assert_that(is.null(category) || !(category == "Chordata" & facet_column == "phylum"), + assertthat::assert_that(is.null(category) || !(category == "Chordata" & facet_column == "phylum"), msg = "You cannot use phylum as facet with category Chordata." ) } # Check chosen_pathway_level1 - assert_that(is.character(chosen_pathway_level1), + assertthat::assert_that(is.character(chosen_pathway_level1), msg = "Argument `chosen_pathway_level1` must be a character." ) + assertthat::assert_that(length(chosen_pathway_level1) == 1) pathways_level1 <- unique(df[[pathway_level1_names]]) - assert_that(chosen_pathway_level1 %in% pathways_level1, + assertthat::assert_that(chosen_pathway_level1 %in% pathways_level1, msg = paste0( "chosen_pathway_level1 ", chosen_pathway_level1, @@ -233,12 +247,13 @@ visualize_pathways_year_level2 <- function( ) # Check pathways if (!is.null(pathways)) { - assert_that(is.character(pathways), + assertthat::assert_that(is.character(pathways), msg = "`pathways` must be a vector of characters." ) + assertthat::assert_that(length(pathways) == 1) invalid_pathways <- pathways[!pathways %in% df[[pathway_level2_names]]] - assert_that(length(invalid_pathways) == 0, + assertthat::assert_that(length(invalid_pathways) == 0, msg = paste0( "Pathways in `pathways` not present in ", "data.frame: ", @@ -248,32 +263,37 @@ visualize_pathways_year_level2 <- function( ) } # Check taxon_names - assert_that(is.character(taxon_names), + assertthat::assert_that(is.character(taxon_names), msg = "`taxon_names` must be a character." ) - assert_colnames(df, taxon_names, only_colnames = FALSE) + assertthat::assert_that(length(taxon_names) == 1) + assertable::assert_colnames(df, taxon_names, only_colnames = FALSE) # Check kingdom_names - assert_that(is.character(kingdom_names), + assertthat::assert_that(is.character(kingdom_names), msg = "`kingdom_names` must be a character." ) - assert_colnames(df, kingdom_names, only_colnames = FALSE) + assertthat::assert_that(length(kingdom_names) == 1) + assertable::assert_colnames(df, kingdom_names, only_colnames = FALSE) # check parameter phylum - assert_that(is.character(phylum_names), + assertthat::assert_that(is.character(phylum_names), msg = "`phylum_names` must be a character." ) - assert_colnames(df, phylum_names, only_colnames = FALSE) + assertthat::assert_that(length(phylum_names) == 1) + assertable::assert_colnames(df, phylum_names, only_colnames = FALSE) # Check from if (!is.null(from)) { - assert_that(is.numeric(from), + assertthat::assert_that(is.numeric(from), msg = "`from` must be a number (year)." ) - assert_that(from > 0, + assertthat::assert_that(length(from) == 1) + assertthat::assert_that(from > 0, msg = "`from` must be a positive number." ) - assert_that(from == as.integer(from), + assertthat::assert_that(from == as.integer(from), msg = "`from` must be an integer." ) - assert_that(from <= as.numeric(substr(Sys.Date(), start = 1, stop = 4)), + assertthat::assert_that( + from <= as.numeric(substr(Sys.Date(), start = 1, stop = 4)), msg = paste0( "`from` must be less than ", format(Sys.Date(), "%Y"), @@ -282,62 +302,66 @@ visualize_pathways_year_level2 <- function( ) } # Check first_observed - assert_that(is.character(first_observed), + assertthat::assert_that(is.character(first_observed), msg = "`first_observed` must be a character." ) - assert_colnames(df, first_observed, only_colnames = FALSE) + assertthat::assert_that(length(first_observed) == 1) + assertable::assert_colnames(df, first_observed, only_colnames = FALSE) # Check title and labels if (!is.null(title)) { - assert_that(is.character(title), + assertthat::assert_that(is.character(title), msg = "`title` must be a character or NULL." ) + assertthat::assert_that(length(title) == 1) } if (!is.null(x_lab)) { - assert_that(is.character(x_lab), + assertthat::assert_that(is.character(x_lab), msg = "`x_lab` must be a character or NULL." ) + assertthat::assert_that(length(x_lab) == 1) } if (!is.null(y_lab)) { - assert_that(is.character(y_lab), + assertthat::assert_that(is.character(y_lab), msg = "`y_lab` must be a character or NULL." ) + assertthat::assert_that(length(y_lab) == 1) } # rename to default column name df <- df %>% - rename_at(vars(all_of(kingdom_names)), ~"group") %>% - rename_at(vars(all_of(taxon_names)), ~"taxonKey") %>% - rename_at(vars(all_of(first_observed)), ~"first_observed") %>% - rename_at(vars(all_of(pathway_level1_names)), ~"pathway_level1") %>% - rename_at(vars(all_of(pathway_level2_names)), ~"pathway_level2") + dplyr::rename_at(vars(tidyselect::all_of(kingdom_names)), ~"group") %>% + dplyr::rename_at(vars(tidyselect::all_of(taxon_names)), ~"taxonKey") %>% + dplyr::rename_at(vars(tidyselect::all_of(first_observed)), ~"first_observed") %>% + dplyr::rename_at(vars(tidyselect::all_of(pathway_level1_names)), ~"pathway_level1") %>% + dplyr::rename_at(vars(tidyselect::all_of(pathway_level2_names)), ~"pathway_level2") # Select data with the chosen pathway level 1 df <- df %>% - filter(.data$pathway_level1 == chosen_pathway_level1) + dplyr::filter(.data$pathway_level1 == chosen_pathway_level1) # handle asymmetric category system (Chordata, Not Chordta are not kingdoms) if (!is.null(category)) { if (!category %in% c("Chordata", "Not Chordata")) { - df <- df %>% filter(.data$group == category) + df <- df %>% dplyr::filter(.data$group == category) } else { df <- df %>% - rename_at(vars(phylum_names), ~"phylum_group") + dplyr::rename_at(vars(phylum_names), ~"phylum_group") if (category == "Chordata") { - df <- df %>% filter(.data$phylum_group == category) + df <- df %>% dplyr::filter(.data$phylum_group == category) } else { df <- df %>% - filter(.data$group == "Animalia") %>% - filter(.data$phylum_group != "Chordata") + dplyr::filter(.data$group == "Animalia") %>% + dplyr::filter(.data$phylum_group != "Chordata") } } } # Handle NAs and "" nas_or_empty_pathway_level2 <- df %>% - filter(is.na(.data$pathway_level2) | + dplyr::filter(is.na(.data$pathway_level2) | .data$pathway_level2 == "") %>% - distinct(.data$taxonKey) + dplyr::distinct(.data$taxonKey) if (nrow(nas_or_empty_pathway_level2) > 0) { message_warning <- paste( nrow(nas_or_empty_pathway_level2), @@ -349,7 +373,7 @@ visualize_pathways_year_level2 <- function( df <- df %>% # Handle NAs and "unknown" - mutate(pathway_level2 = if_else(is.na(.data$pathway_level2) | + dplyr::mutate(pathway_level2 = dplyr::if_else(is.na(.data$pathway_level2) | .data$pathway_level2 == "", "unknown", .data$pathway_level2 @@ -357,18 +381,18 @@ visualize_pathways_year_level2 <- function( # Import all CBD pathways level 2 within chosen pathway level 1 pathways_level2_all <- pathways_cbd() %>% - filter(.data$pathway_level1 == chosen_pathway_level1) %>% - distinct(.data$pathway_level2) + dplyr::filter(.data$pathway_level1 == chosen_pathway_level1) %>% + dplyr::distinct(.data$pathway_level2) # Select pathways if (!is.null(pathways)) { pathways <- replace(pathways, is.na(pathways) | pathways == "", "unknown") pathways <- unique(pathways) df <- df %>% - filter(.data$pathway_level2 %in% pathways) + dplyr::filter(.data$pathway_level2 %in% pathways) } else { if (cbd_standard == TRUE) { - pathways <- pathways_level2_all %>% pull() + pathways <- pathways_level2_all %>% dplyr::pull() } else { pathways <- unique(df$pathway_level2) } @@ -376,11 +400,11 @@ visualize_pathways_year_level2 <- function( # Check values in column with pathways level 2 invalid_pathways <- df %>% - anti_join(pathways_level2_all, + dplyr::anti_join(pathways_level2_all, by = "pathway_level2" ) %>% - distinct(.data$pathway_level2) %>% - pull() + dplyr::distinct(.data$pathway_level2) %>% + dplyr::pull() message_invalid_pathways <- paste0( "No CBD standard pathways level 2 value(s) in column `", @@ -392,7 +416,7 @@ visualize_pathways_year_level2 <- function( "." ) if (cbd_standard == TRUE) { - assert_that(length(invalid_pathways) == 0, + assertthat::assert_that(length(invalid_pathways) == 0, msg = message_invalid_pathways ) } else { @@ -401,7 +425,7 @@ visualize_pathways_year_level2 <- function( # Throw warning if there are taxa without first_observed n_first_observed_na <- df %>% - filter(is.na(first_observed)) %>% + dplyr::filter(is.na(first_observed)) %>% nrow() if (n_first_observed_na > 0) { warning( @@ -414,13 +438,13 @@ visualize_pathways_year_level2 <- function( ) df <- df %>% - filter(!is.na(first_observed)) + dplyr::filter(!is.na(first_observed)) } - # Distinct taxa + # dplyr::distinct taxa if (is.null(facet_column)) { df <- df %>% - distinct( + dplyr::distinct( .data$taxonKey, .data$first_observed, .data$pathway_level2 @@ -428,20 +452,20 @@ visualize_pathways_year_level2 <- function( } else { df <- df %>% - distinct( + dplyr::distinct( .data$taxonKey, .data$first_observed, .data$pathway_level2, - !!sym(facet_column) + !!dplyr::sym(facet_column) ) } df <- df %>% - mutate( + dplyr::mutate( bins_first_observed = floor((.data$first_observed - from) / bin) * bin + from ) %>% - mutate(bins_first_observed = if_else( + dplyr::mutate(bins_first_observed = dplyr::if_else( .data$bins_first_observed < from, paste("before", from), paste( @@ -461,88 +485,92 @@ visualize_pathways_year_level2 <- function( ) df <- df %>% - mutate(bins_first_observed = factor(.data$bins_first_observed, + dplyr::mutate(bins_first_observed = factor(.data$bins_first_observed, levels = levels_first_observed )) # Transform pathway level 1 column to factor to make ordering in graph easily df <- df %>% - mutate(pathway_level2 = factor(.data$pathway_level2, levels = pathways)) + dplyr::mutate(pathway_level2 = factor(.data$pathway_level2, levels = pathways)) - # Count number of taxa per pathway_level2 over time - df_top_graph <- + # dplyr::count number of taxa per pathway_level2 over time + data_top_graph <- df %>% - group_by( + dplyr::group_by( .data$bins_first_observed, .data$pathway_level2 ) %>% - count() %>% - ungroup() - max_n <- max(df_top_graph$n) + dplyr::count() %>% + dplyr::ungroup() + max_n <- max(data_top_graph$n) # Plot number of taxa per pathway_level2 over time top_graph <- NULL - if (nrow(df_top_graph) > 0) { + if (nrow(data_top_graph) > 0) { top_graph <- - ggplot(df_top_graph) + - geom_line(aes( + ggplot2::ggplot(data_top_graph) + + ggplot2::geom_line(aes( x = .data$bins_first_observed, y = .data$n, group = .data$pathway_level2, color = .data$pathway_level2 )) + - geom_point(aes( + ggplot2::geom_point(aes( x = .data$bins_first_observed, y = .data$n, group = .data$pathway_level2, color = .data$pathway_level2 )) + - ylim(0, max_n) + - xlab(x_lab) + - ylab(y_lab) + - ggtitle(title) + ggplot2::ylim(0, max_n) + + ggplot2::xlab(x_lab) + + ggplot2::ylab(y_lab) + + ggplot2::ggtitle(title) } if (is.null(facet_column)) { - return(top_graph) + return(list(plot = top_graph, + data_top_graph = data_top_graph, + data_facet_graph = NULL)) } else { - # Count number of taxa per pathway_level2 per facet over time + # dplyr::count number of taxa per pathway_level2 per facet over time df_facet_graph <- df %>% - group_by( + dplyr::group_by( .data$bins_first_observed, .data$pathway_level2, - !!sym(facet_column) + !!dplyr::sym(facet_column) ) %>% - count() %>% - ungroup() + dplyr::count() %>% + dplyr::ungroup() max_n <- max(df_facet_graph$n) # Plot number of taxa per pathway_level2 per facet over time facet_graph <- NULL if (nrow(df_facet_graph) > 0) { facet_graph <- - ggplot(df_facet_graph) + - geom_line(aes( + ggplot2::ggplot(df_facet_graph) + + ggplot2::geom_line(aes( x = .data$bins_first_observed, y = .data$n, group = .data$pathway_level2, color = .data$pathway_level2 )) + - geom_point(aes( + ggplot2::geom_point(aes( x = .data$bins_first_observed, y = .data$n, group = .data$pathway_level2, color = .data$pathway_level2 )) + - ylim(0, max_n) + - xlab(x_lab) + - ylab(y_lab) + - ggtitle(title) + - facet_wrap(facet_column) + ggplot2::ylim(0, max_n) + + ggplot2::xlab(x_lab) + + ggplot2::ylab(y_lab) + + ggplot2::ggtitle(title) + + ggplot2::facet_wrap(facet_column) } if (all(!is.null(top_graph), !is.null(facet_graph))) { - ggarrange(top_graph, facet_graph) + return(list(plot = egg::ggarrange(top_graph, facet_graph), + data_top_graph = data_top_graph, + data_facet_graph = data_facet_graph)) } else { - NULL + return(list(plot = NULL, data_top_graph = NULL, data_facet_graph = NULL)) } } } diff --git a/man/visualize_pathways_year_level1.Rd b/man/visualize_pathways_year_level1.Rd index 7bbcea7d..a40a109a 100644 --- a/man/visualize_pathways_year_level1.Rd +++ b/man/visualize_pathways_year_level1.Rd @@ -82,8 +82,10 @@ Default: \code{TRUE.}} \item{x_lab}{\code{NULL} or character. x-axis label. Default: \code{"Number of introduced taxa"}.} -\item{y_lab}{\code{NULL} or character. Title of the graph. Default: \code{"Pathways"}. -#' @return A list with three slots: +\item{y_lab}{\code{NULL} or character. Title of the graph. Default: \code{"Pathways"}.} +} +\value{ +A list with three slots: \itemize{ \item \code{plot}: ggplot2 object (or egg object if facets are used). \code{NULL} if there are no data to plot. @@ -91,7 +93,7 @@ are no data to plot. graph) in \code{plot}. \item \code{data_facet_graph}: data.frame (tibble) with data used for the faceting plot in \code{plot}. \code{NULL} is returned if \code{facet_column} is \code{NULL}. -}} +} } \description{ Function to plot a line graph with number of taxa introduced over time diff --git a/man/visualize_pathways_year_level2.Rd b/man/visualize_pathways_year_level2.Rd index 136ab4f0..d7143358 100644 --- a/man/visualize_pathways_year_level2.Rd +++ b/man/visualize_pathways_year_level2.Rd @@ -25,35 +25,47 @@ visualize_pathways_year_level2( ) } \arguments{ -\item{df}{df.} +\item{df}{A data frame.} \item{chosen_pathway_level1}{character. Selected pathway level 1.} -\item{bin}{numeric. Time span in years to use for agggregation. Default: 10.} +\item{bin}{numeric. Time span in years to use for agggregation. Default: +\code{10}.} \item{from}{numeric. Year trade-off: taxa introduced before this year are -grouped all together. Default: 1950.} - -\item{category}{NULL or character. One of the kingdoms as given in GBIF and -\code{Chordata} (the phylum), \verb{Not Chordata} (all other phyla of \code{Animalia}): 1. -\code{Plantae} 2. \code{Animalia} 3. \code{Fungi} 4. \code{Chromista} 5. \code{Archaea} 6. -\code{Bacteria} 7. \code{Protozoa} 8. \code{Viruses} 9. \verb{incertae sedis} 10. \code{Chordata} -11. \verb{Not Chordata} Default: \code{NULL}.} - -\item{facet_column}{NULL or character. The column to use to create additional -facet wrap bar graphs underneath the main graph. When NULL, no facet graph -are created. One of \code{family}, \code{order}, \code{class}, \code{phylum}, \code{locality}, -\code{native_range}, \code{habitat}. If column has another name, rename it before -calling this function. Default: \code{NULL}.} +grouped all together. Default: \code{1950}.} + +\item{category}{\code{NULL} (default) or character. One of the kingdoms as given +in GBIF or \code{Chordata} (the phylum) or \verb{Not Chordata} (all other phyla of +\code{Animalia}): +\enumerate{ +\item \code{Plantae} +\item \code{Animalia} +\item \code{Fungi} +\item \code{Chromista} +\item \code{Archaea} +\item \code{Bacteria} +\item \code{Protozoa} +\item \code{Viruses} +\item \verb{incertae sedis} +\item \code{Chordata} +\item \verb{Not Chordata} +}} + +\item{facet_column}{\code{NULL} or character. The column to use to create +additional facet wrap bar graphs underneath the main graph. When \code{NULL}, no +facet graph are created. One of \code{family}, \code{order}, \code{class}, \code{phylum}, +\code{locality}, \code{native_range}, \code{habitat}. If column has another name, rename +it before calling this function. Default: \code{NULL}.} \item{pathways}{character. Vector with pathways level 1 to visualize. The pathways are displayed following the order as in this vector.} \item{pathway_level1_names}{character. Name of the column of \code{df} containing -information about pathways at level 1. Default: \code{pathway_level1}.} +information about pathways at level 1. Default: \code{"pathway_level1"}.} \item{pathway_level2_names}{character. Name of the column of \code{df} containing -information about pathways at level 2. Default: \code{pathway_level2}.} +information about pathways at level 2. Default: \code{"pathway_level2"}.} \item{taxon_names}{character. Name of the column of \code{df} containing information about taxa. This parameter is used to uniquely identify taxa.} @@ -63,27 +75,32 @@ information about kingdom. Default: \code{"kingdom"}.} \item{phylum_names}{character. Name of the column of \code{df} containing information about phylum. This parameter is used only if \code{category} is -one of: \code{"Chordata"}, \code{"Not Chordata"}. Default: -\code{"phylum"}.} +one of: \code{"Chordata"}, \code{"Not Chordata"}. Default: \code{"phylum"}.} \item{first_observed}{character. Name of the column of \code{df} containing information about year of introduction. Default: \code{"first_observed"}.} -\item{cbd_standard}{logical. If TRUE the values of pathway level 1 are +\item{cbd_standard}{logical. If \code{TRUE} the values of pathway level 1 are checked based on CBD standard as returned by \code{pathways_cbd()}. Error is -returned if unmatched values are found. If FALSE, a warning is returned. -Default: TRUE.} +returned if unmatched values are found. If \code{FALSE}, a warning is returned. +Default: \code{TRUE}.} -\item{title}{NULL or character. Title of the graph. Default: NULL.} +\item{title}{\code{NULL} or character. Title of the graph. Default: \code{NULL}.} -\item{x_lab}{NULL or character. x-axis label. Default: "Number of introduced -taxa".} +\item{x_lab}{\code{NULL} or character. x-axis label. Default: \code{"Number of introduced taxa"}.} -\item{y_lab}{NULL or character. Title of the graph. Default: "Pathways".} +\item{y_lab}{\code{NULL} or character. Title of the graph. Default: \code{"Pathways"}.} } \value{ -A ggplot2 object (or egg object if facets are used). NULL if there are -no data to plot. +A list with three slots: +\itemize{ +\item \code{plot}: ggplot2 object (or egg object if facets are used). \code{NULL} if there +are no data to plot. +\item \code{data_top_graph}: data.frame (tibble) with data used for the main plot (top +graph) in \code{plot}. +\item \code{data_facet_graph}: data.frame (tibble) with data used for the faceting +plot in \code{plot}. \code{NULL} is returned if \code{facet_column} is \code{NULL}. +} } \description{ Function to plot a line graph with number of taxa introduced over time @@ -105,7 +122,6 @@ data <- read_tsv(datafile, key = col_double(), nubKey = col_double(), speciesKey = col_double(), - acceptedKey = col_double(), first_observed = col_double(), last_observed = col_double() ) @@ -180,7 +196,7 @@ visualize_pathways_year_level2( data, chosen_pathway_level1 = "escape", x_lab = "Jaar", - y_lab = "Aantal geïntroduceerde taxa" + y_lab = "Aantal geintroduceerde taxa" ) } }