Skip to content

Commit

Permalink
Improve checks and use pkg::func
Browse files Browse the repository at this point in the history
See #86
  • Loading branch information
damianooldoni committed Jul 1, 2022
1 parent 5d52674 commit 37466c4
Show file tree
Hide file tree
Showing 2 changed files with 85 additions and 82 deletions.
165 changes: 84 additions & 81 deletions R/visualize_pathways_level1.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#' vertebrates/invertebrates) and temporal (lower limit year). Facets can be
#' added (see argument `facet_column`).
#'
#' @param df df.
#' @param df A data frame.
#' @param category NULL or character. One of the kingdoms as given in GBIF and
#' `Chordata` (the phylum), `Not Chordata` (all other phyla of `Animalia`): 1.
#' `Plantae` 2. `Animalia` 3. `Fungi` 4. `Chromista` 5. `Archaea` 6.
Expand Down Expand Up @@ -46,16 +46,8 @@
#' are no data to plot.
#'
#' @export
#' @importFrom assertthat assert_that
#' @importFrom assertable assert_colnames
#' @importFrom dplyr %>% anti_join distinct filter if_else mutate pull rename_at
#' sym
#' @importFrom ggplot2 ggplot geom_bar theme ggtitle xlab ylab coord_flip
#' facet_wrap
#' @importFrom tidyselect all_of
#' @importFrom forcats fct_rev
#' @importFrom dplyr %>%
#' @importFrom rlang !!
#' @importFrom egg ggarrange
#'
#' @examples
#' \dontrun{
Expand Down Expand Up @@ -128,36 +120,39 @@ visualize_pathways_level1 <- function(df,
y_lab = "Pathways") {
# initial input checks
# Check df
assert_that(is.data.frame(df), msg = "`df` must be a data frame.")
assertthat::assert_that(is.data.frame(df), msg = "`df` must be a data frame.")

# Check pathway_level1_names
assert_that(is.character(pathway_level1_names),
assertthat::assert_that(is.character(pathway_level1_names),
msg = "`pathway_level1_names` must be a character."
)
assert_colnames(df, pathway_level1_names, only_colnames = FALSE)
assertable::assert_colnames(df, pathway_level1_names, only_colnames = FALSE)

# Check category
if (!is.null(category)) {
assert_that(is.character(category),
assertthat::assert_that(is.character(category),
msg = paste0(
"`category` must be a character. One of: ",
paste(categories, collapse = ", "),
"."
)
)
assert_that(category %in% categories,
assertthat::assert_that(category %in% categories,
msg = paste0(
"`category` is not correct. Choose one of: ",
paste0(categories, collapse = ", "),
"."
)
)
}
assert_that(is.null(facet_column) | is.character(facet_column),
assertthat::assert_that(is.null(facet_column) | is.character(facet_column),
msg = "Argument facet_column has to be NULL or a character."
)
if (!is.null(facet_column)) {
assertthat::assert_that(length(facet_column) == 1)
}
if (is.character(facet_column)) {
assert_colnames(df, facet_column, only_colnames = FALSE)
assertable::assert_colnames(df, facet_column, only_colnames = FALSE)
}
# check for valid facet options
valid_facet_options <- c(
Expand All @@ -166,19 +161,19 @@ visualize_pathways_level1 <- function(df,
)
if (is.character(facet_column)) {
facet_column <- match.arg(facet_column, valid_facet_options)
assert_that(is.null(category) || !(category == "Chordata" &
assertthat::assert_that(is.null(category) || !(category == "Chordata" &
facet_column == "phylum"),
msg = "You cannot use phylum as facet with category Chordata."
)
}
# Check pathways
if (!is.null(pathways)) {
assert_that(is.character(pathways),
assertthat::assert_that(is.character(pathways),
msg = "`pathways` must be a vector of characters."
)
invalid_pathways <- pathways[!pathways %in%
df[[pathway_level1_names]]]
assert_that(length(invalid_pathways) == 0,
assertthat::assert_that(length(invalid_pathways) == 0,
msg = paste0(
"Pathways in `pathways` not present in ",
"data.frame: ",
Expand All @@ -188,32 +183,36 @@ visualize_pathways_level1 <- function(df,
)
}
# Check taxon_names
assert_that(is.character(taxon_names),
assertthat::assert_that(is.character(taxon_names),
msg = "`taxon_names` must be a character."
)
assert_colnames(df, taxon_names, only_colnames = FALSE)
assertthat::assert_that(length(taxon_names) == 1)
assertable::assert_colnames(df, taxon_names, only_colnames = FALSE)
# Check kingdom_names
assert_that(is.character(kingdom_names),
assertthat::assert_that(is.character(kingdom_names),
msg = "`kingdom_names` must be a character."
)
assert_colnames(df, kingdom_names, only_colnames = FALSE)
assertthat::assert_that(length(kingdom_names) == 1)
assertable::assert_colnames(df, kingdom_names, only_colnames = FALSE)
# check parameter phylum
assert_that(is.character(phylum_names),
assertthat::assert_that(is.character(phylum_names),
msg = "`phylum_names` must be a character."
)
assert_colnames(df, phylum_names, only_colnames = FALSE)
assertthat::assert_that(length(phylum_names) == 1)
assertable::assert_colnames(df, phylum_names, only_colnames = FALSE)
# Check from
if (!is.null(from)) {
assert_that(is.numeric(from),
assertthat::assert_that(is.numeric(from),
msg = "`from` must be a number (year)."
)
assert_that(from > 0,
assertthat::assert_that(from > 0,
msg = "`from` must be a positive number."
)
assert_that(from == as.integer(from),
assertthat::assert_that(from == as.integer(from),
msg = "`from` must be an integer."
)
assert_that(from <= as.numeric(substr(Sys.Date(), start = 1, stop = 4)),
assertthat::assert_that(
from <= as.numeric(substr(Sys.Date(), start = 1, stop = 4)),
msg = paste0(
"`from` must be less than ",
format(Sys.Date(), "%Y"),
Expand All @@ -222,63 +221,67 @@ visualize_pathways_level1 <- function(df,
)
}
# Check first_observed
assert_that(is.character(first_observed),
assertthat::assert_that(is.character(first_observed),
msg = "`first_observed` must be a character."
)
assert_colnames(df, first_observed, only_colnames = FALSE)
assertthat::assert_that(length(first_observed) == 1)
assertable::assert_colnames(df, first_observed, only_colnames = FALSE)
# Check title and labels
assertthat::assert_that(is.null(title) | is.character(title),
msg = "`title` must be a character or NULL."
)
if (!is.null(title)) {
assert_that(is.character(title),
msg = "`title` must be a character or NULL."
)
assertthat::assert_that(length(title) == 1)
}
assertthat::assert_that(is.character(x_lab),
msg = "`x_lab` must be a character or NULL."
)
if (!is.null(x_lab)) {
assert_that(is.character(x_lab),
msg = "`x_lab` must be a character or NULL."
)
assertthat::assert_that(length(x_lab) == 1)
}
assertthat::assert_that(is.character(y_lab),
msg = "`y_lab` must be a character or NULL."
)
if (!is.null(y_lab)) {
assert_that(is.character(y_lab),
msg = "`y_lab` must be a character or NULL."
)
assertthat::assert_that(length(y_lab) == 1)
}
# rename to default column name
df <-
df %>%
rename_at(vars(all_of(kingdom_names)), ~"group") %>%
rename_at(vars(all_of(taxon_names)), ~"taxonKey") %>%
rename_at(vars(all_of(first_observed)), ~"first_observed") %>%
rename_at(vars(all_of(pathway_level1_names)), ~"pathway_level1")
dplyr::rename_at(vars(tidyselect::all_of(kingdom_names)), ~"group") %>%
dplyr::rename_at(vars(tidyselect::all_of(taxon_names)), ~"taxonKey") %>%
dplyr::rename_at(vars(tidyselect::all_of(first_observed)), ~"first_observed") %>%
dplyr::rename_at(vars(tidyselect::all_of(pathway_level1_names)), ~"pathway_level1")
# handle asymmetric category system (Chordata, Not Chordata are not kingdoms)
if (!is.null(category)) {
if (!category %in% c("Chordata", "Not Chordata")) {
df <- df %>% filter(.data$group == category)
df <- df %>% dplyr::filter(.data$group == category)
} else {
df <-
df %>%
rename_at(vars(phylum_names), ~"phylum_group")
dplyr::rename_at(vars(phylum_names), ~"phylum_group")
if (category == "Chordata") {
df <- df %>% filter(.data$phylum_group == category)
df <- df %>% dplyr::filter(.data$phylum_group == category)
} else {
df <-
df %>%
filter(.data$group == "Animalia") %>%
filter(.data$phylum_group != "Chordata")
dplyr::filter(.data$group == "Animalia") %>%
dplyr::filter(.data$phylum_group != "Chordata")
}
}
}
# Apply cut-off on year of introduction if given
if (!is.null(from)) {
df <-
df %>%
filter(.data$first_observed >= from)
dplyr::filter(.data$first_observed >= from)
}
# Handle NAs and ""
nas_or_empty_pathway_level1 <-
df %>%
filter(is.na(.data$pathway_level1) |
dplyr::filter(is.na(.data$pathway_level1) |
.data$pathway_level1 == "") %>%
distinct(.data$taxonKey)
dplyr::distinct(.data$taxonKey)
if (nrow(nas_or_empty_pathway_level1) > 0) {
message_warning <- paste(
nrow(nas_or_empty_pathway_level1),
Expand All @@ -290,37 +293,37 @@ visualize_pathways_level1 <- function(df,
df <-
df %>%
# Handle NAs and "unknown"
mutate(pathway_level1 = if_else(is.na(.data$pathway_level1) |
dplyr::mutate(pathway_level1 = dplyr::if_else(is.na(.data$pathway_level1) |
.data$pathway_level1 == "",
"unknown",
.data$pathway_level1
))
# Import all CBD pathways level 1
pathways_level1_all <-
pathways_cbd() %>%
distinct(.data$pathway_level1)
dplyr::distinct(.data$pathway_level1)
# Select pathways
if (!is.null(pathways)) {
pathways <- replace(pathways, is.na(pathways) | pathways == "", "unknown")
pathways <- unique(pathways)
df <-
df %>%
filter(.data$pathway_level1 %in% pathways)
dplyr::filter(.data$pathway_level1 %in% pathways)
} else {
if (cbd_standard == TRUE) {
pathways <- pathways_level1_all %>% pull()
pathways <- pathways_level1_all %>% dplyr::pull()
} else {
pathways <- unique(df$pathway_level1)
}
}
# Check values in column with pathways level 1
invalid_pathways <-
df %>%
anti_join(pathways_level1_all,
dplyr::anti_join(pathways_level1_all,
by = "pathway_level1"
) %>%
distinct(.data$pathway_level1) %>%
pull()
dplyr::distinct(.data$pathway_level1) %>%
dplyr::pull()
message_invalid_pathways <-
paste0(
"No CBD standard pathways level 1 value(s) in column `",
Expand All @@ -332,58 +335,58 @@ visualize_pathways_level1 <- function(df,
"."
)
if (cbd_standard == TRUE) {
assert_that(length(invalid_pathways) == 0,
assertthat::assert_that(length(invalid_pathways) == 0,
msg = message_invalid_pathways
)
} else {
warning(message_invalid_pathways)
}
# Distinct taxa
# Distinct taxa
if (!is.null(facet_column)) {
df <-
df %>%
distinct(.data$taxonKey, .data$pathway_level1, !!sym(facet_column))
dplyr::distinct(.data$taxonKey, .data$pathway_level1, !!dplyr::sym(facet_column))
}
# Transform pathway level 1 column to factor to make ordering in graph easily
df <-
df %>%
mutate(pathway_level1 = factor(.data$pathway_level1, levels = pathways))
# Distinct taxa without facet
dplyr::mutate(pathway_level1 = factor(.data$pathway_level1, levels = pathways))
# Distinct taxa without facet
df_top_graph <-
df %>%
distinct(.data$taxonKey, .data$pathway_level1)
dplyr::distinct(.data$taxonKey, .data$pathway_level1)
# Plot number of taxa per pathway_level1
top_graph <- NULL
if (nrow(df_top_graph) > 0) {
top_graph <-
ggplot(
ggplot2::ggplot(
df_top_graph
) +
geom_bar(aes(x = fct_rev(.data$pathway_level1))) +
xlab(y_lab) +
ylab(x_lab) +
coord_flip() +
ggtitle(title)
ggplot2::geom_bar(aes(x = forcats::fct_rev(.data$pathway_level1))) +
ggplot2::xlab(y_lab) +
ggplot2::ylab(x_lab) +
ggplot2::coord_flip() +
ggplot2::ggtitle(title)
}
if (is.null(facet_column)) {
return(top_graph)
} else {
facet_graph <- NULL
if (nrow(df) > 0) {
facet_graph <-
ggplot(
ggplot2::ggplot(
df,
aes(x = fct_rev(.data$pathway_level1))
aes(x = forcats::fct_rev(.data$pathway_level1))
) +
geom_bar() +
xlab(y_lab) +
ylab(x_lab) +
coord_flip() +
ggtitle(title) +
facet_wrap(facet_column)
ggplot2::geom_bar() +
ggplot2::xlab(y_lab) +
ggplot2::ylab(x_lab) +
ggplot2::coord_flip() +
ggplot2::ggtitle(title) +
ggplot2::facet_wrap(facet_column)
}
if (all(!is.null(top_graph), !is.null(facet_graph))) {
ggarrange(top_graph, facet_graph)
egg::ggarrange(top_graph, facet_graph)
}
else {
NULL
Expand Down
2 changes: 1 addition & 1 deletion man/visualize_pathways_level1.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 37466c4

Please sign in to comment.