Skip to content

Commit

Permalink
fix how genus determined (#166)
Browse files Browse the repository at this point in the history
Fix how a "genus" is identified within a taxon name

- For hybrid genera the genus name is written as "x Genus" (e.g. "x Cynochloris"). This was causing problems in AusTraits, because I'd only included an ifelse to hunt for the leading "x" in some places

- As part of this fix, package `extract_genus` into its own function
ehwenk authored Nov 9, 2023

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
1 parent 585590c commit 3f3a6de
Showing 4 changed files with 33 additions and 10 deletions.
8 changes: 4 additions & 4 deletions R/load_taxonomic_resources.R
Original file line number Diff line number Diff line change
@@ -56,7 +56,7 @@ load_taxonomic_resources <-
dataset_name = .data$datasetName
) %>%
mutate(
genus = stringr::word(canonical_name, 1),
genus = extract_genus(canonical_name),
taxon_rank = stringr::str_to_lower(taxon_rank),
taxon_rank = stringr::str_replace(taxon_rank, "regnum", "kingdom"),
taxon_rank = stringr::str_replace(taxon_rank, "classis", "class"),
@@ -82,7 +82,7 @@ load_taxonomic_resources <-
name_element = .data$nameElement
) %>%
mutate(
genus = stringr::word(canonical_name, 1),
genus = extract_genus(canonical_name),
taxon_rank = stringr::str_to_lower(taxon_rank),
taxon_rank = stringr::str_replace(taxon_rank, "regnum", "kingdom"),
taxon_rank = stringr::str_replace(taxon_rank, "classis", "class"),
@@ -121,7 +121,7 @@ load_taxonomic_resources <-
),
binomial = ifelse(is.na(binomial), zzz, binomial),
binomial = base::replace(binomial, duplicated(binomial), zzz),
genus = stringr::word(stripped_canonical, 1),
genus = extract_genus(stripped_canonical),
trinomial = stringr::word(stripped_canonical2, start = 1, end = 3),
trinomial = ifelse(is.na(trinomial), zzz, trinomial),
trinomial = base::replace(trinomial, duplicated(trinomial), zzz),
@@ -159,7 +159,7 @@ load_taxonomic_resources <-
trinomial = stringr::word(stripped_canonical2, start = 1, end = 3),
trinomial = ifelse(is.na(trinomial), "zzzz zzzz", trinomial),
trinomial = base::replace(trinomial, duplicated(trinomial), "zzzz zzzz"),
genus = stringr::word(stripped_canonical, 1),
genus = extract_genus(stripped_canonical),
taxonomic_dataset = "APNI"
) %>%
dplyr::distinct() %>%
2 changes: 1 addition & 1 deletion R/match_taxa.R
Original file line number Diff line number Diff line change
@@ -84,7 +84,7 @@ match_taxa <- function(
update_na_with(strip_names_2(cleaned_name)),
trinomial = stringr::word(stripped_name2, start = 1, end = 3),
binomial = stringr::word(stripped_name2, start = 1, end = 2),
genus = stringr::word(original_name, start = 1, end = 1),
genus = extract_genus(original_name),
fuzzy_match_genus =
fuzzy_match_genera(genus, resources$genera_accepted$canonical_name),
fuzzy_match_genus_known =
24 changes: 24 additions & 0 deletions R/standardise_names.R
Original file line number Diff line number Diff line change
@@ -98,3 +98,27 @@ standardise_names <- function(taxon_names) {
stringr::str_squish()
}

#' Extract Genus
#'
#' Extracts the genus component of one or more scientific names.
#' A hybrid genus is written with a leading hybrid marker as a separate
#' first word (e.g. "x Cynochloris"); for such names the first two words are
#' returned. For every other name only the first word is returned
#' (e.g. "Banksia").
#'
#' The hybrid marker is recognised as a lowercase "x", an uppercase "X" or
#' the multiplication sign. The marker is returned exactly as it appears in
#' the input; it is not normalised.
#'
#' @param taxon_name A character vector of scientific (taxon) names. Words
#'   are expected to be separated by single spaces.
#'
#' @return A character vector the same length as `taxon_name` holding the
#'   genus of each name. Elements that are `NA` in the input are
#'   `NA_character_` in the output, and the result is always of type
#'   character (also for empty or all-`NA` input).
#'
#' @examples
#' \dontrun{
#' extract_genus("Banksia serrata")
#' extract_genus(c("x Cynochloris macivorii", "Banksia serrata", NA))
#' }
#'
extract_genus <- function(taxon_name) {
  first_word <- stringr::word(taxon_name, 1)

  # `%in%` never yields NA, so missing names simply fall through as
  # non-hybrid and keep their NA first word.
  # "\u00d7" is the multiplication sign, written as an escape to keep the
  # source file ASCII-only.
  is_hybrid <- first_word %in% c("x", "X", "\u00d7")

  # Start from the first word and overwrite only the hybrid entries. Unlike
  # `ifelse()`, indexed assignment keeps the result a character vector even
  # when every element is NA or the input is empty.
  genus <- first_word
  if (any(is_hybrid)) {
    genus[is_hybrid] <-
      stringr::word(taxon_name[is_hybrid], start = 1, end = 2)
  }

  genus
}
9 changes: 4 additions & 5 deletions R/update_taxonomy.R
Original file line number Diff line number Diff line change
@@ -97,7 +97,7 @@ update_taxonomy <- function(aligned_data,
aligned_data %>%
dplyr::select(original_name, aligned_name, taxon_rank, taxonomic_dataset, aligned_reason) %>%
dplyr::mutate(
genus = stringr::word(aligned_name, 1),
genus = extract_genus(aligned_name),
row_number = dplyr::row_number()
)

@@ -300,7 +300,7 @@ update_taxonomy_APC_genus <- function(data, resources) {
taxonomic_status = ifelse(is.na(accepted_name_usage_ID), as.character(my_order), paste("genus", taxonomic_status_genus)),
taxon_ID_genus = resources$genera_all$taxon_ID[match(accepted_name_usage_ID, resources$genera_all$accepted_name_usage_ID)],
# genus names in `aligned_name` that are not APC-accepted need to be updated to their current name in `suggested_name`
aligned_minus_genus = ifelse(is.na(genus_accepted), NA, stringr::str_replace(aligned_name, stringr::word(aligned_name, 1), "")),
aligned_minus_genus = ifelse(is.na(genus_accepted), NA, stringr::str_replace(aligned_name, extract_genus(aligned_name), "")),
suggested_name = ifelse(taxonomic_status == "accepted", paste0(genus_accepted, aligned_minus_genus), NA),
suggested_name = ifelse(taxonomic_status != "accepted", aligned_name, suggested_name),
# indicate taxonomic_status of the genus name in `aligned_name` and why it needed to be updated for the `suggested_name`
@@ -379,8 +379,7 @@ update_taxonomy_APC_species_and_infraspecific_taxa <- function(data, resources,
dplyr::filter(species_and_infraspecific(taxon_rank)) %>%
dplyr::distinct(canonical_name, .keep_all = TRUE) %>%
dplyr::select(canonical_name, accepted_name_usage_ID)



split_taxa_table <-
resources$APC %>%
dplyr::filter(species_and_infraspecific(taxon_rank)) %>%
@@ -548,7 +547,7 @@ update_taxonomy_APC_species_and_infraspecific_taxa <- function(data, resources,
suggested_name = ifelse(is.na(suggested_name), aligned_name, suggested_name),
taxonomic_status = ifelse(is.na(accepted_name), taxonomic_status_aligned, "accepted"),
# for APC-accepted species, the `genus` is the first word of the `accepted_name`
genus_accepted = stringr::word(suggested_name, 1),
genus_accepted = extract_genus(suggested_name),
taxon_ID_genus = resources$genera_all$taxon_ID[match(genus_accepted, resources$genera_all$canonical_name)],
update_reason = taxonomic_status_aligned,
taxonomic_dataset = "APC",

0 comments on commit 3f3a6de

Please sign in to comment.