Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update develop to have new changes #171

Merged
merged 3 commits into from
Nov 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions R/load_taxonomic_resources.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ load_taxonomic_resources <-
dataset_name = .data$datasetName
) %>%
mutate(
genus = stringr::word(canonical_name, 1),
genus = extract_genus(canonical_name),
taxon_rank = stringr::str_to_lower(taxon_rank),
taxon_rank = stringr::str_replace(taxon_rank, "regnum", "kingdom"),
taxon_rank = stringr::str_replace(taxon_rank, "classis", "class"),
Expand All @@ -82,7 +82,7 @@ load_taxonomic_resources <-
name_element = .data$nameElement
) %>%
mutate(
genus = stringr::word(canonical_name, 1),
genus = extract_genus(canonical_name),
taxon_rank = stringr::str_to_lower(taxon_rank),
taxon_rank = stringr::str_replace(taxon_rank, "regnum", "kingdom"),
taxon_rank = stringr::str_replace(taxon_rank, "classis", "class"),
Expand Down Expand Up @@ -121,7 +121,7 @@ load_taxonomic_resources <-
),
binomial = ifelse(is.na(binomial), zzz, binomial),
binomial = base::replace(binomial, duplicated(binomial), zzz),
genus = stringr::word(stripped_canonical, 1),
genus = extract_genus(stripped_canonical),
trinomial = stringr::word(stripped_canonical2, start = 1, end = 3),
trinomial = ifelse(is.na(trinomial), zzz, trinomial),
trinomial = base::replace(trinomial, duplicated(trinomial), zzz),
Expand Down Expand Up @@ -159,7 +159,7 @@ load_taxonomic_resources <-
trinomial = stringr::word(stripped_canonical2, start = 1, end = 3),
trinomial = ifelse(is.na(trinomial), "zzzz zzzz", trinomial),
trinomial = base::replace(trinomial, duplicated(trinomial), "zzzz zzzz"),
genus = stringr::word(stripped_canonical, 1),
genus = extract_genus(stripped_canonical),
taxonomic_dataset = "APNI"
) %>%
dplyr::distinct() %>%
Expand Down
2 changes: 1 addition & 1 deletion R/match_taxa.R
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ match_taxa <- function(
update_na_with(strip_names_2(cleaned_name)),
trinomial = stringr::word(stripped_name2, start = 1, end = 3),
binomial = stringr::word(stripped_name2, start = 1, end = 2),
genus = stringr::word(original_name, start = 1, end = 1),
genus = extract_genus(original_name),
fuzzy_match_genus =
fuzzy_match_genera(genus, resources$genera_accepted$canonical_name),
fuzzy_match_genus_known =
Expand Down
24 changes: 24 additions & 0 deletions R/standardise_names.R
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,27 @@ standardise_names <- function(taxon_names) {
stringr::str_squish()
}

#' Extract Genus
#'
#' Extracts the genus component of one or more scientific names.
#' A name whose first word is exactly "x" is treated as a hybrid genus and
#' its first two words are returned (e.g. "x Cynochloris"); for any other
#' name only the first word is returned (e.g. "Banksia").
#' Words are delimited the way `stringr::word()` delimits them.
#'
#' @param taxon_name A character vector of scientific names.
#'
#' @return A character vector the same length as `taxon_name` holding the
#'   genus for each name. Elements that are `NA` in `taxon_name` are returned
#'   as `NA`.
#'
#' @examples
#' \dontrun{
#' extract_genus(c("Banksia integrifolia", "x Cynochloris macivorii"))
#' }
#'
extract_genus <- function(taxon_name) {
  # Empty input: return a typed empty result so the output is always
  # character (an `ifelse()`-based version yields `logical(0)` here).
  if (length(taxon_name) == 0) {
    return(character(0))
  }

  # The first word is needed both for the hybrid test and as the default
  # result, so compute it once.
  first_word <- stringr::word(taxon_name, 1)

  # `!is.na()` keeps missing names out of the hybrid branch; they stay NA.
  is_hybrid <- !is.na(first_word) & first_word == "x"

  genus <- first_word
  if (any(is_hybrid)) {
    # Hybrid genera keep the leading "x" plus the genus name itself.
    genus[is_hybrid] <-
      stringr::word(taxon_name[is_hybrid], start = 1, end = 2)
  }
  genus
}
24 changes: 13 additions & 11 deletions R/update_taxonomy.R
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ update_taxonomy <- function(aligned_data,
aligned_data %>%
dplyr::select(original_name, aligned_name, taxon_rank, taxonomic_dataset, aligned_reason) %>%
dplyr::mutate(
genus = stringr::word(aligned_name, 1),
genus = extract_genus(aligned_name),
row_number = dplyr::row_number()
)

Expand Down Expand Up @@ -267,7 +267,7 @@ relevel_taxonomic_status_preferred_order <- function(taxonomic_status) {
# Function to update names of taxa whose aligned_names are
# taxon_rank = genus and taxonomic_dataset = APC
update_taxonomy_APC_genus <- function(data, resources) {

if(is.null(data)) return(NULL)

data %>%
Expand Down Expand Up @@ -300,7 +300,7 @@ update_taxonomy_APC_genus <- function(data, resources) {
taxonomic_status = ifelse(is.na(accepted_name_usage_ID), as.character(my_order), paste("genus", taxonomic_status_genus)),
taxon_ID_genus = resources$genera_all$taxon_ID[match(accepted_name_usage_ID, resources$genera_all$accepted_name_usage_ID)],
# genus names in `aligned_name` that are not APC-accepted need to be updated to their current name in `suggested_name`
aligned_minus_genus = ifelse(is.na(genus_accepted), NA, stringr::str_replace(aligned_name, stringr::word(aligned_name, 1), "")),
aligned_minus_genus = ifelse(is.na(genus_accepted), NA, stringr::str_replace(aligned_name, extract_genus(aligned_name), "")),
suggested_name = ifelse(taxonomic_status == "accepted", paste0(genus_accepted, aligned_minus_genus), NA),
suggested_name = ifelse(taxonomic_status != "accepted", aligned_name, suggested_name),
# indicate taxonomic_status of the genus name in `aligned_name` and why it needed to be updated for the `suggested_name`
Expand Down Expand Up @@ -341,10 +341,10 @@ update_taxonomy_APNI_genus <- function(data, resources) {
) %>%
# the `suggested_name` is set to the aligned_name and other columns are set to NA
dplyr::mutate(
genus = NA_character_, # genera only in APNI don't appear in this column
accepted_name = NA_character_,
suggested_name = aligned_name,
taxonomic_status_genus = "unplaced"
taxonomic_status_genus = "genus unplaced by APC",
taxonomic_status = "genus unplaced by APC",
)
}

Expand Down Expand Up @@ -379,8 +379,7 @@ update_taxonomy_APC_species_and_infraspecific_taxa <- function(data, resources,
dplyr::filter(species_and_infraspecific(taxon_rank)) %>%
dplyr::distinct(canonical_name, .keep_all = TRUE) %>%
dplyr::select(canonical_name, accepted_name_usage_ID)



split_taxa_table <-
resources$APC %>%
dplyr::filter(species_and_infraspecific(taxon_rank)) %>%
Expand Down Expand Up @@ -495,8 +494,7 @@ update_taxonomy_APC_species_and_infraspecific_taxa <- function(data, resources,
dplyr::select(
aligned_name,
taxonomic_status_aligned,
accepted_name_usage_ID,
scientific_name_ID
accepted_name_usage_ID
)
) %>%
## Second, find accepted names for each name in the species (and infraspecific taxon) list (sometimes they are the same)
Expand All @@ -515,6 +513,7 @@ update_taxonomy_APC_species_and_infraspecific_taxa <- function(data, resources,
accepted_name,
taxonomic_status,
scientific_name,
scientific_name_ID,
family,
subclass,
taxon_distribution
Expand Down Expand Up @@ -548,7 +547,7 @@ update_taxonomy_APC_species_and_infraspecific_taxa <- function(data, resources,
suggested_name = ifelse(is.na(suggested_name), aligned_name, suggested_name),
taxonomic_status = ifelse(is.na(accepted_name), taxonomic_status_aligned, "accepted"),
# for APC-accepted species, the `genus` is the leading genus portion of the name (the first word, or the first two words for a hybrid genus such as "x Cynochloris")
genus_accepted = stringr::word(suggested_name, 1),
genus_accepted = extract_genus(suggested_name),
taxon_ID_genus = resources$genera_all$taxon_ID[match(genus_accepted, resources$genera_all$canonical_name)],
update_reason = taxonomic_status_aligned,
taxonomic_dataset = "APC",
Expand Down Expand Up @@ -629,7 +628,10 @@ update_taxonomy_APNI_species_and_infraspecific_taxa <- function(data, resources)
## have proper columns filled in (`genus` & `taxonomic_ID_genus` & `taxonomic_dataset_genus`),
## while APNI names that do not align with an APC-accepted genus have these columns set to NA
dplyr::mutate(
genus = ifelse(is.na(accepted_name_usage_ID_genus), NA_character_, resources$genera_all$canonical_name[match(accepted_name_usage_ID_genus, resources$genera_all$taxon_ID)]),
genus = ifelse(
is.na(accepted_name_usage_ID_genus),
genus,
resources$genera_all$canonical_name[match(accepted_name_usage_ID_genus, resources$genera_all$taxon_ID)]),
taxon_ID_genus = resources$genera_all$taxon_ID[match(genus, resources$genera_all$canonical_name)],
taxonomic_dataset_genus = ifelse(stringr::str_detect(taxonomic_dataset_genus, "APC"), "APC", taxonomic_dataset_genus)
) %>%
Expand Down
4 changes: 2 additions & 2 deletions tests/testthat/benchmarks/consistency_lookup.csv
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ Acacia paraneura,Acacia paraneura,Exact match of taxon name to an APC-accepted c
Athrotaxis laxiflolia,Arthrotaxis laxifolia,Fuzzy match of taxon name to an APC-known canonical name once punctuation and filler words are removed (2023-08-17),APC,https://id.biodiversity.org.au/instance/apni/951456,orthographic variant,NA,https://id.biodiversity.org.au/node/apni/7555184,Athrotaxis x laxifolia,Hook.,https://id.biodiversity.org.au/name/apni/245642,species,accepted,Cupressaceae,Pinidae,Tas,https://id.biodiversity.org.au/node/apni/7555184,Athrotaxis
Banksia integrifolia,Banksia integrifolia,Exact match of taxon name to an APC-accepted canonical name once punctuation and filler words are removed (2023-08-17),APC,https://id.biodiversity.org.au/node/apni/2892579,accepted,misapplied,https://id.biodiversity.org.au/node/apni/2892579,Banksia integrifolia,L.f.,https://id.biodiversity.org.au/name/apni/107602,species,accepted,Proteaceae,Magnoliidae,"WA (naturalised), Qld, NSW, NI (naturalised), Vic, Tas",https://id.biodiversity.org.au/node/apni/2892579,Banksia
Commersonia rosea,Commersonia rosea,Exact match of taxon name to an APC-known canonical name once punctuation and filler words are removed (2023-08-17),APC,https://id.biodiversity.org.au/instance/apni/924361,basionym,NA,https://id.biodiversity.org.au/node/apni/2902836,Androcalva rosea,(S.A.J.Bell & L.M.Copel.) C.F.Wilkins & Whitlock,https://id.biodiversity.org.au/name/apni/188176,species,accepted,Malvaceae,Magnoliidae,NSW,https://id.biodiversity.org.au/node/apni/2902836,Androcalva
Galactia striata,Galactia sp. [Galactia striata],Exact match of the first word of the taxon name to an APC-accepted genus (2023-08-17),APNI,NA,unplaced by APC,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
Galactia striata,Galactia sp. [Galactia striata],Exact match of the first word of the taxon name to an APC-accepted genus (2023-08-17),APNI,NA,unplaced by APC,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,Galactia
Genoplesium insigne,Genoplesium insigne,Exact match of taxon name to an APC-known canonical name once punctuation and filler words are removed (2023-08-17),APC,https://id.biodiversity.org.au/instance/apni/51441225,nomenclatural synonym,NA,https://id.biodiversity.org.au/taxon/apni/51441228,Corunastylis insignis,(D.L.Jones) D.L.Jones & M.A.Clem.,https://id.biodiversity.org.au/name/apni/167942,species,accepted,Orchidaceae,Magnoliidae,NSW,https://id.biodiversity.org.au/taxon/apni/51441228,Corunastylis
Hibbertia sericea,Hibbertia sericea,Exact match of taxon name to an APC-accepted canonical name once punctuation and filler words are removed (2023-08-17),APC,https://id.biodiversity.org.au/instance/apni/912933,pro parte misapplied,misapplied | pro parte misapplied,https://id.biodiversity.org.au/node/apni/2899370,Hibbertia crinita,Toelken,https://id.biodiversity.org.au/name/apni/90852,species,accepted,Dilleniaceae,Magnoliidae,"SA, Vic",https://id.biodiversity.org.au/node/apni/2899370,Hibbertia
Hibbertia sericea,Hibbertia sericea,Exact match of taxon name to an APC-accepted canonical name once punctuation and filler words are removed (2023-08-17),APC,https://id.biodiversity.org.au/instance/apni/912932,pro parte misapplied,misapplied | pro parte misapplied,https://id.biodiversity.org.au/node/apni/2899370,Hibbertia crinita,Toelken,https://id.biodiversity.org.au/name/apni/90852,species,accepted,Dilleniaceae,Magnoliidae,"SA, Vic",https://id.biodiversity.org.au/node/apni/2899370,Hibbertia
Expand Down Expand Up @@ -43,7 +43,7 @@ Hibbertia sericea,Hibbertia sericea,Exact match of taxon name to an APC-accepted
Hibbertia sericea,Hibbertia sericea,Exact match of taxon name to an APC-accepted canonical name once punctuation and filler words are removed (2023-08-17),APC,https://id.biodiversity.org.au/instance/apni/916524,pro parte misapplied,misapplied | pro parte misapplied,https://id.biodiversity.org.au/node/apni/2901708,Hibbertia simulans,Toelken,https://id.biodiversity.org.au/name/apni/90852,species,accepted,Dilleniaceae,Magnoliidae,NSW,https://id.biodiversity.org.au/node/apni/2901708,Hibbertia
Hibbertia sericea,Hibbertia sericea,Exact match of taxon name to an APC-accepted canonical name once punctuation and filler words are removed (2023-08-17),APC,https://id.biodiversity.org.au/instance/apni/916556,pro parte misapplied,misapplied | pro parte misapplied,https://id.biodiversity.org.au/node/apni/2910589,Hibbertia superans,Toelken,https://id.biodiversity.org.au/name/apni/90852,species,accepted,Dilleniaceae,Magnoliidae,NSW,https://id.biodiversity.org.au/node/apni/2910589,Hibbertia
Hibbertia sericea,Hibbertia sericea,Exact match of taxon name to an APC-accepted canonical name once punctuation and filler words are removed (2023-08-17),APC,https://id.biodiversity.org.au/instance/apni/916566,pro parte misapplied,misapplied | pro parte misapplied,https://id.biodiversity.org.au/node/apni/2915755,Hibbertia villifera,Tepper ex Toelken,https://id.biodiversity.org.au/name/apni/90852,species,accepted,Dilleniaceae,Magnoliidae,SA,https://id.biodiversity.org.au/node/apni/2915755,Hibbertia
Hibbertia sp.,Hibbertia sp.,Exact match of taxon name ending with `sp.` to an APC accepted genus (2023-08-17),APNI,NA,unplaced by APC,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
Hibbertia sp.,Hibbertia sp.,Exact match of taxon name ending with `sp.` to an APC accepted genus (2023-08-17),APNI,NA,unplaced by APC,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,Hibbertia
Hibbertia stricta,Hibbertia stricta,Exact match of taxon name to an APC-accepted canonical name once punctuation and filler words are removed (2023-08-17),APC,https://id.biodiversity.org.au/instance/apni/910062,pro parte misapplied,taxonomic synonym | misapplied | pro parte misapplied,https://id.biodiversity.org.au/node/apni/2906127,Hibbertia australis,N.A.Wakef.,https://id.biodiversity.org.au/name/apni/91036,species,accepted,Dilleniaceae,Magnoliidae,"SA, Vic",https://id.biodiversity.org.au/node/apni/2906127,Hibbertia
Hibbertia stricta,Hibbertia stricta,Exact match of taxon name to an APC-accepted canonical name once punctuation and filler words are removed (2023-08-17),APC,https://id.biodiversity.org.au/instance/apni/916725,pro parte misapplied,taxonomic synonym | misapplied | pro parte misapplied,https://id.biodiversity.org.au/node/apni/2915804,Hibbertia devitata,Toelken,https://id.biodiversity.org.au/name/apni/91036,species,accepted,Dilleniaceae,Magnoliidae,"SA, Vic",https://id.biodiversity.org.au/node/apni/2915804,Hibbertia
Hibbertia stricta,Hibbertia stricta,Exact match of taxon name to an APC-accepted canonical name once punctuation and filler words are removed (2023-08-17),APC,https://id.biodiversity.org.au/instance/apni/916770,pro parte misapplied,taxonomic synonym | misapplied | pro parte misapplied,https://id.biodiversity.org.au/taxon/apni/51300211,Hibbertia glebosa,Toelken,https://id.biodiversity.org.au/name/apni/91036,species,accepted,Dilleniaceae,Magnoliidae,SA,https://id.biodiversity.org.au/taxon/apni/51300211,Hibbertia
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-alignment_executes.R
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ test_that("handles APNI taxa and genus level IDs",{
original_name <- c("Acacia sp.", "Dendropanax amplifolius", "Acanthopanax divaricatum", "Eucalyptus sp.")
taxon_rank <- c("genus", "species", "species", "genus")
taxonomic_dataset <- c("APC", "APNI", "APNI", "APC")
genus_updated <- c("Acacia", NA, NA, "Eucalyptus")
genus_updated <- c("Acacia", "Dendropanax", "Acanthopanax", "Eucalyptus")

out1 <-
align_taxa(original_name, resources = resources)
Expand Down