diff --git a/checklistbank-cli/src/main/java/org/gbif/checklistbank/neo/UsageDao.java b/checklistbank-cli/src/main/java/org/gbif/checklistbank/neo/UsageDao.java
index a59568a3..a38c4aa8 100644
--- a/checklistbank-cli/src/main/java/org/gbif/checklistbank/neo/UsageDao.java
+++ b/checklistbank-cli/src/main/java/org/gbif/checklistbank/neo/UsageDao.java
@@ -422,7 +422,7 @@ private static void storeEnum(Node n, String property, Enum value) {
     }
   }
 
-  private Rank readRank(Node n) {
+  public Rank readRank(Node n) {
     return readEnum(n, NeoProperties.RANK, Rank.class, Rank.UNRANKED);
   }
 
diff --git a/checklistbank-cli/src/main/java/org/gbif/checklistbank/nub/NubBuilder.java b/checklistbank-cli/src/main/java/org/gbif/checklistbank/nub/NubBuilder.java
index 45902c93..a28c7b55 100644
--- a/checklistbank-cli/src/main/java/org/gbif/checklistbank/nub/NubBuilder.java
+++ b/checklistbank-cli/src/main/java/org/gbif/checklistbank/nub/NubBuilder.java
@@ -1080,10 +1080,12 @@ private NubUsage processSourceUsage(SrcUsage u, Origin origin, NubUsage parent)
         )) {
 
       if (!match.isMatch() || (
-          fromCurrentSource(match.usage) && currSrc.supragenericHomonymSource &&
-              !IGNORABLE_ORIGINS.contains(origin) && !IGNORABLE_ORIGINS.contains(match.usage.origin)
-      )) {
-
+          currSrc.supragenericHomonymSource
+              && fromCurrentSource(match.usage)
+              && (u.rank.isSuprageneric() || !sameFamily(parent, match.usage))
+              && !IGNORABLE_ORIGINS.contains(origin) && !IGNORABLE_ORIGINS.contains(match.usage.origin)
+          )
+      ) {
         // remember if we had a doubtful match
         NubUsage doubtful = match.doubtfulUsage;
         // persistent new nub usage if there wasnt any yet
@@ -1151,6 +1153,22 @@ private NubUsage processSourceUsage(SrcUsage u, Origin origin, NubUsage parent)
     return match.usage;
   }
 
+  /**
+   * @return true if both usages resolve to the same family node, or if neither resolves to one
+   */
+  private boolean sameFamily(NubUsage u1, NubUsage u2) {
+    Node f1 = family(u1);
+    Node f2 = family(u2);
+    return Objects.equals(f1, f2);
+  }
+
+  /**
+   * @return the usage's own node if it is a family, otherwise its parent node of rank family, null if there is none
+   */
+  private Node family(NubUsage u) {
+    return u.rank == Rank.FAMILY ? u.node : db.parent(u.node, Rank.FAMILY);
+  }
+
   private void delete(NubUsage nub) {
     for (int sourceId : nub.sourceIds) {
       src2NubKey.remove(sourceId);
diff --git a/checklistbank-cli/src/main/java/org/gbif/checklistbank/nub/NubDb.java b/checklistbank-cli/src/main/java/org/gbif/checklistbank/nub/NubDb.java
index 28620081..d0f74a11 100644
--- a/checklistbank-cli/src/main/java/org/gbif/checklistbank/nub/NubDb.java
+++ b/checklistbank-cli/src/main/java/org/gbif/checklistbank/nub/NubDb.java
@@ -134,8 +134,21 @@ public Node parent(Node child) {
   }
 
   /**
-   * @return the parent (or accepted) nodes for a given node.
+   * @return the first parent (or accepted) node of the given node that has the given rank, or null if there is none.
    */
+  public Node parent(Node n, Rank rank) {
+    for (Node pn : parents(n)) {
+      Rank pr = dao.readRank(pn);
+      if (pr == rank) {
+        return pn;
+      }
+    }
+    return null;
+  }
+
+  /**
+   * @return the parent (or accepted) nodes for a given node.
+   */
   public List parents(Node n) {
     return Iterables.asList(Traversals.PARENTS
         .relationships(RelType.SYNONYM_OF, Direction.OUTGOING)
@@ -413,6 +426,27 @@ public NubUsageMatch findNubUsage(UUID currSource, ParsedName pn, @Nullable Rank
       return NubUsageMatch.snap(synonym);
     }
 
+    // all canonical, one with the same parent?
+    if (currNubParent != null && checked.size() >= 2) {
+      NubUsage sibling = null;
+      for (NubUsage u : checked) {
+        // null-safe: a candidate without a parent node must be skipped, not abort matching with an NPE
+        Node candidateParent = parent(u.node);
+        if (candidateParent != null && candidateParent.equals(currNubParent.node)) {
+          if (sibling == null) {
+            sibling = u;
+          } else {
+            // multiple found!
+            sibling = null;
+            break;
+          }
+        }
+      }
+      if (sibling != null) {
+        return NubUsageMatch.match(sibling);
+      }
+    }
+
     // try to do better authorship matching, remove canonical matches
     if (qualifiedName) {
       iter = checked.iterator();
@@ -454,8 +488,10 @@ public NubUsageMatch findNubUsage(UUID currSource, ParsedName pn, @Nullable Rank
     if (curr != null) {
       curr.issues.add(NameUsageIssue.HOMONYM);
       curr.addRemark("Homonym known in other sources: " + pn.getScientificName());
-      LOG.warn("{} ambigous homonyms encountered for {} in source {}, picking largest taxon", checked.size(), pn.getScientificName(), currSource);
+      LOG.warn("{} ambiguous homonyms encountered for {} in source {}, picking largest taxon", checked.size(), pn.getScientificName(), currSource);
       return NubUsageMatch.snap(curr);
+    } else if (checked.isEmpty()) {
+      return NubUsageMatch.empty();
     }
 
     throw new IgnoreSourceUsageException("homonym " + pn.getScientificName(), pn.getScientificName());
diff --git a/checklistbank-cli/src/test/java/org/gbif/checklistbank/nub/NubBuilderIT.java b/checklistbank-cli/src/test/java/org/gbif/checklistbank/nub/NubBuilderIT.java
index a5687266..2c608a4c 100644
--- a/checklistbank-cli/src/test/java/org/gbif/checklistbank/nub/NubBuilderIT.java
+++ b/checklistbank-cli/src/test/java/org/gbif/checklistbank/nub/NubBuilderIT.java
@@ -2171,6 +2171,18 @@ public void redundantHyla() throws Exception {
     assertTree("184 185.txt");
   }
 
+  /**
+   * https://github.com/gbif/checklistbank/issues/273
+   */
+  @Test
+  public void implicitGenera() throws Exception {
+    ClasspathSourceList src = ClasspathSourceList.source(neoRepo.cfg, 186);
+    src.setSourceRank(186, Rank.KINGDOM);
+    src.setSupragenericHomonymSource(186);
+    build(src);
+    assertTree("186.txt");
+  }
+
   /**
    * For profiling memory usage of nub builds
    */
diff --git a/checklistbank-cli/src/test/resources/nub-sources/dataset186.txt b/checklistbank-cli/src/test/resources/nub-sources/dataset186.txt
new file mode 100644
index 00000000..750ea6c4
--- /dev/null
+++ b/checklistbank-cli/src/test/resources/nub-sources/dataset186.txt
@@ -0,0 +1,33 @@
+1 UNRANKED ACCEPTED Biota
+2 1 KINGDOM ACCEPTED Animalia
+3 2 PHYLUM ACCEPTED Chordata
+4 3 SUBPHYLUM ACCEPTED Vertebrata
+5 4 INFRAPHYLUM ACCEPTED Gnathostomata
+6 5 UNRANKED ACCEPTED Osteichthyes
+7 6 UNRANKED ACCEPTED Sarcopterygii
+8 7 UNRANKED ACCEPTED Tetrapoda
+9 8 CLASS ACCEPTED Amphibia
+10 9 ORDER ACCEPTED Anura
+11 10 FAMILY ACCEPTED Hylidae Rafinesque, 1815
+12 11 GENUS ACCEPTED Hyla
+20 12 SPECIES ACCEPTED Hyla imitator (Barbour & Dunn, 1921)
+21 12 SPECIES ACCEPTED Hyla nicefori (Cochran & Goin, 1970)
+13 11 SUBFAMILY ACCEPTED Hylinae Rafinesque, 1815
+14 13 GENUS ACCEPTED Hyla Laurenti, 1768
+22 14 SPECIES ACCEPTED Hyla annectans (Jerdon, 1870)
+23 14 SPECIES ACCEPTED Hyla arborea (Linnaeus, 1758)
+24 14 SPECIES ACCEPTED Hyla chinensis Günther, 1858
+25 14 SPECIES ACCEPTED Hyla hallowellii Thompson, 1912
+30 10 SUPERFAMILY ACCEPTED Brachycephaloidea Günther, 1858
+31 30 FAMILY ACCEPTED Brachycephalidae Günther, 1858
+32 31 GENUS ACCEPTED Brachycephalus Fitzinger, 1826
+37 32 SPECIES ACCEPTED Brachycephalus germanicus Döring, 2023
+33 31 GENUS ACCEPTED Ischnocnema Reinhardt & Lütken, 1862
+34 33 SPECIES ACCEPTED Ischnocnema alpina Döring, 2023
+35 31 GENUS ACCEPTED Ischnocnema
+36 35 SPECIES ACCEPTED Ischnocnema vulgaris Döring, 2023
+40 10 FAMILY ACCEPTED Inventedidae Döring, 2023
+41 40 GENUS ACCEPTED Inventeda Döring, 2023
+42 41 SPECIES ACCEPTED Inventeda pulchra Döring, 2023
+43 40 GENUS ACCEPTED Ischnocnema
+44 43 SPECIES ACCEPTED Ischnocnema viatrix Döring, 2023
diff --git a/checklistbank-cli/src/test/resources/trees/184 185.txt b/checklistbank-cli/src/test/resources/trees/184 185.txt
index b88f3e5f..42e73dec 100644
--- a/checklistbank-cli/src/test/resources/trees/184 185.txt
+++ b/checklistbank-cli/src/test/resources/trees/184 185.txt
@@ -3,14 +3,13 @@ Animalia [kingdom]
     Amphibia [class]
       Anura [order]
         Hylidae [family]
-          Hyla [genus]
-            Hyla imitator (Barbour & Dunn, 1921) [species]
-            Hyla nicefori (Cochran & Goin, 1970) [species]
           Hyla Laurenti, 1768 [genus]
             Hyla annectans (Jerdon, 1870) [species]
             Hyla arborea (Linnaeus, 1758) [species]
             Hyla chinensis Günther, 1858 [species]
             Hyla hallowellii Thompson, 1912 [species]
+            Hyla imitator (Barbour & Dunn, 1921) [species]
+            Hyla nicefori (Cochran & Goin, 1970) [species]
 Archaea [kingdom]
 Bacteria [kingdom]
 Chromista [kingdom]
diff --git a/checklistbank-cli/src/test/resources/trees/186.txt b/checklistbank-cli/src/test/resources/trees/186.txt
new file mode 100644
index 00000000..ca80ecf0
--- /dev/null
+++ b/checklistbank-cli/src/test/resources/trees/186.txt
@@ -0,0 +1,31 @@
+Animalia [kingdom]
+  Chordata [phylum]
+    Amphibia [class]
+      Anura [order]
+        Brachycephalidae [family]
+          Brachycephalus Fitzinger, 1826 [genus]
+            Brachycephalus germanicus Döring, 2023 [species]
+          Ischnocnema Reinhardt & Lütken, 1862 [genus]
+            Ischnocnema alpina Döring, 2023 [species]
+            Ischnocnema vulgaris Döring, 2023 [species]
+        Hylidae [family]
+          Hyla Laurenti, 1768 [genus]
+            Hyla annectans (Jerdon, 1870) [species]
+            Hyla arborea (Linnaeus, 1758) [species]
+            Hyla chinensis Günther, 1858 [species]
+            Hyla hallowellii Thompson, 1912 [species]
+            Hyla imitator (Barbour & Dunn, 1921) [species]
+            Hyla nicefori (Cochran & Goin, 1970) [species]
+        Inventedidae [family]
+          Inventeda Döring, 2023 [genus]
+            Inventeda pulchra Döring, 2023 [species]
+          Ischnocnema [genus]
+            Ischnocnema viatrix Döring, 2023 [species]
+Archaea [kingdom]
+Bacteria [kingdom]
+Chromista [kingdom]
+Fungi [kingdom]
+Plantae [kingdom]
+Protozoa [kingdom]
+Viruses [kingdom]
+incertae sedis [kingdom]
\ No newline at end of file