Skip to content

Commit

Permalink
Try to merge redundant accepted genera within the same family, see #273
Browse files Browse the repository at this point in the history
  • Loading branch information
mdoering committed Jul 5, 2023
1 parent 558f793 commit e8a2751
Show file tree
Hide file tree
Showing 7 changed files with 131 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,7 @@ private static void storeEnum(Node n, String property, Enum value) {
}
}

private Rank readRank(Node n) {
public Rank readRank(Node n) {
// Reads the NeoProperties.RANK property of the given node as a Rank enum via readEnum.
// Rank.UNRANKED is passed as the last argument, presumably the fallback used when no
// rank is stored on the node (NOTE(review): confirm against readEnum).
return readEnum(n, NeoProperties.RANK, Rank.class, Rank.UNRANKED);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1080,10 +1080,12 @@ private NubUsage processSourceUsage(SrcUsage u, Origin origin, NubUsage parent)
)) {

if (!match.isMatch() || (
fromCurrentSource(match.usage) && currSrc.supragenericHomonymSource &&
!IGNORABLE_ORIGINS.contains(origin) && !IGNORABLE_ORIGINS.contains(match.usage.origin)
)) {

currSrc.supragenericHomonymSource
&& fromCurrentSource(match.usage)
&& (u.rank.isSuprageneric() || !sameFamily(parent, match.usage))
&& !IGNORABLE_ORIGINS.contains(origin) && !IGNORABLE_ORIGINS.contains(match.usage.origin)
)
) {
// remember if we had a doubtful match
NubUsage doubtful = match.doubtfulUsage;
// persistent new nub usage if there wasnt any yet
Expand Down Expand Up @@ -1151,6 +1153,16 @@ private NubUsage processSourceUsage(SrcUsage u, Origin origin, NubUsage parent)
return match.usage;
}

/**
 * Tells whether two nub usages resolve to the same family node.
 * The family of each usage is looked up with {@link #family(NubUsage)} and the two results are
 * compared with {@link Objects#equals(Object, Object)}, so two usages for which no family node
 * is found (both lookups yield null) are also reported as being in the same family.
 *
 * @param u1 first usage, its family is resolved first
 * @param u2 second usage
 * @return true if both family lookups yield equal nodes or both yield null
 */
private boolean sameFamily(NubUsage u1, NubUsage u2) {
  return Objects.equals(family(u1), family(u2));
}

/**
 * Resolves the family node for a usage.
 *
 * @param u the usage to resolve the family for
 * @return the usage's own node if the usage has rank FAMILY, otherwise whatever
 *         db.parent(node, Rank.FAMILY) returns for the usage's node
 */
private Node family(NubUsage u) {
  if (u.rank == Rank.FAMILY) {
    // the usage is a family itself
    return u.node;
  }
  return db.parent(u.node, Rank.FAMILY);
}

private void delete(NubUsage nub) {
for (int sourceId : nub.sourceIds) {
src2NubKey.remove(sourceId);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,21 @@ public Node parent(Node child) {
}

/**
* @return the parent (or accepted) nodes for a given node.
* @return the first parent (or accepted) node of the given node that has the given rank, or null if none is found.
*/
public Node parent(Node n, Rank rank) {
  // Go through the nodes delivered by parents(n) in their given order and
  // stop at the first one whose stored rank is the requested rank.
  for (Node candidate : parents(n)) {
    if (dao.readRank(candidate) != rank) {
      continue;
    }
    return candidate;
  }
  // none of the nodes carries the requested rank
  return null;
}

/**
* @return the parent (or accepted) nodes for a given node.
*/
public List<Node> parents(Node n) {
return Iterables.asList(Traversals.PARENTS
.relationships(RelType.SYNONYM_OF, Direction.OUTGOING)
Expand Down Expand Up @@ -413,6 +426,25 @@ public NubUsageMatch findNubUsage(UUID currSource, ParsedName pn, @Nullable Rank
return NubUsageMatch.snap(synonym);
}

// all canonical, one with the same parent?
if (currNubParent != null && checked.size() >= 2) {
NubUsage sibling = null;
for (NubUsage u : checked) {
if (parent(u.node).equals(currNubParent.node)) {
if (sibling == null){
sibling = u;
} else {
// multiple found!
sibling = null;
break;
}
}
}
if (sibling != null) {
return NubUsageMatch.match(sibling);
}
}

// try to do better authorship matching, remove canonical matches
if (qualifiedName) {
iter = checked.iterator();
Expand Down Expand Up @@ -454,8 +486,10 @@ public NubUsageMatch findNubUsage(UUID currSource, ParsedName pn, @Nullable Rank
if (curr != null) {
curr.issues.add(NameUsageIssue.HOMONYM);
curr.addRemark("Homonym known in other sources: " + pn.getScientificName());
LOG.warn("{} ambigous homonyms encountered for {} in source {}, picking largest taxon", checked.size(), pn.getScientificName(), currSource);
LOG.warn("{} ambiguous homonyms encountered for {} in source {}, picking largest taxon", checked.size(), pn.getScientificName(), currSource);
return NubUsageMatch.snap(curr);
} else if (checked.isEmpty()) {
return NubUsageMatch.empty();
}

throw new IgnoreSourceUsageException("homonym " + pn.getScientificName(), pn.getScientificName());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2171,6 +2171,18 @@ public void redundantHyla() throws Exception {
assertTree("184 185.txt");
}

/**
 * Builds a nub from classpath test source 186, registered with source rank KINGDOM and flagged
 * as a suprageneric homonym source, and compares the result with the expected tree "186.txt".
 *
 * See https://github.com/gbif/checklistbank/issues/273
 */
@Test
public void implicitGenera() throws Exception {
  final int datasetKey = 186;
  ClasspathSourceList sources = ClasspathSourceList.source(neoRepo.cfg, datasetKey);
  sources.setSourceRank(datasetKey, Rank.KINGDOM);
  sources.setSupragenericHomonymSource(datasetKey);
  build(sources);
  assertTree("186.txt");
}

/**
* For profiling memory usage of nub builds
*/
Expand Down
33 changes: 33 additions & 0 deletions checklistbank-cli/src/test/resources/nub-sources/dataset186.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
1 UNRANKED ACCEPTED Biota
2 1 KINGDOM ACCEPTED Animalia
3 2 PHYLUM ACCEPTED Chordata
4 3 SUBPHYLUM ACCEPTED Vertebrata
5 4 INFRAPHYLUM ACCEPTED Gnathostomata
6 5 UNRANKED ACCEPTED Osteichthyes
7 6 UNRANKED ACCEPTED Sarcopterygii
8 7 UNRANKED ACCEPTED Tetrapoda
9 8 CLASS ACCEPTED Amphibia
10 9 ORDER ACCEPTED Anura
11 10 FAMILY ACCEPTED Hylidae Rafinesque, 1815
12 11 GENUS ACCEPTED Hyla
20 12 SPECIES ACCEPTED Hyla imitator (Barbour & Dunn, 1921)
21 12 SPECIES ACCEPTED Hyla nicefori (Cochran & Goin, 1970)
13 11 SUBFAMILY ACCEPTED Hylinae Rafinesque, 1815
14 13 GENUS ACCEPTED Hyla Laurenti, 1768
22 14 SPECIES ACCEPTED Hyla annectans (Jerdon, 1870)
23 14 SPECIES ACCEPTED Hyla arborea (Linnaeus, 1758)
24 14 SPECIES ACCEPTED Hyla chinensis Günther, 1858
25 14 SPECIES ACCEPTED Hyla hallowellii Thompson, 1912
30 10 SUPERFAMILY ACCEPTED Brachycephaloidea Günther, 1858
31 30 FAMILY ACCEPTED Brachycephalidae Günther, 1858
32 31 GENUS ACCEPTED Brachycephalus Fitzinger, 1826
37 32 SPECIES ACCEPTED Brachycephalus germanicus Döring, 2023
33 31 GENUS ACCEPTED Ischnocnema Reinhardt & Lütken, 1862
34 33 SPECIES ACCEPTED Ischnocnema alpina Döring, 2023
35 31 GENUS ACCEPTED Ischnocnema
36 35 SPECIES ACCEPTED Ischnocnema vulgaris Döring, 2023
40 10 FAMILY ACCEPTED Inventedidae Döring, 2023
41 40 GENUS ACCEPTED Inventeda Döring, 2023
42 41 SPECIES ACCEPTED Inventeda pulchra Döring, 2023
43 40 GENUS ACCEPTED Ischnocnema
44 43 SPECIES ACCEPTED Ischnocnema viatrix Döring, 2023
5 changes: 2 additions & 3 deletions checklistbank-cli/src/test/resources/trees/184 185.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,13 @@ Animalia [kingdom]
Amphibia [class]
Anura [order]
Hylidae [family]
Hyla [genus]
Hyla imitator (Barbour & Dunn, 1921) [species]
Hyla nicefori (Cochran & Goin, 1970) [species]
Hyla Laurenti, 1768 [genus]
Hyla annectans (Jerdon, 1870) [species]
Hyla arborea (Linnaeus, 1758) [species]
Hyla chinensis Günther, 1858 [species]
Hyla hallowellii Thompson, 1912 [species]
Hyla imitator (Barbour & Dunn, 1921) [species]
Hyla nicefori (Cochran & Goin, 1970) [species]
Archaea [kingdom]
Bacteria [kingdom]
Chromista [kingdom]
Expand Down
31 changes: 31 additions & 0 deletions checklistbank-cli/src/test/resources/trees/186.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
Animalia [kingdom]
Chordata [phylum]
Amphibia [class]
Anura [order]
Brachycephalidae [family]
Brachycephalus Fitzinger, 1826 [genus]
Brachycephalus germanicus Döring, 2023 [species]
Ischnocnema Reinhardt & Lütken, 1862 [genus]
Ischnocnema alpina Döring, 2023 [species]
Ischnocnema vulgaris Döring, 2023 [species]
Hylidae [family]
Hyla Laurenti, 1768 [genus]
Hyla annectans (Jerdon, 1870) [species]
Hyla arborea (Linnaeus, 1758) [species]
Hyla chinensis Günther, 1858 [species]
Hyla hallowellii Thompson, 1912 [species]
Hyla imitator (Barbour & Dunn, 1921) [species]
Hyla nicefori (Cochran & Goin, 1970) [species]
Inventedidae [family]
Inventeda Döring, 2023 [genus]
Inventeda pulchra Döring, 2023 [species]
Ischnocnema [genus]
Ischnocnema viatrix Döring, 2023 [species]
Archaea [kingdom]
Bacteria [kingdom]
Chromista [kingdom]
Fungi [kingdom]
Plantae [kingdom]
Protozoa [kingdom]
Viruses [kingdom]
incertae sedis [kingdom]

0 comments on commit e8a2751

Please sign in to comment.