diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/obo/OntologyUtil.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/obo/OntologyUtil.java index e33d210..66f6feb 100644 --- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/obo/OntologyUtil.java +++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/obo/OntologyUtil.java @@ -48,11 +48,15 @@ import org.semanticweb.owlapi.model.OWLOntology; import org.semanticweb.owlapi.model.OWLOntologyCreationException; import org.semanticweb.owlapi.model.OWLOntologyManager; +import org.semanticweb.owlapi.model.OWLProperty; import owltools.graph.OWLGraphWrapper; public class OntologyUtil { + private static final String INVALID_OBO_IN_OWL_NAMESPACE = "http://www.geneontology.org/formats/oboInOWL#"; + private static final String NAMESPACE_PROP = ""; + private static final String NAMESPACE_PROP_ALT = ""; private static final Logger logger = Logger.getLogger(OntologyUtil.class); private static final String EXACT_SYN_PROP = ""; private static final String EXACT_SYN_PROP_ALT = ""; @@ -146,11 +150,37 @@ public String getLabel(OWLClass cls) { return null; } + /** + * This method was composed in response to the following issue: + * https://github.com/UCDenver-ccp/datasource/issues/5 + * + * The user uncovered an inconsistency in the oboInOwl namespace returned by + * the OWL API OBO parser. The inconsistency involves the capitalization of + * "OWL" in oboInOWL. The OBO parser uses + * http://www.geneontology.org/formats/oboInOWL# whereas the namespace + * appears as http://www.geneontology.org/formats/oboInOwl# in OWL files in + * the wild. This method swaps out the oboInOWL for oboInOwl when it is + * observed. + * + * @param annotation + * @return the {@link OWLProperty} IRI for the input {@link OWLAnnotation}. 
+ * If the invalid version of the oboInOwl namespace is detected + * (used by the OWL API OBO parser), it is replaced with the valid + * version which differs only in capitalization. + */ + public static String getAnnotationPropertyUri(OWLAnnotation annotation) { + String propertyUri = annotation.getProperty().toString(); + if (propertyUri.startsWith("<" + INVALID_OBO_IN_OWL_NAMESPACE)) { + propertyUri = propertyUri.replaceFirst("oboInOWL", "oboInOwl"); + } + return propertyUri; + } + public Set getSynonyms(OWLClass cls, SynonymType synType) { Set synonyms = new HashSet(); Set annotations = cls.getAnnotations(ont); for (OWLAnnotation annotation : annotations) { - String property = annotation.getProperty().toString(); + String property = getAnnotationPropertyUri(annotation); if ((synType == SynonymType.EXACT || synType == SynonymType.ALL) && (property.equals(EXACT_SYN_PROP) || property.equals(EXACT_SYN_PROP_ALT))) { String s = annotation.getValue().toString(); @@ -203,8 +233,8 @@ public Set getSynonyms(OWLClass cls, SynonymType synType) { public String getNamespace(OWLClass cls) { Set annotations = cls.getAnnotations(ont); for (OWLAnnotation annotation : annotations) { - if (annotation.getProperty().toString() - .equals("")) { + String propertyUri = getAnnotationPropertyUri(annotation); + if (propertyUri.equals(NAMESPACE_PROP_ALT) || propertyUri.equals(NAMESPACE_PROP)) { String s = annotation.getValue().toString(); s = StringUtils.removePrefix(s, "\""); s = StringUtils.removeSuffix(s, "\"^^xsd:string"); diff --git a/datasource-fileparsers/src/test/java/edu/ucdenver/ccp/datasource/fileparsers/obo/OntologyUtilTest.java b/datasource-fileparsers/src/test/java/edu/ucdenver/ccp/datasource/fileparsers/obo/OntologyUtilTest.java index 27f56ea..103e3be 100644 --- a/datasource-fileparsers/src/test/java/edu/ucdenver/ccp/datasource/fileparsers/obo/OntologyUtilTest.java +++ b/datasource-fileparsers/src/test/java/edu/ucdenver/ccp/datasource/fileparsers/obo/OntologyUtilTest.java @@ 
-39,9 +39,17 @@ import java.io.File; import java.io.IOException; - +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +import org.apache.log4j.AppenderSkeleton; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.log4j.spi.LoggingEvent; import org.junit.Before; import org.junit.Test; +import org.semanticweb.owlapi.model.OWLClass; import org.semanticweb.owlapi.model.OWLOntologyCreationException; import edu.ucdenver.ccp.common.collections.CollectionsUtil; @@ -57,13 +65,24 @@ public class OntologyUtilTest extends DefaultTestCase { private static final String SAMPLE_OBO_FILE_NAME = "sample.obo"; + private static final String SAMPLE_NCBITAXON_OBO_FILE_NAME = "sample.ncbitaxon.obo"; + private static final String SAMPLE_NCBITAXON_OWL_FILE_NAME = "sample.ncbitaxon.owl"; private OntologyUtil ontUtil; + private File sampleNcbiTaxonOboFile; + private File sampleNcbiTaxonOwlFile; @Before public void setUp() throws IOException, OWLOntologyCreationException { File sampleOboFile = folder.newFile("sample.obo"); ClassPathUtil.copyClasspathResourceToFile(getClass(), SAMPLE_OBO_FILE_NAME, sampleOboFile); ontUtil = new OntologyUtil(sampleOboFile); + + sampleNcbiTaxonOboFile = folder.newFile("sample.ncbitaxon.obo"); + ClassPathUtil.copyClasspathResourceToFile(getClass(), SAMPLE_NCBITAXON_OBO_FILE_NAME, sampleNcbiTaxonOboFile); + + sampleNcbiTaxonOwlFile = folder.newFile("sample.ncbitaxon.owl"); + ClassPathUtil.copyClasspathResourceToFile(getClass(), SAMPLE_NCBITAXON_OWL_FILE_NAME, sampleNcbiTaxonOwlFile); + } @Test @@ -116,4 +135,91 @@ public void testGetSynonyms() { ontUtil.getOWLClassFromId("PR:000002012"), SynonymType.ALL)); } + @Test + public void testNcbiTaxonOboFile() throws OWLOntologyCreationException, IOException { + testSampleOntologyFile(sampleNcbiTaxonOboFile, 8, "ncbi_taxonomy"); + } + + @Test + public void testNcbiTaxonOwlFile() throws OWLOntologyCreationException, IOException { + 
testSampleOntologyFile(sampleNcbiTaxonOwlFile, 8, "ncbi_taxonomy"); + } + + /** + * This test was written in response to + * https://github.com/UCDenver-ccp/datasource/issues/5 + * + * The user reported an "unhandled synonym type" error when processing the + * NCBI Taxonomy ontology. This error stems from an inconsistency in the OWL + * API when processing OBO files vs. OWL files. Specifically, the oboInOwl + * namespace when parsing an OBO file is set to: + * http://www.geneontology.org/formats/oboInOWL# whereas in OWL files the + * following is used: http://www.geneontology.org/formats/oboInOwl#. Note + * the difference in capitalization, oboInOWL vs. oboInOwl. The error + * appears when retrieving synonyms for a concept and the oboInOwl namespace + * is used for the various synonym types (related, exact, broad, narrow, + * etc.) It also appears when retrieving the namespace of a concept. + * + * This test processes a sample ontology file and exercises the synonym + * retrieval code. If an "unhandled synonym type" error is logged, the test + * fails. + * + * This test also checks to make sure the returned namespace is as expected. 
+ * + * @param ontFile + * @param expectedClassCount + * @throws OWLOntologyCreationException + * @throws IOException + */ + private static void testSampleOntologyFile(File ontFile, int expectedClassCount, String expectedNamespace) + throws OWLOntologyCreationException, IOException { + final TestAppender appender = new TestAppender(); + final Logger logger = Logger.getLogger(OntologyUtil.class); + logger.addAppender(appender); + + OntologyUtil ontUtil = new OntologyUtil(ontFile); + int count = 0; + for (Iterator classIterator = ontUtil.getClassIterator(); classIterator.hasNext();) { + count++; + OWLClass owlCls = classIterator.next(); + ontUtil.getSynonyms(owlCls, SynonymType.RELATED); + assertEquals(expectedNamespace, ontUtil.getNamespace(owlCls)); + } + ontUtil.close(); + assertEquals(expectedClassCount, count); + + /* ensure there were no errors logged */ + final List logList = appender.getLog(); + for (LoggingEvent log : logList) { + assertFalse("An error was logged: " + log.getMessage().toString(), log.getLevel().equals(Level.ERROR)); + } + } + + /** + * from: + * http://stackoverflow.com/questions/1827677/how-to-do-a-junit-assert- + * on-a-message-in-a-logger + */ + private static class TestAppender extends AppenderSkeleton { + private final List log = new ArrayList(); + + @Override + public boolean requiresLayout() { + return false; + } + + @Override + protected void append(final LoggingEvent loggingEvent) { + log.add(loggingEvent); + } + + @Override + public void close() { + } + + public List getLog() { + return new ArrayList(log); + } + } + } diff --git a/datasource-fileparsers/src/test/resources/edu/ucdenver/ccp/datasource/fileparsers/obo/sample.ncbitaxon.obo b/datasource-fileparsers/src/test/resources/edu/ucdenver/ccp/datasource/fileparsers/obo/sample.ncbitaxon.obo new file mode 100644 index 0000000..9ad1668 --- /dev/null +++ b/datasource-fileparsers/src/test/resources/edu/ucdenver/ccp/datasource/fileparsers/obo/sample.ncbitaxon.obo @@ -0,0 +1,143 @@ 
+format-version: 1.2 +data-version: 2016-02-02 +synonymtypedef: acronym "acronym" +synonymtypedef: anamorph "anamorph" +synonymtypedef: blast_name "blast name" +synonymtypedef: common_name "common name" +synonymtypedef: equivalent_name "equivalent name" +synonymtypedef: genbank_acronym "genbank acronym" +synonymtypedef: genbank_anamorph "genbank anamorph" +synonymtypedef: genbank_common_name "genbank common name" +synonymtypedef: genbank_synonym "genbank synonym" +synonymtypedef: in_part "in-part" +synonymtypedef: misnomer "misnomer" +synonymtypedef: misspelling "misspelling" +synonymtypedef: scientific_name "scientific name" +synonymtypedef: synonym "synonym" +synonymtypedef: teleomorph "teleomorph" +remark: Autogenerated by OWLTools-NCBIConverter. +ontology: ncbitaxon.sample + +[Term] +id: NCBITaxon:1 +name: root +namespace: ncbi_taxonomy +synonym: "all" RELATED synonym [] +xref: GC_ID:1 + +[Term] +id: NCBITaxon:10 +name: Cellvibrio +namespace: ncbi_taxonomy +synonym: "\"Cellvibrio\" Winogradsky 1929" RELATED synonym [] +synonym: "Cellvibrio (ex Winogradsky 1929) Blackall et al. 1986 emend. Humphry et al. 2003" RELATED synonym [] +synonym: "Cellvibrio (ex Winogradsky 1929) Blackall et al. 1986 emend. Suarez et al. 2014" RELATED synonym [] +xref: GC_ID:11 +xref: PMID:12710603 +xref: PMID:24105943 +is_a: NCBITaxon:1706371 ! Cellvibrionaceae +property_value: has_rank NCBITaxon:genus + +[Term] +id: NCBITaxon:1706371 +name: Cellvibrionaceae +namespace: ncbi_taxonomy +xref: GC_ID:11 +xref: PMID:25914684 +is_a: NCBITaxon:1706369 ! Cellvibrionales +property_value: has_rank NCBITaxon:family + +[Term] +id: NCBITaxon:1706369 +name: Cellvibrionales +namespace: ncbi_taxonomy +xref: GC_ID:11 +xref: PMID:25914684 +is_a: NCBITaxon:1236 ! 
Gammaproteobacteria +property_value: has_rank NCBITaxon:order + +[Term] +id: NCBITaxon:1236 +name: Gammaproteobacteria +namespace: ncbi_taxonomy +synonym: "g-proteobacteria" RELATED blast_name [] +synonym: "gamma proteobacteria" RELATED synonym [] +synonym: "gamma subdivision" RELATED synonym [] +synonym: "gamma subgroup" RELATED synonym [] +synonym: "Gammaproteobacteria Garrity et al. 2005" RELATED synonym [] +synonym: "Proteobacteria gamma subdivision" RELATED synonym [] +synonym: "Purple bacteria, gamma subdivision" RELATED synonym [] +xref: GC_ID:11 +xref: PMID:16280474 +is_a: NCBITaxon:1224 ! Proteobacteria +property_value: has_rank NCBITaxon:class + +[Term] +id: NCBITaxon:1224 +name: Proteobacteria +namespace: ncbi_taxonomy +synonym: "Alphaproteobacteraeota" RELATED synonym [] +synonym: "proteobacteria" RELATED blast_name [] +synonym: "purple bacteria" EXACT common_name [] +synonym: "purple bacteria and relatives" EXACT common_name [] +synonym: "purple non-sulfur bacteria" EXACT common_name [] +synonym: "purple photosynthetic bacteria" EXACT common_name [] +synonym: "purple photosynthetic bacteria and relatives" EXACT common_name [] +xref: GC_ID:11 +xref: PMID:11321122 +xref: PMID:11542017 +xref: PMID:11837318 +xref: PMID:16280474 +xref: PMID:26654112 +is_a: NCBITaxon:2 ! 
Bacteria +property_value: has_rank NCBITaxon:phylum + +id: NCBITaxon:2 +name: Bacteria +namespace: ncbi_taxonomy +synonym: "Bacteria" EXACT scientific_name [] +synonym: "bacteria" RELATED blast_name [] +synonym: "eubacteria" EXACT genbank_common_name [] +synonym: "Monera" RELATED in_part [] +synonym: "not Bacteria Haeckel 1894" RELATED synonym [] +synonym: "Procaryotae" RELATED in_part [] +synonym: "Prokaryota" RELATED in_part [] +synonym: "Prokaryotae" RELATED in_part [] +synonym: "prokaryote" RELATED in_part [] +synonym: "prokaryotes" RELATED in_part [] +xref: GC_ID:11 +xref: PMID:10425795 +xref: PMID:10425796 +xref: PMID:10425797 +xref: PMID:10490293 +xref: PMID:10843050 +xref: PMID:10939651 +xref: PMID:10939673 +xref: PMID:10939677 +xref: PMID:11211268 +xref: PMID:11321083 +xref: PMID:11321113 +xref: PMID:11411719 +xref: PMID:11540071 +xref: PMID:11542017 +xref: PMID:11542087 +xref: PMID:11760965 +xref: PMID:12054223 +xref: PMID:2112744 +xref: PMID:270744 +xref: PMID:270744 +xref: PMID:8123559 +xref: PMID:8590690 +xref: PMID:9103655 +xref: PMID:9336922 +is_a: NCBITaxon:131567 ! cellular organisms +property_value: has_rank NCBITaxon:superkingdom + + +[Term] +id: NCBITaxon:131567 +name: cellular organisms +namespace: ncbi_taxonomy +synonym: "biota" RELATED synonym [] +xref: GC_ID:1 +is_a: NCBITaxon:1 ! root \ No newline at end of file diff --git a/datasource-fileparsers/src/test/resources/edu/ucdenver/ccp/datasource/fileparsers/obo/sample.ncbitaxon.owl b/datasource-fileparsers/src/test/resources/edu/ucdenver/ccp/datasource/fileparsers/obo/sample.ncbitaxon.owl new file mode 100644 index 0000000..275fc8f --- /dev/null +++ b/datasource-fileparsers/src/test/resources/edu/ucdenver/ccp/datasource/fileparsers/obo/sample.ncbitaxon.owl @@ -0,0 +1,604 @@ + + + + Autogenerated by OWLTools-NCBIConverter. 
+ + + + + + + + + + + + + + definition + + + + + + + + acronym + oio:hasBroadSynonym + + + + + + + + + anamorph + oio:hasRelatedSynonym + + + + + + + + + blast name + oio:hasRelatedSynonym + + + + + + + + + common name + oio:hasExactSynonym + + + + + + + + + equivalent name + oio:hasExactSynonym + + + + + + + + + genbank acronym + oio:hasBroadSynonym + + + + + + + + + genbank anamorph + oio:hasRelatedSynonym + + + + + + + + + genbank common name + oio:hasExactSynonym + + + + + + + + + genbank synonym + oio:hasRelatedSynonym + + + + + + + + + has_rank + A metadata relation between a class and its taxonomic rank (eg species, family) + This is an abstract class for use with the NCBI taxonomy to name the depth of the node within the tree. The link between the node term and the rank is only visible if you are using an obo 1.3 aware browser/editor; otherwise this can be ignored + ncbi_taxonomy + + + + + + + + in-part + oio:hasRelatedSynonym + + + + + + + + + misnomer + oio:hasRelatedSynonym + + + + + + + + + misspelling + oio:hasRelatedSynonym + + + + + + + + + scientific name + oio:hasExactSynonym + + + + + + + + + synonym + oio:hasRelatedSynonym + + + + + + + + + teleomorph + oio:hasRelatedSynonym + + + + + + + + + synonym_type_property + + + + + + + + has_alternative_id + + + + + + + + has_broad_synonym + + + + + + + + database_cross_reference + + + + + + + + has_exact_synonym + + + + + + + + has_obo_format_version + + + + + + + + has_obo_namespace + + + + + + + + has_related_synonym + + + + + + + + has_scope + + + + + + + + has_synonym_type + + + + + + + + + + + + + root + GC_ID:1 + all + ncbi_taxonomy + + + all + + + + + + + + + + + Cellvibrio + + "Cellvibrio" Winogradsky 1929 + Cellvibrio (ex Winogradsky 1929) Blackall et al. 1986 emend. Humphry et al. 2003 + Cellvibrio (ex Winogradsky 1929) Blackall et al. 1986 emend. Suarez et al. 2014 + GC_ID:11 + PMID:12710603 + PMID:24105943 + ncbi_taxonomy + + + + Cellvibrio (ex Winogradsky 1929) Blackall et al. 1986 emend. 
Suarez et al. 2014 + + + + + + "Cellvibrio" Winogradsky 1929 + + + + + + Cellvibrio (ex Winogradsky 1929) Blackall et al. 1986 emend. Humphry et al. 2003 + + + + + + + + + Cellvibrionaceae + + GC_ID:11 + PMID:25914684 + ncbi_taxonomy + + + + + + + Cellvibrionales + + GC_ID:11 + PMID:25914684 + ncbi_taxonomy + + + + + + + Gammaproteobacteria + + GC_ID:11 + Gammaproteobacteria Garrity et al. 2005 + PMID:16280474 + Proteobacteria gamma subdivision + Purple bacteria, gamma subdivision + g-proteobacteria + gamma proteobacteria + gamma subdivision + gamma subgroup + ncbi_taxonomy + + + + Purple bacteria, gamma subdivision + + + + + + Gammaproteobacteria Garrity et al. 2005 + + + + + + gamma proteobacteria + + + + + + Proteobacteria gamma subdivision + + + + + + gamma subdivision + + + + + + g-proteobacteria + + + + + + gamma subgroup + + + + + + + + + Proteobacteria + + Alphaproteobacteraeota + GC_ID:11 + PMID:11321122 + PMID:11542017 + PMID:11837318 + PMID:16280474 + PMID:26654112 + ncbi_taxonomy + proteobacteria + purple bacteria + purple bacteria and relatives + purple non-sulfur bacteria + purple photosynthetic bacteria + purple photosynthetic bacteria and relatives + + + + purple bacteria + + + + + + purple photosynthetic bacteria + + + + + + purple photosynthetic bacteria and relatives + + + + + + proteobacteria + + + + + + Alphaproteobacteraeota + + + + + + purple non-sulfur bacteria + + + + + + purple bacteria and relatives + + + + + + + + + Bacteria <prokaryote> + + Bacteria + GC_ID:11 + Monera + PMID:10425795 + PMID:10425796 + PMID:10425797 + PMID:10490293 + PMID:10843050 + PMID:10939651 + PMID:10939673 + PMID:10939677 + PMID:11211268 + PMID:11321083 + PMID:11321113 + PMID:11411719 + PMID:11540071 + PMID:11542017 + PMID:11542087 + PMID:11760965 + PMID:12054223 + PMID:2112744 + PMID:270744 + PMID:8123559 + PMID:8590690 + PMID:9103655 + PMID:9336922 + Procaryotae + Prokaryota + Prokaryotae + bacteria + eubacteria + ncbi_taxonomy + not Bacteria Haeckel 1894 + 
prokaryote + prokaryotes + + + + Monera + + + + + + bacteria + + + + + + prokaryote + + + + + + Prokaryotae + + + + + + Procaryotae + + + + + + prokaryotes + + + + + + Prokaryota + + + + + + not Bacteria Haeckel 1894 + + + + + + eubacteria + + + + + + Bacteria + + + + + + + + + cellular organisms + + GC_ID:1 + biota + ncbi_taxonomy + + + biota + + + + + + \ No newline at end of file