Skip to content

Commit

Permalink
Merge pull request UCDenver-ccp#6 from bill-baumgartner/master
Browse files Browse the repository at this point in the history
Revisions to handle case inconsistency for oboInOwl namespace.
  • Loading branch information
sinistral committed Apr 26, 2016
2 parents 3c1001f + fe1b50b commit 7de3735
Show file tree
Hide file tree
Showing 4 changed files with 887 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,15 @@
import org.semanticweb.owlapi.model.OWLOntology;
import org.semanticweb.owlapi.model.OWLOntologyCreationException;
import org.semanticweb.owlapi.model.OWLOntologyManager;
import org.semanticweb.owlapi.model.OWLProperty;

import owltools.graph.OWLGraphWrapper;

public class OntologyUtil {

// Variant of the oboInOwl namespace with "OWL" fully capitalized; getAnnotationPropertyUri()
// rewrites property URIs that start with it to the "oboInOwl" spelling.
private static final String INVALID_OBO_IN_OWL_NAMESPACE = "http://www.geneontology.org/formats/oboInOWL#";
// Annotation property URIs (in angle-bracketed toString() form) that getNamespace() accepts
// as carrying a class's namespace.
private static final String NAMESPACE_PROP = "<http://purl.obolibrary.org/obo/namespace>";
private static final String NAMESPACE_PROP_ALT = "<http://www.geneontology.org/formats/oboInOwl#hasOBONamespace>";
// Class-level logger.
private static final Logger logger = Logger.getLogger(OntologyUtil.class);
// Annotation property URIs (in angle-bracketed toString() form) that getSynonyms() treats
// as exact synonyms.
private static final String EXACT_SYN_PROP = "<http://www.geneontology.org/formats/oboInOwl#hasExactSynonym>";
private static final String EXACT_SYN_PROP_ALT = "<http://purl.obolibrary.org/obo/exact_synonym>";
Expand Down Expand Up @@ -146,11 +150,37 @@ public String getLabel(OWLClass cls) {
return null;
}

/**
 * Returns the string form of an annotation's property, normalizing the
 * capitalization of the oboInOwl namespace.
 *
 * Background: https://github.com/UCDenver-ccp/datasource/issues/5
 *
 * The OWL API OBO parser uses the namespace
 * http://www.geneontology.org/formats/oboInOWL#, whereas OWL files in the
 * wild use http://www.geneontology.org/formats/oboInOwl#. The two differ
 * only in the capitalization of "OWL". When the parser's spelling is
 * observed at the start of the property URI, it is swapped for the
 * "oboInOwl" spelling so callers can compare against a single form.
 *
 * @param annotation
 *            the annotation whose property URI is requested
 * @return the {@link OWLProperty} IRI for the input {@link OWLAnnotation},
 *         exactly as rendered by the property's {@code toString()}. If the
 *         value starts with {@code "<"} followed by the invalid oboInOWL
 *         namespace, the first occurrence of "oboInOWL" is replaced with
 *         "oboInOwl"; otherwise the value is returned unchanged.
 */
public static String getAnnotationPropertyUri(OWLAnnotation annotation) {
    final String rawUri = annotation.getProperty().toString();
    final boolean usesInvalidNamespace = rawUri.startsWith("<" + INVALID_OBO_IN_OWL_NAMESPACE);
    return usesInvalidNamespace ? rawUri.replaceFirst("oboInOWL", "oboInOwl") : rawUri;
}

public Set<String> getSynonyms(OWLClass cls, SynonymType synType) {
Set<String> synonyms = new HashSet<String>();
Set<OWLAnnotation> annotations = cls.getAnnotations(ont);
for (OWLAnnotation annotation : annotations) {
String property = annotation.getProperty().toString();
String property = getAnnotationPropertyUri(annotation);
if ((synType == SynonymType.EXACT || synType == SynonymType.ALL)
&& (property.equals(EXACT_SYN_PROP) || property.equals(EXACT_SYN_PROP_ALT))) {
String s = annotation.getValue().toString();
Expand Down Expand Up @@ -203,8 +233,8 @@ public Set<String> getSynonyms(OWLClass cls, SynonymType synType) {
public String getNamespace(OWLClass cls) {
Set<OWLAnnotation> annotations = cls.getAnnotations(ont);
for (OWLAnnotation annotation : annotations) {
if (annotation.getProperty().toString()
.equals("<http://www.geneontology.org/formats/oboInOwl#hasOBONamespace>")) {
String propertyUri = getAnnotationPropertyUri(annotation);
if (propertyUri.equals(NAMESPACE_PROP_ALT) || propertyUri.equals(NAMESPACE_PROP)) {
String s = annotation.getValue().toString();
s = StringUtils.removePrefix(s, "\"");
s = StringUtils.removeSuffix(s, "\"^^xsd:string");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,17 @@

import java.io.File;
import java.io.IOException;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.log4j.AppenderSkeleton;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.log4j.spi.LoggingEvent;
import org.junit.Before;
import org.junit.Test;
import org.semanticweb.owlapi.model.OWLClass;
import org.semanticweb.owlapi.model.OWLOntologyCreationException;

import edu.ucdenver.ccp.common.collections.CollectionsUtil;
Expand All @@ -57,13 +65,24 @@
public class OntologyUtilTest extends DefaultTestCase {

// Classpath resource names of the sample ontology files used by these tests.
private static final String SAMPLE_OBO_FILE_NAME = "sample.obo";
private static final String SAMPLE_NCBITAXON_OBO_FILE_NAME = "sample.ncbitaxon.obo";
private static final String SAMPLE_NCBITAXON_OWL_FILE_NAME = "sample.ncbitaxon.owl";
// Utility under test; setUp() builds it from a copy of sample.obo.
private OntologyUtil ontUtil;
// Copies of the NCBI Taxonomy sample files (OBO and OWL), created by setUp().
private File sampleNcbiTaxonOboFile;
private File sampleNcbiTaxonOwlFile;

/**
 * Copies the sample ontology resources from the classpath into files
 * created via {@code folder} and builds the {@link OntologyUtil} under
 * test from the sample OBO file.
 *
 * NOTE(review): {@code folder} is declared outside this view (presumably a
 * JUnit temporary-folder rule inherited from DefaultTestCase) -- confirm.
 *
 * @throws IOException
 *             if a file cannot be created or a resource cannot be copied
 * @throws OWLOntologyCreationException
 *             if the sample OBO file cannot be loaded as an ontology
 */
@Before
public void setUp() throws IOException, OWLOntologyCreationException {
    // Same order as before: the sample OBO file is created and loaded first,
    // then the two NCBI Taxonomy samples are copied for the per-file tests.
    ontUtil = new OntologyUtil(copySampleResource(SAMPLE_OBO_FILE_NAME));
    sampleNcbiTaxonOboFile = copySampleResource(SAMPLE_NCBITAXON_OBO_FILE_NAME);
    sampleNcbiTaxonOwlFile = copySampleResource(SAMPLE_NCBITAXON_OWL_FILE_NAME);
}

/**
 * Creates a new file named after the given classpath resource and copies
 * the resource's content into it.
 *
 * Using the resource name for both the file and the lookup keeps the two
 * from drifting apart, which the previous duplicated literals allowed.
 *
 * @param resourceName
 *            name of the classpath resource, resolved relative to this
 *            test class
 * @return the newly created file holding a copy of the resource
 * @throws IOException
 *             if the file cannot be created or the copy fails
 */
private File copySampleResource(String resourceName) throws IOException {
    File target = folder.newFile(resourceName);
    ClassPathUtil.copyClasspathResourceToFile(getClass(), resourceName, target);
    return target;
}

@Test
Expand Down Expand Up @@ -116,4 +135,91 @@ public void testGetSynonyms() {
ontUtil.getOWLClassFromId("PR:000002012"), SynonymType.ALL));
}

/**
 * Runs the shared sample-ontology checks against the OBO-format NCBI
 * Taxonomy sample: 8 classes, each in the "ncbi_taxonomy" namespace.
 */
@Test
public void testNcbiTaxonOboFile() throws OWLOntologyCreationException, IOException {
    final int expectedClassCount = 8;
    final String expectedNamespace = "ncbi_taxonomy";
    testSampleOntologyFile(sampleNcbiTaxonOboFile, expectedClassCount, expectedNamespace);
}

/**
 * Runs the shared sample-ontology checks against the OWL-format NCBI
 * Taxonomy sample: 8 classes, each in the "ncbi_taxonomy" namespace.
 */
@Test
public void testNcbiTaxonOwlFile() throws OWLOntologyCreationException, IOException {
    final int expectedClassCount = 8;
    final String expectedNamespace = "ncbi_taxonomy";
    testSampleOntologyFile(sampleNcbiTaxonOwlFile, expectedClassCount, expectedNamespace);
}

/**
 * This test was written in response to
 * https://github.com/UCDenver-ccp/datasource/issues/5
 *
 * The user reported an "unhandled synonym type" error when processing the
 * NCBI Taxonomy ontology. This error stems from an inconsistency in the OWL
 * API when processing OBO files vs. OWL files. Specifically, the oboInOwl
 * namespace when parsing an OBO file is set to:
 * http://www.geneontology.org/formats/oboInOWL# whereas in OWL files the
 * following is used: http://www.geneontology.org/formats/oboInOwl#. Note
 * the difference in capitalization, oboInOWL vs. oboInOwl. The error
 * appears when retrieving synonyms for a concept and the oboInOwl namespace
 * is used for the various synonym types (related, exact, broad, narrow,
 * etc.). It also appears when retrieving the namespace of a concept.
 *
 * This helper processes a sample ontology file and exercises the synonym
 * retrieval code. If an event at ERROR level is logged by the
 * {@link OntologyUtil} logger while doing so, the test fails.
 *
 * It also checks that the namespace returned for every class is as
 * expected, and that the number of classes iterated matches the expected
 * count.
 *
 * @param ontFile
 *            the ontology file (OBO or OWL) to load
 * @param expectedClassCount
 *            the number of classes the class iterator is expected to yield
 * @param expectedNamespace
 *            the namespace expected for every class in the file
 * @throws OWLOntologyCreationException
 *             if the ontology file cannot be loaded
 * @throws IOException
 *             if the ontology file cannot be read
 */
private static void testSampleOntologyFile(File ontFile, int expectedClassCount, String expectedNamespace)
        throws OWLOntologyCreationException, IOException {
    final TestAppender appender = new TestAppender();
    final Logger logger = Logger.getLogger(OntologyUtil.class);
    logger.addAppender(appender);
    try {
        int count = 0;
        OntologyUtil ontUtil = new OntologyUtil(ontFile);
        try {
            for (Iterator<OWLClass> classIterator = ontUtil.getClassIterator(); classIterator.hasNext();) {
                count++;
                OWLClass owlCls = classIterator.next();
                ontUtil.getSynonyms(owlCls, SynonymType.RELATED);
                assertEquals(expectedNamespace, ontUtil.getNamespace(owlCls));
            }
        } finally {
            // Close even when an assertion above fails, so the ontology is
            // not left open for the remaining tests.
            ontUtil.close();
        }
        assertEquals(expectedClassCount, count);

        /* ensure there were no errors logged */
        for (LoggingEvent log : appender.getLog()) {
            // Plain concatenation tolerates a null message object.
            assertFalse("An error was logged: " + log.getMessage(), log.getLevel().equals(Level.ERROR));
        }
    } finally {
        // The logger is shared by all tests; detach the appender so
        // appenders do not accumulate from one test to the next.
        logger.removeAppender(appender);
    }
}

/**
 * Minimal log4j appender that keeps every logging event it receives in
 * memory so a test can inspect what was logged.
 *
 * Adapted from:
 * http://stackoverflow.com/questions/1827677/how-to-do-a-junit-assert-on-a-message-in-a-logger
 */
private static class TestAppender extends AppenderSkeleton {
    /** Events received so far, in arrival order. */
    private final List<LoggingEvent> capturedEvents = new ArrayList<LoggingEvent>();

    /** Records the event; nothing is formatted or written anywhere. */
    @Override
    protected void append(final LoggingEvent loggingEvent) {
        capturedEvents.add(loggingEvent);
    }

    /** No layout is needed because events are stored, not rendered. */
    @Override
    public boolean requiresLayout() {
        return false;
    }

    /** Nothing to release. */
    @Override
    public void close() {
    }

    /**
     * @return a copy of the events captured so far; changes to the returned
     *         list do not affect this appender
     */
    public List<LoggingEvent> getLog() {
        final List<LoggingEvent> snapshot = new ArrayList<LoggingEvent>(capturedEvents);
        return snapshot;
    }
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
format-version: 1.2
data-version: 2016-02-02
synonymtypedef: acronym "acronym"
synonymtypedef: anamorph "anamorph"
synonymtypedef: blast_name "blast name"
synonymtypedef: common_name "common name"
synonymtypedef: equivalent_name "equivalent name"
synonymtypedef: genbank_acronym "genbank acronym"
synonymtypedef: genbank_anamorph "genbank anamorph"
synonymtypedef: genbank_common_name "genbank common name"
synonymtypedef: genbank_synonym "genbank synonym"
synonymtypedef: in_part "in-part"
synonymtypedef: misnomer "misnomer"
synonymtypedef: misspelling "misspelling"
synonymtypedef: scientific_name "scientific name"
synonymtypedef: synonym "synonym"
synonymtypedef: teleomorph "teleomorph"
remark: Autogenerated by OWLTools-NCBIConverter.
ontology: ncbitaxon.sample

[Term]
id: NCBITaxon:1
name: root
namespace: ncbi_taxonomy
synonym: "all" RELATED synonym []
xref: GC_ID:1

[Term]
id: NCBITaxon:10
name: Cellvibrio
namespace: ncbi_taxonomy
synonym: "\"Cellvibrio\" Winogradsky 1929" RELATED synonym []
synonym: "Cellvibrio (ex Winogradsky 1929) Blackall et al. 1986 emend. Humphry et al. 2003" RELATED synonym []
synonym: "Cellvibrio (ex Winogradsky 1929) Blackall et al. 1986 emend. Suarez et al. 2014" RELATED synonym []
xref: GC_ID:11
xref: PMID:12710603
xref: PMID:24105943
is_a: NCBITaxon:1706371 ! Cellvibrionaceae
property_value: has_rank NCBITaxon:genus

[Term]
id: NCBITaxon:1706371
name: Cellvibrionaceae
namespace: ncbi_taxonomy
xref: GC_ID:11
xref: PMID:25914684
is_a: NCBITaxon:1706369 ! Cellvibrionales
property_value: has_rank NCBITaxon:family

[Term]
id: NCBITaxon:1706369
name: Cellvibrionales
namespace: ncbi_taxonomy
xref: GC_ID:11
xref: PMID:25914684
is_a: NCBITaxon:1236 ! Gammaproteobacteria
property_value: has_rank NCBITaxon:order

[Term]
id: NCBITaxon:1236
name: Gammaproteobacteria
namespace: ncbi_taxonomy
synonym: "g-proteobacteria" RELATED blast_name []
synonym: "gamma proteobacteria" RELATED synonym []
synonym: "gamma subdivision" RELATED synonym []
synonym: "gamma subgroup" RELATED synonym []
synonym: "Gammaproteobacteria Garrity et al. 2005" RELATED synonym []
synonym: "Proteobacteria gamma subdivision" RELATED synonym []
synonym: "Purple bacteria, gamma subdivision" RELATED synonym []
xref: GC_ID:11
xref: PMID:16280474
is_a: NCBITaxon:1224 ! Proteobacteria
property_value: has_rank NCBITaxon:class

[Term]
id: NCBITaxon:1224
name: Proteobacteria
namespace: ncbi_taxonomy
synonym: "Alphaproteobacteraeota" RELATED synonym []
synonym: "proteobacteria" RELATED blast_name []
synonym: "purple bacteria" EXACT common_name []
synonym: "purple bacteria and relatives" EXACT common_name []
synonym: "purple non-sulfur bacteria" EXACT common_name []
synonym: "purple photosynthetic bacteria" EXACT common_name []
synonym: "purple photosynthetic bacteria and relatives" EXACT common_name []
xref: GC_ID:11
xref: PMID:11321122
xref: PMID:11542017
xref: PMID:11837318
xref: PMID:16280474
xref: PMID:26654112
is_a: NCBITaxon:2 ! Bacteria <prokaryote>
property_value: has_rank NCBITaxon:phylum

[Term]
id: NCBITaxon:2
name: Bacteria <prokaryote>
namespace: ncbi_taxonomy
synonym: "Bacteria" EXACT scientific_name []
synonym: "bacteria" RELATED blast_name []
synonym: "eubacteria" EXACT genbank_common_name []
synonym: "Monera" RELATED in_part []
synonym: "not Bacteria Haeckel 1894" RELATED synonym []
synonym: "Procaryotae" RELATED in_part []
synonym: "Prokaryota" RELATED in_part []
synonym: "Prokaryotae" RELATED in_part []
synonym: "prokaryote" RELATED in_part []
synonym: "prokaryotes" RELATED in_part []
xref: GC_ID:11
xref: PMID:10425795
xref: PMID:10425796
xref: PMID:10425797
xref: PMID:10490293
xref: PMID:10843050
xref: PMID:10939651
xref: PMID:10939673
xref: PMID:10939677
xref: PMID:11211268
xref: PMID:11321083
xref: PMID:11321113
xref: PMID:11411719
xref: PMID:11540071
xref: PMID:11542017
xref: PMID:11542087
xref: PMID:11760965
xref: PMID:12054223
xref: PMID:2112744
xref: PMID:270744
xref: PMID:270744
xref: PMID:8123559
xref: PMID:8590690
xref: PMID:9103655
xref: PMID:9336922
is_a: NCBITaxon:131567 ! cellular organisms
property_value: has_rank NCBITaxon:superkingdom


[Term]
id: NCBITaxon:131567
name: cellular organisms
namespace: ncbi_taxonomy
synonym: "biota" RELATED synonym []
xref: GC_ID:1
is_a: NCBITaxon:1 ! root
Loading

0 comments on commit 7de3735

Please sign in to comment.