Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add scigraph support to the clinical-text-analysis-extension #13

Closed
wants to merge 15 commits into from
Closed
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion api/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
<parent>
<groupId>org.phenotips</groupId>
<artifactId>clinical-text-analysis-extension</artifactId>
<version>1.0-SNAPSHOT</version>
<version>1.3-SNAPSHOT</version>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This isn't right. This is a separate module, so it doesn't use the same versioning as the main project.

</parent>
<artifactId>clinical-text-analysis-extension-api</artifactId>
<name>PhenoTips - Clinical Text Analysis - Java API</name>
Expand Down Expand Up @@ -104,4 +104,7 @@
<scope>test</scope>
</dependency>
</dependencies>
<properties>
<coverage.instructionRatio>0.16</coverage.instructionRatio>
</properties>
</project>
61 changes: 60 additions & 1 deletion api/src/main/java/org/phenotips/textanalysis/TermAnnotation.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,29 @@
* @version $Id$
* @since 1.0M1
*/
public class TermAnnotation
public class TermAnnotation implements Comparable<TermAnnotation>
{
private final long mStartPos;

private final long mEndPos;

private final VocabularyTerm mTerm;

/**
* The sentence this term annotation appears in.
*/
private String sentence;

/**
* The start of this annotation within the sentence.
*/
private long startInSentence;

/**
* The end of this annotation within the sentence.
*/
private long endInSentence;

/**
* Constructs an annotation for an ontology term using its start and end positions within the text.
*
Expand Down Expand Up @@ -71,6 +86,50 @@ public VocabularyTerm getTerm()
return this.mTerm;
}

/**
 * Gives access to the sentence text recorded for this annotation.
 *
 * @return the sentence in which the term occurs.
 */
public String getSentence()
{
    return this.sentence;
}

/**
 * Gives access to the start offset recorded for this annotation relative to its sentence.
 *
 * @return the position within the sentence where the term starts
 */
public long getStartInSentence()
{
    return this.startInSentence;
}

/**
 * Gives access to the end offset recorded for this annotation relative to its sentence.
 *
 * @return the position within the sentence where the term ends
 */
public long getEndInSentence()
{
    return this.endInSentence;
}

/**
 * Set the sentence that this term appears in.
 * <p>
 * The sentence is stored with surrounding whitespace removed. The given offsets are expected to be relative to
 * the sentence <em>as passed in</em>; they are shifted by the amount of leading whitespace that gets removed, so
 * that the stored offsets stay valid indexes into the stored (trimmed) sentence.
 * </p>
 *
 * @param sentence the sentence; must not be {@code null}
 * @param startInSentence the position within the given sentence where the term starts
 * @param endInSentence the position within the given sentence where the term ends
 */
public void setSentence(String sentence, long startInSentence, long endInSentence)
{
    /* String.trim() drops every leading character <= ' '; count them so the offsets can follow the text. */
    int leading = 0;
    int length = sentence.length();
    while (leading < length && sentence.charAt(leading) <= ' ') {
        leading++;
    }
    this.sentence = sentence.trim();
    /* Clamp at zero in case the annotation itself started inside the removed whitespace. */
    this.startInSentence = Math.max(0L, startInSentence - leading);
    this.endInSentence = Math.max(0L, endInSentence - leading);
}

/**
 * Orders annotations by ascending start position in the text.
 *
 * @param other the annotation to compare against
 * @return a negative number, zero, or a positive number when this annotation starts before, at the same position
 *         as, or after {@code other}
 */
@Override
public int compareTo(TermAnnotation other)
{
    /* Compare rather than subtract: a narrowed long difference can overflow and report the wrong sign. */
    return Long.compare(this.getStartPos(), other.getStartPos());
}

@Override
public int hashCode()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ public interface TermAnnotationService
* @version $Id$
* @since 1.0M1
*/
public class AnnotationException extends Exception
class AnnotationException extends Exception
{
/**
* Constructs a new AnnotationException with the specified detail message.
Expand All @@ -56,5 +56,16 @@ public AnnotationException(String message)
{
super(message);
}

/**
 * Constructs a new AnnotationException with the specified detail message and cause.
 *
 * @param message the detail message
 * @param cause the underlying failure that triggered this exception; any {@link Throwable} is accepted so that
 *        errors as well as exceptions can be chained
 */
public AnnotationException(String message, Throwable cause)
{
    super(message, cause);
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
/*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/
*/
package org.phenotips.textanalysis.internal;

import org.phenotips.textanalysis.TermAnnotation;

import java.text.BreakIterator;
import java.util.Collections;
import java.util.List;
import java.util.Locale;

/**
* Takes in lists of term annotations, as well as the text they appear in and
* assigns sentences to them from the text.
*
* @version $Id$
*/
public class TermAnnotationSentenceDetector
{
    /**
     * Attaches sentences to the term annotations given.
     * <p>
     * The list is sorted in place by start position, then walked in step with the sentence boundaries of the text.
     * Each annotation whose start falls inside a sentence receives that sentence (extended to the next boundary at
     * or after its end when the annotation spans several sentences) together with its offsets relative to the
     * start of that sentence. Annotations that start before the beginning of the text or at/after its end are left
     * without a sentence.
     * </p>
     *
     * @param annotations the annotations; sorted in place as a side effect
     * @param text the text where the annotations appear
     */
    public void detectSentences(List<TermAnnotation> annotations, String text)
    {
        BreakIterator sentences = BreakIterator.getSentenceInstance(Locale.US);
        sentences.setText(text);
        Collections.sort(annotations);

        /* Track both edges of the current sentence explicitly instead of peeking with next()/previous(). */
        int sentenceStart = sentences.first();
        int sentenceEnd = sentences.next();
        int index = 0;
        while (sentenceEnd != BreakIterator.DONE && index < annotations.size()) {
            TermAnnotation annotation = annotations.get(index);
            long startPos = annotation.getStartPos();
            long endPos = annotation.getEndPos();
            if (startPos < sentenceStart) {
                /* Sorted input means this can only be a start before the text begins: nothing to attach. */
                index++;
            } else if (startPos >= sentenceEnd) {
                /* The annotation lies further on; once the text is exhausted next() yields DONE and we stop. */
                sentenceStart = sentenceEnd;
                sentenceEnd = sentences.next();
            } else {
                int coveredEnd = sentenceEnd;
                if (endPos > sentenceEnd) {
                    /* Cross sentence annotation: widen the excerpt up to the boundary that contains its end. */
                    coveredEnd = findCoveringBoundary(sentences, sentenceEnd, endPos, text.length());
                }
                annotation.setSentence(text.substring(sentenceStart, coveredEnd), startPos - sentenceStart,
                    endPos - sentenceStart);
                index++;
            }
        }
    }

    /**
     * Finds the first sentence boundary at or after the (exclusive) end of a cross-sentence annotation, leaving
     * the iterator positioned on {@code sentenceEnd} afterwards so the main walk can continue undisturbed.
     *
     * @param sentences the iterator, currently positioned on {@code sentenceEnd}
     * @param sentenceEnd the end boundary of the current sentence, strictly smaller than {@code endPos}
     * @param endPos the exclusive end position of the annotation in the text
     * @param textLength the length of the text
     * @return the boundary to use as the end of the excerpt, never beyond {@code textLength}
     */
    private int findCoveringBoundary(BreakIterator sentences, int sentenceEnd, long endPos, int textLength)
    {
        if (endPos >= textLength) {
            /* Ends on or past the last character: the end of the text is the only boundary left. */
            return textLength;
        }
        /* following() is strictly-after, so ask from the last covered character to accept an exact boundary. */
        int boundary = sentences.following((int) (endPos - 1));
        /* Rewind: sentenceEnd is the first boundary after the character preceding it. */
        sentences.following(sentenceEnd - 1);
        return boundary;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import org.phenotips.textanalysis.TermAnnotation;
import org.phenotips.textanalysis.TermAnnotationService;
import org.phenotips.textanalysis.TermAnnotationService.AnnotationException;
import org.phenotips.textanalysis.internal.TermAnnotationSentenceDetector;

import org.xwiki.component.annotation.Component;
import org.xwiki.script.service.ScriptService;
Expand All @@ -43,7 +44,7 @@
public class TermAnnotationScriptService implements ScriptService
{
@Inject
@Named("biolark")
@Named("genericREST")
private TermAnnotationService service;

/**
Expand All @@ -55,7 +56,10 @@ public class TermAnnotationScriptService implements ScriptService
public List<TermAnnotation> get(String text)
{
try {
return this.service.annotate(text);
List<TermAnnotation> retval = this.service.annotate(text);
TermAnnotationSentenceDetector detector = new TermAnnotationSentenceDetector();
detector.detectSentences(retval, text);
return retval;
} catch (AnnotationException e) {
return null;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
/*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/
*/
package org.phenotips.textanalysis.internal;

import org.phenotips.textanalysis.TermAnnotation;


import org.junit.Test;
import org.phenotips.vocabulary.VocabularyTerm;

import java.util.List;
import java.util.ArrayList;

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
import static org.junit.Assert.assertEquals;


public class TermAnnotationSentenceDetectorTest
{
    /* Sample text made of three sentences, used as input for the detector. */
    private static final String TEXT = "Now is the winter of our discontent " +
        "Made glorious summer by this son of York; " +
        "And all the clouds that lowered upon our house " +
        "In the deep bosom of the ocean buried. " +
        "Now are our brows bound with victorious wreaths, " +
        "Our bruised arms hung up for monuments, " +
        "Our stern alarums changed to merry meetings, " +
        "Our dreadful marches to delightful measures. " +
        "Grim-visaged war hath smoothed his wrinkled front, " +
        "And now, instead of mounting barbed steeds " +
        "To fright the souls of fearful adversaries, " +
        "He capers nimbly in a lady's chamber " +
        "To the lascivious pleasing of a lute.";

    /* The characters at 163 and 342 are the whitespace separating the sentences, so they are left out. */
    private static final String SENTENCE1 = TEXT.substring(0, 163);

    private static final String SENTENCE2 = TEXT.substring(164, 342);

    private static final String SENTENCE3 = TEXT.substring(343);

    private TermAnnotationSentenceDetector client;

    /**
     * Test that we're able to have one annotation per sentence.
     */
    @Test
    public void testBasic()
    {
        this.client = new TermAnnotationSentenceDetector();

        TermAnnotation winter = annotationFor("winter", 11, 17);
        TermAnnotation monuments = annotationFor("monuments", 242, 251);
        TermAnnotation nimbly = annotationFor("nimbly", 491, 497);

        List<TermAnnotation> annotations = new ArrayList<>(3);
        annotations.add(winter);
        annotations.add(monuments);
        annotations.add(nimbly);

        this.client.detectSentences(annotations, TEXT);

        assertLocated("winter", SENTENCE1, 11, 17, winter);
        assertLocated("monuments", SENTENCE2, 78, 87, monuments);
        assertLocated("nimbly", SENTENCE3, 148, 154, nimbly);
    }

    /**
     * Builds an annotation over the given text span, backed by a mocked term with the given identifier.
     */
    private static TermAnnotation annotationFor(String id, int start, int end)
    {
        VocabularyTerm term = mock(VocabularyTerm.class);
        when(term.getId()).thenReturn(id);
        return new TermAnnotation(start, end, term);
    }

    /**
     * Checks the sentence attached to an annotation, its offsets within that sentence, and that those offsets
     * select the expected word.
     */
    private static void assertLocated(String word, String sentence, int start, int end, TermAnnotation annotation)
    {
        String attached = annotation.getSentence();
        String selected =
            attached.substring((int) annotation.getStartInSentence(), (int) annotation.getEndInSentence());
        assertEquals(word, selected);
        assertEquals(sentence, attached);
        assertEquals(start, annotation.getStartInSentence());
        assertEquals(end, annotation.getEndInSentence());
    }
}
6 changes: 1 addition & 5 deletions biolark/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@
<groupId>net.sf.json-lib</groupId>
<artifactId>json-lib</artifactId>
<classifier>jdk15</classifier>
<version>2.3</version>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ideally, the code should be updated to use the new org.json library instead of json-lib.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm after looking at it, I don't think the code actually uses json-lib at all, so we could probably take off that dependency.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mvn dependency:analyze

</dependency>
<dependency>
<groupId>org.slf4j</groupId>
Expand Down Expand Up @@ -200,11 +201,6 @@
<artifactId>gson</artifactId>
<version>2.2.2</version>
</dependency>
<dependency>
<groupId>com.ibm.icu</groupId>
<artifactId>icu4j</artifactId>
<version>49.1</version>
</dependency>
<dependency>
<groupId>com.ibm.icu</groupId>
<artifactId>icu4j</artifactId>
Expand Down
Loading