-
Notifications
You must be signed in to change notification settings - Fork 7
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add scigraph support to the clinical-text-analysis-extension #13
Changes from 7 commits
09e64b0
d2f1f79
a113dd4
c678d4c
8713b55
9877e6a
76da891
c262725
436eb06
5fcc6e1
450df05
0e1f342
d1e85b9
25403fc
83f8dde
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
/* | ||
* See the NOTICE file distributed with this work for additional | ||
* information regarding copyright ownership. | ||
* | ||
* This program is free software: you can redistribute it and/or modify | ||
* it under the terms of the GNU Affero General Public License as published by | ||
* the Free Software Foundation, either version 3 of the License, or | ||
* (at your option) any later version. | ||
* | ||
* This program is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
* GNU Affero General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU Affero General Public License | ||
* along with this program. If not, see http://www.gnu.org/licenses/ | ||
*/ | ||
package org.phenotips.textanalysis.internal; | ||
|
||
import org.phenotips.textanalysis.TermAnnotation; | ||
|
||
import java.text.BreakIterator; | ||
import java.util.Collections; | ||
import java.util.List; | ||
import java.util.Locale; | ||
|
||
/** | ||
* Takes in lists of term annotations, as well as the text they appear in and | ||
* assigns sentences to them from the text. | ||
* | ||
* @version $Id$ | ||
*/ | ||
public class TermAnnotationSentenceDetector | ||
{ | ||
/** | ||
* Attaches sentences to the term annotations given. | ||
* @param annotations the annotations | ||
* @param text the text where the annotations appear | ||
*/ | ||
public void detectSentences(List<TermAnnotation> annotations, String text) | ||
{ | ||
BreakIterator sentences = BreakIterator.getSentenceInstance(Locale.US); | ||
sentences.setText(text); | ||
Collections.sort(annotations); | ||
int currentAnnotation = 0; | ||
int currentSentence = 0; | ||
while (currentSentence != BreakIterator.DONE && currentAnnotation < annotations.size()) { | ||
TermAnnotation annotation = annotations.get(currentAnnotation); | ||
int nextSentence = sentences.next(); | ||
/* next() pushes the iterator forward, so bring it back */ | ||
sentences.previous(); | ||
/* Does this annotation fall within the current sentence? */ | ||
if (annotation.getStartPos() >= currentSentence && annotation.getStartPos() < nextSentence) { | ||
long start = annotation.getStartPos() - currentSentence; | ||
long end = annotation.getEndPos() - currentSentence; | ||
String sentence; | ||
if (annotation.getEndPos() <= nextSentence) { | ||
/* Yay, straightforward! */ | ||
sentence = text.substring(currentSentence, nextSentence); | ||
} else { | ||
/* Uh-oh, cross sentence annotation */ | ||
int crossSentenceEnd = sentences.following((int) annotation.getEndPos()); | ||
sentence = text.substring(currentSentence, crossSentenceEnd); | ||
/* Rewind the iterator */ | ||
sentences.preceding(currentSentence + 1); | ||
} | ||
annotation.setSentence(sentence, start, end); | ||
currentAnnotation++; | ||
} else { | ||
currentSentence = sentences.next(); | ||
} | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
/* | ||
* See the NOTICE file distributed with this work for additional | ||
* information regarding copyright ownership. | ||
* | ||
* This program is free software: you can redistribute it and/or modify | ||
* it under the terms of the GNU Affero General Public License as published by | ||
* the Free Software Foundation, either version 3 of the License, or | ||
* (at your option) any later version. | ||
* | ||
* This program is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
* GNU Affero General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU Affero General Public License | ||
* along with this program. If not, see http://www.gnu.org/licenses/ | ||
*/ | ||
package org.phenotips.textanalysis.internal; | ||
|
||
import org.phenotips.textanalysis.TermAnnotation; | ||
|
||
|
||
import org.junit.Test; | ||
import org.phenotips.vocabulary.VocabularyTerm; | ||
|
||
import java.util.List; | ||
import java.util.ArrayList; | ||
|
||
import static org.mockito.Mockito.mock; | ||
import static org.mockito.Mockito.when; | ||
import static org.junit.Assert.assertEquals; | ||
|
||
|
||
public class TermAnnotationSentenceDetectorTest | ||
{ | ||
private TermAnnotationSentenceDetector client; | ||
|
||
/* This is where you miss heredocs. */ | ||
private static final String TEXT = "Now is the winter of our discontent " + | ||
"Made glorious summer by this son of York; " + | ||
"And all the clouds that lowered upon our house " + | ||
"In the deep bosom of the ocean buried. " + | ||
"Now are our brows bound with victorious wreaths, " + | ||
"Our bruised arms hung up for monuments, " + | ||
"Our stern alarums changed to merry meetings, " + | ||
"Our dreadful marches to delightful measures. " + | ||
"Grim-visaged war hath smoothed his wrinkled front, " + | ||
"And now, instead of mounting barbed steeds " + | ||
"To fright the souls of fearful adversaries, " + | ||
"He capers nimbly in a lady's chamber " + | ||
"To the lascivious pleasing of a lute."; | ||
|
||
private static final String SENTENCE1; | ||
|
||
private static final String SENTENCE2; | ||
|
||
private static final String SENTENCE3; | ||
|
||
static { | ||
SENTENCE1 = TEXT.substring(0, 163); | ||
/* 163 is just whitespace, skip it. */ | ||
SENTENCE2 = TEXT.substring(164, 342); | ||
SENTENCE3 = TEXT.substring(343); | ||
} | ||
|
||
/** | ||
* Test that we're able to have one annotation per sentence. | ||
*/ | ||
@Test | ||
public void testBasic() | ||
{ | ||
client = new TermAnnotationSentenceDetector(); | ||
|
||
List<TermAnnotation> annotations = new ArrayList<>(3); | ||
VocabularyTerm winterTerm = mock(VocabularyTerm.class); | ||
when(winterTerm.getId()).thenReturn("winter"); | ||
TermAnnotation winter = new TermAnnotation(11, 17, winterTerm); | ||
annotations.add(winter); | ||
|
||
VocabularyTerm monumentsTerm = mock(VocabularyTerm.class); | ||
when(monumentsTerm.getId()).thenReturn("monuments"); | ||
TermAnnotation monuments = new TermAnnotation(242, 251, monumentsTerm); | ||
annotations.add(monuments); | ||
|
||
VocabularyTerm nimblyTerm = mock(VocabularyTerm.class); | ||
when(nimblyTerm.getId()).thenReturn("nimbly"); | ||
TermAnnotation nimbly = new TermAnnotation(491, 497, nimblyTerm); | ||
annotations.add(nimbly); | ||
|
||
client.detectSentences(annotations, TEXT); | ||
|
||
assertEquals("winter", | ||
winter.getSentence(). | ||
substring((int) winter.getStartInSentence(), (int) winter.getEndInSentence())); | ||
assertEquals(SENTENCE1, winter.getSentence()); | ||
assertEquals(11, winter.getStartInSentence()); | ||
assertEquals(17, winter.getEndInSentence()); | ||
|
||
assertEquals("monuments", | ||
monuments.getSentence(). | ||
substring((int) monuments.getStartInSentence(), (int) monuments.getEndInSentence())); | ||
assertEquals(SENTENCE2, monuments.getSentence()); | ||
assertEquals(78, monuments.getStartInSentence()); | ||
assertEquals(87, monuments.getEndInSentence()); | ||
|
||
assertEquals("nimbly", | ||
nimbly.getSentence(). | ||
substring((int) nimbly.getStartInSentence(), (int) nimbly.getEndInSentence())); | ||
assertEquals(SENTENCE3, nimbly.getSentence()); | ||
assertEquals(148, nimbly.getStartInSentence()); | ||
assertEquals(154, nimbly.getEndInSentence()); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -86,6 +86,7 @@ | |
<groupId>net.sf.json-lib</groupId> | ||
<artifactId>json-lib</artifactId> | ||
<classifier>jdk15</classifier> | ||
<version>2.3</version> | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ideally, the code should be updated to use the new
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm after looking at it, I don't think the code actually uses
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
|
||
</dependency> | ||
<dependency> | ||
<groupId>org.slf4j</groupId> | ||
|
@@ -200,11 +201,6 @@ | |
<artifactId>gson</artifactId> | ||
<version>2.2.2</version> | ||
</dependency> | ||
<dependency> | ||
<groupId>com.ibm.icu</groupId> | ||
<artifactId>icu4j</artifactId> | ||
<version>49.1</version> | ||
</dependency> | ||
<dependency> | ||
<groupId>com.ibm.icu</groupId> | ||
<artifactId>icu4j</artifactId> | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This isn't right. This is a separate module, so it doesn't use the same versioning as the main project.