Skip to content

Commit 7fe80d8

Browse files
author
Henry Chen
committed
Update
1 parent ab09081 commit 7fe80d8

File tree

9 files changed

+11
-331
lines changed

9 files changed

+11
-331
lines changed

src/main/java/edu/emory/clir/clearnlp/extraction/attribute/corpus/EntityTokenCorpusReconstructor.java

+1-1
Original file line number | Diff line number | Diff line change
@@ -71,7 +71,7 @@ public EntityTokenCorpusReconstructor(CorpusType type, List<String> filePaths, S
7171
private void init(){
7272
switch (type) {
7373
case RAW: decoder = new NLPDecoder(TLanguage.ENGLISH); break;
74-
case TSV: reader = new TSVReader(0, 1, 2, 3, 7, 4, 5, 6, -1, -1); break;
74+
case TSV: reader = new TSVReader(0, 1, 2, 3, 9, 4, 5, 6, 7, 8); break;
7575
}
7676
}
7777

src/main/java/edu/emory/clir/clearnlp/extraction/attribute/corpus/MultiTheadedCorpusReconstructor.java

+1-2
Original file line number | Diff line number | Diff line change
@@ -55,7 +55,7 @@ public void reconstruct(List<String> filePaths) {
5555
AbstractCorpusRecontructor constructor_new;
5656
ExecutorService executor = Executors.newFixedThreadPool(threadCount);
5757
for(i = 0; i < l_size; i+=b_size){
58-
if(i+b_size >= l_size){
58+
if(i+b_size < l_size){
5959
constructor_new = constructor.clone();
6060
constructor_new.setFilePaths(filePaths.subList(i, i+b_size));
6161
executor.submit(constructor_new);
@@ -65,7 +65,6 @@ public void reconstruct(List<String> filePaths) {
6565
executor.submit(constructor);
6666
}
6767
}
68-
6968
}
7069

7170
@Override

src/main/java/edu/emory/clir/clearnlp/extraction/attribute/ngram/AbstractNGram.java

-109
This file was deleted.

src/main/java/edu/emory/clir/clearnlp/extraction/attribute/ngram/Bigram.java

-75
This file was deleted.

src/main/java/edu/emory/clir/clearnlp/extraction/attribute/ngram/Trigram.java

-25
This file was deleted.

src/main/java/edu/emory/clir/clearnlp/extraction/attribute/ngram/Unigram.java

-61
This file was deleted.

src/main/java/edu/emory/clir/clearnlp/extraction/attribute/ngram/collector/AbstractNGramCollector.java

-36
This file was deleted.

src/main/java/edu/emory/clir/clearnlp/extraction/attribute/ngram/smoothing/ISmoothing.java

-18
This file was deleted.

src/test/java/edu/emory/clir/clearnlp/extraction/attribute/corpus/EntityTokenCorpusReconstructorTest.java

+9-4
Original file line number | Diff line number | Diff line change
@@ -20,22 +20,27 @@
2020
import org.junit.Test;
2121

2222
import edu.emory.clir.clearnlp.util.DSUtils;
23+
import edu.emory.clir.clearnlp.util.FileUtils;
2324

2425
/**
2526
* @author Yu-Hsin(Henry) Chen ({@code [email protected]})
2627
* @version 1.0
2728
* @since Sep 23, 2015
2829
*/
2930
public class EntityTokenCorpusReconstructorTest{
30-
public final Set<String> EXT = DSUtils.toHashSet(".txt");
31-
public final String INPUT_DIR = "/Users/HenryChen/Documents/clearnlp-qa/corpus/NYT";
32-
public final String OUPUT_DIR = "/Users/HenryChen/Documents/clearnlp-qa/corpus/NYT_OUT";
31+
public final Set<String> EXT = DSUtils.toHashSet(".dep");
32+
public final String INPUT_DIR = "/Users/HenryChen/Documents/clearnlp-qa/corpus/WSJ";
33+
public final String OUPUT_DIR = "/Users/HenryChen/Documents/clearnlp-qa/corpus/WSJ_OUT";
3334
public final Set<String> NERLabels = DSUtils.toHashSet("PERSON", "ORG", "LOC", "GPE");
3435

3536
@Test
3637
public void testReconstructor(){
38+
System.out.println(INPUT_DIR);
39+
System.out.println(FileUtils.getBaseName(INPUT_DIR));
40+
System.out.println(FileUtils.getFileList(INPUT_DIR, ".dep", false));
41+
3742
EntityTokenCorpusReconstructor constructor
38-
= new EntityTokenCorpusReconstructor(CorpusType.RAW, INPUT_DIR, OUPUT_DIR, EXT, NERLabels);
43+
= new EntityTokenCorpusReconstructor(CorpusType.TSV, INPUT_DIR, OUPUT_DIR, EXT, NERLabels);
3944

4045
constructor.reconstruct();
4146
}

0 commit comments

Comments
 (0)