Skip to content

Commit

Permalink
Merge pull request #40 from leogott/feature-prefix
Browse files Browse the repository at this point in the history
WIP: Prefix splitting on PREFIX as well as @Prefix
  • Loading branch information
chiarcos authored Nov 30, 2023
2 parents 15a3d20 + 991197d commit 7724276
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 59 deletions.
8 changes: 5 additions & 3 deletions src/main/java/org/acoli/conll/rdf/CoNLLRDFAnnotator.java
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,10 @@ public static void main(String[] args) throws IOException {
while((line = in.readLine())!=null) {
line=line.replaceAll("[\t ]+"," ").trim();

if(!buffer.trim().equals(""))
if((line.startsWith("@") || line.startsWith("#")) && !lastLine.startsWith("@") && !lastLine.startsWith("#")) {
if(!buffer.trim().equals("") &&
(line.startsWith("@") || line.startsWith("PREFIX") || line.startsWith("#")) &&
!(lastLine.startsWith("@") || lastLine.startsWith("PREFIX") || lastLine.startsWith("#"))
) {
while(!command.trim().equals(">")) {
System.err.print(
"actions ............................................................................................................\n"+
Expand Down Expand Up @@ -108,7 +110,7 @@ public static void main(String[] args) throws IOException {
command = "";
}
//System.err.println(ANSI_RED+"> "+line+ANSI_RESET);
if(line.trim().startsWith("@") && !lastLine.trim().endsWith("."))
if((line.trim().startsWith("@") || line.trim().startsWith("PREFIX")) && !lastLine.trim().endsWith("."))
//System.out.print("\n");
buffer=buffer+"\n";

Expand Down
98 changes: 50 additions & 48 deletions src/main/java/org/acoli/conll/rdf/CoNLLRDFFormatter.java
Original file line number Diff line number Diff line change
Expand Up @@ -423,9 +423,9 @@ protected static String reorderTTLBuffer(String buffer, List<String> cols) {
String line;
while((line=in.readLine())!=null) {
line=line.trim();
if(line.startsWith("@")) result=result+line+"\n"; else
if(line.startsWith("#")) result=result+line+"\n"; else
if(!line.equals("")) {
if(line.startsWith("@") || line.startsWith("PREFIX") || line.startsWith("#")) {
result=result+line+"\n";
} else if(!line.equals("")) {
//reorder columns according to user list.
String orderedLine = "";
List<String> statements = new ArrayList<String>(Arrays.asList(line.substring(0, line.lastIndexOf(".")-1).split(";\\s*\t"))); //TODO: only consider ; not ";"
Expand Down Expand Up @@ -665,62 +665,64 @@ protected void processSentenceStream() throws IOException {
while((line = getInputStream().readLine())!=null) {
line=line.replaceAll("[\t ]+"," ").trim();

if(!buffer.trim().equals(""))
if((line.startsWith("@") || line.startsWith("#")) && !lastLine.startsWith("@") && !lastLine.startsWith("#")) { //!buffer.matches("@[^\n]*\n?$")) {
for (Module m:modules) {
if(m.getMode()==Mode.CONLLRDF) m.getOutputStream().println(reorderTTLBuffer(buffer, m.getCols()));
if(m.getMode()==Mode.DEBUG) System.err.println(colorTTL(reorderTTLBuffer(buffer, m.getCols())));
if(m.getMode()==Mode.CONLL) {
if (m.getCols().size() < 1) {// no column args supplied
LOG.info("No column names in cmd args, searching rdf comments..");
List<String> conllColumns = findColumnNamesInRDFBuffer(buffer);
if(!buffer.trim().equals("") &&
((line.startsWith("@") || line.startsWith("PREFIX")) || line.startsWith("#")) &&
!(lastLine.startsWith("@") || lastLine.startsWith("PREFIX") || lastLine.startsWith("#"))
) {
for (Module m:modules) {
if(m.getMode()==Mode.CONLLRDF) m.getOutputStream().println(reorderTTLBuffer(buffer, m.getCols()));
if(m.getMode()==Mode.DEBUG) System.err.println(colorTTL(reorderTTLBuffer(buffer, m.getCols())));
if(m.getMode()==Mode.CONLL) {
if (m.getCols().size() < 1) {// no column args supplied
LOG.info("No column names in cmd args, searching rdf comments..");
List<String> conllColumns = findColumnNamesInRDFBuffer(buffer);
if (conllColumns.size()>0) {
LOG.info("Using #global.comments from rdf");
m.setCols(conllColumns);
} else {
LOG.info("Trying conll columns now..");
conllColumns = CoNLLStreamExtractor.findFieldsFromComments(new BufferedReader(new StringReader(buffer.trim())), 1);
if (conllColumns.size()>0) {
LOG.info("Using #global.comments from rdf");
m.setCols(conllColumns);
} else {
LOG.info("Trying conll columns now..");
conllColumns = CoNLLStreamExtractor.findFieldsFromComments(new BufferedReader(new StringReader(buffer.trim())), 1);
if (conllColumns.size()>0) {
m.setCols(conllColumns);
}
}
}
if (m.getCols().size() < 1) {
LOG.info("Supply column names some way! (-conll arg, global.columns or rdf comments");
}
else
printSparql(buffer, columnsAsSelect(m.getCols()), new OutputStreamWriter(m.getOutputStream()));
}
if(m.getMode()==Mode.QUERY) printSparql(buffer, m.getSelect(), new OutputStreamWriter(m.getOutputStream()));
if(m.getMode()==Mode.GRAMMAR) m.getOutputStream().println(extractCoNLLGraph(buffer,true));
if(m.getMode()==Mode.SEMANTICS) m.getOutputStream().println(extractTermGraph(buffer,true));
if(m.getMode()==Mode.GRAMMAR_SEMANTICS) {
m.getOutputStream().println(extractCoNLLGraph(buffer,true));
m.getOutputStream().println(extractTermGraph(buffer,false));
if (m.getCols().size() < 1) {
LOG.info("Supply column names some way! (-conll arg, global.columns or rdf comments");
}
else
printSparql(buffer, columnsAsSelect(m.getCols()), new OutputStreamWriter(m.getOutputStream()));
}
if(m.getMode()==Mode.QUERY) printSparql(buffer, m.getSelect(), new OutputStreamWriter(m.getOutputStream()));
if(m.getMode()==Mode.GRAMMAR) m.getOutputStream().println(extractCoNLLGraph(buffer,true));
if(m.getMode()==Mode.SEMANTICS) m.getOutputStream().println(extractTermGraph(buffer,true));
if(m.getMode()==Mode.GRAMMAR_SEMANTICS) {
m.getOutputStream().println(extractCoNLLGraph(buffer,true));
m.getOutputStream().println(extractTermGraph(buffer,false));
}
buffer="";
}
//System.err.println(ANSI_RED+"> "+line+ANSI_RESET);
if(line.trim().startsWith("@") && !lastLine.trim().endsWith("."))
//System.out.print("\n");
buffer=buffer+"\n";
buffer="";
}
//System.err.println(ANSI_RED+"> "+line+ANSI_RESET);
if((line.trim().startsWith("@") || line.trim().startsWith("PREFIX")) && !lastLine.trim().endsWith("."))
//System.out.print("\n");
buffer=buffer+"\n";

if(line.trim().startsWith("#") && (!lastLine.trim().startsWith("#")))
// System.out.print("\n");
buffer=buffer+"\n";
//System.out.print(" "+color(line));
//System.out.print(color(line));
buffer=buffer+line+"\t";//+"\n";
if(line.trim().startsWith("#") && (!lastLine.trim().startsWith("#")))
// System.out.print("\n");
buffer=buffer+"\n";

//System.out.print(" "+color(line));
//System.out.print(color(line));
buffer=buffer+line+"\t";//+"\n";

if(line.trim().endsWith(".") || line.trim().matches("^(.*>)?[^<]*#"))
//System.out.print("\n");
buffer=buffer+"\n";
if(line.trim().endsWith(".") || line.trim().matches("^(.*>)?[^<]*#"))
//System.out.print("\n");
buffer=buffer+"\n";

//System.out.println();
lastLine=line;
}
//System.out.println();
lastLine=line;
}

for (Module m:modules) {
if(m.getMode()==Mode.CONLLRDF) m.getOutputStream().println(reorderTTLBuffer(buffer, m.getCols()));
Expand Down
20 changes: 12 additions & 8 deletions src/main/java/org/acoli/conll/rdf/CoNLLRDFUpdater.java
Original file line number Diff line number Diff line change
Expand Up @@ -806,17 +806,21 @@ protected void processSentenceStream() throws IOException {
String lastLine ="";
String buffer="";
// List<Pair<Integer,Long> > dRTs = new ArrayList<Pair<Integer,Long> >(); // iterations and execution time of each update in seconds
// TODO Refactor @Leo
while((line = getInputStream().readLine())!=null) {
line=line.replaceAll("[\t ]+"," ").trim();
line=line.replaceAll("[\t ]+"," ").trim(); // TODO this will mess-up multiline strings with lines ending in whitespace

if(!buffer.trim().equals("") && (line.startsWith("@") || line.startsWith("#")) && !lastLine.startsWith("@") && !lastLine.startsWith("#")) { //!buffer.matches("@[^\n]*\n?$")) {
// If the buffer is not empty and the current line starts with @ or #
// and the previous line did not start with @ or #
if(!buffer.trim().equals("") &&
(line.startsWith("@") || line.startsWith("PREFIX") || line.startsWith("#")) &&
!(lastLine.startsWith("@") || lastLine.startsWith("PREFIX") || lastLine.startsWith("#"))
) {
// Sentence boundary: the buffer is not empty, the current line is a header line
// (starts with @, PREFIX or #) and the previous line was NOT a header line.
// All three parts must hold. Each group of alternatives is parenthesised because
// && binds tighter than ||, and the negated group must inspect lastLine, not line.
// check if the buffer contains a ttl prefix
if (buffer.contains("@prefix")) {
if (buffer.contains("@prefix") || buffer.contains("PREFIX")) {
prefixCache = new String();
for (String buffLine:buffer.split("\n")) {
if (buffLine.trim().startsWith("@prefix")) {
if (buffLine.trim().startsWith("@prefix") || buffLine.trim().startsWith("PREFIX")) {
prefixCache += buffLine+"\n";
}
}
Expand Down Expand Up @@ -862,7 +866,7 @@ protected void processSentenceStream() throws IOException {
}

// FINAL SENTENCE (with prefixes if necessary)
if (!buffer.contains("@prefix")) {
if (!(buffer.contains("@prefix") || buffer.contains("PREFIX"))) {
buffer = prefixCache+buffer;
}

Expand Down Expand Up @@ -975,7 +979,7 @@ private synchronized void flushOutputBuffer(PrintStream out) {
if (prefixDeduplication) {
String prefixCacheTMP = new String();
for (String buffLine:sentBufferOut.remove(0).split("\n")) {
if (buffLine.trim().startsWith("@prefix")) {
if (buffLine.trim().startsWith("@prefix") || buffLine.trim().startsWith("PREFIX")) {
prefixCacheTMP += buffLine+"\n";
} else if (!buffLine.trim().isEmpty()) {
outString += buffLine+"\n";
Expand Down

0 comments on commit 7724276

Please sign in to comment.