Skip to content

Commit

Permalink
Blazegraph: rewrite queries that use multiple INCLUDE/WITH named-subquery pairs.
Browse files Browse the repository at this point in the history
  • Loading branch information
JervenBolleman committed Sep 24, 2024
1 parent 0172fa8 commit ad1828e
Show file tree
Hide file tree
Showing 2 changed files with 324 additions and 124 deletions.
224 changes: 104 additions & 120 deletions src/main/java/swiss/sib/rdf/sparql/examples/Fixer.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,9 @@
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.time.Instant;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.Callable;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
Expand Down Expand Up @@ -41,31 +39,18 @@
import org.openrdf.query.MalformedQueryException;

import com.bigdata.bop.BOp;
import com.bigdata.journal.TemporaryStore;
import com.bigdata.rdf.sail.sparql.ASTVisitorBase;
import com.bigdata.rdf.sail.sparql.Bigdata2ASTSPARQLParser;
import com.bigdata.rdf.sail.sparql.BigdataParsedQuery;
import com.bigdata.rdf.sail.sparql.ast.ASTGraphPatternGroup;
import com.bigdata.rdf.sail.sparql.ast.VisitorException;
import com.bigdata.rdf.sparql.ast.GraphPatternGroup;
import com.bigdata.rdf.sparql.ast.GroupNodeBase;
import com.bigdata.rdf.sparql.ast.IGroupMemberNode;
import com.bigdata.rdf.sparql.ast.IJoinNode;
import com.bigdata.rdf.sparql.ast.IQueryNode;
import com.bigdata.rdf.sparql.ast.JoinGroupNode;
import com.bigdata.rdf.sparql.ast.NamedSubqueriesNode;
import com.bigdata.rdf.sparql.ast.NamedSubqueryInclude;
import com.bigdata.rdf.sparql.ast.NamedSubqueryRoot;
import com.bigdata.rdf.sparql.ast.QueryBase;
import com.bigdata.rdf.sparql.ast.QueryHints;
import com.bigdata.rdf.sparql.ast.QueryNodeWithBindingSet;
import com.bigdata.rdf.sparql.ast.QueryRoot;
import com.bigdata.rdf.sparql.ast.QueryType;
import com.bigdata.rdf.sparql.ast.StatementPatternNode;
import com.bigdata.rdf.sparql.ast.SubqueryRoot;
import com.bigdata.rdf.sparql.ast.eval.AST2BOpContext;
import com.bigdata.rdf.sparql.ast.optimizers.ASTQueryHintOptimizer;
import com.bigdata.rdf.store.AbstractTripleStore;

import picocli.CommandLine;
import picocli.CommandLine.Model.CommandSpec;
Expand Down Expand Up @@ -105,34 +90,34 @@ public Integer call() {
private void findFilesToFix() {
try {
Map<String, String> prefixes = loadPrefixes();
try (Stream<Path> sparqlExamples = FindFiles.sparqlExamples(inputDirectory)){
sparqlExamples.forEach(ttl -> {
System.out.println("Looking at:" + ttl);
try (FileInputStream in = new FileInputStream(ttl.toFile())) {
Model model = Rio.parse(in, RDFFormat.TURTLE);
IRI queryIri = null;
Value query = null;
Statement select = has(model, SHACL.SELECT);
Statement construct = has(model, SHACL.CONSTRUCT);
Statement ask = has(model, SHACL.ASK);
if (select != null) {
queryIri = (IRI) select.getSubject();
query = select.getObject();
} else if (construct != null) {
queryIri = (IRI) construct.getSubject();
query = construct.getObject();
} else if (ask != null) {
queryIri = (IRI) ask.getSubject();
query = ask.getObject();
}
if (queryIri != null && query != null) {
fix(queryIri, query, ttl, model, prefixes);
}
} catch (IOException | RDFParseException e) {
System.err.println("RDF error in " + ttl);
Failure.CANT_READ_EXAMPLE.exit(e);
}
});
try (Stream<Path> sparqlExamples = FindFiles.sparqlExamples(inputDirectory)) {
sparqlExamples.forEach(ttl -> {
System.out.println("Looking at:" + ttl);
try (FileInputStream in = new FileInputStream(ttl.toFile())) {
Model model = Rio.parse(in, RDFFormat.TURTLE);
IRI queryIri = null;
Value query = null;
Statement select = has(model, SHACL.SELECT);
Statement construct = has(model, SHACL.CONSTRUCT);
Statement ask = has(model, SHACL.ASK);
if (select != null) {
queryIri = (IRI) select.getSubject();
query = select.getObject();
} else if (construct != null) {
queryIri = (IRI) construct.getSubject();
query = construct.getObject();
} else if (ask != null) {
queryIri = (IRI) ask.getSubject();
query = ask.getObject();
}
if (queryIri != null && query != null) {
fix(queryIri, query, ttl, model, prefixes);
}
} catch (IOException | RDFParseException e) {
System.err.println("RDF error in " + ttl);
Failure.CANT_READ_EXAMPLE.exit(e);
}
});
}
} catch (IOException e) {
Failure.CANT_READ_INPUT_DIRECTORY.exit(e);
Expand All @@ -151,14 +136,14 @@ private Map<String, String> loadPrefixes() throws IOException {
SailRepository sr = new SailRepository(new MemoryStore());
sr.init();
try (SailRepositoryConnection conn = sr.getConnection()) {
FindFiles.prefixFile(inputDirectory).forEach(p->{
FindFiles.prefixFile(inputDirectory).forEach(p -> {
conn.begin();
try {
conn.add(p.toFile());
} catch (RDFParseException | RepositoryException | IOException e) {
Failure.CANT_PARSE_PREFIXES.exit(e);
}
conn.commit();
conn.commit();
});
}
Map<String, String> prefixes = new LinkedHashMap<>();
Expand All @@ -182,11 +167,9 @@ private Map<String, String> loadPrefixes() throws IOException {
return prefixes;
}

private void fix(IRI queryIri, Value query, Path file, Model model, Map<String, String> prefixes2) {
static void fix(IRI queryIri, Value query, Path file, Model model, Map<String, String> prefixes2) {
String queryIriStr = queryIri.stringValue();
String queryStr = query.stringValue()
.replace("\\\"", "\"");

String queryStr = query.stringValue();

String fixedPrefixes = Fixer.fixMissingPrefixes(queryStr, prefixes2);
String fix = null;
Expand All @@ -198,7 +181,7 @@ private void fix(IRI queryIri, Value query, Path file, Model model, Map<String,
queryStr = fixedPrefixes;
}
fix = Fixer.fixBlazeGraph(queryStr, queryIriStr, file);

if (fix != null) {
System.out.println("Fixed blaze graph " + queryIriStr + " in file " + file);
model.remove(queryIri, SHACL.SELECT, query);
Expand All @@ -207,21 +190,21 @@ private void fix(IRI queryIri, Value query, Path file, Model model, Map<String,
model.add(queryIri, SIB.BIGDATA_SELECT, query);
writeFixedModel(file, model);
return;
}
}
if (fixedPrefixes == null) {
System.out.println("No change to:" + file);
}
}

private void writeFixedModel(Path file, Model model) {
private static void writeFixedModel(Path file, Model model) {
try (OutputStream out = Files.newOutputStream(file, StandardOpenOption.TRUNCATE_EXISTING)) {
model.getNamespaces().add(SHACL.NS);
model.getNamespaces().add(RDF.NS);
model.getNamespaces().add(RDFS.NS);
model.getNamespaces().add(SchemaDotOrg.NS);
model.getNamespaces().add(DCTERMS.NS);
Rio.write(model, out, RDFFormat.TURTLE);

} catch (RDFHandlerException | IOException e) {
Failure.CANT_WRITE_FIXED_EXAMPLE.exit(e);
}
Expand All @@ -241,34 +224,43 @@ public static String fixMissingPrefixes(String original, Map<String, String> pre
else
return changed.toString();
}

/**
 * Applies the Blazegraph-specific rewrites to a query string.
 * <p>
 * First rewrites Blazegraph named-subquery {@code WITH ... AS %x} /
 * {@code INCLUDE %x} constructs into inline standard SPARQL, then strips
 * Blazegraph {@code hint:} query hints from the result.
 *
 * @param original    the query text as found in the example file
 * @param queryIriStr IRI of the example query (passed through for reporting)
 * @param fileStr     path of the file the query came from (passed through)
 * @return the rewritten query, or {@code null} if neither rewrite applied
 */
public static String fixBlazeGraph(String original, String queryIriStr, Path fileStr) {
	String fix = fixBlazeGraphIncludeWith(original, queryIriStr, fileStr);
	if (fix != null) {
		// Feed the include-rewritten text into the hint fixer so both
		// rewrites can apply to the same query.
		original = fix;
	}
	String fix2 = fixBlazeGraphHints(original, queryIriStr, fileStr);
	if (fix2 != null) {
		return fix2;
	}
	// Hint fixer made no change: report the include rewrite,
	// which is null when nothing changed at all.
	return fix;
}

/**
 * Removes Blazegraph {@code hint:} query-hint statements that make a query
 * unparseable by a standards-compliant SPARQL parser.
 * <p>
 * Strategy: if the query mentions {@code hint:} and fails to parse as-is,
 * retry with the Blazegraph hint namespace declared. If that makes it parse,
 * the only problem was the undeclared prefix, so the hint triples themselves
 * are stripped from the original text.
 *
 * @param original    the query text to inspect
 * @param queryIriStr IRI of the example query (unused here, kept for a
 *                    signature parallel with the other fixers)
 * @param fileStr     path of the source file (unused here, see above)
 * @return the query with hint statements removed, or {@code null} when no
 *         safe fix could be made (or none was needed)
 */
public static String fixBlazeGraphHints(String original, String queryIriStr, Path fileStr) {
	if (original.contains("hint:")) {
		try {
			// Parses as-is: any hint: prefix is properly declared in the
			// query itself, so leave it untouched.
			new SPARQLParser().parseQuery(original, QueryHints.NAMESPACE);
		} catch (org.eclipse.rdf4j.query.MalformedQueryException e) {
			// Maybe it only fails because hint: is undeclared — retry with
			// the Blazegraph hint namespace prefixed.
			String testQ = "PREFIX hint:<" + QueryHints.NAMESPACE + ">\n" + original;
			try {
				new SPARQLParser().parseQuery(testQ, QueryHints.NAMESPACE);
				// We now know we have hints that are in the query and we
				// need to remove them.
				// NOTE(review): this regex would also match "hint:" inside
				// string literals — confirm no example embeds such text.
				return original.replaceAll("hint:([^.;,])+[.;,]", "");
			} catch (org.eclipse.rdf4j.query.MalformedQueryException e2) {
				// Malformed for other reasons; nothing we can safely do.
				return null;
			}
		}
	}
	return null;
}

public static String fixBlazeGraphIncludeWith(String original, String queryIriStr, Path fileStr) {
Bigdata2ASTSPARQLParser blzp = new Bigdata2ASTSPARQLParser();
try {
Expand All @@ -277,12 +269,26 @@ public static String fixBlazeGraphIncludeWith(String original, String queryIriSt
QueryRoot origAst = pq.getASTContainer().getOriginalAST();
NamedSubqueriesNode nsq = origAst.getNamedSubqueries();
if (nsq != null) {
BOp bOp = nsq.get(0);
StringBuilder sb = new StringBuilder(original);
for (int i = 0; i < nsq.size(); i++) {
NamedSubqueryRoot bOp = (NamedSubqueryRoot) nsq.get(i);

origAst.clearProperty("namedSubqueries");
origAst.clearProperty("namedSubqueries");

Pattern asP = Pattern.compile(bOp.getName() + "\\s");
Matcher matcher = asP.matcher(sb);

StringBuilder sb = new StringBuilder(original);
BOp fixed = replaceIncludes(origAst, bOp, sb);
if (matcher.find()) {
int startAsP = matcher.start();
int lastClosingBracket = sb.lastIndexOf("}", startAsP);
int openingBracket = findBlockInMatchingBrackets(sb, lastClosingBracket - 1);
int withStart = findWithJustBeforeOpenBracket(sb, openingBracket);
String toInclude = sb.substring(openingBracket, lastClosingBracket + 1);
bOp.annotations().put("original", toInclude);
sb.delete(withStart, matcher.end());
}
BOp fixed = replaceIncludes(origAst, bOp, sb);
}
return sb.toString();
}
return null;
Expand All @@ -291,6 +297,8 @@ public static String fixBlazeGraphIncludeWith(String original, String queryIriSt
return null;
}
}

private static final Pattern WITH = Pattern.compile("with", Pattern.CASE_INSENSITIVE);

private static BOp replaceIncludes(BOp astContainer, BOp bOp, StringBuilder blazeGraphIncludeExample) {
return switch (astContainer) {
Expand All @@ -300,8 +308,10 @@ private static BOp replaceIncludes(BOp astContainer, BOp bOp, StringBuilder blaz
blazeGraphIncludeExample));
yield nq;
}
case NamedSubqueryRoot nsqr -> bOp;
case SubqueryRoot sqb -> sqb;
case SubqueryRoot sqb -> {
replaceIncludes(sqb.getGraphPattern(), bOp, blazeGraphIncludeExample);
yield sqb;
}
case GraphPatternGroup jgn -> {
var nq = new JoinGroupNode(jgn);
nq.getChildren().clear();
Expand All @@ -319,23 +329,14 @@ private static BOp replaceIncludes(BOp astContainer, BOp bOp, StringBuilder blaz
if (nsq.annotations().get("namedSet").equals(as)) {
SubqueryRoot sqr = new SubqueryRoot((QueryType) bOp.annotations().get("queryType"));
sqr.setGraphPattern((GraphPatternGroup<IGroupMemberNode>) bOp.annotations().get("graphPattern"));
Matcher m = Pattern.compile("(INCLUDE|include)\\s+" + as).matcher(blazeGraphIncludeExample);
Pattern includeAs = Pattern.compile("(INCLUDE|include)\\s+" + as+"\\s");
Matcher m = includeAs.matcher(blazeGraphIncludeExample);
if (m.find()) {

Pattern asP = Pattern.compile(as.toString(), Pattern.LITERAL);
Pattern origP = Pattern.compile("(?:(?:WITH|with)\\s*\\{([\\s\\S]*?)\\}\\s+AS\\s+" + asP.pattern() + ")",
Pattern.MULTILINE);
Matcher orig = origP.matcher(blazeGraphIncludeExample);
if (orig.find()) {
try {
String r = m.replaceAll('{' + orig.group(1) + '}');
blazeGraphIncludeExample.setLength(0);
blazeGraphIncludeExample.append(r);
blazeGraphIncludeExample.delete(orig.start(), orig.end());
} catch (IllegalArgumentException e) {
System.out.println("Can't fix due to regex issue:"+origP.pattern());
}
}
do {
blazeGraphIncludeExample.delete(m.start(), m.end());
blazeGraphIncludeExample.insert(m.start(), bOp.annotations().get("original"));
m = includeAs.matcher(blazeGraphIncludeExample);
} while (m.find());
}
yield sqr;
}
Expand All @@ -344,46 +345,29 @@ private static BOp replaceIncludes(BOp astContainer, BOp bOp, StringBuilder blaz
default -> astContainer;
};
}

private static boolean hasHints(QueryBase astContainer, StringBuilder blazeGraphIncludeExample) {
if (astContainer.getQueryHints() != null)
{
return true;
} else if (astContainer.annotations().containsKey("graphPattern")) {
IQueryNode gp = (IQueryNode) astContainer.annotations().get("graphPattern");
return hasHints(gp) !=null;
}
return false;
}

private static BOp hasHints(IQueryNode bOp) {
return switch (bOp) {
case QueryRoot qr -> {
yield hasHints(qr.getGraphPattern());
}
case GroupNodeBase<?> jgn -> {
for (var n:jgn.getChildren()) {
if (hasHints(n) != null)
yield jgn;

private static int findBlockInMatchingBrackets(StringBuilder blazeGraphIncludeExample, int at) {
// We look to find a matching closing pair of brackets.
int open = 1;
while (open > 0 && at > 0) {
char cat = blazeGraphIncludeExample.charAt(at);
if (cat == '{') {
open--;
} else if (cat == '}') {
open++;
}

// nq.setLeftArg(visit(nq.getChildren(), bOp));
// nq.setRightArg(visit(nq.getRightArg(), bOp));
yield jgn;
at--;
}

case StatementPatternNode spn -> {
yield spn;
}
case IJoinNode ijn -> {
for (var n:ijn.args()) {
if (hasHints((IQueryNode) n) != null)
yield ijn;
}
hasHints((IQueryNode) ijn.getProperty("graphPattern"));
yield ijn;
return at;
}

private static int findWithJustBeforeOpenBracket(StringBuilder blazeGraphIncludeExample, int at) {
Matcher toFindLastWith = WITH.matcher(blazeGraphIncludeExample.substring(0, at));
int withStart = 0;
while (toFindLastWith.find()) {
withStart = toFindLastWith.start();
// Loop is important.
}
default -> bOp;
};
return withStart;
}
}
Loading

0 comments on commit ad1828e

Please sign in to comment.