Skip to content

Commit

Permalink
Resolution for #67: Added CLI options --limit and --paginate
Browse files Browse the repository at this point in the history
Aklakan committed Nov 26, 2024

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
1 parent 31e2671 commit a7fb879
Showing 3 changed files with 37 additions and 11 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
@@ -476,7 +476,7 @@
<plugin>
<groupId>com.google.cloud.tools</groupId>
<artifactId>jib-maven-plugin</artifactId>
<version>3.2.0</version>
<version>3.4.4</version>
</plugin>
<plugin>
<artifactId>jdeb</artifactId>
Original file line number Diff line number Diff line change
@@ -11,6 +11,7 @@
import java.util.concurrent.Callable;

import org.aksw.jenax.arq.picocli.CmdMixinArq;
import org.aksw.jenax.arq.picocli.CmdMixinSparqlPaginate;
import org.aksw.rdf_processing_toolkit.cli.cmd.CmdCommonBase;
import org.aksw.rdf_processing_toolkit.cli.cmd.VersionProviderRdfProcessingToolkit;
import org.aksw.sparql_integrate.cli.main.SparqlIntegrateCmdImpls;
@@ -75,8 +76,6 @@ public class CmdSparqlIntegrateMain
@Option(names = { "--cache-rewrite-groupby" }, description="Cache GROUP BY operations individually. Ignored if no cache engine is specified.") //, defaultValue = "false", fallbackValue = "true")
public boolean cacheRewriteGroupBy = false;



@Option(names = { "--tmpdir" }, description="Temporary directory")
public String tempPath = StandardSystemProperty.JAVA_IO_TMPDIR.value();

@@ -96,6 +95,10 @@ public class CmdSparqlIntegrateMain
@Mixin
public CmdMixinArq arqConfig = new CmdMixinArq();

/** Mixin for result set limit and pagination */
@Mixin
public CmdMixinSparqlPaginate paginationConfig = new CmdMixinSparqlPaginate();

@Option(names= {"--bnp", "--bnode-profile"}, description="Blank node profile, empty string ('') to disable; 'auto' to autodetect, defaults to ${DEFAULT-VALUE}", defaultValue = "")
public String bnodeProfile = null;

Original file line number Diff line number Diff line change
@@ -484,6 +484,16 @@ public static int sparqlIntegrate(CmdSparqlIntegrateMain cmd) throws Exception {
}
}

Long resultSetPageSize = cmd.paginationConfig.queryPageSize;
if (resultSetPageSize != null && resultSetPageSize > 0) {
dataSourceTmp = RdfDataEngines.wrapWithDataSourceTransform(dataSourceTmp, ds -> RdfDataSources.withPagination(ds, resultSetPageSize));
}

Long queryLimit = cmd.paginationConfig.queryLimit;
if (queryLimit != null && queryLimit > 0) {
dataSourceTmp = RdfDataEngines.wrapWithDataSourceTransform(dataSourceTmp, ds -> RdfDataSources.withLimit(ds, queryLimit));
}

dataSourceTmp = RdfDataEngines.wrapWithQueryTransform(dataSourceTmp, null, QueryExecs::withDetailedHttpMessages);

if (cmd.cachePath != null) {
@@ -582,16 +592,19 @@ public static int sparqlIntegrate(CmdSparqlIntegrateMain cmd) throws Exception {

// Load function macros (run sparql inferences first)
Map<String, UserDefinedFunctionDefinition> udfRegistry = new LinkedHashMap<>();

// XXX There should be a separate registry for default macros to load.
loadMacros(macroProfiles, udfRegistry, "macros/ollama.ttl");

for (String macroSource : cmd.macroSources) {
Model model = RDFDataMgr.loadModel(macroSource);
SparqlStmtMgr.execSparql(model, "udf-inferences.rq");
Map<String, UserDefinedFunctionDefinition> contrib = UserDefinedFunctions.load(model, macroProfiles);
udfRegistry.putAll(contrib);
loadMacros(macroProfiles, udfRegistry, macroSource);
}

if (!cmd.macroSources.isEmpty()) {
logger.info("Loaded functions: {}", udfRegistry.keySet());
logger.info("Loaded {} function definitions from {} macro sources.", udfRegistry.size(), cmd.macroSources.size());
if (logger.isInfoEnabled()) {
logger.info("Loaded functions: {}", udfRegistry.keySet());
logger.info("Loaded {} function definitions from {} macro sources.", udfRegistry.size(), cmd.macroSources.size());
}
// ExprTransform eform = new ExprTransformExpand(udfRegistry);
ExprTransform eform = new ExprTransformCopy() {
@Override
@@ -602,8 +615,6 @@ public Expr transform(ExprFunctionN func, ExprList args) {
};
SparqlStmtTransform stmtTransform = SparqlStmtTransforms.ofExprTransform(eform);
dataSourceTmp = RdfDataEngines.wrapWithStmtTransform(dataSourceTmp, stmtTransform);
// QueryTransform qform = q -> QueryUtils.rewrite(q, op -> Transformer.transform(null, eform, op));
// dataSourceTmp = RdfDataEngines.wrapWithQueryTransform(dataSourceTmp, qform, null);
}

RdfDataEngine datasetAndDelete = dataSourceTmp;
@@ -848,10 +859,14 @@ public void afterExec() {

server.start();

// Try to get the host address from a network device (e.g. within a docker container)
String hostAddress;
try(final DatagramSocket socket = new DatagramSocket()){
socket.connect(InetAddress.getByName("1.1.1.1"), 53);
hostAddress = socket.getLocalAddress().getHostAddress();
} catch (Exception e) {
// Fall back to localhost
hostAddress = "localhost";
}
URI browseUri = new URI("http://"+hostAddress+":" + port + "/");
if (Desktop.isDesktopSupported()) {
@@ -928,6 +943,14 @@ public void afterExec() {
return exitCode;
}

/**
 * Loads SPARQL user-defined-function (macro) definitions from {@code macroSource}
 * into {@code udfRegistry}.
 *
 * <p>The source is read as an RDF model, the {@code udf-inferences.rq} statements
 * are executed against it, and the resulting definitions (filtered by the active
 * {@code macroProfiles}) are merged into the registry, overwriting entries with
 * the same name.
 *
 * @param macroProfiles the set of active macro profile names used to filter definitions
 * @param udfRegistry   the registry receiving the loaded definitions (mutated in place)
 * @param macroSource   the location (file/URL) of the RDF document holding the macros
 */
private static void loadMacros(Set<String> macroProfiles, Map<String, UserDefinedFunctionDefinition> udfRegistry,
        String macroSource) {
    Model macroModel = RDFDataMgr.loadModel(macroSource);
    // Derive implied UDF triples before extracting the definitions.
    SparqlStmtMgr.execSparql(macroModel, "udf-inferences.rq");
    udfRegistry.putAll(UserDefinedFunctions.load(macroModel, macroProfiles));
}

/** Be careful not to call within a read transaction! */
public static void updateSpatialIndex(Dataset dataset) {
Context cxt = dataset.getContext();

0 comments on commit a7fb879

Please sign in to comment.