From 637f0d14f2b1ae47d19fbf35e34e2a3cb92a1bc7 Mon Sep 17 00:00:00 2001 From: Julian Rojas Date: Sun, 10 Mar 2024 00:24:20 +0100 Subject: [PATCH] Add support for multiple shape descriptions --- bin/cli.ts | 8 +- lib/client.ts | 118 +++++++++++------- lib/config.ts | 4 +- lib/memberManager.ts | 89 ++++++++----- lib/utils.ts | 46 ++++++- processor.ttl | 2 +- .../ldes-client.test.ts | 89 ++++++++++++- .../connector-architecture/processor.test.ts | 4 +- tests/data/shape1.ttl | 19 +++ tests/data/shape2.ttl | 12 ++ tests/unordered.test.ts | 3 +- 11 files changed, 308 insertions(+), 86 deletions(-) create mode 100644 tests/data/shape1.ttl create mode 100644 tests/data/shape2.ttl diff --git a/bin/cli.ts b/bin/cli.ts index 520a417..a49d427 100644 --- a/bin/cli.ts +++ b/bin/cli.ts @@ -13,7 +13,7 @@ let before: Date | undefined; let paramPollInterval: number; let urlIsView = false; let noShape = false; -let shapeFile: string | undefined; +let shapeFiles: string[] | undefined; let ordered: Ordered = "none"; let quiet: boolean = false; let verbose: boolean = false; @@ -32,7 +32,7 @@ program .option("--after ", "follow only relations including members after a certain point in time") .option("--before ", "follow only relations including members before a certain point in time") .option("--poll-interval ", "specify poll interval") - .option("--shape-file ", "specify a shapefile") + .option("--shape-files [shapeFiles...]", "specify a shapefile") .option( "--no-shape", "don't extract members with a shape (only use cbd and named graphs)", @@ -60,7 +60,7 @@ program noShape = !program.shape; save = program.save; paramURL = url; - shapeFile = program.shapeFile; + shapeFiles = program.shapeFiles; paramFollow = program.follow; paramPollInterval = program.pollInterval; ordered = program.ordered; @@ -100,7 +100,7 @@ async function main() { pollInterval: paramPollInterval, fetcher: { maxFetched: 2, concurrentRequests: 10 }, urlIsView: urlIsView, - shapeFile, + shapeFiles, onlyDefaultGraph, after, before diff --git a/lib/client.ts b/lib/client.ts index 3346974..0d677bb 100644 --- a/lib/client.ts +++ b/lib/client.ts @@ -6,8 +6,8 @@ import { CBDShapeExtractor } from "extract-cbd-shape"; import { RdfStore } from "rdf-stores"; import { DataFactory, Writer as NWriter } from "n3"; import { Quad_Object, Term } from "@rdfjs/types"; -import { getObjects, ModulatorFactory, Notifier, streamToArray } from "./utils"; -import { LDES, SDS, TREE } from "@treecg/types"; +import { extractMainNodeShape, getObjects, ModulatorFactory, Notifier, streamToArray } from "./utils"; +import { LDES, SDS, SHACL, TREE } from "@treecg/types"; import { FetchedPage, Fetcher, longPromise, resetPromise } from "./pageFetcher"; import { Manager } from "./memberManager"; import { OrderedStrategy, StrategyEvents, UnorderedStrategy } from "./strategy"; @@ -39,7 +39,7 @@ export function replicateLDES( } export type LDESInfo = { - shape?: Term; + shapeMap?: Map; extractor: CBDShapeExtractor; timestampPath?: Term; isVersionOfPath?: Term; @@ -53,19 +53,39 @@ async function getInfo( ): Promise { const logger = log.extend("getShape"); - if (config.shapeFile) { - const shapeId = config.shapeFile.startsWith("http") - ? config.shapeFile - : "file://" + config.shapeFile; + const shapeConfigStore = RdfStore.createDefault(); + if (config.shapeFiles && config.shapeFiles.length > 0) { + config.shapes = []; - const resp = await rdfDereference.dereference(config.shapeFile, { - localFiles: true, - }); - const quads = await streamToArray(resp.data); - config.shape = { - quads: quads, - shapeId: namedNode(shapeId), - }; + for (const shapeFile of config.shapeFiles) { + const tempShapeStore = RdfStore.createDefault(); + const shapeId = shapeFile.startsWith("http") + ? shapeFile + : "file://" + shapeFile; + + const resp = await rdfDereference.dereference(shapeFile, { + localFiles: true, + }); + const quads = await streamToArray(resp.data); + // Add retrieved quads to local stores + quads.forEach(q => { + tempShapeStore.addQuad(q); + shapeConfigStore.addQuad(q); + }); + + if (shapeId.startsWith("file://")) { + // We have to find the actual IRI/Blank Node of the main shape within the file + config.shapes.push({ + quads, + shapeId: extractMainNodeShape(tempShapeStore) + }); + } else { + config.shapes.push({ + quads: quads, + shapeId: namedNode(shapeId), + }); + } + } } let shapeIds = config.noShape @@ -105,11 +125,7 @@ async function getInfo( timestampPaths.length, isVersionOfPaths.length, ); - } catch (ex: any) {} - } - - if (shapeIds.length > 1) { - console.error("Expected at most one shape id, found " + shapeIds.length); + } catch (ex: any) { } } if (timestampPaths.length > 1) { @@ -124,22 +140,38 @@ async function getInfo( ); } - let shapeConfigStore = RdfStore.createDefault(); - if (config.shape) { - for (let quad of config.shape.quads) { - shapeConfigStore.addQuad(quad); + // Create a map of shapes and member types + const shapeMap = new Map(); + + if (config.shapes) { + for (const shape of config.shapes) { + const memberType = getObjects(shapeConfigStore, shape.shapeId, SHACL.terms.targetClass)[0]; + if (memberType) { + shapeMap.set(memberType.value, shape.shapeId); + } else { + console.error("Ignoring SHACL shape without a declared sh:targetClass: ", shape.shapeId); + } + } + } else { + for (const shapeId of shapeIds) { + const memberType = getObjects(store, shapeId, SHACL.terms.targetClass)[0]; + if (memberType) { + shapeMap.set(memberType.value, shapeId); + } else { + console.error("Ignoring SHACL shape without a declared sh:targetClass: ", shapeId); + } } } return { extractor: new CBDShapeExtractor( - config.shape ? shapeConfigStore : store, + config.shapes && config.shapes.length > 0 ? shapeConfigStore : store, dereferencer, { cbdDefaultGraph: config.onlyDefaultGraph, }, ), - shape: config.shape ? config.shape.shapeId : shapeIds[0], + shapeMap: config.noShape ? undefined : shapeMap, timestampPath: timestampPaths[0], isVersionOfPath: isVersionOfPaths[0], }; @@ -311,22 +343,22 @@ export class Client { this.strategy = this.ordered !== "none" ? new OrderedStrategy( - this.memberManager, - this.fetcher, - notifier, - factory, - this.ordered, - this.config.polling, - this.config.pollInterval, - ) + this.memberManager, + this.fetcher, + notifier, + factory, + this.ordered, + this.config.polling, + this.config.pollInterval, + ) : new UnorderedStrategy( - this.memberManager, - this.fetcher, - notifier, - factory, - this.config.polling, - this.config.pollInterval, - ); + this.memberManager, + this.fetcher, + notifier, + factory, + this.config.polling, + this.config.pollInterval, + ); logger("Found %d views, choosing %s", viewQuads.length, ldesId.value); this.strategy.start(ldesId.value); @@ -389,7 +421,7 @@ export async function processor( ordered?: string, follow?: boolean, pollInterval?: number, - shape?: string, + shapes?: string[], noShape?: boolean, save?: string, loose?: boolean, @@ -400,7 +432,7 @@ export async function processor( intoConfig({ loose, noShape, - shapeFile: shape, + shapeFiles: shapes, polling: follow, url: url, after, diff --git a/lib/config.ts b/lib/config.ts index 7921543..a375081 100644 --- a/lib/config.ts +++ b/lib/config.ts @@ -29,8 +29,8 @@ export interface Config { fetcher: FetcherConfig; before?: Date; after?: Date; - shape?: ShapeConfig; - shapeFile?: string; + shapes?: ShapeConfig[]; + shapeFiles?: string[]; onlyDefaultGraph?: boolean; // Add flag to indicate in order (default true) diff --git a/lib/memberManager.ts b/lib/memberManager.ts index d9977c1..4b8550b 100644 --- a/lib/memberManager.ts +++ b/lib/memberManager.ts @@ -1,8 +1,8 @@ -import { Term } from "@rdfjs/types"; +import { Term, Quad } from "@rdfjs/types"; import { Member } from "./page"; import { FetchedPage } from "./pageFetcher"; import { CBDShapeExtractor } from "extract-cbd-shape"; -import { TREE } from "@treecg/types"; +import { RDF, TREE } from "@treecg/types"; import Heap from "heap-js"; import { LDESInfo } from "./client"; import debug from "debug"; @@ -26,7 +26,7 @@ export type MemberEvents = { extracted: Member; done: Member[]; }; -const getObjects = function (store: RdfStore, subject:Term|null, predicate: Term|null, graph?:Term|null) { +const getObjects = function (store: RdfStore, subject: Term | null, predicate: Term | null, graph?: Term | null) { return store.getQuads(subject, predicate, null, graph).map((quad) => { return quad.object; }); @@ -41,7 +41,7 @@ export class Manager { private state: Set; private extractor: CBDShapeExtractor; - private shapeId?: Term; + private shapeMap?: Map; private timestampPath?: Term; private isVersionOfPath?: Term; @@ -53,7 +53,7 @@ export class Manager { this.extractor = info.extractor; this.timestampPath = info.timestampPath; this.isVersionOfPath = info.isVersionOfPath; - this.shapeId = info.shape; + this.shapeMap = info.shapeMap; logger("new %s %o", ldesId.value, info); @@ -85,39 +85,68 @@ export class Manager { member: Term, data: RdfStore, ): Promise { - const quads = await this.extractor.extract(data, member, this.shapeId); + + let quads: Quad[] = []; + + if (this.shapeMap) { + if (this.shapeMap.size === 1) { + // Use the only shape available + quads = await this.extractor.extract(data, member, Array.from(this.shapeMap.values())[0]); + } else if (this.shapeMap.size > 1) { + // Find what is the proper shape for this member based on its rdf:type + const memberType = getObjects(data, member, RDF.terms.type)[0]; + if (memberType) { + const shapeId = this.shapeMap.get(memberType.value); + if (shapeId) { + quads = await this.extractor.extract(data, member, shapeId); + } + } else { + // There is no rdf:type defined for this member. Fallback to CBD extraction + quads = await this.extractor.extract(data, member); + } + } else { + // Do a simple CBD extraction + quads = await this.extractor.extract(data, member); + } + } else { + // Do a simple CBD extraction + quads = await this.extractor.extract(data, member); + } if (this.state.has(member.value)) { return; } - this.state.add(member.value); - - // Get timestamp - let timestamp: Date | string | undefined; - if (this.timestampPath) { - const ts = quads.find( - (x) => - x.subject.equals(member) && x.predicate.equals(this.timestampPath), - )?.object.value; - if (ts) { - try { - timestamp = new Date(ts); - } catch (ex: any) { - timestamp = ts; + + if (quads.length > 0) { + this.state.add(member.value); + + // Get timestamp + let timestamp: Date | string | undefined; + if (this.timestampPath) { + const ts = quads.find( + (x) => + x.subject.equals(member) && x.predicate.equals(this.timestampPath), + )?.object.value; + if (ts) { + try { + timestamp = new Date(ts); + } catch (ex: any) { + timestamp = ts; + } } } - } - let isVersionOf: string | undefined; - if (this.isVersionOfPath) { - isVersionOf = quads.find( - (x) => - x.subject.equals(member) && x.predicate.equals(this.isVersionOfPath), - )?.object.value; - } + let isVersionOf: string | undefined; + if (this.isVersionOfPath) { + isVersionOf = quads.find( + (x) => + x.subject.equals(member) && x.predicate.equals(this.isVersionOfPath), + )?.object.value; + } - this.members.push({ id: member, quads, timestamp, isVersionOf }); - return { id: member, quads, timestamp, isVersionOf }; + this.members.push({ id: member, quads, timestamp, isVersionOf }); + return { id: member, quads, timestamp, isVersionOf }; + } } // Extract members found in this page, this does not yet emit the members diff --git a/lib/utils.ts b/lib/utils.ts index 0328a9e..edb87e9 100644 --- a/lib/utils.ts +++ b/lib/utils.ts @@ -1,12 +1,24 @@ -import { Stream, Term } from "@rdfjs/types"; +import { NamedNode, Quad_Subject, Stream, Term } from "@rdfjs/types"; import { BaseQuad } from "n3"; import { StateFactory, StateT } from "./state"; import { RdfStore } from "rdf-stores"; +import { RDF, SHACL } from "@treecg/types"; export type Notifier = { [K in keyof Events]: (event: Events[K], state: S) => void; }; +export function getSubjects( + store: RdfStore, + predicate: Term | null, + object: Term | null, + graph?: Term | null, +) { + return store.getQuads(null, predicate, object, graph).map((quad) => { + return quad.subject; + }); +} + export function getObjects( store: RdfStore, subject: Term | null, @@ -54,6 +66,38 @@ export function streamToArray( }); } +/** + * Find the main sh:NodeShape subject of a given Shape Graph. + * We determine this by assuming that the main node shape + * is not referenced by any other shape description. + * If more than one is found an exception is thrown. + */ +export function extractMainNodeShape(store: RdfStore): NamedNode { + const nodeShapes = getSubjects(store, RDF.terms.type, SHACL.terms.NodeShape, null); + let mainNodeShape = null; + + if (nodeShapes && nodeShapes.length > 0) { + for (const ns of nodeShapes) { + const isNotReferenced = getSubjects(store, null, ns, null).length === 0; + + if (isNotReferenced) { + if (!mainNodeShape) { + mainNodeShape = ns; + } else { + throw new Error("There are multiple main node shapes in a given shape graph. Unrelated shapes must be given as separate shape graphs"); + } + } + } + if (mainNodeShape) { + return mainNodeShape; + } else { + throw new Error("No main SHACL Node Shapes found in given shape graph"); + } + } else { + throw new Error("No SHACL Node Shapes found in given shape graph"); + } +} + /** * Generic interface that represents a structure that ranks elements. * Most common is a Prority Queue (heap like) the pops elements in order. diff --git a/processor.ttl b/processor.ttl index 16ac6d7..9f03dfc 100644 --- a/processor.ttl +++ b/processor.ttl @@ -113,7 +113,7 @@ js:LdesClient a js:JsProcess; sh:datatype xsd:string; sh:path js:shapeFile; sh:name "Shape"; - sh:maxCount 1; + sh:minCount 0; ], [ sh:datatype xsd:boolean; sh:path js:noShape; diff --git a/tests/connector-architecture/ldes-client.test.ts b/tests/connector-architecture/ldes-client.test.ts index 77da280..85bb21d 100644 --- a/tests/connector-architecture/ldes-client.test.ts +++ b/tests/connector-architecture/ldes-client.test.ts @@ -3,7 +3,7 @@ import { SimpleStream } from "@ajuvercr/js-runner"; import { Parser } from "n3"; import { RdfStore } from "rdf-stores"; import { processor } from "../../lib/client"; -import { DC, SDS } from "@treecg/types"; +import { DC, RDF, SDS } from "@treecg/types"; describe("Functional tests for the js:LdesClient Connector Architecture function", () => { @@ -172,4 +172,91 @@ describe("Functional tests for the js:LdesClient Connector Architecture function const isSorted = timestamps.every((v, i) => (i === 0 || v <= timestamps[i - 1])); expect(isSorted).toBeTruthy(); }); + + test("Fetching a remote LDES unordered, with before and after filter and original shapes", async () => { + const outputStream = new SimpleStream(); + + let count = 0; + const observedClasses = new Map([ + ["ContactLineSystem", false], + ["ETCSLevel", false], + ["LoadCapability", false], + ["NationalRailwayLine", false], + ["NetElement", false], + ["NetRelation", false], + ["OperationalPoint", false], + ["Geometry", false], + ["LineReference", false], + ["SectionOfLine", false], + ["Track", false], + ["TrainDetectionSystem", false] + ]); + outputStream.data(record => { + for (const classSuffix of observedClasses.keys()) { + if (record.includes(classSuffix)) { + observedClasses.set(classSuffix, true); + } + } + count++; + }); + + // Setup client + const exec = await processor( + outputStream, + "https://era.ilabt.imec.be/rinf/ldes", + new Date("2024-03-08T11:43:00.000Z"), + new Date("2024-03-08T11:39:00.000Z"), + "none", + false, + undefined, + undefined, + false + ); + + // Run client + await exec(); + // Check we got some members + expect(count).toBeGreaterThan(0); + // Check we saw all expected classes + expect(Array.from(observedClasses.values()).every(v => v === true)).toBeTruthy(); + }); + + test("Fetching a remote LDES unordered, with before and after filter and overridden shapes", async () => { + const outputStream = new SimpleStream(); + + let count = 0; + const observedClasses = new Set(); + outputStream.data(record => { + const store = RdfStore.createDefault(); + new Parser().parse(record).forEach(q => store.addQuad(q)); + const typeQs = store.getQuads(null, RDF.terms.type); + typeQs.forEach(tq => observedClasses.add(tq.object.value)); + + // Check era:Tracks only have the 2 properties defined in shape2.ttl + if (record.includes("/Track")) { + expect(store.getQuads(typeQs[0].subject).length).toBeLessThanOrEqual(2); + } + count++; + }); + + // Setup client + const exec = await processor( + outputStream, + "https://era.ilabt.imec.be/rinf/ldes", + new Date("2024-03-08T11:43:00.000Z"), + new Date("2024-03-08T11:39:00.000Z"), + "none", + false, + undefined, + ["./tests/data/shape1.ttl", "./tests/data/shape2.ttl"], + false + ); + + // Run client + await exec(); + // Check we got some members + expect(count).toBeGreaterThan(0); + // Check we only saw expected classes + expect(observedClasses.size).toBe(5); + }); }); \ No newline at end of file diff --git a/tests/connector-architecture/processor.test.ts b/tests/connector-architecture/processor.test.ts index 3d5992d..8be99ed 100644 --- a/tests/connector-architecture/processor.test.ts +++ b/tests/connector-architecture/processor.test.ts @@ -54,7 +54,7 @@ describe("Tests for js:LdesClient processor", async () => { expect(argss[0].length).toBe(13); const [[ - output, url, before, after, ordered, follow, pollInterval, shapeFile, + output, url, before, after, ordered, follow, pollInterval, shapeFiles, noShape, savePath, loose, urlIsView, verbose ]] = argss; @@ -65,7 +65,7 @@ describe("Tests for js:LdesClient processor", async () => { expect(ordered).toBe("ascending"); expect(follow).toBeTruthy(); expect(pollInterval).toBe(5); - expect(shapeFile).toBe("/path/to/shape.ttl"); + expect(shapeFiles[0]).toBe("/path/to/shape.ttl"); expect(noShape).toBeFalsy(); expect(savePath).toBe("/state/save.json"); expect(loose).toBeFalsy(); diff --git a/tests/data/shape1.ttl b/tests/data/shape1.ttl new file mode 100644 index 0000000..e2313c1 --- /dev/null +++ b/tests/data/shape1.ttl @@ -0,0 +1,19 @@ +@prefix sh: . +@prefix era: . +@prefix gsp: . + +[ ] a sh:NodeShape; + sh:targetClass era:OperationalPoint; + sh:property [ + sh:path gsp:hasGeometry; + sh:node [ + a sh:NodeShape; + sh:targetClass gsp:Geometry + ] + ], [ + sh:path era:lineReference; + sh:node [ + a sh:NodeShape; + sh:targetClass era:LineReference + ] + ]. \ No newline at end of file diff --git a/tests/data/shape2.ttl b/tests/data/shape2.ttl new file mode 100644 index 0000000..2cbafc8 --- /dev/null +++ b/tests/data/shape2.ttl @@ -0,0 +1,12 @@ +@prefix sh: . +@prefix rdf: . +@prefix era: . + +[ ] a sh:NodeShape; + sh:targetClass era:Track; + sh:closed true; + sh:property [ + sh:path rdf:type + ], [ + sh:path era:trackId + ]. \ No newline at end of file diff --git a/tests/unordered.test.ts b/tests/unordered.test.ts index 057df94..96d68b1 100644 --- a/tests/unordered.test.ts +++ b/tests/unordered.test.ts @@ -57,7 +57,6 @@ describe("Simple Tree", () => { test("ascending tree, emits ordered", async () => { const tree = simpleTree(1); - const base = tree.base() + tree.root(); const mock = tree.mock(); global.fetch = mock; @@ -179,7 +178,7 @@ describe("more complex tree", () => { .fragment(tree.root()) .relation( first, - TREE.GreaterThanOrEqualRelation, + TREE.GreaterThanOrEqualToRelation, "http://example.com/value", "3", );