Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for multiple shape descriptions #18

Merged
merged 1 commit into from
Mar 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions bin/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ let before: Date | undefined;
let paramPollInterval: number;
let urlIsView = false;
let noShape = false;
let shapeFile: string | undefined;
let shapeFiles: string[] | undefined;
let ordered: Ordered = "none";
let quiet: boolean = false;
let verbose: boolean = false;
Expand All @@ -32,7 +32,7 @@ program
.option("--after <after>", "follow only relations including members after a certain point in time")
.option("--before <before>", "follow only relations including members before a certain point in time")
.option("--poll-interval <number>", "specify poll interval")
.option("--shape-file <shapefile>", "specify a shapefile")
.option("--shape-files [shapeFiles...]", "specify a shapefile")
.option(
"--no-shape",
"don't extract members with a shape (only use cbd and named graphs)",
Expand Down Expand Up @@ -60,7 +60,7 @@ program
noShape = !program.shape;
save = program.save;
paramURL = url;
shapeFile = program.shapeFile;
shapeFiles = program.shapeFiles;
paramFollow = program.follow;
paramPollInterval = program.pollInterval;
ordered = program.ordered;
Expand Down Expand Up @@ -100,7 +100,7 @@ async function main() {
pollInterval: paramPollInterval,
fetcher: { maxFetched: 2, concurrentRequests: 10 },
urlIsView: urlIsView,
shapeFile,
shapeFiles,
onlyDefaultGraph,
after,
before
Expand Down
118 changes: 75 additions & 43 deletions lib/client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ import { CBDShapeExtractor } from "extract-cbd-shape";
import { RdfStore } from "rdf-stores";
import { DataFactory, Writer as NWriter } from "n3";
import { Quad_Object, Term } from "@rdfjs/types";
import { getObjects, ModulatorFactory, Notifier, streamToArray } from "./utils";
import { LDES, SDS, TREE } from "@treecg/types";
import { extractMainNodeShape, getObjects, ModulatorFactory, Notifier, streamToArray } from "./utils";
import { LDES, SDS, SHACL, TREE } from "@treecg/types";
import { FetchedPage, Fetcher, longPromise, resetPromise } from "./pageFetcher";
import { Manager } from "./memberManager";
import { OrderedStrategy, StrategyEvents, UnorderedStrategy } from "./strategy";
Expand Down Expand Up @@ -39,7 +39,7 @@ export function replicateLDES(
}

export type LDESInfo = {
shape?: Term;
shapeMap?: Map<string, Term>;
extractor: CBDShapeExtractor;
timestampPath?: Term;
isVersionOfPath?: Term;
Expand All @@ -53,19 +53,39 @@ async function getInfo(
): Promise<LDESInfo> {
const logger = log.extend("getShape");

if (config.shapeFile) {
const shapeId = config.shapeFile.startsWith("http")
? config.shapeFile
: "file://" + config.shapeFile;
const shapeConfigStore = RdfStore.createDefault();
if (config.shapeFiles && config.shapeFiles.length > 0) {
config.shapes = [];

const resp = await rdfDereference.dereference(config.shapeFile, {
localFiles: true,
});
const quads = await streamToArray(resp.data);
config.shape = {
quads: quads,
shapeId: namedNode(shapeId),
};
for (const shapeFile of config.shapeFiles) {
const tempShapeStore = RdfStore.createDefault();
const shapeId = shapeFile.startsWith("http")
? shapeFile
: "file://" + shapeFile;

const resp = await rdfDereference.dereference(shapeFile, {
localFiles: true,
});
const quads = await streamToArray(resp.data);
// Add retrieved quads to local stores
quads.forEach(q => {
tempShapeStore.addQuad(q);
shapeConfigStore.addQuad(q);
});

if (shapeId.startsWith("file://")) {
// We have to find the actual IRI/Blank Node of the main shape within the file
config.shapes.push({
quads,
shapeId: extractMainNodeShape(tempShapeStore)
});
} else {
config.shapes.push({
quads: quads,
shapeId: namedNode(shapeId),
});
}
}
}

let shapeIds = config.noShape
Expand Down Expand Up @@ -105,11 +125,7 @@ async function getInfo(
timestampPaths.length,
isVersionOfPaths.length,
);
} catch (ex: any) {}
}

if (shapeIds.length > 1) {
console.error("Expected at most one shape id, found " + shapeIds.length);
} catch (ex: any) { }
}

if (timestampPaths.length > 1) {
Expand All @@ -124,22 +140,38 @@ async function getInfo(
);
}

let shapeConfigStore = RdfStore.createDefault();
if (config.shape) {
for (let quad of config.shape.quads) {
shapeConfigStore.addQuad(quad);
// Create a map of shapes and member types
const shapeMap = new Map<string, Term>();

if (config.shapes) {
for (const shape of config.shapes) {
const memberType = getObjects(shapeConfigStore, shape.shapeId, SHACL.terms.targetClass)[0];
if (memberType) {
shapeMap.set(memberType.value, shape.shapeId);
} else {
console.error("Ignoring SHACL shape without a declared sh:targetClass: ", shape.shapeId);
}
}
} else {
for (const shapeId of shapeIds) {
const memberType = getObjects(store, shapeId, SHACL.terms.targetClass)[0];
if (memberType) {
shapeMap.set(memberType.value, shapeId);
} else {
console.error("Ignoring SHACL shape without a declared sh:targetClass: ", shapeId);
}
}
}

return {
extractor: new CBDShapeExtractor(
config.shape ? shapeConfigStore : store,
config.shapes && config.shapes.length > 0 ? shapeConfigStore : store,
dereferencer,
{
cbdDefaultGraph: config.onlyDefaultGraph,
},
),
shape: config.shape ? config.shape.shapeId : shapeIds[0],
shapeMap: config.noShape ? undefined : shapeMap,
timestampPath: timestampPaths[0],
isVersionOfPath: isVersionOfPaths[0],
};
Expand Down Expand Up @@ -311,22 +343,22 @@ export class Client {
this.strategy =
this.ordered !== "none"
? new OrderedStrategy(
this.memberManager,
this.fetcher,
notifier,
factory,
this.ordered,
this.config.polling,
this.config.pollInterval,
)
this.memberManager,
this.fetcher,
notifier,
factory,
this.ordered,
this.config.polling,
this.config.pollInterval,
)
: new UnorderedStrategy(
this.memberManager,
this.fetcher,
notifier,
factory,
this.config.polling,
this.config.pollInterval,
);
this.memberManager,
this.fetcher,
notifier,
factory,
this.config.polling,
this.config.pollInterval,
);

logger("Found %d views, choosing %s", viewQuads.length, ldesId.value);
this.strategy.start(ldesId.value);
Expand Down Expand Up @@ -389,7 +421,7 @@ export async function processor(
ordered?: string,
follow?: boolean,
pollInterval?: number,
shape?: string,
shapes?: string[],
noShape?: boolean,
save?: string,
loose?: boolean,
Expand All @@ -400,7 +432,7 @@ export async function processor(
intoConfig({
loose,
noShape,
shapeFile: shape,
shapeFiles: shapes,
polling: follow,
url: url,
after,
Expand Down
4 changes: 2 additions & 2 deletions lib/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ export interface Config {
fetcher: FetcherConfig;
before?: Date;
after?: Date;
shape?: ShapeConfig;
shapeFile?: string;
shapes?: ShapeConfig[];
shapeFiles?: string[];
onlyDefaultGraph?: boolean;

// Add flag to indicate in order (default true)
Expand Down
89 changes: 59 additions & 30 deletions lib/memberManager.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import { Term } from "@rdfjs/types";
import { Term, Quad } from "@rdfjs/types";
import { Member } from "./page";
import { FetchedPage } from "./pageFetcher";
import { CBDShapeExtractor } from "extract-cbd-shape";
import { TREE } from "@treecg/types";
import { RDF, TREE } from "@treecg/types";
import Heap from "heap-js";
import { LDESInfo } from "./client";
import debug from "debug";
Expand All @@ -26,7 +26,7 @@ export type MemberEvents = {
extracted: Member;
done: Member[];
};
const getObjects = function (store: RdfStore, subject:Term|null, predicate: Term|null, graph?:Term|null) {
const getObjects = function (store: RdfStore, subject: Term | null, predicate: Term | null, graph?: Term | null) {
return store.getQuads(subject, predicate, null, graph).map((quad) => {
return quad.object;
});
Expand All @@ -41,7 +41,7 @@ export class Manager {

private state: Set<string>;
private extractor: CBDShapeExtractor;
private shapeId?: Term;
private shapeMap?: Map<string, Term>;

private timestampPath?: Term;
private isVersionOfPath?: Term;
Expand All @@ -53,7 +53,7 @@ export class Manager {
this.extractor = info.extractor;
this.timestampPath = info.timestampPath;
this.isVersionOfPath = info.isVersionOfPath;
this.shapeId = info.shape;
this.shapeMap = info.shapeMap;

logger("new %s %o", ldesId.value, info);

Expand Down Expand Up @@ -85,39 +85,68 @@ export class Manager {
member: Term,
data: RdfStore,
): Promise<Member | undefined> {
const quads = await this.extractor.extract(data, member, this.shapeId);

let quads: Quad[] = [];

if (this.shapeMap) {
if (this.shapeMap.size === 1) {
// Use the only shape available
quads = await this.extractor.extract(data, member, Array.from(this.shapeMap.values())[0]);
} else if (this.shapeMap.size > 1) {
// Find what is the proper shape for this member based on its rdf:type
const memberType = getObjects(data, member, RDF.terms.type)[0];
if (memberType) {
const shapeId = this.shapeMap.get(memberType.value);
if (shapeId) {
quads = await this.extractor.extract(data, member, shapeId);
}
} else {
// There is no rdf:type defined for this member. Fallback to CBD extraction
quads = await this.extractor.extract(data, member);
}
} else {
// Do a simple CBD extraction
quads = await this.extractor.extract(data, member);
}
} else {
// Do a simple CBD extraction
quads = await this.extractor.extract(data, member);
}

if (this.state.has(member.value)) {
return;
}
this.state.add(member.value);

// Get timestamp
let timestamp: Date | string | undefined;
if (this.timestampPath) {
const ts = quads.find(
(x) =>
x.subject.equals(member) && x.predicate.equals(this.timestampPath),
)?.object.value;
if (ts) {
try {
timestamp = new Date(ts);
} catch (ex: any) {
timestamp = ts;

if (quads.length > 0) {
this.state.add(member.value);

// Get timestamp
let timestamp: Date | string | undefined;
if (this.timestampPath) {
const ts = quads.find(
(x) =>
x.subject.equals(member) && x.predicate.equals(this.timestampPath),
)?.object.value;
if (ts) {
try {
timestamp = new Date(ts);
} catch (ex: any) {
timestamp = ts;
}
}
}
}

let isVersionOf: string | undefined;
if (this.isVersionOfPath) {
isVersionOf = quads.find(
(x) =>
x.subject.equals(member) && x.predicate.equals(this.isVersionOfPath),
)?.object.value;
}
let isVersionOf: string | undefined;
if (this.isVersionOfPath) {
isVersionOf = quads.find(
(x) =>
x.subject.equals(member) && x.predicate.equals(this.isVersionOfPath),
)?.object.value;
}

this.members.push({ id: member, quads, timestamp, isVersionOf });
return { id: member, quads, timestamp, isVersionOf };
this.members.push({ id: member, quads, timestamp, isVersionOf });
return { id: member, quads, timestamp, isVersionOf };
}
}

// Extract members found in this page, this does not yet emit the members
Expand Down
Loading
Loading