diff --git a/.vscode/launch.json b/.vscode/launch.json index 7a02e8f..ebda32d 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -15,7 +15,11 @@ "run", "--v8-flags=--max-old-space-size=8000", "--inspect", - "--allow-net", "--allow-read", "--allow-write", "--allow-run=git", "--allow-env" + "--allow-net", + "--allow-read", + "--allow-write", + "--allow-run=git", + "--allow-env" ], "attachSimplePort": 9229 } diff --git a/Dockerfile b/Dockerfile index 0b6a314..f07e8d6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,8 +1,8 @@ -FROM denoland/deno:ubuntu-1.22.1 +FROM denoland/deno:1.43.3 # Install cron -RUN apt-get update -RUN apt-get install -y git +RUN DEBIAN_FRONTEND=noninteractive apt-get update +RUN DEBIAN_FRONTEND=noninteractive apt-get install -y git # The port that your application listens to. EXPOSE 4505 @@ -14,8 +14,8 @@ WORKDIR /app # Cache the dependencies as a layer (the following two steps are re-run only when deps.ts is modified). # Ideally cache deps.ts will download and compile _all_ external files used in main.ts. -COPY src/deps.ts . -RUN deno cache deps.ts +COPY src/deps.ts src/deps.ts +RUN deno cache src/deps.ts # These steps will be re-run upon each file change in your working directory: ADD src src diff --git a/README.md b/README.md new file mode 100644 index 0000000..ed70d3e --- /dev/null +++ b/README.md @@ -0,0 +1,5 @@ +# Turtle-Hook + +This updates a SPARQL endpoint to reflect changes to the content of RDF Turtle files in a GitHub repository. + +It uses [ghact](https://deno.land/x/ghact) to provide a webhook and a web/REST interface. 
\ No newline at end of file diff --git a/config/config.ts b/config/config.ts index 7107534..b4e06f6 100644 --- a/config/config.ts +++ b/config/config.ts @@ -1,7 +1,17 @@ -export const config = { - branch: "main", +import { type Config } from "../src/deps.ts"; + +export const sparqlConfig = { graphUriPrefix: "https://raw.githubusercontent.com/plazi/treatments-rdf/main", - repository: "plazi/treatments-rdf", - repositoryUri: "https://github.com/plazi/treatments-rdf.git", uploadUri: "http://blazegraph:8080/blazegraph/sparql", }; + +export const ghActConfig: Config = { + title: "Turtle-Hook", + description: "Load RDF from plazi/treatments-rdf into our triple-store.", + // we don't create commits, so a default job-author is not really necessary + email: "", + sourceRepositoryUri: "https://github.com/plazi/treatments-rdf.git", + sourceBranch: "main", + sourceRepository: "plazi/treatments-rdf", + workDir: "/workdir", +}; diff --git a/config/postupdate.sparql b/config/postupdate.sparql deleted file mode 100644 index 9a7cde8..0000000 --- a/config/postupdate.sparql +++ /dev/null @@ -1,52 +0,0 @@ -PREFIX rdf: -PREFIX dwc: -PREFIX dwcfp: -PREFIX tp: -INSERT { GRAPH tp:Index { - ?res tp:genusPrefix2 ?prefix - } } WHERE { - ?res dwc:genus ?genus . - ?res rdf:type dwcfp:TaxonName. - FILTER (strlen(?genus) > 1) - BIND(LCASE(substr(?genus,1,2)) AS $prefix) -}; -INSERT { GRAPH tp:Index { - ?res tp:speciesPrefix2 ?prefix - } } WHERE { - ?res dwc:species ?species. - ?res rdf:type dwcfp:TaxonName. 
- FILTER (strlen(?species) > 2) - BIND(LCASE(substr(?species,1,3)) AS $prefix) -}; -INSERT { GRAPH tp:Index { - ?res tp:genusPrefix4 ?prefix - } } WHERE { - ?res dwc:genus ?genus . - ?res rdf:type dwcfp:TaxonName. - FILTER (strlen(?genus) > 3) - BIND(LCASE(substr(?genus,1,4)) AS $prefix) -}; -INSERT { GRAPH tp:Index { - ?res tp:speciesPrefix4 ?prefix - } } WHERE { - ?res dwc:species ?species. - ?res rdf:type dwcfp:TaxonName. - FILTER (strlen(?species) > 3) - BIND(LCASE(substr(?species,1,4)) AS $prefix) -} \ No newline at end of file diff --git a/manual-test.ts b/manual-test.ts deleted file mode 100644 index c4b2438..0000000 --- a/manual-test.ts +++ /dev/null @@ -1,215 +0,0 @@ -const response = await fetch("http://localhost:4505", { - method: "POST", - body: `{ - "ref": "refs/heads/main", - "before": "fa509fa1cb711e0ee98312f89973dd3a5f26769c", - "after": "9ec7f988390afbd38b1909476d4aab6700ec7382", - "repository": { - "id": 220027861, - "node_id": "MDEwOlJlcG9zaXRvcnkyMjAwMjc4NjE=", - "name": "treatments-rdf", - "full_name": "plazi/treatments-rdf", - "private": false, - "owner": { - "name": "plazi", - "email": "agosti@plazi.org", - "login": "plazi", - "id": 3786794, - "node_id": "MDEyOk9yZ2FuaXphdGlvbjM3ODY3OTQ=", - "avatar_url": "https://avatars.githubusercontent.com/u/3786794?v=4", - "gravatar_id": "", - "url": "https://api.github.com/users/plazi", - "html_url": "https://github.com/plazi", - "followers_url": "https://api.github.com/users/plazi/followers", - "following_url": "https://api.github.com/users/plazi/following{/other_user}", - "gists_url": "https://api.github.com/users/plazi/gists{/gist_id}", - "starred_url": "https://api.github.com/users/plazi/starred{/owner}{/repo}", - "subscriptions_url": "https://api.github.com/users/plazi/subscriptions", - "organizations_url": "https://api.github.com/users/plazi/orgs", - "repos_url": "https://api.github.com/users/plazi/repos", - "events_url": "https://api.github.com/users/plazi/events{/privacy}", - 
"received_events_url": "https://api.github.com/users/plazi/received_events", - "type": "Organization", - "site_admin": false - }, - "html_url": "https://github.com/plazi/treatments-rdf", - "description": "The treatments as RDF in Turtle", - "fork": false, - "url": "https://github.com/plazi/treatments-rdf", - "forks_url": "https://api.github.com/repos/plazi/treatments-rdf/forks", - "keys_url": "https://api.github.com/repos/plazi/treatments-rdf/keys{/key_id}", - "collaborators_url": "https://api.github.com/repos/plazi/treatments-rdf/collaborators{/collaborator}", - "teams_url": "https://api.github.com/repos/plazi/treatments-rdf/teams", - "hooks_url": "https://api.github.com/repos/plazi/treatments-rdf/hooks", - "issue_events_url": "https://api.github.com/repos/plazi/treatments-rdf/issues/events{/number}", - "events_url": "https://api.github.com/repos/plazi/treatments-rdf/events", - "assignees_url": "https://api.github.com/repos/plazi/treatments-rdf/assignees{/user}", - "branches_url": "https://api.github.com/repos/plazi/treatments-rdf/branches{/branch}", - "tags_url": "https://api.github.com/repos/plazi/treatments-rdf/tags", - "blobs_url": "https://api.github.com/repos/plazi/treatments-rdf/git/blobs{/sha}", - "git_tags_url": "https://api.github.com/repos/plazi/treatments-rdf/git/tags{/sha}", - "git_refs_url": "https://api.github.com/repos/plazi/treatments-rdf/git/refs{/sha}", - "trees_url": "https://api.github.com/repos/plazi/treatments-rdf/git/trees{/sha}", - "statuses_url": "https://api.github.com/repos/plazi/treatments-rdf/statuses/{sha}", - "languages_url": "https://api.github.com/repos/plazi/treatments-rdf/languages", - "stargazers_url": "https://api.github.com/repos/plazi/treatments-rdf/stargazers", - "contributors_url": "https://api.github.com/repos/plazi/treatments-rdf/contributors", - "subscribers_url": "https://api.github.com/repos/plazi/treatments-rdf/subscribers", - "subscription_url": "https://api.github.com/repos/plazi/treatments-rdf/subscription", - 
"commits_url": "https://api.github.com/repos/plazi/treatments-rdf/commits{/sha}", - "git_commits_url": "https://api.github.com/repos/plazi/treatments-rdf/git/commits{/sha}", - "comments_url": "https://api.github.com/repos/plazi/treatments-rdf/comments{/number}", - "issue_comment_url": "https://api.github.com/repos/plazi/treatments-rdf/issues/comments{/number}", - "contents_url": "https://api.github.com/repos/plazi/treatments-rdf/contents/{+path}", - "compare_url": "https://api.github.com/repos/plazi/treatments-rdf/compare/{base}...{head}", - "merges_url": "https://api.github.com/repos/plazi/treatments-rdf/merges", - "archive_url": "https://api.github.com/repos/plazi/treatments-rdf/{archive_format}{/ref}", - "downloads_url": "https://api.github.com/repos/plazi/treatments-rdf/downloads", - "issues_url": "https://api.github.com/repos/plazi/treatments-rdf/issues{/number}", - "pulls_url": "https://api.github.com/repos/plazi/treatments-rdf/pulls{/number}", - "milestones_url": "https://api.github.com/repos/plazi/treatments-rdf/milestones{/number}", - "notifications_url": "https://api.github.com/repos/plazi/treatments-rdf/notifications{?since,all,participating}", - "labels_url": "https://api.github.com/repos/plazi/treatments-rdf/labels{/name}", - "releases_url": "https://api.github.com/repos/plazi/treatments-rdf/releases{/id}", - "deployments_url": "https://api.github.com/repos/plazi/treatments-rdf/deployments", - "created_at": 1573053817, - "updated_at": "2022-01-11T01:26:21Z", - "pushed_at": 1651887538, - "git_url": "git://github.com/plazi/treatments-rdf.git", - "ssh_url": "git@github.com:plazi/treatments-rdf.git", - "clone_url": "https://github.com/plazi/treatments-rdf.git", - "svn_url": "https://github.com/plazi/treatments-rdf", - "homepage": null, - "size": 1221326, - "stargazers_count": 0, - "watchers_count": 0, - "language": null, - "has_issues": true, - "has_projects": true, - "has_downloads": true, - "has_wiki": true, - "has_pages": false, - "forks_count": 1, - 
"mirror_url": null, - "archived": false, - "disabled": false, - "open_issues_count": 1, - "license": null, - "allow_forking": true, - "is_template": false, - "topics": [ - - ], - "visibility": "public", - "forks": 1, - "open_issues": 1, - "watchers": 0, - "default_branch": "main", - "stargazers": 0, - "master_branch": "main", - "organization": "plazi" - }, - "pusher": { - "name": "nleanba", - "email": "noam@helou.ch" - }, - "organization": { - "login": "plazi", - "id": 3786794, - "node_id": "MDEyOk9yZ2FuaXphdGlvbjM3ODY3OTQ=", - "url": "https://api.github.com/orgs/plazi", - "repos_url": "https://api.github.com/orgs/plazi/repos", - "events_url": "https://api.github.com/orgs/plazi/events", - "hooks_url": "https://api.github.com/orgs/plazi/hooks", - "issues_url": "https://api.github.com/orgs/plazi/issues", - "members_url": "https://api.github.com/orgs/plazi/members{/member}", - "public_members_url": "https://api.github.com/orgs/plazi/public_members{/member}", - "avatar_url": "https://avatars.githubusercontent.com/u/3786794?v=4", - "description": "Plazi is an association supporting and promoting the development of persistent and openly accessible digital taxonomic literature." 
- }, - "sender": { - "login": "nleanba", - "id": 25827850, - "node_id": "MDQ6VXNlcjI1ODI3ODUw", - "avatar_url": "https://avatars.githubusercontent.com/u/25827850?v=4", - "gravatar_id": "", - "url": "https://api.github.com/users/nleanba", - "html_url": "https://github.com/nleanba", - "followers_url": "https://api.github.com/users/nleanba/followers", - "following_url": "https://api.github.com/users/nleanba/following{/other_user}", - "gists_url": "https://api.github.com/users/nleanba/gists{/gist_id}", - "starred_url": "https://api.github.com/users/nleanba/starred{/owner}{/repo}", - "subscriptions_url": "https://api.github.com/users/nleanba/subscriptions", - "organizations_url": "https://api.github.com/users/nleanba/orgs", - "repos_url": "https://api.github.com/users/nleanba/repos", - "events_url": "https://api.github.com/users/nleanba/events{/privacy}", - "received_events_url": "https://api.github.com/users/nleanba/received_events", - "type": "User", - "site_admin": false - }, - "created": false, - "deleted": false, - "forced": false, - "base_ref": null, - "compare": "https://github.com/plazi/treatments-rdf/compare/fa509fa1cb71...9ec7f988390a", - "commits": [ - { - "id": "9ec7f988390afbd38b1909476d4aab6700ec7382", - "tree_id": "a8d4d73f8efb715b6feb257514ca2f2c6dce79a8", - "distinct": true, - "message": "committed by action runner plazi/treatments-xml@a04316a23e88eada7534fb5e09d45d30e0fa7d11", - "timestamp": "2022-05-07T01:38:54Z", - "url": "https://github.com/plazi/treatments-rdf/commit/9ec7f988390afbd38b1909476d4aab6700ec7382", - "author": { - "name": "gsautter", - "email": "gsautter@users.noreply.github.com", - "username": "gsautter" - }, - "committer": { - "name": "gsautter", - "email": "gsautter@users.noreply.github.com", - "username": "gsautter" - }, - "added": [ - - ], - "removed": [ - - ], - "modified": [ - "data/9B/48/47/9B48474CE8148E14FCD2999E1905E9CD.ttl" - ] - } - ], - "head_commit": { - "id": "9ec7f988390afbd38b1909476d4aab6700ec7382", - "tree_id": 
"a8d4d73f8efb715b6feb257514ca2f2c6dce79a8", - "distinct": true, - "message": "committed by action runner plazi/treatments-xml@a04316a23e88eada7534fb5e09d45d30e0fa7d11", - "timestamp": "2022-05-07T01:38:54Z", - "url": "https://github.com/plazi/treatments-rdf/commit/9ec7f988390afbd38b1909476d4aab6700ec7382", - "author": { - "name": "gsautter", - "email": "gsautter@users.noreply.github.com", - "username": "gsautter" - }, - "committer": { - "name": "gsautter", - "email": "gsautter@users.noreply.github.com", - "username": "gsautter" - }, - "added": [ - - ], - "removed": [ - - ], - "modified": [ - "data/9B/48/47/9B48474CE8148E14FCD2999E1905E9CD.ttl" - ] - } -}`, -}); - -console.log(response); -console.log(await response.text()); diff --git a/src/deps.ts b/src/deps.ts index 1919fd8..72d659e 100644 --- a/src/deps.ts +++ b/src/deps.ts @@ -1,6 +1,6 @@ export { - Server, - Status, - STATUS_TEXT, -} from "https://deno.land/std@0.141.0/http/mod.ts"; -export { serveDir } from "https://deno.land/std@0.141.0/http/file_server.ts"; + type Config, + GHActServer, + GHActWorker, + type Job, +} from "https://deno.land/x/ghact@1.2.1/mod.ts"; diff --git a/src/main.ts b/src/main.ts index 5030349..e07d1a0 100644 --- a/src/main.ts +++ b/src/main.ts @@ -1,167 +1,8 @@ -import { serveDir, Server, Status, STATUS_TEXT } from "./deps.ts"; -import { config } from "../config/config.ts"; - -// Incomplete, only what we need -type webhookPayload = { - repository: { - full_name: string; - }; - commits: { - added: string[]; - removed: string[]; - modified: string[]; - }[]; -}; - -const emptyDatadir = async () => { - await Deno.remove("workdir/repo", { recursive: true }); -}; - -const cloneRepo = async () => { - console.warn("cloning repo (this WILL take some time)"); - const p = Deno.run({ - cmd: [ - "git", - "clone", - "--depth", - "1", - config.repositoryUri, - "repo", - ], - cwd: "workdir", - }); - const status = await p.status(); - if (!status.success) { - throw new Error("Bad, really bad"); - } -}; 
- -const updateLocalData = async () => { - await Deno.mkdir("workdir/repo/.git", { recursive: true }); - const p = Deno.run({ - cmd: ["git", "pull"], - env: { - GIT_CEILING_DIRECTORIES: Deno.cwd(), - }, - cwd: "workdir/repo", - }); - const status = await p.status(); - if (!status.success) { - await emptyDatadir(); - await cloneRepo(); - } -}; - -const fileUri = (fileName: string) => - ``; - -const graphUri = (fileName: string) => - `<${config.graphUriPrefix}/${fileName.replace(/\.ttl$/, "")}>`; - -const DROP = (fileName: string) => `DROP GRAPH ${graphUri(fileName)}`; - -/* SPARQL A LA (note the .ttl and domains) ` -LOAD INTO GRAPH -` */ -const LOAD = (fileName: string) => - `LOAD ${fileUri(fileName)} INTO GRAPH ${graphUri(fileName)}`; - -const UPDATE = (fileName: string) => `${DROP(fileName)}; ${LOAD(fileName)}`; - -const webhookHandler = async (request: Request) => { - const pathname = new URL(request.url).pathname; - if (request.method === "POST") { - try { - const json: webhookPayload = await request.json(); - const repoName = json.repository.full_name; - - console.log("· got webhook from", repoName); - if (repoName !== config.repository) { - throw new Error("Wrong Repository"); - } - await updateLocalData(); - const added = json.commits.flatMap((c) => c.added); - const removed = json.commits.flatMap((c) => c.removed); - const modified = json.commits.flatMap((c) => c.modified); - - console.info("> got added ", added); // -> LOAD - console.info("> got removed ", removed); // -> DROP graphname - console.info("> got modified", modified); // DROP; LOAD - - const statements = [ - ...added.map((f) => ({ statement: LOAD(f), fileName: f })), - ...removed.map((f) => ({ statement: DROP(f), fileName: f })), - ...modified.map((f) => ({ statement: UPDATE(f), fileName: f })), - ]; - - console.log("- statement count:", statements.length); - - const failingFiles: string[] = []; - let succeededOnce = false; - - for (const { statement, fileName } of statements) { - 
console.debug("» handling", fileName); - try { - console.debug(statement); - const response = await fetch(config.uploadUri, { - method: "POST", - body: statement, - headers: { "Content-Type": "application/sparql-update" }, - }); - if (response.ok) { - succeededOnce = true; - console.debug("» success"); - } else { - throw new Error( - `Got ${response.status}:\n` + await response.text(), - ); - } - } catch (error) { - failingFiles.push(fileName); - console.group("» error:"); - console.warn(error); - console.groupEnd(); - } - } - - console.log("< done"); - if (!succeededOnce) { - throw new Error(`All failed:\n ${failingFiles.join("\n ")}`); - } else if (failingFiles.length > 0) { - return new Response(`Some failed:\n ${failingFiles.join("\n ")}`, { - status: 200, - }); - } else { - return new Response(undefined, { status: 204 }); - } - } catch (error) { - return new Response(error, { - status: Status.InternalServerError, - statusText: STATUS_TEXT.get(Status.InternalServerError), - }); - } - } else if (pathname.startsWith("/repo")) { - console.log("· Got file request for", pathname); - const response = await serveDir(request, { - fsRoot: "workdir/repo", - urlRoot: "repo", - }); - response.headers.set("Content-Type", "text/turtle"); - return response; - } else { - console.log("· Got invalid request"); - return new Response(STATUS_TEXT.get(Status.MethodNotAllowed), { - status: Status.MethodNotAllowed, - statusText: STATUS_TEXT.get(Status.MethodNotAllowed), - }); - } -}; - -console.log("updating local data..."); -await updateLocalData(); - -const server = new Server({ handler: webhookHandler }); -const listener = Deno.listen({ port: 4505 }); -console.log(`server listening on http://${Deno.env.get("HOSTNAME")}:4505`); - -await server.serve(listener); +import { GHActServer } from "./deps.ts"; +import { ghActConfig } from "../config/config.ts"; + +const worker = new Worker(import.meta.resolve("./worker.ts"), { + type: "module", +}); +const server = new GHActServer(worker, 
ghActConfig); +await server.serve(); // defaults to port 4505 diff --git a/src/worker.ts b/src/worker.ts new file mode 100644 index 0000000..2054d03 --- /dev/null +++ b/src/worker.ts @@ -0,0 +1,102 @@ +import { GHActWorker, type Job } from "./deps.ts"; +import { ghActConfig, sparqlConfig } from "../config/config.ts"; + +const fileUri = (fileName: string) => + ``; + +const graphUri = (fileName: string) => + `<${sparqlConfig.graphUriPrefix}/${fileName.replace(/\.ttl$/, "")}>`; + +const DROP = (fileName: string) => `DROP GRAPH ${graphUri(fileName)}`; + +/* SPARQL A LA (note the .ttl and domains) ` +LOAD INTO GRAPH +` */ +const LOAD = (fileName: string) => + `LOAD ${fileUri(fileName)} INTO GRAPH ${graphUri(fileName)}`; + +const UPDATE = (fileName: string) => `${DROP(fileName)}; ${LOAD(fileName)}`; + +const _worker = new GHActWorker( + self, + ghActConfig, + async (job: Job, log): Promise => { + log( + "Starting transformation\n" + JSON.stringify(job, undefined, 2), + ); + + let added: string[] = []; + let modified: string[] = []; + let removed: string[] = []; + + if ("files" in job) { + modified = job.files.modified ?? 
[]; + if ("added" in job.files) added = job.files.added; + if ("removed" in job.files) removed = job.files.removed; + } else if (job.from) { + const files = await _worker.gitRepository.getModifiedAfter( + job.from, + job.till, + log, + ); + added = files.added; + modified = files.modified; + removed = files.removed; + if (files.till && files.till !== "HEAD") { + job.till = files.till; + } + } else { + throw new Error( + "Could not start job, neither explicit file list nor from-commit specified", + ); + } + + log(`> got added ${added}`); // -> LOAD + log(`> got removed ${removed}`); // -> DROP graphname + log(`> got modified ${modified}`); // DROP; LOAD + + const statements = [ + ...added.map((f) => ({ statement: LOAD(f), fileName: f })), + ...removed.map((f) => ({ statement: DROP(f), fileName: f })), + ...modified.map((f) => ({ statement: UPDATE(f), fileName: f })), + ]; + + log(`- statement count: ${statements.length}`); + + const failingFiles: string[] = []; + let succeededOnce = false; + + for (const { statement, fileName } of statements) { + log(`» handling ${fileName}\n ${statement}`); + try { + const response = await fetch(sparqlConfig.uploadUri, { + method: "POST", + body: statement, + headers: { "Content-Type": "application/sparql-update" }, + }); + if (response.ok) { + succeededOnce = true; + log("» success"); + } else { + throw new Error( + `Got ${response.status}:\n` + await response.text(), + ); + } + } catch (error) { + failingFiles.push(fileName); + log(" » error:"); + log("" + error); + } + } + + log("< done"); + if (!succeededOnce) { + log(`All failed:\n ${failingFiles.join("\n ")}`); + throw new Error(`All failed`); + } else if (failingFiles.length > 0) { + log(`Some failed:\n ${failingFiles.join("\n ")}`); + } else { + log("All succeeded"); + } + }, +);