diff --git a/src/middleware/packages/auth/services/account.js b/src/middleware/packages/auth/services/account.js
index ac330c643..76eef17e6 100644
--- a/src/middleware/packages/auth/services/account.js
+++ b/src/middleware/packages/auth/services/account.js
@@ -88,6 +88,12 @@ module.exports = {
       const accounts = await this._find(ctx, { query: { email } });
       return accounts.length > 0;
     },
+    /** Overwrite the find method to filter out accounts marked with a tombstone. */
+    async find(ctx) {
+      /** @type {object[]} */
+      const accounts = await this._find(ctx, ctx.params);
+      return accounts.filter(account => !account.deletedAt);
+    },
     async findByUsername(ctx) {
       const { username } = ctx.params;
       const accounts = await this._find(ctx, { query: { username } });
@@ -184,6 +190,34 @@ module.exports = {
         '@id': account['@id'],
         ...params
       });
+    },
+    async deleteByWebId(ctx) {
+      const { webId } = ctx.params;
+      const account = await ctx.call('auth.account.findByWebId', { webId });
+
+      if (account) {
+        await this._remove(ctx, { id: account['@id'] });
+        return true;
+      }
+
+      return false;
+    },
+    // Remove email and password from an account and set the deletedAt timestamp.
+    async setTombstone(ctx) {
+      const { webId } = ctx.params;
+      const account = await ctx.call('auth.account.findByWebId', { webId });
+
+      return await this._update(ctx, {
+        // Set all values to null...
+        ...Object.fromEntries(Object.keys(account).map(key => [key, null])),
+        '@id': account['@id'],
+        // ...except for the following:
+        webId: account.webId,
+        username: account.username,
+        podUri: account.podUri,
+        // And add a deletedAt date.
+        deletedAt: new Date().toISOString()
+      });
     }
   },
   methods: {
diff --git a/src/middleware/packages/backup/index.js b/src/middleware/packages/backup/index.js
index 32d945151..9560d5581 100644
--- a/src/middleware/packages/backup/index.js
+++ b/src/middleware/packages/backup/index.js
@@ -1,7 +1,11 @@
 const { CronJob } = require('cron');
+const fs = require('fs');
+const pathJoin = require('path').join;
 const fsCopy = require('./utils/fsCopy');
 const ftpCopy = require('./utils/ftpCopy');
 const rsyncCopy = require('./utils/rsyncCopy');
+const ftpRemove = require('./utils/ftpRemove');
+const fsRemove = require('./utils/fsRemove');
 /**
  * @typedef {import('moleculer').Context} Context
  */
@@ -10,7 +14,7 @@ const BackupService = {
   name: 'backup',
   settings: {
     localServer: {
-      fusekiBackupsPath: null,
+      fusekiBase: null,
       otherDirsPaths: {}
     },
     copyMethod: 'rsync', // rsync, ftp, or fs
@@ -29,11 +33,18 @@ const BackupService = {
   },
   dependencies: ['triplestore'],
   started() {
-    const { cronJob } = this.settings;
+    const {
+      cronJob,
+      localServer: { fusekiBase }
+    } = this.settings;
 
     if (cronJob.time) {
       this.cronJob = new CronJob(cronJob.time, this.actions.backupAll, null, true, cronJob.timeZone);
     }
+
+    if (!fusekiBase) {
+      throw new Error('Backup service requires the `localServer.fusekiBase` setting to be set to the FUSEKI_BASE path.');
+    }
   },
   actions: {
     async backupAll(ctx) {
@@ -41,13 +52,6 @@
       await this.actions.backupOtherDirs({}, { parentCtx: ctx });
     },
     async backupDatasets(ctx) {
-      const { fusekiBackupsPath } = this.settings.localServer;
-
-      if (!fusekiBackupsPath) {
-        this.logger.info('No fusekiBackupsPath defined, skipping backup...');
-        return;
-      }
-
       // Generate a new backup of all datasets
       const datasets = await ctx.call('triplestore.dataset.list');
       for (const dataset of datasets) {
@@ -55,7 +59,10 @@
         await ctx.call('triplestore.dataset.backup', { dataset });
       }
 
-      await this.actions.copyToRemoteServer({ path: fusekiBackupsPath, subDir: 'datasets' }, { parentCtx: ctx });
+      await this.actions.copyToRemoteServer(
+        { path: pathJoin(this.settings.localServer.fusekiBase, 'backups'), subDir: 'datasets' },
+        { parentCtx: ctx }
+      );
     },
     async backupOtherDirs(ctx) {
       const { otherDirsPaths } = this.settings.localServer;
@@ -96,6 +103,62 @@
         default:
           throw new Error(`Unknown copy method: ${copyMethod}`);
       }
+    },
+    deleteDataset: {
+      params: {
+        dataset: { type: 'string' },
+        iKnowWhatImDoing: { type: 'boolean' }
+      },
+      async handler(ctx) {
+        const { dataset, iKnowWhatImDoing } = ctx.params;
+        const {
+          copyMethod,
+          remoteServer,
+          localServer: { fusekiBase }
+        } = this.settings;
+        if (!iKnowWhatImDoing) {
+          throw new Error(
+            'Please confirm that you know what you are doing and set the `iKnowWhatImDoing` parameter to `true`.'
+          );
+        }
+
+        const deleteFilenames = await ctx.call('backup.listBackupsForDataset', { dataset });
+
+        // Delete all backups locally.
+        await Promise.all(deleteFilenames.map(file => fs.promises.rm(file)));
+
+        // Delete the backups from the remote server.
+        switch (copyMethod) {
+          case 'rsync':
+            // The last param sets rsync's --delete flag, to sync deletions too.
+            await rsyncCopy(pathJoin(fusekiBase, 'backups'), 'datasets', remoteServer, true);
+            break;
+
+          case 'ftp':
+            await ftpRemove(deleteFilenames, remoteServer);
+            break;
+
+          case 'fs':
+            await fsRemove(deleteFilenames, 'datasets', remoteServer);
+            break;
+
+          default:
+            throw new Error(`Unknown copy method: ${copyMethod}`);
+        }
+      }
+    },
+    /** Returns an array of absolute paths to the backup files of the given dataset. */
+    async listBackupsForDataset(ctx) {
+      const { dataset } = ctx.params;
+
+      // File format: <dataset>_<yyyy-MM-dd>_<HH-mm-ss>.nq.gz
+      const backupsPattern = RegExp(`^${dataset}_.{10}_.{8}\\.nq\\.gz$`);
+      const filenames = await fs.promises
+        .readdir(pathJoin(this.settings.localServer.fusekiBase, 'backups'))
+        .then(files => files.filter(file => backupsPattern.test(file)))
+        .then(files => files.map(file => pathJoin(this.settings.localServer.fusekiBase, 'backups', file)));
+
+      return filenames;
     }
   }
 };
diff --git a/src/middleware/packages/backup/indexTypes.d.ts b/src/middleware/packages/backup/indexTypes.d.ts
index 81e1f94f8..200316275 100644
--- a/src/middleware/packages/backup/indexTypes.d.ts
+++ b/src/middleware/packages/backup/indexTypes.d.ts
@@ -2,7 +2,7 @@ import { Context, ServiceSchema, CallingOptions } from 'moleculer';
 import { CronJob } from 'cron';
 
 interface LocalServerSettings {
-  fusekiBackupsPath: string | null;
+  fusekiBase: string | null;
   otherDirsPaths: Record<string, string>;
 }
 
diff --git a/src/middleware/packages/backup/utils/fsRemove.js b/src/middleware/packages/backup/utils/fsRemove.js
new file mode 100644
index 000000000..7a4315d0d
--- /dev/null
+++ b/src/middleware/packages/backup/utils/fsRemove.js
@@ -0,0 +1,13 @@
+const fs = require('fs');
+const { join: pathJoin, basename } = require('path');
+
+// removeFiles holds absolute local paths; only their base names exist under the target dir.
+const fsRemove = async (removeFiles, subDir, remoteServer) => {
+  await Promise.all(
+    removeFiles
+      .map(file => pathJoin(remoteServer.path, subDir, basename(file)))
+      .map(file => fs.promises.rm(file, { force: true }))
+  );
+};
+
+module.exports = fsRemove;
diff --git a/src/middleware/packages/backup/utils/ftpRemove.js b/src/middleware/packages/backup/utils/ftpRemove.js
new file mode 100644
index 000000000..6176a13ba
--- /dev/null
+++ b/src/middleware/packages/backup/utils/ftpRemove.js
@@ -0,0 +1,27 @@
+const Client = require('ssh2-sftp-client');
+const { join: pathJoin, basename } = require('path');
+
+const ftpRemove = (removeFiles, remoteServer) => {
+  return new Promise((resolve, reject) => {
+    const sftp = new Client();
+    sftp
+      .connect({
+        host: remoteServer.host,
+        port: remoteServer.port,
+        username: remoteServer.user,
+        password: remoteServer.password
+      })
+      .then(async () => {
+        for (const filename of removeFiles) {
+          // removeFiles holds absolute local paths; only the base name exists remotely.
+          await sftp.delete(pathJoin(remoteServer.path, basename(filename)), true);
+        }
+        // Close the SFTP connection before resolving.
+        await sftp.end();
+        resolve();
+      })
+      .catch(e => reject(e));
+  });
+};
+
+module.exports = ftpRemove;
diff --git a/src/middleware/packages/backup/utils/rsyncCopy.js b/src/middleware/packages/backup/utils/rsyncCopy.js
index d9ce46dc8..573075266 100644
--- a/src/middleware/packages/backup/utils/rsyncCopy.js
+++ b/src/middleware/packages/backup/utils/rsyncCopy.js
@@ -1,7 +1,7 @@
 const Rsync = require('rsync');
 const { join: pathJoin } = require('path');
 
-const rsyncCopy = (path, subDir, remoteServer) => {
+const rsyncCopy = (path, subDir, remoteServer, syncDelete = false) => {
   // Setup rsync to remote server
   const rsync = new Rsync()
     .flags('arv')
@@ -9,6 +9,8 @@ const rsyncCopy = (path, subDir, remoteServer) => {
     .source(path)
     .destination(`${remoteServer.user}@${remoteServer.host}:${pathJoin(remoteServer.path, subDir)}`);
 
+  if (syncDelete) rsync.set('delete');
+
   return new Promise((resolve, reject) => {
     console.log(`Rsync started with command: ${rsync.command()}`);
     rsync.execute(error => {
diff --git a/src/middleware/packages/core/service.js b/src/middleware/packages/core/service.js
index 5acefe2f5..a47c59f17 100644
--- a/src/middleware/packages/core/service.js
+++ b/src/middleware/packages/core/service.js
@@ -35,7 +35,8 @@ const CoreService = {
     url: undefined,
     user: undefined,
     password: undefined,
-    mainDataset: undefined
+    mainDataset: undefined,
+    fusekiBase: undefined
   },
   // Optional
   containers: undefined,
@@ -173,11 +174,7 @@ const CoreService = {
 
     this.broker.createService(TripleStoreService, {
       settings: {
-        url: triplestore.url,
-        user: triplestore.user,
-        password: triplestore.password,
-        mainDataset: triplestore.mainDataset,
-        ...this.settings.triplestore
+        ...triplestore
       },
       async started() {
        if (triplestore.mainDataset) {
diff --git a/src/middleware/packages/middlewares/index.js b/src/middleware/packages/middlewares/index.js
index dc0582786..2078e1741 100644
--- a/src/middleware/packages/middlewares/index.js
+++ b/src/middleware/packages/middlewares/index.js
@@ -47,6 +47,11 @@ const throw403 = msg => {
   throw new MoleculerError('Forbidden', 403, 'ACCESS_DENIED', { status: 'Forbidden', text: msg });
 };
 
+/** @type {(msg: string) => never} */
+const throw404 = msg => {
+  throw new MoleculerError('Not found', 404, 'NOT_FOUND', { status: 'Not found', text: msg });
+};
+
 const throw500 = msg => {
   throw new MoleculerError(msg, 500, 'INTERNAL_SERVER_ERROR', { status: 'Server Error', text: msg });
 };
@@ -202,5 +207,6 @@ module.exports = {
   saveDatasetMeta,
   throw400,
   throw403,
+  throw404,
   throw500
 };
diff --git a/src/middleware/packages/triplestore/service.js b/src/middleware/packages/triplestore/service.js
index 65a7d29d9..e08a09b0a 100644
--- a/src/middleware/packages/triplestore/service.js
+++ b/src/middleware/packages/triplestore/service.js
@@ -19,12 +19,13 @@ const TripleStoreService = {
     user: null,
     password: null,
     mainDataset: null,
+    fusekiBase: null,
     // Sub-services customization
     dataset: {}
   },
   dependencies: ['jsonld'],
   async created() {
-    const { url, user, password, dataset } = this.settings;
+    const { url, user, password, dataset, fusekiBase } = this.settings;
     this.subservices = {};
 
     if (dataset !== false) {
@@ -33,6 +34,7 @@ const TripleStoreService = {
           url,
           user,
           password,
+          fusekiBase,
           ...dataset
         }
       });
diff --git a/src/middleware/packages/triplestore/subservices/dataset.js b/src/middleware/packages/triplestore/subservices/dataset.js
index 3f3ab3685..3f6a06fb8 100644
--- a/src/middleware/packages/triplestore/subservices/dataset.js
+++ b/src/middleware/packages/triplestore/subservices/dataset.js
@@ -1,17 +1,19 @@
 const fetch = require('node-fetch');
-const fsPromises = require('fs').promises;
+const fs = require('fs');
 const path = require('path');
 const urlJoin = require('url-join');
 const format = require('string-template');
 
 const delay = t => new Promise(resolve => setTimeout(resolve, t));
 
+/** @type {import('moleculer').ServiceSchema} */
 const DatasetService = {
   name: 'triplestore.dataset',
   settings: {
     url: null,
     user: null,
-    password: null
+    password: null,
+    fusekiBase: null
   },
   started() {
     this.headers = {
@@ -44,7 +46,7 @@ const DatasetService = {
         throw new Error(`Error when creating dataset ${dataset}. Its name cannot end with Acl or Mirror`);
 
       const templateFilePath = path.join(__dirname, '../templates', secure ? 'secure-dataset.ttl' : 'dataset.ttl');
-      const template = await fsPromises.readFile(templateFilePath, 'utf8');
+      const template = await fs.promises.readFile(templateFilePath, 'utf8');
       const assembler = format(template, { dataset: dataset });
       response = await fetch(urlJoin(this.settings.url, '$/datasets'), {
         method: 'POST',
@@ -79,6 +81,15 @@ const DatasetService = {
       }
       return [];
     },
+    async isSecure(ctx) {
+      const { dataset } = ctx.params;
+      // Check if the http://semapps.org/webacl graph exists
+      return await ctx.call('triplestore.query', {
+        query: `ASK WHERE { GRAPH <http://semapps.org/webacl> { ?s ?p ?o } }`,
+        dataset,
+        webId: 'system'
+      });
+    },
     async waitForCreation(ctx) {
       const { dataset } = ctx.params;
       let datasetExist;
@@ -103,6 +114,48 @@ const DatasetService = {
         task = await response.json();
       }
     } while (!task || !task.finished);
+    },
+    delete: {
+      params: {
+        dataset: { type: 'string' },
+        iKnowWhatImDoing: { type: 'boolean' }
+      },
+      async handler(ctx) {
+        const { dataset, iKnowWhatImDoing } = ctx.params;
+        if (!iKnowWhatImDoing) {
+          throw new Error('Please confirm that you know what you are doing by setting `iKnowWhatImDoing` to `true`.');
+        }
+        const isSecure = await this.actions.isSecure({ dataset });
+
+        if (isSecure && !this.settings.fusekiBase)
+          throw new Error(
+            'Please provide the `fusekiBase` setting to the triplestore service in order to delete a secure dataset.'
+          );
+
+        const response = await fetch(urlJoin(this.settings.url, '$/datasets', dataset), {
+          method: 'DELETE',
+          headers: this.headers
+        });
+        if (!response.ok) {
+          throw new Error(`Failed to delete dataset ${dataset}: ${response.statusText}`);
+        }
+
+        // If this is a secure dataset, we need to delete its files manually.
+        if (isSecure) {
+          const dbDir = path.join(this.settings.fusekiBase, 'databases', dataset);
+          const dbAclDir = path.join(this.settings.fusekiBase, 'databases', `${dataset}Acl`);
+          const dbMirrorDir = path.join(this.settings.fusekiBase, 'databases', `${dataset}Mirror`);
+          const confFile = path.join(this.settings.fusekiBase, 'configuration', `${dataset}.ttl`);
+
+          // Delete all, if present.
+          await Promise.all([
+            fs.promises.rm(dbDir, { recursive: true, force: true }),
+            fs.promises.rm(dbAclDir, { recursive: true, force: true }),
+            fs.promises.rm(dbMirrorDir, { recursive: true, force: true }),
+            fs.promises.rm(confFile, { force: true })
+          ]);
+        }
+      }
     }
   }
 };
diff --git a/src/middleware/tests/config.js b/src/middleware/tests/config.js
index 9bd0f7d66..cd330e4d4 100644
--- a/src/middleware/tests/config.js
+++ b/src/middleware/tests/config.js
@@ -9,5 +9,6 @@ module.exports = {
   SETTINGS_DATASET: process.env.SEMAPPS_SETTINGS_DATASET,
   JENA_USER: process.env.SEMAPPS_JENA_USER,
   JENA_PASSWORD: process.env.SEMAPPS_JENA_PASSWORD,
-  ACTIVATE_CACHE: process.env.SEMAPPS_ACTIVATE_CACHE === 'true'
+  ACTIVATE_CACHE: process.env.SEMAPPS_ACTIVATE_CACHE === 'true',
+  FUSEKI_BASE: process.env.FUSEKI_BASE
 };
diff --git a/website/docs/middleware/backup.md b/website/docs/middleware/backup.md
index 133a8ec38..9c1ad4253 100644
--- a/website/docs/middleware/backup.md
+++ b/website/docs/middleware/backup.md
@@ -4,19 +4,16 @@ title: Backup
 
 This service allows you to backup the triples in a given dataset, as well as the uploaded files.
 
-
 ## Features
 
 - Backup Fuseki datasets and uploaded files
 - Choose copy method: Rsync, FTP or filesystem (copy to another directory)
 - Setup a cron to automatically launch the rsync operation
 
-
 ## Dependencies
 
 - [TripleStoreService](triplestore)
 
-
 ## Install
 
 ```bash
@@ -33,12 +30,11 @@ title: Backup
 ```
 
 You will also need to add the remote server domain as a known host, otherwise ssh will refuse to connect:
 
 ```
 ssh-keyscan REMOTE_SERVER_DOMAIN_NAME >> ~/.ssh/known_hosts
 ```
 
-
 ## Usage
 
 ```js
@@ -46,7 +42,7 @@ module.exports = {
   mixins: [BackupService],
   settings: {
     localServer: {
-      fusekiBackupsPath: '/absolute/path/to/fuseki/backups',
+      fusekiBase: '/absolute/path/to/fuseki-base/',
       otherDirsPaths: {
         uploads: path.resolve(__dirname, '../uploads')
       }
@@ -57,8 +53,7 @@ module.exports = {
       user: 'user', // Required for rsync and ftp
       password: 'password', // Required for rsync and ftp
       host: 'remote.server.com', // Required for rsync and ftp
-      port: null, // Required for ftp
-
+      port: null // Required for ftp
     },
     // Required only if you want to do automatic backups
     cronJob: {
       time: '0 0 4 * * *', // Every night at 4am
       timeZone: 'Europe/Paris'
     }
   }
@@ -69,16 +64,15 @@
 };
 ```
 
-
 ## Service settings
 
-| Property       | Type       | Default | Description                                                                   |
-| -------------- | ---------- | ------- | ----------------------------------------------------------------------------- |
-| `localServer`  | `[Object]` |         | Absolute path to the Fuseki backups and other directories you want to backup  |
-| `copyMethod`   | `[String]` | "rsync" | Copy method ("rsync", "ftp" or "fs")                                           |
-| `remoteServer` | `[Object]` |         | Information to connect to the remote server (see above)                        |
-| `cronJob`      | `[Object]` |         | Information for the automatic backups (see above)                              |
-
+| Property                     | Type                     | Default | Description                                                      |
+| ---------------------------- | ------------------------ | ------- | ---------------------------------------------------------------- |
+| `localServer.fusekiBase`     | `[String]`               |         | Absolute path to the FUSEKI_BASE directory of the Fuseki server   |
+| `localServer.otherDirsPaths` | `Record<string, string>` |         | Other directories to back up, with the keys as the backup dir names |
+| `copyMethod`                 | `[String]`               | "rsync" | Copy method ("rsync", "ftp" or "fs")                              |
+| `remoteServer`               | `[Object]`               |         | Information to connect to the remote server (see above)           |
+| `cronJob`                    | `[Object]`               |         | Information for the automatic backups (see above)                 |
 
 ## Actions
 
@@ -101,7 +95,8 @@ Copy the other directories defined in the settings to the remote server.
 Copy the data in the local server to the remote server.
 
 ##### Parameters
-| Property | Type     | Default      | Description                                              |
-| -------- | ---- | ------------------ | -------------------------------------------------------- |
-| `path`   | `String` | **required** | Absolute path to be synchronized with the remote server |
-| `subDir` | `String` |              | Sub-directory in the remote server                       |
+
+| Property | Type     | Default      | Description                                              |
+| -------- | -------- | ------------ | -------------------------------------------------------- |
+| `path`   | `String` | **required** | Absolute path to be synchronized with the remote server  |
+| `subDir` | `String` |              | Sub-directory in the remote server                       |
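Below is a minimal usage sketch of the dataset deletion flow introduced by this diff. Only the action names and parameters come from the code above; the `broker` instance and the dataset name are assumed for illustration:

```js
// Sketch: fully removing a dataset and its backups (action names per this diff).
// Assumes a started Moleculer broker with the backup and triplestore services loaded.
const deleteDatasetCompletely = async (broker, dataset) => {
  // Remove the local backup files and propagate the deletion to the remote server.
  await broker.call('backup.deleteDataset', { dataset, iKnowWhatImDoing: true });
  // Drop the dataset in Fuseki; for secure datasets this also removes the
  // on-disk databases and the configuration file under FUSEKI_BASE.
  await broker.call('triplestore.dataset.delete', { dataset, iKnowWhatImDoing: true });
};
```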
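Similarly, a sketch of the new account tombstone behaviour; the `webId` value and the helper name are illustrative:

```js
// Sketch: tombstoning an account instead of hard-deleting it.
const closeAccount = async (broker, webId) => {
  // Clears all account fields except webId, username and podUri, and sets deletedAt.
  await broker.call('auth.account.setTombstone', { webId });
  // The overwritten find action now filters the tombstoned account out.
  const accounts = await broker.call('auth.account.find');
  return accounts.every(account => !account.deletedAt); // true
};
```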