diff --git a/scripts/config.json b/scripts/config.json
index 87c518e..87a15fd 100644
--- a/scripts/config.json
+++ b/scripts/config.json
@@ -1,6 +1,38 @@
 {
   "version": "0.1",
   "scripts": [
+    {
+      "documentation": {
+        "command": "data-diff",
+        "description": "A data diff will be taken of the data resulting from a grep on each file. Parameters:\n config: the config file to use\n",
+        "arguments": ["config"]
+      },
+      "environment": {
+        "image": "ubuntu",
+        "interactive": false,
+        "script": "virtuoso/data-diff.sh",
+        "join_networks": true
+      },
+      "mounts": {
+        "app": "/project/"
+      }
+    },
+    {
+      "documentation": {
+        "command": "dump-database",
+        "description": "A virtuoso dump will be created in data/db/dumps.\n Parameters:\n hostname: default triplestore\n username: default dba\n password: default dba",
+        "arguments": ["hostname", "username", "password"]
+      },
+      "environment": {
+        "image": "redpencil/virtuoso",
+        "interactive": false,
+        "script": "virtuoso/dump-database.sh",
+        "join_networks": true
+      },
+      "mounts": {
+        "app": "/project/"
+      }
+    },
     {
       "documentation": {
         "command": "create-backup",
diff --git a/scripts/virtuoso/data-diff-config-example.json b/scripts/virtuoso/data-diff-config-example.json
new file mode 100644
index 0000000..a188487
--- /dev/null
+++ b/scripts/virtuoso/data-diff-config-example.json
@@ -0,0 +1,7 @@
+{ "source": "/project/data/file1.nq",
+  "target": "/project/data/file2.nq",
+  "graphs": [
+    "http://mu.semte.ch/application"
+  ],
+  "graphRegexes": ["http://mu.semte.ch/vocabularies/ext/tabId"]
+}
diff --git a/scripts/virtuoso/data-diff.sh b/scripts/virtuoso/data-diff.sh
new file mode 100755
index 0000000..dd54c90
--- /dev/null
+++ b/scripts/virtuoso/data-diff.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+# Diff the .source and .target files named in the JSON config after piping
+# both through the egrep pipeline printed by generate-datadiff.py.
+# Usage: data-diff.sh <config.json>
+apt-get update > /dev/null
+apt-get -y install jq python3 > /dev/null
+
+config=$1
+source=$(jq -r ".source" "$config")
+target=$(jq -r ".target" "$config")
+
+# Resolve the generator next to this script so the call does not depend on
+# the current working directory.
+command=$(python3 "$(dirname "$0")/generate-datadiff.py" "$config")
+
+# eval is quoted so the regexes are not word-split or glob-expanded first.
+diff <(eval "$command" < "$source" | sort) <(eval "$command" < "$target" | sort)
diff --git a/scripts/virtuoso/dump-database.sh b/scripts/virtuoso/dump-database.sh
new file mode 100755
index 0000000..d7ceb2e
--- /dev/null
+++ b/scripts/virtuoso/dump-database.sh
@@ -0,0 +1,80 @@
+#!/bin/bash
+USERNAME=${2:-"dba"}
+PASSWORD=${3:-"dba"}
+TRIPLESTORE=${1:-"triplestore"}
+
+if [[ "$#" -gt 3 ]]; then  # hostname, username and password may all be given
+  echo "Usage:"
+  echo " mu script triplestore [hostname] [username] [password]"
+  exit -1;
+fi
+
+if [[ -d "/project/data/db" ]];then
+  mkdir -p /project/data/db/dumps
+else
+  echo "WARNING:"
+  echo " did not find data/db folder in your project, so did not create data/db/dumps!"
+  echo " "
+fi
+
+
+echo "connecting to $TRIPLESTORE with $USERNAME"
+isql-v -H $TRIPLESTORE -U $USERNAME -P $PASSWORD <= file_length_limit)
+        {
+          file_name := sprintf ('%s/output%06d.nq', dir, inx);
+          string_to_file (file_name, ses, -2);
+          IF (comp)
+            {
+              gz_compress_file (file_name, file_name||'.gz');
+              file_delete (file_name);
+            }
+          inx := inx + 1;
+          env := vector (0,0,0);
+          ses := string_output (10000000);
+        }
+      next:;
+    }
+  IF (length (ses))
+    {
+      file_name := sprintf ('%s/output%06d.nq', dir, inx);
+      string_to_file (file_name, ses, -2);
+      IF (comp)
+        {
+          gz_compress_file (file_name, file_name||'.gz');
+          file_delete (file_name);
+        }
+      inx := inx + 1;
+      env := vector (0,0,0);
+    }
+}
+;
+ dump_nquads ('dumps', 1, 100000000, 1);
+ exit;
+EOF
+gunzip /project/data/db/dumps/*.gz
+cat /project/data/db/dumps/* > /project/data/dumped-quads.nq
diff --git a/scripts/virtuoso/generate-datadiff.py b/scripts/virtuoso/generate-datadiff.py
new file mode 100644
index 0000000..7429924
--- /dev/null
+++ b/scripts/virtuoso/generate-datadiff.py
@@ -0,0 +1,20 @@
+"""Print the egrep pipeline that data-diff.sh evaluates on each input file.
+
+Usage: generate-datadiff.py <config.json>
+"""
+import sys
+import json
+
+config_file = sys.argv[1]
+with open(config_file) as f:
+    config = json.load(f)
+
+# Keep only lines ending in one of config['graphs']; the group is needed so
+# the alternation does not split the "<" and "> ." anchors between branches.
+graph_regex = "<({})> .$".format("|".join(config['graphs']))
+grep_commands = f'egrep "{graph_regex}"'
+# Every graphRegexes entry appends one more egrep stage; the key may be absent.
+for regex in config.get('graphRegexes', []):
+    grep_commands += f'| egrep "{regex}"'
+
+print(grep_commands)