From 9e57052382da06d34faccf7424ca388e7800cc9c Mon Sep 17 00:00:00 2001 From: Nordine Bittich Date: Wed, 26 Jul 2023 12:14:31 +0200 Subject: [PATCH] default prefixes --- README.md | 10 +++++ config/config.json | 70 ++++++++++++++++++++++++---------- lib/mu_search/config_parser.rb | 4 +- lib/mu_search/sparql.rb | 9 +++-- web.rb | 1 + 5 files changed, 70 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index d558049..f6e0c80 100644 --- a/README.md +++ b/README.md @@ -727,6 +727,16 @@ Possible values are `true` and `false`. Defaults to `false`. Note that if set to `true`, the indexes may be out-of-date if data has changed in the application while mu-search was down. +#### Prefixes +You can define default prefixes that are prepended to every SPARQL query and update, e.g.: +```json +"prefixes": [ + { + "besluit": "http://data.vlaanderen.be/ns/besluit#" + } +] + +``` #### Eager indexes Configure indexes to be pre-built when the application starts. For each user search profile for which the indexes needs to be prepared, the authorization group names and their corresponding variables needs to be passed. 
diff --git a/config/config.json b/config/config.json index 6c0b28b..872d30e 100644 --- a/config/config.json +++ b/config/config.json @@ -1,21 +1,48 @@ { "batch_size": 12, "max_batches": 1, - "automatic_index_updates" : true , - "eager_indexing_groups" : [[{"name" : "documents", "variables" : ["human"]}], - [{"name" : "documents", "variables" : ["chicken"]}]], - "attachments_path_base" : "/local/files/directory", - "persist_indexes" : false, - "default_settings" : { + "automatic_index_updates": true, + "eager_indexing_groups": [ + [ + { + "name": "documents", + "variables": [ + "human" + ] + } + ], + [ + { + "name": "documents", + "variables": [ + "chicken" + ] + } + ] + ], + "attachments_path_base": "/local/files/directory", + "persist_indexes": false, + "prefixes": [], + "default_settings": { "analysis": { - "analyzer": { - "dutchanalyzer": { - "tokenizer": "standard", - "filter": ["lowercase", "asciifolding", "dutchstemmer"] } }, - "filter": { - "dutchstemmer": { - "type": "stemmer", - "name": "dutch" } } } }, + "analyzer": { + "dutchanalyzer": { + "tokenizer": "standard", + "filter": [ + "lowercase", + "asciifolding", + "dutchstemmer" + ] + } + }, + "filter": { + "dutchstemmer": { + "type": "stemmer", + "name": "dutch" + } + } + } + }, "types": [ { "type": "document", @@ -23,13 +50,13 @@ "rdf_type": "http://example.org/Document", "properties": { "title": "http://purl.org/dc/elements/1.1/title", - "description": "http://purl.org/dc/elements/1.1/description" , - "data" : { - "via" : [ + "description": "http://purl.org/dc/elements/1.1/description", + "data": { + "via": [ "http://mu.semte.ch/vocabularies/ext/file", "^http://www.semanticdesktop.org/ontologies/2007/01/19/nie#dataSource" ], - "attachment_pipeline" : "attachment" + "attachment_pipeline": "attachment" } }, "mappings": null @@ -45,8 +72,11 @@ "mappings": null }, { - "type" : "userdoc", - "composite_types": ["document", "user"], + "type": "userdoc", + "composite_types": [ + "document", + "user" + ],
"on_path": "userdocs", "properties": [ { diff --git a/lib/mu_search/config_parser.rb b/lib/mu_search/config_parser.rb index 8979549..600148c 100644 --- a/lib/mu_search/config_parser.rb +++ b/lib/mu_search/config_parser.rb @@ -16,7 +16,8 @@ def self.parse(path) eager_indexing_groups: [], update_wait_interval_minutes: 1, number_of_threads: 1, - enable_raw_dsl_endpoint: false + enable_raw_dsl_endpoint: false, + prefixes: [] } json_config = JSON.parse(File.read(path)) @@ -48,6 +49,7 @@ def self.parse(path) config[:eager_indexing_groups] = json_config["eager_indexing_groups"] end + config[:prefixes] = json_config["prefixes"] || config[:prefixes] config[:type_definitions] = Hash[MuSearch::IndexDefinition.from_json_config(json_config["types"])] config end diff --git a/lib/mu_search/sparql.rb b/lib/mu_search/sparql.rb index 6a0e583..0663724 100644 --- a/lib/mu_search/sparql.rb +++ b/lib/mu_search/sparql.rb @@ -3,18 +3,21 @@ module MuSearch module SPARQL class ClientWrapper - def initialize(logger:, sparql_client:, options:) + def initialize(logger:, sparql_client:, options:, prefixes:) @logger = logger @sparql_client = sparql_client @options = options + @prefixes = prefixes end def query(query_string) + query_string = @prefixes + "\n" + query_string @logger.debug("SPARQL") { "Executing query with #{@options.inspect}\n#{query_string}" } @sparql_client.query query_string, @options end def update(query_string) + query_string = @prefixes + "\n" + query_string @logger.debug("SPARQL") { "Executing update with #{@options.inspect}\n#{query_string}" } @sparql_client.update query_string, @options end @@ -25,7 +28,9 @@ class ConnectionPool # default number of threads to use for indexing and update handling DEFAULT_NUMBER_OF_THREADS = 2 - def initialize(logger:, number_of_threads:) + def initialize(logger:, number_of_threads:, prefixes:) + # prefixes may be a hash of label => IRI, or an array of such hashes + @prefixes = [prefixes].flatten.flat_map(&:to_a).map { |key, value| "prefix #{key}: <#{value}>" }.join("\n") @logger = logger number_of_threads = DEFAULT_NUMBER_OF_THREADS if number_of_threads <= 0
@sparql_connection_pool = ::ConnectionPool.new(size: number_of_threads, timeout: 3) do @@ -106,7 +111,7 @@ def with_sudo(&block) def with_options(sparql_options) @sparql_connection_pool.with do |sparql_client| @logger.debug("SPARQL") { "Claimed connection from pool. There are #{@sparql_connection_pool.available} connections left" } - client_wrapper = ClientWrapper.new(logger: @logger, sparql_client: sparql_client, options: sparql_options) + client_wrapper = ClientWrapper.new(logger: @logger, sparql_client: sparql_client, options: sparql_options, prefixes: @prefixes) yield client_wrapper end end diff --git a/web.rb b/web.rb index 68c8afa..bb54281 100644 --- a/web.rb +++ b/web.rb @@ -119,6 +119,7 @@ def setup_delta_handling(index_manager, elasticsearch, tika, sparql_connection_p sparql_connection_pool = MuSearch::SPARQL::ConnectionPool.new( number_of_threads: configuration[:number_of_threads], + prefixes: configuration[:prefixes], logger: SinatraTemplate::Utils.log )