From 97b64ea354281cc2867042480ff78a9c7315fc73 Mon Sep 17 00:00:00 2001 From: Alfred Rubin Date: Wed, 25 Oct 2023 11:48:02 +0200 Subject: [PATCH 01/14] pushing fix to edge case with only one subject that wasn't being flushed --- rdflib_neo4j/Neo4jStore.py | 35 ++++++++++++++++++----------------- setup.py | 2 +- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/rdflib_neo4j/Neo4jStore.py b/rdflib_neo4j/Neo4jStore.py index 050fc79..e6ccb81 100644 --- a/rdflib_neo4j/Neo4jStore.py +++ b/rdflib_neo4j/Neo4jStore.py @@ -49,6 +49,22 @@ def open(self, configuration, create=True): self.__constraint_check(create) self.__set_open(True) + def close(self, commit_pending_transaction=True): + """ + Closes the store. + + Args: + commit_pending_transaction (bool): Flag indicating whether to commit any pending transaction before closing. + """ + if commit_pending_transaction: + self.commit(commit_nodes=True) + self.commit(commit_rels=True) + self.session.close() + self.driver.close() + self.__set_open(False) + print(f"IMPORTED {self.total_triples} TRIPLES") + self.total_triples=0 + def is_open(self): """ Checks if the store is open. @@ -108,23 +124,6 @@ def commit(self, commit_nodes=False, commit_rels=False): def remove(self, triple, context=None, txn=None): raise NotImplemented("This is a streamer so it doesn't preserve the state, there is no removal feature.") - def close(self, commit_pending_transaction=True): - """ - Closes the store. - - Args: - commit_pending_transaction (bool): Flag indicating whether to commit any pending transaction before closing. - """ - if commit_pending_transaction: - if self.node_buffer_size > 0: - self.commit(commit_nodes=True) - if self.rel_buffer_size > 0: - self.commit(commit_rels=True) - self.session.close() - self.driver.close() - self.__set_open(False) - print(f"IMPORTED {self.total_triples} TRIPLES") - def __close_on_error(self): """ Empties the query buffers in case of an error. 
@@ -144,6 +143,8 @@ def __set_open(self, val: bool): val (bool): The value to set for the 'open' status. """ self.__open = val + print(f"The store is now: {'Open' if self.__open else 'Closed'}") + def __create_session(self): """ diff --git a/setup.py b/setup.py index 081703b..a41389a 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ if __name__ == "__main__": setup( name="rdflib-neo4j", - version="1.0", + version="1.0.1", author="Jesús Barrasa, Aleksandar Simeunovic, Alfredo Rubin", author_email="jbarrasa@outlook.com, aleksandar.simeunovic@neo4j.com, alfredo.rubin@neo4j.com", description="RDFLib Store backed by neo4j", From d988d20b92a9eb45566c8a205a8ac6534d0b9379 Mon Sep 17 00:00:00 2001 From: aleksandarneo4j Date: Thu, 26 Oct 2023 11:30:47 +0200 Subject: [PATCH 02/14] added a new noteboook with the demo from the Nodes session --- examples/NodesDemo.ipynb | 156 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 156 insertions(+) create mode 100644 examples/NodesDemo.ipynb diff --git a/examples/NodesDemo.ipynb b/examples/NodesDemo.ipynb new file mode 100644 index 0000000..450642d --- /dev/null +++ b/examples/NodesDemo.ipynb @@ -0,0 +1,156 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install rdflib-neo4j (do not forget to add the Unqiueness Constraint on the (r:Resource {r.uri}) property))\n", + "!pip install rdflib-neo4j" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from rdflib_neo4j import Neo4jStoreConfig, Neo4jStore, HANDLE_VOCAB_URI_STRATEGY\n", + "from rdflib import Namespace, Graph, URIRef, RDF, SKOS, Literal" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Define your custom prefixes\n", + "prefixes = {\n", + " 'neo4ind': Namespace('http://neo4j.org/ind#'),\n", + " 'neo4voc': Namespace('http://neo4j.org/vocab/sw#'),\n", + "}\n", + 
"\n", + "# Neo4j connection credentials\n", + "auth_data = {'uri': 'your_neo4j_uri',\n", + " 'database': 'neo4j',\n", + " 'user': \"neo4j\",\n", + " 'pwd': 'password'}\n", + "\n", + "# Define your Neo4jStoreConfig\n", + "config = Neo4jStoreConfig(auth_data=auth_data,\n", + " custom_prefixes=prefixes,\n", + " handle_vocab_uri_strategy=HANDLE_VOCAB_URI_STRATEGY.IGNORE,\n", + " batching=False)\n", + "\n", + "neo4j_store = Neo4jStore(config=config)\n", + "graph_store = Graph(store=neo4j_store)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Import by reference, passing a url" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "file_path = 'https://raw.githubusercontent.com/neo4j-labs/neosemantics/3.5/docs/rdf/nsmntx.ttl'\n", + "graph_store.parse(file_path,format=\"ttl\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Writing individual triples using add" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "aura = URIRef(\"http://neo4j.com/voc/tech#AuraDB\")\n", + "\n", + "graph_store.add((aura, RDF.type, SKOS.Concept))\n", + "graph_store.add((aura, SKOS.prefLabel, Literal(\"AuraDB\")))\n", + "graph_store.add((aura, SKOS.broader, URIRef(\"http://neo4j.org/ind#neo4j355\")))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Import the RDF generated by a SPARQL query + Batching" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "import urllib.parse\n", + "\n", + "endpoint = \"https://id.nlm.nih.gov/mesh/sparql\"\n", + "sparql = \"\"\"\n", + "PREFIX rdfs: \n", + "PREFIX meshv: \n", + "PREFIX mesh: \n", + "PREFIX rdf: \n", + "\n", + "CONSTRUCT { ?s ?p ?o }\n", + "FROM \n", + "WHERE {\n", + " {\n", + " ?s ?p ?o\n", + " filter(?s = mesh:D000086402 || ?o = mesh:D000086402)\n", + " 
}\n", + " union\n", + " {\n", + " mesh:D000086402 ?x ?s .\n", + " ?s ?p ?o .\n", + " filter(?x != rdf:type && (isLiteral(?o) || ?p = rdf:type))\n", + " }\n", + " union\n", + " {\n", + " ?s ?x mesh:D000086402 .\n", + " ?s ?p ?o .\n", + " filter(isLiteral(?o|| ?p = rdf:type))\n", + " }\n", + "}\n", + "\"\"\"\n", + "\n", + "# Define your Neo4jStoreConfig\n", + "config = Neo4jStoreConfig(auth_data=auth_data,\n", + " custom_prefixes=prefixes,\n", + " handle_vocab_uri_strategy=HANDLE_VOCAB_URI_STRATEGY.IGNORE,\n", + " batching=True)\n", + "\n", + "neo4j_store = Neo4jStore(config=config)\n", + "graph_store = Graph(store=neo4j_store)\n", + "\n", + "query_response = requests.get(endpoint, params = {\"query\": sparql , \"format\" : \"TURTLE\"})\n", + "graph_store.parse(data=query_response.text,format='ttl')\n", + "graph_store.close(commit_pending_transaction=True)" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 78d9e7e33732591489e0e879271a3756641c1f6d Mon Sep 17 00:00:00 2001 From: aleksandarneo4j Date: Thu, 26 Oct 2023 11:32:27 +0200 Subject: [PATCH 03/14] changed the file name --- examples/{NodesDemo.ipynb => nodes_demo.ipynb} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename examples/{NodesDemo.ipynb => nodes_demo.ipynb} (100%) diff --git a/examples/NodesDemo.ipynb b/examples/nodes_demo.ipynb similarity index 100% rename from examples/NodesDemo.ipynb rename to examples/nodes_demo.ipynb From f9398e66d0200521258cadd54c03af95341b1ad3 Mon Sep 17 00:00:00 2001 From: aleksandarneo4j Date: Thu, 26 Oct 2023 11:35:13 +0200 Subject: [PATCH 04/14] small change for the password --- examples/nodes_demo.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/nodes_demo.ipynb b/examples/nodes_demo.ipynb index 450642d..dea5779 100644 --- a/examples/nodes_demo.ipynb +++ b/examples/nodes_demo.ipynb @@ -36,7 +36,7 @@ "auth_data = {'uri': 'your_neo4j_uri',\n", 
" 'database': 'neo4j',\n", " 'user': \"neo4j\",\n", - " 'pwd': 'password'}\n", + " 'pwd': 'your_password'}\n", "\n", "# Define your Neo4jStoreConfig\n", "config = Neo4jStoreConfig(auth_data=auth_data,\n", From 237436ac6f95fae66edf5cb5b9f016c4e47e1657 Mon Sep 17 00:00:00 2001 From: JB Date: Mon, 22 Jan 2024 01:56:29 +0000 Subject: [PATCH 05/14] set store as context-aware --- rdflib_neo4j/Neo4jStore.py | 2 ++ test/integration/handle_vocab_uri_test.py | 29 +++++++++++++++++++++++ test/test_files/n10s_example.json | 1 + 3 files changed, 32 insertions(+) create mode 100644 test/test_files/n10s_example.json diff --git a/rdflib_neo4j/Neo4jStore.py b/rdflib_neo4j/Neo4jStore.py index e6ccb81..6cca1c9 100644 --- a/rdflib_neo4j/Neo4jStore.py +++ b/rdflib_neo4j/Neo4jStore.py @@ -15,6 +15,8 @@ class Neo4jStore(Store): + context_aware = True + def __init__(self, config: Neo4jStoreConfig): self.__open = False self.driver = None diff --git a/test/integration/handle_vocab_uri_test.py b/test/integration/handle_vocab_uri_test.py index 11f33d3..5792aa6 100644 --- a/test/integration/handle_vocab_uri_test.py +++ b/test/integration/handle_vocab_uri_test.py @@ -130,3 +130,32 @@ def test_ignore_strategy(neo4j_container, neo4j_driver): assert len(rels_from_rdflib) == len(rels) for i in range(len(rels)): assert records_equal(rels[i], rels_from_rdflib[i], rels=True) + + +def test_ignore_strategy_on_json_ld_file(neo4j_container, neo4j_driver): + auth_data = get_credentials(LOCAL, neo4j_container) + + # Define your prefixes + prefixes = { + 'neo4ind': Namespace('http://neo4j.org/ind#'), + } + + # Define your custom mappings + custom_mappings = [] + + multival_props_names = [] + + config = Neo4jStoreConfig(auth_data=auth_data, + custom_prefixes=prefixes, + custom_mappings=custom_mappings, + multival_props_names=multival_props_names, + handle_vocab_uri_strategy=HANDLE_VOCAB_URI_STRATEGY.SHORTEN, + batching=False) + + graph_store = Graph(store=Neo4jStore(config=config)) + + try: + 
graph_store.parse(os.path.join(os.path.dirname(os.path.realpath(__file__)), "../test_files/n10s_example.json")) + except Exception as e: + assert isinstance(e, ShortenStrictException) + assert True \ No newline at end of file diff --git a/test/test_files/n10s_example.json b/test/test_files/n10s_example.json new file mode 100644 index 0000000..ee0f23e --- /dev/null +++ b/test/test_files/n10s_example.json @@ -0,0 +1 @@ +[{"@id":"http://neo4j.org/ind#apoc3502","http://neo4j.org/vocab/sw#name":[{"@value":"APOC"}],"@type":["http://neo4j.org/vocab/sw#Neo4jPlugin"],"http://neo4j.org/vocab/sw#version":[{"@value":"3.5.0.4"}],"http://neo4j.org/vocab/sw#releaseDate":[{"@value":"05-31-2019"}],"http://neo4j.org/vocab/sw#runsOn":[{"@id":"http://neo4j.org/ind#neo4j355"}]},{"@id":"http://neo4j.org/ind#graphql3502","http://neo4j.org/vocab/sw#name":[{"@value":"Neo4j-GraphQL"}],"@type":["http://neo4j.org/vocab/sw#Neo4jPlugin"],"http://neo4j.org/vocab/sw#version":[{"@value":"3.5.0.3"}],"http://neo4j.org/vocab/sw#releaseDate":[{"@value":"05-05-2019"}],"http://neo4j.org/ind#releaseDate":[{"@value":"05-05-2021"}],"http://neo4j.org/vocab/sw#runsOn":[{"@id":"http://neo4j.org/ind#neo4j355"}]},{"@id":"http://neo4j.org/ind#mongodb355","http://neo4j.org/vocab/sw#name":[{"@value":"mongodb"}],"@type":["http://neo4j.org/vocab/sw#AwesomePlatform"],"http://neo4j.org/vocab/sw#author":[{"@value":"JB"},{"@value":"Jesus 
Barrasa"}],"http://neo4j.org/vocab/sw#version":[{"@value":"3.5.5"}]},{"@id":"http://neo4j.org/ind#neo4j355","http://neo4j.org/vocab/sw#name":[{"@value":"neo4j"}],"@type":["http://neo4j.org/vocab/sw#GraphPlatform","http://neo4j.org/vocab/sw#AwesomePlatform"],"http://neo4j.org/vocab/sw#version":[{"@value":"3.5.5"}]},{"@id":"http://neo4j.org/ind#nsmntx3502","http://neo4j.org/vocab/sw#name":[{"@value":"NSMNTX"}],"@type":["http://neo4j.org/vocab/sw#Neo4jPlugin"],"http://neo4j.org/vocab/sw#version":[{"@value":"3.5.0.2"}],"http://neo4j.org/vocab/sw#releaseDate":[{"@value":"03-06-2019"}],"http://neo4j.org/vocab/sw#runsOn":[{"@id":"http://neo4j.org/ind#neo4j355"}]},{"@id":"http://neo4j.org/vocab/sw#AwesomePlatform"},{"@id":"http://neo4j.org/vocab/sw#GraphPlatform"},{"@id":"http://neo4j.org/vocab/sw#Neo4jPlugin"}] From 80b675307751e367682e49033879facb97fa65c9 Mon Sep 17 00:00:00 2001 From: Alfred Rubin Date: Mon, 22 Jan 2024 13:44:04 +0100 Subject: [PATCH 06/14] adding test for file format json-ld --- test/integration/file_format.py | 15 +++ test/integration/handle_vocab_uri_test.py | 2 +- test/integration/utils.py | 10 +- test/test_files/n10s_example.json | 136 +++++++++++++++++++++- test/test_files/products.json | 44 +++++++ 5 files changed, 200 insertions(+), 7 deletions(-) create mode 100644 test/integration/file_format.py create mode 100644 test/test_files/products.json diff --git a/test/integration/file_format.py b/test/integration/file_format.py new file mode 100644 index 0000000..0837fe3 --- /dev/null +++ b/test/integration/file_format.py @@ -0,0 +1,15 @@ +from rdflib import Graph, Literal, RDF, URIRef +from rdflib.namespace import FOAF +from test.integration.constants import GET_DATA_QUERY, RDFLIB_DB +from test.integration.utils import records_equal, read_file_n10s_and_rdflib +import pytest +from test.integration.fixtures import neo4j_container, neo4j_driver, graph_store, graph_store_batched, \ + cleanup_databases + + +def test_read_json_ld_file(neo4j_driver, 
graph_store): + """Compare data imported with n10s procs and n10s + rdflib in single add mode""" + records_from_rdf_lib, records, _, _ = read_file_n10s_and_rdflib(neo4j_driver, graph_store,file_path="../test_files/n10s_example.json",n10s_file_format="'JSON-LD'",rdflib_file_format="json-ld") + assert len(records_from_rdf_lib) == len(records) + for i in range(len(records)): + assert records_equal(records[i], records_from_rdf_lib[i]) \ No newline at end of file diff --git a/test/integration/handle_vocab_uri_test.py b/test/integration/handle_vocab_uri_test.py index 5792aa6..cb7a2c8 100644 --- a/test/integration/handle_vocab_uri_test.py +++ b/test/integration/handle_vocab_uri_test.py @@ -149,7 +149,7 @@ def test_ignore_strategy_on_json_ld_file(neo4j_container, neo4j_driver): custom_prefixes=prefixes, custom_mappings=custom_mappings, multival_props_names=multival_props_names, - handle_vocab_uri_strategy=HANDLE_VOCAB_URI_STRATEGY.SHORTEN, + handle_vocab_uri_strategy=HANDLE_VOCAB_URI_STRATEGY.IGNORE, batching=False) graph_store = Graph(store=Neo4jStore(config=config)) diff --git a/test/integration/utils.py b/test/integration/utils.py index 8b5fcfb..94e7bfb 100644 --- a/test/integration/utils.py +++ b/test/integration/utils.py @@ -39,7 +39,7 @@ def records_equal(record1: Record, record2: Record, rels=False): def read_file_n10s_and_rdflib(neo4j_driver, graph_store, batching=False, n10s_params=None, n10s_mappings=None, - get_rels=False): + get_rels=False, file_path="../test_files/n10s_example.ttl", n10s_file_format="Turtle", rdflib_file_format = "ttl"): """Compare data imported with n10s procs and n10s + rdflib""" if n10s_mappings is None: n10s_mappings = [] @@ -47,19 +47,19 @@ def read_file_n10s_and_rdflib(neo4j_driver, graph_store, batching=False, n10s_pa n10s_params = {"handleVocabUris": "IGNORE"} g = Graph() - g.parse(os.path.join(os.path.dirname(os.path.realpath(__file__)), "../test_files/n10s_example.ttl")) - rdf_payload = g.serialize(format='ttl') + 
g.parse(os.path.join(os.path.dirname(os.path.realpath(__file__)), file_path)) + rdf_payload = g.serialize(format=rdflib_file_format) neo4j_driver.execute_query("CALL n10s.graphconfig.init($params)", params=n10s_params) for (prefix, mapping) in n10s_mappings: neo4j_driver.execute_query(prefix) neo4j_driver.execute_query(mapping) - records = neo4j_driver.execute_query("CALL n10s.rdf.import.inline($payload, 'Turtle')", + records = neo4j_driver.execute_query(f"CALL n10s.rdf.import.inline($payload, {n10s_file_format})", payload=rdf_payload) assert records[0][0]["terminationStatus"] == "OK" - graph_store.parse(data=rdf_payload, format="ttl") + graph_store.parse(data=rdf_payload, format=rdflib_file_format) # When batching we need to close the store to check that all the data is flushed if batching: graph_store.close(True) diff --git a/test/test_files/n10s_example.json b/test/test_files/n10s_example.json index ee0f23e..75ed220 100644 --- a/test/test_files/n10s_example.json +++ b/test/test_files/n10s_example.json @@ -1 +1,135 @@ 
-[{"@id":"http://neo4j.org/ind#apoc3502","http://neo4j.org/vocab/sw#name":[{"@value":"APOC"}],"@type":["http://neo4j.org/vocab/sw#Neo4jPlugin"],"http://neo4j.org/vocab/sw#version":[{"@value":"3.5.0.4"}],"http://neo4j.org/vocab/sw#releaseDate":[{"@value":"05-31-2019"}],"http://neo4j.org/vocab/sw#runsOn":[{"@id":"http://neo4j.org/ind#neo4j355"}]},{"@id":"http://neo4j.org/ind#graphql3502","http://neo4j.org/vocab/sw#name":[{"@value":"Neo4j-GraphQL"}],"@type":["http://neo4j.org/vocab/sw#Neo4jPlugin"],"http://neo4j.org/vocab/sw#version":[{"@value":"3.5.0.3"}],"http://neo4j.org/vocab/sw#releaseDate":[{"@value":"05-05-2019"}],"http://neo4j.org/ind#releaseDate":[{"@value":"05-05-2021"}],"http://neo4j.org/vocab/sw#runsOn":[{"@id":"http://neo4j.org/ind#neo4j355"}]},{"@id":"http://neo4j.org/ind#mongodb355","http://neo4j.org/vocab/sw#name":[{"@value":"mongodb"}],"@type":["http://neo4j.org/vocab/sw#AwesomePlatform"],"http://neo4j.org/vocab/sw#author":[{"@value":"JB"},{"@value":"Jesus Barrasa"}],"http://neo4j.org/vocab/sw#version":[{"@value":"3.5.5"}]},{"@id":"http://neo4j.org/ind#neo4j355","http://neo4j.org/vocab/sw#name":[{"@value":"neo4j"}],"@type":["http://neo4j.org/vocab/sw#GraphPlatform","http://neo4j.org/vocab/sw#AwesomePlatform"],"http://neo4j.org/vocab/sw#version":[{"@value":"3.5.5"}]},{"@id":"http://neo4j.org/ind#nsmntx3502","http://neo4j.org/vocab/sw#name":[{"@value":"NSMNTX"}],"@type":["http://neo4j.org/vocab/sw#Neo4jPlugin"],"http://neo4j.org/vocab/sw#version":[{"@value":"3.5.0.2"}],"http://neo4j.org/vocab/sw#releaseDate":[{"@value":"03-06-2019"}],"http://neo4j.org/vocab/sw#runsOn":[{"@id":"http://neo4j.org/ind#neo4j355"}]},{"@id":"http://neo4j.org/vocab/sw#AwesomePlatform"},{"@id":"http://neo4j.org/vocab/sw#GraphPlatform"},{"@id":"http://neo4j.org/vocab/sw#Neo4jPlugin"}] +[ + { + "@id":"http://neo4j.org/ind#apoc3502", + "http://neo4j.org/vocab/sw#name":[ + { + "@value":"APOC" + } + ], + "@type":[ + "http://neo4j.org/vocab/sw#Neo4jPlugin" + ], + 
"http://neo4j.org/vocab/sw#version":[ + { + "@value":"3.5.0.4" + } + ], + "http://neo4j.org/vocab/sw#releaseDate":[ + { + "@value":"05-31-2019" + } + ], + "http://neo4j.org/vocab/sw#runsOn":[ + { + "@id":"http://neo4j.org/ind#neo4j355" + } + ] + }, + { + "@id":"http://neo4j.org/ind#graphql3502", + "http://neo4j.org/vocab/sw#name":[ + { + "@value":"Neo4j-GraphQL" + } + ], + "@type":[ + "http://neo4j.org/vocab/sw#Neo4jPlugin" + ], + "http://neo4j.org/vocab/sw#version":[ + { + "@value":"3.5.0.3" + } + ], + "http://neo4j.org/vocab/sw#releaseDate":[ + { + "@value":"05-05-2019" + } + ], + "http://neo4j.org/ind#releaseDate":[ + { + "@value":"05-05-2021" + } + ], + "http://neo4j.org/vocab/sw#runsOn":[ + { + "@id":"http://neo4j.org/ind#neo4j355" + } + ] + }, + { + "@id":"http://neo4j.org/ind#mongodb355", + "http://neo4j.org/vocab/sw#name":[ + { + "@value":"mongodb" + } + ], + "@type":[ + "http://neo4j.org/vocab/sw#AwesomePlatform" + ], + "http://neo4j.org/vocab/sw#author":[ + { + "@value":"JB" + }, + { + "@value":"Jesus Barrasa" + } + ], + "http://neo4j.org/vocab/sw#version":[ + { + "@value":"3.5.5" + } + ] + }, + { + "@id":"http://neo4j.org/ind#neo4j355", + "http://neo4j.org/vocab/sw#name":[ + { + "@value":"neo4j" + } + ], + "@type":[ + "http://neo4j.org/vocab/sw#GraphPlatform", + "http://neo4j.org/vocab/sw#AwesomePlatform" + ], + "http://neo4j.org/vocab/sw#version":[ + { + "@value":"3.5.5" + } + ] + }, + { + "@id":"http://neo4j.org/ind#nsmntx3502", + "http://neo4j.org/vocab/sw#name":[ + { + "@value":"NSMNTX" + } + ], + "@type":[ + "http://neo4j.org/vocab/sw#Neo4jPlugin" + ], + "http://neo4j.org/vocab/sw#version":[ + { + "@value":"3.5.0.2" + } + ], + "http://neo4j.org/vocab/sw#releaseDate":[ + { + "@value":"03-06-2019" + } + ], + "http://neo4j.org/vocab/sw#runsOn":[ + { + "@id":"http://neo4j.org/ind#neo4j355" + } + ] + }, + { + "@id":"http://neo4j.org/vocab/sw#AwesomePlatform" + }, + { + "@id":"http://neo4j.org/vocab/sw#GraphPlatform" + }, + { + 
"@id":"http://neo4j.org/vocab/sw#Neo4jPlugin" + } +] \ No newline at end of file diff --git a/test/test_files/products.json b/test/test_files/products.json new file mode 100644 index 0000000..9129e78 --- /dev/null +++ b/test/test_files/products.json @@ -0,0 +1,44 @@ +{ + "@context":"http://schema.org/", + "@type":"Product", + "url":"https://www.edeka24.de/Wein-Spirituosen/Spirituosen/Vodka/Danzka-Premium-Vodka-Grapefruit-0-7L.html", + "gtin":"5700351851003", + "name":"Danzka Premium Vodka Grapefruit 0,7L", + "description":"EDEKA24 - Lebensmittel Onlineshop - Danzka - Danzka Premium Vodka Grapefruit 0,7L - online kaufen | Schnelle Lieferung mit DHL", + "image":[ + { + "@type":"MediaObject", + "url":"https://www.edeka24.de/out/pictures/generated/product/1/540_540_90/danzka-vodka-grapafruit_1000.jpg", + "width":{ + "@type":"QuantitativeValue", + "value":1000, + "unitCode":"E37" + }, + "height":{ + "@type":"QuantitativeValue", + "value":1000, + "unitCode":"E37" + } + } + ], + "offer":[ + { + "@type":"Offer", + "url":"https://www.edeka24.de/Wein-Spirituosen/Spirituosen/Vodka/Danzka-Premium-Vodka-Grapefruit-0-7L.html", + "sku":"1194795008", + "priceSpecification":[ + { + "@type":"PriceSpecification", + "price":"14.99", + "priceCurrency":"EUR" + } + ], + "seller":{ + "@type":"Organization", + "@id":"https://www.edeka24.de/", + "url":"https://www.edeka24.de/", + "legalName":"neukauf markt GmbH" + } + } + ] +} \ No newline at end of file From a08adf28cffb9ca7504c70cda826171fff99f522 Mon Sep 17 00:00:00 2001 From: Alfred Rubin Date: Mon, 22 Jan 2024 14:06:55 +0100 Subject: [PATCH 07/14] fixing tests --- test/integration/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/integration/utils.py b/test/integration/utils.py index 94e7bfb..d659dc3 100644 --- a/test/integration/utils.py +++ b/test/integration/utils.py @@ -39,7 +39,7 @@ def records_equal(record1: Record, record2: Record, rels=False): def read_file_n10s_and_rdflib(neo4j_driver, 
graph_store, batching=False, n10s_params=None, n10s_mappings=None, - get_rels=False, file_path="../test_files/n10s_example.ttl", n10s_file_format="Turtle", rdflib_file_format = "ttl"): + get_rels=False, file_path="../test_files/n10s_example.ttl", n10s_file_format="'Turtle'", rdflib_file_format = "ttl"): """Compare data imported with n10s procs and n10s + rdflib""" if n10s_mappings is None: n10s_mappings = [] From 5c3040d1826a8f41513049c39424d8135e005e4c Mon Sep 17 00:00:00 2001 From: Niels de Jong Date: Tue, 23 Jan 2024 10:24:40 +0100 Subject: [PATCH 08/14] Updated documentation --- docs/getting_started.adoc | 3 - docs/introduction.adoc | 16 -- docs/modules/ROOT/nav.adoc | 7 +- docs/modules/ROOT/pages/contributing.adoc | 17 ++ docs/modules/ROOT/pages/examples.adoc | 149 ++++++++++++++++++ docs/modules/ROOT/pages/gettingstarted.adoc | 88 +++++++++++ docs/modules/ROOT/pages/index.adoc | 14 +- .../ROOT/pages/neo4jstore.adoc} | 4 +- .../ROOT/pages/neo4jstoreconfig.adoc} | 25 ++- 9 files changed, 284 insertions(+), 39 deletions(-) delete mode 100644 docs/getting_started.adoc delete mode 100644 docs/introduction.adoc create mode 100644 docs/modules/ROOT/pages/contributing.adoc create mode 100644 docs/modules/ROOT/pages/examples.adoc create mode 100644 docs/modules/ROOT/pages/gettingstarted.adoc rename docs/{documentation/Neo4jStore-0.1.adoc => modules/ROOT/pages/neo4jstore.adoc} (98%) rename docs/{documentation/Neo4jStoreConfig-0.1.adoc => modules/ROOT/pages/neo4jstoreconfig.adoc} (88%) diff --git a/docs/getting_started.adoc b/docs/getting_started.adoc deleted file mode 100644 index d1dfc11..0000000 --- a/docs/getting_started.adoc +++ /dev/null @@ -1,3 +0,0 @@ -== Using the Neo4j store and examples - -== Migrating an existing implementation to the Neo4j Store diff --git a/docs/introduction.adoc b/docs/introduction.adoc deleted file mode 100644 index fdadfde..0000000 --- a/docs/introduction.adoc +++ /dev/null @@ -1,16 +0,0 @@ -= RDFLIB + Neo4j User Guide -:doctype: book 
-:icons: font -:source-highlighter: coderay -:toc: left -:toclevels: 2 - -toc::[] - -= Chapter 1: Introduction - -== Section 1.1: Purpose -This section provides an overview of the purpose of this documentation. - - - diff --git a/docs/modules/ROOT/nav.adoc b/docs/modules/ROOT/nav.adoc index 057cbff..f711cf1 100644 --- a/docs/modules/ROOT/nav.adoc +++ b/docs/modules/ROOT/nav.adoc @@ -1 +1,6 @@ -* xref:index.adoc[Introduction] \ No newline at end of file +* xref:index.adoc[Introduction] +* xref:gettingstarted.adoc[Getting Started] +* xref:neo4jstore.adoc[Neo4j Store] +* xref:neo4jstoreconfig.adoc[Store Configuration] +* xref:examples.adoc[Examples] +* xref:contributing.adoc[Contributing] \ No newline at end of file diff --git a/docs/modules/ROOT/pages/contributing.adoc b/docs/modules/ROOT/pages/contributing.adoc new file mode 100644 index 0000000..894f807 --- /dev/null +++ b/docs/modules/ROOT/pages/contributing.adoc @@ -0,0 +1,17 @@ +# Contributing + +Contributions to the project are highly welcomed. +If you extend the library with custom functionality, consider creating a Pull Request on our GitHub repository. + + +We highly recommend to familiarize yourself with the RDFLib core library. You can https://github.com/RDFLib/rdflib/#getting-started[learn more here]. + + +Contribution checklist: + +- Find or create an https://github.com/neo4j-labs/rdflib-neo4j/issues[issue] on GitHub. +- Fork the repository, create your own feature branch starting from the `develop` branch. +- Document your code with docstrings or in the documentation (`docs` folder), if applicable. + +## Feature Requests / Bugs +If you have a request for a feature, or have found a bug, creating an https://github.com/neo4j-labs/rdflib-neo4j/issues[issue on GitHub] is the best way to reach out. 
\ No newline at end of file diff --git a/docs/modules/ROOT/pages/examples.adoc b/docs/modules/ROOT/pages/examples.adoc new file mode 100644 index 0000000..4c81455 --- /dev/null +++ b/docs/modules/ROOT/pages/examples.adoc @@ -0,0 +1,149 @@ += Examples + +This page contains some code snippets with examples on using the library. + +== Importing a TTL file +This a basic example for importing a single TTL file. +Insert your own database credentials for `AURA_DB_URI`, `AURA_DB_USERNAME`, `AURA_DB_PWD` to use this template. + +[source,python] +---- +from rdflib_neo4j import Neo4jStoreConfig +from rdflib_neo4j import HANDLE_VOCAB_URI_STRATEGY + +# Get your Aura Db free instance here: https://neo4j.com/cloud/aura-free/#test-drive-section +AURA_DB_URI="your_db_uri" +AURA_DB_USERNAME="neo4j" +AURA_DB_PWD="your_db_pwd" + +auth_data = {'uri': AURA_DB_URI, + 'database': "neo4j", + 'user': AURA_DB_USERNAME, + 'pwd': AURA_DB_PWD} +from rdflib import Namespace + +# Define your prefixes +prefixes = { + 'neo4ind': Namespace('http://neo4j.org/ind#'), + 'neo4voc': Namespace('http://neo4j.org/vocab/sw#'), + 'nsmntx': Namespace('http://neo4j.org/vocab/NSMNTX#'), + 'apoc': Namespace('http://neo4j.org/vocab/APOC#'), + 'graphql': Namespace('http://neo4j.org/vocab/GraphQL#') +} +# Define your custom mappings +config = Neo4jStoreConfig(auth_data=auth_data, + custom_prefixes=prefixes, + handle_vocab_uri_strategy=HANDLE_VOCAB_URI_STRATEGY.IGNORE, + batching=True) +from rdflib_neo4j import Neo4jStore +from rdflib import Graph +file_path = 'https://raw.githubusercontent.com/neo4j-labs/neosemantics/3.5/docs/rdf/nsmntx.ttl' + +graph_store = Graph(store=Neo4jStore(config=config)) +graph_store.parse(file_path,format="ttl") +graph_store.close(True) +---- + +== Advanced Examples + +=== Initialize Neo4jStore + +[source,python] +---- +from rdflib_neo4j import Neo4jStoreConfig, Neo4jStore, HANDLE_VOCAB_URI_STRATEGY +from rdflib import Namespace, Graph, URIRef, RDF, SKOS, Literal + + +# Define your custom 
prefixes +prefixes = { + 'neo4ind': Namespace('http://neo4j.org/ind#'), + 'neo4voc': Namespace('http://neo4j.org/vocab/sw#'), +} + +# Neo4j connection credentials +auth_data = {'uri': 'your_neo4j_uri', + 'database': 'neo4j', + 'user': "neo4j", + 'pwd': 'your_password'} + +# Define your Neo4jStoreConfig +config = Neo4jStoreConfig(auth_data=auth_data, + custom_prefixes=prefixes, + handle_vocab_uri_strategy=HANDLE_VOCAB_URI_STRATEGY.IGNORE, + batching=False) + +neo4j_store = Neo4jStore(config=config) +graph_store = Graph(store=neo4j_store) + +---- + +=== Import by Reference URL + +[source,python] +---- +file_path = 'https://raw.githubusercontent.com/neo4j-labs/neosemantics/3.5/docs/rdf/nsmntx.ttl' +graph_store.parse(file_path,format="ttl") +---- + +=== Write Individual Triples + +[source,python] +---- +aura = URIRef("http://neo4j.com/voc/tech#AuraDB") + +graph_store.add((aura, RDF.type, SKOS.Concept)) +graph_store.add((aura, SKOS.prefLabel, Literal("AuraDB"))) +graph_store.add((aura, SKOS.broader, URIRef("http://neo4j.org/ind#neo4j355"))) + +---- + +=== SPARQL Query with Batching + +[source,python] +---- +import requests +import urllib.parse + +endpoint = "https://id.nlm.nih.gov/mesh/sparql" +sparql = """ +PREFIX rdfs: +PREFIX meshv: +PREFIX mesh: +PREFIX rdf: + +CONSTRUCT { ?s ?p ?o } +FROM +WHERE { + { + ?s ?p ?o + filter(?s = mesh:D000086402 || ?o = mesh:D000086402) + } + union + { + mesh:D000086402 ?x ?s . + ?s ?p ?o . + filter(?x != rdf:type && (isLiteral(?o) || ?p = rdf:type)) + } + union + { + ?s ?x mesh:D000086402 . + ?s ?p ?o . 
+ filter(isLiteral(?o|| ?p = rdf:type)) + } +} +""" + +# Define your Neo4jStoreConfig +config = Neo4jStoreConfig(auth_data=auth_data, + custom_prefixes=prefixes, + handle_vocab_uri_strategy=HANDLE_VOCAB_URI_STRATEGY.IGNORE, + batching=True) + +neo4j_store = Neo4jStore(config=config) +graph_store = Graph(store=neo4j_store) + +query_response = requests.get(endpoint, params = {"query": sparql , "format" : "TURTLE"}) +graph_store.parse(data=query_response.text,format='ttl') +graph_store.close(commit_pending_transaction=True) + +---- \ No newline at end of file diff --git a/docs/modules/ROOT/pages/gettingstarted.adoc b/docs/modules/ROOT/pages/gettingstarted.adoc new file mode 100644 index 0000000..381e73e --- /dev/null +++ b/docs/modules/ROOT/pages/gettingstarted.adoc @@ -0,0 +1,88 @@ += Getting Started + +This page describes how to get started with this library, and set up your first RDF import. + +== Set up Neo4j +To configure your Neo4j Graph DB, the process is simplified: initialize the database by establishing a uniqueness constraint on Resources' URIs. You can achieve this by executing the following Cypher fragment: + +[source,cypher] +---- +CREATE CONSTRAINT n10s_unique_uri FOR (r:Resource) REQUIRE r.uri IS UNIQUE; +---- +This constraint ensures the uniqueness of URIs for Resource nodes, streamlining the integration process. Alternatively, you can simply set `create=True` when attempting to open the store in your Python code, and it will create the constraint for you. + +== Set up Python environment +`rdflib-neo4j` can be installed with Python's package management tool `pip`: + +[source,shell] +---- +$ pip install rdflib-neo4j +---- + +== Loading data +Now, seamlessly import RDF data into your Neo4j On-premise or Aura instance by establishing an RDFLib graph and employing it to parse your RDF data. Each individual triple undergoes transparent persistence within your Neo4j database(whether it is on Aura or on-premise). 
Here's a step-by-step guide to achieve this integration: + +You can import the data from an RDF document (for example link:https://github.com/jbarrasa/datasets/blob/master/rdf/music.nt[this one serialised using N-Triples]): + +[source,python] +---- +from rdflib_neo4j import Neo4jStoreConfig, Neo4jStore, HANDLE_VOCAB_URI_STRATEGY +from rdflib import Graph + +# set the configuration to connect to your Aura DB +AURA_DB_URI="your_db_uri" +AURA_DB_USERNAME="neo4j" +AURA_DB_PWD="your_db_pwd" + +auth_data = {'uri': AURA_DB_URI, + 'database': "neo4j", + 'user': AURA_DB_USERNAME, + 'pwd': AURA_DB_PWD} + +# Define your custom mappings & store config +config = Neo4jStoreConfig(auth_data=auth_data, + custom_prefixes=prefixes, + handle_vocab_uri_strategy=HANDLE_VOCAB_URI_STRATEGY.IGNORE, + batching=True) + +file_path = 'https://github.com/jbarrasa/gc-2022/raw/main/search/onto/concept-scheme-skos.ttl' + +# Create the RDF Graph, parse & ingest the data to Neo4j, and close the store(If the field batching is set to True in the Neo4jStoreConfig, remember to close the store to prevent the loss of any uncommitted records.) +neo4j_aura = Graph(store=Neo4jStore(config=config)) +# Calling the parse method will implictly open the store +neo4j_aura.parse(file_path, format="ttl") +neo4j_aura.close(True) +---- + +The imported file contains a taxonomy of technologies extracted from Wikidata and serialised using SKOS. 
+After running the previous code fragment, your Aura DB/Neo4j DB should be populated with a graph like this one: + +image::https://raw.githubusercontent.com/neo4j-labs/rdflib-neo4j/master/img/graph-view-aura.png[height="400"] + +You can also write to the graph triple by triple like this: + +[source,python] +---- +import rdflib +from rdflib_neo4j import Neo4jStoreConfig, Neo4jStore, HANDLE_VOCAB_URI_STRATEGY +from rdflib import Graph, RDF, SKOS + +# Set up your store config +config = Neo4jStoreConfig(auth_data=auth_data, + handle_vocab_uri_strategy=HANDLE_VOCAB_URI_STRATEGY.IGNORE, + batching=False) + +# Create the graph and open the store +neo4j_aura = Graph(store=Neo4jStore(config=config)) +neo4j_aura.open(config) + +aura = rdflib.URIRef("http://neo4j.com/voc/tech#AuraDB") + +neo4j_aura.add((aura, RDF.type, SKOS.Concept)) +neo4j_aura.add((aura, SKOS.prefLabel, rdflib.Literal("AuraDB"))) +neo4j_aura.add((aura, SKOS.broader, rdflib.URIRef("http://www.wikidata.org/entity/Q1628290"))) +---- + +The previous fragment would add another node to the graph representing AuraDB as a concept related to Neo4j via `skos:narrower`, which in your AuraDB graph would look as follows: + +image::https://raw.githubusercontent.com/neo4j-labs/rdflib-neo4j/master/img/graph-view-aura-detail.png[height="150"] \ No newline at end of file diff --git a/docs/modules/ROOT/pages/index.adoc b/docs/modules/ROOT/pages/index.adoc index 21699b4..ac8777f 100644 --- a/docs/modules/ROOT/pages/index.adoc +++ b/docs/modules/ROOT/pages/index.adoc @@ -1 +1,13 @@ -jsdhdfvjfdvfv \ No newline at end of file +# RDFLib-Neo4j + +The **rdflib-neo4j** project is a Python-based https://rdflib.readthedocs.io/en/stable/[RDFLib Store] backed by Neo4j. +You can use this library for high-performance RDF data ingestion into the Neo4j database. + +This library works with all types of Neo4j deployments, whether on-premise or cloud-hosted (Neo4j Aura). 
+ +## Documentation + +- To get started, see the link:quickstart[Quickstart] page. +- For details on the available Python classes, see the link:neo4jstore[Neo4j Store] page. +- Example code fragments are available under link:examples[Examples]. +- If you want to contribute to this project, see link:contributing[Contributing]. diff --git a/docs/documentation/Neo4jStore-0.1.adoc b/docs/modules/ROOT/pages/neo4jstore.adoc similarity index 98% rename from docs/documentation/Neo4jStore-0.1.adoc rename to docs/modules/ROOT/pages/neo4jstore.adoc index 2f04f82..050d494 100644 --- a/docs/documentation/Neo4jStore-0.1.adoc +++ b/docs/modules/ROOT/pages/neo4jstore.adoc @@ -1,9 +1,9 @@ -== Neo4j Store += Neo4j Store [.procedures, opts=header] This class is an implementation of the rdflib link:https://rdflib.readthedocs.io/en/stable/_modules/rdflib/store.html[Store class] that uses Neo4j as a backend. In this way it is possible to persist you RDF data directly in Neo4j, with the power of rdflib to process your data. -=== Object Initialization +== Constructor |=== | Name | Type | Required | Default | Description |config|Neo4jStoreConfig|True||Neo4jStoreConfig object that contains all the useful informations to initialize the store. diff --git a/docs/documentation/Neo4jStoreConfig-0.1.adoc b/docs/modules/ROOT/pages/neo4jstoreconfig.adoc similarity index 88% rename from docs/documentation/Neo4jStoreConfig-0.1.adoc rename to docs/modules/ROOT/pages/neo4jstoreconfig.adoc index efbd664..2ef01f5 100644 --- a/docs/documentation/Neo4jStoreConfig-0.1.adoc +++ b/docs/modules/ROOT/pages/neo4jstoreconfig.adoc @@ -1,17 +1,17 @@ -== Neo4j Store Config += Neo4j Store Config [.procedures, opts=header] This object is used to configure the Neo4j Store to connect to your Neo4j Instance and to manage the parsing of a Triple Store. 
-=== Object Initialization +== Constructor |=== | Name | Type | Required | Values(Default) | Description | auth_data | Dictionary | True | ("uri", "database", "user", "pwd") | A dictionary containing authentication data. The required keys are: ["uri", "database", "user", "pwd"]. | batching | Boolean | False | boolean (True) | A boolean indicating whether batching is enabled. | batch_size | Integer | False | (5000) | An integer representing the batch size (The batch size is intended as number of entities to store inside the database (nodes/relationships) and not triples. | custom_mappings | List[Tuple[Str,Str,Str]] | False | Empty list | A list of tuples containing custom mappings for prefixes in the form (prefix, object_to_replace, new_object). -| custom_prefixes | Dictionary | True if handle_vocab_uri_strategy == HANDLE_VOCAB_URI_STRATEGY.SHORTEN | ({}) | A dictionary containing custom prefixes. -| handle_vocab_uri_strategy | HANDLE_VOCAB_URI_STRATEGY | False |HANDLE_VOCAB_URI_STRATEGY.IGNORE, HANDLE_VOCAB_URI_STRATEGY.KEEP, HANDLE_VOCAB_URI_STRATEGY.MAP(HANDLE_VOCAB_URI_STRATEGY.SHORTEN) | +| custom_prefixes | Dictionary | True ① | ({}) | A dictionary containing custom prefixes. +| handle_vocab_uri_strategy | HANDLE_VOCAB_URI_STRATEGY | False |IGNORE, KEEP, MAP, (SHORTEN) | * 'SHORTEN', full uris are shortened using prefixes for property names, relationship names and labels. Fails if a prefix is not predefined for a namespace in the imported RDF. @@ -21,13 +21,15 @@ This object is used to configure the Neo4j Store to connect to your Neo4j Instan * 'KEEP' uris are kept unchanged -| handle_multival_strategy | HANDLE_MULTIVAL_STRATEGY | False | HANDLE_MULTIVAL_STRATEGY.ARRAY (HANDLE_MULTIVAL_STRATEGY.OVERWRITE)| +| handle_multival_strategy | HANDLE_MULTIVAL_STRATEGY | False | ARRAY (OVERWRITE)| * 'OVERWRITE' property values are kept single valued. 
Multiple values in the imported RDF are overwriten (only the last one is kept) * 'ARRAY' properties are stored in an array enabling storage of multiple values. All of them unless multivalPropList is set. | multival_props_names | List[Tuple[Str,Str]] | False | ([]) | A list of tuples containing the prefix and property names to be treated as multivalued in the form (prefix, property_name). |=== +① if handle_vocab_uri_strategy == HANDLE_VOCAB_URI_STRATEGY.SHORTEN + == Functions === set_handle_vocab_uri_strategy @@ -188,7 +190,7 @@ No arguments | Dictionary | A dictionary containing all prefixes. |=== -== Enumerators +== Enumerated Values === HANDLE_VOCAB_URI_STRATEGY @@ -204,12 +206,6 @@ Enum class defining different strategies for handling vocabulary URIs. | IGNORE | Strategy to ignore the Namespace and get only the local part |=== -=== Examples - -Here a series of examples of the application of a strategy on a certain triplet. - -#TODO: Fill the examples# - === Shorten This strategy will shorten the URIs, replacing the prefix with its shorted version. If the Store find a prefix not defined inside its Neo4jStoreConfig object, the parsing will stop, raising a ShortenStrictException error. @@ -229,7 +225,7 @@ This strategy will remove the entire prefix from the predicate. Enum class defining different strategies for handling multiple values. -TO NOTICE: If the strategy is ARRAY and the Neo4jStoreConfig doesn't contain any predicate marked as multivalued, EVERY field will be treated as multivalued. +> If the strategy is ARRAY and the Neo4jStoreConfig doesn't contain any predicate marked as multivalued, EVERY field will be treated as multivalued. ==== Possible Values @@ -239,9 +235,6 @@ TO NOTICE: If the strategy is ARRAY and the Neo4jStoreConfig doesn't contain any | ARRAY | Strategy to treat multiple values as an array |=== -=== Examples - -Here a series of examples of the application of a strategy on a certain triplet. 
=== Overwrite From dcc8c8788df65f8d57654bd2a0ad828f553f5df3 Mon Sep 17 00:00:00 2001 From: Niels de Jong Date: Tue, 23 Jan 2024 10:43:58 +0100 Subject: [PATCH 09/14] Revert "Updated documentation" This reverts commit 5c3040d1826a8f41513049c39424d8135e005e4c. --- .../Neo4jStore-0.1.adoc} | 4 +- .../Neo4jStoreConfig-0.1.adoc} | 25 +-- docs/getting_started.adoc | 3 + docs/introduction.adoc | 16 ++ docs/modules/ROOT/nav.adoc | 7 +- docs/modules/ROOT/pages/contributing.adoc | 17 -- docs/modules/ROOT/pages/examples.adoc | 149 ------------------ docs/modules/ROOT/pages/gettingstarted.adoc | 88 ----------- docs/modules/ROOT/pages/index.adoc | 14 +- 9 files changed, 39 insertions(+), 284 deletions(-) rename docs/{modules/ROOT/pages/neo4jstore.adoc => documentation/Neo4jStore-0.1.adoc} (98%) rename docs/{modules/ROOT/pages/neo4jstoreconfig.adoc => documentation/Neo4jStoreConfig-0.1.adoc} (88%) create mode 100644 docs/getting_started.adoc create mode 100644 docs/introduction.adoc delete mode 100644 docs/modules/ROOT/pages/contributing.adoc delete mode 100644 docs/modules/ROOT/pages/examples.adoc delete mode 100644 docs/modules/ROOT/pages/gettingstarted.adoc diff --git a/docs/modules/ROOT/pages/neo4jstore.adoc b/docs/documentation/Neo4jStore-0.1.adoc similarity index 98% rename from docs/modules/ROOT/pages/neo4jstore.adoc rename to docs/documentation/Neo4jStore-0.1.adoc index 050d494..2f04f82 100644 --- a/docs/modules/ROOT/pages/neo4jstore.adoc +++ b/docs/documentation/Neo4jStore-0.1.adoc @@ -1,9 +1,9 @@ -= Neo4j Store +== Neo4j Store [.procedures, opts=header] This class is an implementation of the rdflib link:https://rdflib.readthedocs.io/en/stable/_modules/rdflib/store.html[Store class] that uses Neo4j as a backend. In this way it is possible to persist you RDF data directly in Neo4j, with the power of rdflib to process your data. 
-== Constructor +=== Object Initialization |=== | Name | Type | Required | Default | Description |config|Neo4jStoreConfig|True||Neo4jStoreConfig object that contains all the useful informations to initialize the store. diff --git a/docs/modules/ROOT/pages/neo4jstoreconfig.adoc b/docs/documentation/Neo4jStoreConfig-0.1.adoc similarity index 88% rename from docs/modules/ROOT/pages/neo4jstoreconfig.adoc rename to docs/documentation/Neo4jStoreConfig-0.1.adoc index 2ef01f5..efbd664 100644 --- a/docs/modules/ROOT/pages/neo4jstoreconfig.adoc +++ b/docs/documentation/Neo4jStoreConfig-0.1.adoc @@ -1,17 +1,17 @@ -= Neo4j Store Config +== Neo4j Store Config [.procedures, opts=header] This object is used to configure the Neo4j Store to connect to your Neo4j Instance and to manage the parsing of a Triple Store. -== Constructor +=== Object Initialization |=== | Name | Type | Required | Values(Default) | Description | auth_data | Dictionary | True | ("uri", "database", "user", "pwd") | A dictionary containing authentication data. The required keys are: ["uri", "database", "user", "pwd"]. | batching | Boolean | False | boolean (True) | A boolean indicating whether batching is enabled. | batch_size | Integer | False | (5000) | An integer representing the batch size (The batch size is intended as number of entities to store inside the database (nodes/relationships) and not triples. | custom_mappings | List[Tuple[Str,Str,Str]] | False | Empty list | A list of tuples containing custom mappings for prefixes in the form (prefix, object_to_replace, new_object). -| custom_prefixes | Dictionary | True ① | ({}) | A dictionary containing custom prefixes. -| handle_vocab_uri_strategy | HANDLE_VOCAB_URI_STRATEGY | False |IGNORE, KEEP, MAP, (SHORTEN) | +| custom_prefixes | Dictionary | True if handle_vocab_uri_strategy == HANDLE_VOCAB_URI_STRATEGY.SHORTEN | ({}) | A dictionary containing custom prefixes. 
+| handle_vocab_uri_strategy | HANDLE_VOCAB_URI_STRATEGY | False |HANDLE_VOCAB_URI_STRATEGY.IGNORE, HANDLE_VOCAB_URI_STRATEGY.KEEP, HANDLE_VOCAB_URI_STRATEGY.MAP(HANDLE_VOCAB_URI_STRATEGY.SHORTEN) | * 'SHORTEN', full uris are shortened using prefixes for property names, relationship names and labels. Fails if a prefix is not predefined for a namespace in the imported RDF. @@ -21,15 +21,13 @@ This object is used to configure the Neo4j Store to connect to your Neo4j Instan * 'KEEP' uris are kept unchanged -| handle_multival_strategy | HANDLE_MULTIVAL_STRATEGY | False | ARRAY (OVERWRITE)| +| handle_multival_strategy | HANDLE_MULTIVAL_STRATEGY | False | HANDLE_MULTIVAL_STRATEGY.ARRAY (HANDLE_MULTIVAL_STRATEGY.OVERWRITE)| * 'OVERWRITE' property values are kept single valued. Multiple values in the imported RDF are overwriten (only the last one is kept) * 'ARRAY' properties are stored in an array enabling storage of multiple values. All of them unless multivalPropList is set. | multival_props_names | List[Tuple[Str,Str]] | False | ([]) | A list of tuples containing the prefix and property names to be treated as multivalued in the form (prefix, property_name). |=== -① if handle_vocab_uri_strategy == HANDLE_VOCAB_URI_STRATEGY.SHORTEN - == Functions === set_handle_vocab_uri_strategy @@ -190,7 +188,7 @@ No arguments | Dictionary | A dictionary containing all prefixes. |=== -== Enumerated Values +== Enumerators === HANDLE_VOCAB_URI_STRATEGY @@ -206,6 +204,12 @@ Enum class defining different strategies for handling vocabulary URIs. | IGNORE | Strategy to ignore the Namespace and get only the local part |=== +=== Examples + +Here a series of examples of the application of a strategy on a certain triplet. + +#TODO: Fill the examples# + === Shorten This strategy will shorten the URIs, replacing the prefix with its shorted version. If the Store find a prefix not defined inside its Neo4jStoreConfig object, the parsing will stop, raising a ShortenStrictException error. 
@@ -225,7 +229,7 @@ This strategy will remove the entire prefix from the predicate. Enum class defining different strategies for handling multiple values. -> If the strategy is ARRAY and the Neo4jStoreConfig doesn't contain any predicate marked as multivalued, EVERY field will be treated as multivalued. +TO NOTICE: If the strategy is ARRAY and the Neo4jStoreConfig doesn't contain any predicate marked as multivalued, EVERY field will be treated as multivalued. ==== Possible Values @@ -235,6 +239,9 @@ Enum class defining different strategies for handling multiple values. | ARRAY | Strategy to treat multiple values as an array |=== +=== Examples + +Here a series of examples of the application of a strategy on a certain triplet. === Overwrite diff --git a/docs/getting_started.adoc b/docs/getting_started.adoc new file mode 100644 index 0000000..d1dfc11 --- /dev/null +++ b/docs/getting_started.adoc @@ -0,0 +1,3 @@ +== Using the Neo4j store and examples + +== Migrating an existing implementation to the Neo4j Store diff --git a/docs/introduction.adoc b/docs/introduction.adoc new file mode 100644 index 0000000..fdadfde --- /dev/null +++ b/docs/introduction.adoc @@ -0,0 +1,16 @@ += RDFLIB + Neo4j User Guide +:doctype: book +:icons: font +:source-highlighter: coderay +:toc: left +:toclevels: 2 + +toc::[] + += Chapter 1: Introduction + +== Section 1.1: Purpose +This section provides an overview of the purpose of this documentation. 
+ + + diff --git a/docs/modules/ROOT/nav.adoc b/docs/modules/ROOT/nav.adoc index f711cf1..057cbff 100644 --- a/docs/modules/ROOT/nav.adoc +++ b/docs/modules/ROOT/nav.adoc @@ -1,6 +1 @@ -* xref:index.adoc[Introduction] -* xref:gettingstarted.adoc[Getting Started] -* xref:neo4jstore.adoc[Neo4j Store] -* xref:neo4jstoreconfig.adoc[Store Configuration] -* xref:examples.adoc[Examples] -* xref:contributing.adoc[Contributing] \ No newline at end of file +* xref:index.adoc[Introduction] \ No newline at end of file diff --git a/docs/modules/ROOT/pages/contributing.adoc b/docs/modules/ROOT/pages/contributing.adoc deleted file mode 100644 index 894f807..0000000 --- a/docs/modules/ROOT/pages/contributing.adoc +++ /dev/null @@ -1,17 +0,0 @@ -# Contributing - -Contributions to the project are highly welcomed. -If you extend the library with custom functionality, consider creating a Pull Request on our GitHub repository. - - -We highly recommend to familiarize yourself with the RDFLib core library. You can https://github.com/RDFLib/rdflib/#getting-started[learn more here]. - - -Contribution checklist: - -- Find or create an https://github.com/neo4j-labs/rdflib-neo4j/issues[issue] on GitHub. -- Fork the repository, create your own feature branch starting from the `develop` branch. -- Document your code with docstrings or in the documentation (`docs` folder), if applicable. - -## Feature Requests / Bugs -If you have a request for a feature, or have found a bug, creating an https://github.com/neo4j-labs/rdflib-neo4j/issues[issue on GitHub] is the best way to reach out. \ No newline at end of file diff --git a/docs/modules/ROOT/pages/examples.adoc b/docs/modules/ROOT/pages/examples.adoc deleted file mode 100644 index 4c81455..0000000 --- a/docs/modules/ROOT/pages/examples.adoc +++ /dev/null @@ -1,149 +0,0 @@ -= Examples - -This page contains some code snippets with examples on using the library. - -== Importing a TTL file -This a basic example for importing a single TTL file. 
-Insert your own database credentials for `AURA_DB_URI`, `AURA_DB_USERNAME`, `AURA_DB_PWD` to use this template. - -[source,python] ----- -from rdflib_neo4j import Neo4jStoreConfig -from rdflib_neo4j import HANDLE_VOCAB_URI_STRATEGY - -# Get your Aura Db free instance here: https://neo4j.com/cloud/aura-free/#test-drive-section -AURA_DB_URI="your_db_uri" -AURA_DB_USERNAME="neo4j" -AURA_DB_PWD="your_db_pwd" - -auth_data = {'uri': AURA_DB_URI, - 'database': "neo4j", - 'user': AURA_DB_USERNAME, - 'pwd': AURA_DB_PWD} -from rdflib import Namespace - -# Define your prefixes -prefixes = { - 'neo4ind': Namespace('http://neo4j.org/ind#'), - 'neo4voc': Namespace('http://neo4j.org/vocab/sw#'), - 'nsmntx': Namespace('http://neo4j.org/vocab/NSMNTX#'), - 'apoc': Namespace('http://neo4j.org/vocab/APOC#'), - 'graphql': Namespace('http://neo4j.org/vocab/GraphQL#') -} -# Define your custom mappings -config = Neo4jStoreConfig(auth_data=auth_data, - custom_prefixes=prefixes, - handle_vocab_uri_strategy=HANDLE_VOCAB_URI_STRATEGY.IGNORE, - batching=True) -from rdflib_neo4j import Neo4jStore -from rdflib import Graph -file_path = 'https://raw.githubusercontent.com/neo4j-labs/neosemantics/3.5/docs/rdf/nsmntx.ttl' - -graph_store = Graph(store=Neo4jStore(config=config)) -graph_store.parse(file_path,format="ttl") -graph_store.close(True) ----- - -== Advanced Examples - -=== Initialize Neo4jStore - -[source,python] ----- -from rdflib_neo4j import Neo4jStoreConfig, Neo4jStore, HANDLE_VOCAB_URI_STRATEGY -from rdflib import Namespace, Graph, URIRef, RDF, SKOS, Literal - - -# Define your custom prefixes -prefixes = { - 'neo4ind': Namespace('http://neo4j.org/ind#'), - 'neo4voc': Namespace('http://neo4j.org/vocab/sw#'), -} - -# Neo4j connection credentials -auth_data = {'uri': 'your_neo4j_uri', - 'database': 'neo4j', - 'user': "neo4j", - 'pwd': 'your_password'} - -# Define your Neo4jStoreConfig -config = Neo4jStoreConfig(auth_data=auth_data, - custom_prefixes=prefixes, - 
handle_vocab_uri_strategy=HANDLE_VOCAB_URI_STRATEGY.IGNORE, - batching=False) - -neo4j_store = Neo4jStore(config=config) -graph_store = Graph(store=neo4j_store) - ----- - -=== Import by Reference URL - -[source,python] ----- -file_path = 'https://raw.githubusercontent.com/neo4j-labs/neosemantics/3.5/docs/rdf/nsmntx.ttl' -graph_store.parse(file_path,format="ttl") ----- - -=== Write Individual Triples - -[source,python] ----- -aura = URIRef("http://neo4j.com/voc/tech#AuraDB") - -graph_store.add((aura, RDF.type, SKOS.Concept)) -graph_store.add((aura, SKOS.prefLabel, Literal("AuraDB"))) -graph_store.add((aura, SKOS.broader, URIRef("http://neo4j.org/ind#neo4j355"))) - ----- - -=== SPARQL Query with Batching - -[source,python] ----- -import requests -import urllib.parse - -endpoint = "https://id.nlm.nih.gov/mesh/sparql" -sparql = """ -PREFIX rdfs: -PREFIX meshv: -PREFIX mesh: -PREFIX rdf: - -CONSTRUCT { ?s ?p ?o } -FROM -WHERE { - { - ?s ?p ?o - filter(?s = mesh:D000086402 || ?o = mesh:D000086402) - } - union - { - mesh:D000086402 ?x ?s . - ?s ?p ?o . - filter(?x != rdf:type && (isLiteral(?o) || ?p = rdf:type)) - } - union - { - ?s ?x mesh:D000086402 . - ?s ?p ?o . 
- filter(isLiteral(?o|| ?p = rdf:type)) - } -} -""" - -# Define your Neo4jStoreConfig -config = Neo4jStoreConfig(auth_data=auth_data, - custom_prefixes=prefixes, - handle_vocab_uri_strategy=HANDLE_VOCAB_URI_STRATEGY.IGNORE, - batching=True) - -neo4j_store = Neo4jStore(config=config) -graph_store = Graph(store=neo4j_store) - -query_response = requests.get(endpoint, params = {"query": sparql , "format" : "TURTLE"}) -graph_store.parse(data=query_response.text,format='ttl') -graph_store.close(commit_pending_transaction=True) - ----- \ No newline at end of file diff --git a/docs/modules/ROOT/pages/gettingstarted.adoc b/docs/modules/ROOT/pages/gettingstarted.adoc deleted file mode 100644 index 381e73e..0000000 --- a/docs/modules/ROOT/pages/gettingstarted.adoc +++ /dev/null @@ -1,88 +0,0 @@ -= Getting Started - -This page describes how to get started with this library, and set up your first RDF import. - -== Set up Neo4j -To configure your Neo4j Graph DB, the process is simplified: initialize the database by establishing a uniqueness constraint on Resources' URIs. You can achieve this by executing the following Cypher fragment: - -[source,cypher] ----- -CREATE CONSTRAINT n10s_unique_uri FOR (r:Resource) REQUIRE r.uri IS UNIQUE; ----- -This constraint ensures the uniqueness of URIs for Resource nodes, streamlining the integration process. Alternatively, you can simply set `create=True` when attempting to open the store in your Python code, and it will create the constraint for you. - -== Set up Python environment -`rdflib-neo4j` can be installed with Python's package management tool `pip`: - -[source,shell] ----- -$ pip install rdflib-neo4j ----- - -== Loading data -Now, seamlessly import RDF data into your Neo4j On-premise or Aura instance by establishing an RDFLib graph and employing it to parse your RDF data. Each individual triple undergoes transparent persistence within your Neo4j database(whether it is on Aura or on-premise). 
Here's a step-by-step guide to achieve this integration: - -You can import the data from an RDF document (for example link:https://github.com/jbarrasa/datasets/blob/master/rdf/music.nt[this one serialised using N-Triples]): - -[source,python] ----- -from rdflib_neo4j import Neo4jStoreConfig, Neo4jStore, HANDLE_VOCAB_URI_STRATEGY -from rdflib import Graph - -# set the configuration to connect to your Aura DB -AURA_DB_URI="your_db_uri" -AURA_DB_USERNAME="neo4j" -AURA_DB_PWD="your_db_pwd" - -auth_data = {'uri': AURA_DB_URI, - 'database': "neo4j", - 'user': AURA_DB_USERNAME, - 'pwd': AURA_DB_PWD} - -# Define your custom mappings & store config -config = Neo4jStoreConfig(auth_data=auth_data, - custom_prefixes=prefixes, - handle_vocab_uri_strategy=HANDLE_VOCAB_URI_STRATEGY.IGNORE, - batching=True) - -file_path = 'https://github.com/jbarrasa/gc-2022/raw/main/search/onto/concept-scheme-skos.ttl' - -# Create the RDF Graph, parse & ingest the data to Neo4j, and close the store(If the field batching is set to True in the Neo4jStoreConfig, remember to close the store to prevent the loss of any uncommitted records.) -neo4j_aura = Graph(store=Neo4jStore(config=config)) -# Calling the parse method will implictly open the store -neo4j_aura.parse(file_path, format="ttl") -neo4j_aura.close(True) ----- - -The imported file contains a taxonomy of technologies extracted from Wikidata and serialised using SKOS. 
-After running the previous code fragment, your Aura DB/Neo4j DB should be populated with a graph like this one: - -image::https://raw.githubusercontent.com/neo4j-labs/rdflib-neo4j/master/img/graph-view-aura.png[height="400"] - -You can also write to the graph triple by triple like this: - -[source,python] ----- -import rdflib -from rdflib_neo4j import Neo4jStoreConfig, Neo4jStore, HANDLE_VOCAB_URI_STRATEGY -from rdflib import Graph, RDF, SKOS - -# Set up your store config -config = Neo4jStoreConfig(auth_data=auth_data, - handle_vocab_uri_strategy=HANDLE_VOCAB_URI_STRATEGY.IGNORE, - batching=False) - -# Create the graph and open the store -neo4j_aura = Graph(store=Neo4jStore(config=config)) -neo4j_aura.open(config) - -aura = rdflib.URIRef("http://neo4j.com/voc/tech#AuraDB") - -neo4j_aura.add((aura, RDF.type, SKOS.Concept)) -neo4j_aura.add((aura, SKOS.prefLabel, rdflib.Literal("AuraDB"))) -neo4j_aura.add((aura, SKOS.broader, rdflib.URIRef("http://www.wikidata.org/entity/Q1628290"))) ----- - -The previous fragment would add another node to the graph representing AuraDB as a concept related to Neo4j via `skos:narrower`, which in your AuraDB graph would look as follows: - -image::https://raw.githubusercontent.com/neo4j-labs/rdflib-neo4j/master/img/graph-view-aura-detail.png[height="150"] \ No newline at end of file diff --git a/docs/modules/ROOT/pages/index.adoc b/docs/modules/ROOT/pages/index.adoc index ac8777f..21699b4 100644 --- a/docs/modules/ROOT/pages/index.adoc +++ b/docs/modules/ROOT/pages/index.adoc @@ -1,13 +1 @@ -# RDFLib-Neo4j - -The **rdflib-neo4j** project is a Python-based https://rdflib.readthedocs.io/en/stable/[RDFLib Store] backed by Neo4j. -You can use this library for high-performance RDF data ingestion into the Neo4j database. - -This library works with all types of Neo4j deployments, whether on-premise or cloud-hosted (Neo4j Aura). - -## Documentation - -- To get started, see the link:quickstart[Quickstart] page. 
-- For details on the available Python classes, see the link:neo4jstore[Neo4j Store] page. -- Example code fragments are available under link:examples[Examples]. -- If you want to contribute to this project, see link:contributing[Contributing]. +jsdhdfvjfdvfv \ No newline at end of file From cec21540cc0d0e19d1dee6447ad55b16cdf9e21b Mon Sep 17 00:00:00 2001 From: Niels de Jong Date: Tue, 23 Jan 2024 10:47:14 +0100 Subject: [PATCH 10/14] Updated documentation --- docs/getting_started.adoc | 3 - docs/introduction.adoc | 16 -- docs/modules/ROOT/nav.adoc | 7 +- docs/modules/ROOT/pages/contributing.adoc | 17 ++ docs/modules/ROOT/pages/examples.adoc | 149 ++++++++++++++++++ docs/modules/ROOT/pages/gettingstarted.adoc | 88 +++++++++++ docs/modules/ROOT/pages/index.adoc | 14 +- .../ROOT/pages/neo4jstore.adoc} | 4 +- .../ROOT/pages/neo4jstoreconfig.adoc} | 25 ++- 9 files changed, 284 insertions(+), 39 deletions(-) delete mode 100644 docs/getting_started.adoc delete mode 100644 docs/introduction.adoc create mode 100644 docs/modules/ROOT/pages/contributing.adoc create mode 100644 docs/modules/ROOT/pages/examples.adoc create mode 100644 docs/modules/ROOT/pages/gettingstarted.adoc rename docs/{documentation/Neo4jStore-0.1.adoc => modules/ROOT/pages/neo4jstore.adoc} (98%) rename docs/{documentation/Neo4jStoreConfig-0.1.adoc => modules/ROOT/pages/neo4jstoreconfig.adoc} (88%) diff --git a/docs/getting_started.adoc b/docs/getting_started.adoc deleted file mode 100644 index d1dfc11..0000000 --- a/docs/getting_started.adoc +++ /dev/null @@ -1,3 +0,0 @@ -== Using the Neo4j store and examples - -== Migrating an existing implementation to the Neo4j Store diff --git a/docs/introduction.adoc b/docs/introduction.adoc deleted file mode 100644 index fdadfde..0000000 --- a/docs/introduction.adoc +++ /dev/null @@ -1,16 +0,0 @@ -= RDFLIB + Neo4j User Guide -:doctype: book -:icons: font -:source-highlighter: coderay -:toc: left -:toclevels: 2 - -toc::[] - -= Chapter 1: Introduction - -== Section 
1.1: Purpose -This section provides an overview of the purpose of this documentation. - - - diff --git a/docs/modules/ROOT/nav.adoc b/docs/modules/ROOT/nav.adoc index 057cbff..f711cf1 100644 --- a/docs/modules/ROOT/nav.adoc +++ b/docs/modules/ROOT/nav.adoc @@ -1 +1,6 @@ -* xref:index.adoc[Introduction] \ No newline at end of file +* xref:index.adoc[Introduction] +* xref:gettingstarted.adoc[Getting Started] +* xref:neo4jstore.adoc[Neo4j Store] +* xref:neo4jstoreconfig.adoc[Store Configuration] +* xref:examples.adoc[Examples] +* xref:contributing.adoc[Contributing] \ No newline at end of file diff --git a/docs/modules/ROOT/pages/contributing.adoc b/docs/modules/ROOT/pages/contributing.adoc new file mode 100644 index 0000000..894f807 --- /dev/null +++ b/docs/modules/ROOT/pages/contributing.adoc @@ -0,0 +1,17 @@ +# Contributing + +Contributions to the project are very welcome. +If you extend the library with custom functionality, consider creating a Pull Request on our GitHub repository. + + +We highly recommend familiarizing yourself with the RDFLib core library. You can https://github.com/RDFLib/rdflib/#getting-started[learn more here]. + + +Contribution checklist: + +- Find or create an https://github.com/neo4j-labs/rdflib-neo4j/issues[issue] on GitHub. +- Fork the repository, create your own feature branch starting from the `develop` branch. +- Document your code with docstrings or in the documentation (`docs` folder), if applicable. + +## Feature Requests / Bugs +If you have a request for a feature, or have found a bug, creating an https://github.com/neo4j-labs/rdflib-neo4j/issues[issue on GitHub] is the best way to reach out. \ No newline at end of file diff --git a/docs/modules/ROOT/pages/examples.adoc b/docs/modules/ROOT/pages/examples.adoc new file mode 100644 index 0000000..4c81455 --- /dev/null +++ b/docs/modules/ROOT/pages/examples.adoc @@ -0,0 +1,149 @@ += Examples + +This page contains some code snippets with examples of using the library.
+ +== Importing a TTL file +This is a basic example for importing a single TTL file. +Insert your own database credentials for `AURA_DB_URI`, `AURA_DB_USERNAME`, `AURA_DB_PWD` to use this template. + +[source,python] +---- +from rdflib_neo4j import Neo4jStoreConfig +from rdflib_neo4j import HANDLE_VOCAB_URI_STRATEGY + +# Get your Aura Db free instance here: https://neo4j.com/cloud/aura-free/#test-drive-section +AURA_DB_URI="your_db_uri" +AURA_DB_USERNAME="neo4j" +AURA_DB_PWD="your_db_pwd" + +auth_data = {'uri': AURA_DB_URI, + 'database': "neo4j", + 'user': AURA_DB_USERNAME, + 'pwd': AURA_DB_PWD} +from rdflib import Namespace + +# Define your prefixes +prefixes = { + 'neo4ind': Namespace('http://neo4j.org/ind#'), + 'neo4voc': Namespace('http://neo4j.org/vocab/sw#'), + 'nsmntx': Namespace('http://neo4j.org/vocab/NSMNTX#'), + 'apoc': Namespace('http://neo4j.org/vocab/APOC#'), + 'graphql': Namespace('http://neo4j.org/vocab/GraphQL#') +} +# Define your custom mappings +config = Neo4jStoreConfig(auth_data=auth_data, + custom_prefixes=prefixes, + handle_vocab_uri_strategy=HANDLE_VOCAB_URI_STRATEGY.IGNORE, + batching=True) +from rdflib_neo4j import Neo4jStore +from rdflib import Graph +file_path = 'https://raw.githubusercontent.com/neo4j-labs/neosemantics/3.5/docs/rdf/nsmntx.ttl' + +graph_store = Graph(store=Neo4jStore(config=config)) +graph_store.parse(file_path,format="ttl") +graph_store.close(True) +---- + +== Advanced Examples + +=== Initialize Neo4jStore + +[source,python] +---- +from rdflib_neo4j import Neo4jStoreConfig, Neo4jStore, HANDLE_VOCAB_URI_STRATEGY +from rdflib import Namespace, Graph, URIRef, RDF, SKOS, Literal + + +# Define your custom prefixes +prefixes = { + 'neo4ind': Namespace('http://neo4j.org/ind#'), + 'neo4voc': Namespace('http://neo4j.org/vocab/sw#'), +} + +# Neo4j connection credentials +auth_data = {'uri': 'your_neo4j_uri', + 'database': 'neo4j', + 'user': "neo4j", + 'pwd': 'your_password'} + +# Define your Neo4jStoreConfig +config =
Neo4jStoreConfig(auth_data=auth_data, + custom_prefixes=prefixes, + handle_vocab_uri_strategy=HANDLE_VOCAB_URI_STRATEGY.IGNORE, + batching=False) + +neo4j_store = Neo4jStore(config=config) +graph_store = Graph(store=neo4j_store) + +---- + +=== Import by Reference URL + +[source,python] +---- +file_path = 'https://raw.githubusercontent.com/neo4j-labs/neosemantics/3.5/docs/rdf/nsmntx.ttl' +graph_store.parse(file_path,format="ttl") +---- + +=== Write Individual Triples + +[source,python] +---- +aura = URIRef("http://neo4j.com/voc/tech#AuraDB") + +graph_store.add((aura, RDF.type, SKOS.Concept)) +graph_store.add((aura, SKOS.prefLabel, Literal("AuraDB"))) +graph_store.add((aura, SKOS.broader, URIRef("http://neo4j.org/ind#neo4j355"))) + +---- + +=== SPARQL Query with Batching + +[source,python] +---- +import requests +import urllib.parse + +endpoint = "https://id.nlm.nih.gov/mesh/sparql" +sparql = """ +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX meshv: <http://id.nlm.nih.gov/mesh/vocab#> +PREFIX mesh: <http://id.nlm.nih.gov/mesh/> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +CONSTRUCT { ?s ?p ?o } +FROM <http://id.nlm.nih.gov/mesh> +WHERE { + { + ?s ?p ?o + filter(?s = mesh:D000086402 || ?o = mesh:D000086402) + } + union + { + mesh:D000086402 ?x ?s . + ?s ?p ?o . + filter(?x != rdf:type && (isLiteral(?o) || ?p = rdf:type)) + } + union + { + ?s ?x mesh:D000086402 . + ?s ?p ?o .
+ filter(isLiteral(?o) || ?p = rdf:type) + } +} +""" + +# Define your Neo4jStoreConfig +config = Neo4jStoreConfig(auth_data=auth_data, + custom_prefixes=prefixes, + handle_vocab_uri_strategy=HANDLE_VOCAB_URI_STRATEGY.IGNORE, + batching=True) + +neo4j_store = Neo4jStore(config=config) +graph_store = Graph(store=neo4j_store) + +query_response = requests.get(endpoint, params = {"query": sparql , "format" : "TURTLE"}) +graph_store.parse(data=query_response.text,format='ttl') +graph_store.close(commit_pending_transaction=True) + +---- \ No newline at end of file diff --git a/docs/modules/ROOT/pages/gettingstarted.adoc b/docs/modules/ROOT/pages/gettingstarted.adoc new file mode 100644 index 0000000..381e73e --- /dev/null +++ b/docs/modules/ROOT/pages/gettingstarted.adoc @@ -0,0 +1,88 @@ += Getting Started + +This page describes how to get started with this library, and set up your first RDF import. + +== Set up Neo4j +To configure your Neo4j Graph DB, the process is simplified: initialize the database by establishing a uniqueness constraint on Resources' URIs. You can achieve this by executing the following Cypher fragment: + +[source,cypher] +---- +CREATE CONSTRAINT n10s_unique_uri FOR (r:Resource) REQUIRE r.uri IS UNIQUE; +---- +This constraint ensures the uniqueness of URIs for Resource nodes, streamlining the integration process. Alternatively, you can simply set `create=True` when attempting to open the store in your Python code, and it will create the constraint for you. + +== Set up Python environment +`rdflib-neo4j` can be installed with Python's package management tool `pip`: + +[source,shell] +---- +$ pip install rdflib-neo4j +---- + +== Loading data +Now, seamlessly import RDF data into your Neo4j On-premise or Aura instance by establishing an RDFLib graph and employing it to parse your RDF data. Each individual triple undergoes transparent persistence within your Neo4j database (whether it is on Aura or on-premise). 
Here's a step-by-step guide to achieve this integration: + +You can import the data from an RDF document (for example link:https://github.com/jbarrasa/datasets/blob/master/rdf/music.nt[this one serialised using N-Triples]): + +[source,python] +---- +from rdflib_neo4j import Neo4jStoreConfig, Neo4jStore, HANDLE_VOCAB_URI_STRATEGY +from rdflib import Graph + +# set the configuration to connect to your Aura DB +AURA_DB_URI="your_db_uri" +AURA_DB_USERNAME="neo4j" +AURA_DB_PWD="your_db_pwd" + +auth_data = {'uri': AURA_DB_URI, + 'database': "neo4j", + 'user': AURA_DB_USERNAME, + 'pwd': AURA_DB_PWD} + +# Define your custom mappings & store config +config = Neo4jStoreConfig(auth_data=auth_data, + custom_prefixes={}, + handle_vocab_uri_strategy=HANDLE_VOCAB_URI_STRATEGY.IGNORE, + batching=True) + +file_path = 'https://github.com/jbarrasa/gc-2022/raw/main/search/onto/concept-scheme-skos.ttl' + +# Create the RDF Graph, parse & ingest the data to Neo4j, and close the store (If the field batching is set to True in the Neo4jStoreConfig, remember to close the store to prevent the loss of any uncommitted records.) +neo4j_aura = Graph(store=Neo4jStore(config=config)) +# Calling the parse method will implicitly open the store +neo4j_aura.parse(file_path, format="ttl") +neo4j_aura.close(True) +---- + +The imported file contains a taxonomy of technologies extracted from Wikidata and serialised using SKOS. 
+After running the previous code fragment, your Aura DB/Neo4j DB should be populated with a graph like this one: + +image::https://raw.githubusercontent.com/neo4j-labs/rdflib-neo4j/master/img/graph-view-aura.png[height="400"] + +You can also write to the graph triple by triple like this: + +[source,python] +---- +import rdflib +from rdflib_neo4j import Neo4jStoreConfig, Neo4jStore, HANDLE_VOCAB_URI_STRATEGY +from rdflib import Graph, RDF, SKOS + +# Set up your store config +config = Neo4jStoreConfig(auth_data=auth_data, + handle_vocab_uri_strategy=HANDLE_VOCAB_URI_STRATEGY.IGNORE, + batching=False) + +# Create the graph and open the store +neo4j_aura = Graph(store=Neo4jStore(config=config)) +neo4j_aura.open(config) + +aura = rdflib.URIRef("http://neo4j.com/voc/tech#AuraDB") + +neo4j_aura.add((aura, RDF.type, SKOS.Concept)) +neo4j_aura.add((aura, SKOS.prefLabel, rdflib.Literal("AuraDB"))) +neo4j_aura.add((aura, SKOS.broader, rdflib.URIRef("http://www.wikidata.org/entity/Q1628290"))) +---- + +The previous fragment would add another node to the graph representing AuraDB as a concept related to Neo4j via `skos:narrower`, which in your AuraDB graph would look as follows: + +image::https://raw.githubusercontent.com/neo4j-labs/rdflib-neo4j/master/img/graph-view-aura-detail.png[height="150"] \ No newline at end of file diff --git a/docs/modules/ROOT/pages/index.adoc b/docs/modules/ROOT/pages/index.adoc index 21699b4..ac8777f 100644 --- a/docs/modules/ROOT/pages/index.adoc +++ b/docs/modules/ROOT/pages/index.adoc @@ -1 +1,13 @@ -jsdhdfvjfdvfv \ No newline at end of file +# RDFLib-Neo4j + +The **rdflib-neo4j** project is a Python-based https://rdflib.readthedocs.io/en/stable/[RDFLib Store] backed by Neo4j. +You can use this library for high-performance RDF data ingestion into the Neo4j database. + +This library works with all types of Neo4j deployments, whether on-premise or cloud-hosted (Neo4j Aura). 
+ +## Documentation + +- To get started, see the link:quickstart[Quickstart] page. +- For details on the available Python classes, see the link:neo4jstore[Neo4j Store] page. +- Example code fragments are available under link:examples[Examples]. +- If you want to contribute to this project, see link:contributing[Contributing]. diff --git a/docs/documentation/Neo4jStore-0.1.adoc b/docs/modules/ROOT/pages/neo4jstore.adoc similarity index 98% rename from docs/documentation/Neo4jStore-0.1.adoc rename to docs/modules/ROOT/pages/neo4jstore.adoc index 2f04f82..050d494 100644 --- a/docs/documentation/Neo4jStore-0.1.adoc +++ b/docs/modules/ROOT/pages/neo4jstore.adoc @@ -1,9 +1,9 @@ -== Neo4j Store += Neo4j Store [.procedures, opts=header] This class is an implementation of the rdflib link:https://rdflib.readthedocs.io/en/stable/_modules/rdflib/store.html[Store class] that uses Neo4j as a backend. In this way it is possible to persist you RDF data directly in Neo4j, with the power of rdflib to process your data. -=== Object Initialization +== Constructor |=== | Name | Type | Required | Default | Description |config|Neo4jStoreConfig|True||Neo4jStoreConfig object that contains all the useful informations to initialize the store. diff --git a/docs/documentation/Neo4jStoreConfig-0.1.adoc b/docs/modules/ROOT/pages/neo4jstoreconfig.adoc similarity index 88% rename from docs/documentation/Neo4jStoreConfig-0.1.adoc rename to docs/modules/ROOT/pages/neo4jstoreconfig.adoc index efbd664..2ef01f5 100644 --- a/docs/documentation/Neo4jStoreConfig-0.1.adoc +++ b/docs/modules/ROOT/pages/neo4jstoreconfig.adoc @@ -1,17 +1,17 @@ -== Neo4j Store Config += Neo4j Store Config [.procedures, opts=header] This object is used to configure the Neo4j Store to connect to your Neo4j Instance and to manage the parsing of a Triple Store. 
-=== Object Initialization +== Constructor |=== | Name | Type | Required | Values(Default) | Description | auth_data | Dictionary | True | ("uri", "database", "user", "pwd") | A dictionary containing authentication data. The required keys are: ["uri", "database", "user", "pwd"]. | batching | Boolean | False | boolean (True) | A boolean indicating whether batching is enabled. | batch_size | Integer | False | (5000) | An integer representing the batch size (The batch size is intended as number of entities to store inside the database (nodes/relationships) and not triples. | custom_mappings | List[Tuple[Str,Str,Str]] | False | Empty list | A list of tuples containing custom mappings for prefixes in the form (prefix, object_to_replace, new_object). -| custom_prefixes | Dictionary | True if handle_vocab_uri_strategy == HANDLE_VOCAB_URI_STRATEGY.SHORTEN | ({}) | A dictionary containing custom prefixes. -| handle_vocab_uri_strategy | HANDLE_VOCAB_URI_STRATEGY | False |HANDLE_VOCAB_URI_STRATEGY.IGNORE, HANDLE_VOCAB_URI_STRATEGY.KEEP, HANDLE_VOCAB_URI_STRATEGY.MAP(HANDLE_VOCAB_URI_STRATEGY.SHORTEN) | +| custom_prefixes | Dictionary | True ① | ({}) | A dictionary containing custom prefixes. +| handle_vocab_uri_strategy | HANDLE_VOCAB_URI_STRATEGY | False |IGNORE, KEEP, MAP, (SHORTEN) | * 'SHORTEN', full uris are shortened using prefixes for property names, relationship names and labels. Fails if a prefix is not predefined for a namespace in the imported RDF. @@ -21,13 +21,15 @@ This object is used to configure the Neo4j Store to connect to your Neo4j Instan * 'KEEP' uris are kept unchanged -| handle_multival_strategy | HANDLE_MULTIVAL_STRATEGY | False | HANDLE_MULTIVAL_STRATEGY.ARRAY (HANDLE_MULTIVAL_STRATEGY.OVERWRITE)| +| handle_multival_strategy | HANDLE_MULTIVAL_STRATEGY | False | ARRAY (OVERWRITE)| * 'OVERWRITE' property values are kept single valued. 
Multiple values in the imported RDF are overwriten (only the last one is kept) * 'ARRAY' properties are stored in an array enabling storage of multiple values. All of them unless multivalPropList is set. | multival_props_names | List[Tuple[Str,Str]] | False | ([]) | A list of tuples containing the prefix and property names to be treated as multivalued in the form (prefix, property_name). |=== +① if handle_vocab_uri_strategy == HANDLE_VOCAB_URI_STRATEGY.SHORTEN + == Functions === set_handle_vocab_uri_strategy @@ -188,7 +190,7 @@ No arguments | Dictionary | A dictionary containing all prefixes. |=== -== Enumerators +== Enumerated Values === HANDLE_VOCAB_URI_STRATEGY @@ -204,12 +206,6 @@ Enum class defining different strategies for handling vocabulary URIs. | IGNORE | Strategy to ignore the Namespace and get only the local part |=== -=== Examples - -Here a series of examples of the application of a strategy on a certain triplet. - -#TODO: Fill the examples# - === Shorten This strategy will shorten the URIs, replacing the prefix with its shorted version. If the Store find a prefix not defined inside its Neo4jStoreConfig object, the parsing will stop, raising a ShortenStrictException error. @@ -229,7 +225,7 @@ This strategy will remove the entire prefix from the predicate. Enum class defining different strategies for handling multiple values. -TO NOTICE: If the strategy is ARRAY and the Neo4jStoreConfig doesn't contain any predicate marked as multivalued, EVERY field will be treated as multivalued. +> If the strategy is ARRAY and the Neo4jStoreConfig doesn't contain any predicate marked as multivalued, EVERY field will be treated as multivalued. ==== Possible Values @@ -239,9 +235,6 @@ TO NOTICE: If the strategy is ARRAY and the Neo4jStoreConfig doesn't contain any | ARRAY | Strategy to treat multiple values as an array |=== -=== Examples - -Here a series of examples of the application of a strategy on a certain triplet. 
=== Overwrite From 1b9ef06a9292f2648f056f6e8e8a76b3984355e0 Mon Sep 17 00:00:00 2001 From: "Christian Tremblay, ing." Date: Thu, 4 Apr 2024 22:31:11 -0400 Subject: [PATCH 11/14] Fix issue https://github.com/neo4j-labs/rdflib-neo4j/issues/28 --- rdflib_neo4j/query_composers/NodeQueryComposer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rdflib_neo4j/query_composers/NodeQueryComposer.py b/rdflib_neo4j/query_composers/NodeQueryComposer.py index 041add1..5748107 100644 --- a/rdflib_neo4j/query_composers/NodeQueryComposer.py +++ b/rdflib_neo4j/query_composers/NodeQueryComposer.py @@ -4,7 +4,7 @@ def prop_query_append(prop): - return f"""n.`{prop}` = CASE WHEN COALESCE(param["{prop}"], NULL) IS NULL THEN n.{prop} ELSE REDUCE(i=COALESCE(n.{prop},[]), val IN param["{prop}"] | CASE WHEN val IN i THEN i ELSE i+val END) END """ + return f"""n.`{prop}` = CASE WHEN COALESCE(param["{prop}"], NULL) IS NULL THEN n.`{prop}` ELSE REDUCE(i=COALESCE(n.`{prop}`,[]), val IN param["{prop}"] | CASE WHEN val IN i THEN i ELSE i+val END) END """ From fe49d5a84e6024cbf82ec8d340a11936e8fbbbd5 Mon Sep 17 00:00:00 2001 From: Nicolas Mervaillie Date: Sun, 7 Jul 2024 07:44:08 +0200 Subject: [PATCH 12/14] Ability to initialize the store with an existing driver object Adds an optional parameter to the Neo4j store initialization. This allows fine-grained customization of the neo4j driver settings and scenarios like SSO. 
Closes #30 --- docs/modules/ROOT/pages/neo4jstore.adoc | 3 +- docs/modules/ROOT/pages/neo4jstoreconfig.adoc | 2 +- rdflib_neo4j/Neo4jStore.py | 31 ++++++---- rdflib_neo4j/config/Neo4jStoreConfig.py | 4 -- test/integration/containers_test.py | 2 +- test/integration/custom_mappings_test.py | 16 +++--- test/integration/fixtures.py | 41 ++++++++++++-- test/integration/handle_vocab_uri_test.py | 24 ++++---- test/integration/multival_test.py | 20 +++---- test/integration/single_triple_test.py | 2 +- test/integration/store_initialization_test.py | 56 +++++++++++++++++++ test/integration/utils.py | 56 ------------------- 12 files changed, 147 insertions(+), 110 deletions(-) create mode 100644 test/integration/store_initialization_test.py diff --git a/docs/modules/ROOT/pages/neo4jstore.adoc b/docs/modules/ROOT/pages/neo4jstore.adoc index 050d494..d877e0c 100644 --- a/docs/modules/ROOT/pages/neo4jstore.adoc +++ b/docs/modules/ROOT/pages/neo4jstore.adoc @@ -6,7 +6,8 @@ This class is an implementation of the rdflib link:https://rdflib.readthedocs.io == Constructor |=== | Name | Type | Required | Default | Description -|config|Neo4jStoreConfig|True||Neo4jStoreConfig object that contains all the useful informations to initialize the store. +|config|Neo4jStoreConfig|True||Neo4jStoreConfig object that contains all the useful information to initialize the store. +|neo4j_driver|Driver|False|None|A pre-built Neo4j driver object to use to connect to the database. You cannot specify both a driver and credentials in the Neo4jStoreConfig. 
|=== == Functions diff --git a/docs/modules/ROOT/pages/neo4jstoreconfig.adoc b/docs/modules/ROOT/pages/neo4jstoreconfig.adoc index 2ef01f5..6659169 100644 --- a/docs/modules/ROOT/pages/neo4jstoreconfig.adoc +++ b/docs/modules/ROOT/pages/neo4jstoreconfig.adoc @@ -6,7 +6,7 @@ This object is used to configure the Neo4j Store to connect to your Neo4j Instan == Constructor |=== | Name | Type | Required | Values(Default) | Description -| auth_data | Dictionary | True | ("uri", "database", "user", "pwd") | A dictionary containing authentication data. The required keys are: ["uri", "database", "user", "pwd"]. +| auth_data | Dictionary | Yes, unless a driver object is passed in the store init | ("uri", "database", "user", "pwd") | A dictionary containing authentication data. The required keys are: ["uri", "database", "user", "pwd"]. | batching | Boolean | False | boolean (True) | A boolean indicating whether batching is enabled. | batch_size | Integer | False | (5000) | An integer representing the batch size (The batch size is intended as number of entities to store inside the database (nodes/relationships) and not triples. | custom_mappings | List[Tuple[Str,Str,Str]] | False | Empty list | A list of tuples containing custom mappings for prefixes in the form (prefix, object_to_replace, new_object). 
diff --git a/rdflib_neo4j/Neo4jStore.py b/rdflib_neo4j/Neo4jStore.py index 6cca1c9..596cead 100644 --- a/rdflib_neo4j/Neo4jStore.py +++ b/rdflib_neo4j/Neo4jStore.py @@ -1,13 +1,14 @@ from typing import Dict from rdflib.store import Store -from neo4j import GraphDatabase +from neo4j import GraphDatabase, Driver from neo4j import WRITE_ACCESS import logging from rdflib_neo4j.Neo4jTriple import Neo4jTriple from rdflib_neo4j.config.Neo4jStoreConfig import Neo4jStoreConfig from rdflib_neo4j.config.const import NEO4J_DRIVER_USER_AGENT_NAME +from rdflib_neo4j.config.utils import check_auth_data from rdflib_neo4j.query_composers.NodeQueryComposer import NodeQueryComposer from rdflib_neo4j.query_composers.RelationshipQueryComposer import RelationshipQueryComposer from rdflib_neo4j.utils import handle_neo4j_driver_exception @@ -17,11 +18,16 @@ class Neo4jStore(Store): context_aware = True - def __init__(self, config: Neo4jStoreConfig): + def __init__(self, config: Neo4jStoreConfig, neo4j_driver: Driver = None): self.__open = False - self.driver = None + self.driver = neo4j_driver self.session = None self.config = config + if not neo4j_driver: + check_auth_data(config.auth_data) + elif config.auth_data: + raise Exception("Either initialize the store with credentials or driver. 
You cannot do both.") + super(Neo4jStore, self).__init__(config.get_config_dict()) self.batching = config.batching @@ -62,7 +68,6 @@ def close(self, commit_pending_transaction=True): self.commit(commit_nodes=True) self.commit(commit_rels=True) self.session.close() - self.driver.close() self.__set_open(False) print(f"IMPORTED {self.total_triples} TRIPLES") self.total_triples=0 @@ -147,6 +152,16 @@ def __set_open(self, val: bool): self.__open = val print(f"The store is now: {'Open' if self.__open else 'Closed'}") + def __get_driver(self) -> Driver: + if not self.driver: + auth_data = self.config.auth_data + self.driver = GraphDatabase.driver( + auth_data['uri'], + auth=(auth_data['user'], auth_data['pwd']), + database=auth_data.get('database', 'neo4j'), + user_agent=NEO4J_DRIVER_USER_AGENT_NAME + ) + return self.driver def __create_session(self): """ @@ -156,13 +171,7 @@ def __create_session(self): """ auth_data = self.config.auth_data - self.driver = GraphDatabase.driver( - auth_data['uri'], - auth=(auth_data['user'], auth_data['pwd']), - user_agent=NEO4J_DRIVER_USER_AGENT_NAME - ) - self.session = self.driver.session( - database=auth_data.get('database', 'neo4j'), + self.session = self.__get_driver().session( default_access_mode=WRITE_ACCESS ) diff --git a/rdflib_neo4j/config/Neo4jStoreConfig.py b/rdflib_neo4j/config/Neo4jStoreConfig.py index 2b4686b..b15f928 100644 --- a/rdflib_neo4j/config/Neo4jStoreConfig.py +++ b/rdflib_neo4j/config/Neo4jStoreConfig.py @@ -191,10 +191,7 @@ def set_auth_data(self, auth): Parameters: - auth: A dictionary containing authentication data. - Raises: - - WrongAuthenticationException: If any of the required authentication fields is missing. """ - check_auth_data(auth=auth) self.auth_data = auth def set_batching(self, val: bool): @@ -225,5 +222,4 @@ def get_config_dict(self): Raises: - WrongAuthenticationException: If any of the required authentication fields is missing. 
""" - check_auth_data(auth=self.auth_data) return vars(self) diff --git a/test/integration/containers_test.py b/test/integration/containers_test.py index 14261d1..cf06d9a 100644 --- a/test/integration/containers_test.py +++ b/test/integration/containers_test.py @@ -4,7 +4,7 @@ from test.integration.utils import records_equal, read_file_n10s_and_rdflib import pytest from test.integration.fixtures import neo4j_container, neo4j_driver, graph_store, graph_store_batched, \ - cleanup_databases + cleanup_databases, neo4j_connection_parameters def test_import_person(neo4j_driver, graph_store): diff --git a/test/integration/custom_mappings_test.py b/test/integration/custom_mappings_test.py index 045b22b..505680a 100644 --- a/test/integration/custom_mappings_test.py +++ b/test/integration/custom_mappings_test.py @@ -3,21 +3,21 @@ from rdflib_neo4j.Neo4jStore import Neo4jStore from rdflib_neo4j.config.Neo4jStoreConfig import Neo4jStoreConfig from test.integration.constants import LOCAL -from test.integration.utils import records_equal, read_file_n10s_and_rdflib, get_credentials +from test.integration.utils import records_equal, read_file_n10s_and_rdflib from rdflib_neo4j.config.const import HANDLE_VOCAB_URI_STRATEGY import os from dotenv import load_dotenv from test.integration.fixtures import neo4j_container, neo4j_driver, graph_store, graph_store_batched, \ - cleanup_databases + cleanup_databases, neo4j_connection_parameters -def test_custom_mapping_match(neo4j_container, neo4j_driver): +def test_custom_mapping_match(neo4j_driver, neo4j_connection_parameters): """ If we define a custom mapping and the strategy is HANDLE_VOCAB_URI_STRATEGY.MAP, it should match it and use the mapping if the predicate satisfies the mapping. 
""" - auth_data = get_credentials(LOCAL, neo4j_container) + auth_data = neo4j_connection_parameters # Define your prefixes prefixes = { 'neo4voc': Namespace('http://neo4j.org/vocab/sw#') @@ -56,7 +56,7 @@ def test_custom_mapping_match(neo4j_container, neo4j_driver): assert records_equal(rels[i], rels_from_rdflib[i], rels=True) -def test_custom_mapping_no_match(neo4j_container, neo4j_driver): +def test_custom_mapping_no_match(neo4j_driver, neo4j_connection_parameters): """ If we define a custom mapping and the strategy is HANDLE_VOCAB_URI_STRATEGY.MAP, it shouldn't apply the mapping if the predicate doesn't satisfy the mapping and use IGNORE as a strategy. @@ -66,7 +66,7 @@ def test_custom_mapping_no_match(neo4j_container, neo4j_driver): if the predicate satisfies the mapping. """ - auth_data = get_credentials(LOCAL, neo4j_container) + auth_data = neo4j_connection_parameters # Define your prefixes prefixes = { @@ -106,12 +106,12 @@ def test_custom_mapping_no_match(neo4j_container, neo4j_driver): assert records_equal(rels[i], rels_from_rdflib[i], rels=True) -def test_custom_mapping_map_strategy_zero_custom_mappings(neo4j_container, neo4j_driver): +def test_custom_mapping_map_strategy_zero_custom_mappings(neo4j_driver, neo4j_connection_parameters): """ If we don't define custom mapping and the strategy is HANDLE_VOCAB_URI_STRATEGY.MAP, it shouldn't apply the mapping on anything and just use IGNORE mode. 
""" - auth_data = get_credentials(LOCAL, neo4j_container) + auth_data = neo4j_connection_parameters # Define your prefixes prefixes = { diff --git a/test/integration/fixtures.py b/test/integration/fixtures.py index 0f18ecd..fbad787 100644 --- a/test/integration/fixtures.py +++ b/test/integration/fixtures.py @@ -1,9 +1,10 @@ import pytest from neo4j import GraphDatabase +from rdflib import Graph from testcontainers.neo4j import Neo4jContainer +from rdflib_neo4j import HANDLE_VOCAB_URI_STRATEGY, Neo4jStoreConfig, Neo4jStore from test.integration.constants import LOCAL, N10S_CONSTRAINT_QUERY, RDFLIB_DB -from test.integration.utils import create_graph_store import os @@ -48,13 +49,43 @@ def neo4j_driver(neo4j_container): @pytest.fixture -def graph_store(neo4j_container, neo4j_driver): - return create_graph_store(neo4j_container) +def graph_store(neo4j_connection_parameters): + return config_graph_store(neo4j_connection_parameters) @pytest.fixture -def graph_store_batched(neo4j_container, neo4j_driver): - return create_graph_store(neo4j_container, batching=True) +def graph_store_batched(neo4j_connection_parameters): + return config_graph_store(neo4j_connection_parameters, True) + + +def config_graph_store(auth_data, batching=False): + + config = Neo4jStoreConfig(auth_data=auth_data, + custom_prefixes={}, + custom_mappings=[], + multival_props_names=[], + handle_vocab_uri_strategy=HANDLE_VOCAB_URI_STRATEGY.IGNORE, + batching=batching) + + g = Graph(store=Neo4jStore(config=config)) + return g + + +@pytest.fixture +def neo4j_connection_parameters(neo4j_container): + if LOCAL: + auth_data = { + 'uri': os.getenv("NEO4J_URI_LOCAL"), + 'database': RDFLIB_DB, + 'user': os.getenv("NEO4J_USER_LOCAL"), + 'pwd': os.getenv("NEO4J_PWD_LOCAL") + } + else: + auth_data = {'uri': neo4j_container.get_connection_url(), + 'database': RDFLIB_DB, + 'user': "neo4j", + 'pwd': Neo4jContainer.NEO4J_ADMIN_PASSWORD} + return auth_data @pytest.fixture(autouse=True) diff --git 
a/test/integration/handle_vocab_uri_test.py b/test/integration/handle_vocab_uri_test.py index cb7a2c8..581fa94 100644 --- a/test/integration/handle_vocab_uri_test.py +++ b/test/integration/handle_vocab_uri_test.py @@ -6,18 +6,18 @@ from rdflib_neo4j.config.Neo4jStoreConfig import Neo4jStoreConfig from rdflib_neo4j.config.const import ShortenStrictException, HANDLE_VOCAB_URI_STRATEGY from test.integration.constants import LOCAL -from test.integration.utils import records_equal, read_file_n10s_and_rdflib, get_credentials +from test.integration.utils import records_equal, read_file_n10s_and_rdflib import pytest -from test.integration.fixtures import neo4j_container, neo4j_driver, graph_store, graph_store_batched, \ +from test.integration.fixtures import neo4j_container, neo4j_connection_parameters, neo4j_driver, graph_store, graph_store_batched, \ cleanup_databases -def test_shorten_all_prefixes_defined(neo4j_container, neo4j_driver): +def test_shorten_all_prefixes_defined(neo4j_driver, neo4j_connection_parameters): """ If we use the strategy HANDLE_VOCAB_URI_STRATEGY.SHORTEN and we provide all the required namespaces, it should load all the data without raising an error for a missing prefix """ - auth_data = get_credentials(LOCAL, neo4j_container) + auth_data = neo4j_connection_parameters # Define your prefixes prefixes = { @@ -61,8 +61,8 @@ def test_shorten_all_prefixes_defined(neo4j_container, neo4j_driver): assert records_equal(rels[i], rels_from_rdflib[i], rels=True) -def test_shorten_missing_prefix(neo4j_container, neo4j_driver): - auth_data = get_credentials(LOCAL, neo4j_container) +def test_shorten_missing_prefix(neo4j_driver, neo4j_connection_parameters): + auth_data = neo4j_connection_parameters # Define your prefixes prefixes = { @@ -90,8 +90,8 @@ def test_shorten_missing_prefix(neo4j_container, neo4j_driver): assert True -def test_keep_strategy(neo4j_container, neo4j_driver): - auth_data = get_credentials(LOCAL, neo4j_container) +def 
test_keep_strategy(neo4j_driver, neo4j_connection_parameters): + auth_data = neo4j_connection_parameters config = Neo4jStoreConfig(auth_data=auth_data, handle_vocab_uri_strategy=HANDLE_VOCAB_URI_STRATEGY.KEEP, @@ -111,8 +111,8 @@ def test_keep_strategy(neo4j_container, neo4j_driver): assert records_equal(rels[i], rels_from_rdflib[i], rels=True) -def test_ignore_strategy(neo4j_container, neo4j_driver): - auth_data = get_credentials(LOCAL, neo4j_container) +def test_ignore_strategy(neo4j_driver, neo4j_connection_parameters): + auth_data = neo4j_connection_parameters config = Neo4jStoreConfig(auth_data=auth_data, handle_vocab_uri_strategy=HANDLE_VOCAB_URI_STRATEGY.IGNORE, @@ -132,8 +132,8 @@ def test_ignore_strategy(neo4j_container, neo4j_driver): assert records_equal(rels[i], rels_from_rdflib[i], rels=True) -def test_ignore_strategy_on_json_ld_file(neo4j_container, neo4j_driver): - auth_data = get_credentials(LOCAL, neo4j_container) +def test_ignore_strategy_on_json_ld_file(neo4j_driver, neo4j_connection_parameters): + auth_data = neo4j_connection_parameters # Define your prefixes prefixes = { diff --git a/test/integration/multival_test.py b/test/integration/multival_test.py index db5d49e..05ca98a 100644 --- a/test/integration/multival_test.py +++ b/test/integration/multival_test.py @@ -2,17 +2,17 @@ from rdflib import Graph, Namespace from rdflib_neo4j.Neo4jStore import Neo4jStore from rdflib_neo4j.config.Neo4jStoreConfig import Neo4jStoreConfig -from test.integration.utils import records_equal, read_file_n10s_and_rdflib, create_graph_store, get_credentials +from test.integration.utils import records_equal, read_file_n10s_and_rdflib from rdflib_neo4j.config.const import HANDLE_VOCAB_URI_STRATEGY, HANDLE_MULTIVAL_STRATEGY import pytest from test.integration.fixtures import neo4j_container, neo4j_driver, graph_store, graph_store_batched, \ - cleanup_databases + cleanup_databases, neo4j_connection_parameters -def 
test_read_file_multival_with_strategy_no_predicates(neo4j_container, neo4j_driver): +def test_read_file_multival_with_strategy_no_predicates(neo4j_driver, neo4j_connection_parameters): """Compare data imported with n10s procs and n10s + rdflib in single add mode for multivalues""" - auth_data = get_credentials(LOCAL, neo4j_container) + auth_data = neo4j_connection_parameters # Define your prefixes prefixes = {} @@ -40,9 +40,9 @@ def test_read_file_multival_with_strategy_no_predicates(neo4j_container, neo4j_d assert records_equal(records[i], records_from_rdf_lib[i]) -def test_read_file_multival_with_strategy_and_predicates(neo4j_container, neo4j_driver): +def test_read_file_multival_with_strategy_and_predicates(neo4j_driver, neo4j_connection_parameters): """Compare data imported with n10s procs and n10s + rdflib in single add mode for multivalues""" - auth_data = get_credentials(LOCAL, neo4j_container) + auth_data = neo4j_connection_parameters # Define your prefixes prefixes = { @@ -72,9 +72,9 @@ def test_read_file_multival_with_strategy_and_predicates(neo4j_container, neo4j_ assert records_equal(records[i], records_from_rdf_lib[i]) -def test_read_file_multival_with_no_strategy_and_predicates(neo4j_container, neo4j_driver): +def test_read_file_multival_with_no_strategy_and_predicates(neo4j_driver, neo4j_connection_parameters): """Compare data imported with n10s procs and n10s + rdflib in single add mode for multivalues""" - auth_data = get_credentials(LOCAL, neo4j_container) + auth_data = neo4j_connection_parameters # Define your prefixes prefixes = { @@ -101,9 +101,9 @@ def test_read_file_multival_with_no_strategy_and_predicates(neo4j_container, neo for i in range(len(records)): assert records_equal(records[i], records_from_rdf_lib[i]) -def test_read_file_multival_array_as_set_behavior(neo4j_container, neo4j_driver): +def test_read_file_multival_array_as_set_behavior(neo4j_driver, neo4j_connection_parameters): """When importing the data, if a triple will add the 
same value to a multivalued property it won't be added""" - auth_data = get_credentials(LOCAL, neo4j_container) + auth_data = neo4j_connection_parameters prefixes = {'music': Namespace('neo4j://graph.schema#')} diff --git a/test/integration/single_triple_test.py b/test/integration/single_triple_test.py index 14a2cdb..dc89a43 100644 --- a/test/integration/single_triple_test.py +++ b/test/integration/single_triple_test.py @@ -3,7 +3,7 @@ from test.integration.constants import GET_DATA_QUERY, RDFLIB_DB import pytest from test.integration.fixtures import neo4j_container, neo4j_driver, graph_store, graph_store_batched, \ - cleanup_databases + cleanup_databases, neo4j_connection_parameters def test_import_type_as_label(neo4j_driver, graph_store): diff --git a/test/integration/store_initialization_test.py b/test/integration/store_initialization_test.py new file mode 100644 index 0000000..b97dbef --- /dev/null +++ b/test/integration/store_initialization_test.py @@ -0,0 +1,56 @@ +from rdflib import Literal, RDF, URIRef, Graph +from rdflib.namespace import FOAF + +from rdflib_neo4j import HANDLE_VOCAB_URI_STRATEGY, Neo4jStoreConfig, Neo4jStore +from test.integration.constants import GET_DATA_QUERY, RDFLIB_DB +import pytest +from test.integration.fixtures import neo4j_connection_parameters, neo4j_driver, neo4j_container + + +def test_initialize_store_with_credentials(neo4j_connection_parameters, neo4j_driver): + + auth_data = neo4j_connection_parameters + + config = Neo4jStoreConfig(auth_data=auth_data, + custom_prefixes={}, + custom_mappings=[], + multival_props_names=[], + handle_vocab_uri_strategy=HANDLE_VOCAB_URI_STRATEGY.MAP, + batching=False) + + graph_store = Graph(store=Neo4jStore(config=config)) + donna = URIRef("https://example.org/donna") + graph_store.add((donna, FOAF.name, Literal("Donna Fales"))) + graph_store.commit() + records, summary, keys = neo4j_driver.execute_query(GET_DATA_QUERY, database_=RDFLIB_DB) + assert len(records) == 1 + + +def 
test_initialize_store_with_driver(neo4j_driver): + + config = Neo4jStoreConfig(auth_data=None, + custom_prefixes={}, + custom_mappings=[], + multival_props_names=[], + handle_vocab_uri_strategy=HANDLE_VOCAB_URI_STRATEGY.MAP, + batching=False) + + graph_store = Graph(store=Neo4jStore(config=config, neo4j_driver=neo4j_driver)) + donna = URIRef("https://example.org/donna") + graph_store.add((donna, FOAF.name, Literal("Donna Fales"))) + graph_store.commit() + records, summary, keys = neo4j_driver.execute_query(GET_DATA_QUERY, database_=RDFLIB_DB) + assert len(records) == 1 + + +def test_initialize_with_both_credentials_and_driver_should_fail(neo4j_connection_parameters, neo4j_driver): + + config = Neo4jStoreConfig(auth_data=neo4j_connection_parameters, + custom_prefixes={}, + custom_mappings=[], + multival_props_names=[], + handle_vocab_uri_strategy=HANDLE_VOCAB_URI_STRATEGY.MAP, + batching=False) + + with pytest.raises(Exception): + Graph(store=Neo4jStore(config=config, neo4j_driver=neo4j_driver)) diff --git a/test/integration/utils.py b/test/integration/utils.py index d659dc3..e24624a 100644 --- a/test/integration/utils.py +++ b/test/integration/utils.py @@ -1,7 +1,5 @@ from neo4j import Record from rdflib import Graph -from testcontainers.neo4j import Neo4jContainer - from rdflib_neo4j.Neo4jStore import Neo4jStore from rdflib_neo4j.config.Neo4jStoreConfig import Neo4jStoreConfig from rdflib_neo4j.config.const import HANDLE_VOCAB_URI_STRATEGY @@ -70,57 +68,3 @@ def read_file_n10s_and_rdflib(neo4j_driver, graph_store, batching=False, n10s_pa n10s_rels, summary, keys = neo4j_driver.execute_query(GET_RELS_QUERY) rdflib_rels, summary, keys = neo4j_driver.execute_query(GET_RELS_QUERY, database_=RDFLIB_DB) return records_from_rdf_lib, records, rdflib_rels, n10s_rels - - -def create_graph_store(neo4j_container, batching=False): - if neo4j_container: - auth_data = {'uri': neo4j_container.get_connection_url(), - 'database': RDFLIB_DB, - 'user': "neo4j", - 'pwd': 
Neo4jContainer.NEO4J_ADMIN_PASSWORD} - return config_graph_store(auth_data, batching) - else: - auth_data = { - 'uri': os.getenv("NEO4J_URI_LOCAL"), - 'database': RDFLIB_DB, - 'user': os.getenv("NEO4J_USER_LOCAL"), - 'pwd': os.getenv("NEO4J_PWD_LOCAL") - } - - return config_graph_store(auth_data, batching) - - -def config_graph_store(auth_data, batching=False): - # Define your prefixes - prefixes = {} - - # Define your custom mappings - custom_mappings = [] - - multival_props_names = [] - - config = Neo4jStoreConfig(auth_data=auth_data, - custom_prefixes=prefixes, - custom_mappings=custom_mappings, - multival_props_names=multival_props_names, - handle_vocab_uri_strategy=HANDLE_VOCAB_URI_STRATEGY.IGNORE, - batching=batching) - - g = Graph(store=Neo4jStore(config=config)) - return g - - -def get_credentials(local, neo4j_container): - if local: - auth_data = { - 'uri': os.getenv("NEO4J_URI_LOCAL"), - 'database': RDFLIB_DB, - 'user': os.getenv("NEO4J_USER_LOCAL"), - 'pwd': os.getenv("NEO4J_PWD_LOCAL") - } - else: - auth_data = {'uri': neo4j_container.get_connection_url(), - 'database': RDFLIB_DB, - 'user': "neo4j", - 'pwd': Neo4jContainer.NEO4J_ADMIN_PASSWORD} - return auth_data From 2215fc0706eb580e279d5584f5b258e4d153a1d3 Mon Sep 17 00:00:00 2001 From: alfredorubin96 Date: Wed, 10 Jul 2024 10:16:37 +0100 Subject: [PATCH 13/14] checking code and adding some small comments --- rdflib_neo4j/Neo4jStore.py | 2 ++ rdflib_neo4j/config/Neo4jStoreConfig.py | 2 -- test/integration/store_initialization_test.py | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/rdflib_neo4j/Neo4jStore.py b/rdflib_neo4j/Neo4jStore.py index 596cead..184795e 100644 --- a/rdflib_neo4j/Neo4jStore.py +++ b/rdflib_neo4j/Neo4jStore.py @@ -23,6 +23,8 @@ def __init__(self, config: Neo4jStoreConfig, neo4j_driver: Driver = None): self.driver = neo4j_driver self.session = None self.config = config + + # Check that either driver or credentials are provided if not neo4j_driver: 
check_auth_data(config.auth_data) elif config.auth_data: diff --git a/rdflib_neo4j/config/Neo4jStoreConfig.py b/rdflib_neo4j/config/Neo4jStoreConfig.py index b15f928..11b3607 100644 --- a/rdflib_neo4j/config/Neo4jStoreConfig.py +++ b/rdflib_neo4j/config/Neo4jStoreConfig.py @@ -5,8 +5,6 @@ PrefixNotFoundException, HANDLE_VOCAB_URI_STRATEGY, HANDLE_MULTIVAL_STRATEGY ) -from rdflib_neo4j.config.utils import check_auth_data - class Neo4jStoreConfig: """ diff --git a/test/integration/store_initialization_test.py b/test/integration/store_initialization_test.py index b97dbef..e6a241c 100644 --- a/test/integration/store_initialization_test.py +++ b/test/integration/store_initialization_test.py @@ -8,7 +8,7 @@ def test_initialize_store_with_credentials(neo4j_connection_parameters, neo4j_driver): - + """ Test that we can initialize a store by passing the credentials config. """ auth_data = neo4j_connection_parameters config = Neo4jStoreConfig(auth_data=auth_data, @@ -27,7 +27,7 @@ def test_initialize_store_with_credentials(neo4j_connection_parameters, neo4j_dr def test_initialize_store_with_driver(neo4j_driver): - + """ Test that we can initialize a store with a driver. """ config = Neo4jStoreConfig(auth_data=None, custom_prefixes={}, custom_mappings=[], @@ -44,7 +44,7 @@ def test_initialize_store_with_driver(neo4j_driver): def test_initialize_with_both_credentials_and_driver_should_fail(neo4j_connection_parameters, neo4j_driver): - + """ Test that we can't initialize a store with both credentials and a driver. 
""" config = Neo4jStoreConfig(auth_data=neo4j_connection_parameters, custom_prefixes={}, custom_mappings=[], From 3e932d4901db0e66b532f77505fbc2ba8b14670a Mon Sep 17 00:00:00 2001 From: alfredorubin96 Date: Thu, 11 Jul 2024 16:07:32 +0100 Subject: [PATCH 14/14] bumping version to 1.1 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index a41389a..6c83f8e 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ if __name__ == "__main__": setup( name="rdflib-neo4j", - version="1.0.1", + version="1.1", author="Jesús Barrasa, Aleksandar Simeunovic, Alfredo Rubin", author_email="jbarrasa@outlook.com, aleksandar.simeunovic@neo4j.com, alfredo.rubin@neo4j.com", description="RDFLib Store backed by neo4j",