From 2e18826fc243526a5583d1dadb208edd9c1add1b Mon Sep 17 00:00:00 2001 From: Paco Nathan Date: Tue, 15 Mar 2022 14:24:16 -0700 Subject: [PATCH] allow configurable rdflib.Store plugins, e.g., Oxrdflib --- .pre-commit-config.yaml | 2 +- changelog.txt | 6 ++- docs/depend.md | 2 + kglab/kglab.py | 51 +++++++++++++------- kglab/version.py | 2 +- requirements.txt | 3 +- scripts/oxrdf.py | 57 ++++++++++++++++++++++ setup.py | 103 +++++++++++++++++++++++++--------------- 8 files changed, 164 insertions(+), 62 deletions(-) create mode 100644 scripts/oxrdf.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5da1860..9f2fb3c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -33,7 +33,7 @@ repos: rev: v2.0.0 hooks: - id: codespell # spell-check source code - args: ["-L", "wit,fils"] # comma separated list of words to ignore. + args: ["-L", "wit,fils,basf"] # comma separated list of words to ignore. exclude: ^examples\/|.*\.ipynb language: python types: [text] diff --git a/changelog.txt b/changelog.txt index 116d868..19b5765 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,10 +1,12 @@ # `kglab` changelog -## 0.5.? +## 0.5.2 2022-03-?? - * bumping up versions of dependencies to fit Py 3.7+ + * bump up versions of dependencies to fit Py 3.7+ + * improve testing based on pytest; kudos @Mec-iS + * allow configuration for rdflib.Store plugins ## 0.5.1 diff --git a/docs/depend.md b/docs/depend.md index 9eeeb08..c7b9005 100644 --- a/docs/depend.md +++ b/docs/depend.md @@ -12,6 +12,7 @@ include: - [chocolate](https://github.com/seungjaeryanlee/chocolate) - [csvwlib](https://github.com/DerwenAI/csvwlib) +- [cryptography](https://cryptography.io/) - [decorator](https://github.com/micheles/decorator) - [fsspec](https://filesystem-spec.readthedocs.io/)[^1] - [gcsfs](https://gcsfs.readthedocs.io/) @@ -20,6 +21,7 @@ include: - [NetworkX](https://networkx.org/) - [NumPy](https://numpy.org/)[^2] - [OWL-RL](https://owl-rl.readthedocs.io/) +- [Oxrdflib](https://github.com/oxigraph/oxrdflib) - [pandas](https://pandas.pydata.org/) - [pslpython](https://psl.linqs.org/)[^3] - [pyarrow](https://arrow.apache.org/) diff --git a/kglab/kglab.py b/kglab/kglab.py index 4acb763..d7d5aac 100644 --- a/kglab/kglab.py +++ b/kglab/kglab.py @@ -1,11 +1,12 @@ -""" -kglab main class definition. +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# see license https://github.com/DerwenAI/kglab#license-and-copyright - see license https://github.com/DerwenAI/kglab#license-and-copyright """ -###################################################################### -## kglab - core classes +Main class definitions for `kglab` +""" +## Python standard libraries import codecs import datetime import io @@ -14,8 +15,7 @@ import traceback import typing -### third-parties bindings - +### third-parties libraries from icecream import ic # type: ignore # pylint: disable=E0401 import chocolate # type: ignore # pylint: disable=E0401 import csvwlib # type: ignore # pylint: disable=E0401 @@ -30,13 +30,8 @@ import rdflib # type: ignore # pylint: disable=E0401 import rdflib.plugin # type: ignore # pylint: disable=E0401 import rdflib.plugins.parsers.notation3 as rdf_n3 # type: ignore # pylint: disable=E0401 -#rdflib.plugin.register("json-ld", rdflib.plugin.Parser, "rdflib_jsonld.parser", "JsonLDParser") -#rdflib.plugin.register("json-ld", rdflib.plugin.Serializer, "rdflib_jsonld.serializer", "JsonLDSerializer") - -###################################################################### ## kglab - core classes - from kglab.decorators import multifile from kglab.pkg_types import PathLike, IOPathLike, GraphLike, RDF_Node from kglab.gpviz import GPViz @@ -44,6 +39,7 @@ from kglab.version import _check_version +## pre-constructor set-up _check_version() if get_gpu_count() > 0: @@ -75,7 +71,7 @@ class KnowledgeGraph: "sh": "http://www.w3.org/ns/shacl#", "skos": "http://www.w3.org/2004/02/skos/core#", "xsd": "http://www.w3.org/2001/XMLSchema#", - } + } def __init__ ( @@ -84,6 +80,7 @@ def __init__ ( name: str = "generic", base_uri: str = None, language: str = "en", + store: str = None, use_gpus: bool = True, import_graph: typing.Optional[GraphLike] = None, namespaces: dict = None, @@ -100,6 +97,9 @@ def __init__ ( language: the default [*language tag*](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tag), e.g., used for [*language indexing*](https://www.w3.org/TR/json-ld11/#language-indexing) + store: +optionally, string representing an `rdflib.Store` plugin to use. + use_gpus: optionally, use the NVidia GPU devices with [RAPIDS](https://rapids.ai/) if these libraries have been installed and the devices are available; defaults to `True` @@ -112,19 +112,20 @@ def __init__ ( self.name = name self.base_uri = base_uri self.language = language + self.store = store # use NVidia GPU devices if available and the libraries # have been installed and the flag is not disabled - if use_gpus and get_gpu_count() > 0: + if use_gpus is not None and get_gpu_count() > 0: self.use_gpus = True else: self.use_gpus = False # import relations from another RDF graph, or start from blank - if import_graph: + if import_graph is not None: self._g = import_graph else: - self._g = rdflib.Graph() + self._g = self.build_blank_graph() # initialize the namespaces self._ns: dict = {} @@ -137,6 +138,20 @@ def __init__ ( self.add_ns(prefix, iri) + def build_blank_graph ( + self, + ) -> rdflib.Graph: + """ +Build a new `rdflib.Graph` object, based on storage plugin configuration. + """ + if self.store is not None: + g = rdflib.Graph(store=self.store) + else: + g = rdflib.Graph() + + return g + + def rdf_graph ( self ) -> rdflib.Graph: @@ -472,7 +487,7 @@ def _get_filename ( filename = str(path) elif isinstance(path, pathlib.Path): filename = path.as_posix() - elif type(path) == str: + elif isinstance(path, str): filename = path else: raise TypeError(f"path variable not recognised {type(path)}") @@ -1225,7 +1240,7 @@ def validate ( **chocolate.filter_args(kwargs, pyshacl.validate), ) - g = rdflib.Graph() + g = self.build_blank_graph() g.parse( data=report_graph_data, diff --git a/kglab/version.py b/kglab/version.py index 315f8a4..b94be12 100644 --- a/kglab/version.py +++ b/kglab/version.py @@ -11,7 +11,7 @@ MIN_PY_VERSION: typing.Tuple = (3, 7,) -__version__: str = "0.5.1" +__version__: str = "0.5.2" def _versify ( diff --git a/requirements.txt b/requirements.txt index 6c6573f..6972f57 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ aiohttp >= 3.8 chocolate >= 0.0.2 -cryptography >= 35.0 csvwlib >= 0.3.2 +cryptography >= 35.0 decorator >= 5.1 fsspec[gs,s3] >= 2022.2 gcsfs >= 2022.2 @@ -10,6 +10,7 @@ morph-kgc >= 1.6 networkx >= 2.7 numpy >= 1.22 owlrl >= 6.0.2 +oxrdflib >= 0.2 pandas >= 1.4 pslpython >= 2.2.2 pyarrow >= 7.0 diff --git a/scripts/oxrdf.py b/scripts/oxrdf.py new file mode 100644 index 0000000..0823d0d --- /dev/null +++ b/scripts/oxrdf.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# type: ignore + +from os.path import abspath, dirname +import pathlib +import sys +import time + +import oxrdflib +import rdflib + +sys.path.insert(0, str(pathlib.Path(dirname(dirname(abspath(__file__)))))) +import kglab + +FILENAME = "../ffurf_fforms/dat/fiirm.12.owl" + +# get_items_interface: +QUDT_QUERY9 = """ +PREFIX BASF_EC_RaMPO: +PREFIX owl: +PREFIX rdfs: +PREFIX rdf: + +SELECT DISTINCT ?item WHERE { ?item rdfs:subClassOf ?item_class . FILTER(?item != owl:Nothing) . FILTER(?item != ?item_class) . FILTER(?item != BASF_EC_RaMPO:CoatingLayer) . FILTER(?interfaceClass != BASF_EC_RaMPO:CoatingLayer) . ?item rdfs:subClassOf ?restriction1 . ?restriction1 rdf:type owl:Restriction . ?restriction1 owl:onProperty BASF_EC_RaMPO:basedOn . ?restriction1 owl:onClass ?basedOn . { ?restriction1 owl:onClass ?basedOn . ?restriction1 owl:onClass ?base_layer . ?item rdfs:subClassOf ?restriction2 . ?restriction2 rdf:type owl:Restriction . ?restriction2 owl:onProperty BASF_EC_RaMPO:interactsWith . ?restriction2 owl:onClass ?interfaceClass } UNION { ?restriction1 owl:onClass ?basedOn . ?basedOn owl:unionOf/rdf:rest*/rdf:first ?base_layer . ?item rdfs:subClassOf ?restriction2 . ?restriction2 rdf:type owl:Restriction . ?restriction2 owl:onProperty BASF_EC_RaMPO:interactsWith . ?restriction2 owl:onClass ?interactsWith . ?interactsWith owl:unionOf/rdf:rest*/rdf:first ?interactsWithUnion . ?interactsWithUnion rdfs:subClassOf ?interfaceRestriction . ?interfaceRestriction rdf:type owl:Restriction . ?interfaceRestriction owl:onProperty BASF_EC_RaMPO:hasEquivalentCoatingLayer . ?interfaceRestriction owl:someValuesFrom ?interfaceClass . FILTER (?interfaceClass != ?base_layer) } OPTIONAL { ?item rdfs:label ?label } } +""" + + +def run_query (g) -> None: + """measure the timing for a SPARQL query""" + print(f"using graph: {g}") + + # query init + init_time = time.time() + query_iter = g.query(QUDT_QUERY9) + + duration = time.time() - init_time + print(f"query init time: {duration:10.3f}") + + # query exec + init_time = time.time() + + for row in query_iter: + print(row.item) + + duration = time.time() - init_time + print(f"query exec time: {duration:10.3f}", "\n") + + +if __name__ == "__main__": + g = rdflib.Graph(store="OxMemory") + g.parse(FILENAME, format="xml") + run_query(g) + + g = kglab.KnowledgeGraph(store="OxMemory") + g.load_rdf(FILENAME, format="xml") + run_query(g) diff --git a/setup.py b/setup.py index fc29c15..8d268ae 100644 --- a/setup.py +++ b/setup.py @@ -7,44 +7,53 @@ KEYWORDS = [ - "knowledge graph", + "controlled vocabulary", + "cugraph", + "deep learning", + "embedding", + "gpu", "graph algorithms", - "interactive visualization", - "validation", + "igraph", "inference", - "rdf", - "owl", - "skos", - "sparql", - "shacl", - "controlled vocabulary", + "interactive visualization", + "json-ld", + "knowledge graph", "managing namespaces", - "serialization", + "morph-kgc", "n3", - "turtle", - "json-ld", + "networkx", + "owl", + "pandas", "parquet", - "psl", "probabilistic soft logic", - "pandas", + "psl", + "pyvis", + "rapids", + "rdf", + "rml", "roam research", - "networkx", - "igraph", - "pytorch", - "embedding", - "deep learning", + "serialization", + "shacl", + "skos", + "sparql", + "statistical relational learning", + "topology", + "turtle", + "validation", ] -def parse_requirements_file (filename: str) -> typing.List: - """read and parse a Python `requirements.txt` file, returning as a list of str""" - results: list = [] - +def parse_requirements_file ( + filename: str, + ) -> typing.List[ str ]: + """read and parse a Python `requirements.txt` file, returning as a list of strings""" with pathlib.Path(filename).open() as f: - for l in f.readlines(): - results.append(l.strip().replace(" ", "").split("#")[0]) + results: list = [ + l.strip().replace(" ", "").split("#")[0] + for l in f.readlines() + ] - return results + return results if __name__ == "__main__": @@ -55,32 +64,41 @@ def parse_requirements_file (filename: str) -> typing.List: base_packages = parse_requirements_file("requirements.txt") docs_packages = parse_requirements_file("requirements-dev.txt") + tut_packages = parse_requirements_file("requirements-tut.txt") setuptools.setup( name = "kglab", version = kglab_version.__version__, + license = "MIT", + description = "A simple abstraction layer in Python for building knowledge graphs", + long_description = pathlib.Path("README.md").read_text(), + long_description_content_type = "text/markdown", + python_requires = ">=" + kglab_version._versify(kglab_version.MIN_PY_VERSION), # pylint: disable=W0212 packages = setuptools.find_packages(exclude=[ "docs", "examples" ]), + zip_safe = False, + install_requires = base_packages, extras_require = { "base": base_packages, "docs": docs_packages, - }, + "tut": tut_packages, + }, author = "Paco Nathan", author_email = "paco@derwen.ai", - license = "MIT", - - description = "A simple abstraction layer in Python for building knowledge graphs", - long_description = pathlib.Path("README.md").read_text(), - long_description_content_type = "text/markdown", keywords = ", ".join(KEYWORDS), classifiers = [ - "Programming Language :: Python :: 3", - "License :: OSI Approved :: MIT License", + "Programming Language :: Python", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", "Operating System :: OS Independent", + "License :: OSI Approved :: MIT License", "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "Intended Audience :: Education", @@ -89,6 +107,7 @@ def parse_requirements_file (filename: str) -> typing.List: "Topic :: Scientific/Engineering :: Artificial Intelligence", "Topic :: Scientific/Engineering :: Human Machine Interfaces", "Topic :: Scientific/Engineering :: Information Analysis", + "Topic :: Software Development :: Libraries :: Python Modules", "Topic :: Text Processing :: Indexing", ], @@ -96,10 +115,16 @@ def parse_requirements_file (filename: str) -> typing.List: project_urls = { "Source Code": "https://github.com/DerwenAI/kglab", "Issue Tracker": "https://github.com/DerwenAI/kglab/issues", - "Community Survey": "https://forms.gle/FMHgtmxHYWocprMn6", - "Discussion Forum": "https://www.linkedin.com/groups/6725785/", "Hands-on Tutorial": "https://derwen.ai/docs/kgl/tutorial/", - }, + "Discussion Forum": "https://www.linkedin.com/groups/6725785/", + "DockerHub": "https://hub.docker.com/r/derwenai/kglab", + "Community Survey": "https://forms.gle/FMHgtmxHYWocprMn6", + "DOI": "https://doi.org/10.5281/zenodo.4717287", + }, - zip_safe = False, - ) + entry_points = { + "rdf.plugins.store": [ + "kglab = kglab:PropertyStore", + ], + }, + )