diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 5da1860..9f2fb3c 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -33,7 +33,7 @@ repos:
rev: v2.0.0
hooks:
- id: codespell # spell-check source code
- args: ["-L", "wit,fils"] # comma separated list of words to ignore.
+ args: ["-L", "wit,fils,basf"] # comma separated list of words to ignore.
exclude: ^examples\/|.*\.ipynb
language: python
types: [text]
diff --git a/changelog.txt b/changelog.txt
index 116d868..19b5765 100644
--- a/changelog.txt
+++ b/changelog.txt
@@ -1,10 +1,12 @@
# `kglab` changelog
-## 0.5.?
+## 0.5.2
2022-03-??
- * bumping up versions of dependencies to fit Py 3.7+
+ * bump up versions of dependencies to fit Py 3.7+
+ * improve testing based on pytest; kudos @Mec-iS
+ * allow configuration for rdflib.Store plugins
## 0.5.1
diff --git a/docs/depend.md b/docs/depend.md
index 9eeeb08..c7b9005 100644
--- a/docs/depend.md
+++ b/docs/depend.md
@@ -12,6 +12,7 @@ include:
- [chocolate](https://github.com/seungjaeryanlee/chocolate)
- [csvwlib](https://github.com/DerwenAI/csvwlib)
+- [cryptography](https://cryptography.io/)
- [decorator](https://github.com/micheles/decorator)
- [fsspec](https://filesystem-spec.readthedocs.io/)[^1]
- [gcsfs](https://gcsfs.readthedocs.io/)
@@ -20,6 +21,7 @@ include:
- [NetworkX](https://networkx.org/)
- [NumPy](https://numpy.org/)[^2]
- [OWL-RL](https://owl-rl.readthedocs.io/)
+- [Oxrdflib](https://github.com/oxigraph/oxrdflib)
- [pandas](https://pandas.pydata.org/)
- [pslpython](https://psl.linqs.org/)[^3]
- [pyarrow](https://arrow.apache.org/)
diff --git a/kglab/kglab.py b/kglab/kglab.py
index 4acb763..d7d5aac 100644
--- a/kglab/kglab.py
+++ b/kglab/kglab.py
@@ -1,11 +1,12 @@
-"""
-kglab main class definition.
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# see license https://github.com/DerwenAI/kglab#license-and-copyright
- see license https://github.com/DerwenAI/kglab#license-and-copyright
"""
-######################################################################
-## kglab - core classes
+Main class definitions for `kglab`
+"""
+## Python standard libraries
import codecs
import datetime
import io
@@ -14,8 +15,7 @@
import traceback
import typing
-### third-parties bindings
-
+### third-parties libraries
from icecream import ic # type: ignore # pylint: disable=E0401
import chocolate # type: ignore # pylint: disable=E0401
import csvwlib # type: ignore # pylint: disable=E0401
@@ -30,13 +30,8 @@
import rdflib # type: ignore # pylint: disable=E0401
import rdflib.plugin # type: ignore # pylint: disable=E0401
import rdflib.plugins.parsers.notation3 as rdf_n3 # type: ignore # pylint: disable=E0401
-#rdflib.plugin.register("json-ld", rdflib.plugin.Parser, "rdflib_jsonld.parser", "JsonLDParser")
-#rdflib.plugin.register("json-ld", rdflib.plugin.Serializer, "rdflib_jsonld.serializer", "JsonLDSerializer")
-
-######################################################################
## kglab - core classes
-
from kglab.decorators import multifile
from kglab.pkg_types import PathLike, IOPathLike, GraphLike, RDF_Node
from kglab.gpviz import GPViz
@@ -44,6 +39,7 @@
from kglab.version import _check_version
+## pre-constructor set-up
_check_version()
if get_gpu_count() > 0:
@@ -75,7 +71,7 @@ class KnowledgeGraph:
"sh": "http://www.w3.org/ns/shacl#",
"skos": "http://www.w3.org/2004/02/skos/core#",
"xsd": "http://www.w3.org/2001/XMLSchema#",
- }
+ }
def __init__ (
@@ -84,6 +80,7 @@ def __init__ (
name: str = "generic",
base_uri: str = None,
language: str = "en",
+ store: str = None,
use_gpus: bool = True,
import_graph: typing.Optional[GraphLike] = None,
namespaces: dict = None,
@@ -100,6 +97,9 @@ def __init__ (
language:
the default [*language tag*](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tag), e.g., used for [*language indexing*](https://www.w3.org/TR/json-ld11/#language-indexing)
+ store:
+optionally, the name of an `rdflib.Store` plugin to use for graph storage, e.g., `"OxMemory"`; defaults to `None`, which uses the default `rdflib` store
+
use_gpus:
optionally, use the NVidia GPU devices with [RAPIDS](https://rapids.ai/) if these libraries have been installed and the devices are available; defaults to `True`
@@ -112,19 +112,20 @@ def __init__ (
self.name = name
self.base_uri = base_uri
self.language = language
+ self.store = store
# use NVidia GPU devices if available and the libraries
# have been installed and the flag is not disabled
- if use_gpus and get_gpu_count() > 0:
+        if use_gpus and get_gpu_count() > 0:
self.use_gpus = True
else:
self.use_gpus = False
# import relations from another RDF graph, or start from blank
- if import_graph:
+ if import_graph is not None:
self._g = import_graph
else:
- self._g = rdflib.Graph()
+ self._g = self.build_blank_graph()
# initialize the namespaces
self._ns: dict = {}
@@ -137,6 +138,20 @@ def __init__ (
self.add_ns(prefix, iri)
+    def build_blank_graph (
+        self,
+        ) -> rdflib.Graph:
+        """
+Create a new `rdflib.Graph` object, passing along the `rdflib.Store` plugin held in `self.store` when one has been configured; otherwise the graph gets constructed with no `store` argument.
+
+    returns:
+a new `rdflib.Graph` object
+        """
+        # no storage plugin configured: let `rdflib` choose its own default
+        if self.store is None:
+            return rdflib.Graph()
+
+        return rdflib.Graph(store=self.store)
+
+
def rdf_graph (
self
) -> rdflib.Graph:
@@ -472,7 +487,7 @@ def _get_filename (
filename = str(path)
elif isinstance(path, pathlib.Path):
filename = path.as_posix()
- elif type(path) == str:
+ elif isinstance(path, str):
filename = path
else:
raise TypeError(f"path variable not recognised {type(path)}")
@@ -1225,7 +1240,7 @@ def validate (
**chocolate.filter_args(kwargs, pyshacl.validate),
)
- g = rdflib.Graph()
+ g = self.build_blank_graph()
g.parse(
data=report_graph_data,
diff --git a/kglab/version.py b/kglab/version.py
index 315f8a4..b94be12 100644
--- a/kglab/version.py
+++ b/kglab/version.py
@@ -11,7 +11,7 @@
MIN_PY_VERSION: typing.Tuple = (3, 7,)
-__version__: str = "0.5.1"
+__version__: str = "0.5.2"
def _versify (
diff --git a/requirements.txt b/requirements.txt
index 6c6573f..6972f57 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
aiohttp >= 3.8
chocolate >= 0.0.2
-cryptography >= 35.0
csvwlib >= 0.3.2
+cryptography >= 35.0
decorator >= 5.1
fsspec[gs,s3] >= 2022.2
gcsfs >= 2022.2
@@ -10,6 +10,7 @@ morph-kgc >= 1.6
networkx >= 2.7
numpy >= 1.22
owlrl >= 6.0.2
+oxrdflib >= 0.2
pandas >= 1.4
pslpython >= 2.2.2
pyarrow >= 7.0
diff --git a/scripts/oxrdf.py b/scripts/oxrdf.py
new file mode 100644
index 0000000..0823d0d
--- /dev/null
+++ b/scripts/oxrdf.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# type: ignore
+
+from os.path import abspath, dirname
+import pathlib
+import sys
+import time
+
+import oxrdflib
+import rdflib
+
+sys.path.insert(0, str(pathlib.Path(dirname(dirname(abspath(__file__))))))
+import kglab
+
+FILENAME = "../ffurf_fforms/dat/fiirm.12.owl"
+
+# get_items_interface:
+QUDT_QUERY9 = """
+PREFIX BASF_EC_RaMPO:
+PREFIX owl:
+PREFIX rdfs:
+PREFIX rdf:
+
+SELECT DISTINCT ?item WHERE { ?item rdfs:subClassOf ?item_class . FILTER(?item != owl:Nothing) . FILTER(?item != ?item_class) . FILTER(?item != BASF_EC_RaMPO:CoatingLayer) . FILTER(?interfaceClass != BASF_EC_RaMPO:CoatingLayer) . ?item rdfs:subClassOf ?restriction1 . ?restriction1 rdf:type owl:Restriction . ?restriction1 owl:onProperty BASF_EC_RaMPO:basedOn . ?restriction1 owl:onClass ?basedOn . { ?restriction1 owl:onClass ?basedOn . ?restriction1 owl:onClass ?base_layer . ?item rdfs:subClassOf ?restriction2 . ?restriction2 rdf:type owl:Restriction . ?restriction2 owl:onProperty BASF_EC_RaMPO:interactsWith . ?restriction2 owl:onClass ?interfaceClass } UNION { ?restriction1 owl:onClass ?basedOn . ?basedOn owl:unionOf/rdf:rest*/rdf:first ?base_layer . ?item rdfs:subClassOf ?restriction2 . ?restriction2 rdf:type owl:Restriction . ?restriction2 owl:onProperty BASF_EC_RaMPO:interactsWith . ?restriction2 owl:onClass ?interactsWith . ?interactsWith owl:unionOf/rdf:rest*/rdf:first ?interactsWithUnion . ?interactsWithUnion rdfs:subClassOf ?interfaceRestriction . ?interfaceRestriction rdf:type owl:Restriction . ?interfaceRestriction owl:onProperty BASF_EC_RaMPO:hasEquivalentCoatingLayer . ?interfaceRestriction owl:someValuesFrom ?interfaceClass . FILTER (?interfaceClass != ?base_layer) } OPTIONAL { ?item rdfs:label ?label } }
+"""
+
+
+def run_query (g) -> None:
+    """
+    Measure and print the timing for a SPARQL query.
+
+    Two intervals get reported: the call to `g.query()` ("query init")
+    and the iteration over its results ("query exec"), where the `item`
+    binding of each result row is printed.
+
+        g:
+    graph object which provides a `query()` method
+    """
+    print(f"using graph: {g}")
+
+    # query init
+    # `perf_counter()` is monotonic and high-resolution, so the measured
+    # interval cannot be skewed by system clock adjustments, unlike
+    # the wall-clock `time.time()`
+    start = time.perf_counter()
+    query_iter = g.query(QUDT_QUERY9)
+
+    duration = time.perf_counter() - start
+    print(f"query init time: {duration:10.3f}")
+
+    # query exec
+    start = time.perf_counter()
+
+    for row in query_iter:
+        print(row.item)
+
+    duration = time.perf_counter() - start
+    print(f"query exec time: {duration:10.3f}", "\n")
+
+
+if __name__ == "__main__":
+ # baseline: load the file into a plain `rdflib.Graph` built on the
+ # "OxMemory" store plugin, then time the query
+ g = rdflib.Graph(store="OxMemory")
+ g.parse(FILENAME, format="xml")
+ run_query(g)
+
+ # comparison: same file, store plugin, and query, but going through
+ # the `kglab.KnowledgeGraph` wrapper
+ # NOTE(review): assumes `KnowledgeGraph.query()` yields rows which
+ # expose an `item` attribute, as `run_query()` requires -- confirm
+ g = kglab.KnowledgeGraph(store="OxMemory")
+ g.load_rdf(FILENAME, format="xml")
+ run_query(g)
diff --git a/setup.py b/setup.py
index fc29c15..8d268ae 100644
--- a/setup.py
+++ b/setup.py
@@ -7,44 +7,53 @@
KEYWORDS = [
- "knowledge graph",
+ "controlled vocabulary",
+ "cugraph",
+ "deep learning",
+ "embedding",
+ "gpu",
"graph algorithms",
- "interactive visualization",
- "validation",
+ "igraph",
"inference",
- "rdf",
- "owl",
- "skos",
- "sparql",
- "shacl",
- "controlled vocabulary",
+ "interactive visualization",
+ "json-ld",
+ "knowledge graph",
"managing namespaces",
- "serialization",
+ "morph-kgc",
"n3",
- "turtle",
- "json-ld",
+ "networkx",
+ "owl",
+ "pandas",
"parquet",
- "psl",
"probabilistic soft logic",
- "pandas",
+ "psl",
+ "pyvis",
+ "rapids",
+ "rdf",
+ "rml",
"roam research",
- "networkx",
- "igraph",
- "pytorch",
- "embedding",
- "deep learning",
+ "serialization",
+ "shacl",
+ "skos",
+ "sparql",
+ "statistical relational learning",
+ "topology",
+ "turtle",
+ "validation",
]
-def parse_requirements_file (filename: str) -> typing.List:
-    """read and parse a Python `requirements.txt` file, returning as a list of str"""
-    results: list = []
-
+def parse_requirements_file (
+    filename: str,
+    ) -> typing.List[ str ]:
+    """
+    Read and parse a Python `requirements.txt` file.
+
+    Anything from a `#` to the end of a line gets dropped and all spaces
+    are removed. Blank and comment-only lines are skipped, so that no
+    empty requirement strings get returned.
+
+        filename:
+    path to the requirements file
+
+        returns:
+    the requirement specifiers, as a list of strings
+    """
-    with pathlib.Path(filename).open() as f:
-        for l in f.readlines():
-            results.append(l.strip().replace(" ", "").split("#")[0])
+    with pathlib.Path(filename).open(encoding="utf-8") as f:
+        # iterate the file lazily, instead of loading it via `readlines()`
+        stripped = (
+            line.split("#")[0].replace(" ", "").strip()
+            for line in f
+        )
+
+        # drop the entries left empty by blank or comment-only lines
+        return [ req for req in stripped if req ]
-
-    return results
if __name__ == "__main__":
@@ -55,32 +64,41 @@ def parse_requirements_file (filename: str) -> typing.List:
base_packages = parse_requirements_file("requirements.txt")
docs_packages = parse_requirements_file("requirements-dev.txt")
+ tut_packages = parse_requirements_file("requirements-tut.txt")
setuptools.setup(
name = "kglab",
version = kglab_version.__version__,
+ license = "MIT",
+ description = "A simple abstraction layer in Python for building knowledge graphs",
+ long_description = pathlib.Path("README.md").read_text(),
+ long_description_content_type = "text/markdown",
+
python_requires = ">=" + kglab_version._versify(kglab_version.MIN_PY_VERSION), # pylint: disable=W0212
packages = setuptools.find_packages(exclude=[ "docs", "examples" ]),
+ zip_safe = False,
+
install_requires = base_packages,
extras_require = {
"base": base_packages,
"docs": docs_packages,
- },
+ "tut": tut_packages,
+ },
author = "Paco Nathan",
author_email = "paco@derwen.ai",
- license = "MIT",
-
- description = "A simple abstraction layer in Python for building knowledge graphs",
- long_description = pathlib.Path("README.md").read_text(),
- long_description_content_type = "text/markdown",
keywords = ", ".join(KEYWORDS),
classifiers = [
- "Programming Language :: Python :: 3",
- "License :: OSI Approved :: MIT License",
+ "Programming Language :: Python",
+ "Programming Language :: Python :: 3 :: Only",
+ "Programming Language :: Python :: 3.7",
+ "Programming Language :: Python :: 3.8",
+ "Programming Language :: Python :: 3.9",
+ "Programming Language :: Python :: 3.10",
"Operating System :: OS Independent",
+ "License :: OSI Approved :: MIT License",
"Development Status :: 5 - Production/Stable",
"Intended Audience :: Developers",
"Intended Audience :: Education",
@@ -89,6 +107,7 @@ def parse_requirements_file (filename: str) -> typing.List:
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"Topic :: Scientific/Engineering :: Human Machine Interfaces",
"Topic :: Scientific/Engineering :: Information Analysis",
+ "Topic :: Software Development :: Libraries :: Python Modules",
"Topic :: Text Processing :: Indexing",
],
@@ -96,10 +115,16 @@ def parse_requirements_file (filename: str) -> typing.List:
project_urls = {
"Source Code": "https://github.com/DerwenAI/kglab",
"Issue Tracker": "https://github.com/DerwenAI/kglab/issues",
- "Community Survey": "https://forms.gle/FMHgtmxHYWocprMn6",
- "Discussion Forum": "https://www.linkedin.com/groups/6725785/",
"Hands-on Tutorial": "https://derwen.ai/docs/kgl/tutorial/",
- },
+ "Discussion Forum": "https://www.linkedin.com/groups/6725785/",
+ "DockerHub": "https://hub.docker.com/r/derwenai/kglab",
+ "Community Survey": "https://forms.gle/FMHgtmxHYWocprMn6",
+ "DOI": "https://doi.org/10.5281/zenodo.4717287",
+ },
- zip_safe = False,
- )
+ entry_points = {
+ "rdf.plugins.store": [
+ "kglab = kglab:PropertyStore",
+ ],
+ },
+ )