Skip to content

Commit

Permalink
Merge pull request #240 from DerwenAI/oxrdflib-integration
Browse files Browse the repository at this point in the history
allow configurable rdflib.Store plugins, e.g., Oxrdflib
  • Loading branch information
ceteri authored Mar 15, 2022
2 parents 2971529 + 2e18826 commit 715adb6
Show file tree
Hide file tree
Showing 8 changed files with 164 additions and 62 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ repos:
rev: v2.0.0
hooks:
- id: codespell # spell-check source code
args: ["-L", "wit,fils"] # comma separated list of words to ignore.
args: ["-L", "wit,fils,basf"] # comma separated list of words to ignore.
exclude: ^examples\/|.*\.ipynb
language: python
types: [text]
6 changes: 4 additions & 2 deletions changelog.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
# `kglab` changelog

## 0.5.?
## 0.5.2

2022-03-??

* bumping up versions of dependencies to fit Py 3.7+
* bump up versions of dependencies to fit Py 3.7+
* improve testing based on pytest; kudos @Mec-iS
* allow configuration for rdflib.Store plugins


## 0.5.1
Expand Down
2 changes: 2 additions & 0 deletions docs/depend.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ include:

- [chocolate](https://github.com/seungjaeryanlee/chocolate)
- [csvwlib](https://github.com/DerwenAI/csvwlib)
- [cryptography](https://cryptography.io/)
- [decorator](https://github.com/micheles/decorator)
- [fsspec](https://filesystem-spec.readthedocs.io/)[^1]
- [gcsfs](https://gcsfs.readthedocs.io/)
Expand All @@ -20,6 +21,7 @@ include:
- [NetworkX](https://networkx.org/)
- [NumPy](https://numpy.org/)[^2]
- [OWL-RL](https://owl-rl.readthedocs.io/)
- [Oxrdflib](https://github.com/oxigraph/oxrdflib)
- [pandas](https://pandas.pydata.org/)
- [pslpython](https://psl.linqs.org/)[^3]
- [pyarrow](https://arrow.apache.org/)
Expand Down
51 changes: 33 additions & 18 deletions kglab/kglab.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
"""
kglab main class definition.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# see license https://github.com/DerwenAI/kglab#license-and-copyright

see license https://github.com/DerwenAI/kglab#license-and-copyright
"""
######################################################################
## kglab - core classes
Main class definitions for `kglab`
"""

## Python standard libraries
import codecs
import datetime
import io
Expand All @@ -14,8 +15,7 @@
import traceback
import typing

### third-parties bindings

### third-parties libraries
from icecream import ic # type: ignore # pylint: disable=E0401
import chocolate # type: ignore # pylint: disable=E0401
import csvwlib # type: ignore # pylint: disable=E0401
Expand All @@ -30,20 +30,16 @@
import rdflib # type: ignore # pylint: disable=E0401
import rdflib.plugin # type: ignore # pylint: disable=E0401
import rdflib.plugins.parsers.notation3 as rdf_n3 # type: ignore # pylint: disable=E0401
#rdflib.plugin.register("json-ld", rdflib.plugin.Parser, "rdflib_jsonld.parser", "JsonLDParser")
#rdflib.plugin.register("json-ld", rdflib.plugin.Serializer, "rdflib_jsonld.serializer", "JsonLDSerializer")


######################################################################
## kglab - core classes

from kglab.decorators import multifile
from kglab.pkg_types import PathLike, IOPathLike, GraphLike, RDF_Node
from kglab.gpviz import GPViz
from kglab.util import get_gpu_count
from kglab.version import _check_version


## pre-constructor set-up
_check_version()

if get_gpu_count() > 0:
Expand Down Expand Up @@ -75,7 +71,7 @@ class KnowledgeGraph:
"sh": "http://www.w3.org/ns/shacl#",
"skos": "http://www.w3.org/2004/02/skos/core#",
"xsd": "http://www.w3.org/2001/XMLSchema#",
}
}


def __init__ (
Expand All @@ -84,6 +80,7 @@ def __init__ (
name: str = "generic",
base_uri: str = None,
language: str = "en",
store: str = None,
use_gpus: bool = True,
import_graph: typing.Optional[GraphLike] = None,
namespaces: dict = None,
Expand All @@ -100,6 +97,9 @@ def __init__ (
language:
the default [*language tag*](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tag), e.g., used for [*language indexing*](https://www.w3.org/TR/json-ld11/#language-indexing)
store:
optionally, string representing an `rdflib.Store` plugin to use.
use_gpus:
optionally, use the NVidia GPU devices with [RAPIDS](https://rapids.ai/) if these libraries have been installed and the devices are available; defaults to `True`
Expand All @@ -112,19 +112,20 @@ def __init__ (
self.name = name
self.base_uri = base_uri
self.language = language
self.store = store

# use NVidia GPU devices if available and the libraries
# have been installed and the flag is not disabled
if use_gpus and get_gpu_count() > 0:
if use_gpus is not None and get_gpu_count() > 0:
self.use_gpus = True
else:
self.use_gpus = False

# import relations from another RDF graph, or start from blank
if import_graph:
if import_graph is not None:
self._g = import_graph
else:
self._g = rdflib.Graph()
self._g = self.build_blank_graph()

# initialize the namespaces
self._ns: dict = {}
Expand All @@ -137,6 +138,20 @@ def __init__ (
self.add_ns(prefix, iri)


def build_blank_graph (
    self,
    ) -> rdflib.Graph:
    """
    Construct an empty `rdflib.Graph`, honoring the storage plugin configuration.

    When `self.store` is `None` the graph is created with the `rdflib`
    defaults; otherwise `self.store` is passed through as the `store`
    argument of the `rdflib.Graph` constructor.

        returns:
    a new, empty `rdflib.Graph` object
    """
    # no plugin configured: let `rdflib` choose its default store
    if self.store is None:
        return rdflib.Graph()

    return rdflib.Graph(store=self.store)


def rdf_graph (
self
) -> rdflib.Graph:
Expand Down Expand Up @@ -472,7 +487,7 @@ def _get_filename (
filename = str(path)
elif isinstance(path, pathlib.Path):
filename = path.as_posix()
elif type(path) == str:
elif isinstance(path, str):
filename = path
else:
raise TypeError(f"path variable not recognised {type(path)}")
Expand Down Expand Up @@ -1225,7 +1240,7 @@ def validate (
**chocolate.filter_args(kwargs, pyshacl.validate),
)

g = rdflib.Graph()
g = self.build_blank_graph()

g.parse(
data=report_graph_data,
Expand Down
2 changes: 1 addition & 1 deletion kglab/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@


MIN_PY_VERSION: typing.Tuple = (3, 7,)
__version__: str = "0.5.1"
__version__: str = "0.5.2"


def _versify (
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
aiohttp >= 3.8
chocolate >= 0.0.2
cryptography >= 35.0
csvwlib >= 0.3.2
cryptography >= 35.0
decorator >= 5.1
fsspec[gs,s3] >= 2022.2
gcsfs >= 2022.2
Expand All @@ -10,6 +10,7 @@ morph-kgc >= 1.6
networkx >= 2.7
numpy >= 1.22
owlrl >= 6.0.2
oxrdflib >= 0.2
pandas >= 1.4
pslpython >= 2.2.2
pyarrow >= 7.0
Expand Down
57 changes: 57 additions & 0 deletions scripts/oxrdf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# type: ignore

from os.path import abspath, dirname
import pathlib
import sys
import time

import oxrdflib
import rdflib

sys.path.insert(0, str(pathlib.Path(dirname(dirname(abspath(__file__))))))
import kglab

# RDF input file for the timing runs, given as a relative path; the
# `__main__` section of this script parses it with `format="xml"`
FILENAME = "../ffurf_fforms/dat/fiirm.12.owl"

# get_items_interface:
# SPARQL query text which `run_query()` submits to the graph under test;
# it selects the distinct `?item` bindings, and `run_query()` prints the
# `item` value from each result row
QUDT_QUERY9 = """
PREFIX BASF_EC_RaMPO: <https://ontology.basf.net/ontology/BASF_EC_RaMPO/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
SELECT DISTINCT ?item WHERE { ?item rdfs:subClassOf ?item_class . FILTER(?item != owl:Nothing) . FILTER(?item != ?item_class) . FILTER(?item != BASF_EC_RaMPO:CoatingLayer) . FILTER(?interfaceClass != BASF_EC_RaMPO:CoatingLayer) . ?item rdfs:subClassOf ?restriction1 . ?restriction1 rdf:type owl:Restriction . ?restriction1 owl:onProperty BASF_EC_RaMPO:basedOn . ?restriction1 owl:onClass ?basedOn . { ?restriction1 owl:onClass ?basedOn . ?restriction1 owl:onClass ?base_layer . ?item rdfs:subClassOf ?restriction2 . ?restriction2 rdf:type owl:Restriction . ?restriction2 owl:onProperty BASF_EC_RaMPO:interactsWith . ?restriction2 owl:onClass ?interfaceClass } UNION { ?restriction1 owl:onClass ?basedOn . ?basedOn owl:unionOf/rdf:rest*/rdf:first ?base_layer . ?item rdfs:subClassOf ?restriction2 . ?restriction2 rdf:type owl:Restriction . ?restriction2 owl:onProperty BASF_EC_RaMPO:interactsWith . ?restriction2 owl:onClass ?interactsWith . ?interactsWith owl:unionOf/rdf:rest*/rdf:first ?interactsWithUnion . ?interactsWithUnion rdfs:subClassOf ?interfaceRestriction . ?interfaceRestriction rdf:type owl:Restriction . ?interfaceRestriction owl:onProperty BASF_EC_RaMPO:hasEquivalentCoatingLayer . ?interfaceRestriction owl:someValuesFrom ?interfaceClass . FILTER (?interfaceClass != ?base_layer) } OPTIONAL { ?item rdfs:label ?label } }
"""


def run_query (g) -> None:
    """
    Measure and print the timing for a SPARQL query.

    Two phases are timed separately: the call to `g.query()` which
    submits `QUDT_QUERY9`, and then the iteration over the returned
    rows, during which the `item` attribute of each row gets printed.

    Durations are taken from `time.perf_counter()`, a clock intended
    for measuring short intervals, instead of the wall clock, which
    can be adjusted while a measurement is in progress.

        g:
    graph object under test; it must provide a `query()` method which accepts the SPARQL string and returns an iterable of rows that expose an `item` attribute
    """
    print(f"using graph: {g}")

    # query init
    init_time = time.perf_counter()
    query_iter = g.query(QUDT_QUERY9)

    duration = time.perf_counter() - init_time
    print(f"query init time: {duration:10.3f}")

    # query exec: the printing of each row is included in this timing
    init_time = time.perf_counter()

    for row in query_iter:
        print(row.item)

    duration = time.perf_counter() - init_time
    print(f"query exec time: {duration:10.3f}", "\n")


if __name__ == "__main__":
    # first pass: a plain `rdflib.Graph` constructed on the "OxMemory" store
    rdf_graph = rdflib.Graph(store="OxMemory")
    rdf_graph.parse(FILENAME, format="xml")
    run_query(rdf_graph)

    # second pass: the same file and the same store name, this time
    # going through the `kglab.KnowledgeGraph` wrapper
    kg = kglab.KnowledgeGraph(store="OxMemory")
    kg.load_rdf(FILENAME, format="xml")
    run_query(kg)
Loading

0 comments on commit 715adb6

Please sign in to comment.