From 5a1833d6902a4dd6a731098cd41485b22df2aa95 Mon Sep 17 00:00:00 2001 From: Ashley Sommer Date: Sat, 2 Jan 2021 19:27:35 +1000 Subject: [PATCH] Potential speedups when executing validation by lazy-loading large modules which may never be required in a normal validation run. Black and Flake8 issues outstanding from 0.14.1 release. Workaround a RDFLib bug trying to import `requests` when requests is not required to be installed. This bug will still be observed if you use SPARQLConstraints, SPARQLFunction or JSFunction features, but it can be worked around by simply installing `requests` in your python environment. --- CHANGELOG.md | 14 +++++++++- pyproject.toml | 2 +- pyshacl/__init__.py | 2 +- pyshacl/consts.py | 1 + pyshacl/extras/js/function.py | 5 +--- pyshacl/extras/js/loader.py | 1 + pyshacl/functions/__init__.py | 50 ++++++++++++++++++++++++++--------- pyshacl/rdfutil/clone.py | 13 ++++----- pyshacl/shape.py | 30 ++++++++++++++------- 9 files changed, 84 insertions(+), 34 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b17e3b6..6d94a05 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,17 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Python PEP 440 Versioning](https://www.python.org/dev/peps/pep-0440/). +## [0.14.2] - 2021-01-02 + +## Added +- Potential speedups when executing validation by lazy-loading large modules which may never be required in a normal validation run. + +## Fixed +- Black and Flake8 issues outstanding from 0.14.1 release. +- Workaround a RDFLib bug trying to import `requests` when requests is not required to be installed. + - This bug will still be observed if you use SPARQLConstraints, SPARQLFunction or JSFunction features, but it can be worked around by simply installing `requests` in your python environment. 
+ + ## [0.14.1] - 2020-12-23 ## Added @@ -710,7 +721,8 @@ just leaves the files open. Now it is up to the command-line client to close the - Initial version, limited functionality -[Unreleased]: https://github.com/RDFLib/pySHACL/compare/v0.14.1...HEAD +[Unreleased]: https://github.com/RDFLib/pySHACL/compare/v0.14.2...HEAD +[0.14.2]: https://github.com/RDFLib/pySHACL/compare/v0.14.1...v0.14.2 [0.14.1]: https://github.com/RDFLib/pySHACL/compare/v0.14.0...v0.14.1 [0.14.0]: https://github.com/RDFLib/pySHACL/compare/v0.13.3...v0.14.0 [0.13.3]: https://github.com/RDFLib/pySHACL/compare/v0.13.2...v0.13.3 diff --git a/pyproject.toml b/pyproject.toml index c680938..cfdf01e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.masonry.api" [tool.poetry] name = "pyshacl" -version = "0.14.1" +version = "0.14.2" # Don't forget to change the version number in __init__.py along with this one description = "Python SHACL Validator" license = "Apache-2.0" diff --git a/pyshacl/__init__.py b/pyshacl/__init__.py index ce164da..af7f212 100644 --- a/pyshacl/__init__.py +++ b/pyshacl/__init__.py @@ -6,7 +6,7 @@ # version compliant with https://www.python.org/dev/peps/pep-0440/ -__version__ = '0.14.1' +__version__ = '0.14.2' # Don't forget to change the version number in pyproject.toml along with this one __all__ = ['validate', 'Validator', '__version__', 'Shape', 'ShapesGraph'] diff --git a/pyshacl/consts.py b/pyshacl/consts.py index 010ba08..dcb969f 100644 --- a/pyshacl/consts.py +++ b/pyshacl/consts.py @@ -35,6 +35,7 @@ SH_SPARQLTargetType = SH.term('SPARQLTargetType') SH_JSTarget = SH.term('JSTarget') SH_JSTargetType = SH.term('JSTargetType') +SH_JSFunction = SH.term('JSFunction') # predicates RDF_type = RDF.term('type') diff --git a/pyshacl/extras/js/function.py b/pyshacl/extras/js/function.py index 6ba6152..a77946b 100644 --- a/pyshacl/extras/js/function.py +++ b/pyshacl/extras/js/function.py @@ -5,9 +5,8 @@ from rdflib.plugins.sparql.operators 
import register_custom_function, unregister_custom_function from rdflib.plugins.sparql.sparql import SPARQLError -from pyshacl.consts import SH from pyshacl.errors import ReportableRuntimeError -from pyshacl.functions import SHACLFunction +from pyshacl.functions.shacl_function import SHACLFunction from .js_executable import JSExecutable @@ -15,8 +14,6 @@ if typing.TYPE_CHECKING: from pyshacl.shapes_graph import ShapesGraph -SH_JSFunction = SH.term('JSFunction') - class JSFunction(SHACLFunction): __slots__ = ('js_exe',) diff --git a/pyshacl/extras/js/loader.py b/pyshacl/extras/js/loader.py index f8e5d6d..d69ad87 100644 --- a/pyshacl/extras/js/loader.py +++ b/pyshacl/extras/js/loader.py @@ -9,6 +9,7 @@ import regex except ImportError: import re + regex = re if typing.TYPE_CHECKING: diff --git a/pyshacl/functions/__init__.py b/pyshacl/functions/__init__.py index 7b6dfbe..f12a604 100644 --- a/pyshacl/functions/__init__.py +++ b/pyshacl/functions/__init__.py @@ -1,10 +1,13 @@ # -*- coding: utf-8 -*- # -from typing import TYPE_CHECKING, List, Sequence, Type, Union +import sys + +from typing import TYPE_CHECKING, List, Sequence, Union from pyshacl.consts import ( RDF_type, SH_ask, + SH_JSFunction, SH_jsFunctionName, SH_jsLibrary, SH_select, @@ -13,12 +16,16 @@ ) from pyshacl.pytypes import GraphLike -from .shacl_function import SHACLFunction, SPARQLFunction - if TYPE_CHECKING: + from pyshacl.extras.js.function import JSFunction from pyshacl.shapes_graph import ShapesGraph + from .shacl_function import SHACLFunction, SPARQLFunction + + +module = sys.modules[__name__] + def gather_functions(shacl_graph: 'ShapesGraph') -> Sequence[Union['SHACLFunction', 'SPARQLFunction']]: """ @@ -31,10 +38,8 @@ def gather_functions(shacl_graph: 'ShapesGraph') -> Sequence[Union['SHACLFunctio spq_nodes = set(shacl_graph.subjects(RDF_type, SH_SPARQLFunction)) if shacl_graph.js_enabled: - from pyshacl.extras.js.function import JSFunction, SH_JSFunction - js_nodes = 
set(shacl_graph.subjects(RDF_type, SH_JSFunction)) - use_JSFunction: Union[bool, Type] = JSFunction + use_JSFunction = True else: use_JSFunction = False js_nodes = set() @@ -60,13 +65,34 @@ def gather_functions(shacl_graph: 'ShapesGraph') -> Sequence[Union['SHACLFunctio js_nodes.add(n) all_fns: List[Union['SHACLFunction', 'SPARQLFunction', 'JSFunction']] = [] - for n in spq_nodes: - all_fns.append(SPARQLFunction(n, shacl_graph)) - for n in scl_nodes: - all_fns.append(SHACLFunction(n, shacl_graph)) - if use_JSFunction and callable(use_JSFunction): + if spq_nodes: + SPQ = getattr(module, 'SPARQLFunction', None) + if not SPQ: + # Lazy-import SPARQLFunction to prevent rdflib import error + from .shacl_function import SPARQLFunction + setattr(module, 'SPARQLFunction', SPARQLFunction) + SPQ = SPARQLFunction + for n in spq_nodes: + all_fns.append(SPQ(n, shacl_graph)) + if scl_nodes: + SCL = getattr(module, 'SHACLFunction', None) + if not SCL: + # Lazy-import SHACLFunction to prevent rdflib import error + from .shacl_function import SHACLFunction + setattr(module, 'SHACLFunction', SHACLFunction) + SCL = SHACLFunction + for n in scl_nodes: + all_fns.append(SCL(n, shacl_graph)) + if use_JSFunction and js_nodes: + JSF = getattr(module, 'JSFunction', None) + if not JSF: + # Lazy-import JSFunction to prevent rdflib import error + from pyshacl.extras.js.function import JSFunction + + setattr(module, 'JSFunction', JSFunction) + JSF = JSFunction for n in js_nodes: - all_fns.append(use_JSFunction(n, shacl_graph)) + all_fns.append(JSF(n, shacl_graph)) return all_fns diff --git a/pyshacl/rdfutil/clone.py b/pyshacl/rdfutil/clone.py index 1712c84..0f367e4 100644 --- a/pyshacl/rdfutil/clone.py +++ b/pyshacl/rdfutil/clone.py @@ -62,7 +62,9 @@ def clone_graph(source_graph, target_graph=None, identifier=None): return g -def mix_datasets(base_ds: ConjunctiveLike, extra_ds: GraphLike, target_ds: Optional[Union[ConjunctiveLike, str]] = None): +def mix_datasets( + base_ds: ConjunctiveLike, 
extra_ds: GraphLike, target_ds: Optional[Union[ConjunctiveLike, str]] = None +): """ Make a clone of base_ds (dataset) and add in the triples from extra_ds (dataset) :param base_ds: @@ -89,10 +91,7 @@ def mix_datasets(base_ds: ConjunctiveLike, extra_ds: GraphLike, target_ds: Optio if target_ds == "inplace": target_ds = base_ds for mg in mixin_graphs: - mod_named_graphs = { - g.identifier: mix_graphs(g, mg, target_graph="inplace") - for g in base_named_graphs - } + mod_named_graphs = {g.identifier: mix_graphs(g, mg, target_graph="inplace") for g in base_named_graphs} elif isinstance(target_ds, str): raise RuntimeError("target_ds cannot be a string (unless it is 'inplace')") else: @@ -100,7 +99,9 @@ def mix_datasets(base_ds: ConjunctiveLike, extra_ds: GraphLike, target_ds: Optio mixed_graphs = {} for mg in mixin_graphs: mod_named_graphs = { - g.identifier: mix_graphs(g, mg, target_graph=rdflib.Graph(store=target_ds.store, identifier=g.identifier)) + g.identifier: mix_graphs( + g, mg, target_graph=rdflib.Graph(store=target_ds.store, identifier=g.identifier) + ) for g in base_named_graphs } mixed_graphs.update(mod_named_graphs) diff --git a/pyshacl/shape.py b/pyshacl/shape.py index f871ff9..a9b0c5b 100644 --- a/pyshacl/shape.py +++ b/pyshacl/shape.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # import logging +import sys from decimal import Decimal from typing import TYPE_CHECKING, List, Optional, Set, Tuple, Type, Union @@ -44,6 +45,8 @@ if TYPE_CHECKING: from pyshacl.shapes_graph import ShapesGraph +module = sys.modules[__name__] + class Shape(object): @@ -237,9 +240,7 @@ def advanced_target(self): custom_targets = set(self.sg.objects(self.node, SH_target)) result_set = dict() if self.sg.js_enabled: - from pyshacl.extras.js.target import JSTarget - - use_JSTarget: Union[bool, Type] = JSTarget + use_JSTarget: Union[bool, Type] = True else: use_JSTarget = False @@ -258,8 +259,14 @@ def advanced_target(self): ct['qh'] = qh elif has_fnname or (SH_JSTarget in is_types): if 
use_JSTarget: + JST = getattr(module, "JSTarget", None) + if not JST: + # Lazy-import JS-Target to prevent RDFLib import error + from pyshacl.extras.js.target import JSTarget as JST + + setattr(module, "JSTarget", JST) ct['type'] = SH_JSTarget - ct['targeter'] = use_JSTarget(self.sg, c) + ct['targeter'] = JST(self.sg, c) else: # Found JSTarget, but JS is not enabled in PySHACL. Ignore this target. pass @@ -516,18 +523,23 @@ def validate( path_str = "->".join((str(e) for e in _evaluation_path)) raise ReportableRuntimeError("Evaluation path too deep!\n{}".format(path_str)) # Lazy import here to avoid an import loop - from .constraints import ALL_CONSTRAINT_PARAMETERS, CONSTRAINT_PARAMETERS_MAP + CONSTRAINT_PARAMETERS, PARAMETER_MAP = getattr(module, 'CONSTRAINT_PARAMS', (None, None)) + if not CONSTRAINT_PARAMETERS: + from .constraints import ALL_CONSTRAINT_PARAMETERS, CONSTRAINT_PARAMETERS_MAP + setattr(module, 'CONSTRAINT_PARAMS', (ALL_CONSTRAINT_PARAMETERS, CONSTRAINT_PARAMETERS_MAP)) + CONSTRAINT_PARAMETERS = ALL_CONSTRAINT_PARAMETERS + PARAMETER_MAP = CONSTRAINT_PARAMETERS_MAP if self.sg.js_enabled: - search_parameters = ALL_CONSTRAINT_PARAMETERS.copy() - constraint_map = CONSTRAINT_PARAMETERS_MAP.copy() + search_parameters = CONSTRAINT_PARAMETERS.copy() + constraint_map = PARAMETER_MAP.copy() from pyshacl.extras.js.constraint import JSConstraint, SH_js search_parameters.append(SH_js) constraint_map[SH_js] = JSConstraint else: - search_parameters = ALL_CONSTRAINT_PARAMETERS - constraint_map = CONSTRAINT_PARAMETERS_MAP + search_parameters = CONSTRAINT_PARAMETERS + constraint_map = PARAMETER_MAP parameters = (p for p, v in self.sg.predicate_objects(self.node) if p in search_parameters) reports = [] focus_value_nodes = self.value_nodes(target_graph, focus)