Skip to content

Commit

Permalink
Potential speedups when executing validation by lazy-loading large mo…
Browse files Browse the repository at this point in the history
…dules which may never be required in a normal validation run.

Black and Flake8 issues outstanding from 0.14.1 release.
Work around an RDFLib bug trying to import `requests` when requests is not required to be installed.
This bug will still be observed if you use SPARQLConstraints, SPARQLFunction or JSFunction features, but it can be worked around by simply installing `requests` in your python environment.
  • Loading branch information
ashleysommer committed Jan 2, 2021
1 parent 8e0a513 commit 5a1833d
Show file tree
Hide file tree
Showing 9 changed files with 84 additions and 34 deletions.
14 changes: 13 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,17 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Python PEP 440 Versioning](https://www.python.org/dev/peps/pep-0440/).

## [0.14.2] - 2021-01-02

## Added
- Potential speedups when executing validation by lazy-loading large modules which may never be required in a normal validation run.

## Fixed
- Black and Flake8 issues outstanding from 0.14.1 release.
- Work around an RDFLib bug trying to import `requests` when requests is not required to be installed.
- This bug will still be observed if you use SPARQLConstraints, SPARQLFunction or JSFunction features, but it can be worked around by simply installing `requests` in your python environment.


## [0.14.1] - 2020-12-23

## Added
Expand Down Expand Up @@ -710,7 +721,8 @@ just leaves the files open. Now it is up to the command-line client to close the

- Initial version, limited functionality

[Unreleased]: https://github.com/RDFLib/pySHACL/compare/v0.14.1...HEAD
[Unreleased]: https://github.com/RDFLib/pySHACL/compare/v0.14.2...HEAD
[0.14.2]: https://github.com/RDFLib/pySHACL/compare/v0.14.1...v0.14.2
[0.14.1]: https://github.com/RDFLib/pySHACL/compare/v0.14.0...v0.14.1
[0.14.0]: https://github.com/RDFLib/pySHACL/compare/v0.13.3...v0.14.0
[0.13.3]: https://github.com/RDFLib/pySHACL/compare/v0.13.2...v0.13.3
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "poetry.masonry.api"

[tool.poetry]
name = "pyshacl"
version = "0.14.1"
version = "0.14.2"
# Don't forget to change the version number in __init__.py along with this one
description = "Python SHACL Validator"
license = "Apache-2.0"
Expand Down
2 changes: 1 addition & 1 deletion pyshacl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@


# version compliant with https://www.python.org/dev/peps/pep-0440/
__version__ = '0.14.1'
__version__ = '0.14.2'
# Don't forget to change the version number in pyproject.toml along with this one

__all__ = ['validate', 'Validator', '__version__', 'Shape', 'ShapesGraph']
1 change: 1 addition & 0 deletions pyshacl/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
SH_SPARQLTargetType = SH.term('SPARQLTargetType')
SH_JSTarget = SH.term('JSTarget')
SH_JSTargetType = SH.term('JSTargetType')
SH_JSFunction = SH.term('JSFunction')

# predicates
RDF_type = RDF.term('type')
Expand Down
5 changes: 1 addition & 4 deletions pyshacl/extras/js/function.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,15 @@
from rdflib.plugins.sparql.operators import register_custom_function, unregister_custom_function
from rdflib.plugins.sparql.sparql import SPARQLError

from pyshacl.consts import SH
from pyshacl.errors import ReportableRuntimeError
from pyshacl.functions import SHACLFunction
from pyshacl.functions.shacl_function import SHACLFunction

from .js_executable import JSExecutable


if typing.TYPE_CHECKING:
from pyshacl.shapes_graph import ShapesGraph

SH_JSFunction = SH.term('JSFunction')


class JSFunction(SHACLFunction):
__slots__ = ('js_exe',)
Expand Down
1 change: 1 addition & 0 deletions pyshacl/extras/js/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import regex
except ImportError:
import re

regex = re

if typing.TYPE_CHECKING:
Expand Down
50 changes: 38 additions & 12 deletions pyshacl/functions/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
# -*- coding: utf-8 -*-
#
from typing import TYPE_CHECKING, List, Sequence, Type, Union
import sys

from typing import TYPE_CHECKING, List, Sequence, Union

from pyshacl.consts import (
RDF_type,
SH_ask,
SH_JSFunction,
SH_jsFunctionName,
SH_jsLibrary,
SH_select,
Expand All @@ -13,12 +16,16 @@
)
from pyshacl.pytypes import GraphLike

from .shacl_function import SHACLFunction, SPARQLFunction


if TYPE_CHECKING:
from pyshacl.extras.js.function import JSFunction
from pyshacl.shapes_graph import ShapesGraph

from .shacl_function import SHACLFunction, SPARQLFunction


module = sys.modules[__name__]


def gather_functions(shacl_graph: 'ShapesGraph') -> Sequence[Union['SHACLFunction', 'SPARQLFunction']]:
"""
Expand All @@ -31,10 +38,8 @@ def gather_functions(shacl_graph: 'ShapesGraph') -> Sequence[Union['SHACLFunctio

spq_nodes = set(shacl_graph.subjects(RDF_type, SH_SPARQLFunction))
if shacl_graph.js_enabled:
from pyshacl.extras.js.function import JSFunction, SH_JSFunction

js_nodes = set(shacl_graph.subjects(RDF_type, SH_JSFunction))
use_JSFunction: Union[bool, Type] = JSFunction
use_JSFunction = True
else:
use_JSFunction = False
js_nodes = set()
Expand All @@ -60,13 +65,34 @@ def gather_functions(shacl_graph: 'ShapesGraph') -> Sequence[Union['SHACLFunctio
js_nodes.add(n)

all_fns: List[Union['SHACLFunction', 'SPARQLFunction', 'JSFunction']] = []
for n in spq_nodes:
all_fns.append(SPARQLFunction(n, shacl_graph))
for n in scl_nodes:
all_fns.append(SHACLFunction(n, shacl_graph))
if use_JSFunction and callable(use_JSFunction):
if spq_nodes:
SPQ = getattr(module, 'SPARQLFunction', None)
if not SPQ:
# Lazy-import SPARQLFunction to prevent rdflib import error
from .shacl_function import SPARQLFunction
setattr(module, 'SPARQLFunction', SPARQLFunction)
SPQ = SPARQLFunction
for n in spq_nodes:
all_fns.append(SPQ(n, shacl_graph))
if scl_nodes:
SCL = getattr(module, 'SHACLFunction', None)
if not SCL:
# Lazy-import SHACLFunction to prevent rdflib import error
from .shacl_function import SHACLFunction
setattr(module, 'SHACLFunction', SHACLFunction)
SCL = SHACLFunction
for n in scl_nodes:
all_fns.append(SCL(n, shacl_graph))
if use_JSFunction and js_nodes:
JSF = getattr(module, 'JSFunction', None)
if not JSF:
# Lazy-import JSFunction to prevent rdflib import error
from pyshacl.extras.js.function import JSFunction

setattr(module, 'JSFunction', JSFunction)
JSF = JSFunction
for n in js_nodes:
all_fns.append(use_JSFunction(n, shacl_graph))
all_fns.append(JSF(n, shacl_graph))
return all_fns


Expand Down
13 changes: 7 additions & 6 deletions pyshacl/rdfutil/clone.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,9 @@ def clone_graph(source_graph, target_graph=None, identifier=None):
return g


def mix_datasets(base_ds: ConjunctiveLike, extra_ds: GraphLike, target_ds: Optional[Union[ConjunctiveLike, str]] = None):
def mix_datasets(
base_ds: ConjunctiveLike, extra_ds: GraphLike, target_ds: Optional[Union[ConjunctiveLike, str]] = None
):
"""
Make a clone of base_ds (dataset) and add in the triples from extra_ds (dataset)
:param base_ds:
Expand All @@ -89,18 +91,17 @@ def mix_datasets(base_ds: ConjunctiveLike, extra_ds: GraphLike, target_ds: Optio
if target_ds == "inplace":
target_ds = base_ds
for mg in mixin_graphs:
mod_named_graphs = {
g.identifier: mix_graphs(g, mg, target_graph="inplace")
for g in base_named_graphs
}
mod_named_graphs = {g.identifier: mix_graphs(g, mg, target_graph="inplace") for g in base_named_graphs}
elif isinstance(target_ds, str):
raise RuntimeError("target_ds cannot be a string (unless it is 'inplace')")
else:

mixed_graphs = {}
for mg in mixin_graphs:
mod_named_graphs = {
g.identifier: mix_graphs(g, mg, target_graph=rdflib.Graph(store=target_ds.store, identifier=g.identifier))
g.identifier: mix_graphs(
g, mg, target_graph=rdflib.Graph(store=target_ds.store, identifier=g.identifier)
)
for g in base_named_graphs
}
mixed_graphs.update(mod_named_graphs)
Expand Down
30 changes: 21 additions & 9 deletions pyshacl/shape.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
#
import logging
import sys

from decimal import Decimal
from typing import TYPE_CHECKING, List, Optional, Set, Tuple, Type, Union
Expand Down Expand Up @@ -44,6 +45,8 @@
if TYPE_CHECKING:
from pyshacl.shapes_graph import ShapesGraph

module = sys.modules[__name__]


class Shape(object):

Expand Down Expand Up @@ -237,9 +240,7 @@ def advanced_target(self):
custom_targets = set(self.sg.objects(self.node, SH_target))
result_set = dict()
if self.sg.js_enabled:
from pyshacl.extras.js.target import JSTarget

use_JSTarget: Union[bool, Type] = JSTarget
use_JSTarget: Union[bool, Type] = True
else:
use_JSTarget = False

Expand All @@ -258,8 +259,14 @@ def advanced_target(self):
ct['qh'] = qh
elif has_fnname or (SH_JSTarget in is_types):
if use_JSTarget:
JST = getattr(module, "JSTarget", None)
if not JST:
# Lazy-import JS-Target to prevent RDFLib import error
from pyshacl.extras.js.target import JSTarget as JST

setattr(module, "JSTarget", JST)
ct['type'] = SH_JSTarget
ct['targeter'] = use_JSTarget(self.sg, c)
ct['targeter'] = JST(self.sg, c)
else:
# Found JSTarget, but JS is not enabled in PySHACL. Ignore this target.
pass
Expand Down Expand Up @@ -516,18 +523,23 @@ def validate(
path_str = "->".join((str(e) for e in _evaluation_path))
raise ReportableRuntimeError("Evaluation path too deep!\n{}".format(path_str))
# Lazy import here to avoid an import loop
from .constraints import ALL_CONSTRAINT_PARAMETERS, CONSTRAINT_PARAMETERS_MAP
CONSTRAINT_PARAMETERS, PARAMETER_MAP = getattr(module, 'CONSTRAINT_PARAMS', (None, None))
if not CONSTRAINT_PARAMETERS:
from .constraints import ALL_CONSTRAINT_PARAMETERS, CONSTRAINT_PARAMETERS_MAP

setattr(module, 'CONSTRAINT_PARAMS', (ALL_CONSTRAINT_PARAMETERS, CONSTRAINT_PARAMETERS_MAP))
CONSTRAINT_PARAMETERS = ALL_CONSTRAINT_PARAMETERS
PARAMETER_MAP = CONSTRAINT_PARAMETERS_MAP
if self.sg.js_enabled:
search_parameters = ALL_CONSTRAINT_PARAMETERS.copy()
constraint_map = CONSTRAINT_PARAMETERS_MAP.copy()
search_parameters = CONSTRAINT_PARAMETERS.copy()
constraint_map = PARAMETER_MAP.copy()
from pyshacl.extras.js.constraint import JSConstraint, SH_js

search_parameters.append(SH_js)
constraint_map[SH_js] = JSConstraint
else:
search_parameters = ALL_CONSTRAINT_PARAMETERS
constraint_map = CONSTRAINT_PARAMETERS_MAP
search_parameters = CONSTRAINT_PARAMETERS
constraint_map = PARAMETER_MAP
parameters = (p for p, v in self.sg.predicate_objects(self.node) if p in search_parameters)
reports = []
focus_value_nodes = self.value_nodes(target_graph, focus)
Expand Down

0 comments on commit 5a1833d

Please sign in to comment.