Merge branch 'develop'

DaniFdezAlvarez · DaniFdezAlvarez · commit e1af16eb8074 · 2024-08-28T17:16:08.000+02:00
diff --git a/setup.py b/setup.py
@@ -8,12 +8,12 @@ def read(file_path):
 setup(
   name = 'shexer',
   packages = find_packages(exclude=["*.local_code.*"]), # this must be the same as the name above
-  version = '2.5.6',
+  version = '2.5.7',
   description = 'Automatic schema extraction for RDF graphs',
   author = 'Daniel Fernandez-Alvarez',
   author_email = 'danifdezalvarez@gmail.com',
   url = 'https://github.com/DaniFdezAlvarez/shexer',
-  download_url = 'https://github.com/DaniFdezAlvarez/shexer/archive/2.5.6.tar.gz',
+  download_url = 'https://github.com/DaniFdezAlvarez/shexer/archive/2.5.7.tar.gz',
   keywords = ['testing', 'shexer', 'shexerp3', "rdf", "shex", "shacl", "schema"],
   long_description = read('README.md'),
   long_description_content_type='text/markdown',
diff --git a/shexer/core/profiling/class_profiler.py b/shexer/core/profiling/class_profiler.py
@@ -9,8 +9,9 @@
 from shexer.core.profiling.strategy.include_reverse_features_strategy import IncludeReverseFeaturesStrategy
 from shexer.core.profiling.consts import RDF_TYPE_STR
 from shexer.utils.structures.dicts import ShapeExampleFeaturesDict
+from shexer.model.shape import STARTING_CHAR_FOR_SHAPE_NAME
 
-_MINIMAL_IRI_INIT = "@"
+_MINIMAL_IRI_INIT = STARTING_CHAR_FOR_SHAPE_NAME
 
 
 
diff --git a/shexer/core/shexing/strategy/abstract_shexing_strategy.py b/shexer/core/shexing/strategy/abstract_shexing_strategy.py
@@ -4,6 +4,7 @@
 from shexer.io.shex.formater.statement_serializers.st_serializers_factory import StSerializerFactory
 from shexer.core.shexing.strategy.minimal_iri_strategy.annotate_min_iri_strategy import AnnotateMinIriStrategy
 from shexer.core.shexing.strategy.minimal_iri_strategy.ignore_min_iri_strategy import IgnoreMinIriStrategy
+from shexer.model.shape import STARTING_CHAR_FOR_SHAPE_NAME
 
 
 _DIRECT_ST_SERIALIZER = 0
@@ -261,7 +262,7 @@ def _manage_group_to_decide_with_or(self, group_to_decide):
                 yield a_new_statement
 
     def _is_an_IRI(self, statement_type):
-        return statement_type == IRI_ELEM_TYPE or statement_type.startswith("@")  # TODO careful here. Refactor
+        return statement_type == IRI_ELEM_TYPE or statement_type.startswith(STARTING_CHAR_FOR_SHAPE_NAME)  # TODO careful here. Refactor
 
 
     def _remove_IRI_statements_if_useles(self, group_of_statements):
diff --git a/shexer/io/shape_map/label/shape_map_label_parser.py b/shexer/io/shape_map/label/shape_map_label_parser.py
@@ -1,4 +1,5 @@
 from shexer.model.shape import STARTING_CHAR_FOR_SHAPE_NAME
+from shexer.io.shex.formater.consts import SHAPE_LINK_CHAR
 
 class ShapeMapLabelParser(object):
 
@@ -9,7 +10,7 @@ def parse_shape_map_label(self, raw_label):
 
         if self._is_a_prefixed_uri(raw_label):
             return STARTING_CHAR_FOR_SHAPE_NAME + self._parse_prefixed_label(raw_label)
-        return STARTING_CHAR_FOR_SHAPE_NAME + raw_label
+        return raw_label  # TODO: confirm that unprefixed labels must NOT get STARTING_CHAR_FOR_SHAPE_NAME prepended
         # return self._parse_unprefixed_label(raw_label)
 
 
diff --git a/shexer/io/shape_map/node_selector/node_selector_parser.py b/shexer/io/shape_map/node_selector/node_selector_parser.py
@@ -67,7 +67,7 @@ def _parse_focus_expression(self, raw_selector):
                                   sgraph=self._sgraph)
 
     def _turn_focus_exp_tokens_into_query(self, subj, pred, obj):
-        return self._namespaces_to_string() + "SELECT " + _FOCUS_VARIABLE + " WHERE {" + subj + " " + pred + " " + obj + " . } " # LIMIT 20"
+        return self._namespaces_to_string() + "SELECT " + _FOCUS_VARIABLE + " WHERE {" + subj + " " + pred + " " + obj + " . } "
         # return sparql.prepareQuery(string_query, initNs=self._prefix_namespace_dict)
 
     def _parse_subj_obj_focus_expression(self, token, focus_count):
diff --git a/shexer/io/shex/formater/statement_serializers/base_statement_serializer.py b/shexer/io/shex/formater/statement_serializers/base_statement_serializer.py
@@ -1,5 +1,5 @@
 from shexer.io.shex.formater.consts import SPACES_GAP_BETWEEN_TOKENS, \
-    COMMENT_INI, TARGET_LINE_LENGHT, SPACES_GAP_FOR_FREQUENCY, KLEENE_CLOSURE, POSITIVE_CLOSURE, OPT_CARDINALITY
+    COMMENT_INI, TARGET_LINE_LENGHT, SPACES_GAP_FOR_FREQUENCY, KLEENE_CLOSURE, POSITIVE_CLOSURE, OPT_CARDINALITY, SHAPE_LINK_CHAR
 from shexer.model.IRI import IRI_ELEM_TYPE
 from shexer.model.shape import STARTING_CHAR_FOR_SHAPE_NAME
 from shexer.utils.shapes import prefixize_shape_name_if_possible
@@ -53,16 +53,16 @@ def tune_token(a_token, namespaces_dict):
         # TODO:  a lot to correct here for normal behaviour
         if a_token.startswith(STARTING_CHAR_FOR_SHAPE_NAME):  # Shape
             # return STARTING_CHAR_FOR_SHAPE_NAME +":" + a_token.replace(STARTING_CHAR_FOR_SHAPE_NAME, "")
-            return STARTING_CHAR_FOR_SHAPE_NAME \
+            return SHAPE_LINK_CHAR \
                    + prefixize_shape_name_if_possible(a_shape_name=a_token,
                                                       namespaces_prefix_dict=namespaces_dict)
         if a_token == IRI_ELEM_TYPE:  # iri
             return a_token
         if ":" not in a_token:
             if "<" in a_token:
-                return STARTING_CHAR_FOR_SHAPE_NAME + a_token
+                return SHAPE_LINK_CHAR + a_token
             else:
-                return STARTING_CHAR_FOR_SHAPE_NAME + "<" + a_token + ">"
+                return SHAPE_LINK_CHAR + "<" + a_token + ">"
         candidate_prefixed = BaseStatementSerializer._prefixize_uri_if_possible(uri=a_token,
                                                                                 namespaces_dict=namespaces_dict)
         if candidate_prefixed is not None:
diff --git a/shexer/io/uml/uml_serializer.py b/shexer/io/uml/uml_serializer.py
@@ -3,6 +3,7 @@
 from shexer.utils.uri import prefixize_uri_if_possible
 from shexer.model.fixed_prop_choice_statement import FixedPropChoiceStatement
 from shexer.consts import RDF_TYPE
+from shexer.model.shape import STARTING_CHAR_FOR_SHAPE_NAME
 import warnings
 
 
@@ -90,7 +91,7 @@ def _serialize_obj_of_non_shape_link(self, a_statement):
             return result
         types = []
         for a_type in a_statement.st_types:
-            if a_type.startswith("@"):
+            if a_type.startswith(STARTING_CHAR_FOR_SHAPE_NAME):
                 types.append("@" + prefixize_uri_if_possible(target_uri=a_type[1:],
                                                              namespaces_prefix_dict=self._namespaces_dict,
                                                              corners=True))
@@ -104,7 +105,7 @@ def _is_a_type_declaration(self, a_statement):
     def _is_a_shape_link(self, statement):
         if type(statement) == FixedPropChoiceStatement:
             return False
-        return statement.st_type.startswith("@")
+        return statement.st_type.startswith(STARTING_CHAR_FOR_SHAPE_NAME)
 
     def _declare_and_open_shape(self, a_shape):
         target_name = prefixize_shape_name_if_possible(a_shape_name=a_shape.name,
diff --git a/shexer/model/shape.py b/shexer/model/shape.py
@@ -1,4 +1,4 @@
-STARTING_CHAR_FOR_SHAPE_NAME = "@"
+STARTING_CHAR_FOR_SHAPE_NAME = "%"
 
 
 class Shape(object):
diff --git a/shexer/shaper.py b/shexer/shaper.py
@@ -12,6 +12,8 @@
 from shexer.utils.factories.shape_serializer_factory import get_shape_serializer, get_uml_serializer
 from shexer.utils.namespaces import find_adequate_prefix_for_shapes_namespaces
 from shexer.utils.log import log_msg
+from shexer.utils.uri import unprefixize_uri_if_possible
+from shexer.utils.dict import reverse_keys_and_values
 from shexer.consts import RATIO_INSTANCES
 
 
@@ -150,7 +152,10 @@ def __init__(self, target_classes=None,
         self._list_of_url_input = list_of_url_input
         self._rdflib_graph = rdflib_graph
         self._namespaces_dict = namespaces_dict if namespaces_dict is not None else {}
-        self._instantiation_property = instantiation_property
+        self._instantiation_property = \
+            unprefixize_uri_if_possible(instantiation_property,
+                                        include_corners=False,
+                                        prefix_namespaces_dict=reverse_keys_and_values(self._namespaces_dict))
         self._namespaces_to_ignore = namespaces_to_ignore
         self._infer_numeric_types_for_untyped_literals = infer_numeric_types_for_untyped_literals
         self._discard_useles_constraints_with_positive_closure = discard_useless_constraints_with_positive_closure
diff --git a/shexer/utils/shapes.py b/shexer/utils/shapes.py
@@ -39,6 +39,6 @@ def build_shape_name_for_qualifier_prop_uri(prop_uri, shapes_namespace):  # TODO
 
 
 def prefixize_shape_name_if_possible(a_shape_name, namespaces_prefix_dict):
-    result = prefixize_uri_if_possible(target_uri=a_shape_name[1:],                  # Avoid the "@" starting char
+    result = prefixize_uri_if_possible(target_uri=a_shape_name[1:],                  # Skip the leading STARTING_CHAR_FOR_SHAPE_NAME char
                                        namespaces_prefix_dict=namespaces_prefix_dict)
     return result
diff --git a/shexer/utils/uri.py b/shexer/utils/uri.py
@@ -1,3 +1,4 @@
+from shexer.model.shape import STARTING_CHAR_FOR_SHAPE_NAME
 
 XSD_NAMESPACE = "http://www.w3.org/2001/XMLSchema#"
 XSD_PREFIX = "xsd"
@@ -104,7 +105,7 @@ def is_a_correct_uri(target_uri, prefix_namespace_dict):
 
 
 def there_is_arroba_after_last_quotes(target_str):
-    if target_str.rfind("@") > target_str.rfind('"'):
+    if target_str.rfind(STARTING_CHAR_FOR_SHAPE_NAME) > target_str.rfind('"'):
         return True
     return False
 
diff --git a/test/test_disable_endpoint_cache.py b/test/test_disable_endpoint_cache.py
@@ -55,6 +55,5 @@ def test_all_classes_mode(self):
                         limit_remote_instances=5,
                         disable_endpoint_cache=True)
         str_result = shaper.shex_graph(string_output=True)
-        print(str_result)
         self.assertTrue(number_of_shapes(str_result) > 2)
         pass  #
diff --git a/test/test_instantiation_property.py b/test/test_instantiation_property.py
@@ -48,6 +48,19 @@ def test_explicit_ex_a(self):
         self.assertTrue(file_vs_str_tunned_comparison(file_path=_BASE_DIR + "G1_ex_a.shex",
                                                       str_target=str_result))
 
+    def test_explicit_ex_a_prefixed(self):
+        shaper = Shaper(target_classes=["http://xmlns.com/foaf/0.1/Person",
+                                        "http://xmlns.com/foaf/0.1/Document"],
+                        graph_file_input=_BASE_DIR + "G1_ex_a.ttl",
+                        instantiation_property="ex:a",
+                        namespaces_dict=default_namespaces(),
+                        all_classes_mode=False,
+                        input_format=TURTLE,
+                        disable_comments=True)
+        str_result = shaper.shex_graph(string_output=True)
+        self.assertTrue(file_vs_str_tunned_comparison(file_path=_BASE_DIR + "G1_ex_a.shex",
+                                                      str_target=str_result))
+
     def test_explicit_ex_a_rdf_type_mixed(self):
         # G1_ex_a_some_rdftype
         shaper = Shaper(target_classes=["http://xmlns.com/foaf/0.1/Person",
diff --git a/test/test_url_endpoint.py b/test/test_url_endpoint.py
@@ -48,6 +48,6 @@ def test_all_classes_mode(self):
                         track_classes_for_entities_at_last_depth_level=False,
                         limit_remote_instances=5)
         str_result = shaper.shex_graph(string_output=True)
-        print(str_result)
         self.assertTrue(number_of_shapes(str_result) > 2)
-        pass  #
+
+

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-STARTING_CHAR_FOR_SHAPE_NAME = "@"`
	`1`	`+STARTING_CHAR_FOR_SHAPE_NAME = "%"`
`2`	`2`
`3`	`3`
`4`	`4`	`class Shape(object):`