From 39599549d27f4f1bca22bf99b992aa8b8aebd0c3 Mon Sep 17 00:00:00 2001 From: Andrzej Uszok Date: Sun, 1 Oct 2023 01:00:46 -0500 Subject: [PATCH] Extended lc path parsing and error reporting, fix graph test and added new --- domiknows/graph/concept.py | 3 + domiknows/graph/graph.py | 128 +++++++++++------- test_regr/graph_errors/graph_cardinality.py | 20 +-- .../graph_errors/graph_concept_in_path.py | 58 ++++++++ test_regr/graph_errors/graph_nli_1.py | 14 ++ .../test_graph_concept_in_path.py | 25 ++++ test_regr/graph_errors/test_graph_nli_1.py | 23 ++++ test_regr/graph_errors/test_graph_nli_3.py | 4 +- test_regr/graph_errors/test_graph_path.py | 9 +- .../graph_errors/test_graph_path_reverse.py | 8 +- 10 files changed, 225 insertions(+), 67 deletions(-) create mode 100644 test_regr/graph_errors/graph_concept_in_path.py create mode 100644 test_regr/graph_errors/test_graph_concept_in_path.py diff --git a/domiknows/graph/concept.py b/domiknows/graph/concept.py index 679b378b..ddc4cb1d 100644 --- a/domiknows/graph/concept.py +++ b/domiknows/graph/concept.py @@ -57,6 +57,9 @@ def __init__(self, name=None, batch=False): def get_batch(self): return self.batch + def get_var_name(self): + return self.var_name + def __str__(self): return self.name diff --git a/domiknows/graph/graph.py b/domiknows/graph/graph.py index d9c7a9f2..71626360 100644 --- a/domiknows/graph/graph.py +++ b/domiknows/graph/graph.py @@ -26,14 +26,19 @@ def __init__(self, name=None, ontology=None, iri=None, local=None, auto_constrai self._logicalConstrains = OrderedDict() self._relations = OrderedDict() self._batch = None + self.cacheRootConcepts = {} def __iter__(self): yield from BaseGraphTree.__iter__(self) yield from self._concepts yield from self._relations - # find the root parent of the provided concept or relation + def findRootConceptOrRelation(self, relationConcept): + # If the result is already in cache, return it + if relationConcept in self.cacheRootConcepts: + return self.cacheRootConcepts[relationConcept] + try: isAs = relationConcept.is_a() except (AttributeError, TypeError): @@ -42,10 +47,18 @@ def findRootConceptOrRelation(self, relationConcept): for _isA in isAs: parentRelationConcept = _isA.dst - return self.findRootConceptOrRelation(parentRelationConcept) + # Recursive call, but result is cached if previously computed + root = self.findRootConceptOrRelation(parentRelationConcept) + + # Store result in cache + self.cacheRootConcepts[relationConcept] = root + return root + + # If the provided concept or relation is root (has no parents) + # Store result in cache + self.cacheRootConcepts[relationConcept] = relationConcept - # If the provided concept or relation is root (has not parents) - return relationConcept + return relationConcept from collections import namedtuple @@ -115,15 +128,17 @@ def check_if_all_used_variables_are_defined(self, lc, found_variables, used_vari from domiknows.graph import V, LogicalConstrain - def handle_variable_name(variable_name): - if variable_name not in found_variables: - raise Exception(f"Variable {variable_name} found in {headLc} {lc} is not defined") + def handle_variable_name(lc_variable_name): + if lc_variable_name not in found_variables: + raise Exception(f"Variable {lc_variable_name} found in {headLc} {lc} is not defined") - if variable_name not in used_variables: - used_variables[variable_name] = [] + if lc_variable_name not in used_variables: + used_variables[lc_variable_name] = [] - variable_info = (lc, variable_name, lc.e[i-1], e.v) - used_variables[variable_name].append(variable_info) + lcElementType = lc.e[i-1] + lcPath = e.v + variable_info = (lc, lc_variable_name, lcElementType, lcPath) + used_variables[lc_variable_name].append(variable_info) for i, e in enumerate(lc.e): if isinstance(e, V) and e.v: # has path @@ -148,83 +163,99 @@ def handle_variable_name(variable_name): return used_variables def getPathStr(self, path): - from .relation import IsA, HasA + from .concept import Concept + from .relation import Relation pathStr = "" for pathElement in path[1:]: - if isinstance(pathElement, (HasA, IsA)): + if isinstance(pathElement, (Relation,)): if pathElement.var_name: pathStr += pathElement.var_name + " " else: pathStr += pathElement.name + " " - elif isinstance(pathElement.reversed, (HasA, IsA)): - if pathElement.reversed.var_name: - pathStr += pathElement.reversed.var_name + ".reversed " - else: - pathStr += pathElement.name + " " + elif isinstance(pathElement, (Concept,)): + pathStr += pathElement.var_name + " " else: - pathStr += pathElement + " " - + pathStr += pathElement + return pathStr.strip() def check_path(self, path, resultConcept, variableConceptParent, lc_name, foundVariables, variableName): from .relation import IsA, HasA, Relation + from .logicalConstrain import eqL + from .concept import Concept - requiredLeftConcept = variableConceptParent # path element has to be relation with this type to the left + requiredLeftConcept = variableConceptParent.name # path element has to be relation with this type to the left requiredEndOfPathConcept = resultConcept[1] # path has to result in this concept + requiredEndOfPathConceptRoot = self.findRootConceptOrRelation(resultConcept[0]).name expectedRightConcept = None lastPathElement = False + pathStr = self.getPathStr(path) + pathVariable = path[0] + for p, pathElement in enumerate(path[1:]): pathIndex = p+1 + if pathIndex < len(path)-1: expectedRightConcept = path[pathIndex].dst.name + expectedRightConceptRoot = expectedRightConcept else: expectedRightConcept = requiredEndOfPathConcept lastPathElement = True + expectedRightConceptRoot = requiredEndOfPathConceptRoot if isinstance(pathElement, (HasA, IsA, Relation)): - pathElementSrc = pathElement.src - pathElementDst = pathElement.dst + pathElementSrc = pathElement.src.name + pathElementDst = pathElement.dst.name if pathElement.var_name != None: pathElementVarName = pathElement.var_name else: pathElementVarName ="" - # Check if there is a problem with reversed usage - if requiredLeftConcept.name == pathElementDst.name and expectedRightConcept == pathElementSrc.name: - pathVariable = path[0] - pathStr = self.getPathStr(path) - - exceptionStr1 = f"The Path {pathStr} from the variable {pathVariable}, defined in {lc_name} is not valid" - exceptionStr2 = f" The relation {pathElementVarName} is from a {pathElementSrc.name} to a {pathElementDst.name}, but you have used it from a {pathElementDst.name} to a {pathElementSrc.name}." + # Check if there is a problem with reversed usage of the current path element - it has to be possible to reverse the order to fix it + if requiredLeftConcept == pathElementDst and expectedRightConceptRoot == pathElementSrc: + exceptionStr1 = f"The Path '{pathStr}' from the variable {pathVariable}, defined in {lc_name} is not valid" + exceptionStr2 = f"The relation {pathElementVarName} is from a {pathElementSrc} to a {pathElementDst}, but you have used it from a {pathElementDst} to a {pathElementSrc}." if not pathElement.is_reversed: exceptionStr3 = f"You can use the .reversed property to change the direction." else: exceptionStr3 = f"You can use without the .reversed property to change the direction." raise Exception(f"{exceptionStr1} {exceptionStr2} {exceptionStr3}") - # Check if the path is correct - elif requiredLeftConcept.name != pathElementSrc.name: - pathVariable = path[0] - pathStr = self.getPathStr(path) - exceptionStr1 = f"The Path {pathStr} from the variable {pathVariable}, defined in {lc_name} is not valid" - exceptionStr2 = f" and the type of {pathVariable} is a {requiredLeftConcept}, there is no relationship defined between {pathElementDst} and {requiredLeftConcept}." - exceptionStr3 = f"The used variable {pathElementVarName} is a relationship defined between a {pathElementSrc} and a {pathElementDst}, which is not correctly used here." + # Check if the current path element is correctly connected to the left (source) - has matching type + elif requiredLeftConcept != pathElementSrc: + exceptionStr1 = f"The Path '{pathStr}' from the variable {pathVariable}, defined in {lc_name} is not valid." + exceptionStr2 = f"The required source type in this place of the path is a {requiredLeftConcept}," + exceptionStr3 = f"but the used variable {pathElementVarName} is a relationship defined between a {pathElementSrc} and a {pathElementDst}, which is not correctly used here." raise Exception(f"{exceptionStr1} {exceptionStr2} {exceptionStr3}") - elif expectedRightConcept != pathElementDst.name: - pathVariable = path[0] - pathStr = self.getPathStr(path) - exceptionStr1 = f"The Path {pathStr} from the variable {pathVariable}, defined in {lc_name} is not valid" - if lastPathElement: - exceptionStr2 = f" and the required destination type of the last element of the path is a {expectedRightConcept}." - else: - exceptionStr2 = f" and the expected destination type in this place of the path is a {expectedRightConcept}." + # Check if the current path element is correctly connected to the right (destination) - has matching type + elif expectedRightConceptRoot != pathElementDst: + exceptionStr1 = f"The Path '{pathStr}' from the variable {pathVariable}, defined in {lc_name} is not valid." + if lastPathElement: # if this is the last element it has to match the concept in which this path is embedded + exceptionStr2 = f"The required destination type of the last element of the path is a {expectedRightConcept}." + else: # if this it intermediary path element that if is expected that it will match next path element source type + exceptionStr2 = f"The expected destination type in this place of the path is a {expectedRightConcept}." exceptionStr3 = f"The used variable {pathElementVarName} is a relationship defined between a {pathElementSrc} and a {pathElementDst}, which is not correctly used here." raise Exception(f"{exceptionStr1} {exceptionStr2} {exceptionStr3}") - - requiredLeftConcept = pathElementDst # move along the path with the requiredLeftConcept + + # Move along the path with the requiredLeftConcept and pathVariable + requiredLeftConcept = pathElementDst + pathVariable = pathElementVarName + elif isinstance(pathElement, (eqL,)): + pass # it is not check for now + else: + exceptionStr1 = f"The Path '{pathStr}' from the variable {pathVariable}, defined in {lc_name} is not valid." + if isinstance(pathElement, (Concept,)): + exceptionStr2 = f"The used variable {pathElement} is a concept, path element can be only relation or eqL logical constraint used to filter candidates in the path." + else: + pathElementType = type(pathElement) + exceptionStr2 = f"The used variable {pathElement} is a {pathElementType}, path element can be only relation or eqL logical constraint used to filter candidates in the path." + raise Exception(f"{exceptionStr1} {exceptionStr2}") def __exit__(self, exc_type, exc_value, traceback): super().__exit__(exc_type, exc_value, traceback) + #image_name = "./image" + self.name + #self.visualize(image_name, open_image=True) + # Get the current frame and then go one level back frame = inspect.currentframe().f_back @@ -239,6 +270,8 @@ def __exit__(self, exc_type, exc_value, traceback): # set it to the name of the variable they are stored in. if var_value.var_name is None: var_value.var_name = var_name + if isinstance(var_value, (HasA,)): + var_value.reversed.var_name = var_name + ".reversed" lc_info = {} LcInfo = namedtuple('CurrentLcInfo', ['foundVariables', 'usedVariables', 'headLcName']) @@ -372,7 +405,6 @@ def set_apply(self, name, sub): # TODO: what are other cases pass - def visualize(self, filename, open_image=False): import graphviz concept_graph = graphviz.Digraph(name=f"{self.name}") diff --git a/test_regr/graph_errors/graph_cardinality.py b/test_regr/graph_errors/graph_cardinality.py index a8f13589..b204b6e9 100644 --- a/test_regr/graph_errors/graph_cardinality.py +++ b/test_regr/graph_errors/graph_cardinality.py @@ -32,11 +32,11 @@ def setup_graph(fix_constraint=False): ifL( entity('x'), atMostL( - person(path=('x', entity)), - organization(path=('x', entity)), - location(path=('x', entity)), - date(path=('x', entity)), - other_entity_types(path=('x', entity)), + person(path=('x')), + organization(path=('x')), + location(path=('x')), + date(path=('x')), + other_entity_types(path=('x')), 1 ), name="constraint_only_one_entity_fixed" @@ -46,11 +46,11 @@ def setup_graph(fix_constraint=False): entity('x'), atMostL( 1, - person(path=('x', entity)), - organization(path=('x', entity)), - location(path=('x', entity)), - date(path=('x', entity)), - other_entity_types(path=('x', entity)) + person(path=('x')), + organization(path=('x')), + location(path=('x')), + date(path=('x')), + other_entity_types(path=('x')) ), name="constraint_only_one_entity" ) diff --git a/test_regr/graph_errors/graph_concept_in_path.py b/test_regr/graph_errors/graph_concept_in_path.py new file mode 100644 index 00000000..d98094b8 --- /dev/null +++ b/test_regr/graph_errors/graph_concept_in_path.py @@ -0,0 +1,58 @@ +from domiknows.graph import Graph, Concept, Relation +from domiknows.graph.logicalConstrain import ifL, andL, nandL, atMostL, existsL + +def setup_graph(fix_constraint=False): + Graph.clear() + Concept.clear() + Relation.clear() + + with Graph('global') as graph: + with Graph('linguistic') as ling_graph: + text_document = Concept(name='text_document') + + with Graph('application', auto_constraint=False) as app_graph: + entity = Concept(name='entity') + person = entity(name='person', auto_constraint=False) + organization = entity(name='organization', auto_constraint=False) + location = entity(name='location', auto_constraint=False) + date = entity(name='date') + other_entity_types = entity(name='other_entity_types') + + relation = Concept(name='relation') + employment = relation(name='employment') + located = relation(name='located') + part_whole = relation(name='part-whole') + personal_social = relation(name='personal-social') + other_relation = relation(name='other') + + pair = Concept(name='pair') + (rel_pair_entity1, rel_pair_entity2) = pair.has_a(arg1=entity, arg2=entity) + + if fix_constraint: + ifL( + entity('x'), + atMostL( + person(path=('x')), + organization(path=('x')), + location(path=('x')), + date(path=('x')), + other_entity_types(path=('x')), + 1 + ), + name="constraint_only_one_entity_fixed" + ) + else: + ifL( + entity('x'), + atMostL( + person(path=('x', entity)), + organization(path=('x', entity)), + location(path=('x', entity)), + date(path=('x', entity)), + other_entity_types(path=('x', entity)), + 1 + ), + name="constraint_only_one_entity" + ) + + return graph diff --git a/test_regr/graph_errors/graph_nli_1.py b/test_regr/graph_errors/graph_nli_1.py index c07c1cf7..e176ce77 100644 --- a/test_regr/graph_errors/graph_nli_1.py +++ b/test_regr/graph_errors/graph_nli_1.py @@ -19,6 +19,20 @@ def setup_graph(fix_constraint=False): values=['entailment', 'contradiction', 'neutral']) if fix_constraint: + ifL( + andL( + pair('x'), + pair('y'), + pair(path=('x', rel_pair_hypothesis, rel_pair_hypothesis.reversed)), + pair(path=('y')), + ), + ifL( + nli_class.entailment(path=('x')), + notL(nli_class.contradiction(path=('y'))) + ), + name="pair_symmetry_constraint" + ) + else: ifL( andL( pair('x'), diff --git a/test_regr/graph_errors/test_graph_concept_in_path.py b/test_regr/graph_errors/test_graph_concept_in_path.py new file mode 100644 index 00000000..779c82ff --- /dev/null +++ b/test_regr/graph_errors/test_graph_concept_in_path.py @@ -0,0 +1,25 @@ +import pytest +from graph_concept_in_path import setup_graph +import re + +def test_setup_graph_exception(): + try: + setup_graph() + except Exception as e: + sanitized_error_message = re.sub(r'[^\x20-\x7E]', '', str(e)).replace(" ", "") + sanitized_pattern = re.sub(r'[^\x20-\x7E]', '', + "The Path 'entity' from the variable x, defined in constraint_only_one_entity is not valid." + "The used variable entity is a concept, path element can be only relation or eqL logical constraint used to filter candidates in the path.").replace(" ", "") + + print(repr(sanitized_error_message)) + print(repr(sanitized_pattern)) + + assert sanitized_error_message == sanitized_pattern, f"Exception message did not match: got {sanitized_error_message}" + else: + pytest.fail("Expected an exception but none was raised.") + +def test_setup_graph_no_exception(): + try: + setup_graph(fix_constraint=True) + except Exception as e: + pytest.fail(f"Unexpected Exception raised: {e}") \ No newline at end of file diff --git a/test_regr/graph_errors/test_graph_nli_1.py b/test_regr/graph_errors/test_graph_nli_1.py index 73a8155a..ab0a2a81 100644 --- a/test_regr/graph_errors/test_graph_nli_1.py +++ b/test_regr/graph_errors/test_graph_nli_1.py @@ -1,6 +1,29 @@ import pytest from graph_nli_1 import setup_graph import re + +def test_setup_graph_exception(): + try: + setup_graph() + except Exception as e: + sanitized_error_message = re.sub(r'[^\x20-\x7E]', '', str(e)).replace(" ", "") + + sanitized_pattern = re.sub( + r'[^\x20-\x7E]', + '', + ("The Path 'rel_pair_premise rel_pair_hypothesis.reversed' from the variable rel_pair_premise, defined in pair_symmetry_constraint is not valid." + "The required source type in this place of the path is a premise," + "but the used variable rel_pair_hypothesis.reversed is a relationship defined between a hypothesis and a pair, which is not correctly used here.") + ).replace(" ", "") + + print(sanitized_pattern) + + print(repr(sanitized_error_message)) + print(repr(sanitized_pattern)) + + assert sanitized_error_message == sanitized_pattern, f"Exception message did not match: got {sanitized_error_message}" + else: + pytest.fail("Expected an exception but none was raised.") def test_setup_graph_no_exception(): try: diff --git a/test_regr/graph_errors/test_graph_nli_3.py b/test_regr/graph_errors/test_graph_nli_3.py index 5c5f8d74..4d42ca51 100644 --- a/test_regr/graph_errors/test_graph_nli_3.py +++ b/test_regr/graph_errors/test_graph_nli_3.py @@ -11,8 +11,8 @@ def test_setup_graph_exception(): sanitized_pattern = re.sub( r'[^\x20-\x7E]', '', - ("The Path rel_pair_premise from the variable x, defined in pair_symmetry_constraint is not valid" - "and the required destination type of the last element of the path is a pair." + ("The Path 'rel_pair_premise' from the variable x, defined in pair_symmetry_constraint is not valid." + "The required destination type of the last element of the path is a pair." "The used variable rel_pair_premise is a relationship defined between a pair and a premise, which is not correctly used here.") ).replace(" ", "") diff --git a/test_regr/graph_errors/test_graph_path.py b/test_regr/graph_errors/test_graph_path.py index 57c63c30..3675b2b8 100644 --- a/test_regr/graph_errors/test_graph_path.py +++ b/test_regr/graph_errors/test_graph_path.py @@ -7,10 +7,11 @@ def test_setup_graph_exception(): setup_graph() except Exception as e: sanitized_error_message = re.sub(r'[^\x20-\x7E]', '', str(e)).replace(" ", "") - sanitized_pattern = re.sub(r'[^\x20-\x7E]', '', "The Path rel_pair_entity1 from the variable x, defined in LC_employment is not valid" - " and the type of x is a relation, there is no relationship defined between named_entity and relation." - " The used variable rel_pair_entity1 is a relationship defined between a pair and a named_entity," - " which is not correctly used here.").replace(" ", "") + sanitized_pattern = re.sub(r'[^\x20-\x7E]', + '', + "The Path 'rel_pair_entity1' from the variable x, defined in LC_employment is not valid." + "The required source type in this place of the path is a relation," + "but the used variable rel_pair_entity1 is a relationship defined between a pair and a named_entity, which is not correctly used here.").replace(" ", "") print(repr(sanitized_error_message)) print(repr(sanitized_pattern)) diff --git a/test_regr/graph_errors/test_graph_path_reverse.py b/test_regr/graph_errors/test_graph_path_reverse.py index 956047de..cd943d7a 100644 --- a/test_regr/graph_errors/test_graph_path_reverse.py +++ b/test_regr/graph_errors/test_graph_path_reverse.py @@ -7,9 +7,11 @@ def test_setup_graph_exception(): setup_graph() except Exception as e: sanitized_error_message = re.sub(r'[^\x20-\x7E]', '', str(e)).replace(" ", "") - sanitized_pattern = re.sub(r'[^\x20-\x7E]', '', "The Path rel_pair_entity1 from the variable x, defined in LC_person_attendance is not valid" - " The relation rel_pair_entity1 is from a pair to a named_entity, but you have used it from a named_entity to a pair. " - " You can use the .reversed property to change the direction.").replace(" ", "") + sanitized_pattern = re.sub(r'[^\x20-\x7E]', + '', + "The Path 'rel_pair_entity1' from the variable x, defined in LC_person_attendance is not valid" + "The relation rel_pair_entity1 is from a pair to a named_entity, but you have used it from a named_entity to a pair." + "You can use the .reversed property to change the direction.").replace(" ", "") print(repr(sanitized_error_message)) print(repr(sanitized_pattern))