From 4f95543606cf091fec5f6a040ac97a7cd96e378a Mon Sep 17 00:00:00 2001 From: Tung Lieu Date: Fri, 28 Feb 2025 15:41:45 +0700 Subject: [PATCH 01/22] constraints structure --- datamimic_ce/constants/attribute_constants.py | 2 + datamimic_ce/constants/element_constants.py | 1 + datamimic_ce/model/constraints_model.py | 11 +++++ datamimic_ce/model/rule_model.py | 21 +++++++++ datamimic_ce/parsers/constraints_parser.py | 47 +++++++++++++++++++ .../statements/constraints_statement.py | 11 +++++ datamimic_ce/tasks/constraints_task.py | 22 +++++++++ 7 files changed, 115 insertions(+) create mode 100644 datamimic_ce/model/constraints_model.py create mode 100644 datamimic_ce/model/rule_model.py create mode 100644 datamimic_ce/parsers/constraints_parser.py create mode 100644 datamimic_ce/statements/constraints_statement.py create mode 100644 datamimic_ce/tasks/constraints_task.py diff --git a/datamimic_ce/constants/attribute_constants.py b/datamimic_ce/constants/attribute_constants.py index 662dfe22..d0df8308 100644 --- a/datamimic_ce/constants/attribute_constants.py +++ b/datamimic_ce/constants/attribute_constants.py @@ -73,3 +73,5 @@ ATTR_STRING: Final = "string" ATTR_BUCKET: Final = "bucket" ATTR_MP_PLATFORM: Final = "mpPlatform" +ATTR_IF: Final = "if" +ATTR_THEN: Final = "then" diff --git a/datamimic_ce/constants/element_constants.py b/datamimic_ce/constants/element_constants.py index 5002d38e..2a0d2185 100644 --- a/datamimic_ce/constants/element_constants.py +++ b/datamimic_ce/constants/element_constants.py @@ -25,3 +25,4 @@ EL_CONDITION = "condition" EL_ELSE_IF = "else-if" EL_ELSE = "else" +EL_CONSTRAINTS = "constraints" diff --git a/datamimic_ce/model/constraints_model.py b/datamimic_ce/model/constraints_model.py new file mode 100644 index 00000000..8e743c62 --- /dev/null +++ b/datamimic_ce/model/constraints_model.py @@ -0,0 +1,11 @@ +# DATAMIMIC +# Copyright (c) 2023-2024 Rapiddweller Asia Co., Ltd. +# This software is licensed under the MIT License. +# See LICENSE file for the full text of the license. +# For questions and support, contact: info@rapiddweller.com + +from pydantic import BaseModel + + +class ConstraintsModel(BaseModel): + pass diff --git a/datamimic_ce/model/rule_model.py b/datamimic_ce/model/rule_model.py new file mode 100644 index 00000000..c79cb443 --- /dev/null +++ b/datamimic_ce/model/rule_model.py @@ -0,0 +1,21 @@ +# DATAMIMIC +# Copyright (c) 2023-2024 Rapiddweller Asia Co., Ltd. +# This software is licensed under the MIT License. +# See LICENSE file for the full text of the license. +# For questions and support, contact: info@rapiddweller.com + + +from datamimic_ce.model.model_util import ModelUtil +from pydantic import BaseModel, Field, field_validator + +from datamimic_ce.constants.attribute_constants import ATTR_IF, ATTR_THEN + + +class RuleModel(BaseModel): + if_rule: str = Field(None, alias=ATTR_IF) + then_rule: str = Field(None, alias=ATTR_THEN) + + @field_validator("if", "then") + @classmethod + def validate_name(cls, value): + return ModelUtil.check_not_empty(value=value) diff --git a/datamimic_ce/parsers/constraints_parser.py b/datamimic_ce/parsers/constraints_parser.py new file mode 100644 index 00000000..07d25ba3 --- /dev/null +++ b/datamimic_ce/parsers/constraints_parser.py @@ -0,0 +1,47 @@ +# DATAMIMIC +# Copyright (c) 2023-2024 Rapiddweller Asia Co., Ltd. +# This software is licensed under the MIT License. +# See LICENSE file for the full text of the license. +# For questions and support, contact: info@rapiddweller.com + + +from pathlib import Path +from xml.etree.ElementTree import Element + +from datamimic_ce.parsers.statement_parser import StatementParser +from datamimic_ce.statements.composite_statement import CompositeStatement +from datamimic_ce.utils.base_class_factory_util import BaseClassFactoryUtil + +from datamimic_ce.constants.element_constants import EL_CONSTRAINTS +from datamimic_ce.statements.constraints_statement import ConstraintsStatement + + +class ConstraintsParser(StatementParser): + def __init__( + self, + class_factory_util: BaseClassFactoryUtil, + element: Element, + properties: dict, + ): + super().__init__( + element, + properties, + valid_element_tag=EL_CONSTRAINTS, + class_factory_util=class_factory_util, + ) + + def parse(self, descriptor_dir: Path, parent_stmt: CompositeStatement) -> ConstraintsStatement: + """ + Parse element "constraints" into ConstraintsStatement + :return: + """ + constraints_stmt = ConstraintsStatement(parent_stmt) + sub_stmt_list = self._class_factory_util.get_parser_util_cls()().parse_sub_elements( + class_factory_util=self._class_factory_util, + descriptor_dir=descriptor_dir, + element=self._element, + properties=self._properties, + parent_stmt=constraints_stmt, + ) + constraints_stmt.sub_statements = sub_stmt_list + return constraints_stmt diff --git a/datamimic_ce/statements/constraints_statement.py b/datamimic_ce/statements/constraints_statement.py new file mode 100644 index 00000000..3014fb30 --- /dev/null +++ b/datamimic_ce/statements/constraints_statement.py @@ -0,0 +1,11 @@ +# DATAMIMIC +# Copyright (c) 2023-2024 Rapiddweller Asia Co., Ltd. +# This software is licensed under the MIT License. +# See LICENSE file for the full text of the license. +# For questions and support, contact: info@rapiddweller.com +from datamimic_ce.statements.composite_statement import CompositeStatement + + +class ConstraintsStatement(CompositeStatement): + def __init__(self, parent_stmt: CompositeStatement): + super().__init__(None, parent_stmt=parent_stmt) diff --git a/datamimic_ce/tasks/constraints_task.py b/datamimic_ce/tasks/constraints_task.py new file mode 100644 index 00000000..77af62e5 --- /dev/null +++ b/datamimic_ce/tasks/constraints_task.py @@ -0,0 +1,22 @@ +# DATAMIMIC +# Copyright (c) 2023-2024 Rapiddweller Asia Co., Ltd. +# This software is licensed under the MIT License. +# See LICENSE file for the full text of the license. +# For questions and support, contact: info@rapiddweller.com + +from datamimic_ce.contexts.geniter_context import GenIterContext +from datamimic_ce.tasks.task import Task + +from datamimic_ce.statements.constraints_statement import ConstraintsStatement + + +class ConstraintsTask(Task): + def __init__(self, statement: ConstraintsStatement): + self._statement = statement + + def execute(self, parent_context: GenIterContext): + pass + + @property + def statement(self) -> ConstraintsStatement: + return self._statement From 100b9b051560784e8745d2eb4b2fa3a131fed0b3 Mon Sep 17 00:00:00 2001 From: Tung Lieu Date: Mon, 3 Mar 2025 10:40:10 +0700 Subject: [PATCH 02/22] rule structure --- datamimic_ce/constants/element_constants.py | 1 + datamimic_ce/parsers/rule_parser.py | 26 +++++++++++++++ datamimic_ce/statements/rule_statement.py | 21 +++++++++++++ datamimic_ce/tasks/rule_task.py | 35 +++++++++++++++++++++ 4 files changed, 83 insertions(+) create mode 100644 datamimic_ce/parsers/rule_parser.py create mode 100644 datamimic_ce/statements/rule_statement.py create mode 100644 datamimic_ce/tasks/rule_task.py diff --git a/datamimic_ce/constants/element_constants.py b/datamimic_ce/constants/element_constants.py index 2a0d2185..62b2cb5c 100644 --- a/datamimic_ce/constants/element_constants.py +++ b/datamimic_ce/constants/element_constants.py @@ -26,3 +26,4 @@ EL_ELSE_IF = "else-if" EL_ELSE = "else" EL_CONSTRAINTS = "constraints" +EL_RULE = "rule" diff --git a/datamimic_ce/parsers/rule_parser.py b/datamimic_ce/parsers/rule_parser.py new file mode 100644 index 00000000..181ed880 --- /dev/null +++ b/datamimic_ce/parsers/rule_parser.py @@ -0,0 +1,26 @@ +# DATAMIMIC +# Copyright (c) 2023-2024 Rapiddweller Asia Co., Ltd. +# This software is licensed under the MIT License. +# See LICENSE file for the full text of the license. +# For questions and support, contact: info@rapiddweller.com + +from xml.etree.ElementTree import Element + +from datamimic_ce.parsers.statement_parser import StatementParser + +from datamimic_ce.constants.element_constants import EL_RULE +from datamimic_ce.utils.base_class_factory_util import BaseClassFactoryUtil + + +class RuleParser(StatementParser): + """ + Parse element "rule" to RuleStatement + """ + + def __init__( + self, + class_factory_util: BaseClassFactoryUtil, + element: Element, + properties: dict, + ): + super().__init__(element, properties, valid_element_tag=EL_RULE, class_factory_util=class_factory_util) diff --git a/datamimic_ce/statements/rule_statement.py b/datamimic_ce/statements/rule_statement.py new file mode 100644 index 00000000..b7ac9cf5 --- /dev/null +++ b/datamimic_ce/statements/rule_statement.py @@ -0,0 +1,21 @@ +# DATAMIMIC +# Copyright (c) 2023-2024 Rapiddweller Asia Co., Ltd. +# This software is licensed under the MIT License. +# See LICENSE file for the full text of the license. +# For questions and support, contact: info@rapiddweller.com +from datamimic_ce.model.rule_model import RuleModel +from datamimic_ce.statements.statement import Statement + + +class RuleStatement(Statement): + def __init__(self, model: RuleModel): + self._if_rule = model.if_rule + self._then_rule = model.then_rule + + @property + def if_rule(self): + return self._if_rule + + @property + def then_rule(self): + return self._then_rule \ No newline at end of file diff --git a/datamimic_ce/tasks/rule_task.py b/datamimic_ce/tasks/rule_task.py new file mode 100644 index 00000000..bc9e2da7 --- /dev/null +++ b/datamimic_ce/tasks/rule_task.py @@ -0,0 +1,35 @@ +# DATAMIMIC +# Copyright (c) 2023-2024 Rapiddweller Asia Co., Ltd. +# This software is licensed under the MIT License. +# See LICENSE file for the full text of the license. +# For questions and support, contact: info@rapiddweller.com + +from datamimic_ce.contexts.geniter_context import GenIterContext +from datamimic_ce.contexts.setup_context import SetupContext +from datamimic_ce.statements.item_statement import ItemStatement +from datamimic_ce.tasks.element_task import ElementTask +from datamimic_ce.tasks.task import Task +from datamimic_ce.utils.base_class_factory_util import BaseClassFactoryUtil + + +class RuleTask(Task): + def __init__( + self, + ctx: SetupContext, + statement: ItemStatement, + class_factory_util: BaseClassFactoryUtil, + ): + self._statement = statement + self._class_factory_util = class_factory_util + + @property + def statement(self) -> ItemStatement: + return self._statement + + def execute(self, parent_context: GenIterContext): + """ + Change datas base on condition in element "rule" + :param parent_context: + :return: + """ + pass From b8ac0fb828add2fd0ce82c68a3705545c101c0a7 Mon Sep 17 00:00:00 2001 From: Tung Lieu Date: Mon, 3 Mar 2025 14:49:49 +0700 Subject: [PATCH 03/22] constraint-rule structure done --- datamimic_ce/model/rule_model.py | 12 +-- datamimic_ce/parsers/parser_util.py | 12 ++- datamimic_ce/parsers/rule_parser.py | 14 ++- datamimic_ce/statements/rule_statement.py | 3 +- datamimic_ce/tasks/task_util.py | 8 ++ .../test_constraints/data/person_data.ent.csv | 101 ++++++++++++++++++ .../test_constraints/test_constraints.py | 22 ++++ .../test_constraints/test_constraints.xml | 11 ++ 8 files changed, 173 insertions(+), 10 deletions(-) create mode 100644 tests_ce/functional_tests/test_constraints/data/person_data.ent.csv create mode 100644 tests_ce/functional_tests/test_constraints/test_constraints.py create mode 100644 tests_ce/functional_tests/test_constraints/test_constraints.xml diff --git a/datamimic_ce/model/rule_model.py b/datamimic_ce/model/rule_model.py index c79cb443..0ec2179f 100644 --- a/datamimic_ce/model/rule_model.py +++ b/datamimic_ce/model/rule_model.py @@ -12,10 +12,10 @@ class RuleModel(BaseModel): - if_rule: str = Field(None, alias=ATTR_IF) - then_rule: str = Field(None, alias=ATTR_THEN) + if_rule: str = Field(alias=ATTR_IF) + then_rule: str = Field(alias=ATTR_THEN) - @field_validator("if", "then") - @classmethod - def validate_name(cls, value): - return ModelUtil.check_not_empty(value=value) + # @field_validator("if_rule", "then_rule") + # @classmethod + # def validate_name(cls, value): + # return ModelUtil.check_not_empty(value=value) diff --git a/datamimic_ce/parsers/parser_util.py b/datamimic_ce/parsers/parser_util.py index 556c692d..2fb0419b 100644 --- a/datamimic_ce/parsers/parser_util.py +++ b/datamimic_ce/parsers/parser_util.py @@ -33,11 +33,12 @@ EL_NESTED_KEY, EL_REFERENCE, EL_SETUP, - EL_VARIABLE, + EL_VARIABLE, EL_CONSTRAINTS, EL_RULE, ) from datamimic_ce.logger import logger from datamimic_ce.parsers.array_parser import ArrayParser from datamimic_ce.parsers.condition_parser import ConditionParser +from datamimic_ce.parsers.constraints_parser import ConstraintsParser from datamimic_ce.parsers.database_parser import DatabaseParser from datamimic_ce.parsers.echo_parser import EchoParser from datamimic_ce.parsers.element_parser import ElementParser @@ -54,6 +55,7 @@ from datamimic_ce.parsers.memstore_parser import MemstoreParser from datamimic_ce.parsers.nested_key_parser import NestedKeyParser from datamimic_ce.parsers.reference_parser import ReferenceParser +from datamimic_ce.parsers.rule_parser import RuleParser from datamimic_ce.parsers.variable_parser import VariableParser from datamimic_ce.statements.array_statement import ArrayStatement from datamimic_ce.statements.composite_statement import CompositeStatement @@ -122,6 +124,7 @@ def get_valid_sub_elements_set_by_tag(ele_tag: str) -> set | None: EL_ECHO, EL_CONDITION, EL_INCLUDE, + EL_CONSTRAINTS, }, EL_INCLUDE: {EL_SETUP}, EL_ITEM: {EL_KEY, EL_NESTED_KEY, EL_LIST, EL_ARRAY, EL_ELEMENT}, @@ -130,6 +133,7 @@ def get_valid_sub_elements_set_by_tag(ele_tag: str) -> set | None: EL_IF: None, EL_ELSE_IF: None, EL_ELSE: None, + EL_CONSTRAINTS: {EL_RULE}, } return valid_sub_element_dict.get(ele_tag, set()) @@ -189,6 +193,10 @@ def get_parser_by_element(class_factory_util: BaseClassFactoryUtil, element: Ele return ElementParser(class_factory_util, element=element, properties=properties) elif tag == EL_GENERATOR: return GeneratorParser(class_factory_util, element=element, properties=properties) + elif tag == EL_CONSTRAINTS: + return ConstraintsParser(class_factory_util, element=element, properties=properties) + elif tag == EL_RULE: + return RuleParser(class_factory_util, element=element, properties=properties) else: raise ValueError(f"Cannot get parser for element <{tag}>") @@ -238,7 +246,7 @@ def parse_sub_elements( | GeneratorParser, ): stmt = parser.parse() - elif isinstance(parser, KeyParser): + elif isinstance(parser, KeyParser | ConstraintsParser | RuleParser): stmt = parser.parse(descriptor_dir=descriptor_dir, parent_stmt=parent_stmt) elif isinstance(parser, ConditionParser): stmt = parser.parse( diff --git a/datamimic_ce/parsers/rule_parser.py b/datamimic_ce/parsers/rule_parser.py index 181ed880..4c4eea5f 100644 --- a/datamimic_ce/parsers/rule_parser.py +++ b/datamimic_ce/parsers/rule_parser.py @@ -3,9 +3,13 @@ # This software is licensed under the MIT License. # See LICENSE file for the full text of the license. # For questions and support, contact: info@rapiddweller.com - +from pathlib import Path from xml.etree.ElementTree import Element +from datamimic_ce.model.rule_model import RuleModel +from datamimic_ce.statements.rule_statement import RuleStatement +from datamimic_ce.statements.statement import Statement + from datamimic_ce.parsers.statement_parser import StatementParser from datamimic_ce.constants.element_constants import EL_RULE @@ -24,3 +28,11 @@ def __init__( properties: dict, ): super().__init__(element, properties, valid_element_tag=EL_RULE, class_factory_util=class_factory_util) + + def parse(self, descriptor_dir: Path, parent_stmt: Statement) -> RuleStatement: + """ + Parse element "xml-attribute" to XmlAttributeStatement + :return: + """ + + return RuleStatement(self.validate_attributes(RuleModel)) diff --git a/datamimic_ce/statements/rule_statement.py b/datamimic_ce/statements/rule_statement.py index b7ac9cf5..ab2b5444 100644 --- a/datamimic_ce/statements/rule_statement.py +++ b/datamimic_ce/statements/rule_statement.py @@ -9,6 +9,7 @@ class RuleStatement(Statement): def __init__(self, model: RuleModel): + super().__init__(None, None) self._if_rule = model.if_rule self._then_rule = model.then_rule @@ -18,4 +19,4 @@ def if_rule(self): @property def then_rule(self): - return self._then_rule \ No newline at end of file + return self._then_rule diff --git a/datamimic_ce/tasks/task_util.py b/datamimic_ce/tasks/task_util.py index 5b0a1966..21785470 100644 --- a/datamimic_ce/tasks/task_util.py +++ b/datamimic_ce/tasks/task_util.py @@ -35,6 +35,7 @@ from datamimic_ce.logger import logger from datamimic_ce.statements.array_statement import ArrayStatement from datamimic_ce.statements.condition_statement import ConditionStatement +from datamimic_ce.statements.constraints_statement import ConstraintsStatement from datamimic_ce.statements.database_statement import DatabaseStatement from datamimic_ce.statements.echo_statement import EchoStatement from datamimic_ce.statements.element_statement import ElementStatement @@ -52,10 +53,12 @@ from datamimic_ce.statements.mongodb_statement import MongoDBStatement from datamimic_ce.statements.nested_key_statement import NestedKeyStatement from datamimic_ce.statements.reference_statement import ReferenceStatement +from datamimic_ce.statements.rule_statement import RuleStatement from datamimic_ce.statements.statement import Statement from datamimic_ce.statements.variable_statement import VariableStatement from datamimic_ce.tasks.array_task import ArrayTask from datamimic_ce.tasks.condition_task import ConditionTask +from datamimic_ce.tasks.constraints_task import ConstraintsTask from datamimic_ce.tasks.database_task import DatabaseTask from datamimic_ce.tasks.echo_task import EchoTask from datamimic_ce.tasks.element_task import ElementTask @@ -75,6 +78,7 @@ from datamimic_ce.tasks.mongodb_task import MongoDBTask from datamimic_ce.tasks.nested_key_task import NestedKeyTask from datamimic_ce.tasks.reference_task import ReferenceTask +from datamimic_ce.tasks.rule_task import RuleTask from datamimic_ce.tasks.task import Task from datamimic_ce.utils.object_util import ObjectUtil @@ -129,6 +133,10 @@ def get_task_by_statement( return ElementTask(ctx, stmt) elif isinstance(stmt, GeneratorStatement): return GeneratorTask(stmt) + elif isinstance(stmt, ConstraintsStatement): + return ConstraintsTask(stmt) + elif isinstance(stmt, RuleStatement): + return RuleTask(stmt) else: raise ValueError(f"Cannot created task for statement {stmt.__class__.__name__}") diff --git a/tests_ce/functional_tests/test_constraints/data/person_data.ent.csv b/tests_ce/functional_tests/test_constraints/data/person_data.ent.csv new file mode 100644 index 00000000..8c6cf3b6 --- /dev/null +++ b/tests_ce/functional_tests/test_constraints/data/person_data.ent.csv @@ -0,0 +1,101 @@ +firstname|lastname|age|city|credit_score|risk_profile +Charlie|Brown|61|New York|707|Low +Emma|Jones|48|San Jose|712|High +Jane|Williams|28|Philadelphia|387|High +Emma|Garcia|63|Philadelphia|535|Low +John|Johnson|58|Los Angeles|656|Low +Alice|Johnson|60|San Diego|485|Low +Jane|Smith|31|Los Angeles|631|Low +Grace|Garcia|24|Phoenix|157|Medium +Frank|Garcia|18|Dallas|471|Medium +John|Taylor|29|Philadelphia|985|Medium +Emma|Jones|68|Houston|488|Medium +Bob|Smith|52|Dallas|252|Low +Jane|Brown|67|Dallas|562|Medium +David|Brown|21|Philadelphia|612|High +Jane|Brown|70|Phoenix|918|Medium +Hannah|Davis|26|Philadelphia|767|Medium +Bob|Johnson|38|San Antonio|431|High +John|Johnson|48|Los Angeles|155|Medium +John|Davis|29|San Jose|496|Medium +John|Johnson|29|Los Angeles|586|Medium +Bob|Williams|53|Los Angeles|810|Medium +Jane|Miller|46|Dallas|704|High +Charlie|Garcia|67|Phoenix|487|Medium +Hannah|Jones|57|Phoenix|120|High +Grace|Smith|33|New York|308|Medium +Grace|Miller|34|San Antonio|151|Medium +Emma|Johnson|48|San Jose|420|Medium +Frank|Garcia|67|New York|150|Medium +Alice|Davis|37|Phoenix|268|Medium +Charlie|Brown|56|Houston|157|Medium +Emma|Martinez|65|Dallas|522|Medium +Bob|Williams|54|San Diego|536|Medium +Frank|Brown|57|San Diego|118|Medium +Frank|Garcia|52|Dallas|440|High +Frank|Jones|45|Houston|908|Low +Hannah|Miller|23|San Diego|851|Low +David|Garcia|59|San Antonio|609|Medium +Frank|Davis|35|New York|624|Medium +John|Garcia|36|Houston|597|Medium +Alice|Brown|62|Chicago|469|Medium +Alice|Smith|25|San Jose|441|Medium +Bob|Jones|52|New York|452|Medium +Jane|Garcia|25|Phoenix|869|Low +Bob|Jones|37|Los Angeles|847|Medium +Emma|Jones|48|New York|891|Medium +Bob|Martinez|34|San Jose|824|High +Jane|Davis|31|San Antonio|108|Medium +Grace|Garcia|33|San Diego|467|Medium +Jane|Davis|43|Philadelphia|394|High +Jane|Johnson|67|San Jose|913|High +Hannah|Williams|32|San Jose|252|Low +Alice|Brown|67|Houston|640|Medium +Bob|Davis|51|Philadelphia|941|Medium +Frank|Williams|47|San Jose|504|Medium +Emma|Brown|33|Los Angeles|581|Medium +Grace|Williams|55|Chicago|832|Low +Hannah|Williams|31|New York|235|Low +Emma|Garcia|40|San Jose|655|Low +Alice|Miller|55|Phoenix|389|Low +David|Smith|45|Los Angeles|932|Low +Bob|Johnson|64|San Diego|771|Low +Grace|Brown|63|San Antonio|989|Low +Jane|Jones|63|Chicago|715|Medium +Alice|Miller|65|Dallas|959|Low +John|Smith|47|San Antonio|989|Low +John|Miller|53|Phoenix|580|Low +Bob|Smith|46|New York|822|Medium +Alice|Davis|59|New York|329|Low +Jane|Garcia|40|San Antonio|950|High +Charlie|Johnson|19|San Antonio|990|Medium +Jane|Johnson|38|Dallas|326|Medium +Emma|Smith|30|Houston|988|High +Hannah|Brown|48|San Diego|127|Medium +John|Davis|50|San Jose|498|Medium +Emma|Miller|65|San Jose|548|Medium +Grace|Miller|19|Houston|100|Medium +Bob|Johnson|28|Phoenix|780|Low +Charlie|Smith|65|San Jose|612|Medium +Jane|Brown|37|San Jose|814|Low +Alice|Miller|48|San Diego|691|Medium +John|Garcia|38|Philadelphia|193|High +John|Martinez|30|Houston|294|Low +Emma|Martinez|34|Houston|702|Medium +Charlie|Johnson|70|Philadelphia|308|Medium +Jane|Miller|55|San Diego|706|High +Alice|Taylor|49|Phoenix|367|Medium +John|Jones|32|Chicago|276|Medium +Charlie|Taylor|22|San Diego|529|Low +Frank|Martinez|42|New York|865|Medium +Jane|Garcia|22|Houston|835|High +Emma|Garcia|60|San Diego|427|High +Jane|Smith|67|Philadelphia|724|Medium +Jane|Smith|58|Los Angeles|214|Medium +Alice|Jones|21|Houston|743|Medium +Charlie|Johnson|70|Los Angeles|729|Medium +Grace|Garcia|64|Los Angeles|964|Low +Frank|Brown|46|Dallas|303|Medium +Hannah|Jones|27|Philadelphia|299|Low +John|Smith|26|New York|491|Low +Alice|Williams|41|San Diego|158|Medium diff --git a/tests_ce/functional_tests/test_constraints/test_constraints.py b/tests_ce/functional_tests/test_constraints/test_constraints.py new file mode 100644 index 00000000..d8e296ab --- /dev/null +++ b/tests_ce/functional_tests/test_constraints/test_constraints.py @@ -0,0 +1,22 @@ +# DATAMIMIC +# Copyright (c) 2023-2024 Rapiddweller Asia Co., Ltd. +# This software is licensed under the MIT License. +# See LICENSE file for the full text of the license. +# For questions and support, contact: info@rapiddweller.com + + +from pathlib import Path +from unittest import TestCase + +import pytest + +from datamimic_ce.data_mimic_test import DataMimicTest + + +class TestConstraints(TestCase): + _test_dir = Path(__file__).resolve().parent + + def test_constraints(self): + engine = DataMimicTest(test_dir=self._test_dir, filename="test_constraints.xml", capture_test_result=True) + engine.test_with_timer() + result = engine.capture_result() diff --git a/tests_ce/functional_tests/test_constraints/test_constraints.xml b/tests_ce/functional_tests/test_constraints/test_constraints.xml new file mode 100644 index 00000000..4343d621 --- /dev/null +++ b/tests_ce/functional_tests/test_constraints/test_constraints.xml @@ -0,0 +1,11 @@ + + + + + + + + + + \ No newline at end of file From 8d128f6caba2cd28e3abdef6618aa2f35d0db70d Mon Sep 17 00:00:00 2001 From: Tung Lieu Date: Tue, 4 Mar 2025 19:07:00 +0700 Subject: [PATCH 04/22] try execute rule in task, not good --- datamimic_ce/tasks/constraints_task.py | 24 +- datamimic_ce/tasks/task_util.py | 4 +- .../test_constraints/script/person_data.json | 242 ++++++++++++++++++ .../test_constraints/test_constraints.xml | 4 +- 4 files changed, 266 insertions(+), 8 deletions(-) create mode 100644 tests_ce/functional_tests/test_constraints/script/person_data.json diff --git a/datamimic_ce/tasks/constraints_task.py b/datamimic_ce/tasks/constraints_task.py index 77af62e5..a0882e07 100644 --- a/datamimic_ce/tasks/constraints_task.py +++ b/datamimic_ce/tasks/constraints_task.py @@ -3,6 +3,8 @@ # This software is licensed under the MIT License. # See LICENSE file for the full text of the license. # For questions and support, contact: info@rapiddweller.com +from datamimic_ce.statements.rule_statement import RuleStatement +from datamimic_ce.utils.base_class_factory_util import BaseClassFactoryUtil from datamimic_ce.contexts.geniter_context import GenIterContext from datamimic_ce.tasks.task import Task @@ -11,12 +13,26 @@ class ConstraintsTask(Task): - def __init__(self, statement: ConstraintsStatement): + def __init__(self, statement: ConstraintsStatement, class_factory_util: BaseClassFactoryUtil): self._statement = statement - - def execute(self, parent_context: GenIterContext): - pass + self._class_factory_util = class_factory_util @property def statement(self) -> ConstraintsStatement: return self._statement + + def execute(self, parent_context: GenIterContext): + """ + Get condition from sub-tag Rule. Filter datas from context base on Rule + """ + task_util_cls = self._class_factory_util.get_task_util_cls() + child_stmt = [child_stmt for child_stmt in self.statement.sub_statements] + for child_stmt in self.statement.sub_statements: + if isinstance(child_stmt, RuleStatement): + if_condition = parent_context.evaluate_python_expression(child_stmt.if_rule) + if isinstance(if_condition, bool) and if_condition: + else_condition = parent_context.evaluate_python_expression(child_stmt.then_rule) + if isinstance(else_condition, bool) and else_condition is False: + parent_context.current_product = {} + # out of loop when remove the product + break diff --git a/datamimic_ce/tasks/task_util.py b/datamimic_ce/tasks/task_util.py index 21785470..65011e27 100644 --- a/datamimic_ce/tasks/task_util.py +++ b/datamimic_ce/tasks/task_util.py @@ -134,9 +134,9 @@ def get_task_by_statement( elif isinstance(stmt, GeneratorStatement): return GeneratorTask(stmt) elif isinstance(stmt, ConstraintsStatement): - return ConstraintsTask(stmt) + return ConstraintsTask(stmt, class_factory_util) elif isinstance(stmt, RuleStatement): - return RuleTask(stmt) + return RuleTask(stmt, class_factory_util) else: raise ValueError(f"Cannot created task for statement {stmt.__class__.__name__}") diff --git a/tests_ce/functional_tests/test_constraints/script/person_data.json b/tests_ce/functional_tests/test_constraints/script/person_data.json new file mode 100644 index 00000000..5634e1dd --- /dev/null +++ b/tests_ce/functional_tests/test_constraints/script/person_data.json @@ -0,0 +1,242 @@ +[ + { + "firstname": "Charlie", + "lastname": "Brown", + "age": 61, + "city": "New York", + "credit_score": 707, + "risk_profile": "Low" + }, + { + "firstname": "Emma", + "lastname": "Jones", + "age": 48, + "city": "San Jose", + "credit_score": 712, + "risk_profile": "High" + }, + { + "firstname": "Jane", + "lastname": "Williams", + "age": 28, + "city": "Philadelphia", + "credit_score": 387, + "risk_profile": "High" + }, + { + "firstname": "Emma", + "lastname": "Garcia", + "age": 63, + "city": "Philadelphia", + "credit_score": 535, + "risk_profile": "Low" + }, + { + "firstname": "John", + "lastname": "Johnson", + "age": 58, + "city": "Los Angeles", + "credit_score": 656, + "risk_profile": "Low" + }, + { + "firstname": "Alice", + "lastname": "Johnson", + "age": 60, + "city": "San Diego", + "credit_score": 485, + "risk_profile": "Low" + }, + { + "firstname": "Jane", + "lastname": "Smith", + "age": 31, + "city": "Los Angeles", + "credit_score": 631, + "risk_profile": "Low" + }, + { + "firstname": "Grace", + "lastname": "Garcia", + "age": 24, + "city": "Phoenix", + "credit_score": 157, + "risk_profile": "Medium" + }, + { + "firstname": "Frank", + "lastname": "Garcia", + "age": 18, + "city": "Dallas", + "credit_score": 471, + "risk_profile": "Medium" + }, + { + "firstname": "John", + "lastname": "Taylor", + "age": 29, + "city": "Philadelphia", + "credit_score": 985, + "risk_profile": "Medium" + }, + { + "firstname": "Emma", + "lastname": "Jones", + "age": 68, + "city": "Houston", + "credit_score": 488, + "risk_profile": "Medium" + }, + { + "firstname": "Bob", + "lastname": "Smith", + "age": 52, + "city": "Dallas", + "credit_score": 252, + "risk_profile": "Low" + }, + { + "firstname": "Jane", + "lastname": "Brown", + "age": 67, + "city": "Dallas", + "credit_score": 562, + "risk_profile": "Medium" + }, + { + "firstname": "David", + "lastname": "Brown", + "age": 21, + "city": "Philadelphia", + "credit_score": 612, + "risk_profile": "High" + }, + { + "firstname": "Jane", + "lastname": "Brown", + "age": 70, + "city": "Phoenix", + "credit_score": 918, + "risk_profile": "Medium" + }, + { + "firstname": "Hannah", + "lastname": "Davis", + "age": 26, + "city": "Philadelphia", + "credit_score": 767, + "risk_profile": "Medium" + }, + { + "firstname": "Bob", + "lastname": "Johnson", + "age": 38, + "city": "San Antonio", + "credit_score": 431, + "risk_profile": "High" + }, + { + "firstname": "John", + "lastname": "Johnson", + "age": 48, + "city": "Los Angeles", + "credit_score": 155, + "risk_profile": "Medium" + }, + { + "firstname": "John", + "lastname": "Davis", + "age": 29, + "city": "San Jose", + "credit_score": 496, + "risk_profile": "Medium" + }, + { + "firstname": "John", + "lastname": "Johnson", + "age": 29, + "city": "Los Angeles", + "credit_score": 586, + "risk_profile": "Medium" + }, + { + "firstname": "Bob", + "lastname": "Williams", + "age": 53, + "city": "Los Angeles", + "credit_score": 810, + "risk_profile": "Medium" + }, + { + "firstname": "Jane", + "lastname": "Miller", + "age": 46, + "city": "Dallas", + "credit_score": 704, + "risk_profile": "High" + }, + { + "firstname": "Charlie", + "lastname": "Garcia", + "age": 67, + "city": "Phoenix", + "credit_score": 487, + "risk_profile": "Medium" + }, + { + "firstname": "Hannah", + "lastname": "Jones", + "age": 57, + "city": "Phoenix", + "credit_score": 120, + "risk_profile": "High" + }, + { + "firstname": "Grace", + "lastname": "Smith", + "age": 33, + "city": "New York", + "credit_score": 308, + "risk_profile": "Medium" + }, + { + "firstname": "Grace", + "lastname": "Miller", + "age": 34, + "city": "San Antonio", + "credit_score": 151, + "risk_profile": "Medium" + }, + { + "firstname": "Emma", + "lastname": "Johnson", + "age": 48, + "city": "San Jose", + "credit_score": 420, + "risk_profile": "Medium" + }, + { + "firstname": "Frank", + "lastname": "Garcia", + "age": 67, + "city": "New York", + "credit_score": 150, + "risk_profile": "Medium" + }, + { + "firstname": "Alice", + "lastname": "Davis", + "age": 37, + "city": "Phoenix", + "credit_score": 268, + "risk_profile": "Medium" + }, + { + "firstname": "Charlie", + "lastname": "Brown", + "age": 56, + "city": "Houston", + "credit_score": 157, + "risk_profile": "Medium" + } +] \ No newline at end of file diff --git a/tests_ce/functional_tests/test_constraints/test_constraints.xml b/tests_ce/functional_tests/test_constraints/test_constraints.xml index 4343d621..7d3c36b2 100644 --- a/tests_ce/functional_tests/test_constraints/test_constraints.xml +++ b/tests_ce/functional_tests/test_constraints/test_constraints.xml @@ -1,6 +1,6 @@ - + + source="script/person_data.json" target="ConsoleExporter"> From 28665a92308f0d14c55af68914fec2d9d189db90 Mon Sep 17 00:00:00 2001 From: Tung Lieu Date: Tue, 4 Mar 2025 19:42:39 +0700 Subject: [PATCH 05/22] change place to execute constraints filter --- datamimic_ce/tasks/constraints_task.py | 32 +- datamimic_ce/workers/generate_worker.py | 10 + .../test_constraints/script/person_data.json | 560 ++++++++++++++++++ 3 files changed, 587 insertions(+), 15 deletions(-) diff --git a/datamimic_ce/tasks/constraints_task.py b/datamimic_ce/tasks/constraints_task.py index a0882e07..9108d126 100644 --- a/datamimic_ce/tasks/constraints_task.py +++ b/datamimic_ce/tasks/constraints_task.py @@ -3,6 +3,8 @@ # This software is licensed under the MIT License. # See LICENSE file for the full text of the license. # For questions and support, contact: info@rapiddweller.com +from datamimic_ce.contexts.context import SAFE_GLOBALS + from datamimic_ce.statements.rule_statement import RuleStatement from datamimic_ce.utils.base_class_factory_util import BaseClassFactoryUtil @@ -21,18 +23,18 @@ def __init__(self, statement: ConstraintsStatement, class_factory_util: BaseClas def statement(self) -> ConstraintsStatement: return self._statement - def execute(self, parent_context: GenIterContext): - """ - Get condition from sub-tag Rule. Filter datas from context base on Rule - """ - task_util_cls = self._class_factory_util.get_task_util_cls() - child_stmt = [child_stmt for child_stmt in self.statement.sub_statements] - for child_stmt in self.statement.sub_statements: - if isinstance(child_stmt, RuleStatement): - if_condition = parent_context.evaluate_python_expression(child_stmt.if_rule) - if isinstance(if_condition, bool) and if_condition: - else_condition = parent_context.evaluate_python_expression(child_stmt.then_rule) - if isinstance(else_condition, bool) and else_condition is False: - parent_context.current_product = {} - # out of loop when remove the product - break + def execute(self, parent_context: GenIterContext, source_data): + pass + + def filter(self, source_data): + for i in range(len(source_data) - 1, -1, -1): # Iterate from last to first + data_dict = source_data[i] + for child_stmt in self.statement.sub_statements: + if isinstance(child_stmt, RuleStatement): + if_condition = eval(child_stmt.if_rule, SAFE_GLOBALS, data_dict) + if isinstance(if_condition, bool) and if_condition: + else_condition = eval(child_stmt.then_rule, SAFE_GLOBALS, data_dict) + if isinstance(else_condition, bool) and else_condition is False: + del source_data[i] + break + return source_data diff --git a/datamimic_ce/workers/generate_worker.py b/datamimic_ce/workers/generate_worker.py index 4a4fee2f..f46baaed 100644 --- a/datamimic_ce/workers/generate_worker.py +++ b/datamimic_ce/workers/generate_worker.py @@ -16,7 +16,9 @@ from datamimic_ce.exporters.exporter_state_manager import ExporterStateManager from datamimic_ce.logger import logger, setup_logger from datamimic_ce.statements.generate_statement import GenerateStatement +from datamimic_ce.tasks.constraints_task import ConstraintsTask from datamimic_ce.tasks.generate_task import GenerateTask +from datamimic_ce.tasks.rule_task import RuleTask from datamimic_ce.utils.logging_util import gen_timer @@ -188,6 +190,11 @@ def _generate_product_by_page_in_single_process( ) ) + # filter source_data with constraints-rule task when specify + for task in tasks: + if isinstance(task, ConstraintsTask): + source_data = task.filter(source_data) + # Shuffle source data if distribution is random if is_random_distribution: seed = root_context.get_distribution_seed() @@ -227,6 +234,9 @@ def _generate_product_by_page_in_single_process( # Store temp product in context for later evaluate inner_generate_key = key.split("|", 1)[-1].strip() ctx.current_variables[inner_generate_key] = value + # ConstraintsTask and RuleTask don't need to execute + elif isinstance(task, ConstraintsTask | RuleTask): + pass else: task.execute(ctx) diff --git a/tests_ce/functional_tests/test_constraints/script/person_data.json b/tests_ce/functional_tests/test_constraints/script/person_data.json index 5634e1dd..fdaa292e 100644 --- a/tests_ce/functional_tests/test_constraints/script/person_data.json +++ b/tests_ce/functional_tests/test_constraints/script/person_data.json @@ -238,5 +238,565 @@ "city": "Houston", "credit_score": 157, "risk_profile": "Medium" + }, + { + "firstname": "Emma", + "lastname": "Martinez", + "age": 65, + "city": "Dallas", + "credit_score": 522, + "risk_profile": "Medium" + }, + { + "firstname": "Bob", + "lastname": "Williams", + "age": 54, + "city": "San Diego", + "credit_score": 536, + "risk_profile": "Medium" + }, + { + "firstname": "Frank", + "lastname": "Brown", + "age": 57, + "city": "San Diego", + "credit_score": 118, + "risk_profile": "Medium" + }, + { + "firstname": "Frank", + "lastname": "Garcia", + "age": 52, + "city": "Dallas", + "credit_score": 440, + "risk_profile": "High" + }, + { + "firstname": "Frank", + "lastname": "Jones", + "age": 45, + "city": "Houston", + "credit_score": 908, + "risk_profile": "Low" + }, + { + "firstname": "Hannah", + "lastname": "Miller", + "age": 23, + "city": "San Diego", + "credit_score": 851, + "risk_profile": "Low" + }, + { + "firstname": "David", + "lastname": "Garcia", + "age": 59, + "city": "San Antonio", + "credit_score": 609, + "risk_profile": "Medium" + }, + { + "firstname": "Frank", + "lastname": "Davis", + "age": 35, + "city": "New York", + "credit_score": 624, + "risk_profile": "Medium" + }, + { + "firstname": "John", + "lastname": "Garcia", + "age": 36, + "city": "Houston", + "credit_score": 597, + "risk_profile": "Medium" + }, + { + "firstname": "Alice", + "lastname": "Brown", + "age": 62, + "city": "Chicago", + "credit_score": 469, + "risk_profile": "Medium" + }, + { + "firstname": "Alice", + "lastname": "Smith", + "age": 25, + "city": "San Jose", + "credit_score": 441, + "risk_profile": "Medium" + }, + { + "firstname": "Bob", + "lastname": "Jones", + "age": 52, + "city": "New York", + "credit_score": 452, + "risk_profile": "Medium" + }, + { + "firstname": "Jane", + "lastname": "Garcia", + "age": 25, + "city": "Phoenix", + "credit_score": 869, + "risk_profile": "Low" + }, + { + "firstname": "Bob", + "lastname": "Jones", + "age": 37, + "city": "Los Angeles", + "credit_score": 847, + "risk_profile": "Medium" + }, + { + "firstname": "Emma", + "lastname": "Jones", + "age": 48, + "city": "New York", + "credit_score": 891, + "risk_profile": "Medium" + }, + { + "firstname": "Bob", + "lastname": "Martinez", + "age": 34, + "city": "San Jose", + "credit_score": 824, + "risk_profile": "High" + }, + { + "firstname": "Jane", + "lastname": "Davis", + "age": 31, + "city": "San Antonio", + "credit_score": 108, + "risk_profile": "Medium" + }, + { + "firstname": "Grace", + "lastname": "Garcia", + "age": 33, + "city": "San Diego", + "credit_score": 467, + "risk_profile": "Medium" + }, + { + "firstname": "Jane", + "lastname": "Davis", + "age": 43, + "city": "Philadelphia", + "credit_score": 394, + "risk_profile": "High" + }, + { + "firstname": "Jane", + "lastname": "Johnson", + "age": 67, + "city": "San Jose", + "credit_score": 913, + "risk_profile": "High" + }, + { + "firstname": "Hannah", + "lastname": "Williams", + "age": 32, + "city": "San Jose", + "credit_score": 252, + "risk_profile": "Low" + }, + { + "firstname": "Alice", + "lastname": "Brown", + "age": 67, + "city": "Houston", + "credit_score": 640, + "risk_profile": "Medium" + }, + { + "firstname": "Bob", + "lastname": "Davis", + "age": 51, + "city": "Philadelphia", + "credit_score": 941, + "risk_profile": "Medium" + }, + { + "firstname": "Frank", + "lastname": "Williams", + "age": 47, + "city": "San Jose", + "credit_score": 504, + "risk_profile": "Medium" + }, + { + "firstname": "Emma", + "lastname": "Brown", + "age": 33, + "city": "Los Angeles", + "credit_score": 581, + "risk_profile": "Medium" + }, + { + "firstname": "Grace", + "lastname": "Williams", + "age": 55, + "city": "Chicago", + "credit_score": 832, + "risk_profile": "Low" + }, + { + "firstname": "Hannah", + "lastname": "Williams", + "age": 31, + "city": "New York", + "credit_score": 235, + "risk_profile": "Low" + }, + { + "firstname": "Emma", + "lastname": "Garcia", + "age": 40, + "city": "San Jose", + "credit_score": 655, + "risk_profile": "Low" + }, + { + "firstname": "Alice", + "lastname": "Miller", + "age": 55, + "city": "Phoenix", + "credit_score": 389, + "risk_profile": "Low" + }, + { + "firstname": "David", + "lastname": "Smith", + "age": 45, + "city": "Los Angeles", + "credit_score": 932, + "risk_profile": "Low" + }, + { + "firstname": "Bob", + "lastname": "Johnson", + "age": 64, + "city": "San Diego", + "credit_score": 771, + "risk_profile": "Low" + }, + { + "firstname": "Grace", + "lastname": "Brown", + "age": 63, + "city": "San Antonio", + "credit_score": 989, + "risk_profile": "Low" + }, + { + "firstname": "Jane", + "lastname": "Jones", + "age": 63, + "city": "Chicago", + "credit_score": 715, + "risk_profile": "Medium" + }, + { + "firstname": "Alice", + "lastname": "Miller", + "age": 65, + "city": "Dallas", + "credit_score": 959, + "risk_profile": "Low" + }, + { + "firstname": "John", + "lastname": "Smith", + "age": 47, + "city": "San Antonio", + "credit_score": 989, + "risk_profile": "Low" + }, + { + "firstname": "John", + "lastname": "Miller", + "age": 53, + "city": "Phoenix", + "credit_score": 580, + "risk_profile": "Low" + }, + { + "firstname": "Bob", + "lastname": "Smith", + "age": 46, + "city": "New York", + "credit_score": 822, + "risk_profile": "Medium" + }, + { + "firstname": "Alice", + "lastname": "Davis", + "age": 59, + "city": "New York", + "credit_score": 329, + "risk_profile": "Low" + }, + { + "firstname": "Jane", + "lastname": "Garcia", + "age": 40, + "city": "San Antonio", + "credit_score": 950, + "risk_profile": "High" + }, + { + "firstname": "Charlie", + "lastname": "Johnson", + "age": 19, + "city": "San Antonio", + "credit_score": 990, + "risk_profile": "Medium" + }, + { + "firstname": "Jane", + "lastname": "Johnson", + "age": 38, + "city": "Dallas", + "credit_score": 326, + "risk_profile": "Medium" + }, + { + "firstname": "Emma", + "lastname": "Smith", + "age": 30, + "city": "Houston", + "credit_score": 988, + "risk_profile": "High" + }, + { + "firstname": "Hannah", + "lastname": "Brown", + "age": 48, + "city": "San Diego", + "credit_score": 127, + "risk_profile": "Medium" + }, + { + "firstname": "John", + "lastname": "Davis", + "age": 50, + "city": "San Jose", + "credit_score": 498, + "risk_profile": "Medium" + }, + { + "firstname": "Emma", + "lastname": "Miller", + "age": 65, + "city": "San Jose", + "credit_score": 548, + "risk_profile": "Medium" + }, + { + "firstname": "Grace", + "lastname": "Miller", + "age": 19, + "city": "Houston", + "credit_score": 100, + "risk_profile": "Medium" + }, + { + "firstname": "Bob", + "lastname": "Johnson", + "age": 28, + "city": "Phoenix", + "credit_score": 780, + "risk_profile": "Low" + }, + { + "firstname": "Charlie", + "lastname": "Smith", + "age": 65, + "city": "San Jose", + "credit_score": 612, + "risk_profile": "Medium" + }, + { + "firstname": "Jane", + "lastname": "Brown", + "age": 37, + "city": "San Jose", + "credit_score": 814, + "risk_profile": "Low" + }, + { + "firstname": "Alice", + "lastname": "Miller", + "age": 48, + "city": "San Diego", + "credit_score": 691, + "risk_profile": "Medium" + }, + { + "firstname": "John", + "lastname": "Garcia", + "age": 38, + "city": "Philadelphia", + "credit_score": 193, + "risk_profile": "High" + }, + { + "firstname": "John", + "lastname": "Martinez", + "age": 30, + "city": "Houston", + "credit_score": 294, + "risk_profile": "Low" + }, + { + "firstname": "Emma", + "lastname": "Martinez", + "age": 34, + "city": "Houston", + "credit_score": 702, + "risk_profile": "Medium" + }, + { + "firstname": "Charlie", + "lastname": "Johnson", + "age": 70, + "city": "Philadelphia", + "credit_score": 308, + "risk_profile": "Medium" + }, + { + "firstname": "Jane", + "lastname": "Miller", + "age": 55, + "city": "San Diego", + "credit_score": 706, + "risk_profile": "High" + }, + { + "firstname": "Alice", + "lastname": "Taylor", + "age": 49, + "city": "Phoenix", + "credit_score": 367, + "risk_profile": "Medium" + }, + { + "firstname": "John", + "lastname": "Jones", + "age": 32, + "city": "Chicago", + "credit_score": 276, + "risk_profile": "Medium" + }, + { + "firstname": "Charlie", + "lastname": "Taylor", + "age": 22, + "city": "San Diego", + "credit_score": 529, + "risk_profile": "Low" + }, + { + "firstname": "Frank", + "lastname": "Martinez", + "age": 42, + "city": "New York", + "credit_score": 865, + "risk_profile": "Medium" + }, + { + "firstname": "Jane", + "lastname": "Garcia", + "age": 22, + "city": "Houston", + "credit_score": 835, + "risk_profile": "High" + }, + { + "firstname": "Emma", + "lastname": "Garcia", + "age": 60, + "city": "San Diego", + "credit_score": 427, + "risk_profile": "High" + }, + { + "firstname": "Jane", + "lastname": "Smith", + "age": 67, + "city": "Philadelphia", + "credit_score": 724, + "risk_profile": "Medium" + }, + { + "firstname": "Jane", + "lastname": "Smith", + "age": 58, + "city": "Los Angeles", + "credit_score": 214, + "risk_profile": "Medium" + }, + { + "firstname": "Alice", + "lastname": "Jones", + "age": 21, + "city": "Houston", + "credit_score": 743, + "risk_profile": "Medium" + }, + { + "firstname": "Charlie", + "lastname": "Johnson", + "age": 70, + "city": "Los Angeles", + "credit_score": 729, + "risk_profile": "Medium" + }, + { + "firstname": "Grace", + "lastname": "Garcia", + "age": 64, + "city": "Los Angeles", + "credit_score": 964, + "risk_profile": "Low" + }, + { + "firstname": "Frank", + "lastname": "Brown", + "age": 46, + "city": "Dallas", + "credit_score": 303, + "risk_profile": "Medium" + }, + { + "firstname": "Hannah", + "lastname": "Jones", + "age": 27, + "city": "Philadelphia", + "credit_score": 299, + "risk_profile": "Low" + }, + { + "firstname": "John", + "lastname": "Smith", + "age": 26, + "city": "New York", + "credit_score": 491, + "risk_profile": "Low" + }, + { + "firstname": "Alice", + "lastname": "Williams", + "age": 41, + "city": "San Diego", + "credit_score": 158, + "risk_profile": "Medium" } ] \ No newline at end of file From 897aa853b1c2195568f12a0545de4af037d9245d Mon Sep 17 00:00:00 2001 From: Tung Lieu Date: Tue, 4 Mar 2025 19:55:07 +0700 Subject: [PATCH 06/22] update test case --- .../test_constraints/test_constraints.py | 10 ++++++++++ .../test_constraints/test_constraints.xml | 4 ++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/tests_ce/functional_tests/test_constraints/test_constraints.py b/tests_ce/functional_tests/test_constraints/test_constraints.py index d8e296ab..ff1a3621 100644 --- a/tests_ce/functional_tests/test_constraints/test_constraints.py +++ b/tests_ce/functional_tests/test_constraints/test_constraints.py @@ -20,3 +20,13 @@ def test_constraints(self): engine = DataMimicTest(test_dir=self._test_dir, filename="test_constraints.xml", capture_test_result=True) engine.test_with_timer() result = engine.capture_result() + + synthetic_customers = result["synthetic_customers"] + assert len(synthetic_customers) == 100 + for ele in synthetic_customers: + if ele["credit_score"] < 600: + assert ele["risk_profile"] == 'High' + elif 600 <= ele["credit_score"] < 750: + assert ele["risk_profile"] == 'Medium' + else: + assert ele["risk_profile"] == 'Low' diff --git a/tests_ce/functional_tests/test_constraints/test_constraints.xml b/tests_ce/functional_tests/test_constraints/test_constraints.xml index 7d3c36b2..54f2464c 100644 --- a/tests_ce/functional_tests/test_constraints/test_constraints.xml +++ b/tests_ce/functional_tests/test_constraints/test_constraints.xml @@ -1,6 +1,6 @@ - + + source="script/person_data.json" cyclic="True" target="ConsoleExporter"> From e28819a619d86218a82fa66d5dd2e939374fd092 Mon Sep 17 00:00:00 2001 From: Tung Lieu Date: Wed, 5 Mar 2025 13:36:22 +0700 Subject: [PATCH 07/22] update constraints task, update test case --- datamimic_ce/tasks/constraints_task.py | 35 +++- datamimic_ce/workers/generate_worker.py | 2 +- .../script/person_less_data.json | 162 ++++++++++++++++++ .../test_constraints/test_constraints.py | 56 ++++++ .../test_constraints_non_cyclic.xml | 17 ++ .../test_constraints_order_distribution.xml | 13 ++ .../test_constraints_single_processing.xml | 11 ++ 7 files changed, 290 insertions(+), 6 deletions(-) create mode 100644 tests_ce/functional_tests/test_constraints/script/person_less_data.json create mode 100644 tests_ce/functional_tests/test_constraints/test_constraints_non_cyclic.xml create mode 100644 tests_ce/functional_tests/test_constraints/test_constraints_order_distribution.xml create mode 100644 tests_ce/functional_tests/test_constraints/test_constraints_single_processing.xml diff --git a/datamimic_ce/tasks/constraints_task.py b/datamimic_ce/tasks/constraints_task.py index 9108d126..35e021b0 100644 --- a/datamimic_ce/tasks/constraints_task.py +++ b/datamimic_ce/tasks/constraints_task.py @@ -3,6 +3,11 @@ # This software is licensed under the MIT License. # See LICENSE file for the full text of the license. # For questions and support, contact: info@rapiddweller.com +import copy +import itertools + +from datamimic_ce.data_sources.data_source_pagination import DataSourcePagination + from datamimic_ce.contexts.context import SAFE_GLOBALS from datamimic_ce.statements.rule_statement import RuleStatement @@ -26,15 +31,35 @@ def statement(self) -> ConstraintsStatement: def execute(self, parent_context: GenIterContext, source_data): pass - def filter(self, source_data): - for i in range(len(source_data) - 1, -1, -1): # Iterate from last to first - data_dict = source_data[i] + def filter(self, source_data, pagination: DataSourcePagination | None, cyclic: bool = False) -> list: + filter_data = list(source_data) + # If source is empty, return empty list + if len(filter_data) == 0: + return [] + + for i in range(len(filter_data) - 1, -1, -1): # Iterate from last to first + data_dict = filter_data[i] for child_stmt in self.statement.sub_statements: if isinstance(child_stmt, RuleStatement): if_condition = eval(child_stmt.if_rule, SAFE_GLOBALS, data_dict) if isinstance(if_condition, bool) and if_condition: else_condition = eval(child_stmt.then_rule, SAFE_GLOBALS, data_dict) if isinstance(else_condition, bool) and else_condition is False: - del source_data[i] + del filter_data[i] break - return source_data + # If filtered data is empty, return empty list + if len(filter_data) == 0: + return [] + + if pagination is None: + start_idx = 0 + end_idx = len(filter_data) + else: + start_idx = pagination.skip + end_idx = pagination.skip + pagination.limit + # Get cyclic data from filtered data source + if cyclic: + iterator = itertools.cycle(filter_data) + return [copy.deepcopy(ele) for ele in itertools.islice(iterator, start_idx, end_idx)] + else: + return list(itertools.islice(filter_data, start_idx, end_idx)) diff --git a/datamimic_ce/workers/generate_worker.py b/datamimic_ce/workers/generate_worker.py index f46baaed..fc678e58 100644 --- a/datamimic_ce/workers/generate_worker.py +++ b/datamimic_ce/workers/generate_worker.py @@ -193,7 +193,7 @@ def _generate_product_by_page_in_single_process( # filter source_data with constraints-rule task when specify for task in tasks: if isinstance(task, ConstraintsTask): - source_data = task.filter(source_data) + source_data = task.filter(source_data, pagination, stmt.cyclic) # Shuffle source data if distribution is random if is_random_distribution: diff --git a/tests_ce/functional_tests/test_constraints/script/person_less_data.json b/tests_ce/functional_tests/test_constraints/script/person_less_data.json new file mode 100644 index 00000000..d744d26d --- /dev/null +++ b/tests_ce/functional_tests/test_constraints/script/person_less_data.json @@ -0,0 +1,162 @@ +[ + { + "firstname": "Charlie", + "lastname": "Brown", + "age": 61, + "city": "New York", + "credit_score": 707, + "risk_profile": "Low" + }, + { + "firstname": "Jane", + "lastname": "Williams", + "age": 28, + "city": "Philadelphia", + "credit_score": 387, + "risk_profile": "High" + }, + { + "firstname": "Alice", + "lastname": "Johnson", + "age": 60, + "city": "San Diego", + "credit_score": 485, + "risk_profile": "Low" + }, + { + "firstname": "Jane", + "lastname": "Smith", + "age": 31, + "city": "Los Angeles", + "credit_score": 631, + "risk_profile": "Low" + }, + { + "firstname": "Grace", + "lastname": "Garcia", + "age": 24, + "city": "Phoenix", + "credit_score": 157, + "risk_profile": "Medium" + }, + { + "firstname": "Frank", + "lastname": "Garcia", + "age": 18, + "city": "Dallas", + "credit_score": 471, + "risk_profile": "Medium" + }, + { + "firstname": "John", + "lastname": "Taylor", + "age": 29, + "city": "Philadelphia", + "credit_score": 985, + "risk_profile": "Medium" + }, + { + "firstname": "Emma", + "lastname": "Jones", + "age": 68, + "city": "Houston", + "credit_score": 488, + "risk_profile": "Medium" + }, + { + "firstname": "Bob", + "lastname": "Smith", + "age": 52, + "city": "Dallas", + "credit_score": 252, + "risk_profile": "High" + }, + { + "firstname": "Jane", + "lastname": "Brown", + "age": 67, + "city": "Dallas", + "credit_score": 600, + "risk_profile": "Medium" + }, + { + "firstname": "David", + "lastname": "Brown", + "age": 21, + "city": "Philadelphia", + "credit_score": 612, + "risk_profile": "High" + }, + { + "firstname": "Jane", + "lastname": "Brown", + "age": 70, + "city": "Phoenix", + "credit_score": 918, + "risk_profile": "Medium" + }, + { + "firstname": "Hannah", + "lastname": "Davis", + "age": 26, + "city": "Philadelphia", + "credit_score": 767, + "risk_profile": "Medium" + }, + { + "firstname": "Bob", + "lastname": "Johnson", + "age": 38, + "city": "San Antonio", + "credit_score": 431, + "risk_profile": "Low" + }, + { + "firstname": "John", + "lastname": "Johnson", + "age": 48, + "city": "Los Angeles", + "credit_score": 155, + "risk_profile": "Medium" + }, + { + "firstname": "John", + "lastname": "Davis", + "age": 29, + "city": "San Jose", + "credit_score": 496, + "risk_profile": "Medium" + }, + { + "firstname": "John", + "lastname": "Johnson", + "age": 29, + "city": "Los Angeles", + "credit_score": 586, + "risk_profile": "Medium" + }, + { + "firstname": "Bob", + "lastname": "Williams", + "age": 53, + "city": "Los Angeles", + "credit_score": 810, + "risk_profile": "Medium" + }, + { + "firstname": "Jane", + "lastname": "Miller", + "age": 46, + "city": "Dallas", + "credit_score": 704, + "risk_profile": "High" + }, + { + "firstname": "Charlie", + "lastname": "Garcia", + "age": 67, + "city": "Phoenix", + "credit_score": 487, + "risk_profile": "Medium" + } +] \ No newline at end of file diff --git a/tests_ce/functional_tests/test_constraints/test_constraints.py b/tests_ce/functional_tests/test_constraints/test_constraints.py index ff1a3621..430f6e9a 100644 --- a/tests_ce/functional_tests/test_constraints/test_constraints.py +++ b/tests_ce/functional_tests/test_constraints/test_constraints.py @@ -30,3 +30,59 @@ def test_constraints(self): assert ele["risk_profile"] == 'Medium' else: assert ele["risk_profile"] == 'Low' + + def test_constraints_non_cyclic(self): + engine = DataMimicTest(test_dir=self._test_dir, filename="test_constraints_non_cyclic.xml", capture_test_result=True) + engine.test_with_timer() + result = engine.capture_result() + + original_customers = result["original_customers"] + assert len(original_customers) == 100 + assert any(customer["risk_profile"] != 'High' for customer in original_customers + if customer["credit_score"] < 600) + assert any(customer["risk_profile"] != 'Medium' for customer in original_customers + if 600 <= customer["credit_score"] < 750) + assert any(customer["risk_profile"] != 'Low' for customer in original_customers + if customer["credit_score"] >= 750) + + # filtered generate + constraints_customers = result["constraints_customers"] + assert len(constraints_customers) < 100 + + for ele in constraints_customers: + if ele["credit_score"] < 600: + assert ele["risk_profile"] == 'High' + elif 600 <= ele["credit_score"] < 750: + assert ele["risk_profile"] == 'Medium' + else: + assert ele["risk_profile"] == 'Low' + + def test_constraints_order_distribution(self): + engine = DataMimicTest(test_dir=self._test_dir, filename="test_constraints_order_distribution.xml", capture_test_result=True) + engine.test_with_timer() + result = engine.capture_result() + + synthetic_customers = result["synthetic_customers"] + assert len(synthetic_customers) == 100 + for ele in synthetic_customers: + if ele["credit_score"] < 600: + assert ele["risk_profile"] == 'High' + elif 600 <= ele["credit_score"] < 750: + assert ele["risk_profile"] == 'Medium' + else: + assert ele["risk_profile"] == 'Low' + + def test_constraints_single_processing(self): + engine = DataMimicTest(test_dir=self._test_dir, filename="test_constraints_single_processing.xml", capture_test_result=True) + engine.test_with_timer() + result = engine.capture_result() + + synthetic_customers = result["synthetic_customers"] + assert len(synthetic_customers) == 100 + for ele in synthetic_customers: + if ele["credit_score"] < 600: + assert ele["risk_profile"] == 'High' + elif 600 <= ele["credit_score"] < 750: + assert ele["risk_profile"] == 'Medium' + else: + assert ele["risk_profile"] == 'Low' diff --git a/tests_ce/functional_tests/test_constraints/test_constraints_non_cyclic.xml b/tests_ce/functional_tests/test_constraints/test_constraints_non_cyclic.xml new file mode 100644 index 00000000..164ec6b7 --- /dev/null +++ b/tests_ce/functional_tests/test_constraints/test_constraints_non_cyclic.xml @@ -0,0 +1,17 @@ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tests_ce/functional_tests/test_constraints/test_constraints_order_distribution.xml b/tests_ce/functional_tests/test_constraints/test_constraints_order_distribution.xml new file mode 100644 index 00000000..ea4f026d --- /dev/null +++ b/tests_ce/functional_tests/test_constraints/test_constraints_order_distribution.xml @@ -0,0 +1,13 @@ + + + + + + + + + + \ No newline at end of file diff --git a/tests_ce/functional_tests/test_constraints/test_constraints_single_processing.xml b/tests_ce/functional_tests/test_constraints/test_constraints_single_processing.xml new file mode 100644 index 00000000..be8a186e --- /dev/null +++ b/tests_ce/functional_tests/test_constraints/test_constraints_single_processing.xml @@ -0,0 +1,11 @@ + + + + + + + + + + \ No newline at end of file From 1964c9f4828c54f8f7320348ad6681dac946c0fb Mon Sep 17 00:00:00 2001 From: Tung Lieu Date: Wed, 5 Mar 2025 13:55:49 +0700 Subject: [PATCH 08/22] refactor code --- datamimic_ce/parsers/parser_util.py | 4 ++-- datamimic_ce/tasks/constraints_task.py | 4 +--- datamimic_ce/tasks/rule_task.py | 17 +++-------------- datamimic_ce/tasks/task_util.py | 4 ++-- .../test_constraints/test_constraints.py | 3 --- 5 files changed, 8 insertions(+), 24 deletions(-) diff --git a/datamimic_ce/parsers/parser_util.py b/datamimic_ce/parsers/parser_util.py index 2fb0419b..52cf1235 100644 --- a/datamimic_ce/parsers/parser_util.py +++ b/datamimic_ce/parsers/parser_util.py @@ -246,9 +246,9 @@ def parse_sub_elements( | GeneratorParser, ): stmt = parser.parse() - elif isinstance(parser, KeyParser | ConstraintsParser | RuleParser): + elif isinstance(parser, KeyParser | RuleParser): stmt = parser.parse(descriptor_dir=descriptor_dir, parent_stmt=parent_stmt) - elif isinstance(parser, ConditionParser): + elif isinstance(parser, ConditionParser | ConstraintsParser): stmt = parser.parse( descriptor_dir=descriptor_dir, parent_stmt=cast(CompositeStatement, parent_stmt) ) diff --git a/datamimic_ce/tasks/constraints_task.py b/datamimic_ce/tasks/constraints_task.py index 35e021b0..95e3ce57 100644 --- a/datamimic_ce/tasks/constraints_task.py +++ b/datamimic_ce/tasks/constraints_task.py @@ -11,7 +11,6 @@ from datamimic_ce.contexts.context import SAFE_GLOBALS from datamimic_ce.statements.rule_statement import RuleStatement -from datamimic_ce.utils.base_class_factory_util import BaseClassFactoryUtil from datamimic_ce.contexts.geniter_context import GenIterContext from datamimic_ce.tasks.task import Task @@ -20,9 +19,8 @@ class ConstraintsTask(Task): - def __init__(self, statement: ConstraintsStatement, class_factory_util: BaseClassFactoryUtil): + def __init__(self, statement: ConstraintsStatement): self._statement = statement - self._class_factory_util = class_factory_util @property def statement(self) -> ConstraintsStatement: diff --git a/datamimic_ce/tasks/rule_task.py b/datamimic_ce/tasks/rule_task.py index bc9e2da7..f7cc2e35 100644 --- a/datamimic_ce/tasks/rule_task.py +++ b/datamimic_ce/tasks/rule_task.py @@ -5,31 +5,20 @@ # For questions and support, contact: info@rapiddweller.com from datamimic_ce.contexts.geniter_context import GenIterContext -from datamimic_ce.contexts.setup_context import SetupContext -from datamimic_ce.statements.item_statement import ItemStatement -from datamimic_ce.tasks.element_task import ElementTask +from datamimic_ce.statements.rule_statement import RuleStatement from datamimic_ce.tasks.task import Task -from datamimic_ce.utils.base_class_factory_util import BaseClassFactoryUtil class RuleTask(Task): def __init__( self, - ctx: SetupContext, - statement: ItemStatement, - class_factory_util: BaseClassFactoryUtil, + statement: RuleStatement, ): self._statement = statement - self._class_factory_util = class_factory_util @property - def statement(self) -> ItemStatement: + def statement(self) -> RuleStatement: return self._statement def execute(self, parent_context: GenIterContext): - """ - Change datas base on condition in element "rule" - :param parent_context: - :return: - """ pass diff --git a/datamimic_ce/tasks/task_util.py b/datamimic_ce/tasks/task_util.py index 65011e27..21785470 100644 --- a/datamimic_ce/tasks/task_util.py +++ b/datamimic_ce/tasks/task_util.py @@ -134,9 +134,9 @@ def get_task_by_statement( elif isinstance(stmt, GeneratorStatement): return GeneratorTask(stmt) elif isinstance(stmt, ConstraintsStatement): - return ConstraintsTask(stmt, class_factory_util) + return ConstraintsTask(stmt) elif isinstance(stmt, RuleStatement): - return RuleTask(stmt, class_factory_util) + return RuleTask(stmt) else: raise ValueError(f"Cannot created task for statement {stmt.__class__.__name__}") diff --git a/tests_ce/functional_tests/test_constraints/test_constraints.py b/tests_ce/functional_tests/test_constraints/test_constraints.py index 430f6e9a..18cc2162 100644 --- a/tests_ce/functional_tests/test_constraints/test_constraints.py +++ b/tests_ce/functional_tests/test_constraints/test_constraints.py @@ -4,12 +4,9 @@ # See LICENSE file for the full text of the license. # For questions and support, contact: info@rapiddweller.com - from pathlib import Path from unittest import TestCase -import pytest - from datamimic_ce.data_mimic_test import DataMimicTest From 3e7c21c5d7f0f524e79224ce82172b30147c26e1 Mon Sep 17 00:00:00 2001 From: Tung Lieu Date: Wed, 5 Mar 2025 14:02:46 +0700 Subject: [PATCH 09/22] fix ruff and mypy --- datamimic_ce/model/rule_model.py | 3 +-- datamimic_ce/parsers/constraints_parser.py | 5 ++--- datamimic_ce/parsers/parser_util.py | 4 +++- datamimic_ce/parsers/rule_parser.py | 6 ++---- datamimic_ce/tasks/constraints_task.py | 12 ++++-------- 5 files changed, 12 insertions(+), 18 deletions(-) diff --git a/datamimic_ce/model/rule_model.py b/datamimic_ce/model/rule_model.py index 0ec2179f..db9aac3b 100644 --- a/datamimic_ce/model/rule_model.py +++ b/datamimic_ce/model/rule_model.py @@ -5,8 +5,7 @@ # For questions and support, contact: info@rapiddweller.com -from datamimic_ce.model.model_util import ModelUtil -from pydantic import BaseModel, Field, field_validator +from pydantic import BaseModel, Field from datamimic_ce.constants.attribute_constants import ATTR_IF, ATTR_THEN diff --git a/datamimic_ce/parsers/constraints_parser.py b/datamimic_ce/parsers/constraints_parser.py index 07d25ba3..a0b8a2d2 100644 --- a/datamimic_ce/parsers/constraints_parser.py +++ b/datamimic_ce/parsers/constraints_parser.py @@ -8,12 +8,11 @@ from pathlib import Path from xml.etree.ElementTree import Element +from datamimic_ce.constants.element_constants import EL_CONSTRAINTS from datamimic_ce.parsers.statement_parser import StatementParser from datamimic_ce.statements.composite_statement import CompositeStatement -from datamimic_ce.utils.base_class_factory_util import BaseClassFactoryUtil - -from datamimic_ce.constants.element_constants import EL_CONSTRAINTS from datamimic_ce.statements.constraints_statement import ConstraintsStatement +from datamimic_ce.utils.base_class_factory_util import BaseClassFactoryUtil class ConstraintsParser(StatementParser): diff --git a/datamimic_ce/parsers/parser_util.py b/datamimic_ce/parsers/parser_util.py index 52cf1235..00adea48 100644 --- a/datamimic_ce/parsers/parser_util.py +++ b/datamimic_ce/parsers/parser_util.py @@ -15,6 +15,7 @@ from datamimic_ce.constants.element_constants import ( EL_ARRAY, EL_CONDITION, + EL_CONSTRAINTS, EL_DATABASE, EL_ECHO, EL_ELEMENT, @@ -32,8 +33,9 @@ EL_MONGODB, EL_NESTED_KEY, EL_REFERENCE, + EL_RULE, EL_SETUP, - EL_VARIABLE, EL_CONSTRAINTS, EL_RULE, + EL_VARIABLE, ) from datamimic_ce.logger import logger from datamimic_ce.parsers.array_parser import ArrayParser diff --git a/datamimic_ce/parsers/rule_parser.py b/datamimic_ce/parsers/rule_parser.py index 4c4eea5f..5bbe1b24 100644 --- a/datamimic_ce/parsers/rule_parser.py +++ b/datamimic_ce/parsers/rule_parser.py @@ -6,13 +6,11 @@ from pathlib import Path from xml.etree.ElementTree import Element +from datamimic_ce.constants.element_constants import EL_RULE from datamimic_ce.model.rule_model import RuleModel +from datamimic_ce.parsers.statement_parser import StatementParser from datamimic_ce.statements.rule_statement import RuleStatement from datamimic_ce.statements.statement import Statement - -from datamimic_ce.parsers.statement_parser import StatementParser - -from datamimic_ce.constants.element_constants import EL_RULE from datamimic_ce.utils.base_class_factory_util import BaseClassFactoryUtil diff --git a/datamimic_ce/tasks/constraints_task.py b/datamimic_ce/tasks/constraints_task.py index 95e3ce57..3a8d076b 100644 --- a/datamimic_ce/tasks/constraints_task.py +++ b/datamimic_ce/tasks/constraints_task.py @@ -6,16 +6,12 @@ import copy import itertools -from datamimic_ce.data_sources.data_source_pagination import DataSourcePagination - from datamimic_ce.contexts.context import SAFE_GLOBALS - -from datamimic_ce.statements.rule_statement import RuleStatement - from datamimic_ce.contexts.geniter_context import GenIterContext -from datamimic_ce.tasks.task import Task - +from datamimic_ce.data_sources.data_source_pagination import DataSourcePagination from datamimic_ce.statements.constraints_statement import ConstraintsStatement +from datamimic_ce.statements.rule_statement import RuleStatement +from datamimic_ce.tasks.task import Task class ConstraintsTask(Task): @@ -29,7 +25,7 @@ def statement(self) -> ConstraintsStatement: def execute(self, parent_context: GenIterContext, source_data): pass - def filter(self, source_data, pagination: DataSourcePagination | None, cyclic: bool = False) -> list: + def filter(self, source_data, pagination: DataSourcePagination | None, cyclic: bool | None = False) -> list: filter_data = list(source_data) # If source is empty, return empty list if len(filter_data) == 0: From 34173a1730396f62cdd5e965b6758b69eec75fee Mon Sep 17 00:00:00 2001 From: Tung Lieu Date: Wed, 5 Mar 2025 15:29:44 +0700 Subject: [PATCH 10/22] edit test --- .../test_constraints/test_constraints.py | 11 ++++++++++- .../test_constraints/test_constraints.xml | 4 ++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/tests_ce/functional_tests/test_constraints/test_constraints.py b/tests_ce/functional_tests/test_constraints/test_constraints.py index 18cc2162..addbc0c1 100644 --- a/tests_ce/functional_tests/test_constraints/test_constraints.py +++ b/tests_ce/functional_tests/test_constraints/test_constraints.py @@ -19,8 +19,10 @@ def test_constraints(self): result = engine.capture_result() synthetic_customers = result["synthetic_customers"] - assert len(synthetic_customers) == 100 + assert len(synthetic_customers) == 10000 for ele in synthetic_customers: + assert isinstance(ele["id"], int) + assert ele["id"] in range(1, 10001) if ele["credit_score"] < 600: assert ele["risk_profile"] == 'High' elif 600 <= ele["credit_score"] < 750: @@ -41,12 +43,15 @@ def test_constraints_non_cyclic(self): if 600 <= customer["credit_score"] < 750) assert any(customer["risk_profile"] != 'Low' for customer in original_customers if customer["credit_score"] >= 750) + assert all(customer["id"] is not None for customer in original_customers) # filtered generate constraints_customers = result["constraints_customers"] assert len(constraints_customers) < 100 for ele in constraints_customers: + assert isinstance(ele["id"], int) + assert ele["id"] in range(1, 101) if ele["credit_score"] < 600: assert ele["risk_profile"] == 'High' elif 600 <= ele["credit_score"] < 750: @@ -62,6 +67,8 @@ def test_constraints_order_distribution(self): synthetic_customers = result["synthetic_customers"] assert len(synthetic_customers) == 100 for ele in synthetic_customers: + assert isinstance(ele["id"], int) + assert ele["id"] in range(1, 101) if ele["credit_score"] < 600: assert ele["risk_profile"] == 'High' elif 600 <= ele["credit_score"] < 750: @@ -77,6 +84,8 @@ def test_constraints_single_processing(self): synthetic_customers = result["synthetic_customers"] assert len(synthetic_customers) == 100 for ele in synthetic_customers: + assert isinstance(ele["id"], int) + assert ele["id"] in range(1, 101) if ele["credit_score"] < 600: assert ele["risk_profile"] == 'High' elif 600 <= ele["credit_score"] < 750: diff --git a/tests_ce/functional_tests/test_constraints/test_constraints.xml b/tests_ce/functional_tests/test_constraints/test_constraints.xml index 54f2464c..d7103c24 100644 --- a/tests_ce/functional_tests/test_constraints/test_constraints.xml +++ b/tests_ce/functional_tests/test_constraints/test_constraints.xml @@ -1,6 +1,6 @@ - + From 7e0a7cd8385d98531fc831ed1f0829c6295a31d7 Mon Sep 17 00:00:00 2001 From: Tung Lieu Date: Wed, 5 Mar 2025 17:21:38 +0700 Subject: [PATCH 11/22] add more test --- datamimic_ce/tasks/constraints_task.py | 2 +- .../conf/environment.env.properties | 2 + .../test_constraints/data/bill_data.ent.csv | 101 ++++++++++++++++++ .../data/hospital_data.ent.csv | 11 ++ .../data/patient_data.ent.csv | 51 +++++++++ .../test_constraints/script/hospital.scr.sql | 28 +++++ .../test_constraints/test_constraints.py | 27 ++++- .../test_constraints_with_mem.xml | 24 +++++ 8 files changed, 242 insertions(+), 4 deletions(-) create mode 100644 tests_ce/functional_tests/test_constraints/conf/environment.env.properties create mode 100644 tests_ce/functional_tests/test_constraints/data/bill_data.ent.csv create mode 100644 tests_ce/functional_tests/test_constraints/data/hospital_data.ent.csv create mode 100644 tests_ce/functional_tests/test_constraints/data/patient_data.ent.csv create mode 100644 tests_ce/functional_tests/test_constraints/script/hospital.scr.sql create mode 100644 tests_ce/functional_tests/test_constraints/test_constraints_with_mem.xml diff --git a/datamimic_ce/tasks/constraints_task.py b/datamimic_ce/tasks/constraints_task.py index 3a8d076b..626bd8bc 100644 --- a/datamimic_ce/tasks/constraints_task.py +++ b/datamimic_ce/tasks/constraints_task.py @@ -39,7 +39,7 @@ def filter(self, source_data, pagination: DataSourcePagination | None, cyclic: b if isinstance(if_condition, bool) and if_condition: else_condition = eval(child_stmt.then_rule, SAFE_GLOBALS, data_dict) if isinstance(else_condition, bool) and else_condition is False: - del filter_data[i] + del filter_data[i] # remove data that not meet then_rule break # If filtered data is empty, return empty list if len(filter_data) == 0: diff --git a/tests_ce/functional_tests/test_constraints/conf/environment.env.properties b/tests_ce/functional_tests/test_constraints/conf/environment.env.properties new file mode 100644 index 00000000..abb65a5b --- /dev/null +++ b/tests_ce/functional_tests/test_constraints/conf/environment.env.properties @@ -0,0 +1,2 @@ +source.db.database=test +source.db.dbms=sqlite \ No newline at end of file diff --git a/tests_ce/functional_tests/test_constraints/data/bill_data.ent.csv b/tests_ce/functional_tests/test_constraints/data/bill_data.ent.csv new file mode 100644 index 00000000..57cca53e --- /dev/null +++ b/tests_ce/functional_tests/test_constraints/data/bill_data.ent.csv @@ -0,0 +1,101 @@ +bill_id|patient_id|amount|date|payment_method +1|1|102|2016-05-09|Cash +2|2|18|2018-03-14|Credit Card +3|3|60|1999-09-24|Cash +4|4|19|2015-07-08|Cash +5|5|25.86|1999-05-04|Credit Card +6|6|24|2021-04-10|Credit Card +7|7|15.32|2015-09-24|Insurance +8|8|48.1|2016-01-03|Credit Card +9|9|2.38|2015-02-11|Credit Card +10|10|1570|2020-06-11|Insurance +11|11|20.97|1991-11-21|Cash +12|12|142.59|1934-09-19|Insurance +13|13|10.8|1998-01-11|Credit Card +14|14|19.98|1998-08-23|Cash +15|15|2.21|1970-01-01|Credit Card +16|16|140|2014-12-01|Insurance +17|17|172.2|1998-08-04|Insurance +18|18|23.6|2018-05-30|Credit Card +19|19|18.67|2022-03-02|Cash +20|20|21.51|2018-02-05|Credit Card +21|21|6.67|2014-10-21|Insurance +22|22|23.42|1998-05-15|Insurance +23|23|3.7|1997-09-03|Cash +24|24|17|2016-11-27|Credit Card +25|25|386.5|2020-08-13|Insurance +26|26|48.37|1999-12-21|Insurance +27|27|10.65|1999-04-17|Credit Card +28|28|12|1993-05-18|Cash +29|29|14.98|2018-01-01|Credit Card +30|30|481|2015-11-12|Insurance +31|31|128|1950-07-25|Credit Card +32|32|636.9|2019-01-02|Insurance +33|33|11.58|1909-09-03|Cash +34|34|39.8|1996-04-23|Cash +35|35|13.04|1998-09-11|Cash +36|36|120|2017-11-26|Credit Card +37|37|284.99|2022-10-21|Insurance +38|38|2.76|2019-10-15|Insurance +39|39|415.94|1996-07-27|Credit Card +40|40|97|2021-08-01|Insurance +41|41|15.3|1994-11-06|Insurance +42|42|396.9|2017-04-20|Credit Card +43|43|14.32|2019-01-20|Cash +44|44|29.32|2019-12-14|Cash +45|45|1.47|2017-02-28|Insurance +46|46|29.35|2022-07-21|Cash +47|47|33.65|1990-04-11|Insurance +48|48|9.89|1991-12-19|Insurance +49|49|146|2019-01-03|Cash +50|50|63.8|2016-07-15|Cash +51|1|29.93|2020-02-05|Cash +52|2|27.3|2015-09-12|Insurance +53|3|32|2021-05-11|Cash +54|4|4.2|1997-06-01|Insurance +55|5|918.21|1994-09-08|Cash +56|6|251|1997-03-21|Insurance +57|7|16.01|1999-01-13|Cash +58|8|141.58|1999-02-02|Insurance +59|9|12.3|1998-10-21|Credit Card +60|10|11|2020-10-04|Cash +61|11|19.32|2021-10-12|Credit Card +62|12|171.04|1999-04-01|Credit Card +63|13|430.5|2016-04-11|Cash +64|14|535.5|1999-07-20|Cash +65|15|55|1999-04-25|Insurance +66|16|179|2019-06-05|Insurance +67|17|18|2019-01-01|Cash +68|18|131.56|1999-10-05|Cash +69|19|147|1999-10-26|Insurance +70|20|130|2020-07-15|Insurance +71|21|104|1998-11-10|Insurance +72|22|119.4|2018-08-12|Credit Card +73|23|109|2020-09-20|Credit Card +74|24|195.08|1999-12-16|Insurance +75|25|5.4|2016-04-26|Cash +76|26|158|1910-12-20|Insurance +77|27|19.55|1997-06-21|Insurance +78|28|15.61|2018-06-23|Insurance +79|29|169|2015-12-26|Insurance +80|30|62.1|2015-04-15|Insurance +81|31|72|1996-12-16|Insurance +82|32|4|1998-11-21|Insurance +83|33|10|2015-11-29|Cash +84|34|135|1900-01-01|Insurance +85|35|103.58|2010-10-22|Insurance +86|36|4|1995-06-09|Credit Card +87|37|459|2006-12-05|Cash +88|38|25|2017-09-22|Credit Card +89|39|52.1|1999-05-05|Cash +90|40|16.07|1999-07-05|Cash +91|41|7.01|2017-04-27|Insurance +92|42|262|1998-03-06|Insurance +93|43|12|2018-03-05|Insurance +94|44|58|2014-01-02|Cash +95|45|40.95|1991-09-14|Insurance +96|46|117.12|1999-05-01|Credit Card +97|47|183.1|2020-03-17|Credit Card +98|48|56.85|2021-08-03|Insurance +99|49|15|1996-04-26|Insurance +100|50|5.5|2020-04-10|Insurance \ No newline at end of file diff --git a/tests_ce/functional_tests/test_constraints/data/hospital_data.ent.csv b/tests_ce/functional_tests/test_constraints/data/hospital_data.ent.csv new file mode 100644 index 00000000..2c44c714 --- /dev/null +++ b/tests_ce/functional_tests/test_constraints/data/hospital_data.ent.csv @@ -0,0 +1,11 @@ +hospital_id|hospital_name|location|number_of_beds +1|St. Kitts-Nevis|St. Kitts-Nevis|15 +2|Apollo Spectra Hospital|Bangalore|270 +3|MISSION HOSPITALS|SAPAKORANA|25 +4|TATA MEMORIAL HOSPITAL|MUMBAI|1050 +5|McLeod Regional Medical Center|Florence|236 +6|RAHUL HOSPITAL AND MEDICAL CENTRE|Dhule|50 +7|Seagate Hospital|Marshall Islands|20 +8|Apollo Cradle and Children's Hospital|Chennai|338 +9|MGM Medical College Hospital|Varanasi|500 +10|Holy Family Hospital|Telangana|400 \ No newline at end of file diff --git a/tests_ce/functional_tests/test_constraints/data/patient_data.ent.csv b/tests_ce/functional_tests/test_constraints/data/patient_data.ent.csv new file mode 100644 index 00000000..e3b50801 --- /dev/null +++ b/tests_ce/functional_tests/test_constraints/data/patient_data.ent.csv @@ -0,0 +1,51 @@ +patient_id|patient_name|age|gender|hospital_id|diagnosis +1|MEHMET Ali|39|Male|1|COPD +2|Abdullah Al Sagheer|31|Male|2|Pneumonia +3|JOHN DOE|38|Male|3|Acute Appendicitis +4|Nicholas Riley|60|Male|4|Heart murmur +5|Mrs. Vijaya Manju|56|Female|5|Chronic Obstructive Respiratory Disease with Acute Upper Respiratory Infection +6|Fabio|49|Male|6|Gallbladder disease +7|KRISHNAKUMAR MOORTHY|51|Male|7|Hypertension with 02+ Oedema +8|Peter|20|Male|8|Musculoskeletal system and connective tissue diseases +9|MARY JANE|56|Female|9|Pneumonia +10|SUSHMITA|27|Female|10|cholelithiasis +11|Veer Pratap Singh|19|Male|1|JIHAD +12|Sadam Ali|23|Male|2|Hypothyroidism +13|John|52|Male|3|Metastatic Carcinoma of Left Lung +14|Julianna|63|Female|4|Chronic obstructive pulmonary disease +15|JASON|36|Male|5|Hypertension +16|Dennis|26|Male|6|Heart Diseases +17|Chris|23|Male|7|Anemia +18|MRS. S. S.|45|Female|8|Other diseases of the respiratory system +19|Hemant Kumar|27|Male|9|Congestive Heart Failure +20|WILLIAM JAMES|52|Male|10|Chronic kidney disease +21|Michael|32|Male|1|Dementia +22|Giancarlo|75|Male|2|Chronic renal failure and hypertension +23|SAMUEL|23|Male|3|Pneumonia +24|James|38|Male|4|gastroenteritis +25|Alexander|33|Male|5|Pneumonia +26|Olivia|26|Female|6|Pneumonia +27|Entertainment|23|Male|7|Flying +28|Karthik|37|Male|8|Myocardial infarction +29|HARSHADINI TRIPATHI|39|Female|9|Acute limb ischemia +30|Keerthy|35|Female|10|Chronic kidney disease +31|KANISHK|27|Male|1|Pneumonia +32|Arnold Bernard|31|Male|2|Acute Myocardial Infarction +33|Jagdish SINGH|59|Male|3|Hypertensive chronic kidney disease +34|Sidra|32|Female|4|Headache +35|FARIS|27|Male|5|Myocardial infarction +36|James|36|Male|6|Fever +37|Ashok Kumar|42|Male|7|Coronary Atherosclerosis +38|Sara|52|Female|8|Pyelonephritis +39|SALLIE|45|Male|9|Acute renal failure +40|Nadia Alice|31|Female|10|Gastrointestinal Disorders +41|Khalifa|39|Male|1|Diabetes +42|JANET|33|Female|2|Influenza +43|Thomas|61|Male|3|Dementia +44|Benjamin|55|Male|4|Diabetes +45|ASHOK BABU|51|Male|5|Major Depressive Disorder +46|James|47|Male|6|Breast Cancer +47|Christopher|18|Male|7|Cryptic +48|GORDON|67|Male|8|DIABETES MELLITUS +49|Kayleigh|36|Male|9|Asthma +50|James|42|Male|10|Urinary Tract Infection \ No newline at end of file diff --git a/tests_ce/functional_tests/test_constraints/script/hospital.scr.sql b/tests_ce/functional_tests/test_constraints/script/hospital.scr.sql new file mode 100644 index 00000000..d9a9e0c9 --- /dev/null +++ b/tests_ce/functional_tests/test_constraints/script/hospital.scr.sql @@ -0,0 +1,28 @@ +-- Drop exists tables +DROP TABLE IF EXISTS hospital; +DROP TABLE IF EXISTS patient; +DROP TABLE IF EXISTS bill; + +CREATE TABLE hospital ( + hospital_id INTEGER PRIMARY KEY, + hospital_name TEXT NOT NULL, + location TEXT NOT NULL, + number_of_beds INTEGER NOT NULL +); + +CREATE TABLE patient ( + patient_id INTEGER PRIMARY KEY, + patient_name TEXT NOT NULL, + age INTEGER NOT NULL, + gender TEXT NOT NULL, + hospital_id INTEGER NOT NULL, + diagnosis TEXT NOT NULL +); + +CREATE TABLE bill ( + bill_id INTEGER PRIMARY KEY, + patient_id INTEGER NOT NULL, + amount REAL NOT NULL, + date TEXT NOT NULL, + payment_method TEXT NOT NULL +); \ No newline at end of file diff --git a/tests_ce/functional_tests/test_constraints/test_constraints.py b/tests_ce/functional_tests/test_constraints/test_constraints.py index addbc0c1..23716d3d 100644 --- a/tests_ce/functional_tests/test_constraints/test_constraints.py +++ b/tests_ce/functional_tests/test_constraints/test_constraints.py @@ -31,7 +31,9 @@ def test_constraints(self): assert ele["risk_profile"] == 'Low' def test_constraints_non_cyclic(self): - engine = DataMimicTest(test_dir=self._test_dir, filename="test_constraints_non_cyclic.xml", capture_test_result=True) + engine = DataMimicTest(test_dir=self._test_dir, + filename="test_constraints_non_cyclic.xml", + capture_test_result=True) engine.test_with_timer() result = engine.capture_result() @@ -60,7 +62,9 @@ def test_constraints_non_cyclic(self): assert ele["risk_profile"] == 'Low' def test_constraints_order_distribution(self): - engine = DataMimicTest(test_dir=self._test_dir, filename="test_constraints_order_distribution.xml", capture_test_result=True) + engine = DataMimicTest(test_dir=self._test_dir, + filename="test_constraints_order_distribution.xml", + capture_test_result=True) engine.test_with_timer() result = engine.capture_result() @@ -77,7 +81,9 @@ def test_constraints_order_distribution(self): assert ele["risk_profile"] == 'Low' def test_constraints_single_processing(self): - engine = DataMimicTest(test_dir=self._test_dir, filename="test_constraints_single_processing.xml", capture_test_result=True) + engine = DataMimicTest(test_dir=self._test_dir, + filename="test_constraints_single_processing.xml", + capture_test_result=True) engine.test_with_timer() result = engine.capture_result() @@ -92,3 +98,18 @@ def test_constraints_single_processing(self): assert ele["risk_profile"] == 'Medium' else: assert ele["risk_profile"] == 'Low' + + def test_constraints_with_mem(self): + engine = DataMimicTest(test_dir=self._test_dir, + filename="test_constraints_with_mem.xml", + capture_test_result=True) + engine.test_with_timer() + result = engine.capture_result() + + generate_selector = result["generate_selector"] + assert len(generate_selector) == 50 + assert any(patient["total_bill_amount"] < 150 for patient in generate_selector) + + constraint_patient = result["constraint_patient"] + assert len(constraint_patient) == 25 + assert all(patient["total_bill_amount"] > 150 for patient in constraint_patient) diff --git a/tests_ce/functional_tests/test_constraints/test_constraints_with_mem.xml b/tests_ce/functional_tests/test_constraints/test_constraints_with_mem.xml new file mode 100644 index 00000000..7b0e80a2 --- /dev/null +++ b/tests_ce/functional_tests/test_constraints/test_constraints_with_mem.xml @@ -0,0 +1,24 @@ + + + + + + + + + + + + + + + + + + + \ No newline at end of file From 4d3df5bf358266b947f10757ce449b3f6a098c9a Mon Sep 17 00:00:00 2001 From: Tung Lieu Date: Thu, 6 Mar 2025 10:30:12 +0700 Subject: [PATCH 12/22] check only one constraints per generate --- datamimic_ce/parsers/generate_parser.py | 16 ++++++++++++++++ .../test_constraints/test_constraints.py | 8 ++++++++ .../test_constraints/test_two_constraints.xml | 15 +++++++++++++++ 3 files changed, 39 insertions(+) create mode 100644 tests_ce/functional_tests/test_constraints/test_two_constraints.xml diff --git a/datamimic_ce/parsers/generate_parser.py b/datamimic_ce/parsers/generate_parser.py index 4a0db1be..80ccc3c5 100644 --- a/datamimic_ce/parsers/generate_parser.py +++ b/datamimic_ce/parsers/generate_parser.py @@ -10,6 +10,7 @@ from datamimic_ce.constants.element_constants import EL_GENERATE from datamimic_ce.model.generate_model import GenerateModel from datamimic_ce.parsers.statement_parser import StatementParser +from datamimic_ce.statements.constraints_statement import ConstraintsStatement from datamimic_ce.statements.generate_statement import GenerateStatement from datamimic_ce.statements.statement import Statement from datamimic_ce.utils.base_class_factory_util import BaseClassFactoryUtil @@ -50,5 +51,20 @@ def parse(self, descriptor_dir: Path, parent_stmt: Statement, lazy_parse: bool = self._properties, gen_stmt, ) + + self._check_only_one_constraints_tag(sub_stmt_list) + gen_stmt.sub_statements = sub_stmt_list return gen_stmt + + @staticmethod + def _check_only_one_constraints_tag(sub_stmt_list: list[Statement]): + """ + Only one 'constraints' tag per generate + """ + count = 0 + for stmt in sub_stmt_list: + if isinstance(stmt, ConstraintsStatement): + count += 1 + if count > 1: + raise SyntaxError("Only once allow in per ") diff --git a/tests_ce/functional_tests/test_constraints/test_constraints.py b/tests_ce/functional_tests/test_constraints/test_constraints.py index 23716d3d..3bc4bb5c 100644 --- a/tests_ce/functional_tests/test_constraints/test_constraints.py +++ b/tests_ce/functional_tests/test_constraints/test_constraints.py @@ -113,3 +113,11 @@ def test_constraints_with_mem(self): constraint_patient = result["constraint_patient"] assert len(constraint_patient) == 25 assert all(patient["total_bill_amount"] > 150 for patient in constraint_patient) + + def test_two_constraints(self): + engine = DataMimicTest(test_dir=self._test_dir, filename="test_two_constraints.xml", capture_test_result=True) + try: + engine.test_with_timer() + assert False + except SyntaxError as e: + assert "Only once allow in per " in e.msg \ No newline at end of file diff --git a/tests_ce/functional_tests/test_constraints/test_two_constraints.xml b/tests_ce/functional_tests/test_constraints/test_two_constraints.xml new file mode 100644 index 00000000..c6caa5d2 --- /dev/null +++ b/tests_ce/functional_tests/test_constraints/test_two_constraints.xml @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + \ No newline at end of file From 59c22c7f280f57b9acfe29bff7f663abd7bd78e2 Mon Sep 17 00:00:00 2001 From: Tung Lieu Date: Thu, 6 Mar 2025 14:13:06 +0700 Subject: [PATCH 13/22] fix rule not work with nested data --- datamimic_ce/tasks/constraints_task.py | 9 +- .../script/person_nested_data.json | 202 ++++++++++++++++++ .../test_constraints/test_constraints.py | 23 +- .../test_constraints_nested_data.xml | 16 ++ 4 files changed, 247 insertions(+), 3 deletions(-) create mode 100644 tests_ce/functional_tests/test_constraints/script/person_nested_data.json create mode 100644 tests_ce/functional_tests/test_constraints/test_constraints_nested_data.xml diff --git a/datamimic_ce/tasks/constraints_task.py b/datamimic_ce/tasks/constraints_task.py index 626bd8bc..656f7318 100644 --- a/datamimic_ce/tasks/constraints_task.py +++ b/datamimic_ce/tasks/constraints_task.py @@ -6,7 +6,7 @@ import copy import itertools -from datamimic_ce.contexts.context import SAFE_GLOBALS +from datamimic_ce.contexts.context import SAFE_GLOBALS, DotableDict from datamimic_ce.contexts.geniter_context import GenIterContext from datamimic_ce.data_sources.data_source_pagination import DataSourcePagination from datamimic_ce.statements.constraints_statement import ConstraintsStatement @@ -32,7 +32,12 @@ def filter(self, source_data, pagination: DataSourcePagination | None, cyclic: b return [] for i in range(len(filter_data) - 1, -1, -1): # Iterate from last to first - data_dict = filter_data[i] + data_dict = copy.deepcopy(filter_data[i]) + + for key, value in data_dict.items(): + if isinstance(value, dict): + data_dict[key] = DotableDict(value) + for child_stmt in self.statement.sub_statements: if isinstance(child_stmt, RuleStatement): if_condition = eval(child_stmt.if_rule, SAFE_GLOBALS, data_dict) diff --git a/tests_ce/functional_tests/test_constraints/script/person_nested_data.json b/tests_ce/functional_tests/test_constraints/script/person_nested_data.json new file mode 100644 index 00000000..eb43539f --- /dev/null +++ b/tests_ce/functional_tests/test_constraints/script/person_nested_data.json @@ -0,0 +1,202 @@ +[ + { + "firstname": "Charlie", + "lastname": "Brown", + "age": 61, + "city": "New York", + "financial_info": { + "credit_score": 707, + "risk_profile": "Low" + } + }, + { + "firstname": "Jane", + "lastname": "Williams", + "age": 28, + "city": "Philadelphia", + "financial_info": { + "credit_score": 387, + "risk_profile": "High" + } + }, + { + "firstname": "Alice", + "lastname": "Johnson", + "age": 60, + "city": "San Diego", + "financial_info": { + "credit_score": 485, + "risk_profile": "Low" + } + }, + { + "firstname": "Jane", + "lastname": "Smith", + "age": 31, + "city": "Los Angeles", + "financial_info": { + "credit_score": 631, + "risk_profile": "Low" + } + }, + { + "firstname": "Grace", + "lastname": "Garcia", + "age": 24, + "city": "Phoenix", + "financial_info": { + "credit_score": 157, + "risk_profile": "Medium" + } + }, + { + "firstname": "Frank", + "lastname": "Garcia", + "age": 18, + "city": "Dallas", + "financial_info": { + "credit_score": 471, + "risk_profile": "Medium" + } + }, + { + "firstname": "John", + "lastname": "Taylor", + "age": 29, + "city": "Philadelphia", + "financial_info": { + "credit_score": 985, + "risk_profile": "Medium" + } + }, + { + "firstname": "Emma", + "lastname": "Jones", + "age": 68, + "city": "Houston", + "financial_info": { + "credit_score": 488, + "risk_profile": "Medium" + } + }, + { + "firstname": "Bob", + "lastname": "Smith", + "age": 52, + "city": "Dallas", + "financial_info": { + "credit_score": 252, + "risk_profile": "High" + } + }, + { + "firstname": "Jane", + "lastname": "Brown", + "age": 67, + "city": "Dallas", + "financial_info": { + "credit_score": 600, + "risk_profile": "Medium" + } + }, + { + "firstname": "David", + "lastname": "Brown", + "age": 21, + "city": "Philadelphia", + "financial_info": { + "credit_score": 612, + "risk_profile": "High" + } + }, + { + "firstname": "Jane", + "lastname": "Brown", + "age": 70, + "city": "Phoenix", + "financial_info": { + "credit_score": 918, + "risk_profile": "Medium" + } + }, + { + "firstname": "Hannah", + "lastname": "Davis", + "age": 26, + "city": "Philadelphia", + "financial_info": { + "credit_score": 767, + "risk_profile": "Medium" + } + }, + { + "firstname": "Bob", + "lastname": "Johnson", + "age": 38, + "city": "San Antonio", + "financial_info": { + "credit_score": 431, + "risk_profile": "Low" + } + }, + { + "firstname": "John", + "lastname": "Johnson", + "age": 48, + "city": "Los Angeles", + "financial_info": { + "credit_score": 155, + "risk_profile": "Medium" + } + }, + { + "firstname": "John", + "lastname": "Davis", + "age": 29, + "city": "San Jose", + "financial_info": { + "credit_score": 496, + "risk_profile": "Medium" + } + }, + { + "firstname": "John", + "lastname": "Johnson", + "age": 29, + "city": "Los Angeles", + "financial_info": { + "credit_score": 586, + "risk_profile": "Medium" + } + }, + { + "firstname": "Bob", + "lastname": "Williams", + "age": 53, + "city": "Los Angeles", + "financial_info": { + "credit_score": 810, + "risk_profile": "Medium" + } + }, + { + "firstname": "Jane", + "lastname": "Miller", + "age": 46, + "city": "Dallas", + "financial_info": { + "credit_score": 704, + "risk_profile": "High" + } + }, + { + "firstname": "Charlie", + "lastname": "Garcia", + "age": 67, + "city": "Phoenix", + "financial_info": { + "credit_score": 487, + "risk_profile": "Medium" + } + } +] diff --git a/tests_ce/functional_tests/test_constraints/test_constraints.py b/tests_ce/functional_tests/test_constraints/test_constraints.py index 3bc4bb5c..f364c92f 100644 --- a/tests_ce/functional_tests/test_constraints/test_constraints.py +++ b/tests_ce/functional_tests/test_constraints/test_constraints.py @@ -120,4 +120,25 @@ def test_two_constraints(self): engine.test_with_timer() assert False except SyntaxError as e: - assert "Only once allow in per " in e.msg \ No newline at end of file + assert "Only once allow in per " in e.msg + + def test_constraints_nested_data(self): + engine = DataMimicTest(test_dir=self._test_dir, + filename="test_constraints_nested_data.xml", + capture_test_result=True) + engine.test_with_timer() + + result = engine.capture_result() + + synthetic_customers = result["synthetic_customers"] + assert len(synthetic_customers) == 10000 + for customer in synthetic_customers: + assert isinstance(customer["id"], int) + assert customer["id"] in range(1, 10001) + ele = customer["financial_info"] + if ele["credit_score"] < 600: + assert ele["risk_profile"] == 'High' + elif 600 <= ele["credit_score"] < 750: + assert ele["risk_profile"] == 'Medium' + else: + assert ele["risk_profile"] == 'Low' diff --git a/tests_ce/functional_tests/test_constraints/test_constraints_nested_data.xml b/tests_ce/functional_tests/test_constraints/test_constraints_nested_data.xml new file mode 100644 index 00000000..422f2967 --- /dev/null +++ b/tests_ce/functional_tests/test_constraints/test_constraints_nested_data.xml @@ -0,0 +1,16 @@ + + + + + + + + + + \ No newline at end of file From ad2e043da9aecc9be884d715cdcfc35382134d22 Mon Sep 17 00:00:00 2001 From: Tung Lieu Date: Thu, 6 Mar 2025 17:05:09 +0700 Subject: [PATCH 14/22] change Constraints filter method to execute for codebase consistency --- datamimic_ce/tasks/constraints_task.py | 5 +---- datamimic_ce/workers/generate_worker.py | 6 +++--- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/datamimic_ce/tasks/constraints_task.py b/datamimic_ce/tasks/constraints_task.py index 656f7318..f4ce0672 100644 --- a/datamimic_ce/tasks/constraints_task.py +++ b/datamimic_ce/tasks/constraints_task.py @@ -22,10 +22,7 @@ def __init__(self, statement: ConstraintsStatement): def statement(self) -> ConstraintsStatement: return self._statement - def execute(self, parent_context: GenIterContext, source_data): - pass - - def filter(self, source_data, pagination: DataSourcePagination | None, cyclic: bool | None = False) -> list: + def execute(self, source_data, pagination: DataSourcePagination | None, cyclic: bool | None = False) -> list: filter_data = list(source_data) # If source is empty, return empty list if len(filter_data) == 0: diff --git a/datamimic_ce/workers/generate_worker.py b/datamimic_ce/workers/generate_worker.py index fc678e58..fa8ad864 100644 --- a/datamimic_ce/workers/generate_worker.py +++ b/datamimic_ce/workers/generate_worker.py @@ -190,10 +190,10 @@ def _generate_product_by_page_in_single_process( ) ) - # filter source_data with constraints-rule task when specify + # execute ConstraintsTask to filter source_data with its rules for task in tasks: if isinstance(task, ConstraintsTask): - source_data = task.filter(source_data, pagination, stmt.cyclic) + source_data = task.execute(source_data, pagination, stmt.cyclic) # Shuffle source data if distribution is random if is_random_distribution: @@ -234,7 +234,7 @@ def _generate_product_by_page_in_single_process( # Store temp product in context for later evaluate inner_generate_key = key.split("|", 1)[-1].strip() ctx.current_variables[inner_generate_key] = value - # ConstraintsTask and RuleTask don't need to execute + # Do not execute ConstraintsTask and RuleTask elif isinstance(task, ConstraintsTask | RuleTask): pass else: From f3de1422fa8cca7549702e66c4f9f91cf99d30ef Mon Sep 17 00:00:00 2001 From: Tung Lieu Date: Thu, 6 Mar 2025 17:08:28 +0700 Subject: [PATCH 15/22] fix ruff --- datamimic_ce/tasks/constraints_task.py | 1 - 1 file changed, 1 deletion(-) diff --git a/datamimic_ce/tasks/constraints_task.py b/datamimic_ce/tasks/constraints_task.py index f4ce0672..7088406f 100644 --- a/datamimic_ce/tasks/constraints_task.py +++ b/datamimic_ce/tasks/constraints_task.py @@ -7,7 +7,6 @@ import itertools from datamimic_ce.contexts.context import SAFE_GLOBALS, DotableDict -from datamimic_ce.contexts.geniter_context import GenIterContext from datamimic_ce.data_sources.data_source_pagination import DataSourcePagination from datamimic_ce.statements.constraints_statement import ConstraintsStatement from datamimic_ce.statements.rule_statement import RuleStatement From 35fa8cb566175615d3e6dc0111a076156c16d57d Mon Sep 17 00:00:00 2001 From: Tung Lieu Date: Thu, 6 Mar 2025 17:21:07 +0700 Subject: [PATCH 16/22] uncomment in RuleModel, add test --- datamimic_ce/model/rule_model.py | 13 ++++++------- .../test_constraints/test_constraints.py | 18 ++++++++++++++++++ .../test_constraints_if_rule_empty.xml | 9 +++++++++ .../test_constraints_then_rule_empty.xml | 9 +++++++++ 4 files changed, 42 insertions(+), 7 deletions(-) create mode 100644 tests_ce/functional_tests/test_constraints/test_constraints_if_rule_empty.xml create mode 100644 tests_ce/functional_tests/test_constraints/test_constraints_then_rule_empty.xml diff --git a/datamimic_ce/model/rule_model.py b/datamimic_ce/model/rule_model.py index db9aac3b..b68c7e84 100644 --- a/datamimic_ce/model/rule_model.py +++ b/datamimic_ce/model/rule_model.py @@ -3,9 +3,8 @@ # This software is licensed under the MIT License. # See LICENSE file for the full text of the license. # For questions and support, contact: info@rapiddweller.com - - -from pydantic import BaseModel, Field +from datamimic_ce.model.model_util import ModelUtil +from pydantic import BaseModel, Field, field_validator from datamimic_ce.constants.attribute_constants import ATTR_IF, ATTR_THEN @@ -14,7 +13,7 @@ class RuleModel(BaseModel): if_rule: str = Field(alias=ATTR_IF) then_rule: str = Field(alias=ATTR_THEN) - # @field_validator("if_rule", "then_rule") - # @classmethod - # def validate_name(cls, value): - # return ModelUtil.check_not_empty(value=value) + @field_validator("if_rule", "then_rule") + @classmethod + def validate_name(cls, value): + return ModelUtil.check_not_empty(value=value) diff --git a/tests_ce/functional_tests/test_constraints/test_constraints.py b/tests_ce/functional_tests/test_constraints/test_constraints.py index f364c92f..84cb0e4b 100644 --- a/tests_ce/functional_tests/test_constraints/test_constraints.py +++ b/tests_ce/functional_tests/test_constraints/test_constraints.py @@ -142,3 +142,21 @@ def test_constraints_nested_data(self): assert ele["risk_profile"] == 'Medium' else: assert ele["risk_profile"] == 'Low' + + def test_constraints_if_rule_empty(self): + engine = DataMimicTest(test_dir=self._test_dir, + filename="test_constraints_if_rule_empty.xml") + try: + engine.test_with_timer() + assert False + except ValueError as e: + assert "if: Value error, must be not empty" in str(e) + + def test_constraints_then_rule_empty(self): + engine = DataMimicTest(test_dir=self._test_dir, + filename="test_constraints_then_rule_empty.xml") + try: + engine.test_with_timer() + assert False + except ValueError as e: + assert "then: Value error, must be not empty" in str(e) diff --git a/tests_ce/functional_tests/test_constraints/test_constraints_if_rule_empty.xml b/tests_ce/functional_tests/test_constraints/test_constraints_if_rule_empty.xml new file mode 100644 index 00000000..56693570 --- /dev/null +++ b/tests_ce/functional_tests/test_constraints/test_constraints_if_rule_empty.xml @@ -0,0 +1,9 @@ + + + + + + + + \ No newline at end of file diff --git a/tests_ce/functional_tests/test_constraints/test_constraints_then_rule_empty.xml b/tests_ce/functional_tests/test_constraints/test_constraints_then_rule_empty.xml new file mode 100644 index 00000000..8f17a7db --- /dev/null +++ b/tests_ce/functional_tests/test_constraints/test_constraints_then_rule_empty.xml @@ -0,0 +1,9 @@ + + + + + + + + \ No newline at end of file From 530f2d1015f4cb61831cb18cfe1370cb0420064e Mon Sep 17 00:00:00 2001 From: Tung Lieu Date: Thu, 6 Mar 2025 17:27:13 +0700 Subject: [PATCH 17/22] fix ruff --- datamimic_ce/model/rule_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datamimic_ce/model/rule_model.py b/datamimic_ce/model/rule_model.py index b68c7e84..44f636d8 100644 --- a/datamimic_ce/model/rule_model.py +++ b/datamimic_ce/model/rule_model.py @@ -3,10 +3,10 @@ # This software is licensed under the MIT License. # See LICENSE file for the full text of the license. # For questions and support, contact: info@rapiddweller.com -from datamimic_ce.model.model_util import ModelUtil from pydantic import BaseModel, Field, field_validator from datamimic_ce.constants.attribute_constants import ATTR_IF, ATTR_THEN +from datamimic_ce.model.model_util import ModelUtil class RuleModel(BaseModel): From cdf149ea1ad362a359edaebea922243dd0d2f637 Mon Sep 17 00:00:00 2001 From: Tung Lieu Date: Thu, 20 Mar 2025 15:42:42 +0700 Subject: [PATCH 18/22] add cascade test --- .../test_cascaded_constraints.xml | 13 +++++++++++++ .../test_constraints/test_constraints.py | 19 +++++++++++++++++++ 2 files changed, 32 insertions(+) create mode 100644 tests_ce/functional_tests/test_constraints/test_cascaded_constraints.xml diff --git a/tests_ce/functional_tests/test_constraints/test_cascaded_constraints.xml b/tests_ce/functional_tests/test_constraints/test_cascaded_constraints.xml new file mode 100644 index 00000000..2d489bd9 --- /dev/null +++ b/tests_ce/functional_tests/test_constraints/test_cascaded_constraints.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + \ No newline at end of file diff --git a/tests_ce/functional_tests/test_constraints/test_constraints.py b/tests_ce/functional_tests/test_constraints/test_constraints.py index 84cb0e4b..4f887fb6 100644 --- a/tests_ce/functional_tests/test_constraints/test_constraints.py +++ b/tests_ce/functional_tests/test_constraints/test_constraints.py @@ -160,3 +160,22 @@ def test_constraints_then_rule_empty(self): assert False except ValueError as e: assert "then: Value error, must be not empty" in str(e) + + def test_cascaded_constraints(self): + engine = DataMimicTest(test_dir=self._test_dir, + filename="test_cascaded_constraints.xml", + capture_test_result=True) + engine.test_with_timer() + result = engine.capture_result() + + synthetic_customers = result["synthetic_customers"] + assert len(synthetic_customers) == 10000 + for ele in synthetic_customers: + assert isinstance(ele["id"], int) + assert ele["id"] in range(1, 10001) + if ele["credit_score"] < 600: + assert ele["risk_profile"] == 'High' + elif 600 <= ele["credit_score"] < 750: + assert ele["risk_profile"] == 'Medium' + else: + assert ele["risk_profile"] == 'Low' \ No newline at end of file From 3770f3de31e0fa1a0868997e226ed2f62d550158 Mon Sep 17 00:00:00 2001 From: Tung Lieu Date: Fri, 21 Mar 2025 09:28:21 +0700 Subject: [PATCH 19/22] extend constraint to nestedKey subtag --- datamimic_ce/parsers/parser_util.py | 1 + datamimic_ce/tasks/constraints_task.py | 2 +- datamimic_ce/tasks/nested_key_task.py | 11 +++++ .../test_constraints/test_constraints.py | 41 ++++++++++++++++++- .../test_nestedkey_constraints.xml | 24 +++++++++++ 5 files changed, 76 insertions(+), 3 deletions(-) create mode 100644 tests_ce/functional_tests/test_constraints/test_nestedkey_constraints.xml diff --git a/datamimic_ce/parsers/parser_util.py b/datamimic_ce/parsers/parser_util.py index 00adea48..7318a422 100644 --- a/datamimic_ce/parsers/parser_util.py +++ b/datamimic_ce/parsers/parser_util.py @@ -113,6 +113,7 @@ def get_valid_sub_elements_set_by_tag(ele_tag: str) -> set | None: EL_ELEMENT, EL_ARRAY, EL_CONDITION, + EL_CONSTRAINTS, }, EL_CONDITION: {EL_IF, EL_ELSE_IF, EL_ELSE}, EL_GENERATE: { diff --git a/datamimic_ce/tasks/constraints_task.py b/datamimic_ce/tasks/constraints_task.py index 7088406f..84ad8a38 100644 --- a/datamimic_ce/tasks/constraints_task.py +++ b/datamimic_ce/tasks/constraints_task.py @@ -21,7 +21,7 @@ def __init__(self, statement: ConstraintsStatement): def statement(self) -> ConstraintsStatement: return self._statement - def execute(self, source_data, pagination: DataSourcePagination | None, cyclic: bool | None = False) -> list: + def execute(self, source_data, pagination: DataSourcePagination | None = None, cyclic: bool | None = False) -> list: filter_data = list(source_data) # If source is empty, return empty list if len(filter_data) == 0: diff --git a/datamimic_ce/tasks/nested_key_task.py b/datamimic_ce/tasks/nested_key_task.py index 4be05c1c..7ed5e76a 100644 --- a/datamimic_ce/tasks/nested_key_task.py +++ b/datamimic_ce/tasks/nested_key_task.py @@ -15,6 +15,7 @@ from datamimic_ce.data_sources.data_source_registry import DataSourceRegistry from datamimic_ce.logger import logger from datamimic_ce.statements.nested_key_statement import NestedKeyStatement +from datamimic_ce.tasks.constraints_task import ConstraintsTask from datamimic_ce.tasks.element_task import ElementTask from datamimic_ce.tasks.task import Task from datamimic_ce.utils.base_class_factory_util import BaseClassFactoryUtil @@ -158,6 +159,13 @@ def _execute_iterate(self, parent_context: GenIterContext) -> None: else: raise ValueError(f"Cannot load original data for '{self._statement.name}'") + # TODO: execute ConstraintsTask here to filter source data + for sub_task in self._sub_tasks: + if isinstance(sub_task, ConstraintsTask): + nestedkey_len = self._determine_nestedkey_length(context=parent_context) + temp_pagination = DataSourcePagination(skip=0, limit=nestedkey_len) if nestedkey_len else None + result = sub_task.execute(result, pagination=temp_pagination, cyclic=self.statement.cyclic) + # Post convert value after executing sub-tasks if isinstance(result, list): result = list(map(lambda ele: self._post_convert(ele), result)) @@ -187,6 +195,9 @@ def _try_execute_sub_tasks(self, ctx: GenIterContext) -> dict: try: if isinstance(sub_task, ElementTask): attributes.update(sub_task.generate_xml_attribute(ctx)) + elif isinstance(sub_task, ConstraintsTask): + # do not execute ConstraintsTask here, ConstraintsTask is for filter source data + pass else: sub_task.execute(ctx) except StopIteration: diff --git a/tests_ce/functional_tests/test_constraints/test_constraints.py b/tests_ce/functional_tests/test_constraints/test_constraints.py index 4f887fb6..5f33d725 100644 --- a/tests_ce/functional_tests/test_constraints/test_constraints.py +++ b/tests_ce/functional_tests/test_constraints/test_constraints.py @@ -49,7 +49,7 @@ def test_constraints_non_cyclic(self): # filtered generate constraints_customers = result["constraints_customers"] - assert len(constraints_customers) < 100 + assert len(constraints_customers) == 29 for ele in constraints_customers: assert isinstance(ele["id"], int) @@ -178,4 +178,41 @@ def test_cascaded_constraints(self): elif 600 <= ele["credit_score"] < 750: assert ele["risk_profile"] == 'Medium' else: - assert ele["risk_profile"] == 'Low' \ No newline at end of file + assert ele["risk_profile"] == 'Low' + + def test_nestedkey_constraints(self): + engine = DataMimicTest(test_dir=self._test_dir, + filename="test_nestedkey_constraints.xml", + capture_test_result=True) + engine.test_with_timer() + result = engine.capture_result() + + container = result["container"] + assert container + assert len(container) == 1 + + cyclic_true = container[0]["cyclic_true"] + assert cyclic_true + assert len(cyclic_true) == 1000 + for ele in cyclic_true: + assert isinstance(ele["id"], int) + assert ele["id"] in range(1, 1001) + if ele["credit_score"] < 600: + assert ele["risk_profile"] == 'High' + elif 600 <= ele["credit_score"] < 750: + assert ele["risk_profile"] == 'Medium' + else: + assert ele["risk_profile"] == 'Low' + + cyclic_false = container[0]["cyclic_false"] + assert cyclic_false + assert len(cyclic_false) == 29 + for ele in cyclic_false: + assert isinstance(ele["id"], int) + assert ele["id"] in range(1, 1001) + if ele["credit_score"] < 600: + assert ele["risk_profile"] == 'High' + elif 600 <= ele["credit_score"] < 750: + assert ele["risk_profile"] == 'Medium' + else: + assert ele["risk_profile"] == 'Low' diff --git a/tests_ce/functional_tests/test_constraints/test_nestedkey_constraints.xml b/tests_ce/functional_tests/test_constraints/test_nestedkey_constraints.xml new file mode 100644 index 00000000..d69f9a15 --- /dev/null +++ b/tests_ce/functional_tests/test_constraints/test_nestedkey_constraints.xml @@ -0,0 +1,24 @@ + + + + + + + + + + + + + + + + + + + + \ No newline at end of file From 0c3caf1a708e310f642f79fdd9b069a4139ea10a Mon Sep 17 00:00:00 2001 From: Tung Lieu Date: Fri, 21 Mar 2025 10:23:55 +0700 Subject: [PATCH 20/22] fix mypy --- datamimic_ce/tasks/nested_key_task.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/datamimic_ce/tasks/nested_key_task.py b/datamimic_ce/tasks/nested_key_task.py index 7ed5e76a..11cc9368 100644 --- a/datamimic_ce/tasks/nested_key_task.py +++ b/datamimic_ce/tasks/nested_key_task.py @@ -159,12 +159,13 @@ def _execute_iterate(self, parent_context: GenIterContext) -> None: else: raise ValueError(f"Cannot load original data for '{self._statement.name}'") - # TODO: execute ConstraintsTask here to filter source data - for sub_task in self._sub_tasks: - if isinstance(sub_task, ConstraintsTask): - nestedkey_len = self._determine_nestedkey_length(context=parent_context) - temp_pagination = DataSourcePagination(skip=0, limit=nestedkey_len) if nestedkey_len else None - result = sub_task.execute(result, pagination=temp_pagination, cyclic=self.statement.cyclic) + # execute ConstraintsTask here to filter source data + if self._sub_tasks: + for sub_task in self._sub_tasks: + if isinstance(sub_task, ConstraintsTask): + nestedkey_len = self._determine_nestedkey_length(context=parent_context) + temp_pagination = DataSourcePagination(skip=0, limit=nestedkey_len) if nestedkey_len else None + result = sub_task.execute(result, pagination=temp_pagination, cyclic=self.statement.cyclic) # Post convert value after executing sub-tasks if isinstance(result, list): From 5686d90165f9821bef288008c01a56c2fbdb4f97 Mon Sep 17 00:00:00 2001 From: Tung Lieu Date: Fri, 21 Mar 2025 11:06:48 +0700 Subject: [PATCH 21/22] refactor code --- datamimic_ce/tasks/nested_key_task.py | 24 ++++++++++++------- .../test_constraints/test_constraints.py | 4 ++-- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/datamimic_ce/tasks/nested_key_task.py b/datamimic_ce/tasks/nested_key_task.py index 11cc9368..0fe4305d 100644 --- a/datamimic_ce/tasks/nested_key_task.py +++ b/datamimic_ce/tasks/nested_key_task.py @@ -159,14 +159,6 @@ def _execute_iterate(self, parent_context: GenIterContext) -> None: else: raise ValueError(f"Cannot load original data for '{self._statement.name}'") - # execute ConstraintsTask here to filter source data - if self._sub_tasks: - for sub_task in self._sub_tasks: - if isinstance(sub_task, ConstraintsTask): - nestedkey_len = self._determine_nestedkey_length(context=parent_context) - temp_pagination = DataSourcePagination(skip=0, limit=nestedkey_len) if nestedkey_len else None - result = sub_task.execute(result, pagination=temp_pagination, cyclic=self.statement.cyclic) - # Post convert value after executing sub-tasks if isinstance(result, list): result = list(map(lambda ele: self._post_convert(ele), result)) @@ -328,7 +320,8 @@ def _modify_nestedkey_data_list(self, parent_context: GenIterContext, value: lis :return: """ result = [] - + # filter source data by constraints + value = self._filter_source_by_constraints_task(parent_context=parent_context, source_data=value) # Determine len of nestedkey count = self._determine_nestedkey_length(context=parent_context) value_len = len(value) @@ -392,3 +385,16 @@ def _post_convert(self, value): for converter in self._converter_list: value = converter.convert(value) return value + + def _filter_source_by_constraints_task(self, parent_context: GenIterContext, source_data: list) -> list: + """ + Execute ConstraintsTask to filter source data + """ + result = {} + if self._sub_tasks: + for sub_task in self._sub_tasks: + if isinstance(sub_task, ConstraintsTask): + nestedkey_len = self._determine_nestedkey_length(context=parent_context) + temp_pagination = DataSourcePagination(skip=0, limit=nestedkey_len) if nestedkey_len else None + result = sub_task.execute(source_data, pagination=temp_pagination, cyclic=self.statement.cyclic) + return result diff --git a/tests_ce/functional_tests/test_constraints/test_constraints.py b/tests_ce/functional_tests/test_constraints/test_constraints.py index 5f33d725..84e6eac0 100644 --- a/tests_ce/functional_tests/test_constraints/test_constraints.py +++ b/tests_ce/functional_tests/test_constraints/test_constraints.py @@ -53,7 +53,7 @@ def test_constraints_non_cyclic(self): for ele in constraints_customers: assert isinstance(ele["id"], int) - assert ele["id"] in range(1, 101) + assert ele["id"] in range(1, 30) if ele["credit_score"] < 600: assert ele["risk_profile"] == 'High' elif 600 <= ele["credit_score"] < 750: @@ -209,7 +209,7 @@ def test_nestedkey_constraints(self): assert len(cyclic_false) == 29 for ele in cyclic_false: assert isinstance(ele["id"], int) - assert ele["id"] in range(1, 1001) + assert ele["id"] in range(1, 30) if ele["credit_score"] < 600: assert ele["risk_profile"] == 'High' elif 600 <= ele["credit_score"] < 750: From f76d0f8eba0e3e177cbc0468d2294ffa06b3be8d Mon Sep 17 00:00:00 2001 From: Tung Lieu Date: Fri, 21 Mar 2025 11:19:11 +0700 Subject: [PATCH 22/22] fix false test --- datamimic_ce/tasks/nested_key_task.py | 3 ++- .../test_constraints/test_nestedkey_constraints.xml | 9 +++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/datamimic_ce/tasks/nested_key_task.py b/datamimic_ce/tasks/nested_key_task.py index 0fe4305d..1a082970 100644 --- a/datamimic_ce/tasks/nested_key_task.py +++ b/datamimic_ce/tasks/nested_key_task.py @@ -390,11 +390,12 @@ def _filter_source_by_constraints_task(self, parent_context: GenIterContext, sou """ Execute ConstraintsTask to filter source data """ - result = {} + result = source_data if self._sub_tasks: for sub_task in self._sub_tasks: if isinstance(sub_task, ConstraintsTask): nestedkey_len = self._determine_nestedkey_length(context=parent_context) temp_pagination = DataSourcePagination(skip=0, limit=nestedkey_len) if nestedkey_len else None result = sub_task.execute(source_data, pagination=temp_pagination, cyclic=self.statement.cyclic) + break return result diff --git a/tests_ce/functional_tests/test_constraints/test_nestedkey_constraints.xml b/tests_ce/functional_tests/test_constraints/test_nestedkey_constraints.xml index d69f9a15..d21a6015 100644 --- a/tests_ce/functional_tests/test_constraints/test_nestedkey_constraints.xml +++ b/tests_ce/functional_tests/test_constraints/test_nestedkey_constraints.xml @@ -1,18 +1,15 @@ - - + - +