Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/dat 474 constraints #67

Merged
merged 23 commits into from
Mar 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions datamimic_ce/constants/attribute_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,5 @@
ATTR_STRING: Final = "string"
ATTR_BUCKET: Final = "bucket"
ATTR_MP_PLATFORM: Final = "mpPlatform"
# Attribute names used as field aliases for the "rule" element (if / then expressions)
ATTR_IF: Final = "if"
ATTR_THEN: Final = "then"
2 changes: 2 additions & 0 deletions datamimic_ce/constants/element_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,5 @@
EL_CONDITION = "condition"
EL_ELSE_IF = "else-if"
EL_ELSE = "else"
# Tag names of the "constraints" element and of the "rule" element nested inside it
EL_CONSTRAINTS = "constraints"
EL_RULE = "rule"
11 changes: 11 additions & 0 deletions datamimic_ce/model/constraints_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# DATAMIMIC
# Copyright (c) 2023-2024 Rapiddweller Asia Co., Ltd.
# This software is licensed under the MIT License.
# See LICENSE file for the full text of the license.
# For questions and support, contact: [email protected]

from pydantic import BaseModel


class ConstraintsModel(BaseModel):
    """Pydantic model for the "constraints" element; it declares no fields."""
19 changes: 19 additions & 0 deletions datamimic_ce/model/rule_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# DATAMIMIC
# Copyright (c) 2023-2024 Rapiddweller Asia Co., Ltd.
# This software is licensed under the MIT License.
# See LICENSE file for the full text of the license.
# For questions and support, contact: [email protected]
from pydantic import BaseModel, Field, field_validator

from datamimic_ce.constants.attribute_constants import ATTR_IF, ATTR_THEN
from datamimic_ce.model.model_util import ModelUtil


class RuleModel(BaseModel):
    """
    Pydantic model for the attributes of a "rule" element.

    Both fields are required strings and are passed through
    ``ModelUtil.check_not_empty`` during validation.
    """

    # Expression populated from the attribute named by ATTR_IF
    if_rule: str = Field(alias=ATTR_IF)
    # Expression populated from the attribute named by ATTR_THEN
    then_rule: str = Field(alias=ATTR_THEN)

    @field_validator("if_rule", "then_rule")
    @classmethod
    def validate_name(cls, expression):
        """Return the result of the shared not-empty check for either rule expression."""
        return ModelUtil.check_not_empty(value=expression)
46 changes: 46 additions & 0 deletions datamimic_ce/parsers/constraints_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# DATAMIMIC
# Copyright (c) 2023-2024 Rapiddweller Asia Co., Ltd.
# This software is licensed under the MIT License.
# See LICENSE file for the full text of the license.
# For questions and support, contact: [email protected]


from pathlib import Path
from xml.etree.ElementTree import Element

from datamimic_ce.constants.element_constants import EL_CONSTRAINTS
from datamimic_ce.parsers.statement_parser import StatementParser
from datamimic_ce.statements.composite_statement import CompositeStatement
from datamimic_ce.statements.constraints_statement import ConstraintsStatement
from datamimic_ce.utils.base_class_factory_util import BaseClassFactoryUtil


class ConstraintsParser(StatementParser):
    """
    Parser for the "constraints" element and the sub-elements nested inside it
    """

    def __init__(
        self,
        class_factory_util: BaseClassFactoryUtil,
        element: Element,
        properties: dict,
    ):
        super().__init__(element, properties, valid_element_tag=EL_CONSTRAINTS, class_factory_util=class_factory_util)

    def parse(self, descriptor_dir: Path, parent_stmt: CompositeStatement) -> ConstraintsStatement:
        """
        Build a ConstraintsStatement and attach the statements parsed from its sub-elements

        :param descriptor_dir: forwarded unchanged to the sub-element parsing
        :param parent_stmt: statement handed to the ConstraintsStatement constructor
        :return: the ConstraintsStatement with its sub_statements assigned
        """
        statement = ConstraintsStatement(parent_stmt)
        parser_util = self._class_factory_util.get_parser_util_cls()()
        # The new statement is itself the parent of everything parsed below it
        statement.sub_statements = parser_util.parse_sub_elements(
            class_factory_util=self._class_factory_util,
            descriptor_dir=descriptor_dir,
            element=self._element,
            properties=self._properties,
            parent_stmt=statement,
        )
        return statement
16 changes: 16 additions & 0 deletions datamimic_ce/parsers/generate_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from datamimic_ce.constants.element_constants import EL_GENERATE
from datamimic_ce.model.generate_model import GenerateModel
from datamimic_ce.parsers.statement_parser import StatementParser
from datamimic_ce.statements.constraints_statement import ConstraintsStatement
from datamimic_ce.statements.generate_statement import GenerateStatement
from datamimic_ce.statements.statement import Statement
from datamimic_ce.utils.base_class_factory_util import BaseClassFactoryUtil
Expand Down Expand Up @@ -50,5 +51,20 @@ def parse(self, descriptor_dir: Path, parent_stmt: Statement, lazy_parse: bool =
self._properties,
gen_stmt,
)

self._check_only_one_constraints_tag(sub_stmt_list)

gen_stmt.sub_statements = sub_stmt_list
return gen_stmt

@staticmethod
def _check_only_one_constraints_tag(sub_stmt_list: list[Statement]):
    """
    Ensure that at most one 'constraints' tag is present per generate

    :param sub_stmt_list: statements parsed from the sub-elements of <generate>
    :raises SyntaxError: if more than one ConstraintsStatement is found
    """
    constraints_count = sum(isinstance(stmt, ConstraintsStatement) for stmt in sub_stmt_list)
    if constraints_count > 1:
        raise SyntaxError("Only one <constraints> element is allowed per <generate>")
15 changes: 13 additions & 2 deletions datamimic_ce/parsers/parser_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from datamimic_ce.constants.element_constants import (
EL_ARRAY,
EL_CONDITION,
EL_CONSTRAINTS,
EL_DATABASE,
EL_ECHO,
EL_ELEMENT,
Expand All @@ -32,12 +33,14 @@
EL_MONGODB,
EL_NESTED_KEY,
EL_REFERENCE,
EL_RULE,
EL_SETUP,
EL_VARIABLE,
)
from datamimic_ce.logger import logger
from datamimic_ce.parsers.array_parser import ArrayParser
from datamimic_ce.parsers.condition_parser import ConditionParser
from datamimic_ce.parsers.constraints_parser import ConstraintsParser
from datamimic_ce.parsers.database_parser import DatabaseParser
from datamimic_ce.parsers.echo_parser import EchoParser
from datamimic_ce.parsers.element_parser import ElementParser
Expand All @@ -54,6 +57,7 @@
from datamimic_ce.parsers.memstore_parser import MemstoreParser
from datamimic_ce.parsers.nested_key_parser import NestedKeyParser
from datamimic_ce.parsers.reference_parser import ReferenceParser
from datamimic_ce.parsers.rule_parser import RuleParser
from datamimic_ce.parsers.variable_parser import VariableParser
from datamimic_ce.statements.array_statement import ArrayStatement
from datamimic_ce.statements.composite_statement import CompositeStatement
Expand Down Expand Up @@ -109,6 +113,7 @@ def get_valid_sub_elements_set_by_tag(ele_tag: str) -> set | None:
EL_ELEMENT,
EL_ARRAY,
EL_CONDITION,
EL_CONSTRAINTS,
},
EL_CONDITION: {EL_IF, EL_ELSE_IF, EL_ELSE},
EL_GENERATE: {
Expand All @@ -122,6 +127,7 @@ def get_valid_sub_elements_set_by_tag(ele_tag: str) -> set | None:
EL_ECHO,
EL_CONDITION,
EL_INCLUDE,
EL_CONSTRAINTS,
},
EL_INCLUDE: {EL_SETUP},
EL_ITEM: {EL_KEY, EL_NESTED_KEY, EL_LIST, EL_ARRAY, EL_ELEMENT},
Expand All @@ -130,6 +136,7 @@ def get_valid_sub_elements_set_by_tag(ele_tag: str) -> set | None:
EL_IF: None,
EL_ELSE_IF: None,
EL_ELSE: None,
EL_CONSTRAINTS: {EL_RULE},
}

return valid_sub_element_dict.get(ele_tag, set())
Expand Down Expand Up @@ -189,6 +196,10 @@ def get_parser_by_element(class_factory_util: BaseClassFactoryUtil, element: Ele
return ElementParser(class_factory_util, element=element, properties=properties)
elif tag == EL_GENERATOR:
return GeneratorParser(class_factory_util, element=element, properties=properties)
elif tag == EL_CONSTRAINTS:
return ConstraintsParser(class_factory_util, element=element, properties=properties)
elif tag == EL_RULE:
return RuleParser(class_factory_util, element=element, properties=properties)
else:
raise ValueError(f"Cannot get parser for element <{tag}>")

Expand Down Expand Up @@ -238,9 +249,9 @@ def parse_sub_elements(
| GeneratorParser,
):
stmt = parser.parse()
elif isinstance(parser, KeyParser):
elif isinstance(parser, KeyParser | RuleParser):
stmt = parser.parse(descriptor_dir=descriptor_dir, parent_stmt=parent_stmt)
elif isinstance(parser, ConditionParser):
elif isinstance(parser, ConditionParser | ConstraintsParser):
stmt = parser.parse(
descriptor_dir=descriptor_dir, parent_stmt=cast(CompositeStatement, parent_stmt)
)
Expand Down
36 changes: 36 additions & 0 deletions datamimic_ce/parsers/rule_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# DATAMIMIC
# Copyright (c) 2023-2024 Rapiddweller Asia Co., Ltd.
# This software is licensed under the MIT License.
# See LICENSE file for the full text of the license.
# For questions and support, contact: [email protected]
from pathlib import Path
from xml.etree.ElementTree import Element

from datamimic_ce.constants.element_constants import EL_RULE
from datamimic_ce.model.rule_model import RuleModel
from datamimic_ce.parsers.statement_parser import StatementParser
from datamimic_ce.statements.rule_statement import RuleStatement
from datamimic_ce.statements.statement import Statement
from datamimic_ce.utils.base_class_factory_util import BaseClassFactoryUtil


class RuleParser(StatementParser):
    """
    Parse element "rule" to RuleStatement
    """

    def __init__(
        self,
        class_factory_util: BaseClassFactoryUtil,
        element: Element,
        properties: dict,
    ):
        super().__init__(element, properties, valid_element_tag=EL_RULE, class_factory_util=class_factory_util)

    def parse(self, descriptor_dir: Path, parent_stmt: Statement) -> RuleStatement:
        """
        Parse element "rule" into RuleStatement

        :param descriptor_dir: not used here; accepted because the sub-element parsing
            calls ``parse`` with this keyword argument
        :param parent_stmt: not used here; accepted for the same reason
        :return: RuleStatement built from the result of ``validate_attributes(RuleModel)``
        """
        rule_model = self.validate_attributes(RuleModel)
        return RuleStatement(rule_model)
11 changes: 11 additions & 0 deletions datamimic_ce/statements/constraints_statement.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# DATAMIMIC
# Copyright (c) 2023-2024 Rapiddweller Asia Co., Ltd.
# This software is licensed under the MIT License.
# See LICENSE file for the full text of the license.
# For questions and support, contact: [email protected]
from datamimic_ce.statements.composite_statement import CompositeStatement


class ConstraintsStatement(CompositeStatement):
    """Composite statement created for a "constraints" element."""

    def __init__(self, parent_stmt: CompositeStatement):
        # The first base-class argument is always None for this statement type
        super().__init__(None, parent_stmt=parent_stmt)
22 changes: 22 additions & 0 deletions datamimic_ce/statements/rule_statement.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# DATAMIMIC
# Copyright (c) 2023-2024 Rapiddweller Asia Co., Ltd.
# This software is licensed under the MIT License.
# See LICENSE file for the full text of the license.
# For questions and support, contact: [email protected]
from datamimic_ce.model.rule_model import RuleModel
from datamimic_ce.statements.statement import Statement


class RuleStatement(Statement):
    """Statement holding the two expressions taken from a RuleModel."""

    def __init__(self, model: RuleModel):
        super().__init__(None, None)
        self._if_rule, self._then_rule = model.if_rule, model.then_rule

    @property
    def if_rule(self):
        """Value of the model's ``if_rule`` field."""
        return self._if_rule

    @property
    def then_rule(self):
        """Value of the model's ``then_rule`` field."""
        return self._then_rule
60 changes: 60 additions & 0 deletions datamimic_ce/tasks/constraints_task.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# DATAMIMIC
# Copyright (c) 2023-2024 Rapiddweller Asia Co., Ltd.
# This software is licensed under the MIT License.
# See LICENSE file for the full text of the license.
# For questions and support, contact: [email protected]
import copy
import itertools

from datamimic_ce.contexts.context import SAFE_GLOBALS, DotableDict
from datamimic_ce.data_sources.data_source_pagination import DataSourcePagination
from datamimic_ce.statements.constraints_statement import ConstraintsStatement
from datamimic_ce.statements.rule_statement import RuleStatement
from datamimic_ce.tasks.task import Task


class ConstraintsTask(Task):
    """
    Filter source data with the rules of a ConstraintsStatement.

    A record is removed when, for some rule, the ``if`` expression evaluates to ``True``
    and the ``then`` expression evaluates to ``False``. Results that are not of type
    ``bool`` never remove a record.
    """

    def __init__(self, statement: ConstraintsStatement):
        self._statement = statement

    @property
    def statement(self) -> ConstraintsStatement:
        return self._statement

    def execute(self, source_data, pagination: DataSourcePagination | None = None, cyclic: bool | None = False) -> list:
        """
        Filter source data by the rules, then return the requested slice of what is left

        :param source_data: iterable of records; each record must offer ``items()`` (dict-like)
        :param pagination: if given, the slice ``[skip, skip + limit)`` is returned, otherwise everything
        :param cyclic: if truthy, the filtered data is repeated endlessly before slicing and
            deep copies of the records are returned
        :return: list of records that passed every rule, or an empty list if none did
        """
        # Collect the rules once instead of re-checking the statement type for every record
        rules = [stmt for stmt in self.statement.sub_statements if isinstance(stmt, RuleStatement)]

        # Build the kept records in one pass (deleting from a list in a loop is quadratic)
        filtered_data = [record for record in source_data if self._satisfies_rules(record, rules)]
        # If source or filtered data is empty, return empty list
        if not filtered_data:
            return []

        if pagination is None:
            start_idx, end_idx = 0, len(filtered_data)
        else:
            start_idx = pagination.skip
            end_idx = pagination.skip + pagination.limit

        # Get cyclic data from filtered data source
        if cyclic:
            iterator = itertools.cycle(filtered_data)
            return [copy.deepcopy(ele) for ele in itertools.islice(iterator, start_idx, end_idx)]
        return list(itertools.islice(filtered_data, start_idx, end_idx))

    @staticmethod
    def _satisfies_rules(record, rules: list) -> bool:
        """Return False as soon as one rule's ``if`` is True while its ``then`` is False."""
        # Evaluate against a deep copy so expressions cannot alter the source record
        namespace = copy.deepcopy(record)
        for key, value in namespace.items():
            if isinstance(value, dict):
                # Only top-level dict values are wrapped in DotableDict
                namespace[key] = DotableDict(value)

        for rule in rules:
            # NOTE(security): rule expressions are run through eval(); descriptors must come
            # from a trusted source. SAFE_GLOBALS is presumably a restricted globals mapping
            # -- TODO confirm against its definition.
            if_result = eval(rule.if_rule, SAFE_GLOBALS, namespace)
            if if_result is not True:
                continue
            then_result = eval(rule.then_rule, SAFE_GLOBALS, namespace)
            if then_result is False:
                return False
        return True
21 changes: 20 additions & 1 deletion datamimic_ce/tasks/nested_key_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from datamimic_ce.data_sources.data_source_registry import DataSourceRegistry
from datamimic_ce.logger import logger
from datamimic_ce.statements.nested_key_statement import NestedKeyStatement
from datamimic_ce.tasks.constraints_task import ConstraintsTask
from datamimic_ce.tasks.element_task import ElementTask
from datamimic_ce.tasks.task import Task
from datamimic_ce.utils.base_class_factory_util import BaseClassFactoryUtil
Expand Down Expand Up @@ -187,6 +188,9 @@ def _try_execute_sub_tasks(self, ctx: GenIterContext) -> dict:
try:
if isinstance(sub_task, ElementTask):
attributes.update(sub_task.generate_xml_attribute(ctx))
elif isinstance(sub_task, ConstraintsTask):
# do not execute ConstraintsTask here, ConstraintsTask is for filter source data
pass
else:
sub_task.execute(ctx)
except StopIteration:
Expand Down Expand Up @@ -316,7 +320,8 @@ def _modify_nestedkey_data_list(self, parent_context: GenIterContext, value: lis
:return:
"""
result = []

# filter source data by constraints
value = self._filter_source_by_constraints_task(parent_context=parent_context, source_data=value)
# Determine len of nestedkey
count = self._determine_nestedkey_length(context=parent_context)
value_len = len(value)
Expand Down Expand Up @@ -380,3 +385,17 @@ def _post_convert(self, value):
for converter in self._converter_list:
value = converter.convert(value)
return value

def _filter_source_by_constraints_task(self, parent_context: GenIterContext, source_data: list) -> list:
    """
    Filter source data through the first ConstraintsTask found among the sub-tasks

    :param parent_context: context used to determine the nestedkey length
    :param source_data: records to filter
    :return: output of the ConstraintsTask, or source_data itself when no such task exists
    """
    constraints_task = next(
        (task for task in self._sub_tasks or () if isinstance(task, ConstraintsTask)),
        None,
    )
    if constraints_task is None:
        return source_data

    length = self._determine_nestedkey_length(context=parent_context)
    # A falsy length means no pagination is passed to the task
    page = DataSourcePagination(skip=0, limit=length) if length else None
    return constraints_task.execute(source_data, pagination=page, cyclic=self.statement.cyclic)
Loading