
Commit

remove type ignore mypy check
tunglxfast committed Dec 16, 2024
1 parent 05346d4 commit 5784ac2
Showing 6 changed files with 30 additions and 22 deletions.
2 changes: 1 addition & 1 deletion datamimic_ce/clients/database_client.py
@@ -32,7 +32,7 @@ def get_by_page_with_type(self, table_name: str, pagination: DataSourcePaginatio

@abstractmethod
def get_cyclic_data(
- self, query: str, data_len: int, pagination: DataSourcePagination, cyclic: bool | None = False
+ self, query: str, data_len: int, pagination: DataSourcePagination | None, cyclic: bool | None = False
) -> list:
"""
Get cyclic data from database
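The change above widens 'pagination' so callers may pass None to mean "no pagination". Below is a minimal sketch of how an implementation could branch on the optional value; the skip/limit fields and the fallback behaviour are assumptions for illustration, not taken from the repository.

    from dataclasses import dataclass
    from itertools import cycle, islice

    @dataclass
    class DataSourcePagination:
        skip: int
        limit: int

    def get_cyclic_data(rows: list, data_len: int,
                        pagination: "DataSourcePagination | None",
                        cyclic: bool | None = False) -> list:
        # Assumed behaviour: without pagination, fall back to the requested length.
        start = pagination.skip if pagination else 0
        length = pagination.limit if pagination else data_len
        source = cycle(rows) if (cyclic and rows) else iter(rows)
        return list(islice(source, start, start + length))

With the widened annotation, a call such as get_cyclic_data(rows, 10, None) type-checks without a type-ignore comment.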
2 changes: 1 addition & 1 deletion datamimic_ce/parsers/generate_parser.py
@@ -33,7 +33,7 @@ def __init__(
class_factory_util=class_factory_util,
)

- def parse(self, descriptor_dir: Path, parent_stmt: Statement, lazy_parse: bool = False) -> GenerateStatement: # type: ignore[override]
+ def parse(self, descriptor_dir: Path, parent_stmt: Statement, lazy_parse: bool = False) -> GenerateStatement: # TODO: mypy issue [override]
"""
Parse element "generate" into GenerateStatement
:return:
2 changes: 1 addition & 1 deletion datamimic_ce/tasks/key_task.py
@@ -56,7 +56,7 @@ def statement(self) -> KeyStatement:
else:
raise TypeError("Expected an KeyStatement")

- def execute(self, ctx: GenIterContext): # type: ignore
+ def execute(self, ctx: GenIterContext): # TODO: mypy issue [override]
"""
Generate data for element "attribute"
If 'type' element is not specified, then default type of generated data is string
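The [override] TODOs in generate_parser.py and key_task.py replace type-ignore comments on methods whose parameters are narrower than in the base class, which mypy reports as a Liskov violation. A hedged illustration of that situation follows; the base-class signature here is an assumption, not the repository's actual Task or Parser definition.

    class Context: ...
    class GenIterContext(Context): ...

    class Task:
        def execute(self, ctx: Context) -> None: ...

    class KeyTask(Task):
        # Narrowing ctx from Context to GenIterContext is what mypy flags as an
        # incompatible override; the commit keeps the narrower type and records
        # the finding as a TODO instead of silencing it.
        def execute(self, ctx: GenIterContext) -> None:
            ...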
28 changes: 16 additions & 12 deletions datamimic_ce/tasks/nested_key_task.py
@@ -39,7 +39,8 @@ def __init__(
def statement(self) -> NestedKeyStatement:
return self._statement

- def execute(self, parent_context: GenIterContext): # type: ignore
+ def execute(self, parent_context: GenIterContext): # TODO: mypy issue [override]
+
"""
Generate data for element "nestedKey"
:param parent_context:
@@ -196,14 +197,15 @@ def _try_execute_sub_tasks(self, ctx: GenIterContext) -> dict:
ctx.current_product = self._post_convert(ctx.current_product)
return {**ctx.current_product, **attributes}

- def _evaluate_value_from_script(self, parent_context: GenIterContext) -> dict:
+ def _evaluate_value_from_script(self, parent_context: GenIterContext) -> list | dict:
"""
Evaluate data using script
:param parent_context:
:return:
"""
value = parent_context.evaluate_python_expression(self._statement.script)
+ result: dict | list
if isinstance(value, list):
result = self._modify_nestedkey_data_list(parent_context, value)
elif isinstance(value, dict):
@@ -215,7 +217,7 @@ def _evaluate_value_from_script(self, parent_context: GenIterContext) -> dict:
)
return result

- def _load_data_from_source(self, parent_context: GenIterContext) -> list:
+ def _load_data_from_source(self, parent_context: GenIterContext) -> list | dict:
"""
Load data from source
@@ -224,6 +226,7 @@
"""
source_str = self._statement.source
nestedkey_type = self._statement.type
+ result: dict | list

# Evaluate scripted source string
source = (
@@ -235,28 +238,28 @@
# Read data from source
if source.endswith("csv"):
separator = self._statement.separator or parent_context.root.default_separator
- value = FileUtil.read_csv_to_dict_list(file_path=self._descriptor_dir / source, separator=separator)
+ list_value = FileUtil.read_csv_to_dict_list(file_path=self._descriptor_dir / source, separator=separator)
elif source.endswith("json"):
- value = FileUtil.read_json_to_dict_list(self._descriptor_dir / source)
+ list_value = FileUtil.read_json_to_dict_list(self._descriptor_dir / source)
else:
raise ValueError(f"Invalid source '{source}' of nestedkey '{self._statement.name}'")

- result = self._modify_nestedkey_data_list(parent_context, value)
+ result = self._modify_nestedkey_data_list(parent_context, list_value)

elif nestedkey_type == DATA_TYPE_DICT:
if source.endswith("json"):
- value = FileUtil.read_json_to_dict(self._descriptor_dir / source)
- result = self._modify_nestedkey_data_dict(parent_context, value)
+ dict_value = FileUtil.read_json_to_dict(self._descriptor_dir / source)
+ result = self._modify_nestedkey_data_dict(parent_context, dict_value)
else:
raise ValueError(f"Source of nestedkey having type as 'dict' does not support format {source}")

# handle memstore source
elif parent_context.root.memstore_manager.contain(source_str):
- value = parent_context.root.memstore_manager.get_memstore(source_str).get_data_by_type(
+ list_value = parent_context.root.memstore_manager.get_memstore(source_str).get_data_by_type(
self._statement.type, None, self._statement.cyclic
)

- result = self._modify_nestedkey_data_list(parent_context, value)
+ result = self._modify_nestedkey_data_list(parent_context, list_value)
else:
raise ValueError(
f"Cannot load data from source '{self._statement.source}' of <nestedKey> '{self._statement.name}'"
@@ -274,12 +277,13 @@ def _load_data_from_source(self, parent_context: GenIterContext) -> list:
# Determine variable prefix and suffix
setup_ctx = parent_context.parent
while not isinstance(setup_ctx, SetupContext):
- setup_ctx = setup_ctx.parent
+ setup_ctx = setup_ctx.parent # TODO: mypy issue [attr-defined]
variable_prefix = self.statement.variable_prefix or setup_ctx.default_variable_prefix
variable_suffix = self.statement.variable_suffix or setup_ctx.default_variable_suffix

# Evaluate source_script
- result = TaskUtil.evaluate_file_script_template(parent_context, result, variable_prefix, variable_suffix)
+ result = TaskUtil.evaluate_file_script_template(
+ parent_context, result, variable_prefix, variable_suffix)

return result

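The nested_key_task.py changes follow one idea: give each intermediate value a single static type (list_value, dict_value) and declare the shared result variable as dict | list before the branches, so mypy accepts branch-dependent assignment without a type-ignore. A minimal sketch of the pattern, with the file handling simplified and the helper name invented for illustration:

    import csv
    import json
    from pathlib import Path

    def load_nestedkey_source(path: Path) -> list | dict:
        # Declared once, so every branch may assign either shape.
        result: list | dict
        if path.suffix == ".csv":
            with path.open(newline="") as f:
                list_value = list(csv.DictReader(f))  # always a list
            result = list_value
        elif path.suffix == ".json":
            dict_value: dict = json.loads(path.read_text())  # assumed to be a JSON object
            result = dict_value
        else:
            raise ValueError(f"Invalid source '{path}'")
        return result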
16 changes: 10 additions & 6 deletions datamimic_ce/tasks/task_util.py
@@ -376,15 +376,19 @@ def gen_task_load_data_from_source(
source_data = client.get_by_page_with_query(original_query=selector, pagination=load_pagination)
else:
source_data = client.get_by_page_with_type(
- table_name=stmt.type or stmt.name, # type: ignore
+ table_name=stmt.type or stmt.name, # TODO: mypy issue [return-value]
pagination=load_pagination,
)
else:
raise ValueError(f"Cannot load data from client: {type(client).__name__}")
else:
raise ValueError(f"cannot find data source {source_str} for iterate task")

- return source_data, build_from_source # type: ignore
+ if isinstance(source_data, list):
+ return_source_data = source_data
+ else:
+ return_source_data = [source_data]
+ return return_source_data, build_from_source

# @staticmethod
# def consume_minio_after_page_processing(stmt, context: Context) -> None:
@@ -439,13 +443,13 @@ def consume_product_by_page(
# Create exporters cache in root context if it doesn't exist
if not hasattr(root_context, "_task_exporters"):
# Using task_id to namespace the cache
- root_context._task_exporters = {} # type: ignore # skip mypy check
+ root_context._task_exporters = {} # TODO: mypy issue [attr-defined]

# Create a unique cache key incorporating task_id and statement details
cache_key = f"{root_context.task_id}_{stmt.name}_{stmt.storage_id}_{stmt}"

# Get or create exporters
- if cache_key not in root_context._task_exporters: # type: ignore # skip mypy check
+ if cache_key not in root_context._task_exporters: # TODO: mypy issue [attr-defined]
# Create the consumer set once
consumer_set = stmt.targets.copy()
# consumer_set.add(EXPORTER_PREVIEW) deactivating preview exporter for multi-process
@@ -463,14 +467,14 @@
)

# Cache the exporters
- root_context._task_exporters[cache_key] = { # type: ignore # skip mypy check
+ root_context._task_exporters[cache_key] = { # TODO: mypy issue [attr-defined]
"with_operation": consumers_with_operation,
"without_operation": consumers_without_operation,
"page_count": 0, # Track number of pages processed
}

# Get cached exporters
- exporters = root_context._task_exporters[cache_key] # type: ignore # skip mypy check
+ exporters = root_context._task_exporters[cache_key] # TODO: mypy issue [attr-defined]
exporters["page_count"] += 1

# Use cached exporters
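Two patterns recur in task_util.py: the data returned from a source is normalised to a list with an isinstance check instead of a type-ignore on the return statement, and the _task_exporters cache keeps [attr-defined] TODOs because the attribute is created dynamically on the root context. A short hedged sketch of the normalisation step; the function name is illustrative, not from the repository:

    from typing import Any

    def as_source_list(source_data: Any) -> list:
        # A single record (for example one dict) is wrapped so callers can
        # always iterate; data that is already a list passes through unchanged.
        if isinstance(source_data, list):
            return source_data
        return [source_data]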
2 changes: 1 addition & 1 deletion datamimic_ce/tasks/variable_task.py
@@ -331,7 +331,7 @@ def execute(self, ctx: Context | GenIterContext | SetupContext) -> None:
# Default variable prefix and suffix
setup_ctx = ctx
while not isinstance(setup_ctx, SetupContext):
- setup_ctx = setup_ctx.parent # type: ignore # skip mypy check
+ setup_ctx = setup_ctx.parent # TODO: mypy issue [attr-defined]
variable_prefix = self.statement.variable_prefix or setup_ctx.default_variable_prefix
variable_suffix = self.statement.variable_suffix or setup_ctx.default_variable_suffix
# Evaluate source script
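The remaining [attr-defined] TODOs in nested_key_task.py and variable_task.py sit on the loop that walks .parent up to the SetupContext, where the static type of the parent attribute does not expose the defaults being read. One hedged alternative, not what this commit does, is to factor the walk into a helper whose isinstance check narrows the type for mypy; the context classes below are simplified stand-ins, not the repository's definitions.

    class Context:
        def __init__(self, parent: "Context | None" = None) -> None:
            self.parent = parent

    class SetupContext(Context):
        default_variable_prefix = "__"
        default_variable_suffix = "__"

    def find_setup_context(ctx: Context) -> SetupContext:
        # The isinstance check both terminates the loop and narrows the type,
        # so callers can read default_variable_prefix without a suppression.
        current: Context | None = ctx
        while current is not None and not isinstance(current, SetupContext):
            current = current.parent
        if current is None:
            raise RuntimeError("no SetupContext found in the context chain")
        return current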

0 comments on commit 5784ac2
