diff --git a/datamimic_ce/clients/database_client.py b/datamimic_ce/clients/database_client.py index 96b26e9..8c17b98 100644 --- a/datamimic_ce/clients/database_client.py +++ b/datamimic_ce/clients/database_client.py @@ -32,7 +32,7 @@ def get_by_page_with_type(self, table_name: str, pagination: DataSourcePaginatio @abstractmethod def get_cyclic_data( - self, query: str, data_len: int, pagination: DataSourcePagination, cyclic: bool | None = False + self, query: str, data_len: int, pagination: DataSourcePagination | None, cyclic: bool | None = False ) -> list: """ Get cyclic data from database diff --git a/datamimic_ce/parsers/generate_parser.py b/datamimic_ce/parsers/generate_parser.py index 5ab6a9f..6a52d56 100644 --- a/datamimic_ce/parsers/generate_parser.py +++ b/datamimic_ce/parsers/generate_parser.py @@ -33,7 +33,7 @@ def __init__( class_factory_util=class_factory_util, ) - def parse(self, descriptor_dir: Path, parent_stmt: Statement, lazy_parse: bool = False) -> GenerateStatement: # type: ignore[override] + def parse(self, descriptor_dir: Path, parent_stmt: Statement, lazy_parse: bool = False) -> GenerateStatement: # TODO: mypy issue [override] """ Parse element "generate" into GenerateStatement :return: diff --git a/datamimic_ce/tasks/key_task.py b/datamimic_ce/tasks/key_task.py index 1cdacc4..434c492 100644 --- a/datamimic_ce/tasks/key_task.py +++ b/datamimic_ce/tasks/key_task.py @@ -56,7 +56,7 @@ def statement(self) -> KeyStatement: else: raise TypeError("Expected an KeyStatement") - def execute(self, ctx: GenIterContext): # type: ignore + def execute(self, ctx: GenIterContext): # TODO: mypy issue [override] """ Generate data for element "attribute" If 'type' element is not specified, then default type of generated data is string diff --git a/datamimic_ce/tasks/nested_key_task.py b/datamimic_ce/tasks/nested_key_task.py index cb0a027..a166408 100644 --- a/datamimic_ce/tasks/nested_key_task.py +++ b/datamimic_ce/tasks/nested_key_task.py @@ -39,7 +39,8 @@ def __init__( def statement(self) -> NestedKeyStatement: return self._statement - def execute(self, parent_context: GenIterContext): # type: ignore + def execute(self, parent_context: GenIterContext): # TODO: mypy issue [override] + """ Generate data for element "nestedKey" :param parent_context: @@ -196,7 +197,7 @@ def _try_execute_sub_tasks(self, ctx: GenIterContext) -> dict: ctx.current_product = self._post_convert(ctx.current_product) return {**ctx.current_product, **attributes} - def _evaluate_value_from_script(self, parent_context: GenIterContext) -> dict: + def _evaluate_value_from_script(self, parent_context: GenIterContext) -> list | dict: """ Evaluate data using script @@ -204,6 +205,7 @@ def _evaluate_value_from_script(self, parent_context: GenIterContext) -> dict: :return: """ value = parent_context.evaluate_python_expression(self._statement.script) + result: dict | list if isinstance(value, list): result = self._modify_nestedkey_data_list(parent_context, value) elif isinstance(value, dict): @@ -215,7 +217,7 @@ def _evaluate_value_from_script(self, parent_context: GenIterContext) -> dict: ) return result - def _load_data_from_source(self, parent_context: GenIterContext) -> list: + def _load_data_from_source(self, parent_context: GenIterContext) -> list | dict: """ Load data from source @@ -224,6 +226,7 @@ def _load_data_from_source(self, parent_context: GenIterContext) -> list: """ source_str = self._statement.source nestedkey_type = self._statement.type + result: dict | list # Evaluate scripted source string source = ( @@ -235,28 +238,28 @@ def _load_data_from_source(self, parent_context: GenIterContext) -> list: # Read data from source if source.endswith("csv"): separator = self._statement.separator or parent_context.root.default_separator - value = FileUtil.read_csv_to_dict_list(file_path=self._descriptor_dir / source, separator=separator) + list_value = FileUtil.read_csv_to_dict_list(file_path=self._descriptor_dir / source, separator=separator) elif source.endswith("json"): - value = FileUtil.read_json_to_dict_list(self._descriptor_dir / source) + list_value = FileUtil.read_json_to_dict_list(self._descriptor_dir / source) else: raise ValueError(f"Invalid source '{source}' of nestedkey '{self._statement.name}'") - result = self._modify_nestedkey_data_list(parent_context, value) + result = self._modify_nestedkey_data_list(parent_context, list_value) elif nestedkey_type == DATA_TYPE_DICT: if source.endswith("json"): - value = FileUtil.read_json_to_dict(self._descriptor_dir / source) - result = self._modify_nestedkey_data_dict(parent_context, value) + dict_value = FileUtil.read_json_to_dict(self._descriptor_dir / source) + result = self._modify_nestedkey_data_dict(parent_context, dict_value) else: raise ValueError(f"Source of nestedkey having type as 'dict' does not support format {source}") # handle memstore source elif parent_context.root.memstore_manager.contain(source_str): - value = parent_context.root.memstore_manager.get_memstore(source_str).get_data_by_type( + list_value = parent_context.root.memstore_manager.get_memstore(source_str).get_data_by_type( self._statement.type, None, self._statement.cyclic ) - result = self._modify_nestedkey_data_list(parent_context, value) + result = self._modify_nestedkey_data_list(parent_context, list_value) else: raise ValueError( f"Cannot load data from source '{self._statement.source}' of '{self._statement.name}'" @@ -274,12 +277,13 @@ def _load_data_from_source(self, parent_context: GenIterContext) -> list: # Determine variable prefix and suffix setup_ctx = parent_context.parent while not isinstance(setup_ctx, SetupContext): - setup_ctx = setup_ctx.parent + setup_ctx = setup_ctx.parent # TODO: mypy issue [attr-defined] variable_prefix = self.statement.variable_prefix or setup_ctx.default_variable_prefix variable_suffix = self.statement.variable_suffix or setup_ctx.default_variable_suffix # Evaluate source_script - result = TaskUtil.evaluate_file_script_template(parent_context, result, variable_prefix, variable_suffix) + result = TaskUtil.evaluate_file_script_template( + parent_context, result, variable_prefix, variable_suffix) return result diff --git a/datamimic_ce/tasks/task_util.py b/datamimic_ce/tasks/task_util.py index d2362d9..827514e 100644 --- a/datamimic_ce/tasks/task_util.py +++ b/datamimic_ce/tasks/task_util.py @@ -376,7 +376,7 @@ def gen_task_load_data_from_source( source_data = client.get_by_page_with_query(original_query=selector, pagination=load_pagination) else: source_data = client.get_by_page_with_type( - table_name=stmt.type or stmt.name, # type: ignore + table_name=stmt.type or stmt.name, # TODO: mypy issue [return-value] pagination=load_pagination, ) else: @@ -384,7 +384,11 @@ def gen_task_load_data_from_source( else: raise ValueError(f"cannot find data source {source_str} for iterate task") - return source_data, build_from_source # type: ignore + if isinstance(source_data, list): + return_source_data = source_data + else: + return_source_data = [source_data] + return return_source_data, build_from_source # @staticmethod # def consume_minio_after_page_processing(stmt, context: Context) -> None: @@ -439,13 +443,13 @@ def consume_product_by_page( # Create exporters cache in root context if it doesn't exist if not hasattr(root_context, "_task_exporters"): # Using task_id to namespace the cache - root_context._task_exporters = {} # type: ignore # skip mypy check + root_context._task_exporters = {} # TODO: mypy issue [attr-defined] # Create a unique cache key incorporating task_id and statement details cache_key = f"{root_context.task_id}_{stmt.name}_{stmt.storage_id}_{stmt}" # Get or create exporters - if cache_key not in root_context._task_exporters: # type: ignore # skip mypy check + if cache_key not in root_context._task_exporters: # TODO: mypy issue [attr-defined] # Create the consumer set once consumer_set = stmt.targets.copy() # consumer_set.add(EXPORTER_PREVIEW) deactivating preview exporter for multi-process @@ -463,14 +467,14 @@ def consume_product_by_page( ) # Cache the exporters - root_context._task_exporters[cache_key] = { # type: ignore # skip mypy check + root_context._task_exporters[cache_key] = { # TODO: mypy issue [attr-defined] "with_operation": consumers_with_operation, "without_operation": consumers_without_operation, "page_count": 0, # Track number of pages processed } # Get cached exporters - exporters = root_context._task_exporters[cache_key] # type: ignore # skip mypy check + exporters = root_context._task_exporters[cache_key] # TODO: mypy issue [attr-defined] exporters["page_count"] += 1 # Use cached exporters diff --git a/datamimic_ce/tasks/variable_task.py b/datamimic_ce/tasks/variable_task.py index 8f3a7a5..5255004 100644 --- a/datamimic_ce/tasks/variable_task.py +++ b/datamimic_ce/tasks/variable_task.py @@ -331,7 +331,7 @@ def execute(self, ctx: Context | GenIterContext | SetupContext) -> None: # Default variable prefix and suffix setup_ctx = ctx while not isinstance(setup_ctx, SetupContext): - setup_ctx = setup_ctx.parent # type: ignore # skip mypy check + setup_ctx = setup_ctx.parent # TODO: mypy issue [attr-defined] variable_prefix = self.statement.variable_prefix or setup_ctx.default_variable_prefix variable_suffix = self.statement.variable_suffix or setup_ctx.default_variable_suffix # Evaluate source script