From ab5bc9163b9418bcd5fbc6642511f79359e1ce27 Mon Sep 17 00:00:00 2001 From: vTuanpham Date: Wed, 13 Dec 2023 16:44:08 +0700 Subject: [PATCH] chore: add string doc --- configs/config.py | 6 +++++- translator/data_parser.py | 13 ++++++++----- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/configs/config.py b/configs/config.py index 3c4b920..59b8d48 100644 --- a/configs/config.py +++ b/configs/config.py @@ -8,7 +8,11 @@ @dataclass class Config(ABC): - qas_id: str + """ + Abstract config from which all subclasses inherit their methods + """ + + qas_id: str # Required field in all subclasses def __str__(self) -> str: return self.__repr__ diff --git a/translator/data_parser.py b/translator/data_parser.py index 59e933a..656df95 100644 --- a/translator/data_parser.py +++ b/translator/data_parser.py @@ -38,14 +38,17 @@ def __init__(self, file_path: str, target_fields: List[str], target_config: Union[BaseConfig, QAConfig, DialogsConfig], do_translate: bool = False, - enable_sub_task_thread: bool = True, + enable_sub_task_thread: bool = True, # Enable splitting a list into sub-lists if a list in one example is too large to process + # This argument goes with max_list_length_per_thread no_translated_code: bool = False, - max_example_per_thread: int = 400, - large_chunks_threshold: int = 20000, - max_list_length_per_thread: int = 3, + max_example_per_thread: int = 400, # How many examples each thread can contain + large_chunks_threshold: int = 20000, # Maximum number of examples that will be distributed evenly across threads + max_list_length_per_thread: int = 3, # Maximum number of strings contained in a list in a single thread. + # If larger, split the list into sub-lists and process them in parallel source_lang: str = "en", target_lang: str = "vi", - fail_translation_code: str="P1OP1_F" + fail_translation_code: str="P1OP1_F" # Fail code marking an unexpected translation failure; it can be removed + # post-translation ) -> None: self.data_read = None