-
Notifications
You must be signed in to change notification settings - Fork 20
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: Add provider abstract class and support for self-implemented providers
- Loading branch information
Showing
6 changed files
with
120 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
googletrans==3.1.0a0 | ||
translators | ||
datasets | ||
tqdm |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
from .base_provider import Provider | ||
from .google_provider import GoogleProvider | ||
from .multiple_providers import MultipleProviders |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
from typing import Union, List | ||
from abc import ABC, abstractmethod | ||
from types import SimpleNamespace | ||
|
||
|
||
class Provider(ABC):
    """Abstract base class for translation providers.

    A concrete provider assigns its backend to ``self.translator`` in
    ``__init__`` and implements ``_do_translate``. Callers use ``translate``,
    which validates the backend result and always hands back an object
    exposing a ``text`` attribute.
    """

    @abstractmethod
    def __init__(self):
        # Placeholder only: subclasses are expected to overwrite this with a
        # real translator object, otherwise ``translate`` refuses to run.
        self.translator = None

    @abstractmethod
    def _do_translate(self, input_data: Union[str, List[str]], src: str, dest: str, **kwargs) -> Union[str, List[str]]:
        """
        Perform the actual translation with the provider's backend.

        :param input_data: The text to translate (a string or a list of strings)
        :param src: The source language of input_data
        :param dest: The language input_data should be translated into
        :param kwargs: Provider-specific options
        :return: A string, a list of strings, or an object whose ``text``
                 attribute is a string or a list of strings
        :raises NotImplementedError: Always, when the base implementation is invoked
        """
        # ``NotImplemented`` is the rich-comparison sentinel and is not
        # callable; the exception class is ``NotImplementedError``.
        raise NotImplementedError(" The function _do_translate has not been implemented.")

    def translate(self, input_data: Union[str, List[str]], src: str, dest: str) -> SimpleNamespace:
        """
        Translate text input_data from a language to another language

        :param input_data: The input_data (can be a string or a list of strings)
        :param src: The source lang of input_data
        :param dest: The target lang you want input_data to be translated
        :return: An object with a ``text`` attribute holding the translation:
                 either the object returned by ``_do_translate`` itself (when it
                 already carries a valid ``text``) or a ``SimpleNamespace``
                 wrapping the returned string / list
        :raises AssertionError: If no translator has been assigned to self.translator
        :raises ValueError: If ``_do_translate`` returns an unsupported type
        """
        # Raised explicitly (instead of via ``assert``) so the check survives
        # ``python -O``; ``getattr`` also covers subclasses that never set the
        # attribute. The exception type and message match the former assert.
        if not getattr(self, "translator", None):
            raise AssertionError("Please assign the translator object instance to self.translator")

        result = self._do_translate(input_data, src=src, dest=dest)

        # Backend objects that already expose ``text`` are passed through as-is
        # once the attribute has been validated.
        if hasattr(result, 'text'):
            if isinstance(result.text, (list, str)):
                return result
            raise ValueError(f"The return object of _do_translate with required 'text' attribute expected to be 'list' or 'string' "
                             f"but found {type(result.text)}")

        # Plain strings / lists are wrapped so callers can always read ``.text``.
        if isinstance(result, (list, str)):
            return SimpleNamespace(text=result)
        raise ValueError(f"The return object of _do_translate expected to be 'list' or 'string',"
                         f" found {type(result)}")
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
import sys | ||
from typing import Union, List | ||
sys.path.insert(0, r'./') | ||
from googletrans import Translator | ||
from .base_provider import Provider | ||
|
||
|
||
class GoogleProvider(Provider):
    """Provider that delegates translation to a ``googletrans`` ``Translator``."""

    def __init__(self):
        # One Translator instance is created up front and reused for every call.
        self.translator = Translator()

    def _do_translate(self, input_data: Union[str, List[str]], src: str, dest: str, **kwargs) -> Union[str, List[str]]:
        """
        Forward the request to the underlying translator's ``translate`` method.

        :param input_data: The text to translate (a string or a list of strings)
        :param src: The source language of input_data
        :param dest: The language input_data should be translated into
        :param kwargs: Accepted for signature compatibility; not forwarded
        :return: Whatever the translator's ``translate`` call returns
        """
        backend_translate = self.translator.translate
        outcome = backend_translate(input_data, src=src, dest=dest)
        return outcome
|
||
|
||
if __name__ == '__main__':
    # Manual smoke check: translate one English word to Vietnamese and show it.
    provider = GoogleProvider()
    outcome = provider.translate("Hello", src="en", dest="vi")
    print(outcome.text)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
import sys | ||
sys.path.insert(0, r'./') | ||
from typing import Union, List | ||
import translators as ts | ||
from .base_provider import Provider | ||
|
||
|
||
class MultipleProviders(Provider):
    """Provider backed by the ``translators`` package (imported here as ``ts``).

    Default call options live in ``self.config``; the ``"translator"`` entry
    selects which engine ``translate_text`` uses.
    """

    def __init__(self, cache: bool = False):
        """
        :param cache: When true, call ``preaccelerate_and_speedtest()`` once at
                      construction time (optional; caches sessions in advance,
                      which can help improve access speed)
        """
        self.translator = ts
        self.config = {
            "translator": "bing",
            "timeout": 5.0,
        }
        if cache:
            # Optional. Caching sessions in advance, which can help improve access speed.
            self.translator.preaccelerate_and_speedtest()

    def _do_translate(self, input_data: Union[str, List[str]], src: str, dest: str, **kwargs) -> Union[str, List[str]]:
        """
        Translate ``input_data`` through ``translators.translate_text``.

        ``translate_text`` is documented as taking a single string
        (``query_text: str``), so a list input is translated item by item and a
        list of results is returned.

        :param input_data: The text to translate (a string or a list of strings)
        :param src: The source language, passed as ``from_language``
        :param dest: The target language, passed as ``to_language``
        :param kwargs: Per-call options forwarded to ``translate_text``; they
                       override the defaults stored in ``self.config``
        :return: The translated string, or a list of translated strings when
                 ``input_data`` is a list

        Reference signature of the underlying call:

        translate_text(query_text: str, translator: str = 'bing', from_language: str = 'auto', to_language: str = 'en', **kwargs) -> Union[str, dict]
        :param query_text: str, must.
        :param translator: str, default 'bing'.
        :param from_language: str, default 'auto'.
        :param to_language: str, default 'en'.
        :param if_use_preacceleration: bool, default False.
        :param **kwargs:
                :param is_detail_result: bool, default False.
                :param professional_field: str, default None. Support alibaba(), baidu(), caiyun(), cloudTranslation(), elia(), sysTran(), youdao(), volcEngine() only.
                :param timeout: float, default None.
                :param proxies: dict, default None.
                :param sleep_seconds: float, default 0.
                :param update_session_after_freq: int, default 1000.
                :param update_session_after_seconds: float, default 1500.
                :param if_use_cn_host: bool, default False. Support google(), bing() only.
                :param reset_host_url: str, default None. Support google(), yandex() only.
                :param if_check_reset_host_url: bool, default True. Support google(), yandex() only.
                :param if_ignore_empty_query: bool, default False.
                :param limit_of_length: int, default 20000.
                :param if_ignore_limit_of_length: bool, default False.
                :param if_show_time_stat: bool, default False.
                :param show_time_stat_precision: int, default 2.
                :param if_print_warning: bool, default True.
                :param lingvanex_mode: str, default 'B2C', choose from ("B2C", "B2B").
                :param myMemory_mode: str, default "web", choose from ("web", "api").
        :return: str or dict
        """
        # Instance defaults first, so explicit per-call options take precedence.
        options = {**self.config, **kwargs}

        if isinstance(input_data, list):
            return [
                self.translator.translate_text(text, from_language=src, to_language=dest, **options)
                for text in input_data
            ]
        return self.translator.translate_text(input_data, from_language=src, to_language=dest, **options)
|
||
|
||
if __name__ == '__main__':
    # Manual smoke check: translate one English word to Vietnamese and show it.
    provider = MultipleProviders()
    outcome = provider.translate("Hello", src="en", dest="vi")
    print(outcome.text)