From 3b6fd5694bb6d01a6838f99a0361328e9b605d08 Mon Sep 17 00:00:00 2001 From: miro Date: Mon, 16 Sep 2024 21:52:31 +0100 Subject: [PATCH 1/8] feat:standardize_lang_tag --- ovos_plugin_manager/templates/coreference.py | 14 ++++++------- ovos_plugin_manager/templates/hotwords.py | 3 ++- ovos_plugin_manager/templates/postag.py | 3 ++- ovos_plugin_manager/templates/segmentation.py | 6 +++--- ovos_plugin_manager/templates/solvers.py | 5 +++++ ovos_plugin_manager/templates/stt.py | 11 ++++------ ovos_plugin_manager/templates/tokenization.py | 5 ++--- ovos_plugin_manager/templates/tts.py | 3 ++- ovos_plugin_manager/thirdparty/solvers.py | 9 +++++---- ovos_plugin_manager/utils/__init__.py | 20 +++---------------- ovos_plugin_manager/utils/config.py | 18 ++++++++--------- ovos_plugin_manager/utils/ui.py | 3 ++- requirements/requirements.txt | 2 +- 13 files changed, 47 insertions(+), 55 deletions(-) diff --git a/ovos_plugin_manager/templates/coreference.py b/ovos_plugin_manager/templates/coreference.py index dbfc55bc..36dbb468 100644 --- a/ovos_plugin_manager/templates/coreference.py +++ b/ovos_plugin_manager/templates/coreference.py @@ -1,5 +1,6 @@ from ovos_bus_client.message import dig_for_message from ovos_utils import classproperty +from ovos_utils.lang import standardize_lang_tag from ovos_utils.process_utils import RuntimeRequirements from quebra_frases import word_tokenize @@ -64,10 +65,10 @@ def lang(self): msg = dig_for_message() if msg: lang = msg.data.get("lang") - return lang or "en-us" + return standardize_lang_tag(lang or "en-us") def contains_corefs(self, text, lang=None): - lang = lang or self.lang + lang = standardize_lang_tag(lang or self.lang, macro=True) if lang.startswith("en"): indicators = self.COREFERENCE_INDICATORS_EN elif lang.startswith("pt"): @@ -120,7 +121,7 @@ def extract_replacements(original, solved): return bucket def add_context(self, word, solved, lang=None): - lang = lang or self.lang + lang = standardize_lang_tag(lang or self.lang) if lang not in self.contexts: self.contexts[lang] = {} if word not in self.contexts[lang]: @@ -130,7 +131,7 @@ def add_context(self, word, solved, lang=None): self.contexts[lang][word].append(solved) def extract_context(self, text=None, solved=None, lang=None): - lang = lang or self.lang + lang = standardize_lang_tag(lang or self.lang) text = text or self._prev_sentence solved = solved or self._prev_solved replaced = self.extract_replacements(text, solved) @@ -139,7 +140,7 @@ def extract_context(self, text=None, solved=None, lang=None): return replaced def replace_coreferences(self, text, lang=None, set_context=False): - lang = lang or self.lang + lang = standardize_lang_tag(lang or self.lang) solved = self.solve_corefs(text, lang=lang) self._prev_sentence = text self._prev_solved = solved @@ -148,7 +149,7 @@ def replace_coreferences(self, text, lang=None, set_context=False): return solved def replace_coreferences_with_context(self, text, lang=None, context=None, set_context=False): - lang = lang or self.lang + lang = standardize_lang_tag(lang or self.lang) lang_context = self.contexts.get(lang) or {} default_context = {k: v[0] for k, v in lang_context.items() if v} @@ -168,7 +169,6 @@ def replace_coreferences_with_context(self, text, lang=None, context=None, set_c return solved def solve_corefs(self, text, lang=None): - lang = lang or self.lang return text diff --git a/ovos_plugin_manager/templates/hotwords.py b/ovos_plugin_manager/templates/hotwords.py index aa495ebe..f3becc9a 100644 --- a/ovos_plugin_manager/templates/hotwords.py +++ b/ovos_plugin_manager/templates/hotwords.py @@ -6,6 +6,7 @@ """ from ovos_config import Configuration from ovos_utils import classproperty +from ovos_utils.lang import standardize_lang_tag from ovos_utils.process_utils import RuntimeRequirements @@ -49,7 +50,7 @@ def __init__(self, key_phrase="hey mycroft", config=None, lang="en-us"): self.expected_duration = self.num_phonemes * phoneme_duration self.listener_config = mycroft_config.get("listener") or {} - self.lang = str(self.config.get("lang", lang)).lower() + self.lang = standardize_lang_tag(self.config.get("lang", lang)) @classproperty def runtime_requirements(self): diff --git a/ovos_plugin_manager/templates/postag.py b/ovos_plugin_manager/templates/postag.py index 68597da5..68097c76 100644 --- a/ovos_plugin_manager/templates/postag.py +++ b/ovos_plugin_manager/templates/postag.py @@ -1,5 +1,6 @@ from ovos_bus_client.message import dig_for_message from ovos_utils import classproperty +from ovos_utils.lang import standardize_lang_tag from ovos_utils.process_utils import RuntimeRequirements @@ -48,7 +49,7 @@ def lang(self): msg = dig_for_message() if msg: lang = msg.data.get("lang") - return lang or "en-us" + return standardize_lang_tag(lang or "en-us") def postag(self, spans, lang=None): lang = lang or self.lang diff --git a/ovos_plugin_manager/templates/segmentation.py b/ovos_plugin_manager/templates/segmentation.py index cffe7f4a..2a2c3991 100644 --- a/ovos_plugin_manager/templates/segmentation.py +++ b/ovos_plugin_manager/templates/segmentation.py @@ -1,6 +1,6 @@ from ovos_bus_client.message import dig_for_message -from ovos_utils import classproperty -from ovos_utils import flatten_list +from ovos_utils import classproperty, flatten_list +from ovos_utils.lang import standardize_lang_tag from ovos_utils.process_utils import RuntimeRequirements from quebra_frases import sentence_tokenize @@ -58,7 +58,7 @@ def lang(self): msg = dig_for_message() if msg: lang = msg.data.get("lang") - return lang or "en-us" + return standardize_lang_tag(lang or "en-us") @staticmethod def __extract(text, markers): diff --git a/ovos_plugin_manager/templates/solvers.py b/ovos_plugin_manager/templates/solvers.py index aea580d0..5f0b7dbb 100644 --- a/ovos_plugin_manager/templates/solvers.py +++ b/ovos_plugin_manager/templates/solvers.py @@ -5,6 +5,7 @@ from json_database import JsonStorageXDG from ovos_utils.log import LOG, log_deprecation +from ovos_utils.lang import standardize_lang_tag from ovos_utils.xdg_utils import xdg_cache_home from ovos_plugin_manager.templates.language import LanguageTranslator, LanguageDetector @@ -26,6 +27,8 @@ def func_wrapper(*args, **kwargs): return func(*args, **kwargs) lang = kwargs.get("lang") + if lang: + lang = standardize_lang_tag(lang) # check if translation can be skipped if any([lang is None, lang == solver.default_lang, @@ -91,6 +94,8 @@ def func_wrapper(*args, **kwargs): lang = solver.detect_language(v) LOG.debug(f"detected 'lang': {lang} in argument '{idx}' for func: {func}") + if lang: + lang = standardize_lang_tag(lang) kwargs["lang"] = lang return func(*args, **kwargs) diff --git a/ovos_plugin_manager/templates/stt.py b/ovos_plugin_manager/templates/stt.py index e028cad0..6bfc7744 100644 --- a/ovos_plugin_manager/templates/stt.py +++ b/ovos_plugin_manager/templates/stt.py @@ -14,7 +14,7 @@ from ovos_utils import classproperty from ovos_utils.log import deprecated from ovos_utils.process_utils import RuntimeRequirements - +from ovos_utils.lang import standardize_lang_tag from ovos_plugin_manager.utils.config import get_plugin_config @@ -78,9 +78,9 @@ def recognizer(self, val): @property def lang(self): - return self._lang or \ + return standardize_lang_tag(self._lang or \ self.config.get("lang") or \ - Configuration().get("lang", "en-us") + Configuration().get("lang", "en-us")) @lang.setter def lang(self, val): @@ -114,10 +114,7 @@ def credential(self, val): "implement config handling directly instead", "1.0.0") def init_language(config_core): lang = config_core.get("lang", "en-US") - langs = lang.split("-") - if len(langs) == 2: - return langs[0].lower() + "-" + langs[1].upper() - return lang + return standardize_lang_tag(lang, macro=True) @abstractmethod def execute(self, audio, language: Optional[str] = None) -> str: diff --git a/ovos_plugin_manager/templates/tokenization.py b/ovos_plugin_manager/templates/tokenization.py index dc6c802d..a550c1f4 100644 --- a/ovos_plugin_manager/templates/tokenization.py +++ b/ovos_plugin_manager/templates/tokenization.py @@ -1,5 +1,6 @@ from ovos_bus_client.message import dig_for_message from ovos_utils import classproperty +from ovos_utils.lang import standardize_lang_tag from ovos_utils.process_utils import RuntimeRequirements from quebra_frases import span_indexed_word_tokenize, word_tokenize @@ -49,14 +50,12 @@ def lang(self): msg = dig_for_message() if msg: lang = msg.data.get("lang") - return lang or "en-us" + return standardize_lang_tag(lang or "en-us") def span_tokenize(self, text, lang=None): - lang = lang or self.lang return span_indexed_word_tokenize(text) def tokenize(self, text, lang=None): - lang = lang or self.lang return word_tokenize(text) @staticmethod diff --git a/ovos_plugin_manager/templates/tts.py b/ovos_plugin_manager/templates/tts.py index b05ecaf5..5fac1c8c 100644 --- a/ovos_plugin_manager/templates/tts.py +++ b/ovos_plugin_manager/templates/tts.py @@ -22,6 +22,7 @@ from ovos_utils.fakebus import FakeBus from ovos_utils.file_utils import get_cache_directory from ovos_utils.lang.visimes import VISIMES +from ovos_utils.lang import standardize_lang_tag from ovos_utils.log import LOG, deprecated, log_deprecation from ovos_utils.metrics import Stopwatch from ovos_utils.process_utils import RuntimeRequirements @@ -62,7 +63,7 @@ def __init__(self, plugin_id: str, lang: str, voice: str, synth_kwargs: dict = N synth_kwargs (dict, optional): Additional keyword arguments for the synthesizer. """ self.plugin_id = plugin_id - self.lang = lang + self.lang = standardize_lang_tag(lang) self.voice = voice self.synth_kwargs = synth_kwargs or {} diff --git a/ovos_plugin_manager/thirdparty/solvers.py b/ovos_plugin_manager/thirdparty/solvers.py index 91bdef55..b16afcad 100644 --- a/ovos_plugin_manager/thirdparty/solvers.py +++ b/ovos_plugin_manager/thirdparty/solvers.py @@ -30,6 +30,7 @@ from typing import Optional, List, Dict from ovos_utils import flatten_list +from ovos_utils.lang import standardize_lang_tag from ovos_utils.log import LOG from quebra_frases import sentence_tokenize @@ -53,7 +54,7 @@ def __init__(self, config=None, self.enable_cache = enable_cache self.config = config or {} self.supported_langs = self.config.get("supported_langs") or [] - self.default_lang = internal_lang or self.config.get("lang", "en") + self.default_lang = standardize_lang_tag(internal_lang or self.config.get("lang", "en"), macro=True) if self.default_lang not in self.supported_langs: self.supported_langs.insert(0, self.default_lang) self._translator = translator or OVOSLangTranslationFactory.create() if self.enable_tx else None @@ -123,9 +124,9 @@ def translate(self, text: str, :param source_lang: Source language code. :return: Translated text. """ - source_lang = source_lang or self.detect_language(text) - target_lang = target_lang or self.default_lang - if source_lang.split("-")[0] == target_lang.split("-")[0]: + source_lang = standardize_lang_tag(source_lang or self.detect_language(text), macro=True) + target_lang = standardize_lang_tag(target_lang or self.default_lang, macro=True) + if source_lang == target_lang: return text # skip translation return self.translator.translate(text, target=target_lang, diff --git a/ovos_plugin_manager/utils/__init__.py b/ovos_plugin_manager/utils/__init__.py index a07e6902..aef6372a 100644 --- a/ovos_plugin_manager/utils/__init__.py +++ b/ovos_plugin_manager/utils/__init__.py @@ -175,23 +175,9 @@ def load_plugin(plug_name: str, plug_type: Optional[PluginTypes] = None): def normalize_lang(lang): - # TODO consider moving to LF or ovos_utils - # special handling, the parse sometimes messes this up - # eg, uk-ua gets normalized to uk-gb - # this also makes lookup easier as we - # often get duplicate entries with both variants - if "-" in lang: - pieces = lang.split("-") - if len(pieces) == 2 and pieces[0] == pieces[1]: - lang = pieces[0] - - try: - from langcodes import standardize_tag as _normalize_lang - lang = _normalize_lang(lang, macro=True) - except ValueError: - # this lang code is apparently not valid ? - pass - return lang + # TODO - add deprecation warning + from ovos_utils.lang import standardize_lang_tag + return standardize_lang_tag(lang) class ReadWriteStream: diff --git a/ovos_plugin_manager/utils/config.py b/ovos_plugin_manager/utils/config.py index dd1186ae..7da73361 100644 --- a/ovos_plugin_manager/utils/config.py +++ b/ovos_plugin_manager/utils/config.py @@ -1,8 +1,8 @@ from typing import Optional, Union from ovos_config.config import Configuration +from ovos_utils.lang import standardize_lang_tag from ovos_utils.log import LOG -from ovos_plugin_manager.utils import load_plugin, find_plugins, \ - normalize_lang, PluginTypes, PluginConfigTypes +from ovos_plugin_manager.utils import load_plugin, find_plugins, PluginTypes, PluginConfigTypes def get_plugin_config(config: Optional[dict] = None, section: str = None, @@ -19,7 +19,7 @@ def get_plugin_config(config: Optional[dict] = None, section: str = None, @return: Configuration for the requested module, including `lang` and `module` keys """ config = config or Configuration() - lang = config.get('lang') or Configuration().get('lang') + lang = standardize_lang_tag(config.get('lang') or Configuration().get('lang', "en")) config = (config.get('intentBox', {}).get(section) or config.get(section) or config) if section else config module = module or config.get('module') @@ -54,7 +54,7 @@ def get_valid_plugin_configs(configs: dict, lang: str, valid_configs = list() if include_dialects: # Check other dialects of the requested language - base_lang = lang.split("-")[0] + base_lang = standardize_lang_tag(lang, macro=True) for language, confs in configs.items(): if language.startswith(base_lang): for config in confs: @@ -117,7 +117,7 @@ def load_plugin_configs(plug_name: str, """ config = load_plugin(plug_name + ".config", plug_type) if normalize_language_keys: - return {normalize_lang(lang): v for lang, v in config.items()} + return {standardize_lang_tag(lang): v for lang, v in config.items()} return config @@ -144,7 +144,7 @@ def get_plugin_supported_languages(plug_type: PluginTypes) -> dict: load_plugin_configs(plug, PluginConfigTypes(f"{plug_type.value}.config")) for lang, config in configs: - lang = normalize_lang(lang) + lang = standardize_lang_tag(lang) lang_configs.setdefault(lang, list()) lang_configs[lang].append(plug) return lang_configs @@ -159,7 +159,7 @@ def get_plugin_language_configs(plug_type: PluginTypes, lang: str, @param include_dialects: consider configurations in different locales @return: dict {`plugin_name`: [`valid_configs`]} """ - lang = normalize_lang(lang) + lang = standardize_lang_tag(lang) plugin_configs = dict() valid_configs = dict() for plug in find_plugins(plug_type): @@ -167,10 +167,10 @@ def get_plugin_language_configs(plug_type: PluginTypes, lang: str, valid_configs = \ load_plugin_configs(plug, PluginConfigTypes(f"{plug_type.value}.config")) - valid_configs = {normalize_lang(lang): conf + valid_configs = {standardize_lang_tag(lang): conf for lang, conf in valid_configs.items()} if include_dialects: - lang = lang.split('-')[0] + lang = standardize_lang_tag(lang, macro=True) for language in valid_configs: if language.startswith(lang): plugin_configs[plug] += valid_configs[language] diff --git a/ovos_plugin_manager/utils/ui.py b/ovos_plugin_manager/utils/ui.py index 2a8647f7..20664aa9 100644 --- a/ovos_plugin_manager/utils/ui.py +++ b/ovos_plugin_manager/utils/ui.py @@ -2,6 +2,7 @@ from typing import Optional from ovos_utils import flatten_list +from ovos_utils.lang import standardize_lang_tag from ovos_utils.log import LOG from ovos_plugin_manager import PluginTypes from ovos_plugin_manager.stt import get_stt_lang_configs @@ -40,7 +41,7 @@ def config2option(cls, cfg: dict, plugin_type: PluginTypes, """ cfg = cls._migrate_old_cfg(cfg) engine = cfg["module"] - lang = lang or cfg.get("lang") + lang = standardize_lang_tag(lang or cfg.get("lang"), macro=True) plugin_display_name = engine.replace("_", " ").replace("-", " ").title() diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 439a14dd..dac1e654 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -1,4 +1,4 @@ -ovos-utils>=0.0.38,<1.0.0 +ovos-utils>=0.2.1,<1.0.0 ovos_bus_client>=0.0.8,<1.0.0 ovos-config>=0.0.12,<1.0.0 combo_lock~=0.2 From f8f2d220247cae0feef2668a4884eaac9fa19a14 Mon Sep 17 00:00:00 2001 From: JarbasAI <33701864+JarbasAl@users.noreply.github.com> Date: Sat, 12 Oct 2024 16:26:20 +0100 Subject: [PATCH 2/8] Update __init__.py --- ovos_plugin_manager/utils/__init__.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ovos_plugin_manager/utils/__init__.py b/ovos_plugin_manager/utils/__init__.py index aef6372a..4dac2518 100644 --- a/ovos_plugin_manager/utils/__init__.py +++ b/ovos_plugin_manager/utils/__init__.py @@ -18,7 +18,7 @@ from typing import Optional import pkg_resources -from ovos_utils.log import LOG, log_deprecation +from ovos_utils.log import LOG, log_deprecation, deprecated class PluginTypes(str, Enum): @@ -173,9 +173,8 @@ def load_plugin(plug_name: str, plug_type: Optional[PluginTypes] = None): LOG.warning(f'Could not find the plugin {plug_type}.{plug_name}') return None - +@deprecated("normalize_lang has been deprecated! update to 'from ovos_utils.lang import standardize_lang_tag'", "1.0.0") def normalize_lang(lang): - # TODO - add deprecation warning from ovos_utils.lang import standardize_lang_tag return standardize_lang_tag(lang) From 52bbef8afe3cd1dd48d0c12a115c73e46cc7d64f Mon Sep 17 00:00:00 2001 From: miro Date: Sat, 12 Oct 2024 20:38:41 +0100 Subject: [PATCH 3/8] normalize lang --- test/unittests/test_coref.py | 2 +- test/unittests/test_g2p.py | 8 ++++---- test/unittests/test_gui.py | 2 +- test/unittests/test_keywords.py | 2 +- test/unittests/test_language.py | 16 ++++++++-------- test/unittests/test_metadata_transformers.py | 2 +- test/unittests/test_microphone.py | 14 +++++++------- test/unittests/test_persona.py | 2 +- test/unittests/test_phal.py | 4 ++-- test/unittests/test_postag.py | 2 +- test/unittests/test_segmentation.py | 2 +- test/unittests/test_solver.py | 10 +++++----- test/unittests/test_stt.py | 2 +- test/unittests/test_text_transformers.py | 2 +- test/unittests/test_tokenization.py | 2 +- test/unittests/test_tts.py | 10 +++++----- test/unittests/test_utils.py | 4 ++-- test/unittests/test_vad.py | 4 ++-- test/unittests/test_wakewords.py | 12 ++++++------ 19 files changed, 51 insertions(+), 51 deletions(-) diff --git a/test/unittests/test_coref.py b/test/unittests/test_coref.py index 6a0fbcb9..e6d517bb 100644 --- a/test/unittests/test_coref.py +++ b/test/unittests/test_coref.py @@ -48,7 +48,7 @@ class TestCoref(unittest.TestCase): CONFIG_TYPE = PluginConfigTypes.COREFERENCE_SOLVER TEST_CONFIG = {"test": True} CONFIG_SECTION = "coref" - TEST_LANG = "en-us" + TEST_LANG = "en-US" @patch("ovos_plugin_manager.utils.find_plugins") def test_find_plugins(self, find_plugins): diff --git a/test/unittests/test_g2p.py b/test/unittests/test_g2p.py index 51a068e8..77d31929 100644 --- a/test/unittests/test_g2p.py +++ b/test/unittests/test_g2p.py @@ -38,7 +38,7 @@ class TestG2P(unittest.TestCase): CONFIG_TYPE = PluginConfigTypes.PHONEME TEST_CONFIG = {"test": True} CONFIG_SECTION = "g2p" - TEST_LANG = "en-us" + TEST_LANG = "en-US" @patch("ovos_plugin_manager.utils.find_plugins") def test_find_plugins(self, find_plugins): @@ -105,10 +105,10 @@ def _copy_args(*args): mock_get_class.assert_called_once() self.assertEqual(call_args, ({**_TEST_CONFIG['g2p']['good'], **{"module": "good", - "lang": "en-us"}},)) + "lang": "en-US"}},)) mock_class.assert_called_once_with({**_TEST_CONFIG['g2p']['good'], **{"module": "good", - "lang": "en-us"}}) + "lang": "en-US"}}) OVOSG2PFactory.get_class = real_get_class def test_create_fallback(self): @@ -135,5 +135,5 @@ def _copy_args(*args): self.assertEqual(bad_call_args[0]["module"], 'bad') mock_class.assert_called_once_with({**_FALLBACK_CONFIG['g2p']['good'], **{"module": "good", - "lang": "en-us"}}) + "lang": "en-US"}}) OVOSG2PFactory.get_class = real_get_class diff --git a/test/unittests/test_gui.py b/test/unittests/test_gui.py index 39ad207c..80877ae3 100644 --- a/test/unittests/test_gui.py +++ b/test/unittests/test_gui.py @@ -15,7 +15,7 @@ class TestGui(unittest.TestCase): CONFIG_TYPE = PluginConfigTypes.GUI TEST_CONFIG = {"test": True} CONFIG_SECTION = "gui" - TEST_LANG = "en-us" + TEST_LANG = "en-US" @patch("ovos_plugin_manager.utils.find_plugins") def test_find_plugins(self, find_plugins): diff --git a/test/unittests/test_keywords.py b/test/unittests/test_keywords.py index 9a174fa0..86bb0835 100644 --- a/test/unittests/test_keywords.py +++ b/test/unittests/test_keywords.py @@ -15,7 +15,7 @@ class TestKeywords(unittest.TestCase): CONFIG_TYPE = PluginConfigTypes.KEYWORD_EXTRACTION TEST_CONFIG = {"test": True} CONFIG_SECTION = "keyword_extract" - TEST_LANG = "en-us" + TEST_LANG = "en-US" @patch("ovos_plugin_manager.utils.find_plugins") def test_find_plugins(self, find_plugins): diff --git a/test/unittests/test_language.py b/test/unittests/test_language.py index ebd3195b..95a05b93 100644 --- a/test/unittests/test_language.py +++ b/test/unittests/test_language.py @@ -35,7 +35,7 @@ class TestLanguageTranslator(unittest.TestCase): CONFIG_TYPE = PluginConfigTypes.TRANSLATE TEST_CONFIG = {"test": True} CONFIG_SECTION = "" - TEST_LANG = "en-us" + TEST_LANG = "en-US" @patch("ovos_plugin_manager.utils.find_plugins") def test_find_plugins(self, find_plugins): @@ -69,7 +69,7 @@ class TestLanguageDetector(unittest.TestCase): CONFIG_TYPE = PluginConfigTypes.LANG_DETECT TEST_CONFIG = {"test": True} CONFIG_SECTION = "" - TEST_LANG = "en-us" + TEST_LANG = "en-US" @patch("ovos_plugin_manager.utils.find_plugins") def test_find_plugins(self, find_plugins): @@ -128,7 +128,7 @@ def test_create(self, config, load_plugin): plug = OVOSLangDetectionFactory.create() load_plugin.assert_called_once_with('good') mock_plugin.assert_called_once_with(config={**_TEST_CONFIG["language"]["good"], - **{'module': 'good', 'lang': 'en-us'}}) + **{'module': 'good', 'lang': 'en-US'}}) self.assertEquals(plug_instance, plug) # Create plugin fully specified in passed config @@ -136,7 +136,7 @@ def test_create(self, config, load_plugin): plug = OVOSLangDetectionFactory.create(_TEST_CONFIG) load_plugin.assert_called_with("good") mock_plugin.assert_called_once_with(config={**_TEST_CONFIG["language"]["good"], - **{'module': 'good', 'lang': 'en-us'}}) + **{'module': 'good', 'lang': 'en-US'}}) self.assertEquals(plug_instance, plug) def test_create_fallback(self): @@ -163,7 +163,7 @@ def _copy_args(*args): self.assertEqual(call_args[0]["module"], 'good') self.assertEqual(bad_call_args[0]["module"], 'bad') mock_class.assert_called_once_with(config={**_TEST_CONFIG["language"]["good"], - **{'module': 'good', 'lang': 'en-us'}}) + **{'module': 'good', 'lang': 'en-US'}}) OVOSLangDetectionFactory.get_class = real_get_class @@ -198,7 +198,7 @@ def test_create(self, config, load_plugin): plug = OVOSLangTranslationFactory.create() load_plugin.assert_called_once_with('good') mock_plugin.assert_called_once_with(config={**_TEST_CONFIG["language"]["good"], - **{'module': 'good', 'lang': 'en-us'}}) + **{'module': 'good', 'lang': 'en-US'}}) self.assertEquals(plug_instance, plug) # Create plugin fully specified in passed config @@ -206,7 +206,7 @@ def test_create(self, config, load_plugin): plug = OVOSLangTranslationFactory.create(_TEST_CONFIG) load_plugin.assert_called_with("good") mock_plugin.assert_called_once_with(config={**_TEST_CONFIG["language"]["good"], - **{'module': 'good', 'lang': 'en-us'}}) + **{'module': 'good', 'lang': 'en-US'}}) self.assertEquals(plug_instance, plug) def test_create_fallback(self): @@ -233,5 +233,5 @@ def _copy_args(*args): self.assertEqual(call_args[0]["module"], 'good') self.assertEqual(bad_call_args[0]["module"], 'bad') mock_class.assert_called_once_with(config={**_TEST_CONFIG["language"]["good"], - **{'module': 'good', 'lang': 'en-us'}}) + **{'module': 'good', 'lang': 'en-US'}}) OVOSLangTranslationFactory.get_class = real_get_class diff --git a/test/unittests/test_metadata_transformers.py b/test/unittests/test_metadata_transformers.py index 81f82df2..7fb3b374 100644 --- a/test/unittests/test_metadata_transformers.py +++ b/test/unittests/test_metadata_transformers.py @@ -15,7 +15,7 @@ class TestMedatadataTransformer(unittest.TestCase): CONFIG_TYPE = PluginConfigTypes.METADATA_TRANSFORMER TEST_CONFIG = {"test": True} CONFIG_SECTION = "" - TEST_LANG = "en-us" + TEST_LANG = "en-US" @patch("ovos_plugin_manager.utils.find_plugins") def test_find_plugins(self, find_plugins): diff --git a/test/unittests/test_microphone.py b/test/unittests/test_microphone.py index 50001136..cd7114df 100644 --- a/test/unittests/test_microphone.py +++ b/test/unittests/test_microphone.py @@ -4,7 +4,7 @@ from ovos_plugin_manager.utils import PluginTypes, PluginConfigTypes _TEST_CONFIG = { - "lang": "en-us", + "lang": "en-US", "microphone": { "module": "dummy", "dummy": { @@ -20,7 +20,7 @@ } } _FALLBACK_CONFIG = { - "lang": "en-us", + "lang": "en-US", "microphone": { "module": "bad", "bad": { @@ -117,7 +117,7 @@ class TestMicrophone(unittest.TestCase): CONFIG_TYPE = PluginConfigTypes.MIC TEST_CONFIG = {"test": True} CONFIG_SECTION = "microphone" - TEST_LANG = "en-us" + TEST_LANG = "en-US" @patch("ovos_plugin_manager.utils.find_plugins") def test_find_plugins(self, find_plugins): @@ -158,7 +158,7 @@ def _copy_args(*args): mock_get_class.assert_called_once() self.assertEqual(call_args, ({**_TEST_CONFIG['microphone']['dummy'], **{"module": "dummy", - "lang": "en-us"}},)) + "lang": "en-US"}},)) mock_class.assert_called_once_with(**_TEST_CONFIG['microphone']['dummy']) OVOSMicrophoneFactory.get_class = real_get_class @@ -204,7 +204,7 @@ def test_get_microphone_config(self): dummy_config = get_microphone_config(config) self.assertEqual(dummy_config, {**_TEST_CONFIG['microphone']['dummy'], **{'module': 'dummy', - 'lang': 'en-us'}}) + 'lang': 'en-US'}}) config = copy(_TEST_CONFIG) config['microphone']['module'] = 'ovos-microphone-plugin-alsa' alsa_config = get_microphone_config(config) @@ -212,8 +212,8 @@ def test_get_microphone_config(self): {**_TEST_CONFIG['microphone'] ['ovos-microphone-plugin-alsa'], **{'module': 'ovos-microphone-plugin-alsa', - 'lang': 'en-us'}}) + 'lang': 'en-US'}}) config = copy(_TEST_CONFIG) config['microphone']['module'] = 'fake' fake_config = get_microphone_config(config) - self.assertEqual(fake_config, {'module': 'fake', 'lang': 'en-us'}) + self.assertEqual(fake_config, {'module': 'fake', 'lang': 'en-US'}) diff --git a/test/unittests/test_persona.py b/test/unittests/test_persona.py index 747422ce..65be60a9 100644 --- a/test/unittests/test_persona.py +++ b/test/unittests/test_persona.py @@ -8,7 +8,7 @@ class TestPersona(unittest.TestCase): PLUGIN_TYPE = PluginTypes.PERSONA TEST_CONFIG = {"test": True} CONFIG_SECTION = "persona" - TEST_LANG = "en-us" + TEST_LANG = "en-US" @patch("ovos_plugin_manager.utils.find_plugins") def test_find_plugins(self, find_plugins): diff --git a/test/unittests/test_phal.py b/test/unittests/test_phal.py index 89232d01..93901165 100644 --- a/test/unittests/test_phal.py +++ b/test/unittests/test_phal.py @@ -34,7 +34,7 @@ class TestPHAL(unittest.TestCase): CONFIG_TYPE = PluginConfigTypes.PHAL TEST_CONFIG = {"test": True} CONFIG_SECTION = "" - TEST_LANG = "en-us" + TEST_LANG = "en-US" @patch("ovos_plugin_manager.utils.find_plugins") def test_find_plugins(self, find_plugins): @@ -61,7 +61,7 @@ class TestAdminPHAL(unittest.TestCase): CONFIG_TYPE = PluginConfigTypes.ADMIN TEST_CONFIG = {"test": True} CONFIG_SECTION = "" - TEST_LANG = "en-us" + TEST_LANG = "en-US" @patch("ovos_plugin_manager.utils.find_plugins") def test_find_plugins(self, find_plugins): diff --git a/test/unittests/test_postag.py b/test/unittests/test_postag.py index cbe7acd0..4689985b 100644 --- a/test/unittests/test_postag.py +++ b/test/unittests/test_postag.py @@ -43,7 +43,7 @@ class TestPostag(unittest.TestCase): CONFIG_TYPE = PluginConfigTypes.POSTAG TEST_CONFIG = {"test": True} CONFIG_SECTION = "postag" - TEST_LANG = "en-us" + TEST_LANG = "en-US" @patch("ovos_plugin_manager.utils.find_plugins") def test_find_plugins(self, find_plugins): diff --git a/test/unittests/test_segmentation.py b/test/unittests/test_segmentation.py index fc1cfbf2..042d7fbc 100644 --- a/test/unittests/test_segmentation.py +++ b/test/unittests/test_segmentation.py @@ -116,7 +116,7 @@ class TestSegmentation(unittest.TestCase): CONFIG_TYPE = PluginConfigTypes.UTTERANCE_SEGMENTATION TEST_CONFIG = {"test": True} CONFIG_SECTION = "segmentation" - TEST_LANG = "en-us" + TEST_LANG = "en-US" @patch("ovos_plugin_manager.utils.find_plugins") def test_find_plugins(self, find_plugins): diff --git a/test/unittests/test_solver.py b/test/unittests/test_solver.py index 354b18af..56b10967 100644 --- a/test/unittests/test_solver.py +++ b/test/unittests/test_solver.py @@ -156,7 +156,7 @@ class TestQuestionSolver(unittest.TestCase): CONFIG_TYPE = PluginConfigTypes.QUESTION_SOLVER TEST_CONFIG = {"test": True} CONFIG_SECTION = "" - TEST_LANG = "en-us" + TEST_LANG = "en-US" @patch("ovos_plugin_manager.utils.find_plugins") def test_find_plugins(self, find_plugins): @@ -205,7 +205,7 @@ class TestTldrSolver(unittest.TestCase): CONFIG_TYPE = PluginConfigTypes.TLDR_SOLVER TEST_CONFIG = {"test": True} CONFIG_SECTION = "" - TEST_LANG = "en-us" + TEST_LANG = "en-US" @patch("ovos_plugin_manager.utils.find_plugins") def test_find_plugins(self, find_plugins): @@ -254,7 +254,7 @@ class TestEntailmentSolver(unittest.TestCase): CONFIG_TYPE = PluginConfigTypes.ENTAILMENT_SOLVER TEST_CONFIG = {"test": True} CONFIG_SECTION = "" - TEST_LANG = "en-us" + TEST_LANG = "en-US" @patch("ovos_plugin_manager.utils.find_plugins") def test_find_plugins(self, find_plugins): @@ -303,7 +303,7 @@ class TestMultipleChoiceSolver(unittest.TestCase): CONFIG_TYPE = PluginConfigTypes.MULTIPLE_CHOICE_SOLVER TEST_CONFIG = {"test": True} CONFIG_SECTION = "" - TEST_LANG = "en-us" + TEST_LANG = "en-US" @patch("ovos_plugin_manager.utils.find_plugins") def test_find_plugins(self, find_plugins): @@ -355,7 +355,7 @@ class TestReadingComprehensionSolver(unittest.TestCase): CONFIG_TYPE = PluginConfigTypes.READING_COMPREHENSION_SOLVER TEST_CONFIG = {"test": True} CONFIG_SECTION = "" - TEST_LANG = "en-us" + TEST_LANG = "en-US" @patch("ovos_plugin_manager.utils.find_plugins") def test_find_plugins(self, find_plugins): diff --git a/test/unittests/test_stt.py b/test/unittests/test_stt.py index de866fe5..b8152a04 100644 --- a/test/unittests/test_stt.py +++ b/test/unittests/test_stt.py @@ -40,7 +40,7 @@ class TestSTT(unittest.TestCase): CONFIG_TYPE = PluginConfigTypes.STT TEST_CONFIG = {"test": True} CONFIG_SECTION = "stt" - TEST_LANG = "en-us" + TEST_LANG = "en-US" @patch("ovos_plugin_manager.utils.find_plugins") def test_find_plugins(self, find_plugins): diff --git a/test/unittests/test_text_transformers.py b/test/unittests/test_text_transformers.py index c0932fb8..8b7eecf2 100644 --- a/test/unittests/test_text_transformers.py +++ b/test/unittests/test_text_transformers.py @@ -15,7 +15,7 @@ class TestTextTransformers(unittest.TestCase): CONFIG_TYPE = PluginConfigTypes.UTTERANCE_TRANSFORMER TEST_CONFIG = {"test": True} CONFIG_SECTION = "" - TEST_LANG = "en-us" + TEST_LANG = "en-US" @patch("ovos_plugin_manager.utils.find_plugins") def test_find_plugins(self, find_plugins): diff --git a/test/unittests/test_tokenization.py b/test/unittests/test_tokenization.py index 4c7822fd..7c718e48 100644 --- a/test/unittests/test_tokenization.py +++ b/test/unittests/test_tokenization.py @@ -30,7 +30,7 @@ class TestTokenization(unittest.TestCase): CONFIG_TYPE = PluginConfigTypes.TOKENIZATION TEST_CONFIG = {"test": True} CONFIG_SECTION = "tokenization" - TEST_LANG = "en-us" + TEST_LANG = "en-US" @patch("ovos_plugin_manager.utils.find_plugins") def test_find_plugins(self, find_plugins): diff --git a/test/unittests/test_tts.py b/test/unittests/test_tts.py index 6ad28ba9..4a8bef2a 100644 --- a/test/unittests/test_tts.py +++ b/test/unittests/test_tts.py @@ -128,7 +128,7 @@ class TestTTS(unittest.TestCase): CONFIG_TYPE = PluginConfigTypes.TTS TEST_CONFIG = {"test": True} CONFIG_SECTION = "tts" - TEST_LANG = "en-us" + TEST_LANG = "en-US" @patch("ovos_plugin_manager.utils.find_plugins") def test_find_plugins(self, find_plugins): @@ -317,14 +317,14 @@ def test_tts_synth_cache_disabled(self, hash_sentence_mock): class TestSession(unittest.TestCase): def test_tts_session(self): - sess = Session(session_id="123", lang="en-us") + sess = Session(session_id="123", lang="en-US") m = Message("speak", context={"session": sess.serialize()}) tts = TTS() self.assertEqual(tts.plugin_id, "ovos-tts-plugin-dummy") self.assertEqual(tts.voice, "default") # no voice set - self.assertEqual(tts.lang, "en-us") # from config + self.assertEqual(tts.lang, "en-US") # from config # test that session makes it all the way to the TTS.queue kwargs = {"message": m} @@ -337,8 +337,8 @@ def test_tts_session(self): ctxt = tts._get_ctxt(kwargs) self.assertEqual(ctxt.plugin_id, tts.plugin_id) self.assertEqual(ctxt.lang, sess.lang) - self.assertEqual(ctxt.tts_id, f"{tts.plugin_id}/default/en-us") - self.assertEqual(ctxt.synth_kwargs, {'lang': 'en-us', "voice": "default"}) + self.assertEqual(ctxt.tts_id, f"{tts.plugin_id}/default/en-US") + self.assertEqual(ctxt.synth_kwargs, {'lang': 'en-US', "voice": "default"}) sess = Session(session_id="123", lang="klingon") diff --git a/test/unittests/test_utils.py b/test/unittests/test_utils.py index ec0b3590..bd04db4b 100644 --- a/test/unittests/test_utils.py +++ b/test/unittests/test_utils.py @@ -631,7 +631,7 @@ def test_get_plugin_config(self, config): # Test TTS config with plugin `lang` override config = { - "lang": "en-us", + "lang": "en-US", "tts": { "module": "ovos_tts_plugin_espeakng", "ovos_tts_plugin_espeakng": { @@ -656,7 +656,7 @@ def test_get_valid_plugin_configs(self): valid_en_us = get_valid_plugin_configs(_MOCK_PLUGIN_CONFIG, 'en-US', False) self.assertEqual(len(valid_en_us), 1) - valid_en = get_valid_plugin_configs(_MOCK_PLUGIN_CONFIG, 'en-us', True) + valid_en = get_valid_plugin_configs(_MOCK_PLUGIN_CONFIG, 'en-US', True) self.assertEqual(len(valid_en), 9) invalid_lang = get_valid_plugin_configs(_MOCK_PLUGIN_CONFIG, 'en-ZZ', False) diff --git a/test/unittests/test_vad.py b/test/unittests/test_vad.py index 7545fd38..42cc5ba7 100644 --- a/test/unittests/test_vad.py +++ b/test/unittests/test_vad.py @@ -5,7 +5,7 @@ from ovos_plugin_manager.utils import PluginTypes, PluginConfigTypes _TEST_CONFIG = { - "lang": "en-us", + "lang": "en-US", "listener": { "VAD": { "module": "dummy", @@ -37,7 +37,7 @@ class TestVAD(unittest.TestCase): CONFIG_TYPE = PluginConfigTypes.VAD TEST_CONFIG = _TEST_CONFIG['listener'] CONFIG_SECTION = "VAD" - TEST_LANG = "en-us" + TEST_LANG = "en-US" @patch("ovos_plugin_manager.utils.find_plugins") def test_find_plugins(self, find_plugins): diff --git a/test/unittests/test_wakewords.py b/test/unittests/test_wakewords.py index ca54aa5d..a73ee39c 100644 --- a/test/unittests/test_wakewords.py +++ b/test/unittests/test_wakewords.py @@ -35,7 +35,7 @@ class TestWakewords(unittest.TestCase): CONFIG_TYPE = PluginConfigTypes.WAKEWORD TEST_CONFIG = {"test": True} CONFIG_SECTION = "hotwords" - TEST_LANG = "en-us" + TEST_LANG = "en-US" @patch("ovos_plugin_manager.utils.find_plugins") def test_find_plugins(self, find_plugins): @@ -105,12 +105,12 @@ def test_create_hotword(self): OVOSWakeWordFactory.create_hotword(config=_TEST_CONFIG) mock_load.assert_called_once_with("ovos-ww-plugin-precise", "hey_mycroft", _TEST_CONFIG["hotwords"] - ['hey_mycroft'], "en-us", None) + ['hey_mycroft'], "en-US", None) OVOSWakeWordFactory.create_hotword("hey_neon", _TEST_CONFIG) mock_load.assert_called_with("ovos-ww-plugin-vosk", "hey_neon", _TEST_CONFIG["hotwords"] - ['hey_neon'], "en-us", None) + ['hey_neon'], "en-US", None) OVOSWakeWordFactory.load_module = real_load_module @patch("ovos_plugin_manager.utils.load_plugin") @@ -149,15 +149,15 @@ def test_load_module(self): mock_get_class.return_value = mock_return module = OVOSWakeWordFactory.load_module( "ovos-ww-plugin-precise", "hey_mycroft", _TEST_CONFIG['hotwords']['hey_mycroft'], - 'en-us') + 'en-US') mock_get_class.assert_called_once_with( - "hey_mycroft", {"lang": "en-us", "hotwords": { + "hey_mycroft", {"lang": "en-US", "hotwords": { "hey_mycroft": _TEST_CONFIG['hotwords']['hey_mycroft']}}) self.assertEqual(module, mock_return()) # Test no return mock_get_class.return_value = None with self.assertRaises(ImportError): - OVOSWakeWordFactory.load_module("dummy", "test", {}, "en-us") + OVOSWakeWordFactory.load_module("dummy", "test", {}, "en-US") OVOSWakeWordFactory.get_class = real_get_class From 0233f692f62c23b8a6cb6463c00cb3584a879be8 Mon Sep 17 00:00:00 2001 From: miro Date: Sat, 12 Oct 2024 20:50:47 +0100 Subject: [PATCH 4/8] normalize lang --- ovos_plugin_manager/segmentation.py | 1 - ovos_plugin_manager/templates/coreference.py | 2 +- ovos_plugin_manager/templates/hotwords.py | 2 +- ovos_plugin_manager/templates/language.py | 6 +++--- ovos_plugin_manager/templates/postag.py | 2 +- ovos_plugin_manager/templates/segmentation.py | 2 +- ovos_plugin_manager/templates/stt.py | 2 +- ovos_plugin_manager/templates/tokenization.py | 2 +- ovos_plugin_manager/templates/tts.py | 4 ++-- ovos_plugin_manager/utils/ui.py | 2 ++ ovos_plugin_manager/wakewords.py | 11 +++++++---- 11 files changed, 20 insertions(+), 16 deletions(-) diff --git a/ovos_plugin_manager/segmentation.py b/ovos_plugin_manager/segmentation.py index b3807574..57528334 100644 --- a/ovos_plugin_manager/segmentation.py +++ b/ovos_plugin_manager/segmentation.py @@ -5,7 +5,6 @@ from ovos_plugin_manager.templates.segmentation import Segmenter - def find_segmentation_plugins() -> dict: """ Find all installed plugins diff --git a/ovos_plugin_manager/templates/coreference.py b/ovos_plugin_manager/templates/coreference.py index 36dbb468..7124e494 100644 --- a/ovos_plugin_manager/templates/coreference.py +++ b/ovos_plugin_manager/templates/coreference.py @@ -65,7 +65,7 @@ def lang(self): msg = dig_for_message() if msg: lang = msg.data.get("lang") - return standardize_lang_tag(lang or "en-us") + return standardize_lang_tag(lang or "en-US") def contains_corefs(self, text, lang=None): lang = standardize_lang_tag(lang or self.lang, macro=True) diff --git a/ovos_plugin_manager/templates/hotwords.py b/ovos_plugin_manager/templates/hotwords.py index f3becc9a..77fffb1f 100644 --- a/ovos_plugin_manager/templates/hotwords.py +++ b/ovos_plugin_manager/templates/hotwords.py @@ -31,7 +31,7 @@ class HotWordEngine: lang (str): language code (BCP-47) """ - def __init__(self, key_phrase="hey mycroft", config=None, lang="en-us"): + def __init__(self, key_phrase="hey mycroft", config=None, lang="en-US"): self.key_phrase = str(key_phrase).lower() mycroft_config = Configuration() if config is None: diff --git a/ovos_plugin_manager/templates/language.py b/ovos_plugin_manager/templates/language.py index 743674ff..7475687b 100644 --- a/ovos_plugin_manager/templates/language.py +++ b/ovos_plugin_manager/templates/language.py @@ -16,7 +16,7 @@ def __init__(self, config: Optional[Dict[str, Union[str, int]]] = None): Can contain "lang" for default language, "hint_lang" for a hint language, and "boost" for language boost score. """ self.config = config or {} - self.default_language = self.config.get("lang", "en-us") + self.default_language = self.config.get("lang", "en-US") self.hint_language = self.config.get("hint_lang") or self.config.get('user') or self.default_language self.boost = self.config.get("boost") @@ -46,7 +46,7 @@ def detect(self, text: str) -> str: text (str): The text to detect the language of. Returns: - str: The detected language code (e.g., 'en-us'). + str: The detected language code (e.g., 'en-US'). """ @abc.abstractmethod @@ -85,7 +85,7 @@ def __init__(self, config: Optional[Dict[str, str]] = None): """ self.config = config or {} # translate from, unless specified/detected otherwise - self.default_language = self.config.get("lang") or "en-us" + self.default_language = self.config.get("lang") or "en-US" # translate to self.internal_language = (Configuration().get('language') or dict()).get("internal") or \ diff --git a/ovos_plugin_manager/templates/postag.py b/ovos_plugin_manager/templates/postag.py index 68097c76..34a67ea2 100644 --- a/ovos_plugin_manager/templates/postag.py +++ b/ovos_plugin_manager/templates/postag.py @@ -49,7 +49,7 @@ def lang(self): msg = dig_for_message() if msg: lang = msg.data.get("lang") - return standardize_lang_tag(lang or "en-us") + return standardize_lang_tag(lang or "en-US") def postag(self, spans, lang=None): lang = lang or self.lang diff --git a/ovos_plugin_manager/templates/segmentation.py b/ovos_plugin_manager/templates/segmentation.py index 2a2c3991..be6da489 100644 --- a/ovos_plugin_manager/templates/segmentation.py +++ b/ovos_plugin_manager/templates/segmentation.py @@ -58,7 +58,7 @@ def lang(self): msg = dig_for_message() if msg: lang = msg.data.get("lang") - return standardize_lang_tag(lang or "en-us") + return standardize_lang_tag(lang or "en-US") @staticmethod def __extract(text, markers): diff --git a/ovos_plugin_manager/templates/stt.py b/ovos_plugin_manager/templates/stt.py index 6bfc7744..825fbea0 100644 --- a/ovos_plugin_manager/templates/stt.py +++ b/ovos_plugin_manager/templates/stt.py @@ -80,7 +80,7 @@ def recognizer(self, val): def lang(self): return standardize_lang_tag(self._lang or \ self.config.get("lang") or \ - Configuration().get("lang", "en-us")) + Configuration().get("lang", "en-US")) @lang.setter def lang(self, val): diff --git a/ovos_plugin_manager/templates/tokenization.py b/ovos_plugin_manager/templates/tokenization.py index a550c1f4..b41e3cd5 100644 --- a/ovos_plugin_manager/templates/tokenization.py +++ b/ovos_plugin_manager/templates/tokenization.py @@ -50,7 +50,7 @@ def lang(self): msg = dig_for_message() if msg: lang = msg.data.get("lang") - return standardize_lang_tag(lang or "en-us") + return standardize_lang_tag(lang or "en-US") def span_tokenize(self, text, lang=None): return span_indexed_word_tokenize(text) diff --git a/ovos_plugin_manager/templates/tts.py b/ovos_plugin_manager/templates/tts.py index 5fac1c8c..20a75b19 100644 --- a/ovos_plugin_manager/templates/tts.py +++ b/ovos_plugin_manager/templates/tts.py @@ -594,7 +594,7 @@ def _get_ctxt(self, kwargs=None) -> TTSContext: LOG.debug(f"TTS kwargs: {kwargs}") return TTSContext(plugin_id=self.plugin_id, - lang=kwargs.get("lang") or Configuration().get("lang", "en-us"), + lang=kwargs.get("lang") or Configuration().get("lang", "en-US"), voice=kwargs.get("voice", "default"), synth_kwargs=kwargs) @@ -934,7 +934,7 @@ def lang(self): if message: sess = SessionManager.get(message) return sess.lang - return self.config.get("lang") or 'en-us' + return self.config.get("lang") or 'en-US' @lang.setter @deprecated("language is defined per request in get_tts, self.lang is not used", diff --git a/ovos_plugin_manager/utils/ui.py b/ovos_plugin_manager/utils/ui.py index 20664aa9..41f1047a 100644 --- a/ovos_plugin_manager/utils/ui.py +++ b/ovos_plugin_manager/utils/ui.py @@ -136,6 +136,7 @@ def get_config_options(cls, lang: str, plugin_type: PluginTypes, @param include_dialects: If True, include any ISO 639-1 matched codes @return: list of valid GUI-compatible config dicts """ + lang = standardize_lang_tag(lang) # NOTE: mycroft-gui will crash if theres more than 20 options according to @aiix # TODO - validate that this is true and 20 is a real limit blacklist = blacklist or [] @@ -188,6 +189,7 @@ def get_plugin_options(cls, lang: str, plugin_type: PluginTypes) -> list: @param plugin_type: Type of plugins to return @return: list of plugin specs with capabilities and config options """ + lang = standardize_lang_tag(lang) plugs = {} for entry in cls.get_config_options(lang, plugin_type): engine = entry["engine"] diff --git a/ovos_plugin_manager/wakewords.py b/ovos_plugin_manager/wakewords.py index fa777217..17821cd8 100644 --- a/ovos_plugin_manager/wakewords.py +++ b/ovos_plugin_manager/wakewords.py @@ -1,13 +1,14 @@ import json import os - from hashlib import md5 from typing import Optional + +from ovos_utils.lang import standardize_lang_tag from ovos_utils.log import LOG from ovos_utils.xdg_utils import xdg_data_home + from ovos_plugin_manager.templates.hotwords import HotWordEngine -from ovos_plugin_manager.utils import normalize_lang, \ - PluginTypes, PluginConfigTypes +from ovos_plugin_manager.utils import PluginTypes, PluginConfigTypes def find_wake_word_plugins() -> dict: @@ -135,6 +136,7 @@ def load_module(module: str, hotword: str, hotword_config: dict, @param loop: Unused @return: Initialized HotWordEngine """ + lang = standardize_lang_tag(lang) # config here is config['hotwords'][module] LOG.info(f'Loading "{hotword}" wake word via {module} with ' f'config: {hotword_config}') @@ -149,7 +151,7 @@ def load_module(module: str, hotword: str, hotword_config: dict, @classmethod def create_hotword(cls, hotword: str = "hey mycroft", config: Optional[dict] = None, - lang: str = "en-us", loop=None) -> HotWordEngine: + lang: str = "en-US", loop=None) -> HotWordEngine: """ Get an initialized HotWordEngine by configured name @param hotword: string hotword to load @@ -158,6 +160,7 @@ def create_hotword(cls, hotword: str = "hey mycroft", @param loop: Unused @return: Initialized HotWordEngine """ + lang = standardize_lang_tag(lang) ww_configs = get_hotwords_config(config) if hotword not in ww_configs: LOG.warning(f"replace ` ` in {hotword} with `_`") From d017ea57294aeba7dacd6b64b1ba010a05ed4e23 Mon Sep 17 00:00:00 2001 From: miro Date: Sat, 12 Oct 2024 20:58:50 +0100 Subject: [PATCH 5/8] fix:dialect support --- ovos_plugin_manager/tts.py | 5 ----- ovos_plugin_manager/utils/config.py | 3 ++- test/unittests/test_utils.py | 5 +++-- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/ovos_plugin_manager/tts.py b/ovos_plugin_manager/tts.py index b529ad3a..31f2eb47 100644 --- a/ovos_plugin_manager/tts.py +++ b/ovos_plugin_manager/tts.py @@ -173,8 +173,3 @@ def create(config=None): f'\nAvailable modules: {modules}') raise return tts - - -if __name__ == "__main__": - lang = "en-us" - print(find_tts_plugins()) \ No newline at end of file diff --git a/ovos_plugin_manager/utils/config.py b/ovos_plugin_manager/utils/config.py index 7da73361..e303ff4c 100644 --- a/ovos_plugin_manager/utils/config.py +++ b/ovos_plugin_manager/utils/config.py @@ -3,6 +3,7 @@ from ovos_utils.lang import standardize_lang_tag from ovos_utils.log import LOG from ovos_plugin_manager.utils import load_plugin, find_plugins, PluginTypes, PluginConfigTypes +from langcodes import tag_distance def get_plugin_config(config: Optional[dict] = None, section: str = None, @@ -56,7 +57,7 @@ def get_valid_plugin_configs(configs: dict, lang: str, # Check other dialects of the requested language base_lang = standardize_lang_tag(lang, macro=True) for language, confs in configs.items(): - if language.startswith(base_lang): + if tag_distance(base_lang, language) < 10: for config in confs: try: if language != lang: diff --git a/test/unittests/test_utils.py b/test/unittests/test_utils.py index bd04db4b..db0779d0 100644 --- a/test/unittests/test_utils.py +++ b/test/unittests/test_utils.py @@ -635,7 +635,7 @@ def test_get_plugin_config(self, config): "tts": { "module": "ovos_tts_plugin_espeakng", "ovos_tts_plugin_espeakng": { - "lang": "de-de", + "lang": "de-DE", "voice": "german-mbrola-5", "speed": "135", "amplitude": "80", @@ -644,7 +644,7 @@ def test_get_plugin_config(self, config): } } tts_config = get_plugin_config(config, "tts") - self.assertEqual(tts_config['lang'], 'de-de') + self.assertEqual(tts_config['lang'], 'de-DE') self.assertEqual(tts_config['module'], 'ovos_tts_plugin_espeakng') self.assertEqual(tts_config['voice'], 'german-mbrola-5') self.assertNotIn("ovos_tts_plugin_espeakng", tts_config) @@ -657,6 +657,7 @@ def test_get_valid_plugin_configs(self): 'en-US', False) self.assertEqual(len(valid_en_us), 1) valid_en = get_valid_plugin_configs(_MOCK_PLUGIN_CONFIG, 'en-US', True) + self.assertEqual(len(valid_en), 9) invalid_lang = get_valid_plugin_configs(_MOCK_PLUGIN_CONFIG, 'en-ZZ', False) From 3d909810e0dcd68280f19d532172743c1ffd1b51 Mon Sep 17 00:00:00 2001 From: miro Date: Sat, 12 Oct 2024 21:10:54 +0100 Subject: [PATCH 6/8] standardize eveyrwhere --- ovos_plugin_manager/templates/language.py | 14 ++++++++------ ovos_plugin_manager/templates/postag.py | 2 +- ovos_plugin_manager/templates/stt.py | 6 +++--- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/ovos_plugin_manager/templates/language.py b/ovos_plugin_manager/templates/language.py index 7475687b..7279c144 100644 --- a/ovos_plugin_manager/templates/language.py +++ b/ovos_plugin_manager/templates/language.py @@ -2,6 +2,7 @@ from ovos_config.config import Configuration from ovos_utils import classproperty +from ovos_utils.lang import standardize_lang_tag from ovos_utils.process_utils import RuntimeRequirements from typing import Optional, Dict, Union, List, Set @@ -16,8 +17,10 @@ def __init__(self, config: Optional[Dict[str, Union[str, int]]] = None): Can contain "lang" for default language, "hint_lang" for a hint language, and "boost" for language boost score. """ self.config = config or {} - self.default_language = self.config.get("lang", "en-US") - self.hint_language = self.config.get("hint_lang") or self.config.get('user') or self.default_language + self.default_language = standardize_lang_tag(self.config.get("lang", "en-US")) + self.hint_language = standardize_lang_tag(self.config.get("hint_lang") or + self.config.get('user') or + self.default_language) self.boost = self.config.get("boost") @classproperty @@ -85,11 +88,10 @@ def __init__(self, config: Optional[Dict[str, str]] = None): """ self.config = config or {} # translate from, unless specified/detected otherwise - self.default_language = self.config.get("lang") or "en-US" + self.default_language = standardize_lang_tag(self.config.get("lang") or "en-US") # translate to - self.internal_language = (Configuration().get('language') or - dict()).get("internal") or \ - self.default_language + self.internal_language = standardize_lang_tag(Configuration().get('language', {}).get("internal") or \ + self.default_language) @classproperty def runtime_requirements(self) -> RuntimeRequirements: diff --git a/ovos_plugin_manager/templates/postag.py b/ovos_plugin_manager/templates/postag.py index 34a67ea2..5e37cc54 100644 --- a/ovos_plugin_manager/templates/postag.py +++ b/ovos_plugin_manager/templates/postag.py @@ -52,7 +52,7 @@ def lang(self): return standardize_lang_tag(lang or "en-US") def postag(self, spans, lang=None): - lang = lang or self.lang + lang = standardize_lang_tag(lang or self.lang) # this should be implemented by plugins! if lang.startswith("pt"): return _dummy_postag_pt(spans) diff --git a/ovos_plugin_manager/templates/stt.py b/ovos_plugin_manager/templates/stt.py index 825fbea0..af7afedc 100644 --- a/ovos_plugin_manager/templates/stt.py +++ b/ovos_plugin_manager/templates/stt.py @@ -85,7 +85,7 @@ def lang(self): @lang.setter def lang(self, val): # backwards compat - self._lang = val + self._lang = standardize_lang_tag(val) @property @deprecated("self.keys has been deprecated! " @@ -177,7 +177,7 @@ class StreamThread(Thread, metaclass=ABCMeta): def __init__(self, queue, language): super().__init__() - self.language = language + self.language = standardize_lang_tag(language) self.queue = queue self.text = None @@ -216,7 +216,7 @@ def stream_start(self, language=None): self.stream_stop() self.queue = Queue() self.stream = self.create_streaming_thread() - self.stream.language = language or self.lang + self.stream.language = standardize_lang_tag(language or self.lang) self.transcript_ready.clear() self.stream.start() From 57d6e1f0fd2510f7785212dbed86ec6da244464b Mon Sep 17 00:00:00 2001 From: JarbasAI <33701864+JarbasAl@users.noreply.github.com> Date: Sat, 12 Oct 2024 21:35:07 +0100 Subject: [PATCH 7/8] Update ovos_plugin_manager/templates/tts.py Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- ovos_plugin_manager/templates/tts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ovos_plugin_manager/templates/tts.py b/ovos_plugin_manager/templates/tts.py index 20a75b19..e6e47c75 100644 --- a/ovos_plugin_manager/templates/tts.py +++ b/ovos_plugin_manager/templates/tts.py @@ -934,7 +934,7 @@ def lang(self): if message: sess = SessionManager.get(message) return sess.lang - return self.config.get("lang") or 'en-US' + return standardize_lang_tag(self.config.get("lang") or 'en-US') @lang.setter @deprecated("language is defined per request in get_tts, self.lang is not used", From 267ff60191ace53f9258fce5bfcf10f4dc6c771f Mon Sep 17 00:00:00 2001 From: miro Date: Sat, 12 Oct 2024 21:39:22 +0100 Subject: [PATCH 8/8] fix syntax error --- ovos_plugin_manager/utils/config.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/ovos_plugin_manager/utils/config.py b/ovos_plugin_manager/utils/config.py index e303ff4c..5c53182b 100644 --- a/ovos_plugin_manager/utils/config.py +++ b/ovos_plugin_manager/utils/config.py @@ -141,10 +141,8 @@ def get_plugin_supported_languages(plug_type: PluginTypes) -> dict: """ lang_configs = dict() for plug in find_plugins(plug_type): - configs = \ - load_plugin_configs(plug, - PluginConfigTypes(f"{plug_type.value}.config")) - for lang, config in configs: + configs = load_plugin_configs(plug, PluginConfigTypes(f"{plug_type.value}.config")) or {} + for lang, config in configs.items(): lang = standardize_lang_tag(lang) lang_configs.setdefault(lang, list()) lang_configs[lang].append(plug)