diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000..26e59a20 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,11 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. +# Please see the documentation for all configuration options: +# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file + +version: 2 +updates: + - package-ecosystem: "pip" # See documentation for possible values + directory: "/requirements" # Location of package manifests + schedule: + interval: "weekly" diff --git a/.github/workflows/publish_alpha.yml b/.github/workflows/publish_alpha.yml index 5091a6f3..83ed44cc 100644 --- a/.github/workflows/publish_alpha.yml +++ b/.github/workflows/publish_alpha.yml @@ -34,20 +34,6 @@ jobs: runs-on: ubuntu-latest needs: update_version steps: - - name: Create Release - id: create_release - uses: actions/create-release@v1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # This token is provided by Actions, you do not need to create your own token - with: - tag_name: V${{ needs.update_version.outputs.version }} - release_name: Release ${{ needs.update_version.outputs.version }} - body: | - Changes in this Release - ${{ needs.update_version.outputs.changelog }} - draft: false - prerelease: true - commitish: dev - name: Checkout Repository uses: actions/checkout@v2 with: diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index 62da0901..69a78d3a 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -33,7 +33,7 @@ jobs: strategy: max-parallel: 2 matrix: - python-version: [ 3.7, 3.8, 3.9, "3.10" ] + python-version: [ 3.8, 3.9, "3.10" ] runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 @@ -46,12 +46,12 @@ jobs: sudo apt-get update sudo apt install python3-dev python -m pip 
install build wheel - - name: Install core repo - run: | - pip install . - name: Install test dependencies run: | pip install -r requirements/test.txt + - name: Install core repo + run: | + pip install . - name: Run unittests run: | pytest --cov=ovos_plugin_manager --cov-report xml test/unittests diff --git a/CHANGELOG.md b/CHANGELOG.md index 7d25b1f6..e8b0ada1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,48 +1,143 @@ # Changelog -## [0.0.25](https://github.com/OpenVoiceOS/ovos-plugin-manager/tree/0.0.25) (2023-12-29) +## [0.1.1a3](https://github.com/OpenVoiceOS/ovos-plugin-manager/tree/0.1.1a3) (2024-09-05) -[Full Changelog](https://github.com/OpenVoiceOS/ovos-plugin-manager/compare/V0.0.25a4...0.0.25) +[Full Changelog](https://github.com/OpenVoiceOS/ovos-plugin-manager/compare/V...0.1.1a3) -**Merged pull requests:** +**Implemented enhancements:** -- Update requirements.txt [\#201](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/201) ([JarbasAl](https://github.com/JarbasAl)) +- feat/units\_kwarg\_solvers [\#247](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/247) ([JarbasAl](https://github.com/JarbasAl)) +- feat/metadata [\#246](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/246) ([JarbasAl](https://github.com/JarbasAl)) +- feat/embeddings\_metadata\_support [\#245](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/245) ([JarbasAl](https://github.com/JarbasAl)) +- feat/add\_rerank\_method [\#243](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/243) ([JarbasAl](https://github.com/JarbasAl)) +- feat/pipeline\_intent\_match [\#242](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/242) ([JarbasAl](https://github.com/JarbasAl)) +- feat/pipeline\_plugin\_placeholder [\#241](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/241) ([JarbasAl](https://github.com/JarbasAl)) +- feat/embeddings plugins [\#240](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/240) ([JarbasAl](https://github.com/JarbasAl)) +- 
feat/alternative\_transcripts [\#236](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/236) ([JarbasAl](https://github.com/JarbasAl)) +- feat/lang\_detection\_plugin [\#220](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/220) ([JarbasAl](https://github.com/JarbasAl)) +- feat/restore phonetic spellings [\#195](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/195) ([JarbasAl](https://github.com/JarbasAl)) + +**Fixed bugs:** + +- abstractmethod decorator breaks OCP 0.0.6 compat. [\#229](https://github.com/OpenVoiceOS/ovos-plugin-manager/issues/229) +- fix/restore\_dead\_code [\#256](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/256) ([JarbasAl](https://github.com/JarbasAl)) +- fix/context\_kwarg\_backwards\_compat [\#248](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/248) ([JarbasAl](https://github.com/JarbasAl)) +- refactor/solver\_decorators [\#244](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/244) ([JarbasAl](https://github.com/JarbasAl)) +- fix/missing\_property [\#239](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/239) ([JarbasAl](https://github.com/JarbasAl)) +- ensure cache dir exists [\#232](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/232) ([JarbasAl](https://github.com/JarbasAl)) +- fix/playback\_time\_not\_abstract [\#230](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/230) ([JarbasAl](https://github.com/JarbasAl)) +- fix/legacy\_playlist\_queue [\#227](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/227) ([JarbasAl](https://github.com/JarbasAl)) +- hotfix/voice\_kwarg [\#223](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/223) ([JarbasAl](https://github.com/JarbasAl)) +- hotfix/clean\_shutdown [\#222](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/222) ([JarbasAl](https://github.com/JarbasAl)) +- fix/tts\_reload [\#219](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/219) ([JarbasAl](https://github.com/JarbasAl)) -## 
[V0.0.25a4](https://github.com/OpenVoiceOS/ovos-plugin-manager/tree/V0.0.25a4) (2023-12-29) +**Closed issues:** -[Full Changelog](https://github.com/OpenVoiceOS/ovos-plugin-manager/compare/V0.0.25a3...V0.0.25a4) +- Error about Azure TTS plugin [\#193](https://github.com/OpenVoiceOS/ovos-plugin-manager/issues/193) **Merged pull requests:** -- update imports [\#198](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/198) ([JarbasAl](https://github.com/JarbasAl)) +- license compliance [\#255](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/255) ([JarbasAl](https://github.com/JarbasAl)) +- planned\_deprecations [\#254](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/254) ([JarbasAl](https://github.com/JarbasAl)) +- refactor/improve\_readwritestream [\#234](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/234) ([JarbasAl](https://github.com/JarbasAl)) +- refactor/deprecation\_warnings [\#233](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/233) ([JarbasAl](https://github.com/JarbasAl)) +- fix/py3.12 [\#231](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/231) ([JarbasAl](https://github.com/JarbasAl)) +- refactor/legacy\_audio [\#226](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/226) ([JarbasAl](https://github.com/JarbasAl)) +- Fix file path handling in setup.py [\#225](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/225) ([NeonDaniel](https://github.com/NeonDaniel)) +- Update for ovos-utils 0.0.X compat. 
[\#224](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/224) ([NeonDaniel](https://github.com/NeonDaniel)) -## [V0.0.25a3](https://github.com/OpenVoiceOS/ovos-plugin-manager/tree/V0.0.25a3) (2023-12-29) +## [V](https://github.com/OpenVoiceOS/ovos-plugin-manager/tree/V) (2024-03-10) -[Full Changelog](https://github.com/OpenVoiceOS/ovos-plugin-manager/compare/V0.0.25a2...V0.0.25a3) +[Full Changelog](https://github.com/OpenVoiceOS/ovos-plugin-manager/compare/V0.0.26a10...V) **Merged pull requests:** -- Update requirements.txt [\#197](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/197) ([JarbasAl](https://github.com/JarbasAl)) +- chore\(docs\): add a long description to PyPi [\#215](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/215) ([mikejgray](https://github.com/mikejgray)) -## [V0.0.25a2](https://github.com/OpenVoiceOS/ovos-plugin-manager/tree/V0.0.25a2) (2023-12-28) +## [V0.0.26a10](https://github.com/OpenVoiceOS/ovos-plugin-manager/tree/V0.0.26a10) (2024-02-17) -[Full Changelog](https://github.com/OpenVoiceOS/ovos-plugin-manager/compare/V0.0.25a1...V0.0.25a2) +[Full Changelog](https://github.com/OpenVoiceOS/ovos-plugin-manager/compare/V0.0.26a9...V0.0.26a10) -**Closed issues:** +**Fixed bugs:** -- module 'inspect' has no attribute 'formatargspec' [\#189](https://github.com/OpenVoiceOS/ovos-plugin-manager/issues/189) +- fix: AdminPlugin [\#214](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/214) ([builderjer](https://github.com/builderjer)) -**Merged pull requests:** +## [V0.0.26a9](https://github.com/OpenVoiceOS/ovos-plugin-manager/tree/V0.0.26a9) (2024-02-13) + +[Full Changelog](https://github.com/OpenVoiceOS/ovos-plugin-manager/compare/V0.0.26a8...V0.0.26a9) + +**Implemented enhancements:** + +- feat/streaming\_solver [\#213](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/213) ([JarbasAl](https://github.com/JarbasAl)) + +**Fixed bugs:** + +- Messages from ovos\_utils.log.LOG are swallowed and not sent to STDOUT 
[\#173](https://github.com/OpenVoiceOS/ovos-plugin-manager/issues/173) + +## [V0.0.26a8](https://github.com/OpenVoiceOS/ovos-plugin-manager/tree/V0.0.26a8) (2024-02-11) + +[Full Changelog](https://github.com/OpenVoiceOS/ovos-plugin-manager/compare/V0.0.26a7...V0.0.26a8) + +**Fixed bugs:** + +- fix/load\_persistent\_cache\(\) [\#209](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/209) ([JarbasAl](https://github.com/JarbasAl)) + +## [V0.0.26a7](https://github.com/OpenVoiceOS/ovos-plugin-manager/tree/V0.0.26a7) (2024-02-10) + +[Full Changelog](https://github.com/OpenVoiceOS/ovos-plugin-manager/compare/V0.0.26a6...V0.0.26a7) + +**Implemented enhancements:** + +- feat/streaming\_tts [\#212](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/212) ([JarbasAl](https://github.com/JarbasAl)) + +## [V0.0.26a6](https://github.com/OpenVoiceOS/ovos-plugin-manager/tree/V0.0.26a6) (2024-02-07) -- refactor/move\_from\_utils [\#196](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/196) ([JarbasAl](https://github.com/JarbasAl)) +[Full Changelog](https://github.com/OpenVoiceOS/ovos-plugin-manager/compare/V0.0.26a5...V0.0.26a6) -## [V0.0.25a1](https://github.com/OpenVoiceOS/ovos-plugin-manager/tree/V0.0.25a1) (2023-12-09) +**Implemented enhancements:** + +- feat/track\_meta [\#211](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/211) ([NeonJarbas](https://github.com/NeonJarbas)) + +## [V0.0.26a5](https://github.com/OpenVoiceOS/ovos-plugin-manager/tree/V0.0.26a5) (2024-01-08) -[Full Changelog](https://github.com/OpenVoiceOS/ovos-plugin-manager/compare/V0.0.24...V0.0.25a1) +[Full Changelog](https://github.com/OpenVoiceOS/ovos-plugin-manager/compare/V0.0.26a4...V0.0.26a5) **Implemented enhancements:** -- feat/disable\_cache [\#194](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/194) ([JarbasAl](https://github.com/JarbasAl)) +- feat/OCP\_backends [\#207](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/207) 
([NeonJarbas](https://github.com/NeonJarbas)) + +## [V0.0.26a4](https://github.com/OpenVoiceOS/ovos-plugin-manager/tree/V0.0.26a4) (2024-01-03) + +[Full Changelog](https://github.com/OpenVoiceOS/ovos-plugin-manager/compare/V0.0.26a3...V0.0.26a4) + +**Implemented enhancements:** + +- feat/sentence\_tokenize\_before\_TTS [\#206](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/206) ([JarbasAl](https://github.com/JarbasAl)) + +## [V0.0.26a3](https://github.com/OpenVoiceOS/ovos-plugin-manager/tree/V0.0.26a3) (2023-12-30) + +[Full Changelog](https://github.com/OpenVoiceOS/ovos-plugin-manager/compare/V0.0.26a2...V0.0.26a3) + +**Fixed bugs:** + +- fix phal plugins in ovos-utils \>= 0.1.0a2 [\#205](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/205) ([JarbasAl](https://github.com/JarbasAl)) + +## [V0.0.26a2](https://github.com/OpenVoiceOS/ovos-plugin-manager/tree/V0.0.26a2) (2023-12-30) + +[Full Changelog](https://github.com/OpenVoiceOS/ovos-plugin-manager/compare/V0.0.26a1...V0.0.26a2) + +**Fixed bugs:** + +- fix gui extensions in ovos-utils \>= 0.1.0a2 [\#204](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/204) ([JarbasAl](https://github.com/JarbasAl)) + +## [V0.0.26a1](https://github.com/OpenVoiceOS/ovos-plugin-manager/tree/V0.0.26a1) (2023-12-29) + +[Full Changelog](https://github.com/OpenVoiceOS/ovos-plugin-manager/compare/V0.0.25...V0.0.26a1) + +**Merged pull requests:** + +- packaging/update imports [\#203](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/203) ([JarbasAl](https://github.com/JarbasAl)) +- Update requirements.txt [\#201](https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/201) ([JarbasAl](https://github.com/JarbasAl)) diff --git a/README.md b/README.md index 625aa5f1..9456e061 100644 --- a/README.md +++ b/README.md @@ -1,46 +1,7 @@ # OVOS plugin manager -OPM can be used to search, install, load and create plugins for the OpenVoiceOS ecosystem! 
+OPM can be used to load and create plugins for the OpenVoiceOS ecosystem! -![logo](https://raw.githubusercontent.com/OpenVoiceOS/ovos_assets/921b41891ed18c9e16d24d1894266200ee3bd104/Logo/Raw/opm-logo.svg) +![image](https://github.com/OpenVoiceOS/ovos-plugin-manager/assets/33701864/8c939267-42fc-4377-bcdb-f7df65e73252) -## Install - -```bash -pip install ovos-plugin-manager -``` - -## Usage - -see [./examples](examples) folder for basic usage - - -## Plugins - -OPM provides templates to create OVOS plugins - -#### List of plugins - -This list is non exhaustive - -##### Wake Words -- [ovos-ww-plugin-hotkeys](https://github.com/OpenVoiceOS/ovos_ww_plugin_hotkeys) -- [ovos-ww-plugin-pocketsphinx](https://github.com/OpenVoiceOS/ovos-wakeword-plugin-pocketsphinx) -- [ovos-ww-plugin-precise](https://github.com/OpenVoiceOS/ovos-wake-word-plugin-precise) -- [chatterbox-ww-plugin-dummy](https://github.com/HelloChatterbox/dummy_wakeword_plugin) - -##### TTS -- [ovos-tts-plugin-mimic](https://github.com/OpenVoiceOS/ovos-tts-plugin-mimic) -- [ovos-tts-plugin-mimic2](https://github.com/OpenVoiceOS/ovos-tts-plugin-mimic2) -- [ovos-tts-plugin-google](https://github.com/OpenVoiceOS/ovos-tts-plugin-google) -- [ovos-tts-plugin-responsivevoice](https://github.com/OpenVoiceOS/ovos-tts-plugin-responsivevoice) -- [ovos-tts-plugin-pico](https://github.com/OpenVoiceOS/ovos-tts-plugin-pico) -- [ovos-tts-plugin-espeakNG](https://github.com/OpenVoiceOS/ovos-tts-plugin-espeakNG) -- [chatterbox-polly-tts-plugin](https://github.com/HelloChatterbox/chatterbox-polly-tts-plugin) - -##### STT -- [ovos-stt-plugin-vosk](https://github.com/OpenVoiceOS/ovos-stt-plugin-vosk) - -##### Audio Service -- [ovos-guiplayer-plugin](https://github.com/OpenVoiceOS/ovos-guiplayer-plugin) -- [ovos-vlc-plugin](https://github.com/OpenVoiceOS/ovos-vlc-plugin) +Documentation can be found in the [ovos-technical-manual](https://openvoiceos.github.io/ovos-technical-manual/OPM) diff --git a/ovos_plugin_manager/__init__.py 
b/ovos_plugin_manager/__init__.py index 0d0fd980..64282511 100644 --- a/ovos_plugin_manager/__init__.py +++ b/ovos_plugin_manager/__init__.py @@ -1,19 +1,2 @@ from ovos_plugin_manager.utils import PluginTypes from ovos_plugin_manager.plugin_entry import OpenVoiceOSPlugin -from ovos_utils.log import LOG - - -def find_plugins(*args, **kwargs): - # TODO: Deprecate in 0.1.0 - LOG.warning("This reference is deprecated. " - "Import from ovos_plugin_manager.utils directly") - from ovos_plugin_manager.utils import find_plugins - return find_plugins(*args, **kwargs) - - -def load_plugin(*args, **kwargs): - # TODO: Deprecate in 0.1.0 - LOG.warning("This reference is deprecated. " - "Import from ovos_plugin_manager.utils directly") - from ovos_plugin_manager.utils import load_plugin - return load_plugin(*args, **kwargs) diff --git a/ovos_plugin_manager/audio.py b/ovos_plugin_manager/audio.py index 3ca5d0d0..370952f5 100644 --- a/ovos_plugin_manager/audio.py +++ b/ovos_plugin_manager/audio.py @@ -2,14 +2,12 @@ from ovos_utils.log import LOG from ovos_bus_client.util import get_mycroft_bus from ovos_config import Configuration +from ovos_utils.log import log_deprecation -def find_plugins(*args, **kwargs): - # TODO: Deprecate in 0.1.0 - LOG.warning("This reference is deprecated. 
" - "Import from ovos_plugin_manager.utils directly") - from ovos_plugin_manager.utils import find_plugins - return find_plugins(*args, **kwargs) +# TODO - restore this log in next release with updated version string +#log_deprecation("ovos_plugin_manager.audio has been deprecated on ovos-audio, " +# "move to ovos_plugin_manager.media", "1.0.0") def find_audio_service_plugins() -> dict: diff --git a/ovos_plugin_manager/audio2ipa.py b/ovos_plugin_manager/audio2ipa.py index 39738a42..9c1dcbbd 100644 --- a/ovos_plugin_manager/audio2ipa.py +++ b/ovos_plugin_manager/audio2ipa.py @@ -4,22 +4,6 @@ from ovos_utils.log import LOG -def find_plugins(*args, **kwargs): - # TODO: Deprecate in 0.1.0 - LOG.warning("This reference is deprecated. " - "Import from ovos_plugin_manager.utils directly") - from ovos_plugin_manager.utils import find_plugins - return find_plugins(*args, **kwargs) - - -def load_plugin(*args, **kwargs): - # TODO: Deprecate in 0.1.0 - LOG.warning("This reference is deprecated. " - "Import from ovos_plugin_manager.utils directly") - from ovos_plugin_manager.utils import load_plugin - return load_plugin(*args, **kwargs) - - def find_audio2ipa_plugins() -> dict: """ Find all installed plugins diff --git a/ovos_plugin_manager/audio_transformers.py b/ovos_plugin_manager/audio_transformers.py index 3e9d6db0..f2e0f004 100644 --- a/ovos_plugin_manager/audio_transformers.py +++ b/ovos_plugin_manager/audio_transformers.py @@ -1,22 +1,5 @@ +from ovos_plugin_manager.templates.transformers import AudioTransformer, AudioLanguageDetector from ovos_plugin_manager.utils import PluginTypes, PluginConfigTypes -from ovos_plugin_manager.templates.transformers import AudioTransformer -from ovos_utils.log import LOG - - -def find_plugins(*args, **kwargs): - # TODO: Deprecate in 0.1.0 - LOG.warning("This reference is deprecated. 
" - "Import from ovos_plugin_manager.utils directly") - from ovos_plugin_manager.utils import find_plugins - return find_plugins(*args, **kwargs) - - -def load_plugin(*args, **kwargs): - # TODO: Deprecate in 0.1.0 - LOG.warning("This reference is deprecated. " - "Import from ovos_plugin_manager.utils directly") - from ovos_plugin_manager.utils import load_plugin - return load_plugin(*args, **kwargs) def find_audio_transformer_plugins() -> dict: @@ -57,3 +40,13 @@ def get_audio_transformer_module_configs(module_name: str): """ from ovos_plugin_manager.utils.config import load_plugin_configs return load_plugin_configs(module_name, PluginConfigTypes.AUDIO_TRANSFORMER) + + +def find_audio_lang_detector_plugins() -> dict: + """ + Find all installed audio language detector plugins + @return: dict plugin names to entrypoints + """ + from ovos_plugin_manager.utils import find_plugins + return {k: p for k, p in find_plugins(PluginTypes.AUDIO_TRANSFORMER).items() + if issubclass(p, AudioLanguageDetector)} diff --git a/ovos_plugin_manager/coreference.py b/ovos_plugin_manager/coreference.py index 6d58d546..0f94274c 100644 --- a/ovos_plugin_manager/coreference.py +++ b/ovos_plugin_manager/coreference.py @@ -8,22 +8,6 @@ replace_coreferences -def find_plugins(*args, **kwargs): - # TODO: Deprecate in 0.1.0 - LOG.warning("This reference is deprecated. " - "Import from ovos_plugin_manager.utils directly") - from ovos_plugin_manager.utils import find_plugins - return find_plugins(*args, **kwargs) - - -def load_plugin(*args, **kwargs): - # TODO: Deprecate in 0.1.0 - LOG.warning("This reference is deprecated. 
" - "Import from ovos_plugin_manager.utils directly") - from ovos_plugin_manager.utils import load_plugin - return load_plugin(*args, **kwargs) - - def find_coref_plugins() -> dict: """ Find all installed plugins diff --git a/ovos_plugin_manager/embeddings.py b/ovos_plugin_manager/embeddings.py new file mode 100644 index 00000000..123c3b11 --- /dev/null +++ b/ovos_plugin_manager/embeddings.py @@ -0,0 +1,78 @@ +from ovos_plugin_manager.templates.embeddings import EmbeddingsDB, TextEmbeddingsStore, FaceEmbeddingsStore, VoiceEmbeddingsStore +from ovos_plugin_manager.utils import PluginTypes + + +def find_embeddings_plugins() -> dict: + """ + Find all installed plugins + @return: dict plugin names to entrypoints + """ + from ovos_plugin_manager.utils import find_plugins + return find_plugins(PluginTypes.EMBEDDINGS) + + +def load_embeddings_plugin(module_name: str) -> type(EmbeddingsDB): + """ + Get an uninstantiated class for the requested module_name + @param module_name: Plugin entrypoint name to load + @return: Uninstantiated class + """ + from ovos_plugin_manager.utils import load_plugin + return load_plugin(module_name, PluginTypes.EMBEDDINGS) + + +def find_voice_embeddings_plugins() -> dict: + """ + Find all installed plugins + @return: dict plugin names to entrypoints + """ + from ovos_plugin_manager.utils import find_plugins + return find_plugins(PluginTypes.VOICE_EMBEDDINGS) + + +def load_voice_embeddings_plugin(module_name: str) -> type(VoiceEmbeddingsStore): + """ + Get an uninstantiated class for the requested module_name + @param module_name: Plugin entrypoint name to load + @return: Uninstantiated class + """ + from ovos_plugin_manager.utils import load_plugin + return load_plugin(module_name, PluginTypes.VOICE_EMBEDDINGS) + + +def find_face_embeddings_plugins() -> dict: + """ + Find all installed plugins + @return: dict plugin names to entrypoints + """ + from ovos_plugin_manager.utils import find_plugins + return 
find_plugins(PluginTypes.FACE_EMBEDDINGS) + + +def load_face_embeddings_plugin(module_name: str) -> type(FaceEmbeddingsStore): + """ + Get an uninstantiated class for the requested module_name + @param module_name: Plugin entrypoint name to load + @return: Uninstantiated class + """ + from ovos_plugin_manager.utils import load_plugin + return load_plugin(module_name, PluginTypes.FACE_EMBEDDINGS) + + +def find_text_embeddings_plugins() -> dict: + """ + Find all installed plugins + @return: dict plugin names to entrypoints + """ + from ovos_plugin_manager.utils import find_plugins + return find_plugins(PluginTypes.TEXT_EMBEDDINGS) + + +def load_text_embeddings_plugin(module_name: str) -> type(TextEmbeddingsStore): + """ + Get an uninstantiated class for the requested module_name + @param module_name: Plugin entrypoint name to load + @return: Uninstantiated class + """ + from ovos_plugin_manager.utils import load_plugin + return load_plugin(module_name, PluginTypes.TEXT_EMBEDDINGS) diff --git a/ovos_plugin_manager/g2p.py b/ovos_plugin_manager/g2p.py index 5146ea5c..64497a15 100644 --- a/ovos_plugin_manager/g2p.py +++ b/ovos_plugin_manager/g2p.py @@ -3,23 +3,6 @@ from ovos_plugin_manager.utils import normalize_lang, PluginTypes, PluginConfigTypes from ovos_plugin_manager.templates.g2p import Grapheme2PhonemePlugin, PhonemeAlphabet from ovos_utils.log import LOG -from ovos_config import Configuration - - -def find_plugins(*args, **kwargs): - # TODO: Deprecate in 0.1.0 - LOG.warning("This reference is deprecated. " - "Import from ovos_plugin_manager.utils directly") - from ovos_plugin_manager.utils import find_plugins - return find_plugins(*args, **kwargs) - - -def load_plugin(*args, **kwargs): - # TODO: Deprecate in 0.1.0 - LOG.warning("This reference is deprecated. 
" - "Import from ovos_plugin_manager.utils directly") - from ovos_plugin_manager.utils import load_plugin - return load_plugin(*args, **kwargs) def find_g2p_plugins() -> dict: diff --git a/ovos_plugin_manager/gui.py b/ovos_plugin_manager/gui.py index 5bd1fc73..6759a9a9 100644 --- a/ovos_plugin_manager/gui.py +++ b/ovos_plugin_manager/gui.py @@ -5,22 +5,6 @@ from ovos_utils.log import LOG -def find_plugins(*args, **kwargs): - # TODO: Deprecate in 0.1.0 - LOG.warning("This reference is deprecated. " - "Import from ovos_plugin_manager.utils directly") - from ovos_plugin_manager.utils import find_plugins - return find_plugins(*args, **kwargs) - - -def load_plugin(*args, **kwargs): - # TODO: Deprecate in 0.1.0 - LOG.warning("This reference is deprecated. " - "Import from ovos_plugin_manager.utils directly") - from ovos_plugin_manager.utils import load_plugin - return load_plugin(*args, **kwargs) - - def find_gui_plugins() -> dict: """ Find all installed plugins diff --git a/ovos_plugin_manager/keywords.py b/ovos_plugin_manager/keywords.py index 9ad3152e..1de2d34b 100644 --- a/ovos_plugin_manager/keywords.py +++ b/ovos_plugin_manager/keywords.py @@ -4,22 +4,6 @@ from ovos_plugin_manager.templates.keywords import KeywordExtractor -def find_plugins(*args, **kwargs): - # TODO: Deprecate in 0.1.0 - LOG.warning("This reference is deprecated. " - "Import from ovos_plugin_manager.utils directly") - from ovos_plugin_manager.utils import find_plugins - return find_plugins(*args, **kwargs) - - -def load_plugin(*args, **kwargs): - # TODO: Deprecate in 0.1.0 - LOG.warning("This reference is deprecated. 
" - "Import from ovos_plugin_manager.utils directly") - from ovos_plugin_manager.utils import load_plugin - return load_plugin(*args, **kwargs) - - def find_keyword_extract_plugins() -> dict: """ Find all installed plugins diff --git a/ovos_plugin_manager/language.py b/ovos_plugin_manager/language.py index ed9f8fec..ebf2a25c 100644 --- a/ovos_plugin_manager/language.py +++ b/ovos_plugin_manager/language.py @@ -7,22 +7,6 @@ from ovos_plugin_manager.utils import PluginTypes, PluginConfigTypes -def find_plugins(*args, **kwargs): - # TODO: Deprecate in 0.1.0 - LOG.warning("This reference is deprecated. " - "Import from ovos_plugin_manager.utils directly") - from ovos_plugin_manager.utils import find_plugins - return find_plugins(*args, **kwargs) - - -def load_plugin(*args, **kwargs): - # TODO: Deprecate in 0.1.0 - LOG.warning("This reference is deprecated. " - "Import from ovos_plugin_manager.utils directly") - from ovos_plugin_manager.utils import load_plugin - return load_plugin(*args, **kwargs) - - def find_tx_plugins() -> dict: """ Find all installed plugins diff --git a/ovos_plugin_manager/metadata_transformers.py b/ovos_plugin_manager/metadata_transformers.py index 4872404e..c70fbc8e 100644 --- a/ovos_plugin_manager/metadata_transformers.py +++ b/ovos_plugin_manager/metadata_transformers.py @@ -1,23 +1,6 @@ from ovos_plugin_manager.utils import normalize_lang, PluginTypes, \ PluginConfigTypes from ovos_plugin_manager.templates.transformers import MetadataTransformer -from ovos_utils.log import LOG - - -def find_plugins(*args, **kwargs): - # TODO: Deprecate in 0.1.0 - LOG.warning("This reference is deprecated. " - "Import from ovos_plugin_manager.utils directly") - from ovos_plugin_manager.utils import find_plugins - return find_plugins(*args, **kwargs) - - -def load_plugin(*args, **kwargs): - # TODO: Deprecate in 0.1.0 - LOG.warning("This reference is deprecated. 
" - "Import from ovos_plugin_manager.utils directly") - from ovos_plugin_manager.utils import load_plugin - return load_plugin(*args, **kwargs) def find_metadata_transformer_plugins() -> dict: diff --git a/ovos_plugin_manager/microphone.py b/ovos_plugin_manager/microphone.py index 5a7de18a..2e8f0c84 100644 --- a/ovos_plugin_manager/microphone.py +++ b/ovos_plugin_manager/microphone.py @@ -3,22 +3,6 @@ from ovos_utils.log import LOG -def find_plugins(*args, **kwargs): - # TODO: Deprecate in 0.1.0 - LOG.warning("This reference is deprecated. " - "Import from ovos_plugin_manager.utils directly") - from ovos_plugin_manager.utils import find_plugins - return find_plugins(*args, **kwargs) - - -def load_plugin(*args, **kwargs): - # TODO: Deprecate in 0.1.0 - LOG.warning("This reference is deprecated. " - "Import from ovos_plugin_manager.utils directly") - from ovos_plugin_manager.utils import load_plugin - return load_plugin(*args, **kwargs) - - def find_microphone_plugins() -> dict: """ Find all installed plugins diff --git a/ovos_plugin_manager/ocp.py b/ovos_plugin_manager/ocp.py index 5b88976f..9dcf9c15 100644 --- a/ovos_plugin_manager/ocp.py +++ b/ovos_plugin_manager/ocp.py @@ -1,22 +1,15 @@ -from ovos_plugin_manager.utils import PluginTypes, PluginConfigTypes +from ovos_plugin_manager.utils import PluginTypes from ovos_plugin_manager.templates.ocp import OCPStreamExtractor from ovos_utils.log import LOG +from functools import lru_cache +from ovos_plugin_manager.utils import find_plugins -def find_plugins(*args, **kwargs): - # TODO: Deprecate in 0.1.0 - LOG.warning("This reference is deprecated. " - "Import from ovos_plugin_manager.utils directly") - from ovos_plugin_manager.utils import find_plugins - return find_plugins(*args, **kwargs) - - -def load_plugin(*args, **kwargs): - # TODO: Deprecate in 0.1.0 - LOG.warning("This reference is deprecated. 
" - "Import from ovos_plugin_manager.utils directly") - from ovos_plugin_manager.utils import load_plugin - return load_plugin(*args, **kwargs) +try: + from ovos_plugin_manager.templates.media import AudioPlayerBackend, VideoPlayerBackend, WebPlayerBackend +except ImportError: + LOG.warning("Please install ovos-utils~=0.1 for `AudioPlayerBackend`, " + "`VideoPlayerBackend`, and `WebPlayerBackend` imports.") def find_ocp_plugins() -> dict: @@ -24,10 +17,33 @@ def find_ocp_plugins() -> dict: Find all installed plugins @return: dict plugin names to entrypoints """ - from ovos_plugin_manager.utils import find_plugins return find_plugins(PluginTypes.STREAM_EXTRACTOR) +def find_ocp_audio_plugins() -> dict: + """ + Find all installed plugins + @return: dict plugin names to entrypoints + """ + return find_plugins(PluginTypes.AUDIO_PLAYER) + + +def find_ocp_video_plugins() -> dict: + """ + Find all installed plugins + @return: dict plugin names to entrypoints + """ + return find_plugins(PluginTypes.VIDEO_PLAYER) + + +def find_ocp_web_plugins() -> dict: + """ + Find all installed plugins + @return: dict plugin names to entrypoints + """ + return find_plugins(PluginTypes.WEB_PLAYER) + + class StreamHandler: def __init__(self): self.extractors = {} @@ -92,3 +108,14 @@ def extract_stream(self, uri, video=True): # no extractor available, return raw url return meta or {"uri": uri} + + +@lru_cache() # to avoid loading StreamHandler more than once +def load_stream_extractors(): + return StreamHandler() + + +def available_extractors(): + xtract = load_stream_extractors() + return ["/", "http:", "https:", "file:"] + \ + [f"{sei}//" for sei in xtract.supported_seis] diff --git a/ovos_plugin_manager/phal.py b/ovos_plugin_manager/phal.py index 935f9bb9..4a8228f4 100644 --- a/ovos_plugin_manager/phal.py +++ b/ovos_plugin_manager/phal.py @@ -4,22 +4,6 @@ from ovos_utils.log import LOG -def find_plugins(*args, **kwargs): - # TODO: Deprecate in 0.1.0 - LOG.warning("This reference is 
deprecated. " - "Import from ovos_plugin_manager.utils directly") - from ovos_plugin_manager.utils import find_plugins - return find_plugins(*args, **kwargs) - - -def load_plugin(*args, **kwargs): - # TODO: Deprecate in 0.1.0 - LOG.warning("This reference is deprecated. " - "Import from ovos_plugin_manager.utils directly") - from ovos_plugin_manager.utils import load_plugin - return load_plugin(*args, **kwargs) - - def find_phal_plugins() -> dict: """ Find all installed plugins diff --git a/ovos_plugin_manager/pipeline.py b/ovos_plugin_manager/pipeline.py new file mode 100644 index 00000000..56bcc01c --- /dev/null +++ b/ovos_plugin_manager/pipeline.py @@ -0,0 +1,21 @@ +from ovos_plugin_manager.templates.pipeline import PipelinePlugin +from ovos_plugin_manager.utils import PluginTypes + + +def find_pipeline_plugins() -> dict: + """ + Find all installed plugins + @return: dict plugin names to entrypoints + """ + from ovos_plugin_manager.utils import find_plugins + return find_plugins(PluginTypes.PIPELINE) + + +def load_pipeline_plugin(module_name: str) -> type(PipelinePlugin): + """ + Get an uninstantiated class for the requested module_name + @param module_name: Plugin entrypoint name to load + @return: Uninstantiated class + """ + from ovos_plugin_manager.utils import load_plugin + return load_plugin(module_name, PluginTypes.PIPELINE) diff --git a/ovos_plugin_manager/postag.py b/ovos_plugin_manager/postag.py index 726b7207..4a05e09e 100644 --- a/ovos_plugin_manager/postag.py +++ b/ovos_plugin_manager/postag.py @@ -5,22 +5,6 @@ from ovos_plugin_manager.templates.postag import PosTagger -def find_plugins(*args, **kwargs): - # TODO: Deprecate in 0.1.0 - LOG.warning("This reference is deprecated. " - "Import from ovos_plugin_manager.utils directly") - from ovos_plugin_manager.utils import find_plugins - return find_plugins(*args, **kwargs) - - -def load_plugin(*args, **kwargs): - # TODO: Deprecate in 0.1.0 - LOG.warning("This reference is deprecated. 
" - "Import from ovos_plugin_manager.utils directly") - from ovos_plugin_manager.utils import load_plugin - return load_plugin(*args, **kwargs) - - def find_postag_plugins() -> dict: """ Find all installed plugins diff --git a/ovos_plugin_manager/segmentation.py b/ovos_plugin_manager/segmentation.py index 1b56456b..b3807574 100644 --- a/ovos_plugin_manager/segmentation.py +++ b/ovos_plugin_manager/segmentation.py @@ -5,21 +5,6 @@ from ovos_plugin_manager.templates.segmentation import Segmenter -def find_plugins(*args, **kwargs): - # TODO: Deprecate in 0.1.0 - LOG.warning("This reference is deprecated. " - "Import from ovos_plugin_manager.utils directly") - from ovos_plugin_manager.utils import find_plugins - return find_plugins(*args, **kwargs) - - -def load_plugin(*args, **kwargs): - # TODO: Deprecate in 0.1.0 - LOG.warning("This reference is deprecated. " - "Import from ovos_plugin_manager.utils directly") - from ovos_plugin_manager.utils import load_plugin - return load_plugin(*args, **kwargs) - def find_segmentation_plugins() -> dict: """ diff --git a/ovos_plugin_manager/skills.py b/ovos_plugin_manager/skills.py index a38093dc..b48ee3f6 100644 --- a/ovos_plugin_manager/skills.py +++ b/ovos_plugin_manager/skills.py @@ -114,7 +114,7 @@ def get_default_skills_directory(conf: Optional[dict] = None) -> str: if path_override: log_deprecation("'directory_override' is deprecated!" "add the new path to 'extra_directories' instead", - "0.1.0") + "1.0.0") skills_folder = expanduser(path_override) elif conf["skills"].get("extra_directories") and \ len(conf["skills"].get("extra_directories")) > 0: diff --git a/ovos_plugin_manager/solvers.py b/ovos_plugin_manager/solvers.py index 23c0a7a7..352d2319 100644 --- a/ovos_plugin_manager/solvers.py +++ b/ovos_plugin_manager/solvers.py @@ -5,21 +5,6 @@ from ovos_utils.log import LOG -def find_plugins(*args, **kwargs): - # TODO: Deprecate in 0.1.0 - LOG.warning("This reference is deprecated. 
" - "Import from ovos_plugin_manager.utils directly") - from ovos_plugin_manager.utils import find_plugins - return find_plugins(*args, **kwargs) - - -def load_plugin(*args, **kwargs): - # TODO: Deprecate in 0.1.0 - LOG.warning("This reference is deprecated. " - "Import from ovos_plugin_manager.utils directly") - from ovos_plugin_manager.utils import load_plugin - return load_plugin(*args, **kwargs) - def find_question_solver_plugins() -> dict: """ diff --git a/ovos_plugin_manager/stt.py b/ovos_plugin_manager/stt.py index dd92ff0c..4cd8592c 100644 --- a/ovos_plugin_manager/stt.py +++ b/ovos_plugin_manager/stt.py @@ -7,22 +7,6 @@ from ovos_plugin_manager.templates.stt import STT, StreamingSTT, StreamThread -def find_plugins(*args, **kwargs): - # TODO: Deprecate in 0.1.0 - LOG.warning("This reference is deprecated. " - "Import from ovos_plugin_manager.utils directly") - from ovos_plugin_manager.utils import find_plugins - return find_plugins(*args, **kwargs) - - -def load_plugin(*args, **kwargs): - # TODO: Deprecate in 0.1.0 - LOG.warning("This reference is deprecated. 
" - "Import from ovos_plugin_manager.utils directly") - from ovos_plugin_manager.utils import load_plugin - return load_plugin(*args, **kwargs) - - def find_stt_plugins() -> dict: """ Find all installed plugins @@ -103,25 +87,6 @@ def get_stt_config(config: dict = None, module: str = None) -> dict: class OVOSSTTFactory: """ replicates the base mycroft class, but uses only OPM enabled plugins""" - MAPPINGS = { - "mycroft": "ovos-stt-plugin-selene", - "dummy": "ovos-stt-plugin-dummy", - "google": "ovos-stt-plugin-chromium", - # "google_cloud": GoogleCloudSTT, - # "google_cloud_streaming": GoogleCloudStreamingSTT, - # "wit": WITSTT, - # "ibm": IBMSTT, - # "kaldi": KaldiSTT, - # "bing": BingSTT, - # "govivace": GoVivaceSTT, - # "houndify": HoundifySTT, - # "deepspeech_server": DeepSpeechServerSTT, - # "deepspeech_stream_server": DeepSpeechStreamServerSTT, - # "mycroft_deepspeech": MycroftDeepSpeechSTT, - # "yandex": YandexSTT - "vosk": "ovos-stt-plugin-vosk", - "vosk_streaming": "ovos-stt-plugin-vosk-streaming" - } @staticmethod def get_class(config=None): @@ -136,8 +101,6 @@ def get_class(config=None): """ config = get_stt_config(config) stt_module = config["module"] - if stt_module in OVOSSTTFactory.MAPPINGS: - stt_module = OVOSSTTFactory.MAPPINGS[stt_module] return load_stt_plugin(stt_module) @staticmethod @@ -152,11 +115,6 @@ def create(config=None): } """ stt_config = get_stt_config(config) - plugin = stt_config.get("module", "dummy") - if plugin in OVOSSTTFactory.MAPPINGS: - log_deprecation("Module mappings will be deprecated", "0.1.0") - plugin = OVOSSTTFactory.MAPPINGS[plugin] - stt_config = get_stt_config(config, plugin) try: clazz = OVOSSTTFactory.get_class(stt_config) return clazz(stt_config) diff --git a/ovos_plugin_manager/templates/audio.py b/ovos_plugin_manager/templates/audio.py index 3bd0d2b1..b5dca2be 100644 --- a/ovos_plugin_manager/templates/audio.py +++ b/ovos_plugin_manager/templates/audio.py @@ -4,11 +4,81 @@ OpenVoiceOS's media playback options. 
""" from abc import ABCMeta, abstractmethod +from typing import List +from ovos_bus_client import Message +from ovos_bus_client.message import dig_for_message from ovos_utils import classproperty -from ovos_utils.messagebus import FakeBus +from ovos_utils.fakebus import FakeBus +from ovos_utils.log import LOG from ovos_utils.process_utils import RuntimeRequirements +try: + from ovos_utils.ocp import PlaybackType, TrackState, PlayerState, MediaState +except ImportError: + LOG.warning("Please update to ovos-utils~=0.1.") + from enum import IntEnum + + + class MediaState(IntEnum): + # https://doc.qt.io/qt-5/qmediaplayer.html#MediaStatus-enum + # The status of the media cannot be determined. + UNKNOWN = 0 + # There is no current media. PlayerState == STOPPED + NO_MEDIA = 1 + # The current media is being loaded. The player may be in any state. + LOADING_MEDIA = 2 + # The current media has been loaded. PlayerState== STOPPED + LOADED_MEDIA = 3 + # Playback of the current media has stalled due to + # insufficient buffering or some other temporary interruption. + # PlayerState != STOPPED + STALLED_MEDIA = 4 + # The player is buffering data but has enough data buffered + # for playback to continue for the immediate future. + # PlayerState != STOPPED + BUFFERING_MEDIA = 5 + # The player has fully buffered the current media. PlayerState != STOPPED + BUFFERED_MEDIA = 6 + # Playback has reached the end of the current media. PlayerState == STOPPED + END_OF_MEDIA = 7 + # The current media cannot be played. 
PlayerState == STOPPED + INVALID_MEDIA = 8 + + + class PlayerState(IntEnum): + # https://doc.qt.io/qt-5/qmediaplayer.html#State-enum + STOPPED = 0 + PLAYING = 1 + PAUSED = 2 + + + class PlaybackType(IntEnum): + SKILL = 0 # skills handle playback whatever way they see fit, + # eg spotify / mycroft common play + VIDEO = 1 # Video results + AUDIO = 2 # Results should be played audio only + AUDIO_SERVICE = 3 ## DEPRECATED - used in ovos 0.0.7 + MPRIS = 4 # External MPRIS compliant player + WEBVIEW = 5 # webview, render a url instead of media player + UNDEFINED = 100 # data not available, hopefully status will be updated soon.. + + + class TrackState(IntEnum): + DISAMBIGUATION = 1 # media result, not queued for playback + PLAYING_SKILL = 20 # Skill is handling playback internally + PLAYING_AUDIOSERVICE = 21 ## DEPRECATED - used in ovos 0.0.7 + PLAYING_VIDEO = 22 # Skill forwarded playback to video service + PLAYING_AUDIO = 23 # Skill forwarded playback to audio service + PLAYING_MPRIS = 24 # External media player is handling playback + PLAYING_WEBVIEW = 25 # Media playback handled in browser (eg. javascript) + + QUEUED_SKILL = 30 # Waiting playback to be handled inside skill + QUEUED_AUDIOSERVICE = 31 ## DEPRECATED - used in ovos 0.0.7 + QUEUED_VIDEO = 32 # Waiting playback in video service + QUEUED_AUDIO = 33 # Waiting playback in audio service + QUEUED_WEBVIEW = 34 # Waiting playback in browser service + class AudioBackend(metaclass=ABCMeta): """Base class for all audio backend implementations. 
@@ -18,7 +88,11 @@ class AudioBackend(metaclass=ABCMeta): bus (MessageBusClient): OpenVoiceOS messagebus emitter """ - def __init__(self, config=None, bus=None): + def __init__(self, config=None, bus=None, name=None): + self.name = name or self.__class__.__name__ + self._now_playing = None # single uri + self._tracks = [] # list of dicts for OCP entries + self._idx = 0 self._track_start_callback = None self.supports_mime_hints = False self.config = config or {} @@ -60,29 +134,18 @@ def runtime_requirements(self): no_network_fallback=True) @property - def playback_time(self): + def playback_time(self) -> int: + """ in milliseconds """ return 0 @abstractmethod - def supported_uris(self): + def supported_uris(self) -> List[str]: """List of supported uri types. Returns: list: Supported uri's """ - @abstractmethod - def clear_list(self): - """Clear playlist.""" - - @abstractmethod - def add_list(self, tracks): - """Add tracks to backend's playlist. - - Arguments: - tracks (list): list of tracks. - """ - @abstractmethod def play(self, repeat=False): """Start playback. @@ -95,22 +158,42 @@ def play(self, repeat=False): """ @abstractmethod - def stop(self): - """Stop playback. + def lower_volume(self): + """Lower volume. - Stops the current playback. + This method is used to implement audio ducking. It will be called when + OpenVoiceOS is listening or speaking to make sure the media playing isn't + interfering. + """ - Returns: - bool: True if playback was stopped, otherwise False + @abstractmethod + def restore_volume(self): + """Restore normal volume. + + Called when to restore the playback volume to previous level after + OpenVoiceOS has lowered it using lower_volume(). """ - def set_track_start_callback(self, callback_func): - """Register callback on track start. + @abstractmethod + def get_track_length(self) -> int: + """ + getting the duration of the audio in miliseconds + """ - This method should be called as each track in a playlist is started. 
+ @abstractmethod + def get_track_position(self) -> int: + """ + get current position in miliseconds """ - self._track_start_callback = callback_func + @abstractmethod + def set_track_position(self, milliseconds): + """Go to X position. + Arguments: + milliseconds (int): position to go to in milliseconds + """ + + @abstractmethod def pause(self): """Pause playback. @@ -118,52 +201,70 @@ def pause(self): occured. """ + @abstractmethod def resume(self): """Resume paused playback. Resumes playback after being paused. """ - def next(self): - """Skip to next track in playlist.""" + @abstractmethod + def stop(self): + """Stop playback. - def previous(self): - """Skip to previous track in playlist.""" + Stops the current playback. - def lower_volume(self): - """Lower volume. + Returns: + bool: True if playback was stopped, otherwise False + """ - This method is used to implement audio ducking. It will be called when - OpenVoiceOS is listening or speaking to make sure the media playing isn't - interfering. + ##################### + # internals and default implementations + def track_info(self) -> dict: + """Request information of current playing track. + Returns: + Dict with track info. """ + return self._uri2meta(self._now_playing) - def restore_volume(self): - """Restore normal volume. + def clear_list(self): + """Clear playlist.""" + self._tracks = [] + self._idx = 0 - Called when to restore the playback volume to previous level after - OpenVoiceOS has lowered it using lower_volume(). - """ + def add_list(self, tracks): + """Add tracks to backend's playlist. - def get_track_length(self): - """ - getting the duration of the audio in milliseconds - NOTE: not yet supported by mycroft-core + Arguments: + tracks (list): list of tracks. 
""" + tracks = tracks or [] + if isinstance(tracks, str): + tracks = [tracks] + elif not isinstance(tracks, list): + raise ValueError + if tracks and not self._tracks: + self.load_track(tracks[0]) + self._idx = 0 + self._tracks += tracks - def get_track_position(self): - """ - get current position in milliseconds - NOTE: not yet supported by mycroft-core - """ + def next(self): + """Skip to next track in playlist.""" + self._idx += 1 + if self._idx < len(self._tracks): + self.load_track(self._tracks[self._idx]) + self.play() + else: + LOG.error("no more tracks!") - def set_track_position(self, milliseconds): - """ - go to position in milliseconds - NOTE: not yet supported by mycroft-core - Args: - milliseconds (int): number of milliseconds of final position - """ + def previous(self): + """Skip to previous track in playlist.""" + self._idx = max(self._idx - 1, 0) + if self._idx < len(self._tracks): + self.load_track(self._tracks[self._idx]) + self.play() + else: + LOG.error("already in first track!") def seek_forward(self, seconds=1): """Skip X seconds. @@ -171,6 +272,9 @@ def seek_forward(self, seconds=1): Arguments: seconds (int): number of seconds to seek, if negative rewind """ + miliseconds = seconds * 1000 + new_pos = self.get_track_position() + miliseconds + self.set_track_position(new_pos) def seek_backward(self, seconds=1): """Rewind X seconds. @@ -178,17 +282,16 @@ def seek_backward(self, seconds=1): Arguments: seconds (int): number of seconds to seek, if negative jump forward. """ + miliseconds = seconds * 1000 + new_pos = self.get_track_position() - miliseconds + self.set_track_position(new_pos) - def track_info(self): - """Get info about current playing track. + def set_track_start_callback(self, callback_func): + """Register callback on track start. - Returns: - dict: Track info containing atleast the keys artist and album. + This method should be called as each track in a playlist is started. 
""" - ret = {} - ret['artist'] = '' - ret['album'] = '' - return ret + self._track_start_callback = callback_func def shutdown(self): """Perform clean shutdown. @@ -197,6 +300,94 @@ def shutdown(self): """ self.stop() + def _format_msg(self, msg_type, msg_data=None): + # this method ensures all skills are .forward from the utterance + # that triggered the skill, this ensures proper routing and metadata + msg_data = msg_data or {} + msg = dig_for_message() + if msg: + msg = msg.forward(msg_type, msg_data) + else: + msg = Message(msg_type, msg_data) + # at this stage source == skills, lets indicate audio service took over + sauce = msg.context.get("source") + if sauce == "skills": + msg.context["source"] = "audio_service" + return msg + + ############################ + # OCP extensions - new methods to improve compat with OCP + @staticmethod + def _uri2meta(uri): + if isinstance(uri, list): + uri = uri[0] + try: + from ovos_ocp_files_plugin.plugin import OCPFilesMetadataExtractor + return OCPFilesMetadataExtractor.extract_metadata(uri) + except: + meta = {"uri": uri, + "skill_id": "mycroft.audio_interface", + "playback": PlaybackType.AUDIO, # TODO mime type check + "status": TrackState.QUEUED_AUDIO, + } + return meta + + def load_track(self, uri): + """ This method is only used by ovos-core + In ovos audio backends are single-track, playlists are handled by OCP + """ + self._now_playing = uri + LOG.debug(f"queuing for {self.__class__.__name__} playback: {uri}") + self.bus.emit(Message("ovos.common_play.media.state", + {"state": MediaState.LOADING_MEDIA})) + self.bus.emit(Message("ovos.common_play.track.state", { + "state": TrackState.QUEUED_AUDIOSERVICE + })) + + def ocp_sync_playback(self, playback_time): + self.bus.emit(Message("ovos.common_play.playback_time", + {"position": playback_time, + "length": self.get_track_length()})) + + def ocp_start(self): + """Emit OCP status events for play""" + self.bus.emit(Message("ovos.common_play.player.state", + {"state": 
PlayerState.PLAYING})) + self.bus.emit(Message("ovos.common_play.media.state", + {"state": MediaState.LOADED_MEDIA})) + self.bus.emit(Message("ovos.common_play.track.state", + {"state": TrackState.PLAYING_AUDIOSERVICE})) + + def ocp_error(self): + """Emit OCP status events for playback error""" + if self._now_playing: + self.bus.emit(Message("ovos.common_play.media.state", + {"state": MediaState.INVALID_MEDIA})) + self._now_playing = None + + def ocp_stop(self): + """Emit OCP status events for stop""" + if self._now_playing: + self._now_playing = None + self.bus.emit(Message("ovos.common_play.player.state", + {"state": PlayerState.STOPPED})) + self.bus.emit(Message("ovos.common_play.media.state", + {"state": MediaState.END_OF_MEDIA})) + + def ocp_pause(self): + """Emit OCP status events for pause""" + if self._now_playing: + self.bus.emit(Message("ovos.common_play.player.state", + {"state": PlayerState.PAUSED})) + + def ocp_resume(self): + """Emit OCP status events for resume""" + if self._now_playing: + self.bus.emit(Message("ovos.common_play.player.state", + {"state": PlayerState.PLAYING})) + self.bus.emit(Message("ovos.common_play.track.state", + {"state": TrackState.PLAYING_AUDIOSERVICE})) + class RemoteAudioBackend(AudioBackend): """Base class for remote audio backends. 
diff --git a/ovos_plugin_manager/templates/embeddings.py b/ovos_plugin_manager/templates/embeddings.py new file mode 100644 index 00000000..943772b9 --- /dev/null +++ b/ovos_plugin_manager/templates/embeddings.py @@ -0,0 +1,602 @@ +import abc +from typing import List, Optional, Tuple, Dict, Union, Iterable + +# Typing helpers for readability +try: + import numpy as np + EmbeddingsArray = np.ndarray +except ImportError: + EmbeddingsArray = Iterable[Union[int, float]] +EmbeddingsTuple = Union[Tuple[str, float], Tuple[str, float, Dict]] + + +class EmbeddingsDB: + """Base class for an embeddings database that supports storage, retrieval, and querying of embeddings.""" + + @abc.abstractmethod + def add_embeddings(self, key: str, embedding: EmbeddingsArray, + metadata: Optional[Dict[str, any]] = None) -> EmbeddingsArray: + """Store 'embedding' under 'key' with associated metadata. + + Args: + key (str): The unique key for the embedding. + embedding (np.ndarray): The embedding vector to store. + metadata (Optional[Dict[str, any]]): Optional metadata associated with the embedding. + + Returns: + np.ndarray: The stored embedding. + """ + raise NotImplementedError + + @abc.abstractmethod + def get_embeddings(self, key: str) -> EmbeddingsArray: + """Retrieve embeddings stored under 'key'. + + Args: + key (str): The unique key for the embedding. + + Returns: + np.ndarray: The retrieved embedding. + """ + raise NotImplementedError + + @abc.abstractmethod + def delete_embeddings(self, key: str) -> EmbeddingsArray: + """Delete embeddings stored under 'key'. + + Args: + key (str): The unique key for the embedding. + + Returns: + np.ndarray: The deleted embedding. + """ + raise NotImplementedError + + @abc.abstractmethod + def query(self, embeddings: EmbeddingsArray, top_k: int = 5, + return_metadata: bool = False) -> List[EmbeddingsTuple]: + """Return the top_k embeddings closest to the given 'embeddings'. + + Args: + embeddings (np.ndarray): The embedding vector to query. 
+ top_k (int, optional): The number of top results to return. Defaults to 5. + return_metadata (bool, optional): Whether to include metadata in the results. Defaults to False. + + Returns: + List[EmbeddingsTuple]: List of tuples containing the key and distance, and optionally metadata. + """ + raise NotImplementedError + + def distance(self, embeddings_a: EmbeddingsArray, embeddings_b: EmbeddingsArray, metric: str = "cosine", + alpha: float = 0.5, # for alpha_divergence and tversky metrics + beta: float = 0.5, # for tversky metric + p: float = 3, # for minkowski and weighted_minkowski metrics + euclidean_weights: Optional[EmbeddingsArray] = None, # required for weighted_euclidean and weighted_minkowski metrics + covariance_matrix: Optional[EmbeddingsArray] = None # required for mahalanobis distance with user-defined covariance + ) -> float: + """ + Calculate the distance between two embeddings vectors using the specified distance metric. + + Args: + embeddings_a (np.ndarray): The first embedding vector. + embeddings_b (np.ndarray): The second embedding vector. + metric (str, optional): The distance metric to use. Defaults to "cosine". + Supported metrics include: + - "cosine": Cosine distance, 1 - cosine similarity. Useful for text similarity and high-dimensional data. + - "euclidean": Euclidean distance, L2 norm of the difference. Commonly used in clustering and geometric distance. + - "manhattan": Manhattan distance, L1 norm of the difference. Suitable for grid-based maps and robotics. + - "chebyshev": Chebyshev distance, maximum absolute difference. Used for chessboard distance and pathfinding. + - "minkowski": Minkowski distance, generalization of Euclidean and Manhattan distances. Parameterized by p, flexible use case. + - "weighted_minkowski": Weighted Minkowski distance, a generalization of Minkowski with weights. Parameterized by `p`, uses `euclidean_weights`. + - "hamming": Hamming distance, proportion of differing elements. 
Ideal for error detection and binary data. + - "jaccard": Jaccard distance, 1 - Jaccard similarity (intersection over union). Used for set similarity and binary attributes. + - "canberra": Canberra distance, weighted version of Manhattan distance. Sensitive to small changes, used in environmental data. + - "braycurtis": Bray-Curtis distance, dissimilarity between non-negative vectors. Common in ecology and species abundance studies. + - "mahalanobis": Mahalanobis distance, considering correlations (requires covariance matrix). Useful for multivariate outlier detection. + - "pearson_correlation": Pearson correlation distance, 1 - Pearson correlation coefficient. Used in time series analysis and signal processing. + - "spearman_rank": Spearman rank correlation distance, 1 - Spearman rank correlation coefficient. Measures rank correlation for non-linear monotonic relationships. + - "wasserstein": Earth Mover's Distance (Wasserstein distance). Compares probability distributions or histograms. + - "cosine_squared": Cosine squared distance, 1 - cosine similarity squared. For squared similarity in high-dimensional data. + - "kl_divergence": Kullback-Leibler divergence, asymmetric measure of difference between distributions. Applied in information theory and probability distributions. + - "bhattacharyya": Bhattacharyya distance, measure of overlap between statistical samples. Useful in classification and image processing. + - "hellinger": Hellinger distance, measure of similarity between two probability distributions. Applied in statistical inference. + - "ruzicka": Ruzicka distance, similarity measure for non-negative vectors. Used in ecology and species abundance. + - "kulczynski": Kulczynski distance, used in ecology to compare similarity. Suitable for ecological studies and species distribution. + - "sorensen": Sørensen distance, another name for Dice distance. Applied in binary data comparison and text similarity. 
+ - "chi_squared": Chi-squared distance, used for comparing categorical data distributions. Suitable for categorical data analysis and distribution comparison. + - "jensen_shannon": Jensen-Shannon divergence, symmetrized and smoothed version of KL divergence. Used in information theory and probability distributions. + - "squared_euclidean": Squared Euclidean distance, square of the Euclidean distance. Useful for clustering algorithms and geometric distance. + - "weighted_euclidean": Weighted Euclidean distance, L2 norm with weights. Applied when features have different scales or importance. + - "log_cosh": Log-Cosh distance, log of the hyperbolic cosine of the difference. Robust to outliers. + - "tanimoto": Tanimoto coefficient, similarity measure for binary vectors. Used for binary data comparison. + - "rao": Rao's Quadratic Entropy, measure of divergence between distributions. Useful for comparing probability distributions. + - "gower": Gower distance, handles mixed types of data. Applied in cases with numerical and categorical data. + - "tversky": Tversky index, generalization of Jaccard and Dice for asymmetrical comparison. Parameterized by alpha and beta. + - "alpha_divergence": Alpha divergence, generalized divergence measure. Parameterized by alpha, used for comparing distributions. + - "kendall_tau": Kendall's Tau distance: 1 - Kendall Tau correlation coefficient. Use case: Rank correlation for ordinal data + - "renyi_divergence": Generalized divergence measure. Use case: Comparing probability distributions + - "total_variation": Measure of divergence between distributions. Use case: Probability distributions, statistical inference + + alpha (float, optional): Parameter for `tversky` and `alpha_divergence` metrics. Default is 0.5. + beta (float, optional): Parameter for `tversky` metric. Default is 0.5. + p (float, optional): Parameter for `minkowski` and `weighted_minkowski` metrics. Default is 3. 
+ euclidean_weights (Optional[np.ndarray], optional): Weights for `weighted_euclidean` and `weighted_minkowski` metrics. Must be provided if using these metrics. Default is None. + covariance_matrix (Optional[np.ndarray], optional): Covariance matrix for `mahalanobis` distance. Must be provided if using this metric. Default is None. + + Returns: + float: The calculated distance between the two embedding vectors. + + Raises: + ValueError: If the specified metric is unsupported or requires parameters not provided. + """ + if metric == "cosine": + # Cosine distance: 1 - cosine similarity + # Use case: Text similarity, high-dimensional data + dot = np.dot(embeddings_a, embeddings_b) + norma = np.linalg.norm(embeddings_a) + normb = np.linalg.norm(embeddings_b) + cos = dot / (norma * normb) + return 1 - cos + elif metric == "euclidean": + # Euclidean distance: L2 norm of the difference + # Use case: Geometric distance, clustering + return np.linalg.norm(embeddings_a - embeddings_b) + elif metric == "manhattan": + # Manhattan distance: L1 norm of the difference + # Use case: Grid-based maps, robotics + return np.sum(np.abs(embeddings_a - embeddings_b)) + elif metric == "chebyshev": + # Chebyshev distance: Maximum absolute difference + # Use case: Chessboard distance, pathfinding + return np.max(np.abs(embeddings_a - embeddings_b)) + elif metric == "minkowski": + # Minkowski distance: Generalization of Euclidean and Manhattan distances + # Use case: Flexible distance metric, parameterized by p + return np.sum(np.abs(embeddings_a - embeddings_b) ** p) ** (1 / p) + elif metric == "weighted_minkowski": + # Weighted Minkowski distance: Generalization of Minkowski distance with weights + # Use case: Flexible distance metric with weighted dimensions + if euclidean_weights is None: + raise ValueError("euclidean_weights must be provided for weighted_minkowski metric") + return np.sum(euclidean_weights * np.abs(embeddings_a - embeddings_b) ** p) ** (1 / p) + elif metric == 
"hamming": + # Hamming distance: Proportion of differing elements + # Use case: Error detection, binary data + return np.mean(embeddings_a != embeddings_b) + elif metric == "jaccard": + # Jaccard distance: 1 - Jaccard similarity (intersection over union) + # Use case: Set similarity, binary attributes + intersection = np.sum(np.minimum(embeddings_a, embeddings_b)) + union = np.sum(np.maximum(embeddings_a, embeddings_b)) + return 1 - intersection / union + elif metric == "canberra": + # Canberra distance: Weighted version of Manhattan distance + # Use case: Environmental data, sensitive to small changes + return np.sum(np.abs(embeddings_a - embeddings_b) / (np.abs(embeddings_a) + np.abs(embeddings_b))) + elif metric == "braycurtis": + # Bray-Curtis distance: Dissimilarity between non-negative vectors + # Use case: Ecology, species abundance + return np.sum(np.abs(embeddings_a - embeddings_b)) / np.sum(np.abs(embeddings_a + embeddings_b)) + elif metric == "mahalanobis": + # Mahalanobis distance: Distance considering correlations (requires covariance matrix) + # Use case: Multivariate outlier detection + if covariance_matrix is None: + covariance_matrix = np.cov(embeddings_a, embeddings_b, rowvar=False) + inv_cov_matrix = np.linalg.inv(covariance_matrix) + delta = embeddings_a - embeddings_b + return np.sqrt(np.dot(np.dot(delta.T, inv_cov_matrix), delta)) + elif metric == "pearson_correlation": + # Correlation distance: 1 - Pearson correlation coefficient + # Use case: Time series analysis, signal processing + mean_a = np.mean(embeddings_a) + mean_b = np.mean(embeddings_b) + centered_a = embeddings_a - mean_a + centered_b = embeddings_b - mean_b + norm_a = np.linalg.norm(centered_a) + norm_b = np.linalg.norm(centered_b) + correlation = np.dot(centered_a, centered_b) / (norm_a * norm_b) + return 1 - correlation + elif metric == "spearman_rank": + # Spearman rank correlation distance: 1 - Spearman rank correlation coefficient. 
+ # Use case: Measures the rank correlation between two vectors. Useful for non-linear monotonic relationships. + rank_a = np.argsort(np.argsort(embeddings_a)) + rank_b = np.argsort(np.argsort(embeddings_b)) + return 1 - np.corrcoef(rank_a, rank_b)[0, 1] + elif metric == "wasserstein": + # Earth Mover's Distance (Wasserstein distance) + # Use case: Comparing probability distributions or histograms + arr1_sorted = np.sort(embeddings_a) + arr2_sorted = np.sort(embeddings_b) + cdf1 = np.cumsum(arr1_sorted) / np.sum(arr1_sorted) + cdf2 = np.cumsum(arr2_sorted) / np.sum(arr2_sorted) + return np.sum(np.abs(cdf1 - cdf2)) + elif metric == "cosine_squared": + # Cosine squared distance: 1 - cosine similarity squared + # Use case: Squared similarity, high-dimensional data + dot = np.dot(embeddings_a, embeddings_b) + norma = np.linalg.norm(embeddings_a) + normb = np.linalg.norm(embeddings_b) + cos = dot / (norma * normb) + return 1 - cos ** 2 + elif metric == "kl_divergence": + # Kullback-Leibler divergence: Asymmetric measure of difference between distributions + # Use case: Information theory, probability distributions + return np.sum(embeddings_a * np.log(embeddings_a / embeddings_b)) + elif metric == "bhattacharyya": + # Bhattacharyya distance: Measure of overlap between statistical samples + # Use case: Classification, image processing + bc = np.sum(np.sqrt(embeddings_a * embeddings_b)) + return -np.log(bc) + elif metric == "hellinger": + # Hellinger distance: Measure of similarity between two probability distributions + # Use case: Probability distributions, statistical inference + return np.sqrt(0.5 * np.sum((np.sqrt(embeddings_a) - np.sqrt(embeddings_b)) ** 2)) + elif metric == "ruzicka": + # Ruzicka distance: Similarity measure for non-negative vectors + # Use case: Ecology, species abundance + return 1 - np.sum(np.minimum(embeddings_a, embeddings_b)) / np.sum(np.maximum(embeddings_a, embeddings_b)) + elif metric == "kulczynski": + # Kulczynski distance: Measure used 
in ecology to compare similarity + # Use case: Ecological studies, species distribution + return np.sum(np.abs(embeddings_a - embeddings_b)) / np.sum(np.minimum(embeddings_a, embeddings_b)) + elif metric == "sorensen": + # Sørensen distance: Another name for Dice distance + # Use case: Binary data comparison, text similarity + intersection = np.sum(embeddings_a * embeddings_b) + return 1 - (2 * intersection) / (np.sum(embeddings_a) + np.sum(embeddings_b)) + elif metric == "chi_squared": + # Chi-squared distance: Used for comparing categorical data distributions + # Use case: Categorical data analysis, distribution comparison + return np.sum((embeddings_a - embeddings_b) ** 2 / (embeddings_a + embeddings_b)) + elif metric == "jensen_shannon": + # Jensen-Shannon divergence: Symmetrized and smoothed version of KL divergence + # Use case: Information theory, probability distributions + m = 0.5 * (embeddings_a + embeddings_b) + return 0.5 * (np.sum(embeddings_a * np.log(embeddings_a / m)) + np.sum(embeddings_b * np.log(embeddings_b / m))) + elif metric == "squared_euclidean": + # Squared Euclidean distance: Square of the Euclidean distance + # Use case: Clustering algorithms, geometric distance + return np.sum((embeddings_a - embeddings_b) ** 2) + elif metric == "weighted_euclidean": + # Weighted Euclidean distance: L2 norm with weights + # Use case: Features with different scales or importance + if euclidean_weights is None: + raise ValueError("euclidean_weights must be provided for weighted_euclidean metric") + return np.sqrt(np.sum(euclidean_weights * (embeddings_a - embeddings_b) ** 2)) + elif metric == "log_cosh": + # Log-Cosh distance: Log of the hyperbolic cosine of the difference + # Use case: Robustness to outliers + return np.sum(np.log(np.cosh(embeddings_a - embeddings_b))) + elif metric == "tanimoto": + # Tanimoto coefficient: Similarity measure for binary vectors + # Use case: Binary data comparison + intersection = np.sum(embeddings_a * embeddings_b) + 
return 1 - intersection / (np.sum(embeddings_a) + np.sum(embeddings_b) - intersection) + elif metric == "rao": + # Rao's Quadratic Entropy: Measure of divergence between distributions + # Use case: Comparing probability distributions + p = embeddings_a / np.sum(embeddings_a) + q = embeddings_b / np.sum(embeddings_b) + return np.sum((p - q) ** 2 / (p + q)) + elif metric == "gower": + # Gower distance: Handles mixed types of data + # Use case: Mixed data types (numerical and categorical) + numerical_part = np.sum(np.abs(embeddings_a - embeddings_b)) / len(embeddings_a) + categorical_part = np.mean(embeddings_a != embeddings_b) + return numerical_part + categorical_part + elif metric == "tversky": + # Tversky index: Generalization of Jaccard and Dice for asymmetrical comparison + intersection = np.sum(np.minimum(embeddings_a, embeddings_b)) + return 1 - intersection / (intersection + alpha * np.sum(embeddings_a - embeddings_b) + beta * np.sum(embeddings_b - embeddings_a)) + elif metric == "alpha_divergence": + # Alpha divergence: Generalized divergence measure + p = embeddings_a / np.sum(embeddings_a) + q = embeddings_b / np.sum(embeddings_b) + return np.sum((p ** alpha - q ** alpha) / (alpha * (p + q) ** alpha)) + elif metric == "kendall_tau": + # Kendall's Tau distance: 1 - Kendall Tau correlation coefficient + # Use case: Rank correlation for ordinal data + concordant = np.sum((embeddings_a > embeddings_b) == (embeddings_b > embeddings_a)) + discordant = np.sum((embeddings_a > embeddings_b) != (embeddings_b > embeddings_a)) + return 1 - (concordant - discordant) / (concordant + discordant) + elif metric == "renyi_divergence": + # Renyi Divergence: Generalized divergence measure + # Use case: Comparing probability distributions + p = embeddings_a / np.sum(embeddings_a) + q = embeddings_b / np.sum(embeddings_b) + return 1 / (1 - alpha) * np.log(np.sum((p ** alpha + q ** alpha) / 2)) + elif metric == "total_variation": + # Total Variation distance: Measure of 
divergence between distributions + # Use case: Probability distributions, statistical inference + p = embeddings_a / np.sum(embeddings_a) + q = embeddings_b / np.sum(embeddings_b) + return 0.5 * np.sum(np.abs(p - q)) + else: + raise ValueError("Unsupported metric") + + +class TextEmbeddingsStore: + """A store for text embeddings interfacing with the embeddings database.""" + + def __init__(self, db: EmbeddingsDB): + """Initialize the text embeddings store. + + Args: + db (EmbeddingsDB): The embeddings database instance. + """ + self.db = db + + @abc.abstractmethod + def get_text_embeddings(self, text: str) -> EmbeddingsArray: + """Convert text to its corresponding embeddings. + + Args: + text (str): The input text to be converted. + + Returns: + np.ndarray: The resulting embeddings. + """ + raise NotImplementedError + + def add_document(self, document: str, metadata: Optional[Dict[str, any]] = None) -> None: + """Add a document and its embeddings to the database. + + Args: + document (str): The document to add. + metadata (Optional[Dict[str, any]]): Optional metadata associated with the document. + """ + embeddings = self.get_text_embeddings(document) + self.db.add_embeddings(document, embeddings, metadata) + + def delete_document(self, document: str) -> None: + """Delete a document and its embeddings from the database. + + Args: + document (str): The document to delete. + """ + self.db.delete_embeddings(document) + + def query(self, document: str, top_k: int = 5, + return_metadata: bool = False) -> List[Tuple[str, float]]: + """Query the database for the top_k closest embeddings to the document. + + Args: + document (str): The document to query. + top_k (int, optional): The number of top results to return. Defaults to 5. + return_metadata (bool, optional): Whether to include metadata in the results. Defaults to False. + + Returns: + List[Tuple[str, float]]: List of tuples containing the document and distance. 
+ """ + embeddings = self.get_text_embeddings(document) + return self.db.query(embeddings, top_k, + return_metadata=return_metadata) + + def distance(self, text_a: str, text_b: str, metric: str = "cosine") -> float: + """Calculate the distance between embeddings of two texts. + + Args: + text_a (str): The first text. + text_b (str): The second text. + metric (str, optional): The distance metric to use. Defaults to "cosine". + + Returns: + float: The calculated distance. + """ + emb_a = self.get_text_embeddings(text_a) + emb_b = self.get_text_embeddings(text_b) + return self.db.distance(emb_a, emb_b, metric) + + +class FaceEmbeddingsStore: + """A store for face embeddings interfacing with the embeddings database.""" + + def __init__(self, db: EmbeddingsDB): + """Initialize the face embeddings store. + + Args: + db (EmbeddingsDB): The embeddings database instance. + """ + self.db = db + + @abc.abstractmethod + def get_face_embeddings(self, frame: EmbeddingsArray) -> EmbeddingsArray: + """Convert an image frame to its corresponding face embeddings. + + Args: + frame (np.ndarray): The input image frame containing a face. + + Returns: + np.ndarray: The resulting face embeddings. + """ + raise NotImplementedError + + def add_face(self, user_id: str, frame: EmbeddingsArray, metadata: Optional[Dict[str, any]] = None) -> EmbeddingsArray: + """Add a face and its embeddings to the database. + + Args: + user_id (str): The unique user ID. + frame (np.ndarray): The image frame containing the face. + metadata (Optional[Dict[str, any]]): Optional metadata associated with the face. + + Returns: + np.ndarray: The stored face embeddings. + """ + embeddings = self.get_face_embeddings(frame) + return self.db.add_embeddings(user_id, embeddings, metadata) + + def delete_face(self, user_id: str) -> EmbeddingsArray: + """Delete a face and its embeddings from the database. + + Args: + user_id (str): The unique user ID. + + Returns: + np.ndarray: The deleted face embeddings. 
+ """ + return self.db.delete_embeddings(user_id) + + def predict(self, frame: EmbeddingsArray, top_k: int = 3, thresh: float = 0.15) -> Optional[str]: + """Return the top predicted face closest to the given frame. + + Args: + frame (np.ndarray): The input image frame containing a face. + top_k (int, optional): The number of top results to return. Defaults to 3. + thresh (float, optional): The threshold for prediction. Defaults to 0.15. + + Returns: + Optional[str]: The predicted user ID or None if the best match exceeds the threshold. + """ + matches = self.query(frame, top_k) + if not matches: + return None + best_match = min(matches, key=lambda k: k[1]) + if best_match[1] > thresh: + return None + return best_match[0] + + def query(self, frame: EmbeddingsArray, top_k: int = 5, + return_metadata: bool = False) -> List[Tuple[str, float]]: + """Query the database for the top_k closest face embeddings to the frame. + + Args: + frame (np.ndarray): The input image frame containing a face. + top_k (int, optional): The number of top results to return. Defaults to 5. + return_metadata (bool, optional): Whether to include metadata in the results. Defaults to False. + + Returns: + List[Tuple[str, float]]: List of tuples containing the user ID and distance. + """ + embeddings = self.get_face_embeddings(frame) + return self.db.query(embeddings, top_k, + return_metadata=return_metadata) + + def distance(self, face_a: EmbeddingsArray, face_b: EmbeddingsArray, metric: str = "cosine") -> float: + """Calculate the distance between embeddings of two faces. + + Args: + face_a (np.ndarray): The first face embedding. + face_b (np.ndarray): The second face embedding. + metric (str, optional): The distance metric to use. Defaults to "cosine". + + Returns: + float: The calculated distance. 
+ """ + emb_a = self.get_face_embeddings(face_a) + emb_b = self.get_face_embeddings(face_b) + return self.db.distance(emb_a, emb_b, metric) + + +class VoiceEmbeddingsStore: + """A store for voice embeddings interfacing with the embeddings database.""" + + def __init__(self, db: EmbeddingsDB): + """Initialize the voice embeddings store. + + Args: + db (EmbeddingsDB): The embeddings database instance. + """ + self.db = db + + @staticmethod + def audiochunk2array(audio_bytes: bytes) -> EmbeddingsArray: + """Convert audio buffer to a normalized float32 NumPy array. + + Args: + audio_bytes (bytes): The audio data buffer. + + Returns: + np.ndarray: The normalized float32 audio array. + """ + audio_as_np_int16 = np.frombuffer(audio_bytes, dtype=np.int16) + audio_as_np_float32 = audio_as_np_int16.astype(np.float32) + # Normalise float32 array so that values are between -1.0 and +1.0 + max_int16 = 2 ** 15 + return audio_as_np_float32 / max_int16 + + @abc.abstractmethod + def get_voice_embeddings(self, audio_data: EmbeddingsArray) -> EmbeddingsArray: + """Convert audio data to its corresponding voice embeddings. + + Args: + audio_data (np.ndarray): The input audio data. + + Returns: + np.ndarray: The resulting voice embeddings. + """ + raise NotImplementedError + + def add_voice(self, user_id: str, audio_data: EmbeddingsArray, metadata: Optional[Dict[str, any]] = None) -> EmbeddingsArray: + """Add a voice and its embeddings to the database. + + Args: + user_id (str): The unique user ID. + audio_data (np.ndarray): The input audio data. + metadata (Optional[Dict[str, any]]): Optional metadata associated with the voice. + + Returns: + np.ndarray: The stored voice embeddings. + """ + embeddings = self.get_voice_embeddings(audio_data) + return self.db.add_embeddings(user_id, embeddings, metadata) + + def delete_voice(self, user_id: str) -> EmbeddingsArray: + """Delete a voice and its embeddings from the database. + + Args: + user_id (str): The unique user ID. 
+ + Returns: + np.ndarray: The deleted voice embeddings. + """ + return self.db.delete_embeddings(user_id) + + def predict(self, audio_data: EmbeddingsArray, top_k: int = 3, thresh: float = 0.75) -> Optional[str]: + """Return the top predicted voice closest to the given audio_data. + + Args: + audio_data (np.ndarray): The input audio data. + top_k (int, optional): The number of top results to return. Defaults to 3. + thresh (float, optional): The threshold for prediction. Defaults to 0.75. + + Returns: + Optional[str]: The predicted user ID or None if the best match exceeds the threshold. + """ + matches = self.query(audio_data, top_k) + if not matches: + return None + best_match = min(matches, key=lambda k: k[1]) + if best_match[1] > thresh: + return None + return best_match[0] + + def query(self, audio_data: EmbeddingsArray, top_k: int = 5, + return_metadata: bool = False) -> List[Tuple[str, float]]: + """Query the database for the top_k closest voice embeddings to the audio_data. + + Args: + audio_data (np.ndarray): The input audio data. + top_k (int, optional): The number of top results to return. Defaults to 5. + return_metadata (bool, optional): Whether to include metadata in the results. Defaults to False. + + Returns: + List[Tuple[str, float]]: List of tuples containing the user ID and distance. + """ + embeddings = self.get_voice_embeddings(audio_data) + return self.db.query(embeddings, top_k, + return_metadata=return_metadata) + + def distance(self, voice_a: EmbeddingsArray, voice_b: EmbeddingsArray, metric: str = "cosine") -> float: + """Calculate the distance between embeddings of two voices. + + Args: + voice_a (np.ndarray): The first voice embedding. + voice_b (np.ndarray): The second voice embedding. + metric (str, optional): The distance metric to use. Defaults to "cosine". + + Returns: + float: The calculated distance. 
+ """ + emb_a = self.get_voice_embeddings(voice_a) + emb_b = self.get_voice_embeddings(voice_b) + return self.db.distance(emb_a, emb_b, metric) diff --git a/ovos_plugin_manager/templates/gui.py b/ovos_plugin_manager/templates/gui.py index fad98e19..e3c020cc 100644 --- a/ovos_plugin_manager/templates/gui.py +++ b/ovos_plugin_manager/templates/gui.py @@ -1,6 +1,6 @@ from ovos_bus_client import Message from ovos_bus_client import MessageBusClient -from ovos_utils.gui import GUIInterface +from ovos_bus_client.apis.gui import GUIInterface from ovos_utils.log import LOG from ovos_config import Configuration diff --git a/ovos_plugin_manager/templates/language.py b/ovos_plugin_manager/templates/language.py index 34efdc51..743674ff 100644 --- a/ovos_plugin_manager/templates/language.py +++ b/ovos_plugin_manager/templates/language.py @@ -1,75 +1,88 @@ +import abc + from ovos_config.config import Configuration from ovos_utils import classproperty from ovos_utils.process_utils import RuntimeRequirements +from typing import Optional, Dict, Union, List, Set class LanguageDetector: - def __init__(self, config=None): + def __init__(self, config: Optional[Dict[str, Union[str, int]]] = None): + """ + Initialize the LanguageDetector with configuration settings. + + Args: + config (Optional[Dict[str, Union[str, int]]]): Configuration dictionary. + Can contain "lang" for default language, "hint_lang" for a hint language, and "boost" for language boost score. 
+ """ self.config = config or {} - self.default_language = self.config.get("lang") or "en-us" - # hint_language: str E.g., 'it' boosts Italian - self.hint_language = self.config.get("hint_lang") or \ - self.config.get('user') or self.default_language - # boost score for this language + self.default_language = self.config.get("lang", "en-us") + self.hint_language = self.config.get("hint_lang") or self.config.get('user') or self.default_language self.boost = self.config.get("boost") @classproperty - def runtime_requirements(self): - """ skill developers should override this if they do not require connectivity - some examples: - IOT plugin that controls devices via LAN could return: - scans_on_init = True - RuntimeRequirements(internet_before_load=False, - network_before_load=scans_on_init, - requires_internet=False, - requires_network=True, - no_internet_fallback=True, - no_network_fallback=False) - online search plugin with a local cache: - has_cache = False - RuntimeRequirements(internet_before_load=not has_cache, - network_before_load=not has_cache, - requires_internet=True, - requires_network=True, - no_internet_fallback=True, - no_network_fallback=True) - a fully offline plugin: - RuntimeRequirements(internet_before_load=False, - network_before_load=False, - requires_internet=False, - requires_network=False, - no_internet_fallback=True, - no_network_fallback=True) - """ - return RuntimeRequirements(internet_before_load=False, - network_before_load=False, - requires_internet=False, - requires_network=False, - no_internet_fallback=True, - no_network_fallback=True) - - def detect(self, text): - # assume default language - return self.default_language - - def detect_probs(self, text): - return {self.detect(text): 1} - - @property - def available_languages(self) -> set: + def runtime_requirements(self) -> RuntimeRequirements: + """ + Define the runtime requirements for this language detector. 
+ + Returns: + RuntimeRequirements: Object indicating the runtime needs, including internet and network requirements. + """ + return RuntimeRequirements( + internet_before_load=False, + network_before_load=False, + requires_internet=False, + requires_network=False, + no_internet_fallback=True, + no_network_fallback=True + ) + + @abc.abstractmethod + def detect(self, text: str) -> str: + """ + Detect the language of the given text. + + Args: + text (str): The text to detect the language of. + + Returns: + str: The detected language code (e.g., 'en-us'). + """ + + @abc.abstractmethod + def detect_probs(self, text: str) -> Dict[str, float]: + """ + Detect the language of the text and return probabilities. + + Args: + text (str): The text to detect the language of. + + Returns: + Dict[str, float]: A dictionary with the detected language as the key and its probability as the value. + """ + + @property # TODO - make abstract method in future releases (mandatory for plugins to implement) + def available_languages(self) -> Set[str]: """ Return languages supported by this detector implementation in this state. This should be a set of languages this detector is capable of recognizing. This property should be overridden by the derived class to advertise what languages that engine supports. Returns: - set: supported languages + Set[str]: A set of language codes supported by this detector. """ return set() class LanguageTranslator: - def __init__(self, config=None): + def __init__(self, config: Optional[Dict[str, str]] = None): + """ + Initialize the LanguageTranslator with configuration settings. + + Args: + config (Optional[Dict[str, str]]): Configuration dictionary. + Can contain "lang" for the default language and "internal" for the internal language. 
+ """ self.config = config or {} # translate from, unless specified/detected otherwise self.default_language = self.config.get("lang") or "en-us" @@ -79,44 +92,48 @@ def __init__(self, config=None): self.default_language @classproperty - def runtime_requirements(self): - """ skill developers should override this if they do not require connectivity - some examples: - IOT plugin that controls devices via LAN could return: - scans_on_init = True - RuntimeRequirements(internet_before_load=False, - network_before_load=scans_on_init, - requires_internet=False, - requires_network=True, - no_internet_fallback=True, - no_network_fallback=False) - online search plugin with a local cache: - has_cache = False - RuntimeRequirements(internet_before_load=not has_cache, - network_before_load=not has_cache, - requires_internet=True, - requires_network=True, - no_internet_fallback=True, - no_network_fallback=True) - a fully offline plugin: - RuntimeRequirements(internet_before_load=False, - network_before_load=False, - requires_internet=False, - requires_network=False, - no_internet_fallback=True, - no_network_fallback=True) - """ - return RuntimeRequirements(internet_before_load=False, - network_before_load=False, - requires_internet=False, - requires_network=False, - no_internet_fallback=True, - no_network_fallback=True) - - def translate(self, text, target=None, source=None): - return text - - def translate_dict(self, data, lang_tgt, lang_src="en"): + def runtime_requirements(self) -> RuntimeRequirements: + """ + Define the runtime requirements for this language translator. + + Returns: + RuntimeRequirements: Object indicating the runtime needs, including internet and network requirements. 
+ """ + return RuntimeRequirements( + internet_before_load=False, + network_before_load=False, + requires_internet=False, + requires_network=False, + no_internet_fallback=True, + no_network_fallback=True + ) + + @abc.abstractmethod + def translate(self, text: str, target: Optional[str] = None, source: Optional[str] = None) -> str: + """ + Translate the given text from the source language to the target language. + + Args: + text (str): The text to translate. + target (Optional[str]): The target language code. If None, the internal language is used. + source (Optional[str]): The source language code. If None, the default language is used. + + Returns: + str: The translated text. + """ + + def translate_dict(self, data: Dict[str, Union[str, Dict, List]], lang_tgt: str, lang_src: str = "en") -> Dict[str, Union[str, Dict, List]]: + """ + Translate the values in a dictionary from one language to another. + + Args: + data (Dict[str, Union[str, Dict, List]]): The dictionary containing text to translate. + lang_tgt (str): The target language code. + lang_src (str): The source language code. + + Returns: + Dict[str, Union[str, Dict, List]]: The dictionary with translated values. + """ for k, v in data.items(): if isinstance(v, dict): data[k] = self.translate_dict(v, lang_tgt, lang_src) @@ -126,7 +143,18 @@ def translate_dict(self, data, lang_tgt, lang_src="en"): data[k] = self.translate_list(v, lang_tgt, lang_src) return data - def translate_list(self, data, lang_tgt, lang_src="en"): + def translate_list(self, data: List[Union[str, Dict, List]], lang_tgt: str, lang_src: str = "en") -> List[Union[str, Dict, List]]: + """ + Translate the values in a list from one language to another. + + Args: + data (List[Union[str, Dict, List]]): The list containing text to translate. + lang_tgt (str): The target language code. + lang_src (str): The source language code. + + Returns: + List[Union[str, Dict, List]]: The list with translated values. 
+ """ for idx, v in enumerate(data): if isinstance(v, dict): data[idx] = self.translate_dict(v, lang_tgt, lang_src) @@ -136,25 +164,27 @@ def translate_list(self, data, lang_tgt, lang_src="en"): data[idx] = self.translate_list(v, lang_tgt, lang_src) return data - @property - def available_languages(self) -> set: + @property # TODO - make abstract method in future releases (mandatory for plugins to implement) + def available_languages(self) -> Set[str]: """ Return languages supported by this translator implementation in this state. Any language in this set should be translatable to any other language in the set. This property should be overridden by the derived class to advertise what languages that engine supports. Returns: - set: supported languages + Set[str]: A set of language codes supported by this translator. """ return set() - def supported_translations(self, source_lang: str = None) -> set: + # TODO - make abstract method in future releases (mandatory for plugins to implement) + def supported_translations(self, source_lang: Optional[str] = None) -> Set[str]: """ - Return valid target languages we can translate `source_lang` to. - This method should be overridden by the derived class. + Get the set of target languages to which the source language can be translated. + Args: - source_lang: ISO 639-1 source language code + source_lang (Optional[str]): The source language code. + Returns: - set of ISO 639-1 languages the source language can be translated to + Set[str]: A set of language codes that the source language can be translated to. 
""" return self.available_languages diff --git a/ovos_plugin_manager/templates/media.py b/ovos_plugin_manager/templates/media.py new file mode 100644 index 00000000..85e25381 --- /dev/null +++ b/ovos_plugin_manager/templates/media.py @@ -0,0 +1,276 @@ +from abc import ABCMeta, abstractmethod + +from ovos_bus_client.message import Message +from ovos_utils.log import LOG +from ovos_utils.messagebus import FakeBus +from ovos_utils.ocp import MediaState, PlayerState, TrackState + + +class MediaBackend(metaclass=ABCMeta): + """Base class for all OCP media backend implementations. + + Media backends are single-track, playlists are handled by OCP + + Arguments: + config (dict): configuration dict for the instance + bus (MessageBusClient): Mycroft messagebus emitter + """ + + def __init__(self, config=None, bus=None): + if MediaState is None: + raise RuntimeError("Please update to ovos-utils~=0.1.") + self._now_playing = None # single uri + self._track_start_callback = None + self.supports_mime_hints = False + self.config = config or {} + self.bus = bus or FakeBus() + self.meta = {} + + def set_track_start_callback(self, callback_func): + """Register callback on track start. + + This method should be called as each track in a playlist is started. 
+ """ + self._track_start_callback = callback_func + + def load_track(self, uri: str, metadata: dict = None): + self._now_playing = uri + self.meta.update(metadata or {}) + LOG.debug(f"queuing for {self.__class__.__name__} playback: {uri}") + self.bus.emit(Message("ovos.common_play.media.state", + {"state": MediaState.LOADED_MEDIA})) + + def ocp_start(self): + """Emit OCP status events for play""" + self.bus.emit(Message("ovos.common_play.player.state", + {"state": PlayerState.PLAYING})) + self.bus.emit(Message("ovos.common_play.media.state", + {"state": MediaState.LOADED_MEDIA})) + self.play() + + def ocp_error(self): + """Emit OCP status events for playback error""" + if self._now_playing: + self._now_playing = None + self.bus.emit(Message("ovos.common_play.media.state", + {"state": MediaState.INVALID_MEDIA})) + self.bus.emit(Message("ovos.common_play.player.state", + {"state": PlayerState.STOPPED})) + + def ocp_stop(self): + """Emit OCP status events for stop""" + if self._now_playing: + self._now_playing = None + self.bus.emit(Message("ovos.common_play.player.state", + {"state": PlayerState.STOPPED})) + self.bus.emit(Message("ovos.common_play.media.state", + {"state": MediaState.END_OF_MEDIA})) + self.stop() + + def ocp_pause(self): + """Emit OCP status events for pause""" + if self._now_playing: + self.bus.emit(Message("ovos.common_play.player.state", + {"state": PlayerState.PAUSED})) + self.pause() + + def ocp_resume(self): + """Emit OCP status events for resume""" + if self._now_playing: + self.bus.emit(Message("ovos.common_play.player.state", + {"state": PlayerState.PLAYING})) + self.bus.emit(Message("ovos.common_play.track.state", + {"state": TrackState.PLAYING_AUDIO})) + self.resume() + + @property + def playback_time(self): + return 0 + + @abstractmethod + def supported_uris(self): + """List of supported uri types. + + Returns: + list: Supported uri's + """ + + @abstractmethod + def play(self): + """Start playback. 
+ + Starts playing the first track in the playlist and will continue + until all tracks have been played. + """ + + @abstractmethod + def stop(self): + """Stop playback. + + Stops the current playback. + + Returns: + bool: True if playback was stopped, otherwise False + """ + + @abstractmethod + def pause(self): + """Pause playback. + + Stops playback but may be resumed at the exact position the pause + occurred. + """ + + @abstractmethod + def resume(self): + """Resume paused playback. + + Resumes playback after being paused. + """ + + @abstractmethod + def lower_volume(self): + """Lower volume. + + This method is used to implement audio ducking. It will be called when + OpenVoiceOS is listening or speaking to make sure the media playing isn't + interfering. + """ + + @abstractmethod + def restore_volume(self): + """Restore normal volume. + + Called to restore the playback volume to previous level after + OpenVoiceOS has lowered it using lower_volume(). + """ + + @abstractmethod + def get_track_length(self) -> int: + """ + getting the duration of the audio in milliseconds + """ + + @abstractmethod + def get_track_position(self) -> int: + """ + get current position in milliseconds + """ + + @abstractmethod + def set_track_position(self, milliseconds): + """ + go to position in milliseconds + Args: + milliseconds (int): number of milliseconds of final position + """ + + def seek_forward(self, seconds=1): + """Skip X seconds. + + Arguments: + seconds (int): number of seconds to seek, if negative rewind + """ + miliseconds = seconds * 1000 + new_pos = self.get_track_position() + miliseconds + self.set_track_position(new_pos) + + def seek_backward(self, seconds=1): + """Rewind X seconds. + + Arguments: + seconds (int): number of seconds to seek, if negative jump forward. + """ + miliseconds = seconds * 1000 + new_pos = self.get_track_position() - miliseconds + self.set_track_position(new_pos) + + def track_info(self): + """Get info about current playing track.
+ + Returns: + dict: Track info containing at least the keys artist and album. + """ + return self.meta + + def shutdown(self): + """Perform clean shutdown. + + Implements any audio backend specific shutdown procedures. + """ + self.stop() + + +class AudioPlayerBackend(MediaBackend): + """ for audio""" + + def load_track(self, uri, metadata: dict = None): + super().load_track(uri, metadata) + self.bus.emit(Message("ovos.common_play.track.state", + {"state": TrackState.QUEUED_AUDIO})) + + def ocp_start(self): + """Emit OCP status events for play""" + super().ocp_start() + self.bus.emit(Message("ovos.common_play.track.state", + {"state": TrackState.PLAYING_AUDIO})) + + +class RemoteAudioPlayerBackend(AudioPlayerBackend): + """Base class for remote audio backends. + + RemoteAudioBackends will always be checked after the normal + AudioBackends to make playback start locally by default. + + An example of a RemoteAudioBackend would be things like mopidy servers, etc. + """ + + +class VideoPlayerBackend(MediaBackend): + """ for video""" + def load_track(self, uri, metadata: dict = None): + super().load_track(uri, metadata) + self.bus.emit(Message("ovos.common_play.track.state", + {"state": TrackState.QUEUED_VIDEO})) + + def ocp_start(self): + """Emit OCP status events for play""" + super().ocp_start() + self.bus.emit(Message("ovos.common_play.track.state", + {"state": TrackState.PLAYING_VIDEO})) + + +class RemoteVideoPlayerBackend(VideoPlayerBackend): + """Base class for remote video backends. + + RemoteVideoBackends will always be checked after the normal + VideoBackends to make playback start locally by default. + + An example of a RemoteVideoBackend would be things like Chromecasts, etc.
+ """ + + +class WebPlayerBackend(MediaBackend): + """ for web pages""" + + def load_track(self, uri, metadata: dict = None): + super().load_track(uri, metadata) + self.bus.emit(Message("ovos.common_play.track.state", + {"state": TrackState.QUEUED_WEBVIEW})) + + def ocp_start(self): + """Emit OCP status events for play""" + super().ocp_start() + self.bus.emit(Message("ovos.common_play.track.state", + {"state": TrackState.PLAYING_WEBVIEW})) + + +class RemoteWebPlayerBackend(WebPlayerBackend): + """Base class for remote web backends. + + RemoteWebBackends will always be checked after the normal + VideoBackends to make playback start locally by default. + + An example of a RemoteWebBackend would be + things that can render a webpage in a different machine + """ diff --git a/ovos_plugin_manager/templates/phal.py b/ovos_plugin_manager/templates/phal.py index aed65f3a..4ec7fa53 100644 --- a/ovos_plugin_manager/templates/phal.py +++ b/ovos_plugin_manager/templates/phal.py @@ -5,7 +5,7 @@ from ovos_utils import camel_case_split from ovos_utils import classproperty from ovos_utils.log import LOG -from ovos_utils.messagebus import get_mycroft_bus +from ovos_bus_client.util import get_mycroft_bus from ovos_utils.process_utils import RuntimeRequirements from ovos_plugin_manager.utils.config import get_plugin_config @@ -515,7 +515,8 @@ def _deactivate_mouth_events(self, message=None): """Disable movement of the mouth with speech""" self._mouth_events = False +class AdminPlugin(PHALPlugin): + """Running as Admin""" -# Just for api consistency -AdminPlugin = PHALPlugin -AdminValidator = PHALValidator +class AdminValidator(PHALValidator): + """Running as Admin""" diff --git a/ovos_plugin_manager/templates/pipeline.py b/ovos_plugin_manager/templates/pipeline.py new file mode 100644 index 00000000..ce85dda7 --- /dev/null +++ b/ovos_plugin_manager/templates/pipeline.py @@ -0,0 +1,18 @@ +from collections import namedtuple +from typing import Optional, Dict + +# Intent match response 
tuple, ovos-core expects PipelinePlugin to return this data structure +# intent_service: Name of the service that matched the intent +# intent_type: intent name (used to call intent handler over the message bus) +# intent_data: data provided by the intent match +# skill_id: the skill this handler belongs to +IntentMatch = namedtuple('IntentMatch', + ['intent_service', 'intent_type', + 'intent_data', 'skill_id', 'utterance'] + ) + + +class PipelinePlugin: + """This class is a placeholder, this API will be defined in ovos-core release 0.1.0""" + def __init__(self, config: Optional[Dict] = None): + self.config = config or {} diff --git a/ovos_plugin_manager/templates/solvers.py b/ovos_plugin_manager/templates/solvers.py index a517a1b1..aea580d0 100644 --- a/ovos_plugin_manager/templates/solvers.py +++ b/ovos_plugin_manager/templates/solvers.py @@ -1,173 +1,298 @@ -# NEON AI (TM) SOFTWARE, Software Development Kit & Application Framework -# All trademark and other rights reserved by their respective owners -# Copyright 2008-2022 Neongecko.com Inc. -# Contributors: Daniel McKnight, Guy Daniels, Elon Gasper, Richard Leeds, -# Regina Bloomstine, Casimiro Ferreira, Andrii Pernatii, Kirill Hrymailo -# BSD-3 License -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# 1. Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from this -# software without specific prior written permission. 
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, -# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, -# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -# Solver service can be found at: https://github.com/Neongeckocom/neon_solvers +import abc +import inspect +from functools import wraps +from typing import Optional, List, Iterable, Tuple, Dict, Union, Any from json_database import JsonStorageXDG +from ovos_utils.log import LOG, log_deprecation from ovos_utils.xdg_utils import xdg_cache_home -from quebra_frases import sentence_tokenize -from ovos_utils.log import LOG -from ovos_plugin_manager.language import OVOSLangTranslationFactory - - -class AbstractSolver: - # these are defined by the plugin developer - priority = 50 - enable_tx = False - enable_cache = False - - def __init__(self, config=None, translator=None, *args, **kwargs): - if args or kwargs: - LOG.warning("solver plugins init signature changed, please update to accept config=None, translator=None. 
" - "an exception will be raised in next stable release") - for arg in args: - if isinstance(arg, str): - kwargs["name"] = arg - if isinstance(arg, int): - kwargs["priority"] = arg - if "priority" in kwargs: - self.priority = kwargs["priority"] - if "enable_tx" in kwargs: - self.enable_tx = kwargs["enable_tx"] - if "enable_cache" in kwargs: - self.enable_cache = kwargs["enable_cache"] - self.config = config or {} - self.supported_langs = self.config.get("supported_langs") or [] - self.default_lang = self.config.get("lang", "en") - if self.default_lang not in self.supported_langs: - self.supported_langs.insert(0, self.default_lang) - self.translator = translator or OVOSLangTranslationFactory.create() - - @staticmethod - def sentence_split(text, max_sentences=25): - return sentence_tokenize(text)[:max_sentences] - - def _get_user_lang(self, context, lang=None): - context = context or {} - lang = lang or context.get("lang") or self.default_lang - lang = lang.split("-")[0] - return lang - - def _tx_query(self, query, context=None, lang=None): - if not self.enable_tx: - return query, context, lang - context = context or {} - lang = user_lang = self._get_user_lang(context, lang) - - # translate input to default lang - if user_lang not in self.supported_langs: - lang = self.default_lang - query = self.translator.translate(query, lang, user_lang) - - context["lang"] = lang - - # HACK - cleanup some common translation mess ups - # this is properly solving by using a good translate plugin - # only common mistakes in default libretranslate plugin are handled - if lang.startswith("en"): - query = query.replace("who is is ", "who is ") - - return query, context, lang - - def shutdown(self): - """ module specific shutdown method """ - pass +from ovos_plugin_manager.templates.language import LanguageTranslator, LanguageDetector +from ovos_plugin_manager.thirdparty.solvers import AbstractSolver + + +def auto_translate(translate_keys: List[str], translate_str_args=True): + """ 
Decorator to ensure all kwargs in 'translate_keys' are translated to self.default_lang. + data returned by the decorated function will be translated back to original language + NOTE: not meant to be used outside solver plugins""" + + def func_decorator(func): + + @wraps(func) + def func_wrapper(*args, **kwargs): + solver: AbstractSolver = args[0] + # check if translation is enabled + if not solver.enable_tx: + return func(*args, **kwargs) + + lang = kwargs.get("lang") + # check if translation can be skipped + if any([lang is None, + lang == solver.default_lang, + lang in solver.supported_langs]): + LOG.debug(f"skipping translation, 'lang': {lang} is supported by {func}") + return func(*args, **kwargs) -class QuestionSolver(AbstractSolver): - """free form unscontrained spoken question solver - handling automatic translation back and forth as needed""" + # translate string arguments + if translate_str_args: + args = list(args) + for idx, arg in enumerate(args): + if isinstance(arg, str): + LOG.debug( + f"translating string argument with index: '{idx}' from {lang} to {solver.default_lang} for func: {func}") + args[idx] = _do_tx(solver, arg, + source_lang=lang, + target_lang=solver.default_lang) + + # translate input keys + for k in translate_keys: + v = kwargs.get(k) + if not v: + continue + kwargs[k] = _do_tx(solver, v, + source_lang=lang, + target_lang=solver.default_lang) + + out = func(*args, **kwargs) + + # reverse translate + return _do_tx(solver, out, + source_lang=solver.default_lang, + target_lang=lang) + + return func_wrapper + + return func_decorator + + +def auto_detect_lang(text_keys: List[str]): + """ Decorator to auto detect language if needed + NOTE: requires "lang" argument, not meant to be used outside solver plugins""" + + def func_decorator(func): + + @wraps(func) + def func_wrapper(*args, **kwargs): + solver: AbstractSolver = args[0] + + # detect language if needed + lang = kwargs.get("lang") + if lang is None: + LOG.debug(f"'lang' missing in 
kwargs for func: {func}") + for k in text_keys: + v = kwargs.get(k) + if isinstance(v, str): + lang = solver.detect_language(v) + LOG.debug(f"detected 'lang': {lang} in key: '{k}' for func: {func}") + break + else: + for idx, v in enumerate(args): + if isinstance(v, str) and len(v.split(" ")) > 1: + lang = solver.detect_language(v) + LOG.debug(f"detected 'lang': {lang} in argument '{idx}' for func: {func}") - def __init__(self, config=None, translator=None, *args, **kwargs): - super().__init__(config, translator, *args, **kwargs) + kwargs["lang"] = lang + return func(*args, **kwargs) + + return func_wrapper + + return func_decorator + + +def _deprecate_context2lang(): + """Decorator to deprecate the 'context' kwarg and replace it with 'lang'. + NOTE: can only be used in methods that accept "lang" as argument""" + + def func_decorator(func): + + @wraps(func) + def func_wrapper(*args, **kwargs): + + # Inspect the function signature to ensure it has both 'lang' and 'context' parameters + signature = inspect.signature(func) + params = signature.parameters + + if "context" in kwargs: + # NOTE: deprecate this at same time we + # standardize plugin namespaces to opm.XXX + log_deprecation("'context' kwarg has been deprecated, " + "please pass 'lang' as it's own kwarg instead", "1.0.0") + if "lang" in kwargs["context"] and "lang" not in kwargs: + kwargs["lang"] = kwargs["context"]["lang"] + + # ensure valid kwargs + if "lang" not in params and "lang" in kwargs: + kwargs.pop("lang") + if "context" not in params and "context" in kwargs: + kwargs.pop("context") + return func(*args, **kwargs) + + return func_wrapper + + return func_decorator + + +class QuestionSolver(AbstractSolver): + """ + A solver for free-form, unconstrained spoken questions that handles automatic translation as needed. 
+ """ + + def __init__(self, config: Optional[Dict] = None, + translator: Optional[LanguageTranslator] = None, + detector: Optional[LanguageDetector] = None, + priority: int = 50, + enable_tx: bool = False, + enable_cache: bool = False, + internal_lang: Optional[str] = None, + *args, **kwargs): + """ + Initialize the QuestionSolver. + + Args: + config (Optional[Dict]): Optional configuration dictionary. + translator (Optional[LanguageTranslator]): Optional language translator. + detector (Optional[LanguageDetector]): Optional language detector. + priority (int): Priority of the solver. + enable_tx (bool): Flag to enable translation. + enable_cache (bool): Flag to enable caching. + internal_lang (Optional[str]): Internal language code. Defaults to None. + """ + super().__init__(config, translator, detector, priority, + enable_tx, enable_cache, internal_lang, + *args, **kwargs) name = kwargs.get("name") or self.__class__.__name__ if self.enable_cache: # cache contains raw data self.cache = JsonStorageXDG(name + "_data", xdg_folder=xdg_cache_home(), - subfolder="neon_solvers") + subfolder="ovos_solvers") # spoken cache contains dialogs self.spoken_cache = JsonStorageXDG(name, xdg_folder=xdg_cache_home(), - subfolder="neon_solvers") + subfolder="ovos_solvers") else: self.cache = self.spoken_cache = {} # plugin methods to override - def get_spoken_answer(self, query, context): + @abc.abstractmethod + def get_spoken_answer(self, query: str, + lang: Optional[str] = None, + units: Optional[str] = None) -> Optional[str]: """ - query assured to be in self.default_lang - return a single sentence text response + Obtain the spoken answer for a given query. + + Args: + query (str): The query text. + lang (Optional[str]): Optional language code. Defaults to None. + units (Optional[str]): Optional units for the query. Defaults to None. + + Returns: + str: The spoken answer as a text response. 
""" raise NotImplementedError - def get_data(self, query, context): + @_deprecate_context2lang() + def stream_utterances(self, query: str, + lang: Optional[str] = None, + units: Optional[str] = None) -> Iterable[str]: """ - query assured to be in self.default_lang - return a dict response + Stream utterances for the given query as they become available. + + Args: + query (str): The query text. + lang (Optional[str]): Optional language code. Defaults to None. + units (Optional[str]): Optional units for the query. Defaults to None. + + Returns: + Iterable[str]: An iterable of utterances. """ - raise NotImplementedError + ans = _call_with_sanitized_kwargs(self.get_spoken_answer, query, lang=lang, units=units) + for utt in self.sentence_split(ans): + yield utt - def get_image(self, query, context=None): + @_deprecate_context2lang() + def get_data(self, query: str, + lang: Optional[str] = None, + units: Optional[str] = None) -> Optional[Dict[str, str]]: """ - query assured to be in self.default_lang - return path/url to a single image to acompany spoken_answer + Retrieve data for the given query. + + Args: + query (str): The query text. + lang (Optional[str]): Optional language code. Defaults to None. + units (Optional[str]): Optional units for the query. Defaults to None. + + Returns: + Optional[Dict]: A dictionary containing the answer. """ - raise NotImplementedError + return {"answer": _call_with_sanitized_kwargs(self.get_spoken_answer, query, lang=lang, units=units)} - def get_expanded_answer(self, query, context=None): + @_deprecate_context2lang() + def get_image(self, query: str, + lang: Optional[str] = None, + units: Optional[str] = None) -> Optional[str]: """ - query assured to be in self.default_lang - return a list of ordered steps to expand the answer, eg, "tell me more" - { - "title": "optional", - "summary": "speak this", - "img": "optional/path/or/url - } - :return: + Get the path or URL to an image associated with the query. 
+ + Args: + query (str): The query text. + lang (Optional[str]): Optional language code. Defaults to None. + units (Optional[str]): Optional units for the query. Defaults to None. + + Returns: + Optional[str]: The path or URL to a single image. """ - raise NotImplementedError + return None + + @_deprecate_context2lang() + def get_expanded_answer(self, query: str, + lang: Optional[str] = None, + units: Optional[str] = None) -> List[Dict[str, str]]: + """ + Get an expanded list of steps to elaborate on the answer. + + Args: + query (str): The query text. + lang (Optional[str]): Optional language code. Defaults to None. + units (Optional[str]): Optional units for the query. Defaults to None. + + Returns: + List[Dict]: A list of dictionaries with each step containing a title, summary, and optional image. + """ + return [{"title": query, + "summary": _call_with_sanitized_kwargs(self.get_spoken_answer, query, lang=lang, units=units), + "img": _call_with_sanitized_kwargs(self.get_image, query, lang=lang, units=units)}] # user facing methods - def search(self, query, context=None, lang=None): + @_deprecate_context2lang() + @auto_detect_lang(text_keys=["query"]) + @auto_translate(translate_keys=["query"]) + def search(self, query: str, + lang: Optional[str] = None, + units: Optional[str] = None) -> Optional[Dict]: """ - cache and auto translate query if needed - returns translated response from self.get_data + Perform a search with automatic translation and caching. + + NOTE: "lang" assured to be in self.supported_langs, + otherwise "query" automatically translated to self.default_lang. + If translations happens, the returned value of this method will also + be automatically translated back + + Args: + query (str): The query text. + lang (Optional[str]): Optional language code. Defaults to None. + units (Optional[str]): Optional units for the query. Defaults to None. + + Returns: + Dict: The data dictionary retrieved from the cache or computed anew. 
""" - user_lang = self._get_user_lang(context, lang) - query, context, lang = self._tx_query(query, context, lang) # read from cache if self.enable_cache and query in self.cache: data = self.cache[query] else: # search data try: - data = self.get_data(query, context) + data = _call_with_sanitized_kwargs(self.get_data, query, lang=lang, units=units) except: return {} @@ -175,187 +300,407 @@ def search(self, query, context=None, lang=None): if self.enable_cache: self.cache[query] = data self.cache.store() - - # translate english output to user lang - if self.enable_tx and user_lang not in self.supported_langs: - return self.translator.translate_dict(data, user_lang, lang) return data - def visual_answer(self, query, context=None, lang=None): - """ - cache and auto translate query if needed - returns image that answers query + @_deprecate_context2lang() + @auto_detect_lang(text_keys=["query"]) + @auto_translate(translate_keys=["query"]) + def visual_answer(self, query: str, + lang: Optional[str] = None, + units: Optional[str] = None) -> Optional[str]: """ - query, context, lang = self._tx_query(query, context, lang) - return self.get_image(query, context) + Retrieve the image associated with the query with automatic translation and caching. + + NOTE: "lang" assured to be in self.supported_langs, + otherwise "query" automatically translated to self.default_lang. + If translations happens, the returned value of this method will also + be automatically translated back - def spoken_answer(self, query, context=None, lang=None): + Args: + query (str): The query text. + lang (Optional[str]): Optional language code. Defaults to None. + units (Optional[str]): Optional units for the query. Defaults to None. + + Returns: + str: The path or URL to the image. 
""" - cache and auto translate query if needed - returns chunked and translated response from self.get_spoken_answer + return _call_with_sanitized_kwargs(self.get_image, query, lang=lang, units=units) + + @_deprecate_context2lang() + @auto_detect_lang(text_keys=["query"]) + @auto_translate(translate_keys=["query"]) + def spoken_answer(self, query: str, + lang: Optional[str] = None, + units: Optional[str] = None) -> Optional[str]: """ - user_lang = self._get_user_lang(context, lang) - query, context, lang = self._tx_query(query, context, lang) + Retrieve the spoken answer for the query with automatic translation and caching. + NOTE: "lang" assured to be in self.supported_langs, + otherwise "query" automatically translated to self.default_lang. + If translations happens, the returned value of this method will also + be automatically translated back + + Args: + query (str): The query text. + lang (Optional[str]): Optional language code. Defaults to None. + units (Optional[str]): Optional units for the query. Defaults to None. + + Returns: + str: The spoken answer as a text response. 
+ """ # get answer if self.enable_cache and query in self.spoken_cache: # read from cache summary = self.spoken_cache[query] else: - summary = self.get_spoken_answer(query, context) + + summary = _call_with_sanitized_kwargs(self.get_spoken_answer, query, lang=lang, units=units) # save to cache if self.enable_cache: self.spoken_cache[query] = summary self.spoken_cache.store() + return summary - # summarize - if summary: - # translate english output to user lang - if self.enable_tx and user_lang not in self.supported_langs: - return self.translator.translate(summary, user_lang, lang) - else: - return summary - - def long_answer(self, query, context=None, lang=None): - """ - return a list of ordered steps to expand the answer, eg, "tell me more" - step0 is always self.spoken_answer and self.get_image - { - "title": "optional", - "summary": "speak this", - "img": "optional/path/or/url - } - :return: + @_deprecate_context2lang() + @auto_detect_lang(text_keys=["query"]) + @auto_translate(translate_keys=["query"]) + def long_answer(self, query: str, + lang: Optional[str] = None, + units: Optional[str] = None) -> List[Dict[str, str]]: """ - user_lang = self._get_user_lang(context, lang) - query, context, lang = self._tx_query(query, context, lang) - steps = self.get_expanded_answer(query, context) + Retrieve a detailed list of steps to expand the answer. + + NOTE: "lang" assured to be in self.supported_langs, + otherwise "query" automatically translated to self.default_lang. + If translations happens, the returned value of this method will also + be automatically translated back + Args: + query (str): The query text. + lang (Optional[str]): Optional language code. Defaults to None. + units (Optional[str]): Optional units for the query. Defaults to None. + + Returns: + List[Dict]: A list of steps to elaborate on the answer, with each step containing a title, summary, and optional image. 
+ """ + steps = _call_with_sanitized_kwargs(self.get_expanded_answer, query, lang=lang, units=units) # use spoken_answer as last resort if not steps: - summary = self.get_spoken_answer(query, context) + summary = _call_with_sanitized_kwargs(self.get_spoken_answer, query, lang=lang, units=units) if summary: - img = self.get_image(query, context) - steps = [{"title": query, "summary": step0, "img": img} - for step0 in self.sentence_split(summary, -1)] - - # translate english output to user lang - if self.enable_tx and user_lang not in self.supported_langs: - return self.translator.translate_list(steps, user_lang, lang) + img = _call_with_sanitized_kwargs(self.get_image, query, lang=lang, units=units) + steps = [{"title": query, "summary": step, "img": img} for step in self.sentence_split(summary, -1)] return steps +class CorpusSolver(QuestionSolver): + """Retrieval based question solver""" + + def __init__(self, config=None, + translator: Optional[LanguageTranslator] = None, + detector: Optional[LanguageDetector] = None, + priority: int = 50, + enable_tx: bool = False, + enable_cache: bool = False, + *args, **kwargs): + super().__init__(config, translator, detector, + priority, enable_tx, enable_cache, + *args, **kwargs) + LOG.debug(f"corpus presumed to be in language: {self.default_lang}") + + @abc.abstractmethod + def load_corpus(self, corpus: List[str]): + """index the provided list of sentences""" + + @abc.abstractmethod + def query(self, query: str, lang: Optional[str], k: int = 3) -> Iterable[Tuple[str, float]]: + """return top_k matches from indexed corpus""" + + @auto_detect_lang(text_keys=["query"]) + @auto_translate(translate_keys=["query"]) + def retrieve_from_corpus(self, query: str, k: int = 3, lang: Optional[str] = None) -> List[Tuple[float, str]]: + """return top_k matches from indexed corpus""" + res = [] + for doc, score in self.query(query, lang, k=k): + LOG.debug(f"Rank {len(res) + 1} (score: {score}): {doc}") + if self.config.get("min_conf"): + if 
score >= self.config["min_conf"]: + res.append((score, doc)) + else: + res.append((score, doc)) + return res + + @auto_detect_lang(text_keys=["query"]) + @auto_translate(translate_keys=["query"]) + def get_spoken_answer(self, query: str, lang: Optional[str] = None) -> Optional[str]: + # Query the corpus + answers = [a[1] for a in self.retrieve_from_corpus(query, lang=lang, + k=self.config.get("n_answer", 1))] + if answers: + return ". ".join(answers[:self.config.get("n_answer", 1)]) + + +class QACorpusSolver(CorpusSolver): + def __init__(self, config=None, + translator: Optional[LanguageTranslator] = None, + detector: Optional[LanguageDetector] = None, + priority: int = 50, + enable_tx: bool = False, + enable_cache: bool = False, + *args, **kwargs): + self.answers = {} + super().__init__(config, translator, detector, + priority, enable_tx, enable_cache, + *args, **kwargs) + + def load_corpus(self, corpus: Dict): + self.answers = corpus + super().load_corpus(list(self.answers.keys())) + + @auto_detect_lang(text_keys=["query"]) + @auto_translate(translate_keys=["query"]) + def retrieve_from_corpus(self, query: str, k: int = 1, lang: Optional[str] = None) -> List[Tuple[float, str]]: + res = [] + for doc, score in super().retrieve_from_corpus(query, k, lang): + LOG.debug(f"Answer {len(res) + 1} (score: {score}): {self.answers[doc]}") + res.append((score, self.answers[doc])) + return res + + class TldrSolver(AbstractSolver): - """perform NLP summarization task, - handling automatic translation back and forth as needed""" + """ + Solver for performing NLP summarization tasks, + handling automatic translation as needed. + """ - # plugin methods to override - def get_tldr(self, document, context): + @abc.abstractmethod + def get_tldr(self, document: str, + lang: Optional[str] = None) -> str: """ - document assured to be in self.default_lang - returns summary of provided document + Summarize the provided document. 
+ + :param document: The text of the document to summarize, assured to be in the default language. + :param lang: Optional language code. + :return: A summary of the provided document. """ raise NotImplementedError # user facing methods - def tldr(self, document, context=None, lang=None): - """ - cache and auto translate query if needed - returns summary of provided document + + @_deprecate_context2lang() + @auto_detect_lang(text_keys=["document"]) + @auto_translate(translate_keys=["document"]) + def tldr(self, document: str, lang: Optional[str] = None) -> str: """ - user_lang = self._get_user_lang(context, lang) - document, context, lang = self._tx_query(document, context, lang) + Summarize the provided document with automatic translation and caching if needed. - # summarize - tldr = self.get_tldr(document, context) + NOTE: "lang" assured to be in self.supported_langs, + otherwise "document" automatically translated to self.default_lang. + If translations happens, the returned value of this method will also + be automatically translated back - # translate output to user lang - if self.enable_tx and user_lang not in self.supported_langs: - return self.translator.translate(tldr, user_lang, lang) - return tldr + :param document: The text of the document to summarize. + :param lang: Optional language code. + :return: A summary of the provided document. + """ + # summarize + return _call_with_sanitized_kwargs(self.get_tldr, document, lang=lang) class EvidenceSolver(AbstractSolver): - """perform NLP reading comprehension task, - handling automatic translation back and forth as needed""" + """ + Solver for NLP reading comprehension tasks, + handling automatic translation as needed. 
+ """ - # plugin methods to override - def get_best_passage(self, evidence, question, context): + @abc.abstractmethod + def get_best_passage(self, evidence: str, question: str, + lang: Optional[str] = None) -> str: """ - evidence and question assured to be in self.default_lang - returns summary of provided document + Extract the best passage from evidence that answers the given question. + + :param evidence: The text containing the evidence, assured to be in the default language. + :param question: The question to answer, assured to be in the default language. + :param lang: Optional language code. + :return: The passage from the evidence that best answers the question. """ raise NotImplementedError # user facing methods - def extract_answer(self, evidence, question, context=None, lang=None): + @_deprecate_context2lang() + @auto_detect_lang(text_keys=["evidence", "question"]) + @auto_translate(translate_keys=["evidence", "question"]) + def extract_answer(self, evidence: str, question: str, + lang: Optional[str] = None) -> str: """ - cache and auto translate evidence and question if needed - returns passage from evidence that answers question - """ - user_lang = self._get_user_lang(context, lang) - evidence, context, lang = self._tx_query(evidence, context, lang) - question, context, lang = self._tx_query(question, context, lang) + Extract the best passage from evidence that answers the question with automatic translation and caching if needed. - # extract answer from doc - ans = self.get_best_passage(evidence, question, context) + NOTE: "lang" assured to be in self.supported_langs, + otherwise "evidence" and "question" are automatically translated to self.default_lang. 
+ If translations happens, the returned value of this method will also + be automatically translated back - # translate output to user lang - if self.enable_tx and user_lang not in self.supported_langs: - return self.translator.translate(ans, user_lang, lang) - return ans + :param evidence: The text containing the evidence. + :param question: The question to answer. + :param lang: Optional language code. + :return: The passage from the evidence that answers the question. + """ + # extract answer from doc + return self.get_best_passage(evidence, question, lang=lang) class MultipleChoiceSolver(AbstractSolver): - """ select best answer from question + multiple choice - handling automatic translation back and forth as needed""" + """ + Solver for selecting the best answer from a question with multiple choices, + handling automatic translation as needed. + """ - # plugin methods to override - def select_answer(self, query, options, context): + @abc.abstractmethod + def rerank(self, query: str, options: List[str], + lang: Optional[str] = None, + return_index: bool = False) -> List[Tuple[float, Union[str, int]]]: """ - query and options assured to be in self.default_lang - return best answer from options list + Rank the provided options based on the query. + + :param query: The query text, assured to be in the default language. + :param options: A list of answer options, each assured to be in the default language. + :param lang: Optional language code. + :param return_index: If True, return the index of the best option; otherwise, return the best option text. + :return: A list of tuples where each tuple contains a score and the corresponding option text, sorted by score. 
""" raise NotImplementedError - # user facing methods - def solve(self, query, options, context=None, lang=None): - """ - cache and auto translate query and options if needed - returns best answer from provided options + @_deprecate_context2lang() + @auto_detect_lang(text_keys=["query", "options"]) + @auto_translate(translate_keys=["query", "options"]) + def select_answer(self, query: str, options: List[str], + lang: Optional[str] = None, + return_index: bool = False) -> Union[str, int]: """ - user_lang = self._get_user_lang(context, lang) - query, context, lang = self._tx_query(query, context, lang) - opts = [self.translator.translate(opt, lang, user_lang) - for opt in options] + Select the best answer from the provided options based on the query with automatic translation and caching if needed. - # select best answer - ans = self.select_answer(query, opts, context) + NOTE: "lang" assured to be in self.supported_langs, + otherwise "query" and "options" are automatically translated to self.default_lang. + If translations happens, the returned value of this method will also + be automatically translated back - idx = opts.index(ans) - return options[idx] + :param query: The query text. + :param options: A list of answer options. + :param lang: Optional language code. + :param return_index: If True, return the index of the best option; otherwise, return the best option text. + :return: The best answer from the options list, or the index of the best option if `return_index` is True. 
+ """ + return self.rerank(query, options, lang=lang, return_index=return_index)[0][1] class EntailmentSolver(AbstractSolver): """ select best answer from question + multiple choice handling automatic translation back and forth as needed""" - # plugin methods to override - def check_entailment(self, premise, hypothesis, context): + @abc.abstractmethod + def check_entailment(self, premise: str, hypothesis: str, + lang: Optional[str] = None) -> bool: """ - premise and hyopithesis assured to be in self.default_lang - return Bool, True if premise entails the hypothesis False otherwise + Check if the premise entails the hypothesis. + + :param premise: The premise text, assured to be in the default language. + :param hypothesis: The hypothesis text, assured to be in the default language. + :param lang: Optional language code. + :return: True if the premise entails the hypothesis; False otherwise. """ raise NotImplementedError # user facing methods - def entails(self, premise, hypothesis, context=None, lang=None): - """ - cache and auto translate premise and hypothesis if needed - return Bool, True if premise entails the hypothesis False otherwise - """ - user_lang = self._get_user_lang(context, lang) - query, context, lang = self._tx_query(query, context, lang) - - # summarize - return self.check_entailment(premise, hypothesis) + @_deprecate_context2lang() + @auto_detect_lang(text_keys=["premise", "hypothesis"]) + @auto_translate(translate_keys=["premise", "hypothesis"]) + def entails(self, premise: str, hypothesis: str, lang: Optional[str] = None) -> bool: + """ + Determine if the premise entails the hypothesis with automatic translation and caching if needed. + + NOTE: "lang" assured to be in self.supported_langs, + otherwise "premise" and "hypothesis" are automatically translated to self.default_lang. + If translations happens, the returned value of this method will also + be automatically translated back + + :param premise: The premise text. 
+ :param hypothesis: The hypothesis text. + :param lang: Optional language code. + :return: True if the premise entails the hypothesis; False otherwise. + """ + # check for entailment + return self.check_entailment(premise, hypothesis, lang=lang) + + +def _do_tx(solver, data: Any, source_lang: str, target_lang: str) -> Any: + """ + Translate the given data from source language to target language using the provided solver. + + Args: + solver: The translation solver. + data (Any): The data to translate. Can be a string, list, dictionary, or tuple. + source_lang (str): The source language code. + target_lang (str): The target language code. + + Returns: + Any: The translated data in the same structure as the input data. + """ + if isinstance(data, str): + return solver.translate(data, + source_lang=source_lang, target_lang=target_lang) + elif isinstance(data, list): + for idx, e in enumerate(data): + data[idx] = _do_tx(solver, e, source_lang=source_lang, target_lang=target_lang) + elif isinstance(data, dict): + for k, v in data.items(): + data[k] = _do_tx(solver, v, source_lang=source_lang, target_lang=target_lang) + elif isinstance(data, tuple) and len(data) == 2: + if isinstance(data[0], str): + a = _do_tx(solver, data[0], source_lang=source_lang, target_lang=target_lang) + else: + a = data[0] + if isinstance(data[1], str): + b = _do_tx(solver, data[1], source_lang=source_lang, target_lang=target_lang) + else: + b = data[1] + return (a, b) + return data + + +def _call_with_sanitized_kwargs(func, *args: Any, + lang: Optional[str] = None, + units: Optional[str] = None) -> Any: + """ + Call a function with sanitized keyword arguments for language and units. + + Args: + func: The function to call. + args (Any): Positional arguments to pass to the function. + lang (Optional[str]): Optional language code. Defaults to None. + units (Optional[str]): Optional units for the query. Defaults to None. + + Returns: + Any: The result of the function call. 
+ """ + params = inspect.signature(func).parameters + kwargs = {} + + # ensure context is passed, it didn't used to be optional + if "context" in params and "context" not in kwargs: + kwargs["context"] = {} + + if "lang" in params: + # new style - only lang/units is passed + kwargs["lang"] = lang + elif "context" in kwargs: + # old style - when plugins received context only + kwargs["context"]["lang"] = lang + + if "units" in params: + # new style - only lang/units is passed + kwargs["units"] = units + elif "context" in kwargs: + # old style - when plugins received context only + kwargs["context"]["units"] = units + + return func(*args, **kwargs) diff --git a/ovos_plugin_manager/templates/stt.py b/ovos_plugin_manager/templates/stt.py index ef593fa6..e028cad0 100644 --- a/ovos_plugin_manager/templates/stt.py +++ b/ovos_plugin_manager/templates/stt.py @@ -8,9 +8,11 @@ from abc import ABCMeta, abstractmethod from queue import Queue from threading import Thread, Event +from typing import List, Tuple, Optional from ovos_config import Configuration from ovos_utils import classproperty +from ovos_utils.log import deprecated from ovos_utils.process_utils import RuntimeRequirements from ovos_plugin_manager.utils.config import get_plugin_config @@ -20,8 +22,6 @@ class STT(metaclass=ABCMeta): """ STT Base class, all STT backends derives from this one. """ def __init__(self, config=None): - # only imported here to not drag dependency - from speech_recognition import Recognizer self.config_core = Configuration() self._lang = None self._credential = None @@ -30,7 +30,7 @@ def __init__(self, config=None): self.config = get_plugin_config(config, "stt") self.can_stream = False - self.recognizer = Recognizer() + self._recognizer = None @classproperty def runtime_requirements(self): @@ -62,11 +62,25 @@ def runtime_requirements(self): """ return RuntimeRequirements() + @property + @deprecated("self.recognizer has been deprecated! 
" + "if you need it 'from speech_recognition import Recognizer' directly", "1.0.0") + def recognizer(self): + # only imported here to not drag dependency + from speech_recognition import Recognizer + if not self._recognizer: + self._recognizer = Recognizer() + return self._recognizer + + @recognizer.setter + def recognizer(self, val): + self._recognizer = val + @property def lang(self): return self._lang or \ - self.config.get("lang") or \ - self.init_language(self.config_core) + self.config.get("lang") or \ + Configuration().get("lang", "en-us") @lang.setter def lang(self, val): @@ -74,6 +88,8 @@ def lang(self, val): self._lang = val @property + @deprecated("self.keys has been deprecated! " + "implement config handling directly instead", "1.0.0") def keys(self): return self._keys or self.config_core.get("keys", {}) @@ -83,6 +99,8 @@ def keys(self, val): self._keys = val @property + @deprecated("self.credential has been deprecated! " + "implement config handling directly instead", "1.0.0") def credential(self): return self._credential or self.config.get("credential", {}) @@ -92,6 +110,8 @@ def credential(self, val): self._credential = val @staticmethod + @deprecated("self.init_language has been deprecated! 
" + "implement config handling directly instead", "1.0.0") def init_language(config_core): lang = config_core.get("lang", "en-US") langs = lang.split("-") @@ -99,9 +119,16 @@ def init_language(config_core): return langs[0].lower() + "-" + langs[1].upper() return lang - def execute(self, audio, language=None): + @abstractmethod + def execute(self, audio, language: Optional[str] = None) -> str: + # TODO - eventually deprecate this and make transcribe the @abstractmethod pass + def transcribe(self, audio, lang: Optional[str] = None) -> List[Tuple[str, float]]: + """transcribe audio data to a list of + possible transcriptions and respective confidences""" + return [(self.execute(audio, lang), 1.0)] + @property def available_languages(self) -> set: """Return languages supported by this STT implementation in this state @@ -114,12 +141,14 @@ def available_languages(self) -> set: class TokenSTT(STT, metaclass=ABCMeta): + @deprecated("TokenSTT is deprecated, please subclass from STT directly", "1.0.0") def __init__(self, config=None): super().__init__(config) self.token = self.credential.get("token") class GoogleJsonSTT(STT, metaclass=ABCMeta): + @deprecated("GoogleJsonSTT is deprecated, please subclass from STT directly", "1.0.0") def __init__(self, config=None): super().__init__(config) if not self.credential.get("json") or self.keys.get("google_cloud"): @@ -128,7 +157,7 @@ def __init__(self, config=None): class BasicSTT(STT, metaclass=ABCMeta): - + @deprecated("BasicSTT is deprecated, please subclass from STT directly", "1.0.0") def __init__(self, config=None): super().__init__(config) self.username = str(self.credential.get("username")) @@ -137,6 +166,7 @@ def __init__(self, config=None): class KeySTT(STT, metaclass=ABCMeta): + @deprecated("KeySTT is deprecated, please subclass from STT directly", "1.0.0") def __init__(self, config=None): super().__init__(config) self.id = str(self.credential.get("client_id")) @@ -207,9 +237,16 @@ def stream_stop(self): return text 
return None - def execute(self, audio, language=None): + def execute(self, audio: Optional = None, + language: Optional[str] = None): return self.stream_stop() + def transcribe(self, audio: Optional = None, + lang: Optional[str] = None) -> List[Tuple[str, float]]: + """transcribe audio data to a list of + possible transcriptions and respective confidences""" + return [(self.execute(audio, lang), 1.0)] + @abstractmethod def create_streaming_thread(self): pass diff --git a/ovos_plugin_manager/templates/transformers.py b/ovos_plugin_manager/templates/transformers.py index d2e48005..9a58fad4 100644 --- a/ovos_plugin_manager/templates/transformers.py +++ b/ovos_plugin_manager/templates/transformers.py @@ -1,7 +1,10 @@ -from typing import List, Tuple +import abc +from typing import List, Tuple, Optional -from ovos_config.config import Configuration from ovos_bus_client.util import get_mycroft_bus +from ovos_config.config import Configuration +from ovos_config.locale import get_default_lang +from ovos_utils.log import LOG from ovos_plugin_manager.utils import ReadWriteStream @@ -92,9 +95,11 @@ def __init__(self, name, priority=50, config=None): # buffers with audio chunks to be used in predictions # always cleared before STT stage - self.noise_feed = ReadWriteStream() - self.hotword_feed = ReadWriteStream() - self.speech_feed = ReadWriteStream() + # 16000 samples/second * 2 bytes/sample * 3 seconds = 96000 bytes. + self.noise_feed = ReadWriteStream(max_size=96000) # 3 second buffer + self.hotword_feed = ReadWriteStream(max_size=96000) # 3 seconds buffer + # 16000 samples/second * 2 bytes/sample * 10 seconds = 320000 bytes. 
+ self.speech_feed = ReadWriteStream(max_size=320000) # 10 seconds buffer def _read_mycroft_conf(self): config_core = dict(Configuration()) @@ -233,3 +238,22 @@ def transform(self, wav_file: str, context: dict = None) -> Tuple[str, dict]: def default_shutdown(self): """ perform any shutdown actions """ pass + + +class AudioLanguageDetector(AudioTransformer): + + @property + def valid_langs(self) -> List[str]: + return list( + set([get_default_lang()] + Configuration().get("secondary_langs", [])) + ) + + @abc.abstractmethod + def detect(self, audio_data: bytes, valid_langs: Optional[List] = None) -> Tuple[str, float]: + raise NotImplementedError + + # plugin api + def transform(self, audio_data: bytes): + lang, prob = self.detect(audio_data) + LOG.info(f"Detected speech language '{lang}' with probability {prob}") + return audio_data, {"stt_lang": lang, "lang_probability": prob} diff --git a/ovos_plugin_manager/templates/tts.py b/ovos_plugin_manager/templates/tts.py index 019a1931..b05ecaf5 100644 --- a/ovos_plugin_manager/templates/tts.py +++ b/ovos_plugin_manager/templates/tts.py @@ -1,133 +1,172 @@ -""" -this module is meant to enable usage of mycroft plugins inside and outside -mycroft, importing from here will make things work as planned in mycroft, -but if outside mycroft things will still work - -The main use case is for plugins to be used across different projects - -## Differences from upstream - -TTS: -- added automatic guessing of phonemes/visime calculation, enabling mouth -movements for all TTS engines (only mimic implements this in upstream) -- playback start call has been omitted and moved to init method -- init is called by mycroft, but non mycroft usage wont call it -- outside mycroft the enclosure is not set, bus is dummy and playback thread is not used - - playback queue is not wanted when some module is calling get_tts - - if playback was started on init then python scripts would never stop - from mycroft.tts import TTSFactory - engine = 
TTSFactory.create() - engine.get_tts("hello world", "hello_world." + engine.audio_ext) - # would hang here - engine.playback.stop() -""" +import abc +import asyncio import inspect -import random +import os.path import re +import sys import subprocess from os.path import isfile, join from pathlib import Path from queue import Queue from threading import Thread +from typing import AsyncIterable, List, Dict, Tuple, Optional + +import quebra_frases import requests +from ovos_bus_client.apis.enclosure import EnclosureAPI from ovos_bus_client.message import Message, dig_for_message +from ovos_bus_client.session import SessionManager from ovos_config import Configuration -from ovos_plugin_manager.g2p import OVOSG2PFactory, find_g2p_plugins -from ovos_plugin_manager.templates.g2p import OutOfVocabulary -from ovos_plugin_manager.utils.config import get_plugin_config -from ovos_plugin_manager.utils.tts_cache import TextToSpeechCache, hash_sentence +from ovos_config.locations import get_xdg_cache_save_path from ovos_utils import classproperty -from ovos_utils.file_utils import resolve_resource_file -from ovos_bus_client.apis.enclosure import EnclosureAPI +from ovos_utils.fakebus import FakeBus from ovos_utils.file_utils import get_cache_directory from ovos_utils.lang.visimes import VISIMES -from ovos_utils.log import LOG -from ovos_utils.messagebus import FakeBus as BUS +from ovos_utils.log import LOG, deprecated, log_deprecation from ovos_utils.metrics import Stopwatch from ovos_utils.process_utils import RuntimeRequirements +from ovos_plugin_manager.g2p import OVOSG2PFactory, find_g2p_plugins +from ovos_plugin_manager.templates.g2p import OutOfVocabulary +from ovos_plugin_manager.utils.config import get_plugin_config +from ovos_plugin_manager.utils.tts_cache import TextToSpeechCache, hash_sentence + EMPTY_PLAYBACK_QUEUE_TUPLE = (None, None, None, None, None) SSML_TAGS = re.compile(r'<[^>]*>') -class PlaybackThread(Thread): - """ PlaybackThread moved to ovos_audio.playback - 
standalone plugin usage should rely on self.get_tts - ovos-audio relies on self.execute and needs this class +class TTSContext: + """ + A context manager for handling Text-To-Speech (TTS) operations and caching. - this class was only in ovos-plugin-manager in order to - patch usage of our plugins in mycroft-core""" + Attributes: + plugin_id (str): Identifier for the TTS plugin being used. + lang (str): Language code for the TTS operation. + voice (str): Identifier for the voice type in use. + synth_kwargs (dict): Optional dictionary containing additional keyword arguments for the TTS synthesizer. - def __new__(self, *args, **kwargs): - LOG.warning("PlaybackThread moved to ovos_audio.playback") - try: - from ovos_audio.playback import PlaybackThread - return PlaybackThread(*args, **kwargs) - except ImportError: - raise ImportError("please install ovos-audio for playback handling") + Class Attributes: + _caches (dict): A class-level dictionary acting as a cache store for different TTS contexts. + """ + _caches: Dict[str, TextToSpeechCache] = {} -class TTSContext: - """ parses kwargs for valid signatures and extracts voice/lang optional parameters - it will look for a requested voice in kwargs and inside the source Message data if available. - voice can also be defined by a combination of language and gender, - in that case the helper method get_voice will be used to resolve the final voice_id - """ + def __init__(self, plugin_id: str, lang: str, voice: str, synth_kwargs: dict = None): + """ + Initializes the TTSContext instance. + + Parameters: + plugin_id (str): The unique identifier for the TTS plugin. + lang (str): The language in which the text will be synthesized. + voice (str): The voice model to be used for text synthesis. + synth_kwargs (dict, optional): Additional keyword arguments for the synthesizer. 
+ """ + self.plugin_id = plugin_id + self.lang = lang + self.voice = voice + self.synth_kwargs = synth_kwargs or {} + + @property + def tts_id(self): + """ + Constructs a unique identifier for the TTS context based on plugin, voice, and language. + + Returns: + str: A unique identifier that represents the TTS context. + """ + return join(self.plugin_id, self.voice, self.lang) + + def get_cache(self, audio_ext="wav", cache_config=None): + """ + Retrieves or creates a cache instance for the current TTS context. + + Parameters: + audio_ext (str, optional): The file extension for the audio files (default is 'wav'). + cache_config (dict, optional): Configuration settings for the cache, including parameters like + minimum free percent, persistence settings, and cache directory path. + + Returns: + TextToSpeechCache: The cache instance associated with the current TTS context. + """ + cache_config = cache_config or { + "min_free_percent": 75, + "persist_cache": False, + "persist_thresh": 1, + "preloaded_cache": f"{get_xdg_cache_save_path()}/{self.tts_id}" + } + if self.tts_id not in TTSContext._caches: + TTSContext._caches[self.tts_id] = TextToSpeechCache( + cache_config, self.tts_id, audio_ext + ) + return self._caches[self.tts_id] + + def get_from_cache(self, sentence, audio_ext="wav", cache_config=None): + """ + Retrieves an audio file and phoneme data from the cache, based on the input sentence. + + Parameters: + sentence (str): The sentence for which to retrieve audio data. + audio_ext (str, optional): The file extension of the audio file (default is 'wav'). + cache_config (dict, optional): Configuration settings for the cache. + + Returns: + tuple: A tuple containing the path to the cached audio file and optionally the phoneme data. + + Raises: + FileNotFoundError: If the sentence is not found in the cache. 
+ """ + sentence_hash = hash_sentence(sentence) + phonemes = None + cache = self.get_cache(audio_ext, cache_config) + if sentence_hash not in cache: + raise FileNotFoundError(f"sentence is not cached, {sentence_hash}.{audio_ext}") + audio_file, pho_file = cache.cached_sentences[sentence_hash] + LOG.info(f"Found {audio_file.name} in TTS cache") + if pho_file: + phonemes = pho_file.load() + return audio_file, phonemes - def __init__(self, engine): - self.engine = engine + @classmethod + def curate_caches(cls): + for cache in TTSContext._caches.values(): + cache.curate() - def get_message(self, kwargs): + ########### + # deprecated methods + @deprecated("'get_message' has been deprecated without replacement", "1.0.0") + def get_message(self, kwargs) -> Optional[Message]: msg = kwargs.get("message") or dig_for_message() if msg and isinstance(msg, Message): return msg - def get_lang(self, kwargs): - # parse requested language for this TTS request - # NOTE: this is ovos only functionality, not in mycroft-core! 
- lang = kwargs.get("lang") - message = self.get_message(kwargs) - if not lang and message: - # get lang from message object if possible - lang = message.data.get("lang") or \ - message.context.get("lang") - return lang or self.engine.lang + @deprecated("'self.get_lang' has been deprecated, access self.lang directly", "1.0.0") + def get_lang(self, kwargs) -> str: + return kwargs.get("lang") or self.lang - def get_gender(self, kwargs): + @deprecated("'self.get_gender' has been deprecated, access self.voice and self.lang directly", "1.0.0") + def get_gender(self, kwargs) -> Optional[str]: gender = kwargs.get("gender") message = self.get_message(kwargs) if not gender and message: - # get gender from message object if possible gender = message.data.get("gender") or \ message.context.get("gender") return gender + @deprecated("'self.get_voice' has been deprecated, access self.voice directly", "1.0.0") def get_voice(self, kwargs): - # parse requested voice for this TTS request - # NOTE: this is ovos only functionality, not in mycroft-core! voice = kwargs.get("voice") message = self.get_message(kwargs) if not voice and message: # get voice from message object if possible voice = message.data.get("voice") or \ message.context.get("voice") + return voice or self.voice - if not voice: - gender = self.get_gender(kwargs) - if gender: - lang = self.get_lang(kwargs) - voice = self.engine.get_voice(gender, lang) - - return voice or self.engine.voice - - def get(self, kwargs=None): + @deprecated("'self.get' has been deprecated, access self.voice and self.lang directly", "1.0.0") + def get(self, kwargs=None) -> Tuple[str, Optional[str]]: kwargs = kwargs or {} return self.get_lang(kwargs), self.get_voice(kwargs) - def get_cache(self, kwargs=None): - lang, voice = self.get(kwargs) - return self.engine.get_cache(voice, lang) - class TTS: """TTS abstract class to be implemented by all TTS engines. 
@@ -135,26 +174,44 @@ class TTS: It aggregates the minimum required parameters and exposes ``execute(sentence)`` and ``validate_ssml(sentence)`` functions. - Arguments: - lang (str): - config (dict): Configuration for this specific tts engine - validator (TTSValidator): Used to verify proper installation - phonetic_spelling (bool): Whether to spell certain words phonetically - ssml_tags (list): Supported ssml properties. Ex. ['speak', 'prosody'] + Attributes: + queue (Queue): A queue for managing TTS playback tasks. + playback (PlaybackThread): The playback thread used for TTS audio output. + + Args: + lang (str): The language code for the TTS engine. + config (dict): Configuration settings for the specific TTS engine. + validator (TTSValidator): Validator used to verify proper installation. + audio_ext (str): The default audio file extension (default is 'wav'). + phonetic_spelling (bool): Whether to spell certain words phonetically. + ssml_tags (list): Supported SSML properties (e.g., ['speak', 'prosody']). """ queue = None playback = None - def __init__(self, lang="en-us", config=None, validator=None, + def __init__(self, lang=None, config=None, validator=None, audio_ext='wav', phonetic_spelling=True, ssml_tags=None): - self.log_timestamps = False + """ + Initializes the TTS engine with specified parameters. + Args: + lang (str): The language code (deprecated). + config (dict): Configuration settings for the TTS engine. + validator (TTSValidator): Validator for verifying installation. + audio_ext (str): Default audio file extension (default is 'wav'). + phonetic_spelling (bool): Whether to use phonetic spelling (default is True). + ssml_tags (list): Supported SSML tags (default is None). + """ + if lang is not None: + log_deprecation("lang argument for TTS has been deprecated! 
it will be ignored, " + "pass lang to get_tts directly instead") + self.log_timestamps = False + self.root_dir = os.path.dirname(os.path.abspath(sys.modules[self.__module__].__file__)) self.config = config or get_plugin_config(config, "tts") self.stopwatch = Stopwatch() self.tts_name = self.__class__.__name__ - self.bus = BUS() # initialized in "init" step - self.lang = lang or self.config.get("lang") or 'en-us' + self.validator = validator or TTSValidator(self) self.phonetic_spelling = phonetic_spelling self.audio_ext = audio_ext @@ -163,203 +220,85 @@ def __init__(self, lang="en-us", config=None, validator=None, self.enable_cache = self.config.get("enable_cache", True) - self.voice = self.config.get("voice") or "default" - # TODO can self.filename be deprecated ? is it used anywhere at all? - cache_dir = get_cache_directory(self.tts_name) - self.filename = join(cache_dir, 'tts.' + self.audio_ext) - - random.seed() - if TTS.queue is None: TTS.queue = Queue() - self.context = TTSContext(self) - - # NOTE: self.playback.start() was moved to init method - # playback queue is not wanted if we only care about get_tts - # init is called by mycroft, but non mycroft usage wont call it, - # outside mycroft the enclosure is not set, bus is dummy and - # playback thread is not used - self.spellings = self.load_spellings() + self.spellings: Dict[str, dict] = self.load_spellings() + self._init_g2p() - self.caches = { - self.tts_id: TextToSpeechCache( - self.config, self.tts_id, self.audio_ext - )} - - cfg = Configuration() - g2pm = self.config.get("g2p_module") - if g2pm: - if g2pm in find_g2p_plugins(): - cfg.setdefault("g2p", {}) - globl = cfg["g2p"].get("module") or g2pm - if globl != g2pm: - LOG.info(f"TTS requested {g2pm} explicitly, ignoring global module {globl} ") - cfg["g2p"]["module"] = g2pm - else: - LOG.warning(f"TTS selected {g2pm}, but it is not available!") + self.add_metric({"metric_type": "tts.init"}) - try: - self.g2p = OVOSG2PFactory.create(cfg) - except: - 
LOG.exception("G2P plugin not loaded, there will be no mouth movements") - self.g2p = None + # unused by plugins, assigned in init method by ovos-audio, + # only present for backwards compat reasons + self.bus = None - self.cache.curate() + self._plugin_id = "" # the plugin name - self.add_metric({"metric_type": "tts.init"}) + @property + def plugin_id(self) -> str: + """ + Retrieves the plugin ID for the TTS engine. + Returns: + str: The plugin ID associated with the TTS engine. + """ + if not self._plugin_id: + from ovos_plugin_manager.tts import find_tts_plugins + for tts_id, clazz in find_tts_plugins().items(): + if isinstance(self, clazz): + self._plugin_id = tts_id + break + return self._plugin_id + + # methods for individual plugins to override @classproperty def runtime_requirements(self): - """ skill developers should override this if they do not require connectivity - some examples: - IOT plugin that controls devices via LAN could return: - scans_on_init = True - RuntimeRequirements(internet_before_load=False, - network_before_load=scans_on_init, - requires_internet=False, - requires_network=True, - no_internet_fallback=True, - no_network_fallback=False) - online search plugin with a local cache: - has_cache = False - RuntimeRequirements(internet_before_load=not has_cache, - network_before_load=not has_cache, - requires_internet=True, - requires_network=True, - no_internet_fallback=True, - no_network_fallback=True) - a fully offline plugin: - RuntimeRequirements(internet_before_load=False, - network_before_load=False, - requires_internet=False, - requires_network=False, - no_internet_fallback=True, - no_network_fallback=True) - """ + """ WIP - currently unused, + placeholder to allow plugins to request internet/gui before load + refer to skills to see how it is used""" return RuntimeRequirements() @property - def tts_id(self): - lang, voice = self.context.get() - return join(self.tts_name, voice, lang) - - @property - def cache(self): - return 
self.caches.get(self.tts_id) or \ - self.get_cache() - - @cache.setter - def cache(self, val): - self.caches[self.tts_id] = val - - def get_cache(self, voice=None, lang=None): - lang = lang or self.lang - voice = voice or self.voice or "default" - tts_id = join(self.tts_name, voice, lang) - if tts_id not in self.caches: - self.caches[tts_id] = TextToSpeechCache( - self.config, tts_id, self.audio_ext - ) - return self.caches[tts_id] - - def handle_metric(self, metadata=None): - """ receive timing metrics for diagnostics - does nothing by default but plugins might use it, eg, NeonCore""" - - def add_metric(self, metadata=None): - """ wraps handle_metric to catch exceptions and log timestamps """ - try: - self.handle_metric(metadata) - if self.log_timestamps: - LOG.debug(f"time delta: {self.stopwatch.delta} metric: {metadata}") - except Exception as e: - LOG.exception(e) - - def load_spellings(self, config=None): - """Load phonetic spellings of words as dictionary.""" - path = join('text', self.lang.lower(), 'phonetic_spellings.txt') - try: - spellings_file = resolve_resource_file(path, config=config or Configuration()) - except: - LOG.debug('Failed to locate phonetic spellings resouce file.') - return {} - if not spellings_file: - return {} - try: - with open(spellings_file) as f: - lines = filter(bool, f.read().split('\n')) - lines = [i.split(':') for i in lines] - return {key.strip(): value.strip() for key, value in lines} - except ValueError: - LOG.exception('Failed to load phonetic spellings.') - return {} - - def begin_audio(self): - """Helper function for child classes to call in execute()""" - self.add_metric({"metric_type": "tts.start"}) - - def end_audio(self, listen=False): - """Helper cleanup function for child classes to call in execute(). - - Arguments: - listen (bool): DEPRECATED: indication if listening trigger should be sent. 
- """ - self.add_metric({"metric_type": "tts.end"}) - self.stopwatch.stop() - - def init(self, bus=None, playback=None): - """ Performs intial setup of TTS object. - - Arguments: - bus: OpenVoiceOS messagebus connection + def available_languages(self) -> set: + """Return languages supported by this TTS implementation in this state + This property should be overridden by the derived class to advertise + what languages that engine supports. + Returns: + set: A set of supported language codes. """ - self.bus = bus or BUS() - if playback is None: - LOG.warning("PlaybackThread should be inited by ovos-audio, initing via plugin has been deprecated, " - "please pass playback=PlaybackThread() to TTS.init") - if TTS.playback: - playback.shutdown() - playback = PlaybackThread(TTS.queue, self.bus) # compat - playback.start() - self._init_playback(playback) - self.add_metric({"metric_type": "tts.setup"}) - - def _init_playback(self, playback): - TTS.playback = playback - TTS.playback.set_bus(self.bus) - TTS.playback.attach_tts(self) - if not TTS.playback.enclosure: - TTS.playback.enclosure = EnclosureAPI(self.bus) + return set() - if not TTS.playback.is_running: - TTS.playback.start() + @abc.abstractmethod + def get_tts(self, sentence, wav_file, lang=None, voice=None): + """Abstract method that a tts implementation needs to implement. - @property - def enclosure(self): - if not TTS.playback.enclosure: - bus = TTS.playback.bus or self.bus - TTS.playback.enclosure = EnclosureAPI(bus) - return TTS.playback.enclosure + Args: + sentence (str): The input sentence to synthesize. + wav_file (str): The output file path for the synthesized audio. + lang (str, optional): The requested language (defaults to self.lang). + voice (str, optional): The requested voice (defaults to self.voice). 
- @enclosure.setter - def enclosure(self, val): - TTS.playback.enclosure = val + Returns: + tuple: (wav_file, phoneme) + """ + return "", None - def get_tts(self, sentence, wav_file, lang=None): - """Abstract method that a tts implementation needs to implement. + def preprocess_sentence(self, sentence: str) -> List[str]: + """Default preprocessing is a sentence_tokenizer, + ie. splits the utterance into sub-sentences using quebra_frases - Should get data from tts. + This method can be overridden to create chunks suitable to the + TTS engine in question. Arguments: - sentence(str): Sentence to synthesize - wav_file(str): output file - lang(str): requested language (optional), defaults to self.lang + sentence (str): sentence to preprocess Returns: - tuple: (wav_file, phoneme) + list: list of sentence parts """ - return "", None + if self.config.get("sentence_tokenize"): # TODO default to True on next major release + return quebra_frases.sentence_tokenize(sentence) + return [sentence] def modify_tag(self, tag): """Override to modify each supported ssml tag. @@ -369,6 +308,19 @@ def modify_tag(self, tag): """ return tag + def handle_metric(self, metadata=None): + """ receive timing metrics for diagnostics + does nothing by default but plugins might use it, eg, NeonCore""" + + @property + def voice(self): + return self.config.get("voice") or "default" + + @voice.setter + def voice(self, val): + self.config["voice"] = val + + # SSML helpers @staticmethod def remove_ssml(text): """Removes SSML tags from a string. @@ -455,19 +407,121 @@ def validate_ssml(self, utterance): # return text with supported ssml tags only return utterance.replace(" ", " ") - def _preprocess_sentence(self, sentence): - """Default preprocessing is no preprocessing. + # init helpers + def _init_g2p(self): + """ + Initializes the grapheme-to-phoneme (G2P) conversion for the TTS engine. 
+ """ + cfg = Configuration() + g2pm = self.config.get("g2p_module") + if g2pm: + if g2pm in find_g2p_plugins(): + cfg.setdefault("g2p", {}) + globl = cfg["g2p"].get("module") or g2pm + if globl != g2pm: + LOG.info(f"TTS requested {g2pm} explicitly, ignoring global module {globl} ") + cfg["g2p"]["module"] = g2pm + else: + LOG.warning(f"TTS selected {g2pm}, but it is not available!") + + try: + self.g2p = OVOSG2PFactory.create(cfg) + except: + LOG.debug("G2P plugin not loaded, there will be no mouth movements") + self.g2p = None - This method can be overridden to create chunks suitable to the - TTS engine in question. + def init(self, bus=None, playback=None): + """ Connects TTS object to PlaybackQueue in ovos-audio. + + This method needs to be called in order for self.execute to do anything + + not needed if using get_tts / synth methods directly as intended in standalone usage Arguments: - sentence (str): sentence to preprocess + bus: OpenVoiceOS messagebus connection + """ + self.bus = bus or FakeBus() + if playback is None: + LOG.warning("PlaybackThread should be inited by ovos-audio, initing via plugin has been deprecated, " + "please pass playback=PlaybackThread() to TTS.init") + if not TTS.playback: + playback = PlaybackThread(TTS.queue, self.bus) # compat + playback.start() + self._init_playback(playback) + self.add_metric({"metric_type": "tts.setup"}) + + def _init_playback(self, playback): + """ + Initializes the playback functionality for the TTS engine. + + Args: + playback: PlaybackThread instance. + """ + + TTS.playback = playback + TTS.playback.set_bus(self.bus) + if not TTS.playback.enclosure: + TTS.playback.enclosure = EnclosureAPI(self.bus) + + if not TTS.playback.is_alive(): + TTS.playback.start() + + def load_spellings(self, config=None) -> Dict[str, dict]: + """ + Loads phonetic spellings of words as a dictionary. + + Args: + config (dict, optional): Configuration settings. 
Returns: - list: list of sentence parts + dict: A dictionary of phonetic spellings. """ - return [sentence] + if config: + LOG.warning("config argument is deprecated and unused!") + spellings_data = {} + locale = f"{self.root_dir}/locale" + if os.path.isdir(locale): + for lang in os.listdir(locale): + spellings_file = f"{locale}/{lang}/phonetic_spellings.txt" + if not os.path.isfile(spellings_file): + continue + try: + with open(spellings_file) as f: + lines = filter(bool, f.read().split('\n')) + lines = [i.split(':') for i in lines] + spellings_data[lang] = {key.strip(): value.strip() for key, value in lines} + except ValueError: + LOG.exception(f'Failed to load {lang} phonetic spellings.') + return spellings_data + + ## execution events + def add_metric(self, metadata=None): + """ + Wraps handle_metric to catch exceptions and log timestamps. + + Args: + metadata (dict, optional): Additional metadata for the metric. + """ + try: + self.handle_metric(metadata) + if self.log_timestamps: + LOG.debug(f"time delta: {self.stopwatch.delta} metric: {metadata}") + except Exception as e: + LOG.exception(e) + + def begin_audio(self): + """Helper function for child classes to call in execute()""" + self.stopwatch.start() + self.add_metric({"metric_type": "tts.start"}) + + def end_audio(self, listen=False): + """Helper cleanup function for child classes to call in execute(). + + Arguments: + listen (bool): DEPRECATED: indication if listening trigger should be sent. + """ + self.add_metric({"metric_type": "tts.end"}) + self.stopwatch.stop() def execute(self, sentence, ident=None, listen=False, **kwargs): """Convert sentence to speech, preprocessing out unsupported ssml @@ -481,144 +535,149 @@ def execute(self, sentence, ident=None, listen=False, **kwargs): listen: (bool) True if listen should be triggered at the end of the utterance. 
""" + self.begin_audio() sentence = self.validate_ssml(sentence) self.add_metric({"metric_type": "tts.ssml.validated"}) self._execute(sentence, ident, listen, **kwargs) + self.end_audio() - def _replace_phonetic_spellings(self, sentence): - if self.phonetic_spelling: + ## synth + def _replace_phonetic_spellings(self, sentence:str, lang: str) -> str: + if self.phonetic_spelling and lang in self.spellings: for word in re.findall(r"[\w']+", sentence): - if word.lower() in self.spellings: - spelled = self.spellings[word.lower()] + if word.lower() in self.spellings[lang]: + spelled = self.spellings[lang][word.lower()] sentence = sentence.replace(word, spelled) return sentence - def _execute(self, sentence, ident, listen, **kwargs): - self.stopwatch.start() - sentence = self._replace_phonetic_spellings(sentence) - chunks = self._preprocess_sentence(sentence) - # Apply the listen flag to the last chunk, set the rest to False - chunks = [(chunks[i], listen if i == len(chunks) - 1 else False) - for i in range(len(chunks))] - self.add_metric({"metric_type": "tts.preprocessed", - "n_chunks": len(chunks)}) + def _get_visemes(self, phonemes, sentence, ctxt): + # get visemes/mouth movements + viseme = [] + if phonemes: + viseme = self.viseme(phonemes) + elif self.g2p is not None: + try: + viseme = self.g2p.utterance2visemes(sentence, ctxt.lang) + except OutOfVocabulary: + pass + except: + # this one is unplanned, let devs know all the info so they can fix it + LOG.exception(f"Unexpected failure in G2P plugin: {self.g2p}") + + if not viseme: + # Debug level because this is expected in default installs + LOG.debug(f"no mouth movements available! 
unknown visemes for {sentence}") + return viseme + + def _get_ctxt(self, kwargs=None) -> TTSContext: + """create a TTSContext from arbitrary kwargs passed to synth/execute methods + takes lang from Session into account if a message is present + """ + # get request specific synth params + kwargs = kwargs or {} + message = kwargs.get("message") or dig_for_message() + + # update kwargs from session + if message and "lang" not in kwargs: + sess = SessionManager.get(message) + kwargs["lang"] = sess.lang - lang, voice = self.context.get(kwargs) - tts_id = join(self.tts_name, voice, lang) + # voice from config + if "voice" not in kwargs: + kwargs["voice"] = self.voice + + # filter kwargs accepted by this specific plugin + kwargs = {k: v for k, v in kwargs.items() + if k in inspect.signature(self.get_tts).parameters + and k not in ["sentence", "wav_file"]} + + LOG.debug(f"TTS kwargs: {kwargs}") + return TTSContext(plugin_id=self.plugin_id, + lang=kwargs.get("lang") or Configuration().get("lang", "en-us"), + voice=kwargs.get("voice", "default"), + synth_kwargs=kwargs) + + def _execute(self, sentence, ident, listen, preprocess=True, **kwargs): + # get request specific synth params + ctxt = self._get_ctxt(kwargs) + + if preprocess: + # pre-process + sentence = self._replace_phonetic_spellings(sentence, ctxt.lang) + chunks = self.preprocess_sentence(sentence) + # Apply the listen flag to the last chunk, set the rest to False + chunks = [(chunks[i], listen if i == len(chunks) - 1 else False) + for i in range(len(chunks))] + + # metrics timing callback + self.add_metric({"metric_type": "tts.preprocessed", + "n_chunks": len(chunks)}) + else: + chunks = [(sentence, listen)] + + message = kwargs.get("message") or \ + dig_for_message() or \ + Message("speak", context={"session": {"session_id": ident}}) # synth -> queue for playback for sentence, l in chunks: # load from cache or synth + cache - audio_file, phonemes = self.synth(sentence, **kwargs) + audio_file, phonemes = 
self.synth(sentence, ctxt) # get visemes/mouth movements - viseme = [] - if phonemes: - viseme = self.viseme(phonemes) - elif self.g2p is not None: - try: - viseme = self.g2p.utterance2visemes(sentence, lang) - except OutOfVocabulary: - pass - except: - # this one is unplanned, let devs know all the info so they can fix it - LOG.exception(f"Unexpected failure in G2P plugin: {self.g2p}") - - if not viseme: - # Debug level because this is expected in default installs - LOG.debug(f"no mouth movements available! unknown visemes for {sentence}") - - message = kwargs.get("message") or \ - dig_for_message() or \ - Message("speak", context={"session": {"session_id": ident}}) + viseme = self._get_visemes(phonemes, sentence, ctxt) + + # queue audio for playback TTS.queue.put( - (str(audio_file), viseme, l, tts_id, message) + (str(audio_file), viseme, l, ctxt.tts_id, message) ) + + # metrics timing callback self.add_metric({"metric_type": "tts.queued"}) - def synth(self, sentence, **kwargs): - """ This method wraps get_tts - several optional keyword arguments are supported - sentence will be read/saved to cache""" + def synth(self, sentence, ctxt: TTSContext = None, **kwargs): + """ + Synthesizes speech for the given sentence. wraps get_tts + + sentence will be read/saved to cache + + Args: + sentence (str): The sentence to synthesize. + ctxt (TTSContext): The TTS context. + **kwargs: Additional synth arguments for get_tts. + + Returns: + tuple: A tuple containing the path to the synthesized audio file and phoneme data. + """ self.add_metric({"metric_type": "tts.synth.start"}) sentence_hash = hash_sentence(sentence) - # parse requested language for this TTS request - # NOTE: this is ovos/neon only functionality, not in mycroft-core! 
- lang, voice = self.context.get(kwargs) - kwargs["lang"] = lang - kwargs["voice"] = voice - - cache = self.get_cache(voice, lang) # cache per tts_id (lang/voice combo) + # parse kwargs for this TTS request + ctxt = ctxt or self._get_ctxt(kwargs) + cache = ctxt.get_cache(self.audio_ext, self.config) # load from cache if self.enable_cache and sentence_hash in cache: - audio, phonemes = self.get_from_cache(sentence, **kwargs) + audio, phonemes = ctxt.get_from_cache(sentence, cache) self.add_metric({"metric_type": "tts.synth.finished", "cache": True}) return audio, phonemes # synth + cache audio = cache.define_audio_file(sentence_hash) - - # filter kwargs per plugin, different plugins expose different options - # mycroft-core -> no kwargs - # ovos -> lang + voice optional kwargs - # neon-core -> message - kwargs = {k: v for k, v in kwargs.items() - if k in inspect.signature(self.get_tts).parameters - and k not in ["sentence", "wav_file"]} - - # finally do the TTS synth - audio.path, phonemes = self.get_tts(sentence, str(audio), **kwargs) + # ensure cache dir exists + base_dir = os.path.dirname(str(audio)) + if base_dir: # handle empty string + os.makedirs(base_dir, exist_ok=True) + audio.path, phonemes = self.get_tts(sentence, str(audio), + **ctxt.synth_kwargs) self.add_metric({"metric_type": "tts.synth.finished"}) # cache sentence + phonemes if self.enable_cache: - self._cache_sentence(sentence, audio, phonemes, sentence_hash, - voice=voice, lang=lang) + self._cache_sentence(sentence, ctxt.lang, audio, cache, + phonemes, sentence_hash) return audio, phonemes - def _cache_phonemes(self, sentence, phonemes=None, sentence_hash=None): - sentence_hash = sentence_hash or hash_sentence(sentence) - if not phonemes and self.g2p is not None: - try: - phonemes = self.g2p.utterance2arpa(sentence, self.lang) - self.add_metric({"metric_type": "tts.phonemes.g2p"}) - except Exception as e: - self.add_metric({"metric_type": "tts.phonemes.g2p.error", "error": str(e)}) - if phonemes: - 
return self.save_phonemes(sentence_hash, phonemes) - return None - - def _cache_sentence(self, sentence, audio_file, phonemes=None, sentence_hash=None, - voice=None, lang=None): - sentence_hash = sentence_hash or hash_sentence(sentence) - # RANT: why do you hate strings ChrisV? - if isinstance(audio_file.path, str): - audio_file.path = Path(audio_file.path) - pho_file = self._cache_phonemes(sentence, phonemes, sentence_hash) - cache = self.get_cache(voice=voice, lang=lang) - cache.cached_sentences[sentence_hash] = (audio_file, pho_file) - self.add_metric({"metric_type": "tts.synth.cached"}) - - def get_from_cache(self, sentence, **kwargs): - sentence_hash = hash_sentence(sentence) - phonemes = None - cache = self.context.get_cache(kwargs) - audio_file, pho_file = cache.cached_sentences[sentence_hash] - LOG.info(f"Found {audio_file.name} in TTS cache") - if not pho_file: - # guess phonemes from sentence + cache them - pho_file = self._cache_phonemes(sentence, sentence_hash) - if pho_file: - phonemes = pho_file.load() - return audio_file, phonemes - - def get_voice(self, gender, lang=None): - """ map a language and gender to a valid voice for this TTS engine """ - lang = lang or self.lang - return gender - def viseme(self, phonemes): """Create visemes from phonemes. @@ -645,31 +704,52 @@ def viseme(self, phonemes): float(0.2))) return visimes or None - def clear_cache(self): - """ Remove all cached files. """ - self.cache.clear() - - def save_phonemes(self, key, phonemes): - """Cache phonemes + ## cache + def _cache_phonemes(self, sentence, lang: str, cache: TextToSpeechCache = None, phonemes=None, sentence_hash=None): + """ + Caches phonemes for the given sentence. - Arguments: - key (str): Hash key for the sentence - phonemes (str): phoneme string to save + Args: + sentence (str): The sentence to cache phonemes for. + cache (TextToSpeechCache): The cache instance. + phonemes (str, optional): The phonemes for the sentence. 
+ sentence_hash (str, optional): The hash of the sentence. """ - phoneme_file = self.cache.define_phoneme_file(key) - phoneme_file.save(phonemes) - return phoneme_file + sentence_hash = sentence_hash or hash_sentence(sentence) + if not phonemes and self.g2p is not None: + try: + phonemes = self.g2p.utterance2arpa(sentence, lang) + self.add_metric({"metric_type": "tts.phonemes.g2p"}) + except Exception as e: + self.add_metric({"metric_type": "tts.phonemes.g2p.error", "error": str(e)}) + if phonemes: + phoneme_file = cache.define_phoneme_file(sentence_hash) + phoneme_file.save(phonemes) + return phoneme_file + return None - def load_phonemes(self, key): - """Load phonemes from cache file. + def _cache_sentence(self, sentence, lang: str, audio_file, cache, phonemes=None, sentence_hash=None): + """ + Caches the sentence along with associated audio and phonemes. - Arguments: - key (str): Key identifying phoneme cache + Args: + sentence (str): The sentence to cache. + audio_file (AudioFile): The audio file associated with the sentence. + cache (TextToSpeechCache): The cache instance. + phonemes (str, optional): The phonemes for the sentence. + sentence_hash (str, optional): The hash of the sentence. """ - phoneme_file = self.cache.define_phoneme_file(key) - return phoneme_file.load() + sentence_hash = sentence_hash or hash_sentence(sentence) + # RANT: why do you hate strings ChrisV? 
+ if isinstance(audio_file.path, str): + audio_file.path = Path(audio_file.path) + pho_file = self._cache_phonemes(sentence, lang, cache, phonemes, sentence_hash) + cache.cached_sentences[sentence_hash] = (audio_file, pho_file) + self.add_metric({"metric_type": "tts.synth.cached"}) + ## shutdown def stop(self): + """Stops the TTS playback.""" if TTS.playback: try: TTS.playback.stop() @@ -678,22 +758,188 @@ def stop(self): self.add_metric({"metric_type": "tts.stop"}) def shutdown(self): + """Shuts down the TTS engine.""" self.stop() - if TTS.playback: - TTS.playback.detach_tts(self) def __del__(self): + """Destructor for the TTS object.""" self.shutdown() + # below code is all deprecated and marked for removal in next stable release @property - def available_languages(self) -> set: - """Return languages supported by this TTS implementation in this state - This property should be overridden by the derived class to advertise - what languages that engine supports. + @deprecated("self.enclosure has been deprecated, use EnclosureAPI directly decoupled from the plugin code", + "1.0.0") + def enclosure(self): + """Deprecated. Accessor for the enclosure property. + Returns: - set: supported languages + EnclosureAPI: The EnclosureAPI instance associated with the TTS playback. """ - return set() + if not TTS.playback.enclosure: + bus = TTS.playback.bus or self.bus + TTS.playback.enclosure = EnclosureAPI(bus) + return TTS.playback.enclosure + + @enclosure.setter + @deprecated("self.enclosure has been deprecated, use EnclosureAPI directly decoupled from the plugin code", + "1.0.0") + def enclosure(self, val): + """Deprecated. Setter for the enclosure property. + + Arguments: + val (EnclosureAPI): The EnclosureAPI instance to set. + """ + TTS.playback.enclosure = val + + @property + @deprecated("self.filename has been deprecated, unused for a long time now", + "1.0.0") + def filename(self): + """Deprecated. Accessor for the filename property. 
+ + Returns: + str: The filename for the TTS audio. + """ + cache_dir = get_cache_directory(self.tts_name) + return join(cache_dir, 'tts.' + self.audio_ext) + + @filename.setter + @deprecated("self.filename has been deprecated, unused for a long time now", + "1.0.0") + def filename(self, val): + """Deprecated. Setter for the filename property. + + Arguments: + val (str): The filename to set. + """ + + @property + @deprecated("self.tts_id has been deprecated, use TTSContext().tts_id", + "1.0.0") + def tts_id(self): + """Deprecated. Accessor for the tts_id property. + + Returns: + str: The ID associated with the TTS context. + """ + return self._get_ctxt().tts_id + + @property + @deprecated("self.cache has been deprecated, use TTSContext().get_cache", + "1.0.0") + def cache(self): + """Deprecated. Accessor for the cache property. + + Returns: + TextToSpeechCache: The cache associated with the TTS context. + """ + return TTSContext._caches.get(self.tts_id) or \ + self.get_cache() + + @cache.setter + @deprecated("self.cache has been deprecated, use TTSContext().get_cache", + "1.0.0") + def cache(self, val): + """Deprecated. Setter for the cache property. + + Arguments: + val (TextToSpeechCache): The cache to set. + """ + TTSContext._caches[self.tts_id] = val + + @deprecated("get_voice was never formally adopted and is unused, it will be removed", + "1.0.0") + def get_voice(self, gender, lang=None): + """Deprecated. Get a valid voice for the TTS engine. + + Arguments: + gender (str): Gender of the voice. + lang (str, optional): Language for the voice. Defaults to None. + + Returns: + str: The selected voice. + """ + return gender + + @deprecated("get_cache has been deprecated, use TTSContext().get_cache directly", + "1.0.0") + def get_cache(self, voice=None, lang=None): + """Deprecated. Get the cache associated with the TTS context. + + Arguments: + voice (str, optional): Voice for the cache. Defaults to None. + lang (str, optional): Language for the cache. 
Defaults to None. + + Returns: + TextToSpeechCache: The cache associated with the TTS context. + """ + return self._get_ctxt().get_cache(self.audio_ext, self.config) + + @deprecated("clear_cache has been deprecated, use TTSContext().get_cache directly", + "1.0.0") + def clear_cache(self): + """Deprecated. Clear all cached files.""" + cache = self._get_ctxt().get_cache(self.audio_ext, self.config) + cache.clear() + + @deprecated("save_phonemes has been deprecated, use TTSContext().get_cache directly", + "1.0.0") + def save_phonemes(self, key, phonemes): + """Deprecated. Cache phonemes. + + Arguments: + key (str): Hash key for the sentence. + phonemes (str): Phoneme string to save. + + Returns: + PhonemeFile: The PhonemeFile instance. + """ + cache = self._get_ctxt().get_cache(self.audio_ext, self.config) + phoneme_file = cache.define_phoneme_file(key) + phoneme_file.save(phonemes) + return phoneme_file + + @deprecated("load_phonemes has been deprecated, use TTSContext().get_cache directly", + "1.0.0") + def load_phonemes(self, key): + """Deprecated. Load phonemes from cache file. + + Arguments: + key (str): Key identifying phoneme cache. + + Returns: + str: Phonemes loaded from the cache file. + """ + cache = self._get_ctxt().get_cache(self.audio_ext, self.config) + phoneme_file = cache.define_phoneme_file(key) + return phoneme_file.load() + + @deprecated("get_from_cache has been deprecated, use TTSContext().get_from_cache directly", + "1.0.0") + def get_from_cache(self, sentence): + """Deprecated. Get data from the cache. + + Arguments: + sentence (str): Sentence used as cache key. + + Returns: + tuple: Tuple containing the audio and phonemes. 
+ """ + return self._get_ctxt().get_from_cache(sentence, self.audio_ext, self.config) + + @property + def lang(self): + message = dig_for_message() + if message: + sess = SessionManager.get(message) + return sess.lang + return self.config.get("lang") or 'en-us' + + @lang.setter + @deprecated("language is defined per request in get_tts, self.lang is not used", + "1.0.0") + def lang(self, val): + LOG.warning("self.lang can not be set! it comes from the bus message") class TTSValidator: @@ -747,6 +993,7 @@ def __init__(self, *args, **kwargs): self.channels = self.config.get("channels", "1") self.rate = self.config.get("rate", "16000") + @abc.abstractmethod def sentence_to_files(self, sentence): """ list of ordered files to concatenate and form final wav file return files (list) , phonemes (list) @@ -797,6 +1044,175 @@ class RemoteTTSTimeoutException(RemoteTTSException): pass +class StreamingTTSCallbacks: + """handle the playback of streaming TTS, can be overrided in StreamingTTS""" + + def __init__(self, bus, play_args=None, tts_config=None): + self.bus = bus + self.config = tts_config or {} + self.play_args = play_args or ["paplay"] + self._process = None + + def stream_start(self, message=None): + """prepare anything needed to playback streamed audio + events: + - "ovos.common_play.duck" + - "recognizer_loop:audio_output_start" + """ + LOG.info(f"TTS stream start: {self.__class__.__name__}") + message = message or \ + dig_for_message() or \ + Message("speak") + + # we don't use the regular PlaybackThread here, we need to handle recognizer_loop:audio_output_start + if not self.config.get("pulse_duck", False): + self.bus.emit(message.forward("ovos.common_play.duck")) + self.bus.emit(message.forward("recognizer_loop:audio_output_start")) + + if self._process: + self.stream_stop() + LOG.debug(f"stream playback command: {self.play_args}") + self._process = subprocess.Popen(self.play_args, stdin=subprocess.PIPE) + + def stream_chunk(self, chunk): + """play streamed chunk 
of audio""" + LOG.debug(f"TTS stream chunk: {self.__class__.__name__} - {len(chunk)} bytes") + if self._process: + self._process.stdin.write(chunk) + self._process.stdin.flush() + + def stream_stop(self, listen=False, message=None): + """got all streamed audio, cleanup state + events: + - "ovos.common_play.unduck" + - "recognizer_loop:audio_output_end" + - 'mycroft.mic.listen' + """ + LOG.info(f"TTS stream stop: {self.__class__.__name__}") + message = message or \ + dig_for_message() or \ + Message("speak") + + if self._process: + self._process.stdin.close() + self._process.wait() + self._process = None + + # we don't use the regular PlaybackThread here, we need to handle recognizer_loop:audio_output_end and listen flag + if not self.config.get("pulse_duck", False): + self.bus.emit(message.forward("ovos.common_play.unduck")) + self.bus.emit(message.forward("recognizer_loop:audio_output_end")) + if listen: + self.bus.emit(message.forward('mycroft.mic.listen')) + + +class StreamingTTS(TTS): + """ + Abstract class for a Streaming TTS engine implementation. + Audio is streamed in chunks as it becomes available instead of waiting the full sentence to be synthesized + + this plugin can be used in a synchronous way like any other plugin via self.get_tts(sentence, wav_file) + + to play audio as it becomes available use self.generate_audio(sentence, wav_file) + + NOTE: StreamingTTS does not support phonemes + """ + + def init(self, bus=None, playback=None, callbacks=None): + """ Performs intial setup of TTS object. 
+ + Arguments: + bus: OpenVoiceOS messagebus connection + playback: PlaybackThread + callbacks: StreamingTTSCallbacks + """ + super().init(bus, playback) + self.callbacks = callbacks or StreamingTTSCallbacks(self.bus, + tts_config=self.config) + + @abc.abstractmethod + async def stream_tts(self, sentence, **kwargs) -> AsyncIterable[bytes]: + """yield chunks of TTS audio as they become available""" + raise NotImplementedError + + async def generate_audio(self, sentence, wav_file, play_streaming=True, + listen=False, message=None, plugin_kwargs=None): + """save streamed TTS to wav file, if configured also play TTS as it becomes available""" + plugin_kwargs = plugin_kwargs or {} + if play_streaming: + self.callbacks.stream_start(message) + with open(wav_file, "wb") as f: + try: + async for chunk in self.stream_tts(sentence, **plugin_kwargs): + f.write(chunk) + if play_streaming: + self.callbacks.stream_chunk(chunk) + finally: + if play_streaming: + self.callbacks.stream_stop(listen, message) + return wav_file + + def _execute(self, sentence, ident, listen, **kwargs): + + # parse requested language for this TTS request + ctxt = self._get_ctxt(kwargs) + cache = ctxt.get_cache(self.audio_ext, self.config) + + sentence = self._replace_phonetic_spellings(sentence, ctxt.lang) + self.add_metric({"metric_type": "tts.preprocessed"}) + + sentence_hash = hash_sentence(sentence) + + # if cached, play existing file instead + if self.enable_cache and sentence_hash in cache: + super()._execute(sentence, ident, listen, + preprocess=False, **ctxt.synth_kwargs) + return + + wav_file = str(cache.define_audio_file(sentence_hash)) + + message = kwargs.get("message") or \ + dig_for_message() or \ + Message("speak") + + # filter kwargs accepted by this specific plugin + ctxt.synth_kwargs = {k: v for k, v in kwargs.items() + if k in inspect.signature(self.stream_tts).parameters + and k not in ["sentence"]} + + # handle streaming TTS + loop = asyncio.new_event_loop() + 
asyncio.set_event_loop(loop) + try: + self.add_metric({"metric_type": "tts.stream.start"}) + loop.run_until_complete( + self.generate_audio(sentence, wav_file, + play_streaming=True, + listen=listen, + message=message, + plugin_kwargs=ctxt.synth_kwargs) + ) + finally: + loop.close() + self.add_metric({"metric_type": "tts.stream.end"}) + + def get_tts(self, sentence, wav_file, **kwargs): + """wrap streaming TTS into sync usage""" + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + wav_file = loop.run_until_complete( + self.generate_audio(sentence, wav_file, + play_streaming=False, + plugin_kwargs=kwargs) + ) + finally: + loop.close() + return wav_file, None # No phonemes + + +# below classes are deprecated and will be removed in 0.1.0 + class RemoteTTS(TTS): """ Abstract class for a Remote TTS engine implementation. @@ -804,6 +1220,8 @@ class RemoteTTS(TTS): Usage is discouraged """ + @deprecated("RemoteTTS has been deprecated, please use the regular TTS class", + "1.0.0") def __init__(self, lang, config, url, api_path, validator): super(RemoteTTS, self).__init__(lang, config, validator) self.api_path = api_path @@ -822,3 +1240,20 @@ def get_tts(self, sentence, wav_file, lang=None): with open(wav_file, 'wb') as f: f.write(r.content) return wav_file, None + + +class PlaybackThread(Thread): + """ PlaybackThread moved to ovos_audio.playback + standalone plugin usage should rely on self.get_tts + ovos-audio relies on self.execute and needs this class + + this class was only in ovos-plugin-manager in order to + patch usage of our plugins in mycroft-core""" + + def __new__(self, *args, **kwargs): + LOG.warning("PlaybackThread moved to ovos_audio.playback") + try: + from ovos_audio.playback import PlaybackThread + return PlaybackThread(*args, **kwargs) + except ImportError: + raise ImportError("please install ovos-audio for playback handling") diff --git a/ovos_plugin_manager/text_transformers.py b/ovos_plugin_manager/text_transformers.py index 
efc277dd..4f118092 100644 --- a/ovos_plugin_manager/text_transformers.py +++ b/ovos_plugin_manager/text_transformers.py @@ -1,23 +1,6 @@ from ovos_plugin_manager.utils import normalize_lang, \ PluginTypes, PluginConfigTypes from ovos_plugin_manager.templates.transformers import UtteranceTransformer -from ovos_utils import LOG - - -def find_plugins(*args, **kwargs): - # TODO: Deprecate in 0.1.0 - LOG.warning("This reference is deprecated. " - "Import from ovos_plugin_manager.utils directly") - from ovos_plugin_manager.utils import find_plugins - return find_plugins(*args, **kwargs) - - -def load_plugin(*args, **kwargs): - # TODO: Deprecate in 0.1.0 - LOG.warning("This reference is deprecated. " - "Import from ovos_plugin_manager.utils directly") - from ovos_plugin_manager.utils import load_plugin - return load_plugin(*args, **kwargs) def find_utterance_transformer_plugins() -> dict: @@ -84,25 +67,3 @@ def get_utterance_transformer_supported_langs() -> dict: from ovos_plugin_manager.utils.config import get_plugin_supported_languages return get_plugin_supported_languages(PluginTypes.UTTERANCE_TRANSFORMER) - -def find_text_transformer_plugins() -> dict: - """ - Find all installed plugins - @return: dict plugin names to entrypoints - """ - # TODO: Deprecate in 0.1.0 - LOG.warning(f"This reference is deprecated. " - f"Use `find_utterance_transformer_plugins") - return find_utterance_transformer_plugins() - - -def load_text_transformer_plugin(module_name: str) -> type(UtteranceTransformer): - """ - Get an uninstantiated class for the requested module_name - @param module_name: Plugin entrypoint name to load - @return: Uninstantiated class - """ - # TODO: Deprecate in 0.1.0 - LOG.warning(f"This reference is deprecated. 
" - f"Use `find_utterance_transformer_plugins") - return load_utterance_transformer_plugin(module_name) diff --git a/ovos_plugin_manager/thirdparty/__init__.py b/ovos_plugin_manager/thirdparty/__init__.py new file mode 100644 index 00000000..ebc428b9 --- /dev/null +++ b/ovos_plugin_manager/thirdparty/__init__.py @@ -0,0 +1 @@ +# any code that isnt OVOS original should live in this submodule to ensure proper attribution diff --git a/ovos_plugin_manager/thirdparty/solvers.py b/ovos_plugin_manager/thirdparty/solvers.py new file mode 100644 index 00000000..91bdef55 --- /dev/null +++ b/ovos_plugin_manager/thirdparty/solvers.py @@ -0,0 +1,166 @@ +# NEON AI (TM) SOFTWARE, Software Development Kit & Application Framework +# All trademark and other rights reserved by their respective owners +# Copyright 2008-2022 Neongecko.com Inc. +# Contributors: Daniel McKnight, Guy Daniels, Elon Gasper, Richard Leeds, +# Regina Bloomstine, Casimiro Ferreira, Andrii Pernatii, Kirill Hrymailo +# BSD-3 License +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, +# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from functools import lru_cache +from typing import Optional, List, Dict + +from ovos_utils import flatten_list +from ovos_utils.log import LOG +from quebra_frases import sentence_tokenize + +from ovos_plugin_manager.language import OVOSLangTranslationFactory, OVOSLangDetectionFactory +from ovos_plugin_manager.templates.language import LanguageTranslator, LanguageDetector + + +class AbstractSolver: + """Base class for solvers that perform various NLP tasks.""" + + def __init__(self, config=None, + translator: Optional[LanguageTranslator] = None, + detector: Optional[LanguageDetector] = None, + priority=50, + enable_tx=False, + enable_cache=False, + internal_lang: Optional[str] = None, + *args, **kwargs): + self.priority = priority + self.enable_tx = enable_tx + self.enable_cache = enable_cache + self.config = config or {} + self.supported_langs = self.config.get("supported_langs") or [] + self.default_lang = internal_lang or self.config.get("lang", "en") + if self.default_lang not in self.supported_langs: + self.supported_langs.insert(0, self.default_lang) + self._translator = translator or OVOSLangTranslationFactory.create() if self.enable_tx else None + self._detector = detector or OVOSLangDetectionFactory.create() if self.enable_tx else None + LOG.debug(f"{self.__class__.__name__} default language: {self.default_lang}") + + @property + def detector(self): + """ language detector, lazy init on first access""" + if not self._detector: 
+ # if it's being used, there is no recovery, do not try: except: + self._detector = OVOSLangDetectionFactory.create() + return self._detector + + @detector.setter + def detector(self, val): + self._detector = val + + @property + def translator(self): + """ language translator, lazy init on first access""" + if not self._translator: + # if it's being used, there is no recovery, do not try: except: + self._translator = OVOSLangTranslationFactory.create() + return self._translator + + @translator.setter + def translator(self, val): + self._translator = val + + @staticmethod + def sentence_split(text: str, max_sentences: int = 25) -> List[str]: + """ + Split text into sentences. + + :param text: Input text. + :param max_sentences: Maximum number of sentences to return. + :return: List of sentences. + """ + try: + # sentence_tokenize occasionally has issues with \n for some reason + return flatten_list([sentence_tokenize(t) + for t in text.split("\n")])[:max_sentences] + except Exception as e: + LOG.exception(f"Error in sentence_split: {e}") + return [text] + + @lru_cache(maxsize=128) + def detect_language(self, text: str) -> str: + """ + Detect the language of the input text. + + :param text: Input text. + :return: Detected language code. + """ + return self.detector.detect(text) + + @lru_cache(maxsize=128) + def translate(self, text: str, + target_lang: Optional[str] = None, + source_lang: Optional[str] = None) -> str: + """ + Translate text from source_lang to target_lang. + + :param text: Input text. + :param target_lang: Target language code. + :param source_lang: Source language code. + :return: Translated text. 
+ """ + source_lang = source_lang or self.detect_language(text) + target_lang = target_lang or self.default_lang + if source_lang.split("-")[0] == target_lang.split("-")[0]: + return text # skip translation + return self.translator.translate(text, + target=target_lang, + source=source_lang) + + def translate_list(self, data: List[str], + target_lang: Optional[str] = None, + source_lang: Optional[str] = None) -> List[str]: + """ + Translate a list of strings from source_lang to target_lang. + + :param data: List of strings. + :param target_lang: Target language code. + :param source_lang: Source language code. + :return: List of translated strings. + """ + return self.translator.translate_list(data, + lang_tgt=target_lang, + lang_src=source_lang) + + def translate_dict(self, data: Dict[str, str], + target_lang: Optional[str] = None, + source_lang: Optional[str] = None) -> Dict[str, str]: + """ + Translate a dictionary of strings from source_lang to target_lang. + + :param data: Dictionary of strings. + :param target_lang: Target language code. + :param source_lang: Source language code. + :return: Dictionary of translated strings. + """ + return self.translator.translate_dict(data, + lang_tgt=target_lang, + lang_src=source_lang) + + def shutdown(self): + """Module specific shutdown method.""" + pass diff --git a/ovos_plugin_manager/tokenization.py b/ovos_plugin_manager/tokenization.py index 598331fa..2ce8f1ff 100644 --- a/ovos_plugin_manager/tokenization.py +++ b/ovos_plugin_manager/tokenization.py @@ -5,22 +5,6 @@ from ovos_plugin_manager.templates.tokenization import Tokenizer -def find_plugins(*args, **kwargs): - # TODO: Deprecate in 0.1.0 - LOG.warning("This reference is deprecated. " - "Import from ovos_plugin_manager.utils directly") - from ovos_plugin_manager.utils import find_plugins - return find_plugins(*args, **kwargs) - - -def load_plugin(*args, **kwargs): - # TODO: Deprecate in 0.1.0 - LOG.warning("This reference is deprecated. 
" - "Import from ovos_plugin_manager.utils directly") - from ovos_plugin_manager.utils import load_plugin - return load_plugin(*args, **kwargs) - - def find_tokenization_plugins() -> dict: """ Find all installed plugins diff --git a/ovos_plugin_manager/tts.py b/ovos_plugin_manager/tts.py index ba1983a2..b529ad3a 100644 --- a/ovos_plugin_manager/tts.py +++ b/ovos_plugin_manager/tts.py @@ -11,22 +11,6 @@ from hashlib import md5 -def find_plugins(*args, **kwargs): - # TODO: Deprecate in 0.1.0 - LOG.warning("This reference is deprecated. " - "Import from ovos_plugin_manager.utils directly") - from ovos_plugin_manager.utils import find_plugins - return find_plugins(*args, **kwargs) - - -def load_plugin(*args, **kwargs): - # TODO: Deprecate in 0.1.0 - LOG.warning("This reference is deprecated. " - "Import from ovos_plugin_manager.utils directly") - from ovos_plugin_manager.utils import load_plugin - return load_plugin(*args, **kwargs) - - def find_tts_plugins() -> dict: """ Find all installed plugins @@ -143,25 +127,6 @@ def get_voices(scan=False): class OVOSTTSFactory: """ replicates the base mycroft class, but uses only OPM enabled plugins""" - MAPPINGS = { - "dummy": "ovos-tts-plugin-dummy", - "mimic": "ovos-tts-plugin-mimic", - "mimic2": "ovos-tts-plugin-mimic2", - "mimic3": "ovos-tts-plugin-mimic3", - "google": "ovos-tts-plugin-google-tx", - "marytts": "ovos-tts-plugin-marytts", - # "fatts": FATTS, - # "festival": Festival, - "espeak": "ovos_tts_plugin_espeakng", - # "spdsay": SpdSay, - # "watson": WatsonTTS, - # "bing": BingTTS, - "responsive_voice": "ovos-tts-plugin-responsivevoice", - # "yandex": YandexTTS, - "polly": "ovos-tts-plugin-polly", - # "mozilla": MozillaTTS, - "pico": "ovos-tts-plugin-pico" - } @staticmethod def get_class(config=None): @@ -175,9 +140,7 @@ def get_class(config=None): } """ config = config or get_tts_config() - tts_module = config.get("module") or "dummy" - if tts_module in OVOSTTSFactory.MAPPINGS: - tts_module = 
OVOSTTSFactory.MAPPINGS[tts_module] + tts_module = config.get("module") or "ovos-tts-plugin-dummy" return load_tts_plugin(tts_module) @staticmethod @@ -192,19 +155,13 @@ def create(config=None): } """ tts_config = get_tts_config(config) - tts_module = tts_config.get('module', 'dummy') - if tts_module in OVOSTTSFactory.MAPPINGS: - # The configured module maps to a valid plugin; get configuration - # again to make sure any module-specific config/overrides are loaded - log_deprecation("Module mappings will be deprecated", "0.1.0") - tts_module = OVOSTTSFactory.MAPPINGS[tts_module] - tts_config = get_tts_config(config, tts_module) + tts_module = tts_config.get('module', 'ovos-tts-plugin-dummy') try: clazz = OVOSTTSFactory.get_class(tts_config) if clazz: LOG.info(f'Found plugin {tts_module}') - tts = clazz(lang=None, # explicitly read lang from config - config=tts_config) + tts = clazz(config=tts_config) + tts._plugin_id = tts_module tts.validator.validate() LOG.info(f'Loaded plugin {tts_module}') else: @@ -216,3 +173,8 @@ def create(config=None): f'\nAvailable modules: {modules}') raise return tts + + +if __name__ == "__main__": + lang = "en-us" + print(find_tts_plugins()) \ No newline at end of file diff --git a/ovos_plugin_manager/utils/__init__.py b/ovos_plugin_manager/utils/__init__.py index 26f3bd51..8972c4bf 100644 --- a/ovos_plugin_manager/utils/__init__.py +++ b/ovos_plugin_manager/utils/__init__.py @@ -12,16 +12,21 @@ # """Common functions for loading plugins.""" import time +from collections import deque from enum import Enum -from threading import Event +from threading import Event, Lock from typing import Optional import pkg_resources - -from ovos_utils.log import LOG +from ovos_utils.log import LOG, log_deprecation class PluginTypes(str, Enum): + PIPELINE = "opm.pipeline" + EMBEDDINGS = "opm.embeddings" + FACE_EMBEDDINGS = "opm.embeddings.face" + VOICE_EMBEDDINGS = "opm.embeddings.voice" + TEXT_EMBEDDINGS = "opm.embeddings.text" GUI = "ovos.plugin.gui" 
PHAL = "ovos.plugin.phal" ADMIN = "ovos.plugin.phal.admin" @@ -30,7 +35,7 @@ class PluginTypes(str, Enum): VAD = "ovos.plugin.VAD" PHONEME = "ovos.plugin.g2p" AUDIO2IPA = "ovos.plugin.audio2ipa" - AUDIO = 'mycroft.plugin.audioservice' + AUDIO = 'mycroft.plugin.audioservice' # DEPRECATED STT = 'mycroft.plugin.stt' TTS = 'mycroft.plugin.tts' WAKEWORD = 'mycroft.plugin.wake_word' @@ -52,10 +57,18 @@ class PluginTypes(str, Enum): TOKENIZATION = "intentbox.tokenization" POSTAG = "intentbox.postag" STREAM_EXTRACTOR = "ovos.ocp.extractor" + AUDIO_PLAYER = "opm.media.audio" + VIDEO_PLAYER = "opm.media.video" + WEB_PLAYER = "opm.media.web" PERSONA = "opm.plugin.persona" # personas are a dict, they have no config because they ARE a config class PluginConfigTypes(str, Enum): + PIPELINE = "opm.pipeline.config" + EMBEDDINGS = "opm.embeddings.config" + FACE_EMBEDDINGS = "opm.embeddings.face.config" + VOICE_EMBEDDINGS = "opm.embeddings.voice.config" + TEXT_EMBEDDINGS = "opm.embeddings.text.config" GUI = "ovos.plugin.gui.config" PHAL = "ovos.plugin.phal.config" ADMIN = "ovos.plugin.phal.admin.config" @@ -86,6 +99,9 @@ class PluginConfigTypes(str, Enum): TOKENIZATION = "intentbox.tokenization.config" POSTAG = "intentbox.postag.config" STREAM_EXTRACTOR = "ovos.ocp.extractor.config" + AUDIO_PLAYER = "opm.media.audio.config" + VIDEO_PLAYER = "opm.media.video.config" + WEB_PLAYER = "opm.media.web.config" def find_plugins(plug_type: PluginTypes = None) -> dict: @@ -179,36 +195,49 @@ def normalize_lang(lang): class ReadWriteStream: """ Class used to support writing binary audio data at any pace, - optionally chopping when the buffer gets too large + with an optional maximum buffer size """ - def __init__(self, s=b'', chop_samples=-1): - self.buffer = s + def __init__(self, s=b'', chop_samples=-1, max_size=None): + if chop_samples != -1: + log_deprecation("'chop_samples' kwarg has been deprecated and will be ignored", "1.0.0") + self.buffer = deque(s) self.write_event = Event() - 
self.chop_samples = chop_samples + self.lock = Lock() + self.max_size = max_size # Introduce max size def __len__(self): - return len(self.buffer) + with self.lock: + return len(self.buffer) def read(self, n=-1, timeout=None): - if n == -1: - n = len(self.buffer) - if 0 < self.chop_samples < len(self.buffer): - samples_left = len(self.buffer) % self.chop_samples - self.buffer = self.buffer[-samples_left:] - return_time = 1e10 if timeout is None else ( - timeout + time.time() - ) - while len(self.buffer) < n: + with self.lock: + if n == -1 or n > len(self.buffer): + n = len(self.buffer) + + end_time = time.time() + timeout if timeout is not None else float('inf') + + while True: + with self.lock: + if len(self.buffer) >= n: + chunk = bytes([self.buffer.popleft() for _ in range(n)]) + return chunk + + remaining_time = None + if timeout is not None: + remaining_time = end_time - time.time() + if remaining_time <= 0: + return b'' + self.write_event.clear() - if not self.write_event.wait(return_time - time.time()): - return b'' - chunk = self.buffer[:n] - self.buffer = self.buffer[n:] - return chunk + self.write_event.wait(remaining_time) def write(self, s): - self.buffer += s + with self.lock: + self.buffer.extend(s) + if self.max_size is not None: + while len(self.buffer) > self.max_size: + self.buffer.popleft() # Discard oldest data to maintain max size self.write_event.set() def flush(self): @@ -216,4 +245,5 @@ def flush(self): pass def clear(self): - self.buffer = b'' + with self.lock: + self.buffer.clear() diff --git a/ovos_plugin_manager/utils/tts_cache.py b/ovos_plugin_manager/utils/tts_cache.py index 0b2ef5bf..177feebe 100644 --- a/ovos_plugin_manager/utils/tts_cache.py +++ b/ovos_plugin_manager/utils/tts_cache.py @@ -241,6 +241,7 @@ def __init__(self, tts_config, tts_name, audio_file_type): # only persist if utterance is spoken >= N times self.persist_thresh = self.config.get("persist_thresh", 1) self._sentence_count = {} + self.load_persistent_cache() def 
__contains__(self, sha): """The cache contains a SHA if it knows of it and it exists on disk.""" diff --git a/ovos_plugin_manager/vad.py b/ovos_plugin_manager/vad.py index 1715d53d..966c3edf 100644 --- a/ovos_plugin_manager/vad.py +++ b/ovos_plugin_manager/vad.py @@ -4,22 +4,6 @@ from ovos_plugin_manager.templates.vad import VADEngine -def find_plugins(*args, **kwargs): - # TODO: Deprecate in 0.1.0 - LOG.warning("This reference is deprecated. " - "Import from ovos_plugin_manager.utils directly") - from ovos_plugin_manager.utils import find_plugins - return find_plugins(*args, **kwargs) - - -def load_plugin(*args, **kwargs): - # TODO: Deprecate in 0.1.0 - LOG.warning("This reference is deprecated. " - "Import from ovos_plugin_manager.utils directly") - from ovos_plugin_manager.utils import load_plugin - return load_plugin(*args, **kwargs) - - def find_vad_plugins() -> dict: """ Find all installed plugins diff --git a/ovos_plugin_manager/version.py b/ovos_plugin_manager/version.py index 6cdbd02a..c79fb1ef 100644 --- a/ovos_plugin_manager/version.py +++ b/ovos_plugin_manager/version.py @@ -1,7 +1,7 @@ # The following lines are replaced during the release process. # START_VERSION_BLOCK VERSION_MAJOR = 0 -VERSION_MINOR = 0 -VERSION_BUILD = 25 +VERSION_MINOR = 1 +VERSION_BUILD = 1 VERSION_ALPHA = 0 # END_VERSION_BLOCK diff --git a/ovos_plugin_manager/wakewords.py b/ovos_plugin_manager/wakewords.py index f667f997..f098b64a 100644 --- a/ovos_plugin_manager/wakewords.py +++ b/ovos_plugin_manager/wakewords.py @@ -10,22 +10,6 @@ PluginTypes, PluginConfigTypes -def find_plugins(*args, **kwargs): - # TODO: Deprecate in 0.1.0 - LOG.warning("This reference is deprecated. " - "Import from ovos_plugin_manager.utils directly") - from ovos_plugin_manager.utils import find_plugins - return find_plugins(*args, **kwargs) - - -def load_plugin(*args, **kwargs): - # TODO: Deprecate in 0.1.0 - LOG.warning("This reference is deprecated. 
" - "Import from ovos_plugin_manager.utils directly") - from ovos_plugin_manager.utils import load_plugin - return load_plugin(*args, **kwargs) - - def find_wake_word_plugins() -> dict: """ Find all installed plugins diff --git a/requirements/requirements.txt b/requirements/requirements.txt index b10d2770..9ae8badd 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -1,5 +1,5 @@ -ovos-utils < 0.2.0, >=0.0.37 -ovos-bus-client < 0.2.0, >=0.0.8 +ovos-utils < 0.2.0, >=0.0.38 +ovos-bus-client < 0.2.0, >=0.0.9 ovos-config < 0.2.0, >=0.0.12 combo_lock~=0.2 requests~=2.26 @@ -8,3 +8,6 @@ langcodes~=3.3.0 # see https://github.com/pypa/setuptools/issues/1471 importlib_metadata + +# needed explicitly since python 3.12 +setuptools diff --git a/requirements/test.txt b/requirements/test.txt index 48548151..4373cf13 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -1,4 +1,6 @@ pytest pytest-timeout pytest-cov -ovos-translate-server-plugin \ No newline at end of file +ovos-translate-server-plugin +ovos-classifiers +ovos-utils>=0.1.0a8 \ No newline at end of file diff --git a/setup.py b/setup.py index 8137f04a..b1fe1515 100644 --- a/setup.py +++ b/setup.py @@ -56,6 +56,8 @@ def required(requirements_file): STT_PLUGIN_ENTRY_POINT = 'ovos-stt-plugin-dummy=ovos_plugin_manager.templates.stt:STT' WW_PLUGIN_ENTRY_POINT = 'ovos-ww-plugin-dummy=ovos_plugin_manager.templates.hotwords:HotWordEngine' +with open(os.path.join(BASEDIR, "README.md"), "r") as f: + long_description = f.read() setup( name='ovos-plugin-manager', @@ -63,6 +65,7 @@ def required(requirements_file): packages=['ovos_plugin_manager', 'ovos_plugin_manager.templates', 'ovos_plugin_manager.utils', + 'ovos_plugin_manager.thirdparty', 'ovos_plugin_manager.hardware', 'ovos_plugin_manager.hardware.led'], url='https://github.com/OpenVoiceOS/OVOS-plugin-manager', @@ -70,8 +73,10 @@ def required(requirements_file): author='jarbasAi', 
install_requires=required("requirements/requirements.txt"), package_data={'': package_files('ovos-plugin-manager')}, - author_email='jarbasai@mailfence.com', + author_email='jarbas@openvoiceos.com', description='OpenVoiceOS plugin manager', + long_description=long_description, + long_description_content_type="text/markdown", entry_points={ 'intentbox.segmentation': SEG_PLUGIN_ENTRY_POINT, 'intentbox.tokenization': TOK_PLUGIN_ENTRY_POINT, diff --git a/test/unittests/test_audio.py b/test/unittests/test_audio.py index 1ad550ff..59db48d8 100644 --- a/test/unittests/test_audio.py +++ b/test/unittests/test_audio.py @@ -6,11 +6,11 @@ class TestAudioTemplate(unittest.TestCase): def test_audio_backend(self): - from ovos_plugin_manager.templates.audio import AudioBackend + from ovos_plugin_manager.templates.media import AudioPlayerBackend # TODO def test_remote_audio_backend(self): - from ovos_plugin_manager.templates.audio import RemoteAudioBackend + from ovos_plugin_manager.templates.media import RemoteAudioPlayerBackend class TestAudio(unittest.TestCase): diff --git a/test/unittests/test_audio_transformers.py b/test/unittests/test_audio_transformers.py index 70d8ba69..2e44a72c 100644 --- a/test/unittests/test_audio_transformers.py +++ b/test/unittests/test_audio_transformers.py @@ -1,12 +1,60 @@ import unittest +import time from unittest.mock import patch -from ovos_plugin_manager.utils import PluginTypes, PluginConfigTypes +from ovos_plugin_manager.utils import PluginTypes, PluginConfigTypes, ReadWriteStream + + +class TestReadWriteStream(unittest.TestCase): + def test_write_and_read(self): + # Initialize the stream + stream = ReadWriteStream() + + # Write some data to the stream + stream.write(b'1234567890abcdefghijklmnopqrstuvwxyz') + + # Read some data from the stream + self.assertEqual(stream.read(10), b'1234567890') + + # Read more data with a timeout + self.assertEqual(stream.read(5, timeout=1), b'abcde') + + def test_clear_buffer(self): + # Initialize the stream + 
stream = ReadWriteStream() + + # Write some data to the stream + stream.write(b'1234567890abcdefghijklmnopqrstuvwxyz') + + # Clear the buffer + stream.clear() + self.assertEqual(len(stream), 0) + + def test_write_with_max_size(self): + # Initialize the stream with a max size of 20 bytes + stream = ReadWriteStream(max_size=20) + + # Write some data to the stream + stream.write(b'1234567890abcdefghijklmnopqrstuvwxyz') + + # The buffer should have been trimmed to the last 20 bytes + self.assertEqual(stream.read(20), b'ghijklmnopqrstuvwxyz') + + def test_clear_buffer_with_max_size(self): + # Initialize the stream with a max size of 20 bytes + stream = ReadWriteStream(max_size=20) + + # Write some data to the stream + stream.write(b'1234567890abcdefghijklmnopqrstuvwxyz') + + # Clear the buffer + stream.clear() + self.assertEqual(len(stream), 0) class TestAudioTransformersTemplate(unittest.TestCase): def test_audio_transformer(self): - from ovos_plugin_manager.templates.transformers import AudioTransformer + pass # TODO diff --git a/test/unittests/test_ocp.py b/test/unittests/test_ocp.py index 61cbf058..3305dd43 100644 --- a/test/unittests/test_ocp.py +++ b/test/unittests/test_ocp.py @@ -22,10 +22,7 @@ def test_find_plugins(self, find_plugins): find_ocp_plugins() find_plugins.assert_called_once_with(self.PLUGIN_TYPE) - @patch("ovos_plugin_manager.utils.find_plugins") - def test_stream_handler(self, find_plugins): + def test_stream_handler(self): from ovos_plugin_manager.ocp import StreamHandler handler = StreamHandler() self.assertIsInstance(handler.extractors, dict) - find_plugins.assert_called_once_with(PluginTypes.STREAM_EXTRACTOR) - # TODO: More tests diff --git a/test/unittests/test_phal.py b/test/unittests/test_phal.py index 3f3f0ccf..89232d01 100644 --- a/test/unittests/test_phal.py +++ b/test/unittests/test_phal.py @@ -17,12 +17,17 @@ def test_PHAL_Plugin(self): from ovos_plugin_manager.templates.phal import PHALValidator # TODO - def test_admin_classes(self): - 
from ovos_plugin_manager.templates.phal import AdminPlugin, \ - AdminValidator, PHALPlugin, PHALValidator - self.assertEqual(AdminPlugin, PHALPlugin) - self.assertEqual(AdminValidator, PHALValidator) + def test_Admin_Validator(self): + from ovos_plugin_manager.templates.phal import AdminValidator + self.assertTrue(AdminValidator.validate()) + self.assertTrue(AdminValidator.validate({"test": "val"})) + self.assertTrue(AdminValidator.validate({"enabled": True})) + self.assertFalse(AdminValidator.validate({"enabled": False})) + self.assertFalse(AdminValidator.validate({"enabled": None})) + def test_Admin_Plugin(self): + from ovos_plugin_manager.templates.phal import AdminPlugin + # TODO class TestPHAL(unittest.TestCase): PLUGIN_TYPE = PluginTypes.PHAL diff --git a/test/unittests/test_solver.py b/test/unittests/test_solver.py index c460f267..354b18af 100644 --- a/test/unittests/test_solver.py +++ b/test/unittests/test_solver.py @@ -1,8 +1,10 @@ import unittest -from unittest.mock import Mock, patch +from unittest.mock import Mock, patch, MagicMock +from ovos_plugin_manager.templates.solvers import QuestionSolver, auto_detect_lang, auto_translate, _deprecate_context2lang, AbstractSolver from ovos_plugin_manager.utils import PluginTypes, PluginConfigTypes -from ovos_plugin_manager.templates.solvers import QuestionSolver + + # TODO: Test Tldr, Evidence, MultipleChoice, Entailment @@ -141,11 +143,11 @@ def test_translation(self): solver.translator.translate.return_value = "a wild translation appears" # no translation - ans = solver.spoken_answer("some query") + ans = solver.spoken_answer("some query", lang="en") solver.translator.translate.assert_not_called() # translation - ans = solver.spoken_answer("not english", context={"lang": "unk"}) + ans = solver.spoken_answer("not english", lang="unk") solver.translator.translate.assert_called() @@ -398,3 +400,76 @@ def test_get_supported_langs(self, get_supported_languages): get_reading_comprehension_solver_supported_langs 
get_reading_comprehension_solver_supported_langs() get_supported_languages.assert_called_once_with(self.PLUGIN_TYPE) + + +class TestAutoTranslate(unittest.TestCase): + def setUp(self): + self.solver = AbstractSolver(enable_tx=True, default_lang='en') + self.solver.translate = MagicMock(side_effect=lambda text, source_lang=None, target_lang=None: text[ + ::-1] if source_lang and target_lang else text) + + def test_auto_translate_decorator(self): + @auto_translate(translate_keys=['text']) + def test_func(solver, text, lang=None): + return text[::-1] + + result = test_func(self.solver, 'hello', lang='es') + self.assertEqual(result, 'olleh') # 'hello' reversed due to mock translation + + def test_auto_translate_no_translation(self): + @auto_translate(translate_keys=['text']) + def test_func(solver, text, lang=None): + return text + + result = test_func(self.solver, 'hello') + self.assertEqual(result, 'hello') + + +class TestAutoDetectLang(unittest.TestCase): + def setUp(self): + self.solver = AbstractSolver() + self.solver.detect_language = MagicMock(return_value='en') + + def test_auto_detect_lang_decorator(self): + self.solver.detector = Mock() + self.solver.detector.detect.return_value = "en" + + @auto_detect_lang(text_keys=['text']) + def test_func(solver, text, lang=None): + return lang + + result = test_func(self.solver, 'hello world') + self.assertEqual(result, 'en') + + def test_auto_detect_lang_with_lang(self): + @auto_detect_lang(text_keys=['text']) + def test_func(solver, text, lang=None): + return lang + + result = test_func(self.solver, 'hello', lang='es') + self.assertEqual(result, 'es') + + +class TestDeprecateContext2Lang(unittest.TestCase): + def setUp(self): + self.solver = AbstractSolver() + + def test_deprecate_context2lang(self): + @_deprecate_context2lang() + def test_func(solver, lang=None): + return lang + + result = test_func(self.solver, context={'lang': 'en'}) + self.assertEqual(result, 'en') + + def test_no_context(self): + 
@_deprecate_context2lang() + def test_func(solver, lang=None): + return lang + + result = test_func(self.solver, lang='fr') + self.assertEqual(result, 'fr') + + +if __name__ == '__main__': + unittest.main() diff --git a/test/unittests/test_stt.py b/test/unittests/test_stt.py index a1705dd3..de866fe5 100644 --- a/test/unittests/test_stt.py +++ b/test/unittests/test_stt.py @@ -91,18 +91,11 @@ def test_get_stt_config(self, get_config): class TestSTTFactory(unittest.TestCase): - def test_mappings(self): - from ovos_plugin_manager.stt import OVOSSTTFactory - self.assertIsInstance(OVOSSTTFactory.MAPPINGS, dict) - for key in OVOSSTTFactory.MAPPINGS: - self.assertIsInstance(key, str) - self.assertIsInstance(OVOSSTTFactory.MAPPINGS[key], str) - self.assertNotEqual(key, OVOSSTTFactory.MAPPINGS[key]) @patch("ovos_plugin_manager.stt.load_stt_plugin") def test_get_class(self, load_plugin): from ovos_plugin_manager.stt import OVOSSTTFactory - global_config = {"stt": {"module": "dummy"}} + global_config = {"stt": {"module": "ovos-stt-plugin-dummy"}} tts_config = {"module": "test-stt-plugin-test"} # Test load plugin mapped global config @@ -120,7 +113,7 @@ def test_create(self, get_class): get_class.return_value = plugin_class global_config = {"lang": "en-gb", - "stt": {"module": "dummy", + "stt": {"module": "ovos-stt-plugin-dummy", "ovos-stt-plugin-dummy": {"config": True, "lang": "en-ca"}}} stt_config = {"lang": "es-es", diff --git a/test/unittests/test_tts.py b/test/unittests/test_tts.py index 27fe6e3d..6ad28ba9 100644 --- a/test/unittests/test_tts.py +++ b/test/unittests/test_tts.py @@ -1,7 +1,12 @@ import unittest +from unittest.mock import MagicMock from unittest.mock import patch, Mock + +from ovos_bus_client.session import Session +from ovos_utils.fakebus import FakeBus, Message + +from ovos_plugin_manager.templates.tts import TTS, TTSContext from ovos_plugin_manager.utils import PluginTypes, PluginConfigTypes -from ovos_plugin_manager.templates.tts import TTS class 
TestTTSTemplate(unittest.TestCase): @@ -113,25 +118,9 @@ def test_format_speak_tags_with_speech_no_tags(self): tagged_with_exclusion = TTS.format_speak_tags("Don'tSpeak This.But Not this.", False) self.assertEqual(tagged_with_exclusion, valid_output) - def test_playback_thread(self): - from ovos_plugin_manager.templates.tts import PlaybackThread - # TODO - - def test_tts_context(self): - from ovos_plugin_manager.templates.tts import TTSContext - # TODO - def test_tts_validator(self): from ovos_plugin_manager.templates.tts import TTSValidator # TODO - - def test_concat_tts(self): - from ovos_plugin_manager.templates.tts import ConcatTTS - # TODO - - def test_remote_tt(self): - from ovos_plugin_manager.templates.tts import RemoteTTS - # TODO class TestTTS(unittest.TestCase): @@ -200,18 +189,11 @@ def test_get_voices(self): class TestTTSFactory(unittest.TestCase): - def test_mappings(self): - from ovos_plugin_manager.tts import OVOSTTSFactory - self.assertIsInstance(OVOSTTSFactory.MAPPINGS, dict) - for key in OVOSTTSFactory.MAPPINGS: - self.assertIsInstance(key, str) - self.assertIsInstance(OVOSTTSFactory.MAPPINGS[key], str) - self.assertNotEqual(key, OVOSTTSFactory.MAPPINGS[key]) @patch("ovos_plugin_manager.tts.load_tts_plugin") def test_get_class(self, load_plugin): from ovos_plugin_manager.tts import OVOSTTSFactory - global_config = {"tts": {"module": "dummy"}} + global_config = {"tts": {"module": "ovos-tts-plugin-dummy"}} tts_config = {"module": "test-tts-plugin-test"} # Test load plugin mapped global config @@ -229,7 +211,7 @@ def test_create(self, get_class): get_class.return_value = plugin_class global_config = {"lang": "en-gb", - "tts": {"module": "dummy", + "tts": {"module": "ovos-tts-plugin-dummy", "ovos-tts-plugin-dummy": {"config": True, "lang": "en-ca"}}} tts_config = {"lang": "es-es", @@ -246,13 +228,13 @@ def test_create(self, get_class): "config": True, "lang": "en-ca"} get_class.assert_called_once_with(expected_config) - 
plugin_class.assert_called_once_with(lang=None, config=expected_config) + plugin_class.assert_called_once_with(config=expected_config) self.assertEqual(plugin, plugin_class()) # Test create with TTS config and no module config plugin = OVOSTTSFactory.create(tts_config) get_class.assert_called_with(tts_config) - plugin_class.assert_called_with(lang=None, config=tts_config) + plugin_class.assert_called_with(config=tts_config) self.assertEqual(plugin, plugin_class()) # Test create with TTS config with module-specific config @@ -260,5 +242,110 @@ def test_create(self, get_class): expected_config = {"module": "test-tts-plugin-test", "config": True, "lang": "es-mx"} get_class.assert_called_with(expected_config) - plugin_class.assert_called_with(lang=None, config=expected_config) + plugin_class.assert_called_with(config=expected_config) self.assertEqual(plugin, plugin_class()) + + +class TestTTSContext(unittest.TestCase): + + @patch("ovos_plugin_manager.templates.tts.TextToSpeechCache", autospec=True) + def test_tts_context_get_cache(self, cache_mock): + tts_context = TTSContext("plug", "voice", "lang") + + result = tts_context.get_cache() + + self.assertEqual(result, cache_mock.return_value) + self.assertEqual(result, tts_context._caches[tts_context.tts_id]) + + +class TestTTSCache(unittest.TestCase): + def setUp(self): + self.tts_mock = TTS(config={"some_config_key": "some_config_value"}) + self.tts_mock.stopwatch = MagicMock() + self.tts_mock.queue = MagicMock() + self.tts_mock.playback = MagicMock() + + @patch("ovos_plugin_manager.templates.tts.hash_sentence", return_value="fake_hash") + @patch("ovos_plugin_manager.templates.tts.TTSContext") + def test_tts_synth(self, tts_context_mock, hash_sentence_mock): + tts_context_mock.get_cache.return_value = MagicMock() + tts_context_mock.get_cache.return_value.define_audio_file.return_value.path = "fake_audio_path" + + sentence = "Hello world!" 
+ result = self.tts_mock.synth(sentence, tts_context_mock) + + tts_context_mock.get_cache.assert_called_once_with("wav", self.tts_mock.config) + tts_context_mock.get_cache.return_value.define_audio_file.assert_called_once_with("fake_hash") + self.assertEqual(result, (tts_context_mock.get_cache.return_value.define_audio_file.return_value, None)) + + @patch("ovos_plugin_manager.templates.tts.hash_sentence", return_value="fake_hash") + def test_tts_synth_cache_enabled(self, hash_sentence_mock): + tts_context_mock = MagicMock() + tts_context_mock.tts_id = "fake_tts_id" + tts_context_mock.get_cache.return_value = MagicMock() + tts_context_mock.get_cache.return_value.cached_sentences = {} + tts_context_mock.get_cache.return_value.define_audio_file.return_value.path = "fake_audio_path" + tts_context_mock._caches = {tts_context_mock.tts_id: tts_context_mock.get_cache.return_value} + + sentence = "Hello world!" + self.tts_mock.enable_cache = True + result = self.tts_mock.synth(sentence, tts_context_mock) + + tts_context_mock.get_cache.assert_called_once_with("wav", self.tts_mock.config) + tts_context_mock.get_cache.return_value.define_audio_file.assert_called_once_with("fake_hash") + self.assertEqual(result, (tts_context_mock.get_cache.return_value.define_audio_file.return_value, None)) + self.assertIn("fake_hash", tts_context_mock.get_cache.return_value.cached_sentences) + + @patch("ovos_plugin_manager.templates.tts.hash_sentence", return_value="fake_hash") + def test_tts_synth_cache_disabled(self, hash_sentence_mock): + tts_context_mock = MagicMock() + tts_context_mock.tts_id = "fake_tts_id" + tts_context_mock.get_cache.return_value = MagicMock() + tts_context_mock.get_cache.return_value.cached_sentences = {} + tts_context_mock.get_cache.return_value.define_audio_file.return_value.path = "fake_audio_path" + tts_context_mock._caches = {tts_context_mock.tts_id: tts_context_mock.get_cache.return_value} + + sentence = "Hello world!" 
+ self.tts_mock.enable_cache = False + result = self.tts_mock.synth(sentence, tts_context_mock) + + tts_context_mock.get_cache.assert_called_once_with("wav", self.tts_mock.config) + tts_context_mock.get_cache.return_value.define_audio_file.assert_called_once_with("fake_hash") + self.assertEqual(result, (tts_context_mock.get_cache.return_value.define_audio_file.return_value, None)) + self.assertNotIn("fake_hash", tts_context_mock.get_cache.return_value.cached_sentences) + + +class TestSession(unittest.TestCase): + def test_tts_session(self): + sess = Session(session_id="123", lang="en-us") + m = Message("speak", + context={"session": sess.serialize()}) + + tts = TTS() + self.assertEqual(tts.plugin_id, "ovos-tts-plugin-dummy") + self.assertEqual(tts.voice, "default") # no voice set + self.assertEqual(tts.lang, "en-us") # from config + + # test that session makes it all the way to the TTS.queue + kwargs = {"message": m} + tts.execute("test sentence", **kwargs) + path, visemes, listen, tts_id, message = tts.queue.get() + self.assertEqual(message, m) + self.assertEqual(message.context["session"]["session_id"], sess.session_id) + + # test that lang from Session is used + ctxt = tts._get_ctxt(kwargs) + self.assertEqual(ctxt.plugin_id, tts.plugin_id) + self.assertEqual(ctxt.lang, sess.lang) + self.assertEqual(ctxt.tts_id, f"{tts.plugin_id}/default/en-us") + self.assertEqual(ctxt.synth_kwargs, {'lang': 'en-us', "voice": "default"}) + + sess = Session(session_id="123", + lang="klingon") + m = Message("speak", + context={"session": sess.serialize()}) + kwargs = {"message": m, "voice": "Daghor"} + ctxt = tts._get_ctxt(kwargs) + self.assertEqual(ctxt.lang, sess.lang) + self.assertEqual(ctxt.tts_id, f"{tts.plugin_id}/Daghor/klingon") + self.assertEqual(ctxt.synth_kwargs, {'lang': 'klingon', 'voice': 'Daghor'})