From 1a152a827e0c1c284562be13017b0b73cb5c2159 Mon Sep 17 00:00:00 2001 From: miro Date: Tue, 18 Jun 2024 19:51:38 +0100 Subject: [PATCH] feat/alt_transcripts companion to https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/236 and https://github.com/OpenVoiceOS/ovos-dinkum-listener/pull/124 --- ovos_stt_plugin_chromium/__init__.py | 33 ++++++++++++++-------------- requirements.txt | 2 +- 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/ovos_stt_plugin_chromium/__init__.py b/ovos_stt_plugin_chromium/__init__.py index 7fe79f3..2321554 100644 --- a/ovos_stt_plugin_chromium/__init__.py +++ b/ovos_stt_plugin_chromium/__init__.py @@ -1,5 +1,6 @@ import json import logging +from typing import List, Tuple, Optional import requests from ovos_plugin_manager.templates.stt import STT @@ -220,7 +221,9 @@ def __init__(self, *args, **kwargs): log = logging.getLogger("urllib3.connectionpool") log.setLevel("INFO") - def execute(self, audio, language=None): + def transcribe(self, audio, lang: Optional[str] = None) -> List[Tuple[str, float]]: + """transcribe audio data to a list of + possible transcriptions and respective confidences""" flac_data = audio.get_flac_data( convert_rate=None if audio.sample_rate >= 8000 else 8000, # audio samples must be at least 8 kHz @@ -229,7 +232,7 @@ def execute(self, audio, language=None): params = { "client": "chromium", - "lang": language or self.lang, + "lang": lang or self.lang, "key": self.key, "pFilter": int(self.pfilter) } @@ -245,6 +248,8 @@ def execute(self, audio, language=None): """ result = r.text.split("\n")[1] + if not result: + return [] data = json.loads(result)["result"] if len(data) == 0: return "" @@ -252,20 +257,14 @@ def execute(self, audio, language=None): if self.debug: LOG.debug("transcriptions:" + str(data)) if len(data) == 0: - return "" - - # we arbitrarily choose the first hypothesis by default. - # results seem to be ordered by confidence - best_hypothesis = data[0]["transcript"] + return [] - # if confidence is provided return highest conf - candidates = [alt for alt in data if alt.get("confidence")] - if self.debug: - LOG.debug("confidences: " + str(candidates)) + candidates = [(u["transcript"], u.get("confidence", 0.0)) + for u in data] + return sorted(candidates, key=lambda alt: alt[1], reverse=True) - if len(candidates): - best = max(candidates, key=lambda alt: alt["confidence"]) - best_hypothesis = best["transcript"] - if self.debug: - LOG.debug("best confidence: " + best_hypothesis) - return best_hypothesis + def execute(self, audio, language=None) -> str: + transcripts = self.transcribe(audio, language) + if not transcripts: + return "" + return transcripts[0][0] diff --git a/requirements.txt b/requirements.txt index f48eda6..d5748d1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ requests ovos_utils>=0.0.8a3 -ovos-plugin-manager>=0.0.1a7 \ No newline at end of file +ovos-plugin-manager<0.1.0, >=0.0.26a28 \ No newline at end of file