From 1a152a827e0c1c284562be13017b0b73cb5c2159 Mon Sep 17 00:00:00 2001
From: miro <jarbasai@mailfence.com>
Date: Tue, 18 Jun 2024 19:51:38 +0100
Subject: [PATCH] feat/alt_transcripts

companion to https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/236 and https://github.com/OpenVoiceOS/ovos-dinkum-listener/pull/124
---
 ovos_stt_plugin_chromium/__init__.py | 33 ++++++++++++++--------------
 requirements.txt                     |  2 +-
 2 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/ovos_stt_plugin_chromium/__init__.py b/ovos_stt_plugin_chromium/__init__.py
index 7fe79f3..2321554 100644
--- a/ovos_stt_plugin_chromium/__init__.py
+++ b/ovos_stt_plugin_chromium/__init__.py
@@ -1,5 +1,6 @@
 import json
 import logging
+from typing import List, Tuple, Optional
 
 import requests
 from ovos_plugin_manager.templates.stt import STT
@@ -220,7 +221,9 @@ def __init__(self, *args, **kwargs):
             log = logging.getLogger("urllib3.connectionpool")
             log.setLevel("INFO")
 
-    def execute(self, audio, language=None):
+    def transcribe(self, audio, lang: Optional[str] = None) -> List[Tuple[str, float]]:
+        """transcribe audio data to a list of
+        possible transcriptions and respective confidences"""
         flac_data = audio.get_flac_data(
             convert_rate=None if audio.sample_rate >= 8000 else 8000,
             # audio samples must be at least 8 kHz
@@ -229,7 +232,7 @@ def execute(self, audio, language=None):
 
         params = {
             "client": "chromium",
-            "lang": language or self.lang,
+            "lang": lang or self.lang,
             "key": self.key,
             "pFilter": int(self.pfilter)
         }
@@ -245,6 +248,8 @@ def execute(self, audio, language=None):
         """
 
         result = r.text.split("\n")[1]
+        if not result:
+            return []
         data = json.loads(result)["result"]
         if len(data) == 0:
-            return ""
+            return []  # no results: keep the declared List[Tuple[str, float]] return type
@@ -252,20 +257,14 @@ def execute(self, audio, language=None):
         if self.debug:
             LOG.debug("transcriptions:" + str(data))
         if len(data) == 0:
-            return ""
-
-        # we arbitrarily choose the first hypothesis by default.
-        # results seem to be ordered by confidence
-        best_hypothesis = data[0]["transcript"]
+            return []
 
-        # if confidence is provided return highest conf
-        candidates = [alt for alt in data if alt.get("confidence")]
-        if self.debug:
-            LOG.debug("confidences: " + str(candidates))
+        candidates = [(u["transcript"], u.get("confidence", 0.0))
+                      for u in data]
+        return sorted(candidates, key=lambda alt: alt[1], reverse=True)
 
-        if len(candidates):
-            best = max(candidates, key=lambda alt: alt["confidence"])
-            best_hypothesis = best["transcript"]
-            if self.debug:
-                LOG.debug("best confidence: " + best_hypothesis)
-        return best_hypothesis
+    def execute(self, audio, language=None) -> str:
+        """return the top-ranked transcription, or an empty string if there is none"""
+        ranked = self.transcribe(audio, language)
+        # transcribe() sorts candidates by descending confidence, so index 0 is the best
+        return ranked[0][0] if ranked else ""
diff --git a/requirements.txt b/requirements.txt
index f48eda6..d5748d1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,3 @@
 requests
 ovos_utils>=0.0.8a3
-ovos-plugin-manager>=0.0.1a7
\ No newline at end of file
+ovos-plugin-manager<0.1.0, >=0.0.26a28
\ No newline at end of file