Skip to content

Commit

Permalink
feat/alt_transcripts
Browse files Browse the repository at this point in the history
  • Loading branch information
JarbasAl committed Jun 18, 2024
1 parent ba015b3 commit 1a152a8
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 18 deletions.
33 changes: 16 additions & 17 deletions ovos_stt_plugin_chromium/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json
import logging
from typing import List, Tuple, Optional

import requests
from ovos_plugin_manager.templates.stt import STT
Expand Down Expand Up @@ -220,7 +221,9 @@ def __init__(self, *args, **kwargs):
log = logging.getLogger("urllib3.connectionpool")
log.setLevel("INFO")

def execute(self, audio, language=None):
def transcribe(self, audio, lang: Optional[str] = None) -> List[Tuple[str, float]]:
"""transcribe audio data to a list of
possible transcriptions and respective confidences"""
flac_data = audio.get_flac_data(
convert_rate=None if audio.sample_rate >= 8000 else 8000,
# audio samples must be at least 8 kHz
Expand All @@ -229,7 +232,7 @@ def execute(self, audio, language=None):

params = {
"client": "chromium",
"lang": language or self.lang,
"lang": lang or self.lang,
"key": self.key,
"pFilter": int(self.pfilter)
}
Expand All @@ -245,27 +248,23 @@ def execute(self, audio, language=None):
"""

result = r.text.split("\n")[1]
if not result:
return []
data = json.loads(result)["result"]
if len(data) == 0:
return ""
data = data[0]["alternative"]
if self.debug:
LOG.debug("transcriptions:" + str(data))
if len(data) == 0:
return ""

# we arbitrarily choose the first hypothesis by default.
# results seem to be ordered by confidence
best_hypothesis = data[0]["transcript"]
return []

# if confidence is provided return highest conf
candidates = [alt for alt in data if alt.get("confidence")]
if self.debug:
LOG.debug("confidences: " + str(candidates))
candidates = [(u["transcript"], u.get("confidence", 0.0))
for u in data]
return sorted(candidates, key=lambda alt: alt[1], reverse=True)

if len(candidates):
best = max(candidates, key=lambda alt: alt["confidence"])
best_hypothesis = best["transcript"]
if self.debug:
LOG.debug("best confidence: " + best_hypothesis)
return best_hypothesis
def execute(self, audio, language=None) -> str:
    """Return a single transcript string for ``audio``.

    Delegates to ``self.transcribe(audio, language)`` and returns the text
    of the first (transcript, confidence) pair it yields. An empty string
    is returned when ``transcribe`` produces no candidates.
    """
    ranked = self.transcribe(audio, language)
    # Only the text of the leading candidate is exposed to callers.
    return ranked[0][0] if ranked else ""
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
requests
ovos_utils>=0.0.8a3
ovos-plugin-manager>=0.0.1a7
ovos-plugin-manager<0.1.0, >=0.0.26a28

0 comments on commit 1a152a8

Please sign in to comment.