Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(espeak_wrapper): capture stderr separately #17

Merged
merged 3 commits into from
May 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 47 additions & 42 deletions TTS/tts/utils/text/phonemizers/espeak_wrapper.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
"""Wrapper to call the espeak/espeak-ng phonemizer."""

import logging
import re
import subprocess
from typing import Dict, List
from typing import Optional

from packaging.version import Version

Expand All @@ -11,7 +13,7 @@
logger = logging.getLogger(__name__)


def is_tool(name):
def _is_tool(name) -> bool:
from shutil import which

return which(name) is not None
Expand All @@ -22,31 +24,33 @@ def is_tool(name):
espeak_version_pattern = re.compile(r"text-to-speech:\s(?P<version>\d+\.\d+(\.\d+)?)")


def get_espeak_version():
def get_espeak_version() -> str:
"""Return version of the `espeak` binary."""
output = subprocess.getoutput("espeak --version")
match = espeak_version_pattern.search(output)

return match.group("version")


def get_espeakng_version():
def get_espeakng_version() -> str:
"""Return version of the `espeak-ng` binary."""
output = subprocess.getoutput("espeak-ng --version")
return output.split()[3]


# priority: espeakng > espeak
if is_tool("espeak-ng"):
if _is_tool("espeak-ng"):
_DEF_ESPEAK_LIB = "espeak-ng"
_DEF_ESPEAK_VER = get_espeakng_version()
elif is_tool("espeak"):
elif _is_tool("espeak"):
_DEF_ESPEAK_LIB = "espeak"
_DEF_ESPEAK_VER = get_espeak_version()
else:
_DEF_ESPEAK_LIB = None
_DEF_ESPEAK_VER = None


def _espeak_exe(espeak_lib: str, args: List, sync=False) -> List[str]:
def _espeak_exe(espeak_lib: str, args: list, *, sync: bool = False) -> list[bytes]:
"""Run espeak with the given arguments."""
cmd = [
espeak_lib,
Expand All @@ -60,19 +64,20 @@ def _espeak_exe(espeak_lib: str, args: List, sync=False) -> List[str]:
with subprocess.Popen(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
stderr=subprocess.PIPE,
) as p:
res = iter(p.stdout.readline, b"")
err = iter(p.stderr.readline, b"")
for line in err:
logger.warning("espeakng: %s", line.decode("utf-8").strip())
if not sync:
p.stdout.close()
if p.stderr:
p.stderr.close()
if p.stdin:
p.stdin.close()
return res
res2 = []
for line in res:
res2.append(line)
res2 = list(res)
p.stdout.close()
if p.stderr:
p.stderr.close()
Expand All @@ -83,7 +88,7 @@ def _espeak_exe(espeak_lib: str, args: List, sync=False) -> List[str]:


class ESpeak(BasePhonemizer):
"""ESpeak wrapper calling `espeak` or `espeak-ng` from the command-line the perform G2P
"""Wrapper calling `espeak` or `espeak-ng` from the command-line to perform G2P.

Args:
language (str):
Expand All @@ -108,13 +113,17 @@ class ESpeak(BasePhonemizer):

"""

_ESPEAK_LIB = _DEF_ESPEAK_LIB
_ESPEAK_VER = _DEF_ESPEAK_VER

def __init__(self, language: str, backend=None, punctuations=Punctuation.default_puncs(), keep_puncs=True):
if self._ESPEAK_LIB is None:
raise Exception(" [!] No espeak backend found. Install espeak-ng or espeak to your system.")
self.backend = self._ESPEAK_LIB
def __init__(
self,
language: str,
backend: Optional[str] = None,
punctuations: str = Punctuation.default_puncs(),
keep_puncs: bool = True,
):
if _DEF_ESPEAK_LIB is None:
msg = "[!] No espeak backend found. Install espeak-ng or espeak to your system."
raise FileNotFoundError(msg)
self.backend = _DEF_ESPEAK_LIB

# band-aid for backwards compatibility
if language == "en":
Expand All @@ -127,35 +136,37 @@ def __init__(self, language: str, backend=None, punctuations=Punctuation.default
self.backend = backend

@property
def backend(self):
def backend(self) -> str:
return self._ESPEAK_LIB

@property
def backend_version(self):
def backend_version(self) -> str:
return self._ESPEAK_VER

@backend.setter
def backend(self, backend):
def backend(self, backend: str) -> None:
if backend not in ["espeak", "espeak-ng"]:
raise Exception("Unknown backend: %s" % backend)
msg = f"Unknown backend: {backend}"
raise ValueError(msg)
self._ESPEAK_LIB = backend
self._ESPEAK_VER = get_espeakng_version() if backend == "espeak-ng" else get_espeak_version()

def auto_set_espeak_lib(self) -> None:
if is_tool("espeak-ng"):
if _is_tool("espeak-ng"):
self._ESPEAK_LIB = "espeak-ng"
self._ESPEAK_VER = get_espeakng_version()
elif is_tool("espeak"):
elif _is_tool("espeak"):
self._ESPEAK_LIB = "espeak"
self._ESPEAK_VER = get_espeak_version()
else:
raise Exception("Cannot set backend automatically. espeak-ng or espeak not found")
msg = "Cannot set backend automatically. espeak-ng or espeak not found"
raise FileNotFoundError(msg)

@staticmethod
def name():
def name() -> str:
return "espeak"

def phonemize_espeak(self, text: str, separator: str = "|", tie=False) -> str:
def phonemize_espeak(self, text: str, separator: str = "|", *, tie: bool = False) -> str:
"""Convert input text to phonemes.

Args:
Expand Down Expand Up @@ -190,7 +201,7 @@ def phonemize_espeak(self, text: str, separator: str = "|", tie=False) -> str:
args.append(text)
# compute phonemes
phonemes = ""
for line in _espeak_exe(self._ESPEAK_LIB, args, sync=True):
for line in _espeak_exe(self.backend, args, sync=True):
logger.debug("line: %s", repr(line))
ph_decoded = line.decode("utf8").strip()
# espeak:
Expand All @@ -207,11 +218,11 @@ def phonemize_espeak(self, text: str, separator: str = "|", tie=False) -> str:
phonemes += ph_decoded.strip()
return phonemes.replace("_", separator)

def _phonemize(self, text, separator=None):
def _phonemize(self, text: str, separator: str = "") -> str:
return self.phonemize_espeak(text, separator, tie=False)

@staticmethod
def supported_languages() -> Dict:
def supported_languages() -> dict[str, str]:
"""Get a dictionary of supported languages.

Returns:
Expand All @@ -221,16 +232,14 @@ def supported_languages() -> Dict:
return {}
args = ["--voices"]
langs = {}
count = 0
for line in _espeak_exe(_DEF_ESPEAK_LIB, args, sync=True):
for count, line in enumerate(_espeak_exe(_DEF_ESPEAK_LIB, args, sync=True)):
line = line.decode("utf8").strip()
if count > 0:
cols = line.split()
lang_code = cols[1]
lang_name = cols[3]
langs[lang_code] = lang_name
logger.debug("line: %s", repr(line))
count += 1
return langs

def version(self) -> str:
Expand All @@ -239,16 +248,12 @@ def version(self) -> str:
Returns:
str: Version of the used backend.
"""
args = ["--version"]
for line in _espeak_exe(self.backend, args, sync=True):
version = line.decode("utf8").strip().split()[2]
logger.debug("line: %s", repr(line))
return version
return self.backend_version

@classmethod
def is_available(cls):
"""Return true if ESpeak is available else false"""
return is_tool("espeak") or is_tool("espeak-ng")
def is_available(cls) -> bool:
"""Return true if ESpeak is available else false."""
return _is_tool("espeak") or _is_tool("espeak-ng")


if __name__ == "__main__":
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ requires = [
]

[tool.ruff]
target-version = "py39"
line-length = 120
lint.extend-select = [
"B033", # duplicate-value
Expand Down
2 changes: 1 addition & 1 deletion tests/tts_tests/test_losses.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ def test_in_out(self): # pylint: disable=no-self-use
late_x = -200.0 * sequence_mask(length + 1, 100).float() + 100.0 # simulate logits on late stopping

loss = layer(true_x, target, length)
self.assertEqual(loss.item(), 0.0)
self.assertAlmostEqual(loss.item(), 0.0)

loss = layer(early_x, target, length)
self.assertAlmostEqual(loss.item(), 2.1053, places=4)
Expand Down
Loading