Skip to content

Commit

Permalink
feat(api): support setting speed (#316)
Browse files Browse the repository at this point in the history
XTTS accepts a `speed` parameter, but the API previously captured it as an
explicit argument of its own, which prevented it from being passed through to
the model. Removing the non-functional `speed` parameter from the API lets it
pass via `kwargs` to models that support it.
  • Loading branch information
eginhard authored Feb 25, 2025
1 parent 382b418 commit b20533e
Showing 1 changed file with 3 additions and 13 deletions.
16 changes: 3 additions & 13 deletions TTS/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,6 @@ def _check_arguments(
language: str | None = None,
speaker_wav: str | None = None,
emotion: str | None = None,
speed: float | None = None,
**kwargs,
) -> None:
"""Check if the arguments are valid for the model."""
Expand All @@ -277,8 +276,8 @@ def _check_arguments(
raise ValueError("Model is not multi-speaker but `speaker` is provided.")
if not self.is_multi_lingual and language is not None:
raise ValueError("Model is not multi-lingual but `language` is provided.")
if emotion is not None and speed is not None:
raise ValueError("Emotion and speed can only be used with Coqui Studio models. Which is discontinued.")
if emotion is not None:
raise ValueError("Emotion can only be used with Coqui Studio models. Which is discontinued.")

def tts(
self,
Expand All @@ -287,7 +286,6 @@ def tts(
language: str | None = None,
speaker_wav: str | None = None,
emotion: str | None = None,
speed: float | None = None,
split_sentences: bool = True,
**kwargs,
):
Expand All @@ -306,19 +304,14 @@ def tts(
Defaults to None.
emotion (str, optional):
Emotion to use for 🐸Coqui Studio models. If None, Studio models use "Neutral". Defaults to None.
speed (float, optional):
Speed factor to use for 🐸Coqui Studio models, between 0 and 2.0. If None, Studio models use 1.0.
Defaults to None.
split_sentences (bool, optional):
Split text into sentences, synthesize them separately and concatenate the file audio.
Setting it False uses more VRAM and possibly hit model specific text length or VRAM limits. Only
applicable to the 🐸TTS models. Defaults to True.
kwargs (dict, optional):
Additional arguments for the model.
"""
self._check_arguments(
speaker=speaker, language=language, speaker_wav=speaker_wav, emotion=emotion, speed=speed, **kwargs
)
self._check_arguments(speaker=speaker, language=language, speaker_wav=speaker_wav, emotion=emotion, **kwargs)
wav = self.synthesizer.tts(
text=text,
speaker_name=speaker,
Expand All @@ -336,7 +329,6 @@ def tts_to_file(
language: str | None = None,
speaker_wav: str | None = None,
emotion: str | None = None,
speed: float = 1.0,
pipe_out=None,
file_path: str = "output.wav",
split_sentences: bool = True,
Expand All @@ -358,8 +350,6 @@ def tts_to_file(
Defaults to None.
emotion (str, optional):
Emotion to use for 🐸Coqui Studio models. Defaults to "Neutral".
speed (float, optional):
Speed factor to use for 🐸Coqui Studio models, between 0.0 and 2.0. Defaults to None.
pipe_out (BytesIO, optional):
Flag to stdout the generated TTS wav file for shell pipe.
file_path (str, optional):
Expand Down

0 comments on commit b20533e

Please sign in to comment.