Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(api): support setting speed #316

Merged
merged 1 commit into from
Feb 25, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 3 additions & 13 deletions TTS/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,6 @@ def _check_arguments(
language: str | None = None,
speaker_wav: str | None = None,
emotion: str | None = None,
speed: float | None = None,
**kwargs,
) -> None:
"""Check if the arguments are valid for the model."""
Expand All @@ -277,8 +276,8 @@ def _check_arguments(
raise ValueError("Model is not multi-speaker but `speaker` is provided.")
if not self.is_multi_lingual and language is not None:
raise ValueError("Model is not multi-lingual but `language` is provided.")
if emotion is not None and speed is not None:
raise ValueError("Emotion and speed can only be used with Coqui Studio models. Which is discontinued.")
if emotion is not None:
raise ValueError("Emotion can only be used with Coqui Studio models. Which is discontinued.")

def tts(
self,
Expand All @@ -287,7 +286,6 @@ def tts(
language: str | None = None,
speaker_wav: str | None = None,
emotion: str | None = None,
speed: float | None = None,
split_sentences: bool = True,
**kwargs,
):
Expand All @@ -306,19 +304,14 @@ def tts(
Defaults to None.
emotion (str, optional):
Emotion to use for 🐸Coqui Studio models. If None, Studio models use "Neutral". Defaults to None.
speed (float, optional):
Speed factor to use for 🐸Coqui Studio models, between 0 and 2.0. If None, Studio models use 1.0.
Defaults to None.
split_sentences (bool, optional):
Split text into sentences, synthesize them separately and concatenate the file audio.
Setting it to False uses more VRAM and may hit model-specific text length or VRAM limits. Only
applicable to the 🐸TTS models. Defaults to True.
kwargs (dict, optional):
Additional arguments for the model.
"""
self._check_arguments(
speaker=speaker, language=language, speaker_wav=speaker_wav, emotion=emotion, speed=speed, **kwargs
)
self._check_arguments(speaker=speaker, language=language, speaker_wav=speaker_wav, emotion=emotion, **kwargs)
wav = self.synthesizer.tts(
text=text,
speaker_name=speaker,
Expand All @@ -336,7 +329,6 @@ def tts_to_file(
language: str | None = None,
speaker_wav: str | None = None,
emotion: str | None = None,
speed: float = 1.0,
pipe_out=None,
file_path: str = "output.wav",
split_sentences: bool = True,
Expand All @@ -358,8 +350,6 @@ def tts_to_file(
Defaults to None.
emotion (str, optional):
Emotion to use for 🐸Coqui Studio models. Defaults to "Neutral".
speed (float, optional):
Speed factor to use for 🐸Coqui Studio models, between 0.0 and 2.0. Defaults to None.
pipe_out (BytesIO, optional):
Flag to stdout the generated TTS wav file for shell pipe.
file_path (str, optional):
Expand Down
Loading