Skip to content

Commit

Permalink
feat(server): add speaker_wav parameter for voice cloning (#295)
Browse files Browse the repository at this point in the history
* Add a speaker_wav parameter to the server's /api/tts endpoint

* Treat an empty speaker_id as unset (None) instead of passing an empty string

* feat(server): add input field for speaker_wav

---------

Co-authored-by: Enno Hermann <[email protected]>
  • Loading branch information
shavit and eginhard authored Feb 21, 2025
1 parent 1641257 commit 382b418
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 6 deletions.
7 changes: 6 additions & 1 deletion TTS/server/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ def create_argparser() -> argparse.ArgumentParser:

# TODO: set this from SpeakerManager
use_gst = api.synthesizer.tts_config.get("use_gst", False)
supports_cloning = api.synthesizer.tts_config.get("model", "") in ["xtts", "bark"]
app = Flask(__name__)


Expand Down Expand Up @@ -136,6 +137,7 @@ def index():
speaker_ids=api.speakers,
language_ids=api.languages,
use_gst=use_gst,
supports_cloning=supports_cloning,
)


Expand Down Expand Up @@ -163,18 +165,21 @@ def tts():
speaker_idx = (
request.headers.get("speaker-id") or request.values.get("speaker_id", "") if api.is_multi_speaker else None
)
if speaker_idx == "":
speaker_idx = None
language_idx = (
request.headers.get("language-id") or request.values.get("language_id", "")
if api.is_multi_lingual
else None
)
style_wav = request.headers.get("style-wav") or request.values.get("style_wav", "")
style_wav = style_wav_uri_to_dict(style_wav)
speaker_wav = request.headers.get("speaker-wav") or request.values.get("speaker_wav", "")

logger.info("Model input: %s", text)
logger.info("Speaker idx: %s", speaker_idx)
logger.info("Language idx: %s", language_idx)
wavs = api.tts(text, speaker=speaker_idx, language=language_idx, style_wav=style_wav)
wavs = api.tts(text, speaker=speaker_idx, language=language_idx, style_wav=style_wav, speaker_wav=speaker_wav)
out = io.BytesIO()
api.synthesizer.save_wav(wavs, out)
return send_file(out, mimetype="audio/wav")
Expand Down
19 changes: 14 additions & 5 deletions TTS/server/templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,12 @@

{%if use_gst%}
<input value='{"0": 0.1}' id="style_wav" placeholder="style wav (dict or path to wav).." size=45
type="text" name="style_wav">
type="text" name="style_wav"><br /><br />
{%endif%}

{%if supports_cloning%}
Reference audio:
<input id="speaker_wav" placeholder="path/to/speaker.wav" name="speaker_wav" accept=".wav"><br /><br />
{%endif%}

<input id="text" placeholder="Type here..." size=45 type="text" name="text">
Expand Down Expand Up @@ -114,14 +119,18 @@
q('#text').focus()
function do_tts(e) {
const text = q('#text').value
const speaker_id = getTextValue('#speaker_id')
const style_wav = getTextValue('#style_wav')
const speaker_wav = getTextValue('#speaker_wav')
let speaker_id = getTextValue('#speaker_id')
if (speaker_wav !== '') {
speaker_id = ''
}
const language_id = getTextValue('#language_id')
if (text) {
q('#message').textContent = 'Synthesizing...'
q('#speak-button').disabled = true
q('#audio').hidden = true
synthesize(text, speaker_id, style_wav, language_id)
synthesize(text, speaker_id, style_wav, speaker_wav, language_id)
}
e.preventDefault()
return false
Expand All @@ -132,8 +141,8 @@
do_tts(e)
}
})
function synthesize(text, speaker_id = "", style_wav = "", language_id = "") {
fetch(`/api/tts?text=${encodeURIComponent(text)}&speaker_id=${encodeURIComponent(speaker_id)}&style_wav=${encodeURIComponent(style_wav)}&language_id=${encodeURIComponent(language_id)}`, { cache: 'no-cache' })
function synthesize(text, speaker_id = "", style_wav = "", speaker_wav = "", language_id = "") {
fetch(`/api/tts?text=${encodeURIComponent(text)}&speaker_id=${encodeURIComponent(speaker_id)}&style_wav=${encodeURIComponent(style_wav)}&speaker_wav=${encodeURIComponent(speaker_wav)}&language_id=${encodeURIComponent(language_id)}`, { cache: 'no-cache' })
.then(function (res) {
if (!res.ok) throw Error(res.statusText)
return res.blob()
Expand Down

0 comments on commit 382b418

Please sign in to comment.