From 3b263539cd34fb14b53d72339bc7c095028f4578 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Thu, 2 Jan 2025 15:54:34 +0800 Subject: [PATCH] Publish MatchaTTS onnx models trained with LJSpeech to huggingface (#1854) --- .github/scripts/docker/Dockerfile | 2 +- .github/scripts/ljspeech/TTS/run-matcha.sh | 33 +++++++++- .github/workflows/ljspeech.yml | 74 +++++++++++++++++++++- egs/ljspeech/TTS/README.md | 9 +++ egs/ljspeech/TTS/matcha/export_onnx.py | 4 ++ 5 files changed, 118 insertions(+), 4 deletions(-) diff --git a/.github/scripts/docker/Dockerfile b/.github/scripts/docker/Dockerfile index 94e8d8e1e3..cf05234015 100644 --- a/.github/scripts/docker/Dockerfile +++ b/.github/scripts/docker/Dockerfile @@ -49,7 +49,7 @@ RUN pip install --no-cache-dir \ kaldifst \ kaldilm \ librosa \ - matplotlib \ + "matplotlib<=3.9.4" \ multi_quantization \ numba \ "numpy<2.0" \ diff --git a/.github/scripts/ljspeech/TTS/run-matcha.sh b/.github/scripts/ljspeech/TTS/run-matcha.sh index 954dd5bd83..bfb37fb6d5 100755 --- a/.github/scripts/ljspeech/TTS/run-matcha.sh +++ b/.github/scripts/ljspeech/TTS/run-matcha.sh @@ -77,7 +77,7 @@ function export_onnx() { popd pushd data/fbank - rm -v *.json + rm -fv *.json curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/data/cmvn.json popd @@ -115,6 +115,37 @@ function export_onnx() { ls -lh /icefall/*.wav soxi /icefall/generated-matcha-tts-steps-6-*.wav + + cp ./model-steps-*.onnx /icefall + + d=matcha-icefall-en_US-ljspeech + mkdir $d + cp -v data/tokens.txt $d + cp model-steps-3.onnx $d + pushd $d + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2 + tar xf espeak-ng-data.tar.bz2 + rm espeak-ng-data.tar.bz2 + +cat >README.md <=2.2.0`. + To export the Hifigan vocoder to onnx, please use: diff --git a/egs/ljspeech/TTS/matcha/export_onnx.py b/egs/ljspeech/TTS/matcha/export_onnx.py index 39709cc36e..3c653fbf1d 100755 --- a/egs/ljspeech/TTS/matcha/export_onnx.py +++ b/egs/ljspeech/TTS/matcha/export_onnx.py @@ -176,12 +176,16 @@ def main(): "language": "English", "voice": "en-us", "has_espeak": 1, + "jieba": 0, "n_speakers": 1, "sample_rate": 22050, "version": 1, + "pad_id": tokenizer.pad_id, "model_author": "icefall", "maintainer": "k2-fsa", + "use_eos_bos": 1, "dataset": "LJ Speech", + "dataset_url": "https://keithito.com/LJ-Speech-Dataset/", "num_ode_steps": num_steps, } add_meta_data(filename=filename, meta_data=meta_data)