diff --git a/egs/libritts/TTS/local/prepare_tokens_libritts.py b/egs/libritts/TTS/local/prepare_tokens_libritts.py index faeb611f5d..cdc39ea6b7 100755 --- a/egs/libritts/TTS/local/prepare_tokens_libritts.py +++ b/egs/libritts/TTS/local/prepare_tokens_libritts.py @@ -31,15 +31,6 @@ from tqdm.auto import tqdm -def remove_punc_to_upper(text: str) -> str: - text = text.replace("‘", "'") - text = text.replace("’", "'") - tokens = set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'") - s_list = [x.upper() if x in tokens else " " for x in text] - s = " ".join("".join(s_list).split()).strip() - return s - - def prepare_tokens_libritts(): output_dir = Path("data/spectrogram") prefix = "libritts" @@ -72,7 +63,7 @@ def prepare_tokens_libritts(): for t in tokens_list: tokens.extend(t) cut.tokens = tokens - cut.supervisions[0].normalized_text = remove_punc_to_upper(text) + cut.supervisions[0].normalized_text = text new_cuts.append(cut)