Skip to content

Commit

Permalink
Implement format_speak_tags from neon_utils (#36)
Browse files Browse the repository at this point in the history
* Implement `format_speak_tags` from neon_utils

* add unittests

* add ssml unittests

Co-authored-by: Daniel McKnight <[email protected]>
Co-authored-by: jarbasai <[email protected]>
  • Loading branch information
3 people authored Mar 3, 2022
1 parent 04ad316 commit e187cb5
Show file tree
Hide file tree
Showing 3 changed files with 167 additions and 12 deletions.
47 changes: 47 additions & 0 deletions ovos_plugin_manager/templates/tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -492,6 +492,45 @@ def remove_ssml(text):
"""
return re.sub('<[^>]*>', '', text).replace(' ', ' ')

@staticmethod
def format_speak_tags(sentence: str, include_tags: bool = True) -> str:
    """
    Normalize the SSML 'speak' wrapper of a sentence before synthesis.

    The sentence is wrapped in a single '<speak>...</speak>' pair; a
    missing opening tag is assumed to sit at the start of the sentence
    and a missing closing tag at the end. Any text before the first
    '<speak>' or after the matching '</speak>' is discarded.

    Args:
        sentence: Input sentence to be spoken
        include_tags: Flag to include <speak> tags in returned string
    Returns:
        Cleaned sentence to pass to TTS; an empty string if nothing is
        left between the speak tags
    """
    open_tag = "<speak>"
    close_tag = "</speak>"
    has_open = open_tag in sentence
    has_close = close_tag in sentence

    if not has_open and not has_close:
        # Wrap sentence in speak tags if no tags present
        to_speak = f"{open_tag}{sentence}{close_tag}"
    elif not has_open:
        # Assume speak starts at the beginning of the sentence
        to_speak = f"{open_tag}{sentence}"
    elif not has_close:
        # Assume speak ends at the end of the sentence
        to_speak = f"{sentence}{close_tag}"
    else:
        to_speak = sentence

    # Trim text outside of speak tags. Both tags are present in to_speak
    # here, so the split on the opening tag always yields two parts.
    if not to_speak.startswith(open_tag):
        to_speak = f"{open_tag}{to_speak.split(open_tag, 1)[1]}"

    if not to_speak.endswith(close_tag):
        to_speak = f"{to_speak.split(close_tag, 1)[0]}{close_tag}"

    if to_speak == f"{open_tag}{close_tag}":
        return ""

    if include_tags:
        return to_speak

    # to_speak now starts with open_tag and ends with close_tag, so slice
    # them off by length. str.lstrip/str.rstrip must not be used here:
    # they strip any run of the given *characters*, which would also eat
    # spoken text such as a trailing "speak" or a nested "<prosody>" tag.
    return to_speak[len(open_tag):-len(close_tag)]

def validate_ssml(self, utterance):
"""Check if engine supports ssml, if not remove all tags.
Expand All @@ -503,6 +542,14 @@ def validate_ssml(self, utterance):
Returns:
str: validated_sentence
"""

# Validate speak tags
if not self.ssml_tags or "speak" not in self.ssml_tags:
self.format_speak_tags(utterance, False)
elif self.ssml_tags and "speak" in self.ssml_tags:
self.format_speak_tags(utterance)


# if ssml is not supported by TTS engine remove all tags
if not self.ssml_tags:
return self.remove_ssml(utterance)
Expand Down
12 changes: 0 additions & 12 deletions test/unittests/test_something.py

This file was deleted.

120 changes: 120 additions & 0 deletions test/unittests/test_ssml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
# Unit tests for SSML validation and <speak> tag formatting in the TTS template
import unittest
from ovos_plugin_manager.templates.tts import TTS
from ovos_utils.messagebus import FakeBus


class TestSSML(unittest.TestCase):
    """Exercise SSML validation and speak-tag formatting on the TTS template."""

    # Inputs shared by the speak-tag formatting tests; each one is expected
    # to reduce to the phrase "Speak This." (with or without speak tags).
    _SPEAK_INPUTS = (
        "Speak This.",
        "<speak>Speak This.",
        "Nope.<speak>Speak This.",
        "Speak This.</speak>",
        "Speak This.</speak> But not this.",
        "<speak>Speak This.</speak>",
        "Don't<speak>Speak This.</speak>But Not this.",
    )

    @classmethod
    def setUpClass(cls):
        # Dummy engine, stored on the class; it is not attached to a
        # message bus.
        cls.tts = TTS()

    def test_ssml(self):
        """validate_ssml keeps supported tags and drops everything else."""
        sentence = (
            "<speak>Prosody can be used to change the way words "
            "sound. The following words are "
            "<prosody volume='x-loud'> "
            "quite a bit louder than the rest of this passage. "
            "</prosody> Each morning when I wake up, "
            "<prosody rate='x-slow'>I speak quite slowly and "
            "deliberately until I have my coffee.</prosody> I can "
            "also change the pitch of my voice using prosody. "
            "Do you like <prosody pitch='+5%'> speech with a pitch "
            "that is higher, </prosody> or <prosody pitch='-10%'> "
            "is a lower pitch preferable?</prosody></speak>"
        )
        sentence_no_ssml = (
            "Prosody can be used to change the way "
            "words sound. The following words are quite "
            "a bit louder than the rest of this passage. "
            "Each morning when I wake up, I speak quite "
            "slowly and deliberately until I have my "
            "coffee. I can also change the pitch of my "
            "voice using prosody. Do you like speech "
            "with a pitch that is higher, or is "
            "a lower pitch preferable?"
        )
        sentence_bad_ssml = f"<foo_invalid>{sentence}</foo_invalid end=whatever>"
        sentence_extra_ssml = "<whispered>whisper tts<\\whispered>"

        # Each scenario: the tags the engine claims to support, followed by
        # (utterance, expected validated utterance) pairs checked in order.
        scenarios = [
            # valid ssml
            (['speak', 'prosody'], [(sentence, sentence)]),
            # extra ssml
            (['whispered'], [(sentence_extra_ssml, sentence_extra_ssml)]),
            # unsupported extra ssml
            (['speak', 'prosody'], [(sentence_extra_ssml, "whisper tts")]),
            # mixed valid / invalid ssml
            (['speak', 'prosody'], [(sentence_bad_ssml, sentence)]),
            # unsupported ssml
            ([], [(sentence, sentence_no_ssml),
                  (sentence_bad_ssml, sentence_no_ssml)]),
        ]

        engine = TTS()  # dummy engine
        for supported_tags, checks in scenarios:
            engine.ssml_tags = supported_tags
            for utterance, expected in checks:
                self.assertEqual(engine.validate_ssml(utterance), expected)

        self.assertEqual(TTS.remove_ssml(sentence), sentence_no_ssml)

    def test_format_speak_tags_with_speech(self):
        """Every input variant is normalized to one tagged phrase."""
        expected = "<speak>Speak This.</speak>"
        for raw in self._SPEAK_INPUTS:
            self.assertEqual(TTS.format_speak_tags(raw), expected)

    def test_format_speak_tags_empty(self):
        """Inputs with nothing inside the speak tags yield a falsy result."""
        for raw in ("</speak>hello.", "hello.<speak>"):
            self.assertFalse(TTS.format_speak_tags(raw))

    def test_format_speak_tags_with_speech_no_tags(self):
        """With include_tags False only the bare phrase is returned."""
        expected = "Speak This."
        for raw in self._SPEAK_INPUTS:
            self.assertEqual(TTS.format_speak_tags(raw, False), expected)

0 comments on commit e187cb5

Please sign in to comment.