Implement format_speak_tags from neon_utils (#36)

* Implement `format_speak_tags` from neon_utils
* Add unit tests
* Add SSML unit tests

Co-authored-by: Daniel McKnight <[email protected]>
Co-authored-by: jarbasai <[email protected]>
OpenVoiceOS · Mar 3, 2022 · e187cb5 · e187cb5
1 parent 04ad316
commit e187cb5
Show file tree

Hide file tree

Showing 3 changed files with 167 additions and 12 deletions.
diff --git a/ovos_plugin_manager/templates/tts.py b/ovos_plugin_manager/templates/tts.py
@@ -492,6 +492,45 @@ def remove_ssml(text):
         """
         return re.sub('<[^>]*>', '', text).replace('  ', ' ')
 
+    @staticmethod
+    def format_speak_tags(sentence: str, include_tags: bool = True) -> str:
+        """
+        Cleans up SSML tags for speech synthesis and ensures the phrase is
+        wrapped in 'speak' tags and any excluded text is removed.
+        A missing opening tag is assumed at the start of the sentence and a
+        missing closing tag at its end; text outside the tags is dropped.
+        Args:
+            sentence: Input sentence to be spoken
+            include_tags: Flag to include <speak> tags in returned string
+        Returns:
+            Cleaned sentence to pass to TTS, "" if nothing is left to speak
+        """
+        open_tag, close_tag = "<speak>", "</speak>"
+        to_speak = sentence
+        # Assume speak starts at the beginning of the sentence
+        if open_tag not in to_speak:
+            to_speak = f"{open_tag}{to_speak}"
+        # Assume speak ends at the end of the sentence
+        if close_tag not in to_speak:
+            to_speak = f"{to_speak}{close_tag}"
+
+        # Trim text outside of speak tags
+        if not to_speak.startswith(open_tag):
+            to_speak = f"{open_tag}{to_speak.split(open_tag, 1)[1]}"
+        if not to_speak.endswith(close_tag):
+            to_speak = f"{to_speak.split(close_tag, 1)[0]}{close_tag}"
+
+        # Nothing between the tags means there is nothing to speak
+        if to_speak == f"{open_tag}{close_tag}":
+            return ""
+        if include_tags:
+            return to_speak
+        # Slice the wrapping tags off exactly: str.lstrip/rstrip treat their
+        # argument as a character set and would also eat leading/trailing
+        # letters of the sentence itself (e.g. "speak", "please").
+        # to_speak starts with open_tag and ends with close_tag here.
+        return to_speak[len(open_tag):-len(close_tag)]
+
     def validate_ssml(self, utterance):
         """Check if engine supports ssml, if not remove all tags.
 
@@ -503,6 +542,14 @@ def validate_ssml(self, utterance):
         Returns:
             str: validated_sentence
         """
+
+        # Validate speak tags
+        if not self.ssml_tags or "speak" not in self.ssml_tags:
+            self.format_speak_tags(utterance, False)
+        elif self.ssml_tags and "speak" in self.ssml_tags:
+            self.format_speak_tags(utterance)
+
+
         # if ssml is not supported by TTS engine remove all tags
         if not self.ssml_tags:
             return self.remove_ssml(utterance)

diff --git a/test/unittests/test_something.py b/test/unittests/test_something.py
diff --git a/test/unittests/test_ssml.py b/test/unittests/test_ssml.py
@@ -0,0 +1,120 @@
+# Unit tests for SSML validation and <speak> tag formatting in the TTS template
+import unittest
+from ovos_plugin_manager.templates.tts import TTS
+from ovos_utils.messagebus import FakeBus
+
+
+class TestSSML(unittest.TestCase):
+    """Tests for TTS.validate_ssml, remove_ssml and format_speak_tags."""
+
+    @classmethod
+    def setUpClass(cls):
+        cls.tts = TTS()  # dummy engine shared by the test case
+
+    def test_ssml(self):
+        sentence = "<speak>Prosody can be used to change the way words " \
+                   "sound. The following words are " \
+                   "<prosody volume='x-loud'> " \
+                   "quite a bit louder than the rest of this passage. " \
+                   "</prosody> Each morning when I wake up, " \
+                   "<prosody rate='x-slow'>I speak quite slowly and " \
+                   "deliberately until I have my coffee.</prosody> I can " \
+                   "also change the pitch of my voice using prosody. " \
+                   "Do you like <prosody pitch='+5%'> speech with a pitch " \
+                   "that is higher, </prosody> or <prosody pitch='-10%'> " \
+                   "is a lower pitch preferable?</prosody></speak>"
+        sentence_no_ssml = "Prosody can be used to change the way " \
+                           "words sound. The following words are quite " \
+                           "a bit louder than the rest of this passage. " \
+                           "Each morning when I wake up, I speak quite " \
+                           "slowly and deliberately until I have my " \
+                           "coffee. I can also change the pitch of my " \
+                           "voice using prosody. Do you like speech " \
+                           "with a pitch that is higher, or is " \
+                           "a lower pitch preferable?"
+        sentence_bad_ssml = "<foo_invalid>" + sentence + \
+                            "</foo_invalid end=whatever>"
+        sentence_extra_ssml = "<whispered>whisper tts<\\whispered>"
+
+        tts = TTS()  # dummy engine
+        # test valid ssml
+        tts.ssml_tags = ['speak', 'prosody']
+        self.assertEqual(tts.validate_ssml(sentence), sentence)
+
+        # test extra ssml
+        tts.ssml_tags = ['whispered']
+        self.assertEqual(tts.validate_ssml(sentence_extra_ssml),
+                         sentence_extra_ssml)
+
+        # test unsupported extra ssml
+        tts.ssml_tags = ['speak', 'prosody']
+        self.assertEqual(tts.validate_ssml(sentence_extra_ssml), "whisper tts")
+
+        # test mixed valid / invalid ssml
+        tts.ssml_tags = ['speak', 'prosody']
+        self.assertEqual(tts.validate_ssml(sentence_bad_ssml), sentence)
+
+        # test unsupported ssml
+        tts.ssml_tags = []
+        self.assertEqual(tts.validate_ssml(sentence), sentence_no_ssml)
+        # test unsupported ssml wrapped in invalid tags
+        self.assertEqual(tts.validate_ssml(sentence_bad_ssml), sentence_no_ssml)
+        # test static tag removal, independent of engine configuration
+        self.assertEqual(TTS.remove_ssml(sentence), sentence_no_ssml)
+
+    def test_format_speak_tags_with_speech(self):
+        """Partial or padded inputs normalize to one tagged phrase."""
+        valid_output = "<speak>Speak This.</speak>"
+        no_tags = TTS.format_speak_tags("Speak This.")
+        self.assertEqual(no_tags, valid_output)
+
+        leading_only = TTS.format_speak_tags("<speak>Speak This.")
+        self.assertEqual(leading_only, valid_output)
+
+        leading_with_exclusion = TTS.format_speak_tags("Nope.<speak>Speak This.")
+        self.assertEqual(leading_with_exclusion, valid_output)
+
+        trailing_only = TTS.format_speak_tags("Speak This.</speak>")
+        self.assertEqual(trailing_only, valid_output)
+
+        trailing_with_exclusion = TTS.format_speak_tags("Speak This.</speak> But not this.")
+        self.assertEqual(trailing_with_exclusion, valid_output)
+
+        tagged = TTS.format_speak_tags("<speak>Speak This.</speak>")
+        self.assertEqual(tagged, valid_output)
+
+        tagged_with_exclusion = TTS.format_speak_tags("Don't<speak>Speak This.</speak>But Not this.")
+        self.assertEqual(tagged_with_exclusion, valid_output)
+
+    def test_format_speak_tags_empty(self):
+        """Inputs with nothing inside the speak tags yield a falsy result."""
+        leading_closure = TTS.format_speak_tags("</speak>hello.")
+        self.assertFalse(leading_closure)
+
+        trailing_open = TTS.format_speak_tags("hello.<speak>")
+        self.assertFalse(trailing_open)
+
+    def test_format_speak_tags_with_speech_no_tags(self):
+        """With include_tags=False the same inputs return the bare phrase."""
+        valid_output = "Speak This."
+        no_tags = TTS.format_speak_tags("Speak This.", False)
+        self.assertEqual(no_tags, valid_output)
+
+        leading_only = TTS.format_speak_tags("<speak>Speak This.", False)
+        self.assertEqual(leading_only, valid_output)
+
+        leading_with_exclusion = TTS.format_speak_tags("Nope.<speak>Speak This.", False)
+        self.assertEqual(leading_with_exclusion, valid_output)
+
+        trailing_only = TTS.format_speak_tags("Speak This.</speak>", False)
+        self.assertEqual(trailing_only, valid_output)
+
+        trailing_with_exclusion = TTS.format_speak_tags("Speak This.</speak> But not this.", False)
+        self.assertEqual(trailing_with_exclusion, valid_output)
+
+        tagged = TTS.format_speak_tags("<speak>Speak This.</speak>", False)
+        self.assertEqual(tagged, valid_output)
+
+        tagged_with_exclusion = TTS.format_speak_tags("Don't<speak>Speak This.</speak>But Not this.", False)
+        self.assertEqual(tagged_with_exclusion, valid_output)
+