Release 0.1.1 (#6)

* Adapted Azure TTS API changes (#5)
* Adapted Azure TTS API changes
* Update README.md
* Bump version
ManimCommunity · Oct 26, 2022 · 2b1c709 · 2b1c709
1 parent 0079978
commit 2b1c709
Show file tree

Hide file tree

Showing 4 changed files with 78 additions and 79 deletions.
diff --git a/README.md b/README.md
@@ -10,6 +10,10 @@
 - Develop an animation with an auto-generated AI voice without having to re-record and re-sync the audio.
 - Record a voiceover and have it stitched back onto the video instantly. (Note that this is not the same as AI voice cloning)
 
+Here is a demo:
+
+https://user-images.githubusercontent.com/2453968/198145393-6a1bd709-4441-4821-8541-45d5f5e25be7.mp4
+
 Currently supported TTS services:
 
 - [Azure Text to Speech](https://azure.microsoft.com/en-us/services/cognitive-services/text-to-speech/) (Recommended)
@@ -21,7 +25,7 @@ Currently supported TTS services:
 Install from PyPI with the extras `azure` and `gtts`:
 
 ```sh
-pip install manim-voiceover manim-voiceover[azure] manim-voiceover[gtts]
+pip install manim-voiceover "manim-voiceover[azure]" "manim-voiceover[gtts]"
 ```
 
 Check whether your installation works correctly:

diff --git a/manim_voiceover/services/azure.py b/manim_voiceover/services/azure.py
@@ -9,6 +9,17 @@
 load_dotenv()
 
 
+def serialize_word_boundary(wb):
+    return {
+        "audio_offset": wb["audio_offset"],
+        "duration_milliseconds": int(wb["duration_milliseconds"].microseconds / 1000),
+        "text_offset": wb["text_offset"],
+        "word_length": wb["word_length"],
+        "text": wb["text"],
+        "boundary_type": wb["boundary_type"],
+    }
+
+
 class AzureService(SpeechService):
     def __init__(
         self,
@@ -23,7 +34,9 @@ def __init__(
         self.output_format = output_format
         SpeechService.__init__(self, **kwargs)
 
-    def generate_from_text(self, text: str, output_dir: str = None, path: str = None, **kwargs) -> dict:
+    def generate_from_text(
+        self, text: str, output_dir: str = None, path: str = None, **kwargs
+    ) -> dict:
         inner = text
         # Remove bookmarks
         inner = re.sub("<bookmark\s*mark\s*=['\"]\w*[\"']\s*/>", "", inner)
@@ -119,17 +132,17 @@ def process_event(evt):
             lambda evt: word_boundaries.append(process_event(evt))
         )
 
-        speech_synthesis_result = speech_service.speak_ssml(ssml)
+        speech_synthesis_result = speech_service.speak_ssml_async(ssml).get()
+
         json_dict = {
             "input_text": text,
             "ssml": ssml,
-            "word_boundaries": word_boundaries,
+            "word_boundaries": [serialize_word_boundary(wb) for wb in word_boundaries],
             "original_audio": audio_path,
             "json_path": json_path,
         }
 
         # open(json_path, "w").write(json.dumps(json_dict, indent=2))
-
         if (
             speech_synthesis_result.reason
             == speechsdk.ResultReason.SynthesizingAudioCompleted