Support for HLS transcription (resolves #62)

collabora · Dec 9, 2023 · f3cd20f · f3cd20f
1 parent da86c18
commit f3cd20f
Showing 1 changed file with 46 additions and 3 deletions.
diff --git a/whisper_live/client.py b/whisper_live/client.py
@@ -11,6 +11,7 @@
 import websocket
 import uuid
 import time
+import subprocess
 
 
 def resample(file: str, sr: int = 16000):
@@ -344,6 +345,46 @@ def write_audio_frames_to_file(self, frames, file_name):
             wavfile.setframerate(self.rate)
             wavfile.writeframes(frames)
 
+    def process_hls_stream(self, hls_url):
+        """
+        Connect to an HLS source, process the audio stream, and send it for transcription.
+
+        FFmpeg decodes the stream to mono 16-bit PCM at `self.rate` Hz on its stdout; the
+        samples are read `self.chunk` at a time and forwarded to the server until EOF.
+
+        Args:
+            hls_url (str): The URL of the HLS stream source.
+        """
+        print("[INFO]: Connecting to HLS stream...")
+        process = None  # Stays None if FFmpeg fails to launch
+        try:
+            command = [
+                'ffmpeg',
+                '-i', hls_url,         # Input URL
+                '-acodec', 'pcm_s16le', # Output codec
+                '-f', 's16le',         # Output format
+                '-ac', '1',            # Set audio channels to 1 (mono)
+                '-ar', str(self.rate), # Resample audio to the specified rate
+                '-'                    # Write the raw samples to stdout
+            ]
+            # Discard stderr: FFmpeg logs continuously and an undrained pipe would fill and stall it
+            process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
+            while True:  # Runs until FFmpeg closes its stdout
+                in_bytes = process.stdout.read(self.chunk * 2)  # 2 bytes per sample
+                if not in_bytes:
+                    break
+                audio_array = self.bytes_to_float_array(in_bytes)
+                self.send_packet_to_server(audio_array.tobytes())
+        except Exception as e:
+            print(f"[ERROR]: Failed to connect to HLS stream: {e}")
+        finally:
+            if process is not None:
+                process.kill()
+                process.wait()  # Reap the child so it does not linger as a zombie
+                process.stdout.close()
+
+        print("[INFO]: HLS stream processing finished.")
+
     def record(self, out_file="output_recording.wav"):
         """
         Record audio data from the input stream and save it to a WAV file.
@@ -464,7 +505,7 @@ class TranscriptionClient:
     def __init__(self, host, port, is_multilingual=False, lang=None, translate=False):
         self.client = Client(host, port, is_multilingual, lang, translate)
 
-    def __call__(self, audio=None):
+    def __call__(self, audio=None, hls_url=None):
         """
         Start the transcription process.
 
@@ -483,8 +524,10 @@ def __call__(self, audio=None):
                 return
             pass
         print("[INFO]: Server Ready!")
-        if audio is not None:
+        if hls_url is not None:
+            self.client.process_hls_stream(hls_url)
+        elif audio is not None:
             resampled_file = resample(audio)
             self.client.play_file(resampled_file)
         else:
-            self.client.record()
+            self.client.record()