Merge pull request #212 from dshepelev15/feat/RTSP_support

Add support for RTSP stream
collabora · May 28, 2024 · 03e30e1 · 03e30e1
2 parents c0a947a + 615c9c7
commit 03e30e1
Show file tree

Hide file tree

Showing 2 changed files with 57 additions and 24 deletions.
diff --git a/README.md b/README.md
@@ -86,9 +86,14 @@ client("tests/jfk.wav")
 client()
 ```
 
+- TO transcribe from a RTSP stream:
+```python
+client(rtsp_url="rtsp://admin:[email protected]/rtsp")
+```
+
 - To transcribe from a HLS stream:
 ```python
-client(hls_url="http://as-hls-ww-live.akamaized.net/pool_904/live/ww/bbc_1xtra/bbc_1xtra.isml/bbc_1xtra-audio%3d96000.norewind.m3u8") 
+client(hls_url="http://as-hls-ww-live.akamaized.net/pool_904/live/ww/bbc_1xtra/bbc_1xtra.isml/bbc_1xtra-audio%3d96000.norewind.m3u8")
 ```
 
 ## Browser Extensions

diff --git a/whisper_live/client.py b/whisper_live/client.py
@@ -295,7 +295,7 @@ def __init__(self, clients):
             print(f"[WARN]: Unable to access microphone. {error}")
             self.stream = None
 
-    def __call__(self, audio=None, hls_url=None, save_file=None):
+    def __call__(self, audio=None, rtsp_url=None, hls_url=None, save_file=None):
         """
         Start the transcription process.
 
@@ -307,6 +307,10 @@ def __call__(self, audio=None, hls_url=None, save_file=None):
             audio (str, optional): Path to an audio file for transcription. Default is None, which triggers live recording.
 
         """
+        assert sum(
+            source is not None for source in [audio, rtsp_url, hls_url]
+        ) <= 1, 'You must provide only one selected source'
+
         print("[INFO]: Waiting for server ready ...")
         for client in self.clients:
             while not client.recording:
@@ -320,6 +324,8 @@ def __call__(self, audio=None, hls_url=None, save_file=None):
         elif audio is not None:
             resampled_file = utils.resample(audio)
             self.play_file(resampled_file)
+        elif rtsp_url is not None:
+            self.process_rtsp_stream(rtsp_url)
         else:
             self.record()
 
@@ -398,6 +404,16 @@ def play_file(self, filename):
                 self.write_all_clients_srt()
                 print("[INFO]: Keyboard interrupt.")
 
+    def process_rtsp_stream(self, rtsp_url):
+        """
+        Connect to an RTSP source, process the audio stream, and send it for trascription.
+
+        Args:
+            rtsp_url (str): The URL of the RTSP stream source.
+        """
+        process = self.get_rtsp_ffmpeg_process(rtsp_url)
+        self.handle_ffmpeg_process(process, stream_type='RTSP')
+
     def process_hls_stream(self, hls_url, save_file):
         """
         Connect to an HLS source, process the audio stream, and send it for transcription.
@@ -406,27 +422,12 @@ def process_hls_stream(self, hls_url, save_file):
             hls_url (str): The URL of the HLS stream source.
             save_file （str, optional): Local path to save the network stream.
         """
-        print("[INFO]: Connecting to HLS stream...")
-        process = None  # Initialize process to None
+        process = self.get_hls_ffmpeg_process(hls_url, save_file)
+        self.handle_ffmpeg_process(process, stream_type='HLS')
 
+    def handle_ffmpeg_process(self, process, stream_type):
+        print(f"[INFO]: Connecting to {stream_type} stream...")
         try:
-            # Connecting to the HLS stream using ffmpeg-python
-            if save_file is None:
-                process = (
-                    ffmpeg
-                    .input(hls_url, threads=0)
-                    .output('-', format='s16le', acodec='pcm_s16le', ac=1, ar=self.rate)
-                    .run_async(pipe_stdout=True, pipe_stderr=True)
-                )
-            else:
-                input = ffmpeg.input(hls_url, threads=0)
-                output_file = input.output(save_file, acodec='copy', vcodec='copy').global_args('-loglevel', 'quiet')
-                output_std = input.output('-', format='s16le', acodec='pcm_s16le', ac=1, ar=self.rate)
-                process = (
-                    ffmpeg.merge_outputs(output_file, output_std)
-                    .run_async(pipe_stdout=True, pipe_stderr=True)
-                )
-
             # Process the stream
             while True:
                 in_bytes = process.stdout.read(self.chunk * 2)  # 2 bytes per sample
@@ -436,14 +437,41 @@ def process_hls_stream(self, hls_url, save_file):
                 self.multicast_packet(audio_array.tobytes())
 
         except Exception as e:
-            print(f"[ERROR]: Failed to connect to HLS stream: {e}")
+            print(f"[ERROR]: Failed to connect to {stream_type} stream: {e}")
         finally:
-            self.close_all_clients() 
+            self.close_all_clients()
             self.write_all_clients_srt()
             if process:
                 process.kill()
 
-        print("[INFO]: HLS stream processing finished.")
+        print(f"[INFO]: {stream_type} stream processing finished.")
+
+    def get_rtsp_ffmpeg_process(self, rtsp_url):
+        return (
+            ffmpeg
+            .input(rtsp_url, threads=0)
+            .output('-', format='s16le', acodec='pcm_s16le', ac=1, ar=self.rate)
+            .run_async(pipe_stdout=True, pipe_stderr=True)
+        )
+
+    def get_hls_ffmpeg_process(self, hls_url, save_file):
+        if save_file is None:
+            process = (
+                ffmpeg
+                .input(hls_url, threads=0)
+                .output('-', format='s16le', acodec='pcm_s16le', ac=1, ar=self.rate)
+                .run_async(pipe_stdout=True, pipe_stderr=True)
+            )
+        else:
+            input = ffmpeg.input(hls_url, threads=0)
+            output_file = input.output(save_file, acodec='copy', vcodec='copy').global_args('-loglevel', 'quiet')
+            output_std = input.output('-', format='s16le', acodec='pcm_s16le', ac=1, ar=self.rate)
+            process = (
+                ffmpeg.merge_outputs(output_file, output_std)
+                .run_async(pipe_stdout=True, pipe_stderr=True)
+            )
+
+        return process
 
     def record(self, out_file="output_recording.wav"):
         """