Skip to content

Commit

Permalink
Merge pull request #212 from dshepelev15/feat/RTSP_support
Browse files Browse the repository at this point in the history
Add support for RTSP stream
  • Loading branch information
makaveli10 authored May 28, 2024
2 parents c0a947a + 615c9c7 commit 03e30e1
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 24 deletions.
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,14 @@ client("tests/jfk.wav")
client()
```

- To transcribe from an RTSP stream:
```python
client(rtsp_url="rtsp://admin:<password>@<camera-host>/rtsp")
```

- To transcribe from an HLS stream:
```python
client(hls_url="http://as-hls-ww-live.akamaized.net/pool_904/live/ww/bbc_1xtra/bbc_1xtra.isml/bbc_1xtra-audio%3d96000.norewind.m3u8")
client(hls_url="http://as-hls-ww-live.akamaized.net/pool_904/live/ww/bbc_1xtra/bbc_1xtra.isml/bbc_1xtra-audio%3d96000.norewind.m3u8")
```

## Browser Extensions
Expand Down
74 changes: 51 additions & 23 deletions whisper_live/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,7 @@ def __init__(self, clients):
print(f"[WARN]: Unable to access microphone. {error}")
self.stream = None

def __call__(self, audio=None, hls_url=None, save_file=None):
def __call__(self, audio=None, rtsp_url=None, hls_url=None, save_file=None):
"""
Start the transcription process.
Expand All @@ -307,6 +307,10 @@ def __call__(self, audio=None, hls_url=None, save_file=None):
audio (str, optional): Path to an audio file for transcription. Default is None, which triggers live recording.
"""
assert sum(
source is not None for source in [audio, rtsp_url, hls_url]
) <= 1, 'You must provide only one selected source'

print("[INFO]: Waiting for server ready ...")
for client in self.clients:
while not client.recording:
Expand All @@ -320,6 +324,8 @@ def __call__(self, audio=None, hls_url=None, save_file=None):
elif audio is not None:
resampled_file = utils.resample(audio)
self.play_file(resampled_file)
elif rtsp_url is not None:
self.process_rtsp_stream(rtsp_url)
else:
self.record()

Expand Down Expand Up @@ -398,6 +404,16 @@ def play_file(self, filename):
self.write_all_clients_srt()
print("[INFO]: Keyboard interrupt.")

def process_rtsp_stream(self, rtsp_url):
    """
    Connect to an RTSP source, process the audio stream, and send it for transcription.

    Args:
        rtsp_url (str): The URL of the RTSP stream source.
    """
    # Start the ffmpeg decoder for this URL, then hand it to the read loop
    # that the HLS path also uses.
    process = self.get_rtsp_ffmpeg_process(rtsp_url)
    self.handle_ffmpeg_process(process, stream_type='RTSP')

def process_hls_stream(self, hls_url, save_file):
"""
Connect to an HLS source, process the audio stream, and send it for transcription.
Expand All @@ -406,27 +422,12 @@ def process_hls_stream(self, hls_url, save_file):
hls_url (str): The URL of the HLS stream source.
save_file (str, optional): Local path to save the network stream.
"""
print("[INFO]: Connecting to HLS stream...")
process = None # Initialize process to None
process = self.get_hls_ffmpeg_process(hls_url, save_file)
self.handle_ffmpeg_process(process, stream_type='HLS')

def handle_ffmpeg_process(self, process, stream_type):
print(f"[INFO]: Connecting to {stream_type} stream...")
try:
# Connecting to the HLS stream using ffmpeg-python
if save_file is None:
process = (
ffmpeg
.input(hls_url, threads=0)
.output('-', format='s16le', acodec='pcm_s16le', ac=1, ar=self.rate)
.run_async(pipe_stdout=True, pipe_stderr=True)
)
else:
input = ffmpeg.input(hls_url, threads=0)
output_file = input.output(save_file, acodec='copy', vcodec='copy').global_args('-loglevel', 'quiet')
output_std = input.output('-', format='s16le', acodec='pcm_s16le', ac=1, ar=self.rate)
process = (
ffmpeg.merge_outputs(output_file, output_std)
.run_async(pipe_stdout=True, pipe_stderr=True)
)

# Process the stream
while True:
in_bytes = process.stdout.read(self.chunk * 2) # 2 bytes per sample
Expand All @@ -436,14 +437,41 @@ def process_hls_stream(self, hls_url, save_file):
self.multicast_packet(audio_array.tobytes())

except Exception as e:
print(f"[ERROR]: Failed to connect to HLS stream: {e}")
print(f"[ERROR]: Failed to connect to {stream_type} stream: {e}")
finally:
self.close_all_clients()
self.close_all_clients()
self.write_all_clients_srt()
if process:
process.kill()

print("[INFO]: HLS stream processing finished.")
print(f"[INFO]: {stream_type} stream processing finished.")

def get_rtsp_ffmpeg_process(self, rtsp_url):
    """
    Start an asynchronous ffmpeg process that reads an RTSP stream.

    The process writes audio to its stdout using the ``s16le`` format with
    the ``pcm_s16le`` codec, one channel, at a sample rate of ``self.rate``.
    Both stdout and stderr of the process are piped.

    Args:
        rtsp_url (str): The URL of the RTSP stream source.

    Returns:
        The process object returned by ffmpeg-python's ``run_async``.
    """
    source = ffmpeg.input(rtsp_url, threads=0)
    pcm_output = source.output(
        '-', format='s16le', acodec='pcm_s16le', ac=1, ar=self.rate
    )
    return pcm_output.run_async(pipe_stdout=True, pipe_stderr=True)

def get_hls_ffmpeg_process(self, hls_url, save_file):
    """
    Start an asynchronous ffmpeg process that reads an HLS stream.

    The process always writes audio to its stdout using the ``s16le`` format
    with the ``pcm_s16le`` codec, one channel, at a sample rate of
    ``self.rate``. When ``save_file`` is given, the same input is also
    written to that path with audio and video codecs copied, and ffmpeg is
    run with ``-loglevel quiet``.

    Args:
        hls_url (str): The URL of the HLS stream source.
        save_file (str, optional): Local path to save the network stream.
            ``None`` disables saving.

    Returns:
        The process object returned by ffmpeg-python's ``run_async``.
    """
    # A single input node is shared by both outputs; it is deliberately not
    # called ``input`` so the builtin is not shadowed.
    stream = ffmpeg.input(hls_url, threads=0)

    if save_file is None:
        return (
            stream
            .output('-', format='s16le', acodec='pcm_s16le', ac=1, ar=self.rate)
            .run_async(pipe_stdout=True, pipe_stderr=True)
        )

    # NOTE(review): stderr is piped in both branches, but only this branch
    # silences ffmpeg's logging - confirm the consumer drains stderr.
    file_output = (
        stream
        .output(save_file, acodec='copy', vcodec='copy')
        .global_args('-loglevel', 'quiet')
    )
    pcm_output = stream.output('-', format='s16le', acodec='pcm_s16le', ac=1, ar=self.rate)
    return (
        ffmpeg.merge_outputs(file_output, pcm_output)
        .run_async(pipe_stdout=True, pipe_stderr=True)
    )

def record(self, out_file="output_recording.wav"):
"""
Expand Down

0 comments on commit 03e30e1

Please sign in to comment.