Skip to content

Commit

Permalink
Support for HLS transcription (resolves #62)
Browse files Browse the repository at this point in the history
  • Loading branch information
jhormigo committed Dec 9, 2023
1 parent da86c18 commit f3cd20f
Showing 1 changed file with 46 additions and 3 deletions.
49 changes: 46 additions & 3 deletions whisper_live/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import websocket
import uuid
import time
import subprocess


def resample(file: str, sr: int = 16000):
Expand Down Expand Up @@ -344,6 +345,46 @@ def write_audio_frames_to_file(self, frames, file_name):
wavfile.setframerate(self.rate)
wavfile.writeframes(frames)

def process_hls_stream(self, hls_url):
    """
    Connect to an HLS source, process the audio stream, and send it for transcription.

    FFmpeg is launched as a child process that decodes the stream to raw
    16-bit little-endian mono PCM at ``self.rate`` and writes it to its
    standard output. That output is read in blocks of ``self.chunk`` samples,
    converted with ``self.bytes_to_float_array`` and forwarded with
    ``self.send_packet_to_server`` until FFmpeg closes its output.

    Errors are reported on standard output and are not re-raised. The FFmpeg
    process is always terminated and reaped before this method returns.

    Args:
        hls_url (str): The URL of the HLS stream source.
    """
    print("[INFO]: Connecting to HLS stream...")
    process = None  # Stays None if FFmpeg could not be started

    try:
        # Launch an FFMPEG process to connect to the HLS stream
        command = [
            'ffmpeg',
            '-i', hls_url,  # Input URL
            '-acodec', 'pcm_s16le',  # Output codec
            '-f', 's16le',  # Output format
            '-ac', '1',  # Set audio channels to 1 (mono)
            '-ar', str(self.rate),  # Resample audio to the specified rate
            '-'
        ]
        # FFmpeg logs continuously to stderr. Nothing here reads it, so it
        # must not be a pipe: once the pipe buffer fills, FFmpeg would block
        # and the stream would stall.
        process = subprocess.Popen(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
        )

        # Process the stream
        bytes_per_chunk = self.chunk * 2  # 2 bytes per sample
        while True:
            in_bytes = process.stdout.read(bytes_per_chunk)
            if not in_bytes:
                break
            audio_array = self.bytes_to_float_array(in_bytes)
            self.send_packet_to_server(audio_array.tobytes())

        # End of output: give FFmpeg a moment to exit so a failure (bad URL,
        # unreachable host, ...) is reported instead of passing silently.
        try:
            return_code = process.wait(timeout=5)
        except subprocess.TimeoutExpired:
            return_code = None  # Still running; it is killed below
        if return_code:
            print(f"[ERROR]: FFmpeg exited with code {return_code}")

    except Exception as e:
        print(f"[ERROR]: Failed to connect to HLS stream: {e}")
    finally:
        if process is not None:
            if process.poll() is None:
                process.kill()
            # Reap the child and release the pipe so neither a zombie
            # process nor an open file descriptor is left behind.
            process.wait()
            process.stdout.close()

    print("[INFO]: HLS stream processing finished.")

def record(self, out_file="output_recording.wav"):
"""
Record audio data from the input stream and save it to a WAV file.
Expand Down Expand Up @@ -464,7 +505,7 @@ class TranscriptionClient:
def __init__(self, host, port, is_multilingual=False, lang=None, translate=False):
    """
    Create the underlying ``Client`` and keep it as ``self.client``.

    Every argument is forwarded positionally, unchanged and in the same
    order, to ``Client``; no validation is done here.

    Args:
        host: First argument passed to ``Client`` (presumably the server
            host name or address; confirm against ``Client``).
        port: Second argument passed to ``Client`` (presumably the server
            port; confirm against ``Client``).
        is_multilingual: Forwarded to ``Client``. Defaults to False.
        lang: Forwarded to ``Client``. Defaults to None.
        translate: Forwarded to ``Client``. Defaults to False.
    """
    self.client = Client(host, port, is_multilingual, lang, translate)

def __call__(self, audio=None):
def __call__(self, audio=None, hls_url=None):
"""
Start the transcription process.
Expand All @@ -483,8 +524,10 @@ def __call__(self, audio=None):
return
pass
print("[INFO]: Server Ready!")
if audio is not None:
if hls_url is not None:
self.client.process_hls_stream(hls_url)
elif audio is not None:
resampled_file = resample(audio)
self.client.play_file(resampled_file)
else:
self.client.record()
self.client.record()

0 comments on commit f3cd20f

Please sign in to comment.