-
Notifications
You must be signed in to change notification settings - Fork 1.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #797 from kunci115/main
live streaming socket
- Loading branch information
Showing
5 changed files
with
153 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,3 +23,4 @@ py-cpuinfo | |
hjson | ||
psutil | ||
sounddevice | ||
spacy==3.7.5 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
import socket | ||
import sounddevice as sd | ||
import numpy as np | ||
|
||
def play_audio_stream(client_socket):
    """Play raw float32 audio received on ``client_socket`` until the stream ends.

    Bytes read from the socket are interpreted as mono float32 samples and
    written to a 24 kHz sounddevice output stream. Playback ends when the
    literal marker ``END_OF_AUDIO`` is received or when the peer closes the
    connection.

    Args:
        client_socket: Connected socket-like object exposing ``recv``.
    """
    end_marker = b"END_OF_AUDIO"
    playback_bytes = 4096  # bytes handed to the output stream per write
    sample_bytes = 4  # size of one float32 sample
    buffer = b''
    stream = sd.OutputStream(samplerate=24000, channels=1, dtype='float32')
    stream.start()

    try:
        finished = False
        while not finished:
            chunk = client_socket.recv(1024)
            buffer += chunk

            # Search the accumulated buffer, not just the latest read, so a
            # marker that straddles two recv() calls is still detected.
            marker_pos = buffer.find(end_marker)
            if marker_pos != -1:
                buffer = buffer[:marker_pos]
                finished = True
            elif not chunk:
                # recv() returned b'': the peer closed without sending the
                # marker. Stop instead of looping forever on empty reads.
                finished = True

            # While more data may arrive, hold back the bytes that could be
            # the beginning of a marker so they are never played as audio.
            holdback = 0 if finished else len(end_marker) - 1
            while len(buffer) - holdback >= playback_bytes:
                audio_array = np.frombuffer(buffer[:playback_bytes], dtype=np.float32)
                stream.write(audio_array)
                buffer = buffer[playback_bytes:]

        # Flush the tail, dropping a trailing partial sample that
        # np.frombuffer would otherwise reject with ValueError.
        usable = len(buffer) - len(buffer) % sample_bytes
        if usable:
            stream.write(np.frombuffer(buffer[:usable], dtype=np.float32))
    finally:
        stream.stop()
        stream.close()
|
||
def send_text_to_server(character_name, text, server_ip='localhost', server_port=5000):
    """Send a synthesis request to the server and play the audio it returns.

    The request is the UTF-8 encoding of ``"{character_name}|{text}"``.

    Args:
        character_name: Voice identifier placed before the ``|`` separator.
        text: Text to be spoken, placed after the separator.
        server_ip: Host name or address of the server.
        server_port: TCP port of the server.
    """
    # The context manager closes the socket on every exit path, including a
    # failed connect(), which previously leaked the descriptor.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as client_socket:
        client_socket.connect((server_ip, server_port))

        data = f"{character_name}|{text}"
        client_socket.sendall(data.encode('utf-8'))

        play_audio_stream(client_socket)

        print("Audio playback finished.")
|
||
|
||
if __name__ == "__main__":
    # Manual smoke test: request one sentence from a server on the default
    # host/port and play the streamed result.
    send_text_to_server(
        character_name="deniro",
        text="Hello This is just for a live speaking test",
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
import spacy | ||
import threading | ||
import socket | ||
from tortoise.api_fast import TextToSpeech | ||
from utils.audio import load_voices | ||
|
||
tts = TextToSpeech() | ||
nlp = spacy.load("en_core_web_sm") | ||
|
||
|
||
def generate_audio_stream(text, tts, voice_samples):
    """Yield the audio chunks produced by ``tts.tts_stream`` for ``text``.

    Args:
        text: Text to synthesize.
        tts: Object exposing ``tts_stream``.
        voice_samples: Voice name; passed to ``load_voices`` as a
            single-element list to obtain samples and conditioning latents.

    Yields:
        Each chunk produced by ``tts.tts_stream``, unchanged.
    """
    print(f"Generating audio stream...: {text}")
    samples, latents = load_voices([voice_samples])
    yield from tts.tts_stream(
        text,
        voice_samples=samples,
        conditioning_latents=latents,
        verbose=True,
        stream_chunk_size=40,  # Adjust chunk size as needed
    )
|
||
|
||
def split_text(text, max_length=200):
    """Group the sentences of ``text`` into chunks of about ``max_length`` characters.

    Sentences are detected with the module-level spaCy pipeline ``nlp`` and
    are never split: a single sentence longer than ``max_length`` becomes a
    chunk of its own.

    Args:
        text: Text to split.
        max_length: Target upper bound on the length of each chunk.

    Returns:
        A list of non-empty strings, each one or more sentences joined by a
        single space.
    """
    chunks = []
    current = []
    length = 0

    for sent in nlp(text).sents:
        sentence = sent.text
        # Flush only when something has been collected; otherwise an
        # oversized first sentence would emit an empty chunk.
        if current and length + len(sentence) > max_length:
            chunks.append(' '.join(current))
            current = []
            length = 0
        current.append(sentence)
        length += len(sentence) + 1  # +1 for the joining space

    if current:
        chunks.append(' '.join(current))

    return chunks
|
||
|
||
def handle_client(client_socket, tts):
    """Serve synthesis requests on one client connection until it closes.

    Each read of up to 1024 bytes is treated as one request of the form
    ``character_name|text`` encoded as UTF-8. The text is split into chunks,
    each chunk is synthesized, and the raw bytes of every audio chunk are
    sent back, followed by the marker ``END_OF_AUDIO``. A malformed request
    is reported and ends the connection instead of raising in the thread.

    NOTE: there is no message framing, so a request longer than 1024 bytes
    arrives as several reads and is not reassembled.

    Args:
        client_socket: Connected socket for this client; always closed on exit.
        tts: Text-to-speech engine forwarded to ``generate_audio_stream``.
    """
    try:
        while True:
            raw = client_socket.recv(1024)
            if not raw:
                break

            try:
                data = raw.decode('utf-8')
            except UnicodeDecodeError:
                print("Rejecting request: payload is not valid UTF-8.")
                break

            # partition() never raises; an empty separator means '|' is missing.
            character_name, separator, text = data.partition('|')
            if not separator:
                print("Rejecting request: expected 'character_name|text'.")
                break

            text_chunks = split_text(text, max_length=200)
            print(text_chunks)
            for chunk in text_chunks:
                audio_stream = generate_audio_stream(chunk, tts, character_name)

                for audio_chunk in audio_stream:
                    audio_data = audio_chunk.cpu().numpy().flatten()
                    client_socket.sendall(audio_data.tobytes())

            client_socket.sendall(b"END_OF_AUDIO")

    finally:
        client_socket.close()
        print("Client disconnected.")
|
||
|
||
def start_server(host='0.0.0.0', port=5000):
    """Accept TCP clients forever, serving each one on its own thread.

    Args:
        host: Address to bind to; the default listens on all interfaces.
        port: TCP port to listen on.
    """
    # The context manager closes the listening socket when the loop exits
    # through an exception such as KeyboardInterrupt.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as server:
        # Allow an immediate restart without "Address already in use".
        server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        server.bind((host, port))
        server.listen(5)
        print(f"Server listening on port {port}")

        while True:
            client_socket, addr = server.accept()
            print(f"Accepted connection from {addr}")
            client_handler = threading.Thread(target=handle_client, args=(client_socket, tts))
            client_handler.start()
|
||
|
||
if __name__ == "__main__":
    # Run the blocking accept loop when executed as a script.
    start_server()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters