From 58d9308ec02fe301d079e704209999aee28d652c Mon Sep 17 00:00:00 2001 From: Albert Ho Date: Thu, 25 Jul 2024 16:25:08 -0700 Subject: [PATCH 01/27] replace sounddevice with pvspeaker --- demo/python/orca_demo_streaming.py | 155 +++++------------------------ demo/python/requirements.txt | 2 +- 2 files changed, 28 insertions(+), 129 deletions(-) diff --git a/demo/python/orca_demo_streaming.py b/demo/python/orca_demo_streaming.py index 05b0d92e..1fde54c8 100644 --- a/demo/python/orca_demo_streaming.py +++ b/demo/python/orca_demo_streaming.py @@ -15,113 +15,25 @@ import subprocess import threading import time -import traceback from dataclasses import dataclass from queue import Queue +from collections import deque +from itertools import chain from typing import ( - Any, Callable, - Dict, Optional, Sequence, ) -import numpy as np import pvorca import tiktoken -from numpy.typing import NDArray from pvorca import OrcaActivationLimitError, OrcaInvalidArgumentError -from sounddevice import ( - OutputStream, - query_devices, - PortAudioError, -) +from pvspeaker import PvSpeaker CUSTOM_PRON_PATTERN = r"\{(.*?\|.*?)\}" CUSTOM_PRON_PATTERN_NO_WHITESPACE = r"\{(.*?\|.*?)\}(?!\s)" -class StreamingAudioDevice: - def __init__(self, device_index: Optional[int] = None) -> None: - if device_index is None: - device_info = query_devices(kind="output") - device_index = int(device_info["index"]) - - self._device_index = device_index - self._queue: Queue[Sequence[int]] = Queue() - - self._buffer = None - self._stream = None - self._sample_rate = None - self._blocksize = None - - def start(self, sample_rate: int) -> None: - self._sample_rate = sample_rate - self._blocksize = self._sample_rate // 20 - self._stream = OutputStream( - channels=1, - samplerate=self._sample_rate, - dtype=np.int16, - device=self._device_index, - callback=self._callback, - blocksize=self._blocksize) - self._stream.start() - - # noinspection PyShadowingNames - # noinspection PyUnusedLocal - def _callback(self, outdata: NDArray, frames: int, time: Any, status: Any) -> None: - if self._queue.empty(): - outdata[:] = 0 - return - - pcm = self._queue.get() - outdata[:, 0] = pcm - - def play(self, pcm_chunk: Sequence[int]) -> None: - if self._stream is None: - raise ValueError("Stream is not started. Call `start` method first.") - - pcm_chunk = np.array(pcm_chunk, dtype=np.int16) - - if self._buffer is not None: - if pcm_chunk is not None: - pcm_chunk = np.concatenate([self._buffer, pcm_chunk]) - else: - pcm_chunk = self._buffer - self._buffer = None - - length = pcm_chunk.shape[0] - for index_block in range(0, length, self._blocksize): - if (length - index_block) < self._blocksize: - self._buffer = pcm_chunk[index_block: index_block + (length - index_block)] - else: - self._queue.put_nowait(pcm_chunk[index_block: index_block + self._blocksize]) - - def flush_and_terminate(self) -> None: - self.flush() - self.terminate() - - def flush(self) -> None: - if self._buffer is not None: - chunk = np.zeros(self._blocksize, dtype=np.int16) - chunk[:self._buffer.shape[0]] = self._buffer - self._queue.put_nowait(chunk) - - time_interval = self._blocksize / self._sample_rate - while not self._queue.empty(): - time.sleep(time_interval) - - time.sleep(time_interval) - - def terminate(self) -> None: - self._stream.stop() - self._stream.close() - - @staticmethod - def list_output_devices() -> Dict[str, Any]: - return query_devices(kind="output") - - def linux_machine() -> str: machine = platform.machine() if machine == "x86_64": @@ -159,7 +71,7 @@ class OrcaInput: def __init__( self, - play_audio_callback: Callable[[Sequence[int]], None], + write_audio_callback: Callable[[Sequence[int]], int], access_key: str, num_tokens_per_second: int, model_path: Optional[str] = None, @@ -171,7 +83,7 @@ def __init__( self._orca_stream = self._orca.stream_open() self._sample_rate = self._orca.sample_rate - self._play_audio_callback = play_audio_callback + self.write_audio_callback = write_audio_callback self._num_tokens_per_second = num_tokens_per_second assert self._num_tokens_per_second > 0 @@ -179,7 +91,7 @@ def __init__( self._thread = None self._time_first_audio_available = -1 - self._pcm_buffer: Queue[Sequence[int]] = Queue() + self._pcm_buffer = deque() self._wait_chunks = audio_wait_chunks or self._get_first_audio_wait_chunks() self._num_pcm_chunks_processed = 0 @@ -197,8 +109,6 @@ def _run(self) -> None: while True: orca_input = self._queue.get() if orca_input is None: - while not self._pcm_buffer.empty(): - self._play_audio_callback(self._pcm_buffer.get()) break try: @@ -210,12 +120,11 @@ def _run(self) -> None: raise ValueError(f"Orca could not synthesize text input `{orca_input.text}`: `{e}`") if pcm is not None: - if self._num_pcm_chunks_processed < self._wait_chunks: - self._pcm_buffer.put_nowait(pcm) - else: - while not self._pcm_buffer.empty(): - self._play_audio_callback(self._pcm_buffer.get()) - self._play_audio_callback(pcm) + self._pcm_buffer.append(pcm) + pcm_to_play = self._pcm_buffer.popleft() + written = self.write_audio_callback(pcm_to_play) + if written < len(pcm_to_play): + self._pcm_buffer.appendleft(pcm_to_play[written:]) if self._num_pcm_chunks_processed == 0: self._time_first_audio_available = time.time() @@ -233,10 +142,10 @@ def start(self) -> None: def synthesize(self, text: str) -> None: self._queue.put_nowait(self.OrcaInput(text=text, flush=False)) - def flush(self) -> None: + def flush(self) -> deque: self._queue.put_nowait(self.OrcaInput(text="", flush=True)) self._close_thread_blocking() - self.start() + return self._pcm_buffer def delete(self) -> None: self._close_thread_blocking() @@ -318,11 +227,13 @@ def main() -> None: "--show_audio_devices", action="store_true", help="Only list available audio output devices and exit") - parser.add_argument('--audio-device-index', type=int, default=None, help='Index of input audio device') + parser.add_argument('--audio-device-index', type=int, default=-1, help='Index of input audio device') args = parser.parse_args() if args.show_audio_devices: - print(StreamingAudioDevice.list_output_devices()) + devices = PvSpeaker.get_available_devices() + for i in range(len(devices)): + print("index: %d, device name: %s" % (i, devices[i])) exit(0) access_key = args.access_key @@ -333,25 +244,10 @@ def main() -> None: audio_wait_chunks = args.audio_wait_chunks audio_device_index = args.audio_device_index - try: - audio_device = StreamingAudioDevice(device_index=audio_device_index) - # Some systems may have issues with PortAudio only when starting the audio device. Test it here. - audio_device.start(sample_rate=16000) - audio_device.terminate() - play_audio_callback = audio_device.play - except PortAudioError: - print(traceback.format_exc()) - print( - "WARNING: Failed to initialize audio device, see details above. Falling back to running " - "the demo without audio playback.\n") - audio_device = None - - # noinspection PyUnusedLocal - def play_audio_callback(pcm: Sequence[int]): - pass + speaker = PvSpeaker(sample_rate=22050, bits_per_sample=16, buffer_size_secs=1, device_index=audio_device_index) orca = OrcaThread( - play_audio_callback=play_audio_callback, + write_audio_callback=speaker.write, num_tokens_per_second=tokens_per_second, access_key=access_key, model_path=model_path, @@ -360,8 +256,8 @@ def play_audio_callback(pcm: Sequence[int]): ) orca.start() - if audio_device is not None: - audio_device.start(sample_rate=orca.sample_rate) + if speaker is not None: + speaker.start() try: print(f"Orca version: {orca.version}\n") @@ -379,16 +275,19 @@ def play_audio_callback(pcm: Sequence[int]): text_stream_duration_seconds = time.time() - time_start_text_stream - orca.flush() + remaining_pcm = orca.flush() first_audio_available_seconds = orca.get_time_first_audio_available() - time_start_text_stream print(f"\n\nTime to finish text stream: {text_stream_duration_seconds:.2f} seconds") print(f"Time to receive first audio: {first_audio_available_seconds:.2f} seconds after text stream started\n") - if audio_device is not None: + if speaker is not None: print("Waiting for audio to finish ...") - audio_device.flush_and_terminate() + speaker.flush(list(chain.from_iterable(remaining_pcm))) + except KeyboardInterrupt: + speaker.stop() + print("\nStopped...") except OrcaActivationLimitError: print("AccessKey has reached its processing limit") finally: diff --git a/demo/python/requirements.txt b/demo/python/requirements.txt index a9a52a0c..2c53c86b 100644 --- a/demo/python/requirements.txt +++ b/demo/python/requirements.txt @@ -1,4 +1,4 @@ numpy>=1.24.0 pvorca==0.2.3 -sounddevice==0.4.6 +pvspeaker==1.0.1 tiktoken==0.6.0 From f348d587942c04d87edbd43117197013f08a9f70 Mon Sep 17 00:00:00 2001 From: Albert Ho Date: Fri, 26 Jul 2024 16:39:56 -0700 Subject: [PATCH 02/27] wip - rm while loop --- demo/python/orca_demo_streaming.py | 47 +++++++++++++++++------------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/demo/python/orca_demo_streaming.py b/demo/python/orca_demo_streaming.py index 1fde54c8..eb8e3742 100644 --- a/demo/python/orca_demo_streaming.py +++ b/demo/python/orca_demo_streaming.py @@ -16,6 +16,7 @@ import threading import time from dataclasses import dataclass +from os import remove from queue import Queue from collections import deque from itertools import chain @@ -221,7 +222,7 @@ def main() -> None: parser.add_argument( "--audio_wait_chunks", type=int, - default=None, + default=1, help="Number of PCM chunks to wait before starting to play audio. Default: system-dependent.") parser.add_argument( "--show_audio_devices", @@ -244,46 +245,52 @@ def main() -> None: audio_wait_chunks = args.audio_wait_chunks audio_device_index = args.audio_device_index - speaker = PvSpeaker(sample_rate=22050, bits_per_sample=16, buffer_size_secs=1, device_index=audio_device_index) + orca = pvorca.create(access_key=access_key, model_path=model_path, library_path=library_path) - orca = OrcaThread( - write_audio_callback=speaker.write, - num_tokens_per_second=tokens_per_second, - access_key=access_key, - model_path=model_path, - library_path=library_path, - audio_wait_chunks=audio_wait_chunks, - ) + # TODO: Make audio_wait_chunks a proper param + speaker = PvSpeaker(sample_rate=orca.sample_rate, bits_per_sample=16, buffer_size_secs=audio_wait_chunks, + device_index=audio_device_index) + + stream = orca.stream_open() - orca.start() if speaker is not None: speaker.start() + pcm_buf = deque() + try: print(f"Orca version: {orca.version}\n") print(f"Simulated text stream:") tokens = tokenize_text(text=text) - time_start_text_stream = time.time() + # time_start_text_stream = time.time() for token in tokens: print(f"{token}", end="", flush=True) - orca.synthesize(text=token) + pcm = stream.synthesize(text=token) + if pcm is not None: + if len(pcm_buf) != 0: + pcm_buf.append(pcm) + pcm = pcm_buf.popleft() + written = speaker.write(pcm) + if written < len(pcm): + pcm_buf.appendleft(pcm[written:]) time.sleep(1 / tokens_per_second) - text_stream_duration_seconds = time.time() - time_start_text_stream + # text_stream_duration_seconds = time.time() - time_start_text_stream - remaining_pcm = orca.flush() + remaining_pcm = stream.flush() + pcm_buf.append(remaining_pcm) - first_audio_available_seconds = orca.get_time_first_audio_available() - time_start_text_stream - print(f"\n\nTime to finish text stream: {text_stream_duration_seconds:.2f} seconds") - print(f"Time to receive first audio: {first_audio_available_seconds:.2f} seconds after text stream started\n") + # first_audio_available_seconds = orca.get_time_first_audio_available() - time_start_text_stream + # print(f"\n\nTime to finish text stream: {text_stream_duration_seconds:.2f} seconds") + # print(f"Time to receive first audio: {first_audio_available_seconds:.2f} seconds after text stream started\n") if speaker is not None: - print("Waiting for audio to finish ...") - speaker.flush(list(chain.from_iterable(remaining_pcm))) + print("\nWaiting for audio to finish ...") + speaker.flush(list(chain.from_iterable(pcm_buf))) except KeyboardInterrupt: speaker.stop() From bb825d90200098ba7414f9f87f3c54367e697d74 Mon Sep 17 00:00:00 2001 From: Albert Ho Date: Fri, 26 Jul 2024 17:51:05 -0700 Subject: [PATCH 03/27] wip - use process --- demo/python/orca_demo_streaming.py | 55 +++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 17 deletions(-) diff --git a/demo/python/orca_demo_streaming.py b/demo/python/orca_demo_streaming.py index eb8e3742..dbcac852 100644 --- a/demo/python/orca_demo_streaming.py +++ b/demo/python/orca_demo_streaming.py @@ -14,12 +14,14 @@ import re import subprocess import threading +import multiprocessing import time from dataclasses import dataclass from os import remove from queue import Queue from collections import deque from itertools import chain +from time import sleep from typing import ( Callable, Optional, @@ -194,6 +196,29 @@ def tokenize_text(text: str) -> Sequence[str]: return tokens_with_custom_pronunciations +def worker_function(queue, sample_rate, audio_wait_chunks): + speaker = PvSpeaker(sample_rate=sample_rate, bits_per_sample=16, buffer_size_secs=audio_wait_chunks) + speaker.start() + + pcm_buf = deque() + + while True: + if len(pcm_buf) > 0: + buf_pcm = pcm_buf.popleft() + else: + buf_pcm = queue.get() + if buf_pcm is None: + break + + worker_written = speaker.write(buf_pcm) + if worker_written < len(buf_pcm): + pcm_buf.appendleft(buf_pcm[worker_written:]) + + speaker.flush() + speaker.stop() + speaker.delete() + + def main() -> None: parser = argparse.ArgumentParser() parser.add_argument( @@ -248,15 +273,12 @@ def main() -> None: orca = pvorca.create(access_key=access_key, model_path=model_path, library_path=library_path) # TODO: Make audio_wait_chunks a proper param - speaker = PvSpeaker(sample_rate=orca.sample_rate, bits_per_sample=16, buffer_size_secs=audio_wait_chunks, - device_index=audio_device_index) stream = orca.stream_open() - if speaker is not None: - speaker.start() - - pcm_buf = deque() + queue = multiprocessing.Queue() + process = multiprocessing.Process(target=worker_function, args=(queue, orca.sample_rate, audio_wait_chunks)) + process.start() try: print(f"Orca version: {orca.version}\n") @@ -270,30 +292,29 @@ def main() -> None: pcm = stream.synthesize(text=token) if pcm is not None: - if len(pcm_buf) != 0: - pcm_buf.append(pcm) - pcm = pcm_buf.popleft() - written = speaker.write(pcm) - if written < len(pcm): - pcm_buf.appendleft(pcm[written:]) + queue.put(pcm) time.sleep(1 / tokens_per_second) # text_stream_duration_seconds = time.time() - time_start_text_stream remaining_pcm = stream.flush() - pcm_buf.append(remaining_pcm) + if remaining_pcm is not None: + queue.put(remaining_pcm) + + queue.put(None) + process.join() # first_audio_available_seconds = orca.get_time_first_audio_available() - time_start_text_stream # print(f"\n\nTime to finish text stream: {text_stream_duration_seconds:.2f} seconds") # print(f"Time to receive first audio: {first_audio_available_seconds:.2f} seconds after text stream started\n") - if speaker is not None: - print("\nWaiting for audio to finish ...") - speaker.flush(list(chain.from_iterable(pcm_buf))) + # if speaker is not None: + print("\nWaiting for audio to finish ...") + except KeyboardInterrupt: - speaker.stop() + # speaker.stop() print("\nStopped...") except OrcaActivationLimitError: print("AccessKey has reached its processing limit") From f2eca0ff44c8a491a54d387fa82c076ca056ea90 Mon Sep 17 00:00:00 2001 From: Albert Ho Date: Mon, 29 Jul 2024 14:15:00 -0700 Subject: [PATCH 04/27] c demo use pvspeaker --- .gitmodules | 5 +- demo/c/CMakeLists.txt | 11 +- demo/c/README.md | 8 +- demo/c/orca_demo_streaming.c | 193 ++++++++++++++++++++++++++++++++++- demo/c/pvspeaker | 1 + 5 files changed, 210 insertions(+), 8 deletions(-) create mode 160000 demo/c/pvspeaker diff --git a/.gitmodules b/.gitmodules index 41039694..c7eb0caa 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "demo/c/dr_libs"] path = demo/c/dr_libs - url = ../../mackron/dr_libs.git \ No newline at end of file + url = ../../mackron/dr_libs.git +[submodule "demo/c/pvspeaker"] + path = demo/c/pvspeaker + url = ../pvspeaker.git diff --git a/demo/c/CMakeLists.txt b/demo/c/CMakeLists.txt index 3efdb691..19490296 100644 --- a/demo/c/CMakeLists.txt +++ b/demo/c/CMakeLists.txt @@ -3,16 +3,21 @@ project(orca_demo_c) set(CMAKE_C_STANDARD 99) set(CMAKE_BUILD_TYPE Release) +add_subdirectory(pvspeaker/project) set(COMMON_LIBS dl) include_directories("${PROJECT_SOURCE_DIR}/../../include") add_executable(orca_demo orca_demo.c) -add_executable(orca_demo_streaming orca_demo_streaming.c) -target_include_directories(orca_demo_streaming PRIVATE dr_libs) +add_executable( + orca_demo_streaming + orca_demo_streaming.c) +target_include_directories(orca_demo_streaming PRIVATE dr_libs pvspeaker/project/include) + +target_link_libraries(orca_demo_streaming pv_speaker) if (NOT WIN32) target_link_libraries(orca_demo ${COMMON_LIBS}) target_link_libraries(orca_demo_streaming ${COMMON_LIBS}) -endif() +endif () diff --git a/demo/c/README.md b/demo/c/README.md index 715acb9a..a953b7fe 100644 --- a/demo/c/README.md +++ b/demo/c/README.md @@ -17,6 +17,7 @@ Signup or Login to [Picovoice Console](https://console.picovoice.ai/) to get you - **For Windows Only**: [MinGW](https://www.mingw-w64.org/) is required to build the demo. # Speech Synthesis Demos + Orca supports two modes of operation: streaming and single synthesis. In the streaming synthesis mode, Orca processes an incoming text stream in real-time and generates audio in parallel. This is demonstrated in the Orca streaming demo. @@ -31,9 +32,12 @@ In the single synthesis mode, the text is synthesized in a single call to the Or Use CMake to build the Orca demo target: ```console -cmake -S demo/c/ -B demo/c/build && cmake --build demo/c/build --target orca_demo_streaming +cmake -S demo/c/ -B demo/c/build -DPV_SPEAKER_PLATFORM={PV_SPEAKER_PLATFORM} && cmake --build demo/c/build --target orca_demo_streaming ``` +The `{PV_SPEAKER_PLATFORM}` variable will set the compilation flags for the given platform. Exclude this variable +to get a list of possible values. + ### Usage Running the executable without any command-line arguments prints the usage info to the console: @@ -72,7 +76,7 @@ To run the Orca demo: ./demo/c/build/orca_demo -l ${LIBRARY_PATH} -m ${MODEL_PATH} -a ${ACCESS_KEY} -t ${TEXT} -o ${OUTPUT_PATH} ``` -Replace `${LIBRARY_PATH}` with the path to appropriate library available under [lib](../../lib), `${MODEL_PATH}` with +Replace `${LIBRARY_PATH}` with the path to appropriate library available under [lib](../../lib), `${MODEL_PATH}` with a path to any of the model files available under [lib/common](../../lib/common), `${ACCESS_KEY}` with AccessKey obtained from [Picovoice Console](https://console.picovoice.ai/), `${TEXT}` with the text to be synthesized, and `${WAV_OUTPUT_PATH}` with a path to a output audio file. diff --git a/demo/c/orca_demo_streaming.c b/demo/c/orca_demo_streaming.c index 238f3918..44d4f496 100644 --- a/demo/c/orca_demo_streaming.c +++ b/demo/c/orca_demo_streaming.c @@ -36,6 +36,8 @@ the License. #include "pv_orca.h" +#include "pv_speaker.h" + #define MAX_NUM_CHUNKS (500) #define MAX_NUM_BYTES_PER_CHARACTER (5) @@ -198,15 +200,139 @@ void handle_error( pv_free_error_stack_func(message_stack); } +static void show_audio_devices(void) { + char **device_list = NULL; + int32_t device_list_length = 0; + + pv_speaker_status_t status = pv_speaker_get_available_devices(&device_list_length, &device_list); + if (status != PV_SPEAKER_STATUS_SUCCESS) { + fprintf(stderr, "failed to get audio devices with `%s`.\n", pv_speaker_status_to_string(status)); + exit(1); + } + + for (int32_t i = 0; i < device_list_length; i++) { + fprintf(stdout, "[%d] %s\n", i, device_list[i]); + } + + pv_speaker_free_available_devices(device_list_length, device_list); +} + +typedef struct Node { + int16_t *data; + struct Node *prev; + struct Node *next; +} Node; + +Node *createNode(int16_t *data) { + Node *newNode = (Node *) malloc(sizeof(Node)); + newNode->data = data; + newNode->prev = NULL; + newNode->next = NULL; + return newNode; +} + +typedef struct Deque { + Node *front; + Node *rear; +} Deque; + +Deque *createDeque(); +void destroyDeque(Deque *deque); +void pushFront(Deque *deque, int16_t *data); +void pushBack(Deque *deque, int16_t *data); +int16_t *popFront(Deque *deque); +int isEmpty(Deque *deque); + +Deque *createDeque() { + Deque *deque = (Deque *) malloc(sizeof(Deque)); + deque->front = NULL; + deque->rear = NULL; + return deque; +} + +void destroyDeque(Deque *deque) { + if (deque == NULL) { + return; + } + + Node *current = deque->front; + while (current) { + Node *temp = current; + current = current->next; + free(temp); + } + free(deque); +} + +int isEmpty(Deque *deque) { + if (deque == NULL) { + return 1; + } + + return deque->front == NULL; +} + +void pushFront(Deque *deque, int16_t *data) { + if (deque == NULL) { + return; + } + + Node *newNode = createNode(data); + if (isEmpty(deque)) { + deque->front = deque->rear = newNode; + } else { + newNode->next = deque->front; + deque->front->prev = newNode; + deque->front = newNode; + } +} + +void pushBack(Deque *deque, int16_t *data) { + if (deque == NULL) { + return; + } + + Node *newNode = createNode(data); + if (isEmpty(deque)) { + deque->front = deque->rear = newNode; + } else { + newNode->prev = deque->rear; + deque->rear->next = newNode; + deque->rear = newNode; + } +} + +int16_t *popFront(Deque *deque) { + if (deque == NULL) { + return NULL; + } + + if (isEmpty(deque)) { + fprintf(stderr, "Deque is empty\n"); + exit(EXIT_FAILURE); + } + Node *temp = deque->front; + int16_t *data = temp->data; + deque->front = deque->front->next; + if (deque->front) { + deque->front->prev = NULL; + } else { + deque->rear = NULL; + } + free(temp); + return data; +} + int32_t picovoice_main(int32_t argc, char **argv) { const char *library_path = NULL; const char *model_path = NULL; const char *access_key = NULL; const char *text = NULL; const char *output_path = NULL; + int32_t device_index = -1; int32_t c; - while ((c = getopt_long(argc, argv, "l:m:a:t:o:", long_options, NULL)) != -1) { + while ((c = getopt_long(argc, argv, "l:m:a:t:o:i:s", long_options, NULL)) != -1) { switch (c) { case 'l': library_path = optarg; @@ -223,6 +349,16 @@ int32_t picovoice_main(int32_t argc, char **argv) { case 'o': output_path = optarg; break; + case 'i': + device_index = (int32_t) strtol(optarg, NULL, 10); + if (device_index < -1) { + fprintf(stderr, "device index should be either `-1` (default) or a non-negative valid index\n"); + exit(1); + } + break; + case 's': + show_audio_devices(); + exit(0); default: exit(EXIT_FAILURE); } @@ -374,6 +510,19 @@ int32_t picovoice_main(int32_t argc, char **argv) { exit(EXIT_FAILURE); } + pv_speaker_t *speaker = NULL; + pv_speaker_status_t speaker_status = pv_speaker_init(sample_rate, 16, 20, device_index, &speaker); + if (speaker_status != PV_SPEAKER_STATUS_SUCCESS) { + fprintf(stderr, "Failed to initialize audio device with `%s`.\n", pv_speaker_status_to_string(speaker_status)); + exit(1); + } + + speaker_status = pv_speaker_start(speaker); + if (speaker_status != PV_SPEAKER_STATUS_SUCCESS) { + fprintf(stderr, "Failed to start device with %s.\n", pv_speaker_status_to_string(speaker_status)); + exit(1); + } + drwav_data_format format; format.container = drwav_container_riff; format.format = DR_WAVE_FORMAT_PCM; @@ -441,6 +590,8 @@ int32_t picovoice_main(int32_t argc, char **argv) { exit(EXIT_FAILURE); } + Deque *deque = createDeque(); + char character[MAX_NUM_BYTES_PER_CHARACTER] = {0}; for (int32_t i = 0; i < (int32_t) strlen(text); i++) { if (num_chunks > (MAX_NUM_CHUNKS - 1)) { @@ -487,6 +638,18 @@ int32_t picovoice_main(int32_t argc, char **argv) { num_samples_chunks[num_chunks] = num_samples_chunk; end_chunks[num_chunks++] = timestamp; start_chunks[num_chunks] = timestamp; + + pushBack(deque, pcm_chunk); + int32_t written_length = 0; + int8_t *pcm_ptr = (int8_t *) popFront(deque); + speaker_status = pv_speaker_write(speaker, pcm_ptr, num_samples_chunk, &written_length); + if (speaker_status != PV_SPEAKER_STATUS_SUCCESS) { + fprintf(stderr, "Failed to write pcm with %s.\n", pv_speaker_status_to_string(speaker_status)); + exit(1); + } + if (written_length < num_samples_chunk) { + pushFront(deque, &pcm_chunk[written_length * 16 / 8]); + } } } @@ -504,7 +667,25 @@ int32_t picovoice_main(int32_t argc, char **argv) { exit(EXIT_FAILURE); } + while (!isEmpty(deque)) { + int8_t *pcm_ptr = (int8_t *) popFront(deque); + int32_t written_length = 0; + speaker_status = pv_speaker_write(speaker, pcm_ptr, num_samples_chunk, &written_length); + if (speaker_status != PV_SPEAKER_STATUS_SUCCESS) { + fprintf(stderr, "Failed to write pcm with %s.\n", pv_speaker_status_to_string(speaker_status)); + exit(1); + } + } + if (num_samples_chunk > 0) { + int32_t written_length = 0; + int8_t *pcm_ptr = (int8_t *) pcm_chunk; + speaker_status = pv_speaker_flush(speaker, pcm_ptr, num_samples_chunk, &written_length); + if (speaker_status != PV_SPEAKER_STATUS_SUCCESS) { + fprintf(stderr, "Failed to flush pcm with %s.\n", pv_speaker_status_to_string(speaker_status)); + exit(1); + } + if (pcm_chunk_prev == NULL) { pcm_chunk_init(num_samples_chunk, pcm_chunk, &pcm_chunk_prev); pcm_chunk_head = pcm_chunk_prev; @@ -522,6 +703,13 @@ int32_t picovoice_main(int32_t argc, char **argv) { pv_orca_synthesize_params_delete_func(synthesize_params); pv_orca_delete_func(orca); + destroyDeque(deque); + speaker_status = pv_speaker_stop(speaker); + if (speaker_status != PV_SPEAKER_STATUS_SUCCESS) { + fprintf(stderr, "Failed to stop device with %s.\n", pv_speaker_status_to_string(speaker_status)); + exit(1); + } + int32_t num_samples = 0; pcm_chunk_t *pcm_chunk_iter = pcm_chunk_head; while (pcm_chunk_iter != NULL) { @@ -556,7 +744,8 @@ int32_t picovoice_main(int32_t argc, char **argv) { fprintf( stdout, "\nGenerated %d audio chunk%s in %.2f seconds.\n", - num_chunks, num_chunks == 1 ? "" : "s", + num_chunks, + num_chunks == 1 ? "" : "s", end_chunks[num_chunks - 1] - start_chunks[0]); for (int32_t i = 0; i < num_chunks; i++) { diff --git a/demo/c/pvspeaker b/demo/c/pvspeaker new file mode 160000 index 00000000..ca766027 --- /dev/null +++ b/demo/c/pvspeaker @@ -0,0 +1 @@ +Subproject commit ca7660270b00e7798ca11f69c9dc8dc470c7a299 From 88ea946897d1fb81b88e761fb6c590e4cda3fb29 Mon Sep 17 00:00:00 2001 From: Albert Ho Date: Mon, 29 Jul 2024 16:26:57 -0700 Subject: [PATCH 05/27] pv_speaker_write to pthread --- demo/c/orca_demo_streaming.c | 214 +++++++++++++++++++++-------------- 1 file changed, 131 insertions(+), 83 deletions(-) diff --git a/demo/c/orca_demo_streaming.c b/demo/c/orca_demo_streaming.c index 44d4f496..added41f 100644 --- a/demo/c/orca_demo_streaming.c +++ b/demo/c/orca_demo_streaming.c @@ -10,12 +10,13 @@ WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - #include +#include #include #include #include #include +#include #if !(defined(_WIN32) || defined(_WIN64)) @@ -217,112 +218,164 @@ static void show_audio_devices(void) { pv_speaker_free_available_devices(device_list_length, device_list); } +typedef struct { + int16_t *pcm; + int32_t num_samples; +} NodeData; + typedef struct Node { - int16_t *data; + int16_t *pcm; + int32_t num_samples; struct Node *prev; struct Node *next; } Node; -Node *createNode(int16_t *data) { - Node *newNode = (Node *) malloc(sizeof(Node)); - newNode->data = data; - newNode->prev = NULL; - newNode->next = NULL; - return newNode; -} - typedef struct Deque { Node *front; Node *rear; + size_t size; } Deque; Deque *createDeque(); -void destroyDeque(Deque *deque); -void pushFront(Deque *deque, int16_t *data); -void pushBack(Deque *deque, int16_t *data); -int16_t *popFront(Deque *deque); +Node *createNode(int16_t *pcm, int32_t num_samples); +void pushFront(Deque *deque, int16_t *pcm, int32_t num_samples); +void pushRear(Deque *deque, int16_t *pcm, int32_t num_samples); +NodeData popFront(Deque *deque); +void popRear(Deque *deque); int isEmpty(Deque *deque); +void freeDeque(Deque *deque); Deque *createDeque() { Deque *deque = (Deque *) malloc(sizeof(Deque)); - deque->front = NULL; - deque->rear = NULL; - return deque; -} - -void destroyDeque(Deque *deque) { if (deque == NULL) { - return; - } - - Node *current = deque->front; - while (current) { - Node *temp = current; - current = current->next; - free(temp); + perror("Failed to create deque"); + exit(EXIT_FAILURE); } - free(deque); + deque->front = deque->rear = NULL; + deque->size = 0; + return deque; } -int isEmpty(Deque *deque) { - if (deque == NULL) { - return 1; +Node *createNode(int16_t *pcm, int32_t num_samples) { + Node *node = (Node *) malloc(sizeof(Node)); + if (node == NULL) { + perror("Failed to create node"); + exit(EXIT_FAILURE); } - - return deque->front == NULL; + node->pcm = pcm; + node->num_samples = num_samples; + node->prev = node->next = NULL; + return node; } -void pushFront(Deque *deque, int16_t *data) { - if (deque == NULL) { - return; - } - - Node *newNode = createNode(data); +void pushFront(Deque *deque, int16_t *pcm, int32_t num_samples) { + Node *node = createNode(pcm, num_samples); if (isEmpty(deque)) { - deque->front = deque->rear = newNode; + deque->front = deque->rear = node; } else { - newNode->next = deque->front; - deque->front->prev = newNode; - deque->front = newNode; + node->next = deque->front; + deque->front->prev = node; + deque->front = node; } + deque->size++; } -void pushBack(Deque *deque, int16_t *data) { - if (deque == NULL) { - return; - } - - Node *newNode = createNode(data); +void pushRear(Deque *deque, int16_t *pcm, int32_t num_samples) { + Node *node = createNode(pcm, num_samples); if (isEmpty(deque)) { - deque->front = deque->rear = newNode; + deque->front = deque->rear = node; } else { - newNode->prev = deque->rear; - deque->rear->next = newNode; - deque->rear = newNode; + node->prev = deque->rear; + deque->rear->next = node; + deque->rear = node; } + deque->size++; } -int16_t *popFront(Deque *deque) { - if (deque == NULL) { - return NULL; - } - +NodeData popFront(Deque *deque) { + NodeData data = {NULL, 0}; if (isEmpty(deque)) { - fprintf(stderr, "Deque is empty\n"); - exit(EXIT_FAILURE); + printf("Deque is empty\n"); + return data; } Node *temp = deque->front; - int16_t *data = temp->data; + data.pcm = temp->pcm; + data.num_samples = temp->num_samples; deque->front = deque->front->next; - if (deque->front) { + if (deque->front != NULL) { deque->front->prev = NULL; } else { deque->rear = NULL; } free(temp); + deque->size--; return data; } +void popRear(Deque *deque) { + if (isEmpty(deque)) { + printf("Deque is empty\n"); + return; + } + Node *temp = deque->rear; + deque->rear = deque->rear->prev; + if (deque->rear != NULL) { + deque->rear->next = NULL; + } else { + deque->front = NULL; + } + free(temp); + deque->size--; +} + +int isEmpty(Deque *deque) { + return deque->size == 0; +} + +void freeDeque(Deque *deque) { + while (!isEmpty(deque)) { + popFront(deque); + } + free(deque); +} + +typedef struct { + pv_speaker_t *speaker; + Deque *deque; +} ThreadData; + +// Thread function +void *threadFunction(void *arg) { + // Cast the argument to ThreadData* + ThreadData *data = (ThreadData *) arg; + + // Access the struct members + Deque *deque = data->deque; + pv_speaker_t *speaker = data->speaker; + + while (true) { + if (!isEmpty(deque)) { + NodeData node_data = popFront(deque); + if (node_data.num_samples == 0) { + break; + } + int32_t written_length = 0; + pv_speaker_status_t speaker_status = pv_speaker_write(speaker, (int8_t *) node_data.pcm, node_data.num_samples, &written_length); + if (speaker_status != PV_SPEAKER_STATUS_SUCCESS) { + fprintf(stderr, "Failed to write pcm with %s.\n", pv_speaker_status_to_string(speaker_status)); + exit(1); + } + if (written_length < node_data.num_samples) { + pushFront(deque, &node_data.pcm[written_length * 16 / 2], node_data.num_samples - written_length); + } + } else { + sleep(1); + } + } + + return NULL; +} + int32_t picovoice_main(int32_t argc, char **argv) { const char *library_path = NULL; const char *model_path = NULL; @@ -592,6 +645,14 @@ int32_t picovoice_main(int32_t argc, char **argv) { Deque *deque = createDeque(); + pthread_t thread; + ThreadData data = {speaker, deque}; + + if (pthread_create(&thread, NULL, threadFunction, &data)) { + fprintf(stderr, "Error creating thread\n"); + return 1; + } + char character[MAX_NUM_BYTES_PER_CHARACTER] = {0}; for (int32_t i = 0; i < (int32_t) strlen(text); i++) { if (num_chunks > (MAX_NUM_CHUNKS - 1)) { @@ -639,17 +700,7 @@ int32_t picovoice_main(int32_t argc, char **argv) { end_chunks[num_chunks++] = timestamp; start_chunks[num_chunks] = timestamp; - pushBack(deque, pcm_chunk); - int32_t written_length = 0; - int8_t *pcm_ptr = (int8_t *) popFront(deque); - speaker_status = pv_speaker_write(speaker, pcm_ptr, num_samples_chunk, &written_length); - if (speaker_status != PV_SPEAKER_STATUS_SUCCESS) { - fprintf(stderr, "Failed to write pcm with %s.\n", pv_speaker_status_to_string(speaker_status)); - exit(1); - } - if (written_length < num_samples_chunk) { - pushFront(deque, &pcm_chunk[written_length * 16 / 8]); - } + pushRear(deque, pcm_chunk, num_samples_chunk); } } @@ -667,15 +718,13 @@ int32_t picovoice_main(int32_t argc, char **argv) { exit(EXIT_FAILURE); } - while (!isEmpty(deque)) { - int8_t *pcm_ptr = (int8_t *) popFront(deque); - int32_t written_length = 0; - speaker_status = pv_speaker_write(speaker, pcm_ptr, num_samples_chunk, &written_length); - if (speaker_status != PV_SPEAKER_STATUS_SUCCESS) { - fprintf(stderr, "Failed to write pcm with %s.\n", pv_speaker_status_to_string(speaker_status)); - exit(1); - } + pushRear(deque, NULL, 0); + + if (pthread_join(thread, NULL)) { + fprintf(stderr, "Error joining thread\n"); + return 2; } + freeDeque(deque); if (num_samples_chunk > 0) { int32_t written_length = 0; @@ -703,7 +752,6 @@ int32_t picovoice_main(int32_t argc, char **argv) { pv_orca_synthesize_params_delete_func(synthesize_params); pv_orca_delete_func(orca); - destroyDeque(deque); speaker_status = pv_speaker_stop(speaker); if (speaker_status != PV_SPEAKER_STATUS_SUCCESS) { fprintf(stderr, "Failed to stop device with %s.\n", pv_speaker_status_to_string(speaker_status)); From a7c93601d75c428075dba70961d985da48871742 Mon Sep 17 00:00:00 2001 From: Albert Ho Date: Mon, 29 Jul 2024 16:39:05 -0700 Subject: [PATCH 06/27] sleep 100ms --- demo/c/orca_demo_streaming.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/demo/c/orca_demo_streaming.c b/demo/c/orca_demo_streaming.c index added41f..aacaa9ec 100644 --- a/demo/c/orca_demo_streaming.c +++ b/demo/c/orca_demo_streaming.c @@ -369,7 +369,7 @@ void *threadFunction(void *arg) { pushFront(deque, &node_data.pcm[written_length * 16 / 2], node_data.num_samples - written_length); } } else { - sleep(1); + usleep(100 * 1000); } } From a4bccbb7703f98c3ced331c50233701085318260 Mon Sep 17 00:00:00 2001 From: Albert Ho Date: Mon, 29 Jul 2024 17:05:20 -0700 Subject: [PATCH 07/27] rm while loop --- demo/c/orca_demo_streaming.c | 62 +++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 26 deletions(-) diff --git a/demo/c/orca_demo_streaming.c b/demo/c/orca_demo_streaming.c index aacaa9ec..36564f1e 100644 --- a/demo/c/orca_demo_streaming.c +++ b/demo/c/orca_demo_streaming.c @@ -16,7 +16,6 @@ the License. #include #include #include -#include #if !(defined(_WIN32) || defined(_WIN64)) @@ -234,6 +233,7 @@ typedef struct Deque { Node *front; Node *rear; size_t size; + pthread_mutex_t mutex; // Mutex to protect deque } Deque; Deque *createDeque(); @@ -253,6 +253,7 @@ Deque *createDeque() { } deque->front = deque->rear = NULL; deque->size = 0; + pthread_mutex_init(&deque->mutex, NULL); return deque; } @@ -269,6 +270,7 @@ Node *createNode(int16_t *pcm, int32_t num_samples) { } void pushFront(Deque *deque, int16_t *pcm, int32_t num_samples) { + pthread_mutex_lock(&deque->mutex); // Lock mutex Node *node = createNode(pcm, num_samples); if (isEmpty(deque)) { deque->front = deque->rear = node; @@ -278,9 +280,11 @@ void pushFront(Deque *deque, int16_t *pcm, int32_t num_samples) { deque->front = node; } deque->size++; + pthread_mutex_unlock(&deque->mutex); // Unlock mutex } void pushRear(Deque *deque, int16_t *pcm, int32_t num_samples) { + pthread_mutex_lock(&deque->mutex); // Lock mutex Node *node = createNode(pcm, num_samples); if (isEmpty(deque)) { deque->front = deque->rear = node; @@ -290,9 +294,11 @@ void pushRear(Deque *deque, int16_t *pcm, int32_t num_samples) { deque->rear = node; } deque->size++; + pthread_mutex_unlock(&deque->mutex); // Unlock mutex } NodeData popFront(Deque *deque) { + pthread_mutex_lock(&deque->mutex); // Lock mutex NodeData data = {NULL, 0}; if (isEmpty(deque)) { printf("Deque is empty\n"); @@ -309,6 +315,7 @@ NodeData popFront(Deque *deque) { } free(temp); deque->size--; + pthread_mutex_unlock(&deque->mutex); // Unlock mutex return data; } @@ -333,10 +340,12 @@ int isEmpty(Deque *deque) { } void freeDeque(Deque *deque) { + pthread_mutex_lock(&deque->mutex); // Lock mutex while (!isEmpty(deque)) { popFront(deque); } free(deque); + pthread_mutex_unlock(&deque->mutex); // Unlock mutex } typedef struct { @@ -353,24 +362,15 @@ void *threadFunction(void *arg) { Deque *deque = data->deque; pv_speaker_t *speaker = data->speaker; - while (true) { - if (!isEmpty(deque)) { - NodeData node_data = popFront(deque); - if (node_data.num_samples == 0) { - break; - } - int32_t written_length = 0; - pv_speaker_status_t speaker_status = pv_speaker_write(speaker, (int8_t *) node_data.pcm, node_data.num_samples, &written_length); - if (speaker_status != PV_SPEAKER_STATUS_SUCCESS) { - fprintf(stderr, "Failed to write pcm with %s.\n", pv_speaker_status_to_string(speaker_status)); - exit(1); - } - if (written_length < node_data.num_samples) { - pushFront(deque, &node_data.pcm[written_length * 16 / 2], node_data.num_samples - written_length); - } - } else { - usleep(100 * 1000); - } + NodeData node_data = popFront(deque); + int32_t written_length = 0; + pv_speaker_status_t speaker_status = pv_speaker_write(speaker, (int8_t *) node_data.pcm, node_data.num_samples, &written_length); + if (speaker_status != PV_SPEAKER_STATUS_SUCCESS) { + fprintf(stderr, "Failed to write pcm with %s.\n", pv_speaker_status_to_string(speaker_status)); + exit(1); + } + if (written_length < node_data.num_samples) { + pushFront(deque, &node_data.pcm[written_length * 16 / 2], node_data.num_samples - written_length); } return NULL; @@ -648,11 +648,6 @@ int32_t picovoice_main(int32_t argc, char **argv) { pthread_t thread; ThreadData data = {speaker, deque}; - if (pthread_create(&thread, NULL, threadFunction, &data)) { - fprintf(stderr, "Error creating thread\n"); - return 1; - } - char character[MAX_NUM_BYTES_PER_CHARACTER] = {0}; for (int32_t i = 0; i < (int32_t) strlen(text); i++) { if (num_chunks > (MAX_NUM_CHUNKS - 1)) { @@ -701,6 +696,10 @@ int32_t picovoice_main(int32_t argc, char **argv) { start_chunks[num_chunks] = timestamp; pushRear(deque, pcm_chunk, num_samples_chunk); + if (pthread_create(&thread, NULL, threadFunction, &data)) { + fprintf(stderr, "Error creating thread\n"); + return 1; + } } } @@ -718,12 +717,23 @@ int32_t picovoice_main(int32_t argc, char **argv) { exit(EXIT_FAILURE); } - pushRear(deque, NULL, 0); - if (pthread_join(thread, NULL)) { fprintf(stderr, "Error joining thread\n"); return 2; } + + while (!isEmpty(deque)) { + NodeData node_data = popFront(deque); + int32_t written_length = 0; + pv_speaker_status_t speaker_status = pv_speaker_write(speaker, (int8_t *) node_data.pcm, node_data.num_samples, &written_length); + if (speaker_status != PV_SPEAKER_STATUS_SUCCESS) { + fprintf(stderr, "Failed to write pcm with %s.\n", pv_speaker_status_to_string(speaker_status)); + exit(1); + } + if (written_length < node_data.num_samples) { + pushFront(deque, &node_data.pcm[written_length * 16 / 2], node_data.num_samples - written_length); + } + } freeDeque(deque); if (num_samples_chunk > 0) { From 3de4e1ce5727247115e294778461c9b00a7eed59 Mon Sep 17 00:00:00 2001 From: Albert Ho Date: Mon, 29 Jul 2024 17:46:41 -0700 Subject: [PATCH 08/27] add wait chunks config --- demo/c/orca_demo_streaming.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/demo/c/orca_demo_streaming.c b/demo/c/orca_demo_streaming.c index 36564f1e..89a3c3b3 100644 --- a/demo/c/orca_demo_streaming.c +++ b/demo/c/orca_demo_streaming.c @@ -383,9 +383,10 @@ int32_t picovoice_main(int32_t argc, char **argv) { const char *text = NULL; const char *output_path = NULL; int32_t device_index = -1; + int32_t audio_wait_chunks = 0; int32_t c; - while ((c = getopt_long(argc, argv, "l:m:a:t:o:i:s", long_options, NULL)) != -1) { + while ((c = getopt_long(argc, argv, "l:m:a:t:o:w:i:s", long_options, NULL)) != -1) { switch (c) { case 'l': library_path = optarg; @@ -402,6 +403,9 @@ int32_t picovoice_main(int32_t argc, char **argv) { case 'o': output_path = optarg; break; + case 'w': + audio_wait_chunks = (int32_t) strtol(optarg, NULL, 10); + break; case 'i': device_index = (int32_t) strtol(optarg, NULL, 10); if (device_index < -1) { @@ -649,6 +653,7 @@ int32_t picovoice_main(int32_t argc, char **argv) { ThreadData data = {speaker, deque}; char character[MAX_NUM_BYTES_PER_CHARACTER] = {0}; + int32_t num_pcm = 0; for (int32_t i = 0; i < (int32_t) strlen(text); i++) { if (num_chunks > (MAX_NUM_CHUNKS - 1)) { fprintf(stderr, "Trying to synthesize too many chunks. Only `%d` chunks are supported.\n", MAX_NUM_CHUNKS); @@ -695,10 +700,13 @@ int32_t picovoice_main(int32_t argc, char **argv) { end_chunks[num_chunks++] = timestamp; start_chunks[num_chunks] = timestamp; + num_pcm++; pushRear(deque, pcm_chunk, num_samples_chunk); - if (pthread_create(&thread, NULL, threadFunction, &data)) { - fprintf(stderr, "Error creating thread\n"); - return 1; + if (num_pcm >= audio_wait_chunks) { + if (pthread_create(&thread, NULL, threadFunction, &data)) { + fprintf(stderr, "Error creating thread\n"); + return 1; + } } } } From 04a4e5cd56dc72c958c73cbdefd490b91797d37f Mon Sep 17 00:00:00 2001 From: Albert Ho Date: Tue, 30 Jul 2024 11:56:37 -0700 Subject: [PATCH 09/27] all in main thread --- demo/python/orca_demo_streaming.py | 200 ++++++++--------------------- 1 file changed, 52 insertions(+), 148 deletions(-) diff --git a/demo/python/orca_demo_streaming.py b/demo/python/orca_demo_streaming.py index dbcac852..fc6dd04b 100644 --- a/demo/python/orca_demo_streaming.py +++ b/demo/python/orca_demo_streaming.py @@ -14,14 +14,12 @@ import re import subprocess import threading -import multiprocessing import time from dataclasses import dataclass from os import remove from queue import Queue from collections import deque from itertools import chain -from time import sleep from typing import ( Callable, Optional, @@ -66,105 +64,13 @@ def linux_machine() -> str: raise NotImplementedError("Unsupported CPU: `%s`." % cpu_part) -class OrcaThread: - @dataclass - class OrcaInput: - text: str - flush: bool - - def __init__( - self, - write_audio_callback: Callable[[Sequence[int]], int], - access_key: str, - num_tokens_per_second: int, - model_path: Optional[str] = None, - library_path: Optional[str] = None, - audio_wait_chunks: Optional[int] = None, - ) -> None: - - self._orca = pvorca.create(access_key=access_key, model_path=model_path, library_path=library_path) - self._orca_stream = self._orca.stream_open() - self._sample_rate = self._orca.sample_rate - - self.write_audio_callback = write_audio_callback - self._num_tokens_per_second = num_tokens_per_second - assert self._num_tokens_per_second > 0 - - self._queue: Queue[Optional[OrcaThread.OrcaInput]] = Queue() - self._thread = None - - self._time_first_audio_available = -1 - self._pcm_buffer = deque() - - self._wait_chunks = audio_wait_chunks or self._get_first_audio_wait_chunks() - self._num_pcm_chunks_processed = 0 - - @staticmethod - def _get_first_audio_wait_chunks() -> int: - wait_chunks = 0 - if platform.system() == "Linux": - machine = linux_machine() - if "cortex" in machine: - wait_chunks = 1 - return wait_chunks - - def _run(self) -> None: - while True: - orca_input = self._queue.get() - if orca_input is None: - break - - try: - if not orca_input.flush: - pcm = self._orca_stream.synthesize(orca_input.text) - else: - pcm = self._orca_stream.flush() - except OrcaInvalidArgumentError as e: - raise ValueError(f"Orca could not synthesize text input `{orca_input.text}`: `{e}`") - - if pcm is not None: - self._pcm_buffer.append(pcm) - pcm_to_play = self._pcm_buffer.popleft() - written = self.write_audio_callback(pcm_to_play) - if written < len(pcm_to_play): - self._pcm_buffer.appendleft(pcm_to_play[written:]) - - if self._num_pcm_chunks_processed == 0: - self._time_first_audio_available = time.time() - - self._num_pcm_chunks_processed += 1 - - def _close_thread_blocking(self): - self._queue.put_nowait(None) - self._thread.join() - - def start(self) -> None: - self._thread = threading.Thread(target=self._run) - self._thread.start() - - def synthesize(self, text: str) -> None: - self._queue.put_nowait(self.OrcaInput(text=text, flush=False)) - - def flush(self) -> deque: - self._queue.put_nowait(self.OrcaInput(text="", flush=True)) - self._close_thread_blocking() - return self._pcm_buffer - - def delete(self) -> None: - self._close_thread_blocking() - self._orca_stream.close() - self._orca.delete() - - def get_time_first_audio_available(self) -> float: - return self._time_first_audio_available - - @property - def sample_rate(self) -> int: - return self._sample_rate - - @property - def version(self) -> str: - return self._orca.version +def get_first_audio_wait_chunks() -> int: + wait_chunks = 0 + if platform.system() == "Linux": + machine = linux_machine() + if "cortex" in machine: + wait_chunks = 1 + return wait_chunks def tokenize_text(text: str) -> Sequence[str]: @@ -196,29 +102,6 @@ def tokenize_text(text: str) -> Sequence[str]: return tokens_with_custom_pronunciations -def worker_function(queue, sample_rate, audio_wait_chunks): - speaker = PvSpeaker(sample_rate=sample_rate, bits_per_sample=16, buffer_size_secs=audio_wait_chunks) - speaker.start() - - pcm_buf = deque() - - while True: - if len(pcm_buf) > 0: - buf_pcm = pcm_buf.popleft() - else: - buf_pcm = queue.get() - if buf_pcm is None: - break - - worker_written = speaker.write(buf_pcm) - if worker_written < len(buf_pcm): - pcm_buf.appendleft(buf_pcm[worker_written:]) - - speaker.flush() - speaker.stop() - speaker.delete() - - def main() -> None: parser = argparse.ArgumentParser() parser.add_argument( @@ -247,8 +130,13 @@ def main() -> None: parser.add_argument( "--audio_wait_chunks", type=int, - default=1, + default=0, help="Number of PCM chunks to wait before starting to play audio. Default: system-dependent.") + parser.add_argument( + "--buffer_size_secs", + type=int, + default=20, + help="Size of internal buffer for pvspeaker") parser.add_argument( "--show_audio_devices", action="store_true", @@ -267,18 +155,21 @@ def main() -> None: library_path = args.library_path text = args.text_to_stream tokens_per_second = args.tokens_per_second - audio_wait_chunks = args.audio_wait_chunks + audio_wait_chunks = max(args.audio_wait_chunks, get_first_audio_wait_chunks()) + buffer_size_secs = args.buffer_size_secs audio_device_index = args.audio_device_index orca = pvorca.create(access_key=access_key, model_path=model_path, library_path=library_path) - # TODO: Make audio_wait_chunks a proper param + speaker = PvSpeaker(sample_rate=orca.sample_rate, bits_per_sample=16, buffer_size_secs=buffer_size_secs, + device_index=audio_device_index) stream = orca.stream_open() - queue = multiprocessing.Queue() - process = multiprocessing.Process(target=worker_function, args=(queue, orca.sample_rate, audio_wait_chunks)) - process.start() + if speaker is not None: + speaker.start() + + pcm_buf = deque() try: print(f"Orca version: {orca.version}\n") @@ -286,35 +177,48 @@ def main() -> None: print(f"Simulated text stream:") tokens = tokenize_text(text=text) - # time_start_text_stream = time.time() + time_start_text_stream = time.time() + time_first_audio_available = None + is_start_playing = False for token in tokens: print(f"{token}", end="", flush=True) pcm = stream.synthesize(text=token) + time_start_pvspeaker_write = time.time() if pcm is not None: - queue.put(pcm) - - time.sleep(1 / tokens_per_second) - - # text_stream_duration_seconds = time.time() - time_start_text_stream + if time_first_audio_available is None: + time_first_audio_available = time.time() + pcm_buf.append(pcm) + if len(pcm_buf) >= audio_wait_chunks: + is_start_playing = True + if is_start_playing and len(pcm_buf) != 0: + pcm = pcm_buf.popleft() + written = speaker.write(pcm) + if written < len(pcm): + print(f" [appendleft] ") + pcm_buf.appendleft(pcm[written:]) + pvspeaker_write_duration_seconds = time.time() - time_start_pvspeaker_write + print(f" [{pvspeaker_write_duration_seconds:.8f}] ") + + time.sleep(min(0, (1 / tokens_per_second) - pvspeaker_write_duration_seconds)) + + text_stream_duration_seconds = time.time() - time_start_text_stream remaining_pcm = stream.flush() - if remaining_pcm is not None: - queue.put(remaining_pcm) - - queue.put(None) - process.join() - - # first_audio_available_seconds = orca.get_time_first_audio_available() - time_start_text_stream - # print(f"\n\nTime to finish text stream: {text_stream_duration_seconds:.2f} seconds") - # print(f"Time to receive first audio: {first_audio_available_seconds:.2f} seconds after text stream started\n") + if time_first_audio_available is None: + time_first_audio_available = time.time() + pcm_buf.append(remaining_pcm) - # if speaker is not None: - print("\nWaiting for audio to finish ...") + first_audio_available_seconds = time_first_audio_available - time_start_text_stream + print(f"\n\nTime to finish text stream: {text_stream_duration_seconds:.2f} seconds") + print(f"Time to receive first audio: {first_audio_available_seconds:.2f} seconds after text stream started\n") + if speaker is not None: + print("\nWaiting for audio to finish ...") + speaker.flush(list(chain.from_iterable(pcm_buf))) except KeyboardInterrupt: - # speaker.stop() + speaker.stop() print("\nStopped...") except OrcaActivationLimitError: print("AccessKey has reached its processing limit") From 88af0a437f27b85e0c06001d0eccd0406aff0bf5 Mon Sep 17 00:00:00 2001 From: Albert Ho Date: Tue, 30 Jul 2024 12:46:11 -0700 Subject: [PATCH 10/27] cleanup --- demo/python/orca_demo_streaming.py | 58 +++++++++++++++--------------- 1 file changed, 28 insertions(+), 30 deletions(-) diff --git a/demo/python/orca_demo_streaming.py b/demo/python/orca_demo_streaming.py index fc6dd04b..03da4343 100644 --- a/demo/python/orca_demo_streaming.py +++ b/demo/python/orca_demo_streaming.py @@ -13,22 +13,16 @@ import platform import re import subprocess -import threading import time -from dataclasses import dataclass -from os import remove -from queue import Queue from collections import deque from itertools import chain from typing import ( - Callable, - Optional, Sequence, ) import pvorca import tiktoken -from pvorca import OrcaActivationLimitError, OrcaInvalidArgumentError +from pvorca import OrcaActivationLimitError from pvspeaker import PvSpeaker CUSTOM_PRON_PATTERN = r"\{(.*?\|.*?)\}" @@ -136,12 +130,16 @@ def main() -> None: "--buffer_size_secs", type=int, default=20, - help="Size of internal buffer for pvspeaker") + help="The size in seconds of the internal buffer used by pvspeaker to play audio.") parser.add_argument( "--show_audio_devices", action="store_true", help="Only list available audio output devices and exit") - parser.add_argument('--audio-device-index', type=int, default=-1, help='Index of input audio device') + parser.add_argument( + '--audio-device-index', + type=int, + default=-1, + help='Index of input audio device') args = parser.parse_args() if args.show_audio_devices: @@ -160,16 +158,17 @@ def main() -> None: audio_device_index = args.audio_device_index orca = pvorca.create(access_key=access_key, model_path=model_path, library_path=library_path) - - speaker = PvSpeaker(sample_rate=orca.sample_rate, bits_per_sample=16, buffer_size_secs=buffer_size_secs, - device_index=audio_device_index) - stream = orca.stream_open() - if speaker is not None: - speaker.start() - + speaker = None pcm_buf = deque() + try: + speaker = PvSpeaker(sample_rate=orca.sample_rate, bits_per_sample=16, buffer_size_secs=buffer_size_secs, + device_index=audio_device_index) + speaker.start() + except ValueError: + print( + "\nWarning: Failed to initialize PvSpeaker. Orca will still generate PCM data, but it will not be played.\n") try: print(f"Orca version: {orca.version}\n") @@ -177,30 +176,29 @@ def main() -> None: print(f"Simulated text stream:") tokens = tokenize_text(text=text) - time_start_text_stream = time.time() - time_first_audio_available = None is_start_playing = False + time_first_audio_available = None + time_start_text_stream = time.time() + for token in tokens: print(f"{token}", end="", flush=True) - pcm = stream.synthesize(text=token) - time_start_pvspeaker_write = time.time() + if pcm is not None: if time_first_audio_available is None: time_first_audio_available = time.time() - pcm_buf.append(pcm) - if len(pcm_buf) >= audio_wait_chunks: - is_start_playing = True - if is_start_playing and len(pcm_buf) != 0: + if speaker is not None: + pcm_buf.append(pcm) + if len(pcm_buf) > audio_wait_chunks: + is_start_playing = True + + if is_start_playing and len(pcm_buf) > 0: pcm = pcm_buf.popleft() written = speaker.write(pcm) if written < len(pcm): - print(f" [appendleft] ") pcm_buf.appendleft(pcm[written:]) - pvspeaker_write_duration_seconds = time.time() - time_start_pvspeaker_write - print(f" [{pvspeaker_write_duration_seconds:.8f}] ") - time.sleep(min(0, (1 / tokens_per_second) - pvspeaker_write_duration_seconds)) + time.sleep(1 / tokens_per_second) text_stream_duration_seconds = time.time() - time_start_text_stream @@ -211,7 +209,7 @@ def main() -> None: first_audio_available_seconds = time_first_audio_available - time_start_text_stream print(f"\n\nTime to finish text stream: {text_stream_duration_seconds:.2f} seconds") - print(f"Time to receive first audio: {first_audio_available_seconds:.2f} seconds after text stream started\n") + print(f"Time to receive first audio: {first_audio_available_seconds:.2f} seconds after text stream started") if speaker is not None: print("\nWaiting for audio to finish ...") @@ -221,7 +219,7 @@ def main() -> None: speaker.stop() print("\nStopped...") except OrcaActivationLimitError: - print("AccessKey has reached its processing limit") + print("\nAccessKey has reached its processing limit") finally: orca.delete() From a7378380ec92eb043baca3f4da8d36ccbc39f4f9 Mon Sep 17 00:00:00 2001 From: Albert Ho Date: Tue, 30 Jul 2024 12:47:56 -0700 Subject: [PATCH 11/27] wip rm speaker --- demo/python/orca_demo_streaming.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/demo/python/orca_demo_streaming.py b/demo/python/orca_demo_streaming.py index 03da4343..ef054eb7 100644 --- a/demo/python/orca_demo_streaming.py +++ b/demo/python/orca_demo_streaming.py @@ -162,13 +162,13 @@ def main() -> None: speaker = None pcm_buf = deque() - try: - speaker = PvSpeaker(sample_rate=orca.sample_rate, bits_per_sample=16, buffer_size_secs=buffer_size_secs, - device_index=audio_device_index) - speaker.start() - except ValueError: - print( - "\nWarning: Failed to initialize PvSpeaker. Orca will still generate PCM data, but it will not be played.\n") + # try: + # speaker = PvSpeaker(sample_rate=orca.sample_rate, bits_per_sample=16, buffer_size_secs=buffer_size_secs, + # device_index=audio_device_index) + # speaker.start() + # except ValueError: + # print( + # "\nWarning: Failed to initialize PvSpeaker. Orca will still generate PCM data, but it will not be played.\n") try: print(f"Orca version: {orca.version}\n") From fe53aeee3c2d855916953fb2ab9699eea1f71b85 Mon Sep 17 00:00:00 2001 From: Albert Ho Date: Tue, 30 Jul 2024 12:54:16 -0700 Subject: [PATCH 12/27] wip - rm write --- demo/python/orca_demo_streaming.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/demo/python/orca_demo_streaming.py b/demo/python/orca_demo_streaming.py index ef054eb7..af2f4536 100644 --- a/demo/python/orca_demo_streaming.py +++ b/demo/python/orca_demo_streaming.py @@ -184,19 +184,19 @@ def main() -> None: print(f"{token}", end="", flush=True) pcm = stream.synthesize(text=token) - if pcm is not None: - if time_first_audio_available is None: - time_first_audio_available = time.time() - if speaker is not None: - pcm_buf.append(pcm) - if len(pcm_buf) > audio_wait_chunks: - is_start_playing = True - - if is_start_playing and len(pcm_buf) > 0: - pcm = pcm_buf.popleft() - written = speaker.write(pcm) - if written < len(pcm): - pcm_buf.appendleft(pcm[written:]) + # if pcm is not None: + # if time_first_audio_available is None: + # time_first_audio_available = time.time() + # if speaker is not None: + # pcm_buf.append(pcm) + # if len(pcm_buf) > audio_wait_chunks: + # is_start_playing = True + # + # if is_start_playing and len(pcm_buf) > 0: + # pcm = pcm_buf.popleft() + # written = speaker.write(pcm) + # if written < len(pcm): + # pcm_buf.appendleft(pcm[written:]) time.sleep(1 / tokens_per_second) From f7520bf4a7cc99f34530b52fba6e1b0a39ca8427 Mon Sep 17 00:00:00 2001 From: Albert Ho Date: Tue, 30 Jul 2024 14:21:01 -0700 Subject: [PATCH 13/27] orca thread --- demo/python/orca_demo_streaming.py | 210 ++++++++++++++++++++++------- 1 file changed, 158 insertions(+), 52 deletions(-) diff --git a/demo/python/orca_demo_streaming.py b/demo/python/orca_demo_streaming.py index af2f4536..706ab409 100644 --- a/demo/python/orca_demo_streaming.py +++ b/demo/python/orca_demo_streaming.py @@ -13,16 +13,20 @@ import platform import re import subprocess +import threading import time +from dataclasses import dataclass +from queue import Queue from collections import deque -from itertools import chain from typing import ( - Sequence, + Callable, + Optional, + Sequence, Any, ) import pvorca import tiktoken -from pvorca import OrcaActivationLimitError +from pvorca import OrcaActivationLimitError, OrcaInvalidArgumentError from pvspeaker import PvSpeaker CUSTOM_PRON_PATTERN = r"\{(.*?\|.*?)\}" @@ -58,13 +62,109 @@ def linux_machine() -> str: raise NotImplementedError("Unsupported CPU: `%s`." % cpu_part) -def get_first_audio_wait_chunks() -> int: - wait_chunks = 0 - if platform.system() == "Linux": - machine = linux_machine() - if "cortex" in machine: - wait_chunks = 1 - return wait_chunks +class OrcaThread: + @dataclass + class OrcaInput: + text: str + flush: bool + + def __init__( + self, + orca: Any, + flush_audio_callback: Callable, + play_audio_callback: Callable[[Sequence[int]], int], + num_tokens_per_second: int, + audio_wait_chunks: Optional[int] = None, + ) -> None: + + self._orca = orca + self._orca_stream = self._orca.stream_open() + + self._play_audio_callback = play_audio_callback + self._flush_audio_callback = flush_audio_callback + self._num_tokens_per_second = num_tokens_per_second + assert self._num_tokens_per_second > 0 + + self._queue: Queue[Optional[OrcaThread.OrcaInput]] = Queue() + self._thread = None + + self._time_first_audio_available = -1 + self._pcm_buffer = deque() + + self._wait_chunks = audio_wait_chunks or self._get_first_audio_wait_chunks() + self._num_pcm_chunks_processed = 0 + + @staticmethod + def _get_first_audio_wait_chunks() -> int: + wait_chunks = 0 + if platform.system() == "Linux": + machine = linux_machine() + if "cortex" in machine: + wait_chunks = 1 + return wait_chunks + + def _run(self) -> None: + while True: + orca_input = self._queue.get() + if orca_input is None: + while len(self._pcm_buffer) > 0: + pcm_chunk = self._pcm_buffer.popleft() + written = self._play_audio_callback(pcm_chunk) + if written < len(pcm_chunk): + self._pcm_buffer.appendleft(pcm_chunk[written:]) + break + + try: + if not orca_input.flush: + pcm = self._orca_stream.synthesize(orca_input.text) + else: + pcm = self._orca_stream.flush() + except OrcaInvalidArgumentError as e: + raise ValueError(f"Orca could not synthesize text input `{orca_input.text}`: `{e}`") + + if pcm is not None: + self._pcm_buffer.append(pcm) + + if self._num_pcm_chunks_processed == 0: + self._time_first_audio_available = time.time() + self._num_pcm_chunks_processed += 1 + + if self._num_pcm_chunks_processed < self._wait_chunks: + continue + else: + while len(self._pcm_buffer) > 0: + pcm_chunk = self._pcm_buffer.popleft() + written = self._play_audio_callback(pcm_chunk) + if written < len(pcm_chunk): + self._pcm_buffer.appendleft(pcm_chunk[written:]) + + def _close_thread_blocking(self): + self._queue.put_nowait(None) + self._thread.join() + + def start(self) -> None: + self._thread = threading.Thread(target=self._run) + self._thread.start() + + def synthesize(self, text: str) -> None: + self._queue.put_nowait(self.OrcaInput(text=text, flush=False)) + + def flush(self) -> None: + self._queue.put_nowait(self.OrcaInput(text="", flush=True)) + self._close_thread_blocking() + + def flush_audio(self) -> None: + self._thread = threading.Thread(target=self._flush_audio_callback) + self._thread.start() + self._thread.join() + + def delete(self) -> None: + self._close_thread_blocking() + self._orca_stream.close() + self._orca.delete() + + def get_time_first_audio_available(self) -> float: + return self._time_first_audio_available def tokenize_text(text: str) -> Sequence[str]: @@ -124,7 +224,7 @@ def main() -> None: parser.add_argument( "--audio_wait_chunks", type=int, - default=0, + default=None, help="Number of PCM chunks to wait before starting to play audio. Default: system-dependent.") parser.add_argument( "--buffer_size_secs", @@ -153,75 +253,81 @@ def main() -> None: library_path = args.library_path text = args.text_to_stream tokens_per_second = args.tokens_per_second - audio_wait_chunks = max(args.audio_wait_chunks, get_first_audio_wait_chunks()) + audio_wait_chunks = args.audio_wait_chunks buffer_size_secs = args.buffer_size_secs audio_device_index = args.audio_device_index orca = pvorca.create(access_key=access_key, model_path=model_path, library_path=library_path) - stream = orca.stream_open() speaker = None - pcm_buf = deque() - # try: - # speaker = PvSpeaker(sample_rate=orca.sample_rate, bits_per_sample=16, buffer_size_secs=buffer_size_secs, - # device_index=audio_device_index) - # speaker.start() - # except ValueError: - # print( - # "\nWarning: Failed to initialize PvSpeaker. Orca will still generate PCM data, but it will not be played.\n") - + try: + speaker = PvSpeaker(sample_rate=orca.sample_rate, bits_per_sample=16, buffer_size_secs=buffer_size_secs, + device_index=audio_device_index) + speaker.start() + # TODO: PvSpeaker Error? + except ValueError: + print( + "\nWarning: Failed to initialize PvSpeaker. Orca will still generate PCM data, but it will not be played.\n") + + def play_audio_callback(pcm: Sequence[int]) -> int: + try: + if speaker is not None: + return speaker.write(pcm) + return 0 + # TODO: PvSpeaker Error? + except ValueError: + pass + return 0 + + def flush_audio_callback() -> None: + try: + if speaker is not None: + speaker.flush() + # TODO: PvSpeaker Error? + except ValueError: + pass + + orca_thread = OrcaThread( + orca=orca, + play_audio_callback=play_audio_callback, + flush_audio_callback=flush_audio_callback, + num_tokens_per_second=tokens_per_second, + audio_wait_chunks=audio_wait_chunks, + ) + + orca_thread.start() try: print(f"Orca version: {orca.version}\n") print(f"Simulated text stream:") tokens = tokenize_text(text=text) - is_start_playing = False - time_first_audio_available = None time_start_text_stream = time.time() - for token in tokens: print(f"{token}", end="", flush=True) - pcm = stream.synthesize(text=token) - - # if pcm is not None: - # if time_first_audio_available is None: - # time_first_audio_available = time.time() - # if speaker is not None: - # pcm_buf.append(pcm) - # if len(pcm_buf) > audio_wait_chunks: - # is_start_playing = True - # - # if is_start_playing and len(pcm_buf) > 0: - # pcm = pcm_buf.popleft() - # written = speaker.write(pcm) - # if written < len(pcm): - # pcm_buf.appendleft(pcm[written:]) + + orca_thread.synthesize(text=token) time.sleep(1 / tokens_per_second) text_stream_duration_seconds = time.time() - time_start_text_stream - remaining_pcm = stream.flush() - if time_first_audio_available is None: - time_first_audio_available = time.time() - pcm_buf.append(remaining_pcm) - - first_audio_available_seconds = time_first_audio_available - time_start_text_stream + orca_thread.flush() + first_audio_available_seconds = orca_thread.get_time_first_audio_available() - time_start_text_stream print(f"\n\nTime to finish text stream: {text_stream_duration_seconds:.2f} seconds") - print(f"Time to receive first audio: {first_audio_available_seconds:.2f} seconds after text stream started") + print(f"Time to receive first audio: {first_audio_available_seconds:.2f} seconds after text stream started\n") + orca_thread.flush_audio() if speaker is not None: - print("\nWaiting for audio to finish ...") - speaker.flush(list(chain.from_iterable(pcm_buf))) - + speaker.delete() except KeyboardInterrupt: - speaker.stop() print("\nStopped...") + if speaker is not None: + speaker.stop() except OrcaActivationLimitError: print("\nAccessKey has reached its processing limit") finally: - orca.delete() + orca_thread.delete() if __name__ == "__main__": From 6a0f4fa9773a91276b4ad6a3a887d126e207db6e Mon Sep 17 00:00:00 2001 From: Albert Ho Date: Tue, 30 Jul 2024 14:25:11 -0700 Subject: [PATCH 14/27] fix audio wait chunks counter --- demo/python/orca_demo_streaming.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/demo/python/orca_demo_streaming.py b/demo/python/orca_demo_streaming.py index 706ab409..b6ad93d1 100644 --- a/demo/python/orca_demo_streaming.py +++ b/demo/python/orca_demo_streaming.py @@ -125,10 +125,6 @@ def _run(self) -> None: if pcm is not None: self._pcm_buffer.append(pcm) - if self._num_pcm_chunks_processed == 0: - self._time_first_audio_available = time.time() - self._num_pcm_chunks_processed += 1 - if self._num_pcm_chunks_processed < self._wait_chunks: continue else: @@ -138,6 +134,10 @@ def _run(self) -> None: if written < len(pcm_chunk): self._pcm_buffer.appendleft(pcm_chunk[written:]) + if self._num_pcm_chunks_processed == 0: + self._time_first_audio_available = time.time() + self._num_pcm_chunks_processed += 1 + def _close_thread_blocking(self): self._queue.put_nowait(None) self._thread.join() From 1c7e6090c589426b7469724fe46c9415643a1627 Mon Sep 17 00:00:00 2001 From: Albert Ho Date: Tue, 30 Jul 2024 14:25:50 -0700 Subject: [PATCH 15/27] fix indent --- demo/python/orca_demo_streaming.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/demo/python/orca_demo_streaming.py b/demo/python/orca_demo_streaming.py index b6ad93d1..e6750838 100644 --- a/demo/python/orca_demo_streaming.py +++ b/demo/python/orca_demo_streaming.py @@ -134,9 +134,9 @@ def _run(self) -> None: if written < len(pcm_chunk): self._pcm_buffer.appendleft(pcm_chunk[written:]) - if self._num_pcm_chunks_processed == 0: - self._time_first_audio_available = time.time() - self._num_pcm_chunks_processed += 1 + if self._num_pcm_chunks_processed == 0: + self._time_first_audio_available = time.time() + self._num_pcm_chunks_processed += 1 def _close_thread_blocking(self): self._queue.put_nowait(None) From c7b90530828bbb4634f84d2a0b265e4ae4b50d68 Mon Sep 17 00:00:00 2001 From: Albert Ho Date: Tue, 30 Jul 2024 14:28:41 -0700 Subject: [PATCH 16/27] fix indent --- demo/python/orca_demo_streaming.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/demo/python/orca_demo_streaming.py b/demo/python/orca_demo_streaming.py index e6750838..30ed554a 100644 --- a/demo/python/orca_demo_streaming.py +++ b/demo/python/orca_demo_streaming.py @@ -123,6 +123,8 @@ def _run(self) -> None: raise ValueError(f"Orca could not synthesize text input `{orca_input.text}`: `{e}`") if pcm is not None: + if self._num_pcm_chunks_processed == 0: + self._time_first_audio_available = time.time() self._pcm_buffer.append(pcm) if self._num_pcm_chunks_processed < self._wait_chunks: @@ -134,9 +136,7 @@ def _run(self) -> None: if written < len(pcm_chunk): self._pcm_buffer.appendleft(pcm_chunk[written:]) - if self._num_pcm_chunks_processed == 0: - self._time_first_audio_available = time.time() - self._num_pcm_chunks_processed += 1 + self._num_pcm_chunks_processed += 1 def _close_thread_blocking(self): self._queue.put_nowait(None) From 9372785cdbb407b5c36c6ba913317d9ae1462d40 Mon Sep 17 00:00:00 2001 From: Albert Ho Date: Tue, 30 Jul 2024 14:33:15 -0700 Subject: [PATCH 17/27] fix --- demo/python/orca_demo_streaming.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/demo/python/orca_demo_streaming.py b/demo/python/orca_demo_streaming.py index 30ed554a..014f158b 100644 --- a/demo/python/orca_demo_streaming.py +++ b/demo/python/orca_demo_streaming.py @@ -125,19 +125,16 @@ def _run(self) -> None: if pcm is not None: if self._num_pcm_chunks_processed == 0: self._time_first_audio_available = time.time() - self._pcm_buffer.append(pcm) + self._num_pcm_chunks_processed += 1 - if self._num_pcm_chunks_processed < self._wait_chunks: - continue - else: + self._pcm_buffer.append(pcm) + if self._num_pcm_chunks_processed > self._wait_chunks: while len(self._pcm_buffer) > 0: pcm_chunk = self._pcm_buffer.popleft() written = self._play_audio_callback(pcm_chunk) if written < len(pcm_chunk): self._pcm_buffer.appendleft(pcm_chunk[written:]) - self._num_pcm_chunks_processed += 1 - def _close_thread_blocking(self): self._queue.put_nowait(None) self._thread.join() From bd728f9839605194220d2165674a43eb2d2ab09a Mon Sep 17 00:00:00 2001 From: Albert Ho Date: Wed, 31 Jul 2024 13:05:07 -0700 Subject: [PATCH 18/27] minor --- demo/python/orca_demo_streaming.py | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/demo/python/orca_demo_streaming.py b/demo/python/orca_demo_streaming.py index 014f158b..a0a0ef3b 100644 --- a/demo/python/orca_demo_streaming.py +++ b/demo/python/orca_demo_streaming.py @@ -103,15 +103,18 @@ def _get_first_audio_wait_chunks() -> int: wait_chunks = 1 return wait_chunks + def _play_buffered_pcm(self): + while len(self._pcm_buffer) > 0: + pcm_chunk = self._pcm_buffer.popleft() + written = self._play_audio_callback(pcm_chunk) + if written < len(pcm_chunk): + self._pcm_buffer.appendleft(pcm_chunk[written:]) + def _run(self) -> None: while True: orca_input = self._queue.get() if orca_input is None: - while len(self._pcm_buffer) > 0: - pcm_chunk = self._pcm_buffer.popleft() - written = self._play_audio_callback(pcm_chunk) - if written < len(pcm_chunk): - self._pcm_buffer.appendleft(pcm_chunk[written:]) + self._play_buffered_pcm() break try: @@ -129,11 +132,7 @@ def _run(self) -> None: self._pcm_buffer.append(pcm) if self._num_pcm_chunks_processed > self._wait_chunks: - while len(self._pcm_buffer) > 0: - pcm_chunk = self._pcm_buffer.popleft() - written = self._play_audio_callback(pcm_chunk) - if written < len(pcm_chunk): - self._pcm_buffer.appendleft(pcm_chunk[written:]) + self._play_buffered_pcm() def _close_thread_blocking(self): self._queue.put_nowait(None) @@ -261,7 +260,6 @@ def main() -> None: speaker = PvSpeaker(sample_rate=orca.sample_rate, bits_per_sample=16, buffer_size_secs=buffer_size_secs, device_index=audio_device_index) speaker.start() - # TODO: PvSpeaker Error? except ValueError: print( "\nWarning: Failed to initialize PvSpeaker. Orca will still generate PCM data, but it will not be played.\n") @@ -271,7 +269,6 @@ def play_audio_callback(pcm: Sequence[int]) -> int: if speaker is not None: return speaker.write(pcm) return 0 - # TODO: PvSpeaker Error? except ValueError: pass return 0 @@ -280,7 +277,6 @@ def flush_audio_callback() -> None: try: if speaker is not None: speaker.flush() - # TODO: PvSpeaker Error? except ValueError: pass From 42b360e68f8a5aa0d5507de1a12d9ad11c89465f Mon Sep 17 00:00:00 2001 From: Albert Ho Date: Wed, 31 Jul 2024 13:39:59 -0700 Subject: [PATCH 19/27] cleanup --- demo/python/orca_demo_streaming.py | 33 +++++++++++++++--------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/demo/python/orca_demo_streaming.py b/demo/python/orca_demo_streaming.py index a0a0ef3b..43b86bb2 100644 --- a/demo/python/orca_demo_streaming.py +++ b/demo/python/orca_demo_streaming.py @@ -18,10 +18,12 @@ from dataclasses import dataclass from queue import Queue from collections import deque +from itertools import chain from typing import ( Callable, Optional, - Sequence, Any, + Sequence, + Any, ) import pvorca @@ -71,7 +73,7 @@ class OrcaInput: def __init__( self, orca: Any, - flush_audio_callback: Callable, + flush_audio_callback: Callable[[Sequence[int]], None], play_audio_callback: Callable[[Sequence[int]], int], num_tokens_per_second: int, audio_wait_chunks: Optional[int] = None, @@ -103,18 +105,10 @@ def _get_first_audio_wait_chunks() -> int: wait_chunks = 1 return wait_chunks - def _play_buffered_pcm(self): - while len(self._pcm_buffer) > 0: - pcm_chunk = self._pcm_buffer.popleft() - written = self._play_audio_callback(pcm_chunk) - if written < len(pcm_chunk): - self._pcm_buffer.appendleft(pcm_chunk[written:]) - def _run(self) -> None: while True: orca_input = self._queue.get() if orca_input is None: - self._play_buffered_pcm() break try: @@ -132,7 +126,11 @@ def _run(self) -> None: self._pcm_buffer.append(pcm) if self._num_pcm_chunks_processed > self._wait_chunks: - self._play_buffered_pcm() + if len(self._pcm_buffer) > 0: + pcm = self._pcm_buffer.popleft() + written = self._play_audio_callback(pcm) + if written < len(pcm): + self._pcm_buffer.appendleft(pcm[written:]) def _close_thread_blocking(self): self._queue.put_nowait(None) @@ -150,7 +148,8 @@ def flush(self) -> None: self._close_thread_blocking() def flush_audio(self) -> None: - self._thread = threading.Thread(target=self._flush_audio_callback) + remaining_pcm = list(chain.from_iterable(self._pcm_buffer)) + self._thread = threading.Thread(target=self._flush_audio_callback, args=(remaining_pcm,)) self._thread.start() self._thread.join() @@ -268,16 +267,16 @@ def play_audio_callback(pcm: Sequence[int]) -> int: try: if speaker is not None: return speaker.write(pcm) - return 0 + return len(pcm) except ValueError: pass - return 0 + return len(pcm) - def flush_audio_callback() -> None: + def flush_audio_callback(pcm: Sequence[int]) -> None: try: if speaker is not None: - speaker.flush() - except ValueError: + speaker.flush(pcm) + except MemoryError: pass orca_thread = OrcaThread( From f0abd72ef69b8bfaff156e08ad09eb0abe9a9ab5 Mon Sep 17 00:00:00 2001 From: Albert Ho Date: Wed, 31 Jul 2024 13:43:23 -0700 Subject: [PATCH 20/27] minor --- demo/python/orca_demo_streaming.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/demo/python/orca_demo_streaming.py b/demo/python/orca_demo_streaming.py index 43b86bb2..75cb9c12 100644 --- a/demo/python/orca_demo_streaming.py +++ b/demo/python/orca_demo_streaming.py @@ -309,7 +309,10 @@ def flush_audio_callback(pcm: Sequence[int]) -> None: print(f"\n\nTime to finish text stream: {text_stream_duration_seconds:.2f} seconds") print(f"Time to receive first audio: {first_audio_available_seconds:.2f} seconds after text stream started\n") + if speaker is not None: + print("Waiting for audio to finish ...") orca_thread.flush_audio() + if speaker is not None: speaker.delete() except KeyboardInterrupt: From 3e40901fc1775fc5b06475cb2663e152647f0a45 Mon Sep 17 00:00:00 2001 From: Albert Ho Date: Wed, 31 Jul 2024 14:52:25 -0700 Subject: [PATCH 21/27] fix --- demo/python/orca_demo_streaming.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/demo/python/orca_demo_streaming.py b/demo/python/orca_demo_streaming.py index 75cb9c12..6b377ed8 100644 --- a/demo/python/orca_demo_streaming.py +++ b/demo/python/orca_demo_streaming.py @@ -126,11 +126,11 @@ def _run(self) -> None: self._pcm_buffer.append(pcm) if self._num_pcm_chunks_processed > self._wait_chunks: - if len(self._pcm_buffer) > 0: + while len(self._pcm_buffer) > 0: pcm = self._pcm_buffer.popleft() - written = self._play_audio_callback(pcm) - if written < len(pcm): - self._pcm_buffer.appendleft(pcm[written:]) + written = self._play_audio_callback(pcm) + if written < len(pcm): + self._pcm_buffer.appendleft(pcm[written:]) def _close_thread_blocking(self): self._queue.put_nowait(None) From d3f747c86a9fbb5956780353092c9aaf8e63b874 Mon Sep 17 00:00:00 2001 From: Albert Ho Date: Wed, 31 Jul 2024 14:57:56 -0700 Subject: [PATCH 22/27] fix --- demo/python/orca_demo_streaming.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/demo/python/orca_demo_streaming.py b/demo/python/orca_demo_streaming.py index 6b377ed8..9f108c79 100644 --- a/demo/python/orca_demo_streaming.py +++ b/demo/python/orca_demo_streaming.py @@ -125,12 +125,13 @@ def _run(self) -> None: self._num_pcm_chunks_processed += 1 self._pcm_buffer.append(pcm) - if self._num_pcm_chunks_processed > self._wait_chunks: - while len(self._pcm_buffer) > 0: - pcm = self._pcm_buffer.popleft() - written = self._play_audio_callback(pcm) - if written < len(pcm): - self._pcm_buffer.appendleft(pcm[written:]) + + if self._num_pcm_chunks_processed > self._wait_chunks: + if len(self._pcm_buffer) > 0: + pcm = self._pcm_buffer.popleft() + written = self._play_audio_callback(pcm) + if written < len(pcm): + self._pcm_buffer.appendleft(pcm[written:]) def _close_thread_blocking(self): self._queue.put_nowait(None) From 28ef8c765111edfd7b04ee100ec30e6c6b7ec1e1 Mon Sep 17 00:00:00 2001 From: Albert Ho Date: Tue, 6 Aug 2024 13:28:35 -0700 Subject: [PATCH 23/27] use updated pvspeaker --- .gitmodules | 3 - demo/c/CMakeLists.txt | 11 +- demo/c/README.md | 5 +- demo/c/orca_demo_streaming.c | 263 +---------------------------------- demo/c/pvspeaker | 1 - demo/python/requirements.txt | 2 +- 6 files changed, 9 insertions(+), 276 deletions(-) delete mode 160000 demo/c/pvspeaker diff --git a/.gitmodules b/.gitmodules index c7eb0caa..03951153 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,3 @@ [submodule "demo/c/dr_libs"] path = demo/c/dr_libs url = ../../mackron/dr_libs.git -[submodule "demo/c/pvspeaker"] - path = demo/c/pvspeaker - url = ../pvspeaker.git diff --git a/demo/c/CMakeLists.txt b/demo/c/CMakeLists.txt index 19490296..3efdb691 100644 --- a/demo/c/CMakeLists.txt +++ b/demo/c/CMakeLists.txt @@ -3,21 +3,16 @@ project(orca_demo_c) set(CMAKE_C_STANDARD 99) set(CMAKE_BUILD_TYPE Release) -add_subdirectory(pvspeaker/project) set(COMMON_LIBS dl) include_directories("${PROJECT_SOURCE_DIR}/../../include") add_executable(orca_demo orca_demo.c) -add_executable( - orca_demo_streaming - orca_demo_streaming.c) -target_include_directories(orca_demo_streaming PRIVATE dr_libs pvspeaker/project/include) - -target_link_libraries(orca_demo_streaming pv_speaker) +add_executable(orca_demo_streaming orca_demo_streaming.c) +target_include_directories(orca_demo_streaming PRIVATE dr_libs) if (NOT WIN32) target_link_libraries(orca_demo ${COMMON_LIBS}) target_link_libraries(orca_demo_streaming ${COMMON_LIBS}) -endif () +endif() diff --git a/demo/c/README.md b/demo/c/README.md index a953b7fe..3ed50721 100644 --- a/demo/c/README.md +++ b/demo/c/README.md @@ -32,12 +32,9 @@ In the single synthesis mode, the text is synthesized in a single call to the Or Use CMake to build the Orca demo target: ```console -cmake -S demo/c/ -B demo/c/build -DPV_SPEAKER_PLATFORM={PV_SPEAKER_PLATFORM} && cmake --build demo/c/build --target orca_demo_streaming +cmake -S demo/c/ -B demo/c/build && cmake --build demo/c/build --target orca_demo_streaming ``` -The `{PV_SPEAKER_PLATFORM}` variable will set the compilation flags for the given platform. Exclude this variable -to get a list of possible values. - ### Usage Running the executable without any command-line arguments prints the usage info to the console: diff --git a/demo/c/orca_demo_streaming.c b/demo/c/orca_demo_streaming.c index 89a3c3b3..b7102f25 100644 --- a/demo/c/orca_demo_streaming.c +++ b/demo/c/orca_demo_streaming.c @@ -10,8 +10,8 @@ WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ + #include -#include #include #include #include @@ -36,8 +36,6 @@ the License. #include "pv_orca.h" -#include "pv_speaker.h" - #define MAX_NUM_CHUNKS (500) #define MAX_NUM_BYTES_PER_CHARACTER (5) @@ -200,193 +198,15 @@ void handle_error( pv_free_error_stack_func(message_stack); } -static void show_audio_devices(void) { - char **device_list = NULL; - int32_t device_list_length = 0; - - pv_speaker_status_t status = pv_speaker_get_available_devices(&device_list_length, &device_list); - if (status != PV_SPEAKER_STATUS_SUCCESS) { - fprintf(stderr, "failed to get audio devices with `%s`.\n", pv_speaker_status_to_string(status)); - exit(1); - } - - for (int32_t i = 0; i < device_list_length; i++) { - fprintf(stdout, "[%d] %s\n", i, device_list[i]); - } - - pv_speaker_free_available_devices(device_list_length, device_list); -} - -typedef struct { - int16_t *pcm; - int32_t num_samples; -} NodeData; - -typedef struct Node { - int16_t *pcm; - int32_t num_samples; - struct Node *prev; - struct Node *next; -} Node; - -typedef struct Deque { - Node *front; - Node *rear; - size_t size; - pthread_mutex_t mutex; // Mutex to protect deque -} Deque; - -Deque *createDeque(); -Node *createNode(int16_t *pcm, int32_t num_samples); -void pushFront(Deque *deque, int16_t *pcm, int32_t num_samples); -void pushRear(Deque *deque, int16_t *pcm, int32_t num_samples); -NodeData popFront(Deque *deque); -void popRear(Deque *deque); -int isEmpty(Deque *deque); -void freeDeque(Deque *deque); - -Deque *createDeque() { - Deque *deque = (Deque *) malloc(sizeof(Deque)); - if (deque == NULL) { - perror("Failed to create deque"); - exit(EXIT_FAILURE); - } - deque->front = deque->rear = NULL; - deque->size = 0; - pthread_mutex_init(&deque->mutex, NULL); - return deque; -} - -Node *createNode(int16_t *pcm, int32_t num_samples) { - Node *node = (Node *) malloc(sizeof(Node)); - if (node == NULL) { - perror("Failed to create node"); - exit(EXIT_FAILURE); - } - node->pcm = pcm; - node->num_samples = num_samples; - node->prev = node->next = NULL; - return node; -} - -void pushFront(Deque *deque, int16_t *pcm, int32_t num_samples) { - pthread_mutex_lock(&deque->mutex); // Lock mutex - Node *node = createNode(pcm, num_samples); - if (isEmpty(deque)) { - deque->front = deque->rear = node; - } else { - node->next = deque->front; - deque->front->prev = node; - deque->front = node; - } - deque->size++; - pthread_mutex_unlock(&deque->mutex); // Unlock mutex -} - -void pushRear(Deque *deque, int16_t *pcm, int32_t num_samples) { - pthread_mutex_lock(&deque->mutex); // Lock mutex - Node *node = createNode(pcm, num_samples); - if (isEmpty(deque)) { - deque->front = deque->rear = node; - } else { - node->prev = deque->rear; - deque->rear->next = node; - deque->rear = node; - } - deque->size++; - pthread_mutex_unlock(&deque->mutex); // Unlock mutex -} - -NodeData popFront(Deque *deque) { - pthread_mutex_lock(&deque->mutex); // Lock mutex - NodeData data = {NULL, 0}; - if (isEmpty(deque)) { - printf("Deque is empty\n"); - return data; - } - Node *temp = deque->front; - data.pcm = temp->pcm; - data.num_samples = temp->num_samples; - deque->front = deque->front->next; - if (deque->front != NULL) { - deque->front->prev = NULL; - } else { - deque->rear = NULL; - } - free(temp); - deque->size--; - pthread_mutex_unlock(&deque->mutex); // Unlock mutex - return data; -} - -void popRear(Deque *deque) { - if (isEmpty(deque)) { - printf("Deque is empty\n"); - return; - } - Node *temp = deque->rear; - deque->rear = deque->rear->prev; - if (deque->rear != NULL) { - deque->rear->next = NULL; - } else { - deque->front = NULL; - } - free(temp); - deque->size--; -} - -int isEmpty(Deque *deque) { - return deque->size == 0; -} - -void freeDeque(Deque *deque) { - pthread_mutex_lock(&deque->mutex); // Lock mutex - while (!isEmpty(deque)) { - popFront(deque); - } - free(deque); - pthread_mutex_unlock(&deque->mutex); // Unlock mutex -} - -typedef struct { - pv_speaker_t *speaker; - Deque *deque; -} ThreadData; - -// Thread function -void *threadFunction(void *arg) { - // Cast the argument to ThreadData* - ThreadData *data = (ThreadData *) arg; - - // Access the struct members - Deque *deque = data->deque; - pv_speaker_t *speaker = data->speaker; - - NodeData node_data = popFront(deque); - int32_t written_length = 0; - pv_speaker_status_t speaker_status = pv_speaker_write(speaker, (int8_t *) node_data.pcm, node_data.num_samples, &written_length); - if (speaker_status != PV_SPEAKER_STATUS_SUCCESS) { - fprintf(stderr, "Failed to write pcm with %s.\n", pv_speaker_status_to_string(speaker_status)); - exit(1); - } - if (written_length < node_data.num_samples) { - pushFront(deque, &node_data.pcm[written_length * 16 / 2], node_data.num_samples - written_length); - } - - return NULL; -} - int32_t picovoice_main(int32_t argc, char **argv) { const char *library_path = NULL; const char *model_path = NULL; const char *access_key = NULL; const char *text = NULL; const char *output_path = NULL; - int32_t device_index = -1; - int32_t audio_wait_chunks = 0; int32_t c; - while ((c = getopt_long(argc, argv, "l:m:a:t:o:w:i:s", long_options, NULL)) != -1) { + while ((c = getopt_long(argc, argv, "l:m:a:t:o:", long_options, NULL)) != -1) { switch (c) { case 'l': library_path = optarg; @@ -403,19 +223,6 @@ int32_t picovoice_main(int32_t argc, char **argv) { case 'o': output_path = optarg; break; - case 'w': - audio_wait_chunks = (int32_t) strtol(optarg, NULL, 10); - break; - case 'i': - device_index = (int32_t) strtol(optarg, NULL, 10); - if (device_index < -1) { - fprintf(stderr, "device index should be either `-1` (default) or a non-negative valid index\n"); - exit(1); - } - break; - case 's': - show_audio_devices(); - exit(0); default: exit(EXIT_FAILURE); } @@ -567,19 +374,6 @@ int32_t picovoice_main(int32_t argc, char **argv) { exit(EXIT_FAILURE); } - pv_speaker_t *speaker = NULL; - pv_speaker_status_t speaker_status = pv_speaker_init(sample_rate, 16, 20, device_index, &speaker); - if (speaker_status != PV_SPEAKER_STATUS_SUCCESS) { - fprintf(stderr, "Failed to initialize audio device with `%s`.\n", pv_speaker_status_to_string(speaker_status)); - exit(1); - } - - speaker_status = pv_speaker_start(speaker); - if (speaker_status != PV_SPEAKER_STATUS_SUCCESS) { - fprintf(stderr, "Failed to start device with %s.\n", pv_speaker_status_to_string(speaker_status)); - exit(1); - } - drwav_data_format format; format.container = drwav_container_riff; format.format = DR_WAVE_FORMAT_PCM; @@ -647,13 +441,7 @@ int32_t picovoice_main(int32_t argc, char **argv) { exit(EXIT_FAILURE); } - Deque *deque = createDeque(); - - pthread_t thread; - ThreadData data = {speaker, deque}; - char character[MAX_NUM_BYTES_PER_CHARACTER] = {0}; - int32_t num_pcm = 0; for (int32_t i = 0; i < (int32_t) strlen(text); i++) { if (num_chunks > (MAX_NUM_CHUNKS - 1)) { fprintf(stderr, "Trying to synthesize too many chunks. Only `%d` chunks are supported.\n", MAX_NUM_CHUNKS); @@ -699,15 +487,6 @@ int32_t picovoice_main(int32_t argc, char **argv) { num_samples_chunks[num_chunks] = num_samples_chunk; end_chunks[num_chunks++] = timestamp; start_chunks[num_chunks] = timestamp; - - num_pcm++; - pushRear(deque, pcm_chunk, num_samples_chunk); - if (num_pcm >= audio_wait_chunks) { - if (pthread_create(&thread, NULL, threadFunction, &data)) { - fprintf(stderr, "Error creating thread\n"); - return 1; - } - } } } @@ -725,34 +504,7 @@ int32_t picovoice_main(int32_t argc, char **argv) { exit(EXIT_FAILURE); } - if (pthread_join(thread, NULL)) { - fprintf(stderr, "Error joining thread\n"); - return 2; - } - - while (!isEmpty(deque)) { - NodeData node_data = popFront(deque); - int32_t written_length = 0; - pv_speaker_status_t speaker_status = pv_speaker_write(speaker, (int8_t *) node_data.pcm, node_data.num_samples, &written_length); - if (speaker_status != PV_SPEAKER_STATUS_SUCCESS) { - fprintf(stderr, "Failed to write pcm with %s.\n", pv_speaker_status_to_string(speaker_status)); - exit(1); - } - if (written_length < node_data.num_samples) { - pushFront(deque, &node_data.pcm[written_length * 16 / 2], node_data.num_samples - written_length); - } - } - freeDeque(deque); - if (num_samples_chunk > 0) { - int32_t written_length = 0; - int8_t *pcm_ptr = (int8_t *) pcm_chunk; - speaker_status = pv_speaker_flush(speaker, pcm_ptr, num_samples_chunk, &written_length); - if (speaker_status != PV_SPEAKER_STATUS_SUCCESS) { - fprintf(stderr, "Failed to flush pcm with %s.\n", pv_speaker_status_to_string(speaker_status)); - exit(1); - } - if (pcm_chunk_prev == NULL) { pcm_chunk_init(num_samples_chunk, pcm_chunk, &pcm_chunk_prev); pcm_chunk_head = pcm_chunk_prev; @@ -770,12 +522,6 @@ int32_t picovoice_main(int32_t argc, char **argv) { pv_orca_synthesize_params_delete_func(synthesize_params); pv_orca_delete_func(orca); - speaker_status = pv_speaker_stop(speaker); - if (speaker_status != PV_SPEAKER_STATUS_SUCCESS) { - fprintf(stderr, "Failed to stop device with %s.\n", pv_speaker_status_to_string(speaker_status)); - exit(1); - } - int32_t num_samples = 0; pcm_chunk_t *pcm_chunk_iter = pcm_chunk_head; while (pcm_chunk_iter != NULL) { @@ -810,8 +556,7 @@ int32_t picovoice_main(int32_t argc, char **argv) { fprintf( stdout, "\nGenerated %d audio chunk%s in %.2f seconds.\n", - num_chunks, - num_chunks == 1 ? "" : "s", + num_chunks, num_chunks == 1 ? "" : "s", end_chunks[num_chunks - 1] - start_chunks[0]); for (int32_t i = 0; i < num_chunks; i++) { @@ -875,4 +620,4 @@ int32_t main(int argc, char *argv[]) { #endif return result; -} +} \ No newline at end of file diff --git a/demo/c/pvspeaker b/demo/c/pvspeaker deleted file mode 160000 index ca766027..00000000 --- a/demo/c/pvspeaker +++ /dev/null @@ -1 +0,0 @@ -Subproject commit ca7660270b00e7798ca11f69c9dc8dc470c7a299 diff --git a/demo/python/requirements.txt b/demo/python/requirements.txt index 2c53c86b..695a008a 100644 --- a/demo/python/requirements.txt +++ b/demo/python/requirements.txt @@ -1,4 +1,4 @@ numpy>=1.24.0 pvorca==0.2.3 -pvspeaker==1.0.1 +pvspeaker==1.0.2 tiktoken==0.6.0 From 74b80b9b02b7cb47b0a112b2a27510a0c128031e Mon Sep 17 00:00:00 2001 From: Albert Ho Date: Tue, 6 Aug 2024 13:47:31 -0700 Subject: [PATCH 24/27] fix spelling, rm jetson from workflows, install pulseaudio for workflows --- .github/workflows/python-demo.yml | 17 ++++++++++++++++- .github/workflows/python-perf.yml | 8 ++------ .github/workflows/python.yml | 4 +--- .gitmodules | 2 +- demo/c/orca_demo_streaming.c | 2 +- demo/python/orca_demo_streaming.py | 2 +- resources/.lint/spell-check/dict.txt | 3 +++ 7 files changed, 25 insertions(+), 13 deletions(-) diff --git a/.github/workflows/python-demo.yml b/.github/workflows/python-demo.yml index 0cf761a9..c16ead6b 100644 --- a/.github/workflows/python-demo.yml +++ b/.github/workflows/python-demo.yml @@ -49,6 +49,14 @@ jobs: ${{matrix.install_dep}} pip install -r requirements.txt + # GitHub Actions runners do not have sound cards, so a virtual one must be created in order for unit tests to run. + - name: Install PulseAudio on Ubuntu + run: | + sudo apt-get update + sudo apt-get install -y pulseaudio + pulseaudio --check || pulseaudio --start + pactl load-module module-null-sink + - name: Test streaming run: > python3 orca_demo_streaming.py @@ -67,7 +75,7 @@ jobs: strategy: matrix: - machine: [ rpi3-32, rpi3-64, rpi4-32, rpi4-64, rpi5-64, jetson ] + machine: [ rpi3-32, rpi3-64, rpi4-32, rpi4-64, rpi5-64 ] steps: - uses: actions/checkout@v3 @@ -75,6 +83,13 @@ jobs: - name: Install dependencies run: pip3 install -r requirements.txt + - name: Install PulseAudio + run: | + sudo apt-get update + sudo apt-get install -y pulseaudio + pulseaudio --check || pulseaudio --start + pactl load-module module-null-sink + - name: Test streaming run: > python3 orca_demo_streaming.py diff --git a/.github/workflows/python-perf.yml b/.github/workflows/python-perf.yml index 64c8a6cd..e8f149c2 100644 --- a/.github/workflows/python-perf.yml +++ b/.github/workflows/python-perf.yml @@ -11,7 +11,6 @@ on: - 'binding/python/_util.py' - 'binding/python/test_orca_perf.py' - 'lib/common/**' - - 'lib/jetson/**' - 'lib/linux/**' - 'lib/mac/**' - 'lib/raspberry-pi/**' @@ -25,7 +24,6 @@ on: - 'binding/python/_util.py' - 'binding/python/test_orca_perf.py' - 'lib/common/**' - - 'lib/jetson/**' - 'lib/linux/**' - 'lib/mac/**' - 'lib/raspberry-pi/**' @@ -42,7 +40,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-latest, windows-latest, macos-latest] + os: [ ubuntu-latest, windows-latest, macos-latest ] include: - os: ubuntu-latest proc_performance_threshold_rtf: 5.0 @@ -78,7 +76,7 @@ jobs: strategy: fail-fast: false matrix: - machine: [rpi3-32, rpi3-64, rpi4-32, rpi4-64, rpi5-64, jetson] + machine: [ rpi3-32, rpi3-64, rpi4-32, rpi4-64, rpi5-64 ] include: - machine: rpi3-32 proc_performance_threshold_rtf: 1.0 @@ -90,8 +88,6 @@ jobs: proc_performance_threshold_rtf: 2.0 - machine: rpi5-64 proc_performance_threshold_rtf: 2.0 - - machine: jetson - proc_performance_threshold_rtf: 2.0 steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 83a2fdf7..f7073116 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -9,7 +9,6 @@ on: - 'binding/python/**' - '!binding/python/README.md' - 'lib/common/**' - - 'lib/jetson/**' - 'lib/linux/**' - 'lib/mac/**' - 'lib/raspberry-pi/**' @@ -22,7 +21,6 @@ on: - 'binding/python/**' - '!binding/python/README.md' - 'lib/common/**' - - 'lib/jetson/**' - 'lib/linux/**' - 'lib/mac/**' - 'lib/raspberry-pi/**' @@ -64,7 +62,7 @@ jobs: strategy: matrix: - machine: [ rpi3-32, rpi3-64, rpi4-32, rpi4-64, rpi5-64, jetson ] + machine: [ rpi3-32, rpi3-64, rpi4-32, rpi4-64, rpi5-64 ] steps: - uses: actions/checkout@v3 diff --git a/.gitmodules b/.gitmodules index 03951153..41039694 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ [submodule "demo/c/dr_libs"] path = demo/c/dr_libs - url = ../../mackron/dr_libs.git + url = ../../mackron/dr_libs.git \ No newline at end of file diff --git a/demo/c/orca_demo_streaming.c b/demo/c/orca_demo_streaming.c index b7102f25..238f3918 100644 --- a/demo/c/orca_demo_streaming.c +++ b/demo/c/orca_demo_streaming.c @@ -620,4 +620,4 @@ int32_t main(int argc, char *argv[]) { #endif return result; -} \ No newline at end of file +} diff --git a/demo/python/orca_demo_streaming.py b/demo/python/orca_demo_streaming.py index 9f108c79..516ce712 100644 --- a/demo/python/orca_demo_streaming.py +++ b/demo/python/orca_demo_streaming.py @@ -260,7 +260,7 @@ def main() -> None: speaker = PvSpeaker(sample_rate=orca.sample_rate, bits_per_sample=16, buffer_size_secs=buffer_size_secs, device_index=audio_device_index) speaker.start() - except ValueError: + except RuntimeError or ValueError: print( "\nWarning: Failed to initialize PvSpeaker. Orca will still generate PCM data, but it will not be played.\n") diff --git a/resources/.lint/spell-check/dict.txt b/resources/.lint/spell-check/dict.txt index 7e77faff..4649d037 100644 --- a/resources/.lint/spell-check/dict.txt +++ b/resources/.lint/spell-check/dict.txt @@ -61,3 +61,6 @@ btns Btns pltf usleep +pvspeaker +popleft +appendleft \ No newline at end of file From 04279915ebbf5972cc9f00a6b2665ae16d8b8484 Mon Sep 17 00:00:00 2001 From: Albert Ho Date: Tue, 6 Aug 2024 13:50:22 -0700 Subject: [PATCH 25/27] fix --- .github/workflows/python-demo.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/python-demo.yml b/.github/workflows/python-demo.yml index c16ead6b..9a0b4d16 100644 --- a/.github/workflows/python-demo.yml +++ b/.github/workflows/python-demo.yml @@ -51,6 +51,7 @@ jobs: # GitHub Actions runners do not have sound cards, so a virtual one must be created in order for unit tests to run. - name: Install PulseAudio on Ubuntu + if: matrix.machine == 'ubuntu-latest' run: | sudo apt-get update sudo apt-get install -y pulseaudio From eb7ccb7aadda879e9dfc597827225e924dc07cc5 Mon Sep 17 00:00:00 2001 From: Albert Ho Date: Wed, 7 Aug 2024 09:27:15 -0700 Subject: [PATCH 26/27] rm pulseaudio --- .github/workflows/python-demo.yml | 9 --------- 1 file changed, 9 deletions(-) diff --git a/.github/workflows/python-demo.yml b/.github/workflows/python-demo.yml index 9a0b4d16..78363740 100644 --- a/.github/workflows/python-demo.yml +++ b/.github/workflows/python-demo.yml @@ -49,15 +49,6 @@ jobs: ${{matrix.install_dep}} pip install -r requirements.txt - # GitHub Actions runners do not have sound cards, so a virtual one must be created in order for unit tests to run. - - name: Install PulseAudio on Ubuntu - if: matrix.machine == 'ubuntu-latest' - run: | - sudo apt-get update - sudo apt-get install -y pulseaudio - pulseaudio --check || pulseaudio --start - pactl load-module module-null-sink - - name: Test streaming run: > python3 orca_demo_streaming.py From 14c3cf5659411939bc610039470f1d4be1089558 Mon Sep 17 00:00:00 2001 From: Albert Ho Date: Wed, 7 Aug 2024 09:27:52 -0700 Subject: [PATCH 27/27] rm pulseaudio --- .github/workflows/python-demo.yml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/.github/workflows/python-demo.yml b/.github/workflows/python-demo.yml index 78363740..d165055f 100644 --- a/.github/workflows/python-demo.yml +++ b/.github/workflows/python-demo.yml @@ -75,13 +75,6 @@ jobs: - name: Install dependencies run: pip3 install -r requirements.txt - - name: Install PulseAudio - run: | - sudo apt-get update - sudo apt-get install -y pulseaudio - pulseaudio --check || pulseaudio --start - pactl load-module module-null-sink - - name: Test streaming run: > python3 orca_demo_streaming.py