diff --git a/CHANGELOG.rst b/CHANGELOG.rst index c90ca7a..c7a3c45 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,6 +4,7 @@ Changelog for package ROS 2 Whisper 1.3.1 (2024-07-01) ------------------ * `whisper_msgs`: Changed to `whisper_idl` package +* `whisper_bringup`: Changed executor to `MultiThreadedExecutor` so audio and inference can run in parallel on `whisper_server` 1.3.0 (2024-06-21) ------------------ diff --git a/README.md b/README.md index d2ae80c..1af7af4 100644 --- a/README.md +++ b/README.md @@ -27,4 +27,3 @@ Action server under topic `inference` of type [Inference.action](whisper_idl/act ## Troubleshoot - Encoder inference time: https://github.com/ggerganov/whisper.cpp/issues/10#issuecomment-1302462960 -- Compile with GPU support (might differ between platforms): https://github.com/ggerganov/whisper.cpp#nvidia-gpu-support-via-cublas WHISPER_CUBLAS=On diff --git a/whisper_bringup/launch/bringup.launch.py b/whisper_bringup/launch/bringup.launch.py index 39d0a41..799a25e 100644 --- a/whisper_bringup/launch/bringup.launch.py +++ b/whisper_bringup/launch/bringup.launch.py @@ -2,7 +2,8 @@ from ament_index_python import get_package_share_directory from launch import LaunchDescription -from launch_ros.actions import Node +from launch_ros.actions import ComposableNodeContainer, Node +from launch_ros.descriptions import ComposableNode def generate_launch_description() -> LaunchDescription: @@ -21,17 +22,22 @@ def generate_launch_description() -> LaunchDescription: whisper_config = os.path.join( get_package_share_directory("whisper_server"), "config", "whisper.yaml" ) + composable_node = ComposableNode( + package="whisper_server", + plugin="whisper::InferenceComponent", + name="inference", + namespace="whisper", + parameters=[whisper_config], + remappings=[("audio", "/audio_listener/audio")], + ) ld.add_action( - Node( - package="whisper_server", - executable="whisper", + ComposableNodeContainer( + name="whisper_container", + package="rclcpp_components", + namespace="", + executable="component_container_mt", # require multi-threaded executor so inference server can parallelize audio encueing and inference output="screen", - namespace="whisper", - parameters=[whisper_config], - remappings=[ - ("/whisper/audio", "/audio_listener/audio"), - ], + composable_node_descriptions=[composable_node], ) ) - return ld diff --git a/whisper_server/config/whisper.yaml b/whisper_server/config/whisper.yaml index 4eff33b..18a73ff 100644 --- a/whisper_server/config/whisper.yaml +++ b/whisper_server/config/whisper.yaml @@ -5,7 +5,7 @@ wparams: language: "en" print_progress: false - n_threads: 1 + n_threads: 4 cparams: flash_attn: true gpu_device: 0 diff --git a/whisper_util/include/whisper_util/audio_buffers.hpp b/whisper_util/include/whisper_util/audio_buffers.hpp index 26140df..de0a037 100644 --- a/whisper_util/include/whisper_util/audio_buffers.hpp +++ b/whisper_util/include/whisper_util/audio_buffers.hpp @@ -84,6 +84,7 @@ class BatchedBuffer { std::uint16_t batch_idx_; std::vector audio_; + std::vector carry_over_audio_; RingBuffer audio_buffer_; }; } // end of namespace whisper diff --git a/whisper_util/src/audio_buffers.cpp b/whisper_util/src/audio_buffers.cpp index bec0bae..7d4ee73 100644 --- a/whisper_util/src/audio_buffers.cpp +++ b/whisper_util/src/audio_buffers.cpp @@ -46,7 +46,7 @@ BatchedBuffer::BatchedBuffer(const std::chrono::milliseconds &batch_capacity, const std::chrono::milliseconds &carry_over_capacity) : batch_capacity_(time_to_count(batch_capacity)), carry_over_capacity_(time_to_count(carry_over_capacity)), batch_idx_(0), - audio_buffer_(time_to_count(buffer_capacity)){ + carry_over_audio_(carry_over_capacity_), audio_buffer_(time_to_count(buffer_capacity)) { }; @@ -77,9 +77,10 @@ bool BatchedBuffer::require_new_batch_() { } void BatchedBuffer::carry_over_() { - std::vector carry_over(audio_.end() - carry_over_capacity_, audio_.end()); + carry_over_audio_.insert(carry_over_audio_.begin(), audio_.end() - carry_over_capacity_, + audio_.end()); audio_.clear(); - audio_.insert(audio_.end(), carry_over.begin(), carry_over.end()); + audio_.insert(audio_.end(), carry_over_audio_.begin(), carry_over_audio_.end()); } void BatchedBuffer::clear() {