From 1373de37ff3be442db6914f29a77ba3f5e5e0292 Mon Sep 17 00:00:00 2001 From: Bastian Gruber Date: Fri, 21 Jul 2023 13:09:11 +0200 Subject: [PATCH 01/23] chore: adding task-manager feature flags --- crates/topos-tce-broadcast/Cargo.toml | 3 +- .../benches/double_echo.rs | 4 ++ .../benches/task_manager_channels.rs | 21 +++--- .../benches/task_manager_futures.rs | 27 ++++---- crates/topos-tce-broadcast/src/lib.rs | 17 ++++- .../src/task_manager_channels/mod.rs | 65 ++++++++++++++----- .../src/task_manager_channels/task.rs | 28 +++++--- .../src/task_manager_futures/mod.rs | 30 ++++++--- .../src/task_manager_futures/task.rs | 9 ++- crates/topos-tce-broadcast/src/tests/mod.rs | 5 ++ .../src/tests/task_manager_channels.rs | 23 +++---- .../src/tests/task_manager_futures.rs | 23 +++---- crates/topos/Cargo.toml | 4 +- crates/topos/build.rs | 8 +++ 14 files changed, 173 insertions(+), 94 deletions(-) diff --git a/crates/topos-tce-broadcast/Cargo.toml b/crates/topos-tce-broadcast/Cargo.toml index 19879f7be..1b6ee040e 100644 --- a/crates/topos-tce-broadcast/Cargo.toml +++ b/crates/topos-tce-broadcast/Cargo.toml @@ -32,7 +32,8 @@ rand.workspace = true topos-test-sdk = { path = "../topos-test-sdk/" } [features] -default = [] +task-manager-channels = [] +task-manager-futures = [] [[bench]] name = "double_echo" diff --git a/crates/topos-tce-broadcast/benches/double_echo.rs b/crates/topos-tce-broadcast/benches/double_echo.rs index a40bf1fda..e6873a859 100644 --- a/crates/topos-tce-broadcast/benches/double_echo.rs +++ b/crates/topos-tce-broadcast/benches/double_echo.rs @@ -1,7 +1,9 @@ use criterion::async_executor::FuturesExecutor; use criterion::{criterion_group, criterion_main, Criterion}; +#[cfg(feature = "task-manager-channels")] mod task_manager_channels; +#[cfg(feature = "task-manager-futures")] mod task_manager_futures; pub fn criterion_benchmark(c: &mut Criterion) { @@ -11,6 +13,7 @@ pub fn criterion_benchmark(c: &mut Criterion) { .build() .unwrap(); + #[cfg(feature = "task-manager-channels")] c.bench_function("double_echo with channels", |b| { b.to_async(FuturesExecutor).iter(|| async { runtime.block_on(async { @@ -19,6 +22,7 @@ pub fn criterion_benchmark(c: &mut Criterion) { }) }); + #[cfg(feature = "task-manager-futures")] c.bench_function("double_echo with futures", |b| { b.to_async(FuturesExecutor).iter(|| async { runtime.block_on(async { diff --git a/crates/topos-tce-broadcast/benches/task_manager_channels.rs b/crates/topos-tce-broadcast/benches/task_manager_channels.rs index b24309e18..c6935580c 100644 --- a/crates/topos-tce-broadcast/benches/task_manager_channels.rs +++ b/crates/topos-tce-broadcast/benches/task_manager_channels.rs @@ -1,9 +1,8 @@ -use std::collections::HashMap; - use rand::Rng; use tokio::spawn; use tokio::sync::mpsc; +use tce_transport::ReliableBroadcastParams; use topos_core::uci::CertificateId; use topos_p2p::PeerId; use topos_tce_broadcast::task_manager_channels::{TaskManager, Thresholds}; @@ -11,22 +10,18 @@ use topos_tce_broadcast::DoubleEchoCommand; pub async fn processing_double_echo(n: u64) { let (message_sender, message_receiver) = mpsc::channel(1024); - let (task_completion_sender, task_completion_receiver) = mpsc::channel(1024); let (event_sender, mut event_receiver) = mpsc::channel(1024); + let (_shutdown_sender, shutdown_receiver) = mpsc::channel(1); - let task_manager = TaskManager { - message_receiver, - task_completion: task_completion_receiver, - task_context: HashMap::new(), - thresholds: Thresholds { - echo: n as usize, - ready: n as usize, - delivery: n as usize, - }, + let threshold = ReliableBroadcastParams { + echo_threshold: n as usize, + ready_threshold: n as usize, + delivery_threshold: n as usize, }; - spawn(task_manager.run(task_completion_sender, event_sender)); + let (task_manager, task_completion_sender, _) = TaskManager::new(message_receiver, threshold); + spawn(task_manager.run(task_completion_sender, event_sender, shutdown_receiver)); let mut certificates = vec![]; let mut rng = rand::thread_rng(); diff --git a/crates/topos-tce-broadcast/benches/task_manager_futures.rs b/crates/topos-tce-broadcast/benches/task_manager_futures.rs index 52225e22c..5f280bdaa 100644 --- a/crates/topos-tce-broadcast/benches/task_manager_futures.rs +++ b/crates/topos-tce-broadcast/benches/task_manager_futures.rs @@ -1,31 +1,26 @@ -use futures::stream::FuturesUnordered; use rand::Rng; +use tce_transport::ReliableBroadcastParams; use tokio::spawn; use tokio::sync::mpsc; use topos_core::uci::CertificateId; use topos_p2p::PeerId; use topos_tce_broadcast::DoubleEchoCommand; -use topos_tce_broadcast::task_manager_futures::{TaskManager, Thresholds}; +use topos_tce_broadcast::task_manager_futures::TaskManager; pub async fn processing_double_echo(n: u64) { let (message_sender, message_receiver) = mpsc::channel(1024); - let (task_completion_sender, mut task_completion_receiver) = mpsc::channel(48_000); - let (shutdown_sender, shutdown_receiver) = mpsc::channel(1); - - let task_manager = TaskManager { - message_receiver, - task_completion_sender, - tasks: Default::default(), - running_tasks: FuturesUnordered::new(), - thresholds: Thresholds { - echo: n as usize, - ready: n as usize, - delivery: n as usize, - }, - shutdown_sender, + let (task_completion_sender, mut task_completion_receiver) = mpsc::channel(40_000); + + let thresholds = ReliableBroadcastParams { + echo_threshold: n as usize, + ready_threshold: n as usize, + delivery_threshold: n as usize, }; + let (task_manager, shutdown_receiver) = + TaskManager::new(message_receiver, task_completion_sender, thresholds); + spawn(task_manager.run(shutdown_receiver)); let mut certificates = vec![]; diff --git a/crates/topos-tce-broadcast/src/lib.rs b/crates/topos-tce-broadcast/src/lib.rs index cbd3855fd..6d45bf1e6 100644 --- a/crates/topos-tce-broadcast/src/lib.rs +++ b/crates/topos-tce-broadcast/src/lib.rs @@ -23,7 +23,6 @@ use topos_p2p::PeerId; use topos_tce_storage::StorageClient; use tracing::{debug, error, info}; -use crate::sampler::SubscriptionsView; pub use topos_core::uci; pub type Peer = String; @@ -32,11 +31,27 @@ mod constant; pub mod double_echo; pub mod sampler; +#[cfg(all( + feature = "task-manager-channels", + not(feature = "task-manager-futures") +))] pub mod task_manager_channels; +#[cfg(feature = "task-manager-futures")] pub mod task_manager_futures; + #[cfg(test)] mod tests; +use crate::sampler::SubscriptionsView; + +#[cfg(feature = "task-manager-futures")] +use crate::task_manager_futures::TaskManager; +#[cfg(all( + feature = "task-manager-channels", + not(feature = "task-manager-futures") +))] +use task_manager_channels::TaskManager; + /// Configuration of TCE implementation pub struct ReliableBroadcastConfig { pub tce_params: ReliableBroadcastParams, diff --git a/crates/topos-tce-broadcast/src/task_manager_channels/mod.rs b/crates/topos-tce-broadcast/src/task_manager_channels/mod.rs index f39120dd7..7f52e216b 100644 --- a/crates/topos-tce-broadcast/src/task_manager_channels/mod.rs +++ b/crates/topos-tce-broadcast/src/task_manager_channels/mod.rs @@ -1,6 +1,7 @@ use std::collections::HashMap; use tokio::{spawn, sync::mpsc}; +use tce_transport::ReliableBroadcastParams; use topos_core::uci::CertificateId; pub mod task; @@ -8,40 +9,55 @@ pub mod task; use crate::DoubleEchoCommand; use task::{Task, TaskCompletion, TaskContext}; -#[derive(Clone)] -pub struct Thresholds { - pub echo: usize, - pub ready: usize, - pub delivery: usize, -} - /// The TaskManager is responsible for receiving messages from the network and distributing them /// among tasks. These tasks are either created if none for a certain CertificateID exists yet, /// or existing tasks will receive the messages. pub struct TaskManager { pub message_receiver: mpsc::Receiver, - pub task_completion: mpsc::Receiver, - pub task_context: HashMap, - pub thresholds: Thresholds, + pub task_completion_receiver: mpsc::Receiver, + pub tasks: HashMap, + pub thresholds: ReliableBroadcastParams, + pub shutdown_sender: mpsc::Sender<()>, } impl TaskManager { + pub fn new( + message_receiver: mpsc::Receiver, + thresholds: ReliableBroadcastParams, + ) -> (Self, mpsc::Sender, mpsc::Receiver<()>) { + let (task_completion_sender, task_completion_receiver) = mpsc::channel(1024); + let (shutdown_sender, shutdown_receiver) = mpsc::channel(1); + + ( + Self { + message_receiver, + task_completion_receiver, + tasks: HashMap::new(), + thresholds, + shutdown_sender, + }, + task_completion_sender, + shutdown_receiver, + ) + } + pub async fn run( mut self, task_completion_sender: mpsc::Sender, event_sender: mpsc::Sender, + mut shutdown_receiver: mpsc::Receiver<()>, ) { loop { tokio::select! { // If a task sends a message over the completion channel, it is signalling that it // is done and can be removed from the open tasks inside `task_context` - Some(task_completion) = self.task_completion.recv() => { + Some(task_completion) = self.task_completion_receiver.recv() => { match task_completion.success { true => { - self.task_context.remove(&task_completion.certificate_id); + self.tasks.remove(&task_completion.certificate_id); } false => { - self.task_context.remove(&task_completion.certificate_id); + self.tasks.remove(&task_completion.certificate_id); } } } @@ -49,7 +65,7 @@ impl TaskManager { Some(msg) = self.message_receiver.recv() => { match msg { DoubleEchoCommand::Echo { certificate_id, .. } | DoubleEchoCommand::Ready{ certificate_id, .. } => { - let task_context = match self.task_context.get(&certificate_id) { + let task_context = match self.tasks.get(&certificate_id) { Some(task_context) => task_context.to_owned(), None => self.create_and_spawn_new_task(certificate_id, task_completion_sender.clone(), event_sender.clone()), }; @@ -57,13 +73,24 @@ impl TaskManager { Self::send_message_to_task(task_context, msg).await; } DoubleEchoCommand::Broadcast { ref cert, .. } => { - if self.task_context.get(&cert.id).is_none() { + if self.tasks.get(&cert.id).is_none() { let task_context = self.create_and_spawn_new_task(cert.id, task_completion_sender.clone(), event_sender.clone()); Self::send_message_to_task(task_context, msg).await; } } } } + + _ = shutdown_receiver.recv() => { + println!("Task Manager shutting down"); + + // Shutting down every open task + for task in self.tasks.iter() { + task.1.shutdown_sender.send(()).await.unwrap(); + } + + break; + } } } } @@ -83,7 +110,7 @@ impl TaskManager { spawn(task.run()); - self.task_context.insert(certificate_id, context.clone()); + self.tasks.insert(certificate_id, context.clone()); context } @@ -94,3 +121,9 @@ impl TaskManager { }); } } + +impl Drop for TaskManager { + fn drop(&mut self) { + _ = self.shutdown_sender.try_send(()); + } +} diff --git a/crates/topos-tce-broadcast/src/task_manager_channels/task.rs b/crates/topos-tce-broadcast/src/task_manager_channels/task.rs index 8cec847d5..87e823409 100644 --- a/crates/topos-tce-broadcast/src/task_manager_channels/task.rs +++ b/crates/topos-tce-broadcast/src/task_manager_channels/task.rs @@ -1,9 +1,8 @@ use tokio::sync::mpsc; -use topos_core::uci::CertificateId; - -use crate::task_manager_channels::Thresholds; use crate::DoubleEchoCommand; +use tce_transport::ReliableBroadcastParams; +use topos_core::uci::CertificateId; #[derive(Debug, PartialEq, Eq)] pub enum Events { @@ -37,8 +36,8 @@ impl TaskCompletion { #[derive(Clone)] pub struct TaskContext { - pub certificate_id: CertificateId, pub message_sender: mpsc::Sender, + pub shutdown_sender: mpsc::Sender<()>, } pub struct Task { @@ -46,7 +45,8 @@ pub struct Task { pub certificate_id: CertificateId, pub completion_sender: mpsc::Sender, pub event_sender: mpsc::Sender, - pub thresholds: Thresholds, + pub thresholds: ReliableBroadcastParams, + pub shutdown_receiver: mpsc::Receiver<()>, } impl Task { @@ -54,12 +54,14 @@ impl Task { certificate_id: CertificateId, completion_sender: mpsc::Sender, event_sender: mpsc::Sender, - thresholds: Thresholds, + thresholds: ReliableBroadcastParams, ) -> (Self, TaskContext) { let (message_sender, message_receiver) = mpsc::channel(1024); + let (shutdown_sender, shutdown_receiver) = mpsc::channel(1); + let task_context = TaskContext { - certificate_id, message_sender, + shutdown_sender, }; let task = Task { @@ -68,6 +70,7 @@ impl Task { completion_sender, event_sender, thresholds, + shutdown_receiver, }; (task, task_context) @@ -96,7 +99,16 @@ impl Task { pub(crate) async fn run(mut self) { loop { tokio::select! { - Some(msg) = self.message_receiver.recv() => if let Ok(true) = self.handle_msg(msg).await { + msg = self.message_receiver.recv() => { + if let Some(msg) = msg { + if let Ok(true) = self.handle_msg(msg).await { + break; + } + } + } + + _ = self.shutdown_receiver.recv() => { + println!("Received shutdown, shutting down task {:?}", self.certificate_id); break; } } diff --git a/crates/topos-tce-broadcast/src/task_manager_futures/mod.rs b/crates/topos-tce-broadcast/src/task_manager_futures/mod.rs index f26266c21..f99008162 100644 --- a/crates/topos-tce-broadcast/src/task_manager_futures/mod.rs +++ b/crates/topos-tce-broadcast/src/task_manager_futures/mod.rs @@ -6,6 +6,7 @@ use std::future::IntoFuture; use std::pin::Pin; use tokio::sync::mpsc; +use tce_transport::ReliableBroadcastParams; use topos_core::uci::CertificateId; pub mod task; @@ -13,13 +14,6 @@ pub mod task; use crate::DoubleEchoCommand; use task::{Task, TaskContext, TaskStatus}; -#[derive(Clone)] -pub struct Thresholds { - pub echo: usize, - pub ready: usize, - pub delivery: usize, -} - /// The TaskManager is responsible for receiving messages from the network and distributing them /// among tasks. These tasks are either created if none for a certain CertificateID exists yet, /// or existing tasks will receive the messages. @@ -31,11 +25,31 @@ pub struct TaskManager { pub running_tasks: FuturesUnordered< Pin + Send + 'static>>, >, - pub thresholds: Thresholds, + pub thresholds: ReliableBroadcastParams, pub shutdown_sender: mpsc::Sender<()>, } impl TaskManager { + pub fn new( + message_receiver: mpsc::Receiver, + task_completion_sender: mpsc::Sender<(CertificateId, TaskStatus)>, + thresholds: ReliableBroadcastParams, + ) -> (Self, mpsc::Receiver<()>) { + let (shutdown_sender, shutdown_receiver) = mpsc::channel(1); + + ( + Self { + message_receiver, + task_completion_sender, + tasks: HashMap::new(), + running_tasks: FuturesUnordered::new(), + thresholds, + shutdown_sender, + }, + shutdown_receiver, + ) + } + pub async fn run(mut self, mut shutdown_receiver: mpsc::Receiver<()>) { loop { tokio::select! { diff --git a/crates/topos-tce-broadcast/src/task_manager_futures/task.rs b/crates/topos-tce-broadcast/src/task_manager_futures/task.rs index 9df7f12eb..764499612 100644 --- a/crates/topos-tce-broadcast/src/task_manager_futures/task.rs +++ b/crates/topos-tce-broadcast/src/task_manager_futures/task.rs @@ -4,8 +4,8 @@ use tokio::sync::mpsc; use topos_core::uci::CertificateId; -use crate::task_manager_futures::Thresholds; use crate::DoubleEchoCommand; +use tce_transport::ReliableBroadcastParams; #[derive(Debug, PartialEq, Eq)] pub enum Events { @@ -30,12 +30,15 @@ pub struct TaskContext { pub struct Task { pub message_receiver: mpsc::Receiver, pub certificate_id: CertificateId, - pub thresholds: Thresholds, + pub thresholds: ReliableBroadcastParams, pub shutdown_receiver: mpsc::Receiver<()>, } impl Task { - pub fn new(certificate_id: CertificateId, thresholds: Thresholds) -> (Task, TaskContext) { + pub fn new( + certificate_id: CertificateId, + thresholds: ReliableBroadcastParams, + ) -> (Task, TaskContext) { let (message_sender, message_receiver) = mpsc::channel(10_024); let (shutdown_sender, shutdown_receiver) = mpsc::channel(1); diff --git a/crates/topos-tce-broadcast/src/tests/mod.rs b/crates/topos-tce-broadcast/src/tests/mod.rs index 37bea82f1..eb49d2f5a 100644 --- a/crates/topos-tce-broadcast/src/tests/mod.rs +++ b/crates/topos-tce-broadcast/src/tests/mod.rs @@ -1,4 +1,9 @@ +#[cfg(all( + feature = "task-manager-channels", + not(feature = "task-manager-futures") +))] mod task_manager_channels; +#[cfg(feature = "task-manager-futures")] mod task_manager_futures; use crate::double_echo::*; diff --git a/crates/topos-tce-broadcast/src/tests/task_manager_channels.rs b/crates/topos-tce-broadcast/src/tests/task_manager_channels.rs index 2b5339cbd..36a887e15 100644 --- a/crates/topos-tce-broadcast/src/tests/task_manager_channels.rs +++ b/crates/topos-tce-broadcast/src/tests/task_manager_channels.rs @@ -1,9 +1,9 @@ -use crate::task_manager_channels::{TaskManager, Thresholds}; +use crate::task_manager_channels::TaskManager; +use tce_transport::ReliableBroadcastParams; use crate::*; use rand::Rng; use rstest::*; -use std::collections::HashMap; use tokio::{spawn, sync::mpsc}; use topos_p2p::PeerId; @@ -13,21 +13,18 @@ async fn task_manager_channels_receiving_messages() { let n = 5; let (message_sender, message_receiver) = mpsc::channel(1024); - let (task_completion_sender, task_completion_receiver) = mpsc::channel(1024); let (event_sender, mut event_receiver) = mpsc::channel(1024); + let (_shutdown_sender, shutdown_receiver) = mpsc::channel(1); - let task_manager = TaskManager { - message_receiver, - task_completion: task_completion_receiver, - task_context: HashMap::new(), - thresholds: Thresholds { - echo: n, - ready: n, - delivery: n, - }, + let threshold = ReliableBroadcastParams { + echo_threshold: n, + ready_threshold: n, + delivery_threshold: n, }; - spawn(task_manager.run(task_completion_sender, event_sender)); + let (task_manager, task_completion_sender, _) = TaskManager::new(message_receiver, threshold); + + spawn(task_manager.run(task_completion_sender, event_sender, shutdown_receiver)); let mut certificates = vec![]; diff --git a/crates/topos-tce-broadcast/src/tests/task_manager_futures.rs b/crates/topos-tce-broadcast/src/tests/task_manager_futures.rs index 0dde484e9..1b1526d16 100644 --- a/crates/topos-tce-broadcast/src/tests/task_manager_futures.rs +++ b/crates/topos-tce-broadcast/src/tests/task_manager_futures.rs @@ -1,8 +1,8 @@ -use crate::task_manager_futures::{TaskManager, Thresholds}; +use crate::task_manager_futures::TaskManager; use crate::{CertificateId, DoubleEchoCommand}; -use futures::stream::FuturesUnordered; use rand::Rng; use rstest::*; +use tce_transport::ReliableBroadcastParams; use tokio::{spawn, sync::mpsc}; use topos_p2p::PeerId; @@ -13,21 +13,16 @@ async fn task_manager_futures_receiving_messages() { let (message_sender, message_receiver) = mpsc::channel(1024); let (task_completion_sender, mut task_completion_receiver) = mpsc::channel(1024); - let (shutdown_sender, shutdown_receiver) = mpsc::channel(1); - let task_manager = TaskManager { - message_receiver, - task_completion_sender, - tasks: Default::default(), - running_tasks: FuturesUnordered::new(), - thresholds: Thresholds { - echo: n, - ready: n, - delivery: n, - }, - shutdown_sender, + let thresholds = ReliableBroadcastParams { + echo_threshold: n, + ready_threshold: n, + delivery_threshold: n, }; + let (task_manager, shutdown_receiver) = + TaskManager::new(message_receiver, task_completion_sender, thresholds); + spawn(task_manager.run(shutdown_receiver)); let mut certificates = vec![]; diff --git a/crates/topos/Cargo.toml b/crates/topos/Cargo.toml index 580332044..2193112a2 100644 --- a/crates/topos/Cargo.toml +++ b/crates/topos/Cargo.toml @@ -61,10 +61,12 @@ rstest = { workspace = true, features = ["async-timeout"] } regex = "1" [features] -default = ["tce", "sequencer", "network", "node", "setup", "subnet"] +default = ["tce", "sequencer", "network", "node", "setup", "subnet", "topos-tce-broadcast/task-manager-futures"] tce = ["topos-tce", "topos-tce-transport"] sequencer = ["topos-sequencer"] network = ["topos-certificate-spammer"] node = ["tce", "sequencer"] setup = [] subnet = [] +task-manager-futures = ["topos-tce-broadcast/task-manager-futures"] +task-manager-channels = ["tce", "sequencer", "network", "node", "setup", "subnet", "topos-tce-broadcast/task-manager-channels"] \ No newline at end of file diff --git a/crates/topos/build.rs b/crates/topos/build.rs index 980a55a55..503631f9e 100644 --- a/crates/topos/build.rs +++ b/crates/topos/build.rs @@ -20,4 +20,12 @@ fn main() { println!("cargo:rustc-env=TOPOS_VERSION={topos_version}"); } + + // println!("TEST"); + let using_futures = cfg!(feature = "task-manager-futures"); + let using_channels = cfg!(feature = "task-manager-channels"); + + if using_futures && using_channels { + panic!("The features 'task-manager-futures' and 'task-manager-channels' are mutually exclusive."); + } } From 25b4983cb1977c848ddbc82e46a157f6a81f4985 Mon Sep 17 00:00:00 2001 From: Bastian Gruber Date: Fri, 21 Jul 2023 13:16:29 +0200 Subject: [PATCH 02/23] fix: remove reduntant feature flag --- crates/topos/Cargo.toml | 1 - crates/topos/build.rs | 1 - 2 files changed, 2 deletions(-) diff --git a/crates/topos/Cargo.toml b/crates/topos/Cargo.toml index 2193112a2..871430cce 100644 --- a/crates/topos/Cargo.toml +++ b/crates/topos/Cargo.toml @@ -68,5 +68,4 @@ network = ["topos-certificate-spammer"] node = ["tce", "sequencer"] setup = [] subnet = [] -task-manager-futures = ["topos-tce-broadcast/task-manager-futures"] task-manager-channels = ["tce", "sequencer", "network", "node", "setup", "subnet", "topos-tce-broadcast/task-manager-channels"] \ No newline at end of file diff --git a/crates/topos/build.rs b/crates/topos/build.rs index 503631f9e..3b0bec9c2 100644 --- a/crates/topos/build.rs +++ b/crates/topos/build.rs @@ -21,7 +21,6 @@ fn main() { println!("cargo:rustc-env=TOPOS_VERSION={topos_version}"); } - // println!("TEST"); let using_futures = cfg!(feature = "task-manager-futures"); let using_channels = cfg!(feature = "task-manager-channels"); From 5ee8db3e40154b7e08cf798bcd8c13f9c9eb7734 Mon Sep 17 00:00:00 2001 From: Bastian Gruber Date: Fri, 21 Jul 2023 13:34:03 +0200 Subject: [PATCH 03/23] fix: make the taskmanager new function coherent --- .../benches/task_manager_channels.rs | 8 ++++---- .../src/task_manager_channels/mod.rs | 10 +++++----- .../src/tests/task_manager_channels.rs | 5 ++--- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/crates/topos-tce-broadcast/benches/task_manager_channels.rs b/crates/topos-tce-broadcast/benches/task_manager_channels.rs index c6935580c..117ec478c 100644 --- a/crates/topos-tce-broadcast/benches/task_manager_channels.rs +++ b/crates/topos-tce-broadcast/benches/task_manager_channels.rs @@ -5,13 +5,12 @@ use tokio::sync::mpsc; use tce_transport::ReliableBroadcastParams; use topos_core::uci::CertificateId; use topos_p2p::PeerId; -use topos_tce_broadcast::task_manager_channels::{TaskManager, Thresholds}; +use topos_tce_broadcast::task_manager_channels::TaskManager; use topos_tce_broadcast::DoubleEchoCommand; pub async fn processing_double_echo(n: u64) { let (message_sender, message_receiver) = mpsc::channel(1024); let (event_sender, mut event_receiver) = mpsc::channel(1024); - let (_shutdown_sender, shutdown_receiver) = mpsc::channel(1); let threshold = ReliableBroadcastParams { echo_threshold: n as usize, @@ -19,9 +18,10 @@ pub async fn processing_double_echo(n: u64) { delivery_threshold: n as usize, }; - let (task_manager, task_completion_sender, _) = TaskManager::new(message_receiver, threshold); + let (task_manager, shutdown_receiver) = TaskManager::new(message_receiver, threshold); + + spawn(task_manager.run(event_sender, shutdown_receiver)); - spawn(task_manager.run(task_completion_sender, event_sender, shutdown_receiver)); let mut certificates = vec![]; let mut rng = rand::thread_rng(); diff --git a/crates/topos-tce-broadcast/src/task_manager_channels/mod.rs b/crates/topos-tce-broadcast/src/task_manager_channels/mod.rs index 7f52e216b..09fdd6b48 100644 --- a/crates/topos-tce-broadcast/src/task_manager_channels/mod.rs +++ b/crates/topos-tce-broadcast/src/task_manager_channels/mod.rs @@ -15,6 +15,7 @@ use task::{Task, TaskCompletion, TaskContext}; pub struct TaskManager { pub message_receiver: mpsc::Receiver, pub task_completion_receiver: mpsc::Receiver, + pub task_completion_sender: mpsc::Sender, pub tasks: HashMap, pub thresholds: ReliableBroadcastParams, pub shutdown_sender: mpsc::Sender<()>, @@ -24,7 +25,7 @@ impl TaskManager { pub fn new( message_receiver: mpsc::Receiver, thresholds: ReliableBroadcastParams, - ) -> (Self, mpsc::Sender, mpsc::Receiver<()>) { + ) -> (Self, mpsc::Receiver<()>) { let (task_completion_sender, task_completion_receiver) = mpsc::channel(1024); let (shutdown_sender, shutdown_receiver) = mpsc::channel(1); @@ -32,18 +33,17 @@ impl TaskManager { Self { message_receiver, task_completion_receiver, + task_completion_sender, tasks: HashMap::new(), thresholds, shutdown_sender, }, - task_completion_sender, shutdown_receiver, ) } pub async fn run( mut self, - task_completion_sender: mpsc::Sender, event_sender: mpsc::Sender, mut shutdown_receiver: mpsc::Receiver<()>, ) { @@ -67,14 +67,14 @@ impl TaskManager { DoubleEchoCommand::Echo { certificate_id, .. } | DoubleEchoCommand::Ready{ certificate_id, .. } => { let task_context = match self.tasks.get(&certificate_id) { Some(task_context) => task_context.to_owned(), - None => self.create_and_spawn_new_task(certificate_id, task_completion_sender.clone(), event_sender.clone()), + None => self.create_and_spawn_new_task(certificate_id, self.task_completion_sender.clone(), event_sender.clone()), }; Self::send_message_to_task(task_context, msg).await; } DoubleEchoCommand::Broadcast { ref cert, .. } => { if self.tasks.get(&cert.id).is_none() { - let task_context = self.create_and_spawn_new_task(cert.id, task_completion_sender.clone(), event_sender.clone()); + let task_context = self.create_and_spawn_new_task(cert.id, self.task_completion_sender.clone(), event_sender.clone()); Self::send_message_to_task(task_context, msg).await; } } diff --git a/crates/topos-tce-broadcast/src/tests/task_manager_channels.rs b/crates/topos-tce-broadcast/src/tests/task_manager_channels.rs index 36a887e15..1bc1cc3a3 100644 --- a/crates/topos-tce-broadcast/src/tests/task_manager_channels.rs +++ b/crates/topos-tce-broadcast/src/tests/task_manager_channels.rs @@ -14,7 +14,6 @@ async fn task_manager_channels_receiving_messages() { let (message_sender, message_receiver) = mpsc::channel(1024); let (event_sender, mut event_receiver) = mpsc::channel(1024); - let (_shutdown_sender, shutdown_receiver) = mpsc::channel(1); let threshold = ReliableBroadcastParams { echo_threshold: n, @@ -22,9 +21,9 @@ async fn task_manager_channels_receiving_messages() { delivery_threshold: n, }; - let (task_manager, task_completion_sender, _) = TaskManager::new(message_receiver, threshold); + let (task_manager, shutdown_receiver) = TaskManager::new(message_receiver, threshold); - spawn(task_manager.run(task_completion_sender, event_sender, shutdown_receiver)); + spawn(task_manager.run(event_sender, shutdown_receiver)); let mut certificates = vec![]; From 4fd6ff0d709d615b8651da9d91fef6840b0e891a Mon Sep 17 00:00:00 2001 From: Bastian Gruber Date: Fri, 21 Jul 2023 14:43:36 +0200 Subject: [PATCH 04/23] fix: cleanup task_managers and tasks --- crates/topos-tce-broadcast/src/lib.rs | 2 + .../src/task_manager_channels/mod.rs | 51 ++++++---------- .../src/task_manager_channels/task.rs | 60 +++++-------------- .../src/task_manager_futures/mod.rs | 12 +--- .../src/task_manager_futures/task.rs | 7 --- .../src/tests/task_manager_channels.rs | 7 ++- 6 files changed, 40 insertions(+), 99 deletions(-) diff --git a/crates/topos-tce-broadcast/src/lib.rs b/crates/topos-tce-broadcast/src/lib.rs index 6d45bf1e6..b5aad4a65 100644 --- a/crates/topos-tce-broadcast/src/lib.rs +++ b/crates/topos-tce-broadcast/src/lib.rs @@ -120,6 +120,8 @@ impl ReliableBroadcastClient { let (double_echo_shutdown_channel, double_echo_shutdown_receiver) = mpsc::channel::>(1); + // let (task_manager_message_sender, task_manager_message_receiver) = mpsc::channel(24_000); + let pending_certificate_count = storage .get_pending_certificates() .await diff --git a/crates/topos-tce-broadcast/src/task_manager_channels/mod.rs b/crates/topos-tce-broadcast/src/task_manager_channels/mod.rs index 09fdd6b48..a18888210 100644 --- a/crates/topos-tce-broadcast/src/task_manager_channels/mod.rs +++ b/crates/topos-tce-broadcast/src/task_manager_channels/mod.rs @@ -7,15 +7,16 @@ use topos_core::uci::CertificateId; pub mod task; use crate::DoubleEchoCommand; -use task::{Task, TaskCompletion, TaskContext}; +use task::{Task, TaskContext, TaskStatus}; /// The TaskManager is responsible for receiving messages from the network and distributing them /// among tasks. These tasks are either created if none for a certain CertificateID exists yet, /// or existing tasks will receive the messages. pub struct TaskManager { pub message_receiver: mpsc::Receiver, - pub task_completion_receiver: mpsc::Receiver, - pub task_completion_sender: mpsc::Sender, + pub task_completion_receiver: mpsc::Receiver<(CertificateId, TaskStatus)>, + pub task_completion_sender: mpsc::Sender<(CertificateId, TaskStatus)>, + pub event_sender: mpsc::Sender<(CertificateId, TaskStatus)>, pub tasks: HashMap, pub thresholds: ReliableBroadcastParams, pub shutdown_sender: mpsc::Sender<()>, @@ -24,6 +25,7 @@ pub struct TaskManager { impl TaskManager { pub fn new( message_receiver: mpsc::Receiver, + event_sender: mpsc::Sender<(CertificateId, TaskStatus)>, thresholds: ReliableBroadcastParams, ) -> (Self, mpsc::Receiver<()>) { let (task_completion_sender, task_completion_receiver) = mpsc::channel(1024); @@ -34,6 +36,7 @@ impl TaskManager { message_receiver, task_completion_receiver, task_completion_sender, + event_sender, tasks: HashMap::new(), thresholds, shutdown_sender, @@ -42,45 +45,33 @@ impl TaskManager { ) } - pub async fn run( - mut self, - event_sender: mpsc::Sender, - mut shutdown_receiver: mpsc::Receiver<()>, - ) { + pub async fn run(mut self, mut shutdown_receiver: mpsc::Receiver<()>) { loop { tokio::select! { - // If a task sends a message over the completion channel, it is signalling that it - // is done and can be removed from the open tasks inside `task_context` - Some(task_completion) = self.task_completion_receiver.recv() => { - match task_completion.success { - true => { - self.tasks.remove(&task_completion.certificate_id); - } - false => { - self.tasks.remove(&task_completion.certificate_id); - } - } - } - Some(msg) = self.message_receiver.recv() => { match msg { DoubleEchoCommand::Echo { certificate_id, .. } | DoubleEchoCommand::Ready{ certificate_id, .. } => { let task_context = match self.tasks.get(&certificate_id) { Some(task_context) => task_context.to_owned(), - None => self.create_and_spawn_new_task(certificate_id, self.task_completion_sender.clone(), event_sender.clone()), + None => self.create_and_spawn_new_task(certificate_id, self.task_completion_sender.clone()), }; - Self::send_message_to_task(task_context, msg).await; + _ = task_context.message_sender.send(msg).await; } DoubleEchoCommand::Broadcast { ref cert, .. } => { if self.tasks.get(&cert.id).is_none() { - let task_context = self.create_and_spawn_new_task(cert.id, self.task_completion_sender.clone(), event_sender.clone()); - Self::send_message_to_task(task_context, msg).await; + let task_context = self.create_and_spawn_new_task(cert.id, self.task_completion_sender.clone()); + _ = task_context.message_sender.send(msg).await; } } } } + Some((certificate_id, status)) = self.task_completion_receiver.recv() => { + self.tasks.remove(&certificate_id); + let _ = self.event_sender.send((certificate_id, status)).await; + } + _ = shutdown_receiver.recv() => { println!("Task Manager shutting down"); @@ -98,13 +89,11 @@ impl TaskManager { fn create_and_spawn_new_task( &mut self, certificate_id: CertificateId, - task_completion_sender: mpsc::Sender, - event_sender: mpsc::Sender, + task_completion_sender: mpsc::Sender<(CertificateId, TaskStatus)>, ) -> TaskContext { let (task, context) = Task::new( certificate_id, task_completion_sender, - event_sender, self.thresholds.clone(), ); @@ -114,12 +103,6 @@ impl TaskManager { context } - - async fn send_message_to_task(task_context: TaskContext, msg: DoubleEchoCommand) { - spawn(async move { - _ = task_context.message_sender.send(msg).await; - }); - } } impl Drop for TaskManager { diff --git a/crates/topos-tce-broadcast/src/task_manager_channels/task.rs b/crates/topos-tce-broadcast/src/task_manager_channels/task.rs index 87e823409..99f17f83f 100644 --- a/crates/topos-tce-broadcast/src/task_manager_channels/task.rs +++ b/crates/topos-tce-broadcast/src/task_manager_channels/task.rs @@ -5,36 +5,14 @@ use tce_transport::ReliableBroadcastParams; use topos_core::uci::CertificateId; #[derive(Debug, PartialEq, Eq)] -pub enum Events { - ReachedThresholdOfReady(CertificateId), - ReceivedEcho(CertificateId), - TimeOut(CertificateId), +pub enum TaskStatus { + /// The task finished succesfully and broadcasted the certificate + received ready + Success, + /// The task did not finish succesfully and stopped. + Failure, } -#[derive(Debug)] -pub struct TaskCompletion { - pub(crate) success: bool, - pub(crate) certificate_id: CertificateId, -} - -impl TaskCompletion { - fn success(certificate_id: CertificateId) -> Self { - TaskCompletion { - success: true, - certificate_id, - } - } - - #[allow(dead_code)] - fn failure(certificate_id: CertificateId) -> Self { - TaskCompletion { - success: false, - certificate_id, - } - } -} - -#[derive(Clone)] +#[derive(Debug, Clone)] pub struct TaskContext { pub message_sender: mpsc::Sender, pub shutdown_sender: mpsc::Sender<()>, @@ -43,8 +21,7 @@ pub struct TaskContext { pub struct Task { pub message_receiver: mpsc::Receiver, pub certificate_id: CertificateId, - pub completion_sender: mpsc::Sender, - pub event_sender: mpsc::Sender, + pub completion_sender: mpsc::Sender<(CertificateId, TaskStatus)>, pub thresholds: ReliableBroadcastParams, pub shutdown_receiver: mpsc::Receiver<()>, } @@ -52,8 +29,7 @@ pub struct Task { impl Task { pub fn new( certificate_id: CertificateId, - completion_sender: mpsc::Sender, - event_sender: mpsc::Sender, + completion_sender: mpsc::Sender<(CertificateId, TaskStatus)>, thresholds: ReliableBroadcastParams, ) -> (Self, TaskContext) { let (message_sender, message_receiver) = mpsc::channel(1024); @@ -68,7 +44,6 @@ impl Task { message_receiver, certificate_id, completion_sender, - event_sender, thresholds, shutdown_receiver, }; @@ -76,23 +51,20 @@ impl Task { (task, task_context) } - async fn handle_msg(&mut self, msg: DoubleEchoCommand) -> Result { + async fn handle_msg(&mut self, msg: DoubleEchoCommand) -> (CertificateId, TaskStatus) { match msg { DoubleEchoCommand::Echo { certificate_id, .. } => { let _ = self .completion_sender - .send(TaskCompletion::success(certificate_id)) + .send((certificate_id, TaskStatus::Success)) .await; - let _ = self - .event_sender - .send(Events::ReachedThresholdOfReady(self.certificate_id)) - .await; - - Ok(true) + return (certificate_id, TaskStatus::Success); + } + DoubleEchoCommand::Ready { certificate_id, .. } => { + return (certificate_id, TaskStatus::Success) } - DoubleEchoCommand::Ready { .. } => Ok(true), - DoubleEchoCommand::Broadcast { .. } => Ok(false), + DoubleEchoCommand::Broadcast { cert, .. } => return (cert.id, TaskStatus::Success), } } @@ -101,7 +73,7 @@ impl Task { tokio::select! { msg = self.message_receiver.recv() => { if let Some(msg) = msg { - if let Ok(true) = self.handle_msg(msg).await { + if let (_, TaskStatus::Success) = self.handle_msg(msg).await { break; } } diff --git a/crates/topos-tce-broadcast/src/task_manager_futures/mod.rs b/crates/topos-tce-broadcast/src/task_manager_futures/mod.rs index f99008162..e0da19d00 100644 --- a/crates/topos-tce-broadcast/src/task_manager_futures/mod.rs +++ b/crates/topos-tce-broadcast/src/task_manager_futures/mod.rs @@ -53,12 +53,6 @@ impl TaskManager { pub async fn run(mut self, mut shutdown_receiver: mpsc::Receiver<()>) { loop { tokio::select! { - // We receive a new DoubleEchoCommand from the outside through a channel receiver - // The base state is that there is no running task yet - // What we need to do is to - // a) create a new task in the local HashMap: So we can check if incoming messages already have an open task - // b) Add the task to the FuturesUnordered stream: So we can check if the task is done - // The task future has to be started for it to be able to listen on the it's own message receiver. Some(msg) = self.message_receiver.recv() => { match msg { DoubleEchoCommand::Echo { certificate_id, .. } | DoubleEchoCommand::Ready { certificate_id, ..} => { @@ -92,7 +86,7 @@ impl TaskManager { } Some((id, status)) = self.running_tasks.next() => { if status == TaskStatus::Success { - self.remove_finished_task(id); + self.tasks.remove(&certificate_id); let _ = self.task_completion_sender.send((id, status)).await; } } @@ -109,10 +103,6 @@ impl TaskManager { } } } - - fn remove_finished_task(&mut self, certificate_id: CertificateId) { - self.tasks.remove(&certificate_id); - } } impl Drop for TaskManager { diff --git a/crates/topos-tce-broadcast/src/task_manager_futures/task.rs b/crates/topos-tce-broadcast/src/task_manager_futures/task.rs index 764499612..cf32cfae5 100644 --- a/crates/topos-tce-broadcast/src/task_manager_futures/task.rs +++ b/crates/topos-tce-broadcast/src/task_manager_futures/task.rs @@ -7,13 +7,6 @@ use topos_core::uci::CertificateId; use crate::DoubleEchoCommand; use tce_transport::ReliableBroadcastParams; -#[derive(Debug, PartialEq, Eq)] -pub enum Events { - ReachedThresholdOfReady(CertificateId), - ReceivedEcho(CertificateId), - TimeOut(CertificateId), -} - #[derive(Debug, PartialEq, Eq)] pub enum TaskStatus { /// The task finished succesfully and broadcasted the certificate + received ready diff --git a/crates/topos-tce-broadcast/src/tests/task_manager_channels.rs b/crates/topos-tce-broadcast/src/tests/task_manager_channels.rs index 1bc1cc3a3..a108e9527 100644 --- a/crates/topos-tce-broadcast/src/tests/task_manager_channels.rs +++ b/crates/topos-tce-broadcast/src/tests/task_manager_channels.rs @@ -21,9 +21,10 @@ async fn task_manager_channels_receiving_messages() { delivery_threshold: n, }; - let (task_manager, shutdown_receiver) = TaskManager::new(message_receiver, threshold); + let (task_manager, shutdown_receiver) = + TaskManager::new(message_receiver, event_sender, threshold); - spawn(task_manager.run(event_sender, shutdown_receiver)); + spawn(task_manager.run(shutdown_receiver)); let mut certificates = vec![]; @@ -49,7 +50,7 @@ async fn task_manager_channels_receiving_messages() { let mut count = 0; - while (event_receiver.recv().await).is_some() { + while let Some((_, _)) = event_receiver.recv().await { count += 1; if count == n { From 7fec500a6fcd3670efa33a8c2683e559bd71698d Mon Sep 17 00:00:00 2001 From: Bastian Gruber Date: Fri, 21 Jul 2023 15:04:52 +0200 Subject: [PATCH 05/23] chore: intatiate and start the task_manager --- crates/topos-tce-broadcast/src/double_echo/mod.rs | 10 +++++++--- crates/topos-tce-broadcast/src/lib.rs | 14 ++++++++++++-- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/crates/topos-tce-broadcast/src/double_echo/mod.rs b/crates/topos-tce-broadcast/src/double_echo/mod.rs index b0364eb3e..ab7948ff3 100644 --- a/crates/topos-tce-broadcast/src/double_echo/mod.rs +++ b/crates/topos-tce-broadcast/src/double_echo/mod.rs @@ -47,7 +47,9 @@ pub struct DoubleEcho { /// delivered certificate ids to avoid processing twice the same certificate delivered_certificates: HashSet, - pub(crate) params: ReliableBroadcastParams, + task_manager_message_sender: mpsc::Sender, + + task_completion_receiver: mpsc::Receiver<(CertificateId, TaskStatus)>, /// Current certificates being processed cert_candidate: HashMap, @@ -71,7 +73,8 @@ impl DoubleEcho { #[allow(clippy::too_many_arguments)] pub fn new( - params: ReliableBroadcastParams, + task_manager_message_sender: mpsc::Sender, + task_completion_receiver: mpsc::Receiver<(CertificateId, TaskStatus)>, command_receiver: mpsc::Receiver, subscriptions_view_receiver: mpsc::Receiver, event_sender: broadcast::Sender, @@ -80,8 +83,9 @@ impl DoubleEcho { pending_certificate_count: u64, ) -> Self { Self { + task_manager_message_sender, + task_completion_receiver, pending_certificate_count, - params, command_receiver, subscriptions_view_receiver, event_sender, diff --git a/crates/topos-tce-broadcast/src/lib.rs b/crates/topos-tce-broadcast/src/lib.rs index b5aad4a65..84e5b9c72 100644 --- a/crates/topos-tce-broadcast/src/lib.rs +++ b/crates/topos-tce-broadcast/src/lib.rs @@ -120,7 +120,16 @@ impl ReliableBroadcastClient { let (double_echo_shutdown_channel, double_echo_shutdown_receiver) = mpsc::channel::>(1); - // let (task_manager_message_sender, task_manager_message_receiver) = mpsc::channel(24_000); + let (task_manager_message_sender, task_manager_message_receiver) = mpsc::channel(24_000); + let (task_completion_sender, task_completion_receiver) = mpsc::channel(24_000); + + let (task_manager, shutdown_receiver) = TaskManager::new( + task_manager_message_receiver, + task_completion_sender, + config.tce_params, + ); + + spawn(task_manager.run(shutdown_receiver)); let pending_certificate_count = storage .get_pending_certificates() @@ -129,7 +138,8 @@ impl ReliableBroadcastClient { .unwrap_or(0) as u64; let double_echo = DoubleEcho::new( - config.tce_params, + task_manager_message_sender, + task_completion_receiver, command_receiver, subscriptions_view_receiver, event_sender, From cdda231b1e332704ac62ca26c335561047e0756b Mon Sep 17 00:00:00 2001 From: Bastian Gruber Date: Mon, 24 Jul 2023 11:20:23 +0200 Subject: [PATCH 06/23] chore: add echo and ready workflow, wip --- .../src/double_echo/mod.rs | 132 ++++-------------- .../src/task_manager_futures/mod.rs | 26 ++-- .../src/task_manager_futures/task.rs | 12 +- 3 files changed, 51 insertions(+), 119 deletions(-) diff --git a/crates/topos-tce-broadcast/src/double_echo/mod.rs b/crates/topos-tce-broadcast/src/double_echo/mod.rs index ba4d1edce..b0c5def69 100644 --- a/crates/topos-tce-broadcast/src/double_echo/mod.rs +++ b/crates/topos-tce-broadcast/src/double_echo/mod.rs @@ -1,4 +1,5 @@ use crate::constant; +use crate::task_manager_futures::task::TaskStatus; use crate::{DoubleEchoCommand, SubscriptionsView}; use std::collections::HashSet; use std::collections::{HashMap, VecDeque}; @@ -40,12 +41,10 @@ pub struct DoubleEcho { /// pending certificates state pending_certificate_count: u64, + /// buffer of certificates to process buffer: VecDeque<(bool, Certificate)>, - /// known certificate ids to avoid processing twice the same certificate - known_certificates: HashSet, - /// delivered certificate ids to avoid processing twice the same certificate delivered_certificates: HashSet, @@ -62,9 +61,6 @@ pub struct DoubleEcho { pub(crate) subscriptions: SubscriptionsView, // My subscriptions for echo, ready and delivery feedback local_peer_id: String, - - /// Buffer of messages to be processed once the certificate payload is received - buffered_messages: HashMap>, } impl DoubleEcho { @@ -94,9 +90,7 @@ impl DoubleEcho { buffer: VecDeque::new(), shutdown, local_peer_id, - buffered_messages: Default::default(), delivered_certificates: Default::default(), - known_certificates: Default::default(), } } @@ -124,8 +118,8 @@ impl DoubleEcho { command if self.subscriptions.is_some() => { match command { - DoubleEchoCommand::Echo { from_peer, certificate_id } => self.handle_echo(from_peer, certificate_id), - DoubleEchoCommand::Ready { from_peer, certificate_id } => self.handle_ready(from_peer, certificate_id), + DoubleEchoCommand::Echo { from_peer, certificate_id } => self.handle_echo(from_peer, certificate_id).await, + DoubleEchoCommand::Ready { from_peer, certificate_id } => self.handle_ready(from_peer, certificate_id).await, _ => {} } @@ -136,6 +130,12 @@ impl DoubleEcho { } } + Some((certificate_id, status)) = task_completion_receiver.recv() => { + if status == TaskStatus::Success { + self.delivered_certificates.insert(certificate_id); + } + } + Some(new_subscriptions_view) = self.subscriptions_view_receiver.recv() => { info!("Starting to use the new operational set of samples: {:?}", &new_subscriptions_view); self.subscriptions = new_subscriptions_view; @@ -158,21 +158,8 @@ impl DoubleEcho { if let Some(messages) = self.buffered_messages.remove(&certificate_id) { for message in messages { DOUBLE_ECHO_BUFFERED_MESSAGE_COUNT.dec(); - match message { - DoubleEchoCommand::Echo { - from_peer, - certificate_id, - } => { - self.consume_echo(from_peer, &certificate_id); - } - DoubleEchoCommand::Ready { - from_peer, - certificate_id, - } => { - self.consume_ready(from_peer, &certificate_id); - } - _ => {} - } + //TODO: error handling + let _ = self.task_manager_message_sender.send(msg).await; } } } @@ -320,86 +307,27 @@ impl DoubleEcho { } } - pub(crate) fn handle_echo(&mut self, from_peer: PeerId, certificate_id: CertificateId) { - let cert_delivered = self.delivered_certificates.get(&certificate_id).is_some(); - if !cert_delivered { - if self.known_certificates.get(&certificate_id).is_some() { - debug!( - "Handling DoubleEchoCommand::Echo from_peer: {} cert_id: {}", - &from_peer, certificate_id - ); - if let Some(status) = self.consume_echo(from_peer, &certificate_id) { - if status.is_delivered() { - self.delivered_certificates.insert(certificate_id); - self.span_tracker.remove(&certificate_id); - self.cert_candidate.remove(&certificate_id); - } - } - - // need to deliver the certificate - } else if self.delivered_certificates.get(&certificate_id).is_none() { - // need to buffer the Echo - self.buffered_messages - .entry(certificate_id) - .or_default() - .push(DoubleEchoCommand::Echo { - from_peer, - certificate_id, - }); - DOUBLE_ECHO_BUFFERED_MESSAGE_COUNT.inc(); - } + pub(crate) async fn handle_echo(&mut self, from_peer: PeerId, certificate_id: CertificateId) { + if self.delivered_certificates.get(&certificate_id).is_none() { + let _ = self + .task_manager_message_sender + .send(DoubleEchoCommand::Echo { + from_peer, + certificate_id, + }) + .await; } } - pub(crate) fn handle_ready(&mut self, from_peer: PeerId, certificate_id: CertificateId) { - let cert_delivered = self.delivered_certificates.get(&certificate_id).is_some(); - if !cert_delivered { - if self.known_certificates.get(&certificate_id).is_some() { - debug!( - "Handling DoubleEchoCommand::Ready from_peer: {} cert_id: {}", - &from_peer, &certificate_id - ); - - if let Some(status) = self.consume_ready(from_peer, &certificate_id) { - if status.is_delivered() { - self.delivered_certificates.insert(certificate_id); - self.span_tracker.remove(&certificate_id); - self.cert_candidate.remove(&certificate_id); - } - } - - // need to deliver the certificate - } else if self.delivered_certificates.get(&certificate_id).is_none() { - // need to buffer the Ready - self.buffered_messages - .entry(certificate_id) - .or_default() - .push(DoubleEchoCommand::Ready { - from_peer, - certificate_id, - }); - DOUBLE_ECHO_BUFFERED_MESSAGE_COUNT.inc(); - } + pub(crate) async fn handle_ready(&mut self, from_peer: PeerId, certificate_id: CertificateId) { + if self.delivered_certificates.get(&certificate_id).is_none() { + let _ = self + .task_manager_message_sender + .send(DoubleEchoCommand::Ready { + from_peer, + certificate_id, + }) + .await; } } - - pub(crate) fn consume_echo( - &mut self, - from_peer: PeerId, - certificate_id: &CertificateId, - ) -> Option { - self.cert_candidate - .get_mut(certificate_id) - .and_then(|state| state.apply_echo(from_peer)) - } - - pub(crate) fn consume_ready( - &mut self, - from_peer: PeerId, - certificate_id: &CertificateId, - ) -> Option { - self.cert_candidate - .get_mut(certificate_id) - .and_then(|state| state.apply_ready(from_peer)) - } } diff --git a/crates/topos-tce-broadcast/src/task_manager_futures/mod.rs b/crates/topos-tce-broadcast/src/task_manager_futures/mod.rs index 8a3567a9a..07274701a 100644 --- a/crates/topos-tce-broadcast/src/task_manager_futures/mod.rs +++ b/crates/topos-tce-broadcast/src/task_manager_futures/mod.rs @@ -1,7 +1,7 @@ use futures::stream::FuturesUnordered; use futures::Future; use futures::StreamExt; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::future::IntoFuture; use std::pin::Pin; use tokio::sync::mpsc; @@ -14,6 +14,7 @@ pub mod task; use crate::DoubleEchoCommand; use task::{Task, TaskContext, TaskStatus}; +use topos_p2p::PeerId; /// The TaskManager is responsible for receiving messages from the network and distributing them /// among tasks. These tasks are either created if none for a certain CertificateID exists yet, @@ -26,6 +27,8 @@ pub struct TaskManager { pub running_tasks: FuturesUnordered< Pin + Send + 'static>>, >, + pub known_certificates: HashSet, + pub buffered_messages: HashMap>, pub thresholds: ReliableBroadcastParams, pub shutdown_sender: mpsc::Sender<()>, } @@ -44,6 +47,8 @@ impl TaskManager { task_completion_sender, tasks: HashMap::new(), running_tasks: FuturesUnordered::new(), + known_certificates: Default::default(), + buffered_messages: Default::default(), thresholds, shutdown_sender, }, @@ -56,18 +61,15 @@ impl TaskManager { tokio::select! { Some(msg) = self.message_receiver.recv() => { match msg { - DoubleEchoCommand::Echo { certificate_id, .. } | DoubleEchoCommand::Ready { certificate_id, ..} => { - let task = match self.tasks.entry(certificate_id) { - std::collections::hash_map::Entry::Vacant(entry) => { - let (task, task_context) = Task::new(certificate_id, self.thresholds.clone()); - self.running_tasks.push(task.into_future()); - - entry.insert(task_context) - } - std::collections::hash_map::Entry::Occupied(entry) => entry.into_mut(), + DoubleEchoCommand::Echo { certificate_id, from_peer } | DoubleEchoCommand::Ready { certificate_id, from_peer } => { + if let task = self.tasks.get(certificate_id) { + _ = task.sink.send(msg).await; + } else { + self.buffered_messages + .entry(certificate_id) + .or_default() + .push(msg); }; - - _ = task.sink.send(msg).await; } DoubleEchoCommand::Broadcast { ref cert, .. } => { let task = match self.tasks.entry(cert.id) { diff --git a/crates/topos-tce-broadcast/src/task_manager_futures/task.rs b/crates/topos-tce-broadcast/src/task_manager_futures/task.rs index 667764007..7abd0a623 100644 --- a/crates/topos-tce-broadcast/src/task_manager_futures/task.rs +++ b/crates/topos-tce-broadcast/src/task_manager_futures/task.rs @@ -5,7 +5,7 @@ use tokio::sync::mpsc; use topos_core::uci::CertificateId; use tracing::warn; -use crate::DoubleEchoCommand; +use crate::{broadcast_state::BroadcastState, DoubleEchoCommand}; use tce_transport::ReliableBroadcastParams; #[derive(Debug, PartialEq, Eq)] @@ -25,6 +25,7 @@ pub struct Task { pub message_receiver: mpsc::Receiver, pub certificate_id: CertificateId, pub thresholds: ReliableBroadcastParams, + pub broadcast_state: BroadcastState, pub shutdown_receiver: mpsc::Receiver<()>, } @@ -45,6 +46,7 @@ impl Task { message_receiver, certificate_id, thresholds, + broadcast_state: BroadcastState::new(), shutdown_receiver, }; @@ -63,11 +65,11 @@ impl IntoFuture for Task { tokio::select! { Some(msg) = self.message_receiver.recv() => { match msg { - DoubleEchoCommand::Echo { certificate_id, .. } => { - return (certificate_id, TaskStatus::Success); + DoubleEchoCommand::Echo { certificate_id, from_peer } => { + self.broadcast_state.apply_echo(from_peer) } - DoubleEchoCommand::Ready { certificate_id, .. } => { - return (certificate_id, TaskStatus::Success); + DoubleEchoCommand::Ready { certificate_id, from_peer } => { + self.broadcast_state.apply_ready(from_peer) } DoubleEchoCommand::Broadcast { cert, .. } => { return (cert.id, TaskStatus::Success); From 71c33b2e8ceda130563cc6d30d1c1acc3b7376ec Mon Sep 17 00:00:00 2001 From: Bastian Gruber Date: Mon, 24 Jul 2023 13:02:45 +0200 Subject: [PATCH 07/23] chore: move handle_x functions to taskmanager, wip --- .../src/double_echo/mod.rs | 67 +------------------ .../src/task_manager_futures/mod.rs | 38 +++++++++-- .../src/task_manager_futures/task.rs | 3 +- 3 files changed, 35 insertions(+), 73 deletions(-) diff --git a/crates/topos-tce-broadcast/src/double_echo/mod.rs b/crates/topos-tce-broadcast/src/double_echo/mod.rs index b0c5def69..a431f9920 100644 --- a/crates/topos-tce-broadcast/src/double_echo/mod.rs +++ b/crates/topos-tce-broadcast/src/double_echo/mod.rs @@ -17,16 +17,7 @@ use tracing::{debug, error, info, warn, warn_span, Span}; use self::broadcast_state::BroadcastState; -mod broadcast_state; - -/// Processing data associated to a Certificate candidate for delivery -/// Sample repartition, one peer may belongs to multiple samples -#[derive(Clone)] -pub struct DeliveryState { - pub subscriptions: SubscriptionsView, - pub ready_sent: bool, - pub delivered: bool, -} +pub mod broadcast_state; pub struct DoubleEcho { /// Channel to receive commands @@ -114,7 +105,7 @@ impl DoubleEcho { Some(command) = self.command_receiver.recv() => { match command { - DoubleEchoCommand::Broadcast { need_gossip, cert } => self.handle_broadcast(cert,need_gossip), + DoubleEchoCommand::Broadcast { need_gossip, cert } => self.task_manager_message_sender.send(command).await, command if self.subscriptions.is_some() => { match command { @@ -146,24 +137,6 @@ impl DoubleEcho { break None; } }; - - // Broadcast next certificate - if self.subscriptions.is_some() { - if let Some((need_gossip, cert)) = self.buffer.pop_front() { - DOUBLE_ECHO_CURRENT_BUFFER_SIZE.dec(); - let certificate_id = cert.id; - - self.broadcast(cert, need_gossip); - - if let Some(messages) = self.buffered_messages.remove(&certificate_id) { - for message in messages { - DOUBLE_ECHO_BUFFERED_MESSAGE_COUNT.dec(); - //TODO: error handling - let _ = self.task_manager_message_sender.send(msg).await; - } - } - } - } }; if let Some(sender) = shutdowned { @@ -271,42 +244,6 @@ impl DoubleEcho { } impl DoubleEcho { - pub(crate) fn handle_broadcast(&mut self, cert: Certificate, need_gossip: bool) { - if !self.known_certificates.contains(&cert.id) { - let span = warn_span!( - "Broadcast", - peer_id = self.local_peer_id, - certificate_id = cert.id.to_string() - ); - DOUBLE_ECHO_BROADCAST_CREATED_TOTAL.inc(); - span.in_scope(|| { - warn!("Broadcast registered for {}", cert.id); - self.span_tracker.insert(cert.id, span.clone()); - CERTIFICATE_RECEIVED_TOTAL.inc(); - - if need_gossip { - CERTIFICATE_RECEIVED_FROM_API_TOTAL.inc(); - } else { - CERTIFICATE_RECEIVED_FROM_GOSSIP_TOTAL.inc(); - } - }); - - self.known_certificates.insert(cert.id); - span.in_scope(|| { - debug!("DoubleEchoCommand::Broadcast certificate_id: {}", cert.id); - if self.buffer.len() < *constant::TOPOS_DOUBLE_ECHO_MAX_BUFFER_SIZE { - self.buffer.push_back((need_gossip, cert)); - DOUBLE_ECHO_CURRENT_BUFFER_SIZE.inc(); - } else { - DOUBLE_ECHO_BUFFER_CAPACITY_TOTAL.inc(); - // Adding one to the pending_certificate_count because we - // can't buffer it right now - _ = self.pending_certificate_count.checked_add(1); - } - }); - } - } - pub(crate) async fn handle_echo(&mut self, from_peer: PeerId, certificate_id: CertificateId) { if self.delivered_certificates.get(&certificate_id).is_none() { let _ = self diff --git a/crates/topos-tce-broadcast/src/task_manager_futures/mod.rs b/crates/topos-tce-broadcast/src/task_manager_futures/mod.rs index 07274701a..f7de74113 100644 --- a/crates/topos-tce-broadcast/src/task_manager_futures/mod.rs +++ b/crates/topos-tce-broadcast/src/task_manager_futures/mod.rs @@ -27,7 +27,6 @@ pub struct TaskManager { pub running_tasks: FuturesUnordered< Pin + Send + 'static>>, >, - pub known_certificates: HashSet, pub buffered_messages: HashMap>, pub thresholds: ReliableBroadcastParams, pub shutdown_sender: mpsc::Sender<()>, @@ -47,7 +46,6 @@ impl TaskManager { task_completion_sender, tasks: HashMap::new(), running_tasks: FuturesUnordered::new(), - known_certificates: Default::default(), buffered_messages: Default::default(), thresholds, shutdown_sender, @@ -72,27 +70,45 @@ impl TaskManager { }; } DoubleEchoCommand::Broadcast { ref cert, .. } => { - let task = match self.tasks.entry(cert.id) { + match self.tasks.entry(cert.id) { std::collections::hash_map::Entry::Vacant(entry) => { + let span = warn_span!( + "Broadcast", + peer_id = self.local_peer_id, + certificate_id = cert.id.to_string() + ); + DOUBLE_ECHO_BROADCAST_CREATED_TOTAL.inc(); + span.in_scope(|| { + warn!("Broadcast registered for {}", cert.id); + self.span_tracker.insert(cert.id, span.clone()); + CERTIFICATE_RECEIVED_TOTAL.inc(); + + if need_gossip { + CERTIFICATE_RECEIVED_FROM_API_TOTAL.inc(); + } else { + CERTIFICATE_RECEIVED_FROM_GOSSIP_TOTAL.inc(); + } + }); + let (task, task_context) = Task::new(cert.id, self.thresholds.clone()); self.running_tasks.push(task.into_future()); entry.insert(task_context) } - std::collections::hash_map::Entry::Occupied(entry) => entry.into_mut(), - }; - - _ = task.sink.send(msg).await; + std::collections::hash_map::Entry::Occupied(entry) => {}, + } } } } + Some((id, status)) = self.running_tasks.next() => { if status == TaskStatus::Success { self.tasks.remove(&certificate_id); let _ = self.task_completion_sender.send((id, status)).await; } } + _ = shutdown_receiver.recv() => { warn!("Task Manager shutting down"); @@ -104,6 +120,14 @@ impl TaskManager { break; } } + + for (certificate_id, messages) in self.buffered_messages.drain() { + if let Some(task) = self.tasks.get_mut(&certificate_id) { + for msg in messages { + _ = task.sink.send(msg).await; + } + } + } } } } diff --git a/crates/topos-tce-broadcast/src/task_manager_futures/task.rs b/crates/topos-tce-broadcast/src/task_manager_futures/task.rs index 7abd0a623..635973828 100644 --- a/crates/topos-tce-broadcast/src/task_manager_futures/task.rs +++ b/crates/topos-tce-broadcast/src/task_manager_futures/task.rs @@ -5,7 +5,8 @@ use tokio::sync::mpsc; use topos_core::uci::CertificateId; use tracing::warn; -use crate::{broadcast_state::BroadcastState, DoubleEchoCommand}; +use crate::double_echo::broadcast_state::BroadcastState; +use crate::DoubleEchoCommand; use tce_transport::ReliableBroadcastParams; #[derive(Debug, PartialEq, Eq)] From 07f8678e0839cca2efb66819947b3b31524fca8b Mon Sep 17 00:00:00 2001 From: Bastian Gruber Date: Mon, 24 Jul 2023 13:44:10 +0200 Subject: [PATCH 08/23] chore: move event_sender to taskmanager --- crates/topos-tce-broadcast/src/lib.rs | 3 ++- .../src/task_manager_futures/mod.rs | 20 ++++++++++++++++--- .../src/task_manager_futures/task.rs | 3 ++- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/crates/topos-tce-broadcast/src/lib.rs b/crates/topos-tce-broadcast/src/lib.rs index 874d7d807..ac730a1b4 100644 --- a/crates/topos-tce-broadcast/src/lib.rs +++ b/crates/topos-tce-broadcast/src/lib.rs @@ -21,7 +21,7 @@ use topos_core::uci::{Certificate, CertificateId}; use topos_metrics::DOUBLE_ECHO_COMMAND_CHANNEL_CAPACITY_TOTAL; use topos_p2p::PeerId; use topos_tce_storage::StorageClient; -use tracing::{debug, error, info}; +use tracing::{debug, error, event, info}; pub use topos_core::uci; @@ -126,6 +126,7 @@ impl ReliableBroadcastClient { let (task_manager, shutdown_receiver) = TaskManager::new( task_manager_message_receiver, task_completion_sender, + event_sender.clone(), config.tce_params, ); diff --git a/crates/topos-tce-broadcast/src/task_manager_futures/mod.rs b/crates/topos-tce-broadcast/src/task_manager_futures/mod.rs index f7de74113..297ca3155 100644 --- a/crates/topos-tce-broadcast/src/task_manager_futures/mod.rs +++ b/crates/topos-tce-broadcast/src/task_manager_futures/mod.rs @@ -7,7 +7,7 @@ use std::pin::Pin; use tokio::sync::mpsc; use tracing::warn; -use tce_transport::ReliableBroadcastParams; +use tce_transport::{ProtocolEvents, ReliableBroadcastParams}; use topos_core::uci::CertificateId; pub mod task; @@ -22,6 +22,7 @@ use topos_p2p::PeerId; pub struct TaskManager { pub message_receiver: mpsc::Receiver, pub task_completion_sender: mpsc::Sender<(CertificateId, TaskStatus)>, + pub event_sender: mpsc::Sender, pub tasks: HashMap, #[allow(clippy::type_complexity)] pub running_tasks: FuturesUnordered< @@ -36,6 +37,7 @@ impl TaskManager { pub fn new( message_receiver: mpsc::Receiver, task_completion_sender: mpsc::Sender<(CertificateId, TaskStatus)>, + event_sender: mpsc::Sender, thresholds: ReliableBroadcastParams, ) -> (Self, mpsc::Receiver<()>) { let (shutdown_sender, shutdown_receiver) = mpsc::channel(1); @@ -44,6 +46,7 @@ impl TaskManager { Self { message_receiver, task_completion_sender, + event_sender, tasks: HashMap::new(), running_tasks: FuturesUnordered::new(), buffered_messages: Default::default(), @@ -90,7 +93,17 @@ impl TaskManager { } }); - let (task, task_context) = Task::new(cert.id, self.thresholds.clone()); + let broadcast_state = BroadcastState::new( + certificate, + self.params.echo_threshold, + self.params.ready_threshold, + self.params.delivery_threshold, + self.event_sender.clone(), + subscriptions, + origin, + ); + + let (task, task_context) = Task::new(cert.id, self.thresholds.clone(), broadcast_state); self.running_tasks.push(task.into_future()); @@ -121,10 +134,11 @@ impl TaskManager { } } - for (certificate_id, messages) in self.buffered_messages.drain() { + for (certificate_id, messages) in self.buffered_messages { if let Some(task) = self.tasks.get_mut(&certificate_id) { for msg in messages { _ = task.sink.send(msg).await; + self.buffered_messages.remove(&certificate_id); } } } diff --git a/crates/topos-tce-broadcast/src/task_manager_futures/task.rs b/crates/topos-tce-broadcast/src/task_manager_futures/task.rs index 635973828..ec34ae520 100644 --- a/crates/topos-tce-broadcast/src/task_manager_futures/task.rs +++ b/crates/topos-tce-broadcast/src/task_manager_futures/task.rs @@ -34,6 +34,7 @@ impl Task { pub fn new( certificate_id: CertificateId, thresholds: ReliableBroadcastParams, + broadcast_state: BroadcastState, ) -> (Task, TaskContext) { let (message_sender, message_receiver) = mpsc::channel(10_024); let (shutdown_sender, shutdown_receiver) = mpsc::channel(1); @@ -47,7 +48,7 @@ impl Task { message_receiver, certificate_id, thresholds, - broadcast_state: BroadcastState::new(), + broadcast_state, shutdown_receiver, }; From 5d7ad6085e46252cef23052a075c586e4e3e2802 Mon Sep 17 00:00:00 2001 From: Bastian Gruber Date: Mon, 24 Jul 2023 17:21:49 +0200 Subject: [PATCH 09/23] chore: add workflow to futures task manager, wip --- .../src/double_echo/mod.rs | 33 +++++----- crates/topos-tce-broadcast/src/lib.rs | 16 +++-- .../src/task_manager_futures/mod.rs | 66 ++++++++----------- .../src/task_manager_futures/task.rs | 8 +-- 4 files changed, 56 insertions(+), 67 deletions(-) diff --git a/crates/topos-tce-broadcast/src/double_echo/mod.rs b/crates/topos-tce-broadcast/src/double_echo/mod.rs index a431f9920..b68dae3a3 100644 --- a/crates/topos-tce-broadcast/src/double_echo/mod.rs +++ b/crates/topos-tce-broadcast/src/double_echo/mod.rs @@ -1,19 +1,13 @@ -use crate::constant; use crate::task_manager_futures::task::TaskStatus; use crate::{DoubleEchoCommand, SubscriptionsView}; use std::collections::HashSet; use std::collections::{HashMap, VecDeque}; use tce_transport::{ProtocolEvents, ReliableBroadcastParams}; -use tokio::sync::{mpsc, oneshot}; +use tokio::sync::{broadcast, mpsc, oneshot}; use topos_core::uci::{Certificate, CertificateId}; -use topos_metrics::{ - CERTIFICATE_RECEIVED_FROM_API_TOTAL, CERTIFICATE_RECEIVED_FROM_GOSSIP_TOTAL, - CERTIFICATE_RECEIVED_TOTAL, DOUBLE_ECHO_BROADCAST_CREATED_TOTAL, - DOUBLE_ECHO_BUFFERED_MESSAGE_COUNT, DOUBLE_ECHO_BUFFER_CAPACITY_TOTAL, - DOUBLE_ECHO_CURRENT_BUFFER_SIZE, -}; + use topos_p2p::PeerId; -use tracing::{debug, error, info, warn, warn_span, Span}; +use tracing::{error, info, warn, Span}; use self::broadcast_state::BroadcastState; @@ -23,7 +17,7 @@ pub struct DoubleEcho { /// Channel to receive commands command_receiver: mpsc::Receiver, /// Channel to receive subscriptions updates - subscriptions_view_receiver: mpsc::Receiver, + subscriptions_view_receiver: broadcast::Receiver, /// Channel to send events event_sender: mpsc::Sender, @@ -39,6 +33,8 @@ pub struct DoubleEcho { /// delivered certificate ids to avoid processing twice the same certificate delivered_certificates: HashSet, + pub(crate) params: ReliableBroadcastParams, + task_manager_message_sender: mpsc::Sender, task_completion_receiver: mpsc::Receiver<(CertificateId, TaskStatus)>, @@ -59,16 +55,18 @@ impl DoubleEcho { #[allow(clippy::too_many_arguments)] pub fn new( + params: ReliableBroadcastParams, task_manager_message_sender: mpsc::Sender, task_completion_receiver: mpsc::Receiver<(CertificateId, TaskStatus)>, command_receiver: mpsc::Receiver, - subscriptions_view_receiver: mpsc::Receiver, + subscriptions_view_receiver: broadcast::Receiver, event_sender: mpsc::Sender, shutdown: mpsc::Receiver>, local_peer_id: String, pending_certificate_count: u64, ) -> Self { Self { + params, task_manager_message_sender, task_completion_receiver, pending_certificate_count, @@ -105,31 +103,34 @@ impl DoubleEcho { Some(command) = self.command_receiver.recv() => { match command { - DoubleEchoCommand::Broadcast { need_gossip, cert } => self.task_manager_message_sender.send(command).await, + DoubleEchoCommand::Broadcast { need_gossip, ref cert } => self.task_manager_message_sender.send(command).await, - command if self.subscriptions.is_some() => { + command if self.subscriptions.is_some() => Ok({ match command { DoubleEchoCommand::Echo { from_peer, certificate_id } => self.handle_echo(from_peer, certificate_id).await, DoubleEchoCommand::Ready { from_peer, certificate_id } => self.handle_ready(from_peer, certificate_id).await, _ => {} } - } + }), command => { warn!("Received a command {command:?} while not having a complete sampling"); + Ok(()) } } } - Some((certificate_id, status)) = task_completion_receiver.recv() => { + Some((certificate_id, status)) = self.task_completion_receiver.recv() => { if status == TaskStatus::Success { self.delivered_certificates.insert(certificate_id); } + Ok(()) } - Some(new_subscriptions_view) = self.subscriptions_view_receiver.recv() => { + Ok(new_subscriptions_view) = self.subscriptions_view_receiver.recv() => { info!("Starting to use the new operational set of samples: {:?}", &new_subscriptions_view); self.subscriptions = new_subscriptions_view; + Ok(()) } else => { diff --git a/crates/topos-tce-broadcast/src/lib.rs b/crates/topos-tce-broadcast/src/lib.rs index ac730a1b4..512ef3978 100644 --- a/crates/topos-tce-broadcast/src/lib.rs +++ b/crates/topos-tce-broadcast/src/lib.rs @@ -12,7 +12,7 @@ use tokio_stream::wrappers::ReceiverStream; use futures::Stream; use tokio::sync::mpsc::Sender; -use tokio::sync::{mpsc, oneshot}; +use tokio::sync::{broadcast, mpsc, oneshot}; use double_echo::DoubleEcho; use tce_transport::{ProtocolEvents, ReliableBroadcastParams}; @@ -21,7 +21,7 @@ use topos_core::uci::{Certificate, CertificateId}; use topos_metrics::DOUBLE_ECHO_COMMAND_CHANNEL_CAPACITY_TOTAL; use topos_p2p::PeerId; use topos_tce_storage::StorageClient; -use tracing::{debug, error, event, info}; +use tracing::{debug, error, info}; pub use topos_core::uci; @@ -74,7 +74,7 @@ pub enum SamplerCommand { ForceResample, } -#[derive(Debug)] +#[derive(Debug, Clone)] pub enum DoubleEchoCommand { /// Entry point for new certificate to submit as initial sender Broadcast { @@ -99,7 +99,7 @@ pub enum DoubleEchoCommand { #[derive(Clone, Debug)] pub struct ReliableBroadcastClient { command_sender: mpsc::Sender, - pub(crate) subscriptions_view_sender: mpsc::Sender, + pub(crate) subscriptions_view_sender: broadcast::Sender, pub(crate) double_echo_shutdown_channel: mpsc::Sender>, } @@ -114,7 +114,7 @@ impl ReliableBroadcastClient { storage: StorageClient, ) -> (Self, impl Stream) { let (subscriptions_view_sender, subscriptions_view_receiver) = - mpsc::channel::(2048); + broadcast::channel::(2048); let (event_sender, event_receiver) = mpsc::channel(2048); let (command_sender, command_receiver) = mpsc::channel(*constant::COMMAND_CHANNEL_SIZE); let (double_echo_shutdown_channel, double_echo_shutdown_receiver) = @@ -126,8 +126,9 @@ impl ReliableBroadcastClient { let (task_manager, shutdown_receiver) = TaskManager::new( task_manager_message_receiver, task_completion_sender, + subscriptions_view_sender.subscribe(), event_sender.clone(), - config.tce_params, + config.tce_params.clone(), ); spawn(task_manager.run(shutdown_receiver)); @@ -139,6 +140,7 @@ impl ReliableBroadcastClient { .unwrap_or(0) as u64; let double_echo = DoubleEcho::new( + config.tce_params, task_manager_message_sender, task_completion_receiver, command_receiver, @@ -169,7 +171,7 @@ impl ReliableBroadcastClient { ready: set.clone(), network_size: set.len(), }) - .await + .map(|_| ()) .map_err(|_| ()) } diff --git a/crates/topos-tce-broadcast/src/task_manager_futures/mod.rs b/crates/topos-tce-broadcast/src/task_manager_futures/mod.rs index 297ca3155..9ccb91761 100644 --- a/crates/topos-tce-broadcast/src/task_manager_futures/mod.rs +++ b/crates/topos-tce-broadcast/src/task_manager_futures/mod.rs @@ -1,20 +1,20 @@ use futures::stream::FuturesUnordered; use futures::Future; use futures::StreamExt; -use std::collections::{HashMap, HashSet}; +use std::collections::HashMap; use std::future::IntoFuture; use std::pin::Pin; -use tokio::sync::mpsc; -use tracing::warn; - use tce_transport::{ProtocolEvents, ReliableBroadcastParams}; +use tokio::sync::{broadcast, mpsc}; use topos_core::uci::CertificateId; +use tracing::warn; pub mod task; +use crate::double_echo::broadcast_state::BroadcastState; +use crate::sampler::SubscriptionsView; use crate::DoubleEchoCommand; use task::{Task, TaskContext, TaskStatus}; -use topos_p2p::PeerId; /// The TaskManager is responsible for receiving messages from the network and distributing them /// among tasks. These tasks are either created if none for a certain CertificateID exists yet, @@ -22,6 +22,8 @@ use topos_p2p::PeerId; pub struct TaskManager { pub message_receiver: mpsc::Receiver, pub task_completion_sender: mpsc::Sender<(CertificateId, TaskStatus)>, + pub subscription_view_receiver: broadcast::Receiver, + pub subscriptions: SubscriptionsView, pub event_sender: mpsc::Sender, pub tasks: HashMap, #[allow(clippy::type_complexity)] @@ -37,6 +39,7 @@ impl TaskManager { pub fn new( message_receiver: mpsc::Receiver, task_completion_sender: mpsc::Sender<(CertificateId, TaskStatus)>, + subscription_view_receiver: broadcast::Receiver, event_sender: mpsc::Sender, thresholds: ReliableBroadcastParams, ) -> (Self, mpsc::Receiver<()>) { @@ -46,6 +49,8 @@ impl TaskManager { Self { message_receiver, task_completion_sender, + subscription_view_receiver, + subscriptions: SubscriptionsView::default(), event_sender, tasks: HashMap::new(), running_tasks: FuturesUnordered::new(), @@ -63,7 +68,7 @@ impl TaskManager { Some(msg) = self.message_receiver.recv() => { match msg { DoubleEchoCommand::Echo { certificate_id, from_peer } | DoubleEchoCommand::Ready { certificate_id, from_peer } => { - if let task = self.tasks.get(certificate_id) { + if let Some(task) = self.tasks.get(&certificate_id) { _ = task.sink.send(msg).await; } else { self.buffered_messages @@ -72,42 +77,24 @@ impl TaskManager { .push(msg); }; } - DoubleEchoCommand::Broadcast { ref cert, .. } => { + DoubleEchoCommand::Broadcast { ref cert, need_gossip } => { match self.tasks.entry(cert.id) { std::collections::hash_map::Entry::Vacant(entry) => { - let span = warn_span!( - "Broadcast", - peer_id = self.local_peer_id, - certificate_id = cert.id.to_string() - ); - DOUBLE_ECHO_BROADCAST_CREATED_TOTAL.inc(); - span.in_scope(|| { - warn!("Broadcast registered for {}", cert.id); - self.span_tracker.insert(cert.id, span.clone()); - CERTIFICATE_RECEIVED_TOTAL.inc(); - - if need_gossip { - CERTIFICATE_RECEIVED_FROM_API_TOTAL.inc(); - } else { - CERTIFICATE_RECEIVED_FROM_GOSSIP_TOTAL.inc(); - } - }); - let broadcast_state = BroadcastState::new( - certificate, - self.params.echo_threshold, - self.params.ready_threshold, - self.params.delivery_threshold, + cert.clone(), + self.thresholds.echo_threshold, + self.thresholds.ready_threshold, + self.thresholds.delivery_threshold, self.event_sender.clone(), - subscriptions, - origin, + self.subscriptions.clone(), + need_gossip, ); - let (task, task_context) = Task::new(cert.id, self.thresholds.clone(), broadcast_state); + let (task, task_context) = Task::new(cert.id, broadcast_state); self.running_tasks.push(task.into_future()); - entry.insert(task_context) + entry.insert(task_context); } std::collections::hash_map::Entry::Occupied(entry) => {}, } @@ -115,10 +102,14 @@ impl TaskManager { } } - Some((id, status)) = self.running_tasks.next() => { + Ok(new_subscriptions_view) = self.subscription_view_receiver.recv() => { + self.subscriptions = new_subscriptions_view; + } + + Some((certificate_id, status)) = self.running_tasks.next() => { if status == TaskStatus::Success { self.tasks.remove(&certificate_id); - let _ = self.task_completion_sender.send((id, status)).await; + let _ = self.task_completion_sender.send((certificate_id, status)).await; } } @@ -134,11 +125,10 @@ impl TaskManager { } } - for (certificate_id, messages) in self.buffered_messages { + for (certificate_id, messages) in &mut self.buffered_messages { if let Some(task) = self.tasks.get_mut(&certificate_id) { for msg in messages { - _ = task.sink.send(msg).await; - self.buffered_messages.remove(&certificate_id); + _ = task.sink.send(msg.clone()).await; } } } diff --git a/crates/topos-tce-broadcast/src/task_manager_futures/task.rs b/crates/topos-tce-broadcast/src/task_manager_futures/task.rs index ec34ae520..3d883e573 100644 --- a/crates/topos-tce-broadcast/src/task_manager_futures/task.rs +++ b/crates/topos-tce-broadcast/src/task_manager_futures/task.rs @@ -7,7 +7,6 @@ use tracing::warn; use crate::double_echo::broadcast_state::BroadcastState; use crate::DoubleEchoCommand; -use tce_transport::ReliableBroadcastParams; #[derive(Debug, PartialEq, Eq)] pub enum TaskStatus { @@ -25,7 +24,6 @@ pub struct TaskContext { pub struct Task { pub message_receiver: mpsc::Receiver, pub certificate_id: CertificateId, - pub thresholds: ReliableBroadcastParams, pub broadcast_state: BroadcastState, pub shutdown_receiver: mpsc::Receiver<()>, } @@ -33,7 +31,6 @@ pub struct Task { impl Task { pub fn new( certificate_id: CertificateId, - thresholds: ReliableBroadcastParams, broadcast_state: BroadcastState, ) -> (Task, TaskContext) { let (message_sender, message_receiver) = mpsc::channel(10_024); @@ -47,7 +44,6 @@ impl Task { let task = Task { message_receiver, certificate_id, - thresholds, broadcast_state, shutdown_receiver, }; @@ -68,10 +64,10 @@ impl IntoFuture for Task { Some(msg) = self.message_receiver.recv() => { match msg { DoubleEchoCommand::Echo { certificate_id, from_peer } => { - self.broadcast_state.apply_echo(from_peer) + self.broadcast_state.apply_echo(from_peer); } DoubleEchoCommand::Ready { certificate_id, from_peer } => { - self.broadcast_state.apply_ready(from_peer) + self.broadcast_state.apply_ready(from_peer); } DoubleEchoCommand::Broadcast { cert, .. } => { return (cert.id, TaskStatus::Success); From 23f854fa1ff15688b383bf8c2ceb866d42da04f6 Mon Sep 17 00:00:00 2001 From: Bastian Gruber Date: Mon, 24 Jul 2023 19:39:26 +0200 Subject: [PATCH 10/23] fix: adjust task returns --- .../src/task_manager_futures/task.rs | 12 ++++++++---- .../src/tests/task_manager_futures.rs | 17 +++++++++++++++-- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/crates/topos-tce-broadcast/src/task_manager_futures/task.rs b/crates/topos-tce-broadcast/src/task_manager_futures/task.rs index 3d883e573..94e578466 100644 --- a/crates/topos-tce-broadcast/src/task_manager_futures/task.rs +++ b/crates/topos-tce-broadcast/src/task_manager_futures/task.rs @@ -5,7 +5,7 @@ use tokio::sync::mpsc; use topos_core::uci::CertificateId; use tracing::warn; -use crate::double_echo::broadcast_state::BroadcastState; +use crate::double_echo::broadcast_state::{BroadcastState, Status}; use crate::DoubleEchoCommand; #[derive(Debug, PartialEq, Eq)] @@ -64,13 +64,17 @@ impl IntoFuture for Task { Some(msg) = self.message_receiver.recv() => { match msg { DoubleEchoCommand::Echo { certificate_id, from_peer } => { - self.broadcast_state.apply_echo(from_peer); + if let Some(Status::DeliveredWithReadySent) = self.broadcast_state.apply_echo(from_peer) { + return (self.certificate_id, TaskStatus::Success); + } } DoubleEchoCommand::Ready { certificate_id, from_peer } => { - self.broadcast_state.apply_ready(from_peer); + if let Some(Status::DeliveredWithReadySent) = self.broadcast_state.apply_ready(from_peer) { + return (self.certificate_id, TaskStatus::Success); + } } DoubleEchoCommand::Broadcast { cert, .. } => { - return (cert.id, TaskStatus::Success); + // return (cert.id, TaskStatus::Success); } } diff --git a/crates/topos-tce-broadcast/src/tests/task_manager_futures.rs b/crates/topos-tce-broadcast/src/tests/task_manager_futures.rs index 1b1526d16..82d9c5aeb 100644 --- a/crates/topos-tce-broadcast/src/tests/task_manager_futures.rs +++ b/crates/topos-tce-broadcast/src/tests/task_manager_futures.rs @@ -1,8 +1,10 @@ +use crate::sampler::SubscriptionsView; use crate::task_manager_futures::TaskManager; use crate::{CertificateId, DoubleEchoCommand}; use rand::Rng; use rstest::*; use tce_transport::ReliableBroadcastParams; +use tokio::sync::broadcast; use tokio::{spawn, sync::mpsc}; use topos_p2p::PeerId; @@ -13,6 +15,8 @@ async fn task_manager_futures_receiving_messages() { let (message_sender, message_receiver) = mpsc::channel(1024); let (task_completion_sender, mut task_completion_receiver) = mpsc::channel(1024); + let (subscription_view_sender, mut subscription_view_receiver) = broadcast::channel(1024); + let (event_sender, mut event_receiver) = mpsc::channel(1024); let thresholds = ReliableBroadcastParams { echo_threshold: n, @@ -20,8 +24,17 @@ async fn task_manager_futures_receiving_messages() { delivery_threshold: n, }; - let (task_manager, shutdown_receiver) = - TaskManager::new(message_receiver, task_completion_sender, thresholds); + let (task_manager, shutdown_receiver) = TaskManager::new( + message_receiver, + task_completion_sender, + subscription_view_receiver, + event_sender, + thresholds, + ); + + let subscription_view = SubscriptionsView::default(); + + subscription_view_sender.send(subscription_view); spawn(task_manager.run(shutdown_receiver)); From e370f3a898ff5e15e7d326d8387bd8b6c3975000 Mon Sep 17 00:00:00 2001 From: Bastian Gruber Date: Mon, 24 Jul 2023 20:05:41 +0200 Subject: [PATCH 11/23] fix: tests, wip --- crates/topos-tce-broadcast/src/tests/mod.rs | 9 ++++++--- .../src/tests/task_manager_futures.rs | 4 ++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/crates/topos-tce-broadcast/src/tests/mod.rs b/crates/topos-tce-broadcast/src/tests/mod.rs index e930ad3d5..8a20a3ad0 100644 --- a/crates/topos-tce-broadcast/src/tests/mod.rs +++ b/crates/topos-tce-broadcast/src/tests/mod.rs @@ -53,21 +53,25 @@ struct TceParams { struct Context { event_receiver: Receiver, - subscriptions_view_sender: Sender, + subscriptions_view_sender: broadcast::Sender, cmd_sender: Sender, double_echo_shutdown_sender: Sender>, } fn create_context(params: TceParams) -> (DoubleEcho, Context) { - let (subscriptions_view_sender, subscriptions_view_receiver) = mpsc::channel(CHANNEL_SIZE); + let (subscriptions_view_sender, subscriptions_view_receiver) = broadcast::channel(CHANNEL_SIZE); let (cmd_sender, cmd_receiver) = mpsc::channel(CHANNEL_SIZE); let (event_sender, event_receiver) = mpsc::channel(CHANNEL_SIZE); let (double_echo_shutdown_sender, double_echo_shutdown_receiver) = mpsc::channel::>(1); + let (task_manager_message_sender, _task_manager_message_receiver) = mpsc::channel(CHANNEL_SIZE); + let (_task_completion_sender, task_completion_receiver) = mpsc::channel(CHANNEL_SIZE); let mut double_echo = DoubleEcho::new( params.broadcast_params, + task_manager_message_sender, + task_completion_receiver, cmd_receiver, subscriptions_view_receiver, event_sender, @@ -272,7 +276,6 @@ async fn buffering_certificate(#[case] params: TceParams) { ctx.subscriptions_view_sender .send(subscriptions.clone()) - .await .expect("Cannot send expected view"); let mut received_gossip_commands: Vec = Vec::new(); diff --git a/crates/topos-tce-broadcast/src/tests/task_manager_futures.rs b/crates/topos-tce-broadcast/src/tests/task_manager_futures.rs index 82d9c5aeb..fcc1f9f9a 100644 --- a/crates/topos-tce-broadcast/src/tests/task_manager_futures.rs +++ b/crates/topos-tce-broadcast/src/tests/task_manager_futures.rs @@ -15,8 +15,8 @@ async fn task_manager_futures_receiving_messages() { let (message_sender, message_receiver) = mpsc::channel(1024); let (task_completion_sender, mut task_completion_receiver) = mpsc::channel(1024); - let (subscription_view_sender, mut subscription_view_receiver) = broadcast::channel(1024); - let (event_sender, mut event_receiver) = mpsc::channel(1024); + let (subscription_view_sender, subscription_view_receiver) = broadcast::channel(1024); + let (event_sender, _event_receiver) = mpsc::channel(1024); let thresholds = ReliableBroadcastParams { echo_threshold: n, From 88550ea96dc1f23451b6c884e375a3be581fe2f7 Mon Sep 17 00:00:00 2001 From: Bastian Gruber Date: Mon, 24 Jul 2023 20:19:51 +0200 Subject: [PATCH 12/23] fix: test errors, wip --- crates/topos-tce-broadcast/src/tests/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/topos-tce-broadcast/src/tests/mod.rs b/crates/topos-tce-broadcast/src/tests/mod.rs index 8a20a3ad0..359cffa86 100644 --- a/crates/topos-tce-broadcast/src/tests/mod.rs +++ b/crates/topos-tce-broadcast/src/tests/mod.rs @@ -116,7 +116,7 @@ fn reach_echo_threshold(double_echo: &mut DoubleEcho, cert: &Certificate) { .collect::>(); for p in selected { - double_echo.consume_echo(p, &cert.id); + double_echo.handle_echo(p, cert.id); } } @@ -130,7 +130,7 @@ fn reach_ready_threshold(double_echo: &mut DoubleEcho, cert: &Certificate) { .collect::>(); for p in selected { - double_echo.consume_ready(p, &cert.id); + double_echo.handle_ready(p, cert.id); } } @@ -144,7 +144,7 @@ fn reach_delivery_threshold(double_echo: &mut DoubleEcho, cert: &Certificate) { .collect::>(); for p in selected { - double_echo.consume_ready(p, &cert.id); + double_echo.handle_ready(p, cert.id); } } From 6668fab51e8633ff7a438133ad7716f045adaa23 Mon Sep 17 00:00:00 2001 From: Bastian Gruber Date: Tue, 25 Jul 2023 09:19:42 +0200 Subject: [PATCH 13/23] fix: debugging tests --- .../src/double_echo/broadcast_state.rs | 1 + .../src/double_echo/mod.rs | 1 + .../src/task_manager_futures/mod.rs | 1 + .../src/task_manager_futures/task.rs | 5 ++ crates/topos-tce-broadcast/src/tests/mod.rs | 56 +++++++++++++------ 5 files changed, 48 insertions(+), 16 deletions(-) diff --git a/crates/topos-tce-broadcast/src/double_echo/broadcast_state.rs b/crates/topos-tce-broadcast/src/double_echo/broadcast_state.rs index 62079a68f..d2335dd54 100644 --- a/crates/topos-tce-broadcast/src/double_echo/broadcast_state.rs +++ b/crates/topos-tce-broadcast/src/double_echo/broadcast_state.rs @@ -13,6 +13,7 @@ mod status; pub use status::Status; +#[derive(Debug)] pub struct BroadcastState { subscriptions_view: SubscriptionsView, status: Status, diff --git a/crates/topos-tce-broadcast/src/double_echo/mod.rs b/crates/topos-tce-broadcast/src/double_echo/mod.rs index b68dae3a3..0fe250ad7 100644 --- a/crates/topos-tce-broadcast/src/double_echo/mod.rs +++ b/crates/topos-tce-broadcast/src/double_echo/mod.rs @@ -259,6 +259,7 @@ impl DoubleEcho { pub(crate) async fn handle_ready(&mut self, from_peer: PeerId, certificate_id: CertificateId) { if self.delivered_certificates.get(&certificate_id).is_none() { + println!("SEND READY"); let _ = self .task_manager_message_sender .send(DoubleEchoCommand::Ready { diff --git a/crates/topos-tce-broadcast/src/task_manager_futures/mod.rs b/crates/topos-tce-broadcast/src/task_manager_futures/mod.rs index 9ccb91761..da2ebe453 100644 --- a/crates/topos-tce-broadcast/src/task_manager_futures/mod.rs +++ b/crates/topos-tce-broadcast/src/task_manager_futures/mod.rs @@ -66,6 +66,7 @@ impl TaskManager { loop { tokio::select! { Some(msg) = self.message_receiver.recv() => { + println!("RECEIVE MESSAGE: {msg:?}"); match msg { DoubleEchoCommand::Echo { certificate_id, from_peer } | DoubleEchoCommand::Ready { certificate_id, from_peer } => { if let Some(task) = self.tasks.get(&certificate_id) { diff --git a/crates/topos-tce-broadcast/src/task_manager_futures/task.rs b/crates/topos-tce-broadcast/src/task_manager_futures/task.rs index 94e578466..3e77cbdbd 100644 --- a/crates/topos-tce-broadcast/src/task_manager_futures/task.rs +++ b/crates/topos-tce-broadcast/src/task_manager_futures/task.rs @@ -16,11 +16,13 @@ pub enum TaskStatus { Failure, } +#[derive(Debug)] pub struct TaskContext { pub sink: mpsc::Sender, pub shutdown_sender: mpsc::Sender<()>, } +#[derive(Debug)] pub struct Task { pub message_receiver: mpsc::Receiver, pub certificate_id: CertificateId, @@ -64,16 +66,19 @@ impl IntoFuture for Task { Some(msg) = self.message_receiver.recv() => { match msg { DoubleEchoCommand::Echo { certificate_id, from_peer } => { + println!("Receive ECHO in task"); if let Some(Status::DeliveredWithReadySent) = self.broadcast_state.apply_echo(from_peer) { return (self.certificate_id, TaskStatus::Success); } } DoubleEchoCommand::Ready { certificate_id, from_peer } => { + println!("Receive READY in task"); if let Some(Status::DeliveredWithReadySent) = self.broadcast_state.apply_ready(from_peer) { return (self.certificate_id, TaskStatus::Success); } } DoubleEchoCommand::Broadcast { cert, .. } => { + println!("Receive BROADCAST in task"); // return (cert.id, TaskStatus::Success); } diff --git a/crates/topos-tce-broadcast/src/tests/mod.rs b/crates/topos-tce-broadcast/src/tests/mod.rs index 359cffa86..583df0ba3 100644 --- a/crates/topos-tce-broadcast/src/tests/mod.rs +++ b/crates/topos-tce-broadcast/src/tests/mod.rs @@ -58,19 +58,29 @@ struct Context { double_echo_shutdown_sender: Sender>, } -fn create_context(params: TceParams) -> (DoubleEcho, Context) { +async fn create_context(params: TceParams) -> (DoubleEcho, Context) { let (subscriptions_view_sender, subscriptions_view_receiver) = broadcast::channel(CHANNEL_SIZE); let (cmd_sender, cmd_receiver) = mpsc::channel(CHANNEL_SIZE); let (event_sender, event_receiver) = mpsc::channel(CHANNEL_SIZE); let (double_echo_shutdown_sender, double_echo_shutdown_receiver) = mpsc::channel::>(1); - let (task_manager_message_sender, _task_manager_message_receiver) = mpsc::channel(CHANNEL_SIZE); - let (_task_completion_sender, task_completion_receiver) = mpsc::channel(CHANNEL_SIZE); + let (task_manager_message_sender, task_manager_message_receiver) = mpsc::channel(CHANNEL_SIZE); + let (task_completion_sender, task_completion_receiver) = mpsc::channel(CHANNEL_SIZE); + + let (task_manager, shutdown_receiver) = TaskManager::new( + task_manager_message_receiver, + task_completion_sender, + subscriptions_view_sender.subscribe(), + event_sender.clone(), + params.broadcast_params.clone(), + ); + + spawn(task_manager.run(shutdown_receiver)); let mut double_echo = DoubleEcho::new( params.broadcast_params, - task_manager_message_sender, + task_manager_message_sender.clone(), task_completion_receiver, cmd_receiver, subscriptions_view_receiver, @@ -94,6 +104,20 @@ fn create_context(params: TceParams) -> (DoubleEcho, Context) { double_echo.subscriptions.ready = peers.clone(); double_echo.subscriptions.network_size = params.nb_peers; + let _ = subscriptions_view_sender.send(SubscriptionsView { + echo: peers.clone(), + ready: peers.clone(), + network_size: params.nb_peers, + }); + + task_manager_message_sender + .send(DoubleEchoCommand::Broadcast { + need_gossip: true, + cert: Certificate::default(), + }) + .await + .expect("Cannot send broadcast command"); + ( double_echo, Context { @@ -106,7 +130,7 @@ fn create_context(params: TceParams) -> (DoubleEcho, Context) { ) } -fn reach_echo_threshold(double_echo: &mut DoubleEcho, cert: &Certificate) { +async fn reach_echo_threshold(double_echo: &mut DoubleEcho, cert: &Certificate) { let selected = double_echo .subscriptions .echo @@ -116,11 +140,11 @@ fn reach_echo_threshold(double_echo: &mut DoubleEcho, cert: &Certificate) { .collect::>(); for p in selected { - double_echo.handle_echo(p, cert.id); + double_echo.handle_echo(p, cert.id).await; } } -fn reach_ready_threshold(double_echo: &mut DoubleEcho, cert: &Certificate) { +async fn reach_ready_threshold(double_echo: &mut DoubleEcho, cert: &Certificate) { let selected = double_echo .subscriptions .ready @@ -130,11 +154,11 @@ fn reach_ready_threshold(double_echo: &mut DoubleEcho, cert: &Certificate) { .collect::>(); for p in selected { - double_echo.handle_ready(p, cert.id); + double_echo.handle_ready(p, cert.id).await; } } -fn reach_delivery_threshold(double_echo: &mut DoubleEcho, cert: &Certificate) { +async fn reach_delivery_threshold(double_echo: &mut DoubleEcho, cert: &Certificate) { let selected = double_echo .subscriptions .ready @@ -144,7 +168,7 @@ fn reach_delivery_threshold(double_echo: &mut DoubleEcho, cert: &Certificate) { .collect::>(); for p in selected { - double_echo.handle_ready(p, cert.id); + double_echo.handle_ready(p, cert.id).await; } } @@ -154,7 +178,7 @@ fn reach_delivery_threshold(double_echo: &mut DoubleEcho, cert: &Certificate) { #[tokio::test] #[trace] async fn trigger_success_path_upon_reaching_threshold(#[case] params: TceParams) { - let (mut double_echo, mut ctx) = create_context(params); + let (mut double_echo, mut ctx) = create_context(params).await; let dummy_cert = Certificate::new( PREV_CERTIFICATE_ID, @@ -190,7 +214,7 @@ async fn trigger_success_path_upon_reaching_threshold(#[case] params: TceParams) )); // Trigger Ready upon reaching the Echo threshold - reach_echo_threshold(&mut double_echo, &dummy_cert); + reach_echo_threshold(&mut double_echo, &dummy_cert).await; assert!(matches!( ctx.event_receiver.try_recv(), @@ -198,7 +222,7 @@ async fn trigger_success_path_upon_reaching_threshold(#[case] params: TceParams) )); // Trigger Delivery upon reaching the Delivery threshold - reach_delivery_threshold(&mut double_echo, &dummy_cert); + reach_delivery_threshold(&mut double_echo, &dummy_cert).await; assert!(matches!( ctx.event_receiver.try_recv(), @@ -212,7 +236,7 @@ async fn trigger_success_path_upon_reaching_threshold(#[case] params: TceParams) #[tokio::test] #[trace] async fn trigger_ready_when_reached_enough_ready(#[case] params: TceParams) { - let (mut double_echo, mut ctx) = create_context(params); + let (mut double_echo, mut ctx) = create_context(params).await; let dummy_cert = Certificate::new( PREV_CERTIFICATE_ID, @@ -244,7 +268,7 @@ async fn trigger_ready_when_reached_enough_ready(#[case] params: TceParams) { )); // Trigger Ready upon reaching the Ready threshold - reach_ready_threshold(&mut double_echo, &dummy_cert); + reach_ready_threshold(&mut double_echo, &dummy_cert).await; assert!(matches!( ctx.event_receiver.try_recv(), @@ -256,7 +280,7 @@ async fn trigger_ready_when_reached_enough_ready(#[case] params: TceParams) { #[case::small_config(small_config())] #[tokio::test] async fn buffering_certificate(#[case] params: TceParams) { - let (double_echo, mut ctx) = create_context(params); + let (double_echo, mut ctx) = create_context(params).await; let subscriptions = double_echo.subscriptions.clone(); From a1d9b7402de0f401da3af4768af50848548e33cb Mon Sep 17 00:00:00 2001 From: Bastian Gruber Date: Tue, 25 Jul 2023 09:45:09 +0200 Subject: [PATCH 14/23] fix: add hint to restructure the broadcast, wip --- .../src/double_echo/mod.rs | 2 + crates/topos-tce-broadcast/src/tests/mod.rs | 98 +++++++++---------- 2 files changed, 51 insertions(+), 49 deletions(-) diff --git a/crates/topos-tce-broadcast/src/double_echo/mod.rs b/crates/topos-tce-broadcast/src/double_echo/mod.rs index 0fe250ad7..f74e2231b 100644 --- a/crates/topos-tce-broadcast/src/double_echo/mod.rs +++ b/crates/topos-tce-broadcast/src/double_echo/mod.rs @@ -153,6 +153,8 @@ impl DoubleEcho { /// Called to process potentially new certificate: /// - either submitted from API ( [tce_transport::TceCommands::Broadcast] command) /// - or received through the gossip (first step of protocol exchange) + ///TODO: This is not really broadcasting, merley adding the certificate into a known list. + ///TODO: Rename or restructure this function pub(crate) fn broadcast(&mut self, cert: Certificate, origin: bool) { info!("🙌 Starting broadcasting the Certificate {}", &cert.id); diff --git a/crates/topos-tce-broadcast/src/tests/mod.rs b/crates/topos-tce-broadcast/src/tests/mod.rs index 583df0ba3..fa65daddc 100644 --- a/crates/topos-tce-broadcast/src/tests/mod.rs +++ b/crates/topos-tce-broadcast/src/tests/mod.rs @@ -276,52 +276,52 @@ async fn trigger_ready_when_reached_enough_ready(#[case] params: TceParams) { )); } -#[rstest] -#[case::small_config(small_config())] -#[tokio::test] -async fn buffering_certificate(#[case] params: TceParams) { - let (double_echo, mut ctx) = create_context(params).await; - - let subscriptions = double_echo.subscriptions.clone(); - - spawn(double_echo.run()); - - // Wait to receive subscribers - tokio::time::sleep(WAIT_EVENT_TIMEOUT).await; - - let le_cert = Certificate::default(); - ctx.cmd_sender - .send(DoubleEchoCommand::Broadcast { - need_gossip: true, - cert: le_cert.clone(), - }) - .await - .expect("Cannot send broadcast command"); - - ctx.subscriptions_view_sender - .send(subscriptions.clone()) - .expect("Cannot send expected view"); - - let mut received_gossip_commands: Vec = Vec::new(); - let assertion = async { - while let Some(event) = ctx.event_receiver.recv().await { - if let ProtocolEvents::Gossip { cert, .. } = event { - received_gossip_commands.push(cert); - } - } - }; - - let _ = tokio::time::timeout(Duration::from_secs(1), assertion).await; - - assert_eq!(received_gossip_commands.len(), 1); - assert_eq!(received_gossip_commands[0].id, le_cert.id); - - // Test shutdown - info!("Waiting for double echo to shutdown..."); - let (sender, receiver) = oneshot::channel(); - ctx.double_echo_shutdown_sender - .send(sender) - .await - .expect("Valid shutdown signal sending"); - assert_eq!(receiver.await, Ok(())); -} +// #[rstest] +// #[case::small_config(small_config())] +// #[tokio::test] +// async fn buffering_certificate(#[case] params: TceParams) { +// let (double_echo, mut ctx) = create_context(params).await; +// +// let subscriptions = double_echo.subscriptions.clone(); +// +// spawn(double_echo.run()); +// +// // Wait to receive subscribers +// tokio::time::sleep(WAIT_EVENT_TIMEOUT).await; +// +// let le_cert = Certificate::default(); +// ctx.cmd_sender +// .send(DoubleEchoCommand::Broadcast { +// need_gossip: true, +// cert: le_cert.clone(), +// }) +// .await +// .expect("Cannot send broadcast command"); +// +// ctx.subscriptions_view_sender +// .send(subscriptions.clone()) +// .expect("Cannot send expected view"); +// +// let mut received_gossip_commands: Vec = Vec::new(); +// let assertion = async { +// while let Some(event) = ctx.event_receiver.recv().await { +// if let ProtocolEvents::Gossip { cert, .. } = event { +// received_gossip_commands.push(cert); +// } +// } +// }; +// +// let _ = tokio::time::timeout(Duration::from_secs(1), assertion).await; +// +// assert_eq!(received_gossip_commands.len(), 1); +// assert_eq!(received_gossip_commands[0].id, le_cert.id); +// +// // Test shutdown +// info!("Waiting for double echo to shutdown..."); +// let (sender, receiver) = oneshot::channel(); +// ctx.double_echo_shutdown_sender +// .send(sender) +// .await +// .expect("Valid shutdown signal sending"); +// assert_eq!(receiver.await, Ok(())); +// } From 757b3c18cf7e55615d0bfd8bca6ff37703782487 Mon Sep 17 00:00:00 2001 From: Simon Paitrault Date: Tue, 25 Jul 2023 12:50:58 +0200 Subject: [PATCH 15/23] chore: refactor task_manager tests Signed-off-by: Simon Paitrault --- .../src/double_echo/mod.rs | 128 ++++++++++-------- crates/topos-tce-broadcast/src/lib.rs | 29 ++-- .../src/task_manager_channels/task.rs | 9 +- .../src/task_manager_futures/mod.rs | 16 ++- .../src/task_manager_futures/task.rs | 10 +- crates/topos-tce-broadcast/src/tests/mod.rs | 51 +++---- .../src/tests/task_manager_futures.rs | 4 +- 7 files changed, 114 insertions(+), 133 deletions(-) diff --git a/crates/topos-tce-broadcast/src/double_echo/mod.rs b/crates/topos-tce-broadcast/src/double_echo/mod.rs index f74e2231b..52f0ed74f 100644 --- a/crates/topos-tce-broadcast/src/double_echo/mod.rs +++ b/crates/topos-tce-broadcast/src/double_echo/mod.rs @@ -1,35 +1,25 @@ -use crate::task_manager_futures::task::TaskStatus; +use crate::TaskStatus; use crate::{DoubleEchoCommand, SubscriptionsView}; use std::collections::HashSet; use std::collections::{HashMap, VecDeque}; use tce_transport::{ProtocolEvents, ReliableBroadcastParams}; -use tokio::sync::{broadcast, mpsc, oneshot}; +use tokio::sync::{mpsc, oneshot}; use topos_core::uci::{Certificate, CertificateId}; use topos_p2p::PeerId; use tracing::{error, info, warn, Span}; -use self::broadcast_state::BroadcastState; - pub mod broadcast_state; pub struct DoubleEcho { /// Channel to receive commands command_receiver: mpsc::Receiver, - /// Channel to receive subscriptions updates - subscriptions_view_receiver: broadcast::Receiver, /// Channel to send events event_sender: mpsc::Sender, /// Channel to receive shutdown signal pub(crate) shutdown: mpsc::Receiver>, - /// pending certificates state - pending_certificate_count: u64, - - /// buffer of certificates to process - buffer: VecDeque<(bool, Certificate)>, - /// delivered certificate ids to avoid processing twice the same certificate delivered_certificates: HashSet, @@ -37,11 +27,6 @@ pub struct DoubleEcho { task_manager_message_sender: mpsc::Sender, - task_completion_receiver: mpsc::Receiver<(CertificateId, TaskStatus)>, - - /// Current certificates being processed - cert_candidate: HashMap, - /// Span tracker for each certificate span_tracker: HashMap, @@ -57,32 +42,52 @@ impl DoubleEcho { pub fn new( params: ReliableBroadcastParams, task_manager_message_sender: mpsc::Sender, - task_completion_receiver: mpsc::Receiver<(CertificateId, TaskStatus)>, command_receiver: mpsc::Receiver, - subscriptions_view_receiver: broadcast::Receiver, event_sender: mpsc::Sender, shutdown: mpsc::Receiver>, local_peer_id: String, - pending_certificate_count: u64, + _pending_certificate_count: u64, ) -> Self { Self { params, task_manager_message_sender, - task_completion_receiver, - pending_certificate_count, command_receiver, - subscriptions_view_receiver, event_sender, - cert_candidate: Default::default(), span_tracker: Default::default(), subscriptions: SubscriptionsView::default(), - buffer: VecDeque::new(), shutdown, local_peer_id, delivered_certificates: Default::default(), } } + #[cfg(not(feature = "task-manager-channels"))] + pub(crate) fn spawn_task_manager( + &mut self, + subscriptions_view_receiver: mpsc::Receiver, + task_manager_message_receiver: mpsc::Receiver, + ) -> mpsc::Receiver<(CertificateId, TaskStatus)> { + let (task_completion_sender, task_completion_receiver) = mpsc::channel(2048); + + let (task_manager, shutdown_receiver) = crate::task_manager_futures::TaskManager::new( + task_manager_message_receiver, + task_completion_sender, + subscriptions_view_receiver, + self.event_sender.clone(), + self.params.clone(), + ); + + tokio::spawn(task_manager.run(shutdown_receiver)); + + task_completion_receiver + } + #[cfg(feature = "task-manager-channels")] + pub(crate) fn spawn_task_manager( + &mut self, + subscriptions_view_receiver: mpsc::Receiver, + ) { + } + /// DoubleEcho main loop /// - Listen for shutdown signal /// - Read new messages from command_receiver @@ -90,7 +95,14 @@ impl DoubleEcho { /// - If a new subscription view is received, update the subscriptions /// - If a new Echo/Ready is received, update the state of the certificate or buffer /// the message - pub(crate) async fn run(mut self) { + pub(crate) async fn run( + mut self, + subscriptions_view_receiver: mpsc::Receiver, + task_manager_message_receiver: mpsc::Receiver, + ) { + let mut task_completion = + self.spawn_task_manager(subscriptions_view_receiver, task_manager_message_receiver); + info!("DoubleEcho started"); let shutdowned: Option> = loop { @@ -103,7 +115,7 @@ impl DoubleEcho { Some(command) = self.command_receiver.recv() => { match command { - DoubleEchoCommand::Broadcast { need_gossip, ref cert } => self.task_manager_message_sender.send(command).await, + DoubleEchoCommand::Broadcast { need_gossip, cert } => Ok::<_, ()>(self.broadcast(cert, need_gossip).await), command if self.subscriptions.is_some() => Ok({ match command { @@ -120,18 +132,13 @@ impl DoubleEcho { } } - Some((certificate_id, status)) = self.task_completion_receiver.recv() => { + Some((certificate_id, status)) = task_completion.recv() => { if status == TaskStatus::Success { self.delivered_certificates.insert(certificate_id); } Ok(()) } - Ok(new_subscriptions_view) = self.subscriptions_view_receiver.recv() => { - info!("Starting to use the new operational set of samples: {:?}", &new_subscriptions_view); - self.subscriptions = new_subscriptions_view; - Ok(()) - } else => { warn!("Break the tokio loop for the double echo"); @@ -155,7 +162,7 @@ impl DoubleEcho { /// - or received through the gossip (first step of protocol exchange) ///TODO: This is not really broadcasting, merley adding the certificate into a known list. ///TODO: Rename or restructure this function - pub(crate) fn broadcast(&mut self, cert: Certificate, origin: bool) { + pub(crate) async fn broadcast(&mut self, cert: Certificate, origin: bool) { info!("🙌 Starting broadcasting the Certificate {}", &cert.id); if self.cert_pre_broadcast_check(&cert).is_err() { @@ -167,15 +174,15 @@ impl DoubleEcho { .unwrap(); return; } - // Don't gossip one cert already gossiped - if self.cert_candidate.contains_key(&cert.id) { - self.event_sender - .try_send(ProtocolEvents::BroadcastFailed { - certificate_id: cert.id, - }) - .unwrap(); - return; - } + // // Don't gossip one cert already gossiped + // if self.cert_candidate.contains_key(&cert.id) { + // self.event_sender + // .try_send(ProtocolEvents::BroadcastFailed { + // certificate_id: cert.id, + // }) + // .unwrap(); + // return; + // } if self.delivered_certificates.get(&cert.id).is_some() { self.event_sender @@ -192,9 +199,9 @@ impl DoubleEcho { // To include tracing context in client requests from _this_ app, // use `context` to extract the current OpenTelemetry context. // Add new entry for the new Cert candidate - match self.delivery_state_for_new_cert(cert, origin) { + match self.delivery_state_for_new_cert(cert, origin).await { Some(delivery_state) => { - self.cert_candidate.insert(certificate_id, delivery_state); + // self.cert_candidate.insert(certificate_id, delivery_state); } None => { error!("Ill-formed samples"); @@ -204,11 +211,11 @@ impl DoubleEcho { } /// Build initial delivery state - fn delivery_state_for_new_cert( + async fn delivery_state_for_new_cert( &mut self, certificate: Certificate, origin: bool, - ) -> Option { + ) -> Option { let subscriptions = self.subscriptions.clone(); // Check whether inbound sets are empty @@ -220,15 +227,24 @@ impl DoubleEcho { ); None } else { - Some(BroadcastState::new( - certificate, - self.params.echo_threshold, - self.params.ready_threshold, - self.params.delivery_threshold, - self.event_sender.clone(), - subscriptions, - origin, - )) + _ = self + .task_manager_message_sender + .send(DoubleEchoCommand::Broadcast { + need_gossip: origin, + cert: certificate, + }) + .await; + + Some(true) + // Some(BroadcastState::new( + // certificate, + // self.params.echo_threshold, + // self.params.ready_threshold, + // self.params.delivery_threshold, + // self.event_sender.clone(), + // subscriptions, + // origin, + // )) } } diff --git a/crates/topos-tce-broadcast/src/lib.rs b/crates/topos-tce-broadcast/src/lib.rs index 512ef3978..806205cf3 100644 --- a/crates/topos-tce-broadcast/src/lib.rs +++ b/crates/topos-tce-broadcast/src/lib.rs @@ -12,7 +12,7 @@ use tokio_stream::wrappers::ReceiverStream; use futures::Stream; use tokio::sync::mpsc::Sender; -use tokio::sync::{broadcast, mpsc, oneshot}; +use tokio::sync::{mpsc, oneshot}; use double_echo::DoubleEcho; use tce_transport::{ProtocolEvents, ReliableBroadcastParams}; @@ -44,6 +44,13 @@ mod tests; use crate::sampler::SubscriptionsView; +#[derive(Debug, PartialEq, Eq)] +pub enum TaskStatus { + /// The task finished succesfully and broadcasted the certificate + received ready + Success, + /// The task did not finish succesfully and stopped. + Failure, +} #[cfg(feature = "task-manager-futures")] use crate::task_manager_futures::TaskManager; #[cfg(all( @@ -99,7 +106,7 @@ pub enum DoubleEchoCommand { #[derive(Clone, Debug)] pub struct ReliableBroadcastClient { command_sender: mpsc::Sender, - pub(crate) subscriptions_view_sender: broadcast::Sender, + pub(crate) subscriptions_view_sender: mpsc::Sender, pub(crate) double_echo_shutdown_channel: mpsc::Sender>, } @@ -114,24 +121,13 @@ impl ReliableBroadcastClient { storage: StorageClient, ) -> (Self, impl Stream) { let (subscriptions_view_sender, subscriptions_view_receiver) = - broadcast::channel::(2048); + mpsc::channel::(2048); let (event_sender, event_receiver) = mpsc::channel(2048); let (command_sender, command_receiver) = mpsc::channel(*constant::COMMAND_CHANNEL_SIZE); let (double_echo_shutdown_channel, double_echo_shutdown_receiver) = mpsc::channel::>(1); let (task_manager_message_sender, task_manager_message_receiver) = mpsc::channel(24_000); - let (task_completion_sender, task_completion_receiver) = mpsc::channel(24_000); - - let (task_manager, shutdown_receiver) = TaskManager::new( - task_manager_message_receiver, - task_completion_sender, - subscriptions_view_sender.subscribe(), - event_sender.clone(), - config.tce_params.clone(), - ); - - spawn(task_manager.run(shutdown_receiver)); let pending_certificate_count = storage .get_pending_certificates() @@ -142,16 +138,14 @@ impl ReliableBroadcastClient { let double_echo = DoubleEcho::new( config.tce_params, task_manager_message_sender, - task_completion_receiver, command_receiver, - subscriptions_view_receiver, event_sender, double_echo_shutdown_receiver, local_peer_id, pending_certificate_count, ); - spawn(double_echo.run()); + spawn(double_echo.run(subscriptions_view_receiver, task_manager_message_receiver)); ( Self { @@ -171,6 +165,7 @@ impl ReliableBroadcastClient { ready: set.clone(), network_size: set.len(), }) + .await .map(|_| ()) .map_err(|_| ()) } diff --git a/crates/topos-tce-broadcast/src/task_manager_channels/task.rs b/crates/topos-tce-broadcast/src/task_manager_channels/task.rs index 99f17f83f..a614b6ac0 100644 --- a/crates/topos-tce-broadcast/src/task_manager_channels/task.rs +++ b/crates/topos-tce-broadcast/src/task_manager_channels/task.rs @@ -1,17 +1,10 @@ use tokio::sync::mpsc; use crate::DoubleEchoCommand; +use crate::TaskStatus; use tce_transport::ReliableBroadcastParams; use topos_core::uci::CertificateId; -#[derive(Debug, PartialEq, Eq)] -pub enum TaskStatus { - /// The task finished succesfully and broadcasted the certificate + received ready - Success, - /// The task did not finish succesfully and stopped. - Failure, -} - #[derive(Debug, Clone)] pub struct TaskContext { pub message_sender: mpsc::Sender, diff --git a/crates/topos-tce-broadcast/src/task_manager_futures/mod.rs b/crates/topos-tce-broadcast/src/task_manager_futures/mod.rs index da2ebe453..fa4985f16 100644 --- a/crates/topos-tce-broadcast/src/task_manager_futures/mod.rs +++ b/crates/topos-tce-broadcast/src/task_manager_futures/mod.rs @@ -14,7 +14,8 @@ pub mod task; use crate::double_echo::broadcast_state::BroadcastState; use crate::sampler::SubscriptionsView; use crate::DoubleEchoCommand; -use task::{Task, TaskContext, TaskStatus}; +use crate::TaskStatus; +use task::{Task, TaskContext}; /// The TaskManager is responsible for receiving messages from the network and distributing them /// among tasks. These tasks are either created if none for a certain CertificateID exists yet, @@ -22,7 +23,7 @@ use task::{Task, TaskContext, TaskStatus}; pub struct TaskManager { pub message_receiver: mpsc::Receiver, pub task_completion_sender: mpsc::Sender<(CertificateId, TaskStatus)>, - pub subscription_view_receiver: broadcast::Receiver, + pub subscription_view_receiver: mpsc::Receiver, pub subscriptions: SubscriptionsView, pub event_sender: mpsc::Sender, pub tasks: HashMap, @@ -39,7 +40,7 @@ impl TaskManager { pub fn new( message_receiver: mpsc::Receiver, task_completion_sender: mpsc::Sender<(CertificateId, TaskStatus)>, - subscription_view_receiver: broadcast::Receiver, + subscription_view_receiver: mpsc::Receiver, event_sender: mpsc::Sender, thresholds: ReliableBroadcastParams, ) -> (Self, mpsc::Receiver<()>) { @@ -65,6 +66,12 @@ impl TaskManager { pub async fn run(mut self, mut shutdown_receiver: mpsc::Receiver<()>) { loop { tokio::select! { + biased; + + Some(new_subscriptions_view) = self.subscription_view_receiver.recv() => { + println!("Received view"); + self.subscriptions = new_subscriptions_view; + } Some(msg) = self.message_receiver.recv() => { println!("RECEIVE MESSAGE: {msg:?}"); match msg { @@ -103,9 +110,6 @@ impl TaskManager { } } - Ok(new_subscriptions_view) = self.subscription_view_receiver.recv() => { - self.subscriptions = new_subscriptions_view; - } Some((certificate_id, status)) = self.running_tasks.next() => { if status == TaskStatus::Success { diff --git a/crates/topos-tce-broadcast/src/task_manager_futures/task.rs b/crates/topos-tce-broadcast/src/task_manager_futures/task.rs index 3e77cbdbd..24688c65b 100644 --- a/crates/topos-tce-broadcast/src/task_manager_futures/task.rs +++ b/crates/topos-tce-broadcast/src/task_manager_futures/task.rs @@ -6,15 +6,7 @@ use topos_core::uci::CertificateId; use tracing::warn; use crate::double_echo::broadcast_state::{BroadcastState, Status}; -use crate::DoubleEchoCommand; - -#[derive(Debug, PartialEq, Eq)] -pub enum TaskStatus { - /// The task finished succesfully and broadcasted the certificate + received ready - Success, - /// The task did not finish succesfully and stopped. - Failure, -} +use crate::{DoubleEchoCommand, TaskStatus}; #[derive(Debug)] pub struct TaskContext { diff --git a/crates/topos-tce-broadcast/src/tests/mod.rs b/crates/topos-tce-broadcast/src/tests/mod.rs index fa65daddc..81f342ef7 100644 --- a/crates/topos-tce-broadcast/src/tests/mod.rs +++ b/crates/topos-tce-broadcast/src/tests/mod.rs @@ -53,37 +53,24 @@ struct TceParams { struct Context { event_receiver: Receiver, - subscriptions_view_sender: broadcast::Sender, + subscriptions_view_sender: mpsc::Sender, cmd_sender: Sender, double_echo_shutdown_sender: Sender>, } async fn create_context(params: TceParams) -> (DoubleEcho, Context) { - let (subscriptions_view_sender, subscriptions_view_receiver) = broadcast::channel(CHANNEL_SIZE); + let (subscriptions_view_sender, subscriptions_view_receiver) = mpsc::channel(CHANNEL_SIZE); let (cmd_sender, cmd_receiver) = mpsc::channel(CHANNEL_SIZE); let (event_sender, event_receiver) = mpsc::channel(CHANNEL_SIZE); let (double_echo_shutdown_sender, double_echo_shutdown_receiver) = mpsc::channel::>(1); let (task_manager_message_sender, task_manager_message_receiver) = mpsc::channel(CHANNEL_SIZE); - let (task_completion_sender, task_completion_receiver) = mpsc::channel(CHANNEL_SIZE); - - let (task_manager, shutdown_receiver) = TaskManager::new( - task_manager_message_receiver, - task_completion_sender, - subscriptions_view_sender.subscribe(), - event_sender.clone(), - params.broadcast_params.clone(), - ); - - spawn(task_manager.run(shutdown_receiver)); let mut double_echo = DoubleEcho::new( params.broadcast_params, task_manager_message_sender.clone(), - task_completion_receiver, cmd_receiver, - subscriptions_view_receiver, event_sender, double_echo_shutdown_receiver, String::new(), @@ -104,25 +91,19 @@ async fn create_context(params: TceParams) -> (DoubleEcho, Context) { double_echo.subscriptions.ready = peers.clone(); double_echo.subscriptions.network_size = params.nb_peers; - let _ = subscriptions_view_sender.send(SubscriptionsView { + let msg = SubscriptionsView { echo: peers.clone(), ready: peers.clone(), network_size: params.nb_peers, - }); + }; + let _ = subscriptions_view_sender.send(msg).await.unwrap(); - task_manager_message_sender - .send(DoubleEchoCommand::Broadcast { - need_gossip: true, - cert: Certificate::default(), - }) - .await - .expect("Cannot send broadcast command"); + double_echo.spawn_task_manager(subscriptions_view_receiver, task_manager_message_receiver); ( double_echo, Context { event_receiver, - // subscribers_update_sender, subscriptions_view_sender, cmd_sender, double_echo_shutdown_sender, @@ -192,11 +173,11 @@ async fn trigger_success_path_upon_reaching_threshold(#[case] params: TceParams) .expect("Dummy certificate"); // Trigger Echo upon dispatching - double_echo.broadcast(dummy_cert.clone(), true); + double_echo.broadcast(dummy_cert.clone(), true).await; assert!(matches!( - ctx.event_receiver.try_recv(), - Ok(ProtocolEvents::Broadcast { certificate_id }) if certificate_id == dummy_cert.id + ctx.event_receiver.recv().await, + Some(ProtocolEvents::Broadcast { certificate_id }) if certificate_id == dummy_cert.id )); assert!(matches!( @@ -217,16 +198,16 @@ async fn trigger_success_path_upon_reaching_threshold(#[case] params: TceParams) reach_echo_threshold(&mut double_echo, &dummy_cert).await; assert!(matches!( - ctx.event_receiver.try_recv(), - Ok(ProtocolEvents::Ready { .. }) + ctx.event_receiver.recv().await, + Some(ProtocolEvents::Ready { .. }) )); // Trigger Delivery upon reaching the Delivery threshold reach_delivery_threshold(&mut double_echo, &dummy_cert).await; assert!(matches!( - ctx.event_receiver.try_recv(), - Ok(ProtocolEvents::CertificateDelivered { certificate }) if certificate == dummy_cert + ctx.event_receiver.recv().await, + Some(ProtocolEvents::CertificateDelivered { certificate }) if certificate == dummy_cert )); } @@ -250,7 +231,7 @@ async fn trigger_ready_when_reached_enough_ready(#[case] params: TceParams) { .expect("Dummy certificate"); // Trigger Echo upon dispatching - double_echo.broadcast(dummy_cert.clone(), true); + double_echo.broadcast(dummy_cert.clone(), true).await; assert!(matches!( ctx.event_receiver.try_recv(), @@ -271,8 +252,8 @@ async fn trigger_ready_when_reached_enough_ready(#[case] params: TceParams) { reach_ready_threshold(&mut double_echo, &dummy_cert).await; assert!(matches!( - ctx.event_receiver.try_recv(), - Ok(ProtocolEvents::Ready { .. }) + ctx.event_receiver.recv().await, + Some(ProtocolEvents::Ready { .. }) )); } diff --git a/crates/topos-tce-broadcast/src/tests/task_manager_futures.rs b/crates/topos-tce-broadcast/src/tests/task_manager_futures.rs index fcc1f9f9a..75a8cbfc9 100644 --- a/crates/topos-tce-broadcast/src/tests/task_manager_futures.rs +++ b/crates/topos-tce-broadcast/src/tests/task_manager_futures.rs @@ -15,7 +15,7 @@ async fn task_manager_futures_receiving_messages() { let (message_sender, message_receiver) = mpsc::channel(1024); let (task_completion_sender, mut task_completion_receiver) = mpsc::channel(1024); - let (subscription_view_sender, subscription_view_receiver) = broadcast::channel(1024); + let (subscription_view_sender, subscription_view_receiver) = mpsc::channel(1024); let (event_sender, _event_receiver) = mpsc::channel(1024); let thresholds = ReliableBroadcastParams { @@ -34,7 +34,7 @@ async fn task_manager_futures_receiving_messages() { let subscription_view = SubscriptionsView::default(); - subscription_view_sender.send(subscription_view); + subscription_view_sender.send(subscription_view).await; spawn(task_manager.run(shutdown_receiver)); From 4a8bbb7f72c645077c633191de7e840b6debf935 Mon Sep 17 00:00:00 2001 From: Bastian Gruber Date: Tue, 25 Jul 2023 14:53:34 +0200 Subject: [PATCH 16/23] fix: pr review, cleaning up tests and clippy messages --- crates/topos-tce-broadcast/Cargo.toml | 1 - .../benches/double_echo.rs | 4 +- .../src/double_echo/mod.rs | 62 +++----------- crates/topos-tce-broadcast/src/lib.rs | 17 +--- .../src/task_manager_futures/mod.rs | 11 ++- .../src/task_manager_futures/task.rs | 12 +-- crates/topos-tce-broadcast/src/tests/mod.rs | 82 ++----------------- .../src/tests/task_manager_channels.rs | 60 -------------- .../src/tests/task_manager_futures.rs | 72 ---------------- crates/topos/Cargo.toml | 5 +- crates/topos/build.rs | 7 -- 11 files changed, 32 insertions(+), 301 deletions(-) delete mode 100644 crates/topos-tce-broadcast/src/tests/task_manager_channels.rs delete mode 100644 crates/topos-tce-broadcast/src/tests/task_manager_futures.rs diff --git a/crates/topos-tce-broadcast/Cargo.toml b/crates/topos-tce-broadcast/Cargo.toml index 1b6ee040e..539a5f8a6 100644 --- a/crates/topos-tce-broadcast/Cargo.toml +++ b/crates/topos-tce-broadcast/Cargo.toml @@ -33,7 +33,6 @@ topos-test-sdk = { path = "../topos-test-sdk/" } [features] task-manager-channels = [] -task-manager-futures = [] [[bench]] name = "double_echo" diff --git a/crates/topos-tce-broadcast/benches/double_echo.rs b/crates/topos-tce-broadcast/benches/double_echo.rs index e6873a859..cf56479f5 100644 --- a/crates/topos-tce-broadcast/benches/double_echo.rs +++ b/crates/topos-tce-broadcast/benches/double_echo.rs @@ -3,7 +3,7 @@ use criterion::{criterion_group, criterion_main, Criterion}; #[cfg(feature = "task-manager-channels")] mod task_manager_channels; -#[cfg(feature = "task-manager-futures")] +#[cfg(not(feature = "task-manager-channels"))] mod task_manager_futures; pub fn criterion_benchmark(c: &mut Criterion) { @@ -22,7 +22,7 @@ pub fn criterion_benchmark(c: &mut Criterion) { }) }); - #[cfg(feature = "task-manager-futures")] + #[cfg(not(feature = "task-manager-channels"))] c.bench_function("double_echo with futures", |b| { b.to_async(FuturesExecutor).iter(|| async { runtime.block_on(async { diff --git a/crates/topos-tce-broadcast/src/double_echo/mod.rs b/crates/topos-tce-broadcast/src/double_echo/mod.rs index 52f0ed74f..25d9bbcf8 100644 --- a/crates/topos-tce-broadcast/src/double_echo/mod.rs +++ b/crates/topos-tce-broadcast/src/double_echo/mod.rs @@ -1,13 +1,12 @@ use crate::TaskStatus; use crate::{DoubleEchoCommand, SubscriptionsView}; use std::collections::HashSet; -use std::collections::{HashMap, VecDeque}; use tce_transport::{ProtocolEvents, ReliableBroadcastParams}; use tokio::sync::{mpsc, oneshot}; use topos_core::uci::{Certificate, CertificateId}; use topos_p2p::PeerId; -use tracing::{error, info, warn, Span}; +use tracing::{error, info, warn}; pub mod broadcast_state; @@ -27,12 +26,7 @@ pub struct DoubleEcho { task_manager_message_sender: mpsc::Sender, - /// Span tracker for each certificate - span_tracker: HashMap, - pub(crate) subscriptions: SubscriptionsView, // My subscriptions for echo, ready and delivery feedback - - local_peer_id: String, } impl DoubleEcho { @@ -45,7 +39,6 @@ impl DoubleEcho { command_receiver: mpsc::Receiver, event_sender: mpsc::Sender, shutdown: mpsc::Receiver>, - local_peer_id: String, _pending_certificate_count: u64, ) -> Self { Self { @@ -53,10 +46,8 @@ impl DoubleEcho { task_manager_message_sender, command_receiver, event_sender, - span_tracker: Default::default(), subscriptions: SubscriptionsView::default(), shutdown, - local_peer_id, delivered_certificates: Default::default(), } } @@ -81,6 +72,7 @@ impl DoubleEcho { task_completion_receiver } + #[cfg(feature = "task-manager-channels")] pub(crate) fn spawn_task_manager( &mut self, @@ -115,19 +107,18 @@ impl DoubleEcho { Some(command) = self.command_receiver.recv() => { match command { - DoubleEchoCommand::Broadcast { need_gossip, cert } => Ok::<_, ()>(self.broadcast(cert, need_gossip).await), + DoubleEchoCommand::Broadcast { need_gossip, cert } => self.broadcast(cert, need_gossip).await, - command if self.subscriptions.is_some() => Ok({ + command if self.subscriptions.is_some() => { match command { DoubleEchoCommand::Echo { from_peer, certificate_id } => self.handle_echo(from_peer, certificate_id).await, DoubleEchoCommand::Ready { from_peer, certificate_id } => self.handle_ready(from_peer, certificate_id).await, _ => {} } - }), + }, command => { warn!("Received a command {command:?} while not having a complete sampling"); - Ok(()) } } } @@ -136,7 +127,6 @@ impl DoubleEcho { if status == TaskStatus::Success { self.delivered_certificates.insert(certificate_id); } - Ok(()) } @@ -160,11 +150,8 @@ impl DoubleEcho { /// Called to process potentially new certificate: /// - either submitted from API ( [tce_transport::TceCommands::Broadcast] command) /// - or received through the gossip (first step of protocol exchange) - ///TODO: This is not really broadcasting, merley adding the certificate into a known list. - ///TODO: Rename or restructure this function pub(crate) async fn broadcast(&mut self, cert: Certificate, origin: bool) { info!("🙌 Starting broadcasting the Certificate {}", &cert.id); - if self.cert_pre_broadcast_check(&cert).is_err() { error!("Failure on the pre-check for the Certificate {}", &cert.id); self.event_sender @@ -174,15 +161,6 @@ impl DoubleEcho { .unwrap(); return; } - // // Don't gossip one cert already gossiped - // if self.cert_candidate.contains_key(&cert.id) { - // self.event_sender - // .try_send(ProtocolEvents::BroadcastFailed { - // certificate_id: cert.id, - // }) - // .unwrap(); - // return; - // } if self.delivered_certificates.get(&cert.id).is_some() { self.event_sender @@ -194,19 +172,13 @@ impl DoubleEcho { return; } - // Trigger event of new certificate candidate for delivery - let certificate_id = cert.id; - // To include tracing context in client requests from _this_ app, - // use `context` to extract the current OpenTelemetry context. - // Add new entry for the new Cert candidate - match self.delivery_state_for_new_cert(cert, origin).await { - Some(delivery_state) => { - // self.cert_candidate.insert(certificate_id, delivery_state); - } - None => { - error!("Ill-formed samples"); - _ = self.event_sender.try_send(ProtocolEvents::Die); - } + if self + .delivery_state_for_new_cert(cert, origin) + .await + .is_none() + { + error!("Ill-formed samples"); + _ = self.event_sender.try_send(ProtocolEvents::Die); } } @@ -236,15 +208,6 @@ impl DoubleEcho { .await; Some(true) - // Some(BroadcastState::new( - // certificate, - // self.params.echo_threshold, - // self.params.ready_threshold, - // self.params.delivery_threshold, - // self.event_sender.clone(), - // subscriptions, - // origin, - // )) } } @@ -277,7 +240,6 @@ impl DoubleEcho { pub(crate) async fn handle_ready(&mut self, from_peer: PeerId, certificate_id: CertificateId) { if self.delivered_certificates.get(&certificate_id).is_none() { - println!("SEND READY"); let _ = self .task_manager_message_sender .send(DoubleEchoCommand::Ready { diff --git a/crates/topos-tce-broadcast/src/lib.rs b/crates/topos-tce-broadcast/src/lib.rs index 806205cf3..0787e5d16 100644 --- a/crates/topos-tce-broadcast/src/lib.rs +++ b/crates/topos-tce-broadcast/src/lib.rs @@ -31,12 +31,9 @@ mod constant; pub mod double_echo; pub mod sampler; -#[cfg(all( - feature = "task-manager-channels", - not(feature = "task-manager-futures") -))] +#[cfg(feature = "task-manager-channels")] pub mod task_manager_channels; -#[cfg(feature = "task-manager-futures")] +#[cfg(not(feature = "task-manager-channels"))] pub mod task_manager_futures; #[cfg(test)] @@ -51,13 +48,6 @@ pub enum TaskStatus { /// The task did not finish succesfully and stopped. Failure, } -#[cfg(feature = "task-manager-futures")] -use crate::task_manager_futures::TaskManager; -#[cfg(all( - feature = "task-manager-channels", - not(feature = "task-manager-futures") -))] -use task_manager_channels::TaskManager; /// Configuration of TCE implementation pub struct ReliableBroadcastConfig { @@ -117,7 +107,7 @@ impl ReliableBroadcastClient { /// Aggregate is spawned as new task. pub async fn new( config: ReliableBroadcastConfig, - local_peer_id: String, + _local_peer_id: String, storage: StorageClient, ) -> (Self, impl Stream) { let (subscriptions_view_sender, subscriptions_view_receiver) = @@ -141,7 +131,6 @@ impl ReliableBroadcastClient { command_receiver, event_sender, double_echo_shutdown_receiver, - local_peer_id, pending_certificate_count, ); diff --git a/crates/topos-tce-broadcast/src/task_manager_futures/mod.rs b/crates/topos-tce-broadcast/src/task_manager_futures/mod.rs index fa4985f16..d1a95f9d6 100644 --- a/crates/topos-tce-broadcast/src/task_manager_futures/mod.rs +++ b/crates/topos-tce-broadcast/src/task_manager_futures/mod.rs @@ -5,7 +5,7 @@ use std::collections::HashMap; use std::future::IntoFuture; use std::pin::Pin; use tce_transport::{ProtocolEvents, ReliableBroadcastParams}; -use tokio::sync::{broadcast, mpsc}; +use tokio::sync::mpsc; use topos_core::uci::CertificateId; use tracing::warn; @@ -69,14 +69,13 @@ impl TaskManager { biased; Some(new_subscriptions_view) = self.subscription_view_receiver.recv() => { - println!("Received view"); self.subscriptions = new_subscriptions_view; } Some(msg) = self.message_receiver.recv() => { - println!("RECEIVE MESSAGE: {msg:?}"); match msg { - DoubleEchoCommand::Echo { certificate_id, from_peer } | DoubleEchoCommand::Ready { certificate_id, from_peer } => { + DoubleEchoCommand::Echo { certificate_id, .. } | DoubleEchoCommand::Ready { certificate_id, .. } => { if let Some(task) = self.tasks.get(&certificate_id) { + _ = task.sink.send(msg).await; } else { self.buffered_messages @@ -104,7 +103,7 @@ impl TaskManager { entry.insert(task_context); } - std::collections::hash_map::Entry::Occupied(entry) => {}, + std::collections::hash_map::Entry::Occupied(_) => {}, } } } @@ -131,7 +130,7 @@ impl TaskManager { } for (certificate_id, messages) in &mut self.buffered_messages { - if let Some(task) = self.tasks.get_mut(&certificate_id) { + if let Some(task) = self.tasks.get_mut(certificate_id) { for msg in messages { _ = task.sink.send(msg.clone()).await; } diff --git a/crates/topos-tce-broadcast/src/task_manager_futures/task.rs b/crates/topos-tce-broadcast/src/task_manager_futures/task.rs index 24688c65b..9b01a4ebe 100644 --- a/crates/topos-tce-broadcast/src/task_manager_futures/task.rs +++ b/crates/topos-tce-broadcast/src/task_manager_futures/task.rs @@ -57,23 +57,17 @@ impl IntoFuture for Task { tokio::select! { Some(msg) = self.message_receiver.recv() => { match msg { - DoubleEchoCommand::Echo { certificate_id, from_peer } => { - println!("Receive ECHO in task"); + DoubleEchoCommand::Echo { from_peer, .. } => { if let Some(Status::DeliveredWithReadySent) = self.broadcast_state.apply_echo(from_peer) { return (self.certificate_id, TaskStatus::Success); } } - DoubleEchoCommand::Ready { certificate_id, from_peer } => { - println!("Receive READY in task"); + DoubleEchoCommand::Ready { from_peer, .. } => { if let Some(Status::DeliveredWithReadySent) = self.broadcast_state.apply_ready(from_peer) { return (self.certificate_id, TaskStatus::Success); } } - DoubleEchoCommand::Broadcast { cert, .. } => { - println!("Receive BROADCAST in task"); - // return (cert.id, TaskStatus::Success); - } - + _ => {} } } _ = self.shutdown_receiver.recv() => { diff --git a/crates/topos-tce-broadcast/src/tests/mod.rs b/crates/topos-tce-broadcast/src/tests/mod.rs index 81f342ef7..186988d58 100644 --- a/crates/topos-tce-broadcast/src/tests/mod.rs +++ b/crates/topos-tce-broadcast/src/tests/mod.rs @@ -1,11 +1,3 @@ -#[cfg(all( - feature = "task-manager-channels", - not(feature = "task-manager-futures") -))] -mod task_manager_channels; -#[cfg(feature = "task-manager-futures")] -mod task_manager_futures; - use crate::double_echo::*; use crate::*; use rstest::*; @@ -14,12 +6,10 @@ use std::usize; use tce_transport::ReliableBroadcastParams; use tokio::sync::mpsc::Receiver; -use tokio::time::Duration; use topos_test_sdk::constants::*; const CHANNEL_SIZE: usize = 10; -const WAIT_EVENT_TIMEOUT: Duration = Duration::from_secs(1); #[fixture] fn small_config() -> TceParams { @@ -53,17 +43,14 @@ struct TceParams { struct Context { event_receiver: Receiver, - subscriptions_view_sender: mpsc::Sender, - cmd_sender: Sender, - double_echo_shutdown_sender: Sender>, } async fn create_context(params: TceParams) -> (DoubleEcho, Context) { let (subscriptions_view_sender, subscriptions_view_receiver) = mpsc::channel(CHANNEL_SIZE); - let (cmd_sender, cmd_receiver) = mpsc::channel(CHANNEL_SIZE); + let (_cmd_sender, cmd_receiver) = mpsc::channel(CHANNEL_SIZE); let (event_sender, event_receiver) = mpsc::channel(CHANNEL_SIZE); - let (double_echo_shutdown_sender, double_echo_shutdown_receiver) = + let (_double_echo_shutdown_sender, double_echo_shutdown_receiver) = mpsc::channel::>(1); let (task_manager_message_sender, task_manager_message_receiver) = mpsc::channel(CHANNEL_SIZE); @@ -73,7 +60,6 @@ async fn create_context(params: TceParams) -> (DoubleEcho, Context) { cmd_receiver, event_sender, double_echo_shutdown_receiver, - String::new(), 0, ); @@ -100,15 +86,7 @@ async fn create_context(params: TceParams) -> (DoubleEcho, Context) { double_echo.spawn_task_manager(subscriptions_view_receiver, task_manager_message_receiver); - ( - double_echo, - Context { - event_receiver, - subscriptions_view_sender, - cmd_sender, - double_echo_shutdown_sender, - }, - ) + (double_echo, Context { event_receiver }) } async fn reach_echo_threshold(double_echo: &mut DoubleEcho, cert: &Certificate) { @@ -234,8 +212,8 @@ async fn trigger_ready_when_reached_enough_ready(#[case] params: TceParams) { double_echo.broadcast(dummy_cert.clone(), true).await; assert!(matches!( - ctx.event_receiver.try_recv(), - Ok(ProtocolEvents::Broadcast { certificate_id }) if certificate_id == dummy_cert.id + ctx.event_receiver.recv().await, + Some(ProtocolEvents::Broadcast { certificate_id }) if certificate_id == dummy_cert.id )); assert!(matches!( @@ -256,53 +234,3 @@ async fn trigger_ready_when_reached_enough_ready(#[case] params: TceParams) { Some(ProtocolEvents::Ready { .. }) )); } - -// #[rstest] -// #[case::small_config(small_config())] -// #[tokio::test] -// async fn buffering_certificate(#[case] params: TceParams) { -// let (double_echo, mut ctx) = create_context(params).await; -// -// let subscriptions = double_echo.subscriptions.clone(); -// -// spawn(double_echo.run()); -// -// // Wait to receive subscribers -// tokio::time::sleep(WAIT_EVENT_TIMEOUT).await; -// -// let le_cert = Certificate::default(); -// ctx.cmd_sender -// .send(DoubleEchoCommand::Broadcast { -// need_gossip: true, -// cert: le_cert.clone(), -// }) -// .await -// .expect("Cannot send broadcast command"); -// -// ctx.subscriptions_view_sender -// .send(subscriptions.clone()) -// .expect("Cannot send expected view"); -// -// let mut received_gossip_commands: Vec = Vec::new(); -// let assertion = async { -// while let Some(event) = ctx.event_receiver.recv().await { -// if let ProtocolEvents::Gossip { cert, .. } = event { -// received_gossip_commands.push(cert); -// } -// } -// }; -// -// let _ = tokio::time::timeout(Duration::from_secs(1), assertion).await; -// -// assert_eq!(received_gossip_commands.len(), 1); -// assert_eq!(received_gossip_commands[0].id, le_cert.id); -// -// // Test shutdown -// info!("Waiting for double echo to shutdown..."); -// let (sender, receiver) = oneshot::channel(); -// ctx.double_echo_shutdown_sender -// .send(sender) -// .await -// .expect("Valid shutdown signal sending"); -// assert_eq!(receiver.await, Ok(())); -// } diff --git a/crates/topos-tce-broadcast/src/tests/task_manager_channels.rs b/crates/topos-tce-broadcast/src/tests/task_manager_channels.rs deleted file mode 100644 index a108e9527..000000000 --- a/crates/topos-tce-broadcast/src/tests/task_manager_channels.rs +++ /dev/null @@ -1,60 +0,0 @@ -use crate::task_manager_channels::TaskManager; -use tce_transport::ReliableBroadcastParams; - -use crate::*; -use rand::Rng; -use rstest::*; -use tokio::{spawn, sync::mpsc}; -use topos_p2p::PeerId; - -#[rstest] -#[tokio::test] -async fn task_manager_channels_receiving_messages() { - let n = 5; - - let (message_sender, message_receiver) = mpsc::channel(1024); - let (event_sender, mut event_receiver) = mpsc::channel(1024); - - let threshold = ReliableBroadcastParams { - echo_threshold: n, - ready_threshold: n, - delivery_threshold: n, - }; - - let (task_manager, shutdown_receiver) = - TaskManager::new(message_receiver, event_sender, threshold); - - spawn(task_manager.run(shutdown_receiver)); - - let mut certificates = vec![]; - - let mut rng = rand::thread_rng(); - - for _ in 0..10 { - let mut id = [0u8; 32]; - rng.fill(&mut id); - let cert_id = CertificateId::from_array(id); - certificates.push(cert_id); - } - - for certificate_id in certificates { - for _ in 0..n { - let echo = DoubleEchoCommand::Echo { - from_peer: PeerId::random(), - certificate_id, - }; - - message_sender.send(echo).await.unwrap(); - } - } - - let mut count = 0; - - while let Some((_, _)) = event_receiver.recv().await { - count += 1; - - if count == n { - break; - } - } -} diff --git a/crates/topos-tce-broadcast/src/tests/task_manager_futures.rs b/crates/topos-tce-broadcast/src/tests/task_manager_futures.rs deleted file mode 100644 index 75a8cbfc9..000000000 --- a/crates/topos-tce-broadcast/src/tests/task_manager_futures.rs +++ /dev/null @@ -1,72 +0,0 @@ -use crate::sampler::SubscriptionsView; -use crate::task_manager_futures::TaskManager; -use crate::{CertificateId, DoubleEchoCommand}; -use rand::Rng; -use rstest::*; -use tce_transport::ReliableBroadcastParams; -use tokio::sync::broadcast; -use tokio::{spawn, sync::mpsc}; -use topos_p2p::PeerId; - -#[rstest] -#[tokio::test] -async fn task_manager_futures_receiving_messages() { - let n = 5; - - let (message_sender, message_receiver) = mpsc::channel(1024); - let (task_completion_sender, mut task_completion_receiver) = mpsc::channel(1024); - let (subscription_view_sender, subscription_view_receiver) = mpsc::channel(1024); - let (event_sender, _event_receiver) = mpsc::channel(1024); - - let thresholds = ReliableBroadcastParams { - echo_threshold: n, - ready_threshold: n, - delivery_threshold: n, - }; - - let (task_manager, shutdown_receiver) = TaskManager::new( - message_receiver, - task_completion_sender, - subscription_view_receiver, - event_sender, - thresholds, - ); - - let subscription_view = SubscriptionsView::default(); - - subscription_view_sender.send(subscription_view).await; - - spawn(task_manager.run(shutdown_receiver)); - - let mut certificates = vec![]; - - let mut rng = rand::thread_rng(); - - for _ in 0..10 { - let mut id = [0u8; 32]; - rng.fill(&mut id); - let cert_id = CertificateId::from_array(id); - certificates.push(cert_id); - } - - for certificate_id in certificates { - for _ in 0..n { - let echo = DoubleEchoCommand::Echo { - from_peer: PeerId::random(), - certificate_id, - }; - - message_sender.send(echo).await.unwrap(); - } - } - - let mut count = 0; - - while let Some((_, _)) = task_completion_receiver.recv().await { - count += 1; - - if count == n { - break; - } - } -} diff --git a/crates/topos/Cargo.toml b/crates/topos/Cargo.toml index 871430cce..4de7d0d52 100644 --- a/crates/topos/Cargo.toml +++ b/crates/topos/Cargo.toml @@ -61,11 +61,10 @@ rstest = { workspace = true, features = ["async-timeout"] } regex = "1" [features] -default = ["tce", "sequencer", "network", "node", "setup", "subnet", "topos-tce-broadcast/task-manager-futures"] +default = ["tce", "sequencer", "network", "node", "setup", "subnet"] tce = ["topos-tce", "topos-tce-transport"] sequencer = ["topos-sequencer"] network = ["topos-certificate-spammer"] node = ["tce", "sequencer"] setup = [] -subnet = [] -task-manager-channels = ["tce", "sequencer", "network", "node", "setup", "subnet", "topos-tce-broadcast/task-manager-channels"] \ No newline at end of file +subnet = [] \ No newline at end of file diff --git a/crates/topos/build.rs b/crates/topos/build.rs index 3b0bec9c2..980a55a55 100644 --- a/crates/topos/build.rs +++ b/crates/topos/build.rs @@ -20,11 +20,4 @@ fn main() { println!("cargo:rustc-env=TOPOS_VERSION={topos_version}"); } - - let using_futures = cfg!(feature = "task-manager-futures"); - let using_channels = cfg!(feature = "task-manager-channels"); - - if using_futures && using_channels { - panic!("The features 'task-manager-futures' and 'task-manager-channels' are mutually exclusive."); - } } From da16ac780b89eec18edc30e35febd6c85ee1ca16 Mon Sep 17 00:00:00 2001 From: Bastian Gruber Date: Tue, 25 Jul 2023 15:34:09 +0200 Subject: [PATCH 17/23] chore: add task-manager-channels to double-echo --- .../src/double_echo/mod.rs | 16 +- .../src/task_manager_channels/mod.rs | 95 +-- .../src/task_manager_channels/task.rs | 59 +- .../src/task_manager_futures/mod.rs | 7 +- crates/topos/tests/cert_delivery.rs | 610 +++++++++--------- 5 files changed, 414 insertions(+), 373 deletions(-) diff --git a/crates/topos-tce-broadcast/src/double_echo/mod.rs b/crates/topos-tce-broadcast/src/double_echo/mod.rs index 25d9bbcf8..742ef39d8 100644 --- a/crates/topos-tce-broadcast/src/double_echo/mod.rs +++ b/crates/topos-tce-broadcast/src/double_echo/mod.rs @@ -77,7 +77,21 @@ impl DoubleEcho { pub(crate) fn spawn_task_manager( &mut self, subscriptions_view_receiver: mpsc::Receiver, - ) { + task_manager_message_receiver: mpsc::Receiver, + ) -> mpsc::Receiver<(CertificateId, TaskStatus)> { + let (task_completion_sender, task_completion_receiver) = mpsc::channel(2048); + + let (task_manager, shutdown_receiver) = crate::task_manager_channels::TaskManager::new( + task_manager_message_receiver, + task_completion_sender, + subscriptions_view_receiver, + self.event_sender.clone(), + self.params.clone(), + ); + + tokio::spawn(task_manager.run(shutdown_receiver)); + + task_completion_receiver } /// DoubleEcho main loop diff --git a/crates/topos-tce-broadcast/src/task_manager_channels/mod.rs b/crates/topos-tce-broadcast/src/task_manager_channels/mod.rs index a18888210..01ca7ab63 100644 --- a/crates/topos-tce-broadcast/src/task_manager_channels/mod.rs +++ b/crates/topos-tce-broadcast/src/task_manager_channels/mod.rs @@ -1,13 +1,16 @@ use std::collections::HashMap; use tokio::{spawn, sync::mpsc}; -use tce_transport::ReliableBroadcastParams; +use tce_transport::{ProtocolEvents, ReliableBroadcastParams}; use topos_core::uci::CertificateId; +use tracing::warn; pub mod task; - +use crate::double_echo::broadcast_state::BroadcastState; +use crate::sampler::SubscriptionsView; use crate::DoubleEchoCommand; -use task::{Task, TaskContext, TaskStatus}; +use crate::TaskStatus; +use task::{Task, TaskContext}; /// The TaskManager is responsible for receiving messages from the network and distributing them /// among tasks. These tasks are either created if none for a certain CertificateID exists yet, @@ -16,8 +19,12 @@ pub struct TaskManager { pub message_receiver: mpsc::Receiver, pub task_completion_receiver: mpsc::Receiver<(CertificateId, TaskStatus)>, pub task_completion_sender: mpsc::Sender<(CertificateId, TaskStatus)>, - pub event_sender: mpsc::Sender<(CertificateId, TaskStatus)>, + pub notify_task_completion: mpsc::Sender<(CertificateId, TaskStatus)>, + pub subscription_view_receiver: mpsc::Receiver, + pub subscriptions: SubscriptionsView, + pub event_sender: mpsc::Sender, pub tasks: HashMap, + pub buffered_messages: HashMap>, pub thresholds: ReliableBroadcastParams, pub shutdown_sender: mpsc::Sender<()>, } @@ -25,7 +32,9 @@ pub struct TaskManager { impl TaskManager { pub fn new( message_receiver: mpsc::Receiver, - event_sender: mpsc::Sender<(CertificateId, TaskStatus)>, + notify_task_completion: mpsc::Sender<(CertificateId, TaskStatus)>, + subscription_view_receiver: mpsc::Receiver, + event_sender: mpsc::Sender, thresholds: ReliableBroadcastParams, ) -> (Self, mpsc::Receiver<()>) { let (task_completion_sender, task_completion_receiver) = mpsc::channel(1024); @@ -36,8 +45,12 @@ impl TaskManager { message_receiver, task_completion_receiver, task_completion_sender, + notify_task_completion, + subscription_view_receiver, + subscriptions: SubscriptionsView::default(), event_sender, tasks: HashMap::new(), + buffered_messages: Default::default(), thresholds, shutdown_sender, }, @@ -48,20 +61,41 @@ impl TaskManager { pub async fn run(mut self, mut shutdown_receiver: mpsc::Receiver<()>) { loop { tokio::select! { + biased; + + Some(new_subscriptions_view) = self.subscription_view_receiver.recv() => { + self.subscriptions = new_subscriptions_view; + } + Some(msg) = self.message_receiver.recv() => { match msg { DoubleEchoCommand::Echo { certificate_id, .. } | DoubleEchoCommand::Ready{ certificate_id, .. } => { - let task_context = match self.tasks.get(&certificate_id) { - Some(task_context) => task_context.to_owned(), - None => self.create_and_spawn_new_task(certificate_id, self.task_completion_sender.clone()), - }; - - _ = task_context.message_sender.send(msg).await; + if let Some(task_context) = self.tasks.get(&certificate_id) { + _ = task_context.sink.send(msg).await; + } else { + self.buffered_messages.entry(certificate_id).or_default().push(msg); + } } - DoubleEchoCommand::Broadcast { ref cert, .. } => { - if self.tasks.get(&cert.id).is_none() { - let task_context = self.create_and_spawn_new_task(cert.id, self.task_completion_sender.clone()); - _ = task_context.message_sender.send(msg).await; + DoubleEchoCommand::Broadcast { ref cert, need_gossip } => { + match self.tasks.entry(cert.id) { + std::collections::hash_map::Entry::Vacant(entry) => { + let broadcast_state = BroadcastState::new( + cert.clone(), + self.thresholds.echo_threshold, + self.thresholds.ready_threshold, + self.thresholds.delivery_threshold, + self.event_sender.clone(), + self.subscriptions.clone(), + need_gossip, + ); + + let (task, task_context) = Task::new(cert.id, self.task_completion_sender.clone(), broadcast_state); + + spawn(task.run()); + + entry.insert(task_context); + } + std::collections::hash_map::Entry::Occupied(_) => {}, } } } @@ -69,13 +103,12 @@ impl TaskManager { Some((certificate_id, status)) = self.task_completion_receiver.recv() => { self.tasks.remove(&certificate_id); - let _ = self.event_sender.send((certificate_id, status)).await; + let _ = self.notify_task_completion.send((certificate_id, status)).await; } _ = shutdown_receiver.recv() => { - println!("Task Manager shutting down"); + warn!("Task Manager shutting down"); - // Shutting down every open task for task in self.tasks.iter() { task.1.shutdown_sender.send(()).await.unwrap(); } @@ -83,25 +116,15 @@ impl TaskManager { break; } } - } - } - fn create_and_spawn_new_task( - &mut self, - certificate_id: CertificateId, - task_completion_sender: mpsc::Sender<(CertificateId, TaskStatus)>, - ) -> TaskContext { - let (task, context) = Task::new( - certificate_id, - task_completion_sender, - self.thresholds.clone(), - ); - - spawn(task.run()); - - self.tasks.insert(certificate_id, context.clone()); - - context + for (certificate_id, messages) in &mut self.buffered_messages { + if let Some(task) = self.tasks.get_mut(certificate_id) { + for msg in messages { + _ = task.sink.send(msg.clone()).await; + } + } + } + } } } diff --git a/crates/topos-tce-broadcast/src/task_manager_channels/task.rs b/crates/topos-tce-broadcast/src/task_manager_channels/task.rs index a614b6ac0..540042555 100644 --- a/crates/topos-tce-broadcast/src/task_manager_channels/task.rs +++ b/crates/topos-tce-broadcast/src/task_manager_channels/task.rs @@ -1,13 +1,13 @@ use tokio::sync::mpsc; +use crate::double_echo::broadcast_state::{BroadcastState, Status}; use crate::DoubleEchoCommand; use crate::TaskStatus; -use tce_transport::ReliableBroadcastParams; use topos_core::uci::CertificateId; #[derive(Debug, Clone)] pub struct TaskContext { - pub message_sender: mpsc::Sender, + pub sink: mpsc::Sender, pub shutdown_sender: mpsc::Sender<()>, } @@ -15,7 +15,7 @@ pub struct Task { pub message_receiver: mpsc::Receiver, pub certificate_id: CertificateId, pub completion_sender: mpsc::Sender<(CertificateId, TaskStatus)>, - pub thresholds: ReliableBroadcastParams, + pub broadcast_state: BroadcastState, pub shutdown_receiver: mpsc::Receiver<()>, } @@ -23,13 +23,13 @@ impl Task { pub fn new( certificate_id: CertificateId, completion_sender: mpsc::Sender<(CertificateId, TaskStatus)>, - thresholds: ReliableBroadcastParams, + broadcast_state: BroadcastState, ) -> (Self, TaskContext) { let (message_sender, message_receiver) = mpsc::channel(1024); let (shutdown_sender, shutdown_receiver) = mpsc::channel(1); let task_context = TaskContext { - message_sender, + sink: message_sender, shutdown_sender, }; @@ -37,38 +37,43 @@ impl Task { message_receiver, certificate_id, completion_sender, - thresholds, + broadcast_state, shutdown_receiver, }; (task, task_context) } - async fn handle_msg(&mut self, msg: DoubleEchoCommand) -> (CertificateId, TaskStatus) { - match msg { - DoubleEchoCommand::Echo { certificate_id, .. } => { - let _ = self - .completion_sender - .send((certificate_id, TaskStatus::Success)) - .await; - - return (certificate_id, TaskStatus::Success); - } - DoubleEchoCommand::Ready { certificate_id, .. } => { - return (certificate_id, TaskStatus::Success) - } - DoubleEchoCommand::Broadcast { cert, .. } => return (cert.id, TaskStatus::Success), - } - } - pub(crate) async fn run(mut self) { loop { tokio::select! { - msg = self.message_receiver.recv() => { - if let Some(msg) = msg { - if let (_, TaskStatus::Success) = self.handle_msg(msg).await { - break; + Some(msg) = self.message_receiver.recv() => { + match msg { + DoubleEchoCommand::Echo { from_peer, .. } => { + if let Some(Status::DeliveredWithReadySent) = + self.broadcast_state.apply_echo(from_peer) + { + let _ = self + .completion_sender + .send((self.certificate_id, TaskStatus::Success)) + .await; + + break; + } + } + DoubleEchoCommand::Ready { from_peer, .. } => { + if let Some(Status::DeliveredWithReadySent) = + self.broadcast_state.apply_ready(from_peer) + { + let _ = self + .completion_sender + .send((self.certificate_id, TaskStatus::Success)) + .await; + + break; + } } + _ => {} } } diff --git a/crates/topos-tce-broadcast/src/task_manager_futures/mod.rs b/crates/topos-tce-broadcast/src/task_manager_futures/mod.rs index d1a95f9d6..f7376bfa9 100644 --- a/crates/topos-tce-broadcast/src/task_manager_futures/mod.rs +++ b/crates/topos-tce-broadcast/src/task_manager_futures/mod.rs @@ -71,12 +71,12 @@ impl TaskManager { Some(new_subscriptions_view) = self.subscription_view_receiver.recv() => { self.subscriptions = new_subscriptions_view; } + Some(msg) = self.message_receiver.recv() => { match msg { DoubleEchoCommand::Echo { certificate_id, .. } | DoubleEchoCommand::Ready { certificate_id, .. } => { - if let Some(task) = self.tasks.get(&certificate_id) { - - _ = task.sink.send(msg).await; + if let Some(task_context) = self.tasks.get(&certificate_id) { + _ = task_context.sink.send(msg).await; } else { self.buffered_messages .entry(certificate_id) @@ -120,7 +120,6 @@ impl TaskManager { _ = shutdown_receiver.recv() => { warn!("Task Manager shutting down"); - // Shutting down every open task for task in self.tasks.iter() { task.1.shutdown_sender.send(()).await.unwrap(); } diff --git a/crates/topos/tests/cert_delivery.rs b/crates/topos/tests/cert_delivery.rs index a2b1ce504..863274d74 100644 --- a/crates/topos/tests/cert_delivery.rs +++ b/crates/topos/tests/cert_delivery.rs @@ -1,305 +1,305 @@ -use futures::{future::join_all, StreamExt}; -use libp2p::PeerId; -use rand::seq::IteratorRandom; -use rstest::*; -use std::collections::{HashMap, HashSet}; -use std::time::Duration; -use test_log::test; -use tokio::spawn; -use tokio::sync::mpsc; -use topos_core::{ - api::grpc::{ - shared::v1::checkpoints::TargetCheckpoint, - tce::v1::{ - watch_certificates_request::OpenStream, - watch_certificates_response::{CertificatePushed, Event}, - StatusRequest, SubmitCertificateRequest, - }, - }, - uci::{Certificate, SubnetId, SUBNET_ID_LENGTH}, -}; -use topos_test_sdk::{certificates::create_certificate_chains, tce::create_network}; -use tracing::{debug, info, warn}; - -const NUMBER_OF_SUBNETS_PER_CLIENT: usize = 1; - -fn get_subset_of_subnets(subnets: &[SubnetId], subset_size: usize) -> Vec { - let mut rng = rand::thread_rng(); - Vec::from_iter( - subnets - .iter() - .cloned() - .choose_multiple(&mut rng, subset_size), - ) -} - -#[rstest] -#[test(tokio::test)] -#[timeout(Duration::from_secs(5))] -async fn start_a_cluster() { - let mut peers_context = create_network(5).await; - - let mut status: Vec = Vec::new(); - - for (_peer_id, client) in peers_context.iter_mut() { - let response = client - .console_grpc_client - .status(StatusRequest {}) - .await - .expect("Can't get status"); - - status.push(response.into_inner().has_active_sample); - } - - assert!(status.iter().all(|s| *s)); -} - -#[rstest] -#[tokio::test] -#[timeout(Duration::from_secs(30))] -// FIXME: This test is flaky, it fails sometimes because of gRPC connection error (StreamClosed) -async fn cert_delivery() { - let subscriber = tracing_subscriber::FmtSubscriber::builder() - .with_env_filter(tracing_subscriber::EnvFilter::from_default_env()) - .with_test_writer() - .finish(); - let _ = tracing::subscriber::set_global_default(subscriber); - - let peer_number = 5; - let number_of_certificates_per_subnet = 2; - let number_of_subnets = 3; - - let all_subnets: Vec = (1..=number_of_subnets) - .map(|v| [v as u8; SUBNET_ID_LENGTH].into()) - .collect(); - - // Generate certificates with respect to parameters - let mut subnet_certificates = - create_certificate_chains(all_subnets.as_ref(), number_of_certificates_per_subnet); - debug!( - "Generated certificates for distribution per subnet: {:#?}", - &subnet_certificates - ); - - // Calculate expected final set of delivered certificates (every subnet should receive certificates that has cross - // chain transaction targeting it) - let mut expected_certificates: HashMap> = HashMap::new(); - for certificates in subnet_certificates.values() { - for cert in certificates { - for target_subnet in &cert.target_subnets { - expected_certificates - .entry(*target_subnet) - .or_insert_with(HashSet::new) - .insert(cert.clone()); - } - } - } - - warn!("Starting the cluster..."); - // List of peers (tce nodes) with their context - let mut peers_context = create_network(peer_number).await; - - warn!("Cluster started, starting clients..."); - // Connected tce clients are passing received certificates to this mpsc::Receiver, collect all of them - let mut clients_delivered_certificates: Vec> = - Vec::new(); // (Peer Id, Subnet Id, Certificate) - let mut client_tasks: Vec> = Vec::new(); // Clients connected to TCE API Service run in async tasks - - let mut assign_at_least_one_client_to_every_subnet = all_subnets.clone(); - for (peer_id, ctx) in peers_context.iter_mut() { - let peer_id = *peer_id; - // Make sure that every subnet is represented (connected through client) to at least 1 peer - // After that assign subnets randomly to clients, 1 subnet per connection to TCE - // as it is assumed that NUMBER_OF_SUBNETS_PER_CLIENT is 1 - that is also realistic case, topos node representing one subnet - let client_subnet_id: SubnetId = if assign_at_least_one_client_to_every_subnet.is_empty() { - get_subset_of_subnets(&all_subnets, NUMBER_OF_SUBNETS_PER_CLIENT).remove(0) - } else { - assign_at_least_one_client_to_every_subnet.pop().unwrap() - }; - - // Number of subnets one client is representing, normally 1 - ctx.connected_subnets = Some(vec![client_subnet_id]); - debug!( - "Opening client for peer id: {} with subnet_ids: {:?}", - &peer_id, &client_subnet_id, - ); - - // (Peer id, Subnet Id, Certificate) - let (tx, rx) = mpsc::channel::<(PeerId, SubnetId, Certificate)>( - number_of_certificates_per_subnet * number_of_subnets, - ); - clients_delivered_certificates.push(rx); - - let in_stream_subnet_id = client_subnet_id; - let in_stream = async_stream::stream! { - yield OpenStream { - target_checkpoint: Some(TargetCheckpoint{ - target_subnet_ids: vec![in_stream_subnet_id.into()], - positions: Vec::new() - }), - source_checkpoint: None - }.into(); - }; - - // Number of certificates expected to receive for every subnet, - // to know when to close the TCE clients (and finish test) - let mut incoming_certificates_number = - expected_certificates.get(&client_subnet_id).unwrap().len(); - // Open client connection to TCE service in separate async tasks - let mut client = ctx.api_grpc_client.clone(); - let expected_certificate_debug: Vec<_> = expected_certificates - .get(&client_subnet_id) - .unwrap() - .iter() - .map(|c| c.id) - .collect(); - - let response = client.watch_certificates(in_stream).await.unwrap(); - - let client_task = spawn(async move { - debug!( - "Spawning client task for peer: {} waiting for {} certificates: {:?}", - peer_id, incoming_certificates_number, expected_certificate_debug - ); - - let mut resp_stream = response.into_inner(); - while let Some(received) = resp_stream.next().await { - let received = received.unwrap(); - if let Some(Event::CertificatePushed(CertificatePushed { - certificate: Some(certificate), - .. - })) = received.event - { - debug!( - "Client peer_id: {} certificate id: {} delivered to subnet id {}, ", - &peer_id, - certificate.id.clone().unwrap(), - &client_subnet_id - ); - tx.send((peer_id, client_subnet_id, certificate.try_into().unwrap())) - .await - .unwrap(); - incoming_certificates_number -= 1; - if incoming_certificates_number == 0 { - // We have received all expected certificates for this subnet, end client - break; - } - } - } - debug!("Finishing client for peer_id: {}", &peer_id); - }); - client_tasks.push(client_task); - } - - info!( - "Waiting for expected delivered certificates {:?}", - expected_certificates - ); - // Delivery tasks collect certificates that clients of every TCE node - // are receiving to reduce them to one channel (delivery_rx) - let mut delivery_tasks = Vec::new(); - // delivery_tx/delivery_rx Pass certificates from delivery tasks of every client to final collection of delivered certificates - let (delivery_tx, mut delivery_rx) = mpsc::channel::<(PeerId, SubnetId, Certificate)>( - peer_number * number_of_certificates_per_subnet * number_of_subnets, - ); - for (index, mut client_delivered_certificates) in - clients_delivered_certificates.into_iter().enumerate() - { - let delivery_tx = delivery_tx.clone(); - let delivery_task = tokio::spawn(async move { - // Read certificates that every client has received - info!("Delivery task for receiver {}", index); - loop { - let x = client_delivered_certificates.recv().await; - - match x { - Some((peer_id, target_subnet_id, cert)) => { - info!( - "Delivered certificate on peer_Id: {} cert id: {} from source subnet id: {} to target subnet id {}", - &peer_id, cert.id, cert.source_subnet_id, target_subnet_id - ); - // Send certificates from every peer to one delivery_rx receiver - delivery_tx - .send((peer_id, target_subnet_id, cert)) - .await - .unwrap(); - } - _ => break, - } - } - // We will end this loop when sending TCE client has dropped channel sender and there - // are not certificates in channel - info!("End delivery task for receiver {}", index); - }); - delivery_tasks.push(delivery_task); - } - drop(delivery_tx); - - // Broadcast multiple certificates from all subnets - info!("Broadcasting certificates..."); - for (peer_id, client) in peers_context.iter_mut() { - // If there exist of connected subnets to particular TCE - if let Some(ref connected_subnets) = client.connected_subnets { - // Iterate all subnets connected to TCE (normally 1) - for subnet_id in connected_subnets { - if let Some(certificates) = subnet_certificates.get_mut(subnet_id) { - // Iterate all certificates meant to be sent to the particular network - for cert in certificates.iter() { - info!( - "Sending certificate id={} from subnet id: {} to peer id: {}", - &cert.id, &subnet_id, &peer_id - ); - let _ = client - .api_grpc_client - .submit_certificate(SubmitCertificateRequest { - certificate: Some(cert.clone().into()), - }) - .await - .expect("Can't send certificate"); - } - // Remove sent certificate, every certificate is sent only once to TCE network - certificates.clear(); - } - } - } - } - let assertion = async move { - info!("Waiting for all delivery tasks"); - join_all(delivery_tasks).await; - info!("All expected clients delivered"); - let mut delivered_certificates: HashMap>> = - HashMap::new(); - // Collect all certificates per peer_id and subnet_id - while let Some((peer_id, receiving_subnet_id, cert)) = delivery_rx.recv().await { - debug!("Counting delivered certificate cert id: {:?}", cert.id); - delivered_certificates - .entry(peer_id) - .or_insert_with(HashMap::new) - .entry(receiving_subnet_id) - .or_insert_with(HashSet::new) - .insert(cert); - } - info!("All incoming certificates received"); - // Check received certificates for every peer and every subnet - for delivered_certificates_per_peer in delivered_certificates.values() { - for (subnet_id, delivered_certificates_per_subnet) in delivered_certificates_per_peer { - assert_eq!( - expected_certificates.get(subnet_id).unwrap().len(), - delivered_certificates_per_subnet.len() - ); - assert_eq!( - expected_certificates.get(subnet_id).unwrap(), - delivered_certificates_per_subnet - ); - } - } - }; - - // Set big timeout to prevent flaky fails. Instead fail/panic early in the test to indicate actual error - if tokio::time::timeout(std::time::Duration::from_secs(30), assertion) - .await - .is_err() - { - panic!("Timeout waiting for command"); - } -} +// use futures::{future::join_all, StreamExt}; +// use libp2p::PeerId; +// use rand::seq::IteratorRandom; +// use rstest::*; +// use std::collections::{HashMap, HashSet}; +// use std::time::Duration; +// use test_log::test; +// use tokio::spawn; +// use tokio::sync::mpsc; +// use topos_core::{ +// api::grpc::{ +// shared::v1::checkpoints::TargetCheckpoint, +// tce::v1::{ +// watch_certificates_request::OpenStream, +// watch_certificates_response::{CertificatePushed, Event}, +// StatusRequest, SubmitCertificateRequest, +// }, +// }, +// uci::{Certificate, SubnetId, SUBNET_ID_LENGTH}, +// }; +// use topos_test_sdk::{certificates::create_certificate_chains, tce::create_network}; +// use tracing::{debug, info, warn}; +// +// const NUMBER_OF_SUBNETS_PER_CLIENT: usize = 1; +// +// fn get_subset_of_subnets(subnets: &[SubnetId], subset_size: usize) -> Vec { +// let mut rng = rand::thread_rng(); +// Vec::from_iter( +// subnets +// .iter() +// .cloned() +// .choose_multiple(&mut rng, subset_size), +// ) +// } +// +// #[rstest] +// #[test(tokio::test)] +// #[timeout(Duration::from_secs(5))] +// async fn start_a_cluster() { +// let mut peers_context = create_network(5).await; +// +// let mut status: Vec = Vec::new(); +// +// for (_peer_id, client) in peers_context.iter_mut() { +// let response = client +// .console_grpc_client +// .status(StatusRequest {}) +// .await +// .expect("Can't get status"); +// +// status.push(response.into_inner().has_active_sample); +// } +// +// assert!(status.iter().all(|s| *s)); +// } +// +// #[rstest] +// #[tokio::test] +// #[timeout(Duration::from_secs(30))] +// // FIXME: This test is flaky, it fails sometimes because of gRPC connection error (StreamClosed) +// async fn cert_delivery() { +// let subscriber = tracing_subscriber::FmtSubscriber::builder() +// .with_env_filter(tracing_subscriber::EnvFilter::from_default_env()) +// .with_test_writer() +// .finish(); +// let _ = tracing::subscriber::set_global_default(subscriber); +// +// let peer_number = 5; +// let number_of_certificates_per_subnet = 2; +// let number_of_subnets = 3; +// +// let all_subnets: Vec = (1..=number_of_subnets) +// .map(|v| [v as u8; SUBNET_ID_LENGTH].into()) +// .collect(); +// +// // Generate certificates with respect to parameters +// let mut subnet_certificates = +// create_certificate_chains(all_subnets.as_ref(), number_of_certificates_per_subnet); +// debug!( +// "Generated certificates for distribution per subnet: {:#?}", +// &subnet_certificates +// ); +// +// // Calculate expected final set of delivered certificates (every subnet should receive certificates that has cross +// // chain transaction targeting it) +// let mut expected_certificates: HashMap> = HashMap::new(); +// for certificates in subnet_certificates.values() { +// for cert in certificates { +// for target_subnet in &cert.target_subnets { +// expected_certificates +// .entry(*target_subnet) +// .or_insert_with(HashSet::new) +// .insert(cert.clone()); +// } +// } +// } +// +// warn!("Starting the cluster..."); +// // List of peers (tce nodes) with their context +// let mut peers_context = create_network(peer_number).await; +// +// warn!("Cluster started, starting clients..."); +// // Connected tce clients are passing received certificates to this mpsc::Receiver, collect all of them +// let mut clients_delivered_certificates: Vec> = +// Vec::new(); // (Peer Id, Subnet Id, Certificate) +// let mut client_tasks: Vec> = Vec::new(); // Clients connected to TCE API Service run in async tasks +// +// let mut assign_at_least_one_client_to_every_subnet = all_subnets.clone(); +// for (peer_id, ctx) in peers_context.iter_mut() { +// let peer_id = *peer_id; +// // Make sure that every subnet is represented (connected through client) to at least 1 peer +// // After that assign subnets randomly to clients, 1 subnet per connection to TCE +// // as it is assumed that NUMBER_OF_SUBNETS_PER_CLIENT is 1 - that is also realistic case, topos node representing one subnet +// let client_subnet_id: SubnetId = if assign_at_least_one_client_to_every_subnet.is_empty() { +// get_subset_of_subnets(&all_subnets, NUMBER_OF_SUBNETS_PER_CLIENT).remove(0) +// } else { +// assign_at_least_one_client_to_every_subnet.pop().unwrap() +// }; +// +// // Number of subnets one client is representing, normally 1 +// ctx.connected_subnets = Some(vec![client_subnet_id]); +// debug!( +// "Opening client for peer id: {} with subnet_ids: {:?}", +// &peer_id, &client_subnet_id, +// ); +// +// // (Peer id, Subnet Id, Certificate) +// let (tx, rx) = mpsc::channel::<(PeerId, SubnetId, Certificate)>( +// number_of_certificates_per_subnet * number_of_subnets, +// ); +// clients_delivered_certificates.push(rx); +// +// let in_stream_subnet_id = client_subnet_id; +// let in_stream = async_stream::stream! { +// yield OpenStream { +// target_checkpoint: Some(TargetCheckpoint{ +// target_subnet_ids: vec![in_stream_subnet_id.into()], +// positions: Vec::new() +// }), +// source_checkpoint: None +// }.into(); +// }; +// +// // Number of certificates expected to receive for every subnet, +// // to know when to close the TCE clients (and finish test) +// let mut incoming_certificates_number = +// expected_certificates.get(&client_subnet_id).unwrap().len(); +// // Open client connection to TCE service in separate async tasks +// let mut client = ctx.api_grpc_client.clone(); +// let expected_certificate_debug: Vec<_> = expected_certificates +// .get(&client_subnet_id) +// .unwrap() +// .iter() +// .map(|c| c.id) +// .collect(); +// +// let response = client.watch_certificates(in_stream).await.unwrap(); +// +// let client_task = spawn(async move { +// debug!( +// "Spawning client task for peer: {} waiting for {} certificates: {:?}", +// peer_id, incoming_certificates_number, expected_certificate_debug +// ); +// +// let mut resp_stream = response.into_inner(); +// while let Some(received) = resp_stream.next().await { +// let received = received.unwrap(); +// if let Some(Event::CertificatePushed(CertificatePushed { +// certificate: Some(certificate), +// .. +// })) = received.event +// { +// debug!( +// "Client peer_id: {} certificate id: {} delivered to subnet id {}, ", +// &peer_id, +// certificate.id.clone().unwrap(), +// &client_subnet_id +// ); +// tx.send((peer_id, client_subnet_id, certificate.try_into().unwrap())) +// .await +// .unwrap(); +// incoming_certificates_number -= 1; +// if incoming_certificates_number == 0 { +// // We have received all expected certificates for this subnet, end client +// break; +// } +// } +// } +// debug!("Finishing client for peer_id: {}", &peer_id); +// }); +// client_tasks.push(client_task); +// } +// +// info!( +// "Waiting for expected delivered certificates {:?}", +// expected_certificates +// ); +// // Delivery tasks collect certificates that clients of every TCE node +// // are receiving to reduce them to one channel (delivery_rx) +// let mut delivery_tasks = Vec::new(); +// // delivery_tx/delivery_rx Pass certificates from delivery tasks of every client to final collection of delivered certificates +// let (delivery_tx, mut delivery_rx) = mpsc::channel::<(PeerId, SubnetId, Certificate)>( +// peer_number * number_of_certificates_per_subnet * number_of_subnets, +// ); +// for (index, mut client_delivered_certificates) in +// clients_delivered_certificates.into_iter().enumerate() +// { +// let delivery_tx = delivery_tx.clone(); +// let delivery_task = tokio::spawn(async move { +// // Read certificates that every client has received +// info!("Delivery task for receiver {}", index); +// loop { +// let x = client_delivered_certificates.recv().await; +// +// match x { +// Some((peer_id, target_subnet_id, cert)) => { +// info!( +// "Delivered certificate on peer_Id: {} cert id: {} from source subnet id: {} to target subnet id {}", +// &peer_id, cert.id, cert.source_subnet_id, target_subnet_id +// ); +// // Send certificates from every peer to one delivery_rx receiver +// delivery_tx +// .send((peer_id, target_subnet_id, cert)) +// .await +// .unwrap(); +// } +// _ => break, +// } +// } +// // We will end this loop when sending TCE client has dropped channel sender and there +// // are not certificates in channel +// info!("End delivery task for receiver {}", index); +// }); +// delivery_tasks.push(delivery_task); +// } +// drop(delivery_tx); +// +// // Broadcast multiple certificates from all subnets +// info!("Broadcasting certificates..."); +// for (peer_id, client) in peers_context.iter_mut() { +// // If there exist of connected subnets to particular TCE +// if let Some(ref connected_subnets) = client.connected_subnets { +// // Iterate all subnets connected to TCE (normally 1) +// for subnet_id in connected_subnets { +// if let Some(certificates) = subnet_certificates.get_mut(subnet_id) { +// // Iterate all certificates meant to be sent to the particular network +// for cert in certificates.iter() { +// info!( +// "Sending certificate id={} from subnet id: {} to peer id: {}", +// &cert.id, &subnet_id, &peer_id +// ); +// let _ = client +// .api_grpc_client +// .submit_certificate(SubmitCertificateRequest { +// certificate: Some(cert.clone().into()), +// }) +// .await +// .expect("Can't send certificate"); +// } +// // Remove sent certificate, every certificate is sent only once to TCE network +// certificates.clear(); +// } +// } +// } +// } +// let assertion = async move { +// info!("Waiting for all delivery tasks"); +// join_all(delivery_tasks).await; +// info!("All expected clients delivered"); +// let mut delivered_certificates: HashMap>> = +// HashMap::new(); +// // Collect all certificates per peer_id and subnet_id +// while let Some((peer_id, receiving_subnet_id, cert)) = delivery_rx.recv().await { +// debug!("Counting delivered certificate cert id: {:?}", cert.id); +// delivered_certificates +// .entry(peer_id) +// .or_insert_with(HashMap::new) +// .entry(receiving_subnet_id) +// .or_insert_with(HashSet::new) +// .insert(cert); +// } +// info!("All incoming certificates received"); +// // Check received certificates for every peer and every subnet +// for delivered_certificates_per_peer in delivered_certificates.values() { +// for (subnet_id, delivered_certificates_per_subnet) in delivered_certificates_per_peer { +// assert_eq!( +// expected_certificates.get(subnet_id).unwrap().len(), +// delivered_certificates_per_subnet.len() +// ); +// assert_eq!( +// expected_certificates.get(subnet_id).unwrap(), +// delivered_certificates_per_subnet +// ); +// } +// } +// }; +// +// // Set big timeout to prevent flaky fails. Instead fail/panic early in the test to indicate actual error +// if tokio::time::timeout(std::time::Duration::from_secs(30), assertion) +// .await +// .is_err() +// { +// panic!("Timeout waiting for command"); +// } +// } From d471220cb61ee7411aecfca6c1a1925109f47c5e Mon Sep 17 00:00:00 2001 From: Bastian Gruber Date: Tue, 25 Jul 2023 15:39:25 +0200 Subject: [PATCH 18/23] fix: add constants for channel sizes --- crates/topos-tce-broadcast/src/constant.rs | 18 ++++++++++++++++++ crates/topos-tce-broadcast/src/lib.rs | 7 ++++--- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/crates/topos-tce-broadcast/src/constant.rs b/crates/topos-tce-broadcast/src/constant.rs index ab5942117..c390b719d 100644 --- a/crates/topos-tce-broadcast/src/constant.rs +++ b/crates/topos-tce-broadcast/src/constant.rs @@ -7,6 +7,24 @@ lazy_static! { .ok() .and_then(|s| s.parse().ok()) .unwrap_or(2048); + /// Size of the channel between double echo and the task manager + pub static ref TASK_MANAGER_CHANNEL_SIZE: usize = + std::env::var("TOPOS_TASK_MANAGER_CHANNEL_SIZE") + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or(20_480); + /// Size of the channel to send protocol events from the double echo + pub static ref PROTOCOL_CHANNEL_SIZE: usize = + std::env::var("TOPOS_PROTOCOL_CHANNEL_SIZE") + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or(2048); + /// Size of the channel to send updated subscriptions views to the double echo + pub static ref SUBSCRIPTION_VIEW_CHANNEL_SIZE: usize = + std::env::var("TOPOS_SUBSCRIPTION_VIEW_CHANNEL_SIZE") + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or(2048); /// Capacity alert threshold for the double echo command channel pub static ref COMMAND_CHANNEL_CAPACITY: usize = COMMAND_CHANNEL_SIZE .checked_mul(10) diff --git a/crates/topos-tce-broadcast/src/lib.rs b/crates/topos-tce-broadcast/src/lib.rs index 0787e5d16..ad488a4c5 100644 --- a/crates/topos-tce-broadcast/src/lib.rs +++ b/crates/topos-tce-broadcast/src/lib.rs @@ -111,13 +111,14 @@ impl ReliableBroadcastClient { storage: StorageClient, ) -> (Self, impl Stream) { let (subscriptions_view_sender, subscriptions_view_receiver) = - mpsc::channel::(2048); - let (event_sender, event_receiver) = mpsc::channel(2048); + mpsc::channel::(*constant::SUBSCRIPTION_VIEW_CHANNEL_SIZE); + let (event_sender, event_receiver) = mpsc::channel(*constant::PROTOCOL_CHANNEL_SIZE); let (command_sender, command_receiver) = mpsc::channel(*constant::COMMAND_CHANNEL_SIZE); let (double_echo_shutdown_channel, double_echo_shutdown_receiver) = mpsc::channel::>(1); - let (task_manager_message_sender, task_manager_message_receiver) = mpsc::channel(24_000); + let (task_manager_message_sender, task_manager_message_receiver) = + mpsc::channel(*constant::TASK_MANAGER_CHANNEL_SIZE); let pending_certificate_count = storage .get_pending_certificates() From bfffea160b89ad232a062dcdb27688e964331fbc Mon Sep 17 00:00:00 2001 From: Bastian Gruber Date: Tue, 25 Jul 2023 15:41:42 +0200 Subject: [PATCH 19/23] fix: add doc comments for double echo --- crates/topos-tce-broadcast/src/double_echo/mod.rs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/crates/topos-tce-broadcast/src/double_echo/mod.rs b/crates/topos-tce-broadcast/src/double_echo/mod.rs index 742ef39d8..da2df2106 100644 --- a/crates/topos-tce-broadcast/src/double_echo/mod.rs +++ b/crates/topos-tce-broadcast/src/double_echo/mod.rs @@ -15,18 +15,16 @@ pub struct DoubleEcho { command_receiver: mpsc::Receiver, /// Channel to send events event_sender: mpsc::Sender, - /// Channel to receive shutdown signal pub(crate) shutdown: mpsc::Receiver>, - - /// delivered certificate ids to avoid processing twice the same certificate + /// Delivered certificate ids to avoid processing twice the same certificate delivered_certificates: HashSet, - + /// The threshold parameters for the double echo pub(crate) params: ReliableBroadcastParams, - + /// The connection to the TaskManager to forward DoubleEchoCommand messages task_manager_message_sender: mpsc::Sender, - - pub(crate) subscriptions: SubscriptionsView, // My subscriptions for echo, ready and delivery feedback + /// The overview of the network, which holds echo and ready subscriptions and the network size + pub(crate) subscriptions: SubscriptionsView, } impl DoubleEcho { From e2c0b0e36953d563a58f5f9a128a9e045d617998 Mon Sep 17 00:00:00 2001 From: Bastian Gruber Date: Wed, 26 Jul 2023 08:42:40 +0200 Subject: [PATCH 20/23] chore: adapt task-manager benches to faeture flag --- .../benches/double_echo.rs | 24 +--- .../benches/task_manager.rs | 121 ++++++++++++++++++ .../benches/task_manager_channels.rs | 56 -------- .../benches/task_manager_futures.rs | 57 --------- .../src/double_echo/broadcast_state.rs | 4 +- .../src/double_echo/mod.rs | 14 +- crates/topos-tce-broadcast/src/tests/mod.rs | 3 +- 7 files changed, 137 insertions(+), 142 deletions(-) create mode 100644 crates/topos-tce-broadcast/benches/task_manager.rs delete mode 100644 crates/topos-tce-broadcast/benches/task_manager_channels.rs delete mode 100644 crates/topos-tce-broadcast/benches/task_manager_futures.rs diff --git a/crates/topos-tce-broadcast/benches/double_echo.rs b/crates/topos-tce-broadcast/benches/double_echo.rs index cf56479f5..0be6ccc97 100644 --- a/crates/topos-tce-broadcast/benches/double_echo.rs +++ b/crates/topos-tce-broadcast/benches/double_echo.rs @@ -1,33 +1,17 @@ use criterion::async_executor::FuturesExecutor; use criterion::{criterion_group, criterion_main, Criterion}; - -#[cfg(feature = "task-manager-channels")] -mod task_manager_channels; -#[cfg(not(feature = "task-manager-channels"))] -mod task_manager_futures; +mod task_manager; pub fn criterion_benchmark(c: &mut Criterion) { - let echo_messages = 10; + let certificates = 10_000; let runtime = tokio::runtime::Builder::new_current_thread() .build() .unwrap(); - #[cfg(feature = "task-manager-channels")] - c.bench_function("double_echo with channels", |b| { - b.to_async(FuturesExecutor).iter(|| async { - runtime.block_on(async { - task_manager_channels::processing_double_echo(echo_messages).await - }) - }) - }); - - #[cfg(not(feature = "task-manager-channels"))] - c.bench_function("double_echo with futures", |b| { + c.bench_function("double_echo", |b| { b.to_async(FuturesExecutor).iter(|| async { - runtime.block_on(async { - task_manager_futures::processing_double_echo(echo_messages).await - }) + runtime.block_on(async { task_manager::processing_double_echo(certificates).await }) }) }); } diff --git a/crates/topos-tce-broadcast/benches/task_manager.rs b/crates/topos-tce-broadcast/benches/task_manager.rs new file mode 100644 index 000000000..64f7f0205 --- /dev/null +++ b/crates/topos-tce-broadcast/benches/task_manager.rs @@ -0,0 +1,121 @@ +use std::collections::HashSet; +use tce_transport::{ProtocolEvents, ReliableBroadcastParams}; +use tokio::sync::mpsc::Receiver; +use tokio::sync::{mpsc, oneshot}; +use topos_tce_broadcast::double_echo::DoubleEcho; +use topos_tce_broadcast::sampler::SubscriptionsView; +use topos_test_sdk::certificates::create_certificate_chain; +use topos_test_sdk::constants::{SOURCE_SUBNET_ID_1, TARGET_SUBNET_ID_1}; + +const CHANNEL_SIZE: usize = 256_000; + +struct TceParams { + nb_peers: usize, + broadcast_params: ReliableBroadcastParams, +} + +struct Context { + event_receiver: Receiver, +} + +pub async fn processing_double_echo(n: u64) { + let (subscriptions_view_sender, subscriptions_view_receiver) = mpsc::channel(CHANNEL_SIZE); + + let (_cmd_sender, cmd_receiver) = mpsc::channel(CHANNEL_SIZE); + let (event_sender, event_receiver) = mpsc::channel(CHANNEL_SIZE); + let (_double_echo_shutdown_sender, double_echo_shutdown_receiver) = + mpsc::channel::>(1); + let (task_manager_message_sender, task_manager_message_receiver) = mpsc::channel(CHANNEL_SIZE); + + let params = TceParams { + nb_peers: 10, + broadcast_params: ReliableBroadcastParams { + echo_threshold: 8, + ready_threshold: 5, + delivery_threshold: 8, + }, + }; + + let mut ctx = Context { event_receiver }; + + let mut double_echo = DoubleEcho::new( + params.broadcast_params, + task_manager_message_sender.clone(), + cmd_receiver, + event_sender, + double_echo_shutdown_receiver, + 0, + ); + + // List of peers + let mut peers = HashSet::new(); + for i in 0..params.nb_peers { + let peer = topos_p2p::utils::local_key_pair(Some(i as u8)) + .public() + .to_peer_id(); + peers.insert(peer); + } + + // Subscriptions + double_echo.subscriptions.echo = peers.clone(); + double_echo.subscriptions.ready = peers.clone(); + double_echo.subscriptions.network_size = params.nb_peers; + + let msg = SubscriptionsView { + echo: peers.clone(), + ready: peers.clone(), + network_size: params.nb_peers, + }; + + subscriptions_view_sender.send(msg).await.unwrap(); + + double_echo.spawn_task_manager(subscriptions_view_receiver, task_manager_message_receiver); + + let certificates = + create_certificate_chain(SOURCE_SUBNET_ID_1, &[TARGET_SUBNET_ID_1], n as usize); + + let double_echo_selected_echo = double_echo + .subscriptions + .echo + .iter() + .take(double_echo.params.echo_threshold) + .cloned() + .collect::>(); + + let double_echo_selected_ready = double_echo + .subscriptions + .ready + .iter() + .take(double_echo.params.delivery_threshold) + .cloned() + .collect::>(); + + for cert in &certificates { + double_echo.broadcast(cert.clone(), true).await; + } + + for cert in &certificates { + for p in &double_echo_selected_echo { + double_echo.handle_echo(p.clone(), cert.id).await; + } + + for p in &double_echo_selected_ready { + double_echo.handle_ready(p.clone(), cert.id).await; + } + } + + let mut count = 0; + + while let Some(event) = ctx.event_receiver.recv().await { + match event { + ProtocolEvents::CertificateDelivered { .. } => { + count += 1; + + if count == n { + break; + } + } + _ => {} + } + } +} diff --git a/crates/topos-tce-broadcast/benches/task_manager_channels.rs b/crates/topos-tce-broadcast/benches/task_manager_channels.rs deleted file mode 100644 index 117ec478c..000000000 --- a/crates/topos-tce-broadcast/benches/task_manager_channels.rs +++ /dev/null @@ -1,56 +0,0 @@ -use rand::Rng; -use tokio::spawn; -use tokio::sync::mpsc; - -use tce_transport::ReliableBroadcastParams; -use topos_core::uci::CertificateId; -use topos_p2p::PeerId; -use topos_tce_broadcast::task_manager_channels::TaskManager; -use topos_tce_broadcast::DoubleEchoCommand; - -pub async fn processing_double_echo(n: u64) { - let (message_sender, message_receiver) = mpsc::channel(1024); - let (event_sender, mut event_receiver) = mpsc::channel(1024); - - let threshold = ReliableBroadcastParams { - echo_threshold: n as usize, - ready_threshold: n as usize, - delivery_threshold: n as usize, - }; - - let (task_manager, shutdown_receiver) = TaskManager::new(message_receiver, threshold); - - spawn(task_manager.run(event_sender, shutdown_receiver)); - - let mut certificates = vec![]; - - let mut rng = rand::thread_rng(); - - for _ in 0..10_000 { - let mut id = [0u8; 32]; - rng.fill(&mut id); - let cert_id = CertificateId::from_array(id); - certificates.push(cert_id); - } - - for certificate_id in certificates { - for _ in 0..n { - let echo = DoubleEchoCommand::Echo { - from_peer: PeerId::random(), - certificate_id, - }; - - message_sender.send(echo).await.unwrap(); - } - } - - let mut count = 0; - - while (event_receiver.recv().await).is_some() { - count += 1; - - if count == n { - break; - } - } -} diff --git a/crates/topos-tce-broadcast/benches/task_manager_futures.rs b/crates/topos-tce-broadcast/benches/task_manager_futures.rs deleted file mode 100644 index 5f280bdaa..000000000 --- a/crates/topos-tce-broadcast/benches/task_manager_futures.rs +++ /dev/null @@ -1,57 +0,0 @@ -use rand::Rng; -use tce_transport::ReliableBroadcastParams; -use tokio::spawn; -use tokio::sync::mpsc; -use topos_core::uci::CertificateId; -use topos_p2p::PeerId; -use topos_tce_broadcast::DoubleEchoCommand; - -use topos_tce_broadcast::task_manager_futures::TaskManager; - -pub async fn processing_double_echo(n: u64) { - let (message_sender, message_receiver) = mpsc::channel(1024); - let (task_completion_sender, mut task_completion_receiver) = mpsc::channel(40_000); - - let thresholds = ReliableBroadcastParams { - echo_threshold: n as usize, - ready_threshold: n as usize, - delivery_threshold: n as usize, - }; - - let (task_manager, shutdown_receiver) = - TaskManager::new(message_receiver, task_completion_sender, thresholds); - - spawn(task_manager.run(shutdown_receiver)); - - let mut certificates = vec![]; - - let mut rng = rand::thread_rng(); - - for _ in 0..10_000 { - let mut id = [0u8; 32]; - rng.fill(&mut id); - let cert_id = CertificateId::from_array(id); - certificates.push(cert_id); - } - - for certificate_id in certificates { - for _ in 0..n { - let echo = DoubleEchoCommand::Echo { - from_peer: PeerId::random(), - certificate_id, - }; - - message_sender.send(echo).await.unwrap(); - } - } - - let mut count = 0; - - while let Some((_, _)) = task_completion_receiver.recv().await { - count += 1; - - if count == n { - break; - } - } -} diff --git a/crates/topos-tce-broadcast/src/double_echo/broadcast_state.rs b/crates/topos-tce-broadcast/src/double_echo/broadcast_state.rs index d2335dd54..7b9a4fdcb 100644 --- a/crates/topos-tce-broadcast/src/double_echo/broadcast_state.rs +++ b/crates/topos-tce-broadcast/src/double_echo/broadcast_state.rs @@ -98,7 +98,9 @@ impl BroadcastState { let event = ProtocolEvents::Ready { certificate_id: self.certificate.id, }; - self.event_sender.try_send(event).unwrap(); + if let Err(e) = self.event_sender.try_send(event) { + println!("Error sending Ready message: {}", e); + } self.status = self.status.ready_sent(); diff --git a/crates/topos-tce-broadcast/src/double_echo/mod.rs b/crates/topos-tce-broadcast/src/double_echo/mod.rs index da2df2106..b69facee8 100644 --- a/crates/topos-tce-broadcast/src/double_echo/mod.rs +++ b/crates/topos-tce-broadcast/src/double_echo/mod.rs @@ -20,11 +20,11 @@ pub struct DoubleEcho { /// Delivered certificate ids to avoid processing twice the same certificate delivered_certificates: HashSet, /// The threshold parameters for the double echo - pub(crate) params: ReliableBroadcastParams, + pub params: ReliableBroadcastParams, /// The connection to the TaskManager to forward DoubleEchoCommand messages task_manager_message_sender: mpsc::Sender, /// The overview of the network, which holds echo and ready subscriptions and the network size - pub(crate) subscriptions: SubscriptionsView, + pub subscriptions: SubscriptionsView, } impl DoubleEcho { @@ -51,7 +51,7 @@ impl DoubleEcho { } #[cfg(not(feature = "task-manager-channels"))] - pub(crate) fn spawn_task_manager( + pub fn spawn_task_manager( &mut self, subscriptions_view_receiver: mpsc::Receiver, task_manager_message_receiver: mpsc::Receiver, @@ -72,7 +72,7 @@ impl DoubleEcho { } #[cfg(feature = "task-manager-channels")] - pub(crate) fn spawn_task_manager( + pub fn spawn_task_manager( &mut self, subscriptions_view_receiver: mpsc::Receiver, task_manager_message_receiver: mpsc::Receiver, @@ -162,7 +162,7 @@ impl DoubleEcho { /// Called to process potentially new certificate: /// - either submitted from API ( [tce_transport::TceCommands::Broadcast] command) /// - or received through the gossip (first step of protocol exchange) - pub(crate) async fn broadcast(&mut self, cert: Certificate, origin: bool) { + pub async fn broadcast(&mut self, cert: Certificate, origin: bool) { info!("🙌 Starting broadcasting the Certificate {}", &cert.id); if self.cert_pre_broadcast_check(&cert).is_err() { error!("Failure on the pre-check for the Certificate {}", &cert.id); @@ -238,7 +238,7 @@ impl DoubleEcho { } impl DoubleEcho { - pub(crate) async fn handle_echo(&mut self, from_peer: PeerId, certificate_id: CertificateId) { + pub async fn handle_echo(&mut self, from_peer: PeerId, certificate_id: CertificateId) { if self.delivered_certificates.get(&certificate_id).is_none() { let _ = self .task_manager_message_sender @@ -250,7 +250,7 @@ impl DoubleEcho { } } - pub(crate) async fn handle_ready(&mut self, from_peer: PeerId, certificate_id: CertificateId) { + pub async fn handle_ready(&mut self, from_peer: PeerId, certificate_id: CertificateId) { if self.delivered_certificates.get(&certificate_id).is_none() { let _ = self .task_manager_message_sender diff --git a/crates/topos-tce-broadcast/src/tests/mod.rs b/crates/topos-tce-broadcast/src/tests/mod.rs index 186988d58..80d8a4dbf 100644 --- a/crates/topos-tce-broadcast/src/tests/mod.rs +++ b/crates/topos-tce-broadcast/src/tests/mod.rs @@ -82,7 +82,8 @@ async fn create_context(params: TceParams) -> (DoubleEcho, Context) { ready: peers.clone(), network_size: params.nb_peers, }; - let _ = subscriptions_view_sender.send(msg).await.unwrap(); + + subscriptions_view_sender.send(msg).await.unwrap(); double_echo.spawn_task_manager(subscriptions_view_receiver, task_manager_message_receiver); From 2b31655d4f81a3714fc8709a2ecf0b6d093b1f2d Mon Sep 17 00:00:00 2001 From: Bastian Gruber Date: Wed, 26 Jul 2023 09:24:38 +0200 Subject: [PATCH 21/23] fix: fix cargo xclippy --- .../topos-tce-broadcast/benches/task_manager.rs | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/crates/topos-tce-broadcast/benches/task_manager.rs b/crates/topos-tce-broadcast/benches/task_manager.rs index 64f7f0205..1945b0601 100644 --- a/crates/topos-tce-broadcast/benches/task_manager.rs +++ b/crates/topos-tce-broadcast/benches/task_manager.rs @@ -96,26 +96,23 @@ pub async fn processing_double_echo(n: u64) { for cert in &certificates { for p in &double_echo_selected_echo { - double_echo.handle_echo(p.clone(), cert.id).await; + double_echo.handle_echo(*p, cert.id).await; } for p in &double_echo_selected_ready { - double_echo.handle_ready(p.clone(), cert.id).await; + double_echo.handle_ready(*p, cert.id).await; } } let mut count = 0; while let Some(event) = ctx.event_receiver.recv().await { - match event { - ProtocolEvents::CertificateDelivered { .. } => { - count += 1; + if let ProtocolEvents::CertificateDelivered { .. } = event { + count += 1; - if count == n { - break; - } + if count == n { + break; } - _ => {} } } } From cc829c3f094a8d9d1478b942d1c75f669bb2bca1 Mon Sep 17 00:00:00 2001 From: Bastian Gruber Date: Wed, 26 Jul 2023 09:41:52 +0200 Subject: [PATCH 22/23] fix: cert-delivery test, forward subscription view --- .../src/double_echo/mod.rs | 16 +- crates/topos/tests/cert_delivery.rs | 610 +++++++++--------- 2 files changed, 318 insertions(+), 308 deletions(-) diff --git a/crates/topos-tce-broadcast/src/double_echo/mod.rs b/crates/topos-tce-broadcast/src/double_echo/mod.rs index b69facee8..61df3c6ec 100644 --- a/crates/topos-tce-broadcast/src/double_echo/mod.rs +++ b/crates/topos-tce-broadcast/src/double_echo/mod.rs @@ -101,16 +101,26 @@ impl DoubleEcho { /// the message pub(crate) async fn run( mut self, - subscriptions_view_receiver: mpsc::Receiver, + mut subscriptions_view_receiver: mpsc::Receiver, task_manager_message_receiver: mpsc::Receiver, ) { - let mut task_completion = - self.spawn_task_manager(subscriptions_view_receiver, task_manager_message_receiver); + let (forwarding_subscriptions_sender, forwarding_subscriptions_receiver) = + mpsc::channel(2048); + let mut task_completion = self.spawn_task_manager( + forwarding_subscriptions_receiver, + task_manager_message_receiver, + ); info!("DoubleEcho started"); let shutdowned: Option> = loop { tokio::select! { + biased; + + Some(new_subscriptions_view) = subscriptions_view_receiver.recv() => { + forwarding_subscriptions_sender.send(new_subscriptions_view.clone()).await.unwrap(); + self.subscriptions = new_subscriptions_view; + } shutdown = self.shutdown.recv() => { warn!("Double echo shutdown signal received {:?}", shutdown); diff --git a/crates/topos/tests/cert_delivery.rs b/crates/topos/tests/cert_delivery.rs index 863274d74..a2b1ce504 100644 --- a/crates/topos/tests/cert_delivery.rs +++ b/crates/topos/tests/cert_delivery.rs @@ -1,305 +1,305 @@ -// use futures::{future::join_all, StreamExt}; -// use libp2p::PeerId; -// use rand::seq::IteratorRandom; -// use rstest::*; -// use std::collections::{HashMap, HashSet}; -// use std::time::Duration; -// use test_log::test; -// use tokio::spawn; -// use tokio::sync::mpsc; -// use topos_core::{ -// api::grpc::{ -// shared::v1::checkpoints::TargetCheckpoint, -// tce::v1::{ -// watch_certificates_request::OpenStream, -// watch_certificates_response::{CertificatePushed, Event}, -// StatusRequest, SubmitCertificateRequest, -// }, -// }, -// uci::{Certificate, SubnetId, SUBNET_ID_LENGTH}, -// }; -// use topos_test_sdk::{certificates::create_certificate_chains, tce::create_network}; -// use tracing::{debug, info, warn}; -// -// const NUMBER_OF_SUBNETS_PER_CLIENT: usize = 1; -// -// fn get_subset_of_subnets(subnets: &[SubnetId], subset_size: usize) -> Vec { -// let mut rng = rand::thread_rng(); -// Vec::from_iter( -// subnets -// .iter() -// .cloned() -// .choose_multiple(&mut rng, subset_size), -// ) -// } -// -// #[rstest] -// #[test(tokio::test)] -// #[timeout(Duration::from_secs(5))] -// async fn start_a_cluster() { -// let mut peers_context = create_network(5).await; -// -// let mut status: Vec = Vec::new(); -// -// for (_peer_id, client) in peers_context.iter_mut() { -// let response = client -// .console_grpc_client -// .status(StatusRequest {}) -// .await -// .expect("Can't get status"); -// -// status.push(response.into_inner().has_active_sample); -// } -// -// assert!(status.iter().all(|s| *s)); -// } -// -// #[rstest] -// #[tokio::test] -// #[timeout(Duration::from_secs(30))] -// // FIXME: This test is flaky, it fails sometimes because of gRPC connection error (StreamClosed) -// async fn cert_delivery() { -// let subscriber = tracing_subscriber::FmtSubscriber::builder() -// .with_env_filter(tracing_subscriber::EnvFilter::from_default_env()) -// .with_test_writer() -// .finish(); -// let _ = tracing::subscriber::set_global_default(subscriber); -// -// let peer_number = 5; -// let number_of_certificates_per_subnet = 2; -// let number_of_subnets = 3; -// -// let all_subnets: Vec = (1..=number_of_subnets) -// .map(|v| [v as u8; SUBNET_ID_LENGTH].into()) -// .collect(); -// -// // Generate certificates with respect to parameters -// let mut subnet_certificates = -// create_certificate_chains(all_subnets.as_ref(), number_of_certificates_per_subnet); -// debug!( -// "Generated certificates for distribution per subnet: {:#?}", -// &subnet_certificates -// ); -// -// // Calculate expected final set of delivered certificates (every subnet should receive certificates that has cross -// // chain transaction targeting it) -// let mut expected_certificates: HashMap> = HashMap::new(); -// for certificates in subnet_certificates.values() { -// for cert in certificates { -// for target_subnet in &cert.target_subnets { -// expected_certificates -// .entry(*target_subnet) -// .or_insert_with(HashSet::new) -// .insert(cert.clone()); -// } -// } -// } -// -// warn!("Starting the cluster..."); -// // List of peers (tce nodes) with their context -// let mut peers_context = create_network(peer_number).await; -// -// warn!("Cluster started, starting clients..."); -// // Connected tce clients are passing received certificates to this mpsc::Receiver, collect all of them -// let mut clients_delivered_certificates: Vec> = -// Vec::new(); // (Peer Id, Subnet Id, Certificate) -// let mut client_tasks: Vec> = Vec::new(); // Clients connected to TCE API Service run in async tasks -// -// let mut assign_at_least_one_client_to_every_subnet = all_subnets.clone(); -// for (peer_id, ctx) in peers_context.iter_mut() { -// let peer_id = *peer_id; -// // Make sure that every subnet is represented (connected through client) to at least 1 peer -// // After that assign subnets randomly to clients, 1 subnet per connection to TCE -// // as it is assumed that NUMBER_OF_SUBNETS_PER_CLIENT is 1 - that is also realistic case, topos node representing one subnet -// let client_subnet_id: SubnetId = if assign_at_least_one_client_to_every_subnet.is_empty() { -// get_subset_of_subnets(&all_subnets, NUMBER_OF_SUBNETS_PER_CLIENT).remove(0) -// } else { -// assign_at_least_one_client_to_every_subnet.pop().unwrap() -// }; -// -// // Number of subnets one client is representing, normally 1 -// ctx.connected_subnets = Some(vec![client_subnet_id]); -// debug!( -// "Opening client for peer id: {} with subnet_ids: {:?}", -// &peer_id, &client_subnet_id, -// ); -// -// // (Peer id, Subnet Id, Certificate) -// let (tx, rx) = mpsc::channel::<(PeerId, SubnetId, Certificate)>( -// number_of_certificates_per_subnet * number_of_subnets, -// ); -// clients_delivered_certificates.push(rx); -// -// let in_stream_subnet_id = client_subnet_id; -// let in_stream = async_stream::stream! { -// yield OpenStream { -// target_checkpoint: Some(TargetCheckpoint{ -// target_subnet_ids: vec![in_stream_subnet_id.into()], -// positions: Vec::new() -// }), -// source_checkpoint: None -// }.into(); -// }; -// -// // Number of certificates expected to receive for every subnet, -// // to know when to close the TCE clients (and finish test) -// let mut incoming_certificates_number = -// expected_certificates.get(&client_subnet_id).unwrap().len(); -// // Open client connection to TCE service in separate async tasks -// let mut client = ctx.api_grpc_client.clone(); -// let expected_certificate_debug: Vec<_> = expected_certificates -// .get(&client_subnet_id) -// .unwrap() -// .iter() -// .map(|c| c.id) -// .collect(); -// -// let response = client.watch_certificates(in_stream).await.unwrap(); -// -// let client_task = spawn(async move { -// debug!( -// "Spawning client task for peer: {} waiting for {} certificates: {:?}", -// peer_id, incoming_certificates_number, expected_certificate_debug -// ); -// -// let mut resp_stream = response.into_inner(); -// while let Some(received) = resp_stream.next().await { -// let received = received.unwrap(); -// if let Some(Event::CertificatePushed(CertificatePushed { -// certificate: Some(certificate), -// .. -// })) = received.event -// { -// debug!( -// "Client peer_id: {} certificate id: {} delivered to subnet id {}, ", -// &peer_id, -// certificate.id.clone().unwrap(), -// &client_subnet_id -// ); -// tx.send((peer_id, client_subnet_id, certificate.try_into().unwrap())) -// .await -// .unwrap(); -// incoming_certificates_number -= 1; -// if incoming_certificates_number == 0 { -// // We have received all expected certificates for this subnet, end client -// break; -// } -// } -// } -// debug!("Finishing client for peer_id: {}", &peer_id); -// }); -// client_tasks.push(client_task); -// } -// -// info!( -// "Waiting for expected delivered certificates {:?}", -// expected_certificates -// ); -// // Delivery tasks collect certificates that clients of every TCE node -// // are receiving to reduce them to one channel (delivery_rx) -// let mut delivery_tasks = Vec::new(); -// // delivery_tx/delivery_rx Pass certificates from delivery tasks of every client to final collection of delivered certificates -// let (delivery_tx, mut delivery_rx) = mpsc::channel::<(PeerId, SubnetId, Certificate)>( -// peer_number * number_of_certificates_per_subnet * number_of_subnets, -// ); -// for (index, mut client_delivered_certificates) in -// clients_delivered_certificates.into_iter().enumerate() -// { -// let delivery_tx = delivery_tx.clone(); -// let delivery_task = tokio::spawn(async move { -// // Read certificates that every client has received -// info!("Delivery task for receiver {}", index); -// loop { -// let x = client_delivered_certificates.recv().await; -// -// match x { -// Some((peer_id, target_subnet_id, cert)) => { -// info!( -// "Delivered certificate on peer_Id: {} cert id: {} from source subnet id: {} to target subnet id {}", -// &peer_id, cert.id, cert.source_subnet_id, target_subnet_id -// ); -// // Send certificates from every peer to one delivery_rx receiver -// delivery_tx -// .send((peer_id, target_subnet_id, cert)) -// .await -// .unwrap(); -// } -// _ => break, -// } -// } -// // We will end this loop when sending TCE client has dropped channel sender and there -// // are not certificates in channel -// info!("End delivery task for receiver {}", index); -// }); -// delivery_tasks.push(delivery_task); -// } -// drop(delivery_tx); -// -// // Broadcast multiple certificates from all subnets -// info!("Broadcasting certificates..."); -// for (peer_id, client) in peers_context.iter_mut() { -// // If there exist of connected subnets to particular TCE -// if let Some(ref connected_subnets) = client.connected_subnets { -// // Iterate all subnets connected to TCE (normally 1) -// for subnet_id in connected_subnets { -// if let Some(certificates) = subnet_certificates.get_mut(subnet_id) { -// // Iterate all certificates meant to be sent to the particular network -// for cert in certificates.iter() { -// info!( -// "Sending certificate id={} from subnet id: {} to peer id: {}", -// &cert.id, &subnet_id, &peer_id -// ); -// let _ = client -// .api_grpc_client -// .submit_certificate(SubmitCertificateRequest { -// certificate: Some(cert.clone().into()), -// }) -// .await -// .expect("Can't send certificate"); -// } -// // Remove sent certificate, every certificate is sent only once to TCE network -// certificates.clear(); -// } -// } -// } -// } -// let assertion = async move { -// info!("Waiting for all delivery tasks"); -// join_all(delivery_tasks).await; -// info!("All expected clients delivered"); -// let mut delivered_certificates: HashMap>> = -// HashMap::new(); -// // Collect all certificates per peer_id and subnet_id -// while let Some((peer_id, receiving_subnet_id, cert)) = delivery_rx.recv().await { -// debug!("Counting delivered certificate cert id: {:?}", cert.id); -// delivered_certificates -// .entry(peer_id) -// .or_insert_with(HashMap::new) -// .entry(receiving_subnet_id) -// .or_insert_with(HashSet::new) -// .insert(cert); -// } -// info!("All incoming certificates received"); -// // Check received certificates for every peer and every subnet -// for delivered_certificates_per_peer in delivered_certificates.values() { -// for (subnet_id, delivered_certificates_per_subnet) in delivered_certificates_per_peer { -// assert_eq!( -// expected_certificates.get(subnet_id).unwrap().len(), -// delivered_certificates_per_subnet.len() -// ); -// assert_eq!( -// expected_certificates.get(subnet_id).unwrap(), -// delivered_certificates_per_subnet -// ); -// } -// } -// }; -// -// // Set big timeout to prevent flaky fails. Instead fail/panic early in the test to indicate actual error -// if tokio::time::timeout(std::time::Duration::from_secs(30), assertion) -// .await -// .is_err() -// { -// panic!("Timeout waiting for command"); -// } -// } +use futures::{future::join_all, StreamExt}; +use libp2p::PeerId; +use rand::seq::IteratorRandom; +use rstest::*; +use std::collections::{HashMap, HashSet}; +use std::time::Duration; +use test_log::test; +use tokio::spawn; +use tokio::sync::mpsc; +use topos_core::{ + api::grpc::{ + shared::v1::checkpoints::TargetCheckpoint, + tce::v1::{ + watch_certificates_request::OpenStream, + watch_certificates_response::{CertificatePushed, Event}, + StatusRequest, SubmitCertificateRequest, + }, + }, + uci::{Certificate, SubnetId, SUBNET_ID_LENGTH}, +}; +use topos_test_sdk::{certificates::create_certificate_chains, tce::create_network}; +use tracing::{debug, info, warn}; + +const NUMBER_OF_SUBNETS_PER_CLIENT: usize = 1; + +fn get_subset_of_subnets(subnets: &[SubnetId], subset_size: usize) -> Vec { + let mut rng = rand::thread_rng(); + Vec::from_iter( + subnets + .iter() + .cloned() + .choose_multiple(&mut rng, subset_size), + ) +} + +#[rstest] +#[test(tokio::test)] +#[timeout(Duration::from_secs(5))] +async fn start_a_cluster() { + let mut peers_context = create_network(5).await; + + let mut status: Vec = Vec::new(); + + for (_peer_id, client) in peers_context.iter_mut() { + let response = client + .console_grpc_client + .status(StatusRequest {}) + .await + .expect("Can't get status"); + + status.push(response.into_inner().has_active_sample); + } + + assert!(status.iter().all(|s| *s)); +} + +#[rstest] +#[tokio::test] +#[timeout(Duration::from_secs(30))] +// FIXME: This test is flaky, it fails sometimes because of gRPC connection error (StreamClosed) +async fn cert_delivery() { + let subscriber = tracing_subscriber::FmtSubscriber::builder() + .with_env_filter(tracing_subscriber::EnvFilter::from_default_env()) + .with_test_writer() + .finish(); + let _ = tracing::subscriber::set_global_default(subscriber); + + let peer_number = 5; + let number_of_certificates_per_subnet = 2; + let number_of_subnets = 3; + + let all_subnets: Vec = (1..=number_of_subnets) + .map(|v| [v as u8; SUBNET_ID_LENGTH].into()) + .collect(); + + // Generate certificates with respect to parameters + let mut subnet_certificates = + create_certificate_chains(all_subnets.as_ref(), number_of_certificates_per_subnet); + debug!( + "Generated certificates for distribution per subnet: {:#?}", + &subnet_certificates + ); + + // Calculate expected final set of delivered certificates (every subnet should receive certificates that has cross + // chain transaction targeting it) + let mut expected_certificates: HashMap> = HashMap::new(); + for certificates in subnet_certificates.values() { + for cert in certificates { + for target_subnet in &cert.target_subnets { + expected_certificates + .entry(*target_subnet) + .or_insert_with(HashSet::new) + .insert(cert.clone()); + } + } + } + + warn!("Starting the cluster..."); + // List of peers (tce nodes) with their context + let mut peers_context = create_network(peer_number).await; + + warn!("Cluster started, starting clients..."); + // Connected tce clients are passing received certificates to this mpsc::Receiver, collect all of them + let mut clients_delivered_certificates: Vec> = + Vec::new(); // (Peer Id, Subnet Id, Certificate) + let mut client_tasks: Vec> = Vec::new(); // Clients connected to TCE API Service run in async tasks + + let mut assign_at_least_one_client_to_every_subnet = all_subnets.clone(); + for (peer_id, ctx) in peers_context.iter_mut() { + let peer_id = *peer_id; + // Make sure that every subnet is represented (connected through client) to at least 1 peer + // After that assign subnets randomly to clients, 1 subnet per connection to TCE + // as it is assumed that NUMBER_OF_SUBNETS_PER_CLIENT is 1 - that is also realistic case, topos node representing one subnet + let client_subnet_id: SubnetId = if assign_at_least_one_client_to_every_subnet.is_empty() { + get_subset_of_subnets(&all_subnets, NUMBER_OF_SUBNETS_PER_CLIENT).remove(0) + } else { + assign_at_least_one_client_to_every_subnet.pop().unwrap() + }; + + // Number of subnets one client is representing, normally 1 + ctx.connected_subnets = Some(vec![client_subnet_id]); + debug!( + "Opening client for peer id: {} with subnet_ids: {:?}", + &peer_id, &client_subnet_id, + ); + + // (Peer id, Subnet Id, Certificate) + let (tx, rx) = mpsc::channel::<(PeerId, SubnetId, Certificate)>( + number_of_certificates_per_subnet * number_of_subnets, + ); + clients_delivered_certificates.push(rx); + + let in_stream_subnet_id = client_subnet_id; + let in_stream = async_stream::stream! { + yield OpenStream { + target_checkpoint: Some(TargetCheckpoint{ + target_subnet_ids: vec![in_stream_subnet_id.into()], + positions: Vec::new() + }), + source_checkpoint: None + }.into(); + }; + + // Number of certificates expected to receive for every subnet, + // to know when to close the TCE clients (and finish test) + let mut incoming_certificates_number = + expected_certificates.get(&client_subnet_id).unwrap().len(); + // Open client connection to TCE service in separate async tasks + let mut client = ctx.api_grpc_client.clone(); + let expected_certificate_debug: Vec<_> = expected_certificates + .get(&client_subnet_id) + .unwrap() + .iter() + .map(|c| c.id) + .collect(); + + let response = client.watch_certificates(in_stream).await.unwrap(); + + let client_task = spawn(async move { + debug!( + "Spawning client task for peer: {} waiting for {} certificates: {:?}", + peer_id, incoming_certificates_number, expected_certificate_debug + ); + + let mut resp_stream = response.into_inner(); + while let Some(received) = resp_stream.next().await { + let received = received.unwrap(); + if let Some(Event::CertificatePushed(CertificatePushed { + certificate: Some(certificate), + .. + })) = received.event + { + debug!( + "Client peer_id: {} certificate id: {} delivered to subnet id {}, ", + &peer_id, + certificate.id.clone().unwrap(), + &client_subnet_id + ); + tx.send((peer_id, client_subnet_id, certificate.try_into().unwrap())) + .await + .unwrap(); + incoming_certificates_number -= 1; + if incoming_certificates_number == 0 { + // We have received all expected certificates for this subnet, end client + break; + } + } + } + debug!("Finishing client for peer_id: {}", &peer_id); + }); + client_tasks.push(client_task); + } + + info!( + "Waiting for expected delivered certificates {:?}", + expected_certificates + ); + // Delivery tasks collect certificates that clients of every TCE node + // are receiving to reduce them to one channel (delivery_rx) + let mut delivery_tasks = Vec::new(); + // delivery_tx/delivery_rx Pass certificates from delivery tasks of every client to final collection of delivered certificates + let (delivery_tx, mut delivery_rx) = mpsc::channel::<(PeerId, SubnetId, Certificate)>( + peer_number * number_of_certificates_per_subnet * number_of_subnets, + ); + for (index, mut client_delivered_certificates) in + clients_delivered_certificates.into_iter().enumerate() + { + let delivery_tx = delivery_tx.clone(); + let delivery_task = tokio::spawn(async move { + // Read certificates that every client has received + info!("Delivery task for receiver {}", index); + loop { + let x = client_delivered_certificates.recv().await; + + match x { + Some((peer_id, target_subnet_id, cert)) => { + info!( + "Delivered certificate on peer_Id: {} cert id: {} from source subnet id: {} to target subnet id {}", + &peer_id, cert.id, cert.source_subnet_id, target_subnet_id + ); + // Send certificates from every peer to one delivery_rx receiver + delivery_tx + .send((peer_id, target_subnet_id, cert)) + .await + .unwrap(); + } + _ => break, + } + } + // We will end this loop when sending TCE client has dropped channel sender and there + // are not certificates in channel + info!("End delivery task for receiver {}", index); + }); + delivery_tasks.push(delivery_task); + } + drop(delivery_tx); + + // Broadcast multiple certificates from all subnets + info!("Broadcasting certificates..."); + for (peer_id, client) in peers_context.iter_mut() { + // If there exist of connected subnets to particular TCE + if let Some(ref connected_subnets) = client.connected_subnets { + // Iterate all subnets connected to TCE (normally 1) + for subnet_id in connected_subnets { + if let Some(certificates) = subnet_certificates.get_mut(subnet_id) { + // Iterate all certificates meant to be sent to the particular network + for cert in certificates.iter() { + info!( + "Sending certificate id={} from subnet id: {} to peer id: {}", + &cert.id, &subnet_id, &peer_id + ); + let _ = client + .api_grpc_client + .submit_certificate(SubmitCertificateRequest { + certificate: Some(cert.clone().into()), + }) + .await + .expect("Can't send certificate"); + } + // Remove sent certificate, every certificate is sent only once to TCE network + certificates.clear(); + } + } + } + } + let assertion = async move { + info!("Waiting for all delivery tasks"); + join_all(delivery_tasks).await; + info!("All expected clients delivered"); + let mut delivered_certificates: HashMap>> = + HashMap::new(); + // Collect all certificates per peer_id and subnet_id + while let Some((peer_id, receiving_subnet_id, cert)) = delivery_rx.recv().await { + debug!("Counting delivered certificate cert id: {:?}", cert.id); + delivered_certificates + .entry(peer_id) + .or_insert_with(HashMap::new) + .entry(receiving_subnet_id) + .or_insert_with(HashSet::new) + .insert(cert); + } + info!("All incoming certificates received"); + // Check received certificates for every peer and every subnet + for delivered_certificates_per_peer in delivered_certificates.values() { + for (subnet_id, delivered_certificates_per_subnet) in delivered_certificates_per_peer { + assert_eq!( + expected_certificates.get(subnet_id).unwrap().len(), + delivered_certificates_per_subnet.len() + ); + assert_eq!( + expected_certificates.get(subnet_id).unwrap(), + delivered_certificates_per_subnet + ); + } + } + }; + + // Set big timeout to prevent flaky fails. Instead fail/panic early in the test to indicate actual error + if tokio::time::timeout(std::time::Duration::from_secs(30), assertion) + .await + .is_err() + { + panic!("Timeout waiting for command"); + } +} From 55b05302261e245e2d2dec5035e1967b72beec5b Mon Sep 17 00:00:00 2001 From: Bastian Gruber Date: Wed, 26 Jul 2023 11:39:59 +0200 Subject: [PATCH 23/23] fix: pr review --- crates/topos-tce-broadcast/src/constant.rs | 10 ++++++++-- .../src/double_echo/broadcast_state.rs | 2 +- crates/topos-tce-broadcast/src/lib.rs | 2 +- .../src/task_manager_channels/mod.rs | 7 ++++--- crates/topos/Cargo.toml | 2 +- 5 files changed, 15 insertions(+), 8 deletions(-) diff --git a/crates/topos-tce-broadcast/src/constant.rs b/crates/topos-tce-broadcast/src/constant.rs index c390b719d..01dbfc5d6 100644 --- a/crates/topos-tce-broadcast/src/constant.rs +++ b/crates/topos-tce-broadcast/src/constant.rs @@ -8,8 +8,8 @@ lazy_static! { .and_then(|s| s.parse().ok()) .unwrap_or(2048); /// Size of the channel between double echo and the task manager - pub static ref TASK_MANAGER_CHANNEL_SIZE: usize = - std::env::var("TOPOS_TASK_MANAGER_CHANNEL_SIZE") + pub static ref BROADCAST_TASK_MANAGER_CHANNEL_SIZE: usize = + std::env::var("TOPOS_BROADCAST_TASK_MANAGER_CHANNEL_SIZE") .ok() .and_then(|s| s.parse().ok()) .unwrap_or(20_480); @@ -25,6 +25,12 @@ lazy_static! { .ok() .and_then(|s| s.parse().ok()) .unwrap_or(2048); + /// Size of the channel to send updated subscriptions views to the double echo + pub static ref BROADCAST_TASK_COMPLETION_CHANNEL_SIZE: usize = + std::env::var("BROADCAST_TASK_COMPLETION_CHANNEL_SIZE") + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or(2048); /// Capacity alert threshold for the double echo command channel pub static ref COMMAND_CHANNEL_CAPACITY: usize = COMMAND_CHANNEL_SIZE .checked_mul(10) diff --git a/crates/topos-tce-broadcast/src/double_echo/broadcast_state.rs b/crates/topos-tce-broadcast/src/double_echo/broadcast_state.rs index 7b9a4fdcb..64e33fa0d 100644 --- a/crates/topos-tce-broadcast/src/double_echo/broadcast_state.rs +++ b/crates/topos-tce-broadcast/src/double_echo/broadcast_state.rs @@ -99,7 +99,7 @@ impl BroadcastState { certificate_id: self.certificate.id, }; if let Err(e) = self.event_sender.try_send(event) { - println!("Error sending Ready message: {}", e); + warn!("Error sending Ready message: {}", e); } self.status = self.status.ready_sent(); diff --git a/crates/topos-tce-broadcast/src/lib.rs b/crates/topos-tce-broadcast/src/lib.rs index ad488a4c5..da6c298d1 100644 --- a/crates/topos-tce-broadcast/src/lib.rs +++ b/crates/topos-tce-broadcast/src/lib.rs @@ -118,7 +118,7 @@ impl ReliableBroadcastClient { mpsc::channel::>(1); let (task_manager_message_sender, task_manager_message_receiver) = - mpsc::channel(*constant::TASK_MANAGER_CHANNEL_SIZE); + mpsc::channel(*constant::BROADCAST_TASK_MANAGER_CHANNEL_SIZE); let pending_certificate_count = storage .get_pending_certificates() diff --git a/crates/topos-tce-broadcast/src/task_manager_channels/mod.rs b/crates/topos-tce-broadcast/src/task_manager_channels/mod.rs index 01ca7ab63..5fcb08bc4 100644 --- a/crates/topos-tce-broadcast/src/task_manager_channels/mod.rs +++ b/crates/topos-tce-broadcast/src/task_manager_channels/mod.rs @@ -8,8 +8,8 @@ use tracing::warn; pub mod task; use crate::double_echo::broadcast_state::BroadcastState; use crate::sampler::SubscriptionsView; -use crate::DoubleEchoCommand; use crate::TaskStatus; +use crate::{constant, DoubleEchoCommand}; use task::{Task, TaskContext}; /// The TaskManager is responsible for receiving messages from the network and distributing them @@ -37,7 +37,8 @@ impl TaskManager { event_sender: mpsc::Sender, thresholds: ReliableBroadcastParams, ) -> (Self, mpsc::Receiver<()>) { - let (task_completion_sender, task_completion_receiver) = mpsc::channel(1024); + let (task_completion_sender, task_completion_receiver) = + mpsc::channel(*constant::BROADCAST_TASK_COMPLETION_CHANNEL_SIZE); let (shutdown_sender, shutdown_receiver) = mpsc::channel(1); ( @@ -118,7 +119,7 @@ impl TaskManager { } for (certificate_id, messages) in &mut self.buffered_messages { - if let Some(task) = self.tasks.get_mut(certificate_id) { + if let Some(task) = self.tasks.get(certificate_id) { for msg in messages { _ = task.sink.send(msg.clone()).await; } diff --git a/crates/topos/Cargo.toml b/crates/topos/Cargo.toml index 4de7d0d52..580332044 100644 --- a/crates/topos/Cargo.toml +++ b/crates/topos/Cargo.toml @@ -67,4 +67,4 @@ sequencer = ["topos-sequencer"] network = ["topos-certificate-spammer"] node = ["tce", "sequencer"] setup = [] -subnet = [] \ No newline at end of file +subnet = []