From 0c4110000cd83b5f86015517f87e973f2fe4dd40 Mon Sep 17 00:00:00 2001 From: Kirill Mikheev Date: Sun, 10 Mar 2024 19:29:57 +0300 Subject: [PATCH 01/32] dag struct --- Cargo.lock | 5 + consensus/Cargo.toml | 14 +- consensus/src/engine/dag.rs | 321 ++++++++++++++++++++++++ consensus/src/engine/mod.rs | 9 +- consensus/src/engine/neighbour_watch.rs | 45 ++++ consensus/src/engine/node_schedule.rs | 1 + consensus/src/engine/promise.rs | 94 +++++++ consensus/src/engine/signer.rs | 1 + consensus/src/engine/threshold_clock.rs | 8 + consensus/src/engine/verifier.rs | 1 + consensus/src/intercom/dispatcher.rs | 113 +++------ consensus/src/intercom/mod.rs | 1 - consensus/src/intercom/responses.rs | 32 --- consensus/src/lib.rs | 8 +- consensus/src/models.rs | 61 ----- consensus/src/models/mod.rs | 1 + consensus/src/models/point.rs | 130 ++++++++++ consensus/src/tasks/downloader.rs | 14 ++ consensus/src/tasks/mod.rs | 8 +- 19 files changed, 679 insertions(+), 188 deletions(-) create mode 100644 consensus/src/engine/neighbour_watch.rs create mode 100644 consensus/src/engine/node_schedule.rs create mode 100644 consensus/src/engine/promise.rs create mode 100644 consensus/src/engine/signer.rs create mode 100644 consensus/src/engine/verifier.rs delete mode 100644 consensus/src/intercom/responses.rs delete mode 100644 consensus/src/models.rs create mode 100644 consensus/src/models/mod.rs create mode 100644 consensus/src/models/point.rs diff --git a/Cargo.lock b/Cargo.lock index 6e8bb7916..5442cff8c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1953,11 +1953,16 @@ dependencies = [ name = "tycho-consensus" version = "0.0.1" dependencies = [ + "ahash", "anyhow", "bincode", "bytes", + "everscale-crypto", "futures-util", + "parking_lot", "serde", + "sha2", + "thiserror", "tokio", "tracing", "tracing-test", diff --git a/consensus/Cargo.toml b/consensus/Cargo.toml index ba9255877..36a9fb207 100644 --- a/consensus/Cargo.toml +++ b/consensus/Cargo.toml @@ -9,15 +9,23 @@ description = "DAG-based consensus for external messages queue." 
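# new in this commit: everscale-crypto + sha2 (point signing and digests),
# parking_lot (promise mutex), ahash (DashMap hasher), thiserror (error enums)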
anyhow = "1.0" bincode = "1.3" bytes = { version = "1.0", features = ["serde"] } +everscale-crypto = "0.2" futures-util = { version = "0.3" } +parking_lot = "0.12" serde = { version = "1.0", features = ["derive"] } +sha2 = "0.10" tracing = "0.1" weedb = "0.1" # local deps -tycho-network = { path = "../network", version = "=0.0.1" } -tycho-storage = { path = "../storage", version = "=0.0.1" } -tycho-util = { path = "../util", version = "=0.0.1" } +tycho-network = { path = "../network" } +tycho-storage = { path = "../storage" } +tycho-util = { path = "../util" } + +# temp +#hex = "0.4.3" +thiserror = "1.0" +ahash = "0.8" [dev-dependencies] tokio = { version = "1", features = ["rt-multi-thread", "macros"] } diff --git a/consensus/src/engine/dag.rs b/consensus/src/engine/dag.rs index 8b1378917..b8eb019e2 100644 --- a/consensus/src/engine/dag.rs +++ b/consensus/src/engine/dag.rs @@ -1 +1,322 @@ +use std::collections::{BTreeMap, VecDeque}; +use std::num::{NonZeroU8, NonZeroUsize}; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::{Arc, OnceLock, Weak}; +use ahash::RandomState; +use anyhow::{anyhow, Result}; +use futures_util::FutureExt; + +use tycho_util::FastDashMap; + +use crate::engine::promise::Promise; +use crate::models::point::{Digest, NodeId, Point, Round, Signature}; +use crate::tasks::downloader::DownloadTask; + +pub struct IndexedPoint { + point: Point, + // proof_for: Option>, + // includes: Vec>, + // witness: Vec>, + is_committed: AtomicBool, +} + +impl IndexedPoint { + pub fn new(point: Point) -> Self { + Self { + point, + is_committed: AtomicBool::new(false), + } + } +} + +#[derive(Clone)] +pub enum DagPoint { + /* Downloading, // -> Validating | Invalid | Unknown */ + /* Validating(Arc), // -> Valid | Invalid */ + Valid(Arc), // needed to blame equivocation or graph connectivity violations + Invalid(Arc), // invalidates dependent point; needed to blame equivocation + NotExists, // invalidates dependent point; blame with caution +} + +impl DagPoint { + pub fn is_valid(&self) -> bool { + match self { + DagPoint::Valid(_) => true, + _ => false, + } + } + + pub fn valid(&self) -> Option> { + match self { + DagPoint::Valid(point) => Some(point.clone()), + _ => None, + } + } +} + +#[derive(Default)] +struct DagLocation { + // one of the points at current location + // was proven by the next point of a node; + // even if we marked this point as invalid, consensus may override our decision + // and we will have to sync + /* vertex: Option, */ + // we can sign just a single point at the current location; + // other (equivocated) points may be received as includes, witnesses or a proven vertex; + // we have to include signed points as dependencies in our next block + signed_by_me: OnceLock<(Digest, Round, Signature)>, + // if we rejected to sign previous point, + // we require a node to skip the current round; + // if we require to skip after responding with a signature - + // our node cannot invalidate a block retrospectively + no_points_expected: AtomicBool, + // only one of the point versions at current location + // may become proven by the next round point(s) of a node; + // even if we marked a proven point as invalid, consensus may override our decision + versions: BTreeMap>, +} + +struct DagRound { + round: Round, + node_count: u8, + locations: FastDashMap, + prev: Weak, +} + +impl DagRound { + fn new(round: Round, node_count: NonZeroU8, prev: Option<&Arc>) -> Self { + Self { + round, + node_count: ((node_count.get() + 2) / 3) * 3 + 1, // 3F+1 + locations: 
FastDashMap::with_capacity_and_hasher( + node_count.get() as usize, + RandomState::new(), + ), + prev: prev.map_or(Weak::new(), |a| Arc::downgrade(a)), + } + } + + pub async fn valid_point(&self, node: &NodeId, digest: &Digest) -> Option> { + let location = self.locations.get(node)?; + let promise = location.versions.get(digest)?; + let point = promise.get().await.ok()?; + point.valid() + } + + pub async fn add(&mut self, point: Point) -> Result> { + if point.body.location.round != self.round { + return Err(anyhow!("wrong point round")); + } + if !point.is_integrity_ok() { + return Err(anyhow!("point integrity check failed")); + } + let mut dependencies = vec![]; + if let Some(r_1) = self.prev.upgrade() { + for (node, digest) in point.body.includes.clone() { + let mut loc = r_1.locations.entry(node).or_default(); + let promise = loc + .versions + .entry(digest) + .or_insert(Promise::new(Box::pin(DownloadTask {}))) + .clone(); + dependencies.push(promise); + } + if let Some(r_2) = r_1.prev.upgrade() { + for (node, digest) in point.body.witness.clone() { + let mut loc = r_2.locations.entry(node).or_default(); + let promise = loc + .versions + .entry(digest) + .or_insert(Promise::new(Box::pin(DownloadTask {}))) + .clone(); + dependencies.push(promise); + } + }; + }; + + /* + Ok(Promise::new(|| { + Box::pin(async move { + let res: Result, _> = join_all(dependencies.into_iter().map(|p| p.get())) + .await + .into_iter() + .collect(); + res.map(|deps| { + if deps.iter().any(|point| !point.is_valid()) { + DagPoint::Invalid(Arc::new(point)) + } else { + DagPoint::Valid(Arc::new(IndexedPoint::new(point))) + } + }) + }) + })) + */ + /* + let task = Box::pin({ + try_join_all(dependencies.iter().map(|p| p.get())).map(|res| { + res.map(|deps| { + if deps.iter().any(|point| !point.is_valid()) { + DagPoint::Invalid(Arc::new(point)) + } else { + DagPoint::Valid(Arc::new(IndexedPoint::new(point))) + } + }) + }) + }) + .boxed(); + Ok(Promise::new(task)) + */ + Ok(Promise::ready(DagPoint::NotExists)) + // Ok(Promise::new(task)) // FIXME make fn sync + } +} + +#[derive(Debug, thiserror::Error)] +pub enum DagError { + #[error("Dag empty")] + Empty, + #[error("Point not in dag")] + PointNotInDag, + #[error("Round not in dag")] + RoundNotInDag, +} +pub struct Dag { + current: Round, + // from the oldest to the current round; newer ones are in the future + rounds: VecDeque>, // TODO VecDeque>> for sync +} + +impl Dag { + pub fn new(round: Round, node_count: NonZeroU8) -> Self { + Self { + current: round, + rounds: VecDeque::from([Arc::new(DagRound::new(round, node_count, None))]), + } + } + + // TODO new point is checked against the dag only if it has valid sig, time and round + // TODO download from neighbours + pub fn fill_up_to(&mut self, round: Round, node_count: NonZeroU8) -> Result<()> { + match self.rounds.front().map(|f| f.round) { + None => unreachable!("DAG empty"), + Some(front) => { + for round in front.0..round.0 { + self.rounds.push_front(Arc::new(DagRound::new( + Round(round + 1), + node_count, + self.rounds.front(), + ))) + } + Ok(()) + } + } + } + + pub fn drop_tail(&mut self, anchor_at: Round, dag_depth: NonZeroUsize) { + if let Some(tail) = self + .index_of(anchor_at) + .and_then(|a| a.checked_sub(dag_depth.get())) + { + self.rounds.drain(0..tail); + }; + } + + fn round_at(&self, round: Round) -> Option> { + self.rounds.get(self.index_of(round)?).map(|r| r.clone()) + } + + fn index_of(&self, round: Round) -> Option { + match self.rounds.back().map(|b| b.round) { + Some(back) if back <= round => 
Some((round.0 - back.0) as usize), + _ => None, + } + } + + pub async fn vertex_by(&self, proof: &IndexedPoint) -> Option> { + let digest = &proof.point.body.proof.as_ref()?.digest; + let round = proof.point.body.location.round.prev()?; + let dag_round = self.round_at(round)?; + dag_round + .valid_point(&proof.point.body.location.author, digest) + .await + } + + // @return historically ordered vertices (back to front is older to newer) + pub async fn gather_uncommitted( + &self, + anchor_proof: &IndexedPoint, + // dag_depth: usize, + ) -> Result>> { + // anchor must be a vertex @ r+1, proven with point @ r+2 + let Some(anchor) = self.vertex_by(&anchor_proof).await else { + return Err(anyhow!( + "anchor proof @ {} not in dag", + &anchor_proof.point.body.location.round.0 + )); + }; + _ = anchor_proof; // needed no more + + let Some(mut cur_includes_round) = anchor.point.body.location.round.prev() else { + return Err(anyhow!("anchor proof @ 0 cannot exist")); + }; + + let mut r_0 = anchor.point.body.includes.clone(); // points @ r+0 + let mut r_1 = anchor.point.body.witness.clone(); // points @ r-1 + let mut r_2 = BTreeMap::new(); // points @ r-2 + let mut r_3 = BTreeMap::new(); // points @ r-3 + _ = anchor; // anchor payload will be committed the next time + + let mut uncommitted = VecDeque::new(); + + // TODO visited rounds count must be equal to dag depth: + // read/download non-existent rounds and drop too old ones + while let Some((proof_round /* r+0 */, vertex_round /* r-1 */)) = self + .round_at(cur_includes_round) + .and_then(|cur| cur.prev.upgrade().map(|prev| (cur, prev))) + .filter(|_| !(r_0.is_empty() && r_1.is_empty() && r_2.is_empty() && r_3.is_empty())) + { + // take points @ r+0, and select their vertices @ r-1 for commit + // the order is of NodeId (public key) + while let Some((node, digest)) = &r_0.pop_first() { + // Every point must be valid (we've validated anchor dependencies already), + // but some points don't have previous one to proof as vertex. + // Any valid point among equivocated will do, as they include the same vertex. 
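                // For every proof @ r+0 not yet committed: extend the r-1/r-2
                // frontiers with its includes and witness, then commit the
                // vertex @ r-1 it proves, marking that vertex as committed so
                // it is not committed again (later in this walk or by the next
                // anchor); the vertex's own links feed the r-2/r-3 frontiers.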
+ if let Some(proof /* point @ r+0 */) = proof_round.valid_point(node, digest).await { + if proof.is_committed.load(Ordering::Relaxed) { + continue; + } + let author = &proof.point.body.location.author; + r_1.extend(proof.point.body.includes.clone()); // points @ r-1 + r_2.extend(proof.point.body.witness.clone()); // points @ r-2 + let Some(digest) = proof.point.body.proof.as_ref().map(|a| &a.digest) else { + continue; + }; + if let Some(vertex /* point @ r-1 */) = vertex_round + .valid_point(author, &digest) + .await + // select uncommitted ones, marking them as committed + // to exclude from the next commit + .filter(|vertex| { + vertex + .is_committed + .compare_exchange(false, true, Ordering::Relaxed, Ordering::Relaxed) + .is_ok() + }) + { + // vertex will be skipped in r_1 as committed + r_2.extend(vertex.point.body.includes.clone()); // points @ r-2 + r_3.extend(vertex.point.body.witness.clone()); // points @ r-3 + uncommitted.push_back(vertex); // LIFO + } + } + } + cur_includes_round = vertex_round.round; // next r+0 + r_0 = r_1; // next r+0 + r_1 = r_2; // next r-1 + r_2 = r_3; // next r-2 + r_3 = BTreeMap::new(); // next r-3 + } + Ok(uncommitted) + } +} diff --git a/consensus/src/engine/mod.rs b/consensus/src/engine/mod.rs index 16aee4a36..afcc7c0a1 100644 --- a/consensus/src/engine/mod.rs +++ b/consensus/src/engine/mod.rs @@ -1,2 +1,7 @@ -mod dag; -mod threshold_clock; +pub mod dag; +pub mod neighbour_watch; +mod node_schedule; +mod promise; +mod signer; +pub mod threshold_clock; +mod verifier; diff --git a/consensus/src/engine/neighbour_watch.rs b/consensus/src/engine/neighbour_watch.rs new file mode 100644 index 000000000..fe90165dd --- /dev/null +++ b/consensus/src/engine/neighbour_watch.rs @@ -0,0 +1,45 @@ +use std::time::SystemTime; +use tycho_util::FastDashMap; + +use crate::models::point::{NodeId, Point, Round}; + +// from latest block +struct NodeInfo { + round: Round, + time: SystemTime, +} + +pub struct NeighbourWatch { + nodes: FastDashMap, +} + +impl NeighbourWatch { + /// every node must provide: + /// * increasing rounds (two points per same round are equivocation) + /// * time increasing with every round + /// * no prev_point - in case of a gap in rounds (no weak links) + /// * prev_point - in case node made no gaps in rounds + /// * TODO: insert linked (previous) point first, then current one; or move to DAG + pub fn verify(&mut self, point: &Point) -> bool { + let round = point.body.location.round; + let time = point.body.time; + let mut valid = true; + // TODO move to as-is validation: let mut valid = prev_round.map_or(true, |prev| prev.0 + 1 == round.0); + self.nodes + .entry(point.body.location.author.clone()) + .and_modify(|e| { + valid = e.round < round + && e.time < time + // node either skipped a round, or provided evidences for prev block + && round.prev().map_or(e.round.0 + 1 < round.0, |prev| e.round <= prev); + if e.round < round { + (*e).round = round + }; + if e.time < time { + (*e).time = time + }; + }) + .or_insert(NodeInfo { round, time }); + valid + } +} diff --git a/consensus/src/engine/node_schedule.rs b/consensus/src/engine/node_schedule.rs new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/consensus/src/engine/node_schedule.rs @@ -0,0 +1 @@ + diff --git a/consensus/src/engine/promise.rs b/consensus/src/engine/promise.rs new file mode 100644 index 000000000..7941bac38 --- /dev/null +++ b/consensus/src/engine/promise.rs @@ -0,0 +1,94 @@ +use std::fmt::Debug; +use std::ops::Deref; +use std::pin::Pin; +use std::sync::{Arc, 
Weak}; + +use parking_lot::Mutex; +use thiserror; +use tokio::sync::broadcast; +use tokio::sync::broadcast::error::RecvError; + +#[derive(Clone, Debug, thiserror::Error)] +pub enum PromiseError { + #[error("Promise task failed: '{0}'")] + TaskFailed(String), + #[error("Promise task panicked")] + TaskPanicked, + #[error("Promise internal error: sender closed")] + Internal, +} + +#[derive(Clone)] +enum Inner +where + T: Clone + Send + Sync + 'static, +{ + Ready(Result), + Pending(Weak>>), +} + +#[derive(Clone)] +pub struct Promise +where + T: Clone + Send + Sync + 'static, +{ + // TODO try OnceLock::get before mutex, then if None = Sender::subscribe() try OnceLock again + inner: Arc>>, +} + +impl Promise +where + T: Clone + Send + Sync + 'static, +{ + pub async fn get(&self) -> Result { + let inner = self.inner.lock(); + let inner = inner.deref(); + + match inner { + Inner::Ready(value) => value.clone(), + Inner::Pending(inflight) => match inflight.upgrade().map(|a| a.subscribe()) { + None => Err(PromiseError::TaskPanicked), + Some(mut rx) => match rx.recv().await { + Ok(value) => value, + Err(RecvError::Lagged(_)) => { + rx.recv().await.unwrap_or(Err(PromiseError::Internal)) + } + Err(RecvError::Closed) => Err(PromiseError::Internal), + }, + }, + } + } + + pub fn ready(value: T) -> Self { + Self { + inner: Arc::new(Mutex::new(Inner::Ready(Ok(value)))), + } + } + + pub fn new( + fut: Pin> + Send + 'static>>, + ) -> Self + where + E: Debug + 'static, + { + let (tx, _) = broadcast::channel::>(1); + let tx = Arc::new(tx); + + // weak ref to Sender is dropped if spawned task panicked + let inner = Arc::new(Mutex::new(Inner::Pending(Arc::downgrade(&tx)))); + let this = Self { + inner: inner.clone(), + }; + + tokio::spawn(async move { + let res = fut + .await + .map_err(|e| PromiseError::TaskFailed(format!("{e:?}"))); + let mut inner = inner.lock(); + let _ = tx.send(res.clone()); + *inner = Inner::Ready(res); + }); + + this + } +} diff --git a/consensus/src/engine/signer.rs b/consensus/src/engine/signer.rs new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/consensus/src/engine/signer.rs @@ -0,0 +1 @@ + diff --git a/consensus/src/engine/threshold_clock.rs b/consensus/src/engine/threshold_clock.rs index 8b1378917..8c76e5f9c 100644 --- a/consensus/src/engine/threshold_clock.rs +++ b/consensus/src/engine/threshold_clock.rs @@ -1 +1,9 @@ +use tycho_util::FastHashSet; +use crate::models::point::{NodeId, Round}; + +pub struct ThresholdClock { + round: Round, + signatures_received: FastHashSet, + rejected: FastHashSet, // TODO reason +} diff --git a/consensus/src/engine/verifier.rs b/consensus/src/engine/verifier.rs new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/consensus/src/engine/verifier.rs @@ -0,0 +1 @@ + diff --git a/consensus/src/intercom/dispatcher.rs b/consensus/src/intercom/dispatcher.rs index cfb39c5a7..27457a542 100644 --- a/consensus/src/intercom/dispatcher.rs +++ b/consensus/src/intercom/dispatcher.rs @@ -9,23 +9,32 @@ use tycho_network::{ service_query_fn, Network, NetworkConfig, NetworkExt, Response, ServiceRequest, Version, }; -use crate::intercom::responses::*; -use crate::models::{Location, Point, PointId, RoundId, Signature}; +use crate::models::point::{Location, Point, PointId, Round, Signature}; + +#[derive(Serialize, Deserialize, Debug)] +pub struct BroadcastResponse { + // for requested point + pub signature: Signature, + // at the same round, if it was not skipped + pub signer_point: Option, +} +#[derive(Serialize, Deserialize, Debug)] +pub 
struct PointResponse { + pub point: Option, +} +//PointLast(Option), + +#[derive(Serialize, Deserialize, Debug)] +pub struct PointsResponse { + pub vertices: Vec, +} #[derive(Serialize, Deserialize, Debug)] enum MPRequest { // by author Broadcast { point: Point }, Point { id: PointId }, - // any point from the last author's round; - // 1/3+1 evidenced vertices determine current consensus round - // PointLast, - // unique point with known evidence - Vertex { id: Location }, - // the next point by the same author - // that contains >=2F signatures for requested vertex - Evidence { vertex_id: Location }, - Vertices { round: RoundId }, + Points { round: Round }, } #[derive(Serialize, Deserialize, Debug)] @@ -33,9 +42,7 @@ enum MPResponse { Broadcast(BroadcastResponse), Point(PointResponse), //PointLast(Option), - Vertex(VertexResponse), - Evidence(EvidenceResponse), - Vertices(VerticesResponse), + Points(PointsResponse), } #[derive(Serialize, Deserialize, Debug)] @@ -98,10 +105,10 @@ impl Dispatcher { } } - pub async fn vertex(&self, id: Location, from: SocketAddr) -> Result { + pub async fn points(&self, round: Round, from: SocketAddr) -> Result { let request = tycho_network::Request { version: Version::V1, - body: Bytes::from(bincode::serialize(&MPRequest::Vertex { id })?), + body: Bytes::from(bincode::serialize(&MPRequest::Points { round })?), }; let remote_peer = self.network.connect(from).await?; @@ -109,43 +116,7 @@ impl Dispatcher { let response = self.network.query(&remote_peer, request).await?; match parse_response(&response.body)? { - MPResponse::Vertex(r) => Ok(r), - x => Err(anyhow!("wrong response")), - } - } - - pub async fn evidence( - &self, - vertex_id: Location, - from: SocketAddr, - ) -> Result { - let request = tycho_network::Request { - version: Version::V1, - body: Bytes::from(bincode::serialize(&MPRequest::Evidence { vertex_id })?), - }; - - let remote_peer = self.network.connect(from).await?; - - let response = self.network.query(&remote_peer, request).await?; - - match parse_response(&response.body)? { - MPResponse::Evidence(r) => Ok(r), - x => Err(anyhow!("wrong response")), - } - } - - pub async fn vertices(&self, round: RoundId, from: SocketAddr) -> Result { - let request = tycho_network::Request { - version: Version::V1, - body: Bytes::from(bincode::serialize(&MPRequest::Vertices { round })?), - }; - - let remote_peer = self.network.connect(from).await?; - - let response = self.network.query(&remote_peer, request).await?; - - match parse_response(&response.body)? { - MPResponse::Vertices(r) => Ok(r), + MPResponse::Points(r) => Ok(r), x => Err(anyhow!("wrong response")), } } @@ -171,35 +142,17 @@ impl DispatcherInner { // 1.1 sigs for my block + 1.2 my next includes // ?? 
+ 3.1 ask last MPResponse::Broadcast(BroadcastResponse { - current_round: RoundId(0), signature: Signature(Bytes::new()), signer_point: None, }) } MPRequest::Point { id } => { // 1.2 my next includes (merged with Broadcast flow) - MPResponse::Point(PointResponse { - current_round: RoundId(0), - point: None, - }) - } - MPRequest::Vertex { id } => { - // verification flow: downloader - MPResponse::Vertex(VertexResponse { - current_round: RoundId(0), - vertex: None, - }) - } - MPRequest::Evidence { vertex_id } => { - // verification flow: downloader - MPResponse::Evidence(EvidenceResponse { - current_round: RoundId(0), - point: None, - }) + MPResponse::Point(PointResponse { point: None }) } - MPRequest::Vertices { round } => { - // cold sync flow: downloader - MPResponse::Vertices(VerticesResponse { + MPRequest::Points { round } => { + // sync flow: downloader + MPResponse::Points(PointsResponse { vertices: Vec::new(), }) } @@ -207,7 +160,7 @@ impl DispatcherInner { Some(Response { version: Version::default(), - body: Bytes::from(match bincode::serialize(&response) { + body: Bytes::from(match bincode::serialize(&MPRemoteResult::Ok(response)) { Ok(data) => data, Err(e) => { tracing::error!("failed to serialize response to {:?}: {e:?}", req.metadata); @@ -251,8 +204,8 @@ mod tests { body: Bytes::from("bites"), }, ) - .await?; - let response = parse_response(&response.body); + .await + .and_then(|a| parse_response(&a.body)); tracing::info!("response '{response:?}'"); @@ -266,9 +219,7 @@ mod tests { let node1 = Dispatcher::new()?; let node2 = Dispatcher::new()?; - let data = node1 - .vertices(RoundId(0), node2.network.local_addr()) - .await?; + let data = node1.points(Round(0), node2.network.local_addr()).await?; tracing::info!("response: '{data:?}'"); diff --git a/consensus/src/intercom/mod.rs b/consensus/src/intercom/mod.rs index 736e764d3..24b17ec46 100644 --- a/consensus/src/intercom/mod.rs +++ b/consensus/src/intercom/mod.rs @@ -1,4 +1,3 @@ mod dispatcher; mod receiver; -mod responses; mod uploader; diff --git a/consensus/src/intercom/responses.rs b/consensus/src/intercom/responses.rs deleted file mode 100644 index 1eb2c18c7..000000000 --- a/consensus/src/intercom/responses.rs +++ /dev/null @@ -1,32 +0,0 @@ -use serde::{Deserialize, Serialize}; - -use crate::models::{Point, RoundId, Signature}; - -#[derive(Serialize, Deserialize, Debug)] -pub struct BroadcastResponse { - pub current_round: RoundId, - // for requested point - pub signature: Signature, - // at the same round, if it was not skipped - pub signer_point: Option, -} -#[derive(Serialize, Deserialize, Debug)] -pub struct PointResponse { - pub current_round: RoundId, - pub point: Option, -} -//PointLast(Option), -#[derive(Serialize, Deserialize, Debug)] -pub struct VertexResponse { - pub current_round: RoundId, - pub vertex: Option, -} -#[derive(Serialize, Deserialize, Debug)] -pub struct EvidenceResponse { - pub current_round: RoundId, - pub point: Option, -} -#[derive(Serialize, Deserialize, Debug)] -pub struct VerticesResponse { - pub vertices: Vec, -} diff --git a/consensus/src/lib.rs b/consensus/src/lib.rs index a43c4473e..73afa7366 100644 --- a/consensus/src/lib.rs +++ b/consensus/src/lib.rs @@ -1,4 +1,4 @@ -mod engine; -mod intercom; -mod models; -mod tasks; +pub(crate) mod engine; +pub(crate) mod intercom; +pub(crate) mod models; +pub(crate) mod tasks; diff --git a/consensus/src/models.rs b/consensus/src/models.rs deleted file mode 100644 index b8ab3bff5..000000000 --- a/consensus/src/models.rs +++ /dev/null @@ -1,61 +0,0 @@ 
-use bytes::Bytes; -use serde::{Deserialize, Serialize}; -use tycho_util::FastHashMap; - -pub const POINT_DIGEST_SIZE: usize = 32; -pub const SIGNATURE_SIZE: usize = 64; - -#[derive(Serialize, Deserialize, PartialEq, Debug)] -pub struct Digest(pub Bytes); -#[derive(Serialize, Deserialize, PartialEq, Debug)] -pub struct Signature(pub Bytes); -#[derive(Serialize, Deserialize, PartialEq, Eq, Hash, Debug)] -pub struct NodeId(pub u8); -#[derive(Serialize, Deserialize, PartialEq, Debug)] -pub struct RoundId(pub u32); - -#[derive(Serialize, Deserialize, PartialEq, Debug)] -pub struct Location { - round: RoundId, - author: NodeId, -} - -#[derive(Serialize, Deserialize, PartialEq, Debug)] -pub struct PointId { - location: Location, - digest: Digest, -} - -#[derive(Serialize, Deserialize, Debug)] -pub struct PrevPoint { - round: RoundId, - digest: Digest, - // >= 2F witnesses, point author excluded - evidence: FastHashMap, -} - -#[derive(Serialize, Deserialize, Debug)] -pub struct PointData { - location: Location, - local_time: u64, - payload: Vec, - // >= 2F+1 vertices from the round before last, - // optionally including author's own vertex - includes: FastHashMap, - anchor: PointId, - proposed_leader: Option, - // any vertices the leader adds to its diff-graph - // beyond its direct inclusions - leader_deep_includes: Vec, - // of the same author - prev_point: Option, -} - -#[derive(Serialize, Deserialize, Debug)] -pub struct Point { - data: PointData, - // author's - signature: Signature, - // of both data and author's signature - digest: Digest, -} diff --git a/consensus/src/models/mod.rs b/consensus/src/models/mod.rs new file mode 100644 index 000000000..a199ff751 --- /dev/null +++ b/consensus/src/models/mod.rs @@ -0,0 +1 @@ +pub mod point; diff --git a/consensus/src/models/point.rs b/consensus/src/models/point.rs new file mode 100644 index 000000000..27b564992 --- /dev/null +++ b/consensus/src/models/point.rs @@ -0,0 +1,130 @@ +use std::collections::BTreeMap; +use std::hash::{Hash, Hasher}; +use std::time::SystemTime; + +use bytes::Bytes; +use everscale_crypto::ed25519::ExpandedSecretKey; +use serde::{Deserialize, Serialize}; +use sha2::{Digest as Sha2Digest, Sha256}; + +use tycho_network::PeerId; +use tycho_util::FastHashMap; + +#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] +pub struct Digest([u8; 32]); + +#[derive(Clone, Serialize, Deserialize, Debug)] +pub struct Signature(pub Bytes); + +#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] +pub struct NodeId([u8; 32]); + +impl From<&PeerId> for NodeId { + fn from(value: &PeerId) -> Self { + NodeId(value.0) + } +} + +impl From<&NodeId> for PeerId { + fn from(value: &NodeId) -> Self { + PeerId(value.0) + } +} + +#[derive(Copy, Clone, Serialize, Deserialize, PartialOrd, PartialEq, Debug)] +pub struct Round(pub u32); + +impl Round { + pub fn prev(&self) -> Option { + self.0.checked_sub(1).map(Round) + } +} + +#[derive(Clone, Serialize, Deserialize, Debug)] +pub struct Location { + pub round: Round, + pub author: NodeId, +} + +#[derive(Clone, Serialize, Deserialize, Debug)] +pub struct PointId { + pub location: Location, + pub digest: Digest, +} + +#[derive(Clone, Serialize, Deserialize, Debug)] +pub struct PrevPoint { + // until weak links are supported, + // any node may proof its vertex@r-1 with its point@r+0 only + // pub round: Round, + pub digest: Digest, + // >= 2F witnesses, point author excluded + pub evidence: FastHashMap, +} + +#[derive(Clone, Serialize, 
Deserialize, Debug)] +pub struct PointBody { + pub location: Location, // let it be @ r+0 + pub time: SystemTime, + pub payload: Vec, + // of the same author + pub proof: Option, + // >= 2F+1 points @ r-1, + // signed by author @ r-1 with some additional points just mentioned; + // optionally includes author's own vertex (if exists). + // BTree provides repeatable order on every node + pub includes: BTreeMap, + // >= 0 points @ r-2, signed by author @ r-1 + pub witness: BTreeMap, + // the last known third point in a row by some leader; + // defines author's current anchor + pub last_commit_trigger: PointId, + // (only) for every leader node - three points in a row: + // in leader point @ r+0: prev leader proof + // in leader proof @ r+1: current leader point @ r+0 + // in commit trigger @ r+2: leader proof @ r+1 + pub leader_chain: Option, +} + +impl PointBody { + pub fn wrap(self, secret: ExpandedSecretKey) -> Option { + let body = bincode::serialize(&self).ok()?; + let pubkey = PeerId::from(&self.location.author).as_public_key()?; + let sig = secret.sign_raw(body.as_slice(), &pubkey); + let mut hasher = Sha256::new(); + hasher.update(body.as_slice()); + hasher.update(sig.as_slice()); + let digest = Digest(hasher.finalize().into()); + Some(Point { + body: self, + signature: Signature(Bytes::from(sig.to_vec())), + digest, + }) + } +} + +#[derive(Clone, Serialize, Deserialize, Debug)] +pub struct Point { + pub body: PointBody, + // author's signature for the body + pub signature: Signature, + // hash of both data and author's signature + pub digest: Digest, +} + +impl Point { + pub fn is_integrity_ok(&self) -> bool { + let pubkey = PeerId::from(&self.body.location.author).as_public_key(); + let body = bincode::serialize(&self.body).ok(); + let sig: Result<[u8; 64], _> = self.signature.0.to_vec().try_into(); + if let Some(((pubkey, body), sig)) = pubkey.zip(body).zip(sig.ok()) { + let mut hasher = Sha256::new(); + hasher.update(body.as_slice()); + hasher.update(sig.as_slice()); + let digest = Digest(hasher.finalize().into()); + pubkey.verify_raw(body.as_slice(), &sig) && digest == self.digest + } else { + false + } + } +} diff --git a/consensus/src/tasks/downloader.rs b/consensus/src/tasks/downloader.rs index 8b1378917..b84008d4b 100644 --- a/consensus/src/tasks/downloader.rs +++ b/consensus/src/tasks/downloader.rs @@ -1 +1,15 @@ +use std::future::Future; +use std::pin::Pin; +use std::task::{Context, Poll}; +use crate::engine::dag::DagPoint; + +pub struct DownloadTask {} + +impl Future for DownloadTask { + type Output = Result; + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + todo!() + } +} diff --git a/consensus/src/tasks/mod.rs b/consensus/src/tasks/mod.rs index 0926bcb19..81b1dbacb 100644 --- a/consensus/src/tasks/mod.rs +++ b/consensus/src/tasks/mod.rs @@ -1,4 +1,4 @@ -mod broadcaster; -mod downloader; -mod syncer; -mod uploader; +pub mod broadcaster; +pub mod downloader; +pub mod syncer; +pub mod uploader; From 8a9a50e58751d42953c859932671c83467851497 Mon Sep 17 00:00:00 2001 From: Ivan Kalinin Date: Sun, 10 Mar 2024 19:23:19 +0100 Subject: [PATCH 02/32] fix(consensus): fix build --- Cargo.lock | 1 + consensus/Cargo.toml | 2 + consensus/src/engine/dag.rs | 72 ++++++--------- consensus/src/engine/neighbour_watch.rs | 6 +- consensus/src/engine/promise.rs | 116 +++++++++++------------- consensus/src/engine/threshold_clock.rs | 7 +- consensus/src/models/point.rs | 30 ++---- consensus/src/tasks/downloader.rs | 2 +- 8 files changed, 101 insertions(+), 135 deletions(-) diff 
--git a/Cargo.lock b/Cargo.lock index 5442cff8c..b8d3dbbd1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1957,6 +1957,7 @@ dependencies = [ "anyhow", "bincode", "bytes", + "castaway", "everscale-crypto", "futures-util", "parking_lot", diff --git a/consensus/Cargo.toml b/consensus/Cargo.toml index 36a9fb207..e8de46d2c 100644 --- a/consensus/Cargo.toml +++ b/consensus/Cargo.toml @@ -9,11 +9,13 @@ description = "DAG-based consensus for external messages queue." anyhow = "1.0" bincode = "1.3" bytes = { version = "1.0", features = ["serde"] } +castaway = "0.2" everscale-crypto = "0.2" futures-util = { version = "0.3" } parking_lot = "0.12" serde = { version = "1.0", features = ["derive"] } sha2 = "0.10" +tokio = { version = "1", features = ["rt"] } tracing = "0.1" weedb = "0.1" diff --git a/consensus/src/engine/dag.rs b/consensus/src/engine/dag.rs index b8eb019e2..1e9afe4d6 100644 --- a/consensus/src/engine/dag.rs +++ b/consensus/src/engine/dag.rs @@ -5,12 +5,12 @@ use std::sync::{Arc, OnceLock, Weak}; use ahash::RandomState; use anyhow::{anyhow, Result}; -use futures_util::FutureExt; - +use tokio::task::JoinSet; +use tycho_network::PeerId; use tycho_util::FastDashMap; use crate::engine::promise::Promise; -use crate::models::point::{Digest, NodeId, Point, Round, Signature}; +use crate::models::point::{Digest, Point, Round, Signature}; use crate::tasks::downloader::DownloadTask; pub struct IndexedPoint { @@ -80,7 +80,7 @@ struct DagLocation { struct DagRound { round: Round, node_count: u8, - locations: FastDashMap, + locations: FastDashMap, prev: Weak, } @@ -97,10 +97,10 @@ impl DagRound { } } - pub async fn valid_point(&self, node: &NodeId, digest: &Digest) -> Option> { + pub async fn valid_point(&self, node: &PeerId, digest: &Digest) -> Option> { let location = self.locations.get(node)?; let promise = location.versions.get(digest)?; - let point = promise.get().await.ok()?; + let point = promise.get().await; point.valid() } @@ -111,64 +111,50 @@ impl DagRound { if !point.is_integrity_ok() { return Err(anyhow!("point integrity check failed")); } - let mut dependencies = vec![]; + + let mut dependencies = JoinSet::new(); if let Some(r_1) = self.prev.upgrade() { - for (node, digest) in point.body.includes.clone() { + for (&node, &digest) in &point.body.includes { let mut loc = r_1.locations.entry(node).or_default(); let promise = loc .versions .entry(digest) .or_insert(Promise::new(Box::pin(DownloadTask {}))) .clone(); - dependencies.push(promise); + dependencies.spawn(async move { promise.get().await }); } if let Some(r_2) = r_1.prev.upgrade() { - for (node, digest) in point.body.witness.clone() { + for (&node, &digest) in &point.body.witness { let mut loc = r_2.locations.entry(node).or_default(); let promise = loc .versions .entry(digest) .or_insert(Promise::new(Box::pin(DownloadTask {}))) .clone(); - dependencies.push(promise); + dependencies.spawn(async move { promise.get().await }); } }; }; - /* - Ok(Promise::new(|| { - Box::pin(async move { - let res: Result, _> = join_all(dependencies.into_iter().map(|p| p.get())) - .await - .into_iter() - .collect(); - res.map(|deps| { - if deps.iter().any(|point| !point.is_valid()) { - DagPoint::Invalid(Arc::new(point)) - } else { - DagPoint::Valid(Arc::new(IndexedPoint::new(point))) + Ok(Promise::new(async move { + while let Some(res) = dependencies.join_next().await { + let res = match res { + Ok(value) => value, + Err(e) => { + if e.is_panic() { + std::panic::resume_unwind(e.into_panic()); + } + unreachable!(); } - }) - }) + }; + + if !res.is_valid() { 
+ return DagPoint::Invalid(Arc::new(point)); + } + } + + DagPoint::Valid(Arc::new(IndexedPoint::new(point))) })) - */ - /* - let task = Box::pin({ - try_join_all(dependencies.iter().map(|p| p.get())).map(|res| { - res.map(|deps| { - if deps.iter().any(|point| !point.is_valid()) { - DagPoint::Invalid(Arc::new(point)) - } else { - DagPoint::Valid(Arc::new(IndexedPoint::new(point))) - } - }) - }) - }) - .boxed(); - Ok(Promise::new(task)) - */ - Ok(Promise::ready(DagPoint::NotExists)) - // Ok(Promise::new(task)) // FIXME make fn sync } } diff --git a/consensus/src/engine/neighbour_watch.rs b/consensus/src/engine/neighbour_watch.rs index fe90165dd..e5af2f1cb 100644 --- a/consensus/src/engine/neighbour_watch.rs +++ b/consensus/src/engine/neighbour_watch.rs @@ -1,7 +1,9 @@ use std::time::SystemTime; + +use tycho_network::PeerId; use tycho_util::FastDashMap; -use crate::models::point::{NodeId, Point, Round}; +use crate::models::point::{Point, Round}; // from latest block struct NodeInfo { @@ -10,7 +12,7 @@ struct NodeInfo { } pub struct NeighbourWatch { - nodes: FastDashMap, + nodes: FastDashMap, } impl NeighbourWatch { diff --git a/consensus/src/engine/promise.rs b/consensus/src/engine/promise.rs index 7941bac38..ae629f7da 100644 --- a/consensus/src/engine/promise.rs +++ b/consensus/src/engine/promise.rs @@ -1,37 +1,19 @@ -use std::fmt::Debug; -use std::ops::Deref; -use std::pin::Pin; use std::sync::{Arc, Weak}; +use futures_util::future::BoxFuture; +use futures_util::Future; use parking_lot::Mutex; -use thiserror; use tokio::sync::broadcast; use tokio::sync::broadcast::error::RecvError; -#[derive(Clone, Debug, thiserror::Error)] -pub enum PromiseError { - #[error("Promise task failed: '{0}'")] - TaskFailed(String), - #[error("Promise task panicked")] - TaskPanicked, - #[error("Promise internal error: sender closed")] - Internal, -} - #[derive(Clone)] -enum Inner -where - T: Clone + Send + Sync + 'static, -{ - Ready(Result), - Pending(Weak>>), +enum Inner { + Ready(T), + Pending(Weak>), } #[derive(Clone)] -pub struct Promise -where - T: Clone + Send + Sync + 'static, -{ +pub struct Promise { // TODO try OnceLock::get before mutex, then if None = Sender::subscribe() try OnceLock again inner: Arc>>, } @@ -40,55 +22,63 @@ impl Promise where T: Clone + Send + Sync + 'static, { - pub async fn get(&self) -> Result { - let inner = self.inner.lock(); - let inner = inner.deref(); + pub fn new(fut: F) -> Self + where + F: Future + Send + 'static, + { + fn new_impl(fut: BoxFuture<'static, T>) -> Promise + where + T: Clone + Send + 'static, + { + let (tx, _) = broadcast::channel::(1); + let tx = Arc::new(tx); - match inner { - Inner::Ready(value) => value.clone(), - Inner::Pending(inflight) => match inflight.upgrade().map(|a| a.subscribe()) { - None => Err(PromiseError::TaskPanicked), - Some(mut rx) => match rx.recv().await { - Ok(value) => value, - Err(RecvError::Lagged(_)) => { - rx.recv().await.unwrap_or(Err(PromiseError::Internal)) - } - Err(RecvError::Closed) => Err(PromiseError::Internal), - }, - }, + // weak ref to Sender is dropped if spawned task panicked + let inner = Arc::new(Mutex::new(Inner::Pending(Arc::downgrade(&tx)))); + let this = Promise { + inner: inner.clone(), + }; + + tokio::spawn(async move { + let res = fut.await; + let mut inner = inner.lock(); + _ = tx.send(res.clone()); + *inner = Inner::Ready(res); + }); + + this } + + let fut = match castaway::cast!(fut, BoxFuture<'static, T>) { + Ok(fut) => fut, + Err(fut) => Box::pin(fut), + }; + new_impl(fut) } pub fn ready(value: T) -> Self 
{ Self { - inner: Arc::new(Mutex::new(Inner::Ready(Ok(value)))), + inner: Arc::new(Mutex::new(Inner::Ready(value))), } } - pub fn new( - fut: Pin> + Send + 'static>>, - ) -> Self - where - E: Debug + 'static, - { - let (tx, _) = broadcast::channel::>(1); - let tx = Arc::new(tx); - - // weak ref to Sender is dropped if spawned task panicked - let inner = Arc::new(Mutex::new(Inner::Pending(Arc::downgrade(&tx)))); - let this = Self { - inner: inner.clone(), - }; + pub async fn get(&self) -> T { + 'panicked: { + let mut rx = match &*self.inner.lock() { + Inner::Ready(value) => return value.clone(), + Inner::Pending(inflight) => match inflight.upgrade() { + Some(rx) => rx.subscribe(), + None => break 'panicked, + }, + }; - tokio::spawn(async move { - let res = fut - .await - .map_err(|e| PromiseError::TaskFailed(format!("{e:?}"))); - let mut inner = inner.lock(); - let _ = tx.send(res.clone()); - *inner = Inner::Ready(res); - }); + match rx.recv().await { + Ok(value) => return value, + Err(RecvError::Closed) => break 'panicked, + Err(RecvError::Lagged(_)) => unreachable!(), + } + } - this + panic!("task panicked") } } diff --git a/consensus/src/engine/threshold_clock.rs b/consensus/src/engine/threshold_clock.rs index 8c76e5f9c..1d27cbbea 100644 --- a/consensus/src/engine/threshold_clock.rs +++ b/consensus/src/engine/threshold_clock.rs @@ -1,9 +1,10 @@ +use tycho_network::PeerId; use tycho_util::FastHashSet; -use crate::models::point::{NodeId, Round}; +use crate::models::point::Round; pub struct ThresholdClock { round: Round, - signatures_received: FastHashSet, - rejected: FastHashSet, // TODO reason + signatures_received: FastHashSet, + rejected: FastHashSet, // TODO reason } diff --git a/consensus/src/models/point.rs b/consensus/src/models/point.rs index 27b564992..2342b6a0f 100644 --- a/consensus/src/models/point.rs +++ b/consensus/src/models/point.rs @@ -1,5 +1,4 @@ use std::collections::BTreeMap; -use std::hash::{Hash, Hasher}; use std::time::SystemTime; use bytes::Bytes; @@ -10,27 +9,12 @@ use sha2::{Digest as Sha2Digest, Sha256}; use tycho_network::PeerId; use tycho_util::FastHashMap; -#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] +#[derive(Copy, Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] pub struct Digest([u8; 32]); #[derive(Clone, Serialize, Deserialize, Debug)] pub struct Signature(pub Bytes); -#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] -pub struct NodeId([u8; 32]); - -impl From<&PeerId> for NodeId { - fn from(value: &PeerId) -> Self { - NodeId(value.0) - } -} - -impl From<&NodeId> for PeerId { - fn from(value: &NodeId) -> Self { - PeerId(value.0) - } -} - #[derive(Copy, Clone, Serialize, Deserialize, PartialOrd, PartialEq, Debug)] pub struct Round(pub u32); @@ -43,7 +27,7 @@ impl Round { #[derive(Clone, Serialize, Deserialize, Debug)] pub struct Location { pub round: Round, - pub author: NodeId, + pub author: PeerId, } #[derive(Clone, Serialize, Deserialize, Debug)] @@ -59,7 +43,7 @@ pub struct PrevPoint { // pub round: Round, pub digest: Digest, // >= 2F witnesses, point author excluded - pub evidence: FastHashMap, + pub evidence: FastHashMap, } #[derive(Clone, Serialize, Deserialize, Debug)] @@ -73,9 +57,9 @@ pub struct PointBody { // signed by author @ r-1 with some additional points just mentioned; // optionally includes author's own vertex (if exists). 
// BTree provides repeatable order on every node - pub includes: BTreeMap, + pub includes: BTreeMap, // >= 0 points @ r-2, signed by author @ r-1 - pub witness: BTreeMap, + pub witness: BTreeMap, // the last known third point in a row by some leader; // defines author's current anchor pub last_commit_trigger: PointId, @@ -89,7 +73,7 @@ pub struct PointBody { impl PointBody { pub fn wrap(self, secret: ExpandedSecretKey) -> Option { let body = bincode::serialize(&self).ok()?; - let pubkey = PeerId::from(&self.location.author).as_public_key()?; + let pubkey = self.location.author.as_public_key()?; let sig = secret.sign_raw(body.as_slice(), &pubkey); let mut hasher = Sha256::new(); hasher.update(body.as_slice()); @@ -114,7 +98,7 @@ pub struct Point { impl Point { pub fn is_integrity_ok(&self) -> bool { - let pubkey = PeerId::from(&self.body.location.author).as_public_key(); + let pubkey = self.body.location.author.as_public_key(); let body = bincode::serialize(&self.body).ok(); let sig: Result<[u8; 64], _> = self.signature.0.to_vec().try_into(); if let Some(((pubkey, body), sig)) = pubkey.zip(body).zip(sig.ok()) { diff --git a/consensus/src/tasks/downloader.rs b/consensus/src/tasks/downloader.rs index b84008d4b..917cfd35a 100644 --- a/consensus/src/tasks/downloader.rs +++ b/consensus/src/tasks/downloader.rs @@ -7,7 +7,7 @@ use crate::engine::dag::DagPoint; pub struct DownloadTask {} impl Future for DownloadTask { - type Output = Result; + type Output = DagPoint; fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { todo!() From abc9aaceb396461ac2e61113226b2e1744d17bdf Mon Sep 17 00:00:00 2001 From: Ivan Kalinin Date: Sun, 10 Mar 2024 19:47:17 +0100 Subject: [PATCH 03/32] refactor(consensus): refactor `gather_uncommitted` --- consensus/src/engine/dag.rs | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/consensus/src/engine/dag.rs b/consensus/src/engine/dag.rs index 1e9afe4d6..2322c7aa8 100644 --- a/consensus/src/engine/dag.rs +++ b/consensus/src/engine/dag.rs @@ -209,7 +209,7 @@ impl Dag { } fn round_at(&self, round: Round) -> Option> { - self.rounds.get(self.index_of(round)?).map(|r| r.clone()) + self.rounds.get(self.index_of(round)?).cloned() } fn index_of(&self, round: Round) -> Option { @@ -241,17 +241,18 @@ impl Dag { &anchor_proof.point.body.location.round.0 )); }; - _ = anchor_proof; // needed no more let Some(mut cur_includes_round) = anchor.point.body.location.round.prev() else { return Err(anyhow!("anchor proof @ 0 cannot exist")); }; - let mut r_0 = anchor.point.body.includes.clone(); // points @ r+0 - let mut r_1 = anchor.point.body.witness.clone(); // points @ r-1 - let mut r_2 = BTreeMap::new(); // points @ r-2 - let mut r_3 = BTreeMap::new(); // points @ r-3 - _ = anchor; // anchor payload will be committed the next time + let mut r = [ + anchor.point.body.includes.clone(), // points @ r+0 + anchor.point.body.witness.clone(), // points @ r-1 + BTreeMap::new(), // points @ r-2 + BTreeMap::new(), // points @ r-3 + ]; + drop(anchor); // anchor payload will be committed the next time let mut uncommitted = VecDeque::new(); @@ -260,21 +261,21 @@ impl Dag { while let Some((proof_round /* r+0 */, vertex_round /* r-1 */)) = self .round_at(cur_includes_round) .and_then(|cur| cur.prev.upgrade().map(|prev| (cur, prev))) - .filter(|_| !(r_0.is_empty() && r_1.is_empty() && r_2.is_empty() && r_3.is_empty())) + .filter(|_| !r.iter().all(BTreeMap::is_empty)) { // take points @ r+0, and select their vertices @ r-1 for commit // the 
order is of NodeId (public key) - while let Some((node, digest)) = &r_0.pop_first() { + while let Some((node, digest)) = &r[0].pop_first() { // Every point must be valid (we've validated anchor dependencies already), // but some points don't have previous one to proof as vertex. // Any valid point among equivocated will do, as they include the same vertex. if let Some(proof /* point @ r+0 */) = proof_round.valid_point(node, digest).await { - if proof.is_committed.load(Ordering::Relaxed) { + if proof.is_committed.load(Ordering::Acquire) { continue; } let author = &proof.point.body.location.author; - r_1.extend(proof.point.body.includes.clone()); // points @ r-1 - r_2.extend(proof.point.body.witness.clone()); // points @ r-2 + r[1].extend(proof.point.body.includes.clone()); // points @ r-1 + r[2].extend(proof.point.body.witness.clone()); // points @ r-2 let Some(digest) = proof.point.body.proof.as_ref().map(|a| &a.digest) else { continue; }; @@ -286,22 +287,19 @@ impl Dag { .filter(|vertex| { vertex .is_committed - .compare_exchange(false, true, Ordering::Relaxed, Ordering::Relaxed) + .compare_exchange(false, true, Ordering::Release, Ordering::Relaxed) .is_ok() }) { // vertex will be skipped in r_1 as committed - r_2.extend(vertex.point.body.includes.clone()); // points @ r-2 - r_3.extend(vertex.point.body.witness.clone()); // points @ r-3 + r[2].extend(vertex.point.body.includes.clone()); // points @ r-2 + r[3].extend(vertex.point.body.witness.clone()); // points @ r-3 uncommitted.push_back(vertex); // LIFO } } } cur_includes_round = vertex_round.round; // next r+0 - r_0 = r_1; // next r+0 - r_1 = r_2; // next r-1 - r_2 = r_3; // next r-2 - r_3 = BTreeMap::new(); // next r-3 + r.rotate_left(1); } Ok(uncommitted) } From 688702adc4294dfec4603e7d5d60e392de5ab177 Mon Sep 17 00:00:00 2001 From: Ivan Kalinin Date: Sun, 10 Mar 2024 20:05:55 +0100 Subject: [PATCH 04/32] fix(consensus): store points in round on add --- Cargo.lock | 1 + consensus/Cargo.toml | 1 + consensus/src/engine/dag.rs | 101 ++++++++++++++++++-------------- consensus/src/engine/promise.rs | 4 ++ 4 files changed, 62 insertions(+), 45 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b8d3dbbd1..f73e65af3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1958,6 +1958,7 @@ dependencies = [ "bincode", "bytes", "castaway", + "dashmap", "everscale-crypto", "futures-util", "parking_lot", diff --git a/consensus/Cargo.toml b/consensus/Cargo.toml index e8de46d2c..eeade71a5 100644 --- a/consensus/Cargo.toml +++ b/consensus/Cargo.toml @@ -10,6 +10,7 @@ anyhow = "1.0" bincode = "1.3" bytes = { version = "1.0", features = ["serde"] } castaway = "0.2" +dashmap = "5.4" everscale-crypto = "0.2" futures-util = { version = "0.3" } parking_lot = "0.12" diff --git a/consensus/src/engine/dag.rs b/consensus/src/engine/dag.rs index 2322c7aa8..063b95333 100644 --- a/consensus/src/engine/dag.rs +++ b/consensus/src/engine/dag.rs @@ -1,4 +1,4 @@ -use std::collections::{BTreeMap, VecDeque}; +use std::collections::{btree_map, BTreeMap, VecDeque}; use std::num::{NonZeroU8, NonZeroUsize}; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::{Arc, OnceLock, Weak}; @@ -104,57 +104,68 @@ impl DagRound { point.valid() } - pub async fn add(&mut self, point: Point) -> Result> { - if point.body.location.round != self.round { - return Err(anyhow!("wrong point round")); - } - if !point.is_integrity_ok() { - return Err(anyhow!("point integrity check failed")); - } + pub async fn add(&mut self, point: Point) -> Result { + 
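        // Registers the point under its author's location, keyed by digest:
        // a digest seen before reuses the stored promise, a new one spawns
        // download promises (DownloadTask) for every dependency
        // (includes @ r-1, witness @ r-2) and resolves to Valid only if all
        // of them are valid, otherwise to Invalid.
        // A minimal usage sketch, assuming a mutable `dag_round: DagRound`
        // for the point's round and an already received `point: Point`:
        //     let dag_point = dag_round.add(point).await?;
        //     if let Some(valid) = dag_point.valid() { /* sign / include it */ }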
anyhow::ensure!(point.body.location.round == self.round, "wrong point round"); + anyhow::ensure!(point.is_integrity_ok(), "point integrity check failed"); - let mut dependencies = JoinSet::new(); - if let Some(r_1) = self.prev.upgrade() { - for (&node, &digest) in &point.body.includes { - let mut loc = r_1.locations.entry(node).or_default(); - let promise = loc - .versions - .entry(digest) - .or_insert(Promise::new(Box::pin(DownloadTask {}))) - .clone(); - dependencies.spawn(async move { promise.get().await }); - } - if let Some(r_2) = r_1.prev.upgrade() { - for (&node, &digest) in &point.body.witness { - let mut loc = r_2.locations.entry(node).or_default(); - let promise = loc - .versions - .entry(digest) - .or_insert(Promise::new(Box::pin(DownloadTask {}))) - .clone(); - dependencies.spawn(async move { promise.get().await }); - } - }; - }; + let promise = match self + .locations + .entry(point.body.location.author) + .or_default() + .versions + .entry(point.digest) + { + btree_map::Entry::Occupied(entry) => entry.get().clone(), + btree_map::Entry::Vacant(entry) => { + let mut dependencies = JoinSet::new(); + if let Some(r_1) = self.prev.upgrade() { + for (&node, &digest) in &point.body.includes { + let mut loc = r_1.locations.entry(node).or_default(); + let promise = loc + .versions + .entry(digest) + .or_insert(Promise::new(Box::pin(DownloadTask {}))) + .clone(); + dependencies.spawn(promise.into_value()); + } + if let Some(r_2) = r_1.prev.upgrade() { + for (&node, &digest) in &point.body.witness { + let mut loc = r_2.locations.entry(node).or_default(); + let promise = loc + .versions + .entry(digest) + .or_insert(Promise::new(Box::pin(DownloadTask {}))) + .clone(); + dependencies.spawn(promise.into_value()); + } + }; + }; - Ok(Promise::new(async move { - while let Some(res) = dependencies.join_next().await { - let res = match res { - Ok(value) => value, - Err(e) => { - if e.is_panic() { - std::panic::resume_unwind(e.into_panic()); + let promise = Promise::new(async move { + while let Some(res) = dependencies.join_next().await { + let res = match res { + Ok(value) => value, + Err(e) => { + if e.is_panic() { + std::panic::resume_unwind(e.into_panic()); + } + unreachable!(); + } + }; + + if !res.is_valid() { + return DagPoint::Invalid(Arc::new(point)); } - unreachable!(); } - }; - if !res.is_valid() { - return DagPoint::Invalid(Arc::new(point)); - } + DagPoint::Valid(Arc::new(IndexedPoint::new(point))) + }); + + entry.insert(promise).clone() } + }; - DagPoint::Valid(Arc::new(IndexedPoint::new(point))) - })) + Ok(promise.get().await) } } diff --git a/consensus/src/engine/promise.rs b/consensus/src/engine/promise.rs index ae629f7da..b3f0b17b3 100644 --- a/consensus/src/engine/promise.rs +++ b/consensus/src/engine/promise.rs @@ -81,4 +81,8 @@ where panic!("task panicked") } + + pub async fn into_value(self) -> T { + self.get().await + } } From f8d456fae1e08457c87222480e122763c5818933 Mon Sep 17 00:00:00 2001 From: Ivan Kalinin Date: Sun, 10 Mar 2024 20:57:34 +0100 Subject: [PATCH 05/32] fix(consensus): fix deadlocks in round --- consensus/src/engine/dag.rs | 136 +++++++++++++++++++++++--------- consensus/src/engine/mod.rs | 1 - consensus/src/engine/promise.rs | 88 --------------------- 3 files changed, 98 insertions(+), 127 deletions(-) delete mode 100644 consensus/src/engine/promise.rs diff --git a/consensus/src/engine/dag.rs b/consensus/src/engine/dag.rs index 063b95333..b135008e0 100644 --- a/consensus/src/engine/dag.rs +++ b/consensus/src/engine/dag.rs @@ -1,15 +1,19 @@ use 
std::collections::{btree_map, BTreeMap, VecDeque}; use std::num::{NonZeroU8, NonZeroUsize}; +use std::pin::Pin; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::{Arc, OnceLock, Weak}; +use std::task::{Context, Poll}; use ahash::RandomState; use anyhow::{anyhow, Result}; -use tokio::task::JoinSet; +use futures_util::future::BoxFuture; +use futures_util::{Future, FutureExt}; +use tokio::task::{JoinHandle, JoinSet}; use tycho_network::PeerId; +use tycho_util::futures::Shared; use tycho_util::FastDashMap; -use crate::engine::promise::Promise; use crate::models::point::{Digest, Point, Round, Signature}; use crate::tasks::downloader::DownloadTask; @@ -74,7 +78,7 @@ struct DagLocation { // only one of the point versions at current location // may become proven by the next round point(s) of a node; // even if we marked a proven point as invalid, consensus may override our decision - versions: BTreeMap>, + versions: BTreeMap, } struct DagRound { @@ -98,74 +102,130 @@ impl DagRound { } pub async fn valid_point(&self, node: &PeerId, digest: &Digest) -> Option> { - let location = self.locations.get(node)?; - let promise = location.versions.get(digest)?; - let point = promise.get().await; - point.valid() + let point_fut = { + let location = self.locations.get(node)?; + location.versions.get(digest)?.clone() + }; + point_fut.await.valid() } - pub async fn add(&mut self, point: Point) -> Result { + pub fn add(&self, point: Point) -> Result { anyhow::ensure!(point.body.location.round == self.round, "wrong point round"); anyhow::ensure!(point.is_integrity_ok(), "point integrity check failed"); - let promise = match self + let mut location = self .locations .entry(point.body.location.author) - .or_default() - .versions - .entry(point.digest) - { + .or_default(); + + fn add_dependency( + round: &Arc, + node: &PeerId, + digest: &Digest, + dependencies: &mut JoinSet, + ) { + let mut loc = round.locations.entry(*node).or_default(); + let fut = loc + .versions + .entry(*digest) + .or_insert_with(|| DagPointFut::new(DownloadTask {})) + .clone(); + dependencies.spawn(fut); + } + + Ok(match location.versions.entry(point.digest) { btree_map::Entry::Occupied(entry) => entry.get().clone(), btree_map::Entry::Vacant(entry) => { let mut dependencies = JoinSet::new(); if let Some(r_1) = self.prev.upgrade() { - for (&node, &digest) in &point.body.includes { - let mut loc = r_1.locations.entry(node).or_default(); - let promise = loc - .versions - .entry(digest) - .or_insert(Promise::new(Box::pin(DownloadTask {}))) - .clone(); - dependencies.spawn(promise.into_value()); + for (node, digest) in &point.body.includes { + add_dependency(&r_1, &node, &digest, &mut dependencies); } if let Some(r_2) = r_1.prev.upgrade() { - for (&node, &digest) in &point.body.witness { - let mut loc = r_2.locations.entry(node).or_default(); - let promise = loc - .versions - .entry(digest) - .or_insert(Promise::new(Box::pin(DownloadTask {}))) - .clone(); - dependencies.spawn(promise.into_value()); + for (node, digest) in &point.body.witness { + add_dependency(&r_2, &node, &digest, &mut dependencies); } }; }; - let promise = Promise::new(async move { + let fut = DagPointFut::new(async move { while let Some(res) = dependencies.join_next().await { - let res = match res { - Ok(value) => value, + match res { + Ok(value) if value.is_valid() => continue, + Ok(_) => return DagPoint::Invalid(Arc::new(point)), Err(e) => { if e.is_panic() { std::panic::resume_unwind(e.into_panic()); } unreachable!(); } - }; - - if !res.is_valid() { - return 
DagPoint::Invalid(Arc::new(point)); } } DagPoint::Valid(Arc::new(IndexedPoint::new(point))) }); - entry.insert(promise).clone() + entry.insert(fut).clone() + } + }) + } +} + +#[derive(Clone)] +#[repr(transparent)] +pub struct DagPointFut { + inner: Shared>, +} + +impl DagPointFut { + fn new(f: F) -> Self + where + F: Future + Send + 'static, + { + struct FutGuard { + handle: JoinHandle, + complete: bool, + } + + impl Drop for FutGuard { + fn drop(&mut self) { + if !self.complete { + self.handle.abort(); + } } + } + + let mut guard = FutGuard { + handle: tokio::spawn(f), + complete: false, }; - Ok(promise.get().await) + Self { + inner: Shared::new(Box::pin(async move { + match (&mut guard.handle).await { + Ok(value) => { + guard.complete = true; + value + } + Err(e) => { + if e.is_panic() { + std::panic::resume_unwind(e.into_panic()); + } + unreachable!() + } + } + })), + } + } +} + +impl Future for DagPointFut { + type Output = DagPoint; + + #[inline] + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + let (value, _) = futures_util::ready!(self.inner.poll_unpin(cx)); + Poll::Ready(value) } } diff --git a/consensus/src/engine/mod.rs b/consensus/src/engine/mod.rs index afcc7c0a1..8dfa1d74b 100644 --- a/consensus/src/engine/mod.rs +++ b/consensus/src/engine/mod.rs @@ -1,7 +1,6 @@ pub mod dag; pub mod neighbour_watch; mod node_schedule; -mod promise; mod signer; pub mod threshold_clock; mod verifier; diff --git a/consensus/src/engine/promise.rs b/consensus/src/engine/promise.rs deleted file mode 100644 index b3f0b17b3..000000000 --- a/consensus/src/engine/promise.rs +++ /dev/null @@ -1,88 +0,0 @@ -use std::sync::{Arc, Weak}; - -use futures_util::future::BoxFuture; -use futures_util::Future; -use parking_lot::Mutex; -use tokio::sync::broadcast; -use tokio::sync::broadcast::error::RecvError; - -#[derive(Clone)] -enum Inner { - Ready(T), - Pending(Weak>), -} - -#[derive(Clone)] -pub struct Promise { - // TODO try OnceLock::get before mutex, then if None = Sender::subscribe() try OnceLock again - inner: Arc>>, -} - -impl Promise -where - T: Clone + Send + Sync + 'static, -{ - pub fn new(fut: F) -> Self - where - F: Future + Send + 'static, - { - fn new_impl(fut: BoxFuture<'static, T>) -> Promise - where - T: Clone + Send + 'static, - { - let (tx, _) = broadcast::channel::(1); - let tx = Arc::new(tx); - - // weak ref to Sender is dropped if spawned task panicked - let inner = Arc::new(Mutex::new(Inner::Pending(Arc::downgrade(&tx)))); - let this = Promise { - inner: inner.clone(), - }; - - tokio::spawn(async move { - let res = fut.await; - let mut inner = inner.lock(); - _ = tx.send(res.clone()); - *inner = Inner::Ready(res); - }); - - this - } - - let fut = match castaway::cast!(fut, BoxFuture<'static, T>) { - Ok(fut) => fut, - Err(fut) => Box::pin(fut), - }; - new_impl(fut) - } - - pub fn ready(value: T) -> Self { - Self { - inner: Arc::new(Mutex::new(Inner::Ready(value))), - } - } - - pub async fn get(&self) -> T { - 'panicked: { - let mut rx = match &*self.inner.lock() { - Inner::Ready(value) => return value.clone(), - Inner::Pending(inflight) => match inflight.upgrade() { - Some(rx) => rx.subscribe(), - None => break 'panicked, - }, - }; - - match rx.recv().await { - Ok(value) => return value, - Err(RecvError::Closed) => break 'panicked, - Err(RecvError::Lagged(_)) => unreachable!(), - } - } - - panic!("task panicked") - } - - pub async fn into_value(self) -> T { - self.get().await - } -} From 21d8b7e513380e81a935d4612ae7ac2587362146 Mon Sep 17 00:00:00 2001 From: Kirill 
Mikheev Date: Mon, 11 Mar 2024 22:07:04 +0300 Subject: [PATCH 06/32] fix(consensus): use private overlay --- Cargo.lock | 1 + consensus/Cargo.toml | 1 + consensus/src/engine/dag.rs | 2 +- consensus/src/intercom/dispatcher.rs | 283 ++++++++++++++++----------- consensus/src/models/point.rs | 2 +- 5 files changed, 171 insertions(+), 118 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f73e65af3..c14c6400c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1962,6 +1962,7 @@ dependencies = [ "everscale-crypto", "futures-util", "parking_lot", + "rand", "serde", "sha2", "thiserror", diff --git a/consensus/Cargo.toml b/consensus/Cargo.toml index eeade71a5..53e944d3b 100644 --- a/consensus/Cargo.toml +++ b/consensus/Cargo.toml @@ -33,6 +33,7 @@ ahash = "0.8" [dev-dependencies] tokio = { version = "1", features = ["rt-multi-thread", "macros"] } tracing-test = "0.2" +rand = "0.8" [lints] workspace = true diff --git a/consensus/src/engine/dag.rs b/consensus/src/engine/dag.rs index b135008e0..88e7cf0ef 100644 --- a/consensus/src/engine/dag.rs +++ b/consensus/src/engine/dag.rs @@ -323,7 +323,7 @@ impl Dag { BTreeMap::new(), // points @ r-2 BTreeMap::new(), // points @ r-3 ]; - drop(anchor); // anchor payload will be committed the next time + _ = anchor; // anchor payload will be committed the next time let mut uncommitted = VecDeque::new(); diff --git a/consensus/src/intercom/dispatcher.rs b/consensus/src/intercom/dispatcher.rs index 27457a542..0ecccc9db 100644 --- a/consensus/src/intercom/dispatcher.rs +++ b/consensus/src/intercom/dispatcher.rs @@ -1,137 +1,157 @@ -use std::net::{Ipv4Addr, SocketAddr}; +use std::net::{Ipv4Addr, SocketAddr, ToSocketAddrs}; use std::sync::Arc; use anyhow::{anyhow, Result}; use bytes::Bytes; +use everscale_crypto::ed25519; use serde::{Deserialize, Serialize}; use tycho_network::{ - service_query_fn, Network, NetworkConfig, NetworkExt, Response, ServiceRequest, Version, + Network, OverlayId, OverlayService, PeerId, PrivateOverlay, Response, Router, Service, + ServiceRequest, Version, }; +use tycho_util::futures::BoxFutureOrNoop; use crate::models::point::{Location, Point, PointId, Round, Signature}; -#[derive(Serialize, Deserialize, Debug)] -pub struct BroadcastResponse { - // for requested point - pub signature: Signature, - // at the same round, if it was not skipped - pub signer_point: Option, -} -#[derive(Serialize, Deserialize, Debug)] -pub struct PointResponse { - pub point: Option, -} -//PointLast(Option), - -#[derive(Serialize, Deserialize, Debug)] -pub struct PointsResponse { - pub vertices: Vec, -} - #[derive(Serialize, Deserialize, Debug)] enum MPRequest { - // by author Broadcast { point: Point }, Point { id: PointId }, - Points { round: Round }, +} + +#[derive(Serialize, Deserialize, Debug)] +enum MPRemoteResult { + Ok(MPResponse), + Err(String), } #[derive(Serialize, Deserialize, Debug)] enum MPResponse { Broadcast(BroadcastResponse), Point(PointResponse), - //PointLast(Option), - Points(PointsResponse), } #[derive(Serialize, Deserialize, Debug)] -enum MPRemoteResult { - Ok(MPResponse), - Err(String), +struct BroadcastResponse { + // for requested point + pub signature: Signature, + // at the same round, if it was not skipped + pub signer_point: Option, +} +#[derive(Serialize, Deserialize, Debug)] +struct PointResponse { + pub point: Option, } pub struct Dispatcher { - inner: Arc, network: Network, + private_overlay: PrivateOverlay, } impl Dispatcher { - pub fn new() -> Result { - let inner = Arc::new(DispatcherInner {}); - let service_fn = 
service_query_fn({ - let inner = inner.clone(); - move |req| inner.clone().handle(req) - }); + const PRIVATE_OVERLAY_ID: OverlayId = OverlayId(*b"ac87b6945b4f6f736963f7f65d025943"); - let network = Network::builder() - .with_config(NetworkConfig::default()) - .with_random_private_key() - .with_service_name("tycho-mempool-router") - .build((Ipv4Addr::LOCALHOST, 0), service_fn)?; + pub fn new(socket_addr: T, key: &ed25519::SecretKey) -> Self { + let keypair = ed25519::KeyPair::from(key); + let local_id = PeerId::from(keypair.public_key); - Ok(Self { inner, network }) - } + let private_overlay = PrivateOverlay::builder(Self::PRIVATE_OVERLAY_ID) + .build(Responder(Arc::new(ResponderInner {}))); - pub async fn broadcast(&self, point: Point, from: SocketAddr) -> Result { - let request = tycho_network::Request { - version: Version::V1, - body: Bytes::from(bincode::serialize(&MPRequest::Broadcast { point })?), - }; + let (overlay_tasks, overlay_service) = OverlayService::builder(local_id) + .with_private_overlay(&private_overlay) + .build(); + + let router = Router::builder().route(overlay_service).build(); + + let network = Network::builder() + .with_private_key(key.to_bytes()) + .with_service_name("mempool-network-service") + .build(socket_addr, router) + .unwrap(); - let remote_peer = self.network.connect(from).await?; + overlay_tasks.spawn(network.clone()); - let response = self.network.query(&remote_peer, request).await?; + Self { + network, + private_overlay, + } + } - match parse_response(&response.body)? { + pub async fn broadcast(&self, node: &PeerId, point: Point) -> Result { + // TODO: move MPRequest et al to TL - will need not copy Point + let response = self.query(node, &MPRequest::Broadcast { point }).await?; + match Self::parse_response(node, &response.body)? { MPResponse::Broadcast(r) => Ok(r), - x => Err(anyhow!("wrong response")), + _ => Err(anyhow!("MPResponse::Broadcast: mismatched response")), } } - pub async fn point(&self, id: PointId, from: SocketAddr) -> Result { + pub async fn get_point(&self, node: &PeerId, id: PointId) -> Result { + let response = self.query(node, &MPRequest::Point { id }).await?; + match Self::parse_response(node, &response.body)? { + MPResponse::Point(r) => Ok(r), + _ => Err(anyhow!("MPResponse::Point: mismatched response")), + } + } + + async fn query(&self, node: &PeerId, data: &MPRequest) -> Result { let request = tycho_network::Request { version: Version::V1, - body: Bytes::from(bincode::serialize(&MPRequest::Point { id })?), + body: Bytes::from(bincode::serialize(data)?), }; - let remote_peer = self.network.connect(from).await?; - - let response = self.network.query(&remote_peer, request).await?; + self.private_overlay + .query(&self.network, node, request) + .await + } - match parse_response(&response.body)? 
{ - MPResponse::Point(r) => Ok(r), - x => Err(anyhow!("wrong response")), + fn parse_response(node: &PeerId, body: &Bytes) -> Result { + match bincode::deserialize::(body) { + Ok(MPRemoteResult::Ok(response)) => Ok(response), + Ok(MPRemoteResult::Err(e)) => Err(anyhow::Error::msg(e)), + Err(e) => Err(anyhow!( + "failed to deserialize response from {node:?}: {e:?}" + )), } } +} - pub async fn points(&self, round: Round, from: SocketAddr) -> Result { - let request = tycho_network::Request { - version: Version::V1, - body: Bytes::from(bincode::serialize(&MPRequest::Points { round })?), - }; +struct Responder(Arc); - let remote_peer = self.network.connect(from).await?; +impl Service for Responder { + type QueryResponse = Response; + type OnQueryFuture = BoxFutureOrNoop>; + type OnMessageFuture = futures_util::future::Ready<()>; + type OnDatagramFuture = futures_util::future::Ready<()>; - let response = self.network.query(&remote_peer, request).await?; + #[inline] + fn on_query(&self, req: ServiceRequest) -> Self::OnQueryFuture { + BoxFutureOrNoop::future(self.0.clone().handle(req)) + } - match parse_response(&response.body)? { - MPResponse::Points(r) => Ok(r), - x => Err(anyhow!("wrong response")), - } + #[inline] + fn on_message(&self, _req: ServiceRequest) -> Self::OnMessageFuture { + futures_util::future::ready(()) + } + + #[inline] + fn on_datagram(&self, _req: ServiceRequest) -> Self::OnDatagramFuture { + futures_util::future::ready(()) } } -struct DispatcherInner { +struct ResponderInner { // state and storage components go here } -impl DispatcherInner { +impl ResponderInner { async fn handle(self: Arc, req: ServiceRequest) -> Option { let body = match bincode::deserialize::(&req.body) { Ok(body) => body, Err(e) => { - tracing::error!("unexpected request from {:?}: {e:?}", req.metadata); + tracing::error!("unexpected request from {:?}: {e:?}", req.metadata.peer_id); // NOTE: malformed request is a reason to ignore it return None; } @@ -150,12 +170,6 @@ impl DispatcherInner { // 1.2 my next includes (merged with Broadcast flow) MPResponse::Point(PointResponse { point: None }) } - MPRequest::Points { round } => { - // sync flow: downloader - MPResponse::Points(PointsResponse { - vertices: Vec::new(), - }) - } }; Some(Response { @@ -172,58 +186,95 @@ impl DispatcherInner { } } -fn parse_response(body: &Bytes) -> anyhow::Result { - if body.is_empty() { - return Err(anyhow::Error::msg( - "remote response serialization exception is hidden by exception during serialization", - )); - } - match bincode::deserialize::(body) { - Ok(MPRemoteResult::Ok(response)) => Ok(response), - Ok(MPRemoteResult::Err(e)) => Err(anyhow::Error::msg(e)), - Err(e) => Err(anyhow!("failed to deserialize response: {e:?}")), - } -} - #[cfg(test)] mod tests { + use tycho_network::{Address, PeerInfo}; + use tycho_util::time::now_sec; + + use crate::models::point::Digest; + use super::*; - #[tokio::test] - #[tracing_test::traced_test] - async fn underlying_network_works() -> Result<()> { - let node1 = Dispatcher::new()?.network; - let node2 = Dispatcher::new()?.network; - - let peer2 = node1.connect(node2.local_addr()).await?; - let response = node1 - .query( - &peer2, - tycho_network::Request { - version: Version::V1, - body: Bytes::from("bites"), - }, - ) - .await - .and_then(|a| parse_response(&a.body)); + fn make_peer_info(key: &ed25519::SecretKey, address: Address) -> PeerInfo { + let keypair = ed25519::KeyPair::from(key); + let peer_id = PeerId::from(keypair.public_key); + + let now = now_sec(); + let mut node_info = 
PeerInfo { + id: peer_id, + address_list: vec![address].into_boxed_slice(), + created_at: now, + expires_at: u32::MAX, + signature: Box::new([0; 64]), + }; + *node_info.signature = keypair.sign(&node_info); + node_info + } - tracing::info!("response '{response:?}'"); + fn make_network(node_count: usize) -> Vec { + let keys = (0..node_count) + .map(|_| ed25519::SecretKey::generate(&mut rand::thread_rng())) + .collect::>(); - assert!(response.is_err()); - Ok(()) + let mut nodes = keys + .iter() + .map(|k| Dispatcher::new((Ipv4Addr::LOCALHOST, 0), k)) + .collect::>(); + + let bootstrap_info = std::iter::zip(&keys, &nodes) + .map(|(key, node)| Arc::new(make_peer_info(key, node.network.local_addr().into()))) + .collect::>(); + + for node in &mut nodes { + let mut private_overlay_entries = node.private_overlay.write_entries(); + + for info in &bootstrap_info { + if info.id == node.network.peer_id() { + continue; + } + + let handle = node + .network + .known_peers() + .insert(info.clone(), false) + .unwrap(); + private_overlay_entries.insert(&info.id, Some(handle)); + } + } + + nodes } #[tokio::test] #[tracing_test::traced_test] async fn dispatcher_works() -> Result<()> { - let node1 = Dispatcher::new()?; - let node2 = Dispatcher::new()?; + tracing::info!("dispatcher_works"); + + let nodes = make_network(2); - let data = node1.points(Round(0), node2.network.local_addr()).await?; + let point_id = PointId { + location: crate::models::point::Location { + round: Round(0), + author: PeerId([0u8; 32]), + }, + digest: Digest([0u8; 32]), + }; + + for i in 0..nodes.len() { + for j in 0..nodes.len() { + if i == j { + continue; + } - tracing::info!("response: '{data:?}'"); + let left = &nodes[i]; + let right = &nodes[j]; - assert!(data.vertices.is_empty()); + let PointResponse { point } = left + .get_point(right.network.peer_id(), point_id.clone()) + .await?; + assert!(point.is_none()); + } + } Ok(()) } } diff --git a/consensus/src/models/point.rs b/consensus/src/models/point.rs index 2342b6a0f..f6e4586f0 100644 --- a/consensus/src/models/point.rs +++ b/consensus/src/models/point.rs @@ -10,7 +10,7 @@ use tycho_network::PeerId; use tycho_util::FastHashMap; #[derive(Copy, Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] -pub struct Digest([u8; 32]); +pub struct Digest(pub [u8; 32]); #[derive(Clone, Serialize, Deserialize, Debug)] pub struct Signature(pub Bytes); From 957cc6fd602b0f0bbaf3dad82b599f924c70725c Mon Sep 17 00:00:00 2001 From: Kirill Mikheev Date: Mon, 11 Mar 2024 23:18:10 +0300 Subject: [PATCH 07/32] refactor(consensus): use shared join task --- consensus/src/engine/dag.rs | 82 +++++-------------------------------- 1 file changed, 11 insertions(+), 71 deletions(-) diff --git a/consensus/src/engine/dag.rs b/consensus/src/engine/dag.rs index 88e7cf0ef..b2da1626c 100644 --- a/consensus/src/engine/dag.rs +++ b/consensus/src/engine/dag.rs @@ -1,17 +1,15 @@ use std::collections::{btree_map, BTreeMap, VecDeque}; use std::num::{NonZeroU8, NonZeroUsize}; -use std::pin::Pin; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::{Arc, OnceLock, Weak}; -use std::task::{Context, Poll}; use ahash::RandomState; use anyhow::{anyhow, Result}; -use futures_util::future::BoxFuture; -use futures_util::{Future, FutureExt}; -use tokio::task::{JoinHandle, JoinSet}; +use futures_util::FutureExt; +use tokio::task::JoinSet; + use tycho_network::PeerId; -use tycho_util::futures::Shared; +use tycho_util::futures::{JoinTask, Shared}; use tycho_util::FastDashMap; use 
crate::models::point::{Digest, Point, Round, Signature}; @@ -78,7 +76,7 @@ struct DagLocation { // only one of the point versions at current location // may become proven by the next round point(s) of a node; // even if we marked a proven point as invalid, consensus may override our decision - versions: BTreeMap, + versions: BTreeMap>>, } struct DagRound { @@ -106,10 +104,10 @@ impl DagRound { let location = self.locations.get(node)?; location.versions.get(digest)?.clone() }; - point_fut.await.valid() + point_fut.await.0.valid() } - pub fn add(&self, point: Point) -> Result { + pub fn add(&self, point: Point) -> Result>> { anyhow::ensure!(point.body.location.round == self.round, "wrong point round"); anyhow::ensure!(point.is_integrity_ok(), "point integrity check failed"); @@ -128,9 +126,9 @@ impl DagRound { let fut = loc .versions .entry(*digest) - .or_insert_with(|| DagPointFut::new(DownloadTask {})) + .or_insert_with(|| Shared::new(JoinTask::new(DownloadTask {}))) .clone(); - dependencies.spawn(fut); + dependencies.spawn(fut.map(|a| a.0)); } Ok(match location.versions.entry(point.digest) { @@ -148,7 +146,7 @@ impl DagRound { }; }; - let fut = DagPointFut::new(async move { + let fut = Shared::new(JoinTask::new(async move { while let Some(res) = dependencies.join_next().await { match res { Ok(value) if value.is_valid() => continue, @@ -163,7 +161,7 @@ impl DagRound { } DagPoint::Valid(Arc::new(IndexedPoint::new(point))) - }); + })); entry.insert(fut).clone() } @@ -171,64 +169,6 @@ impl DagRound { } } -#[derive(Clone)] -#[repr(transparent)] -pub struct DagPointFut { - inner: Shared>, -} - -impl DagPointFut { - fn new(f: F) -> Self - where - F: Future + Send + 'static, - { - struct FutGuard { - handle: JoinHandle, - complete: bool, - } - - impl Drop for FutGuard { - fn drop(&mut self) { - if !self.complete { - self.handle.abort(); - } - } - } - - let mut guard = FutGuard { - handle: tokio::spawn(f), - complete: false, - }; - - Self { - inner: Shared::new(Box::pin(async move { - match (&mut guard.handle).await { - Ok(value) => { - guard.complete = true; - value - } - Err(e) => { - if e.is_panic() { - std::panic::resume_unwind(e.into_panic()); - } - unreachable!() - } - } - })), - } - } -} - -impl Future for DagPointFut { - type Output = DagPoint; - - #[inline] - fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { - let (value, _) = futures_util::ready!(self.inner.poll_unpin(cx)); - Poll::Ready(value) - } -} - #[derive(Debug, thiserror::Error)] pub enum DagError { #[error("Dag empty")] From a8f539f971ee6a52a56ca96f798ecb60b52cac0f Mon Sep 17 00:00:00 2001 From: Kirill Mikheev Date: Tue, 12 Mar 2024 18:56:46 +0300 Subject: [PATCH 08/32] feat(consensus): add overlay client --- Cargo.lock | 2 +- consensus/Cargo.toml | 4 +- consensus/src/engine/mod.rs | 4 +- consensus/src/engine/signer.rs | 1 - consensus/src/engine/verifier.rs | 1 - consensus/src/intercom/dispatcher.rs | 88 +++++++++++++++++------- consensus/src/intercom/mod.rs | 3 +- consensus/src/intercom/overlay_client.rs | 79 +++++++++++++++++++++ consensus/src/intercom/receiver.rs | 1 - consensus/src/intercom/uploader.rs | 1 - 10 files changed, 148 insertions(+), 36 deletions(-) delete mode 100644 consensus/src/engine/signer.rs delete mode 100644 consensus/src/engine/verifier.rs create mode 100644 consensus/src/intercom/overlay_client.rs delete mode 100644 consensus/src/intercom/receiver.rs delete mode 100644 consensus/src/intercom/uploader.rs diff --git a/Cargo.lock b/Cargo.lock index c14c6400c..e86f5e2c6 100644 --- 
a/Cargo.lock +++ b/Cargo.lock @@ -1968,7 +1968,7 @@ dependencies = [ "thiserror", "tokio", "tracing", - "tracing-test", + "tracing-subscriber", "tycho-network", "tycho-storage", "tycho-util", diff --git a/consensus/Cargo.toml b/consensus/Cargo.toml index 53e944d3b..c5da9934e 100644 --- a/consensus/Cargo.toml +++ b/consensus/Cargo.toml @@ -31,9 +31,9 @@ thiserror = "1.0" ahash = "0.8" [dev-dependencies] -tokio = { version = "1", features = ["rt-multi-thread", "macros"] } -tracing-test = "0.2" rand = "0.8" +tokio = { version = "1", features = ["rt-multi-thread", "macros"] } +tracing-subscriber = { version = "0.3", features = ["env-filter"] } [lints] workspace = true diff --git a/consensus/src/engine/mod.rs b/consensus/src/engine/mod.rs index 8dfa1d74b..8bbeea5f3 100644 --- a/consensus/src/engine/mod.rs +++ b/consensus/src/engine/mod.rs @@ -1,6 +1,4 @@ pub mod dag; -pub mod neighbour_watch; +mod neighbour_watch; mod node_schedule; -mod signer; pub mod threshold_clock; -mod verifier; diff --git a/consensus/src/engine/signer.rs b/consensus/src/engine/signer.rs deleted file mode 100644 index 8b1378917..000000000 --- a/consensus/src/engine/signer.rs +++ /dev/null @@ -1 +0,0 @@ - diff --git a/consensus/src/engine/verifier.rs b/consensus/src/engine/verifier.rs deleted file mode 100644 index 8b1378917..000000000 --- a/consensus/src/engine/verifier.rs +++ /dev/null @@ -1 +0,0 @@ - diff --git a/consensus/src/intercom/dispatcher.rs b/consensus/src/intercom/dispatcher.rs index 0ecccc9db..72050e620 100644 --- a/consensus/src/intercom/dispatcher.rs +++ b/consensus/src/intercom/dispatcher.rs @@ -1,5 +1,6 @@ use std::net::{Ipv4Addr, SocketAddr, ToSocketAddrs}; use std::sync::Arc; +use std::time::Duration; use anyhow::{anyhow, Result}; use bytes::Bytes; @@ -7,11 +8,12 @@ use everscale_crypto::ed25519; use serde::{Deserialize, Serialize}; use tycho_network::{ - Network, OverlayId, OverlayService, PeerId, PrivateOverlay, Response, Router, Service, - ServiceRequest, Version, + DhtClient, DhtConfig, DhtService, Network, OverlayConfig, OverlayId, OverlayService, PeerId, + PrivateOverlay, Response, Router, Service, ServiceRequest, Version, }; use tycho_util::futures::BoxFutureOrNoop; +use crate::intercom::overlay_client::OverlayClient; use crate::models::point::{Location, Point, PointId, Round, Signature}; #[derive(Serialize, Deserialize, Debug)] @@ -45,25 +47,55 @@ struct PointResponse { } pub struct Dispatcher { + pub overlay_client: OverlayClient, + pub dht_client: DhtClient, network: Network, - private_overlay: PrivateOverlay, } impl Dispatcher { const PRIVATE_OVERLAY_ID: OverlayId = OverlayId(*b"ac87b6945b4f6f736963f7f65d025943"); - pub fn new(socket_addr: T, key: &ed25519::SecretKey) -> Self { + pub fn new( + socket_addr: T, + key: &ed25519::SecretKey, + all_peers: &Vec, + ) -> Self { let keypair = ed25519::KeyPair::from(key); let local_id = PeerId::from(keypair.public_key); + // TODO receive configured services from general node, + // move current setup to test below as it provides acceptable timing + + let (dht_client_builder, dht_service) = DhtService::builder(local_id) + .with_config(DhtConfig { + local_info_announce_period: Duration::from_secs(1), + max_local_info_announce_period_jitter: Duration::from_secs(1), + routing_table_refresh_period: Duration::from_secs(1), + max_routing_table_refresh_period_jitter: Duration::from_secs(1), + ..Default::default() + }) + .build(); + let private_overlay = PrivateOverlay::builder(Self::PRIVATE_OVERLAY_ID) + .resolve_peers(true) + .with_entries(all_peers) 
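+            // the full validator set is added as entries up front; resolving their
+            // addresses is left to the DHT and overlay services wired up in this
+            // constructor (hence resolve_peers(true) above)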
.build(Responder(Arc::new(ResponderInner {}))); let (overlay_tasks, overlay_service) = OverlayService::builder(local_id) - .with_private_overlay(&private_overlay) + .with_config(OverlayConfig { + private_overlay_peer_resolve_period: Duration::from_secs(1), + private_overlay_peer_resolve_max_jitter: Duration::from_secs(1), + ..Default::default() + }) + .with_dht_service(dht_service.clone()) .build(); - let router = Router::builder().route(overlay_service).build(); + overlay_service.try_add_private_overlay(&private_overlay); + + let router = Router::builder() + .route(dht_service) + .route(overlay_service) + .build(); let network = Network::builder() .with_private_key(key.to_bytes()) @@ -71,16 +103,21 @@ impl Dispatcher { .build(socket_addr, router) .unwrap(); + let dht_client = dht_client_builder.build(network.clone()); + overlay_tasks.spawn(network.clone()); + let overlay_client = OverlayClient::new(all_peers.len(), private_overlay, local_id); + Self { + overlay_client, + dht_client, network, - private_overlay, } } pub async fn broadcast(&self, node: &PeerId, point: Point) -> Result { - // TODO: move MPRequest et al to TL - will need not copy Point + // TODO: move MPRequest et al to TL - won't need to copy Point let response = self.query(node, &MPRequest::Broadcast { point }).await?; match Self::parse_response(node, &response.body)? { MPResponse::Broadcast(r) => Ok(r), @@ -102,7 +139,8 @@ impl Dispatcher { body: Bytes::from(bincode::serialize(data)?), }; - self.private_overlay + self.overlay_client + .overlay .query(&self.network, node, request) .await } @@ -152,7 +190,7 @@ impl ResponderInner { Ok(body) => body, Err(e) => { tracing::error!("unexpected request from {:?}: {e:?}", req.metadata.peer_id); - // NOTE: malformed request is a reason to ignore it + // malformed request is a reason to ignore it return None; } }; @@ -211,46 +249,48 @@ mod tests { node_info } - fn make_network(node_count: usize) -> Vec { + async fn make_network(node_count: usize) -> Vec { let keys = (0..node_count) .map(|_| ed25519::SecretKey::generate(&mut rand::thread_rng())) .collect::>(); + let all_peers = keys + .iter() + .map(|s| PeerId::from(ed25519::KeyPair::from(s).public_key)) + .collect::>(); + let mut nodes = keys .iter() - .map(|k| Dispatcher::new((Ipv4Addr::LOCALHOST, 0), k)) + .map(|s| Dispatcher::new((Ipv4Addr::LOCALHOST, 0), s, &all_peers)) .collect::>(); let bootstrap_info = std::iter::zip(&keys, &nodes) .map(|(key, node)| Arc::new(make_peer_info(key, node.network.local_addr().into()))) .collect::>(); - for node in &mut nodes { - let mut private_overlay_entries = node.private_overlay.write_entries(); - + for node in nodes.first() { for info in &bootstrap_info { if info.id == node.network.peer_id() { continue; } - - let handle = node - .network - .known_peers() - .insert(info.clone(), false) - .unwrap(); - private_overlay_entries.insert(&info.id, Some(handle)); + node.dht_client.add_peer(info.clone()).unwrap(); } } + for node in &nodes { + node.overlay_client.wait_for_peers(node_count - 1).await; + tracing::info!("found peers for {}", node.network.peer_id()); + } + nodes } #[tokio::test] - #[tracing_test::traced_test] async fn dispatcher_works() -> Result<()> { + tracing_subscriber::fmt::try_init().ok(); tracing::info!("dispatcher_works"); - let nodes = make_network(2); + let nodes = make_network(3).await; let point_id = PointId { location: crate::models::point::Location { diff --git a/consensus/src/intercom/mod.rs b/consensus/src/intercom/mod.rs index 24b17ec46..81f91b0ab 100644 --- 
a/consensus/src/intercom/mod.rs +++ b/consensus/src/intercom/mod.rs @@ -1,3 +1,2 @@ mod dispatcher; -mod receiver; -mod uploader; +mod overlay_client; diff --git a/consensus/src/intercom/overlay_client.rs b/consensus/src/intercom/overlay_client.rs new file mode 100644 index 000000000..2faf17d00 --- /dev/null +++ b/consensus/src/intercom/overlay_client.rs @@ -0,0 +1,79 @@ +use std::ops::DerefMut; +use std::sync::Arc; + +use ahash::RandomState; +use tokio::sync::broadcast; +use tokio::sync::broadcast::error::RecvError; + +use tycho_network::{PeerId, PrivateOverlay, PrivateOverlayEntriesEvent}; +use tycho_util::futures::JoinTask; +use tycho_util::{FastDashMap, FastDashSet}; + +#[derive(Clone)] +pub struct OverlayClient { + pub peers: Arc>, + pub overlay: PrivateOverlay, +} + +impl OverlayClient { + pub fn new(node_count: usize, overlay: PrivateOverlay, local_id: PeerId) -> Self { + let peers = Arc::new(FastDashSet::::with_capacity_and_hasher( + node_count, + RandomState::new(), + )); + tokio::spawn(Self::listen( + peers.clone(), + overlay.clone().read_entries().subscribe(), + local_id, + )); + Self { peers, overlay } + } + + pub async fn wait_for_peers(&self, node_count: usize) { + if self.peers.len() >= node_count { + return; + } else { + let mut rx = self.overlay.read_entries().subscribe(); + while self.peers.len() < node_count { + match rx.recv().await { + Ok(PrivateOverlayEntriesEvent::Resolved(_)) => {} + _ => {} + } + } + } + } + + async fn listen( + peers: Arc>, + mut rx: broadcast::Receiver, + local_id: PeerId, + ) { + loop { + match rx.recv().await { + Ok(PrivateOverlayEntriesEvent::Added(_)) => {} + Ok(PrivateOverlayEntriesEvent::Resolved(node)) => { + if node != local_id { + peers.insert(node); + } + } + Ok(PrivateOverlayEntriesEvent::Removed(node)) => { + if node != local_id { + peers.remove(&node); + } + } + Err(RecvError::Closed) => { + let msg = + "Fatal: peer info updates channel closed, cannot maintain node connectivity"; + tracing::error!(msg); + panic!("{msg}") + } + Err(RecvError::Lagged(qnt)) => { + tracing::warn!( + "Skipped {qnt} peer info updates, node connectivity may suffer. \ + Consider increasing channel capacity." 
+ ) + } + } + } + } +} diff --git a/consensus/src/intercom/receiver.rs b/consensus/src/intercom/receiver.rs deleted file mode 100644 index 85b0baca4..000000000 --- a/consensus/src/intercom/receiver.rs +++ /dev/null @@ -1 +0,0 @@ -pub struct Receiver {} diff --git a/consensus/src/intercom/uploader.rs b/consensus/src/intercom/uploader.rs deleted file mode 100644 index 1d4b54f75..000000000 --- a/consensus/src/intercom/uploader.rs +++ /dev/null @@ -1 +0,0 @@ -pub struct Uploader {} From 87e255e5e54633bf24aaadee15ebf27f7fda54a9 Mon Sep 17 00:00:00 2001 From: Kirill Mikheev Date: Sun, 17 Mar 2024 03:32:35 +0300 Subject: [PATCH 09/32] feat(consensus): verify regular points --- consensus/src/engine/dag.rs | 123 ++++------- consensus/src/engine/mod.rs | 5 +- consensus/src/engine/neighbour_watch.rs | 2 +- consensus/src/engine/node_schedule.rs | 1 - consensus/src/engine/peer_schedule.rs | 257 +++++++++++++++++++++++ consensus/src/engine/verifier.rs | 229 ++++++++++++++++++++ consensus/src/intercom/dispatcher.rs | 27 +-- consensus/src/intercom/mod.rs | 1 - consensus/src/intercom/overlay_client.rs | 79 ------- consensus/src/models/point.rs | 48 +++-- consensus/src/tasks/downloader.rs | 5 +- 11 files changed, 586 insertions(+), 191 deletions(-) delete mode 100644 consensus/src/engine/node_schedule.rs create mode 100644 consensus/src/engine/peer_schedule.rs create mode 100644 consensus/src/engine/verifier.rs delete mode 100644 consensus/src/intercom/overlay_client.rs diff --git a/consensus/src/engine/dag.rs b/consensus/src/engine/dag.rs index b2da1626c..1278ef58d 100644 --- a/consensus/src/engine/dag.rs +++ b/consensus/src/engine/dag.rs @@ -6,21 +6,20 @@ use std::sync::{Arc, OnceLock, Weak}; use ahash::RandomState; use anyhow::{anyhow, Result}; use futures_util::FutureExt; -use tokio::task::JoinSet; use tycho_network::PeerId; use tycho_util::futures::{JoinTask, Shared}; use tycho_util::FastDashMap; -use crate::models::point::{Digest, Point, Round, Signature}; -use crate::tasks::downloader::DownloadTask; +use crate::engine::verifier::Verifier; +use crate::models::point::{Digest, Point, PointId, Round, Signature}; pub struct IndexedPoint { - point: Point, + pub point: Point, // proof_for: Option>, // includes: Vec>, // witness: Vec>, - is_committed: AtomicBool, + pub is_committed: AtomicBool, } impl IndexedPoint { @@ -34,31 +33,36 @@ impl IndexedPoint { #[derive(Clone)] pub enum DagPoint { - /* Downloading, // -> Validating | Invalid | Unknown */ - /* Validating(Arc), // -> Valid | Invalid */ - Valid(Arc), // needed to blame equivocation or graph connectivity violations - Invalid(Arc), // invalidates dependent point; needed to blame equivocation - NotExists, // invalidates dependent point; blame with caution + // valid, needed to blame equivocation or graph connectivity violations + Trusted(Arc), + // is a valid container, but we doubt author's fairness at the moment of validating; + // we do not sign such point, but others may include it without consequences; + // consensus will decide whether to sign its proof or not; we shall ban the author + Suspicious(Arc), + Invalid(Arc), // invalidates dependent point; needed to blame equivocation + NotExists(Arc), // invalidates dependent point; blame author of dependent point } impl DagPoint { pub fn is_valid(&self) -> bool { match self { - DagPoint::Valid(_) => true, + DagPoint::Trusted(_) => true, + DagPoint::Suspicious(_) => true, _ => false, } } pub fn valid(&self) -> Option> { match self { - DagPoint::Valid(point) => Some(point.clone()), + 
DagPoint::Trusted(point) => Some(point.clone()), + DagPoint::Suspicious(point) => Some(point.clone()), _ => None, } } } #[derive(Default)] -struct DagLocation { +pub struct DagLocation { // one of the points at current location // was proven by the next point of a node; // even if we marked this point as invalid, consensus may override our decision @@ -68,22 +72,17 @@ struct DagLocation { // other (equivocated) points may be received as includes, witnesses or a proven vertex; // we have to include signed points as dependencies in our next block signed_by_me: OnceLock<(Digest, Round, Signature)>, - // if we rejected to sign previous point, - // we require a node to skip the current round; - // if we require to skip after responding with a signature - - // our node cannot invalidate a block retrospectively - no_points_expected: AtomicBool, // only one of the point versions at current location // may become proven by the next round point(s) of a node; // even if we marked a proven point as invalid, consensus may override our decision - versions: BTreeMap>>, + pub versions: BTreeMap>>, } -struct DagRound { - round: Round, +pub struct DagRound { + pub round: Round, node_count: u8, - locations: FastDashMap, - prev: Weak, + pub locations: FastDashMap, + pub prev: Weak, } impl DagRound { @@ -107,65 +106,33 @@ impl DagRound { point_fut.await.0.valid() } - pub fn add(&self, point: Point) -> Result>> { - anyhow::ensure!(point.body.location.round == self.round, "wrong point round"); - anyhow::ensure!(point.is_integrity_ok(), "point integrity check failed"); + pub fn add(&self, point: Box, verifier: &Verifier) -> Shared> { + if &point.body.location.round != &self.round { + panic! {"Coding error: dag round mismatches point round"} + } let mut location = self .locations .entry(point.body.location.author) .or_default(); - fn add_dependency( - round: &Arc, - node: &PeerId, - digest: &Digest, - dependencies: &mut JoinSet, - ) { - let mut loc = round.locations.entry(*node).or_default(); - let fut = loc - .versions - .entry(*digest) - .or_insert_with(|| Shared::new(JoinTask::new(DownloadTask {}))) - .clone(); - dependencies.spawn(fut.map(|a| a.0)); - } - - Ok(match location.versions.entry(point.digest) { + match location.versions.entry(point.digest.clone()) { btree_map::Entry::Occupied(entry) => entry.get().clone(), - btree_map::Entry::Vacant(entry) => { - let mut dependencies = JoinSet::new(); - if let Some(r_1) = self.prev.upgrade() { - for (node, digest) in &point.body.includes { - add_dependency(&r_1, &node, &digest, &mut dependencies); - } - if let Some(r_2) = r_1.prev.upgrade() { - for (node, digest) in &point.body.witness { - add_dependency(&r_2, &node, &digest, &mut dependencies); - } - }; - }; - - let fut = Shared::new(JoinTask::new(async move { - while let Some(res) = dependencies.join_next().await { - match res { - Ok(value) if value.is_valid() => continue, - Ok(_) => return DagPoint::Invalid(Arc::new(point)), - Err(e) => { - if e.is_panic() { - std::panic::resume_unwind(e.into_panic()); - } - unreachable!(); - } - } - } - - DagPoint::Valid(Arc::new(IndexedPoint::new(point))) - })); - - entry.insert(fut).clone() - } - }) + btree_map::Entry::Vacant(entry) => entry + .insert(Shared::new(verifier.verify(&self, point))) + .clone(), + } + // Todo calling site may return signature only for Trusted point + // Detected point equivocation does not invalidate the point, it just + // prevents us (as a fair actor) from returning our signature to the author. 
+ // Such a point may be included in our next "includes" or "witnesses", + // but neither its inclusion nor omitting is required: as we don't + // return our signature, our dependencies cannot be validated against it. + // Equally, we immediately stop communicating with the equivocating node, + // without invalidating any of its points (no matter historical or future). + // The proof for equivocated point cannot be signed + // as we've banned the author on network layer. + // Anyway, no more than one of equivocated points may become a vertex. } } @@ -232,7 +199,7 @@ impl Dag { pub async fn vertex_by(&self, proof: &IndexedPoint) -> Option> { let digest = &proof.point.body.proof.as_ref()?.digest; - let round = proof.point.body.location.round.prev()?; + let round = proof.point.body.location.round.prev(); let dag_round = self.round_at(round)?; dag_round .valid_point(&proof.point.body.location.author, digest) @@ -253,9 +220,7 @@ impl Dag { )); }; - let Some(mut cur_includes_round) = anchor.point.body.location.round.prev() else { - return Err(anyhow!("anchor proof @ 0 cannot exist")); - }; + let mut cur_includes_round = anchor.point.body.location.round.prev(); /* r+0 */ let mut r = [ anchor.point.body.includes.clone(), // points @ r+0 diff --git a/consensus/src/engine/mod.rs b/consensus/src/engine/mod.rs index 8bbeea5f3..252bf80b9 100644 --- a/consensus/src/engine/mod.rs +++ b/consensus/src/engine/mod.rs @@ -1,4 +1,5 @@ pub mod dag; -mod neighbour_watch; -mod node_schedule; +pub mod neighbour_watch; +pub mod peer_schedule; pub mod threshold_clock; +pub mod verifier; diff --git a/consensus/src/engine/neighbour_watch.rs b/consensus/src/engine/neighbour_watch.rs index e5af2f1cb..121cb933d 100644 --- a/consensus/src/engine/neighbour_watch.rs +++ b/consensus/src/engine/neighbour_watch.rs @@ -33,7 +33,7 @@ impl NeighbourWatch { valid = e.round < round && e.time < time // node either skipped a round, or provided evidences for prev block - && round.prev().map_or(e.round.0 + 1 < round.0, |prev| e.round <= prev); + && e.round <= round.prev(); if e.round < round { (*e).round = round }; diff --git a/consensus/src/engine/node_schedule.rs b/consensus/src/engine/node_schedule.rs deleted file mode 100644 index 8b1378917..000000000 --- a/consensus/src/engine/node_schedule.rs +++ /dev/null @@ -1 +0,0 @@ - diff --git a/consensus/src/engine/peer_schedule.rs b/consensus/src/engine/peer_schedule.rs new file mode 100644 index 000000000..2187a36ed --- /dev/null +++ b/consensus/src/engine/peer_schedule.rs @@ -0,0 +1,257 @@ +use std::array; +use std::collections::{BTreeMap, BTreeSet, HashMap}; +use std::ops::Range; +use std::sync::Arc; + +use ahash::RandomState; +use parking_lot::Mutex; +use tokio::sync::broadcast::error::RecvError; + +use tycho_network::{PeerId, PrivateOverlay, PrivateOverlayEntriesEvent}; +use tycho_util::FastHashMap; + +use crate::models::point::Round; + +/* + As validators are elected for wall-clock time range, + the round of validator set switch is not known beforehand + and will be determined by the time in anchor vertices: + it must reach some predefined time range, + when new set is supposed to be online and begin to request points, + and a (relatively high) predefined number of support rounds must follow + for the anchor chain to be committed by majority and for new nodes to gather data. + The switch will occur for validator sets as a whole, at a single leaderless round. 
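+
+    A minimal sketch of the intended switch sequence, assuming only the API declared
+    below (set_next_peers / set_next_start / rotate / forget_previous); peer_schedule,
+    next_peers and switch_round are placeholder bindings, and the exact trigger,
+    derived from committed anchor time, is not shown:
+
+        // the next validator set and its start round become known in advance
+        peer_schedule.set_next_peers(&next_peers);
+        peer_schedule.set_next_start(switch_round);
+        // at the single leaderless switch round, make the next set current
+        peer_schedule.rotate();
+        // after syncing to the new epoch and validating all points of the previous set
+        peer_schedule.forget_previous();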
+*/ +#[derive(Clone)] +pub struct PeerSchedule { + // TODO pub leader_schedule: + // FIXME determine if our local_id is in next epoch + inner: Arc>, + overlay: PrivateOverlay, + pub local_id: PeerId, // FIXME move into schedule when it starts to change with new epoch +} + +impl PeerSchedule { + pub fn new( + current_epoch_start: Round, + current_peers: &Vec, + overlay: &PrivateOverlay, + local_id: &PeerId, + ) -> Self { + let mut current_peers = current_peers.clone(); + current_peers.retain(|p| p != local_id); + let this = Self { + inner: Arc::new(Mutex::new(PeerScheduleInner::new( + current_epoch_start, + ¤t_peers, + ))), + overlay: overlay.clone(), + local_id: local_id.clone(), + }; + tokio::spawn(this.clone().listen()); + this + } + + // To sign a point or to query for points, we need to know the intersection of: + // * which nodes are in the validator set during the round of interest + // * which nodes are able to connect at the moment + /// TODO replace bool with AtomicBool? use Arc? to return map with auto refresh + pub async fn wait_for_peers(&self, round: Round, node_count: usize) { + let mut rx = self.overlay.read_entries().subscribe(); + let mut peers = (*self.peers_for(round)).clone(); + let mut count = peers.iter().filter(|(_, &is_resolved)| is_resolved).count(); + while count < node_count { + match rx.recv().await { + Ok(PrivateOverlayEntriesEvent::Resolved(peer_id)) if peer_id != self.local_id => { + if let Some(resolved) = peers.get_mut(&peer_id) { + if !*resolved { + count += 1; + } + *resolved = true; + } + } + Ok(PrivateOverlayEntriesEvent::Removed(peer_id)) if peer_id != self.local_id => { + if let Some(resolved) = peers.get_mut(&peer_id) { + if *resolved { + count -= 1; + } + *resolved = false; + } + } + _ => {} + } + } + } + + pub fn peers_for(&self, round: Round) -> Arc> { + let mut inner = self.inner.lock(); + inner.peers_for_index_plus_one(inner.index_plus_one(round)) + } + + pub fn peers_for_array( + &self, + rounds: [Round; N], + ) -> [Arc>; N] { + let mut inner = self.inner.lock(); + array::from_fn(|i| inner.peers_for_index_plus_one(inner.index_plus_one(rounds[i]))) + } + + /// does not return empty maps + pub fn peers_for_range(&self, rounds: Range) -> Vec>> { + if rounds.end <= rounds.start { + return vec![]; + } + let mut inner = self.inner.lock(); + let mut first = inner.index_plus_one(rounds.start); + let last = inner.index_plus_one(rounds.end.prev()); + if 0 == first && first < last { + first += 1; // exclude inner.empty + } + (first..=last) + .into_iter() + .map(|i| inner.peers_for_index_plus_one(i)) + .filter(|m| !m.is_empty()) + .collect() + } + + /// on epoch change + pub fn rotate(&self) { + // make next from previous + let mut inner = self.inner.lock(); + let Some(next) = inner.next_epoch_start else { + let msg = "Fatal: attempt to change epoch, but next epoch start is not set"; + tracing::error!("{msg}"); + panic!("{msg}"); + }; + inner.prev_epoch_start = inner.cur_epoch_start; + inner.cur_epoch_start = next; + inner.next_epoch_start = None; + + if !inner.peers_resolved[0].is_empty() { + Arc::make_mut(&mut inner.peers_resolved[0]).clear(); + } + inner.peers_resolved.rotate_left(1); + } + + /// after successful sync to current epoch + /// and validating all points from previous peer set + /// free some memory and ignore overlay updates + pub fn forget_previous(&self) { + let mut inner = self.inner.lock(); + if !inner.peers_resolved[0].is_empty() { + Arc::make_mut(&mut inner.peers_resolved[0]).clear(); + } + } + + pub fn set_next_start(&self, round: 
Round) { + let mut inner = self.inner.lock(); + _ = inner.next_epoch_start.replace(round); + } + + pub fn set_next_peers(&self, peers: &Vec) { + let mut inner = self.inner.lock(); + let next = inner.peers_resolved[2].as_ref(); + let old = peers + .iter() + .filter_map(|p| next.get(p).map(|b| (p.clone(), *b))) + .collect::>(); + let mut next = Arc::make_mut(&mut inner.peers_resolved[2]); + next.clear(); + next.extend(peers.clone().into_iter().map(|a| (a, false))); + next.extend(old); + } + + /// Returns [true] if update was successfully applied + fn set_resolved(&self, node: &PeerId, resolved: bool) -> bool { + let mut is_applied = false; + let mut inner = self.inner.lock(); + for i in 0..inner.peers_resolved.len() { + let Some(b) = Arc::make_mut(&mut inner.peers_resolved[i]).get_mut(node) else { + continue; + }; + *b = resolved; + is_applied = true; + } + is_applied + } + + async fn listen(self) { + let mut rx = self.overlay.read_entries().subscribe(); + loop { + match rx.recv().await { + Ok(ref event @ PrivateOverlayEntriesEvent::Resolved(node)) + if node != self.local_id => + { + if !self.set_resolved(&node, true) { + tracing::debug!("Skipped {event:?}"); + } + } + Ok(ref event @ PrivateOverlayEntriesEvent::Removed(node)) + if node != self.local_id => + { + if !self.set_resolved(&node, true) { + tracing::debug!("Skipped {event:?}"); + } + } + Err(RecvError::Closed) => { + let msg = "Fatal: peer info updates channel closed, \ + cannot maintain node connectivity"; + tracing::error!(msg); + panic!("{msg}") + } + Err(RecvError::Lagged(qnt)) => { + tracing::warn!( + "Skipped {qnt} peer info updates, node connectivity may suffer. \ + Consider increasing channel capacity." + ) + } + Ok(_) => {} + } + } + } +} + +pub struct PeerScheduleInner { + // order to select leader by coin flip + peers_resolved: [Arc>; 3], + prev_epoch_start: Round, + cur_epoch_start: Round, + next_epoch_start: Option, + empty: Arc>, +} + +impl PeerScheduleInner { + fn new(current_epoch_start: Round, current_peers: &Vec) -> Self { + Self { + peers_resolved: [ + Default::default(), + Arc::new(current_peers.iter().map(|p| (p.clone(), false)).collect()), + Default::default(), + ], + prev_epoch_start: Round(0), + cur_epoch_start: current_epoch_start, + next_epoch_start: None, + empty: Default::default(), + } + } + + fn index_plus_one(&self, round: Round) -> u8 { + if self.next_epoch_start.map_or(false, |r| r <= round) { + 3 + } else if self.cur_epoch_start <= round { + 2 + } else if self.prev_epoch_start <= round { + 1 + } else { + 0 + } + } + + fn peers_for_index_plus_one(&self, index: u8) -> Arc> { + match index { + 0 => self.empty.clone(), + x if x <= 3 => self.peers_resolved[x as usize - 1].clone(), + _ => unreachable!(), + } + } +} diff --git a/consensus/src/engine/verifier.rs b/consensus/src/engine/verifier.rs new file mode 100644 index 000000000..49fdff7e3 --- /dev/null +++ b/consensus/src/engine/verifier.rs @@ -0,0 +1,229 @@ +use std::sync::Arc; + +use futures_util::future; +use futures_util::FutureExt; +use tokio::task::JoinSet; + +use tycho_network::PeerId; +use tycho_util::futures::{JoinTask, Shared}; + +use crate::engine::dag::{DagPoint, DagRound, IndexedPoint}; +use crate::engine::peer_schedule::PeerSchedule; +use crate::models::point::{Digest, Location, Point}; +use crate::tasks::downloader::DownloadTask; + +pub struct Verifier { + peer_schedule: PeerSchedule, +} + +impl Verifier { + pub fn verify( + &self, + r_0 /* r+0 */: &DagRound, + point /* @ r+0 */: Box, + ) -> JoinTask { + if &point.body.location.round != 
&r_0.round { + panic! {"Coding error: dag round mismatches point round"} + } + if !point.is_integrity_ok() { + let not_exists = DagPoint::NotExists(Arc::new(point.id())); // cannot use point body + JoinTask::new(future::ready(not_exists)) + } else if !(point.is_well_formed() && self.is_list_of_signers_ok(&point)) { + let invalid = DagPoint::Invalid(Arc::new(*point)); + JoinTask::new(future::ready(invalid)) + } else if let Some(r_1) = r_0.prev.upgrade() { + let dependencies = Self::gather_deps(r_1, &point); + JoinTask::new(Self::check_deps(point, dependencies)) + } else { + // If r-1 exceeds dag depth, the arg point @ r+0 is considered valid by itself. + // Any point @ r+0 will be committed, only if it has valid proof @ r+1 + // included into valid anchor chain, i.e. validated by consensus. + let trusted = DagPoint::Trusted(Arc::new(IndexedPoint::new(*point))); + JoinTask::new(future::ready(trusted)) + } + } + + fn gather_deps(r_1 /* r-1 */: Arc, point /* @ r+0 */: &Point) -> JoinSet { + fn add_dependency( + round: &Arc, + node: &PeerId, + digest: &Digest, + dependencies: &mut JoinSet, + ) { + let mut loc = round.locations.entry(*node).or_default(); + let fut = loc + .versions + .entry(digest.clone()) + .or_insert_with(|| Shared::new(JoinTask::new(DownloadTask {}))) + .clone(); + dependencies.spawn(fut.map(|a| a.0)); + } + + let mut dependencies = JoinSet::new(); + let author = &point.body.location.author; + + if let Some(loc) = r_1.locations.get(author) { + // to check for equivocation or mandatory skip of a round + for version in loc.versions.values() { + dependencies.spawn(version.clone().map(|a| a.0)); + } + } + for (node, digest) in &point.body.includes { + // integrity check passed, so includes contain author's prev point proof + add_dependency(&r_1, &node, &digest, &mut dependencies); + } + if let Some(r_2) = r_1.prev.upgrade() { + for (node, digest) in &point.body.witness { + add_dependency(&r_2, &node, &digest, &mut dependencies); + } + }; + dependencies + } + + async fn check_deps(point: Box, mut dependencies: JoinSet) -> DagPoint { + // point is well-formed if we got here, so point.proof matches point.includes + let proven = point.body.proof.as_ref().map(|p| &p.digest).clone(); + let prev_loc = Location { + round: point.body.location.round.prev(), + author: point.body.location.author, + }; + + // The node must have no points in previous location + // in case it provide no proof for previous point. + // But equivocation does not invalidate the point. + // Invalid dependency is the author's fault. 
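+        // Summary of the match below, for a dependency found (valid or invalid) at prev_loc,
+        // i.e. a point of the same author at the previous round:
+        //   - author attached no proof (proven == None)           => this point is Invalid
+        //   - proven digest matches and the dependency is valid   => additionally run is_proof_ok()
+        //   - proven digest matches and the dependency is invalid => this point is Invalid
+        //   - proven digest differs (author equivocated)          => mark suspicious and continue
+        // Any Invalid or NotExists dependency at another location makes this point Invalid,
+        // as does a NotExists at prev_loc that matches the proven digest.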
+ let mut is_suspicious = false; + while let Some(res) = dependencies.join_next().await { + match res { + Ok(DagPoint::Trusted(valid) | DagPoint::Suspicious(valid)) => { + if prev_loc == valid.point.body.location { + match proven { + None => return DagPoint::Invalid(Arc::new(*point)), + Some(v) if v == &valid.point.digest => { + if !Self::is_proof_ok(&point, &valid.point) { + return DagPoint::Invalid(Arc::new(*point)); + } // else: ok proof + } + Some(_) => is_suspicious = true, // equivocation + } + } // else: valid dependency + } + Ok(DagPoint::Invalid(invalid)) => { + if prev_loc == invalid.body.location { + match proven { + // node must have skipped prev_loc.round + None => return DagPoint::Invalid(Arc::new(*point)), + Some(v) if v == &invalid.digest => { + return DagPoint::Invalid(Arc::new(*point)) + } + Some(_) => is_suspicious = true, // equivocation + } + } else { + return DagPoint::Invalid(Arc::new(*point)); // just invalid dependency + } + } + Ok(DagPoint::NotExists(not_exists)) => { + if prev_loc == not_exists.location { + match proven { + Some(v) if v == ¬_exists.digest => { + return DagPoint::Invalid(Arc::new(*point)) + } + _ => {} // dependency of some other point; we've banned the sender + } + } else { + return DagPoint::Invalid(Arc::new(*point)); // just invalid dependency + } + } + Err(e) => { + if e.is_panic() { + std::panic::resume_unwind(e.into_panic()); + } + unreachable!(); + } + } + } + if is_suspicious { + DagPoint::Suspicious(Arc::new(IndexedPoint::new(*point))) + } else { + DagPoint::Trusted(Arc::new(IndexedPoint::new(*point))) + } + } + + /// blame author and every dependent point's author + fn is_list_of_signers_ok(&self, point /* @ r+0 */: &Point) -> bool { + let Some(proof /* @ r-1 */) = &point.body.proof else { + return true; + }; + let [ + same_round_peers/* @ r-1 */, + next_round_peers/* @ r+0 */ + ] = self.peer_schedule.peers_for_array([ + point.body.location.round.prev(), + point.body.location.round + ]); + //TODO may there be a problem ? + // the size of required validator set is determined by point's round, + // but if the next round is a new epoch start, amount of available signers may change + + // may include author's signature already contained in proven point, no matter + if proof.evidence.len() < ((same_round_peers.len() + 2) / 3) * 3 + 1 { + return false; + } + + for (peer, _) in proof.evidence.iter() { + if !(same_round_peers.contains_key(peer) || next_round_peers.contains_key(peer)) { + // two validator sets are the same except the first round of a new epoch; + // unexpected peer, thus invalid + return false; + } + } + true + } + + /// blame author and every dependent point's author + fn is_proof_ok(point /* @ r+0 */: &Point, proven: &Point /* @ r-1 */) -> bool { + if point.body.location.author != proven.body.location.author { + unreachable! {"Coding error: mismatched authors of proof and its vertex"} + } + if point.body.location.round.prev() != proven.body.location.round { + unreachable! {"Coding error: mismatched rounds of proof and its vertex"} + } + let Some(proof) = &point.body.proof else { + unreachable! {"Coding error: passed point doesn't contain proof for a given vertex"} + }; + if proof.digest != proven.digest { + unreachable! {"Coding error: mismatched previous point of the same author"} + } + if !(point.body.time >= proven.body.time) { + return false; // time must be non-decreasing + } + let Some(body) = bincode::serialize(&proven.body).ok() else { + // should be removed after move to TL + unreachable! 
{"Library error: failed to serialize point body"} + }; + for (peer, sig) in proof.evidence.iter() { + let Some(pubkey) = peer.as_public_key() else { + // should have been validated outside mempool + unreachable! {"Config error: failed to convert peer id into public key"} + }; + let sig: Result<[u8; 64], _> = sig.0.to_vec().try_into(); + let Some(sig) = sig.ok() else { + // unexpected bytes used as a signature, thus invalid + return false; + }; + if !pubkey.verify_raw(body.as_slice(), &sig) { + return false; + } + } + true + } + + // Todo: leader chain validation - for leaders only (including time) + + // Todo: anchor inclusion validation and time based on it + + // todo: time validation based on now() - for directly received (for signature) points (roots) + // leave time only in leader (anchor) blocks? + + // todo: shallow validation during sync ? +} diff --git a/consensus/src/intercom/dispatcher.rs b/consensus/src/intercom/dispatcher.rs index 72050e620..33511b5f8 100644 --- a/consensus/src/intercom/dispatcher.rs +++ b/consensus/src/intercom/dispatcher.rs @@ -12,8 +12,8 @@ use tycho_network::{ PrivateOverlay, Response, Router, Service, ServiceRequest, Version, }; use tycho_util::futures::BoxFutureOrNoop; +use tycho_util::FastHashSet; -use crate::intercom::overlay_client::OverlayClient; use crate::models::point::{Location, Point, PointId, Round, Signature}; #[derive(Serialize, Deserialize, Debug)] @@ -47,7 +47,7 @@ struct PointResponse { } pub struct Dispatcher { - pub overlay_client: OverlayClient, + pub overlay: PrivateOverlay, pub dht_client: DhtClient, network: Network, } @@ -107,10 +107,8 @@ impl Dispatcher { overlay_tasks.spawn(network.clone()); - let overlay_client = OverlayClient::new(all_peers.len(), private_overlay, local_id); - Self { - overlay_client, + overlay: private_overlay, dht_client, network, } @@ -139,10 +137,7 @@ impl Dispatcher { body: Bytes::from(bincode::serialize(data)?), }; - self.overlay_client - .overlay - .query(&self.network, node, request) - .await + self.overlay.query(&self.network, node, request).await } fn parse_response(node: &PeerId, body: &Bytes) -> Result { @@ -226,6 +221,7 @@ impl ResponderInner { #[cfg(test)] mod tests { + use crate::engine::peer_schedule::PeerSchedule; use tycho_network::{Address, PeerInfo}; use tycho_util::time::now_sec; @@ -268,7 +264,11 @@ mod tests { .map(|(key, node)| Arc::new(make_peer_info(key, node.network.local_addr().into()))) .collect::>(); - for node in nodes.first() { + let schedules = std::iter::zip(&all_peers, &nodes) + .map(|(peer_id, node)| PeerSchedule::new(Round(0), &all_peers, &node.overlay, peer_id)) + .collect::>(); + + if let Some(node) = nodes.first() { for info in &bootstrap_info { if info.id == node.network.peer_id() { continue; @@ -277,9 +277,10 @@ mod tests { } } - for node in &nodes { - node.overlay_client.wait_for_peers(node_count - 1).await; - tracing::info!("found peers for {}", node.network.peer_id()); + let all_peers = FastHashSet::from_iter(all_peers.into_iter()); + for sch in &schedules { + sch.wait_for_peers(Round(1), node_count - 1).await; + tracing::info!("found peers for {}", sch.local_id); } nodes diff --git a/consensus/src/intercom/mod.rs b/consensus/src/intercom/mod.rs index 81f91b0ab..2b75d8eab 100644 --- a/consensus/src/intercom/mod.rs +++ b/consensus/src/intercom/mod.rs @@ -1,2 +1 @@ mod dispatcher; -mod overlay_client; diff --git a/consensus/src/intercom/overlay_client.rs b/consensus/src/intercom/overlay_client.rs deleted file mode 100644 index 2faf17d00..000000000 --- 
a/consensus/src/intercom/overlay_client.rs +++ /dev/null @@ -1,79 +0,0 @@ -use std::ops::DerefMut; -use std::sync::Arc; - -use ahash::RandomState; -use tokio::sync::broadcast; -use tokio::sync::broadcast::error::RecvError; - -use tycho_network::{PeerId, PrivateOverlay, PrivateOverlayEntriesEvent}; -use tycho_util::futures::JoinTask; -use tycho_util::{FastDashMap, FastDashSet}; - -#[derive(Clone)] -pub struct OverlayClient { - pub peers: Arc>, - pub overlay: PrivateOverlay, -} - -impl OverlayClient { - pub fn new(node_count: usize, overlay: PrivateOverlay, local_id: PeerId) -> Self { - let peers = Arc::new(FastDashSet::::with_capacity_and_hasher( - node_count, - RandomState::new(), - )); - tokio::spawn(Self::listen( - peers.clone(), - overlay.clone().read_entries().subscribe(), - local_id, - )); - Self { peers, overlay } - } - - pub async fn wait_for_peers(&self, node_count: usize) { - if self.peers.len() >= node_count { - return; - } else { - let mut rx = self.overlay.read_entries().subscribe(); - while self.peers.len() < node_count { - match rx.recv().await { - Ok(PrivateOverlayEntriesEvent::Resolved(_)) => {} - _ => {} - } - } - } - } - - async fn listen( - peers: Arc>, - mut rx: broadcast::Receiver, - local_id: PeerId, - ) { - loop { - match rx.recv().await { - Ok(PrivateOverlayEntriesEvent::Added(_)) => {} - Ok(PrivateOverlayEntriesEvent::Resolved(node)) => { - if node != local_id { - peers.insert(node); - } - } - Ok(PrivateOverlayEntriesEvent::Removed(node)) => { - if node != local_id { - peers.remove(&node); - } - } - Err(RecvError::Closed) => { - let msg = - "Fatal: peer info updates channel closed, cannot maintain node connectivity"; - tracing::error!(msg); - panic!("{msg}") - } - Err(RecvError::Lagged(qnt)) => { - tracing::warn!( - "Skipped {qnt} peer info updates, node connectivity may suffer. \ - Consider increasing channel capacity." - ) - } - } - } - } -} diff --git a/consensus/src/models/point.rs b/consensus/src/models/point.rs index f6e4586f0..12aa0f99d 100644 --- a/consensus/src/models/point.rs +++ b/consensus/src/models/point.rs @@ -9,22 +9,25 @@ use sha2::{Digest as Sha2Digest, Sha256}; use tycho_network::PeerId; use tycho_util::FastHashMap; -#[derive(Copy, Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] +#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] pub struct Digest(pub [u8; 32]); #[derive(Clone, Serialize, Deserialize, Debug)] pub struct Signature(pub Bytes); -#[derive(Copy, Clone, Serialize, Deserialize, PartialOrd, PartialEq, Debug)] +#[derive(Copy, Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Debug)] pub struct Round(pub u32); impl Round { - pub fn prev(&self) -> Option { - self.0.checked_sub(1).map(Round) + pub fn prev(&self) -> Round { + self.0 + .checked_sub(1) + .map(Round) + .unwrap_or_else(|| panic!("DAG round number overflow, fix dag initial configuration")) } } -#[derive(Clone, Serialize, Deserialize, Debug)] +#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, Debug)] pub struct Location { pub round: Round, pub author: PeerId, @@ -97,18 +100,35 @@ pub struct Point { } impl Point { + pub fn id(&self) -> PointId { + PointId { + location: self.body.location.clone(), + digest: self.digest.clone(), + } + } + + /// Failed integrity means the point may be created by someone else. 
+ /// blame every dependent point author and the sender of this point, + /// do not use the author from point's body pub fn is_integrity_ok(&self) -> bool { let pubkey = self.body.location.author.as_public_key(); let body = bincode::serialize(&self.body).ok(); let sig: Result<[u8; 64], _> = self.signature.0.to_vec().try_into(); - if let Some(((pubkey, body), sig)) = pubkey.zip(body).zip(sig.ok()) { - let mut hasher = Sha256::new(); - hasher.update(body.as_slice()); - hasher.update(sig.as_slice()); - let digest = Digest(hasher.finalize().into()); - pubkey.verify_raw(body.as_slice(), &sig) && digest == self.digest - } else { - false - } + let Some(((pubkey, body), sig)) = pubkey.zip(body).zip(sig.ok()) else { + return false; + }; + let mut hasher = Sha256::new(); + hasher.update(body.as_slice()); + hasher.update(sig.as_slice()); + let digest = Digest(hasher.finalize().into()); + pubkey.verify_raw(body.as_slice(), &sig) && digest == self.digest + } + + /// blame author and every dependent point's author + pub fn is_well_formed(&self) -> bool { + let author = &self.body.location.author; + let prev_included = self.body.includes.get(&author); + let prev_proven = self.body.proof.as_ref().map(|p| &p.digest); + prev_included == prev_proven } } diff --git a/consensus/src/tasks/downloader.rs b/consensus/src/tasks/downloader.rs index 917cfd35a..c02c7279b 100644 --- a/consensus/src/tasks/downloader.rs +++ b/consensus/src/tasks/downloader.rs @@ -4,7 +4,10 @@ use std::task::{Context, Poll}; use crate::engine::dag::DagPoint; -pub struct DownloadTask {} +pub struct DownloadTask { + // point's author is a top priority; fallback priority is (any) dependent point's author + // recursively: every dependency is expected to be signed by 2/3+1 +} impl Future for DownloadTask { type Output = DagPoint; From 6ee31a67894990b56dee0473eed6e8f03014e34e Mon Sep 17 00:00:00 2001 From: Kirill Mikheev Date: Fri, 22 Mar 2024 15:20:03 +0300 Subject: [PATCH 10/32] feat(consensus): verify with anchor points --- Cargo.lock | 10 + consensus/Cargo.toml | 3 +- consensus/src/engine/dag.rs | 165 +++++++++------- consensus/src/engine/mod.rs | 2 +- consensus/src/engine/neighbour_watch.rs | 47 ----- consensus/src/engine/node_count.rs | 37 ++++ consensus/src/engine/peer_schedule.rs | 6 +- consensus/src/engine/verifier.rs | 252 ++++++++++++++++++------ consensus/src/intercom/dispatcher.rs | 6 +- consensus/src/models/point.rs | 195 ++++++++++++++++-- 10 files changed, 517 insertions(+), 206 deletions(-) delete mode 100644 consensus/src/engine/neighbour_watch.rs create mode 100644 consensus/src/engine/node_count.rs diff --git a/Cargo.lock b/Cargo.lock index e86f5e2c6..1a9e510d7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1252,6 +1252,15 @@ dependencies = [ "getrandom", ] +[[package]] +name = "rand_pcg" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59cad018caf63deb318e5a4586d99a24424a364f40f1e5778c29aca23f4fc73e" +dependencies = [ + "rand_core", +] + [[package]] name = "raw-cpuid" version = "11.0.1" @@ -1963,6 +1972,7 @@ dependencies = [ "futures-util", "parking_lot", "rand", + "rand_pcg", "serde", "sha2", "thiserror", diff --git a/consensus/Cargo.toml b/consensus/Cargo.toml index c5da9934e..6aea82315 100644 --- a/consensus/Cargo.toml +++ b/consensus/Cargo.toml @@ -14,6 +14,8 @@ dashmap = "5.4" everscale-crypto = "0.2" futures-util = { version = "0.3" } parking_lot = "0.12" +rand = { version = "0.8" } +rand_pcg = { version = "0.3" } serde = { version = "1.0", features = ["derive"] } sha2 = 
"0.10" tokio = { version = "1", features = ["rt"] } @@ -31,7 +33,6 @@ thiserror = "1.0" ahash = "0.8" [dev-dependencies] -rand = "0.8" tokio = { version = "1", features = ["rt-multi-thread", "macros"] } tracing-subscriber = { version = "0.3", features = ["env-filter"] } diff --git a/consensus/src/engine/dag.rs b/consensus/src/engine/dag.rs index 1278ef58d..6f5130261 100644 --- a/consensus/src/engine/dag.rs +++ b/consensus/src/engine/dag.rs @@ -1,16 +1,18 @@ use std::collections::{btree_map, BTreeMap, VecDeque}; -use std::num::{NonZeroU8, NonZeroUsize}; +use std::num::NonZeroU8; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::{Arc, OnceLock, Weak}; use ahash::RandomState; -use anyhow::{anyhow, Result}; use futures_util::FutureExt; +use rand::{Rng, SeedableRng}; use tycho_network::PeerId; use tycho_util::futures::{JoinTask, Shared}; use tycho_util::FastDashMap; +use crate::engine::node_count::NodeCount; +use crate::engine::peer_schedule::PeerSchedule; use crate::engine::verifier::Verifier; use crate::models::point::{Digest, Point, PointId, Round, Signature}; @@ -33,7 +35,7 @@ impl IndexedPoint { #[derive(Clone)] pub enum DagPoint { - // valid, needed to blame equivocation or graph connectivity violations + // valid without demur, needed to blame equivocation or graph connectivity violations Trusted(Arc), // is a valid container, but we doubt author's fairness at the moment of validating; // we do not sign such point, but others may include it without consequences; @@ -78,23 +80,60 @@ pub struct DagLocation { pub versions: BTreeMap>>, } +pub enum AnchorStage { + Candidate(PeerId), + Proof(PeerId), + Trigger(PeerId), +} + +impl AnchorStage { + pub fn of(round: Round, peer_schedule: &PeerSchedule) -> Option { + const WAVE_SIZE: u32 = 4; + let anchor_candidate_round = (round.0 / WAVE_SIZE) * WAVE_SIZE + 1; + + let [leader_peers, current_peers] = + peer_schedule.peers_for_array([Round(anchor_candidate_round), round]); + // reproducible global coin + let leader_index = rand_pcg::Pcg32::seed_from_u64(anchor_candidate_round as u64) + .gen_range(0..leader_peers.len()); + let Some(leader) = leader_peers + .iter() + .nth(leader_index) + .map(|(peer_id, _)| peer_id) + else { + panic!("Fatal: selecting a leader from an empty validator set") + }; + if !current_peers.contains_key(leader) { + return None; + }; + match round.0 % WAVE_SIZE { + 0 => None, // both genesis and trailing (proof inclusion) round + 1 => Some(AnchorStage::Candidate(leader.clone())), + 2 => Some(AnchorStage::Proof(leader.clone())), + 3 => Some(AnchorStage::Trigger(leader.clone())), + _ => unreachable!(), + } + } +} + pub struct DagRound { pub round: Round, - node_count: u8, + node_count: NodeCount, + pub anchor_stage: Option, pub locations: FastDashMap, pub prev: Weak, } impl DagRound { - fn new(round: Round, node_count: NonZeroU8, prev: Option<&Arc>) -> Self { + fn new(round: Round, peer_schedule: &PeerSchedule, prev: Option>) -> Self { + let peers = peer_schedule.peers_for(round); + let locations = FastDashMap::with_capacity_and_hasher(peers.len(), RandomState::new()); Self { round, - node_count: ((node_count.get() + 2) / 3) * 3 + 1, // 3F+1 - locations: FastDashMap::with_capacity_and_hasher( - node_count.get() as usize, - RandomState::new(), - ), - prev: prev.map_or(Weak::new(), |a| Arc::downgrade(a)), + node_count: NodeCount::new(peers.len()), + anchor_stage: AnchorStage::of(round, peer_schedule), + locations, + prev: prev.unwrap_or_else(|| Weak::new()), } } @@ -106,7 +145,11 @@ impl DagRound { 
point_fut.await.0.valid() } - pub fn add(&self, point: Box, verifier: &Verifier) -> Shared> { + pub fn add( + self: Arc, + point: Box, + peer_schedule: &PeerSchedule, + ) -> Shared> { if &point.body.location.round != &self.round { panic! {"Coding error: dag round mismatches point round"} } @@ -119,88 +162,59 @@ impl DagRound { match location.versions.entry(point.digest.clone()) { btree_map::Entry::Occupied(entry) => entry.get().clone(), btree_map::Entry::Vacant(entry) => entry - .insert(Shared::new(verifier.verify(&self, point))) + .insert(Shared::new(Verifier::verify( + self.clone(), + point, + peer_schedule, + ))) .clone(), } - // Todo calling site may return signature only for Trusted point - // Detected point equivocation does not invalidate the point, it just - // prevents us (as a fair actor) from returning our signature to the author. - // Such a point may be included in our next "includes" or "witnesses", - // but neither its inclusion nor omitting is required: as we don't - // return our signature, our dependencies cannot be validated against it. - // Equally, we immediately stop communicating with the equivocating node, - // without invalidating any of its points (no matter historical or future). - // The proof for equivocated point cannot be signed - // as we've banned the author on network layer. - // Anyway, no more than one of equivocated points may become a vertex. } } -#[derive(Debug, thiserror::Error)] -pub enum DagError { - #[error("Dag empty")] - Empty, - #[error("Point not in dag")] - PointNotInDag, - #[error("Round not in dag")] - RoundNotInDag, -} pub struct Dag { current: Round, // from the oldest to the current round; newer ones are in the future - rounds: VecDeque>, // TODO VecDeque>> for sync + rounds: BTreeMap>, + peer_schedule: PeerSchedule, } impl Dag { - pub fn new(round: Round, node_count: NonZeroU8) -> Self { + pub fn new(round: Round, peer_schedule: PeerSchedule) -> Self { Self { current: round, - rounds: VecDeque::from([Arc::new(DagRound::new(round, node_count, None))]), + rounds: BTreeMap::from([(round, Arc::new(DagRound::new(round, &peer_schedule, None)))]), + peer_schedule, } } // TODO new point is checked against the dag only if it has valid sig, time and round // TODO download from neighbours - pub fn fill_up_to(&mut self, round: Round, node_count: NonZeroU8) -> Result<()> { - match self.rounds.front().map(|f| f.round) { + pub fn fill_up_to(&mut self, round: Round) { + match self.rounds.last_key_value().map(|(k, v)| k) { None => unreachable!("DAG empty"), - Some(front) => { - for round in front.0..round.0 { - self.rounds.push_front(Arc::new(DagRound::new( - Round(round + 1), - node_count, - self.rounds.front(), - ))) + Some(last) => { + for round in (last.0..round.0).into_iter().map(|i| Round(i + 1)) { + let prev = self.rounds.last_key_value().map(|(_, v)| Arc::downgrade(v)); + self.rounds.entry(round).or_insert_with(|| { + Arc::new(DagRound::new(round, &self.peer_schedule, prev)) + }); } - Ok(()) } } } - pub fn drop_tail(&mut self, anchor_at: Round, dag_depth: NonZeroUsize) { - if let Some(tail) = self - .index_of(anchor_at) - .and_then(|a| a.checked_sub(dag_depth.get())) - { - self.rounds.drain(0..tail); + // TODO the next "little anchor candidate that could" must have at least full dag depth + pub fn drop_tail(&mut self, anchor_at: Round, dag_depth: NonZeroU8) { + if let Some(tail) = anchor_at.0.checked_sub(dag_depth.get() as u32) { + self.rounds = self.rounds.split_off(&Round(tail)); }; } - fn round_at(&self, round: Round) -> Option> { - 
self.rounds.get(self.index_of(round)?).cloned() - } - - fn index_of(&self, round: Round) -> Option { - match self.rounds.back().map(|b| b.round) { - Some(back) if back <= round => Some((round.0 - back.0) as usize), - _ => None, - } - } - pub async fn vertex_by(&self, proof: &IndexedPoint) -> Option> { let digest = &proof.point.body.proof.as_ref()?.digest; let round = proof.point.body.location.round.prev(); - let dag_round = self.round_at(round)?; + let dag_round = self.rounds.get(&round)?; dag_round .valid_point(&proof.point.body.location.author, digest) .await @@ -209,16 +223,24 @@ impl Dag { // @return historically ordered vertices (back to front is older to newer) pub async fn gather_uncommitted( &self, - anchor_proof: &IndexedPoint, + anchor_trigger: &IndexedPoint, // dag_depth: usize, - ) -> Result>> { + ) -> VecDeque> { // anchor must be a vertex @ r+1, proven with point @ r+2 + let Some(anchor_proof) = self.vertex_by(&anchor_trigger).await else { + panic!( + "Coding error: anchor trigger @ {} is not in DAG", + &anchor_trigger.point.body.location.round.0 + ); + }; + _ = anchor_trigger; // no more needed for commit let Some(anchor) = self.vertex_by(&anchor_proof).await else { - return Err(anyhow!( - "anchor proof @ {} not in dag", + panic!( + "Coding error: anchor proof @ {} is not in DAG", &anchor_proof.point.body.location.round.0 - )); + ); }; + _ = anchor_proof; // no more needed for commit let mut cur_includes_round = anchor.point.body.location.round.prev(); /* r+0 */ @@ -235,7 +257,8 @@ impl Dag { // TODO visited rounds count must be equal to dag depth: // read/download non-existent rounds and drop too old ones while let Some((proof_round /* r+0 */, vertex_round /* r-1 */)) = self - .round_at(cur_includes_round) + .rounds + .get(&cur_includes_round) .and_then(|cur| cur.prev.upgrade().map(|prev| (cur, prev))) .filter(|_| !r.iter().all(BTreeMap::is_empty)) { @@ -277,6 +300,6 @@ impl Dag { cur_includes_round = vertex_round.round; // next r+0 r.rotate_left(1); } - Ok(uncommitted) + uncommitted } } diff --git a/consensus/src/engine/mod.rs b/consensus/src/engine/mod.rs index 252bf80b9..4ac072def 100644 --- a/consensus/src/engine/mod.rs +++ b/consensus/src/engine/mod.rs @@ -1,5 +1,5 @@ pub mod dag; -pub mod neighbour_watch; +pub mod node_count; pub mod peer_schedule; pub mod threshold_clock; pub mod verifier; diff --git a/consensus/src/engine/neighbour_watch.rs b/consensus/src/engine/neighbour_watch.rs deleted file mode 100644 index 121cb933d..000000000 --- a/consensus/src/engine/neighbour_watch.rs +++ /dev/null @@ -1,47 +0,0 @@ -use std::time::SystemTime; - -use tycho_network::PeerId; -use tycho_util::FastDashMap; - -use crate::models::point::{Point, Round}; - -// from latest block -struct NodeInfo { - round: Round, - time: SystemTime, -} - -pub struct NeighbourWatch { - nodes: FastDashMap, -} - -impl NeighbourWatch { - /// every node must provide: - /// * increasing rounds (two points per same round are equivocation) - /// * time increasing with every round - /// * no prev_point - in case of a gap in rounds (no weak links) - /// * prev_point - in case node made no gaps in rounds - /// * TODO: insert linked (previous) point first, then current one; or move to DAG - pub fn verify(&mut self, point: &Point) -> bool { - let round = point.body.location.round; - let time = point.body.time; - let mut valid = true; - // TODO move to as-is validation: let mut valid = prev_round.map_or(true, |prev| prev.0 + 1 == round.0); - self.nodes - .entry(point.body.location.author.clone()) - 
.and_modify(|e| { - valid = e.round < round - && e.time < time - // node either skipped a round, or provided evidences for prev block - && e.round <= round.prev(); - if e.round < round { - (*e).round = round - }; - if e.time < time { - (*e).time = time - }; - }) - .or_insert(NodeInfo { round, time }); - valid - } -} diff --git a/consensus/src/engine/node_count.rs b/consensus/src/engine/node_count.rs new file mode 100644 index 000000000..12e94e243 --- /dev/null +++ b/consensus/src/engine/node_count.rs @@ -0,0 +1,37 @@ +#[derive(Copy, Clone)] +pub struct NodeCount(u8); + +impl From for usize { + fn from(count: NodeCount) -> Self { + count.0 as usize + } +} + +impl NodeCount { + pub fn new(total_peers: usize) -> Self { + if total_peers < 3 { + panic!("Fatal: node count {total_peers} < 3"); + } + let count = ((total_peers + 2) / 3) * 3 + 1; + let count = u8::try_from(count).unwrap_or_else(|e| { + panic!("Fatal: node count {total_peers} exceeds u8 after rounding to 3F+1: {e:?}"); + }); + NodeCount(count) + } + + pub fn majority_with_me(&self) -> Self { + Self((self.0 / 3) * 2 + 1) + } + + pub fn majority_except_me(&self) -> Self { + Self((self.0 / 3) * 2) + } + + pub fn reliable_minority(&self) -> Self { + Self(self.0 / 3 + 1) + } + + pub fn unreliable(&self) -> Self { + Self(self.0 / 3) + } +} diff --git a/consensus/src/engine/peer_schedule.rs b/consensus/src/engine/peer_schedule.rs index 2187a36ed..e9c7cf052 100644 --- a/consensus/src/engine/peer_schedule.rs +++ b/consensus/src/engine/peer_schedule.rs @@ -10,6 +10,7 @@ use tokio::sync::broadcast::error::RecvError; use tycho_network::{PeerId, PrivateOverlay, PrivateOverlayEntriesEvent}; use tycho_util::FastHashMap; +use crate::engine::node_count::NodeCount; use crate::models::point::Round; /* @@ -24,7 +25,6 @@ use crate::models::point::Round; */ #[derive(Clone)] pub struct PeerSchedule { - // TODO pub leader_schedule: // FIXME determine if our local_id is in next epoch inner: Arc>, overlay: PrivateOverlay, @@ -56,11 +56,11 @@ impl PeerSchedule { // * which nodes are in the validator set during the round of interest // * which nodes are able to connect at the moment /// TODO replace bool with AtomicBool? use Arc? 
to return map with auto refresh - pub async fn wait_for_peers(&self, round: Round, node_count: usize) { + pub async fn wait_for_peers(&self, round: Round, node_count: NodeCount) { let mut rx = self.overlay.read_entries().subscribe(); let mut peers = (*self.peers_for(round)).clone(); let mut count = peers.iter().filter(|(_, &is_resolved)| is_resolved).count(); - while count < node_count { + while count < node_count.into() { match rx.recv().await { Ok(PrivateOverlayEntriesEvent::Resolved(peer_id)) if peer_id != self.local_id => { if let Some(resolved) = peers.get_mut(&peer_id) { diff --git a/consensus/src/engine/verifier.rs b/consensus/src/engine/verifier.rs index 49fdff7e3..164279973 100644 --- a/consensus/src/engine/verifier.rs +++ b/consensus/src/engine/verifier.rs @@ -7,62 +7,162 @@ use tokio::task::JoinSet; use tycho_network::PeerId; use tycho_util::futures::{JoinTask, Shared}; -use crate::engine::dag::{DagPoint, DagRound, IndexedPoint}; +use crate::engine::dag::{AnchorStage, DagPoint, DagRound, IndexedPoint}; +use crate::engine::node_count::NodeCount; use crate::engine::peer_schedule::PeerSchedule; -use crate::models::point::{Digest, Location, Point}; +use crate::models::point::{Digest, Link, Location, Point}; use crate::tasks::downloader::DownloadTask; -pub struct Verifier { - peer_schedule: PeerSchedule, -} +/* +Note on equivocation. +Detected point equivocation does not invalidate the point, it just + prevents us (as a fair actor) from returning our signature to the author. +Such a point may be included in our next "includes" or "witnesses", + but neither its inclusion nor omitting is required: as we don't + return our signature, our dependencies cannot be validated against it. +Equally, we immediately stop communicating with the equivocating node, + without invalidating any of its points (no matter historical or future). +We will not sign the proof for equivocated point + as we've banned the author on network layer. +Anyway, no more than one of equivocated points may become a vertex. +*/ + +pub struct Verifier; impl Verifier { + // todo outside, for points to sign only: check time bounds before validation, sign only Trusted + // todo: shallow verification during sync to close a gap, trusting first vertex contents: + // take any vertex and its proof point, check signatures for the vertex, + // and use all vertex dependencies recursively as Trusted without any checks + // with 3-rounds-wide sliding window that moves backwards + pub fn verify( - &self, - r_0 /* r+0 */: &DagRound, + r_0 /* r+0 */: Arc, point /* @ r+0 */: Box, + peer_schedule: &PeerSchedule, ) -> JoinTask { if &point.body.location.round != &r_0.round { panic! 
{"Coding error: dag round mismatches point round"} } if !point.is_integrity_ok() { let not_exists = DagPoint::NotExists(Arc::new(point.id())); // cannot use point body - JoinTask::new(future::ready(not_exists)) - } else if !(point.is_well_formed() && self.is_list_of_signers_ok(&point)) { + return JoinTask::new(future::ready(not_exists)); + } + let mut dependencies = JoinSet::new(); + if !({ + point.is_well_formed() + && Self::is_self_links_ok(&point, &r_0) + && Self::is_list_of_signers_ok(&point, peer_schedule) + // the last task spawns if ok - in order not to walk through every dag round twice + && Self::add_anchor_links_if_ok(&point, r_0.clone(), &mut dependencies) + }) { let invalid = DagPoint::Invalid(Arc::new(*point)); - JoinTask::new(future::ready(invalid)) - } else if let Some(r_1) = r_0.prev.upgrade() { - let dependencies = Self::gather_deps(r_1, &point); - JoinTask::new(Self::check_deps(point, dependencies)) - } else { - // If r-1 exceeds dag depth, the arg point @ r+0 is considered valid by itself. - // Any point @ r+0 will be committed, only if it has valid proof @ r+1 - // included into valid anchor chain, i.e. validated by consensus. - let trusted = DagPoint::Trusted(Arc::new(IndexedPoint::new(*point))); - JoinTask::new(future::ready(trusted)) + return JoinTask::new(future::ready(invalid)); + } + if let Some(r_1) = r_0.prev.upgrade() { + Self::gather_deps(&point, &r_1, &mut dependencies); + return JoinTask::new(Self::check_deps(point, dependencies)); } + // If r-1 exceeds dag depth, the arg point @ r+0 is considered valid by itself. + // Any point @ r+0 will be committed, only if it has valid proof @ r+1 + // included into valid anchor chain, i.e. validated by consensus. + let trusted = DagPoint::Trusted(Arc::new(IndexedPoint::new(*point))); + JoinTask::new(future::ready(trusted)) } - fn gather_deps(r_1 /* r-1 */: Arc, point /* @ r+0 */: &Point) -> JoinSet { - fn add_dependency( - round: &Arc, - node: &PeerId, - digest: &Digest, - dependencies: &mut JoinSet, - ) { - let mut loc = round.locations.entry(*node).or_default(); - let fut = loc - .versions - .entry(digest.clone()) - .or_insert_with(|| Shared::new(JoinTask::new(DownloadTask {}))) - .clone(); - dependencies.spawn(fut.map(|a| a.0)); + fn is_self_links_ok(point /* @ r+0 */: &Point, dag_round /* r+0 */: &DagRound) -> bool { + // existence of proofs in leader blocks is a part of point's well-form-ness check + match &dag_round.anchor_stage { + // no one may link to self + None | Some(AnchorStage::Candidate(_)) => { + point.body.last_anchor_proof != Link::ToSelf + && point.body.last_anchor_trigger != Link::ToSelf + } + // leader must link to own point while others must not + Some(AnchorStage::Proof(leader_id)) => { + (leader_id == point.body.location.author) + == (point.body.last_anchor_proof == Link::ToSelf) + } + Some(AnchorStage::Trigger(leader_id)) => { + (leader_id == point.body.location.author) + == (point.body.last_anchor_trigger == Link::ToSelf) + } } + } - let mut dependencies = JoinSet::new(); - let author = &point.body.location.author; + /// may visit every DAG round kept in memory + fn add_anchor_links_if_ok( + point: &Point, // @ r+0 + mut dag_round: Arc, // start with r+0 + dependencies: &mut JoinSet, + ) -> bool { + let mut links = vec![ + (point.last_anchor_proof_id(), false), + (point.last_anchor_trigger_id(), true), + ]; + let mut linked_with_round = Vec::with_capacity(2); + while !links.is_empty() { + links.retain(|(linked, is_trigger)| { + let found = linked.location.round == dag_round.round; + if found { 
+ match (&dag_round.anchor_stage, is_trigger) { + // AnchorStage::Candidate(_) requires nothing special + (Some(AnchorStage::Proof(leader_id)), false) + if leader_id == linked.location.author => {} + (Some(AnchorStage::Trigger(leader_id)), true) + if leader_id == linked.location.author => {} + _ => return false, // link not to round's leader + } + linked_with_round.push(( + linked.location.author.clone(), + linked.digest.clone(), + dag_round.clone(), + )); + } + !found + }); + if dag_round.prev.upgrade().map(|r| dag_round = r).is_none() { + // if links in point exceed DAG depth, consider them valid by now; + // either dependencies have more recent link and point will be invalidated later, + // or author was less successful to get fresh data and did not commit for long + // (thus keeps more history in its local Dag) + break; + } + } + // valid linked points will be in dag without this addition by recursion, + // while we need to get invalid ones to blame current point + for (author, digest, dag_round) in linked_with_round { + // skip self links + if dag_round.round < point.body.location.round { + // will add the same point from direct dependencies twice, + // we can do better but nothing terrible + Self::add_dependency(&author, &digest, &dag_round, dependencies); + } + } + true + } + + fn add_dependency( + node: &PeerId, + digest: &Digest, + round: &DagRound, + dependencies: &mut JoinSet, + ) { + let mut loc = round.locations.entry(*node).or_default(); + let fut = loc + .versions + .entry(digest.clone()) + .or_insert_with(|| Shared::new(JoinTask::new(DownloadTask {}))) + .clone(); + dependencies.spawn(fut.map(|a| a.0)); + } - if let Some(loc) = r_1.locations.get(author) { + fn gather_deps( + point /* @ r+0 */: &Point, + r_1 /* r-1 */: &DagRound, + dependencies: &mut JoinSet, + ) { + if let Some(loc) = r_1.locations.get(&point.body.location.author) { // to check for equivocation or mandatory skip of a round for version in loc.versions.values() { dependencies.spawn(version.clone().map(|a| a.0)); @@ -70,19 +170,19 @@ impl Verifier { } for (node, digest) in &point.body.includes { // integrity check passed, so includes contain author's prev point proof - add_dependency(&r_1, &node, &digest, &mut dependencies); + Self::add_dependency(&node, &digest, &r_1, dependencies); } + if let Some(r_2) = r_1.prev.upgrade() { for (node, digest) in &point.body.witness { - add_dependency(&r_2, &node, &digest, &mut dependencies); + Self::add_dependency(&node, &digest, &r_2, dependencies); } }; - dependencies } async fn check_deps(point: Box, mut dependencies: JoinSet) -> DagPoint { // point is well-formed if we got here, so point.proof matches point.includes - let proven = point.body.proof.as_ref().map(|p| &p.digest).clone(); + let proven_vertex = point.body.proof.as_ref().map(|p| &p.digest).clone(); let prev_loc = Location { round: point.body.location.round.prev(), author: point.body.location.author, @@ -93,30 +193,63 @@ impl Verifier { // But equivocation does not invalidate the point. // Invalid dependency is the author's fault. 
let mut is_suspicious = false; + // last is meant to be the last among all dependencies + let anchor_trigger_id = point.last_anchor_trigger_id(); + let anchor_proof_id = point.last_anchor_proof_id(); + let anchor_trigger_through = point.last_anchor_trigger_through(); + let anchor_proof_through = point.last_anchor_proof_through(); while let Some(res) = dependencies.join_next().await { match res { Ok(DagPoint::Trusted(valid) | DagPoint::Suspicious(valid)) => { if prev_loc == valid.point.body.location { - match proven { - None => return DagPoint::Invalid(Arc::new(*point)), - Some(v) if v == &valid.point.digest => { + match proven_vertex { + Some(vertex_digest) if &valid.point.digest == vertex_digest => { if !Self::is_proof_ok(&point, &valid.point) { return DagPoint::Invalid(Arc::new(*point)); } // else: ok proof } Some(_) => is_suspicious = true, // equivocation + // the author must have provided the proof in current point + None => return DagPoint::Invalid(Arc::new(*point)), } } // else: valid dependency + if valid.point.last_anchor_trigger_round() > anchor_trigger_id.location.round + || valid.point.last_anchor_proof_round() > anchor_proof_id.location.round + { + // did not actualize the chain + return DagPoint::Invalid(Arc::new(*point)); + } + let valid_point_id = valid.point.id(); + if ({ + valid_point_id == anchor_trigger_through + && valid.point.last_anchor_trigger_id() != anchor_trigger_id + }) || ({ + valid_point_id == anchor_proof_through + && valid.point.last_anchor_proof_id() != anchor_proof_id + }) { + // path does not lead to destination + return DagPoint::Invalid(Arc::new(*point)); + } + if valid_point_id == anchor_proof_id && point.body.time < valid.point.body.time + { + // Any point that (in)directly includes anchor candidate through its proof + // must provide the time not less than candidate's to maintain + // non-decreasing time in committed anchor chain. + // The time of candidate's valid proof exactly satisfies such requirement: + // it either will be signed by majority (what unblocks the commit trigger), + // or the valid trigger will not be created. 
+ return DagPoint::Invalid(Arc::new(*point)); + } } Ok(DagPoint::Invalid(invalid)) => { if prev_loc == invalid.body.location { - match proven { - // node must have skipped prev_loc.round - None => return DagPoint::Invalid(Arc::new(*point)), - Some(v) if v == &invalid.digest => { + match proven_vertex { + Some(vertex_digest) if &invalid.digest == vertex_digest => { return DagPoint::Invalid(Arc::new(*point)) } Some(_) => is_suspicious = true, // equivocation + // the author must have skipped previous round + None => return DagPoint::Invalid(Arc::new(*point)), } } else { return DagPoint::Invalid(Arc::new(*point)); // just invalid dependency @@ -124,11 +257,11 @@ impl Verifier { } Ok(DagPoint::NotExists(not_exists)) => { if prev_loc == not_exists.location { - match proven { - Some(v) if v == ¬_exists.digest => { + match proven_vertex { + Some(vertex_digest) if ¬_exists.digest == vertex_digest => { return DagPoint::Invalid(Arc::new(*point)) } - _ => {} // dependency of some other point; we've banned the sender + _ => {} // dependency of some other point; we've banned that sender } } else { return DagPoint::Invalid(Arc::new(*point)); // just invalid dependency @@ -150,14 +283,14 @@ impl Verifier { } /// blame author and every dependent point's author - fn is_list_of_signers_ok(&self, point /* @ r+0 */: &Point) -> bool { + fn is_list_of_signers_ok(point /* @ r+0 */: &Point, peer_schedule: &PeerSchedule) -> bool { let Some(proof /* @ r-1 */) = &point.body.proof else { return true; }; let [ same_round_peers/* @ r-1 */, next_round_peers/* @ r+0 */ - ] = self.peer_schedule.peers_for_array([ + ] = peer_schedule.peers_for_array([ point.body.location.round.prev(), point.body.location.round ]); @@ -166,7 +299,7 @@ impl Verifier { // but if the next round is a new epoch start, amount of available signers may change // may include author's signature already contained in proven point, no matter - if proof.evidence.len() < ((same_round_peers.len() + 2) / 3) * 3 + 1 { + if proof.evidence.len() < NodeCount::new(same_round_peers.len()).into() { return false; } @@ -194,8 +327,8 @@ impl Verifier { if proof.digest != proven.digest { unreachable! {"Coding error: mismatched previous point of the same author"} } - if !(point.body.time >= proven.body.time) { - return false; // time must be non-decreasing + if point.body.time < proven.body.time { + return false; // time must be non-decreasing by the same author } let Some(body) = bincode::serialize(&proven.body).ok() else { // should be removed after move to TL @@ -217,13 +350,4 @@ impl Verifier { } true } - - // Todo: leader chain validation - for leaders only (including time) - - // Todo: anchor inclusion validation and time based on it - - // todo: time validation based on now() - for directly received (for signature) points (roots) - // leave time only in leader (anchor) blocks? - - // todo: shallow validation during sync ? 
} diff --git a/consensus/src/intercom/dispatcher.rs b/consensus/src/intercom/dispatcher.rs index 33511b5f8..1fbc630b8 100644 --- a/consensus/src/intercom/dispatcher.rs +++ b/consensus/src/intercom/dispatcher.rs @@ -221,10 +221,11 @@ impl ResponderInner { #[cfg(test)] mod tests { - use crate::engine::peer_schedule::PeerSchedule; use tycho_network::{Address, PeerInfo}; use tycho_util::time::now_sec; + use crate::engine::node_count::NodeCount; + use crate::engine::peer_schedule::PeerSchedule; use crate::models::point::Digest; use super::*; @@ -279,7 +280,8 @@ mod tests { let all_peers = FastHashSet::from_iter(all_peers.into_iter()); for sch in &schedules { - sch.wait_for_peers(Round(1), node_count - 1).await; + sch.wait_for_peers(Round(1), NodeCount::new(node_count).majority_except_me()) + .await; tracing::info!("found peers for {}", sch.local_id); } diff --git a/consensus/src/models/point.rs b/consensus/src/models/point.rs index 12aa0f99d..b9a972f59 100644 --- a/consensus/src/models/point.rs +++ b/consensus/src/models/point.rs @@ -9,7 +9,7 @@ use sha2::{Digest as Sha2Digest, Sha256}; use tycho_network::PeerId; use tycho_util::FastHashMap; -#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] +#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Debug)] pub struct Digest(pub [u8; 32]); #[derive(Clone, Serialize, Deserialize, Debug)] @@ -27,13 +27,13 @@ impl Round { } } -#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, Debug)] +#[derive(Clone, Serialize, Deserialize, PartialEq, Debug)] pub struct Location { pub round: Round, pub author: PeerId, } -#[derive(Clone, Serialize, Deserialize, Debug)] +#[derive(Clone, Serialize, Deserialize, PartialEq, Debug)] pub struct PointId { pub location: Location, pub digest: Digest, @@ -45,10 +45,23 @@ pub struct PrevPoint { // any node may proof its vertex@r-1 with its point@r+0 only // pub round: Round, pub digest: Digest, - // >= 2F witnesses, point author excluded + // >= 2F witnesses, point author excluded, order does not matter pub evidence: FastHashMap, } +#[derive(Clone, Serialize, Deserialize, PartialEq, Debug)] +pub enum Through { + Witness(PeerId), + Includes(PeerId), +} + +#[derive(Clone, Serialize, Deserialize, PartialEq, Debug)] +pub enum Link { + ToSelf, + Direct(Through), + Indirect { to: PointId, path: Through }, +} + #[derive(Clone, Serialize, Deserialize, Debug)] pub struct PointBody { pub location: Location, // let it be @ r+0 @@ -58,19 +71,16 @@ pub struct PointBody { pub proof: Option, // >= 2F+1 points @ r-1, // signed by author @ r-1 with some additional points just mentioned; - // optionally includes author's own vertex (if exists). - // BTree provides repeatable order on every node + // mandatory includes author's own vertex iff proof is given. 
+ // Repeatable order on every node needed for commit; map is used during validation pub includes: BTreeMap, // >= 0 points @ r-2, signed by author @ r-1 + // Repeatable order on every node needed for commit; map is used during validation pub witness: BTreeMap, - // the last known third point in a row by some leader; - // defines author's current anchor - pub last_commit_trigger: PointId, - // (only) for every leader node - three points in a row: - // in leader point @ r+0: prev leader proof - // in leader proof @ r+1: current leader point @ r+0 - // in commit trigger @ r+2: leader proof @ r+1 - pub leader_chain: Option, + // defines author's last committed anchor + pub last_anchor_trigger: Link, + // helps to maintain anchor chain linked without explicit DAG traverse + pub last_anchor_proof: Link, } impl PointBody { @@ -107,6 +117,19 @@ impl Point { } } + pub fn prev_id(&self) -> Option { + let Some(digest) = self.body.proof.as_ref().map(|p| &p.digest) else { + return None; + }; + Some(PointId { + location: Location { + round: self.body.location.round.prev(), + author: self.body.location.author, + }, + digest: digest.clone(), + }) + } + /// Failed integrity means the point may be created by someone else. /// blame every dependent point author and the sender of this point, /// do not use the author from point's body @@ -125,10 +148,148 @@ impl Point { } /// blame author and every dependent point's author + /// must be checked right after integrity, before any manipulations with the point pub fn is_well_formed(&self) -> bool { + // any genesis is suitable, round number may be taken from configs + const LAST_GENESIS_ROUND: Round = Round(0); let author = &self.body.location.author; - let prev_included = self.body.includes.get(&author); - let prev_proven = self.body.proof.as_ref().map(|p| &p.digest); - prev_included == prev_proven + let is_special_ok = match self.body.location.round { + LAST_GENESIS_ROUND => { + self.body.includes.is_empty() + && self.body.witness.is_empty() + && self.body.payload.is_empty() + && self.body.proof.is_none() + && self.body.last_anchor_proof == Link::ToSelf + && self.body.last_anchor_trigger == Link::ToSelf + } + round if round > LAST_GENESIS_ROUND => { + // no witness is possible at the round right after genesis; + // the other way: we may panic on round.prev().prev() while extracting link's round + (round.0 > LAST_GENESIS_ROUND.0 + 1 || self.body.witness.is_empty()) + // leader must maintain its chain of proofs, + // while others must link to previous points (checked at the end of this method); + // its decided later (using dag round data) whether current point belongs to leader + && !(self.body.last_anchor_proof == Link::ToSelf && self.body.proof.is_none()) + && !(self.body.last_anchor_trigger == Link::ToSelf && self.body.proof.is_none()) + } + _ => false, + }; + is_special_ok + // proof is listed in includes - to count for 2/3+1, verify and commit dependencies + && self.body.proof.as_ref().map(|p| &p.digest) == self.body.includes.get(&author) + && self.is_link_well_formed(&self.body.last_anchor_proof) + && self.is_link_well_formed(&self.body.last_anchor_trigger) + && match ( + self.last_anchor_proof_round(), + self.last_anchor_trigger_round(), + ) { + (x, LAST_GENESIS_ROUND) => x >= LAST_GENESIS_ROUND, + (LAST_GENESIS_ROUND, y) => y >= LAST_GENESIS_ROUND, + // equality is impossible due to commit waves do not start every round; + // anchor trigger may belong to a later round than proof and vice versa; + // no indirect links over genesis tombstone + (x, y) => x 
!= y && x > LAST_GENESIS_ROUND && y > LAST_GENESIS_ROUND, + } + } + + fn is_link_well_formed(&self, link: &Link) -> bool { + match link { + Link::ToSelf => true, + Link::Direct(Through::Includes(peer)) => self.body.includes.contains_key(peer), + Link::Direct(Through::Witness(peer)) => self.body.witness.contains_key(peer), + Link::Indirect { + path: Through::Includes(peer), + to, + } => { + self.body.includes.contains_key(peer) + && to.location.round.0 + 1 < self.body.location.round.0 + } + Link::Indirect { + path: Through::Witness(peer), + to, + } => { + self.body.witness.contains_key(peer) + && to.location.round.0 + 2 < self.body.location.round.0 + } + } + } + + pub fn last_anchor_trigger_round(&self) -> Round { + self.get_linked_to_round(&self.body.last_anchor_trigger) + } + + pub fn last_anchor_proof_round(&self) -> Round { + self.get_linked_to_round(&self.body.last_anchor_proof) + } + + pub fn last_anchor_trigger_id(&self) -> PointId { + self.get_linked_to(&self.body.last_anchor_trigger) + } + + pub fn last_anchor_proof_id(&self) -> PointId { + self.get_linked_to(&self.body.last_anchor_proof) + } + + pub fn last_anchor_trigger_through(&self) -> PointId { + self.get_linked_through(&self.body.last_anchor_trigger) + } + + pub fn last_anchor_proof_through(&self) -> PointId { + self.get_linked_through(&self.body.last_anchor_proof) + } + + fn get_linked_to_round(&self, link: &Link) -> Round { + match link { + Link::ToSelf => self.body.location.round.clone(), + Link::Direct(Through::Includes(_)) => self.body.location.round.prev(), + Link::Direct(Through::Witness(_)) => self.body.location.round.prev().prev(), + Link::Indirect { to, .. } => to.location.round.clone(), + } + } + + fn get_linked_to(&self, link: &Link) -> PointId { + match link { + Link::ToSelf => self.id(), + Link::Direct(Through::Includes(peer)) => self.get_linked(peer, true), + Link::Direct(Through::Witness(peer)) => self.get_linked(peer, false), + Link::Indirect { to, .. } => to.clone(), + } + } + + fn get_linked_through(&self, link: &Link) -> PointId { + match link { + Link::Indirect { + path: Through::Includes(peer), + .. + } => self.get_linked(peer, true), + Link::Indirect { + path: Through::Witness(peer), + .. 
+ } => self.get_linked(peer, false), + _ => self.get_linked_to(link), + } + } + + fn get_linked(&self, peer: &PeerId, through_includes: bool) -> PointId { + let through = if through_includes { + &self.body.includes + } else { + &self.body.witness + }; + let round = if through_includes { + self.body.location.round.prev() + } else { + self.body.location.round.prev().prev() + }; + PointId { + location: Location { + round, + author: peer.clone(), + }, + digest: through + .get(peer) + .expect("Coding error: usage of ill-formed point") + .clone(), + } } } From b38085ba08f8b1e09bffd44792aa477f4b609098 Mon Sep 17 00:00:00 2001 From: Kirill Mikheev Date: Mon, 25 Mar 2024 20:05:16 +0300 Subject: [PATCH 11/32] build(consensus): use merged network updates --- consensus/src/engine/dag.rs | 5 +- consensus/src/engine/peer_schedule.rs | 168 ++++++++++++++++++-------- consensus/src/intercom/dispatcher.rs | 49 ++++---- consensus/src/tasks/downloader.rs | 2 +- 4 files changed, 145 insertions(+), 79 deletions(-) diff --git a/consensus/src/engine/dag.rs b/consensus/src/engine/dag.rs index 6f5130261..da6df62e8 100644 --- a/consensus/src/engine/dag.rs +++ b/consensus/src/engine/dag.rs @@ -4,7 +4,6 @@ use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::{Arc, OnceLock, Weak}; use ahash::RandomState; -use futures_util::FutureExt; use rand::{Rng, SeedableRng}; use tycho_network::PeerId; @@ -191,9 +190,9 @@ impl Dag { // TODO new point is checked against the dag only if it has valid sig, time and round // TODO download from neighbours pub fn fill_up_to(&mut self, round: Round) { - match self.rounds.last_key_value().map(|(k, v)| k) { + match self.rounds.last_key_value() { None => unreachable!("DAG empty"), - Some(last) => { + Some((last, _)) => { for round in (last.0..round.0).into_iter().map(|i| Round(i + 1)) { let prev = self.rounds.last_key_value().map(|(_, v)| Arc::downgrade(v)); self.rounds.entry(round).or_insert_with(|| { diff --git a/consensus/src/engine/peer_schedule.rs b/consensus/src/engine/peer_schedule.rs index e9c7cf052..a0850073f 100644 --- a/consensus/src/engine/peer_schedule.rs +++ b/consensus/src/engine/peer_schedule.rs @@ -1,14 +1,16 @@ use std::array; -use std::collections::{BTreeMap, BTreeSet, HashMap}; +use std::collections::BTreeMap; use std::ops::Range; use std::sync::Arc; -use ahash::RandomState; +use futures_util::StreamExt; use parking_lot::Mutex; +use rand::prelude::IteratorRandom; +use tokio::sync::broadcast; use tokio::sync::broadcast::error::RecvError; +use tokio::task::AbortHandle; use tycho_network::{PeerId, PrivateOverlay, PrivateOverlayEntriesEvent}; -use tycho_util::FastHashMap; use crate::engine::node_count::NodeCount; use crate::models::point::Round; @@ -23,10 +25,21 @@ use crate::models::point::Round; for the anchor chain to be committed by majority and for new nodes to gather data. The switch will occur for validator sets as a whole, at a single leaderless round. 
*/ +#[derive(Clone, PartialEq, Debug)] +pub enum PeerState { + Added, // not yet ready to connect + Resolved, // ready to connect + Removed, // will not be added again +} + #[derive(Clone)] pub struct PeerSchedule { // FIXME determine if our local_id is in next epoch inner: Arc>, + // Note: connection to self is always "Added" + // Note: updates are Resolved or Removed, sent single time + updates: broadcast::Sender<(PeerId, PeerState)>, + abort_resolve_peers: Arc>>, overlay: PrivateOverlay, pub local_id: PeerId, // FIXME move into schedule when it starts to change with new epoch } @@ -38,6 +51,7 @@ impl PeerSchedule { overlay: &PrivateOverlay, local_id: &PeerId, ) -> Self { + let (updates, _) = broadcast::channel(10); let mut current_peers = current_peers.clone(); current_peers.retain(|p| p != local_id); let this = Self { @@ -46,8 +60,11 @@ impl PeerSchedule { ¤t_peers, ))), overlay: overlay.clone(), + updates, + abort_resolve_peers: Default::default(), local_id: local_id.clone(), }; + this.respawn_resolve_task(); tokio::spawn(this.clone().listen()); this } @@ -57,25 +74,27 @@ impl PeerSchedule { // * which nodes are able to connect at the moment /// TODO replace bool with AtomicBool? use Arc? to return map with auto refresh pub async fn wait_for_peers(&self, round: Round, node_count: NodeCount) { - let mut rx = self.overlay.read_entries().subscribe(); + let mut rx = self.updates.subscribe(); let mut peers = (*self.peers_for(round)).clone(); - let mut count = peers.iter().filter(|(_, &is_resolved)| is_resolved).count(); + let mut count = peers + .iter() + .filter(|(_, state)| **state == PeerState::Resolved) + .count(); while count < node_count.into() { match rx.recv().await { - Ok(PrivateOverlayEntriesEvent::Resolved(peer_id)) if peer_id != self.local_id => { - if let Some(resolved) = peers.get_mut(&peer_id) { - if !*resolved { - count += 1; + Ok((peer_id, new_state)) if peer_id != self.local_id => { + if let Some(state) = peers.get_mut(&peer_id) { + match (&state, &new_state) { + (PeerState::Added, PeerState::Removed) => count -= 1, + (PeerState::Resolved, PeerState::Removed) => count -= 1, + (PeerState::Added, PeerState::Resolved) => count += 1, + (PeerState::Removed, PeerState::Resolved) => { + count += 1; // should not occur + tracing::warn!("peer {peer_id} is resolved after being removed") + } + (_, _) => {} } - *resolved = true; - } - } - Ok(PrivateOverlayEntriesEvent::Removed(peer_id)) if peer_id != self.local_id => { - if let Some(resolved) = peers.get_mut(&peer_id) { - if *resolved { - count -= 1; - } - *resolved = false; + *state = new_state; } } _ => {} @@ -83,25 +102,25 @@ impl PeerSchedule { } } - pub fn peers_for(&self, round: Round) -> Arc> { - let mut inner = self.inner.lock(); + pub fn peers_for(&self, round: Round) -> Arc> { + let inner = self.inner.lock(); inner.peers_for_index_plus_one(inner.index_plus_one(round)) } pub fn peers_for_array( &self, rounds: [Round; N], - ) -> [Arc>; N] { - let mut inner = self.inner.lock(); + ) -> [Arc>; N] { + let inner = self.inner.lock(); array::from_fn(|i| inner.peers_for_index_plus_one(inner.index_plus_one(rounds[i]))) } /// does not return empty maps - pub fn peers_for_range(&self, rounds: Range) -> Vec>> { + pub fn peers_for_range(&self, rounds: Range) -> Vec>> { if rounds.end <= rounds.start { return vec![]; } - let mut inner = self.inner.lock(); + let inner = self.inner.lock(); let mut first = inner.index_plus_one(rounds.start); let last = inner.index_plus_one(rounds.end.prev()); if 0 == first && first < last { @@ -149,47 +168,95 
@@ impl PeerSchedule { } pub fn set_next_peers(&self, peers: &Vec) { + let mut all_peers = BTreeMap::new(); let mut inner = self.inner.lock(); - let next = inner.peers_resolved[2].as_ref(); + for i in 0..inner.peers_resolved.len() { + all_peers.extend(inner.peers_resolved[i].iter()); + } let old = peers .iter() - .filter_map(|p| next.get(p).map(|b| (p.clone(), *b))) + .filter_map(|peer_id| { + all_peers + .get(peer_id) + .map(|&state| (peer_id.clone(), state.clone())) + }) .collect::>(); - let mut next = Arc::make_mut(&mut inner.peers_resolved[2]); + let next = Arc::make_mut(&mut inner.peers_resolved[2]); next.clear(); - next.extend(peers.clone().into_iter().map(|a| (a, false))); + next.extend(peers.clone().into_iter().map(|a| (a, PeerState::Added))); next.extend(old); } /// Returns [true] if update was successfully applied - fn set_resolved(&self, node: &PeerId, resolved: bool) -> bool { + fn set_resolved(&self, peer_id: &PeerId, resolved: bool) -> bool { let mut is_applied = false; - let mut inner = self.inner.lock(); - for i in 0..inner.peers_resolved.len() { - let Some(b) = Arc::make_mut(&mut inner.peers_resolved[i]).get_mut(node) else { - continue; - }; - *b = resolved; - is_applied = true; + let new_state = if resolved { + PeerState::Resolved + } else { + PeerState::Removed + }; + { + let mut inner = self.inner.lock(); + for i in 0..inner.peers_resolved.len() { + let Some(b) = Arc::make_mut(&mut inner.peers_resolved[i]).get_mut(peer_id) else { + continue; + }; + if *b != new_state { + *b = new_state.clone(); + is_applied = true; + } + } + } + if is_applied { + _ = self.updates.send((peer_id.clone(), new_state)); } is_applied } + fn respawn_resolve_task(&self) { + let mut fut = futures_util::stream::FuturesUnordered::new(); + { + let entries = self.overlay.read_entries(); + for entry in entries + .iter() + .choose_multiple(&mut rand::thread_rng(), entries.len()) + { + // skip updates on self + if !(entry.peer_id == self.local_id || entry.resolver_handle.is_resolved()) { + let handle = entry.resolver_handle.clone(); + fut.push(async move { handle.wait_resolved().await }); + } + } + }; + let new_abort_handle = if fut.is_empty() { + None + } else { + let this = self.clone(); + let join = tokio::spawn(async move { + while let Some(known_peer_handle) = fut.next().await { + _ = this.set_resolved(&known_peer_handle.peer_info().id, true); + } + }); + Some(join.abort_handle()) + }; + let mut abort_resolve_handle = self.abort_resolve_peers.lock(); + if let Some(old) = abort_resolve_handle.as_ref() { + old.abort(); + }; + *abort_resolve_handle = new_abort_handle; + } + async fn listen(self) { let mut rx = self.overlay.read_entries().subscribe(); loop { match rx.recv().await { - Ok(ref event @ PrivateOverlayEntriesEvent::Resolved(node)) - if node != self.local_id => - { - if !self.set_resolved(&node, true) { - tracing::debug!("Skipped {event:?}"); - } - } Ok(ref event @ PrivateOverlayEntriesEvent::Removed(node)) if node != self.local_id => { - if !self.set_resolved(&node, true) { + if self.set_resolved(&node, false) { + // respawn resolve task with fewer peers to await + self.respawn_resolve_task(); + } else { tracing::debug!("Skipped {event:?}"); } } @@ -213,11 +280,11 @@ impl PeerSchedule { pub struct PeerScheduleInner { // order to select leader by coin flip - peers_resolved: [Arc>; 3], + peers_resolved: [Arc>; 3], prev_epoch_start: Round, cur_epoch_start: Round, next_epoch_start: Option, - empty: Arc>, + empty: Arc>, } impl PeerScheduleInner { @@ -225,7 +292,12 @@ impl PeerScheduleInner { 
Self { peers_resolved: [ Default::default(), - Arc::new(current_peers.iter().map(|p| (p.clone(), false)).collect()), + Arc::new( + current_peers + .iter() + .map(|p| (p.clone(), PeerState::Added)) + .collect(), + ), Default::default(), ], prev_epoch_start: Round(0), @@ -247,7 +319,7 @@ impl PeerScheduleInner { } } - fn peers_for_index_plus_one(&self, index: u8) -> Arc> { + fn peers_for_index_plus_one(&self, index: u8) -> Arc> { match index { 0 => self.empty.clone(), x if x <= 3 => self.peers_resolved[x as usize - 1].clone(), diff --git a/consensus/src/intercom/dispatcher.rs b/consensus/src/intercom/dispatcher.rs index 1fbc630b8..ed042bb85 100644 --- a/consensus/src/intercom/dispatcher.rs +++ b/consensus/src/intercom/dispatcher.rs @@ -1,4 +1,4 @@ -use std::net::{Ipv4Addr, SocketAddr, ToSocketAddrs}; +use std::net::{Ipv4Addr, ToSocketAddrs}; use std::sync::Arc; use std::time::Duration; @@ -12,9 +12,8 @@ use tycho_network::{ PrivateOverlay, Response, Router, Service, ServiceRequest, Version, }; use tycho_util::futures::BoxFutureOrNoop; -use tycho_util::FastHashSet; -use crate::models::point::{Location, Point, PointId, Round, Signature}; +use crate::models::point::{Point, PointId, Round, Signature}; #[derive(Serialize, Deserialize, Debug)] enum MPRequest { @@ -35,14 +34,14 @@ enum MPResponse { } #[derive(Serialize, Deserialize, Debug)] -struct BroadcastResponse { +pub struct BroadcastResponse { // for requested point pub signature: Signature, // at the same round, if it was not skipped pub signer_point: Option, } #[derive(Serialize, Deserialize, Debug)] -struct PointResponse { +pub struct PointResponse { pub point: Option, } @@ -66,7 +65,7 @@ impl Dispatcher { // TODO receive configured services from general node, // move current setup to test below as it provides acceptable timing - let (dht_client_builder, dht_service) = DhtService::builder(local_id) + let (dht_tasks, dht_service) = DhtService::builder(local_id) .with_config(DhtConfig { local_info_announce_period: Duration::from_secs(1), max_local_info_announce_period_jitter: Duration::from_secs(1), @@ -76,25 +75,13 @@ impl Dispatcher { }) .build(); - let private_overlay = PrivateOverlay::builder(Self::PRIVATE_OVERLAY_ID) - .resolve_peers(true) - .with_entries(all_peers) - .build(Responder(Arc::new(ResponderInner {}))); - let (overlay_tasks, overlay_service) = OverlayService::builder(local_id) - .with_config(OverlayConfig { - private_overlay_peer_resolve_period: Duration::from_secs(1), - private_overlay_peer_resolve_max_jitter: Duration::from_secs(1), - ..Default::default() - }) .with_dht_service(dht_service.clone()) .build(); - overlay_service.try_add_private_overlay(&private_overlay); - let router = Router::builder() - .route(dht_service) - .route(overlay_service) + .route(dht_service.clone()) + .route(overlay_service.clone()) .build(); let network = Network::builder() @@ -103,13 +90,21 @@ impl Dispatcher { .build(socket_addr, router) .unwrap(); - let dht_client = dht_client_builder.build(network.clone()); + dht_tasks.spawn(&network); + overlay_tasks.spawn(&network); + + let peer_resolver = dht_service.make_peer_resolver().build(&network); // network???? 
+ + let private_overlay = PrivateOverlay::builder(Self::PRIVATE_OVERLAY_ID) + .with_peer_resolver(peer_resolver) + .with_entries(all_peers) + .build(Responder(Arc::new(ResponderInner {}))); - overlay_tasks.spawn(network.clone()); + overlay_service.add_private_overlay(&private_overlay); Self { overlay: private_overlay, - dht_client, + dht_client: dht_service.make_client(network.clone()), network, } } @@ -191,7 +186,7 @@ impl ResponderInner { }; let response = match body { - MPRequest::Broadcast { point } => { + MPRequest::Broadcast { .. } => { // 1.1 sigs for my block + 1.2 my next includes // ?? + 3.1 ask last MPResponse::Broadcast(BroadcastResponse { @@ -199,7 +194,7 @@ impl ResponderInner { signer_point: None, }) } - MPRequest::Point { id } => { + MPRequest::Point { .. } => { // 1.2 my next includes (merged with Broadcast flow) MPResponse::Point(PointResponse { point: None }) } @@ -256,7 +251,7 @@ mod tests { .map(|s| PeerId::from(ed25519::KeyPair::from(s).public_key)) .collect::>(); - let mut nodes = keys + let nodes = keys .iter() .map(|s| Dispatcher::new((Ipv4Addr::LOCALHOST, 0), s, &all_peers)) .collect::>(); @@ -278,7 +273,7 @@ mod tests { } } - let all_peers = FastHashSet::from_iter(all_peers.into_iter()); + // let all_peers = FastHashSet::from_iter(all_peers.into_iter()); for sch in &schedules { sch.wait_for_peers(Round(1), NodeCount::new(node_count).majority_except_me()) .await; diff --git a/consensus/src/tasks/downloader.rs b/consensus/src/tasks/downloader.rs index c02c7279b..287c17b6d 100644 --- a/consensus/src/tasks/downloader.rs +++ b/consensus/src/tasks/downloader.rs @@ -12,7 +12,7 @@ pub struct DownloadTask { impl Future for DownloadTask { type Output = DagPoint; - fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + fn poll(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll { todo!() } } From 5cbd7ee25945a8e61e851b45ad2e223027ea4e88 Mon Sep 17 00:00:00 2001 From: Kirill Mikheev Date: Mon, 15 Apr 2024 06:25:03 +0300 Subject: [PATCH 12/32] feat(consensus): crate structure --- consensus/src/dag/anchor_stage.rs | 42 +++ consensus/src/dag/dag.rs | 165 +++++++++ consensus/src/dag/dag_location.rs | 204 +++++++++++ consensus/src/dag/dag_round.rs | 234 +++++++++++++ consensus/src/dag/mod.rs | 12 + consensus/src/dag/producer.rs | 229 +++++++++++++ consensus/src/{engine => dag}/verifier.rs | 195 +++++------ consensus/src/engine/dag.rs | 304 ----------------- consensus/src/engine/engine.rs | 247 ++++++++++++++ consensus/src/engine/mempool_config.rs | 29 ++ consensus/src/engine/mod.rs | 10 +- consensus/src/engine/node_count.rs | 37 -- consensus/src/engine/threshold_clock.rs | 10 - .../src/intercom/adapter/broadcast_filter.rs | 191 +++++++++++ consensus/src/intercom/adapter/broadcaster.rs | 244 ++++++++++++++ .../{tasks => intercom/adapter}/downloader.rs | 6 +- consensus/src/intercom/adapter/dto.rs | 24 ++ consensus/src/intercom/adapter/mod.rs | 13 + consensus/src/intercom/adapter/signer.rs | 244 ++++++++++++++ consensus/src/intercom/core/dispatcher.rs | 199 +++++++++++ consensus/src/intercom/core/dto.rs | 82 +++++ consensus/src/intercom/core/mod.rs | 9 + consensus/src/intercom/core/responder.rs | 100 ++++++ consensus/src/intercom/dispatcher.rs | 318 ------------------ consensus/src/intercom/dto.rs | 42 +++ consensus/src/intercom/mod.rs | 12 +- consensus/src/intercom/peer_schedule/mod.rs | 8 + .../peer_schedule}/peer_schedule.rs | 228 ++++++------- .../peer_schedule/peer_schedule_updater.rs | 91 +++++ consensus/src/lib.rs | 4 +- consensus/src/models/dag_point.rs | 69 ++++ 
consensus/src/models/mod.rs | 8 +- consensus/src/models/node_count.rs | 45 +++ consensus/src/models/point.rs | 158 ++++++--- consensus/src/tasks/broadcaster.rs | 1 - consensus/src/tasks/mod.rs | 4 - consensus/src/tasks/syncer.rs | 1 - consensus/src/tasks/uploader.rs | 1 - consensus/src/test_utils.rs | 208 ++++++++++++ util/src/futures/shared.rs | 21 +- 40 files changed, 3072 insertions(+), 977 deletions(-) create mode 100644 consensus/src/dag/anchor_stage.rs create mode 100644 consensus/src/dag/dag.rs create mode 100644 consensus/src/dag/dag_location.rs create mode 100644 consensus/src/dag/dag_round.rs create mode 100644 consensus/src/dag/mod.rs create mode 100644 consensus/src/dag/producer.rs rename consensus/src/{engine => dag}/verifier.rs (61%) delete mode 100644 consensus/src/engine/dag.rs create mode 100644 consensus/src/engine/engine.rs create mode 100644 consensus/src/engine/mempool_config.rs delete mode 100644 consensus/src/engine/node_count.rs delete mode 100644 consensus/src/engine/threshold_clock.rs create mode 100644 consensus/src/intercom/adapter/broadcast_filter.rs create mode 100644 consensus/src/intercom/adapter/broadcaster.rs rename consensus/src/{tasks => intercom/adapter}/downloader.rs (80%) create mode 100644 consensus/src/intercom/adapter/dto.rs create mode 100644 consensus/src/intercom/adapter/mod.rs create mode 100644 consensus/src/intercom/adapter/signer.rs create mode 100644 consensus/src/intercom/core/dispatcher.rs create mode 100644 consensus/src/intercom/core/dto.rs create mode 100644 consensus/src/intercom/core/mod.rs create mode 100644 consensus/src/intercom/core/responder.rs delete mode 100644 consensus/src/intercom/dispatcher.rs create mode 100644 consensus/src/intercom/dto.rs create mode 100644 consensus/src/intercom/peer_schedule/mod.rs rename consensus/src/{engine => intercom/peer_schedule}/peer_schedule.rs (52%) create mode 100644 consensus/src/intercom/peer_schedule/peer_schedule_updater.rs create mode 100644 consensus/src/models/dag_point.rs create mode 100644 consensus/src/models/node_count.rs delete mode 100644 consensus/src/tasks/broadcaster.rs delete mode 100644 consensus/src/tasks/mod.rs delete mode 100644 consensus/src/tasks/syncer.rs delete mode 100644 consensus/src/tasks/uploader.rs create mode 100644 consensus/src/test_utils.rs diff --git a/consensus/src/dag/anchor_stage.rs b/consensus/src/dag/anchor_stage.rs new file mode 100644 index 000000000..8222b4067 --- /dev/null +++ b/consensus/src/dag/anchor_stage.rs @@ -0,0 +1,42 @@ +use rand::{Rng, SeedableRng}; + +use tycho_network::PeerId; + +use crate::intercom::PeerSchedule; +use crate::models::Round; + +pub enum AnchorStage { + Candidate(PeerId), // TODO nothing special, remove + Proof(PeerId), + Trigger(PeerId), +} + +impl AnchorStage { + pub fn of(round: Round, peer_schedule: &PeerSchedule) -> Option { + const WAVE_SIZE: u32 = 4; + let anchor_candidate_round = (round.0 / WAVE_SIZE) * WAVE_SIZE + 1; + + let [leader_peers, current_peers] = + peer_schedule.peers_for_array([Round(anchor_candidate_round), round]); + // reproducible global coin + let leader_index = rand_pcg::Pcg32::seed_from_u64(anchor_candidate_round as u64) + .gen_range(0..leader_peers.len()); + let Some(leader) = leader_peers + .iter() + .nth(leader_index) + .map(|(peer_id, _)| peer_id) + else { + panic!("selecting a leader from an empty validator set") + }; + if !current_peers.contains_key(leader) { + return None; + }; + match round.0 % WAVE_SIZE { + 0 => None, // both genesis and trailing (proof inclusion) round + 1 => 
Some(AnchorStage::Candidate(leader.clone())), + 2 => Some(AnchorStage::Proof(leader.clone())), + 3 => Some(AnchorStage::Trigger(leader.clone())), + _ => unreachable!(), + } + } +} diff --git a/consensus/src/dag/dag.rs b/consensus/src/dag/dag.rs new file mode 100644 index 000000000..89018333f --- /dev/null +++ b/consensus/src/dag/dag.rs @@ -0,0 +1,165 @@ +use std::collections::{BTreeMap, VecDeque}; +use std::num::NonZeroU8; +use std::sync::atomic::Ordering; +use std::sync::Arc; + +use crate::dag::DagRound; +use crate::models::{Point, PointId, Round, ValidPoint}; + +pub struct Dag { + // from the oldest to the current round; newer ones are in the future + rounds: BTreeMap, +} + +impl Dag { + // pub fn new(peer_schedule: &PeerSchedule) -> Self { + // Self { + // rounds: BTreeMap::from([(Arc::new(DagRound::new(round, &peer_schedule, None)))]), + // peer_schedule, + // } + // } + // + // // TODO new point is checked against the dag only if it has valid sig, time and round + // // TODO download from neighbours + // pub fn fill_up_to(&mut self, round: Round) { + // match self.rounds.last_key_value() { + // None => unreachable!("DAG empty"), + // Some((last, _)) => { + // for round in (last.0..round.0).into_iter().map(|i| Round(i + 1)) { + // let prev = self.rounds.last_key_value().map(|(_, v)| Arc::downgrade(v)); + // self.rounds.entry(round).or_insert_with(|| { + // Arc::new(DagRound::new(round, &self.peer_schedule, prev)) + // }); + // } + // } + // } + // } + + pub fn new() -> Self { + Self { + rounds: Default::default(), + } + } + + pub fn get_or_insert(&mut self, dag_round: DagRound) -> DagRound { + self.rounds + .entry(dag_round.round().clone()) + .or_insert(dag_round) + .clone() + } + + // TODO the next "little anchor candidate that could" must have at least full dag depth + pub fn drop_tail(&mut self, anchor_at: Round, dag_depth: NonZeroU8) { + if let Some(tail) = anchor_at.0.checked_sub(dag_depth.get() as u32) { + self.rounds = self.rounds.split_off(&Round(tail)); + }; + } + + async fn point_by_id(&self, point_id: &PointId) -> Option { + let dag_round = self.rounds.get(&point_id.location.round)?; + dag_round.valid_point(&point_id).await + } + + async fn vertex_by_proof(&self, proof: &ValidPoint) -> Option { + let dag_round = self.rounds.get(&proof.point.body.location.round.prev())?; + match &proof.point.body.proof { + Some(proven) => { + dag_round + .valid_point_exact(&proof.point.body.location.author, &proven.digest) + .await + } + None => None, + } + } + + // @return historically ordered vertices (back to front is older to newer) + pub async fn gather_uncommitted( + &self, + anchor_trigger: &PointId, + // dag_depth: usize, + ) -> VecDeque> { + let Some(anchor_trigger) = self.point_by_id(anchor_trigger).await else { + panic!( + "Coding error: anchor trigger @ {:?} is not in DAG", + &anchor_trigger.location.round + ); + }; + // anchor must be a vertex @ r+1, proven with point @ r+2 + let Some(anchor_proof) = self.vertex_by_proof(&anchor_trigger).await else { + panic!( + "Coding error: anchor proof @ {:?} is not in DAG", + &anchor_trigger.point.body.location.round.prev() + ); + }; + _ = anchor_trigger; // no more needed for commit + let Some(anchor) = self.vertex_by_proof(&anchor_proof).await else { + panic!( + "Coding error: anchor @ {:?} is not in DAG", + &anchor_proof.point.body.location.round.prev() + ); + }; + _ = anchor_proof; // no more needed for commit + + let mut cur_includes_round = anchor.point.body.location.round.prev(); /* r+0 */ + + let mut r = [ + 
anchor.point.body.includes.clone(), // points @ r+0 + anchor.point.body.witness.clone(), // points @ r-1 + BTreeMap::new(), // points @ r-2 + BTreeMap::new(), // points @ r-3 + ]; + _ = anchor; // anchor payload will be committed the next time + + let mut uncommitted = VecDeque::new(); + + // TODO visited rounds count must be equal to dag depth: + // read/download non-existent rounds and drop too old ones + while let Some((proof_round /* r+0 */, vertex_round /* r-1 */)) = self + .rounds + .get(&cur_includes_round) + .and_then(|cur| cur.prev().get().map(|prev| (cur, prev))) + .filter(|_| !r.iter().all(BTreeMap::is_empty)) + { + // take points @ r+0, and select their vertices @ r-1 for commit + // the order is of NodeId (public key) TODO shuffle deterministically, eg with anchor digest as a seed + while let Some((node, digest)) = &r[0].pop_first() { + // Every point must be valid (we've validated anchor dependencies already), + // but some points don't have previous one to proof as vertex. + // Any valid point among equivocated will do, as they include the same vertex. + if let Some(proof /* point @ r+0 */) = + proof_round.valid_point_exact(node, digest).await + { + if proof.is_committed.load(Ordering::Acquire) { + continue; + } + let author = &proof.point.body.location.author; + r[1].extend(proof.point.body.includes.clone()); // points @ r-1 + r[2].extend(proof.point.body.witness.clone()); // points @ r-2 + let Some(digest) = proof.point.body.proof.as_ref().map(|a| &a.digest) else { + continue; + }; + if let Some(vertex /* point @ r-1 */) = vertex_round + .valid_point_exact(author, &digest) + .await + // select uncommitted ones, marking them as committed + // to exclude from the next commit + .filter(|vertex| { + vertex + .is_committed + .compare_exchange(false, true, Ordering::Release, Ordering::Relaxed) + .is_ok() + }) + { + // vertex will be skipped in r_1 as committed + r[2].extend(vertex.point.body.includes.clone()); // points @ r-2 + r[3].extend(vertex.point.body.witness.clone()); // points @ r-3 + uncommitted.push_back(vertex.point); // LIFO + } + } + } + cur_includes_round = vertex_round.round().clone(); // next r+0 + r.rotate_left(1); + } + uncommitted + } +} diff --git a/consensus/src/dag/dag_location.rs b/consensus/src/dag/dag_location.rs new file mode 100644 index 000000000..2d6212bd1 --- /dev/null +++ b/consensus/src/dag/dag_location.rs @@ -0,0 +1,204 @@ +use std::collections::{btree_map, BTreeMap}; +use std::future::Future; +use std::ops::RangeInclusive; +use std::sync::{Arc, OnceLock}; + +use everscale_crypto::ed25519::KeyPair; +use futures_util::FutureExt; + +use tycho_util::futures::{JoinTask, Shared}; + +use crate::models::{DagPoint, Digest, Round, Signature, UnixTime, ValidPoint}; + +/// If DAG location exists, it must have non-empty `versions` map; +/// +/// Inclusion state is filled if it belongs to the 2 latest dag rounds +/// and will be used for own point production +/// +/// Note methods encapsulate mutability to preserve this invariant, a bit less panics +#[derive(Default)] +pub struct DagLocation { + // one of the points at current location + // was proven by the next point of a node; + // even if we marked this point as invalid, consensus may override our decision + // and we will have to sync + /* vertex: Option, */ + /// We can sign or reject just a single (e.g. 
first validated) point at the current location; + /// other (equivocated) points may be received as includes, witnesses or a proven vertex; + /// we have to include signed points @ r+0 & @ r-1 as dependencies in our point @ r+1. + /// Not needed for transitive dependencies. + state: InclusionState, + /// only one of the point versions at current location + /// may become proven by the next round point(s) of a node; + /// even if we marked a proven point as invalid, consensus may ignore our decision + versions: BTreeMap>>, +} + +impl DagLocation { + pub fn insert_own_point(&mut self, my_point: &DagPoint) { + let old = self.versions.insert( + my_point.digest().clone(), + Shared::new(JoinTask::new(futures_util::future::ready(my_point.clone()))), + ); + assert!( + old.is_none(), + "Coding error: own point is already inserted into DAG location" + ); + self.state.insert_own_point(my_point); + } + pub fn add_dependency(&mut self, digest: &Digest, init: I) -> Shared> + where + I: FnOnce() -> F, + F: Future + Send + 'static, + { + match self.versions.entry(digest.clone()) { + btree_map::Entry::Occupied(entry) => entry.get().clone(), + btree_map::Entry::Vacant(entry) => { + entry.insert(Shared::new(JoinTask::new(init()))).clone() + } + } + } + pub fn add_validate( + &mut self, + digest: &Digest, + init: I, + ) -> Option<&'_ Shared>> + where + I: FnOnce() -> F, + F: Future + Send + 'static, + { + // point that is validated depends on other equivocated points futures (if any) + // in the same location, so order of insertion matches order of futures' completion; + // to make signature we are interested in the first validated point only + // (others are at least suspicious and cannot be signed) + match self.versions.entry(digest.clone()) { + btree_map::Entry::Occupied(_) => return None, + btree_map::Entry::Vacant(entry) => { + let state = self.state.clone(); + let shared = entry.insert(Shared::new(JoinTask::new({ + // Note: cannot sign during validation, + // because current DAG round may advance concurrently + // TODO either leave output as is and reduce locking in 'inclusion state' + // (as single thread consumes them and makes signature), + // or better add global Watch CurrentDagRound (unify with broadcast filter!) 
+ // and sign inside this future (remove futures unordered in signer) + init().inspect(move |dag_point| state.init(dag_point)) + }))); + Some(shared) + } + } + } + pub fn versions(&self) -> &'_ BTreeMap>> { + &self.versions + } + pub fn state(&self) -> &'_ InclusionState { + &self.state + } +} + +// Todo remove inner locks and introduce global current dag round watch simultaneously, see Signer +#[derive(Default, Clone)] +pub struct InclusionState(Arc>); + +impl InclusionState { + /// Must not be used for downloaded dependencies + pub fn init(&self, first_completed: &DagPoint) { + _ = self.0.get_or_init(|| { + let signed = OnceLock::new(); + if Signable::filter(first_completed).is_none() { + _ = signed.set(Err(())); + } + Signable { + first_completed: first_completed.clone(), + signed, + } + }); + } + fn insert_own_point(&self, my_point: &DagPoint) { + let signed = OnceLock::new(); + match Signable::filter(my_point) { + None => assert!(false, "Coding error: own point is not signable"), + Some(valid) => { + _ = signed.set(Ok(Signed { + at: valid.point.body.location.round.clone(), + with: valid.point.signature.clone(), + })) + } + }; + let result = self.0.set(Signable { + first_completed: my_point.clone(), + signed, + }); + assert!( + result.is_ok(), + "Coding error: own point initialized for inclusion twice" + ) + } + pub fn is_empty(&self) -> bool { + self.0.get().is_none() + } + pub fn signable(&self) -> Option<&'_ Signable> { + self.0.get().filter(|signable| !signable.is_completed()) + } + pub fn signed(&self) -> Option<&'_ Result> { + self.0.get()?.signed.get() + } + pub fn signed_point(&self, at: &Round) -> Option<&'_ ValidPoint> { + let signable = self.0.get()?; + if &signable.signed.get()?.as_ref().ok()?.at == at { + signable.first_completed.valid() + } else { + None + } + } +} + +pub struct Signable { + first_completed: DagPoint, + // signature cannot be rolled back, the point must be included as next point dependency + signed: OnceLock>, +} + +pub struct Signed { + pub at: Round, + pub with: Signature, +} + +impl Signable { + pub fn sign( + &self, + at: &Round, + key_pair: Option<&KeyPair>, // same round for own point and next round for other's + time_range: RangeInclusive, + ) -> bool { + let mut this_call_signed = false; + if let Some((valid, key_pair)) = Self::filter(&self.first_completed).zip(key_pair) { + if time_range.contains(&valid.point.body.time) { + _ = self.signed.get_or_init(|| { + this_call_signed = true; + Ok(Signed { + at: at.clone(), + with: valid.point.body.sign(key_pair), + }) + }); + } else if &valid.point.body.time < time_range.start() { + self.reject(); + } // else decide later + } else { + self.reject(); + } + this_call_signed + } + pub fn reject(&self) { + _ = self.signed.set(Err(())); + } + fn is_completed(&self) -> bool { + self.signed.get().is_some() + } + fn filter(first_completed: &DagPoint) -> Option<&ValidPoint> { + match first_completed { + DagPoint::Trusted(valid) => Some(valid), + _ => None, // including valid Suspicious + } + } +} diff --git a/consensus/src/dag/dag_round.rs b/consensus/src/dag/dag_round.rs new file mode 100644 index 000000000..9e268e872 --- /dev/null +++ b/consensus/src/dag/dag_round.rs @@ -0,0 +1,234 @@ +use std::sync::{Arc, Weak}; + +use ahash::RandomState; +use everscale_crypto::ed25519::KeyPair; +use futures_util::future::BoxFuture; +use futures_util::FutureExt; + +use tycho_network::PeerId; +use tycho_util::FastDashMap; + +use crate::dag::anchor_stage::AnchorStage; +use crate::dag::{DagLocation, InclusionState, Verifier}; +use 
crate::engine::MempoolConfig; +use crate::intercom::PeerSchedule; +use crate::models::{DagPoint, Digest, NodeCount, Point, PointId, Round, ValidPoint}; + +#[derive(Clone)] +pub struct WeakDagRound(Weak); + +#[derive(Clone)] +pub struct DagRound(Arc); + +struct DagRoundInner { + round: Round, // immutable + node_count: NodeCount, // immutable + /// if key_pair is not empty, then the node may produce block at this round, + /// and also sign broadcasts during previous round + key_pair: Option>, // immutable + anchor_stage: Option, // immutable + locations: FastDashMap, + prev: WeakDagRound, // immutable ? +} + +impl WeakDagRound { + pub const BOTTOM: Self = WeakDagRound(Weak::new()); + pub fn get(&self) -> Option { + self.0.upgrade().map(DagRound) + } +} + +impl DagRound { + pub fn new(round: Round, peer_schedule: &PeerSchedule, prev: WeakDagRound) -> Self { + let peers = peer_schedule.peers_for(&round); + let locations = FastDashMap::with_capacity_and_hasher(peers.len(), RandomState::new()); + Self(Arc::new(DagRoundInner { + round, + node_count: NodeCount::try_from(peers.len()) + .expect(&format!("peer schedule updated for {round:?}")), + key_pair: peer_schedule.local_keys(&round), + anchor_stage: AnchorStage::of(round, peer_schedule), + locations, + prev, + })) + } + + pub fn next(&self, peer_schedule: &PeerSchedule) -> Self { + let next_round = self.round().next(); + let peers = peer_schedule.peers_for(&next_round); + let locations = FastDashMap::with_capacity_and_hasher(peers.len(), RandomState::new()); + Self(Arc::new(DagRoundInner { + round: next_round, + node_count: NodeCount::try_from(peers.len()) + .expect(&format!("peer schedule updated for {next_round:?}")), + key_pair: peer_schedule.local_keys(&next_round), + anchor_stage: AnchorStage::of(next_round, peer_schedule), + locations, + prev: self.as_weak(), + })) + } + + pub async fn genesis(genesis: &Arc, peer_schedule: &PeerSchedule) -> Self { + let locations = FastDashMap::with_capacity_and_hasher(1, RandomState::new()); + let round = genesis.body.location.round; + let this = Self(Arc::new(DagRoundInner { + round, + node_count: NodeCount::GENESIS, + key_pair: None, + anchor_stage: AnchorStage::of(round, peer_schedule), + locations, + prev: WeakDagRound::BOTTOM, + })); + this.insert_exact_validate(genesis, peer_schedule).await; + this + } + + pub fn round(&self) -> &'_ Round { + &self.0.round + } + + pub fn node_count(&self) -> &'_ NodeCount { + &self.0.node_count + } + + pub fn key_pair(&self) -> Option<&'_ KeyPair> { + self.0.key_pair.as_deref() + } + + pub fn anchor_stage(&self) -> Option<&'_ AnchorStage> { + self.0.anchor_stage.as_ref() + } + + pub fn edit(&self, author: &PeerId, edit: F) -> R + where + F: FnOnce(&mut DagLocation) -> R, + { + let mut loc = self.0.locations.entry(*author).or_default(); + edit(loc.value_mut()) + } + + pub fn view(&self, author: &PeerId, view: F) -> Option + where + F: FnOnce(&DagLocation) -> R, + { + self.0.locations.view(author, |_, v| view(v)) + } + + pub fn select<'a, F, R>(&'a self, mut filter_map: F) -> impl Iterator + 'a + where + F: FnMut((&PeerId, &DagLocation)) -> Option + 'a, + { + self.0 + .locations + .iter() + .filter_map(move |a| filter_map(a.pair())) + } + + pub fn prev(&self) -> &'_ WeakDagRound { + &self.0.prev + } + + pub fn as_weak(&self) -> WeakDagRound { + WeakDagRound(Arc::downgrade(&self.0)) + } + + pub async fn valid_point(&self, point_id: &PointId) -> Option { + match self.scan(&point_id.location.round) { + Some(linked) => { + linked + 
.valid_point_exact(&point_id.location.author, &point_id.digest) + .await + } + None => None, + } + } + + pub async fn valid_point_exact(&self, node: &PeerId, digest: &Digest) -> Option { + let point_fut = self.view(node, |loc| loc.versions().get(digest).cloned())??; + point_fut.await.0.valid().cloned() + } + + pub fn add(&self, point: &Arc) -> Option> { + self.scan(&point.body.location.round) + .and_then(|linked| linked.add_exact(&point)) + } + + fn add_exact(&self, point: &Arc) -> Option> { + if &point.body.location.round != self.round() { + panic!("Coding error: dag round mismatches point round on add") + } + let dag_round = self.clone(); + let digest = &point.digest; + self.edit(&point.body.location.author, |loc| { + let state = loc.state().clone(); + let point = point.clone(); + loc.add_validate(digest, || Verifier::validate(point, dag_round)) + .map(|first| first.clone().map(|_| state).boxed()) + }) + } + + // Todo leave for genesis, use for own points in tests + pub async fn insert_exact_validate( + &self, + point: &Arc, + peer_schedule: &PeerSchedule, + ) -> InclusionState { + if !Verifier::verify(point, peer_schedule).is_ok() { + panic!("Coding error: malformed point") + } + let point = Verifier::validate(point.clone(), self.clone()).await; + if point.valid().is_none() { + panic!("Coding error: not a valid point") + } + let state = self.insert_exact(&point); + if let Some(signable) = state.signable() { + signable.sign( + self.round(), + peer_schedule.local_keys(self.round()).as_deref(), + MempoolConfig::sign_time_range(), + ); + } + if state.signed_point(self.round()).is_none() { + panic!("Coding or configuration error: valid point cannot be signed; time issue?") + } + state + } + + pub fn insert_invalid(&self, dag_point: &DagPoint) -> Option { + if dag_point.valid().is_some() { + panic!("Coding error: failed to insert valid point as invalid") + } + self.scan(&dag_point.location().round) + .map(|linked| linked.insert_exact(dag_point)) + } + + fn insert_exact(&self, dag_point: &DagPoint) -> InclusionState { + if &dag_point.location().round != self.round() { + panic!("Coding error: dag round mismatches point round on insert") + } + self.edit(&dag_point.location().author, |loc| { + _ = loc.add_validate(dag_point.digest(), || { + futures_util::future::ready(dag_point.clone()) + }); + loc.state().clone() + }) + } + + pub fn scan(&self, round: &Round) -> Option { + if round > self.round() { + panic!("Coding error: cannot add future point into DAG round with scan") + } + let mut visited = self.clone(); + if round == self.round() { + return Some(visited); + } + while let Some(dag_round) = visited.prev().get() { + match dag_round.round().cmp(&round) { + core::cmp::Ordering::Less => return None, + core::cmp::Ordering::Equal => return Some(dag_round), + core::cmp::Ordering::Greater => visited = dag_round, + } + } + None + } +} diff --git a/consensus/src/dag/mod.rs b/consensus/src/dag/mod.rs new file mode 100644 index 000000000..9ed08bb7b --- /dev/null +++ b/consensus/src/dag/mod.rs @@ -0,0 +1,12 @@ +pub use dag::*; +pub use dag_location::*; +pub use dag_round::*; +pub use producer::*; +pub use verifier::*; + +mod anchor_stage; +mod dag; +mod dag_location; +mod dag_round; +mod producer; +mod verifier; diff --git a/consensus/src/dag/producer.rs b/consensus/src/dag/producer.rs new file mode 100644 index 000000000..5bbca736c --- /dev/null +++ b/consensus/src/dag/producer.rs @@ -0,0 +1,229 @@ +use std::collections::BTreeMap; +use std::sync::Arc; + +use bytes::Bytes; + +use tycho_network::PeerId; + 
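// An illustrative, self-contained sketch (not part of this patch) of the wave arithmetic
// that `AnchorStage::of` uses and that `Producer::new_point` below relies on: rounds are
// grouped into waves of 4, the leader is drawn with a deterministic coin seeded by the
// wave's candidate round, and only the leader's own points at the proof/trigger rounds
// may carry `Link::ToSelf`. The helper name is an assumption, not code from this commit.
#[allow(dead_code)]
fn _sketch_wave_role(round: u32) -> &'static str {
    const WAVE_SIZE: u32 = 4;
    match round % WAVE_SIZE {
        0 => "no anchor role (genesis or trailing round of a wave)",
        1 => "anchor candidate",
        2 => "anchor proof",
        3 => "anchor trigger",
        _ => unreachable!(),
    }
}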
+use crate::dag::anchor_stage::AnchorStage; +use crate::dag::DagRound; +use crate::models::{Link, Location, Point, PointBody, PrevPoint, Round, Through, UnixTime}; + +// FIXME make it PointBuilder +pub struct Producer; + +impl Producer { + pub async fn new_point( + finished_round: &DagRound, + new_round: &DagRound, + prev_point: Option<&PrevPoint>, + payload: Vec, + ) -> Option> { + let key_pair = new_round.key_pair()?; + let local_id = PeerId::from(key_pair.public_key); + match new_round.anchor_stage() { + Some(AnchorStage::Proof(peer_id) | AnchorStage::Trigger(peer_id)) + if peer_id == &local_id && prev_point.is_none() => + { + // wave leader must skip new round if it failed to produce 3 points in a row + return None; + } + _ => {} + }; + let includes = Self::includes(finished_round); + let mut anchor_trigger = Self::link_from_includes(&local_id, &new_round, &includes, true); + let mut anchor_proof = Self::link_from_includes(&local_id, &new_round, &includes, false); + let witness = Self::witness(finished_round); + Self::update_link_from_witness(&mut anchor_trigger, finished_round.round(), &witness, true); + Self::update_link_from_witness(&mut anchor_proof, finished_round.round(), &witness, false); + let time = Self::get_time( + finished_round, + &local_id, + &anchor_proof, + prev_point, + &includes, + &witness, + ) + .await; + let includes = includes + .into_iter() + .map(|point| (point.body.location.author, point.digest.clone())) + .collect::>(); + let witness = witness + .into_iter() + .map(|point| (point.body.location.author, point.digest.clone())) + .collect::>(); + Some(Arc::new( + PointBody { + location: Location { + round: new_round.round().clone(), + author: local_id.clone(), + }, + time, + payload, + proof: prev_point.cloned(), + includes, + witness, + anchor_trigger, + anchor_proof, + } + .wrap(&key_pair), + )) + } + + fn includes(finished_round: &DagRound) -> Vec> { + let includes = finished_round + .select(|(_, loc)| { + loc.state() + .signed_point(finished_round.round()) + .map(|valid| valid.point.clone()) + }) + .collect::>(); + assert!( + includes.iter().count() >= finished_round.node_count().majority(), + "Coding error: producing point with not enough includes, check Signer logic" + ); + includes + } + + fn witness(finished_round: &DagRound) -> Vec> { + if let Some(witness_round) = finished_round.prev().get() { + witness_round + .select(|(_, loc)| { + loc.state() + .signed_point(finished_round.round()) + .map(|valid| valid.point.clone()) + }) + .collect() + } else { + vec![] + } + } + + fn link_from_includes( + local_id: &PeerId, + new_round: &DagRound, + includes: &Vec>, + is_for_trigger: bool, + ) -> Link { + match new_round.anchor_stage() { + Some(AnchorStage::Trigger(leader_id)) if is_for_trigger && leader_id == local_id => { + Link::ToSelf + } + Some(AnchorStage::Proof(leader_id)) if !is_for_trigger && leader_id == local_id => { + Link::ToSelf + } + _ => { + // TODO simplify to single iterator scan + let point = includes + .iter() + .max_by_key(|point| { + if is_for_trigger { + point.anchor_trigger_round() + } else { + point.anchor_proof_round() + } + }) + .expect("non-empty list of includes for own point"); + if point.body.location.round == new_round.round().prev() { + Link::Direct(Through::Includes(point.body.location.author.clone())) + } else { + let to = if is_for_trigger { + point.anchor_trigger_id() + } else { + point.anchor_proof_id() + }; + Link::Indirect { + to, + path: Through::Includes(point.body.location.author.clone()), + } + } + } + } + } + + fn 
update_link_from_witness( + link: &mut Link, + finished_round: &Round, + witness: &Vec>, + is_for_trigger: bool, + ) { + let link_round = match link { + Link::ToSelf | Link::Direct(_) => return, + Link::Indirect { to, .. } => to.location.round, + }; + fn last_round(point: &Point, is_for_trigger: bool) -> Round { + if is_for_trigger { + point.anchor_trigger_round() + } else { + point.anchor_proof_round() + } + } + let Some(point) = witness + .iter() + .filter(|point| last_round(&point, is_for_trigger) > link_round) + .max_by_key(|point| last_round(&point, is_for_trigger)) + else { + return; + }; + if point.body.location.round == finished_round.prev() { + *link = Link::Direct(Through::Witness(point.body.location.author)) + } else { + let to = if is_for_trigger { + point.anchor_trigger_id() + } else { + point.anchor_proof_id() + }; + *link = Link::Indirect { + to, + path: Through::Witness(point.body.location.author), + } + }; + } + + async fn get_time( + finished_round: &DagRound, + local_id: &PeerId, + anchor_proof: &Link, + prev_point: Option<&PrevPoint>, + includes: &Vec>, + witness: &Vec>, + ) -> UnixTime { + let mut time = UnixTime::now(); + if let Some(prev_point) = prev_point { + if let Some(valid) = finished_round + .valid_point_exact(&local_id, &prev_point.digest) + .await + { + time = valid.point.body.time.clone().max(time); + } + } + match anchor_proof { + Link::ToSelf => {} + Link::Direct(through) => { + let (peer_id, through) = match through { + Through::Includes(peer_id) => (peer_id, &includes), + Through::Witness(peer_id) => (peer_id, &witness), + }; + if let Some(point) = through + .iter() + .find(|point| point.body.location.author == peer_id) + { + time = point.body.time.clone().max(time); + } + } + Link::Indirect { to, .. } => { + // it's sufficient to check prev point - it can't have newer anchor proof + if prev_point.is_none() { + if let Some(valid) = finished_round.valid_point(&to).await { + time = valid.point.body.time.clone().max(time); + } else { + panic!("last anchor proof must stay in DAG until its payload is committed") + } + } + } + } + // TODO maybe take the greatest time among all point's dependencies - as they must be signed + time + } +} diff --git a/consensus/src/engine/verifier.rs b/consensus/src/dag/verifier.rs similarity index 61% rename from consensus/src/engine/verifier.rs rename to consensus/src/dag/verifier.rs index 164279973..7e407ea2a 100644 --- a/consensus/src/engine/verifier.rs +++ b/consensus/src/dag/verifier.rs @@ -1,17 +1,14 @@ use std::sync::Arc; -use futures_util::future; use futures_util::FutureExt; use tokio::task::JoinSet; use tycho_network::PeerId; -use tycho_util::futures::{JoinTask, Shared}; -use crate::engine::dag::{AnchorStage, DagPoint, DagRound, IndexedPoint}; -use crate::engine::node_count::NodeCount; -use crate::engine::peer_schedule::PeerSchedule; -use crate::models::point::{Digest, Link, Location, Point}; -use crate::tasks::downloader::DownloadTask; +use crate::dag::anchor_stage::AnchorStage; +use crate::dag::DagRound; +use crate::intercom::{Downloader, PeerSchedule}; +use crate::models::{DagPoint, Digest, Link, Location, NodeCount, Point, ValidPoint}; /* Note on equivocation. @@ -30,82 +27,85 @@ Anyway, no more than one of equivocated points may become a vertex. 
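// An illustrative sketch (not part of this patch) of the intended two-step entry point
// into this module, as used e.g. by the broadcast filter and `DagRound::add`:
// `verify` is the cheap synchronous signature/structure check, `validate` is the async
// dependency check that must only run after `verify` succeeded. The helper name and its
// exact call site are assumptions, not code from this commit.
#[allow(dead_code)]
async fn _sketch_accept(
    point: Arc<Point>,
    peer_schedule: &PeerSchedule,
    dag_round: DagRound,
) -> DagPoint {
    match Verifier::verify(&point, peer_schedule) {
        // malformed or wrongly signed: Invalid / NotExists, may be used to blame the sender
        Err(dag_point) => dag_point,
        // well-formed: resolve dependencies; yields Trusted, Suspicious or Invalid
        Ok(()) => Verifier::validate(point, dag_round).await,
    }
}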
pub struct Verifier; impl Verifier { - // todo outside, for points to sign only: check time bounds before validation, sign only Trusted - // todo: shallow verification during sync to close a gap, trusting first vertex contents: + // FIXME outside, for points to sign only: check time bounds before validation, sign only Trusted + // FIXME shallow verification during sync to close a gap, trusting first vertex contents: // take any vertex and its proof point, check signatures for the vertex, // and use all vertex dependencies recursively as Trusted without any checks // with 3-rounds-wide sliding window that moves backwards - pub fn verify( - r_0 /* r+0 */: Arc, - point /* @ r+0 */: Box, - peer_schedule: &PeerSchedule, - ) -> JoinTask { - if &point.body.location.round != &r_0.round { - panic! {"Coding error: dag round mismatches point round"} - } + /// the first and mandatory check of any Point received no matter where from + pub fn verify(point: &Arc, peer_schedule: &PeerSchedule) -> Result<(), DagPoint> { if !point.is_integrity_ok() { - let not_exists = DagPoint::NotExists(Arc::new(point.id())); // cannot use point body - return JoinTask::new(future::ready(not_exists)); + Err(DagPoint::NotExists(Arc::new(point.id()))) // cannot use point body + } else if !(point.is_well_formed() && Self::is_list_of_signers_ok(point, peer_schedule)) { + // the last task spawns if ok - in order not to walk through every dag round twice + Err(DagPoint::Invalid(point.clone())) + } else { + Ok(()) + } + } + + /// must be called iff [Self::verify] succeeded + pub async fn validate(point /* @ r+0 */: Arc, r_0 /* r+0 */: DagRound) -> DagPoint { + // TODO upgrade Weak whenever used to let Dag Round drop if some future hangs up for long + if &point.body.location.round != r_0.round() { + panic!("Coding error: dag round mismatches point round") } + let mut dependencies = JoinSet::new(); if !({ - point.is_well_formed() - && Self::is_self_links_ok(&point, &r_0) - && Self::is_list_of_signers_ok(&point, peer_schedule) + Self::is_self_links_ok(&point, &r_0) // the last task spawns if ok - in order not to walk through every dag round twice - && Self::add_anchor_links_if_ok(&point, r_0.clone(), &mut dependencies) + && Self::add_anchor_links_if_ok(&point, &r_0, &mut dependencies) }) { - let invalid = DagPoint::Invalid(Arc::new(*point)); - return JoinTask::new(future::ready(invalid)); + return DagPoint::Invalid(point.clone()); } - if let Some(r_1) = r_0.prev.upgrade() { + if let Some(r_1) = r_0.prev().get() { Self::gather_deps(&point, &r_1, &mut dependencies); - return JoinTask::new(Self::check_deps(point, dependencies)); + return Self::check_deps(&point, dependencies).await; } // If r-1 exceeds dag depth, the arg point @ r+0 is considered valid by itself. // Any point @ r+0 will be committed, only if it has valid proof @ r+1 // included into valid anchor chain, i.e. validated by consensus. 
- let trusted = DagPoint::Trusted(Arc::new(IndexedPoint::new(*point))); - JoinTask::new(future::ready(trusted)) + DagPoint::Trusted(ValidPoint::new(point.clone())) } fn is_self_links_ok(point /* @ r+0 */: &Point, dag_round /* r+0 */: &DagRound) -> bool { - // existence of proofs in leader blocks is a part of point's well-form-ness check - match &dag_round.anchor_stage { + // existence of proofs in leader points is a part of point's well-form-ness check + match &dag_round.anchor_stage() { // no one may link to self None | Some(AnchorStage::Candidate(_)) => { - point.body.last_anchor_proof != Link::ToSelf - && point.body.last_anchor_trigger != Link::ToSelf + point.body.anchor_proof != Link::ToSelf && point.body.anchor_trigger != Link::ToSelf } // leader must link to own point while others must not Some(AnchorStage::Proof(leader_id)) => { (leader_id == point.body.location.author) - == (point.body.last_anchor_proof == Link::ToSelf) + == (point.body.anchor_proof == Link::ToSelf) } Some(AnchorStage::Trigger(leader_id)) => { (leader_id == point.body.location.author) - == (point.body.last_anchor_trigger == Link::ToSelf) + == (point.body.anchor_trigger == Link::ToSelf) } } } /// may visit every DAG round kept in memory fn add_anchor_links_if_ok( - point: &Point, // @ r+0 - mut dag_round: Arc, // start with r+0 + point: &Point, // @ r+0 + dag_round: &DagRound, // start with r+0 dependencies: &mut JoinSet, ) -> bool { let mut links = vec![ - (point.last_anchor_proof_id(), false), - (point.last_anchor_trigger_id(), true), + (point.anchor_proof_id(), false), + (point.anchor_trigger_id(), true), ]; let mut linked_with_round = Vec::with_capacity(2); + let mut dag_round = dag_round.clone(); while !links.is_empty() { links.retain(|(linked, is_trigger)| { - let found = linked.location.round == dag_round.round; + let found = &linked.location.round == dag_round.round(); if found { - match (&dag_round.anchor_stage, is_trigger) { + match (&dag_round.anchor_stage(), is_trigger) { // AnchorStage::Candidate(_) requires nothing special (Some(AnchorStage::Proof(leader_id)), false) if leader_id == linked.location.author => {} @@ -121,7 +121,7 @@ impl Verifier { } !found }); - if dag_round.prev.upgrade().map(|r| dag_round = r).is_none() { + if dag_round.prev().get().map(|r| dag_round = r).is_none() { // if links in point exceed DAG depth, consider them valid by now; // either dependencies have more recent link and point will be invalidated later, // or author was less successful to get fresh data and did not commit for long @@ -133,7 +133,7 @@ impl Verifier { // while we need to get invalid ones to blame current point for (author, digest, dag_round) in linked_with_round { // skip self links - if dag_round.round < point.body.location.round { + if dag_round.round() < &point.body.location.round { // will add the same point from direct dependencies twice, // we can do better but nothing terrible Self::add_dependency(&author, &digest, &dag_round, dependencies); @@ -148,13 +148,8 @@ impl Verifier { round: &DagRound, dependencies: &mut JoinSet, ) { - let mut loc = round.locations.entry(*node).or_default(); - let fut = loc - .versions - .entry(digest.clone()) - .or_insert_with(|| Shared::new(JoinTask::new(DownloadTask {}))) - .clone(); - dependencies.spawn(fut.map(|a| a.0)); + let shared = round.edit(node, |loc| loc.add_dependency(digest, || Downloader {})); + dependencies.spawn(shared.map(|(dag_point, _)| dag_point)); } fn gather_deps( @@ -162,25 +157,24 @@ impl Verifier { r_1 /* r-1 */: &DagRound, dependencies: &mut 
JoinSet, ) { - if let Some(loc) = r_1.locations.get(&point.body.location.author) { - // to check for equivocation or mandatory skip of a round - for version in loc.versions.values() { - dependencies.spawn(version.clone().map(|a| a.0)); + r_1.view(&point.body.location.author, |loc| { + for (_, shared) in loc.versions() { + dependencies.spawn(shared.clone().map(|(dag_point, _)| dag_point)); } - } + }); for (node, digest) in &point.body.includes { // integrity check passed, so includes contain author's prev point proof Self::add_dependency(&node, &digest, &r_1, dependencies); } - if let Some(r_2) = r_1.prev.upgrade() { + if let Some(r_2) = r_1.prev().get() { for (node, digest) in &point.body.witness { Self::add_dependency(&node, &digest, &r_2, dependencies); } }; } - async fn check_deps(point: Box, mut dependencies: JoinSet) -> DagPoint { + async fn check_deps(point: &Arc, mut dependencies: JoinSet) -> DagPoint { // point is well-formed if we got here, so point.proof matches point.includes let proven_vertex = point.body.proof.as_ref().map(|p| &p.digest).clone(); let prev_loc = Location { @@ -194,10 +188,10 @@ impl Verifier { // Invalid dependency is the author's fault. let mut is_suspicious = false; // last is meant to be the last among all dependencies - let anchor_trigger_id = point.last_anchor_trigger_id(); - let anchor_proof_id = point.last_anchor_proof_id(); - let anchor_trigger_through = point.last_anchor_trigger_through(); - let anchor_proof_through = point.last_anchor_proof_through(); + let anchor_trigger_id = point.anchor_trigger_id(); + let anchor_proof_id = point.anchor_proof_id(); + let anchor_trigger_through = point.anchor_trigger_through(); + let anchor_proof_through = point.anchor_proof_through(); while let Some(res) = dependencies.join_next().await { match res { Ok(DagPoint::Trusted(valid) | DagPoint::Suspicious(valid)) => { @@ -205,30 +199,30 @@ impl Verifier { match proven_vertex { Some(vertex_digest) if &valid.point.digest == vertex_digest => { if !Self::is_proof_ok(&point, &valid.point) { - return DagPoint::Invalid(Arc::new(*point)); + return DagPoint::Invalid(point.clone()); } // else: ok proof } Some(_) => is_suspicious = true, // equivocation // the author must have provided the proof in current point - None => return DagPoint::Invalid(Arc::new(*point)), + None => return DagPoint::Invalid(point.clone()), } } // else: valid dependency - if valid.point.last_anchor_trigger_round() > anchor_trigger_id.location.round - || valid.point.last_anchor_proof_round() > anchor_proof_id.location.round + if valid.point.anchor_trigger_round() > anchor_trigger_id.location.round + || valid.point.anchor_proof_round() > anchor_proof_id.location.round { // did not actualize the chain - return DagPoint::Invalid(Arc::new(*point)); + return DagPoint::Invalid(point.clone()); } let valid_point_id = valid.point.id(); if ({ valid_point_id == anchor_trigger_through - && valid.point.last_anchor_trigger_id() != anchor_trigger_id + && valid.point.anchor_trigger_id() != anchor_trigger_id }) || ({ valid_point_id == anchor_proof_through - && valid.point.last_anchor_proof_id() != anchor_proof_id + && valid.point.anchor_proof_id() != anchor_proof_id }) { // path does not lead to destination - return DagPoint::Invalid(Arc::new(*point)); + return DagPoint::Invalid(point.clone()); } if valid_point_id == anchor_proof_id && point.body.time < valid.point.body.time { @@ -238,75 +232,72 @@ impl Verifier { // The time of candidate's valid proof exactly satisfies such requirement: // it either will be signed by 
majority (what unblocks the commit trigger), // or the valid trigger will not be created. - return DagPoint::Invalid(Arc::new(*point)); + return DagPoint::Invalid(point.clone()); } } Ok(DagPoint::Invalid(invalid)) => { if prev_loc == invalid.body.location { match proven_vertex { Some(vertex_digest) if &invalid.digest == vertex_digest => { - return DagPoint::Invalid(Arc::new(*point)) + return DagPoint::Invalid(point.clone()) } Some(_) => is_suspicious = true, // equivocation // the author must have skipped previous round - None => return DagPoint::Invalid(Arc::new(*point)), + None => return DagPoint::Invalid(point.clone()), } } else { - return DagPoint::Invalid(Arc::new(*point)); // just invalid dependency + return DagPoint::Invalid(point.clone()); // just invalid dependency } } Ok(DagPoint::NotExists(not_exists)) => { if prev_loc == not_exists.location { match proven_vertex { Some(vertex_digest) if ¬_exists.digest == vertex_digest => { - return DagPoint::Invalid(Arc::new(*point)) + return DagPoint::Invalid(point.clone()) } _ => {} // dependency of some other point; we've banned that sender } } else { - return DagPoint::Invalid(Arc::new(*point)); // just invalid dependency + return DagPoint::Invalid(point.clone()); // just invalid dependency } } Err(e) => { if e.is_panic() { std::panic::resume_unwind(e.into_panic()); } - unreachable!(); } } } if is_suspicious { - DagPoint::Suspicious(Arc::new(IndexedPoint::new(*point))) + DagPoint::Suspicious(ValidPoint::new(point.clone())) } else { - DagPoint::Trusted(Arc::new(IndexedPoint::new(*point))) + DagPoint::Trusted(ValidPoint::new(point.clone())) } } /// blame author and every dependent point's author fn is_list_of_signers_ok(point /* @ r+0 */: &Point, peer_schedule: &PeerSchedule) -> bool { - let Some(proof /* @ r-1 */) = &point.body.proof else { + let Some(proven /* @ r-1 */) = &point.body.proof else { return true; }; - let [ - same_round_peers/* @ r-1 */, - next_round_peers/* @ r+0 */ - ] = peer_schedule.peers_for_array([ - point.body.location.round.prev(), - point.body.location.round - ]); - //TODO may there be a problem ? - // the size of required validator set is determined by point's round, - // but if the next round is a new epoch start, amount of available signers may change + // Every point producer @ r-1 must prove its delivery to 2/3+1 producers @ r+0 + // inside proving point @ r+0. + + // If author was in validator set @ r-1 and is not in validator set @ r+0, + // its point @ r-1 won't become a vertex because its proof point @ r+0 cannot be valid. + // That means: payloads from the last round of validation epoch are never collated. 
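        // An illustrative sketch (not part of this patch), assuming `NodeCount` keeps the
        // arithmetic of the removed `engine::node_count` (round up to 3F+1, majority
        // excluding the author = 2F): with e.g. 10 scheduled peers the set is rounded up
        // to 3F+1 = 13, so F = 4 and a proof carrying fewer than 2F = 8 evidence
        // signatures from peers scheduled at the proof round is rejected below.
        #[allow(dead_code)]
        fn _sketch_evidence_threshold(scheduled_peers: usize) -> usize {
            let full = ((scheduled_peers + 2) / 3) * 3 + 1; // 3F+1, as in the old NodeCount::new
            let f = full / 3;
            2 * f // signatures required besides the author's own
        }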
- // may include author's signature already contained in proven point, no matter - if proof.evidence.len() < NodeCount::new(same_round_peers.len()).into() { + let proof_round_peers /* @ r+0 */ = peer_schedule.peers_for(&point.body.location.round); + // reject point in case this node is not ready to accept: the point is from far future + let Ok(node_count) = NodeCount::try_from(proof_round_peers.len()) else { + return false; + }; + if proven.evidence.len() < node_count.majority_of_others() { return false; } - for (peer, _) in proof.evidence.iter() { - if !(same_round_peers.contains_key(peer) || next_round_peers.contains_key(peer)) { - // two validator sets are the same except the first round of a new epoch; - // unexpected peer, thus invalid + for (peer, _) in proven.evidence.iter() { + if !proof_round_peers.contains_key(peer) { return false; } } @@ -316,28 +307,28 @@ impl Verifier { /// blame author and every dependent point's author fn is_proof_ok(point /* @ r+0 */: &Point, proven: &Point /* @ r-1 */) -> bool { if point.body.location.author != proven.body.location.author { - unreachable! {"Coding error: mismatched authors of proof and its vertex"} + panic!("Coding error: mismatched authors of proof and its vertex") } if point.body.location.round.prev() != proven.body.location.round { - unreachable! {"Coding error: mismatched rounds of proof and its vertex"} + panic!("Coding error: mismatched rounds of proof and its vertex") } let Some(proof) = &point.body.proof else { - unreachable! {"Coding error: passed point doesn't contain proof for a given vertex"} + panic!("Coding error: passed point doesn't contain proof for a given vertex") }; if proof.digest != proven.digest { - unreachable! {"Coding error: mismatched previous point of the same author"} + panic!("Coding error: mismatched previous point of the same author") } if point.body.time < proven.body.time { return false; // time must be non-decreasing by the same author } let Some(body) = bincode::serialize(&proven.body).ok() else { // should be removed after move to TL - unreachable! {"Library error: failed to serialize point body"} + panic!("Library error: failed to serialize proven point body") }; for (peer, sig) in proof.evidence.iter() { let Some(pubkey) = peer.as_public_key() else { - // should have been validated outside mempool - unreachable! 
{"Config error: failed to convert peer id into public key"} + // should have been validated prior validator elections + panic!("Config error: failed to convert peer id into public key") }; let sig: Result<[u8; 64], _> = sig.0.to_vec().try_into(); let Some(sig) = sig.ok() else { diff --git a/consensus/src/engine/dag.rs b/consensus/src/engine/dag.rs deleted file mode 100644 index da6df62e8..000000000 --- a/consensus/src/engine/dag.rs +++ /dev/null @@ -1,304 +0,0 @@ -use std::collections::{btree_map, BTreeMap, VecDeque}; -use std::num::NonZeroU8; -use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::{Arc, OnceLock, Weak}; - -use ahash::RandomState; -use rand::{Rng, SeedableRng}; - -use tycho_network::PeerId; -use tycho_util::futures::{JoinTask, Shared}; -use tycho_util::FastDashMap; - -use crate::engine::node_count::NodeCount; -use crate::engine::peer_schedule::PeerSchedule; -use crate::engine::verifier::Verifier; -use crate::models::point::{Digest, Point, PointId, Round, Signature}; - -pub struct IndexedPoint { - pub point: Point, - // proof_for: Option>, - // includes: Vec>, - // witness: Vec>, - pub is_committed: AtomicBool, -} - -impl IndexedPoint { - pub fn new(point: Point) -> Self { - Self { - point, - is_committed: AtomicBool::new(false), - } - } -} - -#[derive(Clone)] -pub enum DagPoint { - // valid without demur, needed to blame equivocation or graph connectivity violations - Trusted(Arc), - // is a valid container, but we doubt author's fairness at the moment of validating; - // we do not sign such point, but others may include it without consequences; - // consensus will decide whether to sign its proof or not; we shall ban the author - Suspicious(Arc), - Invalid(Arc), // invalidates dependent point; needed to blame equivocation - NotExists(Arc), // invalidates dependent point; blame author of dependent point -} - -impl DagPoint { - pub fn is_valid(&self) -> bool { - match self { - DagPoint::Trusted(_) => true, - DagPoint::Suspicious(_) => true, - _ => false, - } - } - - pub fn valid(&self) -> Option> { - match self { - DagPoint::Trusted(point) => Some(point.clone()), - DagPoint::Suspicious(point) => Some(point.clone()), - _ => None, - } - } -} - -#[derive(Default)] -pub struct DagLocation { - // one of the points at current location - // was proven by the next point of a node; - // even if we marked this point as invalid, consensus may override our decision - // and we will have to sync - /* vertex: Option, */ - // we can sign just a single point at the current location; - // other (equivocated) points may be received as includes, witnesses or a proven vertex; - // we have to include signed points as dependencies in our next block - signed_by_me: OnceLock<(Digest, Round, Signature)>, - // only one of the point versions at current location - // may become proven by the next round point(s) of a node; - // even if we marked a proven point as invalid, consensus may override our decision - pub versions: BTreeMap>>, -} - -pub enum AnchorStage { - Candidate(PeerId), - Proof(PeerId), - Trigger(PeerId), -} - -impl AnchorStage { - pub fn of(round: Round, peer_schedule: &PeerSchedule) -> Option { - const WAVE_SIZE: u32 = 4; - let anchor_candidate_round = (round.0 / WAVE_SIZE) * WAVE_SIZE + 1; - - let [leader_peers, current_peers] = - peer_schedule.peers_for_array([Round(anchor_candidate_round), round]); - // reproducible global coin - let leader_index = rand_pcg::Pcg32::seed_from_u64(anchor_candidate_round as u64) - .gen_range(0..leader_peers.len()); - let Some(leader) = 
leader_peers - .iter() - .nth(leader_index) - .map(|(peer_id, _)| peer_id) - else { - panic!("Fatal: selecting a leader from an empty validator set") - }; - if !current_peers.contains_key(leader) { - return None; - }; - match round.0 % WAVE_SIZE { - 0 => None, // both genesis and trailing (proof inclusion) round - 1 => Some(AnchorStage::Candidate(leader.clone())), - 2 => Some(AnchorStage::Proof(leader.clone())), - 3 => Some(AnchorStage::Trigger(leader.clone())), - _ => unreachable!(), - } - } -} - -pub struct DagRound { - pub round: Round, - node_count: NodeCount, - pub anchor_stage: Option, - pub locations: FastDashMap, - pub prev: Weak, -} - -impl DagRound { - fn new(round: Round, peer_schedule: &PeerSchedule, prev: Option>) -> Self { - let peers = peer_schedule.peers_for(round); - let locations = FastDashMap::with_capacity_and_hasher(peers.len(), RandomState::new()); - Self { - round, - node_count: NodeCount::new(peers.len()), - anchor_stage: AnchorStage::of(round, peer_schedule), - locations, - prev: prev.unwrap_or_else(|| Weak::new()), - } - } - - pub async fn valid_point(&self, node: &PeerId, digest: &Digest) -> Option> { - let point_fut = { - let location = self.locations.get(node)?; - location.versions.get(digest)?.clone() - }; - point_fut.await.0.valid() - } - - pub fn add( - self: Arc, - point: Box, - peer_schedule: &PeerSchedule, - ) -> Shared> { - if &point.body.location.round != &self.round { - panic! {"Coding error: dag round mismatches point round"} - } - - let mut location = self - .locations - .entry(point.body.location.author) - .or_default(); - - match location.versions.entry(point.digest.clone()) { - btree_map::Entry::Occupied(entry) => entry.get().clone(), - btree_map::Entry::Vacant(entry) => entry - .insert(Shared::new(Verifier::verify( - self.clone(), - point, - peer_schedule, - ))) - .clone(), - } - } -} - -pub struct Dag { - current: Round, - // from the oldest to the current round; newer ones are in the future - rounds: BTreeMap>, - peer_schedule: PeerSchedule, -} - -impl Dag { - pub fn new(round: Round, peer_schedule: PeerSchedule) -> Self { - Self { - current: round, - rounds: BTreeMap::from([(round, Arc::new(DagRound::new(round, &peer_schedule, None)))]), - peer_schedule, - } - } - - // TODO new point is checked against the dag only if it has valid sig, time and round - // TODO download from neighbours - pub fn fill_up_to(&mut self, round: Round) { - match self.rounds.last_key_value() { - None => unreachable!("DAG empty"), - Some((last, _)) => { - for round in (last.0..round.0).into_iter().map(|i| Round(i + 1)) { - let prev = self.rounds.last_key_value().map(|(_, v)| Arc::downgrade(v)); - self.rounds.entry(round).or_insert_with(|| { - Arc::new(DagRound::new(round, &self.peer_schedule, prev)) - }); - } - } - } - } - - // TODO the next "little anchor candidate that could" must have at least full dag depth - pub fn drop_tail(&mut self, anchor_at: Round, dag_depth: NonZeroU8) { - if let Some(tail) = anchor_at.0.checked_sub(dag_depth.get() as u32) { - self.rounds = self.rounds.split_off(&Round(tail)); - }; - } - - pub async fn vertex_by(&self, proof: &IndexedPoint) -> Option> { - let digest = &proof.point.body.proof.as_ref()?.digest; - let round = proof.point.body.location.round.prev(); - let dag_round = self.rounds.get(&round)?; - dag_round - .valid_point(&proof.point.body.location.author, digest) - .await - } - - // @return historically ordered vertices (back to front is older to newer) - pub async fn gather_uncommitted( - &self, - anchor_trigger: &IndexedPoint, - 
// dag_depth: usize, - ) -> VecDeque> { - // anchor must be a vertex @ r+1, proven with point @ r+2 - let Some(anchor_proof) = self.vertex_by(&anchor_trigger).await else { - panic!( - "Coding error: anchor trigger @ {} is not in DAG", - &anchor_trigger.point.body.location.round.0 - ); - }; - _ = anchor_trigger; // no more needed for commit - let Some(anchor) = self.vertex_by(&anchor_proof).await else { - panic!( - "Coding error: anchor proof @ {} is not in DAG", - &anchor_proof.point.body.location.round.0 - ); - }; - _ = anchor_proof; // no more needed for commit - - let mut cur_includes_round = anchor.point.body.location.round.prev(); /* r+0 */ - - let mut r = [ - anchor.point.body.includes.clone(), // points @ r+0 - anchor.point.body.witness.clone(), // points @ r-1 - BTreeMap::new(), // points @ r-2 - BTreeMap::new(), // points @ r-3 - ]; - _ = anchor; // anchor payload will be committed the next time - - let mut uncommitted = VecDeque::new(); - - // TODO visited rounds count must be equal to dag depth: - // read/download non-existent rounds and drop too old ones - while let Some((proof_round /* r+0 */, vertex_round /* r-1 */)) = self - .rounds - .get(&cur_includes_round) - .and_then(|cur| cur.prev.upgrade().map(|prev| (cur, prev))) - .filter(|_| !r.iter().all(BTreeMap::is_empty)) - { - // take points @ r+0, and select their vertices @ r-1 for commit - // the order is of NodeId (public key) - while let Some((node, digest)) = &r[0].pop_first() { - // Every point must be valid (we've validated anchor dependencies already), - // but some points don't have previous one to proof as vertex. - // Any valid point among equivocated will do, as they include the same vertex. - if let Some(proof /* point @ r+0 */) = proof_round.valid_point(node, digest).await { - if proof.is_committed.load(Ordering::Acquire) { - continue; - } - let author = &proof.point.body.location.author; - r[1].extend(proof.point.body.includes.clone()); // points @ r-1 - r[2].extend(proof.point.body.witness.clone()); // points @ r-2 - let Some(digest) = proof.point.body.proof.as_ref().map(|a| &a.digest) else { - continue; - }; - if let Some(vertex /* point @ r-1 */) = vertex_round - .valid_point(author, &digest) - .await - // select uncommitted ones, marking them as committed - // to exclude from the next commit - .filter(|vertex| { - vertex - .is_committed - .compare_exchange(false, true, Ordering::Release, Ordering::Relaxed) - .is_ok() - }) - { - // vertex will be skipped in r_1 as committed - r[2].extend(vertex.point.body.includes.clone()); // points @ r-2 - r[3].extend(vertex.point.body.witness.clone()); // points @ r-3 - uncommitted.push_back(vertex); // LIFO - } - } - } - cur_includes_round = vertex_round.round; // next r+0 - r.rotate_left(1); - } - uncommitted - } -} diff --git a/consensus/src/engine/engine.rs b/consensus/src/engine/engine.rs new file mode 100644 index 000000000..92fad8dbe --- /dev/null +++ b/consensus/src/engine/engine.rs @@ -0,0 +1,247 @@ +use std::sync::Arc; + +use everscale_crypto::ed25519::{KeyPair, SecretKey}; +use tokio::sync::{mpsc, Notify}; + +use tycho_network::{DhtClient, OverlayService, PeerId}; + +use crate::dag::{DagRound, Producer}; +use crate::intercom::{ + BroadcastFilter, Broadcaster, Dispatcher, PeerSchedule, PeerScheduleUpdater, Responder, Signer, +}; +use crate::models::{Point, PrevPoint}; + +pub struct Engine { + // dag: Arc>, + peer_schedule: Arc, + dispatcher: Dispatcher, + finished_dag_round: DagRound, + signer: Signer, + prev_point: Option, + cur_point: Option>, +} + +impl 
Engine {
+    pub async fn add_next_peers(&self, next_peers: Vec<PeerId>) {}
+
+    pub async fn new(
+        secret_key: &SecretKey,
+        dht_client: &DhtClient,
+        overlay_service: &OverlayService,
+        peers: &Vec<PeerId>,
+    ) -> Self {
+        let peer_schedule = Arc::new(PeerSchedule::new(Arc::new(KeyPair::from(secret_key))));
+
+        let (bcast_tx, bcast_rx) = mpsc::unbounded_channel();
+
+        let broadcast_filter = BroadcastFilter::new(peer_schedule.clone(), bcast_tx);
+
+        let (sig_requests, sig_responses) = mpsc::unbounded_channel();
+
+        let dispatcher = Dispatcher::new(
+            &dht_client,
+            &overlay_service,
+            peers,
+            Responder::new(broadcast_filter.clone(), sig_requests),
+        );
+
+        let genesis = Arc::new(crate::test_utils::genesis());
+        // finished epoch
+        peer_schedule.set_next_peers(&vec![genesis.body.location.author]);
+        peer_schedule.set_next_start(genesis.body.location.round);
+        peer_schedule.rotate();
+        // current epoch
+        peer_schedule.set_next_start(genesis.body.location.round.next());
+        peer_schedule.set_next_peers(peers);
+        peer_schedule.rotate();
+        // start updater only after peers are populated into schedule
+        PeerScheduleUpdater::run(dispatcher.overlay.clone(), peer_schedule.clone());
+
+        // TODO define whether the last round is finished based on peer schedule
+        // move out from bcaster & signer? where to get our last point from?
+
+        // TODO at the end of every round, take the point with a trigger and commit
+        // * either own point contains Trigger
+        // * or search through last round to find the latest trigger
+        // * * can we do so without a scan of the round?
+
+        let finished_dag_round = DagRound::genesis(&genesis, &peer_schedule).await;
+        let signer = Signer::new(bcast_rx, sig_responses, finished_dag_round.round());
+
+        Self {
+            // dag: Arc::new(Mutex::new(dag)),
+            peer_schedule,
+            dispatcher,
+            finished_dag_round,
+            signer,
+            prev_point: None,
+            cur_point: None,
+        }
+    }
+
+    pub async fn run(mut self) {
+        loop {
+            // FIXME must there be any next round as in Signer? check broadcast filter
+            let current_round = self.finished_dag_round.next(self.peer_schedule.as_ref());
+
+            self.cur_point = Producer::new_point(
+                &self.finished_dag_round,
+                &current_round,
+                self.prev_point.as_ref(),
+                vec![],
+            )
+            .await;
+
+            let bcaster_ready = Arc::new(Notify::new());
+            // let this channel be unbounded - there won't be many items, but every one of them is essential
+            let (signer_signal_tx, mut signer_signal_rx) = mpsc::unbounded_channel();
+
+            // TODO change round, then
+            // apply peer schedule and config changes if any
+            // spawn signer
+            // spawn producer + broadcaster
+            // spawn commit + drop dag tail (async?!) 
into futures ordered + // it shouldn't take longer than round; + // the other way it should make the change of rounds slower, + // in order to prevent unlimited DAG growth + // sync if signer detected a gap exceeding dag depth + // join + if let Some(own_point) = &self.cur_point { + let own_state = current_round + .insert_exact_validate(&own_point, &self.peer_schedule) + .await; + let signer_run = tokio::spawn(self.signer.run( + current_round.clone(), + Some(own_point.clone()), + signer_signal_tx, + bcaster_ready.clone(), + )); + let bcaster_run = tokio::spawn( + Broadcaster::new( + &own_point, + &self.dispatcher, + &self.peer_schedule, + bcaster_ready, + signer_signal_rx, + ) + .run(), + ); + let joined = tokio::join!(signer_run, bcaster_run); + match joined { + (Ok(signer_upd), Ok(evidence_or_reject)) => { + self.signer = signer_upd; + self.finished_dag_round = current_round; // FIXME must fill gaps with empty rounds + self.prev_point = evidence_or_reject.ok().map(|evidence| PrevPoint { + digest: own_point.digest.clone(), + evidence, + }); + } + (Err(se), Err(be)) => { + panic!( + "Both Signer and Broadcaster panicked. Signer: {se:?}. Broadcaster: {be:?}" + ) + } + (Err(se), _) => { + panic!("Signer panicked: {se:?}") + } + (_, Err(be)) => { + panic!("Broadcaster panicked: {be:?}") + } + } + } else { + signer_signal_rx.close(); + bcaster_ready.notify_one(); + let signer_run = tokio::spawn(self.signer.run( + current_round.clone(), + None, + signer_signal_tx, + bcaster_ready, + )) + .await; + match signer_run { + Ok(signer_upd) => { + self.finished_dag_round = current_round; // FIXME must fill gaps with empty rounds + self.signer = signer_upd; + self.prev_point = None; + } + Err(se) => panic!("Signer panicked: {se:?}"), + } + } + } + } +} + +pub trait EngineTestExt { + fn dispatcher(&self) -> &'_ Dispatcher; +} + +impl EngineTestExt for Engine { + fn dispatcher(&self) -> &'_ Dispatcher { + &self.dispatcher + } +} + +// task 0: continue from where we stopped +// * load last state into DAG: some (un)finished round +// * create new round and point, if last round is finished +// -> start 1 & 2 +// +// (always) +// task 1: accept broadcasts though filter +// +// (@ r+0 iff in peer schedule for r+0) +// task 2: broadcast + ask for signatures (to/from peers scheduled @ r+1) +// (to support point receivers, even if "me" is not in schedule @ r+1) +// +// (@ r+0 iff in peer schedule for r+1) +// task 3: respond to signature requests (from peers @ [r-1; r+0]) +// (point authors must reject signatures they consider invalid) +// (new nodes at the beginning of a new validation epoch +// must sign points from the last round of a previous epoch) +// (fast nodes that reached the end of their validation epoch +// must continue to sign points of lagging nodes +// until new validator set starts producing its shard-blocks - +// they cannot finish the last round by counting signatures +// and will advance by receiving batch of points from broadcast filter) + +/* +async fn produce( + &self, + finished_round: &Arc, + prev_point: Option, + payload: Vec, + peer_schedule: &PeerSchedule, +) -> Option { + let new_round = Arc::new(finished_round.next(peer_schedule)); + self.broadcast_filter.advance_round(new_round.round()).await; + + if let Some(for_next_point) = self.peer_schedule.local_keys(&new_round.round().next()) { + // respond to signature requests (mandatory inclusions) + // _ = Signer::consume_broadcasts(filtered_rx, new_round.clone()); + // _ = Signer::on_validated(filtered_rx, new_round.clone(), 
Some(on_validated_tx)); + + if let Some(for_witness) = self.peer_schedule.local_keys(new_round.round()) { + // respond to signature requests to be included as witness + }; + } else { + // consume broadcasts without signing them + // _ = Signer::consume_broadcasts(filtered_rx, new_round.clone()); + }; + if let Some(for_current_point) = self.peer_schedule.local_keys(new_round.round()) { + let point = Producer::create_point( + finished_round, + &new_round, + &for_current_point, + prev_point, + payload, + ) + .await; + let bcaster = Broadcaster::new(&point, dispatcher, peer_schedule); + _ = bcaster.run().await; + // broadcast, gather signatures as a mean of delivery (even if not producing next block) + Some(point) + } else { + None + } +}*/ diff --git a/consensus/src/engine/mempool_config.rs b/consensus/src/engine/mempool_config.rs new file mode 100644 index 000000000..05bb8f2fc --- /dev/null +++ b/consensus/src/engine/mempool_config.rs @@ -0,0 +1,29 @@ +use std::ops::RangeInclusive; +use std::time::Duration; + +use crate::models::UnixTime; + +pub struct MempoolConfig; + +impl MempoolConfig { + /// how far a signed point (by the time in its body) + /// may be in the future compared with local (wall) time + const CLOCK_SKEW: UnixTime = UnixTime::from_millis(5 * 1000); + /// how long a point from past remains eligible for signature and inclusion; + /// time in point body is compared with wall time; + /// if consensus makes no progress for such long, it will need a manual restart from a new genesis + const MAX_OUTDATED: UnixTime = UnixTime::from_millis(24 * 60 * 60 * 1000); + + /// see [CLOCK_SKEW](Self::CLOCK_SKEW) and [MAX_OUTDATED](Self::MAX_OUTDATED) + pub fn sign_time_range() -> RangeInclusive { + let now = UnixTime::now(); + now - Self::MAX_OUTDATED..=now + Self::CLOCK_SKEW + } + + /// we try to gather as many points and signatures as we can within some time frame; + /// this is a tradeoff between breaking on exactly 2F+1 elements + /// (dependencies and/or signatures), and waiting for slow nodes + pub const RETRY_INTERVAL: Duration = Duration::from_millis(100); + + const DAG_DEPTH: usize = 20; +} diff --git a/consensus/src/engine/mod.rs b/consensus/src/engine/mod.rs index 4ac072def..93b8c357c 100644 --- a/consensus/src/engine/mod.rs +++ b/consensus/src/engine/mod.rs @@ -1,5 +1,5 @@ -pub mod dag; -pub mod node_count; -pub mod peer_schedule; -pub mod threshold_clock; -pub mod verifier; +pub use engine::*; +pub use mempool_config::*; + +mod engine; +mod mempool_config; diff --git a/consensus/src/engine/node_count.rs b/consensus/src/engine/node_count.rs deleted file mode 100644 index 12e94e243..000000000 --- a/consensus/src/engine/node_count.rs +++ /dev/null @@ -1,37 +0,0 @@ -#[derive(Copy, Clone)] -pub struct NodeCount(u8); - -impl From for usize { - fn from(count: NodeCount) -> Self { - count.0 as usize - } -} - -impl NodeCount { - pub fn new(total_peers: usize) -> Self { - if total_peers < 3 { - panic!("Fatal: node count {total_peers} < 3"); - } - let count = ((total_peers + 2) / 3) * 3 + 1; - let count = u8::try_from(count).unwrap_or_else(|e| { - panic!("Fatal: node count {total_peers} exceeds u8 after rounding to 3F+1: {e:?}"); - }); - NodeCount(count) - } - - pub fn majority_with_me(&self) -> Self { - Self((self.0 / 3) * 2 + 1) - } - - pub fn majority_except_me(&self) -> Self { - Self((self.0 / 3) * 2) - } - - pub fn reliable_minority(&self) -> Self { - Self(self.0 / 3 + 1) - } - - pub fn unreliable(&self) -> Self { - Self(self.0 / 3) - } -} diff --git 
a/consensus/src/engine/threshold_clock.rs b/consensus/src/engine/threshold_clock.rs deleted file mode 100644 index 1d27cbbea..000000000 --- a/consensus/src/engine/threshold_clock.rs +++ /dev/null @@ -1,10 +0,0 @@ -use tycho_network::PeerId; -use tycho_util::FastHashSet; - -use crate::models::point::Round; - -pub struct ThresholdClock { - round: Round, - signatures_received: FastHashSet, - rejected: FastHashSet, // TODO reason -} diff --git a/consensus/src/intercom/adapter/broadcast_filter.rs b/consensus/src/intercom/adapter/broadcast_filter.rs new file mode 100644 index 000000000..be6d874c6 --- /dev/null +++ b/consensus/src/intercom/adapter/broadcast_filter.rs @@ -0,0 +1,191 @@ +use std::collections::BTreeMap; +use std::sync::atomic::{AtomicU32, Ordering}; +use std::sync::Arc; + +use tokio::sync::broadcast::error::RecvError; +use tokio::sync::mpsc; + +use tycho_network::PeerId; +use tycho_util::FastDashMap; + +use crate::dag::Verifier; +use crate::intercom::dto::{BroadcastResponse, PeerState}; +use crate::intercom::PeerSchedule; +use crate::models::{Digest, Location, NodeCount, Point, PointId, Round}; + +use super::dto::ConsensusEvent; + +pub struct BroadcastFilter { + // defend from spam from future rounds: + // should keep rounds greater than current dag round + last_by_peer: FastDashMap, + // very much like DAG structure, but without dependency check; + // just to determine reliably that consensus advanced without current node + by_round: FastDashMap< + Round, + ( + NodeCount, + BTreeMap>, + ), + >, + current_dag_round: AtomicU32, + peer_schedule: Arc, + output: mpsc::UnboundedSender, +} + +impl BroadcastFilter { + pub fn new( + peer_schedule: Arc, + output: mpsc::UnboundedSender, + ) -> Arc { + let this = Self { + last_by_peer: Default::default(), + by_round: Default::default(), + current_dag_round: Default::default(), // will advance with other peers + peer_schedule, + output, + }; + let this = Arc::new(this); + let listener = this.clone(); + tokio::spawn(listener.clean_cache()); + this + } + + async fn clean_cache(self: Arc) { + let mut rx = self.peer_schedule.updates(); + match rx.recv().await { + Ok((peer_id, PeerState::Removed)) => { + self.last_by_peer.remove(&peer_id); + } + Ok(_) => {} + Err(err @ RecvError::Lagged(_)) => { + tracing::warn!("peer schedule updates {err}"); + } + Err(err @ RecvError::Closed) => { + panic!("peer schedule updates {err}"); + } + } + } + + // TODO logic is doubtful because of contradiction in requirements: + // * we must determine the latest consensus round reliably: + // the current approach is to collect 1/3+1 points at the same future round + // => we should collect as much points as possible + // * we must defend the DAG and current cache from spam from future rounds, + // => we should discard points from the far future + + /// returns Vec of points to insert into DAG if consensus round is determined reliably + pub async fn add(&self, point: Arc) -> BroadcastResponse { + // dag @r+0 accepts broadcasts of [r-1; r+1] rounds; + // * points older than r-1 are rejected, but are sent to DAG for validation + // as they may be used by some point as a dependency + // * newer broadcasts are enqueued until 1/3+1 points per round collected + let dag_round = Round(self.current_dag_round.load(Ordering::Acquire)); + // for any node @ r+0, its DAG always contains [r-DAG_DEPTH-N; r+1] rounds, where N>=0 + let PointId { + location: Location { round, author }, + digest, + } = point.id(); + // conceal raw point, do not use it + let point = Verifier::verify(&point, 
&self.peer_schedule) + .map_or_else(ConsensusEvent::Invalid, |_| ConsensusEvent::Verified(point)); + if round <= dag_round.next() { + let response = if matches!(point, ConsensusEvent::Invalid(_)) { + BroadcastResponse::Rejected + } else if round >= dag_round.prev() { + BroadcastResponse::Accepted // we will sign, maybe + } else { + // too old, current node will not sign, but some point may include it + BroadcastResponse::Rejected + }; + _ = self.output.send(point); + return response; + } // else: either consensus moved forward without us, + // or we shouldn't accept the point yet, or this is spam + + let mut outdated_peer_round = None; + if *self + .last_by_peer + .entry(author) + .and_modify(|next| { + if *next < round { + if *next >= dag_round { + outdated_peer_round = Some(*next); + } + *next = round + } + }) + .or_insert(round) + > round + { + // equivocations are handled by DAG; + // node must not send broadcasts out-of order; + // TODO we should ban a peer that broadcasts its rounds out of order, + // though we cannot prove this decision for other nodes + return BroadcastResponse::Rejected; + }; + if let Some(to_delete) = outdated_peer_round { + // unfortunately, removals will occur every time node lags behind consensus + self.by_round.entry(to_delete).and_modify(|(_, authors)| { + // luckily no need to shrink a BTreeMap + // TODO ban the author, if we detect equivocation now; we won't be able to prove it + // if some signatures are invalid (it's another reason for a local ban) + authors.remove(&author); + }); + } + + let mut same_round = match self.by_round.entry(round).or_try_insert_with(|| { + // how many nodes should send broadcasts + NodeCount::try_from(self.peer_schedule.peers_for(&round).len()) + .map(|node_count| (node_count, Default::default())) + }) { + Ok(entry) => entry, + // will not accept broadcasts from not initialized validator set + Err(_) => return BroadcastResponse::TryLater, + }; + + let (node_count, ref mut same_round) = same_round.value_mut(); + same_round.entry(author).or_default().insert(digest, point); + if same_round.len() < node_count.reliable_minority() { + return BroadcastResponse::TryLater; // round is not yet determined + }; + _ = same_round; + + self.advance_round(&round).await; + BroadcastResponse::Accepted + } + + // drop everything up to the new round (inclusive), channelling cached points + pub async fn advance_round(&self, new_round: &Round) { + let Ok(old) = + self.current_dag_round + .fetch_update(Ordering::Release, Ordering::Relaxed, |old| { + Some(new_round.0).filter(|new| old < *new) + }) + else { + return; + }; + // if dag advanced more than by +1 round, include our potential witness points + // TODO it would be great to drain all contents up to the new round for performance, + // (no need to download discarded data) but only top 2 of them are truly necessary; + // looks like DashMap doesn't fit well + let mut data = if old < new_round.0 { + self.by_round.remove(&new_round.prev()) + } else { + None + } + .into_iter() + .chain(self.by_round.remove(&new_round)); + + while let Some((round, (_, by_author))) = data.next() { + _ = self.output.send(ConsensusEvent::Forward(round)); + for (_, points) in by_author { + for (_, point) in points { + _ = self.output.send(point); + } + } + } + // clear older rounds TODO: shrink to fit + self.by_round.retain(|round, _| round > new_round); + } +} diff --git a/consensus/src/intercom/adapter/broadcaster.rs b/consensus/src/intercom/adapter/broadcaster.rs new file mode 100644 index 000000000..6e8151f31 --- 
/dev/null +++ b/consensus/src/intercom/adapter/broadcaster.rs @@ -0,0 +1,244 @@ +use std::mem; +use std::sync::Arc; +use std::time::Duration; + +use futures_util::future::BoxFuture; +use futures_util::stream::FuturesUnordered; +use futures_util::StreamExt; +use tokio::sync::broadcast::{self, error::RecvError}; +use tokio::sync::{mpsc, Notify}; + +use tycho_network::PeerId; +use tycho_util::{FastHashMap, FastHashSet}; + +use crate::intercom::adapter::dto::SignerSignal; +use crate::intercom::dto::{BroadcastResponse, PeerState, SignatureResponse}; +use crate::intercom::{Dispatcher, PeerSchedule}; +use crate::models::{NodeCount, Point, Signature}; + +type BcastResult = anyhow::Result; +type SigResult = anyhow::Result; +const LOOP_DURATION: Duration = Duration::from_millis(100); + +pub struct Broadcaster { + point_body: Vec, + dispatcher: Dispatcher, + bcaster_ready: Arc, + signer_signal: mpsc::UnboundedReceiver, + is_signer_ready_ok: bool, + + peer_updates: broadcast::Receiver<(PeerId, PeerState)>, + removed_peers: FastHashSet, + // every connected peer should receive broadcast, but only signer's signatures are accountable + signers: FastHashSet, + signers_count: NodeCount, + // results + rejections: FastHashSet, + signatures: FastHashMap, + // TODO move generic logic out of dispatcher + bcast_request: tycho_network::Request, + bcast_peers: FastHashSet, + bcast_futs: FuturesUnordered>, + sig_request: tycho_network::Request, + sig_peers: FastHashSet, + sig_futs: FuturesUnordered>, +} + +impl Broadcaster { + pub fn new( + point: &Point, + dispatcher: &Dispatcher, + peer_schedule: &PeerSchedule, + bcaster_ready: Arc, + signer_signal: mpsc::UnboundedReceiver, + ) -> Self { + let point_body = bincode::serialize(&point.body).expect("own point serializes to bytes"); + let peer_updates = peer_schedule.updates(); + let signers = peer_schedule + .peers_for(&point.body.location.round.next()) + .iter() + .map(|(peer_id, _)| *peer_id) + .collect::>(); + let signers_count = NodeCount::try_from(signers.len()).unwrap(); + let bcast_peers = peer_schedule.all_resolved(); + let bcast_request = Dispatcher::broadcast_request(&point); + let sig_request = Dispatcher::signature_request(&point.body.location.round); + Self { + point_body, + dispatcher: dispatcher.clone(), + bcaster_ready, + signer_signal, + is_signer_ready_ok: false, + + peer_updates, + signers, + signers_count, + removed_peers: Default::default(), + rejections: Default::default(), + signatures: Default::default(), + + bcast_request, + bcast_peers, + bcast_futs: FuturesUnordered::new(), + + sig_request, + sig_peers: Default::default(), + sig_futs: FuturesUnordered::new(), + } + } + /// returns evidence for broadcast point + pub async fn run(mut self) -> Result, ()> { + // how this was supposed to work: + // * in short: broadcast to all and gather signatures from those who accepted the point + // * both broadcast and signature tasks have their own retry loop for every peer + // * also, if a peer is not yet ready to accept, there is a loop between tasks of both sorts + // (we ping such a peer with a short signature request instead of sending the whole point) + // * if any async task hangs for too long - try poll another sort of tasks + // * if no task of some sort - try poll another sort of tasks + // * periodically check if loop completion requirement is met (2F++ signs or 1/3+1++ fails) - + // is a tradeoff between gather at least 2F signatures and do not wait unresponsive peers + // (i.e. 
response bucketing where the last bucket is full and contains 2f-th element) + // i.e. at any moment any peer may be in a single state: + // * processing our broadcast request + // * processing our signature request + // * enqueued for any of two requests above + // * rejected to sign our point (incl. rejection of the point itself and incorrect sig) + // * successfully signed our point and dequeued + for peer_id in mem::take(&mut self.bcast_peers).iter() { + self.broadcast(peer_id) + } + loop { + tokio::select! { + Some((peer_id, result)) = self.bcast_futs.next() => { + self.match_broadcast_result(peer_id, result) + }, + Some((peer_id, result)) = self.sig_futs.next() => { + self.match_signature_result(peer_id, result) + }, + update = self.peer_updates.recv() => { + self.match_peer_updates(update) + } + Some(signer_signal) = self.signer_signal.recv() => { + match signer_signal { + SignerSignal::Ok => self.is_signer_ready_ok = true, + SignerSignal::Err => { + // even if we can return successful result, it will be discarded + return Err(()) + }, + SignerSignal::Retry => { + match self.check_if_ready() { + Some(result) => break result.map(|_| self.signatures), + None => self.retry(), + } + }, + } + } + } + } + } + fn check_if_ready(&mut self) -> Option> { + if self.rejections.len() >= self.signers_count.reliable_minority() { + self.bcaster_ready.notify_one(); + if self.is_signer_ready_ok { + return Some(Err(())); + } + } else if self.signatures.len() >= self.signers_count.majority_of_others() { + self.bcaster_ready.notify_one(); + if self.is_signer_ready_ok { + return Some(Ok(())); + } + } + None + } + fn retry(&mut self) { + for peer_id in mem::take(&mut self.sig_peers).iter() { + self.request_signature(peer_id); + } + for peer_id in mem::take(&mut self.bcast_peers).iter() { + self.broadcast(peer_id); + } + } + fn match_peer_updates(&mut self, result: Result<(PeerId, PeerState), RecvError>) { + match result { + Ok((_peer_id, PeerState::Added)) => { /* ignore */ } + Ok((peer_id, PeerState::Resolved)) => self.broadcast(&peer_id), + Ok((peer_id, PeerState::Removed)) => _ = self.removed_peers.insert(peer_id), + Err(err @ RecvError::Lagged(_)) => { + tracing::warn!("Broadcaster peer updates {err}") + } + Err(err @ RecvError::Closed) => { + panic!("Broadcaster peer updates {err}") + } + } + } + fn match_broadcast_result(&mut self, peer_id: PeerId, result: BcastResult) { + match result { + Ok(BroadcastResponse::Accepted) => self.request_signature(&peer_id), + Ok(BroadcastResponse::TryLater) => _ = self.sig_peers.insert(peer_id), + Ok(BroadcastResponse::Rejected) => { + if self.signers.contains(&peer_id) { + self.rejections.insert(peer_id); + } + } + Err(error) => { + // TODO distinguish timeouts from models incompatibility etc + + // self.bcast_peers.push(peer_id); // let it retry + self.sig_peers.insert(peer_id); // lighter weight retry loop + tracing::warn!("on broadcasting own point: {error}"); + } + } + } + fn match_signature_result(&mut self, peer_id: PeerId, result: SigResult) { + match result { + Ok(SignatureResponse::Signature(signature)) => { + if self.signers.contains(&peer_id) { + if self.is_signature_ok(&peer_id, &signature) { + self.signatures.insert(peer_id, signature); + } else { + // any invalid signature lowers our chances + // to successfully finish current round + self.rejections.insert(peer_id); + } + } + } + Ok(SignatureResponse::NoPoint) => { + self.broadcast(&peer_id); + } + Ok(SignatureResponse::TryLater) => { + self.sig_peers.insert(peer_id); + } + 
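// The completion checks in `check_if_ready` above rely on NodeCount's 3F+1 arithmetic
// (`majority_of_others` = 2F, `reliable_minority` = F+1). A minimal sketch of that
// arithmetic, assuming the formulas of the removed engine/node_count.rs still apply;
// the free function `thresholds` is illustrative only, not an API of this crate.
fn thresholds(total_peers: usize) -> (usize, usize, usize) {
    let n = ((total_peers + 2) / 3) * 3 + 1; // round the peer count up to 3F+1
    let f = n / 3;
    // (2F+1 majority incl. self, 2F majority of others, F+1 reliable minority)
    (2 * f + 1, 2 * f, f + 1)
}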
Ok(SignatureResponse::Rejected) => { + if self.signers.contains(&peer_id) { + self.rejections.insert(peer_id); + } + } + Err(error) => { + // TODO distinguish timeouts from models incompatibility etc + self.sig_peers.insert(peer_id); // let it retry + tracing::warn!("on requesting signatures for own point: {error}"); + } + } + } + fn broadcast(&mut self, peer_id: &PeerId) { + if self.removed_peers.is_empty() || !self.removed_peers.remove(&peer_id) { + self.bcast_futs + .push(self.dispatcher.request(&peer_id, &self.bcast_request)); + } + } + fn request_signature(&mut self, peer_id: &PeerId) { + if self.removed_peers.is_empty() || !self.removed_peers.remove(&peer_id) { + self.sig_futs + .push(self.dispatcher.request(&peer_id, &self.sig_request)); + } + } + fn is_signature_ok(&self, peer_id: &PeerId, signature: &Signature) -> bool { + let sig_raw: Result<[u8; 64], _> = signature.0.to_vec().try_into(); + sig_raw + .ok() + .zip(peer_id.as_public_key()) + .map_or(false, |(sig_raw, pub_key)| { + pub_key.verify_raw(self.point_body.as_slice(), &sig_raw) + }) + } +} diff --git a/consensus/src/tasks/downloader.rs b/consensus/src/intercom/adapter/downloader.rs similarity index 80% rename from consensus/src/tasks/downloader.rs rename to consensus/src/intercom/adapter/downloader.rs index 287c17b6d..d0242f8d8 100644 --- a/consensus/src/tasks/downloader.rs +++ b/consensus/src/intercom/adapter/downloader.rs @@ -2,14 +2,14 @@ use std::future::Future; use std::pin::Pin; use std::task::{Context, Poll}; -use crate::engine::dag::DagPoint; +use crate::models::DagPoint; -pub struct DownloadTask { +pub struct Downloader { // point's author is a top priority; fallback priority is (any) dependent point's author // recursively: every dependency is expected to be signed by 2/3+1 } -impl Future for DownloadTask { +impl Future for Downloader { type Output = DagPoint; fn poll(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll { diff --git a/consensus/src/intercom/adapter/dto.rs b/consensus/src/intercom/adapter/dto.rs new file mode 100644 index 000000000..78a2c2ebe --- /dev/null +++ b/consensus/src/intercom/adapter/dto.rs @@ -0,0 +1,24 @@ +use std::sync::Arc; + +use crate::models::{DagPoint, Point, Round}; + +pub enum ConsensusEvent { + // allows not to peek but poll the channel when local dag is not ready yet + Forward(Round), + // well-formed, but not yet validated against DAG + Verified(Arc), + Invalid(DagPoint), +} + +/// * signer signals (Ok) when ready, broadcaster signals () when ready +/// * signer finishes only after broadcaster signalled () +/// * broadcaster finishes Ok only if signer signalled (Ok), signalling () to signer +/// * broadcaster must finish Ok/Err if signer signalled (Err), signalling () to signer +/// * broadcaster may finish Err, signalling () to signer +/// +/// => signer may run without broadcaster, as if broadcaster signalled () +pub enum SignerSignal { + Ok, + Err, + Retry, +} diff --git a/consensus/src/intercom/adapter/mod.rs b/consensus/src/intercom/adapter/mod.rs new file mode 100644 index 000000000..e79132593 --- /dev/null +++ b/consensus/src/intercom/adapter/mod.rs @@ -0,0 +1,13 @@ +pub use broadcast_filter::*; +pub use broadcaster::*; +pub use downloader::*; +pub use signer::*; + +// Note: intercom modules' responsibilities +// matches visibility of their internal DTOs + +mod broadcast_filter; +mod broadcaster; +mod downloader; +mod dto; +mod signer; diff --git a/consensus/src/intercom/adapter/signer.rs b/consensus/src/intercom/adapter/signer.rs new file mode 100644 index 
000000000..53f233490 --- /dev/null +++ b/consensus/src/intercom/adapter/signer.rs @@ -0,0 +1,244 @@ +use std::mem; +use std::sync::Arc; + +use futures_util::future::BoxFuture; +use futures_util::stream::FuturesUnordered; +use futures_util::StreamExt; +use tokio::sync::{mpsc, oneshot, Notify}; + +use tycho_network::PeerId; + +use crate::dag::{DagRound, InclusionState}; +use crate::engine::MempoolConfig; +use crate::intercom::adapter::dto::{ConsensusEvent, SignerSignal}; +use crate::intercom::dto::SignatureResponse; +use crate::models::{Point, Round}; + +pub struct Signer { + from_bcast_filter: mpsc::UnboundedReceiver, + signature_requests: mpsc::UnboundedReceiver, + next_round: Round, + next_includes: FuturesUnordered>, +} + +impl Signer { + pub fn new( + from_bcast_filter: mpsc::UnboundedReceiver, + signature_requests: mpsc::UnboundedReceiver, + last_round: &Round, + ) -> Self { + Self { + from_bcast_filter, + signature_requests, + next_round: last_round.next(), + next_includes: FuturesUnordered::new(), + } + } + + pub async fn run( + mut self, + next_dag_round: DagRound, // r+1 + has_own_point: Option>, + signer_signal: mpsc::UnboundedSender, + bcaster_ready: Arc, + ) -> Self { + let current_dag_round = next_dag_round + .prev() + .get() + .expect("current DAG round must be linked into DAG chain"); + let mut includes = mem::take(&mut self.next_includes); + if current_dag_round.round() != &self.next_round { + includes.clear(); + }; + self.next_round = next_dag_round.round().clone(); + let task = SignerTask { + next_dag_round, + current_round: current_dag_round.clone(), + includes, + includes_ready: has_own_point.into_iter().count(), + next_includes: FuturesUnordered::new(), + + signer_signal, + bcaster_ready, + is_bcaster_ready: false, + }; + let result = task + .run(&mut self.from_bcast_filter, &mut self.signature_requests) + .await; + match result { + Ok(includes) => self.next_includes = includes, + Err(round) => self.next_round = round, + } + self + } + + pub fn next_round(&self) -> &'_ Round { + &self.next_round + } +} + +type SignatureRequest = (Round, PeerId, oneshot::Sender); +struct SignerTask { + // for node running @ r+0: + + // @ r+0, will become includes in point @ r+1 + // needed in order to not include same point twice - as an include and as a witness; + // need to drop them with round change + includes: FuturesUnordered>, + includes_ready: usize, + /// do not poll during this round, just pass to next round; + /// anyway should rewrite signing mechanics - look for comments inside [DagRound::add_exact] + next_includes: FuturesUnordered>, + + next_dag_round: DagRound, // = r+1 is always in DAG; contains the keypair to produce point @ r+1 + current_round: DagRound, // = r+0 + + signer_signal: mpsc::UnboundedSender, + bcaster_ready: Arc, + is_bcaster_ready: bool, +} + +impl SignerTask { + /// includes @ r+0 must include own point @ r+0 iff the one is produced + + /// returns includes for our point at the next round + async fn run( + mut self, + from_bcast_filter: &mut mpsc::UnboundedReceiver, + signature_requests: &mut mpsc::UnboundedReceiver, + ) -> Result>, Round> { + let mut retry_interval = tokio::time::interval(MempoolConfig::RETRY_INTERVAL); + loop { + tokio::select! 
{ + request = signature_requests.recv() => match request { + Some((round, peer_id, callback)) => + _ = callback.send(self.signature_response(&round, &peer_id)), + None => panic!("channel with signature requests closed") + }, + filtered = from_bcast_filter.recv() => match filtered { + Some(consensus_event) => { + if let Err(round) = self.match_filtered(&consensus_event) { + _ = self.signer_signal.send(SignerSignal::Err); + return Err(round) + } + }, + None => panic!("channel from Broadcast Filter closed"), + }, + _ = self.bcaster_ready.notified() => { + self.is_bcaster_ready = true; + if self.includes_ready >= self.current_round.node_count().majority() { + return Ok(self.next_includes) + } + }, + _ = retry_interval.tick() => { + // point @ r+1 has to include 2F+1 broadcasts @ r+0 (we are @ r+0) + if self.includes_ready >= self.current_round.node_count().majority() { + _ = self.signer_signal.send(SignerSignal::Ok); + _ = self.signer_signal.send(SignerSignal::Retry); + if self.is_bcaster_ready { + return Ok(self.next_includes) + } + } else { + _ = self.signer_signal.send(SignerSignal::Retry); + } + }, + // FIXME not so great: some signature requests will be retried, + // just because this futures were not polled. Use global 'current dag round' round + // and sign inside shared join task in dag location, + // do not return location from DagLocation::add_validate(point) + Some(state) = self.includes.next() => { + // slow but at least may work + if let Some(signable) = state.signable() { + if signable.sign( + self.current_round.round(), + self.next_dag_round.key_pair(), + MempoolConfig::sign_time_range(), + ) { + self.includes_ready += 1; + } + } + }, + } + } + } + + fn signature_response(&mut self, round: &Round, author: &PeerId) -> SignatureResponse { + if round > self.current_round.round() { + return SignatureResponse::TryLater; // hold fast nodes from moving forward + }; + let Some(dag_round) = self.next_dag_round.scan(round) else { + return SignatureResponse::Rejected; // lagged too far from consensus and us + }; + // TODO do not state().clone() - mutating closure on location is easily used; + // need to remove inner locks from InclusionState and leave it guarded by DashMap; + // also sign points during their validation, see comments in DagLocation::add_validate() + let Some(state) = dag_round.view(author, |loc| loc.state().clone()) else { + return SignatureResponse::NoPoint; // retry broadcast, point was replaced in filter + }; + if let Some(signable) = state.signable() { + let key_pair = match self.next_dag_round.key_pair() { + // points @ current local dag round are includes for next round point + Some(key_pair) if round == self.current_round.round() => Some(key_pair), + // points @ previous local dag round are witness for next round point + Some(_) if round == &self.current_round.round().prev() => { + self.current_round.key_pair() + } + // point is too old, cannot include; + // Note: requests for future rounds are filtered out at the beginning of this method + _ => None, + }; + if signable.sign( + &self.current_round.round(), + key_pair, + MempoolConfig::sign_time_range(), + ) { + if round == self.current_round.round() { + self.includes_ready += 1; + } + } + } + match state.signed() { + Some(Ok(signed)) => SignatureResponse::Signature(signed.with.clone()), + Some(Err(())) => SignatureResponse::Rejected, + None => SignatureResponse::TryLater, + } + } + fn match_filtered(&self, filtered: &ConsensusEvent) -> Result<(), Round> { + match filtered { + ConsensusEvent::Forward(consensus_round) 
=> { + match consensus_round.cmp(self.next_dag_round.round()) { + // we're too late, consensus moved forward + std::cmp::Ordering::Greater => return Err(consensus_round.clone()), + // we still have a chance to finish current round + std::cmp::Ordering::Equal => {} + // we are among the fastest nodes of consensus + std::cmp::Ordering::Less => {} + } + } + ConsensusEvent::Verified(point) => match &point.body.location.round { + x if x > self.next_dag_round.round() => { + panic!("Coding error: broadcast filter advanced while signer left behind") + } + x if x == self.next_dag_round.round() => { + if let Some(task) = self.next_dag_round.add(point) { + self.next_includes.push(task) + } + } + x if x == self.current_round.round() => { + if let Some(task) = self.current_round.add(point) { + self.includes.push(task) + } + } + _ => _ = self.current_round.add(&point), // maybe other's dependency + }, + ConsensusEvent::Invalid(dag_point) => { + if &dag_point.location().round > self.next_dag_round.round() { + panic!("Coding error: broadcast filter advanced while signer left behind") + } else { + _ = self.next_dag_round.insert_invalid(&dag_point); + } + } + }; + Ok(()) + } +} diff --git a/consensus/src/intercom/core/dispatcher.rs b/consensus/src/intercom/core/dispatcher.rs new file mode 100644 index 000000000..0f6e4a91c --- /dev/null +++ b/consensus/src/intercom/core/dispatcher.rs @@ -0,0 +1,199 @@ +use anyhow::{anyhow, Result}; +use futures_util::future::BoxFuture; +use futures_util::FutureExt; + +use tycho_network::{DhtClient, Network, OverlayId, OverlayService, PeerId, PrivateOverlay}; + +use crate::intercom::core::dto::{MPRequest, MPResponse}; +use crate::intercom::core::responder::Responder; +use crate::intercom::dto::PointByIdResponse; +use crate::models::{Point, PointId, Round}; + +#[derive(Clone)] +pub struct Dispatcher { + pub overlay: PrivateOverlay, + network: Network, +} + +impl Dispatcher { + const PRIVATE_OVERLAY_ID: OverlayId = OverlayId(*b"ac87b6945b4f6f736963f7f65d025943"); + + pub fn new( + dht_client: &DhtClient, + overlay_service: &OverlayService, + all_peers: &Vec, + responder: Responder, + ) -> Self { + let dht_service = dht_client.service(); + let peer_resolver = dht_service.make_peer_resolver().build(dht_client.network()); + + let private_overlay = PrivateOverlay::builder(Self::PRIVATE_OVERLAY_ID) + .with_peer_resolver(peer_resolver) + .with_entries(all_peers) + .build(responder); + + overlay_service.add_private_overlay(&private_overlay); + + Self { + overlay: private_overlay, + network: dht_client.network().clone(), + } + } + + pub async fn point_by_id(&self, peer: &PeerId, id: &PointId) -> Result { + let request = (&MPRequest::PointById(id.clone())).into(); + let response = self.overlay.query(&self.network, peer, request).await?; + PointByIdResponse::try_from(MPResponse::try_from(&response)?) 
+ } + + pub fn broadcast_request(point: &Point) -> tycho_network::Request { + (&MPRequest::Broadcast(point.clone())).into() + } + + pub fn signature_request(round: &Round) -> tycho_network::Request { + (&MPRequest::Signature(round.clone())).into() + } + + pub fn request( + &self, + peer_id: &PeerId, + request: &tycho_network::Request, + ) -> BoxFuture<'static, (PeerId, Result)> + where + T: TryFrom, + { + let peer_id = peer_id.clone(); + let request = request.clone(); + let overlay = self.overlay.clone(); + let network = self.network.clone(); + async move { + overlay + .query(&network, &peer_id, request) + .map(move |response| { + let response = response + .and_then(|r| MPResponse::try_from(&r)) + .and_then(T::try_from) + .map_err(|e| anyhow!("response from peer {peer_id}: {e}")); + (peer_id, response) + }) + .await + } + .boxed() + } +} + +pub trait DispatcherTestExt { + fn network(&self) -> &'_ Network; +} + +impl DispatcherTestExt for Dispatcher { + fn network(&self) -> &'_ Network { + &self.network + } +} + +/* FIXME +#[cfg(test)] +mod tests { + use tycho_network::{Address, PeerInfo}; + use tycho_util::time::now_sec; + + use crate::engine::node_count::NodeCount; + use crate::engine::peer_schedule::PeerSchedule; + use crate::models::point::Digest; + + use super::*; + + fn make_peer_info(key: &ed25519::SecretKey, address: Address) -> PeerInfo { + let keypair = ed25519::KeyPair::from(key); + let peer_id = PeerId::from(keypair.public_key); + + let now = now_sec(); + let mut peer_info = PeerInfo { + id: peer_id, + address_list: vec![address].into_boxed_slice(), + created_at: now, + expires_at: u32::MAX, + signature: Box::new([0; 64]), + }; + *peer_info.signature = keypair.sign(&peer_info); + peer_info + } + + async fn make_network(node_count: usize) -> Vec { + let keys = (0..node_count) + .map(|_| ed25519::SecretKey::generate(&mut rand::thread_rng())) + .collect::>(); + + let all_peers = keys + .iter() + .map(|s| PeerId::from(ed25519::KeyPair::from(s).public_key)) + .collect::>(); + + let nodes = keys + .iter() + .map(|s| Dispatcher::new((Ipv4Addr::LOCALHOST, 0), s, &all_peers)) + .collect::>(); + + let bootstrap_info = std::iter::zip(&keys, &nodes) + .map(|(key, peer)| Arc::new(make_peer_info(key, peer.network.local_addr().into()))) + .collect::>(); + + let schedules = std::iter::zip(&all_peers, &nodes) + .map(|(peer_id, peer)| PeerSchedule::new(Round(0), &all_peers, &peer.overlay, peer_id)) + .collect::>(); + + if let Some(node) = nodes.first() { + for info in &bootstrap_info { + if info.id == node.network.peer_id() { + continue; + } + node.dht_client.add_peer(info.clone()).unwrap(); + } + } + + // let all_peers = FastHashSet::from_iter(all_peers.into_iter()); + for sch in &schedules { + sch.wait_for_peers(Round(1), NodeCount::new(node_count)) + .await; + tracing::info!("found peers for {}", sch.local_id); + } + + nodes + } + + #[tokio::test] + async fn dispatcher_works() -> Result<()> { + tracing_subscriber::fmt::try_init().ok(); + tracing::info!("dispatcher_works"); + + let peers = make_network(3).await; + + let point_id = PointId { + location: crate::models::point::Location { + round: Round(0), + author: PeerId([0u8; 32]), + }, + digest: Digest([0u8; 32]), + }; + + // FIXME must connect only to resolved peers + for i in 0..peers.len() { + for j in 0..peers.len() { + if i == j { + continue; + } + + let left = &peers[i]; + let right = &peers[j]; + + let point_opt = left + .point_by_id(right.network.peer_id(), point_id.clone()) + .await?; + assert!(point_opt.is_none()); + } + } + Ok(()) + 
} +} +*/ diff --git a/consensus/src/intercom/core/dto.rs b/consensus/src/intercom/core/dto.rs new file mode 100644 index 000000000..1c8cff9a4 --- /dev/null +++ b/consensus/src/intercom/core/dto.rs @@ -0,0 +1,82 @@ +use anyhow::anyhow; +use bytes::Bytes; +use serde::{Deserialize, Serialize}; + +use tycho_network::Version; + +use crate::intercom::dto::{BroadcastResponse, PointByIdResponse, SignatureResponse}; +use crate::models::{Point, PointId, Round}; + +#[derive(Serialize, Deserialize, Debug)] +pub enum MPRemoteResult { + Ok(MPResponse), + Err(String), +} + +#[derive(Serialize, Deserialize, Debug)] +pub enum MPRequest { + PointById(PointId), + Broadcast(Point), + Signature(Round), +} + +impl From<&MPRequest> for tycho_network::Request { + // TODO: move MPRequest et al to TL - won't need to copy Point + fn from(value: &MPRequest) -> Self { + tycho_network::Request { + version: Version::V1, + body: Bytes::from(bincode::serialize(value).expect("shouldn't happen")), + } + } +} + +#[derive(Serialize, Deserialize, Debug)] +pub enum MPResponse { + PointById(PointByIdResponse), + Broadcast(BroadcastResponse), + Signature(SignatureResponse), +} + +impl TryFrom<&tycho_network::Response> for MPResponse { + type Error = anyhow::Error; + + fn try_from(response: &tycho_network::Response) -> Result { + match bincode::deserialize::(&response.body) { + Ok(MPRemoteResult::Ok(response)) => Ok(response), + Ok(MPRemoteResult::Err(e)) => Err(anyhow::Error::msg(e)), + Err(e) => Err(anyhow!("failed to deserialize: {e:?}")), + } + } +} +impl TryFrom for PointByIdResponse { + type Error = anyhow::Error; + + fn try_from(response: MPResponse) -> Result { + match response { + MPResponse::PointById(response) => Ok(response), + _ => Err(anyhow!("wrapper mismatch, expected PointById")), + } + } +} + +impl TryFrom for BroadcastResponse { + type Error = anyhow::Error; + + fn try_from(response: MPResponse) -> Result { + match response { + MPResponse::Broadcast(response) => Ok(response), + _ => Err(anyhow!("wrapper mismatch, expected Broadcast")), + } + } +} + +impl TryFrom for SignatureResponse { + type Error = anyhow::Error; + + fn try_from(response: MPResponse) -> Result { + match response { + MPResponse::Signature(response) => Ok(response), + _ => Err(anyhow!("wrapper mismatch, expected Signature")), + } + } +} diff --git a/consensus/src/intercom/core/mod.rs b/consensus/src/intercom/core/mod.rs new file mode 100644 index 000000000..6524ae78b --- /dev/null +++ b/consensus/src/intercom/core/mod.rs @@ -0,0 +1,9 @@ +pub use dispatcher::*; +pub use responder::*; + +// Note: intercom modules' responsibilities +// matches visibility of their internal DTOs + +mod dispatcher; +mod dto; +mod responder; diff --git a/consensus/src/intercom/core/responder.rs b/consensus/src/intercom/core/responder.rs new file mode 100644 index 000000000..e02ffaa8b --- /dev/null +++ b/consensus/src/intercom/core/responder.rs @@ -0,0 +1,100 @@ +use std::sync::Arc; + +use bytes::Bytes; +use tokio::sync::{mpsc, oneshot}; + +use tycho_network::{PeerId, Response, Service, ServiceRequest, Version}; +use tycho_util::futures::BoxFutureOrNoop; + +use crate::intercom::core::dto::{MPRemoteResult, MPRequest, MPResponse}; +use crate::intercom::dto::{PointByIdResponse, SignatureResponse}; +use crate::intercom::BroadcastFilter; +use crate::models::Round; + +pub struct Responder(Arc); + +impl Responder { + pub fn new( + broadcast_filter: Arc, + signature_requests: mpsc::UnboundedSender<( + Round, + PeerId, + oneshot::Sender, + )>, + ) -> Self { + 
Self(Arc::new(ResponderInner { + broadcast_filter, + signature_requests, + })) + } +} + +impl Service for Responder { + type QueryResponse = Response; + type OnQueryFuture = BoxFutureOrNoop>; + type OnMessageFuture = futures_util::future::Ready<()>; + type OnDatagramFuture = futures_util::future::Ready<()>; + + #[inline] + fn on_query(&self, req: ServiceRequest) -> Self::OnQueryFuture { + BoxFutureOrNoop::future(self.0.clone().handle(req)) + } + + #[inline] + fn on_message(&self, _req: ServiceRequest) -> Self::OnMessageFuture { + futures_util::future::ready(()) + } + + #[inline] + fn on_datagram(&self, _req: ServiceRequest) -> Self::OnDatagramFuture { + futures_util::future::ready(()) + } +} + +struct ResponderInner { + // state and storage components go here + broadcast_filter: Arc, + signature_requests: mpsc::UnboundedSender<(Round, PeerId, oneshot::Sender)>, +} + +impl ResponderInner { + async fn handle(self: Arc, req: ServiceRequest) -> Option { + let body = match bincode::deserialize::(&req.body) { + Ok(body) => body, + Err(e) => { + tracing::error!("unexpected request from {:?}: {e:?}", req.metadata.peer_id); + // malformed request is a reason to ignore it + return None; + } + }; + + let response = match body { + MPRequest::PointById(point_id) => MPResponse::PointById(PointByIdResponse(None)), + MPRequest::Broadcast(point) => { + MPResponse::Broadcast(self.broadcast_filter.add(Arc::new(point)).await) + } + MPRequest::Signature(round) => { + let (tx, rx) = oneshot::channel(); + _ = self + .signature_requests + .send((round, req.metadata.peer_id.clone(), tx)); + match rx.await { + Ok(response) => MPResponse::Signature(response), + Err(_) => MPResponse::Signature(SignatureResponse::TryLater), + } + } + }; + + Some(Response { + version: Version::default(), + body: Bytes::from(match bincode::serialize(&MPRemoteResult::Ok(response)) { + Ok(data) => data, + Err(e) => { + tracing::error!("failed to serialize response to {:?}: {e:?}", req.metadata); + bincode::serialize(&MPRemoteResult::Err(format!("internal error"))) + .expect("must not fail") + } + }), + }) + } +} diff --git a/consensus/src/intercom/dispatcher.rs b/consensus/src/intercom/dispatcher.rs deleted file mode 100644 index ed042bb85..000000000 --- a/consensus/src/intercom/dispatcher.rs +++ /dev/null @@ -1,318 +0,0 @@ -use std::net::{Ipv4Addr, ToSocketAddrs}; -use std::sync::Arc; -use std::time::Duration; - -use anyhow::{anyhow, Result}; -use bytes::Bytes; -use everscale_crypto::ed25519; -use serde::{Deserialize, Serialize}; - -use tycho_network::{ - DhtClient, DhtConfig, DhtService, Network, OverlayConfig, OverlayId, OverlayService, PeerId, - PrivateOverlay, Response, Router, Service, ServiceRequest, Version, -}; -use tycho_util::futures::BoxFutureOrNoop; - -use crate::models::point::{Point, PointId, Round, Signature}; - -#[derive(Serialize, Deserialize, Debug)] -enum MPRequest { - Broadcast { point: Point }, - Point { id: PointId }, -} - -#[derive(Serialize, Deserialize, Debug)] -enum MPRemoteResult { - Ok(MPResponse), - Err(String), -} - -#[derive(Serialize, Deserialize, Debug)] -enum MPResponse { - Broadcast(BroadcastResponse), - Point(PointResponse), -} - -#[derive(Serialize, Deserialize, Debug)] -pub struct BroadcastResponse { - // for requested point - pub signature: Signature, - // at the same round, if it was not skipped - pub signer_point: Option, -} -#[derive(Serialize, Deserialize, Debug)] -pub struct PointResponse { - pub point: Option, -} - -pub struct Dispatcher { - pub overlay: PrivateOverlay, - pub dht_client: 
DhtClient, - network: Network, -} - -impl Dispatcher { - const PRIVATE_OVERLAY_ID: OverlayId = OverlayId(*b"ac87b6945b4f6f736963f7f65d025943"); - - pub fn new( - socket_addr: T, - key: &ed25519::SecretKey, - all_peers: &Vec, - ) -> Self { - let keypair = ed25519::KeyPair::from(key); - let local_id = PeerId::from(keypair.public_key); - - // TODO receive configured services from general node, - // move current setup to test below as it provides acceptable timing - - let (dht_tasks, dht_service) = DhtService::builder(local_id) - .with_config(DhtConfig { - local_info_announce_period: Duration::from_secs(1), - max_local_info_announce_period_jitter: Duration::from_secs(1), - routing_table_refresh_period: Duration::from_secs(1), - max_routing_table_refresh_period_jitter: Duration::from_secs(1), - ..Default::default() - }) - .build(); - - let (overlay_tasks, overlay_service) = OverlayService::builder(local_id) - .with_dht_service(dht_service.clone()) - .build(); - - let router = Router::builder() - .route(dht_service.clone()) - .route(overlay_service.clone()) - .build(); - - let network = Network::builder() - .with_private_key(key.to_bytes()) - .with_service_name("mempool-network-service") - .build(socket_addr, router) - .unwrap(); - - dht_tasks.spawn(&network); - overlay_tasks.spawn(&network); - - let peer_resolver = dht_service.make_peer_resolver().build(&network); // network???? - - let private_overlay = PrivateOverlay::builder(Self::PRIVATE_OVERLAY_ID) - .with_peer_resolver(peer_resolver) - .with_entries(all_peers) - .build(Responder(Arc::new(ResponderInner {}))); - - overlay_service.add_private_overlay(&private_overlay); - - Self { - overlay: private_overlay, - dht_client: dht_service.make_client(network.clone()), - network, - } - } - - pub async fn broadcast(&self, node: &PeerId, point: Point) -> Result { - // TODO: move MPRequest et al to TL - won't need to copy Point - let response = self.query(node, &MPRequest::Broadcast { point }).await?; - match Self::parse_response(node, &response.body)? { - MPResponse::Broadcast(r) => Ok(r), - _ => Err(anyhow!("MPResponse::Broadcast: mismatched response")), - } - } - - pub async fn get_point(&self, node: &PeerId, id: PointId) -> Result { - let response = self.query(node, &MPRequest::Point { id }).await?; - match Self::parse_response(node, &response.body)? 
{ - MPResponse::Point(r) => Ok(r), - _ => Err(anyhow!("MPResponse::Point: mismatched response")), - } - } - - async fn query(&self, node: &PeerId, data: &MPRequest) -> Result { - let request = tycho_network::Request { - version: Version::V1, - body: Bytes::from(bincode::serialize(data)?), - }; - - self.overlay.query(&self.network, node, request).await - } - - fn parse_response(node: &PeerId, body: &Bytes) -> Result { - match bincode::deserialize::(body) { - Ok(MPRemoteResult::Ok(response)) => Ok(response), - Ok(MPRemoteResult::Err(e)) => Err(anyhow::Error::msg(e)), - Err(e) => Err(anyhow!( - "failed to deserialize response from {node:?}: {e:?}" - )), - } - } -} - -struct Responder(Arc); - -impl Service for Responder { - type QueryResponse = Response; - type OnQueryFuture = BoxFutureOrNoop>; - type OnMessageFuture = futures_util::future::Ready<()>; - type OnDatagramFuture = futures_util::future::Ready<()>; - - #[inline] - fn on_query(&self, req: ServiceRequest) -> Self::OnQueryFuture { - BoxFutureOrNoop::future(self.0.clone().handle(req)) - } - - #[inline] - fn on_message(&self, _req: ServiceRequest) -> Self::OnMessageFuture { - futures_util::future::ready(()) - } - - #[inline] - fn on_datagram(&self, _req: ServiceRequest) -> Self::OnDatagramFuture { - futures_util::future::ready(()) - } -} - -struct ResponderInner { - // state and storage components go here -} - -impl ResponderInner { - async fn handle(self: Arc, req: ServiceRequest) -> Option { - let body = match bincode::deserialize::(&req.body) { - Ok(body) => body, - Err(e) => { - tracing::error!("unexpected request from {:?}: {e:?}", req.metadata.peer_id); - // malformed request is a reason to ignore it - return None; - } - }; - - let response = match body { - MPRequest::Broadcast { .. } => { - // 1.1 sigs for my block + 1.2 my next includes - // ?? + 3.1 ask last - MPResponse::Broadcast(BroadcastResponse { - signature: Signature(Bytes::new()), - signer_point: None, - }) - } - MPRequest::Point { .. 
} => { - // 1.2 my next includes (merged with Broadcast flow) - MPResponse::Point(PointResponse { point: None }) - } - }; - - Some(Response { - version: Version::default(), - body: Bytes::from(match bincode::serialize(&MPRemoteResult::Ok(response)) { - Ok(data) => data, - Err(e) => { - tracing::error!("failed to serialize response to {:?}: {e:?}", req.metadata); - bincode::serialize(&MPRemoteResult::Err(format!("internal error"))) - .expect("must not fail") - } - }), - }) - } -} - -#[cfg(test)] -mod tests { - use tycho_network::{Address, PeerInfo}; - use tycho_util::time::now_sec; - - use crate::engine::node_count::NodeCount; - use crate::engine::peer_schedule::PeerSchedule; - use crate::models::point::Digest; - - use super::*; - - fn make_peer_info(key: &ed25519::SecretKey, address: Address) -> PeerInfo { - let keypair = ed25519::KeyPair::from(key); - let peer_id = PeerId::from(keypair.public_key); - - let now = now_sec(); - let mut node_info = PeerInfo { - id: peer_id, - address_list: vec![address].into_boxed_slice(), - created_at: now, - expires_at: u32::MAX, - signature: Box::new([0; 64]), - }; - *node_info.signature = keypair.sign(&node_info); - node_info - } - - async fn make_network(node_count: usize) -> Vec { - let keys = (0..node_count) - .map(|_| ed25519::SecretKey::generate(&mut rand::thread_rng())) - .collect::>(); - - let all_peers = keys - .iter() - .map(|s| PeerId::from(ed25519::KeyPair::from(s).public_key)) - .collect::>(); - - let nodes = keys - .iter() - .map(|s| Dispatcher::new((Ipv4Addr::LOCALHOST, 0), s, &all_peers)) - .collect::>(); - - let bootstrap_info = std::iter::zip(&keys, &nodes) - .map(|(key, node)| Arc::new(make_peer_info(key, node.network.local_addr().into()))) - .collect::>(); - - let schedules = std::iter::zip(&all_peers, &nodes) - .map(|(peer_id, node)| PeerSchedule::new(Round(0), &all_peers, &node.overlay, peer_id)) - .collect::>(); - - if let Some(node) = nodes.first() { - for info in &bootstrap_info { - if info.id == node.network.peer_id() { - continue; - } - node.dht_client.add_peer(info.clone()).unwrap(); - } - } - - // let all_peers = FastHashSet::from_iter(all_peers.into_iter()); - for sch in &schedules { - sch.wait_for_peers(Round(1), NodeCount::new(node_count).majority_except_me()) - .await; - tracing::info!("found peers for {}", sch.local_id); - } - - nodes - } - - #[tokio::test] - async fn dispatcher_works() -> Result<()> { - tracing_subscriber::fmt::try_init().ok(); - tracing::info!("dispatcher_works"); - - let nodes = make_network(3).await; - - let point_id = PointId { - location: crate::models::point::Location { - round: Round(0), - author: PeerId([0u8; 32]), - }, - digest: Digest([0u8; 32]), - }; - - for i in 0..nodes.len() { - for j in 0..nodes.len() { - if i == j { - continue; - } - - let left = &nodes[i]; - let right = &nodes[j]; - - let PointResponse { point } = left - .get_point(right.network.peer_id(), point_id.clone()) - .await?; - assert!(point.is_none()); - } - } - Ok(()) - } -} diff --git a/consensus/src/intercom/dto.rs b/consensus/src/intercom/dto.rs new file mode 100644 index 000000000..d1439073e --- /dev/null +++ b/consensus/src/intercom/dto.rs @@ -0,0 +1,42 @@ +use serde::{Deserialize, Serialize}; + +use crate::models::{Point, Signature}; + +#[derive(Serialize, Deserialize, Debug)] +pub struct PointByIdResponse(pub Option); + +#[derive(Serialize, Deserialize, Debug)] +pub enum BroadcastResponse { + /// peer will verify and maybe sign the point + Accepted, + // TimeOut (disconnect) is a reason to retry also + /// peer did not 
reach the point's round yet + TryLater, + /// malformed point or peer is on a later round + Rejected, +} + +#[derive(Serialize, Deserialize, PartialEq, Debug)] +pub enum SignatureResponse { + Signature(Signature), + /// peer dropped its state or just reached point's round + NoPoint, + // TimeOut (still verifying or disconnect) is also a reason to retry + /// * signer did not reach the point's round yet - lighter weight broadcast retry loop; + /// * signer still validates the point; + /// * clock skew: signer's wall time lags the time from point's body + TryLater, + /// * malformed point + /// * equivocation + /// * invalid dependency + /// * signer is on a future round + /// * signer's clock are too far in the future (probably consensus stalled for long) + Rejected, +} + +#[derive(Clone, PartialEq, Debug)] +pub enum PeerState { + Added, // not yet ready to connect; always includes local peer id + Resolved, // remote peer ready to connect + Removed, // remote peer will not be added again +} diff --git a/consensus/src/intercom/mod.rs b/consensus/src/intercom/mod.rs index 2b75d8eab..b12242a1d 100644 --- a/consensus/src/intercom/mod.rs +++ b/consensus/src/intercom/mod.rs @@ -1 +1,11 @@ -mod dispatcher; +pub use adapter::*; +pub use core::*; +pub use peer_schedule::*; + +// Note: intercom modules' responsibilities +// matches visibility of their internal DTOs + +mod adapter; +mod core; +mod dto; +mod peer_schedule; diff --git a/consensus/src/intercom/peer_schedule/mod.rs b/consensus/src/intercom/peer_schedule/mod.rs new file mode 100644 index 000000000..667054f61 --- /dev/null +++ b/consensus/src/intercom/peer_schedule/mod.rs @@ -0,0 +1,8 @@ +pub use peer_schedule::*; +pub use peer_schedule_updater::*; + +// Note: intercom modules' responsibilities +// matches visibility of their internal DTOs + +mod peer_schedule; +mod peer_schedule_updater; diff --git a/consensus/src/engine/peer_schedule.rs b/consensus/src/intercom/peer_schedule/peer_schedule.rs similarity index 52% rename from consensus/src/engine/peer_schedule.rs rename to consensus/src/intercom/peer_schedule/peer_schedule.rs index a0850073f..71933eaaf 100644 --- a/consensus/src/engine/peer_schedule.rs +++ b/consensus/src/intercom/peer_schedule/peer_schedule.rs @@ -3,86 +3,74 @@ use std::collections::BTreeMap; use std::ops::Range; use std::sync::Arc; -use futures_util::StreamExt; +use everscale_crypto::ed25519::KeyPair; use parking_lot::Mutex; -use rand::prelude::IteratorRandom; use tokio::sync::broadcast; -use tokio::sync::broadcast::error::RecvError; -use tokio::task::AbortHandle; -use tycho_network::{PeerId, PrivateOverlay, PrivateOverlayEntriesEvent}; +use tycho_network::PeerId; +use tycho_util::FastHashSet; -use crate::engine::node_count::NodeCount; -use crate::models::point::Round; +use crate::intercom::dto::PeerState; +use crate::models::{NodeCount, Round}; /* As validators are elected for wall-clock time range, the round of validator set switch is not known beforehand and will be determined by the time in anchor vertices: it must reach some predefined time range, - when new set is supposed to be online and begin to request points, + when the new set is supposed to be online and start to request points, and a (relatively high) predefined number of support rounds must follow - for the anchor chain to be committed by majority and for new nodes to gather data. - The switch will occur for validator sets as a whole, at a single leaderless round. + for the anchor chain to be committed by majority and for the new nodes to gather data. 
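// A round-trip sketch for the wire DTOs defined in intercom/dto.rs above: Dispatcher and
// Responder move them as bincode bytes, so every variant must survive serialize/deserialize.
// `TryLater` is used because it carries no payload; the module and test names are illustrative only.
#[cfg(test)]
mod dto_roundtrip {
    use crate::intercom::dto::SignatureResponse;

    #[test]
    fn signature_response_survives_bincode() {
        let bytes = bincode::serialize(&SignatureResponse::TryLater).expect("serialize");
        let back: SignatureResponse = bincode::deserialize(&bytes).expect("deserialize");
        assert_eq!(back, SignatureResponse::TryLater);
    }
}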
+ The switch will occur for validator sets as a whole. */ -#[derive(Clone, PartialEq, Debug)] -pub enum PeerState { - Added, // not yet ready to connect - Resolved, // ready to connect - Removed, // will not be added again -} #[derive(Clone)] pub struct PeerSchedule { - // FIXME determine if our local_id is in next epoch + // FIXME remove mutex ( parking_lot ! ) + // and just restart updater when new peers or epoch start are known; + // use copy-on-write to replace Inner as a whole; + // maybe store schedule-per-round inside DAG round, but how to deal with download tasks? inner: Arc>, - // Note: connection to self is always "Added" - // Note: updates are Resolved or Removed, sent single time + // Connection to self is always "Added" + // Updates are Resolved or Removed, sent single time updates: broadcast::Sender<(PeerId, PeerState)>, - abort_resolve_peers: Arc>>, - overlay: PrivateOverlay, - pub local_id: PeerId, // FIXME move into schedule when it starts to change with new epoch + /// Keypair may be changed only with node restart, and is known before validator elections. + /// Node should use its keypair only to produce own and sign others points. + local_keys: Arc, } impl PeerSchedule { - pub fn new( - current_epoch_start: Round, - current_peers: &Vec, - overlay: &PrivateOverlay, - local_id: &PeerId, - ) -> Self { - let (updates, _) = broadcast::channel(10); - let mut current_peers = current_peers.clone(); - current_peers.retain(|p| p != local_id); + pub fn new(local_keys: Arc) -> Self { + // TODO channel size is subtle: it cannot be large, + // but any skipped event breaks 2F+1 guarantees + let (updates, _) = broadcast::channel(100); let this = Self { - inner: Arc::new(Mutex::new(PeerScheduleInner::new( - current_epoch_start, - ¤t_peers, - ))), - overlay: overlay.clone(), + inner: Arc::new(Mutex::new(PeerScheduleInner::new())), updates, - abort_resolve_peers: Default::default(), - local_id: local_id.clone(), + local_keys, }; - this.respawn_resolve_task(); - tokio::spawn(this.clone().listen()); this } + pub fn updates(&self) -> broadcast::Receiver<(PeerId, PeerState)> { + self.updates.subscribe() + } + // To sign a point or to query for points, we need to know the intersection of: // * which nodes are in the validator set during the round of interest // * which nodes are able to connect at the moment /// TODO replace bool with AtomicBool? use Arc? 
to return map with auto refresh - pub async fn wait_for_peers(&self, round: Round, node_count: NodeCount) { - let mut rx = self.updates.subscribe(); + pub async fn wait_for_peers(&self, round: &Round, node_count: NodeCount) { + let mut rx = self.updates(); let mut peers = (*self.peers_for(round)).clone(); let mut count = peers .iter() .filter(|(_, state)| **state == PeerState::Resolved) .count(); - while count < node_count.into() { + let local_id = self.local_id(); + while count < node_count.majority_of_others() { match rx.recv().await { - Ok((peer_id, new_state)) if peer_id != self.local_id => { + Ok((peer_id, new_state)) if peer_id != local_id => { if let Some(state) = peers.get_mut(&peer_id) { match (&state, &new_state) { (PeerState::Added, PeerState::Removed) => count -= 1, @@ -102,7 +90,44 @@ impl PeerSchedule { } } - pub fn peers_for(&self, round: Round) -> Arc> { + /// Note: keep private, it's just a local shorthand + pub(super) fn local_id(&self) -> PeerId { + self.local_keys.public_key.into() + } + + /// Note: a signature designates the signer's liability to include the signed point's id into its own point + /// at the next round (to compare one's evidence against others' includes and witnesses). + /// So any point is sent only to nodes scheduled for the next round. + /// So: + /// * to create own point @ r+0, node needs a keypair for r+0 + /// * to sign others' points @ r+0 during r+0 as inclusion for r+1, node needs a keypair for r+1 + /// * to sign others' points @ r-1 during r+0 as a witness for r+1, node needs + /// * a keypair for r+0 to make a signature (as if it were made during r-1) + /// * a keypair for r+1 to produce own point @ r+1 + /// + /// Seen the other way: + /// any point @ r+0 contains signatures made by nodes with keys scheduled for r+0 only: + /// * by the author at the same r+0 + /// * evidence of the author's point @ r-1: + /// * by those @ r-1 who include it @ r+0 (the direct receivers of the point @ r-1) + /// * by those @ r+0 who will witness it @ r+1 (iff they are scheduled for r+0) + /// + /// Consensus progress is not guaranteed without witnesses (because of the evidence requirement), + /// but we don't care if the consensus of an ending epoch stalls at its last round.
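// A sketch of the keypair-per-round rule spelled out above, for a node at local round r+0
// that is asked to sign a point from `point_round`. The helper name is illustrative;
// the real decision lives in Signer::signature_response, which works on DagRound keypairs.
use std::sync::Arc;

use everscale_crypto::ed25519::KeyPair;

use crate::intercom::PeerSchedule;
use crate::models::Round;

fn key_pair_to_sign_with(
    schedule: &PeerSchedule,
    local_round: &Round,
    point_round: &Round,
) -> Option<Arc<KeyPair>> {
    if point_round == local_round {
        // include @ r+0 for own point @ r+1: sign with the keys scheduled for r+1
        schedule.local_keys(&local_round.next())
    } else if point_round == &local_round.prev() {
        // witness @ r-1 for own point @ r+1: sign with the keys scheduled for r+0
        schedule.local_keys(local_round)
    } else {
        None // too old to include, or too far in the future
    }
}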
+ pub fn local_keys(&self, round: &Round) -> Option> { + if self.peers_for(round).contains_key(&self.local_id()) { + Some(self.local_keys.clone()) + } else { + None + } + } + + pub fn all_resolved(&self) -> FastHashSet { + let inner = self.inner.lock(); + inner.all_resolved() + } + + pub fn peers_for(&self, round: &Round) -> Arc> { let inner = self.inner.lock(); inner.peers_for_index_plus_one(inner.index_plus_one(round)) } @@ -112,17 +137,17 @@ impl PeerSchedule { rounds: [Round; N], ) -> [Arc>; N] { let inner = self.inner.lock(); - array::from_fn(|i| inner.peers_for_index_plus_one(inner.index_plus_one(rounds[i]))) + array::from_fn(|i| inner.peers_for_index_plus_one(inner.index_plus_one(&rounds[i]))) } /// does not return empty maps - pub fn peers_for_range(&self, rounds: Range) -> Vec>> { + pub fn peers_for_range(&self, rounds: &Range) -> Vec>> { if rounds.end <= rounds.start { return vec![]; } let inner = self.inner.lock(); - let mut first = inner.index_plus_one(rounds.start); - let last = inner.index_plus_one(rounds.end.prev()); + let mut first = inner.index_plus_one(&rounds.start); + let last = inner.index_plus_one(&rounds.end.prev()); if 0 == first && first < last { first += 1; // exclude inner.empty } @@ -138,9 +163,7 @@ impl PeerSchedule { // make next from previous let mut inner = self.inner.lock(); let Some(next) = inner.next_epoch_start else { - let msg = "Fatal: attempt to change epoch, but next epoch start is not set"; - tracing::error!("{msg}"); - panic!("{msg}"); + panic!("attempt to change epoch, but next epoch start is not set"); }; inner.prev_epoch_start = inner.cur_epoch_start; inner.cur_epoch_start = next; @@ -188,7 +211,7 @@ impl PeerSchedule { } /// Returns [true] if update was successfully applied - fn set_resolved(&self, peer_id: &PeerId, resolved: bool) -> bool { + pub(super) fn set_resolved(&self, peer_id: &PeerId, resolved: bool) -> bool { let mut is_applied = false; let new_state = if resolved { PeerState::Resolved @@ -212,73 +235,9 @@ impl PeerSchedule { } is_applied } - - fn respawn_resolve_task(&self) { - let mut fut = futures_util::stream::FuturesUnordered::new(); - { - let entries = self.overlay.read_entries(); - for entry in entries - .iter() - .choose_multiple(&mut rand::thread_rng(), entries.len()) - { - // skip updates on self - if !(entry.peer_id == self.local_id || entry.resolver_handle.is_resolved()) { - let handle = entry.resolver_handle.clone(); - fut.push(async move { handle.wait_resolved().await }); - } - } - }; - let new_abort_handle = if fut.is_empty() { - None - } else { - let this = self.clone(); - let join = tokio::spawn(async move { - while let Some(known_peer_handle) = fut.next().await { - _ = this.set_resolved(&known_peer_handle.peer_info().id, true); - } - }); - Some(join.abort_handle()) - }; - let mut abort_resolve_handle = self.abort_resolve_peers.lock(); - if let Some(old) = abort_resolve_handle.as_ref() { - old.abort(); - }; - *abort_resolve_handle = new_abort_handle; - } - - async fn listen(self) { - let mut rx = self.overlay.read_entries().subscribe(); - loop { - match rx.recv().await { - Ok(ref event @ PrivateOverlayEntriesEvent::Removed(node)) - if node != self.local_id => - { - if self.set_resolved(&node, false) { - // respawn resolve task with fewer peers to await - self.respawn_resolve_task(); - } else { - tracing::debug!("Skipped {event:?}"); - } - } - Err(RecvError::Closed) => { - let msg = "Fatal: peer info updates channel closed, \ - cannot maintain node connectivity"; - tracing::error!(msg); - panic!("{msg}") - } - 
Err(RecvError::Lagged(qnt)) => { - tracing::warn!( - "Skipped {qnt} peer info updates, node connectivity may suffer. \ - Consider increasing channel capacity." - ) - } - Ok(_) => {} - } - } - } } -pub struct PeerScheduleInner { +struct PeerScheduleInner { // order to select leader by coin flip peers_resolved: [Arc>; 3], prev_epoch_start: Round, @@ -288,31 +247,22 @@ pub struct PeerScheduleInner { } impl PeerScheduleInner { - fn new(current_epoch_start: Round, current_peers: &Vec) -> Self { + fn new() -> Self { Self { - peers_resolved: [ - Default::default(), - Arc::new( - current_peers - .iter() - .map(|p| (p.clone(), PeerState::Added)) - .collect(), - ), - Default::default(), - ], + peers_resolved: Default::default(), prev_epoch_start: Round(0), - cur_epoch_start: current_epoch_start, + cur_epoch_start: Round(0), next_epoch_start: None, empty: Default::default(), } } - fn index_plus_one(&self, round: Round) -> u8 { - if self.next_epoch_start.map_or(false, |r| r <= round) { + fn index_plus_one(&self, round: &Round) -> u8 { + if self.next_epoch_start.as_ref().map_or(false, |r| r <= round) { 3 - } else if self.cur_epoch_start <= round { + } else if &self.cur_epoch_start <= round { 2 - } else if self.prev_epoch_start <= round { + } else if &self.prev_epoch_start <= round { 1 } else { 0 @@ -326,4 +276,14 @@ impl PeerScheduleInner { _ => unreachable!(), } } + + fn all_resolved(&self) -> FastHashSet { + self.peers_resolved[0] + .iter() + .chain(self.peers_resolved[1].iter()) + .chain(self.peers_resolved[2].iter()) + .filter(|(_, state)| *state == &PeerState::Resolved) + .map(|(peer_id, _)| *peer_id) + .collect() + } } diff --git a/consensus/src/intercom/peer_schedule/peer_schedule_updater.rs b/consensus/src/intercom/peer_schedule/peer_schedule_updater.rs new file mode 100644 index 000000000..597c3d85a --- /dev/null +++ b/consensus/src/intercom/peer_schedule/peer_schedule_updater.rs @@ -0,0 +1,91 @@ +use std::sync::Arc; + +use futures_util::StreamExt; +use parking_lot::Mutex; +use rand::prelude::IteratorRandom; +use tokio::sync::broadcast::error::RecvError; +use tokio::task::AbortHandle; + +use tycho_network::{PrivateOverlay, PrivateOverlayEntriesEvent}; + +use crate::intercom::PeerSchedule; + +#[derive(Clone)] +pub struct PeerScheduleUpdater { + overlay: PrivateOverlay, + peer_schedule: Arc, + abort_resolve_peers: Arc>>, +} + +impl PeerScheduleUpdater { + pub fn run(overlay: PrivateOverlay, peer_schedule: Arc) { + let this = Self { + overlay, + peer_schedule, + abort_resolve_peers: Default::default(), + }; + this.respawn_resolve_task(); + tokio::spawn(this.listen()); + } + + fn respawn_resolve_task(&self) { + let mut fut = futures_util::stream::FuturesUnordered::new(); + { + let local_id = self.peer_schedule.local_id(); + let entries = self.overlay.read_entries(); + for entry in entries + .iter() + .choose_multiple(&mut rand::thread_rng(), entries.len()) + { + // skip updates on self + if !(entry.peer_id == local_id || entry.resolver_handle.is_resolved()) { + let handle = entry.resolver_handle.clone(); + fut.push(async move { handle.wait_resolved().await }); + } + } + }; + let new_abort_handle = if fut.is_empty() { + None + } else { + let peer_schedule = self.peer_schedule.clone(); + let join = tokio::spawn(async move { + while let Some(known_peer_handle) = fut.next().await { + _ = peer_schedule.set_resolved(&known_peer_handle.peer_info().id, true); + } + }); + Some(join.abort_handle()) + }; + let mut abort_resolve_handle = self.abort_resolve_peers.lock(); + if let Some(old) = 
abort_resolve_handle.as_ref() { + old.abort(); + }; + *abort_resolve_handle = new_abort_handle; + } + + async fn listen(self) { + let mut rx = self.overlay.read_entries().subscribe(); + let local_id = self.peer_schedule.local_id(); + loop { + match rx.recv().await { + Ok(ref event @ PrivateOverlayEntriesEvent::Removed(node)) if node != local_id => { + if self.peer_schedule.set_resolved(&node, false) { + // respawn resolve task with fewer peers to await + self.respawn_resolve_task(); + } else { + tracing::debug!("Skipped {event:?}"); + } + } + Err(RecvError::Closed) => { + panic!("peer info updates channel closed, cannot maintain node connectivity") + } + Err(RecvError::Lagged(qnt)) => { + tracing::warn!( + "Skipped {qnt} peer info updates, node connectivity may suffer. \ + Consider increasing channel capacity." + ) + } + Ok(_) => {} + } + } + } +} diff --git a/consensus/src/lib.rs b/consensus/src/lib.rs index 73afa7366..cb4f2509f 100644 --- a/consensus/src/lib.rs +++ b/consensus/src/lib.rs @@ -1,4 +1,6 @@ +#![allow(dead_code)] // temporarily suppress warns +pub(crate) mod dag; pub(crate) mod engine; pub(crate) mod intercom; pub(crate) mod models; -pub(crate) mod tasks; +pub(crate) mod test_utils; diff --git a/consensus/src/models/dag_point.rs b/consensus/src/models/dag_point.rs new file mode 100644 index 000000000..fcd873bfb --- /dev/null +++ b/consensus/src/models/dag_point.rs @@ -0,0 +1,69 @@ +use std::sync::atomic::AtomicBool; +use std::sync::Arc; + +use crate::models::point::{Digest, Location, Point, PointId}; + +#[derive(Clone)] +pub struct ValidPoint { + pub point: Arc, + pub is_committed: Arc, +} + +impl ValidPoint { + pub fn new(point: Arc) -> Self { + Self { + point, + is_committed: Arc::new(AtomicBool::new(false)), + } + } +} + +#[derive(Clone)] +pub enum DagPoint { + // FIXME time skew is determined at the moment of signature response and is not reentrant + /// valid without demur, needed to blame equivocation or graph connectivity violations + Trusted(ValidPoint), + /// is a valid container, but we doubt author's fairness at the moment of validating; + /// we do not sign such a point, but others may include it without consequences; + /// consensus will decide whether to sign its proof or not; we shall ban the author anyway + Suspicious(ValidPoint), + /// invalidates dependent point; needed to blame equivocation + Invalid(Arc), + /// invalidates dependent point; blame author of dependent point + NotExists(Arc), +} + +impl DagPoint { + pub fn valid(&self) -> Option<&'_ ValidPoint> { + match self { + DagPoint::Trusted(valid) => Some(valid), + DagPoint::Suspicious(valid) => Some(valid), + _ => None, + } + } + + pub fn id(&self) -> PointId { + PointId { + location: self.location().clone(), + digest: self.digest().clone(), + } + } + + pub fn location(&self) -> &'_ Location { + match self { + DagPoint::Trusted(valid) => &valid.point.body.location, + DagPoint::Suspicious(valid) => &valid.point.body.location, + DagPoint::Invalid(point) => &point.body.location, + DagPoint::NotExists(id) => &id.location, + } + } + + pub fn digest(&self) -> &'_ Digest { + match self { + DagPoint::Trusted(valid) => &valid.point.digest, + DagPoint::Suspicious(valid) => &valid.point.digest, + DagPoint::Invalid(point) => &point.digest, + DagPoint::NotExists(id) => &id.digest, + } + } +} diff --git a/consensus/src/models/mod.rs b/consensus/src/models/mod.rs index a199ff751..a59795f00 100644 --- a/consensus/src/models/mod.rs +++ b/consensus/src/models/mod.rs @@ -1 +1,7 @@ -pub mod point; +pub use dag_point::*; 
+pub use node_count::*; +pub use point::*; + +mod dag_point; +mod node_count; +mod point; diff --git a/consensus/src/models/node_count.rs b/consensus/src/models/node_count.rs new file mode 100644 index 000000000..955c94a1d --- /dev/null +++ b/consensus/src/models/node_count.rs @@ -0,0 +1,45 @@ +#[derive(Copy, Clone)] +pub struct NodeCount(usize); + +impl TryFrom for NodeCount { + type Error = &'static str; + fn try_from(total_peers: usize) -> Result { + // may occur if peer_schedule is empty + let count = if total_peers < 3 { + return Err("not enough nodes to run consensus"); + } else { + ((total_peers + 2) / 3) * 3 + 1 // ceil up to 3F+1 + }; + if count < total_peers { + panic!("node count {total_peers} overflows after rounding up to 3F+1"); + } + Ok(NodeCount((count - 1) / 3)) // 1F + } +} + +impl NodeCount { + pub const GENESIS: Self = Self(0); + /* + pub fn full(&self) -> usize { + self.0 * 3 + 1 + } + */ + pub fn majority(&self) -> usize { + self.0 * 2 + 1 + } + + /// excluding either current node or the point's author, depending on the context + pub fn majority_of_others(&self) -> usize { + // yes, genesis has the contradiction: reliable minority > majority of others + self.0 * 2 + } + + pub fn reliable_minority(&self) -> usize { + self.0 + 1 + } + /* + pub fn unreliable(&self) -> usize { + self.0 + } + */ +} diff --git a/consensus/src/models/point.rs b/consensus/src/models/point.rs index b9a972f59..606815f80 100644 --- a/consensus/src/models/point.rs +++ b/consensus/src/models/point.rs @@ -1,8 +1,8 @@ use std::collections::BTreeMap; -use std::time::SystemTime; +use std::ops::{Add, Sub}; use bytes::Bytes; -use everscale_crypto::ed25519::ExpandedSecretKey; +use everscale_crypto::ed25519::KeyPair; use serde::{Deserialize, Serialize}; use sha2::{Digest as Sha2Digest, Sha256}; @@ -12,18 +12,58 @@ use tycho_util::FastHashMap; #[derive(Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Debug)] pub struct Digest(pub [u8; 32]); -#[derive(Clone, Serialize, Deserialize, Debug)] +#[derive(Clone, Serialize, Deserialize, PartialEq, Debug)] pub struct Signature(pub Bytes); -#[derive(Copy, Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Debug)] +#[derive(Copy, Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] pub struct Round(pub u32); +#[derive(Copy, Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Debug)] +pub struct UnixTime(u64); + +impl UnixTime { + pub const fn from_millis(millis: u64) -> Self { + Self(millis) + } + pub fn now() -> Self { + Self( + u64::try_from( + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .expect("current time since unix epoch") + .as_millis(), + ) + .expect("current Unix time in millis as u64"), + ) + } +} + +impl Add for UnixTime { + type Output = Self; + fn add(self, rhs: Self) -> Self::Output { + Self(self.0.saturating_add(rhs.0)) + } +} + +impl Sub for UnixTime { + type Output = Self; + fn sub(self, rhs: Self) -> Self::Output { + Self(self.0.saturating_sub(rhs.0)) + } +} + impl Round { pub fn prev(&self) -> Round { self.0 .checked_sub(1) .map(Round) - .unwrap_or_else(|| panic!("DAG round number overflow, fix dag initial configuration")) + .expect("DAG round number underflow, fix dag initial configuration") + } + pub fn next(&self) -> Round { + self.0 + .checked_add(1) + .map(Round) + .expect("DAG round number overflow, inner type exhausted") } } @@ -45,8 +85,13 @@ pub struct PrevPoint { // any node may proof its vertex@r-1 with its point@r+0 only // pub round: Round, pub 
digest: Digest, - // >= 2F witnesses, point author excluded, order does not matter + /// `>= 2F` neighbours, order does not matter; + /// point author is excluded: everyone must use the proven point to validate its proof pub evidence: FastHashMap, + // TODO if we use TL, then every node can sign hash of a point's body (not all body bytes) + // so we can include that hash into PrevPoint + // to check signatures inside BroadcastFilter::verify() without waiting for DAG + // (if that will be fast enough to respond without overlay query timeout) } #[derive(Clone, Serialize, Deserialize, PartialEq, Debug)] @@ -65,38 +110,48 @@ pub enum Link { #[derive(Clone, Serialize, Deserialize, Debug)] pub struct PointBody { pub location: Location, // let it be @ r+0 - pub time: SystemTime, + pub time: UnixTime, pub payload: Vec, - // of the same author + /// by the same author pub proof: Option, - // >= 2F+1 points @ r-1, - // signed by author @ r-1 with some additional points just mentioned; - // mandatory includes author's own vertex iff proof is given. - // Repeatable order on every node needed for commit; map is used during validation + /// `>= 2F+1` points @ r-1, + /// signed by author @ r-1 with some additional points just mentioned; + /// mandatory includes author's own vertex iff proof is given. + /// Repeatable order on every node is needed for commit; map is used during validation pub includes: BTreeMap, - // >= 0 points @ r-2, signed by author @ r-1 - // Repeatable order on every node needed for commit; map is used during validation + /// `>= 0` points @ r-2, signed by author @ r-1 + /// Repeatable order on every node needed for commit; map is used during validation pub witness: BTreeMap, - // defines author's last committed anchor - pub last_anchor_trigger: Link, - // helps to maintain anchor chain linked without explicit DAG traverse - pub last_anchor_proof: Link, + /// last included by author; defines author's last committed anchor + pub anchor_trigger: Link, + /// last included by author; maintains anchor chain linked without explicit DAG traverse + pub anchor_proof: Link, } impl PointBody { - pub fn wrap(self, secret: ExpandedSecretKey) -> Option { - let body = bincode::serialize(&self).ok()?; - let pubkey = self.location.author.as_public_key()?; - let sig = secret.sign_raw(body.as_slice(), &pubkey); + pub fn wrap(self, local_keypair: &KeyPair) -> Point { + assert_eq!( + self.location.author, + PeerId::from(local_keypair.public_key), + "produced point author must match local key pair" + ); + let body = bincode::serialize(&self).expect("shouldn't happen"); + let sig = local_keypair.sign_raw(body.as_slice()); let mut hasher = Sha256::new(); hasher.update(body.as_slice()); hasher.update(sig.as_slice()); let digest = Digest(hasher.finalize().into()); - Some(Point { + Point { body: self, signature: Signature(Bytes::from(sig.to_vec())), digest, - }) + } + } + + pub fn sign(&self, local_keypair: &KeyPair) -> Signature { + let body = bincode::serialize(&self).expect("shouldn't happen"); + let sig = local_keypair.sign_raw(body.as_slice()); + Signature(Bytes::from(sig.to_vec())) } } @@ -159,8 +214,8 @@ impl Point { && self.body.witness.is_empty() && self.body.payload.is_empty() && self.body.proof.is_none() - && self.body.last_anchor_proof == Link::ToSelf - && self.body.last_anchor_trigger == Link::ToSelf + && self.body.anchor_proof == Link::ToSelf + && self.body.anchor_trigger == Link::ToSelf } round if round > LAST_GENESIS_ROUND => { // no witness is possible at the round right after genesis; @@ 
-169,19 +224,21 @@ impl Point { // leader must maintain its chain of proofs, // while others must link to previous points (checked at the end of this method); // its decided later (using dag round data) whether current point belongs to leader - && !(self.body.last_anchor_proof == Link::ToSelf && self.body.proof.is_none()) - && !(self.body.last_anchor_trigger == Link::ToSelf && self.body.proof.is_none()) + && !(self.body.anchor_proof == Link::ToSelf && self.body.proof.is_none()) + && !(self.body.anchor_trigger == Link::ToSelf && self.body.proof.is_none()) } _ => false, }; is_special_ok // proof is listed in includes - to count for 2/3+1, verify and commit dependencies && self.body.proof.as_ref().map(|p| &p.digest) == self.body.includes.get(&author) - && self.is_link_well_formed(&self.body.last_anchor_proof) - && self.is_link_well_formed(&self.body.last_anchor_trigger) + // in contrast, evidence must contain only signatures of others + && self.body.proof.as_ref().map_or(true, |p| !p.evidence.contains_key(author)) + && self.is_link_well_formed(&self.body.anchor_proof) + && self.is_link_well_formed(&self.body.anchor_trigger) && match ( - self.last_anchor_proof_round(), - self.last_anchor_trigger_round(), + self.anchor_proof_round(), + self.anchor_trigger_round(), ) { (x, LAST_GENESIS_ROUND) => x >= LAST_GENESIS_ROUND, (LAST_GENESIS_ROUND, y) => y >= LAST_GENESIS_ROUND, @@ -214,28 +271,30 @@ impl Point { } } - pub fn last_anchor_trigger_round(&self) -> Round { - self.get_linked_to_round(&self.body.last_anchor_trigger) + // TODO maybe implement field accessors parameterized by combination of enums + + pub fn anchor_trigger_round(&self) -> Round { + self.get_linked_to_round(&self.body.anchor_trigger) } - pub fn last_anchor_proof_round(&self) -> Round { - self.get_linked_to_round(&self.body.last_anchor_proof) + pub fn anchor_proof_round(&self) -> Round { + self.get_linked_to_round(&self.body.anchor_proof) } - pub fn last_anchor_trigger_id(&self) -> PointId { - self.get_linked_to(&self.body.last_anchor_trigger) + pub fn anchor_trigger_id(&self) -> PointId { + self.get_linked_to(&self.body.anchor_trigger) } - pub fn last_anchor_proof_id(&self) -> PointId { - self.get_linked_to(&self.body.last_anchor_proof) + pub fn anchor_proof_id(&self) -> PointId { + self.get_linked_to(&self.body.anchor_proof) } - pub fn last_anchor_trigger_through(&self) -> PointId { - self.get_linked_through(&self.body.last_anchor_trigger) + pub fn anchor_trigger_through(&self) -> PointId { + self.get_linked_through(&self.body.anchor_trigger) } - pub fn last_anchor_proof_through(&self) -> PointId { - self.get_linked_through(&self.body.last_anchor_proof) + pub fn anchor_proof_through(&self) -> PointId { + self.get_linked_through(&self.body.anchor_proof) } fn get_linked_to_round(&self, link: &Link) -> Round { @@ -271,15 +330,10 @@ impl Point { } fn get_linked(&self, peer: &PeerId, through_includes: bool) -> PointId { - let through = if through_includes { - &self.body.includes - } else { - &self.body.witness - }; - let round = if through_includes { - self.body.location.round.prev() + let (through, round) = if through_includes { + (&self.body.includes, self.body.location.round.prev()) } else { - self.body.location.round.prev().prev() + (&self.body.witness, self.body.location.round.prev().prev()) }; PointId { location: Location { diff --git a/consensus/src/tasks/broadcaster.rs b/consensus/src/tasks/broadcaster.rs deleted file mode 100644 index 8b1378917..000000000 --- a/consensus/src/tasks/broadcaster.rs +++ /dev/null @@ -1 +0,0 @@ - 
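The evidence rules enforced in the hunk above (a proof needs at least 2F signatures, and none of them may come from the point's author) can be restated as a small stand-alone check. This is an illustrative sketch, not the crate's code: PeerId, Signature and NodeCount are simplified stand-ins, and evidence_ok is a hypothetical helper.

use std::collections::HashMap;

// Simplified stand-ins for the crate's types, only to make the check runnable here.
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
struct PeerId(u8);
struct Signature;
struct NodeCount(usize); // stores F for a committee of 3F + 1 nodes

impl NodeCount {
    // 2F: evidence signatures required, the author itself excluded
    fn majority_of_others(&self) -> usize {
        self.0 * 2
    }
}

// Mirrors the two rules above: the author must not certify its own previous point,
// and at least 2F other nodes must have signed it.
fn evidence_ok(
    author: &PeerId,
    evidence: &HashMap<PeerId, Signature>,
    node_count: &NodeCount,
) -> bool {
    !evidence.contains_key(author) && evidence.len() >= node_count.majority_of_others()
}

fn main() {
    let author = PeerId(0);
    let node_count = NodeCount(1); // 4 nodes total => F = 1 => need 2 signatures from others
    let mut evidence = HashMap::new();
    evidence.insert(PeerId(1), Signature);
    evidence.insert(PeerId(2), Signature);
    assert!(evidence_ok(&author, &evidence, &node_count));
    // a proof that carries the author's own signature is malformed
    evidence.insert(author, Signature);
    assert!(!evidence_ok(&author, &evidence, &node_count));
}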
diff --git a/consensus/src/tasks/mod.rs b/consensus/src/tasks/mod.rs deleted file mode 100644 index 81b1dbacb..000000000 --- a/consensus/src/tasks/mod.rs +++ /dev/null @@ -1,4 +0,0 @@ -pub mod broadcaster; -pub mod downloader; -pub mod syncer; -pub mod uploader; diff --git a/consensus/src/tasks/syncer.rs b/consensus/src/tasks/syncer.rs deleted file mode 100644 index 8b1378917..000000000 --- a/consensus/src/tasks/syncer.rs +++ /dev/null @@ -1 +0,0 @@ - diff --git a/consensus/src/tasks/uploader.rs b/consensus/src/tasks/uploader.rs deleted file mode 100644 index 8b1378917..000000000 --- a/consensus/src/tasks/uploader.rs +++ /dev/null @@ -1 +0,0 @@ - diff --git a/consensus/src/test_utils.rs b/consensus/src/test_utils.rs new file mode 100644 index 000000000..4cbcc154b --- /dev/null +++ b/consensus/src/test_utils.rs @@ -0,0 +1,208 @@ +use std::net::ToSocketAddrs; +use std::sync::Arc; +use std::time::Duration; + +use everscale_crypto::ed25519::{KeyPair, PublicKey, SecretKey}; +use tokio::sync::mpsc; + +use tycho_network::{DhtClient, DhtConfig, DhtService, Network, OverlayService, PeerId, Router}; + +use crate::intercom::{BroadcastFilter, Dispatcher, PeerSchedule, PeerScheduleUpdater, Responder}; +use crate::models::{Link, Location, Point, PointBody, Round, UnixTime}; + +const GENESIS_SECRET_KEY_BYTES: [u8; 32] = [0xAE; 32]; +const GENESIS_MILLIS: u64 = 0; +const GENESIS_ROUND: u32 = 0; + +pub fn genesis() -> Point { + let genesis_keys = KeyPair::from(&SecretKey::from_bytes(GENESIS_SECRET_KEY_BYTES)); + + PointBody { + location: Location { + round: Round(GENESIS_ROUND), + author: genesis_keys.public_key.into(), + }, + time: UnixTime::from_millis(GENESIS_MILLIS), + payload: vec![], + proof: None, + includes: Default::default(), + witness: Default::default(), + anchor_trigger: Link::ToSelf, + anchor_proof: Link::ToSelf, + } + .wrap(&genesis_keys) +} + +pub async fn bootstrap( + secret_key: &SecretKey, + dht_client: &DhtClient, + overlay_service: &OverlayService, + peers: &Vec, +) { + let peer_schedule = Arc::new(PeerSchedule::new(Arc::new(KeyPair::from(secret_key)))); + + let (bcast_tx, bcast_rx) = mpsc::unbounded_channel(); + + let broadcast_filter = BroadcastFilter::new(peer_schedule.clone(), bcast_tx); + + let (sig_requests, sig_responses) = mpsc::unbounded_channel(); + + let dispatcher = Dispatcher::new( + &dht_client, + &overlay_service, + peers, + Responder::new(broadcast_filter.clone(), sig_requests), + ); + + let genesis = Arc::new(crate::test_utils::genesis()); + // finished epoch + peer_schedule.set_next_peers(&vec![genesis.body.location.author]); + peer_schedule.set_next_start(genesis.body.location.round); + peer_schedule.rotate(); + // current epoch + peer_schedule.set_next_start(genesis.body.location.round.next()); + peer_schedule.set_next_peers(peers); + peer_schedule.rotate(); + // start updater only after peers are populated into schedule + PeerScheduleUpdater::run(dispatcher.overlay.clone(), peer_schedule.clone()); + + // tOdO define if the last round is finished based on peer schedule + // move out from bcaster & signer ? where to get our last point from ? + + // tOdO в конце каждого раунда берем точку с триггером + // и комиттим + // * either own point contains Trigger + // * or search through last round to find the latest trigger + // * * can U do so without scan of a round ??? 
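The genesis point above is sealed by PointBody::wrap from the models/point.rs hunk earlier in this patch: the body is serialized with bincode, signed, and the digest is a SHA-256 hash over the serialized body followed by the signature bytes. Below is a minimal sketch of that digest derivation, using the same sha2 crate the consensus crate already depends on; the helper name point_digest is hypothetical.

use sha2::{Digest, Sha256};

// Hypothetical helper: sha-256 over the serialized body, then the signature,
// in that order, yielding the 32-byte digest stored next to the point.
fn point_digest(serialized_body: &[u8], signature: &[u8]) -> [u8; 32] {
    let mut hasher = Sha256::new();
    hasher.update(serialized_body);
    hasher.update(signature);
    hasher.finalize().into()
}

fn main() {
    let body = b"bincode-serialized point body";
    let sig = [0u8; 64]; // stand-in bytes; the real signature comes from the author's key pair
    let digest = point_digest(body, &sig);
    assert_eq!(digest.len(), 32);
    // the same body and signature always produce the same digest
    assert_eq!(digest, point_digest(body, &sig));
}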
+} + +// TODO receive configured services from general node, +// move current setup to tests as it provides acceptable timing +// This dependencies should be passed from validator module to init mempool +fn from_validator( + socket_addr: T, + secret_key: &SecretKey, +) -> (DhtClient, OverlayService) { + let local_id = PeerId::from(PublicKey::from(secret_key)); + + let (dht_tasks, dht_service) = DhtService::builder(local_id) + .with_config(DhtConfig { + local_info_announce_period: Duration::from_secs(1), + max_local_info_announce_period_jitter: Duration::from_secs(1), + routing_table_refresh_period: Duration::from_secs(1), + max_routing_table_refresh_period_jitter: Duration::from_secs(1), + ..Default::default() + }) + .build(); + + let (overlay_tasks, overlay_service) = OverlayService::builder(local_id) + .with_dht_service(dht_service.clone()) + .build(); + + let router = Router::builder() + .route(dht_service.clone()) + .route(overlay_service.clone()) + .build(); + + let network = Network::builder() + .with_private_key(secret_key.to_bytes()) + .with_service_name("mempool-test-network-service") + .build(socket_addr, router) + .unwrap(); + + dht_tasks.spawn(&network); + overlay_tasks.spawn(&network); + + (dht_service.make_client(network.clone()), overlay_service) +} + +#[cfg(test)] +mod tests { + use std::net::Ipv4Addr; + + use futures_util::stream::FuturesUnordered; + use tokio::task::JoinSet; + + use tycho_network::{Address, PeerInfo}; + use tycho_util::time::now_sec; + + use crate::engine::Engine; + use crate::engine::EngineTestExt; + use crate::intercom::DispatcherTestExt; + + use super::*; + + fn make_peer_info(key: &SecretKey, address: Address) -> PeerInfo { + let keypair = KeyPair::from(key); + let peer_id = PeerId::from(keypair.public_key); + + let now = now_sec(); + let mut peer_info = PeerInfo { + id: peer_id, + address_list: vec![address.clone()].into_boxed_slice(), + created_at: now, + expires_at: u32::MAX, + signature: Box::new([0; 64]), + }; + *peer_info.signature = keypair.sign(&peer_info); + peer_info + } + + async fn make_network(node_count: usize) -> Vec { + let keys = (0..node_count) + .map(|_| SecretKey::generate(&mut rand::thread_rng())) + .collect::>(); + + let all_peers = keys + .iter() + .map(|s| PeerId::from(KeyPair::from(s).public_key)) + .collect::>(); + + let from_validators = keys + .iter() + .map(|secret| from_validator((Ipv4Addr::LOCALHOST, 0), secret)) + .collect::>(); + + let mut engines = vec![]; + for (secret_key, (dht_client, overlay_service)) in keys.iter().zip(from_validators.iter()) { + let engine = Engine::new(secret_key, &dht_client, &overlay_service, &all_peers).await; + engines.push(engine); + } + + let peer_info = std::iter::zip(&keys, &engines) + .map(|(key, engine)| { + Arc::new(make_peer_info( + key, + engine.dispatcher().network().local_addr().into(), + )) + }) + .collect::>(); + + if let Some((dht_client, _)) = from_validators.first() { + for info in &peer_info { + if info.id == dht_client.network().peer_id() { + continue; + } + dht_client.add_peer(info.clone()).unwrap(); + } + } + engines + } + + #[tokio::test] + async fn engine_works() -> Result<(), ()> { + tracing_subscriber::fmt::try_init().ok(); + tracing::info!("engine_works"); + + let mut js = JoinSet::new(); + let engines = make_network(3) + .await + .into_iter() + .map(|engine| js.spawn(engine.run())) + .collect::>(); + while let Some(res) = js.join_next().await { + res.unwrap(); + } + Ok(()) + } +} diff --git a/util/src/futures/shared.rs b/util/src/futures/shared.rs index 
1970d76d0..7cf2804d5 100644 --- a/util/src/futures/shared.rs +++ b/util/src/futures/shared.rs @@ -5,13 +5,15 @@ use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::{Arc, Weak}; use std::task::{Context, Poll}; -use futures_util::future::BoxFuture; use tokio::sync::{AcquireError, OwnedSemaphorePermit, Semaphore, TryAcquireError}; +type PermitFuture = + dyn Future> + Send + Sync + 'static; + #[must_use = "futures do nothing unless you `.await` or poll them"] pub struct Shared { inner: Option>>, - permit_fut: Option>>, + permit_fut: Option>>, permit: Option, } @@ -222,6 +224,21 @@ where } } } +/* FIXME remove if test will work +unsafe impl Send for Shared +where + Fut: Future + Send, + Fut::Output: Send + Sync, +{ +} + +unsafe impl Sync for Shared +where + Fut: Future + Send, + Fut::Output: Send + Sync, +{ +} +*/ unsafe impl Send for Inner where From 3c41a3b20a2da39f3517b1ca7d0f60f6f7b008ab Mon Sep 17 00:00:00 2001 From: Kirill Mikheev Date: Fri, 19 Apr 2024 01:19:18 +0300 Subject: [PATCH 13/32] fix(consensus): first test --- Cargo.lock | 41 +++- consensus/Cargo.toml | 29 +-- consensus/src/dag/anchor_stage.rs | 1 + consensus/src/dag/dag_location.rs | 9 +- consensus/src/dag/dag_round.rs | 34 +-- consensus/src/dag/producer.rs | 11 +- consensus/src/dag/verifier.rs | 39 +++- consensus/src/engine/engine.rs | 101 +++++---- consensus/src/engine/mempool_config.rs | 10 +- .../src/intercom/adapter/broadcast_filter.rs | 38 ++-- consensus/src/intercom/adapter/broadcaster.rs | 208 +++++++++++++----- consensus/src/intercom/adapter/dto.rs | 2 + consensus/src/intercom/adapter/signer.rs | 80 +++++-- consensus/src/intercom/core/dispatcher.rs | 10 - consensus/src/intercom/core/responder.rs | 5 +- consensus/src/intercom/dto.rs | 2 +- .../intercom/peer_schedule/peer_schedule.rs | 22 +- .../peer_schedule/peer_schedule_updater.rs | 14 +- consensus/src/models/dag_point.rs | 4 +- consensus/src/models/node_count.rs | 44 +++- consensus/src/models/point.rs | 96 +++++--- consensus/src/test_utils.rs | 132 +++++------ network/src/types/peer_id.rs | 4 +- 23 files changed, 612 insertions(+), 324 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 768c9e298..5fda94ca4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -707,6 +707,12 @@ version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c007b1ae3abe1cb6f85a16305acd418b7ca6343b953633fee2b76d8f108b830f" +[[package]] +name = "fixedbitset" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" + [[package]] name = "futures-core" version = "0.3.30" @@ -814,6 +820,16 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" +[[package]] +name = "indexmap" +version = "2.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b0b929d511467233429c45a44ac1dcaa21ba0f5ba11e4879e6ed28ddb4f9df4" +dependencies = [ + "equivalent", + "hashbrown", +] + [[package]] name = "itertools" version = "0.12.1" @@ -1117,10 +1133,13 @@ version = "0.9.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" dependencies = [ + "backtrace", "cfg-if", "libc", + "petgraph", "redox_syscall", "smallvec", + "thread-id", "windows-targets 0.48.5", ] @@ -1185,6 +1204,16 @@ dependencies = [ "sha2", ] +[[package]] +name = "petgraph" +version 
= "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9" +dependencies = [ + "fixedbitset", + "indexmap", +] + [[package]] name = "pin-project-lite" version = "0.2.14" @@ -1879,6 +1908,16 @@ dependencies = [ "syn 2.0.58", ] +[[package]] +name = "thread-id" +version = "4.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0ec81c46e9eb50deaa257be2f148adf052d1fb7701cfd55ccfab2525280b70b" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "thread_local" version = "1.1.8" @@ -2201,7 +2240,6 @@ dependencies = [ "anyhow", "bincode", "bytes", - "castaway", "dashmap", "everscale-crypto", "futures-util", @@ -2210,7 +2248,6 @@ dependencies = [ "rand_pcg", "serde", "sha2", - "thiserror", "tokio", "tracing", "tracing-subscriber", diff --git a/consensus/Cargo.toml b/consensus/Cargo.toml index bac9fd790..11b77c57e 100644 --- a/consensus/Cargo.toml +++ b/consensus/Cargo.toml @@ -9,36 +9,31 @@ repository.workspace = true license.workspace = true [dependencies] -# crates.io deps +ahash = { workspace = true } anyhow = { workspace = true } bincode = { workspace = true } bytes = { workspace = true, features = ["serde"] } -castaway = "0.2" -dashmap = "5.4" -everscale-crypto = "0.2" +dashmap = { workspace = true } +everscale-crypto = { workspace = true } futures-util = { workspace = true } -parking_lot = "0.12" -rand = { version = "0.8" } -rand_pcg = { version = "0.3" } +parking_lot = { workspace = true } +rand = { workspace = true } serde = { workspace = true, features = ["derive"] } -sha2 = "0.10" -tokio = { version = "1", features = ["rt"] } +sha2 = { workspace = true } +tokio = { workspace = true, default-features = false } tracing = { workspace = true } weedb = { workspace = true } # local deps +rand_pcg = { version = "0.3" } tycho-network = { workspace = true } tycho-storage = { workspace = true } -tycho-util = { workspace = true } - -# temp -#hex = "0.4.3" -thiserror = "1.0" -ahash = "0.8" +tycho-util = { workspace = true, features = ["test"] } [dev-dependencies] -tokio = { workspace = true, features = ["rt-multi-thread", "macros"] } -tracing-subscriber = { version = "0.3", features = ["env-filter"] } +parking_lot = { workspace = true, features = ["deadlock_detection"] } +tokio = { workspace = true, default-features = false, features = ["rt-multi-thread", "macros"] } +tracing-subscriber = { workspace = true, features = ["env-filter"] } [lints] workspace = true diff --git a/consensus/src/dag/anchor_stage.rs b/consensus/src/dag/anchor_stage.rs index 8222b4067..94662876d 100644 --- a/consensus/src/dag/anchor_stage.rs +++ b/consensus/src/dag/anchor_stage.rs @@ -5,6 +5,7 @@ use tycho_network::PeerId; use crate::intercom::PeerSchedule; use crate::models::Round; +#[derive(Debug)] pub enum AnchorStage { Candidate(PeerId), // TODO nothing special, remove Proof(PeerId), diff --git a/consensus/src/dag/dag_location.rs b/consensus/src/dag/dag_location.rs index 2d6212bd1..9c11cfa95 100644 --- a/consensus/src/dag/dag_location.rs +++ b/consensus/src/dag/dag_location.rs @@ -8,7 +8,7 @@ use futures_util::FutureExt; use tycho_util::futures::{JoinTask, Shared}; -use crate::models::{DagPoint, Digest, Round, Signature, UnixTime, ValidPoint}; +use crate::models::{DagPoint, Digest, PointId, Round, Signature, UnixTime, ValidPoint}; /// If DAG location exists, it must have non-empty `versions` map; /// @@ -151,14 +151,19 @@ impl InclusionState { None } } + /// only for logging + pub fn 
init_id(&self) -> Option { + self.0.get().map(|signable| signable.first_completed.id()) + } } - +#[derive(Debug)] pub struct Signable { first_completed: DagPoint, // signature cannot be rolled back, the point must be included as next point dependency signed: OnceLock>, } +#[derive(Debug)] pub struct Signed { pub at: Round, pub with: Signature, diff --git a/consensus/src/dag/dag_round.rs b/consensus/src/dag/dag_round.rs index 9e268e872..334e7d05b 100644 --- a/consensus/src/dag/dag_round.rs +++ b/consensus/src/dag/dag_round.rs @@ -68,19 +68,17 @@ impl DagRound { })) } - pub async fn genesis(genesis: &Arc, peer_schedule: &PeerSchedule) -> Self { + pub fn genesis(genesis: &Arc, peer_schedule: &PeerSchedule) -> Self { let locations = FastDashMap::with_capacity_and_hasher(1, RandomState::new()); let round = genesis.body.location.round; - let this = Self(Arc::new(DagRoundInner { + Self(Arc::new(DagRoundInner { round, node_count: NodeCount::GENESIS, key_pair: None, anchor_stage: AnchorStage::of(round, peer_schedule), locations, prev: WeakDagRound::BOTTOM, - })); - this.insert_exact_validate(genesis, peer_schedule).await; - this + })) } pub fn round(&self) -> &'_ Round { @@ -172,7 +170,7 @@ impl DagRound { &self, point: &Arc, peer_schedule: &PeerSchedule, - ) -> InclusionState { + ) -> Option { if !Verifier::verify(point, peer_schedule).is_ok() { panic!("Coding error: malformed point") } @@ -180,37 +178,45 @@ impl DagRound { if point.valid().is_none() { panic!("Coding error: not a valid point") } - let state = self.insert_exact(&point); + let Some(state) = self.insert_exact(&point) else { + return None; + }; + let state = state.await; if let Some(signable) = state.signable() { signable.sign( self.round(), - peer_schedule.local_keys(self.round()).as_deref(), + peer_schedule.local_keys(&self.round().next()).as_deref(), MempoolConfig::sign_time_range(), ); } if state.signed_point(self.round()).is_none() { panic!("Coding or configuration error: valid point cannot be signed; time issue?") } - state + Some(state) } - pub fn insert_invalid(&self, dag_point: &DagPoint) -> Option { + pub fn insert_invalid( + &self, + dag_point: &DagPoint, + ) -> Option> { if dag_point.valid().is_some() { panic!("Coding error: failed to insert valid point as invalid") } self.scan(&dag_point.location().round) .map(|linked| linked.insert_exact(dag_point)) + .flatten() } - fn insert_exact(&self, dag_point: &DagPoint) -> InclusionState { + fn insert_exact(&self, dag_point: &DagPoint) -> Option> { if &dag_point.location().round != self.round() { panic!("Coding error: dag round mismatches point round on insert") } self.edit(&dag_point.location().author, |loc| { - _ = loc.add_validate(dag_point.digest(), || { + let state = loc.state().clone(); + loc.add_validate(dag_point.digest(), || { futures_util::future::ready(dag_point.clone()) - }); - loc.state().clone() + }) + .map(|first| first.clone().map(|_| state).boxed()) }) } diff --git a/consensus/src/dag/producer.rs b/consensus/src/dag/producer.rs index 5bbca736c..53d63f124 100644 --- a/consensus/src/dag/producer.rs +++ b/consensus/src/dag/producer.rs @@ -114,7 +114,6 @@ impl Producer { Link::ToSelf } _ => { - // TODO simplify to single iterator scan let point = includes .iter() .max_by_key(|point| { @@ -125,7 +124,10 @@ impl Producer { } }) .expect("non-empty list of includes for own point"); - if point.body.location.round == new_round.round().prev() { + if point.body.location.round == new_round.round().prev() + && ((is_for_trigger && point.body.anchor_trigger == Link::ToSelf) + || 
(!is_for_trigger && point.body.anchor_proof == Link::ToSelf)) + { Link::Direct(Through::Includes(point.body.location.author.clone())) } else { let to = if is_for_trigger { @@ -166,7 +168,10 @@ impl Producer { else { return; }; - if point.body.location.round == finished_round.prev() { + if point.body.location.round == finished_round.prev() + && ((is_for_trigger && point.body.anchor_trigger == Link::ToSelf) + || (!is_for_trigger && point.body.anchor_proof == Link::ToSelf)) + { *link = Link::Direct(Through::Witness(point.body.location.author)) } else { let to = if is_for_trigger { diff --git a/consensus/src/dag/verifier.rs b/consensus/src/dag/verifier.rs index 7e407ea2a..14295b810 100644 --- a/consensus/src/dag/verifier.rs +++ b/consensus/src/dag/verifier.rs @@ -7,6 +7,7 @@ use tycho_network::PeerId; use crate::dag::anchor_stage::AnchorStage; use crate::dag::DagRound; +use crate::engine::MempoolConfig; use crate::intercom::{Downloader, PeerSchedule}; use crate::models::{DagPoint, Digest, Link, Location, NodeCount, Point, ValidPoint}; @@ -75,7 +76,9 @@ impl Verifier { match &dag_round.anchor_stage() { // no one may link to self None | Some(AnchorStage::Candidate(_)) => { - point.body.anchor_proof != Link::ToSelf && point.body.anchor_trigger != Link::ToSelf + (point.body.anchor_proof != Link::ToSelf + && point.body.anchor_trigger != Link::ToSelf) + || point.body.location.round == MempoolConfig::GENESIS_ROUND } // leader must link to own point while others must not Some(AnchorStage::Proof(leader_id)) => { @@ -277,6 +280,32 @@ impl Verifier { /// blame author and every dependent point's author fn is_list_of_signers_ok(point /* @ r+0 */: &Point, peer_schedule: &PeerSchedule) -> bool { + if point.body.location.round == MempoolConfig::GENESIS_ROUND { + return true; // all maps are empty for a well-formed genesis + } + let [ + witness_peers/* @ r-2 */ , + includes_peers /* @ r-1 */ , + proof_peers /* @ r+0 */ + ] = peer_schedule.peers_for_array([ + point.body.location.round.prev().prev(), + point.body.location.round.prev(), + point.body.location.round.clone(), + ]); + for (peer_id, _) in point.body.witness.iter() { + if !witness_peers.contains_key(peer_id) { + return false; + } + } + let node_count = NodeCount::new(includes_peers.len()); + if point.body.includes.len() < node_count.majority() { + return false; + }; + for (peer_id, _) in point.body.includes.iter() { + if !includes_peers.contains_key(peer_id) { + return false; + } + } let Some(proven /* @ r-1 */) = &point.body.proof else { return true; }; @@ -287,17 +316,15 @@ impl Verifier { // its point @ r-1 won't become a vertex because its proof point @ r+0 cannot be valid. // That means: payloads from the last round of validation epoch are never collated. 
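The checks above compare map sizes against thresholds derived from NodeCount. Assuming the standard BFT relation N = 3F + 1 (the crate's own constructor first rounds N up to that form, so its numbers may differ for other N), the thresholds look as follows; the sketch is self-contained and is not the crate's implementation.

// Stand-alone sketch of the verifier's thresholds under the assumption F = floor((N - 1) / 3).
#[derive(Copy, Clone, Debug)]
struct NodeCount(usize); // stores F

impl NodeCount {
    fn new(total_peers: usize) -> Self {
        assert!(total_peers >= 3, "not enough nodes to run consensus");
        Self((total_peers - 1) / 3)
    }
    /// 2F + 1: how many includes a point needs
    fn majority(&self) -> usize {
        self.0 * 2 + 1
    }
    /// 2F: how many evidence signatures a proof needs (the author is excluded)
    fn majority_of_others(&self) -> usize {
        self.0 * 2
    }
    /// F + 1: enough peers to guarantee at least one honest node among them
    fn reliable_minority(&self) -> usize {
        self.0 + 1
    }
}

fn main() {
    let nc = NodeCount::new(7); // F = 2
    assert_eq!(nc.majority(), 5);
    assert_eq!(nc.majority_of_others(), 4);
    assert_eq!(nc.reliable_minority(), 3);
}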
- let proof_round_peers /* @ r+0 */ = peer_schedule.peers_for(&point.body.location.round); // reject point in case this node is not ready to accept: the point is from far future - let Ok(node_count) = NodeCount::try_from(proof_round_peers.len()) else { + let Ok(node_count) = NodeCount::try_from(proof_peers.len()) else { return false; }; if proven.evidence.len() < node_count.majority_of_others() { return false; } - - for (peer, _) in proven.evidence.iter() { - if !proof_round_peers.contains_key(peer) { + for (peer_id, _) in proven.evidence.iter() { + if !proof_peers.contains_key(peer_id) { return false; } } diff --git a/consensus/src/engine/engine.rs b/consensus/src/engine/engine.rs index 92fad8dbe..0f1d773d0 100644 --- a/consensus/src/engine/engine.rs +++ b/consensus/src/engine/engine.rs @@ -5,32 +5,33 @@ use tokio::sync::{mpsc, Notify}; use tycho_network::{DhtClient, OverlayService, PeerId}; -use crate::dag::{DagRound, Producer}; +use crate::dag::{Dag, DagRound, Producer}; use crate::intercom::{ BroadcastFilter, Broadcaster, Dispatcher, PeerSchedule, PeerScheduleUpdater, Responder, Signer, }; use crate::models::{Point, PrevPoint}; pub struct Engine { - // dag: Arc>, + dag: Dag, + local_id: Arc, peer_schedule: Arc, dispatcher: Dispatcher, - finished_dag_round: DagRound, signer: Signer, prev_point: Option, cur_point: Option>, + current_dag_round: DagRound, } impl Engine { - pub async fn add_next_peers(&self, next_peers: Vec) {} - pub async fn new( secret_key: &SecretKey, dht_client: &DhtClient, overlay_service: &OverlayService, peers: &Vec, ) -> Self { - let peer_schedule = Arc::new(PeerSchedule::new(Arc::new(KeyPair::from(secret_key)))); + let key_pair = KeyPair::from(secret_key); + let local_id = Arc::new(format!("{:.4?}", PeerId::from(key_pair.public_key))); + let peer_schedule = Arc::new(PeerSchedule::new(Arc::new(key_pair))); let (bcast_tx, bcast_rx) = mpsc::unbounded_channel(); @@ -47,12 +48,19 @@ impl Engine { let genesis = Arc::new(crate::test_utils::genesis()); // finished epoch - peer_schedule.set_next_peers(&vec![genesis.body.location.author]); + peer_schedule.set_next_peers(&vec![(genesis.body.location.author, false)]); peer_schedule.set_next_start(genesis.body.location.round); peer_schedule.rotate(); // current epoch peer_schedule.set_next_start(genesis.body.location.round.next()); - peer_schedule.set_next_peers(peers); + peer_schedule.set_next_peers( + &dispatcher + .overlay + .read_entries() + .iter() + .map(|a| (a.peer_id, a.resolver_handle.is_resolved())) + .collect(), + ); peer_schedule.rotate(); // start updater only after peers are populated into schedule PeerScheduleUpdater::run(dispatcher.overlay.clone(), peer_schedule.clone()); @@ -66,32 +74,37 @@ impl Engine { // * or search through last round to find the latest trigger // * * can U do so without scan of a round ??? 
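This epoch setup mirrors bootstrap in test_utils.rs: the genesis author alone forms the already finished epoch, and the full peer set becomes the current epoch starting one round later. (The Russian TODO above reads, roughly: at the end of every round, take the point with a trigger and commit it.) The sketch below is a hypothetical stand-alone model, with illustrative round numbers, of the three-window lookup that PeerScheduleInner::index_plus_one performs once both rotate() calls have been made.

// Hypothetical model of the prev/current/next epoch windows kept by PeerScheduleInner.
// Returned index: 0 = before any known epoch, 1 = previous epoch, 2 = current epoch,
// 3 = next epoch (once its start round has been announced).
struct EpochWindows {
    prev_epoch_start: u32,
    cur_epoch_start: u32,
    next_epoch_start: Option<u32>,
}

impl EpochWindows {
    fn index_plus_one(&self, round: u32) -> u8 {
        if self.next_epoch_start.map_or(false, |start| start <= round) {
            3
        } else if self.cur_epoch_start <= round {
            2
        } else if self.prev_epoch_start <= round {
            1
        } else {
            0
        }
    }
}

fn main() {
    // After the two rotate() calls: the genesis-only epoch starts at the genesis round,
    // and the current epoch starts right after it (illustrative numbers).
    let windows = EpochWindows {
        prev_epoch_start: 0,
        cur_epoch_start: 1,
        next_epoch_start: None,
    };
    assert_eq!(windows.index_plus_one(0), 1); // the genesis round falls into the previous epoch
    assert_eq!(windows.index_plus_one(5), 2); // later rounds belong to the current epoch
    assert_eq!(windows.index_plus_one(u32::MAX), 2); // no next epoch announced yet
}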
- let finished_dag_round = DagRound::genesis(&genesis, &peer_schedule).await; - let signer = Signer::new(bcast_rx, sig_responses, finished_dag_round.round()); + let mut dag = Dag::new(); + let current_dag_round = dag.get_or_insert(DagRound::genesis(&genesis, &peer_schedule)); + + let genesis_state = current_dag_round + .insert_exact_validate(&genesis, &peer_schedule) + .await; + let signer = Signer::new( + local_id.clone(), + bcast_rx, + sig_responses, + genesis_state.into_iter(), + current_dag_round.round().clone(), + ); Self { - // dag: Arc::new(Mutex::new(dag)), + dag, + local_id, peer_schedule, dispatcher, - finished_dag_round, signer, prev_point: None, cur_point: None, + current_dag_round, } } pub async fn run(mut self) { loop { - // FIXME must there be any next round as in Signer? check broadcast filter - let current_round = self.finished_dag_round.next(self.peer_schedule.as_ref()); - - self.cur_point = Producer::new_point( - &self.finished_dag_round, - ¤t_round, - self.prev_point.as_ref(), - vec![], - ) - .await; + let next_dag_round = self + .dag + .get_or_insert(self.current_dag_round.next(self.peer_schedule.as_ref())); let bcaster_ready = Arc::new(Notify::new()); // let this channel unbounded - there won't be many items, but every of them is essential @@ -107,18 +120,20 @@ impl Engine { // in order to prevent unlimited DAG growth // sync if signer detected a gap exceeding dag depth // join - if let Some(own_point) = &self.cur_point { - let own_state = current_round + if let Some(own_point) = self.cur_point { + let own_state = self + .current_dag_round .insert_exact_validate(&own_point, &self.peer_schedule) .await; let signer_run = tokio::spawn(self.signer.run( - current_round.clone(), + next_dag_round.clone(), Some(own_point.clone()), signer_signal_tx, bcaster_ready.clone(), )); let bcaster_run = tokio::spawn( Broadcaster::new( + &self.local_id, &own_point, &self.dispatcher, &self.peer_schedule, @@ -130,12 +145,19 @@ impl Engine { let joined = tokio::join!(signer_run, bcaster_run); match joined { (Ok(signer_upd), Ok(evidence_or_reject)) => { - self.signer = signer_upd; - self.finished_dag_round = current_round; // FIXME must fill gaps with empty rounds self.prev_point = evidence_or_reject.ok().map(|evidence| PrevPoint { digest: own_point.digest.clone(), - evidence, + evidence: evidence.into_iter().collect(), }); + self.cur_point = Producer::new_point( + &self.current_dag_round, + &next_dag_round, + self.prev_point.as_ref(), + vec![], + ) + .await; + self.current_dag_round = next_dag_round; // FIXME must fill gaps with empty rounds + self.signer = signer_upd; } (Err(se), Err(be)) => { panic!( @@ -153,7 +175,7 @@ impl Engine { signer_signal_rx.close(); bcaster_ready.notify_one(); let signer_run = tokio::spawn(self.signer.run( - current_round.clone(), + next_dag_round.clone(), None, signer_signal_tx, bcaster_ready, @@ -161,9 +183,16 @@ impl Engine { .await; match signer_run { Ok(signer_upd) => { - self.finished_dag_round = current_round; // FIXME must fill gaps with empty rounds - self.signer = signer_upd; self.prev_point = None; + self.cur_point = Producer::new_point( + &self.current_dag_round, + &next_dag_round, + self.prev_point.as_ref(), + vec![], + ) + .await; + self.current_dag_round = next_dag_round; // FIXME must fill gaps with empty rounds + self.signer = signer_upd; } Err(se) => panic!("Signer panicked: {se:?}"), } @@ -172,16 +201,6 @@ impl Engine { } } -pub trait EngineTestExt { - fn dispatcher(&self) -> &'_ Dispatcher; -} - -impl EngineTestExt for Engine { - fn 
dispatcher(&self) -> &'_ Dispatcher { - &self.dispatcher - } -} - // task 0: continue from where we stopped // * load last state into DAG: some (un)finished round // * create new round and point, if last round is finished diff --git a/consensus/src/engine/mempool_config.rs b/consensus/src/engine/mempool_config.rs index 05bb8f2fc..a5425daca 100644 --- a/consensus/src/engine/mempool_config.rs +++ b/consensus/src/engine/mempool_config.rs @@ -1,7 +1,7 @@ use std::ops::RangeInclusive; use std::time::Duration; -use crate::models::UnixTime; +use crate::models::{Round, UnixTime}; pub struct MempoolConfig; @@ -12,7 +12,7 @@ impl MempoolConfig { /// how long a point from past remains eligible for signature and inclusion; /// time in point body is compared with wall time; /// if consensus makes no progress for such long, it will need a manual restart from a new genesis - const MAX_OUTDATED: UnixTime = UnixTime::from_millis(24 * 60 * 60 * 1000); + const MAX_OUTDATED: UnixTime = UnixTime::from_millis(365 * 24 * 60 * 60 * 1000); /// see [CLOCK_SKEW](Self::CLOCK_SKEW) and [MAX_OUTDATED](Self::MAX_OUTDATED) pub fn sign_time_range() -> RangeInclusive { @@ -23,7 +23,9 @@ impl MempoolConfig { /// we try to gather as many points and signatures as we can within some time frame; /// this is a tradeoff between breaking on exactly 2F+1 elements /// (dependencies and/or signatures), and waiting for slow nodes - pub const RETRY_INTERVAL: Duration = Duration::from_millis(100); + pub const RETRY_INTERVAL: Duration = Duration::from_millis(1000); - const DAG_DEPTH: usize = 20; + pub const DAG_DEPTH: usize = 20; + + pub const GENESIS_ROUND: Round = Round(1); } diff --git a/consensus/src/intercom/adapter/broadcast_filter.rs b/consensus/src/intercom/adapter/broadcast_filter.rs index be6d874c6..fd3d1b988 100644 --- a/consensus/src/intercom/adapter/broadcast_filter.rs +++ b/consensus/src/intercom/adapter/broadcast_filter.rs @@ -59,7 +59,7 @@ impl BroadcastFilter { } Ok(_) => {} Err(err @ RecvError::Lagged(_)) => { - tracing::warn!("peer schedule updates {err}"); + tracing::error!("peer schedule updates {err}"); } Err(err @ RecvError::Closed) => { panic!("peer schedule updates {err}"); @@ -81,20 +81,30 @@ impl BroadcastFilter { // as they may be used by some point as a dependency // * newer broadcasts are enqueued until 1/3+1 points per round collected let dag_round = Round(self.current_dag_round.load(Ordering::Acquire)); + tracing::info!( + "filter @ {dag_round:?} got point @ {:?}", + point.body.location.round + ); // for any node @ r+0, its DAG always contains [r-DAG_DEPTH-N; r+1] rounds, where N>=0 let PointId { location: Location { round, author }, digest, } = point.id(); // conceal raw point, do not use it - let point = Verifier::verify(&point, &self.peer_schedule) - .map_or_else(ConsensusEvent::Invalid, |_| ConsensusEvent::Verified(point)); + let point = match Verifier::verify(&point, &self.peer_schedule) { + Ok(()) => ConsensusEvent::Verified(point), + Err(dag_point) => { + tracing::error!("filter @ {dag_round:?}: invalid, {:.4?}", point); + ConsensusEvent::Invalid(dag_point) + } + }; if round <= dag_round.next() { let response = if matches!(point, ConsensusEvent::Invalid(_)) { BroadcastResponse::Rejected } else if round >= dag_round.prev() { BroadcastResponse::Accepted // we will sign, maybe } else { + tracing::error!("Rejected 1"); // too old, current node will not sign, but some point may include it BroadcastResponse::Rejected }; @@ -122,6 +132,7 @@ impl BroadcastFilter { // node must not send broadcasts out-of 
order; // TODO we should ban a peer that broadcasts its rounds out of order, // though we cannot prove this decision for other nodes + tracing::error!("Rejected 2"); return BroadcastResponse::Rejected; }; if let Some(to_delete) = outdated_peer_round { @@ -133,23 +144,22 @@ impl BroadcastFilter { authors.remove(&author); }); } - - let mut same_round = match self.by_round.entry(round).or_try_insert_with(|| { + match self.by_round.entry(round).or_try_insert_with(|| { // how many nodes should send broadcasts NodeCount::try_from(self.peer_schedule.peers_for(&round).len()) .map(|node_count| (node_count, Default::default())) }) { - Ok(entry) => entry, // will not accept broadcasts from not initialized validator set Err(_) => return BroadcastResponse::TryLater, - }; - - let (node_count, ref mut same_round) = same_round.value_mut(); - same_round.entry(author).or_default().insert(digest, point); - if same_round.len() < node_count.reliable_minority() { - return BroadcastResponse::TryLater; // round is not yet determined - }; - _ = same_round; + Ok(mut entry) => { + let (node_count, ref mut same_round) = entry.value_mut(); + same_round.entry(author).or_default().insert(digest, point); + if same_round.len() < node_count.reliable_minority() { + tracing::info!("round is not yet determined"); + return BroadcastResponse::TryLater; // round is not yet determined + }; + } + } self.advance_round(&round).await; BroadcastResponse::Accepted diff --git a/consensus/src/intercom/adapter/broadcaster.rs b/consensus/src/intercom/adapter/broadcaster.rs index 6e8151f31..0fff3f857 100644 --- a/consensus/src/intercom/adapter/broadcaster.rs +++ b/consensus/src/intercom/adapter/broadcaster.rs @@ -1,6 +1,5 @@ use std::mem; use std::sync::Arc; -use std::time::Duration; use futures_util::future::BoxFuture; use futures_util::stream::FuturesUnordered; @@ -14,13 +13,15 @@ use tycho_util::{FastHashMap, FastHashSet}; use crate::intercom::adapter::dto::SignerSignal; use crate::intercom::dto::{BroadcastResponse, PeerState, SignatureResponse}; use crate::intercom::{Dispatcher, PeerSchedule}; -use crate::models::{NodeCount, Point, Signature}; +use crate::models::{NodeCount, Point, Round, Signature}; type BcastResult = anyhow::Result; type SigResult = anyhow::Result; -const LOOP_DURATION: Duration = Duration::from_millis(100); pub struct Broadcaster { + local_id: Arc, + current_round: Round, + point_body: Vec, dispatcher: Dispatcher, bcaster_ready: Arc, @@ -46,6 +47,7 @@ pub struct Broadcaster { impl Broadcaster { pub fn new( + local_id: &Arc, point: &Point, dispatcher: &Dispatcher, peer_schedule: &PeerSchedule, @@ -59,11 +61,14 @@ impl Broadcaster { .iter() .map(|(peer_id, _)| *peer_id) .collect::>(); - let signers_count = NodeCount::try_from(signers.len()).unwrap(); + let signers_count = NodeCount::new(signers.len()); let bcast_peers = peer_schedule.all_resolved(); + tracing::info!("bcast_peers {}", bcast_peers.len()); let bcast_request = Dispatcher::broadcast_request(&point); let sig_request = Dispatcher::signature_request(&point.body.location.round); Self { + local_id: local_id.clone(), + current_round: point.body.location.round, point_body, dispatcher: dispatcher.clone(), bcaster_ready, @@ -104,8 +109,8 @@ impl Broadcaster { // * enqueued for any of two requests above // * rejected to sign our point (incl. 
rejection of the point itself and incorrect sig) // * successfully signed our point and dequeued - for peer_id in mem::take(&mut self.bcast_peers).iter() { - self.broadcast(peer_id) + for peer_id in mem::take(&mut self.bcast_peers) { + self.broadcast(&peer_id) } loop { tokio::select! { @@ -119,21 +124,36 @@ impl Broadcaster { self.match_peer_updates(update) } Some(signer_signal) = self.signer_signal.recv() => { - match signer_signal { - SignerSignal::Ok => self.is_signer_ready_ok = true, - SignerSignal::Err => { - // even if we can return successful result, it will be discarded - return Err(()) - }, - SignerSignal::Retry => { - match self.check_if_ready() { - Some(result) => break result.map(|_| self.signatures), - None => self.retry(), - } - }, + if let Some(result) = self.match_signer_signal(signer_signal) { + break result.map(|_| self.signatures) } } + else => { + panic!("bcaster unhandled"); + } + } + } + } + fn match_signer_signal(&mut self, signer_signal: SignerSignal) -> Option> { + tracing::info!( + "{} @ {:?} bcaster <= signer : {signer_signal:?}; sigs {} of {}; rejects {} of {}", + self.local_id, + self.current_round, + self.signatures.len(), + self.signers_count.majority_of_others(), + self.rejections.len(), + self.signers_count.reliable_minority(), + ); + match signer_signal { + SignerSignal::Ok => { + self.is_signer_ready_ok = true; + None } + SignerSignal::Err => { + // even if we can return successful result, it will be discarded + Some(Err(())) + } + SignerSignal::Retry => self.check_if_ready(), } } fn check_if_ready(&mut self) -> Option> { @@ -148,23 +168,30 @@ impl Broadcaster { return Some(Ok(())); } } - None - } - fn retry(&mut self) { - for peer_id in mem::take(&mut self.sig_peers).iter() { - self.request_signature(peer_id); + for peer_id in mem::take(&mut self.sig_peers) { + self.request_signature(&peer_id); } - for peer_id in mem::take(&mut self.bcast_peers).iter() { - self.broadcast(peer_id); + for peer_id in mem::take(&mut self.bcast_peers) { + self.broadcast(&peer_id); } + None } fn match_peer_updates(&mut self, result: Result<(PeerId, PeerState), RecvError>) { match result { - Ok((_peer_id, PeerState::Added)) => { /* ignore */ } - Ok((peer_id, PeerState::Resolved)) => self.broadcast(&peer_id), - Ok((peer_id, PeerState::Removed)) => _ = self.removed_peers.insert(peer_id), + Ok(update) => { + tracing::info!( + "{} @ {:?} bcaster peer update: {update:?}", + self.local_id, + self.current_round + ); + match update { + (_peer_id, PeerState::Added) => { /* ignore */ } + (peer_id, PeerState::Resolved) => self.broadcast(&peer_id), + (peer_id, PeerState::Removed) => _ = self.removed_peers.insert(peer_id), + } + } Err(err @ RecvError::Lagged(_)) => { - tracing::warn!("Broadcaster peer updates {err}") + tracing::error!("Broadcaster peer updates {err}") } Err(err @ RecvError::Closed) => { panic!("Broadcaster peer updates {err}") @@ -173,50 +200,87 @@ impl Broadcaster { } fn match_broadcast_result(&mut self, peer_id: PeerId, result: BcastResult) { match result { - Ok(BroadcastResponse::Accepted) => self.request_signature(&peer_id), - Ok(BroadcastResponse::TryLater) => _ = self.sig_peers.insert(peer_id), - Ok(BroadcastResponse::Rejected) => { - if self.signers.contains(&peer_id) { - self.rejections.insert(peer_id); - } - } Err(error) => { // TODO distinguish timeouts from models incompatibility etc - // self.bcast_peers.push(peer_id); // let it retry self.sig_peers.insert(peer_id); // lighter weight retry loop - tracing::warn!("on broadcasting own point: {error}"); + 
tracing::error!( + "{} @ {:?} bcaster <= signer {peer_id:.4?} broadcast error : {error}", + self.local_id, + self.current_round + ); + } + Ok(response) => { + if response == BroadcastResponse::Rejected { + tracing::warn!( + "{} @ {:?} bcaster <= signer {peer_id:.4?} : {response:?}", + self.local_id, + self.current_round + ); + } else { + tracing::info!( + "{} @ {:?} bcaster <= signer {peer_id:.4?} : {response:?}", + self.local_id, + self.current_round + ); + } + match response { + BroadcastResponse::Accepted => self.request_signature(&peer_id), + BroadcastResponse::TryLater => _ = self.sig_peers.insert(peer_id), + BroadcastResponse::Rejected => { + if self.signers.contains(&peer_id) { + self.rejections.insert(peer_id); + } + } + } } } } fn match_signature_result(&mut self, peer_id: PeerId, result: SigResult) { match result { - Ok(SignatureResponse::Signature(signature)) => { - if self.signers.contains(&peer_id) { - if self.is_signature_ok(&peer_id, &signature) { - self.signatures.insert(peer_id, signature); - } else { - // any invalid signature lowers our chances - // to successfully finish current round - self.rejections.insert(peer_id); - } - } - } - Ok(SignatureResponse::NoPoint) => { - self.broadcast(&peer_id); - } - Ok(SignatureResponse::TryLater) => { - self.sig_peers.insert(peer_id); - } - Ok(SignatureResponse::Rejected) => { - if self.signers.contains(&peer_id) { - self.rejections.insert(peer_id); - } - } Err(error) => { // TODO distinguish timeouts from models incompatibility etc self.sig_peers.insert(peer_id); // let it retry - tracing::warn!("on requesting signatures for own point: {error}"); + tracing::error!( + "{} @ {:?} bcaster <= signer {peer_id:.4?} signature request error : {error}", + self.local_id, + self.current_round + ); + } + Ok(response) => { + if response == SignatureResponse::Rejected { + tracing::warn!( + "{} @ {:?} bcaster <= signer {peer_id:.4?} : {response:?}", + self.local_id, + self.current_round + ); + } else { + tracing::info!( + "{} @ {:?} bcaster <= signer {peer_id:.4?} : {response:?}", + self.local_id, + self.current_round + ); + }; + match response { + SignatureResponse::Signature(signature) => { + if self.signers.contains(&peer_id) { + if self.is_signature_ok(&peer_id, &signature) { + self.signatures.insert(peer_id, signature); + } else { + // any invalid signature lowers our chances + // to successfully finish current round + self.rejections.insert(peer_id); + } + } + } + SignatureResponse::NoPoint => self.broadcast(&peer_id), + SignatureResponse::TryLater => _ = self.sig_peers.insert(peer_id), + SignatureResponse::Rejected => { + if self.signers.contains(&peer_id) { + self.rejections.insert(peer_id); + } + } + } } } } @@ -224,12 +288,34 @@ impl Broadcaster { if self.removed_peers.is_empty() || !self.removed_peers.remove(&peer_id) { self.bcast_futs .push(self.dispatcher.request(&peer_id, &self.bcast_request)); + tracing::info!( + "{} @ {:?} bcaster => signer {peer_id:.4?}: broadcast", + self.local_id, + self.current_round + ); + } else { + tracing::warn!( + "{} @ {:?} bcaster => signer {peer_id:.4?}: broadcast impossible", + self.local_id, + self.current_round + ); } } fn request_signature(&mut self, peer_id: &PeerId) { if self.removed_peers.is_empty() || !self.removed_peers.remove(&peer_id) { self.sig_futs .push(self.dispatcher.request(&peer_id, &self.sig_request)); + tracing::info!( + "{} @ {:?} bcaster => signer {peer_id:.4?}: signature request", + self.local_id, + self.current_round + ); + } else { + tracing::warn!( + "{} @ {:?} bcaster => 
signer {peer_id:.4?}: signature request impossible", + self.local_id, + self.current_round + ); } } fn is_signature_ok(&self, peer_id: &PeerId, signature: &Signature) -> bool { diff --git a/consensus/src/intercom/adapter/dto.rs b/consensus/src/intercom/adapter/dto.rs index 78a2c2ebe..51ad84c42 100644 --- a/consensus/src/intercom/adapter/dto.rs +++ b/consensus/src/intercom/adapter/dto.rs @@ -2,6 +2,7 @@ use std::sync::Arc; use crate::models::{DagPoint, Point, Round}; +#[derive(Debug)] pub enum ConsensusEvent { // allows not to peek but poll the channel when local dag is not ready yet Forward(Round), @@ -17,6 +18,7 @@ pub enum ConsensusEvent { /// * broadcaster may finish Err, signalling () to signer /// /// => signer may run without broadcaster, as if broadcaster signalled () +#[derive(Debug)] pub enum SignerSignal { Ok, Err, diff --git a/consensus/src/intercom/adapter/signer.rs b/consensus/src/intercom/adapter/signer.rs index 53f233490..140377f38 100644 --- a/consensus/src/intercom/adapter/signer.rs +++ b/consensus/src/intercom/adapter/signer.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use futures_util::future::BoxFuture; use futures_util::stream::FuturesUnordered; -use futures_util::StreamExt; +use futures_util::{FutureExt, StreamExt}; use tokio::sync::{mpsc, oneshot, Notify}; use tycho_network::PeerId; @@ -15,6 +15,7 @@ use crate::intercom::dto::SignatureResponse; use crate::models::{Point, Round}; pub struct Signer { + local_id: Arc, from_bcast_filter: mpsc::UnboundedReceiver, signature_requests: mpsc::UnboundedReceiver, next_round: Round, @@ -23,15 +24,20 @@ pub struct Signer { impl Signer { pub fn new( + local_id: Arc, from_bcast_filter: mpsc::UnboundedReceiver, signature_requests: mpsc::UnboundedReceiver, - last_round: &Round, + next_includes: impl Iterator, + next_round: Round, ) -> Self { Self { + local_id, from_bcast_filter, signature_requests, - next_round: last_round.next(), - next_includes: FuturesUnordered::new(), + next_round, + next_includes: FuturesUnordered::from_iter( + next_includes.map(|a| futures_util::future::ready(a).boxed()), + ), } } @@ -52,8 +58,9 @@ impl Signer { }; self.next_round = next_dag_round.round().clone(); let task = SignerTask { - next_dag_round, + local_id: self.local_id.clone(), current_round: current_dag_round.clone(), + next_dag_round, includes, includes_ready: has_own_point.into_iter().count(), next_includes: FuturesUnordered::new(), @@ -80,6 +87,9 @@ impl Signer { type SignatureRequest = (Round, PeerId, oneshot::Sender); struct SignerTask { // for node running @ r+0: + local_id: Arc, + current_round: DagRound, // = r+0 + next_dag_round: DagRound, // = r+1 is always in DAG; contains the keypair to produce point @ r+1 // @ r+0, will become includes in point @ r+1 // needed in order to not include same point twice - as an include and as a witness; @@ -90,9 +100,6 @@ struct SignerTask { /// anyway should rewrite signing mechanics - look for comments inside [DagRound::add_exact] next_includes: FuturesUnordered>, - next_dag_round: DagRound, // = r+1 is always in DAG; contains the keypair to produce point @ r+1 - current_round: DagRound, // = r+0 - signer_signal: mpsc::UnboundedSender, bcaster_ready: Arc, is_bcaster_ready: bool, @@ -111,8 +118,14 @@ impl SignerTask { loop { tokio::select! 
{ request = signature_requests.recv() => match request { - Some((round, peer_id, callback)) => - _ = callback.send(self.signature_response(&round, &peer_id)), + Some((round, peer_id, callback)) => { + let response = self.signature_response(&round, &peer_id); + tracing::info!( + "{} @ {:?} signer => bcaster {peer_id:.4?} @ {round:?} : {response:.4?}", + self.local_id, self.current_round.round() + ); + _ = callback.send(response); + } None => panic!("channel with signature requests closed") }, filtered = from_bcast_filter.recv() => match filtered { @@ -126,11 +139,21 @@ impl SignerTask { }, _ = self.bcaster_ready.notified() => { self.is_bcaster_ready = true; + tracing::info!( + "{} @ {:.4?} signer <= bcaster ready : includes {} of {}", + self.local_id, self.current_round.round(), + self.includes_ready, self.current_round.node_count().majority() + ); if self.includes_ready >= self.current_round.node_count().majority() { return Ok(self.next_includes) } }, _ = retry_interval.tick() => { + tracing::info!( + "{} @ {:.4?} signer retry : includes {} of {}", + self.local_id, self.current_round.round(), + self.includes_ready, self.current_round.node_count().majority() + ); // point @ r+1 has to include 2F+1 broadcasts @ r+0 (we are @ r+0) if self.includes_ready >= self.current_round.node_count().majority() { _ = self.signer_signal.send(SignerSignal::Ok); @@ -148,16 +171,33 @@ impl SignerTask { // do not return location from DagLocation::add_validate(point) Some(state) = self.includes.next() => { // slow but at least may work - if let Some(signable) = state.signable() { - if signable.sign( + let signed = if let Some(signable) = state.signable() { + signable.sign( self.current_round.round(), self.next_dag_round.key_pair(), MempoolConfig::sign_time_range(), - ) { - self.includes_ready += 1; - } + ) + } else { + state.signed().is_some() // FIXME this is very fragile duct tape + }; + if signed { + tracing::info!( + "{} @ {:.4?} includes {} +1 : {:.4?} {:.4?}", + self.local_id, self.current_round.round(), self.includes_ready, + state.init_id(), state.signed() + ); + self.includes_ready += 1; + } else { + tracing::warn!( + "{} @ {:.4?} includes {} : {:.4?} {:.4?}", + self.local_id, self.current_round.round(), self.includes_ready, + state.init_id(), state.signed() + ); } }, + else => { + panic!("signer unhandled"); + } } } } @@ -197,13 +237,19 @@ impl SignerTask { } } } - match state.signed() { + let res = match state.signed() { Some(Ok(signed)) => SignatureResponse::Signature(signed.with.clone()), Some(Err(())) => SignatureResponse::Rejected, None => SignatureResponse::TryLater, - } + }; + res } fn match_filtered(&self, filtered: &ConsensusEvent) -> Result<(), Round> { + tracing::info!( + "{} @ {:?} signer <= bcast filter : {filtered:.4?}", + self.local_id, + self.current_round.round() + ); match filtered { ConsensusEvent::Forward(consensus_round) => { match consensus_round.cmp(self.next_dag_round.round()) { diff --git a/consensus/src/intercom/core/dispatcher.rs b/consensus/src/intercom/core/dispatcher.rs index 0f6e4a91c..42dbb3cb8 100644 --- a/consensus/src/intercom/core/dispatcher.rs +++ b/consensus/src/intercom/core/dispatcher.rs @@ -82,16 +82,6 @@ impl Dispatcher { } } -pub trait DispatcherTestExt { - fn network(&self) -> &'_ Network; -} - -impl DispatcherTestExt for Dispatcher { - fn network(&self) -> &'_ Network { - &self.network - } -} - /* FIXME #[cfg(test)] mod tests { diff --git a/consensus/src/intercom/core/responder.rs b/consensus/src/intercom/core/responder.rs index e02ffaa8b..d0a3fd5b2 100644 
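// A minimal sketch of the broadcaster's readiness rule implied by the fields and
// logging above ("sigs ... of majority_of_others; rejects ... of reliable_minority",
// `is_signer_ready_ok`). The exact conditions inside `check_if_ready` are not fully
// visible in this hunk, so the gating below is an assumption for illustration only.
fn check_if_ready_sketch(
    is_signer_ready_ok: bool,
    signatures: usize,
    rejections: usize,
    majority_of_others: usize, // 2F: evidence needed to prove the point next round
    reliable_minority: usize,  // F+1: at least one honest node among the rejecters
) -> Option<Result<(), ()>> {
    if rejections >= reliable_minority {
        Some(Err(())) // an honest signer rejected our point, give up on this round
    } else if is_signer_ready_ok && signatures >= majority_of_others {
        Some(Ok(())) // enough signatures collected as evidence for the next point
    } else {
        None // keep broadcasting and requesting signatures on retry
    }
}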
--- a/consensus/src/intercom/core/responder.rs +++ b/consensus/src/intercom/core/responder.rs @@ -85,7 +85,7 @@ impl ResponderInner { } }; - Some(Response { + let res = Some(Response { version: Version::default(), body: Bytes::from(match bincode::serialize(&MPRemoteResult::Ok(response)) { Ok(data) => data, @@ -95,6 +95,7 @@ impl ResponderInner { .expect("must not fail") } }), - }) + }); + res } } diff --git a/consensus/src/intercom/dto.rs b/consensus/src/intercom/dto.rs index d1439073e..c17040dd9 100644 --- a/consensus/src/intercom/dto.rs +++ b/consensus/src/intercom/dto.rs @@ -5,7 +5,7 @@ use crate::models::{Point, Signature}; #[derive(Serialize, Deserialize, Debug)] pub struct PointByIdResponse(pub Option); -#[derive(Serialize, Deserialize, Debug)] +#[derive(Serialize, Deserialize, PartialEq, Debug)] pub enum BroadcastResponse { /// peer will verify and maybe sign the point Accepted, diff --git a/consensus/src/intercom/peer_schedule/peer_schedule.rs b/consensus/src/intercom/peer_schedule/peer_schedule.rs index 71933eaaf..86f6e6bdc 100644 --- a/consensus/src/intercom/peer_schedule/peer_schedule.rs +++ b/consensus/src/intercom/peer_schedule/peer_schedule.rs @@ -53,6 +53,7 @@ impl PeerSchedule { } pub fn updates(&self) -> broadcast::Receiver<(PeerId, PeerState)> { + tracing::info!("subscribing to peer updates"); self.updates.subscribe() } @@ -124,7 +125,7 @@ impl PeerSchedule { pub fn all_resolved(&self) -> FastHashSet { let inner = self.inner.lock(); - inner.all_resolved() + inner.all_resolved(self.local_id()) } pub fn peers_for(&self, round: &Round) -> Arc> { @@ -190,7 +191,7 @@ impl PeerSchedule { _ = inner.next_epoch_start.replace(round); } - pub fn set_next_peers(&self, peers: &Vec) { + pub fn set_next_peers(&self, peers: &Vec<(PeerId, bool)>) { let mut all_peers = BTreeMap::new(); let mut inner = self.inner.lock(); for i in 0..inner.peers_resolved.len() { @@ -198,7 +199,7 @@ impl PeerSchedule { } let old = peers .iter() - .filter_map(|peer_id| { + .filter_map(|(peer_id, _)| { all_peers .get(peer_id) .map(|&state| (peer_id.clone(), state.clone())) @@ -206,7 +207,16 @@ impl PeerSchedule { .collect::>(); let next = Arc::make_mut(&mut inner.peers_resolved[2]); next.clear(); - next.extend(peers.clone().into_iter().map(|a| (a, PeerState::Added))); + next.extend(peers.clone().into_iter().map(|(peer_id, is_resolved)| { + ( + peer_id, + if is_resolved { + PeerState::Resolved + } else { + PeerState::Added + }, + ) + })); next.extend(old); } @@ -277,12 +287,12 @@ impl PeerScheduleInner { } } - fn all_resolved(&self) -> FastHashSet { + fn all_resolved(&self, local_id: PeerId) -> FastHashSet { self.peers_resolved[0] .iter() .chain(self.peers_resolved[1].iter()) .chain(self.peers_resolved[2].iter()) - .filter(|(_, state)| *state == &PeerState::Resolved) + .filter(|(peer_id, state)| *state == &PeerState::Resolved && peer_id != &local_id) .map(|(peer_id, _)| *peer_id) .collect() } diff --git a/consensus/src/intercom/peer_schedule/peer_schedule_updater.rs b/consensus/src/intercom/peer_schedule/peer_schedule_updater.rs index 597c3d85a..692e714e4 100644 --- a/consensus/src/intercom/peer_schedule/peer_schedule_updater.rs +++ b/consensus/src/intercom/peer_schedule/peer_schedule_updater.rs @@ -19,6 +19,7 @@ pub struct PeerScheduleUpdater { impl PeerScheduleUpdater { pub fn run(overlay: PrivateOverlay, peer_schedule: Arc) { + tracing::info!("started peer schedule updater"); let this = Self { overlay, peer_schedule, @@ -29,9 +30,10 @@ impl PeerScheduleUpdater { } fn respawn_resolve_task(&self) { + let 
local_id = self.peer_schedule.local_id(); + tracing::info!("{local_id:.4?} respawn_resolve_task"); let mut fut = futures_util::stream::FuturesUnordered::new(); { - let local_id = self.peer_schedule.local_id(); let entries = self.overlay.read_entries(); for entry in entries .iter() @@ -63,16 +65,18 @@ impl PeerScheduleUpdater { } async fn listen(self) { - let mut rx = self.overlay.read_entries().subscribe(); let local_id = self.peer_schedule.local_id(); + tracing::info!("{local_id:.4?} listen peer updates"); + let mut rx = self.overlay.read_entries().subscribe(); loop { match rx.recv().await { Ok(ref event @ PrivateOverlayEntriesEvent::Removed(node)) if node != local_id => { + tracing::info!("{local_id:.4?} got {event:?}"); if self.peer_schedule.set_resolved(&node, false) { // respawn resolve task with fewer peers to await self.respawn_resolve_task(); } else { - tracing::debug!("Skipped {event:?}"); + tracing::info!("{local_id:.4?} Skipped {event:?}"); } } Err(RecvError::Closed) => { @@ -84,7 +88,9 @@ impl PeerScheduleUpdater { Consider increasing channel capacity." ) } - Ok(_) => {} + Ok(a) => { + tracing::warn!("{local_id:.4?} peer schedule updater missed {a:?}"); + } } } } diff --git a/consensus/src/models/dag_point.rs b/consensus/src/models/dag_point.rs index fcd873bfb..85536b7e7 100644 --- a/consensus/src/models/dag_point.rs +++ b/consensus/src/models/dag_point.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use crate::models::point::{Digest, Location, Point, PointId}; -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct ValidPoint { pub point: Arc, pub is_committed: Arc, @@ -18,7 +18,7 @@ impl ValidPoint { } } -#[derive(Clone)] +#[derive(Clone, Debug)] pub enum DagPoint { // FIXME time skew is determined at the moment of signature response and is not reentrant /// valid without demur, needed to blame equivocation or graph connectivity violations diff --git a/consensus/src/models/node_count.rs b/consensus/src/models/node_count.rs index 955c94a1d..b7e32cb69 100644 --- a/consensus/src/models/node_count.rs +++ b/consensus/src/models/node_count.rs @@ -1,36 +1,58 @@ -#[derive(Copy, Clone)] +use std::fmt::Formatter; + +#[derive(Clone)] pub struct NodeCount(usize); +impl std::fmt::Debug for NodeCount { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.write_str("NodeCount(")?; + f.write_str(self.full().to_string().as_str())?; + f.write_str(")") + } +} + impl TryFrom for NodeCount { type Error = &'static str; fn try_from(total_peers: usize) -> Result { // may occur if peer_schedule is empty - let count = if total_peers < 3 { + if total_peers < 3 { return Err("not enough nodes to run consensus"); } else { - ((total_peers + 2) / 3) * 3 + 1 // ceil up to 3F+1 - }; - if count < total_peers { - panic!("node count {total_peers} overflows after rounding up to 3F+1"); + Ok(NodeCount::new(total_peers)) } - Ok(NodeCount((count - 1) / 3)) // 1F } } impl NodeCount { pub const GENESIS: Self = Self(0); - /* - pub fn full(&self) -> usize { + + pub fn new(total_peers: usize) -> Self { + // 1 matches the genesis + assert!( + total_peers != 0 && total_peers != 2, + "invalid node count: {total_peers}" + ); + // ceil up to 3F+1; assume the least possible amount of nodes is offline + let count = ((total_peers + 1) / 3) * 3 + 1; + assert!( + total_peers <= count, + "node count {total_peers} overflows after rounding up to 3F+1" + ); + NodeCount((count - 1) / 3) // 1F + } + + fn full(&self) -> usize { self.0 * 3 + 1 } - */ + pub fn majority(&self) -> usize { self.0 * 2 + 1 } /// excluding either current node 
or the point's author, depending on the context pub fn majority_of_others(&self) -> usize { - // yes, genesis has the contradiction: reliable minority > majority of others + // yes, genesis has the contradiction: reliable minority > majority of others; + // but no node may exist in genesis, thus cannot exclude itself from it self.0 * 2 } diff --git a/consensus/src/models/point.rs b/consensus/src/models/point.rs index 606815f80..263304d37 100644 --- a/consensus/src/models/point.rs +++ b/consensus/src/models/point.rs @@ -1,4 +1,5 @@ use std::collections::BTreeMap; +use std::fmt::{Debug, Display, Formatter}; use std::ops::{Add, Sub}; use bytes::Bytes; @@ -7,17 +8,66 @@ use serde::{Deserialize, Serialize}; use sha2::{Digest as Sha2Digest, Sha256}; use tycho_network::PeerId; -use tycho_util::FastHashMap; -#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Debug)] +use crate::engine::MempoolConfig; + +#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)] pub struct Digest(pub [u8; 32]); +impl Display for Digest { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let len = f.precision().unwrap_or(32); + for byte in self.0.iter().take(len) { + write!(f, "{byte:02x}")?; + } + Ok(()) + } +} -#[derive(Clone, Serialize, Deserialize, PartialEq, Debug)] +impl Debug for Digest { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.write_str("Digest(")?; + std::fmt::Display::fmt(self, f)?; + f.write_str(")") + } +} + +#[derive(Clone, Serialize, Deserialize, PartialEq)] pub struct Signature(pub Bytes); +impl Display for Signature { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let len = f.precision().unwrap_or(64); + for byte in self.0.iter().take(len) { + write!(f, "{byte:02x}")?; + } + Ok(()) + } +} +impl Debug for Signature { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.write_str("Signature(")?; + std::fmt::Display::fmt(self, f)?; + f.write_str(")") + } +} #[derive(Copy, Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] pub struct Round(pub u32); +impl Round { + pub fn prev(&self) -> Round { + self.0 + .checked_sub(1) + .map(Round) + .expect("DAG round number underflow, fix dag initial configuration") + } + pub fn next(&self) -> Round { + self.0 + .checked_add(1) + .map(Round) + .expect("DAG round number overflow, inner type exhausted") + } +} + #[derive(Copy, Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Debug)] pub struct UnixTime(u64); @@ -52,21 +102,6 @@ impl Sub for UnixTime { } } -impl Round { - pub fn prev(&self) -> Round { - self.0 - .checked_sub(1) - .map(Round) - .expect("DAG round number underflow, fix dag initial configuration") - } - pub fn next(&self) -> Round { - self.0 - .checked_add(1) - .map(Round) - .expect("DAG round number overflow, inner type exhausted") - } -} - #[derive(Clone, Serialize, Deserialize, PartialEq, Debug)] pub struct Location { pub round: Round, @@ -87,7 +122,8 @@ pub struct PrevPoint { pub digest: Digest, /// `>= 2F` neighbours, order does not matter; /// point author is excluded: everyone must use the proven point to validate its proof - pub evidence: FastHashMap, + // Note: bincode may be non-stable on (de)serializing hashmaps due to different local order + pub evidence: BTreeMap, // TODO if we use TL, then every node can sign hash of a point's body (not all body bytes) // so we can include that hash into PrevPoint // to check signatures inside BroadcastFilter::verify() without waiting for DAG @@ -206,10 +242,9 @@ impl 
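// Worked example of the 3F+1 rounding in `NodeCount::new` above: the peer count is
// rounded up to the nearest 3F+1 (assuming the fewest possible nodes are offline)
// and only F is stored. `majority` is 2F+1 and `majority_of_others` is 2F; the
// `reliable_minority` used by the broadcaster is presumably F+1 - an assumption
// here, since that method does not appear in this hunk.
#[test]
fn node_count_thresholds() {
    for (total_peers, f) in [(1usize, 0usize), (4, 1), (5, 2), (7, 2), (10, 3)] {
        let full = ((total_peers + 1) / 3) * 3 + 1; // ceil up to 3F+1
        assert_eq!(full, 3 * f + 1);
        assert_eq!(2 * f + 1, full - f); // majority(): 2F+1
        assert_eq!(2 * f, full - f - 1); // majority_of_others(): the author is excluded
        assert!(total_peers <= full); // the overflow assert holds for these sizes
    }
}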
Point { /// must be checked right after integrity, before any manipulations with the point pub fn is_well_formed(&self) -> bool { // any genesis is suitable, round number may be taken from configs - const LAST_GENESIS_ROUND: Round = Round(0); let author = &self.body.location.author; let is_special_ok = match self.body.location.round { - LAST_GENESIS_ROUND => { + MempoolConfig::GENESIS_ROUND => { self.body.includes.is_empty() && self.body.witness.is_empty() && self.body.payload.is_empty() @@ -217,10 +252,10 @@ impl Point { && self.body.anchor_proof == Link::ToSelf && self.body.anchor_trigger == Link::ToSelf } - round if round > LAST_GENESIS_ROUND => { + round if round > MempoolConfig::GENESIS_ROUND => { // no witness is possible at the round right after genesis; // the other way: we may panic on round.prev().prev() while extracting link's round - (round.0 > LAST_GENESIS_ROUND.0 + 1 || self.body.witness.is_empty()) + (round > MempoolConfig::GENESIS_ROUND.next() || self.body.witness.is_empty()) // leader must maintain its chain of proofs, // while others must link to previous points (checked at the end of this method); // its decided later (using dag round data) whether current point belongs to leader @@ -236,16 +271,13 @@ impl Point { && self.body.proof.as_ref().map_or(true, |p| !p.evidence.contains_key(author)) && self.is_link_well_formed(&self.body.anchor_proof) && self.is_link_well_formed(&self.body.anchor_trigger) - && match ( - self.anchor_proof_round(), - self.anchor_trigger_round(), - ) { - (x, LAST_GENESIS_ROUND) => x >= LAST_GENESIS_ROUND, - (LAST_GENESIS_ROUND, y) => y >= LAST_GENESIS_ROUND, + && match (self.anchor_proof_round(), self.anchor_trigger_round()) { + (x, MempoolConfig::GENESIS_ROUND) => x >= MempoolConfig::GENESIS_ROUND, + (MempoolConfig::GENESIS_ROUND, y) => y >= MempoolConfig::GENESIS_ROUND, // equality is impossible due to commit waves do not start every round; // anchor trigger may belong to a later round than proof and vice versa; // no indirect links over genesis tombstone - (x, y) => x != y && x > LAST_GENESIS_ROUND && y > LAST_GENESIS_ROUND, + (x, y) => x != y && x > MempoolConfig::GENESIS_ROUND && y > MempoolConfig::GENESIS_ROUND, } } @@ -259,14 +291,14 @@ impl Point { to, } => { self.body.includes.contains_key(peer) - && to.location.round.0 + 1 < self.body.location.round.0 + && to.location.round.next() < self.body.location.round } Link::Indirect { path: Through::Witness(peer), to, } => { self.body.witness.contains_key(peer) - && to.location.round.0 + 2 < self.body.location.round.0 + && to.location.round.next().next() < self.body.location.round } } } diff --git a/consensus/src/test_utils.rs b/consensus/src/test_utils.rs index 4cbcc154b..a2a1d1c46 100644 --- a/consensus/src/test_utils.rs +++ b/consensus/src/test_utils.rs @@ -3,23 +3,21 @@ use std::sync::Arc; use std::time::Duration; use everscale_crypto::ed25519::{KeyPair, PublicKey, SecretKey}; -use tokio::sync::mpsc; use tycho_network::{DhtClient, DhtConfig, DhtService, Network, OverlayService, PeerId, Router}; -use crate::intercom::{BroadcastFilter, Dispatcher, PeerSchedule, PeerScheduleUpdater, Responder}; -use crate::models::{Link, Location, Point, PointBody, Round, UnixTime}; +use crate::engine::MempoolConfig; +use crate::models::{Link, Location, Point, PointBody, UnixTime}; const GENESIS_SECRET_KEY_BYTES: [u8; 32] = [0xAE; 32]; -const GENESIS_MILLIS: u64 = 0; -const GENESIS_ROUND: u32 = 0; +const GENESIS_MILLIS: u64 = 1713225727398; pub fn genesis() -> Point { let genesis_keys = 
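// Why the logs in this patch use `{:.4}` / `{:.4?}`: the Display impls above read
// `f.precision()` as "number of bytes to print as hex", and Debug delegates to
// Display with the same Formatter, so the precision flag carries through. A small
// check, assuming the re-exported `Digest` from the models module:
#[test]
fn digest_precision_formatting() {
    use crate::models::Digest;
    let digest = Digest([0xAB; 32]);
    assert_eq!(format!("{digest:.4}"), "abababab"); // first 4 bytes only
    assert_eq!(format!("{digest:.4?}"), "Digest(abababab)"); // Debug wraps Display
    assert_eq!(format!("{digest}").len(), 64); // all 32 bytes otherwise
}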
KeyPair::from(&SecretKey::from_bytes(GENESIS_SECRET_KEY_BYTES)); PointBody { location: Location { - round: Round(GENESIS_ROUND), + round: MempoolConfig::GENESIS_ROUND, author: genesis_keys.public_key.into(), }, time: UnixTime::from_millis(GENESIS_MILLIS), @@ -33,49 +31,6 @@ pub fn genesis() -> Point { .wrap(&genesis_keys) } -pub async fn bootstrap( - secret_key: &SecretKey, - dht_client: &DhtClient, - overlay_service: &OverlayService, - peers: &Vec, -) { - let peer_schedule = Arc::new(PeerSchedule::new(Arc::new(KeyPair::from(secret_key)))); - - let (bcast_tx, bcast_rx) = mpsc::unbounded_channel(); - - let broadcast_filter = BroadcastFilter::new(peer_schedule.clone(), bcast_tx); - - let (sig_requests, sig_responses) = mpsc::unbounded_channel(); - - let dispatcher = Dispatcher::new( - &dht_client, - &overlay_service, - peers, - Responder::new(broadcast_filter.clone(), sig_requests), - ); - - let genesis = Arc::new(crate::test_utils::genesis()); - // finished epoch - peer_schedule.set_next_peers(&vec![genesis.body.location.author]); - peer_schedule.set_next_start(genesis.body.location.round); - peer_schedule.rotate(); - // current epoch - peer_schedule.set_next_start(genesis.body.location.round.next()); - peer_schedule.set_next_peers(peers); - peer_schedule.rotate(); - // start updater only after peers are populated into schedule - PeerScheduleUpdater::run(dispatcher.overlay.clone(), peer_schedule.clone()); - - // tOdO define if the last round is finished based on peer schedule - // move out from bcaster & signer ? where to get our last point from ? - - // tOdO в конце каждого раунда берем точку с триггером - // и комиттим - // * either own point contains Trigger - // * or search through last round to find the latest trigger - // * * can U do so without scan of a round ??? 
-} - // TODO receive configured services from general node, // move current setup to tests as it provides acceptable timing // This dependencies should be passed from validator module to init mempool @@ -88,9 +43,9 @@ fn from_validator( let (dht_tasks, dht_service) = DhtService::builder(local_id) .with_config(DhtConfig { local_info_announce_period: Duration::from_secs(1), - max_local_info_announce_period_jitter: Duration::from_secs(1), + local_info_announce_period_max_jitter: Duration::from_secs(1), routing_table_refresh_period: Duration::from_secs(1), - max_routing_table_refresh_period_jitter: Duration::from_secs(1), + routing_table_refresh_period_max_jitter: Duration::from_secs(1), ..Default::default() }) .build(); @@ -113,22 +68,22 @@ fn from_validator( dht_tasks.spawn(&network); overlay_tasks.spawn(&network); - (dht_service.make_client(network.clone()), overlay_service) + (dht_service.make_client(&network), overlay_service) } #[cfg(test)] mod tests { use std::net::Ipv4Addr; + use std::thread; + use std::time::Duration; - use futures_util::stream::FuturesUnordered; + use parking_lot::deadlock; use tokio::task::JoinSet; use tycho_network::{Address, PeerInfo}; use tycho_util::time::now_sec; use crate::engine::Engine; - use crate::engine::EngineTestExt; - use crate::intercom::DispatcherTestExt; use super::*; @@ -163,46 +118,75 @@ mod tests { .map(|secret| from_validator((Ipv4Addr::LOCALHOST, 0), secret)) .collect::>(); - let mut engines = vec![]; - for (secret_key, (dht_client, overlay_service)) in keys.iter().zip(from_validators.iter()) { - let engine = Engine::new(secret_key, &dht_client, &overlay_service, &all_peers).await; - engines.push(engine); - } - - let peer_info = std::iter::zip(&keys, &engines) - .map(|(key, engine)| { + let peer_info = std::iter::zip(&keys, &from_validators) + .map(|(key, (dht_client, _))| { Arc::new(make_peer_info( key, - engine.dispatcher().network().local_addr().into(), + dht_client.network().local_addr().into(), )) }) .collect::>(); - if let Some((dht_client, _)) = from_validators.first() { + for (dht_client, _) in from_validators.iter() { for info in &peer_info { if info.id == dht_client.network().peer_id() { continue; } - dht_client.add_peer(info.clone()).unwrap(); + assert!(dht_client.add_peer(info.clone()).unwrap(), "peer added"); } } + let mut engines = vec![]; + for (secret_key, (dht_client, overlay_service)) in keys.iter().zip(from_validators.iter()) { + let engine = Engine::new(secret_key, &dht_client, &overlay_service, &all_peers).await; + tracing::info!("created engine {}", dht_client.network().peer_id()); + engines.push(engine); + } + engines } - #[tokio::test] + #[tokio::test(flavor = "multi_thread", worker_threads = 4)] async fn engine_works() -> Result<(), ()> { - tracing_subscriber::fmt::try_init().ok(); - tracing::info!("engine_works"); + // tracing_subscriber::fmt::try_init().ok(); + // tracing::info!("engine_works"); + tycho_util::test::init_logger("engine_works"); + check_parking_lot(); + heart_beat(); let mut js = JoinSet::new(); - let engines = make_network(3) - .await - .into_iter() - .map(|engine| js.spawn(engine.run())) - .collect::>(); + for engine in make_network(3).await { + js.spawn(engine.run()); + } while let Some(res) = js.join_next().await { res.unwrap(); } Ok(()) } + + pub fn check_parking_lot() { + thread::spawn(move || loop { + thread::sleep(Duration::from_secs(10)); + let deadlocks = deadlock::check_deadlock(); + if deadlocks.is_empty() { + continue; + } + + tracing::error!("{} deadlocks detected", deadlocks.len()); + for 
(i, threads) in deadlocks.iter().enumerate() { + tracing::error!("Deadlock #{}", i); + for t in threads { + tracing::error!("Thread Id {:#?}", t.thread_id()); + tracing::error!("{:#?}", t.backtrace()); + } + } + }); + } + + pub fn heart_beat() { + // Create a background thread which checks for deadlocks every 10s + thread::spawn(move || loop { + thread::sleep(Duration::from_secs(1)); + tracing::info!("heart beat"); + }); + } } diff --git a/network/src/types/peer_id.rs b/network/src/types/peer_id.rs index 17147fc0f..f79c2280b 100644 --- a/network/src/types/peer_id.rs +++ b/network/src/types/peer_id.rs @@ -54,7 +54,9 @@ impl std::fmt::Display for PeerId { impl std::fmt::Debug for PeerId { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "PeerId({self})") + f.write_str("PeerId(")?; + std::fmt::Display::fmt(self, f)?; + f.write_str(")") } } From bbca982a13fc826590ad011cc91912b7017be022 Mon Sep 17 00:00:00 2001 From: Kirill Mikheev Date: Mon, 22 Apr 2024 07:52:19 +0300 Subject: [PATCH 14/32] feat(consensus): commit --- Cargo.lock | 1 + consensus/Cargo.toml | 1 + consensus/src/dag/anchor_stage.rs | 24 +- consensus/src/dag/dag.rs | 271 ++++++++++++------ consensus/src/dag/dag_round.rs | 19 +- consensus/src/dag/producer.rs | 8 +- consensus/src/dag/verifier.rs | 17 +- consensus/src/engine/engine.rs | 50 ++-- consensus/src/engine/mempool_config.rs | 2 +- consensus/src/intercom/adapter/broadcaster.rs | 4 +- consensus/src/intercom/core/dispatcher.rs | 106 ------- consensus/src/intercom/core/responder.rs | 5 +- consensus/src/models/dag_point.rs | 8 + 13 files changed, 282 insertions(+), 234 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5fda94ca4..6402bf9fa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2243,6 +2243,7 @@ dependencies = [ "dashmap", "everscale-crypto", "futures-util", + "itertools", "parking_lot", "rand", "rand_pcg", diff --git a/consensus/Cargo.toml b/consensus/Cargo.toml index 11b77c57e..b2f5842b7 100644 --- a/consensus/Cargo.toml +++ b/consensus/Cargo.toml @@ -16,6 +16,7 @@ bytes = { workspace = true, features = ["serde"] } dashmap = { workspace = true } everscale-crypto = { workspace = true } futures-util = { workspace = true } +itertools = { workspace = true } parking_lot = { workspace = true } rand = { workspace = true } serde = { workspace = true, features = ["derive"] } diff --git a/consensus/src/dag/anchor_stage.rs b/consensus/src/dag/anchor_stage.rs index 94662876d..7a92c523d 100644 --- a/consensus/src/dag/anchor_stage.rs +++ b/consensus/src/dag/anchor_stage.rs @@ -1,3 +1,5 @@ +use std::sync::atomic::AtomicBool; + use rand::{Rng, SeedableRng}; use tycho_network::PeerId; @@ -8,8 +10,16 @@ use crate::models::Round; #[derive(Debug)] pub enum AnchorStage { Candidate(PeerId), // TODO nothing special, remove - Proof(PeerId), - Trigger(PeerId), + /// if anchor is locally committed then it must be marked as used (and vice versa) + Proof { + leader: PeerId, + is_used: AtomicBool, + }, + /// trigger is not necessary used - proof may be included by the next anchor and its own trigger + Trigger { + leader: PeerId, + is_used: AtomicBool, + }, } impl AnchorStage { @@ -35,8 +45,14 @@ impl AnchorStage { match round.0 % WAVE_SIZE { 0 => None, // both genesis and trailing (proof inclusion) round 1 => Some(AnchorStage::Candidate(leader.clone())), - 2 => Some(AnchorStage::Proof(leader.clone())), - 3 => Some(AnchorStage::Trigger(leader.clone())), + 2 => Some(AnchorStage::Proof { + leader: leader.clone(), + is_used: AtomicBool::new(false), + }), + 3 => 
Some(AnchorStage::Trigger { + leader: leader.clone(), + is_used: AtomicBool::new(false), + }), _ => unreachable!(), } } diff --git a/consensus/src/dag/dag.rs b/consensus/src/dag/dag.rs index 89018333f..9f770fc84 100644 --- a/consensus/src/dag/dag.rs +++ b/consensus/src/dag/dag.rs @@ -1,14 +1,20 @@ use std::collections::{BTreeMap, VecDeque}; -use std::num::NonZeroU8; use std::sync::atomic::Ordering; use std::sync::Arc; +use futures_util::stream::FuturesUnordered; +use futures_util::StreamExt; +use parking_lot::Mutex; + +use crate::dag::anchor_stage::AnchorStage; use crate::dag::DagRound; -use crate::models::{Point, PointId, Round, ValidPoint}; +use crate::engine::MempoolConfig; +use crate::models::{Point, Round, ValidPoint}; +#[derive(Clone)] pub struct Dag { // from the oldest to the current round; newer ones are in the future - rounds: BTreeMap, + rounds: Arc>>, } impl Dag { @@ -41,72 +47,172 @@ impl Dag { } } - pub fn get_or_insert(&mut self, dag_round: DagRound) -> DagRound { - self.rounds + pub fn get_or_insert(&self, dag_round: DagRound) -> DagRound { + let mut rounds = self.rounds.lock(); + rounds .entry(dag_round.round().clone()) .or_insert(dag_round) .clone() } - // TODO the next "little anchor candidate that could" must have at least full dag depth - pub fn drop_tail(&mut self, anchor_at: Round, dag_depth: NonZeroU8) { - if let Some(tail) = anchor_at.0.checked_sub(dag_depth.get() as u32) { - self.rounds = self.rounds.split_off(&Round(tail)); + // fixme must not be async + pub async fn commit( + self, + next_dag_round: DagRound, + ) -> VecDeque<(Arc, VecDeque>)> { + let Some(latest_trigger) = Self::latest_trigger(&next_dag_round).await else { + return VecDeque::new(); }; + let mut anchor_stack = Self::anchor_stack(&latest_trigger, next_dag_round.clone()).await; + let mut ordered = VecDeque::new(); + while let Some((anchor, anchor_round)) = anchor_stack.pop() { + self.drop_tail(anchor.point.body.location.round); + let committed = Self::gather_uncommitted(&anchor.point, &anchor_round).await; + ordered.push_back((anchor.point, committed)); + } + ordered } - async fn point_by_id(&self, point_id: &PointId) -> Option { - let dag_round = self.rounds.get(&point_id.location.round)?; - dag_round.valid_point(&point_id).await + async fn latest_trigger(next_round: &DagRound) -> Option { + let mut next_dag_round = next_round.clone(); + let mut latest_trigger = None; + while let Some(current_dag_round) = next_dag_round.prev().get() { + if let Some(AnchorStage::Trigger { + ref is_used, + ref leader, + }) = current_dag_round.anchor_stage() + { + if is_used.load(Ordering::Relaxed) { + break; + }; + let mut futs = FuturesUnordered::new(); + current_dag_round.view(leader, |loc| { + for (_, version) in loc.versions() { + futs.push(version.clone()) + } + }); + // FIXME traversing the DAG must not be async: we need the way to determine completed tasks + // its sufficient to use only ready futures at this point, must ignore downloading tasks + while let Some((found, _)) = futs.next().await { + if let Some(valid) = found.into_valid() { + _ = latest_trigger.insert(valid); + is_used.store(true, Ordering::Relaxed); + break; + } + } + }; + next_dag_round = current_dag_round; + } + latest_trigger } - async fn vertex_by_proof(&self, proof: &ValidPoint) -> Option { - let dag_round = self.rounds.get(&proof.point.body.location.round.prev())?; - match &proof.point.body.proof { - Some(proven) => { - dag_round - .valid_point_exact(&proof.point.body.location.author, &proven.digest) - .await + /// return order: newest 
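// The commit wave layout implied by the `round.0 % WAVE_SIZE` match above, assuming
// WAVE_SIZE == 4 (the exhaustive 0..=3 arms plus `unreachable!()` leave no other
// value). Leader selection is elided; this only names the role each round plays.
fn wave_stage_sketch(round: u32) -> &'static str {
    match round % 4 {
        0 => "none: trailing round, the anchor proof gets included here",
        1 => "candidate: the leader produces the anchor candidate point",
        2 => "proof: the leader's next point proves the candidate",
        3 => "trigger: the leader's point triggers committing the proven anchor",
        _ => unreachable!(),
    }
}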
(in depth) to oldest (on top); use with `vec.pop()` + async fn anchor_stack( + last_trigger: &ValidPoint, + mut future_round: DagRound, + ) -> Vec<(ValidPoint, DagRound)> { + assert_eq!( + last_trigger.point.prev_id(), + Some(last_trigger.point.anchor_proof_id()), + "invalid anchor proof link, trigger point must have been invalidated" + ); + let mut anchor_stack = Vec::new(); + let Some(mut proof) = future_round.vertex_by_proof(last_trigger).await else { + panic!("anchor proof round not in DAG") + }; + loop { + let Some(proof_round) = future_round.scan(&proof.point.body.location.round) else { + panic!("anchor proof round not in DAG while a point from it was received") + }; + if proof_round.round() == &MempoolConfig::GENESIS_ROUND { + break; + } + match proof_round.anchor_stage() { + Some(AnchorStage::Proof { + ref leader, + ref is_used, + }) => { + assert_eq!( + proof.point.body.location.round, + *proof_round.round(), + "anchor proof round does not match" + ); + assert_eq!( + proof.point.body.location.author, leader, + "anchor proof author does not match prescribed by round" + ); + let Some(anchor_round) = proof_round.prev().get() else { + break; + }; + if is_used.load(Ordering::Relaxed) { + break; + }; + let mut proofs = FuturesUnordered::new(); + proof_round.view(leader, |loc| { + for (_, version) in loc.versions() { + proofs.push(version.clone()) + } + }); + let mut anchor = None; + 'v: while let Some((proof, _)) = proofs.next().await { + if let Some(valid) = proof.into_valid() { + let Some(valid) = proof_round.vertex_by_proof(&valid).await else { + panic!("anchor proof is not linked to anchor, validation broken") + }; + _ = anchor.insert(valid); + is_used.store(true, Ordering::Relaxed); + break 'v; + } + } + let anchor = anchor + .expect("any anchor proof points to anchor point, validation is broken"); + anchor_stack.push((anchor.clone(), anchor_round.clone())); + + let Some(next_proof) = proof_round + .valid_point(&anchor.point.anchor_proof_id()) + .await + else { + break; + }; + proof = next_proof; + future_round = anchor_round; + } + _ => panic!("anchor proof round is not expected, validation is broken"), } - None => None, } + anchor_stack } - // @return historically ordered vertices (back to front is older to newer) - pub async fn gather_uncommitted( - &self, - anchor_trigger: &PointId, - // dag_depth: usize, - ) -> VecDeque> { - let Some(anchor_trigger) = self.point_by_id(anchor_trigger).await else { - panic!( - "Coding error: anchor trigger @ {:?} is not in DAG", - &anchor_trigger.location.round - ); - }; - // anchor must be a vertex @ r+1, proven with point @ r+2 - let Some(anchor_proof) = self.vertex_by_proof(&anchor_trigger).await else { - panic!( - "Coding error: anchor proof @ {:?} is not in DAG", - &anchor_trigger.point.body.location.round.prev() - ); - }; - _ = anchor_trigger; // no more needed for commit - let Some(anchor) = self.vertex_by_proof(&anchor_proof).await else { - panic!( - "Coding error: anchor @ {:?} is not in DAG", - &anchor_proof.point.body.location.round.prev() - ); + // TODO the next "little anchor candidate that could" must have at least full dag depth + fn drop_tail(&self, anchor_at: Round) { + if let Some(tail) = anchor_at.0.checked_sub(MempoolConfig::COMMIT_DEPTH) { + let mut rounds = self.rounds.lock(); + // TODO if sync is implemented as a second sub-graph - drop up to last linked + *rounds = rounds.split_off(&Round(tail)); }; - _ = anchor_proof; // no more needed for commit - - let mut cur_includes_round = anchor.point.body.location.round.prev(); 
/* r+0 */ + } + /// returns historically ordered vertices (back to front is older to newer) + /// + /// Note: at this point there is no way to check if passed point is really an anchor + async fn gather_uncommitted( + anchor /* @ r+1 */: &Point, + anchor_round /* r+1 */: &DagRound, + ) -> VecDeque> { + assert_eq!( + *anchor_round.round(), + anchor.body.location.round, + "passed anchor round does not match anchor point's round" + ); + let mut proof_round /* r+0 */ = anchor_round + .prev() + .get() + .expect("previous round for anchor point round must stay in DAG"); let mut r = [ - anchor.point.body.includes.clone(), // points @ r+0 - anchor.point.body.witness.clone(), // points @ r-1 - BTreeMap::new(), // points @ r-2 - BTreeMap::new(), // points @ r-3 + anchor.body.includes.clone(), // points @ r+0 + anchor.body.witness.clone(), // points @ r-1 + BTreeMap::new(), // points @ r-2 + BTreeMap::new(), // points @ r-3 ]; _ = anchor; // anchor payload will be committed the next time @@ -114,10 +220,9 @@ impl Dag { // TODO visited rounds count must be equal to dag depth: // read/download non-existent rounds and drop too old ones - while let Some((proof_round /* r+0 */, vertex_round /* r-1 */)) = self - .rounds - .get(&cur_includes_round) - .and_then(|cur| cur.prev().get().map(|prev| (cur, prev))) + while let Some(vertex_round /* r-1 */) = proof_round + .prev() + .get() .filter(|_| !r.iter().all(BTreeMap::is_empty)) { // take points @ r+0, and select their vertices @ r-1 for commit @@ -126,38 +231,36 @@ impl Dag { // Every point must be valid (we've validated anchor dependencies already), // but some points don't have previous one to proof as vertex. // Any valid point among equivocated will do, as they include the same vertex. - if let Some(proof /* point @ r+0 */) = + let Some(proof /* point @ r+0 */) = proof_round.valid_point_exact(node, digest).await + else { + panic!("point to commit not found in DAG") + }; + let author = &proof.point.body.location.author; + r[1].extend(proof.point.body.includes.clone()); // points @ r-1 + r[2].extend(proof.point.body.witness.clone()); // points @ r-2 + let Some(digest) = proof.point.body.proof.as_ref().map(|a| &a.digest) else { + continue; + }; + let Some(vertex /* point @ r-1 */) = + vertex_round.valid_point_exact(author, &digest).await + else { + panic!("point to commit not found in DAG or wrong round") + }; + // select uncommitted ones, marking them as committed + // to exclude from the next commit + if vertex + .is_committed + .compare_exchange(false, true, Ordering::Release, Ordering::Relaxed) + .is_ok() { - if proof.is_committed.load(Ordering::Acquire) { - continue; - } - let author = &proof.point.body.location.author; - r[1].extend(proof.point.body.includes.clone()); // points @ r-1 - r[2].extend(proof.point.body.witness.clone()); // points @ r-2 - let Some(digest) = proof.point.body.proof.as_ref().map(|a| &a.digest) else { - continue; - }; - if let Some(vertex /* point @ r-1 */) = vertex_round - .valid_point_exact(author, &digest) - .await - // select uncommitted ones, marking them as committed - // to exclude from the next commit - .filter(|vertex| { - vertex - .is_committed - .compare_exchange(false, true, Ordering::Release, Ordering::Relaxed) - .is_ok() - }) - { - // vertex will be skipped in r_1 as committed - r[2].extend(vertex.point.body.includes.clone()); // points @ r-2 - r[3].extend(vertex.point.body.witness.clone()); // points @ r-3 - uncommitted.push_back(vertex.point); // LIFO - } + // vertex will be skipped in r_1 as committed + 
r[2].extend(vertex.point.body.includes.clone()); // points @ r-2 + r[3].extend(vertex.point.body.witness.clone()); // points @ r-3 + uncommitted.push_back(vertex.point); // LIFO } } - cur_includes_round = vertex_round.round().clone(); // next r+0 + proof_round = vertex_round; // next r+0 r.rotate_left(1); } uncommitted diff --git a/consensus/src/dag/dag_round.rs b/consensus/src/dag/dag_round.rs index 334e7d05b..0dfec5e30 100644 --- a/consensus/src/dag/dag_round.rs +++ b/consensus/src/dag/dag_round.rs @@ -130,6 +130,18 @@ impl DagRound { WeakDagRound(Arc::downgrade(&self.0)) } + pub async fn vertex_by_proof(&self, proof: &ValidPoint) -> Option { + match proof.point.body.proof { + Some(ref proven) => { + let dag_round = self.scan(&proof.point.body.location.round.prev())?; + dag_round + .valid_point_exact(&proof.point.body.location.author, &proven.digest) + .await + } + None => None, + } + } + pub async fn valid_point(&self, point_id: &PointId) -> Option { match self.scan(&point_id.location.round) { Some(linked) => { @@ -221,9 +233,10 @@ impl DagRound { } pub fn scan(&self, round: &Round) -> Option { - if round > self.round() { - panic!("Coding error: cannot add future point into DAG round with scan") - } + assert!( + round <= self.round(), + "Coding error: cannot scan DAG rounds chain for a future round" + ); let mut visited = self.clone(); if round == self.round() { return Some(visited); diff --git a/consensus/src/dag/producer.rs b/consensus/src/dag/producer.rs index 53d63f124..f27f2aed4 100644 --- a/consensus/src/dag/producer.rs +++ b/consensus/src/dag/producer.rs @@ -22,8 +22,8 @@ impl Producer { let key_pair = new_round.key_pair()?; let local_id = PeerId::from(key_pair.public_key); match new_round.anchor_stage() { - Some(AnchorStage::Proof(peer_id) | AnchorStage::Trigger(peer_id)) - if peer_id == &local_id && prev_point.is_none() => + Some(AnchorStage::Proof { leader, .. } | AnchorStage::Trigger { leader, .. }) + if leader == &local_id && prev_point.is_none() => { // wave leader must skip new round if it failed to produce 3 points in a row return None; @@ -107,10 +107,10 @@ impl Producer { is_for_trigger: bool, ) -> Link { match new_round.anchor_stage() { - Some(AnchorStage::Trigger(leader_id)) if is_for_trigger && leader_id == local_id => { + Some(AnchorStage::Trigger { leader, .. }) if is_for_trigger && leader == local_id => { Link::ToSelf } - Some(AnchorStage::Proof(leader_id)) if !is_for_trigger && leader_id == local_id => { + Some(AnchorStage::Proof { leader, .. }) if !is_for_trigger && leader == local_id => { Link::ToSelf } _ => { diff --git a/consensus/src/dag/verifier.rs b/consensus/src/dag/verifier.rs index 14295b810..357289220 100644 --- a/consensus/src/dag/verifier.rs +++ b/consensus/src/dag/verifier.rs @@ -81,12 +81,11 @@ impl Verifier { || point.body.location.round == MempoolConfig::GENESIS_ROUND } // leader must link to own point while others must not - Some(AnchorStage::Proof(leader_id)) => { - (leader_id == point.body.location.author) - == (point.body.anchor_proof == Link::ToSelf) + Some(AnchorStage::Proof { leader, .. }) => { + (leader == point.body.location.author) == (point.body.anchor_proof == Link::ToSelf) } - Some(AnchorStage::Trigger(leader_id)) => { - (leader_id == point.body.location.author) + Some(AnchorStage::Trigger { leader, .. 
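// The deduplication idea in `gather_uncommitted` above: every vertex carries an
// `is_committed` flag, and only the first anchor to win the compare_exchange may
// put it into its committed batch, so overlapping anchor histories never emit the
// same point twice. A minimal sketch of that claim step:
use std::sync::atomic::{AtomicBool, Ordering};

fn try_claim_for_commit(is_committed: &AtomicBool) -> bool {
    // success uses Release, failure only needs Relaxed, mirroring the code above
    is_committed
        .compare_exchange(false, true, Ordering::Release, Ordering::Relaxed)
        .is_ok()
}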
}) => { + (leader == point.body.location.author) == (point.body.anchor_trigger == Link::ToSelf) } } @@ -110,10 +109,10 @@ impl Verifier { if found { match (&dag_round.anchor_stage(), is_trigger) { // AnchorStage::Candidate(_) requires nothing special - (Some(AnchorStage::Proof(leader_id)), false) - if leader_id == linked.location.author => {} - (Some(AnchorStage::Trigger(leader_id)), true) - if leader_id == linked.location.author => {} + (Some(AnchorStage::Proof { leader, .. }), false) + if leader == linked.location.author => {} + (Some(AnchorStage::Trigger { leader, .. }), true) + if leader == linked.location.author => {} _ => return false, // link not to round's leader } linked_with_round.push(( diff --git a/consensus/src/engine/engine.rs b/consensus/src/engine/engine.rs index 0f1d773d0..d69a276ca 100644 --- a/consensus/src/engine/engine.rs +++ b/consensus/src/engine/engine.rs @@ -1,6 +1,7 @@ use std::sync::Arc; use everscale_crypto::ed25519::{KeyPair, SecretKey}; +use itertools::Itertools; use tokio::sync::{mpsc, Notify}; use tycho_network::{DhtClient, OverlayService, PeerId}; @@ -74,7 +75,7 @@ impl Engine { // * or search through last round to find the latest trigger // * * can U do so without scan of a round ??? - let mut dag = Dag::new(); + let dag = Dag::new(); let current_dag_round = dag.get_or_insert(DagRound::genesis(&genesis, &peer_schedule)); let genesis_state = current_dag_round @@ -110,6 +111,8 @@ impl Engine { // let this channel unbounded - there won't be many items, but every of them is essential let (signer_signal_tx, mut signer_signal_rx) = mpsc::unbounded_channel(); + let commit_run = tokio::spawn(self.dag.clone().commit(next_dag_round.clone())); + // TODO change round, then // apply peer schedule and config changes if some // spawn signer @@ -142,9 +145,10 @@ impl Engine { ) .run(), ); - let joined = tokio::join!(signer_run, bcaster_run); + let joined = tokio::join!(signer_run, bcaster_run, commit_run); match joined { - (Ok(signer_upd), Ok(evidence_or_reject)) => { + (Ok(signer_upd), Ok(evidence_or_reject), Ok(committed)) => { + tracing::info!("committed {:#.4?}", committed); self.prev_point = evidence_or_reject.ok().map(|evidence| PrevPoint { digest: own_point.digest.clone(), evidence: evidence.into_iter().collect(), @@ -159,16 +163,18 @@ impl Engine { self.current_dag_round = next_dag_round; // FIXME must fill gaps with empty rounds self.signer = signer_upd; } - (Err(se), Err(be)) => { - panic!( - "Both Signer and Broadcaster panicked. Signer: {se:?}. 
Broadcaster: {be:?}" - ) - } - (Err(se), _) => { - panic!("Signer panicked: {se:?}") - } - (_, Err(be)) => { - panic!("Broadcaster panicked: {be:?}") + (signer, bcaster, commit) => { + let msg = [ + (signer.err(), "signer"), + (bcaster.err(), "broadcaster"), + (commit.err(), "commit"), + ] + .into_iter() + .filter_map(|(res, name)| { + res.map(|err| format!("{name} task panicked: {err:?}")) + }) + .join("; \n"); + panic!("{}", msg) } } } else { @@ -179,10 +185,10 @@ impl Engine { None, signer_signal_tx, bcaster_ready, - )) - .await; - match signer_run { - Ok(signer_upd) => { + )); + match tokio::join!(signer_run, commit_run) { + (Ok(signer_upd), Ok(committed)) => { + tracing::info!("committed {:#.4?}", committed); self.prev_point = None; self.cur_point = Producer::new_point( &self.current_dag_round, @@ -194,7 +200,15 @@ impl Engine { self.current_dag_round = next_dag_round; // FIXME must fill gaps with empty rounds self.signer = signer_upd; } - Err(se) => panic!("Signer panicked: {se:?}"), + (signer, commit) => { + let msg = [(signer.err(), "signer"), (commit.err(), "commit")] + .into_iter() + .filter_map(|(res, name)| { + res.map(|err| format!("{name} task panicked: {err:?}")) + }) + .join("; \n"); + panic!("{}", msg) + } } } } diff --git a/consensus/src/engine/mempool_config.rs b/consensus/src/engine/mempool_config.rs index a5425daca..3cd30bbdd 100644 --- a/consensus/src/engine/mempool_config.rs +++ b/consensus/src/engine/mempool_config.rs @@ -25,7 +25,7 @@ impl MempoolConfig { /// (dependencies and/or signatures), and waiting for slow nodes pub const RETRY_INTERVAL: Duration = Duration::from_millis(1000); - pub const DAG_DEPTH: usize = 20; + pub const COMMIT_DEPTH: u32 = 20; pub const GENESIS_ROUND: Round = Round(1); } diff --git a/consensus/src/intercom/adapter/broadcaster.rs b/consensus/src/intercom/adapter/broadcaster.rs index 0fff3f857..98f8e5110 100644 --- a/consensus/src/intercom/adapter/broadcaster.rs +++ b/consensus/src/intercom/adapter/broadcaster.rs @@ -250,13 +250,13 @@ impl Broadcaster { Ok(response) => { if response == SignatureResponse::Rejected { tracing::warn!( - "{} @ {:?} bcaster <= signer {peer_id:.4?} : {response:?}", + "{} @ {:?} bcaster <= signer {peer_id:.4?} : {response:.4?}", self.local_id, self.current_round ); } else { tracing::info!( - "{} @ {:?} bcaster <= signer {peer_id:.4?} : {response:?}", + "{} @ {:?} bcaster <= signer {peer_id:.4?} : {response:.4?}", self.local_id, self.current_round ); diff --git a/consensus/src/intercom/core/dispatcher.rs b/consensus/src/intercom/core/dispatcher.rs index 42dbb3cb8..dea6e7092 100644 --- a/consensus/src/intercom/core/dispatcher.rs +++ b/consensus/src/intercom/core/dispatcher.rs @@ -81,109 +81,3 @@ impl Dispatcher { .boxed() } } - -/* FIXME -#[cfg(test)] -mod tests { - use tycho_network::{Address, PeerInfo}; - use tycho_util::time::now_sec; - - use crate::engine::node_count::NodeCount; - use crate::engine::peer_schedule::PeerSchedule; - use crate::models::point::Digest; - - use super::*; - - fn make_peer_info(key: &ed25519::SecretKey, address: Address) -> PeerInfo { - let keypair = ed25519::KeyPair::from(key); - let peer_id = PeerId::from(keypair.public_key); - - let now = now_sec(); - let mut peer_info = PeerInfo { - id: peer_id, - address_list: vec![address].into_boxed_slice(), - created_at: now, - expires_at: u32::MAX, - signature: Box::new([0; 64]), - }; - *peer_info.signature = keypair.sign(&peer_info); - peer_info - } - - async fn make_network(node_count: usize) -> Vec { - let keys = (0..node_count) - .map(|_| 
ed25519::SecretKey::generate(&mut rand::thread_rng())) - .collect::>(); - - let all_peers = keys - .iter() - .map(|s| PeerId::from(ed25519::KeyPair::from(s).public_key)) - .collect::>(); - - let nodes = keys - .iter() - .map(|s| Dispatcher::new((Ipv4Addr::LOCALHOST, 0), s, &all_peers)) - .collect::>(); - - let bootstrap_info = std::iter::zip(&keys, &nodes) - .map(|(key, peer)| Arc::new(make_peer_info(key, peer.network.local_addr().into()))) - .collect::>(); - - let schedules = std::iter::zip(&all_peers, &nodes) - .map(|(peer_id, peer)| PeerSchedule::new(Round(0), &all_peers, &peer.overlay, peer_id)) - .collect::>(); - - if let Some(node) = nodes.first() { - for info in &bootstrap_info { - if info.id == node.network.peer_id() { - continue; - } - node.dht_client.add_peer(info.clone()).unwrap(); - } - } - - // let all_peers = FastHashSet::from_iter(all_peers.into_iter()); - for sch in &schedules { - sch.wait_for_peers(Round(1), NodeCount::new(node_count)) - .await; - tracing::info!("found peers for {}", sch.local_id); - } - - nodes - } - - #[tokio::test] - async fn dispatcher_works() -> Result<()> { - tracing_subscriber::fmt::try_init().ok(); - tracing::info!("dispatcher_works"); - - let peers = make_network(3).await; - - let point_id = PointId { - location: crate::models::point::Location { - round: Round(0), - author: PeerId([0u8; 32]), - }, - digest: Digest([0u8; 32]), - }; - - // FIXME must connect only to resolved peers - for i in 0..peers.len() { - for j in 0..peers.len() { - if i == j { - continue; - } - - let left = &peers[i]; - let right = &peers[j]; - - let point_opt = left - .point_by_id(right.network.peer_id(), point_id.clone()) - .await?; - assert!(point_opt.is_none()); - } - } - Ok(()) - } -} -*/ diff --git a/consensus/src/intercom/core/responder.rs b/consensus/src/intercom/core/responder.rs index d0a3fd5b2..e02ffaa8b 100644 --- a/consensus/src/intercom/core/responder.rs +++ b/consensus/src/intercom/core/responder.rs @@ -85,7 +85,7 @@ impl ResponderInner { } }; - let res = Some(Response { + Some(Response { version: Version::default(), body: Bytes::from(match bincode::serialize(&MPRemoteResult::Ok(response)) { Ok(data) => data, @@ -95,7 +95,6 @@ impl ResponderInner { .expect("must not fail") } }), - }); - res + }) } } diff --git a/consensus/src/models/dag_point.rs b/consensus/src/models/dag_point.rs index 85536b7e7..363262a23 100644 --- a/consensus/src/models/dag_point.rs +++ b/consensus/src/models/dag_point.rs @@ -34,6 +34,14 @@ pub enum DagPoint { } impl DagPoint { + pub fn into_valid(self) -> Option { + match self { + DagPoint::Trusted(valid) => Some(valid), + DagPoint::Suspicious(valid) => Some(valid), + _ => None, + } + } + pub fn valid(&self) -> Option<&'_ ValidPoint> { match self { DagPoint::Trusted(valid) => Some(valid), From bf6e9e83b75cf7689b9ae9bdfc2c99ce743d249c Mon Sep 17 00:00:00 2001 From: Kirill Mikheev Date: Fri, 26 Apr 2024 04:00:40 +0300 Subject: [PATCH 15/32] feat(consensus): downloader --- consensus/Cargo.toml | 2 +- consensus/src/dag/dag.rs | 2 +- consensus/src/dag/dag_location.rs | 4 +- consensus/src/dag/dag_round.rs | 22 +- consensus/src/dag/producer.rs | 2 +- consensus/src/dag/verifier.rs | 48 +++- consensus/src/engine/engine.rs | 219 +++++++-------- consensus/src/engine/mempool_config.rs | 15 +- consensus/src/intercom/adapter/downloader.rs | 18 -- consensus/src/intercom/adapter/dto.rs | 26 -- .../broadcast_filter.rs | 101 ++++--- .../{adapter => broadcast}/broadcaster.rs | 130 ++++----- .../signer.rs => broadcast/collector.rs} | 154 ++++++----- 
consensus/src/intercom/broadcast/dto.rs | 20 ++ .../intercom/{adapter => broadcast}/mod.rs | 8 +- consensus/src/intercom/core/dispatcher.rs | 7 +- consensus/src/intercom/core/mod.rs | 2 +- consensus/src/intercom/core/responder.rs | 41 ++- .../src/intercom/dependency/downloader.rs | 250 ++++++++++++++++++ consensus/src/intercom/dependency/mod.rs | 8 + consensus/src/intercom/dependency/uploader.rs | 57 ++++ consensus/src/intercom/dto.rs | 9 +- consensus/src/intercom/mod.rs | 8 +- consensus/src/intercom/peer_schedule/mod.rs | 2 +- .../intercom/peer_schedule/peer_schedule.rs | 52 ++-- consensus/src/models/node_count.rs | 34 ++- consensus/src/models/point.rs | 3 +- consensus/src/test_utils.rs | 2 +- 28 files changed, 816 insertions(+), 430 deletions(-) delete mode 100644 consensus/src/intercom/adapter/downloader.rs delete mode 100644 consensus/src/intercom/adapter/dto.rs rename consensus/src/intercom/{adapter => broadcast}/broadcast_filter.rs (80%) rename consensus/src/intercom/{adapter => broadcast}/broadcaster.rs (74%) rename consensus/src/intercom/{adapter/signer.rs => broadcast/collector.rs} (70%) create mode 100644 consensus/src/intercom/broadcast/dto.rs rename consensus/src/intercom/{adapter => broadcast}/mod.rs (56%) create mode 100644 consensus/src/intercom/dependency/downloader.rs create mode 100644 consensus/src/intercom/dependency/mod.rs create mode 100644 consensus/src/intercom/dependency/uploader.rs diff --git a/consensus/Cargo.toml b/consensus/Cargo.toml index b2f5842b7..dbfcea814 100644 --- a/consensus/Cargo.toml +++ b/consensus/Cargo.toml @@ -18,7 +18,7 @@ everscale-crypto = { workspace = true } futures-util = { workspace = true } itertools = { workspace = true } parking_lot = { workspace = true } -rand = { workspace = true } +rand = { workspace = true, features = ["small_rng"] } serde = { workspace = true, features = ["derive"] } sha2 = { workspace = true } tokio = { workspace = true, default-features = false } diff --git a/consensus/src/dag/dag.rs b/consensus/src/dag/dag.rs index 9f770fc84..ef10f5571 100644 --- a/consensus/src/dag/dag.rs +++ b/consensus/src/dag/dag.rs @@ -185,7 +185,7 @@ impl Dag { // TODO the next "little anchor candidate that could" must have at least full dag depth fn drop_tail(&self, anchor_at: Round) { - if let Some(tail) = anchor_at.0.checked_sub(MempoolConfig::COMMIT_DEPTH) { + if let Some(tail) = anchor_at.0.checked_sub(MempoolConfig::COMMIT_DEPTH as u32) { let mut rounds = self.rounds.lock(); // TODO if sync is implemented as a second sub-graph - drop up to last linked *rounds = rounds.split_off(&Round(tail)); diff --git a/consensus/src/dag/dag_location.rs b/consensus/src/dag/dag_location.rs index 9c11cfa95..f25fa8c7d 100644 --- a/consensus/src/dag/dag_location.rs +++ b/consensus/src/dag/dag_location.rs @@ -81,7 +81,7 @@ impl DagLocation { // TODO either leave output as is and reduce locking in 'inclusion state' // (as single thread consumes them and makes signature), // or better add global Watch CurrentDagRound (unify with broadcast filter!) 
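// How `Dag::drop_tail` (see the COMMIT_DEPTH change above) prunes old rounds:
// BTreeMap::split_off(&k) returns the entries with keys >= k, so assigning the
// result back keeps only the rounds from `anchor_at - COMMIT_DEPTH` upwards.
// Plain u32 keys stand in for Round in this sketch.
use std::collections::BTreeMap;

fn drop_tail_example() {
    let mut rounds: BTreeMap<u32, &str> = (1..=10).map(|r| (r, "round data")).collect();
    let (anchor_at, commit_depth) = (10u32, 4u32);
    if let Some(tail) = anchor_at.checked_sub(commit_depth) {
        rounds = rounds.split_off(&tail); // keeps rounds 6..=10, drops 1..=5
    }
    assert_eq!(rounds.keys().copied().collect::<Vec<_>>(), vec![6, 7, 8, 9, 10]);
}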
- // and sign inside this future (remove futures unordered in signer) + // and sign inside this future (remove futures unordered in collector) init().inspect(move |dag_point| state.init(dag_point)) }))); Some(shared) @@ -96,7 +96,7 @@ impl DagLocation { } } -// Todo remove inner locks and introduce global current dag round watch simultaneously, see Signer +// Todo remove inner locks and introduce global current dag round watch simultaneously, see Collector #[derive(Default, Clone)] pub struct InclusionState(Arc>); diff --git a/consensus/src/dag/dag_round.rs b/consensus/src/dag/dag_round.rs index 0dfec5e30..cbe7260bd 100644 --- a/consensus/src/dag/dag_round.rs +++ b/consensus/src/dag/dag_round.rs @@ -11,7 +11,7 @@ use tycho_util::FastDashMap; use crate::dag::anchor_stage::AnchorStage; use crate::dag::{DagLocation, InclusionState, Verifier}; use crate::engine::MempoolConfig; -use crate::intercom::PeerSchedule; +use crate::intercom::{Downloader, PeerSchedule}; use crate::models::{DagPoint, Digest, NodeCount, Point, PointId, Round, ValidPoint}; #[derive(Clone)] @@ -158,12 +158,20 @@ impl DagRound { point_fut.await.0.valid().cloned() } - pub fn add(&self, point: &Arc) -> Option> { + pub fn add( + &self, + point: &Arc, + downloader: &Downloader, + ) -> Option> { self.scan(&point.body.location.round) - .and_then(|linked| linked.add_exact(&point)) + .and_then(|linked| linked.add_exact(&point, downloader)) } - fn add_exact(&self, point: &Arc) -> Option> { + fn add_exact( + &self, + point: &Arc, + downloader: &Downloader, + ) -> Option> { if &point.body.location.round != self.round() { panic!("Coding error: dag round mismatches point round on add") } @@ -172,7 +180,8 @@ impl DagRound { self.edit(&point.body.location.author, |loc| { let state = loc.state().clone(); let point = point.clone(); - loc.add_validate(digest, || Verifier::validate(point, dag_round)) + let downloader = downloader.clone(); + loc.add_validate(digest, || Verifier::validate(point, dag_round, downloader)) .map(|first| first.clone().map(|_| state).boxed()) }) } @@ -182,11 +191,12 @@ impl DagRound { &self, point: &Arc, peer_schedule: &PeerSchedule, + downloader: &Downloader, ) -> Option { if !Verifier::verify(point, peer_schedule).is_ok() { panic!("Coding error: malformed point") } - let point = Verifier::validate(point.clone(), self.clone()).await; + let point = Verifier::validate(point.clone(), self.clone(), downloader.clone()).await; if point.valid().is_none() { panic!("Coding error: not a valid point") } diff --git a/consensus/src/dag/producer.rs b/consensus/src/dag/producer.rs index f27f2aed4..0d0f7c2ef 100644 --- a/consensus/src/dag/producer.rs +++ b/consensus/src/dag/producer.rs @@ -81,7 +81,7 @@ impl Producer { .collect::>(); assert!( includes.iter().count() >= finished_round.node_count().majority(), - "Coding error: producing point with not enough includes, check Signer logic" + "Coding error: producing point with not enough includes, check Collector logic" ); includes } diff --git a/consensus/src/dag/verifier.rs b/consensus/src/dag/verifier.rs index 357289220..5b75b8676 100644 --- a/consensus/src/dag/verifier.rs +++ b/consensus/src/dag/verifier.rs @@ -9,7 +9,7 @@ use crate::dag::anchor_stage::AnchorStage; use crate::dag::DagRound; use crate::engine::MempoolConfig; use crate::intercom::{Downloader, PeerSchedule}; -use crate::models::{DagPoint, Digest, Link, Location, NodeCount, Point, ValidPoint}; +use crate::models::{DagPoint, Digest, Link, Location, NodeCount, Point, PointId, ValidPoint}; /* Note on equivocation. 
@@ -47,7 +47,11 @@ impl Verifier { } /// must be called iff [Self::verify] succeeded - pub async fn validate(point /* @ r+0 */: Arc, r_0 /* r+0 */: DagRound) -> DagPoint { + pub async fn validate( + point /* @ r+0 */: Arc, + r_0 /* r+0 */: DagRound, + downloader: Downloader, + ) -> DagPoint { // TODO upgrade Weak whenever used to let Dag Round drop if some future hangs up for long if &point.body.location.round != r_0.round() { panic!("Coding error: dag round mismatches point round") @@ -57,12 +61,12 @@ impl Verifier { if !({ Self::is_self_links_ok(&point, &r_0) // the last task spawns if ok - in order not to walk through every dag round twice - && Self::add_anchor_links_if_ok(&point, &r_0, &mut dependencies) + && Self::add_anchor_links_if_ok(&point, &r_0, &downloader, &mut dependencies) }) { return DagPoint::Invalid(point.clone()); } if let Some(r_1) = r_0.prev().get() { - Self::gather_deps(&point, &r_1, &mut dependencies); + Self::gather_deps(&point, &r_1, &downloader, &mut dependencies); return Self::check_deps(&point, dependencies).await; } // If r-1 exceeds dag depth, the arg point @ r+0 is considered valid by itself. @@ -95,6 +99,7 @@ impl Verifier { fn add_anchor_links_if_ok( point: &Point, // @ r+0 dag_round: &DagRound, // start with r+0 + downloader: &Downloader, dependencies: &mut JoinSet, ) -> bool { let mut links = vec![ @@ -138,40 +143,63 @@ impl Verifier { if dag_round.round() < &point.body.location.round { // will add the same point from direct dependencies twice, // we can do better but nothing terrible - Self::add_dependency(&author, &digest, &dag_round, dependencies); + Self::add_dependency( + &author, + &digest, + &dag_round, + &point.body.location.author, + downloader, + dependencies, + ); } } true } fn add_dependency( - node: &PeerId, + author: &PeerId, digest: &Digest, round: &DagRound, + dependant: &PeerId, + downloader: &Downloader, dependencies: &mut JoinSet, ) { - let shared = round.edit(node, |loc| loc.add_dependency(digest, || Downloader {})); + let downloader = downloader.clone(); + let shared = round.edit(author, |loc| { + loc.add_dependency(digest, move || { + let point_id = PointId { + location: Location { + author: author.clone(), + round: round.round().clone(), + }, + digest: digest.clone(), + }; + downloader.run(point_id, round.clone(), dependant.clone()) + }) + }); dependencies.spawn(shared.map(|(dag_point, _)| dag_point)); } fn gather_deps( point /* @ r+0 */: &Point, r_1 /* r-1 */: &DagRound, + downloader: &Downloader, dependencies: &mut JoinSet, ) { - r_1.view(&point.body.location.author, |loc| { + let author = &point.body.location.author; + r_1.view(author, |loc| { for (_, shared) in loc.versions() { dependencies.spawn(shared.clone().map(|(dag_point, _)| dag_point)); } }); for (node, digest) in &point.body.includes { // integrity check passed, so includes contain author's prev point proof - Self::add_dependency(&node, &digest, &r_1, dependencies); + Self::add_dependency(&node, &digest, &r_1, author, downloader, dependencies); } if let Some(r_2) = r_1.prev().get() { for (node, digest) in &point.body.witness { - Self::add_dependency(&node, &digest, &r_2, dependencies); + Self::add_dependency(&node, &digest, &r_2, author, downloader, dependencies); } }; } diff --git a/consensus/src/engine/engine.rs b/consensus/src/engine/engine.rs index d69a276ca..75c484867 100644 --- a/consensus/src/engine/engine.rs +++ b/consensus/src/engine/engine.rs @@ -2,13 +2,15 @@ use std::sync::Arc; use everscale_crypto::ed25519::{KeyPair, SecretKey}; use itertools::Itertools; 
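A rough, self-contained sketch of the dependency-resolution pattern used by the Verifier changes above: every dependency becomes a future collected into a JoinSet, and any dependency that does not resolve to a valid point invalidates the dependant one. The types are simplified stand-ins, not the crate's DagPoint:

use tokio::task::JoinSet;

// simplified stand-in for the crate's DagPoint variants
enum DepResult {
    Valid,
    Invalid,
    NotFound,
}

// await every spawned dependency; a single non-valid result fails the whole point
async fn all_deps_valid(mut dependencies: JoinSet<DepResult>) -> bool {
    while let Some(joined) = dependencies.join_next().await {
        match joined {
            Ok(DepResult::Valid) => continue,
            // a panicked task, an invalid or a missing dependency is enough to fail
            _ => return false,
        }
    }
    true
}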
-use tokio::sync::{mpsc, Notify}; +use tokio::sync::{mpsc, watch}; +use tokio::task::JoinSet; use tycho_network::{DhtClient, OverlayService, PeerId}; -use crate::dag::{Dag, DagRound, Producer}; +use crate::dag::{Dag, DagRound, Producer, WeakDagRound}; use crate::intercom::{ - BroadcastFilter, Broadcaster, Dispatcher, PeerSchedule, PeerScheduleUpdater, Responder, Signer, + BroadcastFilter, Broadcaster, BroadcasterSignal, Collector, Dispatcher, Downloader, + PeerSchedule, PeerScheduleUpdater, Responder, Uploader, }; use crate::models::{Point, PrevPoint}; @@ -17,10 +19,13 @@ pub struct Engine { local_id: Arc, peer_schedule: Arc, dispatcher: Dispatcher, - signer: Signer, - prev_point: Option, + downloader: Downloader, + collector: Collector, + broadcast_filter: BroadcastFilter, cur_point: Option>, current_dag_round: DagRound, + top_dag_round_watch: watch::Sender, + tasks: JoinSet<()>, // should be JoinSet https://github.com/rust-lang/rust/issues/35121 } impl Engine { @@ -36,15 +41,23 @@ impl Engine { let (bcast_tx, bcast_rx) = mpsc::unbounded_channel(); - let broadcast_filter = BroadcastFilter::new(peer_schedule.clone(), bcast_tx); + let broadcast_filter = + BroadcastFilter::new(local_id.clone(), peer_schedule.clone(), bcast_tx); let (sig_requests, sig_responses) = mpsc::unbounded_channel(); + let (uploader_tx, uploader_rx) = mpsc::unbounded_channel(); + let dispatcher = Dispatcher::new( &dht_client, &overlay_service, peers, - Responder::new(broadcast_filter.clone(), sig_requests), + Responder::new( + local_id.clone(), + broadcast_filter.clone(), + sig_requests, + uploader_tx, + ), ); let genesis = Arc::new(crate::test_utils::genesis()); @@ -67,7 +80,7 @@ impl Engine { PeerScheduleUpdater::run(dispatcher.overlay.clone(), peer_schedule.clone()); // tOdO define if the last round is finished based on peer schedule - // move out from bcaster & signer ? where to get our last point from ? + // move out from bcaster & collector ? where to get our last point from ? // tOdO в конце каждого раунда берем точку с триггером // и комиттим @@ -78,11 +91,22 @@ impl Engine { let dag = Dag::new(); let current_dag_round = dag.get_or_insert(DagRound::genesis(&genesis, &peer_schedule)); + let (top_dag_round_watch, top_dag_round_rx) = watch::channel(current_dag_round.as_weak()); + + let mut tasks = JoinSet::new(); + let uploader = Uploader::new(uploader_rx, top_dag_round_rx); + tasks.spawn(async move { + uploader.run().await; + }); + + let downloader = Downloader::new(local_id.clone(), &dispatcher, &peer_schedule); + let genesis_state = current_dag_round - .insert_exact_validate(&genesis, &peer_schedule) + .insert_exact_validate(&genesis, &peer_schedule, &downloader) .await; - let signer = Signer::new( + let collector = Collector::new( local_id.clone(), + &downloader, bcast_rx, sig_responses, genesis_state.into_iter(), @@ -94,45 +118,53 @@ impl Engine { local_id, peer_schedule, dispatcher, - signer, - prev_point: None, + downloader, + collector, + broadcast_filter, cur_point: None, current_dag_round, + top_dag_round_watch, + tasks, } } - pub async fn run(mut self) { + pub async fn run(mut self) -> ! 
{ loop { let next_dag_round = self .dag .get_or_insert(self.current_dag_round.next(self.peer_schedule.as_ref())); + self.top_dag_round_watch.send(next_dag_round.as_weak()).ok(); - let bcaster_ready = Arc::new(Notify::new()); + let (bcaster_ready_tx, bcaster_ready_rx) = mpsc::channel(1); // let this channel unbounded - there won't be many items, but every of them is essential - let (signer_signal_tx, mut signer_signal_rx) = mpsc::unbounded_channel(); + let (collector_signal_tx, mut collector_signal_rx) = mpsc::unbounded_channel(); let commit_run = tokio::spawn(self.dag.clone().commit(next_dag_round.clone())); - + let bcast_filter_upd = { + let bcast_filter = self.broadcast_filter.clone(); + let round = next_dag_round.round().clone(); + tokio::spawn(async move { bcast_filter.advance_round(&round) }) + }; // TODO change round, then // apply peer schedule and config changes if some - // spawn signer + // spawn collector // spawn producer + broadcaster // spawn commit + drop dag tail (async?!) into futures ordered // it shouldn't take longer than round; // the other way it should make the change of rounds slower, // in order to prevent unlimited DAG growth - // sync if signer detected a gap exceeding dag depth + // sync if collector detected a gap exceeding dag depth // join if let Some(own_point) = self.cur_point { let own_state = self .current_dag_round - .insert_exact_validate(&own_point, &self.peer_schedule) + .insert_exact_validate(&own_point, &self.peer_schedule, &self.downloader) .await; - let signer_run = tokio::spawn(self.signer.run( + let collector_run = tokio::spawn(self.collector.run( next_dag_round.clone(), Some(own_point.clone()), - signer_signal_tx, - bcaster_ready.clone(), + collector_signal_tx, + bcaster_ready_rx, )); let bcaster_run = tokio::spawn( Broadcaster::new( @@ -140,34 +172,38 @@ impl Engine { &own_point, &self.dispatcher, &self.peer_schedule, - bcaster_ready, - signer_signal_rx, + bcaster_ready_tx, + collector_signal_rx, ) .run(), ); - let joined = tokio::join!(signer_run, bcaster_run, commit_run); - match joined { - (Ok(signer_upd), Ok(evidence_or_reject), Ok(committed)) => { - tracing::info!("committed {:#.4?}", committed); - self.prev_point = evidence_or_reject.ok().map(|evidence| PrevPoint { + match tokio::join!(collector_run, bcaster_run, commit_run, bcast_filter_upd) { + (Ok(collector_upd), Ok(evidence), Ok(committed), Ok(_bcast_filter_upd)) => { + tracing::info!("committed {:.4?}", committed); + let prev_point = Some(PrevPoint { digest: own_point.digest.clone(), evidence: evidence.into_iter().collect(), }); - self.cur_point = Producer::new_point( - &self.current_dag_round, - &next_dag_round, - self.prev_point.as_ref(), - vec![], - ) - .await; - self.current_dag_round = next_dag_round; // FIXME must fill gaps with empty rounds - self.signer = signer_upd; + if collector_upd.next_round() == next_dag_round.round() { + self.cur_point = Producer::new_point( + &self.current_dag_round, + &next_dag_round, + prev_point.as_ref(), + vec![], + ) + .await; + } else { + todo!("must fill gaps with empty rounds") + } + self.current_dag_round = next_dag_round; + self.collector = collector_upd; } - (signer, bcaster, commit) => { + (collector, bcaster, commit, bcast_filter_upd) => { let msg = [ - (signer.err(), "signer"), + (collector.err(), "collector"), (bcaster.err(), "broadcaster"), (commit.err(), "commit"), + (bcast_filter_upd.err(), "broadcast filter update"), ] .into_iter() .filter_map(|(res, name)| { @@ -178,35 +214,38 @@ impl Engine { } } } else { - 
signer_signal_rx.close(); - bcaster_ready.notify_one(); - let signer_run = tokio::spawn(self.signer.run( + collector_signal_rx.close(); + _ = bcaster_ready_tx.send(BroadcasterSignal::Ok).await; + let collector_run = tokio::spawn(self.collector.run( next_dag_round.clone(), None, - signer_signal_tx, - bcaster_ready, + collector_signal_tx, + bcaster_ready_rx, )); - match tokio::join!(signer_run, commit_run) { - (Ok(signer_upd), Ok(committed)) => { - tracing::info!("committed {:#.4?}", committed); - self.prev_point = None; + match tokio::join!(collector_run, commit_run, bcast_filter_upd) { + (Ok(collector_upd), Ok(committed), Ok(_bcast_filter_upd)) => { + tracing::info!("committed {:.4?}", committed); self.cur_point = Producer::new_point( &self.current_dag_round, &next_dag_round, - self.prev_point.as_ref(), + None, vec![], ) .await; self.current_dag_round = next_dag_round; // FIXME must fill gaps with empty rounds - self.signer = signer_upd; + self.collector = collector_upd; } - (signer, commit) => { - let msg = [(signer.err(), "signer"), (commit.err(), "commit")] - .into_iter() - .filter_map(|(res, name)| { - res.map(|err| format!("{name} task panicked: {err:?}")) - }) - .join("; \n"); + (collector, commit, bcast_filter_upd) => { + let msg = [ + (collector.err(), "collector"), + (commit.err(), "commit"), + (bcast_filter_upd.err(), "broadcast filter update"), + ] + .into_iter() + .filter_map(|(res, name)| { + res.map(|err| format!("{name} task panicked: {err:?}")) + }) + .join("; \n"); panic!("{}", msg) } } @@ -214,67 +253,3 @@ impl Engine { } } } - -// task 0: continue from where we stopped -// * load last state into DAG: some (un)finished round -// * create new round and point, if last round is finished -// -> start 1 & 2 -// -// (always) -// task 1: accept broadcasts though filter -// -// (@ r+0 iff in peer schedule for r+0) -// task 2: broadcast + ask for signatures (to/from peers scheduled @ r+1) -// (to support point receivers, even if "me" is not in schedule @ r+1) -// -// (@ r+0 iff in peer schedule for r+1) -// task 3: respond to signature requests (from peers @ [r-1; r+0]) -// (point authors must reject signatures they consider invalid) -// (new nodes at the beginning of a new validation epoch -// must sign points from the last round of a previous epoch) -// (fast nodes that reached the end of their validation epoch -// must continue to sign points of lagging nodes -// until new validator set starts producing its shard-blocks - -// they cannot finish the last round by counting signatures -// and will advance by receiving batch of points from broadcast filter) - -/* -async fn produce( - &self, - finished_round: &Arc, - prev_point: Option, - payload: Vec, - peer_schedule: &PeerSchedule, -) -> Option { - let new_round = Arc::new(finished_round.next(peer_schedule)); - self.broadcast_filter.advance_round(new_round.round()).await; - - if let Some(for_next_point) = self.peer_schedule.local_keys(&new_round.round().next()) { - // respond to signature requests (mandatory inclusions) - // _ = Signer::consume_broadcasts(filtered_rx, new_round.clone()); - // _ = Signer::on_validated(filtered_rx, new_round.clone(), Some(on_validated_tx)); - - if let Some(for_witness) = self.peer_schedule.local_keys(new_round.round()) { - // respond to signature requests to be included as witness - }; - } else { - // consume broadcasts without signing them - // _ = Signer::consume_broadcasts(filtered_rx, new_round.clone()); - }; - if let Some(for_current_point) = self.peer_schedule.local_keys(new_round.round()) { - 
let point = Producer::create_point( - finished_round, - &new_round, - &for_current_point, - prev_point, - payload, - ) - .await; - let bcaster = Broadcaster::new(&point, dispatcher, peer_schedule); - _ = bcaster.run().await; - // broadcast, gather signatures as a mean of delivery (even if not producing next block) - Some(point) - } else { - None - } -}*/ diff --git a/consensus/src/engine/mempool_config.rs b/consensus/src/engine/mempool_config.rs index 3cd30bbdd..6a2c80f33 100644 --- a/consensus/src/engine/mempool_config.rs +++ b/consensus/src/engine/mempool_config.rs @@ -23,9 +23,20 @@ impl MempoolConfig { /// we try to gather as many points and signatures as we can within some time frame; /// this is a tradeoff between breaking on exactly 2F+1 elements /// (dependencies and/or signatures), and waiting for slow nodes - pub const RETRY_INTERVAL: Duration = Duration::from_millis(1000); + pub const RETRY_INTERVAL: Duration = Duration::from_millis(250); - pub const COMMIT_DEPTH: u32 = 20; + /// the least amount of [Round]s that are kept in DAG until they are discarded + pub const COMMIT_DEPTH: u8 = 20; pub const GENESIS_ROUND: Round = Round(1); + + /// should not be less than 3 (as in average 1 of 3 is unreliable and another one did not sign); + /// includes at least the author of dependant point and the author of dependency point; + /// increases exponentially on every attempt, until every node out of 2F+1 is queried once + /// or a verifiable point is found (ill-formed or incorrectly signed points are not eligible) + pub const DOWNLOAD_PEERS: u8 = 3; + + /// every failed response is accounted as point is not found; + /// 1/3+1 failed responses leads to invalidation of the point and all its dependants + pub const DOWNLOAD_TIMEOUT: Duration = Duration::from_millis(200); } diff --git a/consensus/src/intercom/adapter/downloader.rs b/consensus/src/intercom/adapter/downloader.rs deleted file mode 100644 index d0242f8d8..000000000 --- a/consensus/src/intercom/adapter/downloader.rs +++ /dev/null @@ -1,18 +0,0 @@ -use std::future::Future; -use std::pin::Pin; -use std::task::{Context, Poll}; - -use crate::models::DagPoint; - -pub struct Downloader { - // point's author is a top priority; fallback priority is (any) dependent point's author - // recursively: every dependency is expected to be signed by 2/3+1 -} - -impl Future for Downloader { - type Output = DagPoint; - - fn poll(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll { - todo!() - } -} diff --git a/consensus/src/intercom/adapter/dto.rs b/consensus/src/intercom/adapter/dto.rs deleted file mode 100644 index 51ad84c42..000000000 --- a/consensus/src/intercom/adapter/dto.rs +++ /dev/null @@ -1,26 +0,0 @@ -use std::sync::Arc; - -use crate::models::{DagPoint, Point, Round}; - -#[derive(Debug)] -pub enum ConsensusEvent { - // allows not to peek but poll the channel when local dag is not ready yet - Forward(Round), - // well-formed, but not yet validated against DAG - Verified(Arc), - Invalid(DagPoint), -} - -/// * signer signals (Ok) when ready, broadcaster signals () when ready -/// * signer finishes only after broadcaster signalled () -/// * broadcaster finishes Ok only if signer signalled (Ok), signalling () to signer -/// * broadcaster must finish Ok/Err if signer signalled (Err), signalling () to signer -/// * broadcaster may finish Err, signalling () to signer -/// -/// => signer may run without broadcaster, as if broadcaster signalled () -#[derive(Debug)] -pub enum SignerSignal { - Ok, - Err, - Retry, -} diff --git 
a/consensus/src/intercom/adapter/broadcast_filter.rs b/consensus/src/intercom/broadcast/broadcast_filter.rs similarity index 80% rename from consensus/src/intercom/adapter/broadcast_filter.rs rename to consensus/src/intercom/broadcast/broadcast_filter.rs index fd3d1b988..6b4f18997 100644 --- a/consensus/src/intercom/adapter/broadcast_filter.rs +++ b/consensus/src/intercom/broadcast/broadcast_filter.rs @@ -15,47 +15,41 @@ use crate::models::{Digest, Location, NodeCount, Point, PointId, Round}; use super::dto::ConsensusEvent; -pub struct BroadcastFilter { - // defend from spam from future rounds: - // should keep rounds greater than current dag round - last_by_peer: FastDashMap, - // very much like DAG structure, but without dependency check; - // just to determine reliably that consensus advanced without current node - by_round: FastDashMap< - Round, - ( - NodeCount, - BTreeMap>, - ), - >, - current_dag_round: AtomicU32, - peer_schedule: Arc, - output: mpsc::UnboundedSender, -} +#[derive(Clone)] +pub struct BroadcastFilter(Arc); impl BroadcastFilter { pub fn new( + local_id: Arc, peer_schedule: Arc, output: mpsc::UnboundedSender, - ) -> Arc { - let this = Self { + ) -> Self { + let this = Self(Arc::new(BroadcastFilterInner { + local_id, last_by_peer: Default::default(), by_round: Default::default(), current_dag_round: Default::default(), // will advance with other peers peer_schedule, output, - }; - let this = Arc::new(this); + })); let listener = this.clone(); tokio::spawn(listener.clean_cache()); this } - async fn clean_cache(self: Arc) { - let mut rx = self.peer_schedule.updates(); + pub fn add(&self, point: Arc) -> BroadcastResponse { + self.0.add(point) + } + + pub fn advance_round(&self, new_round: &Round) { + self.0.advance_round(new_round) + } + + async fn clean_cache(self) { + let mut rx = self.0.peer_schedule.updates(); match rx.recv().await { - Ok((peer_id, PeerState::Removed)) => { - self.last_by_peer.remove(&peer_id); + Ok((peer_id, PeerState::Unknown)) => { + self.0.last_by_peer.remove(&peer_id); } Ok(_) => {} Err(err @ RecvError::Lagged(_)) => { @@ -66,7 +60,28 @@ impl BroadcastFilter { } } } +} +struct BroadcastFilterInner { + local_id: Arc, + // defend from spam from future rounds: + // should keep rounds greater than current dag round + last_by_peer: FastDashMap, + // very much like DAG structure, but without dependency check; + // just to determine reliably that consensus advanced without current node + by_round: FastDashMap< + Round, + ( + NodeCount, + BTreeMap>, + ), + >, + current_dag_round: AtomicU32, + peer_schedule: Arc, + output: mpsc::UnboundedSender, +} + +impl BroadcastFilterInner { // TODO logic is doubtful because of contradiction in requirements: // * we must determine the latest consensus round reliably: // the current approach is to collect 1/3+1 points at the same future round @@ -75,26 +90,31 @@ impl BroadcastFilter { // => we should discard points from the far future /// returns Vec of points to insert into DAG if consensus round is determined reliably - pub async fn add(&self, point: Arc) -> BroadcastResponse { + fn add(&self, point: Arc) -> BroadcastResponse { + let local_id = &self.local_id; // dag @r+0 accepts broadcasts of [r-1; r+1] rounds; // * points older than r-1 are rejected, but are sent to DAG for validation // as they may be used by some point as a dependency // * newer broadcasts are enqueued until 1/3+1 points per round collected let dag_round = Round(self.current_dag_round.load(Ordering::Acquire)); - tracing::info!( - "filter @ 
{dag_round:?} got point @ {:?}", - point.body.location.round - ); // for any node @ r+0, its DAG always contains [r-DAG_DEPTH-N; r+1] rounds, where N>=0 let PointId { location: Location { round, author }, digest, } = point.id(); + + tracing::info!( + "{local_id} @ {dag_round:?} filter <= bcaster {author:.4?} @ {round:?} : received" + ); + // conceal raw point, do not use it let point = match Verifier::verify(&point, &self.peer_schedule) { Ok(()) => ConsensusEvent::Verified(point), Err(dag_point) => { - tracing::error!("filter @ {dag_round:?}: invalid, {:.4?}", point); + tracing::error!( + "{local_id} @ {dag_round:?} filter <= bcaster {author:.4?} @ {round:?} : \ + invalid {point:.4?}" + ); ConsensusEvent::Invalid(dag_point) } }; @@ -104,7 +124,10 @@ impl BroadcastFilter { } else if round >= dag_round.prev() { BroadcastResponse::Accepted // we will sign, maybe } else { - tracing::error!("Rejected 1"); + tracing::error!( + "{local_id} @ {dag_round:?} filter <= bcaster {author:.4?} @ {round:?} : \ + Rejected as too old round" + ); // too old, current node will not sign, but some point may include it BroadcastResponse::Rejected }; @@ -132,7 +155,10 @@ impl BroadcastFilter { // node must not send broadcasts out-of order; // TODO we should ban a peer that broadcasts its rounds out of order, // though we cannot prove this decision for other nodes - tracing::error!("Rejected 2"); + tracing::error!( + "{local_id} @ {dag_round:?} filter <= bcaster {author:.4?} @ {round:?} : \ + Rejected as out of order by round" + ); return BroadcastResponse::Rejected; }; if let Some(to_delete) = outdated_peer_round { @@ -155,18 +181,21 @@ impl BroadcastFilter { let (node_count, ref mut same_round) = entry.value_mut(); same_round.entry(author).or_default().insert(digest, point); if same_round.len() < node_count.reliable_minority() { - tracing::info!("round is not yet determined"); + tracing::info!( + "{local_id} @ {dag_round:?} filter <= bcaster {author:.4?} @ {round:?} : \ + round is not determined yet", + ); return BroadcastResponse::TryLater; // round is not yet determined }; } } - self.advance_round(&round).await; + self.advance_round(&round); BroadcastResponse::Accepted } // drop everything up to the new round (inclusive), channelling cached points - pub async fn advance_round(&self, new_round: &Round) { + fn advance_round(&self, new_round: &Round) { let Ok(old) = self.current_dag_round .fetch_update(Ordering::Release, Ordering::Relaxed, |old| { diff --git a/consensus/src/intercom/adapter/broadcaster.rs b/consensus/src/intercom/broadcast/broadcaster.rs similarity index 74% rename from consensus/src/intercom/adapter/broadcaster.rs rename to consensus/src/intercom/broadcast/broadcaster.rs index 98f8e5110..7163264f1 100644 --- a/consensus/src/intercom/adapter/broadcaster.rs +++ b/consensus/src/intercom/broadcast/broadcaster.rs @@ -5,12 +5,12 @@ use futures_util::future::BoxFuture; use futures_util::stream::FuturesUnordered; use futures_util::StreamExt; use tokio::sync::broadcast::{self, error::RecvError}; -use tokio::sync::{mpsc, Notify}; +use tokio::sync::mpsc; use tycho_network::PeerId; use tycho_util::{FastHashMap, FastHashSet}; -use crate::intercom::adapter::dto::SignerSignal; +use crate::intercom::broadcast::dto::CollectorSignal; use crate::intercom::dto::{BroadcastResponse, PeerState, SignatureResponse}; use crate::intercom::{Dispatcher, PeerSchedule}; use crate::models::{NodeCount, Point, Round, Signature}; @@ -18,15 +18,20 @@ use crate::models::{NodeCount, Point, Round, Signature}; type BcastResult = 
anyhow::Result; type SigResult = anyhow::Result; +#[derive(Debug)] +pub enum BroadcasterSignal { + Ok, + Err, +} + pub struct Broadcaster { local_id: Arc, current_round: Round, point_body: Vec, dispatcher: Dispatcher, - bcaster_ready: Arc, - signer_signal: mpsc::UnboundedReceiver, - is_signer_ready_ok: bool, + bcaster_signal: mpsc::Sender, + collector_signal: mpsc::UnboundedReceiver, peer_updates: broadcast::Receiver<(PeerId, PeerState)>, removed_peers: FastHashSet, @@ -36,7 +41,7 @@ pub struct Broadcaster { // results rejections: FastHashSet, signatures: FastHashMap, - // TODO move generic logic out of dispatcher + // TODO move generic logic close to dispatcher, also check DownloadTask bcast_request: tycho_network::Request, bcast_peers: FastHashSet, bcast_futs: FuturesUnordered>, @@ -51,8 +56,8 @@ impl Broadcaster { point: &Point, dispatcher: &Dispatcher, peer_schedule: &PeerSchedule, - bcaster_ready: Arc, - signer_signal: mpsc::UnboundedReceiver, + bcaster_signal: mpsc::Sender, + collector_signal: mpsc::UnboundedReceiver, ) -> Self { let point_body = bincode::serialize(&point.body).expect("own point serializes to bytes"); let peer_updates = peer_schedule.updates(); @@ -62,8 +67,12 @@ impl Broadcaster { .map(|(peer_id, _)| *peer_id) .collect::>(); let signers_count = NodeCount::new(signers.len()); - let bcast_peers = peer_schedule.all_resolved(); - tracing::info!("bcast_peers {}", bcast_peers.len()); + let collectors = peer_schedule.all_resolved(); + tracing::info!( + "{local_id} @ {:?} collectors count = {}", + point.body.location.round, + collectors.len() + ); let bcast_request = Dispatcher::broadcast_request(&point); let sig_request = Dispatcher::signature_request(&point.body.location.round); Self { @@ -71,9 +80,8 @@ impl Broadcaster { current_round: point.body.location.round, point_body, dispatcher: dispatcher.clone(), - bcaster_ready, - signer_signal, - is_signer_ready_ok: false, + bcaster_signal, + collector_signal, peer_updates, signers, @@ -83,7 +91,7 @@ impl Broadcaster { signatures: Default::default(), bcast_request, - bcast_peers, + bcast_peers: collectors, bcast_futs: FuturesUnordered::new(), sig_request, @@ -92,7 +100,7 @@ impl Broadcaster { } } /// returns evidence for broadcast point - pub async fn run(mut self) -> Result, ()> { + pub async fn run(mut self) -> FastHashMap { // how this was supposed to work: // * in short: broadcast to all and gather signatures from those who accepted the point // * both broadcast and signature tasks have their own retry loop for every peer @@ -123,9 +131,9 @@ impl Broadcaster { update = self.peer_updates.recv() => { self.match_peer_updates(update) } - Some(signer_signal) = self.signer_signal.recv() => { - if let Some(result) = self.match_signer_signal(signer_signal) { - break result.map(|_| self.signatures) + Some(collector_signal) = self.collector_signal.recv() => { + if self.should_finish(collector_signal).await { + break self.signatures } } else => { @@ -134,9 +142,9 @@ impl Broadcaster { } } } - fn match_signer_signal(&mut self, signer_signal: SignerSignal) -> Option> { + async fn should_finish(&mut self, collector_signal: CollectorSignal) -> bool { tracing::info!( - "{} @ {:?} bcaster <= signer : {signer_signal:?}; sigs {} of {}; rejects {} of {}", + "{} @ {:?} bcaster <= Collector::{collector_signal:?} : sigs {} of {}, rejects {} of {}", self.local_id, self.current_round, self.signatures.len(), @@ -144,50 +152,42 @@ impl Broadcaster { self.rejections.len(), self.signers_count.reliable_minority(), ); - match signer_signal { - 
SignerSignal::Ok => { - self.is_signer_ready_ok = true; - None - } - SignerSignal::Err => { - // even if we can return successful result, it will be discarded - Some(Err(())) - } - SignerSignal::Retry => self.check_if_ready(), - } - } - fn check_if_ready(&mut self) -> Option> { - if self.rejections.len() >= self.signers_count.reliable_minority() { - self.bcaster_ready.notify_one(); - if self.is_signer_ready_ok { - return Some(Err(())); - } - } else if self.signatures.len() >= self.signers_count.majority_of_others() { - self.bcaster_ready.notify_one(); - if self.is_signer_ready_ok { - return Some(Ok(())); + match collector_signal { + // though we return successful result, it will be discarded on Err + CollectorSignal::Finish | CollectorSignal::Err => true, + CollectorSignal::Retry => { + if self.rejections.len() >= self.signers_count.reliable_minority() { + _ = self.bcaster_signal.send(BroadcasterSignal::Err).await; + return true; + } + if self.signatures.len() >= self.signers_count.majority_of_others() { + _ = self.bcaster_signal.send(BroadcasterSignal::Ok).await; + } + for peer_id in mem::take(&mut self.sig_peers) { + self.request_signature(&peer_id); + } + for peer_id in mem::take(&mut self.bcast_peers) { + self.broadcast(&peer_id); + } + false } } - for peer_id in mem::take(&mut self.sig_peers) { - self.request_signature(&peer_id); - } - for peer_id in mem::take(&mut self.bcast_peers) { - self.broadcast(&peer_id); - } - None } fn match_peer_updates(&mut self, result: Result<(PeerId, PeerState), RecvError>) { match result { - Ok(update) => { + Ok((peer_id, new_state)) => { tracing::info!( - "{} @ {:?} bcaster peer update: {update:?}", + "{} @ {:?} bcaster peer update: {peer_id:?} -> {new_state:?}", self.local_id, self.current_round ); - match update { - (_peer_id, PeerState::Added) => { /* ignore */ } - (peer_id, PeerState::Resolved) => self.broadcast(&peer_id), - (peer_id, PeerState::Removed) => _ = self.removed_peers.insert(peer_id), + match new_state { + PeerState::Resolved => { + self.removed_peers.remove(&peer_id); + self.rejections.remove(&peer_id); + self.broadcast(&peer_id); + } + PeerState::Unknown => _ = self.removed_peers.insert(peer_id), } } Err(err @ RecvError::Lagged(_)) => { @@ -205,7 +205,7 @@ impl Broadcaster { // self.bcast_peers.push(peer_id); // let it retry self.sig_peers.insert(peer_id); // lighter weight retry loop tracing::error!( - "{} @ {:?} bcaster <= signer {peer_id:.4?} broadcast error : {error}", + "{} @ {:?} bcaster <= collector {peer_id:.4?} broadcast error : {error}", self.local_id, self.current_round ); @@ -213,13 +213,13 @@ impl Broadcaster { Ok(response) => { if response == BroadcastResponse::Rejected { tracing::warn!( - "{} @ {:?} bcaster <= signer {peer_id:.4?} : {response:?}", + "{} @ {:?} bcaster <= collector {peer_id:.4?} : {response:?}", self.local_id, self.current_round ); } else { tracing::info!( - "{} @ {:?} bcaster <= signer {peer_id:.4?} : {response:?}", + "{} @ {:?} bcaster <= collector {peer_id:.4?} : {response:?}", self.local_id, self.current_round ); @@ -242,7 +242,7 @@ impl Broadcaster { // TODO distinguish timeouts from models incompatibility etc self.sig_peers.insert(peer_id); // let it retry tracing::error!( - "{} @ {:?} bcaster <= signer {peer_id:.4?} signature request error : {error}", + "{} @ {:?} bcaster <= collector {peer_id:.4?} signature request error : {error}", self.local_id, self.current_round ); @@ -250,13 +250,13 @@ impl Broadcaster { Ok(response) => { if response == SignatureResponse::Rejected { tracing::warn!( - "{} @ 
{:?} bcaster <= signer {peer_id:.4?} : {response:.4?}", + "{} @ {:?} bcaster <= collector {peer_id:.4?} : {response:.4?}", self.local_id, self.current_round ); } else { tracing::info!( - "{} @ {:?} bcaster <= signer {peer_id:.4?} : {response:.4?}", + "{} @ {:?} bcaster <= collector {peer_id:.4?} : {response:.4?}", self.local_id, self.current_round ); @@ -289,13 +289,13 @@ impl Broadcaster { self.bcast_futs .push(self.dispatcher.request(&peer_id, &self.bcast_request)); tracing::info!( - "{} @ {:?} bcaster => signer {peer_id:.4?}: broadcast", + "{} @ {:?} bcaster => collector {peer_id:.4?}: broadcast", self.local_id, self.current_round ); } else { tracing::warn!( - "{} @ {:?} bcaster => signer {peer_id:.4?}: broadcast impossible", + "{} @ {:?} bcaster => collector {peer_id:.4?}: broadcast impossible", self.local_id, self.current_round ); @@ -306,13 +306,13 @@ impl Broadcaster { self.sig_futs .push(self.dispatcher.request(&peer_id, &self.sig_request)); tracing::info!( - "{} @ {:?} bcaster => signer {peer_id:.4?}: signature request", + "{} @ {:?} bcaster => collector {peer_id:.4?}: signature request", self.local_id, self.current_round ); } else { tracing::warn!( - "{} @ {:?} bcaster => signer {peer_id:.4?}: signature request impossible", + "{} @ {:?} bcaster => collector {peer_id:.4?}: signature request impossible", self.local_id, self.current_round ); diff --git a/consensus/src/intercom/adapter/signer.rs b/consensus/src/intercom/broadcast/collector.rs similarity index 70% rename from consensus/src/intercom/adapter/signer.rs rename to consensus/src/intercom/broadcast/collector.rs index 140377f38..e59e97999 100644 --- a/consensus/src/intercom/adapter/signer.rs +++ b/consensus/src/intercom/broadcast/collector.rs @@ -4,27 +4,30 @@ use std::sync::Arc; use futures_util::future::BoxFuture; use futures_util::stream::FuturesUnordered; use futures_util::{FutureExt, StreamExt}; -use tokio::sync::{mpsc, oneshot, Notify}; +use tokio::sync::{mpsc, oneshot}; use tycho_network::PeerId; use crate::dag::{DagRound, InclusionState}; use crate::engine::MempoolConfig; -use crate::intercom::adapter::dto::{ConsensusEvent, SignerSignal}; +use crate::intercom::broadcast::dto::{CollectorSignal, ConsensusEvent}; use crate::intercom::dto::SignatureResponse; +use crate::intercom::{BroadcasterSignal, Downloader}; use crate::models::{Point, Round}; -pub struct Signer { +pub struct Collector { local_id: Arc, + downloader: Downloader, from_bcast_filter: mpsc::UnboundedReceiver, signature_requests: mpsc::UnboundedReceiver, next_round: Round, next_includes: FuturesUnordered>, } -impl Signer { +impl Collector { pub fn new( local_id: Arc, + downloader: &Downloader, from_bcast_filter: mpsc::UnboundedReceiver, signature_requests: mpsc::UnboundedReceiver, next_includes: impl Iterator, @@ -32,6 +35,7 @@ impl Signer { ) -> Self { Self { local_id, + downloader: downloader.clone(), from_bcast_filter, signature_requests, next_round, @@ -45,29 +49,33 @@ impl Signer { mut self, next_dag_round: DagRound, // r+1 has_own_point: Option>, - signer_signal: mpsc::UnboundedSender, - bcaster_ready: Arc, + collector_signal: mpsc::UnboundedSender, + bcaster_signal: mpsc::Receiver, ) -> Self { let current_dag_round = next_dag_round .prev() .get() .expect("current DAG round must be linked into DAG chain"); - let mut includes = mem::take(&mut self.next_includes); - if current_dag_round.round() != &self.next_round { - includes.clear(); - }; + let includes = mem::take(&mut self.next_includes); + assert_eq!( + current_dag_round.round(), + &self.next_round, 
+ "collector expected to be run at {:?}", + &self.next_round + ); self.next_round = next_dag_round.round().clone(); - let task = SignerTask { + let task = CollectorTask { local_id: self.local_id.clone(), + downloader: self.downloader.clone(), current_round: current_dag_round.clone(), next_dag_round, includes, includes_ready: has_own_point.into_iter().count(), next_includes: FuturesUnordered::new(), - signer_signal, - bcaster_ready, - is_bcaster_ready: false, + collector_signal, + bcaster_signal, + is_bcaster_ready_ok: false, }; let result = task .run(&mut self.from_bcast_filter, &mut self.signature_requests) @@ -85,9 +93,10 @@ impl Signer { } type SignatureRequest = (Round, PeerId, oneshot::Sender); -struct SignerTask { +struct CollectorTask { // for node running @ r+0: local_id: Arc, + downloader: Downloader, current_round: DagRound, // = r+0 next_dag_round: DagRound, // = r+1 is always in DAG; contains the keypair to produce point @ r+1 @@ -100,12 +109,12 @@ struct SignerTask { /// anyway should rewrite signing mechanics - look for comments inside [DagRound::add_exact] next_includes: FuturesUnordered>, - signer_signal: mpsc::UnboundedSender, - bcaster_ready: Arc, - is_bcaster_ready: bool, + collector_signal: mpsc::UnboundedSender, + bcaster_signal: mpsc::Receiver, + is_bcaster_ready_ok: bool, } -impl SignerTask { +impl CollectorTask { /// includes @ r+0 must include own point @ r+0 iff the one is produced /// returns includes for our point at the next round @@ -118,51 +127,36 @@ impl SignerTask { loop { tokio::select! { request = signature_requests.recv() => match request { - Some((round, peer_id, callback)) => { - let response = self.signature_response(&round, &peer_id); - tracing::info!( - "{} @ {:?} signer => bcaster {peer_id:.4?} @ {round:?} : {response:.4?}", - self.local_id, self.current_round.round() - ); - _ = callback.send(response); + Some((round, author, callback)) => { + _ = callback.send(self.signature_response(&round, &author)); } None => panic!("channel with signature requests closed") }, filtered = from_bcast_filter.recv() => match filtered { Some(consensus_event) => { if let Err(round) = self.match_filtered(&consensus_event) { - _ = self.signer_signal.send(SignerSignal::Err); + _ = self.collector_signal.send(CollectorSignal::Err); return Err(round) } }, None => panic!("channel from Broadcast Filter closed"), }, - _ = self.bcaster_ready.notified() => { - self.is_bcaster_ready = true; - tracing::info!( - "{} @ {:.4?} signer <= bcaster ready : includes {} of {}", - self.local_id, self.current_round.round(), - self.includes_ready, self.current_round.node_count().majority() - ); - if self.includes_ready >= self.current_round.node_count().majority() { + Some(bcaster_signal) = self.bcaster_signal.recv() => { + if self.should_fail(bcaster_signal) { + // has to jump over one round + return Err(self.next_dag_round.round().next()) + } + // bcaster sends its signal immediately after receiving Signal::Retry, + // so we don't have to wait for one more interval + if self.is_ready() { return Ok(self.next_includes) } }, _ = retry_interval.tick() => { - tracing::info!( - "{} @ {:.4?} signer retry : includes {} of {}", - self.local_id, self.current_round.round(), - self.includes_ready, self.current_round.node_count().majority() - ); - // point @ r+1 has to include 2F+1 broadcasts @ r+0 (we are @ r+0) - if self.includes_ready >= self.current_round.node_count().majority() { - _ = self.signer_signal.send(SignerSignal::Ok); - _ = self.signer_signal.send(SignerSignal::Retry); - if 
self.is_bcaster_ready { - return Ok(self.next_includes) - } + if self.is_ready() { + return Ok(self.next_includes) } else { - _ = self.signer_signal.send(SignerSignal::Retry); + _ = self.collector_signal.send(CollectorSignal::Retry); } }, // FIXME not so great: some signature requests will be retried, @@ -178,7 +172,7 @@ impl SignerTask { MempoolConfig::sign_time_range(), ) } else { - state.signed().is_some() // FIXME this is very fragile duct tape + state.signed().is_some() // FIXME very fragile duct tape }; if signed { tracing::info!( @@ -196,12 +190,46 @@ impl SignerTask { } }, else => { - panic!("signer unhandled"); + panic!("collector unhandled"); } } } } + fn should_fail(&mut self, signal: BroadcasterSignal) -> bool { + tracing::info!( + "{} @ {:.4?} collector <= Bcaster::{signal:?} : includes {} of {}", + self.local_id, + self.current_round.round(), + self.includes_ready, + self.current_round.node_count().majority() + ); + match signal { + BroadcasterSignal::Ok => { + self.is_bcaster_ready_ok = true; + self.bcaster_signal.close(); + false + } + BroadcasterSignal::Err => true, + } + } + + fn is_ready(&self) -> bool { + tracing::info!( + "{} @ {:.4?} collector self-check : includes {} of {}", + self.local_id, + self.current_round.round(), + self.includes_ready, + self.current_round.node_count().majority() + ); + // point @ r+1 has to include 2F+1 broadcasts @ r+0 (we are @ r+0) + let is_self_ready = self.includes_ready >= self.current_round.node_count().majority(); + if is_self_ready && self.is_bcaster_ready_ok { + _ = self.collector_signal.send(CollectorSignal::Finish); + } + is_self_ready && self.is_bcaster_ready_ok + } + fn signature_response(&mut self, round: &Round, author: &PeerId) -> SignatureResponse { if round > self.current_round.round() { return SignatureResponse::TryLater; // hold fast nodes from moving forward @@ -237,20 +265,26 @@ impl SignerTask { } } } - let res = match state.signed() { + let response = match state.signed() { Some(Ok(signed)) => SignatureResponse::Signature(signed.with.clone()), Some(Err(())) => SignatureResponse::Rejected, None => SignatureResponse::TryLater, }; - res + tracing::info!( + "{} @ {:?} collector => bcaster {author:.4?} @ {round:?} : {response:.4?}", + self.local_id, + self.current_round.round() + ); + response } - fn match_filtered(&self, filtered: &ConsensusEvent) -> Result<(), Round> { + + fn match_filtered(&self, consensus_event: &ConsensusEvent) -> Result<(), Round> { tracing::info!( - "{} @ {:?} signer <= bcast filter : {filtered:.4?}", + "{} @ {:?} collector <= bcast filter : {consensus_event:.4?}", self.local_id, self.current_round.round() ); - match filtered { + match consensus_event { ConsensusEvent::Forward(consensus_round) => { match consensus_round.cmp(self.next_dag_round.round()) { // we're too late, consensus moved forward @@ -263,23 +297,23 @@ impl SignerTask { } ConsensusEvent::Verified(point) => match &point.body.location.round { x if x > self.next_dag_round.round() => { - panic!("Coding error: broadcast filter advanced while signer left behind") + panic!("Coding error: broadcast filter advanced while collector left behind") } x if x == self.next_dag_round.round() => { - if let Some(task) = self.next_dag_round.add(point) { + if let Some(task) = self.next_dag_round.add(point, &self.downloader) { self.next_includes.push(task) } } x if x == self.current_round.round() => { - if let Some(task) = self.current_round.add(point) { + if let Some(task) = self.current_round.add(point, &self.downloader) { self.includes.push(task) } } 
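A hypothetical, condensed model of the Broadcaster/Collector handshake introduced in this patch (the real tasks exchange these signals over channels and also drive retries off a timer):

enum BroadcasterSignal { Ok, Err }

enum CollectorSignal { Finish, Err, Retry }

// the collector may finish its round only after gathering a 2F+1 majority of
// includes AND seeing BroadcasterSignal::Ok; a broadcaster error aborts the round
fn on_bcaster_signal(
    signal: BroadcasterSignal,
    includes_ready: usize,
    majority: usize, // 2F+1 for the current round
) -> Option<CollectorSignal> {
    match signal {
        BroadcasterSignal::Err => Some(CollectorSignal::Err),
        BroadcasterSignal::Ok if includes_ready >= majority => Some(CollectorSignal::Finish),
        // otherwise keep collecting; CollectorSignal::Retry is emitted on the next tick
        BroadcasterSignal::Ok => None,
    }
}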
- _ => _ = self.current_round.add(&point), // maybe other's dependency + _ => _ = self.current_round.add(&point, &self.downloader), // maybe other's dependency }, ConsensusEvent::Invalid(dag_point) => { if &dag_point.location().round > self.next_dag_round.round() { - panic!("Coding error: broadcast filter advanced while signer left behind") + panic!("Coding error: broadcast filter advanced while collector left behind") } else { _ = self.next_dag_round.insert_invalid(&dag_point); } diff --git a/consensus/src/intercom/broadcast/dto.rs b/consensus/src/intercom/broadcast/dto.rs new file mode 100644 index 000000000..3f3341087 --- /dev/null +++ b/consensus/src/intercom/broadcast/dto.rs @@ -0,0 +1,20 @@ +use std::sync::Arc; + +use crate::models::{DagPoint, Point, Round}; + +#[derive(Debug)] +pub enum ConsensusEvent { + // allows not to peek but poll the channel when local dag is not ready yet + Forward(Round), + // well-formed, but not yet validated against DAG + Verified(Arc), + Invalid(DagPoint), +} + +/// collector may run without broadcaster, as if broadcaster signalled Ok +#[derive(Debug)] +pub enum CollectorSignal { + Finish, + Err, + Retry, +} diff --git a/consensus/src/intercom/adapter/mod.rs b/consensus/src/intercom/broadcast/mod.rs similarity index 56% rename from consensus/src/intercom/adapter/mod.rs rename to consensus/src/intercom/broadcast/mod.rs index e79132593..5b9dec40a 100644 --- a/consensus/src/intercom/adapter/mod.rs +++ b/consensus/src/intercom/broadcast/mod.rs @@ -1,13 +1,11 @@ pub use broadcast_filter::*; pub use broadcaster::*; -pub use downloader::*; -pub use signer::*; +pub use collector::*; // Note: intercom modules' responsibilities -// matches visibility of their internal DTOs +// matches visibility of their internal DTOs mod broadcast_filter; mod broadcaster; -mod downloader; +mod collector; mod dto; -mod signer; diff --git a/consensus/src/intercom/core/dispatcher.rs b/consensus/src/intercom/core/dispatcher.rs index dea6e7092..f31c299ae 100644 --- a/consensus/src/intercom/core/dispatcher.rs +++ b/consensus/src/intercom/core/dispatcher.rs @@ -6,7 +6,6 @@ use tycho_network::{DhtClient, Network, OverlayId, OverlayService, PeerId, Priva use crate::intercom::core::dto::{MPRequest, MPResponse}; use crate::intercom::core::responder::Responder; -use crate::intercom::dto::PointByIdResponse; use crate::models::{Point, PointId, Round}; #[derive(Clone)] @@ -40,10 +39,8 @@ impl Dispatcher { } } - pub async fn point_by_id(&self, peer: &PeerId, id: &PointId) -> Result { - let request = (&MPRequest::PointById(id.clone())).into(); - let response = self.overlay.query(&self.network, peer, request).await?; - PointByIdResponse::try_from(MPResponse::try_from(&response)?) 
+ pub fn point_by_id_request(&self, id: &PointId) -> tycho_network::Request { + (&MPRequest::PointById(id.clone())).into() } pub fn broadcast_request(point: &Point) -> tycho_network::Request { diff --git a/consensus/src/intercom/core/mod.rs b/consensus/src/intercom/core/mod.rs index 6524ae78b..701ef425e 100644 --- a/consensus/src/intercom/core/mod.rs +++ b/consensus/src/intercom/core/mod.rs @@ -2,7 +2,7 @@ pub use dispatcher::*; pub use responder::*; // Note: intercom modules' responsibilities -// matches visibility of their internal DTOs +// matches visibility of their internal DTOs mod dispatcher; mod dto; diff --git a/consensus/src/intercom/core/responder.rs b/consensus/src/intercom/core/responder.rs index e02ffaa8b..14e64c342 100644 --- a/consensus/src/intercom/core/responder.rs +++ b/consensus/src/intercom/core/responder.rs @@ -9,22 +9,26 @@ use tycho_util::futures::BoxFutureOrNoop; use crate::intercom::core::dto::{MPRemoteResult, MPRequest, MPResponse}; use crate::intercom::dto::{PointByIdResponse, SignatureResponse}; use crate::intercom::BroadcastFilter; -use crate::models::Round; +use crate::models::{PointId, Round}; pub struct Responder(Arc); impl Responder { pub fn new( - broadcast_filter: Arc, + local_id: Arc, + broadcast_filter: BroadcastFilter, signature_requests: mpsc::UnboundedSender<( Round, PeerId, oneshot::Sender, )>, + uploads: mpsc::UnboundedSender<(PointId, oneshot::Sender)>, ) -> Self { Self(Arc::new(ResponderInner { + local_id, broadcast_filter, signature_requests, + uploads, })) } } @@ -53,8 +57,10 @@ impl Service for Responder { struct ResponderInner { // state and storage components go here - broadcast_filter: Arc, + local_id: Arc, + broadcast_filter: BroadcastFilter, signature_requests: mpsc::UnboundedSender<(Round, PeerId, oneshot::Sender)>, + uploads: mpsc::UnboundedSender<(PointId, oneshot::Sender)>, } impl ResponderInner { @@ -69,18 +75,35 @@ impl ResponderInner { }; let response = match body { - MPRequest::PointById(point_id) => MPResponse::PointById(PointByIdResponse(None)), + MPRequest::PointById(point_id) => { + let (tx, rx) = oneshot::channel(); + self.uploads.send((point_id, tx)).ok(); + match rx.await { + Ok(response) => MPResponse::PointById(response), + Err(e) => panic!("Responder point by id await of request failed: {e}"), + }; + MPResponse::PointById(PointByIdResponse(None)) + } MPRequest::Broadcast(point) => { - MPResponse::Broadcast(self.broadcast_filter.add(Arc::new(point)).await) + MPResponse::Broadcast(self.broadcast_filter.add(Arc::new(point))) } MPRequest::Signature(round) => { let (tx, rx) = oneshot::channel(); - _ = self - .signature_requests - .send((round, req.metadata.peer_id.clone(), tx)); + self.signature_requests + .send((round, req.metadata.peer_id.clone(), tx)) + .ok(); match rx.await { Ok(response) => MPResponse::Signature(response), - Err(_) => MPResponse::Signature(SignatureResponse::TryLater), + Err(e) => { + let response = SignatureResponse::TryLater; + tracing::error!( + "{} responder => collector {:.4?} @ {round:?} : \ + {response:?} due to oneshot {e}", + self.local_id, + req.metadata.peer_id + ); + MPResponse::Signature(response) + } } } }; diff --git a/consensus/src/intercom/dependency/downloader.rs b/consensus/src/intercom/dependency/downloader.rs new file mode 100644 index 000000000..b8440a3ad --- /dev/null +++ b/consensus/src/intercom/dependency/downloader.rs @@ -0,0 +1,250 @@ +use std::iter; +use std::sync::Arc; + +use futures_util::future::BoxFuture; +use futures_util::stream::FuturesUnordered; +use 
futures_util::{FutureExt, StreamExt}; +use rand::prelude::{IteratorRandom, SmallRng}; +use rand::SeedableRng; +use tokio::sync::broadcast::error::RecvError; +use tokio::sync::{broadcast, watch}; +use tokio::time::error::Elapsed; + +use tycho_network::PeerId; +use tycho_util::FastHashMap; + +use crate::dag::{DagRound, Verifier, WeakDagRound}; +use crate::engine::MempoolConfig; +use crate::intercom::dto::{PeerState, PointByIdResponse}; +use crate::intercom::{Dispatcher, PeerSchedule}; +use crate::models::{DagPoint, NodeCount, PointId}; + +type DownloadResult = anyhow::Result; + +#[derive(Clone)] +pub struct Downloader { + local_id: Arc, + dispatcher: Dispatcher, + peer_schedule: PeerSchedule, +} + +impl Downloader { + pub fn new( + local_id: Arc, + dispatcher: &Dispatcher, + peer_schedule: &PeerSchedule, + ) -> Self { + Self { + local_id, + peer_schedule: peer_schedule.clone(), + dispatcher: dispatcher.clone(), + } + } + + pub async fn run( + self, + point_id: PointId, + point_round: DagRound, + // TODO it would be great to increase the number of dependants in-flight, + // but then the DAG needs to store some sort of updatable state machine + // instead of opaque Shared> + dependant: PeerId, + ) -> DagPoint { + assert_eq!( + point_id.location.round, + *point_round.round(), + "point and DAG round mismatch" + ); + // request point from its signers (any dependant is among them as point is already verified) + let all_peers = self.peer_schedule.peers_for(&point_round.round().next()); + let Ok(node_count) = NodeCount::try_from(all_peers.len()) else { + return DagPoint::NotExists(Arc::new(point_id)); + }; + // query author no matter if it is in the next round, but that can't affect 3F+1 + let all_peers = iter::once((point_id.location.author, PeerState::Resolved)) + // overwrite author's entry if it isn't really resolved; + .chain(all_peers.iter().map(|(peer_id, state)| (*peer_id, *state))) + .collect::>(); + if all_peers.is_empty() { + return DagPoint::NotExists(Arc::new(point_id)); + }; + let mut priorities = vec![dependant, point_id.location.author]; + priorities.dedup(); + let (has_resolved_tx, has_resolved_rx) = watch::channel(false); + DownloadTask { + weak_dag_round: point_round.as_weak(), + node_count, + request: self.dispatcher.point_by_id_request(&point_id), + point_id, + updates: self.peer_schedule.updates(), + has_resolved_tx, + has_resolved_rx, + in_flight: FuturesUnordered::new(), + all_peers, + parent: self, + attempt: 0, + } + .run(&priorities) + .await + } +} + +struct DownloadTask { + parent: Downloader, + + weak_dag_round: WeakDagRound, + node_count: NodeCount, + + request: tycho_network::Request, + point_id: PointId, + + all_peers: FastHashMap, + updates: broadcast::Receiver<(PeerId, PeerState)>, + has_resolved_tx: watch::Sender, + has_resolved_rx: watch::Receiver, + in_flight: FuturesUnordered< + BoxFuture<'static, (PeerId, Result, Elapsed>)>, + >, + attempt: u8, +} + +impl DownloadTask { + // point's author is a top priority; fallback priority is (any) dependent point's author + // recursively: every dependency is expected to be signed by 2/3+1 + pub async fn run(mut self, priorities: &Vec) -> DagPoint { + self.download_priorities(priorities); + self.download(); + loop { + tokio::select! 
{ + Some((peer_id, resolved)) = self.in_flight.next() => + match self.match_resolved(peer_id, resolved).await { + Some(dag_point) => break dag_point, + None => continue + }, + update = self.updates.recv() => self.match_peer_updates(update), + } + } + } + + fn download_priorities(&mut self, priorities: &Vec) { + let priorities = priorities + .into_iter() + .filter(|p| { + self.all_peers + .get(p) + .map_or(false, |&s| s == PeerState::Resolved) + }) + .collect::>(); + for resolved_priority in priorities { + self.all_peers.remove_entry(resolved_priority); + self.download_one(resolved_priority); + } + } + + fn download(&mut self) { + self.attempt += 1; + let count = (MempoolConfig::DOWNLOAD_PEERS as usize) + .saturating_pow(self.attempt as u32) + .saturating_sub(self.in_flight.len()); + + for peer_id in self + .all_peers + .iter() + .filter(|(_, &p)| p == PeerState::Resolved) + .choose_multiple(&mut SmallRng::from_entropy(), count) + .into_iter() + .map(|(peer_id, _)| *peer_id) + .collect::>() + { + self.all_peers.remove_entry(&peer_id); + self.download_one(&peer_id); + } + } + + fn download_one(&mut self, peer_id: &PeerId) { + let peer_id = peer_id.clone(); + self.in_flight.push( + tokio::time::timeout( + MempoolConfig::DOWNLOAD_TIMEOUT, + self.parent + .dispatcher + .request::(&peer_id, &self.request), + ) + .map(move |result| (peer_id, result.map(|(_, r)| r))) + .boxed(), + ); + } + + async fn match_resolved( + &mut self, + peer_id: PeerId, + resolved: Result, Elapsed>, + ) -> Option { + match resolved { + Err(_timeout) => _ = self.all_peers.remove(&peer_id), + Ok(Err(_network_err)) => _ = self.all_peers.remove(&peer_id), + Ok(Ok(PointByIdResponse(None))) => _ = self.all_peers.remove(&peer_id), + Ok(Ok(PointByIdResponse(Some(point)))) => { + if point.id() != self.point_id { + _ = self.all_peers.remove(&peer_id); + } + let Some(dag_round) = self.weak_dag_round.get() else { + // no more retries, too late; + // DAG could not have moved if this point was needed for commit + return Some(DagPoint::NotExists(Arc::new(self.point_id.clone()))); + }; + let point = Arc::new(point); + match Verifier::verify(&point, &self.parent.peer_schedule) { + Ok(()) => { + return Some( + Verifier::validate(point, dag_round, self.parent.clone()).await, + ) + } + Err(invalid @ DagPoint::Invalid(_)) => return Some(invalid), + Err(_not_exists) => _ = self.all_peers.remove(&peer_id), // ain't reliable peer + } + } + }; + // the point does not exist when only 1F left unqeried, + // assuming author and dependant are queried or unavailable + if self.all_peers.len() < self.node_count.reliable_minority() { + return Some(DagPoint::NotExists(Arc::new(self.point_id.clone()))); + } + if self.in_flight.is_empty() { + self.has_resolved_tx.send(false).ok(); + self.download(); + }; + if self.in_flight.is_empty() { + // mempool inclusion guarantees must be satisfied if less than 2F+1 nodes are online; + // so we should stall, waiting for peers to connect + if let Err(e) = self.has_resolved_rx.wait_for(|is| *is).await { + panic!("Downloader waiting for new resolved peer {e}") + }; + self.download(); + }; + None + } + + fn match_peer_updates(&mut self, result: Result<(PeerId, PeerState), RecvError>) { + match result { + Ok((peer_id, new)) => { + let mut is_resolved = false; + self.all_peers.entry(peer_id).and_modify(|old| { + if *old == PeerState::Unknown && new == PeerState::Resolved { + is_resolved = true; + } + *old = new; + }); + if is_resolved { + self.has_resolved_tx.send(true).ok(); + } + } + Err(err @ RecvError::Lagged(_)) => { 
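The retry fan-out above can be summarized with a small illustrative helper (DOWNLOAD_PEERS mirrors MempoolConfig; the helper itself is not part of the patch):

const DOWNLOAD_PEERS: u8 = 3;

// peers to add on each attempt: grows as DOWNLOAD_PEERS^attempt and is reduced
// by requests still in flight, mirroring `download` above
fn peers_to_query(attempt: u8, in_flight: usize) -> usize {
    (DOWNLOAD_PEERS as usize)
        .saturating_pow(attempt as u32)
        .saturating_sub(in_flight)
}

// attempt 1 -> 3 peers, attempt 2 -> 9, attempt 3 -> 27 (minus whatever is in flight)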
+ tracing::error!("Downloader peer updates {err}") + } + Err(err @ RecvError::Closed) => { + panic!("Downloader peer updates {err}") + } + } + } +} diff --git a/consensus/src/intercom/dependency/mod.rs b/consensus/src/intercom/dependency/mod.rs new file mode 100644 index 000000000..47118c5fd --- /dev/null +++ b/consensus/src/intercom/dependency/mod.rs @@ -0,0 +1,8 @@ +pub use downloader::*; +pub use uploader::*; + +// Note: intercom modules' responsibilities +// matches visibility of their internal DTOs + +mod downloader; +mod uploader; diff --git a/consensus/src/intercom/dependency/uploader.rs b/consensus/src/intercom/dependency/uploader.rs new file mode 100644 index 000000000..b3a2c81f5 --- /dev/null +++ b/consensus/src/intercom/dependency/uploader.rs @@ -0,0 +1,57 @@ +use std::ops::Deref; +use std::sync::Arc; + +use tokio::sync::{mpsc, oneshot, watch}; + +use crate::dag::WeakDagRound; +use crate::intercom::dto::PointByIdResponse; +use crate::models::{DagPoint, Point, PointId}; + +pub struct Uploader { + requests: mpsc::UnboundedReceiver<(PointId, oneshot::Sender)>, + top_dag_round: watch::Receiver, +} + +impl Uploader { + pub fn new( + requests: mpsc::UnboundedReceiver<(PointId, oneshot::Sender)>, + top_dag_round: watch::Receiver, + ) -> Self { + Self { + requests, + top_dag_round, + } + } + + pub async fn run(mut self) -> ! { + while let Some((point_id, callback)) = self.requests.recv().await { + if let Err(_) = callback.send(PointByIdResponse( + self.find(&point_id).await.map(|p| p.deref().clone()), + )) { + tracing::error!("Uploader result channel closed for {point_id:.4?}"); + }; + } + panic!("Uploader incoming channel closed") + } + + async fn find(&self, point_id: &PointId) -> Option> { + let top_dag_round = self.top_dag_round.borrow().get()?; + let shared = top_dag_round + .scan(&point_id.location.round) + .map(|dag_round| { + dag_round + .view(&point_id.location.author, |loc| { + loc.versions().get(&point_id.digest).cloned() + }) + .flatten() + }) + .flatten()?; + // keep such matching private to Uploader, it must not be used elsewhere + match shared.await { + (DagPoint::Trusted(valid), _) => Some(valid.point), + (DagPoint::Suspicious(valid), _) => Some(valid.point), + (DagPoint::Invalid(invalid), _) => Some(invalid), + (DagPoint::NotExists(_), _) => None, + } + } +} diff --git a/consensus/src/intercom/dto.rs b/consensus/src/intercom/dto.rs index c17040dd9..9c541763d 100644 --- a/consensus/src/intercom/dto.rs +++ b/consensus/src/intercom/dto.rs @@ -34,9 +34,10 @@ pub enum SignatureResponse { Rejected, } -#[derive(Clone, PartialEq, Debug)] +#[derive(Copy, Clone, PartialEq, Debug)] pub enum PeerState { - Added, // not yet ready to connect; always includes local peer id - Resolved, // remote peer ready to connect - Removed, // remote peer will not be added again + /// Not yet ready to connect or already disconnected; always includes local peer id. 
+ Unknown, + /// remote peer ready to connect + Resolved, } diff --git a/consensus/src/intercom/mod.rs b/consensus/src/intercom/mod.rs index b12242a1d..60d1b6c11 100644 --- a/consensus/src/intercom/mod.rs +++ b/consensus/src/intercom/mod.rs @@ -1,11 +1,13 @@ -pub use adapter::*; +pub use broadcast::*; pub use core::*; +pub use dependency::*; pub use peer_schedule::*; // Note: intercom modules' responsibilities -// matches visibility of their internal DTOs +// matches visibility of their internal DTOs -mod adapter; +mod broadcast; mod core; +mod dependency; mod dto; mod peer_schedule; diff --git a/consensus/src/intercom/peer_schedule/mod.rs b/consensus/src/intercom/peer_schedule/mod.rs index 667054f61..4d4c20a98 100644 --- a/consensus/src/intercom/peer_schedule/mod.rs +++ b/consensus/src/intercom/peer_schedule/mod.rs @@ -2,7 +2,7 @@ pub use peer_schedule::*; pub use peer_schedule_updater::*; // Note: intercom modules' responsibilities -// matches visibility of their internal DTOs +// matches visibility of their internal DTOs mod peer_schedule; mod peer_schedule_updater; diff --git a/consensus/src/intercom/peer_schedule/peer_schedule.rs b/consensus/src/intercom/peer_schedule/peer_schedule.rs index 86f6e6bdc..00fc9db96 100644 --- a/consensus/src/intercom/peer_schedule/peer_schedule.rs +++ b/consensus/src/intercom/peer_schedule/peer_schedule.rs @@ -29,7 +29,7 @@ pub struct PeerSchedule { // FIXME remove mutex ( parking_lot ! ) // and just restart updater when new peers or epoch start are known; // use copy-on-write to replace Inner as a whole; - // maybe store schedule-per-round inside DAG round, but how to deal with download tasks? + // maybe store schedule-per-round inside DAG round, but how to deal with download tasks then? inner: Arc>, // Connection to self is always "Added" // Updates are Resolved or Removed, sent single time @@ -52,6 +52,7 @@ impl PeerSchedule { this } + /// Does not return updates on local peer_id pub fn updates(&self) -> broadcast::Receiver<(PeerId, PeerState)> { tracing::info!("subscribing to peer updates"); self.updates.subscribe() @@ -60,32 +61,21 @@ impl PeerSchedule { // To sign a point or to query for points, we need to know the intersection of: // * which nodes are in the validator set during the round of interest // * which nodes are able to connect at the moment - /// TODO replace bool with AtomicBool? use Arc? 
to return map with auto refresh pub async fn wait_for_peers(&self, round: &Round, node_count: NodeCount) { let mut rx = self.updates(); - let mut peers = (*self.peers_for(round)).clone(); - let mut count = peers - .iter() - .filter(|(_, state)| **state == PeerState::Resolved) - .count(); + let peers = (*self.peers_for(round)).clone(); let local_id = self.local_id(); - while count < node_count.majority_of_others() { + let mut resolved = peers + .iter() + .filter(|(&peer_id, &state)| state == PeerState::Resolved && peer_id != local_id) + .map(|(peer_id, _)| *peer_id) + .collect::>(); + while resolved.len() < node_count.majority_of_others() { match rx.recv().await { - Ok((peer_id, new_state)) if peer_id != local_id => { - if let Some(state) = peers.get_mut(&peer_id) { - match (&state, &new_state) { - (PeerState::Added, PeerState::Removed) => count -= 1, - (PeerState::Resolved, PeerState::Removed) => count -= 1, - (PeerState::Added, PeerState::Resolved) => count += 1, - (PeerState::Removed, PeerState::Resolved) => { - count += 1; // should not occur - tracing::warn!("peer {peer_id} is resolved after being removed") - } - (_, _) => {} - } - *state = new_state; - } - } + Ok((peer_id, new_state)) => match new_state { + PeerState::Resolved => _ = resolved.insert(peer_id), + PeerState::Unknown => _ = resolved.remove(&peer_id), + }, _ => {} } } @@ -192,6 +182,7 @@ impl PeerSchedule { } pub fn set_next_peers(&self, peers: &Vec<(PeerId, bool)>) { + let local_id = self.local_id(); let mut all_peers = BTreeMap::new(); let mut inner = self.inner.lock(); for i in 0..inner.peers_resolved.len() { @@ -199,21 +190,18 @@ impl PeerSchedule { } let old = peers .iter() - .filter_map(|(peer_id, _)| { - all_peers - .get(peer_id) - .map(|&state| (peer_id.clone(), state.clone())) - }) + .filter_map(|(peer_id, _)| all_peers.get(peer_id).map(|state| (*peer_id, *state))) .collect::>(); + // detach existing copies - they are tightened to use-site DAG round let next = Arc::make_mut(&mut inner.peers_resolved[2]); next.clear(); next.extend(peers.clone().into_iter().map(|(peer_id, is_resolved)| { ( peer_id, - if is_resolved { + if is_resolved && peer_id != local_id { PeerState::Resolved } else { - PeerState::Added + PeerState::Unknown }, ) })); @@ -223,10 +211,10 @@ impl PeerSchedule { /// Returns [true] if update was successfully applied pub(super) fn set_resolved(&self, peer_id: &PeerId, resolved: bool) -> bool { let mut is_applied = false; - let new_state = if resolved { + let new_state = if resolved && peer_id != self.local_id() { PeerState::Resolved } else { - PeerState::Removed + PeerState::Unknown }; { let mut inner = self.inner.lock(); diff --git a/consensus/src/models/node_count.rs b/consensus/src/models/node_count.rs index b7e32cb69..f06859a4a 100644 --- a/consensus/src/models/node_count.rs +++ b/consensus/src/models/node_count.rs @@ -1,13 +1,9 @@ -use std::fmt::Formatter; - #[derive(Clone)] -pub struct NodeCount(usize); +pub struct NodeCount(u8); impl std::fmt::Debug for NodeCount { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.write_str("NodeCount(")?; - f.write_str(self.full().to_string().as_str())?; - f.write_str(")") + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "NodeCount({})", self.full()) } } @@ -32,32 +28,34 @@ impl NodeCount { total_peers != 0 && total_peers != 2, "invalid node count: {total_peers}" ); - // ceil up to 3F+1; assume the least possible amount of nodes is offline - let count = ((total_peers + 1) / 3) * 3 + 1; + // ceil up to 3F+1 and 
scale down to 1F, + // assuming the least possible amount of nodes is not in validator set + let one_f = (total_peers + 1) / 3; assert!( - total_peers <= count, - "node count {total_peers} overflows after rounding up to 3F+1" + u8::try_from(one_f).is_ok(), + "node count 1F={one_f} overflows u8 after scaling {total_peers} up to 3F+1" ); - NodeCount((count - 1) / 3) // 1F + NodeCount(one_f as u8) } fn full(&self) -> usize { - self.0 * 3 + 1 + self.0 as usize * 3 + 1 } pub fn majority(&self) -> usize { - self.0 * 2 + 1 + self.0 as usize * 2 + 1 } /// excluding either current node or the point's author, depending on the context pub fn majority_of_others(&self) -> usize { - // yes, genesis has the contradiction: reliable minority > majority of others; - // but no node may exist in genesis, thus cannot exclude itself from it - self.0 * 2 + // at first glance, genesis has a contradiction: reliable minority > majority of others; + // but a real node cannot exist in genesis, thus cannot exclude itself from it + self.0 as usize * 2 } + /// at least one node is reliable pub fn reliable_minority(&self) -> usize { - self.0 + 1 + self.0 as usize + 1 } /* pub fn unreliable(&self) -> usize { diff --git a/consensus/src/models/point.rs b/consensus/src/models/point.rs index 263304d37..7a70c5c11 100644 --- a/consensus/src/models/point.rs +++ b/consensus/src/models/point.rs @@ -122,7 +122,7 @@ pub struct PrevPoint { pub digest: Digest, /// `>= 2F` neighbours, order does not matter; /// point author is excluded: everyone must use the proven point to validate its proof - // Note: bincode may be non-stable on (de)serializing hashmaps due to different local order + // Note: bincode may be non-stable on (de)serializing HashMap due to different local order pub evidence: BTreeMap, // TODO if we use TL, then every node can sign hash of a point's body (not all body bytes) // so we can include that hash into PrevPoint @@ -191,6 +191,7 @@ impl PointBody { } } +// Todo: Arc => Point(Arc<...{...}>) #[derive(Clone, Serialize, Deserialize, Debug)] pub struct Point { pub body: PointBody, diff --git a/consensus/src/test_utils.rs b/consensus/src/test_utils.rs index a2a1d1c46..dc3d71e35 100644 --- a/consensus/src/test_utils.rs +++ b/consensus/src/test_utils.rs @@ -154,7 +154,7 @@ mod tests { check_parking_lot(); heart_beat(); let mut js = JoinSet::new(); - for engine in make_network(3).await { + for engine in make_network(4).await { js.spawn(engine.run()); } while let Some(res) = js.join_next().await { From e3bbc5ba8b73414e6842772040e38e803b40ece5 Mon Sep 17 00:00:00 2001 From: Kirill Mikheev Date: Mon, 29 Apr 2024 11:10:01 +0300 Subject: [PATCH 16/32] fix(consensus): debug run on 4 nodes --- consensus/src/dag/dag.rs | 76 +++-- consensus/src/dag/dag_location.rs | 21 +- consensus/src/dag/dag_round.rs | 7 +- consensus/src/dag/producer.rs | 54 +++- consensus/src/engine/engine.rs | 271 +++++++++--------- .../intercom/broadcast/broadcast_filter.rs | 2 +- .../src/intercom/broadcast/broadcaster.rs | 69 +++-- consensus/src/intercom/broadcast/collector.rs | 265 ++++++++++------- consensus/src/intercom/broadcast/dto.rs | 17 +- consensus/src/intercom/core/dispatcher.rs | 1 + consensus/src/intercom/dependency/uploader.rs | 8 +- .../intercom/peer_schedule/peer_schedule.rs | 42 +-- .../peer_schedule/peer_schedule_updater.rs | 26 +- consensus/src/models/dag_point.rs | 7 + consensus/src/models/mod.rs | 2 + consensus/src/models/node_count.rs | 2 +- consensus/src/models/point.rs | 6 + consensus/src/models/ugly.rs | 55 ++++ 18 files changed, 544 
insertions(+), 387 deletions(-) create mode 100644 consensus/src/models/ugly.rs diff --git a/consensus/src/dag/dag.rs b/consensus/src/dag/dag.rs index ef10f5571..dabf22a20 100644 --- a/consensus/src/dag/dag.rs +++ b/consensus/src/dag/dag.rs @@ -1,4 +1,4 @@ -use std::collections::{BTreeMap, VecDeque}; +use std::collections::BTreeMap; use std::sync::atomic::Ordering; use std::sync::Arc; @@ -9,66 +9,55 @@ use parking_lot::Mutex; use crate::dag::anchor_stage::AnchorStage; use crate::dag::DagRound; use crate::engine::MempoolConfig; +use crate::intercom::PeerSchedule; use crate::models::{Point, Round, ValidPoint}; #[derive(Clone)] pub struct Dag { - // from the oldest to the current round; newer ones are in the future + // from the oldest to the current round; newer ones are in the future; + // rounds: Arc>>, } impl Dag { - // pub fn new(peer_schedule: &PeerSchedule) -> Self { - // Self { - // rounds: BTreeMap::from([(Arc::new(DagRound::new(round, &peer_schedule, None)))]), - // peer_schedule, - // } - // } - // - // // TODO new point is checked against the dag only if it has valid sig, time and round - // // TODO download from neighbours - // pub fn fill_up_to(&mut self, round: Round) { - // match self.rounds.last_key_value() { - // None => unreachable!("DAG empty"), - // Some((last, _)) => { - // for round in (last.0..round.0).into_iter().map(|i| Round(i + 1)) { - // let prev = self.rounds.last_key_value().map(|(_, v)| Arc::downgrade(v)); - // self.rounds.entry(round).or_insert_with(|| { - // Arc::new(DagRound::new(round, &self.peer_schedule, prev)) - // }); - // } - // } - // } - // } - - pub fn new() -> Self { + pub fn new(dag_round: DagRound) -> Self { + let mut rounds = BTreeMap::new(); + rounds.insert(dag_round.round().clone(), dag_round); Self { - rounds: Default::default(), + rounds: Arc::new(Mutex::new(rounds)), } } - pub fn get_or_insert(&self, dag_round: DagRound) -> DagRound { + pub fn top(&self, round: &Round, peer_schedule: &PeerSchedule) -> DagRound { let mut rounds = self.rounds.lock(); - rounds - .entry(dag_round.round().clone()) - .or_insert(dag_round) - .clone() + let mut top = match rounds.last_key_value() { + None => unreachable!("DAG cannot be empty"), + Some((_, top)) => top.clone(), + }; + if (top.round().0 + MempoolConfig::COMMIT_DEPTH as u32) < round.0 { + unimplemented!("sync") + } + for _ in top.round().next().0..=round.0 { + top = rounds + .entry(top.round().next()) + .or_insert(top.next(peer_schedule)) + .clone(); + } + top } // fixme must not be async - pub async fn commit( - self, - next_dag_round: DagRound, - ) -> VecDeque<(Arc, VecDeque>)> { + /// result is in historical order + pub async fn commit(self, next_dag_round: DagRound) -> Vec<(Arc, Vec>)> { let Some(latest_trigger) = Self::latest_trigger(&next_dag_round).await else { - return VecDeque::new(); + return Vec::new(); }; let mut anchor_stack = Self::anchor_stack(&latest_trigger, next_dag_round.clone()).await; - let mut ordered = VecDeque::new(); + let mut ordered = Vec::new(); while let Some((anchor, anchor_round)) = anchor_stack.pop() { self.drop_tail(anchor.point.body.location.round); let committed = Self::gather_uncommitted(&anchor.point, &anchor_round).await; - ordered.push_back((anchor.point, committed)); + ordered.push((anchor.point, committed)); } ordered } @@ -192,13 +181,13 @@ impl Dag { }; } - /// returns historically ordered vertices (back to front is older to newer) + /// returns historically ordered vertices /// /// Note: at this point there is no way to check if passed point is really an 
anchor async fn gather_uncommitted( anchor /* @ r+1 */: &Point, anchor_round /* r+1 */: &DagRound, - ) -> VecDeque> { + ) -> Vec> { assert_eq!( *anchor_round.round(), anchor.body.location.round, @@ -216,7 +205,7 @@ impl Dag { ]; _ = anchor; // anchor payload will be committed the next time - let mut uncommitted = VecDeque::new(); + let mut uncommitted = Vec::new(); // TODO visited rounds count must be equal to dag depth: // read/download non-existent rounds and drop too old ones @@ -257,12 +246,13 @@ impl Dag { // vertex will be skipped in r_1 as committed r[2].extend(vertex.point.body.includes.clone()); // points @ r-2 r[3].extend(vertex.point.body.witness.clone()); // points @ r-3 - uncommitted.push_back(vertex.point); // LIFO + uncommitted.push(vertex.point); } } proof_round = vertex_round; // next r+0 r.rotate_left(1); } + uncommitted.reverse(); uncommitted } } diff --git a/consensus/src/dag/dag_location.rs b/consensus/src/dag/dag_location.rs index f25fa8c7d..15edaae7b 100644 --- a/consensus/src/dag/dag_location.rs +++ b/consensus/src/dag/dag_location.rs @@ -8,7 +8,7 @@ use futures_util::FutureExt; use tycho_util::futures::{JoinTask, Shared}; -use crate::models::{DagPoint, Digest, PointId, Round, Signature, UnixTime, ValidPoint}; +use crate::models::{DagPoint, Digest, Round, Signature, UnixTime, ValidPoint}; /// If DAG location exists, it must have non-empty `versions` map; /// @@ -105,7 +105,7 @@ impl InclusionState { pub fn init(&self, first_completed: &DagPoint) { _ = self.0.get_or_init(|| { let signed = OnceLock::new(); - if Signable::filter(first_completed).is_none() { + if first_completed.trusted().is_none() { _ = signed.set(Err(())); } Signable { @@ -116,8 +116,8 @@ impl InclusionState { } fn insert_own_point(&self, my_point: &DagPoint) { let signed = OnceLock::new(); - match Signable::filter(my_point) { - None => assert!(false, "Coding error: own point is not signable"), + match my_point.trusted() { + None => assert!(false, "Coding error: own point is not trusted"), Some(valid) => { _ = signed.set(Ok(Signed { at: valid.point.body.location.round.clone(), @@ -151,9 +151,8 @@ impl InclusionState { None } } - /// only for logging - pub fn init_id(&self) -> Option { - self.0.get().map(|signable| signable.first_completed.id()) + pub fn point(&self) -> Option<&DagPoint> { + self.0.get().map(|signable| &signable.first_completed) } } #[derive(Debug)] @@ -177,7 +176,7 @@ impl Signable { time_range: RangeInclusive, ) -> bool { let mut this_call_signed = false; - if let Some((valid, key_pair)) = Self::filter(&self.first_completed).zip(key_pair) { + if let Some((valid, key_pair)) = self.first_completed.trusted().zip(key_pair) { if time_range.contains(&valid.point.body.time) { _ = self.signed.get_or_init(|| { this_call_signed = true; @@ -200,10 +199,4 @@ impl Signable { fn is_completed(&self) -> bool { self.signed.get().is_some() } - fn filter(first_completed: &DagPoint) -> Option<&ValidPoint> { - match first_completed { - DagPoint::Trusted(valid) => Some(valid), - _ => None, // including valid Suspicious - } - } } diff --git a/consensus/src/dag/dag_round.rs b/consensus/src/dag/dag_round.rs index cbe7260bd..875cfb7f5 100644 --- a/consensus/src/dag/dag_round.rs +++ b/consensus/src/dag/dag_round.rs @@ -1,6 +1,5 @@ use std::sync::{Arc, Weak}; -use ahash::RandomState; use everscale_crypto::ed25519::KeyPair; use futures_util::future::BoxFuture; use futures_util::FutureExt; @@ -41,7 +40,7 @@ impl WeakDagRound { impl DagRound { pub fn new(round: Round, peer_schedule: &PeerSchedule, prev: 
WeakDagRound) -> Self { let peers = peer_schedule.peers_for(&round); - let locations = FastDashMap::with_capacity_and_hasher(peers.len(), RandomState::new()); + let locations = FastDashMap::with_capacity_and_hasher(peers.len(), Default::default()); Self(Arc::new(DagRoundInner { round, node_count: NodeCount::try_from(peers.len()) @@ -56,7 +55,7 @@ impl DagRound { pub fn next(&self, peer_schedule: &PeerSchedule) -> Self { let next_round = self.round().next(); let peers = peer_schedule.peers_for(&next_round); - let locations = FastDashMap::with_capacity_and_hasher(peers.len(), RandomState::new()); + let locations = FastDashMap::with_capacity_and_hasher(peers.len(), Default::default()); Self(Arc::new(DagRoundInner { round: next_round, node_count: NodeCount::try_from(peers.len()) @@ -69,7 +68,7 @@ impl DagRound { } pub fn genesis(genesis: &Arc, peer_schedule: &PeerSchedule) -> Self { - let locations = FastDashMap::with_capacity_and_hasher(1, RandomState::new()); + let locations = FastDashMap::with_capacity_and_hasher(1, Default::default()); let round = genesis.body.location.round; Self(Arc::new(DagRoundInner { round, diff --git a/consensus/src/dag/producer.rs b/consensus/src/dag/producer.rs index 0d0f7c2ef..b364d51cb 100644 --- a/consensus/src/dag/producer.rs +++ b/consensus/src/dag/producer.rs @@ -14,14 +14,14 @@ pub struct Producer; impl Producer { pub async fn new_point( - finished_round: &DagRound, - new_round: &DagRound, + current_round: &DagRound, prev_point: Option<&PrevPoint>, payload: Vec, ) -> Option> { - let key_pair = new_round.key_pair()?; + let finished_round = current_round.prev().get()?; + let key_pair = current_round.key_pair()?; let local_id = PeerId::from(key_pair.public_key); - match new_round.anchor_stage() { + match current_round.anchor_stage() { Some(AnchorStage::Proof { leader, .. } | AnchorStage::Trigger { leader, .. 
}) if leader == &local_id && prev_point.is_none() => { @@ -30,14 +30,16 @@ impl Producer { } _ => {} }; - let includes = Self::includes(finished_round); - let mut anchor_trigger = Self::link_from_includes(&local_id, &new_round, &includes, true); - let mut anchor_proof = Self::link_from_includes(&local_id, &new_round, &includes, false); - let witness = Self::witness(finished_round); + let includes = Self::includes(&finished_round); + let mut anchor_trigger = + Self::link_from_includes(&local_id, ¤t_round, &includes, true); + let mut anchor_proof = + Self::link_from_includes(&local_id, ¤t_round, &includes, false); + let witness = Self::witness(&finished_round); Self::update_link_from_witness(&mut anchor_trigger, finished_round.round(), &witness, true); Self::update_link_from_witness(&mut anchor_proof, finished_round.round(), &witness, false); let time = Self::get_time( - finished_round, + &finished_round, &local_id, &anchor_proof, prev_point, @@ -49,6 +51,17 @@ impl Producer { .into_iter() .map(|point| (point.body.location.author, point.digest.clone())) .collect::>(); + assert!( + prev_point.map_or(true, |prev| includes + .get(&local_id) + .map_or(false, |digest| digest == &prev.digest)), + "must include own point if it exists" + ); + assert!( + prev_point.map_or(true, |prev| prev.evidence.len() + >= current_round.node_count().majority_of_others()), + "Collected not enough evidence, check Broadcaster logic" + ); let witness = witness .into_iter() .map(|point| (point.body.location.author, point.digest.clone())) @@ -56,7 +69,7 @@ impl Producer { Some(Arc::new( PointBody { location: Location { - round: new_round.round().clone(), + round: current_round.round().clone(), author: local_id.clone(), }, time, @@ -75,8 +88,19 @@ impl Producer { let includes = finished_round .select(|(_, loc)| { loc.state() - .signed_point(finished_round.round()) - .map(|valid| valid.point.clone()) + .point() + .map(|dag_point| dag_point.trusted()) + .flatten() + .filter(|_| { + loc.state() + .signed() + .map(|r| { + r.as_ref() + .map_or(false, |s| &s.at == finished_round.round()) + }) + .unwrap_or(true) + }) + .map(|dag_point| dag_point.point.clone()) }) .collect::>(); assert!( @@ -102,11 +126,11 @@ impl Producer { fn link_from_includes( local_id: &PeerId, - new_round: &DagRound, + current_round: &DagRound, includes: &Vec>, is_for_trigger: bool, ) -> Link { - match new_round.anchor_stage() { + match current_round.anchor_stage() { Some(AnchorStage::Trigger { leader, .. 
}) if is_for_trigger && leader == local_id => { Link::ToSelf } @@ -124,7 +148,7 @@ impl Producer { } }) .expect("non-empty list of includes for own point"); - if point.body.location.round == new_round.round().prev() + if point.body.location.round == current_round.round().prev() && ((is_for_trigger && point.body.anchor_trigger == Link::ToSelf) || (!is_for_trigger && point.body.anchor_proof == Link::ToSelf)) { diff --git a/consensus/src/engine/engine.rs b/consensus/src/engine/engine.rs index 75c484867..2278cefdb 100644 --- a/consensus/src/engine/engine.rs +++ b/consensus/src/engine/engine.rs @@ -2,30 +2,28 @@ use std::sync::Arc; use everscale_crypto::ed25519::{KeyPair, SecretKey}; use itertools::Itertools; -use tokio::sync::{mpsc, watch}; +use tokio::sync::{mpsc, oneshot, watch}; use tokio::task::JoinSet; use tycho_network::{DhtClient, OverlayService, PeerId}; -use crate::dag::{Dag, DagRound, Producer, WeakDagRound}; +use crate::dag::{Dag, DagRound, InclusionState, Producer}; use crate::intercom::{ - BroadcastFilter, Broadcaster, BroadcasterSignal, Collector, Dispatcher, Downloader, - PeerSchedule, PeerScheduleUpdater, Responder, Uploader, + BroadcastFilter, Broadcaster, BroadcasterSignal, Collector, CollectorSignal, Dispatcher, + Downloader, PeerSchedule, PeerScheduleUpdater, Responder, Uploader, }; -use crate::models::{Point, PrevPoint}; +use crate::models::{PrevPoint, Ugly}; pub struct Engine { - dag: Dag, local_id: Arc, + dag: Dag, peer_schedule: Arc, dispatcher: Dispatcher, downloader: Downloader, collector: Collector, broadcast_filter: BroadcastFilter, - cur_point: Option>, - current_dag_round: DagRound, - top_dag_round_watch: watch::Sender, - tasks: JoinSet<()>, // should be JoinSet https://github.com/rust-lang/rust/issues/35121 + top_dag_round_watch: watch::Sender, + tasks: JoinSet<()>, // should be JoinSet } impl Engine { @@ -36,7 +34,7 @@ impl Engine { peers: &Vec, ) -> Self { let key_pair = KeyPair::from(secret_key); - let local_id = Arc::new(format!("{:.4?}", PeerId::from(key_pair.public_key))); + let local_id = Arc::new(format!("{:?}", PeerId::from(key_pair.public_key).ugly())); let peer_schedule = Arc::new(PeerSchedule::new(Arc::new(key_pair))); let (bcast_tx, bcast_rx) = mpsc::unbounded_channel(); @@ -61,43 +59,31 @@ impl Engine { ); let genesis = Arc::new(crate::test_utils::genesis()); + let peer_schedule_updater = + PeerScheduleUpdater::new(dispatcher.overlay.clone(), peer_schedule.clone()); // finished epoch - peer_schedule.set_next_peers(&vec![(genesis.body.location.author, false)]); peer_schedule.set_next_start(genesis.body.location.round); + peer_schedule_updater.set_next_peers(&vec![genesis.body.location.author]); peer_schedule.rotate(); // current epoch peer_schedule.set_next_start(genesis.body.location.round.next()); - peer_schedule.set_next_peers( - &dispatcher - .overlay - .read_entries() - .iter() - .map(|a| (a.peer_id, a.resolver_handle.is_resolved())) - .collect(), - ); - peer_schedule.rotate(); // start updater only after peers are populated into schedule - PeerScheduleUpdater::run(dispatcher.overlay.clone(), peer_schedule.clone()); - - // tOdO define if the last round is finished based on peer schedule - // move out from bcaster & collector ? where to get our last point from ? - - // tOdO в конце каждого раунда берем точку с триггером - // и комиттим - // * either own point contains Trigger - // * or search through last round to find the latest trigger - // * * can U do so without scan of a round ??? 
+ peer_schedule_updater.set_next_peers(peers); + peer_schedule.rotate(); - let dag = Dag::new(); - let current_dag_round = dag.get_or_insert(DagRound::genesis(&genesis, &peer_schedule)); + let current_dag_round = DagRound::genesis(&genesis, &peer_schedule); + let dag = Dag::new(current_dag_round.clone()); - let (top_dag_round_watch, top_dag_round_rx) = watch::channel(current_dag_round.as_weak()); + let (top_dag_round_tx, top_dag_round_rx) = watch::channel(current_dag_round.clone()); let mut tasks = JoinSet::new(); let uploader = Uploader::new(uploader_rx, top_dag_round_rx); tasks.spawn(async move { uploader.run().await; }); + tasks.spawn(async move { + peer_schedule_updater.run().await; + }); let downloader = Downloader::new(local_id.clone(), &dispatcher, &peer_schedule); @@ -110,34 +96,92 @@ impl Engine { bcast_rx, sig_responses, genesis_state.into_iter(), - current_dag_round.round().clone(), + current_dag_round.round().next(), ); Self { - dag, local_id, + dag, peer_schedule, dispatcher, downloader, collector, broadcast_filter, - cur_point: None, - current_dag_round, - top_dag_round_watch, + top_dag_round_watch: top_dag_round_tx, tasks, } } + async fn bcaster_run( + local_id: Arc, + produce_own_point: bool, + dispatcher: Dispatcher, + peer_schedule: Arc, + downloader: Downloader, + current_dag_round: DagRound, + prev_point: Option, + own_point_state: oneshot::Sender, + bcaster_ready_tx: mpsc::Sender, + mut collector_signal_rx: mpsc::UnboundedReceiver, + ) -> Option { + if produce_own_point { + if let Some(own_point) = + Producer::new_point(¤t_dag_round, prev_point.as_ref(), vec![]).await + { + let state = current_dag_round + .insert_exact_validate(&own_point, &peer_schedule, &downloader) + .await + .expect("own produced point must be valid"); + own_point_state.send(state).ok(); + let evidence = Broadcaster::new( + &local_id, + &own_point, + &dispatcher, + &peer_schedule, + bcaster_ready_tx, + collector_signal_rx, + ) + .run() + .await; + return Some(PrevPoint { + digest: own_point.digest.clone(), + evidence: evidence.into_iter().collect(), + }); + } + } + _ = own_point_state; + collector_signal_rx.close(); + bcaster_ready_tx.send(BroadcasterSignal::Ok).await.ok(); + None + } pub async fn run(mut self) -> ! 
{ + let mut prev_point: Option = None; + let mut produce_own_point = true; loop { + let current_dag_round = self + .dag + .top(self.collector.next_round(), &self.peer_schedule); let next_dag_round = self .dag - .get_or_insert(self.current_dag_round.next(self.peer_schedule.as_ref())); - self.top_dag_round_watch.send(next_dag_round.as_weak()).ok(); + .top(¤t_dag_round.round().next(), &self.peer_schedule); + self.top_dag_round_watch.send(next_dag_round.clone()).ok(); let (bcaster_ready_tx, bcaster_ready_rx) = mpsc::channel(1); // let this channel unbounded - there won't be many items, but every of them is essential - let (collector_signal_tx, mut collector_signal_rx) = mpsc::unbounded_channel(); + let (collector_signal_tx, collector_signal_rx) = mpsc::unbounded_channel(); + let (own_point_state_tx, own_point_state_rx) = oneshot::channel(); + let bcaster_run = tokio::spawn(Self::bcaster_run( + self.local_id.clone(), + produce_own_point, + self.dispatcher.clone(), + self.peer_schedule.clone(), + self.downloader.clone(), + current_dag_round.clone(), + prev_point, + own_point_state_tx, + bcaster_ready_tx, + collector_signal_rx, + )); let commit_run = tokio::spawn(self.dag.clone().commit(next_dag_round.clone())); let bcast_filter_upd = { @@ -145,109 +189,52 @@ impl Engine { let round = next_dag_round.round().clone(); tokio::spawn(async move { bcast_filter.advance_round(&round) }) }; - // TODO change round, then - // apply peer schedule and config changes if some - // spawn collector - // spawn producer + broadcaster - // spawn commit + drop dag tail (async?!) into futures ordered - // it shouldn't take longer than round; - // the other way it should make the change of rounds slower, - // in order to prevent unlimited DAG growth - // sync if collector detected a gap exceeding dag depth - // join - if let Some(own_point) = self.cur_point { - let own_state = self - .current_dag_round - .insert_exact_validate(&own_point, &self.peer_schedule, &self.downloader) - .await; - let collector_run = tokio::spawn(self.collector.run( - next_dag_round.clone(), - Some(own_point.clone()), - collector_signal_tx, - bcaster_ready_rx, - )); - let bcaster_run = tokio::spawn( - Broadcaster::new( - &self.local_id, - &own_point, - &self.dispatcher, - &self.peer_schedule, - bcaster_ready_tx, - collector_signal_rx, - ) - .run(), - ); - match tokio::join!(collector_run, bcaster_run, commit_run, bcast_filter_upd) { - (Ok(collector_upd), Ok(evidence), Ok(committed), Ok(_bcast_filter_upd)) => { - tracing::info!("committed {:.4?}", committed); - let prev_point = Some(PrevPoint { - digest: own_point.digest.clone(), - evidence: evidence.into_iter().collect(), - }); - if collector_upd.next_round() == next_dag_round.round() { - self.cur_point = Producer::new_point( - &self.current_dag_round, - &next_dag_round, - prev_point.as_ref(), - vec![], - ) - .await; - } else { - todo!("must fill gaps with empty rounds") - } - self.current_dag_round = next_dag_round; - self.collector = collector_upd; - } - (collector, bcaster, commit, bcast_filter_upd) => { - let msg = [ - (collector.err(), "collector"), - (bcaster.err(), "broadcaster"), - (commit.err(), "commit"), - (bcast_filter_upd.err(), "broadcast filter update"), - ] + + let collector_run = tokio::spawn(self.collector.run( + next_dag_round.clone(), + own_point_state_rx, + collector_signal_tx, + bcaster_ready_rx, + )); + + match tokio::join!(collector_run, bcaster_run, commit_run, bcast_filter_upd) { + (Ok(collector_upd), Ok(new_prev_point), Ok(committed), Ok(_bcast_filter_upd)) => { + let 
committed = committed .into_iter() - .filter_map(|(res, name)| { - res.map(|err| format!("{name} task panicked: {err:?}")) + .map(|(anchor, history)| { + let history = history + .into_iter() + .map(|point| format!("{:?}", point.id().ugly())) + .join(", "); + format!( + "anchor {:?} time {} : [ {history} ]", + anchor.id().ugly(), + anchor.body.time + ) }) - .join("; \n"); - panic!("{}", msg) - } + .join(" ; "); + tracing::info!( + "{} @ {:?} committed {committed}", + self.local_id, + current_dag_round.round() + ); + prev_point = new_prev_point; + produce_own_point = next_dag_round.round() == collector_upd.next_round(); + self.collector = collector_upd; } - } else { - collector_signal_rx.close(); - _ = bcaster_ready_tx.send(BroadcasterSignal::Ok).await; - let collector_run = tokio::spawn(self.collector.run( - next_dag_round.clone(), - None, - collector_signal_tx, - bcaster_ready_rx, - )); - match tokio::join!(collector_run, commit_run, bcast_filter_upd) { - (Ok(collector_upd), Ok(committed), Ok(_bcast_filter_upd)) => { - tracing::info!("committed {:.4?}", committed); - self.cur_point = Producer::new_point( - &self.current_dag_round, - &next_dag_round, - None, - vec![], - ) - .await; - self.current_dag_round = next_dag_round; // FIXME must fill gaps with empty rounds - self.collector = collector_upd; - } - (collector, commit, bcast_filter_upd) => { - let msg = [ - (collector.err(), "collector"), - (commit.err(), "commit"), - (bcast_filter_upd.err(), "broadcast filter update"), - ] - .into_iter() - .filter_map(|(res, name)| { - res.map(|err| format!("{name} task panicked: {err:?}")) - }) - .join("; \n"); - panic!("{}", msg) - } + (collector, bcaster, commit, bcast_filter_upd) => { + let msg = [ + (collector.err(), "collector"), + (bcaster.err(), "broadcaster"), + (commit.err(), "commit"), + (bcast_filter_upd.err(), "broadcast filter update"), + ] + .into_iter() + .filter_map(|(res, name)| { + res.map(|err| format!("{name} task panicked: {err:?}")) + }) + .join("; \n"); + panic!("{}", msg) } } } diff --git a/consensus/src/intercom/broadcast/broadcast_filter.rs b/consensus/src/intercom/broadcast/broadcast_filter.rs index 6b4f18997..40f4cbdaf 100644 --- a/consensus/src/intercom/broadcast/broadcast_filter.rs +++ b/consensus/src/intercom/broadcast/broadcast_filter.rs @@ -118,7 +118,7 @@ impl BroadcastFilterInner { ConsensusEvent::Invalid(dag_point) } }; - if round <= dag_round.next() { + if round <= dag_round { let response = if matches!(point, ConsensusEvent::Invalid(_)) { BroadcastResponse::Rejected } else if round >= dag_round.prev() { diff --git a/consensus/src/intercom/broadcast/broadcaster.rs b/consensus/src/intercom/broadcast/broadcaster.rs index 7163264f1..4fc785625 100644 --- a/consensus/src/intercom/broadcast/broadcaster.rs +++ b/consensus/src/intercom/broadcast/broadcaster.rs @@ -10,7 +10,7 @@ use tokio::sync::mpsc; use tycho_network::PeerId; use tycho_util::{FastHashMap, FastHashSet}; -use crate::intercom::broadcast::dto::CollectorSignal; +use crate::intercom::broadcast::collector::CollectorSignal; use crate::intercom::dto::{BroadcastResponse, PeerState, SignatureResponse}; use crate::intercom::{Dispatcher, PeerSchedule}; use crate::models::{NodeCount, Point, Round, Signature}; @@ -122,6 +122,11 @@ impl Broadcaster { } loop { tokio::select! 
{ + Some(collector_signal) = self.collector_signal.recv() => { + if self.should_finish(collector_signal).await { + break self.signatures + } + } Some((peer_id, result)) = self.bcast_futs.next() => { self.match_broadcast_result(peer_id, result) }, @@ -131,17 +136,13 @@ impl Broadcaster { update = self.peer_updates.recv() => { self.match_peer_updates(update) } - Some(collector_signal) = self.collector_signal.recv() => { - if self.should_finish(collector_signal).await { - break self.signatures - } - } else => { panic!("bcaster unhandled"); } } } } + async fn should_finish(&mut self, collector_signal: CollectorSignal) -> bool { tracing::info!( "{} @ {:?} bcaster <= Collector::{collector_signal:?} : sigs {} of {}, rejects {} of {}", @@ -173,31 +174,7 @@ impl Broadcaster { } } } - fn match_peer_updates(&mut self, result: Result<(PeerId, PeerState), RecvError>) { - match result { - Ok((peer_id, new_state)) => { - tracing::info!( - "{} @ {:?} bcaster peer update: {peer_id:?} -> {new_state:?}", - self.local_id, - self.current_round - ); - match new_state { - PeerState::Resolved => { - self.removed_peers.remove(&peer_id); - self.rejections.remove(&peer_id); - self.broadcast(&peer_id); - } - PeerState::Unknown => _ = self.removed_peers.insert(peer_id), - } - } - Err(err @ RecvError::Lagged(_)) => { - tracing::error!("Broadcaster peer updates {err}") - } - Err(err @ RecvError::Closed) => { - panic!("Broadcaster peer updates {err}") - } - } - } + fn match_broadcast_result(&mut self, peer_id: PeerId, result: BcastResult) { match result { Err(error) => { @@ -236,6 +213,7 @@ impl Broadcaster { } } } + fn match_signature_result(&mut self, peer_id: PeerId, result: SigResult) { match result { Err(error) => { @@ -284,6 +262,7 @@ impl Broadcaster { } } } + fn broadcast(&mut self, peer_id: &PeerId) { if self.removed_peers.is_empty() || !self.removed_peers.remove(&peer_id) { self.bcast_futs @@ -301,6 +280,7 @@ impl Broadcaster { ); } } + fn request_signature(&mut self, peer_id: &PeerId) { if self.removed_peers.is_empty() || !self.removed_peers.remove(&peer_id) { self.sig_futs @@ -318,6 +298,7 @@ impl Broadcaster { ); } } + fn is_signature_ok(&self, peer_id: &PeerId, signature: &Signature) -> bool { let sig_raw: Result<[u8; 64], _> = signature.0.to_vec().try_into(); sig_raw @@ -327,4 +308,30 @@ impl Broadcaster { pub_key.verify_raw(self.point_body.as_slice(), &sig_raw) }) } + + fn match_peer_updates(&mut self, result: Result<(PeerId, PeerState), RecvError>) { + match result { + Ok((peer_id, new_state)) => { + tracing::info!( + "{} @ {:?} bcaster peer update: {peer_id:?} -> {new_state:?}", + self.local_id, + self.current_round + ); + match new_state { + PeerState::Resolved => { + self.removed_peers.remove(&peer_id); + self.rejections.remove(&peer_id); + self.broadcast(&peer_id); + } + PeerState::Unknown => _ = self.removed_peers.insert(peer_id), + } + } + Err(err @ RecvError::Lagged(_)) => { + tracing::error!("Broadcaster peer updates {err}") + } + Err(err @ RecvError::Closed) => { + panic!("Broadcaster peer updates {err}") + } + } + } } diff --git a/consensus/src/intercom/broadcast/collector.rs b/consensus/src/intercom/broadcast/collector.rs index e59e97999..8a390a74e 100644 --- a/consensus/src/intercom/broadcast/collector.rs +++ b/consensus/src/intercom/broadcast/collector.rs @@ -1,3 +1,4 @@ +use std::cmp::Ordering; use std::mem; use std::sync::Arc; @@ -7,13 +8,22 @@ use futures_util::{FutureExt, StreamExt}; use tokio::sync::{mpsc, oneshot}; use tycho_network::PeerId; +use tycho_util::FastHashSet; use 
crate::dag::{DagRound, InclusionState}; use crate::engine::MempoolConfig; -use crate::intercom::broadcast::dto::{CollectorSignal, ConsensusEvent}; +use crate::intercom::broadcast::dto::ConsensusEvent; use crate::intercom::dto::SignatureResponse; use crate::intercom::{BroadcasterSignal, Downloader}; -use crate::models::{Point, Round}; +use crate::models::{Round, Ugly}; + +/// collector may run without broadcaster, as if broadcaster signalled Ok +#[derive(Debug)] +pub enum CollectorSignal { + Finish, + Err, + Retry, +} pub struct Collector { local_id: Arc, @@ -48,7 +58,7 @@ impl Collector { pub async fn run( mut self, next_dag_round: DagRound, // r+1 - has_own_point: Option>, + own_point_state: oneshot::Receiver, collector_signal: mpsc::UnboundedSender, bcaster_signal: mpsc::Receiver, ) -> Self { @@ -57,6 +67,19 @@ impl Collector { .get() .expect("current DAG round must be linked into DAG chain"); let includes = mem::take(&mut self.next_includes); + includes.push( + (async move { + match own_point_state.await { + Ok(state) => state, + Err(_) => { + futures_util::pending!(); + unreachable!() + } + } + }) + .boxed(), + ); + assert_eq!( current_dag_round.round(), &self.next_round, @@ -64,13 +87,18 @@ impl Collector { &self.next_round ); self.next_round = next_dag_round.round().clone(); + let includes_ready = FastHashSet::with_capacity_and_hasher( + current_dag_round.node_count().full(), + Default::default(), + ); let task = CollectorTask { local_id: self.local_id.clone(), downloader: self.downloader.clone(), current_round: current_dag_round.clone(), next_dag_round, includes, - includes_ready: has_own_point.into_iter().count(), + includes_ready, + is_includes_ready: false, next_includes: FuturesUnordered::new(), collector_signal, @@ -104,7 +132,8 @@ struct CollectorTask { // needed in order to not include same point twice - as an include and as a witness; // need to drop them with round change includes: FuturesUnordered>, - includes_ready: usize, + includes_ready: FastHashSet, + is_includes_ready: bool, /// do not poll during this round, just pass to next round; /// anyway should rewrite signing mechanics - look for comments inside [DagRound::add_exact] next_includes: FuturesUnordered>, @@ -126,21 +155,6 @@ impl CollectorTask { let mut retry_interval = tokio::time::interval(MempoolConfig::RETRY_INTERVAL); loop { tokio::select! { - request = signature_requests.recv() => match request { - Some((round, author, callback)) => { - _ = callback.send(self.signature_response(&round, &author)); - } - None => panic!("channel with signature requests closed") - }, - filtered = from_bcast_filter.recv() => match filtered { - Some(consensus_event) => { - if let Err(round) = self.match_filtered(&consensus_event) { - _ = self.collector_signal.send(CollectorSignal::Err); - return Err(round) - } - }, - None => panic!("channel from Broadcast Filter closed"), - }, Some(bcaster_signal) = self.bcaster_signal.recv() => { if self.should_fail(bcaster_signal) { // has to jump over one round @@ -159,35 +173,41 @@ impl CollectorTask { _ = self.collector_signal.send(CollectorSignal::Retry); } }, - // FIXME not so great: some signature requests will be retried, - // just because this futures were not polled. 
Use global 'current dag round' round - // and sign inside shared join task in dag location, - // do not return location from DagLocation::add_validate(point) + filtered = from_bcast_filter.recv() => match filtered { + Some(consensus_event) => { + if let Err(round) = self.match_filtered(&consensus_event) { + self.collector_signal.send(CollectorSignal::Err).ok(); + return Err(round) + } + }, + None => panic!("channel from Broadcast Filter closed"), + }, Some(state) = self.includes.next() => { - // slow but at least may work - let signed = if let Some(signable) = state.signable() { - signable.sign( - self.current_round.round(), - self.next_dag_round.key_pair(), - MempoolConfig::sign_time_range(), - ) - } else { - state.signed().is_some() // FIXME very fragile duct tape - }; - if signed { - tracing::info!( - "{} @ {:.4?} includes {} +1 : {:.4?} {:.4?}", - self.local_id, self.current_round.round(), self.includes_ready, - state.init_id(), state.signed() - ); - self.includes_ready += 1; - } else { - tracing::warn!( - "{} @ {:.4?} includes {} : {:.4?} {:.4?}", - self.local_id, self.current_round.round(), self.includes_ready, - state.init_id(), state.signed() - ); + self.on_inclusion_validated(&state) + }, + Some(state) = self.next_includes.next() => { + if let Some(valid) = state.point().map(|p| p.valid()).flatten() { + self.is_includes_ready = true; + match valid.point.body.location.round.cmp(self.next_dag_round.round()) { + Ordering::Less => panic!("Coding error: next includes futures contain current round"), + Ordering::Greater => { + tracing::error!("Collector was left behind while bcast filter advanced??"); + self.collector_signal.send(CollectorSignal::Err).ok(); + return Err(valid.point.body.location.round); + }, + Ordering::Equal => { + if self.is_ready() { + return Ok(self.next_includes) + } + } + } + } + }, + request = signature_requests.recv() => match request { + Some((round, author, callback)) => { + _ = callback.send(self.signature_response(&round, &author)); } + None => panic!("channel with signature requests closed") }, else => { panic!("collector unhandled"); @@ -201,7 +221,7 @@ impl CollectorTask { "{} @ {:.4?} collector <= Bcaster::{signal:?} : includes {} of {}", self.local_id, self.current_round.round(), - self.includes_ready, + self.includes_ready.len(), self.current_round.node_count().majority() ); match signal { @@ -214,20 +234,113 @@ impl CollectorTask { } } - fn is_ready(&self) -> bool { + fn is_ready(&mut self) -> bool { tracing::info!( "{} @ {:.4?} collector self-check : includes {} of {}", self.local_id, self.current_round.round(), - self.includes_ready, + self.includes_ready.len(), self.current_round.node_count().majority() ); // point @ r+1 has to include 2F+1 broadcasts @ r+0 (we are @ r+0) - let is_self_ready = self.includes_ready >= self.current_round.node_count().majority(); - if is_self_ready && self.is_bcaster_ready_ok { + self.is_includes_ready |= + self.includes_ready.len() >= self.current_round.node_count().majority(); + if self.is_includes_ready && self.is_bcaster_ready_ok { _ = self.collector_signal.send(CollectorSignal::Finish); } - is_self_ready && self.is_bcaster_ready_ok + self.is_includes_ready && self.is_bcaster_ready_ok + } + + fn match_filtered(&self, consensus_event: &ConsensusEvent) -> Result<(), Round> { + tracing::info!( + "{} @ {:?} collector <= bcast filter : {:?}", + self.local_id, + self.current_round.round(), + consensus_event.ugly() + ); + match consensus_event { + ConsensusEvent::Forward(consensus_round) => { + match 
consensus_round.cmp(self.next_dag_round.round()) {
+                    // we're too late, consensus moved forward
+                    std::cmp::Ordering::Greater => return Err(consensus_round.clone()),
+                    // we still have a chance to finish current round
+                    std::cmp::Ordering::Equal => {}
+                    // we are among the fastest nodes of consensus
+                    std::cmp::Ordering::Less => {}
+                }
+            }
+            ConsensusEvent::Verified(point) => match &point.body.location.round {
+                x if x > self.next_dag_round.round() => {
+                    panic!(
+                        "{} @ {:?} Coding error: broadcast filter advanced \
+                        while collector left behind; event: {:?}",
+                        self.local_id,
+                        self.current_round.round(),
+                        consensus_event.ugly()
+                    )
+                }
+                x if x == self.next_dag_round.round() => {
+                    if let Some(task) = self.next_dag_round.add(point, &self.downloader) {
+                        self.next_includes.push(task)
+                    }
+                }
+                x if x == self.current_round.round() => {
+                    if let Some(task) = self.current_round.add(point, &self.downloader) {
+                        self.includes.push(task)
+                    }
+                }
+                _ => _ = self.current_round.add(&point, &self.downloader), // maybe other's dependency
+            },
+            ConsensusEvent::Invalid(dag_point) => {
+                if &dag_point.location().round > self.next_dag_round.round() {
+                    panic!(
+                        "{} @ {:?} Coding error: broadcast filter advanced \
+                        while collector left behind; event: {:?}",
+                        self.local_id,
+                        self.current_round.round(),
+                        consensus_event.ugly()
+                    )
+                } else {
+                    _ = self.next_dag_round.insert_invalid(&dag_point);
+                }
+            }
+        };
+        Ok(())
+    }
+
+    // FIXME not so great: some signature requests will be retried,
+    // just because these futures were not polled. Use global 'current dag round' round
+    // and sign inside shared join task in dag location,
+    // do not return location from DagLocation::add_validate(point)
+    fn on_inclusion_validated(&mut self, state: &InclusionState) {
+        // slow but at least may work
+        if let Some(signable) = state.signable() {
+            signable.sign(
+                self.current_round.round(),
+                self.next_dag_round.key_pair(),
+                MempoolConfig::sign_time_range(),
+            );
+        };
+        if let Some(signed) = state.signed_point(self.current_round.round()) {
+            self.includes_ready
+                .insert(signed.point.body.location.author);
+            tracing::info!(
+                "{} @ {:.4?} includes {} +1 : {:?}",
+                self.local_id,
+                self.current_round.round(),
+                self.includes_ready.len(),
+                signed.point.id().ugly()
+            );
+        } else {
+            tracing::warn!(
+                "{} @ {:.4?} includes {} : {:?} {:.4?}",
+                self.local_id,
+                self.current_round.round(),
+                self.includes_ready.len(),
+                state.point().map(|a| a.id()).as_ref().map(|a| a.ugly()),
+                state.signed()
+            );
+        }
     }
 
     fn signature_response(&mut self, round: &Round, author: &PeerId) -> SignatureResponse {
@@ -261,7 +374,7 @@ impl CollectorTask {
                 MempoolConfig::sign_time_range(),
             ) {
                 if round == self.current_round.round() {
-                    self.includes_ready += 1;
+                    self.includes_ready.insert(author.clone());
                 }
             }
         }
@@ -277,48 +390,4 @@ impl CollectorTask {
         );
         response
     }
-
-    fn match_filtered(&self, consensus_event: &ConsensusEvent) -> Result<(), Round> {
-        tracing::info!(
-            "{} @ {:?} collector <= bcast filter : {consensus_event:.4?}",
-            self.local_id,
-            self.current_round.round()
-        );
-        match consensus_event {
-            ConsensusEvent::Forward(consensus_round) => {
-                match consensus_round.cmp(self.next_dag_round.round()) {
-                    // we're too late, consensus moved forward
-                    std::cmp::Ordering::Greater => return Err(consensus_round.clone()),
-                    // we still have a chance to finish current round
-                    std::cmp::Ordering::Equal => {}
-                    // we are among the fastest nodes of consensus
-                    std::cmp::Ordering::Less => {}
-                }
-            }
-            ConsensusEvent::Verified(point) => match
&point.body.location.round { - x if x > self.next_dag_round.round() => { - panic!("Coding error: broadcast filter advanced while collector left behind") - } - x if x == self.next_dag_round.round() => { - if let Some(task) = self.next_dag_round.add(point, &self.downloader) { - self.next_includes.push(task) - } - } - x if x == self.current_round.round() => { - if let Some(task) = self.current_round.add(point, &self.downloader) { - self.includes.push(task) - } - } - _ => _ = self.current_round.add(&point, &self.downloader), // maybe other's dependency - }, - ConsensusEvent::Invalid(dag_point) => { - if &dag_point.location().round > self.next_dag_round.round() { - panic!("Coding error: broadcast filter advanced while collector left behind") - } else { - _ = self.next_dag_round.insert_invalid(&dag_point); - } - } - }; - Ok(()) - } } diff --git a/consensus/src/intercom/broadcast/dto.rs b/consensus/src/intercom/broadcast/dto.rs index 3f3341087..f2a5e896f 100644 --- a/consensus/src/intercom/broadcast/dto.rs +++ b/consensus/src/intercom/broadcast/dto.rs @@ -1,6 +1,7 @@ +use std::fmt::{Debug, Formatter}; use std::sync::Arc; -use crate::models::{DagPoint, Point, Round}; +use crate::models::{DagPoint, Point, Round, Ugly, UglyPrint}; #[derive(Debug)] pub enum ConsensusEvent { @@ -11,10 +12,12 @@ pub enum ConsensusEvent { Invalid(DagPoint), } -/// collector may run without broadcaster, as if broadcaster signalled Ok -#[derive(Debug)] -pub enum CollectorSignal { - Finish, - Err, - Retry, +impl Debug for UglyPrint<'_, ConsensusEvent> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self.0 { + ConsensusEvent::Verified(point) => write!(f, "Verified({:?})", point.ugly())?, + fwd => Debug::fmt(fwd, f)?, + }; + Ok(()) + } } diff --git a/consensus/src/intercom/core/dispatcher.rs b/consensus/src/intercom/core/dispatcher.rs index f31c299ae..18931ca20 100644 --- a/consensus/src/intercom/core/dispatcher.rs +++ b/consensus/src/intercom/core/dispatcher.rs @@ -44,6 +44,7 @@ impl Dispatcher { } pub fn broadcast_request(point: &Point) -> tycho_network::Request { + // TODO use send message for broadcast, leave Rejected/TryLater only in sig request (&MPRequest::Broadcast(point.clone())).into() } diff --git a/consensus/src/intercom/dependency/uploader.rs b/consensus/src/intercom/dependency/uploader.rs index b3a2c81f5..92eae38b4 100644 --- a/consensus/src/intercom/dependency/uploader.rs +++ b/consensus/src/intercom/dependency/uploader.rs @@ -3,19 +3,19 @@ use std::sync::Arc; use tokio::sync::{mpsc, oneshot, watch}; -use crate::dag::WeakDagRound; +use crate::dag::DagRound; use crate::intercom::dto::PointByIdResponse; use crate::models::{DagPoint, Point, PointId}; pub struct Uploader { requests: mpsc::UnboundedReceiver<(PointId, oneshot::Sender)>, - top_dag_round: watch::Receiver, + top_dag_round: watch::Receiver, } impl Uploader { pub fn new( requests: mpsc::UnboundedReceiver<(PointId, oneshot::Sender)>, - top_dag_round: watch::Receiver, + top_dag_round: watch::Receiver, ) -> Self { Self { requests, @@ -35,7 +35,7 @@ impl Uploader { } async fn find(&self, point_id: &PointId) -> Option> { - let top_dag_round = self.top_dag_round.borrow().get()?; + let top_dag_round = self.top_dag_round.borrow().clone(); let shared = top_dag_round .scan(&point_id.location.round) .map(|dag_round| { diff --git a/consensus/src/intercom/peer_schedule/peer_schedule.rs b/consensus/src/intercom/peer_schedule/peer_schedule.rs index 00fc9db96..808673507 100644 --- a/consensus/src/intercom/peer_schedule/peer_schedule.rs +++ 
b/consensus/src/intercom/peer_schedule/peer_schedule.rs @@ -7,7 +7,7 @@ use everscale_crypto::ed25519::KeyPair; use parking_lot::Mutex; use tokio::sync::broadcast; -use tycho_network::PeerId; +use tycho_network::{PeerId, PrivateOverlay}; use tycho_util::FastHashSet; use crate::intercom::dto::PeerState; @@ -181,31 +181,35 @@ impl PeerSchedule { _ = inner.next_epoch_start.replace(round); } - pub fn set_next_peers(&self, peers: &Vec<(PeerId, bool)>) { + /// use [updater](super::PeerScheduleUpdater::set_next_peers()) + pub(super) fn set_next_peers(&self, peers: &Vec, overlay: &PrivateOverlay) { let local_id = self.local_id(); - let mut all_peers = BTreeMap::new(); let mut inner = self.inner.lock(); - for i in 0..inner.peers_resolved.len() { - all_peers.extend(inner.peers_resolved[i].iter()); - } - let old = peers + // check resolved peers only after blocking other threads from updating inner; + // note that entries are under read lock + let resolved = overlay + .read_entries() + .iter() + .filter(|a| a.resolver_handle.is_resolved()) + .map(|a| a.peer_id.clone()) + .collect::>(); + let peers = peers .iter() - .filter_map(|(peer_id, _)| all_peers.get(peer_id).map(|state| (*peer_id, *state))) + .map(|peer_id| { + ( + peer_id.clone(), + if resolved.contains(&peer_id) && peer_id != local_id { + PeerState::Resolved + } else { + PeerState::Unknown + }, + ) + }) .collect::>(); // detach existing copies - they are tightened to use-site DAG round let next = Arc::make_mut(&mut inner.peers_resolved[2]); next.clear(); - next.extend(peers.clone().into_iter().map(|(peer_id, is_resolved)| { - ( - peer_id, - if is_resolved && peer_id != local_id { - PeerState::Resolved - } else { - PeerState::Unknown - }, - ) - })); - next.extend(old); + next.extend(peers); } /// Returns [true] if update was successfully applied diff --git a/consensus/src/intercom/peer_schedule/peer_schedule_updater.rs b/consensus/src/intercom/peer_schedule/peer_schedule_updater.rs index 692e714e4..df039621b 100644 --- a/consensus/src/intercom/peer_schedule/peer_schedule_updater.rs +++ b/consensus/src/intercom/peer_schedule/peer_schedule_updater.rs @@ -6,7 +6,7 @@ use rand::prelude::IteratorRandom; use tokio::sync::broadcast::error::RecvError; use tokio::task::AbortHandle; -use tycho_network::{PrivateOverlay, PrivateOverlayEntriesEvent}; +use tycho_network::{PeerId, PrivateOverlay, PrivateOverlayEntriesEvent}; use crate::intercom::PeerSchedule; @@ -18,15 +18,22 @@ pub struct PeerScheduleUpdater { } impl PeerScheduleUpdater { - pub fn run(overlay: PrivateOverlay, peer_schedule: Arc) { - tracing::info!("started peer schedule updater"); - let this = Self { + pub fn new(overlay: PrivateOverlay, peer_schedule: Arc) -> Self { + Self { overlay, peer_schedule, abort_resolve_peers: Default::default(), - }; - this.respawn_resolve_task(); - tokio::spawn(this.listen()); + } + } + + pub async fn run(self) -> ! 
{
+        tracing::info!("started peer schedule updater");
+        self.respawn_resolve_task();
+        self.listen().await
+    }
+
+    pub fn set_next_peers(&self, peers: &Vec) {
+        self.peer_schedule.set_next_peers(&peers, &self.overlay)
     }
 
     fn respawn_resolve_task(&self) {
@@ -34,6 +41,9 @@ impl PeerScheduleUpdater {
         tracing::info!("{local_id:.4?} respawn_resolve_task");
         let mut fut = futures_util::stream::FuturesUnordered::new();
         {
+            // Note: set_next_peers() and respawn_resolve_task() will not deadlock
+            // although peer_schedule.inner is locked in two opposite orders
+            // because only a read lock on overlay entries is taken
             let entries = self.overlay.read_entries();
             for entry in entries
                 .iter()
@@ -64,7 +74,7 @@ impl PeerScheduleUpdater {
         *abort_resolve_handle = new_abort_handle;
     }
 
-    async fn listen(self) {
+    async fn listen(self) -> ! {
         let local_id = self.peer_schedule.local_id();
         tracing::info!("{local_id:.4?} listen peer updates");
         let mut rx = self.overlay.read_entries().subscribe();
diff --git a/consensus/src/models/dag_point.rs b/consensus/src/models/dag_point.rs
index 363262a23..0bd54fb99 100644
--- a/consensus/src/models/dag_point.rs
+++ b/consensus/src/models/dag_point.rs
@@ -50,6 +50,13 @@ impl DagPoint {
         }
     }
 
+    pub fn trusted(&self) -> Option<&'_ ValidPoint> {
+        match self {
+            DagPoint::Trusted(valid) => Some(valid),
+            _ => None,
+        }
+    }
+
     pub fn id(&self) -> PointId {
         PointId {
             location: self.location().clone(),
diff --git a/consensus/src/models/mod.rs b/consensus/src/models/mod.rs
index a59795f00..5638ab50b 100644
--- a/consensus/src/models/mod.rs
+++ b/consensus/src/models/mod.rs
@@ -1,7 +1,9 @@
 pub use dag_point::*;
 pub use node_count::*;
 pub use point::*;
+pub use ugly::*;
 
 mod dag_point;
 mod node_count;
 mod point;
+mod ugly;
diff --git a/consensus/src/models/node_count.rs b/consensus/src/models/node_count.rs
index f06859a4a..452819953 100644
--- a/consensus/src/models/node_count.rs
+++ b/consensus/src/models/node_count.rs
@@ -38,7 +38,7 @@ impl NodeCount {
         NodeCount(one_f as u8)
     }
 
-    fn full(&self) -> usize {
+    pub fn full(&self) -> usize {
         self.0 as usize * 3 + 1
     }
 
diff --git a/consensus/src/models/point.rs b/consensus/src/models/point.rs
index 7a70c5c11..5cbacb36f 100644
--- a/consensus/src/models/point.rs
+++ b/consensus/src/models/point.rs
@@ -102,6 +102,12 @@ impl Sub for UnixTime {
     }
 }
 
+impl Display for UnixTime {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        std::fmt::Display::fmt(&self.0, f)
+    }
+}
+
 #[derive(Clone, Serialize, Deserialize, PartialEq, Debug)]
 pub struct Location {
     pub round: Round,
diff --git a/consensus/src/models/ugly.rs b/consensus/src/models/ugly.rs
new file mode 100644
index 000000000..e21298005
--- /dev/null
+++ b/consensus/src/models/ugly.rs
@@ -0,0 +1,55 @@
+use std::fmt::{Debug, Formatter};
+
+use tycho_network::PeerId;
+
+use crate::models::{Location, Point, PointId};
+
+pub struct UglyPrint<'a, T>(pub &'a T);
+
+pub trait Ugly {
+    fn ugly(&self) -> UglyPrint<'_, Self>
+    where
+        Self: Sized;
+}
+
+impl Ugly for T
+where
+    T: Sized,
+    for<'a> UglyPrint<'a, T>: Debug,
+{
+    fn ugly(&self) -> UglyPrint<'_, T> {
+        UglyPrint(self)
+    }
+}
+
+impl Debug for UglyPrint<'_, PeerId> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{:.4?}", self.0)
+    }
+}
+
+impl Debug for UglyPrint<'_, Location> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{:.4?} @ {:?}", self.0.author, self.0.round.0)
+    }
+}
+
+impl Debug for UglyPrint<'_, PointId> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+ write!( + f, + "PointId( {:.4} @ {} # {:.4} )", + self.0.location.author, self.0.location.round.0, self.0.digest + ) + } +} + +impl Debug for UglyPrint<'_, Point> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!( + f, + "Point {{ Id( {:.4} @ {} # {:.4} ), .. }}", + self.0.body.location.author, self.0.body.location.round.0, self.0.digest + ) + } +} From d42a850eb66192c1c3a2f4ff599ab68f44d5bcb9 Mon Sep 17 00:00:00 2001 From: Stanislav Eliseev Date: Tue, 30 Apr 2024 18:07:51 +0200 Subject: [PATCH 17/32] feat(mempool-adapter): connect mempool adapter and collator engine (wip) --- Cargo.lock | 2 + collator/Cargo.toml | 6 + collator/src/mempool/mempool_adapter.rs | 246 ++++++++---------- collator/src/mempool/mempool_adapter_std.rs | 268 ++++++++++++++++++++ collator/src/mempool/mod.rs | 5 +- collator/src/mempool/types.rs | 23 +- consensus/src/engine/engine.rs | 11 +- consensus/src/lib.rs | 4 + consensus/src/models/point.rs | 4 + 9 files changed, 417 insertions(+), 152 deletions(-) create mode 100644 collator/src/mempool/mempool_adapter_std.rs diff --git a/Cargo.lock b/Cargo.lock index 6402bf9fa..9bd58344f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2219,6 +2219,7 @@ dependencies = [ "everscale-crypto", "everscale-types", "futures-util", + "parking_lot", "rand", "tl-proto", "tokio", @@ -2226,6 +2227,7 @@ dependencies = [ "tracing-subscriber", "tracing-test", "tycho-block-util", + "tycho-consensus", "tycho-core", "tycho-network", "tycho-storage", diff --git a/collator/Cargo.toml b/collator/Cargo.toml index 0a605596a..52db8b065 100644 --- a/collator/Cargo.toml +++ b/collator/Cargo.toml @@ -13,6 +13,7 @@ license.workspace = true anyhow = { workspace = true } async-trait = { workspace = true } futures-util = { workspace = true } +parking_lot = { workspace = true } rand = { workspace = true } tl-proto = { workspace = true } tokio = { workspace = true, features = ["macros", "rt", "signal"] } @@ -30,6 +31,11 @@ tycho-storage = { workspace = true } tycho-util = { workspace = true } tycho-block-util = { workspace = true } +#TODO: should be here? 
+tycho-consensus = {workspace = true} + + + [dev-dependencies] tracing-test = { workspace = true } diff --git a/collator/src/mempool/mempool_adapter.rs b/collator/src/mempool/mempool_adapter.rs index c218b12de..c8250ba9d 100644 --- a/collator/src/mempool/mempool_adapter.rs +++ b/collator/src/mempool/mempool_adapter.rs @@ -1,130 +1,122 @@ -use std::{ - collections::BTreeMap, - sync::{Arc, RwLock}, -}; +use std::collections::{BTreeMap, HashMap}; +use std::sync::Arc; use anyhow::{anyhow, Result}; use async_trait::async_trait; - -use everscale_types::{ - cell::{CellBuilder, CellSliceRange, HashBytes}, - models::{ExtInMsgInfo, IntAddr, MsgInfo, OwnedMessage, StdAddr}, -}; -use rand::Rng; +use everscale_crypto::ed25519::SecretKey; +use everscale_types::boc::Boc; +use everscale_types::cell::HashBytes; +use everscale_types::models::ExtInMsgInfo; +use everscale_types::prelude::{Cell, CellBuilder, Load}; +use futures_util::TryStreamExt; +use parking_lot::RwLock; +use tokio::sync::mpsc::{Sender, UnboundedReceiver}; use tycho_block_util::state::ShardStateStuff; +use tycho_consensus::Point; +use tycho_network::{DhtClient, OverlayService, PeerId}; +use tycho_util::FastDashMap; +use crate::mempool::types::ExternalMessage; +use crate::mempool::{MempoolAdapter, MempoolAnchor, MempoolAnchorId}; use crate::tracing_targets; -use super::types::{MempoolAnchor, MempoolAnchorId}; - -#[cfg(test)] -#[path = "tests/mempool_adapter_tests.rs"] -pub(super) mod tests; - -// EVENTS EMITTER AMD LISTENER - -//TODO: remove emitter -#[async_trait] -pub(crate) trait MempoolEventEmitter { - /// When mempool produced new committed anchor - async fn on_new_anchor_event(&self, anchor: Arc); +pub struct MempoolAdapterImpl { + //TODO: replace with rocksdb + anchors: Arc>>>, } -#[async_trait] -pub(crate) trait MempoolEventListener: Send + Sync { - /// Process new anchor from mempool - async fn on_new_anchor(&self, anchor: Arc) -> Result<()>; -} +impl MempoolAdapterImpl { + pub async fn new( + secret_key: SecretKey, + dht_client: DhtClient, + overlay_service: OverlayService, + peers: Vec, + ) -> Arc { + tracing::info!(target: tracing_targets::MEMPOOL_ADAPTER, "Creating mempool adapter..."); + let anchors = Arc::new(RwLock::new(BTreeMap::new())); -// ADAPTER + let (tx, rx) = tokio::sync::mpsc::unbounded_channel::, Vec>)>>(); -#[async_trait] -pub(crate) trait MempoolAdapter: Send + Sync + 'static { - /// Create an adapter, that connects to mempool then starts to listen mempool for new anchors, - /// and handles requests to mempool from the collation process - fn create(listener: Arc) -> Self; + let engine = + tycho_consensus::Engine::new(&secret_key, &dht_client, &overlay_service, &peers, tx) + .await; - /// Schedule task to process new master block state (may perform gc or nodes rotation) - async fn enqueue_process_new_mc_block_state( - &self, - mc_state: Arc, - ) -> Result<()>; + tokio::spawn(async move { engine.run() }); - /// Request, await, and return anchor from connected mempool by id. - /// Return None if the requested anchor does not exist. - /// - /// (TODO) Cache anchor to handle similar request from collator of another shard - async fn get_anchor_by_id( - &self, - anchor_id: MempoolAnchorId, - ) -> Result>>; + tracing::info!(target: tracing_targets::MEMPOOL_ADAPTER, "Mempool adapter created"); - /// Request, await, and return the next anchor after the specified previous one. - /// If anchor was not produced yet then await until mempool does this. - /// - /// (TODO) ? 
Should return Error if mempool does not reply fro a long timeout - async fn get_next_anchor(&self, prev_anchor_id: MempoolAnchorId) -> Result>; + let mempool_adapter = Arc::new(Self { anchors }); - /// Clean cache from all anchors that before specified. - /// We can do this for anchors that processed in blocks - /// which included in signed master - we do not need them anymore - async fn clear_anchors_cache(&self, before_anchor_id: MempoolAnchorId) -> Result<()>; -} + //start handling mempool anchors + tokio::spawn(parse_points(mempool_adapter.clone(), rx)); -pub struct MempoolAdapterStdImpl { - listener: Arc, + mempool_adapter + } - _stub_anchors_cache: Arc>>>, + fn add_anchor(&self, anchor: Arc) { + let mut guard = self.anchors.write(); + guard.insert(anchor.id(), anchor); + } } -#[async_trait] -impl MempoolAdapter for MempoolAdapterStdImpl { - fn create(listener: Arc) -> Self { - tracing::info!(target: tracing_targets::MEMPOOL_ADAPTER, "Creating mempool adapter..."); - - //TODO: make real implementation, currently runs stub task - // that produces the repeating set of anchors - let stub_anchors_cache = Arc::new(RwLock::new(BTreeMap::new())); +pub async fn parse_points( + adapter: Arc, + mut rx: UnboundedReceiver, Vec>)>>, +) { + while let Some(commited) = rx.recv().await { + commited.into_iter().for_each(|(anchor, points)| { + let mut external_messages = HashMap::::new(); + + for point in points { + 'message: for message in &point.body.payload { + let cell = match Boc::decode(message) { + Ok(cell) => cell, + Err(e) => { + tracing::error!(target: tracing_targets::MEMPOOL_ADAPTER, "Failed to deserialize bytes into cell. Error: {e:?}"); //TODO: should handle errors properly? + continue 'message; + } + }; + + let mut slice = match cell.as_slice() { + Ok(slice) => slice, + Err(e) => { + tracing::error!(target: tracing_targets::MEMPOOL_ADAPTER, "Failed to make slice from cell. Error: {e:?}"); + continue 'message; + } + }; + + let ext_in_message = match ExtInMsgInfo::load_from(&mut slice) { + Ok(message) => message, + Err(e) => { + tracing::error!(target: tracing_targets::MEMPOOL_ADAPTER, "Bad cell. Failed to deserialize to ExtInMsgInfo. 
Err: {e:?}"); + continue 'message; + } + }; + + let external_message = ExternalMessage::new(cell.clone(), ext_in_message ); + external_messages.insert(*cell.repr_hash(), external_message); - tokio::spawn({ - let listener = listener.clone(); - let stub_anchors_cache = stub_anchors_cache.clone(); - async move { - let mut anchor_id = 0; - loop { - let rnd_round_interval = rand::thread_rng().gen_range(400..600); - tokio::time::sleep(tokio::time::Duration::from_millis(rnd_round_interval * 6)) - .await; - anchor_id += 1; - let anchor = _stub_create_random_anchor_with_stub_externals(anchor_id); - { - let mut anchor_cache_rw = stub_anchors_cache - .write() - .map_err(|e| anyhow!("Poison error on write lock: {:?}", e)) - .unwrap(); - tracing::debug!( - target: tracing_targets::MEMPOOL_ADAPTER, - "Random anchor (id: {}, chain_time: {}, externals: {}) added to cache", - anchor.id(), - anchor.chain_time(), - anchor.externals_count(), - ); - anchor_cache_rw.insert(anchor_id, anchor.clone()); - } - listener.on_new_anchor(anchor).await.unwrap(); } } - }); - tracing::info!(target: tracing_targets::MEMPOOL_ADAPTER, "Stub anchors generator started"); - tracing::info!(target: tracing_targets::MEMPOOL_ADAPTER, "Mempool adapter created"); + let messages = external_messages + .into_iter() + .map(|m| Arc::new(m.1)) + .collect::>(); - Self { - listener, - _stub_anchors_cache: stub_anchors_cache, - } + let anchor = Arc::new(MempoolAnchor::new( + anchor.body.location.round.0, + anchor.body.time.as_u64(), + messages + )); + + adapter.add_anchor(anchor); + }) } +} +#[async_trait] +impl MempoolAdapter for MempoolAdapterImpl { async fn enqueue_process_new_mc_block_state( &self, mc_state: Arc, @@ -141,13 +133,11 @@ impl MempoolAdapter for MempoolAdapterStdImpl { async fn get_anchor_by_id( &self, anchor_id: MempoolAnchorId, - ) -> Result>> { + ) -> anyhow::Result>> { //TODO: make real implementation, currently only return anchor from local cache let res = { - let anchors_cache_r = self - ._stub_anchors_cache - .read() - .map_err(|e| anyhow!("Poison error on read lock: {:?}", e))?; + let anchors_cache_r = self.anchors.read(); + anchors_cache_r.get(&anchor_id).cloned() }; if res.is_some() { @@ -179,9 +169,8 @@ impl MempoolAdapter for MempoolAdapterStdImpl { loop { { let anchors_cache_r = self - ._stub_anchors_cache - .read() - .map_err(|e| anyhow!("Poison error on read lock: {:?}", e))?; + .anchors + .read(); let mut range = anchors_cache_r.range(( std::ops::Bound::Excluded(prev_anchor_id), @@ -226,43 +215,10 @@ impl MempoolAdapter for MempoolAdapterStdImpl { async fn clear_anchors_cache(&self, before_anchor_id: MempoolAnchorId) -> Result<()> { let mut anchors_cache_rw = self - ._stub_anchors_cache - .write() - .map_err(|e| anyhow!("Poison error on write lock: {:?}", e))?; + .anchors + .write(); + anchors_cache_rw.retain(|anchor_id, _| anchor_id >= &before_anchor_id); Ok(()) } } - -fn _stub_create_random_anchor_with_stub_externals( - anchor_id: MempoolAnchorId, -) -> Arc { - let chain_time = std::time::SystemTime::now() - .duration_since(std::time::SystemTime::UNIX_EPOCH) - .unwrap() - .as_millis() as u64; - let externals_count: i32 = rand::thread_rng().gen_range(-10..10).max(0); - let mut externals = vec![]; - for i in 0..externals_count { - let rand_addr = (0..32).map(|_| rand::random::()).collect::>(); - let rand_addr = HashBytes::from_slice(&rand_addr); - let mut msg_cell_builder = CellBuilder::new(); - msg_cell_builder.store_u32(anchor_id).unwrap(); - msg_cell_builder.store_u64(chain_time).unwrap(); - 
msg_cell_builder.store_u32(i as u32).unwrap();
-        let msg_cell = msg_cell_builder.build().unwrap();
-        let msg_cell_range = CellSliceRange::full(&*msg_cell);
-        let msg = OwnedMessage {
-            info: MsgInfo::ExtIn(ExtInMsgInfo {
-                dst: IntAddr::Std(StdAddr::new(0, rand_addr)),
-                ..Default::default()
-            }),
-            body: (msg_cell, msg_cell_range),
-            init: None,
-            layout: None,
-        };
-        externals.push(Arc::new(msg));
-    }
-
-    Arc::new(MempoolAnchor::new(anchor_id, chain_time, externals))
-}
diff --git a/collator/src/mempool/mempool_adapter_std.rs b/collator/src/mempool/mempool_adapter_std.rs
new file mode 100644
index 000000000..658c07917
--- /dev/null
+++ b/collator/src/mempool/mempool_adapter_std.rs
@@ -0,0 +1,268 @@
+use std::{
+    collections::BTreeMap,
+    sync::{Arc, RwLock},
+};
+
+use anyhow::{anyhow, Result};
+use async_trait::async_trait;
+use everscale_crypto::ed25519::SecretKey;
+
+use everscale_types::{
+    cell::{CellBuilder, CellSliceRange, HashBytes},
+    models::{ExtInMsgInfo, IntAddr, MsgInfo, OwnedMessage, StdAddr},
+};
+use rand::Rng;
+use tycho_block_util::state::ShardStateStuff;
+use tycho_network::{DhtClient, OverlayService, PeerId};
+
+use crate::tracing_targets;
+use crate::validator::types::OverlayNumber;
+
+use super::types::{ExternalMessage, MempoolAnchor, MempoolAnchorId};
+
+#[cfg(test)]
+#[path = "tests/mempool_adapter_tests.rs"]
+pub(super) mod tests;
+
+// EVENTS EMITTER AND LISTENER
+
+//TODO: remove emitter
+#[async_trait]
+pub(crate) trait MempoolEventEmitter {
+    /// When mempool produced new committed anchor
+    async fn on_new_anchor_event(&self, anchor: Arc);
+}
+
+#[async_trait]
+pub(crate) trait MempoolEventListener: Send + Sync {
+    /// Process new anchor from mempool
+    async fn on_new_anchor(&self, anchor: Arc) -> Result<()>;
+}
+
+// ADAPTER
+
+#[async_trait]
+pub(crate) trait MempoolAdapter: Send + Sync + 'static {
+    /// Schedule task to process new master block state (may perform gc or nodes rotation)
+    async fn enqueue_process_new_mc_block_state(
+        &self,
+        mc_state: Arc,
+    ) -> Result<()>;
+
+    /// Request, await, and return anchor from connected mempool by id.
+    /// Return None if the requested anchor does not exist.
+    ///
+    /// (TODO) Cache anchor to handle similar request from collator of another shard
+    async fn get_anchor_by_id(
+        &self,
+        anchor_id: MempoolAnchorId,
+    ) -> Result>>;
+
+    /// Request, await, and return the next anchor after the specified previous one.
+    /// If anchor was not produced yet then await until mempool does this.
+    ///
+    /// (TODO) ? Should return Error if mempool does not reply for a long timeout
+    async fn get_next_anchor(&self, prev_anchor_id: MempoolAnchorId) -> Result>;
+
+    /// Clean the cache from all anchors before the specified one.
+ /// We can do this for anchors that processed in blocks + /// which included in signed master - we do not need them anymore + async fn clear_anchors_cache(&self, before_anchor_id: MempoolAnchorId) -> Result<()>; +} + +pub struct MempoolAdapterStdImpl { + listener: Arc, + + _stub_anchors_cache: Arc>>>, +} + +#[async_trait] +impl MempoolAdapter for MempoolAdapterStdImpl { + async fn enqueue_process_new_mc_block_state( + &self, + mc_state: Arc, + ) -> Result<()> { + //TODO: make real implementation, currently does nothing + tracing::info!( + target: tracing_targets::MEMPOOL_ADAPTER, + "STUB: New masterchain state (block_id: {}) processing enqueued to mempool", + mc_state.block_id().as_short_id(), + ); + Ok(()) + } + + async fn get_anchor_by_id( + &self, + anchor_id: MempoolAnchorId, + ) -> Result>> { + //TODO: make real implementation, currently only return anchor from local cache + let res = { + let anchors_cache_r = self + ._stub_anchors_cache + .read() + .map_err(|e| anyhow!("Poison error on read lock: {:?}", e))?; + anchors_cache_r.get(&anchor_id).cloned() + }; + if res.is_some() { + tracing::info!(target: tracing_targets::MEMPOOL_ADAPTER, "Requested anchor (id: {}) found in local cache", anchor_id); + } else { + tracing::info!( + target: tracing_targets::MEMPOOL_ADAPTER, + "Requested anchor (id: {}) was not found in local cache", + anchor_id + ); + tracing::trace!(target: tracing_targets::MEMPOOL_ADAPTER, "STUB: Requesting anchor (id: {}) in mempool...", anchor_id); + let response_duration = tokio::time::Duration::from_millis(107); + tokio::time::sleep(response_duration).await; + tracing::info!( + target: tracing_targets::MEMPOOL_ADAPTER, + "STUB: Requested anchor (id: {}) was not found in mempool (responded in {} ms)", + anchor_id, + response_duration.as_millis(), + ); + } + Ok(res) + } + + async fn get_next_anchor(&self, prev_anchor_id: MempoolAnchorId) -> Result> { + //TODO: make real implementation, currently only return anchor from local cache + + let mut stub_first_attempt = true; + let mut request_timer = std::time::Instant::now(); + loop { + { + let anchors_cache_r = self + ._stub_anchors_cache + .read() + .map_err(|e| anyhow!("Poison error on read lock: {:?}", e))?; + + let mut range = anchors_cache_r.range(( + std::ops::Bound::Excluded(prev_anchor_id), + std::ops::Bound::Unbounded, + )); + + if let Some((next_id, next)) = range.next() { + if stub_first_attempt { + tracing::info!( + target: tracing_targets::MEMPOOL_ADAPTER, + "Found in cache next anchor (id: {}) after specified previous (id: {})", + next_id, + prev_anchor_id, + ); + } else { + tracing::info!( + target: tracing_targets::MEMPOOL_ADAPTER, + "STUB: Returned next anchor (id: {}) after previous (id: {}) from mempool (responded in {} ms)", + next_id, + prev_anchor_id, + request_timer.elapsed().as_millis(), + ); + } + return Ok(next.clone()); + } else if stub_first_attempt { + tracing::info!( + target: tracing_targets::MEMPOOL_ADAPTER, + "There is no next anchor in cache after previous (id: {}). STUB: Requested it from mempool. 
Waiting...", + prev_anchor_id + ); + } + } + + // stub waiting some time until new emulated anchors be added to cache + if stub_first_attempt { + request_timer = std::time::Instant::now(); + } + stub_first_attempt = false; + tokio::time::sleep(tokio::time::Duration::from_millis(1020)).await; + } + } + + async fn clear_anchors_cache(&self, before_anchor_id: MempoolAnchorId) -> Result<()> { + let mut anchors_cache_rw = self + ._stub_anchors_cache + .write() + .map_err(|e| anyhow!("Poison error on write lock: {:?}", e))?; + anchors_cache_rw.retain(|anchor_id, _| anchor_id >= &before_anchor_id); + Ok(()) + } +} + +fn _stub_create_random_anchor_with_stub_externals( + anchor_id: MempoolAnchorId, +) -> Arc { + let chain_time = std::time::SystemTime::now() + .duration_since(std::time::SystemTime::UNIX_EPOCH) + .unwrap() + .as_millis() as u64; + let externals_count: i32 = rand::thread_rng().gen_range(-10..10).max(0); + let mut externals = vec![]; + for i in 0..externals_count { + let rand_addr = (0..32).map(|_| rand::random::()).collect::>(); + let rand_addr = HashBytes::from_slice(&rand_addr); + let mut msg_cell_builder = CellBuilder::new(); + msg_cell_builder.store_u32(anchor_id).unwrap(); + msg_cell_builder.store_u64(chain_time).unwrap(); + msg_cell_builder.store_u32(i as u32).unwrap(); + let msg_cell = msg_cell_builder.build().unwrap(); + let msg = ExternalMessage::new( + msg_cell, + ExtInMsgInfo { + dst: IntAddr::Std(StdAddr::new(0, rand_addr)), + ..Default::default() + }, + ); + externals.push(Arc::new(msg)); + } + + Arc::new(MempoolAnchor::new(anchor_id, chain_time, externals)) +} + +impl MempoolAdapterStdImpl { + fn new( + listener: Arc, + ) -> Self { + tracing::info!(target: tracing_targets::MEMPOOL_ADAPTER, "Creating mempool adapter..."); + + //TODO: make real implementation, currently runs stub task + // that produces the repeating set of anchors + let stub_anchors_cache = Arc::new(RwLock::new(BTreeMap::new())); + + tokio::spawn({ + let listener = listener.clone(); + let stub_anchors_cache = stub_anchors_cache.clone(); + async move { + let mut anchor_id = 0; + loop { + let rnd_round_interval = rand::thread_rng().gen_range(400..600); + tokio::time::sleep(tokio::time::Duration::from_millis(rnd_round_interval * 6)) + .await; + anchor_id += 1; + let anchor = _stub_create_random_anchor_with_stub_externals(anchor_id); + { + let mut anchor_cache_rw = stub_anchors_cache + .write() + .map_err(|e| anyhow!("Poison error on write lock: {:?}", e)) + .unwrap(); + tracing::debug!( + target: tracing_targets::MEMPOOL_ADAPTER, + "Random anchor (id: {}, chain_time: {}, externals: {}) added to cache", + anchor.id(), + anchor.chain_time(), + anchor.externals_count(), + ); + anchor_cache_rw.insert(anchor_id, anchor.clone()); + } + listener.on_new_anchor(anchor).await.unwrap(); + } + } + }); + tracing::info!(target: tracing_targets::MEMPOOL_ADAPTER, "Stub anchors generator started"); + + tracing::info!(target: tracing_targets::MEMPOOL_ADAPTER, "Mempool adapter created"); + + Self { + listener, + _stub_anchors_cache: stub_anchors_cache, + } + } +} diff --git a/collator/src/mempool/mod.rs b/collator/src/mempool/mod.rs index 761b0157d..2b4bf9b3d 100644 --- a/collator/src/mempool/mod.rs +++ b/collator/src/mempool/mod.rs @@ -1,7 +1,8 @@ mod builder; -mod mempool_adapter; +mod mempool_adapter_std; mod types; +mod mempool_adapter; pub use builder::{MempoolAdapterBuilder, MempoolAdapterBuilderStdImpl}; -pub use mempool_adapter::*; +pub use mempool_adapter_std::*; pub(crate) use types::{MempoolAnchor, MempoolAnchorId}; 
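
Editor's note: the trait kept in mempool_adapter_std.rs above is the surface the collator drives while the real mempool adapter is being wired up in this patch. A minimal sketch of that polling loop, assuming only the methods shown in this series and the crate-local re-exports from mod.rs; the function name `drive_mempool` and the debug log are illustrative, not part of the patch:

use std::sync::Arc;

use anyhow::Result;

use crate::mempool::{MempoolAdapter, MempoolAnchorId};

// Hypothetical consumer loop: awaits each committed anchor in order,
// walks its external messages, then drops anchors that are no longer needed.
async fn drive_mempool<A: MempoolAdapter>(adapter: Arc<A>) -> Result<()> {
    let mut last_anchor_id: MempoolAnchorId = 0;
    loop {
        // blocks until the mempool commits an anchor newer than `last_anchor_id`
        let anchor = adapter.get_next_anchor(last_anchor_id).await?;
        last_anchor_id = anchor.id();

        let mut externals = 0usize;
        for _message in anchor.externals_iterator(0) {
            // each item is an Arc-wrapped external message to hand over to collation
            externals += 1;
        }
        tracing::debug!(
            "anchor {} (chain time {}): {} externals to collate",
            anchor.id(),
            anchor.chain_time(),
            externals,
        );

        // anchors already accounted for in signed master blocks can be discarded
        adapter.clear_anchors_cache(last_anchor_id).await?;
    }
}
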
diff --git a/collator/src/mempool/types.rs b/collator/src/mempool/types.rs
index 1b5868dc0..1267c5496 100644
--- a/collator/src/mempool/types.rs
+++ b/collator/src/mempool/types.rs
@@ -1,18 +1,33 @@
 use std::sync::Arc;
 
-use everscale_types::models::OwnedMessage;
+use everscale_types::models::{ExtInMsgInfo, OwnedMessage};
+use everscale_types::prelude::Cell;
 
 // TYPES
 
 pub(crate) type MempoolAnchorId = u32;
 
+pub(crate) struct ExternalMessage {
+    message_cell: Cell,
+    message_info: ExtInMsgInfo
+}
+
+impl ExternalMessage {
+    pub fn new(message_cell: Cell, message_info: ExtInMsgInfo) -> ExternalMessage {
+        Self {
+            message_cell,
+            message_info,
+        }
+    }
+}
+
 pub(crate) struct MempoolAnchor {
     id: MempoolAnchorId,
     chain_time: u64,
-    externals: Vec>,
+    externals: Vec>,
 }
 
 impl MempoolAnchor {
-    pub fn new(id: MempoolAnchorId, chain_time: u64, externals: Vec>) -> Self {
+    pub fn new(id: MempoolAnchorId, chain_time: u64, externals: Vec>) -> Self {
         Self {
             id,
             chain_time,
@@ -34,7 +49,7 @@ impl MempoolAnchor {
     pub fn externals_iterator(
         &self,
         from_idx: usize,
-    ) -> impl Iterator> + '_ {
+    ) -> impl Iterator> + '_ {
         self.externals.iter().skip(from_idx).cloned()
     }
 }
diff --git a/consensus/src/engine/engine.rs b/consensus/src/engine/engine.rs
index 2278cefdb..15d782c26 100644
--- a/consensus/src/engine/engine.rs
+++ b/consensus/src/engine/engine.rs
@@ -3,6 +3,7 @@ use std::sync::Arc;
 use everscale_crypto::ed25519::{KeyPair, SecretKey};
 use itertools::Itertools;
 use tokio::sync::{mpsc, oneshot, watch};
+use tokio::sync::mpsc::{Sender, UnboundedSender};
 use tokio::task::JoinSet;
 
 use tycho_network::{DhtClient, OverlayService, PeerId};
@@ -12,7 +13,7 @@ use crate::intercom::{
     BroadcastFilter, Broadcaster, BroadcasterSignal, Collector, CollectorSignal, Dispatcher,
     Downloader, PeerSchedule, PeerScheduleUpdater, Responder, Uploader,
 };
-use crate::models::{PrevPoint, Ugly};
+use crate::models::{Point, PrevPoint, Ugly};
 
 pub struct Engine {
     local_id: Arc,
@@ -24,6 +25,7 @@ pub struct Engine {
     broadcast_filter: BroadcastFilter,
     top_dag_round_watch: watch::Sender,
     tasks: JoinSet<()>, // should be JoinSet
+    tx: UnboundedSender, Vec>)>>
 }
 
 impl Engine {
@@ -32,6 +34,8 @@ impl Engine {
         dht_client: &DhtClient,
         overlay_service: &OverlayService,
         peers: &Vec,
+        tx: UnboundedSender, Vec>)>>
+
     ) -> Self {
         let key_pair = KeyPair::from(secret_key);
         let local_id = Arc::new(format!("{:?}", PeerId::from(key_pair.public_key).ugly()));
@@ -109,6 +113,7 @@ impl Engine {
             broadcast_filter,
             top_dag_round_watch: top_dag_round_tx,
             tasks,
+            tx
         }
     }
 
@@ -199,6 +204,10 @@ impl Engine {
 
         match tokio::join!(collector_run, bcaster_run, commit_run, bcast_filter_upd) {
             (Ok(collector_upd), Ok(new_prev_point), Ok(committed), Ok(_bcast_filter_upd)) => {
+                if let Err(e) = self.tx.send(committed.clone()) {
+                    tracing::error!("Failed to send anchor commit message to mpsc channel. 
Err: {e:?}"); + } + let committed = committed .into_iter() .map(|(anchor, history)| { diff --git a/consensus/src/lib.rs b/consensus/src/lib.rs index cb4f2509f..64241b245 100644 --- a/consensus/src/lib.rs +++ b/consensus/src/lib.rs @@ -4,3 +4,7 @@ pub(crate) mod engine; pub(crate) mod intercom; pub(crate) mod models; pub(crate) mod test_utils; + + +pub use engine::Engine; +pub use models::Point; \ No newline at end of file diff --git a/consensus/src/models/point.rs b/consensus/src/models/point.rs index 5cbacb36f..37df9258c 100644 --- a/consensus/src/models/point.rs +++ b/consensus/src/models/point.rs @@ -86,6 +86,10 @@ impl UnixTime { .expect("current Unix time in millis as u64"), ) } + + pub fn as_u64(&self) -> u64 { + self.0 + } } impl Add for UnixTime { From cf27d0684373716ce3b474f0d9965ad18a3dffdd Mon Sep 17 00:00:00 2001 From: Kirill Mikheev Date: Wed, 1 May 2024 00:20:02 +0300 Subject: [PATCH 18/32] fix(consensus): run --- consensus/src/dag/dag_location.rs | 17 +- consensus/src/dag/producer.rs | 19 +- consensus/src/dag/verifier.rs | 10 +- consensus/src/engine/engine.rs | 84 +++++---- consensus/src/engine/mempool_config.rs | 4 +- .../intercom/broadcast/broadcast_filter.rs | 173 +++++++----------- .../src/intercom/broadcast/broadcaster.rs | 76 +++----- consensus/src/intercom/broadcast/collector.rs | 127 +++++++------ consensus/src/intercom/broadcast/dto.rs | 22 +++ consensus/src/intercom/core/dispatcher.rs | 39 +++- consensus/src/intercom/core/dto.rs | 38 ++-- consensus/src/intercom/core/responder.rs | 63 ++++--- .../src/intercom/dependency/downloader.rs | 151 +++++++++++---- consensus/src/intercom/dependency/uploader.rs | 19 +- consensus/src/intercom/dto.rs | 11 -- .../intercom/peer_schedule/peer_schedule.rs | 2 +- consensus/src/models/node_count.rs | 4 +- consensus/src/models/ugly.rs | 21 ++- consensus/src/test_utils.rs | 4 +- 19 files changed, 523 insertions(+), 361 deletions(-) diff --git a/consensus/src/dag/dag_location.rs b/consensus/src/dag/dag_location.rs index 15edaae7b..c93df8101 100644 --- a/consensus/src/dag/dag_location.rs +++ b/consensus/src/dag/dag_location.rs @@ -54,7 +54,12 @@ impl DagLocation { match self.versions.entry(digest.clone()) { btree_map::Entry::Occupied(entry) => entry.get().clone(), btree_map::Entry::Vacant(entry) => { - entry.insert(Shared::new(JoinTask::new(init()))).clone() + let state = self.state.clone(); + entry + .insert(Shared::new(JoinTask::new( + init().inspect(move |dag_point| state.init(dag_point)), + ))) + .clone() } } } @@ -155,6 +160,14 @@ impl InclusionState { self.0.get().map(|signable| &signable.first_completed) } } + +// Todo actually we are not interested in the round of making a signature, +// but a round of the first (and only) reference. +// Since the single version of a point is decided as eligible for signature (trusted), +// it may be included immediately; no need to include it twice. +// One cannot include point with the time lesser than the proven anchor candidate - +// and that's all for the global time sequence, i.e. any node with a great time skew +// can produce points to be included and committed, but cannot accomplish leader's requirements. 
#[derive(Debug)] pub struct Signable { first_completed: DagPoint, @@ -193,7 +206,7 @@ impl Signable { } this_call_signed } - pub fn reject(&self) { + fn reject(&self) { _ = self.signed.set(Err(())); } fn is_completed(&self) -> bool { diff --git a/consensus/src/dag/producer.rs b/consensus/src/dag/producer.rs index b364d51cb..80a9a15c6 100644 --- a/consensus/src/dag/producer.rs +++ b/consensus/src/dag/producer.rs @@ -91,15 +91,10 @@ impl Producer { .point() .map(|dag_point| dag_point.trusted()) .flatten() - .filter(|_| { - loc.state() - .signed() - .map(|r| { - r.as_ref() - .map_or(false, |s| &s.at == finished_round.round()) - }) - .unwrap_or(true) - }) + // TODO refactor Signable: we are interested not in the round of signature, + // but whether was a point already included or not (just in order not to + // include it twice); repeating inclusions are suboptimal but still correct + .filter(|_| loc.state().signed().map_or(true, |r| r.is_ok())) .map(|dag_point| dag_point.point.clone()) }) .collect::>(); @@ -252,7 +247,11 @@ impl Producer { } } } - // TODO maybe take the greatest time among all point's dependencies - as they must be signed + // No need to take the greatest time among all point's dependencies - + // only leader's time is significant and every node will have its chance + // (or its chain will be rejected). Better throw away a single node's point + // than requiring the whole consensus to wait once the point was included. + // Todo: use proven anchor candidate's time, as it is unique time } } diff --git a/consensus/src/dag/verifier.rs b/consensus/src/dag/verifier.rs index 5b75b8676..64a3d1484 100644 --- a/consensus/src/dag/verifier.rs +++ b/consensus/src/dag/verifier.rs @@ -141,7 +141,7 @@ impl Verifier { for (author, digest, dag_round) in linked_with_round { // skip self links if dag_round.round() < &point.body.location.round { - // will add the same point from direct dependencies twice, + // TODO will add the same point from direct dependencies twice, // we can do better but nothing terrible Self::add_dependency( &author, @@ -256,12 +256,16 @@ impl Verifier { } if valid_point_id == anchor_proof_id && point.body.time < valid.point.body.time { - // Any point that (in)directly includes anchor candidate through its proof + // Any point that includes anchor candidate through its proof // must provide the time not less than candidate's to maintain // non-decreasing time in committed anchor chain. // The time of candidate's valid proof exactly satisfies such requirement: - // it either will be signed by majority (what unblocks the commit trigger), + // it either will be signed by majority (that unblocks the commit trigger), // or the valid trigger will not be created. + // FIXME better use the time from the proven anchor candidate - + // though it's an additional dependency during validation, + // it is proven and can't be manipulated + // (i.e. 
do not rely on anything unless 2F+1 signatures provided) return DagPoint::Invalid(point.clone()); } } diff --git a/consensus/src/engine/engine.rs b/consensus/src/engine/engine.rs index 2278cefdb..acd6f4a88 100644 --- a/consensus/src/engine/engine.rs +++ b/consensus/src/engine/engine.rs @@ -12,10 +12,10 @@ use crate::intercom::{ BroadcastFilter, Broadcaster, BroadcasterSignal, Collector, CollectorSignal, Dispatcher, Downloader, PeerSchedule, PeerScheduleUpdater, Responder, Uploader, }; -use crate::models::{PrevPoint, Ugly}; +use crate::models::{Point, PrevPoint, Ugly}; pub struct Engine { - local_id: Arc, + log_id: Arc, dag: Dag, peer_schedule: Arc, dispatcher: Dispatcher, @@ -34,13 +34,13 @@ impl Engine { peers: &Vec, ) -> Self { let key_pair = KeyPair::from(secret_key); - let local_id = Arc::new(format!("{:?}", PeerId::from(key_pair.public_key).ugly())); + let log_id = Arc::new(format!("{:?}", PeerId::from(key_pair.public_key).ugly())); let peer_schedule = Arc::new(PeerSchedule::new(Arc::new(key_pair))); let (bcast_tx, bcast_rx) = mpsc::unbounded_channel(); let broadcast_filter = - BroadcastFilter::new(local_id.clone(), peer_schedule.clone(), bcast_tx); + BroadcastFilter::new(log_id.clone(), peer_schedule.clone(), bcast_tx); let (sig_requests, sig_responses) = mpsc::unbounded_channel(); @@ -51,7 +51,7 @@ impl Engine { &overlay_service, peers, Responder::new( - local_id.clone(), + log_id.clone(), broadcast_filter.clone(), sig_requests, uploader_tx, @@ -77,21 +77,27 @@ impl Engine { let (top_dag_round_tx, top_dag_round_rx) = watch::channel(current_dag_round.clone()); let mut tasks = JoinSet::new(); - let uploader = Uploader::new(uploader_rx, top_dag_round_rx); + let uploader = Uploader::new(log_id.clone(), uploader_rx, top_dag_round_rx); tasks.spawn(async move { uploader.run().await; }); tasks.spawn(async move { peer_schedule_updater.run().await; }); + tasks.spawn({ + let broadcast_filter = broadcast_filter.clone(); + async move { + broadcast_filter.clear_cache().await; + } + }); - let downloader = Downloader::new(local_id.clone(), &dispatcher, &peer_schedule); + let downloader = Downloader::new(log_id.clone(), &dispatcher, &peer_schedule); let genesis_state = current_dag_round .insert_exact_validate(&genesis, &peer_schedule, &downloader) .await; let collector = Collector::new( - local_id.clone(), + log_id.clone(), &downloader, bcast_rx, sig_responses, @@ -100,7 +106,7 @@ impl Engine { ); Self { - local_id, + log_id, dag, peer_schedule, dispatcher, @@ -113,7 +119,7 @@ impl Engine { } async fn bcaster_run( - local_id: Arc, + log_id: Arc, produce_own_point: bool, dispatcher: Dispatcher, peer_schedule: Arc, @@ -134,7 +140,7 @@ impl Engine { .expect("own produced point must be valid"); own_point_state.send(state).ok(); let evidence = Broadcaster::new( - &local_id, + log_id.clone(), &own_point, &dispatcher, &peer_schedule, @@ -166,12 +172,14 @@ impl Engine { .top(¤t_dag_round.round().next(), &self.peer_schedule); self.top_dag_round_watch.send(next_dag_round.clone()).ok(); + tracing::info!("{} @ {:?}", self.log_id, current_dag_round.round()); + let (bcaster_ready_tx, bcaster_ready_rx) = mpsc::channel(1); // let this channel unbounded - there won't be many items, but every of them is essential let (collector_signal_tx, collector_signal_rx) = mpsc::unbounded_channel(); let (own_point_state_tx, own_point_state_rx) = oneshot::channel(); let bcaster_run = tokio::spawn(Self::bcaster_run( - self.local_id.clone(), + self.log_id.clone(), produce_own_point, self.dispatcher.clone(), 
self.peer_schedule.clone(), @@ -186,7 +194,7 @@ impl Engine { let commit_run = tokio::spawn(self.dag.clone().commit(next_dag_round.clone())); let bcast_filter_upd = { let bcast_filter = self.broadcast_filter.clone(); - let round = next_dag_round.round().clone(); + let round = current_dag_round.round().clone(); tokio::spawn(async move { bcast_filter.advance_round(&round) }) }; @@ -199,25 +207,7 @@ impl Engine { match tokio::join!(collector_run, bcaster_run, commit_run, bcast_filter_upd) { (Ok(collector_upd), Ok(new_prev_point), Ok(committed), Ok(_bcast_filter_upd)) => { - let committed = committed - .into_iter() - .map(|(anchor, history)| { - let history = history - .into_iter() - .map(|point| format!("{:?}", point.id().ugly())) - .join(", "); - format!( - "anchor {:?} time {} : [ {history} ]", - anchor.id().ugly(), - anchor.body.time - ) - }) - .join(" ; "); - tracing::info!( - "{} @ {:?} committed {committed}", - self.local_id, - current_dag_round.round() - ); + Self::log_committed(&self.log_id, ¤t_dag_round, &committed); prev_point = new_prev_point; produce_own_point = next_dag_round.round() == collector_upd.next_round(); self.collector = collector_upd; @@ -239,4 +229,34 @@ impl Engine { } } } + + fn log_committed( + log_id: &String, + current_dag_round: &DagRound, + committed: &Vec<(Arc, Vec>)>, + ) { + if committed.is_empty() { + return; + } + if tracing::enabled!(tracing::Level::INFO) { + let committed = committed + .into_iter() + .map(|(anchor, history)| { + let history = history + .iter() + .map(|point| format!("{:?}", point.id().ugly())) + .join(", "); + format!( + "anchor {:?} time {} : [ {history} ]", + anchor.id().ugly(), + anchor.body.time + ) + }) + .join(" ; "); + tracing::info!( + "{log_id} @ {:?} committed {committed}", + current_dag_round.round(), + ); + } + } } diff --git a/consensus/src/engine/mempool_config.rs b/consensus/src/engine/mempool_config.rs index 6a2c80f33..c4d1d4eda 100644 --- a/consensus/src/engine/mempool_config.rs +++ b/consensus/src/engine/mempool_config.rs @@ -23,7 +23,7 @@ impl MempoolConfig { /// we try to gather as many points and signatures as we can within some time frame; /// this is a tradeoff between breaking on exactly 2F+1 elements /// (dependencies and/or signatures), and waiting for slow nodes - pub const RETRY_INTERVAL: Duration = Duration::from_millis(250); + pub const RETRY_INTERVAL: Duration = Duration::from_millis(150); /// the least amount of [Round]s that are kept in DAG until they are discarded pub const COMMIT_DEPTH: u8 = 20; @@ -38,5 +38,5 @@ impl MempoolConfig { /// every failed response is accounted as point is not found; /// 1/3+1 failed responses leads to invalidation of the point and all its dependants - pub const DOWNLOAD_TIMEOUT: Duration = Duration::from_millis(200); + pub const DOWNLOAD_TIMEOUT: Duration = Duration::from_millis(50); } diff --git a/consensus/src/intercom/broadcast/broadcast_filter.rs b/consensus/src/intercom/broadcast/broadcast_filter.rs index 40f4cbdaf..674055374 100644 --- a/consensus/src/intercom/broadcast/broadcast_filter.rs +++ b/consensus/src/intercom/broadcast/broadcast_filter.rs @@ -2,6 +2,7 @@ use std::collections::BTreeMap; use std::sync::atomic::{AtomicU32, Ordering}; use std::sync::Arc; +use itertools::Itertools; use tokio::sync::broadcast::error::RecvError; use tokio::sync::mpsc; @@ -9,7 +10,8 @@ use tycho_network::PeerId; use tycho_util::FastDashMap; use crate::dag::Verifier; -use crate::intercom::dto::{BroadcastResponse, PeerState}; +use crate::engine::MempoolConfig; +use 
crate::intercom::dto::PeerState; use crate::intercom::PeerSchedule; use crate::models::{Digest, Location, NodeCount, Point, PointId, Round}; @@ -20,50 +22,50 @@ pub struct BroadcastFilter(Arc); impl BroadcastFilter { pub fn new( - local_id: Arc, + log_id: Arc, peer_schedule: Arc, output: mpsc::UnboundedSender, ) -> Self { - let this = Self(Arc::new(BroadcastFilterInner { - local_id, + Self(Arc::new(BroadcastFilterInner { + log_id, last_by_peer: Default::default(), by_round: Default::default(), current_dag_round: Default::default(), // will advance with other peers peer_schedule, output, - })); - let listener = this.clone(); - tokio::spawn(listener.clean_cache()); - this + })) } - pub fn add(&self, point: Arc) -> BroadcastResponse { - self.0.add(point) + pub fn add(&self, point: Arc) { + self.0.add(point); } pub fn advance_round(&self, new_round: &Round) { self.0.advance_round(new_round) } - async fn clean_cache(self) { + pub async fn clear_cache(self) -> ! { let mut rx = self.0.peer_schedule.updates(); - match rx.recv().await { - Ok((peer_id, PeerState::Unknown)) => { - self.0.last_by_peer.remove(&peer_id); - } - Ok(_) => {} - Err(err @ RecvError::Lagged(_)) => { - tracing::error!("peer schedule updates {err}"); - } - Err(err @ RecvError::Closed) => { - panic!("peer schedule updates {err}"); + loop { + match rx.recv().await { + Ok((peer_id, PeerState::Unknown)) => { + // assume peers aren't removed from DHT immediately + self.0.last_by_peer.remove(&peer_id); + } + Ok(_) => {} + Err(err @ RecvError::Lagged(_)) => { + tracing::error!("peer schedule updates {err}"); + } + Err(err @ RecvError::Closed) => { + panic!("peer schedule updates {err}"); + } } } } } struct BroadcastFilterInner { - local_id: Arc, + log_id: Arc, // defend from spam from future rounds: // should keep rounds greater than current dag round last_by_peer: FastDashMap, @@ -90,141 +92,108 @@ impl BroadcastFilterInner { // => we should discard points from the far future /// returns Vec of points to insert into DAG if consensus round is determined reliably - fn add(&self, point: Arc) -> BroadcastResponse { - let local_id = &self.local_id; - // dag @r+0 accepts broadcasts of [r-1; r+1] rounds; - // * points older than r-1 are rejected, but are sent to DAG for validation - // as they may be used by some point as a dependency - // * newer broadcasts are enqueued until 1/3+1 points per round collected - let dag_round = Round(self.current_dag_round.load(Ordering::Acquire)); + fn add(&self, point: Arc) { + let local_id = &self.log_id; // for any node @ r+0, its DAG always contains [r-DAG_DEPTH-N; r+1] rounds, where N>=0 + let dag_round = Round(self.current_dag_round.load(Ordering::Acquire)); let PointId { location: Location { round, author }, digest, } = point.id(); - tracing::info!( - "{local_id} @ {dag_round:?} filter <= bcaster {author:.4?} @ {round:?} : received" - ); + tracing::debug!("{local_id} @ {dag_round:?} filter <= bcaster {author:.4?} @ {round:?}"); // conceal raw point, do not use it let point = match Verifier::verify(&point, &self.peer_schedule) { Ok(()) => ConsensusEvent::Verified(point), Err(dag_point) => { tracing::error!( - "{local_id} @ {dag_round:?} filter <= bcaster {author:.4?} @ {round:?} : \ - invalid {point:.4?}" + "{local_id} @ {dag_round:?} filter => bcaster {author:.4?} @ {round:?} : \ + Invalid {point:.4?}" ); ConsensusEvent::Invalid(dag_point) } }; if round <= dag_round { - let response = if matches!(point, ConsensusEvent::Invalid(_)) { - BroadcastResponse::Rejected - } else if round >= dag_round.prev() 
{ - BroadcastResponse::Accepted // we will sign, maybe - } else { - tracing::error!( - "{local_id} @ {dag_round:?} filter <= bcaster {author:.4?} @ {round:?} : \ - Rejected as too old round" - ); - // too old, current node will not sign, but some point may include it - BroadcastResponse::Rejected - }; - _ = self.output.send(point); - return response; + self.output.send(point).ok(); + return; } // else: either consensus moved forward without us, - // or we shouldn't accept the point yet, or this is spam + // or we shouldn't accept the point yet, or it's a spam - let mut outdated_peer_round = None; if *self .last_by_peer .entry(author) - .and_modify(|next| { - if *next < round { - if *next >= dag_round { - outdated_peer_round = Some(*next); - } - *next = round + .and_modify(|last_by_peer| { + if *last_by_peer < round { + *last_by_peer = round } }) .or_insert(round) > round { - // equivocations are handled by DAG; - // node must not send broadcasts out-of order; // TODO we should ban a peer that broadcasts its rounds out of order, // though we cannot prove this decision for other nodes tracing::error!( - "{local_id} @ {dag_round:?} filter <= bcaster {author:.4?} @ {round:?} : \ - Rejected as out of order by round" + "{local_id} @ {dag_round:?} filter => bcaster {author:.4?} @ {round:?} : \ + out of order by round" ); - return BroadcastResponse::Rejected; + return; }; - if let Some(to_delete) = outdated_peer_round { - // unfortunately, removals will occur every time node lags behind consensus - self.by_round.entry(to_delete).and_modify(|(_, authors)| { - // luckily no need to shrink a BTreeMap - // TODO ban the author, if we detect equivocation now; we won't be able to prove it - // if some signatures are invalid (it's another reason for a local ban) - authors.remove(&author); - }); - } match self.by_round.entry(round).or_try_insert_with(|| { // how many nodes should send broadcasts NodeCount::try_from(self.peer_schedule.peers_for(&round).len()) .map(|node_count| (node_count, Default::default())) }) { - // will not accept broadcasts from not initialized validator set - Err(_) => return BroadcastResponse::TryLater, + Err(_) => { + // will not accept broadcasts from not initialized validator set + return; + } Ok(mut entry) => { let (node_count, ref mut same_round) = entry.value_mut(); + // TODO ban the author, if we detect equivocation now; we won't be able to prove it + // if some signatures are invalid (it's another reason for a local ban) same_round.entry(author).or_default().insert(digest, point); if same_round.len() < node_count.reliable_minority() { - tracing::info!( - "{local_id} @ {dag_round:?} filter <= bcaster {author:.4?} @ {round:?} : \ + tracing::debug!( + "{local_id} @ {dag_round:?} filter => bcaster {author:.4?} @ {round:?} : \ round is not determined yet", ); - return BroadcastResponse::TryLater; // round is not yet determined + return; }; } } - self.advance_round(&round); - BroadcastResponse::Accepted } // drop everything up to the new round (inclusive), channelling cached points fn advance_round(&self, new_round: &Round) { - let Ok(old) = + for round in (self.current_dag_round.load(Ordering::Acquire)..=new_round.0).map(Round) { + self.output.send(ConsensusEvent::Forward(round)).ok(); + // allow filter to channel messages only after Forward was sent self.current_dag_round .fetch_update(Ordering::Release, Ordering::Relaxed, |old| { - Some(new_round.0).filter(|new| old < *new) + Some(round.0).filter(|new| old < *new) }) - else { - return; - }; - // if dag advanced more than by +1 
round, include our potential witness points - // TODO it would be great to drain all contents up to the new round for performance, - // (no need to download discarded data) but only top 2 of them are truly necessary; - // looks like DashMap doesn't fit well - let mut data = if old < new_round.0 { - self.by_round.remove(&new_round.prev()) - } else { - None - } - .into_iter() - .chain(self.by_round.remove(&new_round)); - - while let Some((round, (_, by_author))) = data.next() { - _ = self.output.send(ConsensusEvent::Forward(round)); - for (_, points) in by_author { - for (_, point) in points { - _ = self.output.send(point); - } + .ok(); + // map entry is not used by filter anymore + for event in self + .by_round + .remove(&round) + .into_iter() + .map(|(_, (_, v))| v.into_iter()) + .flatten() + .map(|(_, v)| v.into_iter().map(|(_, v)| v)) + .flatten() + // inside actually only (in) valid points by the same round + .sorted_by(ConsensusEvent::priority) + { + self.output.send(event).ok(); } } - // clear older rounds TODO: shrink to fit - self.by_round.retain(|round, _| round > new_round); + // TODO there must be some config value - when node needs to sync; + // values too far in the future are some garbage, must ban authors + self.by_round.retain(|round, _| { + new_round < round && round.0 <= new_round.0 + MempoolConfig::COMMIT_DEPTH as u32 + }); } } diff --git a/consensus/src/intercom/broadcast/broadcaster.rs b/consensus/src/intercom/broadcast/broadcaster.rs index 4fc785625..c2af75146 100644 --- a/consensus/src/intercom/broadcast/broadcaster.rs +++ b/consensus/src/intercom/broadcast/broadcaster.rs @@ -11,11 +11,11 @@ use tycho_network::PeerId; use tycho_util::{FastHashMap, FastHashSet}; use crate::intercom::broadcast::collector::CollectorSignal; -use crate::intercom::dto::{BroadcastResponse, PeerState, SignatureResponse}; +use crate::intercom::dto::{PeerState, SignatureResponse}; use crate::intercom::{Dispatcher, PeerSchedule}; use crate::models::{NodeCount, Point, Round, Signature}; -type BcastResult = anyhow::Result; +type BcastResult = anyhow::Result<()>; type SigResult = anyhow::Result; #[derive(Debug)] @@ -25,7 +25,7 @@ pub enum BroadcasterSignal { } pub struct Broadcaster { - local_id: Arc, + log_id: Arc, current_round: Round, point_body: Vec, @@ -52,7 +52,7 @@ pub struct Broadcaster { impl Broadcaster { pub fn new( - local_id: &Arc, + log_id: Arc, point: &Point, dispatcher: &Dispatcher, peer_schedule: &PeerSchedule, @@ -68,15 +68,15 @@ impl Broadcaster { .collect::>(); let signers_count = NodeCount::new(signers.len()); let collectors = peer_schedule.all_resolved(); - tracing::info!( - "{local_id} @ {:?} collectors count = {}", + tracing::debug!( + "{log_id} @ {:?} collectors count = {}", point.body.location.round, collectors.len() ); let bcast_request = Dispatcher::broadcast_request(&point); let sig_request = Dispatcher::signature_request(&point.body.location.round); Self { - local_id: local_id.clone(), + log_id, current_round: point.body.location.round, point_body, dispatcher: dispatcher.clone(), @@ -144,9 +144,9 @@ impl Broadcaster { } async fn should_finish(&mut self, collector_signal: CollectorSignal) -> bool { - tracing::info!( + tracing::debug!( "{} @ {:?} bcaster <= Collector::{collector_signal:?} : sigs {} of {}, rejects {} of {}", - self.local_id, + self.log_id, self.current_round, self.signatures.len(), self.signers_count.majority_of_others(), @@ -183,33 +183,17 @@ impl Broadcaster { self.sig_peers.insert(peer_id); // lighter weight retry loop tracing::error!( "{} @ {:?} 
bcaster <= collector {peer_id:.4?} broadcast error : {error}", - self.local_id, + self.log_id, self.current_round ); } - Ok(response) => { - if response == BroadcastResponse::Rejected { - tracing::warn!( - "{} @ {:?} bcaster <= collector {peer_id:.4?} : {response:?}", - self.local_id, - self.current_round - ); - } else { - tracing::info!( - "{} @ {:?} bcaster <= collector {peer_id:.4?} : {response:?}", - self.local_id, - self.current_round - ); - } - match response { - BroadcastResponse::Accepted => self.request_signature(&peer_id), - BroadcastResponse::TryLater => _ = self.sig_peers.insert(peer_id), - BroadcastResponse::Rejected => { - if self.signers.contains(&peer_id) { - self.rejections.insert(peer_id); - } - } - } + Ok(_) => { + tracing::debug!( + "{} @ {:?} bcaster <= collector {peer_id:.4?} : broadcast accepted", + self.log_id, + self.current_round + ); + self.request_signature(&peer_id); } } } @@ -221,7 +205,7 @@ impl Broadcaster { self.sig_peers.insert(peer_id); // let it retry tracing::error!( "{} @ {:?} bcaster <= collector {peer_id:.4?} signature request error : {error}", - self.local_id, + self.log_id, self.current_round ); } @@ -229,13 +213,13 @@ impl Broadcaster { if response == SignatureResponse::Rejected { tracing::warn!( "{} @ {:?} bcaster <= collector {peer_id:.4?} : {response:.4?}", - self.local_id, + self.log_id, self.current_round ); } else { - tracing::info!( + tracing::debug!( "{} @ {:?} bcaster <= collector {peer_id:.4?} : {response:.4?}", - self.local_id, + self.log_id, self.current_round ); }; @@ -266,16 +250,16 @@ impl Broadcaster { fn broadcast(&mut self, peer_id: &PeerId) { if self.removed_peers.is_empty() || !self.removed_peers.remove(&peer_id) { self.bcast_futs - .push(self.dispatcher.request(&peer_id, &self.bcast_request)); - tracing::info!( + .push(self.dispatcher.send(&peer_id, &self.bcast_request)); + tracing::debug!( "{} @ {:?} bcaster => collector {peer_id:.4?}: broadcast", - self.local_id, + self.log_id, self.current_round ); } else { tracing::warn!( "{} @ {:?} bcaster => collector {peer_id:.4?}: broadcast impossible", - self.local_id, + self.log_id, self.current_round ); } @@ -284,16 +268,16 @@ impl Broadcaster { fn request_signature(&mut self, peer_id: &PeerId) { if self.removed_peers.is_empty() || !self.removed_peers.remove(&peer_id) { self.sig_futs - .push(self.dispatcher.request(&peer_id, &self.sig_request)); - tracing::info!( + .push(self.dispatcher.query(&peer_id, &self.sig_request)); + tracing::debug!( "{} @ {:?} bcaster => collector {peer_id:.4?}: signature request", - self.local_id, + self.log_id, self.current_round ); } else { tracing::warn!( "{} @ {:?} bcaster => collector {peer_id:.4?}: signature request impossible", - self.local_id, + self.log_id, self.current_round ); } @@ -314,7 +298,7 @@ impl Broadcaster { Ok((peer_id, new_state)) => { tracing::info!( "{} @ {:?} bcaster peer update: {peer_id:?} -> {new_state:?}", - self.local_id, + self.log_id, self.current_round ); match new_state { diff --git a/consensus/src/intercom/broadcast/collector.rs b/consensus/src/intercom/broadcast/collector.rs index 8a390a74e..50a916988 100644 --- a/consensus/src/intercom/broadcast/collector.rs +++ b/consensus/src/intercom/broadcast/collector.rs @@ -26,7 +26,7 @@ pub enum CollectorSignal { } pub struct Collector { - local_id: Arc, + log_id: Arc, downloader: Downloader, from_bcast_filter: mpsc::UnboundedReceiver, signature_requests: mpsc::UnboundedReceiver, @@ -36,7 +36,7 @@ pub struct Collector { impl Collector { pub fn new( - local_id: Arc, + log_id: Arc, 
downloader: &Downloader, from_bcast_filter: mpsc::UnboundedReceiver, signature_requests: mpsc::UnboundedReceiver, @@ -44,7 +44,7 @@ impl Collector { next_round: Round, ) -> Self { Self { - local_id, + log_id, downloader: downloader.clone(), from_bcast_filter, signature_requests, @@ -92,7 +92,7 @@ impl Collector { Default::default(), ); let task = CollectorTask { - local_id: self.local_id.clone(), + log_id: self.log_id.clone(), downloader: self.downloader.clone(), current_round: current_dag_round.clone(), next_dag_round, @@ -123,7 +123,7 @@ impl Collector { type SignatureRequest = (Round, PeerId, oneshot::Sender); struct CollectorTask { // for node running @ r+0: - local_id: Arc, + log_id: Arc, downloader: Downloader, current_round: DagRound, // = r+0 next_dag_round: DagRound, // = r+1 is always in DAG; contains the keypair to produce point @ r+1 @@ -185,22 +185,9 @@ impl CollectorTask { Some(state) = self.includes.next() => { self.on_inclusion_validated(&state) }, - Some(state) = self.next_includes.next() => { - if let Some(valid) = state.point().map(|p| p.valid()).flatten() { - self.is_includes_ready = true; - match valid.point.body.location.round.cmp(self.next_dag_round.round()) { - Ordering::Less => panic!("Coding error: next includes futures contain current round"), - Ordering::Greater => { - tracing::error!("Collector was left behind while bcast filter advanced??"); - self.collector_signal.send(CollectorSignal::Err).ok(); - return Err(valid.point.body.location.round); - }, - Ordering::Equal => { - if self.is_ready() { - return Ok(self.next_includes) - } - } - } + Some(state) = self.next_includes.next(), if ! self.is_includes_ready => { + if let Some(result) = self.jump_up(state) { + return result.map(|_ | self.next_includes) } }, request = signature_requests.recv() => match request { @@ -217,9 +204,9 @@ impl CollectorTask { } fn should_fail(&mut self, signal: BroadcasterSignal) -> bool { - tracing::info!( + tracing::debug!( "{} @ {:.4?} collector <= Bcaster::{signal:?} : includes {} of {}", - self.local_id, + self.log_id, self.current_round.round(), self.includes_ready.len(), self.current_round.node_count().majority() @@ -235,9 +222,9 @@ impl CollectorTask { } fn is_ready(&mut self) -> bool { - tracing::info!( + tracing::debug!( "{} @ {:.4?} collector self-check : includes {} of {}", - self.local_id, + self.log_id, self.current_round.round(), self.includes_ready.len(), self.current_round.node_count().majority() @@ -251,10 +238,41 @@ impl CollectorTask { self.is_includes_ready && self.is_bcaster_ready_ok } - fn match_filtered(&self, consensus_event: &ConsensusEvent) -> Result<(), Round> { + fn jump_up(&mut self, state: InclusionState) -> Option> { + // its ok to discard invalid state from `next_includes` queue + let point_round = state.point()?.valid()?.point.body.location.round; tracing::info!( + "{} @ {:?} maybe jump to {point_round:?}", + self.log_id, + self.current_round.round() + ); + // will be signed on the next round + self.next_includes + .push(futures_util::future::ready(state).boxed()); + self.is_includes_ready = true; + match point_round.cmp(self.next_dag_round.round()) { + Ordering::Less => { + panic!("Coding error: next includes futures contain current or previous round") + } + Ordering::Greater => { + tracing::error!("Collector was left behind while broadcast filter advanced ?"); + self.collector_signal.send(CollectorSignal::Err).ok(); + Some(Err(point_round)) + } + Ordering::Equal => { + if self.is_ready() { + Some(Ok(())) + } else { + None + } + } + } + } + + fn 
match_filtered(&self, consensus_event: &ConsensusEvent) -> Result<(), Round> { + tracing::debug!( "{} @ {:?} collector <= bcast filter : {:?}", - self.local_id, + self.log_id, self.current_round.round(), consensus_event.ugly() ); @@ -274,7 +292,7 @@ impl CollectorTask { panic!( "{} @ {:?} Coding error: broadcast filter advanced \ while collector left behind; event: {:?}", - self.local_id, + self.log_id, self.current_round.round(), consensus_event.ugly() ) @@ -296,7 +314,7 @@ impl CollectorTask { panic!( "{} @ {:?} Coding error: broadcast filter advanced \ while collector left behind; event: {:?}", - self.local_id, + self.log_id, self.current_round.round(), consensus_event.ugly() ) @@ -321,40 +339,45 @@ impl CollectorTask { MempoolConfig::sign_time_range(), ); }; - if let Some(signed) = state.signed_point(self.current_round.round()) { - self.includes_ready - .insert(signed.point.body.location.author); - tracing::info!( - "{} @ {:.4?} includes {} +1 : {:?}", - self.local_id, - self.current_round.round(), - self.includes_ready.len(), - signed.point.id().ugly() - ); - } else { - tracing::warn!( - "{} @ {:.4?} includes {} : {:?} {:.4?}", - self.local_id, - self.current_round.round(), - self.includes_ready.len(), - state.point().map(|a| a.id()).as_ref().map(|a| a.ugly()), - state.signed() - ); + if let Some(Ok(_)) = state.signed() { + if let Some(dag_point) = state + .point() + .filter(|dp| dp.location().round == *self.current_round.round()) + { + self.includes_ready.insert(dag_point.location().author); + tracing::debug!( + "{} @ {:.4?} includes {} +1 : {:?}", + self.log_id, + self.current_round.round(), + self.includes_ready.len(), + dag_point.id().ugly() + ); + return; + } + return; } + tracing::debug!( + "{} @ {:.4?} includes {} : {:?} {:.4?}", + self.log_id, + self.current_round.round(), + self.includes_ready.len(), + state.point().map(|a| a.id()).as_ref().map(|a| a.ugly()), + state.signed() + ); } fn signature_response(&mut self, round: &Round, author: &PeerId) -> SignatureResponse { if round > self.current_round.round() { return SignatureResponse::TryLater; // hold fast nodes from moving forward }; - let Some(dag_round) = self.next_dag_round.scan(round) else { + let Some(dag_round) = self.current_round.scan(round) else { return SignatureResponse::Rejected; // lagged too far from consensus and us }; // TODO do not state().clone() - mutating closure on location is easily used; // need to remove inner locks from InclusionState and leave it guarded by DashMap; // also sign points during their validation, see comments in DagLocation::add_validate() let Some(state) = dag_round.view(author, |loc| loc.state().clone()) else { - return SignatureResponse::NoPoint; // retry broadcast, point was replaced in filter + return SignatureResponse::NoPoint; // retry broadcast }; if let Some(signable) = state.signable() { let key_pair = match self.next_dag_round.key_pair() { @@ -383,9 +406,9 @@ impl CollectorTask { Some(Err(())) => SignatureResponse::Rejected, None => SignatureResponse::TryLater, }; - tracing::info!( + tracing::debug!( "{} @ {:?} collector => bcaster {author:.4?} @ {round:?} : {response:.4?}", - self.local_id, + self.log_id, self.current_round.round() ); response diff --git a/consensus/src/intercom/broadcast/dto.rs b/consensus/src/intercom/broadcast/dto.rs index f2a5e896f..ccd4e8bde 100644 --- a/consensus/src/intercom/broadcast/dto.rs +++ b/consensus/src/intercom/broadcast/dto.rs @@ -1,3 +1,4 @@ +use std::cmp::Ordering; use std::fmt::{Debug, Formatter}; use std::sync::Arc; @@ -12,6 +13,27 @@ 
pub enum ConsensusEvent { Invalid(DagPoint), } +impl ConsensusEvent { + pub fn priority(a: &Self, b: &Self) -> Ordering { + match (a, b) { + // all forwards first + (ConsensusEvent::Forward(a), ConsensusEvent::Forward(b)) => a.cmp(b), + (ConsensusEvent::Forward(_), _) => Ordering::Greater, + // then all invalid ones - by round + (ConsensusEvent::Invalid(_), ConsensusEvent::Forward(_)) => Ordering::Less, + (ConsensusEvent::Invalid(a), ConsensusEvent::Invalid(b)) => { + a.location().round.cmp(&b.location().round) + } + (ConsensusEvent::Invalid(_), ConsensusEvent::Verified(_)) => Ordering::Greater, + // then all valid ones - by round + (ConsensusEvent::Verified(a), ConsensusEvent::Verified(b)) => { + a.body.location.round.cmp(&b.body.location.round) + } + (ConsensusEvent::Verified(_), _) => Ordering::Less, + } + } +} + impl Debug for UglyPrint<'_, ConsensusEvent> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self.0 { diff --git a/consensus/src/intercom/core/dispatcher.rs b/consensus/src/intercom/core/dispatcher.rs index 18931ca20..835faa9f1 100644 --- a/consensus/src/intercom/core/dispatcher.rs +++ b/consensus/src/intercom/core/dispatcher.rs @@ -4,7 +4,7 @@ use futures_util::FutureExt; use tycho_network::{DhtClient, Network, OverlayId, OverlayService, PeerId, PrivateOverlay}; -use crate::intercom::core::dto::{MPRequest, MPResponse}; +use crate::intercom::core::dto::{MPQuery, MPResponse}; use crate::intercom::core::responder::Responder; use crate::models::{Point, PointId, Round}; @@ -40,19 +40,14 @@ impl Dispatcher { } pub fn point_by_id_request(&self, id: &PointId) -> tycho_network::Request { - (&MPRequest::PointById(id.clone())).into() - } - - pub fn broadcast_request(point: &Point) -> tycho_network::Request { - // TODO use send message for broadcast, leave Rejected/TryLater only in sig request - (&MPRequest::Broadcast(point.clone())).into() + (&MPQuery::PointById(id.clone())).into() } pub fn signature_request(round: &Round) -> tycho_network::Request { - (&MPRequest::Signature(round.clone())).into() + (&MPQuery::Signature(round.clone())).into() } - pub fn request( + pub fn query( &self, peer_id: &PeerId, request: &tycho_network::Request, @@ -78,4 +73,30 @@ impl Dispatcher { } .boxed() } + + pub fn broadcast_request(point: &Point) -> tycho_network::Request { + point.into() + } + + pub fn send( + &self, + peer_id: &PeerId, + request: &tycho_network::Request, + ) -> BoxFuture<'static, (PeerId, Result<()>)> { + let peer_id = peer_id.clone(); + let request = request.clone(); + let overlay = self.overlay.clone(); + let network = self.network.clone(); + async move { + overlay + .send(&network, &peer_id, request) + .map(move |response| { + let response = + response.map_err(|e| anyhow!("response from peer {peer_id}: {e}")); + (peer_id, response) + }) + .await + } + .boxed() + } } diff --git a/consensus/src/intercom/core/dto.rs b/consensus/src/intercom/core/dto.rs index 1c8cff9a4..f1299fc4e 100644 --- a/consensus/src/intercom/core/dto.rs +++ b/consensus/src/intercom/core/dto.rs @@ -4,25 +4,33 @@ use serde::{Deserialize, Serialize}; use tycho_network::Version; -use crate::intercom::dto::{BroadcastResponse, PointByIdResponse, SignatureResponse}; +use crate::intercom::dto::{PointByIdResponse, SignatureResponse}; use crate::models::{Point, PointId, Round}; #[derive(Serialize, Deserialize, Debug)] -pub enum MPRemoteResult { +pub enum MPQueryResult { Ok(MPResponse), Err(String), } #[derive(Serialize, Deserialize, Debug)] -pub enum MPRequest { +pub enum MPQuery { PointById(PointId), - 
Broadcast(Point), Signature(Round), } -impl From<&MPRequest> for tycho_network::Request { +impl From<&Point> for tycho_network::Request { + fn from(value: &Point) -> Self { + tycho_network::Request { + version: Version::V1, + body: Bytes::from(bincode::serialize(value).expect("shouldn't happen")), + } + } +} + +impl From<&MPQuery> for tycho_network::Request { // TODO: move MPRequest et al to TL - won't need to copy Point - fn from(value: &MPRequest) -> Self { + fn from(value: &MPQuery) -> Self { tycho_network::Request { version: Version::V1, body: Bytes::from(bincode::serialize(value).expect("shouldn't happen")), @@ -33,7 +41,6 @@ impl From<&MPRequest> for tycho_network::Request { #[derive(Serialize, Deserialize, Debug)] pub enum MPResponse { PointById(PointByIdResponse), - Broadcast(BroadcastResponse), Signature(SignatureResponse), } @@ -41,9 +48,9 @@ impl TryFrom<&tycho_network::Response> for MPResponse { type Error = anyhow::Error; fn try_from(response: &tycho_network::Response) -> Result { - match bincode::deserialize::(&response.body) { - Ok(MPRemoteResult::Ok(response)) => Ok(response), - Ok(MPRemoteResult::Err(e)) => Err(anyhow::Error::msg(e)), + match bincode::deserialize::(&response.body) { + Ok(MPQueryResult::Ok(response)) => Ok(response), + Ok(MPQueryResult::Err(e)) => Err(anyhow::Error::msg(e)), Err(e) => Err(anyhow!("failed to deserialize: {e:?}")), } } @@ -59,17 +66,6 @@ impl TryFrom for PointByIdResponse { } } -impl TryFrom for BroadcastResponse { - type Error = anyhow::Error; - - fn try_from(response: MPResponse) -> Result { - match response { - MPResponse::Broadcast(response) => Ok(response), - _ => Err(anyhow!("wrapper mismatch, expected Broadcast")), - } - } -} - impl TryFrom for SignatureResponse { type Error = anyhow::Error; diff --git a/consensus/src/intercom/core/responder.rs b/consensus/src/intercom/core/responder.rs index 14e64c342..8d2e23ff5 100644 --- a/consensus/src/intercom/core/responder.rs +++ b/consensus/src/intercom/core/responder.rs @@ -6,16 +6,16 @@ use tokio::sync::{mpsc, oneshot}; use tycho_network::{PeerId, Response, Service, ServiceRequest, Version}; use tycho_util::futures::BoxFutureOrNoop; -use crate::intercom::core::dto::{MPRemoteResult, MPRequest, MPResponse}; +use crate::intercom::core::dto::{MPQuery, MPQueryResult, MPResponse}; use crate::intercom::dto::{PointByIdResponse, SignatureResponse}; use crate::intercom::BroadcastFilter; -use crate::models::{PointId, Round}; +use crate::models::{Point, PointId, Round, Ugly}; pub struct Responder(Arc); impl Responder { pub fn new( - local_id: Arc, + log_id: Arc, broadcast_filter: BroadcastFilter, signature_requests: mpsc::UnboundedSender<( Round, @@ -25,7 +25,7 @@ impl Responder { uploads: mpsc::UnboundedSender<(PointId, oneshot::Sender)>, ) -> Self { Self(Arc::new(ResponderInner { - local_id, + log_id, broadcast_filter, signature_requests, uploads, @@ -41,12 +41,12 @@ impl Service for Responder { #[inline] fn on_query(&self, req: ServiceRequest) -> Self::OnQueryFuture { - BoxFutureOrNoop::future(self.0.clone().handle(req)) + BoxFutureOrNoop::future(self.0.clone().handle_query(req)) } #[inline] - fn on_message(&self, _req: ServiceRequest) -> Self::OnMessageFuture { - futures_util::future::ready(()) + fn on_message(&self, req: ServiceRequest) -> Self::OnMessageFuture { + futures_util::future::ready(self.0.clone().handle_broadcast(req)) } #[inline] @@ -57,15 +57,15 @@ impl Service for Responder { struct ResponderInner { // state and storage components go here - local_id: Arc, + log_id: Arc, 
broadcast_filter: BroadcastFilter, signature_requests: mpsc::UnboundedSender<(Round, PeerId, oneshot::Sender)>, uploads: mpsc::UnboundedSender<(PointId, oneshot::Sender)>, } impl ResponderInner { - async fn handle(self: Arc, req: ServiceRequest) -> Option { - let body = match bincode::deserialize::(&req.body) { + async fn handle_query(self: Arc, req: ServiceRequest) -> Option { + let body = match bincode::deserialize::(&req.body) { Ok(body) => body, Err(e) => { tracing::error!("unexpected request from {:?}: {e:?}", req.metadata.peer_id); @@ -75,19 +75,24 @@ impl ResponderInner { }; let response = match body { - MPRequest::PointById(point_id) => { + MPQuery::PointById(point_id) => { let (tx, rx) = oneshot::channel(); - self.uploads.send((point_id, tx)).ok(); + self.uploads.send((point_id.clone(), tx)).ok(); match rx.await { - Ok(response) => MPResponse::PointById(response), + Ok(response) => { + tracing::debug!( + "{} upload to {:.4?} : {:?} {}", + self.log_id, + req.metadata.peer_id, + point_id.ugly(), + response.0.as_ref().map_or("not found", |_| "ok"), + ); + MPResponse::PointById(response) + } Err(e) => panic!("Responder point by id await of request failed: {e}"), - }; - MPResponse::PointById(PointByIdResponse(None)) - } - MPRequest::Broadcast(point) => { - MPResponse::Broadcast(self.broadcast_filter.add(Arc::new(point))) + } } - MPRequest::Signature(round) => { + MPQuery::Signature(round) => { let (tx, rx) = oneshot::channel(); self.signature_requests .send((round, req.metadata.peer_id.clone(), tx)) @@ -99,7 +104,7 @@ impl ResponderInner { tracing::error!( "{} responder => collector {:.4?} @ {round:?} : \ {response:?} due to oneshot {e}", - self.local_id, + self.log_id, req.metadata.peer_id ); MPResponse::Signature(response) @@ -110,14 +115,28 @@ impl ResponderInner { Some(Response { version: Version::default(), - body: Bytes::from(match bincode::serialize(&MPRemoteResult::Ok(response)) { + body: Bytes::from(match bincode::serialize(&MPQueryResult::Ok(response)) { Ok(data) => data, Err(e) => { tracing::error!("failed to serialize response to {:?}: {e:?}", req.metadata); - bincode::serialize(&MPRemoteResult::Err(format!("internal error"))) + bincode::serialize(&MPQueryResult::Err("internal error".to_string())) .expect("must not fail") } }), }) } + + fn handle_broadcast(self: Arc, req: ServiceRequest) { + match bincode::deserialize::(&req.body) { + Ok(point) => self.broadcast_filter.add(Arc::new(point)), + Err(e) => { + tracing::error!( + "unexpected broadcast from {:?}: {e:?}", + req.metadata.peer_id + ); + // malformed request is a reason to ignore it + return; + } + }; + } } diff --git a/consensus/src/intercom/dependency/downloader.rs b/consensus/src/intercom/dependency/downloader.rs index b8440a3ad..0d656eeff 100644 --- a/consensus/src/intercom/dependency/downloader.rs +++ b/consensus/src/intercom/dependency/downloader.rs @@ -11,31 +11,27 @@ use tokio::sync::{broadcast, watch}; use tokio::time::error::Elapsed; use tycho_network::PeerId; -use tycho_util::FastHashMap; +use tycho_util::{FastHashMap, FastHashSet}; use crate::dag::{DagRound, Verifier, WeakDagRound}; use crate::engine::MempoolConfig; use crate::intercom::dto::{PeerState, PointByIdResponse}; use crate::intercom::{Dispatcher, PeerSchedule}; -use crate::models::{DagPoint, NodeCount, PointId}; +use crate::models::{DagPoint, NodeCount, PointId, Ugly}; type DownloadResult = anyhow::Result; #[derive(Clone)] pub struct Downloader { - local_id: Arc, + log_id: Arc, dispatcher: Dispatcher, peer_schedule: PeerSchedule, } impl 
Downloader { - pub fn new( - local_id: Arc, - dispatcher: &Dispatcher, - peer_schedule: &PeerSchedule, - ) -> Self { + pub fn new(log_id: Arc, dispatcher: &Dispatcher, peer_schedule: &PeerSchedule) -> Self { Self { - local_id, + log_id, peer_schedule: peer_schedule.clone(), dispatcher: dispatcher.clone(), } @@ -56,20 +52,34 @@ impl Downloader { "point and DAG round mismatch" ); // request point from its signers (any dependant is among them as point is already verified) - let all_peers = self.peer_schedule.peers_for(&point_round.round().next()); + let mut all_peers = self + .peer_schedule + .peers_for(&point_round.round().next()) + .iter() + .map(|(peer_id, state)| (*peer_id, *state)) + .collect::>(); let Ok(node_count) = NodeCount::try_from(all_peers.len()) else { return DagPoint::NotExists(Arc::new(point_id)); }; // query author no matter if it is in the next round, but that can't affect 3F+1 - let all_peers = iter::once((point_id.location.author, PeerState::Resolved)) - // overwrite author's entry if it isn't really resolved; - .chain(all_peers.iter().map(|(peer_id, state)| (*peer_id, *state))) - .collect::>(); + let completed = if all_peers.contains_key(&point_id.location.author) { + 0 + } else if self + .peer_schedule + .all_resolved() + .contains(&point_id.location.author) + { + all_peers.insert(point_id.location.author, PeerState::Resolved); + -1 + } else { + 0 + }; if all_peers.is_empty() { return DagPoint::NotExists(Arc::new(point_id)); }; - let mut priorities = vec![dependant, point_id.location.author]; - priorities.dedup(); + let mandatory = iter::once(dependant) + .chain(iter::once(point_id.location.author)) + .collect(); let (has_resolved_tx, has_resolved_rx) = watch::channel(false); DownloadTask { weak_dag_round: point_round.as_weak(), @@ -80,11 +90,13 @@ impl Downloader { has_resolved_tx, has_resolved_rx, in_flight: FuturesUnordered::new(), + completed, + mandatory, all_peers, parent: self, attempt: 0, } - .run(&priorities) + .run() .await } } @@ -99,20 +111,22 @@ struct DownloadTask { point_id: PointId, all_peers: FastHashMap, + mandatory: FastHashSet, updates: broadcast::Receiver<(PeerId, PeerState)>, has_resolved_tx: watch::Sender, has_resolved_rx: watch::Receiver, in_flight: FuturesUnordered< BoxFuture<'static, (PeerId, Result, Elapsed>)>, >, + completed: i16, attempt: u8, } impl DownloadTask { // point's author is a top priority; fallback priority is (any) dependent point's author // recursively: every dependency is expected to be signed by 2/3+1 - pub async fn run(mut self, priorities: &Vec) -> DagPoint { - self.download_priorities(priorities); + pub async fn run(mut self) -> DagPoint { + self.download_mandatory(); self.download(); loop { tokio::select! 
{ @@ -126,18 +140,20 @@ impl DownloadTask { } } - fn download_priorities(&mut self, priorities: &Vec) { - let priorities = priorities - .into_iter() + fn download_mandatory(&mut self) { + let mandatory = self + .mandatory + .iter() .filter(|p| { self.all_peers .get(p) .map_or(false, |&s| s == PeerState::Resolved) }) + .cloned() .collect::>(); - for resolved_priority in priorities { - self.all_peers.remove_entry(resolved_priority); - self.download_one(resolved_priority); + for peer_id in mandatory { + self.all_peers.remove_entry(&peer_id); + self.download_one(&peer_id); } } @@ -145,7 +161,8 @@ impl DownloadTask { self.attempt += 1; let count = (MempoolConfig::DOWNLOAD_PEERS as usize) .saturating_pow(self.attempt as u32) - .saturating_sub(self.in_flight.len()); + .saturating_sub(self.in_flight.len()) + .max(self.all_peers.len()); for peer_id in self .all_peers @@ -168,7 +185,7 @@ impl DownloadTask { MempoolConfig::DOWNLOAD_TIMEOUT, self.parent .dispatcher - .request::(&peer_id, &self.request), + .query::(&peer_id, &self.request), ) .map(move |result| (peer_id, result.map(|(_, r)| r))) .boxed(), @@ -181,33 +198,89 @@ impl DownloadTask { resolved: Result, Elapsed>, ) -> Option { match resolved { - Err(_timeout) => _ = self.all_peers.remove(&peer_id), - Ok(Err(_network_err)) => _ = self.all_peers.remove(&peer_id), - Ok(Ok(PointByIdResponse(None))) => _ = self.all_peers.remove(&peer_id), + Err(_timeout) => { + tracing::error!("{} : {peer_id:.4?} timed out", self.parent.log_id); + } + Ok(Err(network_err)) => { + tracing::error!( + "{} : {peer_id:.4?} network error: {network_err}", + self.parent.log_id + ); + } + Ok(Ok(PointByIdResponse(None))) => { + if self.mandatory.remove(&peer_id) { + // it's a ban + tracing::error!( + "{} : {peer_id:.4?} must have returned {:?}", + self.parent.log_id, + self.point_id.ugly() + ); + } else { + tracing::debug!( + "{} : {peer_id:.4?} didn't return {:?}", + self.parent.log_id, + self.point_id.ugly() + ); + } + } Ok(Ok(PointByIdResponse(Some(point)))) => { if point.id() != self.point_id { - _ = self.all_peers.remove(&peer_id); + // it's a ban + tracing::error!( + "{} : {peer_id:.4?} returned wrong point", + self.parent.log_id + ); } let Some(dag_round) = self.weak_dag_round.get() else { - // no more retries, too late; + tracing::warn!( + "{} : {peer_id:.4?} no more retries, local DAG moved far forward", + self.parent.log_id + ); // DAG could not have moved if this point was needed for commit return Some(DagPoint::NotExists(Arc::new(self.point_id.clone()))); }; let point = Arc::new(point); match Verifier::verify(&point, &self.parent.peer_schedule) { Ok(()) => { - return Some( - Verifier::validate(point, dag_round, self.parent.clone()).await, - ) + let validated = + Verifier::validate(point, dag_round, self.parent.clone()).await; + if validated.trusted().is_some() { + tracing::debug!( + "{} : downloaded dependency {:?}", + self.parent.log_id, + validated.ugly() + ) + } else { + tracing::error!( + "{} : downloaded dependency validated as {:?}", + self.parent.log_id, + validated.ugly() + ) + } + return Some(validated); + } + Err(invalid @ DagPoint::Invalid(_)) => { + tracing::error!( + "{} : downloaded dependency {:?}", + self.parent.log_id, + invalid.ugly() + ); + return Some(invalid); + } + Err(_not_exists) => { + tracing::error!( + "{} : downloaded dependency {:?}, peer is not reliable", + self.parent.log_id, + _not_exists.ugly() + ); } - Err(invalid @ DagPoint::Invalid(_)) => return Some(invalid), - Err(_not_exists) => _ = self.all_peers.remove(&peer_id), // ain't 
reliable peer } } }; - // the point does not exist when only 1F left unqeried, + // the point does not exist when only 1F left unqueried, // assuming author and dependant are queried or unavailable - if self.all_peers.len() < self.node_count.reliable_minority() { + self.completed += 1; + if self.completed >= self.node_count.majority() as i16 { return Some(DagPoint::NotExists(Arc::new(self.point_id.clone()))); } if self.in_flight.is_empty() { diff --git a/consensus/src/intercom/dependency/uploader.rs b/consensus/src/intercom/dependency/uploader.rs index 92eae38b4..fb3b9d1f1 100644 --- a/consensus/src/intercom/dependency/uploader.rs +++ b/consensus/src/intercom/dependency/uploader.rs @@ -5,19 +5,22 @@ use tokio::sync::{mpsc, oneshot, watch}; use crate::dag::DagRound; use crate::intercom::dto::PointByIdResponse; -use crate::models::{DagPoint, Point, PointId}; +use crate::models::{DagPoint, Point, PointId, Ugly}; pub struct Uploader { + log_id: Arc, requests: mpsc::UnboundedReceiver<(PointId, oneshot::Sender)>, top_dag_round: watch::Receiver, } impl Uploader { pub fn new( + log_id: Arc, requests: mpsc::UnboundedReceiver<(PointId, oneshot::Sender)>, top_dag_round: watch::Receiver, ) -> Self { Self { + log_id, requests, top_dag_round, } @@ -25,10 +28,13 @@ impl Uploader { pub async fn run(mut self) -> ! { while let Some((point_id, callback)) = self.requests.recv().await { - if let Err(_) = callback.send(PointByIdResponse( - self.find(&point_id).await.map(|p| p.deref().clone()), - )) { - tracing::error!("Uploader result channel closed for {point_id:.4?}"); + let found = self.find(&point_id).await.map(|p| p.deref().clone()); + if let Err(_) = callback.send(PointByIdResponse(found)) { + tracing::warn!( + "{} Uploader result channel closed for {:?}, requester's downloader timed out ? 
", + self.log_id, + point_id.ugly() + ); }; } panic!("Uploader incoming channel closed") @@ -36,6 +42,9 @@ impl Uploader { async fn find(&self, point_id: &PointId) -> Option> { let top_dag_round = self.top_dag_round.borrow().clone(); + if &point_id.location.round > top_dag_round.round() { + return None; + } let shared = top_dag_round .scan(&point_id.location.round) .map(|dag_round| { diff --git a/consensus/src/intercom/dto.rs b/consensus/src/intercom/dto.rs index 9c541763d..3f3057183 100644 --- a/consensus/src/intercom/dto.rs +++ b/consensus/src/intercom/dto.rs @@ -5,17 +5,6 @@ use crate::models::{Point, Signature}; #[derive(Serialize, Deserialize, Debug)] pub struct PointByIdResponse(pub Option); -#[derive(Serialize, Deserialize, PartialEq, Debug)] -pub enum BroadcastResponse { - /// peer will verify and maybe sign the point - Accepted, - // TimeOut (disconnect) is a reason to retry also - /// peer did not reach the point's round yet - TryLater, - /// malformed point or peer is on a later round - Rejected, -} - #[derive(Serialize, Deserialize, PartialEq, Debug)] pub enum SignatureResponse { Signature(Signature), diff --git a/consensus/src/intercom/peer_schedule/peer_schedule.rs b/consensus/src/intercom/peer_schedule/peer_schedule.rs index 808673507..630011e77 100644 --- a/consensus/src/intercom/peer_schedule/peer_schedule.rs +++ b/consensus/src/intercom/peer_schedule/peer_schedule.rs @@ -54,7 +54,7 @@ impl PeerSchedule { /// Does not return updates on local peer_id pub fn updates(&self) -> broadcast::Receiver<(PeerId, PeerState)> { - tracing::info!("subscribing to peer updates"); + tracing::debug!("subscribing to peer updates"); self.updates.subscribe() } diff --git a/consensus/src/models/node_count.rs b/consensus/src/models/node_count.rs index 452819953..d2b91a13a 100644 --- a/consensus/src/models/node_count.rs +++ b/consensus/src/models/node_count.rs @@ -32,8 +32,8 @@ impl NodeCount { // assuming the least possible amount of nodes is not in validator set let one_f = (total_peers + 1) / 3; assert!( - u8::try_from(one_f).is_ok(), - "node count 1F={one_f} overflows u8 after scaling {total_peers} up to 3F+1" + u8::try_from(one_f * 3 + 1).is_ok(), + "node count 3F+1={one_f} overflows u8 after ceiling {total_peers}" ); NodeCount(one_f as u8) } diff --git a/consensus/src/models/ugly.rs b/consensus/src/models/ugly.rs index e21298005..4599aadd6 100644 --- a/consensus/src/models/ugly.rs +++ b/consensus/src/models/ugly.rs @@ -2,7 +2,7 @@ use std::fmt::{Debug, Formatter}; use tycho_network::PeerId; -use crate::models::{Location, Point, PointId}; +use crate::models::{DagPoint, Location, Point, PointId}; pub struct UglyPrint<'a, T>(pub &'a T); @@ -53,3 +53,22 @@ impl Debug for UglyPrint<'_, Point> { ) } } + +impl Debug for UglyPrint<'_, DagPoint> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self.0 { + DagPoint::Trusted(_) => f.write_str("Trusted(")?, + DagPoint::Suspicious(_) => f.write_str("Suspicious(")?, + DagPoint::Invalid(_) => f.write_str("Invalid(")?, + DagPoint::NotExists(_) => f.write_str("NotExists(")?, + }; + write!( + f, + "Point {{ Id( {:.4} @ {} # {:.4} ), .. 
}}", + self.0.location().author, + self.0.location().round.0, + self.0.digest() + )?; + f.write_str(")") + } +} diff --git a/consensus/src/test_utils.rs b/consensus/src/test_utils.rs index dc3d71e35..1f17c35dc 100644 --- a/consensus/src/test_utils.rs +++ b/consensus/src/test_utils.rs @@ -132,7 +132,9 @@ mod tests { if info.id == dht_client.network().peer_id() { continue; } - assert!(dht_client.add_peer(info.clone()).unwrap(), "peer added"); + dht_client + .add_peer(info.clone()) + .expect("add peer to dht client"); } } let mut engines = vec![]; From a34675bb091512f68b5743509016a362706bad46 Mon Sep 17 00:00:00 2001 From: Kirill Mikheev Date: Wed, 1 May 2024 00:57:45 +0300 Subject: [PATCH 19/32] feat(consensus): filter in test logger --- consensus/src/test_utils.rs | 2 +- network/tests/private_overlay.rs | 2 +- network/tests/public_overlay.rs | 2 +- util/src/test/logger.rs | 6 ++++-- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/consensus/src/test_utils.rs b/consensus/src/test_utils.rs index 1f17c35dc..7f636fede 100644 --- a/consensus/src/test_utils.rs +++ b/consensus/src/test_utils.rs @@ -151,7 +151,7 @@ mod tests { async fn engine_works() -> Result<(), ()> { // tracing_subscriber::fmt::try_init().ok(); // tracing::info!("engine_works"); - tycho_util::test::init_logger("engine_works"); + tycho_util::test::init_logger("engine_works", "info,tycho_consensus=debug"); check_parking_lot(); heart_beat(); diff --git a/network/tests/private_overlay.rs b/network/tests/private_overlay.rs index 2b21cf586..b90be9d8d 100644 --- a/network/tests/private_overlay.rs +++ b/network/tests/private_overlay.rs @@ -83,7 +83,7 @@ fn make_network(node_count: usize) -> Vec { #[tokio::test(flavor = "multi_thread", worker_threads = 4)] async fn private_overlays_accessible() -> Result<()> { - tycho_util::test::init_logger("private_overlays_accessible"); + tycho_util::test::init_logger("private_overlays_accessible", "debug"); let nodes = make_network(20); diff --git a/network/tests/public_overlay.rs b/network/tests/public_overlay.rs index 8a52f4851..a822fcc8f 100644 --- a/network/tests/public_overlay.rs +++ b/network/tests/public_overlay.rs @@ -76,7 +76,7 @@ fn make_network(node_count: usize) -> Vec { #[tokio::test(flavor = "multi_thread", worker_threads = 4)] async fn public_overlays_accessible() -> Result<()> { - tycho_util::test::init_logger("public_overlays_accessible"); + tycho_util::test::init_logger("public_overlays_accessible", "debug"); #[derive(Debug, Default)] struct PeerState { diff --git a/util/src/test/logger.rs b/util/src/test/logger.rs index f7a0279a9..1499d3dcf 100644 --- a/util/src/test/logger.rs +++ b/util/src/test/logger.rs @@ -1,6 +1,8 @@ -pub fn init_logger(test_name: &str) { +use tracing_subscriber::EnvFilter; + +pub fn init_logger(test_name: &str, filter: &str) { tracing_subscriber::fmt() - .with_env_filter(tracing_subscriber::EnvFilter::new("debug")) + .with_env_filter(EnvFilter::try_new(filter).expect("tracing directives")) .try_init() .ok(); From f692760615ef550f59d53d3dcfe7611c713de1e3 Mon Sep 17 00:00:00 2001 From: Stanislav Eliseev Date: Wed, 1 May 2024 10:56:09 +0200 Subject: [PATCH 20/32] fix(mempool-adapter): make mempool adapter buildable --- collator/src/mempool/mempool_adapter.rs | 6 +- collator/src/mempool/mempool_adapter_std.rs | 99 ++++++++++----------- 2 files changed, 54 insertions(+), 51 deletions(-) diff --git a/collator/src/mempool/mempool_adapter.rs b/collator/src/mempool/mempool_adapter.rs index c8250ba9d..ed069a4b5 100644 --- 
a/collator/src/mempool/mempool_adapter.rs +++ b/collator/src/mempool/mempool_adapter.rs @@ -17,7 +17,7 @@ use tycho_network::{DhtClient, OverlayService, PeerId}; use tycho_util::FastDashMap; use crate::mempool::types::ExternalMessage; -use crate::mempool::{MempoolAdapter, MempoolAnchor, MempoolAnchorId}; +use crate::mempool::{MempoolAdapter, MempoolAnchor, MempoolAnchorId, MempoolEventListener}; use crate::tracing_targets; pub struct MempoolAdapterImpl { @@ -117,6 +117,10 @@ pub async fn parse_points( #[async_trait] impl MempoolAdapter for MempoolAdapterImpl { + fn create(listener: Arc) -> Self { + todo!() + } + async fn enqueue_process_new_mc_block_state( &self, mc_state: Arc, diff --git a/collator/src/mempool/mempool_adapter_std.rs b/collator/src/mempool/mempool_adapter_std.rs index 658c07917..7a65633e1 100644 --- a/collator/src/mempool/mempool_adapter_std.rs +++ b/collator/src/mempool/mempool_adapter_std.rs @@ -43,6 +43,9 @@ pub(crate) trait MempoolEventListener: Send + Sync { #[async_trait] pub(crate) trait MempoolAdapter: Send + Sync + 'static { + /// Create an adapter, that connects to mempool then starts to listen mempool for new anchors, + /// and handles requests to mempool from the collation process + fn create(listener: Arc) -> Self; /// Schedule task to process new master block state (may perform gc or nodes rotation) async fn enqueue_process_new_mc_block_state( &self, @@ -78,6 +81,52 @@ pub struct MempoolAdapterStdImpl { #[async_trait] impl MempoolAdapter for MempoolAdapterStdImpl { + fn create(listener: Arc) -> Self { + tracing::info!(target: tracing_targets::MEMPOOL_ADAPTER, "Creating mempool adapter..."); + + //TODO: make real implementation, currently runs stub task + // that produces the repeating set of anchors + let stub_anchors_cache = Arc::new(RwLock::new(BTreeMap::new())); + + tokio::spawn({ + let listener = listener.clone(); + let stub_anchors_cache = stub_anchors_cache.clone(); + async move { + let mut anchor_id = 0; + loop { + let rnd_round_interval = rand::thread_rng().gen_range(400..600); + tokio::time::sleep(tokio::time::Duration::from_millis(rnd_round_interval * 6)) + .await; + anchor_id += 1; + let anchor = _stub_create_random_anchor_with_stub_externals(anchor_id); + { + let mut anchor_cache_rw = stub_anchors_cache + .write() + .map_err(|e| anyhow!("Poison error on write lock: {:?}", e)) + .unwrap(); + tracing::debug!( + target: tracing_targets::MEMPOOL_ADAPTER, + "Random anchor (id: {}, chain_time: {}, externals: {}) added to cache", + anchor.id(), + anchor.chain_time(), + anchor.externals_count(), + ); + anchor_cache_rw.insert(anchor_id, anchor.clone()); + } + listener.on_new_anchor(anchor).await.unwrap(); + } + } + }); + tracing::info!(target: tracing_targets::MEMPOOL_ADAPTER, "Stub anchors generator started"); + + tracing::info!(target: tracing_targets::MEMPOOL_ADAPTER, "Mempool adapter created"); + + Self { + listener, + _stub_anchors_cache: stub_anchors_cache, + } + } + async fn enqueue_process_new_mc_block_state( &self, mc_state: Arc, @@ -216,53 +265,3 @@ fn _stub_create_random_anchor_with_stub_externals( Arc::new(MempoolAnchor::new(anchor_id, chain_time, externals)) } - -impl MempoolAdapterStdImpl { - fn new( - listener: Arc, - ) -> Self { - tracing::info!(target: tracing_targets::MEMPOOL_ADAPTER, "Creating mempool adapter..."); - - //TODO: make real implementation, currently runs stub task - // that produces the repeating set of anchors - let stub_anchors_cache = Arc::new(RwLock::new(BTreeMap::new())); - - tokio::spawn({ - let listener = 
listener.clone(); - let stub_anchors_cache = stub_anchors_cache.clone(); - async move { - let mut anchor_id = 0; - loop { - let rnd_round_interval = rand::thread_rng().gen_range(400..600); - tokio::time::sleep(tokio::time::Duration::from_millis(rnd_round_interval * 6)) - .await; - anchor_id += 1; - let anchor = _stub_create_random_anchor_with_stub_externals(anchor_id); - { - let mut anchor_cache_rw = stub_anchors_cache - .write() - .map_err(|e| anyhow!("Poison error on write lock: {:?}", e)) - .unwrap(); - tracing::debug!( - target: tracing_targets::MEMPOOL_ADAPTER, - "Random anchor (id: {}, chain_time: {}, externals: {}) added to cache", - anchor.id(), - anchor.chain_time(), - anchor.externals_count(), - ); - anchor_cache_rw.insert(anchor_id, anchor.clone()); - } - listener.on_new_anchor(anchor).await.unwrap(); - } - } - }); - tracing::info!(target: tracing_targets::MEMPOOL_ADAPTER, "Stub anchors generator started"); - - tracing::info!(target: tracing_targets::MEMPOOL_ADAPTER, "Mempool adapter created"); - - Self { - listener, - _stub_anchors_cache: stub_anchors_cache, - } - } -} From d655a0efb08bf3ba48be558c558a815588b8c1b0 Mon Sep 17 00:00:00 2001 From: Kirill Mikheev Date: Wed, 1 May 2024 20:51:00 +0300 Subject: [PATCH 21/32] chore(consensus): actualize todos --- consensus/src/dag/anchor_stage.rs | 16 ++--- consensus/src/dag/dag.rs | 19 +++--- consensus/src/dag/dag_round.rs | 9 +-- consensus/src/dag/producer.rs | 1 - consensus/src/dag/verifier.rs | 2 +- consensus/src/engine/engine.rs | 12 +++- .../intercom/broadcast/broadcast_filter.rs | 46 ++++++--------- .../src/intercom/broadcast/broadcaster.rs | 2 +- consensus/src/intercom/core/dto.rs | 51 ++++++++++------ consensus/src/intercom/core/responder.rs | 59 ++++++++----------- consensus/src/models/dag_point.rs | 1 - 11 files changed, 107 insertions(+), 111 deletions(-) diff --git a/consensus/src/dag/anchor_stage.rs b/consensus/src/dag/anchor_stage.rs index 7a92c523d..adbfdfd33 100644 --- a/consensus/src/dag/anchor_stage.rs +++ b/consensus/src/dag/anchor_stage.rs @@ -9,17 +9,10 @@ use crate::models::Round; #[derive(Debug)] pub enum AnchorStage { - Candidate(PeerId), // TODO nothing special, remove /// if anchor is locally committed then it must be marked as used (and vice versa) - Proof { - leader: PeerId, - is_used: AtomicBool, - }, + Proof { leader: PeerId, is_used: AtomicBool }, /// trigger is not necessary used - proof may be included by the next anchor and its own trigger - Trigger { - leader: PeerId, - is_used: AtomicBool, - }, + Trigger { leader: PeerId, is_used: AtomicBool }, } impl AnchorStage { @@ -43,8 +36,9 @@ impl AnchorStage { return None; }; match round.0 % WAVE_SIZE { - 0 => None, // both genesis and trailing (proof inclusion) round - 1 => Some(AnchorStage::Candidate(leader.clone())), + // 0 is a leaderless support round (that actually follows every leader point chain) + // 1 is an anchor candidate (surprisingly, nothing special about this point) + 0 | 1 => None, 2 => Some(AnchorStage::Proof { leader: leader.clone(), is_used: AtomicBool::new(false), diff --git a/consensus/src/dag/dag.rs b/consensus/src/dag/dag.rs index dabf22a20..167a3b89b 100644 --- a/consensus/src/dag/dag.rs +++ b/consensus/src/dag/dag.rs @@ -46,15 +46,21 @@ impl Dag { top } - // fixme must not be async + // Note: cannot be non-async, as we cannot use only InclusionState: + // some committed point may be DagPoint::Suspicious thus not the first validated locally /// result is in historical order pub async fn commit(self, next_dag_round: DagRound) -> 
Vec<(Arc, Vec>)> { + // TODO finding the latest trigger must not take long, better try later + // than wait long for some DagPoint::NotFound, slowing down whole Engine let Some(latest_trigger) = Self::latest_trigger(&next_dag_round).await else { return Vec::new(); }; + // when we have a valid trigger, its every point of it's subdag is validated successfully let mut anchor_stack = Self::anchor_stack(&latest_trigger, next_dag_round.clone()).await; let mut ordered = Vec::new(); while let Some((anchor, anchor_round)) = anchor_stack.pop() { + // Note every next "little anchor candidate that could" must have at least full dag depth + // Note if sync is implemented as a second sub-graph - drop up to the last linked in chain self.drop_tail(anchor.point.body.location.round); let committed = Self::gather_uncommitted(&anchor.point, &anchor_round).await; ordered.push((anchor.point, committed)); @@ -80,8 +86,8 @@ impl Dag { futs.push(version.clone()) } }); - // FIXME traversing the DAG must not be async: we need the way to determine completed tasks - // its sufficient to use only ready futures at this point, must ignore downloading tasks + // Fixme We may take any first completed valid point, but we should not wait long; + // can we determine the trigger some other way, maybe inside Collector? while let Some((found, _)) = futs.next().await { if let Some(valid) = found.into_valid() { _ = latest_trigger.insert(valid); @@ -172,11 +178,9 @@ impl Dag { anchor_stack } - // TODO the next "little anchor candidate that could" must have at least full dag depth fn drop_tail(&self, anchor_at: Round) { if let Some(tail) = anchor_at.0.checked_sub(MempoolConfig::COMMIT_DEPTH as u32) { let mut rounds = self.rounds.lock(); - // TODO if sync is implemented as a second sub-graph - drop up to last linked *rounds = rounds.split_off(&Round(tail)); }; } @@ -207,15 +211,14 @@ impl Dag { let mut uncommitted = Vec::new(); - // TODO visited rounds count must be equal to dag depth: - // read/download non-existent rounds and drop too old ones while let Some(vertex_round /* r-1 */) = proof_round .prev() .get() .filter(|_| !r.iter().all(BTreeMap::is_empty)) { // take points @ r+0, and select their vertices @ r-1 for commit - // the order is of NodeId (public key) TODO shuffle deterministically, eg with anchor digest as a seed + // the order is of NodeId (public key) + // TODO shuffle deterministically, eg with anchor digest as a seed while let Some((node, digest)) = &r[0].pop_first() { // Every point must be valid (we've validated anchor dependencies already), // but some points don't have previous one to proof as vertex. 
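The "TODO shuffle deterministically, eg with anchor digest as a seed" left in gather_uncommitted above aims at every node producing the identical ordering of committed points without any extra coordination, since all correct nodes already agree on the anchor and therefore on its digest. A minimal, dependency-free sketch of such a shuffle follows; it assumes a 32-byte digest, and Digest32, seed_from_digest and shuffle_by_anchor_digest are illustrative names rather than items from this crate.

// Stand-in for the crate's point digest; assumed to be 32 bytes here.
type Digest32 = [u8; 32];

// Fold the first 8 digest bytes into a PRNG seed.
fn seed_from_digest(digest: &Digest32) -> u64 {
    u64::from_le_bytes(digest[..8].try_into().expect("8 bytes"))
}

// SplitMix64: tiny deterministic PRNG, good enough to derive a shuffle order.
fn splitmix64(state: &mut u64) -> u64 {
    *state = state.wrapping_add(0x9E37_79B9_7F4A_7C15);
    let mut z = *state;
    z = (z ^ (z >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9);
    z = (z ^ (z >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB);
    z ^ (z >> 31)
}

// Fisher-Yates shuffle seeded by the anchor digest: same digest, same order
// on every node, e.g. shuffle_by_anchor_digest(&mut uncommitted, &digest).
fn shuffle_by_anchor_digest<T>(items: &mut [T], anchor_digest: &Digest32) {
    let mut state = seed_from_digest(anchor_digest);
    for i in (1..items.len()).rev() {
        let j = (splitmix64(&mut state) % (i as u64 + 1)) as usize;
        items.swap(i, j);
    }
}

Because the seed comes only from data the nodes have already agreed on, no further message exchange is needed to agree on the ordering; the slight modulo bias in the index choice is irrelevant for this purpose.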
diff --git a/consensus/src/dag/dag_round.rs b/consensus/src/dag/dag_round.rs index 875cfb7f5..084aca0c8 100644 --- a/consensus/src/dag/dag_round.rs +++ b/consensus/src/dag/dag_round.rs @@ -185,8 +185,8 @@ impl DagRound { }) } - // Todo leave for genesis, use for own points in tests - pub async fn insert_exact_validate( + /// for genesis and own points + pub async fn insert_exact_sign( &self, point: &Arc, peer_schedule: &PeerSchedule, @@ -199,10 +199,7 @@ impl DagRound { if point.valid().is_none() { panic!("Coding error: not a valid point") } - let Some(state) = self.insert_exact(&point) else { - return None; - }; - let state = state.await; + let state = self.insert_exact(&point)?.await; if let Some(signable) = state.signable() { signable.sign( self.round(), diff --git a/consensus/src/dag/producer.rs b/consensus/src/dag/producer.rs index 80a9a15c6..e0acaed30 100644 --- a/consensus/src/dag/producer.rs +++ b/consensus/src/dag/producer.rs @@ -9,7 +9,6 @@ use crate::dag::anchor_stage::AnchorStage; use crate::dag::DagRound; use crate::models::{Link, Location, Point, PointBody, PrevPoint, Round, Through, UnixTime}; -// FIXME make it PointBuilder pub struct Producer; impl Producer { diff --git a/consensus/src/dag/verifier.rs b/consensus/src/dag/verifier.rs index 64a3d1484..38df53792 100644 --- a/consensus/src/dag/verifier.rs +++ b/consensus/src/dag/verifier.rs @@ -79,7 +79,7 @@ impl Verifier { // existence of proofs in leader points is a part of point's well-form-ness check match &dag_round.anchor_stage() { // no one may link to self - None | Some(AnchorStage::Candidate(_)) => { + None => { (point.body.anchor_proof != Link::ToSelf && point.body.anchor_trigger != Link::ToSelf) || point.body.location.round == MempoolConfig::GENESIS_ROUND diff --git a/consensus/src/engine/engine.rs b/consensus/src/engine/engine.rs index acd6f4a88..dfd51d79b 100644 --- a/consensus/src/engine/engine.rs +++ b/consensus/src/engine/engine.rs @@ -8,6 +8,7 @@ use tokio::task::JoinSet; use tycho_network::{DhtClient, OverlayService, PeerId}; use crate::dag::{Dag, DagRound, InclusionState, Producer}; +use crate::engine::MempoolConfig; use crate::intercom::{ BroadcastFilter, Broadcaster, BroadcasterSignal, Collector, CollectorSignal, Dispatcher, Downloader, PeerSchedule, PeerScheduleUpdater, Responder, Uploader, @@ -59,6 +60,13 @@ impl Engine { ); let genesis = Arc::new(crate::test_utils::genesis()); + // check only genesis round as it is widely used in point validation. 
+ // if some nodes use distinct genesis data, their first points will be rejected + assert_eq!( + genesis.body.location.round, + MempoolConfig::GENESIS_ROUND, + "genesis point round must match genesis round from config" + ); let peer_schedule_updater = PeerScheduleUpdater::new(dispatcher.overlay.clone(), peer_schedule.clone()); // finished epoch @@ -94,7 +102,7 @@ impl Engine { let downloader = Downloader::new(log_id.clone(), &dispatcher, &peer_schedule); let genesis_state = current_dag_round - .insert_exact_validate(&genesis, &peer_schedule, &downloader) + .insert_exact_sign(&genesis, &peer_schedule, &downloader) .await; let collector = Collector::new( log_id.clone(), @@ -135,7 +143,7 @@ impl Engine { Producer::new_point(¤t_dag_round, prev_point.as_ref(), vec![]).await { let state = current_dag_round - .insert_exact_validate(&own_point, &peer_schedule, &downloader) + .insert_exact_sign(&own_point, &peer_schedule, &downloader) .await .expect("own produced point must be valid"); own_point_state.send(state).ok(); diff --git a/consensus/src/intercom/broadcast/broadcast_filter.rs b/consensus/src/intercom/broadcast/broadcast_filter.rs index 674055374..27d2b7fab 100644 --- a/consensus/src/intercom/broadcast/broadcast_filter.rs +++ b/consensus/src/intercom/broadcast/broadcast_filter.rs @@ -2,7 +2,6 @@ use std::collections::BTreeMap; use std::sync::atomic::{AtomicU32, Ordering}; use std::sync::Arc; -use itertools::Itertools; use tokio::sync::broadcast::error::RecvError; use tokio::sync::mpsc; @@ -71,20 +70,14 @@ struct BroadcastFilterInner { last_by_peer: FastDashMap, // very much like DAG structure, but without dependency check; // just to determine reliably that consensus advanced without current node - by_round: FastDashMap< - Round, - ( - NodeCount, - BTreeMap>, - ), - >, + by_round: FastDashMap>>)>, current_dag_round: AtomicU32, peer_schedule: Arc, output: mpsc::UnboundedSender, } impl BroadcastFilterInner { - // TODO logic is doubtful because of contradiction in requirements: + // Note logic still under consideration because of contradiction in requirements: // * we must determine the latest consensus round reliably: // the current approach is to collect 1/3+1 points at the same future round // => we should collect as much points as possible @@ -103,19 +96,17 @@ impl BroadcastFilterInner { tracing::debug!("{local_id} @ {dag_round:?} filter <= bcaster {author:.4?} @ {round:?}"); - // conceal raw point, do not use it - let point = match Verifier::verify(&point, &self.peer_schedule) { - Ok(()) => ConsensusEvent::Verified(point), - Err(dag_point) => { - tracing::error!( - "{local_id} @ {dag_round:?} filter => bcaster {author:.4?} @ {round:?} : \ - Invalid {point:.4?}" - ); - ConsensusEvent::Invalid(dag_point) - } - }; + let verified = Verifier::verify(&point, &self.peer_schedule); + if verified.is_err() { + tracing::error!( + "{local_id} @ {dag_round:?} filter => bcaster {author:.4?} @ {round:?} : \ + Invalid {point:.4?}" + ); + } if round <= dag_round { - self.output.send(point).ok(); + let event = + verified.map_or_else(ConsensusEvent::Invalid, |_| ConsensusEvent::Verified(point)); + self.output.send(event).ok(); return; } // else: either consensus moved forward without us, // or we shouldn't accept the point yet, or it's a spam @@ -131,7 +122,7 @@ impl BroadcastFilterInner { .or_insert(round) > round { - // TODO we should ban a peer that broadcasts its rounds out of order, + // we should ban a peer that broadcasts its rounds out of order, // though we cannot prove this decision for other 
nodes tracing::error!( "{local_id} @ {dag_round:?} filter => bcaster {author:.4?} @ {round:?} : \ @@ -139,6 +130,9 @@ impl BroadcastFilterInner { ); return; }; + if verified.is_err() { + return; // do not determine next round by garbage points; it's a ban + } match self.by_round.entry(round).or_try_insert_with(|| { // how many nodes should send broadcasts NodeCount::try_from(self.peer_schedule.peers_for(&round).len()) @@ -150,7 +144,7 @@ impl BroadcastFilterInner { } Ok(mut entry) => { let (node_count, ref mut same_round) = entry.value_mut(); - // TODO ban the author, if we detect equivocation now; we won't be able to prove it + // ban the author, if we detect equivocation now; we won't be able to prove it // if some signatures are invalid (it's another reason for a local ban) same_round.entry(author).or_default().insert(digest, point); if same_round.len() < node_count.reliable_minority() { @@ -176,7 +170,7 @@ impl BroadcastFilterInner { }) .ok(); // map entry is not used by filter anymore - for event in self + for point in self .by_round .remove(&round) .into_iter() @@ -184,10 +178,8 @@ impl BroadcastFilterInner { .flatten() .map(|(_, v)| v.into_iter().map(|(_, v)| v)) .flatten() - // inside actually only (in) valid points by the same round - .sorted_by(ConsensusEvent::priority) { - self.output.send(event).ok(); + self.output.send(ConsensusEvent::Verified(point)).ok(); } } // TODO there must be some config value - when node needs to sync; diff --git a/consensus/src/intercom/broadcast/broadcaster.rs b/consensus/src/intercom/broadcast/broadcaster.rs index c2af75146..b4a45cbd3 100644 --- a/consensus/src/intercom/broadcast/broadcaster.rs +++ b/consensus/src/intercom/broadcast/broadcaster.rs @@ -41,7 +41,7 @@ pub struct Broadcaster { // results rejections: FastHashSet, signatures: FastHashMap, - // TODO move generic logic close to dispatcher, also check DownloadTask + bcast_request: tycho_network::Request, bcast_peers: FastHashSet, bcast_futs: FuturesUnordered>, diff --git a/consensus/src/intercom/core/dto.rs b/consensus/src/intercom/core/dto.rs index f1299fc4e..3926d4961 100644 --- a/consensus/src/intercom/core/dto.rs +++ b/consensus/src/intercom/core/dto.rs @@ -2,23 +2,12 @@ use anyhow::anyhow; use bytes::Bytes; use serde::{Deserialize, Serialize}; -use tycho_network::Version; +use tycho_network::{Response, ServiceRequest, Version}; use crate::intercom::dto::{PointByIdResponse, SignatureResponse}; use crate::models::{Point, PointId, Round}; -#[derive(Serialize, Deserialize, Debug)] -pub enum MPQueryResult { - Ok(MPResponse), - Err(String), -} - -#[derive(Serialize, Deserialize, Debug)] -pub enum MPQuery { - PointById(PointId), - Signature(Round), -} - +// broadcast uses simple send_message with () return value impl From<&Point> for tycho_network::Request { fn from(value: &Point) -> Self { tycho_network::Request { @@ -28,6 +17,12 @@ impl From<&Point> for tycho_network::Request { } } +#[derive(Serialize, Deserialize, Debug)] +pub enum MPQuery { + PointById(PointId), + Signature(Round), +} + impl From<&MPQuery> for tycho_network::Request { // TODO: move MPRequest et al to TL - won't need to copy Point fn from(value: &MPQuery) -> Self { @@ -38,23 +33,43 @@ impl From<&MPQuery> for tycho_network::Request { } } +impl TryFrom<&ServiceRequest> for MPQuery { + type Error = anyhow::Error; + + fn try_from(request: &ServiceRequest) -> Result { + Ok(bincode::deserialize::(&request.body)?) 
+ } +} + #[derive(Serialize, Deserialize, Debug)] pub enum MPResponse { PointById(PointByIdResponse), Signature(SignatureResponse), } -impl TryFrom<&tycho_network::Response> for MPResponse { +impl TryFrom<&MPResponse> for Response { type Error = anyhow::Error; - fn try_from(response: &tycho_network::Response) -> Result { - match bincode::deserialize::(&response.body) { - Ok(MPQueryResult::Ok(response)) => Ok(response), - Ok(MPQueryResult::Err(e)) => Err(anyhow::Error::msg(e)), + fn try_from(value: &MPResponse) -> Result { + let body = Bytes::from(bincode::serialize(value)?); + Ok(Response { + version: Version::default(), + body, + }) + } +} + +impl TryFrom<&Response> for MPResponse { + type Error = anyhow::Error; + + fn try_from(response: &Response) -> Result { + match bincode::deserialize::(&response.body) { + Ok(response) => Ok(response), Err(e) => Err(anyhow!("failed to deserialize: {e:?}")), } } } + impl TryFrom for PointByIdResponse { type Error = anyhow::Error; diff --git a/consensus/src/intercom/core/responder.rs b/consensus/src/intercom/core/responder.rs index 8d2e23ff5..ba9d65851 100644 --- a/consensus/src/intercom/core/responder.rs +++ b/consensus/src/intercom/core/responder.rs @@ -1,12 +1,11 @@ use std::sync::Arc; -use bytes::Bytes; use tokio::sync::{mpsc, oneshot}; -use tycho_network::{PeerId, Response, Service, ServiceRequest, Version}; +use tycho_network::{PeerId, Response, Service, ServiceRequest}; use tycho_util::futures::BoxFutureOrNoop; -use crate::intercom::core::dto::{MPQuery, MPQueryResult, MPResponse}; +use crate::intercom::core::dto::{MPQuery, MPResponse}; use crate::intercom::dto::{PointByIdResponse, SignatureResponse}; use crate::intercom::BroadcastFilter; use crate::models::{Point, PointId, Round, Ugly}; @@ -65,32 +64,27 @@ struct ResponderInner { impl ResponderInner { async fn handle_query(self: Arc, req: ServiceRequest) -> Option { - let body = match bincode::deserialize::(&req.body) { - Ok(body) => body, - Err(e) => { - tracing::error!("unexpected request from {:?}: {e:?}", req.metadata.peer_id); - // malformed request is a reason to ignore it - return None; - } - }; + let body = MPQuery::try_from(&req) + .inspect_err(|e| { + tracing::error!("unexpected request from {:?}: {e:?}", req.metadata.peer_id) + }) + .ok()?; // malformed request is a reason to ignore it let response = match body { MPQuery::PointById(point_id) => { let (tx, rx) = oneshot::channel(); self.uploads.send((point_id.clone(), tx)).ok(); - match rx.await { - Ok(response) => { - tracing::debug!( - "{} upload to {:.4?} : {:?} {}", - self.log_id, - req.metadata.peer_id, - point_id.ugly(), - response.0.as_ref().map_or("not found", |_| "ok"), - ); - MPResponse::PointById(response) - } - Err(e) => panic!("Responder point by id await of request failed: {e}"), - } + let response = rx + .await // not recoverable, must be avoided, thus panic + .expect("Responder point by id await of request failed"); + tracing::debug!( + "{} upload to {:.4?} : {:?} {}", + self.log_id, + req.metadata.peer_id, + point_id.ugly(), + response.0.as_ref().map_or("not found", |_| "ok"), + ); + MPResponse::PointById(response) } MPQuery::Signature(round) => { let (tx, rx) = oneshot::channel(); @@ -100,6 +94,7 @@ impl ResponderInner { match rx.await { Ok(response) => MPResponse::Signature(response), Err(e) => { + // it's a recoverable error let response = SignatureResponse::TryLater; tracing::error!( "{} responder => collector {:.4?} @ {round:?} : \ @@ -113,17 +108,11 @@ impl ResponderInner { } }; - Some(Response { - version: 
Version::default(), - body: Bytes::from(match bincode::serialize(&MPQueryResult::Ok(response)) { - Ok(data) => data, - Err(e) => { - tracing::error!("failed to serialize response to {:?}: {e:?}", req.metadata); - bincode::serialize(&MPQueryResult::Err("internal error".to_string())) - .expect("must not fail") - } - }), - }) + Response::try_from(&response) + .inspect_err(|e| { + tracing::error!("failed to serialize response to {:?}: {e:?}", req.metadata) + }) + .ok() } fn handle_broadcast(self: Arc, req: ServiceRequest) { diff --git a/consensus/src/models/dag_point.rs b/consensus/src/models/dag_point.rs index 0bd54fb99..56e26c858 100644 --- a/consensus/src/models/dag_point.rs +++ b/consensus/src/models/dag_point.rs @@ -20,7 +20,6 @@ impl ValidPoint { #[derive(Clone, Debug)] pub enum DagPoint { - // FIXME time skew is determined at the moment of signature response and is not reentrant /// valid without demur, needed to blame equivocation or graph connectivity violations Trusted(ValidPoint), /// is a valid container, but we doubt author's fairness at the moment of validating; From 0a9be77a0d51f268c2192772d17d9fb98499481b Mon Sep 17 00:00:00 2001 From: Stanislav Eliseev Date: Thu, 2 May 2024 08:34:52 +0200 Subject: [PATCH 22/32] chore(mempool-adapter): cargo.lock --- Cargo.lock | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index a0c94bcd8..7ff7ad7d9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2177,6 +2177,7 @@ dependencies = [ "everscale-types", "futures-util", "log", + "parking_lot", "rand", "sha2", "tempfile", @@ -2187,6 +2188,7 @@ dependencies = [ "tracing-subscriber", "tracing-test", "tycho-block-util", + "tycho-consensus", "tycho-core", "tycho-network", "tycho-storage", From 560b2e53e0673cd6f3a6e6057ba50ee96d5e34ee Mon Sep 17 00:00:00 2001 From: Stanislav Eliseev Date: Thu, 2 May 2024 14:37:31 +0200 Subject: [PATCH 23/32] chore(mempool-adapter): Mempool cleaning + refactoring --- collator/src/manager/collation_processor.rs | 2 +- collator/src/mempool/mempool_adapter.rs | 94 +++++++++---------- collator/src/mempool/mempool_adapter_std.rs | 4 +- .../mempool/tests/mempool_adapter_tests.rs | 4 +- collator/tests/collation_tests.rs | 4 +- consensus/src/dag/dag.rs | 17 +++- consensus/src/engine/engine.rs | 14 ++- 7 files changed, 74 insertions(+), 65 deletions(-) diff --git a/collator/src/manager/collation_processor.rs b/collator/src/manager/collation_processor.rs index fda94cb01..dd33f12af 100644 --- a/collator/src/manager/collation_processor.rs +++ b/collator/src/manager/collation_processor.rs @@ -749,7 +749,7 @@ where //TODO: in current implementation CollationProcessor should not notify mempool // about one master block more than once, but better to handle repeated request here or at mempool mpool_adapter - .enqueue_process_new_mc_block_state(mc_state) + .enqueue_process_new_mc_block_state(Arc::new(mc_state)) .await } diff --git a/collator/src/mempool/mempool_adapter.rs b/collator/src/mempool/mempool_adapter.rs index ed069a4b5..4fe18490a 100644 --- a/collator/src/mempool/mempool_adapter.rs +++ b/collator/src/mempool/mempool_adapter.rs @@ -35,10 +35,10 @@ impl MempoolAdapterImpl { tracing::info!(target: tracing_targets::MEMPOOL_ADAPTER, "Creating mempool adapter..."); let anchors = Arc::new(RwLock::new(BTreeMap::new())); - let (tx, rx) = tokio::sync::mpsc::unbounded_channel::, Vec>)>>(); + let (sender, receiver) = tokio::sync::mpsc::unbounded_channel::<(Arc, Vec>)>(); let engine = - tycho_consensus::Engine::new(&secret_key, &dht_client, &overlay_service, &peers, 
tx) + tycho_consensus::Engine::new(&secret_key, &dht_client, &overlay_service, &peers, sender) .await; tokio::spawn(async move { engine.run() }); @@ -48,7 +48,7 @@ impl MempoolAdapterImpl { let mempool_adapter = Arc::new(Self { anchors }); //start handling mempool anchors - tokio::spawn(parse_points(mempool_adapter.clone(), rx)); + tokio::spawn(parse_points(mempool_adapter.clone(), receiver)); mempool_adapter } @@ -61,57 +61,55 @@ impl MempoolAdapterImpl { pub async fn parse_points( adapter: Arc, - mut rx: UnboundedReceiver, Vec>)>>, + mut rx: UnboundedReceiver<(Arc, Vec>)>, ) { - while let Some(commited) = rx.recv().await { - commited.into_iter().for_each(|(anchor, points)| { - let mut external_messages = HashMap::::new(); - - for point in points { - 'message: for message in &point.body.payload { - let cell = match Boc::decode(message) { - Ok(cell) => cell, - Err(e) => { - tracing::error!(target: tracing_targets::MEMPOOL_ADAPTER, "Failed to deserialize bytes into cell. Error: {e:?}"); //TODO: should handle errors properly? - continue 'message; - } - }; - - let mut slice = match cell.as_slice() { - Ok(slice) => slice, - Err(e) => { - tracing::error!(target: tracing_targets::MEMPOOL_ADAPTER, "Failed to make slice from cell. Error: {e:?}"); - continue 'message; - } - }; - - let ext_in_message = match ExtInMsgInfo::load_from(&mut slice) { - Ok(message) => message, - Err(e) => { - tracing::error!(target: tracing_targets::MEMPOOL_ADAPTER, "Bad cell. Failed to deserialize to ExtInMsgInfo. Err: {e:?}"); - continue 'message; - } - }; - - let external_message = ExternalMessage::new(cell.clone(), ext_in_message ); - external_messages.insert(*cell.repr_hash(), external_message); + while let Some((anchor, points)) = rx.recv().await { + let mut external_messages = HashMap::::new(); + + for point in points { + 'message: for message in &point.body.payload { + let cell = match Boc::decode(message) { + Ok(cell) => cell, + Err(e) => { + tracing::error!(target: tracing_targets::MEMPOOL_ADAPTER, "Failed to deserialize bytes into cell. Error: {e:?}"); //TODO: should handle errors properly? + continue 'message; + } + }; + + let mut slice = match cell.as_slice() { + Ok(slice) => slice, + Err(e) => { + tracing::error!(target: tracing_targets::MEMPOOL_ADAPTER, "Failed to make slice from cell. Error: {e:?}"); + continue 'message; + } + }; + + let ext_in_message = match ExtInMsgInfo::load_from(&mut slice) { + Ok(message) => message, + Err(e) => { + tracing::error!(target: tracing_targets::MEMPOOL_ADAPTER, "Bad cell. Failed to deserialize to ExtInMsgInfo. 
Err: {e:?}"); + continue 'message; + } + }; + + let external_message = ExternalMessage::new(cell.clone(), ext_in_message ); + external_messages.insert(*cell.repr_hash(), external_message); - } } + } - let messages = external_messages - .into_iter() - .map(|m| Arc::new(m.1)) - .collect::>(); + let messages = external_messages + .into_iter() + .map(|m| Arc::new(m.1)) + .collect::>(); - let anchor = Arc::new(MempoolAnchor::new( - anchor.body.location.round.0, - anchor.body.time.as_u64(), - messages - )); + let anchor = Arc::new(MempoolAnchor::new( + anchor.body.location.round.0, + anchor.body.time.as_u64(), + messages + )); - adapter.add_anchor(anchor); - }) + adapter.add_anchor(anchor); } } diff --git a/collator/src/mempool/mempool_adapter_std.rs b/collator/src/mempool/mempool_adapter_std.rs index 7a65633e1..8fbf52e2f 100644 --- a/collator/src/mempool/mempool_adapter_std.rs +++ b/collator/src/mempool/mempool_adapter_std.rs @@ -73,14 +73,14 @@ pub(crate) trait MempoolAdapter: Send + Sync + 'static { async fn clear_anchors_cache(&self, before_anchor_id: MempoolAnchorId) -> Result<()>; } -pub struct MempoolAdapterStdImpl { +pub struct MempoolAdapterStubImpl { listener: Arc, _stub_anchors_cache: Arc>>>, } #[async_trait] -impl MempoolAdapter for MempoolAdapterStdImpl { +impl MempoolAdapter for MempoolAdapterStubImpl { fn create(listener: Arc) -> Self { tracing::info!(target: tracing_targets::MEMPOOL_ADAPTER, "Creating mempool adapter..."); diff --git a/collator/src/mempool/tests/mempool_adapter_tests.rs b/collator/src/mempool/tests/mempool_adapter_tests.rs index 0d1360eee..2cceee8d1 100644 --- a/collator/src/mempool/tests/mempool_adapter_tests.rs +++ b/collator/src/mempool/tests/mempool_adapter_tests.rs @@ -5,7 +5,7 @@ use async_trait::async_trait; use crate::{mempool::MempoolAnchor, test_utils::try_init_test_tracing}; -use super::{MempoolAdapter, MempoolAdapterStdImpl, MempoolEventListener}; +use super::{MempoolAdapter, MempoolAdapterStubImpl, MempoolEventListener}; struct MempoolEventStubListener; #[async_trait] @@ -25,7 +25,7 @@ impl MempoolEventListener for MempoolEventStubListener { async fn test_stub_anchors_generator() -> Result<()> { try_init_test_tracing(tracing_subscriber::filter::LevelFilter::TRACE); - let adapter = MempoolAdapterStdImpl::create(Arc::new(MempoolEventStubListener {})); + let adapter = MempoolAdapterStubImpl::create(Arc::new(MempoolEventStubListener {})); // try get not existing anchor by id let opt_anchor = adapter.get_anchor_by_id(10).await?; diff --git a/collator/tests/collation_tests.rs b/collator/tests/collation_tests.rs index 39e19cd1e..50c23b836 100644 --- a/collator/tests/collation_tests.rs +++ b/collator/tests/collation_tests.rs @@ -4,7 +4,7 @@ use tycho_block_util::state::MinRefMcStateTracker; use tycho_collator::test_utils::prepare_test_storage; use tycho_collator::{ manager::CollationManager, - mempool::{MempoolAdapterBuilder, MempoolAdapterBuilderStdImpl, MempoolAdapterStdImpl}, + mempool::{MempoolAdapterBuilder, MempoolAdapterBuilderStdImpl, MempoolAdapterStubImpl}, state_node::{StateNodeAdapterBuilder, StateNodeAdapterBuilderStdImpl}, test_utils::try_init_test_tracing, types::CollationConfig, @@ -35,7 +35,7 @@ async fn test_collation_process_on_stubs() { block_strider.run().await.unwrap(); - let mpool_adapter_builder = MempoolAdapterBuilderStdImpl::::new(); + let mpool_adapter_builder = MempoolAdapterBuilderStdImpl::::new(); let state_node_adapter_builder = StateNodeAdapterBuilderStdImpl::new(storage.clone()); let mut rnd = rand::thread_rng(); diff --git 
a/consensus/src/dag/dag.rs b/consensus/src/dag/dag.rs index 167a3b89b..fdd3582c2 100644 --- a/consensus/src/dag/dag.rs +++ b/consensus/src/dag/dag.rs @@ -4,13 +4,15 @@ use std::sync::Arc; use futures_util::stream::FuturesUnordered; use futures_util::StreamExt; +use itertools::Itertools; use parking_lot::Mutex; +use tokio::sync::mpsc::UnboundedSender; use crate::dag::anchor_stage::AnchorStage; use crate::dag::DagRound; use crate::engine::MempoolConfig; use crate::intercom::PeerSchedule; -use crate::models::{Point, Round, ValidPoint}; +use crate::models::{Point, Round, Ugly, ValidPoint}; #[derive(Clone)] pub struct Dag { @@ -49,7 +51,11 @@ impl Dag { // Note: cannot be non-async, as we cannot use only InclusionState: // some committed point may be DagPoint::Suspicious thus not the first validated locally /// result is in historical order - pub async fn commit(self, next_dag_round: DagRound) -> Vec<(Arc, Vec>)> { + pub async fn commit( + self, + next_dag_round: DagRound, + commit_sender: UnboundedSender<(Arc, Vec>)>, + ) -> Vec<(Arc, Vec>)> { // TODO finding the latest trigger must not take long, better try later // than wait long for some DagPoint::NotFound, slowing down whole Engine let Some(latest_trigger) = Self::latest_trigger(&next_dag_round).await else { @@ -65,6 +71,13 @@ impl Dag { let committed = Self::gather_uncommitted(&anchor.point, &anchor_round).await; ordered.push((anchor.point, committed)); } + + ordered.iter().for_each(|x| { + if let Err(e) = commit_sender.send(x.clone()) { + tracing::error!("Failed to send anchor commit message tp mpsc channel. Err: {e:?}"); //TODO: handle error properly + } + }); + ordered } diff --git a/consensus/src/engine/engine.rs b/consensus/src/engine/engine.rs index a262c61ab..fbd70e5b7 100644 --- a/consensus/src/engine/engine.rs +++ b/consensus/src/engine/engine.rs @@ -26,7 +26,7 @@ pub struct Engine { broadcast_filter: BroadcastFilter, top_dag_round_watch: watch::Sender, tasks: JoinSet<()>, // should be JoinSet - tx: UnboundedSender, Vec>)>> + committed_anchors_sender: UnboundedSender<(Arc, Vec>)> } impl Engine { @@ -35,7 +35,7 @@ impl Engine { dht_client: &DhtClient, overlay_service: &OverlayService, peers: &Vec, - tx: UnboundedSender, Vec>)>> + tx: UnboundedSender<(Arc, Vec>)> ) -> Self { let key_pair = KeyPair::from(secret_key); @@ -127,7 +127,7 @@ impl Engine { broadcast_filter, top_dag_round_watch: top_dag_round_tx, tasks, - tx + committed_anchors_sender: tx } } @@ -204,7 +204,9 @@ impl Engine { collector_signal_rx, )); - let commit_run = tokio::spawn(self.dag.clone().commit(next_dag_round.clone())); + let commit_run = tokio::spawn( + self.dag.clone().commit(next_dag_round.clone(), self.committed_anchors_sender.clone()) + ); let bcast_filter_upd = { let bcast_filter = self.broadcast_filter.clone(); let round = current_dag_round.round().clone(); @@ -220,10 +222,6 @@ impl Engine { match tokio::join!(collector_run, bcaster_run, commit_run, bcast_filter_upd) { (Ok(collector_upd), Ok(new_prev_point), Ok(committed), Ok(_bcast_filter_upd)) => { - if let Err(e) = self.tx.send(committed.clone()) { - tracing::error!("Failed tp send anchor commit message tp mpsc channel. 
Err: {e:?}"); - } - Self::log_committed(&self.log_id, ¤t_dag_round, &committed); prev_point = new_prev_point; produce_own_point = next_dag_round.round() == collector_upd.next_round(); From ca6c70c85390ca9a2e9327207da45d110369b719 Mon Sep 17 00:00:00 2001 From: Stanislav Eliseev Date: Thu, 2 May 2024 15:05:36 +0200 Subject: [PATCH 24/32] chore(mempool-adapter): Refactoring after merging master to feature/mempool_adapter --- collator/src/mempool/mempool_adapter.rs | 62 +++++++++++-- ...adapter_std.rs => mempool_adapter_stub.rs} | 87 ++++--------------- collator/src/mempool/mod.rs | 2 +- .../mempool/tests/mempool_adapter_tests.rs | 4 +- 4 files changed, 76 insertions(+), 79 deletions(-) rename collator/src/mempool/{mempool_adapter_std.rs => mempool_adapter_stub.rs} (74%) diff --git a/collator/src/mempool/mempool_adapter.rs b/collator/src/mempool/mempool_adapter.rs index 4fe18490a..3e9979f7d 100644 --- a/collator/src/mempool/mempool_adapter.rs +++ b/collator/src/mempool/mempool_adapter.rs @@ -17,9 +17,64 @@ use tycho_network::{DhtClient, OverlayService, PeerId}; use tycho_util::FastDashMap; use crate::mempool::types::ExternalMessage; -use crate::mempool::{MempoolAdapter, MempoolAnchor, MempoolAnchorId, MempoolEventListener}; +use crate::mempool::{MempoolAnchor, MempoolAnchorId}; use crate::tracing_targets; +pub trait MempoolAdapterFactory { + type Adapter: MempoolAdapter; + + fn create(&self, listener: Arc) -> Self::Adapter; +} + +impl MempoolAdapterFactory for F + where + F: Fn(Arc) -> R, + R: MempoolAdapter, +{ + type Adapter = R; + + fn create(&self, listener: Arc) -> Self::Adapter { + self(listener) + } +} + +// EVENTS LISTENER + +#[async_trait] +pub trait MempoolEventListener: Send + Sync { + /// Process new anchor from mempool + async fn on_new_anchor(&self, anchor: Arc) -> Result<()>; +} + +// ADAPTER + +#[async_trait] +pub trait MempoolAdapter: Send + Sync + 'static { + /// Schedule task to process new master block state (may perform gc or nodes rotation) + async fn enqueue_process_new_mc_block_state(&self, mc_state: ShardStateStuff) -> Result<()>; + + /// Request, await, and return anchor from connected mempool by id. + /// Return None if the requested anchor does not exist. + /// + /// (TODO) Cache anchor to handle similar request from collator of another shard + async fn get_anchor_by_id( + &self, + anchor_id: MempoolAnchorId, + ) -> Result>>; + + /// Request, await, and return the next anchor after the specified previous one. + /// If anchor was not produced yet then await until mempool does this. + /// + /// (TODO) ? Should return Error if mempool does not reply fro a long timeout + async fn get_next_anchor(&self, prev_anchor_id: MempoolAnchorId) -> Result>; + + /// Clean cache from all anchors that before specified. 
+ /// We can do this for anchors that processed in blocks + /// which included in signed master - we do not need them anymore + async fn clear_anchors_cache(&self, before_anchor_id: MempoolAnchorId) -> Result<()>; +} + + pub struct MempoolAdapterImpl { //TODO: replace with rocksdb anchors: Arc>>>, @@ -115,13 +170,10 @@ pub async fn parse_points( #[async_trait] impl MempoolAdapter for MempoolAdapterImpl { - fn create(listener: Arc) -> Self { - todo!() - } async fn enqueue_process_new_mc_block_state( &self, - mc_state: Arc, + mc_state: ShardStateStuff, ) -> Result<()> { //TODO: make real implementation, currently does nothing tracing::info!( diff --git a/collator/src/mempool/mempool_adapter_std.rs b/collator/src/mempool/mempool_adapter_stub.rs similarity index 74% rename from collator/src/mempool/mempool_adapter_std.rs rename to collator/src/mempool/mempool_adapter_stub.rs index 8fbf52e2f..d34d37da7 100644 --- a/collator/src/mempool/mempool_adapter_std.rs +++ b/collator/src/mempool/mempool_adapter_stub.rs @@ -5,7 +5,6 @@ use std::{ use anyhow::{anyhow, Result}; use async_trait::async_trait; -use everscale_crypto::ed25519::SecretKey; use everscale_types::{ cell::{CellBuilder, CellSliceRange, HashBytes}, @@ -13,10 +12,9 @@ use everscale_types::{ }; use rand::Rng; use tycho_block_util::state::ShardStateStuff; -use tycho_network::{DhtClient, OverlayService, PeerId}; +use crate::mempool::{MempoolAdapter, MempoolEventListener}; use crate::tracing_targets; -use crate::validator::types::OverlayNumber; use super::types::{ExternalMessage, MempoolAnchor, MempoolAnchorId}; @@ -24,54 +22,7 @@ use super::types::{ExternalMessage, MempoolAnchor, MempoolAnchorId}; #[path = "tests/mempool_adapter_tests.rs"] pub(super) mod tests; -// EVENTS EMITTER AMD LISTENER - -//TODO: remove emitter -#[async_trait] -pub(crate) trait MempoolEventEmitter { - /// When mempool produced new committed anchor - async fn on_new_anchor_event(&self, anchor: Arc); -} - -#[async_trait] -pub(crate) trait MempoolEventListener: Send + Sync { - /// Process new anchor from mempool - async fn on_new_anchor(&self, anchor: Arc) -> Result<()>; -} - -// ADAPTER - -#[async_trait] -pub(crate) trait MempoolAdapter: Send + Sync + 'static { - /// Create an adapter, that connects to mempool then starts to listen mempool for new anchors, - /// and handles requests to mempool from the collation process - fn create(listener: Arc) -> Self; - /// Schedule task to process new master block state (may perform gc or nodes rotation) - async fn enqueue_process_new_mc_block_state( - &self, - mc_state: Arc, - ) -> Result<()>; - - /// Request, await, and return anchor from connected mempool by id. - /// Return None if the requested anchor does not exist. - /// - /// (TODO) Cache anchor to handle similar request from collator of another shard - async fn get_anchor_by_id( - &self, - anchor_id: MempoolAnchorId, - ) -> Result>>; - - /// Request, await, and return the next anchor after the specified previous one. - /// If anchor was not produced yet then await until mempool does this. - /// - /// (TODO) ? Should return Error if mempool does not reply fro a long timeout - async fn get_next_anchor(&self, prev_anchor_id: MempoolAnchorId) -> Result>; - - /// Clean cache from all anchors that before specified. 
- /// We can do this for anchors that processed in blocks - /// which included in signed master - we do not need them anymore - async fn clear_anchors_cache(&self, before_anchor_id: MempoolAnchorId) -> Result<()>; -} +// FACTORY pub struct MempoolAdapterStubImpl { listener: Arc, @@ -79,9 +30,8 @@ pub struct MempoolAdapterStubImpl { _stub_anchors_cache: Arc>>>, } -#[async_trait] -impl MempoolAdapter for MempoolAdapterStubImpl { - fn create(listener: Arc) -> Self { +impl MempoolAdapterStubImpl { + pub fn new(listener: Arc) -> Self { tracing::info!(target: tracing_targets::MEMPOOL_ADAPTER, "Creating mempool adapter..."); //TODO: make real implementation, currently runs stub task @@ -126,11 +76,11 @@ impl MempoolAdapter for MempoolAdapterStubImpl { _stub_anchors_cache: stub_anchors_cache, } } +} - async fn enqueue_process_new_mc_block_state( - &self, - mc_state: Arc, - ) -> Result<()> { +#[async_trait] +impl MempoolAdapter for MempoolAdapterStubImpl { + async fn enqueue_process_new_mc_block_state(&self, mc_state: ShardStateStuff) -> Result<()> { //TODO: make real implementation, currently does nothing tracing::info!( target: tracing_targets::MEMPOOL_ADAPTER, @@ -239,11 +189,8 @@ impl MempoolAdapter for MempoolAdapterStubImpl { fn _stub_create_random_anchor_with_stub_externals( anchor_id: MempoolAnchorId, ) -> Arc { - let chain_time = std::time::SystemTime::now() - .duration_since(std::time::SystemTime::UNIX_EPOCH) - .unwrap() - .as_millis() as u64; - let externals_count: i32 = rand::thread_rng().gen_range(-10..10).max(0); + let chain_time = anchor_id as u64 * 471 * 6 % 1000000000; + let externals_count = chain_time as i32 % 10; let mut externals = vec![]; for i in 0..externals_count { let rand_addr = (0..32).map(|_| rand::random::()).collect::>(); @@ -253,15 +200,13 @@ fn _stub_create_random_anchor_with_stub_externals( msg_cell_builder.store_u64(chain_time).unwrap(); msg_cell_builder.store_u32(i as u32).unwrap(); let msg_cell = msg_cell_builder.build().unwrap(); - let msg = ExternalMessage::new( - msg_cell, - ExtInMsgInfo { - dst: IntAddr::Std(StdAddr::new(0, rand_addr)), - ..Default::default() - }, - ); + let msg_cell_range = CellSliceRange::full(&*msg_cell); + let msg = ExternalMessage::new(msg_cell, ExtInMsgInfo { + dst: IntAddr::Std(StdAddr::new(0, rand_addr)), + ..Default::default() + }); externals.push(Arc::new(msg)); } Arc::new(MempoolAnchor::new(anchor_id, chain_time, externals)) -} +} \ No newline at end of file diff --git a/collator/src/mempool/mod.rs b/collator/src/mempool/mod.rs index b97d5c136..71e561120 100644 --- a/collator/src/mempool/mod.rs +++ b/collator/src/mempool/mod.rs @@ -1,6 +1,6 @@ mod mempool_adapter; mod types; -mod mempool_adapter_std; +mod mempool_adapter_stub; pub use mempool_adapter::*; pub(crate) use types::{MempoolAnchor, MempoolAnchorId}; diff --git a/collator/src/mempool/tests/mempool_adapter_tests.rs b/collator/src/mempool/tests/mempool_adapter_tests.rs index b2dfed069..185773af5 100644 --- a/collator/src/mempool/tests/mempool_adapter_tests.rs +++ b/collator/src/mempool/tests/mempool_adapter_tests.rs @@ -5,7 +5,7 @@ use async_trait::async_trait; use crate::{mempool::MempoolAnchor, test_utils::try_init_test_tracing}; -use super::{MempoolAdapter, MempoolAdapterStdImpl, MempoolEventListener}; +use super::{MempoolAdapter, MempoolAdapterStubImpl, MempoolEventListener}; struct MempoolEventStubListener; #[async_trait] @@ -25,7 +25,7 @@ impl MempoolEventListener for MempoolEventStubListener { async fn test_stub_anchors_generator() -> Result<()> { 
try_init_test_tracing(tracing_subscriber::filter::LevelFilter::TRACE); - let adapter = MempoolAdapterStdImpl::new(Arc::new(MempoolEventStubListener {})); + let adapter = MempoolAdapterStubImpl::new(Arc::new(MempoolEventStubListener {})); // try get not existing anchor by id let opt_anchor = adapter.get_anchor_by_id(10).await?; From 3c8b9ce9563d3b6eca9e333c406ea0253966628c Mon Sep 17 00:00:00 2001 From: Kirill Mikheev Date: Thu, 2 May 2024 17:45:06 +0300 Subject: [PATCH 25/32] feat(consensus): cli --- Cargo.lock | 4 + Cargo.toml | 1 + collator/src/mempool/mempool_adapter.rs | 40 ++- collator/src/mempool/mempool_adapter_stub.rs | 15 +- collator/src/mempool/mod.rs | 2 +- collator/src/mempool/types.rs | 2 +- consensus/Cargo.toml | 12 +- consensus/examples/consensus_node.rs | 242 +++++++++++++++++++ consensus/src/dag/dag.rs | 46 +++- consensus/src/engine/engine.rs | 56 ++--- consensus/src/lib.rs | 5 +- consensus/src/test_utils.rs | 99 +++++--- simulator/Cargo.toml | 2 +- 13 files changed, 409 insertions(+), 117 deletions(-) create mode 100644 consensus/examples/consensus_node.rs diff --git a/Cargo.lock b/Cargo.lock index eb7c189f1..9da4679ac 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2202,17 +2202,21 @@ dependencies = [ "anyhow", "bincode", "bytes", + "clap", "dashmap", "everscale-crypto", "futures-util", + "hex", "itertools", "parking_lot", "rand", "rand_pcg", "serde", + "serde_json", "sha2", "tokio", "tracing", + "tracing-appender", "tracing-subscriber", "tycho-network", "tycho-storage", diff --git a/Cargo.toml b/Cargo.toml index 7f95dd9ce..08b642cab 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -56,6 +56,7 @@ pkcs8 = "0.10" quick_cache = "0.4.1" quinn = { version = "0.10", default-features = false, features = ["runtime-tokio", "tls-rustls"] } rand = "0.8" +rand_pcg = { version = "0.3" } rcgen = "0.11" ring = "0.16" rlimit = "0.10.1" diff --git a/collator/src/mempool/mempool_adapter.rs b/collator/src/mempool/mempool_adapter.rs index 3e9979f7d..b0af0042a 100644 --- a/collator/src/mempool/mempool_adapter.rs +++ b/collator/src/mempool/mempool_adapter.rs @@ -27,9 +27,9 @@ pub trait MempoolAdapterFactory { } impl MempoolAdapterFactory for F - where - F: Fn(Arc) -> R, - R: MempoolAdapter, +where + F: Fn(Arc) -> R, + R: MempoolAdapter, { type Adapter = R; @@ -74,7 +74,6 @@ pub trait MempoolAdapter: Send + Sync + 'static { async fn clear_anchors_cache(&self, before_anchor_id: MempoolAnchorId) -> Result<()>; } - pub struct MempoolAdapterImpl { //TODO: replace with rocksdb anchors: Arc>>>, @@ -90,11 +89,17 @@ impl MempoolAdapterImpl { tracing::info!(target: tracing_targets::MEMPOOL_ADAPTER, "Creating mempool adapter..."); let anchors = Arc::new(RwLock::new(BTreeMap::new())); - let (sender, receiver) = tokio::sync::mpsc::unbounded_channel::<(Arc, Vec>)>(); + let (sender, receiver) = + tokio::sync::mpsc::unbounded_channel::<(Arc, Vec>)>(); - let engine = - tycho_consensus::Engine::new(&secret_key, &dht_client, &overlay_service, &peers, sender) - .await; + let engine = tycho_consensus::Engine::new( + &secret_key, + &dht_client, + &overlay_service, + &peers, + sender, + ) + .await; tokio::spawn(async move { engine.run() }); @@ -147,9 +152,8 @@ pub async fn parse_points( } }; - let external_message = ExternalMessage::new(cell.clone(), ext_in_message ); + let external_message = ExternalMessage::new(cell.clone(), ext_in_message); external_messages.insert(*cell.repr_hash(), external_message); - } } @@ -161,7 +165,7 @@ pub async fn parse_points( let anchor = Arc::new(MempoolAnchor::new( 
anchor.body.location.round.0, anchor.body.time.as_u64(), - messages + messages, )); adapter.add_anchor(anchor); @@ -170,11 +174,7 @@ pub async fn parse_points( #[async_trait] impl MempoolAdapter for MempoolAdapterImpl { - - async fn enqueue_process_new_mc_block_state( - &self, - mc_state: ShardStateStuff, - ) -> Result<()> { + async fn enqueue_process_new_mc_block_state(&self, mc_state: ShardStateStuff) -> Result<()> { //TODO: make real implementation, currently does nothing tracing::info!( target: tracing_targets::MEMPOOL_ADAPTER, @@ -222,9 +222,7 @@ impl MempoolAdapter for MempoolAdapterImpl { let mut request_timer = std::time::Instant::now(); loop { { - let anchors_cache_r = self - .anchors - .read(); + let anchors_cache_r = self.anchors.read(); let mut range = anchors_cache_r.range(( std::ops::Bound::Excluded(prev_anchor_id), @@ -268,9 +266,7 @@ impl MempoolAdapter for MempoolAdapterImpl { } async fn clear_anchors_cache(&self, before_anchor_id: MempoolAnchorId) -> Result<()> { - let mut anchors_cache_rw = self - .anchors - .write(); + let mut anchors_cache_rw = self.anchors.write(); anchors_cache_rw.retain(|anchor_id, _| anchor_id >= &before_anchor_id); Ok(()) diff --git a/collator/src/mempool/mempool_adapter_stub.rs b/collator/src/mempool/mempool_adapter_stub.rs index d34d37da7..1ed74d9b6 100644 --- a/collator/src/mempool/mempool_adapter_stub.rs +++ b/collator/src/mempool/mempool_adapter_stub.rs @@ -6,13 +6,13 @@ use std::{ use anyhow::{anyhow, Result}; use async_trait::async_trait; +use crate::mempool::{MempoolAdapter, MempoolEventListener}; use everscale_types::{ cell::{CellBuilder, CellSliceRange, HashBytes}, models::{ExtInMsgInfo, IntAddr, MsgInfo, OwnedMessage, StdAddr}, }; use rand::Rng; use tycho_block_util::state::ShardStateStuff; -use crate::mempool::{MempoolAdapter, MempoolEventListener}; use crate::tracing_targets; @@ -201,12 +201,15 @@ fn _stub_create_random_anchor_with_stub_externals( msg_cell_builder.store_u32(i as u32).unwrap(); let msg_cell = msg_cell_builder.build().unwrap(); let msg_cell_range = CellSliceRange::full(&*msg_cell); - let msg = ExternalMessage::new(msg_cell, ExtInMsgInfo { - dst: IntAddr::Std(StdAddr::new(0, rand_addr)), - ..Default::default() - }); + let msg = ExternalMessage::new( + msg_cell, + ExtInMsgInfo { + dst: IntAddr::Std(StdAddr::new(0, rand_addr)), + ..Default::default() + }, + ); externals.push(Arc::new(msg)); } Arc::new(MempoolAnchor::new(anchor_id, chain_time, externals)) -} \ No newline at end of file +} diff --git a/collator/src/mempool/mod.rs b/collator/src/mempool/mod.rs index 71e561120..b09349d0b 100644 --- a/collator/src/mempool/mod.rs +++ b/collator/src/mempool/mod.rs @@ -1,6 +1,6 @@ mod mempool_adapter; -mod types; mod mempool_adapter_stub; +mod types; pub use mempool_adapter::*; pub(crate) use types::{MempoolAnchor, MempoolAnchorId}; diff --git a/collator/src/mempool/types.rs b/collator/src/mempool/types.rs index cf29ceb48..502a2713e 100644 --- a/collator/src/mempool/types.rs +++ b/collator/src/mempool/types.rs @@ -9,7 +9,7 @@ pub type MempoolAnchorId = u32; pub(crate) struct ExternalMessage { message_cell: Cell, - message_info: ExtInMsgInfo + message_info: ExtInMsgInfo, } impl ExternalMessage { diff --git a/consensus/Cargo.toml b/consensus/Cargo.toml index dbfcea814..7cd028c52 100644 --- a/consensus/Cargo.toml +++ b/consensus/Cargo.toml @@ -8,6 +8,10 @@ rust-version.workspace = true repository.workspace = true license.workspace = true +[[example]] +name = "consensus-node" +path = "examples/consensus_node.rs" + [dependencies] ahash 
= { workspace = true } anyhow = { workspace = true } @@ -19,14 +23,20 @@ futures-util = { workspace = true } itertools = { workspace = true } parking_lot = { workspace = true } rand = { workspace = true, features = ["small_rng"] } +rand_pcg = { workspace = true } serde = { workspace = true, features = ["derive"] } sha2 = { workspace = true } tokio = { workspace = true, default-features = false } tracing = { workspace = true } weedb = { workspace = true } +# examples' dependencies +clap = { workspace = true } +hex = { workspace = true } +serde_json = { workspace = true } +tracing-appender = { workspace = true } + # local deps -rand_pcg = { version = "0.3" } tycho-network = { workspace = true } tycho-storage = { workspace = true } tycho-util = { workspace = true, features = ["test"] } diff --git a/consensus/examples/consensus_node.rs b/consensus/examples/consensus_node.rs new file mode 100644 index 000000000..069ae7a69 --- /dev/null +++ b/consensus/examples/consensus_node.rs @@ -0,0 +1,242 @@ +//! Run tests with this env: +//! ```text +//! RUST_LOG=info,tycho_consensus=debug +//! ``` + +use std::io::IsTerminal; +use std::net::SocketAddr; +use std::sync::Arc; + +use anyhow::Result; +use clap::{Parser, Subcommand}; +use everscale_crypto::ed25519; +use serde::{Deserialize, Serialize}; +use tokio::sync::mpsc; +use tracing_subscriber::layer::SubscriberExt; +use tracing_subscriber::{fmt, EnvFilter, Layer}; + +use tycho_consensus::test_utils::drain_anchors; +use tycho_consensus::Engine; +use tycho_network::{DhtConfig, NetworkConfig, PeerId, PeerInfo}; +use tycho_util::time::now_sec; + +#[tokio::main] +async fn main() -> Result<()> { + Cli::parse().run().await +} + +/// Tycho network node. +#[derive(Parser)] +struct Cli { + #[clap(subcommand)] + cmd: Cmd, +} + +impl Cli { + async fn run(self) -> Result<()> { + let enable_persistent_logs = std::env::var("TYCHO_PERSISTENT_LOGS").is_ok(); + + let collector = tracing_subscriber::registry().with( + fmt::Layer::new() + .with_ansi(std::io::stdout().is_terminal()) + .compact() + .with_writer(std::io::stdout) + .with_filter(EnvFilter::from_default_env()), + ); + + if enable_persistent_logs { + let file_appender = tracing_appender::rolling::hourly("logs", "tycho-consensus"); + let (non_blocking, _guard) = tracing_appender::non_blocking(file_appender); + + let collector = collector.with( + fmt::Layer::new() + .with_ansi(false) + .compact() + .with_writer(non_blocking) + .with_filter(EnvFilter::new("trace")), //todo: update with needed crates + ); + tracing::subscriber::set_global_default(collector)?; + } else { + tracing::subscriber::set_global_default(collector)?; + }; + + match self.cmd { + Cmd::Run(cmd) => cmd.run().await, + Cmd::GenKey(cmd) => cmd.run(), + Cmd::GenDht(cmd) => cmd.run(), + } + } +} + +#[derive(Subcommand)] +enum Cmd { + Run(CmdRun), + GenKey(CmdGenKey), + GenDht(CmdGenDht), +} + +/// run a node +#[derive(Parser)] +struct CmdRun { + /// local node address + addr: SocketAddr, + + /// node secret key + #[clap(long)] + key: String, + + /// path to the node config + #[clap(long)] + config: Option, + + /// path to the global config + #[clap(long)] + global_config: String, +} + +impl CmdRun { + async fn run(self) -> Result<()> { + let node_config = self + .config + .map(NodeConfig::from_file) + .transpose()? 
+ .unwrap_or_default(); + let global_config = GlobalConfig::from_file(self.global_config)?; + + let secret_key = parse_key(&self.key)?; + + let (dht_client, overlay) = tycho_consensus::test_utils::from_validator( + self.addr, + &secret_key, + node_config.dht, + node_config.network, + ); + + let all_peers = global_config + .bootstrap_peers + .iter() + .map(|info| info.id) + .collect::>(); + + let mut initial_peer_count = 0usize; + for peer in global_config.bootstrap_peers { + let is_new = dht_client.add_peer(Arc::new(peer))?; + initial_peer_count += is_new as usize; + } + + let (committed_tx, committed_rx) = mpsc::unbounded_channel(); + let engine = + Engine::new(&secret_key, &dht_client, &overlay, &all_peers, committed_tx).await; + drain_anchors(committed_rx); + + tracing::info!( + local_id = %dht_client.network().peer_id(), + addr = %self.addr, + initial_peer_count, + "node started" + ); + + tokio::spawn(engine.run()); + + futures_util::future::pending().await + } +} + +/// generate a key +#[derive(Parser)] +struct CmdGenKey {} + +impl CmdGenKey { + fn run(self) -> Result<()> { + let secret_key = ed25519::SecretKey::generate(&mut rand::thread_rng()); + let public_key = ed25519::PublicKey::from(&secret_key); + let peer_id = PeerId::from(public_key); + + let data = serde_json::json!({ + "key": hex::encode(secret_key.as_bytes()), + "peer_id": peer_id.to_string(), + }); + let output = if std::io::stdin().is_terminal() { + serde_json::to_string_pretty(&data) + } else { + serde_json::to_string(&data) + }?; + println!("{output}"); + Ok(()) + } +} + +/// generate a dht node info +#[derive(Parser)] +struct CmdGenDht { + /// local node address + addr: SocketAddr, + + /// node secret key + #[clap(long)] + key: String, + + /// time to live in seconds (default: unlimited) + #[clap(long)] + ttl: Option, +} + +impl CmdGenDht { + fn run(self) -> Result<()> { + let entry = tycho_consensus::test_utils::make_peer_info( + &parse_key(&self.key)?, + self.addr.into(), + self.ttl, + ); + let output = if std::io::stdin().is_terminal() { + serde_json::to_string_pretty(&entry) + } else { + serde_json::to_string(&entry) + }?; + println!("{output}"); + Ok(()) + } +} + +#[derive(Serialize, Deserialize)] +struct GlobalConfig { + bootstrap_peers: Vec, +} + +impl GlobalConfig { + fn from_file(path: impl AsRef) -> Result { + let config: Self = { + let data = std::fs::read_to_string(path.as_ref())?; + serde_json::from_str(&data)? 
+ }; + + let now = now_sec(); + for peer in &config.bootstrap_peers { + anyhow::ensure!(peer.is_valid(now), "invalid peer info for {}", peer.id); + } + + Ok(config) + } +} + +#[derive(Default, Serialize, Deserialize)] +#[serde(default)] +struct NodeConfig { + network: NetworkConfig, + dht: DhtConfig, +} + +impl NodeConfig { + fn from_file(path: impl AsRef) -> Result { + let data = std::fs::read_to_string(path.as_ref())?; + let config = serde_json::from_str(&data)?; + Ok(config) + } +} + +fn parse_key(key: &str) -> Result { + match hex::decode(key)?.try_into() { + Ok(bytes) => Ok(ed25519::SecretKey::from_bytes(bytes)), + Err(_) => anyhow::bail!("invalid secret key"), + } +} diff --git a/consensus/src/dag/dag.rs b/consensus/src/dag/dag.rs index fdd3582c2..9988fd9e6 100644 --- a/consensus/src/dag/dag.rs +++ b/consensus/src/dag/dag.rs @@ -53,13 +53,14 @@ impl Dag { /// result is in historical order pub async fn commit( self, + log_id: Arc, next_dag_round: DagRound, - commit_sender: UnboundedSender<(Arc, Vec>)>, - ) -> Vec<(Arc, Vec>)> { + committed: UnboundedSender<(Arc, Vec>)>, + ) { // TODO finding the latest trigger must not take long, better try later // than wait long for some DagPoint::NotFound, slowing down whole Engine let Some(latest_trigger) = Self::latest_trigger(&next_dag_round).await else { - return Vec::new(); + return; }; // when we have a valid trigger, its every point of it's subdag is validated successfully let mut anchor_stack = Self::anchor_stack(&latest_trigger, next_dag_round.clone()).await; @@ -72,13 +73,13 @@ impl Dag { ordered.push((anchor.point, committed)); } - ordered.iter().for_each(|x| { - if let Err(e) = commit_sender.send(x.clone()) { - tracing::error!("Failed to send anchor commit message tp mpsc channel. Err: {e:?}"); //TODO: handle error properly - } - }); + Self::log_committed(&log_id, next_dag_round.round().prev(), &ordered); - ordered + for anchor_with_history in ordered { + committed + .send(anchor_with_history) // not recoverable + .expect("Failed to send anchor commit message tp mpsc channel"); + } } async fn latest_trigger(next_round: &DagRound) -> Option { @@ -271,4 +272,31 @@ impl Dag { uncommitted.reverse(); uncommitted } + + fn log_committed( + log_id: &str, + current_round: Round, + committed: &Vec<(Arc, Vec>)>, + ) { + if committed.is_empty() { + return; + } + if tracing::enabled!(tracing::Level::INFO) { + let committed = committed + .into_iter() + .map(|(anchor, history)| { + let history = history + .iter() + .map(|point| format!("{:?}", point.id().ugly())) + .join(", "); + format!( + "anchor {:?} time {} : [ {history} ]", + anchor.id().ugly(), + anchor.body.time + ) + }) + .join(" ; "); + tracing::info!("{log_id} @ {current_round:?} committed {committed}"); + } + } } diff --git a/consensus/src/engine/engine.rs b/consensus/src/engine/engine.rs index fbd70e5b7..e7a963308 100644 --- a/consensus/src/engine/engine.rs +++ b/consensus/src/engine/engine.rs @@ -2,8 +2,8 @@ use std::sync::Arc; use everscale_crypto::ed25519::{KeyPair, SecretKey}; use itertools::Itertools; +use tokio::sync::mpsc::UnboundedSender; use tokio::sync::{mpsc, oneshot, watch}; -use tokio::sync::mpsc::{Sender, UnboundedSender}; use tokio::task::JoinSet; use tycho_network::{DhtClient, OverlayService, PeerId}; @@ -26,7 +26,7 @@ pub struct Engine { broadcast_filter: BroadcastFilter, top_dag_round_watch: watch::Sender, tasks: JoinSet<()>, // should be JoinSet - committed_anchors_sender: UnboundedSender<(Arc, Vec>)> + committed: UnboundedSender<(Arc, Vec>)>, } impl Engine { @@ -35,8 
+35,7 @@ impl Engine { dht_client: &DhtClient, overlay_service: &OverlayService, peers: &Vec, - tx: UnboundedSender<(Arc, Vec>)> - + committed: UnboundedSender<(Arc, Vec>)>, ) -> Self { let key_pair = KeyPair::from(secret_key); let log_id = Arc::new(format!("{:?}", PeerId::from(key_pair.public_key).ugly())); @@ -127,7 +126,7 @@ impl Engine { broadcast_filter, top_dag_round_watch: top_dag_round_tx, tasks, - committed_anchors_sender: tx + committed, } } @@ -191,6 +190,7 @@ impl Engine { // let this channel unbounded - there won't be many items, but every of them is essential let (collector_signal_tx, collector_signal_rx) = mpsc::unbounded_channel(); let (own_point_state_tx, own_point_state_rx) = oneshot::channel(); + let bcaster_run = tokio::spawn(Self::bcaster_run( self.log_id.clone(), produce_own_point, @@ -204,10 +204,13 @@ impl Engine { collector_signal_rx, )); - let commit_run = tokio::spawn( - self.dag.clone().commit(next_dag_round.clone(), self.committed_anchors_sender.clone()) - ); - let bcast_filter_upd = { + let commit_run = tokio::spawn(self.dag.clone().commit( + self.log_id.clone(), + next_dag_round.clone(), + self.committed.clone(), + )); + + let bcast_filter_run = { let bcast_filter = self.broadcast_filter.clone(); let round = current_dag_round.round().clone(); tokio::spawn(async move { bcast_filter.advance_round(&round) }) @@ -220,9 +223,8 @@ impl Engine { bcaster_ready_rx, )); - match tokio::join!(collector_run, bcaster_run, commit_run, bcast_filter_upd) { - (Ok(collector_upd), Ok(new_prev_point), Ok(committed), Ok(_bcast_filter_upd)) => { - Self::log_committed(&self.log_id, ¤t_dag_round, &committed); + match tokio::join!(collector_run, bcaster_run, commit_run, bcast_filter_run) { + (Ok(collector_upd), Ok(new_prev_point), Ok(()), Ok(())) => { prev_point = new_prev_point; produce_own_point = next_dag_round.round() == collector_upd.next_round(); self.collector = collector_upd; @@ -244,34 +246,4 @@ impl Engine { } } } - - fn log_committed( - log_id: &String, - current_dag_round: &DagRound, - committed: &Vec<(Arc, Vec>)>, - ) { - if committed.is_empty() { - return; - } - if tracing::enabled!(tracing::Level::INFO) { - let committed = committed - .into_iter() - .map(|(anchor, history)| { - let history = history - .iter() - .map(|point| format!("{:?}", point.id().ugly())) - .join(", "); - format!( - "anchor {:?} time {} : [ {history} ]", - anchor.id().ugly(), - anchor.body.time - ) - }) - .join(" ; "); - tracing::info!( - "{log_id} @ {:?} committed {committed}", - current_dag_round.round(), - ); - } - } } diff --git a/consensus/src/lib.rs b/consensus/src/lib.rs index 64241b245..894349114 100644 --- a/consensus/src/lib.rs +++ b/consensus/src/lib.rs @@ -3,8 +3,7 @@ pub(crate) mod dag; pub(crate) mod engine; pub(crate) mod intercom; pub(crate) mod models; -pub(crate) mod test_utils; - +pub mod test_utils; pub use engine::Engine; -pub use models::Point; \ No newline at end of file +pub use models::Point; diff --git a/consensus/src/test_utils.rs b/consensus/src/test_utils.rs index 7f636fede..e1a5043bb 100644 --- a/consensus/src/test_utils.rs +++ b/consensus/src/test_utils.rs @@ -1,10 +1,15 @@ use std::net::ToSocketAddrs; use std::sync::Arc; -use std::time::Duration; use everscale_crypto::ed25519::{KeyPair, PublicKey, SecretKey}; +use tokio::sync::mpsc::UnboundedReceiver; +use tokio::task::JoinHandle; -use tycho_network::{DhtClient, DhtConfig, DhtService, Network, OverlayService, PeerId, Router}; +use tycho_network::{ + Address, DhtClient, DhtConfig, DhtService, Network, NetworkConfig, 
OverlayService, PeerId, + PeerInfo, Router, +}; +use tycho_util::time::now_sec; use crate::engine::MempoolConfig; use crate::models::{Link, Location, Point, PointBody, UnixTime}; @@ -31,23 +36,35 @@ pub fn genesis() -> Point { .wrap(&genesis_keys) } +pub fn make_peer_info(key: &SecretKey, address: Address, ttl: Option) -> PeerInfo { + let keypair = KeyPair::from(key); + let peer_id = PeerId::from(keypair.public_key); + + let now = now_sec(); + let mut peer_info = PeerInfo { + id: peer_id, + address_list: vec![address.clone()].into_boxed_slice(), + created_at: now, + expires_at: ttl.unwrap_or(u32::MAX), + signature: Box::new([0; 64]), + }; + *peer_info.signature = keypair.sign(&peer_info); + peer_info +} + // TODO receive configured services from general node, // move current setup to tests as it provides acceptable timing // This dependencies should be passed from validator module to init mempool -fn from_validator( +pub fn from_validator( socket_addr: T, secret_key: &SecretKey, + dht_config: DhtConfig, + network_config: NetworkConfig, ) -> (DhtClient, OverlayService) { let local_id = PeerId::from(PublicKey::from(secret_key)); let (dht_tasks, dht_service) = DhtService::builder(local_id) - .with_config(DhtConfig { - local_info_announce_period: Duration::from_secs(1), - local_info_announce_period_max_jitter: Duration::from_secs(1), - routing_table_refresh_period: Duration::from_secs(1), - routing_table_refresh_period_max_jitter: Duration::from_secs(1), - ..Default::default() - }) + .with_config(dht_config) .build(); let (overlay_tasks, overlay_service) = OverlayService::builder(local_id) @@ -60,6 +77,7 @@ fn from_validator( .build(); let network = Network::builder() + .with_config(network_config) .with_private_key(secret_key.to_bytes()) .with_service_name("mempool-test-network-service") .build(socket_addr, router) @@ -71,38 +89,34 @@ fn from_validator( (dht_service.make_client(&network), overlay_service) } +pub fn drain_anchors( + mut committed: UnboundedReceiver<(Arc, Vec>)>, +) -> JoinHandle<()> { + tokio::spawn(async move { + loop { + _ = committed + .recv() + .await + .expect("committed anchor reader must be alive"); + } + }) +} + #[cfg(test)] mod tests { use std::net::Ipv4Addr; + use std::sync::Arc; use std::thread; use std::time::Duration; use parking_lot::deadlock; + use tokio::sync::mpsc; use tokio::task::JoinSet; - use tycho_network::{Address, PeerInfo}; - use tycho_util::time::now_sec; - use crate::engine::Engine; use super::*; - fn make_peer_info(key: &SecretKey, address: Address) -> PeerInfo { - let keypair = KeyPair::from(key); - let peer_id = PeerId::from(keypair.public_key); - - let now = now_sec(); - let mut peer_info = PeerInfo { - id: peer_id, - address_list: vec![address.clone()].into_boxed_slice(), - created_at: now, - expires_at: u32::MAX, - signature: Box::new([0; 64]), - }; - *peer_info.signature = keypair.sign(&peer_info); - peer_info - } - async fn make_network(node_count: usize) -> Vec { let keys = (0..node_count) .map(|_| SecretKey::generate(&mut rand::thread_rng())) @@ -115,7 +129,20 @@ mod tests { let from_validators = keys .iter() - .map(|secret| from_validator((Ipv4Addr::LOCALHOST, 0), secret)) + .map(|secret| { + from_validator( + (Ipv4Addr::LOCALHOST, 0), + secret, + DhtConfig { + local_info_announce_period: Duration::from_secs(1), + local_info_announce_period_max_jitter: Duration::from_secs(1), + routing_table_refresh_period: Duration::from_secs(1), + routing_table_refresh_period_max_jitter: Duration::from_secs(1), + ..Default::default() + }, + 
NetworkConfig::default(), + ) + }) .collect::>(); let peer_info = std::iter::zip(&keys, &from_validators) @@ -123,6 +150,7 @@ mod tests { Arc::new(make_peer_info( key, dht_client.network().local_addr().into(), + None, )) }) .collect::>(); @@ -138,11 +166,20 @@ mod tests { } } let mut engines = vec![]; + let (committed_tx, committed_rx) = mpsc::unbounded_channel(); for (secret_key, (dht_client, overlay_service)) in keys.iter().zip(from_validators.iter()) { - let engine = Engine::new(secret_key, &dht_client, &overlay_service, &all_peers).await; + let engine = Engine::new( + secret_key, + &dht_client, + &overlay_service, + &all_peers, + committed_tx.clone(), + ) + .await; tracing::info!("created engine {}", dht_client.network().peer_id()); engines.push(engine); } + drain_anchors(committed_rx); engines } diff --git a/simulator/Cargo.toml b/simulator/Cargo.toml index e718f8725..809d83e48 100644 --- a/simulator/Cargo.toml +++ b/simulator/Cargo.toml @@ -19,7 +19,7 @@ anyhow = { workspace = true } clap = { workspace = true } hex = { workspace = true } rand = { workspace = true } -serde = { workspace = true } +serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } [lints] From 37cfe0879118c928198e279e5597eb0d92fbeb95 Mon Sep 17 00:00:00 2001 From: Kirill Mikheev Date: Thu, 9 May 2024 17:14:53 +0300 Subject: [PATCH 26/32] chore: apply rustfmt --- cli/src/node/mod.rs | 8 ++-- collator/src/mempool/mempool_adapter.rs | 11 ++--- collator/src/mempool/mempool_adapter_stub.rs | 37 ++++++-------- consensus/examples/consensus_node.rs | 3 +- consensus/src/dag/anchor_stage.rs | 1 - consensus/src/dag/dag.rs | 5 +- consensus/src/dag/dag_location.rs | 3 +- consensus/src/dag/dag_round.rs | 1 - consensus/src/dag/producer.rs | 1 - consensus/src/dag/verifier.rs | 48 +++++++++++-------- consensus/src/engine/engine.rs | 1 - .../intercom/broadcast/broadcast_filter.rs | 4 +- .../src/intercom/broadcast/broadcaster.rs | 4 +- consensus/src/intercom/broadcast/collector.rs | 5 +- consensus/src/intercom/core/dispatcher.rs | 1 - consensus/src/intercom/core/dto.rs | 1 - consensus/src/intercom/core/responder.rs | 1 - .../src/intercom/dependency/downloader.rs | 1 - consensus/src/intercom/mod.rs | 3 +- .../intercom/peer_schedule/peer_schedule.rs | 19 ++++---- .../peer_schedule/peer_schedule_updater.rs | 1 - consensus/src/models/node_count.rs | 8 ++-- consensus/src/models/point.rs | 1 - consensus/src/test_utils.rs | 4 +- .../src/store/shard_state/store_state_raw.rs | 9 ++-- 25 files changed, 74 insertions(+), 107 deletions(-) diff --git a/cli/src/node/mod.rs b/cli/src/node/mod.rs index 9b5e55e08..f90b474af 100644 --- a/cli/src/node/mod.rs +++ b/cli/src/node/mod.rs @@ -439,14 +439,12 @@ impl Node { let state_storage = self.storage.shard_state_storage(); for state in to_import { - let (handle, status) = handle_storage.create_or_load_handle( - state.block_id(), - BlockMetaData { + let (handle, status) = + handle_storage.create_or_load_handle(state.block_id(), BlockMetaData { is_key_block: true, gen_utime, mc_ref_seqno: 0, - }, - ); + }); let stored = state_storage .store_state(&handle, &state) diff --git a/collator/src/mempool/mempool_adapter.rs b/collator/src/mempool/mempool_adapter.rs index 1feb65048..90742c341 100644 --- a/collator/src/mempool/mempool_adapter.rs +++ b/collator/src/mempool/mempool_adapter.rs @@ -247,13 +247,10 @@ fn _stub_create_random_anchor_with_stub_externals( msg_cell_builder.store_u64(chain_time).unwrap(); msg_cell_builder.store_u32(i as u32).unwrap(); let msg_cell = 
msg_cell_builder.build().unwrap(); - let msg = ExternalMessage::new( - msg_cell, - ExtInMsgInfo { - dst: IntAddr::Std(StdAddr::new(0, rand_addr)), - ..Default::default() - }, - ); + let msg = ExternalMessage::new(msg_cell, ExtInMsgInfo { + dst: IntAddr::Std(StdAddr::new(0, rand_addr)), + ..Default::default() + }); externals.push(Arc::new(msg)); } diff --git a/collator/src/mempool/mempool_adapter_stub.rs b/collator/src/mempool/mempool_adapter_stub.rs index 1ed74d9b6..969930f84 100644 --- a/collator/src/mempool/mempool_adapter_stub.rs +++ b/collator/src/mempool/mempool_adapter_stub.rs @@ -1,22 +1,16 @@ -use std::{ - collections::BTreeMap, - sync::{Arc, RwLock}, -}; +use std::collections::BTreeMap; +use std::sync::{Arc, RwLock}; use anyhow::{anyhow, Result}; use async_trait::async_trait; - -use crate::mempool::{MempoolAdapter, MempoolEventListener}; -use everscale_types::{ - cell::{CellBuilder, CellSliceRange, HashBytes}, - models::{ExtInMsgInfo, IntAddr, MsgInfo, OwnedMessage, StdAddr}, -}; +use everscale_types::cell::{CellBuilder, CellSliceRange, HashBytes}; +use everscale_types::models::{ExtInMsgInfo, IntAddr, MsgInfo, OwnedMessage, StdAddr}; use rand::Rng; use tycho_block_util::state::ShardStateStuff; -use crate::tracing_targets; - use super::types::{ExternalMessage, MempoolAnchor, MempoolAnchorId}; +use crate::mempool::{MempoolAdapter, MempoolEventListener}; +use crate::tracing_targets; #[cfg(test)] #[path = "tests/mempool_adapter_tests.rs"] @@ -34,7 +28,7 @@ impl MempoolAdapterStubImpl { pub fn new(listener: Arc) -> Self { tracing::info!(target: tracing_targets::MEMPOOL_ADAPTER, "Creating mempool adapter..."); - //TODO: make real implementation, currently runs stub task + // TODO: make real implementation, currently runs stub task // that produces the repeating set of anchors let stub_anchors_cache = Arc::new(RwLock::new(BTreeMap::new())); @@ -81,7 +75,7 @@ impl MempoolAdapterStubImpl { #[async_trait] impl MempoolAdapter for MempoolAdapterStubImpl { async fn enqueue_process_new_mc_block_state(&self, mc_state: ShardStateStuff) -> Result<()> { - //TODO: make real implementation, currently does nothing + // TODO: make real implementation, currently does nothing tracing::info!( target: tracing_targets::MEMPOOL_ADAPTER, "STUB: New masterchain state (block_id: {}) processing enqueued to mempool", @@ -94,7 +88,7 @@ impl MempoolAdapter for MempoolAdapterStubImpl { &self, anchor_id: MempoolAnchorId, ) -> Result>> { - //TODO: make real implementation, currently only return anchor from local cache + // TODO: make real implementation, currently only return anchor from local cache let res = { let anchors_cache_r = self ._stub_anchors_cache @@ -124,7 +118,7 @@ impl MempoolAdapter for MempoolAdapterStubImpl { } async fn get_next_anchor(&self, prev_anchor_id: MempoolAnchorId) -> Result> { - //TODO: make real implementation, currently only return anchor from local cache + // TODO: make real implementation, currently only return anchor from local cache let mut stub_first_attempt = true; let mut request_timer = std::time::Instant::now(); @@ -201,13 +195,10 @@ fn _stub_create_random_anchor_with_stub_externals( msg_cell_builder.store_u32(i as u32).unwrap(); let msg_cell = msg_cell_builder.build().unwrap(); let msg_cell_range = CellSliceRange::full(&*msg_cell); - let msg = ExternalMessage::new( - msg_cell, - ExtInMsgInfo { - dst: IntAddr::Std(StdAddr::new(0, rand_addr)), - ..Default::default() - }, - ); + let msg = ExternalMessage::new(msg_cell, ExtInMsgInfo { + dst: IntAddr::Std(StdAddr::new(0, rand_addr)), 
+ ..Default::default() + }); externals.push(Arc::new(msg)); } diff --git a/consensus/examples/consensus_node.rs b/consensus/examples/consensus_node.rs index 069ae7a69..c455ab190 100644 --- a/consensus/examples/consensus_node.rs +++ b/consensus/examples/consensus_node.rs @@ -14,7 +14,6 @@ use serde::{Deserialize, Serialize}; use tokio::sync::mpsc; use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::{fmt, EnvFilter, Layer}; - use tycho_consensus::test_utils::drain_anchors; use tycho_consensus::Engine; use tycho_network::{DhtConfig, NetworkConfig, PeerId, PeerInfo}; @@ -53,7 +52,7 @@ impl Cli { .with_ansi(false) .compact() .with_writer(non_blocking) - .with_filter(EnvFilter::new("trace")), //todo: update with needed crates + .with_filter(EnvFilter::new("trace")), // todo: update with needed crates ); tracing::subscriber::set_global_default(collector)?; } else { diff --git a/consensus/src/dag/anchor_stage.rs b/consensus/src/dag/anchor_stage.rs index adbfdfd33..a3b7efcd3 100644 --- a/consensus/src/dag/anchor_stage.rs +++ b/consensus/src/dag/anchor_stage.rs @@ -1,7 +1,6 @@ use std::sync::atomic::AtomicBool; use rand::{Rng, SeedableRng}; - use tycho_network::PeerId; use crate::intercom::PeerSchedule; diff --git a/consensus/src/dag/dag.rs b/consensus/src/dag/dag.rs index 9988fd9e6..60a94bcde 100644 --- a/consensus/src/dag/dag.rs +++ b/consensus/src/dag/dag.rs @@ -17,7 +17,6 @@ use crate::models::{Point, Round, Ugly, ValidPoint}; #[derive(Clone)] pub struct Dag { // from the oldest to the current round; newer ones are in the future; - // rounds: Arc>>, } @@ -203,8 +202,8 @@ impl Dag { /// /// Note: at this point there is no way to check if passed point is really an anchor async fn gather_uncommitted( - anchor /* @ r+1 */: &Point, - anchor_round /* r+1 */: &DagRound, + anchor: &Point, // @ r+1 + anchor_round: &DagRound, // r+1 ) -> Vec> { assert_eq!( *anchor_round.round(), diff --git a/consensus/src/dag/dag_location.rs b/consensus/src/dag/dag_location.rs index c93df8101..5b50c802b 100644 --- a/consensus/src/dag/dag_location.rs +++ b/consensus/src/dag/dag_location.rs @@ -5,7 +5,6 @@ use std::sync::{Arc, OnceLock}; use everscale_crypto::ed25519::KeyPair; use futures_util::FutureExt; - use tycho_util::futures::{JoinTask, Shared}; use crate::models::{DagPoint, Digest, Round, Signature, UnixTime, ValidPoint}; @@ -22,7 +21,7 @@ pub struct DagLocation { // was proven by the next point of a node; // even if we marked this point as invalid, consensus may override our decision // and we will have to sync - /* vertex: Option, */ + // vertex: Option, /// We can sign or reject just a single (e.g. first validated) point at the current location; /// other (equivocated) points may be received as includes, witnesses or a proven vertex; /// we have to include signed points @ r+0 & @ r-1 as dependencies in our point @ r+1. 
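
The `DagLocation` comments above pin down the signing rule: per (author, round) location a node signs at most one point version, whatever else it later receives for that slot. Below is a minimal sketch of such a "sign once per location" guard, using `std::sync::OnceLock` and plain byte arrays in place of the crate's `Digest`/`Signature` types; the names and shape here are illustrative, not the real API.

```rust
use std::sync::OnceLock;

/// Illustrative stand-in for a per-(author, round) slot: the first digest
/// we agree to sign wins, every other version is observed but never signed.
#[derive(Default)]
struct SignOnce {
    signed: OnceLock<([u8; 32], [u8; 64])>, // (digest, signature)
}

impl SignOnce {
    /// Returns a signature only for the digest that was accepted first;
    /// a different digest at the same location yields `None`.
    fn sign(
        &self,
        digest: [u8; 32],
        sign_fn: impl FnOnce(&[u8; 32]) -> [u8; 64],
    ) -> Option<[u8; 64]> {
        let (accepted, signature) = self.signed.get_or_init(|| {
            let signature = sign_fn(&digest);
            (digest, signature)
        });
        (*accepted == digest).then_some(*signature)
    }
}
```

Later versions with a different digest simply get no signature back, which is what lets equivocated points still circulate as includes, witnesses or a proven vertex without ever being co-signed twice by the same node.
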
diff --git a/consensus/src/dag/dag_round.rs b/consensus/src/dag/dag_round.rs index 084aca0c8..7da9b4575 100644 --- a/consensus/src/dag/dag_round.rs +++ b/consensus/src/dag/dag_round.rs @@ -3,7 +3,6 @@ use std::sync::{Arc, Weak}; use everscale_crypto::ed25519::KeyPair; use futures_util::future::BoxFuture; use futures_util::FutureExt; - use tycho_network::PeerId; use tycho_util::FastDashMap; diff --git a/consensus/src/dag/producer.rs b/consensus/src/dag/producer.rs index e0acaed30..3c2f702f4 100644 --- a/consensus/src/dag/producer.rs +++ b/consensus/src/dag/producer.rs @@ -2,7 +2,6 @@ use std::collections::BTreeMap; use std::sync::Arc; use bytes::Bytes; - use tycho_network::PeerId; use crate::dag::anchor_stage::AnchorStage; diff --git a/consensus/src/dag/verifier.rs b/consensus/src/dag/verifier.rs index 38df53792..8a18d1347 100644 --- a/consensus/src/dag/verifier.rs +++ b/consensus/src/dag/verifier.rs @@ -2,7 +2,6 @@ use std::sync::Arc; use futures_util::FutureExt; use tokio::task::JoinSet; - use tycho_network::PeerId; use crate::dag::anchor_stage::AnchorStage; @@ -11,19 +10,17 @@ use crate::engine::MempoolConfig; use crate::intercom::{Downloader, PeerSchedule}; use crate::models::{DagPoint, Digest, Link, Location, NodeCount, Point, PointId, ValidPoint}; -/* -Note on equivocation. -Detected point equivocation does not invalidate the point, it just - prevents us (as a fair actor) from returning our signature to the author. -Such a point may be included in our next "includes" or "witnesses", - but neither its inclusion nor omitting is required: as we don't - return our signature, our dependencies cannot be validated against it. -Equally, we immediately stop communicating with the equivocating node, - without invalidating any of its points (no matter historical or future). -We will not sign the proof for equivocated point - as we've banned the author on network layer. -Anyway, no more than one of equivocated points may become a vertex. -*/ +// Note on equivocation. +// Detected point equivocation does not invalidate the point, it just +// prevents us (as a fair actor) from returning our signature to the author. +// Such a point may be included in our next "includes" or "witnesses", +// but neither its inclusion nor omitting is required: as we don't +// return our signature, our dependencies cannot be validated against it. +// Equally, we immediately stop communicating with the equivocating node, +// without invalidating any of its points (no matter historical or future). +// We will not sign the proof for equivocated point +// as we've banned the author on network layer. +// Anyway, no more than one of equivocated points may become a vertex. 
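
The rewritten note above fixes the policy: a detected equivocation does not invalidate the point, it only means the author never gets our signature and is dropped at the network layer. As a small, hypothetical illustration of that classification (not the actual `Verifier` or `BroadcastFilter` code; author and digest are reduced to byte arrays), a map keyed by (author, round) is enough to tell a harmless re-broadcast from an equivocation:

```rust
use std::collections::hash_map::Entry;
use std::collections::HashMap;

/// Hypothetical helper: remembers the first digest seen per (author, round)
/// and classifies any different digest at the same slot as equivocation.
#[derive(Default)]
struct EquivocationLog {
    first_seen: HashMap<([u8; 32], u32), [u8; 32]>, // (author, round) -> digest
}

enum Observed {
    FirstOrSame,  // sign / include as usual
    Equivocation, // keep the point, never sign it, cut the author off at network level
}

impl EquivocationLog {
    fn observe(&mut self, author: [u8; 32], round: u32, digest: [u8; 32]) -> Observed {
        match self.first_seen.entry((author, round)) {
            Entry::Vacant(slot) => {
                slot.insert(digest);
                Observed::FirstOrSame
            }
            // the very same point broadcast again is not an equivocation
            Entry::Occupied(slot) if *slot.get() == digest => Observed::FirstOrSame,
            Entry::Occupied(_) => Observed::Equivocation,
        }
    }
}
```

The sketch only mirrors the decision itself; in the crate the bookkeeping is spread across the broadcast filter and the per-location version maps, and no more than one of the competing versions can ever become a vertex.
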
pub struct Verifier; @@ -48,8 +45,8 @@ impl Verifier { /// must be called iff [Self::verify] succeeded pub async fn validate( - point /* @ r+0 */: Arc, - r_0 /* r+0 */: DagRound, + point: Arc, // @ r+0 + r_0: DagRound, // r+0 downloader: Downloader, ) -> DagPoint { // TODO upgrade Weak whenever used to let Dag Round drop if some future hangs up for long @@ -75,7 +72,10 @@ impl Verifier { DagPoint::Trusted(ValidPoint::new(point.clone())) } - fn is_self_links_ok(point /* @ r+0 */: &Point, dag_round /* r+0 */: &DagRound) -> bool { + fn is_self_links_ok( + point: &Point, // @ r+0 + dag_round: &DagRound, // r+0 + ) -> bool { // existence of proofs in leader points is a part of point's well-form-ness check match &dag_round.anchor_stage() { // no one may link to self @@ -181,8 +181,8 @@ impl Verifier { } fn gather_deps( - point /* @ r+0 */: &Point, - r_1 /* r-1 */: &DagRound, + point: &Point, // @ r+0 + r_1: &DagRound, // r-1 downloader: &Downloader, dependencies: &mut JoinSet, ) { @@ -310,7 +310,10 @@ impl Verifier { } /// blame author and every dependent point's author - fn is_list_of_signers_ok(point /* @ r+0 */: &Point, peer_schedule: &PeerSchedule) -> bool { + fn is_list_of_signers_ok( + point: &Point, // @ r+0 + peer_schedule: &PeerSchedule, + ) -> bool { if point.body.location.round == MempoolConfig::GENESIS_ROUND { return true; // all maps are empty for a well-formed genesis } @@ -363,7 +366,10 @@ impl Verifier { } /// blame author and every dependent point's author - fn is_proof_ok(point /* @ r+0 */: &Point, proven: &Point /* @ r-1 */) -> bool { + fn is_proof_ok( + point: &Point, // @ r+0 + proven: &Point, // @ r-1 + ) -> bool { if point.body.location.author != proven.body.location.author { panic!("Coding error: mismatched authors of proof and its vertex") } diff --git a/consensus/src/engine/engine.rs b/consensus/src/engine/engine.rs index e7a963308..e959b744c 100644 --- a/consensus/src/engine/engine.rs +++ b/consensus/src/engine/engine.rs @@ -5,7 +5,6 @@ use itertools::Itertools; use tokio::sync::mpsc::UnboundedSender; use tokio::sync::{mpsc, oneshot, watch}; use tokio::task::JoinSet; - use tycho_network::{DhtClient, OverlayService, PeerId}; use crate::dag::{Dag, DagRound, InclusionState, Producer}; diff --git a/consensus/src/intercom/broadcast/broadcast_filter.rs b/consensus/src/intercom/broadcast/broadcast_filter.rs index 27d2b7fab..e958e189a 100644 --- a/consensus/src/intercom/broadcast/broadcast_filter.rs +++ b/consensus/src/intercom/broadcast/broadcast_filter.rs @@ -4,18 +4,16 @@ use std::sync::Arc; use tokio::sync::broadcast::error::RecvError; use tokio::sync::mpsc; - use tycho_network::PeerId; use tycho_util::FastDashMap; +use super::dto::ConsensusEvent; use crate::dag::Verifier; use crate::engine::MempoolConfig; use crate::intercom::dto::PeerState; use crate::intercom::PeerSchedule; use crate::models::{Digest, Location, NodeCount, Point, PointId, Round}; -use super::dto::ConsensusEvent; - #[derive(Clone)] pub struct BroadcastFilter(Arc); diff --git a/consensus/src/intercom/broadcast/broadcaster.rs b/consensus/src/intercom/broadcast/broadcaster.rs index b4a45cbd3..39e94be61 100644 --- a/consensus/src/intercom/broadcast/broadcaster.rs +++ b/consensus/src/intercom/broadcast/broadcaster.rs @@ -4,9 +4,9 @@ use std::sync::Arc; use futures_util::future::BoxFuture; use futures_util::stream::FuturesUnordered; use futures_util::StreamExt; -use tokio::sync::broadcast::{self, error::RecvError}; +use tokio::sync::broadcast::error::RecvError; +use tokio::sync::broadcast::{self}; use 
tokio::sync::mpsc; - use tycho_network::PeerId; use tycho_util::{FastHashMap, FastHashSet}; diff --git a/consensus/src/intercom/broadcast/collector.rs b/consensus/src/intercom/broadcast/collector.rs index 50a916988..df29f4b7e 100644 --- a/consensus/src/intercom/broadcast/collector.rs +++ b/consensus/src/intercom/broadcast/collector.rs @@ -6,7 +6,6 @@ use futures_util::future::BoxFuture; use futures_util::stream::FuturesUnordered; use futures_util::{FutureExt, StreamExt}; use tokio::sync::{mpsc, oneshot}; - use tycho_network::PeerId; use tycho_util::FastHashSet; @@ -126,7 +125,7 @@ struct CollectorTask { log_id: Arc, downloader: Downloader, current_round: DagRound, // = r+0 - next_dag_round: DagRound, // = r+1 is always in DAG; contains the keypair to produce point @ r+1 + next_dag_round: DagRound, /* = r+1 is always in DAG; contains the keypair to produce point @ r+1 */ // @ r+0, will become includes in point @ r+1 // needed in order to not include same point twice - as an include and as a witness; @@ -307,7 +306,7 @@ impl CollectorTask { self.includes.push(task) } } - _ => _ = self.current_round.add(&point, &self.downloader), // maybe other's dependency + _ => _ = self.current_round.add(&point, &self.downloader), /* maybe other's dependency */ }, ConsensusEvent::Invalid(dag_point) => { if &dag_point.location().round > self.next_dag_round.round() { diff --git a/consensus/src/intercom/core/dispatcher.rs b/consensus/src/intercom/core/dispatcher.rs index 835faa9f1..5c6901ae2 100644 --- a/consensus/src/intercom/core/dispatcher.rs +++ b/consensus/src/intercom/core/dispatcher.rs @@ -1,7 +1,6 @@ use anyhow::{anyhow, Result}; use futures_util::future::BoxFuture; use futures_util::FutureExt; - use tycho_network::{DhtClient, Network, OverlayId, OverlayService, PeerId, PrivateOverlay}; use crate::intercom::core::dto::{MPQuery, MPResponse}; diff --git a/consensus/src/intercom/core/dto.rs b/consensus/src/intercom/core/dto.rs index 3926d4961..17ede7dc6 100644 --- a/consensus/src/intercom/core/dto.rs +++ b/consensus/src/intercom/core/dto.rs @@ -1,7 +1,6 @@ use anyhow::anyhow; use bytes::Bytes; use serde::{Deserialize, Serialize}; - use tycho_network::{Response, ServiceRequest, Version}; use crate::intercom::dto::{PointByIdResponse, SignatureResponse}; diff --git a/consensus/src/intercom/core/responder.rs b/consensus/src/intercom/core/responder.rs index ba9d65851..072af9ecc 100644 --- a/consensus/src/intercom/core/responder.rs +++ b/consensus/src/intercom/core/responder.rs @@ -1,7 +1,6 @@ use std::sync::Arc; use tokio::sync::{mpsc, oneshot}; - use tycho_network::{PeerId, Response, Service, ServiceRequest}; use tycho_util::futures::BoxFutureOrNoop; diff --git a/consensus/src/intercom/dependency/downloader.rs b/consensus/src/intercom/dependency/downloader.rs index 0d656eeff..d50ba662b 100644 --- a/consensus/src/intercom/dependency/downloader.rs +++ b/consensus/src/intercom/dependency/downloader.rs @@ -9,7 +9,6 @@ use rand::SeedableRng; use tokio::sync::broadcast::error::RecvError; use tokio::sync::{broadcast, watch}; use tokio::time::error::Elapsed; - use tycho_network::PeerId; use tycho_util::{FastHashMap, FastHashSet}; diff --git a/consensus/src/intercom/mod.rs b/consensus/src/intercom/mod.rs index 60d1b6c11..a8f5064b5 100644 --- a/consensus/src/intercom/mod.rs +++ b/consensus/src/intercom/mod.rs @@ -1,5 +1,6 @@ -pub use broadcast::*; pub use core::*; + +pub use broadcast::*; pub use dependency::*; pub use peer_schedule::*; diff --git a/consensus/src/intercom/peer_schedule/peer_schedule.rs 
b/consensus/src/intercom/peer_schedule/peer_schedule.rs index 630011e77..2c51d0005 100644 --- a/consensus/src/intercom/peer_schedule/peer_schedule.rs +++ b/consensus/src/intercom/peer_schedule/peer_schedule.rs @@ -6,23 +6,20 @@ use std::sync::Arc; use everscale_crypto::ed25519::KeyPair; use parking_lot::Mutex; use tokio::sync::broadcast; - use tycho_network::{PeerId, PrivateOverlay}; use tycho_util::FastHashSet; use crate::intercom::dto::PeerState; use crate::models::{NodeCount, Round}; -/* - As validators are elected for wall-clock time range, - the round of validator set switch is not known beforehand - and will be determined by the time in anchor vertices: - it must reach some predefined time range, - when the new set is supposed to be online and start to request points, - and a (relatively high) predefined number of support rounds must follow - for the anchor chain to be committed by majority and for the new nodes to gather data. - The switch will occur for validator sets as a whole. -*/ +// As validators are elected for wall-clock time range, +// the round of validator set switch is not known beforehand +// and will be determined by the time in anchor vertices: +// it must reach some predefined time range, +// when the new set is supposed to be online and start to request points, +// and a (relatively high) predefined number of support rounds must follow +// for the anchor chain to be committed by majority and for the new nodes to gather data. +// The switch will occur for validator sets as a whole. #[derive(Clone)] pub struct PeerSchedule { diff --git a/consensus/src/intercom/peer_schedule/peer_schedule_updater.rs b/consensus/src/intercom/peer_schedule/peer_schedule_updater.rs index df039621b..fd2c3d5af 100644 --- a/consensus/src/intercom/peer_schedule/peer_schedule_updater.rs +++ b/consensus/src/intercom/peer_schedule/peer_schedule_updater.rs @@ -5,7 +5,6 @@ use parking_lot::Mutex; use rand::prelude::IteratorRandom; use tokio::sync::broadcast::error::RecvError; use tokio::task::AbortHandle; - use tycho_network::{PeerId, PrivateOverlay, PrivateOverlayEntriesEvent}; use crate::intercom::PeerSchedule; diff --git a/consensus/src/models/node_count.rs b/consensus/src/models/node_count.rs index d2b91a13a..7bedca054 100644 --- a/consensus/src/models/node_count.rs +++ b/consensus/src/models/node_count.rs @@ -57,9 +57,7 @@ impl NodeCount { pub fn reliable_minority(&self) -> usize { self.0 as usize + 1 } - /* - pub fn unreliable(&self) -> usize { - self.0 - } - */ + // pub fn unreliable(&self) -> usize { + // self.0 + // } } diff --git a/consensus/src/models/point.rs b/consensus/src/models/point.rs index 37df9258c..4a74285da 100644 --- a/consensus/src/models/point.rs +++ b/consensus/src/models/point.rs @@ -6,7 +6,6 @@ use bytes::Bytes; use everscale_crypto::ed25519::KeyPair; use serde::{Deserialize, Serialize}; use sha2::{Digest as Sha2Digest, Sha256}; - use tycho_network::PeerId; use crate::engine::MempoolConfig; diff --git a/consensus/src/test_utils.rs b/consensus/src/test_utils.rs index 22084634a..35a29ef50 100644 --- a/consensus/src/test_utils.rs +++ b/consensus/src/test_utils.rs @@ -4,7 +4,6 @@ use std::sync::Arc; use everscale_crypto::ed25519::{KeyPair, PublicKey, SecretKey}; use tokio::sync::mpsc::UnboundedReceiver; use tokio::task::JoinHandle; - use tycho_network::{ Address, DhtClient, DhtConfig, DhtService, Network, NetworkConfig, OverlayService, PeerId, PeerInfo, Router, ToSocket, @@ -113,9 +112,8 @@ mod tests { use tokio::sync::mpsc; use tokio::task::JoinSet; - use 
crate::engine::Engine; - use super::*; + use crate::engine::Engine; async fn make_network(node_count: usize) -> Vec { let keys = (0..node_count) diff --git a/storage/src/store/shard_state/store_state_raw.rs b/storage/src/store/shard_state/store_state_raw.rs index 78cedb954..ebcbed638 100644 --- a/storage/src/store/shard_state/store_state_raw.rs +++ b/storage/src/store/shard_state/store_state_raw.rs @@ -592,12 +592,9 @@ mod test { } tracing::info!("Decompressed the archive"); - let db = Db::open( - current_test_path.join("rocksdb"), - DbConfig { - rocksdb_lru_capacity: ByteSize::mb(256), - }, - )?; + let db = Db::open(current_test_path.join("rocksdb"), DbConfig { + rocksdb_lru_capacity: ByteSize::mb(256), + })?; let file_db = FileDb::new(current_test_path.join("file_db"))?; let cells_storage = CellStorage::new(db.clone(), 100_000_000); From 2bc79c4166a4247753d8ee060d1ff1db1ddb9199 Mon Sep 17 00:00:00 2001 From: Stanislav Eliseev Date: Fri, 10 May 2024 11:26:36 +0200 Subject: [PATCH 27/32] fix(mempool-adapter): fix mempool-adapter merge problem, apply rust fmt to code. Slightly rework mempool adapter factory --- Cargo.lock | 227 +++++++++--------- cli/Cargo.toml | 3 + cli/src/node/mod.rs | 9 +- collator/src/manager/mod.rs | 2 +- collator/src/mempool/mempool_adapter.rs | 219 ++++++++++------- collator/src/mempool/mempool_adapter_stub.rs | 7 +- collator/src/mempool/mod.rs | 1 + .../mempool/tests/mempool_adapter_tests.rs | 4 +- collator/src/mempool/types.rs | 4 +- collator/src/types.rs | 9 +- consensus/src/engine/engine.rs | 10 +- consensus/src/intercom/core/dispatcher.rs | 2 +- .../intercom/peer_schedule/peer_schedule.rs | 2 +- .../peer_schedule/peer_schedule_updater.rs | 2 +- 14 files changed, 277 insertions(+), 224 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c49328228..d6e7371a1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -41,47 +41,48 @@ dependencies = [ [[package]] name = "anstream" -version = "0.6.13" +version = "0.6.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d96bd03f33fe50a863e394ee9718a706f988b9079b20c3784fb726e7678b62fb" +checksum = "418c75fa768af9c03be99d17643f93f79bbba589895012a80e3452a19ddda15b" dependencies = [ "anstyle", "anstyle-parse", "anstyle-query", "anstyle-wincon", "colorchoice", + "is_terminal_polyfill", "utf8parse", ] [[package]] name = "anstyle" -version = "1.0.6" +version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" +checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b" [[package]] name = "anstyle-parse" -version = "0.2.3" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c" +checksum = "c03a11a9034d92058ceb6ee011ce58af4a9bf61491aa7e1e59ecd24bd40d22d4" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648" +checksum = "a64c907d4e79225ac72e2a354c9ce84d50ebb4586dee56c82b3ee73004f537f5" dependencies = [ "windows-sys 0.52.0", ] [[package]] name = "anstyle-wincon" -version = "3.0.2" +version = "3.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7" +checksum = 
"61a38449feb7068f52bb06c12759005cf459ee52bb4adc1d5a7c4322d716fb19" dependencies = [ "anstyle", "windows-sys 0.52.0", @@ -89,9 +90,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.82" +version = "1.0.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f538837af36e6f6a9be0faa67f9a314f8119e4e4b5867c6ab40ed60360142519" +checksum = "25bdb32cbbdce2b519a9cd7df3a678443100e265d5e25ca763b7572a5104f5f3" dependencies = [ "backtrace", ] @@ -149,14 +150,14 @@ checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca" dependencies = [ "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.61", ] [[package]] name = "autocfg" -version = "1.2.0" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1fdabc7756949593fe60f30ec81974b613357de856987752631dea1e3394c80" +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" [[package]] name = "backtrace" @@ -181,9 +182,9 @@ checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" [[package]] name = "base64" -version = "0.22.0" +version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9475866fec1451be56a3c2400fd081ff546538961565ccb5b7142cbd22bc7a51" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] name = "base64ct" @@ -218,7 +219,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - "syn 2.0.60", + "syn 2.0.61", ] [[package]] @@ -287,9 +288,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.0.95" +version = "1.0.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d32a725bc159af97c3e629873bb9f88fb8cf8a4867175f76dc987815ea07c83b" +checksum = "099a5357d84c4c61eb35fc8eafa9a79a902c2f76911e5747ced4e032edd8d9b4" dependencies = [ "jobserver", "libc", @@ -353,7 +354,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.61", ] [[package]] @@ -364,9 +365,9 @@ checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" [[package]] name = "colorchoice" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" +checksum = "0b6a852b24ab71dffc585bcb46eaf7959d175cb865a7152e35b348d1b2960422" [[package]] name = "const-oid" @@ -481,7 +482,7 @@ checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.61", ] [[package]] @@ -534,9 +535,9 @@ dependencies = [ [[package]] name = "data-encoding" -version = "2.5.0" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e962a19be5cfc3f3bf6dd8f61eb50107f356ad6270fbb3ed41476571db78be5" +checksum = "e8566979429cf69b49a5c740c60791108e86440e8be149bbea4fe54d2c32d6e2" [[package]] name = "der" @@ -614,7 +615,7 @@ checksum = "487585f4d0c6655fe74905e2504d8ad6908e4db67f744eb140876906c2f3175d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.61", ] [[package]] @@ -671,9 +672,9 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] name = "errno" -version = "0.3.8" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" +checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" dependencies = [ "libc", 
"windows-sys 0.52.0", @@ -697,7 +698,7 @@ dependencies = [ [[package]] name = "everscale-types" version = "0.1.0-rc.6" -source = "git+https://github.com/broxus/everscale-types.git?branch=tycho#98544f3ef1fde79846d1d9312048241e70374853" +source = "git+https://github.com/broxus/everscale-types.git?branch=tycho#3c9953382a37efab6d19ad9076b5072e043c779b" dependencies = [ "ahash", "base64 0.21.7", @@ -717,11 +718,11 @@ dependencies = [ [[package]] name = "everscale-types-proc" version = "0.1.4" -source = "git+https://github.com/broxus/everscale-types.git?branch=tycho#98544f3ef1fde79846d1d9312048241e70374853" +source = "git+https://github.com/broxus/everscale-types.git?branch=tycho#3c9953382a37efab6d19ad9076b5072e043c779b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.61", ] [[package]] @@ -832,7 +833,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.61", ] [[package]] @@ -877,9 +878,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.14" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94b22e06ecb0110981051723910cbf0b5f5e09a2062dd7663334ee79a9d1286c" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" dependencies = [ "cfg-if", "libc", @@ -900,9 +901,9 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "hashbrown" -version = "0.14.3" +version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" [[package]] name = "heck" @@ -984,7 +985,7 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", - "socket2 0.5.6", + "socket2 0.5.7", "tokio", "tower-service", "tracing", @@ -1048,6 +1049,12 @@ version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" +[[package]] +name = "is_terminal_polyfill" +version = "1.70.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800" + [[package]] name = "itertools" version = "0.12.1" @@ -1095,9 +1102,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" -version = "0.2.153" +version = "0.2.154" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" +checksum = "ae743338b92ff9146ce83992f766a31066a91a8c84a45e0e9f21e7cf6de6d346" [[package]] name = "libloading" @@ -1286,11 +1293,10 @@ dependencies = [ [[package]] name = "num-bigint" -version = "0.4.4" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "608e7659b5c3d7cba262d894801b9ec9d00de989e8a82bd4bef91d08da45cdc0" +checksum = "c165a9ab64cf766f73521c0dd2cfdff64f488b8f0b3e621face3462d3db536d7" dependencies = [ - "autocfg", "num-integer", "num-traits", ] @@ -1312,9 +1318,9 @@ dependencies = [ [[package]] name = "num-traits" -version = "0.2.18" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ 
"autocfg", ] @@ -1397,7 +1403,7 @@ version = "3.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e459365e590736a54c3fa561947c84837534b8e9af6fc5bf781307e82658fae" dependencies = [ - "base64 0.22.0", + "base64 0.22.1", "serde", ] @@ -1409,9 +1415,9 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "pest" -version = "2.7.9" +version = "2.7.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "311fb059dee1a7b802f036316d790138c613a4e8b180c822e3925a662e9f0c95" +checksum = "560131c633294438da9f7c4b08189194b20946c8274c6b9e38881a7874dc8ee8" dependencies = [ "memchr", "thiserror", @@ -1420,9 +1426,9 @@ dependencies = [ [[package]] name = "pest_derive" -version = "2.7.9" +version = "2.7.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f73541b156d32197eecda1a4014d7f868fd2bcb3c550d5386087cfba442bf69c" +checksum = "26293c9193fbca7b1a3bf9b79dc1e388e927e6cacaa78b4a3ab705a1d3d41459" dependencies = [ "pest", "pest_generator", @@ -1430,22 +1436,22 @@ dependencies = [ [[package]] name = "pest_generator" -version = "2.7.9" +version = "2.7.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c35eeed0a3fab112f75165fdc026b3913f4183133f19b49be773ac9ea966e8bd" +checksum = "3ec22af7d3fb470a85dd2ca96b7c577a1eb4ef6f1683a9fe9a8c16e136c04687" dependencies = [ "pest", "pest_meta", "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.61", ] [[package]] name = "pest_meta" -version = "2.7.9" +version = "2.7.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2adbf29bb9776f28caece835398781ab24435585fe0d4dc1374a61db5accedca" +checksum = "d7a240022f37c361ec1878d646fc5b7d7c4d28d5946e1a80ad5a7a4f4ca0bdcd" dependencies = [ "once_cell", "pest", @@ -1454,9 +1460,9 @@ dependencies = [ [[package]] name = "petgraph" -version = "0.6.4" +version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9" +checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" dependencies = [ "fixedbitset", "indexmap", @@ -1479,7 +1485,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.61", ] [[package]] @@ -1536,19 +1542,19 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "prettyplease" -version = "0.2.19" +version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ac2cf0f2e4f42b49f5ffd07dae8d746508ef7526c13940e5f524012ae6c6550" +checksum = "5f12335488a2f3b0a83b14edad48dca9879ce89b2edd10e80237e4e852dd645e" dependencies = [ "proc-macro2", - "syn 2.0.60", + "syn 2.0.61", ] [[package]] name = "proc-macro2" -version = "1.0.81" +version = "1.0.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d1597b0c024618f09a9c3b8655b7e430397a36d23fdafec26d6965e9eec3eba" +checksum = "8ad3d49ab951a01fbaafe34f2ec74122942fe18a3f9814c3268f1bb72042131b" dependencies = [ "unicode-ident", ] @@ -1643,7 +1649,7 @@ checksum = "055b4e778e8feb9f93c4e439f71dc2156ef13360b432b799e179a8c4cdf0b1d7" dependencies = [ "bytes", "libc", - "socket2 0.5.6", + "socket2 0.5.7", "tracing", "windows-sys 0.48.0", ] @@ -1708,9 +1714,9 @@ dependencies = [ [[package]] name = "raw-cpuid" -version = "11.0.1" +version = "11.0.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d86a7c4638d42c44551f4791a20e687dbb4c3de1f33c43dd71e355cd429def1" +checksum = "e29830cbb1290e404f24c73af91c5d8d631ce7e128691e9477556b540cd01ecd" dependencies = [ "bitflags 2.5.0", ] @@ -1851,9 +1857,9 @@ dependencies = [ [[package]] name = "rustc-demangle" -version = "0.1.23" +version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" [[package]] name = "rustc-hash" @@ -1916,15 +1922,15 @@ dependencies = [ [[package]] name = "rustversion" -version = "1.0.15" +version = "1.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80af6f9131f277a45a3fba6ce8e2258037bb0477a67e610d3c1fe046ab31de47" +checksum = "092474d1a01ea8278f69e6a358998405fae5b8b963ddaeb2b0b04a128bf1dfb0" [[package]] name = "ryu" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" [[package]] name = "scopeguard" @@ -1944,35 +1950,35 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.22" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92d43fe69e652f3df9bdc2b85b2854a0825b86e4fb76bc44d945137d053639ca" +checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" [[package]] name = "serde" -version = "1.0.199" +version = "1.0.201" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c9f6e76df036c77cd94996771fb40db98187f096dd0b9af39c6c6e452ba966a" +checksum = "780f1cebed1629e4753a1a38a3c72d30b97ec044f0aef68cb26650a3c5cf363c" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.199" +version = "1.0.201" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11bd257a6541e141e42ca6d24ae26f7714887b47e89aa739099104c7e4d3b7fc" +checksum = "c5e405930b9796f1c00bee880d03fc7e0bb4b9a11afc776885ffe84320da2865" dependencies = [ "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.61", ] [[package]] name = "serde_json" -version = "1.0.116" +version = "1.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e17db7126d17feb94eb3fad46bf1a96b034e8aacbc2e775fe81505f8b0b2813" +checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" dependencies = [ "indexmap", "itoa", @@ -2061,9 +2067,9 @@ dependencies = [ [[package]] name = "socket2" -version = "0.5.6" +version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05ffd9c0a93b7543e062e759284fcf5f5e3b098501104bfbdde4d404db792871" +checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c" dependencies = [ "libc", "windows-sys 0.52.0", @@ -2128,9 +2134,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.60" +version = "2.0.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "909518bc7b1c9b779f1bbf07f2929d35af9f0f37e47c6e9ef7f9dddc1e1821f3" +checksum = "c993ed8ccba56ae856363b1845da7266a7cb78e1d146c8a32d54b45a8b831fc9" dependencies = [ "proc-macro2", "quote", @@ -2151,9 +2157,9 @@ dependencies = [ [[package]] name = "sysinfo" -version = "0.30.11" +version = "0.30.12" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "87341a165d73787554941cd5ef55ad728011566fe714e987d1b976c15dbc3a83" +checksum = "732ffa00f53e6b2af46208fba5718d9662a421049204e156328b66791ffa15ae" dependencies = [ "cfg-if", "core-foundation-sys", @@ -2184,22 +2190,22 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.59" +version = "1.0.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0126ad08bff79f29fc3ae6a55cc72352056dfff61e3ff8bb7129476d44b23aa" +checksum = "579e9083ca58dd9dcf91a9923bb9054071b9ebbd800b342194c9feb0ee89fc18" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.59" +version = "1.0.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1cd413b5d558b4c5bf3680e324a6fa5014e7b7c067a51e69dbdf47eb7148b66" +checksum = "e2470041c06ec3ac1ab38d0356a6119054dedaea53e12fbefc0de730a1c08524" dependencies = [ "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.61", ] [[package]] @@ -2311,7 +2317,7 @@ dependencies = [ "proc-macro2", "quote", "rustc-hash", - "syn 2.0.60", + "syn 2.0.61", "tl-scheme", ] @@ -2342,7 +2348,7 @@ dependencies = [ "parking_lot", "pin-project-lite", "signal-hook-registry", - "socket2 0.5.6", + "socket2 0.5.7", "tokio-macros", "windows-sys 0.48.0", ] @@ -2355,21 +2361,20 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.61", ] [[package]] name = "tokio-util" -version = "0.7.10" +version = "0.7.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5419f34732d9eb6ee4c3578b7989078579b7f039cbbb9ca2c4da015749371e15" +checksum = "9cf6b47b3771c49ac75ad09a6162f53ad4b8088b76ac60e8ec1455b31a189fe1" dependencies = [ "bytes", "futures-core", "futures-sink", "pin-project-lite", "tokio", - "tracing", ] [[package]] @@ -2409,7 +2414,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.61", ] [[package]] @@ -2494,7 +2499,7 @@ checksum = "70977707304198400eb4835a78f6a9f928bf41bba420deb8fdb175cd965d77a7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.61", ] [[package]] @@ -2579,7 +2584,7 @@ name = "tycho-cli" version = "0.0.1" dependencies = [ "anyhow", - "base64 0.22.0", + "base64 0.22.1", "clap", "everscale-crypto", "everscale-types", @@ -2704,7 +2709,7 @@ dependencies = [ "ahash", "anyhow", "arc-swap", - "base64 0.22.0", + "base64 0.22.1", "bytes", "castaway", "clap", @@ -2726,7 +2731,7 @@ dependencies = [ "rustls-webpki", "serde", "serde_json", - "socket2 0.5.6", + "socket2 0.5.7", "thiserror", "tl-proto", "tokio", @@ -2758,7 +2763,7 @@ version = "0.0.1" dependencies = [ "anyhow", "arc-swap", - "base64 0.22.0", + "base64 0.22.1", "bumpalo", "bytes", "bytesize", @@ -2948,7 +2953,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.61", "wasm-bindgen-shared", ] @@ -2970,7 +2975,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.61", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3211,22 +3216,22 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.7.32" +version = "0.7.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be" +checksum = 
"ae87e3fcd617500e5d106f0380cf7b77f3c6092aae37191433159dda23cfb087" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.7.32" +version = "0.7.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" +checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.61", ] [[package]] diff --git a/cli/Cargo.toml b/cli/Cargo.toml index ca0dd06ef..377fcce7c 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -47,6 +47,9 @@ tycho-network = { workspace = true } tycho-storage = { workspace = true } tycho-util = { workspace = true } +[dev-dependencies] +tycho-collator = { workspace = true, features = ["test"] } + [build-dependencies] anyhow = { workspace = true } rustc_version = { workspace = true } diff --git a/cli/src/node/mod.rs b/cli/src/node/mod.rs index f90b474af..ed71ab163 100644 --- a/cli/src/node/mod.rs +++ b/cli/src/node/mod.rs @@ -12,7 +12,7 @@ use futures_util::future::BoxFuture; use tycho_block_util::state::{MinRefMcStateTracker, ShardStateStuff}; use tycho_collator::collator::CollatorStdImplFactory; use tycho_collator::manager::CollationManager; -use tycho_collator::mempool::MempoolAdapterStdImpl; +use tycho_collator::mempool::MempoolAdapterfactoryStd; use tycho_collator::msg_queue::MessageQueueAdapterStdImpl; use tycho_collator::state_node::{StateNodeAdapter, StateNodeAdapterStdImpl}; use tycho_collator::types::{CollationConfig, ValidatorNetwork}; @@ -486,14 +486,17 @@ impl Node { supported_block_version: 50, supported_capabilities: supported_capabilities(), max_collate_threads: 1, - test_validators_keypairs: vec![], }; let collation_manager = CollationManager::start( collation_config, Arc::new(MessageQueueAdapterStdImpl::default()), |listener| StateNodeAdapterStdImpl::new(listener, self.storage.clone()), - MempoolAdapterStdImpl::new, + MempoolAdapterfactoryStd::new( + self.keypair.clone(), + self.dht_client.clone(), + self.overlay_service.clone(), + ), ValidatorStdImplFactory { network: ValidatorNetwork { overlay_service: self.overlay_service.clone(), diff --git a/collator/src/manager/mod.rs b/collator/src/manager/mod.rs index 9dc9f337d..64075707f 100644 --- a/collator/src/manager/mod.rs +++ b/collator/src/manager/mod.rs @@ -223,7 +223,7 @@ where Arc::new(state_node_adapter_factory.create(arc_dispatcher.clone())); // create mempool adapter - let mpool_adapter = Arc::new(mpool_adapter_factory.create(arc_dispatcher.clone())); + let mpool_adapter = mpool_adapter_factory.create(arc_dispatcher.clone()); // create validator and start its tasks queue let validator = validator_factory.create(ValidatorContext { diff --git a/collator/src/mempool/mempool_adapter.rs b/collator/src/mempool/mempool_adapter.rs index 90742c341..ed74222f6 100644 --- a/collator/src/mempool/mempool_adapter.rs +++ b/collator/src/mempool/mempool_adapter.rs @@ -1,14 +1,21 @@ -use std::collections::BTreeMap; -use std::sync::{Arc, RwLock}; +use std::collections::{BTreeMap, HashMap}; +use std::sync::Arc; -use anyhow::{anyhow, Result}; +use anyhow::Result; use async_trait::async_trait; -use everscale_types::cell::{CellBuilder, CellSliceRange, HashBytes}; -use everscale_types::models::{ExtInMsgInfo, IntAddr, MsgInfo, OwnedMessage, StdAddr}; -use rand::Rng; +use everscale_crypto::ed25519::KeyPair; +use everscale_types::boc::Boc; +use everscale_types::cell::HashBytes; +use everscale_types::models::ExtInMsgInfo; +use 
everscale_types::prelude::Load; +use parking_lot::RwLock; +use tokio::sync::mpsc::UnboundedReceiver; use tycho_block_util::state::ShardStateStuff; +use tycho_consensus::Point; +use tycho_network::{DhtClient, OverlayService}; -use super::types::{ExternalMessage, MempoolAnchor, MempoolAnchorId}; +use crate::mempool::types::ExternalMessage; +use crate::mempool::{MempoolAnchor, MempoolAnchorId}; use crate::tracing_targets; #[cfg(test)] @@ -20,7 +27,7 @@ pub(super) mod tests; pub trait MempoolAdapterFactory { type Adapter: MempoolAdapter; - fn create(&self, listener: Arc) -> Self::Adapter; + fn create(&self, listener: Arc) -> Arc; } impl MempoolAdapterFactory for F @@ -30,8 +37,8 @@ where { type Adapter = R; - fn create(&self, listener: Arc) -> Self::Adapter { - self(listener) + fn create(&self, listener: Arc) -> Arc { + Arc::new(self(listener)) } } @@ -71,57 +78,128 @@ pub trait MempoolAdapter: Send + Sync + 'static { async fn clear_anchors_cache(&self, before_anchor_id: MempoolAnchorId) -> Result<()>; } -pub struct MempoolAdapterStdImpl { - listener: Arc, +pub struct MempoolAdapterfactoryStd { + key_pair: Arc, + dht_client: DhtClient, + overlay_service: OverlayService, +} + +impl MempoolAdapterfactoryStd { + pub fn new( + key_pair: Arc, + dht_client: DhtClient, + overlay_service: OverlayService, + ) -> MempoolAdapterfactoryStd { + Self { + key_pair, + dht_client, + overlay_service, + } + } +} + +impl MempoolAdapterFactory for MempoolAdapterfactoryStd { + type Adapter = MempoolAdapterStdImpl; + + fn create(&self, _: Arc) -> Arc { + MempoolAdapterStdImpl::new( + self.key_pair.clone(), + self.dht_client.clone(), + self.overlay_service.clone(), + ) + } +} - _stub_anchors_cache: Arc>>>, +pub struct MempoolAdapterStdImpl { + // TODO: replace with rocksdb + anchors: Arc>>>, } impl MempoolAdapterStdImpl { - pub fn new(listener: Arc) -> Self { + pub fn new( + key_pair: Arc, + dht_client: DhtClient, + overlay_service: OverlayService, + ) -> Arc { tracing::info!(target: tracing_targets::MEMPOOL_ADAPTER, "Creating mempool adapter..."); + let anchors = Arc::new(RwLock::new(BTreeMap::new())); - // TODO: make real implementation, currently runs stub task - // that produces the repeating set of anchors - let stub_anchors_cache = Arc::new(RwLock::new(BTreeMap::new())); - - tokio::spawn({ - let listener = listener.clone(); - let stub_anchors_cache = stub_anchors_cache.clone(); - async move { - let mut anchor_id = 0; - loop { - let rnd_round_interval = rand::thread_rng().gen_range(400..600); - tokio::time::sleep(tokio::time::Duration::from_millis(rnd_round_interval * 6)) - .await; - anchor_id += 1; - let anchor = _stub_create_random_anchor_with_stub_externals(anchor_id); - { - let mut anchor_cache_rw = stub_anchors_cache - .write() - .map_err(|e| anyhow!("Poison error on write lock: {:?}", e)) - .unwrap(); - tracing::debug!( - target: tracing_targets::MEMPOOL_ADAPTER, - "Random anchor (id: {}, chain_time: {}, externals: {}) added to cache", - anchor.id(), - anchor.chain_time(), - anchor.externals_count(), - ); - anchor_cache_rw.insert(anchor_id, anchor.clone()); - } - listener.on_new_anchor(anchor).await.unwrap(); - } - } + let (sender, receiver) = + tokio::sync::mpsc::unbounded_channel::<(Arc, Vec>)>(); + + tokio::spawn(async move { + let engine = + tycho_consensus::Engine::new(key_pair, &dht_client, &overlay_service, sender).await; + + engine.run().await; }); - tracing::info!(target: tracing_targets::MEMPOOL_ADAPTER, "Stub anchors generator started"); tracing::info!(target: tracing_targets::MEMPOOL_ADAPTER, 
"Mempool adapter created"); - Self { - listener, - _stub_anchors_cache: stub_anchors_cache, + let mempool_adapter = Arc::new(Self { anchors }); + + // start handling mempool anchors + tokio::spawn(parse_points(mempool_adapter.clone(), receiver)); + + mempool_adapter + } + + fn add_anchor(&self, anchor: Arc) { + let mut guard = self.anchors.write(); + guard.insert(anchor.id(), anchor); + } +} + +pub async fn parse_points( + adapter: Arc, + mut rx: UnboundedReceiver<(Arc, Vec>)>, +) { + while let Some((anchor, points)) = rx.recv().await { + let mut external_messages = HashMap::::new(); + + for point in points { + 'message: for message in &point.body.payload { + let cell = match Boc::decode(message) { + Ok(cell) => cell, + Err(e) => { + tracing::error!(target: tracing_targets::MEMPOOL_ADAPTER, "Failed to deserialize bytes into cell. Error: {e:?}"); // TODO: should handle errors properly? + continue 'message; + } + }; + + let mut slice = match cell.as_slice() { + Ok(slice) => slice, + Err(e) => { + tracing::error!(target: tracing_targets::MEMPOOL_ADAPTER, "Failed to make slice from cell. Error: {e:?}"); + continue 'message; + } + }; + + let ext_in_message = match ExtInMsgInfo::load_from(&mut slice) { + Ok(message) => message, + Err(e) => { + tracing::error!(target: tracing_targets::MEMPOOL_ADAPTER, "Bad cell. Failed to deserialize to ExtInMsgInfo. Err: {e:?}"); + continue 'message; + } + }; + + let external_message = ExternalMessage::new(cell.clone(), ext_in_message); + external_messages.insert(*cell.repr_hash(), external_message); + } } + + let messages = external_messages + .into_iter() + .map(|m| Arc::new(m.1)) + .collect::>(); + + let anchor = Arc::new(MempoolAnchor::new( + anchor.body.location.round.0, + anchor.body.time.as_u64(), + messages, + )); + + adapter.add_anchor(anchor); } } @@ -143,10 +221,8 @@ impl MempoolAdapter for MempoolAdapterStdImpl { ) -> Result>> { // TODO: make real implementation, currently only return anchor from local cache let res = { - let anchors_cache_r = self - ._stub_anchors_cache - .read() - .map_err(|e| anyhow!("Poison error on read lock: {:?}", e))?; + let anchors_cache_r = self.anchors.read(); + anchors_cache_r.get(&anchor_id).cloned() }; if res.is_some() { @@ -177,10 +253,7 @@ impl MempoolAdapter for MempoolAdapterStdImpl { let mut request_timer = std::time::Instant::now(); loop { { - let anchors_cache_r = self - ._stub_anchors_cache - .read() - .map_err(|e| anyhow!("Poison error on read lock: {:?}", e))?; + let anchors_cache_r = self.anchors.read(); let mut range = anchors_cache_r.range(( std::ops::Bound::Excluded(prev_anchor_id), @@ -224,35 +297,9 @@ impl MempoolAdapter for MempoolAdapterStdImpl { } async fn clear_anchors_cache(&self, before_anchor_id: MempoolAnchorId) -> Result<()> { - let mut anchors_cache_rw = self - ._stub_anchors_cache - .write() - .map_err(|e| anyhow!("Poison error on write lock: {:?}", e))?; + let mut anchors_cache_rw = self.anchors.write(); + anchors_cache_rw.retain(|anchor_id, _| anchor_id >= &before_anchor_id); Ok(()) } } - -fn _stub_create_random_anchor_with_stub_externals( - anchor_id: MempoolAnchorId, -) -> Arc { - let chain_time = anchor_id as u64 * 471 * 6 % 1000000000; - let externals_count = chain_time as i32 % 10; - let mut externals = vec![]; - for i in 0..externals_count { - let rand_addr = (0..32).map(|_| rand::random::()).collect::>(); - let rand_addr = HashBytes::from_slice(&rand_addr); - let mut msg_cell_builder = CellBuilder::new(); - msg_cell_builder.store_u32(anchor_id).unwrap(); - 
msg_cell_builder.store_u64(chain_time).unwrap(); - msg_cell_builder.store_u32(i as u32).unwrap(); - let msg_cell = msg_cell_builder.build().unwrap(); - let msg = ExternalMessage::new(msg_cell, ExtInMsgInfo { - dst: IntAddr::Std(StdAddr::new(0, rand_addr)), - ..Default::default() - }); - externals.push(Arc::new(msg)); - } - - Arc::new(MempoolAnchor::new(anchor_id, chain_time, externals)) -} diff --git a/collator/src/mempool/mempool_adapter_stub.rs b/collator/src/mempool/mempool_adapter_stub.rs index 969930f84..c9a8afd0d 100644 --- a/collator/src/mempool/mempool_adapter_stub.rs +++ b/collator/src/mempool/mempool_adapter_stub.rs @@ -3,13 +3,13 @@ use std::sync::{Arc, RwLock}; use anyhow::{anyhow, Result}; use async_trait::async_trait; -use everscale_types::cell::{CellBuilder, CellSliceRange, HashBytes}; -use everscale_types::models::{ExtInMsgInfo, IntAddr, MsgInfo, OwnedMessage, StdAddr}; +use everscale_types::cell::{CellBuilder, HashBytes}; +use everscale_types::models::{ExtInMsgInfo, IntAddr, StdAddr}; use rand::Rng; use tycho_block_util::state::ShardStateStuff; use super::types::{ExternalMessage, MempoolAnchor, MempoolAnchorId}; -use crate::mempool::{MempoolAdapter, MempoolEventListener}; +use crate::mempool::mempool_adapter::{MempoolAdapter, MempoolEventListener}; use crate::tracing_targets; #[cfg(test)] @@ -194,7 +194,6 @@ fn _stub_create_random_anchor_with_stub_externals( msg_cell_builder.store_u64(chain_time).unwrap(); msg_cell_builder.store_u32(i as u32).unwrap(); let msg_cell = msg_cell_builder.build().unwrap(); - let msg_cell_range = CellSliceRange::full(&*msg_cell); let msg = ExternalMessage::new(msg_cell, ExtInMsgInfo { dst: IntAddr::Std(StdAddr::new(0, rand_addr)), ..Default::default() diff --git a/collator/src/mempool/mod.rs b/collator/src/mempool/mod.rs index b09349d0b..745256375 100644 --- a/collator/src/mempool/mod.rs +++ b/collator/src/mempool/mod.rs @@ -3,4 +3,5 @@ mod mempool_adapter_stub; mod types; pub use mempool_adapter::*; +pub use mempool_adapter_stub::MempoolAdapterStubImpl; pub(crate) use types::{MempoolAnchor, MempoolAnchorId}; diff --git a/collator/src/mempool/tests/mempool_adapter_tests.rs b/collator/src/mempool/tests/mempool_adapter_tests.rs index 982ec0b83..d6c780716 100644 --- a/collator/src/mempool/tests/mempool_adapter_tests.rs +++ b/collator/src/mempool/tests/mempool_adapter_tests.rs @@ -4,7 +4,7 @@ use anyhow::Result; use async_trait::async_trait; use super::{MempoolAdapter, MempoolEventListener}; -use crate::mempool::{MempoolAdapterStdImpl, MempoolAnchor}; +use crate::mempool::{MempoolAdapterStdImpl, MempoolAdapterStubImpl, MempoolAnchor}; use crate::test_utils::try_init_test_tracing; struct MempoolEventStubListener; @@ -25,7 +25,7 @@ impl MempoolEventListener for MempoolEventStubListener { async fn test_stub_anchors_generator() -> Result<()> { try_init_test_tracing(tracing_subscriber::filter::LevelFilter::TRACE); - let adapter = MempoolAdapterStdImpl::new(Arc::new(MempoolEventStubListener {})); + let adapter = MempoolAdapterStubImpl::new(Arc::new(MempoolEventStubListener {})); // try get not existing anchor by id let opt_anchor = adapter.get_anchor_by_id(10).await?; diff --git a/collator/src/mempool/types.rs b/collator/src/mempool/types.rs index 502a2713e..1242d5478 100644 --- a/collator/src/mempool/types.rs +++ b/collator/src/mempool/types.rs @@ -1,6 +1,6 @@ use std::sync::Arc; -use everscale_types::models::{ExtInMsgInfo, OwnedMessage}; +use everscale_types::models::ExtInMsgInfo; use everscale_types::prelude::Cell; // TYPES @@ -21,7 +21,7 @@ impl 
ExternalMessage { } } -pub(crate) struct MempoolAnchor { +pub struct MempoolAnchor { id: MempoolAnchorId, chain_time: u64, externals: Vec>, diff --git a/collator/src/types.rs b/collator/src/types.rs index 250567e10..beaebd775 100644 --- a/collator/src/types.rs +++ b/collator/src/types.rs @@ -1,13 +1,10 @@ use std::sync::Arc; -use anyhow::Result; use everscale_crypto::ed25519::KeyPair; -use everscale_types::cell::{CellBuilder, HashBytes}; -use everscale_types::models::{ - Block, BlockId, OwnedMessage, ShardIdent, ShardStateUnsplit, Signature, -}; +use everscale_types::cell::HashBytes; +use everscale_types::models::{Block, BlockId, OwnedMessage, ShardIdent, Signature}; use tycho_block_util::block::{BlockStuffAug, ValidatorSubsetInfo}; -use tycho_block_util::state::{MinRefMcStateTracker, ShardStateStuff}; +use tycho_block_util::state::ShardStateStuff; use tycho_network::{DhtClient, OverlayService, PeerResolver}; use tycho_util::FastHashMap; diff --git a/consensus/src/engine/engine.rs b/consensus/src/engine/engine.rs index e959b744c..02349f26e 100644 --- a/consensus/src/engine/engine.rs +++ b/consensus/src/engine/engine.rs @@ -30,15 +30,13 @@ pub struct Engine { impl Engine { pub async fn new( - secret_key: &SecretKey, + key_pair: Arc, dht_client: &DhtClient, overlay_service: &OverlayService, - peers: &Vec, committed: UnboundedSender<(Arc, Vec>)>, ) -> Self { - let key_pair = KeyPair::from(secret_key); let log_id = Arc::new(format!("{:?}", PeerId::from(key_pair.public_key).ugly())); - let peer_schedule = Arc::new(PeerSchedule::new(Arc::new(key_pair))); + let peer_schedule = Arc::new(PeerSchedule::new(key_pair)); let (bcast_tx, bcast_rx) = mpsc::unbounded_channel(); @@ -52,7 +50,7 @@ impl Engine { let dispatcher = Dispatcher::new( &dht_client, &overlay_service, - peers, + &[], // TODO: FIX PEERS Responder::new( log_id.clone(), broadcast_filter.clone(), @@ -78,7 +76,7 @@ impl Engine { // current epoch peer_schedule.set_next_start(genesis.body.location.round.next()); // start updater only after peers are populated into schedule - peer_schedule_updater.set_next_peers(peers); + peer_schedule_updater.set_next_peers(&[]); // TODO: FIX PEERS peer_schedule.rotate(); let current_dag_round = DagRound::genesis(&genesis, &peer_schedule); diff --git a/consensus/src/intercom/core/dispatcher.rs b/consensus/src/intercom/core/dispatcher.rs index 5c6901ae2..b123c4bac 100644 --- a/consensus/src/intercom/core/dispatcher.rs +++ b/consensus/src/intercom/core/dispatcher.rs @@ -19,7 +19,7 @@ impl Dispatcher { pub fn new( dht_client: &DhtClient, overlay_service: &OverlayService, - all_peers: &Vec, + all_peers: &[PeerId], responder: Responder, ) -> Self { let dht_service = dht_client.service(); diff --git a/consensus/src/intercom/peer_schedule/peer_schedule.rs b/consensus/src/intercom/peer_schedule/peer_schedule.rs index 2c51d0005..01fe4536f 100644 --- a/consensus/src/intercom/peer_schedule/peer_schedule.rs +++ b/consensus/src/intercom/peer_schedule/peer_schedule.rs @@ -179,7 +179,7 @@ impl PeerSchedule { } /// use [updater](super::PeerScheduleUpdater::set_next_peers()) - pub(super) fn set_next_peers(&self, peers: &Vec, overlay: &PrivateOverlay) { + pub(super) fn set_next_peers(&self, peers: &[PeerId], overlay: &PrivateOverlay) { let local_id = self.local_id(); let mut inner = self.inner.lock(); // check resolved peers only after blocking other threads from updating inner; diff --git a/consensus/src/intercom/peer_schedule/peer_schedule_updater.rs b/consensus/src/intercom/peer_schedule/peer_schedule_updater.rs index 
fd2c3d5af..6c7c01e33 100644 --- a/consensus/src/intercom/peer_schedule/peer_schedule_updater.rs +++ b/consensus/src/intercom/peer_schedule/peer_schedule_updater.rs @@ -31,7 +31,7 @@ impl PeerScheduleUpdater { self.listen().await } - pub fn set_next_peers(&self, peers: &Vec) { + pub fn set_next_peers(&self, peers: &[PeerId]) { self.peer_schedule.set_next_peers(&peers, &self.overlay) } From 2c94141faba2d161079542652283ad5cc66088ed Mon Sep 17 00:00:00 2001 From: Stanislav Eliseev Date: Fri, 10 May 2024 13:30:46 +0200 Subject: [PATCH 28/32] fix(mempool-adapter): fix project build --- cli/src/node/mod.rs | 1 + collator/tests/collation_tests.rs | 4 ++-- consensus/examples/consensus_node.rs | 8 ++++++-- consensus/src/test_utils.rs | 19 +++++++++---------- 4 files changed, 18 insertions(+), 14 deletions(-) diff --git a/cli/src/node/mod.rs b/cli/src/node/mod.rs index ed71ab163..9a17fd0f5 100644 --- a/cli/src/node/mod.rs +++ b/cli/src/node/mod.rs @@ -486,6 +486,7 @@ impl Node { supported_block_version: 50, supported_capabilities: supported_capabilities(), max_collate_threads: 1, + test_validators_keypairs: vec![] }; let collation_manager = CollationManager::start( diff --git a/collator/tests/collation_tests.rs b/collator/tests/collation_tests.rs index 8ce0ad7dc..c3e51c96f 100644 --- a/collator/tests/collation_tests.rs +++ b/collator/tests/collation_tests.rs @@ -7,7 +7,7 @@ use futures_util::future::BoxFuture; use tycho_block_util::state::MinRefMcStateTracker; use tycho_collator::collator::CollatorStdImplFactory; use tycho_collator::manager::CollationManager; -use tycho_collator::mempool::MempoolAdapterStdImpl; +use tycho_collator::mempool::{MempoolAdapterStdImpl, MempoolAdapterStubImpl}; use tycho_collator::msg_queue::MessageQueueAdapterStdImpl; use tycho_collator::state_node::{StateNodeAdapter, StateNodeAdapterStdImpl}; use tycho_collator::test_utils::{prepare_test_storage, try_init_test_tracing}; @@ -98,7 +98,7 @@ async fn test_collation_process_on_stubs() { config, Arc::new(MessageQueueAdapterStdImpl::default()), |listener| StateNodeAdapterStdImpl::new(listener, storage.clone()), - |listener| MempoolAdapterStdImpl::new(listener), + |listener| MempoolAdapterStubImpl::new(listener), ValidatorStdImplFactory { network: node_network.clone().into(), config: ValidatorConfig { diff --git a/consensus/examples/consensus_node.rs b/consensus/examples/consensus_node.rs index c455ab190..a9427eda6 100644 --- a/consensus/examples/consensus_node.rs +++ b/consensus/examples/consensus_node.rs @@ -10,6 +10,7 @@ use std::sync::Arc; use anyhow::Result; use clap::{Parser, Subcommand}; use everscale_crypto::ed25519; +use everscale_crypto::ed25519::KeyPair; use serde::{Deserialize, Serialize}; use tokio::sync::mpsc; use tracing_subscriber::layer::SubscriberExt; @@ -103,6 +104,7 @@ impl CmdRun { let global_config = GlobalConfig::from_file(self.global_config)?; let secret_key = parse_key(&self.key)?; + let key_pair = Arc::new(KeyPair::from(&secret_key)); let (dht_client, overlay) = tycho_consensus::test_utils::from_validator( self.addr, @@ -125,7 +127,7 @@ impl CmdRun { let (committed_tx, committed_rx) = mpsc::unbounded_channel(); let engine = - Engine::new(&secret_key, &dht_client, &overlay, &all_peers, committed_tx).await; + Engine::new(key_pair.clone(), &dht_client, &overlay, committed_tx).await; drain_anchors(committed_rx); tracing::info!( @@ -182,8 +184,10 @@ struct CmdGenDht { impl CmdGenDht { fn run(self) -> Result<()> { + let secret_key = parse_key(&self.key)?; + let key_pair = Arc::new(KeyPair::from(&secret_key)); 
let entry = tycho_consensus::test_utils::make_peer_info( - &parse_key(&self.key)?, + key_pair, self.addr.into(), self.ttl, ); diff --git a/consensus/src/test_utils.rs b/consensus/src/test_utils.rs index 35a29ef50..b3b6bf959 100644 --- a/consensus/src/test_utils.rs +++ b/consensus/src/test_utils.rs @@ -35,8 +35,7 @@ pub fn genesis() -> Point { .wrap(&genesis_keys) } -pub fn make_peer_info(key: &SecretKey, address: Address, ttl: Option) -> PeerInfo { - let keypair = KeyPair::from(key); +pub fn make_peer_info(keypair: Arc, address: Address, ttl: Option) -> PeerInfo { let peer_id = PeerId::from(keypair.public_key); let now = now_sec(); @@ -116,21 +115,22 @@ mod tests { use crate::engine::Engine; async fn make_network(node_count: usize) -> Vec { + let secret_key = SecretKey::generate(&mut rand::thread_rng()); let keys = (0..node_count) - .map(|_| SecretKey::generate(&mut rand::thread_rng())) + .map(|_| Arc::new(KeyPair::from(&secret_key))) .collect::>(); let all_peers = keys .iter() - .map(|s| PeerId::from(KeyPair::from(s).public_key)) + .map(|s| PeerId::from(s.public_key)) .collect::>(); let from_validators = keys .iter() - .map(|secret| { + .map(|key_pair| { from_validator( (Ipv4Addr::LOCALHOST, 0), - secret, + &secret_key, DhtConfig { local_info_announce_period: Duration::from_secs(1), local_info_announce_period_max_jitter: Duration::from_secs(1), @@ -146,7 +146,7 @@ mod tests { let peer_info = std::iter::zip(&keys, &from_validators) .map(|(key, (dht_client, _))| { Arc::new(make_peer_info( - key, + key.clone(), dht_client.network().local_addr().into(), None, )) @@ -165,12 +165,11 @@ mod tests { } let mut engines = vec![]; let (committed_tx, committed_rx) = mpsc::unbounded_channel(); - for (secret_key, (dht_client, overlay_service)) in keys.iter().zip(from_validators.iter()) { + for (key_pair, (dht_client, overlay_service)) in keys.into_iter().zip(from_validators.iter()) { let engine = Engine::new( - secret_key, + key_pair.clone(), &dht_client, &overlay_service, - &all_peers, committed_tx.clone(), ) .await; From 67023a38de8b179f6b737e207871d5665f17bce7 Mon Sep 17 00:00:00 2001 From: Stanislav Eliseev Date: Fri, 10 May 2024 13:32:42 +0200 Subject: [PATCH 29/32] fix(mempool-adapter): cargo fmt --- cli/src/node/mod.rs | 2 +- consensus/examples/consensus_node.rs | 10 +++------- consensus/src/test_utils.rs | 4 +++- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/cli/src/node/mod.rs b/cli/src/node/mod.rs index 9a17fd0f5..f1645d50a 100644 --- a/cli/src/node/mod.rs +++ b/cli/src/node/mod.rs @@ -486,7 +486,7 @@ impl Node { supported_block_version: 50, supported_capabilities: supported_capabilities(), max_collate_threads: 1, - test_validators_keypairs: vec![] + test_validators_keypairs: vec![], }; let collation_manager = CollationManager::start( diff --git a/consensus/examples/consensus_node.rs b/consensus/examples/consensus_node.rs index a9427eda6..de9288219 100644 --- a/consensus/examples/consensus_node.rs +++ b/consensus/examples/consensus_node.rs @@ -126,8 +126,7 @@ impl CmdRun { } let (committed_tx, committed_rx) = mpsc::unbounded_channel(); - let engine = - Engine::new(key_pair.clone(), &dht_client, &overlay, committed_tx).await; + let engine = Engine::new(key_pair.clone(), &dht_client, &overlay, committed_tx).await; drain_anchors(committed_rx); tracing::info!( @@ -186,11 +185,8 @@ impl CmdGenDht { fn run(self) -> Result<()> { let secret_key = parse_key(&self.key)?; let key_pair = Arc::new(KeyPair::from(&secret_key)); - let entry = tycho_consensus::test_utils::make_peer_info( - 
key_pair, - self.addr.into(), - self.ttl, - ); + let entry = + tycho_consensus::test_utils::make_peer_info(key_pair, self.addr.into(), self.ttl); let output = if std::io::stdin().is_terminal() { serde_json::to_string_pretty(&entry) } else { diff --git a/consensus/src/test_utils.rs b/consensus/src/test_utils.rs index b3b6bf959..7c0434b3c 100644 --- a/consensus/src/test_utils.rs +++ b/consensus/src/test_utils.rs @@ -165,7 +165,9 @@ mod tests { } let mut engines = vec![]; let (committed_tx, committed_rx) = mpsc::unbounded_channel(); - for (key_pair, (dht_client, overlay_service)) in keys.into_iter().zip(from_validators.iter()) { + for (key_pair, (dht_client, overlay_service)) in + keys.into_iter().zip(from_validators.iter()) + { let engine = Engine::new( key_pair.clone(), &dht_client, From e1283b1367d83d231a5122c465a52cbe7d6f60ef Mon Sep 17 00:00:00 2001 From: Kirill Mikheev Date: Fri, 10 May 2024 20:49:04 +0300 Subject: [PATCH 30/32] fix(consensus): changes from debug session --- Cargo.lock | 1 + consensus/Cargo.toml | 4 + consensus/examples/consensus_node.rs | 13 +- consensus/src/dag/dag_location.rs | 21 +- consensus/src/dag/dag_round.rs | 6 +- consensus/src/dag/producer.rs | 30 ++- consensus/src/dag/verifier.rs | 17 +- consensus/src/engine/engine.rs | 186 +++++++++++------- consensus/src/engine/mempool_config.rs | 2 +- .../src/intercom/broadcast/broadcaster.rs | 74 +++++-- .../src/intercom/dependency/downloader.rs | 52 ++--- consensus/src/intercom/dependency/uploader.rs | 50 +++-- consensus/src/intercom/dto.rs | 26 ++- .../intercom/peer_schedule/peer_schedule.rs | 14 ++ consensus/src/models/dag_point.rs | 7 + consensus/src/models/point.rs | 94 +++++---- consensus/src/test_utils.rs | 182 +++++++++-------- 17 files changed, 440 insertions(+), 339 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d6e7371a1..c3c859970 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2662,6 +2662,7 @@ dependencies = [ "serde", "serde_json", "sha2", + "tikv-jemallocator", "tokio", "tracing", "tracing-appender", diff --git a/consensus/Cargo.toml b/consensus/Cargo.toml index 7cd028c52..779116800 100644 --- a/consensus/Cargo.toml +++ b/consensus/Cargo.toml @@ -45,6 +45,10 @@ tycho-util = { workspace = true, features = ["test"] } parking_lot = { workspace = true, features = ["deadlock_detection"] } tokio = { workspace = true, default-features = false, features = ["rt-multi-thread", "macros"] } tracing-subscriber = { workspace = true, features = ["env-filter"] } +tikv-jemallocator = { workspace = true, features = [ + "unprefixed_malloc_on_supported_platforms", + "background_threads", +]} [lints] workspace = true diff --git a/consensus/examples/consensus_node.rs b/consensus/examples/consensus_node.rs index de9288219..1316205d5 100644 --- a/consensus/examples/consensus_node.rs +++ b/consensus/examples/consensus_node.rs @@ -10,7 +10,7 @@ use std::sync::Arc; use anyhow::Result; use clap::{Parser, Subcommand}; use everscale_crypto::ed25519; -use everscale_crypto::ed25519::KeyPair; +use everscale_crypto::ed25519::{KeyPair, PublicKey}; use serde::{Deserialize, Serialize}; use tokio::sync::mpsc; use tracing_subscriber::layer::SubscriberExt; @@ -119,15 +119,18 @@ impl CmdRun { .map(|info| info.id) .collect::>(); - let mut initial_peer_count = 0usize; + let mut initial_peer_count = 1_usize; + let local_id = PeerId::from(PublicKey::from(&secret_key)); for peer in global_config.bootstrap_peers { - let is_new = dht_client.add_peer(Arc::new(peer))?; - initial_peer_count += is_new as usize; + if peer.id != local_id { + let is_new 
= dht_client.add_peer(Arc::new(peer))?; + initial_peer_count += is_new as usize; + } } let (committed_tx, committed_rx) = mpsc::unbounded_channel(); let engine = Engine::new(key_pair.clone(), &dht_client, &overlay, committed_tx).await; - drain_anchors(committed_rx); + tokio::spawn(drain_anchors(committed_rx)); tracing::info!( local_id = %dht_client.network().peer_id(), diff --git a/consensus/src/dag/dag_location.rs b/consensus/src/dag/dag_location.rs index 5b50c802b..2ab029c71 100644 --- a/consensus/src/dag/dag_location.rs +++ b/consensus/src/dag/dag_location.rs @@ -34,18 +34,7 @@ pub struct DagLocation { } impl DagLocation { - pub fn insert_own_point(&mut self, my_point: &DagPoint) { - let old = self.versions.insert( - my_point.digest().clone(), - Shared::new(JoinTask::new(futures_util::future::ready(my_point.clone()))), - ); - assert!( - old.is_none(), - "Coding error: own point is already inserted into DAG location" - ); - self.state.insert_own_point(my_point); - } - pub fn add_dependency(&mut self, digest: &Digest, init: I) -> Shared> + pub fn get_or_init(&mut self, digest: &Digest, init: I) -> Shared> where I: FnOnce() -> F, F: Future + Send + 'static, @@ -62,11 +51,7 @@ impl DagLocation { } } } - pub fn add_validate( - &mut self, - digest: &Digest, - init: I, - ) -> Option<&'_ Shared>> + pub fn init(&mut self, digest: &Digest, init: I) -> Option<&'_ Shared>> where I: FnOnce() -> F, F: Future + Send + 'static, @@ -194,7 +179,7 @@ impl Signable { this_call_signed = true; Ok(Signed { at: at.clone(), - with: valid.point.body.sign(key_pair), + with: Signature::new(key_pair, &valid.point.digest), }) }); } else if &valid.point.body.time < time_range.start() { diff --git a/consensus/src/dag/dag_round.rs b/consensus/src/dag/dag_round.rs index 7da9b4575..6881dbfb7 100644 --- a/consensus/src/dag/dag_round.rs +++ b/consensus/src/dag/dag_round.rs @@ -153,7 +153,7 @@ impl DagRound { pub async fn valid_point_exact(&self, node: &PeerId, digest: &Digest) -> Option { let point_fut = self.view(node, |loc| loc.versions().get(digest).cloned())??; - point_fut.await.0.valid().cloned() + point_fut.await.0.into_valid() } pub fn add( @@ -179,7 +179,7 @@ impl DagRound { let state = loc.state().clone(); let point = point.clone(); let downloader = downloader.clone(); - loc.add_validate(digest, || Verifier::validate(point, dag_round, downloader)) + loc.init(digest, || Verifier::validate(point, dag_round, downloader)) .map(|first| first.clone().map(|_| state).boxed()) }) } @@ -230,7 +230,7 @@ impl DagRound { } self.edit(&dag_point.location().author, |loc| { let state = loc.state().clone(); - loc.add_validate(dag_point.digest(), || { + loc.init(dag_point.digest(), || { futures_util::future::ready(dag_point.clone()) }) .map(|first| first.clone().map(|_| state).boxed()) diff --git a/consensus/src/dag/producer.rs b/consensus/src/dag/producer.rs index 3c2f702f4..69b2c79a3 100644 --- a/consensus/src/dag/producer.rs +++ b/consensus/src/dag/producer.rs @@ -64,22 +64,20 @@ impl Producer { .into_iter() .map(|point| (point.body.location.author, point.digest.clone())) .collect::>(); - Some(Arc::new( - PointBody { - location: Location { - round: current_round.round().clone(), - author: local_id.clone(), - }, - time, - payload, - proof: prev_point.cloned(), - includes, - witness, - anchor_trigger, - anchor_proof, - } - .wrap(&key_pair), - )) + + Some(Point::new(key_pair, PointBody { + location: Location { + round: current_round.round().clone(), + author: local_id.clone(), + }, + time, + payload, + proof: prev_point.cloned(), + 
includes, + witness, + anchor_trigger, + anchor_proof, + })) } fn includes(finished_round: &DagRound) -> Vec> { diff --git a/consensus/src/dag/verifier.rs b/consensus/src/dag/verifier.rs index 8a18d1347..035d1ab8b 100644 --- a/consensus/src/dag/verifier.rs +++ b/consensus/src/dag/verifier.rs @@ -166,7 +166,7 @@ impl Verifier { ) { let downloader = downloader.clone(); let shared = round.edit(author, |loc| { - loc.add_dependency(digest, move || { + loc.get_or_init(digest, move || { let point_id = PointId { location: Location { author: author.clone(), @@ -385,21 +385,8 @@ impl Verifier { if point.body.time < proven.body.time { return false; // time must be non-decreasing by the same author } - let Some(body) = bincode::serialize(&proven.body).ok() else { - // should be removed after move to TL - panic!("Library error: failed to serialize proven point body") - }; for (peer, sig) in proof.evidence.iter() { - let Some(pubkey) = peer.as_public_key() else { - // should have been validated prior validator elections - panic!("Config error: failed to convert peer id into public key") - }; - let sig: Result<[u8; 64], _> = sig.0.to_vec().try_into(); - let Some(sig) = sig.ok() else { - // unexpected bytes used as a signature, thus invalid - return false; - }; - if !pubkey.verify_raw(body.as_slice(), &sig) { + if !sig.verifies(peer, &proof.digest) { return false; } } diff --git a/consensus/src/engine/engine.rs b/consensus/src/engine/engine.rs index 02349f26e..c4ab32f54 100644 --- a/consensus/src/engine/engine.rs +++ b/consensus/src/engine/engine.rs @@ -3,8 +3,8 @@ use std::sync::Arc; use everscale_crypto::ed25519::{KeyPair, SecretKey}; use itertools::Itertools; use tokio::sync::mpsc::UnboundedSender; -use tokio::sync::{mpsc, oneshot, watch}; -use tokio::task::JoinSet; +use tokio::sync::{mpsc, oneshot, RwLock}; +use tokio::task::{JoinError, JoinSet}; use tycho_network::{DhtClient, OverlayService, PeerId}; use crate::dag::{Dag, DagRound, InclusionState, Producer}; @@ -21,9 +21,10 @@ pub struct Engine { peer_schedule: Arc, dispatcher: Dispatcher, downloader: Downloader, + broadcaster: Broadcaster, collector: Collector, broadcast_filter: BroadcastFilter, - top_dag_round_watch: watch::Sender, + top_dag_round: Arc>, tasks: JoinSet<()>, // should be JoinSet committed: UnboundedSender<(Arc, Vec>)>, } @@ -58,8 +59,9 @@ impl Engine { uploader_tx, ), ); + let broadcaster = Broadcaster::new(log_id.clone(), &dispatcher); - let genesis = Arc::new(crate::test_utils::genesis()); + let genesis = crate::test_utils::genesis(); // check only genesis round as it is widely used in point validation. 
// if some nodes use distinct genesis data, their first points will be rejected assert_eq!( @@ -82,10 +84,10 @@ impl Engine { let current_dag_round = DagRound::genesis(&genesis, &peer_schedule); let dag = Dag::new(current_dag_round.clone()); - let (top_dag_round_tx, top_dag_round_rx) = watch::channel(current_dag_round.clone()); + let top_dag_round = Arc::new(RwLock::new(current_dag_round.clone())); let mut tasks = JoinSet::new(); - let uploader = Uploader::new(log_id.clone(), uploader_rx, top_dag_round_rx); + let uploader = Uploader::new(log_id.clone(), uploader_rx, top_dag_round.clone()); tasks.spawn(async move { uploader.run().await; }); @@ -119,58 +121,17 @@ impl Engine { peer_schedule, dispatcher, downloader, + broadcaster, collector, broadcast_filter, - top_dag_round_watch: top_dag_round_tx, + top_dag_round, tasks, committed, } } - async fn bcaster_run( - log_id: Arc, - produce_own_point: bool, - dispatcher: Dispatcher, - peer_schedule: Arc, - downloader: Downloader, - current_dag_round: DagRound, - prev_point: Option, - own_point_state: oneshot::Sender, - bcaster_ready_tx: mpsc::Sender, - mut collector_signal_rx: mpsc::UnboundedReceiver, - ) -> Option { - if produce_own_point { - if let Some(own_point) = - Producer::new_point(¤t_dag_round, prev_point.as_ref(), vec![]).await - { - let state = current_dag_round - .insert_exact_sign(&own_point, &peer_schedule, &downloader) - .await - .expect("own produced point must be valid"); - own_point_state.send(state).ok(); - let evidence = Broadcaster::new( - log_id.clone(), - &own_point, - &dispatcher, - &peer_schedule, - bcaster_ready_tx, - collector_signal_rx, - ) - .run() - .await; - return Some(PrevPoint { - digest: own_point.digest.clone(), - evidence: evidence.into_iter().collect(), - }); - } - } - _ = own_point_state; - collector_signal_rx.close(); - bcaster_ready_tx.send(BroadcasterSignal::Ok).await.ok(); - None - } pub async fn run(mut self) -> ! 
{ - let mut prev_point: Option = None; + let mut prev_point: Option> = None; let mut produce_own_point = true; loop { let current_dag_round = self @@ -179,7 +140,6 @@ impl Engine { let next_dag_round = self .dag .top(¤t_dag_round.round().next(), &self.peer_schedule); - self.top_dag_round_watch.send(next_dag_round.clone()).ok(); tracing::info!("{} @ {:?}", self.log_id, current_dag_round.round()); @@ -189,10 +149,11 @@ impl Engine { let (own_point_state_tx, own_point_state_rx) = oneshot::channel(); let bcaster_run = tokio::spawn(Self::bcaster_run( - self.log_id.clone(), produce_own_point, - self.dispatcher.clone(), + self.broadcaster, self.peer_schedule.clone(), + self.top_dag_round.clone(), + next_dag_round.clone(), self.downloader.clone(), current_dag_round.clone(), prev_point, @@ -221,26 +182,117 @@ impl Engine { )); match tokio::join!(collector_run, bcaster_run, commit_run, bcast_filter_run) { - (Ok(collector_upd), Ok(new_prev_point), Ok(()), Ok(())) => { + (Ok(collector_upd), Ok((bcaster, new_prev_point)), Ok(()), Ok(())) => { + self.broadcaster = bcaster; prev_point = new_prev_point; produce_own_point = next_dag_round.round() == collector_upd.next_round(); self.collector = collector_upd; } - (collector, bcaster, commit, bcast_filter_upd) => { - let msg = [ - (collector.err(), "collector"), - (bcaster.err(), "broadcaster"), - (commit.err(), "commit"), - (bcast_filter_upd.err(), "broadcast filter update"), - ] - .into_iter() - .filter_map(|(res, name)| { - res.map(|err| format!("{name} task panicked: {err:?}")) - }) - .join("; \n"); - panic!("{}", msg) + (collector, bcaster, commit, bcast_filter_upd) => Self::panic_on_join(&[ + (collector.err(), "collector"), + (bcaster.err(), "broadcaster"), + (commit.err(), "commit"), + (bcast_filter_upd.err(), "broadcast filter update"), + ]), + } + } + } + + async fn bcaster_run( + produce_own_point: bool, + mut broadcaster: Broadcaster, + peer_schedule: Arc, + top_dag_round: Arc>, + next_dag_round: DagRound, + downloader: Downloader, + current_dag_round: DagRound, + prev_point: Option>, + own_point_state: oneshot::Sender, + bcaster_ready_tx: mpsc::Sender, + mut collector_signal_rx: mpsc::UnboundedReceiver, + ) -> (Broadcaster, Option>) { + if produce_own_point { + let new_point = tokio::spawn(Self::produce( + current_dag_round, + prev_point, + peer_schedule.clone(), + downloader, + own_point_state, + )); + // must signal to uploader before start of broadcast + let top_dag_round_upd = + tokio::spawn(Self::update_top_round(top_dag_round, next_dag_round)); + let new_point = match tokio::join!(new_point, top_dag_round_upd) { + (Ok(new_point), Ok(())) => new_point, + (new_point, top_dag_round) => { + Self::panic_on_join(&[ + (new_point.err(), "new point producer"), + (top_dag_round.err(), "top dag round update"), + ]); } + }; + if let Some(own_point) = new_point { + let evidence = broadcaster + .run( + &own_point, + &peer_schedule, + bcaster_ready_tx, + collector_signal_rx, + ) + .await; + let prev_point = PrevPoint { + digest: own_point.digest.clone(), + evidence: evidence.into_iter().collect(), + }; + return (broadcaster, Some(Arc::new(prev_point))); } + } else { + _ = own_point_state; + Self::update_top_round(top_dag_round, next_dag_round).await; + } + collector_signal_rx.close(); + bcaster_ready_tx.send(BroadcasterSignal::Ok).await.ok(); + (broadcaster, None) + } + + async fn produce( + current_dag_round: DagRound, + prev_point: Option>, + peer_schedule: Arc, + downloader: Downloader, + own_point_state: oneshot::Sender, + ) -> Option> { + if let 
Some(own_point) = + Producer::new_point(¤t_dag_round, prev_point.as_deref(), vec![]).await + { + let state = current_dag_round + .insert_exact_sign(&own_point, &peer_schedule, &downloader) + .await + .expect("own produced point must be valid"); + own_point_state.send(state).ok(); + Some(own_point) + } else { + // _ = own_point_state; dropped + None } } + + async fn update_top_round(top_dag_round: Arc>, top: DagRound) { + // must wait while active uploads (from previous round) are enqueued to read; + // it would be incorrect to serve uploads from outdated round + // since the start of new point broadcast + let mut write = top_dag_round.write().await; + *write = top; + } + + fn panic_on_join(maybe_err: &[(Option, &'static str)]) -> ! { + let msg = maybe_err + .iter() + .filter_map(|(res, name)| { + res.as_ref() + .map(|err| format!("{name} task panicked: {err:?}")) + }) + .join("; \n"); + panic!("{}", msg) + } } diff --git a/consensus/src/engine/mempool_config.rs b/consensus/src/engine/mempool_config.rs index c4d1d4eda..3a37e5d78 100644 --- a/consensus/src/engine/mempool_config.rs +++ b/consensus/src/engine/mempool_config.rs @@ -38,5 +38,5 @@ impl MempoolConfig { /// every failed response is accounted as point is not found; /// 1/3+1 failed responses leads to invalidation of the point and all its dependants - pub const DOWNLOAD_TIMEOUT: Duration = Duration::from_millis(50); + pub const DOWNLOAD_SPAWN_INTERVAL: Duration = Duration::from_millis(50); } diff --git a/consensus/src/intercom/broadcast/broadcaster.rs b/consensus/src/intercom/broadcast/broadcaster.rs index 39e94be61..d06731f52 100644 --- a/consensus/src/intercom/broadcast/broadcaster.rs +++ b/consensus/src/intercom/broadcast/broadcaster.rs @@ -13,7 +13,7 @@ use tycho_util::{FastHashMap, FastHashSet}; use crate::intercom::broadcast::collector::CollectorSignal; use crate::intercom::dto::{PeerState, SignatureResponse}; use crate::intercom::{Dispatcher, PeerSchedule}; -use crate::models::{NodeCount, Point, Round, Signature}; +use crate::models::{Digest, NodeCount, Point, Round, Signature}; type BcastResult = anyhow::Result<()>; type SigResult = anyhow::Result; @@ -26,10 +26,49 @@ pub enum BroadcasterSignal { pub struct Broadcaster { log_id: Arc, - current_round: Round, + dispatcher: Dispatcher, + // do not throw away unfinished broadcasts from previous round + bcasts_outdated: FuturesUnordered>, +} + +impl Broadcaster { + pub fn new(log_id: Arc, dispatcher: &Dispatcher) -> Self { + Self { + log_id, + dispatcher: dispatcher.clone(), + bcasts_outdated: FuturesUnordered::new(), + } + } + pub async fn run( + &mut self, + point: &Point, + peer_schedule: &PeerSchedule, + bcaster_signal: mpsc::Sender, + collector_signal: mpsc::UnboundedReceiver, + ) -> FastHashMap { + let mut task = BroadcasterTask::new( + self.log_id.clone(), + point, + &self.dispatcher, + peer_schedule, + bcaster_signal, + collector_signal, + mem::take(&mut self.bcasts_outdated), + ); + task.run().await; + self.bcasts_outdated.extend(task.bcast_futs); + self.bcasts_outdated.extend(task.bcasts_outdated); + task.signatures + } +} - point_body: Vec, +struct BroadcasterTask { + log_id: Arc, dispatcher: Dispatcher, + bcasts_outdated: FuturesUnordered>, + + current_round: Round, + point_digest: Digest, bcaster_signal: mpsc::Sender, collector_signal: mpsc::UnboundedReceiver, @@ -50,16 +89,16 @@ pub struct Broadcaster { sig_futs: FuturesUnordered>, } -impl Broadcaster { - pub fn new( +impl BroadcasterTask { + fn new( log_id: Arc, point: &Point, dispatcher: &Dispatcher, peer_schedule: 
&PeerSchedule, bcaster_signal: mpsc::Sender, collector_signal: mpsc::UnboundedReceiver, + bcasts_outdated: FuturesUnordered>, ) -> Self { - let point_body = bincode::serialize(&point.body).expect("own point serializes to bytes"); let peer_updates = peer_schedule.updates(); let signers = peer_schedule .peers_for(&point.body.location.round.next()) @@ -77,9 +116,11 @@ impl Broadcaster { let sig_request = Dispatcher::signature_request(&point.body.location.round); Self { log_id, - current_round: point.body.location.round, - point_body, dispatcher: dispatcher.clone(), + bcasts_outdated, + + current_round: point.body.location.round, + point_digest: point.digest.clone(), bcaster_signal, collector_signal, @@ -100,7 +141,7 @@ impl Broadcaster { } } /// returns evidence for broadcast point - pub async fn run(mut self) -> FastHashMap { + pub async fn run(&mut self) { // how this was supposed to work: // * in short: broadcast to all and gather signatures from those who accepted the point // * both broadcast and signature tasks have their own retry loop for every peer @@ -122,9 +163,10 @@ impl Broadcaster { } loop { tokio::select! { + Some(_) = self.bcasts_outdated.next() => {} // let them complete Some(collector_signal) = self.collector_signal.recv() => { if self.should_finish(collector_signal).await { - break self.signatures + break; } } Some((peer_id, result)) = self.bcast_futs.next() => { @@ -226,7 +268,7 @@ impl Broadcaster { match response { SignatureResponse::Signature(signature) => { if self.signers.contains(&peer_id) { - if self.is_signature_ok(&peer_id, &signature) { + if signature.verifies(&peer_id, &self.point_digest) { self.signatures.insert(peer_id, signature); } else { // any invalid signature lowers our chances @@ -283,16 +325,6 @@ impl Broadcaster { } } - fn is_signature_ok(&self, peer_id: &PeerId, signature: &Signature) -> bool { - let sig_raw: Result<[u8; 64], _> = signature.0.to_vec().try_into(); - sig_raw - .ok() - .zip(peer_id.as_public_key()) - .map_or(false, |(sig_raw, pub_key)| { - pub_key.verify_raw(self.point_body.as_slice(), &sig_raw) - }) - } - fn match_peer_updates(&mut self, result: Result<(PeerId, PeerState), RecvError>) { match result { Ok((peer_id, new_state)) => { diff --git a/consensus/src/intercom/dependency/downloader.rs b/consensus/src/intercom/dependency/downloader.rs index d50ba662b..f41982161 100644 --- a/consensus/src/intercom/dependency/downloader.rs +++ b/consensus/src/intercom/dependency/downloader.rs @@ -8,7 +8,6 @@ use rand::prelude::{IteratorRandom, SmallRng}; use rand::SeedableRng; use tokio::sync::broadcast::error::RecvError; use tokio::sync::{broadcast, watch}; -use tokio::time::error::Elapsed; use tycho_network::PeerId; use tycho_util::{FastHashMap, FastHashSet}; @@ -51,7 +50,7 @@ impl Downloader { "point and DAG round mismatch" ); // request point from its signers (any dependant is among them as point is already verified) - let mut all_peers = self + let all_peers = self .peer_schedule .peers_for(&point_round.round().next()) .iter() @@ -61,17 +60,11 @@ impl Downloader { return DagPoint::NotExists(Arc::new(point_id)); }; // query author no matter if it is in the next round, but that can't affect 3F+1 - let completed = if all_peers.contains_key(&point_id.location.author) { - 0 - } else if self - .peer_schedule - .all_resolved() - .contains(&point_id.location.author) + let completed = match !all_peers.contains_key(&point_id.location.author) + && self.peer_schedule.is_resolved(&point_id.location.author) { - all_peers.insert(point_id.location.author, 
PeerState::Resolved); - -1 - } else { - 0 + true => -1, + false => 0, }; if all_peers.is_empty() { return DagPoint::NotExists(Arc::new(point_id)); @@ -114,9 +107,7 @@ struct DownloadTask { updates: broadcast::Receiver<(PeerId, PeerState)>, has_resolved_tx: watch::Sender, has_resolved_rx: watch::Receiver, - in_flight: FuturesUnordered< - BoxFuture<'static, (PeerId, Result, Elapsed>)>, - >, + in_flight: FuturesUnordered)>>, completed: i16, attempt: u8, } @@ -127,6 +118,7 @@ impl DownloadTask { pub async fn run(mut self) -> DagPoint { self.download_mandatory(); self.download(); + let mut interval = tokio::time::interval(MempoolConfig::DOWNLOAD_SPAWN_INTERVAL); loop { tokio::select! { Some((peer_id, resolved)) = self.in_flight.next() => @@ -134,6 +126,7 @@ impl DownloadTask { Some(dag_point) => break dag_point, None => continue }, + _ = interval.tick() => self.download(), update = self.updates.recv() => self.match_peer_updates(update), } } @@ -159,9 +152,8 @@ impl DownloadTask { fn download(&mut self) { self.attempt += 1; let count = (MempoolConfig::DOWNLOAD_PEERS as usize) - .saturating_pow(self.attempt as u32) - .saturating_sub(self.in_flight.len()) - .max(self.all_peers.len()); + .saturating_mul(self.attempt as usize) + .min(self.all_peers.len()); for peer_id in self .all_peers @@ -180,33 +172,26 @@ impl DownloadTask { fn download_one(&mut self, peer_id: &PeerId) { let peer_id = peer_id.clone(); self.in_flight.push( - tokio::time::timeout( - MempoolConfig::DOWNLOAD_TIMEOUT, - self.parent - .dispatcher - .query::(&peer_id, &self.request), - ) - .map(move |result| (peer_id, result.map(|(_, r)| r))) - .boxed(), + self.parent + .dispatcher + .query::(&peer_id, &self.request) + .boxed(), ); } async fn match_resolved( &mut self, peer_id: PeerId, - resolved: Result, Elapsed>, + resolved: anyhow::Result, ) -> Option { match resolved { - Err(_timeout) => { - tracing::error!("{} : {peer_id:.4?} timed out", self.parent.log_id); - } - Ok(Err(network_err)) => { + Err(network_err) => { tracing::error!( "{} : {peer_id:.4?} network error: {network_err}", self.parent.log_id ); } - Ok(Ok(PointByIdResponse(None))) => { + Ok(PointByIdResponse(None)) => { if self.mandatory.remove(&peer_id) { // it's a ban tracing::error!( @@ -222,7 +207,7 @@ impl DownloadTask { ); } } - Ok(Ok(PointByIdResponse(Some(point)))) => { + Ok(PointByIdResponse(Some(point))) => { if point.id() != self.point_id { // it's a ban tracing::error!( @@ -238,7 +223,6 @@ impl DownloadTask { // DAG could not have moved if this point was needed for commit return Some(DagPoint::NotExists(Arc::new(self.point_id.clone()))); }; - let point = Arc::new(point); match Verifier::verify(&point, &self.parent.peer_schedule) { Ok(()) => { let validated = diff --git a/consensus/src/intercom/dependency/uploader.rs b/consensus/src/intercom/dependency/uploader.rs index fb3b9d1f1..2db9a888b 100644 --- a/consensus/src/intercom/dependency/uploader.rs +++ b/consensus/src/intercom/dependency/uploader.rs @@ -1,23 +1,23 @@ -use std::ops::Deref; use std::sync::Arc; -use tokio::sync::{mpsc, oneshot, watch}; +use tokio::sync::{mpsc, oneshot, RwLock}; +use tycho_util::futures::{JoinTask, Shared}; use crate::dag::DagRound; use crate::intercom::dto::PointByIdResponse; -use crate::models::{DagPoint, Point, PointId, Ugly}; +use crate::models::{DagPoint, PointId, Ugly}; pub struct Uploader { log_id: Arc, requests: mpsc::UnboundedReceiver<(PointId, oneshot::Sender)>, - top_dag_round: watch::Receiver, + top_dag_round: Arc>, } impl Uploader { pub fn new( log_id: Arc, requests: 
mpsc::UnboundedReceiver<(PointId, oneshot::Sender)>, - top_dag_round: watch::Receiver, + top_dag_round: Arc>, ) -> Self { Self { log_id, @@ -28,10 +28,14 @@ impl Uploader { pub async fn run(mut self) -> ! { while let Some((point_id, callback)) = self.requests.recv().await { - let found = self.find(&point_id).await.map(|p| p.deref().clone()); + let found = match self.find(&point_id).await { + // uploader must hide points that it accounts as not eligible for signature + Some(shared) => shared.await.0.into_trusted().map(|trusted| trusted.point), + None => None, + }; if let Err(_) = callback.send(PointByIdResponse(found)) { - tracing::warn!( - "{} Uploader result channel closed for {:?}, requester's downloader timed out ? ", + tracing::debug!( + "{} Uploader result channel closed, requester {:?} cancelled download", self.log_id, point_id.ugly() ); @@ -40,27 +44,19 @@ impl Uploader { panic!("Uploader incoming channel closed") } - async fn find(&self, point_id: &PointId) -> Option> { - let top_dag_round = self.top_dag_round.borrow().clone(); + // Note: drops strong ref to DagRound as soon as possible - to let it be gone with weak ones + async fn find(&self, point_id: &PointId) -> Option>> { + let top_dag_round = { + let read = self.top_dag_round.read().await; + read.clone() + }; if &point_id.location.round > top_dag_round.round() { return None; } - let shared = top_dag_round - .scan(&point_id.location.round) - .map(|dag_round| { - dag_round - .view(&point_id.location.author, |loc| { - loc.versions().get(&point_id.digest).cloned() - }) - .flatten() - }) - .flatten()?; - // keep such matching private to Uploader, it must not be used elsewhere - match shared.await { - (DagPoint::Trusted(valid), _) => Some(valid.point), - (DagPoint::Suspicious(valid), _) => Some(valid.point), - (DagPoint::Invalid(invalid), _) => Some(invalid), - (DagPoint::NotExists(_), _) => None, - } + top_dag_round + .scan(&point_id.location.round)? + .view(&point_id.location.author, |loc| { + loc.versions().get(&point_id.digest).cloned() + })? 
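
// A sketch of the locking pattern `Uploader::find` uses just above: clone the shared value
// under a short-lived tokio `RwLock` read guard and drop the guard before any further awaits,
// so the writer (the engine's `update_top_round`) only waits for reads already in flight.
// The function names and the generic type are hypothetical.
use std::sync::Arc;
use tokio::sync::RwLock;

async fn snapshot<T: Clone>(shared: &Arc<RwLock<T>>) -> T {
    let guard = shared.read().await;
    guard.clone()
    // guard is dropped here, before the caller awaits anything else
}

async fn replace<T>(shared: &Arc<RwLock<T>>, new_value: T) {
    // waits for outstanding read guards to be released, then swaps the value
    *shared.write().await = new_value;
}
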
} } diff --git a/consensus/src/intercom/dto.rs b/consensus/src/intercom/dto.rs index 3f3057183..033b1bca9 100644 --- a/consensus/src/intercom/dto.rs +++ b/consensus/src/intercom/dto.rs @@ -1,9 +1,29 @@ -use serde::{Deserialize, Serialize}; +use std::sync::Arc; + +use serde::{Deserialize, Deserializer, Serialize, Serializer}; use crate::models::{Point, Signature}; -#[derive(Serialize, Deserialize, Debug)] -pub struct PointByIdResponse(pub Option); +#[derive(Debug)] +pub struct PointByIdResponse(pub Option>); +impl Serialize for PointByIdResponse { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + self.0.as_deref().serialize(serializer) + } +} + +impl<'de> Deserialize<'de> for PointByIdResponse { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let opt = Option::::deserialize(deserializer)?; + Ok(PointByIdResponse(opt.map(|point| Arc::new(point)))) + } +} #[derive(Serialize, Deserialize, PartialEq, Debug)] pub enum SignatureResponse { diff --git a/consensus/src/intercom/peer_schedule/peer_schedule.rs b/consensus/src/intercom/peer_schedule/peer_schedule.rs index 01fe4536f..e3e3b1315 100644 --- a/consensus/src/intercom/peer_schedule/peer_schedule.rs +++ b/consensus/src/intercom/peer_schedule/peer_schedule.rs @@ -115,6 +115,11 @@ impl PeerSchedule { inner.all_resolved(self.local_id()) } + pub fn is_resolved(&self, peer_id: &PeerId) -> bool { + let inner = self.inner.lock(); + inner.is_resolved(peer_id) + } + pub fn peers_for(&self, round: &Round) -> Arc> { let inner = self.inner.lock(); inner.peers_for_index_plus_one(inner.index_plus_one(round)) @@ -285,4 +290,13 @@ impl PeerScheduleInner { .map(|(peer_id, _)| *peer_id) .collect() } + + fn is_resolved(&self, peer_id: &PeerId) -> bool { + // used only in Downloader, such order fits its needs + self.peers_resolved[0] + .get(peer_id) + .or_else(|| self.peers_resolved[2].get(peer_id)) + .or_else(|| self.peers_resolved[1].get(peer_id)) + .map_or(false, |state| *state == PeerState::Resolved) + } } diff --git a/consensus/src/models/dag_point.rs b/consensus/src/models/dag_point.rs index 56e26c858..c2a0df445 100644 --- a/consensus/src/models/dag_point.rs +++ b/consensus/src/models/dag_point.rs @@ -41,6 +41,13 @@ impl DagPoint { } } + pub fn into_trusted(self) -> Option { + match self { + DagPoint::Trusted(valid) => Some(valid), + _ => None, + } + } + pub fn valid(&self) -> Option<&'_ ValidPoint> { match self { DagPoint::Trusted(valid) => Some(valid), diff --git a/consensus/src/models/point.rs b/consensus/src/models/point.rs index 4a74285da..d40a2969f 100644 --- a/consensus/src/models/point.rs +++ b/consensus/src/models/point.rs @@ -1,6 +1,7 @@ use std::collections::BTreeMap; use std::fmt::{Debug, Display, Formatter}; use std::ops::{Add, Sub}; +use std::sync::Arc; use bytes::Bytes; use everscale_crypto::ed25519::KeyPair; @@ -11,7 +12,8 @@ use tycho_network::PeerId; use crate::engine::MempoolConfig; #[derive(Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)] -pub struct Digest(pub [u8; 32]); +pub struct Digest([u8; 32]); + impl Display for Digest { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { let len = f.precision().unwrap_or(32); @@ -30,8 +32,18 @@ impl Debug for Digest { } } +impl Digest { + fn new(point_body: &PointBody) -> Self { + let body = bincode::serialize(&point_body).expect("shouldn't happen"); + let mut hasher = Sha256::new(); + hasher.update(body.as_slice()); + Self(hasher.finalize().into()) + } +} + #[derive(Clone, Serialize, Deserialize, PartialEq)] -pub 
struct Signature(pub Bytes); +pub struct Signature(Bytes); + impl Display for Signature { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { let len = f.precision().unwrap_or(64); @@ -49,6 +61,23 @@ impl Debug for Signature { } } +impl Signature { + pub fn new(local_keypair: &KeyPair, digest: &Digest) -> Self { + let sig = local_keypair.sign_raw(digest.0.as_slice()); + Self(Bytes::from(sig.to_vec())) + } + + pub fn verifies(&self, signer: &PeerId, digest: &Digest) -> bool { + let sig_raw: Result<[u8; 64], _> = self.0.to_vec().try_into(); + sig_raw + .ok() + .zip(signer.as_public_key()) + .map_or(false, |(sig_raw, pub_key)| { + pub_key.verify_raw(digest.0.as_slice(), &sig_raw) + }) + } +} + #[derive(Copy, Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] pub struct Round(pub u32); @@ -173,44 +202,31 @@ pub struct PointBody { pub anchor_proof: Link, } -impl PointBody { - pub fn wrap(self, local_keypair: &KeyPair) -> Point { - assert_eq!( - self.location.author, - PeerId::from(local_keypair.public_key), - "produced point author must match local key pair" - ); - let body = bincode::serialize(&self).expect("shouldn't happen"); - let sig = local_keypair.sign_raw(body.as_slice()); - let mut hasher = Sha256::new(); - hasher.update(body.as_slice()); - hasher.update(sig.as_slice()); - let digest = Digest(hasher.finalize().into()); - Point { - body: self, - signature: Signature(Bytes::from(sig.to_vec())), - digest, - } - } - - pub fn sign(&self, local_keypair: &KeyPair) -> Signature { - let body = bincode::serialize(&self).expect("shouldn't happen"); - let sig = local_keypair.sign_raw(body.as_slice()); - Signature(Bytes::from(sig.to_vec())) - } -} - // Todo: Arc => Point(Arc<...{...}>) #[derive(Clone, Serialize, Deserialize, Debug)] pub struct Point { pub body: PointBody, - // author's signature for the body - pub signature: Signature, - // hash of both data and author's signature + // hash of the point's body (includes author peer id) pub digest: Digest, + // author's signature for the digest + pub signature: Signature, } impl Point { + pub fn new(local_keypair: &KeyPair, point_body: PointBody) -> Arc { + assert_eq!( + point_body.location.author, + PeerId::from(local_keypair.public_key), + "produced point author must match local key pair" + ); + let digest = Digest::new(&point_body); + Arc::new(Point { + body: point_body, + signature: Signature::new(local_keypair, &digest), + digest, + }) + } + pub fn id(&self) -> PointId { PointId { location: self.body.location.clone(), @@ -235,17 +251,9 @@ impl Point { /// blame every dependent point author and the sender of this point, /// do not use the author from point's body pub fn is_integrity_ok(&self) -> bool { - let pubkey = self.body.location.author.as_public_key(); - let body = bincode::serialize(&self.body).ok(); - let sig: Result<[u8; 64], _> = self.signature.0.to_vec().try_into(); - let Some(((pubkey, body), sig)) = pubkey.zip(body).zip(sig.ok()) else { - return false; - }; - let mut hasher = Sha256::new(); - hasher.update(body.as_slice()); - hasher.update(sig.as_slice()); - let digest = Digest(hasher.finalize().into()); - pubkey.verify_raw(body.as_slice(), &sig) && digest == self.digest + self.signature + .verifies(&self.body.location.author, &self.digest) + && self.digest == Digest::new(&self.body) } /// blame author and every dependent point's author diff --git a/consensus/src/test_utils.rs b/consensus/src/test_utils.rs index 7c0434b3c..c4eaab943 100644 --- a/consensus/src/test_utils.rs +++ 
b/consensus/src/test_utils.rs @@ -1,9 +1,7 @@ -use std::net::ToSocketAddrs; use std::sync::Arc; use everscale_crypto::ed25519::{KeyPair, PublicKey, SecretKey}; use tokio::sync::mpsc::UnboundedReceiver; -use tokio::task::JoinHandle; use tycho_network::{ Address, DhtClient, DhtConfig, DhtService, Network, NetworkConfig, OverlayService, PeerId, PeerInfo, Router, ToSocket, @@ -16,10 +14,10 @@ use crate::models::{Link, Location, Point, PointBody, UnixTime}; const GENESIS_SECRET_KEY_BYTES: [u8; 32] = [0xAE; 32]; const GENESIS_MILLIS: u64 = 1713225727398; -pub fn genesis() -> Point { +pub fn genesis() -> Arc { let genesis_keys = KeyPair::from(&SecretKey::from_bytes(GENESIS_SECRET_KEY_BYTES)); - PointBody { + Point::new(&genesis_keys, PointBody { location: Location { round: MempoolConfig::GENESIS_ROUND, author: genesis_keys.public_key.into(), @@ -31,8 +29,7 @@ pub fn genesis() -> Point { witness: Default::default(), anchor_trigger: Link::ToSelf, anchor_proof: Link::ToSelf, - } - .wrap(&genesis_keys) + }) } pub fn make_peer_info(keypair: Arc, address: Address, ttl: Option) -> PeerInfo { @@ -87,123 +84,136 @@ pub fn from_validator( (dht_service.make_client(&network), overlay_service) } -pub fn drain_anchors( - mut committed: UnboundedReceiver<(Arc, Vec>)>, -) -> JoinHandle<()> { - tokio::spawn(async move { - loop { - _ = committed - .recv() - .await - .expect("committed anchor reader must be alive"); - } - }) +pub async fn drain_anchors(mut committed: UnboundedReceiver<(Arc, Vec>)>) { + loop { + _ = committed + .recv() + .await + .expect("committed anchor reader must be alive"); + } } #[cfg(test)] mod tests { use std::net::Ipv4Addr; use std::sync::Arc; - use std::thread; use std::time::Duration; use parking_lot::deadlock; use tokio::sync::mpsc; - use tokio::task::JoinSet; use super::*; use crate::engine::Engine; - async fn make_network(node_count: usize) -> Vec { - let secret_key = SecretKey::generate(&mut rand::thread_rng()); + #[global_allocator] + static ALLOC: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; + + fn make_network( + node_count: usize, + workers_per_node: usize, + ) -> Vec> { let keys = (0..node_count) - .map(|_| Arc::new(KeyPair::from(&secret_key))) + .map(|_| SecretKey::generate(&mut rand::thread_rng())) .collect::>(); let all_peers = keys .iter() - .map(|s| PeerId::from(s.public_key)) + .map(|s| PeerId::from(KeyPair::from(s).public_key)) .collect::>(); - let from_validators = keys + let addresses = keys .iter() - .map(|key_pair| { - from_validator( - (Ipv4Addr::LOCALHOST, 0), - &secret_key, - DhtConfig { - local_info_announce_period: Duration::from_secs(1), - local_info_announce_period_max_jitter: Duration::from_secs(1), - routing_table_refresh_period: Duration::from_secs(1), - routing_table_refresh_period_max_jitter: Duration::from_secs(1), - ..Default::default() - }, - NetworkConfig::default(), - ) + .map(|_| { + std::net::UdpSocket::bind((Ipv4Addr::LOCALHOST, 0)) + .expect("bind udp socket") + .local_addr() + .expect("local address") + .into() }) - .collect::>(); + .collect::>(); - let peer_info = std::iter::zip(&keys, &from_validators) - .map(|(key, (dht_client, _))| { - Arc::new(make_peer_info( - key.clone(), - dht_client.network().local_addr().into(), - None, - )) - }) + let peer_info = keys + .iter() + .zip(addresses.iter()) + .map(|(key, addr)| Arc::new(make_peer_info(key, addr.clone(), None))) .collect::>(); - for (dht_client, _) in from_validators.iter() { - for info in &peer_info { - if info.id == dht_client.network().peer_id() { - continue; - } - dht_client 
- .add_peer(info.clone()) - .expect("add peer to dht client"); - } - } - let mut engines = vec![]; - let (committed_tx, committed_rx) = mpsc::unbounded_channel(); - for (key_pair, (dht_client, overlay_service)) in - keys.into_iter().zip(from_validators.iter()) + let mut handles = vec![]; + for ((secret_key, address), peer_id) in keys + .into_iter() + .zip(addresses.into_iter()) + .zip(peer_info.iter().map(|p| p.id)) { - let engine = Engine::new( - key_pair.clone(), - &dht_client, - &overlay_service, - committed_tx.clone(), - ) - .await; - tracing::info!("created engine {}", dht_client.network().peer_id()); - engines.push(engine); + let all_peers = all_peers.clone(); + let peer_info = peer_info.clone(); + let handle = std::thread::spawn(move || { + tokio::runtime::Builder::new_multi_thread() + .enable_all() + .worker_threads(workers_per_node) + .thread_name(format!("tokio-runtime-{peer_id:.4?}")) + .build() + .expect("new tokio runtime") + .block_on(async move { + let (dht_client, overlay_service) = from_validator( + address, + &secret_key, + DhtConfig { + local_info_announce_period: Duration::from_secs(1), + local_info_announce_period_max_jitter: Duration::from_secs(1), + routing_table_refresh_period: Duration::from_secs(1), + routing_table_refresh_period_max_jitter: Duration::from_secs(1), + ..Default::default() + }, + NetworkConfig::default(), + ); + for info in &peer_info { + if info.id != dht_client.network().peer_id() { + dht_client + .add_peer(info.clone()) + .expect("add peer to dht client"); + } + } + + let (committed_tx, committed_rx) = mpsc::unbounded_channel(); + tokio::spawn(drain_anchors(committed_rx)); + let engine = Engine::new( + &secret_key, + &dht_client, + &overlay_service, + &all_peers, + committed_tx.clone(), + ) + .await; + tracing::info!("created engine {}", dht_client.network().peer_id()); + engine.run().await; + }); + }); + handles.push(handle); } - drain_anchors(committed_rx); - - engines + handles } - #[tokio::test(flavor = "multi_thread", worker_threads = 4)] - async fn engine_works() -> Result<(), ()> { + #[test] + fn engine_works() -> Result<(), anyhow::Error> { // tracing_subscriber::fmt::try_init().ok(); // tracing::info!("engine_works"); - tycho_util::test::init_logger("engine_works", "info,tycho_consensus=debug"); + tycho_util::test::init_logger( + "engine_works", + "info,tycho_consensus=info,tycho_network=info", + ); - check_parking_lot(); + // check_parking_lot(); heart_beat(); - let mut js = JoinSet::new(); - for engine in make_network(4).await { - js.spawn(engine.run()); - } - while let Some(res) = js.join_next().await { - res.unwrap(); + let handles = make_network(21, 2); + for handle in handles { + handle.join().unwrap(); } Ok(()) } pub fn check_parking_lot() { - thread::spawn(move || loop { - thread::sleep(Duration::from_secs(10)); + std::thread::spawn(move || loop { + std::thread::sleep(Duration::from_secs(10)); let deadlocks = deadlock::check_deadlock(); if deadlocks.is_empty() { continue; @@ -222,8 +232,8 @@ mod tests { pub fn heart_beat() { // Create a background thread which checks for deadlocks every 10s - thread::spawn(move || loop { - thread::sleep(Duration::from_secs(1)); + std::thread::spawn(move || loop { + std::thread::sleep(Duration::from_secs(1)); tracing::info!("heart beat"); }); } From e32c0cf3c458abe957e75ea1354b17b1b5f46e7b Mon Sep 17 00:00:00 2001 From: Kirill Mikheev Date: Fri, 10 May 2024 20:51:19 +0300 Subject: [PATCH 31/32] feat(consensus): init engine outside of constructor --- collator/src/mempool/mempool_adapter.rs | 7 +- 
consensus/examples/consensus_node.rs | 7 +- consensus/src/dag/dag.rs | 14 +-- consensus/src/dag/dag_round.rs | 16 +++- consensus/src/dag/verifier.rs | 2 +- consensus/src/engine/engine.rs | 86 +++++++++++-------- .../intercom/broadcast/broadcast_filter.rs | 2 +- consensus/src/intercom/broadcast/collector.rs | 14 +-- consensus/src/intercom/core/dispatcher.rs | 2 - .../peer_schedule/peer_schedule_updater.rs | 74 ++++++++++------ consensus/src/models/point.rs | 6 +- consensus/src/test_utils.rs | 18 ++-- network/src/overlay/private_overlay.rs | 7 ++ 13 files changed, 157 insertions(+), 98 deletions(-) diff --git a/collator/src/mempool/mempool_adapter.rs b/collator/src/mempool/mempool_adapter.rs index ed74222f6..2c16b8527 100644 --- a/collator/src/mempool/mempool_adapter.rs +++ b/collator/src/mempool/mempool_adapter.rs @@ -128,9 +128,10 @@ impl MempoolAdapterStdImpl { tokio::sync::mpsc::unbounded_channel::<(Arc, Vec>)>(); tokio::spawn(async move { - let engine = - tycho_consensus::Engine::new(key_pair, &dht_client, &overlay_service, sender).await; - + let mut engine = + tycho_consensus::Engine::new(key_pair, &dht_client, &overlay_service, sender); + // TODO replace with some sensible init before run + engine.init_with_genesis(&[]).await; engine.run().await; }); diff --git a/consensus/examples/consensus_node.rs b/consensus/examples/consensus_node.rs index 1316205d5..a8b70b64c 100644 --- a/consensus/examples/consensus_node.rs +++ b/consensus/examples/consensus_node.rs @@ -129,7 +129,8 @@ impl CmdRun { } let (committed_tx, committed_rx) = mpsc::unbounded_channel(); - let engine = Engine::new(key_pair.clone(), &dht_client, &overlay, committed_tx).await; + let mut engine = Engine::new(key_pair.clone(), &dht_client, &overlay, committed_tx); + engine.init_with_genesis(all_peers.as_slice()).await; tokio::spawn(drain_anchors(committed_rx)); tracing::info!( @@ -187,9 +188,9 @@ struct CmdGenDht { impl CmdGenDht { fn run(self) -> Result<()> { let secret_key = parse_key(&self.key)?; - let key_pair = Arc::new(KeyPair::from(&secret_key)); + let key_pair = KeyPair::from(&secret_key); let entry = - tycho_consensus::test_utils::make_peer_info(key_pair, self.addr.into(), self.ttl); + tycho_consensus::test_utils::make_peer_info(&key_pair, self.addr.into(), self.ttl); let output = if std::io::stdin().is_terminal() { serde_json::to_string_pretty(&entry) } else { diff --git a/consensus/src/dag/dag.rs b/consensus/src/dag/dag.rs index 60a94bcde..dc7846c82 100644 --- a/consensus/src/dag/dag.rs +++ b/consensus/src/dag/dag.rs @@ -21,18 +21,22 @@ pub struct Dag { } impl Dag { - pub fn new(dag_round: DagRound) -> Self { - let mut rounds = BTreeMap::new(); - rounds.insert(dag_round.round().clone(), dag_round); + pub fn new() -> Self { Self { - rounds: Arc::new(Mutex::new(rounds)), + rounds: Arc::new(Mutex::new(BTreeMap::new())), } } + pub fn init(&self, dag_round: DagRound) { + let mut rounds = self.rounds.lock(); + assert!(rounds.is_empty(), "DAG already initialized"); + rounds.insert(dag_round.round().clone(), dag_round); + } + pub fn top(&self, round: &Round, peer_schedule: &PeerSchedule) -> DagRound { let mut rounds = self.rounds.lock(); let mut top = match rounds.last_key_value() { - None => unreachable!("DAG cannot be empty"), + None => unreachable!("DAG cannot be empty if properly initialized?"), Some((_, top)) => top.clone(), }; if (top.round().0 + MempoolConfig::COMMIT_DEPTH as u32) < round.0 { diff --git a/consensus/src/dag/dag_round.rs b/consensus/src/dag/dag_round.rs index 6881dbfb7..a7e9a9aa1 100644 --- 
a/consensus/src/dag/dag_round.rs +++ b/consensus/src/dag/dag_round.rs @@ -37,6 +37,18 @@ impl WeakDagRound { } impl DagRound { + /// stub that must remain unlinked into DAG chain and only to be replaced + pub fn unusable() -> Self { + Self(Arc::new(DagRoundInner { + round: Round::BOTTOM, + node_count: NodeCount::GENESIS, + key_pair: None, + anchor_stage: None, + locations: FastDashMap::default(), + prev: WeakDagRound::BOTTOM, + })) + } + pub fn new(round: Round, peer_schedule: &PeerSchedule, prev: WeakDagRound) -> Self { let peers = peer_schedule.peers_for(&round); let locations = FastDashMap::with_capacity_and_hasher(peers.len(), Default::default()); @@ -195,8 +207,8 @@ impl DagRound { panic!("Coding error: malformed point") } let point = Verifier::validate(point.clone(), self.clone(), downloader.clone()).await; - if point.valid().is_none() { - panic!("Coding error: not a valid point") + if point.trusted().is_none() { + panic!("Coding error: not a trusted point") } let state = self.insert_exact(&point)?.await; if let Some(signable) = state.signable() { diff --git a/consensus/src/dag/verifier.rs b/consensus/src/dag/verifier.rs index 035d1ab8b..1dc2b2c69 100644 --- a/consensus/src/dag/verifier.rs +++ b/consensus/src/dag/verifier.rs @@ -368,7 +368,7 @@ impl Verifier { /// blame author and every dependent point's author fn is_proof_ok( point: &Point, // @ r+0 - proven: &Point, // @ r-1 + proven: &Point, // @ r-1 ) -> bool { if point.body.location.author != proven.body.location.author { panic!("Coding error: mismatched authors of proof and its vertex") diff --git a/consensus/src/engine/engine.rs b/consensus/src/engine/engine.rs index c4ab32f54..d036c2786 100644 --- a/consensus/src/engine/engine.rs +++ b/consensus/src/engine/engine.rs @@ -1,6 +1,6 @@ use std::sync::Arc; -use everscale_crypto::ed25519::{KeyPair, SecretKey}; +use everscale_crypto::ed25519::KeyPair; use itertools::Itertools; use tokio::sync::mpsc::UnboundedSender; use tokio::sync::{mpsc, oneshot, RwLock}; @@ -19,6 +19,7 @@ pub struct Engine { log_id: Arc, dag: Dag, peer_schedule: Arc, + peer_schedule_updater: PeerScheduleUpdater, dispatcher: Dispatcher, downloader: Downloader, broadcaster: Broadcaster, @@ -30,7 +31,7 @@ pub struct Engine { } impl Engine { - pub async fn new( + pub fn new( key_pair: Arc, dht_client: &DhtClient, overlay_service: &OverlayService, @@ -51,7 +52,6 @@ impl Engine { let dispatcher = Dispatcher::new( &dht_client, &overlay_service, - &[], // TODO: FIX PEERS Responder::new( log_id.clone(), broadcast_filter.clone(), @@ -61,38 +61,21 @@ impl Engine { ); let broadcaster = Broadcaster::new(log_id.clone(), &dispatcher); - let genesis = crate::test_utils::genesis(); - // check only genesis round as it is widely used in point validation. 
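
// This commit moves genesis seeding out of constructors: `Dag::new` above returns an empty
// store, and `Engine::init_with_genesis` below fills it exactly once before `run`. A reduced
// model of that two-phase initialization, using the same parking_lot-guarded BTreeMap shape;
// the `Rounds` type and its methods are hypothetical.
use std::collections::BTreeMap;
use std::sync::Arc;

use parking_lot::Mutex;

struct Rounds<K: Ord, V> {
    inner: Arc<Mutex<BTreeMap<K, V>>>,
}

impl<K: Ord, V> Rounds<K, V> {
    fn new() -> Self {
        Self {
            inner: Arc::new(Mutex::new(BTreeMap::new())),
        }
    }
    fn init(&self, key: K, value: V) {
        let mut guard = self.inner.lock();
        assert!(guard.is_empty(), "already initialized");
        guard.insert(key, value);
    }
}
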
- // if some nodes use distinct genesis data, their first points will be rejected - assert_eq!( - genesis.body.location.round, - MempoolConfig::GENESIS_ROUND, - "genesis point round must match genesis round from config" - ); let peer_schedule_updater = PeerScheduleUpdater::new(dispatcher.overlay.clone(), peer_schedule.clone()); - // finished epoch - peer_schedule.set_next_start(genesis.body.location.round); - peer_schedule_updater.set_next_peers(&vec![genesis.body.location.author]); - peer_schedule.rotate(); - // current epoch - peer_schedule.set_next_start(genesis.body.location.round.next()); - // start updater only after peers are populated into schedule - peer_schedule_updater.set_next_peers(&[]); // TODO: FIX PEERS - peer_schedule.rotate(); - - let current_dag_round = DagRound::genesis(&genesis, &peer_schedule); - let dag = Dag::new(current_dag_round.clone()); - let top_dag_round = Arc::new(RwLock::new(current_dag_round.clone())); + let top_dag_round = Arc::new(RwLock::new(DagRound::unusable())); let mut tasks = JoinSet::new(); let uploader = Uploader::new(log_id.clone(), uploader_rx, top_dag_round.clone()); tasks.spawn(async move { uploader.run().await; }); - tasks.spawn(async move { - peer_schedule_updater.run().await; + tasks.spawn({ + let peer_schedule_updater = peer_schedule_updater.clone(); + async move { + peer_schedule_updater.run().await; + } }); tasks.spawn({ let broadcast_filter = broadcast_filter.clone(); @@ -103,22 +86,13 @@ impl Engine { let downloader = Downloader::new(log_id.clone(), &dispatcher, &peer_schedule); - let genesis_state = current_dag_round - .insert_exact_sign(&genesis, &peer_schedule, &downloader) - .await; - let collector = Collector::new( - log_id.clone(), - &downloader, - bcast_rx, - sig_responses, - genesis_state.into_iter(), - current_dag_round.round().next(), - ); + let collector = Collector::new(log_id.clone(), &downloader, bcast_rx, sig_responses); Self { log_id, - dag, + dag: Dag::new(), peer_schedule, + peer_schedule_updater, dispatcher, downloader, broadcaster, @@ -130,6 +104,42 @@ impl Engine { } } + pub async fn init_with_genesis(&mut self, next_peers: &[PeerId]) { + let genesis = crate::test_utils::genesis(); + assert!( + genesis.body.location.round > *self.top_dag_round.read().await.round(), + "genesis point round is too low" + ); + // check only genesis round as it is widely used in point validation. + // if some nodes use distinct genesis data, their first points will be rejected + assert_eq!( + genesis.body.location.round, + MempoolConfig::GENESIS_ROUND, + "genesis point round must match genesis round from config" + ); + // finished epoch + self.peer_schedule + .set_next_start(genesis.body.location.round); + self.peer_schedule_updater + .set_next_peers(&vec![genesis.body.location.author], false); + self.peer_schedule.rotate(); + // current epoch + self.peer_schedule + .set_next_start(genesis.body.location.round.next()); + // start updater only after peers are populated into schedule + self.peer_schedule_updater.set_next_peers(next_peers, true); + self.peer_schedule.rotate(); + + let current_dag_round = DagRound::genesis(&genesis, &self.peer_schedule); + self.dag.init(current_dag_round.clone()); + + let genesis_state = current_dag_round + .insert_exact_sign(&genesis, &self.peer_schedule, &self.downloader) + .await; + self.collector + .init(current_dag_round.round().next(), genesis_state.into_iter()); + } + pub async fn run(mut self) -> ! 
{ let mut prev_point: Option> = None; let mut produce_own_point = true; diff --git a/consensus/src/intercom/broadcast/broadcast_filter.rs b/consensus/src/intercom/broadcast/broadcast_filter.rs index e958e189a..55b72f64b 100644 --- a/consensus/src/intercom/broadcast/broadcast_filter.rs +++ b/consensus/src/intercom/broadcast/broadcast_filter.rs @@ -27,7 +27,7 @@ impl BroadcastFilter { log_id, last_by_peer: Default::default(), by_round: Default::default(), - current_dag_round: Default::default(), // will advance with other peers + current_dag_round: AtomicU32::new(Round::BOTTOM.0), // will advance with other peers peer_schedule, output, })) diff --git a/consensus/src/intercom/broadcast/collector.rs b/consensus/src/intercom/broadcast/collector.rs index df29f4b7e..c8e46fbe4 100644 --- a/consensus/src/intercom/broadcast/collector.rs +++ b/consensus/src/intercom/broadcast/collector.rs @@ -39,21 +39,23 @@ impl Collector { downloader: &Downloader, from_bcast_filter: mpsc::UnboundedReceiver, signature_requests: mpsc::UnboundedReceiver, - next_includes: impl Iterator, - next_round: Round, ) -> Self { Self { log_id, downloader: downloader.clone(), from_bcast_filter, signature_requests, - next_round, - next_includes: FuturesUnordered::from_iter( - next_includes.map(|a| futures_util::future::ready(a).boxed()), - ), + next_round: Round::BOTTOM, + next_includes: FuturesUnordered::new(), } } + pub fn init(&mut self, next_round: Round, next_includes: impl Iterator) { + self.next_round = next_round; + self.next_includes + .extend(next_includes.map(|a| futures_util::future::ready(a).boxed())); + } + pub async fn run( mut self, next_dag_round: DagRound, // r+1 diff --git a/consensus/src/intercom/core/dispatcher.rs b/consensus/src/intercom/core/dispatcher.rs index b123c4bac..a3a5a45bd 100644 --- a/consensus/src/intercom/core/dispatcher.rs +++ b/consensus/src/intercom/core/dispatcher.rs @@ -19,7 +19,6 @@ impl Dispatcher { pub fn new( dht_client: &DhtClient, overlay_service: &OverlayService, - all_peers: &[PeerId], responder: Responder, ) -> Self { let dht_service = dht_client.service(); @@ -27,7 +26,6 @@ impl Dispatcher { let private_overlay = PrivateOverlay::builder(Self::PRIVATE_OVERLAY_ID) .with_peer_resolver(peer_resolver) - .with_entries(all_peers) .build(responder); overlay_service.add_private_overlay(&private_overlay); diff --git a/consensus/src/intercom/peer_schedule/peer_schedule_updater.rs b/consensus/src/intercom/peer_schedule/peer_schedule_updater.rs index 6c7c01e33..c9fb6be3c 100644 --- a/consensus/src/intercom/peer_schedule/peer_schedule_updater.rs +++ b/consensus/src/intercom/peer_schedule/peer_schedule_updater.rs @@ -1,11 +1,17 @@ +use std::future::Future; use std::sync::Arc; +use futures_util::stream::FuturesUnordered; use futures_util::StreamExt; use parking_lot::Mutex; -use rand::prelude::IteratorRandom; +use rand::prelude::SmallRng; +use rand::SeedableRng; use tokio::sync::broadcast::error::RecvError; use tokio::task::AbortHandle; -use tycho_network::{PeerId, PrivateOverlay, PrivateOverlayEntriesEvent}; +use tycho_network::{ + KnownPeerHandle, PeerId, PrivateOverlay, PrivateOverlayEntriesEvent, + PrivateOverlayEntriesReadGuard, PrivateOverlayEntriesWriteGuard, +}; use crate::intercom::PeerSchedule; @@ -27,40 +33,54 @@ impl PeerScheduleUpdater { pub async fn run(self) -> ! 
{ tracing::info!("started peer schedule updater"); - self.respawn_resolve_task(); + self.respawn_resolve_task(self.resolved_waiters(self.overlay.read_entries())); self.listen().await } - pub fn set_next_peers(&self, peers: &[PeerId]) { - self.peer_schedule.set_next_peers(&peers, &self.overlay) + pub fn set_next_peers(&self, peers: &[PeerId], update_overlay: bool) { + if update_overlay { + let mut entries: PrivateOverlayEntriesWriteGuard<'_> = self.overlay.write_entries(); + for peer_id in peers { + entries.insert(peer_id); + } + self.respawn_resolve_task(self.resolved_waiters(entries.downgrade())); + } + self.peer_schedule.set_next_peers(&peers, &self.overlay); } - fn respawn_resolve_task(&self) { + fn resolved_waiters( + &self, + entries: PrivateOverlayEntriesReadGuard<'_>, + ) -> FuturesUnordered + Sized + Send + 'static> { let local_id = self.peer_schedule.local_id(); - tracing::info!("{local_id:.4?} respawn_resolve_task"); - let mut fut = futures_util::stream::FuturesUnordered::new(); - { - // Note: set_next_peers() and respawn_resolve_task() will not deadlock - // although peer_schedule.inner is locked in two opposite orders - // because only read read lock on overlay entries is taken - let entries = self.overlay.read_entries(); - for entry in entries - .iter() - .choose_multiple(&mut rand::thread_rng(), entries.len()) - { - // skip updates on self - if !(entry.peer_id == local_id || entry.resolver_handle.is_resolved()) { - let handle = entry.resolver_handle.clone(); - fut.push(async move { handle.wait_resolved().await }); - } + let fut = FuturesUnordered::new(); + // Note: set_next_peers() and respawn_resolve_task() will not deadlock + // although peer_schedule.inner is locked in two opposite orders + // because only read read lock on overlay entries is taken + for entry in entries.choose_multiple(&mut SmallRng::from_entropy(), entries.len()) { + // skip updates on self + if !(entry.peer_id == local_id || entry.resolver_handle.is_resolved()) { + let handle = entry.resolver_handle.clone(); + fut.push(async move { handle.wait_resolved().await }); } - }; - let new_abort_handle = if fut.is_empty() { + } + fut + } + + fn respawn_resolve_task( + &self, + mut resolved_waiters: FuturesUnordered< + impl Future + Sized + Send + 'static, + >, + ) { + let local_id = self.peer_schedule.local_id(); + tracing::info!("{local_id:.4?} respawn_resolve_task"); + let new_abort_handle = if resolved_waiters.is_empty() { None } else { let peer_schedule = self.peer_schedule.clone(); let join = tokio::spawn(async move { - while let Some(known_peer_handle) = fut.next().await { + while let Some(known_peer_handle) = resolved_waiters.next().await { _ = peer_schedule.set_resolved(&known_peer_handle.peer_info().id, true); } }); @@ -83,7 +103,9 @@ impl PeerScheduleUpdater { tracing::info!("{local_id:.4?} got {event:?}"); if self.peer_schedule.set_resolved(&node, false) { // respawn resolve task with fewer peers to await - self.respawn_resolve_task(); + self.respawn_resolve_task( + self.resolved_waiters(self.overlay.read_entries()), + ); } else { tracing::info!("{local_id:.4?} Skipped {event:?}"); } diff --git a/consensus/src/models/point.rs b/consensus/src/models/point.rs index d40a2969f..ce850896f 100644 --- a/consensus/src/models/point.rs +++ b/consensus/src/models/point.rs @@ -82,13 +82,15 @@ impl Signature { pub struct Round(pub u32); impl Round { - pub fn prev(&self) -> Round { + /// stub that cannot be used even by genesis round + pub const BOTTOM: Self = Self(0); + pub fn prev(&self) -> Self { self.0 
.checked_sub(1) .map(Round) .expect("DAG round number underflow, fix dag initial configuration") } - pub fn next(&self) -> Round { + pub fn next(&self) -> Self { self.0 .checked_add(1) .map(Round) diff --git a/consensus/src/test_utils.rs b/consensus/src/test_utils.rs index c4eaab943..04ed55e5f 100644 --- a/consensus/src/test_utils.rs +++ b/consensus/src/test_utils.rs @@ -32,7 +32,7 @@ pub fn genesis() -> Arc { }) } -pub fn make_peer_info(keypair: Arc, address: Address, ttl: Option) -> PeerInfo { +pub fn make_peer_info(keypair: &KeyPair, address: Address, ttl: Option) -> PeerInfo { let peer_id = PeerId::from(keypair.public_key); let now = now_sec(); @@ -114,11 +114,12 @@ mod tests { ) -> Vec> { let keys = (0..node_count) .map(|_| SecretKey::generate(&mut rand::thread_rng())) + .map(|secret| (secret, Arc::new(KeyPair::from(&secret)))) .collect::>(); let all_peers = keys .iter() - .map(|s| PeerId::from(KeyPair::from(s).public_key)) + .map(|(_, kp)| PeerId::from(kp.public_key)) .collect::>(); let addresses = keys @@ -135,11 +136,11 @@ mod tests { let peer_info = keys .iter() .zip(addresses.iter()) - .map(|(key, addr)| Arc::new(make_peer_info(key, addr.clone(), None))) + .map(|((_, key_pair), addr)| Arc::new(make_peer_info(key_pair, addr.clone(), None))) .collect::>(); let mut handles = vec![]; - for ((secret_key, address), peer_id) in keys + for (((secret_key, key_pair), address), peer_id) in keys .into_iter() .zip(addresses.into_iter()) .zip(peer_info.iter().map(|p| p.id)) @@ -176,14 +177,13 @@ mod tests { let (committed_tx, committed_rx) = mpsc::unbounded_channel(); tokio::spawn(drain_anchors(committed_rx)); - let engine = Engine::new( - &secret_key, + let mut engine = Engine::new( + key_pair, &dht_client, &overlay_service, - &all_peers, committed_tx.clone(), - ) - .await; + ); + engine.init_with_genesis(all_peers.as_slice()).await; tracing::info!("created engine {}", dht_client.network().peer_id()); engine.run().await; }); diff --git a/network/src/overlay/private_overlay.rs b/network/src/overlay/private_overlay.rs index 0b3cc84b0..5b9a6d56f 100644 --- a/network/src/overlay/private_overlay.rs +++ b/network/src/overlay/private_overlay.rs @@ -330,6 +330,13 @@ impl std::ops::DerefMut for PrivateOverlayEntriesWriteGuard<'_> { } } +impl<'a> PrivateOverlayEntriesWriteGuard<'a> { + pub fn downgrade(self) -> PrivateOverlayEntriesReadGuard<'a> { + let entries = RwLockWriteGuard::downgrade(self.entries); + PrivateOverlayEntriesReadGuard { entries } + } +} + pub struct PrivateOverlayEntriesReadGuard<'a> { entries: RwLockReadGuard<'a, PrivateOverlayEntries>, } From 0758bf4abcb3d3bd5b3a94335200e9b1372e30c9 Mon Sep 17 00:00:00 2001 From: Kirill Mikheev Date: Fri, 10 May 2024 21:37:38 +0300 Subject: [PATCH 32/32] fix(consensus): hold less strong links to dag round --- consensus/src/dag/dag_round.rs | 11 ++-- consensus/src/dag/verifier.rs | 51 ++++++++++++------- .../src/intercom/dependency/downloader.rs | 33 ++++++------ 3 files changed, 58 insertions(+), 37 deletions(-) diff --git a/consensus/src/dag/dag_round.rs b/consensus/src/dag/dag_round.rs index a7e9a9aa1..3d7fcbf46 100644 --- a/consensus/src/dag/dag_round.rs +++ b/consensus/src/dag/dag_round.rs @@ -13,9 +13,14 @@ use crate::intercom::{Downloader, PeerSchedule}; use crate::models::{DagPoint, Digest, NodeCount, Point, PointId, Round, ValidPoint}; #[derive(Clone)] +/// Allows memory allocated by DAG to be freed pub struct WeakDagRound(Weak); #[derive(Clone)] +/// do not pass to backwards-recursive async tasks +/// (where DAG_DEPTH is just a logical 
limit, but is not explicitly applicable) +/// to prevent severe memory leaks of a whole DAG round +/// (in case congested tokio runtime reorders futures), use [WeakDagRound] for that pub struct DagRound(Arc); struct DagRoundInner { @@ -30,7 +35,7 @@ struct DagRoundInner { } impl WeakDagRound { - pub const BOTTOM: Self = WeakDagRound(Weak::new()); + const BOTTOM: Self = WeakDagRound(Weak::new()); pub fn get(&self) -> Option { self.0.upgrade().map(DagRound) } @@ -185,7 +190,7 @@ impl DagRound { if &point.body.location.round != self.round() { panic!("Coding error: dag round mismatches point round on add") } - let dag_round = self.clone(); + let dag_round = self.as_weak(); let digest = &point.digest; self.edit(&point.body.location.author, |loc| { let state = loc.state().clone(); @@ -206,7 +211,7 @@ impl DagRound { if !Verifier::verify(point, peer_schedule).is_ok() { panic!("Coding error: malformed point") } - let point = Verifier::validate(point.clone(), self.clone(), downloader.clone()).await; + let point = Verifier::validate(point.clone(), self.as_weak(), downloader.clone()).await; if point.trusted().is_none() { panic!("Coding error: not a trusted point") } diff --git a/consensus/src/dag/verifier.rs b/consensus/src/dag/verifier.rs index 1dc2b2c69..e2a10f229 100644 --- a/consensus/src/dag/verifier.rs +++ b/consensus/src/dag/verifier.rs @@ -5,10 +5,12 @@ use tokio::task::JoinSet; use tycho_network::PeerId; use crate::dag::anchor_stage::AnchorStage; -use crate::dag::DagRound; +use crate::dag::{DagRound, WeakDagRound}; use crate::engine::MempoolConfig; use crate::intercom::{Downloader, PeerSchedule}; -use crate::models::{DagPoint, Digest, Link, Location, NodeCount, Point, PointId, ValidPoint}; +use crate::models::{ + DagPoint, Digest, Link, Location, NodeCount, Point, PointId, Ugly, ValidPoint, +}; // Note on equivocation. // Detected point equivocation does not invalidate the point, it just @@ -46,9 +48,16 @@ impl Verifier { /// must be called iff [Self::verify] succeeded pub async fn validate( point: Arc, // @ r+0 - r_0: DagRound, // r+0 + r_0: WeakDagRound, // r+0 downloader: Downloader, ) -> DagPoint { + let Some(r_0) = r_0.get() else { + tracing::warn!( + "cannot validate {:?}, local DAG moved far forward", + point.id().ugly() + ); + return DagPoint::NotExists(Arc::new(point.id())); + }; // TODO upgrade Weak whenever used to let Dag Round drop if some future hangs up for long if &point.body.location.round != r_0.round() { panic!("Coding error: dag round mismatches point round") @@ -62,14 +71,17 @@ impl Verifier { }) { return DagPoint::Invalid(point.clone()); } - if let Some(r_1) = r_0.prev().get() { - Self::gather_deps(&point, &r_1, &downloader, &mut dependencies); - return Self::check_deps(&point, dependencies).await; - } - // If r-1 exceeds dag depth, the arg point @ r+0 is considered valid by itself. - // Any point @ r+0 will be committed, only if it has valid proof @ r+1 - // included into valid anchor chain, i.e. validated by consensus. - DagPoint::Trusted(ValidPoint::new(point.clone())) + let Some(r_1) = r_0.prev().get() else { + // If r-1 exceeds dag depth, the arg point @ r+0 is considered valid by itself. + // Any point @ r+0 will be committed, only if it has valid proof @ r+1 + // included into valid anchor chain, i.e. validated by consensus. 
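
// A bare sketch of the weak-reference discipline described above: long-lived validation
// futures hold only a `Weak` handle to their DAG round and upgrade it on entry, so a congested
// runtime cannot keep an evicted round alive through a stale future. Names are hypothetical;
// the real code returns `DagPoint::NotExists` where `fallback` is returned here.
use std::sync::{Arc, Weak};

fn with_upgraded<T, R>(weak: &Weak<T>, fallback: R, f: impl FnOnce(&Arc<T>) -> R) -> R {
    match weak.upgrade() {
        // strong owners still exist: borrow the round only for the duration of `f`
        Some(strong) => f(&strong),
        // the DAG has moved on and dropped the round: bail out instead of resurrecting it
        None => fallback,
    }
}
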
+ return DagPoint::Trusted(ValidPoint::new(point.clone())); + }; + Self::gather_deps(&point, &r_1, &downloader, &mut dependencies); + // drop strong links before await + _ = r_0; + _ = r_1; + Self::check_deps(&point, dependencies).await } fn is_self_links_ok( @@ -128,12 +140,15 @@ impl Verifier { } !found }); - if dag_round.prev().get().map(|r| dag_round = r).is_none() { - // if links in point exceed DAG depth, consider them valid by now; - // either dependencies have more recent link and point will be invalidated later, - // or author was less successful to get fresh data and did not commit for long - // (thus keeps more history in its local Dag) - break; + match dag_round.prev().get() { + Some(r) => dag_round = r, + None => { + // if links in point exceed DAG depth, consider them valid by now; + // either dependencies have more recent link and point will be invalidated later, + // or author was less successful to get fresh data and did not commit for long + // (thus keeps more history in its local Dag) + break; + } } } // valid linked points will be in dag without this addition by recursion, @@ -174,7 +189,7 @@ impl Verifier { }, digest: digest.clone(), }; - downloader.run(point_id, round.clone(), dependant.clone()) + downloader.run(point_id, round.as_weak(), dependant.clone()) }) }); dependencies.spawn(shared.map(|(dag_point, _)| dag_point)); diff --git a/consensus/src/intercom/dependency/downloader.rs b/consensus/src/intercom/dependency/downloader.rs index f41982161..b5046ae73 100644 --- a/consensus/src/intercom/dependency/downloader.rs +++ b/consensus/src/intercom/dependency/downloader.rs @@ -11,7 +11,7 @@ use tokio::sync::{broadcast, watch}; use tycho_network::PeerId; use tycho_util::{FastHashMap, FastHashSet}; -use crate::dag::{DagRound, Verifier, WeakDagRound}; +use crate::dag::{Verifier, WeakDagRound}; use crate::engine::MempoolConfig; use crate::intercom::dto::{PeerState, PointByIdResponse}; use crate::intercom::{Dispatcher, PeerSchedule}; @@ -38,21 +38,24 @@ impl Downloader { pub async fn run( self, point_id: PointId, - point_round: DagRound, + point_round: WeakDagRound, // TODO it would be great to increase the number of dependants in-flight, // but then the DAG needs to store some sort of updatable state machine // instead of opaque Shared> dependant: PeerId, ) -> DagPoint { + let Some(point_round_temp) = point_round.get() else { + return DagPoint::NotExists(Arc::new(point_id)); + }; assert_eq!( point_id.location.round, - *point_round.round(), + *point_round_temp.round(), "point and DAG round mismatch" ); // request point from its signers (any dependant is among them as point is already verified) let all_peers = self .peer_schedule - .peers_for(&point_round.round().next()) + .peers_for(&point_round_temp.round().next()) .iter() .map(|(peer_id, state)| (*peer_id, *state)) .collect::>(); @@ -73,8 +76,9 @@ impl Downloader { .chain(iter::once(point_id.location.author)) .collect(); let (has_resolved_tx, has_resolved_rx) = watch::channel(false); + _ = point_round_temp; // do not leak strong ref across unlimited await DownloadTask { - weak_dag_round: point_round.as_weak(), + weak_dag_round: point_round, node_count, request: self.dispatcher.point_by_id_request(&point_id), point_id, @@ -193,7 +197,8 @@ impl DownloadTask { } Ok(PointByIdResponse(None)) => { if self.mandatory.remove(&peer_id) { - // it's a ban + // it's a ban in case permanent storage is used, + // the other way - peer can could have advanced on full DAG_DEPTH already tracing::error!( "{} : {peer_id:.4?} must have 
returned {:?}", self.parent.log_id, @@ -215,18 +220,14 @@ impl DownloadTask { self.parent.log_id ); } - let Some(dag_round) = self.weak_dag_round.get() else { - tracing::warn!( - "{} : {peer_id:.4?} no more retries, local DAG moved far forward", - self.parent.log_id - ); - // DAG could not have moved if this point was needed for commit - return Some(DagPoint::NotExists(Arc::new(self.point_id.clone()))); - }; match Verifier::verify(&point, &self.parent.peer_schedule) { Ok(()) => { - let validated = - Verifier::validate(point, dag_round, self.parent.clone()).await; + let validated = Verifier::validate( + point, + self.weak_dag_round.clone(), + self.parent.clone(), + ) + .await; if validated.trusted().is_some() { tracing::debug!( "{} : downloaded dependency {:?}",