From e659405241692feb94030a8145e0b66a1a248641 Mon Sep 17 00:00:00 2001 From: Friedel Ziegelmayer Date: Wed, 23 Oct 2024 14:10:15 +0200 Subject: [PATCH] refactor: move iroh-gossip to external repo (#2826) the source now lives in https://github.com/n0-computer/iroh-gossip --- .github/workflows/ci.yml | 4 +- .github/workflows/tests.yaml | 4 +- Cargo.lock | 7 +- Cargo.toml | 12 +- iroh-base/Cargo.toml | 2 +- iroh-blobs/Cargo.toml | 8 +- iroh-cli/Cargo.toml | 4 +- iroh-dns-server/Cargo.toml | 6 +- iroh-docs/Cargo.toml | 12 +- iroh-gossip/Cargo.toml | 67 -- iroh-gossip/README.md | 29 - iroh-gossip/examples/chat.rs | 325 --------- iroh-gossip/src/lib.rs | 15 - iroh-gossip/src/metrics.rs | 69 -- iroh-gossip/src/net.rs | 1008 ---------------------------- iroh-gossip/src/net/handles.rs | 276 -------- iroh-gossip/src/net/util.rs | 128 ---- iroh-gossip/src/proto.rs | 376 ----------- iroh-gossip/src/proto/hyparview.rs | 718 -------------------- iroh-gossip/src/proto/plumtree.rs | 878 ------------------------ iroh-gossip/src/proto/state.rs | 353 ---------- iroh-gossip/src/proto/tests.rs | 468 ------------- iroh-gossip/src/proto/topic.rs | 346 ---------- iroh-gossip/src/proto/util.rs | 470 ------------- iroh-net/Cargo.toml | 6 +- iroh/Cargo.toml | 14 +- 26 files changed, 42 insertions(+), 5563 deletions(-) delete mode 100644 iroh-gossip/Cargo.toml delete mode 100644 iroh-gossip/README.md delete mode 100644 iroh-gossip/examples/chat.rs delete mode 100644 iroh-gossip/src/lib.rs delete mode 100644 iroh-gossip/src/metrics.rs delete mode 100644 iroh-gossip/src/net.rs delete mode 100644 iroh-gossip/src/net/handles.rs delete mode 100644 iroh-gossip/src/net/util.rs delete mode 100644 iroh-gossip/src/proto.rs delete mode 100644 iroh-gossip/src/proto/hyparview.rs delete mode 100644 iroh-gossip/src/proto/plumtree.rs delete mode 100644 iroh-gossip/src/proto/state.rs delete mode 100644 iroh-gossip/src/proto/tests.rs delete mode 100644 iroh-gossip/src/proto/topic.rs delete mode 100644 iroh-gossip/src/proto/util.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5b5290d62e..127c1037f9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -190,7 +190,7 @@ jobs: # uses: obi1kenobi/cargo-semver-checks-action@v2 uses: n0-computer/cargo-semver-checks-action@feat-baseline with: - package: iroh, iroh-base, iroh-blobs, iroh-cli, iroh-dns-server, iroh-gossip, iroh-metrics, iroh-net, iroh-net-bench, iroh-docs + package: iroh, iroh-base, iroh-blobs, iroh-cli, iroh-dns-server, iroh-metrics, iroh-net, iroh-net-bench, iroh-docs baseline-rev: ${{ env.HEAD_COMMIT_SHA }} use-cache: false @@ -298,7 +298,7 @@ jobs: netsim_branch: "main" sim_paths: "sims/iroh/iroh.json,sims/integration" pr_number: ${{ github.event.pull_request.number || '' }} - + docker_build_and_test: name: Docker Test if: "github.event_name != 'pull_request' || ! 
contains(github.event.pull_request.labels.*.name, 'flaky-test')" diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 7d9700bfc2..540a74f5ab 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -23,7 +23,7 @@ env: RUSTFLAGS: -Dwarnings RUSTDOCFLAGS: -Dwarnings SCCACHE_CACHE_SIZE: "50G" - CRATES_LIST: "iroh,iroh-blobs,iroh-gossip,iroh-metrics,iroh-net,iroh-net-bench,iroh-docs,iroh-test,iroh-cli,iroh-dns-server" + CRATES_LIST: "iroh,iroh-blobs,iroh-metrics,iroh-net,iroh-net-bench,iroh-docs,iroh-test,iroh-cli,iroh-dns-server" IROH_FORCE_STAGING_RELAYS: "1" jobs: @@ -218,7 +218,7 @@ jobs: env: RUST_LOG: ${{ runner.debug && 'TRACE' || 'DEBUG'}} NEXTEST_EXPERIMENTAL_LIBTEST_JSON: 1 - + - name: upload results if: ${{ failure() && inputs.flaky }} uses: actions/upload-artifact@v4 diff --git a/Cargo.lock b/Cargo.lock index 6fb8e95860..dce940306f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2732,11 +2732,12 @@ dependencies = [ [[package]] name = "iroh-gossip" version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b434d455389493ff2b2ecbab035c12eb3762f24d04080855ecd4956bf7739448" dependencies = [ "anyhow", "async-channel", "bytes", - "clap", "derive_more", "ed25519-dalek", "futures-concurrency", @@ -2747,17 +2748,13 @@ dependencies = [ "iroh-blake3", "iroh-metrics", "iroh-net", - "iroh-test", "postcard", "rand", - "rand_chacha", "rand_core", "serde", "tokio", "tokio-util", "tracing", - "tracing-subscriber", - "url", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 4e4aeed480..e137c81d73 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,7 +4,6 @@ members = [ "iroh-blobs", "iroh-base", "iroh-dns-server", - "iroh-gossip", "iroh-metrics", "iroh-net", "iroh-docs", @@ -19,7 +18,7 @@ debug = true [profile.dev-ci] inherits = 'dev' -opt-level = 1 +opt-level = 1 [profile.optimized-release] inherits = 'release' @@ -46,3 +45,12 @@ unexpected_cfgs = { level = "warn", check-cfg = ["cfg(iroh_docsrs)"] } [workspace.lints.clippy] unused-async = "warn" + +# Temporary fix for dependencies +[patch.crates-io] +iroh-base = { path = "./iroh-base" } +iroh-net = { path = "./iroh-net" } +iroh-blobs = { path = "./iroh-blobs" } +iroh-metrics = { path = "./iroh-metrics" } +iroh-docs = { path = "./iroh-docs" } +iroh-test = { path = "./iroh-test" } diff --git a/iroh-base/Cargo.toml b/iroh-base/Cargo.toml index 577e8387bb..9c00b1e9d8 100644 --- a/iroh-base/Cargo.toml +++ b/iroh-base/Cargo.toml @@ -41,7 +41,7 @@ url = { version = "2.5.0", features = ["serde"], optional = true } getrandom = { version = "0.2", default-features = false, optional = true } [dev-dependencies] -iroh-test = { path = "../iroh-test" } +iroh-test = "0.27.0" proptest = "1.0.0" serde_json = "1.0.107" serde_test = "1.0.176" diff --git a/iroh-blobs/Cargo.toml b/iroh-blobs/Cargo.toml index af83e98ea0..bd1c34edfe 100644 --- a/iroh-blobs/Cargo.toml +++ b/iroh-blobs/Cargo.toml @@ -27,10 +27,10 @@ futures-lite = "2.3" genawaiter = { version = "0.99.1", features = ["futures03"] } hashlink = { version = "0.9.0", optional = true } hex = "0.4.3" -iroh-base = { version = "0.27.0", features = ["redb"], path = "../iroh-base" } +iroh-base = { version = "0.27.0", features = ["redb"] } iroh-io = { version = "0.6.0", features = ["stats"] } -iroh-metrics = { version = "0.27.0", path = "../iroh-metrics", default-features = false } -iroh-net = { version = "0.27.0", path = "../iroh-net" } +iroh-metrics = { version = "0.27.0", default-features = false } +iroh-net = { version = "0.27.0" } 
num_cpus = "1.15.0" oneshot = "0.1.8" parking_lot = { version = "0.12.1", optional = true } @@ -55,7 +55,7 @@ tracing-futures = "0.2.5" [dev-dependencies] http-body = "0.4.5" iroh-blobs = { path = ".", features = ["downloader"] } -iroh-test = { path = "../iroh-test" } +iroh-test = "0.27.0" futures-buffered = "0.2.4" proptest = "1.0.0" serde_json = "1.0.107" diff --git a/iroh-cli/Cargo.toml b/iroh-cli/Cargo.toml index 67fb5ba6d2..2fe907240f 100644 --- a/iroh-cli/Cargo.toml +++ b/iroh-cli/Cargo.toml @@ -41,8 +41,8 @@ hex = "0.4.3" human-time = "0.1.6" indicatif = { version = "0.17", features = ["tokio"] } iroh = { version = "0.27.0", path = "../iroh", features = ["metrics"] } -iroh-gossip = { version = "0.27.0", path = "../iroh-gossip" } -iroh-metrics = { version = "0.27.0", path = "../iroh-metrics" } +iroh-gossip = "0.27.0" +iroh-metrics = { version = "0.27.0" } parking_lot = "0.12.1" pkarr = { version = "2.2.0", default-features = false } portable-atomic = "1" diff --git a/iroh-dns-server/Cargo.toml b/iroh-dns-server/Cargo.toml index eb45cd186d..94d947fe71 100644 --- a/iroh-dns-server/Cargo.toml +++ b/iroh-dns-server/Cargo.toml @@ -24,7 +24,7 @@ governor = "0.6.3" hickory-proto = "=0.25.0-alpha.2" hickory-server = { version = "=0.25.0-alpha.2", features = ["dns-over-rustls"] } http = "1.0.0" -iroh-metrics = { version = "0.27.0", path = "../iroh-metrics" } +iroh-metrics = { version = "0.27.0" } lru = "0.12.3" mainline = "2.0.1" parking_lot = "0.12.1" @@ -53,8 +53,8 @@ z32 = "1.1.1" [dev-dependencies] hickory-resolver = "=0.25.0-alpha.2" -iroh-net = { version = "0.27.0", path = "../iroh-net" } -iroh-test = { path = "../iroh-test" } +iroh-net = { version = "0.27.0" } +iroh-test = "0.27.0" pkarr = { version = "2.2.0", features = ["rand"] } [package.metadata.docs.rs] diff --git a/iroh-docs/Cargo.toml b/iroh-docs/Cargo.toml index b9ddd299e3..c4c65c8e2a 100644 --- a/iroh-docs/Cargo.toml +++ b/iroh-docs/Cargo.toml @@ -25,11 +25,11 @@ futures-buffered = "0.2.4" futures-lite = "2.3.0" futures-util = { version = "0.3.25" } hex = "0.4" -iroh-base = { version = "0.27.0", path = "../iroh-base" } -iroh-blobs = { version = "0.27.0", path = "../iroh-blobs", optional = true, features = ["downloader"] } -iroh-gossip = { version = "0.27.0", path = "../iroh-gossip", optional = true } -iroh-metrics = { version = "0.27.0", path = "../iroh-metrics", default-features = false } -iroh-net = { version = "0.27.0", optional = true, path = "../iroh-net" } +iroh-base = { version = "0.27.0" } +iroh-blobs = { version = "0.27.0", optional = true, features = ["downloader"] } +iroh-gossip = { version = "0.27.0", optional = true } +iroh-metrics = { version = "0.27.0", default-features = false } +iroh-net = { version = "0.27.0", optional = true } lru = "0.12" num_enum = "0.7" postcard = { version = "1", default-features = false, features = ["alloc", "use-std", "experimental-derive"] } @@ -48,7 +48,7 @@ tokio-util = { version = "0.7.12", optional = true, features = ["codec", "io-uti tracing = "0.1" [dev-dependencies] -iroh-test = { path = "../iroh-test" } +iroh-test = "0.27.0" rand_chacha = "0.3.1" tokio = { version = "1", features = ["sync", "macros"] } proptest = "1.2.0" diff --git a/iroh-gossip/Cargo.toml b/iroh-gossip/Cargo.toml deleted file mode 100644 index 50fe4846c2..0000000000 --- a/iroh-gossip/Cargo.toml +++ /dev/null @@ -1,67 +0,0 @@ -[package] -name = "iroh-gossip" -version = "0.27.0" -edition = "2021" -readme = "README.md" -description = "gossip messages over broadcast trees" -license = "MIT/Apache-2.0" -authors = 
["n0 team"] -repository = "https://github.com/n0-computer/iroh" - -# Sadly this also needs to be updated in .github/workflows/ci.yml -rust-version = "1.76" - -[lints] -workspace = true - -[dependencies] -anyhow = { version = "1" } -async-channel = { version = "2.3.1", optional = true } -blake3 = { package = "iroh-blake3", version = "1.4.5"} -bytes = { version = "1.7", features = ["serde"] } -derive_more = { version = "1.0.0", features = ["add", "debug", "deref", "display", "from", "try_into", "into"] } -ed25519-dalek = { version = "2.0.0", features = ["serde", "rand_core"] } -indexmap = "2.0" -iroh-base = { version = "0.27.0", path = "../iroh-base" } -iroh-metrics = { version = "0.27.0", path = "../iroh-metrics" } -postcard = { version = "1", default-features = false, features = ["alloc", "use-std", "experimental-derive"] } -rand = { version = "0.8.5", features = ["std_rng"] } -rand_core = "0.6.4" -serde = { version = "1.0.164", features = ["derive"] } - -# net dependencies (optional) -futures-lite = { version = "2.3", optional = true } -futures-concurrency = { version = "7.6.1", optional = true } -futures-util = { version = "0.3.30", optional = true } -iroh-net = { path = "../iroh-net", version = "0.27.0", optional = true, default-features = false } -tokio = { version = "1", optional = true, features = ["io-util", "sync", "rt", "macros", "net", "fs"] } -tokio-util = { version = "0.7.12", optional = true, features = ["codec", "rt"] } -tracing = "0.1" - -[dev-dependencies] -clap = { version = "4", features = ["derive"] } -iroh-net = { path = "../iroh-net", version = "0.27.0", default-features = false, features = ["test-utils"] } -iroh-test = { path = "../iroh-test" } -rand_chacha = "0.3.1" -tracing-subscriber = { version = "0.3", features = ["env-filter"] } -url = "2.4.0" - -[features] -default = ["net"] -net = [ - "dep:futures-lite", - "dep:iroh-net", - "dep:tokio", - "dep:tokio-util", - "dep:async-channel", - "dep:futures-util", - "dep:futures-concurrency" -] - -[[example]] -name = "chat" -required-features = ["net"] - -[package.metadata.docs.rs] -all-features = true -rustdoc-args = ["--cfg", "iroh_docsrs"] diff --git a/iroh-gossip/README.md b/iroh-gossip/README.md deleted file mode 100644 index 40da90d54f..0000000000 --- a/iroh-gossip/README.md +++ /dev/null @@ -1,29 +0,0 @@ -# iroh-gossip - -This crate implements the `iroh-gossip` protocol. -It is based on *epidemic broadcast trees* to disseminate messages among a swarm of peers interested in a *topic*. -The implementation is based on the papers [HyParView](https://asc.di.fct.unl.pt/~jleitao/pdf/dsn07-leitao.pdf) and [PlumTree](https://asc.di.fct.unl.pt/~jleitao/pdf/srds07-leitao.pdf). - -The crate is made up from two modules: -The `proto` module is the protocol implementation, as a state machine without any IO. -The `net` module connects the protocol to the networking stack from `iroh-net`. - -The `net` module is optional behind the `net` feature flag (enabled by default). - - -# License - -This project is licensed under either of - - * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or - http://www.apache.org/licenses/LICENSE-2.0) - * MIT license ([LICENSE-MIT](LICENSE-MIT) or - http://opensource.org/licenses/MIT) - -at your option. - -### Contribution - -Unless you explicitly state otherwise, any contribution intentionally submitted -for inclusion in this project by you, as defined in the Apache-2.0 license, -shall be dual licensed as above, without any additional terms or conditions. 
diff --git a/iroh-gossip/examples/chat.rs b/iroh-gossip/examples/chat.rs deleted file mode 100644 index 5d99ba10e6..0000000000 --- a/iroh-gossip/examples/chat.rs +++ /dev/null @@ -1,325 +0,0 @@ -use std::{ - collections::HashMap, - fmt, - net::{Ipv4Addr, SocketAddrV4}, - str::FromStr, -}; - -use anyhow::{bail, Context, Result}; -use bytes::Bytes; -use clap::Parser; -use ed25519_dalek::Signature; -use futures_lite::StreamExt; -use iroh_base::base32; -use iroh_gossip::{ - net::{Event, Gossip, GossipEvent, GossipReceiver, GOSSIP_ALPN}, - proto::TopicId, -}; -use iroh_net::{ - key::{PublicKey, SecretKey}, - relay::{RelayMap, RelayMode, RelayUrl}, - Endpoint, NodeAddr, -}; -use serde::{Deserialize, Serialize}; -use tracing::warn; - -/// Chat over iroh-gossip -/// -/// This broadcasts signed messages over iroh-gossip and verifies signatures -/// on received messages. -/// -/// By default a new node id is created when starting the example. To reuse your identity, -/// set the `--secret-key` flag with the secret key printed on a previous invocation. -/// -/// By default, the relay server run by n0 is used. To use a local relay server, run -/// cargo run --bin iroh-relay --features iroh-relay -- --dev -/// in another terminal and then set the `-d http://localhost:3340` flag on this example. -#[derive(Parser, Debug)] -struct Args { - /// secret key to derive our node id from. - #[clap(long)] - secret_key: Option, - /// Set a custom relay server. By default, the relay server hosted by n0 will be used. - #[clap(short, long)] - relay: Option, - /// Disable relay completely. - #[clap(long)] - no_relay: bool, - /// Set your nickname. - #[clap(short, long)] - name: Option, - /// Set the bind port for our socket. By default, a random port will be used. - #[clap(short, long, default_value = "0")] - bind_port: u16, - #[clap(subcommand)] - command: Command, -} - -#[derive(Parser, Debug)] -enum Command { - /// Open a chat room for a topic and print a ticket for others to join. - /// - /// If no topic is provided, a new topic will be created. - Open { - /// Optionally set the topic id (32 bytes, as base32 string). - topic: Option, - }, - /// Join a chat room from a ticket. - Join { - /// The ticket, as base32 string. 
- ticket: String, - }, -} - -#[tokio::main] -async fn main() -> Result<()> { - tracing_subscriber::fmt::init(); - let args = Args::parse(); - - // parse the cli command - let (topic, peers) = match &args.command { - Command::Open { topic } => { - let topic = topic.unwrap_or_else(|| TopicId::from_bytes(rand::random())); - println!("> opening chat room for topic {topic}"); - (topic, vec![]) - } - Command::Join { ticket } => { - let Ticket { topic, peers } = Ticket::from_str(ticket)?; - println!("> joining chat room for topic {topic}"); - (topic, peers) - } - }; - - // parse or generate our secret key - let secret_key = match args.secret_key { - None => SecretKey::generate(), - Some(key) => key.parse()?, - }; - println!("> our secret key: {secret_key}"); - - // configure our relay map - let relay_mode = match (args.no_relay, args.relay) { - (false, None) => RelayMode::Default, - (false, Some(url)) => RelayMode::Custom(RelayMap::from_url(url)), - (true, None) => RelayMode::Disabled, - (true, Some(_)) => bail!("You cannot set --no-relay and --relay at the same time"), - }; - println!("> using relay servers: {}", fmt_relay_mode(&relay_mode)); - - // build our magic endpoint - let endpoint = Endpoint::builder() - .secret_key(secret_key) - .alpns(vec![GOSSIP_ALPN.to_vec()]) - .relay_mode(relay_mode) - .bind_addr_v4(SocketAddrV4::new(Ipv4Addr::UNSPECIFIED, args.bind_port)) - .bind() - .await?; - println!("> our node id: {}", endpoint.node_id()); - - let my_addr = endpoint.node_addr().await?; - // create the gossip protocol - let gossip = Gossip::from_endpoint(endpoint.clone(), Default::default(), &my_addr.info); - - // print a ticket that includes our own node id and endpoint addresses - let ticket = { - let me = endpoint.node_addr().await?; - let peers = peers.iter().cloned().chain([me]).collect(); - Ticket { topic, peers } - }; - println!("> ticket to join us: {ticket}"); - - // spawn our endpoint loop that forwards incoming connections to the gossiper - tokio::spawn(endpoint_loop(endpoint.clone(), gossip.clone())); - - // join the gossip topic by connecting to known peers, if any - let peer_ids = peers.iter().map(|p| p.node_id).collect(); - if peers.is_empty() { - println!("> waiting for peers to join us..."); - } else { - println!("> trying to connect to {} peers...", peers.len()); - // add the peer addrs from the ticket to our endpoint's addressbook so that they can be dialed - for peer in peers.into_iter() { - endpoint.add_node_addr(peer)?; - } - }; - let (sender, receiver) = gossip.join(topic, peer_ids).await?.split(); - println!("> connected!"); - - // broadcast our name, if set - if let Some(name) = args.name { - let message = Message::AboutMe { name }; - let encoded_message = SignedMessage::sign_and_encode(endpoint.secret_key(), &message)?; - sender.broadcast(encoded_message).await?; - } - - // subscribe and print loop - tokio::spawn(subscribe_loop(receiver)); - - // spawn an input thread that reads stdin - // not using tokio here because they recommend this for "technical reasons" - let (line_tx, mut line_rx) = tokio::sync::mpsc::channel(1); - std::thread::spawn(move || input_loop(line_tx)); - - // broadcast each line we type - println!("> type a message and hit enter to broadcast..."); - while let Some(text) = line_rx.recv().await { - let message = Message::Message { text: text.clone() }; - let encoded_message = SignedMessage::sign_and_encode(endpoint.secret_key(), &message)?; - sender.broadcast(encoded_message).await?; - println!("> sent: {text}"); - } - - Ok(()) -} - -async fn 
subscribe_loop(mut receiver: GossipReceiver) -> Result<()> { - // init a peerid -> name hashmap - let mut names = HashMap::new(); - while let Some(event) = receiver.try_next().await? { - if let Event::Gossip(GossipEvent::Received(msg)) = event { - let (from, message) = SignedMessage::verify_and_decode(&msg.content)?; - match message { - Message::AboutMe { name } => { - names.insert(from, name.clone()); - println!("> {} is now known as {}", from.fmt_short(), name); - } - Message::Message { text } => { - let name = names - .get(&from) - .map_or_else(|| from.fmt_short(), String::to_string); - println!("{}: {}", name, text); - } - } - } - } - Ok(()) -} - -async fn endpoint_loop(endpoint: Endpoint, gossip: Gossip) { - while let Some(incoming) = endpoint.accept().await { - let conn = match incoming.accept() { - Ok(conn) => conn, - Err(err) => { - warn!("incoming connection failed: {err:#}"); - // we can carry on in these cases: - // this can be caused by retransmitted datagrams - continue; - } - }; - let gossip = gossip.clone(); - tokio::spawn(async move { - if let Err(err) = handle_connection(conn, gossip).await { - println!("> connection closed: {err}"); - } - }); - } -} - -async fn handle_connection( - mut conn: iroh_net::endpoint::Connecting, - gossip: Gossip, -) -> anyhow::Result<()> { - let alpn = conn.alpn().await?; - let conn = conn.await?; - let peer_id = iroh_net::endpoint::get_remote_node_id(&conn)?; - match alpn.as_ref() { - GOSSIP_ALPN => gossip.handle_connection(conn).await.context(format!( - "connection to {peer_id} with ALPN {} failed", - String::from_utf8_lossy(&alpn) - ))?, - _ => println!("> ignoring connection from {peer_id}: unsupported ALPN protocol"), - } - Ok(()) -} - -fn input_loop(line_tx: tokio::sync::mpsc::Sender) -> Result<()> { - let mut buffer = String::new(); - let stdin = std::io::stdin(); // We get `Stdin` here. - loop { - stdin.read_line(&mut buffer)?; - line_tx.blocking_send(buffer.clone())?; - buffer.clear(); - } -} - -#[derive(Debug, Serialize, Deserialize)] -struct SignedMessage { - from: PublicKey, - data: Bytes, - signature: Signature, -} - -impl SignedMessage { - pub fn verify_and_decode(bytes: &[u8]) -> Result<(PublicKey, Message)> { - let signed_message: Self = postcard::from_bytes(bytes)?; - let key: PublicKey = signed_message.from; - key.verify(&signed_message.data, &signed_message.signature)?; - let message: Message = postcard::from_bytes(&signed_message.data)?; - Ok((signed_message.from, message)) - } - - pub fn sign_and_encode(secret_key: &SecretKey, message: &Message) -> Result { - let data: Bytes = postcard::to_stdvec(&message)?.into(); - let signature = secret_key.sign(&data); - let from: PublicKey = secret_key.public(); - let signed_message = Self { - from, - data, - signature, - }; - let encoded = postcard::to_stdvec(&signed_message)?; - Ok(encoded.into()) - } -} - -#[derive(Debug, Serialize, Deserialize)] -enum Message { - AboutMe { name: String }, - Message { text: String }, -} - -#[derive(Debug, Serialize, Deserialize)] -struct Ticket { - topic: TopicId, - peers: Vec, -} -impl Ticket { - /// Deserializes from bytes. - fn from_bytes(bytes: &[u8]) -> Result { - postcard::from_bytes(bytes).map_err(Into::into) - } - /// Serializes to bytes. - pub fn to_bytes(&self) -> Vec { - postcard::to_stdvec(self).expect("postcard::to_stdvec is infallible") - } -} - -/// Serializes to base32. 
-impl fmt::Display for Ticket { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}", base32::fmt(self.to_bytes())) - } -} - -/// Deserializes from base32. -impl FromStr for Ticket { - type Err = anyhow::Error; - fn from_str(s: &str) -> Result { - Self::from_bytes(&base32::parse_vec(s)?) - } -} - -// helpers - -fn fmt_relay_mode(relay_mode: &RelayMode) -> String { - match relay_mode { - RelayMode::Disabled => "None".to_string(), - RelayMode::Default => "Default Relay (production) servers".to_string(), - RelayMode::Staging => "Default Relay (staging) servers".to_string(), - RelayMode::Custom(map) => map - .urls() - .map(|url| url.to_string()) - .collect::>() - .join(", "), - } -} diff --git a/iroh-gossip/src/lib.rs b/iroh-gossip/src/lib.rs deleted file mode 100644 index 1db6ce71cc..0000000000 --- a/iroh-gossip/src/lib.rs +++ /dev/null @@ -1,15 +0,0 @@ -//! Broadcast messages to peers subscribed to a topic -//! -//! The crate is designed to be used from the [iroh] crate, which provides a -//! [high level interface](https://docs.rs/iroh/latest/iroh/client/gossip/index.html), -//! but can also be used standalone. -//! -//! [iroh]: https://docs.rs/iroh -#![deny(missing_docs, rustdoc::broken_intra_doc_links)] -#![cfg_attr(iroh_docsrs, feature(doc_cfg))] - -pub mod metrics; -#[cfg(feature = "net")] -#[cfg_attr(iroh_docsrs, doc(cfg(feature = "net")))] -pub mod net; -pub mod proto; diff --git a/iroh-gossip/src/metrics.rs b/iroh-gossip/src/metrics.rs deleted file mode 100644 index 0de9680eb2..0000000000 --- a/iroh-gossip/src/metrics.rs +++ /dev/null @@ -1,69 +0,0 @@ -//! Metrics for iroh-gossip - -use iroh_metrics::{ - core::{Counter, Metric}, - struct_iterable::Iterable, -}; - -/// Enum of metrics for the module -#[allow(missing_docs)] -#[derive(Debug, Clone, Iterable)] -pub struct Metrics { - pub msgs_ctrl_sent: Counter, - pub msgs_ctrl_recv: Counter, - pub msgs_data_sent: Counter, - pub msgs_data_recv: Counter, - pub msgs_data_sent_size: Counter, - pub msgs_data_recv_size: Counter, - pub msgs_ctrl_sent_size: Counter, - pub msgs_ctrl_recv_size: Counter, - pub neighbor_up: Counter, - pub neighbor_down: Counter, - pub actor_tick_main: Counter, - pub actor_tick_rx: Counter, - pub actor_tick_endpoint: Counter, - pub actor_tick_dialer: Counter, - pub actor_tick_dialer_success: Counter, - pub actor_tick_dialer_failure: Counter, - pub actor_tick_in_event_rx: Counter, - pub actor_tick_timers: Counter, -} - -impl Default for Metrics { - fn default() -> Self { - Self { - msgs_ctrl_sent: Counter::new("Number of control messages sent"), - msgs_ctrl_recv: Counter::new("Number of control messages received"), - msgs_data_sent: Counter::new("Number of data messages sent"), - msgs_data_recv: Counter::new("Number of data messages received"), - msgs_data_sent_size: Counter::new("Total size of all data messages sent"), - msgs_data_recv_size: Counter::new("Total size of all data messages received"), - msgs_ctrl_sent_size: Counter::new("Total size of all control messages sent"), - msgs_ctrl_recv_size: Counter::new("Total size of all control messages received"), - neighbor_up: Counter::new("Number of times we connected to a peer"), - neighbor_down: Counter::new("Number of times we disconnected from a peer"), - actor_tick_main: Counter::new("Number of times the main actor loop ticked"), - actor_tick_rx: Counter::new("Number of times the actor ticked for a message received"), - actor_tick_endpoint: Counter::new( - "Number of times the actor ticked for an endpoint event", - ), - actor_tick_dialer: 
Counter::new("Number of times the actor ticked for a dialer event"), - actor_tick_dialer_success: Counter::new( - "Number of times the actor ticked for a successful dialer event", - ), - actor_tick_dialer_failure: Counter::new( - "Number of times the actor ticked for a failed dialer event", - ), - actor_tick_in_event_rx: Counter::new( - "Number of times the actor ticked for an incoming event", - ), - actor_tick_timers: Counter::new("Number of times the actor ticked for a timer event"), - } - } -} - -impl Metric for Metrics { - fn name() -> &'static str { - "gossip" - } -} diff --git a/iroh-gossip/src/net.rs b/iroh-gossip/src/net.rs deleted file mode 100644 index 02799609c6..0000000000 --- a/iroh-gossip/src/net.rs +++ /dev/null @@ -1,1008 +0,0 @@ -//! Networking for the `iroh-gossip` protocol - -use std::{ - collections::{BTreeSet, HashMap, HashSet, VecDeque}, - pin::Pin, - sync::Arc, - task::{Context, Poll}, - time::Instant, -}; - -use anyhow::{anyhow, Context as _, Result}; -use bytes::BytesMut; -use futures_concurrency::{ - future::TryJoin, - stream::{stream_group, StreamGroup}, -}; -use futures_lite::{stream::Stream, StreamExt}; -use futures_util::TryFutureExt; -use iroh_metrics::inc; -use iroh_net::{ - dialer::Dialer, - endpoint::{get_remote_node_id, Connection, DirectAddr}, - key::PublicKey, - AddrInfo, Endpoint, NodeAddr, NodeId, -}; -use rand::rngs::StdRng; -use rand_core::SeedableRng; -use tokio::{sync::mpsc, task::JoinSet}; -use tokio_util::task::AbortOnDropHandle; -use tracing::{debug, error_span, trace, warn, Instrument}; - -use self::util::{read_message, write_message, Timers}; -use crate::{ - metrics::Metrics, - proto::{self, PeerData, Scope, TopicId}, -}; - -mod handles; -pub mod util; - -pub use self::handles::{ - Command, CommandStream, Event, GossipEvent, GossipReceiver, GossipSender, GossipTopic, - JoinOptions, Message, -}; - -/// ALPN protocol name -pub const GOSSIP_ALPN: &[u8] = b"/iroh-gossip/0"; -/// Default channel capacity for topic subscription channels (one per topic) -const TOPIC_EVENTS_DEFAULT_CAP: usize = 2048; -/// Default channel capacity for topic subscription channels (one per topic) -const TOPIC_COMMANDS_DEFAULT_CAP: usize = 2048; -/// Channel capacity for the send queue (one per connection) -const SEND_QUEUE_CAP: usize = 64; -/// Channel capacity for the ToActor message queue (single) -const TO_ACTOR_CAP: usize = 64; -/// Channel capacity for the InEvent message queue (single) -const IN_EVENT_CAP: usize = 1024; -/// Name used for logging when new node addresses are added from gossip. -const SOURCE_NAME: &str = "gossip"; - -/// Events emitted from the gossip protocol -pub type ProtoEvent = proto::Event; -/// Commands for the gossip protocol -pub type ProtoCommand = proto::Command; - -type InEvent = proto::InEvent; -type OutEvent = proto::OutEvent; -type Timer = proto::Timer; -type ProtoMessage = proto::Message; - -/// Publish and subscribe on gossiping topics. -/// -/// Each topic is a separate broadcast tree with separate memberships. -/// -/// A topic has to be joined before you can publish or subscribe on the topic. -/// To join the swarm for a topic, you have to know the [`PublicKey`] of at least one peer that also joined the topic. -/// -/// Messages published on the swarm will be delivered to all peers that joined the swarm for that -/// topic. You will also be relaying (gossiping) messages published by other peers. -/// -/// With the default settings, the protocol will maintain up to 5 peer connections per topic. 
-/// -/// Even though the [`Gossip`] is created from a [`Endpoint`], it does not accept connections -/// itself. You should run an accept loop on the [`Endpoint`] yourself, check the ALPN protocol of incoming -/// connections, and if the ALPN protocol equals [`GOSSIP_ALPN`], forward the connection to the -/// gossip actor through [Self::handle_connection]. -/// -/// The gossip actor will, however, initiate new connections to other peers by itself. -#[derive(Debug, Clone)] -pub struct Gossip { - to_actor_tx: mpsc::Sender, - _actor_handle: Arc>, - max_message_size: usize, -} - -impl Gossip { - /// Spawn a gossip actor and get a handle for it - pub fn from_endpoint(endpoint: Endpoint, config: proto::Config, my_addr: &AddrInfo) -> Self { - let peer_id = endpoint.node_id(); - let dialer = Dialer::new(endpoint.clone()); - let state = proto::State::new( - peer_id, - encode_peer_data(my_addr).unwrap(), - config, - rand::rngs::StdRng::from_entropy(), - ); - let (to_actor_tx, to_actor_rx) = mpsc::channel(TO_ACTOR_CAP); - let (in_event_tx, in_event_rx) = mpsc::channel(IN_EVENT_CAP); - - let me = endpoint.node_id().fmt_short(); - let max_message_size = state.max_message_size(); - let actor = Actor { - endpoint, - state, - dialer, - to_actor_rx, - in_event_rx, - in_event_tx, - timers: Timers::new(), - command_rx: StreamGroup::new().keyed(), - peers: Default::default(), - topics: Default::default(), - quit_queue: Default::default(), - connection_tasks: Default::default(), - }; - - let actor_handle = tokio::spawn( - async move { - if let Err(err) = actor.run().await { - warn!("gossip actor closed with error: {err:?}"); - } - } - .instrument(error_span!("gossip", %me)), - ); - Self { - to_actor_tx, - _actor_handle: Arc::new(AbortOnDropHandle::new(actor_handle)), - max_message_size, - } - } - - /// Get the maximum message size configured for this gossip actor. - pub fn max_message_size(&self) -> usize { - self.max_message_size - } - - /// Handle an incoming [`Connection`]. - /// - /// Make sure to check the ALPN protocol yourself before passing the connection. - pub async fn handle_connection(&self, conn: Connection) -> anyhow::Result<()> { - let peer_id = get_remote_node_id(&conn)?; - self.send(ToActor::HandleConnection(peer_id, ConnOrigin::Accept, conn)) - .await?; - Ok(()) - } - - /// Join a gossip topic with the default options and wait for at least one active connection. - pub async fn join(&self, topic_id: TopicId, bootstrap: Vec) -> Result { - let mut sub = self.join_with_opts(topic_id, JoinOptions::with_bootstrap(bootstrap)); - sub.joined().await?; - Ok(sub) - } - - /// Join a gossip topic with options. - /// - /// Returns a [`GossipTopic`] instantly. To wait for at least one connection to be established, - /// you can await [`GossipTopic::joined`]. - /// - /// Messages will be queued until a first connection is available. If the internal channel becomes full, - /// the oldest messages will be dropped from the channel. - pub fn join_with_opts(&self, topic_id: TopicId, opts: JoinOptions) -> GossipTopic { - let (command_tx, command_rx) = async_channel::bounded(TOPIC_COMMANDS_DEFAULT_CAP); - let command_rx: CommandStream = Box::pin(command_rx); - let event_rx = self.join_with_stream(topic_id, opts, command_rx); - GossipTopic::new(command_tx, Box::pin(event_rx)) - } - - /// Join a gossip topic with options and an externally-created update stream. 
- /// - /// This method differs from [`Self::join_with_opts`] by letting you pass in a `updates` command stream yourself - /// instead of using a channel created for you. - /// - /// It returns a stream of events. If you want to wait for the topic to become active, wait for - /// the [`GossipEvent::Joined`] event. - pub fn join_with_stream( - &self, - topic_id: TopicId, - options: JoinOptions, - updates: CommandStream, - ) -> impl Stream> + Send + 'static { - let (event_tx, event_rx) = async_channel::bounded(options.subscription_capacity); - let to_actor_tx = self.to_actor_tx.clone(); - let channels = SubscriberChannels { - command_rx: updates, - event_tx, - }; - // We spawn a task to send the subscribe action to the actor, because we want the send to - // succeed even if the returned stream is dropped right away without being polled, because - // it is legit to keep only the `updates` stream and drop the event stream. This situation - // is handled fine within the actor, but we have to make sure that the message reaches the - // actor. - let task = tokio::task::spawn(async move { - to_actor_tx - .send(ToActor::Join { - topic_id, - bootstrap: options.bootstrap, - channels, - }) - .await - .map_err(|_| anyhow!("Gossip actor dropped")) - }); - async move { - task.await - .map_err(|err| anyhow!("Task for sending to gossip actor failed: {err:?}"))??; - Ok(event_rx) - } - .try_flatten_stream() - } - - async fn send(&self, event: ToActor) -> anyhow::Result<()> { - self.to_actor_tx - .send(event) - .await - .map_err(|_| anyhow!("gossip actor dropped")) - } -} - -/// Input messages for the gossip [`Actor`]. -#[derive(derive_more::Debug)] -enum ToActor { - /// Handle a new QUIC connection, either from accept (external to the actor) or from connect - /// (happens internally in the actor). - HandleConnection(PublicKey, ConnOrigin, #[debug("Connection")] Connection), - Join { - topic_id: TopicId, - bootstrap: BTreeSet, - channels: SubscriberChannels, - }, -} - -/// Actor that sends and handles messages between the connection and main state loops -struct Actor { - /// Protocol state - state: proto::State, - /// The endpoint through which we dial peers - endpoint: Endpoint, - /// Dial machine to connect to peers - dialer: Dialer, - /// Input messages to the actor - to_actor_rx: mpsc::Receiver, - /// Sender for the state input (cloned into the connection loops) - in_event_tx: mpsc::Sender, - /// Input events to the state (emitted from the connection loops) - in_event_rx: mpsc::Receiver, - /// Queued timers - timers: Timers, - /// Map of topics to their state. - topics: HashMap, - /// Map of peers to their state. - peers: HashMap, - /// Stream of commands from topic handles. - command_rx: stream_group::Keyed, - /// Internal queue of topic to close because all handles were dropped. - quit_queue: VecDeque, - /// Tasks for the connection loops, to keep track of panics. - connection_tasks: JoinSet<()>, -} - -impl Actor { - pub async fn run(mut self) -> anyhow::Result<()> { - // Watch for changes in direct addresses to update our peer data. - let mut direct_addresses_stream = self.endpoint.direct_addresses(); - // Watch for changes of our home relay to update our peer data. - let mut home_relay_stream = self.endpoint.watch_home_relay(); - - // With each gossip message we provide addressing information to reach our node. - // We wait until at least one direct address is discovered. 
- let mut current_addresses = direct_addresses_stream - .next() - .await - .ok_or_else(|| anyhow!("Failed to discover direct addresses"))?; - let peer_data = our_peer_data(&self.endpoint, ¤t_addresses)?; - self.handle_in_event(InEvent::UpdatePeerData(peer_data), Instant::now()) - .await?; - - let mut i = 0; - loop { - i += 1; - trace!(?i, "tick"); - inc!(Metrics, actor_tick_main); - tokio::select! { - biased; - msg = self.to_actor_rx.recv() => { - trace!(?i, "tick: to_actor_rx"); - inc!(Metrics, actor_tick_rx); - match msg { - Some(msg) => self.handle_to_actor_msg(msg, Instant::now()).await?, - None => { - debug!("all gossip handles dropped, stop gossip actor"); - break; - } - } - }, - Some((key, (topic, command))) = self.command_rx.next(), if !self.command_rx.is_empty() => { - trace!(?i, "tick: command_rx"); - self.handle_command(topic, key, command).await?; - }, - Some(new_addresses) = direct_addresses_stream.next() => { - trace!(?i, "tick: new_endpoints"); - inc!(Metrics, actor_tick_endpoint); - current_addresses = new_addresses; - let peer_data = our_peer_data(&self.endpoint, ¤t_addresses)?; - self.handle_in_event(InEvent::UpdatePeerData(peer_data), Instant::now()).await?; - } - Some(_relay_url) = home_relay_stream.next() => { - let peer_data = our_peer_data(&self.endpoint, ¤t_addresses)?; - self.handle_in_event(InEvent::UpdatePeerData(peer_data), Instant::now()).await?; - } - (peer_id, res) = self.dialer.next_conn() => { - trace!(?i, "tick: dialer"); - inc!(Metrics, actor_tick_dialer); - match res { - Ok(conn) => { - debug!(peer = ?peer_id, "dial successful"); - inc!(Metrics, actor_tick_dialer_success); - self.handle_connection(peer_id, ConnOrigin::Dial, conn); - } - Err(err) => { - warn!(peer = ?peer_id, "dial failed: {err}"); - inc!(Metrics, actor_tick_dialer_failure); - } - } - } - event = self.in_event_rx.recv() => { - trace!(?i, "tick: in_event_rx"); - inc!(Metrics, actor_tick_in_event_rx); - match event { - Some(event) => { - self.handle_in_event(event, Instant::now()).await.context("in_event_rx.recv -> handle_in_event")?; - } - None => unreachable!() - } - } - drain = self.timers.wait_and_drain() => { - trace!(?i, "tick: timers"); - inc!(Metrics, actor_tick_timers); - let now = Instant::now(); - for (_instant, timer) in drain { - self.handle_in_event(InEvent::TimerExpired(timer), now).await.context("timers.drain_expired -> handle_in_event")?; - } - } - Some(res) = self.connection_tasks.join_next(), if !self.connection_tasks.is_empty() => { - if let Err(err) = res { - if !err.is_cancelled() { - warn!("connection task panicked: {err:?}"); - } - } - } - } - } - Ok(()) - } - - async fn handle_command( - &mut self, - topic: TopicId, - key: stream_group::Key, - command: Option, - ) -> anyhow::Result<()> { - debug!(?topic, ?key, ?command, "handle command"); - let Some(state) = self.topics.get_mut(&topic) else { - // TODO: unreachable? - warn!("received command for unknown topic"); - return Ok(()); - }; - let TopicState { - command_rx_keys, - event_senders, - .. 
- } = state; - match command { - Some(command) => { - let command = match command { - Command::Broadcast(message) => ProtoCommand::Broadcast(message, Scope::Swarm), - Command::BroadcastNeighbors(message) => { - ProtoCommand::Broadcast(message, Scope::Neighbors) - } - Command::JoinPeers(peers) => ProtoCommand::Join(peers), - }; - self.handle_in_event(proto::InEvent::Command(topic, command), Instant::now()) - .await?; - } - None => { - command_rx_keys.remove(&key); - if command_rx_keys.is_empty() && event_senders.is_empty() { - self.quit_queue.push_back(topic); - self.process_quit_queue().await?; - } - } - } - Ok(()) - } - - fn handle_connection(&mut self, peer_id: NodeId, origin: ConnOrigin, conn: Connection) { - // Check that we only keep one connection per peer per direction. - if let Some(peer_info) = self.peers.get(&peer_id) { - if matches!(origin, ConnOrigin::Dial) && peer_info.conn_dialed.is_some() { - warn!(?peer_id, ?origin, "ignoring connection: already accepted"); - return; - } - if matches!(origin, ConnOrigin::Accept) && peer_info.conn_accepted.is_some() { - warn!(?peer_id, ?origin, "ignoring connection: already accepted"); - return; - } - } - - let mut peer_info = self.peers.remove(&peer_id).unwrap_or_default(); - - // Store the connection so that we can terminate it when the peer is removed. - match origin { - ConnOrigin::Dial => { - peer_info.conn_dialed = Some(conn.clone()); - } - ConnOrigin::Accept => { - peer_info.conn_accepted = Some(conn.clone()); - } - } - - // Extract the queue of pending messages. - let queue = match &mut peer_info.state { - PeerState::Pending { queue } => std::mem::take(queue), - PeerState::Active { .. } => Default::default(), - }; - - let (send_tx, send_rx) = mpsc::channel(SEND_QUEUE_CAP); - let max_message_size = self.state.max_message_size(); - let in_event_tx = self.in_event_tx.clone(); - - // Spawn a task for this connection - self.connection_tasks.spawn( - async move { - match connection_loop( - peer_id, - conn, - origin, - send_rx, - &in_event_tx, - max_message_size, - queue, - ) - .await - { - Ok(()) => debug!("connection closed without error"), - Err(err) => warn!("connection closed: {err:?}"), - } - in_event_tx - .send(InEvent::PeerDisconnected(peer_id)) - .await - .ok(); - } - .instrument(error_span!("gossip_conn", peer = %peer_id.fmt_short())), - ); - - peer_info.state = match peer_info.state { - PeerState::Pending { .. 
} => PeerState::Active { send_tx }, - PeerState::Active { send_tx } => PeerState::Active { send_tx }, - }; - - self.peers.insert(peer_id, peer_info); - } - - async fn handle_to_actor_msg(&mut self, msg: ToActor, now: Instant) -> anyhow::Result<()> { - trace!("handle to_actor {msg:?}"); - match msg { - ToActor::HandleConnection(peer_id, origin, conn) => { - self.handle_connection(peer_id, origin, conn) - } - ToActor::Join { - topic_id, - bootstrap, - channels, - } => { - let state = self.topics.entry(topic_id).or_default(); - let TopicState { - neighbors, - event_senders, - command_rx_keys, - joined, - } = state; - if *joined { - let neighbors = neighbors.iter().copied().collect(); - channels - .event_tx - .try_send(Ok(Event::Gossip(GossipEvent::Joined(neighbors)))) - .ok(); - } - - event_senders.push(channels.event_tx); - let command_rx = TopicCommandStream::new(topic_id, channels.command_rx); - let key = self.command_rx.insert(command_rx); - command_rx_keys.insert(key); - - self.handle_in_event( - InEvent::Command( - topic_id, - ProtoCommand::Join(bootstrap.into_iter().collect()), - ), - now, - ) - .await?; - } - } - Ok(()) - } - - async fn handle_in_event(&mut self, event: InEvent, now: Instant) -> anyhow::Result<()> { - self.handle_in_event_inner(event, now).await?; - self.process_quit_queue().await?; - Ok(()) - } - - async fn process_quit_queue(&mut self) -> anyhow::Result<()> { - while let Some(topic_id) = self.quit_queue.pop_front() { - self.handle_in_event_inner( - InEvent::Command(topic_id, ProtoCommand::Quit), - Instant::now(), - ) - .await?; - self.topics.remove(&topic_id); - } - Ok(()) - } - - async fn handle_in_event_inner(&mut self, event: InEvent, now: Instant) -> anyhow::Result<()> { - if matches!(event, InEvent::TimerExpired(_)) { - trace!(?event, "handle in_event"); - } else { - debug!(?event, "handle in_event"); - }; - if let InEvent::PeerDisconnected(peer) = &event { - self.peers.remove(peer); - } - let out = self.state.handle(event, now); - for event in out { - if matches!(event, OutEvent::ScheduleTimer(_, _)) { - trace!(?event, "handle out_event"); - } else { - debug!(?event, "handle out_event"); - }; - match event { - OutEvent::SendMessage(peer_id, message) => { - let info = self.peers.entry(peer_id).or_default(); - match &mut info.state { - PeerState::Active { send_tx } => { - if let Err(_err) = send_tx.send(message).await { - // Removing the peer is handled by the in_event PeerDisconnected sent - // at the end of the connection task. - warn!("connection loop for {peer_id:?} dropped"); - } - } - PeerState::Pending { queue } => { - if queue.is_empty() { - self.dialer.queue_dial(peer_id, GOSSIP_ALPN); - } - queue.push(message); - } - } - } - OutEvent::EmitEvent(topic_id, event) => { - let Some(state) = self.topics.get_mut(&topic_id) else { - // TODO: unreachable? 
- warn!(?topic_id, "gossip state emitted event for unknown topic"); - continue; - }; - let TopicState { - joined, - neighbors, - event_senders, - command_rx_keys, - } = state; - let event = if let ProtoEvent::NeighborUp(neighbor) = event { - neighbors.insert(neighbor); - if !*joined { - *joined = true; - GossipEvent::Joined(vec![neighbor]) - } else { - GossipEvent::NeighborUp(neighbor) - } - } else { - event.into() - }; - event_senders.send(&event); - if event_senders.is_empty() && command_rx_keys.is_empty() { - self.quit_queue.push_back(topic_id); - } - } - OutEvent::ScheduleTimer(delay, timer) => { - self.timers.insert(now + delay, timer); - } - OutEvent::DisconnectPeer(peer_id) => { - if let Some(peer) = self.peers.remove(&peer_id) { - if let Some(conn) = peer.conn_dialed { - conn.close(0u8.into(), b"close from disconnect"); - } - if let Some(conn) = peer.conn_accepted { - conn.close(0u8.into(), b"close from disconnect"); - } - drop(peer.state); - } - } - OutEvent::PeerData(node_id, data) => match decode_peer_data(&data) { - Err(err) => warn!("Failed to decode {data:?} from {node_id}: {err}"), - Ok(info) => { - debug!(peer = ?node_id, "add known addrs: {info:?}"); - let node_addr = NodeAddr { node_id, info }; - if let Err(err) = self - .endpoint - .add_node_addr_with_source(node_addr, SOURCE_NAME) - { - debug!(peer = ?node_id, "add known failed: {err:?}"); - } - } - }, - } - } - Ok(()) - } -} - -#[derive(Debug, Default)] -struct PeerInfo { - state: PeerState, - conn_dialed: Option, - conn_accepted: Option, -} - -#[derive(Debug)] -enum PeerState { - Pending { queue: Vec }, - Active { send_tx: mpsc::Sender }, -} - -impl Default for PeerState { - fn default() -> Self { - PeerState::Pending { queue: Vec::new() } - } -} - -#[derive(Debug, Default)] -struct TopicState { - joined: bool, - neighbors: BTreeSet, - event_senders: EventSenders, - command_rx_keys: HashSet, -} - -/// Whether a connection is initiated by us (Dial) or by the remote peer (Accept) -#[derive(Debug, Clone, Copy)] -enum ConnOrigin { - Accept, - Dial, -} -#[derive(derive_more::Debug)] -struct SubscriberChannels { - event_tx: async_channel::Sender>, - #[debug("CommandStream")] - command_rx: CommandStream, -} - -async fn connection_loop( - from: PublicKey, - conn: Connection, - origin: ConnOrigin, - mut send_rx: mpsc::Receiver, - in_event_tx: &mpsc::Sender, - max_message_size: usize, - queue: Vec, -) -> anyhow::Result<()> { - let (mut send, mut recv) = match origin { - ConnOrigin::Accept => conn.accept_bi().await?, - ConnOrigin::Dial => conn.open_bi().await?, - }; - debug!("connection established"); - let mut send_buf = BytesMut::new(); - let mut recv_buf = BytesMut::new(); - - let send_loop = async { - for msg in queue { - write_message(&mut send, &mut send_buf, &msg, max_message_size).await? - } - while let Some(msg) = send_rx.recv().await { - write_message(&mut send, &mut send_buf, &msg, max_message_size).await? 
- } - Ok::<_, anyhow::Error>(()) - }; - - let recv_loop = async { - loop { - let msg = read_message(&mut recv, &mut recv_buf, max_message_size).await?; - match msg { - None => break, - Some(msg) => in_event_tx.send(InEvent::RecvMessage(from, msg)).await?, - } - } - Ok::<_, anyhow::Error>(()) - }; - - (send_loop, recv_loop).try_join().await?; - - Ok(()) -} - -fn encode_peer_data(info: &AddrInfo) -> anyhow::Result { - let bytes = postcard::to_stdvec(info)?; - anyhow::ensure!(!bytes.is_empty(), "encoding empty peer data: {:?}", info); - Ok(PeerData::new(bytes)) -} - -fn decode_peer_data(peer_data: &PeerData) -> anyhow::Result { - let bytes = peer_data.as_bytes(); - if bytes.is_empty() { - return Ok(AddrInfo::default()); - } - let info = postcard::from_bytes(bytes)?; - Ok(info) -} - -#[derive(Debug, Default)] -struct EventSenders { - senders: Vec<(async_channel::Sender>, bool)>, -} - -impl EventSenders { - fn is_empty(&self) -> bool { - self.senders.is_empty() - } - - fn push(&mut self, sender: async_channel::Sender>) { - self.senders.push((sender, false)); - } - - /// Send an event to all subscribers. - /// - /// This will not wait until the sink is full, but send a `Lagged` response if the sink is almost full. - fn send(&mut self, event: &GossipEvent) { - self.senders.retain_mut(|(send, lagged)| { - // If the stream is disconnected, we don't need to send to it. - if send.is_closed() { - return false; - } - - // Check if the send buffer is almost full, and send a lagged response if it is. - let cap = send.capacity().expect("we only use bounded channels"); - let event = if send.len() >= cap - 1 { - if *lagged { - return true; - } - *lagged = true; - Event::Lagged - } else { - *lagged = false; - Event::Gossip(event.clone()) - }; - match send.try_send(Ok(event)) { - Ok(()) => true, - Err(async_channel::TrySendError::Full(_)) => true, - Err(async_channel::TrySendError::Closed(_)) => false, - } - }) - } -} - -#[derive(derive_more::Debug)] -struct TopicCommandStream { - topic_id: TopicId, - #[debug("CommandStream")] - stream: CommandStream, - closed: bool, -} - -impl TopicCommandStream { - fn new(topic_id: TopicId, stream: CommandStream) -> Self { - Self { - topic_id, - stream, - closed: false, - } - } -} - -impl Stream for TopicCommandStream { - type Item = (TopicId, Option); - fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - if self.closed { - return Poll::Ready(None); - } - match Pin::new(&mut self.stream).poll_next(cx) { - Poll::Ready(Some(item)) => Poll::Ready(Some((self.topic_id, Some(item)))), - Poll::Ready(None) => { - self.closed = true; - Poll::Ready(Some((self.topic_id, None))) - } - Poll::Pending => Poll::Pending, - } - } -} - -fn our_peer_data(endpoint: &Endpoint, direct_addresses: &BTreeSet) -> Result { - let addr = NodeAddr::from_parts( - endpoint.node_id(), - endpoint.home_relay(), - direct_addresses.iter().map(|x| x.addr), - ); - encode_peer_data(&addr.info) -} - -#[cfg(test)] -mod test { - use std::time::Duration; - - use bytes::Bytes; - use futures_concurrency::future::TryJoin; - use iroh_net::{ - key::SecretKey, - relay::{RelayMap, RelayMode}, - }; - use tokio::{spawn, time::timeout}; - use tokio_util::sync::CancellationToken; - use tracing::info; - - use super::*; - - async fn create_endpoint( - rng: &mut rand_chacha::ChaCha12Rng, - relay_map: RelayMap, - ) -> anyhow::Result { - let ep = Endpoint::builder() - .secret_key(SecretKey::generate_with_rng(rng)) - .alpns(vec![GOSSIP_ALPN.to_vec()]) - .relay_mode(RelayMode::Custom(relay_map)) - 
.insecure_skip_relay_cert_verify(true) - .bind() - .await?; - - ep.watch_home_relay().next().await; - Ok(ep) - } - - async fn endpoint_loop( - endpoint: Endpoint, - gossip: Gossip, - cancel: CancellationToken, - ) -> anyhow::Result<()> { - loop { - tokio::select! { - biased; - _ = cancel.cancelled() => break, - incoming = endpoint.accept() => match incoming { - None => break, - Some(incoming) => { - let connecting = match incoming.accept() { - Ok(connecting) => connecting, - Err(err) => { - warn!("incoming connection failed: {err:#}"); - // we can carry on in these cases: - // this can be caused by retransmitted datagrams - continue; - } - }; - gossip.handle_connection(connecting.await?).await? - } - } - } - } - Ok(()) - } - - #[tokio::test] - async fn gossip_net_smoke() { - let mut rng = rand_chacha::ChaCha12Rng::seed_from_u64(1); - let _guard = iroh_test::logging::setup(); - let (relay_map, relay_url, _guard) = - iroh_net::test_utils::run_relay_server().await.unwrap(); - - let ep1 = create_endpoint(&mut rng, relay_map.clone()).await.unwrap(); - let ep2 = create_endpoint(&mut rng, relay_map.clone()).await.unwrap(); - let ep3 = create_endpoint(&mut rng, relay_map.clone()).await.unwrap(); - let addr1 = AddrInfo { - relay_url: Some(relay_url.clone()), - direct_addresses: Default::default(), - }; - let addr2 = AddrInfo { - relay_url: Some(relay_url.clone()), - direct_addresses: Default::default(), - }; - let addr3 = AddrInfo { - relay_url: Some(relay_url.clone()), - direct_addresses: Default::default(), - }; - - let go1 = Gossip::from_endpoint(ep1.clone(), Default::default(), &addr1); - let go2 = Gossip::from_endpoint(ep2.clone(), Default::default(), &addr2); - let go3 = Gossip::from_endpoint(ep3.clone(), Default::default(), &addr3); - debug!("peer1 {:?}", ep1.node_id()); - debug!("peer2 {:?}", ep2.node_id()); - debug!("peer3 {:?}", ep3.node_id()); - let pi1 = ep1.node_id(); - let pi2 = ep2.node_id(); - - let cancel = CancellationToken::new(); - let tasks = [ - spawn(endpoint_loop(ep1.clone(), go1.clone(), cancel.clone())), - spawn(endpoint_loop(ep2.clone(), go2.clone(), cancel.clone())), - spawn(endpoint_loop(ep3.clone(), go3.clone(), cancel.clone())), - ]; - - debug!("----- adding peers ----- "); - let topic: TopicId = blake3::hash(b"foobar").into(); - - let addr1 = NodeAddr::new(pi1).with_relay_url(relay_url.clone()); - let addr2 = NodeAddr::new(pi2).with_relay_url(relay_url); - ep2.add_node_addr(addr1.clone()).unwrap(); - ep3.add_node_addr(addr2).unwrap(); - - debug!("----- joining ----- "); - // join the topics and wait for the connection to succeed - let [sub1, mut sub2, mut sub3] = [ - go1.join(topic, vec![]), - go2.join(topic, vec![pi1]), - go3.join(topic, vec![pi2]), - ] - .try_join() - .await - .unwrap(); - - let (sink1, _stream1) = sub1.split(); - - let len = 2; - - // publish messages on node1 - let pub1 = spawn(async move { - for i in 0..len { - let message = format!("hi{}", i); - info!("go1 broadcast: {message:?}"); - sink1.broadcast(message.into_bytes().into()).await.unwrap(); - tokio::time::sleep(Duration::from_micros(1)).await; - } - }); - - // wait for messages on node2 - let sub2 = spawn(async move { - let mut recv = vec![]; - loop { - let ev = sub2.next().await.unwrap().unwrap(); - info!("go2 event: {ev:?}"); - if let Event::Gossip(GossipEvent::Received(msg)) = ev { - recv.push(msg.content); - } - if recv.len() == len { - return recv; - } - } - }); - - // wait for messages on node3 - let sub3 = spawn(async move { - let mut recv = vec![]; - loop { - let ev = 
sub3.next().await.unwrap().unwrap(); - info!("go3 event: {ev:?}"); - if let Event::Gossip(GossipEvent::Received(msg)) = ev { - recv.push(msg.content); - } - if recv.len() == len { - return recv; - } - } - }); - - timeout(Duration::from_secs(10), pub1) - .await - .unwrap() - .unwrap(); - let recv2 = timeout(Duration::from_secs(10), sub2) - .await - .unwrap() - .unwrap(); - let recv3 = timeout(Duration::from_secs(10), sub3) - .await - .unwrap() - .unwrap(); - - let expected: Vec = (0..len) - .map(|i| Bytes::from(format!("hi{i}").into_bytes())) - .collect(); - assert_eq!(recv2, expected); - assert_eq!(recv3, expected); - - cancel.cancel(); - for t in tasks { - timeout(Duration::from_secs(10), t) - .await - .unwrap() - .unwrap() - .unwrap(); - } - } -} diff --git a/iroh-gossip/src/net/handles.rs b/iroh-gossip/src/net/handles.rs deleted file mode 100644 index c944805afa..0000000000 --- a/iroh-gossip/src/net/handles.rs +++ /dev/null @@ -1,276 +0,0 @@ -//! Topic handles for sending and receiving on a gossip topic. -//! -//! These are returned from [`super::Gossip`]. - -use std::{ - collections::{BTreeSet, HashSet}, - pin::Pin, - task::{Context, Poll}, -}; - -use anyhow::{anyhow, Context as _, Result}; -use bytes::Bytes; -use futures_lite::{Stream, StreamExt}; -use iroh_net::NodeId; -use serde::{Deserialize, Serialize}; - -use crate::{net::TOPIC_EVENTS_DEFAULT_CAP, proto::DeliveryScope}; - -/// Sender for a gossip topic. -#[derive(Debug)] -pub struct GossipSender(async_channel::Sender); - -impl GossipSender { - pub(crate) fn new(sender: async_channel::Sender) -> Self { - Self(sender) - } - - /// Broadcast a message to all nodes. - pub async fn broadcast(&self, message: Bytes) -> anyhow::Result<()> { - self.0 - .send(Command::Broadcast(message)) - .await - .map_err(|_| anyhow!("Gossip actor dropped")) - } - - /// Broadcast a message to our direct neighbors. - pub async fn broadcast_neighbors(&self, message: Bytes) -> anyhow::Result<()> { - self.0 - .send(Command::BroadcastNeighbors(message)) - .await - .map_err(|_| anyhow!("Gossip actor dropped")) - } - - /// Join a set of peers. - pub async fn join_peers(&self, peers: Vec) -> anyhow::Result<()> { - self.0 - .send(Command::JoinPeers(peers)) - .await - .map_err(|_| anyhow!("Gossip actor dropped")) - } -} - -type EventStream = Pin> + Send + 'static>>; - -/// Subscribed gossip topic. -/// -/// This handle is a [`Stream`] of [`Event`]s from the topic, and can be used to send messages. -/// -/// It may be split into sender and receiver parts with [`Self::split`]. -#[derive(Debug)] -pub struct GossipTopic { - sender: GossipSender, - receiver: GossipReceiver, -} - -impl GossipTopic { - pub(crate) fn new(sender: async_channel::Sender, receiver: EventStream) -> Self { - Self { - sender: GossipSender::new(sender), - receiver: GossipReceiver::new(Box::pin(receiver)), - } - } - - /// Splits `self` into [`GossipSender`] and [`GossipReceiver`] parts. - pub fn split(self) -> (GossipSender, GossipReceiver) { - (self.sender, self.receiver) - } - - /// Sends a message to all peers. - pub async fn broadcast(&self, message: Bytes) -> anyhow::Result<()> { - self.sender.broadcast(message).await - } - - /// Sends a message to our direct neighbors in the swarm. - pub async fn broadcast_neighbors(&self, message: Bytes) -> anyhow::Result<()> { - self.sender.broadcast_neighbors(message).await - } - - /// Waits until we are connected to at least one node. 
- pub async fn joined(&mut self) -> Result<()> { - self.receiver.joined().await - } - - /// Returns true if we are connected to at least one node. - pub fn is_joined(&self) -> bool { - self.receiver.is_joined() - } -} - -impl Stream for GossipTopic { - type Item = Result; - fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - Pin::new(&mut self.receiver).poll_next(cx) - } -} - -/// Receiver for gossip events on a topic. -/// -/// This is a [`Stream`] of [`Event`]s emitted from the topic. -#[derive(derive_more::Debug)] -pub struct GossipReceiver { - #[debug("EventStream")] - stream: EventStream, - neighbors: HashSet, - joined: bool, -} - -impl GossipReceiver { - pub(crate) fn new(events_rx: EventStream) -> Self { - Self { - stream: events_rx, - neighbors: Default::default(), - joined: false, - } - } - - /// Lists our current direct neighbors. - pub fn neighbors(&self) -> impl Iterator + '_ { - self.neighbors.iter().copied() - } - - /// Waits until we are connected to at least one node. - /// - /// This progresses the stream until we received [`GossipEvent::Joined`], which is the first - /// item emitted on the stream. - /// - /// Note that this consumes the [`GossipEvent::Joined`] event. If you want to act on these - /// initial neighbors, use [`Self::neighbors`] after awaiting [`Self::joined`]. - pub async fn joined(&mut self) -> Result<()> { - if !self.joined { - match self - .try_next() - .await? - .context("Gossip receiver closed before Joined event was received.")? - { - Event::Gossip(GossipEvent::Joined(_)) => {} - _ => anyhow::bail!("Expected Joined event to be the first event received."), - } - } - Ok(()) - } - - /// Returns true if we are connected to at least one node. - pub fn is_joined(&self) -> bool { - !self.neighbors.is_empty() - } -} - -impl Stream for GossipReceiver { - type Item = Result; - fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - let item = std::task::ready!(Pin::new(&mut self.stream).poll_next(cx)); - if let Some(Ok(item)) = &item { - match item { - Event::Gossip(GossipEvent::Joined(neighbors)) => { - self.joined = true; - self.neighbors.extend(neighbors.iter().copied()); - } - Event::Gossip(GossipEvent::NeighborUp(node_id)) => { - self.neighbors.insert(*node_id); - } - Event::Gossip(GossipEvent::NeighborDown(node_id)) => { - self.neighbors.remove(node_id); - } - _ => {} - } - } - Poll::Ready(item) - } -} - -/// Events emitted from a gossip topic with a lagging notification. -/// -/// This is the item of the [`GossipReceiver`] stream. It wraps the actual gossip events to also -/// provide a notification if we missed gossip events for the topic. -#[derive(Serialize, Deserialize, Debug, Eq, PartialEq)] -pub enum Event { - /// We received an event. - Gossip(GossipEvent), - /// We missed some messages because our [`GossipReceiver`] was not progressing fast enough. - Lagged, -} - -/// Events emitted from a gossip topic. -/// -/// These are the events emitted from a [`GossipReceiver`], wrapped in [`Event::Gossip`]. -#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Serialize, Deserialize)] -pub enum GossipEvent { - /// We joined the topic with at least one peer. - /// - /// This is the first event on a [`GossipReceiver`] and will only be emitted once. - Joined(Vec), - /// We have a new, direct neighbor in the swarm membership layer for this topic. - NeighborUp(NodeId), - /// We dropped direct neighbor in the swarm membership layer for this topic. 
- NeighborDown(NodeId), - /// We received a gossip message for this topic. - Received(Message), -} - -impl From> for GossipEvent { - fn from(event: crate::proto::Event) -> Self { - match event { - crate::proto::Event::NeighborUp(node_id) => Self::NeighborUp(node_id), - crate::proto::Event::NeighborDown(node_id) => Self::NeighborDown(node_id), - crate::proto::Event::Received(message) => Self::Received(Message { - content: message.content, - scope: message.scope, - delivered_from: message.delivered_from, - }), - } - } -} - -/// A gossip message -#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, derive_more::Debug, Serialize, Deserialize)] -pub struct Message { - /// The content of the message - #[debug("Bytes({})", self.content.len())] - pub content: Bytes, - /// The scope of the message. - /// This tells us if the message is from a direct neighbor or actual gossip. - pub scope: DeliveryScope, - /// The node that delivered the message. This is not the same as the original author. - pub delivered_from: NodeId, -} - -/// A stream of commands for a gossip subscription. -pub type CommandStream = Pin + Send + Sync + 'static>>; - -/// Send a gossip message -#[derive(Serialize, Deserialize, derive_more::Debug)] -pub enum Command { - /// Broadcast a message to all nodes in the swarm - Broadcast(#[debug("Bytes({})", _0.len())] Bytes), - /// Broadcast a message to all direct neighbors - BroadcastNeighbors(#[debug("Bytes({})", _0.len())] Bytes), - /// Connect to a set of peers - JoinPeers(Vec), -} - -/// Options for joining a gossip topic. -#[derive(Serialize, Deserialize, Debug)] -pub struct JoinOptions { - /// The initial bootstrap nodes - pub bootstrap: BTreeSet, - /// The maximum number of messages that can be buffered in a subscription. - /// - /// If this limit is reached, the subscriber will receive a `Lagged` response, - /// the message will be dropped, and the subscriber will be closed. - /// - /// This is to prevent a single slow subscriber from blocking the dispatch loop. - /// If a subscriber is lagging, it should be closed and re-opened. - pub subscription_capacity: usize, -} - -impl JoinOptions { - /// Creates [`JoinOptions`] with the provided bootstrap nodes and the default subscription - /// capacity. - pub fn with_bootstrap(nodes: impl IntoIterator) -> Self { - Self { - bootstrap: nodes.into_iter().collect(), - subscription_capacity: TOPIC_EVENTS_DEFAULT_CAP, - } - } -} diff --git a/iroh-gossip/src/net/util.rs b/iroh-gossip/src/net/util.rs deleted file mode 100644 index 5bfbee633e..0000000000 --- a/iroh-gossip/src/net/util.rs +++ /dev/null @@ -1,128 +0,0 @@ -//! Utilities for iroh-gossip networking - -use std::{io, pin::Pin, time::Instant}; - -use anyhow::{bail, ensure, Context, Result}; -use bytes::{Bytes, BytesMut}; -use tokio::{ - io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt}, - time::{sleep_until, Sleep}, -}; - -use super::ProtoMessage; -use crate::proto::util::TimerMap; - -/// Write a `ProtoMessage` as a length-prefixed, postcard-encoded message. 
-pub async fn write_message( - writer: &mut W, - buffer: &mut BytesMut, - frame: &ProtoMessage, - max_message_size: usize, -) -> Result<()> { - let len = postcard::experimental::serialized_size(&frame)?; - ensure!(len < max_message_size); - buffer.clear(); - buffer.resize(len, 0u8); - let slice = postcard::to_slice(&frame, buffer)?; - writer.write_u32(len as u32).await?; - writer.write_all(slice).await?; - Ok(()) -} - -/// Read a length-prefixed message and decode as `ProtoMessage`; -pub async fn read_message( - reader: impl AsyncRead + Unpin, - buffer: &mut BytesMut, - max_message_size: usize, -) -> Result> { - match read_lp(reader, buffer, max_message_size).await? { - None => Ok(None), - Some(data) => { - let message = postcard::from_bytes(&data)?; - Ok(Some(message)) - } - } -} - -/// Reads a length prefixed message. -/// -/// # Returns -/// -/// The message as raw bytes. If the end of the stream is reached and there is no partial -/// message, returns `None`. -pub async fn read_lp( - mut reader: impl AsyncRead + Unpin, - buffer: &mut BytesMut, - max_message_size: usize, -) -> Result> { - let size = match reader.read_u32().await { - Ok(size) => size, - Err(err) if err.kind() == io::ErrorKind::UnexpectedEof => return Ok(None), - Err(err) => return Err(err.into()), - }; - let mut reader = reader.take(size as u64); - let size = usize::try_from(size).context("frame larger than usize")?; - if size > max_message_size { - bail!("Incoming message exceeds the maximum message size of {max_message_size} bytes"); - } - buffer.reserve(size); - loop { - let r = reader.read_buf(buffer).await?; - if r == 0 { - break; - } - } - Ok(Some(buffer.split_to(size).freeze())) -} - -/// A [`TimerMap`] with an async method to wait for the next timer expiration. -#[derive(Debug)] -pub struct Timers { - next: Option<(Instant, Pin>)>, - map: TimerMap, -} - -impl Default for Timers { - fn default() -> Self { - Self { - next: None, - map: TimerMap::default(), - } - } -} - -impl Timers { - /// Create a new timer map - pub fn new() -> Self { - Self::default() - } - - /// Insert a new entry at the specified instant - pub fn insert(&mut self, instant: Instant, item: T) { - self.map.insert(instant, item); - } - - fn reset(&mut self) { - self.next = self - .map - .first() - .map(|(instant, _)| (*instant, Box::pin(sleep_until((*instant).into())))) - } - - /// Wait for the next timer to expire and return an iterator of all expired timers - /// - /// If the [TimerMap] is empty, this will return a future that is pending forever. - /// After inserting a new entry, prior futures returned from this method will not become ready. - /// They should be dropped after calling [Self::insert], and a new future as returned from - /// this method should be awaited instead. - pub async fn wait_and_drain(&mut self) -> impl Iterator { - self.reset(); - match self.next.as_mut() { - Some((instant, sleep)) => { - sleep.await; - self.map.drain_until(instant) - } - None => std::future::pending().await, - } - } -} diff --git a/iroh-gossip/src/proto.rs b/iroh-gossip/src/proto.rs deleted file mode 100644 index f87eae8364..0000000000 --- a/iroh-gossip/src/proto.rs +++ /dev/null @@ -1,376 +0,0 @@ -//! Implementation of the iroh-gossip protocol, as an IO-less state machine -//! -//! This module implements the iroh-gossip protocol. The entry point is [`State`], which contains -//! the protocol state for a node. -//! -//! The iroh-gossip protocol is made up from two parts: A swarm membership protocol, based on -//! 
[HyParView][hyparview], and a gossip broadcasting protocol, based on [PlumTree][plumtree]. -//! -//! For a full explanation it is recommended to read the two papers. What follows is a brief -//! outline of the protocols. -//! -//! All protocol messages are namespaced by a [`TopicId`], a 32 byte identifier. Topics are -//! separate swarms and broadcast scopes. The HyParView and PlumTree algorithms both work in the -//! scope of a single topic. Thus, joining multiple topics increases the number of open connections -//! to peers and the size of the local routing table. -//! -//! The **membership protocol** ([HyParView][hyparview]) is a cluster protocol where each peer -//! maintains a partial view of all nodes in the swarm. -//! A peer joins the swarm for a topic by connecting to any known peer that is a member of this -//! topic's swarm. Obtaining this initial contact info happens out of band. The peer then sends -//! a `Join` message to that initial peer. All peers maintain a list of -//! `active` and `passive` peers. Active peers are those that you maintain active connections to. -//! Passive peers is an addressbook of additional peers. If one of your active peers goes offline, -//! its slot is filled with a random peer from the passive set. In the default configuration, the -//! active view has a size of 5 and the passive view a size of 30. -//! The HyParView protocol ensures that active connections are always bidirectional, and regularly -//! exchanges nodes for the passive view in a `Shuffle` operation. -//! Thus, this protocol exposes a high degree of reliability and auto-recovery in the case of node -//! failures. -//! -//! The **gossip protocol** ([PlumTree][plumtree]) builds upon the membership protocol. It exposes -//! a method to broadcast messages to all peers in the swarm. On each node, it maintains two sets -//! of peers: An `eager` set and a `lazy` set. Both are subsets of the `active` view from the -//! membership protocol. When broadcasting a message from the local node, or upon receiving a -//! broadcast message, the message is pushed to all peers in the eager set. Additionally, the hash -//! of the message (which uniquely identifies it), but not the message content, is lazily pushed -//! to all peers in the `lazy` set. When receiving such lazy pushes (called `Ihaves`), those peers -//! may request the message content after a timeout if they didn't receive the message by one of -//! their eager peers before. When requesting a message from a currently-lazy peer, this peer is -//! also upgraded to be an eager peer from that moment on. This strategy self-optimizes the -//! messaging graph by latency. Note however that this optimization will work best if the messaging -//! paths are stable, i.e. if it's always the same peer that broadcasts. If not, the relative -//! message redundancy will grow and the ideal messaging graph might change frequently. -//! -//! [hyparview]: https://asc.di.fct.unl.pt/~jleitao/pdf/dsn07-leitao.pdf -//! [plumtree]: https://asc.di.fct.unl.pt/~jleitao/pdf/srds07-leitao.pdf - -use std::{fmt, hash::Hash}; - -use bytes::Bytes; -use serde::{de::DeserializeOwned, Deserialize, Serialize}; - -mod hyparview; -mod plumtree; -pub mod state; -pub mod topic; -pub mod util; - -#[cfg(test)] -mod tests; - -pub use plumtree::{DeliveryScope, Scope}; -pub use state::{InEvent, Message, OutEvent, State, Timer, TopicId}; -pub use topic::{Command, Config, Event, IO}; - -/// The identifier for a peer. -/// -/// The protocol implementation is generic over this trait. 
When implementing the protocol, -/// a concrete type must be chosen that will then be used throughout the implementation to identify -/// and index individual peers. -/// -/// Note that the concrete type will be used in protocol messages. Therefore, implementations of -/// the protocol are only compatible if the same concrete type is supplied for this trait. -/// -/// TODO: Rename to `PeerId`? It does not necessarily refer to a peer's address, as long as the -/// networking layer can translate the value of its concrete type into an address. -pub trait PeerIdentity: Hash + Eq + Copy + fmt::Debug + Serialize + DeserializeOwned {} -impl PeerIdentity for T where T: Hash + Eq + Copy + fmt::Debug + Serialize + DeserializeOwned {} - -/// Opaque binary data that is transmitted on messages that introduce new peers. -/// -/// Implementations may use these bytes to supply addresses or other information needed to connect -/// to a peer that is not included in the peer's [`PeerIdentity`]. -#[derive(derive_more::Debug, Serialize, Deserialize, Clone, PartialEq, Eq, Default)] -#[debug("PeerData({}b)", self.0.len())] -pub struct PeerData(Bytes); - -impl PeerData { - /// Create a new [`PeerData`] from a byte buffer. - pub fn new(data: impl Into) -> Self { - Self(data.into()) - } - - /// Get a reference to the contained [`bytes::Bytes`]. - pub fn inner(&self) -> &bytes::Bytes { - &self.0 - } - - /// Get the peer data as a byte slice. - pub fn as_bytes(&self) -> &[u8] { - &self.0 - } -} - -/// PeerInfo contains a peer's identifier and the opaque peer data as provided by the implementer. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] -struct PeerInfo { - pub id: PI, - pub data: Option, -} - -impl From<(PI, Option)> for PeerInfo { - fn from((id, data): (PI, Option)) -> Self { - Self { id, data } - } -} - -#[cfg(test)] -mod test { - - use std::{collections::HashSet, env, time::Instant}; - - use rand::SeedableRng; - - use super::{Command, Config, Event, State}; - use crate::proto::{ - tests::{ - assert_synchronous_active, report_round_distribution, sort, Network, Simulator, - SimulatorConfig, - }, - Scope, TopicId, - }; - - #[test] - fn hyparview_smoke() { - let _guard = iroh_test::logging::setup(); - // Create a network with 4 nodes and active_view_capacity 2 - let mut config = Config::default(); - config.membership.active_view_capacity = 2; - let mut network = Network::new(Instant::now()); - let rng = rand_chacha::ChaCha12Rng::seed_from_u64(99); - for i in 0..4 { - network.push(State::new( - i, - Default::default(), - config.clone(), - rng.clone(), - )); - } - - let t: TopicId = [0u8; 32].into(); - - // Do some joins between nodes 0,1,2 - network.command(0, t, Command::Join(vec![1])); - network.command(0, t, Command::Join(vec![2])); - network.command(1, t, Command::Join(vec![2])); - network.command(2, t, Command::Join(vec![])); - network.ticks(10); - - // Confirm emitted events - let actual = network.events_sorted(); - let expected = sort(vec![ - (0, t, Event::NeighborUp(1)), - (0, t, Event::NeighborUp(2)), - (1, t, Event::NeighborUp(2)), - (1, t, Event::NeighborUp(0)), - (2, t, Event::NeighborUp(0)), - (2, t, Event::NeighborUp(1)), - ]); - assert_eq!(actual, expected); - - // Confirm active connections - assert_eq!(network.conns(), vec![(0, 1), (0, 2), (1, 2)]); - - // Now let node 3 join node 0. - // Node 0 is full, so it will disconnect from either node 1 or node 2. - network.command(3, t, Command::Join(vec![0])); - network.ticks(8); - - // Confirm emitted events. 
There's two options because whether node 0 disconnects from - // node 1 or node 2 is random. - let actual = network.events_sorted(); - eprintln!("actual {actual:?}"); - let expected1 = sort(vec![ - (3, t, Event::NeighborUp(0)), - (0, t, Event::NeighborUp(3)), - (0, t, Event::NeighborDown(1)), - (1, t, Event::NeighborDown(0)), - ]); - let expected2 = sort(vec![ - (3, t, Event::NeighborUp(0)), - (0, t, Event::NeighborUp(3)), - (0, t, Event::NeighborDown(2)), - (2, t, Event::NeighborDown(0)), - ]); - assert!((actual == expected1) || (actual == expected2)); - - // Confirm active connections. - if actual == expected1 { - assert_eq!(network.conns(), vec![(0, 2), (0, 3), (1, 2)]); - } else { - assert_eq!(network.conns(), vec![(0, 1), (0, 3), (1, 2)]); - } - assert!(assert_synchronous_active(&network)); - } - - #[test] - fn plumtree_smoke() { - let _guard = iroh_test::logging::setup(); - let config = Config::default(); - let mut network = Network::new(Instant::now()); - let broadcast_ticks = 12; - let join_ticks = 12; - // build a network with 6 nodes - let rng = rand_chacha::ChaCha12Rng::seed_from_u64(99); - for i in 0..6 { - network.push(State::new( - i, - Default::default(), - config.clone(), - rng.clone(), - )); - } - - let t = [0u8; 32].into(); - - // let node 0 join the topic but do not connect to any peers - network.command(0, t, Command::Join(vec![])); - // connect nodes 1 and 2 to node 0 - (1..3).for_each(|i| network.command(i, t, Command::Join(vec![0]))); - // connect nodes 4 and 5 to node 3 - network.command(3, t, Command::Join(vec![])); - (4..6).for_each(|i| network.command(i, t, Command::Join(vec![3]))); - // run ticks and drain events - network.ticks(join_ticks); - let _ = network.events(); - assert!(assert_synchronous_active(&network)); - - // now broadcast a first message - network.command( - 1, - t, - Command::Broadcast(b"hi1".to_vec().into(), Scope::Swarm), - ); - network.ticks(broadcast_ticks); - let events = network.events(); - let received = events.filter(|x| matches!(x, (_, _, Event::Received(_)))); - // message should be received by two other nodes - assert_eq!(received.count(), 2); - assert!(assert_synchronous_active(&network)); - - // now connect the two sections of the swarm - network.command(2, t, Command::Join(vec![5])); - network.ticks(join_ticks); - let _ = network.events(); - report_round_distribution(&network); - - // now broadcast again - network.command( - 1, - t, - Command::Broadcast(b"hi2".to_vec().into(), Scope::Swarm), - ); - network.ticks(broadcast_ticks); - let events = network.events(); - let received = events.filter(|x| matches!(x, (_, _, Event::Received(_)))); - // message should be received by all 5 other nodes - assert_eq!(received.count(), 5); - assert!(assert_synchronous_active(&network)); - report_round_distribution(&network); - } - - #[test] - fn big_multiple_sender() { - let _guard = iroh_test::logging::setup(); - let mut gossip_config = Config::default(); - gossip_config.broadcast.optimization_threshold = (read_var("OPTIM", 7) as u16).into(); - let config = SimulatorConfig { - peers_count: read_var("PEERS", 100), - ..Default::default() - }; - let rounds = read_var("ROUNDS", 10); - let mut simulator = Simulator::new(config, gossip_config); - simulator.init(); - simulator.bootstrap(); - for i in 0..rounds { - let from = i + 1; - let message = format!("m{i}").into_bytes().into(); - simulator.gossip_round(from, message) - } - simulator.report_round_sums(); - } - - #[test] - fn big_single_sender() { - let _guard = iroh_test::logging::setup(); - let mut 
gossip_config = Config::default(); - gossip_config.broadcast.optimization_threshold = (read_var("OPTIM", 7) as u16).into(); - let config = SimulatorConfig { - peers_count: read_var("PEERS", 100), - ..Default::default() - }; - let rounds = read_var("ROUNDS", 10); - let mut simulator = Simulator::new(config, gossip_config); - simulator.init(); - simulator.bootstrap(); - for i in 0..rounds { - let from = 2; - let message = format!("m{i}").into_bytes().into(); - simulator.gossip_round(from, message) - } - simulator.report_round_sums(); - } - - #[test] - fn quit() { - let _guard = iroh_test::logging::setup(); - // Create a network with 4 nodes and active_view_capacity 2 - let mut config = Config::default(); - config.membership.active_view_capacity = 2; - let mut network = Network::new(Instant::now()); - let num = 4; - let rng = rand_chacha::ChaCha12Rng::seed_from_u64(99); - for i in 0..num { - network.push(State::new( - i, - Default::default(), - config.clone(), - rng.clone(), - )); - } - - let t: TopicId = [0u8; 32].into(); - - // join all nodes - network.command(0, t, Command::Join(vec![])); - network.command(1, t, Command::Join(vec![0])); - network.command(2, t, Command::Join(vec![1])); - network.command(3, t, Command::Join(vec![2])); - network.ticks(10); - - // assert all peers appear in the connections - let all_conns: HashSet = HashSet::from_iter((0..4).flat_map(|pa| { - network - .get_active(&pa, &t) - .unwrap() - .into_iter() - .flat_map(|x| x.into_iter()) - })); - assert_eq!(all_conns, HashSet::from_iter([0, 1, 2, 3])); - assert!(assert_synchronous_active(&network)); - - // let node 3 leave the swarm - network.command(3, t, Command::Quit); - network.ticks(4); - assert!(network.peer(&3).unwrap().state(&t).is_none()); - - // assert all peers without peer 3 appear in the connections - let all_conns: HashSet = HashSet::from_iter((0..num).flat_map(|pa| { - network - .get_active(&pa, &t) - .unwrap() - .into_iter() - .flat_map(|x| x.into_iter()) - })); - assert_eq!(all_conns, HashSet::from_iter([0, 1, 2])); - assert!(assert_synchronous_active(&network)); - } - - fn read_var(name: &str, default: usize) -> usize { - env::var(name) - .unwrap_or_else(|_| default.to_string()) - .parse() - .unwrap() - } -} diff --git a/iroh-gossip/src/proto/hyparview.rs b/iroh-gossip/src/proto/hyparview.rs deleted file mode 100644 index e40f7cd717..0000000000 --- a/iroh-gossip/src/proto/hyparview.rs +++ /dev/null @@ -1,718 +0,0 @@ -//! Implementation of the HyParView membership protocol -//! -//! The implementation is based on [this paper][paper] by Joao Leitao, Jose Pereira, Luıs Rodrigues -//! and the [example implementation][impl] by Bartosz Sypytkowski -//! -//! [paper]: https://asc.di.fct.unl.pt/~jleitao/pdf/dsn07-leitao.pdf -//! [impl]: https://gist.github.com/Horusiath/84fac596101b197da0546d1697580d99 - -use std::{ - collections::{HashMap, HashSet}, - time::{Duration, Instant}, -}; - -use derive_more::{From, Sub}; -use rand::{rngs::ThreadRng, Rng}; -use serde::{Deserialize, Serialize}; -use tracing::debug; - -use super::{util::IndexSet, PeerData, PeerIdentity, PeerInfo, IO}; - -/// Input event for HyParView -#[derive(Debug)] -pub enum InEvent { - /// A [`Message`] was received from a peer. - RecvMessage(PI, Message), - /// A timer has expired. - TimerExpired(Timer), - /// A peer was disconnected on the IO layer. - PeerDisconnected(PI), - /// Send a join request to a peer. - RequestJoin(PI), - /// Update the peer data that is transmitted on join requests. 
- UpdatePeerData(PeerData), - /// Quit the swarm, informing peers about us leaving. - Quit, -} - -/// Output event for HyParView -#[derive(Debug)] -pub enum OutEvent { - /// Ask the IO layer to send a [`Message`] to peer `PI`. - SendMessage(PI, Message), - /// Schedule a [`Timer`]. - ScheduleTimer(Duration, Timer), - /// Ask the IO layer to close the connection to peer `PI`. - DisconnectPeer(PI), - /// Emit an [`Event`] to the application. - EmitEvent(Event), - /// New [`PeerData`] was received for peer `PI`. - PeerData(PI, PeerData), -} - -/// Event emitted by the [`State`] to the application. -#[derive(Clone, Debug)] -pub enum Event { - /// A peer was added to our set of active connections. - NeighborUp(PI), - /// A peer was removed from our set of active connections. - NeighborDown(PI), -} - -/// Kinds of timers HyParView needs to schedule. -#[derive(Clone, Debug, PartialEq, Eq)] -pub enum Timer { - DoShuffle, - PendingNeighborRequest(PI), -} - -/// Messages that we can send and receive from peers within the topic. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] -pub enum Message { - /// Sent to a peer if you want to join the swarm - Join(Option), - /// When receiving Join, ForwardJoin is forwarded to the peer's ActiveView to introduce the - /// new member. - ForwardJoin(ForwardJoin), - /// A shuffle request is sent occasionally to re-shuffle the PassiveView with contacts from - /// other peers. - Shuffle(Shuffle), - /// Peers reply to [`Message::Shuffle`] requests with a random peers from their active and - /// passive views. - ShuffleReply(ShuffleReply), - /// Request to add sender to an active view of recipient. If [`Neighbor::priority`] is - /// [`Priority::High`], the request cannot be denied. - Neighbor(Neighbor), - /// Request to disconnect from a peer. - /// If [`Disconnect::alive`] is true, the other peer is not shutting down, so it should be - /// added to the passive set. - /// If [`Disconnect::respond`] is true, the peer should answer the disconnect request - /// before shutting down the connection. - Disconnect(Disconnect), -} - -/// The time-to-live for this message. -/// -/// Each time a message is forwarded, the `Ttl` is decreased by 1. If the `Ttl` reaches 0, it -/// should not be forwarded further. -#[derive(From, Sub, Eq, PartialEq, Clone, Debug, Copy, Serialize, Deserialize)] -pub struct Ttl(pub u16); -impl Ttl { - pub fn expired(&self) -> bool { - *self == Ttl(0) - } - pub fn next(&self) -> Ttl { - Ttl(self.0.saturating_sub(1)) - } -} - -/// A message informing other peers that a new peer joined the swarm for this topic. -/// -/// Will be forwarded in a random walk until `ttl` reaches 0. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] -pub struct ForwardJoin { - /// The peer that newly joined the swarm - peer: PeerInfo, - /// The time-to-live for this message - ttl: Ttl, -} - -/// Shuffle messages are sent occasionally to shuffle our passive view with peers from other peer's -/// active and passive views. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] -pub struct Shuffle { - /// The peer that initiated the shuffle request. - origin: PI, - /// A random subset of the active and passive peers of the `origin` peer. - nodes: Vec>, - /// The time-to-live for this message. - ttl: Ttl, -} - -/// Once a shuffle messages reaches a [`Ttl`] of 0, a peer replies with a `ShuffleReply`. 
-/// -/// The reply is sent to the peer that initiated the shuffle and contains a subset of the active -/// and passive views of the peer at the end of the random walk. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] -pub struct ShuffleReply { - /// A random subset of the active and passive peers of the peer sending the `ShuffleReply`. - nodes: Vec>, -} - -/// The priority of a `Join` message -/// -/// This is `High` if the sender does not have any active peers, and `Low` otherwise. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] -pub enum Priority { - /// High priority join that may not be denied. - /// - /// A peer may only send high priority joins if it doesn't have any active peers at the moment. - High, - /// Low priority join that can be denied. - Low, -} - -/// A neighbor message is sent after adding a peer to our active view to inform them that we are -/// now neighbors. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] -pub struct Neighbor { - /// The priority of the `Join` or `ForwardJoin` message that triggered this neighbor request. - priority: Priority, - /// The user data of the peer sending this message. - data: Option, -} - -/// Message sent when leaving the swarm or closing down to inform peers about us being gone. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] -pub struct Disconnect { - /// Whether we are actually shutting down or closing the connection only because our limits are - /// reached. - alive: Alive, - /// Whether we should reply to the peer with a Disconnect message. - respond: Respond, -} - -/// Configuration for the swarm membership layer -#[derive(Clone, Debug)] -pub struct Config { - /// Number of peers to which active connections are maintained - pub active_view_capacity: usize, - /// Number of peers for which contact information is remembered, - /// but to which we are not actively connected to. - pub passive_view_capacity: usize, - /// Number of hops a `ForwardJoin` message is propagated until the new peer's info - /// is added to a peer's active view. - pub active_random_walk_length: Ttl, - /// Number of hops a `ForwardJoin` message is propagated until the new peer's info - /// is added to a peer's passive view. - pub passive_random_walk_length: Ttl, - /// Number of hops a `Shuffle` message is propagated until a peer replies to it. - pub shuffle_random_walk_length: Ttl, - /// Number of active peers to be included in a `Shuffle` request. - pub shuffle_active_view_count: usize, - /// Number of passive peers to be included in a `Shuffle` request. 
- pub shuffle_passive_view_count: usize, - /// Interval duration for shuffle requests - pub shuffle_interval: Duration, - /// Timeout after which a `Neighbor` request is considered failed - pub neighbor_request_timeout: Duration, -} -impl Default for Config { - /// Default values for the HyParView layer - fn default() -> Self { - Self { - // From the paper (p9) - active_view_capacity: 5, - // From the paper (p9) - passive_view_capacity: 30, - // From the paper (p9) - active_random_walk_length: Ttl(6), - // From the paper (p9) - passive_random_walk_length: Ttl(3), - // From the paper (p9) - shuffle_random_walk_length: Ttl(6), - // From the paper (p9) - shuffle_active_view_count: 3, - // From the paper (p9) - shuffle_passive_view_count: 4, - // Wild guess - shuffle_interval: Duration::from_secs(60), - // Wild guess - neighbor_request_timeout: Duration::from_millis(500), - } - } -} - -pub type Respond = bool; -pub type Alive = bool; - -#[derive(Default, Debug, Clone)] -pub struct Stats { - total_connections: usize, -} - -/// The state of the HyParView protocol -#[derive(Debug)] -pub struct State { - /// Our peer identity - me: PI, - /// Our opaque user data to transmit to peers on join messages - me_data: Option, - /// The active view, i.e. peers we are connected to - pub(crate) active_view: IndexSet, - /// The passive view, i.e. peers we know about but are not connected to at the moment - pub(crate) passive_view: IndexSet, - /// Protocol configuration (cannot change at runtime) - config: Config, - /// Whether a shuffle timer is currently scheduled - shuffle_scheduled: bool, - /// Random number generator - rng: RG, - /// Statistics - stats: Stats, - /// The set of neighbor requests we sent out but did not yet receive a reply for - pending_neighbor_requests: HashSet, - /// The opaque user peer data we received for other peers - peer_data: HashMap, -} - -impl State -where - PI: PeerIdentity, - RG: Rng, -{ - pub fn new(me: PI, me_data: Option, config: Config, rng: RG) -> Self { - Self { - me, - me_data, - active_view: IndexSet::new(), - passive_view: IndexSet::new(), - config, - shuffle_scheduled: false, - rng, - stats: Stats::default(), - pending_neighbor_requests: Default::default(), - peer_data: Default::default(), - } - } - - pub fn handle(&mut self, event: InEvent, now: Instant, io: &mut impl IO) { - match event { - InEvent::RecvMessage(from, message) => self.handle_message(from, message, now, io), - InEvent::TimerExpired(timer) => match timer { - Timer::DoShuffle => self.handle_shuffle_timer(io), - Timer::PendingNeighborRequest(peer) => self.handle_pending_neighbor_timer(peer, io), - }, - InEvent::PeerDisconnected(peer) => self.handle_disconnect(peer, io), - InEvent::RequestJoin(peer) => self.handle_join(peer, io), - InEvent::UpdatePeerData(data) => { - self.me_data = Some(data); - } - InEvent::Quit => self.handle_quit(io), - } - - // this will only happen on the first call - if !self.shuffle_scheduled { - io.push(OutEvent::ScheduleTimer( - self.config.shuffle_interval, - Timer::DoShuffle, - )); - self.shuffle_scheduled = true; - } - } - - fn handle_message( - &mut self, - from: PI, - message: Message, - now: Instant, - io: &mut impl IO, - ) { - let is_disconnect = matches!(message, Message::Disconnect(Disconnect { .. 
})); - if !is_disconnect && !self.active_view.contains(&from) { - self.stats.total_connections += 1; - } - match message { - Message::Join(data) => self.on_join(from, data, now, io), - Message::ForwardJoin(details) => self.on_forward_join(from, details, now, io), - Message::Shuffle(details) => self.on_shuffle(from, details, io), - Message::ShuffleReply(details) => self.on_shuffle_reply(details, io), - Message::Neighbor(details) => self.on_neighbor(from, details, now, io), - Message::Disconnect(details) => self.on_disconnect(from, details, io), - } - - // Disconnect from passive nodes right after receiving a message. - if !is_disconnect && !self.active_view.contains(&from) { - io.push(OutEvent::DisconnectPeer(from)); - } - } - - fn handle_join(&mut self, peer: PI, io: &mut impl IO) { - io.push(OutEvent::SendMessage( - peer, - Message::Join(self.me_data.clone()), - )); - } - - fn handle_disconnect(&mut self, peer: PI, io: &mut impl IO) { - self.on_disconnect( - peer, - Disconnect { - alive: true, - respond: false, - }, - io, - ); - } - - fn handle_quit(&mut self, io: &mut impl IO) { - for peer in self.active_view.clone().into_iter() { - self.on_disconnect( - peer, - Disconnect { - alive: false, - respond: true, - }, - io, - ); - } - } - - fn on_join(&mut self, peer: PI, data: Option, now: Instant, io: &mut impl IO) { - // If the peer is already in our active view, there's nothing to do. - if self.active_view.contains(&peer) { - // .. but we still update the peer data. - self.insert_peer_info((peer, data).into(), io); - return; - } - // "A node that receives a join request will start by adding the new - // node to its active view, even if it has to drop a random node from it. (6)" - self.add_active(peer, data.clone(), Priority::High, now, io); - // "The contact node c will then send to all other nodes in its active view a ForwardJoin - // request containing the new node identifier. Associated to the join procedure, - // there are two configuration parameters, named Active Random Walk Length (ARWL), - // that specifies the maximum number of hops a ForwardJoin request is propagated, - // and Passive Random Walk Length (PRWL), that specifies at which point in the walk the node - // is inserted in a passive view. To use these parameters, the ForwardJoin request carries - // a “time to live” field that is initially set to ARWL and decreased at every hop. (7)" - let ttl = self.config.active_random_walk_length; - let peer_info = PeerInfo { id: peer, data }; - for node in self.active_view.iter_without(&peer) { - let message = Message::ForwardJoin(ForwardJoin { - peer: peer_info.clone(), - ttl, - }); - io.push(OutEvent::SendMessage(*node, message)); - } - } - - fn on_forward_join( - &mut self, - sender: PI, - message: ForwardJoin, - now: Instant, - io: &mut impl IO, - ) { - // "i) If the time to live is equal to zero or if the number of nodes in p’s active view is equal to one, - // it will add the new node to its active view (7)" - if message.ttl.expired() || self.active_view.len() <= 1 { - self.add_active( - message.peer.id, - message.peer.data.clone(), - Priority::High, - now, - io, - ); - } - // "ii) If the time to live is equal to PRWL, p will insert the new node into its passive view" - else if message.ttl == self.config.passive_random_walk_length { - self.add_passive(message.peer.id, message.peer.data.clone(), io); - } - // "iii) The time to live field is decremented." 
- // "iv) If, at this point, n has not been inserted - // in p’s active view, p will forward the request to a random node in its active view - // (different from the one from which the request was received)." - if !self.active_view.contains(&message.peer.id) { - match self - .active_view - .pick_random_without(&[&sender], &mut self.rng) - { - None => { - unreachable!("if the peer was not added, there are at least two peers in our active view."); - } - Some(next) => { - let message = Message::ForwardJoin(ForwardJoin { - peer: message.peer, - ttl: message.ttl.next(), - }); - io.push(OutEvent::SendMessage(*next, message)); - } - } - } - } - - fn on_neighbor(&mut self, from: PI, details: Neighbor, now: Instant, io: &mut impl IO) { - self.pending_neighbor_requests.remove(&from); - // "A node q that receives a high priority neighbor request will always accept the request, even - // if it has to drop a random member from its active view (again, the member that is dropped will - // receive a Disconnect notification). If a node q receives a low priority Neighbor request, it will - // only accept the request if it has a free slot in its active view, otherwise it will refuse the request." - match details.priority { - Priority::High => { - self.add_active(from, details.data, Priority::High, now, io); - } - Priority::Low if !self.active_is_full() => { - self.add_active(from, details.data, Priority::Low, now, io); - } - _ => {} - } - } - - /// Get the peer [`PeerInfo`] for a peer. - fn peer_info(&self, id: &PI) -> PeerInfo { - let data = self.peer_data.get(id).cloned(); - PeerInfo { id: *id, data } - } - - fn insert_peer_info(&mut self, peer_info: PeerInfo, io: &mut impl IO) { - if let Some(data) = peer_info.data { - let old = self.peer_data.remove(&peer_info.id); - let same = matches!(old, Some(old) if old == data); - if !same { - io.push(OutEvent::PeerData(peer_info.id, data.clone())); - } - self.peer_data.insert(peer_info.id, data); - } - } - - /// Handle a [`Message::Shuffle`] - /// - /// > A node q that receives a Shuffle request will first decrease its time to live. If the time - /// > to live of the message is greater than zero and the number of nodes in q’s active view is - /// > greater than 1, the node will select a random node from its active view, different from the - /// > one he received this shuffle message from, and simply forwards the Shuffle request. 
- /// > Otherwise, node q accepts the Shuffle request and send back (p.8) - fn on_shuffle(&mut self, from: PI, shuffle: Shuffle, io: &mut impl IO) { - if shuffle.ttl.expired() || self.active_view.len() <= 1 { - let len = shuffle.nodes.len(); - for node in shuffle.nodes { - self.add_passive(node.id, node.data, io); - } - let nodes = self - .passive_view - .shuffled_and_capped(len, &mut self.rng) - .into_iter() - .map(|id| self.peer_info(&id)); - let message = Message::ShuffleReply(ShuffleReply { - nodes: nodes.collect(), - }); - io.push(OutEvent::SendMessage(shuffle.origin, message)); - } else if let Some(node) = self - .active_view - .pick_random_without(&[&shuffle.origin, &from], &mut self.rng) - { - let message = Message::Shuffle(Shuffle { - origin: shuffle.origin, - nodes: shuffle.nodes, - ttl: shuffle.ttl.next(), - }); - io.push(OutEvent::SendMessage(*node, message)); - } - } - - fn on_shuffle_reply(&mut self, message: ShuffleReply, io: &mut impl IO) { - for node in message.nodes { - self.add_passive(node.id, node.data, io); - } - } - - fn on_disconnect(&mut self, peer: PI, details: Disconnect, io: &mut impl IO) { - self.pending_neighbor_requests.remove(&peer); - self.remove_active(&peer, details.respond, io); - if details.alive { - if let Some(data) = self.peer_data.remove(&peer) { - self.add_passive(peer, Some(data), io); - } - } else { - self.passive_view.remove(&peer); - } - } - - fn handle_shuffle_timer(&mut self, io: &mut impl IO) { - if let Some(node) = self.active_view.pick_random(&mut self.rng) { - let active = self.active_view.shuffled_without_and_capped( - &[node], - self.config.shuffle_active_view_count, - &mut self.rng, - ); - let passive = self.passive_view.shuffled_without_and_capped( - &[node], - self.config.shuffle_passive_view_count, - &mut self.rng, - ); - let nodes = active - .iter() - .chain(passive.iter()) - .map(|id| self.peer_info(id)); - let message = Shuffle { - origin: self.me, - nodes: nodes.collect(), - ttl: self.config.shuffle_random_walk_length, - }; - io.push(OutEvent::SendMessage(*node, Message::Shuffle(message))); - } - io.push(OutEvent::ScheduleTimer( - self.config.shuffle_interval, - Timer::DoShuffle, - )); - } - - fn passive_is_full(&self) -> bool { - self.passive_view.len() >= self.config.passive_view_capacity - } - - fn active_is_full(&self) -> bool { - self.active_view.len() >= self.config.active_view_capacity - } - - /// Add a peer to the passive view. - /// - /// If the passive view is full, it will first remove a random peer and then insert the new peer. - /// If a peer is currently in the active view it will not be added. - fn add_passive(&mut self, peer: PI, data: Option, io: &mut impl IO) { - self.insert_peer_info((peer, data).into(), io); - if self.active_view.contains(&peer) || self.passive_view.contains(&peer) || peer == self.me - { - return; - } - if self.passive_is_full() { - self.passive_view.remove_random(&mut self.rng); - } - self.passive_view.insert(peer); - } - - /// Remove a peer from the active view. - /// - /// If respond is true, a Disconnect message will be sent to the peer. 
- fn remove_active(&mut self, peer: &PI, respond: Respond, io: &mut impl IO) -> Option { - self.active_view.get_index_of(peer).map(|idx| { - let removed_peer = self - .remove_active_by_index(idx, respond, RemovalReason::Disconnect, io) - .unwrap(); - - self.refill_active_from_passive(&[&removed_peer], io); - - removed_peer - }) - } - - fn refill_active_from_passive(&mut self, skip_peers: &[&PI], io: &mut impl IO) { - if self.active_view.len() + self.pending_neighbor_requests.len() - >= self.config.active_view_capacity - { - return; - } - // "When a node p suspects that one of the nodes present in its active view has failed - // (by either disconnecting or blocking), it selects a random node q from its passive view and - // attempts to establish a TCP connection with q. If the connection fails to establish, - // node q is considered failed and removed from p’s passive view; another node q′ is selected - // at random and a new attempt is made. The procedure is repeated until a connection is established - // with success." (p7) - let mut skip_peers = skip_peers.to_vec(); - skip_peers.extend(self.pending_neighbor_requests.iter()); - - if let Some(node) = self - .passive_view - .pick_random_without(&skip_peers, &mut self.rng) - { - let priority = match self.active_view.is_empty() { - true => Priority::High, - false => Priority::Low, - }; - let message = Message::Neighbor(Neighbor { - priority, - data: self.me_data.clone(), - }); - io.push(OutEvent::SendMessage(*node, message)); - // schedule a timer that checks if the node replied with a neighbor message, - // otherwise try again with another passive node. - io.push(OutEvent::ScheduleTimer( - self.config.neighbor_request_timeout, - Timer::PendingNeighborRequest(*node), - )); - self.pending_neighbor_requests.insert(*node); - }; - } - - fn handle_pending_neighbor_timer(&mut self, peer: PI, io: &mut impl IO) { - if self.pending_neighbor_requests.remove(&peer) { - self.passive_view.remove(&peer); - self.refill_active_from_passive(&[], io); - } - } - - fn remove_active_by_index( - &mut self, - peer_index: usize, - respond: Respond, - reason: RemovalReason, - io: &mut impl IO, - ) -> Option { - if let Some(peer) = self.active_view.remove_index(peer_index) { - if respond { - let message = Message::Disconnect(Disconnect { - alive: true, - respond: false, - }); - io.push(OutEvent::SendMessage(peer, message)); - } - io.push(OutEvent::DisconnectPeer(peer)); - io.push(OutEvent::EmitEvent(Event::NeighborDown(peer))); - let data = self.peer_data.remove(&peer); - self.add_passive(peer, data, io); - debug!(other = ?peer, "removed from active view, reason: {reason:?}"); - Some(peer) - } else { - None - } - } - - /// Remove a random peer from the active view. - fn free_random_slot_in_active_view(&mut self, io: &mut impl IO) { - if let Some(index) = self.active_view.pick_random_index(&mut self.rng) { - self.remove_active_by_index(index, true, RemovalReason::Random, io); - } - } - - /// Add a peer to the active view. - /// - /// If the active view is currently full, a random peer will be removed first. - /// Sends a Neighbor message to the peer. If high_priority is true, the peer - /// may not deny the Neighbor request. 
- fn add_active( - &mut self, - peer: PI, - data: Option, - priority: Priority, - _now: Instant, - io: &mut impl IO, - ) -> bool { - self.insert_peer_info((peer, data).into(), io); - if self.active_view.contains(&peer) || peer == self.me { - return true; - } - match (priority, self.active_is_full()) { - (Priority::High, is_full) => { - if is_full { - self.free_random_slot_in_active_view(io); - } - self.add_active_unchecked(peer, Priority::High, io); - true - } - (Priority::Low, false) => { - self.add_active_unchecked(peer, Priority::Low, io); - true - } - (Priority::Low, true) => false, - } - } - - fn add_active_unchecked(&mut self, peer: PI, priority: Priority, io: &mut impl IO) { - self.passive_view.remove(&peer); - self.active_view.insert(peer); - debug!(other = ?peer, "add to active view"); - - let message = Message::Neighbor(Neighbor { - priority, - data: self.me_data.clone(), - }); - io.push(OutEvent::SendMessage(peer, message)); - io.push(OutEvent::EmitEvent(Event::NeighborUp(peer))); - } -} - -#[derive(Debug)] -enum RemovalReason { - Disconnect, - Random, -} diff --git a/iroh-gossip/src/proto/plumtree.rs b/iroh-gossip/src/proto/plumtree.rs deleted file mode 100644 index f5b66f039e..0000000000 --- a/iroh-gossip/src/proto/plumtree.rs +++ /dev/null @@ -1,878 +0,0 @@ -//! Implementation of the Plumtree epidemic broadcast tree protocol -//! -//! The implementation is based on [this paper][paper] by Joao Leitao, Jose Pereira, Luıs Rodrigues -//! and the [example implementation][impl] by Bartosz Sypytkowski -//! -//! [paper]: https://asc.di.fct.unl.pt/~jleitao/pdf/srds07-leitao.pdf -//! [impl]: https://gist.github.com/Horusiath/84fac596101b197da0546d1697580d99 - -use std::{ - collections::{HashMap, HashSet, VecDeque}, - hash::Hash, - time::{Duration, Instant}, -}; - -use bytes::Bytes; -use derive_more::{Add, From, Sub}; -use serde::{Deserialize, Serialize}; -use tracing::warn; - -use super::{ - util::{idbytes_impls, TimeBoundCache}, - PeerIdentity, IO, -}; - -/// A message identifier, which is the message content's blake3 hash. -#[derive(Serialize, Deserialize, Clone, Hash, Copy, PartialEq, Eq)] -pub struct MessageId([u8; 32]); -idbytes_impls!(MessageId, "MessageId"); - -impl MessageId { - /// Create a `[MessageId]` by hashing the message content. - /// - /// This hashes the input with [`blake3::hash`]. - pub fn from_content(message: &[u8]) -> Self { - Self::from(blake3::hash(message)) - } -} - -/// Events Plumtree is informed of from the peer sampling service and IO layer. -#[derive(Debug)] -pub enum InEvent { - /// A [`Message`] was received from the peer. - RecvMessage(PI, Message), - /// Broadcast the contained payload to the given scope. - Broadcast(Bytes, Scope), - /// A timer has expired. - TimerExpired(Timer), - /// New member `PI` has joined the topic. - NeighborUp(PI), - /// Peer `PI` has disconnected from the topic. - NeighborDown(PI), -} - -/// Events Plumtree emits. -#[derive(Debug, PartialEq, Eq)] -pub enum OutEvent { - /// Ask the IO layer to send a [`Message`] to peer `PI`. - SendMessage(PI, Message), - /// Schedule a [`Timer`]. - ScheduleTimer(Duration, Timer), - /// Emit an [`Event`] to the application. - EmitEvent(Event), -} - -/// Kinds of timers Plumtree needs to schedule. -#[derive(Clone, Debug, Eq, PartialEq)] -pub enum Timer { - /// Request the content for [`MessageId`] by sending [`Message::Graft`]. 
- /// - /// The message will be sent to a peer that sent us an [`Message::IHave`] for this [`MessageId`], - /// which will send us the message content in reply and also move the peer into the eager set. - /// Will be a no-op if the message for [`MessageId`] was already received from another peer by now. - SendGraft(MessageId), - /// Dispatch the [`Message::IHave`] in our lazy push queue. - DispatchLazyPush, - /// Evict the message cache - EvictCache, -} - -/// Event emitted by the [`State`] to the application. -#[derive(Clone, Debug, PartialEq, Eq)] -pub enum Event { - /// A new gossip message was received. - Received(GossipEvent), -} - -#[derive(Clone, derive_more::Debug, PartialEq, Eq, Ord, PartialOrd, Serialize, Deserialize)] -pub struct GossipEvent { - /// The content of the gossip message. - #[debug("<{}b>", content.len())] - pub content: Bytes, - /// The peer that we received the gossip message from. Note that this is not the peer that - /// originally broadcasted the message, but the peer before us in the gossiping path. - pub delivered_from: PI, - /// The broadcast scope of the message. - pub scope: DeliveryScope, -} - -impl GossipEvent { - fn from_message(message: &Gossip, from: PI) -> Self { - Self { - content: message.content.clone(), - scope: message.scope, - delivered_from: from, - } - } -} - -/// Number of delivery hops a message has taken. -#[derive( - From, Add, Sub, Serialize, Deserialize, Eq, PartialEq, PartialOrd, Ord, Clone, Copy, Debug, Hash, -)] -pub struct Round(u16); - -impl Round { - pub fn next(&self) -> Round { - Round(self.0 + 1) - } -} - -/// Messages that we can send and receive from peers within the topic. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] -pub enum Message { - /// When receiving Gossip, emit as event and forward full message to eager peer and (after a - /// delay) message IDs to lazy peers. - Gossip(Gossip), - /// When receiving Prune, move the peer from the eager to the lazy set. - Prune, - /// When receiving Graft, move the peer to the eager set and send the full content for the - /// included message ID. - Graft(Graft), - /// When receiving IHave, do nothing initially, and request the messages for the included - /// message IDs after some time if they aren't pushed eagerly to us. - IHave(Vec), -} - -/// Payload messages transmitted by the protocol. -#[derive(Serialize, Deserialize, Clone, derive_more::Debug, PartialEq, Eq)] -pub struct Gossip { - /// Id of the message. - id: MessageId, - /// Message contents. - #[debug("<{}b>", content.len())] - content: Bytes, - /// Scope to broadcast to. - scope: DeliveryScope, -} - -impl Gossip { - fn round(&self) -> Option { - match self.scope { - DeliveryScope::Swarm(round) => Some(round), - DeliveryScope::Neighbors => None, - } - } -} - -/// The scope to deliver the message to. -#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Ord, PartialOrd, Copy)] -pub enum DeliveryScope { - /// This message was received from the swarm, with a distance (in hops) travelled from the - /// original broadcaster. - Swarm(Round), - /// This message was received from a direct neighbor that broadcasted the message to neighbors - /// only. - Neighbors, -} - -impl DeliveryScope { - /// Whether this message was directly received from its publisher. - pub fn is_direct(&self) -> bool { - matches!(self, Self::Neighbors | Self::Swarm(Round(0))) - } -} - -/// The broadcast scope of a gossip message. 
-#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Ord, PartialOrd, Copy)] -pub enum Scope { - /// The message is broadcast to all peers in the swarm. - Swarm, - /// The message is broadcast only to the immediate neighbors of a peer. - Neighbors, -} - -impl Gossip { - /// Get a clone of this `Gossip` message and increase the delivery round by 1. - pub fn next_round(&self) -> Option { - match self.scope { - DeliveryScope::Neighbors => None, - DeliveryScope::Swarm(round) => Some(Gossip { - id: self.id, - content: self.content.clone(), - scope: DeliveryScope::Swarm(round.next()), - }), - } - } - - /// Validate that the message id is the blake3 hash of the message content. - pub fn validate(&self) -> bool { - let expected = MessageId::from_content(&self.content); - expected == self.id - } -} - -/// Control message to inform peers we have a message without transmitting the whole payload. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] -pub struct IHave { - /// Id of the message. - id: MessageId, - /// Delivery round of the message. - round: Round, -} - -/// Control message to signal a peer that they have been moved to the eager set, and to ask the -/// peer to do the same with this node. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] -pub struct Graft { - /// Message id that triggers the graft, if any. - /// On receiving a graft, the payload message must be sent in reply if a message id is set. - id: Option, - /// Delivery round of the [`Message::IHave`] that triggered this Graft message. - round: Round, -} - -/// Configuration for the gossip broadcast layer. -/// -/// Currently, the expectation is that the configuration is the same for all peers in the -/// network (as recommended in the paper). -#[derive(Clone, Debug)] -pub struct Config { - /// When receiving an [`IHave`] message, this timeout is registered. If the message for the - /// [`IHave`] was not received once the timeout is expired, a [`Graft`] message is sent to the - /// peer that sent us the [`IHave`] to request the message payload. - /// - /// The plumtree paper notes: - /// > The timeout value is a protocol parameter that should be configured considering the - /// > diameter of the overlay and a target maximum recovery latency, defined by the application - /// > requirements. (p.8) - pub graft_timeout_1: Duration, - /// This timeout is registered when sending a [`Graft`] message. If a reply has not been - /// received once the timeout expires, we send another [`Graft`] message to the next peer that - /// sent us an [`IHave`] for this message. - /// - /// The plumtree paper notes: - /// > This second timeout value should be smaller that the first, in the order of an average - /// > round trip time to a neighbor. - pub graft_timeout_2: Duration, - /// Timeout after which [`IHave`] messages are pushed to peers. - pub dispatch_timeout: Duration, - /// The protocol performs a tree optimization, which promotes lazy peers to eager peers if the - /// [`Message::IHave`] messages received from them have a lower number of hops from the - /// message's origin as the [`InEvent::Broadcast`] messages received from our eager peers. This - /// parameter is the number of hops that the lazy peers must be closer to the origin than our - /// eager peers to be promoted to become an eager peer. - pub optimization_threshold: Round, - - /// Duration for which to keep gossip messages in the internal message cache. 
- /// - /// Messages broadcast from this node or received from other nodes are kept in an internal - /// cache for this duration before being evicted. If this is too low, other nodes will not be - /// able to retrieve messages once they need them. If this is high, the cache will grow. - /// - /// Should be at least around several round trip times to peers. - pub message_cache_retention: Duration, - - /// Duration for which to keep the [`MessageId`]s for received messages. - /// - /// Should be at least as long as [`Self::message_cache_retention`], usually will be longer to - /// not accidentally receive messages multiple times. - pub message_id_retention: Duration, - - /// How often the internal caches will be checked for expired items. - pub cache_evict_interval: Duration, -} - -impl Default for Config { - /// Sensible defaults for the plumtree configuration - // - // TODO: Find out what good defaults are for the three timeouts here. Current numbers are - // guesses that need validation. The paper does not have concrete recommendations for these - // numbers. - fn default() -> Self { - Self { - // Paper: "The timeout value is a protocol parameter that should be configured considering - // the diameter of the overlay and a target maximum recovery latency, defined by the - // application requirements. This is a parameter that should be statically configured - // at deployment time." (p. 8) - // - // Earthstar has 5ms it seems, see https://github.com/earthstar-project/earthstar/blob/1523c640fedf106f598bf79b184fb0ada64b1cc0/src/syncer/plum_tree.ts#L75 - // However in the paper it is more like a few roundtrips if I read things correctly. - graft_timeout_1: Duration::from_millis(80), - - // Paper: "This second timeout value should be smaller that the first, in the order of an - // average round trip time to a neighbor." (p. 9) - // - // Earthstar doesn't have this step from my reading. - graft_timeout_2: Duration::from_millis(40), - - // Again, paper does not tell a recommended number here. Likely should be quite small, - // as to not delay messages without need. This would also be the time frame in which - // `IHave`s are aggregated to save on packets. - // - // Eartstar dispatches immediately from my reading. - dispatch_timeout: Duration::from_millis(5), - - // This number comes from experiment settings the plumtree paper (p. 12) - optimization_threshold: Round(7), - - // This is a certainly-high-enough value for usual operation. - message_cache_retention: Duration::from_secs(30), - message_id_retention: Duration::from_secs(90), - cache_evict_interval: Duration::from_secs(1), - } - } -} - -/// Stats about this topic's plumtree. -#[derive(Debug, Default, Clone)] -pub struct Stats { - /// Number of payload messages received so far. - /// - /// See [`Message::Gossip`]. - pub payload_messages_received: u64, - /// Number of control messages received so far. - /// - /// See [`Message::Prune`], [`Message::Graft`], [`Message::IHave`]. - pub control_messages_received: u64, - /// Max round seen so far. - pub max_last_delivery_hop: u16, -} - -/// State of the plumtree. -#[derive(Debug)] -pub struct State { - /// Our address. - me: PI, - /// Configuration for this plumtree. - config: Config, - - /// Set of peers used for payload exchange. - pub(crate) eager_push_peers: HashSet, - /// Set of peers used for control message exchange. 
-    pub(crate) lazy_push_peers: HashSet<PI>,
-
-    lazy_push_queue: HashMap<PI, Vec<IHave>>,
-
-    /// Messages for which a [`MessageId`] has been seen via a [`Message::IHave`] but we have not
-    /// yet received the full payload. For each, we store the peers that have claimed to have this
-    /// message.
-    missing_messages: HashMap<MessageId, VecDeque<(PI, Round)>>,
-    /// Messages for which the full payload has been seen.
-    received_messages: TimeBoundCache<MessageId, ()>,
-    /// Payloads of received messages.
-    cache: TimeBoundCache<MessageId, Gossip>,
-
-    /// Message ids for which a [`Timer::SendGraft`] has been scheduled.
-    graft_timer_scheduled: HashSet<MessageId>,
-    /// Whether a [`Timer::DispatchLazyPush`] has been scheduled.
-    dispatch_timer_scheduled: bool,
-
-    /// Set to `true` once the first event has been handled. Used for initial timer scheduling.
-    init: bool,
-
-    /// [`Stats`] of this plumtree.
-    pub(crate) stats: Stats,
-}
-
-impl<PI: PeerIdentity> State<PI> {
-    /// Initialize the [`State`] of a plumtree.
-    pub fn new(me: PI, config: Config) -> Self {
-        Self {
-            me,
-            eager_push_peers: Default::default(),
-            lazy_push_peers: Default::default(),
-            lazy_push_queue: Default::default(),
-            config,
-            missing_messages: Default::default(),
-            received_messages: Default::default(),
-            graft_timer_scheduled: Default::default(),
-            dispatch_timer_scheduled: false,
-            cache: Default::default(),
-            init: false,
-            stats: Default::default(),
-        }
-    }
-
-    /// Handle an [`InEvent`].
-    pub fn handle(&mut self, event: InEvent<PI>, now: Instant, io: &mut impl IO<PI>) {
-        if !self.init {
-            self.init = true;
-            self.on_evict_cache_timer(now, io)
-        }
-        match event {
-            InEvent::RecvMessage(from, message) => self.handle_message(from, message, now, io),
-            InEvent::Broadcast(data, scope) => self.broadcast(data, scope, now, io),
-            InEvent::NeighborUp(peer) => self.on_neighbor_up(peer),
-            InEvent::NeighborDown(peer) => self.on_neighbor_down(peer),
-            InEvent::TimerExpired(timer) => match timer {
-                Timer::DispatchLazyPush => self.on_dispatch_timer(io),
-                Timer::SendGraft(id) => {
-                    self.on_send_graft_timer(id, io);
-                }
-                Timer::EvictCache => self.on_evict_cache_timer(now, io),
-            },
-        }
-    }
-
-    /// Get access to the [`Stats`] of the plumtree.
-    pub fn stats(&self) -> &Stats {
-        &self.stats
-    }
-
-    /// Handle receiving a [`Message`].
-    fn handle_message(&mut self, sender: PI, message: Message, now: Instant, io: &mut impl IO<PI>) {
-        if matches!(message, Message::Gossip(_)) {
-            self.stats.payload_messages_received += 1;
-        } else {
-            self.stats.control_messages_received += 1;
-        }
-        match message {
-            Message::Gossip(details) => self.on_gossip(sender, details, now, io),
-            Message::Prune => self.on_prune(sender),
-            Message::IHave(details) => self.on_ihave(sender, details, io),
-            Message::Graft(details) => self.on_graft(sender, details, io),
-        }
-    }
-
-    /// Dispatches messages from the lazy queue to the lazy peers.
-    fn on_dispatch_timer(&mut self, io: &mut impl IO<PI>) {
-        for (peer, list) in self.lazy_push_queue.drain() {
-            io.push(OutEvent::SendMessage(peer, Message::IHave(list)));
-        }
-
-        self.dispatch_timer_scheduled = false;
-    }
-
-    /// Send a gossip message.
-    ///
-    /// Will be pushed in full to eager peers.
-    /// Pushing the message id to the lazy peers is delayed by a timer.
- fn broadcast(&mut self, content: Bytes, scope: Scope, now: Instant, io: &mut impl IO) { - let id = MessageId::from_content(&content); - let scope = match scope { - Scope::Neighbors => DeliveryScope::Neighbors, - Scope::Swarm => DeliveryScope::Swarm(Round(0)), - }; - let message = Gossip { id, content, scope }; - let me = self.me; - if let DeliveryScope::Swarm(_) = scope { - self.received_messages - .insert(id, (), now + self.config.message_id_retention); - self.cache.insert( - id, - message.clone(), - now + self.config.message_cache_retention, - ); - self.lazy_push(message.clone(), &me, io); - } - - self.eager_push(message.clone(), &me, io); - } - - /// Handle receiving a [`Message::Gossip`]. - fn on_gossip(&mut self, sender: PI, message: Gossip, now: Instant, io: &mut impl IO) { - // Validate that the message id is the blake3 hash of the message content. - if !message.validate() { - // TODO: Do we want to take any measures against the sender if we received a message - // with a spoofed message id? - warn!( - peer = ?sender, - "Received a message with spoofed message id ({})", message.id - ); - return; - } - - // if we already received this message: move peer to lazy set - // and notify peer about this. - if self.received_messages.contains_key(&message.id) { - self.add_lazy(sender); - io.push(OutEvent::SendMessage(sender, Message::Prune)); - // otherwise store the message, emit to application and forward to peers - } else { - if let DeliveryScope::Swarm(prev_round) = message.scope { - // insert the message in the list of received messages - self.received_messages.insert( - message.id, - (), - now + self.config.message_id_retention, - ); - // increase the round for forwarding the message, and add to cache - // to reply to Graft messages later - // TODO: add callback/event to application to get missing messages that were received before? - let message = message.next_round().expect("just checked"); - - self.cache.insert( - message.id, - message.clone(), - now + self.config.message_cache_retention, - ); - // push the message to our peers - self.eager_push(message.clone(), &sender, io); - self.lazy_push(message.clone(), &sender, io); - // cleanup places where we track missing messages - self.graft_timer_scheduled.remove(&message.id); - let previous_ihaves = self.missing_messages.remove(&message.id); - // do the optimization step from the paper - if let Some(previous_ihaves) = previous_ihaves { - self.optimize_tree(&sender, &message, previous_ihaves, io); - } - self.stats.max_last_delivery_hop = - self.stats.max_last_delivery_hop.max(prev_round.0); - } - - // emit event to application - io.push(OutEvent::EmitEvent(Event::Received( - GossipEvent::from_message(&message, sender), - ))); - } - } - - /// Optimize the tree by pruning the `sender` of a [`Message::Gossip`] if we previously - /// received a [`Message::IHave`] for the same message with a much lower number of delivery - /// hops from the original broadcaster of the message. - /// - /// See [Config::optimization_threshold]. 
- fn optimize_tree( - &mut self, - gossip_sender: &PI, - message: &Gossip, - previous_ihaves: VecDeque<(PI, Round)>, - io: &mut impl IO, - ) { - let round = message.round().expect("only called for swarm messages"); - let best_ihave = previous_ihaves - .iter() - .min_by(|(_a_peer, a_round), (_b_peer, b_round)| a_round.cmp(b_round)) - .copied(); - - if let Some((ihave_peer, ihave_round)) = best_ihave { - if (ihave_round < round) && (round - ihave_round) >= self.config.optimization_threshold - { - // Graft the sender of the IHave, but only if it's not already eager. - if !self.eager_push_peers.contains(&ihave_peer) { - let message = Message::Graft(Graft { - id: None, - round: ihave_round, - }); - io.push(OutEvent::SendMessage(ihave_peer, message)); - } - // Prune the sender of the Gossip. - io.push(OutEvent::SendMessage(*gossip_sender, Message::Prune)); - } - } - } - - /// Handle receiving a [`Message::Prune`]. - fn on_prune(&mut self, sender: PI) { - self.add_lazy(sender); - } - - /// Handle receiving a [`Message::IHave`]. - /// - /// > When a node receives a IHAVE message, it simply marks the corresponding message as - /// > missing It then starts a timer, with a predefined timeout value, and waits for the missing - /// > message to be received via eager push before the timer expires. The timeout value is a - /// > protocol parameter that should be configured considering the diameter of the overlay and a - /// > target maximum recovery latency, defined by the application requirements. This is a - /// > parameter that should be statically configured at deployment time. (p8) - fn on_ihave(&mut self, sender: PI, ihaves: Vec, io: &mut impl IO) { - for ihave in ihaves { - if !self.received_messages.contains_key(&ihave.id) { - self.missing_messages - .entry(ihave.id) - .or_default() - .push_back((sender, ihave.round)); - - if !self.graft_timer_scheduled.contains(&ihave.id) { - self.graft_timer_scheduled.insert(ihave.id); - io.push(OutEvent::ScheduleTimer( - self.config.graft_timeout_1, - Timer::SendGraft(ihave.id), - )); - } - } - } - } - - /// A scheduled [`Timer::SendGraft`] has reached it's deadline. - fn on_send_graft_timer(&mut self, id: MessageId, io: &mut impl IO) { - // if the message was received before the timer ran out, there is no need to request it - // again - if self.received_messages.contains_key(&id) { - return; - } - // get the first peer that advertised this message - let entry = self - .missing_messages - .get_mut(&id) - .and_then(|entries| entries.pop_front()); - if let Some((peer, round)) = entry { - self.add_eager(peer); - let message = Message::Graft(Graft { - id: Some(id), - round, - }); - io.push(OutEvent::SendMessage(peer, message)); - - // "when a GRAFT message is sent, another timer is started to expire after a certain timeout, - // to ensure that the message will be requested to another neighbor if it is not received - // meanwhile. This second timeout value should be smaller that the first, in the order of - // an average round trip time to a neighbor." (p9) - io.push(OutEvent::ScheduleTimer( - self.config.graft_timeout_2, - Timer::SendGraft(id), - )); - } - } - - /// Handle receiving a [`Message::Graft`]. - fn on_graft(&mut self, sender: PI, details: Graft, io: &mut impl IO) { - self.add_eager(sender); - if let Some(id) = details.id { - if let Some(message) = self.cache.get(&id) { - io.push(OutEvent::SendMessage( - sender, - Message::Gossip(message.clone()), - )); - } - } - } - - /// Handle a [`InEvent::NeighborUp`] when a peer joins the topic. 
- fn on_neighbor_up(&mut self, peer: PI) { - self.add_eager(peer); - } - - /// Handle a [`InEvent::NeighborDown`] when a peer leaves the topic. - /// > When a neighbor is detected to leave the overlay, it is simple removed from the - /// > membership. Furthermore, the record of IHAVE messages sent from failed members is deleted - /// > from the missing history. (p9) - fn on_neighbor_down(&mut self, peer: PI) { - self.missing_messages.retain(|_message_id, ihaves| { - ihaves.retain(|(ihave_peer, _round)| *ihave_peer != peer); - !ihaves.is_empty() - }); - self.eager_push_peers.remove(&peer); - self.lazy_push_peers.remove(&peer); - } - - fn on_evict_cache_timer(&mut self, now: Instant, io: &mut impl IO) { - self.cache.expire_until(now); - io.push(OutEvent::ScheduleTimer( - self.config.cache_evict_interval, - Timer::EvictCache, - )); - } - - /// Moves peer into eager set. - fn add_eager(&mut self, peer: PI) { - self.lazy_push_peers.remove(&peer); - self.eager_push_peers.insert(peer); - } - - /// Moves peer into lazy set. - fn add_lazy(&mut self, peer: PI) { - self.eager_push_peers.remove(&peer); - self.lazy_push_peers.insert(peer); - } - - /// Immediately sends message to eager peers. - fn eager_push(&mut self, gossip: Gossip, sender: &PI, io: &mut impl IO) { - for peer in self - .eager_push_peers - .iter() - .filter(|peer| **peer != self.me && *peer != sender) - { - io.push(OutEvent::SendMessage( - *peer, - Message::Gossip(gossip.clone()), - )); - } - } - - /// Queue lazy message announcements into the queue that will be sent out as batched - /// [`Message::IHave`] messages once the [`Timer::DispatchLazyPush`] timer is triggered. - fn lazy_push(&mut self, gossip: Gossip, sender: &PI, io: &mut impl IO) { - let Some(round) = gossip.round() else { - return; - }; - for peer in self.lazy_push_peers.iter().filter(|x| *x != sender) { - self.lazy_push_queue.entry(*peer).or_default().push(IHave { - id: gossip.id, - round, - }); - } - if !self.dispatch_timer_scheduled { - io.push(OutEvent::ScheduleTimer( - self.config.dispatch_timeout, - Timer::DispatchLazyPush, - )); - self.dispatch_timer_scheduled = true; - } - } -} - -#[cfg(test)] -mod test { - use super::*; - #[test] - fn optimize_tree() { - let mut io = VecDeque::new(); - let config: Config = Default::default(); - let mut state = State::new(1, config.clone()); - let now = Instant::now(); - - // we receive an IHave message from peer 2 - // it has `round: 2` which means that the the peer that sent us the IHave was - // two hops away from the original sender of the message - let content: Bytes = b"hi".to_vec().into(); - let id = MessageId::from_content(&content); - let event = InEvent::RecvMessage( - 2u32, - Message::IHave(vec![IHave { - id, - round: Round(2), - }]), - ); - state.handle(event, now, &mut io); - io.clear(); - // we then receive a `Gossip` message with the same `MessageId` from peer 3 - // the message has `round: 6`, which means it travelled 6 hops until it reached us - // this is less hops than to peer 2, but not enough to trigger the optimization - // because we use the default config which has `optimization_threshold: 7` - let event = InEvent::RecvMessage( - 3, - Message::Gossip(Gossip { - id, - content: content.clone(), - scope: DeliveryScope::Swarm(Round(6)), - }), - ); - state.handle(event, now, &mut io); - let expected = { - // we expect a dispatch timer schedule and receive event, but no Graft or Prune - // messages - let mut io = VecDeque::new(); - io.push(OutEvent::ScheduleTimer( - config.dispatch_timeout, - 
Timer::DispatchLazyPush,
-            ));
-            io.push(OutEvent::EmitEvent(Event::Received(GossipEvent {
-                content,
-                delivered_from: 3,
-                scope: DeliveryScope::Swarm(Round(6)),
-            })));
-            io
-        };
-        assert_eq!(io, expected);
-        io.clear();
-
-        // now we run the same flow again, but this time peer 3 is 9 hops away from the message's
-        // sender. this will trigger the optimization:
-        // peer 2 will be promoted to eager and peer 3 demoted to lazy
-
-        let content: Bytes = b"hi2".to_vec().into();
-        let id = MessageId::from_content(&content);
-        let event = InEvent::RecvMessage(
-            2u32,
-            Message::IHave(vec![IHave {
-                id,
-                round: Round(2),
-            }]),
-        );
-        state.handle(event, now, &mut io);
-        io.clear();
-
-        let event = InEvent::RecvMessage(
-            3,
-            Message::Gossip(Gossip {
-                id,
-                content: content.clone(),
-                scope: DeliveryScope::Swarm(Round(9)),
-            }),
-        );
-        state.handle(event, now, &mut io);
-        let expected = {
-            // this time we expect the Graft and Prune messages to be sent, performing the
-            // optimization step
-            let mut io = VecDeque::new();
-            io.push(OutEvent::SendMessage(
-                2,
-                Message::Graft(Graft {
-                    id: None,
-                    round: Round(2),
-                }),
-            ));
-            io.push(OutEvent::SendMessage(3, Message::Prune));
-            io.push(OutEvent::EmitEvent(Event::Received(GossipEvent {
-                content,
-                delivered_from: 3,
-                scope: DeliveryScope::Swarm(Round(9)),
-            })));
-            io
-        };
-        assert_eq!(io, expected);
-    }
-
-    #[test]
-    fn spoofed_messages_are_ignored() {
-        let config: Config = Default::default();
-        let mut state = State::new(1, config.clone());
-        let now = Instant::now();
-
-        // we recv a correct gossip message and expect the Received event to be emitted
-        let content: Bytes = b"hello1".to_vec().into();
-        let message = Message::Gossip(Gossip {
-            content: content.clone(),
-            id: MessageId::from_content(&content),
-            scope: DeliveryScope::Swarm(Round(1)),
-        });
-        let mut io = VecDeque::new();
-        state.handle(InEvent::RecvMessage(2, message), now, &mut io);
-        let expected = {
-            let mut io = VecDeque::new();
-            io.push(OutEvent::ScheduleTimer(
-                config.cache_evict_interval,
-                Timer::EvictCache,
-            ));
-            io.push(OutEvent::ScheduleTimer(
-                config.dispatch_timeout,
-                Timer::DispatchLazyPush,
-            ));
-            io.push(OutEvent::EmitEvent(Event::Received(GossipEvent {
-                content,
-                delivered_from: 2,
-                scope: DeliveryScope::Swarm(Round(1)),
-            })));
-            io
-        };
-        assert_eq!(io, expected);
-
-        // now we recv with a spoofed id and expect no event to be emitted
-        let content: Bytes = b"hello2".to_vec().into();
-        let message = Message::Gossip(Gossip {
-            content,
-            id: MessageId::from_content(b"foo"),
-            scope: DeliveryScope::Swarm(Round(1)),
-        });
-        let mut io = VecDeque::new();
-        state.handle(InEvent::RecvMessage(2, message), now, &mut io);
-        let expected = VecDeque::new();
-        assert_eq!(io, expected);
-    }
-
-    #[test]
-    fn cache_is_evicted() {
-        let config: Config = Default::default();
-        let mut state = State::new(1, config.clone());
-        let now = Instant::now();
-        let content: Bytes = b"hello1".to_vec().into();
-        let message = Message::Gossip(Gossip {
-            content: content.clone(),
-            id: MessageId::from_content(&content),
-            scope: DeliveryScope::Swarm(Round(1)),
-        });
-        let mut io = VecDeque::new();
-        state.handle(InEvent::RecvMessage(2, message), now, &mut io);
-        assert_eq!(state.cache.len(), 1);
-
-        let now = now + Duration::from_secs(1);
-        state.handle(InEvent::TimerExpired(Timer::EvictCache), now, &mut io);
-        assert_eq!(state.cache.len(), 1);
-
-        let now = now + config.message_cache_retention;
-        state.handle(InEvent::TimerExpired(Timer::EvictCache), now,
&mut io); - assert_eq!(state.cache.len(), 0); - } -} diff --git a/iroh-gossip/src/proto/state.rs b/iroh-gossip/src/proto/state.rs deleted file mode 100644 index b8561aeeef..0000000000 --- a/iroh-gossip/src/proto/state.rs +++ /dev/null @@ -1,353 +0,0 @@ -//! The protocol state of the `iroh-gossip` protocol. - -use std::{ - collections::{hash_map, HashMap, HashSet}, - time::{Duration, Instant}, -}; - -use iroh_metrics::{inc, inc_by}; -use rand::Rng; -use serde::{Deserialize, Serialize}; -use tracing::trace; - -use crate::{ - metrics::Metrics, - proto::{ - topic::{self, Command}, - util::idbytes_impls, - Config, PeerData, PeerIdentity, - }, -}; - -/// The identifier for a topic -#[derive(Clone, Copy, Eq, PartialEq, Hash, Serialize, Ord, PartialOrd, Deserialize)] -pub struct TopicId([u8; 32]); -idbytes_impls!(TopicId, "TopicId"); - -/// Protocol wire message -/// -/// This is the wire frame of the `iroh-gossip` protocol. -#[derive(Clone, Debug, Serialize, Deserialize)] -pub struct Message { - topic: TopicId, - message: topic::Message, -} - -impl Message { - /// Get the kind of this message - pub fn kind(&self) -> MessageKind { - self.message.kind() - } -} - -/// Whether this is a control or data message -#[derive(Debug)] -pub enum MessageKind { - /// A data message. - Data, - /// A control message. - Control, -} - -impl Message { - /// Get the encoded size of this message - pub fn size(&self) -> postcard::Result { - postcard::experimental::serialized_size(&self) - } -} - -/// A timer to be registered into the runtime -/// -/// As the implementation of the protocol is an IO-less state machine, registering timers does not -/// happen within the protocol implementation. Instead, these `Timer` structs are emitted as -/// [`OutEvent`]s. The implementer must register the timer in its runtime to be emitted on the specified [`Instant`], -/// and once triggered inject an [`InEvent::TimerExpired`] into the protocol state. -#[derive(Clone, Debug)] -pub struct Timer { - topic: TopicId, - timer: topic::Timer, -} - -/// Input event to the protocol state. -#[derive(Clone, Debug)] -pub enum InEvent { - /// Message received from the network. - RecvMessage(PI, Message), - /// Execute a command from the application. - Command(TopicId, Command), - /// Trigger a previously scheduled timer. - TimerExpired(Timer), - /// Peer disconnected on the network level. - PeerDisconnected(PI), - /// Update the opaque peer data about yourself. - UpdatePeerData(PeerData), -} - -/// Output event from the protocol state. -#[derive(Debug, Clone)] -pub enum OutEvent { - /// Send a message on the network - SendMessage(PI, Message), - /// Emit an event to the application. - EmitEvent(TopicId, topic::Event), - /// Schedule a timer. The runtime is responsible for sending an [InEvent::TimerExpired] - /// after the duration. - ScheduleTimer(Duration, Timer), - /// Close the connection to a peer on the network level. 
- DisconnectPeer(PI), - /// Updated peer data - PeerData(PI, PeerData), -} - -type ConnsMap = HashMap>; -type Outbox = Vec>; - -enum InEventMapped { - All(topic::InEvent), - TopicEvent(TopicId, topic::InEvent), -} - -impl From> for InEventMapped { - fn from(event: InEvent) -> InEventMapped { - match event { - InEvent::RecvMessage(from, Message { topic, message }) => { - Self::TopicEvent(topic, topic::InEvent::RecvMessage(from, message)) - } - InEvent::Command(topic, command) => { - Self::TopicEvent(topic, topic::InEvent::Command(command)) - } - InEvent::TimerExpired(Timer { topic, timer }) => { - Self::TopicEvent(topic, topic::InEvent::TimerExpired(timer)) - } - InEvent::PeerDisconnected(peer) => Self::All(topic::InEvent::PeerDisconnected(peer)), - InEvent::UpdatePeerData(data) => Self::All(topic::InEvent::UpdatePeerData(data)), - } - } -} - -/// The state of the `iroh-gossip` protocol. -/// -/// The implementation works as an IO-less state machine. The implementer injects events through -/// [`Self::handle`], which returns an iterator of [`OutEvent`]s to be processed. -/// -/// This struct contains a map of [`topic::State`] for each topic that was joined. It mostly acts as -/// a forwarder of [`InEvent`]s to matching topic state. Each topic's state is completely -/// independent; thus the actual protocol logic lives with [`topic::State`]. -#[derive(Debug)] -pub struct State { - me: PI, - me_data: PeerData, - config: Config, - rng: R, - states: HashMap>, - outbox: Outbox, - peer_topics: ConnsMap, -} - -impl State { - /// Create a new protocol state instance. - /// - /// `me` is the [`PeerIdentity`] of the local node, `peer_data` is the initial [`PeerData`] - /// (which can be updated over time). - /// For the protocol to perform as recommended in the papers, the [`Config`] should be - /// identical for all nodes in the network. - pub fn new(me: PI, me_data: PeerData, config: Config, rng: R) -> Self { - Self { - me, - me_data, - config, - rng, - states: Default::default(), - outbox: Default::default(), - peer_topics: Default::default(), - } - } - - /// Get a reference to the node's [`PeerIdentity`] - pub fn me(&self) -> &PI { - &self.me - } - - /// Get a reference to the protocol state for a topic. - pub fn state(&self, topic: &TopicId) -> Option<&topic::State> { - self.states.get(topic) - } - - /// Get a reference to the protocol state for a topic. - #[cfg(test)] - pub fn state_mut(&mut self, topic: &TopicId) -> Option<&mut topic::State> { - self.states.get_mut(topic) - } - - /// Get an iterator of all joined topics. - pub fn topics(&self) -> impl Iterator { - self.states.keys() - } - - /// Get an iterator for the states of all joined topics. - pub fn states(&self) -> impl Iterator)> { - self.states.iter() - } - - /// Check if a topic has any active (connected) peers. - pub fn has_active_peers(&self, topic: &TopicId) -> bool { - self.state(topic) - .map(|s| s.has_active_peers()) - .unwrap_or(false) - } - - /// Returns the maximum message size configured in the gossip protocol. - pub fn max_message_size(&self) -> usize { - self.config.max_message_size - } - - /// Handle an [`InEvent`] - /// - /// This returns an iterator of [`OutEvent`]s that must be processed. 
-    pub fn handle(
-        &mut self,
-        event: InEvent<PI>,
-        now: Instant,
-    ) -> impl Iterator<Item = OutEvent<PI>> + '_ {
-        trace!("gossip event: {event:?}");
-        track_in_event(&event);
-
-        let event: InEventMapped<PI> = event.into();
-
-        match event {
-            InEventMapped::TopicEvent(topic, event) => {
-                // when receiving a join command, initialize state if it doesn't exist
-                if matches!(&event, topic::InEvent::Command(Command::Join(_peers))) {
-                    if let hash_map::Entry::Vacant(e) = self.states.entry(topic) {
-                        e.insert(topic::State::with_rng(
-                            self.me,
-                            Some(self.me_data.clone()),
-                            self.config.clone(),
-                            self.rng.clone(),
-                        ));
-                    }
-                }
-
-                // when receiving a quit command, note this and drop the topic state after
-                // processing this last event
-                let quit = matches!(event, topic::InEvent::Command(Command::Quit));
-
-                // pass the event to the state handler
-                if let Some(state) = self.states.get_mut(&topic) {
-                    // when receiving messages, update our conn map to take note that this topic state may want
-                    // to keep this connection
-                    if let topic::InEvent::RecvMessage(from, _message) = &event {
-                        self.peer_topics.entry(*from).or_default().insert(topic);
-                    }
-                    let out = state.handle(event, now);
-                    for event in out {
-                        handle_out_event(topic, event, &mut self.peer_topics, &mut self.outbox);
-                    }
-                }
-
-                if quit {
-                    self.states.remove(&topic);
-                }
-            }
-            // peer disconnections and peer data updates are not topic-specific, so forward them to all topic states
-            InEventMapped::All(event) => {
-                if let topic::InEvent::UpdatePeerData(data) = &event {
-                    self.me_data = data.clone();
-                }
-                for (topic, state) in self.states.iter_mut() {
-                    let out = state.handle(event.clone(), now);
-                    for event in out {
-                        handle_out_event(*topic, event, &mut self.peer_topics, &mut self.outbox);
-                    }
-                }
-            }
-        }
-
-        // track metrics
-        track_out_events(&self.outbox);
-
-        self.outbox.drain(..)
- } -} - -fn handle_out_event( - topic: TopicId, - event: topic::OutEvent, - conns: &mut ConnsMap, - outbox: &mut Outbox, -) { - match event { - topic::OutEvent::SendMessage(to, message) => { - outbox.push(OutEvent::SendMessage(to, Message { topic, message })) - } - topic::OutEvent::EmitEvent(event) => outbox.push(OutEvent::EmitEvent(topic, event)), - topic::OutEvent::ScheduleTimer(delay, timer) => { - outbox.push(OutEvent::ScheduleTimer(delay, Timer { topic, timer })) - } - topic::OutEvent::DisconnectPeer(peer) => { - let empty = conns - .get_mut(&peer) - .map(|list| list.remove(&topic) && list.is_empty()) - .unwrap_or(false); - if empty { - conns.remove(&peer); - outbox.push(OutEvent::DisconnectPeer(peer)); - } - } - topic::OutEvent::PeerData(peer, data) => outbox.push(OutEvent::PeerData(peer, data)), - } -} - -fn track_out_events(events: &[OutEvent]) { - for event in events { - match event { - OutEvent::SendMessage(_to, message) => match message.kind() { - MessageKind::Data => { - inc!(Metrics, msgs_data_sent); - inc_by!( - Metrics, - msgs_data_sent_size, - message.size().unwrap_or(0) as u64 - ); - } - MessageKind::Control => { - inc!(Metrics, msgs_ctrl_sent); - inc_by!( - Metrics, - msgs_ctrl_sent_size, - message.size().unwrap_or(0) as u64 - ); - } - }, - OutEvent::EmitEvent(_topic, event) => match event { - super::Event::NeighborUp(_peer) => inc!(Metrics, neighbor_up), - super::Event::NeighborDown(_peer) => inc!(Metrics, neighbor_down), - _ => {} - }, - _ => {} - } - } -} - -fn track_in_event(event: &InEvent) { - if let InEvent::RecvMessage(_from, message) = event { - match message.kind() { - MessageKind::Data => { - inc!(Metrics, msgs_data_recv); - inc_by!( - Metrics, - msgs_data_recv_size, - message.size().unwrap_or(0) as u64 - ); - } - MessageKind::Control => { - inc!(Metrics, msgs_ctrl_recv); - inc_by!( - Metrics, - msgs_ctrl_recv_size, - message.size().unwrap_or(0) as u64 - ); - } - } - } -} diff --git a/iroh-gossip/src/proto/tests.rs b/iroh-gossip/src/proto/tests.rs deleted file mode 100644 index 5f5f3ef40b..0000000000 --- a/iroh-gossip/src/proto/tests.rs +++ /dev/null @@ -1,468 +0,0 @@ -//! Simulation framework for testing the protocol implementation - -use std::{ - collections::{BTreeMap, HashMap, HashSet, VecDeque}, - time::{Duration, Instant}, -}; - -use bytes::Bytes; -use rand::Rng; -use rand_core::SeedableRng; -use tracing::{debug, warn}; - -use super::{ - util::TimerMap, Command, Config, Event, InEvent, OutEvent, PeerIdentity, State, Timer, TopicId, -}; -use crate::proto::Scope; - -const TICK_DURATION: Duration = Duration::from_millis(10); -const DEFAULT_LATENCY: Duration = TICK_DURATION.saturating_mul(3); - -/// Test network implementation. -/// -/// Stores events in VecDeques and processes on ticks. -/// Timers are checked after each tick. The local time is increased with TICK_DURATION before -/// each tick. -/// -/// Note: Panics when sending to an unknown peer. 
-pub struct Network { - start: Instant, - time: Instant, - tick_duration: Duration, - inqueues: Vec>>, - pub(crate) peers: Vec>, - peers_by_address: HashMap, - conns: HashSet>, - events: VecDeque<(PI, TopicId, Event)>, - timers: TimerMap<(usize, Timer)>, - transport: TimerMap<(usize, InEvent)>, - latencies: HashMap, Duration>, -} -impl Network { - pub fn new(time: Instant) -> Self { - Self { - start: time, - time, - tick_duration: TICK_DURATION, - inqueues: Default::default(), - peers: Default::default(), - peers_by_address: Default::default(), - conns: Default::default(), - events: Default::default(), - timers: TimerMap::new(), - transport: TimerMap::new(), - latencies: HashMap::new(), - } - } -} - -fn push_back( - inqueues: &mut [VecDeque>], - peer_pos: usize, - event: InEvent, -) { - inqueues.get_mut(peer_pos).unwrap().push_back(event); -} - -impl Network { - pub fn push(&mut self, peer: State) { - let idx = self.inqueues.len(); - self.inqueues.push(VecDeque::new()); - self.peers_by_address.insert(*peer.me(), idx); - self.peers.push(peer); - } - - pub fn events(&mut self) -> impl Iterator)> + '_ { - self.events.drain(..) - } - - pub fn events_sorted(&mut self) -> Vec<(PI, TopicId, Event)> { - sort(self.events().collect()) - } - - pub fn conns(&self) -> Vec<(PI, PI)> { - sort(self.conns.iter().cloned().map(Into::into).collect()) - } - - pub fn command(&mut self, peer: PI, topic: TopicId, command: Command) { - debug!(?peer, "~~ COMMAND {command:?}"); - let idx = *self.peers_by_address.get(&peer).unwrap(); - push_back(&mut self.inqueues, idx, InEvent::Command(topic, command)); - } - - pub fn ticks(&mut self, n: usize) { - (0..n).for_each(|_| self.tick()) - } - - pub fn get_tick(&self) -> u32 { - ((self.time - self.start) / self.tick_duration.as_millis() as u32).as_millis() as u32 - } - - pub fn tick(&mut self) { - self.time += self.tick_duration; - - // process timers - for (_time, (idx, timer)) in self.timers.drain_until(&self.time) { - push_back(&mut self.inqueues, idx, InEvent::TimerExpired(timer)); - } - - // move messages - for (_time, (peer, event)) in self.transport.drain_until(&self.time) { - push_back(&mut self.inqueues, peer, event); - } - - // process inqueues: let peer handle all incoming events - let mut messages_sent = 0; - for (idx, queue) in self.inqueues.iter_mut().enumerate() { - let state = self.peers.get_mut(idx).unwrap(); - let peer = *state.me(); - while let Some(event) = queue.pop_front() { - if let InEvent::RecvMessage(from, _message) = &event { - self.conns.insert((*from, peer).into()); - } - debug!(peer = ?peer, "IN {event:?}"); - let out = state.handle(event, self.time); - for event in out { - debug!(peer = ?peer, "OUT {event:?}"); - match event { - OutEvent::SendMessage(to, message) => { - let to_idx = *self.peers_by_address.get(&to).unwrap(); - let latency = latency_between(&mut self.latencies, &peer, &to); - self.transport.insert( - self.time + latency, - (to_idx, InEvent::RecvMessage(peer, message)), - ); - messages_sent += 1; - } - OutEvent::ScheduleTimer(latency, timer) => { - self.timers.insert(self.time + latency, (idx, timer)); - } - OutEvent::DisconnectPeer(to) => { - debug!(peer = ?peer, other = ?to, "disconnect"); - let to_idx = *self.peers_by_address.get(&to).unwrap(); - let latency = latency_between(&mut self.latencies, &peer, &to) - + Duration::from_nanos(1); - if self.conns.remove(&(peer, to).into()) { - self.transport.insert( - self.time + latency, - (to_idx, InEvent::PeerDisconnected(peer)), - ); - } - } - OutEvent::EmitEvent(topic, event) => { 
- debug!(peer = ?peer, "emit {event:?}"); - self.events.push_back((peer, topic, event)); - } - OutEvent::PeerData(_peer, _data) => {} - } - } - } - } - debug!( - tick = self.get_tick(), - "~~ TICK (messages sent: {messages_sent})" - ); - } - - pub fn peer(&self, peer: &PI) -> Option<&State> { - self.peers_by_address - .get(peer) - .cloned() - .and_then(|idx| self.peers.get(idx)) - } - - pub fn get_active(&self, peer: &PI, topic: &TopicId) -> Option>> { - let peer = self.peer(peer)?; - match peer.state(topic) { - Some(state) => Some(Some( - state.swarm.active_view.iter().cloned().collect::>(), - )), - None => Some(None), - } - } -} -fn latency_between( - _latencies: &mut HashMap, Duration>, - _a: &PI, - _b: &PI, -) -> Duration { - DEFAULT_LATENCY -} - -pub fn assert_synchronous_active( - network: &Network, -) -> bool { - for state in network.peers.iter() { - let peer = *state.me(); - for (topic, state) in state.states() { - for other in state.swarm.active_view.iter() { - let other_idx = network.peers_by_address.get(other).unwrap(); - let other_state = &network - .peers - .get(*other_idx) - .unwrap() - .state(topic) - .unwrap() - .swarm - .active_view; - if !other_state.contains(&peer) { - warn!(peer = ?peer, other = ?other, "missing active_view peer in other"); - return false; - } - } - for other in state.gossip.eager_push_peers.iter() { - let other_idx = network.peers_by_address.get(other).unwrap(); - let other_state = &network - .peers - .get(*other_idx) - .unwrap() - .state(topic) - .unwrap() - .gossip - .eager_push_peers; - if !other_state.contains(&peer) { - warn!(peer = ?peer, other = ?other, "missing eager_push peer in other"); - return false; - } - } - } - } - true -} - -pub type PeerId = usize; - -/// A simple simulator for the gossip protocol -pub struct Simulator { - simulator_config: SimulatorConfig, - protocol_config: Config, - network: Network, - round_stats: Vec, -} -pub struct SimulatorConfig { - pub peers_count: usize, - pub bootstrap_count: usize, - pub bootstrap_ticks: usize, - pub join_ticks: usize, - pub warmup_ticks: usize, - pub round_max_ticks: usize, -} -#[derive(Debug, Default)] -pub struct RoundStats { - ticks: usize, - rmr: f32, - ldh: u16, -} - -pub const TOPIC: TopicId = TopicId::from_bytes([0u8; 32]); - -impl Default for SimulatorConfig { - fn default() -> Self { - Self { - peers_count: 100, - bootstrap_count: 5, - bootstrap_ticks: 50, - join_ticks: 1, - warmup_ticks: 300, - round_max_ticks: 200, - } - } -} -impl Simulator { - pub fn new(simulator_config: SimulatorConfig, protocol_config: Config) -> Self { - Self { - protocol_config, - simulator_config, - network: Network::new(Instant::now()), - round_stats: Default::default(), - } - } - pub fn init(&mut self) { - for i in 0..self.simulator_config.peers_count { - let rng = rand_chacha::ChaCha12Rng::seed_from_u64(99); - self.network.push(State::new( - i, - Default::default(), - self.protocol_config.clone(), - rng.clone(), - )); - } - } - pub fn bootstrap(&mut self) { - self.network.command(0, TOPIC, Command::Join(vec![])); - for i in 1..self.simulator_config.bootstrap_count { - self.network.command(i, TOPIC, Command::Join(vec![0])); - } - self.network.ticks(self.simulator_config.bootstrap_ticks); - let _ = self.network.events(); - - for i in self.simulator_config.bootstrap_count..self.simulator_config.peers_count { - let contact = i % self.simulator_config.bootstrap_count; - self.network.command(i, TOPIC, Command::Join(vec![contact])); - self.network.ticks(self.simulator_config.join_ticks); - let _ = 
self.network.events(); - } - self.network.ticks(self.simulator_config.warmup_ticks); - let _ = self.network.events(); - } - - pub fn gossip_round(&mut self, from: PeerId, message: Bytes) { - let prev_total_payload_counter = self.total_payload_messages(); - let mut expected: HashSet = HashSet::from_iter( - self.network - .peers - .iter() - .map(|p| *p.me()) - .filter(|p| *p != from), - ); - let expected_len = expected.len() as u64; - self.network.command( - from, - TOPIC, - Command::Broadcast(message.clone(), Scope::Swarm), - ); - - let mut tick = 0; - loop { - if expected.is_empty() { - break; - } - if tick > self.simulator_config.round_max_ticks { - break; - } - tick += 1; - self.network.tick(); - let events = self.network.events(); - let received: HashSet<_> = events - .filter( - |(_peer, _topic, event)| matches!(event, Event::Received(recv) if recv.content == message), - ) - .map(|(peer, _topic, _msg)| peer) - .collect(); - for peer in received.iter() { - expected.remove(peer); - } - } - - assert!(expected.is_empty(), "all nodes received the broadcast"); - let payload_counter = self.total_payload_messages() - prev_total_payload_counter; - let rmr = (payload_counter as f32 / (expected_len as f32 - 1.)) - 1.; - let ldh = self.max_ldh(); - let stats = RoundStats { - ticks: tick, - rmr, - ldh, - }; - self.round_stats.push(stats); - self.reset_stats() - } - - pub fn report_round_sums(&self) { - let len = self.round_stats.len(); - let mut rmr = 0.; - let mut ldh = 0.; - let mut ticks = 0.; - for round in self.round_stats.iter() { - rmr += round.rmr; - ldh += round.ldh as f32; - ticks += round.ticks as f32; - } - rmr /= len as f32; - ldh /= len as f32; - ticks /= len as f32; - eprintln!( - "average over {} rounds with {} peers: RMR {rmr:.2} LDH {ldh:.2} ticks {ticks:.2}", - self.round_stats.len(), - self.network.peers.len(), - ); - eprintln!("RMR = Relative Message Redundancy, LDH = Last Delivery Hop"); - } - - fn reset_stats(&mut self) { - for state in self.network.peers.iter_mut() { - let state = state.state_mut(&TOPIC).unwrap(); - state.gossip.stats = Default::default(); - } - } - - fn max_ldh(&self) -> u16 { - let mut max = 0; - for state in self.network.peers.iter() { - let state = state.state(&TOPIC).unwrap(); - let stats = state.gossip.stats(); - max = max.max(stats.max_last_delivery_hop); - } - max - } - - fn total_payload_messages(&self) -> u64 { - let mut sum = 0; - for state in self.network.peers.iter() { - let state = state.state(&TOPIC).unwrap(); - let stats = state.gossip.stats(); - sum += stats.payload_messages_received; - } - sum - } -} - -/// Helper struct for active connections. A sorted tuple. 
-#[derive(Debug, Clone, PartialOrd, Ord, Eq, PartialEq, Hash)] -pub struct ConnId([PI; 2]); -impl ConnId { - pub fn new(a: PI, b: PI) -> Self { - let mut conn = [a, b]; - conn.sort(); - Self(conn) - } -} -impl From<(PI, PI)> for ConnId { - fn from((a, b): (PI, PI)) -> Self { - Self::new(a, b) - } -} -impl From> for (PI, PI) { - fn from(conn: ConnId) -> (PI, PI) { - (conn.0[0], conn.0[1]) - } -} - -pub fn sort(items: Vec) -> Vec { - let mut sorted = items; - sorted.sort(); - sorted -} - -pub fn report_round_distribution(network: &Network) { - let mut eager_distrib: BTreeMap = BTreeMap::new(); - let mut lazy_distrib: BTreeMap = BTreeMap::new(); - let mut active_distrib: BTreeMap = BTreeMap::new(); - let mut passive_distrib: BTreeMap = BTreeMap::new(); - let mut payload_recv = 0; - let mut control_recv = 0; - for state in network.peers.iter() { - for (_topic, state) in state.states() { - let stats = state.gossip.stats(); - *eager_distrib - .entry(state.gossip.eager_push_peers.len()) - .or_default() += 1; - *lazy_distrib - .entry(state.gossip.lazy_push_peers.len()) - .or_default() += 1; - *active_distrib - .entry(state.swarm.active_view.len()) - .or_default() += 1; - *passive_distrib - .entry(state.swarm.passive_view.len()) - .or_default() += 1; - payload_recv += stats.payload_messages_received; - control_recv += stats.control_messages_received; - } - } - // eprintln!("distributions {round_distrib:?}"); - eprintln!("payload_recv {payload_recv} control_recv {control_recv}"); - eprintln!("eager_distrib {eager_distrib:?}"); - eprintln!("lazy_distrib {lazy_distrib:?}"); - eprintln!("active_distrib {active_distrib:?}"); - eprintln!("passive_distrib {passive_distrib:?}"); -} diff --git a/iroh-gossip/src/proto/topic.rs b/iroh-gossip/src/proto/topic.rs deleted file mode 100644 index f635845887..0000000000 --- a/iroh-gossip/src/proto/topic.rs +++ /dev/null @@ -1,346 +0,0 @@ -//! This module contains the implementation of the gossiping protocol for an individual topic - -use std::{ - collections::VecDeque, - time::{Duration, Instant}, -}; - -use bytes::Bytes; -use derive_more::From; -use rand::Rng; -use rand_core::SeedableRng; -use serde::{Deserialize, Serialize}; - -use super::{ - hyparview::{self, InEvent as SwarmIn}, - plumtree::{self, GossipEvent, InEvent as GossipIn, Scope}, - state::MessageKind, - PeerData, PeerIdentity, -}; - -/// The default maximum size in bytes for a gossip message. -/// This is a sane but arbitrary default and can be changed in the [`Config`]. -pub const DEFAULT_MAX_MESSAGE_SIZE: usize = 4096; - -/// Input event to the topic state handler. -#[derive(Clone, Debug)] -pub enum InEvent { - /// Message received from the network. - RecvMessage(PI, Message), - /// Execute a command from the application. - Command(Command), - /// Trigger a previously scheduled timer. - TimerExpired(Timer), - /// Peer disconnected on the network level. - PeerDisconnected(PI), - /// Update the opaque peer data about yourself. - UpdatePeerData(PeerData), -} - -/// An output event from the state handler. -#[derive(Debug, PartialEq, Eq)] -pub enum OutEvent { - /// Send a message on the network - SendMessage(PI, Message), - /// Emit an event to the application. - EmitEvent(Event), - /// Schedule a timer. The runtime is responsible for sending an [InEvent::TimerExpired] - /// after the duration. - ScheduleTimer(Duration, Timer), - /// Close the connection to a peer on the network level. - DisconnectPeer(PI), - /// Emitted when new [`PeerData`] was received for a peer. 
- PeerData(PI, PeerData), -} - -impl From> for OutEvent { - fn from(event: hyparview::OutEvent) -> Self { - use hyparview::OutEvent::*; - match event { - SendMessage(to, message) => Self::SendMessage(to, message.into()), - ScheduleTimer(delay, timer) => Self::ScheduleTimer(delay, timer.into()), - DisconnectPeer(peer) => Self::DisconnectPeer(peer), - EmitEvent(event) => Self::EmitEvent(event.into()), - PeerData(peer, data) => Self::PeerData(peer, data), - } - } -} - -impl From> for OutEvent { - fn from(event: plumtree::OutEvent) -> Self { - use plumtree::OutEvent::*; - match event { - SendMessage(to, message) => Self::SendMessage(to, message.into()), - ScheduleTimer(delay, timer) => Self::ScheduleTimer(delay, timer.into()), - EmitEvent(event) => Self::EmitEvent(event.into()), - } - } -} - -/// A trait for a concrete type to push `OutEvent`s to. -/// -/// The implementation is generic over this trait, which allows the upper layer to supply a -/// container of their choice for `OutEvent`s emitted from the protocol state. -pub trait IO { - /// Push an event in the IO container - fn push(&mut self, event: impl Into>); - - /// Push all events from an iterator into the IO container - fn push_from_iter(&mut self, iter: impl IntoIterator>>) { - for event in iter.into_iter() { - self.push(event); - } - } -} - -/// A protocol message for a particular topic -#[derive(From, Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] -pub enum Message { - /// A message of the swarm membership layer - Swarm(hyparview::Message), - /// A message of the gossip broadcast layer - Gossip(plumtree::Message), -} - -impl Message { - /// Get the kind of this message - pub fn kind(&self) -> MessageKind { - match self { - Message::Swarm(_) => MessageKind::Control, - Message::Gossip(message) => match message { - plumtree::Message::Gossip(_) => MessageKind::Data, - _ => MessageKind::Control, - }, - } - } -} - -/// An event to be emitted to the application for a particular topic. -#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Serialize, Deserialize)] -pub enum Event { - /// We have a new, direct neighbor in the swarm membership layer for this topic - NeighborUp(PI), - /// We dropped direct neighbor in the swarm membership layer for this topic - NeighborDown(PI), - /// A gossip message was received for this topic - Received(GossipEvent), -} - -impl From> for Event { - fn from(value: hyparview::Event) -> Self { - match value { - hyparview::Event::NeighborUp(peer) => Self::NeighborUp(peer), - hyparview::Event::NeighborDown(peer) => Self::NeighborDown(peer), - } - } -} - -impl From> for Event { - fn from(value: plumtree::Event) -> Self { - match value { - plumtree::Event::Received(event) => Self::Received(event), - } - } -} - -/// A timer to be registered for a particular topic. -/// -/// This should be treated as an opaque value by the implementer and, once emitted, simply returned -/// to the protocol through [`InEvent::TimerExpired`]. -#[derive(Clone, From, Debug, PartialEq, Eq)] -pub enum Timer { - /// A timer for the swarm layer - Swarm(hyparview::Timer), - /// A timer for the gossip layer - Gossip(plumtree::Timer), -} - -/// A command to the protocol state for a particular topic. -#[derive(Clone, derive_more::Debug)] -pub enum Command { - /// Join this topic and connect to peers. - /// - /// If the list of peers is empty, will prepare the state and accept incoming join requests, - /// but only become operational after the first join request by another peer. - Join(Vec), - /// Broadcast a message for this topic. 
- Broadcast(#[debug("<{}b>", _0.len())] Bytes, Scope), - /// Leave this topic and drop all state. - Quit, -} - -impl IO for VecDeque> { - fn push(&mut self, event: impl Into>) { - self.push_back(event.into()) - } -} - -/// Protocol configuration -#[derive(Clone, Debug)] -pub struct Config { - /// Configuration for the swarm membership layer - pub membership: hyparview::Config, - /// Configuration for the gossip broadcast layer - pub broadcast: plumtree::Config, - /// Max message size in bytes. - /// - /// This size should be the same across a network to ensure all nodes can transmit and read large messages. - /// - /// At minimum, this size should be large enough to send gossip control messages. This can vary, depending on the size of the [`PeerIdentity`] you use and the size of the [`PeerData`] you transmit in your messages. - /// - /// The default is [`DEFAULT_MAX_MESSAGE_SIZE`]. - pub max_message_size: usize, -} - -impl Default for Config { - fn default() -> Self { - Self { - membership: Default::default(), - broadcast: Default::default(), - max_message_size: DEFAULT_MAX_MESSAGE_SIZE, - } - } -} - -/// The topic state maintains the swarm membership and broadcast tree for a particular topic. -#[derive(Debug)] -pub struct State { - me: PI, - pub(crate) swarm: hyparview::State, - pub(crate) gossip: plumtree::State, - outbox: VecDeque>, - stats: Stats, -} - -impl State { - /// Initialize the local state with the default random number generator. - pub fn new(me: PI, me_data: Option, config: Config) -> Self { - Self::with_rng(me, me_data, config, rand::rngs::StdRng::from_entropy()) - } -} - -impl State { - /// The address of your local endpoint. - pub fn endpoint(&self) -> &PI { - &self.me - } -} - -impl State { - /// Initialize the local state with a custom random number generator. - pub fn with_rng(me: PI, me_data: Option, config: Config, rng: R) -> Self { - Self { - swarm: hyparview::State::new(me, me_data, config.membership, rng), - gossip: plumtree::State::new(me, config.broadcast), - me, - outbox: VecDeque::new(), - stats: Stats::default(), - } - } - - /// Handle an incoming event. - /// - /// Returns an iterator of outgoing events that must be processed by the application. - pub fn handle( - &mut self, - event: InEvent, - now: Instant, - ) -> impl Iterator> + '_ { - let io = &mut self.outbox; - // Process the event, store out events in outbox. 
- match event { - InEvent::Command(command) => match command { - Command::Join(peers) => { - for peer in peers { - self.swarm.handle(SwarmIn::RequestJoin(peer), now, io); - } - } - Command::Broadcast(data, scope) => { - self.gossip - .handle(GossipIn::Broadcast(data, scope), now, io) - } - Command::Quit => self.swarm.handle(SwarmIn::Quit, now, io), - }, - InEvent::RecvMessage(from, message) => { - self.stats.messages_received += 1; - match message { - Message::Swarm(message) => { - self.swarm - .handle(SwarmIn::RecvMessage(from, message), now, io) - } - Message::Gossip(message) => { - self.gossip - .handle(GossipIn::RecvMessage(from, message), now, io) - } - } - } - InEvent::TimerExpired(timer) => match timer { - Timer::Swarm(timer) => self.swarm.handle(SwarmIn::TimerExpired(timer), now, io), - Timer::Gossip(timer) => self.gossip.handle(GossipIn::TimerExpired(timer), now, io), - }, - InEvent::PeerDisconnected(peer) => { - self.swarm.handle(SwarmIn::PeerDisconnected(peer), now, io); - self.gossip.handle(GossipIn::NeighborDown(peer), now, io); - } - InEvent::UpdatePeerData(data) => { - self.swarm.handle(SwarmIn::UpdatePeerData(data), now, io) - } - } - - // Forward NeighborUp and NeighborDown events from hyparview to plumtree - let mut io = VecDeque::new(); - for event in self.outbox.iter() { - match event { - OutEvent::EmitEvent(Event::NeighborUp(peer)) => { - self.gossip - .handle(GossipIn::NeighborUp(*peer), now, &mut io) - } - OutEvent::EmitEvent(Event::NeighborDown(peer)) => { - self.gossip - .handle(GossipIn::NeighborDown(*peer), now, &mut io) - } - _ => {} - } - } - // Note that this is a no-op because plumtree::handle(NeighborUp | NeighborDown) - // above does not emit any OutEvents. - self.outbox.extend(io.drain(..)); - - // Update sent message counter - self.stats.messages_sent += self - .outbox - .iter() - .filter(|event| matches!(event, OutEvent::SendMessage(_, _))) - .count(); - - self.outbox.drain(..) - } - - /// Get stats on how many messages were sent and received - /// - /// TODO: Remove/replace with metrics? - pub fn stats(&self) -> &Stats { - &self.stats - } - - /// Get statistics for the gossip broadcast state - /// - /// TODO: Remove/replace with metrics? - pub fn gossip_stats(&self) -> &plumtree::Stats { - self.gossip.stats() - } - - /// Check if this topic has any active (connected) peers. - pub fn has_active_peers(&self) -> bool { - !self.swarm.active_view.is_empty() - } -} - -/// Statistics for the protocol state of a topic -#[derive(Clone, Debug, Default)] -pub struct Stats { - /// Number of messages sent - pub messages_sent: usize, - /// Number of messages received - pub messages_received: usize, -} diff --git a/iroh-gossip/src/proto/util.rs b/iroh-gossip/src/proto/util.rs deleted file mode 100644 index bd04c2b048..0000000000 --- a/iroh-gossip/src/proto/util.rs +++ /dev/null @@ -1,470 +0,0 @@ -//! Utilities used in the protocol implementation - -use std::{ - collections::{BTreeMap, HashMap}, - hash::Hash, - time::{Duration, Instant}, -}; - -use rand::{ - seq::{IteratorRandom, SliceRandom}, - Rng, -}; - -/// Implement methods, display, debug and conversion traits for 32 byte identifiers. -macro_rules! idbytes_impls { - ($ty:ty, $name:expr) => { - impl $ty { - /// Create from a byte array. - pub const fn from_bytes(bytes: [u8; 32]) -> Self { - Self(bytes) - } - - /// Get as byte slice. 
- pub fn as_bytes(&self) -> &[u8; 32] { - &self.0 - } - } - - impl> ::std::convert::From for $ty { - fn from(value: T) -> Self { - Self::from_bytes(value.into()) - } - } - - impl ::std::fmt::Display for $ty { - fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result { - write!(f, "{}", ::iroh_base::base32::fmt(&self.0)) - } - } - - impl ::std::fmt::Debug for $ty { - fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result { - write!(f, "{}({})", $name, ::iroh_base::base32::fmt_short(&self.0)) - } - } - - impl ::std::str::FromStr for $ty { - type Err = ::anyhow::Error; - fn from_str(s: &str) -> ::std::result::Result { - Ok(Self::from_bytes(::iroh_base::base32::parse_array(s)?)) - } - } - - impl ::std::convert::AsRef<[u8]> for $ty { - fn as_ref(&self) -> &[u8] { - &self.0 - } - } - - impl ::std::convert::AsRef<[u8; 32]> for $ty { - fn as_ref(&self) -> &[u8; 32] { - &self.0 - } - } - }; -} - -pub(crate) use idbytes_impls; - -/// A hash set where the iteration order of the values is independent of their -/// hash values. -/// -/// This is wrapper around [indexmap::IndexSet] which couple of utility methods -/// to randomly select elements from the set. -#[derive(Default, Debug, Clone, derive_more::Deref)] -pub(crate) struct IndexSet { - inner: indexmap::IndexSet, -} - -impl PartialEq for IndexSet { - fn eq(&self, other: &Self) -> bool { - self.inner == other.inner - } -} - -impl IndexSet { - pub fn new() -> Self { - Self { - inner: indexmap::IndexSet::new(), - } - } - - pub fn insert(&mut self, value: T) -> bool { - self.inner.insert(value) - } - - /// Remove a random element from the set. - pub fn remove_random(&mut self, rng: &mut R) -> Option { - self.pick_random_index(rng) - .and_then(|idx| self.inner.shift_remove_index(idx)) - } - - /// Pick a random element from the set. - pub fn pick_random(&self, rng: &mut R) -> Option<&T> { - self.pick_random_index(rng) - .and_then(|idx| self.inner.get_index(idx)) - } - - /// Pick a random element from the set, but not any of the elements in `without`. - pub fn pick_random_without(&self, without: &[&T], rng: &mut R) -> Option<&T> { - self.iter().filter(|x| !without.contains(x)).choose(rng) - } - - /// Pick a random index for an element in the set. - pub fn pick_random_index(&self, rng: &mut R) -> Option { - if self.is_empty() { - None - } else { - Some(rng.gen_range(0..self.inner.len())) - } - } - - /// Remove an element from the set. - /// - /// NOTE: the value is removed by swapping it with the last element of the set and popping it off. - /// **This modifies the order of element by moving the last element** - pub fn remove(&mut self, value: &T) -> Option { - self.inner.swap_remove_full(value).map(|(_i, v)| v) - } - - /// Remove an element from the set by its index. - /// - /// NOTE: the value is removed by swapping it with the last element of the set and popping it off. - /// **This modifies the order of element by moving the last element** - pub fn remove_index(&mut self, index: usize) -> Option { - self.inner.swap_remove_index(index) - } - - /// Create an iterator over the set in the order of insertion, while skipping the element in - /// `without`. - pub fn iter_without<'a>(&'a self, value: &'a T) -> impl Iterator { - self.iter().filter(move |x| *x != value) - } -} - -impl IndexSet -where - T: Hash + Eq + Clone, -{ - /// Create a vector of all elements in the set in random order. 
- pub fn shuffled(&self, rng: &mut R) -> Vec { - let mut items: Vec<_> = self.inner.iter().cloned().collect(); - items.shuffle(rng); - items - } - - /// Create a vector of all elements in the set in random order, and shorten to - /// the first `len` elements after shuffling. - pub fn shuffled_and_capped(&self, len: usize, rng: &mut R) -> Vec { - let mut items = self.shuffled(rng); - items.truncate(len); - items - } - - /// Create a vector of the elements in the set in random order while omitting - /// the elements in `without`. - pub fn shuffled_without(&self, without: &[&T], rng: &mut R) -> Vec { - let mut items = self - .inner - .iter() - .filter(|x| !without.contains(x)) - .cloned() - .collect::>(); - items.shuffle(rng); - items - } - - /// Create a vector of the elements in the set in random order while omitting - /// the elements in `without`, and shorten to the first `len` elements. - pub fn shuffled_without_and_capped( - &self, - without: &[&T], - len: usize, - rng: &mut R, - ) -> Vec { - let mut items = self.shuffled_without(without, rng); - items.truncate(len); - items - } -} - -impl IntoIterator for IndexSet { - type Item = T; - type IntoIter = as IntoIterator>::IntoIter; - fn into_iter(self) -> Self::IntoIter { - self.inner.into_iter() - } -} - -impl FromIterator for IndexSet -where - T: Hash + Eq, -{ - fn from_iter>(iterable: I) -> Self { - IndexSet { - inner: indexmap::IndexSet::from_iter(iterable), - } - } -} - -/// A [`BTreeMap`] with [`Instant`] as key. Allows to process expired items. -#[derive(Debug)] -pub struct TimerMap(BTreeMap>); - -impl Default for TimerMap { - fn default() -> Self { - Self::new() - } -} - -impl TimerMap { - /// Create a new, empty TimerMap. - pub fn new() -> Self { - Self(Default::default()) - } - /// Insert a new entry at the specified instant. - pub fn insert(&mut self, instant: Instant, item: T) { - let entry = self.0.entry(instant).or_default(); - entry.push(item); - } - - /// Remove and return all entries before and equal to `from`. - pub fn drain_until(&mut self, from: &Instant) -> impl Iterator { - let split_point = *from + Duration::from_nanos(1); - let later_half = self.0.split_off(&split_point); - let expired = std::mem::replace(&mut self.0, later_half); - expired - .into_iter() - .flat_map(|(t, v)| v.into_iter().map(move |v| (t, v))) - } - - /// Get a reference to the earliest entry in the TimerMap. - pub fn first(&self) -> Option<(&Instant, &Vec)> { - self.0.iter().next() - } - - /// Iterate over all items in the timer map. - pub fn iter(&self) -> impl Iterator { - self.0 - .iter() - .flat_map(|(t, v)| v.iter().map(move |v| (t, v))) - } -} - -impl TimerMap { - /// Remove an entry from the specified instant. - pub fn remove(&mut self, instant: &Instant, item: &T) { - if let Some(items) = self.0.get_mut(instant) { - items.retain(|x| x != item) - } - } -} - -/// A hash map where entries expire after a time -#[derive(Debug)] -pub struct TimeBoundCache { - map: HashMap, - expiry: TimerMap, -} - -impl Default for TimeBoundCache { - fn default() -> Self { - Self { - map: Default::default(), - expiry: Default::default(), - } - } -} - -impl TimeBoundCache { - /// Insert an item into the cache, marked with an expiration time. - pub fn insert(&mut self, key: K, value: V, expires: Instant) { - self.remove(&key); - self.map.insert(key.clone(), (expires, value)); - self.expiry.insert(expires, key); - } - - /// Returns `true` if the map contains a value for the specified key. 
-    pub fn contains_key(&self, key: &K) -> bool {
-        self.map.contains_key(key)
-    }
-
-    /// Remove an item from the cache.
-    pub fn remove(&mut self, key: &K) -> Option<V> {
-        if let Some((expires, value)) = self.map.remove(key) {
-            self.expiry.remove(&expires, key);
-            Some(value)
-        } else {
-            None
-        }
-    }
-
-    /// Get the number of entries in the cache.
-    pub fn len(&self) -> usize {
-        self.map.len()
-    }
-
-    /// Returns `true` if the map contains no elements.
-    pub fn is_empty(&self) -> bool {
-        self.map.is_empty()
-    }
-
-    /// Get an item from the cache.
-    pub fn get(&self, key: &K) -> Option<&V> {
-        self.map.get(key).map(|(_expires, value)| value)
-    }
-
-    /// Get the expiration time for an item.
-    pub fn expires(&self, key: &K) -> Option<&Instant> {
-        self.map.get(key).map(|(expires, _value)| expires)
-    }
-
-    /// Iterate over all items in the cache.
-    pub fn iter(&self) -> impl Iterator<Item = (&K, &V, &Instant)> {
-        self.map.iter().map(|(k, (expires, v))| (k, v, expires))
-    }
-
-    /// Remove all entries with an expiry instant lower than or equal to `instant`.
-    ///
-    /// Returns the number of items that were removed.
-    pub fn expire_until(&mut self, instant: Instant) -> usize {
-        let drain = self.expiry.drain_until(&instant);
-        let mut count = 0;
-        for (_instant, key) in drain {
-            count += 1;
-            let _value = self.map.remove(&key);
-        }
-        count
-    }
-}
-
-#[cfg(test)]
-mod test {
-    use std::{
-        str::FromStr,
-        time::{Duration, Instant},
-    };
-
-    use rand_core::SeedableRng;
-
-    use super::{IndexSet, TimeBoundCache, TimerMap};
-
-    fn test_rng() -> rand_chacha::ChaCha12Rng {
-        rand_chacha::ChaCha12Rng::seed_from_u64(42)
-    }
-
-    #[test]
-    fn indexset() {
-        let elems = [1, 2, 3, 4];
-        let set = IndexSet::from_iter(elems);
-        let x = set.shuffled(&mut test_rng());
-        assert_eq!(x, vec![4, 2, 1, 3]);
-        let x = set.shuffled_and_capped(2, &mut test_rng());
-        assert_eq!(x, vec![4, 2]);
-        let x = set.shuffled_without(&[&1], &mut test_rng());
-        assert_eq!(x, vec![4, 3, 2]);
-        let x = set.shuffled_without_and_capped(&[&1], 2, &mut test_rng());
-        assert_eq!(x, vec![4, 3]);
-
-        // recreate the rng - otherwise we get failures on some architectures when cross-compiling,
-        // likely due to usize differences pulling different amounts of randomness.
-        let x = set.pick_random(&mut test_rng());
-        assert_eq!(x, Some(&3));
-        let x = set.pick_random_without(&[&3], &mut test_rng());
-        assert_eq!(x, Some(&4));
-
-        let mut set = set;
-        set.remove_random(&mut test_rng());
-        assert_eq!(set, IndexSet::from_iter([1, 2, 4]));
-    }
-
-    #[test]
-    fn timer_map() {
-        let mut map = TimerMap::new();
-        let now = Instant::now();
-
-        let times = [
-            now - Duration::from_secs(1),
-            now,
-            now + Duration::from_secs(1),
-            now + Duration::from_secs(2),
-        ];
-        map.insert(times[0], -1);
-        map.insert(times[0], -2);
-        map.insert(times[1], 0);
-        map.insert(times[2], 1);
-        map.insert(times[3], 2);
-        map.insert(times[3], 3);
-
-        assert_eq!(
-            map.iter().collect::<Vec<_>>(),
-            vec![
-                (&times[0], &-1),
-                (&times[0], &-2),
-                (&times[1], &0),
-                (&times[2], &1),
-                (&times[3], &2),
-                (&times[3], &3)
-            ]
-        );
-
-        assert_eq!(map.first(), Some((&times[0], &vec![-1, -2])));
-
-        let drain = map.drain_until(&now);
-        assert_eq!(
-            drain.collect::<Vec<_>>(),
-            vec![(times[0], -1), (times[0], -2), (times[1], 0),]
-        );
-        assert_eq!(
-            map.iter().collect::<Vec<_>>(),
-            vec![(&times[2], &1), (&times[3], &2), (&times[3], &3)]
-        );
-    }
-
-    #[test]
-    fn base32() {
-        #[derive(Eq, PartialEq)]
-        struct Id([u8; 32]);
-        idbytes_impls!(Id, "Id");
-        let id: Id = [1u8; 32].into();
-        assert_eq!(id, Id::from_str(&format!("{id}")).unwrap());
-        assert_eq!(
-            &format!("{id}"),
-            "aeaqcaibaeaqcaibaeaqcaibaeaqcaibaeaqcaibaeaqcaibaeaq"
-        );
-        assert_eq!(&format!("{id:?}"), "Id(aeaqcaibaeaqcaib)");
-        assert_eq!(id.as_bytes(), &[1u8; 32]);
-    }
-
-    #[test]
-    fn time_bound_cache() {
-        let mut cache = TimeBoundCache::default();
-
-        let t0 = Instant::now();
-        let t1 = t0 + Duration::from_secs(1);
-        let t2 = t0 + Duration::from_secs(2);
-
-        cache.insert(1, 10, t0);
-        cache.insert(2, 20, t1);
-        cache.insert(3, 30, t1);
-        cache.insert(4, 40, t2);
-
-        assert_eq!(cache.get(&2), Some(&20));
-        assert_eq!(cache.len(), 4);
-        let removed = cache.expire_until(t1);
-        assert_eq!(removed, 3);
-        assert_eq!(cache.len(), 1);
-        assert_eq!(cache.get(&2), None);
-        assert_eq!(cache.get(&4), Some(&40));
-
-        let t3 = t2 + Duration::from_secs(1);
-        cache.insert(5, 50, t2);
-        assert_eq!(cache.expires(&5), Some(&t2));
-        cache.insert(5, 50, t3);
-        assert_eq!(cache.expires(&5), Some(&t3));
-        cache.expire_until(t2);
-        assert_eq!(cache.get(&4), None);
-        assert_eq!(cache.get(&5), Some(&50));
-    }
-}
diff --git a/iroh-net/Cargo.toml b/iroh-net/Cargo.toml
index e94fb778a8..9c6f514033 100644
--- a/iroh-net/Cargo.toml
+++ b/iroh-net/Cargo.toml
@@ -38,7 +38,7 @@ http-body-util = "0.1.0"
 hyper = { version = "1", features = ["server", "client", "http1"] }
 hyper-util = "0.1.1"
 igd-next = { version = "0.15.1", features = ["aio_tokio"] }
-iroh-base = { version = "0.27.0", path = "../iroh-base", features = ["key"] }
+iroh-base = { version = "0.27.0", features = ["key"] }
 libc = "0.2.139"
 num_enum = "0.7"
 once_cell = "1.18.0"
@@ -87,7 +87,7 @@ tracing-subscriber = { version = "0.3", features = ["env-filter"], optional = tr
 tokio-rustls-acme = { version = "0.4", optional = true }
 
 # metrics
-iroh-metrics = { version = "0.27.0", path = "../iroh-metrics", default-features = false }
+iroh-metrics = { version = "0.27.0", default-features = false }
 strum = { version = "0.26.2", features = ["derive"] }
 
 # local-swarm-discovery
@@ -118,7 +118,7 @@ rand_chacha = "0.3.1"
 testdir = "0.9.1"
 tokio = { version = "1", features = ["io-util", "sync", "rt", "net", "fs", "macros", "time", "test-util"] }
 tracing-subscriber = { version = "0.3", features = ["env-filter"] }
-iroh-test = { path = "../iroh-test" }
+iroh-test = "0.27.0"
 iroh-net = { path = ".", features = ["iroh-relay"] }
 serde_json = "1.0.107"
 testresult = "0.4.0"
diff --git a/iroh/Cargo.toml b/iroh/Cargo.toml
index 669ef1dd33..401f8cf700 100644
--- a/iroh/Cargo.toml
+++ b/iroh/Cargo.toml
@@ -26,16 +26,16 @@ futures-lite = "2.3"
 futures-util = "0.3"
 genawaiter = { version = "0.99", default-features = false, features = ["futures03"] }
 hex = { version = "0.4.3" }
-iroh-blobs = { version = "0.27.0", path = "../iroh-blobs", features = ["downloader"] }
-iroh-base = { version = "0.27.0", path = "../iroh-base", features = ["key"] }
+iroh-blobs = { version = "0.27.0", features = ["downloader"] }
+iroh-base = { version = "0.27.0", features = ["key"] }
 iroh-io = { version = "0.6.0", features = ["stats"] }
-iroh-metrics = { version = "0.27.0", path = "../iroh-metrics", optional = true }
-iroh-net = { version = "0.27.0", path = "../iroh-net", features = ["discovery-local-network"] }
+iroh-metrics = { version = "0.27.0", optional = true }
+iroh-net = { version = "0.27.0", features = ["discovery-local-network"] }
 nested_enum_utils = "0.1.0"
 num_cpus = { version = "1.15.0" }
 portable-atomic = "1"
-iroh-docs = { version = "0.27.0", path = "../iroh-docs" }
-iroh-gossip = { version = "0.27.0", path = "../iroh-gossip" }
+iroh-docs = { version = "0.27.0" }
+iroh-gossip = "0.27.0"
 parking_lot = "0.12.1"
 postcard = { version = "1", default-features = false, features = ["alloc", "use-std", "experimental-derive"] }
 quic-rpc = { version = "0.12", default-features = false, features = ["flume-transport", "quinn-transport"] }
@@ -75,7 +75,7 @@ test-utils = ["iroh-net/test-utils"]
 anyhow = { version = "1" }
 genawaiter = { version = "0.99", features = ["futures03"] }
 iroh = { path = ".", features = ["test-utils"] }
-iroh-test = { path = "../iroh-test" }
+iroh-test = "0.27.0"
 proptest = "1.2.0"
 rand_chacha = "0.3.1"
 regex = { version = "1.7.1", features = ["std"] }