From fcdc299e316b425a26887369a1e48a8f90bdeee4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=BCdiger=20Klaehn?= Date: Mon, 8 Apr 2024 16:58:08 +0300 Subject: [PATCH] fix(iroh): do not shut down node on internal rpc error (#2158) ## Description fix(iroh): do not shut down node on internal rpc error Internal rpc errors can happen, e.g. when the client side gets dropped. No reason to shut down the entire node. We already have a cancellation token, so it's not like we rely on this for shutdown. Fixes #2157, #2143 ## Notes & open questions Note: I also sneaked in a fix for #2143. Basically when we are unable to notify the db that a new gc epoch has started, we just shot down the gc loop, since clearly the store is not well (probably shut down). ## Change checklist - [x] Self-review. - [x] Documentation updates if relevant. - [x] Tests if relevant. --- iroh-bytes/src/store/traits.rs | 5 ++++- iroh/src/node/builder.rs | 11 ++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/iroh-bytes/src/store/traits.rs b/iroh-bytes/src/store/traits.rs index 1eced22e6a..6c5dd5aaca 100644 --- a/iroh-bytes/src/store/traits.rs +++ b/iroh-bytes/src/store/traits.rs @@ -356,7 +356,10 @@ pub trait Store: ReadableStore + MapMut { /// Create a temporary pin for this store fn temp_tag(&self, value: HashAndFormat) -> TempTag; - /// Notify the store that a new gc phase is about to start + /// Notify the store that a new gc phase is about to start. + /// + /// This should not fail unless the store is shut down or otherwise in a + /// bad state. The gc task will shut itself down if this fails. fn gc_start(&self) -> impl Future> + Send; /// Traverse all roots recursively and mark them as live. diff --git a/iroh/src/node/builder.rs b/iroh/src/node/builder.rs index 6418cfb08c..75b6fb26bb 100644 --- a/iroh/src/node/builder.rs +++ b/iroh/src/node/builder.rs @@ -484,9 +484,8 @@ where Ok((msg, chan)) => { handler.handle_rpc_request(msg, chan); } - Err(_) => { - info!("last controller dropped, shutting down"); - break; + Err(e) => { + info!("internal rpc request error: {:?}", e); } } }, @@ -537,8 +536,10 @@ where tracing::debug!("GC loop starting {:?}", gc_period); 'outer: loop { if let Err(cause) = db.gc_start().await { - tracing::error!("Error {} starting GC, skipping GC to be safe", cause); - continue 'outer; + tracing::debug!( + "unable to notify the db of GC start: {cause}. Shutting down GC loop." + ); + break; } // do delay before the two phases of GC tokio::time::sleep(gc_period).await;