From bc9bc82a8d63ca995e2c1ab3b67766686a7d39bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20W=C3=B3jcik?= <3044353+pwojcikdev@users.noreply.github.com> Date: Wed, 10 Jul 2024 11:00:11 +0200 Subject: [PATCH] Log node status periodically (#4676) * Fix alignment * Log node status periodically * Log detailed peer and connections info * Logging consistency --- nano/lib/logging_enums.hpp | 1 + nano/lib/thread_roles.cpp | 3 + nano/lib/thread_roles.hpp | 1 + nano/node/CMakeLists.txt | 6 +- nano/node/active_elections.cpp | 8 ++ nano/node/active_elections.hpp | 3 +- nano/node/monitor.cpp | 132 +++++++++++++++++++++++++++ nano/node/monitor.hpp | 52 +++++++++++ nano/node/node.cpp | 11 ++- nano/node/node.hpp | 4 + nano/node/nodeconfig.cpp | 10 ++ nano/node/nodeconfig.hpp | 2 + nano/node/repcrawler.cpp | 18 ++-- nano/node/transport/tcp_listener.cpp | 6 ++ nano/node/transport/tcp_listener.hpp | 14 +-- 15 files changed, 250 insertions(+), 21 deletions(-) create mode 100644 nano/node/monitor.cpp create mode 100644 nano/node/monitor.hpp diff --git a/nano/lib/logging_enums.hpp b/nano/lib/logging_enums.hpp index b2b00b350c..7eecf47cf4 100644 --- a/nano/lib/logging_enums.hpp +++ b/nano/lib/logging_enums.hpp @@ -80,6 +80,7 @@ enum class type peer_history, message_processor, local_block_broadcaster, + monitor, // bootstrap bulk_pull_client, diff --git a/nano/lib/thread_roles.cpp b/nano/lib/thread_roles.cpp index 2831c72038..76f467cd0e 100644 --- a/nano/lib/thread_roles.cpp +++ b/nano/lib/thread_roles.cpp @@ -157,6 +157,9 @@ std::string nano::thread_role::get_string (nano::thread_role::name role) case nano::thread_role::name::vote_router: thread_role_name_string = "Vote router"; break; + case nano::thread_role::name::monitor: + thread_role_name_string = "Monitor"; + break; default: debug_assert (false && "nano::thread_role::get_string unhandled thread role"); } diff --git a/nano/lib/thread_roles.hpp b/nano/lib/thread_roles.hpp index eef15632fa..5896318c42 100644 --- a/nano/lib/thread_roles.hpp +++ b/nano/lib/thread_roles.hpp @@ -59,6 +59,7 @@ enum class name port_mapping, stats, vote_router, + monitor, }; std::string_view to_string (name); diff --git a/nano/node/CMakeLists.txt b/nano/node/CMakeLists.txt index 535f917b67..37646b575b 100644 --- a/nano/node/CMakeLists.txt +++ b/nano/node/CMakeLists.txt @@ -101,6 +101,10 @@ add_library( make_store.cpp message_processor.hpp message_processor.cpp + messages.hpp + messages.cpp + monitor.hpp + monitor.cpp network.hpp network.cpp nodeconfig.hpp @@ -196,8 +200,6 @@ add_library( websocketconfig.cpp websocket_stream.hpp websocket_stream.cpp - messages.hpp - messages.cpp xorshift.hpp) target_link_libraries( diff --git a/nano/node/active_elections.cpp b/nano/node/active_elections.cpp index aa183428e8..3f7d7baeec 100644 --- a/nano/node/active_elections.cpp +++ b/nano/node/active_elections.cpp @@ -515,6 +515,14 @@ std::size_t nano::active_elections::size () const return roots.size (); } +std::size_t nano::active_elections::size (nano::election_behavior behavior) const +{ + nano::lock_guard lock{ mutex }; + auto count = count_by_behavior[behavior]; + debug_assert (count >= 0); + return static_cast (count); +} + bool nano::active_elections::publish (std::shared_ptr const & block_a) { nano::unique_lock lock{ mutex }; diff --git a/nano/node/active_elections.hpp b/nano/node/active_elections.hpp index a4cb478a38..96e47def58 100644 --- a/nano/node/active_elections.hpp +++ b/nano/node/active_elections.hpp @@ -120,6 +120,7 @@ class active_elections final bool erase (nano::qualified_root const &); bool empty () const; std::size_t size () const; + std::size_t size (nano::election_behavior) const; bool publish (std::shared_ptr const &); /** @@ -172,7 +173,7 @@ class active_elections final std::chrono::seconds const election_time_to_live; /** Keeps track of number of elections by election behavior (normal, hinted, optimistic) */ - nano::enum_array count_by_behavior; + nano::enum_array count_by_behavior{}; nano::condition_variable condition; bool stopped{ false }; diff --git a/nano/node/monitor.cpp b/nano/node/monitor.cpp new file mode 100644 index 0000000000..15b98b89d4 --- /dev/null +++ b/nano/node/monitor.cpp @@ -0,0 +1,132 @@ +#include "nano/secure/ledger.hpp" + +#include +#include +#include +#include + +nano::monitor::monitor (nano::monitor_config const & config_a, nano::node & node_a) : + config{ config_a }, + node{ node_a }, + logger{ node_a.logger } +{ +} + +nano::monitor::~monitor () +{ + debug_assert (!thread.joinable ()); +} + +void nano::monitor::start () +{ + if (!config.enabled) + { + return; + } + + thread = std::thread ([this] () { + nano::thread_role::set (nano::thread_role::name::monitor); + run (); + }); +} + +void nano::monitor::stop () +{ + { + nano::lock_guard guard{ mutex }; + stopped = true; + } + condition.notify_all (); + if (thread.joinable ()) + { + thread.join (); + } +} + +void nano::monitor::run () +{ + std::unique_lock lock{ mutex }; + while (!stopped) + { + run_one (); + condition.wait_until (lock, std::chrono::steady_clock::now () + config.interval, [this] { return stopped; }); + } +} + +void nano::monitor::run_one () +{ + // Node status: + // - blocks (confirmed, total) + // - blocks rate (over last 5m, peak over last 5m) + // - peers + // - stake (online, peered, trended, quorum needed) + // - elections active (normal, hinted, optimistic) + // - election stats over last 5m (confirmed, dropped) + + auto const now = std::chrono::steady_clock::now (); + auto blocks_cemented = node.ledger.cemented_count (); + auto blocks_total = node.ledger.block_count (); + + // Wait for node to warm up before logging + if (last_time != std::chrono::steady_clock::time_point{}) + { + // TODO: Maybe emphasize somehow that confirmed doesn't need to be equal to total; backlog is OK + logger.info (nano::log::type::monitor, "Blocks confirmed: {} | total: {}", + blocks_cemented, + blocks_total); + + // Calculate the rates + auto elapsed_seconds = std::chrono::duration_cast (now - last_time).count (); + auto blocks_confirmed_rate = static_cast (blocks_cemented - last_blocks_cemented) / elapsed_seconds; + auto blocks_checked_rate = static_cast (blocks_total - last_blocks_total) / elapsed_seconds; + + logger.info (nano::log::type::monitor, "Blocks rate (average over last {}s): confirmed {:.2f}/s | total {:.2f}/s", + elapsed_seconds, + blocks_confirmed_rate, + blocks_checked_rate); + + logger.info (nano::log::type::monitor, "Peers: {} (realtime: {} | bootstrap: {} | inbound connections: {} | outbound connections: {})", + node.network.size (), + node.tcp_listener.realtime_count (), + node.tcp_listener.bootstrap_count (), + node.tcp_listener.connection_count (nano::transport::tcp_listener::connection_type::inbound), + node.tcp_listener.connection_count (nano::transport::tcp_listener::connection_type::outbound)); + + logger.info (nano::log::type::monitor, "Quorum: {} (stake peered: {} | stake online: {})", + nano::uint128_union{ node.online_reps.delta () }.format_balance (Mxrb_ratio, 1, true), + nano::uint128_union{ node.rep_crawler.total_weight () }.format_balance (Mxrb_ratio, 1, true), + nano::uint128_union{ node.online_reps.online () }.format_balance (Mxrb_ratio, 1, true)); + + logger.info (nano::log::type::monitor, "Elections active: {} (priority: {} | hinted: {} | optimistic: {})", + node.active.size (), + node.active.size (nano::election_behavior::priority), + node.active.size (nano::election_behavior::hinted), + node.active.size (nano::election_behavior::optimistic)); + } + + last_time = now; + last_blocks_cemented = blocks_cemented; + last_blocks_total = blocks_total; +} + +/* + * monitor_config + */ + +nano::error nano::monitor_config::serialize (nano::tomlconfig & toml) const +{ + toml.put ("enable", enabled, "Enable or disable periodic node status logging\ntype:bool"); + toml.put ("interval", interval.count (), "Interval between status logs\ntype:seconds"); + + return toml.get_error (); +} + +nano::error nano::monitor_config::deserialize (nano::tomlconfig & toml) +{ + toml.get ("enabled", enabled); + auto interval_l = interval.count (); + toml.get ("interval", interval_l); + interval = std::chrono::seconds{ interval_l }; + + return toml.get_error (); +} \ No newline at end of file diff --git a/nano/node/monitor.hpp b/nano/node/monitor.hpp new file mode 100644 index 0000000000..a76f753bbc --- /dev/null +++ b/nano/node/monitor.hpp @@ -0,0 +1,52 @@ +#pragma once + +#include +#include + +#include +#include + +using namespace std::chrono_literals; + +namespace nano +{ +class monitor_config final +{ +public: + nano::error deserialize (nano::tomlconfig &); + nano::error serialize (nano::tomlconfig &) const; + +public: + bool enabled{ true }; + std::chrono::seconds interval{ 60s }; +}; + +class monitor final +{ +public: + monitor (monitor_config const &, nano::node &); + ~monitor (); + + void start (); + void stop (); + +private: // Dependencies + monitor_config const & config; + nano::node & node; + nano::logger & logger; + +private: + void run (); + void run_one (); + + std::chrono::steady_clock::time_point last_time{}; + + size_t last_blocks_cemented{ 0 }; + size_t last_blocks_total{ 0 }; + + bool stopped{ false }; + nano::condition_variable condition; + mutable nano::mutex mutex; + std::thread thread; +}; +} \ No newline at end of file diff --git a/nano/node/node.cpp b/nano/node/node.cpp index 05ea923576..eaa141884d 100644 --- a/nano/node/node.cpp +++ b/nano/node/node.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -223,6 +224,8 @@ nano::node::node (std::shared_ptr io_ctx_a, std::filesy process_live_dispatcher{ ledger, scheduler.priority, vote_cache, websocket }, peer_history_impl{ std::make_unique (config.peer_history, store, network, logger, stats) }, peer_history{ *peer_history_impl }, + monitor_impl{ std::make_unique (config.monitor, *this) }, + monitor{ *monitor_impl }, startup_time (std::chrono::steady_clock::now ()), node_seq (seq) { @@ -371,7 +374,7 @@ nano::node::node (std::shared_ptr io_ctx_a, std::filesy auto const network_label = network_params.network.get_current_network_as_string (); - logger.info (nano::log::type::node, "Node starting, version: {}", NANO_VERSION_STRING); + logger.info (nano::log::type::node, "Version: {}", NANO_VERSION_STRING); logger.info (nano::log::type::node, "Build information: {}", BUILD_INFO); logger.info (nano::log::type::node, "Active network: {}", network_label); logger.info (nano::log::type::node, "Database backend: {}", store.vendor_get ()); @@ -445,7 +448,7 @@ nano::node::node (std::shared_ptr io_ctx_a, std::filesy ledger.bootstrap_weights = bootstrap_weights.second; - logger.info (nano::log::type::node, "************************************ Bootstrap weights ************************************"); + logger.info (nano::log::type::node, "******************************************** Bootstrap weights ********************************************"); // Sort the weights std::vector> sorted_weights (ledger.bootstrap_weights.begin (), ledger.bootstrap_weights.end ()); @@ -460,7 +463,7 @@ nano::node::node (std::shared_ptr io_ctx_a, std::filesy nano::uint128_union (rep.second).format_balance (Mxrb_ratio, 0, true)); } - logger.info (nano::log::type::node, "************************************ ================= ************************************"); + logger.info (nano::log::type::node, "******************************************** ================= ********************************************"); } } @@ -721,6 +724,7 @@ void nano::node::start () local_block_broadcaster.start (); peer_history.start (); vote_router.start (); + monitor.start (); add_initial_peers (); } @@ -770,6 +774,7 @@ void nano::node::stop () local_block_broadcaster.stop (); message_processor.stop (); network.stop (); // Stop network last to avoid killing in-use sockets + monitor.stop (); // work pool is not stopped on purpose due to testing setup diff --git a/nano/node/node.hpp b/nano/node/node.hpp index 10494aa83d..6b8d2eec94 100644 --- a/nano/node/node.hpp +++ b/nano/node/node.hpp @@ -45,6 +45,7 @@ namespace nano class active_elections; class confirming_set; class message_processor; +class monitor; class node; class vote_processor; class vote_cache_processor; @@ -216,7 +217,10 @@ class node final : public std::enable_shared_from_this nano::process_live_dispatcher process_live_dispatcher; std::unique_ptr peer_history_impl; nano::peer_history & peer_history; + std::unique_ptr monitor_impl; + nano::monitor & monitor; +public: std::chrono::steady_clock::time_point const startup_time; std::chrono::seconds unchecked_cutoff = std::chrono::seconds (7 * 24 * 60 * 60); // Week std::atomic unresponsive_work_peers{ false }; diff --git a/nano/node/nodeconfig.cpp b/nano/node/nodeconfig.cpp index 0eef9a5674..10adbddeaa 100644 --- a/nano/node/nodeconfig.cpp +++ b/nano/node/nodeconfig.cpp @@ -249,6 +249,10 @@ nano::error nano::node_config::serialize_toml (nano::tomlconfig & toml) const message_processor.serialize (message_processor_l); toml.put_child ("message_processor", message_processor_l); + nano::tomlconfig monitor_l; + monitor.serialize (monitor_l); + toml.put_child ("monitor", monitor_l); + return toml.get_error (); } @@ -366,6 +370,12 @@ nano::error nano::node_config::deserialize_toml (nano::tomlconfig & toml) message_processor.deserialize (config_l); } + if (toml.has_key ("monitor")) + { + auto config_l = toml.get_required_child ("monitor"); + monitor.deserialize (config_l); + } + if (toml.has_key ("work_peers")) { work_peers.clear (); diff --git a/nano/node/nodeconfig.hpp b/nano/node/nodeconfig.hpp index 2f01e8d194..f6abaa8ab7 100644 --- a/nano/node/nodeconfig.hpp +++ b/nano/node/nodeconfig.hpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -156,6 +157,7 @@ class node_config nano::network_config network; nano::local_block_broadcaster_config local_block_broadcaster; nano::confirming_set_config confirming_set; + nano::monitor_config monitor; public: std::string serialize_frontiers_confirmation (nano::frontiers_confirmation_mode) const; diff --git a/nano/node/repcrawler.cpp b/nano/node/repcrawler.cpp index f2e22f3575..f6cca66b5f 100644 --- a/nano/node/repcrawler.cpp +++ b/nano/node/repcrawler.cpp @@ -83,7 +83,7 @@ void nano::rep_crawler::validate_and_process (nano::unique_lock & l nano::uint128_t const rep_weight = node.ledger.weight (vote->account); if (rep_weight < minimum) { - logger.debug (nano::log::type::rep_crawler, "Ignoring vote from account {} with too little voting weight: {}", + logger.debug (nano::log::type::rep_crawler, "Ignoring vote from account: {} with too little voting weight: {}", vote->account.to_account (), nano::util::to_str (rep_weight)); continue; @@ -121,11 +121,11 @@ void nano::rep_crawler::validate_and_process (nano::unique_lock & l if (inserted) { - logger.info (nano::log::type::rep_crawler, "Found representative {} at {}", vote->account.to_account (), channel->to_string ()); + logger.info (nano::log::type::rep_crawler, "Found representative: {} at: {}", vote->account.to_account (), channel->to_string ()); } if (updated) { - logger.warn (nano::log::type::rep_crawler, "Updated representative {} at {} (was at: {})", vote->account.to_account (), channel->to_string (), prev_channel->to_string ()); + logger.warn (nano::log::type::rep_crawler, "Updated representative: {} at: {} (was at: {})", vote->account.to_account (), channel->to_string (), prev_channel->to_string ()); } } } @@ -202,7 +202,7 @@ void nano::rep_crawler::cleanup () erase_if (reps, [this] (rep_entry const & rep) { if (!rep.channel->alive ()) { - logger.info (nano::log::type::rep_crawler, "Evicting representative {} with dead channel at {}", rep.account.to_account (), rep.channel->to_string ()); + logger.info (nano::log::type::rep_crawler, "Evicting representative: {} with dead channel at: {}", rep.account.to_account (), rep.channel->to_string ()); stats.inc (nano::stat::type::rep_crawler, nano::stat::detail::channel_dead); return true; // Erase } @@ -215,12 +215,12 @@ void nano::rep_crawler::cleanup () { if (query.replies == 0) { - logger.debug (nano::log::type::rep_crawler, "Aborting unresponsive query for block {} from {}", query.hash.to_string (), query.channel->to_string ()); + logger.debug (nano::log::type::rep_crawler, "Aborting unresponsive query for block: {} from: {}", query.hash.to_string (), query.channel->to_string ()); stats.inc (nano::stat::type::rep_crawler, nano::stat::detail::query_timeout); } else { - logger.debug (nano::log::type::rep_crawler, "Completion of query with {} replies for block {} from {}", query.replies, query.hash.to_string (), query.channel->to_string ()); + logger.debug (nano::log::type::rep_crawler, "Completion of query with: {} replies for block: {} from: {}", query.replies, query.hash.to_string (), query.channel->to_string ()); stats.inc (nano::stat::type::rep_crawler, nano::stat::detail::query_completion); } return true; // Erase @@ -350,7 +350,7 @@ void nano::rep_crawler::query (std::vectorto_string ()); + logger.debug (nano::log::type::rep_crawler, "Sending query for block: {} to: {}", hash_root.first.to_string (), channel->to_string ()); stats.inc (nano::stat::type::rep_crawler, nano::stat::detail::query_sent); auto const & [hash, root] = hash_root; @@ -368,7 +368,7 @@ void nano::rep_crawler::query (std::vectorto_string ()); + logger.debug (nano::log::type::rep_crawler, "Ignoring duplicate query for block: {} to: {}", hash_root.first.to_string (), channel->to_string ()); stats.inc (nano::stat::type::rep_crawler, nano::stat::detail::query_duplicate); } } @@ -404,7 +404,7 @@ bool nano::rep_crawler::process (std::shared_ptr const & vote, std:: }); if (found) { - logger.debug (nano::log::type::rep_crawler, "Processing response for block {} from {}", target_hash.to_string (), channel->to_string ()); + logger.debug (nano::log::type::rep_crawler, "Processing response for block: {} from: {}", target_hash.to_string (), channel->to_string ()); stats.inc (nano::stat::type::rep_crawler, nano::stat::detail::response); // Track response time diff --git a/nano/node/transport/tcp_listener.cpp b/nano/node/transport/tcp_listener.cpp index b97c116854..06c49332ad 100644 --- a/nano/node/transport/tcp_listener.cpp +++ b/nano/node/transport/tcp_listener.cpp @@ -499,6 +499,12 @@ size_t nano::transport::tcp_listener::connection_count () const return connections.size (); } +size_t nano::transport::tcp_listener::connection_count (connection_type type) const +{ + nano::lock_guard lock{ mutex }; + return count_per_type (type); +} + size_t nano::transport::tcp_listener::attempt_count () const { nano::lock_guard lock{ mutex }; diff --git a/nano/node/transport/tcp_listener.hpp b/nano/node/transport/tcp_listener.hpp index 094560477a..55d0603d88 100644 --- a/nano/node/transport/tcp_listener.hpp +++ b/nano/node/transport/tcp_listener.hpp @@ -53,6 +53,13 @@ class tcp_config */ class tcp_listener final { +public: + enum class connection_type + { + inbound, + outbound, + }; + public: tcp_listener (uint16_t port, tcp_config const &, nano::node &); ~tcp_listener (); @@ -69,6 +76,7 @@ class tcp_listener final nano::tcp_endpoint endpoint () const; size_t connection_count () const; + size_t connection_count (connection_type) const; size_t attempt_count () const; size_t realtime_count () const; size_t bootstrap_count () const; @@ -104,12 +112,6 @@ class tcp_listener final error, }; - enum class connection_type - { - inbound, - outbound, - }; - asio::awaitable connect_impl (asio::ip::tcp::endpoint); asio::awaitable connect_socket (asio::ip::tcp::endpoint);