Skip to content

Commit

Permalink
Log node status periodically (#4676)
Browse files Browse the repository at this point in the history
* Fix alignment

* Log node status periodically

* Log detailed peer and connections info

* Logging consistency
  • Loading branch information
pwojcikdev authored Jul 10, 2024
1 parent fd023dc commit bc9bc82
Show file tree
Hide file tree
Showing 15 changed files with 250 additions and 21 deletions.
1 change: 1 addition & 0 deletions nano/lib/logging_enums.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ enum class type
peer_history,
message_processor,
local_block_broadcaster,
monitor,

// bootstrap
bulk_pull_client,
Expand Down
3 changes: 3 additions & 0 deletions nano/lib/thread_roles.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,9 @@ std::string nano::thread_role::get_string (nano::thread_role::name role)
case nano::thread_role::name::vote_router:
thread_role_name_string = "Vote router";
break;
case nano::thread_role::name::monitor:
thread_role_name_string = "Monitor";
break;
default:
debug_assert (false && "nano::thread_role::get_string unhandled thread role");
}
Expand Down
1 change: 1 addition & 0 deletions nano/lib/thread_roles.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ enum class name
port_mapping,
stats,
vote_router,
monitor,
};

std::string_view to_string (name);
Expand Down
6 changes: 4 additions & 2 deletions nano/node/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,10 @@ add_library(
make_store.cpp
message_processor.hpp
message_processor.cpp
messages.hpp
messages.cpp
monitor.hpp
monitor.cpp
network.hpp
network.cpp
nodeconfig.hpp
Expand Down Expand Up @@ -196,8 +200,6 @@ add_library(
websocketconfig.cpp
websocket_stream.hpp
websocket_stream.cpp
messages.hpp
messages.cpp
xorshift.hpp)

target_link_libraries(
Expand Down
8 changes: 8 additions & 0 deletions nano/node/active_elections.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,14 @@ std::size_t nano::active_elections::size () const
return roots.size ();
}

std::size_t nano::active_elections::size (nano::election_behavior behavior) const
{
nano::lock_guard<nano::mutex> lock{ mutex };
auto count = count_by_behavior[behavior];
debug_assert (count >= 0);
return static_cast<std::size_t> (count);
}

bool nano::active_elections::publish (std::shared_ptr<nano::block> const & block_a)
{
nano::unique_lock<nano::mutex> lock{ mutex };
Expand Down
3 changes: 2 additions & 1 deletion nano/node/active_elections.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ class active_elections final
bool erase (nano::qualified_root const &);
bool empty () const;
std::size_t size () const;
std::size_t size (nano::election_behavior) const;
bool publish (std::shared_ptr<nano::block> const &);

/**
Expand Down Expand Up @@ -172,7 +173,7 @@ class active_elections final
std::chrono::seconds const election_time_to_live;

/** Keeps track of number of elections by election behavior (normal, hinted, optimistic) */
nano::enum_array<nano::election_behavior, int64_t> count_by_behavior;
nano::enum_array<nano::election_behavior, int64_t> count_by_behavior{};

nano::condition_variable condition;
bool stopped{ false };
Expand Down
132 changes: 132 additions & 0 deletions nano/node/monitor.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
#include "nano/secure/ledger.hpp"

#include <nano/lib/thread_roles.hpp>
#include <nano/lib/utility.hpp>
#include <nano/node/monitor.hpp>
#include <nano/node/node.hpp>

nano::monitor::monitor (nano::monitor_config const & config_a, nano::node & node_a) :
config{ config_a },
node{ node_a },
logger{ node_a.logger }
{
}

nano::monitor::~monitor ()
{
debug_assert (!thread.joinable ());
}

void nano::monitor::start ()
{
if (!config.enabled)
{
return;
}

thread = std::thread ([this] () {
nano::thread_role::set (nano::thread_role::name::monitor);
run ();
});
}

void nano::monitor::stop ()
{
{
nano::lock_guard<nano::mutex> guard{ mutex };
stopped = true;
}
condition.notify_all ();
if (thread.joinable ())
{
thread.join ();
}
}

void nano::monitor::run ()
{
std::unique_lock<nano::mutex> lock{ mutex };
while (!stopped)
{
run_one ();
condition.wait_until (lock, std::chrono::steady_clock::now () + config.interval, [this] { return stopped; });
}
}

void nano::monitor::run_one ()
{
// Node status:
// - blocks (confirmed, total)
// - blocks rate (over last 5m, peak over last 5m)
// - peers
// - stake (online, peered, trended, quorum needed)
// - elections active (normal, hinted, optimistic)
// - election stats over last 5m (confirmed, dropped)

auto const now = std::chrono::steady_clock::now ();
auto blocks_cemented = node.ledger.cemented_count ();
auto blocks_total = node.ledger.block_count ();

// Wait for node to warm up before logging
if (last_time != std::chrono::steady_clock::time_point{})
{
// TODO: Maybe emphasize somehow that confirmed doesn't need to be equal to total; backlog is OK
logger.info (nano::log::type::monitor, "Blocks confirmed: {} | total: {}",
blocks_cemented,
blocks_total);

// Calculate the rates
auto elapsed_seconds = std::chrono::duration_cast<std::chrono::seconds> (now - last_time).count ();
auto blocks_confirmed_rate = static_cast<double> (blocks_cemented - last_blocks_cemented) / elapsed_seconds;
auto blocks_checked_rate = static_cast<double> (blocks_total - last_blocks_total) / elapsed_seconds;

logger.info (nano::log::type::monitor, "Blocks rate (average over last {}s): confirmed {:.2f}/s | total {:.2f}/s",
elapsed_seconds,
blocks_confirmed_rate,
blocks_checked_rate);

logger.info (nano::log::type::monitor, "Peers: {} (realtime: {} | bootstrap: {} | inbound connections: {} | outbound connections: {})",
node.network.size (),
node.tcp_listener.realtime_count (),
node.tcp_listener.bootstrap_count (),
node.tcp_listener.connection_count (nano::transport::tcp_listener::connection_type::inbound),
node.tcp_listener.connection_count (nano::transport::tcp_listener::connection_type::outbound));

logger.info (nano::log::type::monitor, "Quorum: {} (stake peered: {} | stake online: {})",
nano::uint128_union{ node.online_reps.delta () }.format_balance (Mxrb_ratio, 1, true),
nano::uint128_union{ node.rep_crawler.total_weight () }.format_balance (Mxrb_ratio, 1, true),
nano::uint128_union{ node.online_reps.online () }.format_balance (Mxrb_ratio, 1, true));

logger.info (nano::log::type::monitor, "Elections active: {} (priority: {} | hinted: {} | optimistic: {})",
node.active.size (),
node.active.size (nano::election_behavior::priority),
node.active.size (nano::election_behavior::hinted),
node.active.size (nano::election_behavior::optimistic));
}

last_time = now;
last_blocks_cemented = blocks_cemented;
last_blocks_total = blocks_total;
}

/*
* monitor_config
*/

nano::error nano::monitor_config::serialize (nano::tomlconfig & toml) const
{
toml.put ("enable", enabled, "Enable or disable periodic node status logging\ntype:bool");
toml.put ("interval", interval.count (), "Interval between status logs\ntype:seconds");

return toml.get_error ();
}

nano::error nano::monitor_config::deserialize (nano::tomlconfig & toml)
{
toml.get ("enabled", enabled);
auto interval_l = interval.count ();
toml.get ("interval", interval_l);
interval = std::chrono::seconds{ interval_l };

return toml.get_error ();
}
52 changes: 52 additions & 0 deletions nano/node/monitor.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#pragma once

#include <nano/lib/locks.hpp>
#include <nano/node/fwd.hpp>

#include <chrono>
#include <thread>

using namespace std::chrono_literals;

namespace nano
{
class monitor_config final
{
public:
nano::error deserialize (nano::tomlconfig &);
nano::error serialize (nano::tomlconfig &) const;

public:
bool enabled{ true };
std::chrono::seconds interval{ 60s };
};

class monitor final
{
public:
monitor (monitor_config const &, nano::node &);
~monitor ();

void start ();
void stop ();

private: // Dependencies
monitor_config const & config;
nano::node & node;
nano::logger & logger;

private:
void run ();
void run_one ();

std::chrono::steady_clock::time_point last_time{};

size_t last_blocks_cemented{ 0 };
size_t last_blocks_total{ 0 };

bool stopped{ false };
nano::condition_variable condition;
mutable nano::mutex mutex;
std::thread thread;
};
}
11 changes: 8 additions & 3 deletions nano/node/node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <nano/node/local_vote_history.hpp>
#include <nano/node/make_store.hpp>
#include <nano/node/message_processor.hpp>
#include <nano/node/monitor.hpp>
#include <nano/node/node.hpp>
#include <nano/node/peer_history.hpp>
#include <nano/node/request_aggregator.hpp>
Expand Down Expand Up @@ -223,6 +224,8 @@ nano::node::node (std::shared_ptr<boost::asio::io_context> io_ctx_a, std::filesy
process_live_dispatcher{ ledger, scheduler.priority, vote_cache, websocket },
peer_history_impl{ std::make_unique<nano::peer_history> (config.peer_history, store, network, logger, stats) },
peer_history{ *peer_history_impl },
monitor_impl{ std::make_unique<nano::monitor> (config.monitor, *this) },
monitor{ *monitor_impl },
startup_time (std::chrono::steady_clock::now ()),
node_seq (seq)
{
Expand Down Expand Up @@ -371,7 +374,7 @@ nano::node::node (std::shared_ptr<boost::asio::io_context> io_ctx_a, std::filesy

auto const network_label = network_params.network.get_current_network_as_string ();

logger.info (nano::log::type::node, "Node starting, version: {}", NANO_VERSION_STRING);
logger.info (nano::log::type::node, "Version: {}", NANO_VERSION_STRING);
logger.info (nano::log::type::node, "Build information: {}", BUILD_INFO);
logger.info (nano::log::type::node, "Active network: {}", network_label);
logger.info (nano::log::type::node, "Database backend: {}", store.vendor_get ());
Expand Down Expand Up @@ -445,7 +448,7 @@ nano::node::node (std::shared_ptr<boost::asio::io_context> io_ctx_a, std::filesy

ledger.bootstrap_weights = bootstrap_weights.second;

logger.info (nano::log::type::node, "************************************ Bootstrap weights ************************************");
logger.info (nano::log::type::node, "******************************************** Bootstrap weights ********************************************");

// Sort the weights
std::vector<std::pair<nano::account, nano::uint128_t>> sorted_weights (ledger.bootstrap_weights.begin (), ledger.bootstrap_weights.end ());
Expand All @@ -460,7 +463,7 @@ nano::node::node (std::shared_ptr<boost::asio::io_context> io_ctx_a, std::filesy
nano::uint128_union (rep.second).format_balance (Mxrb_ratio, 0, true));
}

logger.info (nano::log::type::node, "************************************ ================= ************************************");
logger.info (nano::log::type::node, "******************************************** ================= ********************************************");
}
}

Expand Down Expand Up @@ -721,6 +724,7 @@ void nano::node::start ()
local_block_broadcaster.start ();
peer_history.start ();
vote_router.start ();
monitor.start ();

add_initial_peers ();
}
Expand Down Expand Up @@ -770,6 +774,7 @@ void nano::node::stop ()
local_block_broadcaster.stop ();
message_processor.stop ();
network.stop (); // Stop network last to avoid killing in-use sockets
monitor.stop ();

// work pool is not stopped on purpose due to testing setup

Expand Down
4 changes: 4 additions & 0 deletions nano/node/node.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ namespace nano
class active_elections;
class confirming_set;
class message_processor;
class monitor;
class node;
class vote_processor;
class vote_cache_processor;
Expand Down Expand Up @@ -216,7 +217,10 @@ class node final : public std::enable_shared_from_this<node>
nano::process_live_dispatcher process_live_dispatcher;
std::unique_ptr<nano::peer_history> peer_history_impl;
nano::peer_history & peer_history;
std::unique_ptr<nano::monitor> monitor_impl;
nano::monitor & monitor;

public:
std::chrono::steady_clock::time_point const startup_time;
std::chrono::seconds unchecked_cutoff = std::chrono::seconds (7 * 24 * 60 * 60); // Week
std::atomic<bool> unresponsive_work_peers{ false };
Expand Down
10 changes: 10 additions & 0 deletions nano/node/nodeconfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,10 @@ nano::error nano::node_config::serialize_toml (nano::tomlconfig & toml) const
message_processor.serialize (message_processor_l);
toml.put_child ("message_processor", message_processor_l);

nano::tomlconfig monitor_l;
monitor.serialize (monitor_l);
toml.put_child ("monitor", monitor_l);

return toml.get_error ();
}

Expand Down Expand Up @@ -366,6 +370,12 @@ nano::error nano::node_config::deserialize_toml (nano::tomlconfig & toml)
message_processor.deserialize (config_l);
}

if (toml.has_key ("monitor"))
{
auto config_l = toml.get_required_child ("monitor");
monitor.deserialize (config_l);
}

if (toml.has_key ("work_peers"))
{
work_peers.clear ();
Expand Down
2 changes: 2 additions & 0 deletions nano/node/nodeconfig.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <nano/node/ipc/ipc_config.hpp>
#include <nano/node/local_block_broadcaster.hpp>
#include <nano/node/message_processor.hpp>
#include <nano/node/monitor.hpp>
#include <nano/node/network.hpp>
#include <nano/node/peer_history.hpp>
#include <nano/node/repcrawler.hpp>
Expand Down Expand Up @@ -156,6 +157,7 @@ class node_config
nano::network_config network;
nano::local_block_broadcaster_config local_block_broadcaster;
nano::confirming_set_config confirming_set;
nano::monitor_config monitor;

public:
std::string serialize_frontiers_confirmation (nano::frontiers_confirmation_mode) const;
Expand Down
Loading

0 comments on commit bc9bc82

Please sign in to comment.