Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Log node status periodically #4676

Merged
merged 4 commits into from
Jul 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions nano/lib/logging_enums.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ enum class type
peer_history,
message_processor,
local_block_broadcaster,
monitor,

// bootstrap
bulk_pull_client,
Expand Down
3 changes: 3 additions & 0 deletions nano/lib/thread_roles.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,9 @@ std::string nano::thread_role::get_string (nano::thread_role::name role)
case nano::thread_role::name::vote_router:
thread_role_name_string = "Vote router";
break;
case nano::thread_role::name::monitor:
thread_role_name_string = "Monitor";
break;
default:
debug_assert (false && "nano::thread_role::get_string unhandled thread role");
}
Expand Down
1 change: 1 addition & 0 deletions nano/lib/thread_roles.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ enum class name
port_mapping,
stats,
vote_router,
monitor,
};

std::string_view to_string (name);
Expand Down
6 changes: 4 additions & 2 deletions nano/node/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,10 @@ add_library(
make_store.cpp
message_processor.hpp
message_processor.cpp
messages.hpp
messages.cpp
monitor.hpp
monitor.cpp
network.hpp
network.cpp
nodeconfig.hpp
Expand Down Expand Up @@ -196,8 +200,6 @@ add_library(
websocketconfig.cpp
websocket_stream.hpp
websocket_stream.cpp
messages.hpp
messages.cpp
xorshift.hpp)

target_link_libraries(
Expand Down
8 changes: 8 additions & 0 deletions nano/node/active_elections.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,14 @@ std::size_t nano::active_elections::size () const
return roots.size ();
}

std::size_t nano::active_elections::size (nano::election_behavior behavior) const
{
nano::lock_guard<nano::mutex> lock{ mutex };
auto count = count_by_behavior[behavior];
debug_assert (count >= 0);
return static_cast<std::size_t> (count);
}

bool nano::active_elections::publish (std::shared_ptr<nano::block> const & block_a)
{
nano::unique_lock<nano::mutex> lock{ mutex };
Expand Down
3 changes: 2 additions & 1 deletion nano/node/active_elections.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ class active_elections final
bool erase (nano::qualified_root const &);
bool empty () const;
std::size_t size () const;
std::size_t size (nano::election_behavior) const;
bool publish (std::shared_ptr<nano::block> const &);

/**
Expand Down Expand Up @@ -172,7 +173,7 @@ class active_elections final
std::chrono::seconds const election_time_to_live;

/** Keeps track of number of elections by election behavior (normal, hinted, optimistic) */
nano::enum_array<nano::election_behavior, int64_t> count_by_behavior;
nano::enum_array<nano::election_behavior, int64_t> count_by_behavior{};

nano::condition_variable condition;
bool stopped{ false };
Expand Down
132 changes: 132 additions & 0 deletions nano/node/monitor.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
#include "nano/secure/ledger.hpp"

#include <nano/lib/thread_roles.hpp>
#include <nano/lib/utility.hpp>
#include <nano/node/monitor.hpp>
#include <nano/node/node.hpp>

nano::monitor::monitor (nano::monitor_config const & config_a, nano::node & node_a) :
config{ config_a },
node{ node_a },
logger{ node_a.logger }
{
}

nano::monitor::~monitor ()
{
debug_assert (!thread.joinable ());
}

void nano::monitor::start ()
{
if (!config.enabled)
{
return;
}

thread = std::thread ([this] () {
nano::thread_role::set (nano::thread_role::name::monitor);
run ();
});
}

void nano::monitor::stop ()
{
{
nano::lock_guard<nano::mutex> guard{ mutex };
stopped = true;
}
condition.notify_all ();
if (thread.joinable ())
{
thread.join ();
}
}

void nano::monitor::run ()
{
std::unique_lock<nano::mutex> lock{ mutex };
while (!stopped)
{
run_one ();
condition.wait_until (lock, std::chrono::steady_clock::now () + config.interval, [this] { return stopped; });
}
}

void nano::monitor::run_one ()
{
// Node status:
// - blocks (confirmed, total)
// - blocks rate (over last 5m, peak over last 5m)
// - peers
// - stake (online, peered, trended, quorum needed)
// - elections active (normal, hinted, optimistic)
// - election stats over last 5m (confirmed, dropped)

auto const now = std::chrono::steady_clock::now ();
auto blocks_cemented = node.ledger.cemented_count ();
auto blocks_total = node.ledger.block_count ();

// Wait for node to warm up before logging
if (last_time != std::chrono::steady_clock::time_point{})
clemahieu marked this conversation as resolved.
Show resolved Hide resolved
{
// TODO: Maybe emphasize somehow that confirmed doesn't need to be equal to total; backlog is OK
logger.info (nano::log::type::monitor, "Blocks confirmed: {} | total: {}",
blocks_cemented,
blocks_total);

// Calculate the rates
auto elapsed_seconds = std::chrono::duration_cast<std::chrono::seconds> (now - last_time).count ();
auto blocks_confirmed_rate = static_cast<double> (blocks_cemented - last_blocks_cemented) / elapsed_seconds;
auto blocks_checked_rate = static_cast<double> (blocks_total - last_blocks_total) / elapsed_seconds;

logger.info (nano::log::type::monitor, "Blocks rate (average over last {}s): confirmed {:.2f}/s | total {:.2f}/s",
elapsed_seconds,
blocks_confirmed_rate,
blocks_checked_rate);

logger.info (nano::log::type::monitor, "Peers: {} (realtime: {} | bootstrap: {} | inbound connections: {} | outbound connections: {})",
node.network.size (),
node.tcp_listener.realtime_count (),
node.tcp_listener.bootstrap_count (),
node.tcp_listener.connection_count (nano::transport::tcp_listener::connection_type::inbound),
node.tcp_listener.connection_count (nano::transport::tcp_listener::connection_type::outbound));

logger.info (nano::log::type::monitor, "Quorum: {} (stake peered: {} | stake online: {})",
nano::uint128_union{ node.online_reps.delta () }.format_balance (Mxrb_ratio, 1, true),
nano::uint128_union{ node.rep_crawler.total_weight () }.format_balance (Mxrb_ratio, 1, true),
nano::uint128_union{ node.online_reps.online () }.format_balance (Mxrb_ratio, 1, true));

logger.info (nano::log::type::monitor, "Elections active: {} (priority: {} | hinted: {} | optimistic: {})",
node.active.size (),
node.active.size (nano::election_behavior::priority),
node.active.size (nano::election_behavior::hinted),
node.active.size (nano::election_behavior::optimistic));
}

last_time = now;
last_blocks_cemented = blocks_cemented;
last_blocks_total = blocks_total;
}

/*
* monitor_config
*/

nano::error nano::monitor_config::serialize (nano::tomlconfig & toml) const
{
toml.put ("enable", enabled, "Enable or disable periodic node status logging\ntype:bool");
toml.put ("interval", interval.count (), "Interval between status logs\ntype:seconds");

return toml.get_error ();
}

nano::error nano::monitor_config::deserialize (nano::tomlconfig & toml)
{
toml.get ("enabled", enabled);
auto interval_l = interval.count ();
toml.get ("interval", interval_l);
interval = std::chrono::seconds{ interval_l };

return toml.get_error ();
}
52 changes: 52 additions & 0 deletions nano/node/monitor.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#pragma once

#include <nano/lib/locks.hpp>
#include <nano/node/fwd.hpp>

#include <chrono>
#include <thread>

using namespace std::chrono_literals;

namespace nano
{
class monitor_config final
{
public:
nano::error deserialize (nano::tomlconfig &);
nano::error serialize (nano::tomlconfig &) const;

public:
bool enabled{ true };
std::chrono::seconds interval{ 60s };
};

class monitor final
{
public:
monitor (monitor_config const &, nano::node &);
~monitor ();

void start ();
void stop ();

private: // Dependencies
monitor_config const & config;
nano::node & node;
nano::logger & logger;

private:
void run ();
void run_one ();

std::chrono::steady_clock::time_point last_time{};

size_t last_blocks_cemented{ 0 };
size_t last_blocks_total{ 0 };

bool stopped{ false };
nano::condition_variable condition;
mutable nano::mutex mutex;
std::thread thread;
};
}
11 changes: 8 additions & 3 deletions nano/node/node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <nano/node/local_vote_history.hpp>
#include <nano/node/make_store.hpp>
#include <nano/node/message_processor.hpp>
#include <nano/node/monitor.hpp>
#include <nano/node/node.hpp>
#include <nano/node/peer_history.hpp>
#include <nano/node/request_aggregator.hpp>
Expand Down Expand Up @@ -223,6 +224,8 @@ nano::node::node (std::shared_ptr<boost::asio::io_context> io_ctx_a, std::filesy
process_live_dispatcher{ ledger, scheduler.priority, vote_cache, websocket },
peer_history_impl{ std::make_unique<nano::peer_history> (config.peer_history, store, network, logger, stats) },
peer_history{ *peer_history_impl },
monitor_impl{ std::make_unique<nano::monitor> (config.monitor, *this) },
monitor{ *monitor_impl },
startup_time (std::chrono::steady_clock::now ()),
node_seq (seq)
{
Expand Down Expand Up @@ -371,7 +374,7 @@ nano::node::node (std::shared_ptr<boost::asio::io_context> io_ctx_a, std::filesy

auto const network_label = network_params.network.get_current_network_as_string ();

logger.info (nano::log::type::node, "Node starting, version: {}", NANO_VERSION_STRING);
logger.info (nano::log::type::node, "Version: {}", NANO_VERSION_STRING);
logger.info (nano::log::type::node, "Build information: {}", BUILD_INFO);
logger.info (nano::log::type::node, "Active network: {}", network_label);
logger.info (nano::log::type::node, "Database backend: {}", store.vendor_get ());
Expand Down Expand Up @@ -445,7 +448,7 @@ nano::node::node (std::shared_ptr<boost::asio::io_context> io_ctx_a, std::filesy

ledger.bootstrap_weights = bootstrap_weights.second;

logger.info (nano::log::type::node, "************************************ Bootstrap weights ************************************");
logger.info (nano::log::type::node, "******************************************** Bootstrap weights ********************************************");

// Sort the weights
std::vector<std::pair<nano::account, nano::uint128_t>> sorted_weights (ledger.bootstrap_weights.begin (), ledger.bootstrap_weights.end ());
Expand All @@ -460,7 +463,7 @@ nano::node::node (std::shared_ptr<boost::asio::io_context> io_ctx_a, std::filesy
nano::uint128_union (rep.second).format_balance (Mxrb_ratio, 0, true));
}

logger.info (nano::log::type::node, "************************************ ================= ************************************");
logger.info (nano::log::type::node, "******************************************** ================= ********************************************");
}
}

Expand Down Expand Up @@ -721,6 +724,7 @@ void nano::node::start ()
local_block_broadcaster.start ();
peer_history.start ();
vote_router.start ();
monitor.start ();

add_initial_peers ();
}
Expand Down Expand Up @@ -770,6 +774,7 @@ void nano::node::stop ()
local_block_broadcaster.stop ();
message_processor.stop ();
network.stop (); // Stop network last to avoid killing in-use sockets
monitor.stop ();

// work pool is not stopped on purpose due to testing setup

Expand Down
4 changes: 4 additions & 0 deletions nano/node/node.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ namespace nano
class active_elections;
class confirming_set;
class message_processor;
class monitor;
class node;
class vote_processor;
class vote_cache_processor;
Expand Down Expand Up @@ -216,7 +217,10 @@ class node final : public std::enable_shared_from_this<node>
nano::process_live_dispatcher process_live_dispatcher;
std::unique_ptr<nano::peer_history> peer_history_impl;
nano::peer_history & peer_history;
std::unique_ptr<nano::monitor> monitor_impl;
nano::monitor & monitor;

public:
std::chrono::steady_clock::time_point const startup_time;
std::chrono::seconds unchecked_cutoff = std::chrono::seconds (7 * 24 * 60 * 60); // Week
std::atomic<bool> unresponsive_work_peers{ false };
Expand Down
10 changes: 10 additions & 0 deletions nano/node/nodeconfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,10 @@ nano::error nano::node_config::serialize_toml (nano::tomlconfig & toml) const
message_processor.serialize (message_processor_l);
toml.put_child ("message_processor", message_processor_l);

nano::tomlconfig monitor_l;
monitor.serialize (monitor_l);
toml.put_child ("monitor", monitor_l);

return toml.get_error ();
}

Expand Down Expand Up @@ -366,6 +370,12 @@ nano::error nano::node_config::deserialize_toml (nano::tomlconfig & toml)
message_processor.deserialize (config_l);
}

if (toml.has_key ("monitor"))
{
auto config_l = toml.get_required_child ("monitor");
monitor.deserialize (config_l);
}

if (toml.has_key ("work_peers"))
{
work_peers.clear ();
Expand Down
2 changes: 2 additions & 0 deletions nano/node/nodeconfig.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <nano/node/ipc/ipc_config.hpp>
#include <nano/node/local_block_broadcaster.hpp>
#include <nano/node/message_processor.hpp>
#include <nano/node/monitor.hpp>
#include <nano/node/network.hpp>
#include <nano/node/peer_history.hpp>
#include <nano/node/repcrawler.hpp>
Expand Down Expand Up @@ -156,6 +157,7 @@ class node_config
nano::network_config network;
nano::local_block_broadcaster_config local_block_broadcaster;
nano::confirming_set_config confirming_set;
nano::monitor_config monitor;

public:
std::string serialize_frontiers_confirmation (nano::frontiers_confirmation_mode) const;
Expand Down
Loading
Loading