Skip to content

Commit

Permalink
refactor(metrics): add gc metrics
Browse files Browse the repository at this point in the history
- split collator metrics to several rows
- fix several clippy lints
  • Loading branch information
0xdeafbeef committed Jul 7, 2024
1 parent c179d38 commit 8e06c1f
Show file tree
Hide file tree
Showing 5 changed files with 135 additions and 44 deletions.
2 changes: 1 addition & 1 deletion cli/src/tools/gen_account.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ struct MultisigCmd {
#[clap(short, long)]
lifetime: Option<u32>,

/// Use SetcodeMultisig instead of SafeMultisig
/// Use `SetcodeMultisig` instead of `SafeMultisig`
#[clap(short, long)]
updatable: bool,
}
Expand Down
11 changes: 6 additions & 5 deletions cli/src/tools/storage_cli.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#![allow(clippy::print_stdout)] // it's a CLI tool
use std::path::PathBuf;

use anyhow::{Context, Result};
Expand Down Expand Up @@ -126,8 +127,8 @@ impl BlockCmd {

println!("Found block full {}\n", &self.block_id);
println!("Block is link: {}\n", is_link);
println!("Block hex {}\n", hex::encode(&block));
println!("Block proof {}\n", hex::encode(&proof));
println!("Block hex {}\n", hex::encode(block));
println!("Block proof {}\n", hex::encode(proof));
}
_ => {
println!("Found block empty {}\n", &self.block_id);
Expand Down Expand Up @@ -168,11 +169,11 @@ impl BlockCmd {

println!("Found block full {}\n", &self.block_id);
println!("Block is link: {}\n", is_link);
println!("Block hex {}\n", hex::encode(&block));
println!("Block proof {}\n", hex::encode(&proof));
println!("Block hex {}\n", hex::encode(block));
println!("Block proof {}\n", hex::encode(proof));
}
_ => {
println!("Found block empty {}\n", &self.block_id)
println!("Found block empty {}\n", &self.block_id);
}
};
Ok::<(), anyhow::Error>(())
Expand Down
45 changes: 26 additions & 19 deletions core/src/block_strider/subscriber/gc_subscriber.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@ use anyhow::Result;
use everscale_types::models::BlockId;
use rand::Rng;
use scopeguard::defer;
use tokio::select;
use tokio::sync::watch;
use tokio::task::AbortHandle;
use tycho_block_util::block::BlockStuff;
use tycho_storage::{BlocksGcType, Storage};
use tycho_util::metrics::HistogramGuard;

use crate::block_strider::{
BlockSubscriber, BlockSubscriberContext, StateSubscriber, StateSubscriberContext,
Expand Down Expand Up @@ -206,6 +208,7 @@ impl GcSubscriber {

#[tracing::instrument(skip_all)]
async fn states_gc(mut trigger_rx: TriggerRx, storage: Storage) {
use tokio::time;
let Some(config) = storage.config().states_gc else {
tracing::warn!("manager disabled");
return;
Expand All @@ -215,41 +218,45 @@ impl GcSubscriber {
tracing::info!("manager stopped");
}

let mut last_tiggered_at = None::<Instant>;
let mut interval = time::interval(config.interval);
let mut last_triggered_at = None;

while trigger_rx.changed().await.is_ok() {
match last_tiggered_at {
// Wait for an offset before the first GC but after the first trigger
None => {
let offset = if config.random_offset {
rand::thread_rng().gen_range(Duration::ZERO..config.interval)
} else {
config.interval
};
tokio::time::sleep(offset).await
}
// Wait to maintaint the interval between GCs
Some(last) => {
if last.elapsed() < config.interval {
tokio::time::sleep_until((last + config.interval).into()).await;
}
loop {
// either the interval has ticked or a new trigger has arrived
select! {
_ = interval.tick() => {},
Ok(_) = trigger_rx.changed() => {},
else => break,
}

let now = Instant::now();

if let Some(last) = last_triggered_at {
let next_gc: Instant = last + config.interval;
if next_gc > now {
time::sleep_until(next_gc.into()).await;
}
} else if config.random_offset {
let offset = rand::thread_rng().gen_range(Duration::ZERO..config.interval);
time::sleep(offset).await;
}
last_tiggered_at = Some(Instant::now());

// Get the most recent trigger
last_triggered_at = Some(Instant::now());

let Some(trigger) = trigger_rx.borrow_and_update().clone() else {
continue;
};
tracing::debug!(?trigger);

let _hist = HistogramGuard::begin("tycho_gc_states_duration");
if let Err(e) = storage
.shard_state_storage()
.remove_outdated_states(trigger.mc_block_id.seqno)
.await
{
tracing::error!("failed to remove outdated states: {e:?}");
}
metrics::gauge!("tycho_gc_states_seqno").set(trigger.mc_block_id.seqno as f64);
}
}
}
Expand Down
119 changes: 101 additions & 18 deletions scripts/gen-dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
expr_sum_rate,
heatmap_panel,
yaxis,
expr_operator,
)


Expand Down Expand Up @@ -384,6 +385,23 @@ def core_block_strider() -> RowPanel:
create_heatmap_panel(
"tycho_storage_state_update_time", "Time to write state update to rocksdb"
),
create_heatmap_panel(
"tycho_storage_state_store_time", "Time to store state with cell traversal"
),
create_heatmap_panel(
"tycho_gc_states_duration", "Time to garbage collect state"
),
create_gauge_panel(
expr_operator(
Expr("tycho_gc_states_seqno"),
"-",
Expr(
metric="tycho_do_collate_block_seqno",
label_selectors=['workchain="-1"'],
),
),
"GC lag",
),
]
return create_row("Core Block Strider", metrics)

Expand Down Expand Up @@ -502,24 +520,28 @@ def collator_finalize_block() -> RowPanel:
return create_row("Finalize Block", metrics)


def collator_do_collate() -> RowPanel:
def collator_params_metrics() -> RowPanel:
metrics = [
create_gauge_panel(
"tycho_do_collate_msgs_exec_params_set_size",
"Params: msgs set size",
"tycho_do_collate_msgs_exec_params_set_size", "Params: msgs set size"
),
create_gauge_panel(
"tycho_do_collate_msgs_exec_params_min_exts_per_set",
"Params: min externals per set",
),
create_gauge_panel(
"tycho_do_collate_msgs_exec_params_group_limit",
"Params: group limit",
"tycho_do_collate_msgs_exec_params_group_limit", "Params: group limit"
),
create_gauge_panel(
"tycho_do_collate_msgs_exec_params_group_vert_size",
"Params: group vertical size limit",
),
]
return create_row("Collator Parameters", metrics)


def block_metrics() -> RowPanel:
metrics = [
create_counter_panel(
"tycho_do_collate_blocks_count",
"Blocks rate",
Expand All @@ -540,6 +562,12 @@ def collator_do_collate() -> RowPanel:
"Number of blocks with limits reached",
labels=['workchain=~"$workchain"'],
),
]
return create_row("Block Metrics", metrics)


def execution_metrics() -> RowPanel:
metrics = [
create_gauge_panel(
"tycho_do_collate_exec_msgs_sets_per_block",
"Number of msgs sets per block",
Expand Down Expand Up @@ -570,6 +598,12 @@ def collator_do_collate() -> RowPanel:
"MAX exec time in group",
labels=['workchain=~"$workchain"'],
),
]
return create_row("Execution Metrics", metrics)


def message_metrics() -> RowPanel:
metrics = [
create_counter_panel(
"tycho_do_collate_msgs_exec_count_all",
"All executed msgs count",
Expand Down Expand Up @@ -625,23 +659,32 @@ def collator_do_collate() -> RowPanel:
"Executed NewInt msgs count",
labels=['workchain=~"$workchain"'],
),
]
return create_row("Message Metrics", metrics)


def queue_metrics() -> RowPanel:
metrics = [
create_gauge_panel(
"tycho_session_iterator_messages_all",
"Number of internals in the iterator",
labels=['workchain=~"$workchain"'],
),
create_gauge_panel(
"tycho_do_collate_int_msgs_queue_calc",
"Calculated Internal queue len",
"tycho_do_collate_int_msgs_queue_calc", "Calculated Internal queue len"
),
create_counter_panel(
"tycho_do_collate_int_enqueue_count",
"Enqueued int msgs count",
"tycho_do_collate_int_enqueue_count", "Enqueued int msgs count"
),
create_counter_panel(
"tycho_do_collate_int_dequeue_count",
"Dequeued int msgs count",
"tycho_do_collate_int_dequeue_count", "Dequeued int msgs count"
),
]
return create_row("Queue Metrics", metrics)


def time_metrics() -> RowPanel:
metrics = [
create_gauge_panel(
"tycho_do_collate_block_time_diff",
"Block time diff",
Expand All @@ -663,6 +706,12 @@ def collator_do_collate() -> RowPanel:
"Total collation time",
labels=['workchain=~"$workchain"'],
),
]
return create_row("Time Metrics", metrics)


def collation_process_metrics() -> RowPanel:
metrics = [
create_heatmap_panel(
"tycho_do_collate_prepare_time",
"Collation prepare time",
Expand Down Expand Up @@ -708,11 +757,12 @@ def collator_do_collate() -> RowPanel:
"Finalize block",
labels=['workchain=~"$workchain"'],
),
create_heatmap_panel(
"tycho_do_collate_handle_block_candidate_time",
"Handle block candidate",
labels=['workchain=~"$workchain"'],
),
]
return create_row("Collation Process Metrics", metrics)


def special_transactions_metrics() -> RowPanel:
metrics = [
create_heatmap_panel(
"tycho_do_collate_execute_tick_time",
"Execute Tick special transactions",
Expand All @@ -723,6 +773,17 @@ def collator_do_collate() -> RowPanel:
"Execute Tock special transactions",
labels=['workchain=~"$workchain"'],
),
]
return create_row("Special Transactions Metrics", metrics)


def collator_process_metrics() -> RowPanel:
metrics = [
create_heatmap_panel(
"tycho_do_collate_handle_block_candidate_time",
"Handle block candidate",
labels=['workchain=~"$workchain"'],
),
create_heatmap_panel(
"tycho_collator_update_mc_data_time",
"update mc data",
Expand Down Expand Up @@ -768,6 +829,12 @@ def collator_do_collate() -> RowPanel:
"tycho_collator_extract_master_block_subgraph_time",
"extract master block subgraph",
),
]
return create_row("Collator Process Metrics", metrics)


def sync_metrics() -> RowPanel:
metrics = [
create_heatmap_panel(
"tycho_collator_send_blocks_to_sync_time", "send blocks to sync total"
),
Expand All @@ -782,6 +849,12 @@ def collator_do_collate() -> RowPanel:
"tycho_collator_send_blocks_to_sync_commit_diffs_time",
"send blocks to sync: commit diffs",
),
]
return create_row("Sync Metrics", metrics)


def adapter_metrics() -> RowPanel:
metrics = [
create_heatmap_panel(
"tycho_collator_adapter_on_block_accepted_time", "on_block_accepted"
),
Expand All @@ -790,7 +863,7 @@ def collator_do_collate() -> RowPanel:
"on_block_accepted_external",
),
]
return create_row("Collator Do Collate", metrics)
return create_row("Adapter Metrics", metrics)


def mempool() -> RowPanel:
Expand Down Expand Up @@ -1027,7 +1100,17 @@ def templates() -> Templating:
panels=[
core_bc(),
core_block_strider(),
collator_do_collate(),
collator_params_metrics(),
block_metrics(),
execution_metrics(),
message_metrics(),
queue_metrics(),
time_metrics(),
collation_process_metrics(),
special_transactions_metrics(),
collator_process_metrics(),
sync_metrics(),
adapter_metrics(),
collator_finalize_block(),
collator_execution_manager(),
mempool(),
Expand Down
2 changes: 1 addition & 1 deletion storage/src/store/shard_state/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,13 +100,13 @@ impl ShardStateStorage {
if handle.meta().has_state() {
return Ok(false);
}

let _gc_lock = self.gc_lock.lock().await;

// Double check if the state is already stored
if handle.meta().has_state() {
return Ok(false);
}
let _hist = HistogramGuard::begin("tycho_storage_state_store_time");

let block_id = *handle.id();
let raw_db = self.db.rocksdb().clone();
Expand Down

0 comments on commit 8e06c1f

Please sign in to comment.