diff --git a/cli/src/tools/gen_account.rs b/cli/src/tools/gen_account.rs index 24631f658..0b878bdfd 100644 --- a/cli/src/tools/gen_account.rs +++ b/cli/src/tools/gen_account.rs @@ -88,7 +88,7 @@ struct MultisigCmd { #[clap(short, long)] lifetime: Option, - /// Use SetcodeMultisig instead of SafeMultisig + /// Use `SetcodeMultisig` instead of `SafeMultisig` #[clap(short, long)] updatable: bool, } diff --git a/cli/src/tools/storage_cli.rs b/cli/src/tools/storage_cli.rs index 16b76820c..db1960eb0 100644 --- a/cli/src/tools/storage_cli.rs +++ b/cli/src/tools/storage_cli.rs @@ -1,3 +1,4 @@ +#![allow(clippy::print_stdout)] // it's a CLI tool use std::path::PathBuf; use anyhow::{Context, Result}; @@ -126,8 +127,8 @@ impl BlockCmd { println!("Found block full {}\n", &self.block_id); println!("Block is link: {}\n", is_link); - println!("Block hex {}\n", hex::encode(&block)); - println!("Block proof {}\n", hex::encode(&proof)); + println!("Block hex {}\n", hex::encode(block)); + println!("Block proof {}\n", hex::encode(proof)); } _ => { println!("Found block empty {}\n", &self.block_id); @@ -168,11 +169,11 @@ impl BlockCmd { println!("Found block full {}\n", &self.block_id); println!("Block is link: {}\n", is_link); - println!("Block hex {}\n", hex::encode(&block)); - println!("Block proof {}\n", hex::encode(&proof)); + println!("Block hex {}\n", hex::encode(block)); + println!("Block proof {}\n", hex::encode(proof)); } _ => { - println!("Found block empty {}\n", &self.block_id) + println!("Found block empty {}\n", &self.block_id); } }; Ok::<(), anyhow::Error>(()) diff --git a/core/src/block_strider/subscriber/gc_subscriber.rs b/core/src/block_strider/subscriber/gc_subscriber.rs index 9774890e9..15e39fd6e 100644 --- a/core/src/block_strider/subscriber/gc_subscriber.rs +++ b/core/src/block_strider/subscriber/gc_subscriber.rs @@ -7,10 +7,12 @@ use anyhow::Result; use everscale_types::models::BlockId; use rand::Rng; use scopeguard::defer; +use tokio::select; use tokio::sync::watch; use tokio::task::AbortHandle; use tycho_block_util::block::BlockStuff; use tycho_storage::{BlocksGcType, Storage}; +use tycho_util::metrics::HistogramGuard; use crate::block_strider::{ BlockSubscriber, BlockSubscriberContext, StateSubscriber, StateSubscriberContext, @@ -206,6 +208,7 @@ impl GcSubscriber { #[tracing::instrument(skip_all)] async fn states_gc(mut trigger_rx: TriggerRx, storage: Storage) { + use tokio::time; let Some(config) = storage.config().states_gc else { tracing::warn!("manager disabled"); return; @@ -215,34 +218,37 @@ impl GcSubscriber { tracing::info!("manager stopped"); } - let mut last_tiggered_at = None::; + let mut interval = time::interval(config.interval); + let mut last_triggered_at = None; - while trigger_rx.changed().await.is_ok() { - match last_tiggered_at { - // Wait for an offset before the first GC but after the first trigger - None => { - let offset = if config.random_offset { - rand::thread_rng().gen_range(Duration::ZERO..config.interval) - } else { - config.interval - }; - tokio::time::sleep(offset).await - } - // Wait to maintaint the interval between GCs - Some(last) => { - if last.elapsed() < config.interval { - tokio::time::sleep_until((last + config.interval).into()).await; - } + loop { + // either the interval has ticked or a new trigger has arrived + select! { + _ = interval.tick() => {}, + Ok(_) = trigger_rx.changed() => {}, + else => break, + } + + let now = Instant::now(); + + if let Some(last) = last_triggered_at { + let next_gc: Instant = last + config.interval; + if next_gc > now { + time::sleep_until(next_gc.into()).await; } + } else if config.random_offset { + let offset = rand::thread_rng().gen_range(Duration::ZERO..config.interval); + time::sleep(offset).await; } - last_tiggered_at = Some(Instant::now()); - // Get the most recent trigger + last_triggered_at = Some(Instant::now()); + let Some(trigger) = trigger_rx.borrow_and_update().clone() else { continue; }; tracing::debug!(?trigger); + let _hist = HistogramGuard::begin("tycho_gc_states_time"); if let Err(e) = storage .shard_state_storage() .remove_outdated_states(trigger.mc_block_id.seqno) @@ -250,6 +256,7 @@ impl GcSubscriber { { tracing::error!("failed to remove outdated states: {e:?}"); } + metrics::gauge!("tycho_gc_states_seqno").set(trigger.mc_block_id.seqno as f64); } } } diff --git a/scripts/gen-dashboard.py b/scripts/gen-dashboard.py index d4c008c05..68f7c33e9 100644 --- a/scripts/gen-dashboard.py +++ b/scripts/gen-dashboard.py @@ -23,6 +23,7 @@ expr_sum_rate, heatmap_panel, yaxis, + expr_operator, ) @@ -231,7 +232,7 @@ def net_conn_manager() -> RowPanel: ), create_gauge_panel("tycho_net_known_peers", "Number of currently known peers"), ] - return create_row("Network Connection Manager", metrics) + return create_row("network: Connection Manager", metrics) def net_request_handler() -> RowPanel: @@ -255,7 +256,7 @@ def net_request_handler() -> RowPanel: "tycho_net_req_handlers", "Current number of incoming request handlers" ), ] - return create_row("Network Request Handler", metrics) + return create_row("network: Request Handler", metrics) def net_peer() -> RowPanel: @@ -279,7 +280,7 @@ def net_peer() -> RowPanel: "tycho_net_out_messages", "Current number of outgoing messages" ), ] - return create_row("Network Peers", metrics) + return create_row("network: Peers", metrics) def net_dht() -> RowPanel: @@ -312,7 +313,7 @@ def net_dht() -> RowPanel: "Number of incoming DHT Store requests over time", ), ] - return create_row("Network DHT", metrics) + return create_row("network: DHT", metrics) def core_block_strider() -> RowPanel: @@ -384,8 +385,29 @@ def core_block_strider() -> RowPanel: create_heatmap_panel( "tycho_storage_state_update_time", "Time to write state update to rocksdb" ), + create_heatmap_panel( + "tycho_storage_state_store_time", "Time to store state with cell traversal" + ), + create_heatmap_panel("tycho_gc_states_time", "Time to garbage collect state"), + timeseries_panel( + targets=[ + target( + expr_operator( + Expr( + metric="tycho_do_collate_block_seqno", + label_selectors=['workchain="-1"'], + ), + "- on(instance, job)", + Expr("tycho_gc_states_seqno"), + ), + legend_format="{{instance}}", + ) + ], + unit="Blocks", + title="GC lag", + ), ] - return create_row("Core Block Strider", metrics) + return create_row("block strider: Core Metrics", metrics) def jrpc() -> RowPanel: @@ -499,27 +521,31 @@ def collator_finalize_block() -> RowPanel: labels=['workchain=~"$workchain"'], ), ] - return create_row("Finalize Block", metrics) + return create_row("collator: Finalize Block", metrics) -def collator_do_collate() -> RowPanel: +def collator_params_metrics() -> RowPanel: metrics = [ create_gauge_panel( - "tycho_do_collate_msgs_exec_params_set_size", - "Params: msgs set size", + "tycho_do_collate_msgs_exec_params_set_size", "Params: msgs set size" ), create_gauge_panel( "tycho_do_collate_msgs_exec_params_min_exts_per_set", "Params: min externals per set", ), create_gauge_panel( - "tycho_do_collate_msgs_exec_params_group_limit", - "Params: group limit", + "tycho_do_collate_msgs_exec_params_group_limit", "Params: group limit" ), create_gauge_panel( "tycho_do_collate_msgs_exec_params_group_vert_size", "Params: group vertical size limit", ), + ] + return create_row("collator: Parameters", metrics) + + +def block_metrics() -> RowPanel: + metrics = [ create_counter_panel( "tycho_do_collate_blocks_count", "Blocks rate", @@ -540,6 +566,12 @@ def collator_do_collate() -> RowPanel: "Number of blocks with limits reached", labels=['workchain=~"$workchain"'], ), + ] + return create_row("collator: Block Metrics", metrics) + + +def collator_execution_metrics() -> RowPanel: + metrics = [ create_gauge_panel( "tycho_do_collate_exec_msgs_sets_per_block", "Number of msgs sets per block", @@ -570,6 +602,12 @@ def collator_do_collate() -> RowPanel: "MAX exec time in group", labels=['workchain=~"$workchain"'], ), + ] + return create_row("collator: Execution Metrics", metrics) + + +def collator_message_metrics() -> RowPanel: + metrics = [ create_counter_panel( "tycho_do_collate_msgs_exec_count_all", "All executed msgs count", @@ -625,23 +663,32 @@ def collator_do_collate() -> RowPanel: "Executed NewInt msgs count", labels=['workchain=~"$workchain"'], ), + ] + return create_row("collator: Message Metrics", metrics) + + +def collator_queue_metrics() -> RowPanel: + metrics = [ create_gauge_panel( "tycho_session_iterator_messages_all", "Number of internals in the iterator", labels=['workchain=~"$workchain"'], ), create_gauge_panel( - "tycho_do_collate_int_msgs_queue_calc", - "Calculated Internal queue len", + "tycho_do_collate_int_msgs_queue_calc", "Calculated Internal queue len" ), create_counter_panel( - "tycho_do_collate_int_enqueue_count", - "Enqueued int msgs count", + "tycho_do_collate_int_enqueue_count", "Enqueued int msgs count" ), create_counter_panel( - "tycho_do_collate_int_dequeue_count", - "Dequeued int msgs count", + "tycho_do_collate_int_dequeue_count", "Dequeued int msgs count" ), + ] + return create_row("collator: Queue Metrics", metrics) + + +def collator_time_metrics() -> RowPanel: + metrics = [ create_gauge_panel( "tycho_do_collate_block_time_diff", "Block time diff", @@ -658,6 +705,12 @@ def collator_do_collate() -> RowPanel: "Collation flow overhead", labels=['workchain=~"$workchain"'], ), + ] + return create_row("collator: Time diffs", metrics) + + +def collator_core_operations_metrics() -> RowPanel: + metrics = [ create_heatmap_panel( "tycho_do_collate_total_time", "Total collation time", @@ -680,17 +733,17 @@ def collator_do_collate() -> RowPanel: ), create_heatmap_panel( "tycho_do_collate_fill_msgs_total_time", - "Fill messages time", + "Execution time: incl Fill messages time", labels=['workchain=~"$workchain"'], ), create_heatmap_panel( "tycho_do_collate_exec_msgs_total_time", - "Execute messages time", + "Execution time: incl Execute messages time", labels=['workchain=~"$workchain"'], ), create_heatmap_panel( "tycho_do_collate_process_txs_total_time", - "Process transactions time", + "Execution time: incl Process transactions time", labels=['workchain=~"$workchain"'], ), create_heatmap_panel( @@ -713,61 +766,80 @@ def collator_do_collate() -> RowPanel: "Handle block candidate", labels=['workchain=~"$workchain"'], ), - create_heatmap_panel( - "tycho_do_collate_execute_tick_time", - "Execute Tick special transactions", - labels=['workchain=~"$workchain"'], - ), - create_heatmap_panel( - "tycho_do_collate_execute_tock_time", - "Execute Tock special transactions", - labels=['workchain=~"$workchain"'], - ), + + ] + return create_row("collator: Core Operations Metrics", metrics) + + +def collator_misc_operations_metrics() -> RowPanel: + metrics = [ create_heatmap_panel( "tycho_collator_update_mc_data_time", - "update mc data", + "Update mc data", labels=['workchain=~"$workchain"'], ), create_heatmap_panel( "tycho_collator_import_next_anchor_time", - "import next anchor time", + "Import next anchor time", labels=['workchain=~"$workchain"'], ), create_heatmap_panel( "tycho_collator_try_collate_next_master_block_time", - "try collate next master block", + "Try collate next master block", ), create_heatmap_panel( "tycho_collator_try_collate_next_shard_block_without_do_collate_time", - "try collate next shard block", + "Try collate next shard block", ), create_heatmap_panel( "tycho_collator_refresh_collation_sessions_time", - "refresh collation sessions", + "Refresh collation sessions", ), create_heatmap_panel( "tycho_collator_process_collated_block_candidate_time", - "process collated block candidate", + "Process collated block candidate", ), create_heatmap_panel( "tycho_collator_update_last_collated_chain_time_and_check_should_collate_mc_block_time", - "update last collated chain time and check should collate mc block", + "Update last collated chain time and check should collate mc block", ), create_heatmap_panel( "tycho_collator_enqueue_mc_block_collation_time", - "enqueue mc block collation", + "Enqueue mc block collation", ), create_heatmap_panel( - "tycho_collator_process_validated_block_time", "process validated block" + "tycho_collator_process_validated_block_time", "Process validated block" ), create_heatmap_panel( "tycho_collator_process_valid_master_block_time", - "process valid master block", + "Process valid master block", ), create_heatmap_panel( "tycho_collator_extract_master_block_subgraph_time", - "extract master block subgraph", + "Extract master block subgraph", ), + ] + return create_row("collator: Misc Operations Metrics", metrics) + + +def collator_special_transactions_metrics() -> RowPanel: + metrics = [ + create_heatmap_panel( + "tycho_do_collate_execute_tick_time", + "Execute Tick special transactions", + labels=['workchain=~"$workchain"'], + ), + create_heatmap_panel( + "tycho_do_collate_execute_tock_time", + "Execute Tock special transactions", + labels=['workchain=~"$workchain"'], + ), + ] + return create_row("collator: Special Transactions Metrics", metrics) + + +def collator_sync_metrics() -> RowPanel: + metrics = [ create_heatmap_panel( "tycho_collator_send_blocks_to_sync_time", "send blocks to sync total" ), @@ -782,6 +854,12 @@ def collator_do_collate() -> RowPanel: "tycho_collator_send_blocks_to_sync_commit_diffs_time", "send blocks to sync: commit diffs", ), + ] + return create_row("collator: Sync Metrics", metrics) + + +def collator_adapter_metrics() -> RowPanel: + metrics = [ create_heatmap_panel( "tycho_collator_adapter_on_block_accepted_time", "on_block_accepted" ), @@ -790,7 +868,7 @@ def collator_do_collate() -> RowPanel: "on_block_accepted_external", ), ] - return create_row("Collator Do Collate", metrics) + return create_row("collator: Adapter Metrics", metrics) def mempool() -> RowPanel: @@ -969,7 +1047,7 @@ def collator_execution_manager() -> RowPanel: yaxis(UNITS.SECONDS), ), ] - return create_row("Collator Execution Manager", metrics) + return create_row("collator: Execution Manager", metrics) def allocator_stats() -> RowPanel: @@ -1027,7 +1105,17 @@ def templates() -> Templating: panels=[ core_bc(), core_block_strider(), - collator_do_collate(), + collator_params_metrics(), + block_metrics(), + collator_execution_metrics(), + collator_message_metrics(), + collator_queue_metrics(), + collator_time_metrics(), + collator_core_operations_metrics(), + collator_misc_operations_metrics(), + collator_special_transactions_metrics(), + collator_sync_metrics(), + collator_adapter_metrics(), collator_finalize_block(), collator_execution_manager(), mempool(), @@ -1047,6 +1135,7 @@ def templates() -> Templating: timezone="browser", ).auto_panel_ids() + # open file as stream if len(sys.argv) > 1: stream = open(sys.argv[1], "w") diff --git a/storage/src/store/shard_state/mod.rs b/storage/src/store/shard_state/mod.rs index f442fe4db..22696d209 100644 --- a/storage/src/store/shard_state/mod.rs +++ b/storage/src/store/shard_state/mod.rs @@ -100,13 +100,13 @@ impl ShardStateStorage { if handle.meta().has_state() { return Ok(false); } - let _gc_lock = self.gc_lock.lock().await; // Double check if the state is already stored if handle.meta().has_state() { return Ok(false); } + let _hist = HistogramGuard::begin("tycho_storage_state_store_time"); let block_id = *handle.id(); let raw_db = self.db.rocksdb().clone();