diff --git a/README.md b/README.md index 2d89514b8..2df0e30bb 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ 3. **Fat-Client Mode**: The client retrieves larger contiguous chunks of the matrix on each block via RPC calls to an Avail node, and stores them on the DHT. This mode is activated when the `block_matrix_partition` parameter is set in the config file, and is mainly used with the `disable_proof_verification` flag because of the resource cost of cell validation. **IMPORTANT**: disabling proof verification introduces a trust assumption towards the node, that the data provided is correct. -4. **Crawl-Client Mode**: The client crawls cells from DHT for entire block, and calculates success rate. Every block processing is delayed by `crawl_block_delay` parameter. Delay should be enough so crawling of large block can be compensated. This mode is enabled by setting `crawl_block` parameter to `true`. Success rate is emmited in logs and metrics. +4. **Crawl-Client Mode**: The client crawls cells from DHT for entire block, and calculates success rate. Every block processing is delayed by `crawl_block_delay` parameter. Delay should be enough so crawling of large block can be compensated. This mode is enabled by setting `crawl_block` parameter to `true`. Success rate is emmited in logs and metrics. Crawler can be run in three modes: `cells`, `rows` and `both`. Default mode is `cells`, and it can be configured by `crawl_block_mode` parameter. ## Installation diff --git a/src/crawl_client.rs b/src/crawl_client.rs index b712f3ae2..720762f50 100644 --- a/src/crawl_client.rs +++ b/src/crawl_client.rs @@ -1,7 +1,7 @@ use crate::{ network::Client, telemetry::metrics::{MetricEvent, Metrics}, - types::{self, delay_for}, + types::{self, delay_for, CrawlMode}, }; use avail_subxt::primitives::Header; use kate_recovery::matrix::Partition; @@ -19,6 +19,7 @@ pub async fn run( network_client: Client, delay: u64, metrics: Metrics, + mode: CrawlMode, ) { info!("Starting crawl client..."); @@ -28,7 +29,7 @@ pub async fn run( tokio::time::sleep(seconds).await; } let block_number = header.number; - info!("Crawling block {block_number}..."); + info!(block_number, "Crawling block..."); let start = SystemTime::now(); @@ -37,27 +38,51 @@ pub async fn run( continue; }; - let positions = block - .dimensions - .iter_extended_partition_positions(&ENTIRE_BLOCK) - .collect::>(); + if mode == CrawlMode::Cells || mode == CrawlMode::Both { + let positions = block + .dimensions + .iter_extended_partition_positions(&ENTIRE_BLOCK) + .collect::>(); - let total = positions.len(); - let (fetched, _) = network_client - .fetch_cells_from_dht(block_number, &positions) - .await; + let total = positions.len(); + let (fetched, _) = network_client + .fetch_cells_from_dht(block_number, &positions) + .await; + + let success_rate = fetched.len() as f64 / total as f64; + info!( + block_number, + "Fetched {fetched} cells of {total}, success rate: {success_rate}", + fetched = fetched.len(), + ); + metrics.record(MetricEvent::CrawlCellsSuccessRate(success_rate)); + } + + if mode == CrawlMode::Rows || mode == CrawlMode::Both { + let dimensions = block.dimensions; + let rows: Vec = (0..dimensions.extended_rows()).step_by(2).collect(); + let total = rows.len(); + let fetched = network_client + .fetch_rows_from_dht(block_number, &dimensions, &rows) + .await + .iter() + .step_by(2) + .flatten() + .count(); + + let success_rate = fetched as f64 / total as f64; + info!( + block_number, + "Fetched {fetched} rows of {total}, success rate: {success_rate}" + ); + metrics.record(MetricEvent::CrawlRowsSuccessRate(success_rate)); + } let elapsed = start .elapsed() .map(|elapsed| format!("{elapsed:?}")) .unwrap_or("unknown".to_string()); - let success_rate = fetched.len() as f64 / total as f64; - info!( - "Block {block_number}, fetched {fetched} of {total}, success rate: {success_rate}, elapsed: {elapsed}", - fetched = fetched.len(), - ); - - metrics.record(MetricEvent::CrawlCellsSuccessRate(success_rate)); + info!(block_number, "Crawling block finished in {elapsed}") } } diff --git a/src/main.rs b/src/main.rs index fe0783bee..f88dcc6e7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -314,12 +314,14 @@ async fn run(error_sender: Sender) -> Result<()> { if cfg.crawl_block { let delay = cfg.crawl_block_delay; + let mode = cfg.crawl_block_mode.clone(); let message_rx = message_tx.subscribe(); tokio::task::spawn(crawl_client::run( message_rx, network_client.clone(), delay, lc_metrics.clone(), + mode, )); } diff --git a/src/telemetry/metrics.rs b/src/telemetry/metrics.rs index 0d4fc4f59..4ebd7626b 100644 --- a/src/telemetry/metrics.rs +++ b/src/telemetry/metrics.rs @@ -21,6 +21,7 @@ pub struct Metrics { dht_put_rows_success: Gauge, kad_routing_table_peer_num: Gauge, crawl_cells_success_rate: Gauge, + crawl_rows_success_rate: Gauge, } pub enum MetricEvent { @@ -37,6 +38,7 @@ pub enum MetricEvent { DHTPutRowsSuccess(f64), KadRoutingTablePeerNum(u32), CrawlCellsSuccessRate(f64), + CrawlRowsSuccessRate(f64), } impl Metrics { @@ -133,6 +135,13 @@ impl Metrics { crawl_cells_success_rate.clone(), ); + let crawl_rows_success_rate = Gauge::default(); + sub_reg.register( + "crawl_rows_sucess_rate", + "Success rate of the crawling DHT rows operation", + crawl_rows_success_rate.clone(), + ); + Self { session_block_counter, total_block_number, @@ -147,6 +156,7 @@ impl Metrics { dht_put_rows_success, kad_routing_table_peer_num, crawl_cells_success_rate, + crawl_rows_success_rate, } } @@ -191,6 +201,9 @@ impl Metrics { MetricEvent::CrawlCellsSuccessRate(num) => { self.crawl_cells_success_rate.set(num.into()); }, + MetricEvent::CrawlRowsSuccessRate(num) => { + self.crawl_rows_success_rate.set(num.into()); + }, } } } diff --git a/src/types.rs b/src/types.rs index 397975731..8a19d6ace 100644 --- a/src/types.rs +++ b/src/types.rs @@ -298,6 +298,8 @@ pub struct RuntimeConfig { pub crawl_block: bool, /// Crawl block delay. Increment to ensure large block crawling (default: 20) pub crawl_block_delay: u64, + /// Crawl block mode. Available modes are "cells", "rows" and "both" (default: "cells") + pub crawl_block_mode: CrawlMode, } pub struct Delay(Option); @@ -487,6 +489,14 @@ impl From<&RuntimeConfig> for AppClientConfig { } } +#[derive(Serialize, Deserialize, PartialEq, Clone, Debug)] +#[serde(rename_all = "kebab-case")] +pub enum CrawlMode { + Rows, + Cells, + Both, +} + impl Default for RuntimeConfig { fn default() -> Self { RuntimeConfig { @@ -535,6 +545,7 @@ impl Default for RuntimeConfig { max_kad_provided_keys: 1024, crawl_block: false, crawl_block_delay: 20, + crawl_block_mode: CrawlMode::Cells, } } }