From 3580738a8180ab3593aeb26a1648a686ecbe375d Mon Sep 17 00:00:00 2001 From: Vladimir Petrzhikovskii Date: Fri, 9 Feb 2024 13:09:37 +0100 Subject: [PATCH 01/19] feat(storage): initial implementation --- Cargo.lock | 139 ++++++++++++++++ constansts/Cargo.toml | 11 ++ constansts/src/archive.rs | 1 + constansts/src/lib.rs | 1 + storage/Cargo.toml | 14 +- storage/src/config.rs | 55 +++++++ storage/src/lib.rs | 249 +++++++++++++++++++++++++++++ storage/src/migrations/mod.rs | 50 ++++++ storage/src/refcount.rs | 88 ++++++++++ storage/src/tables.rs | 293 ++++++++++++++++++++++++++++++++++ 10 files changed, 899 insertions(+), 2 deletions(-) create mode 100644 constansts/Cargo.toml create mode 100644 constansts/src/archive.rs create mode 100644 constansts/src/lib.rs create mode 100644 storage/src/config.rs create mode 100644 storage/src/migrations/mod.rs create mode 100644 storage/src/refcount.rs create mode 100644 storage/src/tables.rs diff --git a/Cargo.lock b/Cargo.lock index c12e87bba..ed0cdd118 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -274,6 +274,15 @@ dependencies = [ "serde", ] +[[package]] +name = "bytesize" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3e368af43e418a04d52505cf3dbc23dda4e3407ae2fa99fd0e4f308ce546acc" +dependencies = [ + "serde", +] + [[package]] name = "bzip2-sys" version = "0.1.11+1.0.8" @@ -412,6 +421,12 @@ version = "0.9.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" +[[package]] +name = "core-foundation-sys" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" + [[package]] name = "cpufeatures" version = "0.2.12" @@ -454,6 +469,16 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "crossbeam-deque" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + [[package]] name = "crossbeam-epoch" version = "0.9.18" @@ -589,6 +614,12 @@ dependencies = [ "signature", ] +[[package]] +name = "either" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" + [[package]] name = "errno" version = "0.3.8" @@ -669,6 +700,16 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" +[[package]] +name = "fdlimit" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e182f7dbc2ef73d9ef67351c5fbbea084729c48362d3ce9dd44c28e32e277fe5" +dependencies = [ + "libc", + "thiserror", +] + [[package]] name = "fiat-crypto" version = "0.2.7" @@ -966,6 +1007,15 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "ntapi" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8a3895c6391c39d7fe7ebc444a87eb2991b2a0bc718fdabd071eec617fc68e4" +dependencies = [ + "winapi", +] + [[package]] name = "nu-ansi-term" version = "0.46.0" @@ -1321,6 +1371,26 @@ dependencies = [ "bitflags 2.5.0", ] +[[package]] +name = "rayon" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"fa7237101a77a10773db45d62004a272517633fbcc3df19d96455ede1122e051" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "rcgen" version = "0.11.3" @@ -1416,6 +1486,15 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rlimit" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3560f70f30a0f16d11d01ed078a07740fe6b489667abc7c7b029155d9f21c3d8" +dependencies = [ + "libc", +] + [[package]] name = "rocksdb" version = "0.21.0" @@ -1594,6 +1673,15 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "signal-hook-registry" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1" +dependencies = [ + "libc", +] + [[package]] name = "signature" version = "2.2.0" @@ -1711,6 +1799,21 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "sysinfo" +version = "0.30.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fb4f3438c8f6389c864e61221cbc97e9bca98b4daf39a5beb7bea660f528bb2" +dependencies = [ + "cfg-if", + "core-foundation-sys", + "libc", + "ntapi", + "once_cell", + "rayon", + "windows", +] + [[package]] name = "tagptr" version = "0.2.0" @@ -1866,7 +1969,9 @@ dependencies = [ "libc", "mio", "num_cpus", + "parking_lot", "pin-project-lite", + "signal-hook-registry", "socket2", "tokio-macros", "windows-sys 0.48.0", @@ -2043,6 +2148,10 @@ dependencies = [ "weedb", ] +[[package]] +name = "tycho-constansts" +version = "0.1.0" + [[package]] name = "tycho-core" version = "0.0.1" @@ -2110,7 +2219,18 @@ dependencies = [ name = "tycho-storage" version = "0.0.1" dependencies = [ + "anyhow", + "bytesize", + "fdlimit", + "humantime", + "rlimit", + "serde", + "sysinfo", + "tokio", + "tracing", + "tycho-constansts", "tycho-util", + "weedb", ] [[package]] @@ -2330,6 +2450,25 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be" +dependencies = [ + "windows-core", + "windows-targets 0.52.0", +] + +[[package]] +name = "windows-core" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +dependencies = [ + "windows-targets 0.52.0", +] + [[package]] name = "windows-sys" version = "0.48.0" diff --git a/constansts/Cargo.toml b/constansts/Cargo.toml new file mode 100644 index 000000000..221b1e130 --- /dev/null +++ b/constansts/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "tycho-constansts" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] + +[lints] +workspace = true diff --git a/constansts/src/archive.rs b/constansts/src/archive.rs new file mode 100644 index 000000000..5d6d18af6 --- /dev/null +++ 
b/constansts/src/archive.rs
@@ -0,0 +1 @@
+pub const ARCHIVE_PREFIX: [u8; 4] = u32::to_le_bytes(0xae8fdd01);
diff --git a/constansts/src/lib.rs b/constansts/src/lib.rs
new file mode 100644
index 000000000..4193e3e11
--- /dev/null
+++ b/constansts/src/lib.rs
@@ -0,0 +1 @@
+pub mod archive;
diff --git a/storage/Cargo.toml b/storage/Cargo.toml
index 6eaad11a3..ca1382a69 100644
--- a/storage/Cargo.toml
+++ b/storage/Cargo.toml
@@ -5,10 +5,20 @@
 edition = "2021"
 description = "A unified storage interface."
 
 [dependencies]
-# crates.io deps
+anyhow = { version = "1" }
+
+weedb = "0.1.1"
+rlimit = "0.10.1"
+fdlimit = "0.3.0"
+bytesize = { version = "1.3.0", features = ["serde"] }
+tokio = { version = "1.36.0", features = ["full"] }
+tracing = "0.1.40"
+humantime = "2.1.0"
+sysinfo = "0.30.5"
+serde = { version = "1.0.196", features = ["derive"] }
 
 # local deps
 tycho-util = { path = "../util", version = "=0.0.1" }
-
+tycho-constansts = { path = "../constansts", version = "0.1" }
 
 [lints]
 workspace = true
diff --git a/storage/src/config.rs b/storage/src/config.rs
new file mode 100644
index 000000000..bd2d53b2d
--- /dev/null
+++ b/storage/src/config.rs
@@ -0,0 +1,55 @@
+use bytesize::ByteSize;
+use serde::{Deserialize, Serialize};
+
+#[derive(Debug, Copy, Clone, Serialize, Deserialize)]
+#[serde(deny_unknown_fields, default)]
+pub struct DbOptions {
+    pub rocksdb_lru_capacity: ByteSize,
+    pub cells_cache_size: ByteSize,
+}
+
+impl Default for DbOptions {
+    fn default() -> Self {
+        // Fetch the currently available memory in bytes
+        let available = {
+            let mut sys = sysinfo::System::new();
+            sys.refresh_memory();
+            sys.available_memory()
+        };
+
+        // Estimated memory usage of components other than the cache:
+        // - 2 GiB for write buffers (4 GiB if we are out of luck and all memtables are flushed at the same time)
+        // - 2 GiB for indexer logic
+        // - 10 bits per cell for the bloom filter; the realistic case is 100M cells (~128 MiB), so 256 MiB leaves headroom
+        // - 1/3 of all available memory is reserved for kernel buffers
+        const WRITE_BUFFERS: ByteSize = ByteSize::gib(2);
+        const INDEXER_LOGIC: ByteSize = ByteSize::gib(2);
+        const BLOOM_FILTER: ByteSize = ByteSize::mib(256);
+        let estimated_memory_usage = WRITE_BUFFERS + INDEXER_LOGIC + BLOOM_FILTER + available / 3;
+
+        // Reduce the available memory by the fixed offset
+        let available = available
+            .checked_sub(estimated_memory_usage.as_u64())
+            .unwrap_or_else(|| {
+                tracing::error!(
+                    "Not enough memory for cache, using 1/4 of all available memory. \
+                     Tweak `db_options` in config to improve performance."
+                );
+                available / 4
+            });
+
+        // We will use 3/4 of the remaining memory for the cells cache (at most 4 GiB)
+        let cells_cache_size = std::cmp::min(ByteSize(available * 3 / 4), ByteSize::gib(4));
+
+        // The rest of the memory is used for the LRU cache (at least 128 MiB)
+        let rocksdb_lru_capacity = std::cmp::max(
+            ByteSize(available.saturating_sub(cells_cache_size.as_u64())),
+            ByteSize::mib(128),
+        );
+
+        Self {
+            rocksdb_lru_capacity,
+            cells_cache_size,
+        }
+    }
+}
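To make the heuristic above concrete, here is a standalone sketch of the same arithmetic for a hypothetical machine with 16 GiB of available memory; the constants mirror the ones defined in `config.rs`, everything else is illustrative:

```rust
const GIB: u64 = 1024 * 1024 * 1024;
const MIB: u64 = 1024 * 1024;

fn main() {
    let available = 16 * GIB; // hypothetical free memory
    // Fixed overhead: write buffers + indexer logic + bloom filter + 1/3 for kernel buffers.
    let estimated = 2 * GIB + 2 * GIB + 256 * MIB + available / 3;
    let available = available.checked_sub(estimated).unwrap_or(available / 4);
    // 3/4 of the remainder goes to the cells cache, capped at 4 GiB...
    let cells_cache = (available * 3 / 4).min(4 * GIB);
    // ...and whatever is left to the RocksDB LRU cache, at least 128 MiB.
    let lru = available.saturating_sub(cells_cache).max(128 * MIB);
    // For 16 GiB available: ~6.4 GiB remains, cells cache = 4 GiB, LRU ~= 2.4 GiB.
    println!("cells: {cells_cache} B, lru: {lru} B");
}
```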
diff --git a/storage/src/lib.rs b/storage/src/lib.rs
index 8b1378917..a19a59a86 100644
--- a/storage/src/lib.rs
+++ b/storage/src/lib.rs
@@ -1 +1,250 @@
+use std::path::PathBuf;
+use std::sync::Arc;
+use std::thread::available_parallelism;
+use anyhow::{Context, Result};
+use bytesize::ByteSize;
+use weedb::{Caches, WeeDb};
+
+pub use weedb::Stats as RocksdbStats;
+pub use weedb::{rocksdb, BoundedCfHandle, ColumnFamily, Table, UnboundedCfHandle};
+
+pub use crate::config::DbOptions;
+
+pub mod refcount;
+pub mod tables;
+
+mod config;
+mod migrations;
+
+pub struct Db {
+    pub archives: Table<tables::Archives>,
+    pub block_handles: Table<tables::BlockHandles>,
+    pub key_blocks: Table<tables::KeyBlocks>,
+    pub package_entries: Table<tables::PackageEntries>,
+    pub shard_states: Table<tables::ShardStates>,
+    pub cells: Table<tables::Cells>,
+    pub node_states: Table<tables::NodeStates>,
+    pub prev1: Table<tables::Prev1>,
+    pub prev2: Table<tables::Prev2>,
+    pub next1: Table<tables::Next1>,
+    pub next2: Table<tables::Next2>,
+
+    compaction_lock: tokio::sync::RwLock<()>,
+    inner: WeeDb,
+}
+
+impl Db {
+    pub fn open(path: PathBuf, options: DbOptions) -> Result<Arc<Self>> {
+        tracing::info!(
+            rocksdb_lru_capacity = %options.rocksdb_lru_capacity,
+            cells_cache_size = %options.cells_cache_size,
+            "opening DB"
+        );
+
+        let limit = match fdlimit::raise_fd_limit() {
+            // New fd limit
+            Ok(fdlimit::Outcome::LimitRaised { to, .. }) => to,
+            // Current soft limit
+            _ => {
+                rlimit::getrlimit(rlimit::Resource::NOFILE)
+                    .unwrap_or((256, 0))
+                    .0
+            }
+        };
+
+        let caches_capacity =
+            std::cmp::max(options.rocksdb_lru_capacity, ByteSize::mib(256)).as_u64() as usize;
+
+        let caches = Caches::with_capacity(caches_capacity);
+        let threads = available_parallelism()?.get();
+
+        let inner = WeeDb::builder(path, caches)
+            .options(|opts, _| {
+                opts.set_paranoid_checks(false);
+
+                // bigger base level size - fewer compactions
+                // parallel compactions finish faster - fewer write stalls
+
+                opts.set_max_subcompactions(threads as u32 / 2);
+
+                // io
+                opts.set_max_open_files(limit as i32);
+
+                // logging
+                opts.set_log_level(rocksdb::LogLevel::Info);
+                opts.set_keep_log_file_num(2);
+                opts.set_recycle_log_file_num(2);
+
+                // cf
+                opts.create_if_missing(true);
+                opts.create_missing_column_families(true);
+
+                // cpu
+                opts.set_max_background_jobs(std::cmp::max((threads as i32) / 2, 2));
+                opts.increase_parallelism(threads as i32);
+
+                opts.set_allow_concurrent_memtable_write(false);
+                opts.set_enable_write_thread_adaptive_yield(true);
+
+                // debug
+                // NOTE: could slow everything down a bit in some cloud environments.
+                // See: https://github.com/facebook/rocksdb/issues/3889
+                //
+                // opts.enable_statistics();
+                // opts.set_stats_dump_period_sec(600);
+            })
+            .with_table::<tables::Archives>()
+            .with_table::<tables::BlockHandles>()
+            .with_table::<tables::KeyBlocks>()
+            .with_table::<tables::PackageEntries>()
+            .with_table::<tables::ShardStates>()
+            .with_table::<tables::Cells>()
+            .with_table::<tables::NodeStates>()
+            .with_table::<tables::Prev1>()
+            .with_table::<tables::Prev2>()
+            .with_table::<tables::Next1>()
+            .with_table::<tables::Next2>()
+            .build()
+            .context("Failed building db")?;
+
+        migrations::apply(&inner).context("Failed to apply migrations")?;
+
+        Ok(Arc::new(Self {
+            archives: inner.instantiate_table(),
+            block_handles: inner.instantiate_table(),
+            key_blocks: inner.instantiate_table(),
+            package_entries: inner.instantiate_table(),
+            shard_states: inner.instantiate_table(),
+            cells: inner.instantiate_table(),
+            node_states: inner.instantiate_table(),
+            prev1: inner.instantiate_table(),
+            prev2: inner.instantiate_table(),
+            next1: inner.instantiate_table(),
+            next2: inner.instantiate_table(),
+            compaction_lock: tokio::sync::RwLock::default(),
+            inner,
+        }))
+    }
+
+    #[inline]
+    pub fn raw(&self) -> &Arc<rocksdb::DB> {
+        self.inner.raw()
+    }
+
+    pub fn get_memory_usage_stats(&self) -> Result<RocksdbStats> {
+        self.inner.get_memory_usage_stats().map_err(From::from)
+    }
+
+    pub async fn delay_compaction(&self) -> tokio::sync::RwLockReadGuard<'_, ()> {
+        self.compaction_lock.read().await
+    }
+
+    pub async fn trigger_compaction(&self) {
+        use std::time::Instant;
+
+        let _compaction_guard = self.compaction_lock.write().await;
+
+        let tables = [
+            (self.block_handles.cf(), "block handles"),
+            (self.package_entries.cf(), "package entries"),
+            (self.archives.cf(), "archives"),
+            (self.shard_states.cf(), "shard states"),
+            (self.cells.cf(), "cells"),
+        ];
+
+        for (cf, title) in tables {
+            tracing::info!("{title} compaction started");
+
+            let instant = Instant::now();
+
+            let bound = Option::<[u8; 0]>::None;
+            self.raw().compact_range_cf(&cf, bound, bound);
+
+            tracing::info!(
+                elapsed = %humantime::format_duration(instant.elapsed()),
+                "{title} compaction finished"
+            );
+        }
+    }
+
+    pub fn get_disk_usage(&self) -> Result<Vec<DiskUsageInfo>> {
+        use std::thread;
+
+        fn get_table_stats<T: ColumnFamily>(db: &WeeDb) -> (ByteSize, ByteSize) {
+            let cf = db.instantiate_table::<T>();
+            let res: (usize, usize) = cf
+                .iterator(rocksdb::IteratorMode::Start)
+                .flat_map(|x| {
+                    let x = match x {
+                        Ok(x) => x,
+                        Err(e) => {
+                            tracing::error!("Error while iterating: {}", e);
+                            return None;
+                        }
+                    };
+                    Some((x.0.len(), x.1.len()))
+                })
+                .fold((0, 0), |acc, x| (acc.0 + x.0, acc.1 + x.1));
+
+            (ByteSize(res.0 as u64), ByteSize(res.1 as u64))
+        }
+
+        macro_rules! stats {
+            ($spawner:expr, $( $x:ident => $table:ty ),* ) => {{
+                $(
+                    let $x = $spawner.spawn(|| get_table_stats::<$table>(&self.inner));
+                )*
+                stats!($($x),*)
+            }};
+            ( $( $x:ident),* ) => {{
+                let mut temp_vec = Vec::new();
+                $(
+                    temp_vec.push({
+                        let $x = $x.join().map_err(|_| anyhow::anyhow!("Join error"))?;
+                        DiskUsageInfo {
+                            cf_name: stringify!($x).to_string(),
+                            keys_total: $x.0,
+                            values_total: $x.1,
+                        }
+                    });
+                )*
+                return Ok(temp_vec)
+            }};
+        }
+
+        let stats = thread::scope(|s| -> Result<Vec<DiskUsageInfo>> {
+            stats!(s,
+                archives => tables::Archives,
+                block_handles => tables::BlockHandles,
+                key_blocks => tables::KeyBlocks,
+                package_entries => tables::PackageEntries,
+                shard_states => tables::ShardStates,
+                cells => tables::Cells,
+                node_states => tables::NodeStates,
+                prev1 => tables::Prev1,
+                prev2 => tables::Prev2,
+                next1 => tables::Next1,
+                next2 => tables::Next2
+            )
+        })?;
+
+        Ok(stats)
+    }
+}
+
+#[derive(Debug, Clone)]
+pub struct DiskUsageInfo {
+    pub cf_name: String,
+    pub keys_total: ByteSize,
+    pub values_total: ByteSize,
+}
+
+impl Drop for Db {
+    fn drop(&mut self) {
+        self.raw().cancel_all_background_work(true)
+    }
+}
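A minimal sketch of how this interface might be driven (the path, options, and error handling are assumptions, not part of the patch):

```rust
use tycho_storage::{Db, DbOptions};

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let db = Db::open("db".into(), DbOptions::default())?;

    // Long read-only operations can hold compactions back...
    let guard = db.delay_compaction().await;
    for info in db.get_disk_usage()? {
        println!("{}: keys {}, values {}", info.cf_name, info.keys_total, info.values_total);
    }
    drop(guard);

    // ...or compaction can be forced, e.g. after bulk deletions.
    db.trigger_compaction().await;
    Ok(())
}
```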
stats { + ($spawner:expr, $( $x:ident => $table:ty ),* ) => {{ + $( + let $x = $spawner.spawn(|| get_table_stats::<$table>(&self.inner)); + )* + stats!($($x),*) + } + }; + ( $( $x:ident),* ) => { + { + let mut temp_vec = Vec::new(); + $( + temp_vec.push({ + let $x = $x.join().map_err(|_|anyhow::anyhow!("Join error"))?; + DiskUsageInfo { + cf_name: stringify!($x).to_string(), + keys_total: $x.0, + values_total: $x.1, + } + }); + )* + return Ok(temp_vec) + } + }; + } + + let stats = thread::scope(|s| -> Result> { + stats!(s, + archives => tables::Archives, + block_handles => tables::BlockHandles, + key_blocks => tables::KeyBlocks, + package_entries => tables::PackageEntries, + shard_states => tables::ShardStates, + cells => tables::Cells, + node_states => tables::NodeStates, + prev1 => tables::Prev1, + prev2 => tables::Prev2, + next1 => tables::Next1, + next2 => tables::Next2 + ) + })?; + + Ok(stats) + } +} + +#[derive(Debug, Clone)] +pub struct DiskUsageInfo { + pub cf_name: String, + pub keys_total: ByteSize, + pub values_total: ByteSize, +} + +impl Drop for Db { + fn drop(&mut self) { + self.raw().cancel_all_background_work(true) + } +} diff --git a/storage/src/migrations/mod.rs b/storage/src/migrations/mod.rs new file mode 100644 index 000000000..a3213c917 --- /dev/null +++ b/storage/src/migrations/mod.rs @@ -0,0 +1,50 @@ +use anyhow::Result; +use weedb::{Migrations, Semver, VersionProvider, WeeDb}; + +use super::tables; + +// declare migrations here as `mod v2_1_x` + +const CURRENT_VERSION: Semver = [2, 1, 0]; + +pub fn apply(db: &WeeDb) -> Result<()> { + let migrations = + Migrations::with_target_version_and_provider(CURRENT_VERSION, NodeStateVersionProvider); + + // === register migrations here === + // v2_1_1::register(&mut migrations).context("Failed to register migrations")?; + + db.apply(migrations)?; + Ok(()) +} + +struct NodeStateVersionProvider; + +impl NodeStateVersionProvider { + const DB_VERSION_KEY: &'static str = "db_version"; +} + +impl VersionProvider for NodeStateVersionProvider { + fn get_version(&self, db: &weedb::WeeDb) -> Result, weedb::Error> { + let state = db.instantiate_table::(); + + let value = state.get(Self::DB_VERSION_KEY)?; + match value { + Some(version) => { + let slice = version.as_ref(); + slice + .try_into() + .map_err(|_| weedb::Error::InvalidDbVersion) + .map(Some) + } + None => Ok(None), + } + } + + fn set_version(&self, db: &weedb::WeeDb, version: Semver) -> Result<(), weedb::Error> { + let state = db.instantiate_table::(); + + state.insert(Self::DB_VERSION_KEY, version)?; + Ok(()) + } +} \ No newline at end of file diff --git a/storage/src/refcount.rs b/storage/src/refcount.rs new file mode 100644 index 000000000..7e7bfd5ab --- /dev/null +++ b/storage/src/refcount.rs @@ -0,0 +1,88 @@ +use std::cmp::Ordering; +use std::convert::TryInto; + +use weedb::rocksdb; +use weedb::rocksdb::compaction_filter::Decision; + +pub fn merge_operator( + _key: &[u8], + existing: Option<&[u8]>, + operands: &rocksdb::MergeOperands, +) -> Option> { + let (mut rc, mut payload) = existing.map_or((0, None), decode_value_with_rc); + for (delta, new_payload) in operands.into_iter().map(decode_value_with_rc) { + if payload.is_none() && delta > 0 { + payload = new_payload; + } + rc += delta; + } + + Some(match rc.cmp(&0) { + Ordering::Less => rc.to_le_bytes().to_vec(), + Ordering::Equal => Vec::new(), + Ordering::Greater => { + let payload = payload.unwrap_or(&[]); + let mut result = Vec::with_capacity(RC_BYTES + payload.len()); + result.extend_from_slice(&rc.to_le_bytes()); 
diff --git a/storage/src/tables.rs b/storage/src/tables.rs
new file mode 100644
index 000000000..10cdd1bfa
--- /dev/null
+++ b/storage/src/tables.rs
@@ -0,0 +1,293 @@
+use bytesize::ByteSize;
+use weedb::rocksdb::{
+    BlockBasedIndexType, BlockBasedOptions, DBCompressionType, DataBlockIndexType, MergeOperands,
+    Options, ReadOptions,
+};
+use weedb::{rocksdb, Caches, ColumnFamily};
+
+use super::refcount;
+
+/// Stores prepared archives
+/// - Key: `u32 (BE)` (archive id)
+/// - Value: `Vec<u8>` (archive data)
+pub struct Archives;
+impl ColumnFamily for Archives {
+    const NAME: &'static str = "archives";
+
+    fn options(opts: &mut Options, caches: &Caches) {
+        default_block_based_table_factory(opts, caches);
+        optimize_for_level_compaction(opts, ByteSize::mib(512u64));
+
+        opts.set_merge_operator_associative("archive_data_merge", archive_data_merge);
+        opts.set_compression_type(DBCompressionType::Zstd);
+    }
+}
+
+/// Maps block root hash to block meta
+/// - Key: `[u8; 32]`
+/// - Value: `BlockMeta`
pub struct BlockHandles;
+impl ColumnFamily for BlockHandles {
+    const NAME: &'static str = "block_handles";
+
+    fn options(opts: &mut Options, caches: &Caches) {
+        optimize_for_level_compaction(opts, ByteSize::mib(512u64));
+
+        let mut block_factory = BlockBasedOptions::default();
+        block_factory.set_block_cache(&caches.block_cache);
+
+        block_factory.set_index_type(BlockBasedIndexType::HashSearch);
+        block_factory.set_data_block_index_type(DataBlockIndexType::BinaryAndHash);
+        block_factory.set_format_version(5);
+
+        opts.set_block_based_table_factory(&block_factory);
+        optimize_for_point_lookup(opts, caches);
+    }
+
+    fn read_options(opts: &mut ReadOptions) {
+        opts.set_verify_checksums(false);
+    }
+}
+
+/// Maps seqno to key block id
+/// - Key: `u32 (BE)`
+/// - Value: `ton_block::BlockIdExt`
+pub struct KeyBlocks;
+impl ColumnFamily for KeyBlocks {
+    const NAME: &'static str = "key_blocks";
+
+    fn read_options(opts: &mut ReadOptions) {
+        opts.set_verify_checksums(false);
+    }
+}
+
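The remaining column families all follow the same pattern, so a hypothetical new one would look like this (`BlockTimes` is invented for illustration and is not part of the patch; it would also need a `with_table` call and a `Db` field):

```rust
/// Hypothetical: maps block root hash to generation time
/// - Key: `[u8; 32]`
/// - Value: `u32 (LE)`
pub struct BlockTimes;
impl ColumnFamily for BlockTimes {
    const NAME: &'static str = "block_times";

    fn options(opts: &mut Options, caches: &Caches) {
        default_block_based_table_factory(opts, caches);
        optimize_for_point_lookup(opts, caches);
    }
}
```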
+/// Maps package entry id to entry data
+/// - Key: `BlockIdShort (16 bytes), [u8; 32], package type (1 byte)`
+/// - Value: `Vec<u8>`
+pub struct PackageEntries;
+impl ColumnFamily for PackageEntries {
+    const NAME: &'static str = "package_entries";
+
+    fn options(opts: &mut Options, caches: &Caches) {
+        default_block_based_table_factory(opts, caches);
+        opts.set_compression_type(DBCompressionType::Zstd);
+
+        // This flag specifies that the implementation should optimize the filters
+        // mainly for cases where keys are found rather than also optimize for keys
+        // missed. This would be used in cases where the application knows that
+        // there are very few misses or the performance in the case of misses is not
+        // important.
+        //
+        // For now, this flag allows us to not store filters for the last level i.e.
+        // the largest level which contains data of the LSM store. For keys which
+        // are hits, the filters in this level are not useful because we will search
+        // for the data anyway. NOTE: the filters in other levels are still useful
+        // even for key hit because they tell us whether to look in that level or go
+        // to the higher level.
+        // https://github.com/facebook/rocksdb/blob/81aeb15988e43c49952c795e32e5c8b224793589/include/rocksdb/advanced_options.h#L846
+        opts.set_optimize_filters_for_hits(true);
+    }
+}
+
+/// Maps BlockId to root cell hash
+/// - Key: `BlockId`
+/// - Value: `[u8; 32]`
+pub struct ShardStates;
+impl ColumnFamily for ShardStates {
+    const NAME: &'static str = "shard_states";
+
+    fn options(opts: &mut Options, caches: &Caches) {
+        default_block_based_table_factory(opts, caches);
+        opts.set_compression_type(DBCompressionType::Zstd);
+    }
+}
+
+/// Stores cells data
+/// - Key: `[u8; 32]` (cell repr hash)
+/// - Value: `StorageCell`
+pub struct Cells;
+impl ColumnFamily for Cells {
+    const NAME: &'static str = "cells";
+
+    fn options(opts: &mut Options, caches: &Caches) {
+        opts.set_level_compaction_dynamic_level_bytes(true);
+
+        opts.set_merge_operator_associative("cell_merge", refcount::merge_operator);
+        opts.set_compaction_filter("cell_compaction", refcount::compaction_filter);
+
+        optimize_for_level_compaction(opts, ByteSize::gib(1u64));
+
+        let mut block_factory = BlockBasedOptions::default();
+        block_factory.set_block_cache(&caches.block_cache);
+        block_factory.set_data_block_index_type(DataBlockIndexType::BinaryAndHash);
+        block_factory.set_whole_key_filtering(true);
+        block_factory.set_checksum_type(rocksdb::ChecksumType::NoChecksum);
+
+        block_factory.set_bloom_filter(10.0, false);
+        block_factory.set_block_size(16 * 1024);
+        block_factory.set_format_version(5);
+
+        opts.set_block_based_table_factory(&block_factory);
+        opts.set_optimize_filters_for_hits(true);
+        // compression is configured per column family here
+        opts.set_compression_type(DBCompressionType::Lz4);
+    }
+}
+
+/// Stores generic node parameters
+/// - Key: `...`
+/// - Value: `...`
+pub struct NodeStates;
+impl ColumnFamily for NodeStates {
+    const NAME: &'static str = "node_states";
+
+    fn options(opts: &mut Options, caches: &Caches) {
+        default_block_based_table_factory(opts, caches);
+
+        opts.set_optimize_filters_for_hits(true);
+        optimize_for_point_lookup(opts, caches);
+    }
+}
+
+/// Stores connections data
+/// - Key: `[u8; 32]` (block root hash)
+/// - Value: `BlockId (LE)`
+pub struct Prev1;
+impl ColumnFamily for Prev1 {
+    const NAME: &'static str = "prev1";
+
+    fn options(opts: &mut Options, caches: &Caches) {
+        default_block_based_table_factory(opts, caches);
+
+        optimize_for_point_lookup(opts, caches);
+    }
+
+    fn read_options(opts: &mut ReadOptions) {
+        opts.set_verify_checksums(false);
+    }
+}
+
+/// Stores connections data
+/// - Key: `[u8; 32]` (block root hash)
+/// - Value: `BlockId (LE)`
+pub struct Prev2;
+impl ColumnFamily for Prev2 {
+    const NAME: &'static str = "prev2";
+
+    fn options(opts: &mut Options, caches: &Caches) {
+        default_block_based_table_factory(opts, caches);
+
+        optimize_for_point_lookup(opts, caches);
+    }
+
+    fn read_options(opts: &mut ReadOptions) {
+        opts.set_verify_checksums(false);
+    }
+}
+
+/// Stores connections data
+/// - Key: `[u8; 32]` (block root hash)
+/// - Value: `BlockId (LE)`
+pub struct Next1;
+impl ColumnFamily for Next1 {
+    const NAME: &'static str = "next1";
+
+    fn options(opts: &mut Options, caches: &Caches) {
+        default_block_based_table_factory(opts, caches);
+
+        optimize_for_point_lookup(opts, caches);
+    }
+
+    fn read_options(opts: &mut ReadOptions) {
+        opts.set_verify_checksums(false);
+    }
+}
+
+/// Stores connections data
+/// - Key: `[u8; 32]` (block root hash)
+/// - Value: `BlockId (LE)`
+pub struct Next2;
+impl ColumnFamily for Next2 {
+    const NAME: &'static str = "next2";
+
+    fn options(opts: &mut Options, caches: &Caches) {
+        default_block_based_table_factory(opts, caches);
+
+        optimize_for_point_lookup(opts, caches);
+    }
+
+    fn read_options(opts: &mut ReadOptions) {
+        opts.set_verify_checksums(false);
+    }
+}
+
+fn archive_data_merge(
+    _: &[u8],
+    current_value: Option<&[u8]>,
+    operands: &MergeOperands,
+) -> Option<Vec<u8>> {
+    use tycho_constansts::archive::ARCHIVE_PREFIX;
+
+    let total_len: usize = operands.iter().map(|data| data.len()).sum();
+    let mut result = Vec::with_capacity(ARCHIVE_PREFIX.len() + total_len);
+
+    result.extend_from_slice(current_value.unwrap_or(&ARCHIVE_PREFIX));
+
+    for data in operands {
+        let data = data.strip_prefix(&ARCHIVE_PREFIX).unwrap_or(data);
+        result.extend_from_slice(data);
+    }
+
+    Some(result)
+}
+
+fn default_block_based_table_factory(opts: &mut Options, caches: &Caches) {
+    opts.set_level_compaction_dynamic_level_bytes(true);
+    let mut block_factory = BlockBasedOptions::default();
+    block_factory.set_block_cache(&caches.block_cache);
+    block_factory.set_format_version(5);
+    opts.set_block_based_table_factory(&block_factory);
+}
+
+// setting our shared cache instead of individual caches for each cf
+fn optimize_for_point_lookup(opts: &mut Options, caches: &Caches) {
+    // https://github.com/facebook/rocksdb/blob/81aeb15988e43c49952c795e32e5c8b224793589/options/options.cc
+    // BlockBasedTableOptions block_based_options;
+    // block_based_options.data_block_index_type =
+    //     BlockBasedTableOptions::kDataBlockBinaryAndHash;
+    // block_based_options.data_block_hash_table_util_ratio = 0.75;
+    // block_based_options.filter_policy.reset(NewBloomFilterPolicy(10));
+    // block_based_options.block_cache =
+    //     NewLRUCache(static_cast<size_t>(block_cache_size_mb * 1024 * 1024));
+    // table_factory.reset(new BlockBasedTableFactory(block_based_options));
+    // memtable_prefix_bloom_size_ratio = 0.02;
+    // memtable_whole_key_filtering = true;
+    //
+    let mut block_factory = BlockBasedOptions::default();
+    block_factory.set_data_block_index_type(DataBlockIndexType::BinaryAndHash);
+    block_factory.set_data_block_hash_ratio(0.75);
+    block_factory.set_bloom_filter(10.0, false);
+    block_factory.set_block_cache(&caches.block_cache);
+    opts.set_block_based_table_factory(&block_factory);
+
+    opts.set_memtable_prefix_bloom_ratio(0.02);
+    opts.set_memtable_whole_key_filtering(true);
+}
+
+fn optimize_for_level_compaction(opts: &mut Options, budget: ByteSize) {
+    opts.set_write_buffer_size(budget.as_u64() as usize / 4);
+    // two memtables are merged together before being flushed to L0,
+    // which reduces write amplification.
+    opts.set_min_write_buffer_number_to_merge(2);
+    // this means we'll use 50% extra memory in the worst case, but will reduce
+    // write stalls.
+    opts.set_max_write_buffer_number(6);
+    // start flushing L0->L1 as soon as possible. each file on level0 is
+    // (memtable_memory_budget / 2). This will flush level 0 when it's bigger than
+    // memtable_memory_budget.
+    opts.set_level_zero_file_num_compaction_trigger(2);
+    // doesn't really matter much, but we don't want to create too many files
+    opts.set_target_file_size_base(budget.as_u64() / 8);
+    // make Level1 size equal to Level0 size, so that L0->L1 compactions are fast
+    opts.set_max_bytes_for_level_base(budget.as_u64());
+}

From 4325b47aa4e449494b5b0794dbeef08180f97422 Mon Sep 17 00:00:00 2001
From: Vladimir Petrzhikovskii
Date: Tue, 13 Feb 2024 17:27:19 +0100
Subject: [PATCH 02/19] feat: add block util

From 7cd14c840b132524b646cddbb3aa9ba5f084889a Mon Sep 17 00:00:00 2001
From: Vladimir Petrzhikovskii
Date: Tue, 13 Feb 2024 17:27:19 +0100
Subject: [PATCH 03/19] feat: add storage, move db into it

---
 Cargo.lock                                     |  36 +-
 consensus/Cargo.toml                           |   2 +-
 core/Cargo.toml                                |   2 +-
 storage/Cargo.toml                             |  37 +-
 storage/src/block_connection_storage/mod.rs    | 141 +++
 storage/src/block_handle_storage/mod.rs        | 314 +++++++
 storage/src/block_storage/mod.rs               | 819 ++++++++++++++++++
 storage/src/{ => db}/config.rs                 |   0
 storage/src/{ => db}/migrations/mod.rs         |   0
 storage/src/db/mod.rs                          | 301 +++++++
 storage/src/{ => db}/refcount.rs               |   0
 storage/src/{ => db}/tables.rs                 |   0
 storage/src/lib.rs                             | 273 +----
 storage/src/models/block_handle.rs             | 100 +++
 storage/src/models/block_meta.rs               | 294 +++++++
 storage/src/models/mod.rs                      |   5 +
 storage/src/node_state_storage/mod.rs          | 137 +++
 storage/src/runtime_storage/mod.rs             |  23 +
 .../persistent_state_keeper.rs                 |  88 ++
 .../src/shard_state_storage/cell_storage.rs    | 728 ++++++++++++++++
 .../src/shard_state_storage/cell_writer.rs     | 430 +++++++++
 .../src/shard_state_storage/entries_buffer.rs  | 182 ++++
 .../src/shard_state_storage/files_context.rs   |  89 ++
 storage/src/shard_state_storage/mod.rs         | 378 ++++++++
 .../replace_transaction.rs                     | 475 ++++++++++
 .../shard_state_storage/shard_state_reader.rs  | 527 +++++++++++
 validator/Cargo.toml                           |  17 +
 27 files changed, 5145 insertions(+), 253 deletions(-)
 create mode 100644 storage/src/block_connection_storage/mod.rs
 create mode 100644 storage/src/block_handle_storage/mod.rs
 create mode 100644 storage/src/block_storage/mod.rs
 rename storage/src/{ => db}/config.rs (100%)
 rename storage/src/{ => db}/migrations/mod.rs (100%)
 create mode 100644 storage/src/db/mod.rs
 rename storage/src/{ => db}/refcount.rs (100%)
 rename storage/src/{ => db}/tables.rs (100%)
 create mode 100644 storage/src/models/block_handle.rs
 create mode 100644 storage/src/models/block_meta.rs
 create mode 100644 storage/src/models/mod.rs
 create mode 100644 storage/src/node_state_storage/mod.rs
 create mode 100644 storage/src/runtime_storage/mod.rs
 create mode 100644 storage/src/runtime_storage/persistent_state_keeper.rs
 create mode 100644 storage/src/shard_state_storage/cell_storage.rs
 create mode 100644 storage/src/shard_state_storage/cell_writer.rs
 create mode 100644 storage/src/shard_state_storage/entries_buffer.rs
 create mode 100644 storage/src/shard_state_storage/files_context.rs
 create mode 100644 storage/src/shard_state_storage/mod.rs
 create mode 100644 storage/src/shard_state_storage/replace_transaction.rs
 create mode 100644 storage/src/shard_state_storage/shard_state_reader.rs
 create mode 100644 validator/Cargo.toml

diff --git a/Cargo.lock b/Cargo.lock
index ed0cdd118..e6bd19d41 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -620,6 +620,12 @@ version = "1.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
 
+[[package]]
+name = "equivalent"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
+
 [[package]]
 name = "errno"
 version = "0.3.8"
@@ -1276,6 +1282,18 @@ dependencies = [
 "winapi",
]
 
+[[package]]
+name = "quick_cache"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "58c20af3800cee5134b79a3bd4a3d4b583c16ccfa5f53338f46400851a5b3819"
+dependencies = [
+ "ahash",
+ "equivalent",
+ "hashbrown",
+ "parking_lot",
+]
+
 [[package]]
 name = "quinn"
 version = "0.10.2"
@@ -2217,19 +2235,33 @@ dependencies = [
 
 [[package]]
 name = "tycho-storage"
-version = "0.0.1"
+version = "0.1.0"
 dependencies = [
 "anyhow",
+ "arc-swap",
+ "bumpalo",
+ "bytes",
 "bytesize",
+ "crc",
+ "dashmap",
+ "everscale-types",
 "fdlimit",
+ "hex",
 "humantime",
+ "libc",
+ "parking_lot",
+ "parking_lot_core",
+ "quick_cache",
 "rlimit",
 "serde",
+ "sha2",
+ "smallvec",
 "sysinfo",
+ "thiserror",
 "tokio",
 "tracing",
+ "tycho-block-util",
 "tycho-constansts",
- "tycho-util",
 "weedb",
]
diff --git a/consensus/Cargo.toml b/consensus/Cargo.toml
index ba9255877..45e61b0ec 100644
--- a/consensus/Cargo.toml
+++ b/consensus/Cargo.toml
@@ -16,7 +16,7 @@ weedb = "0.1"
 
 # local deps
 tycho-network = { path = "../network", version = "=0.0.1" }
-tycho-storage = { path = "../storage", version = "=0.0.1" }
+tycho-storage = { path = "../storage", version = "0.1" }
 tycho-util = { path = "../util", version = "=0.0.1" }
 
 [dev-dependencies]
diff --git a/core/Cargo.toml b/core/Cargo.toml
index d9fab0eff..27dcdcd78 100644
--- a/core/Cargo.toml
+++ b/core/Cargo.toml
@@ -9,7 +9,7 @@ description = "Basic functionality of peer."
 
 # local deps
 tycho-network = { path = "../network", version = "=0.0.1" }
-tycho-storage = { path = "../storage", version = "=0.0.1" }
+tycho-storage = { path = "../storage", version = "0.1" }
 tycho-util = { path = "../util", version = "=0.0.1" }
 
 [lints]
diff --git a/storage/Cargo.toml b/storage/Cargo.toml
index ca1382a69..372e6925a 100644
--- a/storage/Cargo.toml
+++ b/storage/Cargo.toml
@@ -1,24 +1,41 @@
 [package]
 name = "tycho-storage"
-version = "0.0.1"
+version = "0.1.0"
 edition = "2021"
-description = "A unified storage interface."
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
-anyhow = { version = "1" }
+everscale-types = "0.1.0-rc.6"
+anyhow = "1.0.79"
+bytes = "1.5.0"
+
+tokio = { version = "1.36.0", features = ["full"] }
+tracing = "0.1.40"
+thiserror = "1.0.57"
+hex = "0.4.3"
+libc = "0.2.153"
+smallvec = "1.13.1"
+sha2 = "0.10.8"
+
+tycho-block-util = { path = "../block-util" }
+tycho-constansts = { path = "../constansts" }
+
+dashmap = "5.5.3"
+bumpalo = "3.14.0"
+arc-swap = "1.6.0"
+crc = "3.0.1"
+bytesize = { version = "1.3.0", features = ["serde"] }
+parking_lot = "0.12.1"
+quick_cache = "0.4.1"
+parking_lot_core = "0.9.9"
+serde = { version = "1.0.196", features = ["derive"] }
 
 weedb = "0.1.1"
 rlimit = "0.10.1"
 fdlimit = "0.3.0"
 humantime = "2.1.0"
 sysinfo = "0.30.5"
 
 [lints]
 workspace = true
diff --git a/storage/src/block_connection_storage/mod.rs b/storage/src/block_connection_storage/mod.rs
new file mode 100644
index 000000000..1121c042f
--- /dev/null
+++ b/storage/src/block_connection_storage/mod.rs
@@ -0,0 +1,141 @@
+use std::sync::Arc;
+
+use anyhow::Result;
+use everscale_types::models::*;
+
+use super::models::BlockHandle;
+use crate::db::*;
+use tycho_block_util::{read_block_id_le, write_block_id_le, StoredValue};
+
+/// Stores relations between blocks
+pub struct BlockConnectionStorage {
+    db: Arc<Db>,
+}
+
+impl BlockConnectionStorage {
+    pub fn new(db: Arc<Db>) -> Result<Self> {
+        Ok(Self { db })
+    }
+
+    pub fn store_connection(
+        &self,
+        handle: &BlockHandle,
+        direction: BlockConnection,
+        connected_block_id: &BlockId,
+    ) -> Result<()> {
+        // Each direction is stored in its own column family, hence the explicit match
+        let store = match direction {
+            BlockConnection::Prev1 => {
+                if handle.meta().has_prev1() {
+                    return Ok(());
+                }
+                store_block_connection_impl(&self.db.prev1, handle, connected_block_id)?;
+                handle.meta().set_has_prev1()
+            }
+            BlockConnection::Prev2 => {
+                if handle.meta().has_prev2() {
+                    return Ok(());
+                }
+                store_block_connection_impl(&self.db.prev2, handle, connected_block_id)?;
+                handle.meta().set_has_prev2()
+            }
+            BlockConnection::Next1 => {
+                if handle.meta().has_next1() {
+                    return Ok(());
+                }
+                store_block_connection_impl(&self.db.next1, handle, connected_block_id)?;
+                handle.meta().set_has_next1()
+            }
+            BlockConnection::Next2 => {
+                if handle.meta().has_next2() {
+                    return Ok(());
+                }
+                store_block_connection_impl(&self.db.next2, handle, connected_block_id)?;
+                handle.meta().set_has_next2()
+            }
+        };
+
+        if store {
+            let id = handle.id();
+
+            if handle.is_key_block() {
+                let mut write_batch = weedb::rocksdb::WriteBatch::default();
+
+                write_batch.put_cf(
+                    &self.db.block_handles.cf(),
+                    id.root_hash.as_slice(),
+                    handle.meta().to_vec(),
+                );
+                write_batch.put_cf(
+                    &self.db.key_blocks.cf(),
+                    id.seqno.to_be_bytes(),
+                    id.to_vec(),
+                );
+
+                self.db.raw().write(write_batch)?;
+            } else {
+                self.db
+                    .block_handles
+                    .insert(id.root_hash.as_slice(), handle.meta().to_vec())?;
+            }
+        }
+
+        Ok(())
+    }
+
+    pub fn load_connection(
+        &self,
+        block_id: &BlockId,
+        direction: BlockConnection,
+    ) -> Result<BlockId> {
+        match direction {
+            BlockConnection::Prev1 => load_block_connection_impl(&self.db.prev1, block_id),
+            BlockConnection::Prev2 => load_block_connection_impl(&self.db.prev2, block_id),
+            BlockConnection::Next1 => load_block_connection_impl(&self.db.next1, block_id),
+            BlockConnection::Next2 => load_block_connection_impl(&self.db.next2, block_id),
+        }
+    }
+}
+
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub enum BlockConnection {
+    Prev1,
+    Prev2,
+    Next1,
+    Next2,
+}
+
+#[inline]
+fn store_block_connection_impl<T>(
+    db: &Table<T>,
+    handle: &BlockHandle,
+    block_id: &BlockId,
+) -> Result<(), weedb::rocksdb::Error>
+where
+    T: ColumnFamily,
+{
+    db.insert(
+        handle.id().root_hash.as_slice(),
+        write_block_id_le(block_id),
+    )
+}
+
+#[inline]
+fn load_block_connection_impl<T>(db: &Table<T>, block_id: &BlockId) -> Result<BlockId>
+where
+    T: ColumnFamily,
+{
+    match db.get(block_id.root_hash.as_slice())? {
+        Some(value) => read_block_id_le(value.as_ref())
+            .ok_or_else(|| BlockConnectionStorageError::InvalidBlockId.into()),
+        None => Err(BlockConnectionStorageError::NotFound.into()),
+    }
+}
+
+#[derive(Debug, thiserror::Error)]
+enum BlockConnectionStorageError {
+    #[error("Invalid connection block id")]
+    InvalidBlockId,
+    #[error("Block connection not found")]
+    NotFound,
+}
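A sketch of the intended call pattern (the setup and the exact import paths for `BlockConnectionStorage`/`BlockHandle` depend on the final crate layout and are assumed here):

```rust
use anyhow::Result;
use everscale_types::models::BlockId;

fn link_to_next(
    storage: &BlockConnectionStorage,
    handle: &BlockHandle,
    next_id: &BlockId,
) -> Result<()> {
    // Idempotent: a second call is a no-op once the meta flag is set.
    storage.store_connection(handle, BlockConnection::Next1, next_id)?;
    let loaded = storage.load_connection(handle.id(), BlockConnection::Next1)?;
    anyhow::ensure!(&loaded == next_id, "stored and loaded ids must match");
    Ok(())
}
```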
diff --git a/storage/src/block_handle_storage/mod.rs b/storage/src/block_handle_storage/mod.rs
new file mode 100644
index 000000000..d776e067f
--- /dev/null
+++ b/storage/src/block_handle_storage/mod.rs
@@ -0,0 +1,314 @@
+/// This file is a modified copy of the file from https://github.com/tonlabs/ton-labs-node
+///
+/// Changes:
+/// - replaced old `failure` crate with `anyhow`
+/// - simplified storing
+use std::sync::{Arc, Weak};
+
+use anyhow::Result;
+use everscale_types::models::*;
+
+use super::models::*;
+use crate::db::*;
+use tycho_block_util::*;
+
+pub struct BlockHandleStorage {
+    db: Arc<Db>,
+    cache: Arc<BlockHandleCache>,
+}
+
+impl BlockHandleStorage {
+    pub fn new(db: Arc<Db>) -> Result<Self> {
+        Ok(Self {
+            db,
+            cache: Arc::new(Default::default()),
+        })
+    }
+
+    pub fn store_block_applied(&self, handle: &Arc<BlockHandle>) -> Result<bool> {
+        if handle.meta().set_is_applied() {
+            self.store_handle(handle)?;
+            Ok(true)
+        } else {
+            Ok(false)
+        }
+    }
+
+    pub fn assign_mc_ref_seqno(&self, handle: &Arc<BlockHandle>, mc_ref_seqno: u32) -> Result<()> {
+        if handle.set_masterchain_ref_seqno(mc_ref_seqno)? {
+            self.store_handle(handle)?;
+        }
+        Ok(())
+    }
+
+    pub fn create_or_load_handle(
+        &self,
+        block_id: &BlockId,
+        meta_data: BlockMetaData,
+    ) -> Result<(Arc<BlockHandle>, HandleCreationStatus)> {
+        if let Some(handle) = self.load_handle(block_id)? {
+            return Ok((handle, HandleCreationStatus::Fetched));
+        }
+
+        if let Some(handle) = self.create_handle(*block_id, BlockMeta::with_data(meta_data))? {
+            return Ok((handle, HandleCreationStatus::Created));
+        }
+
+        if let Some(handle) = self.load_handle(block_id)? {
+            return Ok((handle, HandleCreationStatus::Fetched));
+        }
+
+        Err(BlockHandleStorageError::FailedToCreateBlockHandle.into())
+    }
+
+    pub fn load_handle(&self, block_id: &BlockId) -> Result<Option<Arc<BlockHandle>>> {
+        Ok(loop {
+            if let Some(weak) = self.cache.get(block_id) {
+                if let Some(handle) = weak.upgrade() {
+                    break Some(handle);
+                }
+            }
+
+            if let Some(meta) = self.db.block_handles.get(block_id.root_hash.as_slice())? {
+                let meta = BlockMeta::from_slice(meta.as_ref())?;
+                if let Some(handle) = self.create_handle(*block_id, meta)? {
+                    break Some(handle);
+                }
+            } else {
+                break None;
+            }
+        })
+    }
+
+    pub fn store_handle(&self, handle: &BlockHandle) -> Result<()> {
+        let id = handle.id();
+
+        self.db
+            .block_handles
+            .insert(id.root_hash.as_slice(), handle.meta().to_vec())?;
+
+        if handle.is_key_block() {
+            self.db
+                .key_blocks
+                .insert(id.seqno.to_be_bytes(), id.to_vec())?;
+        }
+
+        Ok(())
+    }
+
+    pub fn load_key_block_handle(&self, seqno: u32) -> Result<Arc<BlockHandle>> {
+        let key_block_id = self
+            .db
+            .key_blocks
+            .get(seqno.to_be_bytes())?
+            .map(|value| BlockId::from_slice(value.as_ref()))
+            .transpose()?
+            .ok_or(BlockHandleStorageError::KeyBlockNotFound)?;
+
+        self.load_handle(&key_block_id)?.ok_or_else(|| {
+            BlockHandleStorageError::KeyBlockHandleNotFound(key_block_id.seqno).into()
+        })
+    }
+
+    pub fn find_last_key_block(&self) -> Result<Arc<BlockHandle>> {
+        let mut iter = self.db.key_blocks.raw_iterator();
+        iter.seek_to_last();
+
+        // Load key block from current iterator value
+        let key_block_id = iter
+            .value()
+            .map(BlockId::from_slice)
+            .transpose()?
+            .ok_or(BlockHandleStorageError::KeyBlockNotFound)?;
+
+        self.load_handle(&key_block_id)?.ok_or_else(|| {
+            BlockHandleStorageError::KeyBlockHandleNotFound(key_block_id.seqno).into()
+        })
+    }
+
+    pub fn find_prev_key_block(&self, seqno: u32) -> Result<Option<Arc<BlockHandle>>> {
+        if seqno == 0 {
+            return Ok(None);
+        }
+
+        // Create iterator and move it to the previous key block before the specified
+        let mut iter = self.db.key_blocks.raw_iterator();
+        iter.seek_for_prev((seqno - 1u32).to_be_bytes());
+
+        // Load key block from current iterator value
+        iter.value()
+            .map(BlockId::from_slice)
+            .transpose()?
+            .map(|key_block_id| {
+                self.load_handle(&key_block_id)?.ok_or_else(|| {
+                    BlockHandleStorageError::KeyBlockHandleNotFound(key_block_id.seqno).into()
+                })
+            })
+            .transpose()
+    }
+
+    pub fn find_prev_persistent_key_block(&self, seqno: u32) -> Result<Option<Arc<BlockHandle>>> {
+        if seqno == 0 {
+            return Ok(None);
+        }
+
+        // Create iterator and move it to the previous key block before the specified
+        let mut iter = self.db.key_blocks.raw_iterator();
+        iter.seek_for_prev((seqno - 1u32).to_be_bytes());
+
+        // Loads key block from current iterator value and moves it backward
+        let mut get_key_block = move || -> Result<Option<Arc<BlockHandle>>> {
+            // Load key block id
+            let key_block_id = match iter.value().map(BlockId::from_slice).transpose()? {
+                Some(prev_key_block) => prev_key_block,
+                None => return Ok(None),
+            };
+
+            // Load block handle for this id
+            let handle = self.load_handle(&key_block_id)?.ok_or(
+                BlockHandleStorageError::KeyBlockHandleNotFound(key_block_id.seqno),
+            )?;
+
+            // Move iterator backward
+            iter.prev();
+
+            // Done
+            Ok(Some(handle))
+        };
+
+        // Load previous key block
+        let mut key_block = match get_key_block()? {
+            Some(id) => id,
+            None => return Ok(None),
+        };
+
+        // Load previous key blocks and check if the `key_block` is for persistent state
+        while let Some(prev_key_block) = get_key_block()? {
+            if is_persistent_state(
+                key_block.meta().gen_utime(),
+                prev_key_block.meta().gen_utime(),
+            ) {
+                // Found
+                return Ok(Some(key_block));
+            }
+            key_block = prev_key_block;
+        }
+
+        // Not found
+        Ok(None)
+    }
+
+    pub fn key_blocks_iterator(
+        &self,
+        direction: KeyBlocksDirection,
+    ) -> impl Iterator<Item = Result<BlockId>> + '_ {
+        let mut raw_iterator = self.db.key_blocks.raw_iterator();
+        let reverse = match direction {
+            KeyBlocksDirection::ForwardFrom(seqno) => {
+                raw_iterator.seek(seqno.to_be_bytes());
+                false
+            }
+            KeyBlocksDirection::Backward => {
+                raw_iterator.seek_to_last();
+                true
+            }
+        };
+
+        KeyBlocksIterator {
+            raw_iterator,
+            reverse,
+        }
+    }
+
+    pub fn gc_handles_cache(&self, top_blocks: &TopBlocks) -> usize {
+        let mut total_removed = 0;
+
+        self.cache.retain(|block_id, value| {
+            let value = match value.upgrade() {
+                Some(value) => value,
+                None => {
+                    total_removed += 1;
+                    return false;
+                }
+            };
+
+            if block_id.seqno == 0
+                || block_id.is_masterchain() && value.is_key_block()
+                || top_blocks.contains(block_id)
+            {
+                // Keep zero state, key blocks and latest blocks
+                true
+            } else {
+                // Remove all outdated
+                total_removed += 1;
+                value.meta().clear_data_and_proof();
+                false
+            }
+        });
+
+        total_removed
+    }
+
+    fn create_handle(
+        &self,
+        block_id: BlockId,
+        meta: BlockMeta,
+    ) -> Result<Option<Arc<BlockHandle>>> {
+        use dashmap::mapref::entry::Entry;
+
+        let handle = match self.cache.entry(block_id) {
+            Entry::Vacant(entry) => {
+                let handle = Arc::new(BlockHandle::with_values(block_id, meta, self.cache.clone()));
+                entry.insert(Arc::downgrade(&handle));
+                handle
+            }
+            Entry::Occupied(_) => return Ok(None),
+        };
+
+        self.store_handle(&handle)?;
+
+        Ok(Some(handle))
+    }
+}
+
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub enum HandleCreationStatus {
+    Created,
+    Fetched,
+}
+
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub enum KeyBlocksDirection {
+    ForwardFrom(u32),
+    Backward,
+}
+
+struct KeyBlocksIterator<'a> {
+    raw_iterator: weedb::rocksdb::DBRawIterator<'a>,
+    reverse: bool,
+}
+
+impl Iterator for KeyBlocksIterator<'_> {
+    type Item = Result<BlockId>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let value = self.raw_iterator.value().map(BlockId::from_slice)?;
+        if self.reverse {
+            self.raw_iterator.prev();
+        } else {
+            self.raw_iterator.next();
+        }
+
+        Some(value)
+    }
+}
+
+#[derive(thiserror::Error, Debug)]
+enum BlockHandleStorageError {
+    #[error("Failed to create block handle")]
+    FailedToCreateBlockHandle,
+    #[error("Key block not found")]
+    KeyBlockNotFound,
+    #[error("Key block handle not found: {}", .0)]
+    KeyBlockHandleNotFound(u32),
+}
diff --git a/storage/src/block_storage/mod.rs b/storage/src/block_storage/mod.rs
new file mode 100644
index 000000000..0db7c3ff0
--- /dev/null
+++ b/storage/src/block_storage/mod.rs
@@ -0,0 +1,819 @@
+use std::borrow::Borrow;
+use std::collections::BTreeSet;
+use std::convert::TryInto;
+use std::hash::Hash;
+use std::ops::{Bound, RangeBounds};
+use std::sync::Arc;
+
+use anyhow::{Context, Result};
+use everscale_types::models::*;
+use parking_lot::RwLock;
+use serde::{Deserialize, Serialize};
+
+use super::block_handle_storage::{BlockHandleStorage, HandleCreationStatus};
+use super::models::*;
+use crate::db::*;
+use tycho_block_util::*;
+
+pub struct BlockStorage {
+    db: Arc<Db>,
+    block_handle_storage: Arc<BlockHandleStorage>,
+    archive_ids: RwLock<BTreeSet<u32>>,
+}
+
+impl BlockStorage {
+    pub fn new(db: Arc<Db>, block_handle_storage: Arc<BlockHandleStorage>) -> Result<Self> {
+        let manager = Self {
+            db,
+            block_handle_storage,
+            archive_ids: Default::default(),
+        };
+
+        manager.preload()?;
+
+        Ok(manager)
+    }
+
+    fn preload(&self) -> Result<()> {
+        fn check_archive(value: &[u8]) -> Result<(), ArchivePackageError> {
+            let mut verifier = ArchivePackageVerifier::default();
+            verifier.verify(value)?;
+            verifier.final_check()
+        }
+
+        let mut iter = self.db.archives.raw_iterator();
+        iter.seek_to_first();
+
+        let mut archive_ids = self.archive_ids.write();
+
+        while let (Some(key), value) = (iter.key(), iter.value()) {
+            let archive_id = u32::from_be_bytes(
+                key.try_into()
+                    .with_context(|| format!("Invalid archive key: {}", hex::encode(key)))?,
+            );
+
+            if let Some(Err(e)) = value.map(check_archive) {
+                tracing::error!(archive_id, "failed to read archive: {e:?}")
+            }
+
+            archive_ids.insert(archive_id);
+            iter.next();
+        }
+
+        tracing::info!("selfcheck complete");
+        Ok(())
+    }
+
+    pub async fn store_block_data(
+        &self,
+        block: &BlockStuffAug,
+        meta_data: BlockMetaData,
+    ) -> Result<StoreBlockResult> {
+        let block_id = block.id();
+        let (handle, status) = self
+            .block_handle_storage
+            .create_or_load_handle(block_id, meta_data)?;
+
+        let archive_id = PackageEntryId::Block(block_id);
+        let mut updated = false;
+        if !handle.meta().has_data() {
+            let data = block.new_archive_data()?;
+
+            let _lock = handle.block_data_lock().write().await;
+            if !handle.meta().has_data() {
+                self.add_data(&archive_id, data)?;
+                if handle.meta().set_has_data() {
+                    self.block_handle_storage.store_handle(&handle)?;
+                    updated = true;
+                }
+            }
+        }
+
+        Ok(StoreBlockResult {
+            handle,
+            updated,
+            new: status == HandleCreationStatus::Created,
+        })
+    }
+
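Putting `store_block_data` together with the loaders defined next, the intended call pattern might look like this sketch (block/meta construction is elided; names are illustrative):

```rust
async fn save_and_check(
    blocks: &BlockStorage,
    block: &BlockStuffAug,
    meta: BlockMetaData,
) -> anyhow::Result<()> {
    let stored = blocks.store_block_data(block, meta).await?;
    if stored.new {
        // The raw bytes live in `package_entries` until the block is archived.
        let data = blocks.load_block_data_raw(&stored.handle).await?;
        tracing::debug!(len = data.len(), "stored new block");
    }
    Ok(())
}
```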
+    pub async fn load_block_data(&self, handle: &BlockHandle) -> Result<BlockStuff> {
+        let raw_block = self.load_block_data_raw_ref(handle).await?;
+        BlockStuff::deserialize(*handle.id(), raw_block.as_ref())
+    }
+
+    pub async fn load_block_data_raw(&self, handle: &BlockHandle) -> Result<Vec<u8>> {
+        if !handle.meta().has_data() {
+            return Err(BlockStorageError::BlockDataNotFound.into());
+        }
+        self.get_data(handle, &PackageEntryId::Block(handle.id()))
+            .await
+    }
+
+    pub async fn load_block_data_raw_ref<'a>(
+        &'a self,
+        handle: &'a BlockHandle,
+    ) -> Result<impl AsRef<[u8]> + 'a> {
+        if !handle.meta().has_data() {
+            return Err(BlockStorageError::BlockDataNotFound.into());
+        }
+        self.get_data_ref(handle, &PackageEntryId::Block(handle.id()))
+            .await
+    }
+
+    pub async fn store_block_proof(
+        &self,
+        proof: &BlockProofStuffAug,
+        handle: BlockProofHandle,
+    ) -> Result<StoreBlockResult> {
+        let block_id = proof.id();
+        if matches!(&handle, BlockProofHandle::Existing(handle) if handle.id() != block_id) {
+            return Err(BlockStorageError::BlockHandleIdMismatch.into());
+        }
+
+        let (handle, status) = match handle {
+            BlockProofHandle::Existing(handle) => (handle, HandleCreationStatus::Fetched),
+            BlockProofHandle::New(meta_data) => self
+                .block_handle_storage
+                .create_or_load_handle(block_id, meta_data)?,
+        };
+
+        let mut updated = false;
+        if proof.is_link() {
+            let archive_id = PackageEntryId::ProofLink(block_id);
+            if !handle.meta().has_proof_link() {
+                let data = proof.new_archive_data()?;
+
+                let _lock = handle.proof_data_lock().write().await;
+                if !handle.meta().has_proof_link() {
+                    self.add_data(&archive_id, data)?;
+                    if handle.meta().set_has_proof_link() {
+                        self.block_handle_storage.store_handle(&handle)?;
+                        updated = true;
+                    }
+                }
+            }
+        } else {
+            let archive_id = PackageEntryId::Proof(block_id);
+            if !handle.meta().has_proof() {
+                let data = proof.new_archive_data()?;
+
+                let _lock = handle.proof_data_lock().write().await;
+                if !handle.meta().has_proof() {
+                    self.add_data(&archive_id, data)?;
+                    if handle.meta().set_has_proof() {
+                        self.block_handle_storage.store_handle(&handle)?;
+                        updated = true;
+                    }
+                }
+            }
+        }
+
+        Ok(StoreBlockResult {
+            handle,
+            updated,
+            new: status == HandleCreationStatus::Created,
+        })
+    }
+
+    pub async fn load_block_proof(
+        &self,
+        handle: &BlockHandle,
+        is_link: bool,
+    ) -> Result<BlockProofStuff> {
+        let raw_proof = self.load_block_proof_raw_ref(handle, is_link).await?;
+        BlockProofStuff::deserialize(*handle.id(), raw_proof.as_ref(), is_link)
+    }
+
+    pub async fn load_block_proof_raw(
+        &self,
+        handle: &BlockHandle,
+        is_link: bool,
+    ) -> Result<Vec<u8>> {
+        let (archive_id, exists) = if is_link {
+            (
+                PackageEntryId::ProofLink(handle.id()),
+                handle.meta().has_proof_link(),
+            )
+        } else {
+            (
+                PackageEntryId::Proof(handle.id()),
+                handle.meta().has_proof(),
+            )
+        };
+
+        if !exists {
+            return Err(BlockStorageError::BlockProofNotFound.into());
+        }
+
+        self.get_data(handle, &archive_id).await
+    }
+
+    pub async fn load_block_proof_raw_ref<'a>(
+        &'a self,
+        handle: &'a BlockHandle,
+        is_link: bool,
+    ) -> Result<impl AsRef<[u8]> + 'a> {
+        let (archive_id, exists) = if is_link {
+            (
+                PackageEntryId::ProofLink(handle.id()),
+                handle.meta().has_proof_link(),
+            )
+        } else {
+            (
+                PackageEntryId::Proof(handle.id()),
+                handle.meta().has_proof(),
+            )
+        };
+
+        if !exists {
+            return Err(BlockStorageError::BlockProofNotFound.into());
+        }
+
+        self.get_data_ref(handle, &archive_id).await
+    }
+
+    pub async fn move_into_archive(&self, handle: &BlockHandle) -> Result<()> {
+        if handle.meta().is_archived() {
+            return Ok(());
+        }
+        if !handle.meta().set_is_moving_to_archive() {
+            return Ok(());
+        }
+
+        // Prepare data
+        let block_id = handle.id();
+
+        let has_data = handle.meta().has_data();
+        let mut is_link = false;
+        let has_proof = handle.has_proof_or_link(&mut is_link);
+
+        let block_data = if has_data {
+            let lock = handle.block_data_lock().write().await;
+
+            let entry_id = PackageEntryId::Block(block_id);
+            let data = self.make_archive_segment(&entry_id)?;
+
+            Some((lock, data))
+        } else {
+            None
+        };
+
+        let block_proof_data = if has_proof {
+            let lock = handle.proof_data_lock().write().await;
+
+            let entry_id = if is_link {
+                PackageEntryId::ProofLink(block_id)
+            } else {
+                PackageEntryId::Proof(block_id)
+            };
+            let data = self.make_archive_segment(&entry_id)?;
+
+            Some((lock, data))
+        } else {
+            None
+        };
+
+        // Prepare cf
+        let storage_cf = self.db.archives.cf();
+        let handle_cf = self.db.block_handles.cf();
+
+        // Prepare archive
+        let archive_id = self.compute_archive_id(handle);
+        let archive_id_bytes = archive_id.to_be_bytes();
+
+        // 0. Create transaction
+        let mut batch = rocksdb::WriteBatch::default();
+        // 1. Append archive segment with block data
+        if let Some((_, data)) = &block_data {
+            batch.merge_cf(&storage_cf, archive_id_bytes, data);
+        }
+        // 2. Append archive segment with block proof data
+        if let Some((_, data)) = &block_proof_data {
+            batch.merge_cf(&storage_cf, archive_id_bytes, data);
+        }
+        // 3. Update block handle meta
+        if handle.meta().set_is_archived() {
+            batch.put_cf(
+                &handle_cf,
+                block_id.root_hash.as_slice(),
+                handle.meta().to_vec(),
+            );
+        }
+        // 4. Execute transaction
+        self.db.raw().write(batch)?;
+
+        // Block will be removed after blocks gc
+
+        // Done
+        Ok(())
+    }
+
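Conceptually, the two `merge_cf` calls above rely on `archive_data_merge` (from `tables.rs`) to concatenate segments behind a single `ARCHIVE_PREFIX`; a standalone model of that behavior, for illustration only:

```rust
fn model_archive_merge(prefix: &[u8], segments: &[&[u8]]) -> Vec<u8> {
    // The first merge seeds the value with the prefix; each operand is then
    // appended with its own prefix stripped, mirroring archive_data_merge.
    let mut archive = prefix.to_vec();
    for segment in segments {
        archive.extend_from_slice(segment.strip_prefix(prefix).unwrap_or(segment));
    }
    archive
}
```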
+    pub fn move_into_archive_with_data(
+        &self,
+        handle: &BlockHandle,
+        is_link: bool,
+        block_data: &[u8],
+        block_proof_data: &[u8],
+    ) -> Result<()> {
+        if handle.meta().is_archived() {
+            return Ok(());
+        }
+        if !handle.meta().set_is_moving_to_archive() {
+            return Ok(());
+        }
+
+        let block_id = handle.id();
+
+        // Prepare cf
+        let archives_cf = self.db.archives.cf();
+        let block_handles_cf = self.db.block_handles.cf();
+
+        // Prepare archive
+        let archive_id = self.compute_archive_id(handle);
+        let archive_id_bytes = archive_id.to_be_bytes();
+
+        let mut batch = rocksdb::WriteBatch::default();
+
+        batch.merge_cf(
+            &archives_cf,
+            archive_id_bytes,
+            make_archive_segment(&PackageEntryId::Block(handle.id()).filename(), block_data),
+        );
+
+        batch.merge_cf(
+            &archives_cf,
+            archive_id_bytes,
+            make_archive_segment(
+                &if is_link {
+                    PackageEntryId::ProofLink(block_id)
+                } else {
+                    PackageEntryId::Proof(block_id)
+                }
+                .filename(),
+                block_proof_data,
+            ),
+        );
+
+        if handle.meta().set_is_archived() {
+            batch.put_cf(
+                &block_handles_cf,
+                block_id.root_hash.as_slice(),
+                handle.meta().to_vec(),
+            );
+        }
+
+        self.db.raw().write(batch)?;
+
+        Ok(())
+    }
+
+    pub fn get_archive_id(&self, mc_seqno: u32) -> Option<u32> {
+        match self.archive_ids.read().range(..=mc_seqno).next_back() {
+            // NOTE: handles the case when mc_seqno is far in the future.
+            // However, if there is a key block between `id` and `mc_seqno`,
+            // this will return an archive without that specified block.
+            Some(id) if mc_seqno < id + ARCHIVE_PACKAGE_SIZE => Some(*id),
+            _ => None,
+        }
+    }
+
+    #[allow(unused)]
+    pub fn get_archives(
+        &self,
+        range: impl RangeBounds<u32> + 'static,
+    ) -> impl Iterator<Item = (u32, Vec<u8>)> + '_ {
+        struct ArchivesIterator<'a> {
+            first: bool,
+            ids: (Bound<u32>, Bound<u32>),
+            iter: rocksdb::DBRawIterator<'a>,
+        }
+
+        impl<'a> Iterator for ArchivesIterator<'a> {
+            type Item = (u32, Vec<u8>);
+
+            fn next(&mut self) -> Option<Self::Item> {
+                if self.first {
+                    match self.ids.0 {
+                        Bound::Included(id) => {
+                            self.iter.seek(id.to_be_bytes());
+                        }
+                        Bound::Excluded(id) => {
+                            self.iter.seek((id + 1).to_be_bytes());
+                        }
+                        Bound::Unbounded => {
+                            self.iter.seek_to_first();
+                        }
+                    }
+                    self.first = false;
+                } else {
+                    self.iter.next();
+                }
+
+                match (self.iter.key(), self.iter.value()) {
+                    (Some(key), Some(value)) => {
+                        let id = u32::from_be_bytes(key.try_into().unwrap_or_default());
+                        match self.ids.1 {
+                            Bound::Included(bound_id) if id > bound_id => None,
+                            Bound::Excluded(bound_id) if id >= bound_id => None,
+                            _ => Some((id, value.to_vec())),
+                        }
+                    }
+                    _ => None,
+                }
+            }
+        }
+
+        ArchivesIterator {
+            first: true,
+            ids: (range.start_bound().cloned(), range.end_bound().cloned()),
+            iter: self.db.archives.raw_iterator(),
+        }
+    }
+
+            Some(slice) if offset < slice.len() => {
+                let end = std::cmp::min(offset.saturating_add(limit), slice.len());
+                Ok(Some(slice[offset..end].to_vec()))
+            }
+            Some(_) => Err(BlockStorageError::InvalidOffset.into()),
+            None => Ok(None),
+        }
+    }
+
+    pub async fn remove_outdated_blocks(
+        &self,
+        key_block_id: &BlockId,
+        max_blocks_per_batch: Option<usize>,
+        gc_type: BlocksGcKind,
+    ) -> Result<()> {
+        let _compaction_guard = self.db.delay_compaction().await;
+
+        // Find target block
+        let target_block = match gc_type {
+            BlocksGcKind::BeforePreviousKeyBlock => self
+                .block_handle_storage
+                .find_prev_key_block(key_block_id.seqno)?,
+            BlocksGcKind::BeforePreviousPersistentState => self
+                .block_handle_storage
+                .find_prev_persistent_key_block(key_block_id.seqno)?,
+        };
+
+        // Load target block data
+        let top_blocks = match target_block {
+            Some(handle) if handle.meta().has_data() => {
+                tracing::info!(
+                    %key_block_id,
+                    target_block_id = %handle.id(),
+                    "starting blocks GC",
+                );
+                self.load_block_data(&handle)
+                    .await
+                    .context("Failed to load target key block data")
+                    .and_then(|block_data| TopBlocks::from_mc_block(&block_data))
+                    .context("Failed to compute top blocks for target block")?
+            }
+            _ => {
+                tracing::info!(%key_block_id, "blocks GC skipped");
+                return Ok(());
+            }
+        };
+
+        // Remove all expired entries
+        let total_cached_handles_removed = self.block_handle_storage.gc_handles_cache(&top_blocks);
+
+        let db = self.db.clone();
+        let BlockGcStats {
+            mc_package_entries_removed,
+            total_package_entries_removed,
+            total_handles_removed,
+        } = tokio::task::spawn_blocking(move || {
+            remove_blocks(db, max_blocks_per_batch, &top_blocks)
+        })
+        .await??;
+
+        tracing::info!(
+            %key_block_id,
+            total_cached_handles_removed,
+            mc_package_entries_removed,
+            total_package_entries_removed,
+            total_handles_removed,
+            "finished blocks GC"
+        );
+
+        // Done
+        Ok(())
+    }
+
+    pub async fn remove_outdated_archives(&self, until_id: u32) -> Result<()> {
+        let _compaction_guard = self.db.delay_compaction().await;
+
+        let mut archive_ids = self.archive_ids.write();
+
+        let retained_ids = match archive_ids.iter().rev().find(|&id| *id < until_id).cloned() {
+            // Splits `archive_ids` into two parts - [..until_id] and [until_id..]
+            // `archive_ids` will now contain [..until_id]
+            Some(until_id) => archive_ids.split_off(&until_id),
+            None => {
+                tracing::info!("archives GC: nothing to remove");
+                return Ok(());
+            }
+        };
+        // so we must swap maps to retain [until_id..] and get ids to remove
+        let removed_ids = std::mem::replace(&mut *archive_ids, retained_ids);
+
+        // Print removed range bounds and compute real `until_id`
+        let until_id = match (removed_ids.first(), removed_ids.last()) {
+            (Some(first), Some(last)) => {
+                let len = removed_ids.len();
+                tracing::info!(
+                    archive_count = len,
+                    first,
+                    last,
+                    "archives GC: removing archives"
+                );
+
+                match archive_ids.first() {
+                    Some(until_id) => *until_id,
+                    None => *last + 1,
+                }
+            }
+            _ => {
+                tracing::info!("archives GC: nothing to remove");
+                return Ok(());
+            }
+        };
+
+        // Remove archives
+        let archives_cf = self.db.archives.cf();
+        let write_options = self.db.archives.write_config();
+
+        self.db.raw().delete_range_cf_opt(
+            &archives_cf,
+            [0; 4],
+            until_id.to_be_bytes(),
+            write_options,
+        )?;
+
+        tracing::info!("archives GC: done");
+        Ok(())
+    }
+
+    fn add_data<I>(&self, id: &PackageEntryId<I>, data: &[u8]) -> Result<(), rocksdb::Error>
+    where
+        I: Borrow<BlockId> + Hash,
+    {
+        self.db.package_entries.insert(id.to_vec(), data)
+    }
+
+    #[allow(dead_code)]
+    fn has_data<I>(&self, id: &PackageEntryId<I>) -> Result<bool, rocksdb::Error>
+    where
+        I: Borrow<BlockId> + Hash,
+    {
+        self.db.package_entries.contains_key(id.to_vec())
+    }
+
+    async fn get_data<I>(&self, handle: &BlockHandle, id: &PackageEntryId<I>) -> Result<Vec<u8>>
+    where
+        I: Borrow<BlockId> + Hash,
+    {
+        let _lock = match &id {
+            PackageEntryId::Block(_) => handle.block_data_lock().read().await,
+            PackageEntryId::Proof(_) | PackageEntryId::ProofLink(_) => {
+                handle.proof_data_lock().read().await
+            }
+        };
+
+        match self.db.package_entries.get(id.to_vec())? {
+            Some(a) => Ok(a.to_vec()),
+            None => Err(BlockStorageError::InvalidBlockData.into()),
+        }
+    }
+
+    async fn get_data_ref<'a, I>(
+        &'a self,
+        handle: &'a BlockHandle,
+        id: &PackageEntryId<I>,
+    ) -> Result<impl AsRef<[u8]> + 'a>
+    where
+        I: Borrow<BlockId> + Hash,
+    {
+        let lock = match id {
+            PackageEntryId::Block(_) => handle.block_data_lock().read().await,
+            PackageEntryId::Proof(_) | PackageEntryId::ProofLink(_) => {
+                handle.proof_data_lock().read().await
+            }
+        };
+
+        match self.db.package_entries.get(id.to_vec())? {
+            Some(data) => Ok(BlockContentsLock { _lock: lock, data }),
+            None => Err(BlockStorageError::InvalidBlockData.into()),
+        }
+    }
+
+    fn compute_archive_id(&self, handle: &BlockHandle) -> u32 {
+        let mc_seqno = handle.masterchain_ref_seqno();
+
+        if handle.meta().is_key_block() {
+            self.archive_ids.write().insert(mc_seqno);
+            return mc_seqno;
+        }
+
+        let mut archive_id = mc_seqno - mc_seqno % ARCHIVE_SLICE_SIZE;
+
+        let prev_id = {
+            let latest_archives = self.archive_ids.read();
+            latest_archives.range(..=mc_seqno).next_back().cloned()
+        };
+
+        if let Some(prev_id) = prev_id {
+            if archive_id < prev_id {
+                archive_id = prev_id;
+            }
+        }
+
+        if mc_seqno.saturating_sub(archive_id) >= ARCHIVE_PACKAGE_SIZE {
+            self.archive_ids.write().insert(mc_seqno);
+            archive_id = mc_seqno;
+        }
+
+        archive_id
+    }
+
+    fn make_archive_segment<I>(&self, entry_id: &PackageEntryId<I>) -> Result<Vec<u8>>
+    where
+        I: Borrow<BlockId> + Hash,
+    {
+        match self.db.package_entries.get(entry_id.to_vec())? {
+            Some(data) => Ok(make_archive_segment(&entry_id.filename(), &data)),
+            None => Err(BlockStorageError::InvalidBlockData.into()),
+        }
+    }
+}
+
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum BlocksGcKind {
+    BeforePreviousKeyBlock,
+    BeforePreviousPersistentState,
+}
+
+#[derive(Clone)]
+pub enum BlockProofHandle {
+    Existing(Arc<BlockHandle>),
+    New(BlockMetaData),
+}
+
+impl From<Arc<BlockHandle>> for BlockProofHandle {
+    fn from(handle: Arc<BlockHandle>) -> Self {
+        Self::Existing(handle)
+    }
+}
+
+impl From<BlockMetaData> for BlockProofHandle {
+    fn from(meta_data: BlockMetaData) -> Self {
+        Self::New(meta_data)
+    }
+}
+
+pub struct StoreBlockResult {
+    pub handle: Arc<BlockHandle>,
+    pub updated: bool,
+    pub new: bool,
+}
+
+fn remove_blocks(
+    db: Arc<Db>,
+    max_blocks_per_batch: Option<usize>,
+    top_blocks: &TopBlocks,
+) -> Result<BlockGcStats> {
+    let mut stats = BlockGcStats::default();
+
+    let raw = db.raw().as_ref();
+    let package_entries_cf = db.package_entries.cf();
+    let block_handles_cf = db.block_handles.cf();
+    let key_blocks_cf = db.key_blocks.cf();
+
+    // Create batch
+    let mut batch = rocksdb::WriteBatch::default();
+    let mut batch_len = 0;
+
+    let package_entries_readopts = db.package_entries.new_read_config();
+    let key_blocks_readopts = db.key_blocks.new_read_config();
+
+    // Iterate all entries and find expired items
+    let mut blocks_iter = raw.raw_iterator_cf_opt(&package_entries_cf, package_entries_readopts);
+    blocks_iter.seek_to_first();
+
+    loop {
+        let key = match blocks_iter.key() {
+            Some(key) => key,
+            None => break blocks_iter.status()?,
+        };
+
+        // Read only prefix with shard ident and seqno
+        let BlockIdShort { shard, seqno } =
+            BlockIdShort::deserialize(&mut std::convert::identity(key))?;
+
+        // Don't gc latest blocks
+        if top_blocks.contains_shard_seqno(&shard, seqno) {
+            blocks_iter.next();
+            continue;
+        }
+
+        // Additionally check whether this item is a key block
+        if seqno == 0
+            || shard.is_masterchain()
+                && raw
+                    .get_pinned_cf_opt(&key_blocks_cf, seqno.to_be_bytes(), &key_blocks_readopts)?
+                    .is_some()
+        {
+            // Don't remove key blocks
+            blocks_iter.next();
+            continue;
+        }
+
+        // Add item to the batch
+        batch.delete_cf(&package_entries_cf, key);
+        stats.total_package_entries_removed += 1;
+        if shard.is_masterchain() {
+            stats.mc_package_entries_removed += 1;
+        }
+
+        // Key structure:
+        // [workchain id, 4 bytes]
+        // [shard id, 8 bytes]
+        // [seqno, 4 bytes]
+        // [root hash, 32 bytes] <-
+        // ..
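+        // The fixed-size prefix is 4 + 8 + 4 = 16 bytes, so bytes 16..48 of
+        // the package entry key hold the root hash, which is exactly the
+        // `block_handles` key removed below.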
+        if key.len() >= 48 {
+            batch.delete_cf(&block_handles_cf, &key[16..48]);
+            stats.total_handles_removed += 1;
+        }
+
+        batch_len += 1;
+        if matches!(
+            max_blocks_per_batch,
+            Some(max_blocks_per_batch) if batch_len >= max_blocks_per_batch
+        ) {
+            tracing::info!(
+                total_package_entries_removed = stats.total_package_entries_removed,
+                "applying intermediate batch",
+            );
+            let batch = std::mem::take(&mut batch);
+            raw.write(batch)?;
+            batch_len = 0;
+        }
+
+        blocks_iter.next();
+    }
+
+    if batch_len > 0 {
+        tracing::info!("applying final batch");
+        raw.write(batch)?;
+    }
+
+    // Done
+    Ok(stats)
+}
+
+#[derive(Debug, Copy, Clone, Default)]
+pub struct BlockGcStats {
+    pub mc_package_entries_removed: usize,
+    pub total_package_entries_removed: usize,
+    pub total_handles_removed: usize,
+}
+
+struct BlockContentsLock<'a> {
+    _lock: tokio::sync::RwLockReadGuard<'a, ()>,
+    data: rocksdb::DBPinnableSlice<'a>,
+}
+
+impl<'a> AsRef<[u8]> for BlockContentsLock<'a> {
+    fn as_ref(&self) -> &[u8] {
+        self.data.as_ref()
+    }
+}
+
+pub const ARCHIVE_PACKAGE_SIZE: u32 = 100;
+pub const ARCHIVE_SLICE_SIZE: u32 = 20_000;
+
+#[derive(thiserror::Error, Debug)]
+enum BlockStorageError {
+    #[error("Block data not found")]
+    BlockDataNotFound,
+    #[error("Block proof not found")]
+    BlockProofNotFound,
+    #[error("Block handle id mismatch")]
+    BlockHandleIdMismatch,
+    #[error("Invalid block data")]
+    InvalidBlockData,
+    #[error("Offset is outside of the archive slice")]
+    InvalidOffset,
+}
diff --git a/storage/src/config.rs b/storage/src/db/config.rs
similarity index 100%
rename from storage/src/config.rs
rename to storage/src/db/config.rs
diff --git a/storage/src/migrations/mod.rs b/storage/src/db/migrations/mod.rs
similarity index 100%
rename from storage/src/migrations/mod.rs
rename to storage/src/db/migrations/mod.rs
diff --git a/storage/src/db/mod.rs b/storage/src/db/mod.rs
new file mode 100644
index 000000000..acb1eda1d
--- /dev/null
+++ b/storage/src/db/mod.rs
@@ -0,0 +1,301 @@
+use std::path::PathBuf;
+use std::sync::Arc;
+use std::thread::available_parallelism;
+
+use anyhow::{Context, Result};
+use bytesize::ByteSize;
+use serde::{Deserialize, Serialize};
+use weedb::{Caches, WeeDb};
+
+pub use weedb::Stats as RocksdbStats;
+pub use weedb::{rocksdb, BoundedCfHandle, ColumnFamily, Table, UnboundedCfHandle};
+pub mod refcount;
+pub mod tables;
+
+mod config;
+mod migrations;
+
+pub struct Db {
+    pub archives: Table<tables::Archives>,
+    pub block_handles: Table<tables::BlockHandles>,
+    pub key_blocks: Table<tables::KeyBlocks>,
+    pub package_entries: Table<tables::PackageEntries>,
+    pub shard_states: Table<tables::ShardStates>,
+    pub cells: Table<tables::Cells>,
+    pub node_states: Table<tables::NodeStates>,
+    pub prev1: Table<tables::Prev1>,
+    pub prev2: Table<tables::Prev2>,
+    pub next1: Table<tables::Next1>,
+    pub next2: Table<tables::Next2>,
+
+    compaction_lock: tokio::sync::RwLock<()>,
+    inner: WeeDb,
+}
+
+impl Db {
+    pub fn open(path: PathBuf, options: DbOptions) -> Result<Arc<Self>> {
+        tracing::info!(
+            rocksdb_lru_capacity = %options.rocksdb_lru_capacity,
+            cells_cache_size = %options.cells_cache_size,
+            "opening DB"
+        );
+
+        let limit = match fdlimit::raise_fd_limit() {
+            // New fd limit
+            Ok(fdlimit::Outcome::LimitRaised { to, .. }) => to,
+            // Current soft limit
+            _ => {
+                rlimit::getrlimit(rlimit::Resource::NOFILE)
+                    .unwrap_or((256, 0))
+                    .0
+            }
+        };
+
+        let caches_capacity =
+            std::cmp::max(options.rocksdb_lru_capacity, ByteSize::mib(256)).as_u64() as usize;
+
+        let caches = Caches::with_capacity(caches_capacity);
+        let threads = available_parallelism()?.get();
+
+        let inner = WeeDb::builder(path, caches)
+            .options(|opts, _| {
+                opts.set_paranoid_checks(false);
+
+                // A bigger base level size means fewer compactions;
+                // parallel compactions finish faster, causing fewer write stalls.
+
+                opts.set_max_subcompactions(threads as u32 / 2);
+
+                // io
+                opts.set_max_open_files(limit as i32);
+
+                // logging
+                opts.set_log_level(rocksdb::LogLevel::Info);
+                opts.set_keep_log_file_num(2);
+                opts.set_recycle_log_file_num(2);
+
+                // cf
+                opts.create_if_missing(true);
+                opts.create_missing_column_families(true);
+
+                // cpu
+                opts.set_max_background_jobs(std::cmp::max((threads as i32) / 2, 2));
+                opts.increase_parallelism(threads as i32);
+
+                opts.set_allow_concurrent_memtable_write(false);
+                opts.set_enable_write_thread_adaptive_yield(true);
+
+                // debug
+                // NOTE: could slow everything down a bit in some cloud environments.
+                // See: https://github.com/facebook/rocksdb/issues/3889
+                //
+                // opts.enable_statistics();
+                // opts.set_stats_dump_period_sec(600);
+            })
+            .with_table::<tables::Archives>()
+            .with_table::<tables::BlockHandles>()
+            .with_table::<tables::KeyBlocks>()
+            .with_table::<tables::PackageEntries>()
+            .with_table::<tables::ShardStates>()
+            .with_table::<tables::Cells>()
+            .with_table::<tables::NodeStates>()
+            .with_table::<tables::Prev1>()
+            .with_table::<tables::Prev2>()
+            .with_table::<tables::Next1>()
+            .with_table::<tables::Next2>()
+            .build()
+            .context("Failed building db")?;
+
+        migrations::apply(&inner).context("Failed to apply migrations")?;
+
+        Ok(Arc::new(Self {
+            archives: inner.instantiate_table(),
+            block_handles: inner.instantiate_table(),
+            key_blocks: inner.instantiate_table(),
+            package_entries: inner.instantiate_table(),
+            shard_states: inner.instantiate_table(),
+            cells: inner.instantiate_table(),
+            node_states: inner.instantiate_table(),
+            prev1: inner.instantiate_table(),
+            prev2: inner.instantiate_table(),
+            next1: inner.instantiate_table(),
+            next2: inner.instantiate_table(),
+            compaction_lock: tokio::sync::RwLock::default(),
+            inner,
+        }))
+    }
+
+    #[inline]
+    pub fn raw(&self) -> &Arc<rocksdb::DB> {
+        self.inner.raw()
+    }
+
+    pub fn get_memory_usage_stats(&self) -> Result<RocksdbStats> {
+        self.inner.get_memory_usage_stats().map_err(From::from)
+    }
+
+    pub async fn delay_compaction(&self) -> tokio::sync::RwLockReadGuard<'_, ()> {
+        self.compaction_lock.read().await
+    }
+
+    pub async fn trigger_compaction(&self) {
+        use std::time::Instant;
+
+        let _compaction_guard = self.compaction_lock.write().await;
+
+        let tables = [
+            (self.block_handles.cf(), "block handles"),
+            (self.package_entries.cf(), "package entries"),
+            (self.archives.cf(), "archives"),
+            (self.shard_states.cf(), "shard states"),
+            (self.cells.cf(), "cells"),
+        ];
+
+        for (cf, title) in tables {
+            tracing::info!("{title} compaction started");
+
+            let instant = Instant::now();
+
+            let bound = Option::<[u8; 0]>::None;
+            self.raw().compact_range_cf(&cf, bound, bound);
+
+            tracing::info!(
+                elapsed = %humantime::format_duration(instant.elapsed()),
+                "{title} compaction finished"
+            );
+        }
+    }
+
+    pub fn get_disk_usage(&self) -> Result<Vec<DiskUsageInfo>> {
+        use std::thread;
+
+        fn get_table_stats<T: ColumnFamily>(db: &WeeDb) -> (ByteSize, ByteSize) {
+            let cf = db.instantiate_table::<T>();
+            let res: (usize, usize) = cf
+                .iterator(rocksdb::IteratorMode::Start)
+                .flat_map(|x| {
+                    let x = match x {
+                        Ok(x) => x,
+                        Err(e) => {
+                            tracing::error!("Error while iterating: {}", e);
+                            return None;
+                        }
+                    };
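+                    // Accumulate each entry's (key_len, value_len); the fold
+                    // below sums them into per-table key and value byte totals.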
+                    Some((x.0.len(), x.1.len()))
+                })
+                .fold((0, 0), |acc, x| (acc.0 + x.0, acc.1 + x.1));
+
+            (ByteSize(res.0 as u64), ByteSize(res.1 as u64))
+        }
+
+        macro_rules! stats {
+            ($spawner:expr, $( $x:ident => $table:ty ),* ) => {{
+                $(
+                    let $x = $spawner.spawn(|| get_table_stats::<$table>(&self.inner));
+                )*
+                stats!($($x),*)
+            }
+            };
+            ( $( $x:ident),* ) => {
+                {
+                    let mut temp_vec = Vec::new();
+                    $(
+                        temp_vec.push({
+                            let $x = $x.join().map_err(|_| anyhow::anyhow!("Join error"))?;
+                            DiskUsageInfo {
+                                cf_name: stringify!($x).to_string(),
+                                keys_total: $x.0,
+                                values_total: $x.1,
+                            }
+                        });
+                    )*
+                    return Ok(temp_vec)
+                }
+            };
+        }
+
+        let stats = thread::scope(|s| -> Result<Vec<DiskUsageInfo>> {
+            stats!(s,
+                archives => tables::Archives,
+                block_handles => tables::BlockHandles,
+                key_blocks => tables::KeyBlocks,
+                package_entries => tables::PackageEntries,
+                shard_states => tables::ShardStates,
+                cells => tables::Cells,
+                node_states => tables::NodeStates,
+                prev1 => tables::Prev1,
+                prev2 => tables::Prev2,
+                next1 => tables::Next1,
+                next2 => tables::Next2
+            )
+        })?;
+
+        Ok(stats)
+    }
+}
+
+#[derive(Debug, Clone)]
+pub struct DiskUsageInfo {
+    pub cf_name: String,
+    pub keys_total: ByteSize,
+    pub values_total: ByteSize,
+}
+
+impl Drop for Db {
+    fn drop(&mut self) {
+        self.raw().cancel_all_background_work(true)
+    }
+}
+
+#[derive(Debug, Copy, Clone, Serialize, Deserialize)]
+#[serde(deny_unknown_fields, default)]
+pub struct DbOptions {
+    pub rocksdb_lru_capacity: ByteSize,
+    pub cells_cache_size: ByteSize,
+}
+
+impl Default for DbOptions {
+    fn default() -> Self {
+        // Fetch the currently available memory in bytes
+        let available = {
+            let mut sys = sysinfo::System::new();
+            sys.refresh_memory();
+            sys.available_memory()
+        };
+
+        // Estimated memory usage of components other than cache:
+        // - 2 GiBs for write buffers (4 if we are out of luck and all memtables are being flushed at the same time)
+        // - 2 GiBs for indexer logic
+        // - 10 bits per cell for bloom filter. Realistic case is 100M cells, so 0.25 GiBs
+        // - 1/3 of all available memory is reserved for kernel buffers
+        const WRITE_BUFFERS: ByteSize = ByteSize::gib(2);
+        const INDEXER_LOGIC: ByteSize = ByteSize::gib(2);
+        const BLOOM_FILTER: ByteSize = ByteSize::mib(256);
+        let estimated_memory_usage = WRITE_BUFFERS + INDEXER_LOGIC + BLOOM_FILTER + available / 3;
+
+        // Reduce the available memory by the fixed offset
+        let available = available
+            .checked_sub(estimated_memory_usage.as_u64())
+            .unwrap_or_else(|| {
+                tracing::error!(
+                    "Not enough memory for cache, using 1/4 of all available memory. \
+                     Tweak `db_options` in config to improve performance."
+                );
+                available / 4
+            });
+
+        // We will use 3/4 of the remaining memory for the cells cache (at most 4 GB).
+        let cells_cache_size = std::cmp::min(ByteSize(available * 3 / 4), ByteSize::gib(4));
+
+        // The rest of the memory is used for the LRU cache (at least 128 MB)
+        let rocksdb_lru_capacity = std::cmp::max(
+            ByteSize(available.saturating_sub(cells_cache_size.as_u64())),
+            ByteSize::mib(128),
+        );
+
+        Self {
+            rocksdb_lru_capacity,
+            cells_cache_size,
+        }
+    }
+}
diff --git a/storage/src/refcount.rs b/storage/src/db/refcount.rs
similarity index 100%
rename from storage/src/refcount.rs
rename to storage/src/db/refcount.rs
diff --git a/storage/src/tables.rs b/storage/src/db/tables.rs
similarity index 100%
rename from storage/src/tables.rs
rename to storage/src/db/tables.rs
diff --git a/storage/src/lib.rs b/storage/src/lib.rs
index a19a59a86..75f36e313 100644
--- a/storage/src/lib.rs
+++ b/storage/src/lib.rs
@@ -1,250 +1,45 @@
 use std::path::PathBuf;
 use std::sync::Arc;
-use std::thread::available_parallelism;
 
-use anyhow::{Context, Result};
-use bytesize::ByteSize;
-use weedb::{Caches, WeeDb};
-
-pub use weedb::Stats as RocksdbStats;
-pub use weedb::{rocksdb, BoundedCfHandle, ColumnFamily, Table, UnboundedCfHandle};
-
-use crate::config::DbOptions;
-
-pub mod refcount;
-pub mod tables;
-
-mod config;
-mod migrations;
-
-pub struct Db {
-    pub archives: Table<tables::Archives>,
-    pub block_handles: Table<tables::BlockHandles>,
-    pub key_blocks: Table<tables::KeyBlocks>,
-    pub package_entries: Table<tables::PackageEntries>,
-    pub shard_states: Table<tables::ShardStates>,
-    pub cells: Table<tables::Cells>,
-    pub node_states: Table<tables::NodeStates>,
-    pub prev1: Table<tables::Prev1>,
-    pub prev2: Table<tables::Prev2>,
-    pub next1: Table<tables::Next1>,
-    pub next2: Table<tables::Next2>,
-
-    compaction_lock: tokio::sync::RwLock<()>,
-    inner: WeeDb,
+pub use self::block_connection_storage::*;
+pub use self::block_handle_storage::*;
+pub use self::models::*;
+pub use self::runtime_storage::*;
+
+use self::block_storage::*;
+use self::shard_state_storage::*;
+
+mod block_connection_storage;
+mod block_handle_storage;
+mod block_storage;
+mod db;
+mod models;
+mod node_state_storage;
+mod runtime_storage;
+mod shard_state_storage;
+
+pub struct Storage {
+    file_db_path: PathBuf,
+
+    runtime_storage: Arc<RuntimeStorage>,
+    block_handle_storage: Arc<BlockHandleStorage>,
+    block_storage: Arc<BlockStorage>,
+    shard_state_storage: ShardStateStorage,
+    block_connection_storage: BlockConnectionStorage,
+    //node_state_storage: NodeStateStorage,
+    //persistent_state_storage: PersistentStateStorage,
 }
 
-impl Db {
-    pub fn open(path: PathBuf, options: DbOptions) -> Result<Arc<Self>> {
-        tracing::info!(
-            rocksdb_lru_capacity = %options.rocksdb_lru_capacity,
-            cells_cache_size = %options.cells_cache_size,
-            "opening DB"
-        );
-
-        let limit = match fdlimit::raise_fd_limit() {
-            // New fd limit
-            Ok(fdlimit::Outcome::LimitRaised { to, ..
}) => to, - // Current soft limit - _ => { - rlimit::getrlimit(rlimit::Resource::NOFILE) - .unwrap_or((256, 0)) - .0 - } - }; - - let caches_capacity = - std::cmp::max(options.rocksdb_lru_capacity, ByteSize::mib(256)).as_u64() as usize; - - let caches = Caches::with_capacity(caches_capacity); - let threads = available_parallelism()?.get(); - - let inner = WeeDb::builder(path, caches) - .options(|opts, _| { - opts.set_paranoid_checks(false); - - // bigger base level size - less compactions - // parallel compactions finishes faster - less write stalls - - opts.set_max_subcompactions(threads as u32 / 2); - - // io - opts.set_max_open_files(limit as i32); - - // logging - opts.set_log_level(rocksdb::LogLevel::Info); - opts.set_keep_log_file_num(2); - opts.set_recycle_log_file_num(2); - - // cf - opts.create_if_missing(true); - opts.create_missing_column_families(true); - - // cpu - opts.set_max_background_jobs(std::cmp::max((threads as i32) / 2, 2)); - opts.increase_parallelism(threads as i32); - - opts.set_allow_concurrent_memtable_write(false); - opts.set_enable_write_thread_adaptive_yield(true); - - // debug - // NOTE: could slower everything a bit in some cloud environments. - // See: https://github.com/facebook/rocksdb/issues/3889 - // - // opts.enable_statistics(); - // opts.set_stats_dump_period_sec(600); - }) - .with_table::() - .with_table::() - .with_table::() - .with_table::() - .with_table::() - .with_table::() - .with_table::() - .with_table::() - .with_table::() - .with_table::() - .with_table::() - .build() - .context("Failed building db")?; - - migrations::apply(&inner).context("Failed to apply migrations")?; - - Ok(Arc::new(Self { - archives: inner.instantiate_table(), - block_handles: inner.instantiate_table(), - key_blocks: inner.instantiate_table(), - package_entries: inner.instantiate_table(), - shard_states: inner.instantiate_table(), - cells: inner.instantiate_table(), - node_states: inner.instantiate_table(), - prev1: inner.instantiate_table(), - prev2: inner.instantiate_table(), - next1: inner.instantiate_table(), - next2: inner.instantiate_table(), - compaction_lock: tokio::sync::RwLock::default(), - inner, - })) - } - - #[inline] - pub fn raw(&self) -> &Arc { - self.inner.raw() - } - - pub fn get_memory_usage_stats(&self) -> Result { - self.inner.get_memory_usage_stats().map_err(From::from) +impl Storage { + pub fn block_handle_storage(&self) -> &BlockHandleStorage { + &self.block_handle_storage } - pub async fn delay_compaction(&self) -> tokio::sync::RwLockReadGuard<'_, ()> { - self.compaction_lock.read().await - } - - pub async fn trigger_compaction(&self) { - use std::time::Instant; - - let _compaction_guard = self.compaction_lock.write().await; - - let tables = [ - (self.block_handles.cf(), "block handles"), - (self.package_entries.cf(), "package entries"), - (self.archives.cf(), "archives"), - (self.shard_states.cf(), "shard states"), - (self.cells.cf(), "cells"), - ]; - - for (cf, title) in tables { - tracing::info!("{title} compaction started"); - - let instant = Instant::now(); - - let bound = Option::<[u8; 0]>::None; - self.raw().compact_range_cf(&cf, bound, bound); - - tracing::info!( - elapsed = %humantime::format_duration(instant.elapsed()), - "{title} compaction finished" - ); - } + pub fn block_connection_storage(&self) -> &BlockConnectionStorage { + &self.block_connection_storage } - pub fn get_disk_usage(&self) -> Result> { - use std::thread; - - fn get_table_stats(db: &WeeDb) -> (ByteSize, ByteSize) { - let cf = db.instantiate_table::(); - let res: 
(usize, usize) = cf - .iterator(rocksdb::IteratorMode::Start) - .flat_map(|x| { - let x = match x { - Ok(x) => x, - Err(e) => { - tracing::error!("Error while iterating: {}", e); - return None; - } - }; - Some((x.0.len(), x.1.len())) - }) - .fold((0, 0), |acc, x| (acc.0 + x.0, acc.1 + x.1)); - - (ByteSize(res.0 as u64), ByteSize(res.1 as u64)) - } - - macro_rules! stats { - ($spawner:expr, $( $x:ident => $table:ty ),* ) => {{ - $( - let $x = $spawner.spawn(|| get_table_stats::<$table>(&self.inner)); - )* - stats!($($x),*) - } - }; - ( $( $x:ident),* ) => { - { - let mut temp_vec = Vec::new(); - $( - temp_vec.push({ - let $x = $x.join().map_err(|_|anyhow::anyhow!("Join error"))?; - DiskUsageInfo { - cf_name: stringify!($x).to_string(), - keys_total: $x.0, - values_total: $x.1, - } - }); - )* - return Ok(temp_vec) - } - }; - } - - let stats = thread::scope(|s| -> Result> { - stats!(s, - archives => tables::Archives, - block_handles => tables::BlockHandles, - key_blocks => tables::KeyBlocks, - package_entries => tables::PackageEntries, - shard_states => tables::ShardStates, - cells => tables::Cells, - node_states => tables::NodeStates, - prev1 => tables::Prev1, - prev2 => tables::Prev2, - next1 => tables::Next1, - next2 => tables::Next2 - ) - })?; - - Ok(stats) - } -} - -#[derive(Debug, Clone)] -pub struct DiskUsageInfo { - pub cf_name: String, - pub keys_total: ByteSize, - pub values_total: ByteSize, -} - -impl Drop for Db { - fn drop(&mut self) { - self.raw().cancel_all_background_work(true) + pub fn shard_state_storage(&self) -> &ShardStateStorage { + &self.shard_state_storage } } diff --git a/storage/src/models/block_handle.rs b/storage/src/models/block_handle.rs new file mode 100644 index 000000000..6c239544c --- /dev/null +++ b/storage/src/models/block_handle.rs @@ -0,0 +1,100 @@ +/// This file is a modified copy of the file from https://github.com/tonlabs/ton-labs-node +/// +/// Changes: +/// - replaced old `failure` crate with `anyhow` +/// - moved all flags to meta +use std::sync::{Arc, Weak}; + +use anyhow::Result; +use everscale_types::models::*; +use tokio::sync::RwLock; + +use super::BlockMeta; +use tycho_block_util::FastDashMap; + +pub struct BlockHandle { + id: BlockId, + meta: BlockMeta, + block_data_lock: RwLock<()>, + proof_data_block: RwLock<()>, + cache: Arc>>, +} + +impl BlockHandle { + pub fn with_values( + id: BlockId, + meta: BlockMeta, + cache: Arc>>, + ) -> Self { + Self { + id, + meta, + block_data_lock: Default::default(), + proof_data_block: Default::default(), + cache, + } + } + + #[inline] + pub fn id(&self) -> &BlockId { + &self.id + } + + #[inline] + pub fn meta(&self) -> &BlockMeta { + &self.meta + } + + #[inline] + pub fn is_key_block(&self) -> bool { + self.meta.is_key_block() || self.id.seqno == 0 + } + + #[inline] + pub fn block_data_lock(&self) -> &RwLock<()> { + &self.block_data_lock + } + + #[inline] + pub fn proof_data_lock(&self) -> &RwLock<()> { + &self.proof_data_block + } + + pub fn has_proof_or_link(&self, is_link: &mut bool) -> bool { + *is_link = !self.id.shard.is_masterchain(); + if *is_link { + self.meta.has_proof_link() + } else { + self.meta.has_proof() + } + } + + pub fn masterchain_ref_seqno(&self) -> u32 { + if self.id.shard.is_masterchain() { + self.id.seqno + } else { + self.meta.masterchain_ref_seqno() + } + } + + pub fn set_masterchain_ref_seqno(&self, masterchain_ref_seqno: u32) -> Result { + match self.meta.set_masterchain_ref_seqno(masterchain_ref_seqno) { + 0 => Ok(true), + prev_seqno if prev_seqno == masterchain_ref_seqno => 
Ok(false), + _ => Err(BlockHandleError::RefSeqnoAlreadySet.into()), + } + } +} + +impl Drop for BlockHandle { + fn drop(&mut self) { + self.cache + .remove_if(&self.id, |_, weak| weak.strong_count() == 0); + } +} + +#[derive(thiserror::Error, Debug)] +enum BlockHandleError { + #[error("Different masterchain ref seqno has already been set")] + RefSeqnoAlreadySet, +} diff --git a/storage/src/models/block_meta.rs b/storage/src/models/block_meta.rs new file mode 100644 index 000000000..751abd2a0 --- /dev/null +++ b/storage/src/models/block_meta.rs @@ -0,0 +1,294 @@ +/// This file is a modified copy of the file from https://github.com/tonlabs/ton-labs-node +/// +/// Changes: +/// - replaced old `failure` crate with `anyhow` +/// - moved all flags here from block handle +/// - removed temporary unused flags +use std::sync::atomic::{AtomicU64, Ordering}; + +use anyhow::Result; +use bytes::Buf; +use everscale_types::models::BlockInfo; + +use tycho_block_util::{StoredValue, StoredValueBuffer}; + +#[derive(Debug, Copy, Clone)] +pub struct BlockMetaData { + pub is_key_block: bool, + pub gen_utime: u32, + pub mc_ref_seqno: Option, +} + +#[derive(Debug, Copy, Clone)] +pub struct BriefBlockInfo { + pub is_key_block: bool, + pub gen_utime: u32, + pub after_split: bool, +} + +impl BriefBlockInfo { + pub fn with_mc_seqno(self, mc_seqno: u32) -> BlockMetaData { + BlockMetaData { + is_key_block: self.is_key_block, + gen_utime: self.gen_utime, + mc_ref_seqno: Some(mc_seqno), + } + } +} + +impl From<&BlockInfo> for BriefBlockInfo { + fn from(info: &BlockInfo) -> Self { + Self { + is_key_block: info.key_block, + gen_utime: info.gen_utime, + after_split: info.after_split, + } + } +} + +impl BlockMetaData { + pub fn zero_state(gen_utime: u32) -> Self { + Self { + is_key_block: true, + gen_utime, + mc_ref_seqno: Some(0), + } + } +} + +#[derive(Debug, Default)] +pub struct BlockMeta { + flags: AtomicU64, + gen_utime: u32, +} + +impl BlockMeta { + pub fn with_data(data: BlockMetaData) -> Self { + Self { + flags: AtomicU64::new( + if data.is_key_block { + BLOCK_META_FLAG_IS_KEY_BLOCK + } else { + 0 + } | data.mc_ref_seqno.unwrap_or_default() as u64, + ), + gen_utime: data.gen_utime, + } + } + + pub fn brief(&self) -> BriefBlockMeta { + BriefBlockMeta { + flags: self.flags.load(Ordering::Acquire), + gen_utime: self.gen_utime, + } + } + + pub fn masterchain_ref_seqno(&self) -> u32 { + self.flags.load(Ordering::Acquire) as u32 + } + + pub fn set_masterchain_ref_seqno(&self, seqno: u32) -> u32 { + self.flags.fetch_or(seqno as u64, Ordering::Release) as u32 + } + + #[inline] + pub fn gen_utime(&self) -> u32 { + self.gen_utime + } + + pub fn clear_data_and_proof(&self) { + self.flags.fetch_and(CLEAR_DATA_MASK, Ordering::Release); + } + + pub fn set_has_data(&self) -> bool { + self.set_flag(BLOCK_META_FLAG_HAS_DATA) + } + + pub fn has_data(&self) -> bool { + self.test_flag(BLOCK_META_FLAG_HAS_DATA) + } + + pub fn set_has_proof(&self) -> bool { + self.set_flag(BLOCK_META_FLAG_HAS_PROOF) + } + + pub fn has_proof(&self) -> bool { + self.test_flag(BLOCK_META_FLAG_HAS_PROOF) + } + + pub fn set_has_proof_link(&self) -> bool { + self.set_flag(BLOCK_META_FLAG_HAS_PROOF_LINK) + } + + pub fn has_proof_link(&self) -> bool { + self.test_flag(BLOCK_META_FLAG_HAS_PROOF_LINK) + } + + pub fn set_has_state(&self) -> bool { + self.set_flag(BLOCK_META_FLAG_HAS_STATE) + } + + pub fn has_state(&self) -> bool { + self.test_flag(BLOCK_META_FLAG_HAS_STATE) + } + + #[allow(unused)] + pub fn set_has_persistent_state(&self) -> bool { + 
self.set_flag(BLOCK_META_FLAG_HAS_PERSISTENT_STATE) + } + + #[allow(unused)] + pub fn has_persistent_state(&self) -> bool { + self.test_flag(BLOCK_META_FLAG_HAS_PERSISTENT_STATE) + } + + pub fn set_has_next1(&self) -> bool { + self.set_flag(BLOCK_META_FLAG_HAS_NEXT_1) + } + + pub fn has_next1(&self) -> bool { + self.test_flag(BLOCK_META_FLAG_HAS_NEXT_1) + } + + pub fn set_has_next2(&self) -> bool { + self.set_flag(BLOCK_META_FLAG_HAS_NEXT_2) + } + + pub fn has_next2(&self) -> bool { + self.test_flag(BLOCK_META_FLAG_HAS_NEXT_2) + } + + pub fn set_has_prev1(&self) -> bool { + self.set_flag(BLOCK_META_FLAG_HAS_PREV_1) + } + + pub fn has_prev1(&self) -> bool { + self.test_flag(BLOCK_META_FLAG_HAS_PREV_1) + } + + pub fn set_has_prev2(&self) -> bool { + self.set_flag(BLOCK_META_FLAG_HAS_PREV_2) + } + + pub fn has_prev2(&self) -> bool { + self.test_flag(BLOCK_META_FLAG_HAS_PREV_2) + } + + pub fn set_is_applied(&self) -> bool { + self.set_flag(BLOCK_META_FLAG_IS_APPLIED) + } + + pub fn is_applied(&self) -> bool { + self.test_flag(BLOCK_META_FLAG_IS_APPLIED) + } + + pub fn is_key_block(&self) -> bool { + self.test_flag(BLOCK_META_FLAG_IS_KEY_BLOCK) + } + + pub fn set_is_moving_to_archive(&self) -> bool { + self.set_flag(BLOCK_META_FLAG_MOVING_TO_ARCHIVE) + } + + pub fn set_is_archived(&self) -> bool { + self.set_flag(BLOCK_META_FLAG_MOVED_TO_ARCHIVE) + } + + pub fn is_archived(&self) -> bool { + self.test_flag(BLOCK_META_FLAG_MOVED_TO_ARCHIVE) + } + + fn test_flag(&self, flag: u64) -> bool { + self.flags.load(Ordering::Acquire) & flag == flag + } + + fn set_flag(&self, flag: u64) -> bool { + self.flags.fetch_or(flag, Ordering::Release) & flag != flag + } +} + +impl StoredValue for BlockMeta { + /// 8 bytes flags + /// 4 bytes gen_utime + const SIZE_HINT: usize = 8 + 4; + + type OnStackSlice = [u8; Self::SIZE_HINT]; + + fn serialize(&self, buffer: &mut T) { + const FLAGS_MASK: u64 = 0x0000_ffff_ffff_ffff; + let flags = self.flags.load(Ordering::Acquire) & FLAGS_MASK; + + buffer.write_raw_slice(&flags.to_le_bytes()); + buffer.write_raw_slice(&self.gen_utime.to_le_bytes()); + } + + fn deserialize(reader: &mut &[u8]) -> Result + where + Self: Sized, + { + let flags = reader.get_u64_le(); + let gen_utime = reader.get_u32_le(); + + Ok(Self { + flags: AtomicU64::new(flags), + gen_utime, + }) + } +} + +#[derive(Debug, Default, Copy, Clone)] +pub struct BriefBlockMeta { + flags: u64, + gen_utime: u32, +} + +impl BriefBlockMeta { + #[inline] + pub fn gen_utime(&self) -> u32 { + self.gen_utime + } + + #[inline] + pub fn masterchain_ref_seqno(&self) -> u32 { + self.flags as u32 + } + + #[inline] + pub fn is_key_block(&self) -> bool { + self.test_flag(BLOCK_META_FLAG_IS_KEY_BLOCK) + } + + #[inline] + fn test_flag(&self, flag: u64) -> bool { + self.flags & flag == flag + } +} + +const BLOCK_META_FLAG_HAS_DATA: u64 = 1 << 32; +const BLOCK_META_FLAG_HAS_PROOF: u64 = 1 << (32 + 1); +const BLOCK_META_FLAG_HAS_PROOF_LINK: u64 = 1 << (32 + 2); +// skip flag 3 (processed by external listener) +const BLOCK_META_FLAG_HAS_STATE: u64 = 1 << (32 + 4); +const BLOCK_META_FLAG_HAS_PERSISTENT_STATE: u64 = 1 << (32 + 5); +const BLOCK_META_FLAG_HAS_NEXT_1: u64 = 1 << (32 + 6); +const BLOCK_META_FLAG_HAS_NEXT_2: u64 = 1 << (32 + 7); +const BLOCK_META_FLAG_HAS_PREV_1: u64 = 1 << (32 + 8); +const BLOCK_META_FLAG_HAS_PREV_2: u64 = 1 << (32 + 9); +const BLOCK_META_FLAG_IS_APPLIED: u64 = 1 << (32 + 10); +const BLOCK_META_FLAG_IS_KEY_BLOCK: u64 = 1 << (32 + 11); + +const BLOCK_META_FLAG_MOVING_TO_ARCHIVE: u64 = 1 << (32 + 12); +const 
BLOCK_META_FLAG_MOVED_TO_ARCHIVE: u64 = 1 << (32 + 13); + +const CLEAR_DATA_MASK: u64 = + !(BLOCK_META_FLAG_HAS_DATA | BLOCK_META_FLAG_HAS_PROOF | BLOCK_META_FLAG_HAS_PROOF_LINK); + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + pub fn fully_on_stack() { + assert!(!BlockMeta::default().to_vec().spilled()); + } +} diff --git a/storage/src/models/mod.rs b/storage/src/models/mod.rs new file mode 100644 index 000000000..b106684ee --- /dev/null +++ b/storage/src/models/mod.rs @@ -0,0 +1,5 @@ +pub use block_handle::BlockHandle; +pub use block_meta::{BlockMeta, BlockMetaData, BriefBlockMeta}; + +mod block_handle; +mod block_meta; diff --git a/storage/src/node_state_storage/mod.rs b/storage/src/node_state_storage/mod.rs new file mode 100644 index 000000000..61265bc43 --- /dev/null +++ b/storage/src/node_state_storage/mod.rs @@ -0,0 +1,137 @@ +use std::sync::Arc; + +use anyhow::Result; +use everscale_types::models::*; +use parking_lot::Mutex; + +use crate::db::*; +use tycho_block_util::{read_block_id_le, write_block_id_le, StoredValue}; + +pub struct NodeStateStorage { + db: Arc, + last_mc_block_id: BlockIdCache, + init_mc_block_id: BlockIdCache, + shards_client_mc_block_id: BlockIdCache, +} + +impl NodeStateStorage { + pub fn new(db: Arc) -> Result { + Ok(Self { + db, + last_mc_block_id: (Default::default(), LAST_MC_BLOCK_ID), + init_mc_block_id: (Default::default(), INIT_MC_BLOCK_ID), + shards_client_mc_block_id: (Default::default(), SHARDS_CLIENT_MC_BLOCK_ID), + }) + } + + pub fn store_historical_sync_start(&self, id: &BlockId) -> Result<()> { + let node_states = &self.db.node_states; + node_states.insert(HISTORICAL_SYNC_LOW, id.to_vec())?; + Ok(()) + } + + pub fn load_historical_sync_start(&self) -> Result> { + Ok(match self.db.node_states.get(HISTORICAL_SYNC_LOW)? { + Some(data) => Some(BlockId::from_slice(data.as_ref())?), + None => None, + }) + } + + pub fn store_historical_sync_end(&self, id: &BlockId) -> Result<()> { + let node_states = &self.db.node_states; + node_states.insert(HISTORICAL_SYNC_HIGH, id.to_vec())?; + Ok(()) + } + + pub fn load_historical_sync_end(&self) -> Result { + let node_states = &self.db.node_states; + let data = node_states + .get(HISTORICAL_SYNC_HIGH)? + .ok_or(NodeStateStorageError::HighBlockNotFound)?; + BlockId::from_slice(data.as_ref()) + } + + #[allow(unused)] + pub fn store_last_uploaded_archive(&self, archive_id: u32) -> Result<()> { + let node_states = &self.db.node_states; + node_states.insert(LAST_UPLOADED_ARCHIVE, archive_id.to_le_bytes())?; + Ok(()) + } + + #[allow(unused)] + pub fn load_last_uploaded_archive(&self) -> Result> { + Ok(match self.db.node_states.get(LAST_UPLOADED_ARCHIVE)? 
{ + Some(data) if data.len() >= 4 => { + Some(u32::from_le_bytes(data[..4].try_into().unwrap())) + } + _ => None, + }) + } + + pub fn store_last_mc_block_id(&self, id: &BlockId) -> Result<()> { + self.store_block_id(&self.last_mc_block_id, id) + } + + pub fn load_last_mc_block_id(&self) -> Result { + self.load_block_id(&self.last_mc_block_id) + } + + pub fn store_init_mc_block_id(&self, id: &BlockId) -> Result<()> { + self.store_block_id(&self.init_mc_block_id, id) + } + + pub fn load_init_mc_block_id(&self) -> Result { + self.load_block_id(&self.init_mc_block_id) + } + + pub fn store_shards_client_mc_block_id(&self, id: &BlockId) -> Result<()> { + self.store_block_id(&self.shards_client_mc_block_id, id) + } + + pub fn load_shards_client_mc_block_id(&self) -> Result { + self.load_block_id(&self.shards_client_mc_block_id) + } + + #[inline(always)] + fn store_block_id(&self, (cache, key): &BlockIdCache, block_id: &BlockId) -> Result<()> { + let node_states = &self.db.node_states; + node_states.insert(key, write_block_id_le(block_id))?; + *cache.lock() = Some(*block_id); + Ok(()) + } + + #[inline(always)] + fn load_block_id(&self, (cache, key): &BlockIdCache) -> Result { + if let Some(cached) = &*cache.lock() { + return Ok(*cached); + } + + let value = match self.db.node_states.get(key)? { + Some(data) => read_block_id_le(&data).ok_or(NodeStateStorageError::InvalidBlockId)?, + None => return Err(NodeStateStorageError::ParamNotFound.into()), + }; + *cache.lock() = Some(value); + Ok(value) + } +} + +#[derive(thiserror::Error, Debug)] +pub enum NodeStateStorageError { + #[error("High block not found")] + HighBlockNotFound, + #[error("Not found")] + ParamNotFound, + #[error("Invalid block id")] + InvalidBlockId, +} + +type BlockIdCache = (Mutex>, &'static [u8]); + +const HISTORICAL_SYNC_LOW: &[u8] = b"background_sync_low"; +const HISTORICAL_SYNC_HIGH: &[u8] = b"background_sync_high"; + +const LAST_UPLOADED_ARCHIVE: &[u8] = b"last_uploaded_archive"; + +const LAST_MC_BLOCK_ID: &[u8] = b"LastMcBlockId"; +const INIT_MC_BLOCK_ID: &[u8] = b"InitMcBlockId"; +const SHARDS_CLIENT_MC_BLOCK_ID: &[u8] = b"ShardsClientMcBlockId"; diff --git a/storage/src/runtime_storage/mod.rs b/storage/src/runtime_storage/mod.rs new file mode 100644 index 000000000..69bdafae1 --- /dev/null +++ b/storage/src/runtime_storage/mod.rs @@ -0,0 +1,23 @@ +use std::sync::Arc; + +pub use self::persistent_state_keeper::PersistentStateKeeper; +use super::BlockHandleStorage; + +mod persistent_state_keeper; + +pub struct RuntimeStorage { + persistent_state_keeper: PersistentStateKeeper, +} + +impl RuntimeStorage { + pub fn new(block_handle_storage: Arc) -> Self { + Self { + persistent_state_keeper: PersistentStateKeeper::new(block_handle_storage), + } + } + + #[inline(always)] + pub fn persistent_state_keeper(&self) -> &PersistentStateKeeper { + &self.persistent_state_keeper + } +} \ No newline at end of file diff --git a/storage/src/runtime_storage/persistent_state_keeper.rs b/storage/src/runtime_storage/persistent_state_keeper.rs new file mode 100644 index 000000000..24bfc8c77 --- /dev/null +++ b/storage/src/runtime_storage/persistent_state_keeper.rs @@ -0,0 +1,88 @@ +use std::sync::atomic::{AtomicBool, AtomicU32, Ordering}; +use std::sync::Arc; + +use anyhow::Result; +use arc_swap::ArcSwapOption; +use tokio::sync::Notify; + +use crate::models::{BlockHandle, BriefBlockMeta}; +use crate::BlockHandleStorage; +use tycho_block_util::*; + +pub struct PersistentStateKeeper { + block_handle_storage: Arc, + initialized: AtomicBool, + 
persistent_state_changed: Notify, + current_persistent_state: ArcSwapOption, + last_utime: AtomicU32, +} + +impl PersistentStateKeeper { + pub fn new(block_handle_storage: Arc) -> Self { + Self { + block_handle_storage, + initialized: Default::default(), + persistent_state_changed: Default::default(), + current_persistent_state: Default::default(), + last_utime: Default::default(), + } + } + + pub fn update(&self, block_handle: &Arc) -> Result<()> { + if !self.initialized.load(Ordering::Acquire) { + let prev_persistent_key_block = self + .block_handle_storage + .find_prev_persistent_key_block(block_handle.id().seqno)?; + + if let Some(handle) = &prev_persistent_key_block { + self.last_utime + .store(handle.meta().gen_utime(), Ordering::Release); + } + self.current_persistent_state + .store(prev_persistent_key_block); + + self.initialized.store(true, Ordering::Release); + + self.persistent_state_changed.notify_waiters(); + } + + if !block_handle.is_key_block() { + return Ok(()); + } + + let block_utime = block_handle.meta().gen_utime(); + let prev_utime = self.last_utime(); + + if prev_utime > block_utime { + return Ok(()); + } + + if is_persistent_state(block_utime, prev_utime) { + self.last_utime.store(block_utime, Ordering::Release); + self.current_persistent_state + .store(Some(block_handle.clone())); + self.persistent_state_changed.notify_waiters(); + } + + Ok(()) + } + + pub fn last_utime(&self) -> u32 { + self.last_utime.load(Ordering::Acquire) + } + + pub fn current(&self) -> Option> { + self.current_persistent_state.load_full() + } + + pub fn current_meta(&self) -> Option<(u32, BriefBlockMeta)> { + self.current_persistent_state + .load() + .as_ref() + .map(|handle| (handle.id().seqno, handle.meta().brief())) + } + + pub fn new_state_found(&self) -> tokio::sync::futures::Notified { + self.persistent_state_changed.notified() + } +} diff --git a/storage/src/shard_state_storage/cell_storage.rs b/storage/src/shard_state_storage/cell_storage.rs new file mode 100644 index 000000000..af2c4166a --- /dev/null +++ b/storage/src/shard_state_storage/cell_storage.rs @@ -0,0 +1,728 @@ +use std::cell::UnsafeCell; +use std::collections::hash_map; +use std::mem::{ManuallyDrop, MaybeUninit}; +use std::sync::atomic::{AtomicU8, Ordering}; +use std::sync::{Arc, Weak}; + +use anyhow::{Context, Result}; +use bumpalo::Bump; +use bytes::Bytes; +use everscale_types::cell::*; +use quick_cache::sync::Cache; + +use crate::db::*; +use tycho_block_util::{CacheStats, FastDashMap, FastHashMap, FastHasherState}; + +pub struct CellStorage { + db: Arc, + cells_cache: Arc>>, + raw_cells_cache: RawCellsCache, +} + +impl CellStorage { + pub fn new(db: Arc, cache_size_bytes: u64) -> Result> { + let cells_cache = Default::default(); + let raw_cells_cache = RawCellsCache::new(cache_size_bytes); + + Ok(Arc::new(Self { + db, + cells_cache, + raw_cells_cache, + })) + } + + pub fn store_cell( + &self, + batch: &mut weedb::rocksdb::WriteBatch, + root: Cell, + ) -> Result { + struct CellWithRefs<'a> { + rc: u32, + data: Option<&'a [u8]>, + } + + struct Context<'a> { + cells_cf: &'a weedb::BoundedCfHandle<'a>, + alloc: &'a Bump, + transaction: FastHashMap>, + buffer: Vec, + } + + impl Context<'_> { + fn insert_cell( + &mut self, + key: &HashBytes, + cell: &DynCell, + value: Option, + ) -> Result + where + V: AsRef<[u8]>, + { + Ok(match self.transaction.entry(*key) { + hash_map::Entry::Occupied(mut value) => { + value.get_mut().rc += 1; + false + } + hash_map::Entry::Vacant(entry) => { + let has_value = + matches!(value, 
Some(value) if refcount::has_value(value.as_ref())); + + let data = if !has_value { + self.buffer.clear(); + if StorageCell::serialize_to(cell, &mut self.buffer).is_err() { + return Err(CellStorageError::InvalidCell); + } + Some(self.alloc.alloc_slice_copy(self.buffer.as_slice()) as &[u8]) + } else { + None + }; + entry.insert(CellWithRefs { rc: 1, data }); + !has_value + } + }) + } + + fn finalize( + mut self, + batch: &mut weedb::rocksdb::WriteBatch, + raw_cache: &RawCellsCache, + ) -> usize { + let total = self.transaction.len(); + for (key, CellWithRefs { rc, data }) in self.transaction { + self.buffer.clear(); + refcount::add_positive_refount(rc, data, &mut self.buffer); + if data.is_some() { + raw_cache.insert(key, &self.buffer); + } + batch.merge_cf(self.cells_cf, key.as_array(), &self.buffer); + } + total + } + } + + // Prepare context and handles + let alloc = Bump::new(); + let cells = &self.db.cells; + let cells_cf = &cells.cf(); + + let mut ctx = Context { + cells_cf, + alloc: &alloc, + transaction: FastHashMap::with_capacity_and_hasher(128, Default::default()), + buffer: Vec::with_capacity(512), + }; + + // Check root cell + { + let key = root.repr_hash(); + match cells.get(key.as_array()) { + Ok(value) => { + if !ctx.insert_cell(key, root.as_ref(), value.as_deref())? { + return Ok(0); + } + } + Err(e) => return Err(CellStorageError::Internal(e)), + } + } + + let mut stack = Vec::with_capacity(16); + stack.push(root.as_ref()); + + // Check other cells + while let Some(current) = stack.pop() { + for cell in current.references() { + let key = cell.repr_hash(); + match cells.get(key.as_array()) { + Ok(value) => { + if !ctx.insert_cell(key, cell, value.as_deref())? { + continue; + } + } + Err(e) => return Err(CellStorageError::Internal(e)), + } + + stack.push(cell); + } + } + + // Clear big chunks of data before finalization + drop(stack); + + // Write transaction to the `WriteBatch` + Ok(ctx.finalize(batch, &self.raw_cells_cache)) + } + + pub fn load_cell( + self: &Arc, + hash: &HashBytes, + ) -> Result, CellStorageError> { + if let Some(cell) = self.cells_cache.get(hash) { + if let Some(cell) = cell.upgrade() { + return Ok(cell); + } + } + + let cell = match self.raw_cells_cache.get_raw(self.db.as_ref(), hash) { + Ok(value) => 'cell: { + if let Some(value) = value { + if let Some(value) = refcount::strip_refcount(&value) { + match StorageCell::deserialize(self.clone(), value) { + Some(cell) => break 'cell Arc::new(cell), + None => return Err(CellStorageError::InvalidCell), + } + } + } + return Err(CellStorageError::CellNotFound); + } + Err(e) => return Err(CellStorageError::Internal(e)), + }; + self.cells_cache.insert(*hash, Arc::downgrade(&cell)); + + Ok(cell) + } + + pub fn remove_cell( + &self, + batch: &mut weedb::rocksdb::WriteBatch, + alloc: &Bump, + hash: &HashBytes, + ) -> Result { + #[derive(Clone, Copy)] + struct CellState<'a> { + rc: i64, + removes: u32, + refs: &'a [HashBytes], + } + + impl<'a> CellState<'a> { + fn remove(&mut self) -> Result, CellStorageError> { + self.removes += 1; + if self.removes as i64 <= self.rc { + Ok(self.next_refs()) + } else { + Err(CellStorageError::CounterMismatch) + } + } + + fn next_refs(&self) -> Option<&'a [HashBytes]> { + if self.rc > self.removes as i64 { + None + } else { + Some(self.refs) + } + } + } + + let cells = &self.db.cells; + let cells_cf = &cells.cf(); + + let mut transaction: FastHashMap<&HashBytes, CellState> = + FastHashMap::with_capacity_and_hasher(128, Default::default()); + let mut buffer = Vec::with_capacity(4); + 
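+        // Depth-first walk over the cell graph: each visit decrements a cell's
+        // stored refcount once, and its children are only pushed after the
+        // counter is fully exhausted (i.e. the cell itself is being deleted).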
+ let mut stack = Vec::with_capacity(16); + stack.push(hash); + + // While some cells left + while let Some(cell_id) = stack.pop() { + let refs = match transaction.entry(cell_id) { + hash_map::Entry::Occupied(mut v) => v.get_mut().remove()?, + hash_map::Entry::Vacant(v) => { + let rc = match self.db.cells.get(cell_id.as_array()) { + Ok(value) => 'rc: { + if let Some(value) = value { + buffer.clear(); + if let (rc, Some(value)) = refcount::decode_value_with_rc(&value) { + if StorageCell::deserialize_references(value, &mut buffer) { + break 'rc rc; + } else { + return Err(CellStorageError::InvalidCell); + } + } + } + return Err(CellStorageError::CellNotFound); + } + Err(e) => return Err(CellStorageError::Internal(e)), + }; + + v.insert(CellState { + rc, + removes: 1, + refs: alloc.alloc_slice_copy(buffer.as_slice()), + }) + .next_refs() + } + }; + + if let Some(refs) = refs { + // Add all children + for cell_id in refs { + // Unknown cell, push to the stack to process it + stack.push(cell_id); + } + } + } + + // Clear big chunks of data before finalization + drop(stack); + + // Write transaction to the `WriteBatch` + let total = transaction.len(); + for (key, CellState { removes, .. }) in transaction { + batch.merge_cf( + cells_cf, + key.as_slice(), + refcount::encode_negative_refcount(removes), + ); + } + Ok(total) + } + + pub fn drop_cell(&self, hash: &HashBytes) { + self.cells_cache.remove(hash); + } + + pub fn cache_stats(&self) -> CacheStats { + let hits = self.raw_cells_cache.0.hits(); + let misses = self.raw_cells_cache.0.misses(); + let occupied = self.raw_cells_cache.0.len() as u64; + let weight = self.raw_cells_cache.0.weight(); + + let hits_ratio = if hits > 0 { + hits as f64 / (hits + misses) as f64 + } else { + 0.0 + } * 100.0; + CacheStats { + hits, + misses, + requests: hits + misses, + occupied, + hits_ratio, + size_bytes: weight, + } + } +} + +#[derive(thiserror::Error, Debug)] +pub enum CellStorageError { + #[error("Cell not found in cell db")] + CellNotFound, + #[error("Invalid cell")] + InvalidCell, + #[error("Cell counter mismatch")] + CounterMismatch, + #[error("Internal rocksdb error")] + Internal(#[source] weedb::rocksdb::Error), +} + +pub struct StorageCell { + cell_storage: Arc, + descriptor: CellDescriptor, + bit_len: u16, + data: Vec, + hashes: Vec<(HashBytes, u16)>, + + reference_states: [AtomicU8; 4], + reference_data: [UnsafeCell; 4], +} + +impl StorageCell { + const REF_EMPTY: u8 = 0x0; + const REF_RUNNING: u8 = 0x1; + const REF_STORAGE: u8 = 0x2; + const REF_REPLACED: u8 = 0x3; + + pub fn deserialize(cell_storage: Arc, buffer: &[u8]) -> Option { + if buffer.len() < 4 { + return None; + } + + let descriptor = CellDescriptor::new([buffer[0], buffer[1]]); + let bit_len = u16::from_le_bytes([buffer[2], buffer[3]]); + let byte_len = descriptor.byte_len() as usize; + let hash_count = descriptor.hash_count() as usize; + let ref_count = descriptor.reference_count() as usize; + + let total_len = 4usize + byte_len + (32 + 2) * hash_count + 32 * ref_count; + if buffer.len() < total_len { + return None; + } + + let data = buffer[4..4 + byte_len].to_vec(); + + let mut hashes = Vec::with_capacity(hash_count); + let mut offset = 4 + byte_len; + for _ in 0..hash_count { + hashes.push(( + HashBytes::from_slice(&buffer[offset..offset + 32]), + u16::from_le_bytes([buffer[offset + 32], buffer[offset + 33]]), + )); + offset += 32 + 2; + } + + let reference_states = Default::default(); + let reference_data = unsafe { + MaybeUninit::<[UnsafeCell; 4]>::uninit().assume_init() + }; + 
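+        // Copy the child hashes into the reference slots; each slot stays in
+        // its `hash` union variant until `reference_raw` first resolves it.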
+ for slot in reference_data.iter().take(ref_count) { + let slot = slot.get() as *mut u8; + unsafe { std::ptr::copy_nonoverlapping(buffer.as_ptr().add(offset), slot, 32) }; + offset += 32; + } + + Some(Self { + cell_storage, + bit_len, + descriptor, + data, + hashes, + reference_states, + reference_data, + }) + } + + pub fn deserialize_references(data: &[u8], target: &mut Vec) -> bool { + if data.len() < 4 { + return false; + } + + let descriptor = CellDescriptor::new([data[0], data[1]]); + let hash_count = descriptor.hash_count(); + let ref_count = descriptor.reference_count() as usize; + + let mut offset = 4usize + descriptor.byte_len() as usize + (32 + 2) * hash_count as usize; + if data.len() < offset + 32 * ref_count { + return false; + } + + target.reserve(ref_count); + for _ in 0..ref_count { + target.push(HashBytes::from_slice(&data[offset..offset + 32])); + offset += 32; + } + + true + } + + pub fn serialize_to(cell: &DynCell, target: &mut Vec) -> Result<()> { + let descriptor = cell.descriptor(); + let hash_count = descriptor.hash_count(); + let ref_count = descriptor.reference_count(); + + target.reserve( + 4usize + + descriptor.byte_len() as usize + + (32 + 2) * hash_count as usize + + 32 * ref_count as usize, + ); + + target.extend_from_slice(&[descriptor.d1, descriptor.d2]); + target.extend_from_slice(&cell.bit_len().to_le_bytes()); + target.extend_from_slice(cell.data()); + + for i in 0..descriptor.hash_count() { + target.extend_from_slice(cell.hash(i).as_array()); + target.extend_from_slice(&cell.depth(i).to_le_bytes()); + } + + for i in 0..descriptor.reference_count() { + let cell = cell.reference(i).context("Child not found")?; + target.extend_from_slice(cell.repr_hash().as_array()); + } + + Ok(()) + } + + pub fn reference_raw(&self, index: u8) -> Option<&Arc> { + if index > 3 || index >= self.descriptor.reference_count() { + return None; + } + + let state = &self.reference_states[index as usize]; + let slot = self.reference_data[index as usize].get(); + + let current_state = state.load(Ordering::Acquire); + if current_state == Self::REF_STORAGE { + return Some(unsafe { &(*slot).storage_cell }); + } + + let mut res = Ok(()); + Self::initialize_inner(state, &mut || match self + .cell_storage + .load_cell(unsafe { &(*slot).hash }) + { + Ok(cell) => unsafe { + *slot = StorageCellReferenceData { + storage_cell: ManuallyDrop::new(cell), + }; + true + }, + Err(err) => { + res = Err(err); + false + } + }); + + // TODO: just return none? 
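+        // NOTE: a failed child load currently panics here; per the TODO above,
+        // mapping the error to a `None` return may be the safer behavior.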
+        res.unwrap();
+
+        Some(unsafe { &(*slot).storage_cell })
+    }
+
+    // Note: this is intentionally monomorphic
+    #[inline(never)]
+    fn initialize_inner(state: &AtomicU8, init: &mut dyn FnMut() -> bool) {
+        struct Guard<'a> {
+            state: &'a AtomicU8,
+            new_state: u8,
+        }
+
+        impl<'a> Drop for Guard<'a> {
+            fn drop(&mut self) {
+                self.state.store(self.new_state, Ordering::Release);
+                unsafe {
+                    let key = self.state as *const AtomicU8 as usize;
+                    parking_lot_core::unpark_all(key, parking_lot_core::DEFAULT_UNPARK_TOKEN);
+                }
+            }
+        }
+
+        loop {
+            let exchange = state.compare_exchange_weak(
+                Self::REF_EMPTY,
+                Self::REF_RUNNING,
+                Ordering::Acquire,
+                Ordering::Acquire,
+            );
+            match exchange {
+                Ok(_) => {
+                    let mut guard = Guard {
+                        state,
+                        new_state: Self::REF_EMPTY,
+                    };
+                    if init() {
+                        guard.new_state = Self::REF_STORAGE;
+                    }
+                    return;
+                }
+                Err(Self::REF_STORAGE) => return,
+                Err(Self::REF_RUNNING) => unsafe {
+                    let key = state as *const AtomicU8 as usize;
+                    parking_lot_core::park(
+                        key,
+                        || state.load(Ordering::Relaxed) == Self::REF_RUNNING,
+                        || (),
+                        |_, _| (),
+                        parking_lot_core::DEFAULT_PARK_TOKEN,
+                        None,
+                    );
+                },
+                Err(Self::REF_EMPTY) => (),
+                Err(_) => debug_assert!(false),
+            }
+        }
+    }
+}
+
+impl CellImpl for StorageCell {
+    fn descriptor(&self) -> CellDescriptor {
+        self.descriptor
+    }
+
+    fn data(&self) -> &[u8] {
+        &self.data
+    }
+
+    fn bit_len(&self) -> u16 {
+        self.bit_len
+    }
+
+    fn reference(&self, index: u8) -> Option<&DynCell> {
+        Some(self.reference_raw(index)?.as_ref())
+    }
+
+    fn reference_cloned(&self, index: u8) -> Option<Cell> {
+        Some(Cell::from(self.reference_raw(index)?.clone() as Arc<_>))
+    }
+
+    fn virtualize(&self) -> &DynCell {
+        VirtualCellWrapper::wrap(self)
+    }
+
+    fn hash(&self, level: u8) -> &HashBytes {
+        let i = self.descriptor.level_mask().hash_index(level);
+        &self.hashes[i as usize].0
+    }
+
+    fn depth(&self, level: u8) -> u16 {
+        let i = self.descriptor.level_mask().hash_index(level);
+        self.hashes[i as usize].1
+    }
+
+    fn take_first_child(&mut self) -> Option<Cell> {
+        let state = self.reference_states[0].swap(Self::REF_EMPTY, Ordering::AcqRel);
+        let data = self.reference_data[0].get_mut();
+        match state {
+            Self::REF_STORAGE => Some(unsafe { data.take_storage_cell() }),
+            Self::REF_REPLACED => Some(unsafe { data.take_replaced_cell() }),
+            _ => None,
+        }
+    }
+
+    fn replace_first_child(&mut self, parent: Cell) -> std::result::Result<Cell, Cell> {
+        let state = self.reference_states[0].load(Ordering::Acquire);
+        if state < Self::REF_STORAGE {
+            return Err(parent);
+        }
+
+        self.reference_states[0].store(Self::REF_REPLACED, Ordering::Release);
+        let data = self.reference_data[0].get_mut();
+
+        let cell = match state {
+            Self::REF_STORAGE => unsafe { data.take_storage_cell() },
+            Self::REF_REPLACED => unsafe { data.take_replaced_cell() },
+            _ => return Err(parent),
+        };
+        data.replaced_cell = ManuallyDrop::new(parent);
+        Ok(cell)
+    }
+
+    fn take_next_child(&mut self) -> Option<Cell> {
+        while self.descriptor.reference_count() > 1 {
+            self.descriptor.d1 -= 1;
+            let idx = (self.descriptor.d1 & CellDescriptor::REF_COUNT_MASK) as usize;
+
+            let state = self.reference_states[idx].swap(Self::REF_EMPTY, Ordering::AcqRel);
+            let data = self.reference_data[idx].get_mut();
+
+            return Some(match state {
+                Self::REF_STORAGE => unsafe { data.take_storage_cell() },
+                Self::REF_REPLACED => unsafe { data.take_replaced_cell() },
+                _ => continue,
+            });
+        }
+
+        None
+    }
+}
+
+impl Drop for StorageCell {
+    fn drop(&mut self) {
+        self.cell_storage.drop_cell(DynCell::repr_hash(self));
+        for i in 0..4 {
+            let state = self.reference_states[i].load(Ordering::Acquire);
+            let data = self.reference_data[i].get_mut();
+
+            unsafe {
+                match state {
+                    Self::REF_STORAGE => ManuallyDrop::drop(&mut data.storage_cell),
+                    Self::REF_REPLACED => ManuallyDrop::drop(&mut data.replaced_cell),
+                    _ => {}
+                }
+            }
+        }
+    }
+}
+
+unsafe impl Send for StorageCell {}
+unsafe impl Sync for StorageCell {}
+
+pub union StorageCellReferenceData {
+    /// Incomplete state.
+    hash: HashBytes,
+    /// Complete state.
+    storage_cell: ManuallyDrop<Arc<StorageCell>>,
+    /// Replaced state.
+    replaced_cell: ManuallyDrop<Cell>,
+}
+
+impl StorageCellReferenceData {
+    unsafe fn take_storage_cell(&mut self) -> Cell {
+        Cell::from(ManuallyDrop::take(&mut self.storage_cell) as Arc<_>)
+    }
+
+    unsafe fn take_replaced_cell(&mut self) -> Cell {
+        ManuallyDrop::take(&mut self.replaced_cell)
+    }
+}
+
+struct RawCellsCache(Cache<HashBytes, Bytes, CellSizeEstimator, FastHasherState>);
+
+#[derive(Clone, Copy)]
+struct CellSizeEstimator;
+impl quick_cache::Weighter<HashBytes, Bytes> for CellSizeEstimator {
+    fn weight(&self, _: &HashBytes, val: &Bytes) -> u32 {
+        const BYTES_SIZE: usize = std::mem::size_of::<usize>() * 4;
+        let len = 32 + val.len() + BYTES_SIZE;
+
+        len as u32
+    }
+}
+
+impl RawCellsCache {
+    fn new(size_in_bytes: u64) -> Self {
+        // Percentile 0.1% from 96 to 127 => 1725119 count
+        // Percentile 10% from 128 to 191 => 82838849 count
+        // Percentile 25% from 128 to 191 => 82838849 count
+        // Percentile 50% from 128 to 191 => 82838849 count
+        // Percentile 75% from 128 to 191 => 82838849 count
+        // Percentile 90% from 192 to 255 => 22775080 count
+        // Percentile 95% from 192 to 255 => 22775080 count
+        // Percentile 99% from 192 to 255 => 22775080 count
+        // Percentile 99.9% from 256 to 383 => 484002 count
+        // Percentile 99.99% from 256 to 383 => 484002 count
+        // Percentile 99.999% from 256 to 383 => 484002 count
+
+        // from 64 to 95 - 15_267
+        // from 96 to 127 - 1_725_119
+        // from 128 to 191 - 82_838_849
+        // from 192 to 255 - 22_775_080
+        // from 256 to 383 - 484_002
+
+        // We assume that 75% of cells are in the range 128..=191 bytes,
+        // so we can use 192 as the value size estimate for the cache.
+
+        const MAX_CELL_SIZE: u64 = 192;
+        const KEY_SIZE: u64 = 32;
+
+        let estimated_cell_cache_capacity = size_in_bytes / (KEY_SIZE + MAX_CELL_SIZE);
+        tracing::info!(
+            estimated_cell_cache_capacity,
+            max_cell_cache_size = %bytesize::ByteSize(size_in_bytes),
+        );
+
+        let raw_cache = Cache::with(
+            estimated_cell_cache_capacity as usize,
+            size_in_bytes,
+            CellSizeEstimator,
+            FastHasherState::default(),
+            Default::default(),
+        );
+
+        Self(raw_cache)
+    }
+
+    fn get_raw(&self, db: &Db, key: &HashBytes) -> Result<Option<Bytes>, weedb::rocksdb::Error> {
+        if let Some(value) = self.0.get(key) {
+            return Ok(Some(value));
+        }
+
+        let value = db
+            .cells
+            .get(key.as_array())?
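+            // Cache miss: fall through to RocksDB and populate the cache
+            // below (read-through).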
+            .map(|v| Bytes::copy_from_slice(v.as_ref()));
+        if let Some(value) = &value {
+            self.0.insert(*key, value.clone());
+        }
+
+        Ok(value)
+    }
+
+    pub fn insert(&self, key: HashBytes, value: &[u8]) {
+        let value = Bytes::copy_from_slice(value);
+        self.0.insert(key, value);
+    }
+}
diff --git a/storage/src/shard_state_storage/cell_writer.rs b/storage/src/shard_state_storage/cell_writer.rs
new file mode 100644
index 000000000..36fb131ea
--- /dev/null
+++ b/storage/src/shard_state_storage/cell_writer.rs
@@ -0,0 +1,430 @@
+use std::collections::hash_map;
+use std::fs::File;
+use std::io::{Read, Seek, SeekFrom, Write};
+use std::os::unix::io::AsRawFd;
+use std::path::{Path, PathBuf};
+
+use anyhow::{Context, Result};
+use smallvec::SmallVec;
+
+use crate::db::Db;
+use tycho_block_util::FastHashMap;
+
+pub struct CellWriter<'a> {
+    db: &'a Db,
+    base_path: &'a Path,
+}
+
+impl<'a> CellWriter<'a> {
+    #[allow(unused)]
+    pub fn new(db: &'a Db, base_path: &'a Path) -> Self {
+        Self { db, base_path }
+    }
+
+    #[allow(unused)]
+    pub fn write(&self, root_hash: &[u8; 32]) -> Result<()> {
+        // Open target file in advance to get the error immediately (if any)
+        let file_path = self.base_path.join(hex::encode(root_hash));
+        let file = std::fs::OpenOptions::new()
+            .write(true)
+            .create(true)
+            .truncate(true)
+            .open(file_path)
+            .context("Failed to create target file")?;
+
+        // Load cells from db in reverse order into the temp file
+        tracing::info!("started loading cells");
+        let mut intermediate = write_rev_cells(self.db, self.base_path, root_hash)
+            .context("Failed to write reversed cells data")?;
+        tracing::info!("finished loading cells");
+        let cell_count = intermediate.cell_sizes.len() as u32;
+
+        // Compute offset type size (usually 4 bytes)
+        let offset_size =
+            std::cmp::min(number_of_bytes_to_fit(intermediate.total_size), 8) as usize;
+
+        // Reserve space for the file
+        alloc_file(
+            &file,
+            22 + offset_size * (1 + cell_count as usize) + (intermediate.total_size as usize),
+        )?;
+
+        // Write cells data in BOC format
+        let mut buffer = std::io::BufWriter::with_capacity(FILE_BUFFER_LEN / 2, file);
+
+        // Header | current len: 0
+        let flags = 0b1000_0000u8 | (REF_SIZE as u8);
+        buffer.write_all(&[0xb5, 0xee, 0x9c, 0x72, flags, offset_size as u8])?;
+
+        // Unique cell count | current len: 6
+        buffer.write_all(&cell_count.to_be_bytes())?;
+
+        // Root count | current len: 10
+        buffer.write_all(&1u32.to_be_bytes())?;
+
+        // Absent cell count | current len: 14
+        buffer.write_all(&[0, 0, 0, 0])?;
+
+        // Total cell size | current len: 18
+        buffer.write_all(&intermediate.total_size.to_be_bytes()[(8 - offset_size)..8])?;
+
+        // Root index | current len: 18 + offset_size
+        buffer.write_all(&[0, 0, 0, 0])?;
+
+        // Cells index | current len: 22 + offset_size
+        tracing::info!("started building index");
+        {
+            let mut next_offset = 0;
+            for &cell_size in intermediate.cell_sizes.iter().rev() {
+                next_offset += cell_size as u64;
+                buffer.write_all(&next_offset.to_be_bytes()[(8 - offset_size)..8])?;
+            }
+        }
+        tracing::info!("finished building index");
+
+        // Cells | current len: 22 + offset_size * (1 + cell_sizes.len())
+        let mut cell_buffer = [0; 2 + 128 + 4 * REF_SIZE];
+        for &cell_size in intermediate.cell_sizes.iter().rev() {
+            intermediate.total_size -= cell_size as u64;
+            intermediate
+                .file
+                .seek(SeekFrom::Start(intermediate.total_size))?;
+            intermediate
+                .file
+                .read_exact(&mut cell_buffer[..cell_size as usize])?;
+
+            let d1 = cell_buffer[0];
+            let d2 = cell_buffer[1];
+            let ref_count = (d1 & 7) as usize;
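+            // d2 encodes the data length: bits 7..1 hold the number of full
+            // bytes, bit 0 marks a trailing partial byte, so e.g.
+            // d2 = 0b0000_0101 -> (5 >> 1) + 1 = 3 data bytes.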
+            let data_size = ((d2 >> 1) + (d2 & 1 != 0) as u8) as usize;
+
+            let ref_offset = 2 + data_size;
+            for r in 0..ref_count {
+                let ref_offset = ref_offset + r * REF_SIZE;
+                let slice = &mut cell_buffer[ref_offset..ref_offset + REF_SIZE];
+
+                let index = u32::from_be_bytes(slice.try_into().unwrap());
+                slice.copy_from_slice(&(cell_count - index - 1).to_be_bytes());
+            }
+
+            buffer.write_all(&cell_buffer[..cell_size as usize])?;
+        }
+
+        buffer.flush()?;
+
+        Ok(())
+    }
+}
+
+struct IntermediateState {
+    file: File,
+    cell_sizes: Vec<u8>,
+    total_size: u64,
+    _remove_on_drop: RemoveOnDrop,
+}
+
+fn write_rev_cells<P: AsRef<Path>>(
+    db: &Db,
+    base_path: P,
+    root_hash: &[u8; 32],
+) -> Result<IntermediateState> {
+    enum StackItem {
+        New([u8; 32]),
+        Loaded(LoadedCell),
+    }
+
+    struct LoadedCell {
+        hash: [u8; 32],
+        d1: u8,
+        d2: u8,
+        data: SmallVec<[u8; 128]>,
+        indices: SmallVec<[u32; 4]>,
+    }
+
+    let file_path = base_path
+        .as_ref()
+        .join(hex::encode(root_hash))
+        .with_extension("temp");
+
+    let file = std::fs::OpenOptions::new()
+        .read(true)
+        .write(true)
+        .create(true)
+        .truncate(true)
+        .open(&file_path)
+        .context("Failed to create temp file")?;
+    let remove_on_drop = RemoveOnDrop(file_path);
+
+    let raw = db.raw().as_ref();
+    let read_options = db.cells.read_config();
+    let cf = db.cells.cf();
+
+    let mut references_buffer = SmallVec::<[[u8; 32]; 4]>::with_capacity(4);
+
+    let mut indices = FastHashMap::default();
+    let mut remap = FastHashMap::default();
+    let mut cell_sizes = Vec::<u8>::with_capacity(FILE_BUFFER_LEN);
+    let mut stack = Vec::with_capacity(32);
+
+    let mut total_size = 0u64;
+    let mut iteration = 0u32;
+    let mut remap_index = 0u32;
+
+    stack.push((iteration, StackItem::New(*root_hash)));
+    indices.insert(*root_hash, (iteration, false));
+
+    let mut temp_file_buffer = std::io::BufWriter::with_capacity(FILE_BUFFER_LEN, file);
+
+    while let Some((index, data)) = stack.pop() {
+        match data {
+            StackItem::New(hash) => {
+                let value = raw
+                    .get_pinned_cf_opt(&cf, hash, read_options)?
+ .ok_or(CellWriterError::CellNotFound)?; + + let value = value.as_ref(); + if value.is_empty() { + return Err(CellWriterError::InvalidCell.into()); + } + + let (d1, d2, data) = deserialize_cell(&value[1..], &mut references_buffer) + .ok_or(CellWriterError::InvalidCell)?; + + let mut reference_indices = SmallVec::with_capacity(references_buffer.len()); + + let mut indices_buffer = [0; 4]; + let mut keys = [std::ptr::null(); 4]; + let mut preload_count = 0; + + for hash in &references_buffer { + let index = match indices.entry(*hash) { + hash_map::Entry::Vacant(entry) => { + remap_index += 1; + + entry.insert((remap_index, false)); + + indices_buffer[preload_count] = remap_index; + keys[preload_count] = hash.as_ptr(); + preload_count += 1; + + remap_index + } + hash_map::Entry::Occupied(entry) => { + let (remap_index, written) = *entry.get(); + if !written { + indices_buffer[preload_count] = remap_index; + keys[preload_count] = hash.as_ptr(); + preload_count += 1; + } + remap_index + } + }; + + reference_indices.push(index); + } + + stack.push(( + index, + StackItem::Loaded(LoadedCell { + hash, + d1, + d2, + data: SmallVec::from_slice(data), + indices: reference_indices, + }), + )); + + if preload_count > 0 { + indices_buffer[..preload_count].reverse(); + keys[..preload_count].reverse(); + + for i in 0..preload_count { + let index = indices_buffer[i]; + let hash = unsafe { *(keys[i] as *const [u8; 32]) }; + stack.push((index, StackItem::New(hash))); + } + } + + references_buffer.clear(); + } + StackItem::Loaded(loaded) => { + match remap.entry(index) { + hash_map::Entry::Vacant(entry) => { + entry.insert(iteration.to_be_bytes()); + } + hash_map::Entry::Occupied(_) => continue, + }; + + if let Some((_, written)) = indices.get_mut(&loaded.hash) { + *written = true; + } + + iteration += 1; + if iteration % 100000 == 0 { + tracing::info!(iteration); + } + + let cell_size = 2 + loaded.data.len() + loaded.indices.len() * REF_SIZE; + cell_sizes.push(cell_size as u8); + total_size += cell_size as u64; + + temp_file_buffer.write_all(&[loaded.d1, loaded.d2])?; + temp_file_buffer.write_all(&loaded.data)?; + for index in loaded.indices { + let index = remap.get(&index).with_context(|| { + format!("Child not found. Iteration {iteration}. 
Child {index}")
+                    })?;
+                    temp_file_buffer.write_all(index)?;
+                }
+            }
+        }
+    }
+
+    let mut file = temp_file_buffer.into_inner()?;
+    file.flush()?;
+
+    Ok(IntermediateState {
+        file,
+        cell_sizes,
+        total_size,
+        _remove_on_drop: remove_on_drop,
+    })
+}
+
+fn deserialize_cell<'a>(
+    value: &'a [u8],
+    references_buffer: &mut SmallVec<[[u8; 32]; 4]>,
+) -> Option<(u8, u8, &'a [u8])> {
+    let mut index = Index {
+        value_len: value.len(),
+        offset: 0,
+    };
+
+    index.require(3)?;
+    let cell_type = value[*index];
+    index.advance(1);
+    let bit_length = u16::from_le_bytes((&value[*index..*index + 2]).try_into().unwrap());
+    index.advance(2);
+
+    let d2 = (((bit_length >> 2) as u8) & !0b1) | ((bit_length % 8 != 0) as u8);
+
+    // TODO: Replace with `(bit_length + 7) / 8`
+    let data_len = ((d2 >> 1) + u8::from(d2 & 1 != 0)) as usize;
+    index.require(data_len)?;
+    let data = &value[*index..*index + data_len];
+
+    // NOTE: additional byte is required here due to internal structure
+    index.advance(((bit_length + 8) / 8) as usize);
+
+    index.require(1)?;
+    let level_mask = value[*index];
+    // skip store_hashes
+    index.advance(2);
+
+    index.require(2)?;
+    let has_hashes = value[*index];
+    index.advance(1);
+    if has_hashes != 0 {
+        let count = value[*index];
+        index.advance(1 + (count * 32) as usize);
+    }
+
+    index.require(2)?;
+    let has_depths = value[*index];
+    index.advance(1);
+    if has_depths != 0 {
+        let count = value[*index];
+        index.advance(1 + (count * 2) as usize);
+    }
+
+    index.require(1)?;
+    let reference_count = value[*index];
+    index.advance(1);
+
+    let d1 = reference_count | (((cell_type != 0x01) as u8) << 3) | (level_mask << 5);
+
+    for _ in 0..reference_count {
+        index.require(32)?;
+        let mut hash = [0; 32];
+        hash.copy_from_slice(&value[*index..*index + 32]);
+        references_buffer.push(hash);
+        index.advance(32);
+    }
+
+    Some((d1, d2, data))
+}
+
+#[cfg(not(target_os = "macos"))]
+fn alloc_file(file: &File, len: usize) -> std::io::Result<()> {
+    let res = unsafe { libc::posix_fallocate(file.as_raw_fd(), 0, len as i64) };
+    if res == 0 {
+        Ok(())
+    } else {
+        // NOTE: `posix_fallocate` returns the error code directly instead of
+        // setting `errno`.
+        Err(std::io::Error::from_raw_os_error(res))
+    }
+}
+
+#[cfg(target_os = "macos")]
+fn alloc_file(file: &File, len: usize) -> std::io::Result<()> {
+    let res = unsafe { libc::ftruncate(file.as_raw_fd(), len as i64) };
+    if res < 0 {
+        Err(std::io::Error::last_os_error())
+    } else {
+        Ok(())
+    }
+}
+
+fn number_of_bytes_to_fit(l: u64) -> u32 {
+    8 - l.leading_zeros() / 8
+}
+
+struct RemoveOnDrop(PathBuf);
+
+impl Drop for RemoveOnDrop {
+    fn drop(&mut self) {
+        if let Err(e) = std::fs::remove_file(&self.0) {
+            tracing::error!(path = %self.0.display(), "failed to remove file: {e:?}");
+        }
+    }
+}
+
+struct Index {
+    value_len: usize,
+    offset: usize,
+}
+
+impl Index {
+    #[inline(always)]
+    fn require(&self, len: usize) -> Option<()> {
+        if self.offset + len < self.value_len {
+            Some(())
+        } else {
+            None
+        }
+    }
+
+    #[inline(always)]
+    fn advance(&mut self, bytes: usize) {
+        self.offset += bytes;
+    }
+}
+
+impl std::ops::Deref for Index {
+    type Target = usize;
+
+    #[inline(always)]
+    fn deref(&self) -> &Self::Target {
+        &self.offset
+    }
+}
+
+const REF_SIZE: usize = std::mem::size_of::<u32>();
+const FILE_BUFFER_LEN: usize = 128 * 1024 * 1024; // 128 MB
+
+#[derive(thiserror::Error, Debug)]
+enum CellWriterError {
+    #[error("Cell not found in cell db")]
+    CellNotFound,
+    #[error("Invalid cell")]
+    InvalidCell,
+}
diff --git a/storage/src/shard_state_storage/entries_buffer.rs b/storage/src/shard_state_storage/entries_buffer.rs
new file mode 100644
index 000000000..5347ad5fe
--- /dev/null
+++ b/storage/src/shard_state_storage/entries_buffer.rs
@@ -0,0 +1,182 @@
+use everscale_types::cell::{CellType, LevelMask};
+
+pub struct EntriesBuffer(Box<[[u8; HashesEntry::LEN]; 5]>);
+
+impl EntriesBuffer {
+    pub fn new() -> Self {
+        Self(Box::new([[0; HashesEntry::LEN]; 5]))
+    }
+
+    pub fn current_entry_buffer(&mut self) -> &mut [u8; HashesEntry::LEN] {
+        &mut self.0[0]
+    }
+
+    pub fn iter_child_buffers(
+        &mut self,
+    ) -> impl Iterator<Item = &mut [u8; HashesEntry::LEN]> {
+        self.0.iter_mut().skip(1)
+    }
+
+    pub fn split_children<'a, 'b>(
+        &'a mut self,
+        references: &'b [u32],
+    ) -> (HashesEntryWriter<'a>, EntriesBufferChildren<'b>)
+    where
+        'a: 'b,
+    {
+        let [first, tail @ ..] = &mut *self.0;
+        (
+            HashesEntryWriter(first),
+            EntriesBufferChildren(references, tail),
+        )
+    }
+
+    pub fn repr_hash(&self) -> &[u8; 32] {
+        let [first, ..] = &*self.0;
+        HashesEntry(first).hash(3)
+    }
+}
+
+pub struct EntriesBufferChildren<'a>(&'a [u32], &'a [[u8; HashesEntry::LEN]]);
+
+impl EntriesBufferChildren<'_> {
+    pub fn iter(&self) -> impl Iterator<Item = (&u32, HashesEntry)> {
+        self.0
+            .iter()
+            .zip(self.1)
+            .map(|(index, item)| (index, HashesEntry(item)))
+    }
+}
+
+pub struct HashesEntryWriter<'a>(&'a mut [u8; HashesEntry::LEN]);
+
+impl HashesEntryWriter<'_> {
+    pub fn as_reader(&self) -> HashesEntry {
+        HashesEntry(self.0)
+    }
+
+    pub fn clear(&mut self) {
+        for byte in &mut *self.0 {
+            *byte = 0;
+        }
+    }
+
+    pub fn set_level_mask(&mut self, level_mask: LevelMask) {
+        self.0[0] = level_mask.into();
+    }
+
+    pub fn set_cell_type(&mut self, cell_type: CellType) {
+        self.0[1] = cell_type.into();
+    }
+
+    pub fn set_tree_bits_count(&mut self, count: u64) {
+        self.0[4..12].copy_from_slice(&count.to_le_bytes());
+    }
+
+    pub fn set_tree_cell_count(&mut self, count: u64) {
+        self.0[12..20].copy_from_slice(&count.to_le_bytes());
+    }
+
+    pub fn get_tree_counters(&mut self) -> &[u8] {
+        &self.0[4..20]
+    }
+
+    pub fn set_hash(&mut self, i: u8, hash: &[u8]) {
+        self.get_hash_slice(i).copy_from_slice(hash);
+    }
+
+    pub fn get_hash_slice(&mut self, i: u8) -> &mut [u8; 32] {
+        let offset = HashesEntry::HASHES_OFFSET + 32 * i as usize;
+        unsafe { &mut *(self.0.as_mut_ptr().add(offset) as *mut _) }
+    }
+
+    pub fn set_depth(&mut self, i: u8, depth: u16) {
+        self.get_depth_slice(i)
+            .copy_from_slice(&depth.to_le_bytes());
+    }
+
+    pub fn get_depth_slice(&mut self, i: u8) -> &mut [u8; 2] {
+        let offset = HashesEntry::DEPTHS_OFFSET + 2 * i as usize;
+        unsafe { &mut *(self.0.as_mut_ptr().add(offset) as *mut _) }
+    }
+}
+
+pub struct HashesEntry<'a>(&'a [u8; HashesEntry::LEN]);
+
+impl<'a> HashesEntry<'a> {
+    // 4 bytes - info (1 byte level mask, 1 byte cell type, 2 bytes padding)
+    // 8 bytes - tree bits count
+    // 8 bytes - cell count
+    // 32 * 4 bytes - hashes
+    // 2 * 4 bytes - depths
+    pub const LEN: usize = 4 + 8 + 8 + 32 * 4 + 2 * 4;
+    pub const HASHES_OFFSET: usize = 4 + 8 + 8;
+    pub const DEPTHS_OFFSET: usize = 4 + 8 + 8 + 32 * 4;
+
+    pub fn level_mask(&self) -> LevelMask {
+        // SAFETY: loaded from `set_level_mask`
+        unsafe { LevelMask::new_unchecked(self.0[0]) }
+    }
+
+    pub fn cell_type(&self) -> CellType {
+        match self.0[1] {
+            1 => CellType::PrunedBranch,
+            2 => CellType::LibraryReference,
+            3 => CellType::MerkleProof,
+            4 => CellType::MerkleUpdate,
+            _ => CellType::Ordinary,
+        }
+    }
+
+    pub fn tree_bits_count(&self) -> u64 {
+        u64::from_le_bytes(self.0[4..12].try_into().unwrap())
+    }
+
+    pub fn tree_cell_count(&self) -> u64 {
+        u64::from_le_bytes(self.0[12..20].try_into().unwrap())
+    }
+
+    pub fn hash(&self, n: u8) -> &'a [u8; 32] {
+        let offset = Self::HASHES_OFFSET + 32 * self.level_mask().hash_index(n) as usize;
+        unsafe { &*(self.0.as_ptr().add(offset) as *const _) }
+    }
+
+    pub fn depth(&self, n: u8) -> u16 {
+        let offset = Self::DEPTHS_OFFSET + 2 * self.level_mask().hash_index(n) as usize;
+        u16::from_le_bytes([self.0[offset], self.0[offset + 1]])
+    }
+
+    pub fn pruned_branch_hash<'b>(&self, n: u8, data: &'b [u8]) -> Option<&'b [u8; 32]>
+    where
+        'a: 'b,
+    {
+        let level_mask = self.level_mask();
+        let index = level_mask.hash_index(n) as usize;
+        let level = level_mask.level() as usize;
+
+        Some(if index == level {
+            let offset = Self::HASHES_OFFSET;
+            unsafe { &*(self.0.as_ptr().add(offset) as *const _) }
+        } else {
+            let offset = 1 + 1 + index * 32;
+            if data.len() < offset + 32 {
+                return None;
+            }
+            unsafe { &*(data.as_ptr().add(offset) as *const _) }
+        })
+    }
+
+    pub fn pruned_branch_depth(&self, n: u8, data: &[u8]) -> u16 {
+        let level_mask = self.level_mask();
+        let index = level_mask.hash_index(n) as usize;
+        let level = level_mask.level() as usize;
+
+        if index == level {
+            let offset = Self::DEPTHS_OFFSET;
+            u16::from_le_bytes([self.0[offset], self.0[offset + 1]])
+        } else {
+            let offset = 1 + 1 + level * 32 + index * 2;
+            u16::from_be_bytes([data[offset], data[offset + 1]])
+        }
+    }
+}
\ No newline at end of file
diff --git a/storage/src/shard_state_storage/files_context.rs b/storage/src/shard_state_storage/files_context.rs
new file mode 100644
index 000000000..846b863aa
--- /dev/null
+++ b/storage/src/shard_state_storage/files_context.rs
@@ -0,0 +1,89 @@
+use std::path::{Path, PathBuf};
+
+use anyhow::{Context, Result};
+use everscale_types::models::*;
+use tokio::fs::File;
+use tokio::io::{AsyncWriteExt, BufWriter};
+
+use tycho_block_util::MappedFile;
+
+pub struct FilesContext {
+    cells_path: PathBuf,
+    cells_file: Option<BufWriter<File>>,
+    hashes_path: PathBuf,
+}
+
+impl FilesContext {
+    pub async fn new<P>(downloads_dir: P, block_id: &BlockId) -> Result<Self>
+    where
+        P: AsRef<Path>,
+    {
+        let block_id = format!(
+            "({},{:016x},{})",
+            block_id.shard.workchain(),
+            block_id.shard.prefix(),
+            block_id.seqno
+        );
+
+        let cells_path = downloads_dir
+            .as_ref()
+            .join(format!("state_cells_{block_id}"));
+        let hashes_path = downloads_dir
+            .as_ref()
+            .join(format!("state_hashes_{block_id}"));
+
+        let cells_file = Some(BufWriter::new(
+            tokio::fs::OpenOptions::new()
+                .write(true)
+                .create(true)
+                .truncate(true)
+                .read(true)
+                .open(&cells_path)
+                .await
+                .context("Failed to create cells file")?,
+        ));
+
+        Ok(Self {
+            cells_path,
+            cells_file,
+            hashes_path,
+        })
+    }
+
+    pub async fn clear(self) -> Result<()> {
+        tokio::fs::remove_file(self.cells_path).await?;
+        tokio::fs::remove_file(self.hashes_path).await?;
+        Ok(())
+    }
+
+    pub fn cells_file(&mut self) -> Result<&mut BufWriter<File>> {
+        match &mut self.cells_file {
+            Some(file) => Ok(file),
+            None => Err(FilesContextError::AlreadyFinalized.into()),
+        }
+    }
+
+    pub fn create_mapped_hashes_file(&self, length: usize) -> Result<MappedFile> {
+        let mapped_file = MappedFile::new(&self.hashes_path, length)?;
+        Ok(mapped_file)
+    }
+
+    pub async fn create_mapped_cells_file(&mut self) -> Result<MappedFile> {
+        let file = match self.cells_file.take() {
+            Some(mut file) => {
+                file.flush().await?;
+                file.into_inner().into_std().await
+            }
+            None => return Err(FilesContextError::AlreadyFinalized.into()),
+        };
+
+        let mapped_file = MappedFile::from_existing_file(file)?;
+        Ok(mapped_file)
+    }
+}
+
+#[derive(thiserror::Error, Debug)]
+enum FilesContextError {
+    #[error("Already finalized")]
+    AlreadyFinalized,
+}
diff --git a/storage/src/shard_state_storage/mod.rs b/storage/src/shard_state_storage/mod.rs
new file mode 100644
index 000000000..ec3c45c9b
--- /dev/null
+++ b/storage/src/shard_state_storage/mod.rs
@@ -0,0 +1,378 @@
+use std::path::PathBuf;
+use std::sync::atomic::{AtomicUsize, Ordering};
+use std::sync::Arc;
+use std::time::Instant;
+
+use anyhow::{Context, Result};
+use everscale_types::models::*;
+use everscale_types::prelude::{Cell, HashBytes};
+
+use self::cell_storage::*;
+use self::files_context::FilesContext;
+use self::replace_transaction::ShardStateReplaceTransaction;
+use super::{models::BlockHandle, BlockHandleStorage};
+
+use crate::block_storage::BlockStorage;
+use crate::db::*;
+use tycho_block_util::*;
+
+mod cell_storage;
+mod cell_writer;
+mod entries_buffer;
+mod files_context;
+mod replace_transaction;
+mod shard_state_reader;
+
+pub struct ShardStateStorage {
+    db: Arc<Db>,
+
+    block_handle_storage: Arc<BlockHandleStorage>,
+    block_storage: Arc<BlockStorage>,
+    cell_storage: Arc<CellStorage>,
+    downloads_dir: Arc<PathBuf>,
+
+    gc_lock: tokio::sync::RwLock<()>,
+    min_ref_mc_state: Arc<MinRefMcStateTracker>,
+    max_new_mc_cell_count: AtomicUsize,
+    max_new_sc_cell_count: AtomicUsize,
+}
+
+impl ShardStateStorage {
+    pub async fn new(
+        db: Arc<Db>,
+        block_handle_storage: Arc<BlockHandleStorage>,
+        block_storage: Arc<BlockStorage>,
+        file_db_path: PathBuf,
+        cache_size_bytes: u64,
+    ) -> Result<Self> {
+        let downloads_dir = prepare_file_db_dir(file_db_path, "downloads").await?;
+
+        let cell_storage = CellStorage::new(db.clone(), cache_size_bytes)?;
+
+        let res = Self {
+            db,
+            block_handle_storage,
+            block_storage,
+            cell_storage,
+            downloads_dir,
+            gc_lock: Default::default(),
+            min_ref_mc_state: Arc::new(Default::default()),
+            max_new_mc_cell_count: AtomicUsize::new(0),
+            max_new_sc_cell_count: AtomicUsize::new(0),
+        };
+
+        // Done
+        Ok(res)
+    }
+
+    pub fn metrics(&self) -> ShardStateStorageMetrics {
+        #[cfg(feature = "count-cells")]
+        let storage_cell = countme::get::<StorageCell>();
+
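+        // `swap(0, ..)` below resets the counters, so each call reports the
+        // maxima observed since the previous `metrics()` call.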
+        ShardStateStorageMetrics {
+            #[cfg(feature = "count-cells")]
+            storage_cell_live_count: storage_cell.live,
+            #[cfg(feature = "count-cells")]
+            storage_cell_max_live_count: storage_cell.max_live,
+            max_new_mc_cell_count: self.max_new_mc_cell_count.swap(0, Ordering::AcqRel),
+            max_new_sc_cell_count: self.max_new_sc_cell_count.swap(0, Ordering::AcqRel),
+        }
+    }
+
+    pub fn cache_metrics(&self) -> CacheStats {
+        self.cell_storage.cache_stats()
+    }
+
+    pub fn min_ref_mc_state(&self) -> &Arc<MinRefMcStateTracker> {
+        &self.min_ref_mc_state
+    }
+
+    pub async fn store_state(
+        &self,
+        handle: &Arc<BlockHandle>,
+        state: &ShardStateStuff,
+    ) -> Result<bool> {
+        if handle.id() != state.block_id() {
+            return Err(ShardStateStorageError::BlockHandleIdMismatch.into());
+        }
+
+        if handle.meta().has_state() {
+            return Ok(false);
+        }
+
+        let block_id = handle.id();
+        let cell_id = state.root_cell().repr_hash();
+
+        let mut batch = weedb::rocksdb::WriteBatch::default();
+
+        let _gc_lock = self.gc_lock.read().await;
+
+        let len = self
+            .cell_storage
+            .store_cell(&mut batch, state.root_cell().clone())?;
+
+        if block_id.shard.is_masterchain() {
+            self.max_new_mc_cell_count.fetch_max(len, Ordering::Release);
+        } else {
+            self.max_new_sc_cell_count.fetch_max(len, Ordering::Release);
+        }
+
+        let mut value = [0; 32 * 3];
+        value[..32].copy_from_slice(cell_id.as_slice());
+        value[32..64].copy_from_slice(block_id.root_hash.as_slice());
+        value[64..96].copy_from_slice(block_id.file_hash.as_slice());
+
+        batch.put_cf(
+            &self.db.shard_states.cf(),
+            BlockIdShort {
+                shard: block_id.shard,
+                seqno: block_id.seqno,
+            }
+            .to_vec(),
+            value,
+        );
+
+        self.db.raw().write(batch)?;
+
+        Ok(if handle.meta().set_has_state() {
+            self.block_handle_storage.store_handle(handle)?;
+            true
+        } else {
+            false
+        })
+    }
+
+    pub async fn load_state(&self, block_id: &BlockId) -> Result<Arc<ShardStateStuff>> {
+        let cell_id = self.load_state_root(block_id.as_short_id())?;
+        let cell = self.cell_storage.load_cell(&cell_id)?;
+
+        ShardStateStuff::new(
+            *block_id,
+            Cell::from(cell as Arc<_>),
+            &self.min_ref_mc_state,
+        )
+        .map(Arc::new)
+    }
+
+    pub async fn begin_replace(
+        &'_ self,
+        block_id: &BlockId,
+    ) -> Result<(ShardStateReplaceTransaction<'_>, FilesContext)> {
+        let ctx = FilesContext::new(self.downloads_dir.as_ref(), block_id).await?;
+
+        Ok((
+            ShardStateReplaceTransaction::new(&self.db, &self.cell_storage, &self.min_ref_mc_state),
+            ctx,
+        ))
+    }
+
+    pub async fn remove_outdated_states(&self, mc_seqno: u32) -> Result<TopBlocks> {
+        let _compaction_guard = self.db.delay_compaction().await;
+
+        // Compute recent block ids for the specified masterchain seqno
+        let top_blocks = self
+            .compute_recent_blocks(mc_seqno)
+            .await?
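+            // States at or above this edge (per shard) survive; everything
+            // older is removed below.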
+            .context("Recent blocks edge not found")?;
+
+        tracing::info!(
+            block_id = %top_blocks.mc_block,
+            "starting shard states GC",
+        );
+        let instant = Instant::now();
+
+        let raw = self.db.raw();
+
+        // Manually get required column factory and r/w options
+        let snapshot = raw.snapshot();
+        let shard_states_cf = self.db.shard_states.get_unbounded_cf();
+        let mut states_read_options = self.db.shard_states.new_read_config();
+        states_read_options.set_snapshot(&snapshot);
+
+        let cells_write_options = self.db.cells.write_config();
+
+        let mut alloc = bumpalo::Bump::new();
+
+        // Create iterator
+        let mut iter = raw.raw_iterator_cf_opt(&shard_states_cf.bound(), states_read_options);
+        iter.seek_to_first();
+
+        // Iterate all states and remove outdated
+        let mut removed_states = 0usize;
+        let mut removed_cells = 0usize;
+        loop {
+            let (key, value) = match iter.item() {
+                Some(item) => item,
+                None => match iter.status() {
+                    Ok(()) => break,
+                    Err(e) => return Err(e.into()),
+                },
+            };
+
+            let block_id = BlockIdShort::deserialize(&mut std::convert::identity(key))?;
+            let root_hash = HashBytes::wrap(value.try_into().expect("invalid value"));
+
+            // Skip blocks from zero state and top blocks
+            if block_id.seqno == 0
+                || top_blocks.contains_shard_seqno(&block_id.shard, block_id.seqno)
+            {
+                iter.next();
+                continue;
+            }
+
+            alloc.reset();
+            let mut batch = weedb::rocksdb::WriteBatch::default();
+            {
+                let _guard = self.gc_lock.write().await;
+                let total = self
+                    .cell_storage
+                    .remove_cell(&mut batch, &alloc, root_hash)?;
+                batch.delete_cf(&shard_states_cf.bound(), key);
+                raw.write_opt(batch, cells_write_options)?;
+
+                removed_cells += total;
+                tracing::debug!(
+                    removed_cells = total,
+                    %block_id,
+                );
+            }
+
+            removed_states += 1;
+            iter.next();
+        }
+
+        // Done
+        tracing::info!(
+            removed_states,
+            removed_cells,
+            block_id = %top_blocks.mc_block,
+            elapsed_sec = instant.elapsed().as_secs_f64(),
+            "finished shard states GC",
+        );
+        Ok(top_blocks)
+    }
+
+    /// Searches for an edge with the least referenced masterchain block
+    ///
+    /// Returns `None` if all states are recent enough
+    pub async fn compute_recent_blocks(&self, mut mc_seqno: u32) -> Result<Option<TopBlocks>> {
+        // 0. Adjust masterchain seqno with minimal referenced masterchain state
+        if let Some(min_ref_mc_seqno) = self.min_ref_mc_state.seqno() {
+            if min_ref_mc_seqno < mc_seqno {
+                mc_seqno = min_ref_mc_seqno;
+            }
+        }
+
+        // 1. Find target block
+
+        // Find block id using states table
+        let mc_block_id = match self
+            .find_mc_block_id(mc_seqno)
+            .context("Failed to find block id by seqno")?
+        {
+            Some(block_id) => block_id,
+            None => return Ok(None),
+        };
+
+        // Find block handle
+        let handle = match self.block_handle_storage.load_handle(&mc_block_id)? {
+            Some(handle) if handle.meta().has_data() => handle,
+            // Skip blocks without handle or data
+            _ => return Ok(None),
+        };
+
+        // 2. Find minimal referenced masterchain block from the target block
+
+        let block_data = self.block_storage.load_block_data(&handle).await?;
+        let block_info = block_data
+            .block()
+            .load_info()
+            .context("Failed to read target block info")?;
+
+        // Find full min masterchain reference id
+        let min_ref_mc_seqno = block_info.min_ref_mc_seqno;
+        let min_ref_block_id = match self.find_mc_block_id(min_ref_mc_seqno)? {
+            Some(block_id) => block_id,
+            None => return Ok(None),
+        };
+
+        // Find block handle
+        let min_ref_block_handle = match self
+            .block_handle_storage
+            .load_handle(&min_ref_block_id)
+            .context("Failed to find min ref mc block handle")?
+        {
+            Some(handle) if handle.meta().has_data() => handle,
+            // Skip blocks without handle or data
+            _ => return Ok(None),
+        };
+
+        // Compute `TopBlocks` from block data
+        self.block_storage
+            .load_block_data(&min_ref_block_handle)
+            .await
+            .and_then(|block_data| TopBlocks::from_mc_block(&block_data))
+            .map(Some)
+    }
+
+    fn load_state_root(&self, block_id_short: BlockIdShort) -> Result<HashBytes> {
+        let shard_states = &self.db.shard_states;
+        let shard_state = shard_states.get(block_id_short.to_vec())?;
+        match shard_state {
+            Some(root) => Ok(HashBytes::from_slice(&root[..32])),
+            None => Err(ShardStateStorageError::NotFound.into()),
+        }
+    }
+
+    fn find_mc_block_id(&self, mc_seqno: u32) -> Result<Option<BlockId>> {
+        let shard_states = &self.db.shard_states;
+        Ok(shard_states
+            .get(
+                BlockIdShort {
+                    shard: ShardIdent::MASTERCHAIN,
+                    seqno: mc_seqno,
+                }
+                .to_vec(),
+            )?
+            .and_then(|value| {
+                let value = value.as_ref();
+                if value.len() < 96 {
+                    return None;
+                }
+
+                let root_hash: [u8; 32] = value[32..64].try_into().unwrap();
+                let file_hash: [u8; 32] = value[64..96].try_into().unwrap();
+
+                Some(BlockId {
+                    shard: ShardIdent::MASTERCHAIN,
+                    seqno: mc_seqno,
+                    root_hash: HashBytes(root_hash),
+                    file_hash: HashBytes(file_hash),
+                })
+            }))
+    }
+}
+
+#[derive(Debug, Copy, Clone)]
+pub struct ShardStateStorageMetrics {
+    #[cfg(feature = "count-cells")]
+    pub storage_cell_live_count: usize,
+    #[cfg(feature = "count-cells")]
+    pub storage_cell_max_live_count: usize,
+    pub max_new_mc_cell_count: usize,
+    pub max_new_sc_cell_count: usize,
+}
+
+async fn prepare_file_db_dir(file_db_path: PathBuf, folder: &str) -> Result<Arc<PathBuf>> {
+    let dir = Arc::new(file_db_path.join(folder));
+    tokio::fs::create_dir_all(dir.as_ref()).await?;
+    Ok(dir)
+}
+
+#[derive(thiserror::Error, Debug)]
+enum ShardStateStorageError {
+    #[error("Not found")]
+    NotFound,
+    #[error("Block handle id mismatch")]
+    BlockHandleIdMismatch,
+}
diff --git a/storage/src/shard_state_storage/replace_transaction.rs b/storage/src/shard_state_storage/replace_transaction.rs
new file mode 100644
index 000000000..44570d4be
--- /dev/null
+++ b/storage/src/shard_state_storage/replace_transaction.rs
@@ -0,0 +1,475 @@
+use std::sync::Arc;
+
+use anyhow::{Context, Result};
+use everscale_types::cell::*;
+use everscale_types::models::BlockId;
+
+use super::cell_storage::*;
+use super::entries_buffer::*;
+use super::files_context::*;
+use super::shard_state_reader::*;
+use crate::db::*;
+use tycho_block_util::*;
+
+pub struct ShardStateReplaceTransaction<'a> {
+    db: &'a Db,
+    cell_storage: &'a Arc<CellStorage>,
+    min_ref_mc_state: &'a Arc<MinRefMcStateTracker>,
+    reader: ShardStatePacketReader,
+    header: Option<BocHeader>,
+    cells_read: u64,
+}
+
+impl<'a> ShardStateReplaceTransaction<'a> {
+    pub fn new(
+        db: &'a Db,
+        cell_storage: &'a Arc<CellStorage>,
+        min_ref_mc_state: &'a Arc<MinRefMcStateTracker>,
+    ) -> Self {
+        Self {
+            db,
+            cell_storage,
+            min_ref_mc_state,
+            reader: ShardStatePacketReader::new(),
+            header: None,
+            cells_read: 0,
+        }
+    }
+
+    pub fn header(&self) -> &Option<BocHeader> {
+        &self.header
+    }
+
+    pub async fn process_packet(
+        &mut self,
+        ctx: &mut FilesContext,
+        packet: Vec<u8>,
+        progress_bar: &mut ProgressBar,
+    ) -> Result<bool> {
+        use tokio::io::AsyncWriteExt;
+
+        let cells_file = ctx.cells_file()?;
+
+        self.reader.set_next_packet(packet);
+
+        let header = loop {
+            if let Some(header) = &self.header {
+                break header;
+            }
+
+            let header = match self.reader.read_header()? {
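+                // The BOC header may arrive split across packets; `read_header`
+                // returns `None` until enough bytes are buffered.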
+                Some(header) => header,
+                None => return Ok(false),
+            };
+
+            tracing::debug!(?header);
+            progress_bar.set_total(header.cell_count);
+
+            self.header = Some(header);
+        };
+
+        let mut chunk_size = 0u32;
+        let mut buffer = [0; 256]; // At most 2 + 128 + 4 * 4
+
+        while self.cells_read < header.cell_count {
+            let cell_size = match self.reader.read_cell(header.ref_size, &mut buffer)? {
+                Some(cell_size) => cell_size,
+                None => break,
+            };
+
+            buffer[cell_size] = cell_size as u8;
+            cells_file.write_all(&buffer[..cell_size + 1]).await?;
+
+            chunk_size += cell_size as u32 + 1;
+            self.cells_read += 1;
+        }
+
+        progress_bar.set_progress(self.cells_read);
+
+        if chunk_size > 0 {
+            tracing::debug!(chunk_size, "creating chunk");
+            cells_file.write_u32_le(chunk_size).await?;
+        }
+
+        if self.cells_read < header.cell_count {
+            return Ok(false);
+        }
+
+        if header.has_crc && self.reader.read_crc()?.is_none() {
+            return Ok(false);
+        }
+
+        progress_bar.complete();
+        Ok(true)
+    }
+
+    pub async fn finalize(
+        self,
+        ctx: &mut FilesContext,
+        block_id: BlockId,
+        progress_bar: &mut ProgressBar,
+    ) -> Result<Arc<ShardStateStuff>> {
+        // 2^7 bits + 1 byte
+        const MAX_DATA_SIZE: usize = 128;
+        const CELLS_PER_BATCH: u64 = 1_000_000;
+
+        let header = match &self.header {
+            Some(header) => header,
+            None => {
+                return Err(ReplaceTransactionError::InvalidShardStatePacket)
+                    .context("BOC header not found")
+            }
+        };
+
+        let hashes_file =
+            ctx.create_mapped_hashes_file(header.cell_count as usize * HashesEntry::LEN)?;
+        let cells_file = ctx.create_mapped_cells_file().await?;
+
+        let raw = self.db.raw().as_ref();
+        let write_options = self.db.cells.new_write_config();
+
+        let mut tail = [0; 4];
+        let mut ctx = FinalizationContext::new(self.db);
+
+        // Allocate on heap to prevent big future size
+        let mut chunk_buffer = Vec::with_capacity(1 << 20);
+        let mut data_buffer = vec![0u8; MAX_DATA_SIZE];
+
+        let total_size = cells_file.length();
+        progress_bar.set_total(total_size as u64);
+
+        let mut file_pos = total_size;
+        let mut cell_index = header.cell_count;
+        let mut batch_len = 0;
+        while file_pos >= 4 {
+            file_pos -= 4;
+            unsafe { cells_file.read_exact_at(file_pos, &mut tail) };
+
+            let mut chunk_size = u32::from_le_bytes(tail) as usize;
+            chunk_buffer.resize(chunk_size, 0);
+
+            file_pos -= chunk_size;
+            unsafe { cells_file.read_exact_at(file_pos, &mut chunk_buffer) };
+
+            tracing::debug!(chunk_size, "processing chunk");
+
+            while chunk_size > 0 {
+                cell_index -= 1;
+                batch_len += 1;
+                let cell_size = chunk_buffer[chunk_size - 1] as usize;
+                chunk_size -= cell_size + 1;
+
+                let cell = RawCell::from_stored_data(
+                    &mut &chunk_buffer[chunk_size..chunk_size + cell_size],
+                    header.ref_size,
+                    header.cell_count as usize,
+                    cell_index as usize,
+                    &mut data_buffer,
+                )?;
+
+                for (&index, buffer) in cell
+                    .reference_indices
+                    .iter()
+                    .zip(ctx.entries_buffer.iter_child_buffers())
+                {
+                    // SAFETY: `buffer` is guaranteed to be in separate memory area
+                    unsafe { hashes_file.read_exact_at(index as usize * HashesEntry::LEN, buffer) }
+                }
+
+                self.finalize_cell(&mut ctx, cell_index as u32, cell)?;
+
+                // SAFETY: `entries_buffer` is guaranteed to be in separate memory area
+                unsafe {
+                    hashes_file.write_all_at(
+                        cell_index as usize * HashesEntry::LEN,
+                        ctx.entries_buffer.current_entry_buffer(),
+                    )
+                };
+
+                chunk_buffer.truncate(chunk_size);
+            }
+
+            if batch_len > CELLS_PER_BATCH {
+                ctx.finalize_cell_usages();
+                raw.write_opt(std::mem::take(&mut ctx.write_batch), &write_options)?;
+                batch_len = 0;
+            }
+
+            progress_bar.set_progress((total_size -
file_pos) as u64); + tokio::task::yield_now().await; + } + + if batch_len > 0 { + ctx.finalize_cell_usages(); + raw.write_opt(std::mem::take(&mut ctx.write_batch), &write_options)?; + } + + // Current entry contains root cell + let root_hash = ctx.entries_buffer.repr_hash(); + ctx.final_check(root_hash)?; + + let shard_state_key = block_id.as_short_id().to_vec(); + self.db.shard_states.insert(&shard_state_key, root_hash)?; + + progress_bar.complete(); + + // Load stored shard state + match self.db.shard_states.get(shard_state_key)? { + Some(root) => { + let cell_id = HashBytes::from_slice(&root[..32]); + + let cell = self.cell_storage.load_cell(&cell_id)?; + Ok(Arc::new(ShardStateStuff::new( + block_id, + Cell::from(cell as Arc<_>), + self.min_ref_mc_state, + )?)) + } + None => Err(ReplaceTransactionError::NotFound.into()), + } + } + + fn finalize_cell( + &self, + ctx: &mut FinalizationContext, + cell_index: u32, + mut cell: RawCell<'_>, + ) -> Result<()> { + use sha2::{Digest, Sha256}; + + let (mut current_entry, children) = + ctx.entries_buffer.split_children(&cell.reference_indices); + + current_entry.clear(); + + // Prepare mask and counters + let mut children_mask = LevelMask::new(0); + let mut tree_bits_count = cell.bit_len as u64; + let mut tree_cell_count = 1; + + for (_, child) in children.iter() { + children_mask |= child.level_mask(); + tree_bits_count += child.tree_bits_count(); + tree_cell_count += child.tree_cell_count(); + } + + let mut is_merkle_cell = false; + let mut is_pruned_cell = false; + let level_mask = match cell.descriptor.cell_type() { + CellType::Ordinary => children_mask, + CellType::PrunedBranch => { + is_pruned_cell = true; + cell.descriptor.level_mask() + } + CellType::LibraryReference => LevelMask::new(0), + CellType::MerkleProof => { + is_merkle_cell = true; + children_mask.virtualize(1) + } + CellType::MerkleUpdate => { + is_merkle_cell = true; + children_mask.virtualize(1) + } + }; + + if cell.descriptor.level_mask() != level_mask.to_byte() { + return Err(ReplaceTransactionError::InvalidCell).context("Level mask mismatch"); + } + + // Save mask and counters + current_entry.set_level_mask(level_mask); + current_entry.set_cell_type(cell.descriptor.cell_type()); + current_entry.set_tree_bits_count(tree_bits_count); + current_entry.set_tree_cell_count(tree_cell_count); + + // Calculate hashes + let hash_count = if is_pruned_cell { + 1 + } else { + level_mask.level() + 1 + }; + + let mut max_depths = [0u16; 4]; + for i in 0..hash_count { + let mut hasher = Sha256::new(); + + let level_mask = if is_pruned_cell { + level_mask + } else { + LevelMask::from_level(i) + }; + + cell.descriptor.d1 &= !(CellDescriptor::LEVEL_MASK | CellDescriptor::STORE_HASHES_MASK); + cell.descriptor.d1 |= u8::from(level_mask) << 5; + hasher.update([cell.descriptor.d1, cell.descriptor.d2]); + + if i == 0 { + hasher.update(cell.data); + } else { + hasher.update(current_entry.get_hash_slice(i - 1)); + } + + for (index, child) in children.iter() { + let child_depth = if child.cell_type().is_pruned_branch() { + let child_data = ctx + .pruned_branches + .get(index) + .ok_or(ReplaceTransactionError::InvalidCell) + .context("Pruned branch data not found")?; + child.pruned_branch_depth(i, child_data) + } else { + child.depth(if is_merkle_cell { i + 1 } else { i }) + }; + hasher.update(child_depth.to_be_bytes()); + + let depth = &mut max_depths[i as usize]; + *depth = std::cmp::max(*depth, child_depth + 1); + + current_entry.set_depth(i, *depth); + } + + for (index, child) in children.iter() { 
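+                // Pruned-branch children keep their higher hashes inline in
+                // the cell data, so the hash comes from there rather than
+                // from the child's hashes entry.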
+ let child_hash = if child.cell_type().is_pruned_branch() { + let child_data = ctx + .pruned_branches + .get(index) + .ok_or(ReplaceTransactionError::InvalidCell) + .context("Pruned branch data not found")?; + child + .pruned_branch_hash(i, child_data) + .context("Invalid pruned branch")? + } else { + child.hash(if is_merkle_cell { i + 1 } else { i }) + }; + hasher.update(child_hash); + } + + current_entry.set_hash(i, hasher.finalize().as_slice()); + } + + // Update pruned branches + if is_pruned_cell { + ctx.pruned_branches.insert(cell_index, cell.data.to_vec()); + } + + // Write cell data + let output_buffer = &mut ctx.output_buffer; + output_buffer.clear(); + + output_buffer.extend_from_slice(&[ + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + cell.descriptor.d1, + cell.descriptor.d2, + ]); + output_buffer.extend_from_slice(&(cell.bit_len as u16).to_le_bytes()); + output_buffer.extend_from_slice(cell.data); + + let hash_count = cell.descriptor.hash_count(); + for i in 0..hash_count { + output_buffer.extend_from_slice(current_entry.get_hash_slice(i)); + output_buffer.extend_from_slice(current_entry.get_depth_slice(i)); + } + + // Write cell references + for (index, child) in children.iter() { + let child_hash = if child.cell_type().is_pruned_branch() { + let child_data = ctx + .pruned_branches + .get(index) + .ok_or(ReplaceTransactionError::InvalidCell) + .context("Pruned branch data not found")?; + child + .pruned_branch_hash(MAX_LEVEL, child_data) + .context("Invalid pruned branch")? + } else { + child.hash(MAX_LEVEL) + }; + + *ctx.cell_usages.entry(*child_hash).or_default() += 1; + output_buffer.extend_from_slice(child_hash); + } + + // // Write counters + // output_buffer.extend_from_slice(current_entry.get_tree_counters()); + + // Save serialized data + let repr_hash = if is_pruned_cell { + current_entry + .as_reader() + .pruned_branch_hash(3, cell.data) + .context("Invalid pruned branch")? 
+        } else {
+            current_entry.as_reader().hash(MAX_LEVEL)
+        };
+
+        ctx.write_batch
+            .merge_cf(&ctx.cells_cf, repr_hash, output_buffer.as_slice());
+        ctx.cell_usages.insert(*repr_hash, -1);
+
+        // Done
+        Ok(())
+    }
+}
+
+struct FinalizationContext<'a> {
+    pruned_branches: FastHashMap<u32, Vec<u8>>,
+    cell_usages: FastHashMap<[u8; 32], i32>,
+    entries_buffer: EntriesBuffer,
+    output_buffer: Vec<u8>,
+    cells_cf: BoundedCfHandle<'a>,
+    write_batch: rocksdb::WriteBatch,
+}
+
+impl<'a> FinalizationContext<'a> {
+    fn new(db: &'a Db) -> Self {
+        Self {
+            pruned_branches: Default::default(),
+            cell_usages: FastHashMap::with_capacity_and_hasher(128, Default::default()),
+            entries_buffer: EntriesBuffer::new(),
+            output_buffer: Vec::with_capacity(1 << 10),
+            cells_cf: db.cells.cf(),
+            write_batch: rocksdb::WriteBatch::default(),
+        }
+    }
+
+    fn finalize_cell_usages(&mut self) {
+        self.cell_usages.retain(|key, &mut rc| {
+            if rc > 0 {
+                self.write_batch.merge_cf(
+                    &self.cells_cf,
+                    key,
+                    refcount::encode_positive_refcount(rc as u32),
+                );
+            }
+
+            rc < 0
+        });
+    }
+
+    fn final_check(&self, root_hash: &[u8; 32]) -> Result<()> {
+        anyhow::ensure!(
+            self.cell_usages.len() == 1 && self.cell_usages.contains_key(root_hash),
+            "Invalid shard state cell"
+        );
+        Ok(())
+    }
+}
+
+#[derive(thiserror::Error, Debug)]
+enum ReplaceTransactionError {
+    #[error("Not found")]
+    NotFound,
+    #[error("Invalid shard state packet")]
+    InvalidShardStatePacket,
+    #[error("Invalid cell")]
+    InvalidCell,
+}
+
+const MAX_LEVEL: u8 = 3;
diff --git a/storage/src/shard_state_storage/shard_state_reader.rs b/storage/src/shard_state_storage/shard_state_reader.rs
new file mode 100644
index 000000000..001a2eecf
--- /dev/null
+++ b/storage/src/shard_state_storage/shard_state_reader.rs
@@ -0,0 +1,527 @@
+use std::io::Read;
+
+use anyhow::{Context, Result};
+use crc::{Crc, CRC_32_ISCSI};
+use everscale_types::cell::{CellDescriptor, CellType, LevelMask};
+use smallvec::SmallVec;
+
+macro_rules! try_read {
try_read { + ($expr:expr) => { + match $expr { + Ok(data) => data, + Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => return Ok(None), + Err(e) => return Err(e.into()), + } + }; +} + +pub struct ShardStatePacketReader { + hasher: crc::Digest<'static, u32>, + has_crc: bool, + offset: usize, + current_packet: Vec, + next_packet: Vec, + bytes_to_skip: usize, +} + +impl ShardStatePacketReader { + pub fn new() -> Self { + Self { + hasher: CRC.digest(), + has_crc: true, + offset: 0, + current_packet: Default::default(), + next_packet: Default::default(), + bytes_to_skip: 0, + } + } + + pub fn read_header(&mut self) -> Result> { + const BOC_INDEXED_TAG: u32 = 0x68ff65f3; + const BOC_INDEXED_CRC32_TAG: u32 = 0xacc3a728; + const BOC_GENERIC_TAG: u32 = 0xb5ee9c72; + + if self.process_skip() == ReaderAction::Incomplete { + return Ok(None); + } + + let mut src = self.begin(); + let mut total_size = 0u64; + + let magic = try_read!(src.read_be_u32()); + total_size += 4; + + let first_byte = try_read!(src.read_byte()); + total_size += 1; + + let index_included; + let mut has_crc = false; + let ref_size; + + match magic { + BOC_INDEXED_TAG => { + ref_size = first_byte as usize; + index_included = true; + } + BOC_INDEXED_CRC32_TAG => { + ref_size = first_byte as usize; + index_included = true; + has_crc = true; + } + BOC_GENERIC_TAG => { + index_included = first_byte & 0b1000_0000 != 0; + has_crc = first_byte & 0b0100_0000 != 0; + ref_size = (first_byte & 0b0000_0111) as usize; + } + _ => { + return Err(ShardStateParserError::InvalidShardStateHeader).context("Invalid flags") + } + } + + src.reader.has_crc = has_crc; + + if ref_size == 0 || ref_size > 4 { + return Err(ShardStateParserError::InvalidShardStateHeader) + .context("Ref size must be in range [1;4]"); + } + + let offset_size = try_read!(src.read_byte()) as u64; + total_size += 1; + if offset_size == 0 || offset_size > 8 { + return Err(ShardStateParserError::InvalidShardStateHeader) + .context("Offset size must be in range [1;8]"); + } + + let cell_count = try_read!(src.read_be_uint(ref_size)); + total_size += ref_size as u64; + + let root_count = try_read!(src.read_be_uint(ref_size)); + total_size += ref_size as u64; + + try_read!(src.read_be_uint(ref_size)); // skip absent + total_size += ref_size as u64; + + if root_count != 1 { + return Err(ShardStateParserError::InvalidShardStateHeader) + .context("Expected one root cell"); + } + if root_count > cell_count { + return Err(ShardStateParserError::InvalidShardStateHeader) + .context("Root count is greater then cell count"); + } + + total_size += try_read!(src.read_be_uint(offset_size as usize)); // total cells size + total_size += offset_size; + + let root_index = if magic == BOC_GENERIC_TAG { + let root_index = try_read!(src.read_be_uint(ref_size)); + total_size += ref_size as u64; + root_index + } else { + 0 + }; + + src.end(); + + if index_included { + let index_size = cell_count * offset_size; + total_size += index_size; + self.set_skip(index_size as usize); + } + + if has_crc { + total_size += 4; + } + + Ok(Some(BocHeader { + root_index, + index_included, + has_crc, + ref_size, + offset_size, + cell_count, + total_size, + })) + } + + pub fn read_cell(&mut self, ref_size: usize, buffer: &mut [u8]) -> Result> { + if self.process_skip() == ReaderAction::Incomplete { + return Ok(None); + } + + let mut src = self.begin(); + + let d1 = try_read!(src.read_byte()); + let l = d1 >> 5; + let h = (d1 & 0b0001_0000) != 0; + let r = (d1 & 0b0000_0111) as usize; + let absent = r == 0b111 && h; + 
+
+        buffer[0] = d1;
+
+        let size = if absent {
+            let data_size = 32 * ((LevelMask::new(l).level() + 1) as usize);
+            try_read!(src.read_exact(&mut buffer[1..1 + data_size]));
+
+            tracing::info!("ABSENT");
+
+            // 1 byte of d1 + fixed data size of absent cell
+            1 + data_size
+        } else {
+            if r > 4 {
+                tracing::error!("CELLS: {r}");
+                return Err(ShardStateParserError::InvalidShardStateCell)
+                    .context("Cell must contain at most 4 references");
+            }
+
+            let d2 = try_read!(src.read_byte());
+            buffer[1] = d2;
+
+            // Skip optional precalculated hashes
+            let hash_count = LevelMask::new(l).level() as usize + 1;
+            if h && !src.skip(hash_count * (32 + 2)) {
+                return Ok(None);
+            }
+
+            let data_size = ((d2 >> 1) + u8::from(d2 & 1 != 0)) as usize;
+            try_read!(src.read_exact(&mut buffer[2..2 + data_size + r * ref_size]));
+
+            // 2 bytes for d1 and d2 + data size + total references size
+            2 + data_size + r * ref_size
+        };
+
+        src.end();
+
+        Ok(Some(size))
+    }
+
+    pub fn read_crc(&mut self) -> Result<Option<()>> {
+        if self.process_skip() == ReaderAction::Incomplete {
+            return Ok(None);
+        }
+
+        let current_crc = std::mem::replace(&mut self.hasher, CRC.digest()).finalize();
+
+        let mut src = self.begin();
+        let target_crc = try_read!(src.read_le_u32());
+        src.end();
+
+        if current_crc == target_crc {
+            Ok(Some(()))
+        } else {
+            Err(ShardStateParserError::CrcMismatch.into())
+        }
+    }
+
+    pub fn set_next_packet(&mut self, packet: Vec<u8>) {
+        self.next_packet = packet;
+    }
+
+    fn begin(&'_ mut self) -> ShardStatePacketReaderTransaction<'_> {
+        let offset = self.offset;
+        ShardStatePacketReaderTransaction {
+            reader: self,
+            reading_next_packet: false,
+            offset,
+        }
+    }
+
+    fn set_skip(&mut self, n: usize) {
+        self.bytes_to_skip = n;
+    }
+
+    fn process_skip(&mut self) -> ReaderAction {
+        if self.bytes_to_skip == 0 {
+            return ReaderAction::Complete;
+        }
+
+        let mut n = std::mem::take(&mut self.bytes_to_skip);
+
+        let remaining = self.current_packet.len() - self.offset;
+        match n.cmp(&remaining) {
+            std::cmp::Ordering::Less => {
+                self.hasher
+                    .update(&self.current_packet[self.offset..self.offset + n]);
+                self.offset += n;
+                ReaderAction::Complete
+            }
+            std::cmp::Ordering::Equal => {
+                self.hasher.update(&self.current_packet[self.offset..]);
+                self.offset = 0;
+                self.current_packet = std::mem::take(&mut self.next_packet);
+                ReaderAction::Complete
+            }
+            std::cmp::Ordering::Greater => {
+                n -= remaining;
+                self.hasher.update(&self.current_packet[self.offset..]);
+                self.offset = 0;
+                self.current_packet = std::mem::take(&mut self.next_packet);
+
+                if n > self.current_packet.len() {
+                    n -= self.current_packet.len();
+                    self.hasher.update(&self.current_packet);
+                    self.current_packet = Vec::new();
+                    self.bytes_to_skip = n;
+                    ReaderAction::Incomplete
+                } else {
+                    self.offset = n;
+                    self.hasher.update(&self.current_packet[..self.offset]);
+                    ReaderAction::Complete
+                }
+            }
+        }
+    }
+}
+
+static CRC: Crc<u32> = Crc::<u32>::new(&CRC_32_ISCSI);
+
+#[derive(Debug)]
+pub struct BocHeader {
+    pub root_index: u64,
+    pub index_included: bool,
+    pub has_crc: bool,
+    pub ref_size: usize,
+    pub offset_size: u64,
+    pub cell_count: u64,
+    pub total_size: u64,
+}
+
+pub struct RawCell<'a> {
+    pub descriptor: CellDescriptor,
+    pub data: &'a [u8],
+    pub bit_len: u16,
+    pub reference_indices: SmallVec<[u32; 4]>,
+}
+
+impl<'a> RawCell<'a> {
+    pub fn from_stored_data<R>(
+        src: &mut R,
+        ref_size: usize,
+        cell_count: usize,
+        cell_index: usize,
+        data_buffer: &'a mut [u8],
+    ) -> Result<Self>
+    where
+        R: Read,
+    {
+        let mut descriptor = [0u8; 2];
+        src.read_exact(&mut descriptor)?;
+        let descriptor = CellDescriptor::new(descriptor);
+        let byte_len = descriptor.byte_len() as usize;
+        let ref_count = descriptor.reference_count() as usize;
+
+        anyhow::ensure!(!descriptor.is_absent(), "Absent cells are not supported");
+
+        let data = &mut data_buffer[0..byte_len];
+        src.read_exact(&mut data[..byte_len])?;
+
+        let mut reference_indices = SmallVec::with_capacity(ref_count);
+        for _ in 0..ref_count {
+            let index = src.read_be_uint(ref_size)? as usize;
+            if index > cell_count || index <= cell_index {
+                return Err(ShardStateParserError::InvalidShardStateCell)
+                    .context("Reference index out of range");
+            } else {
+                reference_indices.push(index as u32);
+            }
+        }
+
+        let bit_len = if descriptor.is_aligned() {
+            (byte_len * 8) as u16
+        } else if let Some(data) = data.last() {
+            byte_len as u16 * 8 - data.trailing_zeros() as u16 - 1
+        } else {
+            0
+        };
+
+        Ok(RawCell {
+            descriptor,
+            data,
+            bit_len,
+            reference_indices,
+        })
+    }
+}
+
+#[derive(Copy, Clone, Eq, PartialEq)]
+pub enum ReaderAction {
+    Incomplete,
+    Complete,
+}
+
+pub struct ShardStatePacketReaderTransaction<'a> {
+    reader: &'a mut ShardStatePacketReader,
+    reading_next_packet: bool,
+    offset: usize,
+}
+
+impl<'a> ShardStatePacketReaderTransaction<'a> {
+    pub fn skip(&mut self, mut n: usize) -> bool {
+        loop {
+            let current_packet = match self.reading_next_packet {
+                // Reading non-empty current packet
+                false if self.offset < self.reader.current_packet.len() => {
+                    &self.reader.current_packet
+                }
+
+                // Current packet is empty - retry and switch to next
+                false => {
+                    self.reading_next_packet = true;
+                    self.offset = 0;
+                    continue;
+                }
+
+                // Reading non-empty next packet
+                true if self.offset < self.reader.next_packet.len() => &self.reader.next_packet,
+
+                // Reading next packet which is empty
+                true => return false,
+            };
+
+            let skipped = std::cmp::min(current_packet.len() - self.offset, n);
+            n -= skipped;
+            self.offset += skipped;
+
+            if n == 0 {
+                return true;
+            }
+        }
+    }
+
+    pub fn end(self) {
+        if self.reading_next_packet {
+            if self.reader.has_crc {
+                // Write to the hasher until the end of current packet
+                self.reader
+                    .hasher
+                    .update(&self.reader.current_packet[self.reader.offset..]);
+
+                // Write to the hasher current bytes
+                self.reader
+                    .hasher
+                    .update(&self.reader.next_packet[..self.offset]);
+            }
+
+            // Replace current packet
+            self.reader.current_packet = std::mem::take(&mut self.reader.next_packet);
+        } else if self.reader.has_crc {
+            // Write to the hasher current bytes
+            self.reader
+                .hasher
+                .update(&self.reader.current_packet[self.reader.offset..self.offset]);
+        }
+
+        // Bump offset
+        self.reader.offset = self.offset;
+    }
+}
+
+impl<'a> Read for ShardStatePacketReaderTransaction<'a> {
+    fn read(&mut self, mut buf: &mut [u8]) -> std::io::Result<usize> {
+        let mut result = 0;
+
+        loop {
+            let current_packet = match self.reading_next_packet {
+                // Reading non-empty current packet
+                false if self.offset < self.reader.current_packet.len() => {
+                    &self.reader.current_packet
+                }
+
+                // Current packet is empty - retry and switch to next
+                false => {
+                    self.reading_next_packet = true;
+                    self.offset = 0;
+                    continue;
+                }
+
+                // Reading non-empty next packet
+                true if self.offset < self.reader.next_packet.len() => &self.reader.next_packet,
+
+                // Reading next packet which is empty
+                true => {
+                    return Err(std::io::Error::new(
+                        std::io::ErrorKind::UnexpectedEof,
+                        "packet buffer underflow",
+                    ))
+                }
+            };
+
+            let n = std::cmp::min(current_packet.len() - self.offset, buf.len());
+            for i in 0..n {
+                buf[i] = current_packet[self.offset + i];
+            }
+
+            result += n;
+            self.offset += n;
+
+            let tmp = buf;
+            buf = &mut tmp[n..];
+
+            if buf.is_empty() {
+                return Ok(result);
+            }
+        }
+    }
+}
+
+#[derive(thiserror::Error, Debug)]
+enum ShardStateParserError {
+    #[error("Invalid shard state header")]
+    InvalidShardStateHeader,
+    #[error("Invalid shard state cell")]
+    InvalidShardStateCell,
+    #[error("Crc mismatch")]
+    CrcMismatch,
+}
+
+pub trait ByteOrderRead {
+    fn read_be_uint(&mut self, bytes: usize) -> std::io::Result<u64>;
+    fn read_byte(&mut self) -> std::io::Result<u8>;
+    fn read_be_u32(&mut self) -> std::io::Result<u32>;
+    fn read_le_u32(&mut self) -> std::io::Result<u32>;
+}
+
+impl<T: Read> ByteOrderRead for T {
+    fn read_be_uint(&mut self, bytes: usize) -> std::io::Result<u64> {
+        match bytes {
+            1 => {
+                let mut buf = [0];
+                self.read_exact(&mut buf)?;
+                Ok(buf[0] as u64)
+            }
+            2 => {
+                let mut buf = [0; 2];
+                self.read_exact(&mut buf)?;
+                Ok(u16::from_be_bytes(buf) as u64)
+            }
+            3..=4 => {
+                let mut buf = [0; 4];
+                self.read_exact(&mut buf[4 - bytes..])?;
+                Ok(u32::from_be_bytes(buf) as u64)
+            }
+            5..=8 => {
+                let mut buf = [0; 8];
+                self.read_exact(&mut buf[8 - bytes..])?;
+                Ok(u64::from_be_bytes(buf))
+            }
+            _ => Err(std::io::Error::new(
+                std::io::ErrorKind::InvalidInput,
+                "too many bytes to read in u64",
+            )),
+        }
+    }
+
+    fn read_byte(&mut self) -> std::io::Result<u8> {
+        self.read_be_uint(1).map(|value| value as u8)
+    }
+
+    fn read_be_u32(&mut self) -> std::io::Result<u32> {
+        self.read_be_uint(4).map(|value| value as u32)
+    }
+
+    fn read_le_u32(&mut self) -> std::io::Result<u32> {
+        let mut buf = [0; 4];
+        self.read_exact(&mut buf)?;
+        Ok(u32::from_le_bytes(buf))
+    }
+}
\ No newline at end of file
diff --git a/validator/Cargo.toml b/validator/Cargo.toml
new file mode 100644
index 000000000..5b2c4810b
--- /dev/null
+++ b/validator/Cargo.toml
@@ -0,0 +1,17 @@
+[package]
+name = "tycho-validator"
+version = "0.0.1"
+edition = "2021"
+description = "A validator node."
+ +[dependencies] +# crates.io deps + +# local deps +tycho-core = { path = "../core", version = "=0.0.1" } +tycho-consensus = { path = "../consensus", version = "=0.0.1" } +tycho-storage = { path = "../storage", version = "0.1" } +tycho-util = { path = "../util", version = "=0.0.1" } + +[lints] +workspace= true From 57ba0e9fee1c07ebbc8c4ef6613e9f095375b228 Mon Sep 17 00:00:00 2001 From: Vladimir Petrzhikovskii Date: Wed, 14 Feb 2024 15:50:24 +0100 Subject: [PATCH 04/19] refactor(storage): backport updated gc --- Cargo.lock | 121 +++++---- collator/Cargo.toml | 2 +- storage/Cargo.toml | 1 + storage/src/db/mod.rs | 2 +- storage/src/db/refcount.rs | 7 +- .../src/shard_state_storage/cell_storage.rs | 243 ++++++++++++------ storage/src/shard_state_storage/mod.rs | 8 +- .../replace_transaction.rs | 2 +- validator/Cargo.toml | 17 -- 9 files changed, 247 insertions(+), 156 deletions(-) delete mode 100644 validator/Cargo.toml diff --git a/Cargo.lock b/Cargo.lock index e6bd19d41..c66318ce4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -95,9 +95,9 @@ checksum = "0952808a6c2afd1aa8947271f3a60f1a6763c7b912d210184c5149b5cf147247" [[package]] name = "arc-swap" -version = "1.7.0" +version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b3d0060af21e8d11a926981cc00c6c1541aa91dd64b9f881985c3da1094425f" +checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457" [[package]] name = "argh" @@ -118,7 +118,7 @@ dependencies = [ "argh_shared", "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.55", ] [[package]] @@ -171,9 +171,9 @@ dependencies = [ [[package]] name = "autocfg" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +checksum = "f1fdabc7756949593fe60f30ec81974b613357de856987752631dea1e3394c80" [[package]] name = "backtrace" @@ -229,7 +229,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - "syn 2.0.53", + "syn 2.0.55", ] [[package]] @@ -371,9 +371,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.3" +version = "4.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "949626d00e063efc93b6dca932419ceb5432f99769911c0b995f7e884c778813" +checksum = "90bc066a67923782aa8515dbaea16946c5bcc5addbd668bb80af688e53e548a0" dependencies = [ "clap_builder", "clap_derive", @@ -393,14 +393,14 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.3" +version = "4.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90239a040c80f5e14809ca132ddc4176ab33d5e17e49691793296e3fcb34d72f" +checksum = "528131438037fd55894f62d6e9f068b8f45ac57ffa77517819645d10aed04f64" dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.55", ] [[package]] @@ -528,7 +528,7 @@ checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.55", ] [[package]] @@ -601,7 +601,7 @@ checksum = "487585f4d0c6655fe74905e2504d8ad6908e4db67f744eb140876906c2f3175d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.55", ] [[package]] @@ -616,9 +616,9 @@ dependencies = [ [[package]] name = "either" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" +checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a" [[package]] 
name = "equivalent" @@ -688,7 +688,7 @@ checksum = "323d8b61c76be2c16eb2d72d007f1542fdeb3760fdf2e2cae219fc0da3db0c09" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.55", ] [[package]] @@ -702,9 +702,9 @@ dependencies = [ [[package]] name = "fastrand" -version = "2.0.1" +version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" +checksum = "658bd65b1cf4c852a3cc96f18a8ce7b5640f6b703f905c7d74532294c2a63984" [[package]] name = "fdlimit" @@ -736,7 +736,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.55", ] [[package]] @@ -831,9 +831,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "itoa" -version = "1.0.10" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" [[package]] name = "jobserver" @@ -1177,7 +1177,7 @@ dependencies = [ "pest_meta", "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.55", ] [[package]] @@ -1221,9 +1221,9 @@ checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" [[package]] name = "platforms" -version = "3.3.0" +version = "3.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "626dec3cac7cc0e1577a2ec3fc496277ec2baa084bebad95bb6fdbfae235f84c" +checksum = "db23d408679286588f4d4644f965003d056e3dd5abcaaa938116871d7ce2fee7" [[package]] name = "powerfmt" @@ -1239,12 +1239,12 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "prettyplease" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a41cf62165e97c7f814d2221421dbb9afcbcdb0a88068e5ea206e19951c2cbb5" +checksum = "8d3928fb5db768cb86f891ff014f0144589297e3c6a1aba6ed7cecfdace270c7" dependencies = [ "proc-macro2", - "syn 2.0.53", + "syn 2.0.55", ] [[package]] @@ -1284,9 +1284,9 @@ dependencies = [ [[package]] name = "quick_cache" -version = "0.4.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58c20af3800cee5134b79a3bd4a3d4b583c16ccfa5f53338f46400851a5b3819" +checksum = "b1380629287ed1247c1e0fcc6d43efdcec508b65382c9ab775cc8f3df7ca07b0" dependencies = [ "ahash", "equivalent", @@ -1391,9 +1391,9 @@ dependencies = [ [[package]] name = "rayon" -version = "1.8.1" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa7237101a77a10773db45d62004a272517633fbcc3df19d96455ede1122e051" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" dependencies = [ "either", "rayon-core", @@ -1439,7 +1439,7 @@ dependencies = [ "aho-corasick", "memchr", "regex-automata 0.4.6", - "regex-syntax 0.8.2", + "regex-syntax 0.8.3", ] [[package]] @@ -1459,7 +1459,7 @@ checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.8.2", + "regex-syntax 0.8.3", ] [[package]] @@ -1470,9 +1470,9 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" [[package]] name = "regex-syntax" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" +checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" [[package]] name = "ring" @@ -1651,14 +1651,14 @@ checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.55", ] [[package]] name = "serde_json" -version = "1.0.114" +version = "1.0.115" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5f09b1bd632ef549eaa9f60a1f8de742bdbc698e6cee2095fc84dde5f549ae0" +checksum = "12dc5c46daa8e9fdf4f5e71b6cf9a53f2487da0e86e55808e2d35539666497dd" dependencies = [ "itoa", "ryu", @@ -1771,6 +1771,12 @@ dependencies = [ "der", ] +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + [[package]] name = "strsim" version = "0.11.0" @@ -1796,9 +1802,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.53" +version = "2.0.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7383cd0e49fff4b6b90ca5670bfd3e9d6a733b3f90c686605aa7eec8c4996032" +checksum = "002a1b3dbf967edfafc32655d0f377ab0bb7b994aa1d32c8cc7e9b8bf3ebb8f0" dependencies = [ "proc-macro2", "quote", @@ -1819,9 +1825,9 @@ dependencies = [ [[package]] name = "sysinfo" -version = "0.30.5" +version = "0.30.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fb4f3438c8f6389c864e61221cbc97e9bca98b4daf39a5beb7bea660f528bb2" +checksum = "0c385888ef380a852a16209afc8cfad22795dd8873d69c9a14d2e2088f118d18" dependencies = [ "cfg-if", "core-foundation-sys", @@ -1867,7 +1873,7 @@ checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.55", ] [[package]] @@ -1938,9 +1944,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tl-proto" -version = "0.4.4" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3418163db528cc2324ed7bc9d52aa3ca7a8f73d685f8b21b319d2a08ee4b36d3" +checksum = "d4da430e55186abb18b4d1457a23eb0765af0dee66a9f741d652d6eaa476a8d7" dependencies = [ "bytes", "digest", @@ -1952,14 +1958,14 @@ dependencies = [ [[package]] name = "tl-proto-proc" -version = "0.4.3" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3f51de4865e0618b187c2b790c137de938d01fe5510116b959387b6133c20ac" +checksum = "5a3eaf6822a3ce34a40564dd3078a915d35c3c5fd1f6b3d81eab991e6d00a0fb" dependencies = [ "proc-macro2", "quote", "rustc-hash", - "syn 2.0.53", + "syn 2.0.55", "tl-scheme", ] @@ -2003,7 +2009,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.55", ] [[package]] @@ -2051,7 +2057,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.55", ] [[package]] @@ -2121,6 +2127,10 @@ name = "triomphe" version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "859eb650cfee7434994602c3a68b25d77ad9e68c8a6cd491616ef86661382eb3" +dependencies = [ + "serde", + "stable_deref_trait", +] [[package]] name = "tycho-block-util" @@ -2260,6 +2270,7 @@ dependencies = [ "thiserror", "tokio", "tracing", + "triomphe", 
"tycho-block-util", "tycho-constansts", "weedb", @@ -2396,7 +2407,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.55", "wasm-bindgen-shared", ] @@ -2418,7 +2429,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.55", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -2489,7 +2500,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be" dependencies = [ "windows-core", - "windows-targets 0.52.0", + "windows-targets 0.52.4", ] [[package]] @@ -2498,7 +2509,7 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" dependencies = [ - "windows-targets 0.52.0", + "windows-targets 0.52.4", ] [[package]] @@ -2676,7 +2687,7 @@ checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.55", ] [[package]] diff --git a/collator/Cargo.toml b/collator/Cargo.toml index 152d4de52..70c25a78c 100644 --- a/collator/Cargo.toml +++ b/collator/Cargo.toml @@ -10,7 +10,7 @@ description = "A collator node." # local deps tycho-core = { path = "../core", version = "=0.0.1" } tycho-consensus = { path = "../consensus", version = "=0.0.1" } -tycho-storage = { path = "../storage", version = "=0.0.1" } +tycho-storage = { path = "../storage", version = "=0.1.0" } tycho-util = { path = "../util", version = "=0.0.1" } [lints] diff --git a/storage/Cargo.toml b/storage/Cargo.toml index 372e6925a..f2437e0bb 100644 --- a/storage/Cargo.toml +++ b/storage/Cargo.toml @@ -36,6 +36,7 @@ rlimit = "0.10.1" fdlimit = "0.3.0" humantime = "2.1.0" sysinfo = "0.30.5" +triomphe = "0.1.11" [lints] workspace = true diff --git a/storage/src/db/mod.rs b/storage/src/db/mod.rs index acb1eda1d..806e1d4ba 100644 --- a/storage/src/db/mod.rs +++ b/storage/src/db/mod.rs @@ -8,7 +8,7 @@ use serde::{Deserialize, Serialize}; use weedb::{Caches, WeeDb}; pub use weedb::Stats as RocksdbStats; -pub use weedb::{rocksdb, BoundedCfHandle, ColumnFamily, Table, UnboundedCfHandle}; +pub use weedb::{rocksdb, BoundedCfHandle, ColumnFamily, Table}; pub mod refcount; pub mod tables; diff --git a/storage/src/db/refcount.rs b/storage/src/db/refcount.rs index 7e7bfd5ab..a9fbb800d 100644 --- a/storage/src/db/refcount.rs +++ b/storage/src/db/refcount.rs @@ -53,6 +53,7 @@ pub fn decode_value_with_rc(bytes: &[u8]) -> (RcType, Option<&[u8]>) { } } +// will be use in persistent storage writer pub fn strip_refcount(bytes: &[u8]) -> Option<&[u8]> { if bytes.len() < RC_BYTES { return None; @@ -64,10 +65,6 @@ pub fn strip_refcount(bytes: &[u8]) -> Option<&[u8]> { } } -pub fn has_value(bytes: &[u8]) -> bool { - bytes.len() >= RC_BYTES && RcType::from_le_bytes(bytes[..RC_BYTES].try_into().unwrap()) > 0 -} - pub fn add_positive_refount(rc: u32, data: Option<&[u8]>, target: &mut Vec) { target.extend_from_slice(&RcType::from(rc).to_le_bytes()); if let Some(data) = data { @@ -85,4 +82,4 @@ pub fn encode_negative_refcount(rc: u32) -> [u8; RC_BYTES] { type RcType = i64; -const RC_BYTES: usize = std::mem::size_of::(); \ No newline at end of file +const RC_BYTES: usize = std::mem::size_of::(); diff --git a/storage/src/shard_state_storage/cell_storage.rs b/storage/src/shard_state_storage/cell_storage.rs index af2c4166a..8750b46b6 100644 --- 
a/storage/src/shard_state_storage/cell_storage.rs +++ b/storage/src/shard_state_storage/cell_storage.rs @@ -1,14 +1,14 @@ use std::cell::UnsafeCell; use std::collections::hash_map; use std::mem::{ManuallyDrop, MaybeUninit}; -use std::sync::atomic::{AtomicU8, Ordering}; +use std::sync::atomic::{AtomicI64, AtomicU8, Ordering}; use std::sync::{Arc, Weak}; use anyhow::{Context, Result}; use bumpalo::Bump; -use bytes::Bytes; use everscale_types::cell::*; -use quick_cache::sync::Cache; +use quick_cache::sync::{Cache, DefaultLifecycle}; +use triomphe::ThinArc; use crate::db::*; use tycho_block_util::{CacheStats, FastDashMap, FastHashMap, FastHasherState}; pub struct CellStorage { db: Arc<Db>, @@ -42,30 +42,60 @@ impl CellStorage { } struct Context<'a> { - cells_cf: &'a weedb::BoundedCfHandle<'a>, + db: &'a Db, + raw_cache: &'a RawCellsCache, alloc: &'a Bump, transaction: FastHashMap<HashBytes, CellWithRefs<'a>>, buffer: Vec<u8>, } impl Context<'_> { - fn insert_cell<V>( + fn insert_cell( &mut self, key: &HashBytes, cell: &DynCell, - value: Option<V>, - ) -> Result<bool, CellStorageError> - where - V: AsRef<[u8]>, - { + depth: usize, + ) -> Result<bool, CellStorageError> { Ok(match self.transaction.entry(*key) { hash_map::Entry::Occupied(mut value) => { value.get_mut().rc += 1; false } hash_map::Entry::Vacant(entry) => { - let has_value = - matches!(value, Some(value) if refcount::has_value(value.as_ref())); + // A constant which tells from which depth we should start to use the cache. + // This method is used mostly for inserting new states, so we can assume + // that the first N levels will mostly be new. + // + // This value was chosen empirically. + const NEW_CELLS_DEPTH_THRESHOLD: usize = 4; + + let (old_rc, has_value) = 'value: { + if depth >= NEW_CELLS_DEPTH_THRESHOLD { + // NOTE: `get` here is used to affect the "hotness" of the value, because + // there is a big chance that we will need it soon during state processing + if let Some(entry) = self.raw_cache.0.get(key) { + let rc = entry.header.header.load(Ordering::Acquire); + break 'value (rc, rc > 0); + } + } + + match self + .db + .cells + .get(key.as_slice()) + .map_err(CellStorageError::Internal)?
+ { + Some(value) => { + let (rc, value) = + refcount::decode_value_with_rc(value.as_ref()); + (rc, value.is_some()) + } + None => (0, false), + } + }; + + // TODO: lower to `debug_assert` when sure + assert!(has_value && old_rc > 0 || !has_value && old_rc == 0); let data = if !has_value { self.buffer.clear(); @@ -82,19 +112,18 @@ impl CellStorage { }) } - fn finalize( - mut self, - batch: &mut weedb::rocksdb::WriteBatch, - raw_cache: &RawCellsCache, - ) -> usize { + fn finalize(mut self, batch: &mut rocksdb::WriteBatch) -> usize { let total = self.transaction.len(); + let cells_cf = &self.db.cells.cf(); for (key, CellWithRefs { rc, data }) in self.transaction { self.buffer.clear(); refcount::add_positive_refount(rc, data, &mut self.buffer); - if data.is_some() { - raw_cache.insert(key, &self.buffer); + if let Some(data) = data { + self.raw_cache.insert(&key, rc, data); + } else { + self.raw_cache.add_refs(&key, rc); } - batch.merge_cf(self.cells_cf, key.as_array(), &self.buffer); + batch.merge_cf(cells_cf, key.as_slice(), &self.buffer); } total } @@ -102,11 +131,10 @@ impl CellStorage { // Prepare context and handles let alloc = Bump::new(); - let cells = &self.db.cells; - let cells_cf = &cells.cf(); let mut ctx = Context { - cells_cf, + db: &self.db, + raw_cache: &self.raw_cells_cache, alloc: &alloc, transaction: FastHashMap::with_capacity_and_hasher(128, Default::default()), buffer: Vec::with_capacity(512), @@ -115,58 +143,57 @@ impl CellStorage { // Check root cell { let key = root.repr_hash(); - match cells.get(key.as_array()) { - Ok(value) => { - if !ctx.insert_cell(key, root.as_ref(), value.as_deref())? { - return Ok(0); - } - } - Err(e) => return Err(CellStorageError::Internal(e)), + + if !ctx.insert_cell(key, root.as_ref(), 0)? { + return Ok(0); } } let mut stack = Vec::with_capacity(16); - stack.push(root.as_ref()); + stack.push(root.references()); // Check other cells - while let Some(current) = stack.pop() { - for cell in current.references() { - let key = cell.repr_hash(); - match cells.get(key.as_array()) { - Ok(value) => { - if !ctx.insert_cell(key, cell, value.as_deref())? { - continue; - } - } - Err(e) => return Err(CellStorageError::Internal(e)), - } + 'outer: loop { + let depth = stack.len(); + let Some(iter) = stack.last_mut() else { + break; + }; + + for child in &mut *iter { + let key = child.repr_hash(); - stack.push(cell); + if ctx.insert_cell(key, child, depth)? 
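The new traversal in `store_cell` is worth spelling out: instead of popping whole cells, it keeps a stack of child iterators, so each level resumes where it stopped and the recursion depth is simply `stack.len()`. The same shape reduced to a standalone sketch, with a hypothetical `visit` callback standing in for `insert_cell` (returning `true` for cells seen for the first time):

    use everscale_types::cell::DynCell;

    fn visit_new_cells(root: &DynCell, mut visit: impl FnMut(&DynCell, usize) -> bool) {
        let mut stack = vec![root.references()];
        loop {
            let depth = stack.len();
            // Advance the iterator of the deepest partially-visited cell.
            let child = match stack.last_mut() {
                Some(iter) => iter.next(),
                None => break, // everything visited
            };
            match child {
                // First encounter: descend into this child's references.
                Some(child) if visit(child, depth) => stack.push(child.references()),
                // Already known: keep scanning the current level.
                Some(_) => {}
                // Level exhausted: backtrack to the parent.
                None => {
                    stack.pop();
                }
            }
        }
    }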
{ + stack.push(child.references()); + continue 'outer; + } + } + + stack.pop(); } // Clear big chunks of data before finalization drop(stack); // Write transaction to the `WriteBatch` Ok(ctx.finalize(batch)) } pub fn load_cell( self: &Arc<Self>, - hash: &HashBytes, + hash: HashBytes, ) -> Result<Arc<StorageCell>, CellStorageError> { - if let Some(cell) = self.cells_cache.get(hash) { + if let Some(cell) = self.cells_cache.get(&hash) { if let Some(cell) = cell.upgrade() { return Ok(cell); } } - let cell = match self.raw_cells_cache.get_raw(self.db.as_ref(), hash) { + let cell = match self.raw_cells_cache.get_raw(self.db.as_ref(), &hash) { Ok(value) => 'cell: { if let Some(value) = value { - if let Some(value) = refcount::strip_refcount(&value) { - match StorageCell::deserialize(self.clone(), value) { + let rc = &value.header.header; + if rc.load(Ordering::Acquire) > 0 { + match StorageCell::deserialize(self.clone(), &value.slice) { Some(cell) => break 'cell Arc::new(cell), None => return Err(CellStorageError::InvalidCell), } @@ -176,7 +203,7 @@ impl CellStorage { } Err(e) => return Err(CellStorageError::Internal(e)), }; - self.cells_cache.insert(*hash, Arc::downgrade(&cell)); + self.cells_cache.insert(hash, Arc::downgrade(&cell)); Ok(cell) } @@ -451,7 +478,7 @@ impl StorageCell { let mut res = Ok(()); Self::initialize_inner(state, &mut || match self .cell_storage - .load_cell(unsafe { &(*slot).hash }) + .load_cell(unsafe { (*slot).hash }) { Ok(cell) => unsafe { *slot = StorageCellReferenceData { @@ -649,15 +676,29 @@ impl StorageCellReferenceData { } } -struct RawCellsCache(Cache<HashBytes, Bytes, CellSizeEstimator, FastHasherState>); +struct RawCellsCache(Cache<HashBytes, RawCellsCacheItem, CellSizeEstimator, FastHasherState>); -#[derive(Clone, Copy)] -struct CellSizeEstimator; -impl quick_cache::Weighter<HashBytes, Bytes> for CellSizeEstimator { - fn weight(&self, _: &HashBytes, val: &Bytes) -> u32 { - const BYTES_SIZE: usize = std::mem::size_of::<usize>() * 4; - let len = 32 + val.len() + BYTES_SIZE; +impl RawCellsCache { + pub(crate) fn hit_ratio(&self) -> f64 { + (if self.0.hits() > 0 { + self.0.hits() as f64 / (self.0.hits() + self.0.misses()) as f64 + } else { + 0.0 + }) * 100.0 + } +} +type RawCellsCacheItem = ThinArc<AtomicI64, u8>; + +#[derive(Clone, Copy)] +pub struct CellSizeEstimator; +impl quick_cache::Weighter<HashBytes, RawCellsCacheItem> for CellSizeEstimator { + fn weight(&self, key: &HashBytes, val: &RawCellsCacheItem) -> u32 { + const STATIC_SIZE: usize = std::mem::size_of::<RawCellsCacheItem>() + + std::mem::size_of::<AtomicI64>() + + std::mem::size_of::<usize>() * 2; // ArcInner refs + HeaderWithLength length + + let len = key.0.len() + val.slice.len() + STATIC_SIZE; len as u32 } } @@ -699,30 +740,88 @@ impl RawCellsCache { size_in_bytes, CellSizeEstimator, FastHasherState::default(), - Default::default(), + DefaultLifecycle::default(), ); Self(raw_cache) } - fn get_raw(&self, db: &Db, key: &HashBytes) -> Result<Option<Bytes>, weedb::rocksdb::Error> { - if let Some(value) = self.0.get(key) { - return Ok(Some(value)); + fn get_raw( + &self, + db: &Db, + key: &HashBytes, + ) -> Result<Option<RawCellsCacheItem>, rocksdb::Error> { + use quick_cache::GuardResult; + + match self.0.get_value_or_guard(key, None) { + GuardResult::Value(value) => Ok(Some(value)), + GuardResult::Guard(g) => Ok(if let Some(value) = db.cells.get(key.as_slice())?
{ + let (rc, data) = refcount::decode_value_with_rc(value.as_ref()); + data.map(|value| { + let value = RawCellsCacheItem::from_header_and_slice(AtomicI64::new(rc), value); + _ = g.insert(value.clone()); + value + }) + } else { + None + }), + GuardResult::Timeout => unreachable!(), } + } + + fn get_raw_for_delete( + &self, + db: &Db, + key: &HashBytes, + refs_buffer: &mut Vec, + ) -> Result { + refs_buffer.clear(); + + // NOTE: `peek` here is used to avoid affecting a "hotness" of the value + if let Some(value) = self.0.peek(key) { + let rc = value.header.header.load(Ordering::Acquire); + if rc <= 0 { + return Err(CellStorageError::CellNotFound); + } - let value = db - .cells - .get(key.as_array())? - .map(|v| Bytes::copy_from_slice(v.as_ref())); - if let Some(value) = &value { - self.0.insert(*key, value.clone()); + StorageCell::deserialize_references(&value.slice, refs_buffer) + .then_some(rc) + .ok_or(CellStorageError::InvalidCell) + } else { + match db.cells.get(key.as_slice()) { + Ok(value) => { + if let Some(value) = value { + if let (rc, Some(value)) = refcount::decode_value_with_rc(&value) { + return StorageCell::deserialize_references(value, refs_buffer) + .then_some(rc) + .ok_or(CellStorageError::InvalidCell); + } + } + + Err(CellStorageError::CellNotFound) + } + Err(e) => Err(CellStorageError::Internal(e)), + } } + } + + fn insert(&self, key: &HashBytes, refs: u32, value: &[u8]) { + let value = RawCellsCacheItem::from_header_and_slice(AtomicI64::new(refs as _), value); + self.0.insert(*key, value); + } - Ok(value) + fn add_refs(&self, key: &HashBytes, refs: u32) { + // NOTE: `peek` here is used to avoid affecting a "hotness" of the value + if let Some(v) = self.0.peek(key) { + v.header.header.fetch_add(refs as i64, Ordering::Release); + } } - pub fn insert(&self, key: HashBytes, value: &[u8]) { - let value = Bytes::copy_from_slice(value); - self.0.insert(key, value); + fn remove_refs(&self, key: &HashBytes, refs: u32) { + // NOTE: `peek` here is used to avoid affecting a "hotness" of the value + if let Some(v) = self.0.peek(key) { + let old_refs = v.header.header.fetch_sub(refs as i64, Ordering::Release); + debug_assert!(old_refs >= refs as i64); + } } } diff --git a/storage/src/shard_state_storage/mod.rs b/storage/src/shard_state_storage/mod.rs index ec3c45c9b..5c12f9174 100644 --- a/storage/src/shard_state_storage/mod.rs +++ b/storage/src/shard_state_storage/mod.rs @@ -31,7 +31,7 @@ pub struct ShardStateStorage { cell_storage: Arc, downloads_dir: Arc, - gc_lock: tokio::sync::RwLock<()>, + gc_lock: tokio::sync::Mutex<()>, min_ref_mc_state: Arc, max_new_mc_cell_count: AtomicUsize, max_new_sc_cell_count: AtomicUsize, @@ -105,7 +105,7 @@ impl ShardStateStorage { let mut batch = weedb::rocksdb::WriteBatch::default(); - let _gc_lock = self.gc_lock.read().await; + let _gc_lock = self.gc_lock.lock().await; let len = self .cell_storage @@ -144,7 +144,7 @@ impl ShardStateStorage { pub async fn load_state(&self, block_id: &BlockId) -> Result> { let cell_id = self.load_state_root(block_id.as_short_id())?; - let cell = self.cell_storage.load_cell(&cell_id)?; + let cell = self.cell_storage.load_cell(cell_id)?; ShardStateStuff::new( *block_id, @@ -223,7 +223,7 @@ impl ShardStateStorage { alloc.reset(); let mut batch = weedb::rocksdb::WriteBatch::default(); { - let _guard = self.gc_lock.write().await; + let _guard = self.gc_lock.lock().await; let total = self .cell_storage .remove_cell(&mut batch, &alloc, root_hash)?; diff --git a/storage/src/shard_state_storage/replace_transaction.rs 
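`get_raw` above leans on quick_cache's guard API: on a miss, exactly one caller receives a placeholder guard, loads the value from RocksDB, and publishes it, while concurrent readers of the same key wait instead of issuing duplicate reads. The pattern in isolation, as a sketch with illustrative key/value/error types rather than the real ones:

    use quick_cache::GuardResult;

    fn load_through<E>(
        cache: &quick_cache::sync::Cache<u64, String>,
        key: &u64,
        load: impl FnOnce() -> Result<Option<String>, E>,
    ) -> Result<Option<String>, E> {
        match cache.get_value_or_guard(key, None) {
            // Hot path: the value is already cached.
            GuardResult::Value(value) => Ok(Some(value)),
            // Miss: this caller holds the placeholder and performs the load.
            GuardResult::Guard(g) => Ok(match load()? {
                Some(value) => {
                    // Publish for anyone blocked on this key; a failed insert
                    // just means the entry was dropped in the meantime.
                    let _ = g.insert(value.clone());
                    Some(value)
                }
                None => None,
            }),
            // Only reachable when a timeout is passed instead of `None`.
            GuardResult::Timeout => unreachable!(),
        }
    }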
b/storage/src/shard_state_storage/replace_transaction.rs index 44570d4be..ebdf5f398 100644 --- a/storage/src/shard_state_storage/replace_transaction.rs +++ b/storage/src/shard_state_storage/replace_transaction.rs @@ -218,7 +218,7 @@ impl<'a> ShardStateReplaceTransaction<'a> { Some(root) => { let cell_id = HashBytes::from_slice(&root[..32]); - let cell = self.cell_storage.load_cell(&cell_id)?; + let cell = self.cell_storage.load_cell(cell_id)?; Ok(Arc::new(ShardStateStuff::new( block_id, Cell::from(cell as Arc<_>), diff --git a/validator/Cargo.toml b/validator/Cargo.toml deleted file mode 100644 index 5b2c4810b..000000000 --- a/validator/Cargo.toml +++ /dev/null @@ -1,17 +0,0 @@ -[package] -name = "tycho-validator" -version = "0.0.1" -edition = "2021" -description = "A validator node." - -[dependencies] -# crates.io deps - -# local deps -tycho-core = { path = "../core", version = "=0.0.1" } -tycho-consensus = { path = "../consensus", version = "=0.0.1" } -tycho-storage = { path = "../storage", version = "0.1" } -tycho-util = { path = "../util", version = "=0.0.1" } - -[lints] -workspace= true From 6d950ffd7d30a556e14f214280a942de92d2ab04 Mon Sep 17 00:00:00 2001 From: Vladimir Petrzhikovskii Date: Thu, 15 Feb 2024 12:48:29 +0100 Subject: [PATCH 05/19] chore(storage): apply clippy suggestions --- storage/src/block_handle_storage/mod.rs | 10 ++++---- storage/src/block_storage/mod.rs | 2 +- storage/src/db/migrations/mod.rs | 2 +- storage/src/db/mod.rs | 4 ++-- storage/src/db/tables.rs | 2 +- storage/src/models/block_handle.rs | 10 ++++---- storage/src/models/block_meta.rs | 14 +++++------ storage/src/runtime_storage/mod.rs | 2 +- .../persistent_state_keeper.rs | 2 +- .../src/shard_state_storage/cell_storage.rs | 4 ++-- .../src/shard_state_storage/cell_writer.rs | 2 +- .../src/shard_state_storage/entries_buffer.rs | 24 +++++++++---------- .../replace_transaction.rs | 4 ++-- .../shard_state_storage/shard_state_reader.rs | 8 +++---- 14 files changed, 45 insertions(+), 45 deletions(-) diff --git a/storage/src/block_handle_storage/mod.rs b/storage/src/block_handle_storage/mod.rs index d776e067f..cd6dd1eb8 100644 --- a/storage/src/block_handle_storage/mod.rs +++ b/storage/src/block_handle_storage/mod.rs @@ -1,8 +1,8 @@ -/// This file is a modified copy of the file from https://github.com/tonlabs/ton-labs-node -/// -/// Changes: -/// - replaced old `failure` crate with `anyhow` -/// - simplified storing +// This file is a modified copy of the file from https://github.com/tonlabs/ton-labs-node +// +// Changes: +// - replaced old `failure` crate with `anyhow` +// - simplified storing use std::sync::{Arc, Weak}; use anyhow::Result; diff --git a/storage/src/block_storage/mod.rs b/storage/src/block_storage/mod.rs index 0db7c3ff0..6ec18ff0e 100644 --- a/storage/src/block_storage/mod.rs +++ b/storage/src/block_storage/mod.rs @@ -53,7 +53,7 @@ impl BlockStorage { ); if let Some(Err(e)) = value.map(check_archive) { - tracing::error!(archive_id, "failed to read archive: {e:?}") + tracing::error!(archive_id, "failed to read archive: {e:?}"); } archive_ids.insert(archive_id); diff --git a/storage/src/db/migrations/mod.rs b/storage/src/db/migrations/mod.rs index a3213c917..710c980eb 100644 --- a/storage/src/db/migrations/mod.rs +++ b/storage/src/db/migrations/mod.rs @@ -47,4 +47,4 @@ impl VersionProvider for NodeStateVersionProvider { state.insert(Self::DB_VERSION_KEY, version)?; Ok(()) } -} \ No newline at end of file +} diff --git a/storage/src/db/mod.rs b/storage/src/db/mod.rs index 806e1d4ba..61c14a84d 100644 
--- a/storage/src/db/mod.rs +++ b/storage/src/db/mod.rs @@ -173,7 +173,7 @@ impl Db { let cf = db.instantiate_table::(); let res: (usize, usize) = cf .iterator(rocksdb::IteratorMode::Start) - .flat_map(|x| { + .filter_map(|x| { let x = match x { Ok(x) => x, Err(e) => { @@ -243,7 +243,7 @@ pub struct DiskUsageInfo { impl Drop for Db { fn drop(&mut self) { - self.raw().cancel_all_background_work(true) + self.raw().cancel_all_background_work(true); } } diff --git a/storage/src/db/tables.rs b/storage/src/db/tables.rs index 10cdd1bfa..722924a4d 100644 --- a/storage/src/db/tables.rs +++ b/storage/src/db/tables.rs @@ -89,7 +89,7 @@ impl ColumnFamily for PackageEntries { } } -/// Maps BlockId to root cell hash +/// Maps `BlockId` to root cell hash /// - Key: `BlockId` /// - Value: `[u8; 32]` pub struct ShardStates; diff --git a/storage/src/models/block_handle.rs b/storage/src/models/block_handle.rs index 6c239544c..a4114b722 100644 --- a/storage/src/models/block_handle.rs +++ b/storage/src/models/block_handle.rs @@ -1,8 +1,8 @@ -/// This file is a modified copy of the file from https://github.com/tonlabs/ton-labs-node -/// -/// Changes: -/// - replaced old `failure` crate with `anyhow` -/// - moved all flags to meta +// This file is a modified copy of the file from https://github.com/tonlabs/ton-labs-node +// +// Changes: +// - replaced old `failure` crate with `anyhow` +// - moved all flags to meta use std::sync::{Arc, Weak}; use anyhow::Result; diff --git a/storage/src/models/block_meta.rs b/storage/src/models/block_meta.rs index 751abd2a0..1e5e2804f 100644 --- a/storage/src/models/block_meta.rs +++ b/storage/src/models/block_meta.rs @@ -1,9 +1,9 @@ -/// This file is a modified copy of the file from https://github.com/tonlabs/ton-labs-node -/// -/// Changes: -/// - replaced old `failure` crate with `anyhow` -/// - moved all flags here from block handle -/// - removed temporary unused flags +// This file is a modified copy of the file from https://github.com/tonlabs/ton-labs-node +// +// Changes: +// - replaced old `failure` crate with `anyhow` +// - moved all flags here from block handle +// - removed temporary unused flags use std::sync::atomic::{AtomicU64, Ordering}; use anyhow::Result; @@ -209,7 +209,7 @@ impl BlockMeta { impl StoredValue for BlockMeta { /// 8 bytes flags - /// 4 bytes gen_utime + /// 4 bytes `gen_utime` const SIZE_HINT: usize = 8 + 4; type OnStackSlice = [u8; Self::SIZE_HINT]; diff --git a/storage/src/runtime_storage/mod.rs b/storage/src/runtime_storage/mod.rs index 69bdafae1..6cf671e3c 100644 --- a/storage/src/runtime_storage/mod.rs +++ b/storage/src/runtime_storage/mod.rs @@ -20,4 +20,4 @@ impl RuntimeStorage { pub fn persistent_state_keeper(&self) -> &PersistentStateKeeper { &self.persistent_state_keeper } -} \ No newline at end of file +} diff --git a/storage/src/runtime_storage/persistent_state_keeper.rs b/storage/src/runtime_storage/persistent_state_keeper.rs index 24bfc8c77..18217419d 100644 --- a/storage/src/runtime_storage/persistent_state_keeper.rs +++ b/storage/src/runtime_storage/persistent_state_keeper.rs @@ -82,7 +82,7 @@ impl PersistentStateKeeper { .map(|handle| (handle.id().seqno, handle.meta().brief())) } - pub fn new_state_found(&self) -> tokio::sync::futures::Notified { + pub fn new_state_found(&self) -> tokio::sync::futures::Notified<'_> { self.persistent_state_changed.notified() } } diff --git a/storage/src/shard_state_storage/cell_storage.rs b/storage/src/shard_state_storage/cell_storage.rs index 8750b46b6..45faa0782 100644 --- 
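A note on the `Notified<'_>` future returned by `new_state_found` above: with `tokio::sync::Notify`, a waiter must register interest before re-checking shared state, otherwise a notification landing between the check and the await is lost. A hedged consumer sketch (`current_state` is a hypothetical accessor standing in for whatever condition the caller re-checks):

    use std::pin::pin;

    async fn wait_for_persistent_state(keeper: &PersistentStateKeeper) {
        loop {
            // Create and enable the future first, so notifications fired
            // after this point are guaranteed to wake us.
            let mut notified = pin!(keeper.new_state_found());
            notified.as_mut().enable();
            if keeper.current_state().is_some() {
                break;
            }
            notified.await;
        }
    }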
a/storage/src/shard_state_storage/cell_storage.rs +++ b/storage/src/shard_state_storage/cell_storage.rs @@ -243,7 +243,7 @@ impl CellStorage { let cells = &self.db.cells; let cells_cf = &cells.cf(); - let mut transaction: FastHashMap<&HashBytes, CellState> = + let mut transaction: FastHashMap<&HashBytes, CellState<'_>> = FastHashMap::with_capacity_and_hasher(128, Default::default()); let mut buffer = Vec::with_capacity(4); @@ -394,7 +394,7 @@ impl StorageCell { }; for slot in reference_data.iter().take(ref_count) { - let slot = slot.get() as *mut u8; + let slot = slot.get().cast::(); unsafe { std::ptr::copy_nonoverlapping(buffer.as_ptr().add(offset), slot, 32) }; offset += 32; } diff --git a/storage/src/shard_state_storage/cell_writer.rs b/storage/src/shard_state_storage/cell_writer.rs index 36fb131ea..261e09131 100644 --- a/storage/src/shard_state_storage/cell_writer.rs +++ b/storage/src/shard_state_storage/cell_writer.rs @@ -240,7 +240,7 @@ fn write_rev_cells>( for i in 0..preload_count { let index = indices_buffer[i]; - let hash = unsafe { *(keys[i] as *const [u8; 32]) }; + let hash = unsafe { *keys[i].cast::<[u8; 32]>() }; stack.push((index, StackItem::New(hash))); } } diff --git a/storage/src/shard_state_storage/entries_buffer.rs b/storage/src/shard_state_storage/entries_buffer.rs index 5347ad5fe..a2f9896eb 100644 --- a/storage/src/shard_state_storage/entries_buffer.rs +++ b/storage/src/shard_state_storage/entries_buffer.rs @@ -21,8 +21,8 @@ impl EntriesBuffer { &'a mut self, references: &'b [u32], ) -> (HashesEntryWriter<'a>, EntriesBufferChildren<'b>) - where - 'a: 'b, + where + 'a: 'b, { let [first, tail @ ..] = &mut *self.0; ( @@ -40,7 +40,7 @@ impl EntriesBuffer { pub struct EntriesBufferChildren<'a>(&'a [u32], &'a [[u8; HashesEntry::LEN]]); impl EntriesBufferChildren<'_> { - pub fn iter(&self) -> impl Iterator { + pub fn iter(&self) -> impl Iterator)> { self.0 .iter() .zip(self.1) @@ -51,7 +51,7 @@ impl EntriesBufferChildren<'_> { pub struct HashesEntryWriter<'a>(&'a mut [u8; HashesEntry::LEN]); impl HashesEntryWriter<'_> { - pub fn as_reader(&self) -> HashesEntry { + pub fn as_reader(&self) -> HashesEntry<'_> { HashesEntry(self.0) } @@ -87,7 +87,7 @@ impl HashesEntryWriter<'_> { pub fn get_hash_slice(&mut self, i: u8) -> &mut [u8; 32] { let offset = HashesEntry::HASHES_OFFSET + 32 * i as usize; - unsafe { &mut *(self.0.as_mut_ptr().add(offset) as *mut _) } + unsafe { &mut *self.0.as_mut_ptr().add(offset).cast() } } pub fn set_depth(&mut self, i: u8, depth: u16) { @@ -97,7 +97,7 @@ impl HashesEntryWriter<'_> { pub fn get_depth_slice(&mut self, i: u8) -> &mut [u8; 2] { let offset = HashesEntry::DEPTHS_OFFSET + 2 * i as usize; - unsafe { &mut *(self.0.as_mut_ptr().add(offset) as *mut _) } + unsafe { &mut *self.0.as_mut_ptr().add(offset).cast() } } } @@ -138,7 +138,7 @@ impl<'a> HashesEntry<'a> { pub fn hash(&self, n: u8) -> &'a [u8; 32] { let offset = Self::HASHES_OFFSET + 32 * self.level_mask().hash_index(n) as usize; - unsafe { &*(self.0.as_ptr().add(offset) as *const _) } + unsafe { &*self.0.as_ptr().add(offset).cast() } } pub fn depth(&self, n: u8) -> u16 { @@ -147,8 +147,8 @@ impl<'a> HashesEntry<'a> { } pub fn pruned_branch_hash<'b>(&self, n: u8, data: &'b [u8]) -> Option<&'b [u8; 32]> - where - 'a: 'b, + where + 'a: 'b, { let level_mask = self.level_mask(); let index = level_mask.hash_index(n) as usize; @@ -156,13 +156,13 @@ impl<'a> HashesEntry<'a> { Some(if index == level { let offset = Self::HASHES_OFFSET; - unsafe { &*(self.0.as_ptr().add(offset) as *const _) } + unsafe 
{ &*self.0.as_ptr().add(offset).cast() } } else { let offset = 1 + 1 + index * 32; if data.len() < offset + 32 { return None; } - unsafe { &*(data.as_ptr().add(offset) as *const _) } + unsafe { &*data.as_ptr().add(offset).cast() } }) } @@ -179,4 +179,4 @@ impl<'a> HashesEntry<'a> { u16::from_be_bytes([data[offset], data[offset + 1]]) } } -} \ No newline at end of file +} diff --git a/storage/src/shard_state_storage/replace_transaction.rs b/storage/src/shard_state_storage/replace_transaction.rs index ebdf5f398..54260369e 100644 --- a/storage/src/shard_state_storage/replace_transaction.rs +++ b/storage/src/shard_state_storage/replace_transaction.rs @@ -183,7 +183,7 @@ impl<'a> ShardStateReplaceTransaction<'a> { hashes_file.write_all_at( cell_index as usize * HashesEntry::LEN, ctx.entries_buffer.current_entry_buffer(), - ) + ); }; chunk_buffer.truncate(chunk_size); @@ -368,7 +368,7 @@ impl<'a> ShardStateReplaceTransaction<'a> { cell.descriptor.d1, cell.descriptor.d2, ]); - output_buffer.extend_from_slice(&(cell.bit_len as u16).to_le_bytes()); + output_buffer.extend_from_slice(&cell.bit_len.to_le_bytes()); output_buffer.extend_from_slice(cell.data); let hash_count = cell.descriptor.hash_count(); diff --git a/storage/src/shard_state_storage/shard_state_reader.rs b/storage/src/shard_state_storage/shard_state_reader.rs index 001a2eecf..42ba73393 100644 --- a/storage/src/shard_state_storage/shard_state_reader.rs +++ b/storage/src/shard_state_storage/shard_state_reader.rs @@ -2,7 +2,7 @@ use std::io::Read; use anyhow::{Context, Result}; use crc::{Crc, CRC_32_ISCSI}; -use everscale_types::cell::{CellDescriptor, CellType, LevelMask}; +use everscale_types::cell::{CellDescriptor, LevelMask}; use smallvec::SmallVec; macro_rules! try_read { @@ -301,8 +301,8 @@ impl<'a> RawCell<'a> { cell_index: usize, data_buffer: &'a mut [u8], ) -> Result - where - R: Read, + where + R: Read, { let mut descriptor = [0u8; 2]; src.read_exact(&mut descriptor)?; @@ -524,4 +524,4 @@ impl ByteOrderRead for T { self.read_exact(&mut buf)?; Ok(u32::from_le_bytes(buf)) } -} \ No newline at end of file +} From f23cce7293980d4f3c94424865a4c2c6381527e7 Mon Sep 17 00:00:00 2001 From: Vladimir Petrzhikovskii Date: Wed, 21 Feb 2024 17:09:32 +0100 Subject: [PATCH 06/19] refactor(storage): remove unused methods --- Cargo.lock | 6 +----- constansts/Cargo.toml | 11 ----------- constansts/src/archive.rs | 1 - constansts/src/lib.rs | 1 - storage/Cargo.toml | 2 +- storage/src/block_handle_storage/mod.rs | 7 ++----- storage/src/db/tables.rs | 2 +- storage/src/models/block_handle.rs | 7 +------ storage/src/models/block_meta.rs | 6 ------ storage/src/shard_state_storage/cell_storage.rs | 3 ++- storage/src/shard_state_storage/cell_writer.rs | 2 +- .../src/shard_state_storage/replace_transaction.rs | 4 +++- util/src/lib.rs | 1 + 13 files changed, 13 insertions(+), 40 deletions(-) delete mode 100644 constansts/Cargo.toml delete mode 100644 constansts/src/archive.rs delete mode 100644 constansts/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index c66318ce4..114835770 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2176,10 +2176,6 @@ dependencies = [ "weedb", ] -[[package]] -name = "tycho-constansts" -version = "0.1.0" - [[package]] name = "tycho-core" version = "0.0.1" @@ -2272,7 +2268,7 @@ dependencies = [ "tracing", "triomphe", "tycho-block-util", - "tycho-constansts", + "tycho-util", "weedb", ] diff --git a/constansts/Cargo.toml b/constansts/Cargo.toml deleted file mode 100644 index 221b1e130..000000000 --- a/constansts/Cargo.toml +++ /dev/null @@ 
-1,11 +0,0 @@ -[package] -name = "tycho-constansts" -version = "0.1.0" -edition = "2021" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] - -[lints] -workspace = true diff --git a/constansts/src/archive.rs b/constansts/src/archive.rs deleted file mode 100644 index 5d6d18af6..000000000 --- a/constansts/src/archive.rs +++ /dev/null @@ -1 +0,0 @@ -pub const ARCHIVE_PREFIX: [u8; 4] = u32::to_le_bytes(0xae8fdd01); diff --git a/constansts/src/lib.rs b/constansts/src/lib.rs deleted file mode 100644 index 4193e3e11..000000000 --- a/constansts/src/lib.rs +++ /dev/null @@ -1 +0,0 @@ -pub mod archive; diff --git a/storage/Cargo.toml b/storage/Cargo.toml index f2437e0bb..cff61308b 100644 --- a/storage/Cargo.toml +++ b/storage/Cargo.toml @@ -19,7 +19,7 @@ smallvec = "1.13.1" sha2 = "0.10.8" tycho-block-util = { path = "../block-util" } -tycho-constansts = { path = "../constansts" } +tycho-util = { path = "../util" } dashmap = "5.5.3" bumpalo = "3.14.0" diff --git a/storage/src/block_handle_storage/mod.rs b/storage/src/block_handle_storage/mod.rs index cd6dd1eb8..2ea5d9928 100644 --- a/storage/src/block_handle_storage/mod.rs +++ b/storage/src/block_handle_storage/mod.rs @@ -1,8 +1,3 @@ -// This file is a modified copy of the file from https://github.com/tonlabs/ton-labs-node -// -// Changes: -// - replaced old `failure` crate with `anyhow` -// - simplified storing use std::sync::{Arc, Weak}; use anyhow::Result; @@ -10,7 +5,9 @@ use everscale_types::models::*; use super::models::*; use crate::db::*; + use tycho_block_util::*; +use tycho_util::FastDashMap; pub struct BlockHandleStorage { db: Arc, diff --git a/storage/src/db/tables.rs b/storage/src/db/tables.rs index 722924a4d..50037bae7 100644 --- a/storage/src/db/tables.rs +++ b/storage/src/db/tables.rs @@ -226,7 +226,7 @@ fn archive_data_merge( current_value: Option<&[u8]>, operands: &MergeOperands, ) -> Option> { - use tycho_constansts::archive::ARCHIVE_PREFIX; + use tycho_block_util::ARCHIVE_PREFIX; let total_len: usize = operands.iter().map(|data| data.len()).sum(); let mut result = Vec::with_capacity(ARCHIVE_PREFIX.len() + total_len); diff --git a/storage/src/models/block_handle.rs b/storage/src/models/block_handle.rs index a4114b722..254584e40 100644 --- a/storage/src/models/block_handle.rs +++ b/storage/src/models/block_handle.rs @@ -1,8 +1,3 @@ -// This file is a modified copy of the file from https://github.com/tonlabs/ton-labs-node -// -// Changes: -// - replaced old `failure` crate with `anyhow` -// - moved all flags to meta use std::sync::{Arc, Weak}; use anyhow::Result; @@ -10,7 +5,7 @@ use everscale_types::models::*; use tokio::sync::RwLock; use super::BlockMeta; -use tycho_block_util::FastDashMap; +use tycho_util::FastDashMap; pub struct BlockHandle { id: BlockId, diff --git a/storage/src/models/block_meta.rs b/storage/src/models/block_meta.rs index 1e5e2804f..90a8ea33f 100644 --- a/storage/src/models/block_meta.rs +++ b/storage/src/models/block_meta.rs @@ -1,9 +1,3 @@ -// This file is a modified copy of the file from https://github.com/tonlabs/ton-labs-node -// -// Changes: -// - replaced old `failure` crate with `anyhow` -// - moved all flags here from block handle -// - removed temporary unused flags use std::sync::atomic::{AtomicU64, Ordering}; use anyhow::Result; diff --git a/storage/src/shard_state_storage/cell_storage.rs b/storage/src/shard_state_storage/cell_storage.rs index 45faa0782..8b024f5f6 100644 --- a/storage/src/shard_state_storage/cell_storage.rs +++ 
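For context on the `ARCHIVE_PREFIX` constant that moves from the deleted `constansts` crate into `tycho_block_util`: it seeds the value that the archive merge operator builds up. Wiring such a function into a RocksDB column family looks roughly like this (a sketch against the `rocksdb` crate, which this repo re-exports through weedb; not the exact tycho setup):

    use rocksdb::{MergeOperands, Options};

    const ARCHIVE_PREFIX: [u8; 4] = u32::to_le_bytes(0xae8fdd01);

    fn archive_data_merge(
        _key: &[u8],
        current_value: Option<&[u8]>,
        operands: &MergeOperands,
    ) -> Option<Vec<u8>> {
        // Start from the existing value, or from the bare archive prefix.
        let mut result = current_value.map_or_else(|| ARCHIVE_PREFIX.to_vec(), |v| v.to_vec());
        result.reserve(operands.iter().map(|data| data.len()).sum());
        for data in operands.iter() {
            result.extend_from_slice(data); // append each queued entry
        }
        Some(result)
    }

    fn archives_cf_options() -> Options {
        let mut options = Options::default();
        options.set_merge_operator_associative("archive_data_merge", archive_data_merge);
        options
    }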
b/storage/src/shard_state_storage/cell_storage.rs @@ -11,7 +11,8 @@ use quick_cache::sync::{Cache, DefaultLifecycle}; use triomphe::ThinArc; use crate::db::*; -use tycho_block_util::{CacheStats, FastDashMap, FastHashMap, FastHasherState}; +use tycho_block_util::{CacheStats}; +use tycho_util::{FastDashMap, FastHashMap, FastHasherState}; pub struct CellStorage { db: Arc, diff --git a/storage/src/shard_state_storage/cell_writer.rs b/storage/src/shard_state_storage/cell_writer.rs index 261e09131..e04eeb60a 100644 --- a/storage/src/shard_state_storage/cell_writer.rs +++ b/storage/src/shard_state_storage/cell_writer.rs @@ -8,7 +8,7 @@ use anyhow::{Context, Result}; use smallvec::SmallVec; use crate::db::Db; -use tycho_block_util::FastHashMap; +use tycho_util::FastHashMap; pub struct CellWriter<'a> { db: &'a Db, diff --git a/storage/src/shard_state_storage/replace_transaction.rs b/storage/src/shard_state_storage/replace_transaction.rs index 54260369e..b2e0994ac 100644 --- a/storage/src/shard_state_storage/replace_transaction.rs +++ b/storage/src/shard_state_storage/replace_transaction.rs @@ -9,7 +9,9 @@ use super::entries_buffer::*; use super::files_context::*; use super::shard_state_reader::*; use crate::db::*; + use tycho_block_util::*; +use tycho_util::FastHashMap; pub struct ShardStateReplaceTransaction<'a> { db: &'a Db, @@ -231,7 +233,7 @@ impl<'a> ShardStateReplaceTransaction<'a> { fn finalize_cell( &self, - ctx: &mut FinalizationContext, + ctx: &mut FinalizationContext<'_>, cell_index: u32, mut cell: RawCell<'_>, ) -> Result<()> { diff --git a/util/src/lib.rs b/util/src/lib.rs index 0fb010594..f99e01223 100644 --- a/util/src/lib.rs +++ b/util/src/lib.rs @@ -29,6 +29,7 @@ pub type FastDashMap = dashmap::DashMap; pub type FastDashSet = dashmap::DashSet; pub type FastHashMap = HashMap; pub type FastHashSet = HashSet; +pub type FastHasherState = ahash::RandomState; /// # Example /// From b13bca1460998d0c67df7c82662bcb0c1ac4dbae Mon Sep 17 00:00:00 2001 From: Ivan Kalinin Date: Thu, 22 Feb 2024 16:59:04 +0100 Subject: [PATCH 07/19] refactor(block-util): remove unnecessary modules and update remaining --- Cargo.lock | 1 - storage/src/block_storage/mod.rs | 48 ++++++++++++++++---------------- 2 files changed, 24 insertions(+), 25 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 114835770..d09f635df 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2143,7 +2143,6 @@ dependencies = [ "parking_lot", "rand", "sha2", - "smallvec", "thiserror", "tycho-util", ] diff --git a/storage/src/block_storage/mod.rs b/storage/src/block_storage/mod.rs index 6ec18ff0e..c667d132a 100644 --- a/storage/src/block_storage/mod.rs +++ b/storage/src/block_storage/mod.rs @@ -74,7 +74,7 @@ impl BlockStorage { .block_handle_storage .create_or_load_handle(block_id, meta_data)?; - let archive_id = PackageEntryId::Block(block_id); + let archive_id = ArchiveEntryId::Block(block_id); let mut updated = false; if !handle.meta().has_data() { let data = block.new_archive_data()?; @@ -105,7 +105,7 @@ impl BlockStorage { if !handle.meta().has_data() { return Err(BlockStorageError::BlockDataNotFound.into()); } - self.get_data(handle, &PackageEntryId::Block(handle.id())) + self.get_data(handle, &ArchiveEntryId::Block(handle.id())) .await } @@ -116,7 +116,7 @@ impl BlockStorage { if !handle.meta().has_data() { return Err(BlockStorageError::BlockDataNotFound.into()); } - self.get_data_ref(handle, &PackageEntryId::Block(handle.id())) + self.get_data_ref(handle, &ArchiveEntryId::Block(handle.id())) .await } @@ -139,7 +139,7 @@ impl 
BlockStorage { let mut updated = false; if proof.is_link() { - let archive_id = PackageEntryId::ProofLink(block_id); + let archive_id = ArchiveEntryId::ProofLink(block_id); if !handle.meta().has_proof_link() { let data = proof.new_archive_data()?; @@ -153,7 +153,7 @@ impl BlockStorage { } } } else { - let archive_id = PackageEntryId::Proof(block_id); + let archive_id = ArchiveEntryId::Proof(block_id); if !handle.meta().has_proof() { let data = proof.new_archive_data()?; @@ -191,12 +191,12 @@ impl BlockStorage { ) -> Result> { let (archive_id, exists) = if is_link { ( - PackageEntryId::ProofLink(handle.id()), + ArchiveEntryId::ProofLink(handle.id()), handle.meta().has_proof_link(), ) } else { ( - PackageEntryId::Proof(handle.id()), + ArchiveEntryId::Proof(handle.id()), handle.meta().has_proof(), ) }; @@ -215,12 +215,12 @@ impl BlockStorage { ) -> Result + 'a> { let (archive_id, exists) = if is_link { ( - PackageEntryId::ProofLink(handle.id()), + ArchiveEntryId::ProofLink(handle.id()), handle.meta().has_proof_link(), ) } else { ( - PackageEntryId::Proof(handle.id()), + ArchiveEntryId::Proof(handle.id()), handle.meta().has_proof(), ) }; @@ -250,7 +250,7 @@ impl BlockStorage { let block_data = if has_data { let lock = handle.block_data_lock().write().await; - let entry_id = PackageEntryId::Block(block_id); + let entry_id = ArchiveEntryId::Block(block_id); let data = self.make_archive_segment(&entry_id)?; Some((lock, data)) @@ -262,9 +262,9 @@ impl BlockStorage { let lock = handle.proof_data_lock().write().await; let entry_id = if is_link { - PackageEntryId::ProofLink(block_id) + ArchiveEntryId::ProofLink(block_id) } else { - PackageEntryId::Proof(block_id) + ArchiveEntryId::Proof(block_id) }; let data = self.make_archive_segment(&entry_id)?; @@ -337,7 +337,7 @@ impl BlockStorage { batch.merge_cf( &archives_cf, archive_id_bytes, - make_archive_segment(&PackageEntryId::Block(handle.id()).filename(), block_data), + make_archive_segment(&ArchiveEntryId::Block(handle.id()).filename(), block_data), ); batch.merge_cf( @@ -345,9 +345,9 @@ impl BlockStorage { archive_id_bytes, make_archive_segment( &if is_link { - PackageEntryId::ProofLink(block_id) + ArchiveEntryId::ProofLink(block_id) } else { - PackageEntryId::Proof(block_id) + ArchiveEntryId::Proof(block_id) } .filename(), block_proof_data, @@ -564,7 +564,7 @@ impl BlockStorage { Ok(()) } - fn add_data(&self, id: &PackageEntryId, data: &[u8]) -> Result<(), rocksdb::Error> + fn add_data(&self, id: &ArchiveEntryId, data: &[u8]) -> Result<(), rocksdb::Error> where I: Borrow + Hash, { @@ -572,20 +572,20 @@ impl BlockStorage { } #[allow(dead_code)] - fn has_data(&self, id: &PackageEntryId) -> Result + fn has_data(&self, id: &ArchiveEntryId) -> Result where I: Borrow + Hash, { self.db.package_entries.contains_key(id.to_vec()) } - async fn get_data(&self, handle: &BlockHandle, id: &PackageEntryId) -> Result> + async fn get_data(&self, handle: &BlockHandle, id: &ArchiveEntryId) -> Result> where I: Borrow + Hash, { let _lock = match &id { - PackageEntryId::Block(_) => handle.block_data_lock().read().await, - PackageEntryId::Proof(_) | PackageEntryId::ProofLink(_) => { + ArchiveEntryId::Block(_) => handle.block_data_lock().read().await, + ArchiveEntryId::Proof(_) | ArchiveEntryId::ProofLink(_) => { handle.proof_data_lock().read().await } }; @@ -599,14 +599,14 @@ impl BlockStorage { async fn get_data_ref<'a, I>( &'a self, handle: &'a BlockHandle, - id: &PackageEntryId, + id: &ArchiveEntryId, ) -> Result + 'a> where I: Borrow + Hash, { let lock = match id { - 
PackageEntryId::Block(_) => handle.block_data_lock().read().await, - PackageEntryId::Proof(_) | PackageEntryId::ProofLink(_) => { + ArchiveEntryId::Block(_) => handle.block_data_lock().read().await, + ArchiveEntryId::Proof(_) | ArchiveEntryId::ProofLink(_) => { handle.proof_data_lock().read().await } }; @@ -646,7 +646,7 @@ impl BlockStorage { archive_id } - fn make_archive_segment(&self, entry_id: &PackageEntryId) -> Result> + fn make_archive_segment(&self, entry_id: &ArchiveEntryId) -> Result> where I: Borrow + Hash, { From f35a5d40445ddbafc5d67bace9b32d5ef6931f02 Mon Sep 17 00:00:00 2001 From: Alexey Pashinov Date: Fri, 23 Feb 2024 15:37:03 +0100 Subject: [PATCH 08/19] refactor(storage): replace ton-labs libs with everscale-types --- storage/src/block_connection_storage/mod.rs | 5 +- storage/src/block_handle_storage/mod.rs | 10 +- storage/src/block_storage/mod.rs | 27 +- storage/src/db/tables.rs | 2 +- storage/src/lib.rs | 1 + storage/src/models/block_meta.rs | 23 +- storage/src/node_state_storage/mod.rs | 2 +- storage/src/runtime_storage/mod.rs | 1 + .../persistent_state_keeper.rs | 3 +- storage/src/shard_state_storage/mod.rs | 16 +- .../replace_transaction.rs | 6 +- storage/src/utils/mod.rs | 3 + storage/src/utils/stored_value.rs | 242 ++++++++++++++++++ 13 files changed, 302 insertions(+), 39 deletions(-) create mode 100644 storage/src/utils/mod.rs create mode 100644 storage/src/utils/stored_value.rs diff --git a/storage/src/block_connection_storage/mod.rs b/storage/src/block_connection_storage/mod.rs index 1121c042f..723152f3a 100644 --- a/storage/src/block_connection_storage/mod.rs +++ b/storage/src/block_connection_storage/mod.rs @@ -3,9 +3,10 @@ use std::sync::Arc; use anyhow::Result; use everscale_types::models::*; -use super::models::BlockHandle; +use super::models::*; + use crate::db::*; -use tycho_block_util::{read_block_id_le, write_block_id_le, StoredValue}; +use crate::utils::*; /// Stores relations between blocks pub struct BlockConnectionStorage { diff --git a/storage/src/block_handle_storage/mod.rs b/storage/src/block_handle_storage/mod.rs index 2ea5d9928..d64d2e03b 100644 --- a/storage/src/block_handle_storage/mod.rs +++ b/storage/src/block_handle_storage/mod.rs @@ -1,13 +1,15 @@ use std::sync::{Arc, Weak}; use anyhow::Result; -use everscale_types::models::*; +use everscale_types::models::BlockId; +use tycho_block_util::block::TopBlocks; +use tycho_block_util::state::is_persistent_state; +use tycho_util::FastDashMap; use super::models::*; -use crate::db::*; -use tycho_block_util::*; -use tycho_util::FastDashMap; +use crate::db::*; +use crate::utils::*; pub struct BlockHandleStorage { db: Arc, diff --git a/storage/src/block_storage/mod.rs b/storage/src/block_storage/mod.rs index c667d132a..b05170c7f 100644 --- a/storage/src/block_storage/mod.rs +++ b/storage/src/block_storage/mod.rs @@ -8,12 +8,19 @@ use std::sync::Arc; use anyhow::{Context, Result}; use everscale_types::models::*; use parking_lot::RwLock; - -use super::block_handle_storage::{BlockHandleStorage, HandleCreationStatus}; +use serde::{Deserialize, Serialize}; +use tycho_block_util::archive::{ + make_archive_entry, ArchiveEntryId, ArchiveReaderError, ArchiveVerifier, +}; +use tycho_block_util::block::{ + BlockProofStuff, BlockProofStuffAug, BlockStuff, BlockStuffAug, TopBlocks, +}; + +use super::block_handle_storage::*; use super::models::*; + use crate::db::*; -use serde::{Deserialize, Serialize}; -use tycho_block_util::*; +use crate::utils::*; pub struct BlockStorage { db: Arc, @@ -35,9 +42,9 @@ impl 
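The `add_data`/`get_data` family above is generic over how the block id is held: an `I: Borrow<BlockId> + Hash` style bound (inferred here; hedged) lets one entry-id type wrap either an owned or a borrowed id, so callers never clone a block id just to build a storage key. A reduced sketch of the idea, with an illustrative stand-in for `tycho_block_util::archive::ArchiveEntryId`:

    use std::borrow::Borrow;
    use std::hash::Hash;
    use everscale_types::models::BlockId;

    // Illustrative stand-in; the real enum lives in tycho_block_util::archive.
    enum ArchiveEntryId<I> {
        Block(I),
        Proof(I),
        ProofLink(I),
    }

    fn filename<I: Borrow<BlockId> + Hash>(id: &ArchiveEntryId<I>) -> String {
        // The same code serves ArchiveEntryId<BlockId> and ArchiveEntryId<&BlockId>.
        match id {
            ArchiveEntryId::Block(id) => format!("block_{}", id.borrow()),
            ArchiveEntryId::Proof(id) => format!("proof_{}", id.borrow()),
            ArchiveEntryId::ProofLink(id) => format!("prooflink_{}", id.borrow()),
        }
    }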
BlockStorage { } fn preload(&self) -> Result<()> { - fn check_archive(value: &[u8]) -> Result<(), ArchivePackageError> { - let mut verifier = ArchivePackageVerifier::default(); - verifier.verify(value)?; + fn check_archive(value: &[u8]) -> Result<(), ArchiveReaderError> { + let mut verifier = ArchiveVerifier::default(); + verifier.write_verify(value)?; verifier.final_check() } @@ -337,13 +344,13 @@ impl BlockStorage { batch.merge_cf( &archives_cf, archive_id_bytes, - make_archive_segment(&ArchiveEntryId::Block(handle.id()).filename(), block_data), + make_archive_entry(&ArchiveEntryId::Block(handle.id()).filename(), block_data), ); batch.merge_cf( &archives_cf, archive_id_bytes, - make_archive_segment( + make_archive_entry( &if is_link { ArchiveEntryId::ProofLink(block_id) } else { @@ -651,7 +658,7 @@ impl BlockStorage { I: Borrow + Hash, { match self.db.package_entries.get(entry_id.to_vec())? { - Some(data) => Ok(make_archive_segment(&entry_id.filename(), &data)), + Some(data) => Ok(make_archive_entry(&entry_id.filename(), &data)), None => Err(BlockStorageError::InvalidBlockData.into()), } } diff --git a/storage/src/db/tables.rs b/storage/src/db/tables.rs index 50037bae7..0d588c972 100644 --- a/storage/src/db/tables.rs +++ b/storage/src/db/tables.rs @@ -226,7 +226,7 @@ fn archive_data_merge( current_value: Option<&[u8]>, operands: &MergeOperands, ) -> Option> { - use tycho_block_util::ARCHIVE_PREFIX; + use tycho_block_util::archive::ARCHIVE_PREFIX; let total_len: usize = operands.iter().map(|data| data.len()).sum(); let mut result = Vec::with_capacity(ARCHIVE_PREFIX.len() + total_len); diff --git a/storage/src/lib.rs b/storage/src/lib.rs index 75f36e313..c15317963 100644 --- a/storage/src/lib.rs +++ b/storage/src/lib.rs @@ -17,6 +17,7 @@ mod models; mod node_state_storage; mod runtime_storage; mod shard_state_storage; +mod utils; pub struct Storage { file_db_path: PathBuf, diff --git a/storage/src/models/block_meta.rs b/storage/src/models/block_meta.rs index 90a8ea33f..b5bbcb4f4 100644 --- a/storage/src/models/block_meta.rs +++ b/storage/src/models/block_meta.rs @@ -4,7 +4,7 @@ use anyhow::Result; use bytes::Buf; use everscale_types::models::BlockInfo; -use tycho_block_util::{StoredValue, StoredValueBuffer}; +use crate::utils::{StoredValue, StoredValueBuffer}; #[derive(Debug, Copy, Clone)] pub struct BlockMetaData { @@ -13,6 +13,16 @@ pub struct BlockMetaData { pub mc_ref_seqno: Option, } +impl BlockMetaData { + pub fn zero_state(gen_utime: u32) -> Self { + Self { + is_key_block: true, + gen_utime, + mc_ref_seqno: Some(0), + } + } +} + #[derive(Debug, Copy, Clone)] pub struct BriefBlockInfo { pub is_key_block: bool, @@ -39,17 +49,6 @@ impl From<&BlockInfo> for BriefBlockInfo { } } } - -impl BlockMetaData { - pub fn zero_state(gen_utime: u32) -> Self { - Self { - is_key_block: true, - gen_utime, - mc_ref_seqno: Some(0), - } - } -} - #[derive(Debug, Default)] pub struct BlockMeta { flags: AtomicU64, diff --git a/storage/src/node_state_storage/mod.rs b/storage/src/node_state_storage/mod.rs index 61265bc43..02c45eed5 100644 --- a/storage/src/node_state_storage/mod.rs +++ b/storage/src/node_state_storage/mod.rs @@ -5,7 +5,7 @@ use everscale_types::models::*; use parking_lot::Mutex; use crate::db::*; -use tycho_block_util::{read_block_id_le, write_block_id_le, StoredValue}; +use crate::utils::*; pub struct NodeStateStorage { db: Arc, diff --git a/storage/src/runtime_storage/mod.rs b/storage/src/runtime_storage/mod.rs index 6cf671e3c..54d9b89ad 100644 --- a/storage/src/runtime_storage/mod.rs +++ 
b/storage/src/runtime_storage/mod.rs @@ -1,6 +1,7 @@ use std::sync::Arc; pub use self::persistent_state_keeper::PersistentStateKeeper; + use super::BlockHandleStorage; mod persistent_state_keeper; diff --git a/storage/src/runtime_storage/persistent_state_keeper.rs b/storage/src/runtime_storage/persistent_state_keeper.rs index 18217419d..49f012b52 100644 --- a/storage/src/runtime_storage/persistent_state_keeper.rs +++ b/storage/src/runtime_storage/persistent_state_keeper.rs @@ -5,9 +5,10 @@ use anyhow::Result; use arc_swap::ArcSwapOption; use tokio::sync::Notify; +use tycho_block_util::state::*; + use crate::models::{BlockHandle, BriefBlockMeta}; use crate::BlockHandleStorage; -use tycho_block_util::*; pub struct PersistentStateKeeper { block_handle_storage: Arc, diff --git a/storage/src/shard_state_storage/mod.rs b/storage/src/shard_state_storage/mod.rs index 5c12f9174..1cd8e9427 100644 --- a/storage/src/shard_state_storage/mod.rs +++ b/storage/src/shard_state_storage/mod.rs @@ -12,9 +12,12 @@ use self::files_context::FilesContext; use self::replace_transaction::ShardStateReplaceTransaction; use super::{models::BlockHandle, BlockHandleStorage}; -use crate::block_storage::BlockStorage; +use crate::block_storage::*; use crate::db::*; -use tycho_block_util::*; +use crate::utils::*; + +use tycho_block_util::block::*; +use tycho_block_util::state::*; mod cell_storage; mod cell_writer; @@ -32,7 +35,7 @@ pub struct ShardStateStorage { downloads_dir: Arc, gc_lock: tokio::sync::Mutex<()>, - min_ref_mc_state: Arc, + min_ref_mc_state: Arc, max_new_mc_cell_count: AtomicUsize, max_new_sc_cell_count: AtomicUsize, } @@ -79,11 +82,12 @@ impl ShardStateStorage { } } - pub fn cache_metrics(&self) -> CacheStats { + // TODO: implement metrics + /*pub fn cache_metrics(&self) -> CacheStats { self.cell_storage.cache_stats() - } + }*/ - pub fn min_ref_mc_state(&self) -> &Arc { + pub fn min_ref_mc_state(&self) -> &Arc { &self.min_ref_mc_state } diff --git a/storage/src/shard_state_storage/replace_transaction.rs b/storage/src/shard_state_storage/replace_transaction.rs index b2e0994ac..ce1beeb83 100644 --- a/storage/src/shard_state_storage/replace_transaction.rs +++ b/storage/src/shard_state_storage/replace_transaction.rs @@ -9,14 +9,16 @@ use super::entries_buffer::*; use super::files_context::*; use super::shard_state_reader::*; use crate::db::*; +use crate::utils::*; +use tycho_block_util::state::*; use tycho_block_util::*; use tycho_util::FastHashMap; pub struct ShardStateReplaceTransaction<'a> { db: &'a Db, cell_storage: &'a Arc, - min_ref_mc_state: &'a Arc, + min_ref_mc_state: &'a Arc, reader: ShardStatePacketReader, header: Option, cells_read: u64, @@ -26,7 +28,7 @@ impl<'a> ShardStateReplaceTransaction<'a> { pub fn new( db: &'a Db, cell_storage: &'a Arc, - min_ref_mc_state: &'a Arc, + min_ref_mc_state: &'a Arc, ) -> Self { Self { db, diff --git a/storage/src/utils/mod.rs b/storage/src/utils/mod.rs new file mode 100644 index 000000000..d28b57e54 --- /dev/null +++ b/storage/src/utils/mod.rs @@ -0,0 +1,3 @@ +pub use self::stored_value::*; + +mod stored_value; diff --git a/storage/src/utils/stored_value.rs b/storage/src/utils/stored_value.rs new file mode 100644 index 000000000..c1ac8ca94 --- /dev/null +++ b/storage/src/utils/stored_value.rs @@ -0,0 +1,242 @@ +use anyhow::Result; +use bytes::Buf; +use smallvec::SmallVec; + +use everscale_types::cell::HashBytes; +use everscale_types::models::{BlockId, BlockIdShort, ShardIdent}; + +/// A trait for writing or reading data from a stack-allocated buffer +pub trait 
StoredValue { + /// On-stack buffer size hint + const SIZE_HINT: usize; + + /// On-stack buffer type (see [`smallvec::SmallVec`]) + type OnStackSlice: smallvec::Array; + + /// Serializes the data to the buffer + fn serialize(&self, buffer: &mut T); + + /// Deserializes the data from the buffer. + /// + /// In case of successful deserialization it is guaranteed that `reader` will be + /// moved to the end of the deserialized data. + /// + /// NOTE: `reader` should not be used after this call in case of an error + fn deserialize(reader: &mut &[u8]) -> Result + where + Self: Sized; + + /// Deserializes the data from the buffer. + /// + /// [`StoredValue::deserialize`] + #[inline(always)] + fn from_slice(mut data: &[u8]) -> Result + where + Self: Sized, + { + Self::deserialize(&mut data) + } + + /// Constructs on-stack buffer with the serialized object + fn to_vec(&self) -> SmallVec { + let mut result = SmallVec::with_capacity(Self::SIZE_HINT); + self.serialize(&mut result); + result + } +} + +/// A trait for simple buffer-based serialization +pub trait StoredValueBuffer { + fn write_byte(&mut self, byte: u8); + fn write_raw_slice(&mut self, data: &[u8]); +} + +impl StoredValueBuffer for Vec { + #[inline(always)] + fn write_byte(&mut self, byte: u8) { + self.push(byte); + } + + #[inline(always)] + fn write_raw_slice(&mut self, data: &[u8]) { + self.extend_from_slice(data); + } +} + +impl StoredValueBuffer for SmallVec +where + T: smallvec::Array, +{ + #[inline(always)] + fn write_byte(&mut self, byte: u8) { + self.push(byte); + } + + #[inline(always)] + fn write_raw_slice(&mut self, data: &[u8]) { + self.extend_from_slice(data); + } +} + +impl StoredValue for BlockId { + /// 4 bytes workchain, + /// 8 bytes shard, + /// 4 bytes seqno, + /// 32 bytes root hash, + /// 32 bytes file hash + const SIZE_HINT: usize = ShardIdent::SIZE_HINT + 4 + 32 + 32; + + type OnStackSlice = [u8; Self::SIZE_HINT]; + + fn serialize(&self, buffer: &mut T) { + self.shard.serialize(buffer); + buffer.write_raw_slice(&self.seqno.to_be_bytes()); + buffer.write_raw_slice(self.root_hash.as_slice()); + buffer.write_raw_slice(self.file_hash.as_slice()); + } + + fn deserialize(reader: &mut &[u8]) -> Self + where + Self: Sized, + { + debug_assert!(reader.remaining() >= Self::SIZE_HINT); + + let shard = ShardIdent::deserialize(reader); + let seqno = reader.get_u32(); + let root_hash = HashBytes::from(reader.get_uint(256)); + let file_hash = HashBytes::from(reader.get_uint(256)); + Self { + shard, + seqno, + root_hash, + file_hash, + } + } +} + +impl StoredValue for ShardIdent { + /// 4 bytes workchain + /// 8 bytes shard + const SIZE_HINT: usize = 4 + 8; + + type OnStackSlice = [u8; Self::SIZE_HINT]; + + #[inline(always)] + fn serialize(&self, buffer: &mut T) { + buffer.write_raw_slice(&self.workchain_id().to_be_bytes()); + buffer.write_raw_slice(&self.shard_prefix_with_tag().to_be_bytes()); + } + + fn deserialize(reader: &mut &[u8]) -> Self + where + Self: Sized, + { + debug_assert!(reader.remaining() >= ShardIdent::SIZE_HINT); + + let workchain = reader.get_u32() as i32; + let prefix = reader.get_u64()?; + unsafe { Self::new_unchecked(workchain, prefix) } + } +} + +impl StoredValue for BlockIdShort { + /// 12 bytes shard ident + /// 4 bytes seqno + const SIZE_HINT: usize = ShardIdent::SIZE_HINT + 4; + + type OnStackSlice = [u8; Self::SIZE_HINT]; + + #[inline(always)] + fn serialize(&self, buffer: &mut T) { + self.shard.serialize(buffer); + buffer.write_raw_slice(&self.seqno.to_be_bytes()); + } + + fn deserialize(reader: &mut 
&[u8]) -> Self + where + Self: Sized, + { + debug_assert!(reader.remaining() >= BlockIdShort::SIZE_HINT); + + let shard = ShardIdent::deserialize(reader); + let seqno = reader.get_u32(); + Self { shard, seqno } + } +} + +/// Writes BlockIdExt in little-endian format +pub fn write_block_id_le(block_id: &BlockId) -> [u8; 80] { + let mut bytes = [0u8; 80]; + bytes[..4].copy_from_slice(&block_id.shard.workchain().to_le_bytes()); + bytes[4..12].copy_from_slice(&block_id.shard.prefix().to_le_bytes()); + bytes[12..16].copy_from_slice(&block_id.seqno.to_le_bytes()); + bytes[16..48].copy_from_slice(block_id.root_hash.as_slice()); + bytes[48..80].copy_from_slice(block_id.file_hash.as_slice()); + bytes +} + +/// Reads BlockId in little-endian format +pub fn read_block_id_le(data: &[u8]) -> Option { + if data.len() < 80 { + return None; + } + + let mut workchain = [0; 4]; + workchain.copy_from_slice(&data[0..4]); + let workchain = i32::from_le_bytes(workchain); + + let mut shard = [0; 8]; + shard.copy_from_slice(&data[4..12]); + let shard = u64::from_le_bytes(shard); + + let mut seqno = [0; 4]; + seqno.copy_from_slice(&data[12..16]); + let seqno = u32::from_le_bytes(seqno); + + let mut root_hash = [0; 32]; + root_hash.copy_from_slice(&data[16..48]); + + let mut file_hash = [0; 32]; + file_hash.copy_from_slice(&data[48..80]); + + let shard = unsafe { ShardIdent::new_unchecked(workchain, shard) }; + + Some(BlockId { + shard, + seqno, + root_hash: root_hash.into(), + file_hash: file_hash.into(), + }) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn fully_on_stack() { + assert!(!BlockId::default().to_vec().spilled()); + assert!(!BlockId::default().to_vec().spilled()); + } + + #[test] + fn correct_block_id_le_serialization() { + const SERIALIZED: [u8; 80] = [ + 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 128, 123, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + ]; + + let block_id = BlockId { + shard: ShardIdent::MASTERCHAIN, + seqno: 123, + root_hash: [1u8; 32].into(), + file_hash: [2u8; 32].into(), + }; + + let serialized = write_block_id_le(&block_id); + assert_eq!(serialized, SERIALIZED); + + assert_eq!(read_block_id_le(&serialized).unwrap(), block_id); + } +} From ffea4330efb8134e320fa6f6d8085ab65ccbce80 Mon Sep 17 00:00:00 2001 From: Alexey Pashinov Date: Mon, 26 Feb 2024 11:33:15 +0100 Subject: [PATCH 09/19] refactor(storage): rework project structure; add file db for persistent state (WIP); fix StoredValue deserialization --- Cargo.lock | 4 + storage/Cargo.toml | 3 +- storage/src/db/file_db/cell_writer.rs | 423 ++++++++++++++++++ storage/src/db/file_db/mod.rs | 228 ++++++++++ storage/src/db/{ => kv_db}/config.rs | 0 storage/src/db/{ => kv_db}/migrations/mod.rs | 0 storage/src/db/kv_db/mod.rs | 302 +++++++++++++ storage/src/db/{ => kv_db}/refcount.rs | 0 storage/src/db/{ => kv_db}/tables.rs | 0 storage/src/db/mod.rs | 304 +------------ storage/src/lib.rs | 19 +- .../src/{block_storage => store/block}/mod.rs | 6 +- .../block_connection}/mod.rs | 3 +- .../block_handle}/mod.rs | 3 +- storage/src/store/mod.rs | 13 + .../node_state}/mod.rs | 0 .../{runtime_storage => store/runtime}/mod.rs | 0 .../runtime}/persistent_state_keeper.rs | 0 .../shard_state}/cell_storage.rs | 22 - .../shard_state}/cell_writer.rs | 0 .../shard_state}/entries_buffer.rs | 0 .../shard_state}/files_context.rs | 2 +- .../shard_state}/mod.rs | 3 +- 
.../shard_state}/replace_transaction.rs | 1 + .../shard_state}/shard_state_reader.rs | 0 storage/src/utils/stored_value.rs | 32 +- util/Cargo.toml | 2 + util/src/byte_reader.rs | 11 + util/src/lib.rs | 4 + util/src/mapped_file.rs | 101 +++++ util/src/progress_bar.rs | 130 ++++++ 31 files changed, 1253 insertions(+), 363 deletions(-) create mode 100644 storage/src/db/file_db/cell_writer.rs create mode 100644 storage/src/db/file_db/mod.rs rename storage/src/db/{ => kv_db}/config.rs (100%) rename storage/src/db/{ => kv_db}/migrations/mod.rs (100%) create mode 100644 storage/src/db/kv_db/mod.rs rename storage/src/db/{ => kv_db}/refcount.rs (100%) rename storage/src/db/{ => kv_db}/tables.rs (100%) rename storage/src/{block_storage => store/block}/mod.rs (99%) rename storage/src/{block_connection_storage => store/block_connection}/mod.rs (99%) rename storage/src/{block_handle_storage => store/block_handle}/mod.rs (99%) create mode 100644 storage/src/store/mod.rs rename storage/src/{node_state_storage => store/node_state}/mod.rs (100%) rename storage/src/{runtime_storage => store/runtime}/mod.rs (100%) rename storage/src/{runtime_storage => store/runtime}/persistent_state_keeper.rs (100%) rename storage/src/{shard_state_storage => store/shard_state}/cell_storage.rs (97%) rename storage/src/{shard_state_storage => store/shard_state}/cell_writer.rs (100%) rename storage/src/{shard_state_storage => store/shard_state}/entries_buffer.rs (100%) rename storage/src/{shard_state_storage => store/shard_state}/files_context.rs (98%) rename storage/src/{shard_state_storage => store/shard_state}/mod.rs (99%) rename storage/src/{shard_state_storage => store/shard_state}/replace_transaction.rs (99%) rename storage/src/{shard_state_storage => store/shard_state}/shard_state_reader.rs (100%) create mode 100644 util/src/byte_reader.rs create mode 100644 util/src/mapped_file.rs create mode 100644 util/src/progress_bar.rs diff --git a/Cargo.lock b/Cargo.lock index d09f635df..2145d80b1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2143,6 +2143,7 @@ dependencies = [ "parking_lot", "rand", "sha2", + "smallvec", "thiserror", "tycho-util", ] @@ -2254,6 +2255,7 @@ dependencies = [ "hex", "humantime", "libc", + "num-traits", "parking_lot", "parking_lot_core", "quick_cache", @@ -2281,10 +2283,12 @@ dependencies = [ "futures-util", "hex", "humantime", + "libc", "rand", "serde", "thiserror", "tokio", + "tracing", ] [[package]] diff --git a/storage/Cargo.toml b/storage/Cargo.toml index cff61308b..4c410f4c0 100644 --- a/storage/Cargo.toml +++ b/storage/Cargo.toml @@ -11,7 +11,7 @@ anyhow = "1.0.79" bytes = "1.5.0" tokio = { version = "1.36.0", features = ["full"] } -tracing = "0.1.40" +tracing = "0.1" thiserror = "1.0.57" hex = "0.4.3" libc = "0.2.153" @@ -37,6 +37,7 @@ fdlimit = "0.3.0" humantime = "2.1.0" sysinfo = "0.30.5" triomphe = "0.1.11" +num-traits = "0.2.18" [lints] workspace = true diff --git a/storage/src/db/file_db/cell_writer.rs b/storage/src/db/file_db/cell_writer.rs new file mode 100644 index 000000000..96a6c9076 --- /dev/null +++ b/storage/src/db/file_db/cell_writer.rs @@ -0,0 +1,423 @@ +use std::collections::hash_map; +use std::fs; +use std::fs::File; +use std::io::{Read, Seek, SeekFrom, Write}; +use std::os::unix::io::AsRawFd; +use std::path::{Path, PathBuf}; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; +use std::time::Instant; + +use anyhow::{Context, Result}; +use everscale_types::cell::HashBytes; +use everscale_types::models::BlockId; +use num_traits::ToPrimitive; +use 
smallvec::SmallVec; +use tycho_util::byte_reader::ByteOrderRead; +use tycho_util::FastHashMap; + +use crate::db; +use crate::db::Db; + +pub struct CellWriter<'a> { + db: &'a Db, + base_path: &'a Path, +} + +impl<'a> CellWriter<'a> { + pub fn clear_temp(base_path: &Path, master_block_id: &BlockId, block_id: &BlockId) { + tracing::info!("Cleaning temporary persistent state files"); + + let file_path = Self::make_pss_path(base_path, master_block_id, block_id); + let int_file_path = Self::make_rev_pss_path(&file_path); + let temp_file_path = Self::make_temp_pss_path(&file_path); + + let _ = fs::remove_file(int_file_path); + let _ = fs::remove_file(temp_file_path); + } + + pub fn make_pss_path(base_path: &Path, mc_block_id: &BlockId, block_id: &BlockId) -> PathBuf { + let dir_path = mc_block_id.seqno.to_string(); + let file_name = block_id.root_hash.to_string(); + base_path.join(dir_path).join(file_name) + } + + pub fn make_temp_pss_path(file_path: &Path) -> PathBuf { + file_path.with_extension("temp") + } + + pub fn make_rev_pss_path(file_path: &Path) -> PathBuf { + file_path.with_extension("rev") + } + + #[allow(unused)] + pub fn new(db: &'a Db, base_path: &'a Path) -> Self { + Self { db, base_path } + } + + pub fn write( + &self, + master_block_id: &BlockId, + block_id: &BlockId, + state_root_hash: &HashBytes, + is_cancelled: Arc, + ) -> Result { + let file_path = Self::make_pss_path(self.base_path, master_block_id, block_id); + + // Load cells from db in reverse order into the temp file + tracing::info!(block = %block_id.to_string(), "Started loading cells"); + let now = Instant::now(); + let mut intermediate = write_rev_cells( + self.db, + Self::make_rev_pss_path(&file_path), + state_root_hash.as_array(), + is_cancelled.clone(), + ) + .map_err(|e| { + anyhow::Error::msg(format!("Failed to write reversed cells data. 
Inner: {e:?}")) + })?; + + let temp_file_path = Self::make_temp_pss_path(&file_path); + + tracing::info!(block = %block_id.to_string(), "Creating intermediate file {:?}", file_path); + + let file = fs::OpenOptions::new() + .write(true) + .create(true) + .truncate(true) + .open(&temp_file_path) + .context("Failed to create target file")?; + + let cell_count = intermediate.cell_sizes.len() as u32; + tracing::info!( + elapsed = %humantime::format_duration(now.elapsed()), + cell_count, + block = %block_id, + "Finished loading cells" + ); + + // Compute offset type size (usually 4 bytes) + let offset_size = + std::cmp::min(number_of_bytes_to_fit(intermediate.total_size), 8) as usize; + + // Reserve space for the file + alloc_file( + &file, + 22 + offset_size * (1 + cell_count as usize) + (intermediate.total_size as usize), + )?; + + // Write cells data in BOC format + let mut buffer = std::io::BufWriter::with_capacity(FILE_BUFFER_LEN / 2, file); + + // Header | current len: 0 + let flags = 0b1000_0000u8 | (REF_SIZE as u8); + buffer.write_all(&[0xb5, 0xee, 0x9c, 0x72, flags, offset_size as u8])?; + + // Unique cell count | current len: 6 + buffer.write_all(&cell_count.to_be_bytes())?; + + // Root count | current len: 10 + buffer.write_all(&1u32.to_be_bytes())?; + + // Absent cell count | current len: 14 + buffer.write_all(&[0, 0, 0, 0])?; + + // Total cell size | current len: 18 + buffer.write_all(&intermediate.total_size.to_be_bytes()[(8 - offset_size)..8])?; + + // Root index | current len: 18 + offset_size + buffer.write_all(&[0, 0, 0, 0])?; + + // Cells index | current len: 22 + offset_size + tracing::info!(block = %block_id, "Started building index"); + { + let mut next_offset = 0; + for &cell_size in intermediate.cell_sizes.iter().rev() { + next_offset += cell_size as u64; + buffer.write_all(&next_offset.to_be_bytes()[(8 - offset_size)..8])?; + } + } + tracing::info!(block = %block_id, "Finished building index"); + + // Cells | current len: 22 + offset_size * (1 + cell_sizes.len()) + let mut cell_buffer = [0; 2 + 128 + 4 * REF_SIZE]; + for (i, &cell_size) in intermediate.cell_sizes.iter().rev().enumerate() { + if i % 1000 == 0 && is_cancelled.load(Ordering::Relaxed) { + anyhow::bail!("Persistent state writing cancelled.") + } + intermediate.total_size -= cell_size as u64; + intermediate + .file + .seek(SeekFrom::Start(intermediate.total_size))?; + intermediate + .file + .read_exact(&mut cell_buffer[..cell_size as usize])?; + + let d1 = cell_buffer[0]; + let d2 = cell_buffer[1]; + let ref_count = (d1 & 7) as usize; + let data_size = ((d2 >> 1) + (d2 & 1 != 0) as u8) as usize; + + let ref_offset = 2 + data_size; + for r in 0..ref_count { + let ref_offset = ref_offset + r * REF_SIZE; + let slice = &mut cell_buffer[ref_offset..ref_offset + REF_SIZE]; + + let index = u32::from_be_bytes(slice.try_into().unwrap()); + slice.copy_from_slice(&(cell_count - index - 1).to_be_bytes()); + } + + buffer.write_all(&cell_buffer[..cell_size as usize])?; + } + + buffer.flush()?; + std::fs::rename(&temp_file_path, &file_path)?; + + Ok(file_path) + } +} + +struct IntermediateState { + file: File, + cell_sizes: Vec, + total_size: u64, + _remove_on_drop: RemoveOnDrop, +} + +fn write_rev_cells( + db: &Db, + file_path: PathBuf, + state_root_hash: &[u8; 32], + is_cancelled: Arc, +) -> Result { + todo!() + + /*enum StackItem { + New([u8; 32]), + Loaded(LoadedCell), + } + + struct LoadedCell { + hash: [u8; 32], + d1: u8, + d2: u8, + data: SmallVec<[u8; 128]>, + indices: SmallVec<[u32; 4]>, + } + + 
tracing::info!("Creating rev file {:?}", file_path); + let file = std::fs::OpenOptions::new() + .read(true) + .write(true) + .create(true) + .truncate(true) + .open(&file_path) + .context("Failed to write rev file")?; + let remove_on_drop = RemoveOnDrop(file_path); + + let raw = db.raw().as_ref(); + let read_options = db.cells.read_config(); + let cf = db.cells.cf(); + + let mut references_buffer = SmallVec::<[[u8; 32]; 4]>::with_capacity(4); + + let mut indices = FastHashMap::default(); + let mut remap = FastHashMap::default(); + let mut cell_sizes = Vec::::with_capacity(FILE_BUFFER_LEN); + let mut stack = Vec::with_capacity(32); + + let mut total_size = 0u64; + let mut iteration = 0u32; + let mut remap_index = 0u32; + + stack.push((iteration, StackItem::New(*state_root_hash))); + indices.insert(*state_root_hash, (iteration, false)); + + let mut temp_file_buffer = std::io::BufWriter::with_capacity(FILE_BUFFER_LEN, file); + + while let Some((index, data)) = stack.pop() { + if iteration % 1000 == 0 && is_cancelled.load(Ordering::Relaxed) { + anyhow::bail!("Persistent state writing cancelled.") + } + + match data { + StackItem::New(hash) => { + let value = raw + .get_pinned_cf_opt(&cf, hash, read_options)? + .ok_or(CellWriterError::CellNotFound)?; + + let value = value.as_ref(); + + let mut value = match db::refcount::strip_refcount(value) { + Some(bytes) => bytes, + None => return Err(CellWriterError::CellNotFound.into()), + }; + if value.is_empty() { + return Err(CellWriterError::InvalidCell.into()); + } + + let cell_data = ton_types::CellData::deserialize(&mut value)?; + let bit_length = cell_data.bit_length(); + let d2 = (((bit_length >> 2) as u8) & !0b1) | ((bit_length % 8 != 0) as u8); + + let references_count = cell_data.references_count(); + let cell_type = cell_data + .cell_type() + .to_u8() + .ok_or(CellWriterError::InvalidCell)?; + + let level_mask = cell_data.level_mask().mask(); + let d1 = + references_count as u8 | (((cell_type != 0x01) as u8) << 3) | (level_mask << 5); + let data = cell_data.data(); + + for _ in 0..references_count { + let hash = HashBytes::from(value.read_u256()?); + references_buffer.push(hash.inner()); + } + + let mut reference_indices = SmallVec::with_capacity(references_buffer.len()); + + let mut indices_buffer = [0; 4]; + let mut keys = [std::ptr::null(); 4]; + let mut preload_count = 0; + + for hash in &references_buffer { + let index = match indices.entry(*hash) { + hash_map::Entry::Vacant(entry) => { + remap_index += 1; + + entry.insert((remap_index, false)); + + indices_buffer[preload_count] = remap_index; + keys[preload_count] = hash.as_ptr(); + preload_count += 1; + + remap_index + } + hash_map::Entry::Occupied(entry) => { + let (remap_index, written) = *entry.get(); + if !written { + indices_buffer[preload_count] = remap_index; + keys[preload_count] = hash.as_ptr(); + preload_count += 1; + } + remap_index + } + }; + + reference_indices.push(index); + } + + stack.push(( + index, + StackItem::Loaded(LoadedCell { + hash, + d1, + d2, + data: SmallVec::from_slice(data), + indices: reference_indices, + }), + )); + + if preload_count > 0 { + indices_buffer[..preload_count].reverse(); + keys[..preload_count].reverse(); + + for i in 0..preload_count { + let index = indices_buffer[i]; + let hash = unsafe { *(keys[i] as *const [u8; 32]) }; + stack.push((index, StackItem::New(hash))); + } + } + + references_buffer.clear(); + } + StackItem::Loaded(loaded) => { + match remap.entry(index) { + hash_map::Entry::Vacant(entry) => { + 
entry.insert(iteration.to_be_bytes()); + } + hash_map::Entry::Occupied(_) => continue, + }; + + if let Some((_, written)) = indices.get_mut(&loaded.hash) { + *written = true; + } + + iteration += 1; + if iteration % 100000 == 0 { + tracing::info!(iteration); + } + + let cell_size = 2 + loaded.data.len() + loaded.indices.len() * REF_SIZE; + cell_sizes.push(cell_size as u8); + total_size += cell_size as u64; + + temp_file_buffer.write_all(&[loaded.d1, loaded.d2])?; + temp_file_buffer.write_all(&loaded.data)?; + for index in loaded.indices { + let index = remap.get(&index).with_context(|| { + format!("Child not found. Iteration {iteration}. Child {index}") + })?; + temp_file_buffer.write_all(index)?; + } + } + } + } + + let mut file = temp_file_buffer.into_inner()?; + file.flush()?; + + Ok(IntermediateState { + file, + cell_sizes, + total_size, + _remove_on_drop: remove_on_drop, + })*/ +} + +#[cfg(not(target_os = "macos"))] +fn alloc_file(file: &File, len: usize) -> std::io::Result<()> { + let res = unsafe { libc::posix_fallocate(file.as_raw_fd(), 0, len as i64) }; + if res == 0 { + Ok(()) + } else { + Err(std::io::Error::last_os_error()) + } +} + +#[cfg(target_os = "macos")] +pub fn alloc_file(file: &File, len: usize) -> std::io::Result<()> { + let res = unsafe { libc::ftruncate(file.as_raw_fd(), len as i64) }; + if res < 0 { + Err(std::io::Error::last_os_error()) + } else { + Ok(()) + } +} + +fn number_of_bytes_to_fit(l: u64) -> u32 { + 8 - l.leading_zeros() / 8 +} + +struct RemoveOnDrop(PathBuf); + +impl Drop for RemoveOnDrop { + fn drop(&mut self) { + if let Err(e) = std::fs::remove_file(&self.0) { + tracing::error!(path = %self.0.display(), "failed to remove file: {e:?}"); + } + } +} + +const REF_SIZE: usize = std::mem::size_of::(); +const FILE_BUFFER_LEN: usize = 128 * 1024 * 1024; // 128 MB + +#[derive(thiserror::Error, Debug)] +enum CellWriterError { + #[error("Cell not found in cell db")] + CellNotFound, + #[error("Invalid cell")] + InvalidCell, +} diff --git a/storage/src/db/file_db/mod.rs b/storage/src/db/file_db/mod.rs new file mode 100644 index 000000000..2d48bd58b --- /dev/null +++ b/storage/src/db/file_db/mod.rs @@ -0,0 +1,228 @@ +use std::fs; +use std::path::PathBuf; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; + +use anyhow::Result; +use bytes::BytesMut; +use everscale_types::cell::HashBytes; +use everscale_types::models::BlockId; +use tokio::time::Instant; + +use crate::db::Db; +use crate::store::BlockHandleStorage; + +use self::cell_writer::*; + +mod cell_writer; + +const KEY_BLOCK_UTIME_STEP: u32 = 86400; + +pub struct PersistentStateStorage { + block_handle_storage: Arc, + storage_path: PathBuf, + db: Arc, + is_cancelled: Arc, +} + +impl PersistentStateStorage { + pub async fn new( + file_db_path: PathBuf, + db: Arc, + block_handle_storage: Arc, + ) -> Result { + let dir = file_db_path.join("states"); + tokio::fs::create_dir_all(&dir).await?; + let is_cancelled = Arc::new(Default::default()); + + Ok(Self { + block_handle_storage, + storage_path: dir, + db, + is_cancelled, + }) + } + + pub async fn save_state( + &self, + block_id: &BlockId, + master_block_id: &BlockId, + state_root_hash: &HashBytes, + ) -> Result<()> { + let block_id = block_id.clone(); + let master_block_id = master_block_id.clone(); + let state_root_hash = *state_root_hash; + let db = self.db.clone(); + let base_path = self.storage_path.clone(); + let is_cancelled = self.is_cancelled.clone(); + + tokio::task::spawn_blocking(move || { + let cell_writer = CellWriter::new(&db, 
&base_path); + match cell_writer.write(&master_block_id, &block_id, &state_root_hash, is_cancelled) { + Ok(path) => { + tracing::info!( + block_id = %block_id.to_string(), + path = %path.display(), + "Successfully wrote persistent state to a file", + ); + } + Err(e) => { + tracing::error!( + block_id = %block_id.to_string(), + "Writing persistent state failed. Err: {e:?}" + ); + + CellWriter::clear_temp(&base_path, &master_block_id, &block_id); + } + } + }) + .await + .map_err(From::from) + } + + pub async fn read_state_part( + &self, + mc_block_id: &BlockId, + block_id: &BlockId, + offset: u64, + size: u64, + ) -> Option> { + use tokio::io::{AsyncReadExt, AsyncSeekExt, SeekFrom}; + + // TODO: cache file handles + let mut file = tokio::fs::File::open(self.get_state_file_path(mc_block_id, block_id)) + .await + .ok()?; + + if let Err(e) = file.seek(SeekFrom::Start(offset)).await { + tracing::error!("Failed to seek state file offset. Err: {e:?}"); + return None; + } + + // SAFETY: size must be checked + let mut result = BytesMut::with_capacity(size as usize); + let now = Instant::now(); + loop { + match file.read_buf(&mut result).await { + Ok(bytes_read) => { + tracing::debug!("Reading state file. Bytes read: {}", bytes_read); + if bytes_read == 0 || bytes_read == size as usize { + break; + } + } + Err(e) => { + tracing::error!("Failed to read state file. Err: {e:?}"); + return None; + } + } + } + tracing::info!( + "Finished reading buffer after: {} ms", + now.elapsed().as_millis() + ); + + // TODO: use `Bytes` + Some(result.to_vec()) + } + + pub fn state_exists(&self, mc_block_id: &BlockId, block_id: &BlockId) -> bool { + // TODO: cache file handles + self.get_state_file_path(mc_block_id, block_id).is_file() + } + + pub fn prepare_persistent_states_dir(&self, mc_block: &BlockId) -> Result<()> { + let dir_path = mc_block.seqno.to_string(); + let path = self.storage_path.join(dir_path); + if !path.exists() { + tracing::info!(mc_block = %mc_block, "Creating persistent state directory"); + fs::create_dir(path)?; + } + Ok(()) + } + + fn get_state_file_path(&self, mc_block_id: &BlockId, block_id: &BlockId) -> PathBuf { + CellWriter::make_pss_path(&self.storage_path, mc_block_id, block_id) + } + + pub fn cancel(&self) { + self.is_cancelled.store(true, Ordering::Release); + } + + pub async fn clear_old_persistent_states(&self) -> Result<()> { + tracing::info!("Started clearing old persistent state directories"); + let start = Instant::now(); + + // Keep 2 days of states + 1 state before + let block = { + let now = tycho_util::time::now_sec(); + let mut key_block = self.block_handle_storage.find_last_key_block()?; + + loop { + match self + .block_handle_storage + .find_prev_persistent_key_block(key_block.id().seqno)? 
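+                    // each probe steps one persistent key block back; the first
+                    // one older than 2 * KEY_BLOCK_UTIME_STEP becomes the cutoff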
+ { + Some(prev_key_block) => { + if prev_key_block.meta().gen_utime() + 2 * KEY_BLOCK_UTIME_STEP < now { + break prev_key_block; + } else { + key_block = prev_key_block; + } + } + None => return Ok(()), + } + } + }; + + self.clear_outdated_state_entries(block.id())?; + + tracing::info!( + elapsed = %humantime::format_duration(start.elapsed()), + "Clearing old persistent state directories completed" + ); + + Ok(()) + } + + fn clear_outdated_state_entries(&self, recent_block_id: &BlockId) -> Result<()> { + let mut directories_to_remove: Vec = Vec::new(); + let mut files_to_remove: Vec = Vec::new(); + + for entry in fs::read_dir(&self.storage_path)?.flatten() { + let path = entry.path(); + + if path.is_file() { + files_to_remove.push(path); + continue; + } + + let Ok(name) = entry.file_name().into_string() else { + directories_to_remove.push(path); + continue; + }; + + let is_recent = + matches!(name.parse::(), Ok(seqno) if seqno >= recent_block_id.seqno); + + if !is_recent { + directories_to_remove.push(path); + } + } + + for dir in directories_to_remove { + tracing::info!(dir = %dir.display(), "Removing an old persistent state directory"); + if let Err(e) = fs::remove_dir_all(&dir) { + tracing::error!(dir = %dir.display(), "Failed to remove an old persistent state: {e:?}"); + } + } + + for file in files_to_remove { + tracing::info!(file = %file.display(), "Removing file"); + if let Err(e) = fs::remove_file(&file) { + tracing::error!(file = %file.display(), "Failed to remove file: {e:?}"); + } + } + + Ok(()) + } +} diff --git a/storage/src/db/config.rs b/storage/src/db/kv_db/config.rs similarity index 100% rename from storage/src/db/config.rs rename to storage/src/db/kv_db/config.rs diff --git a/storage/src/db/migrations/mod.rs b/storage/src/db/kv_db/migrations/mod.rs similarity index 100% rename from storage/src/db/migrations/mod.rs rename to storage/src/db/kv_db/migrations/mod.rs diff --git a/storage/src/db/kv_db/mod.rs b/storage/src/db/kv_db/mod.rs new file mode 100644 index 000000000..7e9c434a8 --- /dev/null +++ b/storage/src/db/kv_db/mod.rs @@ -0,0 +1,302 @@ +use std::path::PathBuf; +use std::sync::Arc; +use std::thread::available_parallelism; + +use anyhow::{Context, Result}; +use bytesize::ByteSize; +use serde::{Deserialize, Serialize}; +use weedb::{Caches, WeeDb}; + +pub use weedb::Stats as RocksdbStats; +pub use weedb::{rocksdb, BoundedCfHandle, ColumnFamily, Table}; + +pub mod refcount; +pub mod tables; + +mod config; +mod migrations; + +pub struct Db { + pub archives: Table, + pub block_handles: Table, + pub key_blocks: Table, + pub package_entries: Table, + pub shard_states: Table, + pub cells: Table, + pub node_states: Table, + pub prev1: Table, + pub prev2: Table, + pub next1: Table, + pub next2: Table, + + compaction_lock: tokio::sync::RwLock<()>, + inner: WeeDb, +} + +impl Db { + pub fn open(path: PathBuf, options: DbOptions) -> Result> { + tracing::info!( + rocksdb_lru_capacity = %options.rocksdb_lru_capacity, + cells_cache_size = %options.cells_cache_size, + "opening DB" + ); + + let limit = match fdlimit::raise_fd_limit() { + // New fd limit + Ok(fdlimit::Outcome::LimitRaised { to, .. 
}) => to, + // Current soft limit + _ => { + rlimit::getrlimit(rlimit::Resource::NOFILE) + .unwrap_or((256, 0)) + .0 + } + }; + + let caches_capacity = + std::cmp::max(options.rocksdb_lru_capacity, ByteSize::mib(256)).as_u64() as usize; + + let caches = Caches::with_capacity(caches_capacity); + let threads = available_parallelism()?.get(); + + let inner = WeeDb::builder(path, caches) + .options(|opts, _| { + opts.set_paranoid_checks(false); + + // bigger base level size - less compactions + // parallel compactions finishes faster - less write stalls + + opts.set_max_subcompactions(threads as u32 / 2); + + // io + opts.set_max_open_files(limit as i32); + + // logging + opts.set_log_level(rocksdb::LogLevel::Info); + opts.set_keep_log_file_num(2); + opts.set_recycle_log_file_num(2); + + // cf + opts.create_if_missing(true); + opts.create_missing_column_families(true); + + // cpu + opts.set_max_background_jobs(std::cmp::max((threads as i32) / 2, 2)); + opts.increase_parallelism(threads as i32); + + opts.set_allow_concurrent_memtable_write(false); + opts.set_enable_write_thread_adaptive_yield(true); + + // debug + // NOTE: could slower everything a bit in some cloud environments. + // See: https://github.com/facebook/rocksdb/issues/3889 + // + // opts.enable_statistics(); + // opts.set_stats_dump_period_sec(600); + }) + .with_table::() + .with_table::() + .with_table::() + .with_table::() + .with_table::() + .with_table::() + .with_table::() + .with_table::() + .with_table::() + .with_table::() + .with_table::() + .build() + .context("Failed building db")?; + + migrations::apply(&inner).context("Failed to apply migrations")?; + + Ok(Arc::new(Self { + archives: inner.instantiate_table(), + block_handles: inner.instantiate_table(), + key_blocks: inner.instantiate_table(), + package_entries: inner.instantiate_table(), + shard_states: inner.instantiate_table(), + cells: inner.instantiate_table(), + node_states: inner.instantiate_table(), + prev1: inner.instantiate_table(), + prev2: inner.instantiate_table(), + next1: inner.instantiate_table(), + next2: inner.instantiate_table(), + compaction_lock: tokio::sync::RwLock::default(), + inner, + })) + } + + #[inline] + pub fn raw(&self) -> &Arc { + self.inner.raw() + } + + pub fn get_memory_usage_stats(&self) -> Result { + self.inner.get_memory_usage_stats().map_err(From::from) + } + + pub async fn delay_compaction(&self) -> tokio::sync::RwLockReadGuard<'_, ()> { + self.compaction_lock.read().await + } + + pub async fn trigger_compaction(&self) { + use std::time::Instant; + + let _compaction_guard = self.compaction_lock.write().await; + + let tables = [ + (self.block_handles.cf(), "block handles"), + (self.package_entries.cf(), "package entries"), + (self.archives.cf(), "archives"), + (self.shard_states.cf(), "shard states"), + (self.cells.cf(), "cells"), + ]; + + for (cf, title) in tables { + tracing::info!("{title} compaction started"); + + let instant = Instant::now(); + + let bound = Option::<[u8; 0]>::None; + self.raw().compact_range_cf(&cf, bound, bound); + + tracing::info!( + elapsed = %humantime::format_duration(instant.elapsed()), + "{title} compaction finished" + ); + } + } + + pub fn get_disk_usage(&self) -> Result> { + use std::thread; + + fn get_table_stats(db: &WeeDb) -> (ByteSize, ByteSize) { + let cf = db.instantiate_table::(); + let res: (usize, usize) = cf + .iterator(rocksdb::IteratorMode::Start) + .filter_map(|x| { + let x = match x { + Ok(x) => x, + Err(e) => { + tracing::error!("Error while iterating: {}", e); + return None; + } + 
}; + Some((x.0.len(), x.1.len())) + }) + .fold((0, 0), |acc, x| (acc.0 + x.0, acc.1 + x.1)); + + (ByteSize(res.0 as u64), ByteSize(res.1 as u64)) + } + + macro_rules! stats { + ($spawner:expr, $( $x:ident => $table:ty ),* ) => {{ + $( + let $x = $spawner.spawn(|| get_table_stats::<$table>(&self.inner)); + )* + stats!($($x),*) + } + }; + ( $( $x:ident),* ) => { + { + let mut temp_vec = Vec::new(); + $( + temp_vec.push({ + let $x = $x.join().map_err(|_|anyhow::anyhow!("Join error"))?; + DiskUsageInfo { + cf_name: stringify!($x).to_string(), + keys_total: $x.0, + values_total: $x.1, + } + }); + )* + return Ok(temp_vec) + } + }; + } + + let stats = thread::scope(|s| -> Result> { + stats!(s, + archives => tables::Archives, + block_handles => tables::BlockHandles, + key_blocks => tables::KeyBlocks, + package_entries => tables::PackageEntries, + shard_states => tables::ShardStates, + cells => tables::Cells, + node_states => tables::NodeStates, + prev1 => tables::Prev1, + prev2 => tables::Prev2, + next1 => tables::Next1, + next2 => tables::Next2 + ) + })?; + + Ok(stats) + } +} + +#[derive(Debug, Clone)] +pub struct DiskUsageInfo { + pub cf_name: String, + pub keys_total: ByteSize, + pub values_total: ByteSize, +} + +impl Drop for Db { + fn drop(&mut self) { + self.raw().cancel_all_background_work(true); + } +} + +#[derive(Debug, Copy, Clone, Serialize, Deserialize)] +#[serde(deny_unknown_fields, default)] +pub struct DbOptions { + pub rocksdb_lru_capacity: ByteSize, + pub cells_cache_size: ByteSize, +} + +impl Default for DbOptions { + fn default() -> Self { + // Fetch the currently available memory in bytes + let available = { + let mut sys = sysinfo::System::new(); + sys.refresh_memory(); + sys.available_memory() + }; + + // Estimated memory usage of components other than cache: + // - 2 GiBs for write buffers(4 if we are out of luck and all memtables are being flushed at the same time) + // - 2 GiBs for indexer logic + // - 10 bits per cell for bloom filter. Realistic case is 100M cells, so 0.25 GiBs + // - 1/3 of all available memory is reserved for kernel buffers + const WRITE_BUFFERS: ByteSize = ByteSize::gib(2); + const INDEXER_LOGIC: ByteSize = ByteSize::gib(2); + const BLOOM_FILTER: ByteSize = ByteSize::mib(256); + let estimated_memory_usage = WRITE_BUFFERS + INDEXER_LOGIC + BLOOM_FILTER + available / 3; + + // Reduce the available memory by the fixed offset + let available = available + .checked_sub(estimated_memory_usage.as_u64()) + .unwrap_or_else(|| { + tracing::error!( + "Not enough memory for cache, using 1/4 of all available memory. \ + Tweak `db_options` in config to improve performance." + ); + available / 4 + }); + + // We will use 3/4 of available memory for the cells cache (at most 4 GB). 
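+        // (illustration: with 16 GiB available, the estimate above comes to
+        // ~9.6 GiB, the remaining budget is ~6.4 GiB, and the cells cache
+        // therefore hits its 4 GiB cap)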
+        let cells_cache_size = std::cmp::min(ByteSize(available * 3 / 4), ByteSize::gib(4));
+
+        // The rest of the memory is used for LRU cache (at least 128 MB)
+        let rocksdb_lru_capacity = std::cmp::max(
+            ByteSize(available.saturating_sub(cells_cache_size.as_u64())),
+            ByteSize::mib(128),
+        );
+
+        Self {
+            rocksdb_lru_capacity,
+            cells_cache_size,
+        }
+    }
+}
diff --git a/storage/src/db/refcount.rs b/storage/src/db/kv_db/refcount.rs
similarity index 100%
rename from storage/src/db/refcount.rs
rename to storage/src/db/kv_db/refcount.rs
diff --git a/storage/src/db/tables.rs b/storage/src/db/kv_db/tables.rs
similarity index 100%
rename from storage/src/db/tables.rs
rename to storage/src/db/kv_db/tables.rs
diff --git a/storage/src/db/mod.rs b/storage/src/db/mod.rs
index 61c14a84d..1151c28bc 100644
--- a/storage/src/db/mod.rs
+++ b/storage/src/db/mod.rs
@@ -1,301 +1,5 @@
-use std::path::PathBuf;
-use std::sync::Arc;
-use std::thread::available_parallelism;
+pub use self::file_db::*;
+pub use self::kv_db::*;
-
-use anyhow::{Context, Result};
-use bytesize::ByteSize;
-use serde::{Deserialize, Serialize};
-use weedb::{Caches, WeeDb};
-
-pub use weedb::Stats as RocksdbStats;
-pub use weedb::{rocksdb, BoundedCfHandle, ColumnFamily, Table};
-pub mod refcount;
-pub mod tables;
-
-mod config;
-mod migrations;
-
-pub struct Db {
-    pub archives: Table,
-    pub block_handles: Table,
-    pub key_blocks: Table,
-    pub package_entries: Table,
-    pub shard_states: Table,
-    pub cells: Table,
-    pub node_states: Table,
-    pub prev1: Table,
-    pub prev2: Table,
-    pub next1: Table,
-    pub next2: Table,
-
-    compaction_lock: tokio::sync::RwLock<()>,
-    inner: WeeDb,
-}
-
-impl Db {
-    pub fn open(path: PathBuf, options: DbOptions) -> Result> {
-        tracing::info!(
-            rocksdb_lru_capacity = %options.rocksdb_lru_capacity,
-            cells_cache_size = %options.cells_cache_size,
-            "opening DB"
-        );
-
-        let limit = match fdlimit::raise_fd_limit() {
-            // New fd limit
-            Ok(fdlimit::Outcome::LimitRaised { to, .. }) => to,
-            // Current soft limit
-            _ => {
-                rlimit::getrlimit(rlimit::Resource::NOFILE)
-                    .unwrap_or((256, 0))
-                    .0
-            }
-        };
-
-        let caches_capacity =
-            std::cmp::max(options.rocksdb_lru_capacity, ByteSize::mib(256)).as_u64() as usize;
-
-        let caches = Caches::with_capacity(caches_capacity);
-        let threads = available_parallelism()?.get();
-
-        let inner = WeeDb::builder(path, caches)
-            .options(|opts, _| {
-                opts.set_paranoid_checks(false);
-
-                // bigger base level size - less compactions
-                // parallel compactions finishes faster - less write stalls
-
-                opts.set_max_subcompactions(threads as u32 / 2);
-
-                // io
-                opts.set_max_open_files(limit as i32);
-
-                // logging
-                opts.set_log_level(rocksdb::LogLevel::Info);
-                opts.set_keep_log_file_num(2);
-                opts.set_recycle_log_file_num(2);
-
-                // cf
-                opts.create_if_missing(true);
-                opts.create_missing_column_families(true);
-
-                // cpu
-                opts.set_max_background_jobs(std::cmp::max((threads as i32) / 2, 2));
-                opts.increase_parallelism(threads as i32);
-
-                opts.set_allow_concurrent_memtable_write(false);
-                opts.set_enable_write_thread_adaptive_yield(true);
-
-                // debug
-                // NOTE: could slower everything a bit in some cloud environments.
- // See: https://github.com/facebook/rocksdb/issues/3889 - // - // opts.enable_statistics(); - // opts.set_stats_dump_period_sec(600); - }) - .with_table::() - .with_table::() - .with_table::() - .with_table::() - .with_table::() - .with_table::() - .with_table::() - .with_table::() - .with_table::() - .with_table::() - .with_table::() - .build() - .context("Failed building db")?; - - migrations::apply(&inner).context("Failed to apply migrations")?; - - Ok(Arc::new(Self { - archives: inner.instantiate_table(), - block_handles: inner.instantiate_table(), - key_blocks: inner.instantiate_table(), - package_entries: inner.instantiate_table(), - shard_states: inner.instantiate_table(), - cells: inner.instantiate_table(), - node_states: inner.instantiate_table(), - prev1: inner.instantiate_table(), - prev2: inner.instantiate_table(), - next1: inner.instantiate_table(), - next2: inner.instantiate_table(), - compaction_lock: tokio::sync::RwLock::default(), - inner, - })) - } - - #[inline] - pub fn raw(&self) -> &Arc { - self.inner.raw() - } - - pub fn get_memory_usage_stats(&self) -> Result { - self.inner.get_memory_usage_stats().map_err(From::from) - } - - pub async fn delay_compaction(&self) -> tokio::sync::RwLockReadGuard<'_, ()> { - self.compaction_lock.read().await - } - - pub async fn trigger_compaction(&self) { - use std::time::Instant; - - let _compaction_guard = self.compaction_lock.write().await; - - let tables = [ - (self.block_handles.cf(), "block handles"), - (self.package_entries.cf(), "package entries"), - (self.archives.cf(), "archives"), - (self.shard_states.cf(), "shard states"), - (self.cells.cf(), "cells"), - ]; - - for (cf, title) in tables { - tracing::info!("{title} compaction started"); - - let instant = Instant::now(); - - let bound = Option::<[u8; 0]>::None; - self.raw().compact_range_cf(&cf, bound, bound); - - tracing::info!( - elapsed = %humantime::format_duration(instant.elapsed()), - "{title} compaction finished" - ); - } - } - - pub fn get_disk_usage(&self) -> Result> { - use std::thread; - - fn get_table_stats(db: &WeeDb) -> (ByteSize, ByteSize) { - let cf = db.instantiate_table::(); - let res: (usize, usize) = cf - .iterator(rocksdb::IteratorMode::Start) - .filter_map(|x| { - let x = match x { - Ok(x) => x, - Err(e) => { - tracing::error!("Error while iterating: {}", e); - return None; - } - }; - Some((x.0.len(), x.1.len())) - }) - .fold((0, 0), |acc, x| (acc.0 + x.0, acc.1 + x.1)); - - (ByteSize(res.0 as u64), ByteSize(res.1 as u64)) - } - - macro_rules! 
stats { - ($spawner:expr, $( $x:ident => $table:ty ),* ) => {{ - $( - let $x = $spawner.spawn(|| get_table_stats::<$table>(&self.inner)); - )* - stats!($($x),*) - } - }; - ( $( $x:ident),* ) => { - { - let mut temp_vec = Vec::new(); - $( - temp_vec.push({ - let $x = $x.join().map_err(|_|anyhow::anyhow!("Join error"))?; - DiskUsageInfo { - cf_name: stringify!($x).to_string(), - keys_total: $x.0, - values_total: $x.1, - } - }); - )* - return Ok(temp_vec) - } - }; - } - - let stats = thread::scope(|s| -> Result> { - stats!(s, - archives => tables::Archives, - block_handles => tables::BlockHandles, - key_blocks => tables::KeyBlocks, - package_entries => tables::PackageEntries, - shard_states => tables::ShardStates, - cells => tables::Cells, - node_states => tables::NodeStates, - prev1 => tables::Prev1, - prev2 => tables::Prev2, - next1 => tables::Next1, - next2 => tables::Next2 - ) - })?; - - Ok(stats) - } -} - -#[derive(Debug, Clone)] -pub struct DiskUsageInfo { - pub cf_name: String, - pub keys_total: ByteSize, - pub values_total: ByteSize, -} - -impl Drop for Db { - fn drop(&mut self) { - self.raw().cancel_all_background_work(true); - } -} - -#[derive(Debug, Copy, Clone, Serialize, Deserialize)] -#[serde(deny_unknown_fields, default)] -pub struct DbOptions { - pub rocksdb_lru_capacity: ByteSize, - pub cells_cache_size: ByteSize, -} - -impl Default for DbOptions { - fn default() -> Self { - // Fetch the currently available memory in bytes - let available = { - let mut sys = sysinfo::System::new(); - sys.refresh_memory(); - sys.available_memory() - }; - - // Estimated memory usage of components other than cache: - // - 2 GiBs for write buffers(4 if we are out of luck and all memtables are being flushed at the same time) - // - 2 GiBs for indexer logic - // - 10 bits per cell for bloom filter. Realistic case is 100M cells, so 0.25 GiBs - // - 1/3 of all available memory is reserved for kernel buffers - const WRITE_BUFFERS: ByteSize = ByteSize::gib(2); - const INDEXER_LOGIC: ByteSize = ByteSize::gib(2); - const BLOOM_FILTER: ByteSize = ByteSize::mib(256); - let estimated_memory_usage = WRITE_BUFFERS + INDEXER_LOGIC + BLOOM_FILTER + available / 3; - - // Reduce the available memory by the fixed offset - let available = available - .checked_sub(estimated_memory_usage.as_u64()) - .unwrap_or_else(|| { - tracing::error!( - "Not enough memory for cache, using 1/4 of all available memory. \ - Tweak `db_options` in config to improve performance." - ); - available / 4 - }); - - // We will use 3/4 of available memory for the cells cache (at most 4 GB). 
- let cells_cache_size = std::cmp::min(ByteSize(available * 4 / 3), ByteSize::gib(4)); - - // The reset of the memory is used for LRU cache (at least 128 MB) - let rocksdb_lru_capacity = std::cmp::max( - ByteSize(available.saturating_sub(cells_cache_size.as_u64())), - ByteSize::mib(128), - ); - - Self { - rocksdb_lru_capacity, - cells_cache_size, - } - } -} +mod file_db; +mod kv_db; diff --git a/storage/src/lib.rs b/storage/src/lib.rs index c15317963..b1b8d4f48 100644 --- a/storage/src/lib.rs +++ b/storage/src/lib.rs @@ -1,22 +1,13 @@ use std::path::PathBuf; use std::sync::Arc; -pub use self::block_connection_storage::*; -pub use self::block_handle_storage::*; +pub use self::db::*; pub use self::models::*; -pub use self::runtime_storage::*; +pub use self::store::*; -use self::block_storage::*; -use self::shard_state_storage::*; - -mod block_connection_storage; -mod block_handle_storage; -mod block_storage; mod db; mod models; -mod node_state_storage; -mod runtime_storage; -mod shard_state_storage; +mod store; mod utils; pub struct Storage { @@ -27,8 +18,8 @@ pub struct Storage { block_storage: Arc, shard_state_storage: ShardStateStorage, block_connection_storage: BlockConnectionStorage, - //node_state_storage: NodeStateStorage, - //persistent_state_storage: PersistentStateStorage, + node_state_storage: NodeStateStorage, + persistent_state_storage: PersistentStateStorage, } impl Storage { diff --git a/storage/src/block_storage/mod.rs b/storage/src/store/block/mod.rs similarity index 99% rename from storage/src/block_storage/mod.rs rename to storage/src/store/block/mod.rs index b05170c7f..e9d683f1e 100644 --- a/storage/src/block_storage/mod.rs +++ b/storage/src/store/block/mod.rs @@ -10,17 +10,15 @@ use everscale_types::models::*; use parking_lot::RwLock; use serde::{Deserialize, Serialize}; use tycho_block_util::archive::{ - make_archive_entry, ArchiveEntryId, ArchiveReaderError, ArchiveVerifier, + make_archive_entry, ArchiveEntryId, ArchiveReaderError, ArchiveVerifier, GetFileName, }; use tycho_block_util::block::{ BlockProofStuff, BlockProofStuffAug, BlockStuff, BlockStuffAug, TopBlocks, }; -use super::block_handle_storage::*; -use super::models::*; - use crate::db::*; use crate::utils::*; +use crate::{models::*, BlockHandleStorage, HandleCreationStatus}; pub struct BlockStorage { db: Arc, diff --git a/storage/src/block_connection_storage/mod.rs b/storage/src/store/block_connection/mod.rs similarity index 99% rename from storage/src/block_connection_storage/mod.rs rename to storage/src/store/block_connection/mod.rs index 723152f3a..50ad26ece 100644 --- a/storage/src/block_connection_storage/mod.rs +++ b/storage/src/store/block_connection/mod.rs @@ -3,9 +3,8 @@ use std::sync::Arc; use anyhow::Result; use everscale_types::models::*; -use super::models::*; - use crate::db::*; +use crate::models::*; use crate::utils::*; /// Stores relations between blocks diff --git a/storage/src/block_handle_storage/mod.rs b/storage/src/store/block_handle/mod.rs similarity index 99% rename from storage/src/block_handle_storage/mod.rs rename to storage/src/store/block_handle/mod.rs index d64d2e03b..535a62623 100644 --- a/storage/src/block_handle_storage/mod.rs +++ b/storage/src/store/block_handle/mod.rs @@ -6,9 +6,8 @@ use tycho_block_util::block::TopBlocks; use tycho_block_util::state::is_persistent_state; use tycho_util::FastDashMap; -use super::models::*; - use crate::db::*; +use crate::models::*; use crate::utils::*; pub struct BlockHandleStorage { diff --git a/storage/src/store/mod.rs 
b/storage/src/store/mod.rs new file mode 100644 index 000000000..1a6d5c84b --- /dev/null +++ b/storage/src/store/mod.rs @@ -0,0 +1,13 @@ +pub use self::block::*; +pub use self::block_connection::*; +pub use self::block_handle::*; +pub use self::node_state::*; +pub use self::runtime::*; +pub use self::shard_state::*; + +mod block; +mod block_connection; +mod block_handle; +mod node_state; +mod runtime; +mod shard_state; diff --git a/storage/src/node_state_storage/mod.rs b/storage/src/store/node_state/mod.rs similarity index 100% rename from storage/src/node_state_storage/mod.rs rename to storage/src/store/node_state/mod.rs diff --git a/storage/src/runtime_storage/mod.rs b/storage/src/store/runtime/mod.rs similarity index 100% rename from storage/src/runtime_storage/mod.rs rename to storage/src/store/runtime/mod.rs diff --git a/storage/src/runtime_storage/persistent_state_keeper.rs b/storage/src/store/runtime/persistent_state_keeper.rs similarity index 100% rename from storage/src/runtime_storage/persistent_state_keeper.rs rename to storage/src/store/runtime/persistent_state_keeper.rs diff --git a/storage/src/shard_state_storage/cell_storage.rs b/storage/src/store/shard_state/cell_storage.rs similarity index 97% rename from storage/src/shard_state_storage/cell_storage.rs rename to storage/src/store/shard_state/cell_storage.rs index 8b024f5f6..16112cc95 100644 --- a/storage/src/shard_state_storage/cell_storage.rs +++ b/storage/src/store/shard_state/cell_storage.rs @@ -11,7 +11,6 @@ use quick_cache::sync::{Cache, DefaultLifecycle}; use triomphe::ThinArc; use crate::db::*; -use tycho_block_util::{CacheStats}; use tycho_util::{FastDashMap, FastHashMap, FastHasherState}; pub struct CellStorage { @@ -309,27 +308,6 @@ impl CellStorage { pub fn drop_cell(&self, hash: &HashBytes) { self.cells_cache.remove(hash); } - - pub fn cache_stats(&self) -> CacheStats { - let hits = self.raw_cells_cache.0.hits(); - let misses = self.raw_cells_cache.0.misses(); - let occupied = self.raw_cells_cache.0.len() as u64; - let weight = self.raw_cells_cache.0.weight(); - - let hits_ratio = if hits > 0 { - hits as f64 / (hits + misses) as f64 - } else { - 0.0 - } * 100.0; - CacheStats { - hits, - misses, - requests: hits + misses, - occupied, - hits_ratio, - size_bytes: weight, - } - } } #[derive(thiserror::Error, Debug)] diff --git a/storage/src/shard_state_storage/cell_writer.rs b/storage/src/store/shard_state/cell_writer.rs similarity index 100% rename from storage/src/shard_state_storage/cell_writer.rs rename to storage/src/store/shard_state/cell_writer.rs diff --git a/storage/src/shard_state_storage/entries_buffer.rs b/storage/src/store/shard_state/entries_buffer.rs similarity index 100% rename from storage/src/shard_state_storage/entries_buffer.rs rename to storage/src/store/shard_state/entries_buffer.rs diff --git a/storage/src/shard_state_storage/files_context.rs b/storage/src/store/shard_state/files_context.rs similarity index 98% rename from storage/src/shard_state_storage/files_context.rs rename to storage/src/store/shard_state/files_context.rs index 846b863aa..73740456b 100644 --- a/storage/src/shard_state_storage/files_context.rs +++ b/storage/src/store/shard_state/files_context.rs @@ -5,7 +5,7 @@ use everscale_types::models::*; use tokio::fs::File; use tokio::io::{AsyncWriteExt, BufWriter}; -use tycho_block_util::MappedFile; +use tycho_util::mapped_file::MappedFile; pub struct FilesContext { cells_path: PathBuf, diff --git a/storage/src/shard_state_storage/mod.rs b/storage/src/store/shard_state/mod.rs 
similarity index 99% rename from storage/src/shard_state_storage/mod.rs rename to storage/src/store/shard_state/mod.rs index 1cd8e9427..9265c15a6 100644 --- a/storage/src/shard_state_storage/mod.rs +++ b/storage/src/store/shard_state/mod.rs @@ -10,11 +10,10 @@ use everscale_types::prelude::{Cell, HashBytes}; use self::cell_storage::*; use self::files_context::FilesContext; use self::replace_transaction::ShardStateReplaceTransaction; -use super::{models::BlockHandle, BlockHandleStorage}; -use crate::block_storage::*; use crate::db::*; use crate::utils::*; +use crate::{models::BlockHandle, BlockHandleStorage, BlockStorage}; use tycho_block_util::block::*; use tycho_block_util::state::*; diff --git a/storage/src/shard_state_storage/replace_transaction.rs b/storage/src/store/shard_state/replace_transaction.rs similarity index 99% rename from storage/src/shard_state_storage/replace_transaction.rs rename to storage/src/store/shard_state/replace_transaction.rs index ce1beeb83..ade7a6c6b 100644 --- a/storage/src/shard_state_storage/replace_transaction.rs +++ b/storage/src/store/shard_state/replace_transaction.rs @@ -13,6 +13,7 @@ use crate::utils::*; use tycho_block_util::state::*; use tycho_block_util::*; +use tycho_util::progress_bar::*; use tycho_util::FastHashMap; pub struct ShardStateReplaceTransaction<'a> { diff --git a/storage/src/shard_state_storage/shard_state_reader.rs b/storage/src/store/shard_state/shard_state_reader.rs similarity index 100% rename from storage/src/shard_state_storage/shard_state_reader.rs rename to storage/src/store/shard_state/shard_state_reader.rs diff --git a/storage/src/utils/stored_value.rs b/storage/src/utils/stored_value.rs index c1ac8ca94..50186c97d 100644 --- a/storage/src/utils/stored_value.rs +++ b/storage/src/utils/stored_value.rs @@ -1,9 +1,11 @@ -use anyhow::Result; use bytes::Buf; use smallvec::SmallVec; +use anyhow::Result; use everscale_types::cell::HashBytes; use everscale_types::models::{BlockId, BlockIdShort, ShardIdent}; +use tokio::io::AsyncReadExt; +use tycho_util::byte_reader::ByteOrderRead; /// A trait for writing or reading data from a stack-allocated buffer pub trait StoredValue { @@ -95,22 +97,22 @@ impl StoredValue for BlockId { buffer.write_raw_slice(self.file_hash.as_slice()); } - fn deserialize(reader: &mut &[u8]) -> Self + fn deserialize(reader: &mut &[u8]) -> Result where Self: Sized, { debug_assert!(reader.remaining() >= Self::SIZE_HINT); - let shard = ShardIdent::deserialize(reader); + let shard = ShardIdent::deserialize(reader)?; let seqno = reader.get_u32(); - let root_hash = HashBytes::from(reader.get_uint(256)); - let file_hash = HashBytes::from(reader.get_uint(256)); - Self { + let root_hash = HashBytes::from(reader.read_u256()?); + let file_hash = HashBytes::from(reader.read_u256()?); + Ok(Self { shard, seqno, root_hash, file_hash, - } + }) } } @@ -123,19 +125,19 @@ impl StoredValue for ShardIdent { #[inline(always)] fn serialize(&self, buffer: &mut T) { - buffer.write_raw_slice(&self.workchain_id().to_be_bytes()); - buffer.write_raw_slice(&self.shard_prefix_with_tag().to_be_bytes()); + buffer.write_raw_slice(&self.workchain().to_be_bytes()); + buffer.write_raw_slice(&self.prefix().to_be_bytes()); } - fn deserialize(reader: &mut &[u8]) -> Self + fn deserialize(reader: &mut &[u8]) -> Result where Self: Sized, { debug_assert!(reader.remaining() >= ShardIdent::SIZE_HINT); let workchain = reader.get_u32() as i32; - let prefix = reader.get_u64()?; - unsafe { Self::new_unchecked(workchain, prefix) } + let prefix = reader.get_u64(); 
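+        // `new_unchecked` is sound here only because the prefix was written by
+        // `serialize` above and thus already carries the shard prefix tag bit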
+ Ok(unsafe { Self::new_unchecked(workchain, prefix) }) } } @@ -152,15 +154,15 @@ impl StoredValue for BlockIdShort { buffer.write_raw_slice(&self.seqno.to_be_bytes()); } - fn deserialize(reader: &mut &[u8]) -> Self + fn deserialize(reader: &mut &[u8]) -> Result where Self: Sized, { debug_assert!(reader.remaining() >= BlockIdShort::SIZE_HINT); - let shard = ShardIdent::deserialize(reader); + let shard = ShardIdent::deserialize(reader)?; let seqno = reader.get_u32(); - Self { shard, seqno } + Ok(Self { shard, seqno }) } } diff --git a/util/Cargo.toml b/util/Cargo.toml index f2b0a7fa5..53152926c 100644 --- a/util/Cargo.toml +++ b/util/Cargo.toml @@ -12,10 +12,12 @@ dashmap = "5.4" futures-util = "0.3" hex = "0.4" humantime = "2" +libc = "0.2" rand = "0.8" serde = { version = "1.0", features = ["derive"] } thiserror = "1.0" tokio = { version = "1", default-features = false, features = ["time", "sync", "rt"] } +tracing = "0.1" [dev-dependencies] tokio = { version = "1", default-features = false, features = [ diff --git a/util/src/byte_reader.rs b/util/src/byte_reader.rs new file mode 100644 index 000000000..5eb65aeda --- /dev/null +++ b/util/src/byte_reader.rs @@ -0,0 +1,11 @@ +pub trait ByteOrderRead { + fn read_u256(&mut self) -> std::io::Result<[u8; 32]>; +} + +impl ByteOrderRead for T { + fn read_u256(&mut self) -> std::io::Result<[u8; 32]> { + let mut buf = [0; 32]; + self.read_exact(&mut buf)?; + Ok(buf) + } +} diff --git a/util/src/lib.rs b/util/src/lib.rs index f99e01223..45a08533d 100644 --- a/util/src/lib.rs +++ b/util/src/lib.rs @@ -1,6 +1,10 @@ use std::collections::HashMap; use std::collections::HashSet; +pub mod byte_reader; +pub mod futures; +pub mod mapped_file; +pub mod progress_bar; pub mod serde_helpers; pub mod time; diff --git a/util/src/mapped_file.rs b/util/src/mapped_file.rs new file mode 100644 index 000000000..16d90d20c --- /dev/null +++ b/util/src/mapped_file.rs @@ -0,0 +1,101 @@ +use std::path::Path; + +/// Memory buffer that is mapped to a file +pub struct MappedFile { + file: std::fs::File, + length: usize, + ptr: *mut libc::c_void, +} + +impl MappedFile { + /// Opens a file and maps it to memory. Resizes the file to `length` bytes. + pub fn new
(path: &P, length: usize) -> std::io::Result + where + P: AsRef, + { + let file = std::fs::OpenOptions::new() + .write(true) + .read(true) + .truncate(true) + .create(true) + .open(path)?; + + file.set_len(length as u64)?; + + Self::from_existing_file(file) + } + + /// Opens an existing file and maps it to memory + pub fn from_existing_file(file: std::fs::File) -> std::io::Result { + use std::os::unix::io::AsRawFd; + + let length = file.metadata()?.len() as usize; + + // SAFETY: File was opened successfully, file mode is RW, offset is aligned + let ptr = unsafe { + libc::mmap( + std::ptr::null_mut(), + length, + libc::PROT_READ | libc::PROT_WRITE, + libc::MAP_SHARED, + file.as_raw_fd(), + 0, + ) + }; + + if ptr == libc::MAP_FAILED { + return Err(std::io::Error::last_os_error()); + } + + if unsafe { libc::madvise(ptr, length, libc::MADV_RANDOM) } != 0 { + return Err(std::io::Error::last_os_error()); + } + + Ok(Self { file, length, ptr }) + } + + /// Mapped buffer length in bytes + pub fn length(&self) -> usize { + self.length + } + + /// Copies chunk of bytes to the specified buffer + /// + /// # Safety + /// The caller must take care that the buffer is not out of the mapped memory! + pub unsafe fn read_exact_at(&self, offset: usize, buffer: &mut [u8]) { + std::ptr::copy_nonoverlapping( + (self.ptr as *const u8).add(offset), + buffer.as_mut_ptr(), + buffer.len(), + ) + } + + /// Copies buffer to the mapped memory + /// + /// # Safety + /// The caller must take care that the buffer is not out of the mapped memory! + pub unsafe fn write_all_at(&self, offset: usize, buffer: &[u8]) { + std::ptr::copy_nonoverlapping( + buffer.as_ptr(), + (self.ptr as *mut u8).add(offset), + buffer.len(), + ) + } +} + +impl Drop for MappedFile { + fn drop(&mut self) { + // SAFETY: File still exists, ptr and length were initialized once on creation + if unsafe { libc::munmap(self.ptr, self.length) } != 0 { + // TODO: how to handle this? 
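+            // (munmap can only fail with EINVAL here, i.e. a bogus ptr/length
+            // pair, which would indicate a bug; logging the error and leaking
+            // the mapping would be the non-fatal alternative)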
+ panic!("failed to unmap file: {}", std::io::Error::last_os_error()); + } + + let _ = self.file.set_len(0); + let _ = self.file.sync_all(); + } +} + +unsafe impl Send for MappedFile {} +unsafe impl Sync for MappedFile {} diff --git a/util/src/progress_bar.rs b/util/src/progress_bar.rs new file mode 100644 index 000000000..a4d59ed15 --- /dev/null +++ b/util/src/progress_bar.rs @@ -0,0 +1,130 @@ +pub struct ProgressBar { + name: &'static str, + percentage_step: u64, + current: u64, + total: Option, + exact_unit: Option<&'static str>, + mapper: Box String + Send + 'static>, +} + +impl ProgressBar { + pub fn builder(name: &'static str) -> ProgressBarBuilder { + ProgressBarBuilder::new(name) + } + + pub fn set_total(&mut self, total: impl Into) { + self.total = Some(total.into()); + } + + pub fn set_progress(&mut self, current: impl Into) { + let old = self.compute_current_progress(); + self.current = current.into(); + let new = self.compute_current_progress(); + + if matches!( + (old, new), + (Some(old), Some(new)) if old / self.percentage_step != new / self.percentage_step + ) { + self.progress_message(); + } + } + + pub fn complete(&self) { + self.message("complete"); + } + + #[inline(always)] + fn progress_message(&self) { + let total = match self.total { + Some(total) if total > 0 => total, + _ => return, + }; + + let percent = self.current * 100 / total; + let current = (self.mapper)(self.current); + let total = (self.mapper)(total); + + match self.exact_unit { + Some(exact_unit) => self.message(format_args!( + "{percent}% ({current} / {total} {exact_unit})", + )), + None => self.message(format_args!("{percent}%")), + } + } + + #[inline(always)] + fn message(&self, text: impl std::fmt::Display) { + tracing::info!("{}... {text}", self.name); + } + + fn compute_current_progress(&self) -> Option { + self.total + .filter(|&total| total > 0) + .map(|total| self.current * 100u64 / total) + .map(From::from) + } +} + +pub struct ProgressBarBuilder { + name: &'static str, + percentage_step: u64, + total: Option, + exact_unit: Option<&'static str>, + mapper: Option String + Send + 'static>>, +} + +impl ProgressBarBuilder { + pub fn new(name: &'static str) -> Self { + Self { + name, + percentage_step: PERCENTAGE_STEP, + total: None, + exact_unit: None, + mapper: None, + } + } + + pub fn with_mapper(mut self, mapper: F) -> Self + where + F: Fn(u64) -> String + Send + 'static, + { + self.mapper = Some(Box::new(mapper)); + self + } + + pub fn percentage_step(mut self, step: u64) -> Self { + self.percentage_step = std::cmp::max(step, 1); + self + } + + pub fn total(mut self, total: impl Into) -> Self { + self.total = Some(total.into()); + self + } + + pub fn exact_unit(mut self, unit: &'static str) -> Self { + self.exact_unit = Some(unit); + self + } + + pub fn build(self) -> ProgressBar { + let pg = ProgressBar { + name: self.name, + percentage_step: self.percentage_step, + current: 0, + total: self.total, + exact_unit: self.exact_unit, + mapper: self.mapper.unwrap_or_else(|| Box::new(|x| x.to_string())), + }; + + if self.total.is_some() { + pg.progress_message(); + } else { + pg.message("estimating total"); + } + + pg + } +} + +const PERCENTAGE_STEP: u64 = 5; From 4fce0dd73fd358ccd6e448b0d6a6da4485b5ce2f Mon Sep 17 00:00:00 2001 From: Ivan Kalinin Date: Tue, 27 Feb 2024 13:39:53 +0100 Subject: [PATCH 10/19] fix(storage): fix build --- Cargo.lock | 89 ++++++++++++++++++++------------------------ consensus/Cargo.toml | 2 +- core/Cargo.toml | 4 +- storage/Cargo.toml | 45 +++++++++++----------- 4 files 
changed, 64 insertions(+), 76 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2145d80b1..4f8652836 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -95,9 +95,9 @@ checksum = "0952808a6c2afd1aa8947271f3a60f1a6763c7b912d210184c5149b5cf147247" [[package]] name = "arc-swap" -version = "1.7.1" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457" +checksum = "7b3d0060af21e8d11a926981cc00c6c1541aa91dd64b9f881985c3da1094425f" [[package]] name = "argh" @@ -118,7 +118,7 @@ dependencies = [ "argh_shared", "proc-macro2", "quote", - "syn 2.0.55", + "syn 2.0.53", ] [[package]] @@ -229,7 +229,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - "syn 2.0.55", + "syn 2.0.53", ] [[package]] @@ -400,7 +400,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.55", + "syn 2.0.53", ] [[package]] @@ -528,7 +528,7 @@ checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.55", + "syn 2.0.53", ] [[package]] @@ -601,7 +601,7 @@ checksum = "487585f4d0c6655fe74905e2504d8ad6908e4db67f744eb140876906c2f3175d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.55", + "syn 2.0.53", ] [[package]] @@ -666,7 +666,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff3c058b07bdb5414da10bc8a2489715e31b0c3f4274a213c1a23831e9d94e91" dependencies = [ "ahash", - "base64", + "base64 0.21.7", "bitflags 2.5.0", "crc32c", "everscale-crypto", @@ -688,16 +688,7 @@ checksum = "323d8b61c76be2c16eb2d72d007f1542fdeb3760fdf2e2cae219fc0da3db0c09" dependencies = [ "proc-macro2", "quote", - "syn 2.0.55", -] - -[[package]] -name = "exponential-backoff" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47f78d87d930eee4b5686a2ab032de499c72bd1e954b84262bb03492a0f932cd" -dependencies = [ - "rand", + "syn 2.0.53", ] [[package]] @@ -736,7 +727,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.55", + "syn 2.0.53", ] [[package]] @@ -831,9 +822,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "itoa" -version = "1.0.11" +version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" +checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" [[package]] name = "jobserver" @@ -1177,7 +1168,7 @@ dependencies = [ "pest_meta", "proc-macro2", "quote", - "syn 2.0.55", + "syn 2.0.53", ] [[package]] @@ -1221,9 +1212,9 @@ checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" [[package]] name = "platforms" -version = "3.4.0" +version = "3.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db23d408679286588f4d4644f965003d056e3dd5abcaaa938116871d7ce2fee7" +checksum = "626dec3cac7cc0e1577a2ec3fc496277ec2baa084bebad95bb6fdbfae235f84c" [[package]] name = "powerfmt" @@ -1239,12 +1230,12 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "prettyplease" -version = "0.2.17" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d3928fb5db768cb86f891ff014f0144589297e3c6a1aba6ed7cecfdace270c7" +checksum = "a41cf62165e97c7f814d2221421dbb9afcbcdb0a88068e5ea206e19951c2cbb5" dependencies = [ "proc-macro2", 
- "syn 2.0.55", + "syn 2.0.53", ] [[package]] @@ -1284,9 +1275,9 @@ dependencies = [ [[package]] name = "quick_cache" -version = "0.4.2" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1380629287ed1247c1e0fcc6d43efdcec508b65382c9ab775cc8f3df7ca07b0" +checksum = "58c20af3800cee5134b79a3bd4a3d4b583c16ccfa5f53338f46400851a5b3819" dependencies = [ "ahash", "equivalent", @@ -1391,9 +1382,9 @@ dependencies = [ [[package]] name = "rayon" -version = "1.10.0" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +checksum = "e4963ed1bc86e4f3ee217022bd855b297cef07fb9eac5dfa1f788b220b49b3bd" dependencies = [ "either", "rayon-core", @@ -1432,14 +1423,14 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.4" +version = "1.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" +checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" dependencies = [ "aho-corasick", "memchr", "regex-automata 0.4.6", - "regex-syntax 0.8.3", + "regex-syntax 0.8.2", ] [[package]] @@ -1470,9 +1461,9 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" [[package]] name = "regex-syntax" -version = "0.8.3" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" +checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" [[package]] name = "ring" @@ -1651,7 +1642,7 @@ checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.55", + "syn 2.0.53", ] [[package]] @@ -1802,9 +1793,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.55" +version = "2.0.53" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "002a1b3dbf967edfafc32655d0f377ab0bb7b994aa1d32c8cc7e9b8bf3ebb8f0" +checksum = "7383cd0e49fff4b6b90ca5670bfd3e9d6a733b3f90c686605aa7eec8c4996032" dependencies = [ "proc-macro2", "quote", @@ -1873,7 +1864,7 @@ checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.55", + "syn 2.0.53", ] [[package]] @@ -1958,14 +1949,14 @@ dependencies = [ [[package]] name = "tl-proto-proc" -version = "0.4.6" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a3eaf6822a3ce34a40564dd3078a915d35c3c5fd1f6b3d81eab991e6d00a0fb" +checksum = "b3f51de4865e0618b187c2b790c137de938d01fe5510116b959387b6133c20ac" dependencies = [ "proc-macro2", "quote", "rustc-hash", - "syn 2.0.55", + "syn 2.0.53", "tl-scheme", ] @@ -2009,7 +2000,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.55", + "syn 2.0.53", ] [[package]] @@ -2057,7 +2048,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.55", + "syn 2.0.53", ] [[package]] @@ -2241,7 +2232,7 @@ dependencies = [ [[package]] name = "tycho-storage" -version = "0.1.0" +version = "0.0.1" dependencies = [ "anyhow", "arc-swap", @@ -2406,7 +2397,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.55", + "syn 2.0.53", "wasm-bindgen-shared", ] @@ -2428,7 +2419,7 @@ checksum = 
"e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.55", + "syn 2.0.53", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -2686,7 +2677,7 @@ checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.55", + "syn 2.0.53", ] [[package]] diff --git a/consensus/Cargo.toml b/consensus/Cargo.toml index 45e61b0ec..ba9255877 100644 --- a/consensus/Cargo.toml +++ b/consensus/Cargo.toml @@ -16,7 +16,7 @@ weedb = "0.1" # local deps tycho-network = { path = "../network", version = "=0.0.1" } -tycho-storage = { path = "../storage", version = "0.1" } +tycho-storage = { path = "../storage", version = "=0.0.1" } tycho-util = { path = "../util", version = "=0.0.1" } [dev-dependencies] diff --git a/core/Cargo.toml b/core/Cargo.toml index 27dcdcd78..6c6638331 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -9,8 +9,8 @@ description = "Basic functionality of peer." # local deps tycho-network = { path = "../network", version = "=0.0.1" } -tycho-storage = { path = "../storage", version = "0.1" } +tycho-storage = { path = "../storage", version = "=0.0.1" } tycho-util = { path = "../util", version = "=0.0.1" } [lints] -workspace= true \ No newline at end of file +workspace= true diff --git a/storage/Cargo.toml b/storage/Cargo.toml index 4c410f4c0..aea2cc4e8 100644 --- a/storage/Cargo.toml +++ b/storage/Cargo.toml @@ -1,43 +1,40 @@ [package] name = "tycho-storage" -version = "0.1.0" +version = "0.0.1" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -everscale-types = "0.1.0-rc.6" anyhow = "1.0.79" +arc-swap = "1.6.0" +bumpalo = "3.14.0" bytes = "1.5.0" - -tokio = { version = "1.36.0", features = ["full"] } -tracing = "0.1" -thiserror = "1.0.57" +bytesize = { version = "1.3.0", features = ["serde"] } +crc = "3.0.1" +dashmap = "5.5.3" +everscale-types = "0.1.0-rc.6" +fdlimit = "0.3.0" hex = "0.4.3" +humantime = "2.1.0" libc = "0.2.153" -smallvec = "1.13.1" -sha2 = "0.10.8" - -tycho-block-util = { path = "../block-util" } -tycho-util = { path = "../util" } - -dashmap = "5.5.3" -bumpalo = "3.14.0" -arc-swap = "1.6.0" -crc = "3.0.1" -bytesize = { version = "1.3.0", features = ["serde"] } +num-traits = "0.2.18" parking_lot = "0.12.1" -quick_cache = "0.4.1" parking_lot_core = "0.9.9" -serde = { version = "1.0.196", features = ["derive"] } - -weedb = "0.1.1" +quick_cache = "0.4.1" rlimit = "0.10.1" -fdlimit = "0.3.0" -humantime = "2.1.0" +serde = { version = "1.0.196", features = ["derive"] } +sha2 = "0.10.8" +smallvec = "1.13.1" sysinfo = "0.30.5" +thiserror = "1.0.57" +tokio = { version = "1.36.0", features = ["full"] } +tracing = "0.1" triomphe = "0.1.11" -num-traits = "0.2.18" +weedb = "0.1.1" + +tycho-block-util = { path = "../block-util" } +tycho-util = { path = "../util" } [lints] workspace = true From b01aee22832ff43a14c807a65d61906b645c75a2 Mon Sep 17 00:00:00 2001 From: Alexey Pashinov Date: Tue, 27 Feb 2024 14:22:36 +0100 Subject: [PATCH 11/19] refactor(storage): rework file db (WIP) --- .../src/db/file_db}/mapped_file.rs | 0 storage/src/db/file_db/mod.rs | 271 +++++------------- storage/src/store/mod.rs | 2 + .../persistent_state}/cell_writer.rs | 4 +- storage/src/store/persistent_state/mod.rs | 228 +++++++++++++++ .../src/store/shard_state/files_context.rs | 89 ------ storage/src/store/shard_state/mod.rs | 8 +- .../store/shard_state/replace_transaction.rs | 11 +- util/src/lib.rs | 1 - 9 
files changed, 307 insertions(+), 307 deletions(-) rename {util/src => storage/src/db/file_db}/mapped_file.rs (100%) rename storage/src/{db/file_db => store/persistent_state}/cell_writer.rs (98%) create mode 100644 storage/src/store/persistent_state/mod.rs delete mode 100644 storage/src/store/shard_state/files_context.rs diff --git a/util/src/mapped_file.rs b/storage/src/db/file_db/mapped_file.rs similarity index 100% rename from util/src/mapped_file.rs rename to storage/src/db/file_db/mapped_file.rs diff --git a/storage/src/db/file_db/mod.rs b/storage/src/db/file_db/mod.rs index 2d48bd58b..b8389d51c 100644 --- a/storage/src/db/file_db/mod.rs +++ b/storage/src/db/file_db/mod.rs @@ -1,228 +1,91 @@ -use std::fs; -use std::path::PathBuf; -use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::Arc; +use std::path::{Path, PathBuf}; -use anyhow::Result; -use bytes::BytesMut; -use everscale_types::cell::HashBytes; -use everscale_types::models::BlockId; -use tokio::time::Instant; +use anyhow::{Context, Result}; +use everscale_types::models::*; +use tokio::fs::File; +use tokio::io::{AsyncWriteExt, BufWriter}; -use crate::db::Db; -use crate::store::BlockHandleStorage; +pub use mapped_file::MappedFile; -use self::cell_writer::*; +mod mapped_file; -mod cell_writer; - -const KEY_BLOCK_UTIME_STEP: u32 = 86400; - -pub struct PersistentStateStorage { - block_handle_storage: Arc, - storage_path: PathBuf, - db: Arc, - is_cancelled: Arc, +pub struct FileDb { + cells_path: PathBuf, + cells_file: Option>, + hashes_path: PathBuf, } -impl PersistentStateStorage { - pub async fn new( - file_db_path: PathBuf, - db: Arc, - block_handle_storage: Arc, - ) -> Result { - let dir = file_db_path.join("states"); - tokio::fs::create_dir_all(&dir).await?; - let is_cancelled = Arc::new(Default::default()); - - Ok(Self { - block_handle_storage, - storage_path: dir, - db, - is_cancelled, - }) - } - - pub async fn save_state( - &self, - block_id: &BlockId, - master_block_id: &BlockId, - state_root_hash: &HashBytes, - ) -> Result<()> { - let block_id = block_id.clone(); - let master_block_id = master_block_id.clone(); - let state_root_hash = *state_root_hash; - let db = self.db.clone(); - let base_path = self.storage_path.clone(); - let is_cancelled = self.is_cancelled.clone(); - - tokio::task::spawn_blocking(move || { - let cell_writer = CellWriter::new(&db, &base_path); - match cell_writer.write(&master_block_id, &block_id, &state_root_hash, is_cancelled) { - Ok(path) => { - tracing::info!( - block_id = %block_id.to_string(), - path = %path.display(), - "Successfully wrote persistent state to a file", - ); - } - Err(e) => { - tracing::error!( - block_id = %block_id.to_string(), - "Writing persistent state failed. Err: {e:?}" - ); - - CellWriter::clear_temp(&base_path, &master_block_id, &block_id); - } - } - }) - .await - .map_err(From::from) - } - - pub async fn read_state_part( - &self, - mc_block_id: &BlockId, - block_id: &BlockId, - offset: u64, - size: u64, - ) -> Option> { - use tokio::io::{AsyncReadExt, AsyncSeekExt, SeekFrom}; - - // TODO: cache file handles - let mut file = tokio::fs::File::open(self.get_state_file_path(mc_block_id, block_id)) - .await - .ok()?; - - if let Err(e) = file.seek(SeekFrom::Start(offset)).await { - tracing::error!("Failed to seek state file offset. 
Err: {e:?}"); - return None; - } - - // SAFETY: size must be checked - let mut result = BytesMut::with_capacity(size as usize); - let now = Instant::now(); - loop { - match file.read_buf(&mut result).await { - Ok(bytes_read) => { - tracing::debug!("Reading state file. Bytes read: {}", bytes_read); - if bytes_read == 0 || bytes_read == size as usize { - break; - } - } - Err(e) => { - tracing::error!("Failed to read state file. Err: {e:?}"); - return None; - } - } - } - tracing::info!( - "Finished reading buffer after: {} ms", - now.elapsed().as_millis() +impl FileDb { + pub async fn new
<P>
(downloads_dir: P, block_id: &BlockId) -> Result + where + P: AsRef, + { + let block_id = format!( + "({},{:016x},{})", + block_id.shard.workchain(), + block_id.shard.prefix(), + block_id.seqno ); - // TODO: use `Bytes` - Some(result.to_vec()) - } + let cells_path = downloads_dir + .as_ref() + .join(format!("state_cells_{block_id}")); + let hashes_path = downloads_dir + .as_ref() + .join(format!("state_hashes_{block_id}")); + + let cells_file = Some(BufWriter::new( + tokio::fs::OpenOptions::new() + .write(true) + .create(true) + .truncate(true) + .read(true) + .open(&cells_path) + .await + .context("Failed to create cells file")?, + )); - pub fn state_exists(&self, mc_block_id: &BlockId, block_id: &BlockId) -> bool { - // TODO: cache file handles - self.get_state_file_path(mc_block_id, block_id).is_file() + Ok(Self { + cells_path, + cells_file, + hashes_path, + }) } - pub fn prepare_persistent_states_dir(&self, mc_block: &BlockId) -> Result<()> { - let dir_path = mc_block.seqno.to_string(); - let path = self.storage_path.join(dir_path); - if !path.exists() { - tracing::info!(mc_block = %mc_block, "Creating persistent state directory"); - fs::create_dir(path)?; - } + pub async fn clear(self) -> Result<()> { + tokio::fs::remove_file(self.cells_path).await?; + tokio::fs::remove_file(self.hashes_path).await?; Ok(()) } - fn get_state_file_path(&self, mc_block_id: &BlockId, block_id: &BlockId) -> PathBuf { - CellWriter::make_pss_path(&self.storage_path, mc_block_id, block_id) + pub fn cells_file(&mut self) -> Result<&mut BufWriter> { + match &mut self.cells_file { + Some(file) => Ok(file), + None => Err(FileDbError::AlreadyFinalized.into()), + } } - pub fn cancel(&self) { - self.is_cancelled.store(true, Ordering::Release); + pub fn create_mapped_hashes_file(&self, length: usize) -> Result { + let mapped_file = MappedFile::new(&self.hashes_path, length)?; + Ok(mapped_file) } - pub async fn clear_old_persistent_states(&self) -> Result<()> { - tracing::info!("Started clearing old persistent state directories"); - let start = Instant::now(); - - // Keep 2 days of states + 1 state before - let block = { - let now = tycho_util::time::now_sec(); - let mut key_block = self.block_handle_storage.find_last_key_block()?; - - loop { - match self - .block_handle_storage - .find_prev_persistent_key_block(key_block.id().seqno)? 
- { - Some(prev_key_block) => { - if prev_key_block.meta().gen_utime() + 2 * KEY_BLOCK_UTIME_STEP < now { - break prev_key_block; - } else { - key_block = prev_key_block; - } - } - None => return Ok(()), - } + pub async fn create_mapped_cells_file(&mut self) -> Result { + let file = match self.cells_file.take() { + Some(mut file) => { + file.flush().await?; + file.into_inner().into_std().await } + None => return Err(FileDbError::AlreadyFinalized.into()), }; - self.clear_outdated_state_entries(block.id())?; - - tracing::info!( - elapsed = %humantime::format_duration(start.elapsed()), - "Clearing old persistent state directories completed" - ); - - Ok(()) + let mapped_file = MappedFile::from_existing_file(file)?; + Ok(mapped_file) } +} - fn clear_outdated_state_entries(&self, recent_block_id: &BlockId) -> Result<()> { - let mut directories_to_remove: Vec = Vec::new(); - let mut files_to_remove: Vec = Vec::new(); - - for entry in fs::read_dir(&self.storage_path)?.flatten() { - let path = entry.path(); - - if path.is_file() { - files_to_remove.push(path); - continue; - } - - let Ok(name) = entry.file_name().into_string() else { - directories_to_remove.push(path); - continue; - }; - - let is_recent = - matches!(name.parse::(), Ok(seqno) if seqno >= recent_block_id.seqno); - - if !is_recent { - directories_to_remove.push(path); - } - } - - for dir in directories_to_remove { - tracing::info!(dir = %dir.display(), "Removing an old persistent state directory"); - if let Err(e) = fs::remove_dir_all(&dir) { - tracing::error!(dir = %dir.display(), "Failed to remove an old persistent state: {e:?}"); - } - } - - for file in files_to_remove { - tracing::info!(file = %file.display(), "Removing file"); - if let Err(e) = fs::remove_file(&file) { - tracing::error!(file = %file.display(), "Failed to remove file: {e:?}"); - } - } - - Ok(()) - } +#[derive(thiserror::Error, Debug)] +enum FileDbError { + #[error("Already finalized")] + AlreadyFinalized, } diff --git a/storage/src/store/mod.rs b/storage/src/store/mod.rs index 1a6d5c84b..fdfacd2c0 100644 --- a/storage/src/store/mod.rs +++ b/storage/src/store/mod.rs @@ -2,6 +2,7 @@ pub use self::block::*; pub use self::block_connection::*; pub use self::block_handle::*; pub use self::node_state::*; +pub use self::persistent_state::*; pub use self::runtime::*; pub use self::shard_state::*; @@ -9,5 +10,6 @@ mod block; mod block_connection; mod block_handle; mod node_state; +mod persistent_state; mod runtime; mod shard_state; diff --git a/storage/src/db/file_db/cell_writer.rs b/storage/src/store/persistent_state/cell_writer.rs similarity index 98% rename from storage/src/db/file_db/cell_writer.rs rename to storage/src/store/persistent_state/cell_writer.rs index 96a6c9076..de528bba6 100644 --- a/storage/src/db/file_db/cell_writer.rs +++ b/storage/src/store/persistent_state/cell_writer.rs @@ -65,7 +65,7 @@ impl<'a> CellWriter<'a> { let file_path = Self::make_pss_path(self.base_path, master_block_id, block_id); // Load cells from db in reverse order into the temp file - tracing::info!(block = %block_id.to_string(), "Started loading cells"); + tracing::info!(block = %block_id, "Started loading cells"); let now = Instant::now(); let mut intermediate = write_rev_cells( self.db, @@ -79,7 +79,7 @@ impl<'a> CellWriter<'a> { let temp_file_path = Self::make_temp_pss_path(&file_path); - tracing::info!(block = %block_id.to_string(), "Creating intermediate file {:?}", file_path); + tracing::info!(block = %block_id, "Creating intermediate file {:?}", file_path); let file = 
fs::OpenOptions::new() .write(true) diff --git a/storage/src/store/persistent_state/mod.rs b/storage/src/store/persistent_state/mod.rs new file mode 100644 index 000000000..56f6141bc --- /dev/null +++ b/storage/src/store/persistent_state/mod.rs @@ -0,0 +1,228 @@ +use std::fs; +use std::path::PathBuf; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; + +use anyhow::Result; +use bytes::BytesMut; +use everscale_types::cell::HashBytes; +use everscale_types::models::BlockId; +use tokio::time::Instant; + +use crate::db::Db; +use crate::store::BlockHandleStorage; + +use self::cell_writer::*; + +mod cell_writer; + +const KEY_BLOCK_UTIME_STEP: u32 = 86400; + +pub struct PersistentStateStorage { + block_handle_storage: Arc, + storage_path: PathBuf, + db: Arc, + is_cancelled: Arc, +} + +impl PersistentStateStorage { + pub async fn new( + file_db_path: PathBuf, + db: Arc, + block_handle_storage: Arc, + ) -> Result { + let dir = file_db_path.join("states"); + tokio::fs::create_dir_all(&dir).await?; + let is_cancelled = Arc::new(Default::default()); + + Ok(Self { + block_handle_storage, + storage_path: dir, + db, + is_cancelled, + }) + } + + pub async fn save_state( + &self, + block_id: &BlockId, + master_block_id: &BlockId, + state_root_hash: &HashBytes, + ) -> Result<()> { + let block_id = block_id.clone(); + let master_block_id = master_block_id.clone(); + let state_root_hash = *state_root_hash; + let db = self.db.clone(); + let base_path = self.storage_path.clone(); + let is_cancelled = self.is_cancelled.clone(); + + tokio::task::spawn_blocking(move || { + let cell_writer = CellWriter::new(&db, &base_path); + match cell_writer.write(&master_block_id, &block_id, &state_root_hash, is_cancelled) { + Ok(path) => { + tracing::info!( + block_id = %block_id, + path = %path.display(), + "Successfully wrote persistent state to a file", + ); + } + Err(e) => { + tracing::error!( + block_id = %block_id, + "Writing persistent state failed. Err: {e:?}" + ); + + CellWriter::clear_temp(&base_path, &master_block_id, &block_id); + } + } + }) + .await + .map_err(From::from) + } + + pub async fn read_state_part( + &self, + mc_block_id: &BlockId, + block_id: &BlockId, + offset: u64, + size: u64, + ) -> Option> { + use tokio::io::{AsyncReadExt, AsyncSeekExt, SeekFrom}; + + // TODO: cache file handles + let mut file = tokio::fs::File::open(self.get_state_file_path(mc_block_id, block_id)) + .await + .ok()?; + + if let Err(e) = file.seek(SeekFrom::Start(offset)).await { + tracing::error!("Failed to seek state file offset. Err: {e:?}"); + return None; + } + + // SAFETY: size must be checked + let mut result = BytesMut::with_capacity(size as usize); + let now = Instant::now(); + loop { + match file.read_buf(&mut result).await { + Ok(bytes_read) => { + tracing::debug!("Reading state file. Bytes read: {}", bytes_read); + if bytes_read == 0 || bytes_read == size as usize { + break; + } + } + Err(e) => { + tracing::error!("Failed to read state file. 
Err: {e:?}"); + return None; + } + } + } + tracing::info!( + "Finished reading buffer after: {} ms", + now.elapsed().as_millis() + ); + + // TODO: use `Bytes` + Some(result.to_vec()) + } + + pub fn state_exists(&self, mc_block_id: &BlockId, block_id: &BlockId) -> bool { + // TODO: cache file handles + self.get_state_file_path(mc_block_id, block_id).is_file() + } + + pub fn prepare_persistent_states_dir(&self, mc_block: &BlockId) -> Result<()> { + let dir_path = mc_block.seqno.to_string(); + let path = self.storage_path.join(dir_path); + if !path.exists() { + tracing::info!(mc_block = %mc_block, "Creating persistent state directory"); + fs::create_dir(path)?; + } + Ok(()) + } + + fn get_state_file_path(&self, mc_block_id: &BlockId, block_id: &BlockId) -> PathBuf { + CellWriter::make_pss_path(&self.storage_path, mc_block_id, block_id) + } + + pub fn cancel(&self) { + self.is_cancelled.store(true, Ordering::Release); + } + + pub async fn clear_old_persistent_states(&self) -> Result<()> { + tracing::info!("Started clearing old persistent state directories"); + let start = Instant::now(); + + // Keep 2 days of states + 1 state before + let block = { + let now = tycho_util::time::now_sec(); + let mut key_block = self.block_handle_storage.find_last_key_block()?; + + loop { + match self + .block_handle_storage + .find_prev_persistent_key_block(key_block.id().seqno)? + { + Some(prev_key_block) => { + if prev_key_block.meta().gen_utime() + 2 * KEY_BLOCK_UTIME_STEP < now { + break prev_key_block; + } else { + key_block = prev_key_block; + } + } + None => return Ok(()), + } + } + }; + + self.clear_outdated_state_entries(block.id())?; + + tracing::info!( + elapsed = %humantime::format_duration(start.elapsed()), + "Clearing old persistent state directories completed" + ); + + Ok(()) + } + + fn clear_outdated_state_entries(&self, recent_block_id: &BlockId) -> Result<()> { + let mut directories_to_remove: Vec = Vec::new(); + let mut files_to_remove: Vec = Vec::new(); + + for entry in fs::read_dir(&self.storage_path)?.flatten() { + let path = entry.path(); + + if path.is_file() { + files_to_remove.push(path); + continue; + } + + let Ok(name) = entry.file_name().into_string() else { + directories_to_remove.push(path); + continue; + }; + + let is_recent = + matches!(name.parse::(), Ok(seqno) if seqno >= recent_block_id.seqno); + + if !is_recent { + directories_to_remove.push(path); + } + } + + for dir in directories_to_remove { + tracing::info!(dir = %dir.display(), "Removing an old persistent state directory"); + if let Err(e) = fs::remove_dir_all(&dir) { + tracing::error!(dir = %dir.display(), "Failed to remove an old persistent state: {e:?}"); + } + } + + for file in files_to_remove { + tracing::info!(file = %file.display(), "Removing file"); + if let Err(e) = fs::remove_file(&file) { + tracing::error!(file = %file.display(), "Failed to remove file: {e:?}"); + } + } + + Ok(()) + } +} diff --git a/storage/src/store/shard_state/files_context.rs b/storage/src/store/shard_state/files_context.rs deleted file mode 100644 index 73740456b..000000000 --- a/storage/src/store/shard_state/files_context.rs +++ /dev/null @@ -1,89 +0,0 @@ -use std::path::{Path, PathBuf}; - -use anyhow::{Context, Result}; -use everscale_types::models::*; -use tokio::fs::File; -use tokio::io::{AsyncWriteExt, BufWriter}; - -use tycho_util::mapped_file::MappedFile; - -pub struct FilesContext { - cells_path: PathBuf, - cells_file: Option>, - hashes_path: PathBuf, -} - -impl FilesContext { - pub async fn new
<P>
(downloads_dir: P, block_id: &BlockId) -> Result - where - P: AsRef, - { - let block_id = format!( - "({},{:016x},{})", - block_id.shard.workchain(), - block_id.shard.prefix(), - block_id.seqno - ); - - let cells_path = downloads_dir - .as_ref() - .join(format!("state_cells_{block_id}")); - let hashes_path = downloads_dir - .as_ref() - .join(format!("state_hashes_{block_id}")); - - let cells_file = Some(BufWriter::new( - tokio::fs::OpenOptions::new() - .write(true) - .create(true) - .truncate(true) - .read(true) - .open(&cells_path) - .await - .context("Failed to create cells file")?, - )); - - Ok(Self { - cells_path, - cells_file, - hashes_path, - }) - } - - pub async fn clear(self) -> Result<()> { - tokio::fs::remove_file(self.cells_path).await?; - tokio::fs::remove_file(self.hashes_path).await?; - Ok(()) - } - - pub fn cells_file(&mut self) -> Result<&mut BufWriter> { - match &mut self.cells_file { - Some(file) => Ok(file), - None => Err(FilesContextError::AlreadyFinalized.into()), - } - } - - pub fn create_mapped_hashes_file(&self, length: usize) -> Result { - let mapped_file = MappedFile::new(&self.hashes_path, length)?; - Ok(mapped_file) - } - - pub async fn create_mapped_cells_file(&mut self) -> Result { - let file = match self.cells_file.take() { - Some(mut file) => { - file.flush().await?; - file.into_inner().into_std().await - } - None => return Err(FilesContextError::AlreadyFinalized.into()), - }; - - let mapped_file = MappedFile::from_existing_file(file)?; - Ok(mapped_file) - } -} - -#[derive(thiserror::Error, Debug)] -enum FilesContextError { - #[error("Already finalized")] - AlreadyFinalized, -} diff --git a/storage/src/store/shard_state/mod.rs b/storage/src/store/shard_state/mod.rs index 9265c15a6..64d386b00 100644 --- a/storage/src/store/shard_state/mod.rs +++ b/storage/src/store/shard_state/mod.rs @@ -8,7 +8,6 @@ use everscale_types::models::*; use everscale_types::prelude::{Cell, HashBytes}; use self::cell_storage::*; -use self::files_context::FilesContext; use self::replace_transaction::ShardStateReplaceTransaction; use crate::db::*; @@ -21,7 +20,6 @@ use tycho_block_util::state::*; mod cell_storage; mod cell_writer; mod entries_buffer; -mod files_context; mod replace_transaction; mod shard_state_reader; @@ -160,12 +158,12 @@ impl ShardStateStorage { pub async fn begin_replace( &'_ self, block_id: &BlockId, - ) -> Result<(ShardStateReplaceTransaction<'_>, FilesContext)> { - let ctx = FilesContext::new(self.downloads_dir.as_ref(), block_id).await?; + ) -> Result<(ShardStateReplaceTransaction<'_>, FileDb)> { + let file_db = FileDb::new(self.downloads_dir.as_ref(), block_id).await?; Ok(( ShardStateReplaceTransaction::new(&self.db, &self.cell_storage, &self.min_ref_mc_state), - ctx, + file_db, )) } diff --git a/storage/src/store/shard_state/replace_transaction.rs b/storage/src/store/shard_state/replace_transaction.rs index ade7a6c6b..2e05752df 100644 --- a/storage/src/store/shard_state/replace_transaction.rs +++ b/storage/src/store/shard_state/replace_transaction.rs @@ -6,7 +6,6 @@ use everscale_types::models::BlockId; use super::cell_storage::*; use super::entries_buffer::*; -use super::files_context::*; use super::shard_state_reader::*; use crate::db::*; use crate::utils::*; @@ -47,13 +46,13 @@ impl<'a> ShardStateReplaceTransaction<'a> { pub async fn process_packet( &mut self, - ctx: &mut FilesContext, + file_db: &mut FileDb, packet: Vec, progress_bar: &mut ProgressBar, ) -> Result { use tokio::io::AsyncWriteExt; - let cells_file = ctx.cells_file()?; + let cells_file = 
file_db.cells_file()?; self.reader.set_next_packet(packet); @@ -110,7 +109,7 @@ impl<'a> ShardStateReplaceTransaction<'a> { pub async fn finalize( self, - ctx: &mut FilesContext, + file_db: &mut FileDb, block_id: BlockId, progress_bar: &mut ProgressBar, ) -> Result> { @@ -127,8 +126,8 @@ impl<'a> ShardStateReplaceTransaction<'a> { }; let hashes_file = - ctx.create_mapped_hashes_file(header.cell_count as usize * HashesEntry::LEN)?; - let cells_file = ctx.create_mapped_cells_file().await?; + file_db.create_mapped_hashes_file(header.cell_count as usize * HashesEntry::LEN)?; + let cells_file = file_db.create_mapped_cells_file().await?; let raw = self.db.raw().as_ref(); let write_options = self.db.cells.new_write_config(); diff --git a/util/src/lib.rs b/util/src/lib.rs index 45a08533d..eef6bc3de 100644 --- a/util/src/lib.rs +++ b/util/src/lib.rs @@ -3,7 +3,6 @@ use std::collections::HashSet; pub mod byte_reader; pub mod futures; -pub mod mapped_file; pub mod progress_bar; pub mod serde_helpers; pub mod time; From 1aa115d5302c6fff3c3cdafb78eb7f0886d4b5a0 Mon Sep 17 00:00:00 2001 From: Alexey Pashinov Date: Thu, 29 Feb 2024 14:47:18 +0100 Subject: [PATCH 12/19] refactor(storage): single file_db instance for objects working with files --- storage/src/db/file_db/mod.rs | 103 ++++++--------- storage/src/store/shard_state/mod.rs | 34 ++--- .../store/shard_state/replace_transaction.rs | 122 +++++++++++++++--- 3 files changed, 162 insertions(+), 97 deletions(-) diff --git a/storage/src/db/file_db/mod.rs b/storage/src/db/file_db/mod.rs index b8389d51c..33593f117 100644 --- a/storage/src/db/file_db/mod.rs +++ b/storage/src/db/file_db/mod.rs @@ -1,91 +1,70 @@ +use std::fs::File; +use std::io::Write; use std::path::{Path, PathBuf}; use anyhow::{Context, Result}; use everscale_types::models::*; -use tokio::fs::File; -use tokio::io::{AsyncWriteExt, BufWriter}; pub use mapped_file::MappedFile; mod mapped_file; pub struct FileDb { - cells_path: PathBuf, - cells_file: Option>, - hashes_path: PathBuf, + root_path: PathBuf, } impl FileDb { - pub async fn new
<P>
(downloads_dir: P, block_id: &BlockId) -> Result + pub fn new(root_path: PathBuf) -> Self { + Self { root_path } + } + + pub fn open
<P>
(&self, path: P, is_relative_path: bool) -> Result where P: AsRef, { - let block_id = format!( - "({},{:016x},{})", - block_id.shard.workchain(), - block_id.shard.prefix(), - block_id.seqno - ); - - let cells_path = downloads_dir - .as_ref() - .join(format!("state_cells_{block_id}")); - let hashes_path = downloads_dir - .as_ref() - .join(format!("state_hashes_{block_id}")); + let path = if is_relative_path { + self.root_path.join(path) + } else { + PathBuf::from(path.as_ref()) + }; - let cells_file = Some(BufWriter::new( - tokio::fs::OpenOptions::new() - .write(true) - .create(true) - .truncate(true) - .read(true) - .open(&cells_path) - .await - .context("Failed to create cells file")?, - )); + let file = std::fs::OpenOptions::new() + .write(true) + .create(true) + .truncate(true) + .read(true) + .open(&path) + .context("Failed to create cells file")?; - Ok(Self { - cells_path, - cells_file, - hashes_path, - }) + Ok(file) } - pub async fn clear(self) -> Result<()> { - tokio::fs::remove_file(self.cells_path).await?; - tokio::fs::remove_file(self.hashes_path).await?; + pub fn clear
<P>
(&self, path: P, is_relative_path: bool) -> Result<()> + where + P: AsRef, + { + let path = if is_relative_path { + self.root_path.join(path) + } else { + PathBuf::from(path.as_ref()) + }; + + std::fs::remove_file(path)?; + Ok(()) } - pub fn cells_file(&mut self) -> Result<&mut BufWriter> { - match &mut self.cells_file { - Some(file) => Ok(file), - None => Err(FileDbError::AlreadyFinalized.into()), - } + pub fn write_all(file: &mut File, buf: &[u8]) -> Result<()> { + file.write_all(buf)?; + Ok(()) } - pub fn create_mapped_hashes_file(&self, length: usize) -> Result { - let mapped_file = MappedFile::new(&self.hashes_path, length)?; - Ok(mapped_file) + pub fn flush(file: &mut File) -> Result<()> { + file.flush()?; + Ok(()) } - pub async fn create_mapped_cells_file(&mut self) -> Result { - let file = match self.cells_file.take() { - Some(mut file) => { - file.flush().await?; - file.into_inner().into_std().await - } - None => return Err(FileDbError::AlreadyFinalized.into()), - }; - - let mapped_file = MappedFile::from_existing_file(file)?; - Ok(mapped_file) + pub fn root_path(&self) -> &PathBuf { + &self.root_path } } - -#[derive(thiserror::Error, Debug)] -enum FileDbError { - #[error("Already finalized")] - AlreadyFinalized, -} diff --git a/storage/src/store/shard_state/mod.rs b/storage/src/store/shard_state/mod.rs index 64d386b00..cac9c2912 100644 --- a/storage/src/store/shard_state/mod.rs +++ b/storage/src/store/shard_state/mod.rs @@ -1,3 +1,4 @@ +use std::fs::File; use std::path::PathBuf; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; @@ -25,11 +26,11 @@ mod shard_state_reader; pub struct ShardStateStorage { db: Arc, + file_db: Arc, block_handle_storage: Arc, block_storage: Arc, cell_storage: Arc, - downloads_dir: Arc, gc_lock: tokio::sync::Mutex<()>, min_ref_mc_state: Arc, @@ -38,23 +39,24 @@ pub struct ShardStateStorage { } impl ShardStateStorage { - pub async fn new( + pub fn new( db: Arc, block_handle_storage: Arc, block_storage: Arc, file_db_path: PathBuf, cache_size_bytes: u64, ) -> Result { - let downloads_dir = prepare_file_db_dir(file_db_path, "downloads").await?; + let downloads_dir = prepare_file_db_dir(file_db_path, "downloads")?; + let file_db = Arc::new(FileDb::new(downloads_dir)); let cell_storage = CellStorage::new(db.clone(), cache_size_bytes)?; let res = Self { db, + file_db, block_handle_storage, block_storage, cell_storage, - downloads_dir, gc_lock: Default::default(), min_ref_mc_state: Arc::new(Default::default()), max_new_mc_cell_count: AtomicUsize::new(0), @@ -155,16 +157,14 @@ impl ShardStateStorage { .map(Arc::new) } - pub async fn begin_replace( - &'_ self, - block_id: &BlockId, - ) -> Result<(ShardStateReplaceTransaction<'_>, FileDb)> { - let file_db = FileDb::new(self.downloads_dir.as_ref(), block_id).await?; - - Ok(( - ShardStateReplaceTransaction::new(&self.db, &self.cell_storage, &self.min_ref_mc_state), - file_db, - )) + pub fn begin_replace(&'_ self, block_id: &BlockId) -> Result> { + ShardStateReplaceTransaction::new( + &self.db, + &self.file_db, + &self.cell_storage, + &self.min_ref_mc_state, + block_id, + ) } pub async fn remove_outdated_states(&self, mc_seqno: u32) -> Result { @@ -364,9 +364,9 @@ pub struct ShardStateStorageMetrics { pub max_new_sc_cell_count: usize, } -async fn prepare_file_db_dir(file_db_path: PathBuf, folder: &str) -> Result> { - let dir = Arc::new(file_db_path.join(folder)); - tokio::fs::create_dir_all(dir.as_ref()).await?; +fn prepare_file_db_dir(file_db_path: PathBuf, folder: &str) -> Result { + let dir = 
file_db_path.join(folder); + std::fs::create_dir_all(&dir)?; Ok(dir) } diff --git a/storage/src/store/shard_state/replace_transaction.rs b/storage/src/store/shard_state/replace_transaction.rs index 2e05752df..13710721e 100644 --- a/storage/src/store/shard_state/replace_transaction.rs +++ b/storage/src/store/shard_state/replace_transaction.rs @@ -1,3 +1,6 @@ +use std::fs::File; +use std::io::Write; +use std::path::{Path, PathBuf}; use std::sync::Arc; use anyhow::{Context, Result}; @@ -17,42 +20,49 @@ use tycho_util::FastHashMap; pub struct ShardStateReplaceTransaction<'a> { db: &'a Db, + file_db: &'a FileDb, cell_storage: &'a Arc, min_ref_mc_state: &'a Arc, reader: ShardStatePacketReader, header: Option, cells_read: u64, + file_ctx: FilesContext, } impl<'a> ShardStateReplaceTransaction<'a> { pub fn new( db: &'a Db, + file_db: &'a FileDb, cell_storage: &'a Arc, min_ref_mc_state: &'a Arc, - ) -> Self { - Self { + block_id: &BlockId, + ) -> Result { + let file_ctx = FilesContext::new(file_db, block_id)?; + + Ok(Self { db, + file_db, + file_ctx, cell_storage, min_ref_mc_state, reader: ShardStatePacketReader::new(), header: None, cells_read: 0, - } + }) } pub fn header(&self) -> &Option { &self.header } - pub async fn process_packet( + pub fn process_packet( &mut self, - file_db: &mut FileDb, packet: Vec, progress_bar: &mut ProgressBar, ) -> Result { - use tokio::io::AsyncWriteExt; + use std::io::Write; - let cells_file = file_db.cells_file()?; + let cells_file = self.file_ctx.cells_file()?; self.reader.set_next_packet(packet); @@ -63,7 +73,10 @@ impl<'a> ShardStateReplaceTransaction<'a> { let header = match self.reader.read_header()? { Some(header) => header, - None => return Ok(false), + None => { + self.file_ctx.clear()?; + return Ok(false); + } }; tracing::debug!(?header); @@ -82,7 +95,7 @@ impl<'a> ShardStateReplaceTransaction<'a> { }; buffer[cell_size] = cell_size as u8; - cells_file.write_all(&buffer[..cell_size + 1]).await?; + cells_file.write_all(&buffer[..cell_size + 1])?; chunk_size += cell_size as u32 + 1; self.cells_read += 1; @@ -92,14 +105,16 @@ impl<'a> ShardStateReplaceTransaction<'a> { if chunk_size > 0 { tracing::debug!(chunk_size, "creating chunk"); - cells_file.write_u32_le(chunk_size).await?; + cells_file.write(&chunk_size.to_le_bytes())?; } if self.cells_read < header.cell_count { + self.file_ctx.clear()?; return Ok(false); } if header.has_crc && self.reader.read_crc()?.is_none() { + self.file_ctx.clear()?; return Ok(false); } @@ -108,8 +123,7 @@ impl<'a> ShardStateReplaceTransaction<'a> { } pub async fn finalize( - self, - file_db: &mut FileDb, + mut self, block_id: BlockId, progress_bar: &mut ProgressBar, ) -> Result> { @@ -120,14 +134,17 @@ impl<'a> ShardStateReplaceTransaction<'a> { let header = match &self.header { Some(header) => header, None => { + self.file_ctx.clear()?; return Err(ReplaceTransactionError::InvalidShardStatePacket) - .context("BOC header not found") + .context("BOC header not found"); } }; - let hashes_file = - file_db.create_mapped_hashes_file(header.cell_count as usize * HashesEntry::LEN)?; - let cells_file = file_db.create_mapped_cells_file().await?; + let hashes_file = self + .file_ctx + .create_mapped_hashes_file(header.cell_count as usize * HashesEntry::LEN)?; + + let cells_file = self.file_ctx.create_mapped_cells_file()?; let raw = self.db.raw().as_ref(); let write_options = self.db.cells.new_write_config(); @@ -218,7 +235,7 @@ impl<'a> ShardStateReplaceTransaction<'a> { progress_bar.complete(); // Load stored shard state - match 
self.db.shard_states.get(shard_state_key)? { + let result = match self.db.shard_states.get(shard_state_key)? { Some(root) => { let cell_id = HashBytes::from_slice(&root[..32]); @@ -230,7 +247,11 @@ impl<'a> ShardStateReplaceTransaction<'a> { )?)) } None => Err(ReplaceTransactionError::NotFound.into()), - } + }; + + self.file_ctx.clear()?; + + result } fn finalize_cell( @@ -466,6 +487,65 @@ impl<'a> FinalizationContext<'a> { } } +struct FilesContext { + cells_path: PathBuf, + hashes_path: PathBuf, + cells_file: Option, +} + +impl FilesContext { + pub fn new(file_db: &FileDb, block_id: &BlockId) -> Result { + let block_id = format!( + "({},{:016x},{})", + block_id.shard.workchain(), + block_id.shard.prefix(), + block_id.seqno + ); + + let cells_path = file_db.root_path().join(format!("state_cells_{block_id}")); + let hashes_path = file_db.root_path().join(format!("state_hashes_{block_id}")); + + let cells_file = Some(file_db.open(&cells_path, false)?); + + Ok(Self { + cells_file, + cells_path, + hashes_path, + }) + } + + pub fn cells_file(&mut self) -> Result<&mut File> { + match &mut self.cells_file { + Some(file) => Ok(file), + None => Err(FilesContextError::AlreadyFinalized.into()), + } + } + + pub fn create_mapped_hashes_file(&self, length: usize) -> Result { + let mapped_file = MappedFile::new(&self.hashes_path, length)?; + Ok(mapped_file) + } + + pub fn create_mapped_cells_file(&mut self) -> Result { + let file = match self.cells_file.take() { + Some(mut file) => { + file.flush()?; + file + } + None => return Err(FilesContextError::AlreadyFinalized.into()), + }; + + let mapped_file = MappedFile::from_existing_file(file)?; + Ok(mapped_file) + } + + pub fn clear(&self) -> Result<()> { + std::fs::remove_file(&self.cells_path)?; + std::fs::remove_file(&self.hashes_path)?; + Ok(()) + } +} + #[derive(thiserror::Error, Debug)] enum ReplaceTransactionError { #[error("Not found")] @@ -476,4 +556,10 @@ enum ReplaceTransactionError { InvalidCell, } +#[derive(thiserror::Error, Debug)] +enum FilesContextError { + #[error("Already finalized")] + AlreadyFinalized, +} + const MAX_LEVEL: u8 = 3; From 942a2be564fdd134e2c1b07c69e7116e89ae0fc0 Mon Sep 17 00:00:00 2001 From: Alexey Pashinov Date: Thu, 29 Feb 2024 15:02:57 +0100 Subject: [PATCH 13/19] refactor(storage): remove unnecessary file_db storage for shard state --- storage/src/db/file_db/mod.rs | 13 +++++----- storage/src/store/shard_state/mod.rs | 14 +++++------ .../store/shard_state/replace_transaction.rs | 24 +++++++++++-------- 3 files changed, 27 insertions(+), 24 deletions(-) diff --git a/storage/src/db/file_db/mod.rs b/storage/src/db/file_db/mod.rs index 33593f117..64e0995a3 100644 --- a/storage/src/db/file_db/mod.rs +++ b/storage/src/db/file_db/mod.rs @@ -14,8 +14,13 @@ pub struct FileDb { } impl FileDb { - pub fn new(root_path: PathBuf) -> Self { - Self { root_path } + pub fn new
<P>
(root_path: P) -> Self + where + P: AsRef, + { + Self { + root_path: PathBuf::from(root_path.as_ref()), + } } pub fn open
<P>
(&self, path: P, is_relative_path: bool) -> Result @@ -63,8 +68,4 @@ impl FileDb { file.flush()?; Ok(()) } - - pub fn root_path(&self) -> &PathBuf { - &self.root_path - } } diff --git a/storage/src/store/shard_state/mod.rs b/storage/src/store/shard_state/mod.rs index cac9c2912..ee189fafa 100644 --- a/storage/src/store/shard_state/mod.rs +++ b/storage/src/store/shard_state/mod.rs @@ -26,11 +26,11 @@ mod shard_state_reader; pub struct ShardStateStorage { db: Arc, - file_db: Arc, block_handle_storage: Arc, block_storage: Arc, cell_storage: Arc, + downloads_dir: Arc, gc_lock: tokio::sync::Mutex<()>, min_ref_mc_state: Arc, @@ -47,16 +47,14 @@ impl ShardStateStorage { cache_size_bytes: u64, ) -> Result { let downloads_dir = prepare_file_db_dir(file_db_path, "downloads")?; - let file_db = Arc::new(FileDb::new(downloads_dir)); - let cell_storage = CellStorage::new(db.clone(), cache_size_bytes)?; let res = Self { db, - file_db, block_handle_storage, block_storage, cell_storage, + downloads_dir, gc_lock: Default::default(), min_ref_mc_state: Arc::new(Default::default()), max_new_mc_cell_count: AtomicUsize::new(0), @@ -160,9 +158,9 @@ impl ShardStateStorage { pub fn begin_replace(&'_ self, block_id: &BlockId) -> Result> { ShardStateReplaceTransaction::new( &self.db, - &self.file_db, &self.cell_storage, &self.min_ref_mc_state, + self.downloads_dir.as_ref(), block_id, ) } @@ -364,9 +362,9 @@ pub struct ShardStateStorageMetrics { pub max_new_sc_cell_count: usize, } -fn prepare_file_db_dir(file_db_path: PathBuf, folder: &str) -> Result { - let dir = file_db_path.join(folder); - std::fs::create_dir_all(&dir)?; +fn prepare_file_db_dir(file_db_path: PathBuf, folder: &str) -> Result> { + let dir = Arc::new(file_db_path.join(folder)); + std::fs::create_dir_all(dir.as_ref())?; Ok(dir) } diff --git a/storage/src/store/shard_state/replace_transaction.rs b/storage/src/store/shard_state/replace_transaction.rs index 13710721e..12ae4da60 100644 --- a/storage/src/store/shard_state/replace_transaction.rs +++ b/storage/src/store/shard_state/replace_transaction.rs @@ -20,7 +20,6 @@ use tycho_util::FastHashMap; pub struct ShardStateReplaceTransaction<'a> { db: &'a Db, - file_db: &'a FileDb, cell_storage: &'a Arc, min_ref_mc_state: &'a Arc, reader: ShardStatePacketReader, @@ -30,18 +29,20 @@ pub struct ShardStateReplaceTransaction<'a> { } impl<'a> ShardStateReplaceTransaction<'a> { - pub fn new( + pub fn new
<P>
( db: &'a Db, - file_db: &'a FileDb, cell_storage: &'a Arc, min_ref_mc_state: &'a Arc, + path: P, block_id: &BlockId, - ) -> Result { - let file_ctx = FilesContext::new(file_db, block_id)?; + ) -> Result + where + P: AsRef, + { + let file_ctx = FilesContext::new(path, block_id)?; Ok(Self { db, - file_db, file_ctx, cell_storage, min_ref_mc_state, @@ -494,7 +495,10 @@ struct FilesContext { } impl FilesContext { - pub fn new(file_db: &FileDb, block_id: &BlockId) -> Result { + pub fn new
<P>
(root_path: P, block_id: &BlockId) -> Result + where + P: AsRef, + { let block_id = format!( "({},{:016x},{})", block_id.shard.workchain(), @@ -502,10 +506,10 @@ impl FilesContext { block_id.seqno ); - let cells_path = file_db.root_path().join(format!("state_cells_{block_id}")); - let hashes_path = file_db.root_path().join(format!("state_hashes_{block_id}")); + let cells_path = root_path.as_ref().join(format!("state_cells_{block_id}")); + let hashes_path = root_path.as_ref().join(format!("state_hashes_{block_id}")); - let cells_file = Some(file_db.open(&cells_path, false)?); + let cells_file = Some(FileDb::new(root_path).open(&cells_path, false)?); Ok(Self { cells_file, From a8c11b814a2a6f79c77bcb1f1b4e19455c8fa5ff Mon Sep 17 00:00:00 2001 From: Alexey Pashinov Date: Fri, 1 Mar 2024 14:32:01 +0100 Subject: [PATCH 14/19] refactor(storage): remove temp files when drop object --- storage/src/db/file_db/mapped_file.rs | 40 ++++++++------- storage/src/db/file_db/mod.rs | 49 ++++++------------- .../src/store/persistent_state/cell_writer.rs | 6 ++- .../store/shard_state/replace_transaction.rs | 26 +++++----- 4 files changed, 52 insertions(+), 69 deletions(-) diff --git a/storage/src/db/file_db/mapped_file.rs b/storage/src/db/file_db/mapped_file.rs index 16d90d20c..e671b5bb3 100644 --- a/storage/src/db/file_db/mapped_file.rs +++ b/storage/src/db/file_db/mapped_file.rs @@ -1,35 +1,33 @@ use std::path::Path; +use anyhow::Result; + +use crate::FileDb; + /// Memory buffer that is mapped to a file pub struct MappedFile { - file: std::fs::File, + file_db: FileDb, length: usize, ptr: *mut libc::c_void, } impl MappedFile { /// Opens a file and maps it to memory. Resizes the file to `length` bytes. - pub fn new
<P>
(path: &P, length: usize) -> std::io::Result + pub fn new
<P>
(path: &P, length: usize) -> Result where P: AsRef, { - let file = std::fs::OpenOptions::new() - .write(true) - .read(true) - .truncate(true) - .create(true) - .open(path)?; - - file.set_len(length as u64)?; + let file_db = FileDb::open(path)?; + file_db.file.set_len(length as u64)?; - Self::from_existing_file(file) + Self::from_existing_file(file_db) } /// Opens an existing file and maps it to memory - pub fn from_existing_file(file: std::fs::File) -> std::io::Result { + pub fn from_existing_file(file_db: FileDb) -> Result { use std::os::unix::io::AsRawFd; - let length = file.metadata()?.len() as usize; + let length = file_db.file.metadata()?.len() as usize; // SAFETY: File was opened successfully, file mode is RW, offset is aligned let ptr = unsafe { @@ -38,20 +36,24 @@ impl MappedFile { length, libc::PROT_READ | libc::PROT_WRITE, libc::MAP_SHARED, - file.as_raw_fd(), + file_db.file.as_raw_fd(), 0, ) }; if ptr == libc::MAP_FAILED { - return Err(std::io::Error::last_os_error()); + return Err(std::io::Error::last_os_error().into()); } if unsafe { libc::madvise(ptr, length, libc::MADV_RANDOM) } != 0 { - return Err(std::io::Error::last_os_error()); + return Err(std::io::Error::last_os_error().into()); } - Ok(Self { file, length, ptr }) + Ok(Self { + file_db, + length, + ptr, + }) } /// Mapped buffer length in bytes @@ -92,8 +94,8 @@ impl Drop for MappedFile { panic!("failed to unmap file: {}", std::io::Error::last_os_error()); } - let _ = self.file.set_len(0); - let _ = self.file.sync_all(); + let _ = self.file_db.file.set_len(0); + let _ = self.file_db.file.sync_all(); } } diff --git a/storage/src/db/file_db/mod.rs b/storage/src/db/file_db/mod.rs index 64e0995a3..53eecdea8 100644 --- a/storage/src/db/file_db/mod.rs +++ b/storage/src/db/file_db/mod.rs @@ -10,62 +10,41 @@ pub use mapped_file::MappedFile; mod mapped_file; pub struct FileDb { - root_path: PathBuf, + pub file: File, + pub path: PathBuf, } impl FileDb { - pub fn new
<P>
(root_path: P) -> Self + pub fn open
<P>
(path: P) -> Result where P: AsRef, { - Self { - root_path: PathBuf::from(root_path.as_ref()), - } - } - - pub fn open
<P>
(&self, path: P, is_relative_path: bool) -> Result - where - P: AsRef, - { - let path = if is_relative_path { - self.root_path.join(path) - } else { - PathBuf::from(path.as_ref()) - }; - let file = std::fs::OpenOptions::new() .write(true) .create(true) .truncate(true) .read(true) .open(&path) - .context("Failed to create cells file")?; + .context("Failed to create file")?; - Ok(file) + Ok(Self { + file, + path: PathBuf::from(path.as_ref()), + }) } - pub fn clear
<P>
(&self, path: P, is_relative_path: bool) -> Result<()> - where - P: AsRef, - { - let path = if is_relative_path { - self.root_path.join(path) - } else { - PathBuf::from(path.as_ref()) - }; - - std::fs::remove_file(path)?; - + pub fn write(&mut self, buf: &[u8]) -> Result<()> { + self.file.write(buf)?; Ok(()) } - pub fn write_all(file: &mut File, buf: &[u8]) -> Result<()> { - file.write_all(buf)?; + pub fn write_all(&mut self, buf: &[u8]) -> Result<()> { + self.file.write_all(buf)?; Ok(()) } - pub fn flush(file: &mut File) -> Result<()> { - file.flush()?; + pub fn flush(&mut self) -> Result<()> { + self.file.flush()?; Ok(()) } } diff --git a/storage/src/store/persistent_state/cell_writer.rs b/storage/src/store/persistent_state/cell_writer.rs index de528bba6..3c2501279 100644 --- a/storage/src/store/persistent_state/cell_writer.rs +++ b/storage/src/store/persistent_state/cell_writer.rs @@ -16,8 +16,8 @@ use smallvec::SmallVec; use tycho_util::byte_reader::ByteOrderRead; use tycho_util::FastHashMap; -use crate::db; use crate::db::Db; +use crate::{db, FileDb}; pub struct CellWriter<'a> { db: &'a Db, @@ -147,9 +147,11 @@ impl<'a> CellWriter<'a> { } intermediate.total_size -= cell_size as u64; intermediate + .file_db .file .seek(SeekFrom::Start(intermediate.total_size))?; intermediate + .file_db .file .read_exact(&mut cell_buffer[..cell_size as usize])?; @@ -178,7 +180,7 @@ impl<'a> CellWriter<'a> { } struct IntermediateState { - file: File, + file_db: FileDb, cell_sizes: Vec, total_size: u64, _remove_on_drop: RemoveOnDrop, diff --git a/storage/src/store/shard_state/replace_transaction.rs b/storage/src/store/shard_state/replace_transaction.rs index 12ae4da60..ac18ad308 100644 --- a/storage/src/store/shard_state/replace_transaction.rs +++ b/storage/src/store/shard_state/replace_transaction.rs @@ -75,7 +75,6 @@ impl<'a> ShardStateReplaceTransaction<'a> { let header = match self.reader.read_header()? 
{ Some(header) => header, None => { - self.file_ctx.clear()?; return Ok(false); } }; @@ -110,12 +109,10 @@ impl<'a> ShardStateReplaceTransaction<'a> { } if self.cells_read < header.cell_count { - self.file_ctx.clear()?; return Ok(false); } if header.has_crc && self.reader.read_crc()?.is_none() { - self.file_ctx.clear()?; return Ok(false); } @@ -135,7 +132,6 @@ impl<'a> ShardStateReplaceTransaction<'a> { let header = match &self.header { Some(header) => header, None => { - self.file_ctx.clear()?; return Err(ReplaceTransactionError::InvalidShardStatePacket) .context("BOC header not found"); } @@ -250,8 +246,6 @@ impl<'a> ShardStateReplaceTransaction<'a> { None => Err(ReplaceTransactionError::NotFound.into()), }; - self.file_ctx.clear()?; - result } @@ -491,7 +485,7 @@ impl<'a> FinalizationContext<'a> { struct FilesContext { cells_path: PathBuf, hashes_path: PathBuf, - cells_file: Option, + cells_file: Option, } impl FilesContext { @@ -509,7 +503,7 @@ impl FilesContext { let cells_path = root_path.as_ref().join(format!("state_cells_{block_id}")); let hashes_path = root_path.as_ref().join(format!("state_hashes_{block_id}")); - let cells_file = Some(FileDb::new(root_path).open(&cells_path, false)?); + let cells_file = Some(FileDb::open(&cells_path)?); Ok(Self { cells_file, @@ -518,7 +512,7 @@ impl FilesContext { }) } - pub fn cells_file(&mut self) -> Result<&mut File> { + pub fn cells_file(&mut self) -> Result<&mut FileDb> { match &mut self.cells_file { Some(file) => Ok(file), None => Err(FilesContextError::AlreadyFinalized.into()), @@ -542,11 +536,17 @@ impl FilesContext { let mapped_file = MappedFile::from_existing_file(file)?; Ok(mapped_file) } +} - pub fn clear(&self) -> Result<()> { - std::fs::remove_file(&self.cells_path)?; - std::fs::remove_file(&self.hashes_path)?; - Ok(()) +impl Drop for FilesContext { + fn drop(&mut self) { + if let Err(e) = std::fs::remove_file(&self.cells_path) { + tracing::error!(file = ?self.cells_path, "failed to remove file: {e}"); + } + + if let Err(e) = std::fs::remove_file(&self.hashes_path) { + tracing::error!(file = ?self.cells_path, "failed to remove file: {e}"); + } } } From 0d596df0429a37551ddd7902de56866bd26dbd83 Mon Sep 17 00:00:00 2001 From: Alexey Pashinov Date: Tue, 5 Mar 2024 14:39:22 +0100 Subject: [PATCH 15/19] refactor(storage): cell writer for pesistent and shard states --- storage/src/db/file_db/mod.rs | 28 +- .../src/store/persistent_state/cell_writer.rs | 425 ------------------ storage/src/store/persistent_state/mod.rs | 72 ++- storage/src/store/shard_state/mod.rs | 1 - .../shard_state => utils}/cell_writer.rs | 59 ++- storage/src/utils/mod.rs | 2 + 6 files changed, 98 insertions(+), 489 deletions(-) delete mode 100644 storage/src/store/persistent_state/cell_writer.rs rename storage/src/{store/shard_state => utils}/cell_writer.rs (89%) diff --git a/storage/src/db/file_db/mod.rs b/storage/src/db/file_db/mod.rs index 53eecdea8..c4f73b1f0 100644 --- a/storage/src/db/file_db/mod.rs +++ b/storage/src/db/file_db/mod.rs @@ -1,5 +1,5 @@ use std::fs::File; -use std::io::Write; +use std::io::{Read, Seek, SeekFrom, Write}; use std::path::{Path, PathBuf}; use anyhow::{Context, Result}; @@ -10,8 +10,8 @@ pub use mapped_file::MappedFile; mod mapped_file; pub struct FileDb { - pub file: File, - pub path: PathBuf, + file: File, + path: PathBuf, } impl FileDb { @@ -25,7 +25,7 @@ impl FileDb { .truncate(true) .read(true) .open(&path) - .context("Failed to create file")?; + .context(format!("Failed to create file {:?}", path.as_ref()))?; Ok(Self { file, @@ 
diff --git a/storage/src/store/persistent_state/cell_writer.rs b/storage/src/store/persistent_state/cell_writer.rs
deleted file mode 100644
index 3c2501279..000000000
--- a/storage/src/store/persistent_state/cell_writer.rs
+++ /dev/null
@@ -1,425 +0,0 @@
-use std::collections::hash_map;
-use std::fs;
-use std::fs::File;
-use std::io::{Read, Seek, SeekFrom, Write};
-use std::os::unix::io::AsRawFd;
-use std::path::{Path, PathBuf};
-use std::sync::atomic::{AtomicBool, Ordering};
-use std::sync::Arc;
-use std::time::Instant;
-
-use anyhow::{Context, Result};
-use everscale_types::cell::HashBytes;
-use everscale_types::models::BlockId;
-use num_traits::ToPrimitive;
-use smallvec::SmallVec;
-use tycho_util::byte_reader::ByteOrderRead;
-use tycho_util::FastHashMap;
-
-use crate::db::Db;
-use crate::{db, FileDb};
-
-pub struct CellWriter<'a> {
-    db: &'a Db,
-    base_path: &'a Path,
-}
-
-impl<'a> CellWriter<'a> {
-    pub fn clear_temp(base_path: &Path, master_block_id: &BlockId, block_id: &BlockId) {
-        tracing::info!("Cleaning temporary persistent state files");
-
-        let file_path = Self::make_pss_path(base_path, master_block_id, block_id);
-        let int_file_path = Self::make_rev_pss_path(&file_path);
-        let temp_file_path = Self::make_temp_pss_path(&file_path);
-
-        let _ = fs::remove_file(int_file_path);
-        let _ = fs::remove_file(temp_file_path);
-    }
-
-    pub fn make_pss_path(base_path: &Path, mc_block_id: &BlockId, block_id: &BlockId) -> PathBuf {
-        let dir_path = mc_block_id.seqno.to_string();
-        let file_name = block_id.root_hash.to_string();
-        base_path.join(dir_path).join(file_name)
-    }
-
-    pub fn make_temp_pss_path(file_path: &Path) -> PathBuf {
-        file_path.with_extension("temp")
-    }
-
-    pub fn make_rev_pss_path(file_path: &Path) -> PathBuf {
-        file_path.with_extension("rev")
-    }
-
-    #[allow(unused)]
-    pub fn new(db: &'a Db, base_path: &'a Path) -> Self {
-        Self { db, base_path }
-    }
-
-    pub fn write(
-        &self,
-        master_block_id: &BlockId,
-        block_id: &BlockId,
-        state_root_hash: &HashBytes,
-        is_cancelled: Arc<AtomicBool>,
-    ) -> Result<PathBuf> {
-        let file_path = Self::make_pss_path(self.base_path, master_block_id, block_id);
-
-        // Load cells from db in reverse order into the temp file
-        tracing::info!(block = %block_id, "Started loading cells");
-        let now = Instant::now();
-        let mut intermediate = write_rev_cells(
-            self.db,
-            Self::make_rev_pss_path(&file_path),
-            state_root_hash.as_array(),
-            is_cancelled.clone(),
-        )
-        .map_err(|e| {
-            anyhow::Error::msg(format!("Failed to write reversed cells data.
Inner: {e:?}")) - })?; - - let temp_file_path = Self::make_temp_pss_path(&file_path); - - tracing::info!(block = %block_id, "Creating intermediate file {:?}", file_path); - - let file = fs::OpenOptions::new() - .write(true) - .create(true) - .truncate(true) - .open(&temp_file_path) - .context("Failed to create target file")?; - - let cell_count = intermediate.cell_sizes.len() as u32; - tracing::info!( - elapsed = %humantime::format_duration(now.elapsed()), - cell_count, - block = %block_id, - "Finished loading cells" - ); - - // Compute offset type size (usually 4 bytes) - let offset_size = - std::cmp::min(number_of_bytes_to_fit(intermediate.total_size), 8) as usize; - - // Reserve space for the file - alloc_file( - &file, - 22 + offset_size * (1 + cell_count as usize) + (intermediate.total_size as usize), - )?; - - // Write cells data in BOC format - let mut buffer = std::io::BufWriter::with_capacity(FILE_BUFFER_LEN / 2, file); - - // Header | current len: 0 - let flags = 0b1000_0000u8 | (REF_SIZE as u8); - buffer.write_all(&[0xb5, 0xee, 0x9c, 0x72, flags, offset_size as u8])?; - - // Unique cell count | current len: 6 - buffer.write_all(&cell_count.to_be_bytes())?; - - // Root count | current len: 10 - buffer.write_all(&1u32.to_be_bytes())?; - - // Absent cell count | current len: 14 - buffer.write_all(&[0, 0, 0, 0])?; - - // Total cell size | current len: 18 - buffer.write_all(&intermediate.total_size.to_be_bytes()[(8 - offset_size)..8])?; - - // Root index | current len: 18 + offset_size - buffer.write_all(&[0, 0, 0, 0])?; - - // Cells index | current len: 22 + offset_size - tracing::info!(block = %block_id, "Started building index"); - { - let mut next_offset = 0; - for &cell_size in intermediate.cell_sizes.iter().rev() { - next_offset += cell_size as u64; - buffer.write_all(&next_offset.to_be_bytes()[(8 - offset_size)..8])?; - } - } - tracing::info!(block = %block_id, "Finished building index"); - - // Cells | current len: 22 + offset_size * (1 + cell_sizes.len()) - let mut cell_buffer = [0; 2 + 128 + 4 * REF_SIZE]; - for (i, &cell_size) in intermediate.cell_sizes.iter().rev().enumerate() { - if i % 1000 == 0 && is_cancelled.load(Ordering::Relaxed) { - anyhow::bail!("Persistent state writing cancelled.") - } - intermediate.total_size -= cell_size as u64; - intermediate - .file_db - .file - .seek(SeekFrom::Start(intermediate.total_size))?; - intermediate - .file_db - .file - .read_exact(&mut cell_buffer[..cell_size as usize])?; - - let d1 = cell_buffer[0]; - let d2 = cell_buffer[1]; - let ref_count = (d1 & 7) as usize; - let data_size = ((d2 >> 1) + (d2 & 1 != 0) as u8) as usize; - - let ref_offset = 2 + data_size; - for r in 0..ref_count { - let ref_offset = ref_offset + r * REF_SIZE; - let slice = &mut cell_buffer[ref_offset..ref_offset + REF_SIZE]; - - let index = u32::from_be_bytes(slice.try_into().unwrap()); - slice.copy_from_slice(&(cell_count - index - 1).to_be_bytes()); - } - - buffer.write_all(&cell_buffer[..cell_size as usize])?; - } - - buffer.flush()?; - std::fs::rename(&temp_file_path, &file_path)?; - - Ok(file_path) - } -} - -struct IntermediateState { - file_db: FileDb, - cell_sizes: Vec, - total_size: u64, - _remove_on_drop: RemoveOnDrop, -} - -fn write_rev_cells( - db: &Db, - file_path: PathBuf, - state_root_hash: &[u8; 32], - is_cancelled: Arc, -) -> Result { - todo!() - - /*enum StackItem { - New([u8; 32]), - Loaded(LoadedCell), - } - - struct LoadedCell { - hash: [u8; 32], - d1: u8, - d2: u8, - data: SmallVec<[u8; 128]>, - indices: SmallVec<[u32; 4]>, - } - - 
tracing::info!("Creating rev file {:?}", file_path); - let file = std::fs::OpenOptions::new() - .read(true) - .write(true) - .create(true) - .truncate(true) - .open(&file_path) - .context("Failed to write rev file")?; - let remove_on_drop = RemoveOnDrop(file_path); - - let raw = db.raw().as_ref(); - let read_options = db.cells.read_config(); - let cf = db.cells.cf(); - - let mut references_buffer = SmallVec::<[[u8; 32]; 4]>::with_capacity(4); - - let mut indices = FastHashMap::default(); - let mut remap = FastHashMap::default(); - let mut cell_sizes = Vec::::with_capacity(FILE_BUFFER_LEN); - let mut stack = Vec::with_capacity(32); - - let mut total_size = 0u64; - let mut iteration = 0u32; - let mut remap_index = 0u32; - - stack.push((iteration, StackItem::New(*state_root_hash))); - indices.insert(*state_root_hash, (iteration, false)); - - let mut temp_file_buffer = std::io::BufWriter::with_capacity(FILE_BUFFER_LEN, file); - - while let Some((index, data)) = stack.pop() { - if iteration % 1000 == 0 && is_cancelled.load(Ordering::Relaxed) { - anyhow::bail!("Persistent state writing cancelled.") - } - - match data { - StackItem::New(hash) => { - let value = raw - .get_pinned_cf_opt(&cf, hash, read_options)? - .ok_or(CellWriterError::CellNotFound)?; - - let value = value.as_ref(); - - let mut value = match db::refcount::strip_refcount(value) { - Some(bytes) => bytes, - None => return Err(CellWriterError::CellNotFound.into()), - }; - if value.is_empty() { - return Err(CellWriterError::InvalidCell.into()); - } - - let cell_data = ton_types::CellData::deserialize(&mut value)?; - let bit_length = cell_data.bit_length(); - let d2 = (((bit_length >> 2) as u8) & !0b1) | ((bit_length % 8 != 0) as u8); - - let references_count = cell_data.references_count(); - let cell_type = cell_data - .cell_type() - .to_u8() - .ok_or(CellWriterError::InvalidCell)?; - - let level_mask = cell_data.level_mask().mask(); - let d1 = - references_count as u8 | (((cell_type != 0x01) as u8) << 3) | (level_mask << 5); - let data = cell_data.data(); - - for _ in 0..references_count { - let hash = HashBytes::from(value.read_u256()?); - references_buffer.push(hash.inner()); - } - - let mut reference_indices = SmallVec::with_capacity(references_buffer.len()); - - let mut indices_buffer = [0; 4]; - let mut keys = [std::ptr::null(); 4]; - let mut preload_count = 0; - - for hash in &references_buffer { - let index = match indices.entry(*hash) { - hash_map::Entry::Vacant(entry) => { - remap_index += 1; - - entry.insert((remap_index, false)); - - indices_buffer[preload_count] = remap_index; - keys[preload_count] = hash.as_ptr(); - preload_count += 1; - - remap_index - } - hash_map::Entry::Occupied(entry) => { - let (remap_index, written) = *entry.get(); - if !written { - indices_buffer[preload_count] = remap_index; - keys[preload_count] = hash.as_ptr(); - preload_count += 1; - } - remap_index - } - }; - - reference_indices.push(index); - } - - stack.push(( - index, - StackItem::Loaded(LoadedCell { - hash, - d1, - d2, - data: SmallVec::from_slice(data), - indices: reference_indices, - }), - )); - - if preload_count > 0 { - indices_buffer[..preload_count].reverse(); - keys[..preload_count].reverse(); - - for i in 0..preload_count { - let index = indices_buffer[i]; - let hash = unsafe { *(keys[i] as *const [u8; 32]) }; - stack.push((index, StackItem::New(hash))); - } - } - - references_buffer.clear(); - } - StackItem::Loaded(loaded) => { - match remap.entry(index) { - hash_map::Entry::Vacant(entry) => { - 
entry.insert(iteration.to_be_bytes()); - } - hash_map::Entry::Occupied(_) => continue, - }; - - if let Some((_, written)) = indices.get_mut(&loaded.hash) { - *written = true; - } - - iteration += 1; - if iteration % 100000 == 0 { - tracing::info!(iteration); - } - - let cell_size = 2 + loaded.data.len() + loaded.indices.len() * REF_SIZE; - cell_sizes.push(cell_size as u8); - total_size += cell_size as u64; - - temp_file_buffer.write_all(&[loaded.d1, loaded.d2])?; - temp_file_buffer.write_all(&loaded.data)?; - for index in loaded.indices { - let index = remap.get(&index).with_context(|| { - format!("Child not found. Iteration {iteration}. Child {index}") - })?; - temp_file_buffer.write_all(index)?; - } - } - } - } - - let mut file = temp_file_buffer.into_inner()?; - file.flush()?; - - Ok(IntermediateState { - file, - cell_sizes, - total_size, - _remove_on_drop: remove_on_drop, - })*/ -} - -#[cfg(not(target_os = "macos"))] -fn alloc_file(file: &File, len: usize) -> std::io::Result<()> { - let res = unsafe { libc::posix_fallocate(file.as_raw_fd(), 0, len as i64) }; - if res == 0 { - Ok(()) - } else { - Err(std::io::Error::last_os_error()) - } -} - -#[cfg(target_os = "macos")] -pub fn alloc_file(file: &File, len: usize) -> std::io::Result<()> { - let res = unsafe { libc::ftruncate(file.as_raw_fd(), len as i64) }; - if res < 0 { - Err(std::io::Error::last_os_error()) - } else { - Ok(()) - } -} - -fn number_of_bytes_to_fit(l: u64) -> u32 { - 8 - l.leading_zeros() / 8 -} - -struct RemoveOnDrop(PathBuf); - -impl Drop for RemoveOnDrop { - fn drop(&mut self) { - if let Err(e) = std::fs::remove_file(&self.0) { - tracing::error!(path = %self.0.display(), "failed to remove file: {e:?}"); - } - } -} - -const REF_SIZE: usize = std::mem::size_of::(); -const FILE_BUFFER_LEN: usize = 128 * 1024 * 1024; // 128 MB - -#[derive(thiserror::Error, Debug)] -enum CellWriterError { - #[error("Cell not found in cell db")] - CellNotFound, - #[error("Invalid cell")] - InvalidCell, -} diff --git a/storage/src/store/persistent_state/mod.rs b/storage/src/store/persistent_state/mod.rs index 56f6141bc..7c1d43da2 100644 --- a/storage/src/store/persistent_state/mod.rs +++ b/storage/src/store/persistent_state/mod.rs @@ -1,5 +1,6 @@ use std::fs; -use std::path::PathBuf; +use std::io::SeekFrom; +use std::path::{Path, PathBuf}; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; @@ -11,10 +12,8 @@ use tokio::time::Instant; use crate::db::Db; use crate::store::BlockHandleStorage; - -use self::cell_writer::*; - -mod cell_writer; +use crate::utils::CellWriter; +use crate::FileDb; const KEY_BLOCK_UTIME_STEP: u32 = 86400; @@ -26,13 +25,13 @@ pub struct PersistentStateStorage { } impl PersistentStateStorage { - pub async fn new( + pub fn new( file_db_path: PathBuf, db: Arc, block_handle_storage: Arc, ) -> Result { let dir = file_db_path.join("states"); - tokio::fs::create_dir_all(&dir).await?; + fs::create_dir_all(&dir)?; let is_cancelled = Arc::new(Default::default()); Ok(Self { @@ -45,34 +44,34 @@ impl PersistentStateStorage { pub async fn save_state( &self, + mc_block_id: &BlockId, block_id: &BlockId, - master_block_id: &BlockId, - state_root_hash: &HashBytes, + root_hash: &HashBytes, ) -> Result<()> { let block_id = block_id.clone(); - let master_block_id = master_block_id.clone(); - let state_root_hash = *state_root_hash; + let root_hash = *root_hash; let db = self.db.clone(); - let base_path = self.storage_path.clone(); - let is_cancelled = self.is_cancelled.clone(); + let is_cancelled = 
Some(self.is_cancelled.clone());
+        let base_path = self.get_state_file_path(&mc_block_id, &block_id);
 
         tokio::task::spawn_blocking(move || {
             let cell_writer = CellWriter::new(&db, &base_path);
-            match cell_writer.write(&master_block_id, &block_id, &state_root_hash, is_cancelled) {
-                Ok(path) => {
+            match cell_writer.write(&root_hash.0, is_cancelled) {
+                Ok(()) => {
                     tracing::info!(
                         block_id = %block_id,
-                        path = %path.display(),
-                        "Successfully wrote persistent state to a file",
+                        "successfully wrote persistent state to a file",
                     );
                 }
                 Err(e) => {
                     tracing::error!(
                         block_id = %block_id,
-                        "Writing persistent state failed. Err: {e:?}"
+                        "writing persistent state failed: {e:?}"
                     );
 
-                    CellWriter::clear_temp(&base_path, &master_block_id, &block_id);
+                    if let Err(e) = cell_writer.remove(&root_hash.0) {
+                        tracing::error!(%block_id, "{e}")
+                    }
                 }
             }
         })
@@ -87,15 +86,11 @@ impl PersistentStateStorage {
         offset: u64,
         size: u64,
     ) -> Option<Vec<u8>> {
-        use tokio::io::{AsyncReadExt, AsyncSeekExt, SeekFrom};
-
         // TODO: cache file handles
-        let mut file = tokio::fs::File::open(self.get_state_file_path(mc_block_id, block_id))
-            .await
-            .ok()?;
+        let mut file_db = FileDb::open(self.get_state_file_path(mc_block_id, block_id)).ok()?;
 
-        if let Err(e) = file.seek(SeekFrom::Start(offset)).await {
-            tracing::error!("Failed to seek state file offset. Err: {e:?}");
+        if let Err(e) = file_db.seek(SeekFrom::Start(offset)) {
+            tracing::error!("failed to seek state file offset: {e:?}");
             return None;
         }
 
         // SAFETY: size must be checked
         let mut result = BytesMut::with_capacity(size as usize);
         let now = Instant::now();
         loop {
-            match file.read_buf(&mut result).await {
+            match file_db.read(&mut result) {
                 Ok(bytes_read) => {
-                    tracing::debug!("Reading state file. Bytes read: {}", bytes_read);
+                    tracing::debug!(bytes_read, "reading state file");
                     if bytes_read == 0 || bytes_read == size as usize {
                         break;
                     }
                 }
                 Err(e) => {
+                    tracing::error!("failed to read state file. Err: {e:?}");
                     return None;
                 }
             }
@@ -134,14 +129,17 @@ impl PersistentStateStorage {
         let dir_path = mc_block.seqno.to_string();
         let path = self.storage_path.join(dir_path);
         if !path.exists() {
-            tracing::info!(mc_block = %mc_block, "Creating persistent state directory");
+            tracing::info!(mc_block = %mc_block, "creating persistent state directory");
             fs::create_dir(path)?;
         }
         Ok(())
     }
 
     fn get_state_file_path(&self, mc_block_id: &BlockId, block_id: &BlockId) -> PathBuf {
-        CellWriter::make_pss_path(&self.storage_path, mc_block_id, block_id)
+        self.storage_path
+            .clone()
+            .join(mc_block_id.seqno.to_string())
+            .join(block_id.root_hash.to_string())
     }
 
     pub fn cancel(&self) {
@@ -149,7 +147,7 @@ impl PersistentStateStorage {
     }
 
     pub async fn clear_old_persistent_states(&self) -> Result<()> {
-        tracing::info!("Started clearing old persistent state directories");
+        tracing::info!("started clearing old persistent state directories");
         let start = Instant::now();
 
         // Keep 2 days of states + 1 state before
@@ -178,7 +176,7 @@ impl PersistentStateStorage {
 
         tracing::info!(
             elapsed = %humantime::format_duration(start.elapsed()),
-            "Clearing old persistent state directories completed"
+            "clearing old persistent state directories completed"
        );
 
         Ok(())
@@ -210,16 +208,16 @@ impl PersistentStateStorage {
         }
 
         for dir in directories_to_remove {
-            tracing::info!(dir = %dir.display(), "Removing an old persistent state directory");
+            tracing::info!(dir = %dir.display(), "removing an old persistent state directory");
             if let Err(e) = fs::remove_dir_all(&dir) {
-                tracing::error!(dir = %dir.display(), "Failed to remove an old persistent state: {e:?}");
+                tracing::error!(dir = %dir.display(), "failed to remove an old persistent state: {e:?}");
             }
         }
 
         for file in files_to_remove {
-            tracing::info!(file = %file.display(), "Removing file");
+            tracing::info!(file = %file.display(), "removing file");
             if let Err(e) = fs::remove_file(&file) {
-                tracing::error!(file = %file.display(), "Failed to remove file: {e:?}");
+                tracing::error!(file = %file.display(), "failed to remove file: {e:?}");
             }
         }
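`save_state` runs the whole write on tokio's blocking thread pool; that is the standard way to keep synchronous `std::fs` work from stalling async worker threads. The shape of the pattern in isolation (the payload and path are placeholders):

```rust
async fn save_blocking(path: std::path::PathBuf, data: Vec<u8>) -> anyhow::Result<()> {
    // The closure runs on a dedicated blocking thread; the outer `?`
    // surfaces a panicked or cancelled task, the inner one the I/O error.
    tokio::task::spawn_blocking(move || std::fs::write(path, data)).await??;
    Ok(())
}
```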
Err: {e:?}"); return None; } } @@ -134,14 +129,17 @@ impl PersistentStateStorage { let dir_path = mc_block.seqno.to_string(); let path = self.storage_path.join(dir_path); if !path.exists() { - tracing::info!(mc_block = %mc_block, "Creating persistent state directory"); + tracing::info!(mc_block = %mc_block, "creating persistent state directory"); fs::create_dir(path)?; } Ok(()) } fn get_state_file_path(&self, mc_block_id: &BlockId, block_id: &BlockId) -> PathBuf { - CellWriter::make_pss_path(&self.storage_path, mc_block_id, block_id) + self.storage_path + .clone() + .join(mc_block_id.seqno.to_string()) + .join(block_id.root_hash.to_string()) } pub fn cancel(&self) { @@ -149,7 +147,7 @@ impl PersistentStateStorage { } pub async fn clear_old_persistent_states(&self) -> Result<()> { - tracing::info!("Started clearing old persistent state directories"); + tracing::info!("started clearing old persistent state directories"); let start = Instant::now(); // Keep 2 days of states + 1 state before @@ -178,7 +176,7 @@ impl PersistentStateStorage { tracing::info!( elapsed = %humantime::format_duration(start.elapsed()), - "Clearing old persistent state directories completed" + "clearing old persistent state directories completed" ); Ok(()) @@ -210,16 +208,16 @@ impl PersistentStateStorage { } for dir in directories_to_remove { - tracing::info!(dir = %dir.display(), "Removing an old persistent state directory"); + tracing::info!(dir = %dir.display(), "removing an old persistent state directory"); if let Err(e) = fs::remove_dir_all(&dir) { - tracing::error!(dir = %dir.display(), "Failed to remove an old persistent state: {e:?}"); + tracing::error!(dir = %dir.display(), "failed to remove an old persistent state: {e:?}"); } } for file in files_to_remove { - tracing::info!(file = %file.display(), "Removing file"); + tracing::info!(file = %file.display(), "removing file"); if let Err(e) = fs::remove_file(&file) { - tracing::error!(file = %file.display(), "Failed to remove file: {e:?}"); + tracing::error!(file = %file.display(), "failed to remove file: {e:?}"); } } diff --git a/storage/src/store/shard_state/mod.rs b/storage/src/store/shard_state/mod.rs index ee189fafa..cde8dedda 100644 --- a/storage/src/store/shard_state/mod.rs +++ b/storage/src/store/shard_state/mod.rs @@ -19,7 +19,6 @@ use tycho_block_util::block::*; use tycho_block_util::state::*; mod cell_storage; -mod cell_writer; mod entries_buffer; mod replace_transaction; mod shard_state_reader; diff --git a/storage/src/store/shard_state/cell_writer.rs b/storage/src/utils/cell_writer.rs similarity index 89% rename from storage/src/store/shard_state/cell_writer.rs rename to storage/src/utils/cell_writer.rs index e04eeb60a..f51468419 100644 --- a/storage/src/store/shard_state/cell_writer.rs +++ b/storage/src/utils/cell_writer.rs @@ -1,15 +1,20 @@ use std::collections::hash_map; +use std::fs; use std::fs::File; use std::io::{Read, Seek, SeekFrom, Write}; use std::os::unix::io::AsRawFd; use std::path::{Path, PathBuf}; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; use anyhow::{Context, Result}; use smallvec::SmallVec; -use crate::db::Db; use tycho_util::FastHashMap; +use crate::db::Db; +use crate::FileDb; + pub struct CellWriter<'a> { db: &'a Db, base_path: &'a Path, @@ -22,19 +27,14 @@ impl<'a> CellWriter<'a> { } #[allow(unused)] - pub fn write(&self, root_hash: &[u8; 32]) -> Result<()> { + pub fn write(&self, root_hash: &[u8; 32], is_cancelled: Option>) -> Result<()> { // Open target file in advance to get the error immediately (if 
diff --git a/storage/src/store/shard_state/cell_writer.rs b/storage/src/utils/cell_writer.rs
similarity index 89%
rename from storage/src/store/shard_state/cell_writer.rs
rename to storage/src/utils/cell_writer.rs
index e04eeb60a..f51468419 100644
--- a/storage/src/store/shard_state/cell_writer.rs
+++ b/storage/src/utils/cell_writer.rs
@@ -1,15 +1,20 @@
 use std::collections::hash_map;
+use std::fs;
 use std::fs::File;
 use std::io::{Read, Seek, SeekFrom, Write};
 use std::os::unix::io::AsRawFd;
 use std::path::{Path, PathBuf};
+use std::sync::atomic::{AtomicBool, Ordering};
+use std::sync::Arc;
 
 use anyhow::{Context, Result};
 use smallvec::SmallVec;
 
-use crate::db::Db;
 use tycho_util::FastHashMap;
 
+use crate::db::Db;
+use crate::FileDb;
+
 pub struct CellWriter<'a> {
     db: &'a Db,
     base_path: &'a Path,
@@ -22,19 +27,14 @@ impl<'a> CellWriter<'a> {
     }
 
     #[allow(unused)]
-    pub fn write(&self, root_hash: &[u8; 32]) -> Result<()> {
+    pub fn write(&self, root_hash: &[u8; 32], is_cancelled: Option<Arc<AtomicBool>>) -> Result<()> {
         // Open target file in advance to get the error immediately (if any)
         let file_path = self.base_path.join(hex::encode(root_hash));
-        let file = std::fs::OpenOptions::new()
-            .write(true)
-            .create(true)
-            .truncate(true)
-            .open(file_path)
-            .context("Failed to create target file")?;
+        let file_db = FileDb::open(file_path)?;
 
         // Load cells from db in reverse order into the temp file
         tracing::info!("started loading cells");
-        let mut intermediate = write_rev_cells(self.db, self.base_path, root_hash)
+        let mut intermediate = write_rev_cells(self.db, self.base_path, root_hash, &is_cancelled)
             .context("Failed to write reversed cells data")?;
         tracing::info!("finished loading cells");
         let cell_count = intermediate.cell_sizes.len() as u32;
@@ -45,12 +45,12 @@ impl<'a> CellWriter<'a> {
 
         // Reserve space for the file
         alloc_file(
-            &file,
+            file_db.file(),
             22 + offset_size * (1 + cell_count as usize) + (intermediate.total_size as usize),
         )?;
 
         // Write cells data in BOC format
-        let mut buffer = std::io::BufWriter::with_capacity(FILE_BUFFER_LEN / 2, file);
+        let mut buffer = std::io::BufWriter::with_capacity(FILE_BUFFER_LEN / 2, file_db.file());
 
         // Header | current len: 0
         let flags = 0b1000_0000u8 | (REF_SIZE as u8);
@@ -84,7 +84,13 @@ impl<'a> CellWriter<'a> {
 
         // Cells | current len: 22 + offset_size * (1 + cell_sizes.len())
         let mut cell_buffer = [0; 2 + 128 + 4 * REF_SIZE];
-        for &cell_size in intermediate.cell_sizes.iter().rev() {
+        for (i, &cell_size) in intermediate.cell_sizes.iter().rev().enumerate() {
+            if let Some(is_cancelled) = is_cancelled.as_ref() {
+                if i % 1000 == 0 && is_cancelled.load(Ordering::Relaxed) {
+                    anyhow::bail!("Cell writing cancelled.")
+                }
+            }
+
             intermediate.total_size -= cell_size as u64;
             intermediate
                 .file
@@ -114,6 +120,14 @@ impl<'a> CellWriter<'a> {
 
         Ok(())
     }
+
+    pub fn remove(&self, root_hash: &[u8; 32]) -> Result<()> {
+        let file_path = self.base_path.join(hex::encode(root_hash));
+        fs::remove_file(&file_path).context(format!(
+            "Failed to remove persistent state file {:?}",
+            file_path
+        ))
+    }
 }
 
 struct IntermediateState {
@@ -127,6 +141,7 @@ fn write_rev_cells<P: AsRef<Path>>(
     db: &Db,
     base_path: P,
     root_hash: &[u8; 32],
+    is_cancelled: &Option<Arc<AtomicBool>>,
 ) -> Result<IntermediateState> {
     enum StackItem {
         New([u8; 32]),
@@ -146,13 +161,7 @@ fn write_rev_cells<P: AsRef<Path>>(
         .join(hex::encode(root_hash))
         .with_extension("temp");
 
-    let file = std::fs::OpenOptions::new()
-        .read(true)
-        .write(true)
-        .create(true)
-        .truncate(true)
-        .open(&file_path)
-        .context("Failed to create temp file")?;
+    let file_db = FileDb::open(&file_path)?;
     let remove_on_drop = RemoveOnDrop(file_path);
 
     let raw = db.raw().as_ref();
@@ -173,9 +182,15 @@ fn write_rev_cells<P: AsRef<Path>>(
     stack.push((iteration, StackItem::New(*root_hash)));
     indices.insert(*root_hash, (iteration, false));
 
-    let mut temp_file_buffer = std::io::BufWriter::with_capacity(FILE_BUFFER_LEN, file);
+    let mut temp_file_buffer = std::io::BufWriter::with_capacity(FILE_BUFFER_LEN, file_db.into());
 
     while let Some((index, data)) = stack.pop() {
+        if let Some(is_cancelled) = is_cancelled {
+            if iteration % 1000 == 0 && is_cancelled.load(Ordering::Relaxed) {
+                anyhow::bail!("Persistent state writing cancelled.")
+            }
+        }
+
         match data {
             StackItem::New(hash) => {
                 let value = raw
@@ -280,7 +295,7 @@ fn write_rev_cells<P: AsRef<Path>>(
         }
     }
 
-    let mut file = temp_file_buffer.into_inner()?;
+    let mut file: File = temp_file_buffer.into_inner()?;
     file.flush()?;
 
     Ok(IntermediateState {
@@ -382,7 +397,7 @@ struct RemoveOnDrop(PathBuf);
 
 impl Drop for RemoveOnDrop {
     fn drop(&mut self) {
-        if let Err(e) = std::fs::remove_file(&self.0) {
+        if let Err(e) = fs::remove_file(&self.0) {
tracing::error!(path = %self.0.display(), "failed to remove file: {e:?}"); } } diff --git a/storage/src/utils/mod.rs b/storage/src/utils/mod.rs index d28b57e54..359217fe1 100644 --- a/storage/src/utils/mod.rs +++ b/storage/src/utils/mod.rs @@ -1,3 +1,5 @@ +pub use self::cell_writer::*; pub use self::stored_value::*; +mod cell_writer; mod stored_value; From 4843914aeaeeec4de4624f7f6d4f3b1ef4d9a085 Mon Sep 17 00:00:00 2001 From: Alexey Pashinov Date: Fri, 8 Mar 2024 17:48:24 +0100 Subject: [PATCH 16/19] refactor(storage): add tests (WIP) --- .gitignore | 5 +- Cargo.lock | 5 + storage/Cargo.toml | 7 + storage/src/db/kv_db/mod.rs | 55 +--- storage/src/lib.rs | 54 ++++ storage/src/store/block_connection/mod.rs | 4 +- storage/src/store/block_handle/mod.rs | 6 +- storage/src/store/node_state/mod.rs | 6 +- storage/src/store/persistent_state/mod.rs | 1 - storage/src/store/shard_state/cell_storage.rs | 8 +- storage/src/store/shard_state/mod.rs | 2 +- storage/tests/everscale_zerostate.boc | Bin 0 -> 31818 bytes storage/tests/global-config.json | 22 ++ storage/tests/mod.rs | 270 ++++++++++++++++++ 14 files changed, 377 insertions(+), 68 deletions(-) create mode 100644 storage/tests/everscale_zerostate.boc create mode 100644 storage/tests/global-config.json create mode 100644 storage/tests/mod.rs diff --git a/.gitignore b/.gitignore index ca739ab20..628f21143 100644 --- a/.gitignore +++ b/.gitignore @@ -8,4 +8,7 @@ target/ .fleet/ perf.data* -.scratch \ No newline at end of file +.scratch + +.DS_Store +storage/tmp/ \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 4f8652836..61c30b67a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2236,6 +2236,7 @@ version = "0.0.1" dependencies = [ "anyhow", "arc-swap", + "base64 0.22.0", "bumpalo", "bytes", "bytesize", @@ -2252,12 +2253,16 @@ dependencies = [ "quick_cache", "rlimit", "serde", + "serde_json", "sha2", "smallvec", "sysinfo", "thiserror", "tokio", "tracing", + "tracing-appender", + "tracing-subscriber", + "tracing-test", "triomphe", "tycho-block-util", "tycho-util", diff --git a/storage/Cargo.toml b/storage/Cargo.toml index aea2cc4e8..b8760f1e6 100644 --- a/storage/Cargo.toml +++ b/storage/Cargo.toml @@ -36,5 +36,12 @@ weedb = "0.1.1" tycho-block-util = { path = "../block-util" } tycho-util = { path = "../util" } +[dev-dependencies] +base64 = "0.22.0" +serde_json = "1.0.114" +tracing-appender = "0.2.3" +tracing-subscriber = { version = "0.3", features = ["env-filter"] } +tracing-test = "0.2" + [lints] workspace = true diff --git a/storage/src/db/kv_db/mod.rs b/storage/src/db/kv_db/mod.rs index 7e9c434a8..6e2faf8ab 100644 --- a/storage/src/db/kv_db/mod.rs +++ b/storage/src/db/kv_db/mod.rs @@ -10,6 +10,8 @@ use weedb::{Caches, WeeDb}; pub use weedb::Stats as RocksdbStats; pub use weedb::{rocksdb, BoundedCfHandle, ColumnFamily, Table}; +pub use self::config::DbOptions; + pub mod refcount; pub mod tables; @@ -247,56 +249,3 @@ impl Drop for Db { self.raw().cancel_all_background_work(true); } } - -#[derive(Debug, Copy, Clone, Serialize, Deserialize)] -#[serde(deny_unknown_fields, default)] -pub struct DbOptions { - pub rocksdb_lru_capacity: ByteSize, - pub cells_cache_size: ByteSize, -} - -impl Default for DbOptions { - fn default() -> Self { - // Fetch the currently available memory in bytes - let available = { - let mut sys = sysinfo::System::new(); - sys.refresh_memory(); - sys.available_memory() - }; - - // Estimated memory usage of components other than cache: - // - 2 GiBs for write buffers(4 if we are out of luck and all memtables are 
being flushed at the same time) - // - 2 GiBs for indexer logic - // - 10 bits per cell for bloom filter. Realistic case is 100M cells, so 0.25 GiBs - // - 1/3 of all available memory is reserved for kernel buffers - const WRITE_BUFFERS: ByteSize = ByteSize::gib(2); - const INDEXER_LOGIC: ByteSize = ByteSize::gib(2); - const BLOOM_FILTER: ByteSize = ByteSize::mib(256); - let estimated_memory_usage = WRITE_BUFFERS + INDEXER_LOGIC + BLOOM_FILTER + available / 3; - - // Reduce the available memory by the fixed offset - let available = available - .checked_sub(estimated_memory_usage.as_u64()) - .unwrap_or_else(|| { - tracing::error!( - "Not enough memory for cache, using 1/4 of all available memory. \ - Tweak `db_options` in config to improve performance." - ); - available / 4 - }); - - // We will use 3/4 of available memory for the cells cache (at most 4 GB). - let cells_cache_size = std::cmp::min(ByteSize(available * 4 / 3), ByteSize::gib(4)); - - // The reset of the memory is used for LRU cache (at least 128 MB) - let rocksdb_lru_capacity = std::cmp::max( - ByteSize(available.saturating_sub(cells_cache_size.as_u64())), - ByteSize::mib(128), - ); - - Self { - rocksdb_lru_capacity, - cells_cache_size, - } - } -} diff --git a/storage/src/lib.rs b/storage/src/lib.rs index b1b8d4f48..73b31cfdc 100644 --- a/storage/src/lib.rs +++ b/storage/src/lib.rs @@ -23,15 +23,69 @@ pub struct Storage { } impl Storage { + pub fn new( + db: Arc, + file_db_path: PathBuf, + max_cell_cache_size_bytes: u64, + ) -> anyhow::Result> { + let block_handle_storage = Arc::new(BlockHandleStorage::new(db.clone())); + let runtime_storage = Arc::new(RuntimeStorage::new(block_handle_storage.clone())); + let block_storage = Arc::new(BlockStorage::new(db.clone(), block_handle_storage.clone())?); + let shard_state_storage = ShardStateStorage::new( + db.clone(), + block_handle_storage.clone(), + block_storage.clone(), + file_db_path.clone(), + max_cell_cache_size_bytes, + )?; + let persistent_state_storage = PersistentStateStorage::new( + file_db_path.clone(), + db.clone(), + block_handle_storage.clone(), + )?; + let node_state_storage = NodeStateStorage::new(db.clone()); + let block_connection_storage = BlockConnectionStorage::new(db); + + Ok(Arc::new(Self { + file_db_path, + + block_handle_storage, + block_storage, + shard_state_storage, + persistent_state_storage, + block_connection_storage, + node_state_storage, + runtime_storage, + })) + } + + #[inline(always)] + pub fn runtime_storage(&self) -> &RuntimeStorage { + &self.runtime_storage + } + + #[inline(always)] + pub fn persistent_state_storage(&self) -> &PersistentStateStorage { + &self.persistent_state_storage + } + + #[inline(always)] pub fn block_handle_storage(&self) -> &BlockHandleStorage { &self.block_handle_storage } + #[inline(always)] pub fn block_connection_storage(&self) -> &BlockConnectionStorage { &self.block_connection_storage } + #[inline(always)] pub fn shard_state_storage(&self) -> &ShardStateStorage { &self.shard_state_storage } + + #[inline(always)] + pub fn node_state(&self) -> &NodeStateStorage { + &self.node_state_storage + } } diff --git a/storage/src/store/block_connection/mod.rs b/storage/src/store/block_connection/mod.rs index 50ad26ece..3a6dd2f55 100644 --- a/storage/src/store/block_connection/mod.rs +++ b/storage/src/store/block_connection/mod.rs @@ -13,8 +13,8 @@ pub struct BlockConnectionStorage { } impl BlockConnectionStorage { - pub fn new(db: Arc) -> Result { - Ok(Self { db }) + pub fn new(db: Arc) -> Self { + Self { db } } pub fn 
store_connection( diff --git a/storage/src/store/block_handle/mod.rs b/storage/src/store/block_handle/mod.rs index 535a62623..1790047c9 100644 --- a/storage/src/store/block_handle/mod.rs +++ b/storage/src/store/block_handle/mod.rs @@ -16,11 +16,11 @@ pub struct BlockHandleStorage { } impl BlockHandleStorage { - pub fn new(db: Arc) -> Result { - Ok(Self { + pub fn new(db: Arc) -> Self { + Self { db, cache: Arc::new(Default::default()), - }) + } } pub fn store_block_applied(&self, handle: &Arc) -> Result { diff --git a/storage/src/store/node_state/mod.rs b/storage/src/store/node_state/mod.rs index 02c45eed5..a9ae5926a 100644 --- a/storage/src/store/node_state/mod.rs +++ b/storage/src/store/node_state/mod.rs @@ -15,13 +15,13 @@ pub struct NodeStateStorage { } impl NodeStateStorage { - pub fn new(db: Arc) -> Result { - Ok(Self { + pub fn new(db: Arc) -> Self { + Self { db, last_mc_block_id: (Default::default(), LAST_MC_BLOCK_ID), init_mc_block_id: (Default::default(), INIT_MC_BLOCK_ID), shards_client_mc_block_id: (Default::default(), SHARDS_CLIENT_MC_BLOCK_ID), - }) + } } pub fn store_historical_sync_start(&self, id: &BlockId) -> Result<()> { diff --git a/storage/src/store/persistent_state/mod.rs b/storage/src/store/persistent_state/mod.rs index 7c1d43da2..9c0e74477 100644 --- a/storage/src/store/persistent_state/mod.rs +++ b/storage/src/store/persistent_state/mod.rs @@ -139,7 +139,6 @@ impl PersistentStateStorage { self.storage_path .clone() .join(mc_block_id.seqno.to_string()) - .join(block_id.root_hash.to_string()) } pub fn cancel(&self) { diff --git a/storage/src/store/shard_state/cell_storage.rs b/storage/src/store/shard_state/cell_storage.rs index 16112cc95..99b211685 100644 --- a/storage/src/store/shard_state/cell_storage.rs +++ b/storage/src/store/shard_state/cell_storage.rs @@ -20,20 +20,20 @@ pub struct CellStorage { } impl CellStorage { - pub fn new(db: Arc, cache_size_bytes: u64) -> Result> { + pub fn new(db: Arc, cache_size_bytes: u64) -> Arc { let cells_cache = Default::default(); let raw_cells_cache = RawCellsCache::new(cache_size_bytes); - Ok(Arc::new(Self { + Arc::new(Self { db, cells_cache, raw_cells_cache, - })) + }) } pub fn store_cell( &self, - batch: &mut weedb::rocksdb::WriteBatch, + batch: &mut rocksdb::WriteBatch, root: Cell, ) -> Result { struct CellWithRefs<'a> { diff --git a/storage/src/store/shard_state/mod.rs b/storage/src/store/shard_state/mod.rs index cde8dedda..5603b4443 100644 --- a/storage/src/store/shard_state/mod.rs +++ b/storage/src/store/shard_state/mod.rs @@ -46,7 +46,7 @@ impl ShardStateStorage { cache_size_bytes: u64, ) -> Result { let downloads_dir = prepare_file_db_dir(file_db_path, "downloads")?; - let cell_storage = CellStorage::new(db.clone(), cache_size_bytes)?; + let cell_storage = CellStorage::new(db.clone(), cache_size_bytes); let res = Self { db, diff --git a/storage/tests/everscale_zerostate.boc b/storage/tests/everscale_zerostate.boc new file mode 100644 index 0000000000000000000000000000000000000000..6cea5582d5fa249d97848bbe894ea66160a878f9 GIT binary patch literal 31818 zcmc$H2_RJ4|M)p~24f$)ESW(iZDUOmBa)=Dwoz142{V>28MB$bMkN(d=J zS<4n7m3B!rN#=jJP#u=ShuB0c2W!Im2ucJa!eT-WQH8ieV4^^VK#o9}AX$(mIA8FRkgSldP#npW zlq5_M))qDvW(g;XSczN^DHF97?Gn=$n<*9{_F6nnLRMn5#AJy;iBFP1k}SDUa)7Kt zP9&$2v&sEZ9BC`*Qt3_^nvAK8t&D@Lp=^k3ne1cPSF$~F{&GQb5pp$h-SQIh*W@26 zELN0PTscZ`RJxM3l9f`N(g~#uWkF?GWl!ZmCYXtYiHeE6Nr1_b8QL?}&&ZuoHshVChiRf|p=qmWzga5Xj$T85M1O6r zYkt{0-~9ee*_pJNF&4&_nwBY+7cBE<5od+YN}AO@Yrx9RYWZw^Yb9&nIg~m1HfwD* 
z+JxK0*d*GVwYg?<$7aA5*lOD{=h5fSTwrNOvD;yH+&;j;-BH>x!SSw>p0k$oV&`kl zcNQ8gOj?+}NMMoDqNR&U7QI_cU2MAe*%J0rqovoD7A>uDQFJ-tlHzjNrO$P%Yq)Ei zYm;l=GNon3%Q}~PE@v)hEx+TY?N;Z$(B0jg>7Kn}qYaTd+qXy^Gfx) z>{aAdwf?@hiMN%HjZYr~FbbF)<^{iN{$BoD16Bqw1L+$iHayz6XVcEjhFeUw6l^Wt z+8-piU2F$=hgGm0%PC}O$hwe?A>ko0A)Jtmkb=;nunXarc4q9d*{!wPaQB5hm-c4t zE!Y>dpA|uhl!!bLnGu=K&SzH~xO|{EDl~dejBzYImKnD>epEc`VEDnJLzfcN6Dkfn z9%denIAVCD?dZy*-baIu7A0CH&QDyPm~zbaSZ`8b(vBo{Qr&T<09N>UcLN zA(@_BkX)ABbb@k%bK>lYoRi>WYKm=2MasjJXQw8fNQ1Uys&=Y%YR~D!w9#o= zX+~)~&VVx_XOzykpD8-iaW>+d{<-XP1?Q^L<3;549fZKBPZf`Y`Ze=)o4lKjG^I6VHx)P4 zG_^IoZ0cm+-Hu*N!wy3tGw)D1~w$iqTZJljB?Vw$v-M9UI z$K(!nM`Fj>Cn8V0o|JWJcN%qCbvkvf>@0e!{xtMy$J6eo1J9N}%YQ!l`Htri&yPG$ zd!GHg_<2p2ZdXiK&kKha?k{{_+q#>EaK z!LSp!x=TFRn;zqlqqaot`Zm^QE}<1v?&qH!j5AcxbvpD=6K5T zR38R~_@-}>Bx7M#*YYJf$@yPPrf)y>z~zy<9a!LT=i$Np<_`XhK`}FqhaTL3qS&L zKpv9 zzvYH~kKnx?-E1HO+UUkB4fW6sRG=;`ttNfQr4?4nJB}knE(qN@D@Y_iO>&L)B(dJ{ ze)2&ApMlI;3Bga5)}@L7*n)Y|QkS|1#y@>BMZ8nhXoE>gqQjx}mJ$IkWE6}=^Y$D) ztOcEM-QL|Y#xWb+HZ|0Tp6lJOQkL`d=|0FYSQ#2Jpt>(i%vN1 zv-Ll*-f`xcmy2wO7vuIIUT9lp`bqx;x*-7`#wXwC348Y=1W_5F`y*>x7%m$PA0N}6 zCUE9(9#qc&fanmGe-of_-R~yUMo&#pf z1Gj~xD7#m1$c$v(j+;3Z|-%3RJ zgz1DacXID6;k*b3mtr`Reuf1L7J9y*cjNQ+0*uDzO^rWq6nx$u-`_r=!S?ArVXIZX z9dZdJs&UTdn_}kge|uF>VfvnOp<+yuZeV_6wQm6iG=L_;fvOclBxKT!8Lzqyb``SL z8M%xBr=F9Vy%+3Ou9P}vOdBx{XwdC3eR-sf{m-2qcAp5wV3MHjMJ*hK=29C>Aox{w z4|@jym~_Zt`cHM>R)g`DK&AkU1>=xdOyB`Z3ndiV9?CgHdt+HuFRIp-(&HY2fqF#> zIzXpt)oOXw_R={$$?9LA6WQ_$x#t8Iz*;q(TA{jX_!Xp-maCQX?!IvX);J(tvoUbb ziJ~0@RrhsgR(zP;qY>#By?3eHEcW3E1MyD{aH(kl#>1I^D++;?w4^hWR4FRHA=mDM z{1%qF?6FNx_sEXg5~iwG0>)pr?I;9fy1rWgwF+queP95O=PwRvI!=ySwCT9*j*x+hy_%3d(&U;eE8hjE%nyN1{)DV zW0`Al^Xa5Lhb~V@kP|7cEQ}U!lfqqF5{N$?K?E^Vw+TWx!gF-im60E}c22K}!i1Brw8h}j+@4j79)Eee z$9tJxSnf`btX*e?j@F%+6>_x2(V;GWwY#ja-IS=G=WMur2u$Fy+|L_J)Gb$~zu05z zd9h`luBOg{6;;u1#*SMvTeKB!-!n>~f#Pa3Tr)pkgTNZ+-oMfHMT#MxSU=qc>G9YX zpwJCw08?NFWqF`CMW#2mr&JWsfwj~-=d(Xm{^jl<_gP|)0?ffofYpGkZ$mt<8f^}i@xdk&fr24;c*T9 z`$>;A_x2J_PB@xkP9mItw(lFi?yQ#<6IQL-vVw49TkI;T*qZ(L&FFT-`F~?C$$(jZ zFCJC^DO*`0bK3NKS6oi<7}ZCku8%yW^W z1#9BAGv@YDZUcZRcR0t+86tCK`vhe>Ux(VAsVAGl+{5jY`%@j~zwb8R;_$vnHHLE|8y5$4M)l!ovhZ)C+!U?Kj;B?12Nw`ArK@L!n+`PG+KC24D2o zi}Icj|G7q>)Y}|HS^@hCTC4w{7S_=7DzA=F4YHFs`pSIY!L{SOU5=hQ@Z90j-i<4h zkNP_zmTf#d+?FsxmUTuf+avp8g^adMZp!@5=6g~P-;iw64bSacqL4C6n$8g z(#D}@x|-R1{xIO2})=`seCYZ_9bsluxs-09{ zv2W2~)hZS|rx(cC{8w}Fr}Jd61}5h{i>EU;7+j60Tx2d^)H`{hV)J!d@A!msQ^c1> z7#0l(-y8ACqkimK!F)Pk&nwEX!+8M)(7GuuLTGNq}7*l>=mSfYGXqyijIpjYf zX0YdceYqA#%sM_|yl})c!qlr#wu=lDkBo8L`CPZAWhQp&;(TH>(X<5HxSl2Q8W$nn zHE9wOsoC!jzwAkgdRTR`3_G_~sP#V|8w)cTzV`lYI(+*#-_pDE>&WGFSNU+PVgK;h2* z(jvx!ijB}~br7Jy9l9sv&7z*)N4q|Fgy zQFGoHvuMUmRQR}RywlN>tIzFseTl!4kfR)wF^Xs zxdR>j)6lO z4)4^UH7>@jL^J!9oLMF(Ex?ZyRjb2RuRBgcW930NQm><6J3rP)M`<18TBY~n<5MA)$ac_0xIk-$kI zrZk(}#hOj*v%-YTo-E~|~=I0)s&xrwp9wAs7dh;tKVaoc>D zjzZ3;S-~&BN*LYg80RL!`rK}1N1jR^2ezR0_aX_T&&n+=6H+Ya$ydmht4Ws&mGfX9 zhW^Uc2RU5xh(^;Eu`hdNC+LJe7vXPMLFnarU{+W{33ei^fDzYA<^ zY_myeFIZeDkDN0KI}GoE3rGXOkP=G%5NK_?WN;{``(QuzP=|_>ESpUWC8LPk09|Hq z^7s_jOSIEN5Iq^m>;rDZ-&JTg6zBuh5CM|w1t=thV1<&uFak80EXC4t0Xt%)ydAL* zlP>p?Mt-8%T^9W-Kr_D|_%gYA6yI1kBC`$oojgR`Fa^uYjCCV0+mM4i%vbMUc@`(68IHk zh6zUNcEC__&zewl;bOs1RJqZUpF^(?QXNh`*cZEM> zV#?CIOsa1t>OfnwiMPFjTplq?79A4s-bK5xkM$CJiG9FAzuAOYS_Wi#K`)w9L_>TW zbfEb^NjO13{HRDlyeY}|qX~TIb~!z#sLdv{S|JuCOFE;#vFA*!8tN%KWZb?Sw9l(G zqs{XCBGq%k8IgY|`C*^~oioHt;4@kh+YeRPd&AX+PPMxcz<#1#vx#Hupm>=s66+?i zqg}We`F!5$bEYabo9I#tVXUGXsg2@`C^--8273tkjv9mH80SXx32sLnf?ogv=qQ(W z*p1+W1)Qk^Sd6}Yhoc>fj5DB4oSTpjnj8%zSm>VUoP`sGm~5|iNz+abaV%XQCrVhv 
zi`DV*YlHrvA6y1|G3^+dK6|eP>Yt7r$Ih~2mW-tpqPTrX?y0aQm6i+1R!H}Pa$cDk zcBBe*Szq6A)?mM4-2|EKI5zq~4XAdIM+`5|aj?rK**hm=rRBVGGsLgbWq^*?x6n9; zVP`REbX70e67lX5Br?Cy*ZCkVuJg_8 zg8-O&al-LX17Iu`k@+#=y$3*5ENSC)47yFnk1ZNKj(cIH;0p~Q@ix`05-ysCV<3g_ znL;!b>(#&2d#fVwRWfs2R7}O1Pbn0ckgidP zJH&cTS@;L_!KuM_wZbiQ8jevY>PJONeNmzjsaV>{Hcqu)m_!4liDiZ1^2O5O5{{cT zR+65Ci@C^Nnt_~E5ODj_HfMBe5WzN|RIqk@rif3dJ3epNOdyE3^LOZ`1tI5hKSD4 z!8xZV)6UXD1|$G|CdQUQ%~Kg&gsu%rBP!L99-I1fk z5&ixDx#hinCyF6zM=Kr0W+tIx#~*F|V|oELYA8Q|-fK z`Aj8UZdP&E56N~dK_njJ%GJ=A(A|8U+6%xz4b*H9KEEbdtywM4Q7(r)S)YJQ&_1Ab z4zoX;Y&V`mEG;)f8V(1`zY*y!CyD^SLrjQ7ZwD^Yvv9uTfMnQWp@=OAGkh`Ts)T}C z*bH&zOWu1t{7Ig4G0bY^soWR>c9jBps-{_fC2+n+wtiv4~ZaWj*Q( z9vRW|pscMsX$IkFmq&e}ijh0npmakKuZ@&1R=Tm0B5W%5(6Dmis_;j;x-4SeZ(JZ8 zp~h7xc!X2_32=h2#jsNCNyT0YX$GI+OA9WSRG5<9*4>`st#RAdS-{?~Qb0>iHT;1J zY7W%Nsenx>d=5XV=#t)sq{vAmNENBRF)qw_B)?%!2f-2|j3HeeLu2An-CK`R1lZwZ zq^Q_CX=IINlMZR50#1Qcpe2CjS$1%qaRY-N$At-qD}vcq$0D-h&sZ9U3t=6;6pque z*W<9Goi>rqgxh;^927WnGY};aIuIoh`LpwkW22fXHtUM=QY1K zm-MIRb~`EVK_)Y5#DYT@YD5p_^xQxKT-Q;0kkR6h(Z@lIU5ZEp!#M(I4^f&OVTj7g zKpD7;Q^z@wju(Z1FUrq@3;0nsxQaPuTf3}@I{qfJ_N=;{`@n&Fy(v=FESEL=W{S7j z0G~Fx!&q0k;Y!zg;WUs7_>^`LWMC^YP8e1a#TOO2mo%?RleOfa**c2E^T~qjN`*pF zRdI8nENCuZ@n@#^`caZQu4QqcncxyOE?t84CdoXk4|y^r<~~Vt6b;|g`OPo8ICs$3 zDLjx}vOsGuoxIg-YE9H^s}P?gnX?&F!?obm5NLha4>ax#FeKy7OCDy^Y~mB9#+KGZ zvSJ70lrA;ifxy79R+#mSrbj?3ltCgeh)DNd0i$b$V8NuGy>-3FN2oEY29s;hiQJyO z$X7seRj8JSvtO-RH3q%FHE)18%^X z=O*GNk^luKI%FJ?$GQm)&QUCyBSdw`_<@AecR?<$Q}e-1Obb;)&I%$+WJbi$d=OcB zufPX`ddT-&2IW96u^MPx+fg@9lJ0<(77-nf!RRtfzD|0tUC=0 zxE=o1wBXs#`W#5@tSKC%iB*vzMj!d^SyaL9F~G2!GG1mfMcH%}!3go22}>NeISt4mjuf?qykp1n2I>I-eTaXGv*clBjV<^-<0g)Y7CjWu3{M zMT+1S1{Z^3a2rsDjAOj_xSFblJ{hOB+3U}M*2*an%9|0*2p=juy29+qu2K%t=!j&- z(eqhivf;`FaX;wgON5v^>YQFy+jRU^vF%WdQuqxQD+4Ss%?K4E4xr%6Flu-&dxn`@;ZaC~O*IFCQ53Qk)S<9r40pexdrR@NcLGL2U-M&khpA=VKIS8G zOVIn9TlrOTl#xh9i`HoHPd5ydqek;Se>yFBdFHD^-Bpnqj`d&HU44;%{_!5Ob5Lv^ zx!~Z)8?8ibpRIlNA$|#|fsN8JB**AjI zEBYh7_ov;`Nk>a|x4iAXI-{9+03J$9#m>nxeKgIm4Z5$)ADF^9Jv-5hr6UMv#baj1 zJ#AT)iZ9>liiX5#qvcmoD2A4_E=Jl-eCaBru%zY1#0?iuP1tqg^bLbY0#|CS`kXFs z#iMA=Fli(GpU-5y)Uh7nzAi;ifeE=cmdUJ6lv{N8oQ~Mu9!k-@_%N5xk#7}%RDQhk zu$!HhG{=qd&w)0hnAXQB!;b_Z{x8?j8gRTf^6}n;*b5iZEGVyL<$A z+u+6fa=MV4Hpe|&?r6&>-)jXS`E+NZYQCjOT0Vg5wC4*wP&F5(|J<@Ep;+8r zZeu06;12+wq6AJ`E}4oH#CFE1XqR>eO}TLx=$eB%wuL=hJP#Ao}Vhk|kgY&BY&hNH9jQibOfTf%%E>eQ&!RnujVYLkO)=qw&M&tc2x;PHz$7T8ar3;e3x z%DL2>cyB7|$zbJo0y(id-J)%@9(O!o{{p;3Bm^PEFnABV09y>`2%WB1jgBoA)`+7W)VS^$P}~#7L&mU>$hmO?++r zzF?xqp+X~YxumzUloKu!ANOGL$!M`i1AW&WVx>r5sXf&4c8x`Xr% zT$~6tu~vmsgh3FXyN6Fd7F*LsHo=U<0RNzQ@SxN^Tsb~Y_QJJ-ND>9S2R&d6PS3Jp zZKN3j{(6J{9|fA~5SmSff;5SEkOl>;7TP)RBQ<1=ZW8!_6{83VBXD)j0bAN{p%Y#; zs)4Thbovv9!%Fo^$U4W1gY`2v2tz# zZ8}tzC|3`UdKfsct|zz&hbkEFK5(ebE+Csk2owHdt{W!IQAiU^#iY~ZIF2w4^_7FH z4%~XWN!B2bX*RJW{O(E|*~(3@qXT?F z=G9kX(_ciz#v*BS#7%TiW+B1jsF=r&(9uPLYEU4_#Y~;rF~;|ZoA7T{LIy{G;$|W~ zI|k!sb}_hzKhAD5IIqO zht6;P{dqgW^;j2F-Dw38)s$O;C@qv4(myKam^YQ*ZX6!p>_Ab#XqPs0K{1Cdt5fw0 zQ3XT#6>?&$xLS zqA0FU+bK#R;P2qk4)nQO1qI++wwklXFNgt2Yra?K@@r|n?w;?&>xWrZEtdQ9&@NB*~t@89B*DQ_mr?ma{nPdKEv zLJX4NvK*)7b@RjnOF@PrCTW#CFY8Y6tTA)(fW&|RajrW{i*^JWvy9!T+9BFNX%7^@ zK?*`4;Dk>?2#C(x56@KzVPkN0S?y10-T3$>r>_Gkso+hzP$sSp?;!pxvwI`;; zzIK(Jr2pXZG-i6)>4~t6+%o;zz?UH9Xz_iVE|lRgBC8fEpf-bx;EjekRh^CLxu&HB zn}S=H=Gkzx1eFGK%TBG@u1*j=eo4r}=Z4MXj;Q6sX{M#jU(~ zL)=Q?2?Zm~-+(f>C49Ucw7k~r^7nqYT+PFM`ptKfmY=edC5Q))TJ4aTA1(dYmM|nE zOBhPwmN1m&dm!&G7k8{Bu6n--zTLsToSf%%nG1Asj$BqjF3-pGrzasC z*jHj&v&`q~Q|gOj5(=#SXPPcPeUnqBLyYej#>axPI6yf*KzY88^*YDyo0OvD>?3vN zAFCdPIx|hCwCHyExE#8*@LjJ_RzE(#j^U?QcbEYx;_ycC;VI$pTK(UZ+c8h?UNgsI 
zUfk%*j#u-{)Rr_hP8yT`c=y9PFPR8T27=ZVH5yk@2pX!w(U^=KU0KcB@1r2HmBvGD zG>biCM@{a8&)0)fMpn zmrnTvTHm{zsm~A>pq$Za?f*1OzDf{~5ne>s8zbisWyeE(goos6O;qrvgwJ5r>?D4pYu3@L}=ZhGefN+zx zb_UNz?||s?@KY2AV=Qj7pw@;*YKQ$6U2t|iHEA_%#JHJ)^3A{eOQyFH5EtHuJYTNI>5Ik_uY^Ff}1lP*dl6J*t zHteN_-2ltCDOBr7+Rq3dcAp3hehP0vGe}2xM38so)uXaakA!Bk{#QtIavLAE{~`>q8KdIh)) zw9DhGSY+y;)n5Uw57(RP&-LRnxxU=>T(4m63a%%26?Y|DuUCM{^(7KNdap!J9ehfxYttKr^N2 z-cq65{WwQ`LTl!@=!^4@0}%?G18t-a1#o;-6J?n7dBx;~8(1 z%~XFf%D*u;Cnveuw?PmMKYX)i=9ioOwR`e1d2S(qvoO(dDRY| zyQeEihqWhb`0l9a*gLG)nTb!l1s^q*IBFVXHd)C|lr4L*tB(?ETbky+Y2!Gd(xjy) z9&9(ABs+Ok5DqZs_+uNMpOtdZu@Bwx+p8?Xn{GXNA>wzg&EmzzwSDdgS zkLSVpc;O5eVpt#_RR8i5@byY4Z-62CE z&miofKu56dG^WB7u9+k>1FfuOh=EbU5|c4iX7v{dIUN-sEFnhL)uKwGe3It!Q4&B6 zsD*EQA>IXH8nlP~!*GZjmC4j#sOumvMxy5u&L>{#Xtnkz%8jAK)6`j%>#b~kN8-;l zDBO7=5cEn=qIFuc*s_j#DF#&TJZFfUk0tm83$9NqEv1X*D19TE8m-;|9Y8wQpX!f} z4pC2IDFJ7Q9b7*u*Vj#uI}=oV2DQhbWnTRJSlq4 zm>Z!qlq}N7VTY1g2;-TRqH0ve1ZR;62(UIf7+)zWq=o*+Rf_V)qH;{7yk@0nutue* zqLrIb|>G?fv>oux;Ak&X6rCx6nxA?)*lz6Ou4hMFq`#xJtqlQqxbxItKQb=m; zY1;yq0x?8D2)WoJG)nGVN7`ofwDTc=mvKc%M`y$Jo;E)&aNOnZmq|TF^A7>6Ozt=a z`AV=}t0U%2bt7;cC*QBfTDg-|1_CPS_t9D@%d3(;zN+*-q*m%sD-CkvD#I$J z;wnQbWjgL^d)#l1tAr#A^U#50su}v1>=H(2n!6jj zpLRd#ZtrgCuJ5knb>Hf~*Im+mJ1;f(WcNOLi>7;fu6g%SG=MG@b1lLJc}ey5VvDB* zr7KrKHv~qLAoafnP=32kFXc)wdkWLdJrb=v|IJ2ZGE46=8KTS% zH$;4n8aG-gSurRS$py^Aj#BY73QOyHD_jZBvK^@m1$BnJ21Dsj1f|Xx^%o?i&QSac zxO3GRa#X*~8fodtIy&6->iq7z(74{v14+m2Dcrf5zpJ46qr%``(0_NN*i-{OM|CvX zSvOjB;VQuD`cDM_U&Ol!!hW_hl_5n%lSoZh&(x)%lpuRuM=3OY3`bowTn)yA(hIzxR4(s{jriY}7eNQ3xb{4;d?5*5$~_H)gB*;A1% zkH$S>Bfhm>oEKkLjqZI+DS@$Quk{D=#D(}*gdSHjhfxM_9h}Tp2$~FX5uhBRp^`

d+ewxCtPQPF0_N;{QaW--SLvWKiuQ8-oe` za8n~3?!=JmJDQom5Q@9cK{VU4qGsnrNkN&t`JeW^N!H?_3 z0KSJF$gBTBTy@pl`3112&HFozacy!CJ)!*L4WP>@VCJ91GWD;7GSc$`{|&S6-!W7* z6G@7nl^RZayV%OOQv0=Z2|mJf!*4?3h1n5Jft%nb_6A3dT=sNF^AQj|M5&x9geAzE zuwY;Ytwum-HXv{NMXM zLK&r_UO*Pi%77VJ|H`mD(bm)|z&Bi9h-)LeRf0FRt*!mGx;YH}mwKzN8ewg_ekGf*CNsX9{XNU`&Dbbd!U`s#yD zdr&-Xj|9gunaqwsX2V@$7tnwnQQ=Sge>CYZ3`G6699~=x?1H=TA`Y7XKVQ9|VqyAk z+~3Q~P@0cLI3<)&6=f=$FY8YUABEbagi{cqPYft3xtmo~csBt3>lIa&ekgieijGUs zaSsLVLFt-sFWd(@f8qT9wb$z=_}@?gLK!ACTi||pC37wJ;=kg6)ZjDUUAVmO9-R8B zyr%_s;Q=^+E7-vQi!f{}*D9t*4@SVVv3^|7j_)>Ktx)w3H9Qkn!|34vzEOwDA)^ji z%zq!Zf{}fdP9`3KG?j0MKS#2DQK;PdR_{g3`#;1{C+{6lLq);lI3v1XJKCv?$o;GDYX|Exq^3y58r{M;SYxkvJ(q&I)?M4tqWW z=I7H#yl|X|8f}VF_BnT^Mw&VOx&NvxX6h4Fs~L8Wj@%TrE#`exFzg<2qa3t4(HEG# zeAT*LYdQ=)8eLo{f#)1ltyHLbMlI+4U*ojA^4DLTF=eT{j+ zwWk4RqPOQloq-vimyM*>5?XV}@%Ac#-|$Dq`>s$7n-@8zXtr&--&x|>KJSik#Mb2N z20P>r@8!Lm>@#dqK{5{T1Rvl@9AN7lR%Gc+{T+E$%#-Vjr3|u#BPL&)5M%0gqE6iP zkuQ7T);wAnsqb_wr*sUwG)CP^P5z->0sPUI^aVGP}}Z+IsP69?KbNo7o6!-wqp{h>cw4vC->ur_qs% zUYUWuEfe=R^v_P}`25j$?zWl3U@Z&B+GRf0uHaZ}={!hE+3%S)`wZ{H94(KTH+nW$ zbF7y{FK+kV(P{DWfHw~ByzXq{>m@Fe)vtx!E}%qej*(`pU7FDt!aliWT50~>nqlBw z#o=A!!@JI(rGq6wFT(CR&0pAi=jzJ}wWd!OpN6jQm3_FS;EnfraJ3G-{3JhO+vfXT zcei<7GHm|*b-hujkC3x^2+eBNmQ}ZS?evl6SLLE6ucdukR(f?!VxdTh^!`P9Ir?Xq&!jk(N_pW;R!ocn|R4>|{{mwvDds5o_ zHV2dC86$4A0tx#8X4;AqmcHz$6LWf|zK3!04VK;!rx#7^Ed{Gx<#Z1tnfBLNC3kVe zmGKc*jw9}|z|x3y>lFIx$IMH760!Z1%YJRRXt(FF2K6@6NFi4>JY{X5^lX0D`2&e> zq)Q&ucqKPrvvx}Qw0apkx!6A`NZFi+gxX&NyoUq4&j(nE1AOF`&h?#5Xws^k6JM#} z6Khi;pBQk??a=LBfqlnLkx%qXOC`P$s8L8goRkuApzX|e)ZD)pAY;(tTU+I# zDsdrklUHZeo{U?Pi=T}=a{_-g+Xsbi>sJwiebru3ifaPyitGr9Z+%GPtdh>1%iEN( z8HQWUt4ly0<3HQTumPc#m-)V{>2r9_lEw=wJ#UwlCuS>runIRR>UoRvj;hdkM6^NLQm%nmA zSk;g9c8@7uFD&Duiv1<2PC8};N+P2zU&p&Kl@ZCA!>~2 zQu9~$w8rkyDSamo-&Kt`ldBbH-S7FU9qJG;3rZYImslPtcH-SnEf^!ex)=>AcvP0ssAtRbbG%L-bZ%dEE%;E=rf**= zzP-J1e7XDcQ6o=8?L>;zq4IWOIVte^9**}S8C6O1k83oqUgJEO(902wxSsAj($^)z zE*vv%5si^zAEvl1Gq5k$?!NHIto+UNf&OPZJ};}hNDGMQ7>>N2;+T2H$INpaGY!YG zd#z8~_dWk$e$$~u_0evfSG6StFDnctz0}eQ2_ja`fF3@7Ym@8N*L>wh6!U@Iz9CTwl|B8 zUVgH3>-7z<6R&dw_uf6Stugl0q9`h!o%IoZf~7wvk&PQ%f8wvdp+NVK+%d)<^TxFQ zlsCpid1J$*i-Buw)xp7o5JYz23zy`F!d6dz#l;a$L}(n60oE5UMEX0VVhuauMz@GA4zi z@C*JpKLvh+1Hh}9Ni6}#)T;vUw-tZ_7!}pgMy(fDi)we2DgaS!g#`3g7ZkSqmYW&X z_Pryp`Fozzz8l~3ob>T`V*H%mq&)E7OKtj!f;9ifNj&`^>91pXcoNUVpCc!DjAJlG zX#qby$!;jX^F1+%pS6?sJ!@wqSR6^B4iBGnK}-m~MsXzM?Elvjb%q-v9=`(rqwE|E z{-;?v3;x5b984JHw1}`-Pl)KTR~U%$f!z5C6&yUF;!g|06s88#ZNfqs$0G}+7s@e2 z-KdUHL%Adv@qd)dfeC=z|5+LYfhy@H3C~|~|`sTd|dlA-$ z-hkL|*l!p#wSQ`mLBD}nKdoOcNT*+ivF>`XNQwHTuGD+ zH1I#;f?ZE<=@yCNtOTG<#UVz{Qu(##ITK+2BwJ%5N)(x&R;af#-P&v zMm452m_D`q$Lj9?#6KEb$NkkBE;bhSqbK}Zx&LMz_g}5yV&i~6dIo{Z_~r8ke@7Yo z*XbhIco6ri z=L+~4W+UhHd*2r_M=XFxwswh41j1KcPwP_tvn5Py5}vU6UtPi+shY+nV^d%ap8ZBg z*>5AvFaBNL4W0srXR8mJvW3wA5m^)7BVX{vQ+P1KJvyd`>4PxQdMEhp=7ah?5=1oD)fIj?d-}UqWeQ0ohS)j-kW`TFW5}O6& z@&O>^W~~t~bhZO>H&MJJH%}?<1R&!Ttg$(GVS~+;hMwm|-x<+0+oCt6P4}5_hUMFw(>{Kz zZJ_2895*Ylm3YIp*gU+j#~kqC&&R*)@ZK!|SF0SSEQMcL7?Fw86m{+6^A$yc3y(SL zmt4r}PO1<)I*Vyn1P}EQ~W3Z^TN=o0=gYP zT94t6=wUvCP5WWK04~N)3|;oa{Lo8`0eV*t?8jgoej1>wSTs_Mn}D_1;@p=nK zDgZKj@Gjv55s9(!;YDLH_{|6KuP89{CBJ(^y&dZNAbiLIxWM>cWvEj_orSwN=*FQc zc(bus97LpX2yZqX|2haXiy*3l;~M?*?-K&1^^gF7P!#~CEOY_w^E*!j6tsu_aRyDm z+dGUM!Hc6<(U>LGOA- z#Z~MYUYx`5_kCiS*d@F;k6plvi&zG5k$u9M(~%@N+h3qd;9YyHjnoBr`+f252%j}^ zcMg$1ExMFh{?da!2$ASMR6o`_H{Y6^yUro_+f3hyVj;fM6b07Yq{>}9Zikr!oV`C- z9sH<&&3GkmT*|j8-3A631*N{%_gThu3QS2`s6JJgZTv1Jso)Ro=k2$%GE!LdX8FDD z<+1OhyhiyE?}_&eM7qz_3EYz19z_3MA0I{*b{Q|OVA(^xODIXvAl-SME0pN6GI3!- 
zY-4`2`=^{SjTgcSns}mFB|q*Ss=x2;4&hd%sjo3$d^!KCb?2^#?juWXf4eS3e(_}1 zSaQoCu-`^=v{pd5;s-Ps=(>nhX4?Kz7wW6N&qkViB*U_)|qT(i2fEPC~^omdlmWSmJ z^)-H>!nV6L7o?u}tw|`T5cX6liIzt{e7VjvBg5iWQ0+zfU}HaE#~eGd^Dr$ZD&S4@ zY>ao08|e`#GjK0^{^q%Ii!l=?^C93r>Stdr^4(jRdsy1bJto4kIm~RbxzWVO&n(#w zv-K2%D=B}hAHC(n=#xOHa1_x|9+D8!2J z;ucmsG@5fO7w=@q-P+00ey?(RPR1$&*2Z#m(WR|#L!?sLKh%Dsf4_UE{)vuPUGi18 z&sPYo%ek_*@69@U?y4Mt{PWwy8LCGD62*T&8>*j8P5aW>qODQjU(nTY_Oozo!`+Gc z@_u)MLWJ5C=WPi5WBajMSHA6@Xmw|ORe8X%=~Gl1*0zY2#O@4wn{UgT@l1;TqkViB zC0Hq5Jis2}#a*lnFDkJryeP*i@ZuhJf2b!{pKUv)Gyl4^Ud)z=2@^@mqYiJow1++; zEOMgs{SV$uF&5x&Kjz`!Sg}l>C3nxY3sm={6Q5+>SafB}>Ujq%oJK!5XVA5GSHU0Z zAG#6qU`5i{lbrK8oPCAw!_MzmoA;Vl>g#lKdehkUCgLCK$6r1;%RtxsRh796e0Hzr zvtv|Xz;pTr7r9oq^@^%b5(k?{G>i|U8mqyJTC8qpG>5~a?grCqW4w2_vfY)0*7?8m z42nJeeDp{43%a|Hj#B+2v|Y;gJ3Z6WjAqcv-a0lZt1mlSByE!;eK1LSdz7hPOeTF0 z+>g;`AD&UM*+0O@#MpZIc(oi)vkeEyRUae9?z%E+lXw~f{IUJWmF2OP&mWJg-7|k? z?ztMh{rj879$qXqL#x{CX3wXaT0h#yhf$9;0GFD?Kw%T-) -> Result { + let data = std::fs::read_to_string(path.as_ref())?; + Ok(serde_json::from_str::(&data)?.try_into()?) + } +} + +impl TryFrom for GlobalConfig { + type Error = anyhow::Error; + + fn try_from(value: GlobalConfigJson) -> Result { + Ok(Self { + block_id: value.validator.zero_state.try_into()?, + }) + } +} + +#[tokio::test] +async fn storage_init() { + tracing_subscriber::fmt::try_init().ok(); + tracing::info!("connect_new_node_to_bootstrap"); + + let root_path = Path::new("tmp"); + let db_options = DbOptions { + rocksdb_lru_capacity: ByteSize::kb(1024), + cells_cache_size: ByteSize::kb(1024), + }; + let db = Db::open(root_path.join("db_storage"), db_options).unwrap(); + + let storage = Storage::new( + db, + root_path.join("file_storage"), + db_options.cells_cache_size.as_u64(), + ) + .unwrap(); + assert!(storage.node_state().load_init_mc_block_id().is_err()); + + // Read zerostate + let zero_state = ShardStateCombined::from_file("tests/everscale_zerostate.boc").unwrap(); + + // Read global config + let global_config = GlobalConfig::from_file("tests/global-config.json").unwrap(); + + // Write zerostate to db + let (handle, _) = storage + .block_handle_storage() + .create_or_load_handle( + &global_config.block_id, + BlockMetaData::zero_state(zero_state.gen_utime().unwrap()), + ) + .unwrap(); + + let state = ShardStateStuff::new( + global_config.block_id, + zero_state.cell.clone(), + storage.shard_state_storage().min_ref_mc_state(), + ) + .unwrap(); + + storage + .shard_state_storage() + .store_state(&handle, &state) + .await + .unwrap(); + + let min_ref_mc_state = storage.shard_state_storage().min_ref_mc_state(); + assert_eq!(min_ref_mc_state.seqno(), zero_state.min_ref_mc_seqno()); + + // Write persistent state + let persistent_state_keeper = storage.runtime_storage().persistent_state_keeper(); + assert!(persistent_state_keeper.current().is_none()); + + storage + .persistent_state_storage() + .prepare_persistent_states_dir(&state.block_id()) + .unwrap(); + + storage + .persistent_state_storage() + .save_state( + &state.block_id(), + &state.block_id(), + zero_state.cell.repr_hash(), + ) + .await + .unwrap(); + + tokio::time::sleep(Duration::from_secs(10)).await; + + //println!("{:?}", zero_state.state); + //println!("{:?}", global_config); + + //std::fs::remove_dir_all(root_path).unwrap() +} From 1b25703cdc3d4c1c0129b093ff80a7d3cce4225f Mon Sep 17 00:00:00 2001 From: Alexey Pashinov Date: Fri, 15 Mar 2024 16:50:32 +0100 Subject: [PATCH 17/19] refactor(storage): fix writing cells to file; finish tests --- .gitignore | 3 +- Cargo.lock | 26 ++- 
collator/Cargo.toml | 2 +- storage/Cargo.toml | 1 + storage/src/db/file_db/mapped_file.rs | 10 +- storage/src/db/file_db/mod.rs | 54 +++--- storage/src/db/kv_db/mod.rs | 1 - .../persistent_state}/cell_writer.rs | 114 +++++------ storage/src/store/persistent_state/mod.rs | 78 ++++---- .../store/runtime/persistent_state_keeper.rs | 2 + storage/src/store/shard_state/cell_storage.rs | 2 +- storage/src/store/shard_state/mod.rs | 6 +- .../store/shard_state/replace_transaction.rs | 25 +-- storage/src/utils/mod.rs | 2 - storage/src/utils/stored_value.rs | 1 - storage/tests/mod.rs | 179 +++++++++--------- util/src/lib.rs | 1 - 17 files changed, 256 insertions(+), 251 deletions(-) rename storage/src/{utils => store/persistent_state}/cell_writer.rs (83%) diff --git a/.gitignore b/.gitignore index 628f21143..ae9a3bdcb 100644 --- a/.gitignore +++ b/.gitignore @@ -10,5 +10,4 @@ target/ perf.data* .scratch -.DS_Store -storage/tmp/ \ No newline at end of file +.DS_Store \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 61c30b67a..c62426cf6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -196,6 +196,12 @@ version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" +[[package]] +name = "base64" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9475866fec1451be56a3c2400fd081ff546538961565ccb5b7142cbd22bc7a51" + [[package]] name = "base64ct" version = "1.6.0" @@ -691,6 +697,15 @@ dependencies = [ "syn 2.0.53", ] +[[package]] +name = "exponential-backoff" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47f78d87d930eee4b5686a2ab032de499c72bd1e954b84262bb03492a0f932cd" +dependencies = [ + "rand", +] + [[package]] name = "fastrand" version = "2.0.2" @@ -1133,7 +1148,7 @@ version = "3.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b8fcc794035347fb64beda2d3b462595dd2753e3f268d89c5aae77e8cf2c310" dependencies = [ - "base64", + "base64 0.21.7", "serde", ] @@ -1450,7 +1465,7 @@ checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.8.3", + "regex-syntax 0.8.2", ] [[package]] @@ -1949,9 +1964,9 @@ dependencies = [ [[package]] name = "tl-proto-proc" -version = "0.4.3" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3f51de4865e0618b187c2b790c137de938d01fe5510116b959387b6133c20ac" +checksum = "5a3eaf6822a3ce34a40564dd3078a915d35c3c5fd1f6b3d81eab991e6d00a0fb" dependencies = [ "proc-macro2", "quote", @@ -2184,7 +2199,7 @@ dependencies = [ "anyhow", "arc-swap", "argh", - "base64", + "base64 0.21.7", "bytes", "castaway", "dashmap", @@ -2257,6 +2272,7 @@ dependencies = [ "sha2", "smallvec", "sysinfo", + "tempfile", "thiserror", "tokio", "tracing", diff --git a/collator/Cargo.toml b/collator/Cargo.toml index 70c25a78c..152d4de52 100644 --- a/collator/Cargo.toml +++ b/collator/Cargo.toml @@ -10,7 +10,7 @@ description = "A collator node." 
# local deps tycho-core = { path = "../core", version = "=0.0.1" } tycho-consensus = { path = "../consensus", version = "=0.0.1" } -tycho-storage = { path = "../storage", version = "=0.1.0" } +tycho-storage = { path = "../storage", version = "=0.0.1" } tycho-util = { path = "../util", version = "=0.0.1" } [lints] diff --git a/storage/Cargo.toml b/storage/Cargo.toml index b8760f1e6..d865f5b6f 100644 --- a/storage/Cargo.toml +++ b/storage/Cargo.toml @@ -42,6 +42,7 @@ serde_json = "1.0.114" tracing-appender = "0.2.3" tracing-subscriber = { version = "0.3", features = ["env-filter"] } tracing-test = "0.2" +tempfile = "3.10" [lints] workspace = true diff --git a/storage/src/db/file_db/mapped_file.rs b/storage/src/db/file_db/mapped_file.rs index e671b5bb3..a6c535245 100644 --- a/storage/src/db/file_db/mapped_file.rs +++ b/storage/src/db/file_db/mapped_file.rs @@ -1,3 +1,4 @@ +use std::fs; use std::path::Path; use anyhow::Result; @@ -17,7 +18,14 @@ impl MappedFile { where P: AsRef, { - let file_db = FileDb::open(path)?; + let file_db = FileDb::new( + path, + fs::OpenOptions::new() + .write(true) + .read(true) + .truncate(true) + .create(true), + )?; file_db.file.set_len(length as u64)?; Self::from_existing_file(file_db) diff --git a/storage/src/db/file_db/mod.rs b/storage/src/db/file_db/mod.rs index c4f73b1f0..4be7821b5 100644 --- a/storage/src/db/file_db/mod.rs +++ b/storage/src/db/file_db/mod.rs @@ -2,67 +2,57 @@ use std::fs::File; use std::io::{Read, Seek, SeekFrom, Write}; use std::path::{Path, PathBuf}; -use anyhow::{Context, Result}; -use everscale_types::models::*; - pub use mapped_file::MappedFile; mod mapped_file; pub struct FileDb { file: File, - path: PathBuf, + _path: PathBuf, } impl FileDb { - pub fn open
<P>(path: P) -> Result<Self>
+    pub fn new<P>(path: P, options: &mut std::fs::OpenOptions) -> std::io::Result<Self>
     where
         P: AsRef<Path>,
     {
-        let file = std::fs::OpenOptions::new()
-            .write(true)
-            .create(true)
-            .truncate(true)
-            .read(true)
-            .open(&path)
-            .context(format!("Failed to create file {:?}", path.as_ref()))?;
+        let file = options.open(&path)?;
 
         Ok(Self {
             file,
-            path: PathBuf::from(path.as_ref()),
+            _path: PathBuf::from(path.as_ref()),
         })
     }
 
-    pub fn write(&mut self, buf: &[u8]) -> Result<()> {
-        self.file.write(buf)?;
-        Ok(())
-    }
-
-    pub fn write_all(&mut self, buf: &[u8]) -> Result<()> {
-        self.file.write_all(buf)?;
-        Ok(())
-    }
-
-    pub fn flush(&mut self) -> Result<()> {
-        self.file.flush()?;
-        Ok(())
+    pub fn file(&self) -> &File {
+        &self.file
     }
+}
 
-    pub fn seek(&mut self, pos: SeekFrom) -> Result<()> {
-        self.file.seek(pos)?;
-        Ok(())
+impl Write for FileDb {
+    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
+        self.file.write(buf)
     }
 
-    pub fn file(&self) -> &File {
-        &self.file
+    #[inline]
+    fn flush(&mut self) -> std::io::Result<()> {
+        self.file.flush()
     }
+}
 
-    pub fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
+impl Read for FileDb {
+    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
         let bytes = self.file.read(buf)?;
         Ok(bytes)
     }
 }
 
+impl Seek for FileDb {
+    fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
+        self.file.seek(pos)
+    }
+}
+
 impl Into<File> for FileDb {
     fn into(self) -> File {
         self.file
diff --git a/storage/src/db/kv_db/mod.rs b/storage/src/db/kv_db/mod.rs
index 6e2faf8ab..aeed753fd 100644
--- a/storage/src/db/kv_db/mod.rs
+++ b/storage/src/db/kv_db/mod.rs
@@ -4,7 +4,6 @@ use std::thread::available_parallelism;
 
 use anyhow::{Context, Result};
 use bytesize::ByteSize;
-use serde::{Deserialize, Serialize};
 use weedb::{Caches, WeeDb};
 
 pub use weedb::Stats as RocksdbStats;
diff --git a/storage/src/utils/cell_writer.rs b/storage/src/store/persistent_state/cell_writer.rs
similarity index 83%
rename from storage/src/utils/cell_writer.rs
rename to storage/src/store/persistent_state/cell_writer.rs
index f51468419..bb997e4db 100644
--- a/storage/src/utils/cell_writer.rs
+++ b/storage/src/store/persistent_state/cell_writer.rs
@@ -8,6 +8,7 @@ use std::sync::atomic::{AtomicBool, Ordering};
 use std::sync::Arc;
 
 use anyhow::{Context, Result};
+use everscale_types::cell::CellDescriptor;
 use smallvec::SmallVec;
 
 use tycho_util::FastHashMap;
@@ -29,8 +30,13 @@ impl<'a>
diff --git a/storage/src/utils/cell_writer.rs b/storage/src/store/persistent_state/cell_writer.rs
similarity index 83%
rename from storage/src/utils/cell_writer.rs
rename to storage/src/store/persistent_state/cell_writer.rs
index f51468419..bb997e4db 100644
--- a/storage/src/utils/cell_writer.rs
+++ b/storage/src/store/persistent_state/cell_writer.rs
@@ -8,6 +8,7 @@ use std::sync::atomic::{AtomicBool, Ordering};
 use std::sync::Arc;
 
 use anyhow::{Context, Result};
+use everscale_types::cell::CellDescriptor;
 use smallvec::SmallVec;
 
 use tycho_util::FastHashMap;
@@ -29,8 +30,13 @@ impl<'a> CellWriter<'a> {
     #[allow(unused)]
     pub fn write(&self, root_hash: &[u8; 32], is_cancelled: Option<Arc<AtomicBool>>) -> Result<()> {
         // Open target file in advance to get the error immediately (if any)
-        let file_path = self.base_path.join(hex::encode(root_hash));
-        let file_db = FileDb::open(file_path)?;
+        let file_db = FileDb::new(
+            self.base_path,
+            fs::OpenOptions::new()
+                .write(true)
+                .create(true)
+                .truncate(true),
+        )?;
 
         // Load cells from db in reverse order into the temp file
         tracing::info!("started loading cells");
@@ -99,13 +105,13 @@ impl<'a> CellWriter<'a> {
             .file
             .read_exact(&mut cell_buffer[..cell_size as usize])?;
 
-        let d1 = cell_buffer[0];
-        let d2 = cell_buffer[1];
-        let ref_count = (d1 & 7) as usize;
-        let data_size = ((d2 >> 1) + (d2 & 1 != 0) as u8) as usize;
+        let descriptor = CellDescriptor {
+            d1: cell_buffer[0],
+            d2: cell_buffer[1],
+        };
 
-        let ref_offset = 2 + data_size;
-        for r in 0..ref_count {
+        let ref_offset = 2 + descriptor.byte_len() as usize;
+        for r in 0..descriptor.reference_count() as usize {
             let ref_offset = ref_offset + r * REF_SIZE;
             let slice = &mut cell_buffer[ref_offset..ref_offset + REF_SIZE];
@@ -121,11 +127,10 @@ impl<'a> CellWriter<'a> {
         Ok(())
     }
 
-    pub fn remove(&self, root_hash: &[u8; 32]) -> Result<()> {
-        let file_path = self.base_path.join(hex::encode(root_hash));
-        fs::remove_file(&file_path).context(format!(
+    pub fn remove(&self) -> Result<()> {
+        fs::remove_file(&self.base_path).context(format!(
             "Failed to remove persistent state file {:?}",
-            file_path
+            self.base_path
         ))
     }
 }
@@ -150,18 +155,21 @@ fn write_rev_cells<P: AsRef<Path>>(
     struct LoadedCell {
         hash: [u8; 32],
-        d1: u8,
-        d2: u8,
+        descriptor: CellDescriptor,
         data: SmallVec<[u8; 128]>,
         indices: SmallVec<[u32; 4]>,
     }
 
-    let file_path = base_path
-        .as_ref()
-        .join(hex::encode(root_hash))
-        .with_extension("temp");
+    let file_path = base_path.as_ref().with_extension("temp");
 
-    let file_db = FileDb::open(&file_path)?;
+    let file_db = FileDb::new(
+        &file_path,
+        fs::OpenOptions::new()
+            .read(true)
+            .write(true)
+            .create(true)
+            .truncate(true),
+    )?;
     let remove_on_drop = RemoveOnDrop(file_path);
 
     let raw = db.raw().as_ref();
@@ -197,12 +205,17 @@ fn write_rev_cells<P: AsRef<Path>>(
             .get_pinned_cf_opt(&cf, hash, read_options)?
             .ok_or(CellWriterError::CellNotFound)?;
 
-        let value = value.as_ref();
+        let value = match crate::refcount::strip_refcount(value.as_ref()) {
+            Some(bytes) => bytes,
+            None => {
+                return Err(CellWriterError::CellNotFound.into());
+            }
+        };
         if value.is_empty() {
             return Err(CellWriterError::InvalidCell.into());
         }
 
-        let (d1, d2, data) = deserialize_cell(&value[1..], &mut references_buffer)
+        let (descriptor, data) = deserialize_cell(value, &mut references_buffer)
             .ok_or(CellWriterError::InvalidCell)?;
 
         let mut reference_indices = SmallVec::with_capacity(references_buffer.len());
@@ -242,8 +255,7 @@ fn write_rev_cells<P: AsRef<Path>>(
             index,
             StackItem::Loaded(LoadedCell {
                 hash,
-                d1,
-                d2,
+                descriptor,
                 data: SmallVec::from_slice(data),
                 indices: reference_indices,
             }),
@@ -283,7 +295,7 @@ fn write_rev_cells<P: AsRef<Path>>(
         cell_sizes.push(cell_size as u8);
         total_size += cell_size as u64;
 
-        temp_file_buffer.write_all(&[loaded.d1, loaded.d2])?;
+        temp_file_buffer.write_all(&[loaded.descriptor.d1, loaded.descriptor.d2])?;
         temp_file_buffer.write_all(&loaded.data)?;
         for index in loaded.indices {
             let index = remap.get(&index).with_context(|| {
@@ -309,56 +321,30 @@ fn deserialize_cell<'a>(
     value: &'a [u8],
     references_buffer: &mut SmallVec<[[u8; 32]; 4]>,
-) -> Option<(u8, u8, &'a [u8])> {
+) -> Option<(CellDescriptor, &'a [u8])> {
     let mut index = Index {
         value_len: value.len(),
         offset: 0,
     };
 
-    index.require(3)?;
-    let cell_type = value[*index];
-    index.advance(1);
-    let bit_length = u16::from_le_bytes((&value[*index..*index + 2]).try_into().unwrap());
-    index.advance(2);
+    index.require(4)?;
+    let mut descriptor = CellDescriptor::new([value[*index], value[*index + 1]]);
+    descriptor.d1 &= !CellDescriptor::STORE_HASHES_MASK;
 
-    let d2 = (((bit_length >> 2) as u8) & !0b1) | ((bit_length % 8 != 0) as u8);
+    index.advance(2);
+    let bit_length = u16::from_le_bytes([value[*index], value[*index + 1]]);
+    index.advance(2);
 
-    // TODO: Replace with `(big_length + 7) / 8`
-    let data_len = ((d2 >> 1) + u8::from(d2 & 1 != 0)) as usize;
+    let data_len = descriptor.byte_len() as usize;
     index.require(data_len)?;
     let data = &value[*index..*index + data_len];
+    index.advance(data_len);
 
-    // NOTE: additional byte is required here due to internal structure
-    index.advance(((bit_length + 8) / 8) as usize);
-
-    index.require(1)?;
-    let level_mask = value[*index];
-    // skip store_hashes
-    index.advance(2);
-
-    index.require(2)?;
-    let has_hashes = value[*index];
-    index.advance(1);
-    if has_hashes != 0 {
-        let count = value[*index];
-        index.advance(1 + (count * 32) as usize);
-    }
-
-    index.require(2)?;
-    let has_depths = value[*index];
-    index.advance(1);
-    if has_depths != 0 {
-        let count = value[*index];
-        index.advance(1 + (count * 2) as usize);
-    }
-
-    index.require(1)?;
-    let reference_count = value[*index];
-    index.advance(1);
+    assert_eq!((bit_length as usize + 7) / 8, data_len);
 
-    let d1 = reference_count | (((cell_type != 0x01) as u8) << 3) | (level_mask << 5);
+    index.advance((32 + 2) * descriptor.hash_count() as usize);
 
-    for _ in 0..reference_count {
+    for _ in 0..descriptor.reference_count() {
         index.require(32)?;
         let mut hash = [0; 32];
         hash.copy_from_slice(&value[*index..*index + 32]);
@@ -366,7 +352,7 @@ fn deserialize_cell<'a>(
         index.advance(32);
     }
 
-    Some((d1, d2, data))
+    Some((descriptor, data))
 }
 
 #[cfg(not(target_os = "macos"))]
@@ -411,7 +397,7 @@ struct Index {
 impl Index {
     #[inline(always)]
     fn require(&self, len: usize) -> Option<()> {
-        if self.offset + len < self.value_len {
+        if self.offset + len <= self.value_len {
            Some(())
        } else {
            None
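
Note: `d1` and `d2` here are the two standard cell descriptor bytes: the low bits of
`d1` carry the reference count and its high bits the level mask, while `d2` encodes
the data length so that `byte_len = (d2 >> 1) + (d2 & 1)` — exactly the hand-rolled
arithmetic this hunk replaces with `CellDescriptor::byte_len()`. The `require` fix
also matters: with `<` a read ending exactly at the end of the buffer was wrongly
rejected; `<=` accepts it. A tiny self-check of the length formula, using only the
arithmetic visible in the removed lines:

    fn byte_len(d2: u8) -> u8 {
        (d2 >> 1) + (d2 & 1) // ceil(bit_len / 8)
    }

    assert_eq!(byte_len(0b110), 3); // 3 full bytes, no partial byte
    assert_eq!(byte_len(0b111), 4); // 3 full bytes + 1 partial byte
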
diff --git a/storage/src/store/persistent_state/mod.rs b/storage/src/store/persistent_state/mod.rs
index 9c0e74477..e938bc70b 100644
--- a/storage/src/store/persistent_state/mod.rs
+++ b/storage/src/store/persistent_state/mod.rs
@@ -1,20 +1,21 @@
 use std::fs;
-use std::io::SeekFrom;
-use std::path::{Path, PathBuf};
+use std::io::{BufReader, Read, Seek, SeekFrom};
+use std::path::PathBuf;
 use std::sync::atomic::{AtomicBool, Ordering};
 use std::sync::Arc;
 
 use anyhow::Result;
-use bytes::BytesMut;
+use bytes::{Bytes, BytesMut};
 use everscale_types::cell::HashBytes;
 use everscale_types::models::BlockId;
 use tokio::time::Instant;
 
 use crate::db::Db;
 use crate::store::BlockHandleStorage;
-use crate::utils::CellWriter;
 use crate::FileDb;
 
+mod cell_writer;
+
 const KEY_BLOCK_UTIME_STEP: u32 = 86400;
 
 pub struct PersistentStateStorage {
@@ -55,7 +56,7 @@ impl PersistentStateStorage {
         let base_path = self.get_state_file_path(&mc_block_id, &block_id);
 
         tokio::task::spawn_blocking(move || {
-            let cell_writer = CellWriter::new(&db, &base_path);
+            let cell_writer = cell_writer::CellWriter::new(&db, &base_path);
             match cell_writer.write(&root_hash.0, is_cancelled) {
                 Ok(()) => {
                     tracing::info!(
@@ -69,7 +70,7 @@ impl PersistentStateStorage {
                         "writing persistent state failed: {e:?}"
                     );
 
-                    if let Err(e) = cell_writer.remove(&root_hash.0) {
+                    if let Err(e) = cell_writer.remove() {
                         tracing::error!(%block_id, "{e}")
                     }
                 }
@@ -85,39 +86,49 @@ impl PersistentStateStorage {
         block_id: &BlockId,
         offset: u64,
         size: u64,
-    ) -> Option<Vec<u8>> {
-        // TODO: cache file handles
-        let mut file_db = FileDb::open(self.get_state_file_path(mc_block_id, block_id)).ok()?;
+    ) -> Option<Bytes> {
+        let path = self.get_state_file_path(mc_block_id, block_id);
 
-        if let Err(e) = file_db.seek(SeekFrom::Start(offset)) {
-            tracing::error!("failed to seek state file offset: {e:?}");
-            return None;
-        }
+        tokio::task::spawn_blocking(move || {
+            // TODO: cache file handles
+            let mut file_db = FileDb::new(path, fs::OpenOptions::new().read(true)).ok()?;
 
-        // SAFETY: size must be checked
-        let mut result = BytesMut::with_capacity(size as usize);
-        let now = Instant::now();
-        loop {
-            match file_db.read(&mut result) {
-                Ok(bytes_read) => {
-                    tracing::debug!(bytes_read, "reading state file");
-                    if bytes_read == 0 || bytes_read == size as usize {
-                        break;
+            if let Err(e) = file_db.seek(SeekFrom::Start(offset)) {
+                tracing::error!("failed to seek state file offset: {e:?}");
+                return None;
+            }
+
+            let mut buf_reader = BufReader::new(file_db.file());
+
+            let mut result = BytesMut::zeroed(size as usize);
+            let mut result_cursor = 0;
+
+            let now = Instant::now();
+            loop {
+                match buf_reader.read(&mut result[result_cursor..]) {
+                    Ok(bytes_read) => {
+                        tracing::info!("Reading state file. Bytes read: {}", bytes_read);
+                        if bytes_read == 0 || bytes_read == size as usize {
+                            break;
+                        }
+                        result_cursor += bytes_read;
+                    }
+                    Err(e) => {
+                        tracing::error!("Failed to read state file. Err: {e:?}");
+                        return None;
                     }
-                }
-                Err(e) => {
-                    tracing::error!("failed to read state file. Err: {e:?}");
-                    return None;
                 }
             }
-        }
 
             tracing::info!(
                 "Finished reading buffer after: {} ms",
                 now.elapsed().as_millis()
             );
 
-        // TODO: use `Bytes`
-        Some(result.to_vec())
+            Some(result.freeze())
+        })
+        .await
+        .ok()
+        .flatten()
     }
 
     pub fn state_exists(&self, mc_block_id: &BlockId, block_id: &BlockId) -> bool {
@@ -139,6 +150,7 @@ impl PersistentStateStorage {
         self.storage_path
             .clone()
             .join(mc_block_id.seqno.to_string())
+            .join(block_id.root_hash.to_string())
     }
 
     pub fn cancel(&self) {
diff --git a/storage/src/store/runtime/persistent_state_keeper.rs b/storage/src/store/runtime/persistent_state_keeper.rs
index 49f012b52..453c8b0ac 100644
--- a/storage/src/store/runtime/persistent_state_keeper.rs
+++ b/storage/src/store/runtime/persistent_state_keeper.rs
@@ -30,6 +30,8 @@ impl PersistentStateKeeper {
     }
 
     pub fn update(&self, block_handle: &Arc<BlockHandle>) -> Result<()> {
+        println!("UPDATE");
+
         if !self.initialized.load(Ordering::Acquire) {
             let prev_persistent_key_block = self
                 .block_handle_storage
diff --git a/storage/src/store/shard_state/cell_storage.rs b/storage/src/store/shard_state/cell_storage.rs
index 99b211685..6d56d4bec 100644
--- a/storage/src/store/shard_state/cell_storage.rs
+++ b/storage/src/store/shard_state/cell_storage.rs
@@ -427,6 +427,7 @@ impl StorageCell {
         target.extend_from_slice(&[descriptor.d1, descriptor.d2]);
         target.extend_from_slice(&cell.bit_len().to_le_bytes());
         target.extend_from_slice(cell.data());
+        assert_eq!(cell.data().len(), descriptor.byte_len() as usize);
 
         for i in 0..descriptor.hash_count() {
             target.extend_from_slice(cell.hash(i).as_array());
@@ -616,7 +617,6 @@ impl CellImpl for StorageCell {
 
 impl Drop for StorageCell {
     fn drop(&mut self) {
-        println!("DROPPING");
         self.cell_storage.drop_cell(DynCell::repr_hash(self));
         for i in 0..4 {
             let state = self.reference_states[i].load(Ordering::Acquire);
diff --git a/storage/src/store/shard_state/mod.rs b/storage/src/store/shard_state/mod.rs
index 5603b4443..8533eb200 100644
--- a/storage/src/store/shard_state/mod.rs
+++ b/storage/src/store/shard_state/mod.rs
@@ -1,4 +1,3 @@
-use std::fs::File;
 use std::path::PathBuf;
 use std::sync::atomic::{AtomicUsize, Ordering};
 use std::sync::Arc;
@@ -7,6 +6,8 @@ use std::time::Instant;
 use anyhow::{Context, Result};
 use everscale_types::models::*;
 use everscale_types::prelude::{Cell, HashBytes};
+use tycho_block_util::block::*;
+use tycho_block_util::state::*;
 
 use self::cell_storage::*;
 use self::replace_transaction::ShardStateReplaceTransaction;
@@ -15,9 +16,6 @@ use crate::db::*;
 use crate::utils::*;
 use crate::{models::BlockHandle, BlockHandleStorage, BlockStorage};
 
-use tycho_block_util::block::*;
-use tycho_block_util::state::*;
-
 mod cell_storage;
 mod entries_buffer;
 mod replace_transaction;
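
Note: the rewritten loop tracks `result_cursor` because `Read::read` may
legitimately return fewer bytes than requested, so a single call cannot be assumed
to fill the buffer; reading repeatedly into `result[result_cursor..]` until EOF or
the requested size is what guarantees a full buffer. The same pattern, condensed
into a hypothetical helper with identical logic:

    use std::io::Read;

    fn read_fully(mut src: impl Read, buf: &mut [u8]) -> std::io::Result<usize> {
        let mut filled = 0;
        while filled < buf.len() {
            let n = src.read(&mut buf[filled..])?;
            if n == 0 {
                break; // EOF reached before the buffer was full
            }
            filled += n;
        }
        Ok(filled)
    }

Wrapping the whole read in `tokio::task::spawn_blocking` keeps this blocking file
I/O off the async runtime worker threads.
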
diff --git a/storage/src/store/shard_state/replace_transaction.rs b/storage/src/store/shard_state/replace_transaction.rs
index ac18ad308..35d75ddf9 100644
--- a/storage/src/store/shard_state/replace_transaction.rs
+++ b/storage/src/store/shard_state/replace_transaction.rs
@@ -1,4 +1,3 @@
-use std::fs::File;
 use std::io::Write;
 use std::path::{Path, PathBuf};
 use std::sync::Arc;
@@ -14,7 +13,6 @@ use crate::db::*;
 use crate::utils::*;
 use tycho_block_util::state::*;
-use tycho_block_util::*;
 use tycho_util::progress_bar::*;
 use tycho_util::FastHashMap;
 
@@ -61,8 +59,6 @@ impl<'a> ShardStateReplaceTransaction<'a> {
         packet: Vec<u8>,
         progress_bar: &mut ProgressBar,
     ) -> Result<bool> {
-        use std::io::Write;
-
         let cells_file = self.file_ctx.cells_file()?;
 
         self.reader.set_next_packet(packet);
@@ -120,7 +116,7 @@ impl<'a> ShardStateReplaceTransaction<'a> {
         Ok(true)
     }
 
-    pub async fn finalize(
+    pub fn finalize(
         mut self,
         block_id: BlockId,
         progress_bar: &mut ProgressBar,
@@ -214,7 +210,6 @@ impl<'a> ShardStateReplaceTransaction<'a> {
             }
 
             progress_bar.set_progress((total_size - file_pos) as u64);
-            tokio::task::yield_now().await;
         }
 
         if batch_len > 0 {
@@ -253,7 +248,7 @@ impl<'a> ShardStateReplaceTransaction<'a> {
         &self,
         ctx: &mut FinalizationContext<'_>,
         cell_index: u32,
-        mut cell: RawCell<'_>,
+        cell: RawCell<'_>,
     ) -> Result<()> {
         use sha2::{Digest, Sha256};
@@ -310,6 +305,7 @@ impl<'a> ShardStateReplaceTransaction<'a> {
         };
 
         let mut max_depths = [0u16; 4];
+        let mut temp_descriptor = cell.descriptor;
         for i in 0..hash_count {
             let mut hasher = Sha256::new();
@@ -319,9 +315,9 @@ impl<'a> ShardStateReplaceTransaction<'a> {
                 LevelMask::from_level(i)
             };
 
-            cell.descriptor.d1 &= !(CellDescriptor::LEVEL_MASK | CellDescriptor::STORE_HASHES_MASK);
-            cell.descriptor.d1 |= u8::from(level_mask) << 5;
-            hasher.update([cell.descriptor.d1, cell.descriptor.d2]);
+            temp_descriptor.d1 &= !(CellDescriptor::LEVEL_MASK | CellDescriptor::STORE_HASHES_MASK);
+            temp_descriptor.d1 |= u8::from(level_mask) << 5;
+            hasher.update([temp_descriptor.d1, temp_descriptor.d2]);
 
             if i == 0 {
                 hasher.update(cell.data);
@@ -503,7 +499,14 @@ impl FilesContext {
         let cells_path = root_path.as_ref().join(format!("state_cells_{block_id}"));
         let hashes_path = root_path.as_ref().join(format!("state_hashes_{block_id}"));
 
-        let cells_file = Some(FileDb::open(&cells_path)?);
+        let cells_file = Some(FileDb::new(
+            &cells_path,
+            std::fs::OpenOptions::new()
+                .write(true)
+                .create(true)
+                .truncate(true)
+                .read(true),
+        )?);
 
         Ok(Self {
             cells_file,
diff --git a/storage/src/utils/mod.rs b/storage/src/utils/mod.rs
index 359217fe1..d28b57e54 100644
--- a/storage/src/utils/mod.rs
+++ b/storage/src/utils/mod.rs
@@ -1,5 +1,3 @@
-pub use self::cell_writer::*;
 pub use self::stored_value::*;
 
-mod cell_writer;
 mod stored_value;
diff --git a/storage/src/utils/stored_value.rs b/storage/src/utils/stored_value.rs
index 50186c97d..813edb106 100644
--- a/storage/src/utils/stored_value.rs
+++ b/storage/src/utils/stored_value.rs
@@ -4,7 +4,6 @@ use smallvec::SmallVec;
 use anyhow::Result;
 use everscale_types::cell::HashBytes;
 use everscale_types::models::{BlockId, BlockIdShort, ShardIdent};
-use tokio::io::AsyncReadExt;
 use tycho_util::byte_reader::ByteOrderRead;
 
 /// A trait for writing or reading data from a stack-allocated buffer
diff --git a/storage/tests/mod.rs b/storage/tests/mod.rs
index 793f2c307..46294c55a 100644
--- a/storage/tests/mod.rs
+++ b/storage/tests/mod.rs
@@ -1,16 +1,12 @@
-use std::path::Path;
-use std::str::FromStr;
-use std::time::Duration;
-
 use anyhow::{anyhow, Result};
 use base64::prelude::BASE64_STANDARD;
 use base64::Engine;
 use bytesize::ByteSize;
 use everscale_types::boc::Boc;
-use everscale_types::cell::{Cell, HashBytes};
+use everscale_types::cell::{Cell, DynCell, HashBytes};
 use everscale_types::models::{BlockId, ShardIdent, ShardState};
 use serde::{Deserialize, Deserializer};
-use tycho_block_util::state::{MinRefMcStateTracker, ShardStateStuff};
+use tycho_block_util::state::ShardStateStuff;
 use tycho_storage::{BlockMetaData, Db, DbOptions, Storage};
 
 #[derive(Clone)]
@@ -22,30 +18,11 @@ struct ShardStateCombined {
 impl ShardStateCombined {
     fn from_file(path: impl AsRef<Path>) -> Result<Self> {
         let bytes = std::fs::read(path.as_ref())?;
-        let cell = Boc::decode(bytes)?;
+        let cell = Boc::decode(&bytes)?;
         let state = cell.parse()?;
         Ok(Self { cell, state })
     }
 
-    fn short_id(&self) -> ShardShortId {
-        match &self.state {
-            ShardState::Unsplit(s) => ShardShortId::Unsplit {
-                seqno: s.seqno,
-                shard_ident: s.shard_ident,
-            },
-            ShardState::Split(s) => {
-                let left = s.left.load().unwrap();
-                let right = s.right.load().unwrap();
-                ShardShortId::Split {
-                    left_seqno: left.seqno,
-                    left_shard_ident: left.shard_ident,
-                    right_seqno: right.seqno,
-                    right_shard_ident: right.shard_ident,
-                }
-            }
-        }
-    }
-
     fn gen_utime(&self) -> Option<u32> {
         match &self.state {
             ShardState::Unsplit(s) => Some(s.gen_utime),
@@ -56,39 +33,7 @@ impl ShardStateCombined {
     fn min_ref_mc_seqno(&self) -> Option<u32> {
         match &self.state {
             ShardState::Unsplit(s) => Some(s.min_ref_mc_seqno),
-            ShardState::Split(s) => None,
-        }
-    }
-}
-
-#[derive(Debug)]
-enum ShardShortId {
-    Unsplit {
-        seqno: u32,
-        shard_ident: ShardIdent,
-    },
-    Split {
-        left_seqno: u32,
-        left_shard_ident: ShardIdent,
-        right_seqno: u32,
-        right_shard_ident: ShardIdent,
-    },
-}
-
-impl ShardShortId {
-    pub fn shard_ident(&self) -> ShardIdent {
-        match self {
-            ShardShortId::Unsplit { shard_ident, .. } => *shard_ident,
-            ShardShortId::Split {
-                left_shard_ident, ..
-            } => *left_shard_ident,
-        }
-    }
-
-    pub fn seqno(&self) -> u32 {
-        match self {
-            ShardShortId::Unsplit { seqno, .. } => *seqno,
-            ShardShortId::Split { left_seqno, .. } => *left_seqno,
+            ShardState::Split(_) => None,
         }
     }
 }
@@ -191,80 +136,130 @@ impl TryFrom for GlobalConfig {
     }
 }
 
+fn compare_cells(orig_cell: &DynCell, stored_cell: &DynCell) {
+    assert_eq!(orig_cell.repr_hash(), stored_cell.repr_hash());
+
+    let l = orig_cell.descriptor();
+    let r = stored_cell.descriptor();
+
+    assert_eq!(l.d1, r.d1);
+    assert_eq!(l.d2, r.d2);
+    assert_eq!(orig_cell.data(), stored_cell.data());
+
+    for (orig_cell, stored_cell) in std::iter::zip(orig_cell.references(), stored_cell.references())
+    {
+        compare_cells(orig_cell, stored_cell);
+    }
+}
+
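
Note: `compare_cells` walks both trees in lockstep — descriptor bytes, payload, then
recursion into each reference — so a mismatch panics at the exact node where the
stored state diverges from the original, which a single `repr_hash` comparison
cannot localize. In the test below it is invoked on the two root cells:

    compare_cells(
        zerostate.root_cell().as_ref(),
        loaded_state.root_cell().as_ref(),
    );
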
 #[tokio::test]
-async fn storage_init() {
+async fn persistent_storage_everscale() -> Result<()> {
     tracing_subscriber::fmt::try_init().ok();
-    tracing::info!("connect_new_node_to_bootstrap");
 
-    let root_path = Path::new("tmp");
+    let tmp_dir = tempfile::tempdir()?;
+    let root_path = tmp_dir.path();
 
+    // Init rocksdb
     let db_options = DbOptions {
         rocksdb_lru_capacity: ByteSize::kb(1024),
         cells_cache_size: ByteSize::kb(1024),
     };
-    let db = Db::open(root_path.join("db_storage"), db_options).unwrap();
+    let db = Db::open(root_path.join("db_storage"), db_options)?;
 
+    // Init storage
     let storage = Storage::new(
         db,
         root_path.join("file_storage"),
         db_options.cells_cache_size.as_u64(),
-    )
-    .unwrap();
+    )?;
     assert!(storage.node_state().load_init_mc_block_id().is_err());
 
     // Read zerostate
-    let zero_state = ShardStateCombined::from_file("tests/everscale_zerostate.boc").unwrap();
+    let zero_state_raw = ShardStateCombined::from_file("tests/everscale_zerostate.boc")?;
 
     // Read global config
-    let global_config = GlobalConfig::from_file("tests/global-config.json").unwrap();
+    let global_config = GlobalConfig::from_file("tests/global-config.json")?;
 
     // Write zerostate to db
-    let (handle, _) = storage
-        .block_handle_storage()
-        .create_or_load_handle(
-            &global_config.block_id,
-            BlockMetaData::zero_state(zero_state.gen_utime().unwrap()),
-        )
-        .unwrap();
+    let (handle, _) = storage.block_handle_storage().create_or_load_handle(
+        &global_config.block_id,
+        BlockMetaData::zero_state(zero_state_raw.gen_utime().unwrap()),
+    )?;
 
-    let state = ShardStateStuff::new(
+    let zerostate = ShardStateStuff::new(
         global_config.block_id,
-        zero_state.cell.clone(),
+        zero_state_raw.cell.clone(),
         storage.shard_state_storage().min_ref_mc_state(),
-    )
-    .unwrap();
+    )?;
 
     storage
         .shard_state_storage()
-        .store_state(&handle, &state)
-        .await
-        .unwrap();
+        .store_state(&handle, &zerostate)
+        .await?;
 
+    // Check seqno
     let min_ref_mc_state = storage.shard_state_storage().min_ref_mc_state();
-    assert_eq!(min_ref_mc_state.seqno(), zero_state.min_ref_mc_seqno());
+    assert_eq!(min_ref_mc_state.seqno(), zero_state_raw.min_ref_mc_seqno());
+
+    // Load zerostate from db
+    let loaded_state = storage
+        .shard_state_storage()
+        .load_state(zerostate.block_id())
+        .await?;
+
+    assert_eq!(zerostate.state(), loaded_state.state());
+    assert_eq!(zerostate.block_id(), loaded_state.block_id());
+    assert_eq!(zerostate.root_cell(), loaded_state.root_cell());
+
+    compare_cells(
+        zerostate.root_cell().as_ref(),
+        loaded_state.root_cell().as_ref(),
+    );
 
-    // Write persistent state
+    // Write persistent state to file
     let persistent_state_keeper = storage.runtime_storage().persistent_state_keeper();
     assert!(persistent_state_keeper.current().is_none());
 
     storage
         .persistent_state_storage()
-        .prepare_persistent_states_dir(&state.block_id())
-        .unwrap();
+        .prepare_persistent_states_dir(&zerostate.block_id())?;
 
     storage
         .persistent_state_storage()
         .save_state(
-            &state.block_id(),
-            &state.block_id(),
-            zero_state.cell.repr_hash(),
+            &zerostate.block_id(),
+            &zerostate.block_id(),
+            zero_state_raw.cell.repr_hash(),
+        )
+        .await?;
+
+    // Check if state exists
+    let exist = storage
+        .persistent_state_storage()
+        .state_exists(&zerostate.block_id(), &zerostate.block_id());
+    assert_eq!(exist, true);
+
+    // Read persistent state
+    let offset = 0u64;
+    let max_size = 1_000_000u64;
+
+    let persistent_state_storage = storage.persistent_state_storage();
+    let persistent_state_data = persistent_state_storage
+        .read_state_part(
+            &zerostate.block_id(),
+            &zerostate.block_id(),
+            offset,
+            max_size,
         )
         .await
         .unwrap();
 
-    tokio::time::sleep(Duration::from_secs(10)).await;
+    // Check state
+    let cell = Boc::decode(&persistent_state_data)?;
+    assert_eq!(&cell, zerostate.root_cell());
 
-    //println!("{:?}", zero_state.state);
-    //println!("{:?}", global_config);
+    // Clear files for test
+    tmp_dir.close()?;
 
-    //std::fs::remove_dir_all(root_path).unwrap()
+    Ok(())
 }
diff --git a/util/src/lib.rs b/util/src/lib.rs
index eef6bc3de..1aa5a8d6e 100644
--- a/util/src/lib.rs
+++ b/util/src/lib.rs
@@ -2,7 +2,6 @@ use std::collections::HashMap;
 use std::collections::HashSet;
 
 pub mod byte_reader;
-pub mod futures;
 pub mod progress_bar;
 pub mod serde_helpers;
 pub mod time;

From e97ebd7f5d0331ac7eb847a622b0eb134e5eb8e4 Mon Sep 17 00:00:00 2001
From: Alexey Pashinov
Date: Wed, 27 Mar 2024 12:49:18 +0100
Subject: [PATCH 18/19] refactor(storage): fix clippy warnings

---
 storage/src/db/file_db/mapped_file.rs           |  6 +++---
 storage/src/db/file_db/mod.rs                   |  6 +++---
 .../src/store/persistent_state/cell_writer.rs   |  2 +-
 storage/src/store/persistent_state/mod.rs       |  6 +++---
 .../store/shard_state/replace_transaction.rs    | 18 ++++++------------
 storage/src/utils/stored_value.rs               |  4 ++--
 6 files changed, 18 insertions(+), 24 deletions(-)

diff --git a/storage/src/db/file_db/mapped_file.rs b/storage/src/db/file_db/mapped_file.rs
index a6c535245..897c29f38 100644
--- a/storage/src/db/file_db/mapped_file.rs
+++ b/storage/src/db/file_db/mapped_file.rs
@@ -78,7 +78,7 @@ impl MappedFile {
             (self.ptr as *const u8).add(offset),
             buffer.as_mut_ptr(),
             buffer.len(),
-        )
+        );
     }
 
     /// Copies buffer to the mapped memory
@@ -88,9 +88,9 @@ impl MappedFile {
     pub unsafe fn write_all_at(&self, offset: usize, buffer: &[u8]) {
         std::ptr::copy_nonoverlapping(
             buffer.as_ptr(),
-            (self.ptr as *mut u8).add(offset),
+            (self.ptr.cast::<u8>()).add(offset),
             buffer.len(),
-        )
+        );
     }
 }
diff --git a/storage/src/db/file_db/mod.rs b/storage/src/db/file_db/mod.rs
index 4be7821b5..fbff8234a 100644
--- a/storage/src/db/file_db/mod.rs
+++ b/storage/src/db/file_db/mod.rs
@@ -53,8 +53,8 @@ impl Seek for FileDb {
     }
 }
 
-impl Into<File> for FileDb {
-    fn into(self) -> File {
-        self.file
+impl From<FileDb> for File {
+    fn from(val: FileDb) -> Self {
+        val.file
     }
 }
diff --git a/storage/src/store/persistent_state/cell_writer.rs b/storage/src/store/persistent_state/cell_writer.rs
index bb997e4db..036ba6a19 100644
--- a/storage/src/store/persistent_state/cell_writer.rs
+++ b/storage/src/store/persistent_state/cell_writer.rs
@@ -128,7 +128,7 @@ impl<'a> CellWriter<'a> {
     }
 
     pub fn remove(&self) -> Result<()> {
-        fs::remove_file(&self.base_path).context(format!(
+        fs::remove_file(self.base_path).context(format!(
             "Failed to remove persistent state file {:?}",
             self.base_path
         ))
diff --git a/storage/src/store/persistent_state/mod.rs b/storage/src/store/persistent_state/mod.rs
index e938bc70b..038d68e8c 100644
--- a/storage/src/store/persistent_state/mod.rs
+++ b/storage/src/store/persistent_state/mod.rs
@@ -49,11 +49,11 @@ impl PersistentStateStorage {
         block_id: &BlockId,
         root_hash: &HashBytes,
     ) -> Result<()> {
-        let block_id = block_id.clone();
+        let block_id = *block_id;
         let root_hash = *root_hash;
         let db = self.db.clone();
         let is_cancelled = Some(self.is_cancelled.clone());
-        let base_path = self.get_state_file_path(&mc_block_id, &block_id);
+        let base_path = self.get_state_file_path(mc_block_id, &block_id);
 
         tokio::task::spawn_blocking(move || {
             let cell_writer = cell_writer::CellWriter::new(&db, &base_path);
@@ -71,7 +71,7 @@ impl PersistentStateStorage {
                     );
 
                     if let Err(e) = cell_writer.remove() {
-                        tracing::error!(%block_id, "{e}")
+                        tracing::error!(%block_id, "{e}");
                     }
                 }
             }
diff --git a/storage/src/store/shard_state/replace_transaction.rs b/storage/src/store/shard_state/replace_transaction.rs
index 35d75ddf9..8ffde75e6 100644
--- a/storage/src/store/shard_state/replace_transaction.rs
+++ b/storage/src/store/shard_state/replace_transaction.rs
@@ -101,7 +101,8 @@ impl<'a> ShardStateReplaceTransaction<'a> {
 
         if chunk_size > 0 {
             tracing::debug!(chunk_size, "creating chunk");
-            cells_file.write(&chunk_size.to_le_bytes())?;
+            let bytes = cells_file.write(&chunk_size.to_le_bytes())?;
+            tracing::trace!(bytes, "writing cells to file");
         }
 
         if self.cells_read < header.cell_count {
@@ -190,7 +191,7 @@ impl<'a> ShardStateReplaceTransaction<'a> {
                 unsafe { hashes_file.read_exact_at(index as usize * HashesEntry::LEN, buffer) }
             }
 
-            self.finalize_cell(&mut ctx, cell_index as u32, cell)?;
+            ShardStateReplaceTransaction::finalize_cell(&mut ctx, cell_index as u32, cell)?;
 
             // SAFETY: `entries_buffer` is guaranteed to be in separate memory area
             unsafe {
@@ -227,7 +228,7 @@ impl<'a> ShardStateReplaceTransaction<'a> {
         progress_bar.complete();
 
         // Load stored shard state
-        let result = match self.db.shard_states.get(shard_state_key)? {
+        match self.db.shard_states.get(shard_state_key)? {
             Some(root) => {
                 let cell_id = HashBytes::from_slice(&root[..32]);
@@ -239,13 +240,10 @@ impl<'a> ShardStateReplaceTransaction<'a> {
                 )?))
             }
             None => Err(ReplaceTransactionError::NotFound.into()),
-        };
-
-        result
+        }
     }
 
     fn finalize_cell(
-        &self,
         ctx: &mut FinalizationContext<'_>,
         cell_index: u32,
         cell: RawCell<'_>,
@@ -277,11 +275,7 @@ impl<'a> ShardStateReplaceTransaction<'a> {
                 cell.descriptor.level_mask()
             }
             CellType::LibraryReference => LevelMask::new(0),
-            CellType::MerkleProof => {
-                is_merkle_cell = true;
-                children_mask.virtualize(1)
-            }
-            CellType::MerkleUpdate => {
+            CellType::MerkleProof | CellType::MerkleUpdate => {
                 is_merkle_cell = true;
                 children_mask.virtualize(1)
             }
diff --git a/storage/src/utils/stored_value.rs b/storage/src/utils/stored_value.rs
index 813edb106..deaea8198 100644
--- a/storage/src/utils/stored_value.rs
+++ b/storage/src/utils/stored_value.rs
@@ -165,7 +165,7 @@ impl StoredValue for BlockIdShort {
     }
 }
 
-/// Writes BlockIdExt in little-endian format
+/// Writes `BlockIdExt` in little-endian format
 pub fn write_block_id_le(block_id: &BlockId) -> [u8; 80] {
     let mut bytes = [0u8; 80];
     bytes[..4].copy_from_slice(&block_id.shard.workchain().to_le_bytes());
@@ -176,7 +176,7 @@ pub fn write_block_id_le(block_id: &BlockId) -> [u8; 80] {
     bytes
 }
 
-/// Reads BlockId in little-endian format
+/// Reads `BlockId` in little-endian format
 pub fn read_block_id_le(data: &[u8]) -> Option<BlockId> {
     if data.len() < 80 {
         return None;

From 55eb184b65968f6772392e41ca44a86c30989282 Mon Sep 17 00:00:00 2001
From: Ivan Kalinin
Date: Fri, 29 Mar 2024 18:07:06 +0100
Subject: [PATCH 19/19] refactor(storage): rework file db

---
 Cargo.lock                                      |  86 ++--
 storage/src/db/file_db/mapped_file.rs           |  55 +--
 storage/src/db/file_db/mod.rs                   | 153 +++++--
 storage/src/db/file_db/temp_file.rs             |  67 ++++
 storage/src/db/kv_db/migrations/mod.rs          |   2 +-
 storage/src/lib.rs                              |  39 +-
 storage/src/models/block_meta.rs                |  12 +-
 storage/src/store/block/mod.rs                  |   2 +-
 storage/src/store/block_connection/mod.rs       |   2 +-
 storage/src/store/block_handle/mod.rs           |   2 +-
 storage/src/store/node_state/mod.rs             |   2 +-
 .../src/store/persistent_state/cell_writer.rs   | 378 ++++++++----------
 storage/src/store/persistent_state/mod.rs       |  64 +--
 storage/src/store/shard_state/mod.rs            |  21 +-
 .../store/shard_state/replace_transaction.rs    |  50 +--
 storage/src/{utils => util}/stored_value.rs     |   9 +-
 storage/src/utils/mod.rs                        |   3 -
 storage/tests/global-config.json                |  22 -
 storage/tests/mod.rs                            | 117 +-----
 util/src/byte_reader.rs                         |  11 -
 util/src/lib.rs                                 |   1 -
 21 files changed, 533 insertions(+), 565 deletions(-)
 create mode 100644 storage/src/db/file_db/temp_file.rs
 rename storage/src/{utils => util}/stored_value.rs (97%)
 delete mode 100644 storage/src/utils/mod.rs
 delete mode 100644 storage/tests/global-config.json
 delete mode 100644 util/src/byte_reader.rs

diff --git a/Cargo.lock b/Cargo.lock
index c62426cf6..d610a90b2 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -95,9 +95,9 @@ checksum = "0952808a6c2afd1aa8947271f3a60f1a6763c7b912d210184c5149b5cf147247"
 
 [[package]]
 name = "arc-swap"
-version = "1.7.0"
+version = "1.7.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7b3d0060af21e8d11a926981cc00c6c1541aa91dd64b9f881985c3da1094425f"
+checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457"
 
 [[package]]
 name = "argh"
@@ -118,7 +118,7 @@ dependencies = [
  "argh_shared",
  "proc-macro2",
  "quote",
- "syn 2.0.53",
+ "syn 2.0.55",
 ]
 
 [[package]]
@@ -235,7 +235,7 @@ dependencies = [
  "regex",
  "rustc-hash",
  "shlex",
- "syn 2.0.53",
"syn 2.0.55", ] [[package]] @@ -406,7 +406,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.55", ] [[package]] @@ -534,7 +534,7 @@ checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.55", ] [[package]] @@ -607,7 +607,7 @@ checksum = "487585f4d0c6655fe74905e2504d8ad6908e4db67f744eb140876906c2f3175d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.55", ] [[package]] @@ -694,7 +694,7 @@ checksum = "323d8b61c76be2c16eb2d72d007f1542fdeb3760fdf2e2cae219fc0da3db0c09" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.55", ] [[package]] @@ -742,7 +742,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.55", ] [[package]] @@ -837,9 +837,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "itoa" -version = "1.0.10" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" [[package]] name = "jobserver" @@ -958,9 +958,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.7.1" +version = "2.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" +checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" [[package]] name = "minimal-lexical" @@ -1183,7 +1183,7 @@ dependencies = [ "pest_meta", "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.55", ] [[package]] @@ -1227,9 +1227,9 @@ checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" [[package]] name = "platforms" -version = "3.3.0" +version = "3.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "626dec3cac7cc0e1577a2ec3fc496277ec2baa084bebad95bb6fdbfae235f84c" +checksum = "db23d408679286588f4d4644f965003d056e3dd5abcaaa938116871d7ce2fee7" [[package]] name = "powerfmt" @@ -1245,12 +1245,12 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "prettyplease" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a41cf62165e97c7f814d2221421dbb9afcbcdb0a88068e5ea206e19951c2cbb5" +checksum = "8d3928fb5db768cb86f891ff014f0144589297e3c6a1aba6ed7cecfdace270c7" dependencies = [ "proc-macro2", - "syn 2.0.53", + "syn 2.0.55", ] [[package]] @@ -1290,9 +1290,9 @@ dependencies = [ [[package]] name = "quick_cache" -version = "0.4.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58c20af3800cee5134b79a3bd4a3d4b583c16ccfa5f53338f46400851a5b3819" +checksum = "b1380629287ed1247c1e0fcc6d43efdcec508b65382c9ab775cc8f3df7ca07b0" dependencies = [ "ahash", "equivalent", @@ -1397,9 +1397,9 @@ dependencies = [ [[package]] name = "rayon" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4963ed1bc86e4f3ee217022bd855b297cef07fb9eac5dfa1f788b220b49b3bd" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" dependencies = [ "either", "rayon-core", @@ -1438,14 +1438,14 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.3" +version = "1.10.4" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" +checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" dependencies = [ "aho-corasick", "memchr", "regex-automata 0.4.6", - "regex-syntax 0.8.2", + "regex-syntax 0.8.3", ] [[package]] @@ -1465,7 +1465,7 @@ checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.8.2", + "regex-syntax 0.8.3", ] [[package]] @@ -1476,9 +1476,9 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" [[package]] name = "regex-syntax" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" +checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" [[package]] name = "ring" @@ -1657,7 +1657,7 @@ checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.55", ] [[package]] @@ -1808,9 +1808,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.53" +version = "2.0.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7383cd0e49fff4b6b90ca5670bfd3e9d6a733b3f90c686605aa7eec8c4996032" +checksum = "002a1b3dbf967edfafc32655d0f377ab0bb7b994aa1d32c8cc7e9b8bf3ebb8f0" dependencies = [ "proc-macro2", "quote", @@ -1879,7 +1879,7 @@ checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.55", ] [[package]] @@ -1971,7 +1971,7 @@ dependencies = [ "proc-macro2", "quote", "rustc-hash", - "syn 2.0.53", + "syn 2.0.55", "tl-scheme", ] @@ -1990,9 +1990,9 @@ dependencies = [ [[package]] name = "tokio" -version = "1.36.0" +version = "1.37.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61285f6515fa018fb2d1e46eb21223fff441ee8db5d0f1435e8ab4f5cdb80931" +checksum = "1adbebffeca75fcfd058afa480fb6c0b81e165a0323f9c9d39c9697e37c46787" dependencies = [ "backtrace", "bytes", @@ -2015,7 +2015,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.55", ] [[package]] @@ -2063,7 +2063,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.55", ] [[package]] @@ -2418,7 +2418,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.55", "wasm-bindgen-shared", ] @@ -2440,7 +2440,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.55", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -2698,7 +2698,7 @@ checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.55", ] [[package]] @@ -2709,9 +2709,9 @@ checksum = "525b4ec142c6b68a2d10f01f7bbf6755599ca3f81ea53b8431b7dd348f5fdb2d" [[package]] name = "zstd-sys" -version = "2.0.9+zstd.1.5.5" +version = "2.0.10+zstd.1.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e16efa8a874a0481a574084d34cc26fdb3b99627480f785888deb6386506656" +checksum = "c253a4914af5bafc8fa8c86ee400827e83cf6ec01195ec1f1ed8441bf00d65aa" dependencies = [ 
"cc", "pkg-config", diff --git a/storage/src/db/file_db/mapped_file.rs b/storage/src/db/file_db/mapped_file.rs index 897c29f38..4594397aa 100644 --- a/storage/src/db/file_db/mapped_file.rs +++ b/storage/src/db/file_db/mapped_file.rs @@ -1,41 +1,32 @@ -use std::fs; +use std::fs::File; +use std::os::fd::AsRawFd; use std::path::Path; -use anyhow::Result; - -use crate::FileDb; - /// Memory buffer that is mapped to a file pub struct MappedFile { - file_db: FileDb, + file: File, length: usize, ptr: *mut libc::c_void, } impl MappedFile { /// Opens a file and maps it to memory. Resizes the file to `length` bytes. - pub fn new

(path: &P, length: usize) -> Result - where - P: AsRef, - { - let file_db = FileDb::new( - path, - fs::OpenOptions::new() - .write(true) - .read(true) - .truncate(true) - .create(true), - )?; - file_db.file.set_len(length as u64)?; + pub fn new>(path: P, length: usize) -> std::io::Result { + let file = std::fs::OpenOptions::new() + .write(true) + .read(true) + .truncate(true) + .create(true) + .open(path)?; - Self::from_existing_file(file_db) + file.set_len(length as u64)?; + + Self::from_existing_file(file) } /// Opens an existing file and maps it to memory - pub fn from_existing_file(file_db: FileDb) -> Result { - use std::os::unix::io::AsRawFd; - - let length = file_db.file.metadata()?.len() as usize; + pub fn from_existing_file(file: File) -> std::io::Result { + let length = file.metadata()?.len() as usize; // SAFETY: File was opened successfully, file mode is RW, offset is aligned let ptr = unsafe { @@ -44,24 +35,20 @@ impl MappedFile { length, libc::PROT_READ | libc::PROT_WRITE, libc::MAP_SHARED, - file_db.file.as_raw_fd(), + file.as_raw_fd(), 0, ) }; if ptr == libc::MAP_FAILED { - return Err(std::io::Error::last_os_error().into()); + return Err(std::io::Error::last_os_error()); } if unsafe { libc::madvise(ptr, length, libc::MADV_RANDOM) } != 0 { - return Err(std::io::Error::last_os_error().into()); + return Err(std::io::Error::last_os_error()); } - Ok(Self { - file_db, - length, - ptr, - }) + Ok(Self { file, length, ptr }) } /// Mapped buffer length in bytes @@ -102,8 +89,8 @@ impl Drop for MappedFile { panic!("failed to unmap file: {}", std::io::Error::last_os_error()); } - let _ = self.file_db.file.set_len(0); - let _ = self.file_db.file.sync_all(); + let _ = self.file.set_len(0); + let _ = self.file.sync_all(); } } diff --git a/storage/src/db/file_db/mod.rs b/storage/src/db/file_db/mod.rs index fbff8234a..be5c69323 100644 --- a/storage/src/db/file_db/mod.rs +++ b/storage/src/db/file_db/mod.rs @@ -1,60 +1,149 @@ -use std::fs::File; -use std::io::{Read, Seek, SeekFrom, Write}; +use std::fs::{File, OpenOptions}; +use std::os::fd::AsRawFd; use std::path::{Path, PathBuf}; +use std::sync::Arc; -pub use mapped_file::MappedFile; +pub use self::mapped_file::MappedFile; +pub use self::temp_file::TempFile; mod mapped_file; +mod temp_file; -pub struct FileDb { - file: File, - _path: PathBuf, -} +#[derive(Clone)] +pub struct FileDb(Arc); impl FileDb { - pub fn new

(path: P, options: &mut std::fs::OpenOptions) -> std::io::Result + pub fn new

(root: P) -> Self where P: AsRef, { - let file = options.open(&path)?; + Self(Arc::new(FileDbInner { + base_dir: root.as_ref().to_path_buf(), + })) + } - Ok(Self { - file, - _path: PathBuf::from(path.as_ref()), - }) + pub fn path(&self) -> &Path { + &self.0.base_dir } - pub fn file(&self) -> &File { - &self.file + pub fn ensure_exists(&self) -> std::io::Result<()> { + std::fs::create_dir_all(&self.0.base_dir) + } + + pub fn create_dir_all>(&self, rel_path: P) -> std::io::Result<()> { + std::fs::create_dir_all(self.0.base_dir.join(rel_path)) + } + + pub fn remove_file>(&self, rel_path: P) -> std::io::Result<()> { + std::fs::remove_file(self.0.base_dir.join(rel_path)) + } + + pub fn file>(&self, rel_path: P) -> FileBuilder { + FileBuilder { + path: self.0.base_dir.join(rel_path.as_ref()), + options: std::fs::OpenOptions::new(), + prealloc: None, + } } -} -impl Write for FileDb { - fn write(&mut self, buf: &[u8]) -> std::io::Result { - self.file.write(buf) + pub fn subdir>(&self, rel_path: P) -> Self { + Self(Arc::new(FileDbInner { + base_dir: self.0.base_dir.join(rel_path), + })) } - #[inline] - fn flush(&mut self) -> std::io::Result<()> { - self.file.flush() + pub fn file_exists>(&self, rel_path: P) -> bool { + self.path().join(rel_path).is_file() } + + pub fn entries(&self) -> std::io::Result { + std::fs::read_dir(&self.0.base_dir) + } +} + +struct FileDbInner { + base_dir: PathBuf, } -impl Read for FileDb { - fn read(&mut self, buf: &mut [u8]) -> std::io::Result { - let bytes = self.file.read(buf)?; - Ok(bytes) +pub struct FileBuilder { + path: PathBuf, + options: OpenOptions, + prealloc: Option, +} + +impl FileBuilder { + pub fn open(&self) -> std::io::Result { + let file = self.options.open(&self.path)?; + if let Some(prealloc) = self.prealloc { + alloc_file(&file, prealloc)?; + } + Ok(file) + } + + pub fn open_as_temp(&self) -> std::io::Result { + let file = self.open()?; + Ok(TempFile::new(self.path.clone(), file)) + } + + pub fn open_as_mapped(&self) -> std::io::Result { + match self.prealloc { + Some(length) => MappedFile::new(&self.path, length), + None => MappedFile::from_existing_file(self.open()?), + } + } + + pub fn append(&mut self, append: bool) -> &mut Self { + self.options.append(append); + self + } + + pub fn create(&mut self, create: bool) -> &mut Self { + self.options.create(create); + self + } + + pub fn create_new(&mut self, create_new: bool) -> &mut Self { + self.options.create_new(create_new); + self + } + + pub fn read(&mut self, read: bool) -> &mut Self { + self.options.read(read); + self + } + + pub fn truncate(&mut self, truncate: bool) -> &mut Self { + self.options.truncate(truncate); + self + } + + pub fn write(&mut self, write: bool) -> &mut Self { + self.options.write(write); + self + } + + pub fn prealloc(&mut self, prealloc: usize) -> &mut Self { + self.prealloc = Some(prealloc); + self } } -impl Seek for FileDb { - fn seek(&mut self, pos: SeekFrom) -> std::io::Result { - self.file.seek(pos) +#[cfg(not(target_os = "macos"))] +fn alloc_file(file: &File, len: usize) -> std::io::Result<()> { + let res = unsafe { libc::posix_fallocate(file.as_raw_fd(), 0, len as i64) }; + if res == 0 { + Ok(()) + } else { + Err(std::io::Error::last_os_error()) } } -impl From for File { - fn from(val: FileDb) -> Self { - val.file +#[cfg(target_os = "macos")] +pub fn alloc_file(file: &File, len: usize) -> std::io::Result<()> { + let res = unsafe { libc::ftruncate(file.as_raw_fd(), len as i64) }; + if res < 0 { + Err(std::io::Error::last_os_error()) + } else { + Ok(()) } } diff 
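
Note: after this rework `FileDb` is no longer an open file but a cheaply cloneable
handle to a directory, and `FileBuilder` mirrors `std::fs::OpenOptions` with
optional preallocation on top (`posix_fallocate` on Linux, `ftruncate` on macOS).
A sketch of the intended call shape (paths and sizes are illustrative only):

    let files_dir = FileDb::new("/var/tycho/file_storage");
    let states = files_dir.subdir("states");
    states.ensure_exists()?;

    let file = states
        .file("state.boc")
        .create(true)
        .write(true)
        .prealloc(1024 * 1024) // reserve space up front
        .open()?;
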
diff --git a/storage/src/db/file_db/temp_file.rs b/storage/src/db/file_db/temp_file.rs
new file mode 100644
index 000000000..afa233b73
--- /dev/null
+++ b/storage/src/db/file_db/temp_file.rs
@@ -0,0 +1,67 @@
+use std::fs::File;
+use std::mem::ManuallyDrop;
+use std::path::PathBuf;
+
+pub struct TempFile {
+    file: ManuallyDrop<File>,
+    file_path: Option<PathBuf>,
+}
+
+impl TempFile {
+    pub fn new(path: PathBuf, file: File) -> Self {
+        Self {
+            file: ManuallyDrop::new(file),
+            file_path: Some(path),
+        }
+    }
+
+    pub fn disarm(mut self) -> File {
+        self.file_path = None;
+
+        // SAFETY: File will not be dropped as `file_path` is `None`.
+        unsafe { ManuallyDrop::take(&mut self.file) }
+    }
+}
+
+impl AsRef<File> for TempFile {
+    #[inline]
+    fn as_ref(&self) -> &File {
+        &self.file
+    }
+}
+
+impl AsMut<File> for TempFile {
+    #[inline]
+    fn as_mut(&mut self) -> &mut File {
+        &mut self.file
+    }
+}
+
+impl std::ops::Deref for TempFile {
+    type Target = File;
+
+    #[inline]
+    fn deref(&self) -> &Self::Target {
+        &self.file
+    }
+}
+
+impl std::ops::DerefMut for TempFile {
+    #[inline]
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.file
+    }
+}
+
+impl Drop for TempFile {
+    fn drop(&mut self) {
+        if let Some(file_path) = self.file_path.take() {
+            // SAFETY: File will only be dropped once.
+            unsafe { ManuallyDrop::drop(&mut self.file) };
+
+            if let Err(e) = std::fs::remove_file(&file_path) {
+                tracing::error!(path = %file_path.display(), "failed to remove file: {e:?}");
+            }
+        }
+    }
+}
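
Note: `TempFile` is an RAII guard: the wrapped `File` lives in a `ManuallyDrop` and
the path in an `Option`, so dropping the guard closes the handle and unlinks the
file, while `disarm()` clears the path and hands the `File` back, skipping the
deletion. Intended usage, assuming the `states` directory handle from the sketch
above:

    let mut tmp = states.file("state.temp").create(true).write(true).open_as_temp()?;
    std::io::Write::write_all(&mut *tmp, b"...")?; // DerefMut yields &mut File
    let kept: std::fs::File = tmp.disarm();        // keep the file; no cleanup on drop
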
diff --git a/storage/src/db/kv_db/migrations/mod.rs b/storage/src/db/kv_db/migrations/mod.rs
index 710c980eb..17a01170f 100644
--- a/storage/src/db/kv_db/migrations/mod.rs
+++ b/storage/src/db/kv_db/migrations/mod.rs
@@ -34,7 +34,7 @@ impl VersionProvider for NodeStateVersionProvider {
                 let slice = version.as_ref();
                 slice
                     .try_into()
-                    .map_err(|_| weedb::Error::InvalidDbVersion)
+                    .map_err(|_e| weedb::Error::InvalidDbVersion)
                     .map(Some)
             }
             None => Ok(None),
diff --git a/storage/src/lib.rs b/storage/src/lib.rs
index 73b31cfdc..e671a026c 100644
--- a/storage/src/lib.rs
+++ b/storage/src/lib.rs
@@ -8,11 +8,14 @@ pub use self::store::*;
 mod db;
 mod models;
 mod store;
-mod utils;
 
-pub struct Storage {
-    file_db_path: PathBuf,
+mod util {
+    pub use stored_value::*;
+
+    mod stored_value;
+}
 
+pub struct Storage {
     runtime_storage: Arc<RuntimeStorage>,
     block_handle_storage: Arc<BlockHandleStorage>,
     block_storage: Arc<BlockStorage>,
@@ -28,27 +31,24 @@ impl Storage {
         file_db_path: PathBuf,
         max_cell_cache_size_bytes: u64,
     ) -> anyhow::Result<Arc<Self>> {
+        let files_dir = FileDb::new(file_db_path);
+
         let block_handle_storage = Arc::new(BlockHandleStorage::new(db.clone()));
         let runtime_storage = Arc::new(RuntimeStorage::new(block_handle_storage.clone()));
         let block_storage = Arc::new(BlockStorage::new(db.clone(), block_handle_storage.clone())?);
         let shard_state_storage = ShardStateStorage::new(
             db.clone(),
+            &files_dir,
             block_handle_storage.clone(),
             block_storage.clone(),
-            file_db_path.clone(),
             max_cell_cache_size_bytes,
         )?;
-        let persistent_state_storage = PersistentStateStorage::new(
-            file_db_path.clone(),
-            db.clone(),
-            block_handle_storage.clone(),
-        )?;
+        let persistent_state_storage =
+            PersistentStateStorage::new(db.clone(), &files_dir, block_handle_storage.clone())?;
         let node_state_storage = NodeStateStorage::new(db.clone());
         let block_connection_storage = BlockConnectionStorage::new(db);
 
         Ok(Arc::new(Self {
-            file_db_path,
-
             block_handle_storage,
             block_storage,
             shard_state_storage,
@@ -59,32 +59,37 @@ impl Storage {
         }))
     }
 
-    #[inline(always)]
+    #[inline]
     pub fn runtime_storage(&self) -> &RuntimeStorage {
         &self.runtime_storage
     }
 
-    #[inline(always)]
+    #[inline]
     pub fn persistent_state_storage(&self) -> &PersistentStateStorage {
         &self.persistent_state_storage
     }
 
-    #[inline(always)]
+    #[inline]
     pub fn block_handle_storage(&self) -> &BlockHandleStorage {
         &self.block_handle_storage
     }
 
-    #[inline(always)]
+    #[inline]
+    pub fn block_storage(&self) -> &BlockStorage {
+        &self.block_storage
+    }
+
+    #[inline]
     pub fn block_connection_storage(&self) -> &BlockConnectionStorage {
         &self.block_connection_storage
     }
 
-    #[inline(always)]
+    #[inline]
     pub fn shard_state_storage(&self) -> &ShardStateStorage {
         &self.shard_state_storage
     }
 
-    #[inline(always)]
+    #[inline]
     pub fn node_state(&self) -> &NodeStateStorage {
         &self.node_state_storage
     }
diff --git a/storage/src/models/block_meta.rs b/storage/src/models/block_meta.rs
index b5bbcb4f4..3581ab629 100644
--- a/storage/src/models/block_meta.rs
+++ b/storage/src/models/block_meta.rs
@@ -4,7 +4,7 @@ use anyhow::Result;
 use bytes::Buf;
 use everscale_types::models::BlockInfo;
 
-use crate::utils::{StoredValue, StoredValueBuffer};
+use crate::util::{StoredValue, StoredValueBuffer};
 
 #[derive(Debug, Copy, Clone)]
 pub struct BlockMetaData {
@@ -30,16 +30,6 @@ pub struct BriefBlockInfo {
     pub after_split: bool,
 }
 
-impl BriefBlockInfo {
-    pub fn with_mc_seqno(self, mc_seqno: u32) -> BlockMetaData {
-        BlockMetaData {
-            is_key_block: self.is_key_block,
-            gen_utime: self.gen_utime,
-            mc_ref_seqno: Some(mc_seqno),
-        }
-    }
-}
-
 impl From<&BlockInfo> for BriefBlockInfo {
     fn from(info: &BlockInfo) -> Self {
         Self {
diff --git a/storage/src/store/block/mod.rs b/storage/src/store/block/mod.rs
index e9d683f1e..1e1b45efd 100644
--- a/storage/src/store/block/mod.rs
+++ b/storage/src/store/block/mod.rs
@@ -17,7 +17,7 @@ use tycho_block_util::block::{
 };
 
 use crate::db::*;
-use crate::utils::*;
+use crate::util::*;
 use crate::{models::*, BlockHandleStorage, HandleCreationStatus};
 
 pub struct BlockStorage {
diff --git a/storage/src/store/block_connection/mod.rs b/storage/src/store/block_connection/mod.rs
index 3a6dd2f55..17d0a139c 100644
--- a/storage/src/store/block_connection/mod.rs
+++ b/storage/src/store/block_connection/mod.rs
@@ -5,7 +5,7 @@ use everscale_types::models::*;
 
 use crate::db::*;
 use crate::models::*;
-use crate::utils::*;
+use crate::util::*;
 
 /// Stores relations between blocks
 pub struct BlockConnectionStorage {
diff --git a/storage/src/store/block_handle/mod.rs b/storage/src/store/block_handle/mod.rs
index 1790047c9..15341fed9 100644
--- a/storage/src/store/block_handle/mod.rs
+++ b/storage/src/store/block_handle/mod.rs
@@ -8,7 +8,7 @@ use tycho_util::FastDashMap;
 
 use crate::db::*;
 use crate::models::*;
-use crate::utils::*;
+use crate::util::*;
 
 pub struct BlockHandleStorage {
     db: Arc<Db>,
diff --git a/storage/src/store/node_state/mod.rs b/storage/src/store/node_state/mod.rs
index a9ae5926a..d8e3f4fcb 100644
--- a/storage/src/store/node_state/mod.rs
+++ b/storage/src/store/node_state/mod.rs
@@ -5,7 +5,7 @@ use everscale_types::models::*;
 use parking_lot::Mutex;
 
 use crate::db::*;
-use crate::utils::*;
+use crate::util::*;
 
 pub struct NodeStateStorage {
     db: Arc<Db>,
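
Note: the reworked `CellWriter` below keeps the same two-phase scheme: a
depth-first pass over RocksDB writes cells into a temp file in reverse
topological order while assigning each unique cell a remapped index, and a second
pass emits the BOC layout (header, per-cell size index, then the cells, with child
references stored as fixed-width `REF_SIZE` indices). What changes is the
plumbing: files now come from the `FileDb`/`FileBuilder` API, `prealloc` sizes the
output up front, and `TempFile` replaces the hand-rolled `RemoveOnDrop` guard. The
offset width is chosen from the payload size exactly as in the code below:

    fn number_of_bytes_to_fit(l: u64) -> u32 {
        8 - l.leading_zeros() / 8
    }

    assert_eq!(number_of_bytes_to_fit(0xff), 1);
    assert_eq!(number_of_bytes_to_fit(0x100), 2);
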
diff --git a/storage/src/store/persistent_state/cell_writer.rs b/storage/src/store/persistent_state/cell_writer.rs
index 036ba6a19..bd3f6f3dd 100644
--- a/storage/src/store/persistent_state/cell_writer.rs
+++ b/storage/src/store/persistent_state/cell_writer.rs
@@ -1,46 +1,38 @@
 use std::collections::hash_map;
-use std::fs;
-use std::fs::File;
 use std::io::{Read, Seek, SeekFrom, Write};
-use std::os::unix::io::AsRawFd;
-use std::path::{Path, PathBuf};
+use std::path::PathBuf;
 use std::sync::atomic::{AtomicBool, Ordering};
 use std::sync::Arc;
 
 use anyhow::{Context, Result};
-use everscale_types::cell::CellDescriptor;
+use everscale_types::cell::{CellDescriptor, HashBytes};
 use smallvec::SmallVec;
-
 use tycho_util::FastHashMap;
 
-use crate::db::Db;
-use crate::FileDb;
+use crate::db::{Db, FileDb, TempFile};
 
 pub struct CellWriter<'a> {
     db: &'a Db,
-    base_path: &'a Path,
+    states_dir: &'a FileDb,
+    block_root_hash: &'a HashBytes,
 }
 
 impl<'a> CellWriter<'a> {
     #[allow(unused)]
-    pub fn new(db: &'a Db, base_path: &'a Path) -> Self {
-        Self { db, base_path }
+    pub fn new(db: &'a Db, states_dir: &'a FileDb, block_root_hash: &'a HashBytes) -> Self {
+        Self {
+            db,
+            states_dir,
+            block_root_hash,
+        }
     }
 
     #[allow(unused)]
     pub fn write(&self, root_hash: &[u8; 32], is_cancelled: Option<Arc<AtomicBool>>) -> Result<()> {
-        // Open target file in advance to get the error immediately (if any)
-        let file_db = FileDb::new(
-            self.base_path,
-            fs::OpenOptions::new()
-                .write(true)
-                .create(true)
-                .truncate(true),
-        )?;
-
         // Load cells from db in reverse order into the temp file
         tracing::info!("started loading cells");
-        let mut intermediate = write_rev_cells(self.db, self.base_path, root_hash, &is_cancelled)
+        let mut intermediate = self
+            .write_rev(root_hash, &is_cancelled)
             .context("Failed to write reversed cells data")?;
         tracing::info!("finished loading cells");
         let cell_count = intermediate.cell_sizes.len() as u32;
@@ -49,14 +41,22 @@ impl<'a> CellWriter<'a> {
         let offset_size = std::cmp::min(number_of_bytes_to_fit(intermediate.total_size), 8) as usize;
 
-        // Reserve space for the file
-        alloc_file(
-            file_db.file(),
-            22 + offset_size * (1 + cell_count as usize) + (intermediate.total_size as usize),
-        )?;
+        // Compute file size
+        let file_size =
+            22 + offset_size * (1 + cell_count as usize) + (intermediate.total_size as usize);
+
+        // Create states file
+        let mut file = self
+            .states_dir
+            .file(self.file_name())
+            .create(true)
+            .write(true)
+            .truncate(true)
+            .prealloc(file_size)
+            .open()?;
 
         // Write cells data in BOC format
-        let mut buffer = std::io::BufWriter::with_capacity(FILE_BUFFER_LEN / 2, file_db.file());
+        let mut buffer = std::io::BufWriter::with_capacity(FILE_BUFFER_LEN / 2, file);
 
         // Header             | current len: 0
         let flags = 0b1000_0000u8 | (REF_SIZE as u8);
@@ -128,194 +128,194 @@ impl<'a> CellWriter<'a> {
     }
 
     pub fn remove(&self) -> Result<()> {
-        fs::remove_file(self.base_path).context(format!(
-            "Failed to remove persistent state file {:?}",
-            self.base_path
+        let file_name = self.file_name();
+        self.states_dir.remove_file(&file_name).context(format!(
+            "Failed to remove persistent state file {}",
+            self.states_dir.path().join(file_name).display()
         ))
     }
-}
 
-struct IntermediateState {
-    file: File,
-    cell_sizes: Vec<u8>,
-    total_size: u64,
-    _remove_on_drop: RemoveOnDrop,
-}
-
-fn write_rev_cells<P: AsRef<Path>>(
-    db: &Db,
-    base_path: P,
-    root_hash: &[u8; 32],
-    is_cancelled: &Option<Arc<AtomicBool>>,
-) -> Result<IntermediateState> {
-    enum StackItem {
-        New([u8; 32]),
-        Loaded(LoadedCell),
-    }
-
-    struct LoadedCell {
-        hash: [u8; 32],
-        descriptor: CellDescriptor,
-        data: SmallVec<[u8; 128]>,
-        indices: SmallVec<[u32; 4]>,
-    }
+    fn write_rev(
+        &self,
+        root_hash: &[u8; 32],
+        is_cancelled: &Option<Arc<AtomicBool>>,
+    ) -> Result<IntermediateState> {
+        enum StackItem {
+            New([u8; 32]),
+            Loaded(LoadedCell),
+        }
 
-    let file_path = base_path.as_ref().with_extension("temp");
+        struct LoadedCell {
+            hash: [u8; 32],
+            descriptor: CellDescriptor,
+            data: SmallVec<[u8; 128]>,
+            indices: SmallVec<[u32; 4]>,
+        }
 
-    let file_db = FileDb::new(
-        &file_path,
-        fs::OpenOptions::new()
-            .read(true)
-            .write(true)
-            .create(true)
-            .truncate(true),
-    )?;
-    let remove_on_drop = RemoveOnDrop(file_path);
+        let mut file = self
+            .states_dir
+            .file(self.file_name().with_extension("temp"))
+            .create(true)
+            .write(true)
+            .read(true)
+            .truncate(true)
+            .open_as_temp()?;
 
-    let raw = db.raw().as_ref();
-    let read_options = db.cells.read_config();
-    let cf = db.cells.cf();
+        let raw = self.db.raw().as_ref();
+        let read_options = self.db.cells.read_config();
+        let cf = self.db.cells.cf();
 
-    let mut references_buffer = SmallVec::<[[u8; 32]; 4]>::with_capacity(4);
+        let mut references_buffer = SmallVec::<[[u8; 32]; 4]>::with_capacity(4);
 
-    let mut indices = FastHashMap::default();
-    let mut remap = FastHashMap::default();
-    let mut cell_sizes = Vec::<u8>::with_capacity(FILE_BUFFER_LEN);
-    let mut stack = Vec::with_capacity(32);
+        let mut indices = FastHashMap::default();
+        let mut remap = FastHashMap::default();
+        let mut cell_sizes = Vec::<u8>::with_capacity(FILE_BUFFER_LEN);
+        let mut stack = Vec::with_capacity(32);
 
-    let mut total_size = 0u64;
-    let mut iteration = 0u32;
-    let mut remap_index = 0u32;
+        let mut total_size = 0u64;
+        let mut iteration = 0u32;
+        let mut remap_index = 0u32;
 
-    stack.push((iteration, StackItem::New(*root_hash)));
-    indices.insert(*root_hash, (iteration, false));
+        stack.push((iteration, StackItem::New(*root_hash)));
+        indices.insert(*root_hash, (iteration, false));
 
-    let mut temp_file_buffer = std::io::BufWriter::with_capacity(FILE_BUFFER_LEN, file_db.into());
+        let mut temp_file_buffer = std::io::BufWriter::with_capacity(FILE_BUFFER_LEN, &mut *file);
 
-    while let Some((index, data)) = stack.pop() {
-        if let Some(is_cancelled) = is_cancelled {
-            if iteration % 1000 == 0 && is_cancelled.load(Ordering::Relaxed) {
-                anyhow::bail!("Persistent state writing cancelled.")
+        while let Some((index, data)) = stack.pop() {
+            if let Some(is_cancelled) = is_cancelled {
+                if iteration % 1000 == 0 && is_cancelled.load(Ordering::Relaxed) {
+                    anyhow::bail!("Persistent state writing cancelled.")
+                }
             }
-        }
 
-        match data {
-            StackItem::New(hash) => {
-                let value = raw
-                    .get_pinned_cf_opt(&cf, hash, read_options)?
+            match data {
+                StackItem::New(hash) => {
+                    let value = raw
+                        .get_pinned_cf_opt(&cf, hash, read_options)?
+                        .ok_or(CellWriterError::CellNotFound)?;
 
-                let value = match crate::refcount::strip_refcount(value.as_ref()) {
-                    Some(bytes) => bytes,
-                    None => {
-                        return Err(CellWriterError::CellNotFound.into());
+                    let value = match crate::refcount::strip_refcount(value.as_ref()) {
+                        Some(bytes) => bytes,
+                        None => {
+                            return Err(CellWriterError::CellNotFound.into());
+                        }
+                    };
+                    if value.is_empty() {
+                        return Err(CellWriterError::InvalidCell.into());
                     }
-                };
-                if value.is_empty() {
-                    return Err(CellWriterError::InvalidCell.into());
-                }
 
-                let (descriptor, data) = deserialize_cell(value, &mut references_buffer)
-                    .ok_or(CellWriterError::InvalidCell)?;
+                    let (descriptor, data) = deserialize_cell(value, &mut references_buffer)
+                        .ok_or(CellWriterError::InvalidCell)?;
 
-                let mut reference_indices = SmallVec::with_capacity(references_buffer.len());
+                    let mut reference_indices = SmallVec::with_capacity(references_buffer.len());
 
-                let mut indices_buffer = [0; 4];
-                let mut keys = [std::ptr::null(); 4];
-                let mut preload_count = 0;
+                    let mut indices_buffer = [0; 4];
+                    let mut keys = [std::ptr::null(); 4];
+                    let mut preload_count = 0;
 
-                for hash in &references_buffer {
-                    let index = match indices.entry(*hash) {
-                        hash_map::Entry::Vacant(entry) => {
-                            remap_index += 1;
-
-                            entry.insert((remap_index, false));
+                    for hash in &references_buffer {
+                        let index = match indices.entry(*hash) {
+                            hash_map::Entry::Vacant(entry) => {
+                                remap_index += 1;
 
-                            indices_buffer[preload_count] = remap_index;
-                            keys[preload_count] = hash.as_ptr();
-                            preload_count += 1;
+                                entry.insert((remap_index, false));
 
-                            remap_index
-                        }
-                        hash_map::Entry::Occupied(entry) => {
-                            let (remap_index, written) = *entry.get();
-                            if !written {
                                 indices_buffer[preload_count] = remap_index;
                                 keys[preload_count] = hash.as_ptr();
                                 preload_count += 1;
-                            }
-                            remap_index
-                        }
-                    };
+
+                                remap_index
+                            }
+                            hash_map::Entry::Occupied(entry) => {
+                                let (remap_index, written) = *entry.get();
+                                if !written {
+                                    indices_buffer[preload_count] = remap_index;
+                                    keys[preload_count] = hash.as_ptr();
+                                    preload_count += 1;
+                                }
+                                remap_index
+                            }
+                        };
 
-                    reference_indices.push(index);
+                        reference_indices.push(index);
                     }
 
-                stack.push((
-                    index,
-                    StackItem::Loaded(LoadedCell {
-                        hash,
-                        descriptor,
-                        data: SmallVec::from_slice(data),
-                        indices: reference_indices,
-                    }),
-                ));
-
-                if preload_count > 0 {
-                    indices_buffer[..preload_count].reverse();
-                    keys[..preload_count].reverse();
-
-                    for i in 0..preload_count {
-                        let index = indices_buffer[i];
-                        let hash = unsafe { *keys[i].cast::<[u8; 32]>() };
-                        stack.push((index, StackItem::New(hash)));
+                    stack.push((
+                        index,
+                        StackItem::Loaded(LoadedCell {
+                            hash,
+                            descriptor,
+                            data: SmallVec::from_slice(data),
+                            indices: reference_indices,
+                        }),
+                    ));
+
+                    if preload_count > 0 {
+                        indices_buffer[..preload_count].reverse();
+                        keys[..preload_count].reverse();
+
+                        for i in 0..preload_count {
+                            let index = indices_buffer[i];
+                            let hash = unsafe { *keys[i].cast::<[u8; 32]>() };
+                            stack.push((index, StackItem::New(hash)));
+                        }
                     }
-                }
 
-                references_buffer.clear();
-            }
-            StackItem::Loaded(loaded) => {
-                match remap.entry(index) {
-                    hash_map::Entry::Vacant(entry) => {
-                        entry.insert(iteration.to_be_bytes());
+                    references_buffer.clear();
                 }
-                    hash_map::Entry::Occupied(_) => continue,
-                };
+                StackItem::Loaded(loaded) => {
+                    match remap.entry(index) {
+                        hash_map::Entry::Vacant(entry) => {
+                            entry.insert(iteration.to_be_bytes());
+                        }
+                        hash_map::Entry::Occupied(_) => continue,
+                    };
 
-                if let Some((_, written)) = indices.get_mut(&loaded.hash) {
-                    *written = true;
+                    if let Some((_, written)) = indices.get_mut(&loaded.hash) {
+                        *written = true;
+                    }
-
iteration += 1;
-                if iteration % 100000 == 0 {
-                    tracing::info!(iteration);
-                }
+                    if let Some((_, written)) = indices.get_mut(&loaded.hash) {
+                        *written = true;
+                    }
 
-                let cell_size = 2 + loaded.data.len() + loaded.indices.len() * REF_SIZE;
-                cell_sizes.push(cell_size as u8);
-                total_size += cell_size as u64;
-
-                temp_file_buffer.write_all(&[loaded.descriptor.d1, loaded.descriptor.d2])?;
-                temp_file_buffer.write_all(&loaded.data)?;
-                for index in loaded.indices {
-                    let index = remap.get(&index).with_context(|| {
-                        format!("Child not found. Iteration {iteration}. Child {index}")
-                    })?;
-                    temp_file_buffer.write_all(index)?;
+                    iteration += 1;
+                    if iteration % 100000 == 0 {
+                        tracing::info!(iteration);
+                    }
+
+                    let cell_size = 2 + loaded.data.len() + loaded.indices.len() * REF_SIZE;
+                    cell_sizes.push(cell_size as u8);
+                    total_size += cell_size as u64;
+
+                    temp_file_buffer.write_all(&[loaded.descriptor.d1, loaded.descriptor.d2])?;
+                    temp_file_buffer.write_all(&loaded.data)?;
+                    for index in loaded.indices {
+                        let index = remap.get(&index).with_context(|| {
+                            format!("Child not found. Iteration {iteration}. Child {index}")
+                        })?;
+                        temp_file_buffer.write_all(index)?;
+                    }
                 }
             }
         }
+
+        drop(temp_file_buffer);
+
+        file.flush()?;
+
+        Ok(IntermediateState {
+            file,
+            cell_sizes,
+            total_size,
+        })
     }
 
-    let mut file: File = temp_file_buffer.into_inner()?;
-    file.flush()?;
+    fn file_name(&self) -> PathBuf {
+        PathBuf::from(self.block_root_hash.to_string())
+    }
+}
 
-    Ok(IntermediateState {
-        file,
-        cell_sizes,
-        total_size,
-        _remove_on_drop: remove_on_drop,
-    })
+struct IntermediateState {
+    file: TempFile,
+    cell_sizes: Vec<u8>,
+    total_size: u64,
 }
 
 fn deserialize_cell<'a>(
@@ -355,40 +355,10 @@ fn deserialize_cell<'a>(
     Some((descriptor, data))
 }
 
-#[cfg(not(target_os = "macos"))]
-fn alloc_file(file: &File, len: usize) -> std::io::Result<()> {
-    let res = unsafe { libc::posix_fallocate(file.as_raw_fd(), 0, len as i64) };
-    if res == 0 {
-        Ok(())
-    } else {
-        Err(std::io::Error::last_os_error())
-    }
-}
-
-#[cfg(target_os = "macos")]
-pub fn alloc_file(file: &File, len: usize) -> std::io::Result<()> {
-    let res = unsafe { libc::ftruncate(file.as_raw_fd(), len as i64) };
-    if res < 0 {
-        Err(std::io::Error::last_os_error())
-    } else {
-        Ok(())
-    }
-}
-
 fn number_of_bytes_to_fit(l: u64) -> u32 {
     8 - l.leading_zeros() / 8
 }
 
-struct RemoveOnDrop(PathBuf);
-
-impl Drop for RemoveOnDrop {
-    fn drop(&mut self) {
-        if let Err(e) = fs::remove_file(&self.0) {
-            tracing::error!(path = %self.0.display(), "failed to remove file: {e:?}");
-        }
-    }
-}
-
 struct Index {
     value_len: usize,
     offset: usize,
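Context for the hunk above, not part of the patch itself: `IntermediateState` carries the per-cell sizes and the running total collected by `write`, and `number_of_bytes_to_fit` (kept above) derives the narrowest offset width from `total_size` (300 bytes fits in two-byte offsets, for example). The `Index { value_len, offset }` bookkeeping hints at how the two are combined when the final file is assembled; a minimal sketch, with an illustrative name and return shape:

    // Sketch only: consumes `cell_sizes`/`total_size` as collected by `write` above.
    fn offset_width_and_offsets(cell_sizes: &[u8], total_size: u64) -> (u32, Vec<u64>) {
        // Same arithmetic as `number_of_bytes_to_fit`.
        let offset_width = 8 - total_size.leading_zeros() / 8;

        // One pass over the recorded sizes yields each cell's starting offset.
        let mut offsets = Vec::with_capacity(cell_sizes.len());
        let mut offset = 0u64;
        for &size in cell_sizes {
            offsets.push(offset);
            offset += size as u64;
        }
        debug_assert_eq!(offset, total_size);
        (offset_width, offsets)
    }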
diff --git a/storage/src/store/persistent_state/mod.rs b/storage/src/store/persistent_state/mod.rs
index 038d68e8c..da6858b2c 100644
--- a/storage/src/store/persistent_state/mod.rs
+++ b/storage/src/store/persistent_state/mod.rs
@@ -1,4 +1,3 @@
-use std::fs;
 use std::io::{BufReader, Read, Seek, SeekFrom};
 use std::path::PathBuf;
 use std::sync::atomic::{AtomicBool, Ordering};
@@ -17,28 +16,30 @@ use crate::FileDb;
 
 mod cell_writer;
 
 const KEY_BLOCK_UTIME_STEP: u32 = 86400;
+const BASE_DIR: &str = "states";
 
 pub struct PersistentStateStorage {
-    block_handle_storage: Arc<BlockHandleStorage>,
-    storage_path: PathBuf,
-    db: Arc<Db>,
+    db: Arc<Db>,
+    storage_dir: FileDb,
+    block_handle_storage: Arc<BlockHandleStorage>,
     is_cancelled: Arc<AtomicBool>,
 }
 
 impl PersistentStateStorage {
     pub fn new(
-        file_db_path: PathBuf,
         db: Arc<Db>,
+        files_dir: &FileDb,
         block_handle_storage: Arc<BlockHandleStorage>,
     ) -> Result<Self> {
-        let dir = file_db_path.join("states");
-        fs::create_dir_all(&dir)?;
-        let is_cancelled = Arc::new(Default::default());
+        let storage_dir = files_dir.subdir(BASE_DIR);
+        storage_dir.ensure_exists()?;
+
+        let is_cancelled = Arc::new(AtomicBool::new(false));
 
         Ok(Self {
-            block_handle_storage,
-            storage_path: dir,
-            db,
+            db,
+            storage_dir,
+            block_handle_storage,
             is_cancelled,
         })
     }
@@ -51,12 +52,13 @@ impl PersistentStateStorage {
     ) -> Result<()> {
         let block_id = *block_id;
         let root_hash = *root_hash;
-        let db = self.db.clone();
         let is_cancelled = Some(self.is_cancelled.clone());
-        let base_path = self.get_state_file_path(mc_block_id, &block_id);
+
+        let db = self.db.clone();
+        let states_dir = self.prepare_persistent_states_dir(mc_block_id)?;
 
         tokio::task::spawn_blocking(move || {
-            let cell_writer = cell_writer::CellWriter::new(&db, &base_path);
+            let cell_writer = cell_writer::CellWriter::new(&db, &states_dir, &block_id.root_hash);
             match cell_writer.write(&root_hash.0, is_cancelled) {
                 Ok(()) => {
                     tracing::info!(
@@ -87,18 +89,20 @@ impl PersistentStateStorage {
         offset: u64,
         size: u64,
     ) -> Option<Bytes> {
-        let path = self.get_state_file_path(mc_block_id, block_id);
+        let path = self
+            .mc_states_dir(mc_block_id)
+            .join(block_id.root_hash.to_string());
 
         tokio::task::spawn_blocking(move || {
             // TODO: cache file handles
-            let mut file_db = FileDb::new(path, fs::OpenOptions::new().read(true)).ok()?;
+            let mut file = std::fs::OpenOptions::new().read(true).open(path).ok()?;
 
-            if let Err(e) = file_db.seek(SeekFrom::Start(offset)) {
+            if let Err(e) = file.seek(SeekFrom::Start(offset)) {
                 tracing::error!("failed to seek state file offset: {e:?}");
                 return None;
             }
 
-            let mut buf_reader = BufReader::new(file_db.file());
+            let mut buf_reader = BufReader::new(file);
 
             let mut result = BytesMut::zeroed(size as usize);
             let mut result_cursor = 0;
@@ -133,24 +137,22 @@ impl PersistentStateStorage {
 
     pub fn state_exists(&self, mc_block_id: &BlockId, block_id: &BlockId) -> bool {
         // TODO: cache file handles
-        self.get_state_file_path(mc_block_id, block_id).is_file()
+        self.mc_states_dir(mc_block_id)
+            .join(block_id.root_hash.to_string())
+            .is_file()
     }
 
-    pub fn prepare_persistent_states_dir(&self, mc_block: &BlockId) -> Result<()> {
-        let dir_path = mc_block.seqno.to_string();
-        let path = self.storage_path.join(dir_path);
-        if !path.exists() {
+    pub fn prepare_persistent_states_dir(&self, mc_block: &BlockId) -> Result<FileDb> {
+        let states_dir = self.storage_dir.subdir(mc_block.seqno.to_string());
+        if !states_dir.path().is_dir() {
             tracing::info!(mc_block = %mc_block, "creating persistent state directory");
-            fs::create_dir(path)?;
+            states_dir.ensure_exists()?;
         }
-        Ok(())
+        Ok(states_dir)
     }
 
-    fn get_state_file_path(&self, mc_block_id: &BlockId, block_id: &BlockId) -> PathBuf {
-        self.storage_path
-            .clone()
-            .join(mc_block_id.seqno.to_string())
-            .join(block_id.root_hash.to_string())
+    fn mc_states_dir(&self, mc_block_id: &BlockId) -> PathBuf {
+        self.storage_dir.path().join(mc_block_id.seqno.to_string())
     }
 
     pub fn cancel(&self) {
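The helpers above pin down the on-disk layout: one `states` root, one directory per masterchain seqno, one file per block root hash. A hypothetical helper equivalent to the `mc_states_dir(..).join(..)` call sites (name and signature are assumptions, not part of the patch):

    use std::path::{Path, PathBuf};

    // <files_dir>/states/<mc_seqno>/<block_root_hash>
    // `states_root` corresponds to `self.storage_dir.path()` above.
    fn state_file_path(states_root: &Path, mc_seqno: u32, block_root_hash: &str) -> PathBuf {
        states_root.join(mc_seqno.to_string()).join(block_root_hash)
    }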
@@ -197,7 +199,7 @@
         let mut directories_to_remove: Vec<PathBuf> = Vec::new();
         let mut files_to_remove: Vec<PathBuf> = Vec::new();
 
-        for entry in fs::read_dir(&self.storage_path)?.flatten() {
+        for entry in self.storage_dir.entries()?.flatten() {
             let path = entry.path();
 
             if path.is_file() {
@@ -220,14 +222,14 @@ impl PersistentStateStorage {
 
         for dir in directories_to_remove {
             tracing::info!(dir = %dir.display(), "removing an old persistent state directory");
-            if let Err(e) = fs::remove_dir_all(&dir) {
+            if let Err(e) = std::fs::remove_dir_all(&dir) {
                 tracing::error!(dir = %dir.display(), "failed to remove an old persistent state: {e:?}");
             }
         }
 
         for file in files_to_remove {
             tracing::info!(file = %file.display(), "removing file");
-            if let Err(e) = fs::remove_file(&file) {
+            if let Err(e) = std::fs::remove_file(&file) {
                 tracing::error!(file = %file.display(), "failed to remove file: {e:?}");
             }
         }
diff --git a/storage/src/store/shard_state/mod.rs b/storage/src/store/shard_state/mod.rs
index 8533eb200..f4a3bdeda 100644
--- a/storage/src/store/shard_state/mod.rs
+++ b/storage/src/store/shard_state/mod.rs
@@ -1,4 +1,3 @@
-use std::path::PathBuf;
 use std::sync::atomic::{AtomicUsize, Ordering};
 use std::sync::Arc;
 use std::time::Instant;
@@ -13,7 +12,7 @@ use self::cell_storage::*;
 use self::replace_transaction::ShardStateReplaceTransaction;
 
 use crate::db::*;
-use crate::utils::*;
+use crate::util::*;
 use crate::{models::BlockHandle, BlockHandleStorage, BlockStorage};
 
 mod cell_storage;
@@ -21,13 +20,15 @@ mod entries_buffer;
 mod replace_transaction;
 mod shard_state_reader;
 
+const DOWNLOADS_DIR: &str = "downloads";
+
 pub struct ShardStateStorage {
     db: Arc<Db>,
+    downloads_dir: FileDb,
 
     block_handle_storage: Arc<BlockHandleStorage>,
     block_storage: Arc<BlockStorage>,
     cell_storage: Arc<CellStorage>,
-    downloads_dir: Arc<PathBuf>,
 
     gc_lock: tokio::sync::Mutex<()>,
     min_ref_mc_state: Arc<MinRefMcStateTracker>,
@@ -38,12 +39,14 @@ pub struct ShardStateStorage {
 impl ShardStateStorage {
     pub fn new(
         db: Arc<Db>,
+        files_dir: &FileDb,
         block_handle_storage: Arc<BlockHandleStorage>,
         block_storage: Arc<BlockStorage>,
-        file_db_path: PathBuf,
         cache_size_bytes: u64,
     ) -> Result<Self> {
-        let downloads_dir = prepare_file_db_dir(file_db_path, "downloads")?;
+        let downloads_dir = files_dir.subdir(DOWNLOADS_DIR);
+        downloads_dir.ensure_exists()?;
+
         let cell_storage = CellStorage::new(db.clone(), cache_size_bytes);
 
         let res = Self {
@@ -155,9 +158,9 @@ impl ShardStateStorage {
     pub fn begin_replace(&'_ self, block_id: &BlockId) -> Result<ShardStateReplaceTransaction<'_>> {
         ShardStateReplaceTransaction::new(
             &self.db,
+            &self.downloads_dir,
             &self.cell_storage,
             &self.min_ref_mc_state,
-            self.downloads_dir.as_ref(),
             block_id,
         )
     }
@@ -359,12 +362,6 @@ pub struct ShardStateStorageMetrics {
     pub max_new_sc_cell_count: usize,
 }
 
-fn prepare_file_db_dir(file_db_path: PathBuf, folder: &str) -> Result<Arc<PathBuf>> {
-    let dir = Arc::new(file_db_path.join(folder));
-    std::fs::create_dir_all(dir.as_ref())?;
-    Ok(dir)
-}
-
 #[derive(thiserror::Error, Debug)]
 enum ShardStateStorageError {
     #[error("Not found")]
diff --git a/storage/src/store/shard_state/replace_transaction.rs b/storage/src/store/shard_state/replace_transaction.rs
index 8ffde75e6..b33421ddc 100644
--- a/storage/src/store/shard_state/replace_transaction.rs
+++ b/storage/src/store/shard_state/replace_transaction.rs
@@ -1,5 +1,6 @@
+use std::fs::File;
 use std::io::Write;
-use std::path::{Path, PathBuf};
+use std::path::PathBuf;
 use std::sync::Arc;
 
 use anyhow::{Context, Result};
@@ -10,7 +11,7 @@ use super::cell_storage::*;
 use super::entries_buffer::*;
 use super::shard_state_reader::*;
 use crate::db::*;
-use crate::utils::*;
+use crate::util::*;
 use tycho_block_util::state::*;
 use tycho_util::progress_bar::*;
 
@@ -27,17 +28,14 @@ pub struct ShardStateReplaceTransaction<'a> {
 }
 
 impl<'a> ShardStateReplaceTransaction<'a> {
-    pub fn new<P>(
+    pub fn new(
         db: &'a Db,
+        downloads_dir: &FileDb,
         cell_storage: &'a Arc<CellStorage>,
         min_ref_mc_state: &'a Arc<MinRefMcStateTracker>,
-        path: P,
         block_id: &BlockId,
-    ) -> Result<Self>
-    where
-        P: AsRef<Path>,
-    {
-        let file_ctx = FilesContext::new(path, block_id)?;
+    ) -> Result<Self> {
+        let file_ctx = FilesContext::new(downloads_dir, block_id)?;
 
         Ok(Self {
             db,
@@ -475,14 +473,11 @@ impl<'a> FinalizationContext<'a> {
 struct FilesContext {
     cells_path: PathBuf,
     hashes_path: PathBuf,
-    cells_file: Option<FileDb>,
+    cells_file: Option<File>,
 }
 
 impl FilesContext {
-    pub fn new<P>(root_path: P, block_id: &BlockId) -> Result<Self>
-    where
-        P: AsRef<Path>,
-    {
+    pub fn new(downloads_dir: &FileDb, block_id: &BlockId) -> Result<Self> {
         let block_id = format!(
             "({},{:016x},{})",
             block_id.shard.workchain(),
@@ -490,26 +485,25 @@ impl FilesContext {
             block_id.seqno
         );
 
-        let cells_path = root_path.as_ref().join(format!("state_cells_{block_id}"));
-        let hashes_path = root_path.as_ref().join(format!("state_hashes_{block_id}"));
+        let cells_file_name = format!("state_cells_{block_id}");
+        let hashes_file_name = format!("state_hashes_{block_id}");
 
-        let cells_file = Some(FileDb::new(
-            &cells_path,
-            std::fs::OpenOptions::new()
-                .write(true)
-                .create(true)
-                .truncate(true)
-                .read(true),
-        )?);
+        let cells_file = downloads_dir
+            .file(&cells_file_name)
+            .write(true)
+            .create(true)
+            .truncate(true)
+            .read(true)
+            .open()?;
 
         Ok(Self {
-            cells_file,
-            cells_path,
-            hashes_path,
+            cells_path: downloads_dir.path().join(cells_file_name),
+            hashes_path: downloads_dir.path().join(hashes_file_name),
+            cells_file: Some(cells_file),
         })
     }
 
-    pub fn cells_file(&mut self) -> Result<&mut FileDb> {
+    pub fn cells_file(&mut self) -> Result<&mut File> {
         match &mut self.cells_file {
             Some(file) => Ok(file),
             None => Err(FilesContextError::AlreadyFinalized.into()),
diff --git a/storage/src/utils/stored_value.rs b/storage/src/util/stored_value.rs
similarity index 97%
rename from storage/src/utils/stored_value.rs
rename to storage/src/util/stored_value.rs
index deaea8198..c37374412 100644
--- a/storage/src/utils/stored_value.rs
+++ b/storage/src/util/stored_value.rs
@@ -4,7 +4,6 @@ use smallvec::SmallVec;
 use anyhow::Result;
 use everscale_types::cell::HashBytes;
 use everscale_types::models::{BlockId, BlockIdShort, ShardIdent};
-use tycho_util::byte_reader::ByteOrderRead;
 
 /// A trait for writing or reading data from a stack-allocated buffer
 pub trait StoredValue {
@@ -104,8 +103,12 @@ impl StoredValue for BlockId {
         let shard = ShardIdent::deserialize(reader)?;
         let seqno = reader.get_u32();
-        let root_hash = HashBytes::from(reader.read_u256()?);
-        let file_hash = HashBytes::from(reader.read_u256()?);
+
+        let mut root_hash = HashBytes::default();
+        root_hash.0.copy_from_slice(&reader[..32]);
+        let mut file_hash = HashBytes::default();
+        file_hash.0.copy_from_slice(&reader[32..]);
+
         Ok(Self {
             shard,
             seqno,
diff --git a/storage/src/utils/mod.rs b/storage/src/utils/mod.rs
deleted file mode 100644
index d28b57e54..000000000
--- a/storage/src/utils/mod.rs
+++ /dev/null
@@ -1,3 +0,0 @@
-pub use self::stored_value::*;
-
-mod stored_value;
diff --git a/storage/tests/global-config.json b/storage/tests/global-config.json
deleted file mode 100644
index bd4533a2f..000000000
--- a/storage/tests/global-config.json
+++ /dev/null
@@ -1,22 +0,0 @@
-{
-  "@type": "config.global",
-  "dht": {
-    "@type": "dht.config.global",
-    "k": 6,
-    "a": 3,
-    "static_nodes": {
-      "@type": "dht.nodes",
-      "nodes": []
-    }
-  },
-  "validator": {
-    "@type": "validator.config.global",
-    "zero_state": {
-      "workchain": -1,
-      "shard": -9223372036854775808,
-      "seqno": 0,
-      "root_hash": "WP/KGheNr/cF3lQhblQzyb0ufYUAcNM004mXhHq56EU=",
-      "file_hash": "0nC4eylStbp9qnCq8KjDYb789NjS25L5ZA1UQwcIOOQ="
-    }
-  }
-}
\ No newline at end of file
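The deleted fixture contributed exactly one value, the zerostate block id, which the reworked test below inlines via `BlockId::from_str`. The textual form is colon-separated: workchain, shard in hex, seqno, then the root and file hashes in hex; the base64 hashes in the removed JSON decode to the same digests. A self-contained sketch using only the `FromStr` impl the test relies on:

    use std::str::FromStr;

    use everscale_types::models::BlockId;

    // <workchain>:<shard_hex>:<seqno>:<root_hash_hex>:<file_hash_hex>
    fn zerostate_id() -> BlockId {
        BlockId::from_str(
            "-1:8000000000000000:0:58ffca1a178daff705de54216e5433c9bd2e7d850070d334d38997847ab9e845:d270b87b2952b5ba7daa70aaf0a8c361befcf4d8d2db92f9640d5443070838e4",
        )
        .expect("valid block id literal")
    }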
diff --git a/storage/tests/mod.rs b/storage/tests/mod.rs
index 46294c55a..06ccfc2c8 100644
--- a/storage/tests/mod.rs
+++ b/storage/tests/mod.rs
@@ -1,11 +1,10 @@
-use anyhow::{anyhow, Result};
-use base64::prelude::BASE64_STANDARD;
-use base64::Engine;
+use std::str::FromStr;
+
+use anyhow::Result;
 use bytesize::ByteSize;
 use everscale_types::boc::Boc;
-use everscale_types::cell::{Cell, DynCell, HashBytes};
-use everscale_types::models::{BlockId, ShardIdent, ShardState};
-use serde::{Deserialize, Deserializer};
+use everscale_types::cell::{Cell, DynCell};
+use everscale_types::models::{BlockId, ShardState};
 use tycho_block_util::state::ShardStateStuff;
 use tycho_storage::{BlockMetaData, Db, DbOptions, Storage};
@@ -38,104 +37,6 @@ impl ShardStateCombined {
     }
 }
 
-#[derive(Deserialize)]
-struct GlobalConfigJson {
-    validator: ValidatorJson,
-}
-
-#[derive(Deserialize)]
-struct ValidatorJson {
-    zero_state: BlockIdJson,
-}
-
-#[derive(Debug, Default)]
-pub struct BlockIdJson {
-    workchain: i32,
-    shard: u64,
-    seqno: u32,
-    root_hash: HashBytes,
-    file_hash: HashBytes,
-}
-
-impl<'de> Deserialize<'de> for BlockIdJson {
-    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
-    where
-        D: Deserializer<'de>,
-    {
-        use serde::de::Error;
-
-        #[derive(Deserialize)]
-        struct BlockIdJsonHelper {
-            workchain: i32,
-            shard: i64,
-            seqno: u32,
-            root_hash: String,
-            file_hash: String,
-        }
-
-        let BlockIdJsonHelper {
-            workchain,
-            shard,
-            seqno,
-            root_hash,
-            file_hash,
-        } = BlockIdJsonHelper::deserialize(deserializer)?;
-
-        let shard = shard as u64;
-
-        let mut result = Self {
-            workchain,
-            shard,
-            seqno,
-            ..Default::default()
-        };
-
-        result.root_hash =
-            HashBytes::from_slice(&BASE64_STANDARD.decode(root_hash).map_err(Error::custom)?);
-
-        result.file_hash =
-            HashBytes::from_slice(&BASE64_STANDARD.decode(file_hash).map_err(Error::custom)?);
-
-        Ok(result)
-    }
-}
-
-impl TryFrom<BlockIdJson> for BlockId {
-    type Error = anyhow::Error;
-
-    fn try_from(value: BlockIdJson) -> Result<Self, Self::Error> {
-        Ok(Self {
-            shard: ShardIdent::new(value.workchain, value.shard)
-                .ok_or(anyhow!("Invalid ShardIdent"))?,
-            seqno: value.seqno,
-            root_hash: value.root_hash,
-            file_hash: value.file_hash,
-        })
-    }
-}
-
-#[derive(Debug)]
-struct GlobalConfig {
-    block_id: BlockId,
-}
-
-impl GlobalConfig {
-    pub fn from_file(path: impl AsRef<Path>) -> Result<Self> {
-        let data = std::fs::read_to_string(path.as_ref())?;
-        Ok(serde_json::from_str::<GlobalConfigJson>(&data)?.try_into()?)
-    }
-}
-
-impl TryFrom<GlobalConfigJson> for GlobalConfig {
-    type Error = anyhow::Error;
-
-    fn try_from(value: GlobalConfigJson) -> Result<Self, Self::Error> {
-        Ok(Self {
-            block_id: value.validator.zero_state.try_into()?,
-        })
-    }
-}
-
 fn compare_cells(orig_cell: &DynCell, stored_cell: &DynCell) {
     assert_eq!(orig_cell.repr_hash(), stored_cell.repr_hash());
@@ -177,17 +78,17 @@ async fn persistent_storage_everscale() -> Result<()> {
     // Read zerostate
     let zero_state_raw = ShardStateCombined::from_file("tests/everscale_zerostate.boc")?;
 
-    // Read global config
-    let global_config = GlobalConfig::from_file("tests/global-config.json")?;
+    // Parse block id
+    let block_id = BlockId::from_str("-1:8000000000000000:0:58ffca1a178daff705de54216e5433c9bd2e7d850070d334d38997847ab9e845:d270b87b2952b5ba7daa70aaf0a8c361befcf4d8d2db92f9640d5443070838e4")?;
 
     // Write zerostate to db
     let (handle, _) = storage.block_handle_storage().create_or_load_handle(
-        &global_config.block_id,
+        &block_id,
         BlockMetaData::zero_state(zero_state_raw.gen_utime().unwrap()),
     )?;
 
     let zerostate = ShardStateStuff::new(
-        global_config.block_id,
+        block_id,
         zero_state_raw.cell.clone(),
         storage.shard_state_storage().min_ref_mc_state(),
     )?;
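Only the first assertion of `compare_cells` is visible in the diff above; `repr_hash` commits to a cell's descriptor, data, and references, so that check alone already implies deep equality, and the elided body presumably walks the tree to localize mismatches. A hedged sketch of such a walk (helper name and body are assumptions, not the hidden test code):

    use everscale_types::cell::DynCell;

    // Recursive comparison in the spirit of `compare_cells`; assumes the
    // `data()`/`references()` accessors from everscale_types.
    fn compare_cells_deep(orig: &DynCell, stored: &DynCell) {
        assert_eq!(orig.repr_hash(), stored.repr_hash());
        assert_eq!(orig.data(), stored.data());
        for (a, b) in orig.references().zip(stored.references()) {
            compare_cells_deep(a, b);
        }
    }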
diff --git a/util/src/byte_reader.rs b/util/src/byte_reader.rs
deleted file mode 100644
index 5eb65aeda..000000000
--- a/util/src/byte_reader.rs
+++ /dev/null
@@ -1,11 +0,0 @@
-pub trait ByteOrderRead {
-    fn read_u256(&mut self) -> std::io::Result<[u8; 32]>;
-}
-
-impl<T: std::io::Read> ByteOrderRead for T {
-    fn read_u256(&mut self) -> std::io::Result<[u8; 32]> {
-        let mut buf = [0; 32];
-        self.read_exact(&mut buf)?;
-        Ok(buf)
-    }
-}
diff --git a/util/src/lib.rs b/util/src/lib.rs
index 1aa5a8d6e..40cbda55e 100644
--- a/util/src/lib.rs
+++ b/util/src/lib.rs
@@ -1,7 +1,6 @@
 use std::collections::HashMap;
 use std::collections::HashSet;
 
-pub mod byte_reader;
 pub mod progress_bar;
 pub mod serde_helpers;
 pub mod time;
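A closing note for reviewers: the `FileDb` type that the stores now receive is not defined in this diff. Its interface can be inferred from the call sites (`subdir`, `ensure_exists`, `path`, `entries`, and the `file(...)` open-options builder); the sketch below is a plausible reconstruction under that assumption, not the actual implementation:

    use std::fs::{self, File, OpenOptions};
    use std::path::{Path, PathBuf};

    // Assumed shape of `FileDb`; every signature here is inferred from usage.
    pub struct FileDb {
        path: PathBuf,
    }

    impl FileDb {
        pub fn path(&self) -> &Path {
            &self.path
        }

        pub fn subdir<P: AsRef<Path>>(&self, rel: P) -> FileDb {
            FileDb { path: self.path.join(rel) }
        }

        pub fn ensure_exists(&self) -> std::io::Result<()> {
            fs::create_dir_all(&self.path)
        }

        pub fn entries(&self) -> std::io::Result<fs::ReadDir> {
            fs::read_dir(&self.path)
        }

        // Mirrors `downloads_dir.file(..).write(true)...open()?` above.
        pub fn file<P: AsRef<Path>>(&self, rel: P) -> FileBuilder {
            FileBuilder {
                path: self.path.join(rel),
                options: OpenOptions::new(),
            }
        }
    }

    pub struct FileBuilder {
        path: PathBuf,
        options: OpenOptions,
    }

    impl FileBuilder {
        pub fn write(mut self, v: bool) -> Self { self.options.write(v); self }
        pub fn read(mut self, v: bool) -> Self { self.options.read(v); self }
        pub fn create(mut self, v: bool) -> Self { self.options.create(v); self }
        pub fn truncate(mut self, v: bool) -> Self { self.options.truncate(v); self }
        pub fn open(&self) -> std::io::Result<File> { self.options.open(&self.path) }
    }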