From ca81ee6aaa05734ff902d90511bc3a9ae4846917 Mon Sep 17 00:00:00 2001 From: RickiNano <81099017+RickiNano@users.noreply.github.com> Date: Thu, 22 Aug 2024 19:13:14 +0200 Subject: [PATCH 01/11] Use Cache_size setting instead of memory_multiplier --- nano/core_test/toml.cpp | 6 +++--- nano/lib/rocksdbconfig.cpp | 8 ++++---- nano/lib/rocksdbconfig.hpp | 2 +- nano/store/rocksdb/rocksdb.cpp | 25 ++++++++++++------------- nano/store/rocksdb/rocksdb.hpp | 3 +-- 5 files changed, 21 insertions(+), 23 deletions(-) diff --git a/nano/core_test/toml.cpp b/nano/core_test/toml.cpp index ce861da1d8..61c2b7fe02 100644 --- a/nano/core_test/toml.cpp +++ b/nano/core_test/toml.cpp @@ -239,7 +239,7 @@ TEST (toml, daemon_config_deserialize_defaults) ASSERT_EQ (conf.node.lmdb_config.map_size, defaults.node.lmdb_config.map_size); ASSERT_EQ (conf.node.rocksdb_config.enable, defaults.node.rocksdb_config.enable); - ASSERT_EQ (conf.node.rocksdb_config.memory_multiplier, defaults.node.rocksdb_config.memory_multiplier); + ASSERT_EQ (conf.node.rocksdb_config.cache_size, defaults.node.rocksdb_config.cache_size); ASSERT_EQ (conf.node.rocksdb_config.io_threads, defaults.node.rocksdb_config.io_threads); ASSERT_EQ (conf.node.optimistic_scheduler.enabled, defaults.node.optimistic_scheduler.enabled); @@ -573,7 +573,7 @@ TEST (toml, daemon_config_deserialize_no_defaults) [node.rocksdb] enable = true - memory_multiplier = 3 + cache_size = 3 io_threads = 99 [node.experimental] @@ -743,7 +743,7 @@ TEST (toml, daemon_config_deserialize_no_defaults) ASSERT_TRUE (conf.node.rocksdb_config.enable); ASSERT_EQ (nano::rocksdb_config::using_rocksdb_in_tests (), defaults.node.rocksdb_config.enable); - ASSERT_NE (conf.node.rocksdb_config.memory_multiplier, defaults.node.rocksdb_config.memory_multiplier); + ASSERT_NE (conf.node.rocksdb_config.cache_size, defaults.node.rocksdb_config.cache_size); ASSERT_NE (conf.node.rocksdb_config.io_threads, defaults.node.rocksdb_config.io_threads); ASSERT_NE (conf.node.optimistic_scheduler.enabled, defaults.node.optimistic_scheduler.enabled); diff --git a/nano/lib/rocksdbconfig.cpp b/nano/lib/rocksdbconfig.cpp index a12605d1f7..4c9a232b1c 100644 --- a/nano/lib/rocksdbconfig.cpp +++ b/nano/lib/rocksdbconfig.cpp @@ -5,7 +5,7 @@ nano::error nano::rocksdb_config::serialize_toml (nano::tomlconfig & toml) const { toml.put ("enable", enable, "Whether to use the RocksDB backend for the ledger database.\ntype:bool"); - toml.put ("memory_multiplier", memory_multiplier, "This will modify how much memory is used represented by 1 (low), 2 (medium), 3 (high). Default is 2.\ntype:uint8"); + toml.put ("cache_size", cache_size, "Amount of memory in MB used for caching for each table. Valid values are from 1 to 1024. Default is 64.\ntype:uint8"); toml.put ("io_threads", io_threads, "Number of threads to use with the background compaction and flushing.\ntype:uint32"); return toml.get_error (); } @@ -13,7 +13,7 @@ nano::error nano::rocksdb_config::serialize_toml (nano::tomlconfig & toml) const nano::error nano::rocksdb_config::deserialize_toml (nano::tomlconfig & toml) { toml.get_optional ("enable", enable); - toml.get_optional ("memory_multiplier", memory_multiplier); + toml.get_optional ("cache_size", cache_size); toml.get_optional ("io_threads", io_threads); // Validate ranges @@ -21,9 +21,9 @@ nano::error nano::rocksdb_config::deserialize_toml (nano::tomlconfig & toml) { toml.get_error ().set ("io_threads must be non-zero"); } - if (memory_multiplier < 1 || memory_multiplier > 3) + if (cache_size < 1 || cache_size > 1024) { - toml.get_error ().set ("memory_multiplier must be either 1, 2 or 3"); + toml.get_error ().set ("cache_size must be between 1 and 1024 MB"); } return toml.get_error (); diff --git a/nano/lib/rocksdbconfig.hpp b/nano/lib/rocksdbconfig.hpp index 232d320193..c2d178cfff 100644 --- a/nano/lib/rocksdbconfig.hpp +++ b/nano/lib/rocksdbconfig.hpp @@ -25,7 +25,7 @@ class rocksdb_config final static bool using_rocksdb_in_tests (); bool enable{ false }; - uint8_t memory_multiplier{ 2 }; + uint16_t cache_size{ 64 }; unsigned io_threads{ std::max (nano::hardware_concurrency () / 2, 1u) }; }; } diff --git a/nano/store/rocksdb/rocksdb.cpp b/nano/store/rocksdb/rocksdb.cpp index 60f7e95cee..f4368f6ff5 100644 --- a/nano/store/rocksdb/rocksdb.cpp +++ b/nano/store/rocksdb/rocksdb.cpp @@ -436,16 +436,15 @@ rocksdb::ColumnFamilyOptions nano::store::rocksdb::component::get_cf_options (st { ::rocksdb::ColumnFamilyOptions cf_options; auto const memtable_size_bytes = base_memtable_size_bytes (); - auto const block_cache_size_bytes = 1024ULL * 1024 * rocksdb_config.memory_multiplier * base_block_cache_size; if (cf_name_a == "blocks") { - std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options (block_cache_size_bytes * 4))); + std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options ())); cf_options = get_active_cf_options (table_factory, blocks_memtable_size_bytes ()); } else if (cf_name_a == "confirmation_height") { // Entries will not be deleted in the normal case, so can make memtables a lot bigger - std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options (block_cache_size_bytes))); + std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options ())); cf_options = get_active_cf_options (table_factory, memtable_size_bytes * 2); } else if (cf_name_a == "meta" || cf_name_a == "online_weight" || cf_name_a == "peers") @@ -463,7 +462,7 @@ rocksdb::ColumnFamilyOptions nano::store::rocksdb::component::get_cf_options (st else if (cf_name_a == "pending") { // Pending can have a lot of deletions too - std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options (block_cache_size_bytes))); + std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options ())); cf_options = get_active_cf_options (table_factory, memtable_size_bytes); // Number of files in level 0 which triggers compaction. Size of L0 and L1 should be kept similar as this is the only compaction which is single threaded @@ -475,34 +474,34 @@ rocksdb::ColumnFamilyOptions nano::store::rocksdb::component::get_cf_options (st else if (cf_name_a == "frontiers") { // Frontiers is only needed during bootstrap for legacy blocks - std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options (block_cache_size_bytes))); + std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options ())); cf_options = get_active_cf_options (table_factory, memtable_size_bytes); } else if (cf_name_a == "accounts") { // Can have deletions from rollbacks - std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options (block_cache_size_bytes * 2))); + std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options ())); cf_options = get_active_cf_options (table_factory, memtable_size_bytes); } else if (cf_name_a == "vote") { // No deletes it seems, only overwrites. - std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options (block_cache_size_bytes * 2))); + std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options ())); cf_options = get_active_cf_options (table_factory, memtable_size_bytes); } else if (cf_name_a == "pruned") { - std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options (block_cache_size_bytes * 2))); + std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options ())); cf_options = get_active_cf_options (table_factory, memtable_size_bytes); } else if (cf_name_a == "final_votes") { - std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options (block_cache_size_bytes * 2))); + std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options ())); cf_options = get_active_cf_options (table_factory, memtable_size_bytes); } else if (cf_name_a == "rep_weights") { - std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options (block_cache_size_bytes * 2))); + std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options ())); cf_options = get_active_cf_options (table_factory, memtable_size_bytes); } else if (cf_name_a == ::rocksdb::kDefaultColumnFamilyName) @@ -899,7 +898,7 @@ rocksdb::Options nano::store::rocksdb::component::get_db_options () return db_options; } -rocksdb::BlockBasedTableOptions nano::store::rocksdb::component::get_active_table_options (std::size_t lru_size) const +rocksdb::BlockBasedTableOptions nano::store::rocksdb::component::get_active_table_options () const { ::rocksdb::BlockBasedTableOptions table_options; @@ -914,7 +913,7 @@ rocksdb::BlockBasedTableOptions nano::store::rocksdb::component::get_active_tabl table_options.index_block_restart_interval = 16; // Block cache for reads - table_options.block_cache = ::rocksdb::NewLRUCache (lru_size); + table_options.block_cache = ::rocksdb::NewLRUCache (1024ULL * 1024 * rocksdb_config.cache_size); // Bloom filter to help with point reads. 10bits gives 1% false positive rate. table_options.filter_policy.reset (::rocksdb::NewBloomFilterPolicy (10, false)); @@ -1116,7 +1115,7 @@ unsigned long long nano::store::rocksdb::component::blocks_memtable_size_bytes ( unsigned long long nano::store::rocksdb::component::base_memtable_size_bytes () const { - return 1024ULL * 1024 * rocksdb_config.memory_multiplier * base_memtable_size; + return 1024ULL * 1024 * base_memtable_size; } // This is a ratio of the blocks memtable size to keep total write transaction commit size down. diff --git a/nano/store/rocksdb/rocksdb.hpp b/nano/store/rocksdb/rocksdb.hpp index 5d8b22bba7..f27316c1da 100644 --- a/nano/store/rocksdb/rocksdb.hpp +++ b/nano/store/rocksdb/rocksdb.hpp @@ -158,7 +158,7 @@ class component : public nano::store::component ::rocksdb::ColumnFamilyOptions get_common_cf_options (std::shared_ptr<::rocksdb::TableFactory> const & table_factory_a, unsigned long long memtable_size_bytes_a) const; ::rocksdb::ColumnFamilyOptions get_active_cf_options (std::shared_ptr<::rocksdb::TableFactory> const & table_factory_a, unsigned long long memtable_size_bytes_a) const; ::rocksdb::ColumnFamilyOptions get_small_cf_options (std::shared_ptr<::rocksdb::TableFactory> const & table_factory_a) const; - ::rocksdb::BlockBasedTableOptions get_active_table_options (std::size_t lru_size) const; + ::rocksdb::BlockBasedTableOptions get_active_table_options () const; ::rocksdb::BlockBasedTableOptions get_small_table_options () const; ::rocksdb::ColumnFamilyOptions get_cf_options (std::string const & cf_name_a) const; @@ -173,7 +173,6 @@ class component : public nano::store::component unsigned long long blocks_memtable_size_bytes () const; constexpr static int base_memtable_size = 16; - constexpr static int base_block_cache_size = 8; friend class nano::rocksdb_block_store_tombstone_count_Test; friend class rocksdb_block_store_upgrade_v21_v22_Test; From a470cd00fa3abdc86d2f8c7f5060310167ee06fd Mon Sep 17 00:00:00 2001 From: RickiNano <81099017+RickiNano@users.noreply.github.com> Date: Thu, 22 Aug 2024 19:29:47 +0200 Subject: [PATCH 02/11] Removed redundant function --- nano/store/rocksdb/rocksdb.cpp | 9 ++------- nano/store/rocksdb/rocksdb.hpp | 1 - 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/nano/store/rocksdb/rocksdb.cpp b/nano/store/rocksdb/rocksdb.cpp index f4368f6ff5..860fd124e8 100644 --- a/nano/store/rocksdb/rocksdb.cpp +++ b/nano/store/rocksdb/rocksdb.cpp @@ -64,7 +64,7 @@ nano::store::rocksdb::component::component (nano::logger & logger_a, std::filesy logger{ logger_a }, constants{ constants }, rocksdb_config{ rocksdb_config_a }, - max_block_write_batch_num_m{ nano::narrow_cast (blocks_memtable_size_bytes () / (2 * (sizeof (nano::block_type) + nano::state_block::size + nano::block_sideband::size (nano::block_type::state)))) }, + max_block_write_batch_num_m{ nano::narrow_cast (base_memtable_size_bytes () / (2 * (sizeof (nano::block_type) + nano::state_block::size + nano::block_sideband::size (nano::block_type::state)))) }, cf_name_table_map{ create_cf_name_table_map () } { boost::system::error_code error_mkdir, error_chmod; @@ -439,7 +439,7 @@ rocksdb::ColumnFamilyOptions nano::store::rocksdb::component::get_cf_options (st if (cf_name_a == "blocks") { std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options ())); - cf_options = get_active_cf_options (table_factory, blocks_memtable_size_bytes ()); + cf_options = get_active_cf_options (table_factory, base_memtable_size_bytes ()); } else if (cf_name_a == "confirmation_height") { @@ -1108,11 +1108,6 @@ void nano::store::rocksdb::component::serialize_memory_stats (boost::property_tr json.put ("block-cache-usage", val); } -unsigned long long nano::store::rocksdb::component::blocks_memtable_size_bytes () const -{ - return base_memtable_size_bytes (); -} - unsigned long long nano::store::rocksdb::component::base_memtable_size_bytes () const { return 1024ULL * 1024 * base_memtable_size; diff --git a/nano/store/rocksdb/rocksdb.hpp b/nano/store/rocksdb/rocksdb.hpp index f27316c1da..3cf29364a1 100644 --- a/nano/store/rocksdb/rocksdb.hpp +++ b/nano/store/rocksdb/rocksdb.hpp @@ -170,7 +170,6 @@ class component : public nano::store::component std::vector<::rocksdb::ColumnFamilyDescriptor> create_column_families (); unsigned long long base_memtable_size_bytes () const; - unsigned long long blocks_memtable_size_bytes () const; constexpr static int base_memtable_size = 16; From a0d87da0c4cc04974d6ddf843f9a60bb9b06fb0b Mon Sep 17 00:00:00 2001 From: RickiNano <81099017+RickiNano@users.noreply.github.com> Date: Thu, 22 Aug 2024 19:41:33 +0200 Subject: [PATCH 03/11] Simplify table setup --- nano/store/rocksdb/rocksdb.cpp | 43 +++------------------------------- nano/store/rocksdb/rocksdb.hpp | 2 +- 2 files changed, 4 insertions(+), 41 deletions(-) diff --git a/nano/store/rocksdb/rocksdb.cpp b/nano/store/rocksdb/rocksdb.cpp index 860fd124e8..61a7e2b4ac 100644 --- a/nano/store/rocksdb/rocksdb.cpp +++ b/nano/store/rocksdb/rocksdb.cpp @@ -436,18 +436,8 @@ rocksdb::ColumnFamilyOptions nano::store::rocksdb::component::get_cf_options (st { ::rocksdb::ColumnFamilyOptions cf_options; auto const memtable_size_bytes = base_memtable_size_bytes (); - if (cf_name_a == "blocks") - { - std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options ())); - cf_options = get_active_cf_options (table_factory, base_memtable_size_bytes ()); - } - else if (cf_name_a == "confirmation_height") - { - // Entries will not be deleted in the normal case, so can make memtables a lot bigger - std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options ())); - cf_options = get_active_cf_options (table_factory, memtable_size_bytes * 2); - } - else if (cf_name_a == "meta" || cf_name_a == "online_weight" || cf_name_a == "peers") + + if (cf_name_a == "meta" || cf_name_a == "online_weight" || cf_name_a == "peers") { // Meta - It contains just version key // Online weight - Periodically deleted @@ -471,39 +461,12 @@ rocksdb::ColumnFamilyOptions nano::store::rocksdb::component::get_cf_options (st // L1 size, compaction is triggered for L0 at this size (2 SST files in L1) cf_options.max_bytes_for_level_base = memtable_size_bytes * 2; } - else if (cf_name_a == "frontiers") + else if (cf_name_a == "blocks" || cf_name_a == "frontiers" || cf_name_a == "accounts" || cf_name_a == "vote" || cf_name_a == "pruned" || cf_name_a == "final_votes" || cf_name_a == "rep_weights" || cf_name_a == "confirmation_height") { // Frontiers is only needed during bootstrap for legacy blocks std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options ())); cf_options = get_active_cf_options (table_factory, memtable_size_bytes); } - else if (cf_name_a == "accounts") - { - // Can have deletions from rollbacks - std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options ())); - cf_options = get_active_cf_options (table_factory, memtable_size_bytes); - } - else if (cf_name_a == "vote") - { - // No deletes it seems, only overwrites. - std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options ())); - cf_options = get_active_cf_options (table_factory, memtable_size_bytes); - } - else if (cf_name_a == "pruned") - { - std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options ())); - cf_options = get_active_cf_options (table_factory, memtable_size_bytes); - } - else if (cf_name_a == "final_votes") - { - std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options ())); - cf_options = get_active_cf_options (table_factory, memtable_size_bytes); - } - else if (cf_name_a == "rep_weights") - { - std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options ())); - cf_options = get_active_cf_options (table_factory, memtable_size_bytes); - } else if (cf_name_a == ::rocksdb::kDefaultColumnFamilyName) { // Do nothing. diff --git a/nano/store/rocksdb/rocksdb.hpp b/nano/store/rocksdb/rocksdb.hpp index 3cf29364a1..851feb491a 100644 --- a/nano/store/rocksdb/rocksdb.hpp +++ b/nano/store/rocksdb/rocksdb.hpp @@ -171,7 +171,7 @@ class component : public nano::store::component std::vector<::rocksdb::ColumnFamilyDescriptor> create_column_families (); unsigned long long base_memtable_size_bytes () const; - constexpr static int base_memtable_size = 16; + constexpr static int base_memtable_size = 32; friend class nano::rocksdb_block_store_tombstone_count_Test; friend class rocksdb_block_store_upgrade_v21_v22_Test; From eebd3fecc4818a80f76a5ee9261b148978159e52 Mon Sep 17 00:00:00 2001 From: RickiNano <81099017+RickiNano@users.noreply.github.com> Date: Thu, 22 Aug 2024 22:15:16 +0200 Subject: [PATCH 04/11] Use default values --- nano/store/rocksdb/rocksdb.cpp | 71 +--------------------------------- 1 file changed, 1 insertion(+), 70 deletions(-) diff --git a/nano/store/rocksdb/rocksdb.cpp b/nano/store/rocksdb/rocksdb.cpp index 61a7e2b4ac..d0c917c119 100644 --- a/nano/store/rocksdb/rocksdb.cpp +++ b/nano/store/rocksdb/rocksdb.cpp @@ -405,30 +405,6 @@ rocksdb::ColumnFamilyOptions nano::store::rocksdb::component::get_common_cf_opti ::rocksdb::ColumnFamilyOptions cf_options; cf_options.table_factory = table_factory_a; - // (1 active, 1 inactive) - auto num_memtables = 2; - - // Each level is a multiple of the above. If L1 is 512MB. L2 will be 512 * 8 = 2GB. L3 will be 2GB * 8 = 16GB, and so on... - cf_options.max_bytes_for_level_multiplier = 8; - - // Although this should be the default provided by RocksDB, not setting this is causing sequence conflict checks if not using - cf_options.max_write_buffer_size_to_maintain = memtable_size_bytes_a * num_memtables; - - // Files older than this (1 day) will be scheduled for compaction when there is no other background work. This can lead to more writes however. - cf_options.ttl = 1 * 24 * 60 * 60; - - // Multiplier for each level - cf_options.target_file_size_multiplier = 10; - - // Size of level 1 sst files - cf_options.target_file_size_base = memtable_size_bytes_a; - - // Size of each memtable - cf_options.write_buffer_size = memtable_size_bytes_a; - - // Number of memtables to keep in memory - cf_options.max_write_buffer_number = num_memtables; - return cf_options; } @@ -825,31 +801,10 @@ rocksdb::Options nano::store::rocksdb::component::get_db_options () db_options.create_if_missing = true; db_options.create_missing_column_families = true; - // TODO: review if this should be changed due to the unchecked table removal. - // Enable whole key bloom filter in memtables for ones with memtable_prefix_bloom_size_ratio set (unchecked table currently). - // It can potentially reduce CPU usage for point-look-ups. - db_options.memtable_whole_key_filtering = true; - - // Sets the compaction priority - db_options.compaction_pri = ::rocksdb::CompactionPri::kMinOverlappingRatio; - - // Start aggressively flushing WAL files when they reach over 1GB - db_options.max_total_wal_size = 1 * 1024 * 1024 * 1024LL; - // Optimize RocksDB. This is the easiest way to get RocksDB to perform well db_options.IncreaseParallelism (rocksdb_config.io_threads); db_options.OptimizeLevelStyleCompaction (); - // Adds a separate write queue for memtable/WAL - db_options.enable_pipelined_write = true; - - // Default is 16, setting to -1 allows faster startup times for SSDs by allowings more files to be read in parallel. - db_options.max_file_opening_threads = -1; - - // The MANIFEST file contains a history of all file operations since the last time the DB was opened and is replayed during DB open. - // Default is 1GB, lowering this to avoid replaying for too long (100MB) - db_options.max_manifest_file_size = 100 * 1024 * 1024ULL; - // Not compressing any SST files for compatibility reasons. db_options.compression = ::rocksdb::kNoCompression; @@ -867,26 +822,15 @@ rocksdb::BlockBasedTableOptions nano::store::rocksdb::component::get_active_tabl // Improve point lookup performance be using the data block hash index (uses about 5% more space). table_options.data_block_index_type = ::rocksdb::BlockBasedTableOptions::DataBlockIndexType::kDataBlockBinaryAndHash; - table_options.data_block_hash_table_util_ratio = 0.75; // Using format_version=4 significantly reduces the index block size, in some cases around 4-5x. // This frees more space in block cache, which would result in higher hit rate for data and filter blocks, // or offer the same performance with a smaller block cache size. table_options.format_version = 4; - table_options.index_block_restart_interval = 16; // Block cache for reads table_options.block_cache = ::rocksdb::NewLRUCache (1024ULL * 1024 * rocksdb_config.cache_size); - // Bloom filter to help with point reads. 10bits gives 1% false positive rate. - table_options.filter_policy.reset (::rocksdb::NewBloomFilterPolicy (10, false)); - - // Increasing block_size decreases memory usage and space amplification, but increases read amplification. - table_options.block_size = 16 * 1024ULL; - - // Whether level 0 index and filter blocks are stored in block_cache - table_options.pin_l0_filter_and_index_blocks_in_cache = true; - return table_options; } @@ -895,8 +839,6 @@ rocksdb::BlockBasedTableOptions nano::store::rocksdb::component::get_small_table ::rocksdb::BlockBasedTableOptions table_options; // Improve point lookup performance be using the data block hash index (uses about 5% more space). table_options.data_block_index_type = ::rocksdb::BlockBasedTableOptions::DataBlockIndexType::kDataBlockBinaryAndHash; - table_options.data_block_hash_table_util_ratio = 0.75; - table_options.block_size = 1024ULL; return table_options; } @@ -916,18 +858,7 @@ rocksdb::ColumnFamilyOptions nano::store::rocksdb::component::get_small_cf_optio ::rocksdb::ColumnFamilyOptions nano::store::rocksdb::component::get_active_cf_options (std::shared_ptr<::rocksdb::TableFactory> const & table_factory_a, unsigned long long memtable_size_bytes_a) const { - auto cf_options = get_common_cf_options (table_factory_a, memtable_size_bytes_a); - - // Number of files in level 0 which triggers compaction. Size of L0 and L1 should be kept similar as this is the only compaction which is single threaded - cf_options.level0_file_num_compaction_trigger = 4; - - // L1 size, compaction is triggered for L0 at this size (4 SST files in L1) - cf_options.max_bytes_for_level_base = memtable_size_bytes_a * 4; - - // Size target of levels are changed dynamically based on size of the last level - cf_options.level_compaction_dynamic_level_bytes = true; - - return cf_options; + return get_common_cf_options (table_factory_a, memtable_size_bytes_a); } void nano::store::rocksdb::component::on_flush (::rocksdb::FlushJobInfo const & flush_job_info_a) From 0e1010c8f33d7b32f69e097d4ba6e0560764b169 Mon Sep 17 00:00:00 2001 From: RickiNano <81099017+RickiNano@users.noreply.github.com> Date: Fri, 23 Aug 2024 17:21:20 +0200 Subject: [PATCH 05/11] Do not read ahead --- nano/store/rocksdb/rocksdb.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/nano/store/rocksdb/rocksdb.cpp b/nano/store/rocksdb/rocksdb.cpp index d0c917c119..4fe547f19d 100644 --- a/nano/store/rocksdb/rocksdb.cpp +++ b/nano/store/rocksdb/rocksdb.cpp @@ -774,6 +774,8 @@ int nano::store::rocksdb::component::clear (::rocksdb::ColumnFamilyHandle * colu ::rocksdb::ReadOptions read_options; ::rocksdb::WriteOptions write_options; ::rocksdb::WriteBatch write_batch; + read_options.readahead_size = 0; + std::unique_ptr<::rocksdb::Iterator> it (db->NewIterator (read_options, column_family)); for (it->SeekToFirst (); it->Valid (); it->Next ()) @@ -800,11 +802,10 @@ rocksdb::Options nano::store::rocksdb::component::get_db_options () ::rocksdb::Options db_options; db_options.create_if_missing = true; db_options.create_missing_column_families = true; - - // Optimize RocksDB. This is the easiest way to get RocksDB to perform well + // Set number of threads to use db_options.IncreaseParallelism (rocksdb_config.io_threads); + // Optimize RocksDB. This is the easiest way to get RocksDB to perform well db_options.OptimizeLevelStyleCompaction (); - // Not compressing any SST files for compatibility reasons. db_options.compression = ::rocksdb::kNoCompression; From de9aeff2c8fa9e1ab7e955b5d0406b8cbe6d0d22 Mon Sep 17 00:00:00 2001 From: RickiNano <81099017+RickiNano@users.noreply.github.com> Date: Sat, 24 Aug 2024 09:21:35 +0200 Subject: [PATCH 06/11] Use the same table options for all tables --- nano/store/rocksdb/rocksdb.cpp | 44 ++++------------------------------ nano/store/rocksdb/rocksdb.hpp | 2 +- 2 files changed, 6 insertions(+), 40 deletions(-) diff --git a/nano/store/rocksdb/rocksdb.cpp b/nano/store/rocksdb/rocksdb.cpp index 4fe547f19d..a43ff3414e 100644 --- a/nano/store/rocksdb/rocksdb.cpp +++ b/nano/store/rocksdb/rocksdb.cpp @@ -411,48 +411,14 @@ rocksdb::ColumnFamilyOptions nano::store::rocksdb::component::get_common_cf_opti rocksdb::ColumnFamilyOptions nano::store::rocksdb::component::get_cf_options (std::string const & cf_name_a) const { ::rocksdb::ColumnFamilyOptions cf_options; - auto const memtable_size_bytes = base_memtable_size_bytes (); - - if (cf_name_a == "meta" || cf_name_a == "online_weight" || cf_name_a == "peers") - { - // Meta - It contains just version key - // Online weight - Periodically deleted - // Peers - Cleaned periodically, a lot of deletions. This is never read outside of initializing? Keep this small - cf_options = get_small_cf_options (small_table_factory); - } - else if (cf_name_a == "cached_counts") - { - // Really small (keys are blocks tables, value is uint64_t) - cf_options = get_small_cf_options (small_table_factory); - } - else if (cf_name_a == "pending") - { - // Pending can have a lot of deletions too - std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options ())); - cf_options = get_active_cf_options (table_factory, memtable_size_bytes); - - // Number of files in level 0 which triggers compaction. Size of L0 and L1 should be kept similar as this is the only compaction which is single threaded - cf_options.level0_file_num_compaction_trigger = 2; - - // L1 size, compaction is triggered for L0 at this size (2 SST files in L1) - cf_options.max_bytes_for_level_base = memtable_size_bytes * 2; - } - else if (cf_name_a == "blocks" || cf_name_a == "frontiers" || cf_name_a == "accounts" || cf_name_a == "vote" || cf_name_a == "pruned" || cf_name_a == "final_votes" || cf_name_a == "rep_weights" || cf_name_a == "confirmation_height") - { - // Frontiers is only needed during bootstrap for legacy blocks - std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options ())); - cf_options = get_active_cf_options (table_factory, memtable_size_bytes); - } - else if (cf_name_a == ::rocksdb::kDefaultColumnFamilyName) + if (cf_name_a == ::rocksdb::kDefaultColumnFamilyName) { - // Do nothing. - } - else - { - debug_assert (false); + return cf_options; } - return cf_options; + auto const memtable_size_bytes = base_memtable_size_bytes (); + std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options ())); + return get_active_cf_options (table_factory, memtable_size_bytes); } std::vector nano::store::rocksdb::component::create_column_families () diff --git a/nano/store/rocksdb/rocksdb.hpp b/nano/store/rocksdb/rocksdb.hpp index 851feb491a..5f3c20ebce 100644 --- a/nano/store/rocksdb/rocksdb.hpp +++ b/nano/store/rocksdb/rocksdb.hpp @@ -171,7 +171,7 @@ class component : public nano::store::component std::vector<::rocksdb::ColumnFamilyDescriptor> create_column_families (); unsigned long long base_memtable_size_bytes () const; - constexpr static int base_memtable_size = 32; + constexpr static int base_memtable_size = 8; friend class nano::rocksdb_block_store_tombstone_count_Test; friend class rocksdb_block_store_upgrade_v21_v22_Test; From e57424cf172c3e3d1db5bf0e51ccead59fb69d04 Mon Sep 17 00:00:00 2001 From: RickiNano <81099017+RickiNano@users.noreply.github.com> Date: Sat, 24 Aug 2024 09:36:48 +0200 Subject: [PATCH 07/11] Remove unused functions --- nano/store/rocksdb/rocksdb.cpp | 33 ++------------------------------- nano/store/rocksdb/rocksdb.hpp | 5 +---- 2 files changed, 3 insertions(+), 35 deletions(-) diff --git a/nano/store/rocksdb/rocksdb.cpp b/nano/store/rocksdb/rocksdb.cpp index a43ff3414e..3ced05cbf1 100644 --- a/nano/store/rocksdb/rocksdb.cpp +++ b/nano/store/rocksdb/rocksdb.cpp @@ -80,7 +80,6 @@ nano::store::rocksdb::component::component (nano::logger & logger_a, std::filesy debug_assert (path_a.filename () == "rocksdb"); generate_tombstone_map (); - small_table_factory.reset (::rocksdb::NewBlockBasedTableFactory (get_small_table_options ())); // TODO: get_db_options () registers a listener for resetting tombstones, needs to check if it is a problem calling it more than once. auto options = get_db_options (); @@ -400,7 +399,7 @@ void nano::store::rocksdb::component::generate_tombstone_map () tombstone_map.emplace (std::piecewise_construct, std::forward_as_tuple (nano::tables::pending), std::forward_as_tuple (0, 25000)); } -rocksdb::ColumnFamilyOptions nano::store::rocksdb::component::get_common_cf_options (std::shared_ptr<::rocksdb::TableFactory> const & table_factory_a, unsigned long long memtable_size_bytes_a) const +rocksdb::ColumnFamilyOptions nano::store::rocksdb::component::get_common_cf_options (std::shared_ptr<::rocksdb::TableFactory> const & table_factory_a) const { ::rocksdb::ColumnFamilyOptions cf_options; cf_options.table_factory = table_factory_a; @@ -416,9 +415,8 @@ rocksdb::ColumnFamilyOptions nano::store::rocksdb::component::get_cf_options (st return cf_options; } - auto const memtable_size_bytes = base_memtable_size_bytes (); std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options ())); - return get_active_cf_options (table_factory, memtable_size_bytes); + return get_common_cf_options (table_factory); } std::vector nano::store::rocksdb::component::create_column_families () @@ -801,33 +799,6 @@ rocksdb::BlockBasedTableOptions nano::store::rocksdb::component::get_active_tabl return table_options; } -rocksdb::BlockBasedTableOptions nano::store::rocksdb::component::get_small_table_options () const -{ - ::rocksdb::BlockBasedTableOptions table_options; - // Improve point lookup performance be using the data block hash index (uses about 5% more space). - table_options.data_block_index_type = ::rocksdb::BlockBasedTableOptions::DataBlockIndexType::kDataBlockBinaryAndHash; - return table_options; -} - -rocksdb::ColumnFamilyOptions nano::store::rocksdb::component::get_small_cf_options (std::shared_ptr<::rocksdb::TableFactory> const & table_factory_a) const -{ - auto const memtable_size_bytes = 10000; - auto cf_options = get_common_cf_options (table_factory_a, memtable_size_bytes); - - // Number of files in level 0 which triggers compaction. Size of L0 and L1 should be kept similar as this is the only compaction which is single threaded - cf_options.level0_file_num_compaction_trigger = 1; - - // L1 size, compaction is triggered for L0 at this size (1 SST file in L1) - cf_options.max_bytes_for_level_base = memtable_size_bytes; - - return cf_options; -} - -::rocksdb::ColumnFamilyOptions nano::store::rocksdb::component::get_active_cf_options (std::shared_ptr<::rocksdb::TableFactory> const & table_factory_a, unsigned long long memtable_size_bytes_a) const -{ - return get_common_cf_options (table_factory_a, memtable_size_bytes_a); -} - void nano::store::rocksdb::component::on_flush (::rocksdb::FlushJobInfo const & flush_job_info_a) { // Reset appropriate tombstone counters diff --git a/nano/store/rocksdb/rocksdb.hpp b/nano/store/rocksdb/rocksdb.hpp index 5f3c20ebce..2210561c1b 100644 --- a/nano/store/rocksdb/rocksdb.hpp +++ b/nano/store/rocksdb/rocksdb.hpp @@ -155,11 +155,8 @@ class component : public nano::store::component void construct_column_family_mutexes (); ::rocksdb::Options get_db_options (); - ::rocksdb::ColumnFamilyOptions get_common_cf_options (std::shared_ptr<::rocksdb::TableFactory> const & table_factory_a, unsigned long long memtable_size_bytes_a) const; - ::rocksdb::ColumnFamilyOptions get_active_cf_options (std::shared_ptr<::rocksdb::TableFactory> const & table_factory_a, unsigned long long memtable_size_bytes_a) const; - ::rocksdb::ColumnFamilyOptions get_small_cf_options (std::shared_ptr<::rocksdb::TableFactory> const & table_factory_a) const; + ::rocksdb::ColumnFamilyOptions get_common_cf_options (std::shared_ptr<::rocksdb::TableFactory> const & table_factory_a) const; ::rocksdb::BlockBasedTableOptions get_active_table_options () const; - ::rocksdb::BlockBasedTableOptions get_small_table_options () const; ::rocksdb::ColumnFamilyOptions get_cf_options (std::string const & cf_name_a) const; void on_flush (::rocksdb::FlushJobInfo const &); From d0c957dceb108883a55b919109c725f3c6448037 Mon Sep 17 00:00:00 2001 From: RickiNano <81099017+RickiNano@users.noreply.github.com> Date: Sat, 24 Aug 2024 12:45:31 +0200 Subject: [PATCH 08/11] Handle memtable size and max writes per batch --- nano/store/rocksdb/rocksdb.cpp | 17 ++++++++++------- nano/store/rocksdb/rocksdb.hpp | 5 +++-- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/nano/store/rocksdb/rocksdb.cpp b/nano/store/rocksdb/rocksdb.cpp index 3ced05cbf1..6690494ccb 100644 --- a/nano/store/rocksdb/rocksdb.cpp +++ b/nano/store/rocksdb/rocksdb.cpp @@ -64,7 +64,7 @@ nano::store::rocksdb::component::component (nano::logger & logger_a, std::filesy logger{ logger_a }, constants{ constants }, rocksdb_config{ rocksdb_config_a }, - max_block_write_batch_num_m{ nano::narrow_cast (base_memtable_size_bytes () / (2 * (sizeof (nano::block_type) + nano::state_block::size + nano::block_sideband::size (nano::block_type::state)))) }, + max_block_write_batch_num_m{ calculate_max_block_write_batch_num () }, cf_name_table_map{ create_cf_name_table_map () } { boost::system::error_code error_mkdir, error_chmod; @@ -403,6 +403,7 @@ rocksdb::ColumnFamilyOptions nano::store::rocksdb::component::get_common_cf_opti { ::rocksdb::ColumnFamilyOptions cf_options; cf_options.table_factory = table_factory_a; + cf_options.write_buffer_size = memtable_size_bytes; return cf_options; } @@ -738,7 +739,7 @@ int nano::store::rocksdb::component::clear (::rocksdb::ColumnFamilyHandle * colu ::rocksdb::ReadOptions read_options; ::rocksdb::WriteOptions write_options; ::rocksdb::WriteBatch write_batch; - read_options.readahead_size = 0; + read_options.readahead_size = 0; // Readahead only adds overhead on SSD drives std::unique_ptr<::rocksdb::Iterator> it (db->NewIterator (read_options, column_family)); @@ -940,17 +941,19 @@ void nano::store::rocksdb::component::serialize_memory_stats (boost::property_tr json.put ("block-cache-usage", val); } -unsigned long long nano::store::rocksdb::component::base_memtable_size_bytes () const -{ - return 1024ULL * 1024 * base_memtable_size; -} - // This is a ratio of the blocks memtable size to keep total write transaction commit size down. unsigned nano::store::rocksdb::component::max_block_write_batch_num () const { return max_block_write_batch_num_m; } +unsigned nano::store::rocksdb::component::calculate_max_block_write_batch_num () const +{ + // Calculates the max write batch size from the memtable_size (write buffer) and the size of a block. + // With a memtable_size of 32 MB we will get 125672 as max block write batch + return nano::narrow_cast (memtable_size_bytes / (sizeof (nano::block_type) + nano::state_block::size + nano::block_sideband::size (nano::block_type::state))); +} + std::string nano::store::rocksdb::component::error_string (int status) const { return std::to_string (status); diff --git a/nano/store/rocksdb/rocksdb.hpp b/nano/store/rocksdb/rocksdb.hpp index 2210561c1b..286cb3dd21 100644 --- a/nano/store/rocksdb/rocksdb.hpp +++ b/nano/store/rocksdb/rocksdb.hpp @@ -112,6 +112,7 @@ class component : public nano::store::component std::unordered_map write_lock_mutexes; nano::rocksdb_config rocksdb_config; unsigned const max_block_write_batch_num_m; + unsigned calculate_max_block_write_batch_num () const; class tombstone_info { @@ -163,12 +164,12 @@ class component : public nano::store::component void flush_table (nano::tables table_a); void flush_tombstones_check (nano::tables table_a); void generate_tombstone_map (); + std::unordered_map create_cf_name_table_map () const; std::vector<::rocksdb::ColumnFamilyDescriptor> create_column_families (); - unsigned long long base_memtable_size_bytes () const; - constexpr static int base_memtable_size = 8; + constexpr static int memtable_size_bytes = 1024ULL * 1024 * 32; // 32 MB write buffer friend class nano::rocksdb_block_store_tombstone_count_Test; friend class rocksdb_block_store_upgrade_v21_v22_Test; From 7ade21f81b25a8a78f2fe850870686c11270ce0b Mon Sep 17 00:00:00 2001 From: RickiNano <81099017+RickiNano@users.noreply.github.com> Date: Fri, 30 Aug 2024 07:24:01 +0200 Subject: [PATCH 09/11] Move logic from get_common_cf_options --- nano/store/rocksdb/rocksdb.cpp | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/nano/store/rocksdb/rocksdb.cpp b/nano/store/rocksdb/rocksdb.cpp index 6690494ccb..39730dea84 100644 --- a/nano/store/rocksdb/rocksdb.cpp +++ b/nano/store/rocksdb/rocksdb.cpp @@ -399,25 +399,16 @@ void nano::store::rocksdb::component::generate_tombstone_map () tombstone_map.emplace (std::piecewise_construct, std::forward_as_tuple (nano::tables::pending), std::forward_as_tuple (0, 25000)); } -rocksdb::ColumnFamilyOptions nano::store::rocksdb::component::get_common_cf_options (std::shared_ptr<::rocksdb::TableFactory> const & table_factory_a) const -{ - ::rocksdb::ColumnFamilyOptions cf_options; - cf_options.table_factory = table_factory_a; - cf_options.write_buffer_size = memtable_size_bytes; - - return cf_options; -} - rocksdb::ColumnFamilyOptions nano::store::rocksdb::component::get_cf_options (std::string const & cf_name_a) const { ::rocksdb::ColumnFamilyOptions cf_options; - if (cf_name_a == ::rocksdb::kDefaultColumnFamilyName) + if (cf_name_a != ::rocksdb::kDefaultColumnFamilyName) { - return cf_options; + cf_options.table_factory = std::shared_ptr<::rocksdb::TableFactory> (::rocksdb::NewBlockBasedTableFactory (get_active_table_options ())); + cf_options.write_buffer_size = memtable_size_bytes; } - std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options ())); - return get_common_cf_options (table_factory); + return cf_options; } std::vector nano::store::rocksdb::component::create_column_families () From be469ae2bc5d6f1220ab4732e99b31fa03798fda Mon Sep 17 00:00:00 2001 From: RickiNano <81099017+RickiNano@users.noreply.github.com> Date: Fri, 30 Aug 2024 07:29:47 +0200 Subject: [PATCH 10/11] Clean up --- nano/store/rocksdb/rocksdb.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/nano/store/rocksdb/rocksdb.hpp b/nano/store/rocksdb/rocksdb.hpp index 286cb3dd21..e5cf8d88e9 100644 --- a/nano/store/rocksdb/rocksdb.hpp +++ b/nano/store/rocksdb/rocksdb.hpp @@ -108,7 +108,6 @@ class component : public nano::store::component ::rocksdb::TransactionDB * transaction_db = nullptr; std::unique_ptr<::rocksdb::DB> db; std::vector> handles; - std::shared_ptr<::rocksdb::TableFactory> small_table_factory; std::unordered_map write_lock_mutexes; nano::rocksdb_config rocksdb_config; unsigned const max_block_write_batch_num_m; From ecfbf78d1c1e3922ce26cb5853346acaebbfa7fe Mon Sep 17 00:00:00 2001 From: RickiNano <81099017+RickiNano@users.noreply.github.com> Date: Fri, 30 Aug 2024 07:36:11 +0200 Subject: [PATCH 11/11] 64mb write cache --- nano/store/rocksdb/rocksdb.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nano/store/rocksdb/rocksdb.hpp b/nano/store/rocksdb/rocksdb.hpp index e5cf8d88e9..ea91e8e1b8 100644 --- a/nano/store/rocksdb/rocksdb.hpp +++ b/nano/store/rocksdb/rocksdb.hpp @@ -168,7 +168,7 @@ class component : public nano::store::component std::vector<::rocksdb::ColumnFamilyDescriptor> create_column_families (); - constexpr static int memtable_size_bytes = 1024ULL * 1024 * 32; // 32 MB write buffer + constexpr static int memtable_size_bytes = 1024ULL * 1024 * 64; // 64 MB write buffer friend class nano::rocksdb_block_store_tombstone_count_Test; friend class rocksdb_block_store_upgrade_v21_v22_Test;