From 496f1a23fa61332ab5166b7ce16373d102ee7e6b Mon Sep 17 00:00:00 2001 From: RickiNano <81099017+RickiNano@users.noreply.github.com> Date: Fri, 30 Aug 2024 16:38:36 +0200 Subject: [PATCH 1/9] Remove memory_multiplier --- nano/core_test/toml.cpp | 3 --- nano/lib/rocksdbconfig.cpp | 7 ------- nano/lib/rocksdbconfig.hpp | 1 - nano/store/rocksdb/rocksdb.cpp | 4 ++-- 4 files changed, 2 insertions(+), 13 deletions(-) diff --git a/nano/core_test/toml.cpp b/nano/core_test/toml.cpp index ce861da1d8..44636cdf8c 100644 --- a/nano/core_test/toml.cpp +++ b/nano/core_test/toml.cpp @@ -239,7 +239,6 @@ TEST (toml, daemon_config_deserialize_defaults) ASSERT_EQ (conf.node.lmdb_config.map_size, defaults.node.lmdb_config.map_size); ASSERT_EQ (conf.node.rocksdb_config.enable, defaults.node.rocksdb_config.enable); - ASSERT_EQ (conf.node.rocksdb_config.memory_multiplier, defaults.node.rocksdb_config.memory_multiplier); ASSERT_EQ (conf.node.rocksdb_config.io_threads, defaults.node.rocksdb_config.io_threads); ASSERT_EQ (conf.node.optimistic_scheduler.enabled, defaults.node.optimistic_scheduler.enabled); @@ -573,7 +572,6 @@ TEST (toml, daemon_config_deserialize_no_defaults) [node.rocksdb] enable = true - memory_multiplier = 3 io_threads = 99 [node.experimental] @@ -743,7 +741,6 @@ TEST (toml, daemon_config_deserialize_no_defaults) ASSERT_TRUE (conf.node.rocksdb_config.enable); ASSERT_EQ (nano::rocksdb_config::using_rocksdb_in_tests (), defaults.node.rocksdb_config.enable); - ASSERT_NE (conf.node.rocksdb_config.memory_multiplier, defaults.node.rocksdb_config.memory_multiplier); ASSERT_NE (conf.node.rocksdb_config.io_threads, defaults.node.rocksdb_config.io_threads); ASSERT_NE (conf.node.optimistic_scheduler.enabled, defaults.node.optimistic_scheduler.enabled); diff --git a/nano/lib/rocksdbconfig.cpp b/nano/lib/rocksdbconfig.cpp index a12605d1f7..11d318c44f 100644 --- a/nano/lib/rocksdbconfig.cpp +++ b/nano/lib/rocksdbconfig.cpp @@ -5,7 +5,6 @@ nano::error nano::rocksdb_config::serialize_toml (nano::tomlconfig & toml) const { toml.put ("enable", enable, "Whether to use the RocksDB backend for the ledger database.\ntype:bool"); - toml.put ("memory_multiplier", memory_multiplier, "This will modify how much memory is used represented by 1 (low), 2 (medium), 3 (high). 
Default is 2.\ntype:uint8"); toml.put ("io_threads", io_threads, "Number of threads to use with the background compaction and flushing.\ntype:uint32"); return toml.get_error (); } @@ -13,7 +12,6 @@ nano::error nano::rocksdb_config::serialize_toml (nano::tomlconfig & toml) const nano::error nano::rocksdb_config::deserialize_toml (nano::tomlconfig & toml) { toml.get_optional ("enable", enable); - toml.get_optional ("memory_multiplier", memory_multiplier); toml.get_optional ("io_threads", io_threads); // Validate ranges @@ -21,11 +19,6 @@ nano::error nano::rocksdb_config::deserialize_toml (nano::tomlconfig & toml) { toml.get_error ().set ("io_threads must be non-zero"); } - if (memory_multiplier < 1 || memory_multiplier > 3) - { - toml.get_error ().set ("memory_multiplier must be either 1, 2 or 3"); - } - return toml.get_error (); } diff --git a/nano/lib/rocksdbconfig.hpp b/nano/lib/rocksdbconfig.hpp index 232d320193..f9ca0ec3f6 100644 --- a/nano/lib/rocksdbconfig.hpp +++ b/nano/lib/rocksdbconfig.hpp @@ -25,7 +25,6 @@ class rocksdb_config final static bool using_rocksdb_in_tests (); bool enable{ false }; - uint8_t memory_multiplier{ 2 }; unsigned io_threads{ std::max (nano::hardware_concurrency () / 2, 1u) }; }; } diff --git a/nano/store/rocksdb/rocksdb.cpp b/nano/store/rocksdb/rocksdb.cpp index 60f7e95cee..b065eed1b1 100644 --- a/nano/store/rocksdb/rocksdb.cpp +++ b/nano/store/rocksdb/rocksdb.cpp @@ -436,7 +436,7 @@ rocksdb::ColumnFamilyOptions nano::store::rocksdb::component::get_cf_options (st { ::rocksdb::ColumnFamilyOptions cf_options; auto const memtable_size_bytes = base_memtable_size_bytes (); - auto const block_cache_size_bytes = 1024ULL * 1024 * rocksdb_config.memory_multiplier * base_block_cache_size; + auto const block_cache_size_bytes = 1024ULL * 1024 * base_block_cache_size; if (cf_name_a == "blocks") { std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options (block_cache_size_bytes * 4))); @@ -1116,7 +1116,7 @@ unsigned long long nano::store::rocksdb::component::blocks_memtable_size_bytes ( unsigned long long nano::store::rocksdb::component::base_memtable_size_bytes () const { - return 1024ULL * 1024 * rocksdb_config.memory_multiplier * base_memtable_size; + return 1024ULL * 1024 * base_memtable_size; } // This is a ratio of the blocks memtable size to keep total write transaction commit size down. 
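With memory_multiplier removed, the earlier low/medium/high scaling collapses to the fixed base values already declared in nano/store/rocksdb/rocksdb.hpp (base_memtable_size = 16, base_block_cache_size = 8). The short sketch below is not part of the series; it only spells out the resulting sizes under that assumption, including the 4x factor the "blocks" column family still applies to its block cache at this point. Only the MiB constant and the main wrapper are added for illustration.

// Illustrative only: effective sizes once memory_multiplier is gone (patch 1).
#include <cstdint>
#include <iostream>

int main ()
{
	constexpr std::uint64_t MiB = 1024ULL * 1024;

	// Constants from nano/store/rocksdb/rocksdb.hpp at this point in the series
	constexpr std::uint64_t base_memtable_size = 16;
	constexpr std::uint64_t base_block_cache_size = 8;

	auto const memtable_size_bytes = MiB * base_memtable_size; // 16 MiB per memtable
	auto const block_cache_size_bytes = MiB * base_block_cache_size; // 8 MiB base read cache
	auto const blocks_cf_cache_bytes = block_cache_size_bytes * 4; // "blocks" column family uses 4x the base cache

	std::cout << memtable_size_bytes << ' ' << block_cache_size_bytes << ' ' << blocks_cf_cache_bytes << '\n';
	return 0;
}

Patch 2 then folds base_memtable_size into a single memtable_size_bytes constant that is used directly.
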
From 61ac6cbbfecafb74cece960212617ff86fcd599d Mon Sep 17 00:00:00 2001 From: RickiNano <81099017+RickiNano@users.noreply.github.com> Date: Fri, 30 Aug 2024 16:44:23 +0200 Subject: [PATCH 2/9] Read mem_table_size directly --- nano/store/rocksdb/rocksdb.cpp | 15 ++------------- nano/store/rocksdb/rocksdb.hpp | 4 +--- 2 files changed, 3 insertions(+), 16 deletions(-) diff --git a/nano/store/rocksdb/rocksdb.cpp b/nano/store/rocksdb/rocksdb.cpp index b065eed1b1..82e8100474 100644 --- a/nano/store/rocksdb/rocksdb.cpp +++ b/nano/store/rocksdb/rocksdb.cpp @@ -64,7 +64,7 @@ nano::store::rocksdb::component::component (nano::logger & logger_a, std::filesy logger{ logger_a }, constants{ constants }, rocksdb_config{ rocksdb_config_a }, - max_block_write_batch_num_m{ nano::narrow_cast (blocks_memtable_size_bytes () / (2 * (sizeof (nano::block_type) + nano::state_block::size + nano::block_sideband::size (nano::block_type::state)))) }, + max_block_write_batch_num_m{ nano::narrow_cast (memtable_size_bytes / (2 * (sizeof (nano::block_type) + nano::state_block::size + nano::block_sideband::size (nano::block_type::state)))) }, cf_name_table_map{ create_cf_name_table_map () } { boost::system::error_code error_mkdir, error_chmod; @@ -435,12 +435,11 @@ rocksdb::ColumnFamilyOptions nano::store::rocksdb::component::get_common_cf_opti rocksdb::ColumnFamilyOptions nano::store::rocksdb::component::get_cf_options (std::string const & cf_name_a) const { ::rocksdb::ColumnFamilyOptions cf_options; - auto const memtable_size_bytes = base_memtable_size_bytes (); auto const block_cache_size_bytes = 1024ULL * 1024 * base_block_cache_size; if (cf_name_a == "blocks") { std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options (block_cache_size_bytes * 4))); - cf_options = get_active_cf_options (table_factory, blocks_memtable_size_bytes ()); + cf_options = get_active_cf_options (table_factory, memtable_size_bytes); } else if (cf_name_a == "confirmation_height") { @@ -1109,16 +1108,6 @@ void nano::store::rocksdb::component::serialize_memory_stats (boost::property_tr json.put ("block-cache-usage", val); } -unsigned long long nano::store::rocksdb::component::blocks_memtable_size_bytes () const -{ - return base_memtable_size_bytes (); -} - -unsigned long long nano::store::rocksdb::component::base_memtable_size_bytes () const -{ - return 1024ULL * 1024 * base_memtable_size; -} - // This is a ratio of the blocks memtable size to keep total write transaction commit size down. 
unsigned nano::store::rocksdb::component::max_block_write_batch_num () const { diff --git a/nano/store/rocksdb/rocksdb.hpp b/nano/store/rocksdb/rocksdb.hpp index 5d8b22bba7..dc2fa9e69d 100644 --- a/nano/store/rocksdb/rocksdb.hpp +++ b/nano/store/rocksdb/rocksdb.hpp @@ -169,10 +169,8 @@ class component : public nano::store::component std::unordered_map create_cf_name_table_map () const; std::vector<::rocksdb::ColumnFamilyDescriptor> create_column_families (); - unsigned long long base_memtable_size_bytes () const; - unsigned long long blocks_memtable_size_bytes () const; - constexpr static int base_memtable_size = 16; + constexpr static long memtable_size_bytes = 16 * 1024 * 1024; constexpr static int base_block_cache_size = 8; friend class nano::rocksdb_block_store_tombstone_count_Test; From 2d1fcdc3fb7916974f546216e57efc70ff58cc6a Mon Sep 17 00:00:00 2001 From: RickiNano <81099017+RickiNano@users.noreply.github.com> Date: Fri, 30 Aug 2024 17:07:45 +0200 Subject: [PATCH 3/9] Use the same options for all column families --- nano/store/rocksdb/rocksdb.cpp | 91 +--------------------------------- nano/store/rocksdb/rocksdb.hpp | 3 +- 2 files changed, 3 insertions(+), 91 deletions(-) diff --git a/nano/store/rocksdb/rocksdb.cpp b/nano/store/rocksdb/rocksdb.cpp index 82e8100474..a7f19190d1 100644 --- a/nano/store/rocksdb/rocksdb.cpp +++ b/nano/store/rocksdb/rocksdb.cpp @@ -435,84 +435,11 @@ rocksdb::ColumnFamilyOptions nano::store::rocksdb::component::get_common_cf_opti rocksdb::ColumnFamilyOptions nano::store::rocksdb::component::get_cf_options (std::string const & cf_name_a) const { ::rocksdb::ColumnFamilyOptions cf_options; - auto const block_cache_size_bytes = 1024ULL * 1024 * base_block_cache_size; - if (cf_name_a == "blocks") + if (cf_name_a != ::rocksdb::kDefaultColumnFamilyName) { - std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options (block_cache_size_bytes * 4))); + std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options (read_cache_size_bytes))); cf_options = get_active_cf_options (table_factory, memtable_size_bytes); } - else if (cf_name_a == "confirmation_height") - { - // Entries will not be deleted in the normal case, so can make memtables a lot bigger - std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options (block_cache_size_bytes))); - cf_options = get_active_cf_options (table_factory, memtable_size_bytes * 2); - } - else if (cf_name_a == "meta" || cf_name_a == "online_weight" || cf_name_a == "peers") - { - // Meta - It contains just version key - // Online weight - Periodically deleted - // Peers - Cleaned periodically, a lot of deletions. This is never read outside of initializing? Keep this small - cf_options = get_small_cf_options (small_table_factory); - } - else if (cf_name_a == "cached_counts") - { - // Really small (keys are blocks tables, value is uint64_t) - cf_options = get_small_cf_options (small_table_factory); - } - else if (cf_name_a == "pending") - { - // Pending can have a lot of deletions too - std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options (block_cache_size_bytes))); - cf_options = get_active_cf_options (table_factory, memtable_size_bytes); - - // Number of files in level 0 which triggers compaction. 
Size of L0 and L1 should be kept similar as this is the only compaction which is single threaded - cf_options.level0_file_num_compaction_trigger = 2; - - // L1 size, compaction is triggered for L0 at this size (2 SST files in L1) - cf_options.max_bytes_for_level_base = memtable_size_bytes * 2; - } - else if (cf_name_a == "frontiers") - { - // Frontiers is only needed during bootstrap for legacy blocks - std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options (block_cache_size_bytes))); - cf_options = get_active_cf_options (table_factory, memtable_size_bytes); - } - else if (cf_name_a == "accounts") - { - // Can have deletions from rollbacks - std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options (block_cache_size_bytes * 2))); - cf_options = get_active_cf_options (table_factory, memtable_size_bytes); - } - else if (cf_name_a == "vote") - { - // No deletes it seems, only overwrites. - std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options (block_cache_size_bytes * 2))); - cf_options = get_active_cf_options (table_factory, memtable_size_bytes); - } - else if (cf_name_a == "pruned") - { - std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options (block_cache_size_bytes * 2))); - cf_options = get_active_cf_options (table_factory, memtable_size_bytes); - } - else if (cf_name_a == "final_votes") - { - std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options (block_cache_size_bytes * 2))); - cf_options = get_active_cf_options (table_factory, memtable_size_bytes); - } - else if (cf_name_a == "rep_weights") - { - std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options (block_cache_size_bytes * 2))); - cf_options = get_active_cf_options (table_factory, memtable_size_bytes); - } - else if (cf_name_a == ::rocksdb::kDefaultColumnFamilyName) - { - // Do nothing. - } - else - { - debug_assert (false); - } - return cf_options; } @@ -937,20 +864,6 @@ rocksdb::BlockBasedTableOptions nano::store::rocksdb::component::get_small_table return table_options; } -rocksdb::ColumnFamilyOptions nano::store::rocksdb::component::get_small_cf_options (std::shared_ptr<::rocksdb::TableFactory> const & table_factory_a) const -{ - auto const memtable_size_bytes = 10000; - auto cf_options = get_common_cf_options (table_factory_a, memtable_size_bytes); - - // Number of files in level 0 which triggers compaction. 
Size of L0 and L1 should be kept similar as this is the only compaction which is single threaded - cf_options.level0_file_num_compaction_trigger = 1; - - // L1 size, compaction is triggered for L0 at this size (1 SST file in L1) - cf_options.max_bytes_for_level_base = memtable_size_bytes; - - return cf_options; -} - ::rocksdb::ColumnFamilyOptions nano::store::rocksdb::component::get_active_cf_options (std::shared_ptr<::rocksdb::TableFactory> const & table_factory_a, unsigned long long memtable_size_bytes_a) const { auto cf_options = get_common_cf_options (table_factory_a, memtable_size_bytes_a); diff --git a/nano/store/rocksdb/rocksdb.hpp b/nano/store/rocksdb/rocksdb.hpp index dc2fa9e69d..c2e55a6017 100644 --- a/nano/store/rocksdb/rocksdb.hpp +++ b/nano/store/rocksdb/rocksdb.hpp @@ -157,7 +157,6 @@ class component : public nano::store::component ::rocksdb::Options get_db_options (); ::rocksdb::ColumnFamilyOptions get_common_cf_options (std::shared_ptr<::rocksdb::TableFactory> const & table_factory_a, unsigned long long memtable_size_bytes_a) const; ::rocksdb::ColumnFamilyOptions get_active_cf_options (std::shared_ptr<::rocksdb::TableFactory> const & table_factory_a, unsigned long long memtable_size_bytes_a) const; - ::rocksdb::ColumnFamilyOptions get_small_cf_options (std::shared_ptr<::rocksdb::TableFactory> const & table_factory_a) const; ::rocksdb::BlockBasedTableOptions get_active_table_options (std::size_t lru_size) const; ::rocksdb::BlockBasedTableOptions get_small_table_options () const; ::rocksdb::ColumnFamilyOptions get_cf_options (std::string const & cf_name_a) const; @@ -171,7 +170,7 @@ class component : public nano::store::component std::vector<::rocksdb::ColumnFamilyDescriptor> create_column_families (); constexpr static long memtable_size_bytes = 16 * 1024 * 1024; - constexpr static int base_block_cache_size = 8; + constexpr static long read_cache_size_bytes = 8 * 1024 * 1024; friend class nano::rocksdb_block_store_tombstone_count_Test; friend class rocksdb_block_store_upgrade_v21_v22_Test; From 3595953b4d8b01e1e904e819e7f44c6c45f319de Mon Sep 17 00:00:00 2001 From: RickiNano <81099017+RickiNano@users.noreply.github.com> Date: Fri, 30 Aug 2024 17:43:18 +0200 Subject: [PATCH 4/9] Use RocksDb default options --- nano/store/rocksdb/rocksdb.cpp | 66 ++-------------------------------- 1 file changed, 2 insertions(+), 64 deletions(-) diff --git a/nano/store/rocksdb/rocksdb.cpp b/nano/store/rocksdb/rocksdb.cpp index a7f19190d1..a8f7cfba9a 100644 --- a/nano/store/rocksdb/rocksdb.cpp +++ b/nano/store/rocksdb/rocksdb.cpp @@ -404,31 +404,6 @@ rocksdb::ColumnFamilyOptions nano::store::rocksdb::component::get_common_cf_opti { ::rocksdb::ColumnFamilyOptions cf_options; cf_options.table_factory = table_factory_a; - - // (1 active, 1 inactive) - auto num_memtables = 2; - - // Each level is a multiple of the above. If L1 is 512MB. L2 will be 512 * 8 = 2GB. L3 will be 2GB * 8 = 16GB, and so on... - cf_options.max_bytes_for_level_multiplier = 8; - - // Although this should be the default provided by RocksDB, not setting this is causing sequence conflict checks if not using - cf_options.max_write_buffer_size_to_maintain = memtable_size_bytes_a * num_memtables; - - // Files older than this (1 day) will be scheduled for compaction when there is no other background work. This can lead to more writes however. 
- cf_options.ttl = 1 * 24 * 60 * 60; - - // Multiplier for each level - cf_options.target_file_size_multiplier = 10; - - // Size of level 1 sst files - cf_options.target_file_size_base = memtable_size_bytes_a; - - // Size of each memtable - cf_options.write_buffer_size = memtable_size_bytes_a; - - // Number of memtables to keep in memory - cf_options.max_write_buffer_number = num_memtables; - return cf_options; } @@ -789,30 +764,11 @@ rocksdb::Options nano::store::rocksdb::component::get_db_options () db_options.create_if_missing = true; db_options.create_missing_column_families = true; - // TODO: review if this should be changed due to the unchecked table removal. - // Enable whole key bloom filter in memtables for ones with memtable_prefix_bloom_size_ratio set (unchecked table currently). - // It can potentially reduce CPU usage for point-look-ups. - db_options.memtable_whole_key_filtering = true; - - // Sets the compaction priority - db_options.compaction_pri = ::rocksdb::CompactionPri::kMinOverlappingRatio; - - // Start aggressively flushing WAL files when they reach over 1GB - db_options.max_total_wal_size = 1 * 1024 * 1024 * 1024LL; - // Optimize RocksDB. This is the easiest way to get RocksDB to perform well - db_options.IncreaseParallelism (rocksdb_config.io_threads); db_options.OptimizeLevelStyleCompaction (); - // Adds a separate write queue for memtable/WAL - db_options.enable_pipelined_write = true; - - // Default is 16, setting to -1 allows faster startup times for SSDs by allowings more files to be read in parallel. - db_options.max_file_opening_threads = -1; - - // The MANIFEST file contains a history of all file operations since the last time the DB was opened and is replayed during DB open. - // Default is 1GB, lowering this to avoid replaying for too long (100MB) - db_options.max_manifest_file_size = 100 * 1024 * 1024ULL; + // Set max number of threads + db_options.IncreaseParallelism (rocksdb_config.io_threads); // Not compressing any SST files for compatibility reasons. db_options.compression = ::rocksdb::kNoCompression; @@ -831,13 +787,11 @@ rocksdb::BlockBasedTableOptions nano::store::rocksdb::component::get_active_tabl // Improve point lookup performance be using the data block hash index (uses about 5% more space). table_options.data_block_index_type = ::rocksdb::BlockBasedTableOptions::DataBlockIndexType::kDataBlockBinaryAndHash; - table_options.data_block_hash_table_util_ratio = 0.75; // Using format_version=4 significantly reduces the index block size, in some cases around 4-5x. // This frees more space in block cache, which would result in higher hit rate for data and filter blocks, // or offer the same performance with a smaller block cache size. table_options.format_version = 4; - table_options.index_block_restart_interval = 16; // Block cache for reads table_options.block_cache = ::rocksdb::NewLRUCache (lru_size); @@ -845,12 +799,6 @@ rocksdb::BlockBasedTableOptions nano::store::rocksdb::component::get_active_tabl // Bloom filter to help with point reads. 10bits gives 1% false positive rate. table_options.filter_policy.reset (::rocksdb::NewBloomFilterPolicy (10, false)); - // Increasing block_size decreases memory usage and space amplification, but increases read amplification. 
- table_options.block_size = 16 * 1024ULL; - - // Whether level 0 index and filter blocks are stored in block_cache - table_options.pin_l0_filter_and_index_blocks_in_cache = true; - return table_options; } @@ -867,16 +815,6 @@ rocksdb::BlockBasedTableOptions nano::store::rocksdb::component::get_small_table ::rocksdb::ColumnFamilyOptions nano::store::rocksdb::component::get_active_cf_options (std::shared_ptr<::rocksdb::TableFactory> const & table_factory_a, unsigned long long memtable_size_bytes_a) const { auto cf_options = get_common_cf_options (table_factory_a, memtable_size_bytes_a); - - // Number of files in level 0 which triggers compaction. Size of L0 and L1 should be kept similar as this is the only compaction which is single threaded - cf_options.level0_file_num_compaction_trigger = 4; - - // L1 size, compaction is triggered for L0 at this size (4 SST files in L1) - cf_options.max_bytes_for_level_base = memtable_size_bytes_a * 4; - - // Size target of levels are changed dynamically based on size of the last level - cf_options.level_compaction_dynamic_level_bytes = true; - return cf_options; } From 757a9fa6195e5ffed1d18acf430ff252933a7ef4 Mon Sep 17 00:00:00 2001 From: RickiNano <81099017+RickiNano@users.noreply.github.com> Date: Fri, 30 Aug 2024 18:36:07 +0200 Subject: [PATCH 5/9] Removed redundant cf option functions --- nano/store/rocksdb/rocksdb.cpp | 15 +-------------- nano/store/rocksdb/rocksdb.hpp | 2 -- 2 files changed, 1 insertion(+), 16 deletions(-) diff --git a/nano/store/rocksdb/rocksdb.cpp b/nano/store/rocksdb/rocksdb.cpp index a8f7cfba9a..e59325c2d9 100644 --- a/nano/store/rocksdb/rocksdb.cpp +++ b/nano/store/rocksdb/rocksdb.cpp @@ -400,20 +400,13 @@ void nano::store::rocksdb::component::generate_tombstone_map () tombstone_map.emplace (std::piecewise_construct, std::forward_as_tuple (nano::tables::pending), std::forward_as_tuple (0, 25000)); } -rocksdb::ColumnFamilyOptions nano::store::rocksdb::component::get_common_cf_options (std::shared_ptr<::rocksdb::TableFactory> const & table_factory_a, unsigned long long memtable_size_bytes_a) const -{ - ::rocksdb::ColumnFamilyOptions cf_options; - cf_options.table_factory = table_factory_a; - return cf_options; -} - rocksdb::ColumnFamilyOptions nano::store::rocksdb::component::get_cf_options (std::string const & cf_name_a) const { ::rocksdb::ColumnFamilyOptions cf_options; if (cf_name_a != ::rocksdb::kDefaultColumnFamilyName) { std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options (read_cache_size_bytes))); - cf_options = get_active_cf_options (table_factory, memtable_size_bytes); + cf_options.table_factory = table_factory; } return cf_options; } @@ -812,12 +805,6 @@ rocksdb::BlockBasedTableOptions nano::store::rocksdb::component::get_small_table return table_options; } -::rocksdb::ColumnFamilyOptions nano::store::rocksdb::component::get_active_cf_options (std::shared_ptr<::rocksdb::TableFactory> const & table_factory_a, unsigned long long memtable_size_bytes_a) const -{ - auto cf_options = get_common_cf_options (table_factory_a, memtable_size_bytes_a); - return cf_options; -} - void nano::store::rocksdb::component::on_flush (::rocksdb::FlushJobInfo const & flush_job_info_a) { // Reset appropriate tombstone counters diff --git a/nano/store/rocksdb/rocksdb.hpp b/nano/store/rocksdb/rocksdb.hpp index c2e55a6017..147823e73a 100644 --- a/nano/store/rocksdb/rocksdb.hpp +++ b/nano/store/rocksdb/rocksdb.hpp @@ -155,8 +155,6 @@ class component : public 
nano::store::component void construct_column_family_mutexes (); ::rocksdb::Options get_db_options (); - ::rocksdb::ColumnFamilyOptions get_common_cf_options (std::shared_ptr<::rocksdb::TableFactory> const & table_factory_a, unsigned long long memtable_size_bytes_a) const; - ::rocksdb::ColumnFamilyOptions get_active_cf_options (std::shared_ptr<::rocksdb::TableFactory> const & table_factory_a, unsigned long long memtable_size_bytes_a) const; ::rocksdb::BlockBasedTableOptions get_active_table_options (std::size_t lru_size) const; ::rocksdb::BlockBasedTableOptions get_small_table_options () const; ::rocksdb::ColumnFamilyOptions get_cf_options (std::string const & cf_name_a) const; From 4d4a7cc0a7ddb258b223345ddc24c6af28f70967 Mon Sep 17 00:00:00 2001 From: RickiNano <81099017+RickiNano@users.noreply.github.com> Date: Fri, 30 Aug 2024 18:44:40 +0200 Subject: [PATCH 6/9] Consolidate table option functions --- nano/store/rocksdb/rocksdb.cpp | 17 +++-------------- nano/store/rocksdb/rocksdb.hpp | 4 +--- 2 files changed, 4 insertions(+), 17 deletions(-) diff --git a/nano/store/rocksdb/rocksdb.cpp b/nano/store/rocksdb/rocksdb.cpp index e59325c2d9..440be46d2c 100644 --- a/nano/store/rocksdb/rocksdb.cpp +++ b/nano/store/rocksdb/rocksdb.cpp @@ -80,7 +80,6 @@ nano::store::rocksdb::component::component (nano::logger & logger_a, std::filesy debug_assert (path_a.filename () == "rocksdb"); generate_tombstone_map (); - small_table_factory.reset (::rocksdb::NewBlockBasedTableFactory (get_small_table_options ())); // TODO: get_db_options () registers a listener for resetting tombstones, needs to check if it is a problem calling it more than once. auto options = get_db_options (); @@ -405,7 +404,7 @@ rocksdb::ColumnFamilyOptions nano::store::rocksdb::component::get_cf_options (st ::rocksdb::ColumnFamilyOptions cf_options; if (cf_name_a != ::rocksdb::kDefaultColumnFamilyName) { - std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options (read_cache_size_bytes))); + std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_table_options ())); cf_options.table_factory = table_factory; } return cf_options; @@ -774,7 +773,7 @@ rocksdb::Options nano::store::rocksdb::component::get_db_options () return db_options; } -rocksdb::BlockBasedTableOptions nano::store::rocksdb::component::get_active_table_options (std::size_t lru_size) const +rocksdb::BlockBasedTableOptions nano::store::rocksdb::component::get_table_options () const { ::rocksdb::BlockBasedTableOptions table_options; @@ -787,7 +786,7 @@ rocksdb::BlockBasedTableOptions nano::store::rocksdb::component::get_active_tabl table_options.format_version = 4; // Block cache for reads - table_options.block_cache = ::rocksdb::NewLRUCache (lru_size); + table_options.block_cache = ::rocksdb::NewLRUCache (read_cache_size_bytes); // Bloom filter to help with point reads. 10bits gives 1% false positive rate. table_options.filter_policy.reset (::rocksdb::NewBloomFilterPolicy (10, false)); @@ -795,16 +794,6 @@ rocksdb::BlockBasedTableOptions nano::store::rocksdb::component::get_active_tabl return table_options; } -rocksdb::BlockBasedTableOptions nano::store::rocksdb::component::get_small_table_options () const -{ - ::rocksdb::BlockBasedTableOptions table_options; - // Improve point lookup performance be using the data block hash index (uses about 5% more space). 
- table_options.data_block_index_type = ::rocksdb::BlockBasedTableOptions::DataBlockIndexType::kDataBlockBinaryAndHash; - table_options.data_block_hash_table_util_ratio = 0.75; - table_options.block_size = 1024ULL; - return table_options; -} - void nano::store::rocksdb::component::on_flush (::rocksdb::FlushJobInfo const & flush_job_info_a) { // Reset appropriate tombstone counters diff --git a/nano/store/rocksdb/rocksdb.hpp b/nano/store/rocksdb/rocksdb.hpp index 147823e73a..e8d3c818b2 100644 --- a/nano/store/rocksdb/rocksdb.hpp +++ b/nano/store/rocksdb/rocksdb.hpp @@ -108,7 +108,6 @@ class component : public nano::store::component ::rocksdb::TransactionDB * transaction_db = nullptr; std::unique_ptr<::rocksdb::DB> db; std::vector> handles; - std::shared_ptr<::rocksdb::TableFactory> small_table_factory; std::unordered_map write_lock_mutexes; nano::rocksdb_config rocksdb_config; unsigned const max_block_write_batch_num_m; @@ -155,8 +154,7 @@ class component : public nano::store::component void construct_column_family_mutexes (); ::rocksdb::Options get_db_options (); - ::rocksdb::BlockBasedTableOptions get_active_table_options (std::size_t lru_size) const; - ::rocksdb::BlockBasedTableOptions get_small_table_options () const; + ::rocksdb::BlockBasedTableOptions get_table_options () const; ::rocksdb::ColumnFamilyOptions get_cf_options (std::string const & cf_name_a) const; void on_flush (::rocksdb::FlushJobInfo const &); From 5d3320ea89fca6cb818eabec9f0dd745f242249c Mon Sep 17 00:00:00 2001 From: RickiNano <81099017+RickiNano@users.noreply.github.com> Date: Fri, 30 Aug 2024 19:19:09 +0200 Subject: [PATCH 7/9] Configurable cache settings --- nano/lib/rocksdbconfig.cpp | 16 ++++++++++++++++ nano/lib/rocksdbconfig.hpp | 2 ++ nano/store/rocksdb/rocksdb.cpp | 6 ++++-- nano/store/rocksdb/rocksdb.hpp | 3 --- 4 files changed, 22 insertions(+), 5 deletions(-) diff --git a/nano/lib/rocksdbconfig.cpp b/nano/lib/rocksdbconfig.cpp index 11d318c44f..100af267fd 100644 --- a/nano/lib/rocksdbconfig.cpp +++ b/nano/lib/rocksdbconfig.cpp @@ -6,6 +6,9 @@ nano::error nano::rocksdb_config::serialize_toml (nano::tomlconfig & toml) const { toml.put ("enable", enable, "Whether to use the RocksDB backend for the ledger database.\ntype:bool"); toml.put ("io_threads", io_threads, "Number of threads to use with the background compaction and flushing.\ntype:uint32"); + toml.put ("read_cache", read_cache, "Amount of megabytes per table allocated to read cache. Valid range is 1 - 1024. Default is 32.\nCarefully monitor memory usage if non-default values are used\ntype:long"); + toml.put ("write_cache", write_cache, "Total amount of megabytes allocated to write cache. Valid range is 1 - 256. 
Default is 64.\nCarefully monitor memory usage if non-default values are used\ntype:long"); + return toml.get_error (); } @@ -13,12 +16,25 @@ nano::error nano::rocksdb_config::deserialize_toml (nano::tomlconfig & toml) { toml.get_optional ("enable", enable); toml.get_optional ("io_threads", io_threads); + toml.get_optional ("read_cache", read_cache); + toml.get_optional ("write_cache", write_cache); // Validate ranges if (io_threads == 0) { toml.get_error ().set ("io_threads must be non-zero"); } + + if (read_cache < 1 || read_cache > 1024) + { + toml.get_error ().set ("read_cache must be between 1 and 1024 MB"); + } + + if (write_cache < 1 || write_cache > 256) + { + toml.get_error ().set ("write_cache must be between 1 and 256 MB"); + } + return toml.get_error (); } diff --git a/nano/lib/rocksdbconfig.hpp b/nano/lib/rocksdbconfig.hpp index f9ca0ec3f6..fabc1fb0a1 100644 --- a/nano/lib/rocksdbconfig.hpp +++ b/nano/lib/rocksdbconfig.hpp @@ -26,5 +26,7 @@ class rocksdb_config final bool enable{ false }; unsigned io_threads{ std::max (nano::hardware_concurrency () / 2, 1u) }; + long read_cache{ 32 }; + long write_cache{ 64 }; }; } diff --git a/nano/store/rocksdb/rocksdb.cpp b/nano/store/rocksdb/rocksdb.cpp index 440be46d2c..19c6cb90db 100644 --- a/nano/store/rocksdb/rocksdb.cpp +++ b/nano/store/rocksdb/rocksdb.cpp @@ -64,7 +64,7 @@ nano::store::rocksdb::component::component (nano::logger & logger_a, std::filesy logger{ logger_a }, constants{ constants }, rocksdb_config{ rocksdb_config_a }, - max_block_write_batch_num_m{ nano::narrow_cast (memtable_size_bytes / (2 * (sizeof (nano::block_type) + nano::state_block::size + nano::block_sideband::size (nano::block_type::state)))) }, + max_block_write_batch_num_m{ nano::narrow_cast ((rocksdb_config_a.write_cache * 1024 * 1024) / (2 * (sizeof (nano::block_type) + nano::state_block::size + nano::block_sideband::size (nano::block_type::state)))) }, cf_name_table_map{ create_cf_name_table_map () } { boost::system::error_code error_mkdir, error_chmod; @@ -406,6 +406,8 @@ rocksdb::ColumnFamilyOptions nano::store::rocksdb::component::get_cf_options (st { std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_table_options ())); cf_options.table_factory = table_factory; + // Size of each memtable (write buffer for this column family) + cf_options.write_buffer_size = rocksdb_config.write_cache * 1024 * 1024; } return cf_options; } @@ -786,7 +788,7 @@ rocksdb::BlockBasedTableOptions nano::store::rocksdb::component::get_table_optio table_options.format_version = 4; // Block cache for reads - table_options.block_cache = ::rocksdb::NewLRUCache (read_cache_size_bytes); + table_options.block_cache = ::rocksdb::NewLRUCache (rocksdb_config.read_cache * 1024 * 1024); // Bloom filter to help with point reads. 10bits gives 1% false positive rate. 
table_options.filter_policy.reset (::rocksdb::NewBloomFilterPolicy (10, false)); diff --git a/nano/store/rocksdb/rocksdb.hpp b/nano/store/rocksdb/rocksdb.hpp index e8d3c818b2..e106931c3d 100644 --- a/nano/store/rocksdb/rocksdb.hpp +++ b/nano/store/rocksdb/rocksdb.hpp @@ -165,9 +165,6 @@ class component : public nano::store::component std::vector<::rocksdb::ColumnFamilyDescriptor> create_column_families (); - constexpr static long memtable_size_bytes = 16 * 1024 * 1024; - constexpr static long read_cache_size_bytes = 8 * 1024 * 1024; - friend class nano::rocksdb_block_store_tombstone_count_Test; friend class rocksdb_block_store_upgrade_v21_v22_Test; }; From 012c053d7c097b2650c63f07ea40503d00eef266 Mon Sep 17 00:00:00 2001 From: RickiNano <81099017+RickiNano@users.noreply.github.com> Date: Fri, 30 Aug 2024 20:49:53 +0200 Subject: [PATCH 8/9] Added test for config parameters --- nano/core_test/toml.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/nano/core_test/toml.cpp b/nano/core_test/toml.cpp index 44636cdf8c..80fd6c24a9 100644 --- a/nano/core_test/toml.cpp +++ b/nano/core_test/toml.cpp @@ -240,6 +240,8 @@ TEST (toml, daemon_config_deserialize_defaults) ASSERT_EQ (conf.node.rocksdb_config.enable, defaults.node.rocksdb_config.enable); ASSERT_EQ (conf.node.rocksdb_config.io_threads, defaults.node.rocksdb_config.io_threads); + ASSERT_EQ (conf.node.rocksdb_config.read_cache, defaults.node.rocksdb_config.read_cache); + ASSERT_EQ (conf.node.rocksdb_config.write_cache, defaults.node.rocksdb_config.write_cache); ASSERT_EQ (conf.node.optimistic_scheduler.enabled, defaults.node.optimistic_scheduler.enabled); ASSERT_EQ (conf.node.optimistic_scheduler.gap_threshold, defaults.node.optimistic_scheduler.gap_threshold); @@ -573,6 +575,8 @@ TEST (toml, daemon_config_deserialize_no_defaults) [node.rocksdb] enable = true io_threads = 99 + read_cache = 99 + write_cache = 99 [node.experimental] secondary_work_peers = ["dev.org:998"] @@ -742,6 +746,8 @@ TEST (toml, daemon_config_deserialize_no_defaults) ASSERT_TRUE (conf.node.rocksdb_config.enable); ASSERT_EQ (nano::rocksdb_config::using_rocksdb_in_tests (), defaults.node.rocksdb_config.enable); ASSERT_NE (conf.node.rocksdb_config.io_threads, defaults.node.rocksdb_config.io_threads); + ASSERT_NE (conf.node.rocksdb_config.read_cache, defaults.node.rocksdb_config.read_cache); + ASSERT_NE (conf.node.rocksdb_config.write_cache, defaults.node.rocksdb_config.write_cache); ASSERT_NE (conf.node.optimistic_scheduler.enabled, defaults.node.optimistic_scheduler.enabled); ASSERT_NE (conf.node.optimistic_scheduler.gap_threshold, defaults.node.optimistic_scheduler.gap_threshold); From 3f089a2ef633a2f9160a6243bfa03d495fcca640 Mon Sep 17 00:00:00 2001 From: RickiNano <81099017+RickiNano@users.noreply.github.com> Date: Fri, 30 Aug 2024 21:29:50 +0200 Subject: [PATCH 9/9] Use format_version 5 --- nano/store/rocksdb/rocksdb.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nano/store/rocksdb/rocksdb.cpp b/nano/store/rocksdb/rocksdb.cpp index 19c6cb90db..439cb92db7 100644 --- a/nano/store/rocksdb/rocksdb.cpp +++ b/nano/store/rocksdb/rocksdb.cpp @@ -782,10 +782,10 @@ rocksdb::BlockBasedTableOptions nano::store::rocksdb::component::get_table_optio // Improve point lookup performance be using the data block hash index (uses about 5% more space). 
table_options.data_block_index_type = ::rocksdb::BlockBasedTableOptions::DataBlockIndexType::kDataBlockBinaryAndHash; - // Using format_version=4 significantly reduces the index block size, in some cases around 4-5x. - // This frees more space in block cache, which would result in higher hit rate for data and filter blocks, - // or offer the same performance with a smaller block cache size. - table_options.format_version = 4; + // Using storage format_version 5. + // Version 5 offers improved read spead, caching and better compression (if enabled) + // Any existing ledger data in version 4 will not be migrated. New data will be written in version 5. + table_options.format_version = 5; // Block cache for reads table_options.block_cache = ::rocksdb::NewLRUCache (rocksdb_config.read_cache * 1024 * 1024);
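
Taken as a whole, the series ends with one table configuration and one column-family configuration shared by every non-default column family, sized by the new read_cache and write_cache settings (in megabytes) and otherwise relying on RocksDB defaults plus OptimizeLevelStyleCompaction. The following is a minimal standalone sketch of that end state, not the node's actual code: read_cache_mb, write_cache_mb and io_threads are stand-in values for the corresponding rocksdb_config fields, the make_* helpers are invented names, and it assumes the standard RocksDB public headers.

// Sketch of the option set the series converges on (illustrative stand-in, not nano's code).
#include <rocksdb/cache.h>
#include <rocksdb/filter_policy.h>
#include <rocksdb/options.h>
#include <rocksdb/table.h>

#include <cstddef>

// Stand-ins for rocksdb_config.read_cache, rocksdb_config.write_cache and rocksdb_config.io_threads
std::size_t const read_cache_mb = 32;
std::size_t const write_cache_mb = 64;
int const io_threads = 4;

rocksdb::BlockBasedTableOptions make_table_options ()
{
	rocksdb::BlockBasedTableOptions table_options;
	// Data block hash index for faster point lookups (about 5% more space)
	table_options.data_block_index_type = rocksdb::BlockBasedTableOptions::DataBlockIndexType::kDataBlockBinaryAndHash;
	// SST format version 5 (patch 9); existing version 4 files keep working, new files are written as version 5
	table_options.format_version = 5;
	// LRU read cache sized from the read_cache setting, created per column family
	table_options.block_cache = rocksdb::NewLRUCache (read_cache_mb * 1024 * 1024);
	// 10-bit bloom filter, roughly 1% false positive rate on point reads
	table_options.filter_policy.reset (rocksdb::NewBloomFilterPolicy (10, false));
	return table_options;
}

rocksdb::ColumnFamilyOptions make_cf_options ()
{
	// After patch 3 every non-default column family gets the same options
	rocksdb::ColumnFamilyOptions cf_options;
	cf_options.table_factory.reset (rocksdb::NewBlockBasedTableFactory (make_table_options ()));
	// Memtable (write buffer) sized from the write_cache setting
	cf_options.write_buffer_size = write_cache_mb * 1024 * 1024;
	return cf_options;
}

rocksdb::Options make_db_options ()
{
	rocksdb::Options db_options;
	db_options.create_if_missing = true;
	db_options.create_missing_column_families = true;
	// Stock RocksDB level-style tuning replaces the hand-rolled settings removed in patch 4
	db_options.OptimizeLevelStyleCompaction ();
	db_options.IncreaseParallelism (io_threads);
	// SST files stay uncompressed for compatibility
	db_options.compression = rocksdb::kNoCompression;
	return db_options;
}

int main ()
{
	auto db_options = make_db_options ();
	auto cf_options = make_cf_options ();
	(void) db_options;
	(void) cf_options;
	return 0;
}

The validation added in patch 7 constrains read_cache to 1-1024 MB and write_cache to 1-256 MB, so the defaults of 32 and 64 used above sit comfortably inside both ranges.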