Rocksdb cleanup #4717

Closed
wants to merge 11 commits
6 changes: 3 additions & 3 deletions nano/core_test/toml.cpp
@@ -239,7 +239,7 @@ TEST (toml, daemon_config_deserialize_defaults)
ASSERT_EQ (conf.node.lmdb_config.map_size, defaults.node.lmdb_config.map_size);

ASSERT_EQ (conf.node.rocksdb_config.enable, defaults.node.rocksdb_config.enable);
ASSERT_EQ (conf.node.rocksdb_config.memory_multiplier, defaults.node.rocksdb_config.memory_multiplier);
ASSERT_EQ (conf.node.rocksdb_config.cache_size, defaults.node.rocksdb_config.cache_size);
ASSERT_EQ (conf.node.rocksdb_config.io_threads, defaults.node.rocksdb_config.io_threads);

ASSERT_EQ (conf.node.optimistic_scheduler.enabled, defaults.node.optimistic_scheduler.enabled);
@@ -573,7 +573,7 @@ TEST (toml, daemon_config_deserialize_no_defaults)

[node.rocksdb]
enable = true
memory_multiplier = 3
cache_size = 3
io_threads = 99

[node.experimental]
@@ -743,7 +743,7 @@ TEST (toml, daemon_config_deserialize_no_defaults)

ASSERT_TRUE (conf.node.rocksdb_config.enable);
ASSERT_EQ (nano::rocksdb_config::using_rocksdb_in_tests (), defaults.node.rocksdb_config.enable);
ASSERT_NE (conf.node.rocksdb_config.memory_multiplier, defaults.node.rocksdb_config.memory_multiplier);
ASSERT_NE (conf.node.rocksdb_config.cache_size, defaults.node.rocksdb_config.cache_size);
ASSERT_NE (conf.node.rocksdb_config.io_threads, defaults.node.rocksdb_config.io_threads);

ASSERT_NE (conf.node.optimistic_scheduler.enabled, defaults.node.optimistic_scheduler.enabled);
8 changes: 4 additions & 4 deletions nano/lib/rocksdbconfig.cpp
@@ -5,25 +5,25 @@
nano::error nano::rocksdb_config::serialize_toml (nano::tomlconfig & toml) const
{
toml.put ("enable", enable, "Whether to use the RocksDB backend for the ledger database.\ntype:bool");
toml.put ("memory_multiplier", memory_multiplier, "This will modify how much memory is used represented by 1 (low), 2 (medium), 3 (high). Default is 2.\ntype:uint8");
toml.put ("cache_size", cache_size, "Amount of memory in MB used for caching for each table. Valid values are from 1 to 1024. Default is 64.\ntype:uint8");
toml.put ("io_threads", io_threads, "Number of threads to use with the background compaction and flushing.\ntype:uint32");
return toml.get_error ();
}

nano::error nano::rocksdb_config::deserialize_toml (nano::tomlconfig & toml)
{
toml.get_optional<bool> ("enable", enable);
toml.get_optional<uint8_t> ("memory_multiplier", memory_multiplier);
toml.get_optional<uint16_t> ("cache_size", cache_size);
toml.get_optional<unsigned> ("io_threads", io_threads);

// Validate ranges
if (io_threads == 0)
{
toml.get_error ().set ("io_threads must be non-zero");
}
if (memory_multiplier < 1 || memory_multiplier > 3)
if (cache_size < 1 || cache_size > 1024)
{
toml.get_error ().set ("memory_multiplier must be either 1, 2 or 3");
toml.get_error ().set ("cache_size must be between 1 and 1024 MB");
}

return toml.get_error ();
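
Note: the new cache_size value is given in MB and converted to bytes where the per-table LRU block cache is created (see get_active_table_options further down). A minimal sketch, not part of this diff, comparing the old memory_multiplier-derived cache with the new setting; the constants are the defaults visible in this PR (base_block_cache_size = 8, multiplier default 2, cache_size default 64):

```cpp
// Sketch only: contrasts old and new block cache sizing. All constants
// are assumptions taken from the defaults shown in this diff.
#include <cstdint>
#include <iostream>

int main ()
{
	// Old scheme: 1 MB * memory_multiplier (default 2) * base_block_cache_size (8)
	uint64_t old_cache_bytes = 1024ULL * 1024 * 2 * 8;
	// New scheme: cache_size is specified directly in MB (default 64)
	uint64_t new_cache_bytes = 1024ULL * 1024 * 64;
	std::cout << old_cache_bytes << " -> " << new_cache_bytes << '\n'; // 16777216 -> 67108864
}
```

One caveat: the old value was further scaled per table (x1, x2, or x4 depending on the column family, as the deleted get_cf_options below shows), whereas the new cache_size applies uniformly to every table.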
2 changes: 1 addition & 1 deletion nano/lib/rocksdbconfig.hpp
@@ -25,7 +25,7 @@ class rocksdb_config final
static bool using_rocksdb_in_tests ();

bool enable{ false };
uint8_t memory_multiplier{ 2 };
uint16_t cache_size{ 64 };
unsigned io_threads{ std::max (nano::hardware_concurrency () / 2, 1u) };
};
}
214 changes: 17 additions & 197 deletions nano/store/rocksdb/rocksdb.cpp
@@ -64,7 +64,7 @@ nano::store::rocksdb::component::component (nano::logger & logger_a, std::filesy
logger{ logger_a },
constants{ constants },
rocksdb_config{ rocksdb_config_a },
max_block_write_batch_num_m{ nano::narrow_cast<unsigned> (blocks_memtable_size_bytes () / (2 * (sizeof (nano::block_type) + nano::state_block::size + nano::block_sideband::size (nano::block_type::state)))) },
max_block_write_batch_num_m{ calculate_max_block_write_batch_num () },
cf_name_table_map{ create_cf_name_table_map () }
{
boost::system::error_code error_mkdir, error_chmod;
@@ -80,7 +80,6 @@ nano::store::rocksdb::component::component (nano::logger & logger_a, std::filesy
debug_assert (path_a.filename () == "rocksdb");

generate_tombstone_map ();
small_table_factory.reset (::rocksdb::NewBlockBasedTableFactory (get_small_table_options ()));

// TODO: get_db_options () registers a listener for resetting tombstones, needs to check if it is a problem calling it more than once.
auto options = get_db_options ();
@@ -400,118 +399,13 @@ void nano::store::rocksdb::component::generate_tombstone_map ()
tombstone_map.emplace (std::piecewise_construct, std::forward_as_tuple (nano::tables::pending), std::forward_as_tuple (0, 25000));
}

rocksdb::ColumnFamilyOptions nano::store::rocksdb::component::get_common_cf_options (std::shared_ptr<::rocksdb::TableFactory> const & table_factory_a, unsigned long long memtable_size_bytes_a) const
{
::rocksdb::ColumnFamilyOptions cf_options;
cf_options.table_factory = table_factory_a;

// (1 active, 1 inactive)
auto num_memtables = 2;

// Each level is a multiple of the above. If L1 is 512MB, L2 will be 512MB * 8 = 4GB, L3 will be 4GB * 8 = 32GB, and so on...
cf_options.max_bytes_for_level_multiplier = 8;

// Although this should be the default provided by RocksDB, not setting this is causing sequence conflict checks if not using
cf_options.max_write_buffer_size_to_maintain = memtable_size_bytes_a * num_memtables;

// Files older than this (1 day) will be scheduled for compaction when there is no other background work. This can lead to more writes however.
cf_options.ttl = 1 * 24 * 60 * 60;

// Multiplier for each level
cf_options.target_file_size_multiplier = 10;

// Size of level 1 sst files
cf_options.target_file_size_base = memtable_size_bytes_a;

// Size of each memtable
cf_options.write_buffer_size = memtable_size_bytes_a;

// Number of memtables to keep in memory
cf_options.max_write_buffer_number = num_memtables;

return cf_options;
}

rocksdb::ColumnFamilyOptions nano::store::rocksdb::component::get_cf_options (std::string const & cf_name_a) const
{
::rocksdb::ColumnFamilyOptions cf_options;
auto const memtable_size_bytes = base_memtable_size_bytes ();
auto const block_cache_size_bytes = 1024ULL * 1024 * rocksdb_config.memory_multiplier * base_block_cache_size;
if (cf_name_a == "blocks")
{
std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options (block_cache_size_bytes * 4)));
cf_options = get_active_cf_options (table_factory, blocks_memtable_size_bytes ());
}
else if (cf_name_a == "confirmation_height")
{
// Entries will not be deleted in the normal case, so can make memtables a lot bigger
std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options (block_cache_size_bytes)));
cf_options = get_active_cf_options (table_factory, memtable_size_bytes * 2);
}
else if (cf_name_a == "meta" || cf_name_a == "online_weight" || cf_name_a == "peers")
{
// Meta - It contains just version key
// Online weight - Periodically deleted
// Peers - Cleaned periodically, a lot of deletions. This is never read outside of initializing? Keep this small
cf_options = get_small_cf_options (small_table_factory);
}
else if (cf_name_a == "cached_counts")
{
// Really small (keys are blocks tables, value is uint64_t)
cf_options = get_small_cf_options (small_table_factory);
}
else if (cf_name_a == "pending")
if (cf_name_a != ::rocksdb::kDefaultColumnFamilyName)
{
// Pending can have a lot of deletions too
std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options (block_cache_size_bytes)));
cf_options = get_active_cf_options (table_factory, memtable_size_bytes);

// Number of files in level 0 which triggers compaction. Size of L0 and L1 should be kept similar as this is the only compaction which is single threaded
cf_options.level0_file_num_compaction_trigger = 2;

// L1 size, compaction is triggered for L0 at this size (2 SST files in L1)
cf_options.max_bytes_for_level_base = memtable_size_bytes * 2;
}
else if (cf_name_a == "frontiers")
{
// Frontiers is only needed during bootstrap for legacy blocks
std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options (block_cache_size_bytes)));
cf_options = get_active_cf_options (table_factory, memtable_size_bytes);
}
else if (cf_name_a == "accounts")
{
// Can have deletions from rollbacks
std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options (block_cache_size_bytes * 2)));
cf_options = get_active_cf_options (table_factory, memtable_size_bytes);
}
else if (cf_name_a == "vote")
{
// No deletes it seems, only overwrites.
std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options (block_cache_size_bytes * 2)));
cf_options = get_active_cf_options (table_factory, memtable_size_bytes);
}
else if (cf_name_a == "pruned")
{
std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options (block_cache_size_bytes * 2)));
cf_options = get_active_cf_options (table_factory, memtable_size_bytes);
}
else if (cf_name_a == "final_votes")
{
std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options (block_cache_size_bytes * 2)));
cf_options = get_active_cf_options (table_factory, memtable_size_bytes);
}
else if (cf_name_a == "rep_weights")
{
std::shared_ptr<::rocksdb::TableFactory> table_factory (::rocksdb::NewBlockBasedTableFactory (get_active_table_options (block_cache_size_bytes * 2)));
cf_options = get_active_cf_options (table_factory, memtable_size_bytes);
}
else if (cf_name_a == ::rocksdb::kDefaultColumnFamilyName)
{
// Do nothing.
}
else
{
debug_assert (false);
cf_options.table_factory = std::shared_ptr<::rocksdb::TableFactory> (::rocksdb::NewBlockBasedTableFactory (get_active_table_options ()));
cf_options.write_buffer_size = memtable_size_bytes;
}

return cf_options;
@@ -836,6 +730,8 @@ int nano::store::rocksdb::component::clear (::rocksdb::ColumnFamilyHandle * colu
::rocksdb::ReadOptions read_options;
::rocksdb::WriteOptions write_options;
::rocksdb::WriteBatch write_batch;
read_options.readahead_size = 0; // Readahead only adds overhead on SSD drives

std::unique_ptr<::rocksdb::Iterator> it (db->NewIterator (read_options, column_family));

for (it->SeekToFirst (); it->Valid (); it->Next ())
@@ -862,32 +758,10 @@ rocksdb::Options nano::store::rocksdb::component::get_db_options ()
::rocksdb::Options db_options;
db_options.create_if_missing = true;
db_options.create_missing_column_families = true;

// TODO: review if this should be changed due to the unchecked table removal.
// Enable whole key bloom filter in memtables for ones with memtable_prefix_bloom_size_ratio set (unchecked table currently).
// It can potentially reduce CPU usage for point-look-ups.
db_options.memtable_whole_key_filtering = true;

// Sets the compaction priority
db_options.compaction_pri = ::rocksdb::CompactionPri::kMinOverlappingRatio;

// Start aggressively flushing WAL files when they reach over 1GB
db_options.max_total_wal_size = 1 * 1024 * 1024 * 1024LL;

// Optimize RocksDB. This is the easiest way to get RocksDB to perform well
// Set number of threads to use
db_options.IncreaseParallelism (rocksdb_config.io_threads);
// Optimize RocksDB. This is the easiest way to get RocksDB to perform well
db_options.OptimizeLevelStyleCompaction ();

// Adds a separate write queue for memtable/WAL
db_options.enable_pipelined_write = true;

// Default is 16, setting to -1 allows faster startup times for SSDs by allowing more files to be read in parallel.
db_options.max_file_opening_threads = -1;

// The MANIFEST file contains a history of all file operations since the last time the DB was opened and is replayed during DB open.
// Default is 1GB, lowering this to avoid replaying for too long (100MB)
db_options.max_manifest_file_size = 100 * 1024 * 1024ULL;

// Not compressing any SST files for compatibility reasons.
db_options.compression = ::rocksdb::kNoCompression;

@@ -899,75 +773,24 @@ rocksdb::Options nano::store::rocksdb::component::get_db_options ()
return db_options;
}
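
Note: after the deletions above, the surviving database options reduce to a handful of lines. A sketch of the resulting shape, as read from this hunk (an assumption about the final function, not the verbatim new code):

```cpp
// Sketch only: the DB options that remain after this cleanup, per the diff.
#include <rocksdb/options.h>

rocksdb::Options make_db_options (unsigned io_threads)
{
	rocksdb::Options db_options;
	db_options.create_if_missing = true;
	db_options.create_missing_column_families = true;
	db_options.IncreaseParallelism (io_threads); // background flush/compaction threads
	db_options.OptimizeLevelStyleCompaction (); // stock level-compaction tuning
	db_options.compression = rocksdb::kNoCompression; // kept uncompressed for compatibility
	return db_options;
}
```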

rocksdb::BlockBasedTableOptions nano::store::rocksdb::component::get_active_table_options (std::size_t lru_size) const
rocksdb::BlockBasedTableOptions nano::store::rocksdb::component::get_active_table_options () const
{
::rocksdb::BlockBasedTableOptions table_options;

// Improve point lookup performance by using the data block hash index (uses about 5% more space).
table_options.data_block_index_type = ::rocksdb::BlockBasedTableOptions::DataBlockIndexType::kDataBlockBinaryAndHash;
table_options.data_block_hash_table_util_ratio = 0.75;

// Using format_version=4 significantly reduces the index block size, in some cases around 4-5x.
// This frees more space in block cache, which would result in higher hit rate for data and filter blocks,
// or offer the same performance with a smaller block cache size.
table_options.format_version = 4;
table_options.index_block_restart_interval = 16;

// Block cache for reads
table_options.block_cache = ::rocksdb::NewLRUCache (lru_size);

// Bloom filter to help with point reads. 10bits gives 1% false positive rate.
table_options.filter_policy.reset (::rocksdb::NewBloomFilterPolicy (10, false));

// Increasing block_size decreases memory usage and space amplification, but increases read amplification.
table_options.block_size = 16 * 1024ULL;

// Whether level 0 index and filter blocks are stored in block_cache
table_options.pin_l0_filter_and_index_blocks_in_cache = true;
table_options.block_cache = ::rocksdb::NewLRUCache (1024ULL * 1024 * rocksdb_config.cache_size);

return table_options;
}
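
Note: combined with the simplified get_cf_options above, every non-default column family now shares one table factory and one write buffer size, in place of the many per-table configurations that were deleted. A self-contained sketch of that shape; make_cf_options and cache_size_mb are illustrative names, not identifiers from this PR:

```cpp
// Sketch only: the unified per-column-family setup this PR converges on.
#include <cstdint>
#include <rocksdb/cache.h>
#include <rocksdb/options.h>
#include <rocksdb/table.h>

rocksdb::ColumnFamilyOptions make_cf_options (uint16_t cache_size_mb, uint64_t write_buffer_bytes)
{
	rocksdb::BlockBasedTableOptions table_options;
	// One LRU block cache per table, sized by the new cache_size option (MB)
	table_options.block_cache = rocksdb::NewLRUCache (1024ULL * 1024 * cache_size_mb);

	rocksdb::ColumnFamilyOptions cf_options;
	cf_options.table_factory.reset (rocksdb::NewBlockBasedTableFactory (table_options));
	cf_options.write_buffer_size = write_buffer_bytes; // memtable size
	return cf_options;
}
```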

rocksdb::BlockBasedTableOptions nano::store::rocksdb::component::get_small_table_options () const
{
::rocksdb::BlockBasedTableOptions table_options;
// Improve point lookup performance by using the data block hash index (uses about 5% more space).
table_options.data_block_index_type = ::rocksdb::BlockBasedTableOptions::DataBlockIndexType::kDataBlockBinaryAndHash;
table_options.data_block_hash_table_util_ratio = 0.75;
table_options.block_size = 1024ULL;
return table_options;
}

rocksdb::ColumnFamilyOptions nano::store::rocksdb::component::get_small_cf_options (std::shared_ptr<::rocksdb::TableFactory> const & table_factory_a) const
{
auto const memtable_size_bytes = 10000;
auto cf_options = get_common_cf_options (table_factory_a, memtable_size_bytes);

// Number of files in level 0 which triggers compaction. Size of L0 and L1 should be kept similar as this is the only compaction which is single threaded
cf_options.level0_file_num_compaction_trigger = 1;

// L1 size, compaction is triggered for L0 at this size (1 SST file in L1)
cf_options.max_bytes_for_level_base = memtable_size_bytes;

return cf_options;
}

::rocksdb::ColumnFamilyOptions nano::store::rocksdb::component::get_active_cf_options (std::shared_ptr<::rocksdb::TableFactory> const & table_factory_a, unsigned long long memtable_size_bytes_a) const
{
auto cf_options = get_common_cf_options (table_factory_a, memtable_size_bytes_a);

// Number of files in level 0 which triggers compaction. Size of L0 and L1 should be kept similar as this is the only compaction which is single threaded
cf_options.level0_file_num_compaction_trigger = 4;

// L1 size, compaction is triggered for L0 at this size (4 SST files in L1)
cf_options.max_bytes_for_level_base = memtable_size_bytes_a * 4;

// Size target of levels are changed dynamically based on size of the last level
cf_options.level_compaction_dynamic_level_bytes = true;

return cf_options;
}

void nano::store::rocksdb::component::on_flush (::rocksdb::FlushJobInfo const & flush_job_info_a)
{
// Reset appropriate tombstone counters
@@ -1109,22 +932,19 @@ void nano::store::rocksdb::component::serialize_memory_stats (boost::property_tr
json.put ("block-cache-usage", val);
}

unsigned long long nano::store::rocksdb::component::blocks_memtable_size_bytes () const
{
return base_memtable_size_bytes ();
}

unsigned long long nano::store::rocksdb::component::base_memtable_size_bytes () const
{
return 1024ULL * 1024 * rocksdb_config.memory_multiplier * base_memtable_size;
}

// This is a ratio of the blocks memtable size to keep total write transaction commit size down.
unsigned nano::store::rocksdb::component::max_block_write_batch_num () const
{
return max_block_write_batch_num_m;
}

unsigned nano::store::rocksdb::component::calculate_max_block_write_batch_num () const
{
// Calculates the max write batch size from the memtable_size (write buffer) and the size of a block.
// With a memtable_size of 32 MB we will get 125672 as max block write batch
return nano::narrow_cast<unsigned> (memtable_size_bytes / (sizeof (nano::block_type) + nano::state_block::size + nano::block_sideband::size (nano::block_type::state)));
}
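
Note: to make the in-code comment concrete, a state block entry works out to roughly 267 bytes (1-byte block_type tag + 216-byte state block + 50-byte sideband; these component sizes are assumptions based on nano's state block layout, not stated in this diff), which reproduces the 125672 figure:

```cpp
// Worked check of the batch-size formula. Entry size components are assumed.
#include <cstdint>

int main ()
{
	uint64_t memtable_size_bytes = 32ULL * 1024 * 1024; // the 32 MB case from the comment
	uint64_t entry_size = 1 + 216 + 50; // block_type + state_block + sideband = 267
	uint64_t max_batch = memtable_size_bytes / entry_size; // 33554432 / 267
	return max_batch == 125672 ? 0 : 1; // exits 0: the comment's figure checks out
}
```

With the 64 MB memtable_size_bytes constant declared in rocksdb.hpp, the same formula would give roughly double that, around 251344.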

std::string nano::store::rocksdb::component::error_string (int status) const
{
return std::to_string (status);
15 changes: 5 additions & 10 deletions nano/store/rocksdb/rocksdb.hpp
@@ -108,10 +108,10 @@ class component : public nano::store::component
::rocksdb::TransactionDB * transaction_db = nullptr;
std::unique_ptr<::rocksdb::DB> db;
std::vector<std::unique_ptr<::rocksdb::ColumnFamilyHandle>> handles;
std::shared_ptr<::rocksdb::TableFactory> small_table_factory;
std::unordered_map<nano::tables, nano::mutex> write_lock_mutexes;
nano::rocksdb_config rocksdb_config;
unsigned const max_block_write_batch_num_m;
unsigned calculate_max_block_write_batch_num () const;

class tombstone_info
{
@@ -155,25 +155,20 @@ class component : public nano::store::component

void construct_column_family_mutexes ();
::rocksdb::Options get_db_options ();
::rocksdb::ColumnFamilyOptions get_common_cf_options (std::shared_ptr<::rocksdb::TableFactory> const & table_factory_a, unsigned long long memtable_size_bytes_a) const;
::rocksdb::ColumnFamilyOptions get_active_cf_options (std::shared_ptr<::rocksdb::TableFactory> const & table_factory_a, unsigned long long memtable_size_bytes_a) const;
::rocksdb::ColumnFamilyOptions get_small_cf_options (std::shared_ptr<::rocksdb::TableFactory> const & table_factory_a) const;
::rocksdb::BlockBasedTableOptions get_active_table_options (std::size_t lru_size) const;
::rocksdb::BlockBasedTableOptions get_small_table_options () const;
::rocksdb::ColumnFamilyOptions get_common_cf_options (std::shared_ptr<::rocksdb::TableFactory> const & table_factory_a) const;
::rocksdb::BlockBasedTableOptions get_active_table_options () const;
::rocksdb::ColumnFamilyOptions get_cf_options (std::string const & cf_name_a) const;

void on_flush (::rocksdb::FlushJobInfo const &);
void flush_table (nano::tables table_a);
void flush_tombstones_check (nano::tables table_a);
void generate_tombstone_map ();

std::unordered_map<char const *, nano::tables> create_cf_name_table_map () const;

std::vector<::rocksdb::ColumnFamilyDescriptor> create_column_families ();
unsigned long long base_memtable_size_bytes () const;
unsigned long long blocks_memtable_size_bytes () const;

constexpr static int base_memtable_size = 16;
constexpr static int base_block_cache_size = 8;
constexpr static int memtable_size_bytes = 1024ULL * 1024 * 64; // 64 MB write buffer

friend class nano::rocksdb_block_store_tombstone_count_Test;
friend class rocksdb_block_store_upgrade_v21_v22_Test;