From 1d7814dfd9c233b137d7c706211e6e443001c155 Mon Sep 17 00:00:00 2001 From: Shinichi Umegane Date: Mon, 17 Feb 2025 14:47:43 +0900 Subject: [PATCH] Add tests for blob_file_gc_snapshot and fix bugs --- src/limestone/blob_file_gc_snapshot.cpp | 61 ++-- src/limestone/blob_file_gc_snapshot.h | 6 +- .../blob/blob_file_gc_snapshot_test.cpp | 310 ++++++++++++++++++ 3 files changed, 347 insertions(+), 30 deletions(-) create mode 100644 test/limestone/blob/blob_file_gc_snapshot_test.cpp diff --git a/src/limestone/blob_file_gc_snapshot.cpp b/src/limestone/blob_file_gc_snapshot.cpp index 1b78ab3..3d06fbd 100644 --- a/src/limestone/blob_file_gc_snapshot.cpp +++ b/src/limestone/blob_file_gc_snapshot.cpp @@ -27,11 +27,16 @@ thread_local std::shared_ptr blob_file_gc_snapshot::tls_container_ = nullptr; blob_file_gc_snapshot::blob_file_gc_snapshot(const write_version_type& threshold) - : threshold_(threshold) - { + : threshold_(threshold) { // The thread_containers_ vector and snapshot_ are default-constructed. } + blob_file_gc_snapshot::~blob_file_gc_snapshot() { + // Reset the thread-local container to avoid state leakage between tests or re-use in the same thread. + tls_container_.reset(); +} + + void blob_file_gc_snapshot::sanitize_and_add_entry(const log_entry& entry) { // Only process entries of type normal_with_blob. if (entry.type() != log_entry::entry_type::normal_with_blob) { @@ -67,34 +72,33 @@ thread_local std::shared_ptr blob_file_gc_snapshot::tls_con tls_container_->append(modified_entry); } - void blob_file_gc_snapshot::finalize_local_entries() -{ - tls_container_->sort_descending(); -} - -const log_entry_container& blob_file_gc_snapshot::finalize_snapshot() -{ - log_entry_container merged = log_entry_container::merge_sorted_collections(thread_containers_); - - // Remove duplicate entries from the merged container. - // Since the container is sorted in descending order, the first entry for a given key_sid - // is the one with the maximum write_version. - snapshot_.clear(); - std::string last_key; - for (const auto& entry : merged) { - const std::string& current_key = entry.key_sid(); - if (last_key.empty() || current_key != last_key) { - snapshot_.append(entry); - last_key = current_key; - } + void blob_file_gc_snapshot::finalize_local_entries() { + if (tls_container_) { + tls_container_->sort_descending(); + tls_container_.reset(); } + } - return snapshot_; -} + const log_entry_container& blob_file_gc_snapshot::finalize_snapshot() { + log_entry_container merged = log_entry_container::merge_sorted_collections(thread_containers_); - - void blob_file_gc_snapshot::reset() - { + // Remove duplicate entries from the merged container. + // Since the container is sorted in descending order, the first entry for a given key_sid + // is the one with the maximum write_version. + snapshot_.clear(); + std::string last_key; + for (const auto& entry : merged) { + const std::string& current_key = entry.key_sid(); + if (last_key.empty() || current_key != last_key) { + snapshot_.append(entry); + last_key = current_key; + } + } + + return snapshot_; + } + + void blob_file_gc_snapshot::reset() { { // Clean up any remaining thread-local containers. std::lock_guard lock(global_mtx_); @@ -104,7 +108,6 @@ const log_entry_container& blob_file_gc_snapshot::finalize_snapshot() // Note: The thread_local tls_container remains set in each thread. // Its lifetime is managed per thread; if needed, threads can reset it. } - - + } // namespace limestone::internal \ No newline at end of file diff --git a/src/limestone/blob_file_gc_snapshot.h b/src/limestone/blob_file_gc_snapshot.h index d41e93a..5f0685a 100644 --- a/src/limestone/blob_file_gc_snapshot.h +++ b/src/limestone/blob_file_gc_snapshot.h @@ -44,8 +44,12 @@ class blob_file_gc_snapshot { blob_file_gc_snapshot& operator=(const blob_file_gc_snapshot&) = delete; blob_file_gc_snapshot(blob_file_gc_snapshot&&) = delete; blob_file_gc_snapshot& operator=(blob_file_gc_snapshot&&) = delete; - ~blob_file_gc_snapshot() = default; + /** + * @brief Destructor that resets the thread-local container. + */ + ~blob_file_gc_snapshot(); + /* * Sanitizes and adds a log entry to the snapshot. * diff --git a/test/limestone/blob/blob_file_gc_snapshot_test.cpp b/test/limestone/blob/blob_file_gc_snapshot_test.cpp new file mode 100644 index 0000000..2f2c533 --- /dev/null +++ b/test/limestone/blob/blob_file_gc_snapshot_test.cpp @@ -0,0 +1,310 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "blob_file_gc_snapshot.h" +#include "log_entry.h" + +// Use the namespaces defined for the API and internal classes. +using namespace limestone::api; +using namespace limestone::internal; + +namespace limestone { +namespace testing { + +// Test fixture for blob_file_gc_snapshot. +class blob_file_gc_snapshot_test : public ::testing::Test { +protected: + std::string temp_dir; + int file_counter = 0; + + // Set up a temporary directory for creating log_entry instances. + void SetUp() override { + if (system("rm -rf /tmp/limestone_blob_file_gc_snapshot_test") != 0) { + std::cerr << "Failed to remove /tmp/limestone_blob_file_gc_snapshot_test" << std::endl; + } + if (system("mkdir -p /tmp/limestone_blob_file_gc_snapshot_test") != 0) { + std::cerr << "Failed to create /tmp/limestone_blob_file_gc_snapshot_test" << std::endl; + } + temp_dir = "/tmp/limestone_blob_file_gc_snapshot_test"; + file_counter = 0; + } + + void TearDown() override { + boost::filesystem::remove_all(temp_dir); + } + + // Generate a unique temporary file name in temp_dir. + std::string get_temp_file_name() { + file_counter++; + return temp_dir + "/temp_file_" + std::to_string(file_counter); + } + + /** + * @brief Creates a blob log entry using write_with_blob. + * + * This helper creates a log_entry with type normal_with_blob. + * The entry is written to a temporary file and then read back. + * + * @param storage Storage ID. + * @param key The key. + * @param value The value (payload appended after the write_version header). + * @param wv The write_version. + * @param blob_ids (Optional) vector of blob_id; default is empty. + * @return log_entry The created log entry. + */ + log_entry create_blob_log_entry(storage_id_type storage, + const std::string &key, + const std::string &value, + const write_version_type &wv, + const std::vector& blob_ids = {}) { + std::string temp_file = get_temp_file_name(); + FILE* out = std::fopen(temp_file.c_str(), "wb"); + if (!out) { + throw std::runtime_error("Failed to open temporary file for writing."); + } + // Use the new signature of write_with_blob. + log_entry::write_with_blob(out, storage, key, value, wv, blob_ids); + std::fclose(out); + + std::ifstream in(temp_file, std::ios::binary); + if (!in.is_open()) { + throw std::runtime_error("Failed to open temporary file for reading."); + } + log_entry entry; + bool rc = entry.read(in); + in.close(); + boost::filesystem::remove(temp_file); + if (!rc) { + throw std::runtime_error("Failed to read blob log entry."); + } + return entry; + } + + // Helper function to check log_entry fields. + // Checking order: storage_id, key, value, write_version. + void check_log_entry(const log_entry& entry, + storage_id_type expected_storage, + const std::string& expected_key, + const std::string& expected_value, + const write_version_type& expected_wv) { + EXPECT_EQ(entry.storage(), expected_storage); + + std::string key_buf; + entry.key(key_buf); + EXPECT_EQ(key_buf, expected_key); + + std::string value_buf; + entry.value(value_buf); + EXPECT_EQ(value_buf, expected_value); + + write_version_type actual_wv; + entry.write_version(actual_wv); + EXPECT_EQ(actual_wv, expected_wv); + } +}; + +/** + * @brief Test that a valid blob entry is sanitized and added. + * + * The sanitize_and_add_entry() method should truncate the value portion (leaving only + * the write_version header) and add the entry if its write_version is below the threshold. + */ +TEST_F(blob_file_gc_snapshot_test, sanitize_and_add_entry_valid) { + storage_id_type storage = 100; + std::string key = "testKey"; + std::string value = "testValue"; // This payload should be truncated. + write_version_type wv(50, 1); // Entry write_version. + + // Create a blob log entry. + log_entry entry = create_blob_log_entry(storage, key, value, wv); + + // Verify that the entry is of type normal_with_blob and value_etc contains header + payload. + EXPECT_EQ(entry.type(), log_entry::entry_type::normal_with_blob); + constexpr std::size_t headerSize = sizeof(epoch_id_type) + sizeof(std::uint64_t); + EXPECT_EQ(entry.value_etc().size(), headerSize + value.size()); + + // Create a blob_file_gc_snapshot with a threshold higher than the entry's write_version. + blob_file_gc_snapshot snapshot(write_version_type(100, 1)); + snapshot.sanitize_and_add_entry(entry); + snapshot.finalize_local_entries(); + const log_entry_container& snap = snapshot.finalize_snapshot(); + + // Expect one entry in the snapshot. + EXPECT_EQ(snap.size(), 1u); + + // The added entry should have its value truncated (i.e. extracted value is empty), + // while storage, key, and write_version remain unchanged. + log_entry sanitized_entry = *snap.begin(); + std::string extracted_value; + sanitized_entry.value(extracted_value); + EXPECT_TRUE(extracted_value.empty()); + check_log_entry(sanitized_entry, storage, key, "", wv); +} + +/** + * @brief Test that a non-blob entry is ignored. + * + * Entries that are not of type normal_with_blob should not be added to the snapshot. + */ +TEST_F(blob_file_gc_snapshot_test, sanitize_and_add_entry_invalid_type) { + // Create a normal entry (using log_entry::write(), not a blob entry). + storage_id_type storage = 200; + std::string key = "normalKey"; + std::string value = "normalValue"; + write_version_type wv(50, 1); + + std::string temp_file = get_temp_file_name(); + FILE* out = std::fopen(temp_file.c_str(), "wb"); + ASSERT_NE(out, nullptr); + log_entry::write(out, storage, key, value, wv); + std::fclose(out); + std::ifstream in(temp_file, std::ios::binary); + ASSERT_TRUE(in.is_open()); + log_entry entry; + bool rc = entry.read(in); + in.close(); + boost::filesystem::remove(temp_file); + ASSERT_TRUE(rc); + + EXPECT_EQ(entry.type(), log_entry::entry_type::normal_entry); + + blob_file_gc_snapshot snapshot(write_version_type(100, 1)); + snapshot.sanitize_and_add_entry(entry); + snapshot.finalize_local_entries(); + const log_entry_container& snap = snapshot.finalize_snapshot(); + + // Since the entry is not normal_with_blob, it should not be added. + EXPECT_EQ(snap.size(), 0u); +} + +/** + * @brief Test that reset() clears the snapshot. + */ +TEST_F(blob_file_gc_snapshot_test, reset_snapshot) { + storage_id_type storage = 300; + std::string key = "resetKey"; + std::string value = "resetValue"; + write_version_type wv(50, 1); + + log_entry entry = create_blob_log_entry(storage, key, value, wv); + blob_file_gc_snapshot snapshot(write_version_type(100, 1)); + snapshot.sanitize_and_add_entry(entry); + snapshot.finalize_local_entries(); + const log_entry_container& snap = snapshot.finalize_snapshot(); + EXPECT_EQ(snap.size(), 1u); + + // Reset the snapshot. + snapshot.reset(); + + // After reset, finalize_snapshot should return an empty snapshot. + const log_entry_container& snap2 = snapshot.finalize_snapshot(); + EXPECT_EQ(snap2.size(), 0u); +} + +/** + * @brief Test that finalize_snapshot() merges duplicate entries. + * + * When multiple entries with the same key are added, finalize_snapshot() should remove duplicates, + * keeping only the entry with the maximum write_version (since the container is sorted in descending order). + */ +TEST_F(blob_file_gc_snapshot_test, finalize_snapshot_merging_duplicates) { + storage_id_type storage = 400; + std::string key = "dupKey"; + std::string value1 = "val1"; + std::string value2 = "val2"; + write_version_type wv_low(10, 1); // Lower version. + write_version_type wv_high(10, 2); // Higher version. + + // Create two blob entries with the same key. + log_entry entry1 = create_blob_log_entry(storage, key, value1, wv_low); + log_entry entry2 = create_blob_log_entry(storage, key, value2, wv_high); + + blob_file_gc_snapshot snapshot(write_version_type(100, 1)); + snapshot.sanitize_and_add_entry(entry1); + snapshot.sanitize_and_add_entry(entry2); + snapshot.finalize_local_entries(); + const log_entry_container& snap = snapshot.finalize_snapshot(); + + // After merging and duplicate removal, only one entry should remain. + EXPECT_EQ(snap.size(), 1u); + + // The remaining entry should have the maximum write_version (wv_high). + log_entry merged_entry = *snap.begin(); + write_version_type merged_wv; + merged_entry.write_version(merged_wv); + EXPECT_EQ(merged_wv.get_major(), wv_high.get_major()); + EXPECT_EQ(merged_wv.get_minor(), wv_high.get_minor()); +} + +TEST_F(blob_file_gc_snapshot_test, tls_container_null_state_behavior) { + // Create a snapshot instance with a given threshold. + blob_file_gc_snapshot snapshot(write_version_type(100, 1)); + + // Ensure the internal state is reset so that tls_container_ becomes nullptr. + snapshot.reset(); + + // Calling finalize_local_entries() when tls_container_ is nullptr should not crash. + EXPECT_NO_THROW(snapshot.finalize_local_entries()); + const log_entry_container& snap1 = snapshot.finalize_snapshot(); + EXPECT_EQ(snap1.size(), 0u); + + // Now, create a valid blob entry. + storage_id_type storage = 500; + std::string key = "boundaryKey"; + std::string value = "boundaryValue"; + write_version_type wv(50, 1); + log_entry entry = create_blob_log_entry(storage, key, value, wv); + + // Calling sanitize_and_add_entry() when tls_container_ is nullptr should not crash + // and should create a new container to add the entry. + EXPECT_NO_THROW(snapshot.sanitize_and_add_entry(entry)); + snapshot.finalize_local_entries(); + const log_entry_container& snap2 = snapshot.finalize_snapshot(); + EXPECT_EQ(snap2.size(), 1u); +} + +TEST_F(blob_file_gc_snapshot_test, threshold_boundary_test) { + // Create a snapshot with threshold write_version (100, 1) + blob_file_gc_snapshot snapshot(write_version_type(100, 1)); + + // Case 1: Entry with write_version exactly equal to threshold. + // Expected: The entry should NOT be added (because it's not less than the threshold). + log_entry entry_equal = create_blob_log_entry(600, "boundaryKey", "boundaryValue", write_version_type(100, 1)); + snapshot.sanitize_and_add_entry(entry_equal); + snapshot.finalize_local_entries(); + const log_entry_container& snap_equal = snapshot.finalize_snapshot(); + EXPECT_EQ(snap_equal.size(), 0u); + + // Reset snapshot state. + snapshot.reset(); + + // Case 2: Entry with write_version just below the threshold. + // For example, (100, 0) is less than (100, 1); expected: the entry should be added. + log_entry entry_lower = create_blob_log_entry(600, "boundaryKey", "boundaryValue", write_version_type(100, 0)); + snapshot.sanitize_and_add_entry(entry_lower); + snapshot.finalize_local_entries(); + const log_entry_container& snap_lower = snapshot.finalize_snapshot(); + EXPECT_EQ(snap_lower.size(), 1u); + + // Reset snapshot state. + snapshot.reset(); + + // Case 3: Entry with write_version above the threshold. + // For example, (101, 0) is greater than (100, 1); expected: the entry should NOT be added. + log_entry entry_higher = create_blob_log_entry(600, "boundaryKey", "boundaryValue", write_version_type(101, 0)); + snapshot.sanitize_and_add_entry(entry_higher); + snapshot.finalize_local_entries(); + const log_entry_container& snap_higher = snapshot.finalize_snapshot(); + EXPECT_EQ(snap_higher.size(), 0u); +} + + + +} // namespace testing +} // namespace limestone