diff --git a/src/limestone/blob_file_gc_snapshot.cpp b/src/limestone/blob_file_gc_snapshot.cpp
index 49af5b3..1b78ab3 100644
--- a/src/limestone/blob_file_gc_snapshot.cpp
+++ b/src/limestone/blob_file_gc_snapshot.cpp
@@ -19,165 +19,85 @@
 #include "log_entry_comparator.h"
 #include
-namespace limestone {
-namespace internal {
-
-
-// Thread-local pointer to each thread's log_entry_container.
-thread_local log_entry_container* tls_container = nullptr;
-
-
-// ----------------- Implementation of blob_id_iterator -----------------
-
-blob_id_iterator::blob_id_iterator(const log_entry_container* snapshot)
-    : snapshot_(snapshot), entry_index_(0), blob_index_(0)
-{
-    // Ensure cache is initially empty.
-    cached_blob_ids_.clear();
-    advance_to_valid();
-}
-
-bool blob_id_iterator::has_next() const
-{
-    return (snapshot_ && entry_index_ < snapshot_->size());
-}
-
-limestone::api::blob_id_type blob_id_iterator::current()
-{
-    // Assume caller has checked has_next()
-    // Use the cached blob IDs.
-    limestone::api::blob_id_type result = cached_blob_ids_[blob_index_];
-    ++blob_index_;
-    advance_to_valid();
-    return result;
-}
-
-void blob_id_iterator::advance_to_valid()
-{
-    // Loop until a valid blob ID is found or we run out of entries.
-    while (snapshot_ && entry_index_ < snapshot_->size()) {
-        // If the cache is empty, load the blob IDs from the current entry.
-        if (cached_blob_ids_.empty()) {
-            const auto& entry = *std::next(snapshot_->begin(), entry_index_);
-            cached_blob_ids_ = entry.get_blob_ids();
-        }
-        if (blob_index_ < cached_blob_ids_.size()) {
-            // Valid blob found.
-            return;
-        }
-        // Otherwise, move to the next entry.
-        ++entry_index_;
-        blob_index_ = 0;
-        cached_blob_ids_.clear(); // Clear cache so it will be reloaded for the new entry.
-    }
-    // If we exit the loop, no more valid blob IDs exist.
-}
-
+namespace limestone::internal {
+
 // ----------------- Implementation of blob_file_gc_snapshot methods -----------------
-
+// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
+thread_local std::shared_ptr<log_entry_container> blob_file_gc_snapshot::tls_container_ = nullptr;
 
 blob_file_gc_snapshot::blob_file_gc_snapshot(const write_version_type& threshold)
     : threshold_(threshold)
 {
     // The thread_containers_ vector and snapshot_ are default-constructed.
 }
-
-void blob_file_gc_snapshot::sanitize_and_add_entry(const log_entry& entry)
-{
+
+void blob_file_gc_snapshot::sanitize_and_add_entry(const log_entry& entry) {
     // Only process entries of type normal_with_blob.
     if (entry.type() != log_entry::entry_type::normal_with_blob) {
         return;
     }
-
+
     // Create a modifiable copy of the entry.
+    // NOLINTNEXTLINE(performance-unnecessary-copy-initialization)
     log_entry modified_entry = entry;
-
-    // Clear the payload part of value_etc while keeping the header (write_version) intact.
-    // The header size is determined by the logical sizes of epoch_id_type and std::uint64_t.
-    constexpr std::size_t header_size = sizeof(limestone::api::epoch_id_type) + sizeof(std::uint64_t);
-    std::string& mutable_value = const_cast<std::string&>(modified_entry.value_etc());
-    if (mutable_value.size() > header_size) {
-        mutable_value.resize(header_size);
-    }
-
+
+    // Truncate unnecessary data from the value_etc field.
+    modified_entry.truncate_value_from_normal_entry();
+
     // Obtain the write_version from the modified entry.
     write_version_type entry_wv;
     modified_entry.write_version(entry_wv);
-
+
     // Compare the obtained write_version with the threshold.
     if (!(entry_wv < threshold_)) {
         return;
     }
-
+
     // Obtain or create the thread-local log_entry_container.
-    if (!tls_container) {
-        tls_container = new log_entry_container();
+    if (!tls_container_) {
+        tls_container_ = std::make_shared<log_entry_container>();
         {
             std::lock_guard<std::mutex> lock(global_mtx_);
-            thread_containers_.push_back(tls_container);
+            thread_containers_.push_back(tls_container_);
         }
     }
-
+
     // Append the modified entry into the thread-local container.
-    tls_container->append(modified_entry);
+    tls_container_->append(modified_entry);
 }
-
+
 void blob_file_gc_snapshot::finalize_local_entries() {
-    tls_container->sort_descending();
+    tls_container_->sort_descending();
 }
 
-void blob_file_gc_snapshot::finalize_snapshot()
+const log_entry_container& blob_file_gc_snapshot::finalize_snapshot()
 {
-    // Prepare a vector to collect containers from all threads.
-    std::vector<log_entry_container> containers_to_merge;
-    {
-        // Lock the global mutex to safely access thread_containers_.
-        std::lock_guard<std::mutex> lock(global_mtx_);
-        for (auto* container_ptr : thread_containers_) {
-            // Although each thread should have already finalized its local container,
-            // we call sort_descending() here as a defensive measure.
-            container_ptr->sort_descending();
-            // Move the container's content into the merging vector.
-            containers_to_merge.push_back(std::move(*container_ptr));
-            // Free the allocated memory.
-            delete container_ptr;
-        }
-        // Clear the global container list.
-        thread_containers_.clear();
-    }
+    log_entry_container merged = log_entry_container::merge_sorted_collections(thread_containers_);
 
-    // Merge all thread-local containers into a single container.
-    log_entry_container merged = log_entry_container::merge_sorted_collections(containers_to_merge);
-
-    // Remove duplicates directly into snapshot_.
+    // Remove duplicate entries from the merged container.
+    // Since the container is sorted in descending order, the first entry for a given key_sid
+    // is the one with the maximum write_version.
     snapshot_.clear();
-    // last_key is initialized to an empty string.
-    // It is guaranteed that key_sid is never an empty string, so the first entry will always be appended.
     std::string last_key;
-    for (auto it = merged.begin(); it != merged.end(); ++it) {
-        const std::string& current_key = it->key_sid();
+    for (const auto& entry : merged) {
+        const std::string& current_key = entry.key_sid();
         if (last_key.empty() || current_key != last_key) {
-            snapshot_.append(*it);
+            snapshot_.append(entry);
             last_key = current_key;
         }
     }
-}
-
-blob_id_iterator blob_file_gc_snapshot::blob_ids_iterator() const {
-    return blob_id_iterator(&snapshot_);
+    return snapshot_;
 }
+
 void blob_file_gc_snapshot::reset() {
     {
         // Clean up any remaining thread-local containers.
         std::lock_guard<std::mutex> lock(global_mtx_);
-        for (auto* container_ptr : thread_containers_) {
-            delete container_ptr;
-        }
         thread_containers_.clear();
     }
     snapshot_.clear();
@@ -185,10 +105,6 @@ blob_id_iterator blob_file_gc_snapshot::blob_ids_iterator() const {
     // Its lifetime is managed per thread; if needed, threads can reset it.
 }
 
-// These helper functions are no longer used because merging and duplicate removal are handled in finalize_snapshot.
-void blob_file_gc_snapshot::merge_entries() { }
-void blob_file_gc_snapshot::remove_duplicate_entries() { }
-
-} // namespace internal
-} // namespace limestone
-
\ No newline at end of file
+} // namespace limestone::internal
+
\ No newline at end of file
diff --git a/src/limestone/blob_file_gc_snapshot.h b/src/limestone/blob_file_gc_snapshot.h
index 15f190a..d41e93a 100644
--- a/src/limestone/blob_file_gc_snapshot.h
+++ b/src/limestone/blob_file_gc_snapshot.h
@@ -20,49 +20,7 @@
 #include "log_entry_container.h"
 #include "limestone/api/blob_id_type.h"
 
-namespace limestone {
-namespace internal {
-
-/*
- * blob_id_iterator
- *
- * A simple, Java-style iterator that allows sequential access to all blob IDs
- * contained in a snapshot (log_entry_container) without creating a separate list.
- *
- * The iterator provides:
- *   - has_next(): to check if there is a next blob ID.
- *   - current(): to retrieve the current blob ID and advance the iterator.
- *
- * Internally, the iterator caches the blob IDs from the current log entry to
- * avoid repeated calls to get_blob_ids().
- */
-class blob_id_iterator {
-public:
-    // Constructs an iterator for the given snapshot.
-    // The iterator starts at the first blob ID of the first log entry.
-    explicit blob_id_iterator(const log_entry_container* snapshot);
-
-    // Returns true if there is a next blob ID in the snapshot.
-    bool has_next() const;
-
-    // Returns the current blob ID and advances the iterator.
-    // Caller should check has_next() before calling current().
-    limestone::api::blob_id_type current();
-
-private:
-    // Pointer to the snapshot containing log entries.
-    const log_entry_container* snapshot_;
-    // Index of the current log entry within the snapshot.
-    std::size_t entry_index_;
-    // Index of the current blob ID within the blob ID list of the current log entry.
-    std::size_t blob_index_;
-    // Cached blob IDs from the current log entry to reduce repeated retrieval.
-    std::vector<limestone::api::blob_id_type> cached_blob_ids_;
-
-    // Advances internal indices (and updates the cache) to point to the next valid blob ID.
-    // If the current log entry has no further blob IDs, it advances to the next log entry.
-    void advance_to_valid();
-};
+namespace limestone::internal {
 
 /*
  * blob_file_gc_snapshot
@@ -84,8 +42,9 @@ class blob_file_gc_snapshot {
     // Disable copy and move semantics.
     blob_file_gc_snapshot(const blob_file_gc_snapshot&) = delete;
     blob_file_gc_snapshot& operator=(const blob_file_gc_snapshot&) = delete;
-    blob_file_gc_snapshot(blob_file_gc_snapshot&&) = default;
-    blob_file_gc_snapshot& operator=(blob_file_gc_snapshot&&) = default;
+    blob_file_gc_snapshot(blob_file_gc_snapshot&&) = delete;
+    blob_file_gc_snapshot& operator=(blob_file_gc_snapshot&&) = delete;
+    ~blob_file_gc_snapshot() = default;
 
     /*
      * Sanitizes and adds a log entry to the snapshot.
@@ -104,22 +63,14 @@ class blob_file_gc_snapshot {
      */
     void finalize_local_entries();
 
-
-    /*
-     * Finalizes the snapshot after all threads have finished adding entries.
+    /**
+     * @brief Finalizes the snapshot after all entries have been added and returns the snapshot.
      *
-     * This method merges all entries from the internal buffer, sorts them in descending order,
-     * and removes duplicate entries with the same key_sid by retaining only the entry with the maximum write_version.
-     */
-    void finalize_snapshot();
-
-    /*
-     * Returns a blob_id_iterator that provides sequential access to all blob IDs in the snapshot.
+     * Merges thread-local containers, sorts them in descending order, and removes duplicate entries.
      *
-     * This iterator allows clients to iterate over each blob ID without creating an additional list,
-     * thereby reducing memory overhead.
+     * @return const log_entry_container& The finalized snapshot of log entries.
      */
-    blob_id_iterator blob_ids_iterator() const;
+    const log_entry_container& finalize_snapshot();
 
     /*
      * Resets the internal state for a new garbage collection cycle.
@@ -127,31 +78,24 @@ class blob_file_gc_snapshot {
      */
     void reset();
 
 private:
-    /*
-     * Merges entries from multiple sources (if applicable).
-     */
-    void merge_entries();
-
-    /*
-     * Removes duplicate entries by retaining only the entry with the highest write_version for each key_sid.
-     */
-    void remove_duplicate_entries();
+    // Thread-local pointer to each thread's log_entry_container.
+    // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
+    static thread_local std::shared_ptr<log_entry_container> tls_container_;
 
     // The threshold for write_version used in garbage collection.
     write_version_type threshold_;
 
-    // Internal buffer to accumulate log entries from multiple threads.
-    std::vector<log_entry> entries_;
-
     // Final snapshot after merging, sorting, and duplicate removal.
     log_entry_container snapshot_;
 
     // Mutex to ensure thread-safe access to internal data.
     mutable std::mutex mtx_;
 
-    std::vector<log_entry_container*> thread_containers_;
+    // List of thread-local containers to be merged into the final snapshot.
+    std::vector<std::shared_ptr<log_entry_container>> thread_containers_;
+
+    // Global mutex to safely access thread_containers_.
     std::mutex global_mtx_;
 };
 
-} // namespace internal
-} // namespace limestone
+} // namespace limestone::internal
diff --git a/src/limestone/log_entry.h b/src/limestone/log_entry.h
index d1f0fb4..7af3087 100644
--- a/src/limestone/log_entry.h
+++ b/src/limestone/log_entry.h
@@ -482,6 +482,26 @@ class log_entry {
         return parse_blob_ids(blob_ids_);
     }
 
+    /**
+     * @brief Truncates the value portion of value_etc_, keeping only the write_version header,
+     * but only for entries of type normal_entry and normal_with_blob.
+     *
+     * For these entry types, value_etc_ contains the write_version header followed by the value.
+     * This method removes any data beyond the header, leaving only the write_version information.
+     * For other entry types, the method does nothing.
+     */
+    void truncate_value_from_normal_entry() {
+        // Process only normal_entry and normal_with_blob entry types.
+        if (entry_type_ != entry_type::normal_entry && entry_type_ != entry_type::normal_with_blob) {
+            return;
+        }
+
+        constexpr std::size_t header_size = sizeof(epoch_id_type) + sizeof(std::uint64_t);
+        if (value_etc_.size() > header_size) {
+            value_etc_.resize(header_size);
+        }
+    }
+
 private:
     entry_type entry_type_{};
     epoch_id_type epoch_id_{};
diff --git a/src/limestone/log_entry_container.cpp b/src/limestone/log_entry_container.cpp
index c2a3b4e..e6e3c0f 100644
--- a/src/limestone/log_entry_container.cpp
+++ b/src/limestone/log_entry_container.cpp
@@ -83,66 +83,67 @@
 // Static merge function using a multi-way merge algorithm with Boost's binomial_heap.
-    log_entry_container log_entry_container::merge_sorted_collections(
-        std::vector<log_entry_container>& container_list) {
-
-        log_entry_container merged;
-
-        // First, ensure that each collection is sorted.
-        for (auto& container : container_list) {
-            container.sort_descending();  // This will ensure that each collection is sorted.
-        }
-
-        // Reserve capacity for the merged entries.
-        std::size_t total_size = 0;
-        for (const auto& container : container_list) {
-            total_size += container.size();
-        }
-        // If total_size is 0, then all collections are empty; return an empty merged collection.
-        if (total_size == 0) {
-            return merged;
-        }
-
-        merged.entries_.reserve(total_size);
-
-        // Define a Boost binomial_heap for iterator_range with our custom comparator.
-        boost::heap::binomial_heap> heap;
-
-        // Push non-empty collections into the heap.
-        for (auto& container : container_list) {
-            if (container.size() > 0) {
-                iterator_range range { container.begin(), container.end() };
-                heap.push(range);
-            }
-        }
-
-        // Multi-way merge: repeatedly extract the largest key_sid element and push the next element from that range.
-        while (!heap.empty()) {
-            auto top = heap.top();
-            heap.pop();
-
-            // Append the largest key_sid log_entry to the merged collection.
-            merged.entries_.push_back(*(top.current));
-
-            // Advance the iterator in the extracted range.
-            auto next_it = top.current;
-            ++next_it;
-            if (next_it != top.end) {
-                // If there are remaining elements in this range, push the updated range back into the heap.
-                heap.push(iterator_range{ next_it, top.end });
-            }
-        }
-
-        // Mark the merged collection as sorted.
-        merged.sorted_ = true;
-
-        // Clear the input collections as specified.
-        for (auto& container : container_list) {
-            container.clear();
-        }
-
-        return merged;
-    }
+log_entry_container log_entry_container::merge_sorted_collections(
+    std::vector<std::shared_ptr<log_entry_container>>& container_list) {
+
+    log_entry_container merged;
+
+    // First, ensure that each collection is sorted.
+    for (auto& container_ptr : container_list) {
+        container_ptr->sort_descending();  // Ensure each container is sorted.
+    }
+
+    // Reserve capacity for the merged entries.
+    std::size_t total_size = 0;
+    for (const auto& container_ptr : container_list) {
+        total_size += container_ptr->size();
+    }
+    // If total_size is 0, then all collections are empty; return an empty merged container.
+    if (total_size == 0) {
+        return merged;
+    }
+
+    merged.entries_.reserve(total_size);
+
+    // Define a Boost binomial_heap for iterator_range with our custom comparator.
+    boost::heap::binomial_heap> heap;
+
+    // Push non-empty collections into the heap.
+    for (auto& container_ptr : container_list) {
+        if (container_ptr->size() > 0) {
+            // Note: container_ptr->begin() and container_ptr->end() return iterators into the internal vector.
+            iterator_range range { container_ptr->begin(), container_ptr->end() };
+            heap.push(range);
+        }
+    }
+
+    // Multi-way merge: repeatedly extract the largest key_sid element and push the next element from that range.
+    while (!heap.empty()) {
+        auto top = heap.top();
+        heap.pop();
+
+        // Append the largest key_sid log_entry to the merged collection.
+        merged.entries_.push_back(*(top.current));
+
+        // Advance the iterator in the extracted range.
+        auto next_it = top.current;
+        ++next_it;
+        if (next_it != top.end) {
+            // If there are remaining elements in this range, push the updated range back into the heap.
+            heap.push(iterator_range{ next_it, top.end });
+        }
+    }
+
+    // Mark the merged collection as sorted.
+    merged.sorted_ = true;
+
+    // Clear the input collections as specified.
+    for (auto& container_ptr : container_list) {
+        container_ptr->clear();
+    }
+
+    return merged;
+}
 
 } // namespace limestone::internal
\ No newline at end of file
diff --git a/src/limestone/log_entry_container.h b/src/limestone/log_entry_container.h
index 36955af..874cf78 100644
--- a/src/limestone/log_entry_container.h
+++ b/src/limestone/log_entry_container.h
@@ -71,9 +71,9 @@ class log_entry_container {
     /// The merge is performed internally using a merge sort algorithm.
     /// The containers provided as arguments are merged, and the original containers are cleared.
     ///
-    /// @param containers A vector of log_entry_container objects to merge.
+    /// @param container_list A vector of shared_ptr to log_entry_container objects to merge.
     /// @return A single sorted log_entry_container containing all merged entries.
-    static log_entry_container merge_sorted_collections(std::vector<log_entry_container>& container_list);
+    static log_entry_container merge_sorted_collections(std::vector<std::shared_ptr<log_entry_container>>& container_list);
 
 private:
     // Internal data structure (can be replaced later for optimizations like lazy loading)
diff --git a/test/limestone/blob/log_entry_container_test.cpp b/test/limestone/blob/log_entry_container_test.cpp
index 8b8eb80..86b576b 100644
--- a/test/limestone/blob/log_entry_container_test.cpp
+++ b/test/limestone/blob/log_entry_container_test.cpp
@@ -152,148 +152,122 @@ TEST_F(log_entry_container_test, sort_order) {
     EXPECT_EQ(it, container.end());
 }
 
-// Test merging two sorted containers.
+// Test merging three sorted containers.
 TEST_F(log_entry_container_test, merge_sorted_collections) {
-// Create three containers.
-log_entry_container container1, container2, container3;
-
-// --- Container 1 ---
-// Two entries:
-// Entry 1: storage=100, key="D", value="val1", write_version=(1,0)
-// Entry 2: storage=100, key="B", value="val2", write_version=(2,0)
-// In descending order (using sort_descending()), container1 becomes:
-// 1st: ("D", (1,0))
-// 2nd: ("B", (2,0))
-log_entry c1_e1 = create_normal_log_entry(100, "D", "val1", write_version_type(1, 0));
-log_entry c1_e2 = create_normal_log_entry(100, "B", "val2", write_version_type(2, 0));
-container1.append(c1_e1);
-container1.append(c1_e2);
-
-// --- Container 2 ---
-// Two entries:
-// Entry 1: storage=100, key="C", value="val3", write_version=(3,0)
-// Entry 2: storage=100, key="A", value="val4", write_version=(4,0)
-// Sorted descending: first "C", then "A".
-log_entry c2_e1 = create_normal_log_entry(100, "C", "val3", write_version_type(3, 0));
-log_entry c2_e2 = create_normal_log_entry(100, "A", "val4", write_version_type(4, 0));
-container2.append(c2_e1);
-container2.append(c2_e2);
-
-// --- Container 3 ---
-// Three entries:
-// Entry 1: storage=100, key="E", value="val5", write_version=(5,0)
-// Entry 2: storage=100, key="B", value="val6", write_version=(6,0)
-// Entry 3: storage=100, key="A", value="val7", write_version=(7,0)
-// Sorted descending: first "E", then "B", then "A".
-log_entry c3_e1 = create_normal_log_entry(100, "E", "val5", write_version_type(5, 0));
-log_entry c3_e2 = create_normal_log_entry(100, "B", "val6", write_version_type(6, 0));
-log_entry c3_e3 = create_normal_log_entry(100, "A", "val7", write_version_type(7, 0));
-container3.append(c3_e1);
-container3.append(c3_e2);
-container3.append(c3_e3);
-
-// Prepare a vector of containers to merge.
-// The merge_sorted_collections() function will first sort each container in descending order.
-std::vector<log_entry_container> containers{ container1, container2, container3 };
-
-// Perform the merge.
-log_entry_container merged = log_entry_container::merge_sorted_collections(containers);
-
-// Expected merged order (descending):
-// Compute ascending order first (based on storage, key, write_version ascending):
-// Ascending order would be:
-// ("A", (4,0)) from container2,
-// ("A", (7,0)) from container3,
-// ("B", (2,0)) from container1,
-// ("B", (6,0)) from container3,
-// ("C", (3,0)) from container2,
-// ("D", (1,0)) from container1,
-// ("E", (5,0)) from container3.
-// Reversing, the descending order becomes:
-// 1. ("E", (5,0)) -- container3
-// 2. ("D", (1,0)) -- container1
-// 3. ("C", (3,0)) -- container2
-// 4. ("B", (6,0)) -- container3
-// 5. ("B", (2,0)) -- container1
-// 6. ("A", (7,0)) -- container3
-// 7. ("A", (4,0)) -- container2
-
-auto it = merged.begin();
-ASSERT_NE(it, merged.end());
-check_log_entry(*it, 100, "E", "val5", write_version_type(5, 0));
-
-++it;
-ASSERT_NE(it, merged.end());
-check_log_entry(*it, 100, "D", "val1", write_version_type(1, 0));
-
-++it;
-ASSERT_NE(it, merged.end());
-check_log_entry(*it, 100, "C", "val3", write_version_type(3, 0));
-
-++it;
-ASSERT_NE(it, merged.end());
-check_log_entry(*it, 100, "B", "val6", write_version_type(6, 0));
-
-++it;
-ASSERT_NE(it, merged.end());
-check_log_entry(*it, 100, "B", "val2", write_version_type(2, 0));
-
-++it;
-ASSERT_NE(it, merged.end());
-check_log_entry(*it, 100, "A", "val7", write_version_type(7, 0));
-
-++it;
-ASSERT_NE(it, merged.end());
-check_log_entry(*it, 100, "A", "val4", write_version_type(4, 0));
-
-++it;
-EXPECT_EQ(it, merged.end());
-
-// Verify that all original containers have been cleared.
-for (auto& c : containers) {
-EXPECT_EQ(c.size(), 0u);
-}
+    // Create three containers.
+    log_entry_container container1, container2, container3;
+
+    // --- Container 1 ---
+    // Two entries:
+    // Entry 1: storage=100, key="D", value="val1", write_version=(1,0)
+    // Entry 2: storage=100, key="B", value="val2", write_version=(2,0)
+    log_entry c1_e1 = create_normal_log_entry(100, "D", "val1", write_version_type(1, 0));
+    log_entry c1_e2 = create_normal_log_entry(100, "B", "val2", write_version_type(2, 0));
+    container1.append(c1_e1);
+    container1.append(c1_e2);
+
+    // --- Container 2 ---
+    // Two entries:
+    // Entry 1: storage=100, key="C", value="val3", write_version=(3,0)
+    // Entry 2: storage=100, key="A", value="val4", write_version=(4,0)
+    log_entry c2_e1 = create_normal_log_entry(100, "C", "val3", write_version_type(3, 0));
+    log_entry c2_e2 = create_normal_log_entry(100, "A", "val4", write_version_type(4, 0));
+    container2.append(c2_e1);
+    container2.append(c2_e2);
+
+    // --- Container 3 ---
+    // Three entries:
+    // Entry 1: storage=100, key="E", value="val5", write_version=(5,0)
+    // Entry 2: storage=100, key="B", value="val6", write_version=(6,0)
+    // Entry 3: storage=100, key="A", value="val7", write_version=(7,0)
+    log_entry c3_e1 = create_normal_log_entry(100, "E", "val5", write_version_type(5, 0));
+    log_entry c3_e2 = create_normal_log_entry(100, "B", "val6", write_version_type(6, 0));
+    log_entry c3_e3 = create_normal_log_entry(100, "A", "val7", write_version_type(7, 0));
+    container3.append(c3_e1);
+    container3.append(c3_e2);
+    container3.append(c3_e3);
+
+    // Prepare a vector of containers to merge using shared_ptr.
+    std::vector<std::shared_ptr<log_entry_container>> containers;
+    containers.push_back(std::make_shared<log_entry_container>(std::move(container1)));
+    containers.push_back(std::make_shared<log_entry_container>(std::move(container2)));
+    containers.push_back(std::make_shared<log_entry_container>(std::move(container3)));
+
+    // Perform the merge.
+    log_entry_container merged = log_entry_container::merge_sorted_collections(containers);
+
+    // Expected merged order (descending):
+    // 1. ("E", (5,0)) -- container3
+    // 2. ("D", (1,0)) -- container1
+    // 3. ("C", (3,0)) -- container2
+    // 4. ("B", (6,0)) -- container3
+    // 5. ("B", (2,0)) -- container1
+    // 6. ("A", (7,0)) -- container3
+    // 7. ("A", (4,0)) -- container2
+    auto it = merged.begin();
+    ASSERT_NE(it, merged.end());
+    check_log_entry(*it, 100, "E", "val5", write_version_type(5, 0));
+
+    ++it;
+    ASSERT_NE(it, merged.end());
+    check_log_entry(*it, 100, "D", "val1", write_version_type(1, 0));
+
+    ++it;
+    ASSERT_NE(it, merged.end());
+    check_log_entry(*it, 100, "C", "val3", write_version_type(3, 0));
+
+    ++it;
+    ASSERT_NE(it, merged.end());
+    check_log_entry(*it, 100, "B", "val6", write_version_type(6, 0));
+
+    ++it;
+    ASSERT_NE(it, merged.end());
+    check_log_entry(*it, 100, "B", "val2", write_version_type(2, 0));
+
+    ++it;
+    ASSERT_NE(it, merged.end());
+    check_log_entry(*it, 100, "A", "val7", write_version_type(7, 0));
+
+    ++it;
+    ASSERT_NE(it, merged.end());
+    check_log_entry(*it, 100, "A", "val4", write_version_type(4, 0));
+
+    ++it;
+    EXPECT_EQ(it, merged.end());
+
+    // Verify that all original containers have been cleared.
+    for (auto& uptr : containers) {
+        EXPECT_EQ(uptr->size(), 0u);
+    }
 }
 
+
+// Test case: Merge sorted collections with an empty container included.
 TEST_F(log_entry_container_test, merge_sorted_collections_with_empty_container) {
     // Create two non-empty containers and one empty container.
     log_entry_container container1, container2, container_empty;
 
     // --- Container 1 ---
-    // Two entries:
-    // Entry 1: storage=100, key="B", value="val1", write_version=(2,0)
-    // Entry 2: storage=100, key="A", value="val2", write_version=(1,0)
     log_entry c1_e1 = create_normal_log_entry(100, "B", "val1", write_version_type(2, 0));
     log_entry c1_e2 = create_normal_log_entry(100, "A", "val2", write_version_type(1, 0));
     container1.append(c1_e1);
     container1.append(c1_e2);
 
     // --- Container 2 ---
-    // One entry:
-    // Entry 1: storage=100, key="C", value="val3", write_version=(3,0)
     log_entry c2_e1 = create_normal_log_entry(100, "C", "val3", write_version_type(3, 0));
     container2.append(c2_e1);
 
-    // --- Container Empty ---
-    // This container remains empty.
-
-    // Prepare a vector of containers to merge.
-    // Note: container_empty is intentionally empty.
-    std::vector<log_entry_container> containers{ container1, container_empty, container2 };
+    // Prepare vector using shared_ptr.
+    std::vector<std::shared_ptr<log_entry_container>> containers;
+    containers.push_back(std::make_unique<log_entry_container>(std::move(container1)));
+    containers.push_back(std::make_unique<log_entry_container>(std::move(container_empty)));
+    containers.push_back(std::make_unique<log_entry_container>(std::move(container2)));
 
     // Perform the merge.
     log_entry_container merged = log_entry_container::merge_sorted_collections(containers);
 
-    // Expected merged order (descending order):
-    // - Since sort_descending() is used and key_sid() reflects storage and key,
-    //   we assume that keys are compared lexicographically.
-    // - With keys "A", "B", "C" and descending order, "C" is the largest,
-    //   then "B", then "A".
-    // - Thus, expected merged order:
-    //   1st: ("C", "val3", (3,0))
-    //   2nd: ("B", "val1", (2,0))
-    //   3rd: ("A", "val2", (1,0))
+    // Expected descending order: "C" > "B" > "A"
     auto it = merged.begin();
     ASSERT_NE(it, merged.end());
     check_log_entry(*it, 100, "C", "val3", write_version_type(3, 0));
@@ -310,8 +284,8 @@ TEST_F(log_entry_container_test, merge_sorted_collections_with_empty_container)
     EXPECT_EQ(it, merged.end());
 
     // Verify that all original containers have been cleared.
-    for (auto& c : containers) {
-        EXPECT_EQ(c.size(), 0u);
+    for (auto& uptr : containers) {
+        EXPECT_EQ(uptr->size(), 0u);
     }
 }
 
@@ -320,8 +294,11 @@ TEST_F(log_entry_container_test, merge_all_empty_containers) {
     // Create three empty containers.
     log_entry_container container1, container2, container3;
 
-    // Prepare a vector of empty containers.
-    std::vector<log_entry_container> containers{ container1, container2, container3 };
+    // Prepare vector using shared_ptr.
+    std::vector<std::shared_ptr<log_entry_container>> containers;
+    containers.push_back(std::make_unique<log_entry_container>(std::move(container1)));
+    containers.push_back(std::make_unique<log_entry_container>(std::move(container2)));
+    containers.push_back(std::make_unique<log_entry_container>(std::move(container3)));
 
     // Perform the merge.
     log_entry_container merged = log_entry_container::merge_sorted_collections(containers);
@@ -330,15 +307,15 @@ TEST_F(log_entry_container_test, merge_all_empty_containers) {
     EXPECT_EQ(merged.size(), 0u);
 
     // Also, each original container should be cleared.
-    for (auto& c : containers) {
-        EXPECT_EQ(c.size(), 0u);
+    for (auto& uptr : containers) {
+        EXPECT_EQ(uptr->size(), 0u);
     }
 }
 
 // Test case: Container list is empty.
 TEST_F(log_entry_container_test, merge_empty_container_list) {
-    // Prepare an empty vector of containers.
-    std::vector<log_entry_container> containers;
+    // Prepare an empty vector of containers using shared_ptr.
+    std::vector<std::shared_ptr<log_entry_container>> containers;
 
     // Perform the merge.
     log_entry_container merged = log_entry_container::merge_sorted_collections(containers);
@@ -347,30 +324,31 @@ TEST_F(log_entry_container_test, merge_empty_container_list) {
     EXPECT_EQ(merged.size(), 0u);
 }
 
+
 // Test case: Each container contains a single entry.
 TEST_F(log_entry_container_test, merge_single_entry_containers) {
     // Create three containers, each with one entry.
     log_entry_container container1, container2, container3;
 
-    // Container 1: one entry.
     log_entry c1_e1 = create_normal_log_entry(100, "A", "val1", write_version_type(1, 0));
     container1.append(c1_e1);
 
-    // Container 2: one entry.
     log_entry c2_e1 = create_normal_log_entry(100, "B", "val2", write_version_type(2, 0));
     container2.append(c2_e1);
 
-    // Container 3: one entry.
     log_entry c3_e1 = create_normal_log_entry(100, "C", "val3", write_version_type(3, 0));
    container3.append(c3_e1);
 
-    // Prepare a vector of containers.
-    std::vector<log_entry_container> containers{ container1, container2, container3 };
+    // Prepare vector using shared_ptr.
+    std::vector<std::shared_ptr<log_entry_container>> containers;
+    containers.push_back(std::make_unique<log_entry_container>(std::move(container1)));
+    containers.push_back(std::make_unique<log_entry_container>(std::move(container2)));
+    containers.push_back(std::make_unique<log_entry_container>(std::move(container3)));
 
     // Perform the merge.
     log_entry_container merged = log_entry_container::merge_sorted_collections(containers);
 
-    // Expected descending order: key "C" > "B" > "A"
+    // Expected descending order: "C" > "B" > "A"
     auto it = merged.begin();
     ASSERT_NE(it, merged.end());
     check_log_entry(*it, 100, "C", "val3", write_version_type(3, 0));
@@ -387,50 +365,46 @@ TEST_F(log_entry_container_test, merge_single_entry_containers) {
     EXPECT_EQ(it, merged.end());
 
     // Verify that all original containers have been cleared.
-    for (auto& c : containers) {
-        EXPECT_EQ(c.size(), 0u);
+    for (auto& uptr : containers) {
+        EXPECT_EQ(uptr->size(), 0u);
     }
 }
 
+
 // Test case: Merge sorted collections with duplicate entries.
 TEST_F(log_entry_container_test, merge_with_duplicate_entries) {
     // Create two containers with duplicate entries.
     log_entry_container container1, container2;
 
-    // Both containers contain entries with identical values:
-    // Entry: storage=100, key="X", value="dup", write_version=(5,0)
     log_entry dup_entry1 = create_normal_log_entry(100, "X", "dup", write_version_type(5, 0));
     log_entry dup_entry2 = create_normal_log_entry(100, "X", "dup", write_version_type(5, 0));
     log_entry dup_entry3 = create_normal_log_entry(100, "X", "dup", write_version_type(5, 0));
 
-    // Container 1: two duplicate entries.
     container1.append(dup_entry1);
     container1.append(dup_entry2);
-
-    // Container 2: one duplicate entry.
     container2.append(dup_entry3);
 
-    // Prepare a vector of containers to merge.
-    std::vector<log_entry_container> containers{ container1, container2 };
+    // Prepare vector using shared_ptr.
+    std::vector<std::shared_ptr<log_entry_container>> containers;
+    containers.push_back(std::make_unique<log_entry_container>(std::move(container1)));
+    containers.push_back(std::make_unique<log_entry_container>(std::move(container2)));
 
     // Perform the merge.
     log_entry_container merged = log_entry_container::merge_sorted_collections(containers);
 
     // Since all entries are identical, merged container should contain all 3 entries.
-    auto it = merged.begin();
     int count = 0;
-    for (; it != merged.end(); ++it) {
+    for (auto it = merged.begin(); it != merged.end(); ++it) {
         check_log_entry(*it, 100, "X", "dup", write_version_type(5, 0));
         count++;
     }
     EXPECT_EQ(count, 3);
 
     // Verify that all original containers have been cleared.
-    for (auto& c : containers) {
-        EXPECT_EQ(c.size(), 0u);
+    for (auto& uptr : containers) {
+        EXPECT_EQ(uptr->size(), 0u);
     }
 }
-
 } // namespace testing
 } // namespace limestone
\ No newline at end of file
diff --git a/test/limestone/blob/log_entry_truncate_value_test.cpp b/test/limestone/blob/log_entry_truncate_value_test.cpp
new file mode 100644
index 0000000..1772383
--- /dev/null
+++ b/test/limestone/blob/log_entry_truncate_value_test.cpp
@@ -0,0 +1,231 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "log_entry.h"
+
+using namespace limestone::api;
+
+namespace limestone::testing {
+
+// Test fixture for testing truncate_value_from_normal_entry()
+class log_entry_truncate_value_test : public ::testing::Test {
+protected:
+    std::string temp_dir;
+    int file_counter = 0;
+
+    void SetUp() override {
+        // Remove any previous test directory.
+        if (system("rm -rf /tmp/limestone_log_entry_truncate_value_test") != 0) {
+            std::cerr << "Failed to remove directory /tmp/limestone_log_entry_truncate_value_test" << std::endl;
+        }
+        // Create the test directory.
+ if (system("mkdir -p /tmp/limestone_log_entry_truncate_value_test") != 0) { + std::cerr << "Failed to create directory /tmp/limestone_log_entry_truncate_value_test" << std::endl; + } + temp_dir = "/tmp/limestone_log_entry_truncate_value_test"; + file_counter = 0; + } + + void TearDown() override { + boost::filesystem::remove_all(temp_dir); + } + + std::string get_temp_file_name() { + file_counter++; + return temp_dir + "/temp_file_" + std::to_string(file_counter); + } +}; + +// Helper: Returns the header size for write_version (epoch_id_type + std::uint64_t). +constexpr std::size_t header_size() { + return sizeof(epoch_id_type) + sizeof(std::uint64_t); +} + +/** + * @brief Verify that for a normal_entry created with log_entry::write(), + * truncate_value_from_normal_entry() removes the appended value data + * (resulting in an empty extracted value), while storage_id, key, + * and write_version remain unchanged. + * + * The checking order is: storage_id, key, value, write_version. + */ +TEST_F(log_entry_truncate_value_test, truncate_value_normal_entry) { + // Prepare test data. + storage_id_type storage = 123; + std::string key = "testKey"; + std::string value = "testValue"; // The value part appended after the header. + write_version_type wv(100, 10); + + // Write a normal entry using log_entry::write(). + std::string temp_file = get_temp_file_name(); + FILE* out = std::fopen(temp_file.c_str(), "wb"); + ASSERT_NE(out, nullptr); + log_entry::write(out, storage, key, value, wv); + std::fclose(out); + + // Read the entry back. + std::ifstream in(temp_file, std::ios::binary); + ASSERT_TRUE(in.is_open()); + log_entry entry; + bool rc = entry.read(in); + in.close(); + boost::filesystem::remove(temp_file); + ASSERT_TRUE(rc); + + // Verify that the entry is of type normal_entry and that value_etc contains header + value. + EXPECT_EQ(entry.type(), log_entry::entry_type::normal_entry); + EXPECT_EQ(entry.value_etc().size(), header_size() + value.size()); + + // Capture fields before truncation in order: storage_id, key, value, write_version. + storage_id_type storage_before = entry.storage(); + std::string key_before; + entry.key(key_before); + std::string value_before; + entry.value(value_before); + write_version_type wv_before; + entry.write_version(wv_before); + + EXPECT_EQ(storage_before, storage); + EXPECT_EQ(key_before, key); + EXPECT_EQ(value_before, value); + EXPECT_EQ(wv_before.get_major(), wv.get_major()); + EXPECT_EQ(wv_before.get_minor(), wv.get_minor()); + + // Invoke truncation. + entry.truncate_value_from_normal_entry(); + + // After truncation, only the header should remain. + EXPECT_EQ(entry.value_etc().size(), header_size()); + + // Verify that storage_id, key, and write_version remain unchanged. + storage_id_type storage_after = entry.storage(); + std::string key_after; + entry.key(key_after); + std::string value_after; + entry.value(value_after); + write_version_type wv_after; + entry.write_version(wv_after); + + EXPECT_EQ(storage_after, storage_before); + EXPECT_EQ(key_after, key_before); + EXPECT_EQ(wv_after.get_major(), wv_before.get_major()); + EXPECT_EQ(wv_after.get_minor(), wv_before.get_minor()); + // The extracted value should now be empty. + EXPECT_TRUE(value_after.empty()); +} + +/** + * @brief Verify that for a normal_with_blob entry created with log_entry::write_with_blob(), + * truncate_value_from_normal_entry() removes the appended value data + * (resulting in an empty extracted value), while storage_id, key, + * and write_version remain unchanged. 
+ *
+ * The checking order is: storage_id, key, value, write_version.
+ *
+ * This test uses the new write_with_blob signature:
+ *   write_with_blob(FILE*, storage_id_type, std::string_view key, std::string_view value,
+ *                   write_version_type, const std::vector<blob_id_type>& large_objects)
+ */
+TEST_F(log_entry_truncate_value_test, truncate_value_with_blob) {
+    // Prepare test data.
+    storage_id_type storage = 456;
+    std::string key = "blobKey";
+    std::string value = "blobValue";  // The value part appended after the header.
+    write_version_type wv(200, 20);
+    // Prepare some dummy blob_ids.
+    std::vector<blob_id_type> large_objects = { 42, 43, 44 };
+
+    // Write the blob entry using the new write_with_blob.
+    std::string temp_file = get_temp_file_name();
+    FILE* out = std::fopen(temp_file.c_str(), "wb");
+    ASSERT_NE(out, nullptr);
+    log_entry::write_with_blob(out, storage, key, value, wv, large_objects);
+    std::fclose(out);
+
+    // Read the entry back.
+    std::ifstream in(temp_file, std::ios::binary);
+    ASSERT_TRUE(in.is_open());
+    log_entry entry;
+    bool rc = entry.read(in);
+    in.close();
+    boost::filesystem::remove(temp_file);
+    ASSERT_TRUE(rc);
+
+    // Verify that the entry is of type normal_with_blob and that value_etc contains header + value.
+    EXPECT_EQ(entry.type(), log_entry::entry_type::normal_with_blob);
+    EXPECT_EQ(entry.value_etc().size(), header_size() + value.size());
+
+    // Capture fields before truncation in order: storage_id, key, value, write_version.
+    storage_id_type storage_before = entry.storage();
+    std::string key_before;
+    entry.key(key_before);
+    std::string value_before;
+    entry.value(value_before);
+    write_version_type wv_before;
+    entry.write_version(wv_before);
+
+    EXPECT_EQ(storage_before, storage);
+    EXPECT_EQ(key_before, key);
+    EXPECT_EQ(value_before, value);
+    EXPECT_EQ(wv_before.get_major(), wv.get_major());
+    EXPECT_EQ(wv_before.get_minor(), wv.get_minor());
+
+    // Invoke truncation.
+    entry.truncate_value_from_normal_entry();
+
+    // After truncation, only the header should remain.
+    EXPECT_EQ(entry.value_etc().size(), header_size());
+
+    // Verify that storage_id, key, and write_version remain unchanged.
+    storage_id_type storage_after = entry.storage();
+    std::string key_after;
+    entry.key(key_after);
+    std::string value_after;
+    entry.value(value_after);
+    write_version_type wv_after;
+    entry.write_version(wv_after);
+
+    EXPECT_EQ(storage_after, storage_before);
+    EXPECT_EQ(key_after, key_before);
+    EXPECT_EQ(wv_after.get_major(), wv_before.get_major());
+    EXPECT_EQ(wv_after.get_minor(), wv_before.get_minor());
+    // The extracted value should now be empty.
+    EXPECT_TRUE(value_after.empty());
+}
+
+/**
+ * @brief Verify that for an entry type without a value portion (e.g. marker_begin),
+ * truncate_value_from_normal_entry() does not modify value_etc.
+ */
+TEST_F(log_entry_truncate_value_test, truncate_value_non_normal_entry) {
+    // Create a marker_begin entry.
+    std::string temp_file = get_temp_file_name();
+    FILE* out = std::fopen(temp_file.c_str(), "wb");
+    ASSERT_NE(out, nullptr);
+    epoch_id_type epoch = 999;
+    log_entry::begin_session(out, epoch);
+    std::fclose(out);
+
+    std::ifstream in(temp_file, std::ios::binary);
+    ASSERT_TRUE(in.is_open());
+    log_entry entry;
+    bool rc = entry.read(in);
+    in.close();
+    boost::filesystem::remove(temp_file);
+    ASSERT_TRUE(rc);
+
+    // Capture the original value_etc.
+    std::string original_value_etc = entry.value_etc();
+    // Invoke truncation.
+    entry.truncate_value_from_normal_entry();
+    // For entries like marker_begin that do not have a value portion, value_etc should remain unchanged.
+    EXPECT_EQ(entry.value_etc(), original_value_etc);
+}
+
+} // namespace limestone::testing
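
Reviewer note (not part of the patch): the sketch below shows how a caller is expected to use the API shape this change settles on, namely per-thread sanitize_and_add_entry() plus finalize_local_entries(), followed by a single-threaded finalize_snapshot() whose returned log_entry_container is iterated directly in place of the removed blob_id_iterator. Only the member functions and types visible in the diffs above are taken from the patch; the helper name collect_gc_blob_ids, the per-thread partitioning of entries, and the unqualified name lookup inside limestone::internal are illustrative assumptions.

// Usage sketch only; assumes the limestone internal headers are on the include path.
#include <thread>
#include <vector>

#include "blob_file_gc_snapshot.h"
#include "log_entry.h"

namespace limestone::internal {

// Hypothetical helper: gathers the blob IDs of entries that survive the GC threshold filter.
std::vector<limestone::api::blob_id_type> collect_gc_blob_ids(
        const std::vector<std::vector<log_entry>>& per_thread_entries,
        const write_version_type& threshold) {
    blob_file_gc_snapshot snapshot(threshold);

    std::vector<std::thread> workers;
    workers.reserve(per_thread_entries.size());
    for (const auto& entries : per_thread_entries) {
        workers.emplace_back([&snapshot, &entries] {
            // Entries that are not normal_with_blob, or whose write_version is at or
            // above the threshold, are skipped inside sanitize_and_add_entry().
            for (const auto& e : entries) {
                snapshot.sanitize_and_add_entry(e);
            }
            // Sort this thread's local container before the global merge.
            // Note: as written in this patch, finalize_local_entries() dereferences the
            // thread-local container, so each worker is assumed to have added at least one entry.
            snapshot.finalize_local_entries();
        });
    }
    for (auto& w : workers) { w.join(); }

    // Single-threaded: merge, sort descending, and deduplicate by key_sid.
    const log_entry_container& snap = snapshot.finalize_snapshot();

    // The removed blob_id_iterator is replaced by iterating the container directly.
    std::vector<limestone::api::blob_id_type> ids;
    for (const auto& entry : snap) {
        for (limestone::api::blob_id_type id : entry.get_blob_ids()) {
            ids.push_back(id);
        }
    }
    return ids;
}

} // namespace limestone::internal

Since finalize_snapshot() now returns a reference to the snapshot held inside blob_file_gc_snapshot, a caller either keeps the snapshot object alive while reading the result or copies the IDs out, as in the sketch above.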