diff --git a/src/limestone/blob_file_garbage_collector.cpp b/src/limestone/blob_file_garbage_collector.cpp index 94672cb..680fd9b 100644 --- a/src/limestone/blob_file_garbage_collector.cpp +++ b/src/limestone/blob_file_garbage_collector.cpp @@ -104,6 +104,7 @@ using limestone::api::log_entry; } } } + VLOG_LP(log_trace_fine) << "Blob file scan complete."; } catch (const std::exception &e) { LOG_LP(ERROR) << "Exception in blob_file_garbage_collector::scan_directory: " << e.what(); } @@ -135,12 +136,15 @@ using limestone::api::log_entry; // Calculate the difference and perform deletion operations scanned_blobs_->diff(*gc_exempt_blob_); + for (const auto &id : *scanned_blobs_) { if (shutdown_requested_.load(std::memory_order_acquire)) { break; } boost::filesystem::path file_path = resolver_->resolve_path(id); boost::system::error_code ec; + VLOG_LP(log_trace_fine) << "Removing blob id: " << id; + VLOG_LP(log_trace_fine) << "Removing blob file: " << file_path.string(); file_ops_->remove(file_path, ec); if (ec && ec != boost::system::errc::no_such_file_or_directory) { LOG_LP(ERROR) << "Failed to remove file: " << file_path.string() @@ -243,10 +247,12 @@ void blob_file_garbage_collector::scan_snapshot(const boost::filesystem::path &s if (cur->type() == log_entry::entry_type::normal_with_blob) { auto blob_ids = cur->blob_ids(); for (auto id : blob_ids) { + VLOG_LP(log_trace_fine) << "Scanned blob id: " << id; gc_exempt_blob_->add_blob_id(id); } } } + VLOG_LP(log_trace_fine) << "Snapshot scan finished."; finalize_scan_and_cleanup(); } catch (const limestone_exception &e) { LOG_LP(ERROR) << "Exception in snapshot scan thread: " << e.what(); diff --git a/src/limestone/blob_id_container.cpp b/src/limestone/blob_id_container.cpp index ac77a58..5bb6730 100644 --- a/src/limestone/blob_id_container.cpp +++ b/src/limestone/blob_id_container.cpp @@ -17,6 +17,7 @@ #include "blob_id_container.h" #include #include +#include // added for debug_string namespace limestone::internal { @@ 
-39,14 +40,15 @@ void blob_id_container::diff(const blob_id_container &other) { container_type old_ids = std::move(ids_); ids_.clear(); - std::sort(old_ids.begin(), old_ids.end()); + + container_type sorted_other = other.ids_; + std::sort(sorted_other.begin(), sorted_other.end()); - // other.ids_ はすでにソート済みであることを仮定 auto it1 = old_ids.begin(); auto end1 = old_ids.end(); - auto it2 = other.ids_.begin(); - auto end2 = other.ids_.end(); + auto it2 = sorted_other.begin(); + auto end2 = sorted_other.end(); while (it1 != end1) { while (it2 != end2 && *it2 < *it1) { @@ -90,4 +92,17 @@ void blob_id_container::sort() { std::sort(ids_.begin(), ids_.end()); } +std::string blob_id_container::debug_string() const { + std::ostringstream oss; + oss << "["; + for (std::size_t i = 0; i < ids_.size(); ++i) { + oss << ids_[i]; + if (i + 1 < ids_.size()) { + oss << ", "; + } + } + oss << "]"; + return oss.str(); +} + } // namespace limestone::internal diff --git a/src/limestone/blob_id_container.h b/src/limestone/blob_id_container.h index a16b97b..471906d 100644 --- a/src/limestone/blob_id_container.h +++ b/src/limestone/blob_id_container.h @@ -79,6 +79,9 @@ class blob_id_container { [[nodiscard]] const_iterator begin() const; [[nodiscard]] const_iterator end() const; + // Returns a string representation of the blob IDs for debugging. 
+ [[nodiscard]] std::string debug_string() const; + private: bool iterator_used_ = false; container_type ids_; diff --git a/test/limestone/blob/blob_id_container_test.cpp b/test/limestone/blob/blob_id_container_test.cpp index 417f6c5..427b0e8 100644 --- a/test/limestone/blob/blob_id_container_test.cpp +++ b/test/limestone/blob/blob_id_container_test.cpp @@ -75,6 +75,29 @@ TEST(blob_id_container_test, diff_removes_matching_items) { EXPECT_EQ(result, expected); } +TEST(blob_id_container_test, diff_removes_matching_items2) { + blob_id_container container; + container.add_blob_id(1003); + container.add_blob_id(2002); + container.add_blob_id(1002); + container.add_blob_id(1001); + container.add_blob_id(2001); + + blob_id_container other; + other.add_blob_id(2001); + other.add_blob_id(2002); + other.add_blob_id(1003); + + // Execute diff: remove blob IDs present in the other container. + container.diff(other); + + // After diff, container should contain blob IDs: 1001 and 1002. + std::vector result = get_blob_ids(container); + std::vector expected {1001, 1002}; + EXPECT_EQ(result, expected); +} + + TEST(blob_id_container_test, diff_with_our_container_empty) { // Our container is empty. blob_id_container container; diff --git a/test/limestone/blob/compaction_blob_gc_test.cpp b/test/limestone/blob/compaction_blob_gc_test.cpp new file mode 100644 index 0000000..616f4df --- /dev/null +++ b/test/limestone/blob/compaction_blob_gc_test.cpp @@ -0,0 +1,226 @@ +/* + * Copyright 2022-2024 Project Tsurugi. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + + #include "limestone/compaction/compaction_test_fixture.h" + +namespace limestone::testing { + +using namespace std::literals; +using namespace limestone::api; +using namespace limestone::internal; + + +TEST_F(compaction_test, basic_blob_gc_test) { + // Epoch 1: Prepare initial entries. + gen_datastore(); + datastore_->switch_epoch(1); + + // Create two entries with blob data using lc0. + lc0_->begin_session(); + lc0_->add_entry(1, "blob_key1", "blob_value1", {1, 0}, {1001, 1002}); + lc0_->add_entry(1, "blob_key2", "blob_value2", {1, 1}, {1003}); + lc0_->end_session(); + + // Create two entries without blob data using lc0. + lc0_->begin_session(); + lc0_->add_entry(1, "noblob_key1", "noblob_value1", {1, 2}); + lc0_->add_entry(1, "noblob_key2", "noblob_value2", {1, 3}); + lc0_->end_session(); + + // Epoch 2: Switch epoch and update some entries with the same keys. + datastore_->switch_epoch(2); + lc0_->begin_session(); + // Update "blob_key1" with new blob data. + lc0_->add_entry(1, "blob_key1", "blob_value1_epoch2", {2, 0}, {2001, 2002}); + // Update "noblob_key1" with a new value. + lc0_->add_entry(1, "noblob_key1", "noblob_value1_epoch2", {2, 1}); + lc0_->end_session(); + + // Create dummy blob files for the blob IDs. + auto path1001 = create_dummy_blob_files(1001); + auto path1002 = create_dummy_blob_files(1002); + auto path1003 = create_dummy_blob_files(1003); + auto path2001 = create_dummy_blob_files(2001); + auto path2002 = create_dummy_blob_files(2002); + + + // Verify PWAL content before compaction. + // Here, we assume that "pwal_0000" aggregates entries from both epoch 1 and epoch 2. + std::vector log_entries = read_log_file("pwal_0000", location); + // Expecting six entries: four from epoch 1 and two from epoch 2. 
+ ASSERT_EQ(log_entries.size(), 6); + EXPECT_TRUE(AssertLogEntry(log_entries[0], 1, "blob_key1", "blob_value1", 1, 0, {1001, 1002}, log_entry::entry_type::normal_with_blob)); + EXPECT_TRUE(AssertLogEntry(log_entries[1], 1, "blob_key2", "blob_value2", 1, 1, {1003}, log_entry::entry_type::normal_with_blob)); + EXPECT_TRUE(AssertLogEntry(log_entries[2], 1, "noblob_key1", "noblob_value1", 1, 2, {}, log_entry::entry_type::normal_entry)); + EXPECT_TRUE(AssertLogEntry(log_entries[3], 1, "noblob_key2", "noblob_value2", 1, 3, {}, log_entry::entry_type::normal_entry)); + EXPECT_TRUE(AssertLogEntry(log_entries[4], 1, "blob_key1", "blob_value1_epoch2", 2, 0, {2001, 2002}, log_entry::entry_type::normal_with_blob)); + EXPECT_TRUE(AssertLogEntry(log_entries[5], 1, "noblob_key1", "noblob_value1_epoch2", 2, 1, {}, log_entry::entry_type::normal_entry)); + + EXPECT_TRUE(boost::filesystem::exists(path1001)); + EXPECT_TRUE(boost::filesystem::exists(path1002)); + EXPECT_TRUE(boost::filesystem::exists(path1003)); + EXPECT_TRUE(boost::filesystem::exists(path2001)); + EXPECT_TRUE(boost::filesystem::exists(path2002)); + + // Perform compaction in epoch 3. + run_compact_with_epoch_switch(3); + + // Verify compaction catalog. + compaction_catalog catalog = compaction_catalog::from_catalog_file(location); + // Ensure that at least one compacted file exists. + EXPECT_FALSE(catalog.get_compacted_files().empty()); + // Expect the max blob id to be updated to the highest blob id in use (i.e. 2002). + EXPECT_EQ(catalog.get_max_blob_id(), 2002); + + // Verify the content of the compacted PWAL. + // Assuming the compacted file is named "pwal_0000.compacted". + log_entries = read_log_file("pwal_0000.compacted", location); + // Expected effective state: + // - "blob_key1": effective value from epoch 2 ("blob_value1_epoch2") with blob IDs {2001,2002}. + // - "blob_key2": remains from epoch 1. + // - "noblob_key1": updated in epoch 2. + // - "noblob_key2": remains from epoch 1. 
+ ASSERT_EQ(log_entries.size(), 5); + EXPECT_TRUE(AssertLogEntry(log_entries[0], 1, "blob_key1", "blob_value1_epoch2", 2, 0, {2001, 2002}, log_entry::entry_type::normal_with_blob)); + EXPECT_TRUE(AssertLogEntry(log_entries[1], 1, "blob_key1", "blob_value1", 1, 0, {1001, 1002}, log_entry::entry_type::normal_with_blob)); + EXPECT_TRUE(AssertLogEntry(log_entries[2], 1, "blob_key2", "blob_value2", 1, 1, {1003}, log_entry::entry_type::normal_with_blob)); + EXPECT_TRUE(AssertLogEntry(log_entries[3], 1, "noblob_key1", "noblob_value1_epoch2", 2, 1, {}, log_entry::entry_type::normal_entry)); + EXPECT_TRUE(AssertLogEntry(log_entries[4], 1, "noblob_key2", "noblob_value2", 1, 3, {}, log_entry::entry_type::normal_entry)); + + // Verify that all blob files still exist right after compaction. + EXPECT_TRUE(boost::filesystem::exists(path1001)); + EXPECT_TRUE(boost::filesystem::exists(path1002)); + EXPECT_TRUE(boost::filesystem::exists(path1003)); + EXPECT_TRUE(boost::filesystem::exists(path2001)); + EXPECT_TRUE(boost::filesystem::exists(path2002)); + + // Restart datastore and verify snapshot content. + std::vector> kv_list = restart_datastore_and_read_snapshot(); + ASSERT_EQ(kv_list.size(), 4); + EXPECT_EQ(kv_list[0].first, "blob_key1"); + EXPECT_EQ(kv_list[0].second, "blob_value1_epoch2"); + EXPECT_EQ(kv_list[1].first, "blob_key2"); + EXPECT_EQ(kv_list[1].second, "blob_value2"); + EXPECT_EQ(kv_list[2].first, "noblob_key1"); + EXPECT_EQ(kv_list[2].second, "noblob_value1_epoch2"); + EXPECT_EQ(kv_list[3].first, "noblob_key2"); + EXPECT_EQ(kv_list[3].second, "noblob_value2"); + + // Verify that no snapshot PWAL file exists. + log_entries = read_log_file("data/snapshot", location); + ASSERT_TRUE(log_entries.empty()); + + // Verify that blob files 1001 and 1002 have been removed, while 1003, 2001 and 2002 are still present. 
+ EXPECT_FALSE(boost::filesystem::exists(path1001)); + EXPECT_FALSE(boost::filesystem::exists(path1002)); + EXPECT_TRUE(boost::filesystem::exists(path1003)); + EXPECT_TRUE(boost::filesystem::exists(path2001)); + EXPECT_TRUE(boost::filesystem::exists(path2002)); +} + +TEST_F(compaction_test, basic_blob_gc_reboot_test) { + // Epoch 1: Prepare initial entries. + gen_datastore(); + datastore_->switch_epoch(1); + + // Create two entries with blob data using lc0. + lc0_->begin_session(); + lc0_->add_entry(1, "blob_key1", "blob_value1", {1, 0}, {1001, 1002}); + lc0_->add_entry(1, "blob_key2", "blob_value2", {1, 1}, {1003}); + lc0_->end_session(); + + // Create two entries without blob data using lc0. + lc0_->begin_session(); + lc0_->add_entry(1, "noblob_key1", "noblob_value1", {1, 2}); + lc0_->add_entry(1, "noblob_key2", "noblob_value2", {1, 3}); + lc0_->end_session(); + + // Epoch 2: Switch epoch and update some entries with the same keys. + datastore_->switch_epoch(2); + lc0_->begin_session(); + // Update "blob_key1" with new blob data. + lc0_->add_entry(1, "blob_key1", "blob_value1_epoch2", {2, 0}, {2001, 2002}); + // Update "noblob_key1" with a new value. + lc0_->add_entry(1, "noblob_key1", "noblob_value1_epoch2", {2, 1}); + lc0_->end_session(); + datastore_->switch_epoch(3); + + // Create dummy blob files for the blob IDs. + auto path1001 = create_dummy_blob_files(1001); + auto path1002 = create_dummy_blob_files(1002); + auto path1003 = create_dummy_blob_files(1003); + auto path2001 = create_dummy_blob_files(2001); + auto path2002 = create_dummy_blob_files(2002); + + // Verify PWAL content before reboot. + // Here, we assume that "pwal_0000" aggregates entries from both epoch 1 and epoch 2. + std::vector log_entries = read_log_file("pwal_0000", location); + // Expecting six entries: four from epoch 1 and two from epoch 2. 
+ ASSERT_EQ(log_entries.size(), 6); + EXPECT_TRUE(AssertLogEntry(log_entries[0], 1, "blob_key1", "blob_value1", 1, 0, {1001, 1002}, log_entry::entry_type::normal_with_blob)); + EXPECT_TRUE(AssertLogEntry(log_entries[1], 1, "blob_key2", "blob_value2", 1, 1, {1003}, log_entry::entry_type::normal_with_blob)); + EXPECT_TRUE(AssertLogEntry(log_entries[2], 1, "noblob_key1", "noblob_value1", 1, 2, {}, log_entry::entry_type::normal_entry)); + EXPECT_TRUE(AssertLogEntry(log_entries[3], 1, "noblob_key2", "noblob_value2", 1, 3, {}, log_entry::entry_type::normal_entry)); + EXPECT_TRUE(AssertLogEntry(log_entries[4], 1, "blob_key1", "blob_value1_epoch2", 2, 0, {2001, 2002}, log_entry::entry_type::normal_with_blob)); + EXPECT_TRUE(AssertLogEntry(log_entries[5], 1, "noblob_key1", "noblob_value1_epoch2", 2, 1, {}, log_entry::entry_type::normal_entry)); + + EXPECT_TRUE(boost::filesystem::exists(path1001)); + EXPECT_TRUE(boost::filesystem::exists(path1002)); + EXPECT_TRUE(boost::filesystem::exists(path1003)); + EXPECT_TRUE(boost::filesystem::exists(path2001)); + EXPECT_TRUE(boost::filesystem::exists(path2002)); + + // ----- Online compaction is NOT performed. ----- + + // Instead, restart the datastore directly. + FLAGS_v = 70; + std::vector> kv_list = restart_datastore_and_read_snapshot(); + FLAGS_v = 30; + + // Verify snapshot content. + // Expected effective state: + // - "blob_key1": updated in epoch 2 → "blob_value1_epoch2" + // - "blob_key2": remains from epoch 1. + // - "noblob_key1": updated in epoch 2 → "noblob_value1_epoch2" + // - "noblob_key2": remains from epoch 1. 
+ ASSERT_EQ(kv_list.size(), 4); + EXPECT_EQ(kv_list[0].first, "blob_key1"); + EXPECT_EQ(kv_list[0].second, "blob_value1_epoch2"); + EXPECT_EQ(kv_list[1].first, "blob_key2"); + EXPECT_EQ(kv_list[1].second, "blob_value2"); + EXPECT_EQ(kv_list[2].first, "noblob_key1"); + EXPECT_EQ(kv_list[2].second, "noblob_value1_epoch2"); + EXPECT_EQ(kv_list[3].first, "noblob_key2"); + EXPECT_EQ(kv_list[3].second, "noblob_value2"); + + // Verify the entries recorded in the snapshot file. + log_entries = read_log_file("data/snapshot", location); + ASSERT_EQ(log_entries.size(), 4); + EXPECT_TRUE(AssertLogEntry(log_entries[0], 1, "blob_key1", "blob_value1_epoch2", 2, 0, {2001, 2002}, log_entry::entry_type::normal_with_blob)); + EXPECT_TRUE(AssertLogEntry(log_entries[1], 1, "blob_key2", "blob_value2", 1, 1, {1003}, log_entry::entry_type::normal_with_blob)); + EXPECT_TRUE(AssertLogEntry(log_entries[2], 1, "noblob_key1", "noblob_value1_epoch2", 2, 1, {}, log_entry::entry_type::normal_entry)); + EXPECT_TRUE(AssertLogEntry(log_entries[3], 1, "noblob_key2", "noblob_value2", 1, 3, {}, log_entry::entry_type::normal_entry)); + + // Wait for the blob file garbage collector, then verify that blob files 1001 and 1002 have been removed, while 1003, 2001 and 2002 are still present. 
+ datastore_->wait_for_blob_file_garbace_collector(); + EXPECT_FALSE(boost::filesystem::exists(path1001)); + EXPECT_FALSE(boost::filesystem::exists(path1002)); + EXPECT_TRUE(boost::filesystem::exists(path1003)); + EXPECT_TRUE(boost::filesystem::exists(path2001)); + EXPECT_TRUE(boost::filesystem::exists(path2002)); +} + +} // namespace limestone::testing diff --git a/test/limestone/blob/log_entry_blob_test.cpp b/test/limestone/blob/log_entry_blob_test.cpp index 0b4545d..1c3404d 100644 --- a/test/limestone/blob/log_entry_blob_test.cpp +++ b/test/limestone/blob/log_entry_blob_test.cpp @@ -244,7 +244,7 @@ TEST_F(log_entry_blob_test, make_normal_with_blob_log_entry) { EXPECT_EQ(entry.type(), log_entry::entry_type::normal_with_blob); EXPECT_EQ(entry.key_sid(), key_sid); EXPECT_EQ(entry.value_etc(), value_etc); - EXPECT_EQ(entry.blob_ids(), blob_ids); + EXPECT_EQ(entry.raw_blob_ids(), blob_ids); } /** @@ -261,7 +261,7 @@ TEST_F(log_entry_blob_test, make_normal_with_blob_log_entry_default_blob_ids) { EXPECT_EQ(entry.type(), log_entry::entry_type::normal_with_blob); EXPECT_EQ(entry.key_sid(), key_sid); EXPECT_EQ(entry.value_etc(), value_etc); - EXPECT_EQ(entry.blob_ids(), std::string()); + EXPECT_EQ(entry.raw_blob_ids(), std::string()); } diff --git a/test/limestone/compaction/compaction_test_fixture.h b/test/limestone/compaction/compaction_test_fixture.h index a5d12fc..de30b6e 100644 --- a/test/limestone/compaction/compaction_test_fixture.h +++ b/test/limestone/compaction/compaction_test_fixture.h @@ -194,6 +194,19 @@ class compaction_test : public ::testing::Test { << ", Minor: " << log_entry::write_version_minor_write_version(entry.value_etc()) << std::endl; break; } + case log_entry::entry_type::normal_with_blob: { + std::string value; + entry.value(value); + std::cout << "Entry Type: normal_with_blob, Storage ID: " << storage_id << ", Key: " << key << ", Value: " << value + << ", Write Version: Epoch: " << log_entry::write_version_epoch_number(entry.value_etc()) + << 
", Minor: " << log_entry::write_version_minor_write_version(entry.value_etc()) + << ", Blob IDs: "; + for (const auto& blob_id : entry.get_blob_ids()) { + std::cout << blob_id << " "; + } + std::cout << std::endl; + break; + } case log_entry::entry_type::remove_entry: { std::cout << "Entry Type: remove_entry, Storage ID: " << storage_id << ", Key: " << key << ", Write Version: Epoch: " << log_entry::write_version_epoch_number(entry.value_etc()) @@ -384,7 +397,7 @@ class compaction_test : public ::testing::Test { } // Check the blob IDs - { + if (entry.type() == log_entry::entry_type::normal_with_blob) { std::vector actual_blob_ids = entry.get_blob_ids(); if (actual_blob_ids.size() != expected_blob_ids.size()) { return ::testing::AssertionFailure() << "Expected blob IDs size: " << expected_blob_ids.size() << ", but got: " << actual_blob_ids.size(); @@ -406,7 +419,24 @@ class compaction_test : public ::testing::Test { std::sort(list.begin(), list.end()); return list; } -}; + boost::filesystem::path create_dummy_blob_files(blob_id_type blob_id) { + boost::filesystem::path path = datastore_->get_blob_file(blob_id).path(); + if (!boost::filesystem::exists(path)) { + boost::filesystem::path dir = path.parent_path(); + if (!boost::filesystem::exists(dir)) { + if (!boost::filesystem::create_directories(dir)) { + std::cerr << "Failed to create directory: " << dir.string() << std::endl; + } + } + boost::filesystem::ofstream ofs(path, std::ios::binary); + if (!ofs) { + throw std::runtime_error("Failed to open file: " + path.string()); + } + ofs << "dummy_blob_data"; + } + return path; + } +}; } // namespace limestone::testing