Skip to content

Commit

Permalink
Add test cases for blob file GC with online compaction (WIP)
Browse files Browse the repository at this point in the history
  • Loading branch information
umegane committed Feb 20, 2025
1 parent f0d7827 commit ac199e7
Show file tree
Hide file tree
Showing 7 changed files with 311 additions and 8 deletions.
6 changes: 6 additions & 0 deletions src/limestone/blob_file_garbage_collector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ using limestone::api::log_entry;
}
}
}
VLOG_LP(log_trace_fine) << "Blob file scan complete.";
} catch (const std::exception &e) {
LOG_LP(ERROR) << "Exception in blob_file_garbage_collector::scan_directory: " << e.what();
}
Expand Down Expand Up @@ -135,12 +136,15 @@ using limestone::api::log_entry;

// Calculate the difference and perform deletion operations
scanned_blobs_->diff(*gc_exempt_blob_);

for (const auto &id : *scanned_blobs_) {
if (shutdown_requested_.load(std::memory_order_acquire)) {
break;
}
boost::filesystem::path file_path = resolver_->resolve_path(id);
boost::system::error_code ec;
VLOG_LP(log_trace_fine) << "Removing blob id: " << id;
VLOG_LP(log_trace_fine) << "Removing blob file: " << file_path.string();
file_ops_->remove(file_path, ec);
if (ec && ec != boost::system::errc::no_such_file_or_directory) {
LOG_LP(ERROR) << "Failed to remove file: " << file_path.string()
Expand Down Expand Up @@ -243,10 +247,12 @@ void blob_file_garbage_collector::scan_snapshot(const boost::filesystem::path &s
if (cur->type() == log_entry::entry_type::normal_with_blob) {
auto blob_ids = cur->blob_ids();
for (auto id : blob_ids) {
VLOG_LP(log_trace_fine) << "Scanned blob id: " << id;
gc_exempt_blob_->add_blob_id(id);
}
}
}
VLOG_LP(log_trace_fine) << "Snapshot scan finished.";
finalize_scan_and_cleanup();
} catch (const limestone_exception &e) {
LOG_LP(ERROR) << "Exception in snapshot scan thread: " << e.what();
Expand Down
23 changes: 19 additions & 4 deletions src/limestone/blob_id_container.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "blob_id_container.h"
#include <stdexcept>
#include <algorithm>
#include <sstream> // added for debug_string

namespace limestone::internal {

Expand All @@ -39,14 +40,15 @@ void blob_id_container::diff(const blob_id_container &other) {

container_type old_ids = std::move(ids_);
ids_.clear();

std::sort(old_ids.begin(), old_ids.end());

container_type sorted_other = other.ids_;
std::sort(sorted_other.begin(), sorted_other.end());

// other.ids_ はすでにソート済みであることを仮定
auto it1 = old_ids.begin();
auto end1 = old_ids.end();
auto it2 = other.ids_.begin();
auto end2 = other.ids_.end();
auto it2 = sorted_other.begin();
auto end2 = sorted_other.end();

while (it1 != end1) {
while (it2 != end2 && *it2 < *it1) {
Expand Down Expand Up @@ -90,4 +92,17 @@ void blob_id_container::sort() {
std::sort(ids_.begin(), ids_.end());
}

std::string blob_id_container::debug_string() const {
std::ostringstream oss;
oss << "[";
for (std::size_t i = 0; i < ids_.size(); ++i) {
oss << ids_[i];
if (i + 1 < ids_.size()) {
oss << ", ";
}
}
oss << "]";
return oss.str();
}

} // namespace limestone::internal
3 changes: 3 additions & 0 deletions src/limestone/blob_id_container.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ class blob_id_container {
[[nodiscard]] const_iterator begin() const;
[[nodiscard]] const_iterator end() const;

// Returns a string representation of the blob IDs for debugging.
[[nodiscard]] std::string debug_string() const;

private:
bool iterator_used_ = false;
container_type ids_;
Expand Down
23 changes: 23 additions & 0 deletions test/limestone/blob/blob_id_container_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,29 @@ TEST(blob_id_container_test, diff_removes_matching_items) {
EXPECT_EQ(result, expected);
}

// diff() must remove every ID that also appears in the other container, even
// when neither container was filled in sorted order.
TEST(blob_id_container_test, diff_removes_matching_items2) {
    // Container under test, filled in deliberately unsorted order.
    blob_id_container container;
    for (const blob_id_type id : std::vector<blob_id_type>{1003, 2002, 1002, 1001, 2001}) {
        container.add_blob_id(id);
    }

    // IDs to subtract, also unsorted.
    blob_id_container other;
    for (const blob_id_type id : std::vector<blob_id_type>{2001, 2002, 1003}) {
        other.add_blob_id(id);
    }

    // Subtract the IDs held by `other` from `container`.
    container.diff(other);

    // Only 1001 and 1002 are absent from `other`, so only they may remain.
    std::vector<blob_id_type> expected {1001, 1002};
    std::vector<blob_id_type> result = get_blob_ids(container);
    EXPECT_EQ(result, expected);
}


TEST(blob_id_container_test, diff_with_our_container_empty) {
// Our container is empty.
blob_id_container container;
Expand Down
226 changes: 226 additions & 0 deletions test/limestone/blob/compaction_blob_gc_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
/*
* Copyright 2022-2024 Project Tsurugi.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "limestone/compaction/compaction_test_fixture.h"

namespace limestone::testing {

using namespace std::literals;
using namespace limestone::api;
using namespace limestone::internal;


/**
 * Blob file GC after online compaction and a datastore restart.
 *
 * Scenario: "blob_key1" is written in epoch 1 with blobs {1001, 1002} and
 * overwritten in epoch 2 with blobs {2001, 2002}; "blob_key2" keeps blob 1003.
 * After compaction and restart, the test expects the blob files of the
 * superseded entry (1001, 1002) to be removed and all other blob files kept.
 */
TEST_F(compaction_test, basic_blob_gc_test) {
    // Epoch 1: Prepare initial entries.
    gen_datastore();
    datastore_->switch_epoch(1);

    // Create two entries with blob data using lc0.
    lc0_->begin_session();
    lc0_->add_entry(1, "blob_key1", "blob_value1", {1, 0}, {1001, 1002});
    lc0_->add_entry(1, "blob_key2", "blob_value2", {1, 1}, {1003});
    lc0_->end_session();

    // Create two entries without blob data using lc0.
    lc0_->begin_session();
    lc0_->add_entry(1, "noblob_key1", "noblob_value1", {1, 2});
    lc0_->add_entry(1, "noblob_key2", "noblob_value2", {1, 3});
    lc0_->end_session();

    // Epoch 2: Switch epoch and update some entries with the same keys.
    datastore_->switch_epoch(2);
    lc0_->begin_session();
    // Update "blob_key1" with new blob data.
    lc0_->add_entry(1, "blob_key1", "blob_value1_epoch2", {2, 0}, {2001, 2002});
    // Update "noblob_key1" with a new value.
    lc0_->add_entry(1, "noblob_key1", "noblob_value1_epoch2", {2, 1});
    lc0_->end_session();

    // Create dummy blob files for the blob IDs.
    auto path1001 = create_dummy_blob_files(1001);
    auto path1002 = create_dummy_blob_files(1002);
    auto path1003 = create_dummy_blob_files(1003);
    auto path2001 = create_dummy_blob_files(2001);
    auto path2002 = create_dummy_blob_files(2002);

    // Verify PWAL content before compaction.
    // Here, we assume that "pwal_0000" aggregates entries from both epoch 1 and epoch 2.
    std::vector<log_entry> log_entries = read_log_file("pwal_0000", location);
    // Expecting six entries: four from epoch 1 and two from epoch 2.
    ASSERT_EQ(log_entries.size(), 6);
    EXPECT_TRUE(AssertLogEntry(log_entries[0], 1, "blob_key1", "blob_value1", 1, 0, {1001, 1002}, log_entry::entry_type::normal_with_blob));
    EXPECT_TRUE(AssertLogEntry(log_entries[1], 1, "blob_key2", "blob_value2", 1, 1, {1003}, log_entry::entry_type::normal_with_blob));
    EXPECT_TRUE(AssertLogEntry(log_entries[2], 1, "noblob_key1", "noblob_value1", 1, 2, {}, log_entry::entry_type::normal_entry));
    EXPECT_TRUE(AssertLogEntry(log_entries[3], 1, "noblob_key2", "noblob_value2", 1, 3, {}, log_entry::entry_type::normal_entry));
    EXPECT_TRUE(AssertLogEntry(log_entries[4], 1, "blob_key1", "blob_value1_epoch2", 2, 0, {2001, 2002}, log_entry::entry_type::normal_with_blob));
    EXPECT_TRUE(AssertLogEntry(log_entries[5], 1, "noblob_key1", "noblob_value1_epoch2", 2, 1, {}, log_entry::entry_type::normal_entry));

    // All dummy blob files must exist before compaction.
    EXPECT_TRUE(boost::filesystem::exists(path1001));
    EXPECT_TRUE(boost::filesystem::exists(path1002));
    EXPECT_TRUE(boost::filesystem::exists(path1003));
    EXPECT_TRUE(boost::filesystem::exists(path2001));
    EXPECT_TRUE(boost::filesystem::exists(path2002));

    // Perform compaction in epoch 3.
    run_compact_with_epoch_switch(3);

    // Verify compaction catalog.
    compaction_catalog catalog = compaction_catalog::from_catalog_file(location);
    // Ensure that at least one compacted file exists.
    EXPECT_FALSE(catalog.get_compacted_files().empty());
    // Expect the max blob id to be updated to the highest blob id in use (i.e. 2002).
    EXPECT_EQ(catalog.get_max_blob_id(), 2002);

    // Verify the content of the compacted PWAL.
    // Assuming the compacted file is named "pwal_0000.compacted".
    log_entries = read_log_file("pwal_0000.compacted", location);
    // Expected content (five entries):
    //  - "blob_key1": the epoch 2 entry with blob IDs {2001, 2002}, followed by
    //    the superseded epoch 1 entry with blob IDs {1001, 1002}.
    //  - "blob_key2": remains from epoch 1.
    //  - "noblob_key1": only the epoch 2 update.
    //  - "noblob_key2": remains from epoch 1.
    // NOTE(review): the superseded "blob_key1" entry is expected to stay in the
    // compacted file - presumably so its blob IDs remain visible to blob GC; confirm.
    ASSERT_EQ(log_entries.size(), 5);
    EXPECT_TRUE(AssertLogEntry(log_entries[0], 1, "blob_key1", "blob_value1_epoch2", 2, 0, {2001, 2002}, log_entry::entry_type::normal_with_blob));
    EXPECT_TRUE(AssertLogEntry(log_entries[1], 1, "blob_key1", "blob_value1", 1, 0, {1001, 1002}, log_entry::entry_type::normal_with_blob));
    EXPECT_TRUE(AssertLogEntry(log_entries[2], 1, "blob_key2", "blob_value2", 1, 1, {1003}, log_entry::entry_type::normal_with_blob));
    EXPECT_TRUE(AssertLogEntry(log_entries[3], 1, "noblob_key1", "noblob_value1_epoch2", 2, 1, {}, log_entry::entry_type::normal_entry));
    EXPECT_TRUE(AssertLogEntry(log_entries[4], 1, "noblob_key2", "noblob_value2", 1, 3, {}, log_entry::entry_type::normal_entry));

    // Right after compaction every blob file is still expected on disk.
    EXPECT_TRUE(boost::filesystem::exists(path1001));
    EXPECT_TRUE(boost::filesystem::exists(path1002));
    EXPECT_TRUE(boost::filesystem::exists(path1003));
    EXPECT_TRUE(boost::filesystem::exists(path2001));
    EXPECT_TRUE(boost::filesystem::exists(path2002));

    // Restart datastore and verify snapshot content.
    std::vector<std::pair<std::string, std::string>> kv_list = restart_datastore_and_read_snapshot();
    ASSERT_EQ(kv_list.size(), 4);
    EXPECT_EQ(kv_list[0].first, "blob_key1");
    EXPECT_EQ(kv_list[0].second, "blob_value1_epoch2");
    EXPECT_EQ(kv_list[1].first, "blob_key2");
    EXPECT_EQ(kv_list[1].second, "blob_value2");
    EXPECT_EQ(kv_list[2].first, "noblob_key1");
    EXPECT_EQ(kv_list[2].second, "noblob_value1_epoch2");
    EXPECT_EQ(kv_list[3].first, "noblob_key2");
    EXPECT_EQ(kv_list[3].second, "noblob_value2");

    // Verify that the snapshot file contains no entries.
    log_entries = read_log_file("data/snapshot", location);
    ASSERT_TRUE(log_entries.empty());

    // Wait for the blob file garbage collector before inspecting the files,
    // exactly as basic_blob_gc_reboot_test does.
    // NOTE(review): without this wait the two EXPECT_FALSE checks below can run
    // before GC has removed the files; confirm this is the cause of the CI failure.
    datastore_->wait_for_blob_file_garbace_collector();

    // Blob files of the superseded "blob_key1" entry must have been removed;
    // blob files referenced by live entries must still be present.
    EXPECT_FALSE(boost::filesystem::exists(path1001));
    EXPECT_FALSE(boost::filesystem::exists(path1002));
    EXPECT_TRUE(boost::filesystem::exists(path1003));
    EXPECT_TRUE(boost::filesystem::exists(path2001));
    EXPECT_TRUE(boost::filesystem::exists(path2002));
}

/**
 * Blob file GC triggered by a datastore restart, without online compaction.
 *
 * Scenario: "blob_key1" is written in epoch 1 with blobs {1001, 1002} and
 * overwritten in epoch 2 with blobs {2001, 2002}; "blob_key2" keeps blob 1003.
 * After the restart and once blob GC has finished, the blob files of the
 * superseded entry (1001, 1002) must be gone and all others must remain.
 */
TEST_F(compaction_test, basic_blob_gc_reboot_test) {
    // Epoch 1: Prepare initial entries.
    gen_datastore();
    datastore_->switch_epoch(1);

    // Create two entries with blob data using lc0.
    lc0_->begin_session();
    lc0_->add_entry(1, "blob_key1", "blob_value1", {1, 0}, {1001, 1002});
    lc0_->add_entry(1, "blob_key2", "blob_value2", {1, 1}, {1003});
    lc0_->end_session();

    // Create two entries without blob data using lc0.
    lc0_->begin_session();
    lc0_->add_entry(1, "noblob_key1", "noblob_value1", {1, 2});
    lc0_->add_entry(1, "noblob_key2", "noblob_value2", {1, 3});
    lc0_->end_session();

    // Epoch 2: Switch epoch and update some entries with the same keys.
    datastore_->switch_epoch(2);
    lc0_->begin_session();
    // Update "blob_key1" with new blob data.
    lc0_->add_entry(1, "blob_key1", "blob_value1_epoch2", {2, 0}, {2001, 2002});
    // Update "noblob_key1" with a new value.
    lc0_->add_entry(1, "noblob_key1", "noblob_value1_epoch2", {2, 1});
    lc0_->end_session();
    datastore_->switch_epoch(3);

    // Create dummy blob files for the blob IDs.
    auto path1001 = create_dummy_blob_files(1001);
    auto path1002 = create_dummy_blob_files(1002);
    auto path1003 = create_dummy_blob_files(1003);
    auto path2001 = create_dummy_blob_files(2001);
    auto path2002 = create_dummy_blob_files(2002);

    // Verify PWAL content before reboot.
    // Here, we assume that "pwal_0000" aggregates entries from both epoch 1 and epoch 2.
    std::vector<log_entry> log_entries = read_log_file("pwal_0000", location);
    // Expecting six entries: four from epoch 1 and two from epoch 2.
    ASSERT_EQ(log_entries.size(), 6);
    EXPECT_TRUE(AssertLogEntry(log_entries[0], 1, "blob_key1", "blob_value1", 1, 0, {1001, 1002}, log_entry::entry_type::normal_with_blob));
    EXPECT_TRUE(AssertLogEntry(log_entries[1], 1, "blob_key2", "blob_value2", 1, 1, {1003}, log_entry::entry_type::normal_with_blob));
    EXPECT_TRUE(AssertLogEntry(log_entries[2], 1, "noblob_key1", "noblob_value1", 1, 2, {}, log_entry::entry_type::normal_entry));
    EXPECT_TRUE(AssertLogEntry(log_entries[3], 1, "noblob_key2", "noblob_value2", 1, 3, {}, log_entry::entry_type::normal_entry));
    EXPECT_TRUE(AssertLogEntry(log_entries[4], 1, "blob_key1", "blob_value1_epoch2", 2, 0, {2001, 2002}, log_entry::entry_type::normal_with_blob));
    EXPECT_TRUE(AssertLogEntry(log_entries[5], 1, "noblob_key1", "noblob_value1_epoch2", 2, 1, {}, log_entry::entry_type::normal_entry));

    // All dummy blob files must exist before the restart.
    EXPECT_TRUE(boost::filesystem::exists(path1001));
    EXPECT_TRUE(boost::filesystem::exists(path1002));
    EXPECT_TRUE(boost::filesystem::exists(path1003));
    EXPECT_TRUE(boost::filesystem::exists(path2001));
    EXPECT_TRUE(boost::filesystem::exists(path2002));

    // ----- Online compaction is NOT performed. -----

    // Instead, restart the datastore directly.
    // Verbose logging is raised only for the restart; the previous level is
    // restored afterwards so this test does not change the global verbosity
    // seen by the rest of the test run.
    const auto saved_verbosity = FLAGS_v;
    FLAGS_v = 70;
    std::vector<std::pair<std::string, std::string>> kv_list = restart_datastore_and_read_snapshot();
    FLAGS_v = saved_verbosity;

    // Verify snapshot content.
    // Expected effective state:
    //  - "blob_key1": updated in epoch 2 → "blob_value1_epoch2"
    //  - "blob_key2": remains from epoch 1.
    //  - "noblob_key1": updated in epoch 2 → "noblob_value1_epoch2"
    //  - "noblob_key2": remains from epoch 1.
    ASSERT_EQ(kv_list.size(), 4);
    EXPECT_EQ(kv_list[0].first, "blob_key1");
    EXPECT_EQ(kv_list[0].second, "blob_value1_epoch2");
    EXPECT_EQ(kv_list[1].first, "blob_key2");
    EXPECT_EQ(kv_list[1].second, "blob_value2");
    EXPECT_EQ(kv_list[2].first, "noblob_key1");
    EXPECT_EQ(kv_list[2].second, "noblob_value1_epoch2");
    EXPECT_EQ(kv_list[3].first, "noblob_key2");
    EXPECT_EQ(kv_list[3].second, "noblob_value2");

    // Verify that the snapshot file holds exactly the four effective entries.
    log_entries = read_log_file("data/snapshot", location);
    ASSERT_EQ(log_entries.size(), 4);
    EXPECT_TRUE(AssertLogEntry(log_entries[0], 1, "blob_key1", "blob_value1_epoch2", 2, 0, {2001, 2002}, log_entry::entry_type::normal_with_blob));
    EXPECT_TRUE(AssertLogEntry(log_entries[1], 1, "blob_key2", "blob_value2", 1, 1, {1003}, log_entry::entry_type::normal_with_blob));
    EXPECT_TRUE(AssertLogEntry(log_entries[2], 1, "noblob_key1", "noblob_value1_epoch2", 2, 1, {}, log_entry::entry_type::normal_entry));
    EXPECT_TRUE(AssertLogEntry(log_entries[3], 1, "noblob_key2", "noblob_value2", 1, 3, {}, log_entry::entry_type::normal_entry));

    // Wait for the blob file garbage collector, then verify that the blob
    // files of the superseded "blob_key1" entry (1001, 1002) were removed
    // while the blob files referenced by the snapshot are still present.
    datastore_->wait_for_blob_file_garbace_collector();
    EXPECT_FALSE(boost::filesystem::exists(path1001));
    EXPECT_FALSE(boost::filesystem::exists(path1002));
    EXPECT_TRUE(boost::filesystem::exists(path1003));
    EXPECT_TRUE(boost::filesystem::exists(path2001));
    EXPECT_TRUE(boost::filesystem::exists(path2002));
}

} // namespace limestone::testing
4 changes: 2 additions & 2 deletions test/limestone/blob/log_entry_blob_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ TEST_F(log_entry_blob_test, make_normal_with_blob_log_entry) {
EXPECT_EQ(entry.type(), log_entry::entry_type::normal_with_blob);
EXPECT_EQ(entry.key_sid(), key_sid);
EXPECT_EQ(entry.value_etc(), value_etc);
EXPECT_EQ(entry.blob_ids(), blob_ids);
EXPECT_EQ(entry.raw_blob_ids(), blob_ids);
}

/**
Expand All @@ -261,7 +261,7 @@ TEST_F(log_entry_blob_test, make_normal_with_blob_log_entry_default_blob_ids) {
EXPECT_EQ(entry.type(), log_entry::entry_type::normal_with_blob);
EXPECT_EQ(entry.key_sid(), key_sid);
EXPECT_EQ(entry.value_etc(), value_etc);
EXPECT_EQ(entry.blob_ids(), std::string());
EXPECT_EQ(entry.raw_blob_ids(), std::string());
}


Expand Down
Loading

0 comments on commit ac199e7

Please sign in to comment.