Skip to content

Commit

Permalink
Refactor compaction_options to hold blob_file_gc_snapshot by referenc…
Browse files Browse the repository at this point in the history
…e and support copy/move semantics
  • Loading branch information
umegane committed Feb 18, 2025
1 parent f4ad140 commit 8a5de59
Show file tree
Hide file tree
Showing 5 changed files with 97 additions and 88 deletions.
10 changes: 7 additions & 3 deletions src/limestone/blob_file_gc_snapshot.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
thread_local std::shared_ptr<log_entry_container> blob_file_gc_snapshot::tls_container_ = nullptr;

blob_file_gc_snapshot::blob_file_gc_snapshot(const write_version_type& threshold)
: threshold_(threshold) {
blob_file_gc_snapshot::blob_file_gc_snapshot(const write_version_type& boundary_version)
: boundary_version_(boundary_version) {
// The thread_containers_ vector and snapshot_ are default-constructed.
}

Expand Down Expand Up @@ -55,7 +55,7 @@ thread_local std::shared_ptr<log_entry_container> blob_file_gc_snapshot::tls_con
modified_entry.write_version(entry_wv);

// Compare the obtained write_version with the threshold.
if (!(entry_wv < threshold_)) {
if (!(entry_wv < boundary_version_)) {
return;
}

Expand Down Expand Up @@ -109,5 +109,9 @@ thread_local std::shared_ptr<log_entry_container> blob_file_gc_snapshot::tls_con
// Its lifetime is managed per thread; if needed, threads can reset it.
}

const write_version_type& blob_file_gc_snapshot::boundary_version() const {
return boundary_version_;
}

} // namespace limestone::internal

19 changes: 13 additions & 6 deletions src/limestone/blob_file_gc_snapshot.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,10 @@ namespace limestone::internal {
class blob_file_gc_snapshot {
public:
/*
* Constructs a blob_file_gc_snapshot with the given threshold.
* @param threshold The write_version threshold for garbage collection.
* Constructs a blob_file_gc_snapshot with the given boundary_version.
* @param boundary_version The boundary_version for garbage collection.
*/
explicit blob_file_gc_snapshot(const write_version_type& threshold);
explicit blob_file_gc_snapshot(const write_version_type& boundary_version);

// Disable copy and move semantics.
blob_file_gc_snapshot(const blob_file_gc_snapshot&) = delete;
Expand All @@ -55,7 +55,7 @@ class blob_file_gc_snapshot {
*
* Only entries of type normal_with_blob are processed.
* The method clears the payload from the entry’s value_etc (keeping the write_version header)
* and adds the entry if its write_version is below the threshold.
* and adds the entry if its write_version is below the boundary_version.
*
* @param entry The log_entry to be processed and potentially added.
*/
Expand All @@ -81,13 +81,20 @@ class blob_file_gc_snapshot {
*/
void reset();

/**
* @brief Returns the boundary version used for garbage collection.
*
* @return const write_version_type& The boundary version.
*/
const write_version_type& boundary_version() const;

private:
// Thread-local pointer to each thread's log_entry_container.
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
static thread_local std::shared_ptr<log_entry_container> tls_container_;

// The threshold for write_version used in garbage collection.
write_version_type threshold_;
// The boundary version for write_version used in garbage collection.
write_version_type boundary_version_;

// Final snapshot after merging, sorting, and duplicate removal.
log_entry_container snapshot_;
Expand Down
148 changes: 72 additions & 76 deletions src/limestone/compaction_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,80 +14,76 @@
* limitations under the License.
*/

#pragma once
#pragma once

#include <optional>
#include <set>
#include <string>
#include <boost/filesystem.hpp>
#include "blob_file_gc_snapshot.h"
#include "limestone/api/write_version_type.h"

namespace limestone::internal {

class compaction_options {
public:
// Constructor: No file set
compaction_options(
const boost::filesystem::path& from,
const boost::filesystem::path& to,
int workers
) : from_dir_(from), to_dir_(to), num_worker_(workers),
file_names_(), use_file_set_(false),
available_boundary_version_(0, 0),
gc_snapshot_(std::nullopt) // GC is disabled
{}

// Constructor: File set available, GC disabled
compaction_options(
const boost::filesystem::path& from,
const boost::filesystem::path& to,
int workers,
std::set<std::string> file_names
) : from_dir_(from), to_dir_(to), num_worker_(workers),
file_names_(file_names), use_file_set_(true),
available_boundary_version_(0, 0),
gc_snapshot_(std::nullopt) // GC is disabled
{}

// Constructor: File set available, GC enabled
compaction_options(
const boost::filesystem::path& from,
const boost::filesystem::path& to,
int workers,
std::set<std::string> file_names,
write_version_type boundary_version
) : from_dir_(from), to_dir_(to), num_worker_(workers),
file_names_(file_names), use_file_set_(true),
available_boundary_version_(boundary_version),
gc_snapshot_(std::in_place, boundary_version) // GC is enabled
{}

// Getters
const boost::filesystem::path& get_from_dir() const { return from_dir_; }
const boost::filesystem::path& get_to_dir() const { return to_dir_; }
int get_num_worker() const { return num_worker_; }

const std::set<std::string>& get_file_names() const { return file_names_; }
bool is_using_file_set() const { return use_file_set_; }

bool is_gc_enabled() const { return gc_snapshot_.has_value(); }
write_version_type get_available_boundary_version() const { return available_boundary_version_; }
const blob_file_gc_snapshot& get_gc_snapshot() const { return gc_snapshot_.value(); }

private:
// Basic compaction settings
boost::filesystem::path from_dir_;
boost::filesystem::path to_dir_;
int num_worker_;

std::set<std::string> file_names_;
bool use_file_set_;

// Garbage collection settings
write_version_type available_boundary_version_;
std::optional<blob_file_gc_snapshot> gc_snapshot_;
};

} // namespace limestone::internal

#include <optional>
#include <set>
#include <string>
#include <functional> // std::reference_wrapper
#include <boost/filesystem.hpp>
#include "blob_file_gc_snapshot.h"
#include "limestone/api/write_version_type.h"

namespace limestone::internal {

class compaction_options {
public:
// Constructor: No file set, GC disabled
compaction_options(
const boost::filesystem::path& from,
const boost::filesystem::path& to,
int workers
) : from_dir_(from), to_dir_(to), num_worker_(workers),
file_names_(), use_file_set_(false),
gc_snapshot_(std::nullopt) // GC is disabled
{}

// Constructor: File set available, GC disabled
compaction_options(
const boost::filesystem::path& from,
const boost::filesystem::path& to,
int workers,
std::set<std::string> file_names
) : from_dir_(from), to_dir_(to), num_worker_(workers),
file_names_(file_names), use_file_set_(true),
gc_snapshot_(std::nullopt) // GC is disabled
{}

// Constructor: File set available, GC enabled
// Note: The blob_file_gc_snapshot is provided externally by reference.
compaction_options(
const boost::filesystem::path& from,
const boost::filesystem::path& to,
int workers,
std::set<std::string> file_names,
blob_file_gc_snapshot& gc_snapshot
) : from_dir_(from), to_dir_(to), num_worker_(workers),
file_names_(file_names), use_file_set_(true),
gc_snapshot_(std::ref(gc_snapshot))
{}

// Getters
const boost::filesystem::path& get_from_dir() const { return from_dir_; }
const boost::filesystem::path& get_to_dir() const { return to_dir_; }
int get_num_worker() const { return num_worker_; }

const std::set<std::string>& get_file_names() const { return file_names_; }
bool is_using_file_set() const { return use_file_set_; }

bool is_gc_enabled() const { return gc_snapshot_.has_value(); }
const blob_file_gc_snapshot& get_gc_snapshot() const { return gc_snapshot_.value().get(); }

private:
// Basic compaction settings
boost::filesystem::path from_dir_;
boost::filesystem::path to_dir_;
int num_worker_;

std::set<std::string> file_names_;
bool use_file_set_;

// Garbage collection settings
std::optional<std::reference_wrapper<blob_file_gc_snapshot>> gc_snapshot_;
};

} // namespace limestone::internal
4 changes: 3 additions & 1 deletion test/limestone/blob/blob_file_gc_snapshot_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,8 @@ TEST_F(blob_file_gc_snapshot_test, finalize_snapshot_merging_duplicates) {
TEST_F(blob_file_gc_snapshot_test, tls_container_null_state_behavior) {
// Create a snapshot instance with a given threshold.
blob_file_gc_snapshot snapshot(write_version_type(100, 1));

EXPECT_EQ(snapshot.boundary_version(), write_version_type(100, 1));

// Ensure the internal state is reset so that tls_container_ becomes nullptr.
snapshot.reset();

Expand All @@ -272,6 +273,7 @@ TEST_F(blob_file_gc_snapshot_test, tls_container_null_state_behavior) {
TEST_F(blob_file_gc_snapshot_test, threshold_boundary_test) {
// Create a snapshot with threshold write_version (100, 1)
blob_file_gc_snapshot snapshot(write_version_type(100, 1));
EXPECT_EQ(snapshot.boundary_version(), write_version_type(100, 1));

// Case 1: Entry with write_version exactly equal to threshold.
// Expected: The entry should NOT be added (because it's not less than the threshold).
Expand Down
4 changes: 2 additions & 2 deletions test/limestone/compaction/compaction_options_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,15 @@
std::set<std::string> file_names = {"file1", "file2"};
write_version_type boundary_version(42, 5);

compaction_options options(from_dir_, to_dir_, num_workers_, file_names, boundary_version);
blob_file_gc_snapshot gc_snapshot(boundary_version);
compaction_options options(from_dir_, to_dir_, num_workers_, file_names, gc_snapshot);

EXPECT_EQ(options.get_from_dir(), from_dir_);
EXPECT_EQ(options.get_to_dir(), to_dir_);
EXPECT_EQ(options.get_num_worker(), num_workers_);
EXPECT_TRUE(options.is_using_file_set());
EXPECT_EQ(options.get_file_names(), file_names);
EXPECT_TRUE(options.is_gc_enabled());
EXPECT_EQ(options.get_available_boundary_version(), boundary_version);
}

TEST_F(compaction_options_test, get_gc_snapshot_without_gc_enabled) {
Expand Down

0 comments on commit 8a5de59

Please sign in to comment.