Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add multiple chunk test for journal vdev. #358

Merged
merged 1 commit into from
Apr 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion conanfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

class HomestoreConan(ConanFile):
name = "homestore"
version = "6.2.1"
version = "6.2.2"

homepage = "https://github.com/eBay/Homestore"
description = "HomeStore Storage Engine"
Expand Down
24 changes: 18 additions & 6 deletions src/lib/device/journal_vdev.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -472,11 +472,19 @@ off_t JournalVirtualDev::Descriptor::dev_offset(off_t nbytes) const {
}

// Moves the logical data-start (truncation) offset of this journal descriptor and
// recomputes the usable window [m_data_start_offset, m_end_offset) from the chunks
// currently owned by it.
void JournalVirtualDev::Descriptor::update_data_start_offset(off_t offset) {
// NOTE(review): the next five lines appear to be the pre-change (deleted) side of a
// diff flattened into this page — the same logic repeats inside the if-branch below.
// Confirm against the real source file before relying on this text.
m_data_start_offset = offset;
auto data_start_offset_aligned = sisl::round_down(m_data_start_offset, m_vdev.info().chunk_size);
m_end_offset = data_start_offset_aligned + m_journal_chunks.size() * m_vdev.info().chunk_size;
LOGINFOMOD(journalvdev, "Updated data start offset off 0x{} {}", to_hex(offset), to_string());
RELEASE_ASSERT_EQ(m_end_offset - data_start_offset_aligned, m_total_size, "offset size mismatch {}", to_string());
if (!m_journal_chunks.empty()) {
m_data_start_offset = offset;
// End offset spans whole chunks, starting from the chunk-aligned data start.
auto data_start_offset_aligned = sisl::round_down(m_data_start_offset, m_vdev.info().chunk_size);
m_end_offset = data_start_offset_aligned + m_journal_chunks.size() * m_vdev.info().chunk_size;
LOGINFOMOD(journalvdev, "Updated data start offset off 0x{} {}", to_hex(offset), to_string());
// The window size must equal the sum of all owned chunk sizes.
RELEASE_ASSERT_EQ(m_end_offset - data_start_offset_aligned, m_total_size, "offset size mismatch {}",
to_string());
} else {
// If there are no chunks, round up to the next chunk boundary; the window is empty
// (end == start) until a chunk is added.
m_data_start_offset = sisl::round_up(offset, m_vdev.info().chunk_size);
m_end_offset = m_data_start_offset;
LOGINFOMOD(journalvdev, "No chunks, updated data start offset off 0x{} {}", to_hex(offset), to_string());
}
}

off_t JournalVirtualDev::Descriptor::tail_offset(bool reserve_space_include) const {
Expand Down Expand Up @@ -536,6 +544,9 @@ void JournalVirtualDev::Descriptor::truncate(off_t truncate_offset) {
for (auto it = m_journal_chunks.begin(); it != m_journal_chunks.end();) {
auto chunk = *it;
start += chunk->size();

// Also, if it's the last chunk and there is no data after truncate, we release the chunk.
auto write_sz_in_total = m_write_sz_in_total.load(std::memory_order_relaxed);
if (start >= truncate_offset) { break; }

m_total_size -= chunk->size();
Expand All @@ -551,6 +562,7 @@ void JournalVirtualDev::Descriptor::truncate(off_t truncate_offset) {
// to know the end offset of the log dev during recovery.
// Format and add back to pool.
m_vdev.m_chunk_pool->enqueue(chunk);
LOGINFOMOD(journalvdev, "After truncate released chunk {}", chunk->to_string());
}

// Update our start offset, to keep track of actual size
Expand All @@ -561,7 +573,7 @@ void JournalVirtualDev::Descriptor::truncate(off_t truncate_offset) {
m_write_sz_in_total.fetch_sub(size_to_truncate, std::memory_order_relaxed);
m_truncate_done = true;

HS_PERIODIC_LOG(DEBUG, journalvdev, "After truncate desc {}", to_string());
HS_PERIODIC_LOG(INFO, journalvdev, "After truncate desc {}", to_string());
}

#if 0
Expand Down
10 changes: 9 additions & 1 deletion src/lib/device/journal_vdev.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ class JournalVirtualDev : public VirtualDev {

off_t m_data_start_offset{0}; // Start offset of where actual data begin for this vdev
std::atomic< uint64_t > m_write_sz_in_total{0}; // Size will be decreased by truncate and increased by append;
bool m_truncate_done{true};
bool m_truncate_done{false};
uint64_t m_reserved_sz{0}; // write size within chunk, used to check chunk boundary;
std::vector< shared< Chunk > > m_journal_chunks; // Chunks part of this journal in order.
uint64_t m_total_size{0}; // Total size of all chunks.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

at line: 71, m_end_offset is actually being updated after init, right? If so, can you update the comment?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes its updated in init and during recovery time.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you update the comment accordingly at line 71?

Expand Down Expand Up @@ -227,6 +227,14 @@ class JournalVirtualDev : public VirtualDev {

off_t end_offset() const { return m_end_offset; }

uint64_t write_sz_in_total() const { return m_write_sz_in_total.load(); }

uint32_t num_chunks_used() const { return m_journal_chunks.size(); }

bool truncate_done() const { return m_truncate_done; }

uint64_t reserved_size() const { return m_reserved_sz; }

/**
* @brief : persist start logical offset to vdev's super block
* Supposed to be called when truncate happens;
Expand Down
2 changes: 2 additions & 0 deletions src/lib/logstore/log_dev.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -796,6 +796,7 @@ class LogDev : public std::enable_shared_from_this< LogDev > {
logdev_key do_device_truncate(bool dry_run = false);
void handle_unopened_log_stores(bool format);
logdev_id_t get_id() { return m_logdev_id; }
shared< JournalVirtualDev::Descriptor > get_journal_descriptor() const { return m_vdev_jd; }

private:
LogGroup* make_log_group(uint32_t estimated_records) {
Expand Down Expand Up @@ -872,6 +873,7 @@ class LogDev : public std::enable_shared_from_this< LogDev > {
std::atomic< bool > m_flush_status = false;
// Timer handle
iomgr::timer_handle_t m_flush_timer_hdl{iomgr::null_timer_handle};

}; // LogDev

} // namespace homestore
185 changes: 160 additions & 25 deletions src/tests/test_journal_vdev.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ class VDevJournalIOTest : public ::testing::Test {
{HS_SERVICE::META, {.size_pct = 15.0}},
{HS_SERVICE::LOG,
{.size_pct = 50.0,
.chunk_size = 32 * 1024 * 1024,
.chunk_size = 8 * 1024 * 1024,
.min_chunk_size = 8 * 1024 * 1024,
.vdev_size_type = vdev_size_type_t::VDEV_SIZE_DYNAMIC}},
},
nullptr /* starting_cb */, false /* restart */);
Expand All @@ -90,7 +91,8 @@ class VDevJournalIOTest : public ::testing::Test {
{HS_SERVICE::META, {.size_pct = 15.0}},
{HS_SERVICE::LOG,
{.size_pct = 50.0,
.chunk_size = 32 * 1024 * 1024,
.chunk_size = 8 * 1024 * 1024,
.min_chunk_size = 8 * 1024 * 1024,
.vdev_size_type = vdev_size_type_t::VDEV_SIZE_DYNAMIC}},
},
nullptr /* starting_cb */, true /* restart */);
Expand All @@ -103,14 +105,33 @@ class JournalDescriptorTest {
uint64_t crc;
};

struct VerifyDescriptor {
uint64_t ds{0};
uint64_t end{0};
uint64_t writesz{0};
uint64_t tail{0};
uint64_t rsvdsz{0};
int32_t chunks{0};
bool trunc{false};
uint64_t total{0};
uint64_t seek{0};
};

public:
JournalDescriptorTest(logdev_id_t id) : m_logdev_id(id) { reinit(); }
JournalDescriptorTest(logdev_id_t id) : m_logdev_id(id) { restore(); }

std::shared_ptr< JournalVirtualDev::Descriptor > vdev_jd() { return m_vdev_jd; }

void reinit() {
void save() {
last_start_offset = m_vdev_jd->data_start_offset();
last_tail_offset = m_vdev_jd->tail_offset();
}

void restore() {
auto vdev = hs()->logstore_service().get_vdev();
m_vdev_jd = vdev->open(m_logdev_id);
m_vdev_jd->update_data_start_offset(last_start_offset);
m_vdev_jd->update_tail_offset(last_tail_offset);
}

uint64_t get_elapsed_time(Clock::time_point start) {
Expand Down Expand Up @@ -173,17 +194,18 @@ class JournalDescriptorTest {
}

void truncate(off_t off_to_truncate) {
LOGDEBUG("truncating to offset: 0x{}, start: 0x{}, tail: 0x{}", to_hex(off_to_truncate),
to_hex(m_vdev_jd->data_start_offset()), to_hex(m_vdev_jd->tail_offset()));
LOGDEBUG("truncating to offset: 0x{}, desc: {}", to_hex(off_to_truncate), m_vdev_jd->to_string());

validate_truncate_offset(off_to_truncate);

auto tail_before = m_vdev_jd->tail_offset();
m_vdev_jd->truncate(off_to_truncate);
auto tail_after = m_vdev_jd->tail_offset();

HS_DBG_ASSERT_EQ(tail_before, tail_after);
HS_DBG_ASSERT_EQ(off_to_truncate, m_vdev_jd->data_start_offset());
if (m_vdev_jd->num_chunks_used()) {
HS_DBG_ASSERT_EQ(tail_before, tail_after);
HS_DBG_ASSERT_EQ(off_to_truncate, m_vdev_jd->data_start_offset());
}

if (off_to_truncate > m_start_off) {
// remove the offsets before truncate offset, since they are not valid for read anymore;
Expand Down Expand Up @@ -231,9 +253,13 @@ class JournalDescriptorTest {
auto used_space = m_vdev_jd->used_size();
auto start_off = m_vdev_jd->data_start_offset();

HS_DBG_ASSERT_GT(m_vdev_jd->size(), 0);
HS_DBG_ASSERT_LT(used_space, m_vdev_jd->size());
HS_DBG_ASSERT_EQ(start_off, m_start_off);
if (m_vdev_jd->num_chunks_used() != 0) {
HS_DBG_ASSERT_GT(m_vdev_jd->size(), 0);
HS_DBG_ASSERT_LE(used_space, m_vdev_jd->size());
HS_DBG_ASSERT_EQ(start_off, m_start_off);
} else {
HS_DBG_ASSERT_EQ(m_vdev_jd->size(), 0);
}
}

bool time_to_truncate() {
Expand Down Expand Up @@ -270,7 +296,7 @@ class JournalDescriptorTest {
HS_DBG_ASSERT_EQ(m_start_off, start_offset);
if (start_offset < tail_offset) {
HS_DBG_ASSERT_GE(off, start_offset, "Wrong offset: {}, start_off: {}", off, start_offset);
HS_DBG_ASSERT_LT(off, tail_offset, "Wrong offset: {}, tail_offset: {}", off, tail_offset);
HS_DBG_ASSERT_LE(off, tail_offset, "Wrong offset: {}, tail_offset: {}", off, tail_offset);
} else {
HS_DBG_ASSERT(off < tail_offset || off >= start_offset, "Wrong offset: {}, start: {}, tail: {}", off,
start_offset, tail_offset);
Expand Down Expand Up @@ -311,8 +337,9 @@ class JournalDescriptorTest {
m_read_cnt++;
}

void random_write() {
auto sz_to_wrt = rand_size();
void random_write() { alloc_write(rand_size()); }
void fixed_write(int size) { alloc_write(size); }
void alloc_write(int sz_to_wrt) {
auto off_to_wrt = m_vdev_jd->alloc_next_append_blk(sz_to_wrt);

auto it = m_off_to_info_map.find(off_to_wrt);
Expand Down Expand Up @@ -361,6 +388,17 @@ class JournalDescriptorTest {
return sisl::round_up(dist(generator), dma_alignment);
}

void verify_journal_descriptor(shared< JournalVirtualDev::Descriptor > logdev_jd, VerifyDescriptor d) {
LOGINFO("{}", logdev_jd->to_string());
ASSERT_EQ(logdev_jd->data_start_offset(), d.ds);
ASSERT_EQ(logdev_jd->end_offset(), d.end);
ASSERT_EQ(logdev_jd->write_sz_in_total(), d.writesz);
ASSERT_EQ(logdev_jd->reserved_size(), d.rsvdsz);
ASSERT_EQ(logdev_jd->num_chunks_used(), d.chunks);
ASSERT_EQ(logdev_jd->truncate_done(), d.trunc);
ASSERT_EQ(logdev_jd->size(), d.total);
}

private:
logdev_id_t m_logdev_id = 0;
off_t m_start_off = 0;
Expand All @@ -371,6 +409,8 @@ class JournalDescriptorTest {
std::map< off_t, write_info > m_off_to_info_map;
Clock::time_point m_start_time;
std::shared_ptr< JournalVirtualDev::Descriptor > m_vdev_jd;
uint64_t last_tail_offset = 0;
uint64_t last_start_offset = 0;
friend class VDevJournalIOTest;
};

Expand Down Expand Up @@ -426,22 +466,16 @@ TEST_F(VDevJournalIOTest, Recovery) {
LOGINFO("Restart homestore");

// Record the offsets of the journal descriptors.
std::vector< uint64_t > last_tail_offset, last_start_offset;
for (uint32_t i = 0; i < tests.size(); i++) {
auto vdev_jd = tests[i].vdev_jd();
last_tail_offset.push_back(vdev_jd->tail_offset());
last_start_offset.push_back(vdev_jd->data_start_offset());
for (auto& t : tests) {
t.save();
}

// Restart homestore.
restart_homestore();

// Set the offsets after restart.
for (uint32_t i = 0; i < tests.size(); i++) {
tests[i].reinit();
auto vdev_jd = tests[i].vdev_jd();
vdev_jd->update_data_start_offset(last_start_offset[i]);
vdev_jd->update_tail_offset(last_tail_offset[i]);
// Restore the offsets after restart.
for (auto& t : tests) {
t.restore();
}

// Validate all logs.
Expand Down Expand Up @@ -470,6 +504,107 @@ TEST_F(VDevJournalIOTest, Recovery) {
}
}

// Exercises multi-chunk behavior of a journal vdev descriptor: chunk creation on
// append, chunk release on truncate, and offset save/restore across restarts.
// Each step verifies data-start/end offsets, write size, chunk count, truncate
// flag and total size via verify_journal_descriptor().
TEST_F(VDevJournalIOTest, MultipleChunkTest) {
// Chunk size is 8MB and each data log entry will be of size 3MB to create gaps.
uint64_t MB = 1024 * 1024;
uint64_t chunk_size = 8 * MB;
uint64_t data_size = 3 * MB;
JournalDescriptorTest test(1);
auto log_dev_jd = test.vdev_jd();

// Save offsets, restart homestore, then restore the offsets into the freshly
// opened descriptor (mirrors what logdev recovery does).
auto restart_restore = [&]() {
test.save();
restart_homestore();
test.restore();
log_dev_jd = test.vdev_jd();
};

// clang-format off
// Initially no chunks used and offsets are zero.
uint64_t writesz = 0;
test.verify_journal_descriptor(log_dev_jd, {.ds = 0x0, .end = 0, .writesz = writesz, .rsvdsz = 0, .chunks = 0,
.trunc = false, .total = 0, .seek = 0x0});

// Insert two entries. Create one chunk 1.
LOGINFO("Inserting two entries");
for (int i = 0; i < 2; i++) {
test.fixed_write(data_size);
}

// Verify write size has two data entries and one chunk.
writesz = 2 * data_size;
test.verify_journal_descriptor(log_dev_jd, {.ds = 0x0, .end = chunk_size, .writesz = writesz, .rsvdsz = 0,
.chunks = 1, .trunc = false, .total = chunk_size, .seek = 0x0});

// Add three more entries. Now chunks 2 and 3 have to be created.
LOGINFO("Inserting three entries");
for (int i = 0; i < 3; i++) {
test.fixed_write(data_size);
}

// Total three chunks of 8MB; write size will be two whole chunks, and the last chunk 3 contains
// one data log entry. There will be a gap at the end of chunks 1 and 2.
writesz = 2 * chunk_size + data_size;
test.verify_journal_descriptor(log_dev_jd, {.ds = 0x0, .end = 3 * chunk_size, .writesz = writesz, .rsvdsz = 0,
.chunks = 3, .trunc = false, .total = 3 * chunk_size, .seek = 0x0});
test.read_all();

// Restart homestore and restore the offsets.
LOGINFO("Restart homestore");
restart_restore();

// Verify the same as above after restart.
test.verify_journal_descriptor(log_dev_jd, {.ds = 0x0, .end = 3 * chunk_size, .writesz = writesz, .rsvdsz = 0,
.chunks = 3, .trunc = false, .total = 3 * chunk_size, .seek = 0x0});
test.read_all();

// Add one data entry. No additional chunks because there is enough space in chunk 3 but write size increased.
LOGINFO("Inserting one entry");
test.fixed_write(data_size);
writesz = 2 * chunk_size + 2 * data_size;
test.verify_journal_descriptor(log_dev_jd, {.ds = 0x0, .end = 3 * chunk_size, .writesz = writesz, .rsvdsz = 0,
.chunks = 3, .trunc = false, .total = 3 * chunk_size, .seek = 0x0});

// Add one data entry. No more space in chunk 3. Additional chunk 4 created.
LOGINFO("Inserting one entry");
test.fixed_write(data_size);
writesz = 3 * chunk_size + data_size;
test.verify_journal_descriptor(log_dev_jd, {.ds = 0x0, .end = 4 * chunk_size, .writesz = writesz, .rsvdsz = 0,
.chunks = 4, .trunc = false, .total = 4 * chunk_size, .seek = 0x0});

// Truncate two data entries. No change in chunk count, only write size and data start changed.
LOGINFO("Truncating two entries");
uint64_t trunc_sz = 2 * data_size;
uint64_t trunc_offset = 2 * data_size;
test.truncate(trunc_offset);
// ds 0x600000 == 6 MB == the truncate offset (two 3 MB entries).
test.verify_journal_descriptor(log_dev_jd, {.ds = 0x600000, .end = 4 * chunk_size, .writesz = writesz - trunc_sz,
.rsvdsz = 0, .chunks = 4, .trunc = true, .total = 4 * chunk_size, .seek = 0x0});

// Truncate one more entry. Release one chunk back and reduce chunk count. Increase the data start.
LOGINFO("Truncating one entry");
trunc_offset = chunk_size + data_size;
trunc_sz = chunk_size + data_size;
test.truncate(trunc_offset);
// ds 0xb00000 == 11 MB == one chunk (8 MB) + one entry (3 MB).
test.verify_journal_descriptor(log_dev_jd, {.ds = 0xb00000, .end = 4 * chunk_size, .writesz = writesz - trunc_sz,
.rsvdsz = 0, .chunks = 3, .trunc = true, .total = 3 * chunk_size, .seek = 0x0});

// Restart homestore and restore the offsets.
LOGINFO("Restart homestore");
restart_restore();
// trunc is expected false here: a freshly opened descriptor starts with
// m_truncate_done == false (it is not persisted across restarts).
test.verify_journal_descriptor(log_dev_jd, {.ds = 0xb00000, .end = 4 * chunk_size, .writesz = writesz - trunc_sz,
.rsvdsz = 0, .chunks = 3, .trunc = false, .total = 3 * chunk_size, .seek = 0x0});
test.read_all();

// Truncate all entries. Num chunks 1, write sz should be 0.
LOGINFO("Truncating all entries");
trunc_offset = log_dev_jd->tail_offset();
test.truncate(trunc_offset);
// ds 0x1b00000 (27 MB) is the tail offset; total 8388608 == the single retained 8 MB chunk.
test.verify_journal_descriptor(log_dev_jd, {.ds = 0x1b00000, .end = 0x2000000, .writesz = 0, .rsvdsz = 0,
.chunks = 1, .trunc = true, .total = 8388608, .seek = 0x0});

// clang-format on
}

SISL_OPTION_GROUP(test_journal_vdev,
(truncate_watermark_percentage, "", "truncate_watermark_percentage",
"percentage of space usage to trigger truncate", ::cxxopts::value< uint32_t >()->default_value("80"),
Expand Down
Loading
Loading