From c2046bfc67348e1e1c9664df6a5b996b69e72b1f Mon Sep 17 00:00:00 2001 From: Xiaoxi Chen Date: Mon, 15 Apr 2024 16:10:13 +0800 Subject: [PATCH 1/5] Bug fixes for create_vdev 1. max_num_chunks should based on vdev_size/min_chunk_size, previous implementation wrongly include other pdevs that doesnt belongs to this vdev(e.g different type). 2. we should never round up vdev size as this might causing vdev size exceed underlaying pdev size. 3. a few other fixes. Add a set of sanity checks as well as UT to ensure chunks are align to pdev->align_size(). Fixes: https://github.com/eBay/HomeStore/issues/377 Signed-off-by: Xiaoxi Chen --- conanfile.py | 2 +- src/lib/device/chunk.h | 4 +++ src/lib/device/device_manager.cpp | 60 ++++++++++++++++++++++--------- src/lib/device/hs_super_blk.h | 9 +++++ src/tests/test_device_manager.cpp | 26 +++++++++++++- 5 files changed, 83 insertions(+), 18 deletions(-) diff --git a/conanfile.py b/conanfile.py index 9186f0b5a..a9bb86359 100644 --- a/conanfile.py +++ b/conanfile.py @@ -5,7 +5,7 @@ class HomestoreConan(ConanFile): name = "homestore" - version = "6.2.2" + version = "6.2.3" homepage = "https://github.com/eBay/Homestore" description = "HomeStore Storage Engine" diff --git a/src/lib/device/chunk.h b/src/lib/device/chunk.h index 777291292..248d62d47 100644 --- a/src/lib/device/chunk.h +++ b/src/lib/device/chunk.h @@ -47,6 +47,10 @@ class Chunk { uint64_t start_offset() const { return m_chunk_info.chunk_start_offset; } uint64_t size() const { return m_chunk_info.chunk_size; } bool is_busy() const { return m_chunk_info.is_allocated(); } + bool is_align() const { + return (m_chunk_info.chunk_start_offset % m_pdev->align_size() == 0) && + (m_chunk_info.chunk_size % m_pdev->align_size() == 0); + } uint32_t vdev_id() const { return m_chunk_info.vdev_id; } uint16_t chunk_id() const { return static_cast< uint16_t >(m_chunk_info.chunk_id); } uint32_t pdev_ordinal() const { return m_chunk_info.chunk_ordinal; } diff --git a/src/lib/device/device_manager.cpp b/src/lib/device/device_manager.cpp index 6c7c59ef1..33f7e9c28 100644 --- a/src/lib/device/device_manager.cpp +++ b/src/lib/device/device_manager.cpp @@ -23,6 +23,7 @@ #include #include "device/chunk.h" #include "device/device.h" +#include "device/hs_super_blk.h" #include "device/physical_dev.hpp" #include "device/virtual_dev.hpp" #include "common/homestore_utils.hpp" @@ -189,8 +190,8 @@ shared< VirtualDev > DeviceManager::create_vdev(vdev_parameters&& vparam) { // Determine if we have a devices available on requested dev_tier. If so use them, else fallback to data tier std::vector< PhysicalDev* > pdevs = pdevs_by_type_internal(vparam.dev_type); - RELEASE_ASSERT_GT(pdevs.size(), 0, "Unable to find any pdevs for even data tier, can't create vdev"); - + RELEASE_ASSERT_GT(pdevs.size(), 0, "Unable to find any pdevs for given vdev type, can't create vdev"); + RELEASE_ASSERT(vparam.blk_size % pdevs[0]->align_size() == 0, "blk_size should be multiple of pdev align_size"); // Identify the number of chunks if (vparam.multi_pdev_opts == vdev_multi_pdev_opts_t::ALL_PDEV_STRIPED) { auto total_streams = std::accumulate(pdevs.begin(), pdevs.end(), 0u, @@ -204,11 +205,12 @@ shared< VirtualDev > DeviceManager::create_vdev(vdev_parameters&& vparam) { pdevs.erase(pdevs.begin() + 1, pdevs.end()); // TODO: Pick random one } - // Adjust the maximum number chunks requested before round up vdev size. - uint32_t max_num_chunks = 0; - for (const auto& d : m_dev_infos) { - max_num_chunks += hs_super_blk::max_chunks_in_pdev(d); - } + // Based on the min chunk size, we calculate the max number of chunks that can be created in each target pdev + uint32_t min_chunk_size = hs_super_blk::min_chunk_size(vparam.dev_type); + // FIXME: it is possible that each vdev is less than max_num_chunks, but total is more than MAX_CHUNKS_IN_SYSTEM. + uint32_t max_num_chunks = vparam.vdev_size / min_chunk_size > hs_super_blk::MAX_CHUNKS_IN_SYSTEM + ? hs_super_blk::MAX_CHUNKS_IN_SYSTEM + : vparam.vdev_size / min_chunk_size; auto input_vdev_size = vparam.vdev_size; if (vparam.size_type == vdev_size_type_t::VDEV_SIZE_STATIC) { @@ -217,13 +219,6 @@ shared< VirtualDev > DeviceManager::create_vdev(vdev_parameters&& vparam) { // Either num_chunks or chunk_size can be provided and we calculate the other. if (vparam.num_chunks != 0) { - vparam.vdev_size = sisl::round_up(vparam.vdev_size, vparam.num_chunks * vparam.blk_size); - if (input_vdev_size != vparam.vdev_size) { - LOGINFO( - "{} Virtual device is attempted to be created with size={}, it needs to be rounded to new_size={}", - vparam.vdev_name, in_bytes(input_vdev_size), in_bytes(vparam.vdev_size)); - } - auto input_num_chunks = vparam.num_chunks; vparam.num_chunks = std::min(vparam.num_chunks, max_num_chunks); if (input_num_chunks != vparam.num_chunks) { @@ -231,17 +226,28 @@ shared< VirtualDev > DeviceManager::create_vdev(vdev_parameters&& vparam) { "new_num_chunks={}", vparam.vdev_name, in_bytes(input_num_chunks), in_bytes(vparam.num_chunks)); } + + // this ensure chunk_size % vparam.blk_size == 0 + vparam.vdev_size = sisl::round_down(vparam.vdev_size, vparam.num_chunks * vparam.blk_size); + if (input_vdev_size != vparam.vdev_size) { + LOGINFO( + "{} Virtual device is attempted to be created with size={}, it needs to be rounded to new_size={}" + " to be the multiple of {} (num_chunks {} * blk_size {}).", + vparam.vdev_name, input_vdev_size, vparam.vdev_size, + in_bytes(vparam.num_chunks * vparam.blk_size), vparam.num_chunks, in_bytes(vparam.blk_size)); + } vparam.chunk_size = vparam.vdev_size / vparam.num_chunks; } else if (vparam.chunk_size != 0) { auto input_chunk_size = vparam.chunk_size; - vparam.chunk_size = std::min(vparam.chunk_size, hs_super_blk::MAX_CHUNKS_IN_SYSTEM); + vparam.chunk_size = std::max(vparam.chunk_size, min_chunk_size); + vparam.chunk_size = sisl::round_up(vparam.chunk_size, vparam.blk_size); if (input_chunk_size != vparam.chunk_size) { LOGINFO("{} Virtual device is attempted to be created with chunk_size={}, it needs to be adjust to " "new_chunk_size={}", vparam.vdev_name, in_bytes(input_chunk_size), in_bytes(vparam.chunk_size)); } - vparam.vdev_size = sisl::round_up(vparam.vdev_size, vparam.chunk_size); + vparam.vdev_size = sisl::round_down(vparam.vdev_size, vparam.chunk_size); if (input_vdev_size != vparam.vdev_size) { LOGINFO( "{} Virtual device is attempted to be created with size={}, it needs to be rounded to new_size={}", @@ -256,7 +262,29 @@ shared< VirtualDev > DeviceManager::create_vdev(vdev_parameters&& vparam) { } else { // We need chunk_size. We start with zero num_chunks. RELEASE_ASSERT_GT(vparam.chunk_size, 0, "Chunk size should be provided"); + auto input_chunk_size = vparam.chunk_size; + vparam.chunk_size = std::max(vparam.chunk_size, min_chunk_size); + vparam.chunk_size = sisl::round_up(vparam.chunk_size, vparam.blk_size); + if (input_chunk_size != vparam.chunk_size) { + LOGINFO("{} Virtual device is attempted to be created with chunk_size={}, it needs to be adjust to " + "new_chunk_size={}", + vparam.vdev_name, in_bytes(input_chunk_size), in_bytes(vparam.chunk_size)); + } + + vparam.vdev_size = sisl::round_down(vparam.vdev_size, vparam.chunk_size); + if (input_vdev_size != vparam.vdev_size) { + LOGINFO( + "{} Virtual device is attempted to be created with size={}, it needs to be rounded to new_size={}", + vparam.vdev_name, in_bytes(input_vdev_size), in_bytes(vparam.vdev_size)); + } } + // sanity checks + RELEASE_ASSERT(vparam.vdev_size % vparam.chunk_size == 0, "vdev_size should be multiple of chunk_size"); + RELEASE_ASSERT(vparam.chunk_size % vparam.blk_size == 0, "chunk_size should be multiple of blk_size"); + RELEASE_ASSERT(vparam.chunk_size >= min_chunk_size, "chunk_size should be greater than or equal to min_chunk_size"); + + RELEASE_ASSERT(vparam.num_chunks <= max_num_chunks, "num_chunks should be less than or equal to max_num_chunks"); + RELEASE_ASSERT(input_vdev_size >= vparam.vdev_size, "vdev_size should be less than or equal to input_vdev_size"); LOGINFO( "New Virtal Dev={} of size={} with id={} is attempted to be created with multi_pdev_opts={}. The params are " diff --git a/src/lib/device/hs_super_blk.h b/src/lib/device/hs_super_blk.h index a2df05f3d..460894b35 100644 --- a/src/lib/device/hs_super_blk.h +++ b/src/lib/device/hs_super_blk.h @@ -202,6 +202,15 @@ class hs_super_blk { #endif return (dinfo.dev_size - 1) / min_chunk_size + 1; } + static uint32_t min_chunk_size(HSDevType dtype) { + uint64_t min_chunk_size = + (dtype == HSDevType::Fast) ? MIN_CHUNK_SIZE_FAST_DEVICE : MIN_CHUNK_SIZE_DATA_DEVICE; +#ifdef _PRERELEASE + auto chunk_size = iomgr_flip::instance()->get_test_flip< long >("set_minimum_chunk_size"); + if (chunk_size) { min_chunk_size = chunk_size.get(); } +#endif + return min_chunk_size; + } }; } // namespace homestore diff --git a/src/tests/test_device_manager.cpp b/src/tests/test_device_manager.cpp index 6e1cdcfcc..fd0462289 100644 --- a/src/tests/test_device_manager.cpp +++ b/src/tests/test_device_manager.cpp @@ -121,6 +121,7 @@ class DeviceMgrTest : public ::testing::Test { std::map< const PhysicalDev*, uint32_t > chunks_in_pdev_count; for (const auto& chunk : chunks) { ASSERT_EQ(chunk->size(), size) << "All chunks are not equally sized in vdev"; + ASSERT_EQ(chunk->is_align(), true) << "All chunks should be aligned"; auto [it, inserted] = chunks_in_pdev_count.insert(std::pair(chunk->physical_dev(), 1u)); if (!inserted) { ++(it->second); } @@ -140,7 +141,7 @@ TEST_F(DeviceMgrTest, StripedVDevCreation) { avail_size += pdev->data_size(); } - uint32_t size_pct = 2; + uint32_t size_pct = 4; uint64_t remain_size = avail_size; LOGINFO("Step 1: Creating {} vdevs with combined size as {}", num_test_vdevs, in_bytes(avail_size)); @@ -174,6 +175,29 @@ TEST_F(DeviceMgrTest, StripedVDevCreation) { this->validate_striped_vdevs(); } +TEST_F(DeviceMgrTest, SmallStripedVDevCreation) { + std::string name = "test_vdev_small"; + + // Create a vdev small to one minimal chunk per pdev + auto num_chunks = uint32_cast(m_pdevs.size() * 2); + auto size = m_pdevs.size() * hs_super_blk::min_chunk_size(homestore::HSDevType::Data); + + LOGINFO("Step 1: Creating vdev of name={} with size={}", name, in_bytes(size)); + auto vdev = + m_dmgr->create_vdev(homestore::vdev_parameters{.vdev_name = name, + .vdev_size = size, + .num_chunks = num_chunks, + .blk_size = 4096, + .dev_type = HSDevType::Data, + .alloc_type = blk_allocator_type_t::none, + .chunk_sel_type = chunk_selector_type_t::NONE, + .multi_pdev_opts = vdev_multi_pdev_opts_t::ALL_PDEV_STRIPED, + .context_data = sisl::blob{}}); + + + ASSERT_EQ(vdev->get_chunks().size(), m_pdevs.size()) << "Expected vdev to be created with 1 chunk per pdev"; +} + TEST_F(DeviceMgrTest, CreateChunk) { // Create dynamically chunks and verify no two chunks ahve same start offset. uint64_t avail_size{0}; From 930246e98d030c01329c1c22388c032bfc5f8468 Mon Sep 17 00:00:00 2001 From: Xiaoxi Chen Date: Tue, 16 Apr 2024 09:01:02 +0800 Subject: [PATCH 2/5] address comments Signed-off-by: Xiaoxi Chen --- src/lib/device/device_manager.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/lib/device/device_manager.cpp b/src/lib/device/device_manager.cpp index 33f7e9c28..716ace2a0 100644 --- a/src/lib/device/device_manager.cpp +++ b/src/lib/device/device_manager.cpp @@ -192,6 +192,7 @@ shared< VirtualDev > DeviceManager::create_vdev(vdev_parameters&& vparam) { std::vector< PhysicalDev* > pdevs = pdevs_by_type_internal(vparam.dev_type); RELEASE_ASSERT_GT(pdevs.size(), 0, "Unable to find any pdevs for given vdev type, can't create vdev"); RELEASE_ASSERT(vparam.blk_size % pdevs[0]->align_size() == 0, "blk_size should be multiple of pdev align_size"); + RELEASE_ASSERT(vparam.blk_size % 512 == 0, "blk_size should be multiple of 512bytes to be DMA aligned"); // Identify the number of chunks if (vparam.multi_pdev_opts == vdev_multi_pdev_opts_t::ALL_PDEV_STRIPED) { auto total_streams = std::accumulate(pdevs.begin(), pdevs.end(), 0u, @@ -208,9 +209,8 @@ shared< VirtualDev > DeviceManager::create_vdev(vdev_parameters&& vparam) { // Based on the min chunk size, we calculate the max number of chunks that can be created in each target pdev uint32_t min_chunk_size = hs_super_blk::min_chunk_size(vparam.dev_type); // FIXME: it is possible that each vdev is less than max_num_chunks, but total is more than MAX_CHUNKS_IN_SYSTEM. - uint32_t max_num_chunks = vparam.vdev_size / min_chunk_size > hs_super_blk::MAX_CHUNKS_IN_SYSTEM - ? hs_super_blk::MAX_CHUNKS_IN_SYSTEM - : vparam.vdev_size / min_chunk_size; + // uint32 convert is safe as it only overflow when vdev size > 64PB with 16MB min_chunk_size. + uint32_t max_num_chunks = std::min(uint32_t(vparam.vdev_size / min_chunk_size), hs_super_blk::MAX_CHUNKS_IN_SYSTEM); auto input_vdev_size = vparam.vdev_size; if (vparam.size_type == vdev_size_type_t::VDEV_SIZE_STATIC) { From add6fd294ee50adac737e30b3121f0036e24cd0e Mon Sep 17 00:00:00 2001 From: Xiaoxi Chen Date: Tue, 16 Apr 2024 13:38:30 +0800 Subject: [PATCH 3/5] Fix logdev UT that use 8MB chunk size Signed-off-by: Xiaoxi Chen --- src/tests/test_journal_vdev.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/tests/test_journal_vdev.cpp b/src/tests/test_journal_vdev.cpp index 84b47daf4..cfd4d6500 100644 --- a/src/tests/test_journal_vdev.cpp +++ b/src/tests/test_journal_vdev.cpp @@ -76,8 +76,8 @@ class VDevJournalIOTest : public ::testing::Test { {HS_SERVICE::META, {.size_pct = 15.0}}, {HS_SERVICE::LOG, {.size_pct = 50.0, - .chunk_size = 8 * 1024 * 1024, - .min_chunk_size = 8 * 1024 * 1024, + .chunk_size = 16 * 1024 * 1024, + .min_chunk_size = 16 * 1024 * 1024, .vdev_size_type = vdev_size_type_t::VDEV_SIZE_DYNAMIC}}, }, nullptr /* starting_cb */, false /* restart */); @@ -91,8 +91,8 @@ class VDevJournalIOTest : public ::testing::Test { {HS_SERVICE::META, {.size_pct = 15.0}}, {HS_SERVICE::LOG, {.size_pct = 50.0, - .chunk_size = 8 * 1024 * 1024, - .min_chunk_size = 8 * 1024 * 1024, + .chunk_size = 16 * 1024 * 1024, + .min_chunk_size = 16 * 1024 * 1024, .vdev_size_type = vdev_size_type_t::VDEV_SIZE_DYNAMIC}}, }, nullptr /* starting_cb */, true /* restart */); @@ -505,10 +505,10 @@ TEST_F(VDevJournalIOTest, Recovery) { } TEST_F(VDevJournalIOTest, MultipleChunkTest) { - // Chunk size is 8MB and each data log entry will be of size 3MB to create gaps. + // Chunk size is 16MB and each data log entry will be of size 6MB to create gaps. uint64_t MB = 1024 * 1024; - uint64_t chunk_size = 8 * MB; - uint64_t data_size = 3 * MB; + uint64_t chunk_size = 16 * MB; + uint64_t data_size = 6 * MB; JournalDescriptorTest test(1); auto log_dev_jd = test.vdev_jd(); @@ -577,7 +577,7 @@ TEST_F(VDevJournalIOTest, MultipleChunkTest) { uint64_t trunc_sz = 2 * data_size; uint64_t trunc_offset = 2 * data_size; test.truncate(trunc_offset); - test.verify_journal_descriptor(log_dev_jd, {.ds = 0x600000, .end = 4 * chunk_size, .writesz = writesz - trunc_sz, + test.verify_journal_descriptor(log_dev_jd, {.ds = trunc_offset, .end = 4 * chunk_size, .writesz = writesz - trunc_sz, .rsvdsz = 0, .chunks = 4, .trunc = true, .total = 4 * chunk_size, .seek = 0x0}); // Truncate one more entry. Release one chunk back and reduce chunk count. Increase the data start. @@ -585,13 +585,13 @@ TEST_F(VDevJournalIOTest, MultipleChunkTest) { trunc_offset = chunk_size + data_size; trunc_sz = chunk_size + data_size; test.truncate(trunc_offset); - test.verify_journal_descriptor(log_dev_jd, {.ds = 0xb00000, .end = 4 * chunk_size, .writesz = writesz - trunc_sz, + test.verify_journal_descriptor(log_dev_jd, {.ds = trunc_offset, .end = 4 * chunk_size, .writesz = writesz - trunc_sz, .rsvdsz = 0, .chunks = 3, .trunc = true, .total = 3 * chunk_size, .seek = 0x0}); // Restart homestore and restore the offsets. LOGINFO("Restart homestore"); restart_restore(); - test.verify_journal_descriptor(log_dev_jd, {.ds = 0xb00000, .end = 4 * chunk_size, .writesz = writesz - trunc_sz, + test.verify_journal_descriptor(log_dev_jd, {.ds = trunc_offset, .end = 4 * chunk_size, .writesz = writesz - trunc_sz, .rsvdsz = 0, .chunks = 3, .trunc = false, .total = 3 * chunk_size, .seek = 0x0}); test.read_all(); @@ -599,8 +599,8 @@ TEST_F(VDevJournalIOTest, MultipleChunkTest) { LOGINFO("Truncating all entries"); trunc_offset = log_dev_jd->tail_offset(); test.truncate(trunc_offset); - test.verify_journal_descriptor(log_dev_jd, {.ds = 0x1b00000, .end = 0x2000000, .writesz = 0, .rsvdsz = 0, - .chunks = 1, .trunc = true, .total = 8388608, .seek = 0x0}); + test.verify_journal_descriptor(log_dev_jd, {.ds = trunc_offset, .end = 4 * chunk_size, .writesz = 0, .rsvdsz = 0, + .chunks = 1, .trunc = true, .total = chunk_size, .seek = 0x0}); // clang-format on } From e9f06ce2ba720b48cbae14af6bd26aa79ef85b3b Mon Sep 17 00:00:00 2001 From: Xiaoxi Chen Date: Tue, 16 Apr 2024 00:54:38 -0700 Subject: [PATCH 4/5] Fix device_manager ut previously we have use-after-release as during restart HS all chunk are released. But after restart we dont refresh our vdev vector which still refering to old chunks. Signed-off-by: Xiaoxi Chen --- src/tests/test_device_manager.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/tests/test_device_manager.cpp b/src/tests/test_device_manager.cpp index fd0462289..eb69b1529 100644 --- a/src/tests/test_device_manager.cpp +++ b/src/tests/test_device_manager.cpp @@ -80,6 +80,7 @@ class DeviceMgrTest : public ::testing::Test { }); m_dmgr->is_first_time_boot() ? m_dmgr->format_devices() : m_dmgr->load_devices(); m_pdevs = m_dmgr->get_pdevs_by_dev_type(homestore::HSDevType::Data); + m_vdevs = m_dmgr->get_vdevs(); } void restart() { From 683ad1c382be5d82235d4d23777b546237b77027 Mon Sep 17 00:00:00 2001 From: Xiaoxi Chen Date: Wed, 17 Apr 2024 00:41:11 +0800 Subject: [PATCH 5/5] comments Signed-off-by: Xiaoxi Chen --- src/lib/device/device_manager.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/lib/device/device_manager.cpp b/src/lib/device/device_manager.cpp index 716ace2a0..33e96d1dc 100644 --- a/src/lib/device/device_manager.cpp +++ b/src/lib/device/device_manager.cpp @@ -188,11 +188,9 @@ shared< VirtualDev > DeviceManager::create_vdev(vdev_parameters&& vparam) { if (vdev_id == sisl::Bitset::npos) { throw std::out_of_range("System has no room for additional vdev"); } m_vdev_id_bm.set_bit(vdev_id); - // Determine if we have a devices available on requested dev_tier. If so use them, else fallback to data tier std::vector< PhysicalDev* > pdevs = pdevs_by_type_internal(vparam.dev_type); RELEASE_ASSERT_GT(pdevs.size(), 0, "Unable to find any pdevs for given vdev type, can't create vdev"); RELEASE_ASSERT(vparam.blk_size % pdevs[0]->align_size() == 0, "blk_size should be multiple of pdev align_size"); - RELEASE_ASSERT(vparam.blk_size % 512 == 0, "blk_size should be multiple of 512bytes to be DMA aligned"); // Identify the number of chunks if (vparam.multi_pdev_opts == vdev_multi_pdev_opts_t::ALL_PDEV_STRIPED) { auto total_streams = std::accumulate(pdevs.begin(), pdevs.end(), 0u,