diff --git a/conanfile.py b/conanfile.py index 9186f0b5a..a9bb86359 100644 --- a/conanfile.py +++ b/conanfile.py @@ -5,7 +5,7 @@ class HomestoreConan(ConanFile): name = "homestore" - version = "6.2.2" + version = "6.2.3" homepage = "https://github.com/eBay/Homestore" description = "HomeStore Storage Engine" diff --git a/src/lib/device/chunk.h b/src/lib/device/chunk.h index 777291292..248d62d47 100644 --- a/src/lib/device/chunk.h +++ b/src/lib/device/chunk.h @@ -47,6 +47,10 @@ class Chunk { uint64_t start_offset() const { return m_chunk_info.chunk_start_offset; } uint64_t size() const { return m_chunk_info.chunk_size; } bool is_busy() const { return m_chunk_info.is_allocated(); } + bool is_align() const { + return (m_chunk_info.chunk_start_offset % m_pdev->align_size() == 0) && + (m_chunk_info.chunk_size % m_pdev->align_size() == 0); + } uint32_t vdev_id() const { return m_chunk_info.vdev_id; } uint16_t chunk_id() const { return static_cast< uint16_t >(m_chunk_info.chunk_id); } uint32_t pdev_ordinal() const { return m_chunk_info.chunk_ordinal; } diff --git a/src/lib/device/device_manager.cpp b/src/lib/device/device_manager.cpp index 6c7c59ef1..33e96d1dc 100644 --- a/src/lib/device/device_manager.cpp +++ b/src/lib/device/device_manager.cpp @@ -23,6 +23,7 @@ #include #include "device/chunk.h" #include "device/device.h" +#include "device/hs_super_blk.h" #include "device/physical_dev.hpp" #include "device/virtual_dev.hpp" #include "common/homestore_utils.hpp" @@ -187,10 +188,9 @@ shared< VirtualDev > DeviceManager::create_vdev(vdev_parameters&& vparam) { if (vdev_id == sisl::Bitset::npos) { throw std::out_of_range("System has no room for additional vdev"); } m_vdev_id_bm.set_bit(vdev_id); - // Determine if we have a devices available on requested dev_tier. If so use them, else fallback to data tier std::vector< PhysicalDev* > pdevs = pdevs_by_type_internal(vparam.dev_type); - RELEASE_ASSERT_GT(pdevs.size(), 0, "Unable to find any pdevs for even data tier, can't create vdev"); - + RELEASE_ASSERT_GT(pdevs.size(), 0, "Unable to find any pdevs for given vdev type, can't create vdev"); + RELEASE_ASSERT(vparam.blk_size % pdevs[0]->align_size() == 0, "blk_size should be multiple of pdev align_size"); // Identify the number of chunks if (vparam.multi_pdev_opts == vdev_multi_pdev_opts_t::ALL_PDEV_STRIPED) { auto total_streams = std::accumulate(pdevs.begin(), pdevs.end(), 0u, @@ -204,11 +204,11 @@ shared< VirtualDev > DeviceManager::create_vdev(vdev_parameters&& vparam) { pdevs.erase(pdevs.begin() + 1, pdevs.end()); // TODO: Pick random one } - // Adjust the maximum number chunks requested before round up vdev size. - uint32_t max_num_chunks = 0; - for (const auto& d : m_dev_infos) { - max_num_chunks += hs_super_blk::max_chunks_in_pdev(d); - } + // Based on the min chunk size, we calculate the max number of chunks that can be created in each target pdev + uint32_t min_chunk_size = hs_super_blk::min_chunk_size(vparam.dev_type); + // FIXME: it is possible that each vdev is less than max_num_chunks, but total is more than MAX_CHUNKS_IN_SYSTEM. + // uint32 convert is safe as it only overflow when vdev size > 64PB with 16MB min_chunk_size. + uint32_t max_num_chunks = std::min(uint32_t(vparam.vdev_size / min_chunk_size), hs_super_blk::MAX_CHUNKS_IN_SYSTEM); auto input_vdev_size = vparam.vdev_size; if (vparam.size_type == vdev_size_type_t::VDEV_SIZE_STATIC) { @@ -217,13 +217,6 @@ shared< VirtualDev > DeviceManager::create_vdev(vdev_parameters&& vparam) { // Either num_chunks or chunk_size can be provided and we calculate the other. if (vparam.num_chunks != 0) { - vparam.vdev_size = sisl::round_up(vparam.vdev_size, vparam.num_chunks * vparam.blk_size); - if (input_vdev_size != vparam.vdev_size) { - LOGINFO( - "{} Virtual device is attempted to be created with size={}, it needs to be rounded to new_size={}", - vparam.vdev_name, in_bytes(input_vdev_size), in_bytes(vparam.vdev_size)); - } - auto input_num_chunks = vparam.num_chunks; vparam.num_chunks = std::min(vparam.num_chunks, max_num_chunks); if (input_num_chunks != vparam.num_chunks) { @@ -231,17 +224,28 @@ shared< VirtualDev > DeviceManager::create_vdev(vdev_parameters&& vparam) { "new_num_chunks={}", vparam.vdev_name, in_bytes(input_num_chunks), in_bytes(vparam.num_chunks)); } + + // this ensure chunk_size % vparam.blk_size == 0 + vparam.vdev_size = sisl::round_down(vparam.vdev_size, vparam.num_chunks * vparam.blk_size); + if (input_vdev_size != vparam.vdev_size) { + LOGINFO( + "{} Virtual device is attempted to be created with size={}, it needs to be rounded to new_size={}" + " to be the multiple of {} (num_chunks {} * blk_size {}).", + vparam.vdev_name, input_vdev_size, vparam.vdev_size, + in_bytes(vparam.num_chunks * vparam.blk_size), vparam.num_chunks, in_bytes(vparam.blk_size)); + } vparam.chunk_size = vparam.vdev_size / vparam.num_chunks; } else if (vparam.chunk_size != 0) { auto input_chunk_size = vparam.chunk_size; - vparam.chunk_size = std::min(vparam.chunk_size, hs_super_blk::MAX_CHUNKS_IN_SYSTEM); + vparam.chunk_size = std::max(vparam.chunk_size, min_chunk_size); + vparam.chunk_size = sisl::round_up(vparam.chunk_size, vparam.blk_size); if (input_chunk_size != vparam.chunk_size) { LOGINFO("{} Virtual device is attempted to be created with chunk_size={}, it needs to be adjust to " "new_chunk_size={}", vparam.vdev_name, in_bytes(input_chunk_size), in_bytes(vparam.chunk_size)); } - vparam.vdev_size = sisl::round_up(vparam.vdev_size, vparam.chunk_size); + vparam.vdev_size = sisl::round_down(vparam.vdev_size, vparam.chunk_size); if (input_vdev_size != vparam.vdev_size) { LOGINFO( "{} Virtual device is attempted to be created with size={}, it needs to be rounded to new_size={}", @@ -256,7 +260,29 @@ shared< VirtualDev > DeviceManager::create_vdev(vdev_parameters&& vparam) { } else { // We need chunk_size. We start with zero num_chunks. RELEASE_ASSERT_GT(vparam.chunk_size, 0, "Chunk size should be provided"); + auto input_chunk_size = vparam.chunk_size; + vparam.chunk_size = std::max(vparam.chunk_size, min_chunk_size); + vparam.chunk_size = sisl::round_up(vparam.chunk_size, vparam.blk_size); + if (input_chunk_size != vparam.chunk_size) { + LOGINFO("{} Virtual device is attempted to be created with chunk_size={}, it needs to be adjust to " + "new_chunk_size={}", + vparam.vdev_name, in_bytes(input_chunk_size), in_bytes(vparam.chunk_size)); + } + + vparam.vdev_size = sisl::round_down(vparam.vdev_size, vparam.chunk_size); + if (input_vdev_size != vparam.vdev_size) { + LOGINFO( + "{} Virtual device is attempted to be created with size={}, it needs to be rounded to new_size={}", + vparam.vdev_name, in_bytes(input_vdev_size), in_bytes(vparam.vdev_size)); + } } + // sanity checks + RELEASE_ASSERT(vparam.vdev_size % vparam.chunk_size == 0, "vdev_size should be multiple of chunk_size"); + RELEASE_ASSERT(vparam.chunk_size % vparam.blk_size == 0, "chunk_size should be multiple of blk_size"); + RELEASE_ASSERT(vparam.chunk_size >= min_chunk_size, "chunk_size should be greater than or equal to min_chunk_size"); + + RELEASE_ASSERT(vparam.num_chunks <= max_num_chunks, "num_chunks should be less than or equal to max_num_chunks"); + RELEASE_ASSERT(input_vdev_size >= vparam.vdev_size, "vdev_size should be less than or equal to input_vdev_size"); LOGINFO( "New Virtal Dev={} of size={} with id={} is attempted to be created with multi_pdev_opts={}. The params are " diff --git a/src/lib/device/hs_super_blk.h b/src/lib/device/hs_super_blk.h index a2df05f3d..460894b35 100644 --- a/src/lib/device/hs_super_blk.h +++ b/src/lib/device/hs_super_blk.h @@ -202,6 +202,15 @@ class hs_super_blk { #endif return (dinfo.dev_size - 1) / min_chunk_size + 1; } + static uint32_t min_chunk_size(HSDevType dtype) { + uint64_t min_chunk_size = + (dtype == HSDevType::Fast) ? MIN_CHUNK_SIZE_FAST_DEVICE : MIN_CHUNK_SIZE_DATA_DEVICE; +#ifdef _PRERELEASE + auto chunk_size = iomgr_flip::instance()->get_test_flip< long >("set_minimum_chunk_size"); + if (chunk_size) { min_chunk_size = chunk_size.get(); } +#endif + return min_chunk_size; + } }; } // namespace homestore diff --git a/src/tests/test_device_manager.cpp b/src/tests/test_device_manager.cpp index 6e1cdcfcc..eb69b1529 100644 --- a/src/tests/test_device_manager.cpp +++ b/src/tests/test_device_manager.cpp @@ -80,6 +80,7 @@ class DeviceMgrTest : public ::testing::Test { }); m_dmgr->is_first_time_boot() ? m_dmgr->format_devices() : m_dmgr->load_devices(); m_pdevs = m_dmgr->get_pdevs_by_dev_type(homestore::HSDevType::Data); + m_vdevs = m_dmgr->get_vdevs(); } void restart() { @@ -121,6 +122,7 @@ class DeviceMgrTest : public ::testing::Test { std::map< const PhysicalDev*, uint32_t > chunks_in_pdev_count; for (const auto& chunk : chunks) { ASSERT_EQ(chunk->size(), size) << "All chunks are not equally sized in vdev"; + ASSERT_EQ(chunk->is_align(), true) << "All chunks should be aligned"; auto [it, inserted] = chunks_in_pdev_count.insert(std::pair(chunk->physical_dev(), 1u)); if (!inserted) { ++(it->second); } @@ -140,7 +142,7 @@ TEST_F(DeviceMgrTest, StripedVDevCreation) { avail_size += pdev->data_size(); } - uint32_t size_pct = 2; + uint32_t size_pct = 4; uint64_t remain_size = avail_size; LOGINFO("Step 1: Creating {} vdevs with combined size as {}", num_test_vdevs, in_bytes(avail_size)); @@ -174,6 +176,29 @@ TEST_F(DeviceMgrTest, StripedVDevCreation) { this->validate_striped_vdevs(); } +TEST_F(DeviceMgrTest, SmallStripedVDevCreation) { + std::string name = "test_vdev_small"; + + // Create a vdev small to one minimal chunk per pdev + auto num_chunks = uint32_cast(m_pdevs.size() * 2); + auto size = m_pdevs.size() * hs_super_blk::min_chunk_size(homestore::HSDevType::Data); + + LOGINFO("Step 1: Creating vdev of name={} with size={}", name, in_bytes(size)); + auto vdev = + m_dmgr->create_vdev(homestore::vdev_parameters{.vdev_name = name, + .vdev_size = size, + .num_chunks = num_chunks, + .blk_size = 4096, + .dev_type = HSDevType::Data, + .alloc_type = blk_allocator_type_t::none, + .chunk_sel_type = chunk_selector_type_t::NONE, + .multi_pdev_opts = vdev_multi_pdev_opts_t::ALL_PDEV_STRIPED, + .context_data = sisl::blob{}}); + + + ASSERT_EQ(vdev->get_chunks().size(), m_pdevs.size()) << "Expected vdev to be created with 1 chunk per pdev"; +} + TEST_F(DeviceMgrTest, CreateChunk) { // Create dynamically chunks and verify no two chunks ahve same start offset. uint64_t avail_size{0}; diff --git a/src/tests/test_journal_vdev.cpp b/src/tests/test_journal_vdev.cpp index 84b47daf4..cfd4d6500 100644 --- a/src/tests/test_journal_vdev.cpp +++ b/src/tests/test_journal_vdev.cpp @@ -76,8 +76,8 @@ class VDevJournalIOTest : public ::testing::Test { {HS_SERVICE::META, {.size_pct = 15.0}}, {HS_SERVICE::LOG, {.size_pct = 50.0, - .chunk_size = 8 * 1024 * 1024, - .min_chunk_size = 8 * 1024 * 1024, + .chunk_size = 16 * 1024 * 1024, + .min_chunk_size = 16 * 1024 * 1024, .vdev_size_type = vdev_size_type_t::VDEV_SIZE_DYNAMIC}}, }, nullptr /* starting_cb */, false /* restart */); @@ -91,8 +91,8 @@ class VDevJournalIOTest : public ::testing::Test { {HS_SERVICE::META, {.size_pct = 15.0}}, {HS_SERVICE::LOG, {.size_pct = 50.0, - .chunk_size = 8 * 1024 * 1024, - .min_chunk_size = 8 * 1024 * 1024, + .chunk_size = 16 * 1024 * 1024, + .min_chunk_size = 16 * 1024 * 1024, .vdev_size_type = vdev_size_type_t::VDEV_SIZE_DYNAMIC}}, }, nullptr /* starting_cb */, true /* restart */); @@ -505,10 +505,10 @@ TEST_F(VDevJournalIOTest, Recovery) { } TEST_F(VDevJournalIOTest, MultipleChunkTest) { - // Chunk size is 8MB and each data log entry will be of size 3MB to create gaps. + // Chunk size is 16MB and each data log entry will be of size 6MB to create gaps. uint64_t MB = 1024 * 1024; - uint64_t chunk_size = 8 * MB; - uint64_t data_size = 3 * MB; + uint64_t chunk_size = 16 * MB; + uint64_t data_size = 6 * MB; JournalDescriptorTest test(1); auto log_dev_jd = test.vdev_jd(); @@ -577,7 +577,7 @@ TEST_F(VDevJournalIOTest, MultipleChunkTest) { uint64_t trunc_sz = 2 * data_size; uint64_t trunc_offset = 2 * data_size; test.truncate(trunc_offset); - test.verify_journal_descriptor(log_dev_jd, {.ds = 0x600000, .end = 4 * chunk_size, .writesz = writesz - trunc_sz, + test.verify_journal_descriptor(log_dev_jd, {.ds = trunc_offset, .end = 4 * chunk_size, .writesz = writesz - trunc_sz, .rsvdsz = 0, .chunks = 4, .trunc = true, .total = 4 * chunk_size, .seek = 0x0}); // Truncate one more entry. Release one chunk back and reduce chunk count. Increase the data start. @@ -585,13 +585,13 @@ TEST_F(VDevJournalIOTest, MultipleChunkTest) { trunc_offset = chunk_size + data_size; trunc_sz = chunk_size + data_size; test.truncate(trunc_offset); - test.verify_journal_descriptor(log_dev_jd, {.ds = 0xb00000, .end = 4 * chunk_size, .writesz = writesz - trunc_sz, + test.verify_journal_descriptor(log_dev_jd, {.ds = trunc_offset, .end = 4 * chunk_size, .writesz = writesz - trunc_sz, .rsvdsz = 0, .chunks = 3, .trunc = true, .total = 3 * chunk_size, .seek = 0x0}); // Restart homestore and restore the offsets. LOGINFO("Restart homestore"); restart_restore(); - test.verify_journal_descriptor(log_dev_jd, {.ds = 0xb00000, .end = 4 * chunk_size, .writesz = writesz - trunc_sz, + test.verify_journal_descriptor(log_dev_jd, {.ds = trunc_offset, .end = 4 * chunk_size, .writesz = writesz - trunc_sz, .rsvdsz = 0, .chunks = 3, .trunc = false, .total = 3 * chunk_size, .seek = 0x0}); test.read_all(); @@ -599,8 +599,8 @@ TEST_F(VDevJournalIOTest, MultipleChunkTest) { LOGINFO("Truncating all entries"); trunc_offset = log_dev_jd->tail_offset(); test.truncate(trunc_offset); - test.verify_journal_descriptor(log_dev_jd, {.ds = 0x1b00000, .end = 0x2000000, .writesz = 0, .rsvdsz = 0, - .chunks = 1, .trunc = true, .total = 8388608, .seek = 0x0}); + test.verify_journal_descriptor(log_dev_jd, {.ds = trunc_offset, .end = 4 * chunk_size, .writesz = 0, .rsvdsz = 0, + .chunks = 1, .trunc = true, .total = chunk_size, .seek = 0x0}); // clang-format on }