diff --git a/src/engine/blkalloc/blk.h b/src/engine/blkalloc/blk.h index 5429f77bf..40262f154 100644 --- a/src/engine/blkalloc/blk.h +++ b/src/engine/blkalloc/blk.h @@ -102,7 +102,7 @@ struct BlkId { bool operator==(const BlkId& other) noexcept { return (compare(*this, other) == 0); } void invalidate() { set(blk_num_t{0}, blk_count_t{0}, s_chunk_num_mask); } - + // return invalid_blk_id() { return blk_count_t{0}; } [[nodiscard]] bool is_valid() const { return (m_chunk_num != s_chunk_num_mask); } [[nodiscard]] BlkId get_blkid_at(const uint32_t offset, const uint32_t pagesz) const { diff --git a/src/homeblks/homeblks_config.fbs b/src/homeblks/homeblks_config.fbs index 42566a9c4..a99da66d2 100644 --- a/src/homeblks/homeblks_config.fbs +++ b/src/homeblks/homeblks_config.fbs @@ -34,6 +34,9 @@ table GeneralConfig { // These fields should only be changed by agent through workflow boot_restricted_mode: bool = false; boot_safe_mode: bool = false; + + // This field is for enabling thin provisioing on booting + boot_thin_provisioning: bool = true; } table HomeBlksSettings { diff --git a/src/homeblks/volume/tests/vol_gtest.cpp b/src/homeblks/volume/tests/vol_gtest.cpp index d8aa4ef17..932b2fa7f 100644 --- a/src/homeblks/volume/tests/vol_gtest.cpp +++ b/src/homeblks/volume/tests/vol_gtest.cpp @@ -1675,7 +1675,8 @@ class IOTestJob : public TestJob { // lba: [0, max_vol_blks - max_blks) std::uniform_int_distribution< uint64_t > lba_random{0, vinfo->max_vol_blks - max_blks - 1}; // nlbas: [1, max_blks] - std::uniform_int_distribution< uint32_t > nlbas_random{1, max_blks}; +// std::uniform_int_distribution< uint32_t > nlbas_random{1, max_blks}; + std::uniform_int_distribution< uint32_t > nlbas_random{1, 5}; // we won't be writing more then 128 blocks in one io uint32_t attempt{1}; @@ -1816,16 +1817,21 @@ class IOTestJob : public TestJob { const uint64_t page_size{VolInterface::get_instance()->get_page_size(vol)}; const uint64_t size{nlbas * page_size}; boost::intrusive_ptr< io_req_t > vreq{}; + + static thread_local std::random_device rd{}; + static thread_local std::default_random_engine engine{rd()}; + static thread_local std::uniform_int_distribution< uint8_t > dist{0, 1}; + if (tcfg.write_cache) { uint8_t* const wbuf{iomanager.iobuf_alloc(512, size)}; HS_REL_ASSERT_NOTNULL(wbuf); populate_buf(wbuf, size, lba, vinfo.get()); - + populate_zero_buf(wbuf, size, vinfo.get()); vreq = boost::intrusive_ptr< io_req_t >( new io_req_t(vinfo, Op_type::WRITE, wbuf, lba, nlbas, tcfg.verify_csum(), tcfg.write_cache)); } else { - static bool send_iovec{true}; + static bool send_iovec{false}; std::vector< iovec > iovecs{}; if (send_iovec) { for (uint32_t lba_num{0}; lba_num < nlbas; ++lba_num) { @@ -1833,8 +1839,8 @@ class IOTestJob : public TestJob { HS_REL_ASSERT_NOTNULL(wbuf); iovec iov{static_cast< void* >(wbuf), static_cast< size_t >(page_size)}; iovecs.emplace_back(std::move(iov)); - populate_buf(wbuf, page_size, lba + lba_num, vinfo.get()); + populate_zero_buf(wbuf, size, vinfo.get()); } vreq = boost::intrusive_ptr< io_req_t >(new io_req_t(vinfo, Op_type::WRITE, std::move(iovecs), lba, @@ -1842,12 +1848,13 @@ class IOTestJob : public TestJob { } else { uint8_t* const wbuf{iomanager.iobuf_alloc(512, size)}; populate_buf(wbuf, size, lba, vinfo.get()); + populate_zero_buf(wbuf, size, vinfo.get()); HS_REL_ASSERT_NOTNULL(wbuf); vreq = boost::intrusive_ptr< io_req_t >{ new io_req_t(vinfo, Op_type::WRITE, wbuf, lba, nlbas, tcfg.verify_csum(), tcfg.write_cache)}; } - send_iovec = !send_iovec; + // send_iovec = !send_iovec; } vreq->cookie = static_cast< void* >(this); @@ -1862,6 +1869,40 @@ class IOTestJob : public TestJob { return true; } + void populate_zero_buf(uint8_t* buf, const uint64_t size, const vol_info_t* const vinfo) { + auto page_size = VolInterface::get_instance()->get_page_size(vinfo->vol); + auto nlbas = size / page_size; + static thread_local std::random_device rd{}; + static thread_local std::default_random_engine engine{rd()}; + static thread_local std::uniform_int_distribution< uint8_t > dist{0, 100}; +// std::fill_n(buf + nlbas/2 * page_size, page_size, 0); +// { +// // first zero +// std::fill_n(buf, page_size, 0); +// } + { + // first x lbas the non_zero the rest zero + + if (nlbas >= 2) + std::fill_n(buf + page_size, (nlbas -1) *page_size, 0); + } +// { +// // randomly 5% of lbas can be zero +// for (long unsigned int i = 0; i < nlbas; ++i) { +// if (dist(engine) < 5) { std::fill_n(buf + i * page_size, page_size, 0); } +// } +// } +// { +// // one lba in the middle can be zero (two sub non empty ranges) +// std::uniform_int_distribution< uint8_t > ran_lba{1, nlbas-1}; +// auto l1= ran_lba(engine); +// auto l2= ran_lba(engine); +// auto lb1 = std::min(l1,l2); +// auto lb2 = std::max(l1,l2); +// std::fill_n(buf + l1 * page_size, (lb2 -lb1 +1) *page_size, 0); +// } + } + void populate_buf(uint8_t* const buf, const uint64_t size, const uint64_t lba, const vol_info_t* const vinfo) { static thread_local std::random_device rd{}; static thread_local std::default_random_engine engine{rd()}; @@ -1881,6 +1922,7 @@ class IOTestJob : public TestJob { } bool read_vol(const uint32_t cur, const uint64_t lba, const uint32_t nlbas) { + return true; const auto vinfo{m_voltest->m_vol_info[cur]}; const auto vol{vinfo->vol}; if (vol == nullptr) { return false; } @@ -1958,6 +2000,8 @@ class IOTestJob : public TestJob { } bool verify(const boost::intrusive_ptr< io_req_t >& req, const bool can_panic = true) const { + return true; +#if 0 const auto& vol_req{static_cast< vol_interface_req_ptr >(req)}; const auto verify_buffer{[this, &req, &can_panic](const uint8_t* const validate_buffer, @@ -2063,7 +2107,9 @@ class IOTestJob : public TestJob { tcfg.verify_csum() ? (HS_REL_ASSERT_EQ(total_size_read_csum, req->verify_size)) : (HS_REL_ASSERT_EQ(total_size_read, req->original_size)); return true; +#endif } + }; class VolVerifyJob : public IOTestJob { @@ -2224,6 +2270,21 @@ TEST_F(VolTest, init_io_test) { if (tcfg.remove_file_on_shutdown) { this->remove_files(); } } +TEST_F(VolTest, thin_test) { + this->start_homestore(); + std::unique_ptr< VolCreateDeleteJob > cdjob; + if (tcfg.create_del_with_io || tcfg.delete_with_io) { + cdjob = std::make_unique< VolCreateDeleteJob >(this); + this->start_job(cdjob.get(), wait_type::no_wait); + } + + this->start_io_job(); + output.print("init_io_test"); + + if (tcfg.create_del_with_io || tcfg.delete_with_io) { cdjob->wait_for_completion(); } + this->shutdown(); +} + /*! @test recovery_io_test @brief Tests which does recovery. End up with a clean shutdown diff --git a/src/homeblks/volume/volume.cpp b/src/homeblks/volume/volume.cpp index 77ff6fee7..5220c455f 100644 --- a/src/homeblks/volume/volume.cpp +++ b/src/homeblks/volume/volume.cpp @@ -171,7 +171,7 @@ Volume::Volume(const vol_params& params) : throw std::runtime_error("shutdown in progress"); } m_sobject = m_hb->sobject_mgr()->create_object("volume", params.vol_name, - std::bind(&Volume::get_status, this, std::placeholders::_1)); + std::bind(&Volume::get_status, this, std::placeholders::_1)); m_state = vol_state::UNINITED; } @@ -190,7 +190,7 @@ Volume::Volume(meta_blk* mblk_cookie, sisl::byte_view sb_buf) : HS_REL_ASSERT_EQ(sb->magic, vol_sb_magic, "magic mismatch"); m_hb = HomeBlks::safe_instance(); m_sobject = m_hb->sobject_mgr()->create_object("volume", sb->vol_name, - std::bind(&Volume::get_status, this, std::placeholders::_1)); + std::bind(&Volume::get_status, this, std::placeholders::_1)); } void Volume::init() { @@ -334,8 +334,149 @@ indx_tbl* Volume::recover_indx_tbl(btree_super_block& sb, btree_cp_sb& cp_info) SnapMgr::add_read_tracker, &cp_info); return static_cast< indx_tbl* >(tbl); } +static std::vector< bool > find_non_zero_data(const uint8_t* buf, size_t size, uint32_t nlbas) { + std::vector< bool > empty_blocks; + + auto is_buf_empty = [](const uint8_t* buf, size_t size) -> bool { + return buf[0] == 0 && !std::memcmp(buf, buf + 1, size - 1); + }; + for (uint32_t count{0}; count < nlbas; ++count) { + empty_blocks.push_back(!is_buf_empty(buf, size)); + buf += size; + } + return empty_blocks; +} +static std::vector< std::pair< int, int > > get_true_intervals(const std::vector< bool >& empty_blocks) { + std::vector< std::pair< int, int > > result; + + int start = -1; + for (std::size_t i = 0; i < empty_blocks.size(); ++i) { + if (empty_blocks[i]) { + if (start == -1) { start = i; } + } else { + if (start != -1) { + result.emplace_back(start, i - start); + start = -1; + } + } + } + + if (start != -1) { result.emplace_back(start, empty_blocks.size() - start); } + + return result; +} + +static std::vector< std::pair< int, int > > compute_range_intervals(const uint8_t* buf, size_t page_size, + uint32_t nlbas, bool empty_blocks = false) { + std::vector< std::pair< int, int > > intervals; + bool in_empty_region = false; + int current_range_start = -1; + int current_range_length = 1; + auto is_buf_empty = [](const uint8_t* buf, size_t size) -> bool { + return buf[0] == 0 && !std::memcmp(buf, buf + 1, size - 1); + }; + for (uint32_t i = 0; i < nlbas; i++) { + const uint8_t* page_start = buf + (i * page_size); + bool is_page_empty = (empty_blocks == is_buf_empty(page_start, page_size)); + if (is_page_empty) { + if (!in_empty_region) { + current_range_start = i; + current_range_length = 1; + in_empty_region = true; + } else { + current_range_length++; + } + } else { + if (in_empty_region) { intervals.push_back(std::make_pair(current_range_start, current_range_length)); } + in_empty_region = false; + } + } + if (in_empty_region) { intervals.push_back(std::make_pair(current_range_start, current_range_length)); } + return intervals; +} std::error_condition Volume::write(const vol_interface_req_ptr& iface_req) { + if (!HB_DYNAMIC_CONFIG(general_config->boot_thin_provisioning)){ + return write_internal(iface_req); + } + std::error_condition ret{no_error}; + auto buf = static_cast< uint8_t* >(iface_req->buffer); + auto nlbas = iface_req->nlbas; + auto start_lba = iface_req->lba; + auto non_empty_blocks = compute_range_intervals(buf, get_page_size(), nlbas, false); +// auto vreq = volume_req::make(iface_req); + auto intervals_to_string = [start_lba](const std::vector< std::pair< int, int > >& intervals) -> std::string { + std::vector< std::string > result_strings; + std::transform(intervals.begin(), intervals.end(), std::back_inserter(result_strings), + [start_lba](const std::pair< int, int >& p) -> std::string { + // Use a static buffer to hold the formatted string + static char buffer[32]; + std::snprintf(buffer, sizeof(buffer), "<%ld,%d>", p.first + start_lba, p.second); + return buffer; + }); + return std::accumulate(result_strings.begin(), result_strings.end(), std::string("")); + }; + LOGINFO("original req <{}, {}> => [{}]", iface_req->lba, iface_req->nlbas, intervals_to_string(non_empty_blocks)); + for (const auto &interval : non_empty_blocks) { +//#if 0 + iface_req->lba = start_lba + interval.first; + iface_req->nlbas = interval.second; + iface_req->buffer = buf + (interval.first * get_page_size()); + iface_req->iovecs.clear(); + + + ret = write_internal(iface_req); + if (ret != no_error) { + return ret; + } +//#endif +#if 0 + auto lba = start_lba + interval.first; + auto nlbas = interval.second; + const auto buffer = buf + (interval.first * get_page_size()); + auto req = std::make_unique(buffer, lba, nlbas, iface_req->sync, iface_req->cache); + + req->vol_instance = shared_from_this(); + req->part_of_batch = iface_req->part_of_batch; + req->op_type = Op_type::WRITE; + LOGINFO("sending request to write_internal with lba: {}, nlbas: {} buffer :{}", req->lba, req->nlbas, req->buffer); + //extra + req->read_buf_list = iface_req->read_buf_list; + req->err = iface_req->err; + req->request_id = iface_req->request_id; + req->cache = iface_req->cache; + req->sync = iface_req->sync; + req->is_fail_completed = iface_req->is_fail_completed.load(); + req->cookie = iface_req->cookie; + + ret = write_internal(req.get()); + for (auto x: iface_req->read_buf_list) { + req->read_buf_list.push_back(x); + + } + for (auto p: iface_req->iovecs) { + req->iovecs.push_back(p); + + } + // vol_interface_req i_req(buffer, start_lba, nlbas, iface_req->sync, iface_req->cache); +// i_req.request_id = iface_req->request_id; +// auto ret = write_internal(&i_req); +// if (ret != no_error) { +// return ret; +// } +#endif + } + iface_req->buffer = (void*)(buf); + iface_req->nlbas = nlbas; + iface_req->lba = start_lba; +// check_and_complete_req(vreq, ret); +// interface_req_done(iface_req); + return ret; +} +//std::error_condition Volume::write(const vol_interface_req_ptr& iface_req) { +// +//} + std::error_condition Volume::write_internal(const vol_interface_req_ptr& iface_req) { static thread_local std::vector< BlkId > bid{}; std::error_condition ret{no_error}; @@ -344,6 +485,9 @@ std::error_condition Volume::write(const vol_interface_req_ptr& iface_req) { auto vreq = volume_req::make(iface_req); THIS_VOL_LOG(TRACE, volume, vreq, "write: lba={}, nlbas={}, cache={}", vreq->lba(), vreq->nlbas(), vreq->use_cache()); + LOGINFO("\nwrite: lba={}, nlbas={}, cache={} buffer= {}", vreq->lba(), vreq->nlbas(), + vreq->use_cache(), iface_req->buffer); + print_tree(); COUNTER_INCREMENT(m_metrics, volume_outstanding_data_write_count, 1); // Sanity checks @@ -371,6 +515,7 @@ std::error_condition Volume::write(const vol_interface_req_ptr& iface_req) { uint64_t start_lba{vreq->lba()}; for (size_t i{0}; i < bid.size(); ++i) { + LOGINFO("bid[{}]: {}", i, bid[i].to_string()); if (bid[i].get_nblks() == 0) { // It should not happen. But it happened once so adding a safe check in case it happens again VOL_LOG_ASSERT(0, vreq, "{}", bid[i].to_string()); @@ -403,7 +548,10 @@ std::error_condition Volume::write(const vol_interface_req_ptr& iface_req) { } } else { // scatter/gather write + const auto& iovecs{std::get< volume_req::IoVecData >(vreq->data)}; + LOGINFO("write: lba={}, nlbas={}, data size/pagesize: {} iovec[0]_len {} buffer{} iovecs.iov_data {} size {}", vreq->lba(), vreq->nlbas(), + data_size/get_page_size(), static_cast< uint64_t >(iovecs.get().at(0).iov_len)/4096, iface_req->buffer, iovecs.get().at(0).iov_base, iovecs.get().size()); const auto write_iovecs{get_next_iovecs(write_transversal, iovecs, data_size)}; // TO DO: Add option to insert into cache if write cache option true @@ -449,7 +597,11 @@ std::error_condition Volume::write(const vol_interface_req_ptr& iface_req) { } done: - check_and_complete_req(vreq, ret); +// if (!HB_DYNAMIC_CONFIG(general_config->boot_thin_provisioning)){ + LOGINFO("done calls for check and complete write? {}: lba={}, nlbas={}", vreq->is_write(), vreq->lba(), vreq->nlbas()); + check_and_complete_req(vreq, ret); +// } + return ret; } @@ -584,6 +736,8 @@ bool Volume::check_and_complete_req(const volume_req_ptr& vreq, const std::error vreq->state = volume_req_state::journal_io; vreq->indx_start_time = Clock::now(); auto ireq = boost::static_pointer_cast< indx_req >(vreq); + LOGINFO("complete write? {}: lba={}, nlbas={}, cache={}", vreq->is_write(), vreq->lba(), vreq->nlbas(), + vreq->use_cache()); (vreq->is_unmap()) ? m_indx_mgr->unmap(ireq) : m_indx_mgr->update_indx(ireq); COUNTER_INCREMENT(m_metrics, volume_outstanding_metadata_write_count, 1); } @@ -627,7 +781,12 @@ bool Volume::check_and_complete_req(const volume_req_ptr& vreq, const std::error } #endif THIS_VOL_LOG(TRACE, volume, vreq, "IO DONE"); - interface_req_done(vreq->iface_req); + if (vreq->is_write() && HB_DYNAMIC_CONFIG(general_config->boot_thin_provisioning)){ + + } + else{ + interface_req_done(vreq->iface_req); + } } shutdown_if_needed(); } @@ -656,7 +815,7 @@ void Volume::process_indx_completions(const indx_req_ptr& ireq, std::error_condi THIS_VOL_LOG(TRACE, volume, vreq, "metadata_complete: status={}", vreq->err().message()); HISTOGRAM_OBSERVE(m_metrics, volume_map_write_latency, get_elapsed_time_us(vreq->indx_start_time)); - + LOGINFO("process_indx_completions calls for check and complete write? {}: lba={}, nlbas={}", vreq->is_write(), vreq->lba(), vreq->nlbas()); check_and_complete_req(vreq, err); } @@ -765,7 +924,7 @@ mapping* Volume::get_active_indx() { void Volume::process_read_indx_completions(const boost::intrusive_ptr< indx_req >& ireq, std::error_condition err) { auto ret = no_error; auto vreq = boost::static_pointer_cast< volume_req >(ireq); - + LOGINFO("process_read_indx_completions calls for check and complete read? {}: lba={}, nlbas={}", vreq->is_read_op(), vreq->lba(), vreq->nlbas()); // if there is error or nothing to read anymore, complete this req; if (err != no_error) { ret = err; @@ -889,6 +1048,7 @@ void Volume::process_read_indx_completions(const boost::intrusive_ptr< indx_req /* It is not lock protected. It should be called only by thread for a vreq */ volume_child_req_ptr Volume::create_vol_child_req(const BlkId& bid, const volume_req_ptr& vreq, const uint64_t start_lba, const lba_count_t nlbas) { + volume_child_req_ptr vc_req = volume_child_req::make_request(); vc_req->parent_req = vreq; vc_req->is_read = vreq->is_read_op(); @@ -899,6 +1059,7 @@ volume_child_req_ptr Volume::create_vol_child_req(const BlkId& bid, const volume vc_req->use_cache = vreq->use_cache(); vc_req->part_of_batch = vreq->iface_req->part_of_batch; vc_req->request_id = vreq->request_id; + LOGINFO("create_vol_child_req calls for check and complete write? {}: lba={}, nlbas={}", vreq->is_write(), vreq->lba(), vreq->nlbas()); assert((bid.data_size(HomeBlks::instance()->get_data_pagesz()) % get_page_size()) == 0); vc_req->nlbas = nlbas; @@ -924,11 +1085,11 @@ sisl::status_response Volume::get_status(const sisl::status_request& request) { auto active_indx_json = get_active_indx()->sobject()->run_callback(request).json; if (!active_indx_json.empty()) { response.json["index"] = active_indx_json; } - response.json["name"] = sobject()->name(); + response.json["name"] = sobject()->name(); response.json["type"] = sobject()->type(); response.json["uuid"] = boost::lexical_cast< std::string >(get_uuid()); response.json["state"] = is_offline() ? "Offline" : "Online"; - response.json["size"]= get_size(); + response.json["size"] = get_size(); return response; } diff --git a/src/homeblks/volume/volume.hpp b/src/homeblks/volume/volume.hpp index 10c2fbbf5..619698370 100644 --- a/src/homeblks/volume/volume.hpp +++ b/src/homeblks/volume/volume.hpp @@ -465,6 +465,12 @@ class Volume : public std::enable_shared_from_this< Volume > { */ std::error_condition write(const vol_interface_req_ptr& hb_req); + /* Write to lba + * @param hb_req :- it expects this request to be created + * @return :- no_error if there is no error. It doesn't throw any exception + */ + std::error_condition write_internal(const vol_interface_req_ptr& hb_req); + /* Read from lba * @param hb_req :- it expects this request to be created * @return :- no_error if there is no error. It doesn't throw any exception @@ -729,7 +735,7 @@ struct volume_req : indx_req { csum_t* j_csum = (csum_t*)mem; if (!is_unmap() && active_nlbas_written != nlbas()) { - VOL_ERROR_LOG(vol()->get_name(), "all lbas are not written. lba written {}, lba supposed to write{}", + VOL_ERROR_LOG(vol()->get_name(), "all lbas are not written. lba written {}, lba supposed to write: {}", active_nlbas_written, nlbas()); } for (lba_count_t i{0}; !is_unmap() && i < active_nlbas_written; ++i) {