From eb585f7d95d588bf8450c3cec02c36bb42c5e429 Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Mon, 5 Aug 2024 15:06:40 +0800 Subject: [PATCH] PageStorage: Fix empty page cause TiFlash failed to start (#9283) (#9285) close pingcap/tiflash#9282 PageStorage: Fix empty page cause TiFlash failed to start Signed-off-by: ti-chi-bot Co-authored-by: JaySon Co-authored-by: JaySon-Huang --- dbms/src/Common/UniThreadPool.h | 1 - dbms/src/Storages/Page/V3/Blob/BlobStat.cpp | 9 +- dbms/src/Storages/Page/V3/Blob/GCInfo.cpp | 7 +- dbms/src/Storages/Page/V3/BlobStore.cpp | 125 ++++++++++-------- dbms/src/Storages/Page/V3/BlobStore.h | 2 +- dbms/src/Storages/Page/V3/PageDirectory.cpp | 6 +- .../Storages/Page/V3/spacemap/SpaceMap.cpp | 16 +-- dbms/src/Storages/Page/V3/spacemap/SpaceMap.h | 4 +- .../Page/V3/spacemap/SpaceMapSTDMap.h | 26 +++- .../Page/V3/tests/gtest_blob_stat.cpp | 62 ++++++++- .../Page/V3/tests/gtest_blob_store.cpp | 58 +++++++- .../Storages/Page/V3/tests/gtest_free_map.cpp | 26 +++- .../Page/V3/tests/gtest_page_directory.cpp | 47 +++++++ .../Page/V3/tests/gtest_page_storage.cpp | 1 + dbms/src/Storages/S3/S3RandomAccessFile.cpp | 4 + 15 files changed, 303 insertions(+), 91 deletions(-) diff --git a/dbms/src/Common/UniThreadPool.h b/dbms/src/Common/UniThreadPool.h index 0e6857fae2f..d61d7a2f35c 100644 --- a/dbms/src/Common/UniThreadPool.h +++ b/dbms/src/Common/UniThreadPool.h @@ -29,7 +29,6 @@ #include #include #include -#include #include namespace DB diff --git a/dbms/src/Storages/Page/V3/Blob/BlobStat.cpp b/dbms/src/Storages/Page/V3/Blob/BlobStat.cpp index c47bf595099..2d18b53cb56 100644 --- a/dbms/src/Storages/Page/V3/Blob/BlobStat.cpp +++ b/dbms/src/Storages/Page/V3/Blob/BlobStat.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -216,8 +217,6 @@ std::pair BlobStats::chooseStat( PageType page_type, const std::lock_guard &) { - BlobStatPtr stat_ptr = nullptr; - // No stats exist if (stats_map.empty()) { @@ -301,6 +300,12 @@ BlobStats::StatsMap BlobStats::getStats() const NO_THREAD_SAFETY_ANALYSIS BlobFileOffset BlobStats::BlobStat::getPosFromStat(size_t buf_size, const std::unique_lock &) { + // A shortcut for empty page. All empty pages will be stored + // at the beginning of the BlobFile. It should not affects the + // sm_max_caps or other fields by adding these empty pages. + if (unlikely(buf_size == 0)) + return 0; + BlobFileOffset offset = 0; UInt64 max_cap = 0; bool expansion = true; diff --git a/dbms/src/Storages/Page/V3/Blob/GCInfo.cpp b/dbms/src/Storages/Page/V3/Blob/GCInfo.cpp index af18c580af4..320cee5584c 100644 --- a/dbms/src/Storages/Page/V3/Blob/GCInfo.cpp +++ b/dbms/src/Storages/Page/V3/Blob/GCInfo.cpp @@ -33,7 +33,7 @@ struct fmt::formatter template auto format(const DB::PS::V3::BlobFileGCInfo & i, FormatContext & ctx) const { - return fmt::format_to(ctx.out(), "", i.blob_id, i.valid_rate); + return fmt::format_to(ctx.out(), "", i.blob_id, i.valid_rate); } }; template <> @@ -53,7 +53,7 @@ struct fmt::formatter { return fmt::format_to( ctx.out(), - "", + "", i.blob_id, i.origin_size, i.truncated_size, @@ -86,7 +86,7 @@ String BlobStoreGCInfo::toTypeString(const Type type_index) const if (blob_gc_info[type_index].empty()) return fmt::format("{{{}: [null]}}", magic_enum::enum_name(type_index)); - // e.g. {FullGC: []}} + // e.g. {FullGC: []} FmtBuffer fmt_buf; fmt_buf.fmtAppend("{{{}: [", magic_enum::enum_name(type_index)) .joinStr( @@ -103,6 +103,7 @@ String BlobStoreGCInfo::toTypeTruncateString(const Type type_index) const if (blob_gc_truncate_info.empty()) return fmt::format("{{{}: [null]}}", magic_enum::enum_name(type_index)); + // e.g. {Truncated: []} FmtBuffer fmt_buf; fmt_buf.fmtAppend("{{{}: [", magic_enum::enum_name(type_index)) .joinStr( diff --git a/dbms/src/Storages/Page/V3/BlobStore.cpp b/dbms/src/Storages/Page/V3/BlobStore.cpp index 15f92f8d6ac..b37d05e35e0 100644 --- a/dbms/src/Storages/Page/V3/BlobStore.cpp +++ b/dbms/src/Storages/Page/V3/BlobStore.cpp @@ -36,7 +36,6 @@ #include #include -#include #include #include @@ -128,7 +127,7 @@ void BlobStore::registerPaths() NO_THREAD_SAFETY_ANALYSIS } else { - LOG_INFO(log, "Ignore not blob file [dir={}] [file={}] [err_msg={}]", path, blob_name, err_msg); + LOG_INFO(log, "Ignore not blob file, dir={} file={} err_msg={}", path, blob_name, err_msg); } } } @@ -690,7 +689,7 @@ void BlobStore::remove(const PageEntries & del_entries) NO_THREAD_SAFETY_ } catch (DB::Exception & e) { - e.addMessage(fmt::format("while removing entry [entry={}]", entry)); + e.addMessage(fmt::format("while removing entry, entry={}", entry)); e.rethrow(); } } @@ -712,8 +711,8 @@ void BlobStore::remove(const PageEntries & del_entries) NO_THREAD_SAFETY_ } LOG_TRACE( log, - "Blob recalculated capability [blob_id={}] [max_cap={}] " - "[total_size={}] [valid_size={}] [valid_rate={}]", + "Blob recalculated capability blob_id={} max_cap={} " + "total_size={} valid_size={} valid_rate={}", blob_id, stat->sm_max_caps, stat->sm_total_size, @@ -730,6 +729,7 @@ std::pair BlobStore::getPosFromStats(size_t s Stopwatch watch; BlobStatPtr stat; + // TODO: make this lambda as a function of BlobStats to simplify code auto lock_stat = [size, this, &stat, &page_type]() NO_THREAD_SAFETY_ANALYSIS { auto lock_stats = blob_stats.lock(); BlobFileId blob_file_id = INVALID_BLOBFILE_ID; @@ -770,8 +770,8 @@ std::pair BlobStore::getPosFromStats(size_t s LOG_ERROR(Logger::get(), stat->smap->toDebugString()); throw Exception( ErrorCodes::LOGICAL_ERROR, - "Get postion from BlobStat failed, it may caused by `sm_max_caps` is no correct. [size={}] " - "[old_max_caps={}] [max_caps={}] [blob_id={}]", + "Get postion from BlobStat failed, it may caused by `sm_max_caps` is no correct. size={} " + "old_max_caps={} max_caps={} blob_id={}", size, old_max_cap, stat->sm_max_caps, @@ -801,7 +801,13 @@ void BlobStore::removePosFromStats(BlobFileId blob_id, BlobFileOffset off // Note that we must release the lock on blob_stat before removing it // from all blob_stats, or deadlocks could happen. // As the blob_stat has been became read-only, it is safe to release the lock. - LOG_INFO(log, "Removing BlobFile [blob_id={}]", blob_id); + LOG_INFO( + log, + "Removing BlobFile, blob_id={} read_only={} offset={} size={}", + blob_id, + stat->isReadOnly(), + offset, + size); { // Remove the stat from memory @@ -877,8 +883,9 @@ typename BlobStore::PageMap BlobStore::read(FieldReadInfos & to_re #ifndef NDEBUG // throw an exception under debug mode so we should change the upper layer logic throw Exception( - fmt::format("Reading with fields but entry size is 0, read_info=[{}]", buf.toString()), - ErrorCodes::LOGICAL_ERROR); + ErrorCodes::LOGICAL_ERROR, + "Reading with fields but entry size is 0, read_info=[{}]", + buf.toString()); #endif // Log a warning under production release LOG_WARNING(log, "Reading with fields but entry size is 0, read_info=[{}]", buf.toString()); @@ -911,8 +918,7 @@ typename BlobStore::PageMap BlobStore::read(FieldReadInfos & to_re // TODO: Continuously fields can read by one system call. const auto [beg_offset, end_offset] = entry.getFieldOffsets(field_index); const auto size_to_read = end_offset - beg_offset; - auto blob_file - = read(page_id_v3, entry.file_id, entry.offset + beg_offset, write_offset, size_to_read, read_limiter); + read(page_id_v3, entry.file_id, entry.offset + beg_offset, write_offset, size_to_read, read_limiter); fields_offset_in_page.emplace(field_index, read_size_this_entry); if constexpr (BLOBSTORE_CHECKSUM_ON_READ) @@ -926,17 +932,16 @@ typename BlobStore::PageMap BlobStore::read(FieldReadInfos & to_re throw Exception( ErrorCodes::CHECKSUM_DOESNT_MATCH, "Reading with fields meet checksum not match " - "[page_id={}] [expected=0x{:X}] [actual=0x{:X}] " - "[field_index={}] [field_offset={}] [field_size={}] " - "[entry={}] [file={}]", + "page_id={} expected=0x{:X} actual=0x{:X} " + "field_index={} field_offset={} field_size={} " + "entry={}", page_id_v3, expect_checksum, field_checksum, field_index, beg_offset, size_to_read, - entry, - blob_file->getPath()); + entry); } } @@ -966,12 +971,11 @@ typename BlobStore::PageMap BlobStore::read(FieldReadInfos & to_re }, ","); throw Exception( - fmt::format( - "unexpected read size, end_pos={} current_pos={} read_info=[{}]", - shared_data_buf + buf_size, - pos, - buf.toString()), - ErrorCodes::LOGICAL_ERROR); + ErrorCodes::LOGICAL_ERROR, + "unexpected read size, end_pos={} current_pos={} read_info=[{}]", + shared_data_buf + buf_size, + pos, + buf.toString()); } return page_map; } @@ -1008,8 +1012,8 @@ typename BlobStore::PageMap BlobStore::read( for (const auto & [page_id_v3, entry] : entries) { // Unexpected behavior but do no harm - LOG_INFO(log, "Read entry without entry size, page_id={} entry={}", page_id_v3, entry); Page page(Trait::PageIdTrait::getU64ID(page_id_v3)); + page.data = std::string_view(nullptr, 0); page_map.emplace(Trait::PageIdTrait::getPageMapKey(page_id_v3), page); } return page_map; @@ -1022,7 +1026,7 @@ typename BlobStore::PageMap BlobStore::read( PageMap page_map; for (const auto & [page_id_v3, entry] : entries) { - auto blob_file = read(page_id_v3, entry.file_id, entry.offset, pos, entry.size, read_limiter); + read(page_id_v3, entry.file_id, entry.offset, pos, entry.size, read_limiter); if constexpr (BLOBSTORE_CHECKSUM_ON_READ) { @@ -1032,15 +1036,13 @@ typename BlobStore::PageMap BlobStore::read( if (unlikely(entry.size != 0 && checksum != entry.checksum)) { throw Exception( - fmt::format( - "Reading with entries meet checksum not match [page_id={}] [expected=0x{:X}] [actual=0x{:X}] " - "[entry={}] [file={}]", - page_id_v3, - entry.checksum, - checksum, - entry, - blob_file->getPath()), - ErrorCodes::CHECKSUM_DOESNT_MATCH); + ErrorCodes::CHECKSUM_DOESNT_MATCH, + "Reading with entries meet checksum not match page_id={} expected=0x{:X} actual=0x{:X} " + "entry={}", + page_id_v3, + entry.checksum, + checksum, + entry); } } @@ -1098,15 +1100,15 @@ Page BlobStore::read(const PageIdAndEntry & id_entry, const ReadLimiterPt if (buf_size == 0) { // Unexpected behavior but do no harm - LOG_INFO(log, "Read entry without entry size, page_id={} entry={}", page_id_v3, entry); Page page(Trait::PageIdTrait::getU64ID(page_id_v3)); + page.data = std::string_view(nullptr, 0); return page; } char * data_buf = static_cast(alloc(buf_size)); MemHolder mem_holder = createMemHolder(data_buf, [&, buf_size](char * p) { free(p, buf_size); }); - auto blob_file = read(page_id_v3, entry.file_id, entry.offset, data_buf, buf_size, read_limiter); + read(page_id_v3, entry.file_id, entry.offset, data_buf, buf_size, read_limiter); if constexpr (BLOBSTORE_CHECKSUM_ON_READ) { ChecksumClass digest; @@ -1115,15 +1117,13 @@ Page BlobStore::read(const PageIdAndEntry & id_entry, const ReadLimiterPt if (unlikely(entry.size != 0 && checksum != entry.checksum)) { throw Exception( - fmt::format( - "Reading with entries meet checksum not match [page_id={}] [expected=0x{:X}] [actual=0x{:X}] " - "[entry={}] [file={}]", - page_id_v3, - entry.checksum, - checksum, - entry, - blob_file->getPath()), - ErrorCodes::CHECKSUM_DOESNT_MATCH); + ErrorCodes::CHECKSUM_DOESNT_MATCH, + "Reading with entries meet checksum not match page_id={} expected=0x{:X} actual=0x{:X} " + "entry={}", + page_id_v3, + entry.checksum, + checksum, + entry); } } @@ -1142,7 +1142,7 @@ Page BlobStore::read(const PageIdAndEntry & id_entry, const ReadLimiterPt } template -BlobFilePtr BlobStore::read( +void BlobStore::read( const typename BlobStore::PageId & page_id_v3, BlobFileId blob_id, BlobFileOffset offset, @@ -1152,6 +1152,12 @@ BlobFilePtr BlobStore::read( bool background) { GET_METRIC(tiflash_storage_page_command_count, type_read_blob).Increment(); + + // A shortcut to avoid unnecessary locks / system call when "reading an empty page" + // Reading an empty page should not create a BlobFile if it has already removed. + if (unlikely(size == 0)) + return; + assert(buffers != nullptr); BlobFilePtr blob_file = getBlobFile(blob_id); try @@ -1170,7 +1176,6 @@ BlobFilePtr BlobStore::read( background)); e.rethrow(); } - return blob_file; } @@ -1211,7 +1216,7 @@ typename BlobStore::PageTypeAndBlobIds BlobStore::getGCStats() NO_ if (stat->isReadOnly()) { blobstore_gc_info.appendToReadOnlyBlob(stat->id, stat->sm_valid_rate); - LOG_TRACE(log, "Current [blob_id={}] is read-only", stat->id); + LOG_TRACE(log, "Current blob is read-only, blob_id={}", stat->id); continue; } @@ -1226,7 +1231,7 @@ typename BlobStore::PageTypeAndBlobIds BlobStore::getGCStats() NO_ // TODO: avoid always truncate on empty BlobFile RUNTIME_CHECK_MSG( stat->sm_valid_size == 0, - "Current blob is empty, but valid size is not 0. [blob_id={}] [valid_size={}] [valid_rate={}]", + "Current blob is empty, but valid size is not 0, blob_id={} valid_size={} valid_rate={}", stat->id, stat->sm_valid_size, stat->sm_valid_rate); @@ -1236,7 +1241,7 @@ typename BlobStore::PageTypeAndBlobIds BlobStore::getGCStats() NO_ auto blobfile = getBlobFile(stat->id); LOG_INFO( log, - "Current blob file is empty, truncated to zero [blob_id={}] [total_size={}] [valid_rate={}]", + "Current blob file is empty, truncated to zero, blob_id={} total_size={} valid_rate={}", stat->id, stat->sm_total_size, stat->sm_valid_rate); @@ -1254,8 +1259,7 @@ typename BlobStore::PageTypeAndBlobIds BlobStore::getGCStats() NO_ { LOG_ERROR( log, - "Current blob got an invalid rate {:.2f}, total size is {}, valid size is {}, right boundary is {} " - "[blob_id={}]", + "Current blob got an invalid rate {:.2f}, total_size={} valid_size={} right_boundary={} blob_id={}", stat->sm_valid_rate, stat->sm_total_size, stat->sm_valid_size, @@ -1279,7 +1283,11 @@ typename BlobStore::PageTypeAndBlobIds BlobStore::getGCStats() NO_ bool do_full_gc = stat->sm_valid_rate <= heavy_gc_threhold; if (do_full_gc) { - LOG_TRACE(log, "Current [blob_id={}] valid rate is {:.2f}, full GC", stat->id, stat->sm_valid_rate); + LOG_TRACE( + log, + "Current blob will run full GC, blob_id={} valid_rate={:.2f}", + stat->id, + stat->sm_valid_rate); if (blob_need_gc.find(page_type) == blob_need_gc.end()) { blob_need_gc.emplace(page_type, std::vector()); @@ -1293,7 +1301,7 @@ typename BlobStore::PageTypeAndBlobIds BlobStore::getGCStats() NO_ else { blobstore_gc_info.appendToNoNeedGCBlob(stat->id, stat->sm_valid_rate); - LOG_TRACE(log, "Current [blob_id={}] valid rate is {:.2f}, unchange", stat->id, stat->sm_valid_rate); + LOG_TRACE(log, "Current blob unchange, blob_id={} valid_rate={:.2f}", stat->id, stat->sm_valid_rate); } if (right_boundary != stat->sm_total_size) @@ -1301,7 +1309,7 @@ typename BlobStore::PageTypeAndBlobIds BlobStore::getGCStats() NO_ auto blobfile = getBlobFile(stat->id); LOG_TRACE( log, - "Truncate blob file [blob_id={}] [origin size={}] [truncated size={}]", + "Truncate blob file, blob_id={} origin_size={} truncated_size={}", stat->id, stat->sm_total_size, right_boundary); @@ -1318,7 +1326,7 @@ typename BlobStore::PageTypeAndBlobIds BlobStore::getGCStats() NO_ LOG_IMPL( log, blobstore_gc_info.getLoggingLevel(), - "BlobStore gc get status done. blob_ids details {}", + "BlobStore gc get status done. details {}", blobstore_gc_info.toString()); return blob_need_gc; @@ -1359,7 +1367,7 @@ void BlobStore::gc( written_blobs.emplace_back(file_id, file_offset, data_size); LOG_INFO( log, - "BlobStore gc write (partially) done [blob_id={}] [file_offset={}] [size={}] [total_size={}]", + "BlobStore gc write (partially) done, blob_id={} file_offset={} size={} total_size={}", file_id, file_offset, data_size, @@ -1370,7 +1378,7 @@ void BlobStore::gc( { LOG_ERROR( log, - "BlobStore gc write failed [blob_id={}] [offset={}] [size={}] [total_size={}]", + "BlobStore gc write failed, blob_id={} offset={} size={} total_size={}", file_id, file_offset, data_size, @@ -1479,6 +1487,7 @@ void BlobStore::gc( { write_blob(blobfile_id, data_buf, file_offset_begin, offset_in_data); } + LOG_INFO(log, "BlobStore gc write done, blob_id={} type={}", blobfile_id, magic_enum::enum_name(page_type)); } template diff --git a/dbms/src/Storages/Page/V3/BlobStore.h b/dbms/src/Storages/Page/V3/BlobStore.h index 6aaf783b172..b887d9165fa 100644 --- a/dbms/src/Storages/Page/V3/BlobStore.h +++ b/dbms/src/Storages/Page/V3/BlobStore.h @@ -126,7 +126,7 @@ class BlobStore : private Allocator PageType page_type, const WriteLimiterPtr & write_limiter = nullptr); - BlobFilePtr read( + void read( const PageId & page_id_v3, BlobFileId blob_id, BlobFileOffset offset, diff --git a/dbms/src/Storages/Page/V3/PageDirectory.cpp b/dbms/src/Storages/Page/V3/PageDirectory.cpp index 80f9ae96126..ba922ea5d71 100644 --- a/dbms/src/Storages/Page/V3/PageDirectory.cpp +++ b/dbms/src/Storages/Page/V3/PageDirectory.cpp @@ -1955,7 +1955,7 @@ std::pair::GcEntriesMap, PageSize> PageDirectory::PageEntries PageDirectory::gcInMemEntries( { LOG_WARNING( log, - "Meet a stale snapshot [thread id={}] [tracing id={}] [seq={}] [alive time(s)={}]", + "Meet a stale snapshot, create_thread={} tracing_id={} seq={} alive_time={:.3f}", snap->create_thread, snap->tracing_id, snap->sequence, @@ -2242,7 +2242,7 @@ typename PageDirectory::PageEntriesEdit PageDirectory::dumpSnapsho } } - LOG_INFO(log, "Dumped snapshot to edits.[sequence={}]", snap->sequence); + LOG_INFO(log, "Dumped snapshot to edits, sequence={} edit_size={}", snap->sequence, edit.size()); return edit; } diff --git a/dbms/src/Storages/Page/V3/spacemap/SpaceMap.cpp b/dbms/src/Storages/Page/V3/spacemap/SpaceMap.cpp index a2d52664172..3622e1b3129 100644 --- a/dbms/src/Storages/Page/V3/spacemap/SpaceMap.cpp +++ b/dbms/src/Storages/Page/V3/spacemap/SpaceMap.cpp @@ -17,9 +17,6 @@ #include #include #include -#include -#include -#include namespace DB { @@ -52,14 +49,17 @@ SpaceMapPtr SpaceMap::createSpaceMap(SpaceMapType type, UInt64 start, UInt64 end return smap; } -bool SpaceMap::checkSpace(UInt64 offset, size_t size) const +bool SpaceMap::isInvalidRange(UInt64 offset, size_t size) const { - return (offset < start) || (offset > end) || (offset + size - 1 > end); + return (offset < start) + || (offset > end) + // check whether it can be changed to `(offset + size > end)` + || ((size != 0) && (offset + size - 1 > end)); } bool SpaceMap::markFree(UInt64 offset, size_t length) { - if (checkSpace(offset, length)) + if (isInvalidRange(offset, length)) { throw Exception( fmt::format( @@ -75,7 +75,7 @@ bool SpaceMap::markFree(UInt64 offset, size_t length) bool SpaceMap::markUsed(UInt64 offset, size_t length) { - if (checkSpace(offset, length)) + if (isInvalidRange(offset, length)) { throw Exception( fmt::format( @@ -91,7 +91,7 @@ bool SpaceMap::markUsed(UInt64 offset, size_t length) bool SpaceMap::isMarkUsed(UInt64 offset, size_t length) { - if (checkSpace(offset, length)) + if (isInvalidRange(offset, length)) { throw Exception( fmt::format( diff --git a/dbms/src/Storages/Page/V3/spacemap/SpaceMap.h b/dbms/src/Storages/Page/V3/spacemap/SpaceMap.h index e110dd3d4f8..23e0a0f9160 100644 --- a/dbms/src/Storages/Page/V3/spacemap/SpaceMap.h +++ b/dbms/src/Storages/Page/V3/spacemap/SpaceMap.h @@ -82,7 +82,7 @@ class SpaceMap * If such span is found. * It will mark that span to be used and also return a hint of the max capacity available in this SpaceMap. * - * return value is : + * return value is : * insert_offset: start offset for the inserted space * max_cap: A hint of the largest available space this SpaceMap can hold. * is_expansion: Whether it is an expansion span @@ -145,7 +145,7 @@ class SpaceMap private: /* Check the range */ - bool checkSpace(UInt64 offset, size_t size) const; + bool isInvalidRange(UInt64 offset, size_t size) const; #ifndef DBMS_PUBLIC_GTEST protected: diff --git a/dbms/src/Storages/Page/V3/spacemap/SpaceMapSTDMap.h b/dbms/src/Storages/Page/V3/spacemap/SpaceMapSTDMap.h index d07955e17f9..e0b92cf65ab 100644 --- a/dbms/src/Storages/Page/V3/spacemap/SpaceMapSTDMap.h +++ b/dbms/src/Storages/Page/V3/spacemap/SpaceMapSTDMap.h @@ -20,6 +20,7 @@ #include #include +#include namespace DB { @@ -153,6 +154,11 @@ class STDMapSpaceMap bool markUsedImpl(UInt64 offset, size_t length) override { + // An empty data, we can simply consider it is stored and return true + // Do not let it split the space into smaller pieces. + if (length == 0) + return true; + auto it = MapUtils::findLessEQ(free_map, offset); // first free block <= `offset` if (it == free_map.end()) { @@ -217,8 +223,15 @@ class STDMapSpaceMap return true; } + // return value is std::tuple searchInsertOffset(size_t size) override { + if (unlikely(size == 0)) + { + // The returned `max_cap` is 0 under this case, user should not use it. + return std::make_tuple(0, 0, false); + } + if (unlikely(free_map.empty())) { LOG_ERROR(Logger::get(), "Current space map is full"); @@ -255,24 +268,23 @@ class STDMapSpaceMap bool markFreeImpl(UInt64 offset, size_t length) override { - auto it = free_map.find(offset); + // for an empty blob, no new free block is created, just skip + if (length == 0) + { + return true; + } /** * already unmarked. * The `offset` won't be mid of free space. * Because we alloc space from left to right. */ + auto it = free_map.find(offset); if (it != free_map.end()) { return true; } - // for an empty blob, no new free block is created, just skip - if (length == 0) - { - return true; - } - bool meanless = false; std::tie(it, meanless) = free_map.insert({offset, length}); insertIntoInvertIndex(length, offset); diff --git a/dbms/src/Storages/Page/V3/tests/gtest_blob_stat.cpp b/dbms/src/Storages/Page/V3/tests/gtest_blob_stat.cpp index 4c4492e8eac..d6a6326ee9d 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_blob_stat.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_blob_stat.cpp @@ -165,7 +165,7 @@ try } CATCH -TEST_F(BlobStoreStatsTest, RestoreWithEmptyPage) +TEST_F(BlobStoreStatsTest, RestoreWithEmptyPageSamePosition) try { BlobStats stats(logger, delegator, config); @@ -221,6 +221,54 @@ try } CATCH +TEST_F(BlobStoreStatsTest, RestoreWithEmptyPageSplitSpace) +try +{ + BlobStats stats(logger, delegator, config); + + BlobFileId file_id1 = 11; + + { + std::lock_guard lock(stats.lock_stats); + stats.createStatNotChecking(file_id1, config.file_limit_size, lock); + } + + { + // an entry at offset=0x15376, size=0 + stats.restoreByEntry(PageEntryV3{ + .file_id = file_id1, + .size = 0, + .padded_size = 0, + .tag = 0, + .offset = 0x15376, + .checksum = 0x4567, + }); + // an entry at offset=0x15373, size=15. offset+size > 0x15376, but should be able to insert + stats.restoreByEntry(PageEntryV3{ + .file_id = file_id1, + .size = 15, + .padded_size = 0, + .tag = 0, + .offset = 0x15373, + .checksum = 0x4567, + }); + stats.restore(); + } + + auto stats_copy = stats.getStats(); + + ASSERT_EQ(stats_copy.size(), std::min(getTotalStatsNum(stats_copy), path_num)); + ASSERT_EQ(getTotalStatsNum(stats_copy), 1); + EXPECT_EQ(stats.cur_max_id, file_id1); + + auto stat = stats.blobIdToStat(file_id1); + EXPECT_EQ(stat->sm_total_size, 0x15373 + 15); + EXPECT_EQ(stat->sm_valid_size, 15); + + EXPECT_ANY_THROW({ stats.createStat(file_id1, config.file_limit_size, stats.lock()); }); +} +CATCH + TEST_F(BlobStoreStatsTest, testStats) { BlobStats stats(logger, delegator, config); @@ -343,7 +391,11 @@ TEST_F(BlobStoreStatsTest, StatWithEmptyBlob) ASSERT_EQ(offset, 0); offset = stat->getPosFromStat(0, stat->lock()); // empty - ASSERT_EQ(offset, 10); + ASSERT_EQ(offset, 0); // empty page always "stored" to the beginning of the space + offset = stat->getPosFromStat(0, stat->lock()); // empty + ASSERT_EQ(offset, 0); // empty page always "stored" to the beginning of the space + offset = stat->getPosFromStat(0, stat->lock()); // empty + ASSERT_EQ(offset, 0); // empty page always "stored" to the beginning of the space offset = stat->getPosFromStat(20, stat->lock()); ASSERT_EQ(offset, 10); @@ -351,6 +403,9 @@ TEST_F(BlobStoreStatsTest, StatWithEmptyBlob) offset = stat->getPosFromStat(100, stat->lock()); ASSERT_EQ(offset, 30); + offset = stat->getPosFromStat(0, stat->lock()); // empty + ASSERT_EQ(offset, 0); // empty page always "stored" to the beginning of the space + ASSERT_EQ(stat->sm_total_size, 10 + 20 + 100); ASSERT_EQ(stat->sm_valid_size, 10 + 20 + 100); ASSERT_EQ(stat->sm_valid_rate, 1); @@ -371,9 +426,8 @@ TEST_F(BlobStoreStatsTest, testFullStats) BlobStats stats(logger, delegator, config); { - std::lock_guard lock(stats.lock_stats); BlobFileId file_id = 10; - BlobStats::BlobStatPtr stat = stats.createStat(file_id, config.file_limit_size, lock); + BlobStats::BlobStatPtr stat = stats.createStat(file_id, config.file_limit_size, stats.lock()); auto offset = stat->getPosFromStat(BLOBFILE_LIMIT_SIZE - 1, stat->lock()); ASSERT_EQ(offset, 0); stats.cur_max_id = file_id; diff --git a/dbms/src/Storages/Page/V3/tests/gtest_blob_store.cpp b/dbms/src/Storages/Page/V3/tests/gtest_blob_store.cpp index 0b688a96bc4..a7dac9a3349 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_blob_store.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_blob_store.cpp @@ -31,9 +31,12 @@ #include #include +#include + namespace DB::FailPoints { extern const char exception_after_large_write_exceed[]; +extern const char force_pick_all_blobs_to_full_gc[]; } // namespace DB::FailPoints namespace DB::PS::V3::tests @@ -1645,7 +1648,7 @@ try page_type_and_config); { - size_t size_500 = 500; + const size_t size_500 = 500; char c_buff[size_500]; WriteBatch wb; @@ -1656,6 +1659,7 @@ try const auto & gc_info = blob_store.getGCStats(); ASSERT_TRUE(gc_info.empty()); + // only one blob_file for the large write ASSERT_EQ(getTotalStatsNum(blob_store.blob_stats.getStats()), 1); const auto & records = edit.getRecords(); ASSERT_EQ(records.size(), 1); @@ -1665,6 +1669,58 @@ try } CATCH +TEST_F(BlobStoreTest, ReadEmptyPageAfterAllValidEntriesRmoved) +try +{ + const auto file_provider = DB::tests::TiFlashTestEnv::getMockFileProvider(); + + BlobConfig config_with_small_file_limit_size; + config_with_small_file_limit_size.file_limit_size = 400; + auto blob_store = BlobStore( + getCurrentTestName(), + file_provider, + delegator, + config_with_small_file_limit_size, + page_type_and_config); + + { + const size_t size_500 = 500; + char c_buff[size_500]; + + WriteBatch wb; + ReadBufferPtr buff = std::make_shared(c_buff, size_500); + wb.putPage(50, /* tag */ 0, buff, size_500); + wb.putPage(51, 0, std::make_shared(c_buff, 0), 0); + wb.putPage(52, 0, std::make_shared(c_buff, 20), 20); + wb.putPage(53, 0, std::make_shared(c_buff, 0), 0); + PageEntriesEdit edit = blob_store.write(std::move(wb)); + + // make the blob_file to be READ_ONLY + FailPointHelper::enableFailPoint(FailPoints::force_pick_all_blobs_to_full_gc); + SCOPE_EXIT({ FailPointHelper::disableFailPoint(FailPoints::force_pick_all_blobs_to_full_gc); }); + const auto & gc_info = blob_store.getGCStats(); + ASSERT_FALSE(gc_info.empty()); + + // one blob_file for the large write, another for the other pages + ASSERT_EQ(getTotalStatsNum(blob_store.blob_stats.getStats()), 2); + const auto & records = edit.getRecords(); + ASSERT_EQ(records.size(), 4); + // remove the non-empty entries + blob_store.remove({records[0].entry, records[2].entry}); + + /// try to read the empty pages after the blob_file get removed, no exception should happen + // read-1-entry + blob_store.read(PageIDAndEntryV3(51, records[1].entry), nullptr); + blob_store.read(PageIDAndEntryV3(53, records[3].entry), nullptr); + // read multiple entry + PageIDAndEntriesV3 entries{PageIDAndEntryV3(51, records[1].entry), PageIDAndEntryV3(53, records[3].entry)}; + blob_store.read(entries, nullptr); + + ASSERT_EQ(getTotalStatsNum(blob_store.blob_stats.getStats()), 0); + } +} +CATCH + TEST_F(BlobStoreTest, LargeWriteRegisterPath) try { diff --git a/dbms/src/Storages/Page/V3/tests/gtest_free_map.cpp b/dbms/src/Storages/Page/V3/tests/gtest_free_map.cpp index 4c7d4bd89df..8ff108285b9 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_free_map.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_free_map.cpp @@ -43,7 +43,7 @@ class SpaceMapTest : public testing::TestWithParam return [ranges, range_size](size_t idx, UInt64 start, UInt64 end) -> bool { return idx < range_size && ranges[idx].start == start && ranges[idx].end == end; }; - }; + } }; TEST_P(SpaceMapTest, InitAndDestory) @@ -60,6 +60,13 @@ TEST_P(SpaceMapTest, MarkUnmark) Range ranges[] = {{.start = 0, .end = 100}}; ASSERT_TRUE(smap->check(genChecker(ranges, 1), 1)); + ASSERT_TRUE(smap->markUsed(0, 100)); + ASSERT_TRUE(smap->markUsed(0, 0)); + ASSERT_TRUE(smap->markUsed(100, 0)); + ASSERT_TRUE(smap->markFree(0, 100)); + // Now off-by-1 will return false but no exception + ASSERT_FALSE(smap->markUsed(0, 101)); + ASSERT_TRUE(smap->markUsed(50, 1)); ASSERT_FALSE(smap->markUsed(50, 1)); @@ -440,6 +447,23 @@ TEST_P(SpaceMapTest, EmptyBlob) ASSERT_EQ(sizes.second, 10); } +TEST_P(SpaceMapTest, Fragmentation) +{ + auto smap = SpaceMap::createSpaceMap(SpaceMap::SMAP64_STD_MAP, 0, 100); + // add an empty page + ASSERT_TRUE(smap->markUsed(60, 0)); + auto sizes = smap->getSizes(); + ASSERT_EQ(sizes.first, 0); + ASSERT_EQ(sizes.second, 0); + ASSERT_EQ(smap->getUsedBoundary(), 0); // used boundary won't contain the empty page + + // add [50, 70), should success + ASSERT_TRUE(smap->markUsed(50, 20)); + sizes = smap->getSizes(); + ASSERT_EQ(sizes.first, 70); + ASSERT_EQ(sizes.second, 20); + ASSERT_EQ(smap->getUsedBoundary(), 70); +} INSTANTIATE_TEST_CASE_P(Type, SpaceMapTest, testing::Values(SpaceMap::SMAP64_STD_MAP)); diff --git a/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp b/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp index e910307ffea..1a835d376a0 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp @@ -1120,6 +1120,53 @@ try } CATCH +TEST_F(PageDirectoryTest, EmptyPage) +try +{ + { + PageEntriesEdit edit; + edit.put(buildV3Id(TEST_NAMESPACE_ID, 9), PageEntryV3{.file_id = 551, .size = 0, .offset = 0x15376}); + edit.put(buildV3Id(TEST_NAMESPACE_ID, 10), PageEntryV3{.file_id = 551, .size = 15, .offset = 0x15373}); + edit.put(buildV3Id(TEST_NAMESPACE_ID, 100), PageEntryV3{.file_id = 551, .size = 0, .offset = 0x0}); + edit.put( + buildV3Id(TEST_NAMESPACE_ID, 101), + PageEntryV3{.file_id = 552, .size = BLOBFILE_LIMIT_SIZE, .offset = 0x0}); + dir->apply(std::move(edit)); + } + + auto s0 = dir->createSnapshot(); + auto edit = dir->dumpSnapshotToEdit(s0); + auto restore_from_edit = [](const PageEntriesEdit & edit, BlobStats & stats) { + auto deseri_edit = u128::Serializer::deserializeFrom(u128::Serializer::serializeTo(edit), nullptr); + auto provider = DB::tests::TiFlashTestEnv::getDefaultFileProvider(); + auto path = getTemporaryPath(); + PSDiskDelegatorPtr delegator = std::make_shared(path); + PageDirectoryFactory factory; + auto d + = factory.setBlobStats(stats).createFromEditForTest(getCurrentTestName(), provider, delegator, deseri_edit); + return d; + }; + + { + auto path = getTemporaryPath(); + PSDiskDelegatorPtr delegator = std::make_shared(path); + auto config = BlobConfig{}; + BlobStats stats(log, delegator, config); + { + std::lock_guard lock(stats.lock_stats); + stats.createStatNotChecking(551, BLOBFILE_LIMIT_SIZE, lock); + stats.createStatNotChecking(552, BLOBFILE_LIMIT_SIZE, lock); + } + auto restored_dir = restore_from_edit(edit, stats); + auto snap = restored_dir->createSnapshot(); + ASSERT_EQ(getEntry(restored_dir, 9, snap).offset, 0x15376); + ASSERT_EQ(getEntry(restored_dir, 10, snap).offset, 0x15373); + ASSERT_EQ(getEntry(restored_dir, 100, snap).offset, 0x0); + ASSERT_EQ(getEntry(restored_dir, 101, snap).offset, 0x0); + } +} +CATCH + class PageDirectoryGCTest : public PageDirectoryTest { }; diff --git a/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp b/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp index ab4b517fe1e..26f2cc28434 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp @@ -1950,6 +1950,7 @@ try } FailPointHelper::enableFailPoint(FailPoints::force_pick_all_blobs_to_full_gc); + SCOPE_EXIT({ FailPointHelper::disableFailPoint(FailPoints::force_pick_all_blobs_to_full_gc); }); auto done_full_gc = page_storage->gc(); EXPECT_TRUE(done_full_gc); diff --git a/dbms/src/Storages/S3/S3RandomAccessFile.cpp b/dbms/src/Storages/S3/S3RandomAccessFile.cpp index 9d63ec82f37..3826dfe6424 100644 --- a/dbms/src/Storages/S3/S3RandomAccessFile.cpp +++ b/dbms/src/Storages/S3/S3RandomAccessFile.cpp @@ -206,6 +206,10 @@ bool S3RandomAccessFile::initialize() GET_METRIC(tiflash_storage_s3_request_seconds, type_get_object).Observe(sw.elapsedSeconds()); break; } + if (cur_retry >= max_retry && !request_succ) + { + LOG_INFO(log, "S3 GetObject timeout: {}, max_retry={}", remote_fname, max_retry); + } return request_succ; }