From 0965abb534cfaf5767ee31c696a7e4f661013863 Mon Sep 17 00:00:00 2001 From: bobhan1 Date: Mon, 18 Nov 2024 12:20:38 +0800 Subject: [PATCH] add ut for do_delete_bitmap_integrity_check --- cloud/src/recycler/checker.cpp | 45 +++++++---- cloud/src/recycler/checker.h | 7 +- cloud/src/recycler/util.cpp | 9 ++- cloud/test/recycler_test.cpp | 135 ++++++++++++++++++++++++++++++++- 4 files changed, 175 insertions(+), 21 deletions(-) diff --git a/cloud/src/recycler/checker.cpp b/cloud/src/recycler/checker.cpp index 298796595d64376..8ea6315418049fd 100644 --- a/cloud/src/recycler/checker.cpp +++ b/cloud/src/recycler/checker.cpp @@ -760,6 +760,10 @@ int InstanceChecker::do_inverted_check() { return num_file_leak > 0 ? 1 : check_ret; } +std::string InstanceChecker::RowsetDigest::to_string() const { + return fmt::format("rowset_id={}, version=[{}-{}]", rowset_id, version.first, version.second); +} + int InstanceChecker::check_delete_bitmap_integrity(int64_t tablet_id) { std::vector tablet_rowsets; // Get all visible rowsets of this tablet @@ -802,9 +806,14 @@ int InstanceChecker::check_delete_bitmap_integrity(int64_t tablet_id) { // Check delete bitmaps integrity of this tablet int64_t abnormal_rowsets_num {0}; for (const auto& rowset : tablet_rowsets) { + if (rowset.version.second <= 1) { + // skip dummy rowset [0-1] + continue; + } auto begin = meta_delete_bitmap_key({instance_id_, tablet_id, rowset.rowset_id, 0, 0}); auto end = meta_delete_bitmap_key({instance_id_, tablet_id, rowset.rowset_id, - std::numeric_limits::max(), 0}); + std::numeric_limits::max(), + std::numeric_limits::max()}); std::unique_ptr it; err = txn->get(begin, end, &it); if (err != TxnErrorCode::TXN_OK) { @@ -816,35 +825,37 @@ int InstanceChecker::check_delete_bitmap_integrity(int64_t tablet_id) { ++abnormal_rowsets_num; LOG(WARNING) << fmt::format( "[delete bitmap checker] can't find corresponding delete bitmap for " - "rowset_id={}", - rowset.rowset_id); + "instance_id={}, tablet_id={}, {}", + 
instance_id_, tablet_id, rowset.to_string()); } } if (abnormal_rowsets_num > 0) { LOG(WARNING) << fmt::format( - "[delete bitmap checker] can't find corresponding delete bitmap for {} " - "rowsets.", - abnormal_rowsets_num); + "[delete bitmap checker] can't find corresponding delete bitmap for " + "instance_id={}, tablet_id={}, abnormal_rowsets_num={}", + instance_id_, tablet_id, abnormal_rowsets_num); return 1; } - // 2. check the recycled rowsets' delete bitmap is cleared from ms - - // 3. check that delete bitmaps of rowsets which has been compacted is pruned from ms - - // 4. check that https://github.com/apache/doris/pull/40204 works as expected - + LOG(INFO) << fmt::format( + "[delete bitmap checker] check delete bitmap integrity for tablet={} successfully.", + tablet_id); return 0; } int InstanceChecker::do_delete_bitmap_integrity_check() { + // check that for every visible rowset, there exists a corresponding delete bitmap in ms return traverse_mow_tablet( [&](int64_t tablet_id) { return check_delete_bitmap_integrity(tablet_id); }); } +int InstanceChecker::do_delete_bitmap_inverted_check() { + // TODO: + return 0; +} + int InstanceChecker::traverse_mow_tablet(const std::function& check_func) { - bool succ {true}; std::unique_ptr txn; TxnErrorCode err = txn_kv_->create_txn(&txn); if (err != TxnErrorCode::TXN_OK) { @@ -893,13 +904,15 @@ int InstanceChecker::traverse_mow_tablet(const std::function& chec // TODO(bobhan1): handle check result int ret = check_func(tablet_id); - if (!ret) { - succ = false; + if (ret != 0) { + // TODO(bobhan1): return immediately on non-zero status + // or continue to check other tablet ? 
+ return ret; } } } } while (it->more() && !stopped()); - return succ; + return 0; } } // namespace doris::cloud diff --git a/cloud/src/recycler/checker.h b/cloud/src/recycler/checker.h index 444e2ae13f88814..fbd3aec946fc8ab 100644 --- a/cloud/src/recycler/checker.h +++ b/cloud/src/recycler/checker.h @@ -98,6 +98,8 @@ class InstanceChecker { // Return negative if a temporary error occurred during the check process. int do_delete_bitmap_inverted_check(); + // 4. check that https://github.com/apache/doris/pull/40204 works as expected + // If there are multiple buckets, return the minimum lifecycle; if there are no buckets (i.e. // all accessors are HdfsAccessor), return INT64_MAX. // Return 0 if success, otherwise error @@ -112,9 +114,12 @@ class InstanceChecker { // returns 0 for success otherwise error int init_storage_vault_accessors(const InstanceInfoPB& instance); + using Version = std::pair; struct RowsetDigest { std::string rowset_id; - std::pair version; + Version version; + + std::string to_string() const; }; int traverse_mow_tablet(const std::function& check_func); diff --git a/cloud/src/recycler/util.cpp b/cloud/src/recycler/util.cpp index ae3d15215bcfbf3..6797782d5d665c3 100644 --- a/cloud/src/recycler/util.cpp +++ b/cloud/src/recycler/util.cpp @@ -280,13 +280,16 @@ int get_tablet_meta(TxnKv* txn_kv, const std::string& instance_id, int64_t table } std::string key, val; - meta_tablet_key({instance_id, tablet_idx.tablet_id(), tablet_idx.index_id(), + meta_tablet_key({instance_id, tablet_idx.table_id(), tablet_idx.index_id(), tablet_idx.partition_id(), tablet_id}, &key); err = txn->get(key, &val); if (err != TxnErrorCode::TXN_OK) { - LOG(WARNING) << fmt::format("failed to get tablet, err={} tablet_id={} key={}", err, - tablet_id, hex(key)); + LOG(WARNING) << fmt::format( + "failed to get tablet, err={}, table_id={}, index_id={}, partition_id={}, " + "tablet_id={} key={}", + err, tablet_idx.table_id(), tablet_idx.index_id(), tablet_idx.partition_id(), + 
tablet_id, hex(key)); return -1; } if (!tablet_meta.ParseFromString(val)) [[unlikely]] { diff --git a/cloud/test/recycler_test.cpp b/cloud/test/recycler_test.cpp index 146873548398bf0..c369339e949c87f 100644 --- a/cloud/test/recycler_test.cpp +++ b/cloud/test/recycler_test.cpp @@ -255,14 +255,52 @@ static int create_committed_rowset(TxnKv* txn_kv, StorageVaultAccessor* accessor return 0; } +static int create_committed_rowset_with_rowset_id(TxnKv* txn_kv, StorageVaultAccessor* accessor, + const std::string& resource_id, int64_t tablet_id, + int64_t version, std::string rowset_id, + int num_segments = 1) { + std::string key; + std::string val; + + MetaRowsetKeyInfo key_info {instance_id, tablet_id, version}; + meta_rowset_key(key_info, &key); + + doris::RowsetMetaCloudPB rowset_pb; + rowset_pb.set_rowset_id(0); // useless but required + rowset_pb.set_rowset_id_v2(rowset_id); + rowset_pb.set_num_segments(num_segments); + rowset_pb.set_tablet_id(tablet_id); + rowset_pb.set_resource_id(resource_id); + rowset_pb.set_creation_time(current_time); + rowset_pb.set_start_version(version); + rowset_pb.set_end_version(version); + rowset_pb.SerializeToString(&val); + + std::unique_ptr txn; + if (txn_kv->create_txn(&txn) != TxnErrorCode::TXN_OK) { + return -1; + } + txn->put(key, val); + if (txn->commit() != TxnErrorCode::TXN_OK) { + return -1; + } + + for (int i = 0; i < num_segments; ++i) { + auto path = segment_path(tablet_id, rowset_id, i); + accessor->put_file(path, ""); + } + return 0; +} + static int create_tablet(TxnKv* txn_kv, int64_t table_id, int64_t index_id, int64_t partition_id, - int64_t tablet_id) { + int64_t tablet_id, bool is_mow = false) { std::unique_ptr txn; if (txn_kv->create_txn(&txn) != TxnErrorCode::TXN_OK) { return -1; } doris::TabletMetaCloudPB tablet_meta; tablet_meta.set_tablet_id(tablet_id); + tablet_meta.set_enable_unique_key_merge_on_write(is_mow); auto val = tablet_meta.SerializeAsString(); auto key = meta_tablet_key({instance_id, table_id, 
index_id, partition_id, tablet_id}); txn->put(key, val); @@ -275,6 +313,7 @@ static int create_tablet(TxnKv* txn_kv, int64_t table_id, int64_t index_id, int6 TabletIndexPB tablet_idx_pb; tablet_idx_pb.set_db_id(db_id); tablet_idx_pb.set_table_id(table_id); + tablet_idx_pb.set_index_id(index_id); tablet_idx_pb.set_partition_id(partition_id); tablet_idx_pb.set_tablet_id(tablet_id); auto idx_val = tablet_idx_pb.SerializeAsString(); @@ -2576,6 +2615,100 @@ TEST(CheckerTest, do_inspect) { } } +TEST(CheckerTest, delete_bitmap_integrity_check_normal) { + // normal case, all rowsets have corresponding delete bitmaps + auto txn_kv = std::make_shared(); + ASSERT_EQ(txn_kv->init(), 0); + + InstanceInfoPB instance; + instance.set_instance_id(instance_id); + auto obj_info = instance.add_obj_info(); + obj_info->set_id("1"); + + InstanceChecker checker(txn_kv, instance_id); + ASSERT_EQ(checker.init(instance), 0); + auto accessor = checker.accessor_map_.begin()->second; + + std::unique_ptr txn; + ASSERT_EQ(TxnErrorCode::TXN_OK, txn_kv->create_txn(&txn)); + + constexpr int table_id = 10000, index_id = 10001, partition_id = 10002; + // create some rowsets with delete bitmaps in merge-on-write tablet + for (int tablet_id = 300001; tablet_id <= 300010; ++tablet_id) { + ASSERT_EQ(0, + create_tablet(txn_kv.get(), table_id, index_id, partition_id, tablet_id, true)); + for (int64_t rowset_id = 100; rowset_id <= 110; rowset_id++) { + for (int ver = 2; ver < 10; ++ver) { + create_committed_rowset_with_rowset_id(txn_kv.get(), accessor.get(), "1", tablet_id, + ver, std::to_string(rowset_id), 1); + auto delete_bitmap_key = meta_delete_bitmap_key( + {instance_id, tablet_id, std::to_string(rowset_id), ver, 0}); + std::string delete_bitmap_val {"test"}; + txn->put(delete_bitmap_key, delete_bitmap_val); + } + } + } + + // also create some rowsets without delete bitmaps in non merge-on-write tablet + for (int tablet_id = 400001; tablet_id <= 400010; ++tablet_id) { + ASSERT_EQ(0, + 
create_tablet(txn_kv.get(), table_id, index_id, partition_id, tablet_id, false)); + for (int64_t rowset_id = 200; rowset_id <= 210; rowset_id++) { + for (int ver = 2; ver < 10; ++ver) { + create_committed_rowset_with_rowset_id(txn_kv.get(), accessor.get(), "1", tablet_id, + ver, std::to_string(rowset_id), 1); + } + } + } + + // TODO(bobhan1): add split delete bitmap keys + ASSERT_EQ(TxnErrorCode::TXN_OK, txn->commit()); + + ASSERT_EQ(checker.do_delete_bitmap_integrity_check(), 0); +} + +TEST(CheckerTest, delete_bitmap_integrity_check_abnormal) { + // abnormal case, some rowsets don't have corresponding delete bitmaps + auto txn_kv = std::make_shared(); + ASSERT_EQ(txn_kv->init(), 0); + + InstanceInfoPB instance; + instance.set_instance_id(instance_id); + auto obj_info = instance.add_obj_info(); + obj_info->set_id("1"); + + InstanceChecker checker(txn_kv, instance_id); + ASSERT_EQ(checker.init(instance), 0); + auto accessor = checker.accessor_map_.begin()->second; + + std::unique_ptr txn; + ASSERT_EQ(TxnErrorCode::TXN_OK, txn_kv->create_txn(&txn)); + + constexpr int table_id = 10000, index_id = 10001, partition_id = 10002; + // create some rowsets, some with delete bitmaps, some without delete bitmaps in merge-on-write tablet + for (int tablet_id = 300001; tablet_id <= 300010; ++tablet_id) { + ASSERT_EQ(0, + create_tablet(txn_kv.get(), table_id, index_id, partition_id, tablet_id, true)); + for (int64_t rowset_id = 100; rowset_id <= 110; rowset_id++) { + for (int ver = 2; ver < 10; ++ver) { + create_committed_rowset_with_rowset_id(txn_kv.get(), accessor.get(), "1", tablet_id, + ver, std::to_string(rowset_id), 1); + if (rowset_id <= 107) { + auto delete_bitmap_key = meta_delete_bitmap_key( + {instance_id, tablet_id, std::to_string(rowset_id), ver, 0}); + std::string delete_bitmap_val {"test"}; + txn->put(delete_bitmap_key, delete_bitmap_val); + } + } + } + } + + // TODO(bobhan1): add split delete bitmap keys + ASSERT_EQ(TxnErrorCode::TXN_OK, txn->commit()); + + 
ASSERT_NE(checker.do_delete_bitmap_integrity_check(), 0); +} + TEST(RecyclerTest, delete_rowset_data) { auto txn_kv = std::make_shared(); ASSERT_EQ(txn_kv->init(), 0);