Skip to content

Commit

Permalink
add ut for do_delete_bitmap_integrity_check
Browse files Browse the repository at this point in the history
  • Loading branch information
bobhan1 committed Nov 18, 2024
1 parent 617cd38 commit 0965abb
Show file tree
Hide file tree
Showing 4 changed files with 175 additions and 21 deletions.
45 changes: 29 additions & 16 deletions cloud/src/recycler/checker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -760,6 +760,10 @@ int InstanceChecker::do_inverted_check() {
return num_file_leak > 0 ? 1 : check_ret;
}

// Renders this digest as "rowset_id=<id>, version=[<first>-<second>]", the form used
// in the delete bitmap checker's log messages.
std::string InstanceChecker::RowsetDigest::to_string() const {
    const auto& [first_version, second_version] = version;
    return fmt::format("rowset_id={}, version=[{}-{}]", rowset_id, first_version,
                       second_version);
}

int InstanceChecker::check_delete_bitmap_integrity(int64_t tablet_id) {
std::vector<RowsetDigest> tablet_rowsets;
// Get all visible rowsets of this tablet
Expand Down Expand Up @@ -802,9 +806,14 @@ int InstanceChecker::check_delete_bitmap_integrity(int64_t tablet_id) {
// Check delete bitmaps integrity of this tablet
int64_t abnormal_rowsets_num {0};
for (const auto& rowset : tablet_rowsets) {
if (rowset.version.second <= 1) {
// skip dummy rowset [0-1]
continue;
}
auto begin = meta_delete_bitmap_key({instance_id_, tablet_id, rowset.rowset_id, 0, 0});
auto end = meta_delete_bitmap_key({instance_id_, tablet_id, rowset.rowset_id,
std::numeric_limits<int64_t>::max(), 0});
std::numeric_limits<int64_t>::max(),
std::numeric_limits<int64_t>::max()});
std::unique_ptr<RangeGetIterator> it;
err = txn->get(begin, end, &it);
if (err != TxnErrorCode::TXN_OK) {
Expand All @@ -816,35 +825,37 @@ int InstanceChecker::check_delete_bitmap_integrity(int64_t tablet_id) {
++abnormal_rowsets_num;
LOG(WARNING) << fmt::format(
"[delete bitmap checker] can't find corresponding delete bitmap for "
"rowset_id={}",
rowset.rowset_id);
"instance_id={}, tablet_id={}, {}",
instance_id_, tablet_id, rowset.to_string());
}
}

if (abnormal_rowsets_num > 0) {
LOG(WARNING) << fmt::format(
"[delete bitmap checker] can't find corresponding delete bitmap for {} "
"rowsets.",
abnormal_rowsets_num);
"[delete bitmap checker] can't find corresponding delete bitmap for "
"instance_id={}, tablet_id={}, abnormal_rowsets_num={}",
instance_id_, tablet_id, abnormal_rowsets_num);
return 1;
}

// 2. check the recycled rowsets' delete bitmap is cleared from ms

// 3. check that delete bitmaps of rowsets which have been compacted are pruned from ms

// 4. check that https://github.com/apache/doris/pull/40204 works as expected

LOG(INFO) << fmt::format(
"[delete bitmap checker] check delete bitmap integrity for tablet={} successfully.",
tablet_id);
return 0;
}

int InstanceChecker::do_delete_bitmap_integrity_check() {
// check that for every visible rowsets, there exists an corresponding delete bitmap in ms
return traverse_mow_tablet(
[&](int64_t tablet_id) { return check_delete_bitmap_integrity(tablet_id); });
}

// Placeholder for the inverted delete bitmap check.
// TODO: not implemented yet; until then this unconditionally returns 0.
int InstanceChecker::do_delete_bitmap_inverted_check() {
    constexpr int kNothingChecked = 0;
    return kNothingChecked;
}

int InstanceChecker::traverse_mow_tablet(const std::function<int(int64_t)>& check_func) {
bool succ {true};
std::unique_ptr<Transaction> txn;
TxnErrorCode err = txn_kv_->create_txn(&txn);
if (err != TxnErrorCode::TXN_OK) {
Expand Down Expand Up @@ -893,13 +904,15 @@ int InstanceChecker::traverse_mow_tablet(const std::function<int(int64_t)>& chec

// TODO(bobhan1): handle check result
int ret = check_func(tablet_id);
if (!ret) {
succ = false;
if (ret != 0) {
// TODO(bobhan1): return immediately on non-zero status
// or continue to check other tablet ?
return ret;
}
}
}
} while (it->more() && !stopped());
return succ;
return 0;
}

} // namespace doris::cloud
7 changes: 6 additions & 1 deletion cloud/src/recycler/checker.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ class InstanceChecker {
// Return negative if a temporary error occurred during the check process.
int do_delete_bitmap_inverted_check();

// 4. check that https://github.com/apache/doris/pull/40204 works as expected

// If there are multiple buckets, return the minimum lifecycle; if there are no buckets (i.e.
// all accessors are HdfsAccessor), return INT64_MAX.
// Return 0 if success, otherwise error
Expand All @@ -112,9 +114,12 @@ class InstanceChecker {
// returns 0 for success otherwise error
int init_storage_vault_accessors(const InstanceInfoPB& instance);

// A rowset version expressed as a pair of 64-bit integers; printed as "[first-second]"
// by RowsetDigest::to_string().
using Version = std::pair<int64_t, int64_t>;
// Minimal description of a rowset used by the delete bitmap checker: its id and version.
struct RowsetDigest {
// Rowset identifier; used as a component of the delete bitmap key range that is scanned.
std::string rowset_id;
// NOTE(review): both the spelled-out pair and the `Version` alias declaration of `version`
// appear in this view (old and new form of the same member) — confirm only one remains.
std::pair<int64_t, int64_t> version;
Version version;

// Formats the digest as "rowset_id=<id>, version=[<first>-<second>]" for log output.
std::string to_string() const;
};

int traverse_mow_tablet(const std::function<int(int64_t)>& check_func);
Expand Down
9 changes: 6 additions & 3 deletions cloud/src/recycler/util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -280,13 +280,16 @@ int get_tablet_meta(TxnKv* txn_kv, const std::string& instance_id, int64_t table
}

std::string key, val;
meta_tablet_key({instance_id, tablet_idx.tablet_id(), tablet_idx.index_id(),
meta_tablet_key({instance_id, tablet_idx.table_id(), tablet_idx.index_id(),
tablet_idx.partition_id(), tablet_id},
&key);
err = txn->get(key, &val);
if (err != TxnErrorCode::TXN_OK) {
LOG(WARNING) << fmt::format("failed to get tablet, err={} tablet_id={} key={}", err,
tablet_id, hex(key));
LOG(WARNING) << fmt::format(
"failed to get tablet, err={}, table_id={}, index_id={}, partition_id={}, "
"tablet_id={} key={}",
err, tablet_idx.table_id(), tablet_idx.index_id(), tablet_idx.partition_id(),
tablet_id, hex(key));
return -1;
}
if (!tablet_meta.ParseFromString(val)) [[unlikely]] {
Expand Down
135 changes: 134 additions & 1 deletion cloud/test/recycler_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -255,14 +255,52 @@ static int create_committed_rowset(TxnKv* txn_kv, StorageVaultAccessor* accessor
return 0;
}

// Test helper: writes a rowset meta for the single version [version-version] of `tablet_id`
// under the caller-chosen `rowset_id`, then puts `num_segments` empty segment files through
// `accessor`.
//
// The KV key is derived from (instance_id, tablet_id, version) only, so a second call with the
// same tablet and version replaces the previously stored rowset meta.
//
// Returns 0 on success, -1 if the transaction cannot be created or committed.
static int create_committed_rowset_with_rowset_id(TxnKv* txn_kv, StorageVaultAccessor* accessor,
                                                  const std::string& resource_id, int64_t tablet_id,
                                                  int64_t version, std::string rowset_id,
                                                  int num_segments = 1) {
    // Build and serialize the rowset meta first.
    doris::RowsetMetaCloudPB rowset_meta;
    rowset_meta.set_rowset_id(0); // useless but required
    rowset_meta.set_rowset_id_v2(rowset_id);
    rowset_meta.set_num_segments(num_segments);
    rowset_meta.set_tablet_id(tablet_id);
    rowset_meta.set_resource_id(resource_id);
    rowset_meta.set_creation_time(current_time);
    rowset_meta.set_start_version(version);
    rowset_meta.set_end_version(version);

    std::string serialized_meta;
    rowset_meta.SerializeToString(&serialized_meta);

    // Encode the key the meta is stored under.
    std::string rowset_key;
    MetaRowsetKeyInfo rowset_key_info {instance_id, tablet_id, version};
    meta_rowset_key(rowset_key_info, &rowset_key);

    // Persist the meta in its own transaction.
    std::unique_ptr<Transaction> txn;
    const bool txn_created = txn_kv->create_txn(&txn) == TxnErrorCode::TXN_OK;
    if (!txn_created) {
        return -1;
    }
    txn->put(rowset_key, serialized_meta);
    const bool txn_committed = txn->commit() == TxnErrorCode::TXN_OK;
    if (!txn_committed) {
        return -1;
    }

    // Create one empty file per segment.
    int segment_idx = 0;
    while (segment_idx < num_segments) {
        accessor->put_file(segment_path(tablet_id, rowset_id, segment_idx), "");
        ++segment_idx;
    }
    return 0;
}

static int create_tablet(TxnKv* txn_kv, int64_t table_id, int64_t index_id, int64_t partition_id,
int64_t tablet_id) {
int64_t tablet_id, bool is_mow = false) {
std::unique_ptr<Transaction> txn;
if (txn_kv->create_txn(&txn) != TxnErrorCode::TXN_OK) {
return -1;
}
doris::TabletMetaCloudPB tablet_meta;
tablet_meta.set_tablet_id(tablet_id);
tablet_meta.set_enable_unique_key_merge_on_write(is_mow);
auto val = tablet_meta.SerializeAsString();
auto key = meta_tablet_key({instance_id, table_id, index_id, partition_id, tablet_id});
txn->put(key, val);
Expand All @@ -275,6 +313,7 @@ static int create_tablet(TxnKv* txn_kv, int64_t table_id, int64_t index_id, int6
TabletIndexPB tablet_idx_pb;
tablet_idx_pb.set_db_id(db_id);
tablet_idx_pb.set_table_id(table_id);
tablet_idx_pb.set_index_id(index_id);
tablet_idx_pb.set_partition_id(partition_id);
tablet_idx_pb.set_tablet_id(tablet_id);
auto idx_val = tablet_idx_pb.SerializeAsString();
Expand Down Expand Up @@ -2576,6 +2615,100 @@ TEST(CheckerTest, do_inspect) {
}
}

TEST(CheckerTest, delete_bitmap_integrity_check_normal) {
    // Normal case: all rowsets of merge-on-write tablets have corresponding delete bitmaps,
    // so the integrity check is expected to return 0.
    auto txn_kv = std::make_shared<MemTxnKv>();
    ASSERT_EQ(txn_kv->init(), 0);

    InstanceInfoPB instance;
    instance.set_instance_id(instance_id);
    auto obj_info = instance.add_obj_info();
    obj_info->set_id("1");

    InstanceChecker checker(txn_kv, instance_id);
    ASSERT_EQ(checker.init(instance), 0);
    // Guard the begin() dereference below.
    ASSERT_FALSE(checker.accessor_map_.empty());
    auto accessor = checker.accessor_map_.begin()->second;

    // Delete bitmaps are accumulated in this transaction and committed once at the end.
    std::unique_ptr<Transaction> txn;
    ASSERT_EQ(TxnErrorCode::TXN_OK, txn_kv->create_txn(&txn));

    constexpr int table_id = 10000, index_id = 10001, partition_id = 10002;
    // Create some rowsets with delete bitmaps in merge-on-write tablets.
    // The rowset meta key is built from (tablet_id, version) only, so every version gets
    // exactly one rowset with its own rowset id; writing several rowset ids at the same
    // version would silently overwrite all but the last one.
    for (int tablet_id = 300001; tablet_id <= 300010; ++tablet_id) {
        ASSERT_EQ(0,
                  create_tablet(txn_kv.get(), table_id, index_id, partition_id, tablet_id, true));
        for (int ver = 2; ver < 10; ++ver) {
            const std::string rowset_id = std::to_string(100 + ver);
            ASSERT_EQ(0, create_committed_rowset_with_rowset_id(txn_kv.get(), accessor.get(), "1",
                                                                tablet_id, ver, rowset_id, 1));
            auto delete_bitmap_key =
                    meta_delete_bitmap_key({instance_id, tablet_id, rowset_id, ver, 0});
            std::string delete_bitmap_val {"test"};
            txn->put(delete_bitmap_key, delete_bitmap_val);
        }
    }

    // Also create some rowsets without delete bitmaps in non merge-on-write tablets.
    for (int tablet_id = 400001; tablet_id <= 400010; ++tablet_id) {
        ASSERT_EQ(0,
                  create_tablet(txn_kv.get(), table_id, index_id, partition_id, tablet_id, false));
        for (int ver = 2; ver < 10; ++ver) {
            const std::string rowset_id = std::to_string(200 + ver);
            ASSERT_EQ(0, create_committed_rowset_with_rowset_id(txn_kv.get(), accessor.get(), "1",
                                                                tablet_id, ver, rowset_id, 1));
        }
    }

    // TODO(bobhan1): add split delete bitmap keys
    ASSERT_EQ(TxnErrorCode::TXN_OK, txn->commit());

    ASSERT_EQ(checker.do_delete_bitmap_integrity_check(), 0);
}

TEST(CheckerTest, delete_bitmap_integrity_check_abnormal) {
    // Abnormal case: some rowsets of merge-on-write tablets don't have corresponding delete
    // bitmaps, so the integrity check is expected to return a non-zero value.
    auto txn_kv = std::make_shared<MemTxnKv>();
    ASSERT_EQ(txn_kv->init(), 0);

    InstanceInfoPB instance;
    instance.set_instance_id(instance_id);
    auto obj_info = instance.add_obj_info();
    obj_info->set_id("1");

    InstanceChecker checker(txn_kv, instance_id);
    ASSERT_EQ(checker.init(instance), 0);
    // Guard the begin() dereference below.
    ASSERT_FALSE(checker.accessor_map_.empty());
    auto accessor = checker.accessor_map_.begin()->second;

    // Delete bitmaps are accumulated in this transaction and committed once at the end.
    std::unique_ptr<Transaction> txn;
    ASSERT_EQ(TxnErrorCode::TXN_OK, txn_kv->create_txn(&txn));

    constexpr int table_id = 10000, index_id = 10001, partition_id = 10002;
    // Rowsets up to this version get a delete bitmap; later versions deliberately do not.
    constexpr int last_version_with_bitmap = 7;
    // Create some rowsets in merge-on-write tablets, some with and some without delete bitmaps.
    // The rowset meta key is built from (tablet_id, version) only, so every version gets
    // exactly one rowset with its own rowset id; otherwise later rowsets would overwrite the
    // earlier ones and the tablet would not contain the intended mix.
    for (int tablet_id = 300001; tablet_id <= 300010; ++tablet_id) {
        ASSERT_EQ(0,
                  create_tablet(txn_kv.get(), table_id, index_id, partition_id, tablet_id, true));
        for (int ver = 2; ver < 10; ++ver) {
            const std::string rowset_id = std::to_string(100 + ver);
            ASSERT_EQ(0, create_committed_rowset_with_rowset_id(txn_kv.get(), accessor.get(), "1",
                                                                tablet_id, ver, rowset_id, 1));
            if (ver > last_version_with_bitmap) {
                // leave this rowset without a delete bitmap
                continue;
            }
            auto delete_bitmap_key =
                    meta_delete_bitmap_key({instance_id, tablet_id, rowset_id, ver, 0});
            std::string delete_bitmap_val {"test"};
            txn->put(delete_bitmap_key, delete_bitmap_val);
        }
    }

    // TODO(bobhan1): add split delete bitmap keys
    ASSERT_EQ(TxnErrorCode::TXN_OK, txn->commit());

    ASSERT_NE(checker.do_delete_bitmap_integrity_check(), 0);
}

TEST(RecyclerTest, delete_rowset_data) {
auto txn_kv = std::make_shared<MemTxnKv>();
ASSERT_EQ(txn_kv->init(), 0);
Expand Down

0 comments on commit 0965abb

Please sign in to comment.