Skip to content

Commit

Permalink
[fix](olap) Set the original tablet state to TABLET_SHUTDOWN (#45812)
Browse files Browse the repository at this point in the history
Set the original tablet state to TABLET_SHUTDOWN when loading a new
tablet from the disk during the restore job. Otherwise, another thread
that still holds the old tablet object may save its meta as well.
  • Loading branch information
w41ter authored Dec 25, 2024
1 parent 3216257 commit 190c12d
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 13 deletions.
32 changes: 19 additions & 13 deletions be/src/olap/tablet_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -523,7 +523,8 @@ Status TabletManager::drop_tablet(TTabletId tablet_id, TReplicaId replica_id,
Status TabletManager::_drop_tablet(TTabletId tablet_id, TReplicaId replica_id, bool keep_files,
bool is_drop_table_or_partition, bool had_held_shard_lock) {
LOG(INFO) << "begin drop tablet. tablet_id=" << tablet_id << ", replica_id=" << replica_id
<< ", is_drop_table_or_partition=" << is_drop_table_or_partition;
<< ", is_drop_table_or_partition=" << is_drop_table_or_partition
<< ", keep_files=" << keep_files;
DorisMetrics::instance()->drop_tablet_requests_total->increment(1);

RETURN_IF_ERROR(register_transition_tablet(tablet_id, "drop tablet"));
Expand Down Expand Up @@ -558,27 +559,32 @@ Status TabletManager::_drop_tablet(TTabletId tablet_id, TReplicaId replica_id, b

to_drop_tablet->clear_cache();

if (!keep_files) {
{
// drop tablet will update tablet meta, should lock
std::lock_guard<std::shared_mutex> wrlock(to_drop_tablet->get_header_lock());
SCOPED_SIMPLE_TRACE_IF_TIMEOUT(TRACE_TABLET_LOCK_THRESHOLD);
LOG(INFO) << "set tablet to shutdown state and remove it from memory. "
<< "tablet_id=" << tablet_id << ", tablet_path=" << to_drop_tablet->tablet_path();
// NOTE: has to update tablet here, but must not update tablet meta directly.
// because other thread may hold the tablet object, they may save meta too.
// If update meta directly here, other thread may override the meta
// and the tablet will be loaded at restart time.
// To avoid this exception, we first set the state of the tablet to `SHUTDOWN`.
//
// Until now, only the restore task uses keep files.
RETURN_IF_ERROR(to_drop_tablet->set_tablet_state(TABLET_SHUTDOWN));
// We must record unused remote rowsets path info to OlapMeta before tablet state is marked as TABLET_SHUTDOWN in OlapMeta,
// otherwise if BE shutdown after saving tablet state, these remote rowsets path info will lost.
if (is_drop_table_or_partition) {
RETURN_IF_ERROR(to_drop_tablet->remove_all_remote_rowsets());
}
to_drop_tablet->save_meta();
{
std::lock_guard<std::shared_mutex> wrdlock(_shutdown_tablets_lock);
_shutdown_tablets.push_back(to_drop_tablet);
if (!keep_files) {
LOG(INFO) << "set tablet to shutdown state and remove it from memory. "
<< "tablet_id=" << tablet_id
<< ", tablet_path=" << to_drop_tablet->tablet_path();
// We must record unused remote rowsets path info to OlapMeta before tablet state is marked as TABLET_SHUTDOWN in OlapMeta,
// otherwise if BE shutdown after saving tablet state, these remote rowsets path info will lost.
if (is_drop_table_or_partition) {
RETURN_IF_ERROR(to_drop_tablet->remove_all_remote_rowsets());
}
to_drop_tablet->save_meta();
{
std::lock_guard<std::shared_mutex> wrdlock(_shutdown_tablets_lock);
_shutdown_tablets.push_back(to_drop_tablet);
}
}
}

Expand Down
45 changes: 45 additions & 0 deletions be/test/olap/tablet_mgr_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -549,4 +549,49 @@ TEST_F(TabletMgrTest, FindTabletWithCompact) {
ASSERT_TRUE(trash_st.ok()) << trash_st;
}

// Creates a tablet, keeps a reference to it, then force-reloads the same tablet
// from its serialized meta. The stale reference obtained before the reload must
// no longer be able to checkpoint (save) its meta afterwards.
TEST_F(TabletMgrTest, LoadTabletFromMeta) {
    TTabletId tablet_id = 111;
    TSchemaHash schema_hash = 3333;

    // Minimal single-key-column schema for the tablet under test.
    TColumnType col_type;
    col_type.__set_type(TPrimitiveType::SMALLINT);
    TColumn col1;
    col1.__set_column_name("col1");
    col1.__set_column_type(col_type);
    col1.__set_is_key(true);
    std::vector<TColumn> cols;
    cols.push_back(col1);

    TTabletSchema tablet_schema;
    tablet_schema.__set_short_key_column_count(1);
    tablet_schema.__set_schema_hash(schema_hash);
    tablet_schema.__set_keys_type(TKeysType::AGG_KEYS);
    tablet_schema.__set_storage_type(TStorageType::COLUMN);
    tablet_schema.__set_columns(cols);

    TCreateTabletReq create_tablet_req;
    create_tablet_req.__set_tablet_schema(tablet_schema);
    create_tablet_req.__set_tablet_id(tablet_id);
    create_tablet_req.__set_version(2);

    std::vector<DataDir*> data_dirs;
    data_dirs.push_back(_data_dir);
    RuntimeProfile profile("CreateTablet");
    Status create_st =
            k_engine->tablet_manager()->create_tablet(create_tablet_req, data_dirs, &profile);
    // Fatal assertion: nothing below is meaningful if the tablet was not created.
    ASSERT_TRUE(create_st.ok()) << create_st;

    TabletSharedPtr tablet = k_engine->tablet_manager()->get_tablet(tablet_id);
    // Fatal assertion: `tablet` is dereferenced right below, so a null pointer
    // must stop the test here instead of crashing the test binary.
    ASSERT_TRUE(tablet != nullptr);

    std::string serialized_tablet_meta;
    tablet->tablet_meta()->serialize(&serialized_tablet_meta);

    // Reload the same tablet from the serialized meta, forcing replacement of
    // the tablet that is already registered in the manager.
    bool update_meta = true;
    bool force = true;
    bool restore = false;
    bool check_path = true;
    Status st = _tablet_mgr->load_tablet_from_meta(_data_dir, tablet_id, schema_hash,
                                                   serialized_tablet_meta, update_meta, force,
                                                   restore, check_path);
    ASSERT_TRUE(st.ok()) << st.to_string();

    // After reload, the original tablet should not be allowed to save meta.
    ASSERT_FALSE(tablet->do_tablet_meta_checkpoint());
}

} // namespace doris

0 comments on commit 190c12d

Please sign in to comment.