From fb6fd087ed4eda475baee597c8e4527b1ad0ecf9 Mon Sep 17 00:00:00 2001 From: ywz <649521587@qq.com> Date: Wed, 12 Feb 2025 16:54:43 +0800 Subject: [PATCH] Add function to support purging existing logs. (#643) This change is necessary for baseline resync and can be called by the upper layer to purge existing logs, which resolves the following issue: If a follower restarts during baseline resync, it will replay the remaining logs first. However, shard info has already been cleared at the beginning of resync (from the HO side), making it impossible to retrieve shard info while replaying logs, which results in errors. Co-authored-by: yawzhang --- src/include/homestore/replication/repl_dev.h | 3 +++ src/lib/replication/log_store/home_raft_log_store.cpp | 7 +++++++ src/lib/replication/log_store/home_raft_log_store.h | 6 ++++++ src/lib/replication/repl_dev/raft_repl_dev.h | 4 ++++ src/lib/replication/repl_dev/solo_repl_dev.h | 1 + 5 files changed, 21 insertions(+) diff --git a/src/include/homestore/replication/repl_dev.h b/src/include/homestore/replication/repl_dev.h index d05be3fde..937450336 100644 --- a/src/include/homestore/replication/repl_dev.h +++ b/src/include/homestore/replication/repl_dev.h @@ -477,6 +477,9 @@ class ReplDev { /// @return true if ready, false otherwise virtual bool is_ready_for_traffic() const = 0; + /// @brief Purge the existing logs held by this repl dev (called by the upper layer during baseline resync).
+ virtual void purge() = 0; + virtual void attach_listener(shared< ReplDevListener > listener) { m_listener = std::move(listener); } virtual void detach_listener() { diff --git a/src/lib/replication/log_store/home_raft_log_store.cpp b/src/lib/replication/log_store/home_raft_log_store.cpp index 55cd690e4..be7039059 100644 --- a/src/lib/replication/log_store/home_raft_log_store.cpp +++ b/src/lib/replication/log_store/home_raft_log_store.cpp @@ -380,6 +380,13 @@ ulong HomeRaftLogStore::last_durable_index() { return to_repl_lsn(m_last_durable_lsn); } +void HomeRaftLogStore::purge_all_logs() { + auto last_lsn = m_log_store->get_contiguous_issued_seq_num(m_last_durable_lsn); + REPL_STORE_LOG(INFO, "Store={} LogDev={}: Purging all logs in the log store, last_lsn={}", + m_logstore_id, m_logdev_id, last_lsn); + m_log_store->truncate(last_lsn, false /* in_memory_truncate_only */); +} + void HomeRaftLogStore::wait_for_log_store_ready() { m_log_store_future.wait(); } void HomeRaftLogStore::set_last_durable_lsn(repl_lsn_t lsn) { m_last_durable_lsn = to_store_lsn(lsn); } diff --git a/src/lib/replication/log_store/home_raft_log_store.h b/src/lib/replication/log_store/home_raft_log_store.h index d2c0fd57b..7fb96a5d4 100644 --- a/src/lib/replication/log_store/home_raft_log_store.h +++ b/src/lib/replication/log_store/home_raft_log_store.h @@ -215,6 +215,12 @@ class HomeRaftLogStore : public nuraft::log_store { void truncate(uint32_t num_reserved_cnt, repl_lsn_t compact_lsn); #endif + /** + * Purge all logs in the log store. + * This is a destructive operation and is currently used only by baseline resync (purge all logs, then restore from a snapshot).
+ */ + void purge_all_logs(); + void wait_for_log_store_ready(); void set_last_durable_lsn(repl_lsn_t lsn); diff --git a/src/lib/replication/repl_dev/raft_repl_dev.h b/src/lib/replication/repl_dev/raft_repl_dev.h index 619da7843..b6cd9d744 100644 --- a/src/lib/replication/repl_dev/raft_repl_dev.h +++ b/src/lib/replication/repl_dev/raft_repl_dev.h @@ -198,6 +198,10 @@ class RaftReplDev : public ReplDev, if (!ready) { RD_LOGD("Not yet ready for traffic, committed to {} but gate is {}", committed_lsn, gate); } return ready; } + void purge() override { + // clean up existing logs in log store + m_data_journal->purge_all_logs(); + } //////////////// Accessor/shortcut methods /////////////////////// nuraft_mesg::repl_service_ctx* group_msg_service(); diff --git a/src/lib/replication/repl_dev/solo_repl_dev.h b/src/lib/replication/repl_dev/solo_repl_dev.h index e5f33fb63..f252dd209 100644 --- a/src/lib/replication/repl_dev/solo_repl_dev.h +++ b/src/lib/replication/repl_dev/solo_repl_dev.h @@ -54,6 +54,7 @@ class SoloReplDev : public ReplDev { return std::vector< peer_info >{peer_info{.id_ = m_group_id, .replication_idx_ = 0, .last_succ_resp_us_ = 0}}; } bool is_ready_for_traffic() const override { return true; } + void purge() override {} uuid_t group_id() const override { return m_group_id; }