From 42543b667d8ba5fc7915393b7e75bc60ad2c8721 Mon Sep 17 00:00:00 2001 From: Yanqin Jin Date: Thu, 23 Jun 2022 18:32:25 -0700 Subject: [PATCH] Fix race condition between file purge and backup/checkpoint (#10187) Summary: Resolves https://github.com/facebook/rocksdb/issues/10129 I extracted this fix from https://github.com/facebook/rocksdb/issues/7516 since it's also already a bug in main branch, and we want to separate it from the main part of the PR. There can be a race condition between two threads. Thread 1 executes `DBImpl::FindObsoleteFiles()` while thread 2 executes `GetSortedWals()`. ``` Time thread 1 thread 2 | mutex_.lock | read disable_delete_obsolete_files_ | ... | wait on log_sync_cv and release mutex_ | mutex_.lock | ++disable_delete_obsolete_files_ | mutex_.unlock | mutex_.lock | while (pending_purge_obsolete_files > 0) { bg_cv.wait;} | wake up with mutex_ locked | compute WALs tracked by MANIFEST | mutex_.unlock | wake up with mutex_ locked | ++pending_purge_obsolete_files_ | mutex_.unlock | | delete obsolete WAL | WAL missing but tracked in MANIFEST. V ``` The fix proposed eliminates the possibility of the above by increasing `pending_purge_obsolete_files_` before `FindObsoleteFiles()` can possibly release the mutex. Pull Request resolved: https://github.com/facebook/rocksdb/pull/10187 Test Plan: make check Reviewed By: ltamasi Differential Revision: D37214235 Pulled By: riversand963 fbshipit-source-id: 556ab1b58ae6d19150169dfac4db08195c797184 Signed-off-by: tabokie --- HISTORY.md | 1 + db/db_impl/db_impl_files.cc | 20 +++++++++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 72af52006fc..bb3b0460630 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -5,6 +5,7 @@ * Fix unprotected concurrent accesses to `WritableFileWriter::filesize_` by `DB::SyncWAL()` and `DB::Put()` in two write queue mode. * Fix a bug in WAL tracking. Before this PR (#10087), calling `SyncWAL()` on the only WAL file of the db will not log the event in MANIFEST, thus allowing a subsequent `DB::Open` even if the WAL file is missing or corrupted. * Fixed a possible corruption for users of `manual_wal_flush` and/or `FlushWAL(true /* sync */)`, together with `track_and_verify_wals_in_manifest == true`. For those users, losing unsynced data (e.g., due to power loss) could make future DB opens fail with a `Status::Corruption` complaining about missing WAL data. +* Fix a bug in which backup/checkpoint can include a WAL deleted by RocksDB. ### Public API changes * Remove ReadOptions::iter_start_seqnum which has been deprecated. diff --git a/db/db_impl/db_impl_files.cc b/db/db_impl/db_impl_files.cc index ccd9aa3dde7..b0187d4f73e 100644 --- a/db/db_impl/db_impl_files.cc +++ b/db/db_impl/db_impl_files.cc @@ -19,6 +19,7 @@ #include "logging/logging.h" #include "port/port.h" #include "util/autovector.h" +#include "util/defer.h" namespace ROCKSDB_NAMESPACE { @@ -252,6 +253,22 @@ void DBImpl::FindObsoleteFiles(JobContext* job_context, bool force, job_context->blob_delete_files); } + // Before potentially releasing mutex and waiting on condvar, increment + // pending_purge_obsolete_files_ so that another thread executing + // `GetSortedWals` will wait until this thread finishes execution since the + // other thread will be waiting for `pending_purge_obsolete_files_`. + // pending_purge_obsolete_files_ MUST be decremented if there is nothing to + // delete. + ++pending_purge_obsolete_files_; + + Defer cleanup([job_context, this]() { + assert(job_context != nullptr); + if (!job_context->HaveSomethingToDelete()) { + mutex_.AssertHeld(); + --pending_purge_obsolete_files_; + } + }); + // logs_ is empty when called during recovery, in which case there can't yet // be any tracked obsolete logs if (!alive_log_files_.empty() && !logs_.empty()) { @@ -308,9 +325,6 @@ void DBImpl::FindObsoleteFiles(JobContext* job_context, bool force, job_context->logs_to_free = logs_to_free_; job_context->log_recycle_files.assign(log_recycle_files_.begin(), log_recycle_files_.end()); - if (job_context->HaveSomethingToDelete()) { - ++pending_purge_obsolete_files_; - } logs_to_free_.clear(); }