Skip to content

Commit 43b239a

Browse files
committed
MDEV-24915 Galera conflict resolution is unnecessarily complex
The fix of MDEV-23328 introduced a background thread for killing conflicting transactions. Thanks to the refactoring that was conducted in MDEV-24671, the high-priority ("brute-force") applier thread can kill the conflicting transactions itself, before waiting for the locks to be finally released (after the conflicting transactions have been rolled back). This also allows us to remove the hack LockGGuard that had to be added in MDEV-20612, and remove Galera-related function parameters from lock creation.
1 parent 18dc5b0 commit 43b239a

File tree

6 files changed

+128
-491
lines changed

6 files changed

+128
-491
lines changed

Diff for: storage/innobase/handler/ha_innodb.cc

+58-154
Original file line numberDiff line numberDiff line change
@@ -17991,162 +17991,66 @@ static struct st_mysql_storage_engine innobase_storage_engine=
1799117991
{ MYSQL_HANDLERTON_INTERFACE_VERSION };
1799217992

1799317993
#ifdef WITH_WSREP
17994-
17995-
struct bg_wsrep_kill_trx_arg {
17996-
my_thread_id thd_id, bf_thd_id;
17997-
trx_id_t trx_id, bf_trx_id;
17998-
bool signal;
17999-
};
18000-
18001-
/** Kill one transaction from a background manager thread
18002-
18003-
wsrep_innobase_kill_one_trx() is invoked when lock_sys.mutex and trx mutex
18004-
are taken, wsrep_thd_bf_abort() cannot be used there as it takes THD mutexes
18005-
that must be taken before lock_sys.mutex and trx mutex. That's why
18006-
wsrep_innobase_kill_one_trx only posts the killing task to the manager thread
18007-
and the actual killing happens asynchronously here.
18008-
18009-
As no mutexes were held we don't know whether THD or trx pointers are still
18010-
valid, so we need to pass thread/trx ids and perform a lookup.
18011-
*/
18012-
static void bg_wsrep_kill_trx(void *void_arg)
18013-
{
18014-
bg_wsrep_kill_trx_arg *arg= (bg_wsrep_kill_trx_arg *)void_arg;
18015-
THD *thd, *bf_thd;
18016-
trx_t *victim_trx;
18017-
bool aborting= false;
18018-
18019-
if ((bf_thd= find_thread_by_id(arg->bf_thd_id)))
18020-
wsrep_thd_LOCK(bf_thd);
18021-
if ((thd= find_thread_by_id(arg->thd_id)))
18022-
wsrep_thd_LOCK(thd);
18023-
18024-
if (!thd || !bf_thd || !(victim_trx= thd_to_trx(thd)))
18025-
goto ret0;
18026-
18027-
lock_sys.wr_lock(SRW_LOCK_CALL);
18028-
mysql_mutex_lock(&lock_sys.wait_mutex);
18029-
victim_trx->mutex_lock();
18030-
if (victim_trx->id != arg->trx_id ||
18031-
victim_trx->state == TRX_STATE_COMMITTED_IN_MEMORY)
18032-
{
18033-
/* apparently victim trx was meanwhile rolled back. */
18034-
goto ret1;
18035-
}
18036-
18037-
DBUG_ASSERT(wsrep_on(bf_thd));
18038-
18039-
WSREP_LOG_CONFLICT(bf_thd, thd, TRUE);
18040-
18041-
WSREP_DEBUG("Aborter %s trx_id: " TRX_ID_FMT " thread: %ld "
18042-
"seqno: %lld client_state: %s client_mode: %s transaction_mode: %s "
18043-
"query: %s",
18044-
wsrep_thd_is_BF(bf_thd, false) ? "BF" : "normal",
18045-
arg->bf_trx_id,
18046-
thd_get_thread_id(bf_thd),
18047-
wsrep_thd_trx_seqno(bf_thd),
18048-
wsrep_thd_client_state_str(bf_thd),
18049-
wsrep_thd_client_mode_str(bf_thd),
18050-
wsrep_thd_transaction_state_str(bf_thd),
18051-
wsrep_thd_query(bf_thd));
18052-
18053-
WSREP_DEBUG("Victim %s trx_id: " TRX_ID_FMT " thread: %ld "
18054-
"seqno: %lld client_state: %s client_mode: %s transaction_mode: %s "
18055-
"query: %s",
18056-
wsrep_thd_is_BF(thd, false) ? "BF" : "normal",
18057-
victim_trx->id,
18058-
thd_get_thread_id(thd),
18059-
wsrep_thd_trx_seqno(thd),
18060-
wsrep_thd_client_state_str(thd),
18061-
wsrep_thd_client_mode_str(thd),
18062-
wsrep_thd_transaction_state_str(thd),
18063-
wsrep_thd_query(thd));
18064-
18065-
/* Mark transaction as a victim for Galera abort */
18066-
victim_trx->lock.was_chosen_as_deadlock_victim.fetch_or(2);
18067-
if (wsrep_thd_set_wsrep_aborter(bf_thd, thd))
18068-
{
18069-
WSREP_DEBUG("innodb kill transaction skipped due to wsrep_aborter set");
18070-
goto ret1;
18071-
}
18072-
18073-
aborting= true;
18074-
18075-
ret1:
18076-
victim_trx->mutex_unlock();
18077-
lock_sys.wr_unlock();
18078-
mysql_mutex_unlock(&lock_sys.wait_mutex);
18079-
ret0:
18080-
if (thd) {
18081-
wsrep_thd_UNLOCK(thd);
18082-
if (aborting) {
18083-
DEBUG_SYNC(bf_thd, "before_wsrep_thd_abort");
18084-
wsrep_thd_bf_abort(bf_thd, thd, arg->signal);
18085-
}
18086-
wsrep_thd_kill_UNLOCK(thd);
18087-
}
18088-
if (bf_thd) {
18089-
wsrep_thd_UNLOCK(bf_thd);
18090-
wsrep_thd_kill_UNLOCK(bf_thd);
18091-
}
18092-
free(arg);
18093-
}
18094-
18095-
/** This function is used to kill one transaction.
18096-
18097-
This transaction was open on this node (not-yet-committed), and a
18098-
conflicting writeset from some other node that was being applied
18099-
caused a locking conflict. First committed (from other node)
18100-
wins, thus open transaction is rolled back. BF stands for
18101-
brute-force: any transaction can get aborted by galera any time
18102-
it is necessary.
18103-
18104-
This conflict can happen only when the replicated writeset (from
18105-
other node) is being applied, not when it’s waiting in the queue.
18106-
If our local transaction reached its COMMIT and this conflicting
18107-
writeset was in the queue, then it should fail the local
18108-
certification test instead.
18109-
18110-
A brute force abort is only triggered by a locking conflict
18111-
between a writeset being applied by an applier thread (slave thread)
18112-
and an open transaction on the node, not by a Galera writeset
18113-
comparison as in the local certification failure.
18114-
18115-
@param[in] bf_thd Brute force (BF) thread
18116-
@param[in,out] victim_trx Vimtim trx to be killed
18117-
@param[in] signal Should victim be signaled */
18118-
void
18119-
wsrep_innobase_kill_one_trx(
18120-
THD* bf_thd,
18121-
trx_t *victim_trx,
18122-
bool signal)
17994+
/** Request a transaction to be killed that holds a conflicting lock.
17995+
@param bf_trx brute force applier transaction
17996+
@param thd_id thd_get_thread_id(victim_trx->mysql_htd)
17997+
@param trx_id victim_trx->id */
17998+
void lock_wait_wsrep_kill(trx_t *bf_trx, ulong thd_id, trx_id_t trx_id)
1812317999
{
18124-
ut_ad(bf_thd);
18125-
ut_ad(victim_trx);
18126-
ut_ad(victim_trx->mutex_is_owner());
18127-
18128-
DBUG_ENTER("wsrep_innobase_kill_one_trx");
18129-
18130-
DBUG_EXECUTE_IF("sync.before_wsrep_thd_abort",
18131-
{
18132-
const char act[]=
18133-
"now "
18134-
"SIGNAL sync.before_wsrep_thd_abort_reached "
18135-
"WAIT_FOR signal.before_wsrep_thd_abort";
18136-
DBUG_ASSERT(!debug_sync_set_action(bf_thd,
18137-
STRING_WITH_LEN(act)));
18138-
};);
18139-
18140-
trx_t* bf_trx= thd_to_trx(bf_thd);
18141-
bg_wsrep_kill_trx_arg *arg = (bg_wsrep_kill_trx_arg*)malloc(sizeof(*arg));
18142-
arg->thd_id = thd_get_thread_id(victim_trx->mysql_thd);
18143-
arg->trx_id = victim_trx->id;
18144-
arg->bf_thd_id = thd_get_thread_id(bf_thd);
18145-
arg->bf_trx_id = bf_trx ? bf_trx->id : TRX_ID_MAX;
18146-
arg->signal = signal;
18147-
mysql_manager_submit(bg_wsrep_kill_trx, arg);
18000+
THD *bf_thd= bf_trx->mysql_thd;
1814818001

18149-
DBUG_VOID_RETURN;
18002+
if (THD *vthd= find_thread_by_id(thd_id))
18003+
{
18004+
bool aborting= false;
18005+
wsrep_thd_LOCK(vthd);
18006+
if (trx_t *vtrx= thd_to_trx(vthd))
18007+
{
18008+
lock_sys.wr_lock(SRW_LOCK_CALL);
18009+
mysql_mutex_lock(&lock_sys.wait_mutex);
18010+
vtrx->mutex_lock();
18011+
if (vtrx->id == trx_id && vtrx->state == TRX_STATE_ACTIVE)
18012+
{
18013+
WSREP_LOG_CONFLICT(bf_thd, vthd, TRUE);
18014+
WSREP_DEBUG("Aborter BF trx_id: " TRX_ID_FMT " thread: %ld "
18015+
"seqno: %lld client_state: %s "
18016+
"client_mode: %s transaction_mode: %s query: %s",
18017+
bf_trx->id,
18018+
thd_get_thread_id(bf_thd),
18019+
wsrep_thd_trx_seqno(bf_thd),
18020+
wsrep_thd_client_state_str(bf_thd),
18021+
wsrep_thd_client_mode_str(bf_thd),
18022+
wsrep_thd_transaction_state_str(bf_thd),
18023+
wsrep_thd_query(bf_thd));
18024+
WSREP_DEBUG("Victim %s trx_id: " TRX_ID_FMT " thread: %ld "
18025+
"seqno: %lld client_state: %s "
18026+
"client_mode: %s transaction_mode: %s query: %s",
18027+
wsrep_thd_is_BF(vthd, false) ? "BF" : "normal",
18028+
vtrx->id,
18029+
thd_get_thread_id(vthd),
18030+
wsrep_thd_trx_seqno(vthd),
18031+
wsrep_thd_client_state_str(vthd),
18032+
wsrep_thd_client_mode_str(vthd),
18033+
wsrep_thd_transaction_state_str(vthd),
18034+
wsrep_thd_query(vthd));
18035+
/* Mark transaction as a victim for Galera abort */
18036+
vtrx->lock.was_chosen_as_deadlock_victim.fetch_or(2);
18037+
if (!wsrep_thd_set_wsrep_aborter(bf_thd, vthd))
18038+
aborting= true;
18039+
else
18040+
WSREP_DEBUG("kill transaction skipped due to wsrep_aborter set");
18041+
}
18042+
lock_sys.wr_unlock();
18043+
mysql_mutex_unlock(&lock_sys.wait_mutex);
18044+
vtrx->mutex_unlock();
18045+
}
18046+
wsrep_thd_UNLOCK(vthd);
18047+
if (aborting)
18048+
{
18049+
DEBUG_SYNC(bf_thd, "before_wsrep_thd_abort");
18050+
wsrep_thd_bf_abort(bf_thd, vthd, true);
18051+
}
18052+
wsrep_thd_kill_UNLOCK(vthd);
18053+
}
1815018054
}
1815118055

1815218056
/** This function forces the victim transaction to abort. Aborting the

Diff for: storage/innobase/include/ha_prototypes.h

-1
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,6 @@ innobase_casedn_str(
209209
char* a); /*!< in/out: string to put in lower case */
210210

211211
#ifdef WITH_WSREP
212-
void wsrep_innobase_kill_one_trx(THD *bf_thd, trx_t *victim_trx, bool signal);
213212
ulint wsrep_innobase_mysql_sort(int mysql_type, uint charset_number,
214213
unsigned char* str, ulint str_length,
215214
unsigned int buf_length);

Diff for: storage/innobase/include/lock0lock.h

-19
Original file line numberDiff line numberDiff line change
@@ -580,7 +580,6 @@ class lock_sys_t
580580
{
581581
friend struct LockGuard;
582582
friend struct LockMultiGuard;
583-
friend struct LockGGuard;
584583

585584
/** Hash table latch */
586585
struct hash_latch
@@ -920,18 +919,6 @@ struct LockGuard
920919
lock_sys_t::hash_latch *latch;
921920
};
922921

923-
#ifdef WITH_WSREP
924-
/** lock_sys.latch guard for a page_id_t shard */
925-
struct LockGGuard
926-
{
927-
LockGGuard(lock_sys_t::hash_table &hash, const page_id_t id, bool all);
928-
~LockGGuard();
929-
private:
930-
/** The hash bucket (nullptr if all of them) */
931-
lock_sys_t::hash_latch *latch;
932-
};
933-
#endif
934-
935922
/** lock_sys.latch guard for 2 page_id_t shards */
936923
struct LockMultiGuard
937924
{
@@ -952,9 +939,6 @@ lock_t*
952939
lock_rec_create(
953940
/*============*/
954941
lock_t* c_lock, /*!< conflicting lock */
955-
#ifdef WITH_WSREP
956-
que_thr_t* thr, /*!< thread owning trx */
957-
#endif
958942
unsigned type_mode,/*!< in: lock mode and wait flag */
959943
const buf_block_t* block, /*!< in: buffer block containing
960944
the record */
@@ -984,9 +968,6 @@ without checking for deadlocks or conflicts.
984968
lock_t*
985969
lock_rec_create_low(
986970
lock_t* c_lock,
987-
#ifdef WITH_WSREP
988-
que_thr_t* thr, /*!< thread owning trx */
989-
#endif
990971
unsigned type_mode,
991972
const page_id_t page_id,
992973
const page_t* page,

Diff for: storage/innobase/include/lock0lock.ic

-6
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,6 @@ lock_t*
6161
lock_rec_create(
6262
/*============*/
6363
lock_t* c_lock, /*!< conflicting lock */
64-
#ifdef WITH_WSREP
65-
que_thr_t* thr, /*!< thread owning trx */
66-
#endif
6764
unsigned type_mode,/*!< in: lock mode and wait flag */
6865
const buf_block_t* block, /*!< in: buffer block containing
6966
the record */
@@ -77,9 +74,6 @@ lock_rec_create(
7774
btr_assert_not_corrupted(block, index);
7875
return lock_rec_create_low(
7976
c_lock,
80-
#ifdef WITH_WSREP
81-
thr,
82-
#endif
8377
type_mode, block->page.id(), block->frame, heap_no,
8478
index, trx, caller_owns_trx_mutex);
8579
}

0 commit comments

Comments
 (0)