Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MDEV-34836: TOI on parent table must BF abort SR in progress on a child #3534

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion include/mysql/service_wsrep.h
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ extern "C" my_bool wsrep_thd_is_local_toi(const MYSQL_THD thd);
extern "C" my_bool wsrep_thd_is_in_rsu(const MYSQL_THD thd);
/* Return true if thd is in BF mode, either high_priority or TOI */
extern "C" my_bool wsrep_thd_is_BF(const MYSQL_THD thd, my_bool sync);
/* Return true if thd is streaming */
/* Return true if thd is streaming in progress */
extern "C" my_bool wsrep_thd_is_SR(const MYSQL_THD thd);
extern "C" void wsrep_handle_SR_rollback(MYSQL_THD BF_thd, MYSQL_THD victim_thd);
/* Return thd retry counter */
Expand Down
23 changes: 23 additions & 0 deletions mysql-test/suite/galera_sr/r/MDEV-34836.result
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
connection node_2;
connection node_1;
connection node_1;
CREATE TABLE parent (id INT AUTO_INCREMENT PRIMARY KEY, v INT) ENGINE=InnoDB;
INSERT INTO parent VALUES (1, 1),(2, 2),(3, 3);
CREATE TABLE child (id INT AUTO_INCREMENT PRIMARY KEY, parent_id INT, CONSTRAINT parent_fk
FOREIGN KEY (parent_id) REFERENCES parent (id)) ENGINE=InnoDB;
connection node_2;
SET SESSION wsrep_trx_fragment_size = 1;
START TRANSACTION;
INSERT INTO child (parent_id) VALUES (1),(2),(3);
connection node_1;
SET SESSION wsrep_sync_wait = 15;
SELECT COUNT(*) FROM child;
COUNT(*)
0
ALTER TABLE parent AUTO_INCREMENT = 100;
connection node_2;
COMMIT;
ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
DROP TABLE child, parent;
disconnect node_2;
disconnect node_1;
56 changes: 56 additions & 0 deletions mysql-test/suite/galera_sr/t/MDEV-34836.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#
# MDEV-34836: TOI transaction on FK-referenced parent table must BF abort
# SR transaction in progress on a child table.
#
# Applied SR transaction on the child table was not BF aborted by TOI running
# on the parent table for several reasons:
# Although SR correctly collected FK-referenced keys to parent, TOI in Galera
# disregards common certification index and simply sets itself to depend on the
# latest certified write set seqno.
# Since this write set was the fragment of SR transaction, TOI was allowed to run in
# parallel with SR presuming it would BF abort the latter.
#
# At the same time, DML transactions in the server don't grab MDL locks on FK-referenced
# tables, thus parent table wasn't protected by an MDL lock from SR and it couldn't
# provoke MDL lock conflict for TOI to BF abort SR transaction.
# In InnoDB, DDL transactions grab shared MDL locks on child tables, which is not enough
# to trigger MDL conflict in Galera.
# InnoDB-level Wsrep patch didn't contain correct conflict resolution logic due to the
# fact that it was believed MDL locking should always produce conflicts correctly.
#
# The fix brings conflict resolution rules similar to MDL-level checks to InnoDB, thus
# accounting for the problematic case.
#

--source include/galera_cluster.inc
--source include/have_innodb.inc

--connection node_1
CREATE TABLE parent (id INT AUTO_INCREMENT PRIMARY KEY, v INT) ENGINE=InnoDB;
INSERT INTO parent VALUES (1, 1),(2, 2),(3, 3);

CREATE TABLE child (id INT AUTO_INCREMENT PRIMARY KEY, parent_id INT, CONSTRAINT parent_fk
FOREIGN KEY (parent_id) REFERENCES parent (id)) ENGINE=InnoDB;

--connection node_2
# Start SR transaction and make it lock both parent and child tales.
SET SESSION wsrep_trx_fragment_size = 1;
START TRANSACTION;
INSERT INTO child (parent_id) VALUES (1),(2),(3);

--connection node_1
# Sync wait for SR transaction to replicate and apply fragments, thus
# locking parent table as well.
SET SESSION wsrep_sync_wait = 15;
SELECT COUNT(*) FROM child;
# Now run TOI on the parent, which BF-aborts the SR-transaction in progress.
ALTER TABLE parent AUTO_INCREMENT = 100;

--connection node_2
# Check that SR is BF-aborted.
--error ER_LOCK_DEADLOCK
COMMIT;

# Cleanup
DROP TABLE child, parent;
--source include/galera_end.inc
3 changes: 2 additions & 1 deletion sql/service_wsrep.cc
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,8 @@ extern "C" my_bool wsrep_thd_is_BF(const THD *thd, my_bool sync)

extern "C" my_bool wsrep_thd_is_SR(const THD *thd)
{
return thd && thd->wsrep_cs().transaction().is_streaming();
return thd && thd->wsrep_cs().transaction().is_streaming() &&
thd->wsrep_cs().transaction().state() == wsrep::transaction::s_executing;
}

extern "C" void wsrep_handle_SR_rollback(THD *bf_thd,
Expand Down
160 changes: 89 additions & 71 deletions storage/innobase/lock/lock0lock.cc
Original file line number Diff line number Diff line change
Expand Up @@ -496,15 +496,13 @@ void lock_sys_t::close()

#ifdef WITH_WSREP
# ifdef UNIV_DEBUG
/** Check if both conflicting lock transaction and other transaction
requesting record lock are brute force (BF). If they are check is
this BF-BF wait correct and if not report BF wait and assert.
/** Check if this BF-BF wait is correct and if not report and abort.

@param lock other waiting lock
@param trx transaction requesting conflicting lock
*/
static void wsrep_assert_no_bf_bf_wait(const lock_t *lock, const trx_t *trx,
const unsigned type_mode = LOCK_NONE)
static void wsrep_assert_valid_bf_bf_wait(const lock_t *lock, const trx_t *trx,
const unsigned type_mode)
{
ut_ad(!lock->is_table());
lock_sys.assert_locked(*lock);
Expand All @@ -514,12 +512,8 @@ static void wsrep_assert_no_bf_bf_wait(const lock_t *lock, const trx_t *trx,
not acquire THD::LOCK_thd_data mutex below to avoid latching
order violation. */

if (!trx->is_wsrep() || !lock_trx->is_wsrep())
return;
if (UNIV_LIKELY(!wsrep_thd_is_BF(trx->mysql_thd, FALSE))
|| UNIV_LIKELY(!wsrep_thd_is_BF(lock_trx->mysql_thd, FALSE)))
return;

ut_ad(wsrep_thd_is_BF(trx->mysql_thd, FALSE));
ut_ad(wsrep_thd_is_BF(lock_trx->mysql_thd, FALSE));
ut_ad(trx->state == TRX_STATE_ACTIVE);

switch (lock_trx->state) {
Expand All @@ -536,16 +530,6 @@ static void wsrep_assert_no_bf_bf_wait(const lock_t *lock, const trx_t *trx,
ut_ad("invalid state" == 0);
}

/* If BF - BF order is honored, i.e. trx already holding
record lock should be ordered before this new lock request
we can keep trx waiting for the lock. If conflicting
transaction is already aborting or rolling back for replaying
we can also let new transaction waiting. */
if (wsrep_thd_order_before(lock_trx->mysql_thd, trx->mysql_thd)
|| wsrep_thd_is_aborting(lock_trx->mysql_thd)) {
return;
}

if (type_mode != LOCK_NONE)
ib::error() << " Requested lock "
<< ((type_mode & LOCK_TABLE) ? "on table " : " on record ")
Expand Down Expand Up @@ -573,8 +557,81 @@ static void wsrep_assert_no_bf_bf_wait(const lock_t *lock, const trx_t *trx,
/* BF-BF wait is a bug */
ut_error;
}

void wsrep_report_error(const lock_t* victim_lock, const trx_t *bf_trx);
# endif /* UNIV_DEBUG */

/** Check if a high priority (BF) trx has to wait for the current
lock holder based on Wsrep transaction state relations.

This code resembles the one in `wsrep_handle_mdl_conflict()`, but
it's specific to the storage engine and it doesn't perform any
BF aborts by itself.
The decision whether to BF abort a victim may depend on other conditions
like lock compatibility between InnoDB transactions.

@param lock other waiting lock
@param trx BF transaction requesting conflicting lock
@return TRUE if BF trx has to wait for the lock to be removed
*/
static bool wsrep_BF_has_to_wait(const lock_t *lock, const trx_t *trx,
bool report_bf_bf_wait,
const unsigned type_mode = LOCK_NONE)
{
THD *request_thd= trx->mysql_thd;
THD *granted_thd= lock->trx->mysql_thd;

ut_ad(wsrep_thd_is_BF(request_thd, false));
ut_ad(lock->trx->is_wsrep());

/* Granted is aborting, let it complete. */
if (wsrep_thd_is_aborting(granted_thd))
return true;

/* Granted is not BF, may BF abort it. */
if (!wsrep_thd_is_BF(granted_thd, false))
return false;

/* Applying SR cannot BF abort other high priority (BF). */
if (wsrep_thd_is_SR(request_thd))
return true;

/* Requester is truly BF and granted is applying SR in progress. */
if (wsrep_thd_is_SR(granted_thd))
return false;

#ifdef UNIV_DEBUG
if (report_bf_bf_wait)
wsrep_report_error(lock, trx);
/* We very well can let BF to wait normally as other
BF will be replayed in case of conflict. For debug
builds we will do additional sanity checks to catch
unsupported BF wait if any. */
ut_d(wsrep_assert_valid_bf_bf_wait(lock, trx, type_mode));
#endif
return true;
}

/** Determine whether BF abort on the lock holder is needed.

@param lock other waiting lock
@param trx BF transaction requesting conflicting lock
@return TRUE if BF abort is needed
*/
static bool wsrep_will_BF_abort(const lock_t *lock, const trx_t *trx)
{
ut_ad(wsrep_thd_is_BF(trx->mysql_thd, false));

/* Don't BF abort system transactions. */
if (!lock->trx->is_wsrep())
return false;

/* BF trx will wait for the lock, but it doesn't have to according
to Wsrep rules, meaning it must BF abort the lock holder. */
return lock_has_to_wait(trx->lock.wait_lock, lock) &&
!wsrep_BF_has_to_wait(lock, trx, true);
}

/** check if lock timeout was for priority thread,
as a side effect trigger lock monitor
@param trx transaction owning the lock
Expand Down Expand Up @@ -648,19 +705,9 @@ bool lock_rec_has_to_wait_wsrep(const trx_t *trx,
return false;
}

if (wsrep_thd_order_before(trx->mysql_thd, trx2->mysql_thd))
{
/* If two high priority threads have lock conflict, we look at the
order of these transactions and honor the earlier transaction. */

return false;
}

/* We very well can let bf to wait normally as other
BF will be replayed in case of conflict. For debug
builds we will do additional sanity checks to catch
unsupported bf wait if any. */
ut_d(wsrep_assert_no_bf_bf_wait(lock2, trx, type_mode));
/* If two high priority threads have lock conflict, we check if
new lock request has to wait for the transaction holding the lock. */
return wsrep_BF_has_to_wait(lock2, trx, false, type_mode);
}

return true;
Expand Down Expand Up @@ -997,14 +1044,13 @@ void wsrep_report_error(const lock_t* victim_lock, const trx_t *bf_trx)
lock_rec_print(stderr, bf_trx->lock.wait_lock, mtr);
WSREP_ERROR("victim holding lock: ");
lock_rec_print(stderr, victim_lock, mtr);
wsrep_assert_no_bf_bf_wait(victim_lock, bf_trx);
}
#endif /* WITH_DEBUG */

/** Kill the holders of conflicting locks.
@param trx brute-force applier transaction running in the current thread */
ATTRIBUTE_COLD ATTRIBUTE_NOINLINE
static void lock_wait_wsrep(trx_t *trx)
static void wsrep_handle_lock_conflict(trx_t *trx)
{
DBUG_ASSERT(wsrep_on(trx->mysql_thd));
if (!wsrep_thd_is_BF(trx->mysql_thd, false))
Expand All @@ -1030,17 +1076,17 @@ static void lock_wait_wsrep(trx_t *trx)
for (lock_t *lock= UT_LIST_GET_FIRST(table->locks); lock;
lock= UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock))
{
/* Victim trx needs to be different from BF trx and it has to have a
THD so that we can kill it. Victim might not have THD in two cases:
/* Victim trx has to have a THD so that we can kill it.
Victim might not have THD in two cases:

(1) An incomplete transaction that was recovered from undo logs
on server startup (and not yet rolled back).

(2) Transaction that is in XA PREPARE state and whose client
connection was disconnected.

Neither of these can complete before lock_wait_wsrep() releases
lock_sys.latch.
Neither of these can complete before wsrep_handle_lock_conflict()
releases lock_sys.latch.

(1) trx_t::commit_in_memory() is clearing both
trx_t::state and trx_t::is_recovered before it invokes
Expand All @@ -1051,24 +1097,9 @@ static void lock_wait_wsrep(trx_t *trx)
(2) If is in XA PREPARE state, it would eventually be rolled
back and the lock conflict would be resolved when an XA COMMIT
or XA ROLLBACK statement is executed in some other connection.

If victim has also BF status, but has earlier seqno, we have to wait.
*/
if (lock->trx != trx && lock->trx->mysql_thd &&
!(wsrep_thd_is_BF(lock->trx->mysql_thd, false) &&
wsrep_thd_order_before(lock->trx->mysql_thd, trx->mysql_thd)))
if (lock->trx->mysql_thd && wsrep_will_BF_abort(lock, trx))
{
if (wsrep_thd_is_BF(lock->trx->mysql_thd, false))
{
// There is no need to kill victim with compatible lock
if (!lock_has_to_wait(trx->lock.wait_lock, lock))
continue;

#ifdef UNIV_DEBUG
wsrep_report_error(lock, trx);
#endif
}

victims.emplace(lock->trx);
}
}
Expand All @@ -1090,21 +1121,8 @@ static void lock_wait_wsrep(trx_t *trx)
record-locks instead of table locks. See details
from comment above.
*/
if (lock->trx != trx && lock->trx->mysql_thd &&
!(wsrep_thd_is_BF(lock->trx->mysql_thd, false) &&
wsrep_thd_order_before(lock->trx->mysql_thd, trx->mysql_thd)))
if (lock->trx->mysql_thd && wsrep_will_BF_abort(lock, trx))
{
if (wsrep_thd_is_BF(lock->trx->mysql_thd, false))
{
// There is no need to kill victim with compatible lock
if (!lock_has_to_wait(trx->lock.wait_lock, lock))
continue;

#ifdef UNIV_DEBUG
wsrep_report_error(lock, trx);
#endif
}

victims.emplace(lock->trx);
}
} while ((lock= lock_rec_get_next(heap_no, lock)));
Expand Down Expand Up @@ -2015,7 +2033,7 @@ dberr_t lock_wait(que_thr_t *thr)

ut_ad(!trx->dict_operation_lock_mode);

IF_WSREP(if (trx->is_wsrep()) lock_wait_wsrep(trx),);
IF_WSREP(if (trx->is_wsrep()) wsrep_handle_lock_conflict(trx),);

const auto type_mode= wait_lock->type_mode;
#ifdef HAVE_REPLICATION
Expand Down