MDEV-35355 : Galera test failure on galera_sr.mysql-wsrep-features#165
Problem was that in DeadlockChecker::trx_rollback() we hold lock_sys before we call wsrep_handle_SR_rollback() where THD::LOCK_thd_data (and some cases THD::LOCK_thd_kill) are acquired. This is against current mutex ordering rules. However, acquiring THD::LOCK_thd_data is not necessary because we always are in victim_thd context, either client session is rolling back or rollbacker thread should be in control. Therefore, we should always use wsrep_thd_self_abort() and then no additional mutexes are required. Fixed by removing locking of THD::LOCK_thd_data and using only wsrep_thd_self_abort(). In debug builds added assertions to verify that we are always in victim_thd context. This fix is for MariaDB 10.5 and we already have a test case that sporadically fail in Jenkins before this fix. Signed-off-by: Julius Goryavsky <julius.goryavsky@mariadb.com>
This commit is contained in:
parent
af50783fcd
commit
f219fb8489
@ -186,44 +186,31 @@ extern "C" my_bool wsrep_thd_is_SR(const THD *thd)
|
|||||||
return thd && thd->wsrep_cs().transaction().is_streaming();
|
return thd && thd->wsrep_cs().transaction().is_streaming();
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void wsrep_handle_SR_rollback(THD *bf_thd,
|
extern "C" void wsrep_handle_SR_rollback(THD *bf_thd __attribute__((unused)),
|
||||||
THD *victim_thd)
|
THD *victim_thd)
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
|
We should always be in victim_thd context, either client session is
|
||||||
|
rolling back or rollbacker thread should be in control.
|
||||||
|
*/
|
||||||
DBUG_ASSERT(victim_thd);
|
DBUG_ASSERT(victim_thd);
|
||||||
|
DBUG_ASSERT(current_thd == victim_thd);
|
||||||
DBUG_ASSERT(wsrep_thd_is_SR(victim_thd));
|
DBUG_ASSERT(wsrep_thd_is_SR(victim_thd));
|
||||||
if (!victim_thd || !wsrep_on(bf_thd)) return;
|
|
||||||
|
|
||||||
wsrep_thd_LOCK(victim_thd);
|
/* Defensive measure to avoid crash in production. */
|
||||||
|
if (!victim_thd) return;
|
||||||
|
|
||||||
WSREP_DEBUG("handle rollback, for deadlock: thd %llu trx_id %" PRIu64 " frags %zu conf %s",
|
WSREP_DEBUG("Handle SR rollback, for deadlock: thd %llu trx_id %" PRIu64 " frags %zu conf %s",
|
||||||
victim_thd->thread_id,
|
victim_thd->thread_id,
|
||||||
victim_thd->wsrep_trx_id(),
|
victim_thd->wsrep_trx_id(),
|
||||||
victim_thd->wsrep_sr().fragments_certified(),
|
victim_thd->wsrep_sr().fragments_certified(),
|
||||||
wsrep_thd_transaction_state_str(victim_thd));
|
wsrep_thd_transaction_state_str(victim_thd));
|
||||||
|
|
||||||
/* Note: do not store/reset globals before wsrep_bf_abort() call
|
|
||||||
to avoid losing BF thd context. */
|
|
||||||
if (!(bf_thd && bf_thd != victim_thd))
|
|
||||||
{
|
|
||||||
DEBUG_SYNC(victim_thd, "wsrep_before_SR_rollback");
|
DEBUG_SYNC(victim_thd, "wsrep_before_SR_rollback");
|
||||||
}
|
|
||||||
if (bf_thd)
|
|
||||||
{
|
|
||||||
wsrep_bf_abort(bf_thd, victim_thd);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
wsrep_thd_self_abort(victim_thd);
|
wsrep_thd_self_abort(victim_thd);
|
||||||
}
|
}
|
||||||
|
|
||||||
wsrep_thd_UNLOCK(victim_thd);
|
|
||||||
|
|
||||||
if (bf_thd)
|
|
||||||
{
|
|
||||||
wsrep_store_threadvars(bf_thd);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
extern "C" my_bool wsrep_thd_bf_abort(THD *bf_thd, THD *victim_thd,
|
extern "C" my_bool wsrep_thd_bf_abort(THD *bf_thd, THD *victim_thd,
|
||||||
my_bool signal)
|
my_bool signal)
|
||||||
{
|
{
|
||||||
|
Loading…
x
Reference in New Issue
Block a user