MDEV-25114: Crash: WSREP: invalid state ROLLED_BACK (FATAL)
Revert "MDEV-23328 Server hang due to Galera lock conflict resolution" This reverts commit eac8341df4c3c7b98360f4e9498acf393dc055e3.
This commit is contained in:
parent
e10838268e
commit
aa7ca987db
@ -84,7 +84,7 @@ SET DEBUG_SYNC = "now SIGNAL continue_kill";
|
|||||||
--reap
|
--reap
|
||||||
|
|
||||||
--connection node_2a
|
--connection node_2a
|
||||||
--error 0,1213,2013
|
--error 0,1213
|
||||||
select * from t1;
|
select * from t1;
|
||||||
|
|
||||||
--connection node_2
|
--connection node_2
|
||||||
|
@ -8985,20 +8985,8 @@ err:
|
|||||||
}
|
}
|
||||||
#endif /* MYSQL_CLIENT */
|
#endif /* MYSQL_CLIENT */
|
||||||
|
|
||||||
|
|
||||||
#if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT)
|
#if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT)
|
||||||
static bool wsrep_must_replay(THD *thd)
|
|
||||||
{
|
|
||||||
#ifdef WITH_WSREP
|
|
||||||
mysql_mutex_lock(&thd->LOCK_thd_data);
|
|
||||||
bool res= WSREP(thd) && thd->wsrep_trx().state() == wsrep::transaction::s_must_replay;
|
|
||||||
mysql_mutex_unlock(&thd->LOCK_thd_data);
|
|
||||||
return res;
|
|
||||||
#else
|
|
||||||
return false;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int Xid_log_event::do_apply_event(rpl_group_info *rgi)
|
int Xid_log_event::do_apply_event(rpl_group_info *rgi)
|
||||||
{
|
{
|
||||||
bool res;
|
bool res;
|
||||||
@ -9063,8 +9051,14 @@ int Xid_log_event::do_apply_event(rpl_group_info *rgi)
|
|||||||
res= trans_commit(thd); /* Automatically rolls back on error. */
|
res= trans_commit(thd); /* Automatically rolls back on error. */
|
||||||
thd->release_transactional_locks();
|
thd->release_transactional_locks();
|
||||||
|
|
||||||
if (sub_id && (!res || wsrep_must_replay(thd)))
|
mysql_mutex_lock(&thd->LOCK_thd_data);
|
||||||
|
#ifdef WITH_WSREP
|
||||||
|
if (sub_id && (!res || (WSREP(thd) && thd->wsrep_trx().state() == wsrep::transaction::s_must_replay)))
|
||||||
|
#else
|
||||||
|
if (sub_id && !res)
|
||||||
|
#endif /* WITH_WSREP */
|
||||||
rpl_global_gtid_slave_state->update_state_hash(sub_id, >id, hton, rgi);
|
rpl_global_gtid_slave_state->update_state_hash(sub_id, >id, hton, rgi);
|
||||||
|
mysql_mutex_unlock(&thd->LOCK_thd_data);
|
||||||
/*
|
/*
|
||||||
Increment the global status commit count variable
|
Increment the global status commit count variable
|
||||||
*/
|
*/
|
||||||
|
@ -218,8 +218,16 @@ extern "C" void wsrep_handle_SR_rollback(THD *bf_thd,
|
|||||||
extern "C" my_bool wsrep_thd_bf_abort(THD *bf_thd, THD *victim_thd,
|
extern "C" my_bool wsrep_thd_bf_abort(THD *bf_thd, THD *victim_thd,
|
||||||
my_bool signal)
|
my_bool signal)
|
||||||
{
|
{
|
||||||
mysql_mutex_assert_owner(&victim_thd->LOCK_thd_kill);
|
DBUG_EXECUTE_IF("sync.before_wsrep_thd_abort",
|
||||||
mysql_mutex_assert_not_owner(&victim_thd->LOCK_thd_data);
|
{
|
||||||
|
const char act[]=
|
||||||
|
"now "
|
||||||
|
"SIGNAL sync.before_wsrep_thd_abort_reached "
|
||||||
|
"WAIT_FOR signal.before_wsrep_thd_abort";
|
||||||
|
DBUG_ASSERT(!debug_sync_set_action(bf_thd,
|
||||||
|
STRING_WITH_LEN(act)));
|
||||||
|
};);
|
||||||
|
|
||||||
my_bool ret= wsrep_bf_abort(bf_thd, victim_thd);
|
my_bool ret= wsrep_bf_abort(bf_thd, victim_thd);
|
||||||
/*
|
/*
|
||||||
Send awake signal if victim was BF aborted or does not
|
Send awake signal if victim was BF aborted or does not
|
||||||
@ -228,6 +236,8 @@ extern "C" my_bool wsrep_thd_bf_abort(THD *bf_thd, THD *victim_thd,
|
|||||||
*/
|
*/
|
||||||
if ((ret || !wsrep_on(victim_thd)) && signal)
|
if ((ret || !wsrep_on(victim_thd)) && signal)
|
||||||
{
|
{
|
||||||
|
mysql_mutex_assert_not_owner(&victim_thd->LOCK_thd_data);
|
||||||
|
mysql_mutex_assert_not_owner(&victim_thd->LOCK_thd_kill);
|
||||||
mysql_mutex_lock(&victim_thd->LOCK_thd_data);
|
mysql_mutex_lock(&victim_thd->LOCK_thd_data);
|
||||||
|
|
||||||
if (victim_thd->wsrep_aborter && victim_thd->wsrep_aborter != bf_thd->thread_id)
|
if (victim_thd->wsrep_aborter && victim_thd->wsrep_aborter != bf_thd->thread_id)
|
||||||
@ -238,8 +248,10 @@ extern "C" my_bool wsrep_thd_bf_abort(THD *bf_thd, THD *victim_thd,
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
mysql_mutex_lock(&victim_thd->LOCK_thd_kill);
|
||||||
victim_thd->wsrep_aborter= bf_thd->thread_id;
|
victim_thd->wsrep_aborter= bf_thd->thread_id;
|
||||||
victim_thd->awake_no_mutex(KILL_QUERY);
|
victim_thd->awake_no_mutex(KILL_QUERY);
|
||||||
|
mysql_mutex_unlock(&victim_thd->LOCK_thd_kill);
|
||||||
mysql_mutex_unlock(&victim_thd->LOCK_thd_data);
|
mysql_mutex_unlock(&victim_thd->LOCK_thd_data);
|
||||||
} else {
|
} else {
|
||||||
WSREP_DEBUG("wsrep_thd_bf_abort skipped awake");
|
WSREP_DEBUG("wsrep_thd_bf_abort skipped awake");
|
||||||
|
@ -1069,8 +1069,8 @@ terminate_slave_thread(THD *thd,
|
|||||||
int error __attribute__((unused));
|
int error __attribute__((unused));
|
||||||
DBUG_PRINT("loop", ("killing slave thread"));
|
DBUG_PRINT("loop", ("killing slave thread"));
|
||||||
|
|
||||||
mysql_mutex_lock(&thd->LOCK_thd_kill);
|
|
||||||
mysql_mutex_lock(&thd->LOCK_thd_data);
|
mysql_mutex_lock(&thd->LOCK_thd_data);
|
||||||
|
mysql_mutex_lock(&thd->LOCK_thd_kill);
|
||||||
#ifndef DONT_USE_THR_ALARM
|
#ifndef DONT_USE_THR_ALARM
|
||||||
/*
|
/*
|
||||||
Error codes from pthread_kill are:
|
Error codes from pthread_kill are:
|
||||||
|
@ -788,7 +788,6 @@ THD::THD(my_thread_id id, bool is_wsrep_applier)
|
|||||||
mysql_mutex_init(key_LOCK_wakeup_ready, &LOCK_wakeup_ready, MY_MUTEX_INIT_FAST);
|
mysql_mutex_init(key_LOCK_wakeup_ready, &LOCK_wakeup_ready, MY_MUTEX_INIT_FAST);
|
||||||
mysql_mutex_init(key_LOCK_thd_kill, &LOCK_thd_kill, MY_MUTEX_INIT_FAST);
|
mysql_mutex_init(key_LOCK_thd_kill, &LOCK_thd_kill, MY_MUTEX_INIT_FAST);
|
||||||
mysql_cond_init(key_COND_wakeup_ready, &COND_wakeup_ready, 0);
|
mysql_cond_init(key_COND_wakeup_ready, &COND_wakeup_ready, 0);
|
||||||
mysql_mutex_record_order(&LOCK_thd_kill, &LOCK_thd_data);
|
|
||||||
|
|
||||||
/* Variables with default values */
|
/* Variables with default values */
|
||||||
proc_info="login";
|
proc_info="login";
|
||||||
@ -5108,13 +5107,11 @@ thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd)
|
|||||||
#ifdef WITH_WSREP
|
#ifdef WITH_WSREP
|
||||||
/* wsrep applier, replayer and TOI processing threads are ordered
|
/* wsrep applier, replayer and TOI processing threads are ordered
|
||||||
by replication provider, relaxed GAP locking protocol can be used
|
by replication provider, relaxed GAP locking protocol can be used
|
||||||
between high priority wsrep threads.
|
between high priority wsrep threads
|
||||||
Note that wsrep_thd_is_BF() doesn't take LOCK_thd_data for either thd,
|
|
||||||
the caller should guarantee that the BF state won't change.
|
|
||||||
(e.g. InnoDB does it by keeping lock_sys.mutex locked)
|
|
||||||
*/
|
*/
|
||||||
if (WSREP_ON && wsrep_thd_is_BF(thd, false) &&
|
if (WSREP_ON &&
|
||||||
wsrep_thd_is_BF(other_thd, false))
|
wsrep_thd_is_BF(const_cast<THD *>(thd), false) &&
|
||||||
|
wsrep_thd_is_BF(const_cast<THD *>(other_thd), true))
|
||||||
return 0;
|
return 0;
|
||||||
#endif /* WITH_WSREP */
|
#endif /* WITH_WSREP */
|
||||||
rgi= thd->rgi_slave;
|
rgi= thd->rgi_slave;
|
||||||
|
@ -3327,11 +3327,11 @@ public:
|
|||||||
void awake_no_mutex(killed_state state_to_set);
|
void awake_no_mutex(killed_state state_to_set);
|
||||||
void awake(killed_state state_to_set)
|
void awake(killed_state state_to_set)
|
||||||
{
|
{
|
||||||
mysql_mutex_lock(&LOCK_thd_kill);
|
|
||||||
mysql_mutex_lock(&LOCK_thd_data);
|
mysql_mutex_lock(&LOCK_thd_data);
|
||||||
|
mysql_mutex_lock(&LOCK_thd_kill);
|
||||||
awake_no_mutex(state_to_set);
|
awake_no_mutex(state_to_set);
|
||||||
mysql_mutex_unlock(&LOCK_thd_data);
|
|
||||||
mysql_mutex_unlock(&LOCK_thd_kill);
|
mysql_mutex_unlock(&LOCK_thd_kill);
|
||||||
|
mysql_mutex_unlock(&LOCK_thd_data);
|
||||||
}
|
}
|
||||||
void abort_current_cond_wait(bool force);
|
void abort_current_cond_wait(bool force);
|
||||||
|
|
||||||
|
@ -9144,6 +9144,7 @@ THD *find_thread_by_id(longlong id, bool query_id)
|
|||||||
return arg.thd;
|
return arg.thd;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
kill one thread.
|
kill one thread.
|
||||||
|
|
||||||
@ -9267,8 +9268,8 @@ static my_bool kill_threads_callback(THD *thd, kill_threads_callback_arg *arg)
|
|||||||
return 1;
|
return 1;
|
||||||
if (!arg->threads_to_kill.push_back(thd, arg->thd->mem_root))
|
if (!arg->threads_to_kill.push_back(thd, arg->thd->mem_root))
|
||||||
{
|
{
|
||||||
mysql_mutex_lock(&thd->LOCK_thd_kill); // Lock from delete
|
|
||||||
mysql_mutex_lock(&thd->LOCK_thd_data);
|
mysql_mutex_lock(&thd->LOCK_thd_data);
|
||||||
|
mysql_mutex_lock(&thd->LOCK_thd_kill); // Lock from delete
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -3497,8 +3497,8 @@ static my_bool kill_callback(THD *thd, kill_callback_arg *arg)
|
|||||||
thd->variables.server_id == arg->slave_server_id)
|
thd->variables.server_id == arg->slave_server_id)
|
||||||
{
|
{
|
||||||
arg->thd= thd;
|
arg->thd= thd;
|
||||||
mysql_mutex_lock(&thd->LOCK_thd_kill); // Lock from delete
|
|
||||||
mysql_mutex_lock(&thd->LOCK_thd_data);
|
mysql_mutex_lock(&thd->LOCK_thd_data);
|
||||||
|
mysql_mutex_lock(&thd->LOCK_thd_kill); // Lock from delete
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -69,13 +69,20 @@ bool Wsrep_client_service::interrupted(
|
|||||||
wsrep::unique_lock<wsrep::mutex>& lock WSREP_UNUSED) const
|
wsrep::unique_lock<wsrep::mutex>& lock WSREP_UNUSED) const
|
||||||
{
|
{
|
||||||
DBUG_ASSERT(m_thd == current_thd);
|
DBUG_ASSERT(m_thd == current_thd);
|
||||||
|
/* Underlying mutex in lock object points to LOCK_thd_data, which
|
||||||
|
protects m_thd->wsrep_trx(), LOCK_thd_kill protects m_thd->killed.
|
||||||
|
Locking order is:
|
||||||
|
1) LOCK_thd_data
|
||||||
|
2) LOCK_thd_kill */
|
||||||
mysql_mutex_assert_owner(static_cast<mysql_mutex_t*>(lock.mutex()->native()));
|
mysql_mutex_assert_owner(static_cast<mysql_mutex_t*>(lock.mutex()->native()));
|
||||||
|
mysql_mutex_lock(&m_thd->LOCK_thd_kill);
|
||||||
bool ret= (m_thd->killed != NOT_KILLED);
|
bool ret= (m_thd->killed != NOT_KILLED);
|
||||||
if (ret)
|
if (ret)
|
||||||
{
|
{
|
||||||
WSREP_DEBUG("wsrep state is interrupted, THD::killed %d trx state %d",
|
WSREP_DEBUG("wsrep state is interrupted, THD::killed %d trx state %d",
|
||||||
m_thd->killed, m_thd->wsrep_trx().state());
|
m_thd->killed, m_thd->wsrep_trx().state());
|
||||||
}
|
}
|
||||||
|
mysql_mutex_unlock(&m_thd->LOCK_thd_kill);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -41,7 +41,6 @@ static void init_service_thd(THD* thd, char* thread_stack)
|
|||||||
thd->prior_thr_create_utime= thd->start_utime= microsecond_interval_timer();
|
thd->prior_thr_create_utime= thd->start_utime= microsecond_interval_timer();
|
||||||
thd->set_command(COM_SLEEP);
|
thd->set_command(COM_SLEEP);
|
||||||
thd->reset_for_next_command(true);
|
thd->reset_for_next_command(true);
|
||||||
server_threads.insert(thd); // as wsrep_innobase_kill_one_trx() uses find_thread_by_id()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Wsrep_storage_service*
|
Wsrep_storage_service*
|
||||||
@ -81,7 +80,6 @@ void Wsrep_server_service::release_storage_service(
|
|||||||
static_cast<Wsrep_storage_service*>(storage_service);
|
static_cast<Wsrep_storage_service*>(storage_service);
|
||||||
THD* thd= ss->m_thd;
|
THD* thd= ss->m_thd;
|
||||||
wsrep_reset_threadvars(thd);
|
wsrep_reset_threadvars(thd);
|
||||||
server_threads.erase(thd);
|
|
||||||
delete ss;
|
delete ss;
|
||||||
delete thd;
|
delete thd;
|
||||||
}
|
}
|
||||||
@ -95,8 +93,7 @@ wsrep_create_streaming_applier(THD *orig_thd, const char *ctx)
|
|||||||
streaming transaction is BF aborted and streaming applier
|
streaming transaction is BF aborted and streaming applier
|
||||||
is created from BF aborter context. */
|
is created from BF aborter context. */
|
||||||
Wsrep_threadvars saved_threadvars(wsrep_save_threadvars());
|
Wsrep_threadvars saved_threadvars(wsrep_save_threadvars());
|
||||||
if (saved_threadvars.cur_thd)
|
wsrep_reset_threadvars(saved_threadvars.cur_thd);
|
||||||
wsrep_reset_threadvars(saved_threadvars.cur_thd);
|
|
||||||
THD *thd= 0;
|
THD *thd= 0;
|
||||||
Wsrep_applier_service *ret= 0;
|
Wsrep_applier_service *ret= 0;
|
||||||
if (!wsrep_create_threadvars() &&
|
if (!wsrep_create_threadvars() &&
|
||||||
@ -113,8 +110,7 @@ wsrep_create_streaming_applier(THD *orig_thd, const char *ctx)
|
|||||||
}
|
}
|
||||||
/* Restore original thread local storage state before returning. */
|
/* Restore original thread local storage state before returning. */
|
||||||
wsrep_restore_threadvars(saved_threadvars);
|
wsrep_restore_threadvars(saved_threadvars);
|
||||||
if (saved_threadvars.cur_thd)
|
wsrep_store_threadvars(saved_threadvars.cur_thd);
|
||||||
wsrep_store_threadvars(saved_threadvars.cur_thd);
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -143,7 +139,6 @@ void Wsrep_server_service::release_high_priority_service(wsrep::high_priority_se
|
|||||||
THD* thd= hps->m_thd;
|
THD* thd= hps->m_thd;
|
||||||
delete hps;
|
delete hps;
|
||||||
wsrep_store_threadvars(thd);
|
wsrep_store_threadvars(thd);
|
||||||
server_threads.erase(thd);
|
|
||||||
delete thd;
|
delete thd;
|
||||||
wsrep_delete_threadvars();
|
wsrep_delete_threadvars();
|
||||||
}
|
}
|
||||||
|
@ -62,7 +62,6 @@ this program; if not, write to the Free Software Foundation, Inc.,
|
|||||||
|
|
||||||
#include <my_service_manager.h>
|
#include <my_service_manager.h>
|
||||||
#include <key.h>
|
#include <key.h>
|
||||||
#include <sql_manager.h>
|
|
||||||
|
|
||||||
/* Include necessary InnoDB headers */
|
/* Include necessary InnoDB headers */
|
||||||
#include "btr0btr.h"
|
#include "btr0btr.h"
|
||||||
@ -5204,8 +5203,6 @@ static void innobase_kill_query(handlerton*, THD *thd, enum thd_kill_levels)
|
|||||||
if (lock_t *lock= trx->lock.wait_lock)
|
if (lock_t *lock= trx->lock.wait_lock)
|
||||||
{
|
{
|
||||||
trx_mutex_enter(trx);
|
trx_mutex_enter(trx);
|
||||||
if (trx->is_wsrep() && wsrep_thd_is_aborting(thd))
|
|
||||||
trx->lock.was_chosen_as_deadlock_victim= TRUE;
|
|
||||||
lock_cancel_waiting_and_release(lock);
|
lock_cancel_waiting_and_release(lock);
|
||||||
trx_mutex_exit(trx);
|
trx_mutex_exit(trx);
|
||||||
}
|
}
|
||||||
@ -18607,67 +18604,62 @@ static struct st_mysql_storage_engine innobase_storage_engine=
|
|||||||
|
|
||||||
#ifdef WITH_WSREP
|
#ifdef WITH_WSREP
|
||||||
|
|
||||||
struct bg_wsrep_kill_trx_arg {
|
/** This function is used to kill one transaction.
|
||||||
my_thread_id thd_id, bf_thd_id;
|
|
||||||
trx_id_t trx_id, bf_trx_id;
|
|
||||||
bool signal;
|
|
||||||
};
|
|
||||||
|
|
||||||
/** Kill one transaction from a background manager thread
|
This transaction was open on this node (not-yet-committed), and a
|
||||||
|
conflicting writeset from some other node that was being applied
|
||||||
|
caused a locking conflict. First committed (from other node)
|
||||||
|
wins, thus open transaction is rolled back. BF stands for
|
||||||
|
brute-force: any transaction can get aborted by galera any time
|
||||||
|
it is necessary.
|
||||||
|
|
||||||
wsrep_innobase_kill_one_trx() is invoked when lock_sys.mutex and trx mutex
|
This conflict can happen only when the replicated writeset (from
|
||||||
are taken, wsrep_thd_bf_abort() cannot be used there as it takes THD mutexes
|
other node) is being applied, not when it’s waiting in the queue.
|
||||||
that must be taken before lock_sys.mutex and trx mutex. That's why
|
If our local transaction reached its COMMIT and this conflicting
|
||||||
wsrep_innobase_kill_one_trx only posts the killing task to the manager thread
|
writeset was in the queue, then it should fail the local
|
||||||
and the actual killing happens asynchronously here.
|
certification test instead.
|
||||||
|
|
||||||
As no mutexes were held we don't know whether THD or trx pointers are still
|
A brute force abort is only triggered by a locking conflict
|
||||||
valid, so we need to pass thread/trx ids and perform a lookup.
|
between a writeset being applied by an applier thread (slave thread)
|
||||||
*/
|
and an open transaction on the node, not by a Galera writeset
|
||||||
static void bg_wsrep_kill_trx(void *void_arg)
|
comparison as in the local certification failure.
|
||||||
|
|
||||||
|
@param[in] bf_thd Brute force (BF) thread
|
||||||
|
@param[in,out] victim_trx Vimtim trx to be killed
|
||||||
|
@param[in] signal Should victim be signaled */
|
||||||
|
UNIV_INTERN
|
||||||
|
void
|
||||||
|
wsrep_innobase_kill_one_trx(
|
||||||
|
THD* bf_thd,
|
||||||
|
trx_t *victim_trx,
|
||||||
|
bool signal)
|
||||||
{
|
{
|
||||||
bg_wsrep_kill_trx_arg *arg= (bg_wsrep_kill_trx_arg *)void_arg;
|
ut_ad(bf_thd);
|
||||||
THD *thd, *bf_thd;
|
ut_ad(victim_trx);
|
||||||
trx_t *victim_trx;
|
ut_ad(lock_mutex_own());
|
||||||
bool aborting= false;
|
ut_ad(trx_mutex_own(victim_trx));
|
||||||
|
|
||||||
if ((bf_thd= find_thread_by_id(arg->bf_thd_id)))
|
DBUG_ENTER("wsrep_innobase_kill_one_trx");
|
||||||
wsrep_thd_LOCK(bf_thd);
|
|
||||||
if ((thd= find_thread_by_id(arg->thd_id)))
|
|
||||||
wsrep_thd_LOCK(thd);
|
|
||||||
|
|
||||||
if (!thd || !bf_thd || !(victim_trx= thd_to_trx(thd)))
|
|
||||||
goto ret0;
|
|
||||||
|
|
||||||
lock_mutex_enter();
|
|
||||||
trx_mutex_enter(victim_trx);
|
|
||||||
if (victim_trx->id != arg->trx_id
|
|
||||||
|| victim_trx->state == TRX_STATE_COMMITTED_IN_MEMORY)
|
|
||||||
{
|
|
||||||
/* Apparently victim trx was meanwhile rolled back or
|
|
||||||
committed. Tell bf thd not to wait, in case it already
|
|
||||||
started to. */
|
|
||||||
trx_t *trx= thd_to_trx(bf_thd);
|
|
||||||
if (!trx) {
|
|
||||||
/* bf_thd might not be associated with a
|
|
||||||
transaction, in case of MDL conflict */
|
|
||||||
} else if (lock_t *lock = trx->lock.wait_lock) {
|
|
||||||
trx_mutex_enter(trx);
|
|
||||||
lock_cancel_waiting_and_release(lock);
|
|
||||||
trx_mutex_exit(trx);
|
|
||||||
}
|
|
||||||
goto ret1;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
THD *thd= (THD *) victim_trx->mysql_thd;
|
||||||
|
ut_ad(thd);
|
||||||
|
/* Note that bf_trx might not exist here e.g. on MDL conflict
|
||||||
|
case (test: galera_concurrent_ctas). Similarly, BF thread
|
||||||
|
could be also acquiring MDL-lock causing victim to be
|
||||||
|
aborted. However, we have not yet called innobase_trx_init()
|
||||||
|
for BF transaction (test: galera_many_columns)*/
|
||||||
|
trx_t* bf_trx= thd_to_trx(bf_thd);
|
||||||
DBUG_ASSERT(wsrep_on(bf_thd));
|
DBUG_ASSERT(wsrep_on(bf_thd));
|
||||||
|
|
||||||
|
wsrep_thd_LOCK(thd);
|
||||||
|
|
||||||
WSREP_LOG_CONFLICT(bf_thd, thd, TRUE);
|
WSREP_LOG_CONFLICT(bf_thd, thd, TRUE);
|
||||||
|
|
||||||
WSREP_DEBUG("Aborter %s trx_id: " TRX_ID_FMT " thread: %ld "
|
WSREP_DEBUG("Aborter %s trx_id: " TRX_ID_FMT " thread: %ld "
|
||||||
"seqno: %lld client_state: %s client_mode: %s transaction_mode: %s "
|
"seqno: %lld client_state: %s client_mode: %s transaction_mode: %s "
|
||||||
"query: %s",
|
"query: %s",
|
||||||
wsrep_thd_is_BF(bf_thd, false) ? "BF" : "normal",
|
wsrep_thd_is_BF(bf_thd, false) ? "BF" : "normal",
|
||||||
arg->bf_trx_id,
|
bf_trx ? bf_trx->id : TRX_ID_MAX,
|
||||||
thd_get_thread_id(bf_thd),
|
thd_get_thread_id(bf_thd),
|
||||||
wsrep_thd_trx_seqno(bf_thd),
|
wsrep_thd_trx_seqno(bf_thd),
|
||||||
wsrep_thd_client_state_str(bf_thd),
|
wsrep_thd_client_state_str(bf_thd),
|
||||||
@ -18692,84 +18684,28 @@ static void bg_wsrep_kill_trx(void *void_arg)
|
|||||||
if (wsrep_thd_set_wsrep_aborter(bf_thd, thd))
|
if (wsrep_thd_set_wsrep_aborter(bf_thd, thd))
|
||||||
{
|
{
|
||||||
WSREP_DEBUG("innodb kill transaction skipped due to wsrep_aborter set");
|
WSREP_DEBUG("innodb kill transaction skipped due to wsrep_aborter set");
|
||||||
goto ret1;
|
wsrep_thd_UNLOCK(thd);
|
||||||
|
DBUG_VOID_RETURN;
|
||||||
}
|
}
|
||||||
|
|
||||||
aborting= true;
|
/* Note that we need to release this as it will be acquired
|
||||||
|
below in wsrep-lib */
|
||||||
|
wsrep_thd_UNLOCK(thd);
|
||||||
|
DEBUG_SYNC(bf_thd, "before_wsrep_thd_abort");
|
||||||
|
|
||||||
ret1:
|
if (wsrep_thd_bf_abort(bf_thd, thd, signal))
|
||||||
trx_mutex_exit(victim_trx);
|
{
|
||||||
lock_mutex_exit();
|
lock_t* wait_lock = victim_trx->lock.wait_lock;
|
||||||
ret0:
|
if (wait_lock) {
|
||||||
if (thd) {
|
DBUG_ASSERT(victim_trx->is_wsrep());
|
||||||
wsrep_thd_UNLOCK(thd);
|
WSREP_DEBUG("victim has wait flag: %lu",
|
||||||
if (aborting) {
|
thd_get_thread_id(thd));
|
||||||
DEBUG_SYNC(bf_thd, "before_wsrep_thd_abort");
|
|
||||||
wsrep_thd_bf_abort(bf_thd, thd, arg->signal);
|
WSREP_DEBUG("canceling wait lock");
|
||||||
|
victim_trx->lock.was_chosen_as_deadlock_victim= TRUE;
|
||||||
|
lock_cancel_waiting_and_release(wait_lock);
|
||||||
}
|
}
|
||||||
wsrep_thd_kill_UNLOCK(thd);
|
|
||||||
}
|
}
|
||||||
if (bf_thd) {
|
|
||||||
wsrep_thd_UNLOCK(bf_thd);
|
|
||||||
wsrep_thd_kill_UNLOCK(bf_thd);
|
|
||||||
}
|
|
||||||
free(arg);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** This function is used to kill one transaction.
|
|
||||||
|
|
||||||
This transaction was open on this node (not-yet-committed), and a
|
|
||||||
conflicting writeset from some other node that was being applied
|
|
||||||
caused a locking conflict. First committed (from other node)
|
|
||||||
wins, thus open transaction is rolled back. BF stands for
|
|
||||||
brute-force: any transaction can get aborted by galera any time
|
|
||||||
it is necessary.
|
|
||||||
|
|
||||||
This conflict can happen only when the replicated writeset (from
|
|
||||||
other node) is being applied, not when it’s waiting in the queue.
|
|
||||||
If our local transaction reached its COMMIT and this conflicting
|
|
||||||
writeset was in the queue, then it should fail the local
|
|
||||||
certification test instead.
|
|
||||||
|
|
||||||
A brute force abort is only triggered by a locking conflict
|
|
||||||
between a writeset being applied by an applier thread (slave thread)
|
|
||||||
and an open transaction on the node, not by a Galera writeset
|
|
||||||
comparison as in the local certification failure.
|
|
||||||
|
|
||||||
@param[in] bf_thd Brute force (BF) thread
|
|
||||||
@param[in,out] victim_trx Transaction to be killed
|
|
||||||
@param[in] signal Should victim be signaled */
|
|
||||||
void
|
|
||||||
wsrep_innobase_kill_one_trx(
|
|
||||||
THD* bf_thd,
|
|
||||||
trx_t *victim_trx,
|
|
||||||
bool signal)
|
|
||||||
{
|
|
||||||
ut_ad(bf_thd);
|
|
||||||
ut_ad(victim_trx);
|
|
||||||
ut_ad(lock_mutex_own());
|
|
||||||
ut_ad(trx_mutex_own(victim_trx));
|
|
||||||
|
|
||||||
DBUG_ENTER("wsrep_innobase_kill_one_trx");
|
|
||||||
|
|
||||||
DBUG_EXECUTE_IF("sync.before_wsrep_thd_abort",
|
|
||||||
{
|
|
||||||
const char act[]=
|
|
||||||
"now "
|
|
||||||
"SIGNAL sync.before_wsrep_thd_abort_reached "
|
|
||||||
"WAIT_FOR signal.before_wsrep_thd_abort";
|
|
||||||
DBUG_ASSERT(!debug_sync_set_action(bf_thd,
|
|
||||||
STRING_WITH_LEN(act)));
|
|
||||||
};);
|
|
||||||
|
|
||||||
trx_t* bf_trx= thd_to_trx(bf_thd);
|
|
||||||
bg_wsrep_kill_trx_arg *arg = (bg_wsrep_kill_trx_arg*)malloc(sizeof(*arg));
|
|
||||||
arg->thd_id = thd_get_thread_id(victim_trx->mysql_thd);
|
|
||||||
arg->trx_id = victim_trx->id;
|
|
||||||
arg->bf_thd_id = thd_get_thread_id(bf_thd);
|
|
||||||
arg->bf_trx_id = bf_trx ? bf_trx->id : TRX_ID_MAX;
|
|
||||||
arg->signal = signal;
|
|
||||||
mysql_manager_submit(bg_wsrep_kill_trx, arg);
|
|
||||||
|
|
||||||
DBUG_VOID_RETURN;
|
DBUG_VOID_RETURN;
|
||||||
}
|
}
|
||||||
@ -18805,44 +18741,13 @@ wsrep_abort_transaction(
|
|||||||
if (victim_trx) {
|
if (victim_trx) {
|
||||||
lock_mutex_enter();
|
lock_mutex_enter();
|
||||||
trx_mutex_enter(victim_trx);
|
trx_mutex_enter(victim_trx);
|
||||||
victim_trx->lock.was_chosen_as_wsrep_victim= true;
|
wsrep_innobase_kill_one_trx(bf_thd, victim_trx, signal);
|
||||||
trx_mutex_exit(victim_trx);
|
trx_mutex_exit(victim_trx);
|
||||||
lock_mutex_exit();
|
lock_mutex_exit();
|
||||||
|
|
||||||
wsrep_thd_kill_LOCK(victim_thd);
|
|
||||||
wsrep_thd_LOCK(victim_thd);
|
|
||||||
bool aborting= !wsrep_thd_set_wsrep_aborter(bf_thd, victim_thd);
|
|
||||||
wsrep_thd_UNLOCK(victim_thd);
|
|
||||||
if (aborting) {
|
|
||||||
DEBUG_SYNC(bf_thd, "before_wsrep_thd_abort");
|
|
||||||
DBUG_EXECUTE_IF("sync.before_wsrep_thd_abort",
|
|
||||||
{
|
|
||||||
const char act[]=
|
|
||||||
"now "
|
|
||||||
"SIGNAL sync.before_wsrep_thd_abort_reached "
|
|
||||||
"WAIT_FOR signal.before_wsrep_thd_abort";
|
|
||||||
DBUG_ASSERT(!debug_sync_set_action(bf_thd,
|
|
||||||
STRING_WITH_LEN(act)));
|
|
||||||
};);
|
|
||||||
wsrep_thd_bf_abort(bf_thd, victim_thd, signal);
|
|
||||||
}
|
|
||||||
wsrep_thd_kill_UNLOCK(victim_thd);
|
|
||||||
|
|
||||||
wsrep_srv_conc_cancel_wait(victim_trx);
|
wsrep_srv_conc_cancel_wait(victim_trx);
|
||||||
DBUG_VOID_RETURN;
|
DBUG_VOID_RETURN;
|
||||||
} else {
|
} else {
|
||||||
DBUG_EXECUTE_IF("sync.before_wsrep_thd_abort",
|
|
||||||
{
|
|
||||||
const char act[]=
|
|
||||||
"now "
|
|
||||||
"SIGNAL sync.before_wsrep_thd_abort_reached "
|
|
||||||
"WAIT_FOR signal.before_wsrep_thd_abort";
|
|
||||||
DBUG_ASSERT(!debug_sync_set_action(bf_thd,
|
|
||||||
STRING_WITH_LEN(act)));
|
|
||||||
};);
|
|
||||||
wsrep_thd_kill_LOCK(victim_thd);
|
|
||||||
wsrep_thd_bf_abort(bf_thd, victim_thd, signal);
|
wsrep_thd_bf_abort(bf_thd, victim_thd, signal);
|
||||||
wsrep_thd_kill_UNLOCK(victim_thd);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
DBUG_VOID_RETURN;
|
DBUG_VOID_RETURN;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user