fix for thread getting stuck after BF ABORT (#1362)
- Fixes a situation in which a thread gets BF aborted and does not send the reply back to the client, even though the connection is still alive. This caused both sides to hang, each waiting for the next message. Now we explicitly check that the connection is still alive. - Added an MTR test for the above. - Replaced thd->killed assignments with thd->reset_kill_query() calls where applicable.
This commit is contained in:
parent
c793f07841
commit
efefafd02f
@ -0,0 +1,21 @@
|
||||
connection node_2;
|
||||
connection node_1;
|
||||
connection node_1;
|
||||
CREATE TABLE t1 (id INT PRIMARY KEY, val INT);
|
||||
INSERT INTO t1 VALUES (1, 1);
|
||||
connection node_2;
|
||||
START TRANSACTION;
|
||||
SET DEBUG_SYNC = 'wsrep_after_statement_enter SIGNAL blocked WAIT_FOR continue';
|
||||
UPDATE t1 SET val=2 WHERE id=1;
|
||||
connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2;
|
||||
SET DEBUG_SYNC = 'now WAIT_FOR blocked';
|
||||
connection node_1;
|
||||
UPDATE t1 SET val=3 WHERE id=1;
|
||||
connection node_2a;
|
||||
SET DEBUG_SYNC = 'now SIGNAL continue';
|
||||
connection node_2;
|
||||
ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
|
||||
COMMIT;
|
||||
SET DEBUG_SYNC = 'RESET';
|
||||
connection node_1;
|
||||
DROP TABLE t1;
|
@ -0,0 +1,37 @@
|
||||
#
|
||||
# This test checks that the client gets an error for a BF abort,
|
||||
# specifically when it gets detected inside the wsrep_after_statement hook
|
||||
#
|
||||
|
||||
--source include/have_innodb.inc
|
||||
--source include/galera_cluster.inc
|
||||
--source include/have_debug_sync.inc
|
||||
|
||||
--connection node_1
|
||||
CREATE TABLE t1 (id INT PRIMARY KEY, val INT);
|
||||
INSERT INTO t1 VALUES (1, 1);
|
||||
|
||||
--connection node_2
|
||||
START TRANSACTION;
|
||||
SET DEBUG_SYNC = 'wsrep_after_statement_enter SIGNAL blocked WAIT_FOR continue';
|
||||
--send UPDATE t1 SET val=2 WHERE id=1
|
||||
|
||||
--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2
|
||||
SET DEBUG_SYNC = 'now WAIT_FOR blocked';
|
||||
|
||||
--connection node_1
|
||||
# cause BF abort on other node
|
||||
UPDATE t1 SET val=3 WHERE id=1;
|
||||
|
||||
--connection node_2a
|
||||
SET DEBUG_SYNC = 'now SIGNAL continue';
|
||||
|
||||
--connection node_2
|
||||
# check we get BF aborted
|
||||
--error ER_LOCK_DEADLOCK
|
||||
--reap
|
||||
COMMIT;
|
||||
SET DEBUG_SYNC = 'RESET';
|
||||
|
||||
--connection node_1
|
||||
DROP TABLE t1;
|
@ -1829,8 +1829,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
|
||||
{
|
||||
WSREP_DEBUG("Deadlock error for: %s", thd->query());
|
||||
mysql_mutex_lock(&thd->LOCK_thd_data);
|
||||
thd->killed = NOT_KILLED;
|
||||
thd->mysys_var->abort = 0;
|
||||
thd->reset_kill_query();
|
||||
thd->wsrep_retry_counter = 0;
|
||||
mysql_mutex_unlock(&thd->LOCK_thd_data);
|
||||
goto dispatch_end;
|
||||
@ -1933,8 +1932,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
|
||||
{
|
||||
WSREP_DEBUG("Deadlock error for: %s", thd->query());
|
||||
mysql_mutex_lock(&thd->LOCK_thd_data);
|
||||
thd->killed = NOT_KILLED;
|
||||
thd->mysys_var->abort = 0;
|
||||
thd->reset_kill_query();
|
||||
thd->wsrep_retry_counter = 0;
|
||||
mysql_mutex_unlock(&thd->LOCK_thd_data);
|
||||
|
||||
@ -2410,13 +2408,11 @@ dispatch_end:
|
||||
*/
|
||||
DBUG_ASSERT((command != COM_QUIT && command != COM_STMT_CLOSE)
|
||||
|| thd->get_stmt_da()->is_disabled());
|
||||
DBUG_ASSERT(thd->wsrep_trx().state() != wsrep::transaction::s_replaying);
|
||||
/* wsrep BF abort in query exec phase */
|
||||
mysql_mutex_lock(&thd->LOCK_thd_data);
|
||||
do_end_of_statement=
|
||||
thd->wsrep_trx().state() != wsrep::transaction::s_replaying
|
||||
&& !thd->killed;
|
||||
|
||||
mysql_mutex_unlock(&thd->LOCK_thd_data);
|
||||
mysql_mutex_lock(&thd->LOCK_thd_kill);
|
||||
do_end_of_statement= thd_is_connection_alive(thd);
|
||||
mysql_mutex_unlock(&thd->LOCK_thd_kill);
|
||||
}
|
||||
else
|
||||
do_end_of_statement= true;
|
||||
@ -7737,14 +7733,22 @@ static bool wsrep_mysql_parse(THD *thd, char *rawbuf, uint length,
|
||||
(thd->get_stmt_da()->is_error()) ?
|
||||
thd->get_stmt_da()->sql_errno() : 0);
|
||||
|
||||
thd->killed = NOT_KILLED;
|
||||
thd->reset_kill_query();
|
||||
wsrep_override_error(thd, ER_LOCK_DEADLOCK);
|
||||
}
|
||||
|
||||
if (wsrep_after_statement(thd) && is_autocommit)
|
||||
#ifdef ENABLED_DEBUG_SYNC
|
||||
/* we need the test otherwise we get stuck in the "SET DEBUG_SYNC" itself */
|
||||
if (thd->lex->sql_command != SQLCOM_SET_OPTION)
|
||||
DEBUG_SYNC(thd, "wsrep_after_statement_enter");
|
||||
#endif
|
||||
|
||||
if (wsrep_after_statement(thd) &&
|
||||
is_autocommit &&
|
||||
thd_is_connection_alive(thd))
|
||||
{
|
||||
thd->reset_for_next_command();
|
||||
thd->killed= NOT_KILLED;
|
||||
thd->reset_kill_query();
|
||||
if (is_autocommit &&
|
||||
thd->lex->sql_command != SQLCOM_SELECT &&
|
||||
thd->wsrep_retry_counter < thd->variables.wsrep_retry_autocommit)
|
||||
@ -7774,7 +7778,7 @@ static bool wsrep_mysql_parse(THD *thd, char *rawbuf, uint length,
|
||||
thd->variables.wsrep_retry_autocommit,
|
||||
WSREP_QUERY(thd));
|
||||
my_error(ER_LOCK_DEADLOCK, MYF(0));
|
||||
thd->killed= NOT_KILLED;
|
||||
thd->reset_kill_query();
|
||||
thd->wsrep_retry_counter= 0; // reset
|
||||
}
|
||||
}
|
||||
|
@ -598,7 +598,7 @@ Wsrep_replayer_service::~Wsrep_replayer_service()
|
||||
if (m_replay_status == wsrep::provider::success)
|
||||
{
|
||||
DBUG_ASSERT(replayer_thd->wsrep_cs().current_error() == wsrep::e_success);
|
||||
orig_thd->killed= NOT_KILLED;
|
||||
orig_thd->reset_kill_query();
|
||||
my_ok(orig_thd, m_da_shadow.affected_rows, m_da_shadow.last_insert_id);
|
||||
}
|
||||
else if (m_replay_status == wsrep::provider::error_certification_failed)
|
||||
|
Loading…
x
Reference in New Issue
Block a user