Fix for thread getting stuck after BF abort (#1362)

- Fixes a situation in which a thread is BF aborted and does not send a reply back to
  the client, even though the connection is still alive. This caused
  both sides to hang waiting for the next message. We now explicitly
  check that the connection is still alive.
- MTR test for the above
- Replaced thd->killed assignments with thd->reset_kill_query() calls where applicable.
This commit is contained in:
Leandro Pacheco 2019-09-17 04:58:20 -03:00 committed by Jan Lindström
parent c793f07841
commit efefafd02f
4 changed files with 77 additions and 15 deletions

View File

@ -0,0 +1,21 @@
connection node_2;
connection node_1;
connection node_1;
CREATE TABLE t1 (id INT PRIMARY KEY, val INT);
INSERT INTO t1 VALUES (1, 1);
connection node_2;
START TRANSACTION;
SET DEBUG_SYNC = 'wsrep_after_statement_enter SIGNAL blocked WAIT_FOR continue';
UPDATE t1 SET val=2 WHERE id=1;
connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2;
SET DEBUG_SYNC = 'now WAIT_FOR blocked';
connection node_1;
UPDATE t1 SET val=3 WHERE id=1;
connection node_2a;
SET DEBUG_SYNC = 'now SIGNAL continue';
connection node_2;
ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
COMMIT;
SET DEBUG_SYNC = 'RESET';
connection node_1;
DROP TABLE t1;

View File

@ -0,0 +1,37 @@
#
# This test checks that the client gets an error for a BF abort,
# specifically when it gets detected inside the wsrep_after_statement hook
#
# Outline:
#   1. node_1 creates and populates t1.
#   2. node_2 opens a transaction and issues an UPDATE that is parked at the
#      'wsrep_after_statement_enter' debug sync point.
#   3. node_1 runs a conflicting UPDATE on the same row.
#   4. node_2a releases the parked statement; node_2 must then receive
#      ER_LOCK_DEADLOCK instead of hanging without a reply.
#
--source include/have_innodb.inc
--source include/galera_cluster.inc
--source include/have_debug_sync.inc
--connection node_1
CREATE TABLE t1 (id INT PRIMARY KEY, val INT);
INSERT INTO t1 VALUES (1, 1);
--connection node_2
START TRANSACTION;
# Arm the sync point: the next statement reaching 'wsrep_after_statement_enter'
# signals 'blocked' and then waits for 'continue'.
SET DEBUG_SYNC = 'wsrep_after_statement_enter SIGNAL blocked WAIT_FOR continue';
# Issue the UPDATE without waiting for its result; it is collected by --reap below.
--send UPDATE t1 SET val=2 WHERE id=1
# Second session on node 2, used to drive the sync point while node_2 is parked.
--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2
# Wait until the UPDATE on node_2 has reached the sync point.
SET DEBUG_SYNC = 'now WAIT_FOR blocked';
--connection node_1
# cause BF abort on other node
UPDATE t1 SET val=3 WHERE id=1;
--connection node_2a
# Let the parked UPDATE on node_2 proceed.
SET DEBUG_SYNC = 'now SIGNAL continue';
--connection node_2
# check we get BF aborted
--error ER_LOCK_DEADLOCK
--reap
COMMIT;
# Clear all debug sync actions and signals set by this test.
SET DEBUG_SYNC = 'RESET';
--connection node_1
DROP TABLE t1;

View File

@ -1829,8 +1829,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
{
WSREP_DEBUG("Deadlock error for: %s", thd->query());
mysql_mutex_lock(&thd->LOCK_thd_data);
thd->killed = NOT_KILLED;
thd->mysys_var->abort = 0;
thd->reset_kill_query();
thd->wsrep_retry_counter = 0;
mysql_mutex_unlock(&thd->LOCK_thd_data);
goto dispatch_end;
@ -1933,8 +1932,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
{
WSREP_DEBUG("Deadlock error for: %s", thd->query());
mysql_mutex_lock(&thd->LOCK_thd_data);
thd->killed = NOT_KILLED;
thd->mysys_var->abort = 0;
thd->reset_kill_query();
thd->wsrep_retry_counter = 0;
mysql_mutex_unlock(&thd->LOCK_thd_data);
@ -2410,13 +2408,11 @@ dispatch_end:
*/
DBUG_ASSERT((command != COM_QUIT && command != COM_STMT_CLOSE)
|| thd->get_stmt_da()->is_disabled());
DBUG_ASSERT(thd->wsrep_trx().state() != wsrep::transaction::s_replaying);
/* wsrep BF abort in query exec phase */
mysql_mutex_lock(&thd->LOCK_thd_data);
do_end_of_statement=
thd->wsrep_trx().state() != wsrep::transaction::s_replaying
&& !thd->killed;
mysql_mutex_unlock(&thd->LOCK_thd_data);
mysql_mutex_lock(&thd->LOCK_thd_kill);
do_end_of_statement= thd_is_connection_alive(thd);
mysql_mutex_unlock(&thd->LOCK_thd_kill);
}
else
do_end_of_statement= true;
@ -7737,14 +7733,22 @@ static bool wsrep_mysql_parse(THD *thd, char *rawbuf, uint length,
(thd->get_stmt_da()->is_error()) ?
thd->get_stmt_da()->sql_errno() : 0);
thd->killed = NOT_KILLED;
thd->reset_kill_query();
wsrep_override_error(thd, ER_LOCK_DEADLOCK);
}
if (wsrep_after_statement(thd) && is_autocommit)
#ifdef ENABLED_DEBUG_SYNC
/* we need the test otherwise we get stuck in the "SET DEBUG_SYNC" itself */
if (thd->lex->sql_command != SQLCOM_SET_OPTION)
DEBUG_SYNC(thd, "wsrep_after_statement_enter");
#endif
if (wsrep_after_statement(thd) &&
is_autocommit &&
thd_is_connection_alive(thd))
{
thd->reset_for_next_command();
thd->killed= NOT_KILLED;
thd->reset_kill_query();
if (is_autocommit &&
thd->lex->sql_command != SQLCOM_SELECT &&
thd->wsrep_retry_counter < thd->variables.wsrep_retry_autocommit)
@ -7774,7 +7778,7 @@ static bool wsrep_mysql_parse(THD *thd, char *rawbuf, uint length,
thd->variables.wsrep_retry_autocommit,
WSREP_QUERY(thd));
my_error(ER_LOCK_DEADLOCK, MYF(0));
thd->killed= NOT_KILLED;
thd->reset_kill_query();
thd->wsrep_retry_counter= 0; // reset
}
}

View File

@ -598,7 +598,7 @@ Wsrep_replayer_service::~Wsrep_replayer_service()
if (m_replay_status == wsrep::provider::success)
{
DBUG_ASSERT(replayer_thd->wsrep_cs().current_error() == wsrep::e_success);
orig_thd->killed= NOT_KILLED;
orig_thd->reset_kill_query();
my_ok(orig_thd, m_da_shadow.affected_rows, m_da_shadow.last_insert_id);
}
else if (m_replay_status == wsrep::provider::error_certification_failed)