From efefafd02f72b3d56f82a29a9fa4a100932d55c0 Mon Sep 17 00:00:00 2001 From: Leandro Pacheco Date: Tue, 17 Sep 2019 04:58:20 -0300 Subject: [PATCH] fix for thread getting stuck after BF ABORT (#1362) - Fixes a situation in which a thread gets BF aborted and does not send the reply back to the client, even though the connection is still alive. That caused both sides to hang waiting for the next message. Now we explicitly check that the connection is still alive. - MTR test for the above - Replaced thd->killed assignments with thd->reset_kill_query() calls where applicable. --- .../galera_bf_abort_at_after_statement.result | 21 +++++++++++ .../t/galera_bf_abort_at_after_statement.test | 37 +++++++++++++++++++ sql/sql_parse.cc | 32 +++++++++------- sql/wsrep_high_priority_service.cc | 2 +- 4 files changed, 77 insertions(+), 15 deletions(-) create mode 100644 mysql-test/suite/galera/r/galera_bf_abort_at_after_statement.result create mode 100644 mysql-test/suite/galera/t/galera_bf_abort_at_after_statement.test diff --git a/mysql-test/suite/galera/r/galera_bf_abort_at_after_statement.result b/mysql-test/suite/galera/r/galera_bf_abort_at_after_statement.result new file mode 100644 index 00000000000..e44a925baf4 --- /dev/null +++ b/mysql-test/suite/galera/r/galera_bf_abort_at_after_statement.result @@ -0,0 +1,21 @@ +connection node_2; +connection node_1; +connection node_1; +CREATE TABLE t1 (id INT PRIMARY KEY, val INT); +INSERT INTO t1 VALUES (1, 1); +connection node_2; +START TRANSACTION; +SET DEBUG_SYNC = 'wsrep_after_statement_enter SIGNAL blocked WAIT_FOR continue'; +UPDATE t1 SET val=2 WHERE id=1; +connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2; +SET DEBUG_SYNC = 'now WAIT_FOR blocked'; +connection node_1; +UPDATE t1 SET val=3 WHERE id=1; +connection node_2a; +SET DEBUG_SYNC = 'now SIGNAL continue'; +connection node_2; +ERROR 40001: Deadlock found when trying to get lock; try restarting transaction +COMMIT; +SET DEBUG_SYNC = 'RESET'; +connection node_1; +DROP TABLE 
t1; diff --git a/mysql-test/suite/galera/t/galera_bf_abort_at_after_statement.test b/mysql-test/suite/galera/t/galera_bf_abort_at_after_statement.test new file mode 100644 index 00000000000..b1fe3e64cd8 --- /dev/null +++ b/mysql-test/suite/galera/t/galera_bf_abort_at_after_statement.test @@ -0,0 +1,37 @@ +# +# This test checks that the client gets an error for a BF abort, +# specifically when it gets detected inside the wsrep_after_statement hook +# + +--source include/have_innodb.inc +--source include/galera_cluster.inc +--source include/have_debug_sync.inc + +--connection node_1 +CREATE TABLE t1 (id INT PRIMARY KEY, val INT); +INSERT INTO t1 VALUES (1, 1); + +--connection node_2 +START TRANSACTION; +SET DEBUG_SYNC = 'wsrep_after_statement_enter SIGNAL blocked WAIT_FOR continue'; +--send UPDATE t1 SET val=2 WHERE id=1 + +--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2 +SET DEBUG_SYNC = 'now WAIT_FOR blocked'; + +--connection node_1 +# cause BF abort on other node +UPDATE t1 SET val=3 WHERE id=1; + +--connection node_2a +SET DEBUG_SYNC = 'now SIGNAL continue'; + +--connection node_2 +# check we get BF aborted +--error ER_LOCK_DEADLOCK +--reap +COMMIT; +SET DEBUG_SYNC = 'RESET'; + +--connection node_1 +DROP TABLE t1; diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 4e04ddf544f..c0b78ead6d6 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -1829,8 +1829,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd, { WSREP_DEBUG("Deadlock error for: %s", thd->query()); mysql_mutex_lock(&thd->LOCK_thd_data); - thd->killed = NOT_KILLED; - thd->mysys_var->abort = 0; + thd->reset_kill_query(); thd->wsrep_retry_counter = 0; mysql_mutex_unlock(&thd->LOCK_thd_data); goto dispatch_end; @@ -1933,8 +1932,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd, { WSREP_DEBUG("Deadlock error for: %s", thd->query()); mysql_mutex_lock(&thd->LOCK_thd_data); - thd->killed = NOT_KILLED; - thd->mysys_var->abort = 0; + 
thd->reset_kill_query(); thd->wsrep_retry_counter = 0; mysql_mutex_unlock(&thd->LOCK_thd_data); @@ -2410,13 +2408,11 @@ dispatch_end: */ DBUG_ASSERT((command != COM_QUIT && command != COM_STMT_CLOSE) || thd->get_stmt_da()->is_disabled()); + DBUG_ASSERT(thd->wsrep_trx().state() != wsrep::transaction::s_replaying); /* wsrep BF abort in query exec phase */ - mysql_mutex_lock(&thd->LOCK_thd_data); - do_end_of_statement= - thd->wsrep_trx().state() != wsrep::transaction::s_replaying - && !thd->killed; - - mysql_mutex_unlock(&thd->LOCK_thd_data); + mysql_mutex_lock(&thd->LOCK_thd_kill); + do_end_of_statement= thd_is_connection_alive(thd); + mysql_mutex_unlock(&thd->LOCK_thd_kill); } else do_end_of_statement= true; @@ -7737,14 +7733,22 @@ static bool wsrep_mysql_parse(THD *thd, char *rawbuf, uint length, (thd->get_stmt_da()->is_error()) ? thd->get_stmt_da()->sql_errno() : 0); - thd->killed = NOT_KILLED; + thd->reset_kill_query(); wsrep_override_error(thd, ER_LOCK_DEADLOCK); } - if (wsrep_after_statement(thd) && is_autocommit) +#ifdef ENABLED_DEBUG_SYNC + /* we need the test otherwise we get stuck in the "SET DEBUG_SYNC" itself */ + if (thd->lex->sql_command != SQLCOM_SET_OPTION) + DEBUG_SYNC(thd, "wsrep_after_statement_enter"); +#endif + + if (wsrep_after_statement(thd) && + is_autocommit && + thd_is_connection_alive(thd)) { thd->reset_for_next_command(); - thd->killed= NOT_KILLED; + thd->reset_kill_query(); if (is_autocommit && thd->lex->sql_command != SQLCOM_SELECT && thd->wsrep_retry_counter < thd->variables.wsrep_retry_autocommit) @@ -7774,7 +7778,7 @@ static bool wsrep_mysql_parse(THD *thd, char *rawbuf, uint length, thd->variables.wsrep_retry_autocommit, WSREP_QUERY(thd)); my_error(ER_LOCK_DEADLOCK, MYF(0)); - thd->killed= NOT_KILLED; + thd->reset_kill_query(); thd->wsrep_retry_counter= 0; // reset } } diff --git a/sql/wsrep_high_priority_service.cc b/sql/wsrep_high_priority_service.cc index 73cdbd1c217..3077985250d 100644 --- a/sql/wsrep_high_priority_service.cc +++ 
b/sql/wsrep_high_priority_service.cc @@ -598,7 +598,7 @@ Wsrep_replayer_service::~Wsrep_replayer_service() if (m_replay_status == wsrep::provider::success) { DBUG_ASSERT(replayer_thd->wsrep_cs().current_error() == wsrep::e_success); - orig_thd->killed= NOT_KILLED; + orig_thd->reset_kill_query(); my_ok(orig_thd, m_da_shadow.affected_rows, m_da_shadow.last_insert_id); } else if (m_replay_status == wsrep::provider::error_certification_failed)