diff --git a/include/mysql/service_wsrep.h b/include/mysql/service_wsrep.h index 42b758c03f3..f3588da4b46 100644 --- a/include/mysql/service_wsrep.h +++ b/include/mysql/service_wsrep.h @@ -57,6 +57,7 @@ extern struct wsrep_service_st { my_bool (*wsrep_on_func)(const MYSQL_THD thd); bool (*wsrep_prepare_key_for_innodb_func)(MYSQL_THD thd, const unsigned char*, size_t, const unsigned char*, size_t, struct wsrep_buf*, size_t*); void (*wsrep_thd_LOCK_func)(const MYSQL_THD thd); + int (*wsrep_thd_TRYLOCK_func)(const MYSQL_THD thd); void (*wsrep_thd_UNLOCK_func)(const MYSQL_THD thd); const char * (*wsrep_thd_query_func)(const MYSQL_THD thd); int (*wsrep_thd_retry_counter_func)(const MYSQL_THD thd); @@ -86,7 +87,6 @@ extern struct wsrep_service_st { ulong (*wsrep_OSU_method_get_func)(const MYSQL_THD thd); my_bool (*wsrep_thd_has_ignored_error_func)(const MYSQL_THD thd); void (*wsrep_thd_set_ignored_error_func)(MYSQL_THD thd, my_bool val); - bool (*wsrep_thd_set_wsrep_aborter_func)(MYSQL_THD bf_thd, MYSQL_THD thd); void (*wsrep_report_bf_lock_wait_func)(const MYSQL_THD thd, unsigned long long trx_id); void (*wsrep_thd_kill_LOCK_func)(const MYSQL_THD thd); @@ -108,6 +108,7 @@ extern struct wsrep_service_st { #define wsrep_on(thd) (thd) && WSREP_ON && wsrep_service->wsrep_on_func(thd) #define wsrep_prepare_key_for_innodb(A,B,C,D,E,F,G) wsrep_service->wsrep_prepare_key_for_innodb_func(A,B,C,D,E,F,G) #define wsrep_thd_LOCK(T) wsrep_service->wsrep_thd_LOCK_func(T) +#define wsrep_thd_TRYLOCK(T) wsrep_service->wsrep_thd_TRYLOCK_func(T) #define wsrep_thd_UNLOCK(T) wsrep_service->wsrep_thd_UNLOCK_func(T) #define wsrep_thd_kill_LOCK(T) wsrep_service->wsrep_thd_kill_LOCK_func(T) #define wsrep_thd_kill_UNLOCK(T) wsrep_service->wsrep_thd_kill_UNLOCK_func(T) @@ -136,7 +137,6 @@ extern struct wsrep_service_st { #define wsrep_OSU_method_get(T) wsrep_service->wsrep_OSU_method_get_func(T) #define wsrep_thd_has_ignored_error(T) wsrep_service->wsrep_thd_has_ignored_error_func(T) #define wsrep_thd_set_ignored_error(T,V) wsrep_service->wsrep_thd_set_ignored_error_func(T,V) -#define wsrep_thd_set_wsrep_aborter(T) wsrep_service->wsrep_thd_set_wsrep_aborter_func(T1, T2) #define wsrep_report_bf_lock_wait(T,I) wsrep_service->wsrep_report_bf_lock_wait(T,I) #define wsrep_thd_set_PA_unsafe(T) wsrep_service->wsrep_thd_set_PA_unsafe_func(T) #else @@ -170,6 +170,8 @@ void wsrep_set_data_home_dir(const char *data_dir); extern "C" my_bool wsrep_on(const MYSQL_THD thd); /* Lock thd wsrep lock */ extern "C" void wsrep_thd_LOCK(const MYSQL_THD thd); +/* Try thd wsrep lock. Return non-zero if lock could not be taken. */ +extern "C" int wsrep_thd_TRYLOCK(const MYSQL_THD thd); /* Unlock thd wsrep lock */ extern "C" void wsrep_thd_UNLOCK(const MYSQL_THD thd); @@ -192,8 +194,6 @@ extern "C" my_bool wsrep_thd_is_local(const MYSQL_THD thd); /* Return true if thd is in high priority mode */ /* todo: rename to is_high_priority() */ extern "C" my_bool wsrep_thd_is_applying(const MYSQL_THD thd); -/* set wsrep_aborter for the target THD */ -extern "C" bool wsrep_thd_set_wsrep_aborter(MYSQL_THD bf_thd, MYSQL_THD victim_thd); /* Return true if thd is in TOI mode */ extern "C" my_bool wsrep_thd_is_toi(const MYSQL_THD thd); /* Return true if thd is in replicating TOI mode */ @@ -237,7 +237,6 @@ extern "C" my_bool wsrep_thd_is_applying(const MYSQL_THD thd); extern "C" ulong wsrep_OSU_method_get(const MYSQL_THD thd); extern "C" my_bool wsrep_thd_has_ignored_error(const MYSQL_THD thd); extern "C" void wsrep_thd_set_ignored_error(MYSQL_THD thd, my_bool val); -extern "C" bool wsrep_thd_set_wsrep_aborter(MYSQL_THD bf_thd, MYSQL_THD victim_thd); extern "C" void wsrep_report_bf_lock_wait(const THD *thd, unsigned long long trx_id); /* declare parallel applying unsafety for the THD */ diff --git a/mysql-test/suite/galera/disabled.def b/mysql-test/suite/galera/disabled.def index d15f2d271ca..0c98133ab5f 100644 --- a/mysql-test/suite/galera/disabled.def +++ b/mysql-test/suite/galera/disabled.def @@ -22,3 +22,5 @@ MDEV-26575 : MDEV-29878 Galera test failure on MDEV-26575 galera_bf_abort_shutdown : MDEV-29918 Assertion failure on galera_bf_abort_shutdown galera_wan : [ERROR] WSREP: /home/buildbot/buildbot/build/gcs/src/gcs_state_msg.cpp:gcs_state_msg_get_quorum():947: Failed to establish quorum. galera_var_ignore_apply_errors : 28: "Server did not transition to READY state" +MDEV-27713 : test is using get_lock(), which is now rejected in cluster +galera_bf_abort_group_commit : MDEV-30855 PR to remove the test exists diff --git a/mysql-test/suite/galera/r/MDEV-29293.result b/mysql-test/suite/galera/r/MDEV-29293.result new file mode 100644 index 00000000000..70c0cc84a31 --- /dev/null +++ b/mysql-test/suite/galera/r/MDEV-29293.result @@ -0,0 +1,21 @@ +connection node_2; +connection node_1; +connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1; +connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1; +set wsrep_sync_wait = 0; +CREATE TABLE t1(a int not null primary key auto_increment, b int) engine=InnoDB; +INSERT INTO t1 VALUES (1,2); +connection node_1a; +BEGIN; +UPDATE t1 SET b=3 WHERE a=1; +connection node_1; +set debug_sync='wsrep_kill_before_awake_no_mutex SIGNAL before_kill WAIT_FOR continue'; +connection node_1b; +set debug_sync= 'now WAIT_FOR before_kill'; +connection node_2; +UPDATE t1 SET b=7 WHERE a=1; +connection node_1b; +set debug_sync= 'now SIGNAL continue'; +connection node_1; +DROP TABLE t1; +SET DEBUG_SYNC= 'RESET'; diff --git a/mysql-test/suite/galera/r/galera_create_table_as_select.result b/mysql-test/suite/galera/r/galera_create_table_as_select.result index 6f65ee99f0a..beda5f30fe2 100644 --- a/mysql-test/suite/galera/r/galera_create_table_as_select.result +++ b/mysql-test/suite/galera/r/galera_create_table_as_select.result @@ -82,6 +82,7 @@ connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1; LOCK TABLE t2 WRITE; connection node_1; CREATE TABLE t1 AS SELECT * FROM t2;; +connection node_1a; connection node_2; SELECT COUNT(*) = 5 FROM t2; COUNT(*) = 5 diff --git a/mysql-test/suite/galera/r/galera_gcache_recover_manytrx.result b/mysql-test/suite/galera/r/galera_gcache_recover_manytrx.result index 8495bfde2f9..9b1e8105c1c 100644 --- a/mysql-test/suite/galera/r/galera_gcache_recover_manytrx.result +++ b/mysql-test/suite/galera/r/galera_gcache_recover_manytrx.result @@ -134,6 +134,3 @@ connection node_1; call mtr.add_suppression("Error in Log_event::read_log_event():.*"); CALL mtr.add_suppression("conflict state 7 after post commit"); CALL mtr.add_suppression("Skipped GCache ring buffer recovery"); -connection node_2; -call mtr.add_suppression("Error in Log_event::read_log_event():.*"); -CALL mtr.add_suppression("Skipped GCache ring buffer recovery"); diff --git a/mysql-test/suite/galera/r/galera_kill_group_commit.result b/mysql-test/suite/galera/r/galera_kill_group_commit.result new file mode 100644 index 00000000000..bb59ce1486f --- /dev/null +++ b/mysql-test/suite/galera/r/galera_kill_group_commit.result @@ -0,0 +1,27 @@ +connection node_2; +connection node_1; +connect node_1_kill, 127.0.0.1, root, , test, $NODE_MYPORT_1; +connect node_1_ctrl, 127.0.0.1, root, , test, $NODE_MYPORT_1; +SET SESSION wsrep_sync_wait = 0; +connect node_1_follower, 127.0.0.1, root, , test, $NODE_MYPORT_1; +SET SESSION wsrep_sync_wait = 0; +connection node_1; +CREATE TABLE t1 (f1 INT PRIMARY KEY) ENGINE=InnoDB; +SET SESSION DEBUG_SYNC = "commit_before_enqueue SIGNAL leader_before_enqueue_reached WAIT_FOR leader_before_enqueue_continue"; +INSERT INTO t1 VALUES (1); +connection node_1_ctrl; +SET DEBUG_SYNC = "now WAIT_FOR leader_before_enqueue_reached"; +connection node_1_follower; +INSERT INTO t1 VALUES (2);; +connection node_1_ctrl; +connection node_1_kill; +# Execute KILL QUERY for group commit follower +SET DEBUG_SYNC = "now SIGNAL leader_before_enqueue_continue"; +connection node_1_follower; +connection node_1; +SELECT * FROM t1; +f1 +1 +2 +SET DEBUG_SYNC = "RESET"; +DROP TABLE t1; diff --git a/mysql-test/suite/galera/t/MDEV-29293.test b/mysql-test/suite/galera/t/MDEV-29293.test new file mode 100644 index 00000000000..dacbf714c06 --- /dev/null +++ b/mysql-test/suite/galera/t/MDEV-29293.test @@ -0,0 +1,41 @@ +--source include/galera_cluster.inc +--source include/have_innodb.inc +--source include/have_debug_sync.inc +--source include/galera_have_debug_sync.inc + +--connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1 +--connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1 +set wsrep_sync_wait = 0; + +CREATE TABLE t1(a int not null primary key auto_increment, b int) engine=InnoDB; +INSERT INTO t1 VALUES (1,2); + +--connection node_1a +--let $victim_id = `SELECT CONNECTION_ID()` +BEGIN; +UPDATE t1 SET b=3 WHERE a=1; + +--connection node_1 +set debug_sync='wsrep_kill_before_awake_no_mutex SIGNAL before_kill WAIT_FOR continue'; +--disable_query_log +--disable_result_log +--send_eval KILL CONNECTION $victim_id +--enable_result_log +--enable_query_log + +--connection node_1b +set debug_sync= 'now WAIT_FOR before_kill'; + +--connection node_2 +UPDATE t1 SET b=7 WHERE a=1; + +--connection node_1b +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE User = 'system user' AND State LIKE 'Update_rows_log_event%'; +--source include/wait_condition.inc +set debug_sync= 'now SIGNAL continue'; + +--connection node_1 +--reap +DROP TABLE t1; +SET DEBUG_SYNC= 'RESET'; + diff --git a/mysql-test/suite/galera/t/galera_create_table_as_select.test b/mysql-test/suite/galera/t/galera_create_table_as_select.test index a6c1f657280..cfee63e5e27 100644 --- a/mysql-test/suite/galera/t/galera_create_table_as_select.test +++ b/mysql-test/suite/galera/t/galera_create_table_as_select.test @@ -113,6 +113,10 @@ LOCK TABLE t2 WRITE; --connection node_1 --send CREATE TABLE t1 AS SELECT * FROM t2; +--connection node_1a +--let $wait_condition = SELECT COUNT(*) = 1 FROM information_schema.processlist WHERE STATE LIKE 'Waiting for table metadata lock%' +--source include/wait_condition.inc + --connection node_2 SELECT COUNT(*) = 5 FROM t2; CREATE TABLE t1 AS SELECT * FROM t2; @@ -121,7 +125,7 @@ CREATE TABLE t1 AS SELECT * FROM t2; UNLOCK TABLES; --connection node_1 ---error ER_TABLE_EXISTS_ERROR,ER_LOCK_DEADLOCK +--error ER_TABLE_EXISTS_ERROR,ER_QUERY_INTERRUPTED --reap DROP TABLE t1, t2; diff --git a/mysql-test/suite/galera/t/galera_kill_group_commit.cnf b/mysql-test/suite/galera/t/galera_kill_group_commit.cnf new file mode 100644 index 00000000000..60f4f776409 --- /dev/null +++ b/mysql-test/suite/galera/t/galera_kill_group_commit.cnf @@ -0,0 +1,5 @@ +!include ../galera_2nodes.cnf + +[mysqld] +log-bin +log-slave-updates diff --git a/mysql-test/suite/galera/t/galera_kill_group_commit.test b/mysql-test/suite/galera/t/galera_kill_group_commit.test new file mode 100644 index 00000000000..4b84f2d90ef --- /dev/null +++ b/mysql-test/suite/galera/t/galera_kill_group_commit.test @@ -0,0 +1,69 @@ +# +# Verify that transaction which has reached group commit queue +# cannot be killed. If the kill succeeds, assertion for +# wsrep transaction state will fail. +# +# If the bug is present, i.e. wsrep transaction gets killed during +# group commit wait, this test is enough to reproduce the crash +# most of the time. +# + +--source include/have_innodb.inc +--source include/have_debug_sync.inc +--source include/galera_cluster.inc + +# Connection for KILL commands +--connect node_1_kill, 127.0.0.1, root, , test, $NODE_MYPORT_1 +# Connection for sync point control +--connect node_1_ctrl, 127.0.0.1, root, , test, $NODE_MYPORT_1 +SET SESSION wsrep_sync_wait = 0; +# Connection for group commit follower +--connect node_1_follower, 127.0.0.1, root, , test, $NODE_MYPORT_1 +# Need to disable sync wait to reach commit queue when leader +# is blocked. +SET SESSION wsrep_sync_wait = 0; +--let $follower_id = `SELECT CONNECTION_ID()` + +--connection node_1 +CREATE TABLE t1 (f1 INT PRIMARY KEY) ENGINE=InnoDB; + +SET SESSION DEBUG_SYNC = "commit_before_enqueue SIGNAL leader_before_enqueue_reached WAIT_FOR leader_before_enqueue_continue"; +--send INSERT INTO t1 VALUES (1) + +--connection node_1_ctrl +SET DEBUG_SYNC = "now WAIT_FOR leader_before_enqueue_reached"; + +--connection node_1_follower +# SET SESSION DEBUG_SYNC = "group_commit_waiting_for_prior SIGNAL follower_waiting_for_prior_reached WAIT_FOR follower_waiting_for_prior_continue"; +--send INSERT INTO t1 VALUES (2); + +--connection node_1_ctrl +# TODO: Is it possible to use sync points to enforce group commit to happen? +# The leader will hold commit monitor in commit_before_enqueue sync point, +# which prevents the follower to reach the group commit wait state. +# We now sleep and expect the follower to reach group commit, but this +# may cause false negatives. +--sleep 1 + +--connection node_1_kill +--echo # Execute KILL QUERY for group commit follower +--disable_query_log +--disable_result_log +# Because it is currently impossible to verify that the +# follower has reached group commit queue, the KILL may +# sometimes return success. +--error 0,ER_KILL_DENIED_ERROR +--eval KILL QUERY $follower_id +--enable_result_log +--enable_query_log + +SET DEBUG_SYNC = "now SIGNAL leader_before_enqueue_continue"; +--connection node_1_follower +--reap + +--connection node_1 +--reap +SELECT * FROM t1; + +SET DEBUG_SYNC = "RESET"; +DROP TABLE t1; diff --git a/sql/handler.cc b/sql/handler.cc index eaaf4664c07..7f591b8456c 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -7599,6 +7599,9 @@ Compare_keys handler::compare_key_parts(const Field &old_field, concurrent accesses. And it's an overkill to take LOCK_plugin and iterate the whole installed_htons[] array every time. + @note Object victim_thd is not guaranteed to exist after this + function returns. + @param bf_thd brute force THD asking for the abort @param victim_thd victim THD to be aborted @@ -7612,6 +7615,8 @@ int ha_abort_transaction(THD *bf_thd, THD *victim_thd, my_bool signal) if (!WSREP(bf_thd) && !(bf_thd->variables.wsrep_OSU_method == WSREP_OSU_RSU && wsrep_thd_is_toi(bf_thd))) { + mysql_mutex_unlock(&victim_thd->LOCK_thd_data); + mysql_mutex_unlock(&victim_thd->LOCK_thd_kill); DBUG_RETURN(0); } @@ -7623,6 +7628,8 @@ int ha_abort_transaction(THD *bf_thd, THD *victim_thd, my_bool signal) else { WSREP_WARN("Cannot abort InnoDB transaction"); + mysql_mutex_unlock(&victim_thd->LOCK_thd_data); + mysql_mutex_unlock(&victim_thd->LOCK_thd_kill); } DBUG_RETURN(0); diff --git a/sql/service_wsrep.cc b/sql/service_wsrep.cc index 2d8eff2bd4b..ccce076d8a9 100644 --- a/sql/service_wsrep.cc +++ b/sql/service_wsrep.cc @@ -29,14 +29,17 @@ extern "C" my_bool wsrep_on(const THD *thd) extern "C" void wsrep_thd_LOCK(const THD *thd) { - mysql_mutex_lock(&thd->LOCK_thd_kill); mysql_mutex_lock(&thd->LOCK_thd_data); } +extern "C" int wsrep_thd_TRYLOCK(const THD *thd) +{ + return mysql_mutex_trylock(&thd->LOCK_thd_data); +} + extern "C" void wsrep_thd_UNLOCK(const THD *thd) { mysql_mutex_unlock(&thd->LOCK_thd_data); - mysql_mutex_unlock(&thd->LOCK_thd_kill); } extern "C" void wsrep_thd_kill_LOCK(const THD *thd) @@ -248,21 +251,12 @@ extern "C" my_bool wsrep_thd_bf_abort(THD *bf_thd, THD *victim_thd, if ((ret || !wsrep_on(victim_thd)) && signal) { - if (victim_thd->wsrep_aborter && victim_thd->wsrep_aborter != bf_thd->thread_id) - { - WSREP_DEBUG("victim is killed already by %llu, skipping awake", - victim_thd->wsrep_aborter); - wsrep_thd_UNLOCK(victim_thd); - return false; - } - - victim_thd->wsrep_aborter= bf_thd->thread_id; victim_thd->awake_no_mutex(KILL_QUERY_HARD); } else - WSREP_DEBUG("wsrep_thd_bf_abort skipped awake for %llu", thd_get_thread_id(victim_thd)); + WSREP_DEBUG("wsrep_thd_bf_abort skipped awake for %llu", + thd_get_thread_id(victim_thd)); - wsrep_thd_UNLOCK(victim_thd); return ret; } @@ -385,25 +379,6 @@ extern "C" ulong wsrep_OSU_method_get(const MYSQL_THD thd) return(global_system_variables.wsrep_OSU_method); } -extern "C" bool wsrep_thd_set_wsrep_aborter(THD *bf_thd, THD *victim_thd) -{ - mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data); - if (!bf_thd) - { - victim_thd->wsrep_aborter= 0; - WSREP_DEBUG("wsrep_thd_set_wsrep_aborter resetting wsrep_aborter"); - return false; - } - if (victim_thd->wsrep_aborter && victim_thd->wsrep_aborter != bf_thd->thread_id) - { - return true; - } - victim_thd->wsrep_aborter= bf_thd->thread_id; - WSREP_DEBUG("wsrep_thd_set_wsrep_aborter setting wsrep_aborter %u", - victim_thd->wsrep_aborter); - return false; -} - extern "C" void wsrep_report_bf_lock_wait(const THD *thd, unsigned long long trx_id) { diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 95a777c75cf..b22b766c409 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -1305,6 +1305,11 @@ void THD::init() wsrep_affected_rows = 0; m_wsrep_next_trx_id = WSREP_UNDEFINED_TRX_ID; wsrep_aborter = 0; + wsrep_abort_by_kill = NOT_KILLED; + wsrep_abort_by_kill_err = 0; +#ifndef DBUG_OFF + wsrep_killed_state = 0; +#endif /* DBUG_OFF */ wsrep_desynced_backup_stage= false; #endif /* WITH_WSREP */ @@ -1656,6 +1661,13 @@ void THD::reset_for_reuse() #endif #ifdef WITH_WSREP wsrep_free_status(this); + wsrep_cs().reset_error(); + wsrep_aborter= 0; + wsrep_abort_by_kill= NOT_KILLED; + wsrep_abort_by_kill_err= 0; +#ifndef DBUG_OFF + wsrep_killed_state= 0; +#endif /* DBUG_OFF */ #endif /* WITH_WSREP */ } @@ -1911,7 +1923,9 @@ void THD::awake_no_mutex(killed_state state_to_set) } /* Interrupt target waiting inside a storage engine. */ - if (state_to_set != NOT_KILLED && !wsrep_is_bf_aborted(this)) + if (state_to_set != NOT_KILLED && + IF_WSREP(!wsrep_is_bf_aborted(this) && wsrep_abort_by_kill == NOT_KILLED, + true)) ha_kill_query(this, thd_kill_level(this)); abort_current_cond_wait(false); @@ -2153,6 +2167,17 @@ void THD::reset_killed() mysql_mutex_unlock(&LOCK_thd_kill); } #ifdef WITH_WSREP + if (WSREP_NNULL(this)) + { + if (wsrep_abort_by_kill != NOT_KILLED) + { + mysql_mutex_assert_not_owner(&LOCK_thd_kill); + mysql_mutex_lock(&LOCK_thd_kill); + wsrep_abort_by_kill= NOT_KILLED; + wsrep_abort_by_kill_err= 0; + mysql_mutex_unlock(&LOCK_thd_kill); + } + } mysql_mutex_assert_not_owner(&LOCK_thd_data); mysql_mutex_lock(&LOCK_thd_data); wsrep_aborter= 0; diff --git a/sql/sql_class.h b/sql/sql_class.h index 68a69762354..c373c0f6a43 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -5147,7 +5147,14 @@ public: bool wsrep_ignore_table; /* thread who has started kill for this THD protected by LOCK_thd_data*/ my_thread_id wsrep_aborter; - + /* Kill signal used, if thread was killed by manual KILL. Protected by + LOCK_thd_kill. */ + std::atomic wsrep_abort_by_kill; + /* */ + struct err_info* wsrep_abort_by_kill_err; +#ifndef DBUG_OFF + int wsrep_killed_state; +#endif /* DBUG_OFF */ /* true if BF abort is observed in do_command() right after reading client's packet, and if the client has sent PS execute command. */ bool wsrep_delayed_BF_abort; diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 4c7313265e8..59f21247445 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -7945,7 +7945,7 @@ static bool wsrep_mysql_parse(THD *thd, char *rawbuf, uint length, thd->wsrep_retry_counter < thd->variables.wsrep_retry_autocommit) { #ifdef ENABLED_DEBUG_SYNC - DBUG_EXECUTE_IF("sync.wsrep_retry_autocommit", + DBUG_EXECUTE_IF("sync.wsrep_retry_autocommit", { const char act[]= "now " @@ -9309,21 +9309,15 @@ kill_one_thread(THD *thd, my_thread_id id, killed_state kill_signal, killed_type thd->security_ctx->user_matches(tmp->security_ctx)) #endif /* WITH_WSREP */ { + { #ifdef WITH_WSREP - DEBUG_SYNC(thd, "before_awake_no_mutex"); - if (tmp->wsrep_aborter && tmp->wsrep_aborter != thd->thread_id) - { - /* victim is in hit list already, bail out */ - WSREP_DEBUG("victim %lld has wsrep aborter: %lu, skipping awake()", - id, tmp->wsrep_aborter); - error= 0; - } - else + if (WSREP(tmp)) + { + /* Object tmp is not guaranteed to exist after wsrep_kill_thd() + returns, so do early return from this function. */ + DBUG_RETURN(wsrep_kill_thd(thd, tmp, kill_signal)); + } #endif /* WITH_WSREP */ - { - WSREP_DEBUG("kill_one_thread victim: %lld wsrep_aborter %lu" - " by signal %d", - id, tmp->wsrep_aborter, kill_signal); tmp->awake_no_mutex(kill_signal); error= 0; } @@ -9448,18 +9442,6 @@ static void sql_kill(THD *thd, my_thread_id id, killed_state state, killed_type type) { uint error; -#ifdef WITH_WSREP - if (WSREP(thd)) - { - WSREP_DEBUG("sql_kill called"); - if (thd->wsrep_applier) - { - WSREP_DEBUG("KILL in applying, bailing out here"); - return; - } - WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) - } -#endif /* WITH_WSREP */ if (likely(!(error= kill_one_thread(thd, id, state, type)))) { if (!thd->killed) @@ -9469,13 +9451,6 @@ void sql_kill(THD *thd, my_thread_id id, killed_state state, killed_type type) } else my_error(error, MYF(0), id); -#ifdef WITH_WSREP - return; - wsrep_error_label: - error= (type == KILL_TYPE_QUERY ? ER_KILL_QUERY_DENIED_ERROR : - ER_KILL_DENIED_ERROR); - my_error(error, MYF(0), (long long) id); -#endif /* WITH_WSREP */ } @@ -9484,18 +9459,6 @@ sql_kill_user(THD *thd, LEX_USER *user, killed_state state) { uint error; ha_rows rows; -#ifdef WITH_WSREP - if (WSREP(thd)) - { - WSREP_DEBUG("sql_kill_user called"); - if (thd->wsrep_applier) - { - WSREP_DEBUG("KILL in applying, bailing out here"); - return; - } - WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) - } -#endif /* WITH_WSREP */ switch (error= kill_threads_for_user(thd, user, state, &rows)) { case 0: @@ -9511,11 +9474,6 @@ sql_kill_user(THD *thd, LEX_USER *user, killed_state state) default: my_error(error, MYF(0)); } -#ifdef WITH_WSREP - return; - wsrep_error_label: - my_error(ER_CANNOT_USER, MYF(0), user ? user->user.str : "NULL"); -#endif /* WITH_WSREP */ } diff --git a/sql/sql_plugin_services.inl b/sql/sql_plugin_services.inl index c6f07158003..8863f581afb 100644 --- a/sql/sql_plugin_services.inl +++ b/sql/sql_plugin_services.inl @@ -151,6 +151,7 @@ static struct wsrep_service_st wsrep_handler = { wsrep_on, wsrep_prepare_key_for_innodb, wsrep_thd_LOCK, + wsrep_thd_TRYLOCK, wsrep_thd_UNLOCK, wsrep_thd_query, wsrep_thd_retry_counter, @@ -177,7 +178,6 @@ static struct wsrep_service_st wsrep_handler = { wsrep_OSU_method_get, wsrep_thd_has_ignored_error, wsrep_thd_set_ignored_error, - wsrep_thd_set_wsrep_aborter, wsrep_report_bf_lock_wait, wsrep_thd_kill_LOCK, wsrep_thd_kill_UNLOCK, diff --git a/sql/wsrep_dummy.cc b/sql/wsrep_dummy.cc index ac14fc4597a..a67da77c472 100644 --- a/sql/wsrep_dummy.cc +++ b/sql/wsrep_dummy.cc @@ -56,6 +56,11 @@ my_bool wsrep_on(const THD *) void wsrep_thd_LOCK(const THD *) { } +int wsrep_thd_TRYLOCK(const THD *) +{ + return 0; +} + void wsrep_thd_UNLOCK(const THD *) { } @@ -148,8 +153,6 @@ void wsrep_thd_set_ignored_error(THD*, my_bool) { } ulong wsrep_OSU_method_get(const THD*) { return 0;} -bool wsrep_thd_set_wsrep_aborter(THD*, THD*) -{ return 0;} void wsrep_report_bf_lock_wait(const THD*, unsigned long long) diff --git a/sql/wsrep_high_priority_service.cc b/sql/wsrep_high_priority_service.cc index 53ef20f3e78..0a2fa273723 100644 --- a/sql/wsrep_high_priority_service.cc +++ b/sql/wsrep_high_priority_service.cc @@ -510,6 +510,7 @@ int Wsrep_high_priority_service::log_dummy_write_set(const wsrep::ws_handle& ws_ m_thd->wait_for_prior_commit(); } + WSREP_DEBUG("checkpointing dummy write set %lld", ws_meta.seqno().get()); wsrep_set_SE_checkpoint(ws_meta.gtid(), wsrep_gtid_server.gtid()); if (!WSREP_EMULATE_BINLOG(m_thd)) diff --git a/sql/wsrep_mysqld.cc b/sql/wsrep_mysqld.cc index db4a2a2e7b9..bf28f7fd39a 100644 --- a/sql/wsrep_mysqld.cc +++ b/sql/wsrep_mysqld.cc @@ -2249,11 +2249,6 @@ static int wsrep_TOI_event_buf(THD* thd, uchar** buf, size_t* buf_len) case SQLCOM_DROP_TABLE: err= wsrep_drop_table_query(thd, buf, buf_len); break; - case SQLCOM_KILL: - WSREP_DEBUG("KILL as TOI: %s", thd->query()); - err= wsrep_to_buf_helper(thd, thd->query(), thd->query_length(), - buf, buf_len); - break; case SQLCOM_CREATE_ROLE: if (sp_process_definer(thd)) { @@ -2675,8 +2670,15 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx, /* Here we will call wsrep_abort_transaction so we should hold THD::LOCK_thd_data to protect victim from concurrent usage - and THD::LOCK_thd_kill to protect from disconnect or delete. */ - wsrep_thd_LOCK(granted_thd); + and THD::LOCK_thd_kill to protect from disconnect or delete. + + Note that all calls to wsrep_abort_thd() and ha_abort_transaction() + unlock LOCK_thd_kill for granted_thd, so granted_thd must not be + accessed after any of those calls. Moreover all other if branches + must release those locks. + */ + mysql_mutex_lock(&granted_thd->LOCK_thd_kill); + mysql_mutex_lock(&granted_thd->LOCK_thd_data); if (wsrep_thd_is_toi(granted_thd) || wsrep_thd_is_applying(granted_thd)) @@ -2685,22 +2687,22 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx, { WSREP_DEBUG("BF thread waiting for SR in aborting state"); ticket->wsrep_report(wsrep_debug); - wsrep_thd_UNLOCK(granted_thd); + mysql_mutex_unlock(&granted_thd->LOCK_thd_data); + mysql_mutex_unlock(&granted_thd->LOCK_thd_kill); } else if (wsrep_thd_is_SR(granted_thd) && !wsrep_thd_is_SR(request_thd)) { WSREP_MDL_LOG(INFO, "MDL conflict, DDL vs SR", schema, schema_len, request_thd, granted_thd); wsrep_abort_thd(request_thd, granted_thd, 1); - mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_data); - mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_kill); } else { WSREP_MDL_LOG(INFO, "MDL BF-BF conflict", schema, schema_len, request_thd, granted_thd); ticket->wsrep_report(true); - wsrep_thd_UNLOCK(granted_thd); + mysql_mutex_unlock(&granted_thd->LOCK_thd_data); + mysql_mutex_unlock(&granted_thd->LOCK_thd_kill); unireg_abort(1); } } @@ -2709,7 +2711,8 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx, { WSREP_DEBUG("BF thread waiting for FLUSH"); ticket->wsrep_report(wsrep_debug); - wsrep_thd_UNLOCK(granted_thd); + mysql_mutex_unlock(&granted_thd->LOCK_thd_data); + mysql_mutex_unlock(&granted_thd->LOCK_thd_kill); } else if (request_thd->lex->sql_command == SQLCOM_DROP_TABLE) { @@ -2717,8 +2720,6 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx, wsrep_thd_transaction_state_str(granted_thd)); ticket->wsrep_report(wsrep_debug); wsrep_abort_thd(request_thd, granted_thd, 1); - mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_data); - mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_kill); } else { @@ -2728,8 +2729,6 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx, if (granted_thd->wsrep_trx().active()) { wsrep_abort_thd(request_thd, granted_thd, true); - mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_data); - mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_kill); } else { @@ -2739,15 +2738,16 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx, */ if (wsrep_thd_is_BF(request_thd, FALSE)) { + granted_thd->awake_no_mutex(KILL_QUERY_HARD); ha_abort_transaction(request_thd, granted_thd, TRUE); - mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_data); - mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_kill); } else { WSREP_MDL_LOG(INFO, "MDL unknown BF-BF conflict", schema, schema_len, request_thd, granted_thd); ticket->wsrep_report(true); + mysql_mutex_unlock(&granted_thd->LOCK_thd_data); + mysql_mutex_unlock(&granted_thd->LOCK_thd_kill); unireg_abort(1); } } @@ -2763,17 +2763,22 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx, static bool abort_replicated(THD *thd) { bool ret_code= false; + wsrep_thd_kill_LOCK(thd); wsrep_thd_LOCK(thd); if (thd->wsrep_trx().state() == wsrep::transaction::s_committing) { WSREP_DEBUG("aborting replicated trx: %llu", (ulonglong)(thd->real_id)); - (void)wsrep_abort_thd(thd, thd, TRUE); + wsrep_abort_thd(thd, thd, TRUE); ret_code= true; } else + { + /* wsrep_abort_thd() above releases LOCK_thd_data and LOCK_thd_kill, so + must do it here too. */ wsrep_thd_UNLOCK(thd); - + wsrep_thd_kill_UNLOCK(thd); + } return ret_code; } diff --git a/sql/wsrep_server_service.cc b/sql/wsrep_server_service.cc index ac7226b9948..a1b96a60672 100644 --- a/sql/wsrep_server_service.cc +++ b/sql/wsrep_server_service.cc @@ -143,9 +143,13 @@ void Wsrep_server_service::release_high_priority_service(wsrep::high_priority_se wsrep_delete_threadvars(); } -void Wsrep_server_service::background_rollback(wsrep::client_state& client_state) +void Wsrep_server_service::background_rollback( + wsrep::unique_lock &lock WSREP_UNUSED, + wsrep::client_state &client_state) { - Wsrep_client_state& cs= static_cast(client_state); + DBUG_ASSERT(lock.owns_lock()); + Wsrep_client_state &cs= static_cast(client_state); + mysql_mutex_assert_owner(&cs.thd()->LOCK_thd_data); wsrep_fire_rollbacker(cs.thd()); } diff --git a/sql/wsrep_server_service.h b/sql/wsrep_server_service.h index 168e98206e3..0fc48402024 100644 --- a/sql/wsrep_server_service.h +++ b/sql/wsrep_server_service.h @@ -46,7 +46,8 @@ public: void release_high_priority_service(wsrep::high_priority_service*); - void background_rollback(wsrep::client_state&); + void background_rollback(wsrep::unique_lock &, + wsrep::client_state &); void bootstrap(); void log_message(enum wsrep::log::level, const char*); diff --git a/sql/wsrep_thd.cc b/sql/wsrep_thd.cc index ccb32fb13af..d05ddcbae16 100644 --- a/sql/wsrep_thd.cc +++ b/sql/wsrep_thd.cc @@ -307,50 +307,9 @@ void wsrep_fire_rollbacker(THD *thd) } } - -int wsrep_abort_thd(THD *bf_thd, - THD *victim_thd, - my_bool signal) +static bool wsrep_bf_abort_low(THD *bf_thd, THD *victim_thd) { - DBUG_ENTER("wsrep_abort_thd"); - mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data); - mysql_mutex_assert_owner(&victim_thd->LOCK_thd_kill); - - /* Note that when you use RSU node is desynced from cluster, thus WSREP(thd) - might not be true. - */ - if ((WSREP(bf_thd) || - ((WSREP_ON || bf_thd->variables.wsrep_OSU_method == WSREP_OSU_RSU) && - wsrep_thd_is_toi(bf_thd))) && - !wsrep_thd_is_aborting(victim_thd)) - { - WSREP_DEBUG("wsrep_abort_thd, by: %llu, victim: %llu", - (long long)bf_thd->real_id, (long long)victim_thd->real_id); - ha_abort_transaction(bf_thd, victim_thd, signal); - } - else - { - WSREP_DEBUG("wsrep_abort_thd not effective: bf %llu victim %llu " - "wsrep %d wsrep_on %d RSU %d TOI %d aborting %d", - (long long)bf_thd->real_id, (long long)victim_thd->real_id, - WSREP_NNULL(bf_thd), WSREP_ON, - bf_thd->variables.wsrep_OSU_method == WSREP_OSU_RSU, - wsrep_thd_is_toi(bf_thd), - wsrep_thd_is_aborting(victim_thd)); - wsrep_thd_UNLOCK(victim_thd); - } - - DBUG_RETURN(1); -} - -bool wsrep_bf_abort(THD* bf_thd, THD* victim_thd) -{ - WSREP_LOG_THD(bf_thd, "BF aborter before"); - WSREP_LOG_THD(victim_thd, "victim before"); - - mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data); - mysql_mutex_assert_owner(&victim_thd->LOCK_thd_kill); #ifdef ENABLED_DEBUG_SYNC DBUG_EXECUTE_IF("sync.wsrep_bf_abort", @@ -364,6 +323,88 @@ bool wsrep_bf_abort(THD* bf_thd, THD* victim_thd) };); #endif + wsrep::seqno bf_seqno(bf_thd->wsrep_trx().ws_meta().seqno()); + bool ret; + + { + /* Adopt the lock, it is being held by the caller. */ + Wsrep_mutex wsm{&victim_thd->LOCK_thd_data}; + wsrep::unique_lock lock{wsm, std::adopt_lock}; + + if (wsrep_thd_is_toi(bf_thd)) + { + ret= victim_thd->wsrep_cs().total_order_bf_abort(lock, bf_seqno); + } + else + { + DBUG_ASSERT(WSREP(victim_thd) ? victim_thd->wsrep_trx().active() : 1); + ret= victim_thd->wsrep_cs().bf_abort(lock, bf_seqno); + } + if (ret) + { + /* BF abort should be allowed only once by wsrep-lib.*/ + DBUG_ASSERT(victim_thd->wsrep_aborter == 0); + victim_thd->wsrep_aborter= bf_thd->thread_id; + wsrep_bf_aborts_counter++; + } + lock.release(); /* No unlock at the end of the scope. */ + } + + /* Sanity check for wsrep-lib calls to return with LOCK_thd_data held. */ + mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data); + + return ret; +} + + +void wsrep_abort_thd(THD *bf_thd, + THD *victim_thd, + my_bool signal) +{ + DBUG_ENTER("wsrep_abort_thd"); + + mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data); + mysql_mutex_assert_owner(&victim_thd->LOCK_thd_kill); + + /* Note that when you use RSU node is desynced from cluster, thus WSREP(thd) + might not be true. + */ + if ((WSREP(bf_thd) + || ((WSREP_ON || bf_thd->variables.wsrep_OSU_method == WSREP_OSU_RSU) + && wsrep_thd_is_toi(bf_thd)) + || bf_thd->lex->sql_command == SQLCOM_KILL) + && !wsrep_thd_is_aborting(victim_thd) && + wsrep_bf_abort_low(bf_thd, victim_thd) && + !victim_thd->wsrep_cs().is_rollbacker_active()) + { + WSREP_DEBUG("wsrep_abort_thd, by: %llu, victim: %llu", + (long long)bf_thd->real_id, (long long)victim_thd->real_id); + victim_thd->awake_no_mutex(KILL_QUERY_HARD); + ha_abort_transaction(bf_thd, victim_thd, signal); + } + else + { + WSREP_DEBUG("wsrep_abort_thd not effective: bf %llu victim %llu " + "wsrep %d wsrep_on %d RSU %d TOI %d aborting %d", + (long long)bf_thd->real_id, (long long)victim_thd->real_id, + WSREP_NNULL(bf_thd), WSREP_ON, + bf_thd->variables.wsrep_OSU_method == WSREP_OSU_RSU, + wsrep_thd_is_toi(bf_thd), + wsrep_thd_is_aborting(victim_thd)); + mysql_mutex_unlock(&victim_thd->LOCK_thd_data); + mysql_mutex_unlock(&victim_thd->LOCK_thd_kill); + } + + DBUG_VOID_RETURN; +} + +bool wsrep_bf_abort(THD* bf_thd, THD* victim_thd) +{ + WSREP_LOG_THD(bf_thd, "BF aborter before"); + WSREP_LOG_THD(victim_thd, "victim before"); + + mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data); + if (WSREP(victim_thd) && !victim_thd->wsrep_trx().active()) { WSREP_DEBUG("wsrep_bf_abort, BF abort for non active transaction"); @@ -385,32 +426,81 @@ bool wsrep_bf_abort(THD* bf_thd, THD* victim_thd) mysql_mutex_lock(&victim_thd->LOCK_thd_data); } - bool ret; - wsrep::seqno bf_seqno(bf_thd->wsrep_trx().ws_meta().seqno()); + return wsrep_bf_abort_low(bf_thd, victim_thd); +} - if (wsrep_thd_is_toi(bf_thd)) +uint wsrep_kill_thd(THD *thd, THD *victim_thd, killed_state kill_signal) +{ + DBUG_ENTER("wsrep_kill_thd"); + DBUG_ASSERT(WSREP(victim_thd)); + mysql_mutex_assert_owner(&victim_thd->LOCK_thd_kill); + mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data); + using trans= wsrep::transaction; + auto trx_state= victim_thd->wsrep_trx().state(); +#ifndef DBUG_OFF + victim_thd->wsrep_killed_state= trx_state; +#endif /* DBUG_OFF */ + /* + Already killed or in commit codepath. Mark the victim as killed, + the killed status will be restored in wsrep_after_commit() and + will be processed after the commit is over. In case of multiple + KILLs happened on commit codepath, the last one will be effective. + */ + if (victim_thd->wsrep_abort_by_kill || + trx_state == trans::s_preparing || + trx_state == trans::s_committing || + trx_state == trans::s_ordered_commit) { - /* Here we enter wsrep-lib were LOCK_thd_data will be acquired, - thus we need to release it. However, we can still hold - LOCK_thd_kill to protect from disconnect or delete. */ + victim_thd->wsrep_abort_by_kill= kill_signal; mysql_mutex_unlock(&victim_thd->LOCK_thd_data); - ret= victim_thd->wsrep_cs().total_order_bf_abort(bf_seqno); - mysql_mutex_lock(&victim_thd->LOCK_thd_data); + mysql_mutex_unlock(&victim_thd->LOCK_thd_kill); + DBUG_RETURN(0); } - else + /* + Mark killed victim_thd with kill_signal so that awake_no_mutex does + not dive into storage engine. We use ha_abort_transaction() + to do the storage engine part for wsrep THDs. + */ + DEBUG_SYNC(thd, "wsrep_kill_before_awake_no_mutex"); + victim_thd->wsrep_abort_by_kill= kill_signal; + victim_thd->awake_no_mutex(kill_signal); + /* ha_abort_transaction() releases tmp->LOCK_thd_kill, so tmp + is not safe to access anymore. */ + ha_abort_transaction(thd, victim_thd, 1); + DBUG_RETURN(0); +} + +void wsrep_backup_kill_for_commit(THD *thd) +{ + DBUG_ASSERT(WSREP(thd)); + mysql_mutex_assert_owner(&thd->LOCK_thd_kill); + DBUG_ASSERT(thd->killed != NOT_KILLED); + mysql_mutex_lock(&thd->LOCK_thd_data); + /* If the transaction will roll back, keep the killed state. + For must replay, the replay will happen in different THD context + which is high priority and cannot be killed. The owning thread will + pick the killed state in after statement processing. */ + if (thd->wsrep_trx().state() != wsrep::transaction::s_cert_failed && + thd->wsrep_trx().state() != wsrep::transaction::s_must_abort && + thd->wsrep_trx().state() != wsrep::transaction::s_aborting && + thd->wsrep_trx().state() != wsrep::transaction::s_must_replay) { - /* Test: mysql-wsrep-features#165. Here we enter wsrep-lib - were LOCK_thd_data will be acquired and later LOCK_thd_kill - thus we need to release them. */ - wsrep_thd_UNLOCK(victim_thd); - ret= victim_thd->wsrep_cs().bf_abort(bf_seqno); - wsrep_thd_LOCK(victim_thd); + thd->wsrep_abort_by_kill= thd->killed; + thd->wsrep_abort_by_kill_err= thd->killed_err; + thd->killed= NOT_KILLED; + thd->killed_err= 0; } - if (ret) - { - wsrep_bf_aborts_counter++; - } - return ret; + mysql_mutex_unlock(&thd->LOCK_thd_data); +} + +void wsrep_restore_kill_after_commit(THD *thd) +{ + DBUG_ASSERT(WSREP(thd)); + mysql_mutex_assert_owner(&thd->LOCK_thd_kill); + thd->killed= thd->wsrep_abort_by_kill; + thd->killed_err= thd->wsrep_abort_by_kill_err; + thd->wsrep_abort_by_kill= NOT_KILLED; + thd->wsrep_abort_by_kill_err= 0; } int wsrep_create_threadvars() diff --git a/sql/wsrep_thd.h b/sql/wsrep_thd.h index 0ce612d6097..6f5a70a30a4 100644 --- a/sql/wsrep_thd.h +++ b/sql/wsrep_thd.h @@ -88,10 +88,44 @@ bool wsrep_create_appliers(long threads, bool mutex_protected=false); void wsrep_create_rollbacker(); bool wsrep_bf_abort(THD* bf_thd, THD* victim_thd); -int wsrep_abort_thd(THD *bf_thd, +/* + Abort transaction for victim_thd. This function is called from + MDL BF abort codepath. + + @note This thread unlocks victim_thd->LOCK_thd_kill, so accessing + victim_thd after the function returns is not safe anymore. +*/ +void wsrep_abort_thd(THD *bf_thd, THD *victim_thd, my_bool signal) __attribute__((nonnull(1,2))); +/** + Kill wsrep connection with kill_signal. Object thd is not + guaranteed to exist anymore when this function returns. + + Asserts that the caller holds victim_thd->LOCK_thd_kill, + victim_thd->LOCK_thd_data. + + Releases victim_thd->LOCK_thd_kill, victim_thd->LOCK_thd_data. + + @param thd THD object for connection that executes the KILL. + @param victim_thd THD object for connection to be killed. + @param kill_signal Kill signal. + + @return Zero if the kill was successful, otherwise non-zero error code. + */ +uint wsrep_kill_thd(THD *thd, THD *victim_thd, killed_state kill_signal); + +/* + Backup kill status for commit. + */ +void wsrep_backup_kill_for_commit(THD *); + +/* + Restore KILL status after commit. + */ +void wsrep_restore_kill_after_commit(THD *); + /* Helper methods to deal with thread local storage. The purpose of these methods is to hide the details of thread diff --git a/sql/wsrep_trans_observer.h b/sql/wsrep_trans_observer.h index 8f998244ee6..40fbf80cfb3 100644 --- a/sql/wsrep_trans_observer.h +++ b/sql/wsrep_trans_observer.h @@ -256,6 +256,11 @@ static inline int wsrep_before_prepare(THD* thd, bool all) thd->wsrep_trx().ws_meta().gtid(), wsrep_gtid_server.gtid()); } + + mysql_mutex_lock(&thd->LOCK_thd_kill); + if (thd->killed) wsrep_backup_kill_for_commit(thd); + mysql_mutex_unlock(&thd->LOCK_thd_kill); + DBUG_RETURN(ret); } @@ -323,6 +328,11 @@ static inline int wsrep_before_commit(THD* thd, bool all) wsrep_gtid_server.gtid()); wsrep_register_for_group_commit(thd); } + + mysql_mutex_lock(&thd->LOCK_thd_kill); + if (thd->killed) wsrep_backup_kill_for_commit(thd); + mysql_mutex_unlock(&thd->LOCK_thd_kill); + DBUG_RETURN(ret); } @@ -341,7 +351,8 @@ static inline int wsrep_before_commit(THD* thd, bool all) static inline int wsrep_ordered_commit(THD* thd, bool all) { DBUG_ENTER("wsrep_ordered_commit"); - WSREP_DEBUG("wsrep_ordered_commit: %d", wsrep_is_real(thd, all)); + WSREP_DEBUG("wsrep_ordered_commit: %d %lld", wsrep_is_real(thd, all), + (long long) wsrep_thd_trx_seqno(thd)); DBUG_ASSERT(wsrep_run_commit_hook(thd, all)); DBUG_RETURN(thd->wsrep_cs().ordered_commit()); } @@ -449,10 +460,18 @@ int wsrep_after_statement(THD* thd) wsrep::to_c_string(thd->wsrep_cs().state()), wsrep::to_c_string(thd->wsrep_cs().mode()), wsrep::to_c_string(thd->wsrep_cs().transaction().state())); - DBUG_RETURN((thd->wsrep_cs().state() != wsrep::client_state::s_none && + int ret= ((thd->wsrep_cs().state() != wsrep::client_state::s_none && thd->wsrep_cs().mode() == Wsrep_client_state::m_local) && !thd->internal_transaction() ? thd->wsrep_cs().after_statement() : 0); + + if (wsrep_is_active(thd)) + { + mysql_mutex_lock(&thd->LOCK_thd_kill); + wsrep_restore_kill_after_commit(thd); + mysql_mutex_unlock(&thd->LOCK_thd_kill); + } + DBUG_RETURN(ret); } static inline void wsrep_after_apply(THD* thd) diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index f7dd18e0e36..98385e3b5dd 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -18709,50 +18709,6 @@ static struct st_mysql_storage_engine innobase_storage_engine= #ifdef WITH_WSREP -static -void -wsrep_kill_victim( - MYSQL_THD const bf_thd, - MYSQL_THD thd, - trx_t* victim_trx, - my_bool signal) -{ - DBUG_ENTER("wsrep_kill_victim"); - - /* Mark transaction as a victim for Galera abort */ - victim_trx->lock.was_chosen_as_wsrep_victim= true; - if (wsrep_thd_set_wsrep_aborter(bf_thd, thd)) - { - WSREP_DEBUG("innodb kill transaction skipped due to wsrep_aborter set"); - wsrep_thd_UNLOCK(thd); - DBUG_VOID_RETURN; - } - - if (wsrep_thd_bf_abort(bf_thd, thd, signal)) - { - lock_t* wait_lock= victim_trx->lock.wait_lock; - if (wait_lock) - { - DBUG_ASSERT(victim_trx->is_wsrep()); - WSREP_DEBUG("victim has wait flag: %lu", thd_get_thread_id(thd)); - victim_trx->lock.was_chosen_as_deadlock_victim= TRUE; - lock_cancel_waiting_and_release(wait_lock); - } - } - else - { - wsrep_thd_LOCK(thd); - victim_trx->lock.was_chosen_as_wsrep_victim= false; - wsrep_thd_set_wsrep_aborter(NULL, thd); - wsrep_thd_UNLOCK(thd); - - WSREP_DEBUG("wsrep_thd_bf_abort has failed, victim %lu will survive", - thd_get_thread_id(thd)); - } - - DBUG_VOID_RETURN; -} - /** This function is used to kill one transaction. This transaction was open on this node (not-yet-committed), and a @@ -18799,10 +18755,45 @@ wsrep_innobase_kill_one_trx( DBUG_VOID_RETURN; } - /* Here we need to lock THD::LOCK_thd_data to protect from - concurrent usage or disconnect or delete. */ + /* Grab reference to victim_trx before releasing the mutex, this will + prevent victim to release locks or commit while the mutex is + unlocked. The state may change to TRX_STATE_COMMITTED_IN_MEMORY. + See skip_lock_inheritance_n_ref in trx0trx.h. */ + const trx_id_t victim_trx_id= victim_trx->id; +retry_lock: + victim_trx->reference(); + trx_mutex_exit(victim_trx); + DEBUG_SYNC(bf_thd, "wsrep_before_BF_victim_lock"); - wsrep_thd_LOCK(thd); + wsrep_thd_kill_LOCK(thd); + /* + There is now a cycle + + trx reference + -> LOCK_commit_order + -> LOCK_thd_data + -> trx reference + + which may prevent the transaction committing because reference was grabbed + above. Try to lock LOCK_thd_data, and if not successul, enter the + trx mutex again to release the reference and try again. + */ + if (wsrep_thd_TRYLOCK(thd)) + { + wsrep_thd_kill_UNLOCK(thd); + trx_mutex_enter(victim_trx); + victim_trx->release_reference(); + if (victim_trx_id != victim_trx->id || + victim_trx->state == TRX_STATE_COMMITTED_IN_MEMORY || + victim_trx->state == TRX_STATE_NOT_STARTED) + { + WSREP_DEBUG("wsrep_innobase_kill_one_trx: Victim committed in memory"); + DBUG_VOID_RETURN; + } + goto retry_lock; + } + + DEBUG_SYNC(bf_thd, "wsrep_after_BF_victim_lock"); WSREP_LOG_CONFLICT(bf_thd, thd, TRUE); @@ -18833,7 +18824,31 @@ wsrep_innobase_kill_one_trx( wsrep_thd_transaction_state_str(thd), wsrep_thd_query(thd)); - wsrep_kill_victim(bf_thd, thd, victim_trx, signal); + const bool success= wsrep_thd_bf_abort(bf_thd, thd, signal); + + wsrep_thd_UNLOCK(thd); + wsrep_thd_kill_UNLOCK(thd); + trx_mutex_enter(victim_trx); + + if (success && victim_trx->state == TRX_STATE_ACTIVE) + { + lock_t* wait_lock= victim_trx->lock.wait_lock; + if (wait_lock) + { + victim_trx->lock.was_chosen_as_deadlock_victim= TRUE; + DBUG_ASSERT(victim_trx->is_wsrep()); + WSREP_DEBUG("victim has wait flag: %lu", thd_get_thread_id(thd)); + lock_cancel_waiting_and_release(wait_lock); + } + } + else + { + victim_trx->lock.was_chosen_as_wsrep_victim= false; + WSREP_DEBUG("wsrep_thd_bf_abort has failed, victim %lu will survive", + thd_get_thread_id(thd)); + } + victim_trx->release_reference(); + DBUG_VOID_RETURN; } @@ -18854,42 +18869,61 @@ wsrep_abort_transaction( THD *victim_thd, my_bool signal) { - /* Note that victim thd is protected with - THD::LOCK_thd_data and THD::LOCK_thd_kill here. */ + /* Unlock LOCK_thd_kill and LOCK_thd_data temporarily to grab mutexes + in the right order: + lock_sys.mutex + LOCK_thd_kill + LOCK_thd_data + trx.mutex + */ trx_t* victim_trx= thd_to_trx(victim_thd); - trx_t* bf_trx= thd_to_trx(bf_thd); - WSREP_DEBUG("wsrep_abort_transaction: BF:" - " thread %ld client_state %s client_mode %s" - " trans_state %s query %s trx " TRX_ID_FMT, - thd_get_thread_id(bf_thd), - wsrep_thd_client_state_str(bf_thd), - wsrep_thd_client_mode_str(bf_thd), - wsrep_thd_transaction_state_str(bf_thd), - wsrep_thd_query(bf_thd), - bf_trx ? bf_trx->id : 0); + trx_id_t victim_trx_id= victim_trx ? victim_trx->id : 0; + wsrep_thd_UNLOCK(victim_thd); + wsrep_thd_kill_UNLOCK(victim_thd); + /* After this point must use find_thread_by_id() if victim_thd + is needed again. */ - WSREP_DEBUG("wsrep_abort_transaction: victim:" - " thread %ld client_state %s client_mode %s" - " trans_state %s query %s trx " TRX_ID_FMT, - thd_get_thread_id(victim_thd), - wsrep_thd_client_state_str(victim_thd), - wsrep_thd_client_mode_str(victim_thd), - wsrep_thd_transaction_state_str(victim_thd), - wsrep_thd_query(victim_thd), - victim_trx ? victim_trx->id : 0); - - if (victim_trx) + /* Victim didn't have active RW transaction. Note that tere is a possible + race when the victim transaction is just starting write operation + as is still read only. This however will be resolved eventually since + all the possible blocking transactions are also BF aborted, + and the victim will find that it was BF aborted on server level after + the write operation in InnoDB completes. */ + if (!victim_trx_id) { - lock_mutex_enter(); - trx_mutex_enter(victim_trx); - wsrep_kill_victim(bf_thd, victim_thd, victim_trx, signal); +#ifdef ENABLED_DEBUG_SYNC + DBUG_EXECUTE_IF( + "sync.wsrep_abort_transaction_read_only", + {const char act[]= + "now " + "SIGNAL sync.wsrep_abort_transaction_read_only_reached " + "WAIT_FOR signal.wsrep_abort_transaction_read_only"; + DBUG_ASSERT(!debug_sync_set_action(bf_thd, STRING_WITH_LEN(act))); + };); +#endif /* ENABLED_DEBUG_SYNC*/ + return; + } + lock_mutex_enter(); + + /* Check if victim trx still exists. */ + /* Note based on comment on trx0sys.h only ACTIVE or PREPARED trx + objects may participate in hash. However, transaction may get committed + before this method returns. */ + if(!(victim_trx= trx_sys.find(nullptr, victim_trx_id, true))) { + WSREP_DEBUG("wsrep_abort_transaction: Victim trx does not exist anymore"); lock_mutex_exit(); - trx_mutex_exit(victim_trx); + return; } - else - { - wsrep_thd_bf_abort(bf_thd, victim_thd, signal); + trx_mutex_enter(victim_trx); + + if (victim_trx->state == TRX_STATE_ACTIVE && victim_trx->lock.wait_lock) { + victim_trx->lock.was_chosen_as_deadlock_victim= TRUE; + lock_cancel_waiting_and_release(victim_trx->lock.wait_lock); } + + trx_mutex_exit(victim_trx); + victim_trx->release_reference(); + lock_mutex_exit(); } static diff --git a/wsrep-lib b/wsrep-lib index 4951c383577..e238c0d240c 160000 --- a/wsrep-lib +++ b/wsrep-lib @@ -1 +1 @@ -Subproject commit 4951c38357737d568b554402bc5b6abe88a38fe1 +Subproject commit e238c0d240c2557229b0523a4a032f3cf8b41639