MDEV-33133: MDL conflict handling code should skip BF-aborted trxs
It's possible that MDL conflict handling code is called more than once for a transaction when: - it holds more than one conflicting MDL lock - reschedule_waiters() is executed, which results in repeated attempts to BF-abort an already aborted transaction. In such situations, it might be that the BF-aborting logic sees a partially rolled back transaction and erroneously decides on future actions for such a transaction. The specific situation tested and fixed is when an SR transaction applied in the node gets BF-aborted by a started TOI operation. It's then caught with the server transaction already rolled back, but with no MDL locks yet released. This caused wrong state detection for such a transaction during repeated MDL conflict handling code execution. Signed-off-by: Julius Goryavsky <julius.goryavsky@mariadb.com>
This commit is contained in:
parent
7e748d075b
commit
235f33e360
34
mysql-test/suite/galera/r/MDEV-33133.result
Normal file
34
mysql-test/suite/galera/r/MDEV-33133.result
Normal file
@ -0,0 +1,34 @@
|
||||
connection node_2;
|
||||
connection node_1;
|
||||
connect node_1a,127.0.0.1,root,,test,$NODE_MYPORT_1;
|
||||
connection node_1;
|
||||
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB;
|
||||
SET GLOBAL DEBUG_DBUG = 'd,sync.wsrep_rollback_mdl_release';
|
||||
connection node_2;
|
||||
SET SESSION wsrep_trx_fragment_size = 1;
|
||||
START TRANSACTION;
|
||||
INSERT INTO t1 VALUES (1);
|
||||
connection node_1a;
|
||||
SELECT COUNT(*) FROM t1;
|
||||
COUNT(*)
|
||||
0
|
||||
SET SESSION wsrep_retry_autocommit = 0;
|
||||
SET DEBUG_SYNC = 'ha_write_row_start SIGNAL may_toi WAIT_FOR bf_abort';
|
||||
INSERT INTO t1 VALUES (2);
|
||||
connection node_1;
|
||||
SET DEBUG_SYNC = 'now WAIT_FOR may_toi';
|
||||
SET DEBUG_SYNC = 'after_wsrep_thd_abort WAIT_FOR sync.wsrep_rollback_mdl_release_reached';
|
||||
TRUNCATE TABLE t1;
|
||||
connection node_1a;
|
||||
ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
|
||||
SET DEBUG_SYNC = 'now SIGNAL signal.wsrep_rollback_mdl_release';
|
||||
connection node_2;
|
||||
INSERT INTO t1 VALUES (3);
|
||||
ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
|
||||
connection node_1;
|
||||
SET GLOBAL DEBUG_DBUG = '';
|
||||
SET DEBUG_SYNC = 'RESET';
|
||||
DROP TABLE t1;
|
||||
disconnect node_1a;
|
||||
disconnect node_2;
|
||||
disconnect node_1;
|
80
mysql-test/suite/galera/t/MDEV-33133.test
Normal file
80
mysql-test/suite/galera/t/MDEV-33133.test
Normal file
@ -0,0 +1,80 @@
|
||||
#
|
||||
# MDEV-33133: MDL conflict handling code should skip transactions
|
||||
# BF-aborted before.
|
||||
#
|
||||
# It's possible that MDL conflict handling code is called more
|
||||
# than once for a transaction when:
|
||||
# - it holds more than one conflicting MDL lock
|
||||
# - reschedule_waiters() is executed,
|
||||
# which results in repeated attempts to BF-abort an already aborted
|
||||
# transaction.
|
||||
# In such situations, it might be that BF-aborting logic sees
|
||||
# a partially rolled back transaction and erroneously decides
|
||||
# on future actions for such a transaction.
|
||||
#
|
||||
# The specific situation tested and fixed is when a SR transaction
|
||||
# applied in the node gets BF-aborted by a started TOI operation.
|
||||
# It's then caught with the server transaction already rolled back,
|
||||
# but with no MDL locks yet released. This caused wrong state
|
||||
# detection for such a transaction during repeated MDL conflict
|
||||
# handling code execution.
|
||||
#
|
||||
|
||||
--source include/galera_cluster.inc
|
||||
--source include/have_debug_sync.inc
|
||||
--source include/have_debug.inc
|
||||
|
||||
--connect node_1a,127.0.0.1,root,,test,$NODE_MYPORT_1
|
||||
|
||||
--connection node_1
|
||||
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB;
|
||||
SET GLOBAL DEBUG_DBUG = 'd,sync.wsrep_rollback_mdl_release';
|
||||
|
||||
--connection node_2
|
||||
SET SESSION wsrep_trx_fragment_size = 1;
|
||||
START TRANSACTION;
|
||||
INSERT INTO t1 VALUES (1);
|
||||
|
||||
--connection node_1a
|
||||
# Sync wait for SR transaction to replicate and apply fragment.
|
||||
SELECT COUNT(*) FROM t1;
|
||||
SET SESSION wsrep_retry_autocommit = 0;
|
||||
SET DEBUG_SYNC = 'ha_write_row_start SIGNAL may_toi WAIT_FOR bf_abort';
|
||||
--send
|
||||
INSERT INTO t1 VALUES (2);
|
||||
|
||||
--connection node_1
|
||||
SET DEBUG_SYNC = 'now WAIT_FOR may_toi';
|
||||
# BF-abort SR transaction and wait until it reaches the point
|
||||
# prior to releasing MDL locks.
|
||||
# Then abort local INSERT, which will go through reschedule_waiters()
|
||||
# and see SR transaction holding MDL locks but already rolled back.
|
||||
# In this case SR transaction should be skipped in MDL conflict
|
||||
# handling code.
|
||||
SET DEBUG_SYNC = 'after_wsrep_thd_abort WAIT_FOR sync.wsrep_rollback_mdl_release_reached';
|
||||
--send
|
||||
TRUNCATE TABLE t1;
|
||||
|
||||
--connection node_1a
|
||||
# Local INSERT gets aborted.
|
||||
--error ER_LOCK_DEADLOCK
|
||||
--reap
|
||||
# Let the aborted SR transaction continue and finally release MDL locks,
|
||||
# which in turn allows TRUNCATE to complete.
|
||||
SET DEBUG_SYNC = 'now SIGNAL signal.wsrep_rollback_mdl_release';
|
||||
|
||||
--connection node_2
|
||||
# SR transaction has been BF-aborted.
|
||||
--error ER_LOCK_DEADLOCK
|
||||
INSERT INTO t1 VALUES (3);
|
||||
|
||||
--connection node_1
|
||||
# TRUNCATE completes.
|
||||
--reap
|
||||
|
||||
# Cleanup
|
||||
SET GLOBAL DEBUG_DBUG = '';
|
||||
SET DEBUG_SYNC = 'RESET';
|
||||
DROP TABLE t1;
|
||||
--disconnect node_1a
|
||||
--source include/galera_end.inc
|
@ -2,7 +2,6 @@
|
||||
--source include/big_test.inc
|
||||
--source include/force_restart.inc
|
||||
|
||||
|
||||
#
|
||||
# Testing gtid consistency in 3 node cluster when nodes drop
|
||||
# and join back to cluster.
|
||||
@ -378,4 +377,3 @@ DROP TABLE t3;
|
||||
--disconnect node_2b
|
||||
--disconnect node_1b
|
||||
--disconnect node_1c
|
||||
|
||||
|
@ -392,6 +392,18 @@ int Wsrep_high_priority_service::rollback(const wsrep::ws_handle& ws_handle,
|
||||
wsrep_thd_transaction_state_str(m_thd),
|
||||
m_thd->killed);
|
||||
|
||||
#ifdef ENABLED_DEBUG_SYNC
|
||||
DBUG_EXECUTE_IF("sync.wsrep_rollback_mdl_release",
|
||||
{
|
||||
const char act[]=
|
||||
"now "
|
||||
"SIGNAL sync.wsrep_rollback_mdl_release_reached "
|
||||
"WAIT_FOR signal.wsrep_rollback_mdl_release";
|
||||
DBUG_ASSERT(!debug_sync_set_action(m_thd,
|
||||
STRING_WITH_LEN(act)));
|
||||
};);
|
||||
#endif
|
||||
|
||||
m_thd->release_transactional_locks();
|
||||
|
||||
free_root(m_thd->mem_root, MYF(MY_KEEP_PREALLOC));
|
||||
|
@ -2747,8 +2747,15 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx,
|
||||
mysql_mutex_lock(&granted_thd->LOCK_thd_kill);
|
||||
mysql_mutex_lock(&granted_thd->LOCK_thd_data);
|
||||
|
||||
if (wsrep_thd_is_toi(granted_thd) ||
|
||||
wsrep_thd_is_applying(granted_thd))
|
||||
if (granted_thd->wsrep_aborter != 0)
|
||||
{
|
||||
DBUG_ASSERT(granted_thd->wsrep_aborter == request_thd->thread_id);
|
||||
WSREP_DEBUG("BF thread waiting for a victim to release locks");
|
||||
mysql_mutex_unlock(&granted_thd->LOCK_thd_data);
|
||||
mysql_mutex_unlock(&granted_thd->LOCK_thd_kill);
|
||||
}
|
||||
else if (wsrep_thd_is_toi(granted_thd) ||
|
||||
wsrep_thd_is_applying(granted_thd))
|
||||
{
|
||||
if (wsrep_thd_is_aborting(granted_thd))
|
||||
{
|
||||
@ -2824,6 +2831,8 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx,
|
||||
{
|
||||
mysql_mutex_unlock(&request_thd->LOCK_thd_data);
|
||||
}
|
||||
|
||||
DEBUG_SYNC(request_thd, "after_wsrep_thd_abort");
|
||||
}
|
||||
|
||||
/**/
|
||||
|
Loading…
x
Reference in New Issue
Block a user