MDEV-33798: ROW base optimistic deadlock with concurrent writes on same table
One case is conflicting transactions T1 and T2 with different domain id, in optimistic parallel replication in non-GTID mode. Then T2 will wait_for_prior_commit on T1; and if T1 got a row lock wait on T2 it would hang, as different domains caused the deadlock kill to be skipped in thd_rpl_deadlock_check(). More generally, if we have transactions T1 and T2 in one domain/master connection, and independent transactions U in another, then we can still deadlock like this: T1 row low wait on U U row lock wait on T2 T2 wait_for_prior_commit on T1 This commit enforces the deadlock kill in these cases. If the waited-for transaction is speculatively applied, then it will be deadlock killed in case of a conflict, even if the two transactions are in different domains or master connections. Reviewed-by: Andrei Elkin <andrei.elkin@mariadb.com> Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org>
This commit is contained in:
parent
90b95c6149
commit
e365877bae
143
mysql-test/suite/rpl/r/rpl_mdev33798.result
Normal file
143
mysql-test/suite/rpl/r/rpl_mdev33798.result
Normal file
@ -0,0 +1,143 @@
|
||||
include/rpl_init.inc [topology=1->2,1->3]
|
||||
connect server_2b,127.0.0.1,root,,,$SERVER_MYPORT_2;
|
||||
connection server_2;
|
||||
SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads;
|
||||
SET @old_parallel_mode= @@GLOBAL.slave_parallel_mode;
|
||||
SET @old_timeout= @@GLOBAL.lock_wait_timeout;
|
||||
SET @old_innodb_timeout= @@GLOBAL.innodb_lock_wait_timeout;
|
||||
include/stop_slave.inc
|
||||
SET GLOBAL slave_parallel_threads=5;
|
||||
set global slave_parallel_mode= aggressive;
|
||||
SET GLOBAL lock_wait_timeout= 86400;
|
||||
SET GLOBAL innodb_lock_wait_timeout= 86400;
|
||||
SET STATEMENT sql_log_bin=0 FOR ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
|
||||
include/start_slave.inc
|
||||
connection server_1;
|
||||
CREATE TABLE t1 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB;
|
||||
INSERT INTO t1 VALUES (1, 0), (2, 0), (3, 0), (4, 0), (5, 0), (6, 0), (7, 0), (8, 0);
|
||||
connection server_2;
|
||||
include/stop_slave.inc
|
||||
connection server_2b;
|
||||
BEGIN;
|
||||
SELECT * FROM t1 WHERE a=1 FOR UPDATE;
|
||||
a b
|
||||
1 0
|
||||
SELECT * FROM t1 WHERE a=5 FOR UPDATE;
|
||||
a b
|
||||
5 0
|
||||
connection server_1;
|
||||
SET SESSION gtid_domain_id= 1;
|
||||
BEGIN;
|
||||
UPDATE t1 SET b=1 WHERE a=1;
|
||||
UPDATE t1 SET b=1 WHERE a=7;
|
||||
COMMIT;
|
||||
UPDATE t1 SET b=2 WHERE a=3;
|
||||
SET SESSION gtid_domain_id=2;
|
||||
BEGIN;
|
||||
UPDATE t1 SET b=3 WHERE a=5;
|
||||
UPDATE t1 SET b=3 WHERE a=3;
|
||||
COMMIT;
|
||||
UPDATE t1 SET b=4 WHERE a=7;
|
||||
SET SESSION gtid_domain_id= 0;
|
||||
include/save_master_gtid.inc
|
||||
connection server_2;
|
||||
include/start_slave.inc
|
||||
connection server_2b;
|
||||
ROLLBACK;
|
||||
connection server_2;
|
||||
include/sync_with_master_gtid.inc
|
||||
SELECT a, (
|
||||
(a=1 AND b=1) OR
|
||||
(a=3 AND (b=2 OR b=3)) OR
|
||||
(a=5 AND b=3) OR
|
||||
(a=7 AND (b=1 OR b=4)) OR
|
||||
((a MOD 2)=0 AND b=0)) AS `ok`
|
||||
FROM t1
|
||||
ORDER BY a;
|
||||
a ok
|
||||
1 1
|
||||
2 1
|
||||
3 1
|
||||
4 1
|
||||
5 1
|
||||
6 1
|
||||
7 1
|
||||
8 1
|
||||
connection server_3;
|
||||
include/sync_with_master_gtid.inc
|
||||
include/stop_slave.inc
|
||||
connection server_2;
|
||||
include/stop_slave.inc
|
||||
CHANGE MASTER 'm2' to master_port=MYPORT_3 , master_host='127.0.0.1', master_user='root', master_use_gtid=slave_pos;
|
||||
connection server_1;
|
||||
SET SESSION gtid_domain_id= 1;
|
||||
BEGIN;
|
||||
UPDATE t1 SET b=11 WHERE a=1;
|
||||
UPDATE t1 SET b=11 WHERE a=7;
|
||||
COMMIT;
|
||||
UPDATE t1 SET b=12 WHERE a=3;
|
||||
SET SESSION gtid_domain_id= 1;
|
||||
connection server_3;
|
||||
SET SESSION gtid_domain_id=3;
|
||||
BEGIN;
|
||||
UPDATE t1 SET b=13 WHERE a=5;
|
||||
UPDATE t1 SET b=13 WHERE a=3;
|
||||
COMMIT;
|
||||
UPDATE t1 SET b=14 WHERE a=7;
|
||||
include/save_master_gtid.inc
|
||||
connection server_2b;
|
||||
BEGIN;
|
||||
SELECT * FROM t1 WHERE a=1 FOR UPDATE;
|
||||
a b
|
||||
1 1
|
||||
SELECT * FROM t1 WHERE a=5 FOR UPDATE;
|
||||
a b
|
||||
5 3
|
||||
START ALL SLAVES;
|
||||
Warnings:
|
||||
Note 1937 SLAVE 'm2' started
|
||||
Note 1937 SLAVE '' started
|
||||
connection server_2b;
|
||||
ROLLBACK;
|
||||
connection server_1;
|
||||
include/save_master_gtid.inc
|
||||
connection server_2;
|
||||
include/sync_with_master_gtid.inc
|
||||
connection server_3;
|
||||
include/save_master_gtid.inc
|
||||
connection server_2;
|
||||
include/sync_with_master_gtid.inc
|
||||
SELECT a, (
|
||||
(a=1 AND b=11) OR
|
||||
(a=3 AND (b=12 OR b=13)) OR
|
||||
(a=5 AND b=13) OR
|
||||
(a=7 AND (b=11 OR b=14)) OR
|
||||
((a MOD 2)=0 AND b=0)) AS `ok`
|
||||
FROM t1
|
||||
ORDER BY a;
|
||||
a ok
|
||||
1 1
|
||||
2 1
|
||||
3 1
|
||||
4 1
|
||||
5 1
|
||||
6 1
|
||||
7 1
|
||||
8 1
|
||||
SET default_master_connection = 'm2';
|
||||
include/stop_slave.inc
|
||||
RESET SLAVE 'm2' ALL;
|
||||
SET default_master_connection = '';
|
||||
connection server_3;
|
||||
include/start_slave.inc
|
||||
disconnect server_2b;
|
||||
connection server_1;
|
||||
DROP TABLE t1;
|
||||
connection server_2;
|
||||
include/stop_slave.inc
|
||||
SET GLOBAL slave_parallel_threads=@old_parallel_threads;
|
||||
set global slave_parallel_mode= @old_parallel_mode;
|
||||
SET GLOBAL lock_wait_timeout= @old_timeout;
|
||||
SET GLOBAL innodb_lock_wait_timeout= @old_innodb_timeout;
|
||||
include/start_slave.inc
|
||||
include/rpl_end.inc
|
17
mysql-test/suite/rpl/t/rpl_mdev33798.cnf
Normal file
17
mysql-test/suite/rpl/t/rpl_mdev33798.cnf
Normal file
@ -0,0 +1,17 @@
|
||||
!include suite/rpl/my.cnf
|
||||
|
||||
[mysqld.1]
|
||||
log-slave-updates
|
||||
loose-innodb
|
||||
|
||||
[mysqld.2]
|
||||
log-slave-updates
|
||||
loose-innodb
|
||||
|
||||
[mysqld.3]
|
||||
log-slave-updates
|
||||
loose-innodb
|
||||
|
||||
[ENV]
|
||||
SERVER_MYPORT_3= @mysqld.3.port
|
||||
SERVER_MYSOCK_3= @mysqld.3.socket
|
182
mysql-test/suite/rpl/t/rpl_mdev33798.test
Normal file
182
mysql-test/suite/rpl/t/rpl_mdev33798.test
Normal file
@ -0,0 +1,182 @@
|
||||
--source include/have_innodb.inc
|
||||
--source include/have_log_bin.inc
|
||||
--let $rpl_topology=1->2,1->3
|
||||
--source include/rpl_init.inc
|
||||
--connect (server_2b,127.0.0.1,root,,,$SERVER_MYPORT_2)
|
||||
|
||||
--connection server_2
|
||||
SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads;
|
||||
SET @old_parallel_mode= @@GLOBAL.slave_parallel_mode;
|
||||
SET @old_timeout= @@GLOBAL.lock_wait_timeout;
|
||||
SET @old_innodb_timeout= @@GLOBAL.innodb_lock_wait_timeout;
|
||||
--source include/stop_slave.inc
|
||||
SET GLOBAL slave_parallel_threads=5;
|
||||
set global slave_parallel_mode= aggressive;
|
||||
# High timeout so we get replication sync error and test failure if the
|
||||
# conflict handling is insufficient and lock wait timeout occurs.
|
||||
SET GLOBAL lock_wait_timeout= 86400;
|
||||
SET GLOBAL innodb_lock_wait_timeout= 86400;
|
||||
SET STATEMENT sql_log_bin=0 FOR ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
|
||||
--source include/start_slave.inc
|
||||
|
||||
--connection server_1
|
||||
CREATE TABLE t1 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB;
|
||||
INSERT INTO t1 VALUES (1, 0), (2, 0), (3, 0), (4, 0), (5, 0), (6, 0), (7, 0), (8, 0);
|
||||
--save_master_pos
|
||||
|
||||
--connection server_2
|
||||
--sync_with_master
|
||||
--source include/stop_slave.inc
|
||||
|
||||
# Test the following scenario:
|
||||
#
|
||||
# Transactions T1, T2 in domain 1, U1, U2 in domain 2.
|
||||
# Wait cycle T1->U2->U1->T2->T1 as follows:
|
||||
# T1 row lock wait on U2
|
||||
# U2 wait_for_prior_commit on U1
|
||||
# U1 row lock wait on T2
|
||||
# T2 wait_for_prior_commit on T1
|
||||
#
|
||||
# Test that the wait cycle is broken correctly with deadlock kill.
|
||||
|
||||
--connection server_2b
|
||||
# Temporarily block T1 and U1.
|
||||
BEGIN;
|
||||
SELECT * FROM t1 WHERE a=1 FOR UPDATE;
|
||||
SELECT * FROM t1 WHERE a=5 FOR UPDATE;
|
||||
|
||||
--connection server_1
|
||||
|
||||
SET SESSION gtid_domain_id= 1;
|
||||
# T1 in domain 1
|
||||
BEGIN;
|
||||
UPDATE t1 SET b=1 WHERE a=1;
|
||||
UPDATE t1 SET b=1 WHERE a=7;
|
||||
COMMIT;
|
||||
# T2 in domain 1
|
||||
UPDATE t1 SET b=2 WHERE a=3;
|
||||
|
||||
SET SESSION gtid_domain_id=2;
|
||||
# U1 in domain 2
|
||||
BEGIN;
|
||||
UPDATE t1 SET b=3 WHERE a=5;
|
||||
UPDATE t1 SET b=3 WHERE a=3;
|
||||
COMMIT;
|
||||
# U2 in domain 2
|
||||
UPDATE t1 SET b=4 WHERE a=7;
|
||||
SET SESSION gtid_domain_id= 0;
|
||||
--source include/save_master_gtid.inc
|
||||
|
||||
--connection server_2
|
||||
--source include/start_slave.inc
|
||||
# Wait until T2, U2 are holding the row locks.
|
||||
--let $wait_condition= SELECT COUNT(*)=2 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE state LIKE '%Waiting for prior transaction to commit%'
|
||||
--source include/wait_condition.inc
|
||||
|
||||
# Then let T1, U1 continue to conflict on the row locks, and check that
|
||||
# replication correctly handles the conflict.
|
||||
--connection server_2b
|
||||
ROLLBACK;
|
||||
|
||||
--connection server_2
|
||||
--source include/sync_with_master_gtid.inc
|
||||
|
||||
# Allow either domain to "win" on the conflicting updates.
|
||||
SELECT a, (
|
||||
(a=1 AND b=1) OR
|
||||
(a=3 AND (b=2 OR b=3)) OR
|
||||
(a=5 AND b=3) OR
|
||||
(a=7 AND (b=1 OR b=4)) OR
|
||||
((a MOD 2)=0 AND b=0)) AS `ok`
|
||||
FROM t1
|
||||
ORDER BY a;
|
||||
|
||||
# Now try the same thing with multi-source replication.
|
||||
|
||||
# Make server_3 a second master
|
||||
--connection server_3
|
||||
--source include/sync_with_master_gtid.inc
|
||||
--source include/stop_slave.inc
|
||||
|
||||
--connection server_2
|
||||
--source include/stop_slave.inc
|
||||
--replace_result $SERVER_MYPORT_3 MYPORT_3
|
||||
eval CHANGE MASTER 'm2' to master_port=$SERVER_MYPORT_3 , master_host='127.0.0.1', master_user='root', master_use_gtid=slave_pos;
|
||||
|
||||
--connection server_1
|
||||
|
||||
SET SESSION gtid_domain_id= 1;
|
||||
# T1 in domain 1
|
||||
BEGIN;
|
||||
UPDATE t1 SET b=11 WHERE a=1;
|
||||
UPDATE t1 SET b=11 WHERE a=7;
|
||||
COMMIT;
|
||||
# T2 in domain 1
|
||||
UPDATE t1 SET b=12 WHERE a=3;
|
||||
SET SESSION gtid_domain_id= 1;
|
||||
|
||||
--connection server_3
|
||||
SET SESSION gtid_domain_id=3;
|
||||
# U1 in domain 3
|
||||
BEGIN;
|
||||
UPDATE t1 SET b=13 WHERE a=5;
|
||||
UPDATE t1 SET b=13 WHERE a=3;
|
||||
COMMIT;
|
||||
# U2 in domain 3
|
||||
UPDATE t1 SET b=14 WHERE a=7;
|
||||
--source include/save_master_gtid.inc
|
||||
|
||||
--connection server_2b
|
||||
# Temporarily block T1 and U1.
|
||||
BEGIN;
|
||||
SELECT * FROM t1 WHERE a=1 FOR UPDATE;
|
||||
SELECT * FROM t1 WHERE a=5 FOR UPDATE;
|
||||
|
||||
START ALL SLAVES;
|
||||
# Wait until T2, U2 are holding the row locks.
|
||||
--let $wait_condition= SELECT COUNT(*)=2 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE state LIKE '%Waiting for prior transaction to commit%'
|
||||
--source include/wait_condition.inc
|
||||
|
||||
--connection server_2b
|
||||
ROLLBACK;
|
||||
|
||||
--connection server_1
|
||||
--source include/save_master_gtid.inc
|
||||
--connection server_2
|
||||
--source include/sync_with_master_gtid.inc
|
||||
--connection server_3
|
||||
--source include/save_master_gtid.inc
|
||||
--connection server_2
|
||||
--source include/sync_with_master_gtid.inc
|
||||
|
||||
SELECT a, (
|
||||
(a=1 AND b=11) OR
|
||||
(a=3 AND (b=12 OR b=13)) OR
|
||||
(a=5 AND b=13) OR
|
||||
(a=7 AND (b=11 OR b=14)) OR
|
||||
((a MOD 2)=0 AND b=0)) AS `ok`
|
||||
FROM t1
|
||||
ORDER BY a;
|
||||
|
||||
SET default_master_connection = 'm2';
|
||||
--source include/stop_slave.inc
|
||||
RESET SLAVE 'm2' ALL;
|
||||
SET default_master_connection = '';
|
||||
|
||||
--connection server_3
|
||||
--source include/start_slave.inc
|
||||
|
||||
# Cleanup
|
||||
|
||||
--disconnect server_2b
|
||||
--connection server_1
|
||||
DROP TABLE t1;
|
||||
--connection server_2
|
||||
--source include/stop_slave.inc
|
||||
SET GLOBAL slave_parallel_threads=@old_parallel_threads;
|
||||
set global slave_parallel_mode= @old_parallel_mode;
|
||||
SET GLOBAL lock_wait_timeout= @old_timeout;
|
||||
SET GLOBAL innodb_lock_wait_timeout= @old_innodb_timeout;
|
||||
--source include/start_slave.inc
|
||||
|
||||
--source include/rpl_end.inc
|
@ -5400,14 +5400,37 @@ thd_rpl_deadlock_check(MYSQL_THD thd, MYSQL_THD other_thd)
|
||||
return 0;
|
||||
if (!rgi->is_parallel_exec)
|
||||
return 0;
|
||||
if (rgi->rli != other_rgi->rli)
|
||||
return 0;
|
||||
if (!rgi->gtid_sub_id || !other_rgi->gtid_sub_id)
|
||||
return 0;
|
||||
if (rgi->current_gtid.domain_id != other_rgi->current_gtid.domain_id)
|
||||
return 0;
|
||||
if (rgi->gtid_sub_id > other_rgi->gtid_sub_id)
|
||||
return 0;
|
||||
if (rgi->rli == other_rgi->rli)
|
||||
{
|
||||
/*
|
||||
Within the same master connection, we can compare transaction order on
|
||||
the GTID sub_id, and rollback the later transaction to allow the earlier
|
||||
transaction to commit first.
|
||||
*/
|
||||
if (!rgi->gtid_sub_id || !other_rgi->gtid_sub_id)
|
||||
return 0;
|
||||
if (rgi->gtid_sub_id > other_rgi->gtid_sub_id)
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
Lock conflicts between different master connection should usually not
|
||||
occur, but could still happen if user is running some special setup that
|
||||
tolerates conflicting updates (or in case of user error). We do not have a
|
||||
pre-defined ordering of transactions in this case, but we still need to
|
||||
handle conflicts in _some_ way to avoid undetected deadlocks and hangs.
|
||||
|
||||
We do this by rolling back and retrying any transaction that is being
|
||||
_optimistically_ applied. This can be overly conservative in some cases,
|
||||
but should be fine as conflicts between different master connections are
|
||||
not expected to be common. And it ensures that we won't end up in a
|
||||
deadlock and hang due to a transaction doing wait_for_prior_commit while
|
||||
holding locks that block something in another master connection.
|
||||
*/
|
||||
if (other_rgi->speculation != rpl_group_info::SPECULATE_OPTIMISTIC)
|
||||
return 0;
|
||||
}
|
||||
/*
|
||||
This transaction is about to wait for another transaction that is required
|
||||
by replication binlog order to commit after. This would cause a deadlock.
|
||||
|
Loading…
x
Reference in New Issue
Block a user