MDEV-8354: out-of-order error with --gtid-ignore-duplicates and row-based replication
The --gtid-ignore-duplicates option was not working correctly with row-based replication. After a row event had completed, but before the transaction was committed, there was a small window in which another multi-source SQL thread could wrongly try to re-execute the same transaction, without properly ignoring the duplicate GTID. This would lead to a duplicate key error, an out-of-order GTID error, or similar. Thanks to Matt Neth for reporting this and giving an easy way to reproduce the issue.
This commit is contained in:
parent
93c039dd3c
commit
b89de2b2ce
@ -3,21 +3,25 @@
|
|||||||
[mysqld.1]
|
[mysqld.1]
|
||||||
log-slave-updates
|
log-slave-updates
|
||||||
loose-innodb
|
loose-innodb
|
||||||
|
binlog-format=mixed
|
||||||
|
|
||||||
[mysqld.2]
|
[mysqld.2]
|
||||||
log-slave-updates
|
log-slave-updates
|
||||||
loose-innodb
|
loose-innodb
|
||||||
|
binlog-format=mixed
|
||||||
|
|
||||||
[mysqld.3]
|
[mysqld.3]
|
||||||
log-bin=server3-bin
|
log-bin=server3-bin
|
||||||
log-slave-updates
|
log-slave-updates
|
||||||
loose-innodb
|
loose-innodb
|
||||||
|
binlog-format=mixed
|
||||||
|
|
||||||
[mysqld.4]
|
[mysqld.4]
|
||||||
server-id=4
|
server-id=4
|
||||||
log-bin=server4-bin
|
log-bin=server4-bin
|
||||||
log-slave-updates
|
log-slave-updates
|
||||||
loose-innodb
|
loose-innodb
|
||||||
|
binlog-format=mixed
|
||||||
|
|
||||||
[ENV]
|
[ENV]
|
||||||
SERVER_MYPORT_4= @mysqld.4.port
|
SERVER_MYPORT_4= @mysqld.4.port
|
||||||
|
@ -242,6 +242,145 @@ a
|
|||||||
24
|
24
|
||||||
25
|
25
|
||||||
26
|
26
|
||||||
|
*** MDEV-8354: out-of-order error with --gtid-ignore-duplicates and row-based replication ***
|
||||||
|
SET default_master_connection = "b2a";
|
||||||
|
STOP SLAVE;
|
||||||
|
include/wait_for_slave_to_stop.inc
|
||||||
|
SET default_master_connection = "c2a";
|
||||||
|
STOP SLAVE;
|
||||||
|
include/wait_for_slave_to_stop.inc
|
||||||
|
SET default_master_connection = "c2b";
|
||||||
|
STOP SLAVE;
|
||||||
|
include/wait_for_slave_to_stop.inc
|
||||||
|
SET default_master_connection = "b2c";
|
||||||
|
STOP SLAVE;
|
||||||
|
include/wait_for_slave_to_stop.inc
|
||||||
|
SET @old_slave_mode=@@GLOBAL.slave_exec_mode;
|
||||||
|
SET GLOBAL slave_exec_mode=IDEMPOTENT;
|
||||||
|
SET @old_strict=@@GLOBAL.gtid_strict_mode;
|
||||||
|
SET GLOBAL gtid_strict_mode=1;
|
||||||
|
SET @old_dbug=@@GLOBAL.debug_dbug;
|
||||||
|
SET GLOBAL debug_dbug="+d,inject_sleep_gtid_100_x_x";
|
||||||
|
SET @old_domain=@@SESSION.gtid_domain_id;
|
||||||
|
SET @old_format=@@SESSION.binlog_format;
|
||||||
|
SET SESSION gtid_domain_id=100;
|
||||||
|
SET SESSION binlog_format='row';
|
||||||
|
INSERT INTO t1 VALUES (30);
|
||||||
|
INSERT INTO t1 VALUES (31);
|
||||||
|
INSERT INTO t1 VALUES (32);
|
||||||
|
INSERT INTO t1 VALUES (33);
|
||||||
|
INSERT INTO t1 VALUES (34);
|
||||||
|
INSERT INTO t1 VALUES (35);
|
||||||
|
INSERT INTO t1 VALUES (36);
|
||||||
|
INSERT INTO t1 VALUES (37);
|
||||||
|
INSERT INTO t1 VALUES (38);
|
||||||
|
INSERT INTO t1 VALUES (39);
|
||||||
|
INSERT INTO t1 VALUES (40);
|
||||||
|
INSERT INTO t1 VALUES (41);
|
||||||
|
INSERT INTO t1 VALUES (42);
|
||||||
|
INSERT INTO t1 VALUES (43);
|
||||||
|
INSERT INTO t1 VALUES (44);
|
||||||
|
INSERT INTO t1 VALUES (45);
|
||||||
|
INSERT INTO t1 VALUES (46);
|
||||||
|
INSERT INTO t1 VALUES (47);
|
||||||
|
INSERT INTO t1 VALUES (48);
|
||||||
|
INSERT INTO t1 VALUES (49);
|
||||||
|
SET SESSION gtid_domain_id=@old_domain;
|
||||||
|
SET SESSION binlog_format=@old_format;
|
||||||
|
include/save_master_gtid.inc
|
||||||
|
include/sync_with_master_gtid.inc
|
||||||
|
INSERT INTO t1 VALUES (50);
|
||||||
|
include/save_master_gtid.inc
|
||||||
|
SET default_master_connection = "b2c";
|
||||||
|
START SLAVE;
|
||||||
|
include/wait_for_slave_to_start.inc
|
||||||
|
SELECT MASTER_GTID_WAIT("GTID", 30);
|
||||||
|
MASTER_GTID_WAIT("GTID", 30)
|
||||||
|
0
|
||||||
|
SET default_master_connection = "b2a";
|
||||||
|
START SLAVE;
|
||||||
|
include/wait_for_slave_to_start.inc
|
||||||
|
SET default_master_connection = "c2a";
|
||||||
|
START SLAVE;
|
||||||
|
include/wait_for_slave_to_start.inc
|
||||||
|
include/sync_with_master_gtid.inc
|
||||||
|
SELECT * FROM t1 WHERE a >= 30 ORDER BY a;
|
||||||
|
a
|
||||||
|
30
|
||||||
|
31
|
||||||
|
32
|
||||||
|
33
|
||||||
|
34
|
||||||
|
35
|
||||||
|
36
|
||||||
|
37
|
||||||
|
38
|
||||||
|
39
|
||||||
|
40
|
||||||
|
41
|
||||||
|
42
|
||||||
|
43
|
||||||
|
44
|
||||||
|
45
|
||||||
|
46
|
||||||
|
47
|
||||||
|
48
|
||||||
|
49
|
||||||
|
50
|
||||||
|
SET default_master_connection = "c2b";
|
||||||
|
START SLAVE;
|
||||||
|
include/wait_for_slave_to_start.inc
|
||||||
|
include/sync_with_master_gtid.inc
|
||||||
|
SELECT * FROM t1 WHERE a >= 30 ORDER BY a;
|
||||||
|
a
|
||||||
|
30
|
||||||
|
31
|
||||||
|
32
|
||||||
|
33
|
||||||
|
34
|
||||||
|
35
|
||||||
|
36
|
||||||
|
37
|
||||||
|
38
|
||||||
|
39
|
||||||
|
40
|
||||||
|
41
|
||||||
|
42
|
||||||
|
43
|
||||||
|
44
|
||||||
|
45
|
||||||
|
46
|
||||||
|
47
|
||||||
|
48
|
||||||
|
49
|
||||||
|
50
|
||||||
|
include/sync_with_master_gtid.inc
|
||||||
|
SET GLOBAL debug_dbug=@old_dbug;
|
||||||
|
SELECT * FROM t1 WHERE a >= 30 ORDER BY a;
|
||||||
|
a
|
||||||
|
30
|
||||||
|
31
|
||||||
|
32
|
||||||
|
33
|
||||||
|
34
|
||||||
|
35
|
||||||
|
36
|
||||||
|
37
|
||||||
|
38
|
||||||
|
39
|
||||||
|
40
|
||||||
|
41
|
||||||
|
42
|
||||||
|
43
|
||||||
|
44
|
||||||
|
45
|
||||||
|
46
|
||||||
|
47
|
||||||
|
48
|
||||||
|
49
|
||||||
|
50
|
||||||
|
SET GLOBAL slave_exec_mode=@old_slave_mode;
|
||||||
|
SET GLOBAL gtid_strict_mode=@old_strict;
|
||||||
SET GLOBAL gtid_domain_id=0;
|
SET GLOBAL gtid_domain_id=0;
|
||||||
STOP ALL SLAVES;
|
STOP ALL SLAVES;
|
||||||
Warnings:
|
Warnings:
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
--source include/not_embedded.inc
|
--source include/not_embedded.inc
|
||||||
--source include/have_innodb.inc
|
--source include/have_innodb.inc
|
||||||
|
--source include/have_debug.inc
|
||||||
|
|
||||||
|
|
||||||
--echo *** Test all-to-all replication with --gtid-ignore-duplicates ***
|
--echo *** Test all-to-all replication with --gtid-ignore-duplicates ***
|
||||||
|
|
||||||
@ -258,6 +260,112 @@ SELECT * FROM t1 WHERE a >= 20 ORDER BY a;
|
|||||||
SELECT * FROM t1 WHERE a >= 20 ORDER BY a;
|
SELECT * FROM t1 WHERE a >= 20 ORDER BY a;
|
||||||
|
|
||||||
|
|
||||||
|
--echo *** MDEV-8354: out-of-order error with --gtid-ignore-duplicates and row-based replication ***
|
||||||
|
|
||||||
|
# Have only A->B and A->C initially.
|
||||||
|
--connection server_1
|
||||||
|
SET default_master_connection = "b2a";
|
||||||
|
STOP SLAVE;
|
||||||
|
--source include/wait_for_slave_to_stop.inc
|
||||||
|
SET default_master_connection = "c2a";
|
||||||
|
STOP SLAVE;
|
||||||
|
--source include/wait_for_slave_to_stop.inc
|
||||||
|
|
||||||
|
--connection server_2
|
||||||
|
SET default_master_connection = "c2b";
|
||||||
|
STOP SLAVE;
|
||||||
|
--source include/wait_for_slave_to_stop.inc
|
||||||
|
|
||||||
|
--connection server_3
|
||||||
|
SET default_master_connection = "b2c";
|
||||||
|
STOP SLAVE;
|
||||||
|
--source include/wait_for_slave_to_stop.inc
|
||||||
|
SET @old_slave_mode=@@GLOBAL.slave_exec_mode;
|
||||||
|
SET GLOBAL slave_exec_mode=IDEMPOTENT;
|
||||||
|
SET @old_strict=@@GLOBAL.gtid_strict_mode;
|
||||||
|
SET GLOBAL gtid_strict_mode=1;
|
||||||
|
|
||||||
|
SET @old_dbug=@@GLOBAL.debug_dbug;
|
||||||
|
# This will inject a small sleep that helps trigger the race. I did not manage
|
||||||
|
# to create a non-sleeping version with debug_sync for this; the problem is
|
||||||
|
# that once the bug is fixed, the race becomes impossible, so even with
|
||||||
|
# debug_sync at best we can check that the debug_sync times out. Which is
|
||||||
|
# just another way of adding a sleep.
|
||||||
|
#
|
||||||
|
# The bug was a race at this point where another multi-source connection
|
||||||
|
# could incorrectly re-apply the same GTID, in case of row-based replication.
|
||||||
|
SET GLOBAL debug_dbug="+d,inject_sleep_gtid_100_x_x";
|
||||||
|
|
||||||
|
--connection server_1
|
||||||
|
SET @old_domain=@@SESSION.gtid_domain_id;
|
||||||
|
SET @old_format=@@SESSION.binlog_format;
|
||||||
|
SET SESSION gtid_domain_id=100;
|
||||||
|
SET SESSION binlog_format='row';
|
||||||
|
INSERT INTO t1 VALUES (30);
|
||||||
|
INSERT INTO t1 VALUES (31);
|
||||||
|
INSERT INTO t1 VALUES (32);
|
||||||
|
INSERT INTO t1 VALUES (33);
|
||||||
|
INSERT INTO t1 VALUES (34);
|
||||||
|
INSERT INTO t1 VALUES (35);
|
||||||
|
INSERT INTO t1 VALUES (36);
|
||||||
|
INSERT INTO t1 VALUES (37);
|
||||||
|
INSERT INTO t1 VALUES (38);
|
||||||
|
INSERT INTO t1 VALUES (39);
|
||||||
|
INSERT INTO t1 VALUES (40);
|
||||||
|
INSERT INTO t1 VALUES (41);
|
||||||
|
INSERT INTO t1 VALUES (42);
|
||||||
|
INSERT INTO t1 VALUES (43);
|
||||||
|
INSERT INTO t1 VALUES (44);
|
||||||
|
INSERT INTO t1 VALUES (45);
|
||||||
|
INSERT INTO t1 VALUES (46);
|
||||||
|
INSERT INTO t1 VALUES (47);
|
||||||
|
INSERT INTO t1 VALUES (48);
|
||||||
|
INSERT INTO t1 VALUES (49);
|
||||||
|
SET SESSION gtid_domain_id=@old_domain;
|
||||||
|
SET SESSION binlog_format=@old_format;
|
||||||
|
--source include/save_master_gtid.inc
|
||||||
|
|
||||||
|
--connection server_2
|
||||||
|
--source include/sync_with_master_gtid.inc
|
||||||
|
INSERT INTO t1 VALUES (50);
|
||||||
|
--let $gtid=`SELECT @@last_gtid`
|
||||||
|
--source include/save_master_gtid.inc
|
||||||
|
|
||||||
|
--connection server_3
|
||||||
|
SET default_master_connection = "b2c";
|
||||||
|
START SLAVE;
|
||||||
|
--source include/wait_for_slave_to_start.inc
|
||||||
|
--replace_result $gtid GTID
|
||||||
|
eval SELECT MASTER_GTID_WAIT("$gtid", 30);
|
||||||
|
# The bug occurred here: the slave would get an out-of-order binlog error
|
||||||
|
# due to trying to re-apply the 100-x-x transaction.
|
||||||
|
|
||||||
|
# Restart stopped multi-source connections, and sync up.
|
||||||
|
--connection server_1
|
||||||
|
SET default_master_connection = "b2a";
|
||||||
|
START SLAVE;
|
||||||
|
--source include/wait_for_slave_to_start.inc
|
||||||
|
SET default_master_connection = "c2a";
|
||||||
|
START SLAVE;
|
||||||
|
--source include/wait_for_slave_to_start.inc
|
||||||
|
--source include/sync_with_master_gtid.inc
|
||||||
|
SELECT * FROM t1 WHERE a >= 30 ORDER BY a;
|
||||||
|
|
||||||
|
--connection server_2
|
||||||
|
SET default_master_connection = "c2b";
|
||||||
|
START SLAVE;
|
||||||
|
--source include/wait_for_slave_to_start.inc
|
||||||
|
--source include/sync_with_master_gtid.inc
|
||||||
|
SELECT * FROM t1 WHERE a >= 30 ORDER BY a;
|
||||||
|
|
||||||
|
--connection server_3
|
||||||
|
--source include/sync_with_master_gtid.inc
|
||||||
|
SET GLOBAL debug_dbug=@old_dbug;
|
||||||
|
SELECT * FROM t1 WHERE a >= 30 ORDER BY a;
|
||||||
|
SET GLOBAL slave_exec_mode=@old_slave_mode;
|
||||||
|
SET GLOBAL gtid_strict_mode=@old_strict;
|
||||||
|
|
||||||
|
|
||||||
# Clean up.
|
# Clean up.
|
||||||
--connection server_1
|
--connection server_1
|
||||||
SET GLOBAL gtid_domain_id=0;
|
SET GLOBAL gtid_domain_id=0;
|
||||||
|
@ -1788,6 +1788,13 @@ void rpl_group_info::cleanup_context(THD *thd, bool error)
|
|||||||
rli->clear_flag(Relay_log_info::IN_STMT);
|
rli->clear_flag(Relay_log_info::IN_STMT);
|
||||||
rli->clear_flag(Relay_log_info::IN_TRANSACTION);
|
rli->clear_flag(Relay_log_info::IN_TRANSACTION);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
Ensure we always release the domain for others to process, when using
|
||||||
|
--gtid-ignore-duplicates.
|
||||||
|
*/
|
||||||
|
if (gtid_ignore_duplicate_state != GTID_DUPLICATE_NULL)
|
||||||
|
rpl_global_gtid_slave_state.release_domain_owner(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1796,13 +1803,6 @@ void rpl_group_info::cleanup_context(THD *thd, bool error)
|
|||||||
thd->variables.option_bits&= ~OPTION_NO_FOREIGN_KEY_CHECKS;
|
thd->variables.option_bits&= ~OPTION_NO_FOREIGN_KEY_CHECKS;
|
||||||
thd->variables.option_bits&= ~OPTION_RELAXED_UNIQUE_CHECKS;
|
thd->variables.option_bits&= ~OPTION_RELAXED_UNIQUE_CHECKS;
|
||||||
|
|
||||||
/*
|
|
||||||
Ensure we always release the domain for others to process, when using
|
|
||||||
--gtid-ignore-duplicates.
|
|
||||||
*/
|
|
||||||
if (gtid_ignore_duplicate_state != GTID_DUPLICATE_NULL)
|
|
||||||
rpl_global_gtid_slave_state.release_domain_owner(this);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Reset state related to long_find_row notes in the error log:
|
Reset state related to long_find_row notes in the error log:
|
||||||
- timestamp
|
- timestamp
|
||||||
@ -1811,6 +1811,11 @@ void rpl_group_info::cleanup_context(THD *thd, bool error)
|
|||||||
reset_row_stmt_start_timestamp();
|
reset_row_stmt_start_timestamp();
|
||||||
unset_long_find_row_note_printed();
|
unset_long_find_row_note_printed();
|
||||||
|
|
||||||
|
DBUG_EXECUTE_IF("inject_sleep_gtid_100_x_x", {
|
||||||
|
if (current_gtid.domain_id == 100)
|
||||||
|
my_sleep(50000);
|
||||||
|
};);
|
||||||
|
|
||||||
DBUG_VOID_RETURN;
|
DBUG_VOID_RETURN;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user