MDEV-33475: --gtid-ignore-duplicates can double-apply event in case of parallel replication retry
When rolling back and retrying a transaction in parallel replication, don't release the domain ownership (used for --gtid-ignore-duplicates) as part of the rollback. Otherwise another master connection could grab the ownership and double-apply the transaction in parallel with the retry.

Reviewed-by: Brandon Nesterenko <brandon.nesterenko@mariadb.com>
Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org>
This commit is contained in:
parent
7bcacd767a
commit
0a6f46965a
@ -174,6 +174,105 @@ a
|
|||||||
10
|
10
|
||||||
11
|
11
|
||||||
12
|
12
|
||||||
|
*** MDEV-33475: --gtid-ignore-duplicate can double-apply event in case of parallel replication retry
|
||||||
|
connection server_2;
|
||||||
|
STOP SLAVE "c2b";
|
||||||
|
SET default_master_connection = "c2b";
|
||||||
|
include/wait_for_slave_to_stop.inc
|
||||||
|
STOP SLAVE "a2b";
|
||||||
|
SET default_master_connection = "a2b";
|
||||||
|
include/wait_for_slave_to_stop.inc
|
||||||
|
connection server_1;
|
||||||
|
CREATE TABLE t2 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB;
|
||||||
|
BEGIN;
|
||||||
|
INSERT INTO t2 VALUES (0, 0);
|
||||||
|
INSERT INTO t2 VALUES (1, 0);
|
||||||
|
INSERT INTO t2 VALUES (2, 0);
|
||||||
|
INSERT INTO t2 VALUES (3, 0);
|
||||||
|
INSERT INTO t2 VALUES (4, 0);
|
||||||
|
INSERT INTO t2 VALUES (5, 0);
|
||||||
|
INSERT INTO t2 VALUES (6, 0);
|
||||||
|
INSERT INTO t2 VALUES (7, 0);
|
||||||
|
INSERT INTO t2 VALUES (8, 0);
|
||||||
|
INSERT INTO t2 VALUES (9, 0);
|
||||||
|
COMMIT;
|
||||||
|
BEGIN;
|
||||||
|
INSERT INTO t2 VALUES (0+10, 100);
|
||||||
|
UPDATE t2 SET b=0 WHERE a<10;
|
||||||
|
INSERT INTO t2 VALUES (0+20, 200);
|
||||||
|
COMMIT;
|
||||||
|
BEGIN;
|
||||||
|
INSERT INTO t2 VALUES (1+10, 100);
|
||||||
|
UPDATE t2 SET b=1 WHERE a<10;
|
||||||
|
INSERT INTO t2 VALUES (1+20, 200);
|
||||||
|
COMMIT;
|
||||||
|
BEGIN;
|
||||||
|
INSERT INTO t2 VALUES (2+10, 100);
|
||||||
|
UPDATE t2 SET b=2 WHERE a<10;
|
||||||
|
INSERT INTO t2 VALUES (2+20, 200);
|
||||||
|
COMMIT;
|
||||||
|
BEGIN;
|
||||||
|
INSERT INTO t2 VALUES (3+10, 100);
|
||||||
|
UPDATE t2 SET b=3 WHERE a<10;
|
||||||
|
INSERT INTO t2 VALUES (3+20, 200);
|
||||||
|
COMMIT;
|
||||||
|
BEGIN;
|
||||||
|
INSERT INTO t2 VALUES (4+10, 100);
|
||||||
|
UPDATE t2 SET b=4 WHERE a<10;
|
||||||
|
INSERT INTO t2 VALUES (4+20, 200);
|
||||||
|
COMMIT;
|
||||||
|
BEGIN;
|
||||||
|
INSERT INTO t2 VALUES (5+10, 100);
|
||||||
|
UPDATE t2 SET b=5 WHERE a<10;
|
||||||
|
INSERT INTO t2 VALUES (5+20, 200);
|
||||||
|
COMMIT;
|
||||||
|
BEGIN;
|
||||||
|
INSERT INTO t2 VALUES (6+10, 100);
|
||||||
|
UPDATE t2 SET b=6 WHERE a<10;
|
||||||
|
INSERT INTO t2 VALUES (6+20, 200);
|
||||||
|
COMMIT;
|
||||||
|
BEGIN;
|
||||||
|
INSERT INTO t2 VALUES (7+10, 100);
|
||||||
|
UPDATE t2 SET b=7 WHERE a<10;
|
||||||
|
INSERT INTO t2 VALUES (7+20, 200);
|
||||||
|
COMMIT;
|
||||||
|
BEGIN;
|
||||||
|
INSERT INTO t2 VALUES (8+10, 100);
|
||||||
|
UPDATE t2 SET b=8 WHERE a<10;
|
||||||
|
INSERT INTO t2 VALUES (8+20, 200);
|
||||||
|
COMMIT;
|
||||||
|
BEGIN;
|
||||||
|
INSERT INTO t2 VALUES (9+10, 100);
|
||||||
|
UPDATE t2 SET b=9 WHERE a<10;
|
||||||
|
INSERT INTO t2 VALUES (9+20, 200);
|
||||||
|
COMMIT;
|
||||||
|
SELECT COUNT(*), SUM(a), SUM(b) FROM t2;
|
||||||
|
COUNT(*) SUM(a) SUM(b)
|
||||||
|
30 435 3090
|
||||||
|
include/save_master_gtid.inc
|
||||||
|
connection server_2;
|
||||||
|
SET @old_mode= @@GLOBAL.slave_parallel_mode;
|
||||||
|
SET GLOBAL slave_parallel_mode=aggressive;
|
||||||
|
SET default_master_connection = "a2b";
|
||||||
|
START SLAVE;
|
||||||
|
include/wait_for_slave_to_start.inc
|
||||||
|
SET default_master_connection = "c2b";
|
||||||
|
START SLAVE;
|
||||||
|
include/wait_for_slave_to_start.inc
|
||||||
|
include/sync_with_master_gtid.inc
|
||||||
|
SELECT COUNT(*), SUM(a), SUM(b) FROM t2;
|
||||||
|
COUNT(*) SUM(a) SUM(b)
|
||||||
|
30 435 3090
|
||||||
|
connection server_3;
|
||||||
|
include/sync_with_master_gtid.inc
|
||||||
|
SELECT COUNT(*), SUM(a), SUM(b) FROM t2;
|
||||||
|
COUNT(*) SUM(a) SUM(b)
|
||||||
|
30 435 3090
|
||||||
|
connection server_4;
|
||||||
|
include/sync_with_master_gtid.inc
|
||||||
|
SELECT COUNT(*), SUM(a), SUM(b) FROM t2;
|
||||||
|
COUNT(*) SUM(a) SUM(b)
|
||||||
|
30 435 3090
|
||||||
*** Test also with not using parallel replication.
|
*** Test also with not using parallel replication.
|
||||||
connection server_1;
|
connection server_1;
|
||||||
SET default_master_connection = "b2a";
|
SET default_master_connection = "b2a";
|
||||||
@ -474,6 +573,7 @@ Warnings:
|
|||||||
Note 1938 SLAVE 'a2b' stopped
|
Note 1938 SLAVE 'a2b' stopped
|
||||||
Note 1938 SLAVE 'c2b' stopped
|
Note 1938 SLAVE 'c2b' stopped
|
||||||
SET GLOBAL slave_parallel_threads= @old_parallel;
|
SET GLOBAL slave_parallel_threads= @old_parallel;
|
||||||
|
SET GLOBAL slave_parallel_mode= @old_mode;
|
||||||
SET GLOBAL gtid_ignore_duplicates= @old_ignore_duplicates;
|
SET GLOBAL gtid_ignore_duplicates= @old_ignore_duplicates;
|
||||||
connection server_3;
|
connection server_3;
|
||||||
SET GLOBAL gtid_domain_id=0;
|
SET GLOBAL gtid_domain_id=0;
|
||||||
@ -491,22 +591,22 @@ Note 1938 SLAVE 'a2d' stopped
|
|||||||
SET GLOBAL slave_parallel_threads= @old_parallel;
|
SET GLOBAL slave_parallel_threads= @old_parallel;
|
||||||
SET GLOBAL gtid_ignore_duplicates= @old_ignore_duplicates;
|
SET GLOBAL gtid_ignore_duplicates= @old_ignore_duplicates;
|
||||||
connection server_1;
|
connection server_1;
|
||||||
DROP TABLE t1;
|
DROP TABLE t1, t2;
|
||||||
ALTER TABLE mysql.gtid_slave_pos ENGINE=Aria;
|
ALTER TABLE mysql.gtid_slave_pos ENGINE=Aria;
|
||||||
include/reset_master_slave.inc
|
include/reset_master_slave.inc
|
||||||
disconnect server_1;
|
disconnect server_1;
|
||||||
connection server_2;
|
connection server_2;
|
||||||
DROP TABLE t1;
|
DROP TABLE t1, t2;
|
||||||
ALTER TABLE mysql.gtid_slave_pos ENGINE=Aria;
|
ALTER TABLE mysql.gtid_slave_pos ENGINE=Aria;
|
||||||
include/reset_master_slave.inc
|
include/reset_master_slave.inc
|
||||||
disconnect server_2;
|
disconnect server_2;
|
||||||
connection server_3;
|
connection server_3;
|
||||||
DROP TABLE t1;
|
DROP TABLE t1, t2;
|
||||||
ALTER TABLE mysql.gtid_slave_pos ENGINE=Aria;
|
ALTER TABLE mysql.gtid_slave_pos ENGINE=Aria;
|
||||||
include/reset_master_slave.inc
|
include/reset_master_slave.inc
|
||||||
disconnect server_3;
|
disconnect server_3;
|
||||||
connection server_4;
|
connection server_4;
|
||||||
DROP TABLE t1;
|
DROP TABLE t1, t2;
|
||||||
ALTER TABLE mysql.gtid_slave_pos ENGINE=Aria;
|
ALTER TABLE mysql.gtid_slave_pos ENGINE=Aria;
|
||||||
include/reset_master_slave.inc
|
include/reset_master_slave.inc
|
||||||
disconnect server_4;
|
disconnect server_4;
|
||||||
|
@ -173,6 +173,65 @@ SET default_master_connection = "a2b";
|
|||||||
SELECT * FROM t1 WHERE a >= 10 ORDER BY a;
|
SELECT * FROM t1 WHERE a >= 10 ORDER BY a;
|
||||||
|
|
||||||
|
|
||||||
|
--echo *** MDEV-33475: --gtid-ignore-duplicate can double-apply event in case of parallel replication retry
|
||||||
|
|
||||||
|
# Create a bunch of transactions that will cause conflicts and retries.
|
||||||
|
# The bug was that the retry code was not handling the --gtid-ignore-duplicates
|
||||||
|
# option, so events could be doubly-applied.
|
||||||
|
|
||||||
|
--connection server_2
|
||||||
|
STOP SLAVE "c2b";
|
||||||
|
SET default_master_connection = "c2b";
|
||||||
|
--source include/wait_for_slave_to_stop.inc
|
||||||
|
STOP SLAVE "a2b";
|
||||||
|
SET default_master_connection = "a2b";
|
||||||
|
--source include/wait_for_slave_to_stop.inc
|
||||||
|
|
||||||
|
--connection server_1
|
||||||
|
CREATE TABLE t2 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB;
|
||||||
|
BEGIN;
|
||||||
|
--let $i= 0
|
||||||
|
while ($i < 10) {
|
||||||
|
eval INSERT INTO t2 VALUES ($i, 0);
|
||||||
|
inc $i;
|
||||||
|
}
|
||||||
|
COMMIT;
|
||||||
|
|
||||||
|
--let $i= 0
|
||||||
|
while ($i < 10) {
|
||||||
|
BEGIN;
|
||||||
|
eval INSERT INTO t2 VALUES ($i+10, 100);
|
||||||
|
eval UPDATE t2 SET b=$i WHERE a<10;
|
||||||
|
eval INSERT INTO t2 VALUES ($i+20, 200);
|
||||||
|
COMMIT;
|
||||||
|
inc $i;
|
||||||
|
}
|
||||||
|
|
||||||
|
SELECT COUNT(*), SUM(a), SUM(b) FROM t2;
|
||||||
|
--source include/save_master_gtid.inc
|
||||||
|
|
||||||
|
--connection server_2
|
||||||
|
SET @old_mode= @@GLOBAL.slave_parallel_mode;
|
||||||
|
SET GLOBAL slave_parallel_mode=aggressive;
|
||||||
|
SET default_master_connection = "a2b";
|
||||||
|
START SLAVE;
|
||||||
|
--source include/wait_for_slave_to_start.inc
|
||||||
|
SET default_master_connection = "c2b";
|
||||||
|
START SLAVE;
|
||||||
|
--source include/wait_for_slave_to_start.inc
|
||||||
|
|
||||||
|
--source include/sync_with_master_gtid.inc
|
||||||
|
SELECT COUNT(*), SUM(a), SUM(b) FROM t2;
|
||||||
|
|
||||||
|
--connection server_3
|
||||||
|
--source include/sync_with_master_gtid.inc
|
||||||
|
SELECT COUNT(*), SUM(a), SUM(b) FROM t2;
|
||||||
|
|
||||||
|
--connection server_4
|
||||||
|
--source include/sync_with_master_gtid.inc
|
||||||
|
SELECT COUNT(*), SUM(a), SUM(b) FROM t2;
|
||||||
|
|
||||||
|
|
||||||
--echo *** Test also with not using parallel replication.
|
--echo *** Test also with not using parallel replication.
|
||||||
|
|
||||||
--connection server_1
|
--connection server_1
|
||||||
@ -414,6 +473,7 @@ SET GLOBAL gtid_domain_id=0;
|
|||||||
--sorted_result
|
--sorted_result
|
||||||
STOP ALL SLAVES;
|
STOP ALL SLAVES;
|
||||||
SET GLOBAL slave_parallel_threads= @old_parallel;
|
SET GLOBAL slave_parallel_threads= @old_parallel;
|
||||||
|
SET GLOBAL slave_parallel_mode= @old_mode;
|
||||||
SET GLOBAL gtid_ignore_duplicates= @old_ignore_duplicates;
|
SET GLOBAL gtid_ignore_duplicates= @old_ignore_duplicates;
|
||||||
|
|
||||||
--connection server_3
|
--connection server_3
|
||||||
@ -431,25 +491,25 @@ SET GLOBAL slave_parallel_threads= @old_parallel;
|
|||||||
SET GLOBAL gtid_ignore_duplicates= @old_ignore_duplicates;
|
SET GLOBAL gtid_ignore_duplicates= @old_ignore_duplicates;
|
||||||
|
|
||||||
--connection server_1
|
--connection server_1
|
||||||
DROP TABLE t1;
|
DROP TABLE t1, t2;
|
||||||
ALTER TABLE mysql.gtid_slave_pos ENGINE=Aria;
|
ALTER TABLE mysql.gtid_slave_pos ENGINE=Aria;
|
||||||
--source include/reset_master_slave.inc
|
--source include/reset_master_slave.inc
|
||||||
--disconnect server_1
|
--disconnect server_1
|
||||||
|
|
||||||
--connection server_2
|
--connection server_2
|
||||||
DROP TABLE t1;
|
DROP TABLE t1, t2;
|
||||||
ALTER TABLE mysql.gtid_slave_pos ENGINE=Aria;
|
ALTER TABLE mysql.gtid_slave_pos ENGINE=Aria;
|
||||||
--source include/reset_master_slave.inc
|
--source include/reset_master_slave.inc
|
||||||
--disconnect server_2
|
--disconnect server_2
|
||||||
|
|
||||||
--connection server_3
|
--connection server_3
|
||||||
DROP TABLE t1;
|
DROP TABLE t1, t2;
|
||||||
ALTER TABLE mysql.gtid_slave_pos ENGINE=Aria;
|
ALTER TABLE mysql.gtid_slave_pos ENGINE=Aria;
|
||||||
--source include/reset_master_slave.inc
|
--source include/reset_master_slave.inc
|
||||||
--disconnect server_3
|
--disconnect server_3
|
||||||
|
|
||||||
--connection server_4
|
--connection server_4
|
||||||
DROP TABLE t1;
|
DROP TABLE t1, t2;
|
||||||
ALTER TABLE mysql.gtid_slave_pos ENGINE=Aria;
|
ALTER TABLE mysql.gtid_slave_pos ENGINE=Aria;
|
||||||
--source include/reset_master_slave.inc
|
--source include/reset_master_slave.inc
|
||||||
--disconnect server_4
|
--disconnect server_4
|
||||||
|
@ -211,6 +211,13 @@ finish_event_group(rpl_parallel_thread *rpt, uint64 sub_id,
|
|||||||
signal_error_to_sql_driver_thread(thd, rgi, err);
|
signal_error_to_sql_driver_thread(thd, rgi, err);
|
||||||
thd->wait_for_commit_ptr= NULL;
|
thd->wait_for_commit_ptr= NULL;
|
||||||
|
|
||||||
|
/*
|
||||||
|
Calls to check_duplicate_gtid() must match up with
|
||||||
|
record_and_update_gtid() (or release_domain_owner() in error case). This
|
||||||
|
assertion tries to catch any missing release of the domain.
|
||||||
|
*/
|
||||||
|
DBUG_ASSERT(rgi->gtid_ignore_duplicate_state != rpl_group_info::GTID_DUPLICATE_OWNER);
|
||||||
|
|
||||||
mysql_mutex_lock(&entry->LOCK_parallel_entry);
|
mysql_mutex_lock(&entry->LOCK_parallel_entry);
|
||||||
/*
|
/*
|
||||||
We need to mark that this event group started its commit phase, in case we
|
We need to mark that this event group started its commit phase, in case we
|
||||||
@ -868,7 +875,13 @@ do_retry:
|
|||||||
});
|
});
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
rgi->cleanup_context(thd, 1);
|
/*
|
||||||
|
We are still applying the event group, even though we will roll it back
|
||||||
|
and retry it. So for --gtid-ignore-duplicates, keep ownership of the
|
||||||
|
domain during the retry so another master connection will not try to take
|
||||||
|
over and apply the same event group a second time (MDEV-33475).
|
||||||
|
*/
|
||||||
|
rgi->cleanup_context(thd, 1, 1 /* keep_domain_owner */);
|
||||||
wait_for_pending_deadlock_kill(thd, rgi);
|
wait_for_pending_deadlock_kill(thd, rgi);
|
||||||
thd->reset_killed();
|
thd->reset_killed();
|
||||||
thd->clear_error();
|
thd->clear_error();
|
||||||
|
@ -2248,7 +2248,7 @@ delete_or_keep_event_post_apply(rpl_group_info *rgi,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void rpl_group_info::cleanup_context(THD *thd, bool error)
|
void rpl_group_info::cleanup_context(THD *thd, bool error, bool keep_domain_owner)
|
||||||
{
|
{
|
||||||
DBUG_ENTER("rpl_group_info::cleanup_context");
|
DBUG_ENTER("rpl_group_info::cleanup_context");
|
||||||
DBUG_PRINT("enter", ("error: %d", (int) error));
|
DBUG_PRINT("enter", ("error: %d", (int) error));
|
||||||
@ -2298,7 +2298,7 @@ void rpl_group_info::cleanup_context(THD *thd, bool error)
|
|||||||
Ensure we always release the domain for others to process, when using
|
Ensure we release the domain for others to process when using
|
||||||
--gtid-ignore-duplicates.
|
--gtid-ignore-duplicates, unless the caller asked to keep ownership (keep_domain_owner).
|
||||||
*/
|
*/
|
||||||
if (gtid_ignore_duplicate_state != GTID_DUPLICATE_NULL)
|
if (gtid_ignore_duplicate_state != GTID_DUPLICATE_NULL && !keep_domain_owner)
|
||||||
rpl_global_gtid_slave_state->release_domain_owner(this);
|
rpl_global_gtid_slave_state->release_domain_owner(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -917,7 +917,7 @@ struct rpl_group_info
|
|||||||
}
|
}
|
||||||
|
|
||||||
void clear_tables_to_lock();
|
void clear_tables_to_lock();
|
||||||
void cleanup_context(THD *, bool);
|
void cleanup_context(THD *, bool, bool keep_domain_owner= false);
|
||||||
void slave_close_thread_tables(THD *);
|
void slave_close_thread_tables(THD *);
|
||||||
void mark_start_commit_no_lock();
|
void mark_start_commit_no_lock();
|
||||||
void mark_start_commit();
|
void mark_start_commit();
|
||||||
|
Loading…
x
Reference in New Issue
Block a user