MDEV-6549, failing to update gtid_slave_pos for a transaction that was retried.
The bug was that in some cases, if a replicated transaction was rolled back due to deadlock, during the subsequent retry of that transaction, the gtid_slave_pos would _not_ be updated with the new GTID, leaving the GTID position of the slave incorrect. Fix this by ensuring during the retry that we clear the flag that marks that the GTID has already been recorded in gtid_slave_pos, so that the update of gtid_slave_pos will be done again during the retry. In the original bug, the symptom was an assertion due to OPTION_GTID_BEGIN not being cleared during the retry of the transaction. The reason was some code in handling of a COMMIT query event, which would not clear the flag when not recording a GTID in gtid_slave_pos. This commit also fixes that code to always clear the OPTION_GTID_BEGIN flag for clarity, though it is actually not possible for OPTION_GTID_BEGIN to become set unless a GTID is pending for update (after fixing the bug described above).
This commit is contained in:
parent
354f3f1f9b
commit
ec05fea0a0
@ -793,6 +793,7 @@ SET debug_sync='now WAIT_FOR master_queued2';
|
|||||||
SET debug_sync='now SIGNAL master_cont1';
|
SET debug_sync='now SIGNAL master_cont1';
|
||||||
SET debug_sync='RESET';
|
SET debug_sync='RESET';
|
||||||
include/start_slave.inc
|
include/start_slave.inc
|
||||||
|
include/stop_slave.inc
|
||||||
SELECT * FROM t4 ORDER BY a;
|
SELECT * FROM t4 ORDER BY a;
|
||||||
a b
|
a b
|
||||||
1 NULL
|
1 NULL
|
||||||
@ -801,6 +802,42 @@ a b
|
|||||||
5 NULL
|
5 NULL
|
||||||
6 6
|
6 6
|
||||||
7 NULL
|
7 NULL
|
||||||
|
DELETE FROM t4;
|
||||||
|
INSERT INTO t4 VALUES (1,NULL), (2,2), (3,NULL), (4,4), (5, NULL), (6, 6);
|
||||||
|
SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued1 WAIT_FOR master_cont1';
|
||||||
|
UPDATE t4 SET b=NULL WHERE a=6;
|
||||||
|
SET debug_sync='now WAIT_FOR master_queued1';
|
||||||
|
SET @old_format= @@SESSION.binlog_format;
|
||||||
|
SET binlog_format='statement';
|
||||||
|
SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued2';
|
||||||
|
DELETE FROM t4 WHERE b <= 1;
|
||||||
|
SET debug_sync='now WAIT_FOR master_queued2';
|
||||||
|
SET debug_sync='now SIGNAL master_cont1';
|
||||||
|
SET @old_format=@@GLOBAL.binlog_format;
|
||||||
|
SET debug_sync='RESET';
|
||||||
|
SET @old_dbug= @@GLOBAL.debug_dbug;
|
||||||
|
SET GLOBAL debug_dbug="+d,disable_thd_need_ordering_with";
|
||||||
|
include/start_slave.inc
|
||||||
|
SET GLOBAL debug_dbug=@old_dbug;
|
||||||
|
SELECT * FROM t4 ORDER BY a;
|
||||||
|
a b
|
||||||
|
1 NULL
|
||||||
|
2 2
|
||||||
|
3 NULL
|
||||||
|
4 4
|
||||||
|
5 NULL
|
||||||
|
6 NULL
|
||||||
|
SET @last_gtid= 'GTID';
|
||||||
|
SELECT IF(@@gtid_slave_pos LIKE CONCAT('%',@last_gtid,'%'), "GTID found ok",
|
||||||
|
CONCAT("GTID ", @last_gtid, " not found in gtid_slave_pos=", @@gtid_slave_pos))
|
||||||
|
AS result;
|
||||||
|
result
|
||||||
|
GTID found ok
|
||||||
|
SELECT "ROW FOUND" AS `Is the row found?`
|
||||||
|
FROM mysql.gtid_slave_pos
|
||||||
|
WHERE CONCAT(domain_id, "-", server_id, "-", seq_no) = @last_gtid;
|
||||||
|
Is the row found?
|
||||||
|
ROW FOUND
|
||||||
*** MDEV-5938: Exec_master_log_pos not updated at log rotate in parallel replication ***
|
*** MDEV-5938: Exec_master_log_pos not updated at log rotate in parallel replication ***
|
||||||
include/stop_slave.inc
|
include/stop_slave.inc
|
||||||
SET GLOBAL slave_parallel_threads=1;
|
SET GLOBAL slave_parallel_threads=1;
|
||||||
|
@ -1246,10 +1246,78 @@ SET debug_sync='RESET';
|
|||||||
--connection server_2
|
--connection server_2
|
||||||
--source include/start_slave.inc
|
--source include/start_slave.inc
|
||||||
--sync_with_master
|
--sync_with_master
|
||||||
|
--source include/stop_slave.inc
|
||||||
|
|
||||||
SELECT * FROM t4 ORDER BY a;
|
SELECT * FROM t4 ORDER BY a;
|
||||||
|
|
||||||
|
|
||||||
|
# MDEV-6549, failing to update gtid_slave_pos for a transaction that was retried.
|
||||||
|
# The problem was that when a transaction updates the mysql.gtid_slave_pos
|
||||||
|
# table, it clears the flag that marks that there is a GTID position that
|
||||||
|
# needs to be updated. Then, if the transaction got killed after that due
|
||||||
|
# to a deadlock, the subsequent retry would fail to notice that the GTID needs
|
||||||
|
# to be recorded in gtid_slave_pos.
|
||||||
|
#
|
||||||
|
# (In the original bug report, the symptom was an assertion; this was however
|
||||||
|
# just a side effect of the missing update of gtid_slave_pos, which also
|
||||||
|
# happened to cause a missing clear of OPTION_GTID_BEGIN).
|
||||||
|
--connection server_1
|
||||||
|
DELETE FROM t4;
|
||||||
|
INSERT INTO t4 VALUES (1,NULL), (2,2), (3,NULL), (4,4), (5, NULL), (6, 6);
|
||||||
|
|
||||||
|
# Create two transactions that can run in parallel on the slave but cause
|
||||||
|
# a deadlock if the second runs before the first.
|
||||||
|
--connection con1
|
||||||
|
SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued1 WAIT_FOR master_cont1';
|
||||||
|
send UPDATE t4 SET b=NULL WHERE a=6;
|
||||||
|
--connection server_1
|
||||||
|
SET debug_sync='now WAIT_FOR master_queued1';
|
||||||
|
|
||||||
|
--connection con2
|
||||||
|
# Must use statement-based binlogging. Otherwise the transaction will not be
|
||||||
|
# binlogged at all, as it modifies no rows.
|
||||||
|
SET @old_format= @@SESSION.binlog_format;
|
||||||
|
SET binlog_format='statement';
|
||||||
|
SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued2';
|
||||||
|
send DELETE FROM t4 WHERE b <= 1;
|
||||||
|
|
||||||
|
--connection server_1
|
||||||
|
SET debug_sync='now WAIT_FOR master_queued2';
|
||||||
|
SET debug_sync='now SIGNAL master_cont1';
|
||||||
|
|
||||||
|
--connection con1
|
||||||
|
REAP;
|
||||||
|
--connection con2
|
||||||
|
REAP;
|
||||||
|
SET @old_format=@@GLOBAL.binlog_format;
|
||||||
|
SET debug_sync='RESET';
|
||||||
|
--save_master_pos
|
||||||
|
--let $last_gtid= `SELECT @@last_gtid`
|
||||||
|
|
||||||
|
--connection server_2
|
||||||
|
# Disable the usual skip of gap locks for transactions that are run in
|
||||||
|
# parallel, using DBUG. This allows the deadlock to occur, and this in turn
|
||||||
|
# triggers a retry of the second transaction, and the code that was buggy and
|
||||||
|
# caused the gtid_slave_pos update to be skipped in the retry.
|
||||||
|
SET @old_dbug= @@GLOBAL.debug_dbug;
|
||||||
|
SET GLOBAL debug_dbug="+d,disable_thd_need_ordering_with";
|
||||||
|
--source include/start_slave.inc
|
||||||
|
--sync_with_master
|
||||||
|
SET GLOBAL debug_dbug=@old_dbug;
|
||||||
|
|
||||||
|
SELECT * FROM t4 ORDER BY a;
|
||||||
|
# Check that the GTID of the second transaction was correctly recorded in
|
||||||
|
# gtid_slave_pos, in the variable as well as in the table.
|
||||||
|
--replace_result $last_gtid GTID
|
||||||
|
eval SET @last_gtid= '$last_gtid';
|
||||||
|
SELECT IF(@@gtid_slave_pos LIKE CONCAT('%',@last_gtid,'%'), "GTID found ok",
|
||||||
|
CONCAT("GTID ", @last_gtid, " not found in gtid_slave_pos=", @@gtid_slave_pos))
|
||||||
|
AS result;
|
||||||
|
SELECT "ROW FOUND" AS `Is the row found?`
|
||||||
|
FROM mysql.gtid_slave_pos
|
||||||
|
WHERE CONCAT(domain_id, "-", server_id, "-", seq_no) = @last_gtid;
|
||||||
|
|
||||||
|
|
||||||
--echo *** MDEV-5938: Exec_master_log_pos not updated at log rotate in parallel replication ***
|
--echo *** MDEV-5938: Exec_master_log_pos not updated at log rotate in parallel replication ***
|
||||||
--connection server_2
|
--connection server_2
|
||||||
--source include/stop_slave.inc
|
--source include/stop_slave.inc
|
||||||
|
@ -4265,28 +4265,31 @@ int Query_log_event::do_apply_event(rpl_group_info *rgi,
|
|||||||
Record any GTID in the same transaction, so slave state is
|
Record any GTID in the same transaction, so slave state is
|
||||||
transactionally consistent.
|
transactionally consistent.
|
||||||
*/
|
*/
|
||||||
if (current_stmt_is_commit && rgi->gtid_pending)
|
if (current_stmt_is_commit)
|
||||||
{
|
{
|
||||||
sub_id= rgi->gtid_sub_id;
|
|
||||||
rgi->gtid_pending= false;
|
|
||||||
|
|
||||||
gtid= rgi->current_gtid;
|
|
||||||
thd->variables.option_bits&= ~OPTION_GTID_BEGIN;
|
thd->variables.option_bits&= ~OPTION_GTID_BEGIN;
|
||||||
if (rpl_global_gtid_slave_state.record_gtid(thd, &gtid, sub_id, true, false))
|
if (rgi->gtid_pending)
|
||||||
{
|
{
|
||||||
int errcode= thd->get_stmt_da()->sql_errno();
|
sub_id= rgi->gtid_sub_id;
|
||||||
if (!is_parallel_retry_error(rgi, errcode))
|
rgi->gtid_pending= false;
|
||||||
rli->report(ERROR_LEVEL, ER_CANNOT_UPDATE_GTID_STATE,
|
|
||||||
rgi->gtid_info(),
|
gtid= rgi->current_gtid;
|
||||||
"Error during COMMIT: failed to update GTID state in "
|
if (rpl_global_gtid_slave_state.record_gtid(thd, &gtid, sub_id, true, false))
|
||||||
"%s.%s: %d: %s",
|
{
|
||||||
"mysql", rpl_gtid_slave_state_table_name.str,
|
int errcode= thd->get_stmt_da()->sql_errno();
|
||||||
errcode,
|
if (!is_parallel_retry_error(rgi, errcode))
|
||||||
thd->get_stmt_da()->message());
|
rli->report(ERROR_LEVEL, ER_CANNOT_UPDATE_GTID_STATE,
|
||||||
trans_rollback(thd);
|
rgi->gtid_info(),
|
||||||
sub_id= 0;
|
"Error during COMMIT: failed to update GTID state in "
|
||||||
thd->is_slave_error= 1;
|
"%s.%s: %d: %s",
|
||||||
goto end;
|
"mysql", rpl_gtid_slave_state_table_name.str,
|
||||||
|
errcode,
|
||||||
|
thd->get_stmt_da()->message());
|
||||||
|
trans_rollback(thd);
|
||||||
|
sub_id= 0;
|
||||||
|
thd->is_slave_error= 1;
|
||||||
|
goto end;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -318,6 +318,15 @@ do_retry:
|
|||||||
thd->wait_for_commit_ptr->unregister_wait_for_prior_commit();
|
thd->wait_for_commit_ptr->unregister_wait_for_prior_commit();
|
||||||
rgi->cleanup_context(thd, 1);
|
rgi->cleanup_context(thd, 1);
|
||||||
|
|
||||||
|
/*
|
||||||
|
If we retry due to a deadlock kill that occurred during the commit step, we
|
||||||
|
might have already made (but not committed) an update of table
|
||||||
|
mysql.gtid_slave_pos, and cleared the gtid_pending flag. Now we have
|
||||||
|
rolled back any such update, so we must set the gtid_pending flag back to
|
||||||
|
true so that we will do a new update when/if we succeed with the retry.
|
||||||
|
*/
|
||||||
|
rgi->gtid_pending= true;
|
||||||
|
|
||||||
mysql_mutex_lock(&rli->data_lock);
|
mysql_mutex_lock(&rli->data_lock);
|
||||||
++rli->retried_trans;
|
++rli->retried_trans;
|
||||||
statistic_increment(slave_retried_transactions, LOCK_status);
|
statistic_increment(slave_retried_transactions, LOCK_status);
|
||||||
|
@ -4346,6 +4346,7 @@ thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd)
|
|||||||
{
|
{
|
||||||
rpl_group_info *rgi, *other_rgi;
|
rpl_group_info *rgi, *other_rgi;
|
||||||
|
|
||||||
|
DBUG_EXECUTE_IF("disable_thd_need_ordering_with", return 1;);
|
||||||
if (!thd || !other_thd)
|
if (!thd || !other_thd)
|
||||||
return 1;
|
return 1;
|
||||||
rgi= thd->rgi_slave;
|
rgi= thd->rgi_slave;
|
||||||
@ -4361,7 +4362,7 @@ thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd)
|
|||||||
if (!rgi->commit_id || rgi->commit_id != other_rgi->commit_id)
|
if (!rgi->commit_id || rgi->commit_id != other_rgi->commit_id)
|
||||||
return 1;
|
return 1;
|
||||||
/*
|
/*
|
||||||
These two threads are doing parallel replication within the same
|
Otherwise, these two threads are doing parallel replication within the same
|
||||||
replication domain. Their commit order is already fixed, so we do not need
|
replication domain. Their commit order is already fixed, so we do not need
|
||||||
gap locks or similar to otherwise enforce ordering (and in fact such locks
|
gap locks or similar to otherwise enforce ordering (and in fact such locks
|
||||||
could lead to unnecessary deadlocks and transaction retry).
|
could lead to unnecessary deadlocks and transaction retry).
|
||||||
|
Loading…
x
Reference in New Issue
Block a user