Add MDEV-25999 & MDEV-36840 to MDEV-16091’s test
1. Stricten MDEV-16091’s `rpl.rpl_seconds_behind_master_spike` to prove that its fix covered MDEV-25999 as well 2. Add a Parallel Replication variant `rpl_seconds_behind_master_spike-parallel` to reproduce MDEV-36840 Because Parallel Replication queues Format Description Events as Position Update metaevents, the two variants use different breakpoints. To support them, this commit replaces the MDEV-10653 patch 8dad51481b with a live-activation solution. In contrast, previous iterations queued the activation pre-test and relied on other factors to enable the break.
This commit is contained in:
parent
ed805cf609
commit
9dda21b79e
@ -0,0 +1,59 @@
|
||||
include/master-slave.inc
|
||||
[connection master]
|
||||
connection slave;
|
||||
include/stop_slave.inc
|
||||
CHANGE MASTER TO MASTER_USE_GTID=NO;
|
||||
SET @@GLOBAL.slave_parallel_threads= 1;
|
||||
include/start_slave.inc
|
||||
# Future events must be logged at least 2 seconds after
|
||||
# the slave starts
|
||||
connection master;
|
||||
# Write events to ensure slave will be consistent with master
|
||||
create table t1 (a int);
|
||||
connection slave;
|
||||
# Enable breakpoints
|
||||
SET @save_dbug= @@GLOBAL.debug_dbug;
|
||||
SET @@GLOBAL.debug_dbug=
|
||||
"+d,pause_sql_thread_on_fde,negate_clock_diff_with_master";
|
||||
connection master;
|
||||
# Flush logs on master forces slave to generate a Format description
|
||||
# event in its relay log
|
||||
flush logs;
|
||||
connection slave;
|
||||
# On the next FDE, the slave should have the master CREATE/INSERT events
|
||||
SET DEBUG_SYNC='now WAIT_FOR paused_on_fde';
|
||||
# The relay log FDE has been processed - here we check to ensure it was
|
||||
# not considered in Seconds_Behind_Master calculation
|
||||
Seconds_Behind_Master = '2'
|
||||
# Resume slave SQL (worker) thread
|
||||
SET @@global.debug_dbug="-d,pause_sql_thread_on_fde";
|
||||
SET DEBUG_SYNC='now SIGNAL sql_thread_continue';
|
||||
#
|
||||
# MDEV-29639
|
||||
# When receiving an event after the SQL Thread idles,
|
||||
# Seconds_Behind_Master should not update before it updates
|
||||
# last_master_timestamp
|
||||
connection slave;
|
||||
include/stop_slave.inc
|
||||
set @@global.debug_dbug="+d,pause_sql_thread_on_next_event";
|
||||
include/start_slave.inc
|
||||
connection master;
|
||||
insert into t1 values(2);
|
||||
include/save_master_gtid.inc
|
||||
connection slave;
|
||||
set debug_sync='now wait_for paused_on_event';
|
||||
connection master;
|
||||
# Sleeping 1s to create a visible SBM gap between events
|
||||
insert into t1 values(3);
|
||||
include/save_master_gtid.inc
|
||||
connection slave;
|
||||
set debug_sync='now wait_for paused_on_event';
|
||||
include/stop_slave.inc
|
||||
set debug_sync='RESET';
|
||||
SET @@global.debug_dbug=$save_dbug;
|
||||
SET @@GLOBAL.slave_parallel_threads= DEFAULT;
|
||||
include/start_slave.inc
|
||||
include/sync_with_master_gtid.inc
|
||||
connection master;
|
||||
DROP TABLE t1;
|
||||
include/rpl_end.inc
|
@ -4,35 +4,29 @@ connection slave;
|
||||
include/stop_slave.inc
|
||||
CHANGE MASTER TO MASTER_USE_GTID=NO;
|
||||
include/start_slave.inc
|
||||
include/stop_slave.inc
|
||||
SET @save_dbug= @@GLOBAL.debug_dbug;
|
||||
SET @@global.debug_dbug="+d,pause_sql_thread_on_relay_fde_after_trans";
|
||||
SET @@global.debug_dbug="+d,negate_clock_diff_with_master";
|
||||
include/start_slave.inc
|
||||
# Future events must be logged at least 2 seconds after
|
||||
# the slave starts
|
||||
connection master;
|
||||
# Write events to ensure slave will be consistent with master
|
||||
create table t1 (a int);
|
||||
insert into t1 values (1);
|
||||
connection slave;
|
||||
# Enable breakpoints
|
||||
SET @save_dbug= @@GLOBAL.debug_dbug;
|
||||
SET @@GLOBAL.debug_dbug=
|
||||
"+d,pause_sql_thread_on_fde,negate_clock_diff_with_master";
|
||||
connection master;
|
||||
# Flush logs on master forces slave to generate a Format description
|
||||
# event in its relay log
|
||||
flush logs;
|
||||
connection slave;
|
||||
# On the next FDE, the slave should have the master CREATE/INSERT events
|
||||
SET DEBUG_SYNC='now WAIT_FOR paused_on_fde';
|
||||
select count(*)=1 from t1;
|
||||
count(*)=1
|
||||
1
|
||||
# The relay log FDE has been processed - here we check to ensure it was
|
||||
# not considered in Seconds_Behind_Master calculation
|
||||
connection slave1;
|
||||
# Safely resume slave SQL thread
|
||||
# Prove SQL thread is in state "debug sync point: now"
|
||||
Seconds_Behind_Master = '0'
|
||||
# Resume slave SQL (worker) thread
|
||||
SET @@global.debug_dbug="-d,pause_sql_thread_on_fde";
|
||||
SET DEBUG_SYNC='now SIGNAL sql_thread_continue';
|
||||
# Wait for SQL thread to continue into normal execution
|
||||
SET DEBUG_SYNC='RESET';
|
||||
#
|
||||
# MDEV-29639
|
||||
# When receiving an event after the SQL Thread idles,
|
||||
|
@ -0,0 +1,2 @@
|
||||
--let $slave_parallel_threads= 1
|
||||
--source ./rpl_seconds_behind_master_spike.test
|
@ -18,6 +18,8 @@
|
||||
#
|
||||
# References:
|
||||
# MDEV-16091: Seconds_Behind_Master spikes to millions of seconds
|
||||
# MDEV-25999: Unrealistic "Seconds behind master" value
|
||||
# MDEV-36840: Seconds_Behind_Master Spike at Log Rotation on Parallel Replica
|
||||
#
|
||||
--source include/have_debug.inc
|
||||
--source include/have_debug_sync.inc
|
||||
@ -29,12 +31,10 @@
|
||||
# Using_Gtid needs to start as NO before updating debug_dbug
|
||||
--source include/stop_slave.inc
|
||||
CHANGE MASTER TO MASTER_USE_GTID=NO;
|
||||
--source include/start_slave.inc
|
||||
|
||||
--source include/stop_slave.inc
|
||||
SET @save_dbug= @@GLOBAL.debug_dbug;
|
||||
SET @@global.debug_dbug="+d,pause_sql_thread_on_relay_fde_after_trans";
|
||||
SET @@global.debug_dbug="+d,negate_clock_diff_with_master";
|
||||
if ($slave_parallel_threads)
|
||||
{
|
||||
--eval SET @@GLOBAL.slave_parallel_threads= $slave_parallel_threads
|
||||
}
|
||||
--source include/start_slave.inc
|
||||
|
||||
--let $sleep_time=2
|
||||
@ -45,56 +45,37 @@ SET @@global.debug_dbug="+d,negate_clock_diff_with_master";
|
||||
--connection master
|
||||
--echo # Write events to ensure slave will be consistent with master
|
||||
create table t1 (a int);
|
||||
insert into t1 values (1);
|
||||
--let $t_master_events_logged= `SELECT UNIX_TIMESTAMP()`
|
||||
--sync_slave_with_master
|
||||
|
||||
--echo # Enable breakpoints
|
||||
SET @save_dbug= @@GLOBAL.debug_dbug;
|
||||
SET @@GLOBAL.debug_dbug=
|
||||
"+d,pause_sql_thread_on_fde,negate_clock_diff_with_master";
|
||||
|
||||
--connection master
|
||||
--echo # Flush logs on master forces slave to generate a Format description
|
||||
--echo # event in its relay log
|
||||
flush logs;
|
||||
|
||||
--connection slave
|
||||
--echo # On the next FDE, the slave should have the master CREATE/INSERT events
|
||||
--sleep $sleep_time
|
||||
SET DEBUG_SYNC='now WAIT_FOR paused_on_fde';
|
||||
select count(*)=1 from t1;
|
||||
|
||||
--echo # The relay log FDE has been processed - here we check to ensure it was
|
||||
--echo # not considered in Seconds_Behind_Master calculation
|
||||
--connection slave1
|
||||
let $sbm= query_get_value(SHOW SLAVE STATUS, Seconds_Behind_Master, 1);
|
||||
--let $t_now= `SELECT UNIX_TIMESTAMP()`
|
||||
|
||||
# Ensure Seconds_Behind_Master does not point beyond when we have proven the
|
||||
# events we have proven to have executed. The extra second is needed as a
|
||||
# buffer because the recorded times are not exact with when the events were
|
||||
# recorded on the master.
|
||||
if(`select $sbm > $t_now - $t_master_events_logged + 1`)
|
||||
{
|
||||
die "A relay log event was incorrectly used to set Seconds_Behind_Master";
|
||||
}
|
||||
|
||||
--echo # Safely resume slave SQL thread
|
||||
|
||||
--let $dbug_wait_state="debug sync point: now"
|
||||
--echo # Prove SQL thread is in state $dbug_wait_state
|
||||
--let $wait_condition= SELECT STATE=$dbug_wait_state from information_schema.PROCESSLIST where COMMAND="Slave_SQL"
|
||||
--source include/wait_condition.inc
|
||||
# `Seconds_Behind_Master` must remains 0.
|
||||
# * MDEV-16091:
|
||||
# It should not point at the FDE's `start_slave.inc` timestamp.
|
||||
# * MDEV-25999 & MDEV-36840:
|
||||
# It should not point at all in a new stream, for FDEs are not content.
|
||||
--let $status_items= Seconds_Behind_Master
|
||||
--source include/show_slave_status.inc
|
||||
|
||||
--echo # Resume slave SQL (worker) thread
|
||||
SET @@global.debug_dbug="-d,pause_sql_thread_on_fde";
|
||||
SET DEBUG_SYNC='now SIGNAL sql_thread_continue';
|
||||
|
||||
# We have to wait for the SQL thread to acknowledge the sql_thread_continue
|
||||
# signal. Otherwise the below RESET command can overwrite the signal before
|
||||
# the SQL thread is notified to proceed, causing it to "permanently" become
|
||||
# stuck awaiting the signal (until timeout is reached).
|
||||
|
||||
--echo # Wait for SQL thread to continue into normal execution
|
||||
--let $wait_condition= SELECT STATE!= $dbug_wait_state from information_schema.PROCESSLIST where COMMAND="Slave_SQL"
|
||||
--source include/wait_condition.inc
|
||||
|
||||
# Reset last sql_thread_continue signal
|
||||
SET DEBUG_SYNC='RESET';
|
||||
|
||||
|
||||
--echo #
|
||||
--echo # MDEV-29639
|
||||
--echo # When receiving an event after the SQL Thread idles,
|
||||
@ -178,6 +159,10 @@ while (!$caught_up)
|
||||
--source include/stop_slave.inc
|
||||
set debug_sync='RESET';
|
||||
SET @@global.debug_dbug=$save_dbug;
|
||||
if ($slave_parallel_threads)
|
||||
{
|
||||
SET @@GLOBAL.slave_parallel_threads= DEFAULT;
|
||||
}
|
||||
--source include/start_slave.inc
|
||||
--source include/sync_with_master_gtid.inc
|
||||
|
||||
|
@ -104,6 +104,14 @@ handle_queued_pos_update(THD *thd, rpl_parallel_thread::queued_event *qev)
|
||||
(e->force_abort && !rli->stop_for_until))
|
||||
return;
|
||||
|
||||
#ifdef ENABLED_DEBUG_SYNC
|
||||
DBUG_EXECUTE_IF("pause_sql_thread_on_fde",
|
||||
DBUG_ASSERT(!debug_sync_set_action(thd, STRING_WITH_LEN(
|
||||
"now SIGNAL paused_on_fde WAIT_FOR sql_thread_continue"
|
||||
)));
|
||||
);
|
||||
#endif
|
||||
|
||||
mysql_mutex_lock(&rli->data_lock);
|
||||
cmp= compare_log_name(rli->group_relay_log_name, qev->event_relay_log_name);
|
||||
if (cmp < 0)
|
||||
|
26
sql/slave.cc
26
sql/slave.cc
@ -4511,15 +4511,6 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli,
|
||||
{
|
||||
Gtid_log_event *gev= static_cast<Gtid_log_event *>(ev);
|
||||
|
||||
#ifdef ENABLED_DEBUG_SYNC
|
||||
DBUG_EXECUTE_IF(
|
||||
"pause_sql_thread_on_relay_fde_after_trans",
|
||||
{
|
||||
DBUG_SET("-d,pause_sql_thread_on_relay_fde_after_trans");
|
||||
DBUG_SET("+d,pause_sql_thread_on_next_relay_fde");
|
||||
});
|
||||
#endif
|
||||
|
||||
/*
|
||||
For GTID, allocate a new sub_id for the given domain_id.
|
||||
The sub_id must be allocated in increasing order of binlog order.
|
||||
@ -4671,16 +4662,13 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli,
|
||||
wsrep_after_statement(thd);
|
||||
#endif /* WITH_WSREP */
|
||||
#ifdef ENABLED_DEBUG_SYNC
|
||||
DBUG_EXECUTE_IF(
|
||||
"pause_sql_thread_on_next_relay_fde",
|
||||
if (ev && typ == FORMAT_DESCRIPTION_EVENT &&
|
||||
((Format_description_log_event *) ev)->is_relay_log_event()) {
|
||||
DBUG_ASSERT(!debug_sync_set_action(
|
||||
thd,
|
||||
STRING_WITH_LEN(
|
||||
"now SIGNAL paused_on_fde WAIT_FOR sql_thread_continue")));
|
||||
DBUG_SET("-d,pause_sql_thread_on_next_relay_fde");
|
||||
});
|
||||
// Note: Parallel Replication does not hit this point.
|
||||
DBUG_EXECUTE_IF("pause_sql_thread_on_fde",
|
||||
if (typ == FORMAT_DESCRIPTION_EVENT)
|
||||
DBUG_ASSERT(!debug_sync_set_action(thd, STRING_WITH_LEN(
|
||||
"now SIGNAL paused_on_fde WAIT_FOR sql_thread_continue"
|
||||
)));
|
||||
);
|
||||
#endif
|
||||
|
||||
DBUG_RETURN(exec_res);
|
||||
|
Loading…
x
Reference in New Issue
Block a user