MDEV-32265: Seconds_Behind_Master is inaccurate for delayed replication
If a replica is actively delaying a transaction when it is restarted (STOP SLAVE/START SLAVE), then once the SQL thread is back up, Seconds_Behind_Master will report 0 until the configured MASTER_DELAY has passed. The cause is as follows: before the restart, last_master_timestamp is updated to the timestamp of the delayed event. Then, after the restart, resetting sql_thread_caught_up to false is skipped, because the timestamp of the event has already been used for last_master_timestamp, and the two updates are grouped together in the same conditional block (guarded by last_master_timestamp < ev->when).

This patch fixes this by moving the reset of sql_thread_caught_up out of the timestamp-dependent block, so that it happens any time an idle parallel slave queues an event to a worker.

Note that the reset of sql_thread_caught_up is still left inside the check for internal events, as SBM should remain idle in such a case so that it does not "magically" begin incrementing.

Reviewed By:
============
Andrei Elkin <andrei.elkin@mariadb.com>
This commit is contained in:
parent
9517755165
commit
c5f776e9fa
@ -15,9 +15,6 @@ create table t2 (a int);
|
||||
include/sync_slave_sql_with_master.inc
|
||||
#
|
||||
# Pt 1) Ensure SBM is updated immediately upon arrival of the next event
|
||||
# Lock t1 on slave so the first received transaction does not complete/commit
|
||||
connection slave;
|
||||
LOCK TABLES t1 WRITE;
|
||||
connection master;
|
||||
# Sleep 2 to allow a buffer between events for SBM check
|
||||
insert into t1 values (0);
|
||||
@ -26,8 +23,16 @@ connection slave;
|
||||
# Waiting for transaction to arrive on slave and begin SQL Delay..
|
||||
# Validating SBM is updated on event arrival..
|
||||
# ..done
|
||||
# MDEV-32265. At time of STOP SLAVE, if the SQL Thread is currently
|
||||
# delaying a transaction; then when the reciprocal START SLAVE occurs,
|
||||
# if the event is still to be delayed, SBM should resume accordingly
|
||||
include/stop_slave.inc
|
||||
include/start_slave.inc
|
||||
connection slave;
|
||||
UNLOCK TABLES;
|
||||
# Waiting for replica to resume the delay for the transaction
|
||||
# Sleeping 1s to increment SBM
|
||||
# Ensuring Seconds_Behind_Master increases after sleeping..
|
||||
# ..done
|
||||
include/sync_with_master_gtid.inc
|
||||
#
|
||||
# Pt 2) If the worker threads have not entered an idle state, ensure
|
||||
|
@ -36,10 +36,6 @@ create table t2 (a int);
|
||||
--echo #
|
||||
--echo # Pt 1) Ensure SBM is updated immediately upon arrival of the next event
|
||||
|
||||
--echo # Lock t1 on slave so the first received transaction does not complete/commit
|
||||
--connection slave
|
||||
LOCK TABLES t1 WRITE;
|
||||
|
||||
--connection master
|
||||
--echo # Sleep 2 to allow a buffer between events for SBM check
|
||||
sleep 2;
|
||||
@ -65,8 +61,31 @@ if (`SELECT $sbm_trx1_arrive > ($seconds_since_idling + 1)`)
|
||||
}
|
||||
--echo # ..done
|
||||
|
||||
|
||||
--echo # MDEV-32265. At time of STOP SLAVE, if the SQL Thread is currently
|
||||
--echo # delaying a transaction; then when the reciprocal START SLAVE occurs,
|
||||
--echo # if the event is still to be delayed, SBM should resume accordingly
|
||||
|
||||
--source include/stop_slave.inc
|
||||
--source include/start_slave.inc
|
||||
|
||||
--connection slave
|
||||
UNLOCK TABLES;
|
||||
--echo # Waiting for replica to resume the delay for the transaction
|
||||
--let $wait_condition= SELECT count(*) FROM information_schema.processlist WHERE state LIKE 'Waiting until MASTER_DELAY seconds after master executed event';
|
||||
--source include/wait_condition.inc
|
||||
|
||||
--echo # Sleeping 1s to increment SBM
|
||||
sleep 1;
|
||||
|
||||
--echo # Ensuring Seconds_Behind_Master increases after sleeping..
|
||||
--let $sbm_trx1_after_1s_sleep= query_get_value(SHOW SLAVE STATUS, Seconds_Behind_Master, 1)
|
||||
if (`SELECT $sbm_trx1_after_1s_sleep <= $sbm_trx1_arrive`)
|
||||
{
|
||||
--echo # ..failed
|
||||
--die Seconds_Behind_Master did not increase after sleeping, but should have
|
||||
}
|
||||
--echo # ..done
|
||||
|
||||
--source include/sync_with_master_gtid.inc
|
||||
|
||||
--echo #
|
||||
|
@ -4249,8 +4249,8 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli,
|
||||
if (rli->last_master_timestamp < ev->when)
|
||||
{
|
||||
rli->last_master_timestamp= ev->when;
|
||||
rli->sql_thread_caught_up= false;
|
||||
}
|
||||
rli->sql_thread_caught_up= false;
|
||||
}
|
||||
|
||||
int res= rli->parallel.do_event(serial_rgi, ev, event_size);
|
||||
|
Loading…
x
Reference in New Issue
Block a user