From c5f776e9fa40d4543ecc0d07f847b9cc18a85e84 Mon Sep 17 00:00:00 2001 From: Brandon Nesterenko Date: Wed, 27 Sep 2023 14:39:03 -0600 Subject: [PATCH] MDEV-32265: seconds_behind_master is inaccurate for Delayed replication If a replica is actively delaying a transaction when it is restarted (STOP SLAVE/START SLAVE), then once the SQL thread is back up, Seconds_Behind_Master will report 0 until the configured MASTER_DELAY has passed. That is, before the restart, last_master_timestamp is updated to the timestamp of the delayed event. Then, after the restart, the negation of sql_thread_caught_up is skipped because the timestamp of the event has already been used for last_master_timestamp, and the two updates are grouped together in the same conditional block. This patch fixes this by moving the negation of sql_thread_caught_up out of the timestamp-dependent block, so that it is performed any time an idle parallel slave queues an event to a worker. Note that the negation of sql_thread_caught_up is still left inside the check for internal events, as SBM should remain idle in such a case so that it does not "magically" begin incrementing. 
Reviewed By: ============ Andrei Elkin --- .../suite/rpl/r/rpl_parallel_sbm.result | 13 ++++++--- mysql-test/suite/rpl/t/rpl_parallel_sbm.test | 29 +++++++++++++++---- sql/slave.cc | 2 +- 3 files changed, 34 insertions(+), 10 deletions(-) diff --git a/mysql-test/suite/rpl/r/rpl_parallel_sbm.result b/mysql-test/suite/rpl/r/rpl_parallel_sbm.result index f3cc8454510..7990a663f04 100644 --- a/mysql-test/suite/rpl/r/rpl_parallel_sbm.result +++ b/mysql-test/suite/rpl/r/rpl_parallel_sbm.result @@ -15,9 +15,6 @@ create table t2 (a int); include/sync_slave_sql_with_master.inc # # Pt 1) Ensure SBM is updated immediately upon arrival of the next event -# Lock t1 on slave so the first received transaction does not complete/commit -connection slave; -LOCK TABLES t1 WRITE; connection master; # Sleep 2 to allow a buffer between events for SBM check insert into t1 values (0); @@ -26,8 +23,16 @@ connection slave; # Waiting for transaction to arrive on slave and begin SQL Delay.. # Validating SBM is updated on event arrival.. # ..done +# MDEV-32265. At time of STOP SLAVE, if the SQL Thread is currently +# delaying a transaction; then when the reciprocal START SLAVE occurs, +# if the event is still to be delayed, SBM should resume accordingly +include/stop_slave.inc +include/start_slave.inc connection slave; -UNLOCK TABLES; +# Waiting for replica to resume the delay for the transaction +# Sleeping 1s to increment SBM +# Ensuring Seconds_Behind_Master increases after sleeping.. 
+# ..done include/sync_with_master_gtid.inc # # Pt 2) If the worker threads have not entered an idle state, ensure diff --git a/mysql-test/suite/rpl/t/rpl_parallel_sbm.test b/mysql-test/suite/rpl/t/rpl_parallel_sbm.test index e738f55e7ec..58c0db15e47 100644 --- a/mysql-test/suite/rpl/t/rpl_parallel_sbm.test +++ b/mysql-test/suite/rpl/t/rpl_parallel_sbm.test @@ -36,10 +36,6 @@ create table t2 (a int); --echo # --echo # Pt 1) Ensure SBM is updated immediately upon arrival of the next event ---echo # Lock t1 on slave so the first received transaction does not complete/commit ---connection slave -LOCK TABLES t1 WRITE; - --connection master --echo # Sleep 2 to allow a buffer between events for SBM check sleep 2; @@ -65,8 +61,31 @@ if (`SELECT $sbm_trx1_arrive > ($seconds_since_idling + 1)`) } --echo # ..done + +--echo # MDEV-32265. At time of STOP SLAVE, if the SQL Thread is currently +--echo # delaying a transaction; then when the reciprocal START SLAVE occurs, +--echo # if the event is still to be delayed, SBM should resume accordingly + +--source include/stop_slave.inc +--source include/start_slave.inc + --connection slave -UNLOCK TABLES; +--echo # Waiting for replica to resume the delay for the transaction +--let $wait_condition= SELECT count(*) FROM information_schema.processlist WHERE state LIKE 'Waiting until MASTER_DELAY seconds after master executed event'; +--source include/wait_condition.inc + +--echo # Sleeping 1s to increment SBM +sleep 1; + +--echo # Ensuring Seconds_Behind_Master increases after sleeping.. 
+--let $sbm_trx1_after_1s_sleep= query_get_value(SHOW SLAVE STATUS, Seconds_Behind_Master, 1) +if (`SELECT $sbm_trx1_after_1s_sleep <= $sbm_trx1_arrive`) +{ + --echo # ..failed + --die Seconds_Behind_Master did not increase after sleeping, but should have +} +--echo # ..done + --source include/sync_with_master_gtid.inc --echo # diff --git a/sql/slave.cc b/sql/slave.cc index 44fcb93a3a9..3b9b5d6a83b 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -4249,8 +4249,8 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli, if (rli->last_master_timestamp < ev->when) { rli->last_master_timestamp= ev->when; - rli->sql_thread_caught_up= false; } + rli->sql_thread_caught_up= false; } int res= rli->parallel.do_event(serial_rgi, ev, event_size);