MDEV-29369: rpl.rpl_semi_sync_shutdown_await_ack fails regularly with Result content mismatch

This test was prone to failures for a few reasons, summarized below:

 1) MDEV-32168 introduced “only_running_threads=1” to
slave_stop.inc, which allowed the stop logic to bypass an
attempting-to-reconnect IO thread. That is, the IO thread could
realize the master shutdown in `read_event()`, and thereby call into
`try_to_reconnect()`. This would leave the IO thread up when the
test expected it to be stopped. Fixed by explicitly stopping the
IO thread and allowing an error state, as the above case would
lead to errno 2003.

 2) On slow systems (or those running profiling tools, e.g. MSAN),
the waiting-for-ack transaction can complete before the system
processes the `SHUTDOWN WAIT FOR ALL SLAVES`. There was shutdown
preparation logic in-between the transaction and shutdown itself,
which contributes to this problem. This patch also moves this
preparation logic before the transaction, so there is less to do
in-between the calls.

 3) Changed work-around for MDEV-28141 to use debug_sync instead
of sleep delay, as it was still possible to hit the bug on very
slow systems.

 4) Masked MTR variable reset with disable/enable query log

Reviewed By:
============
Kristian Nielsen <knielsen@knielsen-hq.org>
This commit is contained in:
Brandon Nesterenko 2024-01-09 13:47:02 -07:00
parent ee89558312
commit 03d1346e7f
5 changed files with 190 additions and 149 deletions

View File

@ -15,11 +15,15 @@ call mtr.add_suppression("reply failed");
call mtr.add_suppression("Replication event checksum verification");
call mtr.add_suppression("Relay log write failure");
call mtr.add_suppression("Failed to kill the active semi-sync connection");
set @sav_enabled_server_2= @@GLOBAL.rpl_semi_sync_slave_enabled;
set @sav_server_2_dbug= @@GLOBAL.debug_dbug;
connection server_3;
call mtr.add_suppression("reply failed");
call mtr.add_suppression("Replication event checksum verification");
call mtr.add_suppression("Relay log write failure");
call mtr.add_suppression("Failed to kill the active semi-sync connection");
set @sav_enabled_server_3= @@GLOBAL.rpl_semi_sync_slave_enabled;
set @sav_server_3_dbug= @@GLOBAL.debug_dbug;
connection server_1;
CREATE TABLE t1 (a int);
connection server_2;
@ -40,15 +44,15 @@ connection server_1;
#-- Enable semi-sync on slaves
let slave_last= 3
connection server_2;
set global rpl_semi_sync_slave_enabled = 1;
include/stop_slave.inc
set global rpl_semi_sync_slave_enabled = 1;
include/start_slave.inc
show status like 'Rpl_semi_sync_slave_status';
Variable_name Value
Rpl_semi_sync_slave_status ON
connection server_3;
set global rpl_semi_sync_slave_enabled = 1;
include/stop_slave.inc
set global rpl_semi_sync_slave_enabled = 1;
include/start_slave.inc
show status like 'Rpl_semi_sync_slave_status';
Variable_name Value
@ -67,24 +71,20 @@ show status like 'Rpl_semi_sync_master_clients';
Variable_name Value
Rpl_semi_sync_master_clients 2
#-- Prepare servers to simulate delay or error
connection server_1;
SET @@GLOBAL.debug_dbug= "";
connection server_2;
SET @@GLOBAL.debug_dbug= "+d,simulate_delay_semisync_slave_reply";
connection server_3;
SET @@GLOBAL.debug_dbug= "+d,simulate_delay_semisync_slave_reply";
#--
#-- Test begins
connection server_1_con2;
#-- Give enough time after timeout/ack received to query yes_tx/no_tx
SET @@GLOBAL.debug_dbug= "+d,delay_shutdown_phase_2_after_semisync_wait";
connection server_1;
#-- Begin semi-sync transaction
INSERT INTO t1 VALUES (1);
connection server_1_con2;
#-- Wait until master recognizes a connection is awaiting semi-sync ACK
show status like 'Rpl_semi_sync_master_wait_sessions';
Variable_name Value
Rpl_semi_sync_master_wait_sessions 1
#-- Give enough time after timeout/ack received to query yes_tx/no_tx
SET @@GLOBAL.debug_dbug= "+d,delay_shutdown_phase_2_after_semisync_wait";
#-- Begin master shutdown
SHUTDOWN WAIT FOR ALL SLAVES;
connection server_1;
@ -111,22 +111,19 @@ count(*)=1
#-- Re-synchronize slaves with master and disable semi-sync
#-- Stop slaves
connection server_2;
SET @@GLOBAL.debug_dbug= "";
SET @@GLOBAL.rpl_semi_sync_slave_enabled= 0;
include/stop_slave.inc
include/stop_slave_io.inc
include/stop_slave_sql.inc
SET @@GLOBAL.debug_dbug= @sav_server_2_dbug;
SET @@GLOBAL.rpl_semi_sync_slave_enabled= @sav_enabled_server_2;
connection server_3;
SET @@GLOBAL.debug_dbug= "";
SET @@GLOBAL.rpl_semi_sync_slave_enabled= 0;
include/stop_slave.inc
include/stop_slave_io.inc
include/stop_slave_sql.inc
SET @@GLOBAL.debug_dbug= @sav_server_3_dbug;
SET @@GLOBAL.rpl_semi_sync_slave_enabled= @sav_enabled_server_3;
#-- Bring the master back up
connection server_1_con2;
connection default;
connection server_1;
SET @@GLOBAL.debug_dbug= "";
SET @@GLOBAL.rpl_semi_sync_master_enabled = 0;
show status like 'Rpl_semi_sync_master_status';
Variable_name Value
Rpl_semi_sync_master_status OFF
TRUNCATE TABLE t1;
#-- Bring slaves back up
connection server_2;
@ -157,15 +154,15 @@ connection server_1;
#-- Enable semi-sync on slaves
let slave_last= 3
connection server_2;
set global rpl_semi_sync_slave_enabled = 1;
include/stop_slave.inc
set global rpl_semi_sync_slave_enabled = 1;
include/start_slave.inc
show status like 'Rpl_semi_sync_slave_status';
Variable_name Value
Rpl_semi_sync_slave_status ON
connection server_3;
set global rpl_semi_sync_slave_enabled = 1;
include/stop_slave.inc
set global rpl_semi_sync_slave_enabled = 1;
include/start_slave.inc
show status like 'Rpl_semi_sync_slave_status';
Variable_name Value
@ -184,24 +181,20 @@ show status like 'Rpl_semi_sync_master_clients';
Variable_name Value
Rpl_semi_sync_master_clients 2
#-- Prepare servers to simulate delay or error
connection server_1;
SET @@GLOBAL.debug_dbug= "+d,mysqld_delay_kill_threads_phase_1";
connection server_2;
SET @@GLOBAL.debug_dbug= "+d,corrupt_queue_event";
SET @@GLOBAL.debug_dbug= "+d,corrupt_queue_event,delay_semisync_kill_connection_for_mdev_28141";
connection server_3;
SET @@GLOBAL.debug_dbug= "+d,corrupt_queue_event";
SET @@GLOBAL.debug_dbug= "+d,corrupt_queue_event,delay_semisync_kill_connection_for_mdev_28141";
#--
#-- Test begins
connection server_1_con2;
#-- Give enough time after timeout/ack received to query yes_tx/no_tx
SET @@GLOBAL.debug_dbug= "+d,delay_shutdown_phase_2_after_semisync_wait";
connection server_1;
#-- Begin semi-sync transaction
INSERT INTO t1 VALUES (1);
connection server_1_con2;
#-- Wait until master recognizes a connection is awaiting semi-sync ACK
show status like 'Rpl_semi_sync_master_wait_sessions';
Variable_name Value
Rpl_semi_sync_master_wait_sessions 1
#-- Give enough time after timeout/ack received to query yes_tx/no_tx
SET @@GLOBAL.debug_dbug= "+d,delay_shutdown_phase_2_after_semisync_wait";
#-- Begin master shutdown
SHUTDOWN WAIT FOR ALL SLAVES;
connection server_1;
@ -226,24 +219,33 @@ count(*)=0
1
#
#-- Re-synchronize slaves with master and disable semi-sync
#-- FIXME: workaround for MDEV-28141, preventing errored replicas from
# killing their semi-sync connections
connection server_2;
set debug_sync= "now wait_for at_semisync_kill_connection";
set debug_sync= "now signal continue_semisync_kill_connection";
# Wait for debug_sync signal to have been received before issuing RESET
set debug_sync= "reset";
connection server_3;
set debug_sync= "now wait_for at_semisync_kill_connection";
set debug_sync= "now signal continue_semisync_kill_connection";
# Wait for debug_sync signal to have been received before issuing RESET
set debug_sync= "reset";
#-- Stop slaves
connection server_2;
SET @@GLOBAL.debug_dbug= "";
SET @@GLOBAL.rpl_semi_sync_slave_enabled= 0;
include/stop_slave.inc
include/stop_slave_io.inc
include/stop_slave_sql.inc
SET @@GLOBAL.debug_dbug= @sav_server_2_dbug;
SET @@GLOBAL.rpl_semi_sync_slave_enabled= @sav_enabled_server_2;
connection server_3;
SET @@GLOBAL.debug_dbug= "";
SET @@GLOBAL.rpl_semi_sync_slave_enabled= 0;
include/stop_slave.inc
include/stop_slave_io.inc
include/stop_slave_sql.inc
SET @@GLOBAL.debug_dbug= @sav_server_3_dbug;
SET @@GLOBAL.rpl_semi_sync_slave_enabled= @sav_enabled_server_3;
#-- Bring the master back up
connection server_1_con2;
connection default;
connection server_1;
SET @@GLOBAL.debug_dbug= "";
SET @@GLOBAL.rpl_semi_sync_master_enabled = 0;
show status like 'Rpl_semi_sync_master_status';
Variable_name Value
Rpl_semi_sync_master_status OFF
TRUNCATE TABLE t1;
#-- Bring slaves back up
connection server_2;
@ -275,15 +277,15 @@ connection server_1;
#-- Enable semi-sync on slaves
let slave_last= 3
connection server_2;
set global rpl_semi_sync_slave_enabled = 1;
include/stop_slave.inc
set global rpl_semi_sync_slave_enabled = 1;
include/start_slave.inc
show status like 'Rpl_semi_sync_slave_status';
Variable_name Value
Rpl_semi_sync_slave_status ON
connection server_3;
set global rpl_semi_sync_slave_enabled = 1;
include/stop_slave.inc
set global rpl_semi_sync_slave_enabled = 1;
include/start_slave.inc
show status like 'Rpl_semi_sync_slave_status';
Variable_name Value
@ -302,24 +304,20 @@ show status like 'Rpl_semi_sync_master_clients';
Variable_name Value
Rpl_semi_sync_master_clients 2
#-- Prepare servers to simulate delay or error
connection server_1;
SET @@GLOBAL.debug_dbug= "+d,mysqld_delay_kill_threads_phase_1";
connection server_2;
SET @@GLOBAL.debug_dbug= "+d,corrupt_queue_event";
SET @@GLOBAL.debug_dbug= "+d,corrupt_queue_event,delay_semisync_kill_connection_for_mdev_28141";
connection server_3;
SET @@GLOBAL.debug_dbug= "+d,simulate_delay_semisync_slave_reply";
#--
#-- Test begins
connection server_1_con2;
#-- Give enough time after timeout/ack received to query yes_tx/no_tx
SET @@GLOBAL.debug_dbug= "+d,delay_shutdown_phase_2_after_semisync_wait";
connection server_1;
#-- Begin semi-sync transaction
INSERT INTO t1 VALUES (1);
connection server_1_con2;
#-- Wait until master recognizes a connection is awaiting semi-sync ACK
show status like 'Rpl_semi_sync_master_wait_sessions';
Variable_name Value
Rpl_semi_sync_master_wait_sessions 1
#-- Give enough time after timeout/ack received to query yes_tx/no_tx
SET @@GLOBAL.debug_dbug= "+d,delay_shutdown_phase_2_after_semisync_wait";
#-- Begin master shutdown
SHUTDOWN WAIT FOR ALL SLAVES;
connection server_1;
@ -344,24 +342,28 @@ count(*)=1
1
#
#-- Re-synchronize slaves with master and disable semi-sync
#-- FIXME: workaround for MDEV-28141, preventing errored replicas from
# killing their semi-sync connections
connection server_2;
set debug_sync= "now wait_for at_semisync_kill_connection";
set debug_sync= "now signal continue_semisync_kill_connection";
# Wait for debug_sync signal to have been received before issuing RESET
set debug_sync= "reset";
#-- Stop slaves
connection server_2;
SET @@GLOBAL.debug_dbug= "";
SET @@GLOBAL.rpl_semi_sync_slave_enabled= 0;
include/stop_slave.inc
include/stop_slave_io.inc
include/stop_slave_sql.inc
SET @@GLOBAL.debug_dbug= @sav_server_2_dbug;
SET @@GLOBAL.rpl_semi_sync_slave_enabled= @sav_enabled_server_2;
connection server_3;
SET @@GLOBAL.debug_dbug= "";
SET @@GLOBAL.rpl_semi_sync_slave_enabled= 0;
include/stop_slave.inc
include/stop_slave_io.inc
include/stop_slave_sql.inc
SET @@GLOBAL.debug_dbug= @sav_server_3_dbug;
SET @@GLOBAL.rpl_semi_sync_slave_enabled= @sav_enabled_server_3;
#-- Bring the master back up
connection server_1_con2;
connection default;
connection server_1;
SET @@GLOBAL.debug_dbug= "";
SET @@GLOBAL.rpl_semi_sync_master_enabled = 0;
show status like 'Rpl_semi_sync_master_status';
Variable_name Value
Rpl_semi_sync_master_status OFF
TRUNCATE TABLE t1;
#-- Bring slaves back up
connection server_2;
@ -399,15 +401,15 @@ connection server_1;
#-- Enable semi-sync on slaves
let slave_last= 3
connection server_2;
set global rpl_semi_sync_slave_enabled = 1;
include/stop_slave.inc
set global rpl_semi_sync_slave_enabled = 1;
include/start_slave.inc
show status like 'Rpl_semi_sync_slave_status';
Variable_name Value
Rpl_semi_sync_slave_status ON
connection server_3;
set global rpl_semi_sync_slave_enabled = 1;
include/stop_slave.inc
set global rpl_semi_sync_slave_enabled = 1;
include/start_slave.inc
show status like 'Rpl_semi_sync_slave_status';
Variable_name Value
@ -426,24 +428,20 @@ show status like 'Rpl_semi_sync_master_clients';
Variable_name Value
Rpl_semi_sync_master_clients 2
#-- Prepare servers to simulate delay or error
connection server_1;
SET @@GLOBAL.debug_dbug= "+d,mysqld_delay_kill_threads_phase_1";
connection server_2;
SET @@GLOBAL.debug_dbug= "+d,corrupt_queue_event,slave_delay_killing_semisync_connection";
SET @@GLOBAL.debug_dbug= "+d,corrupt_queue_event,delay_semisync_kill_connection_for_mdev_28141";
connection server_3;
SET @@GLOBAL.debug_dbug= "+d,simulate_delay_semisync_slave_reply";
#--
#-- Test begins
connection server_1_con2;
#-- Give enough time after timeout/ack received to query yes_tx/no_tx
SET @@GLOBAL.debug_dbug= "+d,delay_shutdown_phase_2_after_semisync_wait";
connection server_1;
#-- Begin semi-sync transaction
INSERT INTO t1 VALUES (1);
connection server_1_con2;
#-- Wait until master recognizes a connection is awaiting semi-sync ACK
show status like 'Rpl_semi_sync_master_wait_sessions';
Variable_name Value
Rpl_semi_sync_master_wait_sessions 1
#-- Give enough time after timeout/ack received to query yes_tx/no_tx
SET @@GLOBAL.debug_dbug= "+d,delay_shutdown_phase_2_after_semisync_wait";
#-- Begin master shutdown
SHUTDOWN WAIT FOR ALL SLAVES;
connection server_1;
@ -468,24 +466,28 @@ count(*)=1
1
#
#-- Re-synchronize slaves with master and disable semi-sync
#-- FIXME: workaround for MDEV-28141, preventing errored replicas from
# killing their semi-sync connections
connection server_2;
set debug_sync= "now wait_for at_semisync_kill_connection";
set debug_sync= "now signal continue_semisync_kill_connection";
# Wait for debug_sync signal to have been received before issuing RESET
set debug_sync= "reset";
#-- Stop slaves
connection server_2;
SET @@GLOBAL.debug_dbug= "";
SET @@GLOBAL.rpl_semi_sync_slave_enabled= 0;
include/stop_slave.inc
include/stop_slave_io.inc
include/stop_slave_sql.inc
SET @@GLOBAL.debug_dbug= @sav_server_2_dbug;
SET @@GLOBAL.rpl_semi_sync_slave_enabled= @sav_enabled_server_2;
connection server_3;
SET @@GLOBAL.debug_dbug= "";
SET @@GLOBAL.rpl_semi_sync_slave_enabled= 0;
include/stop_slave.inc
include/stop_slave_io.inc
include/stop_slave_sql.inc
SET @@GLOBAL.debug_dbug= @sav_server_3_dbug;
SET @@GLOBAL.rpl_semi_sync_slave_enabled= @sav_enabled_server_3;
#-- Bring the master back up
connection server_1_con2;
connection default;
connection server_1;
SET @@GLOBAL.debug_dbug= "";
SET @@GLOBAL.rpl_semi_sync_master_enabled = 0;
show status like 'Rpl_semi_sync_master_status';
Variable_name Value
Rpl_semi_sync_master_status OFF
TRUNCATE TABLE t1;
#-- Bring slaves back up
connection server_2;
@ -509,9 +511,13 @@ COUNT(*)=0
#############################
connection server_2;
include/stop_slave.inc
SET @@GLOBAL.rpl_semi_sync_slave_enabled = @sav_enabled_server_2;
SET @@GLOBAL.debug_dbug= @sav_server_2_dbug;
include/start_slave.inc
connection server_3;
include/stop_slave.inc
SET @@GLOBAL.rpl_semi_sync_slave_enabled = @sav_enabled_server_3;
SET @@GLOBAL.debug_dbug= @sav_server_3_dbug;
include/start_slave.inc
connection server_1;
drop table t1;

View File

@ -3,7 +3,6 @@
# replicas before shutting down.
#
# Parameters:
# server_1_dbug (string) Debug setting for primary (server 1)
# server_2_dbug (string) Debug setting to simulate delay or error on
# the first replica (server 2)
# server_3_dbug (string) Debug setting to simulate delay or error on
@ -32,8 +31,8 @@ while (`SELECT $i <= $slave_last`)
--connection server_$i
--sync_with_master
set global rpl_semi_sync_slave_enabled = 1;
source include/stop_slave.inc;
set global rpl_semi_sync_slave_enabled = 1;
source include/start_slave.inc;
show status like 'Rpl_semi_sync_slave_status';
@ -56,8 +55,6 @@ show status like 'Rpl_semi_sync_master_status';
show status like 'Rpl_semi_sync_master_clients';
--echo #-- Prepare servers to simulate delay or error
--connection server_1
--eval SET @@GLOBAL.debug_dbug= $server_1_dbug
--connection server_2
--eval SET @@GLOBAL.debug_dbug= $server_2_dbug
--connection server_3
@ -66,6 +63,14 @@ show status like 'Rpl_semi_sync_master_clients';
--echo #--
--echo #-- Test begins
--connection server_1_con2
--echo #-- Give enough time after timeout/ack received to query yes_tx/no_tx
SET @@GLOBAL.debug_dbug= "+d,delay_shutdown_phase_2_after_semisync_wait";
--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
wait
EOF
--connection server_1
--echo #-- Begin semi-sync transaction
--send INSERT INTO t1 VALUES (1)
@ -75,14 +80,6 @@ show status like 'Rpl_semi_sync_master_clients';
let $status_var= Rpl_semi_sync_master_wait_sessions;
let $status_var_value= 1;
source include/wait_for_status_var.inc;
show status like 'Rpl_semi_sync_master_wait_sessions';
--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
wait
EOF
--echo #-- Give enough time after timeout/ack received to query yes_tx/no_tx
SET @@GLOBAL.debug_dbug= "+d,delay_shutdown_phase_2_after_semisync_wait";
--echo #-- Begin master shutdown
--send SHUTDOWN WAIT FOR ALL SLAVES
@ -111,19 +108,72 @@ show status like 'Rpl_semi_sync_master_no_tx';
--echo #
--echo #-- Re-synchronize slaves with master and disable semi-sync
if (`SELECT ($server_2_expect_row_count + $server_3_expect_row_count) < 2`)
{
--echo #-- FIXME: workaround for MDEV-28141, preventing errored replicas from
--echo # killing their semi-sync connections
# I.e. we can't create a new kill connection to the primary if we know that the
# primary is shutting down for risk of Packets out of order error. So we wait
# to hit a debug_sync point before the creation of the new kill_connection, and
# don't progress until the primary has been shutdown, so no new connection can
# be formed.
# Note this is only needed in the error case (using corrupt_queue_event), as
# the running io_thread will otherwise automatically detect that the primary
# has shutdown before progressing to the cleanup of the io thread.
}
if (!$server_2_expect_row_count)
{
--connection server_2
set debug_sync= "now wait_for at_semisync_kill_connection";
set debug_sync= "now signal continue_semisync_kill_connection";
--echo # Wait for debug_sync signal to have been received before issuing RESET
let $wait_condition= select count(*)=0 from information_schema.processlist where state like "debug sync point%";
source include/wait_condition.inc;
set debug_sync= "reset";
}
if (!$server_3_expect_row_count)
{
--connection server_3
set debug_sync= "now wait_for at_semisync_kill_connection";
set debug_sync= "now signal continue_semisync_kill_connection";
--echo # Wait for debug_sync signal to have been received before issuing RESET
let $wait_condition= select count(*)=0 from information_schema.processlist where state like "debug sync point%";
source include/wait_condition.inc;
set debug_sync= "reset";
}
--echo #-- Stop slaves
--connection server_2
--eval SET @@GLOBAL.debug_dbug= "$sav_server_2_dbug"
--eval SET @@GLOBAL.rpl_semi_sync_slave_enabled= 0
--let $rpl_only_running_threads= 1
source include/stop_slave.inc;
# If server_2_expect_row_count is 0, we are simulating an error on the replica
# and the IO thread will end with errno 1595.
# Otherwise, we still expect error, because the master has shutdown at this
# point, and the IO thread may or may not have realized the shutdown, and
# started to try to automatically reconnect. This may result in the IO thread
# giving a 2003 error if the slave tries to reconnect to a shutdown master.
# Additionally disable warnings because the slave may have stopped in err
# automatically, and we don't want a sporadic "Slave is already stopped"
# warning.
--disable_warnings
--let $rpl_allow_error= 1
--source include/stop_slave_io.inc
--enable_warnings
--let $rpl_allow_error=
--source include/stop_slave_sql.inc
SET @@GLOBAL.debug_dbug= @sav_server_2_dbug;
SET @@GLOBAL.rpl_semi_sync_slave_enabled= @sav_enabled_server_2;
--connection server_3
--eval SET @@GLOBAL.debug_dbug= "$sav_server_3_dbug"
--eval SET @@GLOBAL.rpl_semi_sync_slave_enabled= 0
--let $rpl_only_running_threads= 1
source include/stop_slave.inc;
# Expect error for IO thread, see above comment for stopping server_2
--disable_warnings
--let $rpl_allow_error= 1
--source include/stop_slave_io.inc
--enable_warnings
--let $rpl_allow_error=
--source include/stop_slave_sql.inc
SET @@GLOBAL.debug_dbug= @sav_server_3_dbug;
SET @@GLOBAL.rpl_semi_sync_slave_enabled= @sav_enabled_server_3;
--echo #-- Bring the master back up
--connection server_1_con2
@ -142,13 +192,6 @@ EOF
--enable_reconnect
--source include/wait_until_connected_again.inc
--eval SET @@GLOBAL.debug_dbug= "$sav_master_dbug"
let $status_var= Rpl_semi_sync_master_clients;
let $status_var_value= 0;
source include/wait_for_status_var.inc;
--eval SET @@GLOBAL.rpl_semi_sync_master_enabled = 0
show status like 'Rpl_semi_sync_master_status';
TRUNCATE TABLE t1;
--save_master_pos

View File

@ -69,9 +69,8 @@ call mtr.add_suppression("Timeout waiting");
call mtr.add_suppression("did not exit");
call mtr.add_suppression("Got an error reading communication packets");
--let $sav_master_timeout= `SELECT @@global.rpl_semi_sync_master_timeout`
--let $sav_master_timeout= `SELECT @@GLOBAL.rpl_semi_sync_master_timeout`
--let $sav_enabled_master= `SELECT @@GLOBAL.rpl_semi_sync_master_enabled`
--let $sav_master_dbug= `SELECT @@GLOBAL.debug_dbug`
--echo # Suppress slave errors related to the simulated error
--connection server_2
@ -79,16 +78,16 @@ call mtr.add_suppression("reply failed");
call mtr.add_suppression("Replication event checksum verification");
call mtr.add_suppression("Relay log write failure");
call mtr.add_suppression("Failed to kill the active semi-sync connection");
--let $sav_enabled_server_2=`SELECT @@GLOBAL.rpl_semi_sync_slave_enabled`
--let $sav_server_2_dbug= `SELECT @@GLOBAL.debug_dbug`
set @sav_enabled_server_2= @@GLOBAL.rpl_semi_sync_slave_enabled;
set @sav_server_2_dbug= @@GLOBAL.debug_dbug;
--connection server_3
call mtr.add_suppression("reply failed");
call mtr.add_suppression("Replication event checksum verification");
call mtr.add_suppression("Relay log write failure");
call mtr.add_suppression("Failed to kill the active semi-sync connection");
--let $sav_enabled_server_3=`SELECT @@GLOBAL.rpl_semi_sync_slave_enabled`
--let $sav_server_3_dbug= `SELECT @@GLOBAL.debug_dbug`
set @sav_enabled_server_3= @@GLOBAL.rpl_semi_sync_slave_enabled;
set @sav_server_3_dbug= @@GLOBAL.debug_dbug;
--connection server_1
CREATE TABLE t1 (a int);
@ -116,7 +115,6 @@ while (`SELECT $i <= $slave_last`)
--echo # allowed timeout, the primary should delay killing the suspended thread
--echo # until an ACK is received (Rpl_semi_sync_master_yes_tx should be 1).
--echo #
--let server_1_dbug= ""
--let server_2_dbug= "+d,simulate_delay_semisync_slave_reply"
--let server_3_dbug= "+d,simulate_delay_semisync_slave_reply"
--let semisync_timeout= 1600
@ -129,9 +127,8 @@ while (`SELECT $i <= $slave_last`)
--echo # the primary should delay killing the suspended thread until the
--echo # timeout is reached (Rpl_semi_sync_master_no_tx should be 1).
--echo #
--let server_1_dbug= "+d,mysqld_delay_kill_threads_phase_1"
--let server_2_dbug= "+d,corrupt_queue_event"
--let server_3_dbug= "+d,corrupt_queue_event"
--let server_2_dbug= "+d,corrupt_queue_event,delay_semisync_kill_connection_for_mdev_28141"
--let server_3_dbug= "+d,corrupt_queue_event,delay_semisync_kill_connection_for_mdev_28141"
--let semisync_timeout= 500
--let server_2_expect_row_count= 0
--let server_3_expect_row_count= 0
@ -143,8 +140,7 @@ while (`SELECT $i <= $slave_last`)
--echo # primary should delay killing the suspended thread until it receives an
--echo # ACK from the delayed slave (Rpl_semi_sync_master_yes_tx should be 1).
--echo #
--let server_1_dbug= "+d,mysqld_delay_kill_threads_phase_1"
--let server_2_dbug= "+d,corrupt_queue_event"
--let server_2_dbug= "+d,corrupt_queue_event,delay_semisync_kill_connection_for_mdev_28141"
--let server_3_dbug= "+d,simulate_delay_semisync_slave_reply"
--let semisync_timeout= 1600
--let server_2_expect_row_count= 0
@ -163,15 +159,7 @@ while (`SELECT $i <= $slave_last`)
--echo # slave does not send a `QUIT` in this case (Rpl_semi_sync_master_yes_tx
--echo # should be 1 because server_3 will send the ACK within a valid timeout).
--echo #
# mysqld_delay_kill_threads_phase1 ensures that server_2 will have enough time
# to start a new connection that has the intent to kill the active semi-sync
# connection
--let server_1_dbug= "+d,mysqld_delay_kill_threads_phase_1"
# slave_delay_killing_semisync_connection ensures that the primary has force
# killed its current connection before it is able to issue `KILL`
--let server_2_dbug= "+d,corrupt_queue_event,slave_delay_killing_semisync_connection"
--let server_2_dbug= "+d,corrupt_queue_event,delay_semisync_kill_connection_for_mdev_28141"
--let server_3_dbug= "+d,simulate_delay_semisync_slave_reply"
--let semisync_timeout= 1600
--let server_2_expect_row_count= 0
@ -184,32 +172,24 @@ while (`SELECT $i <= $slave_last`)
--connection server_2
source include/stop_slave.inc;
SET @@GLOBAL.rpl_semi_sync_slave_enabled = @sav_enabled_server_2;
SET @@GLOBAL.debug_dbug= @sav_server_2_dbug;
source include/start_slave.inc;
--disable_query_log
--eval SET @@GLOBAL.rpl_semi_sync_slave_enabled = $sav_enabled_server_2
--eval SET @@GLOBAL.debug_dbug= "$sav_server_2_dbug"
--enable_query_log
--connection server_3
source include/stop_slave.inc;
SET @@GLOBAL.rpl_semi_sync_slave_enabled = @sav_enabled_server_3;
SET @@GLOBAL.debug_dbug= @sav_server_3_dbug;
source include/start_slave.inc;
--disable_query_log
--eval SET @@GLOBAL.rpl_semi_sync_slave_enabled = $sav_enabled_server_3
--eval SET @@GLOBAL.debug_dbug= "$sav_server_3_dbug"
--enable_query_log
--connection server_1
let $status_var= Rpl_semi_sync_master_clients;
let $status_var_value= 0;
source include/wait_for_status_var.inc;
--disable_query_log
--eval SET @@GLOBAL.rpl_semi_sync_master_timeout= $sav_master_timeout
--eval SET @@GLOBAL.rpl_semi_sync_master_enabled= $sav_enabled_master
--eval SET @@GLOBAL.debug_dbug= "$sav_master_dbug"
--eval SET @@GLOBAL.rpl_semi_sync_master_timeout= $sav_master_timeout;
--eval SET @@GLOBAL.rpl_semi_sync_master_enabled= $sav_enabled_master;
--enable_query_log
drop table t1;

View File

@ -1742,7 +1742,6 @@ static void close_connections(void)
This will give the threads some time to gracefully abort their
statements and inform their clients that the server is about to die.
*/
DBUG_EXECUTE_IF("mysqld_delay_kill_threads_phase_1", my_sleep(200000););
int n_threads_awaiting_ack= 0;
server_threads.iterate(kill_thread_phase_1, &n_threads_awaiting_ack);

View File

@ -17,6 +17,7 @@
#include <my_global.h>
#include "semisync_slave.h"
#include "debug_sync.h"
Repl_semi_sync_slave repl_semisync_slave;
@ -115,7 +116,21 @@ int Repl_semi_sync_slave::slave_start(Master_info *mi)
int Repl_semi_sync_slave::slave_stop(Master_info *mi)
{
if (get_slave_enabled())
{
#ifdef ENABLED_DEBUG_SYNC
/*
TODO: Remove after MDEV-28141
*/
DBUG_EXECUTE_IF("delay_semisync_kill_connection_for_mdev_28141", {
const char act[]= "now "
"signal at_semisync_kill_connection "
"wait_for continue_semisync_kill_connection";
DBUG_ASSERT(debug_sync_service);
DBUG_ASSERT(!debug_sync_set_action(mi->io_thd, STRING_WITH_LEN(act)));
};);
#endif
kill_connection(mi->mysql);
}
if (rpl_semi_sync_slave_status)
rpl_semi_sync_slave_status= 0;
@ -151,8 +166,6 @@ void Repl_semi_sync_slave::kill_connection(MYSQL *mysql)
goto failed_graceful_kill;
}
DBUG_EXECUTE_IF("slave_delay_killing_semisync_connection", my_sleep(400000););
kill_buffer_length= my_snprintf(kill_buffer, 30, "KILL %lu",
mysql->thread_id);
if (mysql_real_query(kill_mysql, kill_buffer, (ulong)kill_buffer_length))