MDEV-29369: rpl.rpl_semi_sync_shutdown_await_ack fails regularly with Result content mismatch
This test was prone to failures for a few reasons, summarized below: 1) MDEV-32168 introduced “only_running_threads=1” to slave_stop.inc, which allowed the stop logic to bypass an attempting-to-reconnect IO thread. That is, the IO thread could realize the master shutdown in `read_event()`, and thereby call into `try_to_reconnect()`. This would leave the IO thread up when the test expected it to be stopped. Fixed by explicitly stopping the IO thread and allowing an error state, as the above case would lead to errno 2003. 2) On slow systems (or those running profiling tools, e.g. MSAN), the waiting-for-ack transaction can complete before the system processes the `SHUTDOWN WAIT FOR ALL SLAVES`. There was shutdown preparation logic in-between the transaction and shutdown itself, which contributes to this problem. This patch also moves this preparation logic before the transaction, so there is less to do in-between the calls. 3) Changed work-around for MDEV-28141 to use debug_sync instead of sleep delay, as it was still possible to hit the bug on very slow systems. 4) Masked MTR variable reset with disable/enable query log Reviewed By: ============ Kristian Nielsen <knielsen@knielsen-hq.org>
This commit is contained in:
parent
ee89558312
commit
03d1346e7f
@ -15,11 +15,15 @@ call mtr.add_suppression("reply failed");
|
||||
call mtr.add_suppression("Replication event checksum verification");
|
||||
call mtr.add_suppression("Relay log write failure");
|
||||
call mtr.add_suppression("Failed to kill the active semi-sync connection");
|
||||
set @sav_enabled_server_2= @@GLOBAL.rpl_semi_sync_slave_enabled;
|
||||
set @sav_server_2_dbug= @@GLOBAL.debug_dbug;
|
||||
connection server_3;
|
||||
call mtr.add_suppression("reply failed");
|
||||
call mtr.add_suppression("Replication event checksum verification");
|
||||
call mtr.add_suppression("Relay log write failure");
|
||||
call mtr.add_suppression("Failed to kill the active semi-sync connection");
|
||||
set @sav_enabled_server_3= @@GLOBAL.rpl_semi_sync_slave_enabled;
|
||||
set @sav_server_3_dbug= @@GLOBAL.debug_dbug;
|
||||
connection server_1;
|
||||
CREATE TABLE t1 (a int);
|
||||
connection server_2;
|
||||
@ -40,15 +44,15 @@ connection server_1;
|
||||
#-- Enable semi-sync on slaves
|
||||
let slave_last= 3
|
||||
connection server_2;
|
||||
set global rpl_semi_sync_slave_enabled = 1;
|
||||
include/stop_slave.inc
|
||||
set global rpl_semi_sync_slave_enabled = 1;
|
||||
include/start_slave.inc
|
||||
show status like 'Rpl_semi_sync_slave_status';
|
||||
Variable_name Value
|
||||
Rpl_semi_sync_slave_status ON
|
||||
connection server_3;
|
||||
set global rpl_semi_sync_slave_enabled = 1;
|
||||
include/stop_slave.inc
|
||||
set global rpl_semi_sync_slave_enabled = 1;
|
||||
include/start_slave.inc
|
||||
show status like 'Rpl_semi_sync_slave_status';
|
||||
Variable_name Value
|
||||
@ -67,24 +71,20 @@ show status like 'Rpl_semi_sync_master_clients';
|
||||
Variable_name Value
|
||||
Rpl_semi_sync_master_clients 2
|
||||
#-- Prepare servers to simulate delay or error
|
||||
connection server_1;
|
||||
SET @@GLOBAL.debug_dbug= "";
|
||||
connection server_2;
|
||||
SET @@GLOBAL.debug_dbug= "+d,simulate_delay_semisync_slave_reply";
|
||||
connection server_3;
|
||||
SET @@GLOBAL.debug_dbug= "+d,simulate_delay_semisync_slave_reply";
|
||||
#--
|
||||
#-- Test begins
|
||||
connection server_1_con2;
|
||||
#-- Give enough time after timeout/ack received to query yes_tx/no_tx
|
||||
SET @@GLOBAL.debug_dbug= "+d,delay_shutdown_phase_2_after_semisync_wait";
|
||||
connection server_1;
|
||||
#-- Begin semi-sync transaction
|
||||
INSERT INTO t1 VALUES (1);
|
||||
connection server_1_con2;
|
||||
#-- Wait until master recognizes a connection is awaiting semi-sync ACK
|
||||
show status like 'Rpl_semi_sync_master_wait_sessions';
|
||||
Variable_name Value
|
||||
Rpl_semi_sync_master_wait_sessions 1
|
||||
#-- Give enough time after timeout/ack received to query yes_tx/no_tx
|
||||
SET @@GLOBAL.debug_dbug= "+d,delay_shutdown_phase_2_after_semisync_wait";
|
||||
#-- Begin master shutdown
|
||||
SHUTDOWN WAIT FOR ALL SLAVES;
|
||||
connection server_1;
|
||||
@ -111,22 +111,19 @@ count(*)=1
|
||||
#-- Re-synchronize slaves with master and disable semi-sync
|
||||
#-- Stop slaves
|
||||
connection server_2;
|
||||
SET @@GLOBAL.debug_dbug= "";
|
||||
SET @@GLOBAL.rpl_semi_sync_slave_enabled= 0;
|
||||
include/stop_slave.inc
|
||||
include/stop_slave_io.inc
|
||||
include/stop_slave_sql.inc
|
||||
SET @@GLOBAL.debug_dbug= @sav_server_2_dbug;
|
||||
SET @@GLOBAL.rpl_semi_sync_slave_enabled= @sav_enabled_server_2;
|
||||
connection server_3;
|
||||
SET @@GLOBAL.debug_dbug= "";
|
||||
SET @@GLOBAL.rpl_semi_sync_slave_enabled= 0;
|
||||
include/stop_slave.inc
|
||||
include/stop_slave_io.inc
|
||||
include/stop_slave_sql.inc
|
||||
SET @@GLOBAL.debug_dbug= @sav_server_3_dbug;
|
||||
SET @@GLOBAL.rpl_semi_sync_slave_enabled= @sav_enabled_server_3;
|
||||
#-- Bring the master back up
|
||||
connection server_1_con2;
|
||||
connection default;
|
||||
connection server_1;
|
||||
SET @@GLOBAL.debug_dbug= "";
|
||||
SET @@GLOBAL.rpl_semi_sync_master_enabled = 0;
|
||||
show status like 'Rpl_semi_sync_master_status';
|
||||
Variable_name Value
|
||||
Rpl_semi_sync_master_status OFF
|
||||
TRUNCATE TABLE t1;
|
||||
#-- Bring slaves back up
|
||||
connection server_2;
|
||||
@ -157,15 +154,15 @@ connection server_1;
|
||||
#-- Enable semi-sync on slaves
|
||||
let slave_last= 3
|
||||
connection server_2;
|
||||
set global rpl_semi_sync_slave_enabled = 1;
|
||||
include/stop_slave.inc
|
||||
set global rpl_semi_sync_slave_enabled = 1;
|
||||
include/start_slave.inc
|
||||
show status like 'Rpl_semi_sync_slave_status';
|
||||
Variable_name Value
|
||||
Rpl_semi_sync_slave_status ON
|
||||
connection server_3;
|
||||
set global rpl_semi_sync_slave_enabled = 1;
|
||||
include/stop_slave.inc
|
||||
set global rpl_semi_sync_slave_enabled = 1;
|
||||
include/start_slave.inc
|
||||
show status like 'Rpl_semi_sync_slave_status';
|
||||
Variable_name Value
|
||||
@ -184,24 +181,20 @@ show status like 'Rpl_semi_sync_master_clients';
|
||||
Variable_name Value
|
||||
Rpl_semi_sync_master_clients 2
|
||||
#-- Prepare servers to simulate delay or error
|
||||
connection server_1;
|
||||
SET @@GLOBAL.debug_dbug= "+d,mysqld_delay_kill_threads_phase_1";
|
||||
connection server_2;
|
||||
SET @@GLOBAL.debug_dbug= "+d,corrupt_queue_event";
|
||||
SET @@GLOBAL.debug_dbug= "+d,corrupt_queue_event,delay_semisync_kill_connection_for_mdev_28141";
|
||||
connection server_3;
|
||||
SET @@GLOBAL.debug_dbug= "+d,corrupt_queue_event";
|
||||
SET @@GLOBAL.debug_dbug= "+d,corrupt_queue_event,delay_semisync_kill_connection_for_mdev_28141";
|
||||
#--
|
||||
#-- Test begins
|
||||
connection server_1_con2;
|
||||
#-- Give enough time after timeout/ack received to query yes_tx/no_tx
|
||||
SET @@GLOBAL.debug_dbug= "+d,delay_shutdown_phase_2_after_semisync_wait";
|
||||
connection server_1;
|
||||
#-- Begin semi-sync transaction
|
||||
INSERT INTO t1 VALUES (1);
|
||||
connection server_1_con2;
|
||||
#-- Wait until master recognizes a connection is awaiting semi-sync ACK
|
||||
show status like 'Rpl_semi_sync_master_wait_sessions';
|
||||
Variable_name Value
|
||||
Rpl_semi_sync_master_wait_sessions 1
|
||||
#-- Give enough time after timeout/ack received to query yes_tx/no_tx
|
||||
SET @@GLOBAL.debug_dbug= "+d,delay_shutdown_phase_2_after_semisync_wait";
|
||||
#-- Begin master shutdown
|
||||
SHUTDOWN WAIT FOR ALL SLAVES;
|
||||
connection server_1;
|
||||
@ -226,24 +219,33 @@ count(*)=0
|
||||
1
|
||||
#
|
||||
#-- Re-synchronize slaves with master and disable semi-sync
|
||||
#-- FIXME: workaround for MDEV-28141, preventing errored replicas from
|
||||
# killing their semi-sync connections
|
||||
connection server_2;
|
||||
set debug_sync= "now wait_for at_semisync_kill_connection";
|
||||
set debug_sync= "now signal continue_semisync_kill_connection";
|
||||
# Wait for debug_sync signal to have been received before issuing RESET
|
||||
set debug_sync= "reset";
|
||||
connection server_3;
|
||||
set debug_sync= "now wait_for at_semisync_kill_connection";
|
||||
set debug_sync= "now signal continue_semisync_kill_connection";
|
||||
# Wait for debug_sync signal to have been received before issuing RESET
|
||||
set debug_sync= "reset";
|
||||
#-- Stop slaves
|
||||
connection server_2;
|
||||
SET @@GLOBAL.debug_dbug= "";
|
||||
SET @@GLOBAL.rpl_semi_sync_slave_enabled= 0;
|
||||
include/stop_slave.inc
|
||||
include/stop_slave_io.inc
|
||||
include/stop_slave_sql.inc
|
||||
SET @@GLOBAL.debug_dbug= @sav_server_2_dbug;
|
||||
SET @@GLOBAL.rpl_semi_sync_slave_enabled= @sav_enabled_server_2;
|
||||
connection server_3;
|
||||
SET @@GLOBAL.debug_dbug= "";
|
||||
SET @@GLOBAL.rpl_semi_sync_slave_enabled= 0;
|
||||
include/stop_slave.inc
|
||||
include/stop_slave_io.inc
|
||||
include/stop_slave_sql.inc
|
||||
SET @@GLOBAL.debug_dbug= @sav_server_3_dbug;
|
||||
SET @@GLOBAL.rpl_semi_sync_slave_enabled= @sav_enabled_server_3;
|
||||
#-- Bring the master back up
|
||||
connection server_1_con2;
|
||||
connection default;
|
||||
connection server_1;
|
||||
SET @@GLOBAL.debug_dbug= "";
|
||||
SET @@GLOBAL.rpl_semi_sync_master_enabled = 0;
|
||||
show status like 'Rpl_semi_sync_master_status';
|
||||
Variable_name Value
|
||||
Rpl_semi_sync_master_status OFF
|
||||
TRUNCATE TABLE t1;
|
||||
#-- Bring slaves back up
|
||||
connection server_2;
|
||||
@ -275,15 +277,15 @@ connection server_1;
|
||||
#-- Enable semi-sync on slaves
|
||||
let slave_last= 3
|
||||
connection server_2;
|
||||
set global rpl_semi_sync_slave_enabled = 1;
|
||||
include/stop_slave.inc
|
||||
set global rpl_semi_sync_slave_enabled = 1;
|
||||
include/start_slave.inc
|
||||
show status like 'Rpl_semi_sync_slave_status';
|
||||
Variable_name Value
|
||||
Rpl_semi_sync_slave_status ON
|
||||
connection server_3;
|
||||
set global rpl_semi_sync_slave_enabled = 1;
|
||||
include/stop_slave.inc
|
||||
set global rpl_semi_sync_slave_enabled = 1;
|
||||
include/start_slave.inc
|
||||
show status like 'Rpl_semi_sync_slave_status';
|
||||
Variable_name Value
|
||||
@ -302,24 +304,20 @@ show status like 'Rpl_semi_sync_master_clients';
|
||||
Variable_name Value
|
||||
Rpl_semi_sync_master_clients 2
|
||||
#-- Prepare servers to simulate delay or error
|
||||
connection server_1;
|
||||
SET @@GLOBAL.debug_dbug= "+d,mysqld_delay_kill_threads_phase_1";
|
||||
connection server_2;
|
||||
SET @@GLOBAL.debug_dbug= "+d,corrupt_queue_event";
|
||||
SET @@GLOBAL.debug_dbug= "+d,corrupt_queue_event,delay_semisync_kill_connection_for_mdev_28141";
|
||||
connection server_3;
|
||||
SET @@GLOBAL.debug_dbug= "+d,simulate_delay_semisync_slave_reply";
|
||||
#--
|
||||
#-- Test begins
|
||||
connection server_1_con2;
|
||||
#-- Give enough time after timeout/ack received to query yes_tx/no_tx
|
||||
SET @@GLOBAL.debug_dbug= "+d,delay_shutdown_phase_2_after_semisync_wait";
|
||||
connection server_1;
|
||||
#-- Begin semi-sync transaction
|
||||
INSERT INTO t1 VALUES (1);
|
||||
connection server_1_con2;
|
||||
#-- Wait until master recognizes a connection is awaiting semi-sync ACK
|
||||
show status like 'Rpl_semi_sync_master_wait_sessions';
|
||||
Variable_name Value
|
||||
Rpl_semi_sync_master_wait_sessions 1
|
||||
#-- Give enough time after timeout/ack received to query yes_tx/no_tx
|
||||
SET @@GLOBAL.debug_dbug= "+d,delay_shutdown_phase_2_after_semisync_wait";
|
||||
#-- Begin master shutdown
|
||||
SHUTDOWN WAIT FOR ALL SLAVES;
|
||||
connection server_1;
|
||||
@ -344,24 +342,28 @@ count(*)=1
|
||||
1
|
||||
#
|
||||
#-- Re-synchronize slaves with master and disable semi-sync
|
||||
#-- FIXME: workaround for MDEV-28141, preventing errored replicas from
|
||||
# killing their semi-sync connections
|
||||
connection server_2;
|
||||
set debug_sync= "now wait_for at_semisync_kill_connection";
|
||||
set debug_sync= "now signal continue_semisync_kill_connection";
|
||||
# Wait for debug_sync signal to have been received before issuing RESET
|
||||
set debug_sync= "reset";
|
||||
#-- Stop slaves
|
||||
connection server_2;
|
||||
SET @@GLOBAL.debug_dbug= "";
|
||||
SET @@GLOBAL.rpl_semi_sync_slave_enabled= 0;
|
||||
include/stop_slave.inc
|
||||
include/stop_slave_io.inc
|
||||
include/stop_slave_sql.inc
|
||||
SET @@GLOBAL.debug_dbug= @sav_server_2_dbug;
|
||||
SET @@GLOBAL.rpl_semi_sync_slave_enabled= @sav_enabled_server_2;
|
||||
connection server_3;
|
||||
SET @@GLOBAL.debug_dbug= "";
|
||||
SET @@GLOBAL.rpl_semi_sync_slave_enabled= 0;
|
||||
include/stop_slave.inc
|
||||
include/stop_slave_io.inc
|
||||
include/stop_slave_sql.inc
|
||||
SET @@GLOBAL.debug_dbug= @sav_server_3_dbug;
|
||||
SET @@GLOBAL.rpl_semi_sync_slave_enabled= @sav_enabled_server_3;
|
||||
#-- Bring the master back up
|
||||
connection server_1_con2;
|
||||
connection default;
|
||||
connection server_1;
|
||||
SET @@GLOBAL.debug_dbug= "";
|
||||
SET @@GLOBAL.rpl_semi_sync_master_enabled = 0;
|
||||
show status like 'Rpl_semi_sync_master_status';
|
||||
Variable_name Value
|
||||
Rpl_semi_sync_master_status OFF
|
||||
TRUNCATE TABLE t1;
|
||||
#-- Bring slaves back up
|
||||
connection server_2;
|
||||
@ -399,15 +401,15 @@ connection server_1;
|
||||
#-- Enable semi-sync on slaves
|
||||
let slave_last= 3
|
||||
connection server_2;
|
||||
set global rpl_semi_sync_slave_enabled = 1;
|
||||
include/stop_slave.inc
|
||||
set global rpl_semi_sync_slave_enabled = 1;
|
||||
include/start_slave.inc
|
||||
show status like 'Rpl_semi_sync_slave_status';
|
||||
Variable_name Value
|
||||
Rpl_semi_sync_slave_status ON
|
||||
connection server_3;
|
||||
set global rpl_semi_sync_slave_enabled = 1;
|
||||
include/stop_slave.inc
|
||||
set global rpl_semi_sync_slave_enabled = 1;
|
||||
include/start_slave.inc
|
||||
show status like 'Rpl_semi_sync_slave_status';
|
||||
Variable_name Value
|
||||
@ -426,24 +428,20 @@ show status like 'Rpl_semi_sync_master_clients';
|
||||
Variable_name Value
|
||||
Rpl_semi_sync_master_clients 2
|
||||
#-- Prepare servers to simulate delay or error
|
||||
connection server_1;
|
||||
SET @@GLOBAL.debug_dbug= "+d,mysqld_delay_kill_threads_phase_1";
|
||||
connection server_2;
|
||||
SET @@GLOBAL.debug_dbug= "+d,corrupt_queue_event,slave_delay_killing_semisync_connection";
|
||||
SET @@GLOBAL.debug_dbug= "+d,corrupt_queue_event,delay_semisync_kill_connection_for_mdev_28141";
|
||||
connection server_3;
|
||||
SET @@GLOBAL.debug_dbug= "+d,simulate_delay_semisync_slave_reply";
|
||||
#--
|
||||
#-- Test begins
|
||||
connection server_1_con2;
|
||||
#-- Give enough time after timeout/ack received to query yes_tx/no_tx
|
||||
SET @@GLOBAL.debug_dbug= "+d,delay_shutdown_phase_2_after_semisync_wait";
|
||||
connection server_1;
|
||||
#-- Begin semi-sync transaction
|
||||
INSERT INTO t1 VALUES (1);
|
||||
connection server_1_con2;
|
||||
#-- Wait until master recognizes a connection is awaiting semi-sync ACK
|
||||
show status like 'Rpl_semi_sync_master_wait_sessions';
|
||||
Variable_name Value
|
||||
Rpl_semi_sync_master_wait_sessions 1
|
||||
#-- Give enough time after timeout/ack received to query yes_tx/no_tx
|
||||
SET @@GLOBAL.debug_dbug= "+d,delay_shutdown_phase_2_after_semisync_wait";
|
||||
#-- Begin master shutdown
|
||||
SHUTDOWN WAIT FOR ALL SLAVES;
|
||||
connection server_1;
|
||||
@ -468,24 +466,28 @@ count(*)=1
|
||||
1
|
||||
#
|
||||
#-- Re-synchronize slaves with master and disable semi-sync
|
||||
#-- FIXME: workaround for MDEV-28141, preventing errored replicas from
|
||||
# killing their semi-sync connections
|
||||
connection server_2;
|
||||
set debug_sync= "now wait_for at_semisync_kill_connection";
|
||||
set debug_sync= "now signal continue_semisync_kill_connection";
|
||||
# Wait for debug_sync signal to have been received before issuing RESET
|
||||
set debug_sync= "reset";
|
||||
#-- Stop slaves
|
||||
connection server_2;
|
||||
SET @@GLOBAL.debug_dbug= "";
|
||||
SET @@GLOBAL.rpl_semi_sync_slave_enabled= 0;
|
||||
include/stop_slave.inc
|
||||
include/stop_slave_io.inc
|
||||
include/stop_slave_sql.inc
|
||||
SET @@GLOBAL.debug_dbug= @sav_server_2_dbug;
|
||||
SET @@GLOBAL.rpl_semi_sync_slave_enabled= @sav_enabled_server_2;
|
||||
connection server_3;
|
||||
SET @@GLOBAL.debug_dbug= "";
|
||||
SET @@GLOBAL.rpl_semi_sync_slave_enabled= 0;
|
||||
include/stop_slave.inc
|
||||
include/stop_slave_io.inc
|
||||
include/stop_slave_sql.inc
|
||||
SET @@GLOBAL.debug_dbug= @sav_server_3_dbug;
|
||||
SET @@GLOBAL.rpl_semi_sync_slave_enabled= @sav_enabled_server_3;
|
||||
#-- Bring the master back up
|
||||
connection server_1_con2;
|
||||
connection default;
|
||||
connection server_1;
|
||||
SET @@GLOBAL.debug_dbug= "";
|
||||
SET @@GLOBAL.rpl_semi_sync_master_enabled = 0;
|
||||
show status like 'Rpl_semi_sync_master_status';
|
||||
Variable_name Value
|
||||
Rpl_semi_sync_master_status OFF
|
||||
TRUNCATE TABLE t1;
|
||||
#-- Bring slaves back up
|
||||
connection server_2;
|
||||
@ -509,9 +511,13 @@ COUNT(*)=0
|
||||
#############################
|
||||
connection server_2;
|
||||
include/stop_slave.inc
|
||||
SET @@GLOBAL.rpl_semi_sync_slave_enabled = @sav_enabled_server_2;
|
||||
SET @@GLOBAL.debug_dbug= @sav_server_2_dbug;
|
||||
include/start_slave.inc
|
||||
connection server_3;
|
||||
include/stop_slave.inc
|
||||
SET @@GLOBAL.rpl_semi_sync_slave_enabled = @sav_enabled_server_3;
|
||||
SET @@GLOBAL.debug_dbug= @sav_server_3_dbug;
|
||||
include/start_slave.inc
|
||||
connection server_1;
|
||||
drop table t1;
|
||||
|
@ -3,7 +3,6 @@
|
||||
# replicas before shutting down.
|
||||
#
|
||||
# Parameters:
|
||||
# server_1_dbug (string) Debug setting for primary (server 1)
|
||||
# server_2_dbug (string) Debug setting to simulate delay or error on
|
||||
# the first replica (server 2)
|
||||
# server_3_dbug (string) Debug setting to simulate delay or error on
|
||||
@ -32,8 +31,8 @@ while (`SELECT $i <= $slave_last`)
|
||||
--connection server_$i
|
||||
--sync_with_master
|
||||
|
||||
set global rpl_semi_sync_slave_enabled = 1;
|
||||
source include/stop_slave.inc;
|
||||
set global rpl_semi_sync_slave_enabled = 1;
|
||||
source include/start_slave.inc;
|
||||
show status like 'Rpl_semi_sync_slave_status';
|
||||
|
||||
@ -56,8 +55,6 @@ show status like 'Rpl_semi_sync_master_status';
|
||||
show status like 'Rpl_semi_sync_master_clients';
|
||||
|
||||
--echo #-- Prepare servers to simulate delay or error
|
||||
--connection server_1
|
||||
--eval SET @@GLOBAL.debug_dbug= $server_1_dbug
|
||||
--connection server_2
|
||||
--eval SET @@GLOBAL.debug_dbug= $server_2_dbug
|
||||
--connection server_3
|
||||
@ -66,6 +63,14 @@ show status like 'Rpl_semi_sync_master_clients';
|
||||
--echo #--
|
||||
--echo #-- Test begins
|
||||
|
||||
--connection server_1_con2
|
||||
--echo #-- Give enough time after timeout/ack received to query yes_tx/no_tx
|
||||
SET @@GLOBAL.debug_dbug= "+d,delay_shutdown_phase_2_after_semisync_wait";
|
||||
|
||||
--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
|
||||
wait
|
||||
EOF
|
||||
|
||||
--connection server_1
|
||||
--echo #-- Begin semi-sync transaction
|
||||
--send INSERT INTO t1 VALUES (1)
|
||||
@ -75,14 +80,6 @@ show status like 'Rpl_semi_sync_master_clients';
|
||||
let $status_var= Rpl_semi_sync_master_wait_sessions;
|
||||
let $status_var_value= 1;
|
||||
source include/wait_for_status_var.inc;
|
||||
show status like 'Rpl_semi_sync_master_wait_sessions';
|
||||
|
||||
--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
|
||||
wait
|
||||
EOF
|
||||
|
||||
--echo #-- Give enough time after timeout/ack received to query yes_tx/no_tx
|
||||
SET @@GLOBAL.debug_dbug= "+d,delay_shutdown_phase_2_after_semisync_wait";
|
||||
|
||||
--echo #-- Begin master shutdown
|
||||
--send SHUTDOWN WAIT FOR ALL SLAVES
|
||||
@ -111,19 +108,72 @@ show status like 'Rpl_semi_sync_master_no_tx';
|
||||
--echo #
|
||||
--echo #-- Re-synchronize slaves with master and disable semi-sync
|
||||
|
||||
if (`SELECT ($server_2_expect_row_count + $server_3_expect_row_count) < 2`)
|
||||
{
|
||||
--echo #-- FIXME: workaround for MDEV-28141, preventing errored replicas from
|
||||
--echo # killing their semi-sync connections
|
||||
# I.e. we can't create a new kill connection to the primary if we know that the
|
||||
# primary is shutting down for risk of Packets out of order error. So we wait
|
||||
# to hit a debug_sync point before the creation of the new kill_connection, and
|
||||
# don't progress until the primary has been shutdown, so no new connection can
|
||||
# be formed.
|
||||
# Note this is only needed in the error case (using corrupt_queue_event), as
|
||||
# the running io_thread will otherwise automatically detect that the primary
|
||||
# has shutdown before progressing to the cleanup of the io thread.
|
||||
}
|
||||
|
||||
if (!$server_2_expect_row_count)
|
||||
{
|
||||
--connection server_2
|
||||
set debug_sync= "now wait_for at_semisync_kill_connection";
|
||||
set debug_sync= "now signal continue_semisync_kill_connection";
|
||||
--echo # Wait for debug_sync signal to have been received before issuing RESET
|
||||
let $wait_condition= select count(*)=0 from information_schema.processlist where state like "debug sync point%";
|
||||
source include/wait_condition.inc;
|
||||
set debug_sync= "reset";
|
||||
}
|
||||
if (!$server_3_expect_row_count)
|
||||
{
|
||||
--connection server_3
|
||||
set debug_sync= "now wait_for at_semisync_kill_connection";
|
||||
set debug_sync= "now signal continue_semisync_kill_connection";
|
||||
--echo # Wait for debug_sync signal to have been received before issuing RESET
|
||||
let $wait_condition= select count(*)=0 from information_schema.processlist where state like "debug sync point%";
|
||||
source include/wait_condition.inc;
|
||||
set debug_sync= "reset";
|
||||
}
|
||||
|
||||
--echo #-- Stop slaves
|
||||
|
||||
--connection server_2
|
||||
--eval SET @@GLOBAL.debug_dbug= "$sav_server_2_dbug"
|
||||
--eval SET @@GLOBAL.rpl_semi_sync_slave_enabled= 0
|
||||
--let $rpl_only_running_threads= 1
|
||||
source include/stop_slave.inc;
|
||||
# If server_2_expect_row_count is 0, we are simulating an error on the replica
|
||||
# and the IO thread will end with errno 1595.
|
||||
# Otherwise, we still expect error, because the master has shutdown at this
|
||||
# point, and the IO thread may or may not have realized the shutdown, and
|
||||
# started to try to automatically reconnect. This may result in the IO thread
|
||||
# giving a 2003 error if the slave tries to reconnect to a shutdown master.
|
||||
# Additionally disable warnings because the slave may have stopped in err
|
||||
# automatically, and we don't want a sporadic "Slave is already stopped"
|
||||
# warning.
|
||||
--disable_warnings
|
||||
--let $rpl_allow_error= 1
|
||||
--source include/stop_slave_io.inc
|
||||
--enable_warnings
|
||||
--let $rpl_allow_error=
|
||||
--source include/stop_slave_sql.inc
|
||||
SET @@GLOBAL.debug_dbug= @sav_server_2_dbug;
|
||||
SET @@GLOBAL.rpl_semi_sync_slave_enabled= @sav_enabled_server_2;
|
||||
|
||||
--connection server_3
|
||||
--eval SET @@GLOBAL.debug_dbug= "$sav_server_3_dbug"
|
||||
--eval SET @@GLOBAL.rpl_semi_sync_slave_enabled= 0
|
||||
--let $rpl_only_running_threads= 1
|
||||
source include/stop_slave.inc;
|
||||
# Expect error for IO thread, see above comment for stopping server_2
|
||||
--disable_warnings
|
||||
--let $rpl_allow_error= 1
|
||||
--source include/stop_slave_io.inc
|
||||
--enable_warnings
|
||||
--let $rpl_allow_error=
|
||||
--source include/stop_slave_sql.inc
|
||||
SET @@GLOBAL.debug_dbug= @sav_server_3_dbug;
|
||||
SET @@GLOBAL.rpl_semi_sync_slave_enabled= @sav_enabled_server_3;
|
||||
|
||||
--echo #-- Bring the master back up
|
||||
--connection server_1_con2
|
||||
@ -142,13 +192,6 @@ EOF
|
||||
--enable_reconnect
|
||||
--source include/wait_until_connected_again.inc
|
||||
|
||||
--eval SET @@GLOBAL.debug_dbug= "$sav_master_dbug"
|
||||
let $status_var= Rpl_semi_sync_master_clients;
|
||||
let $status_var_value= 0;
|
||||
source include/wait_for_status_var.inc;
|
||||
--eval SET @@GLOBAL.rpl_semi_sync_master_enabled = 0
|
||||
show status like 'Rpl_semi_sync_master_status';
|
||||
|
||||
TRUNCATE TABLE t1;
|
||||
--save_master_pos
|
||||
|
||||
|
@ -69,9 +69,8 @@ call mtr.add_suppression("Timeout waiting");
|
||||
call mtr.add_suppression("did not exit");
|
||||
call mtr.add_suppression("Got an error reading communication packets");
|
||||
|
||||
--let $sav_master_timeout= `SELECT @@global.rpl_semi_sync_master_timeout`
|
||||
--let $sav_master_timeout= `SELECT @@GLOBAL.rpl_semi_sync_master_timeout`
|
||||
--let $sav_enabled_master= `SELECT @@GLOBAL.rpl_semi_sync_master_enabled`
|
||||
--let $sav_master_dbug= `SELECT @@GLOBAL.debug_dbug`
|
||||
|
||||
--echo # Suppress slave errors related to the simulated error
|
||||
--connection server_2
|
||||
@ -79,16 +78,16 @@ call mtr.add_suppression("reply failed");
|
||||
call mtr.add_suppression("Replication event checksum verification");
|
||||
call mtr.add_suppression("Relay log write failure");
|
||||
call mtr.add_suppression("Failed to kill the active semi-sync connection");
|
||||
--let $sav_enabled_server_2=`SELECT @@GLOBAL.rpl_semi_sync_slave_enabled`
|
||||
--let $sav_server_2_dbug= `SELECT @@GLOBAL.debug_dbug`
|
||||
set @sav_enabled_server_2= @@GLOBAL.rpl_semi_sync_slave_enabled;
|
||||
set @sav_server_2_dbug= @@GLOBAL.debug_dbug;
|
||||
|
||||
--connection server_3
|
||||
call mtr.add_suppression("reply failed");
|
||||
call mtr.add_suppression("Replication event checksum verification");
|
||||
call mtr.add_suppression("Relay log write failure");
|
||||
call mtr.add_suppression("Failed to kill the active semi-sync connection");
|
||||
--let $sav_enabled_server_3=`SELECT @@GLOBAL.rpl_semi_sync_slave_enabled`
|
||||
--let $sav_server_3_dbug= `SELECT @@GLOBAL.debug_dbug`
|
||||
set @sav_enabled_server_3= @@GLOBAL.rpl_semi_sync_slave_enabled;
|
||||
set @sav_server_3_dbug= @@GLOBAL.debug_dbug;
|
||||
|
||||
--connection server_1
|
||||
CREATE TABLE t1 (a int);
|
||||
@ -116,7 +115,6 @@ while (`SELECT $i <= $slave_last`)
|
||||
--echo # allowed timeout, the primary should delay killing the suspended thread
|
||||
--echo # until an ACK is received (Rpl_semi_sync_master_yes_tx should be 1).
|
||||
--echo #
|
||||
--let server_1_dbug= ""
|
||||
--let server_2_dbug= "+d,simulate_delay_semisync_slave_reply"
|
||||
--let server_3_dbug= "+d,simulate_delay_semisync_slave_reply"
|
||||
--let semisync_timeout= 1600
|
||||
@ -129,9 +127,8 @@ while (`SELECT $i <= $slave_last`)
|
||||
--echo # the primary should delay killing the suspended thread until the
|
||||
--echo # timeout is reached (Rpl_semi_sync_master_no_tx should be 1).
|
||||
--echo #
|
||||
--let server_1_dbug= "+d,mysqld_delay_kill_threads_phase_1"
|
||||
--let server_2_dbug= "+d,corrupt_queue_event"
|
||||
--let server_3_dbug= "+d,corrupt_queue_event"
|
||||
--let server_2_dbug= "+d,corrupt_queue_event,delay_semisync_kill_connection_for_mdev_28141"
|
||||
--let server_3_dbug= "+d,corrupt_queue_event,delay_semisync_kill_connection_for_mdev_28141"
|
||||
--let semisync_timeout= 500
|
||||
--let server_2_expect_row_count= 0
|
||||
--let server_3_expect_row_count= 0
|
||||
@ -143,8 +140,7 @@ while (`SELECT $i <= $slave_last`)
|
||||
--echo # primary should delay killing the suspended thread until it receives an
|
||||
--echo # ACK from the delayed slave (Rpl_semi_sync_master_yes_tx should be 1).
|
||||
--echo #
|
||||
--let server_1_dbug= "+d,mysqld_delay_kill_threads_phase_1"
|
||||
--let server_2_dbug= "+d,corrupt_queue_event"
|
||||
--let server_2_dbug= "+d,corrupt_queue_event,delay_semisync_kill_connection_for_mdev_28141"
|
||||
--let server_3_dbug= "+d,simulate_delay_semisync_slave_reply"
|
||||
--let semisync_timeout= 1600
|
||||
--let server_2_expect_row_count= 0
|
||||
@ -163,15 +159,7 @@ while (`SELECT $i <= $slave_last`)
|
||||
--echo # slave does not send a `QUIT` in this case (Rpl_semi_sync_master_yes_tx
|
||||
--echo # should be 1 because server_3 will send the ACK within a valid timeout).
|
||||
--echo #
|
||||
|
||||
# mysqld_delay_kill_threads_phase1 ensures that server_2 will have enough time
|
||||
# to start a new connection that has the intent to kill the active semi-sync
|
||||
# connection
|
||||
--let server_1_dbug= "+d,mysqld_delay_kill_threads_phase_1"
|
||||
|
||||
# slave_delay_killing_semisync_connection ensures that the primary has force
|
||||
# killed its current connection before it is able to issue `KILL`
|
||||
--let server_2_dbug= "+d,corrupt_queue_event,slave_delay_killing_semisync_connection"
|
||||
--let server_2_dbug= "+d,corrupt_queue_event,delay_semisync_kill_connection_for_mdev_28141"
|
||||
--let server_3_dbug= "+d,simulate_delay_semisync_slave_reply"
|
||||
--let semisync_timeout= 1600
|
||||
--let server_2_expect_row_count= 0
|
||||
@ -184,32 +172,24 @@ while (`SELECT $i <= $slave_last`)
|
||||
|
||||
--connection server_2
|
||||
source include/stop_slave.inc;
|
||||
SET @@GLOBAL.rpl_semi_sync_slave_enabled = @sav_enabled_server_2;
|
||||
SET @@GLOBAL.debug_dbug= @sav_server_2_dbug;
|
||||
source include/start_slave.inc;
|
||||
|
||||
--disable_query_log
|
||||
--eval SET @@GLOBAL.rpl_semi_sync_slave_enabled = $sav_enabled_server_2
|
||||
--eval SET @@GLOBAL.debug_dbug= "$sav_server_2_dbug"
|
||||
--enable_query_log
|
||||
|
||||
--connection server_3
|
||||
source include/stop_slave.inc;
|
||||
SET @@GLOBAL.rpl_semi_sync_slave_enabled = @sav_enabled_server_3;
|
||||
SET @@GLOBAL.debug_dbug= @sav_server_3_dbug;
|
||||
source include/start_slave.inc;
|
||||
|
||||
--disable_query_log
|
||||
--eval SET @@GLOBAL.rpl_semi_sync_slave_enabled = $sav_enabled_server_3
|
||||
--eval SET @@GLOBAL.debug_dbug= "$sav_server_3_dbug"
|
||||
--enable_query_log
|
||||
|
||||
|
||||
--connection server_1
|
||||
let $status_var= Rpl_semi_sync_master_clients;
|
||||
let $status_var_value= 0;
|
||||
source include/wait_for_status_var.inc;
|
||||
|
||||
--disable_query_log
|
||||
--eval SET @@GLOBAL.rpl_semi_sync_master_timeout= $sav_master_timeout
|
||||
--eval SET @@GLOBAL.rpl_semi_sync_master_enabled= $sav_enabled_master
|
||||
--eval SET @@GLOBAL.debug_dbug= "$sav_master_dbug"
|
||||
--eval SET @@GLOBAL.rpl_semi_sync_master_timeout= $sav_master_timeout;
|
||||
--eval SET @@GLOBAL.rpl_semi_sync_master_enabled= $sav_enabled_master;
|
||||
--enable_query_log
|
||||
|
||||
drop table t1;
|
||||
|
@ -1742,7 +1742,6 @@ static void close_connections(void)
|
||||
This will give the threads some time to gracefully abort their
|
||||
statements and inform their clients that the server is about to die.
|
||||
*/
|
||||
DBUG_EXECUTE_IF("mysqld_delay_kill_threads_phase_1", my_sleep(200000););
|
||||
int n_threads_awaiting_ack= 0;
|
||||
server_threads.iterate(kill_thread_phase_1, &n_threads_awaiting_ack);
|
||||
|
||||
|
@ -17,6 +17,7 @@
|
||||
|
||||
#include <my_global.h>
|
||||
#include "semisync_slave.h"
|
||||
#include "debug_sync.h"
|
||||
|
||||
Repl_semi_sync_slave repl_semisync_slave;
|
||||
|
||||
@ -115,7 +116,21 @@ int Repl_semi_sync_slave::slave_start(Master_info *mi)
|
||||
int Repl_semi_sync_slave::slave_stop(Master_info *mi)
|
||||
{
|
||||
if (get_slave_enabled())
|
||||
{
|
||||
#ifdef ENABLED_DEBUG_SYNC
|
||||
/*
|
||||
TODO: Remove after MDEV-28141
|
||||
*/
|
||||
DBUG_EXECUTE_IF("delay_semisync_kill_connection_for_mdev_28141", {
|
||||
const char act[]= "now "
|
||||
"signal at_semisync_kill_connection "
|
||||
"wait_for continue_semisync_kill_connection";
|
||||
DBUG_ASSERT(debug_sync_service);
|
||||
DBUG_ASSERT(!debug_sync_set_action(mi->io_thd, STRING_WITH_LEN(act)));
|
||||
};);
|
||||
#endif
|
||||
kill_connection(mi->mysql);
|
||||
}
|
||||
|
||||
if (rpl_semi_sync_slave_status)
|
||||
rpl_semi_sync_slave_status= 0;
|
||||
@ -151,8 +166,6 @@ void Repl_semi_sync_slave::kill_connection(MYSQL *mysql)
|
||||
goto failed_graceful_kill;
|
||||
}
|
||||
|
||||
DBUG_EXECUTE_IF("slave_delay_killing_semisync_connection", my_sleep(400000););
|
||||
|
||||
kill_buffer_length= my_snprintf(kill_buffer, 30, "KILL %lu",
|
||||
mysql->thread_id);
|
||||
if (mysql_real_query(kill_mysql, kill_buffer, (ulong)kill_buffer_length))
|
||||
|
Loading…
x
Reference in New Issue
Block a user