diff --git a/mysql-test/suite/binlog_encryption/rpl_parallel.result b/mysql-test/suite/binlog_encryption/rpl_parallel.result index b75a66a634a..610e87e850e 100644 --- a/mysql-test/suite/binlog_encryption/rpl_parallel.result +++ b/mysql-test/suite/binlog_encryption/rpl_parallel.result @@ -732,7 +732,7 @@ SET debug_sync='now WAIT_FOR t3_waiting'; SET debug_sync='now SIGNAL d2_cont'; SET debug_sync='now WAIT_FOR t4_waiting'; KILL THD_ID; -SET debug_sync='now WAIT_FOR t3_killed'; +# Wait for replica to signal worker threads to stop SET debug_sync='now SIGNAL t1_cont'; include/wait_for_slave_sql_error.inc [errno=1317,1927,1964] STOP SLAVE IO_THREAD; @@ -742,7 +742,6 @@ a b 61 61 62 62 63 63 -64 64 68 68 69 69 70 70 @@ -816,6 +815,7 @@ connection server_2; SET debug_sync='now WAIT_FOR wait_queue_ready'; KILL THD_ID; SET debug_sync='now WAIT_FOR wait_queue_killed'; +# Wait for replica to signal worker threads to stop SET debug_sync='now SIGNAL query_cont'; include/wait_for_slave_sql_error.inc [errno=1317,1927,1964] STOP SLAVE IO_THREAD; @@ -827,6 +827,8 @@ SET binlog_format=@old_format; connection server_2; SET debug_sync='RESET'; include/start_slave.inc +SET debug_sync='now WAIT_FOR query_waiting'; +SET debug_sync='now SIGNAL query_cont'; SELECT * FROM t3 WHERE a >= 80 ORDER BY a; a b 80 0 @@ -1215,6 +1217,7 @@ connection server_2; include/wait_for_slave_sql_to_stop.inc SELECT * FROM t2 WHERE a >= 40 ORDER BY a; a +40 41 42 include/start_slave.inc diff --git a/mysql-test/suite/rpl/include/rpl_par_stop_slave_quick_common.test b/mysql-test/suite/rpl/include/rpl_par_stop_slave_quick_common.test new file mode 100644 index 00000000000..deac78660e8 --- /dev/null +++ b/mysql-test/suite/rpl/include/rpl_par_stop_slave_quick_common.test @@ -0,0 +1,608 @@ +# +# The stop_slave_quick suite of tests aims to validate that stopping a replica +# with parallelization enabled will stop in a timely manner. 
That is, a +# parallel replica should try to immediately stop and roll back any ongoing +# transactions. If any thread has a non-transactional workload, then it, +# along with all prior transactions, is executed before stopping. +# +# This file provides test cases that should be binlog format independent. There +# is, however, behavior that is specific to either statement or row format, +# which each have their own test files that include this file. +# +# Requirements: +# 1. Tables named `ti`, `ti2`, and `ti3` have already been created with +# storage engine InnoDB; and tables named `tm` and `tm2` have been created +# with storage engine MyISAM. +# 2. Test variables ti_ctr, ti2_ctr, ti3_ctr, tm_ctr, and tm2_ctr have been +# created to serve as dynamic values to insert into their respective tables +# +# References: +# MDEV-13915: STOP SLAVE takes very long time on a busy system +# + +--echo # +--echo # Common Test Case 1: +--echo # Using one parallel replication worker thread on workload {T,T}, ensure +--echo # the replica immediately rolls back the transaction and stops the +--echo # SQL thread +--connection slave +--source include/stop_slave.inc +set @@global.slave_parallel_threads=1; +--let $row_count_initial=`select count(*) from ti` + +--connection master +--source include/save_master_gtid.inc +BEGIN; +--eval insert into ti values ($ti_ctr) +--inc $ti_ctr +--eval insert into ti values ($ti_ctr) +--inc $ti_ctr +COMMIT; + +--eval insert into ti values ($ti_ctr) +--inc $ti_ctr + +--connection slave +LOCK TABLES ti WRITE; +--source include/start_slave.inc + +--echo # Wait for replica to begin executing the first transaction +--connection slave +--let $wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE state LIKE 'Waiting for table metadata lock' and command LIKE 'Slave_worker'; +--source include/wait_condition.inc + +--connection slave1 +--send STOP SLAVE; + +--connection slave +--echo # Wait for replica to signal worker threads to stop +--let 
$wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE state LIKE 'Waiting for worker thread to stop'; +--source include/wait_condition.inc + +UNLOCK TABLES; +--source include/wait_for_slave_sql_to_stop.inc + +--connection slave1 +--reap +--connection slave + +--let $row_count_end=`select count(*) from ti` +--let $row_count_diff=`select ($row_count_end-$row_count_initial)` +--let $assert_text= No new rows should have been inserted +--let $assert_cond= $row_count_diff = 0 +--source include/assert.inc + +--let $slave_gtid= `select @@global.gtid_slave_pos` +--let $assert_text= GTID slave state should not change +--let $assert_cond= $master_pos = $slave_gtid +--source include/assert.inc + +--connection master +--source include/save_master_gtid.inc +--connection slave +--source include/start_slave.inc +--source include/sync_with_master_gtid.inc + + +--echo # +--echo # Common Test Case 2: +--echo # Using multiple parallel replication threads (two) on workload {T,T}, +--echo # ensure both transactions are rolled back if stop slave is issued +--echo # in the middle of the first transaction. 
+ +--connection slave +--source include/stop_slave.inc +set @@global.slave_parallel_threads=2; +--let $row_count_initial=`select count(*) from ti` + +--connection master +--source include/save_master_gtid.inc +BEGIN; +--eval insert into ti values ($ti_ctr) +--inc $ti_ctr +--eval insert into ti values ($ti_ctr) +--inc $ti_ctr +COMMIT; +--eval insert into ti values ($ti_ctr) +--inc $ti_ctr + +--connection slave +LOCK TABLES ti WRITE; +--source include/start_slave.inc + +--echo # Wait for replica to begin executing the first transaction +--connection slave +--let $wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE state LIKE 'Waiting for table metadata lock' and command LIKE 'Slave_worker'; +--source include/wait_condition.inc + +--echo # Wait for second transaction to begin +--connection slave +--let $wait_condition= SELECT count(*)=0 FROM information_schema.processlist WHERE state LIKE 'Waiting for work from SQL thread' and command LIKE 'Slave_worker'; +--source include/wait_condition.inc + +--connection slave1 +--send STOP SLAVE; + +--connection slave +--echo # Wait for replica to signal worker threads to stop +--let $wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE state LIKE 'Waiting for worker thread to stop'; +--source include/wait_condition.inc + +UNLOCK TABLES; +--source include/wait_for_slave_sql_to_stop.inc + +--connection slave1 +--reap +--connection slave + +--let $row_count_end=`select count(*) from ti` +--let $row_count_diff=`select ($row_count_end-$row_count_initial)` +--let $assert_text= No insertions should have committed +--let $assert_cond= $row_count_diff = 0 +--source include/assert.inc + +--let $slave_gtid= `select @@global.gtid_slave_pos` +--let $assert_text= GTID slave state should not change +--let $assert_cond= $master_pos = $slave_gtid +--source include/assert.inc + +--echo # Slave should be error-free +let $last_error = query_get_value("SHOW SLAVE STATUS", Last_SQL_Errno, 1); +--let 
$assert_text= Slave should be error free +--let $assert_cond= $last_error = 0 +--source include/assert.inc + +--connection master +--source include/save_master_gtid.inc +--connection slave +--source include/start_slave.inc +--source include/sync_with_master_gtid.inc + + +--echo # +--echo # Common Test Case 3: +--echo # Using multiple parallel replication threads (two) on workload {T,T}, +--echo # with the same commit id (cid), ensure both transactions are rolled +--echo # back if stop slave is issued + +--connection slave +--source include/stop_slave.inc +set @@global.slave_parallel_mode=AGGRESSIVE; +set @@global.slave_parallel_threads=2; +--let $row_count_initial=`select count(*) from ti` + +--connection master +--source include/save_master_gtid.inc +SET @old_dbug= @@SESSION.debug_dbug; +SET @@SESSION.debug_dbug="+d,binlog_force_commit_id"; +SET @commit_id= 10000; +BEGIN; +--eval insert into ti values ($ti_ctr) +--inc $ti_ctr +--eval insert into ti values ($ti_ctr) +--inc $ti_ctr +COMMIT; +--eval insert into ti2 values ($ti2_ctr) +--inc $ti2_ctr + +SET @@SESSION.debug_dbug=@old_dbug; + +--connection slave +LOCK TABLES ti WRITE; +--source include/start_slave.inc + +--echo # Wait for replica to begin executing the first transactions +--connection slave +--let $wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE state LIKE 'Waiting for table metadata lock' and command LIKE 'Slave_worker'; +--source include/wait_condition.inc + +--echo # Wait for second transaction to start group commit +--connection slave +--let $wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE state LIKE 'Waiting for prior transaction to commit' and command LIKE 'Slave_worker'; +--source include/wait_condition.inc + +--connection slave1 +--send STOP SLAVE; + +--connection slave +--echo # Wait for replica to signal worker threads to stop +--let $wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE state LIKE 'Waiting for worker 
thread to stop'; +--source include/wait_condition.inc + +UNLOCK TABLES; +--source include/wait_for_slave_sql_to_stop.inc + +--connection slave1 +--reap +--connection slave + +--let $row_count_end=`select count(*) from ti` +--let $row_count_diff=`select ($row_count_end-$row_count_initial)` +--let $assert_text= No insertions should have committed +--let $assert_cond= $row_count_diff = 0 +--source include/assert.inc + +--let $slave_gtid= `select @@global.gtid_slave_pos` +--let $assert_text= GTID slave state should not change +--let $assert_cond= $master_pos = $slave_gtid +--source include/assert.inc + +--connection master +--source include/save_master_gtid.inc + +--connection slave +--source include/start_slave.inc +--source include/sync_with_master_gtid.inc + + +--echo # +--echo # Common Test Case 4: +--echo # Using multiple parallel replication threads (4) on workload +--echo # T (long running); should commit +--echo # N (waiting for prior commit); should commit +--echo # T (long running); should rollback +--echo # T (waiting for prior commit); should rollback +--echo # Issuing STOP SLAVE should allow the first two transactions to commit +--echo # while preventing and rolling back the third +--connection slave +--source include/stop_slave.inc +set @@global.slave_parallel_mode=optimistic; +set @@global.slave_parallel_threads=4; + +--connection master +SET @old_dbug= @@SESSION.debug_dbug; +SET @@SESSION.debug_dbug="+d,binlog_force_commit_id"; +SET @commit_id= 10001; +BEGIN; +--eval insert into ti values ($ti_ctr) +--inc $ti_ctr +--eval insert into ti values ($ti_ctr) +--inc $ti_ctr +COMMIT; + +--connection master +--eval insert into tm values ($tm_ctr) +--inc $tm_ctr +--source include/save_master_gtid.inc + +--eval insert into ti2 values ($ti2_ctr) +--inc $ti2_ctr + +--eval insert into ti3 values ($ti3_ctr) +--inc $ti3_ctr + +SET @@SESSION.debug_dbug=@old_dbug; + +--connection slave +LOCK TABLES ti WRITE, ti2 WRITE; +--source include/start_slave.inc + +--echo # Wait 
for replica to progress until the transactions targeting locked tables are stuck on their locks.. +--let $wait_condition= SELECT count(*)=2 FROM information_schema.processlist WHERE state LIKE 'Waiting for table metadata lock' and command LIKE 'Slave_worker'; +--source include/wait_condition.inc + +--echo # Wait for replica to progress until unblocked transactions are queued for group commit.. +--connection slave +--let $wait_condition= SELECT count(*)=2 FROM information_schema.processlist WHERE state LIKE 'Waiting for prior transaction to commit' and command LIKE 'Slave_worker'; +--source include/wait_condition.inc + +--connection slave1 +--send STOP SLAVE; + +--connection slave +--echo # Wait for replica to signal worker threads to stop +--let $wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE state LIKE 'Waiting for worker thread to stop'; +--source include/wait_condition.inc + +UNLOCK TABLES; +--source include/wait_for_slave_sql_to_stop.inc + +--connection slave1 +--reap +--connection slave + +--let $slave_gtid= `select @@global.gtid_slave_pos` +--let $assert_text= GTID slave state should reach first N transaction +--let $assert_cond= $master_pos = $slave_gtid +--source include/assert.inc + +set @@global.slave_parallel_mode=CONSERVATIVE; + +--connection master +--source include/save_master_gtid.inc +--connection slave +--source include/start_slave.inc +--source include/sync_with_master_gtid.inc + + +--echo # +--echo # Common Test Case 5: +--echo # Using multiple parallel replication threads (5) on workload +--echo # T (long running); should commit +--echo # N (waiting for prior commit); should commit +--echo # T (waiting for prior commit); should commit +--echo # N (waiting for prior commit); should commit +--echo # T (long running); should rollback +--echo # Issuing STOP SLAVE should allow all transactions up to and including +--echo # the last N (4th) to commit, while preventing and rolling back the +--echo # final transaction (5th) 
+ +--connection slave +--source include/stop_slave.inc +set @@global.slave_parallel_mode=optimistic; +set @@global.slave_parallel_threads=5; + +--connection master +SET @old_dbug= @@SESSION.debug_dbug; +SET @@SESSION.debug_dbug="+d,binlog_force_commit_id"; +SET @commit_id= 10002; +--eval insert into ti values ($ti_ctr) +--inc $ti_ctr + +--eval insert into tm values ($tm_ctr) +--inc $tm_ctr + +--eval insert into ti2 values ($ti2_ctr) +--inc $ti2_ctr + +--eval insert into tm2 values ($tm2_ctr) +--inc $tm2_ctr +--source include/save_master_gtid.inc + +--eval insert into ti3 values ($ti3_ctr) +--inc $ti3_ctr + +SET @@SESSION.debug_dbug=@old_dbug; + +--connection slave +LOCK TABLES ti WRITE, ti3 WRITE; +--source include/start_slave.inc + +--echo # Wait for replica to progress until the transactions targeting locked tables are stuck on their locks.. +--let $wait_condition= SELECT count(*)=2 FROM information_schema.processlist WHERE state LIKE 'Waiting for table metadata lock' and command LIKE 'Slave_worker'; +--source include/wait_condition.inc + +--echo # Wait for replica to progress until unblocked transactions are queued for group commit.. 
+--connection slave +--let $wait_condition= SELECT count(*)=3 FROM information_schema.processlist WHERE state LIKE 'Waiting for prior transaction to commit' and command LIKE 'Slave_worker'; +--source include/wait_condition.inc + +--connection slave1 +--send STOP SLAVE; + +--connection slave +--echo # Wait for replica to signal worker threads to stop +--let $wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE state LIKE 'Waiting for worker thread to stop'; +--source include/wait_condition.inc + +UNLOCK TABLES; +--source include/wait_for_slave_sql_to_stop.inc + +--connection slave1 +--reap +--connection slave + +--let $slave_gtid= `select @@global.gtid_slave_pos` +--let $assert_text= GTID slave state should reach second N transaction +--let $assert_cond= $master_pos = $slave_gtid +--source include/assert.inc + +set @@global.slave_parallel_mode=CONSERVATIVE; + +--connection master +--source include/save_master_gtid.inc +--connection slave +--source include/start_slave.inc +--source include/sync_with_master_gtid.inc + + +--echo # +--echo # Common Test Case 6: +--echo # If retrying a T transaction while STOP SLAVE is issued, the +--echo # transaction should be rolled back and the slave abruptly stopped + +--connection master +--eval insert into ti values ($ti_ctr) +--source include/save_master_gtid.inc + +--connection slave +--source include/start_slave.inc +--source include/sync_with_master_gtid.inc +--source include/stop_slave.inc +set @@global.slave_parallel_threads=1; + +--let $save_innodb_lock_wait_timeout= `SELECT @@global.innodb_lock_wait_timeout` +# 2 second buffer to give ample time to wait for transaction and issue stop slave +set @@global.innodb_lock_wait_timeout= 2; +BEGIN; +--eval SELECT * FROM ti WHERE a=$ti_ctr FOR UPDATE + +--connection master +--source include/save_master_gtid.inc +--eval update ti set a=a+1 where a=$ti_ctr +--inc $ti_ctr +--inc $ti_ctr + +--connection slave +--source include/start_slave.inc +--let 
$retried_tx_initial= query_get_value(SHOW ALL SLAVES STATUS, Retried_transactions, 1) + +if (`SELECT @@global.binlog_format = 'ROW'`) +{ + --let $update_state=Update_rows_log_event::find_row(-1) +} +if (`SELECT @@global.binlog_format = 'STATEMENT'`) +{ + --let $update_state=Updating +} +--echo # Wait for replicating transaction to wait for innodb table lock +--source include/start_slave.inc +--let $wait_condition= SELECT COUNT(*) > 0 FROM information_schema.processlist WHERE state LIKE "$update_state" and command like 'Slave_worker'; +--source include/wait_condition.inc + +--connection slave1 +--send STOP SLAVE; + +connection slave; +--source include/wait_for_slave_sql_to_stop.inc +--let $retried_tx_test= query_get_value(SHOW ALL SLAVES STATUS, Retried_transactions, 1) +if ($retried_tx_initial != $retried_tx_test) +{ + --echo T transaction should have been rolled back without retry + --die T transaction should have been rolled back without retry +} + +# End the SELECT ... FOR UPDATE +ROLLBACK; + +--connection slave1 +--reap +--connection slave + +--let $slave_gtid= `select @@global.gtid_slave_pos` +--let $assert_text= The retried T transaction should have been rolled back +--let $assert_cond= $master_pos = $slave_gtid +--source include/assert.inc + +--eval set @@global.innodb_lock_wait_timeout= $save_innodb_lock_wait_timeout + +--echo # +--echo # Common Test Case 7: +--echo # Using multiple parallel replication threads on a workload with a +--echo # non-transactional transaction in-between transactional transactions.. +--echo # 7a: with AGGRESSIVE replication where the N statement has been +--echo # executed already, all transactions up to and including N should +--echo # be replicated, and all transactions afterwards should be rolled +--echo # back. 
+--echo # 7b: with MINIMAL replication, the N statement should not execute +--echo # concurrently, but should wait along with the other later +--echo # transactions, and all future transactions except the first should +--echo # be rolled back. + +--connection slave +--source include/stop_slave.inc +set @@global.slave_parallel_threads=4; + +--let $mode_ctr=2 +while ($mode_ctr) +{ + --connection slave + if ($mode_ctr == 2) + { + --echo # + --echo # 7a: slave_parallel_mode=AGGRESSIVE + set @@global.slave_parallel_mode=AGGRESSIVE; + } + if ($mode_ctr == 1) + { + --echo # + --echo # 7b: slave_parallel_mode=MINIMAL + set @@global.slave_parallel_mode=MINIMAL; + } + + --connection slave + --let $row_count_initial=`select count(*) from (select * from ti UNION ALL select * from tm UNION ALL select * from ti2 UNION ALL select * from tm2 UNION ALL select * from ti3) t` + + --connection master + if ($mode_ctr == 1) + { + --let $master_gtid_cmp= `select @@global.gtid_binlog_pos` + } + + --connection master + --eval insert into ti values ($ti_ctr) + --inc $ti_ctr + + --eval insert into tm values ($tm_ctr) + if ($mode_ctr == 2) + { + # AGGRESSIVE mode should allow the N trx to complete, so the GTID position the replica is expected to stop at is captured right after the tm insert. NOTE(review): tm_ctr is not incremented after that insert, so 7a and 7b insert the same tm value - confirm this is intended + --let $master_gtid_cmp= `select @@global.gtid_binlog_pos` + } + --eval insert into ti2 values ($ti2_ctr) + --inc $ti2_ctr + --eval insert into ti values ($ti_ctr) + --inc $ti_ctr + --source include/save_master_gtid.inc + + --connection slave + LOCK TABLES ti WRITE; + --connection slave_lock_extra + LOCK TABLES ti2 WRITE; + + --source include/start_slave.inc + + --echo # Wait for replica to halt due to locks and dependency requirements + --connection slave + + if ($mode_ctr == 2) + { + # AGGRESSIVE lets the workers run concurrently, so wait until the three transactions targeting the locked tables ti and ti2 are blocked on metadata locks and the remaining one (presumably the tm insert) is waiting for the prior transaction to commit + --let $wait_condition= SELECT count(*)=3 FROM information_schema.processlist WHERE state LIKE 'Waiting for table metadata lock' and command LIKE 'Slave_worker'; + --source include/wait_condition.inc + --let $wait_condition= SELECT count(*)=1 FROM 
information_schema.processlist WHERE state LIKE 'Waiting for prior transaction to commit' and command LIKE 'Slave_worker'; + --source include/wait_condition.inc + } + if ($mode_ctr == 1) + { + # MINIMAL only lets the first transaction begin executing, where it blocks on the metadata lock; wait until the other three workers are all waiting for the prior transaction to start its commit + --let $wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE state LIKE 'Waiting for table metadata lock' and command LIKE 'Slave_worker'; + --source include/wait_condition.inc + --let $wait_condition= SELECT count(*)=3 FROM information_schema.processlist WHERE state LIKE 'Waiting for prior transaction to start commit%' and command LIKE 'Slave_worker'; + --source include/wait_condition.inc + } + + --connection slave1 + --send STOP SLAVE; + + --connection slave + --echo # Wait for replica to signal worker threads to stop + --let $wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE state LIKE 'Waiting for worker thread to stop'; + --source include/wait_condition.inc + + UNLOCK TABLES; + --connection slave_lock_extra + UNLOCK TABLES; + + --connection slave1 + --reap + --connection slave + --source include/wait_for_slave_sql_to_stop.inc + + --let $row_count_end=`select count(*) from (select * from ti UNION ALL select * from tm UNION ALL select * from ti2 UNION ALL select * from tm2 UNION ALL select * from ti3) t` + --let $row_count_diff=`select ($row_count_end-$row_count_initial)` + + if ($mode_ctr == 2) + { + --let $assert_text= The entirety of the first two transactions should have committed with AGGRESSIVE parallelization + --let $assert_cond= $row_count_diff = 2 + } + if ($mode_ctr == 1) + { + --let $assert_text= All transactions should have rolled back with MINIMAL parallelization + --let $assert_cond= $row_count_diff = 0 + } + + --source include/assert.inc + + --let $slave_gtid= `select @@global.gtid_slave_pos` + --let $assert_text= Slave state should be consistent + --let $assert_cond= $master_gtid_cmp = $slave_gtid + --source include/assert.inc + + --connection master + 
--source include/save_master_gtid.inc + --connection slave + --source include/start_slave.inc + --source include/sync_with_master_gtid.inc + + --source include/stop_slave.inc + --dec $mode_ctr +} +--source include/start_slave.inc + +--connection master +--source include/save_master_gtid.inc +--connection slave +--source include/start_slave.inc +--source include/sync_with_master_gtid.inc diff --git a/mysql-test/suite/rpl/r/rpl_parallel.result b/mysql-test/suite/rpl/r/rpl_parallel.result index 9b2e68d366e..4f4a1c7dbcd 100644 --- a/mysql-test/suite/rpl/r/rpl_parallel.result +++ b/mysql-test/suite/rpl/r/rpl_parallel.result @@ -731,7 +731,7 @@ SET debug_sync='now WAIT_FOR t3_waiting'; SET debug_sync='now SIGNAL d2_cont'; SET debug_sync='now WAIT_FOR t4_waiting'; KILL THD_ID; -SET debug_sync='now WAIT_FOR t3_killed'; +# Wait for replica to signal worker threads to stop SET debug_sync='now SIGNAL t1_cont'; include/wait_for_slave_sql_error.inc [errno=1317,1927,1964] STOP SLAVE IO_THREAD; @@ -741,7 +741,6 @@ a b 61 61 62 62 63 63 -64 64 68 68 69 69 70 70 @@ -815,6 +814,7 @@ connection server_2; SET debug_sync='now WAIT_FOR wait_queue_ready'; KILL THD_ID; SET debug_sync='now WAIT_FOR wait_queue_killed'; +# Wait for replica to signal worker threads to stop SET debug_sync='now SIGNAL query_cont'; include/wait_for_slave_sql_error.inc [errno=1317,1927,1964] STOP SLAVE IO_THREAD; @@ -826,6 +826,8 @@ SET binlog_format=@old_format; connection server_2; SET debug_sync='RESET'; include/start_slave.inc +SET debug_sync='now WAIT_FOR query_waiting'; +SET debug_sync='now SIGNAL query_cont'; SELECT * FROM t3 WHERE a >= 80 ORDER BY a; a b 80 0 @@ -1214,6 +1216,7 @@ connection server_2; include/wait_for_slave_sql_to_stop.inc SELECT * FROM t2 WHERE a >= 40 ORDER BY a; a +40 41 42 include/start_slave.inc diff --git a/mysql-test/suite/rpl/r/rpl_parallel2.result b/mysql-test/suite/rpl/r/rpl_parallel2.result index 559c56271b8..aeedb4bafee 100644 --- a/mysql-test/suite/rpl/r/rpl_parallel2.result 
+++ b/mysql-test/suite/rpl/r/rpl_parallel2.result @@ -1,6 +1,7 @@ include/rpl_init.inc [topology=1->2] *** MDEV-5509: Incorrect value for Seconds_Behind_Master if parallel replication *** connection server_2; +SET STATEMENT sql_log_bin=0 FOR call mtr.add_suppression("Commit failed due to failure of an earlier commit on which this one depends"); SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads; set @old_parallel_mode= @@GLOBAL.slave_parallel_mode; include/stop_slave.inc diff --git a/mysql-test/suite/rpl/r/rpl_row_par_stop_slave_quick.result b/mysql-test/suite/rpl/r/rpl_row_par_stop_slave_quick.result new file mode 100644 index 00000000000..6fd5b448b47 --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_row_par_stop_slave_quick.result @@ -0,0 +1,507 @@ +include/master-slave.inc +[connection master] +# +# Setup +connection slave; +include/stop_slave.inc +SET STATEMENT sql_log_bin=0 FOR call mtr.add_suppression("Can't find record"); +SET STATEMENT sql_log_bin=0 FOR call mtr.add_suppression("Commit failed due to failure"); +set @@global.slave_parallel_mode=CONSERVATIVE; +ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; +connect slave_lock_extra,127.0.0.1,root,,test,$SLAVE_MYPORT; +CHANGE MASTER TO MASTER_USE_GTID=SLAVE_POS; +include/start_slave.inc +# +# Initialize test data +connection master; +SET STATEMENT sql_log_bin=0 FOR call mtr.add_suppression('Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT.'); +create sequence s1; +create table ti (a int) engine=innodb; +create table ti2 (a int) engine=innodb; +create table ti3 (a int) engine=innodb; +create table tm (a int) engine=myisam; +create table tm2 (a int) engine=myisam; +connection slave; +# Run binlog format independent test cases +# +# Common Test Case 1: +# Using one parallel replication worker thread on workload {T,T}, ensure +# the replica immediately rolls back the transaction and stops the +# SQL thread +connection slave; +include/stop_slave.inc +set 
@@global.slave_parallel_threads=1; +connection master; +include/save_master_gtid.inc +BEGIN; +insert into ti values (100); +insert into ti values (101); +COMMIT; +insert into ti values (102); +connection slave; +LOCK TABLES ti WRITE; +include/start_slave.inc +# Wait for replica to begin executing the first transaction +connection slave; +connection slave1; +STOP SLAVE;; +connection slave; +# Wait for replica to signal worker threads to stop +UNLOCK TABLES; +include/wait_for_slave_sql_to_stop.inc +connection slave1; +connection slave; +include/assert.inc [No new rows should have been inserted] +include/assert.inc [GTID slave state should not change] +connection master; +include/save_master_gtid.inc +connection slave; +include/start_slave.inc +include/sync_with_master_gtid.inc +# +# Common Test Case 2: +# Using multiple parallel replication threads (two) on workload {T,T}, +# ensure both transactions are rolled back if stop slave is issued +# in the middle of the first transaction. +connection slave; +include/stop_slave.inc +set @@global.slave_parallel_threads=2; +connection master; +include/save_master_gtid.inc +BEGIN; +insert into ti values (103); +insert into ti values (104); +COMMIT; +insert into ti values (105); +connection slave; +LOCK TABLES ti WRITE; +include/start_slave.inc +# Wait for replica to begin executing the first transaction +connection slave; +# Wait for second transaction to begin +connection slave; +connection slave1; +STOP SLAVE;; +connection slave; +# Wait for replica to signal worker threads to stop +UNLOCK TABLES; +include/wait_for_slave_sql_to_stop.inc +connection slave1; +connection slave; +include/assert.inc [No insertions should have committed] +include/assert.inc [GTID slave state should not change] +# Slave should be error-free +include/assert.inc [Slave should be error free] +connection master; +include/save_master_gtid.inc +connection slave; +include/start_slave.inc +include/sync_with_master_gtid.inc +# +# Common Test Case 3: +# Using 
multiple parallel replication threads (two) on workload {T,T}, +# with the same commit id (cid), ensure both transactions are rolled +# back if stop slave is issued +connection slave; +include/stop_slave.inc +set @@global.slave_parallel_mode=AGGRESSIVE; +set @@global.slave_parallel_threads=2; +connection master; +include/save_master_gtid.inc +SET @old_dbug= @@SESSION.debug_dbug; +SET @@SESSION.debug_dbug="+d,binlog_force_commit_id"; +SET @commit_id= 10000; +BEGIN; +insert into ti values (106); +insert into ti values (107); +COMMIT; +insert into ti2 values (400); +SET @@SESSION.debug_dbug=@old_dbug; +connection slave; +LOCK TABLES ti WRITE; +include/start_slave.inc +# Wait for replica to begin executing the first transactions +connection slave; +# Wait for second transaction to start group commit +connection slave; +connection slave1; +STOP SLAVE;; +connection slave; +# Wait for replica to signal worker threads to stop +UNLOCK TABLES; +include/wait_for_slave_sql_to_stop.inc +connection slave1; +connection slave; +include/assert.inc [No insertions should have committed] +include/assert.inc [GTID slave state should not change] +connection master; +include/save_master_gtid.inc +connection slave; +include/start_slave.inc +include/sync_with_master_gtid.inc +# +# Common Test Case 4: +# Using multiple parallel replication threads (4) on workload +# T (long running); should commit +# N (waiting for prior commit); should commit +# T (long running); should rollback +# T (waiting for prior commit); should rollback +# Issuing STOP SLAVE should allow the first two transactions to commit +# while preventing and rolling back the third +connection slave; +include/stop_slave.inc +set @@global.slave_parallel_mode=optimistic; +set @@global.slave_parallel_threads=4; +connection master; +SET @old_dbug= @@SESSION.debug_dbug; +SET @@SESSION.debug_dbug="+d,binlog_force_commit_id"; +SET @commit_id= 10001; +BEGIN; +insert into ti values (108); +insert into ti values (109); +COMMIT; 
+connection master; +insert into tm values (200); +include/save_master_gtid.inc +insert into ti2 values (401); +insert into ti3 values (500); +SET @@SESSION.debug_dbug=@old_dbug; +connection slave; +LOCK TABLES ti WRITE, ti2 WRITE; +include/start_slave.inc +# Wait for replica to progress until the transactions targeting locked tables are stuck on their locks.. +# Wait for replica to progress until unblocked transactions are queued for group commit.. +connection slave; +connection slave1; +STOP SLAVE;; +connection slave; +# Wait for replica to signal worker threads to stop +UNLOCK TABLES; +include/wait_for_slave_sql_to_stop.inc +connection slave1; +connection slave; +include/assert.inc [GTID slave state should reach first N transaction] +set @@global.slave_parallel_mode=CONSERVATIVE; +connection master; +include/save_master_gtid.inc +connection slave; +include/start_slave.inc +include/sync_with_master_gtid.inc +# +# Common Test Case 5: +# Using multiple parallel replication threads (5) on workload +# T (long running); should commit +# N (waiting for prior commit); should commit +# T (waiting for prior commit); should commit +# N (waiting for prior commit); should commit +# T (long running); should rollback +# Issuing STOP SLAVE should allow all transactions up to and including +# the last N (4th) to commit, while preventing and rolling back the +# final transaction (5th) +connection slave; +include/stop_slave.inc +set @@global.slave_parallel_mode=optimistic; +set @@global.slave_parallel_threads=5; +connection master; +SET @old_dbug= @@SESSION.debug_dbug; +SET @@SESSION.debug_dbug="+d,binlog_force_commit_id"; +SET @commit_id= 10002; +insert into ti values (110); +insert into tm values (201); +insert into ti2 values (402); +insert into tm2 values (300); +include/save_master_gtid.inc +insert into ti3 values (501); +SET @@SESSION.debug_dbug=@old_dbug; +connection slave; +LOCK TABLES ti WRITE, ti3 WRITE; +include/start_slave.inc +# Wait for replica to progress until the 
transactions targeting locked tables are stuck on their locks.. +# Wait for replica to progress until unblocked transactions are queued for group commit.. +connection slave; +connection slave1; +STOP SLAVE;; +connection slave; +# Wait for replica to signal worker threads to stop +UNLOCK TABLES; +include/wait_for_slave_sql_to_stop.inc +connection slave1; +connection slave; +include/assert.inc [GTID slave state should reach second N transaction] +set @@global.slave_parallel_mode=CONSERVATIVE; +connection master; +include/save_master_gtid.inc +connection slave; +include/start_slave.inc +include/sync_with_master_gtid.inc +# +# Common Test Case 6: +# If retrying a T transaction while STOP SLAVE is issued, the +# transaction should be rolled back and the slave abruptly stopped +connection master; +insert into ti values (111); +include/save_master_gtid.inc +connection slave; +include/start_slave.inc +Warnings: +Note 1254 Slave is already running +include/sync_with_master_gtid.inc +include/stop_slave.inc +set @@global.slave_parallel_threads=1; +set @@global.innodb_lock_wait_timeout= 2; +BEGIN; +SELECT * FROM ti WHERE a=111 FOR UPDATE; +a +111 +connection master; +include/save_master_gtid.inc +update ti set a=a+1 where a=111; +connection slave; +include/start_slave.inc +# Wait for replicating transaction to wait for innodb table lock +include/start_slave.inc +Warnings: +Note 1254 Slave is already running +connection slave1; +STOP SLAVE;; +connection slave; +include/wait_for_slave_sql_to_stop.inc +ROLLBACK; +connection slave1; +connection slave; +include/assert.inc [The retried T transaction should have been rolled back] +set @@global.innodb_lock_wait_timeout= 50; +# +# Common Test Case 7: +# Using multiple parallel replication threads on a workload with a +# non-transactional transaction in-between transactional transactions.. 
+# 7a: with AGGRESSIVE replication where the N statement has been +# executed already, all transactions up to and including N should +# be replicated, and all transactions afterwards should be rolled +# back. +# 7b: with MINIMAL replication, the N statement should not execute +# concurrently, but should wait along with the other later +# transactions, and all future transactions except the first should +# be rolled back. +connection slave; +include/stop_slave.inc +Warnings: +Note 1255 Slave already has been stopped +set @@global.slave_parallel_threads=4; +connection slave; +# +# 7a: slave_parallel_mode=AGGRESSIVE +set @@global.slave_parallel_mode=AGGRESSIVE; +connection slave; +connection master; +connection master; +insert into ti values (113); +insert into tm values (202); +insert into ti2 values (403); +insert into ti values (114); +include/save_master_gtid.inc +connection slave; +LOCK TABLES ti WRITE; +connection slave_lock_extra; +LOCK TABLES ti2 WRITE; +include/start_slave.inc +# Wait for replica to halt due to locks and dependency requirements +connection slave; +connection slave1; +STOP SLAVE;; +connection slave; +# Wait for replica to signal worker threads to stop +UNLOCK TABLES; +connection slave_lock_extra; +UNLOCK TABLES; +connection slave1; +connection slave; +include/wait_for_slave_sql_to_stop.inc +include/assert.inc [The entirety of the first two transactions should have committed with AGGRESSIVE parallelization] +include/assert.inc [Slave state should be consistent] +connection master; +include/save_master_gtid.inc +connection slave; +include/start_slave.inc +include/sync_with_master_gtid.inc +include/stop_slave.inc +connection slave; +# +# 7b: slave_parallel_mode=MINIMAL +set @@global.slave_parallel_mode=MINIMAL; +connection slave; +connection master; +connection master; +insert into ti values (115); +insert into tm values (202); +insert into ti2 values (404); +insert into ti values (116); +include/save_master_gtid.inc +connection slave; +LOCK 
TABLES ti WRITE; +connection slave_lock_extra; +LOCK TABLES ti2 WRITE; +include/start_slave.inc +# Wait for replica to halt due to locks and dependency requirements +connection slave; +connection slave1; +STOP SLAVE;; +connection slave; +# Wait for replica to signal worker threads to stop +UNLOCK TABLES; +connection slave_lock_extra; +UNLOCK TABLES; +connection slave1; +connection slave; +include/wait_for_slave_sql_to_stop.inc +include/assert.inc [All transactions should have rolled back with MINIMAL parallelization] +include/assert.inc [Slave state should be consistent] +connection master; +include/save_master_gtid.inc +connection slave; +include/start_slave.inc +include/sync_with_master_gtid.inc +include/stop_slave.inc +include/start_slave.inc +connection master; +include/save_master_gtid.inc +connection slave; +include/start_slave.inc +Warnings: +Note 1254 Slave is already running +include/sync_with_master_gtid.inc +# +# ROW Test Case 1: +# Using an N multi-statement transaction, ensure if STOP SLAVE is +# issued in-between row updates, that the transaction is finished. 
+connection master; +truncate table ti; +truncate table tm; +# Set up multiple rows to allow a multi-statement update rows event +insert into tm values (202); +insert into tm values (203); +connection slave; +connection slave; +include/stop_slave.inc +set @@global.slave_parallel_threads=1; +connection master; +# Next-to-commit non-transactional transaction should finish +update tm set a=a+1; +include/save_master_gtid.inc +# This should not be committed because it is after next-to-commit +insert into ti values (117); +connection slave; +set @@global.debug_dbug="+d,pause_after_next_row_exec"; +START SLAVE; +set debug_sync= "now WAIT_FOR row_executed"; +connection slave1; +STOP SLAVE;; +connection slave; +set @@global.debug_dbug=""; +set debug_sync= "now SIGNAL continue_row_execution"; +connection slave1; +include/wait_for_slave_sql_to_stop.inc +# Slave should be error-free +include/assert.inc [Slave should be error free] +set debug_sync= "RESET"; +include/start_slave.inc +include/sync_with_master_gtid.inc +# +# ROW Test Case 2: +# Using a T multi-statement transaction, ensure if STOP SLAVE is +# issued in-between row updates, that the transaction is rolled back. 
+connection master; +truncate table ti; +truncate table ti2; +truncate table tm; +insert into ti values (118); +insert into ti values (119); +connection slave; +connection slave; +include/stop_slave.inc +set @@global.slave_parallel_threads=1; +connection master; +# Next-to-commit transactional multi-row event should be rolled back +include/save_master_gtid.inc +update ti set a=a+1; +insert into ti values (120); +connection slave; +set @@global.debug_dbug="+d,pause_after_next_row_exec"; +START SLAVE; +set debug_sync= "now WAIT_FOR row_executed"; +connection slave1; +STOP SLAVE;; +connection slave; +set @@global.debug_dbug=""; +set debug_sync= "now SIGNAL continue_row_execution"; +connection slave1; +include/wait_for_slave_sql_to_stop.inc +include/assert.inc [No new rows should have been inserted] +# Comparing master gtid 0-1-42 to slaves 0-1-42 +include/assert.inc [No transactions should have committed] +# Slave should be error-free +include/assert.inc [Slave should be error free] +connection master; +include/save_master_gtid.inc +connection slave; +set debug_sync= "RESET"; +include/start_slave.inc +include/sync_with_master_gtid.inc +# +# Row Test Case 3: +# A workload with a later transaction that updates a sequence table +# should complete all transactions up to the sequence table update. 
+# Workload: +# T (long running); should commit +# S (waiting for prior commit); should commit +# T (long running); should rollback +connection slave; +include/stop_slave.inc +set @@global.slave_parallel_mode=AGGRESSIVE; +set @@global.slave_parallel_threads=3; +connection master; +SET @old_dbug= @@SESSION.debug_dbug; +SET @@SESSION.debug_dbug="+d,binlog_force_commit_id"; +SET @commit_id= 10002; +insert into ti values (121); +select next value for s1; +next value for s1 +1 +include/save_master_gtid.inc +insert into ti2 values (405); +SET @@SESSION.debug_dbug=@old_dbug; +connection slave; +LOCK TABLES ti write, ti2 WRITE; +include/start_slave.inc +# Wait for replica to progress until the transactions targeting locked tables are stuck on their locks.. +# Wait for replica to progress until unblocked transactions are queued for group commit.. +connection slave; +connection slave1; +STOP SLAVE;; +connection slave; +# Wait for replica to signal worker threads to stop +UNLOCK TABLES; +include/wait_for_slave_sql_to_stop.inc +connection slave1; +connection slave; +include/assert.inc [GTID slave state should not change] +set @@global.slave_parallel_mode=CONSERVATIVE; +connection master; +include/save_master_gtid.inc +connection slave; +include/start_slave.inc +include/sync_with_master_gtid.inc +# +# Cleanup +connection master; +DROP TABLE ti, ti2, ti3, tm, tm2, s1; +include/save_master_gtid.inc +connection slave; +include/sync_with_master_gtid.inc +include/stop_slave.inc +set @@global.debug_dbug=""; +set @@global.slave_parallel_threads=0; +set @@global.slave_parallel_mode=conservative; +include/start_slave.inc +include/rpl_end.inc +# End of tests diff --git a/mysql-test/suite/rpl/r/rpl_stm_par_stop_slave_quick.result b/mysql-test/suite/rpl/r/rpl_stm_par_stop_slave_quick.result new file mode 100644 index 00000000000..18b06f5054f --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_stm_par_stop_slave_quick.result @@ -0,0 +1,461 @@ +include/master-slave.inc +[connection master] +# +# 
Setup +connection slave; +include/stop_slave.inc +SET STATEMENT sql_log_bin=0 FOR call mtr.add_suppression('Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT.'); +SET STATEMENT sql_log_bin=0 FOR call mtr.add_suppression("Commit failed due to failure of an earlier commit on which this one depends"); +set @@global.slave_parallel_mode=CONSERVATIVE; +ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; +change master to master_use_gtid=slave_pos; +include/start_slave.inc +connect slave_lock_extra,127.0.0.1,root,,test,$SLAVE_MYPORT; +# +# Initialize test data +connection master; +set statement sql_log_bin=0 for call mtr.add_suppression('Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT.'); +create table ti (a int primary key) engine=innodb; +create table ti2 (a int) engine=innodb; +create table ti3 (a int) engine=innodb; +create table tm (a int) engine=myisam; +create table tm2 (a int) engine=myisam; +connection slave; +# Run binlog format independent test cases +# +# Common Test Case 1: +# Using one parallel replication worker thread on workload {T,T}, ensure +# the replica immediately rolls back the transaction and stops the +# SQL thread +connection slave; +include/stop_slave.inc +set @@global.slave_parallel_threads=1; +connection master; +include/save_master_gtid.inc +BEGIN; +insert into ti values (100); +insert into ti values (101); +COMMIT; +insert into ti values (102); +connection slave; +LOCK TABLES ti WRITE; +include/start_slave.inc +# Wait for replica to begin executing the first transaction +connection slave; +connection slave1; +STOP SLAVE;; +connection slave; +# Wait for replica to signal worker threads to stop +UNLOCK TABLES; +include/wait_for_slave_sql_to_stop.inc +connection slave1; +connection slave; +include/assert.inc [No new rows should have been inserted] +include/assert.inc [GTID slave state should not change] +connection master; +include/save_master_gtid.inc 
+connection slave; +include/start_slave.inc +include/sync_with_master_gtid.inc +# +# Common Test Case 2: +# Using multiple parallel replication threads (two) on workload {T,T}, +# ensure both transactions are rolled back if stop slave is issued +# in the middle of the first transaction. +connection slave; +include/stop_slave.inc +set @@global.slave_parallel_threads=2; +connection master; +include/save_master_gtid.inc +BEGIN; +insert into ti values (103); +insert into ti values (104); +COMMIT; +insert into ti values (105); +connection slave; +LOCK TABLES ti WRITE; +include/start_slave.inc +# Wait for replica to begin executing the first transaction +connection slave; +# Wait for second transaction to begin +connection slave; +connection slave1; +STOP SLAVE;; +connection slave; +# Wait for replica to signal worker threads to stop +UNLOCK TABLES; +include/wait_for_slave_sql_to_stop.inc +connection slave1; +connection slave; +include/assert.inc [No insertions should have committed] +include/assert.inc [GTID slave state should not change] +# Slave should be error-free +include/assert.inc [Slave should be error free] +connection master; +include/save_master_gtid.inc +connection slave; +include/start_slave.inc +include/sync_with_master_gtid.inc +# +# Common Test Case 3: +# Using multiple parallel replication threads (two) on workload {T,T}, +# with the same commit id (cid), ensure both transactions are rolled +# back if stop slave is issued +connection slave; +include/stop_slave.inc +set @@global.slave_parallel_mode=AGGRESSIVE; +set @@global.slave_parallel_threads=2; +connection master; +include/save_master_gtid.inc +SET @old_dbug= @@SESSION.debug_dbug; +SET @@SESSION.debug_dbug="+d,binlog_force_commit_id"; +SET @commit_id= 10000; +BEGIN; +insert into ti values (106); +insert into ti values (107); +COMMIT; +insert into ti2 values (400); +SET @@SESSION.debug_dbug=@old_dbug; +connection slave; +LOCK TABLES ti WRITE; +include/start_slave.inc +# Wait for replica to begin 
executing the first transactions +connection slave; +# Wait for second transaction to start group commit +connection slave; +connection slave1; +STOP SLAVE;; +connection slave; +# Wait for replica to signal worker threads to stop +UNLOCK TABLES; +include/wait_for_slave_sql_to_stop.inc +connection slave1; +connection slave; +include/assert.inc [No insertions should have committed] +include/assert.inc [GTID slave state should not change] +connection master; +include/save_master_gtid.inc +connection slave; +include/start_slave.inc +include/sync_with_master_gtid.inc +# +# Common Test Case 4: +# Using multiple parallel replication threads (4) on workload +# T (long running); should commit +# N (waiting for prior commit); should commit +# T (long running); should rollback +# T (waiting for prior commit); should rollback +# Issuing STOP SLAVE should allow the first two transactions to commit +# while preventing and rolling back the third +connection slave; +include/stop_slave.inc +set @@global.slave_parallel_mode=optimistic; +set @@global.slave_parallel_threads=4; +connection master; +SET @old_dbug= @@SESSION.debug_dbug; +SET @@SESSION.debug_dbug="+d,binlog_force_commit_id"; +SET @commit_id= 10001; +BEGIN; +insert into ti values (108); +insert into ti values (109); +COMMIT; +connection master; +insert into tm values (200); +include/save_master_gtid.inc +insert into ti2 values (401); +insert into ti3 values (500); +SET @@SESSION.debug_dbug=@old_dbug; +connection slave; +LOCK TABLES ti WRITE, ti2 WRITE; +include/start_slave.inc +# Wait for replica to progress until the transactions targeting locked tables are stuck on their locks.. +# Wait for replica to progress until unblocked transactions are queued for group commit.. 
+connection slave; +connection slave1; +STOP SLAVE;; +connection slave; +# Wait for replica to signal worker threads to stop +UNLOCK TABLES; +include/wait_for_slave_sql_to_stop.inc +connection slave1; +connection slave; +include/assert.inc [GTID slave state should reach first N transaction] +set @@global.slave_parallel_mode=CONSERVATIVE; +connection master; +include/save_master_gtid.inc +connection slave; +include/start_slave.inc +include/sync_with_master_gtid.inc +# +# Common Test Case 5: +# Using multiple parallel replication threads (5) on workload +# T (long running); should commit +# N (waiting for prior commit); should commit +# T (waiting for prior commit); should commit +# N (waiting for prior commit); should commit +# T (long running); should rollback +# Issuing STOP SLAVE should allow all transactions up to and including +# the last N (4th) to commit, while preventing and rolling back the +# final transaction (5th) +connection slave; +include/stop_slave.inc +set @@global.slave_parallel_mode=optimistic; +set @@global.slave_parallel_threads=5; +connection master; +SET @old_dbug= @@SESSION.debug_dbug; +SET @@SESSION.debug_dbug="+d,binlog_force_commit_id"; +SET @commit_id= 10002; +insert into ti values (110); +insert into tm values (201); +insert into ti2 values (402); +insert into tm2 values (300); +include/save_master_gtid.inc +insert into ti3 values (501); +SET @@SESSION.debug_dbug=@old_dbug; +connection slave; +LOCK TABLES ti WRITE, ti3 WRITE; +include/start_slave.inc +# Wait for replica to progress until the transactions targeting locked tables are stuck on their locks.. +# Wait for replica to progress until unblocked transactions are queued for group commit.. 
+connection slave; +connection slave1; +STOP SLAVE;; +connection slave; +# Wait for replica to signal worker threads to stop +UNLOCK TABLES; +include/wait_for_slave_sql_to_stop.inc +connection slave1; +connection slave; +include/assert.inc [GTID slave state should reach second N transaction] +set @@global.slave_parallel_mode=CONSERVATIVE; +connection master; +include/save_master_gtid.inc +connection slave; +include/start_slave.inc +include/sync_with_master_gtid.inc +# +# Common Test Case 6: +# If retrying a T transaction while STOP SLAVE is issued, the +# transaction should be rolled back and the slave abruptly stopped +connection master; +insert into ti values (111); +include/save_master_gtid.inc +connection slave; +include/start_slave.inc +Warnings: +Note 1254 Slave is already running +include/sync_with_master_gtid.inc +include/stop_slave.inc +set @@global.slave_parallel_threads=1; +set @@global.innodb_lock_wait_timeout= 2; +BEGIN; +SELECT * FROM ti WHERE a=111 FOR UPDATE; +a +111 +connection master; +include/save_master_gtid.inc +update ti set a=a+1 where a=111; +connection slave; +include/start_slave.inc +# Wait for replicating transaction to wait for innodb table lock +include/start_slave.inc +Warnings: +Note 1254 Slave is already running +connection slave1; +STOP SLAVE;; +connection slave; +include/wait_for_slave_sql_to_stop.inc +ROLLBACK; +connection slave1; +connection slave; +include/assert.inc [The retried T transaction should have been rolled back] +set @@global.innodb_lock_wait_timeout= 50; +# +# Common Test Case 7: +# Using multiple parallel replication threads on a workload with a +# non-transactional transaction in-between transactional transactions.. +# 7a: with AGGRESSIVE replication where the N statement has been +# executed already, all transactions up to and including N should +# be replicated, and all transactions afterwards should be rolled +# back. 
+# 7b: with MINIMAL replication, the N statement should not execute +# concurrently, but should wait along with the other later +# transactions, and all future transactions except the first should +# be rolled back. +connection slave; +include/stop_slave.inc +Warnings: +Note 1255 Slave already has been stopped +set @@global.slave_parallel_threads=4; +connection slave; +# +# 7a: slave_parallel_mode=AGGRESSIVE +set @@global.slave_parallel_mode=AGGRESSIVE; +connection slave; +connection master; +connection master; +insert into ti values (113); +insert into tm values (202); +insert into ti2 values (403); +insert into ti values (114); +include/save_master_gtid.inc +connection slave; +LOCK TABLES ti WRITE; +connection slave_lock_extra; +LOCK TABLES ti2 WRITE; +include/start_slave.inc +# Wait for replica to halt due to locks and dependency requirements +connection slave; +connection slave1; +STOP SLAVE;; +connection slave; +# Wait for replica to signal worker threads to stop +UNLOCK TABLES; +connection slave_lock_extra; +UNLOCK TABLES; +connection slave1; +connection slave; +include/wait_for_slave_sql_to_stop.inc +include/assert.inc [The entirety of the first two transactions should have committed with AGGRESSIVE parallelization] +include/assert.inc [Slave state should be consistent] +connection master; +include/save_master_gtid.inc +connection slave; +include/start_slave.inc +include/sync_with_master_gtid.inc +include/stop_slave.inc +connection slave; +# +# 7b: slave_parallel_mode=MINIMAL +set @@global.slave_parallel_mode=MINIMAL; +connection slave; +connection master; +connection master; +insert into ti values (115); +insert into tm values (202); +insert into ti2 values (404); +insert into ti values (116); +include/save_master_gtid.inc +connection slave; +LOCK TABLES ti WRITE; +connection slave_lock_extra; +LOCK TABLES ti2 WRITE; +include/start_slave.inc +# Wait for replica to halt due to locks and dependency requirements +connection slave; +connection slave1; +STOP 
SLAVE;; +connection slave; +# Wait for replica to signal worker threads to stop +UNLOCK TABLES; +connection slave_lock_extra; +UNLOCK TABLES; +connection slave1; +connection slave; +include/wait_for_slave_sql_to_stop.inc +include/assert.inc [All transactions should have rolled back with MINIMAL parallelization] +include/assert.inc [Slave state should be consistent] +connection master; +include/save_master_gtid.inc +connection slave; +include/start_slave.inc +include/sync_with_master_gtid.inc +include/stop_slave.inc +include/start_slave.inc +connection master; +include/save_master_gtid.inc +connection slave; +include/start_slave.inc +Warnings: +Note 1254 Slave is already running +include/sync_with_master_gtid.inc +# +# Statement Test Case 1: +# Using one parallel replication worker thread on workload {N,T}, ensure +# the replica finishes the non-transactional transaction, and does not +# start the next +connection slave; +include/stop_slave.inc +set @@global.slave_parallel_threads=1; +connection master; +SET @@session.binlog_direct_non_transactional_updates= 0; +BEGIN; +insert into ti values (117); +insert into tm values (202); +Warnings: +Note 1592 Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT. Statement is unsafe because it accesses a non-transactional table after accessing a transactional table within the same transaction +insert into tm2 values (301); +Warnings: +Note 1592 Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT. 
Statement is unsafe because it accesses a non-transactional table after accessing a transactional table within the same transaction +COMMIT; +include/save_master_gtid.inc +insert into ti values (118); +connection slave; +lock tables tm2 write; +START SLAVE; +# Wait for replica to get stuck on held lock +connection slave; +connection slave1; +STOP SLAVE;; +connection slave; +# Wait for replica to signal worker threads to stop +# Unlock row-level lock holding transaction +UNLOCK TABLES; +include/wait_for_slave_sql_to_stop.inc +connection slave1; +connection slave; +include/assert.inc [Transaction should have committed] +include/assert.inc [N should have been applied] +# Slave should be error-free +include/assert.inc [Slave should be error free] +connection master; +include/save_master_gtid.inc +SET @@session.binlog_direct_non_transactional_updates= 1; +connection slave; +include/start_slave.inc +include/sync_with_master_gtid.inc +# +# Statement Test Case 2: +# If STOP SLAVE is issued on a parallel slave, such that the next to +# commit transaction is T; even if the next event from the group will +# commit the transaction (e.g. XID_EVENT), the transaction should be +# stopped and rolled back. 
+connection slave; +include/stop_slave.inc +set @@global.slave_parallel_threads=1; +connection master; +insert into ti values (119); +insert into ti values (120); +include/save_master_gtid.inc +connection slave; +LOCK TABLES ti WRITE; +include/start_slave.inc +# Wait for replica to begin executing the first transaction +connection slave; +connection slave1; +STOP SLAVE;; +connection slave; +# Wait for replica to signal worker threads to stop +UNLOCK TABLES; +include/wait_for_slave_sql_to_stop.inc +connection slave1; +connection slave; +include/assert.inc [No insertions should have committed] +include/assert.inc [GTID slave state should increment to the first transaction] +include/start_slave.inc +include/sync_with_master_gtid.inc +# +# Cleanup +connection master; +DROP TABLE ti, tm, ti2, tm2, ti3; +include/save_master_gtid.inc +connection slave; +include/sync_with_master_gtid.inc +include/stop_slave.inc +set @@global.slave_parallel_threads=0; +set @@global.slave_domain_parallel_threads=0; +set @@global.slave_parallel_mode=conservative; +set @@global.debug_dbug=""; +include/start_slave.inc +include/rpl_end.inc +# End of tests diff --git a/mysql-test/suite/rpl/t/rpl_parallel.test b/mysql-test/suite/rpl/t/rpl_parallel.test index 9ba7a30f2eb..433e7c9d8c4 100644 --- a/mysql-test/suite/rpl/t/rpl_parallel.test +++ b/mysql-test/suite/rpl/t/rpl_parallel.test @@ -949,8 +949,9 @@ SET debug_sync='now WAIT_FOR t4_waiting'; --replace_result $d1_thd_id THD_ID eval KILL $d1_thd_id; -# Wait until T3 has reacted on the kill. -SET debug_sync='now WAIT_FOR t3_killed'; +--echo # Wait for replica to signal worker threads to stop +--let $wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE state LIKE 'Waiting for worker thread to stop'; +--source include/wait_condition.inc # Now we can allow T1 to proceed. 
SET debug_sync='now SIGNAL t1_cont'; @@ -959,7 +960,7 @@ SET debug_sync='now SIGNAL t1_cont'; --source include/wait_for_slave_sql_error.inc STOP SLAVE IO_THREAD; # Since T2, T3, and T4 run in parallel, we can not be sure if T2 will have time -# to commit or not before the stop. However, T1 should commit, and T3/T4 may +# to commit or not before the stop. However, T1 should rollback, and T3/T4 may # not have committed. (After slave restart we check that all become committed # eventually). SELECT * FROM t3 WHERE a >= 60 AND a != 65 ORDER BY a; @@ -1058,6 +1059,11 @@ SET debug_sync='now WAIT_FOR wait_queue_ready'; eval KILL $thd_id; SET debug_sync='now WAIT_FOR wait_queue_killed'; + +--echo # Wait for replica to signal worker threads to stop +--let $wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE state LIKE 'Waiting for worker thread to stop'; +--source include/wait_condition.inc + SET debug_sync='now SIGNAL query_cont'; --let $slave_sql_errno= 1317,1927,1964 @@ -1075,6 +1081,14 @@ SET binlog_format=@old_format; --connection server_2 SET debug_sync='RESET'; --source include/start_slave.inc + +# Test amendment from MDEV-13915: +# A worker thread's event queue is no longer executed on replica stop. +# The signal query_cont needs to be re-sent because the transaction was +# aborted. 
+SET debug_sync='now WAIT_FOR query_waiting'; +SET debug_sync='now SIGNAL query_cont'; + --sync_with_master SELECT * FROM t3 WHERE a >= 80 ORDER BY a; diff --git a/mysql-test/suite/rpl/t/rpl_parallel2.test b/mysql-test/suite/rpl/t/rpl_parallel2.test index 8934b15e546..506e074110c 100644 --- a/mysql-test/suite/rpl/t/rpl_parallel2.test +++ b/mysql-test/suite/rpl/t/rpl_parallel2.test @@ -7,6 +7,7 @@ --echo *** MDEV-5509: Incorrect value for Seconds_Behind_Master if parallel replication *** --connection server_2 +SET STATEMENT sql_log_bin=0 FOR call mtr.add_suppression("Commit failed due to failure of an earlier commit on which this one depends"); SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads; set @old_parallel_mode= @@GLOBAL.slave_parallel_mode; --source include/stop_slave.inc diff --git a/mysql-test/suite/rpl/t/rpl_parallel_optimistic_error_stop.test b/mysql-test/suite/rpl/t/rpl_parallel_optimistic_error_stop.test index 27f38d47bdb..35f28073a74 100644 --- a/mysql-test/suite/rpl/t/rpl_parallel_optimistic_error_stop.test +++ b/mysql-test/suite/rpl/t/rpl_parallel_optimistic_error_stop.test @@ -122,10 +122,10 @@ SELECT COUNT(*) = 1 as "W4 remains with the same status" FROM information_schema --echo # Slave_SQL_Running YES = $status # B. In the fixed version W3 is waiting for W2,... ---let $wait_condition= SELECT count(*) = 1 as "W4 is waiting" FROM information_schema.processlist WHERE state LIKE "Waiting for prior transaction to commit%" +--let $wait_condition= SELECT count(*) = 1 as "W3 is waiting" FROM information_schema.processlist WHERE state LIKE "Waiting for prior transaction to commit%" --source include/wait_condition.inc --echo # while W2 is held back ... 
---let $wait_condition= SELECT count(*) = 1 as "W2 simulates slowness" FROM information_schema.processlist WHERE state LIKE "debug sync point: now" +--let $wait_condition= SELECT count(*) >= 1 as "W2 simulates slowness" FROM information_schema.processlist WHERE state LIKE "debug sync point: now" --source include/wait_condition.inc # C. # ...until NOW. @@ -143,7 +143,7 @@ if ($old_version_regression) --let $wait_condition= SELECT count(*) = 0 as "W3 does not wait on W2" FROM information_schema.processlist WHERE state LIKE "Waiting for prior transaction to commit%" --source include/wait_condition.inc - --let $wait_condition= SELECT count(*) = 1 as "W2 simulates slowness" FROM information_schema.processlist WHERE state LIKE "debug sync point: now" + --let $wait_condition= SELECT count(*) >= 1 as "W2 simulates slowness" FROM information_schema.processlist WHERE state LIKE "debug sync point: now" --source include/wait_condition.inc # Like above, but signaling is done after W4 is done to violate the commit order diff --git a/mysql-test/suite/rpl/t/rpl_row_par_stop_slave_quick.test b/mysql-test/suite/rpl/t/rpl_row_par_stop_slave_quick.test new file mode 100644 index 00000000000..56ad9e34a93 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_row_par_stop_slave_quick.test @@ -0,0 +1,268 @@ +# +# Validate that STOP SLAVE works in a timely manner on a parallel replica with +# ROW binary logging format. 
+# +--source include/have_debug.inc +--source include/have_debug_sync.inc +--source include/master-slave.inc +--source include/have_innodb.inc +--source include/have_binlog_format_row.inc + +--echo # +--echo # Setup +--connection slave +--source include/stop_slave.inc +SET STATEMENT sql_log_bin=0 FOR call mtr.add_suppression("Can't find record"); +SET STATEMENT sql_log_bin=0 FOR call mtr.add_suppression("Commit failed due to failure"); +--let $old_debug= `SELECT @@global.debug_dbug` +--let $old_threads= `SELECT @@global.slave_parallel_threads` +--let $old_slave_mode= `SELECT @@global.slave_parallel_mode` +set @@global.slave_parallel_mode=CONSERVATIVE; +ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; +--connect(slave_lock_extra,127.0.0.1,root,,test,$SLAVE_MYPORT) +CHANGE MASTER TO MASTER_USE_GTID=SLAVE_POS; +--source include/start_slave.inc + +--echo # +--echo # Initialize test data +--connection master +SET STATEMENT sql_log_bin=0 FOR call mtr.add_suppression('Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT.'); + +# Needed by this test and include/rpl_par_stop_slave_quick_common.test + +create sequence s1; +create table ti (a int) engine=innodb; +create table ti2 (a int) engine=innodb; +create table ti3 (a int) engine=innodb; +create table tm (a int) engine=myisam; +create table tm2 (a int) engine=myisam; +--let $ti_ctr= 100 +--let $tm_ctr= 200 +--let $tm2_ctr= 300 +--let $ti2_ctr= 400 +--let $ti3_ctr= 500 +--sync_slave_with_master + +--echo # Run binlog format independent test cases +--source include/rpl_par_stop_slave_quick_common.test + +--echo # +--echo # ROW Test Case 1: +--echo # Using an N multi-statement transaction, ensure if STOP SLAVE is +--echo # issued in-between row updates, that the transaction is finished. 
+ +--connection master +truncate table ti; +truncate table tm; +--echo # Set up multiple rows to allow a multi-statement update rows event +--eval insert into tm values ($tm_ctr) +--inc $tm_ctr +--eval insert into tm values ($tm_ctr) +--inc $tm_ctr +--sync_slave_with_master + +--connection slave +--source include/stop_slave.inc +set @@global.slave_parallel_threads=1; +--let $row_count_initial=`select count(*) from (select * from ti UNION ALL select * from tm) t` + +--connection master + +--echo # Next-to-commit non-transactional transaction should finish +--eval update tm set a=a+1 +--source include/save_master_gtid.inc +--let $master_gtid_after_update= `select @@global.gtid_binlog_pos` + +--echo # This should not be committed because it is after next-to-commit +--eval insert into ti values ($ti_ctr) +--inc $ti_ctr + +--connection slave +set @@global.debug_dbug="+d,pause_after_next_row_exec"; + +START SLAVE; +set debug_sync= "now WAIT_FOR row_executed"; + +--connection slave1 +--send STOP SLAVE; + +--connection slave +set @@global.debug_dbug=""; +set debug_sync= "now SIGNAL continue_row_execution"; + +--connection slave1 +--reap +--source include/wait_for_slave_sql_to_stop.inc + +--echo # Slave should be error-free +let $last_error = query_get_value("SHOW SLAVE STATUS", Last_SQL_Errno, 1); +--let $assert_text= Slave should be error free +--let $assert_cond= $last_error = 0 +--source include/assert.inc + +set debug_sync= "RESET"; +--source include/start_slave.inc +--source include/sync_with_master_gtid.inc + + +--echo # +--echo # ROW Test Case 2: +--echo # Using a T multi-statement transaction, ensure if STOP SLAVE is +--echo # issued in-between row updates, that the transaction is rolled back. 
+ +--connection master +truncate table ti; +truncate table ti2; +truncate table tm; +--eval insert into ti values ($ti_ctr) +--inc $ti_ctr +--eval insert into ti values ($ti_ctr) +--inc $ti_ctr +--sync_slave_with_master + +--connection slave +--source include/stop_slave.inc +set @@global.slave_parallel_threads=1; +--let $row_count_initial=`select count(*) from (select * from ti UNION ALL select * from tm) t` + +--connection master + +--echo # Next-to-commit transactional multi-row event should be rolled back +--source include/save_master_gtid.inc +--let $master_gtid_initial= `select @@global.gtid_binlog_pos` +--eval update ti set a=a+1 + +--eval insert into ti values ($ti_ctr) +--inc $ti_ctr + +--connection slave +set @@global.debug_dbug="+d,pause_after_next_row_exec"; +START SLAVE; +set debug_sync= "now WAIT_FOR row_executed"; + +--connection slave1 +--send STOP SLAVE; + +--connection slave +set @@global.debug_dbug=""; +set debug_sync= "now SIGNAL continue_row_execution"; + +--connection slave1 +--reap +--source include/wait_for_slave_sql_to_stop.inc + +--let $row_count_end=`select count(*) from (select * from ti UNION ALL select * from tm) t` +--let $row_count_diff=`select ($row_count_end-$row_count_initial)` +--let $assert_text= No new rows should have been inserted +--let $assert_cond= $row_count_diff = 0 +--source include/assert.inc + +--let $slave_gtid= `select @@global.gtid_slave_pos` +--echo # Comparing master gtid $master_pos to slaves $slave_gtid +--let $assert_text= No transactions should have committed +--let $assert_cond= $master_pos = $slave_gtid +--source include/assert.inc + +--echo # Slave should be error-free +let $last_error = query_get_value("SHOW SLAVE STATUS", Last_SQL_Errno, 1); +--let $assert_text= Slave should be error free +--let $assert_cond= $last_error = 0 +--source include/assert.inc + +--connection master +--source include/save_master_gtid.inc + +--connection slave +set debug_sync= "RESET"; +--source include/start_slave.inc +--source 
include/sync_with_master_gtid.inc + + +--echo # +--echo # Row Test Case 3: +--echo # A workload with a later transaction that updates a sequence table +--echo # should complete all transactions up to the sequence table update. +--echo # Workload: +--echo # T (long running); should commit +--echo # S (waiting for prior commit); should commit +--echo # T (long running); should rollback + +--connection slave +--source include/stop_slave.inc +set @@global.slave_parallel_mode=AGGRESSIVE; +set @@global.slave_parallel_threads=3; + +--connection master +SET @old_dbug= @@SESSION.debug_dbug; +SET @@SESSION.debug_dbug="+d,binlog_force_commit_id"; +SET @commit_id= 10002; +--eval insert into ti values ($ti_ctr) +--inc $ti_ctr + +select next value for s1; +--source include/save_master_gtid.inc + +--eval insert into ti2 values ($ti2_ctr) +--inc $ti2_ctr + +SET @@SESSION.debug_dbug=@old_dbug; + +--connection slave +LOCK TABLES ti write, ti2 WRITE; +--source include/start_slave.inc + +--echo # Wait for replica to progress until the transactions targeting locked tables are stuck on their locks.. +--let $wait_condition= SELECT count(*)=2 FROM information_schema.processlist WHERE state LIKE 'Waiting for table metadata lock' and command LIKE 'Slave_worker'; +--source include/wait_condition.inc + +--echo # Wait for replica to progress until unblocked transactions are queued for group commit.. 
+--connection slave +--let $wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE state LIKE 'Waiting for prior transaction to commit' and command LIKE 'Slave_worker'; +--source include/wait_condition.inc + +--connection slave1 +--send STOP SLAVE; + +--connection slave +--echo # Wait for replica to signal worker threads to stop +--let $wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE state LIKE 'Waiting for worker thread to stop'; +--source include/wait_condition.inc + +UNLOCK TABLES; +--source include/wait_for_slave_sql_to_stop.inc + +--connection slave1 +--reap +--connection slave + +--let $slave_gtid= `select @@global.gtid_slave_pos` +--let $assert_text= GTID slave state should not change +--let $assert_cond= $master_pos = $slave_gtid +--source include/assert.inc + +set @@global.slave_parallel_mode=CONSERVATIVE; + +--connection master +--source include/save_master_gtid.inc +--connection slave +--source include/start_slave.inc +--source include/sync_with_master_gtid.inc + + +--echo # +--echo # Cleanup +--connection master +DROP TABLE ti, ti2, ti3, tm, tm2, s1; +--source include/save_master_gtid.inc + +--connection slave +--source include/sync_with_master_gtid.inc +--source include/stop_slave.inc +--eval set @@global.debug_dbug="$old_debug" +--eval set @@global.slave_parallel_threads=$old_threads +--eval set @@global.slave_parallel_mode=$old_slave_mode +--source include/start_slave.inc + +--source include/rpl_end.inc + +--echo # End of tests diff --git a/mysql-test/suite/rpl/t/rpl_stm_par_stop_slave_quick.test b/mysql-test/suite/rpl/t/rpl_stm_par_stop_slave_quick.test new file mode 100644 index 00000000000..b76979c62da --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_stm_par_stop_slave_quick.test @@ -0,0 +1,200 @@ +# +# Validate that STOP SLAVE works in a timely manner on a parallel replica with +# STATEMENT binary logging format. 
+# +--source include/have_debug.inc +--source include/master-slave.inc +--source include/have_innodb.inc +--source include/have_binlog_format_statement.inc + +--echo # +--echo # Setup +--connection slave +--source include/stop_slave.inc +SET STATEMENT sql_log_bin=0 FOR call mtr.add_suppression('Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT.'); +SET STATEMENT sql_log_bin=0 FOR call mtr.add_suppression("Commit failed due to failure of an earlier commit on which this one depends"); +--let $old_threads= `SELECT @@global.slave_parallel_threads` +--let $old_domain_threads= `SELECT @@global.slave_domain_parallel_threads` +--let $old_slave_mode= `SELECT @@global.slave_parallel_mode` +--let $old_debug_dbug= `SELECT @@global.debug_dbug` +set @@global.slave_parallel_mode=CONSERVATIVE; + +ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; +change master to master_use_gtid=slave_pos; +--source include/start_slave.inc +--connect(slave_lock_extra,127.0.0.1,root,,test,$SLAVE_MYPORT) + +--echo # +--echo # Initialize test data +--connection master +set statement sql_log_bin=0 for call mtr.add_suppression('Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT.'); +create table ti (a int primary key) engine=innodb; +create table ti2 (a int) engine=innodb; +create table ti3 (a int) engine=innodb; +create table tm (a int) engine=myisam; +create table tm2 (a int) engine=myisam; +--let $ti_ctr= 100 +--let $tm_ctr= 200 +--let $tm2_ctr= 300 +--let $ti2_ctr= 400 +--let $ti3_ctr= 500 +--sync_slave_with_master + +--echo # Run binlog format independent test cases +--source include/rpl_par_stop_slave_quick_common.test + +--echo # +--echo # Statement Test Case 1: +--echo # Using one parallel replication worker thread on workload {N,T}, ensure +--echo # the replica finishes the non-transactional transaction, and does not +--echo # start the next + +--connection slave +--source include/stop_slave.inc +set 
@@global.slave_parallel_threads=1; +--let $row_count_initial=`select count(*) from (select * from ti UNION ALL select * from tm UNION ALL select * from tm2) t` + +--connection master +--let $old_binlog_direct= `SELECT @@session.binlog_direct_non_transactional_updates` +SET @@session.binlog_direct_non_transactional_updates= 0; +BEGIN; +--eval insert into ti values ($ti_ctr) +--inc $ti_ctr +--eval insert into tm values ($tm_ctr) +--inc $tm_ctr +--eval insert into tm2 values ($tm2_ctr) +--inc $tm2_ctr +COMMIT; +--source include/save_master_gtid.inc + +--eval insert into ti values ($ti_ctr) +--inc $ti_ctr + +--connection slave +lock tables tm2 write; +START SLAVE; + +--echo # Wait for replica to get stuck on held lock +--connection slave +--let $wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE state LIKE 'Waiting for table metadata lock' and command LIKE 'Slave_worker'; +--source include/wait_condition.inc + +--connection slave1 +--send STOP SLAVE; + +--connection slave +--echo # Wait for replica to signal worker threads to stop +--let $wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE state LIKE 'Waiting for worker thread to stop'; +--source include/wait_condition.inc + +--echo # Unlock row-level lock holding transaction +UNLOCK TABLES; +--source include/wait_for_slave_sql_to_stop.inc + +--connection slave1 +--reap +--connection slave + +--let $row_count_end=`select count(*) from (select * from ti UNION ALL select * from tm UNION ALL select * from tm2) t` +--let $row_count_diff=`select ($row_count_end-$row_count_initial)` +--let $assert_text= Transaction should have committed +--let $assert_cond= $row_count_diff = 3 +--source include/assert.inc + +--let $slave_gtid= `select @@global.gtid_slave_pos` +# N is the non-transactional transaction +--let $assert_text= N should have been applied +--let $assert_cond= $master_pos = $slave_gtid +--source include/assert.inc + +--echo # Slave should be error-free +let $last_error =
query_get_value("SHOW SLAVE STATUS", Last_SQL_Errno, 1); +--let $assert_text= Slave should be error free +--let $assert_cond= $last_error = 0 +--source include/assert.inc + +--connection master +--source include/save_master_gtid.inc +--eval SET @@session.binlog_direct_non_transactional_updates= $old_binlog_direct + +--connection slave +--source include/start_slave.inc +--source include/sync_with_master_gtid.inc + + +--echo # +--echo # Statement Test Case 2: +--echo # If STOP SLAVE is issued on a parallel slave, such that the next to +--echo # commit transaction is T; even if the next event from the group will +--echo # commit the transaction (e.g. XID_EVENT), the transaction should be +--echo # stopped and rolled back. + +--connection slave +--source include/stop_slave.inc +set @@global.slave_parallel_threads=1; +--let $row_count_initial=`select count(*) from (select * from ti UNION ALL select * from tm) t` + +--connection master +--let $master_gtid_cmp= `select @@global.gtid_binlog_pos` +--eval insert into ti values ($ti_ctr) +--inc $ti_ctr +--eval insert into ti values ($ti_ctr) +--inc $ti_ctr +--source include/save_master_gtid.inc + +--connection slave +LOCK TABLES ti WRITE; +--source include/start_slave.inc + +--echo # Wait for replica to begin executing the first transaction +--connection slave +--let $wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE state LIKE 'Waiting for table metadata lock' and command LIKE 'Slave_worker'; +--source include/wait_condition.inc + +--connection slave1 +--send STOP SLAVE; + +--connection slave +--echo # Wait for replica to signal worker threads to stop +--let $wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE state LIKE 'Waiting for worker thread to stop'; +--source include/wait_condition.inc +UNLOCK TABLES; +--source include/wait_for_slave_sql_to_stop.inc + +--connection slave1 +--reap +--connection slave + +--let $row_count_end=`select count(*) from (select * from ti UNION 
ALL select * from tm) t` +--let $row_count_diff=`select ($row_count_end-$row_count_initial)` +--let $assert_text= No insertions should have committed +--let $assert_cond= $row_count_diff = 0 +--source include/assert.inc + +--let $slave_gtid= `select @@global.gtid_slave_pos` +--let $assert_text= GTID slave state should increment to the first transaction +--let $assert_cond= $master_gtid_cmp = $slave_gtid +--source include/assert.inc + +--source include/start_slave.inc +--source include/sync_with_master_gtid.inc + + +--echo # +--echo # Cleanup +--connection master +DROP TABLE ti, tm, ti2, tm2, ti3; +--source include/save_master_gtid.inc + +--connection slave +--source include/sync_with_master_gtid.inc +--source include/stop_slave.inc +--eval set @@global.slave_parallel_threads=$old_threads +--eval set @@global.slave_domain_parallel_threads=$old_domain_threads +--eval set @@global.slave_parallel_mode=$old_slave_mode +--eval set @@global.debug_dbug="$old_debug_dbug" +--source include/start_slave.inc + +--source include/rpl_end.inc + +--echo # End of tests diff --git a/sql/log_event.cc b/sql/log_event.cc index bafcf34cc2e..0bd1e28abfe 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -8013,7 +8013,10 @@ Gtid_log_event::Gtid_log_event(THD *thd_arg, uint64 seq_no_arg, thd_arg->transaction.all.has_created_dropped_temp_table() || thd_arg->transaction.all.trans_executed_admin_cmd()) flags2|= FL_DDL; - else if (is_transactional && !is_tmp_table) + else if (is_transactional && !is_tmp_table && + !(thd_arg->transaction.all.modified_non_trans_table && + thd->variables.binlog_direct_non_trans_update == 0 && + !thd->is_current_stmt_binlog_format_row())) flags2|= FL_TRANSACTIONAL; if (!(thd_arg->variables.option_bits & OPTION_RPL_SKIP_PARALLEL)) flags2|= FL_ALLOW_PARALLEL; @@ -8144,7 +8147,6 @@ Gtid_log_event::do_apply_event(rpl_group_info *rgi) thd->variables.server_id= this->server_id; thd->variables.gtid_domain_id= this->domain_id; thd->variables.gtid_seq_no= this->seq_no; 
- rgi->gtid_ev_flags2= flags2; thd->reset_for_next_command(); if (opt_gtid_strict_mode && opt_bin_log && opt_log_slave_updates) @@ -11672,8 +11674,27 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi) if (!table->in_use) table->in_use= thd; + /* + Exit early, and let the Event-level exit logic take care of the cleanup + and rollback. + */ + if (rgi->rli->mi->using_parallel() && + rgi->parallel_entry->stop_abrupt(rgi->rli) && + rgi->parallel_entry->rgi_is_safe_to_terminate(rgi)) + break; + error= do_exec_row(rgi); + + DBUG_EXECUTE_IF( + "pause_after_next_row_exec", + { + DBUG_ASSERT(!debug_sync_set_action( + thd, + STRING_WITH_LEN( + "now SIGNAL row_executed WAIT_FOR continue_row_execution"))); + }); + if (unlikely(error)) DBUG_PRINT("info", ("error: %s", HA_ERR(error))); DBUG_ASSERT(error != HA_ERR_RECORD_DELETED); diff --git a/sql/mysqld.cc b/sql/mysqld.cc index c110baa9634..b18f14d1de3 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -9560,6 +9560,7 @@ PSI_stage_info stage_waiting_for_work_from_sql_thread= { 0, "Waiting for work fr PSI_stage_info stage_waiting_for_prior_transaction_to_commit= { 0, "Waiting for prior transaction to commit", 0}; PSI_stage_info stage_waiting_for_prior_transaction_to_start_commit= { 0, "Waiting for prior transaction to start commit before starting next transaction", 0}; PSI_stage_info stage_waiting_for_room_in_worker_thread= { 0, "Waiting for room in worker thread event queue", 0}; +PSI_stage_info stage_waiting_for_worker_stop= { 0, "Waiting for worker thread to stop", 0}; PSI_stage_info stage_waiting_for_workers_idle= { 0, "Waiting for worker threads to be idle", 0}; PSI_stage_info stage_waiting_for_ftwrl= { 0, "Waiting due to global read lock", 0}; PSI_stage_info stage_waiting_for_ftwrl_threads_to_pause= { 0, "Waiting for worker threads to pause for global read lock", 0}; diff --git a/sql/mysqld.h b/sql/mysqld.h index f521ea23638..0c80792168e 100644 --- a/sql/mysqld.h +++ b/sql/mysqld.h @@ -535,6 +535,7 @@ extern 
PSI_stage_info stage_waiting_for_work_from_sql_thread; extern PSI_stage_info stage_waiting_for_prior_transaction_to_commit; extern PSI_stage_info stage_waiting_for_prior_transaction_to_start_commit; extern PSI_stage_info stage_waiting_for_room_in_worker_thread; +extern PSI_stage_info stage_waiting_for_worker_stop; extern PSI_stage_info stage_waiting_for_workers_idle; extern PSI_stage_info stage_waiting_for_ftwrl; extern PSI_stage_info stage_waiting_for_ftwrl_threads_to_pause; diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index 6ca582e4f21..2dce2880218 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -96,7 +96,7 @@ handle_queued_pos_update(THD *thd, rpl_parallel_thread::queued_event *qev) rli= qev->rgi->rli; e= qev->entry_for_queued; if (e->stop_on_error_sub_id < (uint64)ULONGLONG_MAX || - (e->force_abort && !rli->stop_for_until)) + (e->stop_abrupt(rli))) return; mysql_mutex_lock(&rli->data_lock); @@ -173,7 +173,7 @@ finish_event_group(rpl_parallel_thread *rpt, uint64 sub_id, mark_start_commit() calls can be made and it is safe to de-allocate the GCO. */ - err= wfc->wait_for_prior_commit(thd); + err= wfc->wait_for_prior_commit(thd, true); if (unlikely(err) && !rgi->worker_error) signal_error_to_sql_driver_thread(thd, rgi, err); thd->wait_for_commit_ptr= NULL; @@ -395,13 +395,14 @@ do_gco_wait(rpl_group_info *rgi, group_commit_orderer *gco, } while (wait_count > entry->count_committing_event_groups); } - if (entry->force_abort && wait_count > entry->stop_count) + if (entry->force_abort && wait_count >= entry->stop_count) { /* We are stopping (STOP SLAVE), and this event group is beyond the point where we can safely stop. So return a flag that will cause us to skip, rather than execute, the following events. 
*/ + DBUG_ASSERT(entry->rgi_is_safe_to_terminate(rgi)); return true; } else @@ -461,6 +462,16 @@ do_ftwrl_wait(rpl_group_info *rgi, if (sub_id > entry->largest_started_sub_id) entry->largest_started_sub_id= sub_id; + /* + If this rgi is non-transactional, and the state of our current entry + (incorrectly) views the rgi as safe to terminate, we change our state + to disallow this rgi from stop/rollback in the event of STOP SLAVE. + */ + if (!(rgi->gtid_ev_flags2 & Gtid_log_event::FL_TRANSACTIONAL) && + entry->unsafe_rollback_marker_sub_id.load(std::memory_order_relaxed) < + rgi->gtid_sub_id) + entry->unsafe_rollback_marker_sub_id= sub_id; + DBUG_RETURN(aborted); } @@ -1370,7 +1381,9 @@ handle_rpl_parallel_thread(void *arg) if (!err) #endif { - if (unlikely(thd->check_killed())) + if (unlikely(thd->check_killed()) || + (entry->stop_abrupt(rgi->rli) && + entry->rgi_is_safe_to_terminate(rgi))) { thd->clear_error(); thd->get_stmt_da()->reset_diagnostics_area(); @@ -2339,6 +2352,7 @@ rpl_parallel::wait_for_done(THD *thd, Relay_log_info *rli) struct rpl_parallel_entry *e; rpl_parallel_thread *rpt; uint32 i, j; + PSI_stage_info old_stage; /* First signal all workers that they must force quit; no more events will @@ -2399,9 +2413,11 @@ rpl_parallel::wait_for_done(THD *thd, Relay_log_info *rli) if ((rpt= e->rpl_threads[j])) { mysql_mutex_lock(&rpt->LOCK_rpl_thread); + thd->ENTER_COND(&rpt->COND_rpl_thread_stop, &rpt->LOCK_rpl_thread, + &stage_waiting_for_worker_stop, &old_stage); while (rpt->current_owner == &e->rpl_threads[j]) mysql_cond_wait(&rpt->COND_rpl_thread_stop, &rpt->LOCK_rpl_thread); - mysql_mutex_unlock(&rpt->LOCK_rpl_thread); + thd->EXIT_COND(&old_stage); } } } @@ -2501,6 +2517,17 @@ rpl_parallel_entry::queue_master_restart(rpl_group_info *rgi, return 0; } +bool rpl_parallel_entry::stop_abrupt(Relay_log_info *rli) +{ + return force_abort.load(std::memory_order_relaxed) && !rli->stop_for_until; +} + +bool 
rpl_parallel_entry::rgi_is_safe_to_terminate(rpl_group_info *rgi) +{ + return unsafe_rollback_marker_sub_id.load(std::memory_order_relaxed) < + rgi->gtid_sub_id; +} + int rpl_parallel::wait_for_workers_idle(THD *thd) diff --git a/sql/rpl_parallel.h b/sql/rpl_parallel.h index 650aa06e504..9ad9b0b61dd 100644 --- a/sql/rpl_parallel.h +++ b/sql/rpl_parallel.h @@ -272,7 +272,7 @@ struct rpl_parallel_entry { so worker threads must force abort any current transactions without waiting for event groups to complete. */ - bool force_abort; + std::atomic<bool> force_abort; /* At STOP SLAVE (force_abort=true), we do not want to process all events in the queue (which could unnecessarily delay stop, if a lot of events happen @@ -349,10 +349,34 @@ struct rpl_parallel_entry { /* The group_commit_orderer object for the events currently being queued. */ group_commit_orderer *current_gco; + /* + Marks the highest sub id that all transactions up to it must be executed to + allow for a consistent replication state; and all active transactions + afterwards can safely be stopped and rolled back. + */ + std::atomic<uint64> unsafe_rollback_marker_sub_id; + rpl_parallel_thread * choose_thread(rpl_group_info *rgi, bool *did_enter_cond, PSI_stage_info *old_stage, bool reuse); int queue_master_restart(rpl_group_info *rgi, Format_description_log_event *fdev); + + /* + Check if we are stopping the slave as a direct command from the user, as + opposed to force_abort being set due to the UNTIL clause from START SLAVE. + + Returns 1 if the slave has been explicitly ordered to stop, 0 otherwise. + */ + bool stop_abrupt(Relay_log_info *rli); + + /* + Check if the rgi is safe to stop and rollback in the event of an abrupt + stop of the parallel slave.
+ + Returns 1 if we can safely terminate and rollback the transaction, 0 + otherwise + */ + bool rgi_is_safe_to_terminate(rpl_group_info *rgi); }; struct rpl_parallel { HASH domain_hash; diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index 04fddb3e74b..48fd9b1f435 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -2188,6 +2188,7 @@ event_group_new_gtid(rpl_group_info *rgi, Gtid_log_event *gev) rgi->current_gtid.server_id= gev->server_id; rgi->current_gtid.seq_no= gev->seq_no; rgi->commit_id= gev->commit_id; + rgi->gtid_ev_flags2= gev->flags2; rgi->gtid_pending= true; return 0; } diff --git a/sql/slave.cc b/sql/slave.cc index f43240a8866..647756482d5 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -3631,6 +3631,8 @@ int has_temporary_error(THD *thd) { uint current_errno; + rpl_group_info *rgi= thd->rgi_slave; + Relay_log_info *rli= rgi->rli; DBUG_ENTER("has_temporary_error"); DBUG_EXECUTE_IF("all_errors_are_temporary_errors", @@ -3648,6 +3650,11 @@ has_temporary_error(THD *thd) if (!likely(thd->is_error())) DBUG_RETURN(0); + if (rgi->rli->mi->using_parallel() && + rgi->parallel_entry->stop_abrupt(rli) && + rgi->parallel_entry->rgi_is_safe_to_terminate(rgi)) + DBUG_RETURN(0); + current_errno= thd->get_stmt_da()->sql_errno(); for (uint i= 0; i < slave_transaction_retry_error_length; i++) { diff --git a/sql/sql_class.cc b/sql/sql_class.cc index dc58d2a250a..81fcfa272a3 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -7639,7 +7639,7 @@ wait_for_commit::register_wait_for_prior_commit(wait_for_commit *waitee) */ int -wait_for_commit::wait_for_prior_commit2(THD *thd) +wait_for_commit::wait_for_prior_commit2(THD *thd, bool force_wait) { PSI_stage_info old_stage; wait_for_commit *loc_waitee; @@ -7664,9 +7664,24 @@ wait_for_commit::wait_for_prior_commit2(THD *thd) &stage_waiting_for_prior_transaction_to_commit, &old_stage); while ((loc_waitee= this->waitee.load(std::memory_order_relaxed)) && - likely(!thd->check_killed(1))) + (likely(!thd->check_killed(1)) || 
force_wait)) mysql_cond_wait(&COND_wait_commit, &LOCK_wait_commit); - if (!loc_waitee) + if (!loc_waitee +#ifndef EMBEDDED_LIBRARY + /* + If a worker has been killed prior to this wait, e.g. in do_gco_wait(), + then it should not perform thread cleanup if there are threads which + have yet to commit. This is to prevent the cleanup of resources that + the prior RGI may need, e.g. its GCO. This is achieved by skipping + the unregistration of the waitee, such that each subsequent call to + wait_for_prior_commit() will exit early (while maintaining the + dependence), thus allowing the final call to + thd->wait_for_prior_commit() within finish_event_group() to wait. + */ + || (thd->rgi_slave && (thd->rgi_slave->worker_error && + !thd->rgi_slave->did_mark_start_commit)) +#endif + ) { if (wakeup_error) my_error(ER_PRIOR_COMMIT_FAILED, MYF(0)); diff --git a/sql/sql_class.h b/sql/sql_class.h index 4e5aba33443..e177fc55d9d 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -2144,14 +2144,14 @@ struct wait_for_commit bool commit_started; void register_wait_for_prior_commit(wait_for_commit *waitee); - int wait_for_prior_commit(THD *thd) + int wait_for_prior_commit(THD *thd, bool force_wait= false) { /* Quick inline check, to avoid function call and locking in the common case where no wakeup is registered, or a registered wait was already signalled. */ if (waitee.load(std::memory_order_acquire)) - return wait_for_prior_commit2(thd); + return wait_for_prior_commit2(thd, force_wait); else { if (wakeup_error) @@ -2205,7 +2205,7 @@ struct wait_for_commit void wakeup(int wakeup_error); - int wait_for_prior_commit2(THD *thd); + int wait_for_prior_commit2(THD *thd, bool force_wait= false); void wakeup_subsequent_commits2(int wakeup_error); void unregister_wait_for_prior_commit2();