From 9150a0c7cbefefe6ffbd9ba19a37f159d86a8d2f Mon Sep 17 00:00:00 2001 From: Kristian Nielsen Date: Wed, 25 Jun 2014 15:24:11 +0200 Subject: [PATCH] MDEV-4937: sql_slave_skip_counter does not work with GTID The sql_slave_skip_counter is important to be able to recover replication from certain errors. Often, an appropriate solution is to set sql_slave_skip_counter to skip over a problem event. But setting sql_slave_skip_counter produced an error in GTID mode, with a suggestion to instead set @@gtid_slave_pos to point past the problem event. This however is not always possible; for example, in case of an INCIDENT event, that event does not have any GTID to assign to @@gtid_slave_pos. With this patch, sql_slave_skip_counter now works in GTID mode the same way as in non-GTID mode. When set, that many initial events are skipped when the SQL thread starts, plus as many extra events as are needed to completely skip any partially skipped event group. The GTID position is updated to point past the skipped event(s). --- mysql-test/suite/rpl/r/rpl_gtid_basic.result | 109 +++++++++++++++- .../suite/rpl/r/rpl_gtid_errorlog.result | 4 - mysql-test/suite/rpl/t/rpl_gtid_basic.test | 122 ++++++++++++++++-- mysql-test/suite/rpl/t/rpl_gtid_errorlog.test | 5 - sql/rpl_rli.cc | 3 +- sql/slave.cc | 27 +++- sql/sql_repl.cc | 2 + sql/sys_vars.cc | 5 - 8 files changed, 241 insertions(+), 36 deletions(-) diff --git a/mysql-test/suite/rpl/r/rpl_gtid_basic.result b/mysql-test/suite/rpl/r/rpl_gtid_basic.result index fd33221814d..a7294db4d13 100644 --- a/mysql-test/suite/rpl/r/rpl_gtid_basic.result +++ b/mysql-test/suite/rpl/r/rpl_gtid_basic.result @@ -61,13 +61,7 @@ include/stop_slave.inc INSERT INTO t1 VALUES (5, "m1a"); INSERT INTO t2 VALUES (5, "i1a"); CHANGE MASTER TO master_host = '127.0.0.1', master_port = MASTER_PORT, -MASTER_USE_GTID=SLAVE_POS; -SET GLOBAL sql_slave_skip_counter=1; -ERROR HY000: When using GTID, @@sql_slave_skip_counter can not be used. 
Instead, setting @@gtid_slave_pos explicitly can be used to skip to after a given GTID position. -CHANGE MASTER TO master_host = '127.0.0.1', master_port = MASTER_PORT, MASTER_USE_GTID=CURRENT_POS; -SET GLOBAL sql_slave_skip_counter=10; -ERROR HY000: When using GTID, @@sql_slave_skip_counter can not be used. Instead, setting @@gtid_slave_pos explicitly can be used to skip to after a given GTID position. include/start_slave.inc SELECT * FROM t1 ORDER BY a; a b @@ -322,5 +316,108 @@ master_gtid_wait('2-1-10') 0 master_gtid_wait('2-1-10') 0 +*** Test sql_gtid_slave_pos when used with GTID *** +include/stop_slave.inc +SET gtid_domain_id=2; +SET gtid_seq_no=1000; +INSERT INTO t1 VALUES (10); +INSERT INTO t1 VALUES (11); +SET sql_slave_skip_counter= 1; +include/start_slave.inc +SELECT * FROM t1 WHERE a >= 10 ORDER BY a; +a +11 +SELECT IF(LOCATE("2-1-1001", @@GLOBAL.gtid_slave_pos)>0, "Ok", CONCAT("ERROR! expected GTID 2-1-1001 not found in gtid_slave_pos: ", @@GLOBAL.gtid_slave_pos)) AS status; +status +Ok +include/stop_slave.inc +SET gtid_domain_id=2; +SET gtid_seq_no=1010; +INSERT INTO t1 VALUES (12); +INSERT INTO t1 VALUES (13); +SET sql_slave_skip_counter= 2; +include/start_slave.inc +SELECT * FROM t1 WHERE a >= 10 ORDER BY a; +a +11 +13 +SELECT IF(LOCATE("2-1-1011", @@GLOBAL.gtid_slave_pos)>0, "Ok", CONCAT("ERROR! expected GTID 2-1-1011 not found in gtid_slave_pos: ", @@GLOBAL.gtid_slave_pos)) AS status; +status +Ok +include/stop_slave.inc +SET gtid_domain_id=2; +SET gtid_seq_no=1020; +INSERT INTO t1 VALUES (14); +INSERT INTO t1 VALUES (15); +INSERT INTO t1 VALUES (16); +SET sql_slave_skip_counter= 3; +include/start_slave.inc +SELECT * FROM t1 WHERE a >= 10 ORDER BY a; +a +11 +13 +15 +16 +SELECT IF(LOCATE("2-1-1022", @@GLOBAL.gtid_slave_pos)>0, "Ok", CONCAT("ERROR! 
expected GTID 2-1-1022 not found in gtid_slave_pos: ", @@GLOBAL.gtid_slave_pos)) AS status; +status +Ok +include/stop_slave.inc +SET gtid_domain_id=2; +SET gtid_seq_no=1030; +INSERT INTO t1 VALUES (17); +INSERT INTO t1 VALUES (18); +INSERT INTO t1 VALUES (19); +SET sql_slave_skip_counter= 5; +include/start_slave.inc +SELECT * FROM t1 WHERE a >= 10 ORDER BY a; +a +11 +13 +15 +16 +19 +SELECT IF(LOCATE("2-1-1032", @@GLOBAL.gtid_slave_pos)>0, "Ok", CONCAT("ERROR! expected GTID 2-1-1032 not found in gtid_slave_pos: ", @@GLOBAL.gtid_slave_pos)) AS status; +status +Ok +include/stop_slave.inc +SET gtid_domain_id=3; +SET gtid_seq_no=100; +CREATE TABLE t2 (a INT PRIMARY KEY); +DROP TABLE t2; +SET gtid_domain_id=2; +SET gtid_seq_no=1040; +INSERT INTO t1 VALUES (20); +SET @saved_mode= @@GLOBAL.slave_ddl_exec_mode; +SET GLOBAL slave_ddl_exec_mode=STRICT; +SET sql_slave_skip_counter=1; +START SLAVE UNTIL master_gtid_pos="3-1-100"; +include/sync_with_master_gtid.inc +include/wait_for_slave_sql_to_stop.inc +SELECT * FROM t2; +ERROR 42S02: Table 'test.t2' doesn't exist +SELECT IF(LOCATE("3-1-100", @@GLOBAL.gtid_slave_pos)>0, "Ok", CONCAT("ERROR! expected GTID 3-1-100 not found in gtid_slave_pos: ", @@GLOBAL.gtid_slave_pos)) AS status; +status +Ok +SET sql_log_bin=0; +CALL mtr.add_suppression("Slave: Unknown table 'test\\.t2' Error_code: 1051"); +SET sql_log_bin=1; +START SLAVE; +include/wait_for_slave_sql_error.inc [errno=1051] +SELECT IF(LOCATE("3-1-100", @@GLOBAL.gtid_slave_pos)>0, "Ok", CONCAT("ERROR! expected GTID 3-1-100 not found in gtid_slave_pos: ", @@GLOBAL.gtid_slave_pos)) AS status; +status +Ok +STOP SLAVE IO_THREAD; +SET sql_slave_skip_counter=2; +include/start_slave.inc +SELECT * FROM t1 WHERE a >= 20 ORDER BY a; +a +20 +SELECT IF(LOCATE("3-1-101", @@GLOBAL.gtid_slave_pos)>0, "Ok", CONCAT("ERROR! 
expected GTID 3-1-101 not found in gtid_slave_pos: ", @@GLOBAL.gtid_slave_pos)) AS status; +status +Ok +SELECT IF(LOCATE("2-1-1040", @@GLOBAL.gtid_slave_pos)>0, "Ok", CONCAT("ERROR! expected GTID 2-1-1040 not found in gtid_slave_pos: ", @@GLOBAL.gtid_slave_pos)) AS status; +status +Ok +SET GLOBAL slave_ddl_exec_mode= @saved_mode; DROP TABLE t1; include/rpl_end.inc diff --git a/mysql-test/suite/rpl/r/rpl_gtid_errorlog.result b/mysql-test/suite/rpl/r/rpl_gtid_errorlog.result index e68f5d65b93..204615201d9 100644 --- a/mysql-test/suite/rpl/r/rpl_gtid_errorlog.result +++ b/mysql-test/suite/rpl/r/rpl_gtid_errorlog.result @@ -29,12 +29,8 @@ REPLACE INTO t1 VALUES (5); SET debug_dbug= @dbug_save; include/wait_for_slave_sql_error.inc [errno=1590] include/stop_slave.inc -CHANGE MASTER TO master_use_gtid=no; SET sql_slave_skip_counter=1; include/start_slave.inc -include/stop_slave.inc -CHANGE MASTER TO master_use_gtid=slave_pos; -include/start_slave.inc SELECT * FROM t1 ORDER BY a; a 1 diff --git a/mysql-test/suite/rpl/t/rpl_gtid_basic.test b/mysql-test/suite/rpl/t/rpl_gtid_basic.test index 3f2d5e1e321..86866f90775 100644 --- a/mysql-test/suite/rpl/t/rpl_gtid_basic.test +++ b/mysql-test/suite/rpl/t/rpl_gtid_basic.test @@ -68,16 +68,8 @@ save_master_pos; connection server_4; --replace_result $MASTER_MYPORT MASTER_PORT -eval CHANGE MASTER TO master_host = '127.0.0.1', master_port = $MASTER_MYPORT, - MASTER_USE_GTID=SLAVE_POS; -# Test that sql_slave_skip_counter is prevented in GTID mode. 
---error ER_SLAVE_SKIP_NOT_IN_GTID -SET GLOBAL sql_slave_skip_counter=1; ---replace_result $MASTER_MYPORT MASTER_PORT eval CHANGE MASTER TO master_host = '127.0.0.1', master_port = $MASTER_MYPORT, MASTER_USE_GTID=CURRENT_POS; ---error ER_SLAVE_SKIP_NOT_IN_GTID -SET GLOBAL sql_slave_skip_counter=10; --source include/start_slave.inc sync_with_master; SELECT * FROM t1 ORDER BY a; @@ -374,6 +366,120 @@ reap; reap; +--echo *** Test sql_gtid_slave_pos when used with GTID *** + +--connection server_2 +--source include/stop_slave.inc + +--connection server_1 +SET gtid_domain_id=2; +SET gtid_seq_no=1000; +INSERT INTO t1 VALUES (10); +INSERT INTO t1 VALUES (11); +--save_master_pos + +--connection server_2 +SET sql_slave_skip_counter= 1; +--source include/start_slave.inc +--sync_with_master +SELECT * FROM t1 WHERE a >= 10 ORDER BY a; +SELECT IF(LOCATE("2-1-1001", @@GLOBAL.gtid_slave_pos)>0, "Ok", CONCAT("ERROR! expected GTID 2-1-1001 not found in gtid_slave_pos: ", @@GLOBAL.gtid_slave_pos)) AS status; + +--source include/stop_slave.inc + +--connection server_1 +SET gtid_domain_id=2; +SET gtid_seq_no=1010; +INSERT INTO t1 VALUES (12); +INSERT INTO t1 VALUES (13); +--save_master_pos + +--connection server_2 +SET sql_slave_skip_counter= 2; +--source include/start_slave.inc +--sync_with_master +SELECT * FROM t1 WHERE a >= 10 ORDER BY a; +SELECT IF(LOCATE("2-1-1011", @@GLOBAL.gtid_slave_pos)>0, "Ok", CONCAT("ERROR! expected GTID 2-1-1011 not found in gtid_slave_pos: ", @@GLOBAL.gtid_slave_pos)) AS status; + +--source include/stop_slave.inc + +--connection server_1 +SET gtid_domain_id=2; +SET gtid_seq_no=1020; +INSERT INTO t1 VALUES (14); +INSERT INTO t1 VALUES (15); +INSERT INTO t1 VALUES (16); +--save_master_pos + +--connection server_2 +SET sql_slave_skip_counter= 3; +--source include/start_slave.inc +--sync_with_master +SELECT * FROM t1 WHERE a >= 10 ORDER BY a; +SELECT IF(LOCATE("2-1-1022", @@GLOBAL.gtid_slave_pos)>0, "Ok", CONCAT("ERROR! 
expected GTID 2-1-1022 not found in gtid_slave_pos: ", @@GLOBAL.gtid_slave_pos)) AS status; + +--source include/stop_slave.inc + +--connection server_1 +SET gtid_domain_id=2; +SET gtid_seq_no=1030; +INSERT INTO t1 VALUES (17); +INSERT INTO t1 VALUES (18); +INSERT INTO t1 VALUES (19); +--save_master_pos + +--connection server_2 +SET sql_slave_skip_counter= 5; +--source include/start_slave.inc +--sync_with_master +SELECT * FROM t1 WHERE a >= 10 ORDER BY a; +SELECT IF(LOCATE("2-1-1032", @@GLOBAL.gtid_slave_pos)>0, "Ok", CONCAT("ERROR! expected GTID 2-1-1032 not found in gtid_slave_pos: ", @@GLOBAL.gtid_slave_pos)) AS status; + + +--source include/stop_slave.inc + +--connection server_1 +SET gtid_domain_id=3; +SET gtid_seq_no=100; +CREATE TABLE t2 (a INT PRIMARY KEY); +DROP TABLE t2; +SET gtid_domain_id=2; +SET gtid_seq_no=1040; +INSERT INTO t1 VALUES (20); +--save_master_pos + +--connection server_2 +SET @saved_mode= @@GLOBAL.slave_ddl_exec_mode; +SET GLOBAL slave_ddl_exec_mode=STRICT; +SET sql_slave_skip_counter=1; +START SLAVE UNTIL master_gtid_pos="3-1-100"; +--let $master_pos=3-1-100 +--source include/sync_with_master_gtid.inc +--source include/wait_for_slave_sql_to_stop.inc +--error ER_NO_SUCH_TABLE +SELECT * FROM t2; +SELECT IF(LOCATE("3-1-100", @@GLOBAL.gtid_slave_pos)>0, "Ok", CONCAT("ERROR! expected GTID 3-1-100 not found in gtid_slave_pos: ", @@GLOBAL.gtid_slave_pos)) AS status; + +# Start the slave again, it should fail on the DROP TABLE as the table is not there. +SET sql_log_bin=0; +CALL mtr.add_suppression("Slave: Unknown table 'test\\.t2' Error_code: 1051"); +SET sql_log_bin=1; +START SLAVE; +--let $slave_sql_errno=1051 +--source include/wait_for_slave_sql_error.inc +SELECT IF(LOCATE("3-1-100", @@GLOBAL.gtid_slave_pos)>0, "Ok", CONCAT("ERROR! 
expected GTID 3-1-100 not found in gtid_slave_pos: ", @@GLOBAL.gtid_slave_pos)) AS status; + +STOP SLAVE IO_THREAD; +SET sql_slave_skip_counter=2; +--source include/start_slave.inc +--sync_with_master + +SELECT * FROM t1 WHERE a >= 20 ORDER BY a; +SELECT IF(LOCATE("3-1-101", @@GLOBAL.gtid_slave_pos)>0, "Ok", CONCAT("ERROR! expected GTID 3-1-101 not found in gtid_slave_pos: ", @@GLOBAL.gtid_slave_pos)) AS status; +SELECT IF(LOCATE("2-1-1040", @@GLOBAL.gtid_slave_pos)>0, "Ok", CONCAT("ERROR! expected GTID 2-1-1040 not found in gtid_slave_pos: ", @@GLOBAL.gtid_slave_pos)) AS status; + +SET GLOBAL slave_ddl_exec_mode= @saved_mode; + --connection server_1 DROP TABLE t1; diff --git a/mysql-test/suite/rpl/t/rpl_gtid_errorlog.test b/mysql-test/suite/rpl/t/rpl_gtid_errorlog.test index f83422486da..24298e9893a 100644 --- a/mysql-test/suite/rpl/t/rpl_gtid_errorlog.test +++ b/mysql-test/suite/rpl/t/rpl_gtid_errorlog.test @@ -46,15 +46,10 @@ SET debug_dbug= @dbug_save; --connection slave --let $slave_sql_errno=1590 --source include/wait_for_slave_sql_error.inc -# ToDo no need to switch off GTID once MDEV-4937 is fixed --source include/stop_slave.inc -CHANGE MASTER TO master_use_gtid=no; SET sql_slave_skip_counter=1; --source include/start_slave.inc --sync_with_master ---source include/stop_slave.inc -CHANGE MASTER TO master_use_gtid=slave_pos; ---source include/start_slave.inc SELECT * FROM t1 ORDER BY a; diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index cc543f7c377..595ef49d72f 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -1084,7 +1084,6 @@ int purge_relay_logs(Relay_log_info* rli, THD *thd, bool just_reset, DBUG_ASSERT(rli->slave_running == 0); DBUG_ASSERT(rli->mi->slave_running == 0); - rli->slave_skip_counter=0; mysql_mutex_lock(&rli->data_lock); /* @@ -1565,8 +1564,8 @@ event_group_new_gtid(rpl_group_info *rgi, Gtid_log_event *gev) return 1; } rgi->gtid_sub_id= sub_id; - rgi->current_gtid.server_id= gev->server_id; rgi->current_gtid.domain_id= gev->domain_id; + 
rgi->current_gtid.server_id= gev->server_id; rgi->current_gtid.seq_no= gev->seq_no; return 0; } diff --git a/sql/slave.cc b/sql/slave.cc index f755cb63558..3d6809b575a 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -3521,9 +3521,6 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli, if (opt_gtid_ignore_duplicates) { - serial_rgi->current_gtid.domain_id= gev->domain_id; - serial_rgi->current_gtid.server_id= gev->server_id; - serial_rgi->current_gtid.seq_no= gev->seq_no; int res= rpl_global_gtid_slave_state.check_duplicate_gtid (&serial_rgi->current_gtid, serial_rgi); if (res < 0) @@ -4366,6 +4363,7 @@ pthread_handler_t handle_slave_sql(void *arg) char saved_master_log_name[FN_REFLEN]; my_off_t UNINIT_VAR(saved_log_pos); my_off_t UNINIT_VAR(saved_master_log_pos); + String saved_skip_gtid_pos; my_off_t saved_skip= 0; Master_info *mi= ((Master_info*)arg); Relay_log_info* rli = &mi->rli; @@ -4571,6 +4569,12 @@ log '%s' at position %s, relay log '%s' position: %s%s", RPL_LOG_NAME, strmake_buf(saved_master_log_name, rli->group_master_log_name); saved_log_pos= rli->group_relay_log_pos; saved_master_log_pos= rli->group_master_log_pos; + if (mi->using_gtid != Master_info::USE_GTID_NO) + { + saved_skip_gtid_pos.append(STRING_WITH_LEN(", GTID '")); + rpl_append_gtid_state(&saved_skip_gtid_pos, false); + saved_skip_gtid_pos.append(STRING_WITH_LEN("'; ")); + } saved_skip= rli->slave_skip_counter; } if ((rli->until_condition == Relay_log_info::UNTIL_MASTER_POS || @@ -4594,16 +4598,27 @@ log '%s' at position %s, relay log '%s' position: %s%s", RPL_LOG_NAME, if (saved_skip && rli->slave_skip_counter == 0) { + String tmp; + if (mi->using_gtid != Master_info::USE_GTID_NO) + { + tmp.append(STRING_WITH_LEN(", GTID '")); + rpl_append_gtid_state(&tmp, false); + tmp.append(STRING_WITH_LEN("'; ")); + } + sql_print_information("'SQL_SLAVE_SKIP_COUNTER=%ld' executed at " "relay_log_file='%s', relay_log_pos='%ld', master_log_name='%s', " - "master_log_pos='%ld' and new position 
at " + "master_log_pos='%ld'%s and new position at " "relay_log_file='%s', relay_log_pos='%ld', master_log_name='%s', " - "master_log_pos='%ld' ", + "master_log_pos='%ld'%s ", (ulong) saved_skip, saved_log_name, (ulong) saved_log_pos, saved_master_log_name, (ulong) saved_master_log_pos, + saved_skip_gtid_pos.c_ptr_safe(), rli->group_relay_log_name, (ulong) rli->group_relay_log_pos, - rli->group_master_log_name, (ulong) rli->group_master_log_pos); + rli->group_master_log_name, (ulong) rli->group_master_log_pos, + tmp.c_ptr_safe()); saved_skip= 0; + saved_skip_gtid_pos.free(); } if (exec_relay_log_event(thd, rli, serial_rgi)) diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc index c7bd28259ae..e2ba5197dc4 100644 --- a/sql/sql_repl.cc +++ b/sql/sql_repl.cc @@ -3075,6 +3075,7 @@ int reset_slave(THD *thd, Master_info* mi) mi->clear_error(); mi->rli.clear_error(); mi->rli.clear_until_condition(); + mi->rli.slave_skip_counter= 0; // close master_info_file, relay_log_info_file, set mi->inited=rli->inited=0 end_master_info(mi); @@ -3522,6 +3523,7 @@ bool change_master(THD* thd, Master_info* mi, bool *master_info_added) /* Clear the errors, for a clean start */ mi->rli.clear_error(); mi->rli.clear_until_condition(); + mi->rli.slave_skip_counter= 0; sql_print_information("'CHANGE MASTER TO executed'. " "Previous state master_host='%s', master_port='%u', master_log_file='%s', " diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index 9790a82886a..4140c4c14d0 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -4287,11 +4287,6 @@ bool update_multi_source_variable(sys_var *self_var, THD *thd, static bool update_slave_skip_counter(sys_var *self, THD *thd, Master_info *mi) { - if (mi->using_gtid != Master_info::USE_GTID_NO) - { - my_error(ER_SLAVE_SKIP_NOT_IN_GTID, MYF(0)); - return true; - } if (mi->rli.slave_running) { my_error(ER_SLAVE_MUST_STOP, MYF(0), mi->connection_name.length,