From fea2a5d83dea81e231bb14bc5535b60261c7b30d Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 22 Feb 2008 19:07:07 +0400 Subject: [PATCH] BUG#13861 - START SLAVE UNTIL may stop 1 evnt too late if log-slave-updates and circul repl Slave SQL thread may execute one extra event when there are events skipped by slave I/O thread (e.g. originated by the same server). Whereas it was requested not to do so by the UNTIL condition. This happens because we compare with the end position of previously executed event. This is fine when there are no skipped by slave I/O thread events, as end position of previous event equals to start position of to be executed event. Otherwise this position equals to start position of skipped event. This is fixed by: - reading the event to be executed before checking if the until condition is satisfied. - comparing the start position of the event to be executed. Since we do not have the start position available, we compute it by subtracting event length from end position (which is available). - if there are no events on the event queue at the slave sql starting time, that meet until condition, we stop immediately, as in this case we do not want to wait for next event. mysql-test/r/rpl_dual_pos_advance.result: A test case for BUG#13861. mysql-test/t/rpl_dual_pos_advance.test: A test case for BUG#13861. sql/log_event.cc: Store length of event. This is needed for further calculation of the beginning of event. sql/slave.cc: Slave SQL thread may execute one extra event when there are events skipped by slave I/O thread (e.g. originated by the same server). Whereas it was requested not to do so by the UNTIL condition. This happens because we compare with the end position of previously executed event. This is fine when there are no skipped by slave I/O thread events, as end position of previous event equals to start position of to be executed event. Otherwise this position equals to start position of skipped event. This is fixed by: - reading the event to be executed before checking if the until condition is satisfied. - comparing the start position of the event to be executed. Since we do not have the start position available, we compute it by subtracting event length from end position (which is available). - if there are no events on the event queue at the slave sql starting time, that meet until condition, we stop immediately, as in this case we do not want to wait for next event. sql/slave.h: Added master_log_pos parametr to is_until_satisfied(). mysql-test/t/rpl_dual_pos_advance-slave.opt: New BitKeeper file ``mysql-test/t/rpl_dual_pos_advance-slave.opt'' --- mysql-test/r/rpl_dual_pos_advance.result | 43 ++++++++++++- mysql-test/t/rpl_dual_pos_advance-slave.opt | 1 + mysql-test/t/rpl_dual_pos_advance.test | 56 +++++++++++------ sql/log_event.cc | 3 +- sql/slave.cc | 68 +++++++++++++-------- sql/slave.h | 2 +- 6 files changed, 128 insertions(+), 45 deletions(-) create mode 100644 mysql-test/t/rpl_dual_pos_advance-slave.opt diff --git a/mysql-test/r/rpl_dual_pos_advance.result b/mysql-test/r/rpl_dual_pos_advance.result index 257baa81b74..97ca8101c33 100644 --- a/mysql-test/r/rpl_dual_pos_advance.result +++ b/mysql-test/r/rpl_dual_pos_advance.result @@ -8,15 +8,56 @@ reset master; change master to master_host="127.0.0.1",master_port=SLAVE_PORT,master_user="root"; start slave; create table t1 (n int); +stop slave; +create table t2 (n int); +show tables; +Tables_in_test +t1 +t2 +create table t3 (n int) engine=innodb; +set @a=1; +insert into t3 values(@a); +begin; +insert into t3 values(2); +insert into t3 values(3); +commit; +insert into t3 values(4); +start slave until master_log_file="slave-bin.000001",master_log_pos=195; +Warnings: +Note 1278 It is recommended to use --skip-slave-start when doing step-by-step replication with START SLAVE UNTIL; otherwise, you will get problems if you get an unexpected slave's mysqld restart +select master_pos_wait("slave-bin.000001",137); +master_pos_wait("slave-bin.000001",137) +0 +show tables; +Tables_in_test +t1 +t2 +start slave until master_log_file="slave-bin.000001",master_log_pos=438; +Warnings: +Note 1278 It is recommended to use --skip-slave-start when doing step-by-step replication with START SLAVE UNTIL; otherwise, you will get problems if you get an unexpected slave's mysqld restart +select * from t3; +n +1 +start slave until master_log_file="slave-bin.000001",master_log_pos=663; +Warnings: +Note 1278 It is recommended to use --skip-slave-start when doing step-by-step replication with START SLAVE UNTIL; otherwise, you will get problems if you get an unexpected slave's mysqld restart +select * from t3; +n +1 +2 +3 +start slave; create table t4 (n int); create table t5 (n int); create table t6 (n int); show tables; Tables_in_test t1 +t2 +t3 t4 t5 t6 stop slave; reset slave; -drop table t1,t4,t5,t6; +drop table t1,t2,t3,t4,t5,t6; diff --git a/mysql-test/t/rpl_dual_pos_advance-slave.opt b/mysql-test/t/rpl_dual_pos_advance-slave.opt new file mode 100644 index 00000000000..627becdbfb5 --- /dev/null +++ b/mysql-test/t/rpl_dual_pos_advance-slave.opt @@ -0,0 +1 @@ +--innodb diff --git a/mysql-test/t/rpl_dual_pos_advance.test b/mysql-test/t/rpl_dual_pos_advance.test index 518fa9df885..5e08284a2f2 100644 --- a/mysql-test/t/rpl_dual_pos_advance.test +++ b/mysql-test/t/rpl_dual_pos_advance.test @@ -7,6 +7,7 @@ # It also will test BUG#13861. source include/master-slave.inc; +source include/have_innodb.inc; # set up "dual head" @@ -30,45 +31,64 @@ save_master_pos; connection master; sync_with_master; -# Now test BUG#13861. This will be enabled when Guilhem fixes this -# bug. +# +# BUG#13861 - START SLAVE UNTIL may stop 1 evnt too late if +# log-slave-updates and circul repl +# +stop slave; -# stop slave +create table t2 (n int); # create one ignored event -# create table t2 (n int); # create one ignored event +save_master_pos; +connection slave; +sync_with_master; -# save_master_pos; -# connection slave; -# sync_with_master; +connection slave; -# connection slave; +show tables; -# show tables; +save_master_pos; -# save_master_pos; +create table t3 (n int) engine=innodb; +set @a=1; +insert into t3 values(@a); +begin; +insert into t3 values(2); +insert into t3 values(3); +commit; +insert into t3 values(4); -# create table t3 (n int); -# connection master; +connection master; # bug is that START SLAVE UNTIL may stop too late, we test that by # asking it to stop before creation of t3. -# start slave until master_log_file="slave-bin.000001",master_log_pos=195; +start slave until master_log_file="slave-bin.000001",master_log_pos=195; # wait until it's started (the position below is the start of "CREATE # TABLE t2") (otherwise wait_for_slave_to_stop may return at once) -# select master_pos_wait("slave-bin.000001",137); +select master_pos_wait("slave-bin.000001",137); -# wait_for_slave_to_stop; +wait_for_slave_to_stop; # then BUG#13861 causes t3 to show up below (because stopped too # late). -# show tables; +show tables; -# start slave; +# ensure that we do not break set @a=1; insert into t3 values(@a); +start slave until master_log_file="slave-bin.000001",master_log_pos=438; +wait_for_slave_to_stop; +select * from t3; + +# ensure that we do not break transaction +start slave until master_log_file="slave-bin.000001",master_log_pos=663; +wait_for_slave_to_stop; +select * from t3; + +start slave; # BUG#13023 is that Exec_master_log_pos may stay too low "forever": @@ -99,7 +119,7 @@ show tables; stop slave; reset slave; -drop table t1,t4,t5,t6; # add t2 and t3 later +drop table t1,t2,t3,t4,t5,t6; save_master_pos; connection slave; diff --git a/sql/log_event.cc b/sql/log_event.cc index a950094a018..8e14a873fc6 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -373,6 +373,7 @@ Log_event::Log_event(const char* buf, #endif when = uint4korr(buf); server_id = uint4korr(buf + SERVER_ID_OFFSET); + data_written= uint4korr(buf + EVENT_LEN_OFFSET); if (description_event->binlog_version==1) { log_pos= 0; @@ -405,7 +406,7 @@ Log_event::Log_event(const char* buf, binlog, so which will cause problems if the user uses this value in CHANGE MASTER). */ - log_pos+= uint4korr(buf + EVENT_LEN_OFFSET); + log_pos+= data_written; } DBUG_PRINT("info", ("log_pos: %lu", (ulong) log_pos)); diff --git a/sql/slave.cc b/sql/slave.cc index 8a3620080f2..500683e080d 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -3115,6 +3115,11 @@ int check_expected_error(THD* thd, RELAY_LOG_INFO* rli, int expected_error) Check if condition stated in UNTIL clause of START SLAVE is reached. SYNOPSYS st_relay_log_info::is_until_satisfied() + master_beg_pos position of the beginning of to be executed event + (not log_pos member of the event that points to the + beginning of the following event) + + DESCRIPTION Checks if UNTIL condition is reached. Uses caching result of last comparison of current log file name and target log file name. So cached @@ -3139,7 +3144,7 @@ int check_expected_error(THD* thd, RELAY_LOG_INFO* rli, int expected_error) false - condition not met */ -bool st_relay_log_info::is_until_satisfied() +bool st_relay_log_info::is_until_satisfied(my_off_t master_beg_pos) { const char *log_name; ulonglong log_pos; @@ -3149,7 +3154,7 @@ bool st_relay_log_info::is_until_satisfied() if (until_condition == UNTIL_MASTER_POS) { log_name= group_master_log_name; - log_pos= group_master_log_pos; + log_pos= master_beg_pos; } else { /* until_condition == UNTIL_RELAY_POS */ @@ -3228,28 +3233,6 @@ static int exec_relay_log_event(THD* thd, RELAY_LOG_INFO* rli) wait for something for example inside of next_event(). */ pthread_mutex_lock(&rli->data_lock); - /* - This tests if the position of the end of the last previous executed event - hits the UNTIL barrier. - We would prefer to test if the position of the start (or possibly) end of - the to-be-read event hits the UNTIL barrier, this is different if there - was an event ignored by the I/O thread just before (BUG#13861 to be - fixed). - */ - if (rli->until_condition!=RELAY_LOG_INFO::UNTIL_NONE && - rli->is_until_satisfied()) - { - char buf[22]; - sql_print_information("Slave SQL thread stopped because it reached its" - " UNTIL position %s", llstr(rli->until_pos(), buf)); - /* - Setting abort_slave flag because we do not want additional message about - error in query execution to be printed. - */ - rli->abort_slave= 1; - pthread_mutex_unlock(&rli->data_lock); - return 1; - } Log_event * ev = next_event(rli); @@ -3266,6 +3249,27 @@ static int exec_relay_log_event(THD* thd, RELAY_LOG_INFO* rli) int type_code = ev->get_type_code(); int exec_res; + /* + This tests if the position of the beginning of the current event + hits the UNTIL barrier. + */ + if (rli->until_condition != RELAY_LOG_INFO::UNTIL_NONE && + rli->is_until_satisfied((thd->options & OPTION_BEGIN || !ev->log_pos) ? + rli->group_master_log_pos : + ev->log_pos - ev->data_written)) + { + char buf[22]; + sql_print_information("Slave SQL thread stopped because it reached its" + " UNTIL position %s", llstr(rli->until_pos(), buf)); + /* + Setting abort_slave flag because we do not want additional message about + error in query execution to be printed. + */ + rli->abort_slave= 1; + pthread_mutex_unlock(&rli->data_lock); + delete ev; + return 1; + } /* Queries originating from this server must be skipped. Low-level events (Format_desc, Rotate, Stop) from this server @@ -3977,6 +3981,22 @@ Slave SQL thread aborted. Can't execute init_slave query"); } } + /* + First check until condition - probably there is nothing to execute. We + do not want to wait for next event in this case. + */ + pthread_mutex_lock(&rli->data_lock); + if (rli->until_condition != RELAY_LOG_INFO::UNTIL_NONE && + rli->is_until_satisfied(rli->group_master_log_pos)) + { + char buf[22]; + sql_print_information("Slave SQL thread stopped because it reached its" + " UNTIL position %s", llstr(rli->until_pos(), buf)); + pthread_mutex_unlock(&rli->data_lock); + goto err; + } + pthread_mutex_unlock(&rli->data_lock); + /* Read queries from the IO/THREAD until this thread is killed */ while (!sql_slave_killed(thd,rli)) diff --git a/sql/slave.h b/sql/slave.h index c61787cdf3b..da548e145d3 100644 --- a/sql/slave.h +++ b/sql/slave.h @@ -348,7 +348,7 @@ typedef struct st_relay_log_info void close_temporary_tables(); /* Check if UNTIL condition is satisfied. See slave.cc for more. */ - bool is_until_satisfied(); + bool is_until_satisfied(my_off_t master_beg_pos); inline ulonglong until_pos() { return ((until_condition == UNTIL_MASTER_POS) ? group_master_log_pos :