From 1faafbb0475c538b1348789a86cd6455ee24d37d Mon Sep 17 00:00:00 2001 From: Andrei Elkin Date: Sun, 28 Feb 2010 19:31:46 +0200 Subject: [PATCH 1/3] Bug #50296 Slave reconnects earlier than the prescribed slave_net_timeout value There was auto-reconnecting by slave earlier than a prescribed by slave_net_timeout value. The issue happened on 64bit solaris that spotted rather incorrect casting of the ulong slave_net_timeout into the uint of mysql.options.read_timeout. Notice, that there is no reason for slave_net_timeout to be of type of ulong. Since it's primarily passed as arg to mysql_options the type can be made as uint to avoid all conversion hassles. That's what the fixes are made. A "side" effect of the patch is a new value for the max of slave_net_timeout to be the max of the unsigned int type (therefore to vary across platforms). Note, a regression test can't be made to run reliably without making it to last over some 20 secs. That's why it is placed in suite/large_tests. mysql-test/suite/large_tests/r/rpl_slave_net_timeout.result: the new test results. mysql-test/suite/large_tests/t/rpl_slave_net_timeout-slave.opt: Initialization of the option that yields slave_net_timeout's default. sql/mysql_priv.h: changing type for slave_net_timeout from ulong to uint sql/mysqld.cc: changing type for slave_net_timeout from ulong to uint sql/sys_vars.cc: Refining the max value for slave_net_timeout to be as the max for uint type. --- .../r/rpl_slave_net_timeout.result | 25 ++++++ .../t/rpl_slave_net_timeout-slave.opt | 1 + .../large_tests/t/rpl_slave_net_timeout.test | 81 +++++++++++++++++++ sql/mysql_priv.h | 3 +- sql/mysqld.cc | 3 +- sql/sys_vars.cc | 2 +- 6 files changed, 112 insertions(+), 3 deletions(-) create mode 100644 mysql-test/suite/large_tests/r/rpl_slave_net_timeout.result create mode 100644 mysql-test/suite/large_tests/t/rpl_slave_net_timeout-slave.opt create mode 100644 mysql-test/suite/large_tests/t/rpl_slave_net_timeout.test diff --git a/mysql-test/suite/large_tests/r/rpl_slave_net_timeout.result b/mysql-test/suite/large_tests/r/rpl_slave_net_timeout.result new file mode 100644 index 00000000000..526221fcb50 --- /dev/null +++ b/mysql-test/suite/large_tests/r/rpl_slave_net_timeout.result @@ -0,0 +1,25 @@ +stop slave; +drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9; +reset master; +reset slave; +drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9; +start slave; +set @save_general_log = @@global.general_log; +set @save_log_output = @@global.log_output; +set @save_slave_net_timeout = @@global.slave_net_timeout; +set @@global.general_log = ON; +set @@global.log_output = 'table,file'; +include/stop_slave.inc +set @@global.slave_net_timeout = @@global.net_read_timeout * 2; +change master to master_host = '127.0.0.1',master_port = 13020, +master_user = 'root', master_heartbeat_period = 0; +include/start_slave.inc +include/stop_slave.inc +select event_time from (select event_time from mysql.general_log as t_1 where command_type like 'Connect' order by event_time desc limit 2) as t_2 order by event_time desc limit 1 into @ts_last; +select event_time from (select event_time from mysql.general_log as t_1 where command_type like 'Connect' order by event_time desc limit 2) as t_2 order by event_time asc limit 1 into @ts_prev; +select @result as 'Must be 1'; +Must be 1 +1 +set @@global.general_log = @save_general_log; +set @@global.log_output = @save_log_output; +set @@global.slave_net_timeout = @save_slave_net_timeout; diff --git a/mysql-test/suite/large_tests/t/rpl_slave_net_timeout-slave.opt b/mysql-test/suite/large_tests/t/rpl_slave_net_timeout-slave.opt new file mode 100644 index 00000000000..281566c9ad9 --- /dev/null +++ b/mysql-test/suite/large_tests/t/rpl_slave_net_timeout-slave.opt @@ -0,0 +1 @@ +--net_read_timeout=5 diff --git a/mysql-test/suite/large_tests/t/rpl_slave_net_timeout.test b/mysql-test/suite/large_tests/t/rpl_slave_net_timeout.test new file mode 100644 index 00000000000..7712c054258 --- /dev/null +++ b/mysql-test/suite/large_tests/t/rpl_slave_net_timeout.test @@ -0,0 +1,81 @@ +# +# Testing reconnecting by slave as specified by `slave_net_timeout' +# +# Bug #50296 Slave reconnects earlier than the prescribed slave_net_timeout value +# +--source include/have_csv.inc +--source include/master-slave.inc + + +# save global env +connection master; +set @save_general_log = @@global.general_log; +set @save_log_output = @@global.log_output; + +connection slave; +set @save_slave_net_timeout = @@global.slave_net_timeout; + +connection master; +set @@global.general_log = ON; +set @@global.log_output = 'table,file'; + +connection slave; +--source include/stop_slave.inc +--disable_warnings +set @@global.slave_net_timeout = @@global.net_read_timeout * 2; +--enable_warnings +let $idle_time=`select @@global.slave_net_timeout * 2`; + +# +# if heartbeat is disabled then reconnecting to the idle master +# should happen with `slave_net_timeout' period. +# Since it's the real time that is measured, `slave_net_timeout' +# merely guarantees that reconnecting can *not* happen earlier of a value specified. +# That is there can't an exact estimate for how many time it will happen. +# +# The following lines verify that having idle master +# for more than 2 * slave_net_timeout seconds and +# slave.net_read_timeout < slave_net_timeout +# won't cause reconnecting by the slave within at least +# slave_net_timeout interval. + +eval change master to master_host = '127.0.0.1',master_port = $MASTER_MYPORT, +master_user = 'root', master_heartbeat_period = 0; + +let $slave_net_timeout = `select @@global.slave_net_timeout`; + +--source include/start_slave.inc + +--disable_query_log +--disable_result_log +eval select 'master is idle for ', sleep($idle_time); +--enable_result_log +--enable_query_log + +--source include/stop_slave.inc + +# querying general-log + +connection master; + +# In particular the last reconnection timestamp must be greater or equal to +# the previous one + slave_net_timeout + +select event_time from (select event_time from mysql.general_log as t_1 where command_type like 'Connect' order by event_time desc limit 2) as t_2 order by event_time desc limit 1 into @ts_last; +select event_time from (select event_time from mysql.general_log as t_1 where command_type like 'Connect' order by event_time desc limit 2) as t_2 order by event_time asc limit 1 into @ts_prev; + +--disable_query_log +eval select time_to_sec(@ts_last) - $slave_net_timeout >= time_to_sec(@ts_prev) into @result; +--enable_query_log + +select @result as 'Must be 1'; + +# cleanup + +# restore global env +connection master; +set @@global.general_log = @save_general_log; +set @@global.log_output = @save_log_output; +connection slave; +set @@global.slave_net_timeout = @save_slave_net_timeout; + diff --git a/sql/mysql_priv.h b/sql/mysql_priv.h index 6e35291b1c7..15cf8729a54 100644 --- a/sql/mysql_priv.h +++ b/sql/mysql_priv.h @@ -1970,7 +1970,8 @@ extern ulong slow_launch_threads, slow_launch_time; extern ulong table_cache_size, table_def_size; extern MYSQL_PLUGIN_IMPORT ulong max_connections; extern ulong max_connect_errors, connect_timeout; -extern ulong slave_net_timeout, slave_trans_retries; +extern ulong slave_trans_retries; +extern uint slave_net_timeout; extern ulong what_to_log,flush_time; extern ulong query_buff_size; extern ulong max_prepared_stmt_count, prepared_stmt_count; diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 05442643e23..45648536f4c 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -453,7 +453,8 @@ ulong table_cache_size, table_def_size; ulong what_to_log; ulong query_buff_size, slow_launch_time, slave_open_temp_tables; ulong open_files_limit, max_binlog_size, max_relay_log_size; -ulong slave_net_timeout, slave_trans_retries; +ulong slave_trans_retries; +uint slave_net_timeout; uint slave_exec_mode_options; ulonglong slave_type_conversions_options; ulong thread_cache_size=0, thread_pool_size= 0; diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index 54e6fdb1d74..051c2173a55 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -2811,7 +2811,7 @@ static Sys_var_ulong Sys_slave_net_timeout( "slave_net_timeout", "Number of seconds to wait for more data " "from a master/slave connection before aborting the read", GLOBAL_VAR(slave_net_timeout), CMD_LINE(REQUIRED_ARG), - VALID_RANGE(1, LONG_TIMEOUT), DEFAULT(SLAVE_NET_TIMEOUT), BLOCK_SIZE(1), + VALID_RANGE(1, UINT_MAX), DEFAULT(SLAVE_NET_TIMEOUT), BLOCK_SIZE(1), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), ON_UPDATE(fix_slave_net_timeout)); From 055c61049e32572464495ef23d0cdf158e083dda Mon Sep 17 00:00:00 2001 From: Andrei Elkin Date: Sun, 28 Feb 2010 19:39:28 +0200 Subject: [PATCH 2/3] bug#50296 a small refinement to the test --- mysql-test/suite/large_tests/r/rpl_slave_net_timeout.result | 2 +- mysql-test/suite/large_tests/t/rpl_slave_net_timeout.test | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mysql-test/suite/large_tests/r/rpl_slave_net_timeout.result b/mysql-test/suite/large_tests/r/rpl_slave_net_timeout.result index 526221fcb50..81de6e228e0 100644 --- a/mysql-test/suite/large_tests/r/rpl_slave_net_timeout.result +++ b/mysql-test/suite/large_tests/r/rpl_slave_net_timeout.result @@ -11,7 +11,7 @@ set @@global.general_log = ON; set @@global.log_output = 'table,file'; include/stop_slave.inc set @@global.slave_net_timeout = @@global.net_read_timeout * 2; -change master to master_host = '127.0.0.1',master_port = 13020, +change master to master_host = '127.0.0.1',master_port = MASTER_PORT, master_user = 'root', master_heartbeat_period = 0; include/start_slave.inc include/stop_slave.inc diff --git a/mysql-test/suite/large_tests/t/rpl_slave_net_timeout.test b/mysql-test/suite/large_tests/t/rpl_slave_net_timeout.test index 7712c054258..9a2cdc3b596 100644 --- a/mysql-test/suite/large_tests/t/rpl_slave_net_timeout.test +++ b/mysql-test/suite/large_tests/t/rpl_slave_net_timeout.test @@ -38,7 +38,7 @@ let $idle_time=`select @@global.slave_net_timeout * 2`; # slave.net_read_timeout < slave_net_timeout # won't cause reconnecting by the slave within at least # slave_net_timeout interval. - +--replace_result $MASTER_MYPORT MASTER_PORT eval change master to master_host = '127.0.0.1',master_port = $MASTER_MYPORT, master_user = 'root', master_heartbeat_period = 0; From 6d4cfbbca528ce54b7c8839082f74e4cac69321c Mon Sep 17 00:00:00 2001 From: Marc Alff Date: Mon, 1 Mar 2010 07:30:22 -0700 Subject: [PATCH 3/3] Bug#50296 Slave reconnects earlier than the prescribed slave_net_timeout value Fixed failed assert on 64 bit platforms, introduced by the previous fix. mysqld: sys_vars.h:125: Assertion `size == sizeof(T)' failed. --- sql/sys_vars.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index 1d3ef17c5a1..b7aa4007153 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -2795,7 +2795,7 @@ static bool fix_slave_net_timeout(sys_var *self, THD *thd, enum_var_type type) mysql_mutex_unlock(&LOCK_active_mi); return false; } -static Sys_var_ulong Sys_slave_net_timeout( +static Sys_var_uint Sys_slave_net_timeout( "slave_net_timeout", "Number of seconds to wait for more data " "from a master/slave connection before aborting the read", GLOBAL_VAR(slave_net_timeout), CMD_LINE(REQUIRED_ARG),