BUG#11752315 - 43460: STOP SLAVE UNABLE TO COMPLETE WHEN SLAVE THREAD IS TRYING TO RECONNECT TO
Problem: The basic problem is the way the thread sleeps in mysql-5.5 and also in mysql-5.1 when we execute STOP SLAVE on the Windows platform. On Windows, if STOP SLAVE is executed after the master dies, there is a long wait before STOP SLAVE returns. This is because the slave thread is sleeping. The sleep is uninterruptible in the two versions above, which was fixed by Davi's patch for BUG#11765860 in mysql-trunk. Backporting his patch to mysql-5.5 fixes the problem. Solution: A new pair of mutex and condition variable is introduced to synchronize thread sleep and finalization. A new mutex is required because the slave threads are terminated while holding the slave thread locks (run_lock), which cannot be relinquished during termination as this would affect the lock order. mysql-test/suite/rpl/r/rpl_start_stop_slave.result: The result file associated with the added test. mysql-test/suite/rpl/t/rpl_start_stop_slave.test: A test to check the new functionality. sql/rpl_mi.cc: The constructor initializes the new mutex and condition variable for the master_info. sql/rpl_mi.h: The condition variable and mutex have been added to the master_info. sql/rpl_rli.cc: The constructor initializes the new mutex and condition variable for the relay_log_info. sql/rpl_rli.h: The condition variable and mutex have been added to the relay_log_info. sql/slave.cc: Use a timed wait on a condition variable to implement an interruptible sleep. The wait is registered with the THD object so that the thread will be woken up if killed.
This commit is contained in:
parent
31a1f8ef54
commit
e69da6dc3e
10
mysql-test/suite/rpl/r/rpl_start_stop_slave.result
Normal file
10
mysql-test/suite/rpl/r/rpl_start_stop_slave.result
Normal file
@ -0,0 +1,10 @@
|
||||
include/master-slave.inc
|
||||
[connection master]
|
||||
set @time_before_kill := (select CURRENT_TIMESTAMP);
|
||||
[Time before the query]
|
||||
[Connection ID of the slave I/O thread found]
|
||||
kill <connection_id>;
|
||||
set @time_after_kill := (select CURRENT_TIMESTAMP);
|
||||
[Time after the query]
|
||||
[Killing of the slave IO thread was successful]
|
||||
include/rpl_end.inc
|
44
mysql-test/suite/rpl/t/rpl_start_stop_slave.test
Normal file
44
mysql-test/suite/rpl/t/rpl_start_stop_slave.test
Normal file
@ -0,0 +1,44 @@
|
||||
#
|
||||
#BUG#11752315 : STOP SLAVE UNABLE TO COMPLETE WHEN SLAVE THREAD IS TRYING
|
||||
# TO RECONNECT TO
|
||||
#
|
||||
# ==== Purpose ====
|
||||
#
|
||||
#Tests that the slave does not sleep for a long duration after the
|
||||
#master is killed and we do a START_SLAVE and STOP_SLAVE.
|
||||
#
|
||||
# ==== Method ====
|
||||
#
|
||||
#This exercises the new functionality of an interruptible sleep in the slave.
|
||||
#We find the thread id for the slave thread. On finding the thread ID of the
|
||||
#slave thread we kill the slave thread. A successful kill in less than 60 sec
|
||||
#should serve the purpose of checking the functionality.
|
||||
#
|
||||
|
||||
--source include/have_log_bin.inc
|
||||
--source include/master-slave.inc
|
||||
|
||||
connection slave;
|
||||
--let $connection_id=`SELECT id FROM information_schema.processlist where state LIKE 'Waiting for master to send event'`
|
||||
|
||||
set @time_before_kill := (select CURRENT_TIMESTAMP);
|
||||
|
||||
--echo [Time before the query]
|
||||
--echo [Connection ID of the slave I/O thread found]
|
||||
|
||||
--replace_regex /kill [0-9]*/kill <connection_id>/
|
||||
--eval kill $connection_id
|
||||
|
||||
set @time_after_kill := (select CURRENT_TIMESTAMP);
|
||||
|
||||
--echo [Time after the query]
|
||||
|
||||
# Abort the test if killing the slave I/O thread took longer than 60 seconds.
# TIMESTAMPDIFF(unit, expr1, expr2) evaluates expr2 - expr1, so the earlier
# timestamp (@time_before_kill) must be passed first; with the arguments the
# other way round the difference is never positive and the check cannot fail.
if(`select TIMESTAMPDIFF(SECOND, @time_before_kill, @time_after_kill) > 60`)
|
||||
{
|
||||
--echo # assert : The difference between the timestamps 'time_after_kill' and 'time_before_kill' should be less than 60sec.
|
||||
--die
|
||||
}
|
||||
|
||||
--echo [Killing of the slave IO thread was successful]
|
||||
# End of test
|
||||
--source include/rpl_end.inc
|
@ -7682,8 +7682,10 @@ PSI_mutex_key key_BINLOG_LOCK_index, key_BINLOG_LOCK_prep_xids,
|
||||
key_LOCK_system_variables_hash, key_LOCK_table_share, key_LOCK_thd_data,
|
||||
key_LOCK_user_conn, key_LOCK_uuid_generator, key_LOG_LOCK_log,
|
||||
key_master_info_data_lock, key_master_info_run_lock,
|
||||
key_master_info_sleep_lock,
|
||||
key_mutex_slave_reporting_capability_err_lock, key_relay_log_info_data_lock,
|
||||
key_relay_log_info_log_space_lock, key_relay_log_info_run_lock,
|
||||
key_relay_log_info_sleep_lock,
|
||||
key_structure_guard_mutex, key_TABLE_SHARE_LOCK_ha_data,
|
||||
key_LOCK_error_messages, key_LOG_INFO_lock, key_LOCK_thread_count,
|
||||
key_PARTITION_LOCK_auto_inc;
|
||||
@ -7729,10 +7731,12 @@ static PSI_mutex_info all_server_mutexes[]=
|
||||
{ &key_LOG_LOCK_log, "LOG::LOCK_log", 0},
|
||||
{ &key_master_info_data_lock, "Master_info::data_lock", 0},
|
||||
{ &key_master_info_run_lock, "Master_info::run_lock", 0},
|
||||
{ &key_master_info_sleep_lock, "Master_info::sleep_lock", 0},
|
||||
{ &key_mutex_slave_reporting_capability_err_lock, "Slave_reporting_capability::err_lock", 0},
|
||||
{ &key_relay_log_info_data_lock, "Relay_log_info::data_lock", 0},
|
||||
{ &key_relay_log_info_log_space_lock, "Relay_log_info::log_space_lock", 0},
|
||||
{ &key_relay_log_info_run_lock, "Relay_log_info::run_lock", 0},
|
||||
{ &key_relay_log_info_sleep_lock, "Relay_log_info::sleep_lock", 0},
|
||||
{ &key_structure_guard_mutex, "Query_cache::structure_guard_mutex", 0},
|
||||
{ &key_TABLE_SHARE_LOCK_ha_data, "TABLE_SHARE::LOCK_ha_data", 0},
|
||||
{ &key_LOCK_error_messages, "LOCK_error_messages", PSI_FLAG_GLOBAL},
|
||||
@ -7768,8 +7772,10 @@ PSI_cond_key key_BINLOG_COND_prep_xids, key_BINLOG_update_cond,
|
||||
key_delayed_insert_cond, key_delayed_insert_cond_client,
|
||||
key_item_func_sleep_cond, key_master_info_data_cond,
|
||||
key_master_info_start_cond, key_master_info_stop_cond,
|
||||
key_master_info_sleep_cond,
|
||||
key_relay_log_info_data_cond, key_relay_log_info_log_space_cond,
|
||||
key_relay_log_info_start_cond, key_relay_log_info_stop_cond,
|
||||
key_relay_log_info_sleep_cond,
|
||||
key_TABLE_SHARE_cond, key_user_level_lock_cond,
|
||||
key_COND_thread_count, key_COND_thread_cache, key_COND_flush_thread_cache;
|
||||
PSI_cond_key key_RELAYLOG_update_cond;
|
||||
@ -7797,10 +7803,12 @@ static PSI_cond_info all_server_conds[]=
|
||||
{ &key_master_info_data_cond, "Master_info::data_cond", 0},
|
||||
{ &key_master_info_start_cond, "Master_info::start_cond", 0},
|
||||
{ &key_master_info_stop_cond, "Master_info::stop_cond", 0},
|
||||
{ &key_master_info_sleep_cond, "Master_info::sleep_cond", 0},
|
||||
{ &key_relay_log_info_data_cond, "Relay_log_info::data_cond", 0},
|
||||
{ &key_relay_log_info_log_space_cond, "Relay_log_info::log_space_cond", 0},
|
||||
{ &key_relay_log_info_start_cond, "Relay_log_info::start_cond", 0},
|
||||
{ &key_relay_log_info_stop_cond, "Relay_log_info::stop_cond", 0},
|
||||
{ &key_relay_log_info_sleep_cond, "Relay_log_info::sleep_cond", 0},
|
||||
{ &key_TABLE_SHARE_cond, "TABLE_SHARE::cond", 0},
|
||||
{ &key_user_level_lock_cond, "User_level_lock::cond", 0},
|
||||
{ &key_COND_thread_count, "COND_thread_count", PSI_FLAG_GLOBAL},
|
||||
|
@ -244,8 +244,10 @@ extern PSI_mutex_key key_BINLOG_LOCK_index, key_BINLOG_LOCK_prep_xids,
|
||||
key_LOCK_table_share, key_LOCK_thd_data,
|
||||
key_LOCK_user_conn, key_LOCK_uuid_generator, key_LOG_LOCK_log,
|
||||
key_master_info_data_lock, key_master_info_run_lock,
|
||||
key_master_info_sleep_lock,
|
||||
key_mutex_slave_reporting_capability_err_lock, key_relay_log_info_data_lock,
|
||||
key_relay_log_info_log_space_lock, key_relay_log_info_run_lock,
|
||||
key_relay_log_info_sleep_lock,
|
||||
key_structure_guard_mutex, key_TABLE_SHARE_LOCK_ha_data,
|
||||
key_LOCK_error_messages, key_LOCK_thread_count, key_PARTITION_LOCK_auto_inc;
|
||||
extern PSI_mutex_key key_RELAYLOG_LOCK_index;
|
||||
@ -264,8 +266,10 @@ extern PSI_cond_key key_BINLOG_COND_prep_xids, key_BINLOG_update_cond,
|
||||
key_delayed_insert_cond, key_delayed_insert_cond_client,
|
||||
key_item_func_sleep_cond, key_master_info_data_cond,
|
||||
key_master_info_start_cond, key_master_info_stop_cond,
|
||||
key_master_info_sleep_cond,
|
||||
key_relay_log_info_data_cond, key_relay_log_info_log_space_cond,
|
||||
key_relay_log_info_start_cond, key_relay_log_info_stop_cond,
|
||||
key_relay_log_info_sleep_cond,
|
||||
key_TABLE_SHARE_cond, key_user_level_lock_cond,
|
||||
key_COND_thread_count, key_COND_thread_cache, key_COND_flush_thread_cache;
|
||||
extern PSI_cond_key key_RELAYLOG_update_cond;
|
||||
|
@ -49,9 +49,11 @@ Master_info::Master_info(bool is_slave_recovery)
|
||||
bzero((char*) &file, sizeof(file));
|
||||
mysql_mutex_init(key_master_info_run_lock, &run_lock, MY_MUTEX_INIT_FAST);
|
||||
mysql_mutex_init(key_master_info_data_lock, &data_lock, MY_MUTEX_INIT_FAST);
|
||||
mysql_mutex_init(key_master_info_sleep_lock, &sleep_lock, MY_MUTEX_INIT_FAST);
|
||||
mysql_cond_init(key_master_info_data_cond, &data_cond, NULL);
|
||||
mysql_cond_init(key_master_info_start_cond, &start_cond, NULL);
|
||||
mysql_cond_init(key_master_info_stop_cond, &stop_cond, NULL);
|
||||
mysql_cond_init(key_master_info_sleep_cond, &sleep_cond, NULL);
|
||||
}
|
||||
|
||||
Master_info::~Master_info()
|
||||
@ -59,9 +61,11 @@ Master_info::~Master_info()
|
||||
delete_dynamic(&ignore_server_ids);
|
||||
mysql_mutex_destroy(&run_lock);
|
||||
mysql_mutex_destroy(&data_lock);
|
||||
mysql_mutex_destroy(&sleep_lock);
|
||||
mysql_cond_destroy(&data_cond);
|
||||
mysql_cond_destroy(&start_cond);
|
||||
mysql_cond_destroy(&stop_cond);
|
||||
mysql_cond_destroy(&sleep_cond);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -78,8 +78,8 @@ class Master_info : public Slave_reporting_capability
|
||||
File fd; // we keep the file open, so we need to remember the file pointer
|
||||
IO_CACHE file;
|
||||
|
||||
mysql_mutex_t data_lock, run_lock;
|
||||
mysql_cond_t data_cond, start_cond, stop_cond;
|
||||
mysql_mutex_t data_lock, run_lock, sleep_lock;
|
||||
mysql_cond_t data_cond, start_cond, stop_cond, sleep_cond;
|
||||
THD *io_thd;
|
||||
MYSQL* mysql;
|
||||
uint32 file_id; /* for 3.23 load data infile */
|
||||
|
@ -75,10 +75,12 @@ Relay_log_info::Relay_log_info(bool is_slave_recovery)
|
||||
&data_lock, MY_MUTEX_INIT_FAST);
|
||||
mysql_mutex_init(key_relay_log_info_log_space_lock,
|
||||
&log_space_lock, MY_MUTEX_INIT_FAST);
|
||||
mysql_mutex_init(key_relay_log_info_sleep_lock, &sleep_lock, MY_MUTEX_INIT_FAST);
|
||||
mysql_cond_init(key_relay_log_info_data_cond, &data_cond, NULL);
|
||||
mysql_cond_init(key_relay_log_info_start_cond, &start_cond, NULL);
|
||||
mysql_cond_init(key_relay_log_info_stop_cond, &stop_cond, NULL);
|
||||
mysql_cond_init(key_relay_log_info_log_space_cond, &log_space_cond, NULL);
|
||||
mysql_cond_init(key_relay_log_info_sleep_cond, &sleep_cond, NULL);
|
||||
relay_log.init_pthread_objects();
|
||||
DBUG_VOID_RETURN;
|
||||
}
|
||||
@ -91,10 +93,12 @@ Relay_log_info::~Relay_log_info()
|
||||
mysql_mutex_destroy(&run_lock);
|
||||
mysql_mutex_destroy(&data_lock);
|
||||
mysql_mutex_destroy(&log_space_lock);
|
||||
mysql_mutex_destroy(&sleep_lock);
|
||||
mysql_cond_destroy(&data_cond);
|
||||
mysql_cond_destroy(&start_cond);
|
||||
mysql_cond_destroy(&stop_cond);
|
||||
mysql_cond_destroy(&log_space_cond);
|
||||
mysql_cond_destroy(&sleep_cond);
|
||||
relay_log.cleanup();
|
||||
DBUG_VOID_RETURN;
|
||||
}
|
||||
|
@ -138,15 +138,13 @@ public:
|
||||
standard lock acquisition order to avoid deadlocks:
|
||||
run_lock, data_lock, relay_log.LOCK_log, relay_log.LOCK_index
|
||||
*/
|
||||
mysql_mutex_t data_lock, run_lock;
|
||||
|
||||
mysql_mutex_t data_lock, run_lock, sleep_lock;
|
||||
/*
|
||||
start_cond is broadcast when SQL thread is started
|
||||
stop_cond - when stopped
|
||||
data_cond - when data protected by data_lock changes
|
||||
*/
|
||||
mysql_cond_t start_cond, stop_cond, data_cond;
|
||||
|
||||
mysql_cond_t start_cond, stop_cond, data_cond, sleep_cond;
|
||||
/* parent Master_info structure */
|
||||
Master_info *mi;
|
||||
|
||||
|
68
sql/slave.cc
68
sql/slave.cc
@ -68,8 +68,6 @@ bool use_slave_mask = 0;
|
||||
MY_BITMAP slave_error_mask;
|
||||
char slave_skip_error_names[SHOW_VAR_FUNC_BUFF_SIZE];
|
||||
|
||||
typedef bool (*CHECK_KILLED_FUNC)(THD*,void*);
|
||||
|
||||
char* slave_load_tmpdir = 0;
|
||||
Master_info *active_mi= 0;
|
||||
my_bool replicate_same_server_id;
|
||||
@ -152,9 +150,6 @@ static int safe_reconnect(THD* thd, MYSQL* mysql, Master_info* mi,
|
||||
bool suppress_warnings);
|
||||
static int connect_to_master(THD* thd, MYSQL* mysql, Master_info* mi,
|
||||
bool reconnect, bool suppress_warnings);
|
||||
static int safe_sleep(THD* thd, int sec, CHECK_KILLED_FUNC thread_killed,
|
||||
void* thread_killed_arg);
|
||||
static int get_master_version_and_clock(MYSQL* mysql, Master_info* mi);
|
||||
static Log_event* next_event(Relay_log_info* rli);
|
||||
static int queue_event(Master_info* mi,const char* buf,ulong event_len);
|
||||
static int terminate_slave_thread(THD *thd,
|
||||
@ -2068,35 +2063,42 @@ static int init_slave_thread(THD* thd, SLAVE_THD_TYPE thd_type)
|
||||
DBUG_RETURN(0);
|
||||
}
|
||||
|
||||
/*
|
||||
Sleep for a given amount of time or until killed.
|
||||
|
||||
static int safe_sleep(THD* thd, int sec, CHECK_KILLED_FUNC thread_killed,
|
||||
void* thread_killed_arg)
|
||||
@param thd Thread context of the current thread.
|
||||
@param seconds The number of seconds to sleep.
|
||||
@param func Function object to check if the thread has been killed.
|
||||
@param info The Rpl_info object associated with this sleep.
|
||||
|
||||
@retval True if the thread has been killed, false otherwise.
|
||||
*/
|
||||
template <typename killed_func, typename rpl_info>
|
||||
static inline bool slave_sleep(THD *thd, time_t seconds,
|
||||
killed_func func, rpl_info info)
|
||||
{
|
||||
int nap_time;
|
||||
thr_alarm_t alarmed;
|
||||
DBUG_ENTER("safe_sleep");
|
||||
|
||||
thr_alarm_init(&alarmed);
|
||||
time_t start_time= my_time(0);
|
||||
time_t end_time= start_time+sec;
|
||||
bool ret;
|
||||
struct timespec abstime;
|
||||
const char *old_proc_info;
|
||||
|
||||
while ((nap_time= (int) (end_time - start_time)) > 0)
|
||||
mysql_mutex_t *lock= &info->sleep_lock;
|
||||
mysql_cond_t *cond= &info->sleep_cond;
|
||||
|
||||
/* Absolute system time at which the sleep time expires. */
|
||||
set_timespec(abstime, seconds);
|
||||
mysql_mutex_lock(lock);
|
||||
old_proc_info= thd->enter_cond(cond, lock, thd->proc_info);
|
||||
|
||||
while (! (ret= func(thd, info)))
|
||||
{
|
||||
ALARM alarm_buff;
|
||||
/*
|
||||
The only reason we are asking for alarm is so that
|
||||
we will be woken up in case of murder, so if we do not get killed,
|
||||
set the alarm so it goes off after we wake up naturally
|
||||
*/
|
||||
thr_alarm(&alarmed, 2 * nap_time, &alarm_buff);
|
||||
sleep(nap_time);
|
||||
thr_end_alarm(&alarmed);
|
||||
|
||||
if ((*thread_killed)(thd,thread_killed_arg))
|
||||
DBUG_RETURN(1);
|
||||
start_time= my_time(0);
|
||||
int error= mysql_cond_timedwait(cond, lock, &abstime);
|
||||
if (error == ETIMEDOUT || error == ETIME)
|
||||
break;
|
||||
}
|
||||
DBUG_RETURN(0);
|
||||
/* Implicitly unlocks the mutex. */
|
||||
thd->exit_cond(old_proc_info);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
@ -2555,8 +2557,8 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli)
|
||||
exec_res= 0;
|
||||
rli->cleanup_context(thd, 1);
|
||||
/* chance for concurrent connection to get more locks */
|
||||
safe_sleep(thd, min(rli->trans_retries, MAX_SLAVE_RETRY_PAUSE),
|
||||
(CHECK_KILLED_FUNC)sql_slave_killed, (void*)rli);
|
||||
slave_sleep(thd, min(rli->trans_retries, MAX_SLAVE_RETRY_PAUSE),
|
||||
sql_slave_killed, rli);
|
||||
mysql_mutex_lock(&rli->data_lock); // because of SHOW STATUS
|
||||
rli->trans_retries++;
|
||||
rli->retried_trans++;
|
||||
@ -2654,8 +2656,7 @@ static int try_to_reconnect(THD *thd, MYSQL *mysql, Master_info *mi,
|
||||
{
|
||||
if (*retry_count > master_retry_count)
|
||||
return 1; // Don't retry forever
|
||||
safe_sleep(thd, mi->connect_retry, (CHECK_KILLED_FUNC) io_slave_killed,
|
||||
(void *) mi);
|
||||
slave_sleep(thd, mi->connect_retry, io_slave_killed, mi);
|
||||
}
|
||||
if (check_io_slave_killed(thd, mi, messages[SLAVE_RECON_MSG_KILLED_WAITING]))
|
||||
return 1;
|
||||
@ -4248,8 +4249,7 @@ static int connect_to_master(THD* thd, MYSQL* mysql, Master_info* mi,
|
||||
change_rpl_status(RPL_ACTIVE_SLAVE,RPL_LOST_SOLDIER);
|
||||
break;
|
||||
}
|
||||
safe_sleep(thd,mi->connect_retry,(CHECK_KILLED_FUNC)io_slave_killed,
|
||||
(void*)mi);
|
||||
slave_sleep(thd,mi->connect_retry,io_slave_killed, mi);
|
||||
}
|
||||
|
||||
if (!slave_was_killed)
|
||||
|
Loading…
x
Reference in New Issue
Block a user