From 9220e09fbc3b33f0dd4858a52e17789e83dbca61 Mon Sep 17 00:00:00 2001 From: Luis Soares Date: Fri, 13 Nov 2009 18:29:30 +0000 Subject: [PATCH] BUG#44188: STOP SLAVE should flush info files and relay logs. Replication info files are not being flushed and synced when the command 'STOP SLAVE' is issued. This means that one cannot just rely on existing values on those files when the slave has been stopped. Having consistent, uncorrupted and up-to-date info files when stopping the slave would be most useful, for instance, for snapshotting purposes (a procedure that is often used for restoring slaves). This patch addresses this by instrumenting the terminate_slave_threads function so that it also flushes and syncs the *info files as well as the relay log whenever it gets called, ie, on 'STOP SLAVE'. Although this imposes a performance trade-off (specifically when stopping the slave), it should have no negative influence on overall replication performance (impact is only noticeable on 'STOP SLAVE'). --- sql/slave.cc | 42 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/sql/slave.cc b/sql/slave.cc index c35f0641729..faee649be32 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -470,6 +470,7 @@ int terminate_slave_threads(Master_info* mi,int thread_mask,bool skip_lock) DBUG_RETURN(0); /* successfully do nothing */ int error,force_all = (thread_mask & SLAVE_FORCE_ALL); pthread_mutex_t *sql_lock = &mi->rli.run_lock, *io_lock = &mi->run_lock; + pthread_mutex_t *log_lock= mi->rli.relay_log.get_log_lock(); if (thread_mask & (SLAVE_IO|SLAVE_FORCE_ALL)) { @@ -481,6 +482,22 @@ int terminate_slave_threads(Master_info* mi,int thread_mask,bool skip_lock) skip_lock)) && !force_all) DBUG_RETURN(error); + + pthread_mutex_lock(log_lock); + + DBUG_PRINT("info",("Flushing relay log and master info file.")); + if (current_thd) + thd_proc_info(current_thd, "Flushing relay log and master info files."); + if (flush_master_info(mi, TRUE /* flush relay log */)) + DBUG_RETURN(ER_ERROR_DURING_FLUSH_LOGS); + + if (my_sync(mi->rli.relay_log.get_log_file()->file, MYF(MY_WME))) + DBUG_RETURN(ER_ERROR_DURING_FLUSH_LOGS); + + if (my_sync(mi->fd, MYF(MY_WME))) + DBUG_RETURN(ER_ERROR_DURING_FLUSH_LOGS); + + pthread_mutex_unlock(log_lock); } if (thread_mask & (SLAVE_SQL|SLAVE_FORCE_ALL)) { @@ -492,8 +509,21 @@ int terminate_slave_threads(Master_info* mi,int thread_mask,bool skip_lock) skip_lock)) && !force_all) DBUG_RETURN(error); + + pthread_mutex_lock(log_lock); + + DBUG_PRINT("info",("Flushing relay-log info file.")); + if (current_thd) + thd_proc_info(current_thd, "Flushing relay-log info file."); + if (flush_relay_log_info(&mi->rli)) + DBUG_RETURN(ER_ERROR_DURING_FLUSH_LOGS); + + if (my_sync(mi->rli.info_fd, MYF(MY_WME))) + DBUG_RETURN(ER_ERROR_DURING_FLUSH_LOGS); + + pthread_mutex_unlock(log_lock); } - DBUG_RETURN(0); + DBUG_RETURN(0); } @@ -4421,8 +4451,9 @@ MYSQL *rpl_connect_master(MYSQL *mysql) rli Relay log information NOTES - - As this is only called by the slave thread, we don't need to - have a lock on this. + - As this is only called by the slave thread or on STOP SLAVE, with the + log_lock grabbed and the slave thread stopped, we don't need to have + a lock here. - If there is an active transaction, then we don't update the position in the relay log. This is to ensure that we re-execute statements if we die in the middle of an transaction that was rolled back. @@ -4473,7 +4504,10 @@ bool flush_relay_log_info(Relay_log_info* rli) error=1; rli->sync_counter= 0; } - /* Flushing the relay log is done by the slave I/O thread */ + /* + Flushing the relay log is done by the slave I/O thread + or by the user on STOP SLAVE. + */ DBUG_RETURN(error); }