Replication:
Now the I/O thread (in flush_master_info()) flushes the relay log to disk after reading every event. This is slower but provides additional safety in case of a brutal crash. I had to make the flush optional (i.e. add an if(some_bool_argument) in the function) because sometimes flush_master_info() is called when there is no usable relay log (the relay log's IO_CACHE is not initialized so it can't be flushed). mysql-test/r/rpl_loaddata_rule_m.result: avoid a harmless error in the .err file; we don't need a slave in this test (even though it's called 'rpl' because it's testing binlog-ignore-db). mysql-test/t/rpl_loaddata_rule_m.test: result update sql/repl_failsafe.cc: update call to flush_master_info() according to new prototype. sql/slave.cc: - Now the I/O thread (in flush_master_info()) flushes the relay log to disk after reading every event. This is slower but provides additional safety in case of a brutal crash. I had to make the flush optional (i.e. add an if(some_bool_argument) in the function) because sometimes flush_master_info() is called when there is no usable relay log (the relay log's IO_CACHE is not initialized so it can't be flushed). - Update version in message. - Remove warning about bug as it's not true anymore (since this changeset). sql/slave.h: new prototype sql/sql_repl.cc: update call to flush_master_info() according to new prototype.
This commit is contained in:
parent
d0d8ba7815
commit
bd6a70019e
@ -5,7 +5,7 @@ reset slave;
|
|||||||
drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
|
drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
|
||||||
start slave;
|
start slave;
|
||||||
drop database if exists mysqltest;
|
drop database if exists mysqltest;
|
||||||
reset master;
|
stop slave;
|
||||||
create database mysqltest;
|
create database mysqltest;
|
||||||
create table t1(a int, b int, unique(b));
|
create table t1(a int, b int, unique(b));
|
||||||
use mysqltest;
|
use mysqltest;
|
||||||
|
@ -9,7 +9,7 @@ drop database if exists mysqltest;
|
|||||||
--enable_warnings
|
--enable_warnings
|
||||||
|
|
||||||
connection slave;
|
connection slave;
|
||||||
reset master;
|
stop slave; # don't need slave for this test
|
||||||
|
|
||||||
# Test logging on master
|
# Test logging on master
|
||||||
|
|
||||||
|
@ -908,7 +908,12 @@ int load_master_data(THD* thd)
|
|||||||
// don't hit the magic number
|
// don't hit the magic number
|
||||||
if (active_mi->master_log_pos < BIN_LOG_HEADER_SIZE)
|
if (active_mi->master_log_pos < BIN_LOG_HEADER_SIZE)
|
||||||
active_mi->master_log_pos = BIN_LOG_HEADER_SIZE;
|
active_mi->master_log_pos = BIN_LOG_HEADER_SIZE;
|
||||||
flush_master_info(active_mi);
|
/*
|
||||||
|
Relay log's IO_CACHE may not be inited (even if we are sure that some
|
||||||
|
host was specified; there could have been a problem when replication
|
||||||
|
started, which left the relay log's IO_CACHE not inited).
|
||||||
|
*/
|
||||||
|
flush_master_info(active_mi, 0);
|
||||||
}
|
}
|
||||||
mysql_free_result(master_status_res);
|
mysql_free_result(master_status_res);
|
||||||
}
|
}
|
||||||
|
51
sql/slave.cc
51
sql/slave.cc
@ -1107,7 +1107,7 @@ static int get_master_version_and_clock(MYSQL* mysql, MASTER_INFO* mi)
|
|||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
/* 5.0 is not supported */
|
/* 5.0 is not supported */
|
||||||
errmsg = "Master reported an unrecognized MySQL version. Note that 4.0 \
|
errmsg = "Master reported an unrecognized MySQL version. Note that 4.1 \
|
||||||
slaves can't replicate a 5.0 or newer master.";
|
slaves can't replicate a 5.0 or newer master.";
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -1368,32 +1368,9 @@ int init_relay_log_info(RELAY_LOG_INFO* rli, const char* info_fname)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
The relay log will now be opened, as a SEQ_READ_APPEND IO_CACHE. It is
|
The relay log will now be opened, as a SEQ_READ_APPEND IO_CACHE.
|
||||||
notable that the last kilobytes of it (8 kB for example) may live in
|
Note that the I/O thread flushes it to disk after writing every event, in
|
||||||
memory, not on disk (depending on what the thread using it does). While
|
flush_master_info(mi, 1).
|
||||||
this is efficient, it has a side-effect one must know:
|
|
||||||
The size of the relay log on disk (displayed by 'ls -l' on Unix) can be a
|
|
||||||
few kilobytes less than one would expect by doing SHOW SLAVE STATUS; this
|
|
||||||
happens when only the IO thread is started (not the SQL thread). The
|
|
||||||
"missing" kilobytes are in memory, are preserved during 'STOP SLAVE; START
|
|
||||||
SLAVE IO_THREAD', and are flushed to disk when the slave's mysqld stops. So
|
|
||||||
this does not cause any bug. Example of how disk size grows by leaps:
|
|
||||||
|
|
||||||
Read_Master_Log_Pos: 7811 -rw-rw---- 1 guilhem qq 4 Jun 5 16:19 gbichot2-relay-bin.002
|
|
||||||
...later...
|
|
||||||
Read_Master_Log_Pos: 9744 -rw-rw---- 1 guilhem qq 8192 Jun 5 16:27 gbichot2-relay-bin.002
|
|
||||||
|
|
||||||
See how 4 is less than 7811 and 8192 is less than 9744.
|
|
||||||
|
|
||||||
WARNING: this is risky because the slave can stay like this for a long
|
|
||||||
time; then if it has a power failure, master.info says the I/O thread has
|
|
||||||
read until 9744 while the relay-log contains only until 8192 (the
|
|
||||||
in-memory part from 8192 to 9744 has been lost), so the SQL slave thread
|
|
||||||
will miss some events, silently breaking replication.
|
|
||||||
Ideally we would like to flush master.info only when we know that the relay
|
|
||||||
log has no in-memory tail.
|
|
||||||
Note that the above problem may arise only when only the IO thread is
|
|
||||||
started, which is unlikely.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1850,7 +1827,7 @@ file '%s')", fname);
|
|||||||
mi->inited = 1;
|
mi->inited = 1;
|
||||||
// now change cache READ -> WRITE - must do this before flush_master_info
|
// now change cache READ -> WRITE - must do this before flush_master_info
|
||||||
reinit_io_cache(&mi->file, WRITE_CACHE,0L,0,1);
|
reinit_io_cache(&mi->file, WRITE_CACHE,0L,0,1);
|
||||||
if ((error=test(flush_master_info(mi))))
|
if ((error=test(flush_master_info(mi, 1))))
|
||||||
sql_print_error("Failed to flush master info file");
|
sql_print_error("Failed to flush master info file");
|
||||||
pthread_mutex_unlock(&mi->data_lock);
|
pthread_mutex_unlock(&mi->data_lock);
|
||||||
DBUG_RETURN(error);
|
DBUG_RETURN(error);
|
||||||
@ -2100,7 +2077,7 @@ int show_master_info(THD* thd, MASTER_INFO* mi)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool flush_master_info(MASTER_INFO* mi)
|
bool flush_master_info(MASTER_INFO* mi, bool flush_relay_log_cache)
|
||||||
{
|
{
|
||||||
IO_CACHE* file = &mi->file;
|
IO_CACHE* file = &mi->file;
|
||||||
char lbuf[22];
|
char lbuf[22];
|
||||||
@ -2124,6 +2101,20 @@ bool flush_master_info(MASTER_INFO* mi)
|
|||||||
(int)(mi->ssl), mi->ssl_ca, mi->ssl_capath, mi->ssl_cert,
|
(int)(mi->ssl), mi->ssl_ca, mi->ssl_capath, mi->ssl_cert,
|
||||||
mi->ssl_cipher, mi->ssl_key);
|
mi->ssl_cipher, mi->ssl_key);
|
||||||
flush_io_cache(file);
|
flush_io_cache(file);
|
||||||
|
/*
|
||||||
|
Flush the relay log to disk. If we don't do it, then the relay log will
|
||||||
|
have some part (its last kilobytes) in memory only, so if the slave server
|
||||||
|
dies now, with, say, from master's position 100 to 150 in memory only (not
|
||||||
|
on disk), and with position 150 in master.info, then when the slave
|
||||||
|
restarts, the I/O thread will fetch binlogs from 150, so in the relay log
|
||||||
|
we will have "[0, 100] U [150, infinity[" and nobody will notice it, so the
|
||||||
|
SQL thread will jump from 100 to 150, and replication will silently break.
|
||||||
|
|
||||||
|
When we come to this place in code, relay log may or may not be initialized;
|
||||||
|
the caller is responsible for setting 'flush_relay_log_cache' accordingly.
|
||||||
|
*/
|
||||||
|
if (flush_relay_log_cache)
|
||||||
|
flush_io_cache(mi->rli.relay_log.get_log_file());
|
||||||
DBUG_RETURN(0);
|
DBUG_RETURN(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2982,7 +2973,7 @@ reconnect done to recover from failed read");
|
|||||||
sql_print_error("Slave I/O thread could not queue event from master");
|
sql_print_error("Slave I/O thread could not queue event from master");
|
||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
flush_master_info(mi);
|
flush_master_info(mi, 1); /* sure that we can flush the relay log */
|
||||||
/*
|
/*
|
||||||
See if the relay logs take too much space.
|
See if the relay logs take too much space.
|
||||||
We don't lock mi->rli.log_space_lock here; this dirty read saves time
|
We don't lock mi->rli.log_space_lock here; this dirty read saves time
|
||||||
|
@ -461,7 +461,7 @@ typedef struct st_table_rule_ent
|
|||||||
|
|
||||||
int init_slave();
|
int init_slave();
|
||||||
void init_slave_skip_errors(const char* arg);
|
void init_slave_skip_errors(const char* arg);
|
||||||
bool flush_master_info(MASTER_INFO* mi);
|
bool flush_master_info(MASTER_INFO* mi, bool flush_relay_log_cache);
|
||||||
bool flush_relay_log_info(RELAY_LOG_INFO* rli);
|
bool flush_relay_log_info(RELAY_LOG_INFO* rli);
|
||||||
int register_slave_on_master(MYSQL* mysql);
|
int register_slave_on_master(MYSQL* mysql);
|
||||||
int terminate_slave_threads(MASTER_INFO* mi, int thread_mask,
|
int terminate_slave_threads(MASTER_INFO* mi, int thread_mask,
|
||||||
|
@ -1085,8 +1085,11 @@ int change_master(THD* thd, MASTER_INFO* mi)
|
|||||||
strmake(mi->master_log_name, mi->rli.group_master_log_name,
|
strmake(mi->master_log_name, mi->rli.group_master_log_name,
|
||||||
sizeof(mi->master_log_name)-1);
|
sizeof(mi->master_log_name)-1);
|
||||||
}
|
}
|
||||||
|
/*
|
||||||
flush_master_info(mi);
|
Relay log's IO_CACHE may not be inited, if rli->inited==0 (server was never
|
||||||
|
a slave before).
|
||||||
|
*/
|
||||||
|
flush_master_info(mi, 0);
|
||||||
if (need_relay_log_purge)
|
if (need_relay_log_purge)
|
||||||
{
|
{
|
||||||
relay_log_purge= 1;
|
relay_log_purge= 1;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user