MDEV-27436: binlog corruption (/tmp no space left on device at the same moment)
This commit fixes several bugs in error handling around disk full when writing the statement/transaction binlog caches: 1. If the error occurs during a non-transactional statement, the code attempts to binlog the partially executed statement (as it cannot roll back). The stmt_cache->error was still set from the disk full error. This caused MYSQL_BIN_LOG::write_cache() to get an error while trying to read the cache to copy it to the binlog. This was then wrongly interpreted as a disk full error writing to the binlog file. As a result, a partial event group containing just a GTID event (no query or commit) was binlogged. Fixed by checking if an error is set in the statement cache, and if so binlog an INCIDENT event instead of a corrupt event group, as for other errors. 2. For LOAD DATA LOCAL INFILE, if a disk full error occured while writing to the statement cache, the code would attempt to abort and read-and-discard any remaining data sent by the client. The discard code would however continue trying to write data to the statement cache, and wrongly interpret another disk full error as end-of-file from the client. This left the client connection with extra data which corrupts the communication for the next command, as well as again causing an corrupt/incomplete event to be binlogged. Fixed by restoring the default read function before reading any remaining data from the client connection. Reviewed-by: Andrei Elkin <andrei.elkin@mariadb.com> Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org>
This commit is contained in:
parent
e52777f1a4
commit
6fa69ad747
@ -0,0 +1,38 @@
|
||||
include/master-slave.inc
|
||||
[connection master]
|
||||
connection master;
|
||||
SET @save_binlog_stmt_cache_size= @@GLOBAL.binlog_stmt_cache_size;
|
||||
SET GLOBAL binlog_stmt_cache_size= 4096;
|
||||
CALL mtr.add_suppression('"No space left on device".*An incident event is written to binary log');
|
||||
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=MyISAM;
|
||||
FLUSH STATUS;
|
||||
SHOW STATUS LIKE "binlog_stmt_cache%";
|
||||
Variable_name Value
|
||||
Binlog_stmt_cache_disk_use 0
|
||||
Binlog_stmt_cache_use 0
|
||||
SET @old_dbug= @@SESSION.debug_dbug;
|
||||
SET SESSION debug_dbug="+d,load_data_binlog_cache_error";
|
||||
LOAD DATA CONCURRENT LOCAL INFILE 'std_data/bug30435_5k.txt'
|
||||
REPLACE INTO TABLE t1 (a);
|
||||
ERROR HY000: Error writing file '<FILENAME>' (Errcode: 28 "No space left on device")
|
||||
SET SESSION debug_dbug= @old_dbug;
|
||||
SHOW STATUS LIKE "binlog_stmt_cache%";
|
||||
Variable_name Value
|
||||
Binlog_stmt_cache_disk_use 1
|
||||
Binlog_stmt_cache_use 1
|
||||
SELECT IF(COUNT(*) > 0 AND COUNT(*) < 5000,
|
||||
"ok",
|
||||
CONCAT("ERROR! Row count ", COUNT(*), " not as expected for partially executed query"))
|
||||
AS check_result
|
||||
FROM t1;
|
||||
check_result
|
||||
ok
|
||||
connection slave;
|
||||
include/wait_for_slave_sql_error_and_skip.inc [errno=1590]
|
||||
SELECT COUNT(*) FROM t1;
|
||||
COUNT(*)
|
||||
0
|
||||
connection master;
|
||||
SET GLOBAL binlog_stmt_cache_size= @save_binlog_stmt_cache_size;
|
||||
DROP TABLE t1;
|
||||
include/rpl_end.inc
|
51
mysql-test/suite/rpl/r/rpl_binlog_cache_disk_full_row.result
Normal file
51
mysql-test/suite/rpl/r/rpl_binlog_cache_disk_full_row.result
Normal file
@ -0,0 +1,51 @@
|
||||
include/master-slave.inc
|
||||
[connection master]
|
||||
connection master;
|
||||
SET @save_binlog_stmt_cache_size= @@GLOBAL.binlog_stmt_cache_size;
|
||||
SET GLOBAL binlog_stmt_cache_size= 4096;
|
||||
CALL mtr.add_suppression('"No space left on device".*An incident event is written to binary log');
|
||||
CREATE TABLE t1 (a INT PRIMARY KEY, b VARCHAR(255)) ENGINE=MyISAM;
|
||||
FLUSH STATUS;
|
||||
SHOW STATUS LIKE "binlog_stmt_cache%";
|
||||
Variable_name Value
|
||||
Binlog_stmt_cache_disk_use 0
|
||||
Binlog_stmt_cache_use 0
|
||||
INSERT INTO t1 VALUES (0, CONCAT("?", "-", REPEAT("x", 200)));
|
||||
INSERT INTO t1 SELECT a+1, CONCAT(a, "-", REPEAT("x", 200)) FROM t1;
|
||||
INSERT INTO t1 SELECT a+2, CONCAT(a, "-", REPEAT("x", 200)) FROM t1;
|
||||
INSERT INTO t1 SELECT a+4, CONCAT(a, "-", REPEAT("x", 200)) FROM t1;
|
||||
INSERT INTO t1 SELECT a+8, CONCAT(a, "-", REPEAT("x", 200)) FROM t1;
|
||||
INSERT INTO t1 SELECT a+16, CONCAT(a, "-", REPEAT("x", 200)) FROM t1;
|
||||
INSERT INTO t1 SELECT a+32, CONCAT(a, "-", REPEAT("x", 200)) FROM t1;
|
||||
INSERT INTO t1 SELECT a+64, CONCAT(a, "-", REPEAT("x", 200)) FROM t1;
|
||||
INSERT INTO t1 SELECT a+128, CONCAT(a, "-", REPEAT("x", 200)) FROM t1;
|
||||
SHOW STATUS LIKE "binlog_stmt_cache%";
|
||||
Variable_name Value
|
||||
Binlog_stmt_cache_disk_use 2
|
||||
Binlog_stmt_cache_use 9
|
||||
SET @old_dbug= @@SESSION.debug_dbug;
|
||||
SET SESSION debug_dbug="+d,simulate_disk_full_at_flush_pending";
|
||||
INSERT INTO t1 SELECT a+256, CONCAT(a, "-", REPEAT("x", 200)) FROM t1;
|
||||
ERROR HY000: Error writing file '<FILENAME>' (Errcode: 28 "No space left on device")
|
||||
SET SESSION debug_dbug= @old_dbug;
|
||||
SHOW STATUS LIKE "binlog_stmt_cache%";
|
||||
Variable_name Value
|
||||
Binlog_stmt_cache_disk_use 3
|
||||
Binlog_stmt_cache_use 10
|
||||
SELECT IF(COUNT(*) > 256 AND COUNT(*) < 512,
|
||||
"ok",
|
||||
CONCAT("ERROR! Row count ", COUNT(*), " not as expected for partially executed query"))
|
||||
AS check_result
|
||||
FROM t1;
|
||||
check_result
|
||||
ok
|
||||
ALTER TABLE t1 COMMENT '<mumble>';
|
||||
connection slave;
|
||||
include/wait_for_slave_sql_error_and_skip.inc [errno=1590]
|
||||
SELECT COUNT(*) FROM t1;
|
||||
COUNT(*)
|
||||
256
|
||||
connection master;
|
||||
SET GLOBAL binlog_stmt_cache_size= @save_binlog_stmt_cache_size;
|
||||
DROP TABLE t1;
|
||||
include/rpl_end.inc
|
@ -0,0 +1,47 @@
|
||||
--source include/have_binlog_format_statement.inc
|
||||
--source include/have_debug.inc
|
||||
--source include/master-slave.inc
|
||||
|
||||
--connection master
|
||||
# Set minimal cache size so smaller transaction can trigger spill to disk.
|
||||
SET @save_binlog_stmt_cache_size= @@GLOBAL.binlog_stmt_cache_size;
|
||||
SET GLOBAL binlog_stmt_cache_size= 4096;
|
||||
|
||||
CALL mtr.add_suppression('"No space left on device".*An incident event is written to binary log');
|
||||
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=MyISAM;
|
||||
|
||||
FLUSH STATUS;
|
||||
SHOW STATUS LIKE "binlog_stmt_cache%";
|
||||
SET @old_dbug= @@SESSION.debug_dbug;
|
||||
SET SESSION debug_dbug="+d,load_data_binlog_cache_error";
|
||||
--replace_regex /Error writing file '[^']+'/Error writing file '<FILENAME>'/
|
||||
--error 3
|
||||
LOAD DATA CONCURRENT LOCAL INFILE 'std_data/bug30435_5k.txt'
|
||||
REPLACE INTO TABLE t1 (a);
|
||||
SET SESSION debug_dbug= @old_dbug;
|
||||
SHOW STATUS LIKE "binlog_stmt_cache%";
|
||||
# The actual number of rows left after the disk full error may change as
|
||||
# binlog event sizes are modified. So here we just test that we get partial
|
||||
# update from the last INSERT..SELECT that gets disk full error.
|
||||
SELECT IF(COUNT(*) > 0 AND COUNT(*) < 5000,
|
||||
"ok",
|
||||
CONCAT("ERROR! Row count ", COUNT(*), " not as expected for partially executed query"))
|
||||
AS check_result
|
||||
FROM t1;
|
||||
|
||||
--save_master_pos
|
||||
|
||||
--connection slave
|
||||
--let $slave_sql_errno= 1590
|
||||
--source include/wait_for_slave_sql_error_and_skip.inc
|
||||
|
||||
--sync_with_master
|
||||
SELECT COUNT(*) FROM t1;
|
||||
|
||||
# Cleanup
|
||||
|
||||
--connection master
|
||||
SET GLOBAL binlog_stmt_cache_size= @save_binlog_stmt_cache_size;
|
||||
DROP TABLE t1;
|
||||
|
||||
--source include/rpl_end.inc
|
61
mysql-test/suite/rpl/t/rpl_binlog_cache_disk_full_row.test
Normal file
61
mysql-test/suite/rpl/t/rpl_binlog_cache_disk_full_row.test
Normal file
@ -0,0 +1,61 @@
|
||||
--source include/have_binlog_format_row.inc
|
||||
--source include/have_debug.inc
|
||||
--source include/master-slave.inc
|
||||
|
||||
--connection master
|
||||
# Set minimal cache size so smaller transaction can trigger spill to disk.
|
||||
SET @save_binlog_stmt_cache_size= @@GLOBAL.binlog_stmt_cache_size;
|
||||
SET GLOBAL binlog_stmt_cache_size= 4096;
|
||||
|
||||
CALL mtr.add_suppression('"No space left on device".*An incident event is written to binary log');
|
||||
CREATE TABLE t1 (a INT PRIMARY KEY, b VARCHAR(255)) ENGINE=MyISAM;
|
||||
|
||||
FLUSH STATUS;
|
||||
SHOW STATUS LIKE "binlog_stmt_cache%";
|
||||
INSERT INTO t1 VALUES (0, CONCAT("?", "-", REPEAT("x", 200)));
|
||||
INSERT INTO t1 SELECT a+1, CONCAT(a, "-", REPEAT("x", 200)) FROM t1;
|
||||
INSERT INTO t1 SELECT a+2, CONCAT(a, "-", REPEAT("x", 200)) FROM t1;
|
||||
INSERT INTO t1 SELECT a+4, CONCAT(a, "-", REPEAT("x", 200)) FROM t1;
|
||||
INSERT INTO t1 SELECT a+8, CONCAT(a, "-", REPEAT("x", 200)) FROM t1;
|
||||
INSERT INTO t1 SELECT a+16, CONCAT(a, "-", REPEAT("x", 200)) FROM t1;
|
||||
INSERT INTO t1 SELECT a+32, CONCAT(a, "-", REPEAT("x", 200)) FROM t1;
|
||||
INSERT INTO t1 SELECT a+64, CONCAT(a, "-", REPEAT("x", 200)) FROM t1;
|
||||
INSERT INTO t1 SELECT a+128, CONCAT(a, "-", REPEAT("x", 200)) FROM t1;
|
||||
SHOW STATUS LIKE "binlog_stmt_cache%";
|
||||
|
||||
SET @old_dbug= @@SESSION.debug_dbug;
|
||||
SET SESSION debug_dbug="+d,simulate_disk_full_at_flush_pending";
|
||||
--replace_regex /Error writing file '[^']+'/Error writing file '<FILENAME>'/
|
||||
--error 3
|
||||
INSERT INTO t1 SELECT a+256, CONCAT(a, "-", REPEAT("x", 200)) FROM t1;
|
||||
SET SESSION debug_dbug= @old_dbug;
|
||||
SHOW STATUS LIKE "binlog_stmt_cache%";
|
||||
# The actual number of rows left after the disk full error may change as
|
||||
# binlog event sizes are modified. So here we just test that we get partial
|
||||
# update from the last INSERT..SELECT that gets disk full error.
|
||||
SELECT IF(COUNT(*) > 256 AND COUNT(*) < 512,
|
||||
"ok",
|
||||
CONCAT("ERROR! Row count ", COUNT(*), " not as expected for partially executed query"))
|
||||
AS check_result
|
||||
FROM t1;
|
||||
|
||||
# A random extra event that helped show the bug that a partial event
|
||||
# group was binlogged.
|
||||
ALTER TABLE t1 COMMENT '<mumble>';
|
||||
|
||||
--save_master_pos
|
||||
|
||||
--connection slave
|
||||
--let $slave_sql_errno= 1590
|
||||
--source include/wait_for_slave_sql_error_and_skip.inc
|
||||
|
||||
--sync_with_master
|
||||
SELECT COUNT(*) FROM t1;
|
||||
|
||||
# Cleanup
|
||||
|
||||
--connection master
|
||||
SET GLOBAL binlog_stmt_cache_size= @save_binlog_stmt_cache_size;
|
||||
DROP TABLE t1;
|
||||
|
||||
--source include/rpl_end.inc
|
58
sql/log.cc
58
sql/log.cc
@ -2228,6 +2228,23 @@ bool MYSQL_BIN_LOG::check_write_error(THD *thd)
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Check if there was an error while writing the statement cache.
|
||||
If the cache content is corrupt due to an error, we should write an incident
|
||||
event to the binlog rather than write corrupt data to it.
|
||||
*/
|
||||
bool
|
||||
MYSQL_BIN_LOG::check_cache_error(THD *thd, binlog_cache_data *cache_data)
|
||||
{
|
||||
if (!cache_data)
|
||||
return false;
|
||||
if (check_write_error(thd))
|
||||
return true;
|
||||
if (!cache_data->empty() && cache_data->cache_log.error)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
@note
|
||||
How do we handle this (unlikely but legal) case:
|
||||
@ -5874,7 +5891,7 @@ write_err:
|
||||
engines, data is written to table but writing to binary log failed. In
|
||||
these scenarios rollback is not possible. Hence report an incident.
|
||||
*/
|
||||
if (mysql_bin_log.check_write_error(this) && cache_data &&
|
||||
if (mysql_bin_log.check_cache_error(this, cache_data) &&
|
||||
lex->stmt_accessed_table(LEX::STMT_WRITES_NON_TRANS_TABLE) &&
|
||||
table->current_lock == F_WRLCK)
|
||||
cache_data->set_incident();
|
||||
@ -6005,20 +6022,37 @@ MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd,
|
||||
/*
|
||||
Write pending event to the cache.
|
||||
*/
|
||||
#ifndef DBUG_OFF
|
||||
bool clear_dbug= false;
|
||||
#endif
|
||||
DBUG_EXECUTE_IF("simulate_disk_full_at_flush_pending",
|
||||
{DBUG_SET("+d,simulate_file_write_error");});
|
||||
{
|
||||
if (my_b_tell(&cache_data->cache_log) > 10000)
|
||||
{
|
||||
DBUG_SET("+d,simulate_file_write_error");
|
||||
clear_dbug= true;
|
||||
}
|
||||
});
|
||||
if (writer.write(pending))
|
||||
{
|
||||
set_write_error(thd, is_transactional);
|
||||
if (check_write_error(thd) && cache_data &&
|
||||
if (check_cache_error(thd, cache_data) &&
|
||||
stmt_has_updated_non_trans_table(thd))
|
||||
cache_data->set_incident();
|
||||
delete pending;
|
||||
cache_data->set_pending(NULL);
|
||||
DBUG_EXECUTE_IF("simulate_disk_full_at_flush_pending",
|
||||
{DBUG_SET("-d,simulate_file_write_error");});
|
||||
{
|
||||
if (clear_dbug)
|
||||
DBUG_SET("-d,simulate_file_write_error");
|
||||
});
|
||||
DBUG_RETURN(1);
|
||||
}
|
||||
DBUG_EXECUTE_IF("simulate_disk_full_at_flush_pending",
|
||||
{
|
||||
if (clear_dbug)
|
||||
DBUG_SET("-d,simulate_file_write_error");
|
||||
});
|
||||
|
||||
delete pending;
|
||||
}
|
||||
@ -6613,7 +6647,7 @@ err:
|
||||
if (unlikely(error))
|
||||
{
|
||||
set_write_error(thd, is_trans_cache);
|
||||
if (check_write_error(thd) && cache_data &&
|
||||
if (check_cache_error(thd, cache_data) &&
|
||||
stmt_has_updated_non_trans_table(thd))
|
||||
cache_data->set_incident();
|
||||
}
|
||||
@ -8337,6 +8371,20 @@ MYSQL_BIN_LOG::write_transaction_or_stmt(group_commit_entry *entry,
|
||||
binlog_cache_mngr *mngr= entry->cache_mngr;
|
||||
DBUG_ENTER("MYSQL_BIN_LOG::write_transaction_or_stmt");
|
||||
|
||||
/*
|
||||
An error in the trx_cache will truncate the cache to the last good
|
||||
statement, it won't leave a lingering error. Assert that this holds.
|
||||
*/
|
||||
DBUG_ASSERT(!(entry->using_trx_cache && !mngr->trx_cache.empty() &&
|
||||
mngr->get_binlog_cache_log(TRUE)->error));
|
||||
/*
|
||||
An error in the stmt_cache would be caught on the higher level and result
|
||||
in an incident event being written over a (possibly corrupt) cache content.
|
||||
Assert that this holds.
|
||||
*/
|
||||
DBUG_ASSERT(!(entry->using_stmt_cache && !mngr->stmt_cache.empty() &&
|
||||
mngr->get_binlog_cache_log(FALSE)->error));
|
||||
|
||||
if (write_gtid_event(entry->thd, false, entry->using_trx_cache, commit_id))
|
||||
DBUG_RETURN(ER_ERROR_ON_WRITE);
|
||||
|
||||
|
@ -815,6 +815,7 @@ public:
|
||||
int write_cache(THD *thd, IO_CACHE *cache);
|
||||
void set_write_error(THD *thd, bool is_transactional);
|
||||
bool check_write_error(THD *thd);
|
||||
bool check_cache_error(THD *thd, binlog_cache_data *cache_data);
|
||||
|
||||
void start_union_events(THD *thd, query_id_t query_id_param);
|
||||
void stop_union_events(THD *thd);
|
||||
|
@ -253,6 +253,10 @@ public:
|
||||
*/
|
||||
void skip_data_till_eof()
|
||||
{
|
||||
#ifndef EMBEDDED_LIBRARY
|
||||
if (mysql_bin_log.is_open())
|
||||
cache.read_function= cache.real_read_function;
|
||||
#endif
|
||||
while (GET != my_b_EOF)
|
||||
;
|
||||
}
|
||||
|
@ -4499,6 +4499,10 @@ int log_loaded_block(IO_CACHE* file, uchar *Buffer, size_t Count)
|
||||
/* buffer contains position where we started last read */
|
||||
uchar* buffer= (uchar*) my_b_get_buffer_start(file);
|
||||
uint max_event_size= lf_info->thd->variables.max_allowed_packet;
|
||||
int res;
|
||||
#ifndef DBUG_OFF
|
||||
bool did_dbug_inject= false;
|
||||
#endif
|
||||
|
||||
if (lf_info->thd->is_current_stmt_binlog_format_row())
|
||||
goto ret;
|
||||
@ -4506,6 +4510,19 @@ int log_loaded_block(IO_CACHE* file, uchar *Buffer, size_t Count)
|
||||
lf_info->last_pos_in_file >= my_b_get_pos_in_file(file))
|
||||
goto ret;
|
||||
|
||||
DBUG_EXECUTE_IF("load_data_binlog_cache_error",
|
||||
{
|
||||
/*
|
||||
Simulate "disk full" error in the middle of writing to
|
||||
the binlog cache.
|
||||
*/
|
||||
if (lf_info->last_pos_in_file >= 2*4096)
|
||||
{
|
||||
DBUG_SET("+d,simulate_file_write_error");
|
||||
did_dbug_inject= true;
|
||||
}
|
||||
};);
|
||||
|
||||
for (block_len= (uint) (my_b_get_bytes_in_buffer(file)); block_len > 0;
|
||||
buffer += MY_MIN(block_len, max_event_size),
|
||||
block_len -= MY_MIN(block_len, max_event_size))
|
||||
@ -4517,7 +4534,10 @@ int log_loaded_block(IO_CACHE* file, uchar *Buffer, size_t Count)
|
||||
MY_MIN(block_len, max_event_size),
|
||||
lf_info->log_delayed);
|
||||
if (mysql_bin_log.write(&a))
|
||||
DBUG_RETURN(1);
|
||||
{
|
||||
res= 1;
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -4526,12 +4546,20 @@ int log_loaded_block(IO_CACHE* file, uchar *Buffer, size_t Count)
|
||||
MY_MIN(block_len, max_event_size),
|
||||
lf_info->log_delayed);
|
||||
if (mysql_bin_log.write(&b))
|
||||
DBUG_RETURN(1);
|
||||
{
|
||||
res= 1;
|
||||
goto err;
|
||||
}
|
||||
lf_info->wrote_create_file= 1;
|
||||
}
|
||||
}
|
||||
ret:
|
||||
int res= Buffer ? lf_info->real_read_function(file, Buffer, Count) : 0;
|
||||
res= Buffer ? lf_info->real_read_function(file, Buffer, Count) : 0;
|
||||
err:
|
||||
#ifndef DBUG_OFF
|
||||
if (did_dbug_inject)
|
||||
DBUG_SET("-d,simulate_file_write_error");
|
||||
#endif
|
||||
DBUG_RETURN(res);
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user