From edda2fd149e4fcb7c9640be4d05d2c25013b2b71 Mon Sep 17 00:00:00 2001 From: Vlad Lesin Date: Tue, 1 Oct 2019 03:23:33 +0300 Subject: [PATCH 01/11] MDEV-20703: mariabackup creates binlog files in server binlog directory on --prepare --export step When "--export" mariabackup option is used, mariabackup starts the server in bootstrap mode to generate *.cfg files for the certain innodb tables. The started instance of the server reads options from the file, pointed out in "--defaults-file" mariabackup option. If the server uses the same config file as mariabackup, and binlog is switched on in that config file, then "mariabackup --prepare --export" will create binary log files in the server's binary log directory, what can cause issues. The fix is to add "--skip-log-bin" in mysld options when the server is started to generate *.cfg files. --- extra/mariabackup/xtrabackup.cc | 6 ++++-- mysql-test/suite/mariabackup/partial.test | 18 ++++++++++++++++-- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/extra/mariabackup/xtrabackup.cc b/extra/mariabackup/xtrabackup.cc index 46b67705700..ec53554e2d8 100644 --- a/extra/mariabackup/xtrabackup.cc +++ b/extra/mariabackup/xtrabackup.cc @@ -1535,7 +1535,8 @@ static int prepare_export() " --defaults-extra-file=./backup-my.cnf --defaults-group-suffix=%s --datadir=." " --innodb --innodb-fast-shutdown=0 --loose-partition" " --innodb_purge_rseg_truncate_frequency=1 --innodb-buffer-pool-size=%llu" - " --console --skip-log-error --bootstrap < " BOOTSTRAP_FILENAME IF_WIN("\"",""), + " --console --skip-log-error --skip-log-bin --bootstrap < " + BOOTSTRAP_FILENAME IF_WIN("\"",""), mariabackup_exe, orig_argv1, (my_defaults_group_suffix?my_defaults_group_suffix:""), xtrabackup_use_memory); @@ -1547,7 +1548,8 @@ static int prepare_export() " --defaults-file=./backup-my.cnf --defaults-group-suffix=%s --datadir=." " --innodb --innodb-fast-shutdown=0 --loose-partition" " --innodb_purge_rseg_truncate_frequency=1 --innodb-buffer-pool-size=%llu" - " --console --log-error= --bootstrap < " BOOTSTRAP_FILENAME IF_WIN("\"",""), + " --console --log-error= --skip-log-bin --bootstrap < " + BOOTSTRAP_FILENAME IF_WIN("\"",""), mariabackup_exe, (my_defaults_group_suffix?my_defaults_group_suffix:""), xtrabackup_use_memory); diff --git a/mysql-test/suite/mariabackup/partial.test b/mysql-test/suite/mariabackup/partial.test index 559ba155972..53388b1947f 100644 --- a/mysql-test/suite/mariabackup/partial.test +++ b/mysql-test/suite/mariabackup/partial.test @@ -12,7 +12,7 @@ CREATE TABLE t2(i int) ENGINE INNODB; echo # xtrabackup backup; -let $targetdir=$MYSQLTEST_VARDIR/tmp/backup; +let targetdir=$MYSQLTEST_VARDIR/tmp/backup; --disable_result_log exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --backup "--tables=test.*1" --target-dir=$targetdir; --enable_result_log @@ -25,13 +25,27 @@ EOF write_file $targetdir/test/junk.frm; EOF +let server_cnf=$targetdir/server.cnf; +copy_file $MYSQLTEST_VARDIR/my.cnf $server_cnf; + +# Emulate server config file turnes on binary logs +perl; +my $binlog_path="$ENV{'targetdir'}/mysqld-bin"; +my $config_path=$ENV{'server_cnf'}; +open(my $fd, '>>', "$config_path"); +print $fd "\n[mysqld]\n"; +print $fd "log-bin=$binlog_path\n"; +close $fd; +EOF echo # xtrabackup prepare; --disable_result_log -exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --defaults-group-suffix=.1 --prepare --export --target-dir=$targetdir; +exec $XTRABACKUP --defaults-file=$server_cnf --defaults-group-suffix=.1 --prepare --export --target-dir=$targetdir; --enable_result_log list_files $targetdir/test *.cfg; +# There must not be binary logs created on --prepare step +list_files $targetdir/ mysqld-bin.*; let $MYSQLD_DATADIR= `select @@datadir`; ALTER TABLE t1 DISCARD TABLESPACE; From e43791d4dc8feb2c02a08ce73c0bb0e2c320018c Mon Sep 17 00:00:00 2001 From: Sergey Vojtovich Date: Wed, 2 Oct 2019 15:23:59 +0400 Subject: [PATCH 02/11] Cleanup EITS Moved EITS allocation inside read_statistics_for_tables_if_needed(). Removed redundant is_safe argument. --- sql/sql_base.cc | 26 --------------------- sql/sql_statistics.cc | 53 +++++++++++++++++++++++++++---------------- sql/sql_statistics.h | 2 -- 3 files changed, 34 insertions(+), 47 deletions(-) diff --git a/sql/sql_base.cc b/sql/sql_base.cc index e8bdff8b48f..1c4e7600d87 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -4245,32 +4245,6 @@ open_and_process_table(THD *thd, TABLE_LIST *tables, uint *counter, uint flags, goto end; } - if (get_use_stat_tables_mode(thd) > NEVER && tables->table) - { - TABLE_SHARE *table_share= tables->table->s; - if (table_share && table_share->table_category == TABLE_CATEGORY_USER && - table_share->tmp_table == NO_TMP_TABLE) - { - if (table_share->stats_cb.stats_can_be_read || - !alloc_statistics_for_table_share(thd, table_share, FALSE)) - { - if (table_share->stats_cb.stats_can_be_read) - { - KEY *key_info= table_share->key_info; - KEY *key_info_end= key_info + table_share->keys; - KEY *table_key_info= tables->table->key_info; - for ( ; key_info < key_info_end; key_info++, table_key_info++) - table_key_info->read_stats= key_info->read_stats; - Field **field_ptr= table_share->field; - Field **table_field_ptr= tables->table->field; - for ( ; *field_ptr; field_ptr++, table_field_ptr++) - (*table_field_ptr)->read_stats= (*field_ptr)->read_stats; - tables->table->stats_is_read= table_share->stats_cb.stats_is_read; - } - } - } - } - process_view_routines: /* Again we may need cache all routines used by this view and add diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index 52b3811e60d..e18cec589be 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -2216,8 +2216,6 @@ inline bool statistics_for_command_is_needed(THD *thd) thd Thread handler @param table_share Table share for which the memory for statistical data is allocated - @param - is_safe TRUE <-> at any time only one thread can perform the function @note The function allocates the memory for the statistical data on a table in the @@ -2226,8 +2224,6 @@ inline bool statistics_for_command_is_needed(THD *thd) mysql.index_stats. The memory is allocated for the statistics on the table, on the tables's columns, and on the table's indexes. The memory is allocated in the table_share's mem_root. - If the parameter is_safe is TRUE then it is guaranteed that at any given time - only one thread is executed the code of the function. @retval 0 If the memory for all statistical data has been successfully allocated @@ -2246,16 +2242,10 @@ inline bool statistics_for_command_is_needed(THD *thd) Here the second and the third threads try to allocate the memory for statistical data at the same time. The precautions are taken to guarantee the correctness of the allocation. - - @note - Currently the function always is called with the parameter is_safe set - to FALSE. */ -int alloc_statistics_for_table_share(THD* thd, TABLE_SHARE *table_share, - bool is_safe) +static int alloc_statistics_for_table_share(THD* thd, TABLE_SHARE *table_share) { - Field **field_ptr; KEY *key_info, *end; TABLE_STATISTICS_CB *stats_cb= &table_share->stats_cb; @@ -2268,13 +2258,11 @@ int alloc_statistics_for_table_share(THD* thd, TABLE_SHARE *table_share, if (!statistics_for_command_is_needed(thd)) DBUG_RETURN(1); - if (!is_safe) - mysql_mutex_lock(&table_share->LOCK_share); + mysql_mutex_lock(&table_share->LOCK_share); if (stats_cb->stats_can_be_read) { - if (!is_safe) - mysql_mutex_unlock(&table_share->LOCK_share); + mysql_mutex_unlock(&table_share->LOCK_share); DBUG_RETURN(0); } @@ -2285,8 +2273,7 @@ int alloc_statistics_for_table_share(THD* thd, TABLE_SHARE *table_share, sizeof(Table_statistics)); if (!table_stats) { - if (!is_safe) - mysql_mutex_unlock(&table_share->LOCK_share); + mysql_mutex_unlock(&table_share->LOCK_share); DBUG_RETURN(1); } memset(table_stats, 0, sizeof(Table_statistics)); @@ -2358,8 +2345,7 @@ int alloc_statistics_for_table_share(THD* thd, TABLE_SHARE *table_share, if (column_stats && index_stats && idx_avg_frequency) stats_cb->stats_can_be_read= TRUE; - if (!is_safe) - mysql_mutex_unlock(&table_share->LOCK_share); + mysql_mutex_unlock(&table_share->LOCK_share); DBUG_RETURN(0); } @@ -3264,6 +3250,35 @@ int read_statistics_for_tables_if_needed(THD *thd, TABLE_LIST *tables) DBUG_ENTER("read_statistics_for_tables_if_needed"); + for (TABLE_LIST *tl= tables; tl; tl= tl->next_global) + { + if (get_use_stat_tables_mode(thd) > NEVER && tl->table) + { + TABLE_SHARE *table_share= tl->table->s; + if (table_share && table_share->table_category == TABLE_CATEGORY_USER && + table_share->tmp_table == NO_TMP_TABLE) + { + if (table_share->stats_cb.stats_can_be_read || + !alloc_statistics_for_table_share(thd, table_share)) + { + if (table_share->stats_cb.stats_can_be_read) + { + KEY *key_info= table_share->key_info; + KEY *key_info_end= key_info + table_share->keys; + KEY *table_key_info= tl->table->key_info; + for ( ; key_info < key_info_end; key_info++, table_key_info++) + table_key_info->read_stats= key_info->read_stats; + Field **field_ptr= table_share->field; + Field **table_field_ptr= tl->table->field; + for ( ; *field_ptr; field_ptr++, table_field_ptr++) + (*table_field_ptr)->read_stats= (*field_ptr)->read_stats; + tl->table->stats_is_read= table_share->stats_cb.stats_is_read; + } + } + } + } + } + DEBUG_SYNC(thd, "statistics_read_start"); if (!statistics_for_tables_is_needed(thd, tables)) diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h index 16325220e84..968f77cd2ca 100644 --- a/sql/sql_statistics.h +++ b/sql/sql_statistics.h @@ -90,8 +90,6 @@ Use_stat_tables_mode get_use_stat_tables_mode(THD *thd) int read_statistics_for_tables_if_needed(THD *thd, TABLE_LIST *tables); int collect_statistics_for_table(THD *thd, TABLE *table); -int alloc_statistics_for_table_share(THD* thd, TABLE_SHARE *share, - bool is_safe); void delete_stat_values_for_table_share(TABLE_SHARE *table_share); int alloc_statistics_for_table(THD *thd, TABLE *table); int update_statistics_for_table(THD *thd, TABLE *table); From 5e65c67cfcc79c48c22798babee7555b04f40d18 Mon Sep 17 00:00:00 2001 From: Eugene Kosov Date: Thu, 3 Oct 2019 17:45:36 +0300 Subject: [PATCH 03/11] fix clang warning --- storage/connect/inihandl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/connect/inihandl.cpp b/storage/connect/inihandl.cpp index 8e79aeac7ef..dacab3c485c 100644 --- a/storage/connect/inihandl.cpp +++ b/storage/connect/inihandl.cpp @@ -194,7 +194,7 @@ static void PROFILE_Save( FILE *file, PROFILESECTION *section ) } for (key = section->key; key; key = key->next) - if (key->name && key->name[0]) { + if (key->name[0]) { fprintf(file, "%s", SVP(key->name)); if (key->value) From 13535b2713aa78636e7c93059c30a298a72b627f Mon Sep 17 00:00:00 2001 From: Oleksandr Byelkin Date: Mon, 30 Sep 2019 09:08:17 +0200 Subject: [PATCH 04/11] Fix problem with warnings of new compilers. --- libmariadb | 2 +- sql-common/client_plugin.c | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/libmariadb b/libmariadb index 544b6f1d12f..c6403c4c847 160000 --- a/libmariadb +++ b/libmariadb @@ -1 +1 @@ -Subproject commit 544b6f1d12f0e5b2a141129075ff2d64feb0e4c9 +Subproject commit c6403c4c847d94ed6b40d3fd128e729271867e75 diff --git a/sql-common/client_plugin.c b/sql-common/client_plugin.c index 52e0ae03ee1..a4de02b9f41 100644 --- a/sql-common/client_plugin.c +++ b/sql-common/client_plugin.c @@ -28,10 +28,8 @@ There is no reference counting and no unloading either. */ -#if _MSC_VER /* Silence warnings about variable 'unused' being used. */ #define FORCE_INIT_OF_VARS 1 -#endif #include #include "mysql.h" From adefaeffcce7c4ae0844f72dd920603b35285d40 Mon Sep 17 00:00:00 2001 From: Sergey Vojtovich Date: Wed, 2 Oct 2019 16:04:52 +0400 Subject: [PATCH 05/11] MDEV-19536 - Server crash or ASAN heap-use-after-free in is_temporary_table / read_statistics_for_tables_if_needed Regression after 279a907, read_statistics_for_tables_if_needed() was called after open_normal_and_derived_tables() failure. Fixed by moving read_statistics_for_tables() call to a branch of get_schema_stat_record() where result of open_normal_and_derived_tables() is checked. Removed THD::force_read_stats, added read_statistics_for_tables() instead. Simplified away statistics_for_command_is_needed(). --- sql/sql_class.cc | 1 - sql/sql_class.h | 3 -- sql/sql_show.cc | 5 +-- sql/sql_statistics.cc | 83 ++++++++++++++----------------------------- sql/sql_statistics.h | 1 + 5 files changed, 29 insertions(+), 64 deletions(-) diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 1a3ca54abf8..ab105c67507 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -986,7 +986,6 @@ THD::THD(bool is_wsrep_applier) memset(&invoker_host, 0, sizeof(invoker_host)); prepare_derived_at_open= FALSE; create_tmp_table_for_derived= FALSE; - force_read_stats= FALSE; save_prep_leaf_list= FALSE; /* Restore THR_THD */ set_current_thd(old_THR_THD); diff --git a/sql/sql_class.h b/sql/sql_class.h index 8a4d8ff06a3..6c622648ac7 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -2200,9 +2200,6 @@ public: */ bool create_tmp_table_for_derived; - /* The flag to force reading statistics from EITS tables */ - bool force_read_stats; - bool save_prep_leaf_list; /* container for handler's private per-connection data */ diff --git a/sql/sql_show.cc b/sql/sql_show.cc index 71bfc644441..c154f5da472 100644 --- a/sql/sql_show.cc +++ b/sql/sql_show.cc @@ -4272,7 +4272,6 @@ fill_schema_table_by_open(THD *thd, bool is_show_fields_or_keys, SQLCOM_SHOW_FIELDS is used because it satisfies 'only_view_structure()'. */ - thd->force_read_stats= get_schema_table_idx(schema_table) == SCH_STATISTICS; lex->sql_command= SQLCOM_SHOW_FIELDS; result= (open_temporary_tables(thd, table_list) || open_normal_and_derived_tables(thd, table_list, @@ -4287,9 +4286,6 @@ fill_schema_table_by_open(THD *thd, bool is_show_fields_or_keys, */ lex->sql_command= old_lex->sql_command; - (void) read_statistics_for_tables_if_needed(thd, table_list); - thd->force_read_stats= false; - DEBUG_SYNC(thd, "after_open_table_ignore_flush"); /* @@ -6165,6 +6161,7 @@ static int get_schema_stat_record(THD *thd, TABLE_LIST *tables, KEY *key_info=show_table->s->key_info; if (show_table->file) { + (void) read_statistics_for_tables(thd, tables); show_table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK | HA_STATUS_TIME); diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index e18cec589be..4a1ed9cde4e 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -2160,54 +2160,6 @@ int alloc_statistics_for_table(THD* thd, TABLE *table) } -/** - @brief - Check whether any persistent statistics for the processed command is needed - - @param - thd The thread handle - - @details - The function checks whether any persitent statistics for the processed - command is needed to be read. - - @retval - TRUE statistics is needed to be read - @retval - FALSE Otherwise -*/ - -static -inline bool statistics_for_command_is_needed(THD *thd) -{ - if (thd->bootstrap || thd->variables.use_stat_tables == NEVER) - return FALSE; - - if (thd->force_read_stats) - return TRUE; - - switch(thd->lex->sql_command) { - case SQLCOM_SELECT: - case SQLCOM_INSERT: - case SQLCOM_INSERT_SELECT: - case SQLCOM_UPDATE: - case SQLCOM_UPDATE_MULTI: - case SQLCOM_DELETE: - case SQLCOM_DELETE_MULTI: - case SQLCOM_REPLACE: - case SQLCOM_REPLACE_SELECT: - case SQLCOM_CREATE_TABLE: - case SQLCOM_SET_OPTION: - case SQLCOM_DO: - break; - default: - return FALSE; - } - - return TRUE; -} - - /** @brief Allocate memory for the statistical data used by a table share @@ -2255,9 +2207,6 @@ static int alloc_statistics_for_table_share(THD* thd, TABLE_SHARE *table_share) DEBUG_SYNC(thd, "statistics_mem_alloc_start1"); DEBUG_SYNC(thd, "statistics_mem_alloc_start2"); - if (!statistics_for_command_is_needed(thd)) - DBUG_RETURN(1); - mysql_mutex_lock(&table_share->LOCK_share); if (stats_cb->stats_can_be_read) @@ -3110,9 +3059,6 @@ bool statistics_for_tables_is_needed(THD *thd, TABLE_LIST *tables) { if (!tables) return FALSE; - - if (!statistics_for_command_is_needed(thd)) - return FALSE; /* Do not read statistics for any query that explicity involves @@ -3244,15 +3190,40 @@ int read_histograms_for_table(THD *thd, TABLE *table, TABLE_LIST *stat_tables) */ int read_statistics_for_tables_if_needed(THD *thd, TABLE_LIST *tables) +{ + switch (thd->lex->sql_command) { + case SQLCOM_SELECT: + case SQLCOM_INSERT: + case SQLCOM_INSERT_SELECT: + case SQLCOM_UPDATE: + case SQLCOM_UPDATE_MULTI: + case SQLCOM_DELETE: + case SQLCOM_DELETE_MULTI: + case SQLCOM_REPLACE: + case SQLCOM_REPLACE_SELECT: + case SQLCOM_CREATE_TABLE: + case SQLCOM_SET_OPTION: + case SQLCOM_DO: + return read_statistics_for_tables(thd, tables); + default: + return 0; + } +} + + +int read_statistics_for_tables(THD *thd, TABLE_LIST *tables) { TABLE_LIST stat_tables[STATISTICS_TABLES]; Open_tables_backup open_tables_backup; - DBUG_ENTER("read_statistics_for_tables_if_needed"); + DBUG_ENTER("read_statistics_for_tables"); + + if (thd->bootstrap || thd->variables.use_stat_tables == NEVER) + DBUG_RETURN(0); for (TABLE_LIST *tl= tables; tl; tl= tl->next_global) { - if (get_use_stat_tables_mode(thd) > NEVER && tl->table) + if (tl->table) { TABLE_SHARE *table_share= tl->table->s; if (table_share && table_share->table_category == TABLE_CATEGORY_USER && diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h index 968f77cd2ca..71d727eab07 100644 --- a/sql/sql_statistics.h +++ b/sql/sql_statistics.h @@ -89,6 +89,7 @@ Use_stat_tables_mode get_use_stat_tables_mode(THD *thd) } int read_statistics_for_tables_if_needed(THD *thd, TABLE_LIST *tables); +int read_statistics_for_tables(THD *thd, TABLE_LIST *tables); int collect_statistics_for_table(THD *thd, TABLE *table); void delete_stat_values_for_table_share(TABLE_SHARE *table_share); int alloc_statistics_for_table(THD *thd, TABLE *table); From db9a4d928dc8e81ce449b54ef0bf02c248d931d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 7 Oct 2019 17:18:10 +0300 Subject: [PATCH 06/11] Remove orphan declaration buf_flush_wait_batch_end_wait_only() The function was declared but not defined in commit 9d6d1902e091c868bb288e0ccf9f975ccb474db9 --- storage/innobase/include/buf0flu.h | 11 ----------- storage/xtradb/include/buf0flu.h | 11 ----------- 2 files changed, 22 deletions(-) diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h index b5cfcb125e6..4cc26be3918 100644 --- a/storage/innobase/include/buf0flu.h +++ b/storage/innobase/include/buf0flu.h @@ -141,17 +141,6 @@ buf_flush_wait_batch_end( buf_pool_t* buf_pool, /*!< in: buffer pool instance */ enum buf_flush type); /*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */ -/******************************************************************//** -Waits until a flush batch of the given type ends. This is called by -a thread that only wants to wait for a flush to end but doesn't do -any flushing itself. */ -UNIV_INTERN -void -buf_flush_wait_batch_end_wait_only( -/*===============================*/ - buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - enum buf_flush type); /*!< in: BUF_FLUSH_LRU - or BUF_FLUSH_LIST */ /********************************************************************//** This function should be called at a mini-transaction commit, if a page was modified in it. Puts the block to the list of modified blocks, if it not diff --git a/storage/xtradb/include/buf0flu.h b/storage/xtradb/include/buf0flu.h index ed01b627bf4..83d4dae7f1c 100644 --- a/storage/xtradb/include/buf0flu.h +++ b/storage/xtradb/include/buf0flu.h @@ -143,17 +143,6 @@ buf_flush_wait_batch_end( buf_pool_t* buf_pool, /*!< in: buffer pool instance */ enum buf_flush type); /*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */ -/******************************************************************//** -Waits until a flush batch of the given type ends. This is called by -a thread that only wants to wait for a flush to end but doesn't do -any flushing itself. */ -UNIV_INTERN -void -buf_flush_wait_batch_end_wait_only( -/*===============================*/ - buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - enum buf_flush type); /*!< in: BUF_FLUSH_LRU - or BUF_FLUSH_LIST */ /********************************************************************//** This function should be called at a mini-transaction commit, if a page was modified in it. Puts the block to the list of modified blocks, if it not From 27664ef29d97be2973982aedcc0c8c90d18b20a4 Mon Sep 17 00:00:00 2001 From: Sachin Setiya Date: Thu, 26 Sep 2019 15:05:55 +0530 Subject: [PATCH 07/11] MDEV-20574 Position of events reported by mysqlbinlog is wrong with encrypted binlogs, SHOW BINLOG EVENTS reports the correct one. Analysis Mysqlbinlog output for encrypted binary log #Q> insert into tab1 values (3,'row 003') #190912 17:36:35 server id 10221 end_log_pos 980 CRC32 0x53bcb3d3 Table_map: `test`.`tab1` mapped to number 19 # at 940 #190912 17:36:35 server id 10221 end_log_pos 1026 CRC32 0xf2ae5136 Write_rows: table id 19 flags: STMT_END_F Here we can see Table_map_log_event ends at 980 but Next event starts at 940. And the reason for that is we do not send START_ENCRYPTION_EVENT to the slave Solution:- Send Start_encryption_log_event as Ignorable_log_event to slave(mysqlbinlog), So that mysqlbinlog can update its log_pos. Since Slave can request multiple FORMAT_DESCRIPTION_EVENT while master does not have so We only update slave master pos when master actually have the FORMAT_DESCRIPTION_EVENT. Similar logic should be applied for START_ENCRYPTION_EVENT. Also added the test case when new server reads the data from old server which does not send START_ENCRYPTION_EVENT to slave. Master Slave Upgrade Scenario. When Slave is updated first, Slave will have extra logic of handling START_ENCRYPTION_EVENT But master willnot be sending START_ENCRYPTION_EVENT. So there will be no issue. When Master is updated first, It will send START_ENCRYPTION_EVENT to slave , But slave will ignore this event in queue_event. --- mysql-test/std_data/binlog_before_20574.bin | Bin 0 -> 1022 bytes .../binlog_mdev_20574_old_binlog.result | 27 ++++++++ .../binlog_mdev_20574_old_binlog.test | 46 +++++++++++++ .../binlog_row_annotate.result | 9 +++ .../binlog_encryption/mysqlbinlog.result | 1 + .../suite/binlog_encryption/mysqlbinlog.test | 3 + sql/log_event.cc | 34 +++++----- sql/slave.cc | 11 +++ sql/sql_repl.cc | 63 ++++++++++++------ 9 files changed, 159 insertions(+), 35 deletions(-) create mode 100644 mysql-test/std_data/binlog_before_20574.bin create mode 100644 mysql-test/suite/binlog_encryption/binlog_mdev_20574_old_binlog.result create mode 100644 mysql-test/suite/binlog_encryption/binlog_mdev_20574_old_binlog.test diff --git a/mysql-test/std_data/binlog_before_20574.bin b/mysql-test/std_data/binlog_before_20574.bin new file mode 100644 index 0000000000000000000000000000000000000000..596a883dc71f71a807047ec0073578dbe60eee43 GIT binary patch literal 1022 zcmeyDl$pmQJ~@`3k%58X4-hi|8H@}p42A}JMtVjTy1t1;nTak=x+$qirRlmk`RO1v zSOLfwVGCXc4hA6x79bE}V0gm7BEwZ$*Vk|hV+bAwx{eiV?(p0s@d%Oiz&$qAN{q;@W?_F-X zKz$c?ED>B1bfIR>z1t}Q%KL4~c&cPW>fM-9_MJD$Zs-xrd@+&j&3Y>lPXnOhTB!vy zmd|7|V8fmEtJ2%pl8#Bz4Zn{%z6K=qrC`hA`J zfQc^sqCU(a3i&g^(Xb63H=>zjKXulPR2pfSC5 z%|VuB>w*n|DsLVv-k$x^?^?CR=?-Vb^A{$sTGP4etG&#NHKCWc@?~fg+FfqT$o9;Y zHn@K+5*z>mN1LA>aW$O~zTsx~7W=;FZO^VITiWwo@j9e=Pip@Mo0g}`mntj!*Qo(b zUuL+SA>sKS$1~>Zzx==d!_y^ZRmi96o CKX*0& literal 0 HcmV?d00001 diff --git a/mysql-test/suite/binlog_encryption/binlog_mdev_20574_old_binlog.result b/mysql-test/suite/binlog_encryption/binlog_mdev_20574_old_binlog.result new file mode 100644 index 00000000000..cf660297640 --- /dev/null +++ b/mysql-test/suite/binlog_encryption/binlog_mdev_20574_old_binlog.result @@ -0,0 +1,27 @@ +include/master-slave.inc +[connection master] +connection slave; +include/stop_slave.inc +connection master; +include/rpl_stop_server.inc [server_number=1] +# Data in binlog +# CREATE TABLE t1 (a INT); +# INSERT INTO t1 VALUES (1),(2),(3); +# REPLACE INTO t1 VALUES (4); +include/rpl_start_server.inc [server_number=1] +connection slave; +RESET SLAVE; +RESET MASTER; +CHANGE MASTER TO master_host='127.0.0.1', master_port=SERVER_MYPORT_1, master_user='root', master_log_file='master-bin.000001', master_log_pos=4; +include/start_slave.inc +DESC t1; +Field Type Null Key Default Extra +a int(11) YES NULL +SELECT * FROM t1 ORDER BY a; +a +1 +2 +3 +4 +DROP TABLE t1; +include/rpl_end.inc diff --git a/mysql-test/suite/binlog_encryption/binlog_mdev_20574_old_binlog.test b/mysql-test/suite/binlog_encryption/binlog_mdev_20574_old_binlog.test new file mode 100644 index 00000000000..417df631878 --- /dev/null +++ b/mysql-test/suite/binlog_encryption/binlog_mdev_20574_old_binlog.test @@ -0,0 +1,46 @@ +# MDEV-20574 Position of events reported by mysqlbinlog is wrong with encrypted binlogs, SHOW BINLOG EVENTS reports the correct one. +# Test replicating off old master. +# Test case Desc:- When new server reads the data from old server binlog which +# does not send START_ENCRYPTION_EVENT to slave. +# We simulate old master by copying in pre-generated binlog files from earlier +# server versions with encrypted binlog. +--source include/have_binlog_format_row.inc +--source include/master-slave.inc +--source include/have_innodb.inc + +--connection slave +--source include/stop_slave.inc + +--connection master +--let $datadir= `SELECT @@datadir` + +--let $rpl_server_number= 1 +--source include/rpl_stop_server.inc + +--remove_file $datadir/master-bin.000001 +--remove_file $datadir/master-bin.state +--echo # Data in binlog +--echo # CREATE TABLE t1 (a INT); +--echo # INSERT INTO t1 VALUES (1),(2),(3); +--echo # REPLACE INTO t1 VALUES (4); + +--copy_file $MYSQL_TEST_DIR/std_data/binlog_before_20574.bin $datadir/master-bin.000001 + +--let $rpl_server_number= 1 +--source include/rpl_start_server.inc + +--source include/wait_until_connected_again.inc +--save_master_pos + +--connection slave +RESET SLAVE; +RESET MASTER; +--replace_result $SERVER_MYPORT_1 SERVER_MYPORT_1 +eval CHANGE MASTER TO master_host='127.0.0.1', master_port=$SERVER_MYPORT_1, master_user='root', master_log_file='master-bin.000001', master_log_pos=4; +--source include/start_slave.inc +--sync_with_master +DESC t1; +SELECT * FROM t1 ORDER BY a; + +DROP TABLE t1; +--source include/rpl_end.inc diff --git a/mysql-test/suite/binlog_encryption/binlog_row_annotate.result b/mysql-test/suite/binlog_encryption/binlog_row_annotate.result index 88c690a8bb7..9b843dc8a6b 100644 --- a/mysql-test/suite/binlog_encryption/binlog_row_annotate.result +++ b/mysql-test/suite/binlog_encryption/binlog_row_annotate.result @@ -104,6 +104,9 @@ DELIMITER /*!*/; #010909 4:46:40 server id # end_log_pos # Start: binlog v 4, server v #.##.## created 010909 4:46:40 at startup ROLLBACK/*!*/; # at # +#010909 4:46:40 server id # end_log_pos # Ignorable +# Ignorable event type 164 (Start_encryption) +# at # #010909 4:46:40 server id # end_log_pos # Gtid list [] # at # #010909 4:46:40 server id # end_log_pos # Binlog checkpoint master-bin.000001 @@ -336,6 +339,9 @@ DELIMITER /*!*/; #010909 4:46:40 server id # end_log_pos # Start: binlog v 4, server v #.##.## created 010909 4:46:40 at startup ROLLBACK/*!*/; # at # +#010909 4:46:40 server id # end_log_pos # Ignorable +# Ignorable event type 164 (Start_encryption) +# at # #010909 4:46:40 server id # end_log_pos # Gtid list [] # at # #010909 4:46:40 server id # end_log_pos # Binlog checkpoint master-bin.000001 @@ -495,6 +501,9 @@ DELIMITER /*!*/; #010909 4:46:40 server id # end_log_pos # Start: binlog v 4, server v #.##.## created 010909 4:46:40 at startup ROLLBACK/*!*/; # at # +#010909 4:46:40 server id # end_log_pos # Ignorable +# Ignorable event type 164 (Start_encryption) +# at # #010909 4:46:40 server id # end_log_pos # Gtid list [] # at # #010909 4:46:40 server id # end_log_pos # Binlog checkpoint master-bin.000001 diff --git a/mysql-test/suite/binlog_encryption/mysqlbinlog.result b/mysql-test/suite/binlog_encryption/mysqlbinlog.result index 71758f7d6e7..e97e0569571 100644 --- a/mysql-test/suite/binlog_encryption/mysqlbinlog.result +++ b/mysql-test/suite/binlog_encryption/mysqlbinlog.result @@ -4,3 +4,4 @@ INSERT INTO t1 VALUES (1),(2),(3); REPLACE INTO t1 VALUES (4); DROP TABLE t1; FLUSH LOGS; +FOUND 1 /Ignorable event type 164.*/ in binlog_enc.sql diff --git a/mysql-test/suite/binlog_encryption/mysqlbinlog.test b/mysql-test/suite/binlog_encryption/mysqlbinlog.test index b80388aaa45..108dbd8782f 100644 --- a/mysql-test/suite/binlog_encryption/mysqlbinlog.test +++ b/mysql-test/suite/binlog_encryption/mysqlbinlog.test @@ -17,5 +17,8 @@ let outfile=$MYSQLTEST_VARDIR/tmp/binlog_enc.sql; exec $MYSQL_BINLOG $local > $outfile; exec $MYSQL_BINLOG $local --force-read >> $outfile; exec $MYSQL_BINLOG $remote >> $outfile; +--let SEARCH_FILE= $outfile +--let SEARCH_PATTERN= Ignorable event type 164.* +--source include/search_pattern_in_file.inc remove_file $outfile; diff --git a/sql/log_event.cc b/sql/log_event.cc index 0e0d69b515c..66f1d6bc790 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -2077,6 +2077,19 @@ Log_event* Log_event::read_log_event(const char* buf, uint event_len, alg != BINLOG_CHECKSUM_ALG_OFF)) event_len= event_len - BINLOG_CHECKSUM_LEN; + /* + Create an object of Ignorable_log_event for unrecognized sub-class. + So that SLAVE SQL THREAD will only update the position and continue. + We should look for this flag first instead of judging by event_type + Any event can be Ignorable_log_event if it has this flag on. + look into @note of Ignorable_log_event + */ + if (uint2korr(buf + FLAGS_OFFSET) & LOG_EVENT_IGNORABLE_F) + { + ev= new Ignorable_log_event(buf, fdle, + get_type_str((Log_event_type) event_type)); + goto exit; + } switch(event_type) { case QUERY_EVENT: ev = new Query_log_event(buf, event_len, fdle, QUERY_EVENT); @@ -2203,24 +2216,13 @@ Log_event* Log_event::read_log_event(const char* buf, uint event_len, ev = new Start_encryption_log_event(buf, event_len, fdle); break; default: - /* - Create an object of Ignorable_log_event for unrecognized sub-class. - So that SLAVE SQL THREAD will only update the position and continue. - */ - if (uint2korr(buf + FLAGS_OFFSET) & LOG_EVENT_IGNORABLE_F) - { - ev= new Ignorable_log_event(buf, fdle, - get_type_str((Log_event_type) event_type)); - } - else - { - DBUG_PRINT("error",("Unknown event code: %d", - (uchar) buf[EVENT_TYPE_OFFSET])); - ev= NULL; - break; - } + DBUG_PRINT("error",("Unknown event code: %d", + (uchar) buf[EVENT_TYPE_OFFSET])); + ev= NULL; + break; } } +exit: if (ev) { diff --git a/sql/slave.cc b/sql/slave.cc index 88a80029bba..1bc21f8895b 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -6317,7 +6317,18 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len) mi->last_queued_gtid.seq_no == 1000) goto skip_relay_logging; }); + goto default_action; #endif + case START_ENCRYPTION_EVENT: + if (uint2korr(buf + FLAGS_OFFSET) & LOG_EVENT_IGNORABLE_F) + { + /* + If the event was not requested by the slave (the slave did not ask for + it), i.e. has end_log_pos=0, we do not increment mi->master_log_pos + */ + inc_pos= uint4korr(buf+LOG_POS_OFFSET) ? event_len : 0; + break; + } /* fall through */ default: default_action: diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc index edff750a9a3..8a6bea34fb4 100644 --- a/sql/sql_repl.cc +++ b/sql/sql_repl.cc @@ -399,16 +399,27 @@ static int send_file(THD *thd) /** Internal to mysql_binlog_send() routine that recalculates checksum for - a FD event (asserted) that needs additional arranment prior sending to slave. + 1. FD event (asserted) that needs additional arranment prior sending to slave. + 2. Start_encryption_log_event whose Ignored flag is set +TODO DBUG_ASSERT can be removed if this function is used for more general cases */ -inline void fix_checksum(String *packet, ulong ev_offset) + +inline void fix_checksum(enum_binlog_checksum_alg checksum_alg, String *packet, + ulong ev_offset) { + if (checksum_alg == BINLOG_CHECKSUM_ALG_OFF || + checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF) + return; /* recalculate the crc for this event */ uint data_len = uint4korr(packet->ptr() + ev_offset + EVENT_LEN_OFFSET); ha_checksum crc; - DBUG_ASSERT(data_len == + DBUG_ASSERT((data_len == LOG_EVENT_MINIMAL_HEADER_LEN + FORMAT_DESCRIPTION_HEADER_LEN + - BINLOG_CHECKSUM_ALG_DESC_LEN + BINLOG_CHECKSUM_LEN); + BINLOG_CHECKSUM_ALG_DESC_LEN + BINLOG_CHECKSUM_LEN) || + (data_len == + LOG_EVENT_MINIMAL_HEADER_LEN + BINLOG_CRYPTO_SCHEME_LENGTH + + BINLOG_KEY_VERSION_LENGTH + BINLOG_NONCE_LENGTH + + BINLOG_CHECKSUM_LEN)); crc= my_checksum(0, (uchar *)packet->ptr() + ev_offset, data_len - BINLOG_CHECKSUM_LEN); int4store(packet->ptr() + ev_offset + data_len - BINLOG_CHECKSUM_LEN, crc); @@ -2135,6 +2146,7 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log, THD *thd= info->thd; String *packet= info->packet; Log_event_type event_type; + bool initial_log_pos= info->clear_initial_log_pos; DBUG_ENTER("send_format_descriptor_event"); /** @@ -2233,7 +2245,7 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log, (*packet)[FLAGS_OFFSET+ev_offset] &= ~LOG_EVENT_BINLOG_IN_USE_F; - if (info->clear_initial_log_pos) + if (initial_log_pos) { info->clear_initial_log_pos= false; /* @@ -2251,9 +2263,7 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log, ST_CREATED_OFFSET+ev_offset, (ulong) 0); /* fix the checksum due to latest changes in header */ - if (info->current_checksum_alg != BINLOG_CHECKSUM_ALG_OFF && - info->current_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF) - fix_checksum(packet, ev_offset); + fix_checksum(info->current_checksum_alg, packet, ev_offset); } else if (info->using_gtid_state) { @@ -2274,9 +2284,7 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log, { int4store((char*) packet->ptr()+LOG_EVENT_MINIMAL_HEADER_LEN+ ST_CREATED_OFFSET+ev_offset, (ulong) 0); - if (info->current_checksum_alg != BINLOG_CHECKSUM_ALG_OFF && - info->current_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF) - fix_checksum(packet, ev_offset); + fix_checksum(info->current_checksum_alg, packet, ev_offset); } } @@ -2289,12 +2297,16 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log, } /* - Read the following Start_encryption_log_event but don't send it to slave. - Slave doesn't need to know whether master's binlog is encrypted, - and if it'll want to encrypt its logs, it should generate its own - random nonce, not use the one from the master. + Read the following Start_encryption_log_event and send it to slave as + Ignorable_log_event. Although Slave doesn't need to know whether master's + binlog is encrypted but it needs to update slave log pos (for mysqlbinlog). + + If slave want to encrypt its logs, it should generate its own + random nonce, it should not use the one from the master. */ - packet->length(0); + /* reset transmit packet for the event read from binary log file */ + if (reset_transmit_packet(info, info->flags, &ev_offset, &info->errmsg)) + DBUG_RETURN(1); info->last_pos= linfo->pos; error= Log_event::read_log_event(log, packet, info->fdev, opt_master_verify_checksum @@ -2308,12 +2320,13 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log, DBUG_RETURN(1); } - event_type= (Log_event_type)((uchar)(*packet)[LOG_EVENT_OFFSET]); + event_type= (Log_event_type)((uchar)(*packet)[LOG_EVENT_OFFSET + ev_offset]); if (event_type == START_ENCRYPTION_EVENT) { Start_encryption_log_event *sele= (Start_encryption_log_event *) - Log_event::read_log_event(packet->ptr(), packet->length(), &info->errmsg, - info->fdev, BINLOG_CHECKSUM_ALG_OFF); + Log_event::read_log_event(packet->ptr() + ev_offset, packet->length() + - ev_offset, &info->errmsg, info->fdev, + BINLOG_CHECKSUM_ALG_OFF); if (!sele) { info->error= ER_MASTER_FATAL_ERROR_READING_BINLOG; @@ -2327,6 +2340,18 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log, delete sele; DBUG_RETURN(1); } + /* Make it Ignorable_log_event and send it */ + (*packet)[FLAGS_OFFSET+ev_offset] |= LOG_EVENT_IGNORABLE_F; + if (initial_log_pos) + int4store((char*) packet->ptr()+LOG_POS_OFFSET+ev_offset, (ulong) 0); + /* fix the checksum due to latest changes in header */ + fix_checksum(info->current_checksum_alg, packet, ev_offset); + if (my_net_write(info->net, (uchar*) packet->ptr(), packet->length())) + { + info->errmsg= "Failed on my_net_write()"; + info->error= ER_UNKNOWN_ERROR; + DBUG_RETURN(1); + } delete sele; } else if (start_pos == BIN_LOG_HEADER_SIZE) From d480d28f4f389d34248e3889cc059238a803008e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 8 Oct 2019 18:18:48 +0300 Subject: [PATCH 08/11] Add page_has_prev(), page_has_next(), page_has_siblings() Until now, InnoDB inefficiently compared the aligned fields FIL_PAGE_PREV, FIL_PAGE_NEXT to the byte-order-agnostic value FIL_NULL. This is a backport of 32170f8c6d55c497ae7a791997e17ff7c992b86f from MariaDB Server 10.3. --- storage/innobase/btr/btr0btr.cc | 12 +++++------ storage/innobase/btr/btr0cur.cc | 28 ++++++++++---------------- storage/innobase/btr/btr0defragment.cc | 2 +- storage/innobase/dict/dict0stats.cc | 4 ++-- storage/innobase/gis/gis0rtree.cc | 2 +- storage/innobase/gis/gis0sea.cc | 2 +- storage/innobase/ibuf/ibuf0ibuf.cc | 3 +-- storage/innobase/include/btr0cur.ic | 15 ++++++-------- storage/innobase/include/btr0pcur.ic | 16 ++++----------- storage/innobase/include/page0page.h | 18 +++++++++++++++++ storage/innobase/page/page0cur.cc | 2 +- storage/innobase/page/page0page.cc | 3 +-- storage/innobase/page/page0zip.cc | 11 ++++------ 13 files changed, 56 insertions(+), 62 deletions(-) diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc index f2b7b0d3f73..1bd49fa048e 100644 --- a/storage/innobase/btr/btr0btr.cc +++ b/storage/innobase/btr/btr0btr.cc @@ -1987,7 +1987,7 @@ btr_root_raise_and_insert( they should already have been set. The previous node field must be FIL_NULL if root_page_zip != NULL, because the REC_INFO_MIN_REC_FLAG (of the first user record) will be - set if and only if btr_page_get_prev() == FIL_NULL. */ + set if and only if !page_has_prev(). */ btr_page_set_next(root, root_page_zip, FIL_NULL, mtr); btr_page_set_prev(root, root_page_zip, FIL_NULL, mtr); @@ -3361,8 +3361,7 @@ btr_lift_page_up( bool lift_father_up; buf_block_t* block_orig = block; - ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL); - ut_ad(btr_page_get_next(page, mtr) == FIL_NULL); + ut_ad(!page_has_siblings(page)); ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table)); page_level = btr_page_get_level(page, mtr); @@ -3429,8 +3428,7 @@ btr_lift_page_up( page = buf_block_get_frame(block); page_level = btr_page_get_level(page, mtr); - ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL); - ut_ad(btr_page_get_next(page, mtr) == FIL_NULL); + ut_ad(!page_has_siblings(page)); ut_ad(mtr_is_block_fix( mtr, block, MTR_MEMO_PAGE_X_FIX, index->table)); @@ -5077,13 +5075,13 @@ loop: if (left_page_no == FIL_NULL) { ut_a(node_ptr == page_rec_get_next( page_get_infimum_rec(father_page))); - ut_a(btr_page_get_prev(father_page, &mtr) == FIL_NULL); + ut_a(!page_has_prev(father_page)); } if (right_page_no == FIL_NULL) { ut_a(node_ptr == page_rec_get_prev( page_get_supremum_rec(father_page))); - ut_a(btr_page_get_next(father_page, &mtr) == FIL_NULL); + ut_a(!page_has_next(father_page)); } else { const rec_t* right_node_ptr; diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index b23cacac227..d0d453ed828 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -595,10 +595,10 @@ btr_cur_will_modify_tree( /* is first, 2nd or last record */ if (page_rec_is_first(rec, page) - || (mach_read_from_4(page + FIL_PAGE_NEXT) != FIL_NULL + || (page_has_next(page) && (page_rec_is_last(rec, page) || page_rec_is_second_last(rec, page))) - || (mach_read_from_4(page + FIL_PAGE_PREV) != FIL_NULL + || (page_has_prev(page) && page_rec_is_second(rec, page))) { return(true); } @@ -680,13 +680,10 @@ btr_cur_need_opposite_intention( { switch (lock_intention) { case BTR_INTENTION_DELETE: - return((mach_read_from_4(page + FIL_PAGE_PREV) != FIL_NULL - && page_rec_is_first(rec, page)) - || (mach_read_from_4(page + FIL_PAGE_NEXT) != FIL_NULL - && page_rec_is_last(rec, page))); + return (page_has_prev(page) && page_rec_is_first(rec, page)) || + (page_has_next(page) && page_rec_is_last(rec, page)); case BTR_INTENTION_INSERT: - return(mach_read_from_4(page + FIL_PAGE_NEXT) != FIL_NULL - && page_rec_is_last(rec, page)); + return page_has_next(page) && page_rec_is_last(rec, page); case BTR_INTENTION_BOTH: return(false); } @@ -1898,7 +1895,7 @@ need_opposite_intention: MTR_MEMO_PAGE_S_FIX | MTR_MEMO_PAGE_X_FIX)); - if (btr_page_get_prev(page, mtr) != FIL_NULL + if (page_has_prev(page) && page_rec_is_first(node_ptr, page)) { if (leftmost_from_level == 0) { @@ -2015,7 +2012,7 @@ need_opposite_intention: } else if (!dict_index_is_spatial(index) && latch_mode == BTR_MODIFY_TREE && lock_intention == BTR_INTENTION_INSERT - && mach_read_from_4(page + FIL_PAGE_NEXT) != FIL_NULL + && page_has_next(page) && page_rec_is_last(page_cur_get_rec(page_cursor), page)) { /* btr_insert_into_right_sibling() might cause @@ -5302,7 +5299,7 @@ btr_cur_pessimistic_delete( } else if (UNIV_UNLIKELY(page_rec_is_first(rec, page))) { rec_t* next_rec = page_rec_get_next(rec); - if (btr_page_get_prev(page, mtr) == FIL_NULL) { + if (!page_has_prev(page)) { /* If we delete the leftmost node pointer on a non-leaf level, we must mark the new leftmost node @@ -6325,7 +6322,8 @@ btr_estimate_number_of_different_key_vals( } } - if (n_cols == dict_index_get_n_unique_in_tree(index)) { + if (n_cols == dict_index_get_n_unique_in_tree(index) + && page_has_siblings(page)) { /* If there is more than one leaf page in the tree, we add one because we know that the first record @@ -6336,11 +6334,7 @@ btr_estimate_number_of_different_key_vals( algorithm grossly underestimated the number of rows in the table. */ - if (btr_page_get_prev(page, &mtr) != FIL_NULL - || btr_page_get_next(page, &mtr) != FIL_NULL) { - - n_diff[n_cols - 1]++; - } + n_diff[n_cols - 1]++; } mtr_commit(&mtr); diff --git a/storage/innobase/btr/btr0defragment.cc b/storage/innobase/btr/btr0defragment.cc index 6995c6f0998..bcabfd3a3bf 100644 --- a/storage/innobase/btr/btr0defragment.cc +++ b/storage/innobase/btr/btr0defragment.cc @@ -602,7 +602,7 @@ btr_defragment_n_pages( } if (n_pages == 1) { - if (btr_page_get_prev(first_page, mtr) == FIL_NULL) { + if (!page_has_prev(first_page)) { /* last page in the index */ if (dict_index_get_page(index) == page_get_page_no(first_page)) diff --git a/storage/innobase/dict/dict0stats.cc b/storage/innobase/dict/dict0stats.cc index 4db4687e600..f49fb87e4ea 100644 --- a/storage/innobase/dict/dict0stats.cc +++ b/storage/innobase/dict/dict0stats.cc @@ -1076,7 +1076,7 @@ dict_stats_analyze_index_level( ut_a(btr_page_get_level(page, mtr) == level); /* there should not be any pages on the left */ - ut_a(btr_page_get_prev(page, mtr) == FIL_NULL); + ut_a(!page_has_prev(page)); /* check whether the first record on the leftmost page is marked as such, if we are on a non-leaf level */ @@ -1689,7 +1689,7 @@ dict_stats_analyze_index_for_n_prefix( ut_a(btr_page_get_level(page, mtr) == n_diff_data->level); /* there should not be any pages on the left */ - ut_a(btr_page_get_prev(page, mtr) == FIL_NULL); + ut_a(!page_has_prev(page)); /* check whether the first record on the leftmost page is marked as such; we are on a non-leaf level */ diff --git a/storage/innobase/gis/gis0rtree.cc b/storage/innobase/gis/gis0rtree.cc index 2965a19a5e0..dc9e34903c2 100644 --- a/storage/innobase/gis/gis0rtree.cc +++ b/storage/innobase/gis/gis0rtree.cc @@ -1063,7 +1063,7 @@ func_start: page_no = block->page.id.page_no(); - if (btr_page_get_prev(page, mtr) == FIL_NULL && !page_is_leaf(page)) { + if (!page_has_prev(page) && !page_is_leaf(page)) { first_rec = page_rec_get_next( page_get_infimum_rec(buf_block_get_frame(block))); } diff --git a/storage/innobase/gis/gis0sea.cc b/storage/innobase/gis/gis0sea.cc index f1e4b82fd36..0a0a542689a 100644 --- a/storage/innobase/gis/gis0sea.cc +++ b/storage/innobase/gis/gis0sea.cc @@ -1713,7 +1713,7 @@ rtr_cur_search_with_match( first page as much as possible, as there will be problem when update MIN_REC rec in compress table */ if (buf_block_get_page_zip(block) - && mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL + && !page_has_prev(page) && page_get_n_recs(page) >= 2) { rec = page_rec_get_next_const(rec); diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc index 5581d1d1093..34cba10c6b5 100644 --- a/storage/innobase/ibuf/ibuf0ibuf.cc +++ b/storage/innobase/ibuf/ibuf0ibuf.cc @@ -3254,8 +3254,7 @@ ibuf_get_entry_counter_func( return(ULINT_UNDEFINED); } else if (!page_rec_is_infimum(rec)) { return(ibuf_get_entry_counter_low(mtr, rec, space, page_no)); - } else if (only_leaf - || fil_page_get_prev(page_align(rec)) == FIL_NULL) { + } else if (only_leaf || !page_has_prev(page_align(rec))) { /* The parent node pointer did not contain the searched for (space, page_no), which means that the search ended on the correct page regardless of the diff --git a/storage/innobase/include/btr0cur.ic b/storage/innobase/include/btr0cur.ic index 1decfa09e5b..7cf6c5982fa 100644 --- a/storage/innobase/include/btr0cur.ic +++ b/storage/innobase/include/btr0cur.ic @@ -138,10 +138,9 @@ btr_cur_compress_recommendation( LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page) * 2U, return(FALSE)); - if ((page_get_data_size(page) - < BTR_CUR_PAGE_COMPRESS_LIMIT(cursor->index)) - || ((btr_page_get_next(page, mtr) == FIL_NULL) - && (btr_page_get_prev(page, mtr) == FIL_NULL))) { + if (page_get_data_size(page) + < BTR_CUR_PAGE_COMPRESS_LIMIT(cursor->index) + || !page_has_siblings(page)) { /* The page fillfactor has dropped below a predefined minimum value OR the level in the B-tree contains just @@ -174,11 +173,9 @@ btr_cur_can_delete_without_compress( page = btr_cur_get_page(cursor); - if ((page_get_data_size(page) - rec_size - < BTR_CUR_PAGE_COMPRESS_LIMIT(cursor->index)) - || ((btr_page_get_next(page, mtr) == FIL_NULL) - && (btr_page_get_prev(page, mtr) == FIL_NULL)) - || (page_get_n_recs(page) < 2)) { + if (page_get_data_size(page) - rec_size + < BTR_CUR_PAGE_COMPRESS_LIMIT(cursor->index) + || !page_has_siblings(page) || page_get_n_recs(page) < 2) { /* The page fillfactor will drop below a predefined minimum value, OR the level in the B-tree contains just diff --git a/storage/innobase/include/btr0pcur.ic b/storage/innobase/include/btr0pcur.ic index 51ebcfbb2ee..8b0da666250 100644 --- a/storage/innobase/include/btr0pcur.ic +++ b/storage/innobase/include/btr0pcur.ic @@ -219,12 +219,8 @@ btr_pcur_is_before_first_in_tree( ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); ut_ad(cursor->latch_mode != BTR_NO_LATCHES); - if (btr_page_get_prev(btr_pcur_get_page(cursor), mtr) != FIL_NULL) { - - return(FALSE); - } - - return(page_cur_is_before_first(btr_pcur_get_page_cur(cursor))); + return !page_has_prev(btr_pcur_get_page(cursor)) + && page_cur_is_before_first(btr_pcur_get_page_cur(cursor)); } /*********************************************************//** @@ -240,12 +236,8 @@ btr_pcur_is_after_last_in_tree( ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); ut_ad(cursor->latch_mode != BTR_NO_LATCHES); - if (btr_page_get_next(btr_pcur_get_page(cursor), mtr) != FIL_NULL) { - - return(FALSE); - } - - return(page_cur_is_after_last(btr_pcur_get_page_cur(cursor))); + return !page_has_next(btr_pcur_get_page(cursor)) + && page_cur_is_after_last(btr_pcur_get_page_cur(cursor)); } /*********************************************************//** diff --git a/storage/innobase/include/page0page.h b/storage/innobase/include/page0page.h index ee261b55d16..c50e72a6542 100644 --- a/storage/innobase/include/page0page.h +++ b/storage/innobase/include/page0page.h @@ -697,6 +697,24 @@ inline bool page_has_siblings(const page_t* page) != ~uint64_t(0); } +/** Determine whether a page has a predecessor. +@param[in] page page frame +@return true if the page has a predecessor */ +inline bool page_has_prev(const page_t* page) +{ + return *reinterpret_cast(page + FIL_PAGE_PREV) + != FIL_NULL; +} + +/** Determine whether a page has a successor. +@param[in] page page frame +@return true if the page has a successor */ +inline bool page_has_next(const page_t* page) +{ + return *reinterpret_cast(page + FIL_PAGE_NEXT) + != FIL_NULL; +} + /************************************************************//** Gets the pointer to the next record on the page. @return pointer to next record */ diff --git a/storage/innobase/page/page0cur.cc b/storage/innobase/page/page0cur.cc index 92ff3e104ba..4071db7b4d9 100644 --- a/storage/innobase/page/page0cur.cc +++ b/storage/innobase/page/page0cur.cc @@ -521,7 +521,7 @@ up_rec_match: ulint rec_info = rec_get_info_bits(mid_rec, rec_offs_comp(offsets)); ut_ad(rec_info & REC_INFO_MIN_REC_FLAG); - ut_ad(btr_page_get_prev(page, &mtr) == FIL_NULL); + ut_ad(!page_has_prev(page)); mtr_commit(&mtr); #endif diff --git a/storage/innobase/page/page0page.cc b/storage/innobase/page/page0page.cc index 12a0f44011d..f950af02e31 100644 --- a/storage/innobase/page/page0page.cc +++ b/storage/innobase/page/page0page.cc @@ -2764,8 +2764,7 @@ page_delete_rec( if (!rec_offs_any_extern(offsets) && ((page_get_data_size(page) - rec_offs_size(offsets) < BTR_CUR_PAGE_COMPRESS_LIMIT(index)) - || (mach_read_from_4(page + FIL_PAGE_NEXT) == FIL_NULL - && mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL) + || !page_has_siblings(page) || (page_get_n_recs(page) < 2))) { ulint root_page_no = dict_index_get_page(index); diff --git a/storage/innobase/page/page0zip.cc b/storage/innobase/page/page0zip.cc index e90c5f67f8e..9e3e58efa3a 100644 --- a/storage/innobase/page/page0zip.cc +++ b/storage/innobase/page/page0zip.cc @@ -670,8 +670,7 @@ page_zip_dir_encode( status = REC_STATUS_ORDINARY; } else { status = REC_STATUS_NODE_PTR; - if (UNIV_UNLIKELY - (mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL)) { + if (UNIV_UNLIKELY(!page_has_prev(page))) { min_mark = REC_INFO_MIN_REC_FLAG; } } @@ -3187,8 +3186,7 @@ zlib_error: goto err_exit; } - info_bits = mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL - ? REC_INFO_MIN_REC_FLAG : 0; + info_bits = page_has_prev(page) ? 0 : REC_INFO_MIN_REC_FLAG; if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip, page, info_bits))) { @@ -4826,9 +4824,8 @@ page_zip_copy_recs( + page_zip->m_end < page_zip_get_size(page_zip)); if (!page_is_leaf(src) - && UNIV_UNLIKELY(mach_read_from_4(src + FIL_PAGE_PREV) == FIL_NULL) - && UNIV_LIKELY(mach_read_from_4(page - + FIL_PAGE_PREV) != FIL_NULL)) { + && UNIV_UNLIKELY(!page_has_prev(src)) + && UNIV_LIKELY(page_has_prev(page))) { /* Clear the REC_INFO_MIN_REC_FLAG of the first user record. */ ulint offs = rec_get_next_offs(page + PAGE_NEW_INFIMUM, TRUE); From 99dc40d6ac2234fa4c990665cc1914c1925cd641 Mon Sep 17 00:00:00 2001 From: Eugene Kosov Date: Mon, 30 Sep 2019 20:58:50 +0300 Subject: [PATCH 09/11] MDEV-19783 Random crashes and corrupt data in INSTANT-added columns The bug affects MariaDB Server 10.3 or later, but it makes sense to improve CHECK TABLE in earlier versions already. page_validate(): Check REC_INFO_MIN_REC_FLAG in the records. This allows CHECK TABLE to catch more bugs. --- storage/innobase/page/page0page.cc | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/storage/innobase/page/page0page.cc b/storage/innobase/page/page0page.cc index f950af02e31..bda4693648b 100644 --- a/storage/innobase/page/page0page.cc +++ b/storage/innobase/page/page0page.cc @@ -2392,6 +2392,7 @@ page_validate( ulint data_size; const rec_t* rec; const rec_t* old_rec = NULL; + const rec_t* first_rec = NULL; ulint offs; ulint n_slots; ibool ret = FALSE; @@ -2488,6 +2489,21 @@ page_validate( goto func_exit; } + if (rec == first_rec) { + if ((rec_get_info_bits(rec, page_is_comp(page)) + & REC_INFO_MIN_REC_FLAG) + && page_is_leaf(page)) { + ib::error() << "REC_INFO_MIN_REC_FLAG " + "is set in a leaf-page record"; + ret = false; + } + } else if (rec_get_info_bits(rec, page_is_comp(page)) + & REC_INFO_MIN_REC_FLAG) { + ib::error() << "REC_INFO_MIN_REC_FLAG record is not " + "first in page"; + ret = false; + } + /* Check that the records are in the ascending order */ if (count >= PAGE_HEAP_NO_USER_LOW && !page_rec_is_supremum(rec)) { @@ -2599,6 +2615,11 @@ page_validate( old_rec = rec; rec = page_rec_get_next_const(rec); + if (page_rec_is_infimum(old_rec) + && page_rec_is_user_rec(rec)) { + first_rec = rec; + } + /* set old_offsets to offsets; recycle offsets */ { ulint* offs = old_offsets; From ed0793e096a17955c5a03844b248bcf8303dd335 Mon Sep 17 00:00:00 2001 From: Eugene Kosov Date: Wed, 2 Oct 2019 22:47:45 +0300 Subject: [PATCH 10/11] MDEV-19783: Add more REC_INFO_MIN_REC_FLAG checks btr_cur_pessimistic_delete(): code changed in a way that allows to put more REC_INFO_MIN_REC_FLAG assertions inside btr_set_min_rec_mark(). Without that change tests innodb.innodb-table-online, innodb.temp_table_savepoint and innodb_zip.prefix_index_liftedlimit fail. Removed basically duplicated page_zip_validate() calls which fails because of temporary(!) invariant violation. That fixed innodb_zip.wl5522_debug_zip and innodb_zip.prefix_index_liftedlimit --- storage/innobase/btr/btr0btr.cc | 21 +++++++++------------ storage/innobase/btr/btr0cur.cc | 26 ++++++++++++++++---------- storage/innobase/include/btr0btr.h | 12 ++++-------- storage/innobase/include/page0page.ic | 4 ++++ storage/innobase/page/page0cur.cc | 4 ---- storage/innobase/page/page0zip.cc | 4 ---- 6 files changed, 33 insertions(+), 38 deletions(-) diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc index 1bd49fa048e..473e4248656 100644 --- a/storage/innobase/btr/btr0btr.cc +++ b/storage/innobase/btr/btr0btr.cc @@ -3309,25 +3309,22 @@ btr_parse_set_min_rec_mark( return(ptr + 2); } -/****************************************************************//** -Sets a record as the predefined minimum record. */ -void -btr_set_min_rec_mark( -/*=================*/ - rec_t* rec, /*!< in: record */ - mtr_t* mtr) /*!< in: mtr */ +/** Sets a record as the predefined minimum record. */ +void btr_set_min_rec_mark(rec_t* rec, mtr_t* mtr) { - ulint info_bits; + const bool comp = page_rec_is_comp(rec); - if (page_rec_is_comp(rec)) { - info_bits = rec_get_info_bits(rec, TRUE); + ut_ad(rec == page_rec_get_next_const(page_get_infimum_rec( + page_align(rec)))); + ut_ad(!(rec_get_info_bits(page_rec_get_next(rec), comp) + & REC_INFO_MIN_REC_FLAG)); + size_t info_bits = rec_get_info_bits(rec, comp); + if (comp) { rec_set_info_bits_new(rec, info_bits | REC_INFO_MIN_REC_FLAG); btr_set_min_rec_mark_log(rec, MLOG_COMP_REC_MIN_MARK, mtr); } else { - info_bits = rec_get_info_bits(rec, FALSE); - rec_set_info_bits_old(rec, info_bits | REC_INFO_MIN_REC_FLAG); btr_set_min_rec_mark_log(rec, MLOG_REC_MIN_MARK, mtr); diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index d0d453ed828..3060865bc8e 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -5277,8 +5277,15 @@ btr_cur_pessimistic_delete( #endif /* UNIV_ZIP_DEBUG */ } - if (flags == 0) { - lock_update_delete(block, rec); + rec_t* next_rec = NULL; + bool min_mark_next_rec = false; + + if (page_is_leaf(page)) { + ut_ad(!(rec_get_info_bits(rec, page_rec_is_comp(rec)) + & REC_INFO_MIN_REC_FLAG)); + if (flags == 0) { + lock_update_delete(block, rec); + } } if (UNIV_UNLIKELY(page_get_n_recs(page) < 2) @@ -5297,20 +5304,14 @@ btr_cur_pessimistic_delete( if (page_is_leaf(page)) { btr_search_update_hash_on_delete(cursor); } else if (UNIV_UNLIKELY(page_rec_is_first(rec, page))) { - rec_t* next_rec = page_rec_get_next(rec); + next_rec = page_rec_get_next(rec); if (!page_has_prev(page)) { - /* If we delete the leftmost node pointer on a non-leaf level, we must mark the new leftmost node pointer as the predefined minimum record */ - /* This will make page_zip_validate() fail until - page_cur_delete_rec() completes. This is harmless, - because everything will take place within a single - mini-transaction and because writing to the redo log - is an atomic operation (performed by mtr_commit()). */ - btr_set_min_rec_mark(next_rec, mtr); + min_mark_next_rec = true; } else if (dict_index_is_spatial(index)) { /* For rtree, if delete the leftmost node pointer, we need to update parent page. */ @@ -5379,6 +5380,11 @@ btr_cur_pessimistic_delete( block->page.size, mtr); page_cur_delete_rec(btr_cur_get_page_cur(cursor), index, offsets, mtr); + + if (min_mark_next_rec) { + btr_set_min_rec_mark(next_rec, mtr); + } + #ifdef UNIV_ZIP_DEBUG ut_a(!page_zip || page_zip_validate(page_zip, page, index)); #endif /* UNIV_ZIP_DEBUG */ diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h index e00b2545708..604890ed03c 100644 --- a/storage/innobase/include/btr0btr.h +++ b/storage/innobase/include/btr0btr.h @@ -508,14 +508,10 @@ btr_insert_on_non_leaf_level_func( mtr_t* mtr); /*!< in: mtr */ #define btr_insert_on_non_leaf_level(f,i,l,t,m) \ btr_insert_on_non_leaf_level_func(f,i,l,t,__FILE__,__LINE__,m) -/****************************************************************//** -Sets a record as the predefined minimum record. */ -void -btr_set_min_rec_mark( -/*=================*/ - rec_t* rec, /*!< in/out: record */ - mtr_t* mtr) /*!< in: mtr */ - MY_ATTRIBUTE((nonnull)); + +/** Sets a record as the predefined minimum record. */ +void btr_set_min_rec_mark(rec_t* rec, mtr_t* mtr) MY_ATTRIBUTE((nonnull)); + /** Seek to the parent page of a B-tree page. @param[in,out] index b-tree @param[in] block child page diff --git a/storage/innobase/include/page0page.ic b/storage/innobase/include/page0page.ic index 05774daac50..3956ecce0ee 100644 --- a/storage/innobase/include/page0page.ic +++ b/storage/innobase/include/page0page.ic @@ -659,6 +659,10 @@ page_rec_get_next_low( return(NULL); } + ut_ad(page_rec_is_infimum(rec) + || !(rec_get_info_bits(page + offs, comp) + & REC_INFO_MIN_REC_FLAG)); + return(page + offs); } diff --git a/storage/innobase/page/page0cur.cc b/storage/innobase/page/page0cur.cc index 4071db7b4d9..90c0dd377e1 100644 --- a/storage/innobase/page/page0cur.cc +++ b/storage/innobase/page/page0cur.cc @@ -2421,10 +2421,6 @@ page_cur_delete_rec( if (cur_n_owned <= PAGE_DIR_SLOT_MIN_N_OWNED) { page_dir_balance_slot(page, page_zip, cur_slot_no); } - -#ifdef UNIV_ZIP_DEBUG - ut_a(!page_zip || page_zip_validate(page_zip, page, index)); -#endif /* UNIV_ZIP_DEBUG */ } #ifdef UNIV_COMPILE_TEST_FUNCS diff --git a/storage/innobase/page/page0zip.cc b/storage/innobase/page/page0zip.cc index 9e3e58efa3a..d6abec57821 100644 --- a/storage/innobase/page/page0zip.cc +++ b/storage/innobase/page/page0zip.cc @@ -4291,10 +4291,6 @@ page_zip_clear_rec( } else { ut_ad(!rec_offs_any_extern(offsets)); } - -#ifdef UNIV_ZIP_DEBUG - ut_a(page_zip_validate(page_zip, page, index)); -#endif /* UNIV_ZIP_DEBUG */ } /**********************************************************************//** From c65cb244b35412ef54192b17120f7ace8a8af5fd Mon Sep 17 00:00:00 2001 From: Thirunarayanan Balathandayuthapani Date: Thu, 3 Oct 2019 18:12:08 +0530 Subject: [PATCH 11/11] MDEV-19335 Remove buf_page_t::encrypted The field buf_page_t::encrypted was added in MDEV-8588. It was made mostly redundant in MDEV-12699. Remove the field. --- extra/mariabackup/fil_cur.cc | 4 +- storage/innobase/buf/buf0buf.cc | 80 +++++++--------------------- storage/innobase/fil/fil0crypt.cc | 6 +-- storage/innobase/ibuf/ibuf0ibuf.cc | 4 +- storage/innobase/include/buf0buf.h | 2 - storage/innobase/include/fil0crypt.h | 4 +- 6 files changed, 24 insertions(+), 76 deletions(-) diff --git a/extra/mariabackup/fil_cur.cc b/extra/mariabackup/fil_cur.cc index dce0f9ba6f2..b229a37d934 100644 --- a/extra/mariabackup/fil_cur.cc +++ b/extra/mariabackup/fil_cur.cc @@ -339,11 +339,9 @@ static bool page_is_corrupted(const byte *page, ulint page_no, memcpy(tmp_page, page, page_size); - bool decrypted = false; if (!space->crypt_data || space->crypt_data->type == CRYPT_SCHEME_UNENCRYPTED - || !fil_space_decrypt(space, tmp_frame, tmp_page, - &decrypted)) { + || !fil_space_decrypt(space, tmp_frame, tmp_page)) { return true; } diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index e3c6605652f..b5ca51c81dc 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -589,12 +589,6 @@ decrypt_failed: << mach_read_from_4( FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + dst_frame); - /* Mark page encrypted in case it should be. */ - if (space->crypt_data->type - != CRYPT_SCHEME_UNENCRYPTED) { - bpage->encrypted = true; - } - return false; } @@ -605,8 +599,7 @@ decrypt_failed: ut_d(fil_page_type_validate(dst_frame)); /* decrypt using crypt_buf to dst_frame */ - if (!fil_space_decrypt(space, slot->crypt_buf, - dst_frame, &bpage->encrypted)) { + if (!fil_space_decrypt(space, slot->crypt_buf, dst_frame)) { slot->release(); goto decrypt_failed; } @@ -1534,7 +1527,6 @@ buf_block_init( block->page.buf_fix_count = 0; block->page.io_fix = BUF_IO_NONE; block->page.flush_observer = NULL; - block->page.encrypted = false; block->page.real_size = 0; block->page.write_size = 0; block->modify_clock = 0; @@ -4044,7 +4036,6 @@ err_exit: if (encrypted) { ib::info() << "Row compressed page could be encrypted" " with key_version " << key_version; - block->page.encrypted = true; dict_set_encrypted_by_space(block->page.id.space()); } else { dict_set_corrupted_by_space(block->page.id.space()); @@ -5248,7 +5239,6 @@ buf_page_init_low( bpage->newest_modification = 0; bpage->oldest_modification = 0; bpage->write_size = 0; - bpage->encrypted = false; bpage->real_size = 0; bpage->slot = NULL; @@ -5841,15 +5831,16 @@ buf_page_monitor( } /** Mark a table corrupted. -@param[in] bpage Corrupted page. */ -static -void -buf_mark_space_corrupt(buf_page_t* bpage) +@param[in] bpage corrupted page +@param[in] space tablespace of the corrupted page */ +ATTRIBUTE_COLD +static void buf_mark_space_corrupt(buf_page_t* bpage, const fil_space_t& space) { /* If block is not encrypted find the table with specified space id, and mark it corrupted. Encrypted tables are marked unusable later e.g. in ::open(). */ - if (!bpage->encrypted) { + if (!space.crypt_data + || space.crypt_data->type == CRYPT_SCHEME_UNENCRYPTED) { dict_set_corrupted_by_space(bpage->id.space()); } else { dict_set_encrypted_by_space(bpage->id.space()); @@ -5891,7 +5882,7 @@ buf_corrupt_page_release(buf_page_t* bpage, const fil_space_t* space) mutex_exit(buf_page_get_mutex(bpage)); if (!srv_force_recovery) { - buf_mark_space_corrupt(bpage); + buf_mark_space_corrupt(bpage, *space); } /* After this point bpage can't be referenced. */ @@ -5921,7 +5912,6 @@ static dberr_t buf_page_check_corrupt(buf_page_t* bpage, fil_space_t* space) byte* dst_frame = (bpage->zip.data) ? bpage->zip.data : ((buf_block_t*) bpage)->frame; dberr_t err = DB_SUCCESS; - bool corrupted = false; /* In buf_decrypt_after_read we have either decrypted the page if page post encryption checksum matches and used key_id is found @@ -5929,33 +5919,20 @@ static dberr_t buf_page_check_corrupt(buf_page_t* bpage, fil_space_t* space) not decrypted and it could be either encrypted and corrupted or corrupted or good page. If we decrypted, there page could still be corrupted if used key does not match. */ - const bool still_encrypted = mach_read_from_4( + const bool seems_encrypted = mach_read_from_4( dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION) && space->crypt_data - && space->crypt_data->type != CRYPT_SCHEME_UNENCRYPTED - && !bpage->encrypted - && fil_space_verify_crypt_checksum(dst_frame, bpage->size); + && space->crypt_data->type != CRYPT_SCHEME_UNENCRYPTED; - if (!still_encrypted) { - /* If traditional checksums match, we assume that page is - not anymore encrypted. */ - corrupted = buf_page_is_corrupted( - true, dst_frame, bpage->size, space); - - if (!corrupted) { - bpage->encrypted = false; - } else { - err = DB_PAGE_CORRUPTED; - } + /* If traditional checksums match, we assume that page is + not anymore encrypted. */ + if (buf_page_is_corrupted( + true, dst_frame, bpage->size, space)) { + err = DB_PAGE_CORRUPTED; } - /* Pages that we think are unencrypted but do not match the checksum - checks could be corrupted or encrypted or both. */ - if (corrupted && !bpage->encrypted) { - /* An error will be reported by - buf_page_io_complete(). */ - } else if (still_encrypted || (bpage->encrypted && corrupted)) { - bpage->encrypted = true; + if (seems_encrypted && err == DB_PAGE_CORRUPTED + && bpage->id.page_no() != 0) { err = DB_DECRYPTION_FAILED; ib::error() @@ -6017,7 +5994,6 @@ buf_page_io_complete(buf_page_t* bpage, bool dblwr, bool evict) if (io_type == BUF_IO_READ) { ulint read_page_no = 0; ulint read_space_id = 0; - uint key_version = 0; byte* frame = bpage->zip.data ? bpage->zip.data : reinterpret_cast(bpage)->frame; @@ -6057,8 +6033,6 @@ buf_page_io_complete(buf_page_t* bpage, bool dblwr, bool evict) read_page_no = mach_read_from_4(frame + FIL_PAGE_OFFSET); read_space_id = mach_read_from_4( frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); - key_version = mach_read_from_4( - frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); if (bpage->id.space() == TRX_SYS_SPACE && buf_dblwr_page_inside(bpage->id.page_no())) { @@ -6174,23 +6148,9 @@ database_corrupted: && fil_page_get_type(frame) == FIL_PAGE_INDEX && page_is_leaf(frame)) { - if (bpage->encrypted) { - ib::warn() - << "Table in tablespace " - << bpage->id.space() - << " encrypted. However key " - "management plugin or used " - << "key_version " << key_version - << " is not found or" - " used encryption algorithm or method does not match." - " Can't continue opening the table."; - } else { - - ibuf_merge_or_delete_for_page( - (buf_block_t*) bpage, bpage->id, - &bpage->size, TRUE); - } - + ibuf_merge_or_delete_for_page( + (buf_block_t*) bpage, bpage->id, + &bpage->size, TRUE); } fil_space_release_for_io(space); diff --git a/storage/innobase/fil/fil0crypt.cc b/storage/innobase/fil/fil0crypt.cc index e1817985685..918ffd77542 100644 --- a/storage/innobase/fil/fil0crypt.cc +++ b/storage/innobase/fil/fil0crypt.cc @@ -793,7 +793,6 @@ Decrypt a page. @param[in] space Tablespace @param[in] tmp_frame Temporary buffer used for decrypting @param[in,out] src_frame Page to decrypt -@param[out] decrypted true if page was decrypted @return decrypted page, or original not encrypted page if decryption is not needed.*/ UNIV_INTERN @@ -801,13 +800,11 @@ byte* fil_space_decrypt( const fil_space_t* space, byte* tmp_frame, - byte* src_frame, - bool* decrypted) + byte* src_frame) { dberr_t err = DB_SUCCESS; byte* res = NULL; const page_size_t page_size(space->flags); - *decrypted = false; ut_ad(space->crypt_data != NULL && space->crypt_data->is_encrypted()); ut_ad(space->n_pending_ios > 0); @@ -817,7 +814,6 @@ fil_space_decrypt( if (err == DB_SUCCESS) { if (encrypted) { - *decrypted = true; /* Copy the decrypted page back to page buffer, not really any other options. */ memcpy(src_frame, tmp_frame, page_size.physical()); diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc index 34cba10c6b5..ccc471da2aa 100644 --- a/storage/innobase/ibuf/ibuf0ibuf.cc +++ b/storage/innobase/ibuf/ibuf0ibuf.cc @@ -342,10 +342,8 @@ ibuf_header_page_get( page_id_t(IBUF_SPACE_ID, FSP_IBUF_HEADER_PAGE_NO), univ_page_size, RW_X_LATCH, mtr); - - if (!block->page.encrypted) { + if (block) { buf_block_dbg_add_level(block, SYNC_IBUF_HEADER); - page = buf_block_get_frame(block); } diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index 6bfeeb83018..af2a7d90101 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -1468,8 +1468,6 @@ public: if written again we check is TRIM operation needed. */ - bool encrypted; /*!< page is still encrypted */ - ulint real_size; /*!< Real size of the page Normal pages == UNIV_PAGE_SIZE page compressed pages, payload diff --git a/storage/innobase/include/fil0crypt.h b/storage/innobase/include/fil0crypt.h index 24108f95167..3c56315ee9a 100644 --- a/storage/innobase/include/fil0crypt.h +++ b/storage/innobase/include/fil0crypt.h @@ -375,7 +375,6 @@ Decrypt a page @param[in] space Tablespace @param[in] tmp_frame Temporary buffer used for decrypting @param[in,out] src_frame Page to decrypt -@param[out] decrypted true if page was decrypted @return decrypted page, or original not encrypted page if decryption is not needed.*/ UNIV_INTERN @@ -383,8 +382,7 @@ byte* fil_space_decrypt( const fil_space_t* space, byte* tmp_frame, - byte* src_frame, - bool* decrypted) + byte* src_frame) MY_ATTRIBUTE((warn_unused_result)); /******************************************************************