From 60f08dd55563f23b80e118a863113116bf7443e2 Mon Sep 17 00:00:00 2001 From: Monty Date: Thu, 18 Jun 2020 11:57:19 +0300 Subject: [PATCH] MDEV-22925 ALTER TABLE s3_table ENGINE=Aria can cause failure on slave When converting a table (test.s3_table) from S3 to another engine, the following will be logged to the binary log: DROP TABLE IF EXISTS test.t1; CREATE OR REPLACE TABLE test.t1 (...) ENGINE=new_engine INSERT rows to test.t1 in binary-row-log-format The bug is that the above statements are logged one by one to the binary log. This means that a fast slave, configured to use the same S3 storage as the master, would be able to execute the DROP and CREATE from the binary log before the master has finished the ALTER TABLE. In this case the slave would ignore the DROP (as it's on a S3 table) but it will stop on CREATE of the local tale, as the table is still exists in S3. The REPLACE part will be ignored by the slave as it can't touch the S3 table. The fix is to ensure that all the above statements is written to binary log AFTER the table has been deleted from S3. --- mysql-test/suite/s3/replication.inc | 2 +- mysql-test/suite/s3/replication_mixed.result | 12 ++++------- .../suite/s3/replication_partition.result | 6 ++---- mysql-test/suite/s3/replication_stmt.result | 12 ++++------- sql/handler.h | 2 ++ sql/log.cc | 5 ++++- sql/log_event.h | 2 +- sql/log_event_server.cc | 1 + sql/sql_insert.cc | 21 ++++++++++++++++--- sql/sql_insert.h | 2 +- sql/sql_priv.h | 2 ++ sql/sql_show.cc | 4 +++- sql/sql_table.cc | 8 ++++--- 13 files changed, 48 insertions(+), 31 deletions(-) diff --git a/mysql-test/suite/s3/replication.inc b/mysql-test/suite/s3/replication.inc index deef0209542..26dd9a5da25 100644 --- a/mysql-test/suite/s3/replication.inc +++ b/mysql-test/suite/s3/replication.inc @@ -5,7 +5,7 @@ # Tests for S3 replication # -connection slave; +sync_slave_with_master; let $SLAVE_DATADIR= `select @@datadir`; connection master; diff --git a/mysql-test/suite/s3/replication_mixed.result b/mysql-test/suite/s3/replication_mixed.result index 98b9a78d5f5..47cf907b187 100644 --- a/mysql-test/suite/s3/replication_mixed.result +++ b/mysql-test/suite/s3/replication_mixed.result @@ -194,34 +194,30 @@ slave-bin.000001 # Gtid # # GTID #-#-# slave-bin.000001 # Query # # use `database`; flush tables slave-bin.000001 # Gtid # # GTID #-#-# slave-bin.000001 # Query # # use `database`; set @@sql_if_exists=1; alter table t1 add column e int, engine=s3 -slave-bin.000001 # Gtid # # GTID #-#-# +slave-bin.000001 # Gtid # # BEGIN GTID #-#-# slave-bin.000001 # Query # # use `database`; DROP TABLE IF EXISTS `t1` /* generated by server */ -slave-bin.000001 # Gtid # # GTID #-#-# -slave-bin.000001 # Query # # use `database`; CREATE TABLE `t1` ( +slave-bin.000001 # Query # # use `database`; CREATE OR REPLACE TABLE `t1` ( `a` int(11) DEFAULT NULL, `b` int(11) DEFAULT NULL, `c` int(11) DEFAULT NULL, `d` int(11) DEFAULT NULL, `e` int(11) DEFAULT NULL ) ENGINE=Aria DEFAULT CHARSET=latin1 PAGE_CHECKSUM=1 -slave-bin.000001 # Gtid # # BEGIN GTID #-#-# slave-bin.000001 # Annotate_rows # # alter table t1 engine=aria slave-bin.000001 # Table_map # # table_id: # (database.t1) slave-bin.000001 # Write_rows_v1 # # table_id: # flags: STMT_END_F slave-bin.000001 # Query # # COMMIT slave-bin.000001 # Gtid # # GTID #-#-# slave-bin.000001 # Query # # use `database`; alter table t1 engine=s3 -slave-bin.000001 # Gtid # # GTID #-#-# +slave-bin.000001 # Gtid # # BEGIN GTID #-#-# slave-bin.000001 # Query # # use `database`; DROP TABLE IF EXISTS `t1` /* generated by server */ -slave-bin.000001 # Gtid # # GTID #-#-# -slave-bin.000001 # Query # # use `database`; CREATE TABLE `t2` ( +slave-bin.000001 # Query # # use `database`; CREATE OR REPLACE TABLE `t2` ( `a` int(11) DEFAULT NULL, `b` int(11) DEFAULT NULL, `c` int(11) DEFAULT NULL, `d` int(11) DEFAULT NULL, `e` int(11) DEFAULT NULL ) ENGINE=Aria DEFAULT CHARSET=latin1 PAGE_CHECKSUM=1 -slave-bin.000001 # Gtid # # BEGIN GTID #-#-# slave-bin.000001 # Annotate_rows # # alter table t1 rename t2, engine=aria slave-bin.000001 # Table_map # # table_id: # (database.t2) slave-bin.000001 # Write_rows_v1 # # table_id: # flags: STMT_END_F diff --git a/mysql-test/suite/s3/replication_partition.result b/mysql-test/suite/s3/replication_partition.result index 9df6216ab2c..40fb2aec698 100644 --- a/mysql-test/suite/s3/replication_partition.result +++ b/mysql-test/suite/s3/replication_partition.result @@ -225,10 +225,9 @@ slave-bin.000001 # Gtid # # GTID #-#-# slave-bin.000001 # Query # # use `database`; set @@sql_if_exists=1; rename table t2 to t1 slave-bin.000001 # Gtid # # GTID #-#-# slave-bin.000001 # Query # # use `database`; set @@sql_if_exists=1; alter table t1 drop partition p1 -slave-bin.000001 # Gtid # # GTID #-#-# +slave-bin.000001 # Gtid # # BEGIN GTID #-#-# slave-bin.000001 # Query # # use `database`; DROP TABLE IF EXISTS `t1` /* generated by server */ -slave-bin.000001 # Gtid # # GTID #-#-# -slave-bin.000001 # Query # # use `database`; CREATE TABLE `t1` ( +slave-bin.000001 # Query # # use `database`; CREATE OR REPLACE TABLE `t1` ( `c1` int(11) NOT NULL, `c2` int(11) DEFAULT NULL, PRIMARY KEY (`c1`) @@ -236,7 +235,6 @@ slave-bin.000001 # Query # # use `database`; CREATE TABLE `t1` ( PARTITION BY RANGE (`c1`) (PARTITION `p2` VALUES LESS THAN (300) ENGINE = InnoDB, PARTITION `p3` VALUES LESS THAN (400) ENGINE = InnoDB) -slave-bin.000001 # Gtid # # BEGIN GTID #-#-# slave-bin.000001 # Annotate_rows # # alter table t1 engine=innodb slave-bin.000001 # Table_map # # table_id: # (database.t1) slave-bin.000001 # Write_rows_v1 # # table_id: # flags: STMT_END_F diff --git a/mysql-test/suite/s3/replication_stmt.result b/mysql-test/suite/s3/replication_stmt.result index ba6573bf219..56fd07445a4 100644 --- a/mysql-test/suite/s3/replication_stmt.result +++ b/mysql-test/suite/s3/replication_stmt.result @@ -194,34 +194,30 @@ slave-bin.000001 # Gtid # # GTID #-#-# slave-bin.000001 # Query # # use `database`; flush tables slave-bin.000001 # Gtid # # GTID #-#-# slave-bin.000001 # Query # # use `database`; set @@sql_if_exists=1; alter table t1 add column e int, engine=s3 -slave-bin.000001 # Gtid # # GTID #-#-# +slave-bin.000001 # Gtid # # BEGIN GTID #-#-# slave-bin.000001 # Query # # use `database`; DROP TABLE IF EXISTS `t1` /* generated by server */ -slave-bin.000001 # Gtid # # GTID #-#-# -slave-bin.000001 # Query # # use `database`; CREATE TABLE `t1` ( +slave-bin.000001 # Query # # use `database`; CREATE OR REPLACE TABLE `t1` ( `a` int(11) DEFAULT NULL, `b` int(11) DEFAULT NULL, `c` int(11) DEFAULT NULL, `d` int(11) DEFAULT NULL, `e` int(11) DEFAULT NULL ) ENGINE=Aria DEFAULT CHARSET=latin1 PAGE_CHECKSUM=1 -slave-bin.000001 # Gtid # # BEGIN GTID #-#-# slave-bin.000001 # Annotate_rows # # alter table t1 engine=aria slave-bin.000001 # Table_map # # table_id: # (database.t1) slave-bin.000001 # Write_rows_v1 # # table_id: # flags: STMT_END_F slave-bin.000001 # Query # # COMMIT slave-bin.000001 # Gtid # # GTID #-#-# slave-bin.000001 # Query # # use `database`; alter table t1 engine=s3 -slave-bin.000001 # Gtid # # GTID #-#-# +slave-bin.000001 # Gtid # # BEGIN GTID #-#-# slave-bin.000001 # Query # # use `database`; DROP TABLE IF EXISTS `t1` /* generated by server */ -slave-bin.000001 # Gtid # # GTID #-#-# -slave-bin.000001 # Query # # use `database`; CREATE TABLE `t2` ( +slave-bin.000001 # Query # # use `database`; CREATE OR REPLACE TABLE `t2` ( `a` int(11) DEFAULT NULL, `b` int(11) DEFAULT NULL, `c` int(11) DEFAULT NULL, `d` int(11) DEFAULT NULL, `e` int(11) DEFAULT NULL ) ENGINE=Aria DEFAULT CHARSET=latin1 PAGE_CHECKSUM=1 -slave-bin.000001 # Gtid # # BEGIN GTID #-#-# slave-bin.000001 # Annotate_rows # # alter table t1 rename t2, engine=aria slave-bin.000001 # Table_map # # table_id: # (database.t2) slave-bin.000001 # Write_rows_v1 # # table_id: # flags: STMT_END_F diff --git a/sql/handler.h b/sql/handler.h index ad32a7286ff..61223c4e715 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -627,6 +627,8 @@ given at all. */ /* Create a sequence */ #define HA_CREATE_USED_SEQUENCE (1UL << 25) +/* Tell binlog_show_create_table to print all engine options */ +#define HA_CREATE_PRINT_ALL_OPTIONS (1UL << 26) typedef ulonglong alter_table_operations; typedef bool Log_func(THD*, TABLE*, bool, const uchar*, const uchar*); diff --git a/sql/log.cc b/sql/log.cc index 3e774d2fe8d..abf22e7897a 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -6496,12 +6496,15 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info, my_bool *with_annotate) (WSREP(thd) && !(thd->variables.option_bits & OPTION_BIN_LOG)))) DBUG_RETURN(0); - if (thd->variables.option_bits & OPTION_GTID_BEGIN) + if (thd->variables.option_bits & + (OPTION_GTID_BEGIN | OPTION_BIN_COMMIT_OFF)) { DBUG_PRINT("info", ("OPTION_GTID_BEGIN was set")); /* Wait for commit from binary log before we commit */ direct= 0; using_trans= 1; + /* Set cache_type to ensure we don't get checksums for this event */ + event_info->cache_type= Log_event::EVENT_TRANSACTIONAL_CACHE; } if (thd->binlog_evt_union.do_union) diff --git a/sql/log_event.h b/sql/log_event.h index 639cbfbe7aa..b515036c5e8 100644 --- a/sql/log_event.h +++ b/sql/log_event.h @@ -1232,7 +1232,7 @@ public: */ uint16 flags; - uint16 cache_type; + enum_event_cache_type cache_type; /** A storage to cache the global system variable's value. diff --git a/sql/log_event_server.cc b/sql/log_event_server.cc index 87212773b4d..fb75bee9f21 100644 --- a/sql/log_event_server.cc +++ b/sql/log_event_server.cc @@ -744,6 +744,7 @@ my_bool Log_event::need_checksum() { my_bool ret; DBUG_ENTER("Log_event::need_checksum"); + /* few callers of Log_event::write (incl FD::write, FD constructing code on the slave side, Rotate relay log diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc index 48baabd8803..31badbe2aba 100644 --- a/sql/sql_insert.cc +++ b/sql/sql_insert.cc @@ -77,7 +77,6 @@ #include "sql_audit.h" #include "sql_derived.h" // mysql_handle_derived #include "sql_prepare.h" -#include "rpl_filter.h" // binlog_filter #include #include "debug_sync.h" @@ -4796,8 +4795,12 @@ static int binlog_show_create_table(THD *thd, TABLE *table, to a not shared table. */ -bool binlog_create_table(THD *thd, TABLE *table) +bool binlog_create_table(THD *thd, TABLE *table, bool replace) { + Table_specification_st create_info; + bool result; + ulonglong save_option_bits; + /* Don't log temporary tables in row format */ if (thd->variables.binlog_format == BINLOG_FORMAT_ROW && table->s->tmp_table) @@ -4811,7 +4814,19 @@ bool binlog_create_table(THD *thd, TABLE *table) */ thd->set_current_stmt_binlog_format_row(); table->file->prepare_for_row_logging(); - return binlog_show_create_table(thd, table, 0) != 0; + + create_info.lex_start(); + save_option_bits= thd->variables.option_bits; + if (replace) + create_info.set(DDL_options_st::OPT_OR_REPLACE); + /* Ensure we write ENGINE=xxx and CHARSET=... to binary log */ + create_info.used_fields|= (HA_CREATE_USED_ENGINE | + HA_CREATE_USED_DEFAULT_CHARSET); + /* Ensure we write all engine options to binary log */ + create_info.used_fields|= HA_CREATE_PRINT_ALL_OPTIONS; + result= binlog_show_create_table(thd, table, &create_info) != 0; + thd->variables.option_bits= save_option_bits; + return result; } diff --git a/sql/sql_insert.h b/sql/sql_insert.h index 3f741640c1c..14041976973 100644 --- a/sql/sql_insert.h +++ b/sql/sql_insert.h @@ -43,7 +43,7 @@ int check_duplic_insert_without_overlaps(THD *thd, TABLE *table, int write_record(THD *thd, TABLE *table, COPY_INFO *info, select_result *returning= NULL); void kill_delayed_threads(void); -bool binlog_create_table(THD *thd, TABLE *table); +bool binlog_create_table(THD *thd, TABLE *table, bool replace); bool binlog_drop_table(THD *thd, TABLE *table); #ifdef EMBEDDED_LIBRARY diff --git a/sql/sql_priv.h b/sql/sql_priv.h index 493313da90a..07f07a7150f 100644 --- a/sql/sql_priv.h +++ b/sql/sql_priv.h @@ -182,6 +182,8 @@ #define SELECT_NO_UNLOCK (1ULL << 41) // SELECT, intern #define SELECT_NO_UNLOCK (1ULL << 41) // SELECT, intern #define OPTION_BIN_TMP_LOG_OFF (1ULL << 42) // disable binlog, intern +/* Disable commit of binlog. Used to combine many DDL's and DML's as one */ +#define OPTION_BIN_COMMIT_OFF (1ULL << 43) #define OPTION_LEX_FOUND_COMMENT (1ULL << 0) // intern, parser diff --git a/sql/sql_show.cc b/sql/sql_show.cc index 323d0865b3a..f61c5677ced 100644 --- a/sql/sql_show.cc +++ b/sql/sql_show.cc @@ -1812,7 +1812,9 @@ static void add_table_options(THD *thd, TABLE *table, handlerton *hton; HA_CREATE_INFO create_info; bool check_options= (!(sql_mode & MODE_IGNORE_BAD_TABLE_OPTIONS) && - !create_info_arg); + (!create_info_arg || + create_info_arg->used_fields & + HA_CREATE_PRINT_ALL_OPTIONS)); #ifdef WITH_PARTITION_STORAGE_ENGINE if (table->part_info) diff --git a/sql/sql_table.cc b/sql/sql_table.cc index 5c74cae713b..5c491421511 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -10711,7 +10711,7 @@ do_continue:; thd->binlog_table_should_be_logged(&new_table->s->db)) { /* - We new_table is marked as internal temp table, but we want to have + 'new_table' is marked as internal temp table, but we want to have the logging based on the original table type */ bool res; @@ -10720,9 +10720,11 @@ do_continue:; /* Force row logging, even if the table was created as 'temporary' */ new_table->s->can_do_row_logging= 1; - thd->binlog_start_trans_and_stmt(); - res= binlog_drop_table(thd, table) || binlog_create_table(thd, new_table); + thd->variables.option_bits|= OPTION_BIN_COMMIT_OFF; + res= (binlog_drop_table(thd, table) || + binlog_create_table(thd, new_table, 1)); + thd->variables.option_bits&= ~OPTION_BIN_COMMIT_OFF; new_table->s->tmp_table= org_tmp_table; if (res) goto err_new_table_cleanup;