MDEV-6462: Slave replicating using GTID doesn't recover correctly when master crashes in the middle of transaction

If the slave gets a reconnect in the middle of a GTID event group, normally it will re-fetch that event group, skipping the first part that was already queued for the SQL thread. However, if the master crashed while writing the event group, the group is incomplete. This patch detects this case and makes sure that the transaction is rolled back and nothing is skipped from any following event groups. Similarly, a network proxy might cause the reconnect to end up on a different master server. Detect this by noticing a different server_id, and similarly in this case roll back the partially received group.
2014-09-02 14:07:01 +02:00 · 2014-09-02 14:07:01 +02:00 · 36f50be970
commit 36f50be970
parent fbaaf3688d
6 changed files with 373 additions and 0 deletions
--- a/mysql-test/suite/rpl/r/rpl_gtid_crash.result
+++ b/mysql-test/suite/rpl/r/rpl_gtid_crash.result
@ -112,5 +112,98 @@ SHOW VARIABLES like 'gtid_strict_mode';
 Variable_name	Value
 gtid_strict_mode	ON
 include/start_slave.inc
+*** MDEV-6462: Incorrect recovery on a slave reconnecting to crashed master ***
+set sql_log_bin= 0;
+call mtr.add_suppression("Error writing file 'master-bin'");
+set sql_log_bin= 1;
+set sql_log_bin= 0;
+call mtr.add_suppression("The server_id of master server changed in the middle of GTID");
+call mtr.add_suppression("Unexpected change of master binlog file name in the middle of GTID");
+set sql_log_bin= 1;
+SET GLOBAL debug_dbug="+d,inject_error_writing_xid";
+BEGIN;
+INSERT INTO t1 VALUES (11);
+COMMIT;
+ERROR HY000: Error writing file 'master-bin' (errno: 11 "Resource temporarily unavailable")
+SET GLOBAL debug_dbug="+d,crash_dispatch_command_before";
+COMMIT;
+Got one of the listed errors
+SELECT @@GLOBAL.server_id;
+@@GLOBAL.server_id
+3
+SELECT * from t1 WHERE a > 10 ORDER BY a;
+a
+# Wait 30 seconds for SQL thread to catch up with IO thread
+SELECT * from t1 WHERE a > 10 ORDER BY a;
+a
+# Repeat this with additional transactions on the master
+SET GLOBAL debug_dbug="+d,inject_error_writing_xid";
+BEGIN;
+INSERT INTO t1 VALUES (12);
+COMMIT;
+ERROR HY000: Error writing file 'master-bin' (errno: 11 "Resource temporarily unavailable")
+SET GLOBAL debug_dbug="+d,crash_dispatch_command_before";
+COMMIT;
+Got one of the listed errors
+SELECT @@GLOBAL.server_id;
+@@GLOBAL.server_id
+1
+INSERT INTO t1 VALUES (13);
+INSERT INTO t1 VALUES (14);
+SELECT * from t1 WHERE a > 10 ORDER BY a;
+a
+13
+14
+include/save_master_gtid.inc
+include/sync_with_master_gtid.inc
+SELECT * from t1 WHERE a > 10 ORDER BY a;
+a
+13
+14
+SET GLOBAL debug_dbug="+d,inject_error_writing_xid";
+BEGIN;
+INSERT INTO t1 VALUES (21);
+COMMIT;
+ERROR HY000: Error writing file 'master-bin' (errno: 11 "Resource temporarily unavailable")
+SET GLOBAL debug_dbug="+d,crash_dispatch_command_before";
+COMMIT;
+Got one of the listed errors
+SELECT @@GLOBAL.server_id;
+@@GLOBAL.server_id
+1
+SELECT * from t1 WHERE a > 10 ORDER BY a;
+a
+13
+14
+# Wait 30 seconds for SQL thread to catch up with IO thread
+SELECT * from t1 WHERE a > 10 ORDER BY a;
+a
+13
+14
+# Repeat this with additional transactions on the master
+SET GLOBAL debug_dbug="+d,inject_error_writing_xid";
+BEGIN;
+INSERT INTO t1 VALUES (22);
+COMMIT;
+ERROR HY000: Error writing file 'master-bin' (errno: 11 "Resource temporarily unavailable")
+SET GLOBAL debug_dbug="+d,crash_dispatch_command_before";
+COMMIT;
+Got one of the listed errors
+INSERT INTO t1 VALUES (23);
+INSERT INTO t1 VALUES (24);
+SELECT * from t1 WHERE a > 10 ORDER BY a;
+a
+13
+14
+23
+24
+include/save_master_gtid.inc
+include/sync_with_master_gtid.inc
+SELECT * from t1 WHERE a > 10 ORDER BY a;
+a
+13
+14
+23
+24
 DROP TABLE t1;
 include/rpl_end.inc
--- a/mysql-test/suite/rpl/t/rpl_gtid_crash.test
+++ b/mysql-test/suite/rpl/t/rpl_gtid_crash.test
@ -294,6 +294,192 @@ SHOW VARIABLES like 'gtid_strict_mode';
 eval SET GLOBAL gtid_strict_mode= $old_gtid_strict;
 --enable_query_log

+
+--echo *** MDEV-6462: Incorrect recovery on a slave reconnecting to crashed master ***
+
+--connection server_1
+set sql_log_bin= 0;
+call mtr.add_suppression("Error writing file 'master-bin'");
+set sql_log_bin= 1;
+--connection server_2
+set sql_log_bin= 0;
+call mtr.add_suppression("The server_id of master server changed in the middle of GTID");
+call mtr.add_suppression("Unexpected change of master binlog file name in the middle of GTID");
+set sql_log_bin= 1;
+
+--connection server_1
+--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+wait
+EOF
+
+SET GLOBAL debug_dbug="+d,inject_error_writing_xid";
+BEGIN;
+INSERT INTO t1 VALUES (11);
+--error ER_ERROR_ON_WRITE
+COMMIT;
+SET GLOBAL debug_dbug="+d,crash_dispatch_command_before";
+--error 2006,2013
+COMMIT;
+
+--source include/wait_until_disconnected.inc
+
+# Simulate that we reconnect to a different server (new server_id).
+--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+restart: --server-id=3
+EOF
+
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+
+SELECT @@GLOBAL.server_id;
+SELECT * from t1 WHERE a > 10 ORDER BY a;
+
+--echo # Wait 30 seconds for SQL thread to catch up with IO thread
+--connection server_2
+--let $wait_timeout= 300
+while ($wait_timeout != 0)
+{
+  --let $read_log_pos= query_get_value('SHOW SLAVE STATUS', Read_Master_Log_Pos, 1)
+  --let $exec_log_pos= query_get_value('SHOW SLAVE STATUS', Exec_Master_Log_Pos, 1)
+  if ($read_log_pos == $exec_log_pos)
+  {
+    --let $wait_timeout= 0
+  }
+  if ($read_log_pos != $exec_log_pos)
+  {
+    --sleep 0.1
+    --dec $wait_timeout
+  }
+}
+if ($read_log_pos != $exec_log_pos)
+{
+  --die Timeout wait for SQL thread to catch up with IO thread
+}
+
+SELECT * from t1 WHERE a > 10 ORDER BY a;
+
+--echo # Repeat this with additional transactions on the master
+
+--connection server_1
+--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+wait
+EOF
+
+SET GLOBAL debug_dbug="+d,inject_error_writing_xid";
+BEGIN;
+INSERT INTO t1 VALUES (12);
+--error ER_ERROR_ON_WRITE
+COMMIT;
+SET GLOBAL debug_dbug="+d,crash_dispatch_command_before";
+--error 2006,2013
+COMMIT;
+
+--source include/wait_until_disconnected.inc
+
+# Simulate that we reconnect to a different server (new server_id).
+--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+restart: --server-id=1
+EOF
+
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+
+SELECT @@GLOBAL.server_id;
+INSERT INTO t1 VALUES (13);
+INSERT INTO t1 VALUES (14);
+SELECT * from t1 WHERE a > 10 ORDER BY a;
+--source include/save_master_gtid.inc
+
+--connection server_2
+--source include/sync_with_master_gtid.inc
+SELECT * from t1 WHERE a > 10 ORDER BY a;
+
+--connection server_1
+--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+wait
+EOF
+
+SET GLOBAL debug_dbug="+d,inject_error_writing_xid";
+BEGIN;
+INSERT INTO t1 VALUES (21);
+--error ER_ERROR_ON_WRITE
+COMMIT;
+SET GLOBAL debug_dbug="+d,crash_dispatch_command_before";
+--error 2006,2013
+COMMIT;
+
+--source include/wait_until_disconnected.inc
+
+# Simulate that we reconnect to the same server (same server_id).
+--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+restart
+EOF
+
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+
+SELECT @@GLOBAL.server_id;
+SELECT * from t1 WHERE a > 10 ORDER BY a;
+
+--echo # Wait 30 seconds for SQL thread to catch up with IO thread
+--connection server_2
+--let $wait_timeout= 300
+while ($wait_timeout != 0)
+{
+  --let $read_log_pos= query_get_value('SHOW SLAVE STATUS', Read_Master_Log_Pos, 1)
+  --let $exec_log_pos= query_get_value('SHOW SLAVE STATUS', Exec_Master_Log_Pos, 1)
+  if ($read_log_pos == $exec_log_pos)
+  {
+    --let $wait_timeout= 0
+  }
+  if ($read_log_pos != $exec_log_pos)
+  {
+    --sleep 0.1
+    --dec $wait_timeout
+  }
+}
+if ($read_log_pos != $exec_log_pos)
+{
+  --die Timeout wait for SQL thread to catch up with IO thread
+}
+
+SELECT * from t1 WHERE a > 10 ORDER BY a;
+
+--echo # Repeat this with additional transactions on the master
+
+--connection server_1
+--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+wait
+EOF
+
+SET GLOBAL debug_dbug="+d,inject_error_writing_xid";
+BEGIN;
+INSERT INTO t1 VALUES (22);
+--error ER_ERROR_ON_WRITE
+COMMIT;
+SET GLOBAL debug_dbug="+d,crash_dispatch_command_before";
+--error 2006,2013
+COMMIT;
+
+--source include/wait_until_disconnected.inc
+
+--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+restart
+EOF
+
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+
+INSERT INTO t1 VALUES (23);
+INSERT INTO t1 VALUES (24);
+SELECT * from t1 WHERE a > 10 ORDER BY a;
+--source include/save_master_gtid.inc
+
+--connection server_2
+--source include/sync_with_master_gtid.inc
+SELECT * from t1 WHERE a > 10 ORDER BY a;
+
+
 --connection server_1
 DROP TABLE t1;

--- a/sql/log.cc
+++ b/sql/log.cc
@ -7469,6 +7469,13 @@ MYSQL_BIN_LOG::write_transaction_or_stmt(group_commit_entry *entry,
    }
  }

+  DBUG_EXECUTE_IF("inject_error_writing_xid",
+                  {
+                    entry->error_cache= NULL;
+                    entry->commit_errno= 28;
+                    DBUG_RETURN(ER_ERROR_ON_WRITE);
+                  });
+
  if (entry->end_event->write(&log_file))
  {
    entry->error_cache= NULL;
--- a/sql/rpl_mi.cc
+++ b/sql/rpl_mi.cc
@ -38,6 +38,7 @@ Master_info::Master_info(LEX_STRING *connection_name_arg,
   connect_retry(DEFAULT_CONNECT_RETRY), inited(0), abort_slave(0),
   slave_running(0), slave_run_id(0), sync_counter(0),
   heartbeat_period(0), received_heartbeats(0), master_id(0),
+   prev_master_id(0),
   using_gtid(USE_GTID_NO), events_queued_since_last_gtid(0),
   gtid_reconnect_event_skip_count(0), gtid_event_seen(false)
 {
--- a/sql/rpl_mi.h
+++ b/sql/rpl_mi.h
@ -135,6 +135,12 @@ class Master_info : public Slave_reporting_capability
  ulonglong received_heartbeats;  // counter of received heartbeat events
  DYNAMIC_ARRAY ignore_server_ids;
  ulong master_id;
+  /*
+    At reconnect and until the first rotate event is seen, prev_master_id is
+    the value of master_id during the previous connection, used to detect
+    silent change of master server during reconnects.
+  */
+  ulong prev_master_id;
  /*
    Which kind of GTID position (if any) is used when connecting to master.

--- a/sql/slave.cc
+++ b/sql/slave.cc
@ -5265,6 +5265,86 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
                         event_len - BINLOG_CHECKSUM_LEN : event_len,
                         mi->rli.relay_log.description_event_for_queue);

+    if (unlikely(mi->gtid_reconnect_event_skip_count) &&
+        unlikely(!mi->gtid_event_seen) &&
+        rev.is_artificial_event() &&
+        (mi->prev_master_id != mi->master_id ||
+         strcmp(rev.new_log_ident, mi->master_log_name) != 0))
+    {
+      /*
+        Artificial Rotate_log_event is the first event we receive at the start
+        of each master binlog file. It gives the name of the new binlog file.
+
+        Normally, we already have this name from the real rotate event at the
+        end of the previous binlog file (unless we are making a new connection
+        using GTID). But if the master server restarted/crashed, there is no
+        rotate event at the end of the prior binlog file, so the name is new.
+
+        We use this fact to handle a special case of master crashing. If the
+        master crashed while writing the binlog, it might end with a partial
+        event group lacking the COMMIT/XID event, which must be rolled
+        back. If the slave IO thread happens to get a disconnect in the middle
+        of exactly this event group, it will try to reconnect at the same GTID
+        and skip already fetched events. However, that GTID did not commit on
+        the master before the crash, so it does not really exist, and the
+        master will connect the slave at the next following GTID starting in
+        the next binlog. This could confuse the slave and make it mix the
+        start of one event group with the end of another.
+
+        But we detect this case here, by noticing the change of binlog name
+        which detects the missing rotate event at the end of the previous
+        binlog file. In this case, we reset the counters to make us not skip
+        the next event group, and queue an artificial Format Description
+        event. The previously fetched incomplete event group will then be
+        rolled back when the Format Description event is executed by the SQL
+        thread.
+
+        A similar case is if the reconnect somehow connects to a different
+        master server (like due to a network proxy or IP address takeover).
+        We detect this case by noticing a change of server_id and in this
+        case likewise rollback the partially received event group.
+      */
+      Format_description_log_event fdle(4);
+
+      if (mi->prev_master_id != mi->master_id)
+        sql_print_warning("The server_id of master server changed in the "
+                          "middle of GTID %u-%u-%llu. Assuming a change of "
+                          "master server, so rolling back the previously "
+                          "received partial transaction. Expected: %lu, "
+                          "received: %lu", mi->last_queued_gtid.domain_id,
+                          mi->last_queued_gtid.server_id,
+                          mi->last_queued_gtid.seq_no,
+                          mi->prev_master_id, mi->master_id);
+      else if (strcmp(rev.new_log_ident, mi->master_log_name) != 0)
+        sql_print_warning("Unexpected change of master binlog file name in the "
+                          "middle of GTID %u-%u-%llu, assuming that master has "
+                          "crashed and rolling back the transaction. Expected: "
+                          "'%s', received: '%s'",
+                          mi->last_queued_gtid.domain_id,
+                          mi->last_queued_gtid.server_id,
+                          mi->last_queued_gtid.seq_no,
+                          mi->master_log_name, rev.new_log_ident);
+
+      mysql_mutex_lock(log_lock);
+      if (likely(!fdle.write(rli->relay_log.get_log_file()) &&
+                 !rli->relay_log.flush_and_sync(NULL)))
+      {
+        rli->relay_log.harvest_bytes_written(&rli->log_space_total);
+      }
+      else
+      {
+        error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
+        mysql_mutex_unlock(log_lock);
+        goto err;
+      }
+      rli->relay_log.signal_update();
+      mysql_mutex_unlock(log_lock);
+
+      mi->gtid_reconnect_event_skip_count= 0;
+      mi->events_queued_since_last_gtid= 0;
+    }
+    mi->prev_master_id= mi->master_id;
+
    if (unlikely(process_io_rotate(mi, &rev)))
    {
      error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;