From c89f769f2412202dd8d7e69b0942778c91602ec7 Mon Sep 17 00:00:00 2001 From: sjaakola Date: Tue, 21 Nov 2023 15:43:11 +0200 Subject: [PATCH 1/2] MDEV-31905 GTID inconsistency This commit fixes a GTID inconsistency which was introduced by mariabackup SST. The donor node now writes a new info file, donor_galera_info, which is streamed along with the mariabackup donation to the joiner node. The donor_galera_info file contains both the GTID and the gtid domain_id, and the joiner uses these to initialize its GTID state. The commit adds a new mtr test case, galera_3nodes.galera_gtid_consistency, which exercises potentially harmful mariabackup SST scenarios. The test also has a scenario with IST joining. Signed-off-by: Julius Goryavsky --- extra/mariabackup/backup_copy.cc | 1 + extra/mariabackup/backup_copy.h | 1 + extra/mariabackup/backup_mysql.cc | 28 +- extra/mariabackup/wsrep.cc | 10 +- include/mysql/service_wsrep.h | 3 + .../include/galera_sst_method.combinations | 5 + mysql-test/include/galera_sst_method.inc | 4 + .../suite/galera_3nodes/r/MDEV-29171.result | 1 + .../r/galera_gtid_consistency.result | 219 +++++++++++ .../suite/galera_3nodes/t/MDEV-29171.test | 12 +- .../t/galera_gtid_consistency.cnf | 35 ++ .../t/galera_gtid_consistency.test | 346 ++++++++++++++++++ scripts/wsrep_sst_mariabackup.sh | 21 +- sql/service_wsrep.cc | 5 + sql/sql_plugin_services.inl | 3 +- sql/wsrep_dummy.cc | 2 + sql/wsrep_sst.cc | 11 + 17 files changed, 692 insertions(+), 15 deletions(-) create mode 100644 mysql-test/include/galera_sst_method.combinations create mode 100644 mysql-test/include/galera_sst_method.inc create mode 100644 mysql-test/suite/galera_3nodes/r/galera_gtid_consistency.result create mode 100644 mysql-test/suite/galera_3nodes/t/galera_gtid_consistency.cnf create mode 100644 mysql-test/suite/galera_3nodes/t/galera_gtid_consistency.test diff --git a/extra/mariabackup/backup_copy.cc b/extra/mariabackup/backup_copy.cc index c9c504c7e29..f4c1b5bc83a 100644 --- a/extra/mariabackup/backup_copy.cc +++ 
b/extra/mariabackup/backup_copy.cc @@ -1674,6 +1674,7 @@ ibx_copy_incremental_over_full() NULL}; const char *sup_files[] = {"xtrabackup_binlog_info", "xtrabackup_galera_info", + "donor_galera_info", "xtrabackup_slave_info", "xtrabackup_info", "ib_lru_dump", diff --git a/extra/mariabackup/backup_copy.h b/extra/mariabackup/backup_copy.h index b4a323f2e89..b5aaf3121e9 100644 --- a/extra/mariabackup/backup_copy.h +++ b/extra/mariabackup/backup_copy.h @@ -9,6 +9,7 @@ /* special files */ #define XTRABACKUP_SLAVE_INFO "xtrabackup_slave_info" #define XTRABACKUP_GALERA_INFO "xtrabackup_galera_info" +#define XTRABACKUP_DONOR_GALERA_INFO "donor_galera_info" #define XTRABACKUP_BINLOG_INFO "xtrabackup_binlog_info" #define XTRABACKUP_INFO "xtrabackup_info" diff --git a/extra/mariabackup/backup_mysql.cc b/extra/mariabackup/backup_mysql.cc index 4f151b95941..e945037fea4 100644 --- a/extra/mariabackup/backup_mysql.cc +++ b/extra/mariabackup/backup_mysql.cc @@ -1424,6 +1424,7 @@ write_galera_info(ds_ctxt *datasink, MYSQL *connection) { char *state_uuid = NULL, *state_uuid55 = NULL; char *last_committed = NULL, *last_committed55 = NULL; + char *domain_id = NULL, *domain_id55 = NULL; bool result; mysql_variable status[] = { @@ -1434,6 +1435,12 @@ write_galera_info(ds_ctxt *datasink, MYSQL *connection) {NULL, NULL} }; + mysql_variable value[] = { + {"Wsrep_gtid_domain_id", &domain_id}, + {"wsrep_gtid_domain_id", &domain_id55}, + {NULL, NULL} + }; + /* When backup locks are supported by the server, we should skip creating xtrabackup_galera_info file on the backup stage, because wsrep_local_state_uuid and wsrep_last_committed will be inconsistent @@ -1452,9 +1459,26 @@ write_galera_info(ds_ctxt *datasink, MYSQL *connection) goto cleanup; } + read_mysql_variables(connection, "SHOW VARIABLES LIKE 'wsrep%'", value, true); + + if (domain_id == NULL && domain_id55 == NULL) { + msg("Warning: failed to get master wsrep state from SHOW VARIABLES."); + result = true; + goto cleanup; + } + result 
= datasink->backup_file_printf(XTRABACKUP_GALERA_INFO, - "%s:%s\n", state_uuid ? state_uuid : state_uuid55, - last_committed ? last_committed : last_committed55); + "%s:%s %s\n", state_uuid ? state_uuid : state_uuid55, + last_committed ? last_committed : last_committed55, + domain_id ? domain_id : domain_id55); + + if (result) + { + result= datasink->backup_file_printf(XTRABACKUP_DONOR_GALERA_INFO, + "%s:%s %s\n", state_uuid ? state_uuid : state_uuid55, + last_committed ? last_committed : last_committed55, + domain_id ? domain_id : domain_id55); + } if (result) { write_current_binlog_file(datasink, connection); diff --git a/extra/mariabackup/wsrep.cc b/extra/mariabackup/wsrep.cc index 1b93e9ed10e..acaf5c50e7c 100644 --- a/extra/mariabackup/wsrep.cc +++ b/extra/mariabackup/wsrep.cc @@ -53,6 +53,7 @@ permission notice: /*! Name of file where Galera info is stored on recovery */ #define XB_GALERA_INFO_FILENAME "xtrabackup_galera_info" +#define XB_GALERA_DONOR_INFO_FILENAME "donor_galera_info" /*********************************************************************** Store Galera checkpoint info in the 'xtrabackup_galera_info' file, if that @@ -67,7 +68,7 @@ xb_write_galera_info(bool incremental_prepare) long long seqno; MY_STAT statinfo; - /* Do not overwrite existing an existing file to be compatible with + /* Do not overwrite an existing file to be compatible with servers with older server versions */ if (!incremental_prepare && my_stat(XB_GALERA_INFO_FILENAME, &statinfo, MYF(0)) != NULL) { @@ -101,10 +102,11 @@ xb_write_galera_info(bool incremental_prepare) seqno = wsrep_xid_seqno(&xid); - msg("mariabackup: Recovered WSREP position: %s:%lld\n", - uuid_str, (long long) seqno); + msg("mariabackup: Recovered WSREP position: %s:%lld domain_id: %lld\n", + uuid_str, (long long) seqno, (long long)wsrep_get_domain_id()); - if (fprintf(fp, "%s:%lld", uuid_str, (long long) seqno) < 0) { + if (fprintf(fp, "%s:%lld %lld", uuid_str, (long long) seqno, + (long 
long)wsrep_get_domain_id()) < 0) { die( "could not write to " XB_GALERA_INFO_FILENAME diff --git a/include/mysql/service_wsrep.h b/include/mysql/service_wsrep.h index f3588da4b46..a1897d76dc1 100644 --- a/include/mysql/service_wsrep.h +++ b/include/mysql/service_wsrep.h @@ -92,6 +92,7 @@ extern struct wsrep_service_st { void (*wsrep_thd_kill_LOCK_func)(const MYSQL_THD thd); void (*wsrep_thd_kill_UNLOCK_func)(const MYSQL_THD thd); void (*wsrep_thd_set_wsrep_PA_unsafe_func)(MYSQL_THD thd); + uint32 (*wsrep_get_domain_id_func)(); } *wsrep_service; #define MYSQL_SERVICE_WSREP_INCLUDED @@ -139,6 +140,7 @@ extern struct wsrep_service_st { #define wsrep_thd_set_ignored_error(T,V) wsrep_service->wsrep_thd_set_ignored_error_func(T,V) #define wsrep_report_bf_lock_wait(T,I) wsrep_service->wsrep_report_bf_lock_wait(T,I) #define wsrep_thd_set_PA_unsafe(T) wsrep_service->wsrep_thd_set_PA_unsafe_func(T) +#define wsrep_get_domain_id(T) wsrep_service->wsrep_get_domain_id_func(T) #else #define MYSQL_SERVICE_WSREP_STATIC_INCLUDED @@ -241,5 +243,6 @@ extern "C" void wsrep_report_bf_lock_wait(const THD *thd, unsigned long long trx_id); /* declare parallel applying unsafety for the THD */ extern "C" void wsrep_thd_set_PA_unsafe(MYSQL_THD thd); +extern "C" uint32 wsrep_get_domain_id(); #endif #endif /* MYSQL_SERVICE_WSREP_INCLUDED */ diff --git a/mysql-test/include/galera_sst_method.combinations b/mysql-test/include/galera_sst_method.combinations new file mode 100644 index 00000000000..14b6172e638 --- /dev/null +++ b/mysql-test/include/galera_sst_method.combinations @@ -0,0 +1,5 @@ +[rsync] +wsrep-sst-method=rsync + +[mariabackup] +wsrep_sst_method=mariabackup diff --git a/mysql-test/include/galera_sst_method.inc b/mysql-test/include/galera_sst_method.inc new file mode 100644 index 00000000000..99dd34531c7 --- /dev/null +++ b/mysql-test/include/galera_sst_method.inc @@ -0,0 +1,4 @@ +# The goal of including this file is to enable galera_sst_method combinations +# (see 
include/galera_sst_method.combinations) + +--source include/have_innodb.inc diff --git a/mysql-test/suite/galera_3nodes/r/MDEV-29171.result b/mysql-test/suite/galera_3nodes/r/MDEV-29171.result index 151be86d9cc..371ce006dd3 100644 --- a/mysql-test/suite/galera_3nodes/r/MDEV-29171.result +++ b/mysql-test/suite/galera_3nodes/r/MDEV-29171.result @@ -14,6 +14,7 @@ select @@wsrep_gtid_domain_id,@@wsrep_node_name; @@wsrep_gtid_domain_id @@wsrep_node_name 100 node3 connection node_3; +connection node_1; connection node_2; connection node_1; connection node_1; diff --git a/mysql-test/suite/galera_3nodes/r/galera_gtid_consistency.result b/mysql-test/suite/galera_3nodes/r/galera_gtid_consistency.result new file mode 100644 index 00000000000..a35f31da422 --- /dev/null +++ b/mysql-test/suite/galera_3nodes/r/galera_gtid_consistency.result @@ -0,0 +1,219 @@ +connection node_2; +connection node_1; +connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3; +connect node_2b, 127.0.0.1, root, , test, $NODE_MYPORT_2; +set wsrep_sync_wait=0; +connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1; +set wsrep_sync_wait=0; +connection node_1; +CREATE PROCEDURE insert_row (IN node varchar(10), IN repeat_count int) +BEGIN +DECLARE current_num int; +SET current_num = 0; +WHILE current_num < repeat_count do +INSERT INTO t1(node, name) VALUES (node, UUID()); +SET current_num = current_num + 1; +END WHILE; +END| +CREATE TABLE t1 (id bigint not null primary key auto_increment, node VARCHAR(10), name VARCHAR(64)) ENGINE=innodb; +# node_1 +show variables like '%gtid_binlog_pos%'; +Variable_name Value +gtid_binlog_pos 1111-1-2 +connection node_2; +# node_2 +show variables like '%gtid_binlog_pos%'; +Variable_name Value +gtid_binlog_pos 1111-1-2 +connection node_3; +# node_3 +show variables like '%gtid_binlog_pos%'; +Variable_name Value +gtid_binlog_pos 1111-1-2 +connection node_1; +CALL insert_row('node1', 500);; +connection node_2; +CALL insert_row('node2', 500);; +connection node_3; +CALL 
insert_row('node3', 500);; +connection node_2; +# Shutdown node_2, force SST +connection node_2b; +# Wait until node_2 leaves cluster +connection node_1b; +connection node_1; +connection node_3; +connection node_1; +CALL insert_row('node1', 500); +connection node_3; +CALL insert_row('node3', 500); +CREATE TABLE t2(i int primary key) engine=innodb; +connection node_2; +# Restart node_2 +# restart +connection node_1b; +# Wait until node_2 is back in cluster +# node2 has joined +# GTID in node1 +show variables like 'wsrep_gtid_domain_id'; +Variable_name Value +wsrep_gtid_domain_id 1111 +show variables like '%gtid_binlog_pos%'; +Variable_name Value +gtid_binlog_pos 1111-1-2503 +connection node_2; +# GTID in node2 +show variables like 'wsrep_gtid_domain_id'; +Variable_name Value +wsrep_gtid_domain_id 1111 +show variables like '%gtid_binlog_pos%'; +Variable_name Value +gtid_binlog_pos 1111-1-2503 +connection node_3; +# GTID in node3 +show variables like 'wsrep_gtid_domain_id'; +Variable_name Value +wsrep_gtid_domain_id 1111 +show variables like '%gtid_binlog_pos%'; +Variable_name Value +gtid_binlog_pos 1111-1-2503 +# Shutdown node_3 +connection node_3; +SET GLOBAL wsrep_provider_options = 'gmcast.isolate = 1'; +# Wait until node_3 leaves cluster +connection node_1b; +connection node_1; +CALL insert_row('node1', 50); +CREATE TABLE t3(i int primary key) engine=innodb; +connection node_3; +# Rejoin node_3 +SET GLOBAL wsrep_provider_options = 'gmcast.isolate = 0'; +connection node_1b; +# Wait until node_3 is back in cluster +# node3 has joined +connection node_1; +# GTID in node1 +show variables like 'wsrep_gtid_domain_id'; +Variable_name Value +wsrep_gtid_domain_id 1111 +show variables like '%gtid_binlog_pos%'; +Variable_name Value +gtid_binlog_pos 1111-1-2554 +connection node_2; +# GTID in node2 +show variables like 'wsrep_gtid_domain_id'; +Variable_name Value +wsrep_gtid_domain_id 1111 +show variables like '%gtid_binlog_pos%'; +Variable_name Value +gtid_binlog_pos 
1111-1-2554 +connection node_3; +# GTID in node3 +show variables like 'wsrep_gtid_domain_id'; +Variable_name Value +wsrep_gtid_domain_id 1111 +show variables like '%gtid_binlog_pos%'; +Variable_name Value +gtid_binlog_pos 1111-1-2554 +# One by one shutdown all nodes +connection node_3; +# shutdown node_3 +connection node_2; +# wait until node_3 is out of cluster +# shutdown node_2 +connection node_1; +# wait until node_2 is out of cluster +# shutdown node_1 +# Bootstrap from node_1 +connection node_1; +# restart: --wsrep_new_cluster +show variables like 'wsrep_gtid_domain_id'; +Variable_name Value +wsrep_gtid_domain_id 1111 +show variables like '%gtid_binlog_pos%'; +Variable_name Value +gtid_binlog_pos 1111-1-2554 +ANALYZE TABLE t2; +Table Op Msg_type Msg_text +test.t2 analyze status Engine-independent statistics collected +test.t2 analyze status OK +CALL insert_row('node1', 100);; +# Restart node_2 +connection node_2; +# restart +connect node_1c, 127.0.0.1, root, , test, $NODE_MYPORT_1; +set wsrep_sync_wait=0; +connection node_1c; +# wait until node_1 and node_2 are in cluster +connection node_2; +ALTER TABLE t2 ADD COLUMN (k int); +CALL insert_row('node2', 100);; +# Restart node_3 +connection node_3; +# restart +connection node_1c; +# wait until all nodes are back in cluster +after cluster restart +connection node_2; +connection node_1; +connection node_1; +node1 GTID +show variables like 'wsrep_gtid_domain_id'; +Variable_name Value +wsrep_gtid_domain_id 1111 +show variables like '%gtid_binlog_pos%'; +Variable_name Value +gtid_binlog_pos 1111-1-2756 +connection node_2; +node2 GTID +show variables like 'wsrep_gtid_domain_id'; +Variable_name Value +wsrep_gtid_domain_id 1111 +show variables like '%gtid_binlog_pos%'; +Variable_name Value +gtid_binlog_pos 1111-1-2756 +connection node_3; +node3 GTID +show variables like 'wsrep_gtid_domain_id'; +Variable_name Value +wsrep_gtid_domain_id 1111 +show variables like '%gtid_binlog_pos%'; +Variable_name Value +gtid_binlog_pos 
1111-1-2756 +connection node_1; +table size in node1 +SELECT COUNT(*) FROM t1; +COUNT(*) +2750 +connection node_2; +table size in node2 +SELECT COUNT(*) FROM t1; +COUNT(*) +2750 +connection node_3; +table size in node3 +SELECT COUNT(*) FROM t1; +COUNT(*) +2750 +connection node_2; +call mtr.add_suppression("WSREP: Ignoring server id for non bootstrap node"); +call mtr.add_suppression("WSREP: Sending JOIN failed:.*"); +call mtr.add_suppression("Sending JOIN failed:.*"); +call mtr.add_suppression("WSREP: Failed to JOIN the cluster after SST.*"); +connection node_3; +call mtr.add_suppression("WSREP: Ignoring server id for non bootstrap node"); +call mtr.add_suppression("WSREP: Sending JOIN failed:.*"); +call mtr.add_suppression("Sending JOIN failed:.*"); +call mtr.add_suppression("WSREP: Failed to JOIN the cluster after SST.*"); +# cleanup +connection node_1; +DROP PROCEDURE insert_row; +DROP TABLE t1; +DROP TABLE t2; +DROP TABLE t3; +connection node_3; +connection node_2; +disconnect node_3; +disconnect node_2b; +disconnect node_1b; +disconnect node_1c; diff --git a/mysql-test/suite/galera_3nodes/t/MDEV-29171.test b/mysql-test/suite/galera_3nodes/t/MDEV-29171.test index 33fa4d722ae..bfb7abf9a8b 100644 --- a/mysql-test/suite/galera_3nodes/t/MDEV-29171.test +++ b/mysql-test/suite/galera_3nodes/t/MDEV-29171.test @@ -4,11 +4,13 @@ # --source include/galera_cluster.inc ---source include/have_innodb.inc +--source include/galera_sst_method.inc +--source include/force_restart.inc # # Initially wsrep gtid domain id is 100 # + --connection node_1 select @@wsrep_gtid_domain_id,@@wsrep_node_name; @@ -26,6 +28,10 @@ select @@wsrep_gtid_domain_id,@@wsrep_node_name; --connection node_3 --source include/shutdown_mysqld.inc +--connection node_1 +--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; +--source include/wait_condition.inc + --connection node_2 --let $wait_condition = SELECT VARIABLE_VALUE = 2 
FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; --source include/wait_condition.inc @@ -36,6 +42,7 @@ select @@wsrep_gtid_domain_id,@@wsrep_node_name; --source include/wait_condition.inc --source include/shutdown_mysqld.inc +--sleep 5 # # Bootstrap from node_1 and change wsrep_gtid_domain_id to 200 @@ -45,12 +52,11 @@ select @@wsrep_gtid_domain_id,@@wsrep_node_name; --source include/start_mysqld.inc show variables like 'wsrep_gtid_domain_id'; - # # Restart node_2, expect that wsrep_gtid_domain_id has changed to 200 # --connection node_2 ---let $restart_parameters = +--let $restart_parameters = --let $_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.2.expect --source include/start_mysqld.inc show variables like 'wsrep_gtid_domain_id'; diff --git a/mysql-test/suite/galera_3nodes/t/galera_gtid_consistency.cnf b/mysql-test/suite/galera_3nodes/t/galera_gtid_consistency.cnf new file mode 100644 index 00000000000..5bd03178d1f --- /dev/null +++ b/mysql-test/suite/galera_3nodes/t/galera_gtid_consistency.cnf @@ -0,0 +1,35 @@ +!include ../galera_3nodes.cnf + +[mysqld.1] +wsrep-node-name="node1" +wsrep_gtid_domain_id=1111 +gtid_domain_id=2 +server_id=10999 +wsrep_sst_auth="root:" +wsrep_sst_method=mariabackup +log_slave_updates=ON +log_bin=mariadb-bin-log +binlog-format=row +wsrep-gtid-mode=ON + +[mysqld.2] +wsrep-node-name="node2" +wsrep_gtid_domain_id=1112 +gtid_domain_id=3 +wsrep_sst_auth="root:" +wsrep_sst_method=mariabackup +log_slave_updates=ON +log_bin=mariadb-bin-log +binlog-format=row +wsrep-gtid-mode=ON + +[mysqld.3] +wsrep-node-name="node3" +wsrep_gtid_domain_id=1113 +gtid_domain_id=4 +wsrep_sst_auth="root:" +wsrep_sst_method=mariabackup +log_slave_updates=ON +log_bin=mariadb-bin-log +binlog-format=row +wsrep-gtid-mode=ON diff --git a/mysql-test/suite/galera_3nodes/t/galera_gtid_consistency.test b/mysql-test/suite/galera_3nodes/t/galera_gtid_consistency.test new file mode 100644 index 00000000000..f41230bc8e6 --- /dev/null +++ 
b/mysql-test/suite/galera_3nodes/t/galera_gtid_consistency.test @@ -0,0 +1,346 @@ +--source include/galera_cluster.inc +--source include/big_test.inc +--source include/force_restart.inc + + +# +# Testing gtid consistency in 3 node cluster when nodes drop +# and join back to cluster. +# The tests verify that wsrep_gtid_domain_id and gtid_binlog_pos +# remains same across the cluster +# In the configuration, nodes have different wsrep_gtid_domain_id +# but all nodes are supposed to receive effective domain id +# from the bootstrap node (node_1), and use it +# +--connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3 +--connect node_2b, 127.0.0.1, root, , test, $NODE_MYPORT_2 +set wsrep_sync_wait=0; +--connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1 +set wsrep_sync_wait=0; + +--connection node_1 +DELIMITER |; +CREATE PROCEDURE insert_row (IN node varchar(10), IN repeat_count int) +BEGIN + DECLARE current_num int; + SET current_num = 0; + WHILE current_num < repeat_count do + INSERT INTO t1(node, name) VALUES (node, UUID()); + SET current_num = current_num + 1; + END WHILE; +END| +DELIMITER ;| + +CREATE TABLE t1 (id bigint not null primary key auto_increment, node VARCHAR(10), name VARCHAR(64)) ENGINE=innodb; + +# +# report initial gtid positions after table t1 is created +# +--echo # node_1 +show variables like '%gtid_binlog_pos%'; + +--connection node_2 +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1' +--source include/wait_condition.inc + +--echo # node_2 +show variables like '%gtid_binlog_pos%'; + +--connection node_3 +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1' +--source include/wait_condition.inc + +--echo # node_3 +show variables like '%gtid_binlog_pos%'; + +########################################### +# scenario: join node 2 by SST +########################################## + +# +# start concurrent insert load and stop node2 while the load is on 
+# +--connection node_1 +--send CALL insert_row('node1', 500); + +--connection node_2 +--send CALL insert_row('node2', 500); + +--connection node_3 +--send CALL insert_row('node3', 500); + +# +# stop load to node 2 and shutdown the node, force SST +# +--connection node_2 +--reap + +--echo # Shutdown node_2, force SST +--connection node_2b +--source include/shutdown_mysqld.inc +--remove_file $MYSQLTEST_VARDIR/mysqld.2/data/grastate.dat + +--echo # Wait until node_2 leaves cluster +--connection node_1b +--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; +--source include/wait_condition.inc + +# +# stop the remaining load to node 1 and 3 +# +--connection node_1 +--reap +--connection node_3 +--reap + +# +# some more inserts and DDL to nodes 1 and 3 +# while node 2 is absent +# +--connection node_1 +CALL insert_row('node1', 500); + +--connection node_3 +CALL insert_row('node3', 500); + +CREATE TABLE t2(i int primary key) engine=innodb; + +# +# restart node 2, should join by SST +# +--connection node_2 +--echo # Restart node_2 +--source include/start_mysqld.inc + +--connection node_1b +--echo # Wait until node_2 is back in cluster +--let $wait_condition = SELECT VARIABLE_VALUE = 3 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; +--source include/wait_condition.inc + +--echo # node2 has joined + +# +# check gtid positions in all nodes +# +--echo # GTID in node1 +show variables like 'wsrep_gtid_domain_id'; +show variables like '%gtid_binlog_pos%'; + +--connection node_2 +--echo # GTID in node2 +show variables like 'wsrep_gtid_domain_id'; +show variables like '%gtid_binlog_pos%'; + +--connection node_3 +--echo # GTID in node3 +show variables like 'wsrep_gtid_domain_id'; +show variables like '%gtid_binlog_pos%'; + + +########################################### +# scenario: join node 3 by IST +########################################## + +--echo # Shutdown node_3 
+--connection node_3 +SET GLOBAL wsrep_provider_options = 'gmcast.isolate = 1'; + +--echo # Wait until node_3 leaves cluster +--connection node_1b +--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; +--source include/wait_condition.inc + +# +# do some inserts and DDL to node 1 +# while node 3 is absent +# +--connection node_1 +CALL insert_row('node1', 50); + +CREATE TABLE t3(i int primary key) engine=innodb; + +# +# remove isolation in node 3, should join by IST +# +--connection node_3 +--echo # Rejoin node_3 +SET GLOBAL wsrep_provider_options = 'gmcast.isolate = 0'; + +--connection node_1b +--echo # Wait until node_3 is back in cluster +--let $wait_condition = SELECT VARIABLE_VALUE = 3 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; +--source include/wait_condition.inc + +--echo # node3 has joined + +# +# check gtid positions in all nodes +# +--connection node_1 +--echo # GTID in node1 +show variables like 'wsrep_gtid_domain_id'; +show variables like '%gtid_binlog_pos%'; + +--connection node_2 +--echo # GTID in node2 +show variables like 'wsrep_gtid_domain_id'; +show variables like '%gtid_binlog_pos%'; + +--connection node_3 +--echo # GTID in node3 +show variables like 'wsrep_gtid_domain_id'; +show variables like '%gtid_binlog_pos%'; + + +########################################### +# scenario: restart full cluster +########################################## + + +# +# stop all nodes, one by one +# +--echo # One by one shutdown all nodes +--connection node_3 +--echo # shutdown node_3 +--source include/shutdown_mysqld.inc +--remove_file $MYSQLTEST_VARDIR/mysqld.3/data/grastate.dat + +--connection node_2 +--echo # wait until node_3 is out of cluster +--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; +--source include/wait_condition.inc +--echo # shutdown node_2 +--source 
include/shutdown_mysqld.inc +--remove_file $MYSQLTEST_VARDIR/mysqld.2/data/grastate.dat + +--connection node_1 +--echo # wait until node_2 is out of cluster +--let $wait_condition = SELECT VARIABLE_VALUE = 1 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; +--source include/wait_condition.inc +--echo # shutdown node_1 +--source include/shutdown_mysqld.inc + +# +# bootstrap cluster in order node1 - node2 - node3 +# send some inserts and DDL after each node started +# +--sleep 5 +--echo # Bootstrap from node_1 +--connection node_1 +--let $restart_parameters = --wsrep_new_cluster +--source include/start_mysqld.inc + +show variables like 'wsrep_gtid_domain_id'; +show variables like '%gtid_binlog_pos%'; + +ANALYZE TABLE t2; +--send CALL insert_row('node1', 100); + +--echo # Restart node_2 +--connection node_2 +--let $restart_parameters = +--let $_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.2.expect +--source include/start_mysqld.inc + +# +# connection node_1b may not be functional anymore, after node was +# shutdown, open node_1c for controlling node 1 state +# +--connect node_1c, 127.0.0.1, root, , test, $NODE_MYPORT_1 +set wsrep_sync_wait=0; +--connection node_1c +--echo # wait until node_1 and node_2 are in cluster +--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; +--source include/wait_condition.inc + +--connection node_2 +ALTER TABLE t2 ADD COLUMN (k int); +--send CALL insert_row('node2', 100); + + +--echo # Restart node_3 +--connection node_3 +--let $restart_parameters = +--let $_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.3.expect +--source include/start_mysqld.inc + +--connection node_1c +--echo # wait until all nodes are back in cluster +--let $wait_condition = SELECT VARIABLE_VALUE = 3 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; +--source include/wait_condition.inc +--echo after cluster restart + +# stop 
load for nodes 1 and 2 +--connection node_2 +--reap + +--connection node_1 +--reap + +# +# check gtid positions in all nodes +# +--connection node_1 +--echo node1 GTID +show variables like 'wsrep_gtid_domain_id'; +show variables like '%gtid_binlog_pos%'; + +--connection node_2 +--echo node2 GTID +show variables like 'wsrep_gtid_domain_id'; +show variables like '%gtid_binlog_pos%'; + +--connection node_3 +--echo node3 GTID +show variables like 'wsrep_gtid_domain_id'; +show variables like '%gtid_binlog_pos%'; + +# +# check table size in all nodes +# +--connection node_1 +--echo table size in node1 +SELECT COUNT(*) FROM t1; + +--connection node_2 +--echo table size in node2 +SELECT COUNT(*) FROM t1; + +--connection node_3 +--echo table size in node3 +SELECT COUNT(*) FROM t1; + +# +# cleanups +# +--connection node_2 +call mtr.add_suppression("WSREP: Ignoring server id for non bootstrap node"); +call mtr.add_suppression("WSREP: Sending JOIN failed:.*"); +call mtr.add_suppression("Sending JOIN failed:.*"); +call mtr.add_suppression("WSREP: Failed to JOIN the cluster after SST.*"); +--connection node_3 +call mtr.add_suppression("WSREP: Ignoring server id for non bootstrap node"); +call mtr.add_suppression("WSREP: Sending JOIN failed:.*"); +call mtr.add_suppression("Sending JOIN failed:.*"); +call mtr.add_suppression("WSREP: Failed to JOIN the cluster after SST.*"); + +--echo # cleanup +--connection node_1 + +DROP PROCEDURE insert_row; +DROP TABLE t1; +DROP TABLE t2; +DROP TABLE t3; + +--connection node_3 +--let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't2' +--source include/wait_condition.inc +--connection node_2 +--let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't2' +--source include/wait_condition.inc + +--disconnect node_3 +--disconnect node_2b +--disconnect node_1b +--disconnect node_1c diff --git a/scripts/wsrep_sst_mariabackup.sh b/scripts/wsrep_sst_mariabackup.sh index 
3dc7c0e3b69..2b05326e500 100644 --- a/scripts/wsrep_sst_mariabackup.sh +++ b/scripts/wsrep_sst_mariabackup.sh @@ -104,8 +104,10 @@ fi DATA="$WSREP_SST_OPT_DATA" INFO_FILE='xtrabackup_galera_info' +DONOR_INFO_FILE='donor_galera_info' IST_FILE='xtrabackup_ist' MAGIC_FILE="$DATA/$INFO_FILE" +DONOR_MAGIC_FILE="$DATA/$DONOR_INFO_FILE" INNOAPPLYLOG="$DATA/mariabackup.prepare.log" INNOMOVELOG="$DATA/mariabackup.move.log" @@ -651,14 +653,14 @@ get_stream() if [ "$WSREP_SST_OPT_ROLE" = 'joiner' ]; then strmcmd="'$STREAM_BIN' -x" else - strmcmd="'$STREAM_BIN' -c '$INFO_FILE'" + strmcmd="'$STREAM_BIN' -c '$INFO_FILE' '$DONOR_INFO_FILE'" fi else sfmt='tar' if [ "$WSREP_SST_OPT_ROLE" = 'joiner' ]; then strmcmd='tar xfi -' else - strmcmd="tar cf - '$INFO_FILE'" + strmcmd="tar cf - '$INFO_FILE' '$DONOR_INFO_FILE'" fi fi wsrep_log_info "Streaming with $sfmt" @@ -680,6 +682,7 @@ cleanup_at_exit() if [ $estatus -ne 0 ]; then wsrep_log_error "Removing $MAGIC_FILE file due to signal" [ -f "$MAGIC_FILE" ] && rm -f "$MAGIC_FILE" || : + [ -f "$DONOR_MAGIC_FILE" ] && rm -f "$DONOR_MAGIC_FILE" || : fi if [ "$WSREP_SST_OPT_ROLE" = 'joiner' ]; then @@ -916,6 +919,7 @@ monitor_process() } [ -f "$MAGIC_FILE" ] && rm -f "$MAGIC_FILE" +[ -f "$DONOR_MAGIC_FILE" ] && rm -rf "$DONOR_MAGIC_FILE" read_cnf setup_ports @@ -1048,7 +1052,7 @@ send_magic() # Store donor's wsrep GTID (state ID) and wsrep_gtid_domain_id # (separated by a space). 
echo "$WSREP_SST_OPT_GTID $WSREP_SST_OPT_GTID_DOMAIN_ID" > "$MAGIC_FILE" - + echo "$WSREP_SST_OPT_GTID $WSREP_SST_OPT_GTID_DOMAIN_ID" > "$DONOR_MAGIC_FILE" if [ -n "$WSREP_SST_OPT_REMOTE_PSWD" ]; then # Let joiner know that we know its secret echo "$SECRET_TAG $WSREP_SST_OPT_REMOTE_PSWD" >> "$MAGIC_FILE" @@ -1594,9 +1598,16 @@ else # joiner exit 2 fi + # use donor magic file, if present + # if IST was used, donor magic file was not created # Remove special tags from the magic file, and from the output: - coords=$(head -n1 "$MAGIC_FILE") - wsrep_log_info "Galera co-ords from recovery: $coords" + if [ -r "$DONOR_MAGIC_FILE" ]; then + coords=$(head -n1 "$DONOR_MAGIC_FILE") + wsrep_log_info "Galera co-ords from donor: $coords" + else + coords=$(head -n1 "$MAGIC_FILE") + wsrep_log_info "Galera co-ords from recovery: $coords" + fi echo "$coords" # Output : UUID:seqno wsrep_gtid_domain_id wsrep_log_info "Total time on joiner: $totime seconds" diff --git a/sql/service_wsrep.cc b/sql/service_wsrep.cc index ccce076d8a9..8004e29c875 100644 --- a/sql/service_wsrep.cc +++ b/sql/service_wsrep.cc @@ -409,3 +409,8 @@ extern "C" void wsrep_thd_set_PA_unsafe(THD *thd) WSREP_DEBUG("session does not have active transaction, can not mark as PA unsafe"); } } + +extern "C" uint32 wsrep_get_domain_id() +{ + return wsrep_gtid_domain_id; +} diff --git a/sql/sql_plugin_services.inl b/sql/sql_plugin_services.inl index 60843a28fab..6b21f64bc64 100644 --- a/sql/sql_plugin_services.inl +++ b/sql/sql_plugin_services.inl @@ -181,7 +181,8 @@ static struct wsrep_service_st wsrep_handler = { wsrep_report_bf_lock_wait, wsrep_thd_kill_LOCK, wsrep_thd_kill_UNLOCK, - wsrep_thd_set_PA_unsafe + wsrep_thd_set_PA_unsafe, + wsrep_get_domain_id }; static struct thd_specifics_service_st thd_specifics_handler= diff --git a/sql/wsrep_dummy.cc b/sql/wsrep_dummy.cc index a67da77c472..da3812d29f5 100644 --- a/sql/wsrep_dummy.cc +++ b/sql/wsrep_dummy.cc @@ -161,3 +161,5 @@ void wsrep_report_bf_lock_wait(const THD*, 
void wsrep_thd_set_PA_unsafe(THD*) {} +uint32 wsrep_get_domain_id() +{ return 0;} diff --git a/sql/wsrep_sst.cc b/sql/wsrep_sst.cc index 5a69bca0ae6..08a3d159e94 100644 --- a/sql/wsrep_sst.cc +++ b/sql/wsrep_sst.cc @@ -652,7 +652,9 @@ static void* sst_joiner_thread (void* a) else { // Read state ID (UUID:SEQNO) followed by wsrep_gtid_domain_id (if any). + unsigned long int domain_id= wsrep_gtid_domain_id; const char *pos= strchr(out, ' '); + WSREP_DEBUG("SST state ID tmp=%s out=%s pos=%p", tmp, out, pos); if (!pos) { @@ -662,6 +664,13 @@ static void* sst_joiner_thread (void* a) WSREP_WARN("Did not find domain ID from SST script output '%s'. " "Domain ID must be set manually to keep binlog consistent", out); + if (wsrep_gtid_domain_id) + { + WSREP_INFO("This node is configured to use wsrep_gtid_domain_id=%lu by user.", + domain_id); + wsrep_gtid_server.domain_id= (uint32)domain_id; + wsrep_gtid_domain_id= (uint32)domain_id; + } } err= sst_scan_uuid_seqno (out, &ret_uuid, &ret_seqno); @@ -1688,6 +1697,8 @@ static int sst_flush_tables(THD* thd) char content[100]; snprintf(content, sizeof(content), "%s:%lld %d\n", wsrep_cluster_state_uuid, (long long)wsrep_locked_seqno, wsrep_gtid_server.domain_id); + WSREP_DEBUG("sst_flush_tables : %s:%lld %d", wsrep_cluster_state_uuid, + (long long)wsrep_locked_seqno, wsrep_gtid_server.domain_id); err= sst_create_file(flush_success, content); if (err) From 362c0950e8999a88f5866fc7361e10615364e4a3 Mon Sep 17 00:00:00 2001 From: Daniele Sciascia Date: Mon, 23 Oct 2023 11:49:47 +0200 Subject: [PATCH 2/2] MDEV-32549 Cluster inconsistent after SAVEPOINT is rolled back Attempting to set a SAVEPOINT when one of the involved storage engines does not support savepoints, raises an error, and results in statement rollback. If Galera is enabled with binlog emulation, the above scenario was not handled correctly, and resulted in cluster wide inconsistency. 
The problem was in wsrep_register_binlog_handler(), which is called towards the beginning of SAVEPOINT execution. This function is supposed to mark the position of the beginning of the statement in the trx cache through `set_prev_position()`. However, it did so only on condition that `get_prev_position()` returns `MY_OFF_T_UNDEF`. This before-statement position is typically reset to undefined at the end of the statement in `binlog_commit()` / `binlog_rollback()`. However, that's not the case with Galera and binlog emulation, for which binlog commit / rollback hooks are not called due to the optimization that avoids internal 2PC (MDEV-16509). Signed-off-by: Julius Goryavsky --- mysql-test/suite/galera/r/MDEV-32549.result | 24 ++++++++++++++++++ mysql-test/suite/galera/t/MDEV-32549.test | 28 +++++++++++++++++++++ sql/log.cc | 9 +++---- 3 files changed, 55 insertions(+), 6 deletions(-) create mode 100644 mysql-test/suite/galera/r/MDEV-32549.result create mode 100644 mysql-test/suite/galera/t/MDEV-32549.test diff --git a/mysql-test/suite/galera/r/MDEV-32549.result b/mysql-test/suite/galera/r/MDEV-32549.result new file mode 100644 index 00000000000..65c81819d27 --- /dev/null +++ b/mysql-test/suite/galera/r/MDEV-32549.result @@ -0,0 +1,24 @@ +connection node_2; +connection node_1; +CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) engine=innodb; +CREATE TABLE t2 (f1 INTEGER PRIMARY KEY) engine=aria; +START TRANSACTION; +INSERT INTO t1 VALUES (1); +SELECT * FROM t2; +f1 +SAVEPOINT s1; +ERROR 42000: The storage engine for the table doesn't support SAVEPOINT +INSERT INTO t1 VALUES (2); +COMMIT; +connection node_1; +SELECT * FROM t1; +f1 +1 +2 +connection node_2; +SELECT * FROM t1; +f1 +1 +2 +connection node_1; +DROP TABLE t1,t2; diff --git a/mysql-test/suite/galera/t/MDEV-32549.test b/mysql-test/suite/galera/t/MDEV-32549.test new file mode 100644 index 00000000000..a2faa46da5f --- /dev/null +++ b/mysql-test/suite/galera/t/MDEV-32549.test @@ -0,0 +1,28 @@ +# +# MDEV-32549: Cluster is inconsistent after
savepoint +# statement is rolled back +# +--source include/galera_cluster.inc + +CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) engine=innodb; +CREATE TABLE t2 (f1 INTEGER PRIMARY KEY) engine=aria; + +START TRANSACTION; +INSERT INTO t1 VALUES (1); +SELECT * FROM t2; +--error ER_CHECK_NOT_IMPLEMENTED +SAVEPOINT s1; +INSERT INTO t1 VALUES (2); +COMMIT; + +--connection node_1 +SELECT * FROM t1; + +# If bug is present: only the second INSERT +# is replicated, causing an inconsistent +# cluster. +--connection node_2 +SELECT * FROM t1; + +--connection node_1 +DROP TABLE t1,t2; diff --git a/sql/log.cc b/sql/log.cc index aacf773fa65..3d3ac9b2b35 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -11257,12 +11257,9 @@ void wsrep_register_binlog_handler(THD *thd, bool trx) /* Set an implicit savepoint in order to be able to truncate a trx-cache. */ - if (cache_mngr->trx_cache.get_prev_position() == MY_OFF_T_UNDEF) - { - my_off_t pos= 0; - binlog_trans_log_savepos(thd, &pos); - cache_mngr->trx_cache.set_prev_position(pos); - } + my_off_t pos= 0; + binlog_trans_log_savepos(thd, &pos); + cache_mngr->trx_cache.set_prev_position(pos); /* Set callbacks in order to be able to call commmit or rollback.