From 6c40590405da71a592a65b68e6b41b11437fb517 Mon Sep 17 00:00:00 2001 From: Christian Hesse Date: Mon, 28 Jan 2019 00:26:23 +0100 Subject: [PATCH 01/13] use environment file in systemd units for _WSREP_START_POSITION We used to run `systemctl set-environment` to pass _WSREP_START_POSITION. This is bad because: * it clutters systemd's environment (yes, pid 1) * it requires root privileges * options (like LimitNOFILE=) are not applied Let's just create an environment file in ExecStartPre=, that is read before ExecStart= kicks in. We have _WSREP_START_POSITION around for the main process without any downsides. --- support-files/CMakeLists.txt | 2 ++ support-files/mariadb.service.in | 12 ++++++------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/support-files/CMakeLists.txt b/support-files/CMakeLists.txt index 4c1db775c19..7f49fca212b 100644 --- a/support-files/CMakeLists.txt +++ b/support-files/CMakeLists.txt @@ -32,6 +32,8 @@ ELSE() SET(MYSQLD_GROUP "mysql") SET(ini_file_extension "cnf") SET(HOSTNAME "uname -n") + get_filename_component(MYSQL_UNIX_DIR ${MYSQL_UNIX_ADDR} DIRECTORY) + SET(mysqlunixdir ${MYSQL_UNIX_DIR}) ENDIF() # XXX: shouldn't we just have variables for all this stuff and centralise diff --git a/support-files/mariadb.service.in b/support-files/mariadb.service.in index b7a55596029..2f990df4a15 100644 --- a/support-files/mariadb.service.in +++ b/support-files/mariadb.service.in @@ -70,20 +70,20 @@ ProtectSystem=full # Prevent accessing /home, /root and /run/user ProtectHome=true -# Execute pre and post scripts as root, otherwise it does it as User= -PermissionsStartOnly=true +# Use an environment file to pass variable _WSREP_START_POSITION +EnvironmentFile=-@mysqlunixdir@/wsrep-start-position @SYSTEMD_EXECSTARTPRE@ # Perform automatic wsrep recovery. When server is started without wsrep, # galera_recovery simply returns an empty string. In any case, however, # the script is not expected to return with a non-zero status. 
-# It is always safe to unset _WSREP_START_POSITION environment variable. +# It is always safe to remove @mysqlunixdir@/wsrep-start-position +# environment file. # Do not panic if galera_recovery script is not available. (MDEV-10538) -ExecStartPre=/bin/sh -c "systemctl unset-environment _WSREP_START_POSITION" ExecStartPre=/bin/sh -c "[ ! -e @bindir@/galera_recovery ] && VAR= || \ VAR=`cd @bindir@/..; @bindir@/galera_recovery`; [ $? -eq 0 ] \ - && systemctl set-environment _WSREP_START_POSITION=$VAR || exit 1" + && echo _WSREP_START_POSITION=$VAR > @mysqlunixdir@/wsrep-start-position || exit 1" # Needed to create system tables etc. # ExecStartPre=@scriptdir@/mysql_install_db -u mysql @@ -96,7 +96,7 @@ ExecStartPre=/bin/sh -c "[ ! -e @bindir@/galera_recovery ] && VAR= || \ ExecStart=@sbindir@/mysqld $MYSQLD_OPTS $_WSREP_NEW_CLUSTER $_WSREP_START_POSITION # Unset _WSREP_START_POSITION environment variable. -ExecStartPost=/bin/sh -c "systemctl unset-environment _WSREP_START_POSITION" +ExecStartPost=/bin/rm -f @mysqlunixdir@/wsrep-start-position @SYSTEMD_EXECSTARTPOST@ From b54e4bf00b191633c74baad143c552bfb478fff3 Mon Sep 17 00:00:00 2001 From: Christian Hesse Date: Tue, 29 Jan 2019 10:03:42 +0100 Subject: [PATCH 02/13] update galera_new_cluster to use environment file Now that the systemd unit files use an environment file to pass _WSREP_START_POSITION we have to update galera_new_cluster as well. 
--- scripts/CMakeLists.txt | 15 +++++++++++++++ scripts/galera_new_cluster.sh | 4 ++-- support-files/mariadb.service.in | 3 +++ 3 files changed, 20 insertions(+), 2 deletions(-) mode change 100755 => 100644 scripts/galera_new_cluster.sh diff --git a/scripts/CMakeLists.txt b/scripts/CMakeLists.txt index 358045c17ab..e41c4fce68f 100644 --- a/scripts/CMakeLists.txt +++ b/scripts/CMakeLists.txt @@ -190,6 +190,9 @@ ELSE() SET(localstatedir ${MYSQL_DATADIR}) ENDIF() +get_filename_component(MYSQL_UNIX_DIR ${MYSQL_UNIX_ADDR} DIRECTORY) +SET(mysqlunixdir ${MYSQL_UNIX_DIR}) + SET(resolveip_locations "$basedir/${INSTALL_BINDIR} $basedir/bin") SET(mysqld_locations "$basedir/${INSTALL_SBINDIR} $basedir/libexec $basedir/sbin $basedir/bin") SET(errmsg_locations "$basedir/${INSTALL_MYSQLSHAREDIR}/english $basedir/share/english $basedir/share/mysql/english") @@ -207,6 +210,18 @@ INSTALL_SCRIPT( DESTINATION ${INSTALL_SCRIPTDIR} COMPONENT Server ) + +CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/galera_new_cluster.sh + ${CMAKE_CURRENT_BINARY_DIR}/galera_new_cluster ESCAPE_QUOTES @ONLY) + EXECUTE_PROCESS( + COMMAND chmod +x ${CMAKE_CURRENT_BINARY_DIR}/galera_new_cluster + ) + +INSTALL_SCRIPT( + "${CMAKE_CURRENT_BINARY_DIR}/galera_new_cluster" + DESTINATION ${INSTALL_SCRIPTDIR} + COMPONENT Server + ) ENDIF() SET(prefix "${CMAKE_INSTALL_PREFIX}") diff --git a/scripts/galera_new_cluster.sh b/scripts/galera_new_cluster.sh old mode 100755 new mode 100644 index ac9dcf42102..b6617378b90 --- a/scripts/galera_new_cluster.sh +++ b/scripts/galera_new_cluster.sh @@ -21,11 +21,11 @@ EOF exit 0 fi -systemctl set-environment _WSREP_NEW_CLUSTER='--wsrep-new-cluster' && \ +echo _WSREP_NEW_CLUSTER='--wsrep-new-cluster' > @mysqlunixdir@/"wsrep-new-cluster-${1:-mariadb}" && \ systemctl restart ${1:-mariadb} extcode=$? 
-systemctl set-environment _WSREP_NEW_CLUSTER='' +rm -f @mysqlunixdir@/"wsrep-new-cluster-${1:-mariadb}" exit $extcode diff --git a/support-files/mariadb.service.in b/support-files/mariadb.service.in index 2f990df4a15..19e2e16abeb 100644 --- a/support-files/mariadb.service.in +++ b/support-files/mariadb.service.in @@ -70,6 +70,9 @@ ProtectSystem=full # Prevent accessing /home, /root and /run/user ProtectHome=true +# Use an environment file to pass variable _WSREP_NEW_CLUSTER +EnvironmentFile=-@mysqlunixdir@/wsrep-new-cluster-%N + # Use an environment file to pass variable _WSREP_START_POSITION EnvironmentFile=-@mysqlunixdir@/wsrep-start-position From 277968aa4cfa46a290af1c44e9d756381005939a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Tue, 20 Jun 2023 14:57:04 +0300 Subject: [PATCH 03/13] MDEV-31413 : Node has been dropped from the cluster on Startup / Shutdown with async replica There were two related problems: (1) A Galera node that is defined as a slave to an async MariaDB master might do SST (state transfer) at restart, and as part of that it will copy the mysql.gtid_slave_pos table. The problem is that updates on that table are not replicated on a cluster. Therefore, the table from a donor that is not a slave is copied, and the joiner loses the gtid position it was at and starts executing events from the wrong position of the binlog. This incorrect position could break replication and cause the node to be dropped, requiring user action. (2) Slave sql thread might start executing events before galera is ready (wsrep_ready=ON) and that could also cause node to be dropped from the cluster. In this fix we enable replication of mysql.gtid_slave_pos table on a cluster. In this way all nodes in a cluster will know gtid slave position and even after SST joiner knows correct gtid position to start. Furthermore, we wait for galera to be ready before slave sql thread executes any events to prevent too early execution. 
Signed-off-by: Julius Goryavsky --- .../r/galera_as_slave_gtid_myisam.result | 9 +- .../galera/r/galera_restart_replica.result | 122 ++++++++++ .../galera/t/galera_as_slave_gtid_myisam.cnf | 1 + .../galera/t/galera_as_slave_gtid_myisam.test | 12 +- .../suite/galera/t/galera_restart_replica.cnf | 20 ++ .../galera/t/galera_restart_replica.test | 212 ++++++++++++++++++ sql/rpl_gtid.cc | 33 ++- sql/slave.cc | 2 + sql/wsrep_mysqld.cc | 26 ++- sql/wsrep_mysqld.h | 4 +- sql/wsrep_priv.h | 4 +- sql/wsrep_server_service.cc | 7 +- 12 files changed, 433 insertions(+), 19 deletions(-) create mode 100644 mysql-test/suite/galera/r/galera_restart_replica.result create mode 100644 mysql-test/suite/galera/t/galera_restart_replica.cnf create mode 100644 mysql-test/suite/galera/t/galera_restart_replica.test diff --git a/mysql-test/suite/galera/r/galera_as_slave_gtid_myisam.result b/mysql-test/suite/galera/r/galera_as_slave_gtid_myisam.result index b498f334bf8..6559c7828be 100644 --- a/mysql-test/suite/galera/r/galera_as_slave_gtid_myisam.result +++ b/mysql-test/suite/galera/r/galera_as_slave_gtid_myisam.result @@ -17,12 +17,15 @@ SELECT LENGTH(@@global.gtid_binlog_state) > 1; LENGTH(@@global.gtid_binlog_state) > 1 1 connection node_1; +SELECT COUNT(*) AS EXPECT_1 FROM t1; +EXPECT_1 +1 gtid_binlog_state_equal 0 connection node_2; -SELECT COUNT(*) AS EXPECT_0 FROM t1; -EXPECT_0 -0 +SELECT COUNT(*) AS EXPECT_1 FROM t1; +EXPECT_1 +1 gtid_binlog_state_equal 0 #cleanup diff --git a/mysql-test/suite/galera/r/galera_restart_replica.result b/mysql-test/suite/galera/r/galera_restart_replica.result new file mode 100644 index 00000000000..9b7e9fd259f --- /dev/null +++ b/mysql-test/suite/galera/r/galera_restart_replica.result @@ -0,0 +1,122 @@ +connection node_2; +connection node_1; +connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3; +create user repl@'%' identified by 'repl'; +grant all on *.* to repl@'%'; +ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; +connection node_1; +connection 
node_2; +connection node_2; +START SLAVE; +connection node_3; +CREATE TABLE t1 (id bigint primary key, msg varchar(100)) engine=innodb; +SELECT COUNT(*) AS EXPECT_10000 FROM t1; +EXPECT_10000 +10000 +connection node_2; +SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos; +EXPECT_1 +1 +SELECT COUNT(*) AS EXPECT_10000 FROM t1; +EXPECT_10000 +10000 +connection node_1; +SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos; +EXPECT_1 +1 +SELECT COUNT(*) AS EXPECT_10000 FROM t1; +EXPECT_10000 +10000 +connection node_2; +# Verify that graceful shutdown succeeds. +# Force SST +connection node_1; +# Waiting until node_2 is not part of cluster anymore +connection node_2; +# Start node_2 again +¤ Wait until node_2 is back on cluster +connection node_2; +SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos; +EXPECT_1 +1 +SELECT COUNT(*) AS EXPECT_10000 FROM t1; +EXPECT_10000 +10000 +connection node_1; +SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos; +EXPECT_1 +1 +SELECT COUNT(*) AS EXPECT_10000 FROM t1; +EXPECT_10000 +10000 +connection node_3; +SELECT COUNT(*) AS EXPECT_10000 FROM t1; +EXPECT_10000 +10000 +connection node_3; +drop table t1; +connection node_2; +connection node_1; +connection node_3; +CREATE TABLE t1 (id bigint primary key, msg varchar(100)) engine=innodb; +SELECT COUNT(*) AS EXPECT_10000 FROM t1; +EXPECT_10000 +10000 +connection node_2; +SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos; +EXPECT_1 +1 +SELECT COUNT(*) AS EXPECT_10000 FROM t1; +EXPECT_10000 +10000 +connection node_1; +SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos; +EXPECT_1 +1 +SELECT COUNT(*) AS EXPECT_10000 FROM t1; +EXPECT_10000 +10000 +connection node_2; +# Verify that graceful shutdown succeeds. 
+# Force SST +connection node_1; +# Waiting until node_2 is not part of cluster anymore +connection node_3; +SELECT COUNT(*) AS EXPECT_20000 FROM t1; +EXPECT_20000 +20000 +connection node_2; +# Start node_2 again +¤ Wait until node_2 is back on cluster +connection node_2; +SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos; +EXPECT_1 +1 +SELECT COUNT(*) AS EXPECT_20000 FROM t1; +EXPECT_20000 +20000 +connection node_1; +SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos; +EXPECT_1 +1 +SELECT COUNT(*) AS EXPECT_20000 FROM t1; +EXPECT_20000 +20000 +connection node_3; +SELECT COUNT(*) AS EXPECT_20000 FROM t1; +EXPECT_20000 +20000 +connection node_3; +drop table t1; +connection node_2; +connection node_1; +connection node_2; +STOP SLAVE; +RESET SLAVE ALL; +connection node_3; +RESET MASTER; +connection node_1; +disconnect node_3; +disconnect node_2; +disconnect node_1; +# End of test diff --git a/mysql-test/suite/galera/t/galera_as_slave_gtid_myisam.cnf b/mysql-test/suite/galera/t/galera_as_slave_gtid_myisam.cnf index 01d2eb12630..d3f33bd1427 100644 --- a/mysql-test/suite/galera/t/galera_as_slave_gtid_myisam.cnf +++ b/mysql-test/suite/galera/t/galera_as_slave_gtid_myisam.cnf @@ -4,3 +4,4 @@ log-bin=mysqld-bin log-slave-updates binlog-format=ROW +wsrep-replicate-myisam=ON diff --git a/mysql-test/suite/galera/t/galera_as_slave_gtid_myisam.test b/mysql-test/suite/galera/t/galera_as_slave_gtid_myisam.test index 8787f864a99..60476bc45a7 100644 --- a/mysql-test/suite/galera/t/galera_as_slave_gtid_myisam.test +++ b/mysql-test/suite/galera/t/galera_as_slave_gtid_myisam.test @@ -44,12 +44,20 @@ SELECT LENGTH(@@global.gtid_binlog_state) > 1; --let $wait_condition = SELECT COUNT(*) = 1 FROM t1; --source include/wait_condition.inc +SELECT COUNT(*) AS EXPECT_1 FROM t1; + --disable_query_log --eval SELECT '$gtid_binlog_state_node1' = @@global.gtid_binlog_state AS gtid_binlog_state_equal; --enable_query_log --connection node_2 -SELECT COUNT(*) AS EXPECT_0 FROM t1; +--let 
$wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1'; +--source include/wait_condition.inc + +--let $wait_condition = SELECT COUNT(*) = 1 FROM t1; +--source include/wait_condition.inc + +SELECT COUNT(*) AS EXPECT_1 FROM t1; --disable_query_log --eval SELECT '$gtid_binlog_state_node1' = @@global.gtid_binlog_state AS gtid_binlog_state_equal; @@ -59,8 +67,6 @@ SELECT COUNT(*) AS EXPECT_0 FROM t1; --connection node_3 DROP TABLE t1; ---sleep 1 - --connection node_1 --let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1'; --source include/wait_condition.inc diff --git a/mysql-test/suite/galera/t/galera_restart_replica.cnf b/mysql-test/suite/galera/t/galera_restart_replica.cnf new file mode 100644 index 00000000000..6713e301527 --- /dev/null +++ b/mysql-test/suite/galera/t/galera_restart_replica.cnf @@ -0,0 +1,20 @@ +!include ../galera_2nodes_as_slave.cnf + +[mysqld] +wsrep-debug=1 + +[mysqld.1] +server_id=15 +wsrep_gtid_mode=1 +wsrep_gtid_domain_id=16 +gtid_domain_id=11 +gtid_strict_mode=1 + +[mysqld.2] +skip-slave-start=OFF +server_id=15 +wsrep_gtid_mode=1 +wsrep_gtid_domain_id=16 +gtid_domain_id=11 +gtid_strict_mode=1 + diff --git a/mysql-test/suite/galera/t/galera_restart_replica.test b/mysql-test/suite/galera/t/galera_restart_replica.test new file mode 100644 index 00000000000..2cc3a1dcff2 --- /dev/null +++ b/mysql-test/suite/galera/t/galera_restart_replica.test @@ -0,0 +1,212 @@ +# +# Test Galera as a replica to a MySQL async replication +# +# The galera/galera_2node_slave.cnf describes the setup of the nodes +# +--source include/big_test.inc +--source include/force_restart.inc +--source include/galera_cluster.inc +--source include/have_sequence.inc + +# As node #3 is not a Galera node, and galera_cluster.inc does not open connetion to it +# we open the node_3 connection here +--connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3 + +create user repl@'%' identified by 'repl'; 
+grant all on *.* to repl@'%'; +ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; + +--let $node_1 = node_1 +--let $node_2 = node_2 +--source include/auto_increment_offset_save.inc + +--connection node_2 +--disable_query_log +--eval CHANGE MASTER TO master_host='127.0.0.1', master_user='repl', master_password='repl', master_port=$NODE_MYPORT_3, master_use_gtid=slave_pos; +--enable_query_log +START SLAVE; + +--connection node_3 + +CREATE TABLE t1 (id bigint primary key, msg varchar(100)) engine=innodb; +--disable_query_log +INSERT INTO t1 SELECT seq, 'test' from seq_1_to_10000; +--enable_query_log +SELECT COUNT(*) AS EXPECT_10000 FROM t1; + +--connection node_2 +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1'; +--source include/wait_condition.inc + +--let $wait_condition = SELECT COUNT(*) = 10000 FROM t1; +--source include/wait_condition.inc + +SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos; +SELECT COUNT(*) AS EXPECT_10000 FROM t1; + +--connection node_1 +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1'; +--source include/wait_condition.inc + +--let $wait_condition = SELECT COUNT(*) = 10000 FROM t1; +--source include/wait_condition.inc + +SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos; +SELECT COUNT(*) AS EXPECT_10000 FROM t1; + +--connection node_2 +--echo # Verify that graceful shutdown succeeds. 
+--source include/shutdown_mysqld.inc +--echo # Force SST +--remove_file $MYSQLTEST_VARDIR/mysqld.2/data/grastate.dat + +--connection node_1 +--echo # Waiting until node_2 is not part of cluster anymore +--let $wait_condition = SELECT VARIABLE_VALUE = 1 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; +--source include/wait_condition.inc +--let $wait_condition = SELECT VARIABLE_VALUE = 'Primary' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_status'; +--source include/wait_condition.inc + +--connection node_2 +--echo # Start node_2 again +--source include/start_mysqld.inc + +--echo ¤ Wait until node_2 is back on cluster +--let $wait_condition = SELECT VARIABLE_VALUE = 'Primary' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_status'; +--source include/wait_condition.inc +--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; +--source include/wait_condition.inc +--let $wait_condition = SELECT VARIABLE_VALUE = 'ON' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_ready'; +--source include/wait_condition.inc + +--connection node_2 +SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos; +SELECT COUNT(*) AS EXPECT_10000 FROM t1; + +--connection node_1 +SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos; +SELECT COUNT(*) AS EXPECT_10000 FROM t1; + +--connection node_3 +SELECT COUNT(*) AS EXPECT_10000 FROM t1; + +# +# Cleanup +# +--connection node_3 +drop table t1; + +--connection node_2 +--let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1'; +--source include/wait_condition.inc + +--connection node_1 +--let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1'; +--source include/wait_condition.inc + +# +# Case 2 : While slave is down add writes to master +# + +--connection node_3 + +CREATE TABLE 
t1 (id bigint primary key, msg varchar(100)) engine=innodb; +--disable_query_log +INSERT INTO t1 SELECT seq, 'test' from seq_1_to_10000; +--enable_query_log +SELECT COUNT(*) AS EXPECT_10000 FROM t1; + +--connection node_2 +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1'; +--source include/wait_condition.inc + +--let $wait_condition = SELECT COUNT(*) = 10000 FROM t1; +--source include/wait_condition.inc + +SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos; +SELECT COUNT(*) AS EXPECT_10000 FROM t1; + +--connection node_1 +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1'; +--source include/wait_condition.inc + +--let $wait_condition = SELECT COUNT(*) = 10000 FROM t1; +--source include/wait_condition.inc + +SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos; +SELECT COUNT(*) AS EXPECT_10000 FROM t1; + +--connection node_2 +--echo # Verify that graceful shutdown succeeds. +--source include/shutdown_mysqld.inc +--echo # Force SST +--remove_file $MYSQLTEST_VARDIR/mysqld.2/data/grastate.dat + +--connection node_1 +--echo # Waiting until node_2 is not part of cluster anymore +--let $wait_condition = SELECT VARIABLE_VALUE = 1 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; +--source include/wait_condition.inc +--let $wait_condition = SELECT VARIABLE_VALUE = 'Primary' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_status'; +--source include/wait_condition.inc + +# Add writes to master +--connection node_3 +--disable_query_log +INSERT INTO t1 SELECT seq, 'test' from seq_20001_to_30000; +--enable_query_log +SELECT COUNT(*) AS EXPECT_20000 FROM t1; + +--connection node_2 +--echo # Start node_2 again +--source include/start_mysqld.inc + +--echo ¤ Wait until node_2 is back on cluster +--let $wait_condition = SELECT VARIABLE_VALUE = 'Primary' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 
'wsrep_cluster_status'; +--source include/wait_condition.inc +--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; +--source include/wait_condition.inc +--let $wait_condition = SELECT VARIABLE_VALUE = 'ON' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_ready'; +--source include/wait_condition.inc + +--connection node_2 +--let $wait_condition = SELECT COUNT(*) = 20000 FROM t1; +--source include/wait_condition.inc +SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos; +SELECT COUNT(*) AS EXPECT_20000 FROM t1; + +--connection node_1 +--let $wait_condition = SELECT COUNT(*) = 20000 FROM t1; +--source include/wait_condition.inc +SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos; +SELECT COUNT(*) AS EXPECT_20000 FROM t1; + +--connection node_3 +SELECT COUNT(*) AS EXPECT_20000 FROM t1; + +# +# Cleanup +# +--connection node_3 +drop table t1; + +--connection node_2 +--let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1'; +--source include/wait_condition.inc + +--connection node_1 +--let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1'; +--source include/wait_condition.inc + +--connection node_2 +STOP SLAVE; +RESET SLAVE ALL; + +--connection node_3 +RESET MASTER; + +--connection node_1 +--disconnect node_3 + +--source include/auto_increment_offset_restore.inc +--source include/galera_end.inc +--echo # End of test diff --git a/sql/rpl_gtid.cc b/sql/rpl_gtid.cc index e7ff8924874..b0c9a018ae8 100644 --- a/sql/rpl_gtid.cc +++ b/sql/rpl_gtid.cc @@ -28,6 +28,10 @@ #include "rpl_rli.h" #include "slave.h" #include "log_event.h" +#ifdef WITH_WSREP +#include "wsrep_mysqld.h" // wsrep_thd_is_local +#include "wsrep_trans_observer.h" // wsrep_start_trx_if_not_started +#endif const LEX_CSTRING rpl_gtid_slave_state_table_name= { STRING_WITH_LEN("gtid_slave_pos") }; @@ -690,10 +694,18 @@ 
rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id, #ifdef WITH_WSREP /* - Updates in slave state table should not be appended to galera transaction - writeset. + We should replicate local gtid_slave_pos updates to other nodes. + In applier we should not append them to galera writeset. */ - thd->wsrep_ignore_table= true; + if (WSREP_ON_ && wsrep_thd_is_local(thd)) + { + thd->wsrep_ignore_table= false; + wsrep_start_trx_if_not_started(thd); + } + else + { + thd->wsrep_ignore_table= true; + } #endif if (!in_transaction) @@ -859,9 +871,20 @@ rpl_slave_state::gtid_delete_pending(THD *thd, #ifdef WITH_WSREP /* - Updates in slave state table should not be appended to galera transaction - writeset. + We should replicate local gtid_slave_pos updates to other nodes. + In applier we should not append them to galera writeset. */ + if (WSREP_ON_ && wsrep_thd_is_local(thd) && + thd->wsrep_cs().state() != wsrep::client_state::s_none) + { + if (thd->wsrep_trx().active() == false) + { + if (thd->wsrep_next_trx_id() == WSREP_UNDEFINED_TRX_ID) + thd->set_query_id(next_query_id()); + wsrep_start_transaction(thd, thd->wsrep_next_trx_id()); + } + thd->wsrep_ignore_table= false; + } thd->wsrep_ignore_table= true; #endif diff --git a/sql/slave.cc b/sql/slave.cc index 0b575b5f714..3fe89a95e5e 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -5484,6 +5484,8 @@ pthread_handler_t handle_slave_sql(void *arg) mysql_mutex_unlock(&rli->data_lock); #ifdef WITH_WSREP wsrep_open(thd); + if (WSREP_ON_) + wsrep_wait_ready(thd); if (wsrep_before_command(thd)) { WSREP_WARN("Slave SQL wsrep_before_command() failed"); diff --git a/sql/wsrep_mysqld.cc b/sql/wsrep_mysqld.cc index a8247f977cf..a345f8c6b35 100644 --- a/sql/wsrep_mysqld.cc +++ b/sql/wsrep_mysqld.cc @@ -1,4 +1,4 @@ -/* Copyright 2008-2022 Codership Oy +/* Copyright 2008-2023 Codership Oy This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published 
by @@ -3151,3 +3151,27 @@ bool wsrep_consistency_check(THD *thd) { return thd->wsrep_consistency_check == CONSISTENCY_CHECK_RUNNING; } + +// Wait until wsrep has reached ready state +void wsrep_wait_ready(THD *thd) +{ + mysql_mutex_lock(&LOCK_wsrep_ready); + while(!wsrep_ready) + { + WSREP_INFO("Waiting to reach ready state"); + mysql_cond_wait(&COND_wsrep_ready, &LOCK_wsrep_ready); + } + WSREP_INFO("ready state reached"); + mysql_mutex_unlock(&LOCK_wsrep_ready); +} + +void wsrep_ready_set(bool ready_value) +{ + WSREP_DEBUG("Setting wsrep_ready to %d", ready_value); + mysql_mutex_lock(&LOCK_wsrep_ready); + wsrep_ready= ready_value; + // Signal if we have reached ready state + if (wsrep_ready) + mysql_cond_signal(&COND_wsrep_ready); + mysql_mutex_unlock(&LOCK_wsrep_ready); +} diff --git a/sql/wsrep_mysqld.h b/sql/wsrep_mysqld.h index 201e3e1a44a..3d06f165c5c 100644 --- a/sql/wsrep_mysqld.h +++ b/sql/wsrep_mysqld.h @@ -1,4 +1,4 @@ -/* Copyright 2008-2022 Codership Oy +/* Copyright 2008-2023 Codership Oy This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -442,6 +442,8 @@ enum wsrep::streaming_context::fragment_unit wsrep_fragment_unit(ulong unit); wsrep::key wsrep_prepare_key_for_toi(const char* db, const char* table, enum wsrep::key::type type); +void wsrep_wait_ready(THD *thd); +void wsrep_ready_set(bool ready_value); #else /* !WITH_WSREP */ /* These macros are needed to compile MariaDB without WSREP support diff --git a/sql/wsrep_priv.h b/sql/wsrep_priv.h index e88d3c862a0..e707ec79c95 100644 --- a/sql/wsrep_priv.h +++ b/sql/wsrep_priv.h @@ -1,4 +1,4 @@ -/* Copyright 2010 Codership Oy +/* Copyright 2010-2023 Codership Oy This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,8 +22,6 @@ #include "wsrep_api.h" #include "wsrep/server_state.hpp" -my_bool wsrep_ready_set (my_bool x); - ssize_t 
wsrep_sst_prepare (void** msg); wsrep_cb_status wsrep_sst_donate_cb (void* app_ctx, void* recv_ctx, diff --git a/sql/wsrep_server_service.cc b/sql/wsrep_server_service.cc index 71f5d20feba..7bf9851c25b 100644 --- a/sql/wsrep_server_service.cc +++ b/sql/wsrep_server_service.cc @@ -342,6 +342,7 @@ void Wsrep_server_service::log_state_change( case Wsrep_server_state::s_synced: wsrep_ready= TRUE; WSREP_INFO("Synchronized with group, ready for connections"); + wsrep_ready_set(true); /* fall through */ case Wsrep_server_state::s_joined: case Wsrep_server_state::s_donor: @@ -349,16 +350,16 @@ void Wsrep_server_service::log_state_change( break; case Wsrep_server_state::s_connected: wsrep_cluster_status= "non-Primary"; - wsrep_ready= FALSE; + wsrep_ready_set(false); wsrep_connected= TRUE; break; case Wsrep_server_state::s_disconnected: - wsrep_ready= FALSE; + wsrep_ready_set(false); wsrep_connected= FALSE; wsrep_cluster_status= "Disconnected"; break; default: - wsrep_ready= FALSE; + wsrep_ready_set(false); wsrep_cluster_status= "non-Primary"; break; } From 48e6918c94d3a2caa99feb4b1195b9d6e2739581 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Tue, 8 Aug 2023 14:01:36 +0100 Subject: [PATCH 04/13] Revert "update galera_new_cluster to use environment file" This reverts commit b54e4bf00b191633c74baad143c552bfb478fff3. 
--- scripts/CMakeLists.txt | 15 --------------- scripts/galera_new_cluster.sh | 4 ++-- support-files/mariadb.service.in | 3 --- 3 files changed, 2 insertions(+), 20 deletions(-) mode change 100644 => 100755 scripts/galera_new_cluster.sh diff --git a/scripts/CMakeLists.txt b/scripts/CMakeLists.txt index e41c4fce68f..358045c17ab 100644 --- a/scripts/CMakeLists.txt +++ b/scripts/CMakeLists.txt @@ -190,9 +190,6 @@ ELSE() SET(localstatedir ${MYSQL_DATADIR}) ENDIF() -get_filename_component(MYSQL_UNIX_DIR ${MYSQL_UNIX_ADDR} DIRECTORY) -SET(mysqlunixdir ${MYSQL_UNIX_DIR}) - SET(resolveip_locations "$basedir/${INSTALL_BINDIR} $basedir/bin") SET(mysqld_locations "$basedir/${INSTALL_SBINDIR} $basedir/libexec $basedir/sbin $basedir/bin") SET(errmsg_locations "$basedir/${INSTALL_MYSQLSHAREDIR}/english $basedir/share/english $basedir/share/mysql/english") @@ -210,18 +207,6 @@ INSTALL_SCRIPT( DESTINATION ${INSTALL_SCRIPTDIR} COMPONENT Server ) - -CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/galera_new_cluster.sh - ${CMAKE_CURRENT_BINARY_DIR}/galera_new_cluster ESCAPE_QUOTES @ONLY) - EXECUTE_PROCESS( - COMMAND chmod +x ${CMAKE_CURRENT_BINARY_DIR}/galera_new_cluster - ) - -INSTALL_SCRIPT( - "${CMAKE_CURRENT_BINARY_DIR}/galera_new_cluster" - DESTINATION ${INSTALL_SCRIPTDIR} - COMPONENT Server - ) ENDIF() SET(prefix "${CMAKE_INSTALL_PREFIX}") diff --git a/scripts/galera_new_cluster.sh b/scripts/galera_new_cluster.sh old mode 100644 new mode 100755 index b6617378b90..ac9dcf42102 --- a/scripts/galera_new_cluster.sh +++ b/scripts/galera_new_cluster.sh @@ -21,11 +21,11 @@ EOF exit 0 fi -echo _WSREP_NEW_CLUSTER='--wsrep-new-cluster' > @mysqlunixdir@/"wsrep-new-cluster-${1:-mariadb}" && \ +systemctl set-environment _WSREP_NEW_CLUSTER='--wsrep-new-cluster' && \ systemctl restart ${1:-mariadb} extcode=$? 
-rm -f @mysqlunixdir@/"wsrep-new-cluster-${1:-mariadb}" +systemctl set-environment _WSREP_NEW_CLUSTER='' exit $extcode diff --git a/support-files/mariadb.service.in b/support-files/mariadb.service.in index 19e2e16abeb..2f990df4a15 100644 --- a/support-files/mariadb.service.in +++ b/support-files/mariadb.service.in @@ -70,9 +70,6 @@ ProtectSystem=full # Prevent accessing /home, /root and /run/user ProtectHome=true -# Use an environment file to pass variable _WSREP_NEW_CLUSTER -EnvironmentFile=-@mysqlunixdir@/wsrep-new-cluster-%N - # Use an environment file to pass variable _WSREP_START_POSITION EnvironmentFile=-@mysqlunixdir@/wsrep-start-position From 161ce045a71e306768d4609bdc35788fa5ea2a71 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Tue, 8 Aug 2023 14:01:47 +0100 Subject: [PATCH 05/13] Revert "use environment file in systemd units for _WSREP_START_POSITION" This reverts commit 6c40590405da71a592a65b68e6b41b11437fb517. --- support-files/CMakeLists.txt | 2 -- support-files/mariadb.service.in | 12 ++++++------ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/support-files/CMakeLists.txt b/support-files/CMakeLists.txt index 7f49fca212b..4c1db775c19 100644 --- a/support-files/CMakeLists.txt +++ b/support-files/CMakeLists.txt @@ -32,8 +32,6 @@ ELSE() SET(MYSQLD_GROUP "mysql") SET(ini_file_extension "cnf") SET(HOSTNAME "uname -n") - get_filename_component(MYSQL_UNIX_DIR ${MYSQL_UNIX_ADDR} DIRECTORY) - SET(mysqlunixdir ${MYSQL_UNIX_DIR}) ENDIF() # XXX: shouldn't we just have variables for all this stuff and centralise diff --git a/support-files/mariadb.service.in b/support-files/mariadb.service.in index 2f990df4a15..b7a55596029 100644 --- a/support-files/mariadb.service.in +++ b/support-files/mariadb.service.in @@ -70,20 +70,20 @@ ProtectSystem=full # Prevent accessing /home, /root and /run/user ProtectHome=true -# Use an environment file to pass variable _WSREP_START_POSITION -EnvironmentFile=-@mysqlunixdir@/wsrep-start-position +# Execute pre and 
post scripts as root, otherwise it does it as User= +PermissionsStartOnly=true @SYSTEMD_EXECSTARTPRE@ # Perform automatic wsrep recovery. When server is started without wsrep, # galera_recovery simply returns an empty string. In any case, however, # the script is not expected to return with a non-zero status. -# It is always safe to remove @mysqlunixdir@/wsrep-start-position -# environment file. +# It is always safe to unset _WSREP_START_POSITION environment variable. # Do not panic if galera_recovery script is not available. (MDEV-10538) +ExecStartPre=/bin/sh -c "systemctl unset-environment _WSREP_START_POSITION" ExecStartPre=/bin/sh -c "[ ! -e @bindir@/galera_recovery ] && VAR= || \ VAR=`cd @bindir@/..; @bindir@/galera_recovery`; [ $? -eq 0 ] \ - && echo _WSREP_START_POSITION=$VAR > @mysqlunixdir@/wsrep-start-position || exit 1" + && systemctl set-environment _WSREP_START_POSITION=$VAR || exit 1" # Needed to create system tables etc. # ExecStartPre=@scriptdir@/mysql_install_db -u mysql @@ -96,7 +96,7 @@ ExecStartPre=/bin/sh -c "[ ! -e @bindir@/galera_recovery ] && VAR= || \ ExecStart=@sbindir@/mysqld $MYSQLD_OPTS $_WSREP_NEW_CLUSTER $_WSREP_START_POSITION # Unset _WSREP_START_POSITION environment variable. 
-ExecStartPost=/bin/rm -f @mysqlunixdir@/wsrep-start-position +ExecStartPost=/bin/sh -c "systemctl unset-environment _WSREP_START_POSITION" @SYSTEMD_EXECSTARTPOST@ From e9333ff03cba05cff82a325d9d44720546fc7cb9 Mon Sep 17 00:00:00 2001 From: Monty Date: Thu, 10 Aug 2023 16:13:32 +0300 Subject: [PATCH 06/13] MDEV-31893 Valgrind reports issues in main.join_cache_notasan This is also related to MDEV-31348 Assertion `last_key_entry >= end_pos' failed in virtual bool JOIN_CACHE_HASHED::put_record() Valgrind exposed a problem with the join_cache for hash joins: =25636== Conditional jump or move depends on uninitialised value(s) ==25636== at 0xA8FF4E: JOIN_CACHE_HASHED::init_hash_table() (sql_join_cache.cc:2901) The reason for this was that avg_record_length contained a random value if one had used SET optimizer_switch='optimize_join_buffer_size=off'. This causes either 'random size' memory to be allocated (up to join_buffer_size) which can increase memory usage or, if avg_record_length is less than the row size, memory overwrites in thd->mem_root, which is bad. Fixed by setting avg_record_length in JOIN_CACHE_HASHED::init() before it's used. There is no test case for MDEV-31893 as valgrind of join_cache_notasan checks that. I added a test case for MDEV-31348. 
--- include/no_valgrind_without_big.inc | 13 +++++++++++ mysql-test/main/join_cache.result | 16 ++++++++++++++ mysql-test/main/join_cache.test | 19 ++++++++++++++++ mysql-test/main/join_cache_notasan.test | 3 +++ sql/sql_join_cache.cc | 29 +++++++++++++++---------- sql/sql_join_cache.h | 1 + 6 files changed, 70 insertions(+), 11 deletions(-) create mode 100644 include/no_valgrind_without_big.inc diff --git a/include/no_valgrind_without_big.inc b/include/no_valgrind_without_big.inc new file mode 100644 index 00000000000..ea1f2ac91ab --- /dev/null +++ b/include/no_valgrind_without_big.inc @@ -0,0 +1,13 @@ +# include/no_valgrind_without_big.inc +# +# If we are running with Valgrind ($VALGRIND_TEST <> 0) then the resource +# consumption (storage space needed, runtime ...) will be extreme. +# Therefore we require that the option "--big-test" is also set. +# + +if ($VALGRIND_TEST) { + if (!$BIG_TEST) + { + --skip Need "--big-test" when running with Valgrind + } +} diff --git a/mysql-test/main/join_cache.result b/mysql-test/main/join_cache.result index 20980d09001..92c04bb002b 100644 --- a/mysql-test/main/join_cache.result +++ b/mysql-test/main/join_cache.result @@ -6256,3 +6256,19 @@ length(concat(t1.f,t2.f)) DROP TABLE t; set @@optimizer_switch=@org_optimizer_switch; set @@join_buffer_size=@org_join_buffer_size; +# +# MDEV-31348 Assertion `last_key_entry >= end_pos' failed in +# virtual bool JOIN_CACHE_HASHED::put_record() +# +SET JOIN_buffer_size=1; +Warnings: +Warning 1292 Truncated incorrect join_buffer_size value: '1' +SET SESSION JOIN_cache_level=4; +SET SESSION optimizer_switch='optimize_JOIN_buffer_size=OFF'; +SELECT * FROM information_schema.statistics JOIN information_schema.COLUMNS USING (table_name,column_name); +ERROR HY001: Could not create a join buffer. 
Please check and adjust the value of the variables 'JOIN_BUFFER_SIZE (128)' and 'JOIN_BUFFER_SPACE_LIMIT (2097152)' +SET JOIN_buffer_size=16384; +SELECT * FROM information_schema.statistics JOIN information_schema.COLUMNS USING (table_name,column_name); +# +# End of 10.4 tests +# diff --git a/mysql-test/main/join_cache.test b/mysql-test/main/join_cache.test index 43ce3b97ad1..f8723447efe 100644 --- a/mysql-test/main/join_cache.test +++ b/mysql-test/main/join_cache.test @@ -4231,3 +4231,22 @@ SELECT length(concat(t1.f,t2.f)) FROM t t1, t t2; DROP TABLE t; set @@optimizer_switch=@org_optimizer_switch; set @@join_buffer_size=@org_join_buffer_size; + +--echo # +--echo # MDEV-31348 Assertion `last_key_entry >= end_pos' failed in +--echo # virtual bool JOIN_CACHE_HASHED::put_record() +--echo # + +SET JOIN_buffer_size=1; +SET SESSION JOIN_cache_level=4; +SET SESSION optimizer_switch='optimize_JOIN_buffer_size=OFF'; +--error ER_OUTOFMEMORY +SELECT * FROM information_schema.statistics JOIN information_schema.COLUMNS USING (table_name,column_name); +SET JOIN_buffer_size=16384; +--disable_result_log +SELECT * FROM information_schema.statistics JOIN information_schema.COLUMNS USING (table_name,column_name); +--enable_result_log + +--echo # +--echo # End of 10.4 tests +--echo # diff --git a/mysql-test/main/join_cache_notasan.test b/mysql-test/main/join_cache_notasan.test index cfdfe4eff18..406303ef7b3 100644 --- a/mysql-test/main/join_cache_notasan.test +++ b/mysql-test/main/join_cache_notasan.test @@ -2,7 +2,10 @@ # Tests that should be in join_cache but cannot be run with ASAN --source include/have_64bit.inc +# Disabled under ASAN, as ASAN builds crash when trying to allocate too much memory --source include/not_asan.inc +# Valgrind is useful here, but very slow as lots of memory is allocated +--source include/no_valgrind_without_big.inc --source include/have_innodb.inc --echo # diff --git a/sql/sql_join_cache.cc b/sql/sql_join_cache.cc index 1319fd59a99..f1dd23d9618 100644 --- 
a/sql/sql_join_cache.cc +++ b/sql/sql_join_cache.cc @@ -800,6 +800,18 @@ size_t JOIN_CACHE::get_min_join_buffer_size() } +size_t JOIN_CACHE::calc_avg_record_length() +{ + size_t len= 0; + for (JOIN_TAB *tab= start_tab; tab != join_tab; + tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS)) + { + len+= tab->get_used_fieldlength(); + } + len+= get_record_max_affix_length(); + return len; +} + /* Get the maximum possible size of the cache join buffer @@ -822,9 +834,9 @@ size_t JOIN_CACHE::get_min_join_buffer_size() 'max_buff_size' in order to use it directly at the next invocations of the function. - RETURN VALUE - The maximum possible size of the join buffer of this cache + The maximum possible size of the join buffer of this cache + avg_record_length is also updated if optimize_buff_size != 0 */ size_t JOIN_CACHE::get_max_join_buffer_size(bool optimize_buff_size, @@ -839,19 +851,13 @@ size_t JOIN_CACHE::get_max_join_buffer_size(bool optimize_buff_size, return max_buff_size= limit_sz; size_t max_sz; - size_t len= 0; + size_t len; double max_records, partial_join_cardinality= (join_tab-1)->get_partial_join_cardinality(); /* Expected join buffer space used for one record */ size_t space_per_record; - for (JOIN_TAB *tab= start_tab; tab != join_tab; - tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS)) - { - len+= tab->get_used_fieldlength(); - } - len+= get_record_max_affix_length(); - avg_record_length= len; + len= avg_record_length= calc_avg_record_length(); len+= get_max_key_addon_space_per_record() + avg_aux_buffer_incr; space_per_record= len; @@ -2786,7 +2792,6 @@ bool JOIN_CACHE_BKAH::save_explain_data(EXPLAIN_BKA_TYPE *explain) int JOIN_CACHE_HASHED::init(bool for_explain) { TABLE_REF *ref= &join_tab->ref; - DBUG_ENTER("JOIN_CACHE_HASHED::init"); hash_table= 0; @@ -2873,6 +2878,8 @@ int JOIN_CACHE_HASHED::init_hash_table() hash_table= 0; key_entries= 0; + avg_record_length= calc_avg_record_length(); + /* Calculate the minimal possible value of size_of_key_ofs 
greater than 1 */ uint max_size_of_key_ofs= MY_MAX(2, get_size_of_rec_offset()); for (size_of_key_ofs= 2; diff --git a/sql/sql_join_cache.h b/sql/sql_join_cache.h index 8bdce1bd592..b0cfb674ef9 100644 --- a/sql/sql_join_cache.h +++ b/sql/sql_join_cache.h @@ -130,6 +130,7 @@ protected: case 4: int4store(ptr, (uint32) ofs); return; } } + size_t calc_avg_record_length(); /* The maximum total length of the fields stored for a record in the cache. From 5055490c1798aa3385ab5d8d7f5f87d665e9af40 Mon Sep 17 00:00:00 2001 From: Kristian Nielsen Date: Thu, 3 Aug 2023 14:20:47 +0200 Subject: [PATCH 07/13] MDEV-381: fdatasync() does not correctly flush growing binlog file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Revert the old work-around for buggy fdatasync() on Linux ext3. This bug was fixed in Linux > 10 years ago back to kernel version at least 3.0. Reviewed-by: Marko Mäkelä Signed-off-by: Kristian Nielsen --- include/my_sys.h | 1 - mysys/my_sync.c | 18 ++---------------- sql/backup.cc | 3 +-- sql/log.cc | 12 ++++++------ 4 files changed, 9 insertions(+), 25 deletions(-) diff --git a/include/my_sys.h b/include/my_sys.h index 017968ab6ce..3c44b6a0a80 100644 --- a/include/my_sys.h +++ b/include/my_sys.h @@ -92,7 +92,6 @@ typedef struct my_aio_result { #define MY_THREADSAFE 2048U /* my_seek(): lock fd mutex */ #define MY_SYNC 4096U /* my_copy(): sync dst file */ #define MY_SYNC_DIR 32768U /* my_create/delete/rename: sync directory */ -#define MY_SYNC_FILESIZE 65536U /* my_sync(): safe sync when file is extended */ #define MY_THREAD_SPECIFIC 0x10000U /* my_malloc(): thread specific */ #define MY_THREAD_MOVE 0x20000U /* realloc(); Memory can move */ /* Tree that should delete things automatically */ diff --git a/mysys/my_sync.c b/mysys/my_sync.c index 736d97d14ef..6f8760c3183 100644 --- a/mysys/my_sync.c +++ b/mysys/my_sync.c @@ -49,13 +49,6 @@ void thr_set_sync_wait_callback(void (*before_wait)(void), (which is correct behaviour, 
if we know that the other thread synced the file before closing) - MY_SYNC_FILESIZE is useful when syncing a file after it has been extended. - On Linux, fdatasync() on ext3/ext4 file systems does not properly flush - to disk the inode data required to preserve the added data across a crash - (this looks to be a bug). But when a file is extended, inode data will most - likely need flushing in any case, so passing MY_SYNC_FILESIZE as flags - is not likely to be any slower, and will be crash safe on Linux ext3/ext4. - RETURN 0 ok -1 error @@ -88,12 +81,8 @@ int my_sync(File fd, myf my_flags) DBUG_PRINT("info",("fcntl(F_FULLFSYNC) failed, falling back")); #endif #if defined(HAVE_FDATASYNC) && HAVE_DECL_FDATASYNC - if (!(my_flags & MY_SYNC_FILESIZE)) - res= fdatasync(fd); - else - { -#endif -#if defined(HAVE_FSYNC) + res= fdatasync(fd); +#elif defined(HAVE_FSYNC) res= fsync(fd); if (res == -1 && errno == ENOLCK) res= 0; /* Result Bug in Old FreeBSD */ @@ -102,9 +91,6 @@ int my_sync(File fd, myf my_flags) #else #error Cannot find a way to sync a file, durability in danger res= 0; /* No sync (strange OS) */ -#endif -#if defined(HAVE_FDATASYNC) && HAVE_DECL_FDATASYNC - } #endif } while (res == -1 && errno == EINTR); diff --git a/sql/backup.cc b/sql/backup.cc index d3f2e453a85..d45c18a3cdf 100644 --- a/sql/backup.cc +++ b/sql/backup.cc @@ -311,8 +311,7 @@ static bool backup_block_commit(THD *thd) if (mysql_bin_log.is_open()) { mysql_mutex_lock(mysql_bin_log.get_log_lock()); - mysql_file_sync(mysql_bin_log.get_log_file()->file, - MYF(MY_WME|MY_SYNC_FILESIZE)); + mysql_file_sync(mysql_bin_log.get_log_file()->file, MYF(MY_WME)); mysql_mutex_unlock(mysql_bin_log.get_log_lock()); } thd->clear_error(); diff --git a/sql/log.cc b/sql/log.cc index 06ba27c4b79..726598affe3 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -3730,7 +3730,7 @@ bool MYSQL_BIN_LOG::open(const char *log_name, bytes_written+= description_event_for_queue->data_written; } if (flush_io_cache(&log_file) || - 
mysql_file_sync(log_file.file, MYF(MY_WME|MY_SYNC_FILESIZE))) + mysql_file_sync(log_file.file, MYF(MY_WME))) goto err; my_off_t offset= my_b_tell(&log_file); @@ -3768,7 +3768,7 @@ bool MYSQL_BIN_LOG::open(const char *log_name, strlen(log_file_name)) || my_b_write(&index_file, (uchar*) "\n", 1) || flush_io_cache(&index_file) || - mysql_file_sync(index_file.file, MYF(MY_WME|MY_SYNC_FILESIZE))) + mysql_file_sync(index_file.file, MYF(MY_WME))) goto err; #ifdef HAVE_REPLICATION @@ -3908,7 +3908,7 @@ static bool copy_up_file_and_fill(IO_CACHE *index_file, my_off_t offset) } /* The following will either truncate the file or fill the end with \n' */ if (mysql_file_chsize(file, offset - init_offset, '\n', MYF(MY_WME)) || - mysql_file_sync(file, MYF(MY_WME|MY_SYNC_FILESIZE))) + mysql_file_sync(file, MYF(MY_WME))) goto err; /* Reset data in old index cache */ @@ -4702,7 +4702,7 @@ int MYSQL_BIN_LOG::sync_purge_index_file() if (unlikely((error= flush_io_cache(&purge_index_file))) || unlikely((error= my_sync(purge_index_file.file, - MYF(MY_WME | MY_SYNC_FILESIZE))))) + MYF(MY_WME))))) DBUG_RETURN(error); DBUG_RETURN(error); @@ -5462,7 +5462,7 @@ bool MYSQL_BIN_LOG::flush_and_sync(bool *synced) if (sync_period && ++sync_counter >= sync_period) { sync_counter= 0; - err= mysql_file_sync(fd, MYF(MY_WME|MY_SYNC_FILESIZE)); + err= mysql_file_sync(fd, MYF(MY_WME)); if (synced) *synced= 1; #ifndef DBUG_OFF @@ -6142,7 +6142,7 @@ MYSQL_BIN_LOG::write_state_to_file() log_inited= false; if ((err= end_io_cache(&cache))) goto err; - if ((err= mysql_file_sync(file_no, MYF(MY_WME|MY_SYNC_FILESIZE)))) + if ((err= mysql_file_sync(file_no, MYF(MY_WME)))) goto err; goto end; From b2e312b0558cd9af6d8426412a3827e4e264b7b0 Mon Sep 17 00:00:00 2001 From: Kristian Nielsen Date: Tue, 8 Aug 2023 16:10:31 +0200 Subject: [PATCH 08/13] MDEV-23021: rpl.rpl_parallel_optimistic_until fails in Buildbot The test case accessed slave-relay-bin.000003 without waiting for the IO thread to write it first. 
If the IO thread was slow, this could fail. Signed-off-by: Kristian Nielsen --- .../rpl/t/rpl_parallel_optimistic_until.test | 23 ++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/mysql-test/suite/rpl/t/rpl_parallel_optimistic_until.test b/mysql-test/suite/rpl/t/rpl_parallel_optimistic_until.test index 508213c9075..0797e8bf220 100644 --- a/mysql-test/suite/rpl/t/rpl_parallel_optimistic_until.test +++ b/mysql-test/suite/rpl/t/rpl_parallel_optimistic_until.test @@ -263,13 +263,34 @@ BEGIN; START SLAVE IO_THREAD; --source include/wait_for_slave_io_to_start.inc -# The following test sets the stop coordinate is set to inside the first event +# The following test sets the stop coordinate to inside the first event # of a relay log that holds events of a transaction started in an earlier log. # Peek the stop position in the middle of trx1, not even on a event boundary. --let $pos_until=255 --let $file_rl=slave-relay-bin.000003 --let $binlog_file=$file_rl +# Wait for the IO thread to write the trx1 to the relaylog before querying it. +# (wait_for_slave_param.inc isn't flexible enough, so do it manually.) 
+--let $continue= 1 +--let $count=600 +while ($continue) +{ + --let $cur_file= query_get_value(SHOW SLAVE STATUS, 'Master_Log_File', 1) + --let $cur_pos= query_get_value(SHOW SLAVE STATUS, 'Read_Master_Log_Pos', 1) + --let $continue= `SELECT '$cur_file' = '$fil_1' AND $cur_pos < $pos_trx1` + if ($continue) + { + --dec $count + if (!$count) + { + --echo **** ERROR: timeout waiting for Read_Master_Log_Pos($cur_pos) >= $pos_trx1 (file='$cur_file') ****" + --die Timeout waiting for IO thread to write master events to the relaylog + } + --sleep 0.1 + } +} + --let $pos_xid=508 --let $info= query_get_value(SHOW RELAYLOG EVENTS IN '$file_rl' FROM $pos_xid LIMIT 1, Info, 1) From 646eb7be497fd8da173b937ef83d7f1b72ca64d2 Mon Sep 17 00:00:00 2001 From: Julius Goryavsky Date: Fri, 11 Aug 2023 07:13:35 +0200 Subject: [PATCH 09/13] galera: wsrep-lib submodule update --- wsrep-lib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wsrep-lib b/wsrep-lib index e238c0d240c..173693f2eeb 160000 --- a/wsrep-lib +++ b/wsrep-lib @@ -1 +1 @@ -Subproject commit e238c0d240c2557229b0523a4a032f3cf8b41639 +Subproject commit 173693f2eeb61054424233fe85fde4086bed36be From dd19ba188c6fbcd1b385eac34553aaceca42fc29 Mon Sep 17 00:00:00 2001 From: Daniel Bartholomew Date: Mon, 14 Aug 2023 13:43:36 -0400 Subject: [PATCH 10/13] bump the VERSION --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 8c23df65833..203fc146cec 100644 --- a/VERSION +++ b/VERSION @@ -1,4 +1,4 @@ MYSQL_VERSION_MAJOR=10 MYSQL_VERSION_MINOR=4 -MYSQL_VERSION_PATCH=31 +MYSQL_VERSION_PATCH=32 SERVER_MATURITY=stable From d84df2b878bf1bb61f1d4c7e474615b2c299722d Mon Sep 17 00:00:00 2001 From: Daniel Bartholomew Date: Mon, 14 Aug 2023 13:46:16 -0400 Subject: [PATCH 11/13] bump the VERSION --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index e1357a38a48..0f8a4608896 100644 --- a/VERSION +++ b/VERSION @@ -1,4 +1,4 @@ 
MYSQL_VERSION_MAJOR=10 MYSQL_VERSION_MINOR=5 -MYSQL_VERSION_PATCH=22 +MYSQL_VERSION_PATCH=23 SERVER_MATURITY=stable From 1fa7c9a3cdeded4cae85e8e0869b67da4b617065 Mon Sep 17 00:00:00 2001 From: Alexander Barkov Date: Fri, 21 Jul 2023 15:19:38 +0400 Subject: [PATCH 12/13] MDEV-31724 Compressed varchar values lost on joins when sorting on columns from joined table(s) Field_varstring::get_copy_func() did not take into account that functions do_varstring1[_mb], do_varstring2[_mb] do not support compressed data. Changing the return value of Field_varstring::get_copy_func() to `do_field_string` if there is compression and truncation at the same time. This fixes the problem, so now it works as follows: - val_str() uncompresses the data - The prefix is then calculated on the uncompressed data Additionally, introducing two new copying functions - do_varstring1_no_truncation() - do_varstring2_no_truncation() Using new copying functions in cases when: - a Field_varstring with length_bytes==1 is changing to a longer Field_varstring with length_bytes==1 - a Field_varstring with length_bytes==2 is changing to a longer Field_varstring with length_bytes==2 In these cases we care neither about compression nor about multi-byte prefixes: the entire data gets fully copied from the source column to the target column as is. This is a kind of new optimization, but this also was needed to preserve existing MTR test results. 
--- mysql-test/main/column_compression.result | 238 ++++++++++++++++++++++ mysql-test/main/column_compression.test | 175 ++++++++++++++++ sql/field_conv.cc | 49 +++++ 3 files changed, 462 insertions(+) diff --git a/mysql-test/main/column_compression.result b/mysql-test/main/column_compression.result index 5ea981cfdc3..e3bccd5f0de 100644 --- a/mysql-test/main/column_compression.result +++ b/mysql-test/main/column_compression.result @@ -2672,3 +2672,241 @@ SET column_compression_threshold=0; INSERT INTO t1 VALUES('aa'); SET column_compression_threshold=DEFAULT; DROP TABLE t1; +# +# MDEV-31724 Compressed varchar values lost on joins when sorting on columns from joined table(s) +# +CREATE TABLE t1 ( +id int(10) unsigned not null, +txt varchar(5000) COMPRESSED NOT NULL DEFAULT '', +PRIMARY KEY (id) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_bin; +CREATE TABLE t2 ( +id int(10) unsigned not null, +n1 bigint(20) NOT NULL, +n2 bigint(20) NOT NULL, +n3 bigint(20) NOT NULL, +PRIMARY KEY (id) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_bin; +INSERT INTO t1 VALUES +(1, 'short string < 100 chars'), +(2, 'long string = 99 chars '), +(3, 'long string = 100 chars !'), +(4, 'long string = 101 chars !'); +INSERT INTO t2 VALUES +(1, 24, 1, 1), +(2, 99, 2, 2), +(3, 100, 3, 3), +(4, 101, 4, 4); +SELECT txt, v.* FROM t1 LEFT JOIN t2 v ON t1.id = v.id; +txt id n1 n2 n3 +short string < 100 chars 1 24 1 1 +long string = 99 chars 2 99 2 2 +long string = 100 chars ! 3 100 3 3 +long string = 101 chars ! 4 101 4 4 +SELECT txt, v.* FROM t1 LEFT JOIN t2 v ON t1.id = v.id ORDER BY v.n1; +txt id n1 n2 n3 +short string < 100 chars 1 24 1 1 +long string = 99 chars 2 99 2 2 +long string = 100 chars ! 3 100 3 3 +long string = 101 chars ! 4 101 4 4 +SELECT txt, v.* FROM t1 JOIN t2 v ON t1.id = v.id; +txt id n1 n2 n3 +short string < 100 chars 1 24 1 1 +long string = 99 chars 2 99 2 2 +long string = 100 chars ! 3 100 3 3 +long string = 101 chars ! 
4 101 4 4 +SELECT txt, v.* FROM t1 JOIN t2 v ON t1.id = v.id ORDER BY v.n1; +txt id n1 n2 n3 +short string < 100 chars 1 24 1 1 +long string = 99 chars 2 99 2 2 +long string = 100 chars ! 3 100 3 3 +long string = 101 chars ! 4 101 4 4 +DROP TABLE t1, t2; +CREATE OR REPLACE TABLE t1 ( +id INT NOT NULL PRIMARY KEY, +txt varchar(5000) COMPRESSED NOT NULL DEFAULT '' +) CHARSET=utf8mb3; +INSERT INTO t1 VALUES +(1, REPEAT('a', 10)), +(2, REPEAT('b', 99)), +(3, REPEAT('c', 100)), +(4, REPEAT('d', 121)); +SELECT txt, sysdate(6) FROM t1 ORDER BY 2; +txt sysdate(6) +aaaaaaaaaa +bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb +cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc +ddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd +DROP TABLE t1; +CREATE FUNCTION f1(imax INT, jmax INT) RETURNS TEXT +BEGIN +DECLARE res TEXT DEFAULT 'x'; +FOR i IN 0..imax +DO +FOR j IN 0..jmax +DO +SET res=CONCAT(res, ' ', i, ' ', j); +END FOR; +END FOR; +RETURN res; +END; +$$ +SET @@column_compression_threshold=32; +# VARCHAR1, 8bit, truncation +CREATE TABLE t1 (a VARCHAR(254) COMPRESSED CHARACTER SET latin1); +INSERT INTO t1 VALUES (f1(6,6)); +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +LENGTH(a) LEFT(a,30) RIGHT(a,30) +197 x 0 0 0 1 0 2 0 3 0 4 0 5 0 6 6 6 0 6 1 6 2 6 3 6 4 6 5 6 6 +FLUSH STATUS; +ALTER IGNORE TABLE t1 MODIFY a VARCHAR(4) COMPRESSED CHARACTER SET latin1; +Warnings: +Warning 1265 Data truncated for column 'a' at row 1 +SHOW STATUS LIKE 'Column%compressions'; +Variable_name Value +Column_compressions 0 +Column_decompressions 1 +SELECT LENGTH(a), a FROM t1; +LENGTH(a) a +4 x 0 +DROP TABLE t1; +CREATE TABLE t1 (a VARCHAR(254) COMPRESSED CHARACTER SET latin1); +INSERT INTO t1 VALUES (REPEAT('a',254)); +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +LENGTH(a) LEFT(a,30) RIGHT(a,30) +254 
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +FLUSH STATUS; +ALTER IGNORE TABLE t1 MODIFY a VARCHAR(4) COMPRESSED CHARACTER SET latin1; +Warnings: +Warning 1265 Data truncated for column 'a' at row 1 +SHOW STATUS LIKE 'Column%compressions'; +Variable_name Value +Column_compressions 0 +Column_decompressions 1 +SELECT LENGTH(a), a FROM t1; +LENGTH(a) a +4 aaaa +DROP TABLE t1; +# VARCHAR1, 8bit, no truncation +CREATE TABLE t1 (a VARCHAR(250) COMPRESSED CHARACTER SET latin1); +INSERT INTO t1 VALUES (f1(6,6)); +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +LENGTH(a) LEFT(a,30) RIGHT(a,30) +197 x 0 0 0 1 0 2 0 3 0 4 0 5 0 6 6 6 0 6 1 6 2 6 3 6 4 6 5 6 6 +FLUSH STATUS; +ALTER IGNORE TABLE t1 MODIFY a VARCHAR(254) COMPRESSED CHARACTER SET latin1; +SHOW STATUS LIKE 'Column%compressions'; +Variable_name Value +Column_compressions 0 +Column_decompressions 0 +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +LENGTH(a) LEFT(a,30) RIGHT(a,30) +197 x 0 0 0 1 0 2 0 3 0 4 0 5 0 6 6 6 0 6 1 6 2 6 3 6 4 6 5 6 6 +DROP TABLE t1; +# VARCHAR2, 8bit, truncation +CREATE TABLE t1 (a VARCHAR(32000) COMPRESSED CHARACTER SET latin1); +INSERT INTO t1 VALUES (f1(31,31)); +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +LENGTH(a) LEFT(a,30) RIGHT(a,30) +5505 x 0 0 0 1 0 2 0 3 0 4 0 5 0 6 31 27 31 28 31 29 31 30 31 31 +FLUSH STATUS; +ALTER IGNORE TABLE t1 MODIFY a VARCHAR(256) COMPRESSED CHARACTER SET latin1; +Warnings: +Warning 1265 Data truncated for column 'a' at row 1 +SHOW STATUS LIKE 'Column%compressions'; +Variable_name Value +Column_compressions 1 +Column_decompressions 1 +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +LENGTH(a) LEFT(a,30) RIGHT(a,30) +256 x 0 0 0 1 0 2 0 3 0 4 0 5 0 6 1 17 1 18 1 19 1 20 1 21 1 22 +DROP TABLE t1; +# VARCHAR2, 8bit, no truncation +CREATE TABLE t1 (a VARCHAR(32000) COMPRESSED CHARACTER SET latin1); +INSERT INTO t1 VALUES (f1(31,31)); +SELECT LENGTH(a), LEFT(a,30), RIGHT(a, 30) FROM t1; +LENGTH(a) LEFT(a,30) RIGHT(a, 30) 
+5505 x 0 0 0 1 0 2 0 3 0 4 0 5 0 6 31 27 31 28 31 29 31 30 31 31 +FLUSH STATUS; +ALTER IGNORE TABLE t1 MODIFY a VARCHAR(32001) COMPRESSED CHARACTER SET latin1; +SHOW STATUS LIKE 'Column%compressions'; +Variable_name Value +Column_compressions 0 +Column_decompressions 0 +SELECT LENGTH(a), LEFT(a,30), RIGHT(a, 30) FROM t1; +LENGTH(a) LEFT(a,30) RIGHT(a, 30) +5505 x 0 0 0 1 0 2 0 3 0 4 0 5 0 6 31 27 31 28 31 29 31 30 31 31 +DROP TABLE t1; +# VARCHAR1, multi-byte, truncation +CREATE TABLE t1 (a VARCHAR(80) COMPRESSED CHARACTER SET utf8mb3); +INSERT INTO t1 VALUES (f1(3,3)); +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +LENGTH(a) LEFT(a,30) RIGHT(a,30) +65 x 0 0 0 1 0 2 0 3 1 0 1 1 1 2 0 2 1 2 2 2 3 3 0 3 1 3 2 3 3 +FLUSH STATUS; +ALTER IGNORE TABLE t1 MODIFY a VARCHAR(1) COMPRESSED CHARACTER SET utf8mb3; +Warnings: +Warning 1265 Data truncated for column 'a' at row 1 +SHOW STATUS LIKE 'Column%compressions'; +Variable_name Value +Column_compressions 0 +Column_decompressions 1 +SELECT LENGTH(a), a FROM t1; +LENGTH(a) a +1 x +DROP TABLE t1; +# VARCHAR1, multi-byte, no truncation +CREATE TABLE t1 (a VARCHAR(80) COMPRESSED CHARACTER SET utf8mb3); +INSERT INTO t1 VALUES (f1(3,3)); +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +LENGTH(a) LEFT(a,30) RIGHT(a,30) +65 x 0 0 0 1 0 2 0 3 1 0 1 1 1 2 0 2 1 2 2 2 3 3 0 3 1 3 2 3 3 +FLUSH STATUS; +ALTER IGNORE TABLE t1 MODIFY a VARCHAR(81) COMPRESSED CHARACTER SET utf8mb3; +SHOW STATUS LIKE 'Column%compressions'; +Variable_name Value +Column_compressions 0 +Column_decompressions 0 +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +LENGTH(a) LEFT(a,30) RIGHT(a,30) +65 x 0 0 0 1 0 2 0 3 1 0 1 1 1 2 0 2 1 2 2 2 3 3 0 3 1 3 2 3 3 +DROP TABLE t1; +# VARCHAR2, multi-byte, truncation +CREATE TABLE t1 (a VARCHAR(10000) COMPRESSED CHARACTER SET utf8mb3); +INSERT INTO t1 VALUES (f1(31,31)); +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +LENGTH(a) LEFT(a,30) RIGHT(a,30) +5505 x 0 0 0 1 0 2 0 3 0 4 0 5 0 6 31 27 31 28 31 29 
31 30 31 31 +FLUSH STATUS; +ALTER IGNORE TABLE t1 MODIFY a VARCHAR(256) COMPRESSED CHARACTER SET utf8mb3; +Warnings: +Warning 1265 Data truncated for column 'a' at row 1 +SHOW STATUS LIKE 'Column%compressions'; +Variable_name Value +Column_compressions 1 +Column_decompressions 1 +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +LENGTH(a) LEFT(a,30) RIGHT(a,30) +256 x 0 0 0 1 0 2 0 3 0 4 0 5 0 6 1 17 1 18 1 19 1 20 1 21 1 22 +DROP TABLE t1; +# VARCHAR2, multi-byte, no truncation +CREATE TABLE t1 (a VARCHAR(10000) COMPRESSED CHARACTER SET utf8mb3); +INSERT INTO t1 VALUES (f1(31,31)); +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +LENGTH(a) LEFT(a,30) RIGHT(a,30) +5505 x 0 0 0 1 0 2 0 3 0 4 0 5 0 6 31 27 31 28 31 29 31 30 31 31 +FLUSH STATUS; +ALTER IGNORE TABLE t1 MODIFY a VARCHAR(10001) COMPRESSED CHARACTER SET utf8mb3; +SHOW STATUS LIKE 'Column%compressions'; +Variable_name Value +Column_compressions 0 +Column_decompressions 0 +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +LENGTH(a) LEFT(a,30) RIGHT(a,30) +5505 x 0 0 0 1 0 2 0 3 0 4 0 5 0 6 31 27 31 28 31 29 31 30 31 31 +DROP TABLE t1; +SET @@column_compression_threshold=DEFAULT; +DROP FUNCTION f1; +# +# End of 10.4 tests +# diff --git a/mysql-test/main/column_compression.test b/mysql-test/main/column_compression.test index 1560b23ad3e..29e0218749c 100644 --- a/mysql-test/main/column_compression.test +++ b/mysql-test/main/column_compression.test @@ -266,3 +266,178 @@ SET column_compression_threshold=0; INSERT INTO t1 VALUES('aa'); SET column_compression_threshold=DEFAULT; DROP TABLE t1; + + +--echo # +--echo # MDEV-31724 Compressed varchar values lost on joins when sorting on columns from joined table(s) +--echo # + +CREATE TABLE t1 ( + id int(10) unsigned not null, + txt varchar(5000) COMPRESSED NOT NULL DEFAULT '', + PRIMARY KEY (id) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_bin; + +CREATE TABLE t2 ( + id int(10) unsigned not null, + n1 bigint(20) NOT NULL, + n2 bigint(20) NOT NULL, 
+ n3 bigint(20) NOT NULL, + PRIMARY KEY (id) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_bin; + +INSERT INTO t1 VALUES +(1, 'short string < 100 chars'), +(2, 'long string = 99 chars '), +(3, 'long string = 100 chars !'), +(4, 'long string = 101 chars !'); + +INSERT INTO t2 VALUES +(1, 24, 1, 1), +(2, 99, 2, 2), +(3, 100, 3, 3), +(4, 101, 4, 4); + +SELECT txt, v.* FROM t1 LEFT JOIN t2 v ON t1.id = v.id; +SELECT txt, v.* FROM t1 LEFT JOIN t2 v ON t1.id = v.id ORDER BY v.n1; +SELECT txt, v.* FROM t1 JOIN t2 v ON t1.id = v.id; +SELECT txt, v.* FROM t1 JOIN t2 v ON t1.id = v.id ORDER BY v.n1; + +DROP TABLE t1, t2; + +CREATE OR REPLACE TABLE t1 ( + id INT NOT NULL PRIMARY KEY, + txt varchar(5000) COMPRESSED NOT NULL DEFAULT '' +) CHARSET=utf8mb3; + +INSERT INTO t1 VALUES +(1, REPEAT('a', 10)), +(2, REPEAT('b', 99)), +(3, REPEAT('c', 100)), +(4, REPEAT('d', 121)); + +--replace_column 2 +--sorted_result +SELECT txt, sysdate(6) FROM t1 ORDER BY 2; +DROP TABLE t1; + + +DELIMITER $$; +CREATE FUNCTION f1(imax INT, jmax INT) RETURNS TEXT +BEGIN + DECLARE res TEXT DEFAULT 'x'; + FOR i IN 0..imax + DO + FOR j IN 0..jmax + DO + SET res=CONCAT(res, ' ', i, ' ', j); + END FOR; + END FOR; + RETURN res; +END; +$$ +DELIMITER ;$$ + + +# Let's override the default threshold (100) to force +# comression for VARCHAR1+MB, for example, for: +# VARCHAR(80) CHARACTER SET utf8mb3 + +SET @@column_compression_threshold=32; + +--echo # VARCHAR1, 8bit, truncation +CREATE TABLE t1 (a VARCHAR(254) COMPRESSED CHARACTER SET latin1); +INSERT INTO t1 VALUES (f1(6,6)); +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +FLUSH STATUS; +ALTER IGNORE TABLE t1 MODIFY a VARCHAR(4) COMPRESSED CHARACTER SET latin1; +SHOW STATUS LIKE 'Column%compressions'; +SELECT LENGTH(a), a FROM t1; +DROP TABLE t1; + +CREATE TABLE t1 (a VARCHAR(254) COMPRESSED CHARACTER SET latin1); +INSERT INTO t1 VALUES (REPEAT('a',254)); +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +FLUSH STATUS; +ALTER IGNORE TABLE t1 
MODIFY a VARCHAR(4) COMPRESSED CHARACTER SET latin1; +SHOW STATUS LIKE 'Column%compressions'; +SELECT LENGTH(a), a FROM t1; +DROP TABLE t1; + +--echo # VARCHAR1, 8bit, no truncation +CREATE TABLE t1 (a VARCHAR(250) COMPRESSED CHARACTER SET latin1); +INSERT INTO t1 VALUES (f1(6,6)); +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +FLUSH STATUS; +ALTER IGNORE TABLE t1 MODIFY a VARCHAR(254) COMPRESSED CHARACTER SET latin1; +SHOW STATUS LIKE 'Column%compressions'; +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +DROP TABLE t1; + +--echo # VARCHAR2, 8bit, truncation +CREATE TABLE t1 (a VARCHAR(32000) COMPRESSED CHARACTER SET latin1); +INSERT INTO t1 VALUES (f1(31,31)); +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +FLUSH STATUS; +ALTER IGNORE TABLE t1 MODIFY a VARCHAR(256) COMPRESSED CHARACTER SET latin1; +SHOW STATUS LIKE 'Column%compressions'; +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +DROP TABLE t1; + +--echo # VARCHAR2, 8bit, no truncation +CREATE TABLE t1 (a VARCHAR(32000) COMPRESSED CHARACTER SET latin1); +INSERT INTO t1 VALUES (f1(31,31)); +SELECT LENGTH(a), LEFT(a,30), RIGHT(a, 30) FROM t1; +FLUSH STATUS; +ALTER IGNORE TABLE t1 MODIFY a VARCHAR(32001) COMPRESSED CHARACTER SET latin1; +SHOW STATUS LIKE 'Column%compressions'; +SELECT LENGTH(a), LEFT(a,30), RIGHT(a, 30) FROM t1; +DROP TABLE t1; + +--echo # VARCHAR1, multi-byte, truncation +CREATE TABLE t1 (a VARCHAR(80) COMPRESSED CHARACTER SET utf8mb3); +INSERT INTO t1 VALUES (f1(3,3)); +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +FLUSH STATUS; +ALTER IGNORE TABLE t1 MODIFY a VARCHAR(1) COMPRESSED CHARACTER SET utf8mb3; +SHOW STATUS LIKE 'Column%compressions'; +SELECT LENGTH(a), a FROM t1; +DROP TABLE t1; + +--echo # VARCHAR1, multi-byte, no truncation +CREATE TABLE t1 (a VARCHAR(80) COMPRESSED CHARACTER SET utf8mb3); +INSERT INTO t1 VALUES (f1(3,3)); +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +FLUSH STATUS; +ALTER IGNORE TABLE t1 MODIFY a VARCHAR(81) COMPRESSED CHARACTER 
SET utf8mb3; +SHOW STATUS LIKE 'Column%compressions'; +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +DROP TABLE t1; + +--echo # VARCHAR2, multi-byte, truncation +CREATE TABLE t1 (a VARCHAR(10000) COMPRESSED CHARACTER SET utf8mb3); +INSERT INTO t1 VALUES (f1(31,31)); +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +FLUSH STATUS; +ALTER IGNORE TABLE t1 MODIFY a VARCHAR(256) COMPRESSED CHARACTER SET utf8mb3; +SHOW STATUS LIKE 'Column%compressions'; +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +DROP TABLE t1; + +--echo # VARCHAR2, multi-byte, no truncation +CREATE TABLE t1 (a VARCHAR(10000) COMPRESSED CHARACTER SET utf8mb3); +INSERT INTO t1 VALUES (f1(31,31)); +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +FLUSH STATUS; +ALTER IGNORE TABLE t1 MODIFY a VARCHAR(10001) COMPRESSED CHARACTER SET utf8mb3; +SHOW STATUS LIKE 'Column%compressions'; +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +DROP TABLE t1; + +SET @@column_compression_threshold=DEFAULT; + +DROP FUNCTION f1; + +--echo # +--echo # End of 10.4 tests +--echo # diff --git a/sql/field_conv.cc b/sql/field_conv.cc index 7ec93e032e6..7e60e37a36e 100644 --- a/sql/field_conv.cc +++ b/sql/field_conv.cc @@ -532,6 +532,40 @@ static void do_expand_string(Copy_field *copy) } +/* + Copy from a Field_varstring with length_bytes==1 + into another Field_varstring with length_bytes==1 + when the target column is not shorter than the source column. + We don't need to calculate the prefix in this case. 
It works for + - non-compressed and compressed columns + - single byte and multi-byte character sets +*/ +static void do_varstring1_no_truncation(Copy_field *copy) +{ + uint length= (uint) *(uchar*) copy->from_ptr; + DBUG_ASSERT(length <= copy->to_length - 1); + *(uchar*) copy->to_ptr= (uchar) length; + memcpy(copy->to_ptr+1, copy->from_ptr + 1, length); +} + +/* + Copy from a Field_varstring with length_bytes==2 + into another Field_varstring with length_bytes==2 + when the target column is not shorter than the source column. + We don't need to calculate the prefix in this case. It works for + - non-compressed and compressed columns + - single byte and multi-byte character sets +*/ +static void do_varstring2_no_truncation(Copy_field *copy) +{ + uint length= uint2korr(copy->from_ptr); + DBUG_ASSERT(length <= copy->to_length - HA_KEY_BLOB_LENGTH); + int2store(copy->to_ptr, length); + memcpy(copy->to_ptr + HA_KEY_BLOB_LENGTH, + copy->from_ptr + HA_KEY_BLOB_LENGTH, length); +} + + static void do_varstring1(Copy_field *copy) { uint length= (uint) *(uchar*) copy->from_ptr; @@ -776,6 +810,21 @@ Field::Copy_func *Field_varstring::get_copy_func(const Field *from) const length_bytes != ((const Field_varstring*) from)->length_bytes || !compression_method() != !from->compression_method()) return do_field_string; + + if (field_length >= from->field_length) + return length_bytes == 1 ? do_varstring1_no_truncation : + do_varstring2_no_truncation; + + if (compression_method()) + { + /* + Truncation is going to happen, so we need to calculate prefixes. + Can't calculate prefixes directly on compressed data, + need to go through val_str() to uncompress. + */ + return do_field_string; + } + return length_bytes == 1 ? (from->charset()->mbmaxlen == 1 ? do_varstring1 : do_varstring1_mb) : (from->charset()->mbmaxlen == 1 ? 
do_varstring2 : do_varstring2_mb); From 9c8ae6dca50534eeb08a9f8837f4e28c80688740 Mon Sep 17 00:00:00 2001 From: Alexander Barkov Date: Tue, 15 Aug 2023 09:36:38 +0400 Subject: [PATCH 13/13] MDEV-24797 Column Compression - ERROR 1265 (01000): Data truncated for column The issue was fixed earlier by MDEV-31724. Only adding MTR tests. --- mysql-test/main/column_compression.result | 11 +++++++++++ mysql-test/main/column_compression.test | 15 +++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/mysql-test/main/column_compression.result b/mysql-test/main/column_compression.result index e3bccd5f0de..bea6f2d3998 100644 --- a/mysql-test/main/column_compression.result +++ b/mysql-test/main/column_compression.result @@ -2908,5 +2908,16 @@ DROP TABLE t1; SET @@column_compression_threshold=DEFAULT; DROP FUNCTION f1; # +# MDEV-24797 Column Compression - ERROR 1265 (01000): Data truncated for column +# +CREATE TABLE t1 (a VARCHAR(500) COMPRESSED CHARACTER SET utf8mb3) ENGINE=MyISAM; +INSERT INTO t1 SET a=REPEAT('x',127); +ALTER TABLE t1 FORCE, ALGORITHM=COPY; +DROP TABLE t1; +CREATE TABLE t1 (a VARCHAR(500) COMPRESSED CHARACTER SET utf8mb3) ENGINE=InnoDB; +INSERT INTO t1 SET a=REPEAT('x',127); +ALTER TABLE t1 FORCE, ALGORITHM=COPY; +DROP TABLE t1; +# # End of 10.4 tests # diff --git a/mysql-test/main/column_compression.test b/mysql-test/main/column_compression.test index 29e0218749c..84870f539e8 100644 --- a/mysql-test/main/column_compression.test +++ b/mysql-test/main/column_compression.test @@ -438,6 +438,21 @@ SET @@column_compression_threshold=DEFAULT; DROP FUNCTION f1; +--echo # +--echo # MDEV-24797 Column Compression - ERROR 1265 (01000): Data truncated for column +--echo # + +CREATE TABLE t1 (a VARCHAR(500) COMPRESSED CHARACTER SET utf8mb3) ENGINE=MyISAM; +INSERT INTO t1 SET a=REPEAT('x',127); +ALTER TABLE t1 FORCE, ALGORITHM=COPY; +DROP TABLE t1; + +CREATE TABLE t1 (a VARCHAR(500) COMPRESSED CHARACTER SET utf8mb3) ENGINE=InnoDB; +INSERT INTO t1 SET
a=REPEAT('x',127); +ALTER TABLE t1 FORCE, ALGORITHM=COPY; +DROP TABLE t1; + + --echo # --echo # End of 10.4 tests --echo #