From b4ace139a11a312ea4d32509295b45030ff8f119 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 15 Aug 2023 12:14:31 +0300 Subject: [PATCH 1/8] Remove the often-hanging test innodb.alter_rename_files The test innodb.alter_rename_files rather frequently hangs in checkpoint_set_now. The test was removed in MariaDB Server 10.5 commit 37e7bde12abddcda4d5505450e39a739561bd4d5 when the code that it aimed to cover was simplified. Starting with MariaDB Server 10.5 the page flushing and log checkpointing is much simpler, handled by the single buf_flush_page_cleaner() thread. Let us remove the test to avoid occasional failures. We are not going to fix the cause of the failure in MariaDB Server 10.4. --- .../suite/innodb/r/alter_rename_files.result | 19 ------------ .../suite/innodb/t/alter_rename_files.test | 31 ------------------- 2 files changed, 50 deletions(-) delete mode 100644 mysql-test/suite/innodb/r/alter_rename_files.result delete mode 100644 mysql-test/suite/innodb/t/alter_rename_files.test diff --git a/mysql-test/suite/innodb/r/alter_rename_files.result b/mysql-test/suite/innodb/r/alter_rename_files.result deleted file mode 100644 index 4e373d95fad..00000000000 --- a/mysql-test/suite/innodb/r/alter_rename_files.result +++ /dev/null @@ -1,19 +0,0 @@ -CREATE TABLE t1 (x INT NOT NULL UNIQUE KEY) ENGINE=InnoDB; -INSERT INTO t1 VALUES(5); -SET GLOBAL innodb_log_checkpoint_now=TRUE; -SET DEBUG_SYNC='commit_cache_rebuild SIGNAL ready WAIT_FOR finish'; -ALTER TABLE t1 FORCE;; -connect con1,localhost,root,,; -SET DEBUG_SYNC='now WAIT_FOR ready'; -SET GLOBAL innodb_log_checkpoint_now=TRUE; -SET DEBUG_SYNC='now SIGNAL finish'; -disconnect con1; -connection default; -SHOW CREATE TABLE t1; -Table Create Table -t1 CREATE TABLE `t1` ( - `x` int(11) NOT NULL, - UNIQUE KEY `x` (`x`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci -DROP TABLE t1; -SET DEBUG_SYNC='RESET'; diff --git a/mysql-test/suite/innodb/t/alter_rename_files.test b/mysql-test/suite/innodb/t/alter_rename_files.test deleted file mode 100644 index 27408320f7d..00000000000 --- a/mysql-test/suite/innodb/t/alter_rename_files.test +++ /dev/null @@ -1,31 +0,0 @@ ---source include/have_debug.inc ---source include/have_debug_sync.inc ---source include/have_innodb.inc ---source include/count_sessions.inc - -CREATE TABLE t1 (x INT NOT NULL UNIQUE KEY) ENGINE=InnoDB; -INSERT INTO t1 VALUES(5); - -SET GLOBAL innodb_log_checkpoint_now=TRUE; - -# Start an ALTER TABLE and stop it before renaming the files -SET DEBUG_SYNC='commit_cache_rebuild SIGNAL ready WAIT_FOR finish'; - ---send ALTER TABLE t1 FORCE; - -connect (con1,localhost,root,,); - -SET DEBUG_SYNC='now WAIT_FOR ready'; - -SET GLOBAL innodb_log_checkpoint_now=TRUE; - -SET DEBUG_SYNC='now SIGNAL finish'; - -disconnect con1; -connection default; -reap; -SHOW CREATE TABLE t1; -DROP TABLE t1; -SET DEBUG_SYNC='RESET'; - ---source include/wait_until_count_sessions.inc From 920789e9d413504a137aab486961eac06d02b203 Mon Sep 17 00:00:00 2001 From: Kristian Nielsen Date: Sun, 9 Jul 2023 16:45:47 +0200 Subject: [PATCH 2/8] MDEV-31482: Lock wait timeout with INSERT-SELECT, autoinc, and statement-based replication Remove the exception that InnoDB does not report auto-increment locks waits to the parallel replication. There was an assumption that these waits could not cause conflicts with in-order parallel replication and thus need not be reported. However, this assumption is wrong and it is possible to get conflicts that lead to hangs for the duration of --innodb-lock-wait-timeout. This can be seen with three transactions: 1. T1 is waiting for T3 on an autoinc lock 2. T2 is waiting for T1 to commit 3. T3 is waiting on a normal row lock held by T2 Here, T3 needs to be deadlock killed on the wait by T1. Note: This should be null-merged to 10.6, as a different fix is needed there due to InnoDB lock code changes. Signed-off-by: Kristian Nielsen --- .../suite/rpl/r/rpl_parallel_autoinc.result | 95 ++++++++++++ .../suite/rpl/t/rpl_parallel_autoinc.test | 140 ++++++++++++++++++ sql/sql_class.cc | 6 - storage/innobase/lock/lock0lock.cc | 8 +- 4 files changed, 236 insertions(+), 13 deletions(-) create mode 100644 mysql-test/suite/rpl/r/rpl_parallel_autoinc.result create mode 100644 mysql-test/suite/rpl/t/rpl_parallel_autoinc.test diff --git a/mysql-test/suite/rpl/r/rpl_parallel_autoinc.result b/mysql-test/suite/rpl/r/rpl_parallel_autoinc.result new file mode 100644 index 00000000000..c1829bafa1a --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_parallel_autoinc.result @@ -0,0 +1,95 @@ +include/master-slave.inc +[connection master] +MDEV-31482: Lock wait timeout with INSERT-SELECT, autoinc, and statement-based replication +include/rpl_connect.inc [creating slave2] +include/rpl_connect.inc [creating slave3] +connection master; +ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; +CREATE TABLE t1 (a INT PRIMARY KEY AUTO_INCREMENT, b INT, c INT, INDEX (c)) ENGINE=InnoDB; +INSERT INTO t1 (b,c) VALUES (0, 1), (0, 1), (0, 2), (0,3), (0, 5), (0, 7), (0, 8); +CREATE TABLE t2 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB; +INSERT INTO t2 VALUES (10,1), (20,2), (30,3), (40,4), (50,5); +CREATE TABLE t3 (a VARCHAR(20) PRIMARY KEY, b INT) ENGINE=InnoDB; +INSERT INTO t3 VALUES ('row for T1', 0), ('row for T2', 0), ('row for T3', 0); +include/save_master_gtid.inc +connection slave; +include/sync_with_master_gtid.inc +include/stop_slave.inc +set @@global.slave_parallel_threads= 3; +set @@global.slave_parallel_mode= OPTIMISTIC; +set @@global.innodb_lock_wait_timeout= 20; +connection master; +BEGIN; +UPDATE t3 SET b=b+1 where a="row for T1"; +INSERT INTO t1(b, c) SELECT 1, t2.b FROM t2 WHERE a=10; +Warnings: +Note 1592 Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT. Statements writing to a table with an auto-increment column after selecting from another table are unsafe because the order in which rows are retrieved determines what (if any) rows will be written. This order cannot be predicted and may differ on master and the slave +COMMIT; +DELETE FROM t1 WHERE c >= 4 and c < 6; +BEGIN; +UPDATE t3 SET b=b+1 where a="row for T3"; +INSERT INTO t1(b, c) SELECT 3, t2.b FROM t2 WHERE a >= 20 AND a <= 40; +Warnings: +Note 1592 Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT. Statements writing to a table with an auto-increment column after selecting from another table are unsafe because the order in which rows are retrieved determines what (if any) rows will be written. This order cannot be predicted and may differ on master and the slave +COMMIT; +include/save_master_gtid.inc +connection slave1; +BEGIN; +SELECT * FROM t3 WHERE a="row for T1" FOR UPDATE; +a b +row for T1 0 +connection slave2; +BEGIN; +SELECT * FROM t3 WHERE a="row for T3" FOR UPDATE; +a b +row for T3 0 +connection slave3; +BEGIN; +DELETE FROM t2 WHERE a=30; +connection slave; +include/start_slave.inc +connection slave2; +ROLLBACK; +connection slave1; +ROLLBACK; +connection slave3; +ROLLBACK; +connection slave; +include/sync_with_master_gtid.inc +SELECT * FROM t1 ORDER BY a; +a b c +1 0 1 +2 0 1 +3 0 2 +4 0 3 +6 0 7 +7 0 8 +8 1 1 +9 3 2 +10 3 3 +11 3 4 +SELECT * FROM t2 ORDER BY a; +a b +10 1 +20 2 +30 3 +40 4 +50 5 +SELECT * FROM t3 ORDER BY a; +a b +row for T1 1 +row for T2 0 +row for T3 1 +connection master; +CALL mtr.add_suppression("Unsafe statement written to the binary log using statement format"); +DROP TABLE t1, t2, t3; +connection slave; +include/stop_slave.inc +SET @@global.slave_parallel_threads= 0; +SET @@global.slave_parallel_mode= conservative; +SET @@global.innodb_lock_wait_timeout= 50; +include/start_slave.inc +SELECT @@GLOBAL.innodb_autoinc_lock_mode; +@@GLOBAL.innodb_autoinc_lock_mode +1 +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_parallel_autoinc.test b/mysql-test/suite/rpl/t/rpl_parallel_autoinc.test new file mode 100644 index 00000000000..0e96b4dfb80 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_parallel_autoinc.test @@ -0,0 +1,140 @@ +--source include/have_binlog_format_statement.inc +--source include/have_innodb.inc +--source include/master-slave.inc + +--echo MDEV-31482: Lock wait timeout with INSERT-SELECT, autoinc, and statement-based replication + +# The scenario is transactions T1, T2, T3: +# +# T1 is waiting for T3 on an autoinc lock +# T2 is waiting for T1 to commit +# T3 is waiting on a normal row lock held by T2 +# +# This caused a hang until innodb_lock_wait_timeout, because autoinc +# locks were not reported to the in-order parallel replication, so T3 +# was not deadlock killed. + +--let $lock_wait_timeout=20 + +--let $rpl_connection_name= slave2 +--let $rpl_server_number= 2 +--source include/rpl_connect.inc + +--let $rpl_connection_name= slave3 +--let $rpl_server_number= 2 +--source include/rpl_connect.inc + +--connection master +ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; + +# A table as destination for INSERT-SELECT +CREATE TABLE t1 (a INT PRIMARY KEY AUTO_INCREMENT, b INT, c INT, INDEX (c)) ENGINE=InnoDB; +INSERT INTO t1 (b,c) VALUES (0, 1), (0, 1), (0, 2), (0,3), (0, 5), (0, 7), (0, 8); + +# A table as source for INSERT-SELECT. +CREATE TABLE t2 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB; +INSERT INTO t2 VALUES (10,1), (20,2), (30,3), (40,4), (50,5); + +# A table to help order slave worker threads to setup the desired scenario. +CREATE TABLE t3 (a VARCHAR(20) PRIMARY KEY, b INT) ENGINE=InnoDB; +INSERT INTO t3 VALUES ('row for T1', 0), ('row for T2', 0), ('row for T3', 0); +--source include/save_master_gtid.inc + +--connection slave +--source include/sync_with_master_gtid.inc +--source include/stop_slave.inc +--let $save_innodb_lock_wait_timeout= `SELECT @@global.innodb_lock_wait_timeout` +--let $save_slave_parallel_threads= `SELECT @@global.slave_parallel_threads` +--let $save_slave_parallel_mode= `SELECT @@global.slave_parallel_mode` +set @@global.slave_parallel_threads= 3; +set @@global.slave_parallel_mode= OPTIMISTIC; +eval set @@global.innodb_lock_wait_timeout= $lock_wait_timeout; + +--connection master +# Transaction T1. +BEGIN; +UPDATE t3 SET b=b+1 where a="row for T1"; +INSERT INTO t1(b, c) SELECT 1, t2.b FROM t2 WHERE a=10; +COMMIT; + +# Transaction T2. +DELETE FROM t1 WHERE c >= 4 and c < 6; + +# Transaction T3. +BEGIN; +UPDATE t3 SET b=b+1 where a="row for T3"; +INSERT INTO t1(b, c) SELECT 3, t2.b FROM t2 WHERE a >= 20 AND a <= 40; +COMMIT; + +--source include/save_master_gtid.inc + +--connection slave1 +# Temporarily block T1 to create the scheduling that triggers the bug. +BEGIN; +SELECT * FROM t3 WHERE a="row for T1" FOR UPDATE; + +--connection slave2 +# Temporarily block T3 from starting (so T2 can reach commit). +BEGIN; +SELECT * FROM t3 WHERE a="row for T3" FOR UPDATE; + +--connection slave3 +# This critical step blocks T3 after it has inserted its first row, +# and thus taken the auto-increment lock, but before it has reached +# the point where it gets a row lock wait on T2. Even though +# auto-increment lock waits were not reported due to the bug, +# transitive lock waits (T1 waits on autoinc of T3 which waits on row +# on T2) _were_ reported as T1 waiting on T2, and thus a deadlock kill +# happened and the bug was not triggered. +BEGIN; +DELETE FROM t2 WHERE a=30; + +--connection slave +--source include/start_slave.inc + +# First let T2 complete until it is waiting for T1 to commit. +--let $wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE state='Waiting for prior transaction to commit' and command LIKE 'Slave_worker'; +--source include/wait_condition.inc + +# Then let T3 reach the point where it has obtained the autoinc lock, +# but it is not yet waiting for a row lock held by T2. +--connection slave2 +ROLLBACK; +--let $wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE state='Sending data' and info LIKE 'INSERT INTO t1(b, c) SELECT 3, t2.b%' and time_ms > 500 and command LIKE 'Slave_worker'; +--source include/wait_condition.inc + +# Now let T1 continue, while T3 is holding the autoinc lock but before +# it is waiting for T2. Wait a short while to give the hang a chance to +# happen; T1 needs to get to request the autoinc lock before we let T3 +# continue. (There's a small chance the sleep will be too small, which will +# let the test occasionally pass on non-fixed server). +--connection slave1 +ROLLBACK; +--sleep 0.5 + +# Now let T3 continue; the bug was that this lead to an undetected +# deadlock that remained until innodb lock wait timeout. +--connection slave3 +ROLLBACK; + +--connection slave +--let $slave_timeout= `SELECT $lock_wait_timeout/2` +--source include/sync_with_master_gtid.inc +--let $slave_timeout= +SELECT * FROM t1 ORDER BY a; +SELECT * FROM t2 ORDER BY a; +SELECT * FROM t3 ORDER BY a; + +# Cleanup. +--connection master +CALL mtr.add_suppression("Unsafe statement written to the binary log using statement format"); +DROP TABLE t1, t2, t3; + +--connection slave +--source include/stop_slave.inc +eval SET @@global.slave_parallel_threads= $save_slave_parallel_threads; +eval SET @@global.slave_parallel_mode= $save_slave_parallel_mode; +eval SET @@global.innodb_lock_wait_timeout= $save_innodb_lock_wait_timeout; +--source include/start_slave.inc +SELECT @@GLOBAL.innodb_autoinc_lock_mode; +--source include/rpl_end.inc diff --git a/sql/sql_class.cc b/sql/sql_class.cc index d2a3cbcf613..ae388515961 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -5144,12 +5144,6 @@ thd_need_wait_reports(const MYSQL_THD thd) deadlock with the pre-determined commit order, we kill the later transaction, and later re-try it, to resolve the deadlock. - This call need only receive reports about waits for locks that will remain - until the holding transaction commits. InnoDB auto-increment locks, - for example, are released earlier, and so need not be reported. (Such false - positives are not harmful, but could lead to unnecessary kill and retry, so - best avoided). - Returns 1 if the OTHER_THD will be killed to resolve deadlock, 0 if not. The actual kill will happen later, asynchronously from another thread. The caller does not need to take any actions on the return value if the diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 8e75955c8af..b31e8ba5b56 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -6941,13 +6941,7 @@ DeadlockChecker::search() return m_start; } - /* We do not need to report autoinc locks to the upper - layer. These locks are released before commit, so they - can not cause deadlocks with binlog-fixed commit - order. */ - if (m_report_waiters - && (lock_get_type_low(lock) != LOCK_TABLE - || lock_get_mode(lock) != LOCK_AUTO_INC)) { + if (m_report_waiters) { thd_rpl_deadlock_check(m_start->mysql_thd, lock->trx->mysql_thd); } From 900c4d692073ae51413d8f739977216a56663cbf Mon Sep 17 00:00:00 2001 From: Kristian Nielsen Date: Tue, 11 Jul 2023 00:31:29 +0200 Subject: [PATCH 3/8] MDEV-31655: Parallel replication deadlock victim preference code errorneously removed Restore code to make InnoDB choose the second transaction as a deadlock victim if two transactions deadlock that need to commit in-order for parallel replication. This code was erroneously removed when VATS was implemented in InnoDB. Also add a test case for InnoDB choosing the right deadlock victim. Also fixes this bug, with testcase that reliably reproduces: MDEV-28776: rpl.rpl_mark_optimize_tbl_ddl fails with timeout on sync_with_master Note: This should be null-merged to 10.6, as a different fix is needed there due to InnoDB locking code changes. Signed-off-by: Kristian Nielsen --- .../binlog_encryption/rpl_parallel.result | 42 +++++++++- mysql-test/suite/rpl/r/rpl_parallel.result | 42 +++++++++- .../r/rpl_parallel_deadlock_victim2.result | 50 +++++++++++ mysql-test/suite/rpl/t/rpl_parallel.test | 71 +++++++++++++++- .../rpl/t/rpl_parallel_deadlock_victim2.test | 83 +++++++++++++++++++ sql/rpl_parallel.cc | 12 ++- sql/slave.cc | 1 + sql/sql_class.cc | 43 ++++++++++ sql/sql_insert.cc | 7 ++ storage/innobase/lock/lock0lock.cc | 17 ++++ storage/innobase/lock/lock0wait.cc | 15 +++- storage/innobase/trx/trx0trx.cc | 15 ++++ 12 files changed, 393 insertions(+), 5 deletions(-) create mode 100644 mysql-test/suite/rpl/r/rpl_parallel_deadlock_victim2.result create mode 100644 mysql-test/suite/rpl/t/rpl_parallel_deadlock_victim2.test diff --git a/mysql-test/suite/binlog_encryption/rpl_parallel.result b/mysql-test/suite/binlog_encryption/rpl_parallel.result index b75a66a634a..b24ff7ba53d 100644 --- a/mysql-test/suite/binlog_encryption/rpl_parallel.result +++ b/mysql-test/suite/binlog_encryption/rpl_parallel.result @@ -2,6 +2,7 @@ include/master-slave.inc [connection master] connection server_2; SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads; +SET @old_parallel_mode=@@GLOBAL.slave_parallel_mode; SET GLOBAL slave_parallel_threads=10; ERROR HY000: This operation cannot be performed as you have a running slave ''; run STOP SLAVE '' first include/stop_slave.inc @@ -1680,13 +1681,52 @@ a 2000 SELECT * FROM t2 WHERE a>=2000 ORDER BY a; a +MDEV-31655: Parallel replication deadlock victim preference code erroneously removed +connection server_1; +CREATE TABLE t7 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB; +BEGIN; +COMMIT; +include/save_master_gtid.inc +connection server_2; +include/sync_with_master_gtid.inc +include/stop_slave.inc +set @@global.slave_parallel_threads= 5; +set @@global.slave_parallel_mode= conservative; +SET @old_dbug= @@GLOBAL.debug_dbug; +SET GLOBAL debug_dbug= "+d,rpl_mdev31655_zero_retries"; +connection master; +SET @old_dbug= @@SESSION.debug_dbug; +SET SESSION debug_dbug="+d,binlog_force_commit_id"; +SET @commit_id= 1+1000; +SET @commit_id= 2+1000; +SET @commit_id= 3+1000; +SET @commit_id= 4+1000; +SET @commit_id= 5+1000; +SET @commit_id= 6+1000; +SET @commit_id= 7+1000; +SET @commit_id= 8+1000; +SET @commit_id= 9+1000; +SET @commit_id= 10+1000; +SET SESSION debug_dbug= @old_dbug; +SELECT COUNT(*), SUM(a*100*b) FROM t7; +COUNT(*) SUM(a*100*b) +10 225000 +include/save_master_gtid.inc +connection server_2; +include/start_slave.inc +include/sync_with_master_gtid.inc +SET GLOBAL debug_dbug= @old_dbug; +SELECT COUNT(*), SUM(a*100*b) FROM t7; +COUNT(*) SUM(a*100*b) +10 225000 connection server_2; include/stop_slave.inc SET GLOBAL slave_parallel_threads=@old_parallel_threads; +SET GLOBAL slave_parallel_mode=@old_parallel_mode; include/start_slave.inc SET DEBUG_SYNC= 'RESET'; connection server_1; DROP function foo; -DROP TABLE t1,t2,t3,t4,t5,t6; +DROP TABLE t1,t2,t3,t4,t5,t6,t7; SET DEBUG_SYNC= 'RESET'; include/rpl_end.inc diff --git a/mysql-test/suite/rpl/r/rpl_parallel.result b/mysql-test/suite/rpl/r/rpl_parallel.result index 9b2e68d366e..ef89d954faa 100644 --- a/mysql-test/suite/rpl/r/rpl_parallel.result +++ b/mysql-test/suite/rpl/r/rpl_parallel.result @@ -2,6 +2,7 @@ include/master-slave.inc [connection master] connection server_2; SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads; +SET @old_parallel_mode=@@GLOBAL.slave_parallel_mode; SET GLOBAL slave_parallel_threads=10; ERROR HY000: This operation cannot be performed as you have a running slave ''; run STOP SLAVE '' first include/stop_slave.inc @@ -1679,13 +1680,52 @@ a 2000 SELECT * FROM t2 WHERE a>=2000 ORDER BY a; a +MDEV-31655: Parallel replication deadlock victim preference code erroneously removed +connection server_1; +CREATE TABLE t7 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB; +BEGIN; +COMMIT; +include/save_master_gtid.inc +connection server_2; +include/sync_with_master_gtid.inc +include/stop_slave.inc +set @@global.slave_parallel_threads= 5; +set @@global.slave_parallel_mode= conservative; +SET @old_dbug= @@GLOBAL.debug_dbug; +SET GLOBAL debug_dbug= "+d,rpl_mdev31655_zero_retries"; +connection master; +SET @old_dbug= @@SESSION.debug_dbug; +SET SESSION debug_dbug="+d,binlog_force_commit_id"; +SET @commit_id= 1+1000; +SET @commit_id= 2+1000; +SET @commit_id= 3+1000; +SET @commit_id= 4+1000; +SET @commit_id= 5+1000; +SET @commit_id= 6+1000; +SET @commit_id= 7+1000; +SET @commit_id= 8+1000; +SET @commit_id= 9+1000; +SET @commit_id= 10+1000; +SET SESSION debug_dbug= @old_dbug; +SELECT COUNT(*), SUM(a*100*b) FROM t7; +COUNT(*) SUM(a*100*b) +10 225000 +include/save_master_gtid.inc +connection server_2; +include/start_slave.inc +include/sync_with_master_gtid.inc +SET GLOBAL debug_dbug= @old_dbug; +SELECT COUNT(*), SUM(a*100*b) FROM t7; +COUNT(*) SUM(a*100*b) +10 225000 connection server_2; include/stop_slave.inc SET GLOBAL slave_parallel_threads=@old_parallel_threads; +SET GLOBAL slave_parallel_mode=@old_parallel_mode; include/start_slave.inc SET DEBUG_SYNC= 'RESET'; connection server_1; DROP function foo; -DROP TABLE t1,t2,t3,t4,t5,t6; +DROP TABLE t1,t2,t3,t4,t5,t6,t7; SET DEBUG_SYNC= 'RESET'; include/rpl_end.inc diff --git a/mysql-test/suite/rpl/r/rpl_parallel_deadlock_victim2.result b/mysql-test/suite/rpl/r/rpl_parallel_deadlock_victim2.result new file mode 100644 index 00000000000..f154f37a057 --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_parallel_deadlock_victim2.result @@ -0,0 +1,50 @@ +include/master-slave.inc +[connection master] +connection master; +ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; +CREATE TABLE t1(a INT) ENGINE=INNODB; +INSERT INTO t1 VALUES(1); +include/save_master_gtid.inc +connection slave; +include/sync_with_master_gtid.inc +include/stop_slave.inc +set @@global.slave_parallel_threads= 2; +set @@global.slave_parallel_mode= OPTIMISTIC; +set @@global.slave_transaction_retries= 2; +*** MDEV-28776: rpl.rpl_mark_optimize_tbl_ddl fails with timeout on sync_with_master +connection master; +SET @@gtid_seq_no= 100; +INSERT INTO t1 SELECT 1+a FROM t1; +SET @@gtid_seq_no= 200; +INSERT INTO t1 SELECT 2+a FROM t1; +SELECT * FROM t1 ORDER BY a; +a +1 +2 +3 +4 +include/save_master_gtid.inc +connection slave; +SET @save_dbug= @@GLOBAL.debug_dbug; +SET GLOBAL debug_dbug="+d,rpl_parallel_delay_gtid_0_x_100_start"; +SET GLOBAL debug_dbug="+d,rpl_write_record_small_sleep_gtid_100_200"; +SET GLOBAL debug_dbug="+d,small_sleep_after_lock_wait"; +SET GLOBAL debug_dbug="+d,rpl_delay_deadlock_kill"; +include/start_slave.inc +include/sync_with_master_gtid.inc +SET GLOBAL debug_dbug= @save_dbug; +SELECT * FROM t1 ORDER BY a; +a +1 +2 +3 +4 +connection slave; +include/stop_slave.inc +SET @@global.slave_parallel_threads= 0; +SET @@global.slave_parallel_mode= conservative; +SET @@global.slave_transaction_retries= 10; +include/start_slave.inc +connection master; +DROP TABLE t1; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_parallel.test b/mysql-test/suite/rpl/t/rpl_parallel.test index 9ba7a30f2eb..d43cec4df34 100644 --- a/mysql-test/suite/rpl/t/rpl_parallel.test +++ b/mysql-test/suite/rpl/t/rpl_parallel.test @@ -13,6 +13,7 @@ --connection server_2 SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads; +SET @old_parallel_mode=@@GLOBAL.slave_parallel_mode; --error ER_SLAVE_MUST_STOP SET GLOBAL slave_parallel_threads=10; --source include/stop_slave.inc @@ -2203,16 +2204,84 @@ SELECT * FROM t1 WHERE a>=2000 ORDER BY a; SELECT * FROM t2 WHERE a>=2000 ORDER BY a; +--echo MDEV-31655: Parallel replication deadlock victim preference code erroneously removed +# The problem was that InnoDB would choose the wrong deadlock victim. +# Create a lot of transactions that can cause deadlocks, and use error +# injection to check that the first transactions in each group is never +# selected as deadlock victim. +--let $rows= 10 +--let $transactions= 5 +--let $gcos= 10 + +--connection server_1 +CREATE TABLE t7 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB; +BEGIN; +--disable_query_log +--let $i= 0 +while ($i < 10) { + eval INSERT INTO t7 VALUES ($i, 0); + inc $i; +} +--enable_query_log +COMMIT; +--source include/save_master_gtid.inc + +--connection server_2 +--source include/sync_with_master_gtid.inc +--source include/stop_slave.inc +eval set @@global.slave_parallel_threads= $transactions; +set @@global.slave_parallel_mode= conservative; +SET @old_dbug= @@GLOBAL.debug_dbug; +# This error injection will allow no retries for GTIDs divisible by 1000. +SET GLOBAL debug_dbug= "+d,rpl_mdev31655_zero_retries"; + +--connection master +SET @old_dbug= @@SESSION.debug_dbug; +SET SESSION debug_dbug="+d,binlog_force_commit_id"; + +--let $j= 1 +while ($j <= $gcos) { + eval SET @commit_id= $j+1000; + --let $i= 0 + while ($i < $transactions) { + --disable_query_log + eval SET SESSION gtid_seq_no= 1000 + 1000*$j + $i; + BEGIN; + --let $k= 0 + while ($k < $rows) { + eval UPDATE t7 SET b=b+1 WHERE a=(($i+$k) MOD $rows); + inc $k; + } + COMMIT; + --enable_query_log + inc $i; + } + inc $j; +} + +SET SESSION debug_dbug= @old_dbug; +SELECT COUNT(*), SUM(a*100*b) FROM t7; + +--source include/save_master_gtid.inc + +--connection server_2 +--source include/start_slave.inc +--source include/sync_with_master_gtid.inc +SET GLOBAL debug_dbug= @old_dbug; +SELECT COUNT(*), SUM(a*100*b) FROM t7; + + # Clean up. --connection server_2 --source include/stop_slave.inc SET GLOBAL slave_parallel_threads=@old_parallel_threads; +SET GLOBAL slave_parallel_mode=@old_parallel_mode; --source include/start_slave.inc SET DEBUG_SYNC= 'RESET'; --connection server_1 DROP function foo; -DROP TABLE t1,t2,t3,t4,t5,t6; +DROP TABLE t1,t2,t3,t4,t5,t6,t7; SET DEBUG_SYNC= 'RESET'; --source include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_parallel_deadlock_victim2.test b/mysql-test/suite/rpl/t/rpl_parallel_deadlock_victim2.test new file mode 100644 index 00000000000..522cec18bbc --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_parallel_deadlock_victim2.test @@ -0,0 +1,83 @@ +--source include/master-slave.inc +--source include/have_innodb.inc +--source include/have_debug.inc +--source include/have_binlog_format_statement.inc + +--connection master +ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; +CREATE TABLE t1(a INT) ENGINE=INNODB; +INSERT INTO t1 VALUES(1); +--source include/save_master_gtid.inc + +--connection slave +--source include/sync_with_master_gtid.inc +--source include/stop_slave.inc +--let $save_transaction_retries= `SELECT @@global.slave_transaction_retries` +--let $save_slave_parallel_threads= `SELECT @@global.slave_parallel_threads` +--let $save_slave_parallel_mode= `SELECT @@global.slave_parallel_mode` +set @@global.slave_parallel_threads= 2; +set @@global.slave_parallel_mode= OPTIMISTIC; +set @@global.slave_transaction_retries= 2; + +--echo *** MDEV-28776: rpl.rpl_mark_optimize_tbl_ddl fails with timeout on sync_with_master +# This was a failure where a transaction T1 could deadlock multiple times +# with T2, eventually exceeding the default --slave-transaction-retries=10. +# Root cause was MDEV-31655, causing InnoDB to wrongly choose T1 as deadlock +# victim over T2. If thread scheduling is right, it was possible for T1 to +# repeatedly deadlock, roll back, and have time to grab an S lock again before +# T2 woke up and got its waiting X lock, thus repeating the same deadlock over +# and over. +# Once the bug is fixed, it is not possible to re-create the same execution +# and thread scheduling. Instead we inject small sleeps in a way that +# triggered the problem when the bug was there, to demonstrate that the +# problem no longer occurs. + +--connection master +# T1 +SET @@gtid_seq_no= 100; +INSERT INTO t1 SELECT 1+a FROM t1; +# T2 +SET @@gtid_seq_no= 200; +INSERT INTO t1 SELECT 2+a FROM t1; + +SELECT * FROM t1 ORDER BY a; +--source include/save_master_gtid.inc + +--connection slave +SET @save_dbug= @@GLOBAL.debug_dbug; + +# Inject various delays to hint thread scheduling to happen in the way that +# triggered MDEV-28776. + +# Small delay starting T1 so it will be the youngest trx and be chosen over +# T2 as the deadlock victim by default in InnoDB. +SET GLOBAL debug_dbug="+d,rpl_parallel_delay_gtid_0_x_100_start"; + +# Small delay before taking insert X lock to give time for both T1 and T2 to +# get the S lock first and cause a deadlock. +SET GLOBAL debug_dbug="+d,rpl_write_record_small_sleep_gtid_100_200"; + +# Small delay after T2's wait on the X lock, to give time for T1 retry to +# re-aquire the T1 S lock first. +SET GLOBAL debug_dbug="+d,small_sleep_after_lock_wait"; + +# Delay deadlock kill of T2. +SET GLOBAL debug_dbug="+d,rpl_delay_deadlock_kill"; + +--source include/start_slave.inc +--source include/sync_with_master_gtid.inc +SET GLOBAL debug_dbug= @save_dbug; +SELECT * FROM t1 ORDER BY a; + +# Cleanup. +--connection slave +--source include/stop_slave.inc +eval SET @@global.slave_parallel_threads= $save_slave_parallel_threads; +eval SET @@global.slave_parallel_mode= $save_slave_parallel_mode; +eval SET @@global.slave_transaction_retries= $save_transaction_retries; +--source include/start_slave.inc + +--connection master +DROP TABLE t1; + +--source include/rpl_end.inc diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index ad9c2ac802c..ba5cf54e673 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -1284,6 +1284,11 @@ handle_rpl_parallel_thread(void *arg) bool did_enter_cond= false; PSI_stage_info old_stage; + DBUG_EXECUTE_IF("rpl_parallel_delay_gtid_0_x_100_start", { + if (rgi->current_gtid.domain_id==0 && + rgi->current_gtid.seq_no == 100) + my_sleep(10000); + }); #ifdef ENABLED_DEBUG_SYNC DBUG_EXECUTE_IF("hold_worker_on_schedule", { if (rgi->current_gtid.domain_id == 0 && @@ -1463,8 +1468,13 @@ handle_rpl_parallel_thread(void *arg) err= dbug_simulate_tmp_error(rgi, thd);); if (unlikely(err)) { + ulong max_retries= slave_trans_retries; convert_kill_to_deadlock_error(rgi); - if (has_temporary_error(thd) && slave_trans_retries > 0) + DBUG_EXECUTE_IF("rpl_mdev31655_zero_retries", + if ((rgi->current_gtid.seq_no % 1000) == 0) + max_retries= 0; + ); + if (has_temporary_error(thd) && max_retries > 0) err= retry_event_group(rgi, rpt, qev); } } diff --git a/sql/slave.cc b/sql/slave.cc index 3fe89a95e5e..cdb16f873c3 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -502,6 +502,7 @@ static void bg_rpl_load_gtid_slave_state(void *) static void bg_slave_kill(void *victim) { THD *to_kill= (THD *)victim; + DBUG_EXECUTE_IF("rpl_delay_deadlock_kill", my_sleep(1500000);); to_kill->awake(KILL_CONNECTION); mysql_mutex_lock(&to_kill->LOCK_wakeup_ready); to_kill->rgi_slave->killed_for_retry= rpl_group_info::RETRY_KILL_KILLED; diff --git a/sql/sql_class.cc b/sql/sql_class.cc index ae388515961..e7e27401d61 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -5272,6 +5272,49 @@ thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd) return 0; } + +/* + If the storage engine detects a deadlock, and needs to choose a victim + transaction to roll back, it can call this function to ask the upper + server layer for which of two possible transactions is prefered to be + aborted and rolled back. + + In parallel replication, if two transactions are running in parallel and + one is fixed to commit before the other, then the one that commits later + will be prefered as the victim - chosing the early transaction as a victim + will not resolve the deadlock anyway, as the later transaction still needs + to wait for the earlier to commit. + + The return value is -1 if the first transaction is prefered as a deadlock + victim, 1 if the second transaction is prefered, or 0 for no preference (in + which case the storage engine can make the choice as it prefers). +*/ +extern "C" int +thd_deadlock_victim_preference(const MYSQL_THD thd1, const MYSQL_THD thd2) +{ + rpl_group_info *rgi1, *rgi2; + + if (!thd1 || !thd2) + return 0; + + /* + If the transactions are participating in the same replication domain in + parallel replication, then request to select the one that will commit + later (in the fixed commit order from the master) as the deadlock victim. + */ + rgi1= thd1->rgi_slave; + rgi2= thd2->rgi_slave; + if (rgi1 && rgi2 && + rgi1->is_parallel_exec && + rgi1->rli == rgi2->rli && + rgi1->current_gtid.domain_id == rgi2->current_gtid.domain_id) + return rgi1->gtid_sub_id < rgi2->gtid_sub_id ? 1 : -1; + + /* No preferences, let the storage engine decide. */ + return 0; +} + + extern "C" int thd_non_transactional_update(const MYSQL_THD thd) { return(thd->transaction.all.modified_non_trans_table); diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc index a2c571373cb..a64314814dd 100644 --- a/sql/sql_insert.cc +++ b/sql/sql_insert.cc @@ -78,6 +78,7 @@ #include "sql_audit.h" #include "sql_derived.h" // mysql_handle_derived #include "sql_prepare.h" +#include "rpl_rli.h" #include #include "debug_sync.h" @@ -1753,6 +1754,12 @@ int write_record(THD *thd, TABLE *table,COPY_INFO *info) save_read_set= table->read_set; save_write_set= table->write_set; + DBUG_EXECUTE_IF("rpl_write_record_small_sleep_gtid_100_200", + { + if (thd->rgi_slave && (thd->rgi_slave->current_gtid.seq_no == 100 || + thd->rgi_slave->current_gtid.seq_no == 200)) + my_sleep(20000); + }); if (info->handle_duplicates == DUP_REPLACE || info->handle_duplicates == DUP_UPDATE) { diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index b31e8ba5b56..cc20ab95930 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -71,6 +71,9 @@ static void lock_grant_after_reset(lock_t* lock); extern "C" void thd_rpl_deadlock_check(MYSQL_THD thd, MYSQL_THD other_thd); extern "C" int thd_need_wait_reports(const MYSQL_THD thd); extern "C" int thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd); +#ifdef HAVE_REPLICATION +extern "C" int thd_deadlock_victim_preference(const MYSQL_THD thd1, const MYSQL_THD thd2); +#endif /** Pretty-print a table lock. @param[in,out] file output stream @@ -1546,6 +1549,20 @@ static bool has_higher_priority(lock_t *lock1, lock_t *lock2) } else if (!lock_get_wait(lock2)) { return false; } + +#ifdef HAVE_REPLICATION + // Ask the upper server layer if any of the two trx should be prefered. + int preference = thd_deadlock_victim_preference(lock1->trx->mysql_thd, + lock2->trx->mysql_thd); + if (preference == -1) { + // lock1 is preferred as a victim, so lock2 has higher priority + return false; + } else if (preference == 1) { + // lock2 is preferred as a victim, so lock1 has higher priority + return true; + } +#endif + return lock1->trx->start_time_micro <= lock2->trx->start_time_micro; } diff --git a/storage/innobase/lock/lock0wait.cc b/storage/innobase/lock/lock0wait.cc index 5eb03f668b3..a26b3a6efaa 100644 --- a/storage/innobase/lock/lock0wait.cc +++ b/storage/innobase/lock/lock0wait.cc @@ -278,7 +278,9 @@ lock_wait_suspend_thread( } ulint lock_type = ULINT_UNDEFINED; - +#ifndef DBUG_OFF + ulint lock_mode = LOCK_NONE; +#endif /* The wait_lock can be cleared by another thread when the lock is released. But the wait can only be initiated by the current thread which owns the transaction. Only acquire the @@ -288,6 +290,9 @@ lock_wait_suspend_thread( wait_lock = trx->lock.wait_lock; if (wait_lock) { lock_type = lock_get_type_low(wait_lock); +#ifndef DBUG_OFF + lock_mode = lock_get_mode(wait_lock); +#endif } lock_mutex_exit(); } @@ -336,6 +341,14 @@ lock_wait_suspend_thread( } os_event_wait(slot->event); + DBUG_EXECUTE_IF("small_sleep_after_lock_wait", + { + if (lock_type == LOCK_REC && lock_mode == LOCK_X && + trx->error_state != DB_DEADLOCK && + !trx_is_interrupted(trx)) { + my_sleep(20000); + } + }); thd_wait_end(trx->mysql_thd); diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index 7cd95878b0c..b742d1c3686 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -52,6 +52,11 @@ Created 3/26/1996 Heikki Tuuri #include #include +#ifdef HAVE_REPLICATION +extern "C" +int thd_deadlock_victim_preference(const MYSQL_THD thd1, const MYSQL_THD thd2); +#endif + /** The bit pattern corresponding to TRX_ID_MAX */ const byte trx_id_max_bytes[8] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff @@ -1907,6 +1912,16 @@ trx_weight_ge( ibool a_notrans_edit; ibool b_notrans_edit; +#ifdef HAVE_REPLICATION + /* First ask the upper server layer if it has any preference for which + to prefer as a deadlock victim. */ + int pref= thd_deadlock_victim_preference(a->mysql_thd, b->mysql_thd); + if (pref < 0) + return FALSE; + else if (pref > 0) + return TRUE; +#endif + /* If mysql_thd is NULL for a transaction we assume that it has not edited non-transactional tables. */ From 18acbaf416ea7a42edc0b2fc51084eacda4d074c Mon Sep 17 00:00:00 2001 From: Kristian Nielsen Date: Sat, 5 Aug 2023 22:53:44 +0200 Subject: [PATCH 4/8] MDEV-31655: Parallel replication deadlock victim preference code errorneously removed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Restore code to make InnoDB choose the second transaction as a deadlock victim if two transactions deadlock that need to commit in-order for parallel replication. This code was erroneously removed when VATS was implemented in InnoDB. Also add a test case for InnoDB choosing the right deadlock victim. Also fixes this bug, with testcase that reliably reproduces: MDEV-28776: rpl.rpl_mark_optimize_tbl_ddl fails with timeout on sync_with_master Reviewed-by: Marko Mäkelä Signed-off-by: Kristian Nielsen --- .../rpl/r/rpl_parallel_deadlock_victim.result | 51 ++++++++++ .../r/rpl_parallel_deadlock_victim2.result | 50 ++++++++++ .../rpl/t/rpl_parallel_deadlock_victim.test | 86 +++++++++++++++++ .../rpl/t/rpl_parallel_deadlock_victim2.test | 83 +++++++++++++++++ sql/rpl_parallel.cc | 12 ++- sql/slave.cc | 1 + sql/sql_class.cc | 43 +++++++++ sql/sql_insert.cc | 7 ++ storage/innobase/lock/lock0lock.cc | 93 ++++++++++++------- 9 files changed, 390 insertions(+), 36 deletions(-) create mode 100644 mysql-test/suite/rpl/r/rpl_parallel_deadlock_victim.result create mode 100644 mysql-test/suite/rpl/r/rpl_parallel_deadlock_victim2.result create mode 100644 mysql-test/suite/rpl/t/rpl_parallel_deadlock_victim.test create mode 100644 mysql-test/suite/rpl/t/rpl_parallel_deadlock_victim2.test diff --git a/mysql-test/suite/rpl/r/rpl_parallel_deadlock_victim.result b/mysql-test/suite/rpl/r/rpl_parallel_deadlock_victim.result new file mode 100644 index 00000000000..f438f335283 --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_parallel_deadlock_victim.result @@ -0,0 +1,51 @@ +include/master-slave.inc +[connection master] +MDEV-31655: Parallel replication deadlock victim preference code erroneously removed +connection server_1; +ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; +CREATE TABLE t1 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB; +BEGIN; +COMMIT; +include/save_master_gtid.inc +connection server_2; +include/sync_with_master_gtid.inc +include/stop_slave.inc +SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads; +SET @old_parallel_mode=@@GLOBAL.slave_parallel_mode; +set @@global.slave_parallel_threads= 5; +set @@global.slave_parallel_mode= conservative; +SET @old_dbug= @@GLOBAL.debug_dbug; +SET GLOBAL debug_dbug= "+d,rpl_mdev31655_zero_retries"; +connection server_1; +SET @old_dbug= @@SESSION.debug_dbug; +SET SESSION debug_dbug="+d,binlog_force_commit_id"; +SET @commit_id= 1+1000; +SET @commit_id= 2+1000; +SET @commit_id= 3+1000; +SET @commit_id= 4+1000; +SET @commit_id= 5+1000; +SET @commit_id= 6+1000; +SET @commit_id= 7+1000; +SET @commit_id= 8+1000; +SET @commit_id= 9+1000; +SET @commit_id= 10+1000; +SET SESSION debug_dbug= @old_dbug; +SELECT COUNT(*), SUM(a*100*b) FROM t1; +COUNT(*) SUM(a*100*b) +10 225000 +include/save_master_gtid.inc +connection server_2; +include/start_slave.inc +include/sync_with_master_gtid.inc +SET GLOBAL debug_dbug= @old_dbug; +SELECT COUNT(*), SUM(a*100*b) FROM t1; +COUNT(*) SUM(a*100*b) +10 225000 +connection server_2; +include/stop_slave.inc +SET GLOBAL slave_parallel_threads=@old_parallel_threads; +SET GLOBAL slave_parallel_mode=@old_parallel_mode; +include/start_slave.inc +connection server_1; +DROP TABLE t1; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/r/rpl_parallel_deadlock_victim2.result b/mysql-test/suite/rpl/r/rpl_parallel_deadlock_victim2.result new file mode 100644 index 00000000000..c26944b3321 --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_parallel_deadlock_victim2.result @@ -0,0 +1,50 @@ +include/master-slave.inc +[connection master] +connection master; +ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; +CREATE TABLE t1(a INT) ENGINE=INNODB; +INSERT INTO t1 VALUES(1); +include/save_master_gtid.inc +connection slave; +include/sync_with_master_gtid.inc +include/stop_slave.inc +set @@global.slave_parallel_threads= 2; +set @@global.slave_parallel_mode= OPTIMISTIC; +set @@global.slave_transaction_retries= 2; +*** MDEV-28776: rpl.rpl_mark_optimize_tbl_ddl fails with timeout on sync_with_master +connection master; +SET @@gtid_seq_no= 100; +INSERT INTO t1 SELECT 1+a FROM t1; +SET @@gtid_seq_no= 200; +INSERT INTO t1 SELECT 2+a FROM t1; +SELECT * FROM t1 ORDER BY a; +a +1 +2 +3 +4 +include/save_master_gtid.inc +connection slave; +SET @save_dbug= @@GLOBAL.debug_dbug; +SET GLOBAL debug_dbug="+d,rpl_parallel_delay_gtid_0_x_100_start"; +SET GLOBAL debug_dbug="+d,rpl_write_record_small_sleep_gtid_100_200"; +SET GLOBAL debug_dbug="+d,small_sleep_after_lock_wait"; +SET GLOBAL debug_dbug="+d,rpl_delay_deadlock_kill"; +include/start_slave.inc +include/sync_with_master_gtid.inc +SET GLOBAL debug_dbug= @save_dbug; +SELECT * FROM t1 ORDER BY a; +a +1 +2 +3 +4 +connection slave; +include/stop_slave.inc +SET @@global.slave_parallel_threads= 0; +SET @@global.slave_parallel_mode= optimistic; +SET @@global.slave_transaction_retries= 10; +include/start_slave.inc +connection master; +DROP TABLE t1; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_parallel_deadlock_victim.test b/mysql-test/suite/rpl/t/rpl_parallel_deadlock_victim.test new file mode 100644 index 00000000000..ab634d2953e --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_parallel_deadlock_victim.test @@ -0,0 +1,86 @@ +--source include/have_innodb.inc +--source include/have_debug.inc +--source include/master-slave.inc + + +--echo MDEV-31655: Parallel replication deadlock victim preference code erroneously removed +# The problem was that InnoDB would choose the wrong deadlock victim. +# Create a lot of transactions that can cause deadlocks, and use error +# injection to check that the first transactions in each group is never +# selected as deadlock victim. +--let $rows= 10 +--let $transactions= 5 +--let $gcos= 10 + +--connection server_1 +ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; +CREATE TABLE t1 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB; +BEGIN; +--disable_query_log +--let $i= 0 +while ($i < 10) { + eval INSERT INTO t1 VALUES ($i, 0); + inc $i; +} +--enable_query_log +COMMIT; +--source include/save_master_gtid.inc + +--connection server_2 +--source include/sync_with_master_gtid.inc +--source include/stop_slave.inc +SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads; +SET @old_parallel_mode=@@GLOBAL.slave_parallel_mode; +eval set @@global.slave_parallel_threads= $transactions; +set @@global.slave_parallel_mode= conservative; +SET @old_dbug= @@GLOBAL.debug_dbug; +# This error injection will allow no retries for GTIDs divisible by 1000. +SET GLOBAL debug_dbug= "+d,rpl_mdev31655_zero_retries"; + +--connection server_1 +SET @old_dbug= @@SESSION.debug_dbug; +SET SESSION debug_dbug="+d,binlog_force_commit_id"; + +--let $j= 1 +while ($j <= $gcos) { + eval SET @commit_id= $j+1000; + --let $i= 0 + while ($i < $transactions) { + --disable_query_log + eval SET SESSION gtid_seq_no= 1000 + 1000*$j + $i; + BEGIN; + --let $k= 0 + while ($k < $rows) { + eval UPDATE t1 SET b=b+1 WHERE a=(($i+$k) MOD $rows); + inc $k; + } + COMMIT; + --enable_query_log + inc $i; + } + inc $j; +} + +SET SESSION debug_dbug= @old_dbug; +SELECT COUNT(*), SUM(a*100*b) FROM t1; + +--source include/save_master_gtid.inc + +--connection server_2 +--source include/start_slave.inc +--source include/sync_with_master_gtid.inc +SET GLOBAL debug_dbug= @old_dbug; +SELECT COUNT(*), SUM(a*100*b) FROM t1; + + +# Clean up. +--connection server_2 +--source include/stop_slave.inc +SET GLOBAL slave_parallel_threads=@old_parallel_threads; +SET GLOBAL slave_parallel_mode=@old_parallel_mode; +--source include/start_slave.inc + +--connection server_1 +DROP TABLE t1; + +--source include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_parallel_deadlock_victim2.test b/mysql-test/suite/rpl/t/rpl_parallel_deadlock_victim2.test new file mode 100644 index 00000000000..522cec18bbc --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_parallel_deadlock_victim2.test @@ -0,0 +1,83 @@ +--source include/master-slave.inc +--source include/have_innodb.inc +--source include/have_debug.inc +--source include/have_binlog_format_statement.inc + +--connection master +ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; +CREATE TABLE t1(a INT) ENGINE=INNODB; +INSERT INTO t1 VALUES(1); +--source include/save_master_gtid.inc + +--connection slave +--source include/sync_with_master_gtid.inc +--source include/stop_slave.inc +--let $save_transaction_retries= `SELECT @@global.slave_transaction_retries` +--let $save_slave_parallel_threads= `SELECT @@global.slave_parallel_threads` +--let $save_slave_parallel_mode= `SELECT @@global.slave_parallel_mode` +set @@global.slave_parallel_threads= 2; +set @@global.slave_parallel_mode= OPTIMISTIC; +set @@global.slave_transaction_retries= 2; + +--echo *** MDEV-28776: rpl.rpl_mark_optimize_tbl_ddl fails with timeout on sync_with_master +# This was a failure where a transaction T1 could deadlock multiple times +# with T2, eventually exceeding the default --slave-transaction-retries=10. +# Root cause was MDEV-31655, causing InnoDB to wrongly choose T1 as deadlock +# victim over T2. If thread scheduling is right, it was possible for T1 to +# repeatedly deadlock, roll back, and have time to grab an S lock again before +# T2 woke up and got its waiting X lock, thus repeating the same deadlock over +# and over. +# Once the bug is fixed, it is not possible to re-create the same execution +# and thread scheduling. Instead we inject small sleeps in a way that +# triggered the problem when the bug was there, to demonstrate that the +# problem no longer occurs. + +--connection master +# T1 +SET @@gtid_seq_no= 100; +INSERT INTO t1 SELECT 1+a FROM t1; +# T2 +SET @@gtid_seq_no= 200; +INSERT INTO t1 SELECT 2+a FROM t1; + +SELECT * FROM t1 ORDER BY a; +--source include/save_master_gtid.inc + +--connection slave +SET @save_dbug= @@GLOBAL.debug_dbug; + +# Inject various delays to hint thread scheduling to happen in the way that +# triggered MDEV-28776. + +# Small delay starting T1 so it will be the youngest trx and be chosen over +# T2 as the deadlock victim by default in InnoDB. +SET GLOBAL debug_dbug="+d,rpl_parallel_delay_gtid_0_x_100_start"; + +# Small delay before taking insert X lock to give time for both T1 and T2 to +# get the S lock first and cause a deadlock. +SET GLOBAL debug_dbug="+d,rpl_write_record_small_sleep_gtid_100_200"; + +# Small delay after T2's wait on the X lock, to give time for T1 retry to +# re-aquire the T1 S lock first. +SET GLOBAL debug_dbug="+d,small_sleep_after_lock_wait"; + +# Delay deadlock kill of T2. +SET GLOBAL debug_dbug="+d,rpl_delay_deadlock_kill"; + +--source include/start_slave.inc +--source include/sync_with_master_gtid.inc +SET GLOBAL debug_dbug= @save_dbug; +SELECT * FROM t1 ORDER BY a; + +# Cleanup. +--connection slave +--source include/stop_slave.inc +eval SET @@global.slave_parallel_threads= $save_slave_parallel_threads; +eval SET @@global.slave_parallel_mode= $save_slave_parallel_mode; +eval SET @@global.slave_transaction_retries= $save_transaction_retries; +--source include/start_slave.inc + +--connection master +DROP TABLE t1; + +--source include/rpl_end.inc diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index 8034fe7718a..c044defd000 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -1301,6 +1301,11 @@ handle_rpl_parallel_thread(void *arg) bool did_enter_cond= false; PSI_stage_info old_stage; + DBUG_EXECUTE_IF("rpl_parallel_delay_gtid_0_x_100_start", { + if (rgi->current_gtid.domain_id==0 && + rgi->current_gtid.seq_no == 100) + my_sleep(10000); + }); #ifdef ENABLED_DEBUG_SYNC DBUG_EXECUTE_IF("hold_worker_on_schedule", { if (rgi->current_gtid.domain_id == 0 && @@ -1480,8 +1485,13 @@ handle_rpl_parallel_thread(void *arg) err= dbug_simulate_tmp_error(rgi, thd);); if (unlikely(err)) { + ulong max_retries= slave_trans_retries; convert_kill_to_deadlock_error(rgi); - if (has_temporary_error(thd) && slave_trans_retries > 0) + DBUG_EXECUTE_IF("rpl_mdev31655_zero_retries", + if ((rgi->current_gtid.seq_no % 1000) == 0) + max_retries= 0; + ); + if (has_temporary_error(thd) && max_retries > 0) err= retry_event_group(rgi, rpt, qev); } } diff --git a/sql/slave.cc b/sql/slave.cc index 4b24d281d5d..5ad1a446ae6 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -506,6 +506,7 @@ static void bg_rpl_load_gtid_slave_state(void *) static void bg_slave_kill(void *victim) { THD *to_kill= (THD *)victim; + DBUG_EXECUTE_IF("rpl_delay_deadlock_kill", my_sleep(1500000);); to_kill->awake(KILL_CONNECTION); mysql_mutex_lock(&to_kill->LOCK_wakeup_ready); to_kill->rgi_slave->killed_for_retry= rpl_group_info::RETRY_KILL_KILLED; diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 8e2f34ac53d..0c929837957 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -5516,6 +5516,49 @@ thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd) return 0; } + +/* + If the storage engine detects a deadlock, and needs to choose a victim + transaction to roll back, it can call this function to ask the upper + server layer for which of two possible transactions is prefered to be + aborted and rolled back. + + In parallel replication, if two transactions are running in parallel and + one is fixed to commit before the other, then the one that commits later + will be prefered as the victim - chosing the early transaction as a victim + will not resolve the deadlock anyway, as the later transaction still needs + to wait for the earlier to commit. + + The return value is -1 if the first transaction is prefered as a deadlock + victim, 1 if the second transaction is prefered, or 0 for no preference (in + which case the storage engine can make the choice as it prefers). +*/ +extern "C" int +thd_deadlock_victim_preference(const MYSQL_THD thd1, const MYSQL_THD thd2) +{ + rpl_group_info *rgi1, *rgi2; + + if (!thd1 || !thd2) + return 0; + + /* + If the transactions are participating in the same replication domain in + parallel replication, then request to select the one that will commit + later (in the fixed commit order from the master) as the deadlock victim. + */ + rgi1= thd1->rgi_slave; + rgi2= thd2->rgi_slave; + if (rgi1 && rgi2 && + rgi1->is_parallel_exec && + rgi1->rli == rgi2->rli && + rgi1->current_gtid.domain_id == rgi2->current_gtid.domain_id) + return rgi1->gtid_sub_id < rgi2->gtid_sub_id ? 1 : -1; + + /* No preferences, let the storage engine decide. */ + return 0; +} + + extern "C" int thd_non_transactional_update(const MYSQL_THD thd) { return(thd->transaction->all.modified_non_trans_table); diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc index 3ca8fc3303b..132be65a848 100644 --- a/sql/sql_insert.cc +++ b/sql/sql_insert.cc @@ -80,6 +80,7 @@ #include "debug_sync.h" // DEBUG_SYNC #include "debug.h" // debug_crash_here #include +#include "rpl_rli.h" #ifdef WITH_WSREP #include "wsrep_trans_observer.h" /* wsrep_start_transction() */ @@ -1828,6 +1829,12 @@ int write_record(THD *thd, TABLE *table, COPY_INFO *info, select_result *sink) save_read_set= table->read_set; save_write_set= table->write_set; + DBUG_EXECUTE_IF("rpl_write_record_small_sleep_gtid_100_200", + { + if (thd->rgi_slave && (thd->rgi_slave->current_gtid.seq_no == 100 || + thd->rgi_slave->current_gtid.seq_no == 200)) + my_sleep(20000); + }); if (info->handle_duplicates == DUP_REPLACE || info->handle_duplicates == DUP_UPDATE) { diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 0bb14f5fcf7..a794e2bbf03 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -63,6 +63,7 @@ ulong innodb_deadlock_report; extern "C" void thd_rpl_deadlock_check(MYSQL_THD thd, MYSQL_THD other_thd); extern "C" int thd_need_wait_reports(const MYSQL_THD thd); extern "C" int thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd); +extern "C" int thd_deadlock_victim_preference(const MYSQL_THD thd1, const MYSQL_THD thd2); #endif /** Functor for accessing the embedded node within a table lock. */ @@ -1958,6 +1959,14 @@ check_trx_error: end_wait: mysql_mutex_unlock(&lock_sys.wait_mutex); + DBUG_EXECUTE_IF("small_sleep_after_lock_wait", + { + if (!(type_mode & LOCK_TABLE) && + (type_mode & LOCK_MODE_MASK) == LOCK_X && + trx->error_state != DB_DEADLOCK && !trx_is_interrupted(trx)) { + my_sleep(20000); + } + }); thd_wait_end(trx->mysql_thd); return trx->error_state; @@ -6309,6 +6318,28 @@ namespace Deadlock } } + ATTRIBUTE_COLD + /** Calculate a number used to compare deadlock victim candidates. +Bit 62 is used to prefer transaction that did not modified non-transactional +tables. Bits 1-61 are set to TRX_WEIGHT to prefer transactions with less locks +and less modified rows. Bit 0 is used to prefer orig_trx in case of a tie. + @param trx Transaction + @return a 64-bit unsigned, the lower the more preferred TRX is as a deadlock + victim */ + static undo_no_t calc_victim_weight(trx_t *trx, const trx_t *orig_trx) + { + const undo_no_t trx_weight= (trx != orig_trx) | (TRX_WEIGHT(trx) << 1) | + (trx->mysql_thd && +#ifdef WITH_WSREP + (thd_has_edited_nontrans_tables(trx->mysql_thd) || + (trx->is_wsrep() && wsrep_thd_is_BF(trx->mysql_thd, false))) +#else + thd_has_edited_nontrans_tables(trx->mysql_thd) +#endif /* WITH_WSREP */ + ? 1ULL << 62 : 0); + return trx_weight; + } + ATTRIBUTE_COLD /** Report a deadlock (cycle in the waits-for graph). @param trx transaction waiting for a lock in this thread @@ -6332,24 +6363,7 @@ namespace Deadlock static const char rollback_msg[]= "*** WE ROLL BACK TRANSACTION (%u)\n"; char buf[9 + sizeof rollback_msg]; - - /* If current_trx=true, trx is owned by this thread, and we can - safely invoke these without holding trx->mutex or lock_sys.latch. - If current_trx=false, a concurrent commit is protected by both - lock_sys.latch and lock_sys.wait_mutex. */ - const undo_no_t trx_weight= TRX_WEIGHT(trx) | - (trx->mysql_thd && -#ifdef WITH_WSREP - (thd_has_edited_nontrans_tables(trx->mysql_thd) || - (trx->is_wsrep() && wsrep_thd_is_BF(trx->mysql_thd, false))) -#else - thd_has_edited_nontrans_tables(trx->mysql_thd) -#endif /* WITH_WSREP */ - ? 1ULL << 63 : 0); - trx_t *victim= nullptr; - undo_no_t victim_weight= ~0ULL; - unsigned victim_pos= 0, trx_pos= 0; /* Here, lock elision does not make sense, because for the output we are going to invoke system calls, @@ -6362,43 +6376,52 @@ namespace Deadlock } { - unsigned l= 0; + unsigned l= 1; /* Now that we are holding lock_sys.wait_mutex again, check whether a cycle still exists. */ trx_t *cycle= find_cycle(trx); if (!cycle) goto func_exit; /* One of the transactions was already aborted. */ + + victim= cycle; + undo_no_t victim_weight= calc_victim_weight(victim, trx); + unsigned victim_pos= l; for (trx_t *next= cycle;;) { next= next->lock.wait_trx; l++; - const undo_no_t next_weight= TRX_WEIGHT(next) | - (next->mysql_thd && -#ifdef WITH_WSREP - (thd_has_edited_nontrans_tables(next->mysql_thd) || - (next->is_wsrep() && wsrep_thd_is_BF(next->mysql_thd, false))) + const undo_no_t next_weight= calc_victim_weight(next, trx); +#ifdef HAVE_REPLICATION + const int pref= + thd_deadlock_victim_preference(victim->mysql_thd, next->mysql_thd); + /* Set bit 63 for any non-preferred victim to make such preference take + priority in the weight comparison. + -1 means victim is preferred. 1 means next is preferred. */ + undo_no_t victim_not_pref= (1ULL << 63) & (undo_no_t)(int64_t)(-pref); + undo_no_t next_not_pref= (1ULL << 63) & (undo_no_t)(int64_t)pref; #else - thd_has_edited_nontrans_tables(next->mysql_thd) -#endif /* WITH_WSREP */ - ? 1ULL << 63 : 0); - if (next_weight < victim_weight) + undo_no_t victim_not_pref= 0; + undo_no_t next_not_pref= 0; +#endif + /* Single comparison to decide which of two transactions is preferred + as a deadlock victim. + - If thd_deadlock_victim_preference() returned non-zero, bit 63 + comparison will decide the preferred one. + - Else if exactly one of them modified non-transactional tables, + bit 62 will decide. + - Else the TRX_WEIGHT in bits 1-61 will decide, if not equal. + - Else, if one of them is the original trx, bit 0 will decide. + - If all is equal, previous victim will arbitrarily be chosen. */ + if ((next_weight|next_not_pref) < (victim_weight|victim_not_pref)) { victim_weight= next_weight; victim= next; victim_pos= l; } - if (next == victim) - trx_pos= l; if (next == cycle) break; } - if (trx_pos && trx_weight == victim_weight) - { - victim= trx; - victim_pos= trx_pos; - } - /* Finally, display the deadlock */ switch (const auto r= static_cast(innodb_deadlock_report)) { case REPORT_OFF: From 805e0668c95f6a6ab3c2da3d966944ac0a58d903 Mon Sep 17 00:00:00 2001 From: Kristian Nielsen Date: Sun, 9 Jul 2023 16:45:47 +0200 Subject: [PATCH 5/8] MDEV-31482: Lock wait timeout with INSERT-SELECT, autoinc, and statement-based replication Remove the exception that InnoDB does not report auto-increment locks waits to the parallel replication. There was an assumption that these waits could not cause conflicts with in-order parallel replication and thus need not be reported. However, this assumption is wrong and it is possible to get conflicts that lead to hangs for the duration of --innodb-lock-wait-timeout. This can be seen with three transactions: 1. T1 is waiting for T3 on an autoinc lock 2. T2 is waiting for T1 to commit 3. T3 is waiting on a normal row lock held by T2 Here, T3 needs to be deadlock killed on the wait by T1. Signed-off-by: Kristian Nielsen --- .../suite/rpl/r/rpl_parallel_autoinc.result | 95 ++++++++++++ .../suite/rpl/t/rpl_parallel_autoinc.test | 140 ++++++++++++++++++ sql/sql_class.cc | 6 - storage/innobase/lock/lock0lock.cc | 8 +- 4 files changed, 238 insertions(+), 11 deletions(-) create mode 100644 mysql-test/suite/rpl/r/rpl_parallel_autoinc.result create mode 100644 mysql-test/suite/rpl/t/rpl_parallel_autoinc.test diff --git a/mysql-test/suite/rpl/r/rpl_parallel_autoinc.result b/mysql-test/suite/rpl/r/rpl_parallel_autoinc.result new file mode 100644 index 00000000000..e6f18e2d558 --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_parallel_autoinc.result @@ -0,0 +1,95 @@ +include/master-slave.inc +[connection master] +MDEV-31482: Lock wait timeout with INSERT-SELECT, autoinc, and statement-based replication +include/rpl_connect.inc [creating slave2] +include/rpl_connect.inc [creating slave3] +connection master; +ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; +CREATE TABLE t1 (a INT PRIMARY KEY AUTO_INCREMENT, b INT, c INT, INDEX (c)) ENGINE=InnoDB; +INSERT INTO t1 (b,c) VALUES (0, 1), (0, 1), (0, 2), (0,3), (0, 5), (0, 7), (0, 8); +CREATE TABLE t2 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB; +INSERT INTO t2 VALUES (10,1), (20,2), (30,3), (40,4), (50,5); +CREATE TABLE t3 (a VARCHAR(20) PRIMARY KEY, b INT) ENGINE=InnoDB; +INSERT INTO t3 VALUES ('row for T1', 0), ('row for T2', 0), ('row for T3', 0); +include/save_master_gtid.inc +connection slave; +include/sync_with_master_gtid.inc +include/stop_slave.inc +set @@global.slave_parallel_threads= 3; +set @@global.slave_parallel_mode= OPTIMISTIC; +set @@global.innodb_lock_wait_timeout= 20; +connection master; +BEGIN; +UPDATE t3 SET b=b+1 where a="row for T1"; +INSERT INTO t1(b, c) SELECT 1, t2.b FROM t2 WHERE a=10; +Warnings: +Note 1592 Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT. Statements writing to a table with an auto-increment column after selecting from another table are unsafe because the order in which rows are retrieved determines what (if any) rows will be written. This order cannot be predicted and may differ on master and the slave +COMMIT; +DELETE FROM t1 WHERE c >= 4 and c < 6; +BEGIN; +UPDATE t3 SET b=b+1 where a="row for T3"; +INSERT INTO t1(b, c) SELECT 3, t2.b FROM t2 WHERE a >= 20 AND a <= 40; +Warnings: +Note 1592 Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT. Statements writing to a table with an auto-increment column after selecting from another table are unsafe because the order in which rows are retrieved determines what (if any) rows will be written. This order cannot be predicted and may differ on master and the slave +COMMIT; +include/save_master_gtid.inc +connection slave1; +BEGIN; +SELECT * FROM t3 WHERE a="row for T1" FOR UPDATE; +a b +row for T1 0 +connection slave2; +BEGIN; +SELECT * FROM t3 WHERE a="row for T3" FOR UPDATE; +a b +row for T3 0 +connection slave3; +BEGIN; +DELETE FROM t2 WHERE a=30; +connection slave; +include/start_slave.inc +connection slave2; +ROLLBACK; +connection slave1; +ROLLBACK; +connection slave3; +ROLLBACK; +connection slave; +include/sync_with_master_gtid.inc +SELECT * FROM t1 ORDER BY a; +a b c +1 0 1 +2 0 1 +3 0 2 +4 0 3 +6 0 7 +7 0 8 +8 1 1 +9 3 2 +10 3 3 +11 3 4 +SELECT * FROM t2 ORDER BY a; +a b +10 1 +20 2 +30 3 +40 4 +50 5 +SELECT * FROM t3 ORDER BY a; +a b +row for T1 1 +row for T2 0 +row for T3 1 +connection master; +CALL mtr.add_suppression("Unsafe statement written to the binary log using statement format"); +DROP TABLE t1, t2, t3; +connection slave; +include/stop_slave.inc +SET @@global.slave_parallel_threads= 0; +SET @@global.slave_parallel_mode= optimistic; +SET @@global.innodb_lock_wait_timeout= 50; +include/start_slave.inc +SELECT @@GLOBAL.innodb_autoinc_lock_mode; +@@GLOBAL.innodb_autoinc_lock_mode +1 +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_parallel_autoinc.test b/mysql-test/suite/rpl/t/rpl_parallel_autoinc.test new file mode 100644 index 00000000000..0e96b4dfb80 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_parallel_autoinc.test @@ -0,0 +1,140 @@ +--source include/have_binlog_format_statement.inc +--source include/have_innodb.inc +--source include/master-slave.inc + +--echo MDEV-31482: Lock wait timeout with INSERT-SELECT, autoinc, and statement-based replication + +# The scenario is transactions T1, T2, T3: +# +# T1 is waiting for T3 on an autoinc lock +# T2 is waiting for T1 to commit +# T3 is waiting on a normal row lock held by T2 +# +# This caused a hang until innodb_lock_wait_timeout, because autoinc +# locks were not reported to the in-order parallel replication, so T3 +# was not deadlock killed. + +--let $lock_wait_timeout=20 + +--let $rpl_connection_name= slave2 +--let $rpl_server_number= 2 +--source include/rpl_connect.inc + +--let $rpl_connection_name= slave3 +--let $rpl_server_number= 2 +--source include/rpl_connect.inc + +--connection master +ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; + +# A table as destination for INSERT-SELECT +CREATE TABLE t1 (a INT PRIMARY KEY AUTO_INCREMENT, b INT, c INT, INDEX (c)) ENGINE=InnoDB; +INSERT INTO t1 (b,c) VALUES (0, 1), (0, 1), (0, 2), (0,3), (0, 5), (0, 7), (0, 8); + +# A table as source for INSERT-SELECT. +CREATE TABLE t2 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB; +INSERT INTO t2 VALUES (10,1), (20,2), (30,3), (40,4), (50,5); + +# A table to help order slave worker threads to setup the desired scenario. +CREATE TABLE t3 (a VARCHAR(20) PRIMARY KEY, b INT) ENGINE=InnoDB; +INSERT INTO t3 VALUES ('row for T1', 0), ('row for T2', 0), ('row for T3', 0); +--source include/save_master_gtid.inc + +--connection slave +--source include/sync_with_master_gtid.inc +--source include/stop_slave.inc +--let $save_innodb_lock_wait_timeout= `SELECT @@global.innodb_lock_wait_timeout` +--let $save_slave_parallel_threads= `SELECT @@global.slave_parallel_threads` +--let $save_slave_parallel_mode= `SELECT @@global.slave_parallel_mode` +set @@global.slave_parallel_threads= 3; +set @@global.slave_parallel_mode= OPTIMISTIC; +eval set @@global.innodb_lock_wait_timeout= $lock_wait_timeout; + +--connection master +# Transaction T1. +BEGIN; +UPDATE t3 SET b=b+1 where a="row for T1"; +INSERT INTO t1(b, c) SELECT 1, t2.b FROM t2 WHERE a=10; +COMMIT; + +# Transaction T2. +DELETE FROM t1 WHERE c >= 4 and c < 6; + +# Transaction T3. +BEGIN; +UPDATE t3 SET b=b+1 where a="row for T3"; +INSERT INTO t1(b, c) SELECT 3, t2.b FROM t2 WHERE a >= 20 AND a <= 40; +COMMIT; + +--source include/save_master_gtid.inc + +--connection slave1 +# Temporarily block T1 to create the scheduling that triggers the bug. +BEGIN; +SELECT * FROM t3 WHERE a="row for T1" FOR UPDATE; + +--connection slave2 +# Temporarily block T3 from starting (so T2 can reach commit). +BEGIN; +SELECT * FROM t3 WHERE a="row for T3" FOR UPDATE; + +--connection slave3 +# This critical step blocks T3 after it has inserted its first row, +# and thus taken the auto-increment lock, but before it has reached +# the point where it gets a row lock wait on T2. Even though +# auto-increment lock waits were not reported due to the bug, +# transitive lock waits (T1 waits on autoinc of T3 which waits on row +# on T2) _were_ reported as T1 waiting on T2, and thus a deadlock kill +# happened and the bug was not triggered. +BEGIN; +DELETE FROM t2 WHERE a=30; + +--connection slave +--source include/start_slave.inc + +# First let T2 complete until it is waiting for T1 to commit. +--let $wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE state='Waiting for prior transaction to commit' and command LIKE 'Slave_worker'; +--source include/wait_condition.inc + +# Then let T3 reach the point where it has obtained the autoinc lock, +# but it is not yet waiting for a row lock held by T2. +--connection slave2 +ROLLBACK; +--let $wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE state='Sending data' and info LIKE 'INSERT INTO t1(b, c) SELECT 3, t2.b%' and time_ms > 500 and command LIKE 'Slave_worker'; +--source include/wait_condition.inc + +# Now let T1 continue, while T3 is holding the autoinc lock but before +# it is waiting for T2. Wait a short while to give the hang a chance to +# happen; T1 needs to get to request the autoinc lock before we let T3 +# continue. (There's a small chance the sleep will be too small, which will +# let the test occasionally pass on non-fixed server). +--connection slave1 +ROLLBACK; +--sleep 0.5 + +# Now let T3 continue; the bug was that this lead to an undetected +# deadlock that remained until innodb lock wait timeout. +--connection slave3 +ROLLBACK; + +--connection slave +--let $slave_timeout= `SELECT $lock_wait_timeout/2` +--source include/sync_with_master_gtid.inc +--let $slave_timeout= +SELECT * FROM t1 ORDER BY a; +SELECT * FROM t2 ORDER BY a; +SELECT * FROM t3 ORDER BY a; + +# Cleanup. +--connection master +CALL mtr.add_suppression("Unsafe statement written to the binary log using statement format"); +DROP TABLE t1, t2, t3; + +--connection slave +--source include/stop_slave.inc +eval SET @@global.slave_parallel_threads= $save_slave_parallel_threads; +eval SET @@global.slave_parallel_mode= $save_slave_parallel_mode; +eval SET @@global.innodb_lock_wait_timeout= $save_innodb_lock_wait_timeout; +--source include/start_slave.inc +SELECT @@GLOBAL.innodb_autoinc_lock_mode; +--source include/rpl_end.inc diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 0c929837957..fabfe304e6c 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -5382,12 +5382,6 @@ thd_need_wait_reports(const MYSQL_THD thd) deadlock with the pre-determined commit order, we kill the later transaction, and later re-try it, to resolve the deadlock. - This call need only receive reports about waits for locks that will remain - until the holding transaction commits. InnoDB auto-increment locks, - for example, are released earlier, and so need not be reported. (Such false - positives are not harmful, but could lead to unnecessary kill and retry, so - best avoided). - Returns 1 if the OTHER_THD will be killed to resolve deadlock, 0 if not. The actual kill will happen later, asynchronously from another thread. The caller does not need to take any actions on the return value if the diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index a794e2bbf03..64df6320451 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -1740,7 +1740,6 @@ static void lock_wait_rpl_report(trx_t *trx) const lock_t *wait_lock= trx->lock.wait_lock; if (!wait_lock) return; - ut_ad(!(wait_lock->type_mode & LOCK_AUTO_INC)); /* This would likely be too large to attempt to use a memory transaction, even for wait_lock->is_table(). */ const bool nowait= lock_sys.wr_lock_try(); @@ -1764,14 +1763,13 @@ func_exit: } else if (!wait_lock->is_waiting()) goto func_exit; - ut_ad(!(wait_lock->type_mode & LOCK_AUTO_INC)); if (wait_lock->is_table()) { dict_table_t *table= wait_lock->un_member.tab_lock.table; for (lock_t *lock= UT_LIST_GET_FIRST(table->locks); lock; lock= UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock)) - if (!(lock->type_mode & LOCK_AUTO_INC) && lock->trx != trx) + if (lock->trx != trx) thd_rpl_deadlock_check(thd, lock->trx->mysql_thd); } else @@ -1862,8 +1860,8 @@ dberr_t lock_wait(que_thr_t *thr) thd_need_wait_reports() will hold even if parallel (or any) replication is not being used. We want to be allow the user to skip lock_wait_rpl_report(). */ - const bool rpl= !(type_mode & LOCK_AUTO_INC) && trx->mysql_thd && - innodb_deadlock_detect && thd_need_wait_reports(trx->mysql_thd); + const bool rpl= trx->mysql_thd && innodb_deadlock_detect && + thd_need_wait_reports(trx->mysql_thd); #endif const bool row_lock_wait= thr->lock_state == QUE_THR_LOCK_ROW; timespec abstime; From ca5c122adcd39c34b1bd7059903668586496caf6 Mon Sep 17 00:00:00 2001 From: Monty Date: Fri, 11 Aug 2023 17:59:40 +0300 Subject: [PATCH 6/8] MDEV-9938 Prepared statement return wrong result (missing row) The problem is that the first execution of the prepared statement makes a permanent optimization of converting the LEFT JOIN to an INNER JOIN. This is based on the assumption that all the user parameters (?) are always constants and that parameters to Item_cond() will not change value from true and false between different executions. (The example was using IS NULL, which will change value if parameter depending on if the parameter is NULL or not). The fix is to change Item_cond::fix_fields() and Item_cond::eval_not_null_tables() to not threat user parameters as constants. This will ensure that we don't do the LEFT_JOIN -> INNER JOIN conversion that causes problems. There is also some things that needs to be improved regarding calculations of not_null_tables_cache as we get a different value for WHERE 1 or t1.a=1 compared to WHERE t1.a= or 1 Changes done: - Mark Item_param with the PARAM flag to be able to quickly check in Item_cond::eval_not_null_tables() if an item contains a prepared statement parameter (just like we check for stored procedure parameters). - Fixed that Item_cond::not_null_tables_cache is not depending on order of arguments. - Don't call item->eval_const_cond() for items that are NOT on the top level of the WHERE clause. This removed a lot of unnecessary warnings in the test suite! - Do not reset not_null_tables_cache for not top level items. - Simplified Item_cond::fix_fields by calling eval_not_null_tables() instead of having duplication of all the code in eval_not_null_tables(). - Return an error if Item_cond::fix_field() generates an error The old code did generate an error in some cases, but not in all cases. - Fixed all handling of the above error in make_cond_for_tables(). The error handling by the callers did not exists before which could lead to asserts in many different places in the old code). - All changes in sql_select.cc are just checking the return value of fix_fields() and make_cond_for_tables() and returning an error value if fix_fields() returns true or make_cond_for_tables() returns NULL and is_error() is set. - Mark Item_cond as const_item if all arguments returns true for can_eval_in_optimize(). Reviewer: Sergei Petrunia --- mysql-test/main/having_cond_pushdown.result | 4 -- mysql-test/main/prepare.result | 35 ++++++++++ mysql-test/main/prepare.test | 36 ++++++++++ mysql-test/main/ps.result | 7 ++ mysql-test/main/ps.test | 8 ++- mysql-test/main/range.result | 2 - mysql-test/main/range_mrr_icp.result | 2 - mysql-test/main/subselect_exists2in.result | 4 +- mysql-test/main/view.result | 4 +- mysql-test/suite/vcol/r/vcol_syntax.result | 10 ++- mysql-test/suite/vcol/t/vcol_syntax.test | 2 + sql/item.cc | 1 + sql/item.h | 11 +-- sql/item_cmpfunc.cc | 74 ++++++++------------- sql/sql_select.cc | 74 ++++++++++++++++----- 15 files changed, 190 insertions(+), 84 deletions(-) diff --git a/mysql-test/main/having_cond_pushdown.result b/mysql-test/main/having_cond_pushdown.result index fea8f83f9a1..fb73cf789b5 100644 --- a/mysql-test/main/having_cond_pushdown.result +++ b/mysql-test/main/having_cond_pushdown.result @@ -4932,12 +4932,8 @@ SELECT * FROM t1 GROUP BY i HAVING i IN ( i IS NULL); i SELECT * FROM t1 GROUP BY i HAVING i IN ( i IS NULL AND 'x' = 0); i -Warnings: -Warning 1292 Truncated incorrect DECIMAL value: 'x' SELECT * FROM t1 GROUP BY i HAVING i='1' IN ( i IS NULL AND 'x' = 0); i -Warnings: -Warning 1292 Truncated incorrect DECIMAL value: 'x' DROP TABLE t1; # # MDEV-28080: HAVING with NOT EXIST predicate in an equality diff --git a/mysql-test/main/prepare.result b/mysql-test/main/prepare.result index 7c730bff0c5..a9ac531280c 100644 --- a/mysql-test/main/prepare.result +++ b/mysql-test/main/prepare.result @@ -80,3 +80,38 @@ drop table t1, t2, t3; # # End of 10.4 tests # +# +# MDEV-9938 Prepared statement return wrong result (missing row) +# +CREATE TABLE t1 (a_id INT AUTO_INCREMENT PRIMARY KEY, a_text VARCHAR(20)); +CREATE TABLE t2 (b_id INT AUTO_INCREMENT PRIMARY KEY, b_a_id INT); +INSERT INTO t1 VALUES (NULL, 'word1'); +INSERT INTO t2 VALUES (NULL, 1), (NULL, NULL); +PREPARE q FROM 'SELECT * FROM t2 + LEFT JOIN t1 ON (t1.a_id = t2.b_a_id) +WHERE ((? IS NULL) OR (t1.a_text = ?))'; +SET @var = 'word1'; +expect row count 1 +EXECUTE q USING @var, @var; +b_id b_a_id a_id a_text +1 1 1 word1 +expect row count = 2 +EXECUTE q USING @nul, @nul; +b_id b_a_id a_id a_text +1 1 1 word1 +2 NULL NULL NULL +PREPARE q2 FROM 'SELECT * FROM t2 + LEFT JOIN t1 ON (t1.a_id = t2.b_a_id) +WHERE ((? IS NULL) OR (t1.a_text = ?))'; +expect row count 2 +SET @var = 'word1'; +EXECUTE q2 USING @nul, @nul; +b_id b_a_id a_id a_text +1 1 1 word1 +2 NULL NULL NULL +deallocate prepare q; +deallocate prepare q2; +drop table t1,t2; +# +# End of 10.6 tests +# diff --git a/mysql-test/main/prepare.test b/mysql-test/main/prepare.test index bf37f6dc8d1..b8ee5ad6b6d 100644 --- a/mysql-test/main/prepare.test +++ b/mysql-test/main/prepare.test @@ -69,3 +69,39 @@ drop table t1, t2, t3; --echo # --echo # End of 10.4 tests --echo # + +--echo # +--echo # MDEV-9938 Prepared statement return wrong result (missing row) +--echo # + +CREATE TABLE t1 (a_id INT AUTO_INCREMENT PRIMARY KEY, a_text VARCHAR(20)); +CREATE TABLE t2 (b_id INT AUTO_INCREMENT PRIMARY KEY, b_a_id INT); + +INSERT INTO t1 VALUES (NULL, 'word1'); +INSERT INTO t2 VALUES (NULL, 1), (NULL, NULL); + +PREPARE q FROM 'SELECT * FROM t2 + LEFT JOIN t1 ON (t1.a_id = t2.b_a_id) +WHERE ((? IS NULL) OR (t1.a_text = ?))'; + +SET @var = 'word1'; +--echo expect row count 1 +EXECUTE q USING @var, @var; +--echo expect row count = 2 +EXECUTE q USING @nul, @nul; + +PREPARE q2 FROM 'SELECT * FROM t2 + LEFT JOIN t1 ON (t1.a_id = t2.b_a_id) +WHERE ((? IS NULL) OR (t1.a_text = ?))'; + +--echo expect row count 2 +SET @var = 'word1'; +EXECUTE q2 USING @nul, @nul; + +deallocate prepare q; +deallocate prepare q2; +drop table t1,t2; + +--echo # +--echo # End of 10.6 tests +--echo # diff --git a/mysql-test/main/ps.result b/mysql-test/main/ps.result index fc982fba4e5..d96fcbe2dcc 100644 --- a/mysql-test/main/ps.result +++ b/mysql-test/main/ps.result @@ -4094,9 +4094,16 @@ DROP TABLE t1, t2; # CREATE TABLE t1 (a INT); PREPARE stmt FROM "SELECT 1 FROM t1 GROUP BY 0 OR 18446744073709551615+1"; +execute stmt; +1 +SELECT 1 FROM t1 GROUP BY 0 OR 18446744073709551615+1; +1 +insert into t1 values(1),(2); +execute stmt; ERROR 22003: BIGINT UNSIGNED value is out of range in '18446744073709551615 + 1' SELECT 1 FROM t1 GROUP BY 0 OR 18446744073709551615+1; ERROR 22003: BIGINT UNSIGNED value is out of range in '18446744073709551615 + 1' +deallocate prepare stmt; drop table t1; # End of 5.3 tests # diff --git a/mysql-test/main/ps.test b/mysql-test/main/ps.test index ebf646eadf3..0043d3aa141 100644 --- a/mysql-test/main/ps.test +++ b/mysql-test/main/ps.test @@ -3632,12 +3632,16 @@ DROP TABLE t1, t2; --echo # with out of range in GROUP BY --echo # CREATE TABLE t1 (a INT); - ---error ER_DATA_OUT_OF_RANGE PREPARE stmt FROM "SELECT 1 FROM t1 GROUP BY 0 OR 18446744073709551615+1"; +execute stmt; +SELECT 1 FROM t1 GROUP BY 0 OR 18446744073709551615+1; +insert into t1 values(1),(2); +--error ER_DATA_OUT_OF_RANGE +execute stmt; --error ER_DATA_OUT_OF_RANGE SELECT 1 FROM t1 GROUP BY 0 OR 18446744073709551615+1; +deallocate prepare stmt; drop table t1; --echo # End of 5.3 tests diff --git a/mysql-test/main/range.result b/mysql-test/main/range.result index 31777773240..8b4eafa3682 100644 --- a/mysql-test/main/range.result +++ b/mysql-test/main/range.result @@ -1625,8 +1625,6 @@ NULL Warnings: Warning 1411 Incorrect datetime value: '2007-20-00' for function str_to_date Warning 1411 Incorrect datetime value: '2007-20-00' for function str_to_date -Warning 1411 Incorrect datetime value: '2007-20-00' for function str_to_date -Warning 1411 Incorrect datetime value: '2007-20-00' for function str_to_date SELECT str_to_date('2007-10-00', '%Y-%m-%d') BETWEEN '' AND '2007/10/20'; str_to_date('2007-10-00', '%Y-%m-%d') BETWEEN '' AND '2007/10/20' 1 diff --git a/mysql-test/main/range_mrr_icp.result b/mysql-test/main/range_mrr_icp.result index 6817edd30cd..4fb717b0b51 100644 --- a/mysql-test/main/range_mrr_icp.result +++ b/mysql-test/main/range_mrr_icp.result @@ -1628,8 +1628,6 @@ NULL Warnings: Warning 1411 Incorrect datetime value: '2007-20-00' for function str_to_date Warning 1411 Incorrect datetime value: '2007-20-00' for function str_to_date -Warning 1411 Incorrect datetime value: '2007-20-00' for function str_to_date -Warning 1411 Incorrect datetime value: '2007-20-00' for function str_to_date SELECT str_to_date('2007-10-00', '%Y-%m-%d') BETWEEN '' AND '2007/10/20'; str_to_date('2007-10-00', '%Y-%m-%d') BETWEEN '' AND '2007/10/20' 1 diff --git a/mysql-test/main/subselect_exists2in.result b/mysql-test/main/subselect_exists2in.result index 6ff518b5a29..78f5457012f 100644 --- a/mysql-test/main/subselect_exists2in.result +++ b/mysql-test/main/subselect_exists2in.result @@ -333,7 +333,7 @@ id select_type table type possible_keys key key_len ref rows filtered Extra 3 MATERIALIZED t3 ALL NULL NULL NULL NULL 2 100.00 Using where Warnings: Note 1276 Field or reference 'test.t2.b' of SELECT #3 was resolved in SELECT #2 -Note 1003 /* select#1 */ select (/* select#2 */ select 1 from dual where !(1 is not null and (1,1 in ((1 in on distinct_key where 1 = ``.`c`))))) AS `( SELECT b FROM t2 WHERE NOT EXISTS ( SELECT c FROM t3 WHERE c = b ) )` from `test`.`t1` +Note 1003 /* select#1 */ select (/* select#2 */ select 1 from dual where !(1 is not null and (1,1 in ( (/* select#3 */ select `test`.`t3`.`c` from `test`.`t3` where `test`.`t3`.`c` is not null ), (1 in on distinct_key where 1 = ``.`c`))))) AS `( SELECT b FROM t2 WHERE NOT EXISTS ( SELECT c FROM t3 WHERE c = b ) )` from `test`.`t1` SELECT ( SELECT b FROM t2 WHERE NOT EXISTS ( SELECT c FROM t3 WHERE c = b ) ) FROM t1; ( SELECT b FROM t2 WHERE NOT EXISTS ( SELECT c FROM t3 WHERE c = b ) ) 1 @@ -347,7 +347,7 @@ id select_type table type possible_keys key key_len ref rows filtered Extra 3 MATERIALIZED t3 ALL NULL NULL NULL NULL 2 100.00 Using where Warnings: Note 1276 Field or reference 'test.t2.b' of SELECT #3 was resolved in SELECT #2 -Note 1003 /* select#1 */ select (/* select#2 */ select 1 from dual where !(1 is not null and (1,1 in ((1 in on distinct_key where 1 = ``.`c`))))) AS `( SELECT b FROM t2 WHERE NOT EXISTS ( SELECT c FROM t3 WHERE c = b ) )` from `test`.`t1` +Note 1003 /* select#1 */ select (/* select#2 */ select 1 from dual where !(1 is not null and (1,1 in ( (/* select#3 */ select `test`.`t3`.`c` from `test`.`t3` where `test`.`t3`.`c` is not null ), (1 in on distinct_key where 1 = ``.`c`))))) AS `( SELECT b FROM t2 WHERE NOT EXISTS ( SELECT c FROM t3 WHERE c = b ) )` from `test`.`t1` SELECT ( SELECT b FROM t2 WHERE NOT EXISTS ( SELECT c FROM t3 WHERE c = b ) ) FROM t1; ( SELECT b FROM t2 WHERE NOT EXISTS ( SELECT c FROM t3 WHERE c = b ) ) 1 diff --git a/mysql-test/main/view.result b/mysql-test/main/view.result index 329f367844e..75cfd55f47c 100644 --- a/mysql-test/main/view.result +++ b/mysql-test/main/view.result @@ -4630,7 +4630,7 @@ id select_type table type possible_keys key key_len ref rows filtered Extra 2 DEPENDENT SUBQUERY t4 ALL NULL NULL NULL NULL 2 100.00 Using where Warnings: Note 1276 Field or reference 'test.t1.a' of SELECT #2 was resolved in SELECT #1 -Note 1003 /* select#1 */ select `test`.`t1`.`a` AS `a`,10 AS `a` from `test`.`t1` where !<10,`test`.`t1`.`a`>((10,(/* select#2 */ select NULL from `test`.`t4` where `test`.`t4`.`a` >= `test`.`t1`.`a` and trigcond((10) = NULL or 1) having trigcond(NULL is null)))) +Note 1003 /* select#1 */ select `test`.`t1`.`a` AS `a`,10 AS `a` from `test`.`t1` where !<10,`test`.`t1`.`a`>((10,(/* select#2 */ select NULL from `test`.`t4` where `test`.`t4`.`a` >= `test`.`t1`.`a` and trigcond((10) = NULL or (NULL is null)) having trigcond(NULL is null)))) SELECT * FROM t1, t2 WHERE t2.a NOT IN (SELECT t3.b FROM t3 RIGHT JOIN t4 ON (t4.a = t3.a) WHERE t4.a >= t1.a); @@ -4646,7 +4646,7 @@ id select_type table type possible_keys key key_len ref rows filtered Extra 2 DEPENDENT SUBQUERY t4 ALL NULL NULL NULL NULL 2 100.00 Using where Warnings: Note 1276 Field or reference 'v1.a' of SELECT #2 was resolved in SELECT #1 -Note 1003 /* select#1 */ select `test`.`t1`.`a` AS `a`,10 AS `a` from `test`.`t1` where !<10,`test`.`t1`.`a`>((10,(/* select#2 */ select NULL from `test`.`t4` where `test`.`t4`.`a` >= `test`.`t1`.`a` and trigcond((10) = NULL or 1) having trigcond(NULL is null)))) +Note 1003 /* select#1 */ select `test`.`t1`.`a` AS `a`,10 AS `a` from `test`.`t1` where !<10,`test`.`t1`.`a`>((10,(/* select#2 */ select NULL from `test`.`t4` where `test`.`t4`.`a` >= `test`.`t1`.`a` and trigcond((10) = NULL or (NULL is null)) having trigcond(NULL is null)))) SELECT * FROM v1, t2 WHERE t2.a NOT IN (SELECT t3.b FROM t3 RIGHT JOIN t4 ON (t4.a = t3.a) WHERE t4.a >= v1.a); diff --git a/mysql-test/suite/vcol/r/vcol_syntax.result b/mysql-test/suite/vcol/r/vcol_syntax.result index 144d4ab335d..7725d59f59c 100644 --- a/mysql-test/suite/vcol/r/vcol_syntax.result +++ b/mysql-test/suite/vcol/r/vcol_syntax.result @@ -201,18 +201,22 @@ drop table t1; # MDEV-31319 Assertion const_item_cache == true failed in Item_func::fix_fields # create table t (f1 int, f2 int, fv int generated always as (case user() when 'foo' or 'bar' then f1 else f2 end) virtual); -Warnings: -Warning 1292 Truncated incorrect DOUBLE value: 'foo' -Warning 1292 Truncated incorrect DOUBLE value: 'bar' select * from t; f1 f2 fv +insert into t (f1,f2) values(1,1); +select * from t; +f1 f2 fv +1 1 1 Warnings: +Warning 1292 Truncated incorrect DECIMAL value: 'root@localhost' Warning 1292 Truncated incorrect DOUBLE value: 'foo' Warning 1292 Truncated incorrect DOUBLE value: 'bar' create table tmp as select * from information_schema.tables where table_name = 't'; select * from t; f1 f2 fv +1 1 1 Warnings: +Warning 1292 Truncated incorrect DECIMAL value: 'root@localhost' Warning 1292 Truncated incorrect DOUBLE value: 'foo' Warning 1292 Truncated incorrect DOUBLE value: 'bar' drop table t, tmp; diff --git a/mysql-test/suite/vcol/t/vcol_syntax.test b/mysql-test/suite/vcol/t/vcol_syntax.test index da2ad27d37d..c26c4897833 100644 --- a/mysql-test/suite/vcol/t/vcol_syntax.test +++ b/mysql-test/suite/vcol/t/vcol_syntax.test @@ -168,6 +168,8 @@ drop table t1; --echo # create table t (f1 int, f2 int, fv int generated always as (case user() when 'foo' or 'bar' then f1 else f2 end) virtual); select * from t; +insert into t (f1,f2) values(1,1); +select * from t; create table tmp as select * from information_schema.tables where table_name = 't'; select * from t; diff --git a/sql/item.cc b/sql/item.cc index ce1ba00bd5b..9f964ed019a 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -4068,6 +4068,7 @@ Item_param::Item_param(THD *thd, const LEX_CSTRING *name_arg, value is set. */ set_maybe_null(); + with_flags= with_flags | item_with_t::PARAM; } diff --git a/sql/item.h b/sql/item.h index 0f59b38cd38..3be7cd7283a 100644 --- a/sql/item.h +++ b/sql/item.h @@ -787,7 +787,8 @@ enum class item_with_t : item_flags_t FIELD= (1<<2), // If any item except Item_sum contains a field. SUM_FUNC= (1<<3), // If item contains a sum func SUBQUERY= (1<<4), // If item containts a sub query - ROWNUM_FUNC= (1<<5) + ROWNUM_FUNC= (1<<5), // If ROWNUM function was used + PARAM= (1<<6) // If user parameter was used }; @@ -1087,6 +1088,8 @@ public: { return (bool) (with_flags & item_with_t::SUBQUERY); } inline bool with_rownum_func() const { return (bool) (with_flags & item_with_t::ROWNUM_FUNC); } + inline bool with_param() const + { return (bool) (with_flags & item_with_t::PARAM); } inline void copy_flags(const Item *org, item_base_t mask) { base_flags= (item_base_t) (((item_flags_t) base_flags & @@ -5304,17 +5307,17 @@ public: :used_tables_cache(other->used_tables_cache), const_item_cache(other->const_item_cache) { } - void used_tables_and_const_cache_init() + inline void used_tables_and_const_cache_init() { used_tables_cache= 0; const_item_cache= true; } - void used_tables_and_const_cache_join(const Item *item) + inline void used_tables_and_const_cache_join(const Item *item) { used_tables_cache|= item->used_tables(); const_item_cache&= item->const_item(); } - void used_tables_and_const_cache_update_and_join(Item *item) + inline void used_tables_and_const_cache_update_and_join(Item *item) { item->update_used_tables(); used_tables_and_const_cache_join(item); diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index 1e7209ad37c..1ef5d3c9d0c 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -4915,7 +4915,7 @@ Item_cond::fix_fields(THD *thd, Item **ref) List_iterator li(list); Item *item; uchar buff[sizeof(char*)]; // Max local vars in function - bool is_and_cond= functype() == Item_func::COND_AND_FUNC; + not_null_tables_cache= 0; used_tables_and_const_cache_init(); @@ -4957,52 +4957,24 @@ Item_cond::fix_fields(THD *thd, Item **ref) merge_sub_condition(li); item= *li.ref(); // may be substituted in fix_fields/merge_item_if_possible - used_tables_cache|= item->used_tables(); - if (item->can_eval_in_optimize() && !item->with_sp_var() && - !cond_has_datetime_is_null(item)) - { - if (item->eval_const_cond() == is_and_cond && top_level()) - { - /* - a. This is "... AND true_cond AND ..." - In this case, true_cond has no effect on cond_and->not_null_tables() - b. This is "... OR false_cond/null cond OR ..." - In this case, false_cond has no effect on cond_or->not_null_tables() - */ - } - else - { - /* - a. This is "... AND false_cond/null_cond AND ..." - The whole condition is FALSE/UNKNOWN. - b. This is "... OR const_cond OR ..." - In this case, cond_or->not_null_tables()=0, because the condition - const_cond might evaluate to true (regardless of whether some tables - were NULL-complemented). - */ - not_null_tables_cache= (table_map) 0; - and_tables_cache= (table_map) 0; - } - if (thd->is_error()) - return TRUE; - } - else - { - table_map tmp_table_map= item->not_null_tables(); - not_null_tables_cache|= tmp_table_map; - and_tables_cache&= tmp_table_map; - - const_item_cache= FALSE; - } + used_tables_and_const_cache_join(item); base_flags|= item->base_flags & item_base_t::MAYBE_NULL; with_flags|= item->with_flags; } - if (fix_length_and_dec()) - return TRUE; + (void) eval_not_null_tables((void*) 0); + + /* + We have to set fixed as some other items will check it and fail if we + do not. This can be changed when we properly check if fix_fields() + fails in call cases. + */ base_flags|= item_base_t::FIXED; + if (fix_length_and_dec() || thd->is_error()) + return TRUE; return FALSE; } + /** @brief Merge a lower-level condition pointed by the iterator into this Item_cond @@ -5052,6 +5024,9 @@ void Item_cond::merge_sub_condition(List_iterator& li) } } +/* + Calculate not_null_tables_cache and and_tables_cache. +*/ bool Item_cond::eval_not_null_tables(void *opt_arg) @@ -5059,15 +5034,17 @@ Item_cond::eval_not_null_tables(void *opt_arg) Item *item; bool is_and_cond= functype() == Item_func::COND_AND_FUNC; List_iterator li(list); + bool found= 0; + not_null_tables_cache= (table_map) 0; and_tables_cache= ~(table_map) 0; while ((item=li++)) { - table_map tmp_table_map; - if (item->can_eval_in_optimize() && !item->with_sp_var() && - !cond_has_datetime_is_null(item)) + if (item->can_eval_in_optimize() && + !item->with_sp_var() && !item->with_param() && + !cond_has_datetime_is_null(item) && top_level()) { - if (item->eval_const_cond() == is_and_cond && top_level()) + if (item->eval_const_cond() == is_and_cond) { /* a. This is "... AND true_cond AND ..." @@ -5086,14 +5063,19 @@ Item_cond::eval_not_null_tables(void *opt_arg) const_cond might evaluate to true (regardless of whether some tables were NULL-complemented). */ + found= 1; not_null_tables_cache= (table_map) 0; and_tables_cache= (table_map) 0; } } else { - tmp_table_map= item->not_null_tables(); - not_null_tables_cache|= tmp_table_map; + table_map tmp_table_map= item->not_null_tables(); + if (!found) + { + /* We should not depend on the order of items */ + not_null_tables_cache|= tmp_table_map; + } and_tables_cache&= tmp_table_map; } } diff --git a/sql/sql_select.cc b/sql/sql_select.cc index ae68d84b30f..9ae356e0749 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -2479,6 +2479,8 @@ JOIN::optimize_inner() COND *table_independent_conds= make_cond_for_table(thd, conds, PSEUDO_TABLE_BITS, 0, -1, FALSE, FALSE); + if (!table_independent_conds && thd->is_error()) + DBUG_RETURN(1); DBUG_EXECUTE("where", print_where(table_independent_conds, "where after opt_sum_query()", @@ -2819,6 +2821,8 @@ int JOIN::optimize_stage2() if (make_join_select(this, select, conds)) { + if (thd->is_error()) + DBUG_RETURN(1); zero_result_cause= "Impossible WHERE noticed after reading const tables"; select_lex->mark_const_derived(zero_result_cause); @@ -3423,9 +3427,13 @@ bool JOIN::add_having_as_table_cond(JOIN_TAB *tab) having= make_cond_for_table(thd, tmp_having, ~ (table_map) 0, ~used_tables, 0, false, false); + if (!having && thd->is_error()) + DBUG_RETURN(true); DBUG_EXECUTE("where", print_where(having, "having after sort", QT_ORDINARY);); } + else if (thd->is_error()) + DBUG_RETURN(true); DBUG_RETURN(false); } @@ -10625,17 +10633,22 @@ int JOIN_TAB::make_scan_filter() Item *cond= is_inner_table_of_outer_join() ? *get_first_inner_table()->on_expr_ref : join->conds; - if (cond && - (tmp= make_cond_for_table(join->thd, cond, - join->const_table_map | table->map, - table->map, -1, FALSE, TRUE))) + if (cond) { - DBUG_EXECUTE("where",print_where(tmp,"cache", QT_ORDINARY);); - if (!(cache_select= - (SQL_SELECT*) join->thd->memdup((uchar*) select, sizeof(SQL_SELECT)))) - DBUG_RETURN(1); - cache_select->cond= tmp; - cache_select->read_tables=join->const_table_map; + if ((tmp= make_cond_for_table(join->thd, cond, + join->const_table_map | table->map, + table->map, -1, FALSE, TRUE))) + { + DBUG_EXECUTE("where",print_where(tmp,"cache", QT_ORDINARY);); + if (!(cache_select= + (SQL_SELECT*) join->thd->memdup((uchar*) select, + sizeof(SQL_SELECT)))) + DBUG_RETURN(1); + cache_select->cond= tmp; + cache_select->read_tables=join->const_table_map; + } + else if (join->thd->is_error()) + DBUG_RETURN(1); } DBUG_RETURN(0); } @@ -12215,6 +12228,9 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) const_cond= make_cond_for_table(thd, cond, join->const_table_map, (table_map) 0, -1, FALSE, FALSE); + if (!const_cond && thd->is_error()) + DBUG_RETURN(1); + /* Add conditions added by add_not_null_conds(). */ for (uint i= 0 ; i < join->const_tables ; i++) add_cond_and_fix(thd, &const_cond, @@ -12263,6 +12279,8 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) add_cond_and_fix(thd, &outer_ref_cond, join->outer_ref_cond); join->outer_ref_cond= outer_ref_cond; } + else if (thd->is_error()) + DBUG_RETURN(1); } else { @@ -12278,6 +12296,8 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) join->pseudo_bits_cond); join->pseudo_bits_cond= pseudo_bits_cond; } + else if (thd->is_error()) + DBUG_RETURN(1); } } } @@ -12381,6 +12401,9 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) { tmp= make_cond_for_table(thd, cond, used_tables, current_map, i, FALSE, FALSE); + if (!tmp && thd->is_error()) + DBUG_RETURN(1); + if (tab == join->join_tab + last_top_base_tab_idx) { /* @@ -12393,7 +12416,10 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) COND *rand_cond= make_cond_for_table(thd, cond, used_tables, rand_table_bit, -1, FALSE, FALSE); - add_cond_and_fix(thd, &tmp, rand_cond); + if (rand_cond) + add_cond_and_fix(thd, &tmp, rand_cond); + else if (thd->is_error()) + DBUG_RETURN(1); } } /* Add conditions added by add_not_null_conds(). */ @@ -12478,8 +12504,8 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) trace_cp.add_table_name(tab->table); COND *push_cond= - make_cond_for_table(thd, tmp_cond, current_map, current_map, - -1, FALSE, FALSE); + make_cond_for_table(thd, tmp_cond, current_map, current_map, + -1, FALSE, FALSE); if (push_cond) { trace_cp.add("push_cond", push_cond); @@ -12487,6 +12513,8 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) if (!tab->table->file->cond_push(push_cond)) tab->table->file->pushed_cond= push_cond; } + else if (thd->is_error()) + DBUG_RETURN(1); } } } @@ -12695,7 +12723,11 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) join->const_table_map, (table_map) 0, -1, FALSE, FALSE); if (!tmp_cond) - continue; + { + if (!thd->is_error()) + continue; + DBUG_RETURN(1); + } tmp_cond= new (thd->mem_root) Item_func_trig_cond(thd, tmp_cond, &cond_tab->not_null_compl); if (!tmp_cond) @@ -12749,6 +12781,8 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) current_map, /*(inner_tab - first_tab)*/ -1, FALSE, FALSE); + if (!tmp_cond && thd->is_error()) + DBUG_RETURN(1); if (tab == last_tab) { /* @@ -12762,7 +12796,10 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) COND *rand_cond= make_cond_for_table(thd, on_expr, used_tables2, rand_table_bit, -1, FALSE, FALSE); - add_cond_and_fix(thd, &tmp_cond, rand_cond); + if (rand_cond) + add_cond_and_fix(thd, &tmp_cond, rand_cond); + else if (thd->is_error()) + DBUG_RETURN(1); } bool is_sjm_lookup_tab= FALSE; if (inner_tab->bush_children) @@ -23676,6 +23713,8 @@ make_cond_for_table_from_pred(THD *thd, Item *root_cond, Item *cond, retain_ref_cond, false); if (fix) new_cond->argument_list()->push_back(fix, thd->mem_root); + else if (thd->is_error()) + return ((COND*) 0); } switch (new_cond->argument_list()->elements) { case 0: @@ -23718,7 +23757,7 @@ make_cond_for_table_from_pred(THD *thd, Item *root_cond, Item *cond, exclude_expensive_cond, retain_ref_cond, false); if (!fix) - return (COND*) 0; // Always true + return (COND*) 0; // Always true or error new_cond->argument_list()->push_back(fix, thd->mem_root); } /* @@ -23726,7 +23765,8 @@ make_cond_for_table_from_pred(THD *thd, Item *root_cond, Item *cond, the new parent Item. This should not be expensive because all children of Item_cond_and should be fixed by now. */ - new_cond->fix_fields(thd, 0); + if (new_cond->fix_fields(thd, 0)) + return (COND*) 0; new_cond->used_tables_cache= ((Item_cond_or*) cond)->used_tables_cache; new_cond->top_level_item(); return new_cond; From 88dd50b80ad9624d05b72751fd6e4a2cfdb6a3fe Mon Sep 17 00:00:00 2001 From: Alexander Barkov Date: Wed, 16 Aug 2023 09:12:28 +0400 Subject: [PATCH 7/8] After-merge cleanup for MDEV-27207 + MDEV-31719 Something went wrong during a merge (from 10.5 to 10.6) of 68403eeda320ad0831563ce09a9c4af1549fe65e (fixing bugs MDEV-27207 and MDEV-31719). Originally (in 10.5) the fix was done in_inet6::set() in plugin/type_inet/sql_type_inet.cc. In 10.6 this code resides in a different place: in the method in_fbt::set() of a template class in sql/sql_type_fixedbin.h. During the merge: - the fix did not properly migrate to in_fbt::set() - the related MTR tests disappeared This patch fixes in_fbt::set() properly and restores MTR tests. --- .../mysql-test/type_inet/type_inet6.result | 26 +++++++++++++++++++ .../mysql-test/type_inet/type_inet6.test | 19 ++++++++++++++ sql/sql_type_fixedbin.h | 8 +++--- 3 files changed, 50 insertions(+), 3 deletions(-) diff --git a/plugin/type_inet/mysql-test/type_inet/type_inet6.result b/plugin/type_inet/mysql-test/type_inet/type_inet6.result index 79958ee77f4..9c601697e4f 100644 --- a/plugin/type_inet/mysql-test/type_inet/type_inet6.result +++ b/plugin/type_inet/mysql-test/type_inet/type_inet6.result @@ -2257,3 +2257,29 @@ a m ::10 1 DROP VIEW v1; DROP TABLE t1, t2; +# +# MDEV-27207 Assertion `!m_null_value' failed in int FixedBinTypeBundle::cmp_item_fbt::compare or in cmp_item_inet6::compare +# +CREATE TABLE t1 (a CHAR,b INET6); +SELECT * FROM t1 WHERE (a,b) IN (('',''),('','')); +a b +Warnings: +Warning 1292 Incorrect inet6 value: '' +Warning 1292 Incorrect inet6 value: '' +INSERT INTO t1 VALUES ('','::'),('','::'); +SELECT * FROM t1 WHERE (a,b) IN (('',''),('','')); +a b +Warnings: +Warning 1292 Incorrect inet6 value: '' +Warning 1292 Incorrect inet6 value: '' +DROP TABLE t1; +# +# MDEV-31719 Wrong result of: WHERE inet6_column IN ('','::1') +# +CREATE OR REPLACE TABLE t1 (a INET6); +INSERT INTO t1 VALUES ('::'); +SELECT * FROM t1 WHERE a IN ('','::1'); +a +Warnings: +Warning 1292 Incorrect inet6 value: '' +DROP TABLE t1; diff --git a/plugin/type_inet/mysql-test/type_inet/type_inet6.test b/plugin/type_inet/mysql-test/type_inet/type_inet6.test index 2cdbc0eb2b9..771d8fbc347 100644 --- a/plugin/type_inet/mysql-test/type_inet/type_inet6.test +++ b/plugin/type_inet/mysql-test/type_inet/type_inet6.test @@ -1656,3 +1656,22 @@ SELECT * FROM v1 ORDER BY a; SELECT * FROM t2 ORDER BY a; DROP VIEW v1; DROP TABLE t1, t2; + +--echo # +--echo # MDEV-27207 Assertion `!m_null_value' failed in int FixedBinTypeBundle::cmp_item_fbt::compare or in cmp_item_inet6::compare +--echo # + +CREATE TABLE t1 (a CHAR,b INET6); +SELECT * FROM t1 WHERE (a,b) IN (('',''),('','')); +INSERT INTO t1 VALUES ('','::'),('','::'); +SELECT * FROM t1 WHERE (a,b) IN (('',''),('','')); +DROP TABLE t1; + +--echo # +--echo # MDEV-31719 Wrong result of: WHERE inet6_column IN ('','::1') +--echo # + +CREATE OR REPLACE TABLE t1 (a INET6); +INSERT INTO t1 VALUES ('::'); +SELECT * FROM t1 WHERE a IN ('','::1'); +DROP TABLE t1; diff --git a/sql/sql_type_fixedbin.h b/sql/sql_type_fixedbin.h index dbfc958cd3b..e971dc08b66 100644 --- a/sql/sql_type_fixedbin.h +++ b/sql/sql_type_fixedbin.h @@ -776,10 +776,12 @@ public: Fbt *buff= &((Fbt *) base)[pos]; Fbt_null value(item); if (value.is_null()) + { *buff= Fbt::zero(); - else - *buff= value; - return FALSE; + return true; + } + *buff= value; + return false; } uchar *get_value(Item *item) override { From 8aaacb5509a7981062d3ad0331cef212e3d79d5d Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Mon, 14 Aug 2023 11:09:51 +0300 Subject: [PATCH 8/8] MDEV-31432 tmp_table field accessed after free Before this patch, the code in Item_field::print() used this convention (described in sql_explain.h:ExplainDataStructureLifetime): - By default, the table that Item_field refers to is accessible. - ANALYZE and SHOW {EXPLAIN|ANALYZE} may print Items after some temporary tables have been dropped. They use QT_DONT_ACCESS_TMP_TABLES flag. When it is ON, Item_field::print will not access the table it refers to, if it is a temp.table The bug was that EXPLAIN statement also may compute subqueries (depending on subquery context and @@expensive_subquery_limit setting). After the computation, the subquery calls JOIN::cleanup(true) which drops some of its temporary tables. Calling Item_field::print() that refer to such table will cause an access to free'd memory. In this patch, we take into account that query optimization can compute a subquery and discard its temporary tables. Item_field::print() now assumes that any temporary table might have already been dropped. This means QT_DONT_ACCESS_TMP_TABLES flag is not needed - we imply it is always present. But we also make one exception: derived tables are not freed in JOIN::cleanup() call. They are freed later in close_thread_tables(), at the same time when regular tables are closed. Because of that, Item_field::print may assume that temp.tables representing derived tables are available. Initial patch by: Rex Jonston Reviewed by: Monty --- mysql-test/main/show_analyze.result | 39 +++++++++ mysql-test/main/show_analyze.test | 41 +++++++++ sql/item.cc | 32 ++++--- sql/item.h | 31 +++---- sql/item_func.cc | 2 +- sql/item_subselect.cc | 4 +- sql/item_sum.cc | 2 +- sql/mysqld.h | 6 +- sql/sql_base.cc | 3 + sql/sql_explain.cc | 131 +++++++++++----------------- sql/sql_explain.h | 53 +++++------ sql/sql_lex.cc | 1 - sql/sql_parse.cc | 3 +- sql/sql_select.cc | 6 +- sql/sql_window.cc | 2 +- 15 files changed, 199 insertions(+), 157 deletions(-) diff --git a/mysql-test/main/show_analyze.result b/mysql-test/main/show_analyze.result index c5fd61ecf64..f0a492344ba 100644 --- a/mysql-test/main/show_analyze.result +++ b/mysql-test/main/show_analyze.result @@ -434,3 +434,42 @@ ANALYZE } } DROP TABLE t1; +# +# MDEV-31432 tmp_table field accessed after free +# testing for the above (MDEV-28201) caused use after free error +# +create table t1 (x int) engine=myisam; +insert into t1 values(1); +set @tmp=@@optimizer_trace; +set @@optimizer_trace=1; +SELECT +1 IN +(( +SELECT +1 IN (SELECT 1 AS x0 +FROM +( +SELECT * +FROM (SELECT 1 AS x) AS x5 +GROUP BY x,x +HAVING +x IN ( +SELECT * +FROM t1 AS x1 +WHERE +x IN (SELECT 1 AS x +FROM t1 AS x3 +GROUP BY x +HAVING +x IN (SELECT 0 FROM t1 AS x4) +) +) +) AS x6 +) +FROM +t1 +)) as VAL; +VAL +0 +set optimizer_trace=@tmp; +drop table t1; diff --git a/mysql-test/main/show_analyze.test b/mysql-test/main/show_analyze.test index 9d59d5b2188..58d36d7dd16 100644 --- a/mysql-test/main/show_analyze.test +++ b/mysql-test/main/show_analyze.test @@ -364,3 +364,44 @@ ANALYZE format=json SELECT 1 FROM t1 GROUP BY convert_tz('1969-12-31 22:00:00',a,'+10:00'); DROP TABLE t1; +--echo # +--echo # MDEV-31432 tmp_table field accessed after free +--echo # testing for the above (MDEV-28201) caused use after free error +--echo # +create table t1 (x int) engine=myisam; +insert into t1 values(1); +set @tmp=@@optimizer_trace; +set @@optimizer_trace=1; +# Different warning text is produced in regular and --ps-protocol runs: +--disable_warnings +SELECT + 1 IN + (( + SELECT + 1 IN (SELECT 1 AS x0 + FROM + ( + SELECT * + FROM (SELECT 1 AS x) AS x5 + GROUP BY x,x + HAVING + x IN ( + SELECT * + FROM t1 AS x1 + WHERE + x IN (SELECT 1 AS x + FROM t1 AS x3 + GROUP BY x + HAVING + x IN (SELECT 0 FROM t1 AS x4) + ) + ) + ) AS x6 + ) + FROM + t1 + )) as VAL; +--enable_warnings +set optimizer_trace=@tmp; +drop table t1; + diff --git a/sql/item.cc b/sql/item.cc index 6b3a5ffe084..5591bcdb9c4 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -3138,7 +3138,7 @@ void Item_field::set_field(Field *field_par) if (field->table->s->tmp_table == SYSTEM_TMP_TABLE || field->table->s->tmp_table == INTERNAL_TMP_TABLE) - set_refers_to_temp_table(true); + set_refers_to_temp_table(); } @@ -3615,7 +3615,7 @@ Item *Item_field::get_tmp_table_item(THD *thd) if (new_item) { new_item->field= new_item->result_field; - new_item->set_refers_to_temp_table(true); + new_item->set_refers_to_temp_table(); } return new_item; } @@ -3626,9 +3626,14 @@ longlong Item_field::val_int_endpoint(bool left_endp, bool *incl_endp) return null_value? LONGLONG_MIN : res; } -void Item_field::set_refers_to_temp_table(bool value) +void Item_field::set_refers_to_temp_table() { - refers_to_temp_table= value; + /* + Derived temp. tables have non-zero derived_select_number. + We don't need to distingish between other kinds of temp.tables currently. + */ + refers_to_temp_table= (field->table->derived_select_number != 0)? + REFERS_TO_DERIVED_TMP : REFERS_TO_OTHER_TMP; } @@ -6292,7 +6297,7 @@ void Item_field::cleanup() field= 0; item_equal= NULL; null_value= FALSE; - refers_to_temp_table= FALSE; + refers_to_temp_table= NO_TEMP_TABLE; DBUG_VOID_RETURN; } @@ -7860,14 +7865,15 @@ void Item_field::print(String *str, enum_query_type query_type) { /* If the field refers to a constant table, print the value. - (1): But don't attempt to do that if - * the field refers to a temporary (work) table, and - * temp. tables might already have been dropped. + There are two exceptions: + 1. For temporary (aka "work") tables, we can only access the derived temp. + tables. Other kinds of tables might already have been dropped. + 2. Don't print constants if QT_NO_DATA_EXPANSION or QT_VIEW_INTERNAL is + specified. */ - if (!(refers_to_temp_table && // (1) - (query_type & QT_DONT_ACCESS_TMP_TABLES)) && // (1) - field && field->table->const_table && - !(query_type & (QT_NO_DATA_EXPANSION | QT_VIEW_INTERNAL))) + if ((refers_to_temp_table != REFERS_TO_OTHER_TMP) && // (1) + !(query_type & (QT_NO_DATA_EXPANSION | QT_VIEW_INTERNAL)) && // (2) + field && field->table->const_table) { print_value(str); return; @@ -9145,7 +9151,7 @@ Item* Item_cache_wrapper::get_tmp_table_item(THD *thd) { auto item_field= new (thd->mem_root) Item_field(thd, result_field); if (item_field) - item_field->set_refers_to_temp_table(true); + item_field->set_refers_to_temp_table(); return item_field; } return copy_or_same(thd); diff --git a/sql/item.h b/sql/item.h index 6b7008c8075..cc1075dc1a9 100644 --- a/sql/item.h +++ b/sql/item.h @@ -3568,27 +3568,18 @@ public: private: /* - Setting this member to TRUE (via set_refers_to_temp_table()) - ensures print() function continues to work even if the table - has been dropped. + Indicates whether this Item_field refers to a regular or some kind of + temporary table. + This is needed for print() to work: it may be called even after the table + referred by the Item_field has been dropped. - We need this for "ANALYZE statement" feature. Query execution has - these steps: - 1. Run the query. - 2. Cleanup starts. Temporary tables are destroyed - 3. print "ANALYZE statement" output, if needed - 4. Call close_thread_table() for regular tables. - - Step #4 is done after step #3, so "ANALYZE stmt" has no problem printing - Item_field objects that refer to regular tables. - - However, Step #3 is done after Step #2. Attempt to print Item_field objects - that refer to temporary tables will cause access to freed memory. - - To resolve this, we use refers_to_temp_table member to refer to items - in temporary (work) tables. + See ExplainDataStructureLifetime in sql_explain.h for details. */ - bool refers_to_temp_table= false; + enum { + NO_TEMP_TABLE= 0, + REFERS_TO_DERIVED_TMP= 1, + REFERS_TO_OTHER_TMP=2 + } refers_to_temp_table = NO_TEMP_TABLE; public: Item_field(THD *thd, Name_resolution_context *context_arg, @@ -3804,7 +3795,7 @@ public: return field->table->pos_in_table_list->outer_join; } bool check_index_dependence(void *arg) override; - void set_refers_to_temp_table(bool value); + void set_refers_to_temp_table(); friend class Item_default_value; friend class Item_insert_value; friend class st_select_lex_unit; diff --git a/sql/item_func.cc b/sql/item_func.cc index ee0f507d3e8..cb432b4f82f 100644 --- a/sql/item_func.cc +++ b/sql/item_func.cc @@ -749,7 +749,7 @@ Item *Item_func::get_tmp_table_item(THD *thd) { auto item_field= new (thd->mem_root) Item_field(thd, result_field); if (item_field) - item_field->set_refers_to_temp_table(true); + item_field->set_refers_to_temp_table(); return item_field; } return copy_or_same(thd); diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc index ff004bbb045..e020627d68b 100644 --- a/sql/item_subselect.cc +++ b/sql/item_subselect.cc @@ -1030,7 +1030,7 @@ Item *Item_subselect::get_tmp_table_item(THD *thd_arg) auto item_field= new (thd->mem_root) Item_field(thd_arg, result_field); if (item_field) - item_field->set_refers_to_temp_table(true); + item_field->set_refers_to_temp_table(); return item_field; } return copy_or_same(thd_arg); @@ -5310,7 +5310,7 @@ bool subselect_hash_sj_engine::make_semi_join_conds() Item_field *right_col_item= new (thd->mem_root) Item_field(thd, context, tmp_table->field[i]); if (right_col_item) - right_col_item->set_refers_to_temp_table(true); + right_col_item->set_refers_to_temp_table(); if (!right_col_item || !(eq_cond= new (thd->mem_root) diff --git a/sql/item_sum.cc b/sql/item_sum.cc index ffac6dbb912..bbd09a59267 100644 --- a/sql/item_sum.cc +++ b/sql/item_sum.cc @@ -562,7 +562,7 @@ Item *Item_sum::get_tmp_table_item(THD *thd) auto item_field= new (thd->mem_root) Item_field(thd, result_field_tmp++); if (item_field) - item_field->set_refers_to_temp_table(true); + item_field->set_refers_to_temp_table(); sum_item->args[i]= item_field; } } diff --git a/sql/mysqld.h b/sql/mysqld.h index 5263e397a15..28a0cb30637 100644 --- a/sql/mysqld.h +++ b/sql/mysqld.h @@ -905,11 +905,7 @@ enum enum_query_type // don't reveal values. QT_NO_DATA_EXPANSION= (1 << 9), // Remove wrappers added for TVC when creating or showing view - QT_NO_WRAPPERS_FOR_TVC_IN_VIEW= (1 << 12), - - // The temporary tables used by the query might be freed by the time - // this print() call is made. - QT_DONT_ACCESS_TMP_TABLES= (1 << 13) + QT_NO_WRAPPERS_FOR_TVC_IN_VIEW= (1 << 12) }; diff --git a/sql/sql_base.cc b/sql/sql_base.cc index c9c74046fd9..a4269e20614 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -871,6 +871,9 @@ int close_thread_tables(THD *thd) TODO: Probably even better approach is to simply associate list of derived tables with (sub-)statement instead of thread and destroy them at the end of its execution. + + Note: EXPLAIN/ANALYZE depends on derived tables being freed here. See + sql_explain.h:ExplainDataStructureLifetime. */ if (thd->derived_tables) { diff --git a/sql/sql_explain.cc b/sql/sql_explain.cc index 9c09259cb56..30ce9535a14 100644 --- a/sql/sql_explain.cc +++ b/sql/sql_explain.cc @@ -39,8 +39,8 @@ const char *unit_operation_text[4]= const char *pushed_derived_text= "PUSHED DERIVED"; const char *pushed_select_text= "PUSHED SELECT"; -static void write_item(Json_writer *writer, Item *item, bool no_tmp_tbl); -static void append_item_to_str(String *out, Item *item, bool no_tmp_tbl); +static void write_item(Json_writer *writer, Item *item); +static void append_item_to_str(String *out, Item *item); Explain_query::Explain_query(THD *thd_arg, MEM_ROOT *root) : mem_root(root), upd_del_plan(nullptr), insert_plan(nullptr), @@ -196,7 +196,7 @@ int Explain_query::send_explain(THD *thd, bool extended) int res= 0; if (thd->lex->explain_json) - print_explain_json(result, thd->lex->analyze_stmt, false /*is_show_cmd*/); + print_explain_json(result, thd->lex->analyze_stmt); else { res= print_explain(result, lex->describe, thd->lex->analyze_stmt); @@ -252,16 +252,8 @@ int Explain_query::print_explain(select_result_sink *output, } -/* - @param is_show_cmd TRUE<=> This is a SHOW EXPLAIN|ANALYZE command. - (These commands may be called at late stage in - the query processing, we need to pass no_tmp_tbl=true - to other print functions) -*/ - int Explain_query::print_explain_json(select_result_sink *output, bool is_analyze, - bool is_show_cmd, ulonglong query_time_in_progress_ms) { Json_writer writer; @@ -275,25 +267,17 @@ int Explain_query::print_explain_json(select_result_sink *output, writer.add_member("r_query_time_in_progress_ms"). add_ull(query_time_in_progress_ms); - /* - If we are printing ANALYZE FORMAT=JSON output, take into account that - query's temporary tables have already been freed. See sql_explain.h, - sql_explain.h:ExplainDataStructureLifetime for details. - */ - if (is_analyze) - is_show_cmd= true; - if (upd_del_plan) - upd_del_plan->print_explain_json(this, &writer, is_analyze, is_show_cmd); + upd_del_plan->print_explain_json(this, &writer, is_analyze); else if (insert_plan) - insert_plan->print_explain_json(this, &writer, is_analyze, is_show_cmd); + insert_plan->print_explain_json(this, &writer, is_analyze); else { /* Start printing from node with id=1 */ Explain_node *node= get_node(1); if (!node) return 1; /* No query plan */ - node->print_explain_json(this, &writer, is_analyze, is_show_cmd); + node->print_explain_json(this, &writer, is_analyze); } writer.end_object(); @@ -656,8 +640,7 @@ int Explain_union::print_explain(Explain_query *query, void Explain_union::print_explain_json(Explain_query *query, - Json_writer *writer, bool is_analyze, - bool no_tmp_tbl) + Json_writer *writer, bool is_analyze) { Json_writer_nesting_guard guard(writer); char table_name_buffer[SAFE_NAME_LEN]; @@ -702,12 +685,12 @@ void Explain_union::print_explain_json(Explain_query *query, //writer->add_member("dependent").add_str("TODO"); //writer->add_member("cacheable").add_str("TODO"); Explain_select *sel= query->get_select(union_members.at(i)); - sel->print_explain_json(query, writer, is_analyze, no_tmp_tbl); + sel->print_explain_json(query, writer, is_analyze); writer->end_object(); } writer->end_array(); - print_explain_json_for_children(query, writer, is_analyze, no_tmp_tbl); + print_explain_json_for_children(query, writer, is_analyze); writer->end_object(); // union_result writer->end_object(); // query_block @@ -769,8 +752,7 @@ bool is_connection_printable_in_json(enum Explain_node::explain_connection_type void Explain_node::print_explain_json_for_children(Explain_query *query, Json_writer *writer, - bool is_analyze, - bool no_tmp_tbl) + bool is_analyze) { Json_writer_nesting_guard guard(writer); @@ -797,7 +779,7 @@ void Explain_node::print_explain_json_for_children(Explain_query *query, } writer->start_object(); - node->print_explain_json(query, writer, is_analyze, no_tmp_tbl); + node->print_explain_json(query, writer, is_analyze); writer->end_object(); } @@ -977,8 +959,7 @@ void Explain_select::add_linkage(Json_writer *writer) } void Explain_select::print_explain_json(Explain_query *query, - Json_writer *writer, bool is_analyze, - bool no_tmp_tbl) + Json_writer *writer, bool is_analyze) { Json_writer_nesting_guard guard(writer); @@ -1000,7 +981,7 @@ void Explain_select::print_explain_json(Explain_query *query, message); writer->end_object(); - print_explain_json_for_children(query, writer, is_analyze, no_tmp_tbl); + print_explain_json_for_children(query, writer, is_analyze); writer->end_object(); } else @@ -1022,17 +1003,17 @@ void Explain_select::print_explain_json(Explain_query *query, if (exec_const_cond) { writer->add_member("const_condition"); - write_item(writer, exec_const_cond, no_tmp_tbl); + write_item(writer, exec_const_cond); } if (outer_ref_cond) { writer->add_member("outer_ref_condition"); - write_item(writer, outer_ref_cond, no_tmp_tbl); + write_item(writer, outer_ref_cond); } if (pseudo_bits_cond) { writer->add_member("pseudo_bits_condition"); - write_item(writer, pseudo_bits_cond, no_tmp_tbl); + write_item(writer, pseudo_bits_cond); } /* we do not print HAVING which always evaluates to TRUE */ @@ -1040,7 +1021,7 @@ void Explain_select::print_explain_json(Explain_query *query, { writer->add_member("having_condition"); if (likely(having)) - write_item(writer, having, no_tmp_tbl); + write_item(writer, having); else { /* Normally we should not go this branch, left just for safety */ @@ -1064,7 +1045,7 @@ void Explain_select::print_explain_json(Explain_query *query, { writer->add_member("filesort").start_object(); auto aggr_node= (Explain_aggr_filesort*)node; - aggr_node->print_json_members(writer, is_analyze, no_tmp_tbl); + aggr_node->print_json_members(writer, is_analyze); break; } case AGGR_OP_REMOVE_DUPLICATES: @@ -1075,7 +1056,7 @@ void Explain_select::print_explain_json(Explain_query *query, //TODO: make print_json_members virtual? writer->add_member("window_functions_computation").start_object(); auto aggr_node= (Explain_aggr_window_funcs*)node; - aggr_node->print_json_members(writer, is_analyze, no_tmp_tbl); + aggr_node->print_json_members(writer, is_analyze); break; } default: @@ -1084,8 +1065,7 @@ void Explain_select::print_explain_json(Explain_query *query, started_objects++; } - Explain_basic_join::print_explain_json_interns(query, writer, is_analyze, - no_tmp_tbl); + Explain_basic_join::print_explain_json_interns(query, writer, is_analyze); for (;started_objects; started_objects--) writer->end_object(); @@ -1114,8 +1094,7 @@ Explain_aggr_filesort::Explain_aggr_filesort(MEM_ROOT *mem_root, void Explain_aggr_filesort::print_json_members(Json_writer *writer, - bool is_analyze, - bool no_tmp_tbl) + bool is_analyze) { char item_buf[256]; String str(item_buf, sizeof(item_buf), &my_charset_bin); @@ -1135,7 +1114,7 @@ void Explain_aggr_filesort::print_json_members(Json_writer *writer, { str.append(STRING_WITH_LEN(", ")); } - append_item_to_str(&str, item, no_tmp_tbl); + append_item_to_str(&str, item); if (*direction == ORDER::ORDER_DESC) str.append(STRING_WITH_LEN(" desc")); } @@ -1148,8 +1127,7 @@ void Explain_aggr_filesort::print_json_members(Json_writer *writer, void Explain_aggr_window_funcs::print_json_members(Json_writer *writer, - bool is_analyze, - bool no_tmp_tbl) + bool is_analyze) { Explain_aggr_filesort *srt; List_iterator it(sorts); @@ -1158,19 +1136,19 @@ void Explain_aggr_window_funcs::print_json_members(Json_writer *writer, { Json_writer_object sort(writer); Json_writer_object filesort(writer, "filesort"); - srt->print_json_members(writer, is_analyze, no_tmp_tbl); + srt->print_json_members(writer, is_analyze); } } void Explain_basic_join::print_explain_json(Explain_query *query, Json_writer *writer, - bool is_analyze, bool no_tmp_tbl) + bool is_analyze) { writer->add_member("query_block").start_object(); writer->add_member("select_id").add_ll(select_id); - print_explain_json_interns(query, writer, is_analyze, no_tmp_tbl); + print_explain_json_interns(query, writer, is_analyze); writer->end_object(); } @@ -1179,7 +1157,7 @@ void Explain_basic_join::print_explain_json(Explain_query *query, void Explain_basic_join:: print_explain_json_interns(Explain_query *query, Json_writer *writer, - bool is_analyze, bool no_tmp_tbl) + bool is_analyze) { { Json_writer_array loop(writer, "nested_loop"); @@ -1192,7 +1170,7 @@ print_explain_json_interns(Explain_query *query, writer->start_array(); } - join_tabs[i]->print_explain_json(query, writer, is_analyze, no_tmp_tbl); + join_tabs[i]->print_explain_json(query, writer, is_analyze); if (join_tabs[i]->end_dups_weedout) { @@ -1201,7 +1179,7 @@ print_explain_json_interns(Explain_query *query, } } } // "nested_loop" - print_explain_json_for_children(query, writer, is_analyze, no_tmp_tbl); + print_explain_json_for_children(query, writer, is_analyze); } @@ -1601,7 +1579,7 @@ const char *String_list::append_str(MEM_ROOT *mem_root, const char *str) } -static void write_item(Json_writer *writer, Item *item, bool no_tmp_tbl) +static void write_item(Json_writer *writer, Item *item) { THD *thd= current_thd; char item_buf[256]; @@ -1611,27 +1589,25 @@ static void write_item(Json_writer *writer, Item *item, bool no_tmp_tbl) ulonglong save_option_bits= thd->variables.option_bits; thd->variables.option_bits &= ~OPTION_QUOTE_SHOW_CREATE; - auto qtype= QT_EXPLAIN | (no_tmp_tbl? QT_DONT_ACCESS_TMP_TABLES : 0); - item->print(&str, (enum_query_type)qtype); + item->print(&str, QT_EXPLAIN); thd->variables.option_bits= save_option_bits; writer->add_str(str.c_ptr_safe()); } -static void append_item_to_str(String *out, Item *item, bool no_tmp_tbl) +static void append_item_to_str(String *out, Item *item) { THD *thd= current_thd; ulonglong save_option_bits= thd->variables.option_bits; thd->variables.option_bits &= ~OPTION_QUOTE_SHOW_CREATE; - auto qtype= QT_EXPLAIN | (no_tmp_tbl? QT_DONT_ACCESS_TMP_TABLES : 0); - item->print(out, (enum_query_type)qtype); + item->print(out, QT_EXPLAIN); + thd->variables.option_bits= save_option_bits; } void Explain_table_access::tag_to_json(Json_writer *writer, - enum explain_extra_tag tag, - bool no_tmp_tbl) + enum explain_extra_tag tag) { switch (tag) { @@ -1655,11 +1631,11 @@ void Explain_table_access::tag_to_json(Json_writer *writer, break; case ET_USING_INDEX_CONDITION: writer->add_member("index_condition"); - write_item(writer, pushed_index_cond, no_tmp_tbl); + write_item(writer, pushed_index_cond); break; case ET_USING_INDEX_CONDITION_BKA: writer->add_member("index_condition_bka"); - write_item(writer, pushed_index_cond, no_tmp_tbl); + write_item(writer, pushed_index_cond); break; case ET_USING_WHERE: { @@ -1673,7 +1649,7 @@ void Explain_table_access::tag_to_json(Json_writer *writer, if (item) { writer->add_member("attached_condition"); - write_item(writer, item, no_tmp_tbl); + write_item(writer, item); } } break; @@ -1807,7 +1783,7 @@ static void trace_engine_stats(handler *file, Json_writer *writer) void Explain_table_access::print_explain_json(Explain_query *query, Json_writer *writer, - bool is_analyze, bool no_tmp_tbl) + bool is_analyze) { Json_writer_object jsobj(writer); @@ -1838,7 +1814,7 @@ void Explain_table_access::print_explain_json(Explain_query *query, } } writer->add_member("filesort").start_object(); - pre_join_sort->print_json_members(writer, is_analyze, no_tmp_tbl); + pre_join_sort->print_json_members(writer, is_analyze); } if (bka_type.is_using_jbuf()) @@ -1976,7 +1952,7 @@ void Explain_table_access::print_explain_json(Explain_query *query, for (int i=0; i < (int)extra_tags.elements(); i++) { - tag_to_json(writer, extra_tags.at(i), no_tmp_tbl); + tag_to_json(writer, extra_tags.at(i)); } if (full_scan_on_null_key) @@ -1997,7 +1973,7 @@ void Explain_table_access::print_explain_json(Explain_query *query, if (where_cond) { writer->add_member("attached_condition"); - write_item(writer, where_cond, no_tmp_tbl); + write_item(writer, where_cond); } if (is_analyze) @@ -2044,7 +2020,7 @@ void Explain_table_access::print_explain_json(Explain_query *query, { writer->add_member("lateral").add_ll(1); } - node->print_explain_json(query, writer, is_analyze, no_tmp_tbl); + node->print_explain_json(query, writer, is_analyze); writer->end_object(); } if (non_merged_sjm_number) @@ -2054,7 +2030,7 @@ void Explain_table_access::print_explain_json(Explain_query *query, writer->add_member("unique").add_ll(1); Explain_node *node= query->get_node(non_merged_sjm_number); node->connection_type= Explain_node::EXPLAIN_NODE_NON_MERGED_SJ; - node->print_explain_json(query, writer, is_analyze, no_tmp_tbl); + node->print_explain_json(query, writer, is_analyze); writer->end_object(); } if (sjm_nest) @@ -2062,7 +2038,7 @@ void Explain_table_access::print_explain_json(Explain_query *query, /* This is a non-merged semi-join table. Print its contents here */ writer->add_member("materialized").start_object(); writer->add_member("unique").add_ll(1); - sjm_nest->print_explain_json(query, writer, is_analyze, no_tmp_tbl); + sjm_nest->print_explain_json(query, writer, is_analyze); writer->end_object(); } @@ -2368,8 +2344,7 @@ int Explain_delete::print_explain(Explain_query *query, void Explain_delete::print_explain_json(Explain_query *query, Json_writer *writer, - bool is_analyze, - bool no_tmp_tbl) + bool is_analyze) { Json_writer_nesting_guard guard(writer); @@ -2384,7 +2359,7 @@ void Explain_delete::print_explain_json(Explain_query *query, writer->end_object(); // query_block return; } - Explain_update::print_explain_json(query, writer, is_analyze, no_tmp_tbl); + Explain_update::print_explain_json(query, writer, is_analyze); } @@ -2487,8 +2462,7 @@ int Explain_update::print_explain(Explain_query *query, void Explain_update::print_explain_json(Explain_query *query, Json_writer *writer, - bool is_analyze, - bool no_tmp_tbl) + bool is_analyze) { Json_writer_nesting_guard guard(writer); @@ -2655,7 +2629,7 @@ void Explain_update::print_explain_json(Explain_query *query, if (where_cond) { writer->add_member("attached_condition"); - write_item(writer, where_cond, no_tmp_tbl); + write_item(writer, where_cond); } /*** The part of plan that is before the buffering/sorting ends here ***/ @@ -2667,7 +2641,7 @@ void Explain_update::print_explain_json(Explain_query *query, writer->end_object(); // table - print_explain_json_for_children(query, writer, is_analyze, no_tmp_tbl); + print_explain_json_for_children(query, writer, is_analyze); writer->end_object(); // query_block } @@ -2697,8 +2671,7 @@ int Explain_insert::print_explain(Explain_query *query, } void Explain_insert::print_explain_json(Explain_query *query, - Json_writer *writer, bool is_analyze, - bool no_tmp_tbl) + Json_writer *writer, bool is_analyze) { Json_writer_nesting_guard guard(writer); @@ -2707,7 +2680,7 @@ void Explain_insert::print_explain_json(Explain_query *query, writer->add_member("table").start_object(); writer->add_member("table_name").add_str(table_name.c_ptr()); writer->end_object(); // table - print_explain_json_for_children(query, writer, is_analyze, no_tmp_tbl); + print_explain_json_for_children(query, writer, is_analyze); writer->end_object(); // query_block } diff --git a/sql/sql_explain.h b/sql/sql_explain.h index 894309568ba..4510c1aa3ce 100644 --- a/sql/sql_explain.h +++ b/sql/sql_explain.h @@ -134,13 +134,12 @@ public: virtual int print_explain(Explain_query *query, select_result_sink *output, uint8 explain_flags, bool is_analyze)=0; virtual void print_explain_json(Explain_query *query, Json_writer *writer, - bool is_analyze, bool no_tmp_tbl)= 0; + bool is_analyze)= 0; int print_explain_for_children(Explain_query *query, select_result_sink *output, uint8 explain_flags, bool is_analyze); void print_explain_json_for_children(Explain_query *query, - Json_writer *writer, bool is_analyze, - bool no_tmp_tbl); + Json_writer *writer, bool is_analyze); bool print_explain_json_cache(Json_writer *writer, bool is_analyze); virtual ~Explain_node() = default; }; @@ -174,10 +173,10 @@ public: int print_explain(Explain_query *query, select_result_sink *output, uint8 explain_flags, bool is_analyze); void print_explain_json(Explain_query *query, Json_writer *writer, - bool is_analyze, bool no_tmp_tbl); + bool is_analyze); void print_explain_json_interns(Explain_query *query, Json_writer *writer, - bool is_analyze, bool no_tmp_tbl); + bool is_analyze); /* A flat array of Explain structs for tables. */ Explain_table_access** join_tabs; @@ -261,7 +260,7 @@ public: int print_explain(Explain_query *query, select_result_sink *output, uint8 explain_flags, bool is_analyze); void print_explain_json(Explain_query *query, Json_writer *writer, - bool is_analyze, bool no_tmp_tbl); + bool is_analyze); Table_access_tracker *get_using_temporary_read_tracker() { @@ -304,8 +303,7 @@ public: Explain_aggr_filesort(MEM_ROOT *mem_root, bool is_analyze, Filesort *filesort); - void print_json_members(Json_writer *writer, bool is_analyze, - bool no_tmp_tbl); + void print_json_members(Json_writer *writer, bool is_analyze); }; class Explain_aggr_tmp_table : public Explain_aggr_node @@ -326,8 +324,7 @@ class Explain_aggr_window_funcs : public Explain_aggr_node public: enum_explain_aggr_node_type get_type() { return AGGR_OP_WINDOW_FUNCS; } - void print_json_members(Json_writer *writer, bool is_analyze, - bool no_tmp_tbl); + void print_json_members(Json_writer *writer, bool is_analyze); friend class Window_funcs_computation; }; @@ -380,7 +377,7 @@ public: int print_explain(Explain_query *query, select_result_sink *output, uint8 explain_flags, bool is_analyze); void print_explain_json(Explain_query *query, Json_writer *writer, - bool is_analyze, bool no_tmp_tbl); + bool is_analyze); const char *fake_select_type; bool using_filesort; @@ -448,19 +445,19 @@ class Explain_insert; (1) - Query plan construction is finished and it is available for reading. - (2) - Temporary tables are freed. After this point, - we need to pass QT_DONT_ACCESS_TMP_TABLES to item->print(). Since - we don't track when #2 happens for each temp.table, we pass this - flag whenever we're printing the query plan for a SHOW command. - Also, we pass it when printing ANALYZE (?) + (2) - Temporary tables are freed (with exception of derived tables + which are freed at step (4)). + The tables are no longer accessible but one can still call + item->print(), even for items that refer to temp.tables (see + Item_field::print() for details) (3) - Notification about (4). - (4) - Tables used by the query are closed. One known consequence of this is - that the values of the const tables' fields are not available anymore. - We could use the same approach as in QT_DONT_ACCESS_TMP_TABLES to work - around that, but instead we disallow producing FORMAT=JSON output at - step #3. We also processing of SHOW command. The rationale is that - query is close to finish anyway. + (4) - Tables used by the query are closed. One consequence of this is that + the values of the const tables' fields are not available anymore. + We could adjust the code in Item_field::print() to handle this but + instead we make step (3) disallow production of FORMAT=JSON output. + We also disable processing of SHOW EXPLAIN|ANALYZE output because + the query is about to finish anyway. (5) - Item objects are freed. After this, it's certainly not possible to print them into FORMAT=JSON output. @@ -499,7 +496,6 @@ public: bool print_explain_str(THD *thd, String *out_str, bool is_analyze); int print_explain_json(select_result_sink *output, bool is_analyze, - bool is_show_cmd, ulonglong query_time_in_progress_ms= 0); /* If true, at least part of EXPLAIN can be printed */ @@ -908,15 +904,14 @@ public: uint select_id, const char *select_type, bool using_temporary, bool using_filesort); void print_explain_json(Explain_query *query, Json_writer *writer, - bool is_analyze, bool no_tmp_tbl); + bool is_analyze); private: void append_tag_name(String *str, enum explain_extra_tag tag); void fill_key_str(String *key_str, bool is_json) const; void fill_key_len_str(String *key_len_str, bool is_json) const; double get_r_filtered(); - void tag_to_json(Json_writer *writer, enum explain_extra_tag tag, - bool no_tmp_tbl); + void tag_to_json(Json_writer *writer, enum explain_extra_tag tag); }; @@ -1003,7 +998,7 @@ public: virtual int print_explain(Explain_query *query, select_result_sink *output, uint8 explain_flags, bool is_analyze); virtual void print_explain_json(Explain_query *query, Json_writer *writer, - bool is_analyze, bool no_tmp_tbl); + bool is_analyze); }; @@ -1029,7 +1024,7 @@ public: int print_explain(Explain_query *query, select_result_sink *output, uint8 explain_flags, bool is_analyze); void print_explain_json(Explain_query *query, Json_writer *writer, - bool is_analyze, bool no_tmp_tbl); + bool is_analyze); }; @@ -1056,7 +1051,7 @@ public: virtual int print_explain(Explain_query *query, select_result_sink *output, uint8 explain_flags, bool is_analyze); virtual void print_explain_json(Explain_query *query, Json_writer *writer, - bool is_analyze, bool no_tmp_tbl); + bool is_analyze); }; diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index 31f31850807..c367bc7fe15 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -5802,7 +5802,6 @@ int LEX::print_explain(select_result_sink *output, uint8 explain_flags, query_time_in_progress_ms= (now - start_time) / (HRTIME_RESOLUTION / 1000); res= explain->print_explain_json(output, is_analyze, - true /* is_show_cmd */, query_time_in_progress_ms); } else diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 2cfbbc22af9..b0338250088 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -6235,8 +6235,7 @@ static bool execute_sqlcom_select(THD *thd, TABLE_LIST *all_tables) result->remove_offset_limit(); if (lex->explain_json) { - lex->explain->print_explain_json(result, lex->analyze_stmt, - false /* is_show_cmd */); + lex->explain->print_explain_json(result, lex->analyze_stmt); } else { diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 2d4e28f095c..3a6a8859326 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -20014,7 +20014,7 @@ bool Create_tmp_table::add_fields(THD *thd, if (!(tmp_item= new (thd->mem_root) Item_field(thd, new_field))) goto err; - ((Item_field*) tmp_item)->set_refers_to_temp_table(true); + ((Item_field*) tmp_item)->set_refers_to_temp_table(); arg= sum_item->set_arg(i, thd, tmp_item); thd->mem_root= &table->mem_root; @@ -27290,7 +27290,7 @@ change_to_use_tmp_fields(THD *thd, Ref_ptr_array ref_pointer_array, Item_field *new_field= new (thd->mem_root) Item_field(thd, field); if (!suv || !new_field) DBUG_RETURN(true); // Fatal error - new_field->set_refers_to_temp_table(true); + new_field->set_refers_to_temp_table(); List list; list.push_back(new_field, thd->mem_root); suv->set_arguments(thd, list); @@ -27309,7 +27309,7 @@ change_to_use_tmp_fields(THD *thd, Ref_ptr_array ref_pointer_array, { item_field= (Item*) new (thd->mem_root) Item_field(thd, field); if (item_field) - ((Item_field*) item_field)->set_refers_to_temp_table(true); + ((Item_field*) item_field)->set_refers_to_temp_table(); } if (!item_field) DBUG_RETURN(true); // Fatal error diff --git a/sql/sql_window.cc b/sql/sql_window.cc index 4d6d5ddd951..9a46413d3e7 100644 --- a/sql/sql_window.cc +++ b/sql/sql_window.cc @@ -3122,7 +3122,7 @@ bool Window_funcs_sort::setup(THD *thd, SQL_SELECT *sel, Item_field *item= new (thd->mem_root) Item_field(thd, join_tab->table->field[0]); if (item) - item->set_refers_to_temp_table(true); + item->set_refers_to_temp_table(); order->item= (Item **)alloc_root(thd->mem_root, 2 * sizeof(Item *)); order->item[1]= NULL; order->item[0]= item;