MWL#89: Cost-based choice between Materialization and IN->EXISTS transformation

Merge 5.3-mwl89 into 5.3 main.

There is one remaining test failure in this merge:
innodb_mysql_lock2. All other tests have been checked to
deliver the same results/explains as 5.3-mwl89, including
the few remaining wrong results.
This commit is contained in:
unknown 2010-11-05 14:42:58 +02:00
commit bc7369b74b
26 changed files with 5814 additions and 801 deletions

View File

@ -0,0 +1,152 @@
-- echo
-- echo /* A. Subqueries in the SELECT clause. */
explain
select a1, a1 in (select b1 from t2 where b1 > '0') from t1;
select a1, a1 in (select b1 from t2 where b1 > '0') from t1;
-- echo
explain
select a1, a2, (a1, a2) in (select b1, b2 from t2 where b1 > '0') from t1;
select a1, a2, (a1, a2) in (select b1, b2 from t2 where b1 > '0') from t1;
-- echo
explain
select a1, a2, (a1, a2) in (select b1, b2 from t2 where b1 > '0' and b1 < '9') from t1;
select a1, a2, (a1, a2) in (select b1, b2 from t2 where b1 > '0' and b1 < '9') from t1;
-- echo
-- echo /*
-- echo B. "Natural" examples of subqueries without grouping that
-- echo cannot be flattened into semijoin.
-- echo */
explain
select a1 from t1 where a1 in (select b2 from t2) or a2 < '9';
select a1 from t1 where a1 in (select b2 from t2) or a2 < '9';
-- echo
explain
select a1, a2 from t1 where (a1, a2) in (select b1, b2 from t2 where b1 > '0') or a2 < '9';
select a1, a2 from t1 where (a1, a2) in (select b1, b2 from t2 where b1 > '0') or a2 < '9';
-- echo UNION subqueries are currently limited to only use IN-TO-EXISTS.
explain
select a2 from t1 where a2 in (select b2 from t2 UNION select b3 from t2 as t3);
select a2 from t1 where a2 in (select b2 from t2 UNION select b3 from t2 as t3);
-- echo
explain
select a1 from t1 where a1 = '1 - 02' and a1 in (select max(b1) from t2 where b2 = '2 - 02');
select a1 from t1 where a1 = '1 - 02' and a1 in (select max(b1) from t2 where b2 = '2 - 02');
-- echo
explain
select a1, a2 from t1 where (a1, a2) in (select b1, b2 from t2 order by b3);
select a1, a2 from t1 where (a1, a2) in (select b1, b2 from t2 order by b3);
-- echo
-- echo /* C. Subqueries in the WHERE clause with GROUP BY. */
explain
select * from t1 where a1 in (select b1 from t2 where b1 > '0' group by b1);
select * from t1 where a1 in (select b1 from t2 where b1 > '0' group by b1);
-- echo
explain
select * from t1 where (a1, a2) in (select b1, b2 from t2 where b1 > '0' group by b1, b2);
select * from t1 where (a1, a2) in (select b1, b2 from t2 where b1 > '0' group by b1, b2);
-- echo
explain
select * from t1 where (a1, a2) in (select b1, b2 from t2 where b1 > '0' group by b1, b2 having b2 < '2 - 04');
select * from t1 where (a1, a2) in (select b1, b2 from t2 where b1 > '0' group by b1, b2 having b2 < '2 - 04');
-- echo
explain
select * from t1 where (a1, a2, a3) in (select b1, b2, b3 from t2 group by b1, b2, b3);
select * from t1 where (a1, a2, a3) in (select b1, b2, b3 from t2 group by b1, b2, b3);
-- echo
explain
select * from t1 where (a1, a2, a3) in (select b1, b2, b3 from t2 where b3 = '3 - 02' group by b1, b2);
select * from t1 where (a1, a2, a3) in (select b1, b2, b3 from t2 where b3 = '3 - 02' group by b1, b2);
-- echo
explain
select * from t1 where (a1,a2,a3) in (select b1,b2,b3 from t2 where b1 = '1 - 01' group by b1,b2,b3);
select * from t1 where (a1,a2,a3) in (select b1,b2,b3 from t2 where b1 = '1 - 01' group by b1,b2,b3);
-- echo
-- echo /*
-- echo D. Subqueries for which materialization is not possible, and the
-- echo optimizer reverts to in-to-exists.
-- echo */
# The first two cases are rejected during the prepare phase by the procedure
# subquery_types_allow_materialization().
explain
select left(a1,7), left(a2,7) from t1_1024 where a1 in (select b1 from t2_1024 where b1 > '0') or a2 < '9';
select left(a1,7), left(a2,7) from t1_1024 where a1 in (select b1 from t2_1024 where b1 > '0') or a2 < '9';
explain
select left(a1,7), left(a2,7) from t1_1024 where (a1,a2) in (select b1, b2 from t2_1024 where b1 > '0') or a2 < '9';
select left(a1,7), left(a2,7) from t1_1024 where (a1,a2) in (select b1, b2 from t2_1024 where b1 > '0') or a2 < '9';
-- echo
# The following two subqueries return the result of a string function with a
# blob argument, where the return type may be != blob. These are rejected during
# cost-based optimization when attempting to create a temporary table.
explain
select left(a1,7), left(a2,7) from t1_1024 where a1 in (select substring(b1,1,1024) from t2_1024 where b1 > '0') or a2 < '9';
select left(a1,7), left(a2,7) from t1_1024 where a1 in (select substring(b1,1,1024) from t2_1024 where b1 > '0') or a2 < '9';
explain
select left(a1,7), left(a2,7) from t1_1024 where (a1,a2) in (select substring(b1,1,1024), substring(b2,1,1024) from t2_1024 where b1 > '0') or a2 < '9';
select left(a1,7), left(a2,7) from t1_1024 where (a1,a2) in (select substring(b1,1,1024), substring(b2,1,1024) from t2_1024 where b1 > '0') or a2 < '9';
-- echo
-- echo
-- echo /* E. Edge cases. */
-- echo
-- echo /* E.1 Both materialization and in_to_exists cannot be off. */
set @save_optimizer_switch=@@optimizer_switch;
set @@optimizer_switch = 'materialization=off,in_to_exists=off';
--error ER_ILLEGAL_SUBQUERY_OPTIMIZER_SWITCHES
select * from t1 where a1 in (select b1 from t2 where b1 > '0' group by b1);
set @@optimizer_switch = @save_optimizer_switch;
-- echo /* E.2 Outer query without tables, always uses IN-TO-EXISTS. */
explain
select '1 - 03' in (select b1 from t2 where b1 > '0');
select '1 - 03' in (select b1 from t2 where b1 > '0');
-- echo /* E.3 Subqueries without tables. */
explain
select a1 from t1 where a1 in (select '1 - 03') or a2 < '9';
select a1 from t1 where a1 in (select '1 - 03') or a2 < '9';
-- echo UNION subqueries are currently limited to only use IN-TO-EXISTS.
explain
select a1 from t1 where a1 in (select '1 - 03' UNION select '1 - 02');
select a1 from t1 where a1 in (select '1 - 03' UNION select '1 - 02');
-- echo /* E.4 optimize_cond detects FALSE where/having clause. */
explain
select a1 from t1 where a1 in (select b1 from t2 where b1 = b2 and b2 = '1 - 03' and b1 = '1 - 02' ) or a2 < '9';
select a1 from t1 where a1 in (select b1 from t2 where b1 = b2 and b2 = '1 - 03' and b1 = '1 - 02' ) or a2 < '9';
-- echo /* E.5 opt_sum_query detects no matching min/max row or substitutes MIN/MAX with a const. */
-- echo TODO this test produces wrong result due to missing logic to handle the case
-- echo when JOIN::optimize detects an empty subquery result.
explain
select a1 from t1 where a1 in (select max(b1) from t2);
select a1 from t1 where a1 in (select max(b1) from t2);
-- echo
explain
select a1 from t1 where a1 in (select max(b1) from t2 where b1 = '7 - 02');
select a1 from t1 where a1 in (select max(b1) from t2 where b1 = '7 - 02');
-- echo /* E.6 make_join_select detects impossible WHERE. *
-- echo TODO
-- echo /* E.7 constant optimization detects "no matching row in const table". */
-- echo TODO
-- echo /* E.8 Impossible WHERE noticed after reading const tables. */
explain
select '1 - 03' in (select b1 from t2 where b1 > '0' and b1 < '0');
select '1 - 03' in (select b1 from t2 where b1 > '0' and b1 < '0');
-- echo
-- echo /* F. UPDATE/DELETE with subqueries. */
-- echo
-- echo TODO
-- echo

View File

@ -840,11 +840,16 @@ x ROW(11, 12) = (SELECT MAX(x), 22) ROW(11, 12) IN (SELECT MAX(x), 22)
1 0 0
2 0 0
11 0 0
# 2nd and 3rd columns should be same for x == 11 only
# 2nd and 3rd columns should be same
EXPLAIN SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 3
3 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL No tables used
2 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL No tables used
SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1;
x ROW(11, 12) = (SELECT MAX(x), 12) ROW(11, 12) IN (SELECT MAX(x), 12)
1 0 1
2 0 1
1 0 0
2 0 0
11 1 1
DROP TABLE t1;
# both columns should be same

View File

@ -844,11 +844,16 @@ x ROW(11, 12) = (SELECT MAX(x), 22) ROW(11, 12) IN (SELECT MAX(x), 22)
1 0 0
2 0 0
11 0 0
# 2nd and 3rd columns should be same for x == 11 only
# 2nd and 3rd columns should be same
EXPLAIN SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 3
3 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL No tables used
2 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL No tables used
SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1;
x ROW(11, 12) = (SELECT MAX(x), 12) ROW(11, 12) IN (SELECT MAX(x), 12)
1 0 1
2 0 1
1 0 0
2 0 0
11 1 1
DROP TABLE t1;
# both columns should be same

View File

@ -30,7 +30,7 @@ create index it3i3 on t3i (c1, c2);
insert into t1i select * from t1;
insert into t2i select * from t2;
insert into t3i select * from t3;
set @@optimizer_switch='semijoin=off';
set @@optimizer_switch='materialization=on,in_to_exists=off,semijoin=off';
/******************************************************************************
* Simple tests.
******************************************************************************/
@ -176,33 +176,33 @@ a1 a2
1 - 02 2 - 02
select * from t1 where (a1, a2) in (select b1, min(b2) from t2i limit 1,1);
ERROR 42000: This version of MySQL doesn't yet support 'LIMIT & IN/ALL/ANY/SOME subquery'
set @@optimizer_switch='default,semijoin=off';
set @@optimizer_switch='materialization=on,in_to_exists=off,semijoin=on';
prepare st1 from
"select * from t1 where (a1, a2) in (select b1, min(b2) from t2 where b1 > '0' group by b1)";
set @@optimizer_switch='default,materialization=off';
set @@optimizer_switch='materialization=off,in_to_exists=on,semijoin=on';
execute st1;
a1 a2
1 - 01 2 - 01
1 - 02 2 - 02
set @@optimizer_switch='default,semijoin=off';
set @@optimizer_switch='materialization=on,in_to_exists=off,semijoin=on';
execute st1;
a1 a2
1 - 01 2 - 01
1 - 02 2 - 02
set @@optimizer_switch='default,materialization=off';
set @@optimizer_switch='materialization=off,in_to_exists=on,semijoin=on';
prepare st1 from
"select * from t1 where (a1, a2) in (select b1, min(b2) from t2 where b1 > '0' group by b1)";
set @@optimizer_switch='default,semijoin=off';
set @@optimizer_switch='materialization=on,in_to_exists=off,semijoin=off';
execute st1;
a1 a2
1 - 01 2 - 01
1 - 02 2 - 02
set @@optimizer_switch='default,materialization=off';
set @@optimizer_switch='materialization=off,in_to_exists=on,semijoin=on';
execute st1;
a1 a2
1 - 01 2 - 01
1 - 02 2 - 02
set @@optimizer_switch='default,semijoin=off';
set @@optimizer_switch='materialization=on,in_to_exists=off,semijoin=off';
explain extended
select * from t1 where (a1, a2) in (select b1, b2 from t2 order by b1, b2);
id select_type table type possible_keys key key_len ref rows filtered Extra
@ -549,7 +549,7 @@ a1 a2
Test that BLOBs are not materialized (except when arguments of some functions).
*/
# force materialization to be always considered
set @@optimizer_switch='semijoin=off';
set @@optimizer_switch='materialization=on,in_to_exists=off,semijoin=off';
set @prefix_len = 6;
set @blob_len = 16;
set @suffix_len = @blob_len - @prefix_len;
@ -951,7 +951,7 @@ insert into t1bit values (b'010', b'110');
insert into t2bit values (b'001', b'101');
insert into t2bit values (b'010', b'110');
insert into t2bit values (b'110', b'111');
set @@optimizer_switch='semijoin=off';
set @@optimizer_switch='materialization=on,in_to_exists=off,semijoin=off';
explain extended select bin(a1), bin(a2)
from t1bit
where (a1, a2) in (select b1, b2 from t2bit);
@ -994,7 +994,7 @@ drop table t1, t2, t3, t1i, t2i, t3i, columns;
/******************************************************************************
* Test the cache of the left operand of IN.
******************************************************************************/
set @@optimizer_switch='semijoin=off';
set @@optimizer_switch='materialization=on,in_to_exists=off,semijoin=off';
create table t1 (s1 int);
create table t2 (s2 int);
insert into t1 values (5),(1),(0);
@ -1136,27 +1136,40 @@ drop table t2;
create table t1 (a1 int key);
create table t2 (b1 int);
insert into t1 values (5);
Only the last query returns correct result. Filed as BUG#40037.
set @@optimizer_switch='materialization=on,in_to_exists=off,semijoin=off';
explain select min(a1) from t1 where 7 in (select b1 from t2 group by b1);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY NULL NULL NULL NULL NULL NULL NULL Select tables optimized away
2 SUBQUERY t2 system NULL NULL NULL NULL 0 const row not found
select min(a1) from t1 where 7 in (select b1 from t2 group by b1);
min(a1)
set @@optimizer_switch='default,materialization=off';
NULL
set @@optimizer_switch='materialization=off,in_to_exists=on,semijoin=off';
explain select min(a1) from t1 where 7 in (select b1 from t2 group by b1);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY NULL NULL NULL NULL NULL NULL NULL Select tables optimized away
2 DEPENDENT SUBQUERY t2 system NULL NULL NULL NULL 0 const row not found
select min(a1) from t1 where 7 in (select b1 from t2 group by b1);
min(a1)
set @@optimizer_switch='default,semijoin=off';
NULL
set @@optimizer_switch='materialization=on,in_to_exists=off,semijoin=off';
explain select min(a1) from t1 where 7 in (select b1 from t2);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY NULL NULL NULL NULL NULL NULL NULL Select tables optimized away
2 SUBQUERY t2 system NULL NULL NULL NULL 0 const row not found
select min(a1) from t1 where 7 in (select b1 from t2);
min(a1)
set @@optimizer_switch='default,materialization=off';
NULL
set @@optimizer_switch='materialization=off,in_to_exists=on,semijoin=off';
explain select min(a1) from t1 where 7 in (select b1 from t2);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY NULL NULL NULL NULL NULL NULL NULL Select tables optimized away
2 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables
select min(a1) from t1 where 7 in (select b1 from t2);
min(a1)
NULL
set @@optimizer_switch='materialization=off,in_to_exists=off,semijoin=on';
explain select min(a1) from t1 where 7 in (select b1 from t2);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables
@ -1167,7 +1180,7 @@ drop table t1,t2;
create table t1 (a char(2), b varchar(10));
insert into t1 values ('a', 'aaa');
insert into t1 values ('aa', 'aaaa');
set @@optimizer_switch='default,semijoin=off';
set @@optimizer_switch='materialization=on,in_to_exists=off,semijoin=off';
explain select a,b from t1 where b in (select a from t1);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 2 Using where
@ -1187,7 +1200,7 @@ INSERT INTO t1 (f1, f2) VALUES (10, 1.668);
CREATE TABLE t2 LIKE t1;
INSERT INTO t2 VALUES (1, 1.789);
INSERT INTO t2 VALUES (13, 1.454);
SET @@optimizer_switch='default,semijoin=on,materialization=on';
SET @@optimizer_switch='materialization=on,in_to_exists=off,semijoin=on';
EXPLAIN SELECT COUNT(*) FROM t1 WHERE (f1,f2) IN (SELECT f1,f2 FROM t2);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY subselect2 ALL unique_key NULL NULL NULL 2
@ -1208,7 +1221,7 @@ PRIMARY KEY (pk)
INSERT INTO t1 VALUES (1,'o','ffff','ffff','ffoo'),(2,'f','ffff','ffff','ffff');
CREATE TABLE t2 LIKE t1;
INSERT INTO t2 VALUES (1,'i','iiii','iiii','iiii'),(2,'f','ffff','ffff','ffff');
SET @@optimizer_switch='default,semijoin=on,materialization=on';
SET @@optimizer_switch='materialization=on,in_to_exists=off,semijoin=on';
EXPLAIN SELECT pk FROM t1 WHERE (a) IN (SELECT a FROM t2 WHERE pk > 0);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 2
@ -1237,7 +1250,7 @@ i
3
4
set @save_optimizer_switch=@@optimizer_switch;
set session optimizer_switch='materialization=off';
set session optimizer_switch='materialization=off,in_to_exists=on';
select * from t1 where t1.i in (select t2.i from t2 join t3 where t2.i + t3.i = 5);
i
1

File diff suppressed because it is too large Load Diff

View File

@ -681,7 +681,8 @@ SELECT a, ROW(11, 12) = (SELECT a, 12), ROW(11, 12) IN (SELECT a, 12) FROM t1;
# The x alias is used below to workaround bug #40674.
# Regression tests for sum function on outer column in subselect from dual:
SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 22), ROW(11, 12) IN (SELECT MAX(x), 22) FROM t1;
--echo # 2nd and 3rd columns should be same for x == 11 only
--echo # 2nd and 3rd columns should be same
EXPLAIN SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1;
SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1;
DROP TABLE t1;

View File

@ -48,7 +48,7 @@ insert into t2i select * from t2;
insert into t3i select * from t3;
# force the use of materialization
set @@optimizer_switch='semijoin=off';
set @@optimizer_switch='materialization=on,in_to_exists=off,semijoin=off';
/******************************************************************************
* Simple tests.
@ -111,22 +111,22 @@ select * from t1 where (a1, a2) in (select b1, min(b2) from t2i limit 1,1);
# test re-optimization/re-execution with different execution methods
# prepare once, exec with different modes
set @@optimizer_switch='default,semijoin=off';
set @@optimizer_switch='materialization=on,in_to_exists=off,semijoin=on';
prepare st1 from
"select * from t1 where (a1, a2) in (select b1, min(b2) from t2 where b1 > '0' group by b1)";
set @@optimizer_switch='default,materialization=off';
set @@optimizer_switch='materialization=off,in_to_exists=on,semijoin=on';
execute st1;
set @@optimizer_switch='default,semijoin=off';
set @@optimizer_switch='materialization=on,in_to_exists=off,semijoin=on';
execute st1;
set @@optimizer_switch='default,materialization=off';
set @@optimizer_switch='materialization=off,in_to_exists=on,semijoin=on';
prepare st1 from
"select * from t1 where (a1, a2) in (select b1, min(b2) from t2 where b1 > '0' group by b1)";
set @@optimizer_switch='default,semijoin=off';
set @@optimizer_switch='materialization=on,in_to_exists=off,semijoin=off';
execute st1;
set @@optimizer_switch='default,materialization=off';
set @@optimizer_switch='materialization=off,in_to_exists=on,semijoin=on';
execute st1;
set @@optimizer_switch='default,semijoin=off';
set @@optimizer_switch='materialization=on,in_to_exists=off,semijoin=off';
# materialize the result of ORDER BY
# non-indexed fields
@ -327,7 +327,7 @@ select * from t1 order by (select col from columns limit 1);
Test that BLOBs are not materialized (except when arguments of some functions).
*/
# force materialization to be always considered
set @@optimizer_switch='semijoin=off';
set @@optimizer_switch='materialization=on,in_to_exists=off,semijoin=off';
set @prefix_len = 6;
# BLOB == 16 (small blobs that could be stored in HEAP tables)
@ -680,7 +680,7 @@ insert into t2bit values (b'001', b'101');
insert into t2bit values (b'010', b'110');
insert into t2bit values (b'110', b'111');
set @@optimizer_switch='semijoin=off';
set @@optimizer_switch='materialization=on,in_to_exists=off,semijoin=off';
explain extended select bin(a1), bin(a2)
from t1bit
@ -718,7 +718,7 @@ drop table t1, t2, t3, t1i, t2i, t3i, columns;
/******************************************************************************
* Test the cache of the left operand of IN.
******************************************************************************/
set @@optimizer_switch='semijoin=off';
set @@optimizer_switch='materialization=on,in_to_exists=off,semijoin=off';
# Test that default values of Cached_item are not used for comparison
create table t1 (s1 int);
@ -812,23 +812,28 @@ drop table t2;
create table t1 (a1 int key);
create table t2 (b1 int);
insert into t1 values (5);
-- echo Only the last query returns correct result. Filed as BUG#40037.
# Query with group by, executed via materialization
set @@optimizer_switch='materialization=on,in_to_exists=off,semijoin=off';
explain select min(a1) from t1 where 7 in (select b1 from t2 group by b1);
select min(a1) from t1 where 7 in (select b1 from t2 group by b1);
# Query with group by, executed via IN=>EXISTS
set @@optimizer_switch='default,materialization=off';
set @@optimizer_switch='materialization=off,in_to_exists=on,semijoin=off';
explain select min(a1) from t1 where 7 in (select b1 from t2 group by b1);
select min(a1) from t1 where 7 in (select b1 from t2 group by b1);
# Executed with materialization
set @@optimizer_switch='default,semijoin=off';
set @@optimizer_switch='materialization=on,in_to_exists=off,semijoin=off';
explain select min(a1) from t1 where 7 in (select b1 from t2);
select min(a1) from t1 where 7 in (select b1 from t2);
# Executed via IN=>EXISTS
set @@optimizer_switch='materialization=off,in_to_exists=on,semijoin=off';
explain select min(a1) from t1 where 7 in (select b1 from t2);
select min(a1) from t1 where 7 in (select b1 from t2);
# Executed with semi-join. Notice, this time we get a different result (NULL).
# This is the only correct result of all four queries. This difference is
# This is the only correct result of all five queries. This difference is
# filed as BUG#40037.
set @@optimizer_switch='default,materialization=off';
set @@optimizer_switch='materialization=off,in_to_exists=off,semijoin=on';
explain select min(a1) from t1 where 7 in (select b1 from t2);
select min(a1) from t1 where 7 in (select b1 from t2);
drop table t1,t2;
@ -840,7 +845,7 @@ create table t1 (a char(2), b varchar(10));
insert into t1 values ('a', 'aaa');
insert into t1 values ('aa', 'aaaa');
set @@optimizer_switch='default,semijoin=off';
set @@optimizer_switch='materialization=on,in_to_exists=off,semijoin=off';
explain select a,b from t1 where b in (select a from t1);
select a,b from t1 where b in (select a from t1);
prepare st1 from "select a,b from t1 where b in (select a from t1)";
@ -861,7 +866,7 @@ CREATE TABLE t2 LIKE t1;
INSERT INTO t2 VALUES (1, 1.789);
INSERT INTO t2 VALUES (13, 1.454);
SET @@optimizer_switch='default,semijoin=on,materialization=on';
SET @@optimizer_switch='materialization=on,in_to_exists=off,semijoin=on';
EXPLAIN SELECT COUNT(*) FROM t1 WHERE (f1,f2) IN (SELECT f1,f2 FROM t2);
SELECT COUNT(*) FROM t1 WHERE (f1,f2) IN (SELECT f1,f2 FROM t2);
@ -883,7 +888,7 @@ INSERT INTO t1 VALUES (1,'o','ffff','ffff','ffoo'),(2,'f','ffff','ffff','ffff');
CREATE TABLE t2 LIKE t1;
INSERT INTO t2 VALUES (1,'i','iiii','iiii','iiii'),(2,'f','ffff','ffff','ffff');
SET @@optimizer_switch='default,semijoin=on,materialization=on';
SET @@optimizer_switch='materialization=on,in_to_exists=off,semijoin=on';
EXPLAIN SELECT pk FROM t1 WHERE (a) IN (SELECT a FROM t2 WHERE pk > 0);
SELECT pk FROM t1 WHERE (a) IN (SELECT a FROM t2 WHERE pk > 0);
SELECT pk FROM t1 WHERE (b,c,d) IN (SELECT b,c,d FROM t2 WHERE pk > 0);
@ -900,7 +905,7 @@ create table t3(i int);
insert into t3 values (1), (2), (3), (4), (5), (6), (7), (8), (9), (10);
select * from t1 where t1.i in (select t2.i from t2 join t3 where t2.i + t3.i = 5);
set @save_optimizer_switch=@@optimizer_switch;
set session optimizer_switch='materialization=off';
set session optimizer_switch='materialization=off,in_to_exists=on';
select * from t1 where t1.i in (select t2.i from t2 join t3 where t2.i + t3.i = 5);
set session optimizer_switch=@save_optimizer_switch;
drop table t1, t2, t3;

View File

@ -0,0 +1,271 @@
#
# Tests of cost-based choice between the materialization and in-to-exists
# subquery execution strategies (MWL#89)
#
--disable_warnings
drop table if exists t1, t2, t1_1024, t2_1024;
drop procedure if exists make_t1_indexes;
drop procedure if exists make_t2_indexes;
drop procedure if exists remove_t1_indexes;
drop procedure if exists remove_t2_indexes;
drop procedure if exists add_materialization_data;
drop procedure if exists delete_materialization_data;
drop procedure if exists set_all_columns_not_null;
drop procedure if exists set_all_columns_nullable;
--enable_warnings
create table t1 (a1 char(8), a2 char(8), a3 char(8), a4 int);
insert into t1 values ('1 - 00', '2 - 00', '3 - 00', 0);
insert into t1 values ('1 - 01', '2 - 01', '3 - 01', 1);
insert into t1 values ('1 - 02', '2 - 02', '3 - 02', 2);
create table t2 (b1 char(8), b2 char(8), b3 char(8), b4 int);
insert into t2 values ('1 - 01', '2 - 01', '3 - 01', 1);
insert into t2 values ('1 - 01', '2 - 01', '3 - 02', 2);
insert into t2 values ('1 - 02', '2 - 02', '3 - 03', 3);
insert into t2 values ('1 - 02', '2 - 02', '3 - 04', 4);
insert into t2 values ('1 - 03', '2 - 03', '3 - 05', 5);
create table t1_1024 (a1 blob(1024), a2 blob(1024));
insert into t1_1024 values (concat('1 - 00', repeat('x', 1018)), concat('2 - 00', repeat('x', 1018)));
insert into t1_1024 values (concat('1 - 01', repeat('x', 1018)), concat('2 - 01', repeat('x', 1018)));
create table t2_1024 (b1 blob(1024), b2 blob(1024));
insert into t2_1024 values (concat('1 - 01', repeat('x', 1018)), concat('2 - 01', repeat('x', 1018)));
insert into t2_1024 values (concat('1 - 02', repeat('x', 1018)), concat('2 - 02', repeat('x', 1018)));
insert into t2_1024 values (concat('1 - 03', repeat('x', 1018)), concat('2 - 03', repeat('x', 1018)));
insert into t2_1024 values (concat('1 - 04', repeat('x', 1018)), concat('2 - 04', repeat('x', 1018)));
delimiter |;
# make_t1_indexes: index the outer tables.
# Creates single-column indexes on t1.a1 and t1.a2 plus a composite index on
# (a1, a2), and the same three shapes on t1_1024 using 6-character prefixes
# of its columns. The index names created here are the ones dropped by
# remove_t1_indexes.
create procedure make_t1_indexes()
begin
create index it1i1 on t1 (a1);
create index it1i2 on t1 (a2);
create index it1i3 on t1 (a1, a2);
create index it1_1024i1 on t1_1024 (a1(6));
create index it1_1024i2 on t1_1024 (a2(6));
create index it1_1024i3 on t1_1024 (a1(6), a2(6));
end|
# make_t2_indexes: index the subquery tables.
# Creates single-column indexes on t2.b1 and t2.b2, a composite index on
# (b1, b2), and a UNIQUE index on (b1, b2, b3); then creates 6-character
# prefix indexes on t2_1024 (b1, b2, and the pair). The index names created
# here are the ones dropped by remove_t2_indexes.
create procedure make_t2_indexes()
begin
create index it2i1 on t2 (b1);
create index it2i2 on t2 (b2);
create index it2i3 on t2 (b1, b2);
create unique index it2i4 on t2 (b1, b2, b3);
create index it2_1024i1 on t2_1024 (b1(6));
create index it2_1024i2 on t2_1024 (b2(6));
create index it2_1024i3 on t2_1024 (b1(6), b2(6));
end|
# remove_t1_indexes: drops the six indexes it1i1..it1i3 (on t1) and
# it1_1024i1..it1_1024i3 (on t1_1024), i.e. exactly the names that
# make_t1_indexes creates.
create procedure remove_t1_indexes()
begin
drop index it1i1 on t1;
drop index it1i2 on t1;
drop index it1i3 on t1;
drop index it1_1024i1 on t1_1024;
drop index it1_1024i2 on t1_1024;
drop index it1_1024i3 on t1_1024;
end|
# remove_t2_indexes: drops the seven indexes it2i1..it2i4 (on t2, including
# the unique index it2i4) and it2_1024i1..it2_1024i3 (on t2_1024), i.e.
# exactly the names that make_t2_indexes creates.
create procedure remove_t2_indexes()
begin
drop index it2i1 on t2;
drop index it2i2 on t2;
drop index it2i3 on t2;
drop index it2i4 on t2;
drop index it2_1024i1 on t2_1024;
drop index it2_1024i2 on t2_1024;
drop index it2_1024i3 on t2_1024;
end|
# add_materialization_data: grows the outer tables.
# Appends five rows to t1 (keys '1 - 03' through '1 - 07', a4 = 3..7) and one
# 1024-character row with prefix '1 - 03' to t1_1024. The test script calls
# this under the comment "make materialization cheaper".
create procedure add_materialization_data()
begin
insert into t1 values ('1 - 03', '2 - 03', '3 - 03', 3);
insert into t1 values ('1 - 04', '2 - 04', '3 - 04', 4);
insert into t1 values ('1 - 05', '2 - 05', '3 - 05', 5);
insert into t1 values ('1 - 06', '2 - 06', '3 - 06', 6);
insert into t1 values ('1 - 07', '2 - 07', '3 - 07', 7);
insert into t1_1024 values (concat('1 - 03', repeat('x', 1018)), concat('2 - 03', repeat('x', 1018)));
end|
# delete_materialization_data: removes every row whose a1 compares >= '1 - 03'
# from t1 and t1_1024. The rows added by add_materialization_data all have a1
# starting at '1 - 03' or later, so this presumably restores the initial data
# set (the initial rows use '1 - 00'..'1 - 02') - verify if the fixture data
# changes. The test script calls this under "make IN-TO-EXISTS cheaper".
create procedure delete_materialization_data()
begin
delete from t1 where a1 >= '1 - 03';
delete from t1_1024 where a1 >= '1 - 03';
end|
# set_all_columns_not_null: redefines the char(8) columns a1..a3 of t1 and
# b1..b3 of t2 as NOT NULL. The int columns a4/b4 and the *_1024 tables are
# not altered.
create procedure set_all_columns_not_null()
begin
alter table t1 modify a1 char(8) not null, modify a2 char(8) not null, modify a3 char(8) not null;
alter table t2 modify b1 char(8) not null, modify b2 char(8) not null, modify b3 char(8) not null;
end|
# set_all_columns_nullable: redefines the char(8) columns a1..a3 of t1 and
# b1..b3 of t2 as nullable again, reversing set_all_columns_not_null. The int
# columns a4/b4 and the *_1024 tables are not altered.
create procedure set_all_columns_nullable()
begin
alter table t1 modify a1 char(8) null, modify a2 char(8) null, modify a3 char(8) null;
alter table t2 modify b1 char(8) null, modify b2 char(8) null, modify b3 char(8) null;
end|
delimiter ;|
-- echo
-- echo /******************************************************************************
-- echo 1. Both materialization and in-to-exists are ON, make a cost-based choice.
-- echo ******************************************************************************/
set @@optimizer_switch='materialization=on,in_to_exists=on';
-- echo
-- echo /* 1.1 In-to-exists is cheaper */
call make_t1_indexes();
-- echo /* 1.1.1 non-indexed table access */
-- source include/subselect_mat_cost.inc
-- echo /* 1.1.2 indexed table access, nullabale columns. */
call make_t2_indexes();
-- source include/subselect_mat_cost.inc
-- echo /* 1.1.3 indexed table access, non-nullabale columns. */
call set_all_columns_not_null();
-- source include/subselect_mat_cost.inc
call set_all_columns_nullable();
-- echo
-- echo /* 1.2 Materialization is cheaper */
# make materialization cheaper
call add_materialization_data();
call remove_t1_indexes();
-- echo /* 1.2.1 non-indexed table access */
call remove_t2_indexes();
-- source include/subselect_mat_cost.inc
-- echo /* 1.2.2 indexed table access, nullabale columns. */
call make_t2_indexes();
-- source include/subselect_mat_cost.inc
-- echo /* 1.2.3 indexed table access, non-nullabale columns. */
call set_all_columns_not_null();
-- source include/subselect_mat_cost.inc
call set_all_columns_nullable();
-- echo /******************************************************************************
-- echo 2. Materialization is OFF, in-to-exists is ON, materialization is cheaper.
-- echo ******************************************************************************/
set @@optimizer_switch='materialization=off,in_to_exists=on';
-- echo /* 2.1 non-indexed table access */
call remove_t2_indexes();
-- source include/subselect_mat_cost.inc
-- echo /* 2.2 indexed table access, nullabale columns. */
call make_t2_indexes();
-- source include/subselect_mat_cost.inc
-- echo /* 2.3 indexed table access, non-nullabale columns. */
call set_all_columns_not_null();
-- source include/subselect_mat_cost.inc
call set_all_columns_nullable();
-- echo /******************************************************************************
-- echo 3. Materialization is ON, in-to-exists is OFF, in-to-exists is cheaper.
-- echo ******************************************************************************/
set @@optimizer_switch='materialization=on,in_to_exists=off';
# make IN-TO-EXISTS cheaper
call delete_materialization_data();
call make_t1_indexes();
-- echo /* 3.1 non-indexed table access */
call remove_t2_indexes();
-- source include/subselect_mat_cost.inc
-- echo /* 3.2 indexed table access, nullabale columns. */
call make_t2_indexes();
-- source include/subselect_mat_cost.inc
-- echo /* 3.3 indexed table access, non-nullabale columns. */
call set_all_columns_not_null();
-- source include/subselect_mat_cost.inc
call set_all_columns_nullable();
drop procedure make_t1_indexes;
drop procedure make_t2_indexes;
drop procedure remove_t1_indexes;
drop procedure remove_t2_indexes;
drop procedure add_materialization_data;
drop procedure delete_materialization_data;
drop procedure set_all_columns_not_null;
drop procedure set_all_columns_nullable;
drop table t1, t2, t1_1024, t2_1024;
--echo #
--echo # LP BUG#643424 valgrind warning in choose_subquery_plan()
--echo #
CREATE TABLE t1 (
pk int(11) NOT NULL AUTO_INCREMENT,
c1 int(11) DEFAULT NULL,
c2 int(11) DEFAULT NULL,
PRIMARY KEY (pk),
KEY c2 (c2));
INSERT INTO t1 VALUES (1,NULL,2);
INSERT INTO t1 VALUES (2,7,9);
INSERT INTO t1 VALUES (9,NULL,8);
CREATE TABLE t2 (
pk int(11) NOT NULL AUTO_INCREMENT,
c1 int(11) DEFAULT NULL,
c2 int(11) DEFAULT NULL,
PRIMARY KEY (pk),
KEY c2 (c2));
INSERT INTO t2 VALUES (1,1,7);
set @save_optimizer_switch=@@optimizer_switch;
set @@optimizer_switch='materialization=on,in_to_exists=on,semijoin=off';
SELECT pk FROM t1 WHERE (c2, c1) IN (SELECT c2, c2 FROM t2);
set session optimizer_switch=@save_optimizer_switch;
drop table t1, t2;
--echo #
--echo # LP BUG#652727 Crash in create_ref_for_key()
--echo #
CREATE TABLE t2 (
pk int(11) NOT NULL AUTO_INCREMENT,
c1 int(11) DEFAULT NULL,
PRIMARY KEY (pk));
INSERT INTO t2 VALUES (10,7);
INSERT INTO t2 VALUES (11,1);
INSERT INTO t2 VALUES (17,NULL);
CREATE TABLE t1 (
pk int(11) NOT NULL AUTO_INCREMENT,
c1 int(11) DEFAULT NULL,
PRIMARY KEY (pk));
INSERT INTO t1 VALUES (15,1);
INSERT INTO t1 VALUES (19,NULL);
CREATE TABLE t3 (c2 int(11) DEFAULT NULL, KEY c2 (c2));
INSERT INTO t3 VALUES (1);
set @save_optimizer_switch=@@optimizer_switch;
set @@optimizer_switch='materialization=on,in_to_exists=on,semijoin=off';
SELECT c2
FROM t3
WHERE (2, 6) IN (SELECT t1.c1, t1.c1 FROM t1 STRAIGHT_JOIN t2 ON t2.pk = t1.pk);
set session optimizer_switch=@save_optimizer_switch;
drop table t1, t2, t3;

View File

@ -948,6 +948,8 @@ public:
virtual bool register_field_in_read_map(uchar *arg) { return 0; }
virtual bool enumerate_field_refs_processor(uchar *arg) { return 0; }
virtual bool mark_as_eliminated_processor(uchar *arg) { return 0; }
virtual bool eliminate_subselect_processor(uchar *arg) { return 0; }
virtual bool set_fake_select_as_master_processor(uchar *arg) { return 0; }
/* To call bool function for all arguments */
struct bool_func_call_args

View File

@ -1985,6 +1985,18 @@ Item *Item_in_optimizer::transform(Item_transformer transformer, uchar *argument
}
/**
  Processor-style check of whether this item is expensive to evaluate.

  The decision is delegated entirely to the second argument, args[1]
  (presumably the subquery predicate wrapped by this item -- TODO confirm);
  args[0] is not consulted.

  @param arg  opaque processor argument, forwarded unchanged to args[1]

  @return the result of args[1]->is_expensive_processor(arg)
*/
bool Item_in_optimizer::is_expensive_processor(uchar *arg)
{
return args[1]->is_expensive_processor(arg);
}
/**
  Tell whether this item is expensive to evaluate.

  The answer is taken solely from the second argument, args[1]
  (presumably the subquery predicate wrapped by this item -- TODO confirm);
  args[0] is not consulted.

  @return the result of args[1]->is_expensive()
*/
bool Item_in_optimizer::is_expensive()
{
return args[1]->is_expensive();
}
longlong Item_func_eq::val_int()
{
DBUG_ASSERT(fixed == 1);
@ -4650,12 +4662,6 @@ Item *and_expressions(Item *a, Item *b, Item **org_item)
longlong Item_func_isnull::val_int()
{
DBUG_ASSERT(fixed == 1);
/*
Handle optimization if the argument can't be null
This has to be here because of the test in update_used_tables().
*/
if (!used_tables_cache && !with_subselect)
return cached_value;
return args[0]->is_null() ? 1: 0;
}
@ -4663,12 +4669,6 @@ longlong Item_is_not_null_test::val_int()
{
DBUG_ASSERT(fixed == 1);
DBUG_ENTER("Item_is_not_null_test::val_int");
if (!used_tables_cache && !with_subselect)
{
owner->was_null|= (!cached_value);
DBUG_PRINT("info", ("cached: %ld", (long) cached_value));
DBUG_RETURN(cached_value);
}
if (args[0]->is_null())
{
DBUG_PRINT("info", ("null"));
@ -4685,19 +4685,9 @@ longlong Item_is_not_null_test::val_int()
void Item_is_not_null_test::update_used_tables()
{
if (!args[0]->maybe_null)
{
used_tables_cache= 0; /* is always true */
cached_value= (longlong) 1;
}
else
{
args[0]->update_used_tables();
if (!(used_tables_cache=args[0]->used_tables()) && !with_subselect)
{
/* Remember if the value is always NULL or never NULL */
cached_value= (longlong) !args[0]->is_null();
}
}
}
@ -5373,7 +5363,7 @@ Item *Item_func_nop_all::neg_transformer(THD *thd)
/* "NOT (e $cmp$ ANY (SELECT ...)) -> e $rev_cmp$" ALL (SELECT ...) */
Item_func_not_all *new_item= new Item_func_not_all(args[0]);
Item_allany_subselect *allany= (Item_allany_subselect*)args[0];
allany->func= allany->func_creator(FALSE);
allany->create_comp_func(FALSE);
allany->all= !allany->all;
allany->upper_item= new_item;
return new_item;
@ -5385,7 +5375,7 @@ Item *Item_func_not_all::neg_transformer(THD *thd)
Item_func_nop_all *new_item= new Item_func_nop_all(args[0]);
Item_allany_subselect *allany= (Item_allany_subselect*)args[0];
allany->all= !allany->all;
allany->func= allany->func_creator(TRUE);
allany->create_comp_func(TRUE);
allany->upper_item= new_item;
return new_item;
}
@ -5668,6 +5658,9 @@ longlong Item_equal::val_int()
Item_field *item_field;
if (cond_false)
return 0;
/* If there is a single constant and no fields, the equality is TRUE. */
if (const_item && !fields.elements)
return 1;
List_iterator_fast<Item_field> it(fields);
Item *item= const_item ? const_item : it++;
if ((null_value= item->is_null()))
@ -5688,6 +5681,15 @@ longlong Item_equal::val_int()
void Item_equal::fix_length_and_dec()
{
Item *item= get_first(NULL);
if (!item)
{
/*
If there are no fields, there must be at least a constant, in which
case Item_equal::val_int evaluates to TRUE.
*/
DBUG_ASSERT(const_item);
return;
}
eval_item= cmp_item::get_comparator(item->result_type(),
item->collation.collation);
}

View File

@ -266,6 +266,8 @@ public:
void keep_top_level_cache();
Item *transform(Item_transformer transformer, uchar *arg);
virtual Item *expr_cache_insert_transformer(uchar *thd_arg);
bool is_expensive_processor(uchar *arg);
bool is_expensive();
};
class Comp_creator
@ -1303,8 +1305,6 @@ public:
class Item_func_isnull :public Item_bool_func
{
protected:
longlong cached_value;
public:
Item_func_isnull(Item *a) :Item_bool_func(a) {}
longlong val_int();
@ -1322,18 +1322,9 @@ public:
{
used_tables_cache= 0; /* is always false */
const_item_cache= 1;
cached_value= (longlong) 0;
}
else
{
args[0]->update_used_tables();
if ((const_item_cache= !(used_tables_cache= args[0]->used_tables()) &&
!with_subselect))
{
/* Remember if the value is always NULL or never NULL */
cached_value= (longlong) args[0]->is_null();
}
}
}
table_map not_null_tables() const { return 0; }
optimize_type select_optimize() const { return OPTIMIZE_NULL; }

File diff suppressed because it is too large Load Diff

View File

@ -71,6 +71,13 @@ protected:
bool inside_first_fix_fields;
bool done_first_fix_fields;
/*
Set to TRUE if at optimization or execution time we determine that this
item's value is a constant. We need this member because it is not possible
to substitute 'this' with a constant item.
*/
bool forced_const;
public:
/* A reference from inside subquery predicate to somewhere outside of it */
class Ref_to_outside : public Sql_alloc
@ -119,6 +126,12 @@ public:
Item_subselect();
virtual subs_type substype() { return UNKNOWN_SUBS; }
bool is_in_predicate()
{
return (substype() == Item_subselect::IN_SUBS ||
substype() == Item_subselect::ALL_SUBS ||
substype() == Item_subselect::ANY_SUBS);
}
/*
We need this method, because some compilers do not allow 'this'
@ -149,12 +162,21 @@ public:
void fix_after_pullout(st_select_lex *new_parent, Item **ref);
void recalc_used_tables(st_select_lex *new_parent, bool after_pullout);
virtual bool exec();
/*
If subquery optimization or execution determines that the subquery has
an empty result, mark the subquery predicate as a constant value.

The method clears used_tables_cache and const_item_cache and raises the
forced_const flag; it does not compute or store the constant value itself.

NOTE(review): const_item_cache is reset to 0 here even though the item is
being marked constant; presumably const_item() consults forced_const as
well -- confirm against its definition.
*/
void make_const()
{
used_tables_cache= 0;
const_item_cache= 0;
forced_const= TRUE;
}
virtual void fix_length_and_dec();
table_map used_tables() const;
table_map not_null_tables() const { return 0; }
bool const_item() const;
inline table_map get_used_tables_cache() { return used_tables_cache; }
inline bool get_const_item_cache() { return const_item_cache; }
Item *get_tmp_table_item(THD *thd);
void update_used_tables();
virtual void print(String *str, enum_query_type query_type);
@ -181,6 +203,8 @@ public:
enum_parsing_place place() { return parsing_place; }
bool walk(Item_processor processor, bool walk_subquery, uchar *arg);
bool mark_as_eliminated_processor(uchar *arg);
bool eliminate_subselect_processor(uchar *arg);
bool set_fake_select_as_master_processor(uchar *arg);
bool enumerate_field_refs_processor(uchar *arg);
bool check_vcol_func_processor(uchar *int_arg)
{
@ -313,6 +337,18 @@ public:
};
/*
Possible methods to execute an IN predicate. These are set by the optimizer
based on user-set optimizer switches, semantic analysis and cost comparison.
*/
#define SUBS_NOT_TRANSFORMED 0 /* No execution method was chosen for this IN. */
#define SUBS_SEMI_JOIN 1 /* IN was converted to semi-join. */
#define SUBS_IN_TO_EXISTS 2 /* IN was converted to correlated EXISTS. */
#define SUBS_MATERIALIZATION 4 /* Execute IN via subquery materialization. */
/* Partial matching substrategies of MATERIALIZATION. */
#define SUBS_PARTIAL_MATCH_ROWID_MERGE 8
#define SUBS_PARTIAL_MATCH_TABLE_SCAN 16
/**
Representation of IN subquery predicates of the form
"left_expr IN (SELECT ...)".
@ -330,8 +366,6 @@ public:
class Item_in_subselect :public Item_exists_subselect
{
public:
Item *left_expr;
protected:
/*
Cache of the left operand of the subquery predicate. Allocated in the
@ -339,12 +373,6 @@ protected:
*/
List<Cached_item> *left_expr_cache;
bool first_execution;
/*
Set to TRUE if at query execution time we determine that this item's
value is a constant during this execution. We need this member because
it is not possible to substitute 'this' with a constant item.
*/
bool is_constant;
/*
expr & optimizer used in subselect rewriting to store Item for
@ -354,10 +382,24 @@ protected:
Item_in_optimizer *optimizer;
bool was_null;
bool abort_on_null;
public:
/* Used to trigger on/off conditions that were pushed down to subselect */
bool *pushed_cond_guards;
Comp_creator *func;
protected:
bool init_cond_guards();
trans_res select_in_like_transformer(JOIN *join);
trans_res single_value_transformer(JOIN *join);
trans_res row_value_transformer(JOIN * join);
bool fix_having(Item *having, st_select_lex *select_lex);
trans_res create_single_in_to_exists_cond(JOIN * join,
Item **where_item,
Item **having_item);
trans_res create_row_in_to_exists_cond(JOIN * join,
Item **where_item,
Item **having_item);
public:
Item *left_expr;
/* Priority of this predicate in the convert-to-semi-join-nest process. */
int sj_convert_priority;
/*
@ -388,14 +430,8 @@ public:
*/
bool sjm_scan_allowed;
/* The method chosen to execute the IN predicate. */
enum enum_exec_method {
NOT_TRANSFORMED, /* No execution method was chosen for this IN. */
SEMI_JOIN, /* IN was converted to semi-join nest and should be removed. */
IN_TO_EXISTS, /* IN was converted to correlated EXISTS. */
MATERIALIZATION /* IN will be executed via subquery materialization. */
};
enum_exec_method exec_method;
/* A bitmap of possible execution strategies for an IN predicate. */
uchar in_strategy;
bool *get_cond_guard(int i)
{
@ -413,9 +449,10 @@ public:
Item_in_subselect(Item * left_expr, st_select_lex *select_lex);
Item_in_subselect()
:Item_exists_subselect(), left_expr_cache(0), first_execution(TRUE),
is_constant(FALSE), optimizer(0), abort_on_null(0),
pushed_cond_guards(NULL), exec_method(NOT_TRANSFORMED), upper_item(0)
{}
optimizer(0), abort_on_null(0),
pushed_cond_guards(NULL), func(NULL), in_strategy(0),
upper_item(0)
{}
void cleanup();
subs_type substype() { return IN_SUBS; }
void reset()
@ -426,12 +463,9 @@ public:
was_null= 0;
}
trans_res select_transformer(JOIN *join);
trans_res select_in_like_transformer(JOIN *join, Comp_creator *func);
trans_res single_value_transformer(JOIN *join, Comp_creator *func);
trans_res row_value_transformer(JOIN * join);
trans_res single_value_in_to_exists_transformer(JOIN * join,
Comp_creator *func);
trans_res row_value_in_to_exists_transformer(JOIN * join);
bool create_in_to_exists_cond(JOIN *join_arg);
bool inject_in_to_exists_cond(JOIN *join_arg);
virtual bool exec();
longlong val_int();
double val_real();
@ -446,11 +480,12 @@ public:
bool fix_fields(THD *thd, Item **ref);
void fix_after_pullout(st_select_lex *new_parent, Item **ref);
void update_used_tables();
bool setup_engine();
bool setup_mat_engine();
bool init_left_expr_cache();
/* Inform 'this' that it was computed, and contains a valid result. */
void set_first_execution() { if (first_execution) first_execution= FALSE; }
bool is_expensive_processor(uchar *arg);
bool is_expensive() { return TRUE; }
bool expr_cache_is_needed(THD *thd);
/*
@ -472,7 +507,6 @@ class Item_allany_subselect :public Item_in_subselect
{
public:
chooser_compare_func_creator func_creator;
Comp_creator *func;
bool all;
Item_allany_subselect(Item * left_expr, chooser_compare_func_creator fc,
@ -481,6 +515,7 @@ public:
// only ALL subquery has upper not
subs_type substype() { return all?ALL_SUBS:ANY_SUBS; }
trans_res select_transformer(JOIN *join);
void create_comp_func(bool invert) { func= func_creator(invert); }
virtual void print(String *str, enum_query_type query_type);
};
@ -821,10 +856,9 @@ public:
}
~subselect_hash_sj_engine();
bool init_permanent(List<Item> *tmp_columns);
bool init_runtime();
bool init(List<Item> *tmp_columns);
void cleanup();
int prepare() { return 0; } /* Override virtual function in base class. */
int prepare();
int exec();
virtual void print(String *str, enum_query_type query_type);
uint cols()

View File

@ -562,24 +562,29 @@ protected:
#define OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION 4
#define OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT 8
#define OPTIMIZER_SWITCH_INDEX_COND_PUSHDOWN 16
#define OPTIMIZER_SWITCH_FIRSTMATCH 32
#define OPTIMIZER_SWITCH_LOOSE_SCAN 64
#define OPTIMIZER_SWITCH_MATERIALIZATION 128
#define OPTIMIZER_SWITCH_SEMIJOIN 256
#define OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE 512
#define OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN 1024
#define OPTIMIZER_SWITCH_SUBQUERY_CACHE (1<<11)
#define OPTIMIZER_SWITCH_IN_TO_EXISTS 256
#define OPTIMIZER_SWITCH_SEMIJOIN 512
#define OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE 1024
#define OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN (1<<11)
#define OPTIMIZER_SWITCH_SUBQUERY_CACHE (1<<12)
#ifdef DBUG_OFF
# define OPTIMIZER_SWITCH_LAST (1<<12)
#else
# define OPTIMIZER_SWITCH_TABLE_ELIMINATION (1<<12)
# define OPTIMIZER_SWITCH_LAST (1<<13)
#else
# define OPTIMIZER_SWITCH_TABLE_ELIMINATION (1<<13)
# define OPTIMIZER_SWITCH_LAST (1<<14)
#endif
#ifdef DBUG_OFF
/* The following must be kept in sync with optimizer_switch_str in mysqld.cc */
/*
TODO: Materialization is off by default to mimic 5.1/5.2 behavior.
Once the cost-based choice between materialization and in-to-exists is
to be enabled by default, add OPTIMIZER_SWITCH_MATERIALIZATION to the defaults.
*/
# define OPTIMIZER_SWITCH_DEFAULT (OPTIMIZER_SWITCH_INDEX_MERGE | \
OPTIMIZER_SWITCH_INDEX_MERGE_UNION | \
OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION | \
@ -587,7 +592,7 @@ protected:
OPTIMIZER_SWITCH_INDEX_COND_PUSHDOWN | \
OPTIMIZER_SWITCH_FIRSTMATCH | \
OPTIMIZER_SWITCH_LOOSE_SCAN | \
OPTIMIZER_SWITCH_MATERIALIZATION | \
OPTIMIZER_SWITCH_IN_TO_EXISTS | \
OPTIMIZER_SWITCH_SEMIJOIN | \
OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE|\
OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN|\
@ -601,7 +606,7 @@ protected:
OPTIMIZER_SWITCH_TABLE_ELIMINATION | \
OPTIMIZER_SWITCH_FIRSTMATCH | \
OPTIMIZER_SWITCH_LOOSE_SCAN | \
OPTIMIZER_SWITCH_MATERIALIZATION | \
OPTIMIZER_SWITCH_IN_TO_EXISTS | \
OPTIMIZER_SWITCH_SEMIJOIN | \
OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE|\
OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN|\

View File

@ -341,7 +341,7 @@ static const char *optimizer_switch_names[]=
"index_merge","index_merge_union","index_merge_sort_union",
"index_merge_intersection",
"index_condition_pushdown",
"firstmatch","loosescan","materialization", "semijoin",
"firstmatch","loosescan","materialization","in_to_exists","semijoin",
"partial_match_rowid_merge",
"partial_match_table_scan",
"subquery_cache",
@ -362,6 +362,7 @@ static const unsigned int optimizer_switch_names_len[]=
sizeof("firstmatch") - 1,
sizeof("loosescan") - 1,
sizeof("materialization") - 1,
sizeof("in_to_exists") - 1,
sizeof("semijoin") - 1,
sizeof("partial_match_rowid_merge") - 1,
sizeof("partial_match_table_scan") - 1,
@ -460,7 +461,8 @@ static const char *optimizer_switch_str="index_merge=on,index_merge_union=on,"
"index_condition_pushdown=on,"
"firstmatch=on,"
"loosescan=on,"
"materialization=on,"
"materialization=off,"
"in_to_exists=on,"
"semijoin=on,"
"partial_match_rowid_merge=on,"
"partial_match_table_scan=on,"
@ -7375,7 +7377,7 @@ thread is in the relay logs.",
{"optimizer_switch", OPT_OPTIMIZER_SWITCH,
"optimizer_switch=option=val[,option=val...], where option={index_merge, "
"index_merge_union, index_merge_sort_union, index_merge_intersection, "
"index_condition_pushdown, firstmatch, loosescan, materialization, "
"index_condition_pushdown, firstmatch, loosescan, materialization, in_to_exists, "
"semijoin, partial_match_rowid_merge, partial_match_table_scan, "
"subquery_cache"
#ifndef DBUG_OFF

View File

@ -67,6 +67,7 @@ int check_and_do_in_subquery_rewrites(JOIN *join)
{
THD *thd=join->thd;
st_select_lex *select_lex= join->select_lex;
st_select_lex_unit* parent_unit= select_lex->master_unit();
DBUG_ENTER("check_and_do_in_subquery_rewrites");
/*
If
@ -84,8 +85,8 @@ int check_and_do_in_subquery_rewrites(JOIN *join)
TODO: for PS, make the whole block execute only on the first execution
*/
Item_subselect *subselect;
if (!thd->lex->view_prepare_mode && // (1)
(subselect= select_lex->master_unit()->item)) // (2)
if (!thd->lex->view_prepare_mode && // (1)
(subselect= parent_unit->item)) // (2)
{
Item_in_subselect *in_subs= NULL;
if (subselect->substype() == Item_subselect::IN_SUBS)
@ -129,6 +130,15 @@ int check_and_do_in_subquery_rewrites(JOIN *join)
if (failure)
DBUG_RETURN(-1); /* purecov: deadcode */
}
if (select_lex == parent_unit->fake_select_lex)
{
/*
The join and its select_lex object represent the 'fake' select used
to compute the result of a UNION.
*/
DBUG_RETURN(0);
}
DBUG_PRINT("info", ("Checking if subq can be converted to semi-join"));
/*
Check if we're in subquery that is a candidate for flattening into a
@ -154,8 +164,8 @@ int check_and_do_in_subquery_rewrites(JOIN *join)
!join->having && !select_lex->with_sum_func && // 4
thd->thd_marker.emb_on_expr_nest && // 5
select_lex->outer_select()->join && // 6
select_lex->master_unit()->first_select()->leaf_tables && // 7
in_subs->exec_method == Item_in_subselect::NOT_TRANSFORMED && // 8
parent_unit->first_select()->leaf_tables && // 7
!in_subs->in_strategy && // 8
select_lex->outer_select()->leaf_tables && // 9
!((join->select_options | // 10
select_lex->outer_select()->join->select_options) // 10
@ -175,63 +185,82 @@ int check_and_do_in_subquery_rewrites(JOIN *join)
else
{
DBUG_PRINT("info", ("Subquery can't be converted to semi-join"));
/*
Check if the subquery predicate can be executed via materialization.
The required conditions are:
1. Subquery predicate is an IN/=ANY subq predicate
2. Subquery is a single SELECT (not a UNION)
3. Subquery is not a table-less query. In this case there is no
point in materializing.
3A The upper query is not a table-less SELECT ... FROM DUAL. We
/* Test if the user has set a legal combination of optimizer switches. */
if (!optimizer_flag(thd, OPTIMIZER_SWITCH_IN_TO_EXISTS) &&
!optimizer_flag(thd, OPTIMIZER_SWITCH_MATERIALIZATION))
my_error(ER_ILLEGAL_SUBQUERY_OPTIMIZER_SWITCHES, MYF(0));
if (in_subs)
{
/* Subquery predicate is an IN/=ANY predicate. */
if (optimizer_flag(thd, OPTIMIZER_SWITCH_IN_TO_EXISTS))
in_subs->in_strategy|= SUBS_IN_TO_EXISTS;
if (optimizer_flag(thd, OPTIMIZER_SWITCH_MATERIALIZATION))
in_subs->in_strategy|= SUBS_MATERIALIZATION;
/*
Check if the subquery predicate can be executed via materialization.
The required conditions are:
1. Subquery is a single SELECT (not a UNION)
2. Subquery is not a table-less query. In this case there is no
point in materializing.
2A The upper query is not a table-less SELECT ... FROM DUAL. We
can't do materialization for SELECT .. FROM DUAL because it
does not call setup_subquery_materialization(). We could make
SELECT ... FROM DUAL call that function but that doesn't seem
to be the case that is worth handling.
4. Either the subquery predicate is a top-level predicate, or at
least one partial match strategy is enabled. If no partial match
strategy is enabled, then materialization cannot be used for
non-top-level queries because it cannot handle NULLs correctly.
5. Subquery is non-correlated
TODO:
This is an overly restrictive condition. It can be extended to:
(Subquery is non-correlated ||
Subquery is correlated to any query outer to IN predicate ||
(Subquery is correlated to the immediate outer query &&
Subquery !contains {GROUP BY, ORDER BY [LIMIT],
aggregate functions}) && subquery predicate is not under "NOT IN"))
6. No execution method was already chosen (by a prepared statement).
3. Either the subquery predicate is a top-level predicate, or at
least one partial match strategy is enabled. If no partial match
strategy is enabled, then materialization cannot be used for
non-top-level queries because it cannot handle NULLs correctly.
4. Subquery is non-correlated
TODO:
This is an overly restrictive condition. It can be extended to:
(Subquery is non-correlated ||
Subquery is correlated to any query outer to IN predicate ||
(Subquery is correlated to the immediate outer query &&
Subquery !contains {GROUP BY, ORDER BY [LIMIT],
aggregate functions}) && subquery predicate is not under "NOT IN"))
(*) The subquery must be part of a SELECT statement. The current
condition also excludes multi-table update statements.
(*) The subquery must be part of a SELECT statement. The current
condition also excludes multi-table update statements.
*/
if (!(in_subs->in_strategy & SUBS_MATERIALIZATION &&
!select_lex->is_part_of_union() && // 1
parent_unit->first_select()->leaf_tables && // 2
thd->lex->sql_command == SQLCOM_SELECT && // *
select_lex->outer_select()->leaf_tables && // 2A
subquery_types_allow_materialization(in_subs) &&
// psergey-todo: duplicated_subselect_card_check: where it's done?
(in_subs->is_top_level_item() || //3
optimizer_flag(thd,
OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) || //3
optimizer_flag(thd,
OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN)) && //3
!in_subs->is_correlated)) //4
{
/* Materialization is not possible based on syntactic properties. */
in_subs->in_strategy&= ~SUBS_MATERIALIZATION;
}
Determine whether we will perform subquery materialization before
calling the IN=>EXISTS transformation, so that we know whether to
perform the whole transformation or only that part of it which wraps
Item_in_subselect in an Item_in_optimizer.
*/
if (optimizer_flag(thd, OPTIMIZER_SWITCH_MATERIALIZATION) &&
in_subs && // 1
!select_lex->is_part_of_union() && // 2
select_lex->master_unit()->first_select()->leaf_tables && // 3
thd->lex->sql_command == SQLCOM_SELECT && // *
select_lex->outer_select()->leaf_tables && // 3A
subquery_types_allow_materialization(in_subs) &&
// psergey-todo: duplicated_subselect_card_check: where it's done?
(in_subs->is_top_level_item() ||
optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) ||
optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN)) &&//4
!in_subs->is_correlated && // 5
in_subs->exec_method == Item_in_subselect::NOT_TRANSFORMED) // 6
{
in_subs->exec_method= Item_in_subselect::MATERIALIZATION;
if (!in_subs->in_strategy)
{
/*
If neither materialization is possible, nor the user chose
IN-TO-EXISTS, choose IN-TO-EXISTS as the only universal strategy.
*/
in_subs->in_strategy|= SUBS_IN_TO_EXISTS;
}
}
/*
Transform each subquery predicate according to its overloaded
transformer.
*/
Item_subselect::trans_res trans_res;
if ((trans_res= subselect->select_transformer(join)) !=
Item_subselect::RES_OK)
{
DBUG_RETURN((trans_res == Item_subselect::RES_ERROR));
}
}
}
DBUG_RETURN(0);
@ -509,6 +538,15 @@ skip_conversion:
FALSE))
DBUG_RETURN(TRUE);
}
/*
Revert to the IN->EXISTS strategy in the rare case when the subquery could
not be flattened.
TODO: This is a limitation done for simplicity. Such subqueries could also
be executed via materialization. In order to determine this, we should
re-run the test for materialization that was done in
check_and_do_in_subquery_rewrites.
*/
(*in_subq)->in_strategy= SUBS_IN_TO_EXISTS;
}
if (arena)
@ -769,8 +807,7 @@ static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred)
/* 3. Remove the original subquery predicate from the WHERE/ON */
// The subqueries were replaced for Item_int(1) earlier
subq_pred->exec_method=
Item_in_subselect::SEMI_JOIN; // for subsequent executions
subq_pred->in_strategy= SUBS_SEMI_JOIN; // for subsequent executions
/*TODO: also reset the 'with_subselect' there. */
/* n. Adjust the parent_join->tables counter */
@ -1167,8 +1204,8 @@ bool optimize_semijoin_nests(JOIN *join, table_map all_table_map)
sjm->tables= n_tables;
sjm->is_used= FALSE;
double subjoin_out_rows, subjoin_read_time;
get_partial_join_cost(join, n_tables,
&subjoin_read_time, &subjoin_out_rows);
join->get_partial_join_cost(n_tables + join->const_tables,
&subjoin_read_time, &subjoin_out_rows);
sjm->materialization_cost.convert_from_cost(subjoin_read_time);
sjm->rows= subjoin_out_rows;
@ -3362,9 +3399,23 @@ int rewrite_to_index_subquery_engine(JOIN *join)
JOIN_TAB* join_tab=join->join_tab;
SELECT_LEX_UNIT *unit= join->unit;
DBUG_ENTER("rewrite_to_index_subquery_engine");
/*
is this simple IN subquery?
*/
/* TODO: In order to use these more efficient subquery engines in more cases,
the following problems need to be solved:
- the code that removes GROUP BY (group_list), also adds an ORDER BY
(order), thus GROUP BY queries (almost?) never pass through this branch.
Solution: remove the test below '!join->order', because we remove the
ORDER clause for subqueries anyway.
- in order to set a more efficient engine, the optimizer needs to both
decide to remove GROUP BY, *and* select one of the JT_[EQ_]REF[_OR_NULL]
access methods, *and* loose scan should be more expensive or
inapplicable. When is that possible?
- Consider expanding the applicability of this rewrite for loose scan
for group by queries.
*/
if (!join->group_list && !join->order &&
join->unit->item &&
join->unit->item->substype() == Item_subselect::IN_SUBS &&
@ -3505,3 +3556,332 @@ static void remove_subq_pushed_predicates(JOIN *join, Item **where)
}
/**
Optimize all subqueries of a query that were not flattened into a semijoin.
@details
Optimize all immediate children subqueries of a query. The work is delegated
to select_lex->optimize_unflattened_subqueries() for this JOIN's select_lex.
This phase must be called after substitute_for_best_equal_field() because
that function may replace items with other items from a multiple equality,
and we need to reference the correct items in the index access method of the
IN predicate.
@return Operation status
@retval FALSE success.
@retval TRUE error occurred.
*/
bool JOIN::optimize_unflattened_subqueries()
{
return select_lex->optimize_unflattened_subqueries();
}
/**
Choose an optimal strategy to execute an IN/ALL/ANY subquery predicate
based on cost.
@param join_tables the set of tables joined in the subquery
@notes
The method chooses between the materialization and IN=>EXISTS rewrite
strategies for the execution of a non-flattened subquery IN predicate.
The cost-based decision is made as follows:
1. compute materialize_strategy_cost based on the unmodified subquery
2. reoptimize the subquery taking into account the IN-EXISTS predicates
3. compute in_exists_strategy_cost based on the reoptimized plan
4. compare and set the cheaper strategy
if (materialize_strategy_cost >= in_exists_strategy_cost)
in_strategy = IN_TO_EXISTS
else
in_strategy = MATERIALIZATION
5. if in_strategy = MATERIALIZATION and it is not possible to initialize it
revert to IN_TO_EXISTS
6. if (in_strategy == MATERIALIZATION)
revert the subquery plan to the original one before reoptimizing
else
inject the IN=>EXISTS predicates into the new EXISTS subquery plan
The implementation itself is a bit more complicated because it takes into
account two more factors:
- whether the user allowed both strategies through an optimizer_switch, and
- if materialization was the cheaper strategy, whether it can be executed
or not.
@retval FALSE success.
@retval TRUE error occurred.
*/
bool JOIN::choose_subquery_plan(table_map join_tables)
{ /* The original QEP of the subquery. */
DYNAMIC_ARRAY save_keyuse; /* Copy of the JOIN::keyuse array. */
POSITION save_best_positions[MAX_TABLES+1]; /* Copy of JOIN::best_positions */
/* Copies of the JOIN_TAB::keyuse pointers for each JOIN_TAB. */
KEYUSE *save_join_tab_keyuse[MAX_TABLES];
/* Copies of JOIN_TAB::checked_keys for each JOIN_TAB. */
key_map save_join_tab_checked_keys[MAX_TABLES];
enum_reopt_result reopt_result= REOPT_NONE;
Item_in_subselect *in_subs;
if (select_lex->master_unit()->item &&
select_lex->master_unit()->item->is_in_predicate())
{
in_subs= (Item_in_subselect*) select_lex->master_unit()->item;
if (in_subs->create_in_to_exists_cond(this))
return true;
}
else
return false;
DBUG_ASSERT(in_subs->in_strategy); /* A strategy must be chosen earlier. */
DBUG_ASSERT(in_to_exists_where || in_to_exists_having);
DBUG_ASSERT(!in_to_exists_where || in_to_exists_where->fixed);
DBUG_ASSERT(!in_to_exists_having || in_to_exists_having->fixed);
save_keyuse.elements= 0;
save_keyuse.buffer= NULL;
/*
Compute and compare the costs of materialization and in-exists if both
strategies are possible and allowed by the user (checked during the prepare
phase).
*/
if (in_subs->in_strategy & SUBS_MATERIALIZATION &&
in_subs->in_strategy & SUBS_IN_TO_EXISTS)
{
JOIN *outer_join= unit->outer_select() ? unit->outer_select()->join : NULL;
JOIN *inner_join= this;
/* Cost of the outer JOIN. */
double outer_read_time, outer_record_count;
/* Cost of the unmodified subquery. */
double inner_read_time_1, inner_record_count_1;
/* Cost of the subquery with injected IN-EXISTS predicates. */
double inner_read_time_2, inner_record_count_2;
/* The cost to compute IN via materialization. */
double materialize_strategy_cost;
/* The cost of the IN->EXISTS strategy. */
double in_exists_strategy_cost;
if (outer_join)
outer_join->get_partial_join_cost(outer_join->tables,
&outer_read_time, &outer_record_count);
else
{
/*
TODO: outer_join can be NULL for DELETE statements.
How to compute its cost?
*/
outer_read_time= 1; /* TODO */
outer_record_count= 1; /* TODO */
}
inner_join->get_partial_join_cost(inner_join->tables,
&inner_read_time_1, &inner_record_count_1);
if (in_to_exists_where && const_tables != tables)
{
/*
Re-optimize and cost the subquery taking into account the IN-EXISTS
conditions.
*/
if (save_query_plan(&save_keyuse, save_best_positions,
save_join_tab_keyuse, save_join_tab_checked_keys))
return TRUE;
reopt_result= reoptimize(in_to_exists_where, join_tables);
if (reopt_result == REOPT_OLD_PLAN)
restore_query_plan(&save_keyuse, save_best_positions,
save_join_tab_keyuse, save_join_tab_checked_keys);
else if (reopt_result == REOPT_ERROR)
return TRUE;
inner_join->get_partial_join_cost(inner_join->tables,
&inner_read_time_2, &inner_record_count_2);
}
else
{
/* Reoptimization would not produce any better plan. */
inner_read_time_2= inner_read_time_1;
inner_record_count_2= inner_record_count_1;
}
/* Compute execution costs. */
/*
1. Compute the cost of the materialization strategy.
*/
double materialization_cost; /* The cost of executing the subquery and */
/* storing its result in an indexed temp table.*/
/* The cost of a lookup into the unique index of the materialized table. */
double lookup_cost;
double write_row_cost= 1; /* TODO: what is the real cost to write a row? */
materialization_cost= inner_read_time_1 +
inner_record_count_1 * write_row_cost;
/*
The cost of a hash/btree lookup into a unique index of a materialized
subquery.
TIMOUR: TODO: the block of code below is an exact copy/paste from
opt_subselect.cc:optimize_semijoin_nests() - refactor it.
*/
uint rowlen= get_tmp_table_rec_length(unit->first_select()->item_list);
if (rowlen * inner_record_count_1 < thd->variables.max_heap_table_size)
lookup_cost= HEAP_TEMPTABLE_LOOKUP_COST;
else
lookup_cost= DISK_TEMPTABLE_LOOKUP_COST;
materialize_strategy_cost= materialization_cost +
outer_record_count * lookup_cost;
/*
2. Compute the cost of the IN=>EXISTS strategy.
*/
in_exists_strategy_cost= outer_record_count * inner_read_time_2;
/* Compare the costs and choose the cheaper strategy. */
if (materialize_strategy_cost >= in_exists_strategy_cost)
in_subs->in_strategy&= ~SUBS_MATERIALIZATION;
else
in_subs->in_strategy&= ~SUBS_IN_TO_EXISTS;
}
/*
If (1) materialization is a possible strategy based on semantic analysis
during the prepare phase, then if
(2) it is more expensive than the IN->EXISTS transformation, and
(3) it is not possible to create usable indexes for the materialization
strategy,
fall back to IN->EXISTS.
otherwise
use materialization.
*/
if (in_subs->in_strategy & SUBS_MATERIALIZATION &&
in_subs->setup_mat_engine())
{
/*
If materialization was the cheaper or the only user-selected strategy,
but it is not possible to execute it due to limitations in the
implementation, fall back to IN-TO-EXISTS.
*/
in_subs->in_strategy&= ~SUBS_MATERIALIZATION;
in_subs->in_strategy|= SUBS_IN_TO_EXISTS;
}
if (in_subs->in_strategy & SUBS_MATERIALIZATION)
{
/* Restore the original query plan used for materialization. */
if (reopt_result == REOPT_NEW_PLAN)
restore_query_plan(&save_keyuse, save_best_positions,
save_join_tab_keyuse, save_join_tab_checked_keys);
/* TODO: should we set/unset this flag for both select_lex and its unit? */
in_subs->unit->uncacheable&= ~UNCACHEABLE_DEPENDENT;
select_lex->uncacheable&= ~UNCACHEABLE_DEPENDENT;
/*
Reset the "LIMIT 1" set in Item_exists_subselect::fix_length_and_dec.
TODO:
Currently we set the subquery LIMIT to infinity, and this is correct
because we forbid at parse time LIMIT inside IN subqueries (see
Item_in_subselect::test_limit). However, once we allow this, here
we should set the correct limit if given in the query.
*/
in_subs->unit->global_parameters->select_limit= NULL;
in_subs->unit->set_limit(unit->global_parameters);
/*
Set the limit of this JOIN object as well, because normally its being
set in the beginning of JOIN::optimize, which was already done.
*/
select_limit= in_subs->unit->select_limit_cnt;
}
else if (in_subs->in_strategy & SUBS_IN_TO_EXISTS)
{
/* Keep the new query plan with injected conditions, delete the old plan. */
if (reopt_result == REOPT_NEW_PLAN)
delete_dynamic(&save_keyuse);
if (reopt_result == REOPT_NONE && in_to_exists_where && const_tables != tables)
{
/*
The subquery was not reoptimized either because the user allowed only the
IN-EXISTS strategy, or because materialization was not possible based on
semantic analysis. Clenup the original plan and reoptimize.
*/
for (uint i= 0; i < tables; i++)
{
join_tab[i].keyuse= NULL;
join_tab[i].checked_keys.clear_all();
}
if ((reopt_result= reoptimize(in_to_exists_where, join_tables)) ==
REOPT_ERROR)
return TRUE;
}
if (in_subs->inject_in_to_exists_cond(this))
return TRUE;
}
else
DBUG_ASSERT(FALSE);
return FALSE;
}
/**
  Pick the execution strategy for a subquery that has no tables to join.

  @notes
  Nothing is done unless the unit of this JOIN is attached to a subquery
  item. When the result is already known to be empty and there is no
  implicit grouping, the subquery item is reset and turned into a constant.
  Otherwise, an IN predicate is forced to the IN-TO-EXISTS strategy and its
  conditions are created and injected immediately.

  @retval FALSE success.
  @retval TRUE  error occurred.
*/

bool JOIN::choose_tableless_subquery_plan()
{
  DBUG_ASSERT(!tables_list || !tables);

  Item_subselect *subs_predicate= select_lex->master_unit()->item;
  if (!subs_predicate)
    return FALSE;                     /* No subquery item to take care of. */

  DBUG_ASSERT(subs_predicate->type() == Item::SUBSELECT_ITEM);

  /*
    If the optimizer determined that this query has an empty result,
    in most cases the subquery predicate is a known constant value -
    either FALSE or NULL. The implementation of Item_subselect::reset()
    determines which one.

    Both group by queries and non-group by queries without aggregate
    functions produce empty subquery result.
  */
  if (zero_result_cause && !implicit_grouping)
  {
    subs_predicate->reset();
    subs_predicate->make_const();
    return FALSE;
  }
  /*
    TODO:
    A further optimization is possible when a non-group query with
    MIN/MAX/COUNT is optimized by opt_sum_query. Then, if there are
    only MIN/MAX functions over an empty result set, the subquery
    result is a NULL value/row, thus the value of subs_predicate is
    NULL.
  */

  if (!subs_predicate->is_in_predicate())
    return FALSE;

  /* IN-TO-EXISTS is the only strategy set here for an IN predicate. */
  Item_in_subselect *in_subs= (Item_in_subselect*) subs_predicate;
  in_subs->in_strategy= SUBS_IN_TO_EXISTS;
  if (in_subs->create_in_to_exists_cond(this))
    return TRUE;
  if (in_subs->inject_in_to_exists_cond(this))
    return TRUE;
  tmp_having= having;
  return FALSE;
}

View File

@ -6247,3 +6247,5 @@ ER_UNKNOWN_OPTION
eng "Unknown option '%-.64s'"
ER_BAD_OPTION_VALUE
eng "Incorrect value '%-.64s' for option '%-.64s'"
ER_ILLEGAL_SUBQUERY_OPTIMIZER_SWITCHES
eng "At least one of the 'in_to_exists' or 'materialization' optimizer_switch flags must be 'on'."

View File

@ -2656,6 +2656,7 @@ void Query_arena::free_items()
for (; free_list; free_list= next)
{
next= free_list->next;
DBUG_ASSERT(free_list != next);
free_list->delete_self();
}
/* Postcondition: free_list is 0 */
@ -3092,6 +3093,7 @@ void TMP_TABLE_PARAM::init()
table_charset= 0;
precomputed_group_by= 0;
bit_fields_as_long= 0;
materialized_subquery= 0;
skip_create_table= 0;
DBUG_VOID_RETURN;
}

View File

@ -2926,6 +2926,8 @@ public:
uint convert_blob_length;
CHARSET_INFO *table_charset;
bool schema_table;
/* TRUE if the temp table is created for subquery materialization. */
bool materialized_subquery;
/*
True if GROUP BY and its aggregate functions are already computed
by a table access method (e.g. by loose index scan). In this case
@ -2949,8 +2951,8 @@ public:
TMP_TABLE_PARAM()
:copy_field(0), group_parts(0),
group_length(0), group_null_parts(0), convert_blob_length(0),
schema_table(0), precomputed_group_by(0), force_copy_fields(0),
bit_fields_as_long(0), skip_create_table(0)
schema_table(0), materialized_subquery(0), precomputed_group_by(0),
force_copy_fields(0), bit_fields_as_long(0), skip_create_table(0)
{}
~TMP_TABLE_PARAM()
{
@ -2983,6 +2985,7 @@ public:
virtual bool create_result_table(THD *thd, List<Item> *column_types,
bool is_distinct, ulonglong options,
const char *alias, bool bit_fields_as_long);
TMP_TABLE_PARAM *get_tmp_table_param() { return &tmp_table_param; }
};
/* Base subselect interface class */
@ -3046,7 +3049,7 @@ protected:
void reset();
public:
select_materialize_with_stats() {}
select_materialize_with_stats() { tmp_table_param.init(); }
virtual bool create_result_table(THD *thd, List<Item> *column_types,
bool is_distinct, ulonglong options,
const char *alias, bool bit_fields_as_long);

View File

@ -92,6 +92,10 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
}
}
/* Apply the IN=>EXISTS transformation to all subqueries and optimize them. */
if (select_lex->optimize_unflattened_subqueries())
DBUG_RETURN(TRUE);
const_cond= (!conds || conds->const_item());
safe_update=test(thd->options & OPTION_SAFE_UPDATES);
if (safe_update && const_cond)

View File

@ -1735,17 +1735,29 @@ void st_select_lex_node::fast_exclude()
}
/*
excluding select_lex structure (except first (first select can't be
Exclude a node from the tree lex structure, but leave it in the global
list of nodes.
*/
void st_select_lex_node::exclude_from_tree()
{
  /* Make whatever pointed at this node point at its successor instead. */
  *prev= next;
  /* If there is a successor, let it point back at that same link. */
  if (next)
    next->prev= prev;
}
/*
Exclude select_lex structure (except first (first select can't be
deleted, because it is most upper select))
*/
void st_select_lex_node::exclude()
{
//exclude from global list
/* exclude from global list */
fast_exclude();
//exclude from other structures
if ((*prev= next))
next->prev= prev;
/* exclude from other structures */
exclude_from_tree();
/*
We do not need following statements, because prev pointer of first
list element point to master->slave
@ -2145,8 +2157,8 @@ void st_select_lex::print_limit(THD *thd,
select_limit == 1, and there should be no offset_limit.
*/
(((subs_type == Item_subselect::IN_SUBS) &&
((Item_in_subselect*)item)->exec_method ==
Item_in_subselect::MATERIALIZATION) ?
((Item_in_subselect*)item)->in_strategy &
SUBS_MATERIALIZATION) ?
TRUE :
(select_limit->val_int() == 1LL) &&
offset_limit == 0));
@ -3076,6 +3088,70 @@ bool st_select_lex::add_index_hint (THD *thd, char *str, uint length)
str, length));
}
bool st_select_lex::optimize_unflattened_subqueries()
{
for (SELECT_LEX_UNIT *un= first_inner_unit(); un; un= un->next_unit())
{
Item_subselect *subquery_predicate= un->item;
if (subquery_predicate)
{
for (SELECT_LEX *sl= un->first_select(); sl; sl= sl->next_select())
{
JOIN *inner_join= sl->join;
SELECT_LEX *save_select= un->thd->lex->current_select;
ulonglong save_options;
int res;
/* We need only 1 row to determine existence */
un->set_limit(un->global_parameters);
un->thd->lex->current_select= sl;
save_options= inner_join->select_options;
if (un->outer_select()->options & SELECT_DESCRIBE)
{
/* Optimize the subquery in the context of EXPLAIN. */
set_explain_type();
inner_join->select_options= options;
}
res= inner_join->optimize();
inner_join->select_options= save_options;
un->thd->lex->current_select= save_select;
if (res)
return TRUE;
}
}
}
return FALSE;
}
/**
  Compute the string stored in 'type' for this select and mark the select
  with SELECT_DESCRIBE.

  The string depends on whether this is the select_lex object embedded in
  the LEX, whether it is the first select of its unit, its linkage, and its
  'uncacheable' flags.
*/

void st_select_lex::set_explain_type()
{
  SELECT_LEX *first= master_unit()->first_select();
  /* drop UNCACHEABLE_EXPLAIN, because it is for internal usage only */
  uint8 is_uncacheable= (uncacheable & ~UNCACHEABLE_EXPLAIN);
  bool is_dependent= (is_uncacheable & UNCACHEABLE_DEPENDENT) != 0;

  if (&master_unit()->thd->lex->select_lex == this)
  {
    /* This is the select_lex embedded in the LEX. */
    if (first_inner_unit() || next_select())
      type= "PRIMARY";
    else
      type= "SIMPLE";
  }
  else if (this == first)
  {
    /* First select of a nested unit. */
    if (linkage == DERIVED_TABLE_TYPE)
      type= "DERIVED";
    else if (is_dependent)
      type= "DEPENDENT SUBQUERY";
    else if (is_uncacheable)
      type= "UNCACHEABLE SUBQUERY";
    else
      type= "SUBQUERY";
  }
  else
  {
    /* Any select of a unit after the first one. */
    if (is_dependent)
      type= "DEPENDENT UNION";
    else if (is_uncacheable)
      type= "UNCACHEABLE UNION";
    else
      type= "UNION";
  }

  options|= SELECT_DESCRIBE;
}
/**
A routine used by the parser to decide whether we are specifying a full
partitioning or if only partitions to add or to split.
@ -3093,4 +3169,3 @@ bool st_lex::is_partition_management() const
(alter_info.flags == ALTER_ADD_PARTITION ||
alter_info.flags == ALTER_REORGANIZE_PARTITION));
}

View File

@ -439,6 +439,7 @@ public:
st_select_lex_node(): linkage(UNSPECIFIED_TYPE) {}
virtual ~st_select_lex_node() {}
inline st_select_lex_node* get_master() { return master; }
inline void set_master(st_select_lex_node* master_arg) { master= master_arg; }
virtual void init_query();
virtual void init_select();
void include_down(st_select_lex_node *upper);
@ -446,6 +447,7 @@ public:
void include_standalone(st_select_lex_node *sel, st_select_lex_node **ref);
void include_global(st_select_lex_node **plink);
void exclude();
void exclude_from_tree();
virtual st_select_lex_unit* master_unit()= 0;
virtual st_select_lex* outer_select()= 0;
@ -846,6 +848,15 @@ public:
void clear_index_hints(void) { index_hints= NULL; }
bool is_part_of_union() { return master_unit()->is_union(); }
/*
Optimize all subqueries that have not been flattened into semi-joins.
This functionality is a method of SELECT_LEX instead of JOIN because
some SQL statements, such as DELETE, do not have a corresponding JOIN object.
*/
bool optimize_unflattened_subqueries();
/* Set the EXPLAIN type for this subquery. */
void set_explain_type();
private:
/* current index hint kind. used in filling up index_hints */
enum index_hint_type current_index_hint_type;

View File

@ -59,9 +59,9 @@ static bool make_join_statistics(JOIN *join, TABLE_LIST *leaves, COND *conds,
static bool update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,
JOIN_TAB *join_tab,
uint tables, COND *conds,
COND_EQUAL *cond_equal,
table_map table_map, SELECT_LEX *select_lex,
st_sargable_param **sargables);
static bool sort_and_filter_keyuse(DYNAMIC_ARRAY *keyuse);
static int sort_keyuse(KEYUSE *a,KEYUSE *b);
static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse,
table_map used_tables);
@ -236,8 +236,6 @@ static bool update_sum_func(Item_sum **func);
static void select_describe(JOIN *join, bool need_tmp_table,bool need_order,
bool distinct, const char *message=NullS);
static void add_group_and_distinct_keys(JOIN *join, JOIN_TAB *join_tab);
void get_partial_join_cost(JOIN *join, uint idx, double *read_time_arg,
double *record_count_arg);
static uint make_join_orderinfo(JOIN *join);
static int
join_read_record_no_init(JOIN_TAB *tab);
@ -843,6 +841,7 @@ JOIN::optimize()
"Impossible HAVING" : "Impossible WHERE";
tables= 0;
error= 0;
choose_tableless_subquery_plan();
goto setup_subq_exit;
}
}
@ -887,12 +886,13 @@ JOIN::optimize()
*/
if ((res=opt_sum_query(select_lex->leaf_tables, all_fields, conds)))
{
if (res == HA_ERR_KEY_NOT_FOUND)
if (res == HA_ERR_KEY_NOT_FOUND || res < 0)
{
DBUG_PRINT("info",("No matching min/max row"));
zero_result_cause= "No matching min/max row";
tables= 0;
error=0;
choose_tableless_subquery_plan();
goto setup_subq_exit;
}
if (res > 1)
@ -901,14 +901,7 @@ JOIN::optimize()
DBUG_PRINT("error",("Error from opt_sum_query"));
DBUG_RETURN(1);
}
if (res < 0)
{
DBUG_PRINT("info",("No matching min/max row"));
zero_result_cause= "No matching min/max row";
tables= 0;
error=0;
goto setup_subq_exit;
}
DBUG_PRINT("info",("Select tables optimized away"));
zero_result_cause= "Select tables optimized away";
tables_list= 0; // All tables resolved
@ -933,17 +926,14 @@ JOIN::optimize()
QT_ORDINARY););
conds= table_independent_conds;
}
goto setup_subq_exit;
}
}
if (!tables_list)
{
DBUG_PRINT("info",("No tables"));
error= 0;
/* Create all structures needed for materialized subquery execution. */
if (setup_subquery_materialization())
DBUG_RETURN(1);
DBUG_RETURN(0);
choose_tableless_subquery_plan();
goto setup_subq_exit;
}
error= -1; // Error is sent to client
sort_by_table= get_sort_by_table(order, group_list, select_lex->leaf_tables);
@ -1300,8 +1290,7 @@ JOIN::optimize()
if (!(select_options & SELECT_DESCRIBE))
init_ftfuncs(thd, select_lex, test(order));
/* Create all structures needed for materialized subquery execution. */
if (setup_subquery_materialization())
if (optimize_unflattened_subqueries())
DBUG_RETURN(1);
int res;
@ -1396,6 +1385,32 @@ JOIN::optimize()
if (join_tab->is_using_loose_index_scan())
tmp_table_param.precomputed_group_by= TRUE;
error= 0;
DBUG_RETURN(0);
setup_subq_exit:
/*
Even with zero matching rows, subqueries in the HAVING clause may
need to be evaluated if there are aggregate functions in the query.
*/
if (optimize_unflattened_subqueries())
DBUG_RETURN(1);
error= 0;
DBUG_RETURN(0);
}
/**
Create and initialize objects needed for the execution of a query plan.
*/
int JOIN::init_execution()
{
DBUG_ENTER("JOIN::init_execution");
DBUG_ASSERT(optimized);
initialized= true;
/* Create a tmp table if distinct or if the sort is too complicated */
if (need_tmp)
{
@ -1428,7 +1443,7 @@ JOIN::optimize()
select_options,
tmp_rows_limit,
(char *) "")))
{
{
DBUG_RETURN(1);
}
@ -1514,19 +1529,6 @@ JOIN::optimize()
DBUG_RETURN(-1); /* purecov: inspected */
}
error= 0;
DBUG_RETURN(0);
setup_subq_exit:
/*
Even with zero matching rows, subqueries in the HAVING clause may
need to be evaluated if there are aggregate functions in the
query. If we have planned to materialize the subquery, we need to
set it up properly before prematurely leaving optimize().
*/
if (setup_subquery_materialization())
DBUG_RETURN(1);
error= 0;
DBUG_RETURN(0);
}
@ -1745,6 +1747,9 @@ JOIN::exec()
int tmp_error;
DBUG_ENTER("JOIN::exec");
if (!initialized && init_execution())
DBUG_VOID_RETURN;
thd_proc_info(thd, "executing");
error= 0;
if (procedure)
@ -2561,51 +2566,6 @@ err:
}
/**
Setup for execution all subqueries of a query, for which the optimizer
chose hash semi-join.
@details Iterate over all subqueries of the query, and if they are under an
IN predicate, and the optimizer chose to compute it via hash semi-join:
- try to initialize all data structures needed for the materialized execution
of the IN predicate,
- if this fails, then perform the IN=>EXISTS transformation which was
previously blocked during JOIN::prepare.
This method is part of the "code generation" query processing phase.
This phase must be called after substitute_for_best_equal_field() because
that function may replace items with other items from a multiple equality,
and we need to reference the correct items in the index access method of the
IN predicate.
@return Operation status
@retval FALSE success.
@retval TRUE error occurred.
*/
bool JOIN::setup_subquery_materialization()
{
for (SELECT_LEX_UNIT *un= select_lex->first_inner_unit(); un;
un= un->next_unit())
{
for (SELECT_LEX *sl= un->first_select(); sl; sl= sl->next_select())
{
Item_subselect *subquery_predicate= sl->master_unit()->item;
if (subquery_predicate &&
subquery_predicate->substype() == Item_subselect::IN_SUBS)
{
Item_in_subselect *in_subs= (Item_in_subselect*) subquery_predicate;
if (in_subs->exec_method == Item_in_subselect::MATERIALIZATION &&
in_subs->setup_engine())
return TRUE;
}
}
}
return FALSE;
}
/*****************************************************************************
Create JOIN_TABS, make a guess about the table types,
Approximate how many records will be used in each table
@ -2830,10 +2790,14 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
}
if (conds || outer_join)
{
if (update_ref_and_keys(join->thd, keyuse_array, stat, join->tables,
conds, join->cond_equal,
~outer_join, join->select_lex, &sargables))
conds, ~outer_join, join->select_lex, &sargables))
goto error;
if (keyuse_array->elements && sort_and_filter_keyuse(keyuse_array))
goto error;
DBUG_EXECUTE("opt", print_keyuse_array(keyuse_array););
}
join->const_table_map= no_rows_const_tables;
join->const_tables= const_count;
@ -3136,6 +3100,9 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
sizeof(POSITION)*join->const_tables);
join->best_read=1.0;
}
if (join->choose_subquery_plan(all_table_map & ~join->const_table_map))
goto error;
/* Generate an execution plan from the found optimal join order. */
DBUG_RETURN(join->thd->killed || get_best_combination(join));
@ -4086,11 +4053,10 @@ static void add_key_fields_for_nj(JOIN *join, TABLE_LIST *nested_join_table,
static bool
update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,JOIN_TAB *join_tab,
uint tables, COND *cond, COND_EQUAL *cond_equal,
table_map normal_tables, SELECT_LEX *select_lex,
SARGABLE_PARAM **sargables)
uint tables, COND *cond, table_map normal_tables,
SELECT_LEX *select_lex, SARGABLE_PARAM **sargables)
{
uint and_level,i,found_eq_constant;
uint and_level,i;
KEY_FIELD *key_fields, *end, *field;
uint sz;
uint m= max(select_lex->max_equal_elems,1);
@ -4186,67 +4152,76 @@ update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,JOIN_TAB *join_tab,
return TRUE;
}
/*
Sort the array of possible keys and remove the following key parts:
- ref if there is a keypart which is a ref and a const.
(e.g. if there is a key(a,b) and the clause is a=3 and b=7 and b=t2.d,
then we skip the key part corresponding to b=t2.d)
- keyparts without previous keyparts
(e.g. if there is a key(a,b,c) but only b < 5 (or a=2 and c < 3) is
used in the query, we drop the partial key parts from consideration).
Special treatment for ft-keys.
*/
if (keyuse->elements)
{
KEYUSE key_end,*prev,*save_pos,*use;
my_qsort(keyuse->buffer,keyuse->elements,sizeof(KEYUSE),
(qsort_cmp) sort_keyuse);
bzero((char*) &key_end,sizeof(key_end)); /* Add for easy testing */
if (insert_dynamic(keyuse,(uchar*) &key_end))
return TRUE;
use=save_pos=dynamic_element(keyuse,0,KEYUSE*);
prev= &key_end;
found_eq_constant=0;
for (i=0 ; i < keyuse->elements-1 ; i++,use++)
{
if (!use->used_tables && use->optimize != KEY_OPTIMIZE_REF_OR_NULL)
use->table->const_key_parts[use->key]|= use->keypart_map;
if (use->keypart != FT_KEYPART)
{
if (use->key == prev->key && use->table == prev->table)
{
if (prev->keypart+1 < use->keypart ||
(prev->keypart == use->keypart && found_eq_constant))
continue; /* remove */
}
else if (use->keypart != 0) // First found must be 0
continue;
}
#ifdef HAVE_valgrind
/* Valgrind complains about overlapped memcpy when save_pos==use. */
if (save_pos != use)
#endif
*save_pos= *use;
prev=use;
found_eq_constant= !use->used_tables;
/* Save ptr to first use */
if (!use->table->reginfo.join_tab->keyuse)
use->table->reginfo.join_tab->keyuse=save_pos;
use->table->reginfo.join_tab->checked_keys.set_bit(use->key);
save_pos++;
}
i=(uint) (save_pos-(KEYUSE*) keyuse->buffer);
VOID(set_dynamic(keyuse,(uchar*) &key_end,i));
keyuse->elements=i;
}
DBUG_EXECUTE("opt", print_keyuse_array(keyuse););
return FALSE;
}
/**
Sort the array of possible keys and remove the following key parts:
- ref if there is a keypart which is a ref and a const.
(e.g. if there is a key(a,b) and the clause is a=3 and b=7 and b=t2.d,
then we skip the key part corresponding to b=t2.d)
- keyparts without previous keyparts
(e.g. if there is a key(a,b,c) but only b < 5 (or a=2 and c < 3) is
used in the query, we drop the partial key parts from consideration).
Special treatment for ft-keys.

The array is filtered in place: the elements that are kept are moved to the
front, a zero-filled terminator element is stored right behind them, and
keyuse->elements is set to the number of kept elements (the terminator is
not counted).

As a side effect, for each kept element the function sets the 'keyuse'
pointer of the owning JOIN_TAB (only if it is still NULL) and marks the key
in that JOIN_TAB's 'checked_keys'. TABLE::const_key_parts is updated for
every element that uses no tables and is not KEY_OPTIMIZE_REF_OR_NULL,
including elements that are subsequently dropped.

@param keyuse  in/out: non-empty array of KEYUSE elements

@retval FALSE  success
@retval TRUE   the terminator element could not be appended
*/
static bool sort_and_filter_keyuse(DYNAMIC_ARRAY *keyuse)
{
KEYUSE key_end, *prev, *save_pos, *use;
uint found_eq_constant, i;
/* The caller must not pass an empty array. */
DBUG_ASSERT(keyuse->elements);
my_qsort(keyuse->buffer, keyuse->elements, sizeof(KEYUSE),
(qsort_cmp) sort_keyuse);
/* Append a zero-filled terminator element behind the sorted elements. */
bzero((char*) &key_end, sizeof(key_end)); /* Add for easy testing */
if (insert_dynamic(keyuse, (uchar*) &key_end))
return TRUE;
/*
'use' reads the elements in sorted order, 'save_pos' is where the next kept
element is written, 'prev' is the last kept element (initially the zeroed
local terminator).
*/
use= save_pos= dynamic_element(keyuse,0,KEYUSE*);
prev= &key_end;
found_eq_constant= 0;
/* elements-1: the terminator that was just appended is not visited. */
for (i=0 ; i < keyuse->elements-1 ; i++,use++)
{
/* Remember key parts compared with an expression that uses no tables. */
if (!use->used_tables && use->optimize != KEY_OPTIMIZE_REF_OR_NULL)
use->table->const_key_parts[use->key]|= use->keypart_map;
/* Elements with keypart == FT_KEYPART bypass the filter below. */
if (use->keypart != FT_KEYPART)
{
if (use->key == prev->key && use->table == prev->table)
{
/*
Same key as the last kept element: drop this one if it would leave a gap
in the key parts, or if it repeats a key part for which the last kept
element used no tables (found_eq_constant).
*/
if (prev->keypart+1 < use->keypart ||
(prev->keypart == use->keypart && found_eq_constant))
continue; /* remove */
}
else if (use->keypart != 0) // First found must be 0
continue;
}
#ifdef HAVE_valgrind
/* Valgrind complains about overlapped memcpy when save_pos==use. */
if (save_pos != use)
#endif
*save_pos= *use;
/* 'prev' refers to the slot that was read, not to the compacted copy. */
prev= use;
found_eq_constant= !use->used_tables;
/* Save ptr to first use */
if (!use->table->reginfo.join_tab->keyuse)
use->table->reginfo.join_tab->keyuse=save_pos;
use->table->reginfo.join_tab->checked_keys.set_bit(use->key);
save_pos++;
}
/* Number of kept elements; store the terminator right behind them. */
i= (uint) (save_pos-(KEYUSE*) keyuse->buffer);
VOID(set_dynamic(keyuse,(uchar*) &key_end,i));
/* Truncate the array so that the terminator is not counted. */
keyuse->elements= i;
return FALSE;
}
/**
Update some values in keyuse for faster choose_plan() loop.
*/
@ -5433,40 +5408,43 @@ greedy_search(JOIN *join,
}
/*
Calculate a cost of given partial join order
/**
Calculate a cost of given partial join order in join->positions.
SYNOPSIS
get_partial_join_cost()
join IN Join to use. join->positions holds the
partial join order
idx IN # tables in the partial join order
read_time_arg OUT Store read time here
record_count_arg OUT Store record count here
@param n_tables[in] # tables in the partial join order after the last
constant table
@param read_time_arg[out] store read time here
@param record_count_arg[out] store record count here
DESCRIPTION
This is needed for semi-join materialization code. The idea is that
we detect sj-materialization after we've put all sj-inner tables into
the join prefix
@note
When used by semi-join materialization code the idea is that we
detect sj-materialization after we've put all sj-inner tables into
the join prefix.
prefix-tables semi-join-inner-tables tN
^--we're here
and we'll need to get the cost of prefix-tables prefix again.
When used with non-flattened subqueries, the method computes the
total cost of query plan.
@returns
read_time_arg and record_count_arg contain the computed cost.
*/
void get_partial_join_cost(JOIN *join, uint n_tables, double *read_time_arg,
double *record_count_arg)
void JOIN::get_partial_join_cost(uint n_tables,
double *read_time_arg, double *record_count_arg)
{
double record_count= 1;
double read_time= 0.0;
for (uint i= join->const_tables; i < n_tables + join->const_tables ; i++)
for (uint i= const_tables; i < n_tables; i++)
{
if (join->best_positions[i].records_read)
if (best_positions[i].records_read)
{
record_count *= join->best_positions[i].records_read;
read_time += join->best_positions[i].read_time;
record_count *= best_positions[i].records_read;
read_time += best_positions[i].read_time;
}
}
*read_time_arg= read_time;// + record_count / TIME_FOR_COMPARE;
@ -11428,10 +11406,30 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
{
if (thd->is_fatal_error)
goto err; // Got OOM
continue; // Some kindf of const item
continue; // Some kind of const item
}
if (type == Item::SUM_FUNC_ITEM)
((Item_sum *) item)->result_field= new_field;
{
Item_sum *agg_item= (Item_sum *) item;
/*
Update the result field only if it has never been set, or if the
created temporary table is not to be used for subquery
materialization.
The reason is that for subqueries that require materialization as part
of their plan, we create the 'external' temporary table needed for IN
execution, after the 'internal' temporary table needed for grouping.
Since both the external and the internal temporary tables are created
for the same list of SELECT fields of the subquery, setting
'result_field' for each invocation of create_tmp_table overrides the
previous value of 'result_field'.
The condition below prevents the creation of the external temp table
to override the 'result_field' that was set for the internal temp table.
*/
if (!agg_item->result_field || !param->materialized_subquery)
agg_item->result_field= new_field;
}
tmp_from_field++;
reclength+=new_field->pack_length();
if (!(new_field->flags & NOT_NULL_FLAG))
@ -18895,28 +18893,9 @@ bool mysql_explain_union(THD *thd, SELECT_LEX_UNIT *unit, select_result *result)
bool res= 0;
SELECT_LEX *first= unit->first_select();
for (SELECT_LEX *sl= first;
sl;
sl= sl->next_select())
{
// drop UNCACHEABLE_EXPLAIN, because it is for internal usage only
uint8 uncacheable= (sl->uncacheable & ~UNCACHEABLE_EXPLAIN);
sl->type= (((&thd->lex->select_lex)==sl)?
(sl->first_inner_unit() || sl->next_select() ?
"PRIMARY" : "SIMPLE"):
((sl == first)?
((sl->linkage == DERIVED_TABLE_TYPE) ?
"DERIVED":
((uncacheable & UNCACHEABLE_DEPENDENT) ?
"DEPENDENT SUBQUERY":
(uncacheable?"UNCACHEABLE SUBQUERY":
"SUBQUERY"))):
((uncacheable & UNCACHEABLE_DEPENDENT) ?
"DEPENDENT UNION":
uncacheable?"UNCACHEABLE UNION":
"UNION")));
sl->options|= SELECT_DESCRIBE;
}
for (SELECT_LEX *sl= first; sl; sl= sl->next_select())
sl->set_explain_type();
if (unit->is_union())
{
unit->fake_select_lex->select_number= UINT_MAX; // jost for initialization
@ -19334,6 +19313,8 @@ bool JOIN::change_result(select_result *res)
{
DBUG_ENTER("JOIN::change_result");
result= res;
if (tmp_join)
tmp_join->result= res;
if (!procedure && (result->prepare(fields_list, select_lex->master_unit()) ||
result->prepare2()))
{
@ -19342,6 +19323,155 @@ bool JOIN::change_result(select_result *res)
DBUG_RETURN(FALSE);
}
/**
  Move the current query plan into caller-provided storage and blank the
  plan of this JOIN, so that the JOIN can be optimized again and the caller
  can later go back to the saved plan.

  If JOIN::keyuse is not empty, 'save_keyuse' is initialized as a new empty
  array and exchanged with JOIN::keyuse, and the 'keyuse' pointer and
  'checked_keys' bitmap of each JOIN_TAB are saved and then cleared.
  JOIN::best_positions is always copied out and then filled with zeroes.

  @param save_keyuse[out]          a KEYUSE array that receives JOIN::keyuse
  @param save_best_positions[out]  array that receives JOIN::best_positions
                                   (tables + 1 elements)
  @param save_join_tab_keyuse[out] array of KEYUSE pointers that receives
                                   each JOIN_TAB::keyuse pointer
  @param save_join_tab_checked_keys[out] an array of bitmaps that receives
                                   each JOIN_TAB::checked_keys

  @retval 0 OK
  @retval 1 memory allocation error
*/

int JOIN::save_query_plan(DYNAMIC_ARRAY *save_keyuse,
                          POSITION *save_best_positions,
                          KEYUSE **save_join_tab_keyuse,
                          key_map *save_join_tab_checked_keys)
{
  const size_t positions_size= sizeof(POSITION) * (tables + 1);

  if (keyuse.elements)
  {
    if (my_init_dynamic_array(save_keyuse, sizeof(KEYUSE), 20, 64))
      return 1;

    /* Exchange the (populated) current array with the (empty) backup one. */
    DYNAMIC_ARRAY current_keyuse= keyuse;
    keyuse= *save_keyuse;
    *save_keyuse= current_keyuse;

    for (uint idx= 0; idx < tables; idx++)
    {
      JOIN_TAB *tab= join_tab + idx;

      save_join_tab_keyuse[idx]= tab->keyuse;
      save_join_tab_checked_keys[idx]= tab->checked_keys;

      tab->keyuse= NULL;
      tab->checked_keys.clear_all();
    }
  }

  memcpy((uchar*) save_best_positions, (uchar*) best_positions,
         positions_size);
  memset(best_positions, 0, positions_size);

  return 0;
}
/**
  Put back a query plan that the caller saved earlier.

  If 'save_keyuse' is not empty, it is exchanged with JOIN::keyuse, the
  array that was current before the call is freed, and the 'keyuse' pointer
  and 'checked_keys' bitmap of each JOIN_TAB are put back.
  JOIN::best_positions is always overwritten with the saved copy.

  @param save_keyuse          a KEYUSE array to restore into JOIN::keyuse
  @param save_best_positions  array to restore into JOIN::best_positions
                              (tables + 1 elements)
  @param save_join_tab_keyuse array of KEYUSE pointers to restore each
                              JOIN_TAB::keyuse pointer
  @param save_join_tab_checked_keys an array of bitmaps to restore
                              each JOIN_TAB::checked_keys
*/

void JOIN::restore_query_plan(DYNAMIC_ARRAY *save_keyuse,
                              POSITION *save_best_positions,
                              KEYUSE **save_join_tab_keyuse,
                              key_map *save_join_tab_checked_keys)
{
  if (save_keyuse->elements)
  {
    /* Bring the saved array back; the discarded one lands in save_keyuse. */
    DYNAMIC_ARRAY discarded_keyuse= keyuse;
    keyuse= *save_keyuse;
    *save_keyuse= discarded_keyuse;
    delete_dynamic(save_keyuse);

    for (uint idx= 0; idx < tables; idx++)
    {
      JOIN_TAB *tab= join_tab + idx;

      tab->keyuse= save_join_tab_keyuse[idx];
      tab->checked_keys= save_join_tab_checked_keys[idx];
    }
  }

  memcpy((uchar*) best_positions, (uchar*) save_best_positions,
         sizeof(POSITION) * (tables + 1));
}
/**
  Reoptimize a query plan taking into account an additional conjunct to the
  WHERE clause.

  @param added_where  An extra conjunct to the WHERE clause to reoptimize with
  @param join_tables  The set of tables to reoptimize

  @notes
  Given a query plan that was already optimized taking into account some
  WHERE clause 'C', reoptimize this plan with a new WHERE clause
  'C AND added_where'. The reoptimization works as follows:

  1. Call update_ref_and_keys *only* for the new conditions 'added_where'
     that are about to be injected into the query.
  2. Expand if necessary the original KEYUSE array JOIN::keyuse to
     accommodate the new REF accesses computed for the 'added_where' condition.
  3. Add the new KEYUSEs into JOIN::keyuse.
  4. Re-sort and re-filter the JOIN::keyuse array with the newly added
     KEYUSE elements.
  5. Re-run the join optimizer (choose_plan) for 'join_tables'.

  The temporary array with the new KEYUSEs is released on every return path.

  @retval REOPT_NEW_PLAN  there is a new plan.
  @retval REOPT_OLD_PLAN  no new improved plan was produced, use the old one.
  @retval REOPT_ERROR     an irrecoverable error occurred during
                          reoptimization.
*/

JOIN::enum_reopt_result JOIN::reoptimize(Item *added_where, table_map join_tables)
{
  DYNAMIC_ARRAY added_keyuse;
  SARGABLE_PARAM *sargables= 0; /* Used only as a dummy parameter. */

  /*
    Start from a zeroed array, so that delete_dynamic() below is harmless
    in case update_ref_and_keys() fails before it gets to initialize it.
  */
  bzero((char*) &added_keyuse, sizeof(added_keyuse));

  /* Re-run the REF optimizer to take into account the new conditions. */
  if (update_ref_and_keys(thd, &added_keyuse, join_tab, tables, added_where,
                          ~outer_join, select_lex, &sargables))
  {
    delete_dynamic(&added_keyuse);
    return REOPT_ERROR;
  }

  if (!added_keyuse.elements)
  {
    /* Nothing new to consider; do not leak the temporary array. */
    delete_dynamic(&added_keyuse);
    return REOPT_OLD_PLAN;
  }

  /* Add the new access methods to the keyuse array. */
  if (!keyuse.buffer &&
      my_init_dynamic_array(&keyuse, sizeof(KEYUSE), 20, 64))
  {
    delete_dynamic(&added_keyuse);
    return REOPT_ERROR;
  }

  /*
    Make room for the new elements. The copy below must not be done if the
    array could not be extended, because it would write past the buffer.
  */
  if (allocate_dynamic(&keyuse, keyuse.elements + added_keyuse.elements))
  {
    delete_dynamic(&added_keyuse);
    return REOPT_ERROR;
  }

  memcpy(keyuse.buffer + keyuse.elements * keyuse.size_of_element,
         added_keyuse.buffer,
         (size_t) added_keyuse.elements * added_keyuse.size_of_element);
  keyuse.elements+= added_keyuse.elements;
  delete_dynamic(&added_keyuse);

  if (sort_and_filter_keyuse(&keyuse))
    return REOPT_ERROR;
  optimize_keyuse(this, &keyuse);

  /* Re-run the join optimizer to compute a new query plan. */
  if (choose_plan(this, join_tables))
    return REOPT_ERROR;

  return REOPT_NEW_PLAN;
}
/**
@} (end of group Query_Optimizer)
*/

View File

@ -1369,8 +1369,30 @@ inline bool sj_is_materialize_strategy(uint strategy)
class JOIN :public Sql_alloc
{
private:
JOIN(const JOIN &rhs); /**< not implemented */
JOIN& operator=(const JOIN &rhs); /**< not implemented */
protected:
/* Results of reoptimizing a JOIN via JOIN::reoptimize(). */
enum enum_reopt_result {
REOPT_NEW_PLAN, /* there is a new reoptimized plan */
REOPT_OLD_PLAN, /* no new improved plan can be found, use the old one */
REOPT_ERROR, /* an irrecovarable error occured during reoptimization */
REOPT_NONE /* not yet reoptimized */
};
/* Support for plan reoptimization with rewritten conditions. */
enum_reopt_result reoptimize(Item *added_where, table_map join_tables);
int save_query_plan(DYNAMIC_ARRAY *save_keyuse, POSITION *save_positions,
KEYUSE **save_join_tab_keyuse,
key_map *save_join_tab_checked_keys);
void restore_query_plan(DYNAMIC_ARRAY *save_keyuse, POSITION *save_positions,
KEYUSE **save_join_tab_keyuse,
key_map *save_join_tab_checked_keys);
/* Choose a subquery plan for a table-less subquery. */
bool choose_tableless_subquery_plan();
public:
JOIN_TAB *join_tab,**best_ref;
JOIN_TAB **map2table; ///< mapping between table indexes and JOIN_TABs
@ -1581,8 +1603,15 @@ public:
bool union_part; ///< this subselect is part of union
bool optimized; ///< flag to avoid double optimization in EXPLAIN
bool initialized; ///< flag to avoid double init_execution calls
Array<Item_in_subselect> sj_subselects;
/*
Additional WHERE and HAVING predicates to be considered for IN=>EXISTS
subquery transformation of a JOIN object.
*/
Item *in_to_exists_where;
Item *in_to_exists_having;
/* Temporary tables used to weed-out semi-join duplicates */
List<TABLE> sj_tmp_tables;
@ -1657,6 +1686,7 @@ public:
ref_pointer_array_size= 0;
zero_result_cause= 0;
optimized= 0;
initialized= 0;
cond_equal= 0;
having_equal= 0;
group_optimized_away= 0;
@ -1672,6 +1702,8 @@ public:
no_const_tables= FALSE;
first_select= sub_select;
in_to_exists_where= NULL;
in_to_exists_having= NULL;
}
int prepare(Item ***rref_pointer_array, TABLE_LIST *tables, uint wind_num,
@ -1680,12 +1712,13 @@ public:
SELECT_LEX_UNIT *unit);
int optimize();
int reinit();
int init_execution();
void exec();
int destroy();
void restore_tmp();
bool alloc_func_list();
bool flatten_subqueries();
bool setup_subquery_materialization();
bool optimize_unflattened_subqueries();
bool make_sum_func_list(List<Item> &all_fields, List<Item> &send_fields,
bool before_group_by, bool recompute= FALSE);
@ -1745,6 +1778,10 @@ public:
NULL : join_tab+const_tables;
}
bool setup_subquery_caches();
bool choose_subquery_plan(table_map join_tables);
void get_partial_join_cost(uint n_tables,
double *read_time_arg, double *record_count_arg);
private:
/**
TRUE if the query contains an aggregate function but has no GROUP
@ -1977,8 +2014,6 @@ inline Item * and_items(Item* cond, Item *item)
return (cond? (new Item_cond_and(cond, item)) : item);
}
bool choose_plan(JOIN *join,table_map join_tables);
void get_partial_join_cost(JOIN *join, uint n_tables, double *read_time_arg,
double *record_count_arg);
void optimize_wo_join_buffering(JOIN *join, uint first_tab, uint last_tab,
table_map last_remaining_tables,
bool first_alt, uint no_jbuf_before,

View File

@ -185,6 +185,8 @@ st_select_lex_unit::init_prepare_fake_select_lex(THD *thd_arg)
{
(*order->item)->walk(&Item::change_context_processor, 0,
(uchar*) &fake_select_lex->context);
(*order->item)->walk(&Item::set_fake_select_as_master_processor, 0,
(uchar*) fake_select_lex);
}
}
@ -270,6 +272,18 @@ bool st_select_lex_unit::prepare(THD *thd_arg, select_result *sel_result,
can_skip_order_by= is_union_select && !(sl->braces && sl->explicit_limit);
/*
Remove all references from the select_lex_units to the subqueries that
are inside the ORDER BY clause.
*/
if (can_skip_order_by)
{
for (ORDER *ord= (ORDER *)sl->order_list.first; ord; ord= ord->next)
{
(*ord->item)->walk(&Item::eliminate_subselect_processor, FALSE, NULL);
}
}
saved_error= join->prepare(&sl->ref_pointer_array,
sl->table_list.first,
sl->with_wild,

View File

@ -290,6 +290,10 @@ int mysql_update(THD *thd,
DBUG_RETURN(1); /* purecov: inspected */
}
/* Apply the IN=>EXISTS transformation to all subqueries and optimize them. */
if (select_lex->optimize_unflattened_subqueries())
DBUG_RETURN(TRUE);
if (select_lex->inner_refs_list.elements &&
fix_inner_refs(thd, all_fields, select_lex, select_lex->ref_pointer_array))
DBUG_RETURN(1);