From 8af289d2b0ac35c5ac76f813cd9e4d5aa5eb6adb Mon Sep 17 00:00:00 2001 From: Sergey Petrunya Date: Mon, 18 Nov 2013 12:26:25 +0400 Subject: [PATCH 1/2] MDEV-5293: outer join, join buffering, and order by - invalid query plan - make_join_readinfo() has the code that forces use of "Using temporary; Using filesort" when join buffering is in use. That code didn't handle all cases, in particular it didn't hande the case where ORDER BY originally has tables from multiple columns, but the optimizer eventually figures out that doing filesort() on one table will be sufficient. Adjusted the code to handle that case. --- mysql-test/r/join_cache.result | 19 +++++++++++++++++++ mysql-test/t/join_cache.test | 20 ++++++++++++++++++++ sql/sql_select.cc | 18 +++++++++++++++++- 3 files changed, 56 insertions(+), 1 deletion(-) diff --git a/mysql-test/r/join_cache.result b/mysql-test/r/join_cache.result index 618ce7e540b..b3ca6dc0df2 100644 --- a/mysql-test/r/join_cache.result +++ b/mysql-test/r/join_cache.result @@ -5637,4 +5637,23 @@ c set join_buffer_size=default; set optimizer_switch=@tmp_optimizer_switch; DROP table t1,t2,t3; +set join_buffer_size= default; +set @@optimizer_switch=@save_optimizer_switch; +# +# MDEV-5293: outer join, join buffering, and order by - invalid query plan +# +create table t0 (a int primary key) engine=myisam; +insert into t0 values (1); +create table t1(a int) engine=myisam; +insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +alter table t1 add b int; +create table t2 like t1; +insert into t2 select * from t1; +#The following must use "Using temporary; Using filesort" and not just "Using filesort": +explain select * from t0,t1 left join t2 on t1.b=t2.b order by t0.a, t1.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t0 system NULL NULL NULL NULL 1 Using temporary; Using filesort +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 +1 SIMPLE t2 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join) +drop table t0,t1,t2; set @@optimizer_switch=@save_optimizer_switch; diff --git a/mysql-test/t/join_cache.test b/mysql-test/t/join_cache.test index 2d06c3e2a30..585ef677492 100644 --- a/mysql-test/t/join_cache.test +++ b/mysql-test/t/join_cache.test @@ -3635,6 +3635,26 @@ set join_buffer_size=default; set optimizer_switch=@tmp_optimizer_switch; DROP table t1,t2,t3; +set join_buffer_size= default; +set @@optimizer_switch=@save_optimizer_switch; + + +--echo # +--echo # MDEV-5293: outer join, join buffering, and order by - invalid query plan +--echo # +create table t0 (a int primary key) engine=myisam; +insert into t0 values (1); + +create table t1(a int) engine=myisam; +insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +alter table t1 add b int; + +create table t2 like t1; +insert into t2 select * from t1; +--echo #The following must use "Using temporary; Using filesort" and not just "Using filesort": +explain select * from t0,t1 left join t2 on t1.b=t2.b order by t0.a, t1.a; + +drop table t0,t1,t2; # this must be the last command in the file set @@optimizer_switch=@save_optimizer_switch; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index cd81b41d0ec..a1f125b11f0 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -10284,7 +10284,23 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after) join->group_list ? join->join_tab+join->const_tables : join->get_sort_by_join_tab(); - if (sort_by_tab) + /* + It could be that sort_by_tab==NULL, and the plan is to use filesort() + on the first table. + */ + if (join->order) + { + join->simple_order= 0; + join->need_tmp= 1; + } + + if (join->group && !join->group_optimized_away) + { + join->need_tmp= 1; + join->simple_group= 0; + } + + if (sort_by_tab) { join->need_tmp= 1; join->simple_order= join->simple_group= 0; From c4defdc8d971cdcc186de549bae9ac4351c7aade Mon Sep 17 00:00:00 2001 From: Sergey Petrunya Date: Thu, 21 Nov 2013 11:19:01 +0400 Subject: [PATCH 2/2] MDEV-5161: Wrong result (missing rows) with semijoin, LEFT JOIN, ORDER BY, constant table - Don't pull out a table out of a semi-join if it is on the inner side of an outer join. - Make join->sort_by_table= get_sort_by_table(...) call after const table detection is done. That way, the value of join->sort_by_table will match the actual execution. Which will allow the code in setup_semijoin_dups_elimination() (search for "Make sure that possible sorting of rows from the head table is not to be employed." to see that "Using filesort" is going to be used together with Duplicate Elimination ( and change it to Using temporary + Using filesort) --- mysql-test/r/subselect_sj.result | 33 +++++++++++++++++++++++++++ mysql-test/r/subselect_sj_jcl6.result | 33 +++++++++++++++++++++++++++ mysql-test/t/subselect_sj.test | 21 +++++++++++++++++ sql/opt_subselect.cc | 19 +++++++++++++-- sql/sql_select.cc | 29 ++++++++++++++++++++--- 5 files changed, 130 insertions(+), 5 deletions(-) diff --git a/mysql-test/r/subselect_sj.result b/mysql-test/r/subselect_sj.result index 2d229db9ec5..babcbaf34d1 100644 --- a/mysql-test/r/subselect_sj.result +++ b/mysql-test/r/subselect_sj.result @@ -2920,4 +2920,37 @@ COUNT(*) DROP TABLE t1, t2, t3; set join_buffer_size = @tmp_join_buffer_size; set max_heap_table_size = @tmp_max_heap_table_size; +# +# MDEV-5161: Wrong result (missing rows) with semijoin, LEFT JOIN, ORDER BY, constant table +# +select @@optimizer_switch; +@@optimizer_switch +index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=off,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=on,mrr_cost_based=off,mrr_sort_keys=on,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=off,table_elimination=on,extended_keys=off +select @@join_cache_level; +@@join_cache_level +1 +CREATE TABLE t1 (pk INT PRIMARY KEY, c1 VARCHAR(1)) ENGINE=MyISAM; +INSERT INTO t1 VALUES (1,'v'),(2,'v'),(3,'c'),(4,NULL),(5,'x'); +CREATE TABLE t2 (c2 VARCHAR(1)) ENGINE=MyISAM; +INSERT INTO t2 VALUES ('x'); +CREATE TABLE t3 (c3 VARCHAR(1)) ENGINE=MyISAM; +INSERT INTO t3 VALUES ('x'),('d'); +SELECT * FROM t1, t2 WHERE pk IN ( SELECT pk FROM t1 LEFT JOIN t3 ON (c1 = c3 ) ) ORDER BY c2, c1; +pk c1 c2 +4 NULL x +3 c x +1 v x +2 v x +5 x x +# This should show that "t1 left join t3" is still in the semi-join nest: +EXPLAIN EXTENDED +SELECT * FROM t1, t2 WHERE pk IN ( SELECT pk FROM t1 LEFT JOIN t3 ON (c1 = c3 ) ) ORDER BY c2, c1; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 PRIMARY t2 system NULL NULL NULL NULL 1 100.00 Using temporary; Using filesort +1 PRIMARY t1 ALL PRIMARY NULL NULL NULL 5 100.00 Start temporary +1 PRIMARY t1 eq_ref PRIMARY PRIMARY 4 test.t1.pk 1 100.00 +1 PRIMARY t3 ALL NULL NULL NULL NULL 2 100.00 Using where; End temporary +Warnings: +Note 1003 select `test`.`t1`.`pk` AS `pk`,`test`.`t1`.`c1` AS `c1`,'x' AS `c2` from `test`.`t1` semi join (`test`.`t1` left join `test`.`t3` on((`test`.`t1`.`c1` = `test`.`t3`.`c3`))) where (`test`.`t1`.`pk` = `test`.`t1`.`pk`) order by 'x',`test`.`t1`.`c1` +DROP TABLE t1,t2,t3; set optimizer_switch=@subselect_sj_tmp; diff --git a/mysql-test/r/subselect_sj_jcl6.result b/mysql-test/r/subselect_sj_jcl6.result index a0ebbb3305d..ca903de4e80 100644 --- a/mysql-test/r/subselect_sj_jcl6.result +++ b/mysql-test/r/subselect_sj_jcl6.result @@ -2934,6 +2934,39 @@ COUNT(*) DROP TABLE t1, t2, t3; set join_buffer_size = @tmp_join_buffer_size; set max_heap_table_size = @tmp_max_heap_table_size; +# +# MDEV-5161: Wrong result (missing rows) with semijoin, LEFT JOIN, ORDER BY, constant table +# +select @@optimizer_switch; +@@optimizer_switch +index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=off,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=on,mrr_cost_based=off,mrr_sort_keys=on,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=off,join_cache_bka=on,optimize_join_buffer_size=on,table_elimination=on,extended_keys=off +select @@join_cache_level; +@@join_cache_level +6 +CREATE TABLE t1 (pk INT PRIMARY KEY, c1 VARCHAR(1)) ENGINE=MyISAM; +INSERT INTO t1 VALUES (1,'v'),(2,'v'),(3,'c'),(4,NULL),(5,'x'); +CREATE TABLE t2 (c2 VARCHAR(1)) ENGINE=MyISAM; +INSERT INTO t2 VALUES ('x'); +CREATE TABLE t3 (c3 VARCHAR(1)) ENGINE=MyISAM; +INSERT INTO t3 VALUES ('x'),('d'); +SELECT * FROM t1, t2 WHERE pk IN ( SELECT pk FROM t1 LEFT JOIN t3 ON (c1 = c3 ) ) ORDER BY c2, c1; +pk c1 c2 +4 NULL x +3 c x +1 v x +2 v x +5 x x +# This should show that "t1 left join t3" is still in the semi-join nest: +EXPLAIN EXTENDED +SELECT * FROM t1, t2 WHERE pk IN ( SELECT pk FROM t1 LEFT JOIN t3 ON (c1 = c3 ) ) ORDER BY c2, c1; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 PRIMARY t2 system NULL NULL NULL NULL 1 100.00 Using temporary; Using filesort +1 PRIMARY t1 ALL PRIMARY NULL NULL NULL 5 100.00 Start temporary +1 PRIMARY t1 eq_ref PRIMARY PRIMARY 4 test.t1.pk 1 100.00 Using join buffer (flat, BKA join); Key-ordered Rowid-ordered scan +1 PRIMARY t3 ALL NULL NULL NULL NULL 2 100.00 Using where; End temporary; Using join buffer (incremental, BNL join) +Warnings: +Note 1003 select `test`.`t1`.`pk` AS `pk`,`test`.`t1`.`c1` AS `c1`,'x' AS `c2` from `test`.`t1` semi join (`test`.`t1` left join `test`.`t3` on((`test`.`t1`.`c1` = `test`.`t3`.`c3`))) where (`test`.`t1`.`pk` = `test`.`t1`.`pk`) order by 'x',`test`.`t1`.`c1` +DROP TABLE t1,t2,t3; set optimizer_switch=@subselect_sj_tmp; # # BUG#49129: Wrong result with IN-subquery with join_cache_level=6 and firstmatch=off diff --git a/mysql-test/t/subselect_sj.test b/mysql-test/t/subselect_sj.test index cde8d5d0e19..3b5f4bb08b2 100644 --- a/mysql-test/t/subselect_sj.test +++ b/mysql-test/t/subselect_sj.test @@ -2622,5 +2622,26 @@ DROP TABLE t1, t2, t3; set join_buffer_size = @tmp_join_buffer_size; set max_heap_table_size = @tmp_max_heap_table_size; +--echo # +--echo # MDEV-5161: Wrong result (missing rows) with semijoin, LEFT JOIN, ORDER BY, constant table +--echo # +select @@optimizer_switch; +select @@join_cache_level; +CREATE TABLE t1 (pk INT PRIMARY KEY, c1 VARCHAR(1)) ENGINE=MyISAM; +INSERT INTO t1 VALUES (1,'v'),(2,'v'),(3,'c'),(4,NULL),(5,'x'); + +CREATE TABLE t2 (c2 VARCHAR(1)) ENGINE=MyISAM; +INSERT INTO t2 VALUES ('x'); + +CREATE TABLE t3 (c3 VARCHAR(1)) ENGINE=MyISAM; +INSERT INTO t3 VALUES ('x'),('d'); + +SELECT * FROM t1, t2 WHERE pk IN ( SELECT pk FROM t1 LEFT JOIN t3 ON (c1 = c3 ) ) ORDER BY c2, c1; + +--echo # This should show that "t1 left join t3" is still in the semi-join nest: +EXPLAIN EXTENDED +SELECT * FROM t1, t2 WHERE pk IN ( SELECT pk FROM t1 LEFT JOIN t3 ON (c1 = c3 ) ) ORDER BY c2, c1; + +DROP TABLE t1,t2,t3; # The following command must be the last one the file set optimizer_switch=@subselect_sj_tmp; diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc index d4cb33c759a..0f5a4500353 100644 --- a/sql/opt_subselect.cc +++ b/sql/opt_subselect.cc @@ -1924,11 +1924,25 @@ int pull_out_semijoin_tables(JOIN *join) } } - table_map pulled_tables= 0; + table_map dep_tables= 0; if (have_join_nest_children) goto skip; + /* + Calculate set of tables within this semi-join nest that have + other dependent tables + */ + child_li.rewind(); + while ((tbl= child_li++)) + { + TABLE *const table= tbl->table; + if (table && + (table->reginfo.join_tab->dependent & + sj_nest->nested_join->used_tables)) + dep_tables|= table->reginfo.join_tab->dependent; + } + /* Action #1: Mark the constant tables to be pulled out */ child_li.rewind(); while ((tbl= child_li++)) @@ -1979,7 +1993,8 @@ int pull_out_semijoin_tables(JOIN *join) child_li.rewind(); while ((tbl= child_li++)) { - if (tbl->table && !(pulled_tables & tbl->table->map)) + if (tbl->table && !(pulled_tables & tbl->table->map) && + !(dep_tables & tbl->table->map)) { if (find_eq_ref_candidate(tbl->table, sj_nest->nested_join->used_tables & diff --git a/sql/sql_select.cc b/sql/sql_select.cc index a1f125b11f0..d7a672b210f 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -241,7 +241,8 @@ static ORDER *create_distinct_group(THD *thd, Item **ref_pointer_array, List &all_fields, bool *all_order_by_fields_used); static bool test_if_subpart(ORDER *a,ORDER *b); -static TABLE *get_sort_by_table(ORDER *a,ORDER *b,List &tables); +static TABLE *get_sort_by_table(ORDER *a,ORDER *b,List &tables, + table_map const_tables); static void calc_group_buffer(JOIN *join,ORDER *group); static bool make_group_fields(JOIN *main_join, JOIN *curr_join); static bool alloc_group_fields(JOIN *join,ORDER *group); @@ -1211,7 +1212,8 @@ JOIN::optimize() goto setup_subq_exit; } error= -1; // Error is sent to client - sort_by_table= get_sort_by_table(order, group_list, select_lex->leaf_tables); + /* get_sort_by_table() call used to be here: */ + MEM_UNDEFINED(&sort_by_table, sizeof(sort_by_table)); /* Calculate how to do the join */ thd_proc_info(thd, "statistics"); @@ -3583,6 +3585,9 @@ make_join_statistics(JOIN *join, List &tables_list, } } while (join->const_table_map & found_ref && ref_changed); + join->sort_by_table= get_sort_by_table(join->order, join->group_list, + join->select_lex->leaf_tables, + join->const_table_map); /* Update info on indexes that can be used for search lookups as reading const tables may has added new sargable predicates. @@ -20730,7 +20735,8 @@ test_if_subpart(ORDER *a,ORDER *b) */ static TABLE * -get_sort_by_table(ORDER *a,ORDER *b, List &tables) +get_sort_by_table(ORDER *a,ORDER *b, List &tables, + table_map const_tables) { TABLE_LIST *table; List_iterator ti(tables); @@ -20744,6 +20750,23 @@ get_sort_by_table(ORDER *a,ORDER *b, List &tables) for (; a && b; a=a->next,b=b->next) { + /* Skip elements of a that are constant */ + while (!((*a->item)->used_tables() & ~const_tables)) + { + if (!(a= a->next)) + break; + } + + /* Skip elements of b that are constant */ + while (!((*b->item)->used_tables() & ~const_tables)) + { + if (!(b= b->next)) + break; + } + + if (!a || !b) + break; + if (!(*a->item)->eq(*b->item,1)) DBUG_RETURN(0); map|=a->item[0]->used_tables();