From 9b475ee3c1855c23fc716ba83a261c238d9b6f83 Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Wed, 5 Aug 2015 20:43:25 +0300 Subject: [PATCH] MDEV-8289: Semijoin inflates number of rows in query result - Make semi-join optimizer not to choose LooseScan when 1) the index is not covered and 2) full index scan will be required. - Make sure that the code in make_join_select() that may change full index scan into a range scan is not invoked when the table uses full scan. --- mysql-test/r/subselect_sj2.result | 33 ++++++++++ mysql-test/r/subselect_sj2_jcl6.result | 33 ++++++++++ mysql-test/r/subselect_sj2_mat.result | 33 ++++++++++ mysql-test/t/subselect_sj2.test | 39 +++++++++++ sql/opt_subselect.cc | 91 +++++++++++++++++++------- sql/opt_subselect.h | 13 +++- sql/sql_select.cc | 8 +++ sql/sql_select.h | 3 +- 8 files changed, 226 insertions(+), 27 deletions(-) diff --git a/mysql-test/r/subselect_sj2.result b/mysql-test/r/subselect_sj2.result index 9a5da710a4c..e6259a3979b 100644 --- a/mysql-test/r/subselect_sj2.result +++ b/mysql-test/r/subselect_sj2.result @@ -1262,5 +1262,38 @@ id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY T2_0_ ref FK_T2_T1Id FK_T2_T1Id 8 test.T2_1_.t1idref 1 Using index; End temporary drop table t3,t2,t1; set optimizer_search_depth=@tmp7474; +# +# +# +CREATE TABLE t1 ( +id int(16) NOT NULL AUTO_INCREMENT, +PRIMARY KEY (id) +) ENGINE=InnoDB DEFAULT CHARSET=utf8; +CREATE TABLE t2 ( +id int(16) NOT NULL AUTO_INCREMENT, +t3_id int(16) NOT NULL DEFAULT '0', +t1_id int(16) NOT NULL DEFAULT '0', +PRIMARY KEY (id), +KEY t3_idx (t3_id), +KEY t1_idx (t1_id) +) ENGINE=MyISAM DEFAULT CHARSET=utf8; +CREATE TABLE t3 ( +id int(16) NOT NULL AUTO_INCREMENT, +PRIMARY KEY (id) +) ENGINE=MyISAM DEFAULT CHARSET=utf8; +INSERT INTO t3 VALUES (1); +INSERT INTO t2 VALUES (1, 1, 1); +INSERT INTO t2 VALUES (2, 1, 2); +INSERT INTO t2 VALUES (3, 1, 2); +INSERT INTO t2 VALUES (4, 1, 1); +INSERT INTO t1 VALUES (1); +INSERT INTO t1 VALUES (2); +SELECT * FROM t1 WHERE t1.id IN ( +SELECT t2.t1_id FROM t3 JOIN t2 ON t3.id = t2.t3_id WHERE t3.id = 1 +); +id +1 +2 +drop table t1,t2,t3; # This must be the last in the file: set optimizer_switch=@subselect_sj2_tmp; diff --git a/mysql-test/r/subselect_sj2_jcl6.result b/mysql-test/r/subselect_sj2_jcl6.result index c10b550d11a..64f7ab2e6bf 100644 --- a/mysql-test/r/subselect_sj2_jcl6.result +++ b/mysql-test/r/subselect_sj2_jcl6.result @@ -1277,6 +1277,39 @@ id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY T2_0_ ref FK_T2_T1Id FK_T2_T1Id 8 test.T2_1_.t1idref 1 Using index; End temporary drop table t3,t2,t1; set optimizer_search_depth=@tmp7474; +# +# +# +CREATE TABLE t1 ( +id int(16) NOT NULL AUTO_INCREMENT, +PRIMARY KEY (id) +) ENGINE=InnoDB DEFAULT CHARSET=utf8; +CREATE TABLE t2 ( +id int(16) NOT NULL AUTO_INCREMENT, +t3_id int(16) NOT NULL DEFAULT '0', +t1_id int(16) NOT NULL DEFAULT '0', +PRIMARY KEY (id), +KEY t3_idx (t3_id), +KEY t1_idx (t1_id) +) ENGINE=MyISAM DEFAULT CHARSET=utf8; +CREATE TABLE t3 ( +id int(16) NOT NULL AUTO_INCREMENT, +PRIMARY KEY (id) +) ENGINE=MyISAM DEFAULT CHARSET=utf8; +INSERT INTO t3 VALUES (1); +INSERT INTO t2 VALUES (1, 1, 1); +INSERT INTO t2 VALUES (2, 1, 2); +INSERT INTO t2 VALUES (3, 1, 2); +INSERT INTO t2 VALUES (4, 1, 1); +INSERT INTO t1 VALUES (1); +INSERT INTO t1 VALUES (2); +SELECT * FROM t1 WHERE t1.id IN ( +SELECT t2.t1_id FROM t3 JOIN t2 ON t3.id = t2.t3_id WHERE t3.id = 1 +); +id +1 +2 +drop table t1,t2,t3; # This must be the last in the file: set optimizer_switch=@subselect_sj2_tmp; # diff --git a/mysql-test/r/subselect_sj2_mat.result b/mysql-test/r/subselect_sj2_mat.result index 4e75aee24a2..08a97175487 100644 --- a/mysql-test/r/subselect_sj2_mat.result +++ b/mysql-test/r/subselect_sj2_mat.result @@ -1264,6 +1264,39 @@ id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY T2_0_ ref FK_T2_T1Id FK_T2_T1Id 8 test.T2_1_.t1idref 1 Using index; End temporary drop table t3,t2,t1; set optimizer_search_depth=@tmp7474; +# +# +# +CREATE TABLE t1 ( +id int(16) NOT NULL AUTO_INCREMENT, +PRIMARY KEY (id) +) ENGINE=InnoDB DEFAULT CHARSET=utf8; +CREATE TABLE t2 ( +id int(16) NOT NULL AUTO_INCREMENT, +t3_id int(16) NOT NULL DEFAULT '0', +t1_id int(16) NOT NULL DEFAULT '0', +PRIMARY KEY (id), +KEY t3_idx (t3_id), +KEY t1_idx (t1_id) +) ENGINE=MyISAM DEFAULT CHARSET=utf8; +CREATE TABLE t3 ( +id int(16) NOT NULL AUTO_INCREMENT, +PRIMARY KEY (id) +) ENGINE=MyISAM DEFAULT CHARSET=utf8; +INSERT INTO t3 VALUES (1); +INSERT INTO t2 VALUES (1, 1, 1); +INSERT INTO t2 VALUES (2, 1, 2); +INSERT INTO t2 VALUES (3, 1, 2); +INSERT INTO t2 VALUES (4, 1, 1); +INSERT INTO t1 VALUES (1); +INSERT INTO t1 VALUES (2); +SELECT * FROM t1 WHERE t1.id IN ( +SELECT t2.t1_id FROM t3 JOIN t2 ON t3.id = t2.t3_id WHERE t3.id = 1 +); +id +1 +2 +drop table t1,t2,t3; # This must be the last in the file: set optimizer_switch=@subselect_sj2_tmp; set optimizer_switch=default; diff --git a/mysql-test/t/subselect_sj2.test b/mysql-test/t/subselect_sj2.test index 0bf9c6d9d10..6ed36083b33 100644 --- a/mysql-test/t/subselect_sj2.test +++ b/mysql-test/t/subselect_sj2.test @@ -1391,5 +1391,44 @@ eval explain $query; drop table t3,t2,t1; set optimizer_search_depth=@tmp7474; +--echo # +--echo # +--echo # +CREATE TABLE t1 ( + id int(16) NOT NULL AUTO_INCREMENT, + PRIMARY KEY (id) +) ENGINE=InnoDB DEFAULT CHARSET=utf8; + +CREATE TABLE t2 ( + id int(16) NOT NULL AUTO_INCREMENT, + t3_id int(16) NOT NULL DEFAULT '0', + t1_id int(16) NOT NULL DEFAULT '0', + PRIMARY KEY (id), + KEY t3_idx (t3_id), + KEY t1_idx (t1_id) +) ENGINE=MyISAM DEFAULT CHARSET=utf8; + +CREATE TABLE t3 ( + id int(16) NOT NULL AUTO_INCREMENT, + PRIMARY KEY (id) +) ENGINE=MyISAM DEFAULT CHARSET=utf8; + + +INSERT INTO t3 VALUES (1); + +INSERT INTO t2 VALUES (1, 1, 1); +INSERT INTO t2 VALUES (2, 1, 2); +INSERT INTO t2 VALUES (3, 1, 2); +INSERT INTO t2 VALUES (4, 1, 1); + +INSERT INTO t1 VALUES (1); +INSERT INTO t1 VALUES (2); + +SELECT * FROM t1 WHERE t1.id IN ( + SELECT t2.t1_id FROM t3 JOIN t2 ON t3.id = t2.t3_id WHERE t3.id = 1 +); + +drop table t1,t2,t3; + --echo # This must be the last in the file: set optimizer_switch=@subselect_sj2_tmp; diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc index 0ad90e2ef3d..062b43291fb 100644 --- a/sql/opt_subselect.cc +++ b/sql/opt_subselect.cc @@ -4361,6 +4361,74 @@ int init_dups_weedout(JOIN *join, uint first_table, int first_fanout_table, uint } +/* + @brief + Set up semi-join Loose Scan strategy for execution + + @detail + Other strategies are done in setup_semijoin_dups_elimination(), + however, we need to set up Loose Scan earlier, before make_join_select is + called. This is to prevent make_join_select() from switching full index + scans into quick selects (which will break Loose Scan access). + + @return + 0 OK + 1 Error +*/ + +int setup_semijoin_loosescan(JOIN *join) +{ + uint i; + DBUG_ENTER("setup_semijoin_loosescan"); + + POSITION *pos= join->best_positions + join->const_tables; + for (i= join->const_tables ; i < join->top_join_tab_count; ) + { + JOIN_TAB *tab=join->join_tab + i; + switch (pos->sj_strategy) { + case SJ_OPT_MATERIALIZE: + case SJ_OPT_MATERIALIZE_SCAN: + i+= 1; /* join tabs are embedded in the nest */ + pos += pos->n_sj_tables; + break; + case SJ_OPT_LOOSE_SCAN: + { + /* We jump from the last table to the first one */ + tab->loosescan_match_tab= tab + pos->n_sj_tables - 1; + + /* LooseScan requires records to be produced in order */ + if (tab->select && tab->select->quick) + tab->select->quick->need_sorted_output(); + + for (uint j= i; j < i + pos->n_sj_tables; j++) + join->join_tab[j].inside_loosescan_range= TRUE; + + /* Calculate key length */ + uint keylen= 0; + uint keyno= pos->loosescan_picker.loosescan_key; + for (uint kp=0; kp < pos->loosescan_picker.loosescan_parts; kp++) + keylen += tab->table->key_info[keyno].key_part[kp].store_length; + + tab->loosescan_key= keyno; + tab->loosescan_key_len= keylen; + if (pos->n_sj_tables > 1) + tab[pos->n_sj_tables - 1].do_firstmatch= tab; + i+= pos->n_sj_tables; + pos+= pos->n_sj_tables; + break; + } + default: + { + i++; + pos++; + break; + } + } + } + DBUG_RETURN(FALSE); +} + + /* Setup the strategies to eliminate semi-join duplicates. @@ -4469,8 +4537,6 @@ int setup_semijoin_dups_elimination(JOIN *join, ulonglong options, for (i= join->const_tables ; i < join->top_join_tab_count; ) { JOIN_TAB *tab=join->join_tab + i; - //POSITION *pos= join->best_positions + i; - uint keylen, keyno; switch (pos->sj_strategy) { case SJ_OPT_MATERIALIZE: case SJ_OPT_MATERIALIZE_SCAN: @@ -4480,26 +4546,7 @@ int setup_semijoin_dups_elimination(JOIN *join, ulonglong options, break; case SJ_OPT_LOOSE_SCAN: { - /* We jump from the last table to the first one */ - tab->loosescan_match_tab= tab + pos->n_sj_tables - 1; - - /* LooseScan requires records to be produced in order */ - if (tab->select && tab->select->quick) - tab->select->quick->need_sorted_output(); - - for (uint j= i; j < i + pos->n_sj_tables; j++) - join->join_tab[j].inside_loosescan_range= TRUE; - - /* Calculate key length */ - keylen= 0; - keyno= pos->loosescan_picker.loosescan_key; - for (uint kp=0; kp < pos->loosescan_picker.loosescan_parts; kp++) - keylen += tab->table->key_info[keyno].key_part[kp].store_length; - - tab->loosescan_key= keyno; - tab->loosescan_key_len= keylen; - if (pos->n_sj_tables > 1) - tab[pos->n_sj_tables - 1].do_firstmatch= tab; + /* Setup already handled by setup_semijoin_loosescan */ i+= pos->n_sj_tables; pos+= pos->n_sj_tables; break; diff --git a/sql/opt_subselect.h b/sql/opt_subselect.h index 3da94d05521..0fb1a931e36 100644 --- a/sql/opt_subselect.h +++ b/sql/opt_subselect.h @@ -194,8 +194,6 @@ public: PREV_BITS(key_part_map, max_loose_keypart+1) && // (3) !key_uses_partial_cols(s->table->s, key)) { - /* Ok, can use the strategy */ - part1_conds_met= TRUE; if (s->quick && s->quick->index == key && s->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE) { @@ -204,6 +202,12 @@ public: } DBUG_PRINT("info", ("Can use LooseScan scan")); + if (found_part & 1) + { + /* Can use LooseScan on ref access if the first key part is bound */ + part1_conds_met= TRUE; + } + /* Check if this is a special case where there are no usable bound IN-equalities, i.e. we have @@ -211,11 +215,13 @@ public: outer_expr IN (SELECT innertbl.key FROM ...) and outer_expr cannot be evaluated yet, so it's actually full - index scan and not a ref access + index scan and not a ref access. + We can do full index scan if it uses index-only. */ if (!(found_part & 1 ) && /* no usable ref access for 1st key part */ s->table->covering_keys.is_set(key)) { + part1_conds_met= TRUE; DBUG_PRINT("info", ("Can use full index scan for LooseScan")); /* Calculate the cost of complete loose index scan. */ @@ -383,6 +389,7 @@ public: bool create_sj_weedout_tmp_table(THD *thd); }; +int setup_semijoin_loosescan(JOIN *join); int setup_semijoin_dups_elimination(JOIN *join, ulonglong options, uint no_jbuf_after); void destroy_sj_tmp_tables(JOIN *join); diff --git a/sql/sql_select.cc b/sql/sql_select.cc index aa9a3f2da0e..22c14733ffa 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -1530,6 +1530,9 @@ TODO: make view to decide if it is possible to write to WHERE directly or make S /* Cache constant expressions in WHERE, HAVING, ON clauses. */ cache_const_exprs(); + if (setup_semijoin_loosescan(this)) + DBUG_RETURN(1); + if (make_join_select(this, select, conds)) { zero_result_cause= @@ -9676,9 +9679,14 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) Check again if we should use an index. We could have used an column from a previous table in the index if we are using limit and this is the first table + + (1) - Don't switch the used index if we are using semi-join + LooseScan on this table. Using different index will not + produce the desired ordering and de-duplication. */ if (!tab->table->is_filled_at_execution() && + !tab->loosescan_match_tab && // (1) ((cond && (!tab->keys.is_subset(tab->const_keys) && i > 0)) || (!tab->const_keys.is_clear_all() && i == join->const_tables && join->unit->select_limit_cnt < diff --git a/sql/sql_select.h b/sql/sql_select.h index 61157130f50..bbeb2aa6952 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -717,8 +717,7 @@ public: struct st_position *pos, struct st_position *loose_scan_pos); friend bool get_best_combination(JOIN *join); - friend int setup_semijoin_dups_elimination(JOIN *join, ulonglong options, - uint no_jbuf_after); + friend int setup_semijoin_loosescan(JOIN *join); friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join); };