MDEV-8289: Semijoin inflates number of rows in query result
- Make the semi-join optimizer not choose LooseScan when 1) the index is not covering and 2) a full index scan would be required.
- Make sure that the code in make_join_select() that may change a full index scan into a range scan is not invoked for a table that uses LooseScan.
This commit is contained in:
parent
1bfe4da1e9
commit
9b475ee3c1
@ -1262,5 +1262,38 @@ id select_type table type possible_keys key key_len ref rows Extra
|
||||
1 PRIMARY T2_0_ ref FK_T2_T1Id FK_T2_T1Id 8 test.T2_1_.t1idref 1 Using index; End temporary
|
||||
drop table t3,t2,t1;
|
||||
set optimizer_search_depth=@tmp7474;
|
||||
#
|
||||
#
|
||||
#
|
||||
CREATE TABLE t1 (
|
||||
id int(16) NOT NULL AUTO_INCREMENT,
|
||||
PRIMARY KEY (id)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
|
||||
CREATE TABLE t2 (
|
||||
id int(16) NOT NULL AUTO_INCREMENT,
|
||||
t3_id int(16) NOT NULL DEFAULT '0',
|
||||
t1_id int(16) NOT NULL DEFAULT '0',
|
||||
PRIMARY KEY (id),
|
||||
KEY t3_idx (t3_id),
|
||||
KEY t1_idx (t1_id)
|
||||
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
|
||||
CREATE TABLE t3 (
|
||||
id int(16) NOT NULL AUTO_INCREMENT,
|
||||
PRIMARY KEY (id)
|
||||
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
|
||||
INSERT INTO t3 VALUES (1);
|
||||
INSERT INTO t2 VALUES (1, 1, 1);
|
||||
INSERT INTO t2 VALUES (2, 1, 2);
|
||||
INSERT INTO t2 VALUES (3, 1, 2);
|
||||
INSERT INTO t2 VALUES (4, 1, 1);
|
||||
INSERT INTO t1 VALUES (1);
|
||||
INSERT INTO t1 VALUES (2);
|
||||
SELECT * FROM t1 WHERE t1.id IN (
|
||||
SELECT t2.t1_id FROM t3 JOIN t2 ON t3.id = t2.t3_id WHERE t3.id = 1
|
||||
);
|
||||
id
|
||||
1
|
||||
2
|
||||
drop table t1,t2,t3;
|
||||
# This must be the last in the file:
|
||||
set optimizer_switch=@subselect_sj2_tmp;
|
||||
|
@ -1277,6 +1277,39 @@ id select_type table type possible_keys key key_len ref rows Extra
|
||||
1 PRIMARY T2_0_ ref FK_T2_T1Id FK_T2_T1Id 8 test.T2_1_.t1idref 1 Using index; End temporary
|
||||
drop table t3,t2,t1;
|
||||
set optimizer_search_depth=@tmp7474;
|
||||
#
|
||||
#
|
||||
#
|
||||
CREATE TABLE t1 (
|
||||
id int(16) NOT NULL AUTO_INCREMENT,
|
||||
PRIMARY KEY (id)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
|
||||
CREATE TABLE t2 (
|
||||
id int(16) NOT NULL AUTO_INCREMENT,
|
||||
t3_id int(16) NOT NULL DEFAULT '0',
|
||||
t1_id int(16) NOT NULL DEFAULT '0',
|
||||
PRIMARY KEY (id),
|
||||
KEY t3_idx (t3_id),
|
||||
KEY t1_idx (t1_id)
|
||||
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
|
||||
CREATE TABLE t3 (
|
||||
id int(16) NOT NULL AUTO_INCREMENT,
|
||||
PRIMARY KEY (id)
|
||||
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
|
||||
INSERT INTO t3 VALUES (1);
|
||||
INSERT INTO t2 VALUES (1, 1, 1);
|
||||
INSERT INTO t2 VALUES (2, 1, 2);
|
||||
INSERT INTO t2 VALUES (3, 1, 2);
|
||||
INSERT INTO t2 VALUES (4, 1, 1);
|
||||
INSERT INTO t1 VALUES (1);
|
||||
INSERT INTO t1 VALUES (2);
|
||||
SELECT * FROM t1 WHERE t1.id IN (
|
||||
SELECT t2.t1_id FROM t3 JOIN t2 ON t3.id = t2.t3_id WHERE t3.id = 1
|
||||
);
|
||||
id
|
||||
1
|
||||
2
|
||||
drop table t1,t2,t3;
|
||||
# This must be the last in the file:
|
||||
set optimizer_switch=@subselect_sj2_tmp;
|
||||
#
|
||||
|
@ -1264,6 +1264,39 @@ id select_type table type possible_keys key key_len ref rows Extra
|
||||
1 PRIMARY T2_0_ ref FK_T2_T1Id FK_T2_T1Id 8 test.T2_1_.t1idref 1 Using index; End temporary
|
||||
drop table t3,t2,t1;
|
||||
set optimizer_search_depth=@tmp7474;
|
||||
#
|
||||
#
|
||||
#
|
||||
CREATE TABLE t1 (
|
||||
id int(16) NOT NULL AUTO_INCREMENT,
|
||||
PRIMARY KEY (id)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
|
||||
CREATE TABLE t2 (
|
||||
id int(16) NOT NULL AUTO_INCREMENT,
|
||||
t3_id int(16) NOT NULL DEFAULT '0',
|
||||
t1_id int(16) NOT NULL DEFAULT '0',
|
||||
PRIMARY KEY (id),
|
||||
KEY t3_idx (t3_id),
|
||||
KEY t1_idx (t1_id)
|
||||
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
|
||||
CREATE TABLE t3 (
|
||||
id int(16) NOT NULL AUTO_INCREMENT,
|
||||
PRIMARY KEY (id)
|
||||
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
|
||||
INSERT INTO t3 VALUES (1);
|
||||
INSERT INTO t2 VALUES (1, 1, 1);
|
||||
INSERT INTO t2 VALUES (2, 1, 2);
|
||||
INSERT INTO t2 VALUES (3, 1, 2);
|
||||
INSERT INTO t2 VALUES (4, 1, 1);
|
||||
INSERT INTO t1 VALUES (1);
|
||||
INSERT INTO t1 VALUES (2);
|
||||
SELECT * FROM t1 WHERE t1.id IN (
|
||||
SELECT t2.t1_id FROM t3 JOIN t2 ON t3.id = t2.t3_id WHERE t3.id = 1
|
||||
);
|
||||
id
|
||||
1
|
||||
2
|
||||
drop table t1,t2,t3;
|
||||
# This must be the last in the file:
|
||||
set optimizer_switch=@subselect_sj2_tmp;
|
||||
set optimizer_switch=default;
|
||||
|
@ -1391,5 +1391,44 @@ eval explain $query;
|
||||
drop table t3,t2,t1;
|
||||
set optimizer_search_depth=@tmp7474;
|
||||
|
||||
--echo #
|
||||
--echo #
|
||||
--echo #
|
||||
CREATE TABLE t1 (
|
||||
id int(16) NOT NULL AUTO_INCREMENT,
|
||||
PRIMARY KEY (id)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
|
||||
|
||||
CREATE TABLE t2 (
|
||||
id int(16) NOT NULL AUTO_INCREMENT,
|
||||
t3_id int(16) NOT NULL DEFAULT '0',
|
||||
t1_id int(16) NOT NULL DEFAULT '0',
|
||||
PRIMARY KEY (id),
|
||||
KEY t3_idx (t3_id),
|
||||
KEY t1_idx (t1_id)
|
||||
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
|
||||
|
||||
CREATE TABLE t3 (
|
||||
id int(16) NOT NULL AUTO_INCREMENT,
|
||||
PRIMARY KEY (id)
|
||||
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
|
||||
|
||||
|
||||
INSERT INTO t3 VALUES (1);
|
||||
|
||||
INSERT INTO t2 VALUES (1, 1, 1);
|
||||
INSERT INTO t2 VALUES (2, 1, 2);
|
||||
INSERT INTO t2 VALUES (3, 1, 2);
|
||||
INSERT INTO t2 VALUES (4, 1, 1);
|
||||
|
||||
INSERT INTO t1 VALUES (1);
|
||||
INSERT INTO t1 VALUES (2);
|
||||
|
||||
SELECT * FROM t1 WHERE t1.id IN (
|
||||
SELECT t2.t1_id FROM t3 JOIN t2 ON t3.id = t2.t3_id WHERE t3.id = 1
|
||||
);
|
||||
|
||||
drop table t1,t2,t3;
|
||||
|
||||
--echo # This must be the last in the file:
|
||||
set optimizer_switch=@subselect_sj2_tmp;
|
||||
|
@ -4361,6 +4361,74 @@ int init_dups_weedout(JOIN *join, uint first_table, int first_fanout_table, uint
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
@brief
|
||||
Set up semi-join Loose Scan strategy for execution
|
||||
|
||||
@detail
|
||||
Other strategies are done in setup_semijoin_dups_elimination(),
|
||||
however, we need to set up Loose Scan earlier, before make_join_select is
|
||||
called. This is to prevent make_join_select() from switching full index
|
||||
scans into quick selects (which will break Loose Scan access).
|
||||
|
||||
@return
|
||||
0 OK
|
||||
1 Error
|
||||
*/
|
||||
|
||||
int setup_semijoin_loosescan(JOIN *join)
|
||||
{
|
||||
uint i;
|
||||
DBUG_ENTER("setup_semijoin_loosescan");
|
||||
|
||||
POSITION *pos= join->best_positions + join->const_tables;
|
||||
for (i= join->const_tables ; i < join->top_join_tab_count; )
|
||||
{
|
||||
JOIN_TAB *tab=join->join_tab + i;
|
||||
switch (pos->sj_strategy) {
|
||||
case SJ_OPT_MATERIALIZE:
|
||||
case SJ_OPT_MATERIALIZE_SCAN:
|
||||
i+= 1; /* join tabs are embedded in the nest */
|
||||
pos += pos->n_sj_tables;
|
||||
break;
|
||||
case SJ_OPT_LOOSE_SCAN:
|
||||
{
|
||||
/* We jump from the last table to the first one */
|
||||
tab->loosescan_match_tab= tab + pos->n_sj_tables - 1;
|
||||
|
||||
/* LooseScan requires records to be produced in order */
|
||||
if (tab->select && tab->select->quick)
|
||||
tab->select->quick->need_sorted_output();
|
||||
|
||||
for (uint j= i; j < i + pos->n_sj_tables; j++)
|
||||
join->join_tab[j].inside_loosescan_range= TRUE;
|
||||
|
||||
/* Calculate key length */
|
||||
uint keylen= 0;
|
||||
uint keyno= pos->loosescan_picker.loosescan_key;
|
||||
for (uint kp=0; kp < pos->loosescan_picker.loosescan_parts; kp++)
|
||||
keylen += tab->table->key_info[keyno].key_part[kp].store_length;
|
||||
|
||||
tab->loosescan_key= keyno;
|
||||
tab->loosescan_key_len= keylen;
|
||||
if (pos->n_sj_tables > 1)
|
||||
tab[pos->n_sj_tables - 1].do_firstmatch= tab;
|
||||
i+= pos->n_sj_tables;
|
||||
pos+= pos->n_sj_tables;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
i++;
|
||||
pos++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
DBUG_RETURN(FALSE);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Setup the strategies to eliminate semi-join duplicates.
|
||||
|
||||
@ -4469,8 +4537,6 @@ int setup_semijoin_dups_elimination(JOIN *join, ulonglong options,
|
||||
for (i= join->const_tables ; i < join->top_join_tab_count; )
|
||||
{
|
||||
JOIN_TAB *tab=join->join_tab + i;
|
||||
//POSITION *pos= join->best_positions + i;
|
||||
uint keylen, keyno;
|
||||
switch (pos->sj_strategy) {
|
||||
case SJ_OPT_MATERIALIZE:
|
||||
case SJ_OPT_MATERIALIZE_SCAN:
|
||||
@ -4480,26 +4546,7 @@ int setup_semijoin_dups_elimination(JOIN *join, ulonglong options,
|
||||
break;
|
||||
case SJ_OPT_LOOSE_SCAN:
|
||||
{
|
||||
/* We jump from the last table to the first one */
|
||||
tab->loosescan_match_tab= tab + pos->n_sj_tables - 1;
|
||||
|
||||
/* LooseScan requires records to be produced in order */
|
||||
if (tab->select && tab->select->quick)
|
||||
tab->select->quick->need_sorted_output();
|
||||
|
||||
for (uint j= i; j < i + pos->n_sj_tables; j++)
|
||||
join->join_tab[j].inside_loosescan_range= TRUE;
|
||||
|
||||
/* Calculate key length */
|
||||
keylen= 0;
|
||||
keyno= pos->loosescan_picker.loosescan_key;
|
||||
for (uint kp=0; kp < pos->loosescan_picker.loosescan_parts; kp++)
|
||||
keylen += tab->table->key_info[keyno].key_part[kp].store_length;
|
||||
|
||||
tab->loosescan_key= keyno;
|
||||
tab->loosescan_key_len= keylen;
|
||||
if (pos->n_sj_tables > 1)
|
||||
tab[pos->n_sj_tables - 1].do_firstmatch= tab;
|
||||
/* Setup already handled by setup_semijoin_loosescan */
|
||||
i+= pos->n_sj_tables;
|
||||
pos+= pos->n_sj_tables;
|
||||
break;
|
||||
|
@ -194,8 +194,6 @@ public:
|
||||
PREV_BITS(key_part_map, max_loose_keypart+1) && // (3)
|
||||
!key_uses_partial_cols(s->table->s, key))
|
||||
{
|
||||
/* Ok, can use the strategy */
|
||||
part1_conds_met= TRUE;
|
||||
if (s->quick && s->quick->index == key &&
|
||||
s->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE)
|
||||
{
|
||||
@ -204,6 +202,12 @@ public:
|
||||
}
|
||||
DBUG_PRINT("info", ("Can use LooseScan scan"));
|
||||
|
||||
if (found_part & 1)
|
||||
{
|
||||
/* Can use LooseScan on ref access if the first key part is bound */
|
||||
part1_conds_met= TRUE;
|
||||
}
|
||||
|
||||
/*
|
||||
Check if this is a special case where there are no usable bound
|
||||
IN-equalities, i.e. we have
|
||||
@ -211,11 +215,13 @@ public:
|
||||
outer_expr IN (SELECT innertbl.key FROM ...)
|
||||
|
||||
and outer_expr cannot be evaluated yet, so it's actually full
|
||||
index scan and not a ref access
|
||||
index scan and not a ref access.
|
||||
We can do full index scan if it uses index-only.
|
||||
*/
|
||||
if (!(found_part & 1 ) && /* no usable ref access for 1st key part */
|
||||
s->table->covering_keys.is_set(key))
|
||||
{
|
||||
part1_conds_met= TRUE;
|
||||
DBUG_PRINT("info", ("Can use full index scan for LooseScan"));
|
||||
|
||||
/* Calculate the cost of complete loose index scan. */
|
||||
@ -383,6 +389,7 @@ public:
|
||||
bool create_sj_weedout_tmp_table(THD *thd);
|
||||
};
|
||||
|
||||
int setup_semijoin_loosescan(JOIN *join);
|
||||
int setup_semijoin_dups_elimination(JOIN *join, ulonglong options,
|
||||
uint no_jbuf_after);
|
||||
void destroy_sj_tmp_tables(JOIN *join);
|
||||
|
@ -1530,6 +1530,9 @@ TODO: make view to decide if it is possible to write to WHERE directly or make S
|
||||
/* Cache constant expressions in WHERE, HAVING, ON clauses. */
|
||||
cache_const_exprs();
|
||||
|
||||
if (setup_semijoin_loosescan(this))
|
||||
DBUG_RETURN(1);
|
||||
|
||||
if (make_join_select(this, select, conds))
|
||||
{
|
||||
zero_result_cause=
|
||||
@ -9676,9 +9679,14 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
|
||||
Check again if we should use an index.
|
||||
We could have used a column from a previous table in
|
||||
the index if we are using limit and this is the first table
|
||||
|
||||
(1) - Don't switch the used index if we are using semi-join
|
||||
LooseScan on this table. Using different index will not
|
||||
produce the desired ordering and de-duplication.
|
||||
*/
|
||||
|
||||
if (!tab->table->is_filled_at_execution() &&
|
||||
!tab->loosescan_match_tab && // (1)
|
||||
((cond && (!tab->keys.is_subset(tab->const_keys) && i > 0)) ||
|
||||
(!tab->const_keys.is_clear_all() && i == join->const_tables &&
|
||||
join->unit->select_limit_cnt <
|
||||
|
@ -717,8 +717,7 @@ public:
|
||||
struct st_position *pos,
|
||||
struct st_position *loose_scan_pos);
|
||||
friend bool get_best_combination(JOIN *join);
|
||||
friend int setup_semijoin_dups_elimination(JOIN *join, ulonglong options,
|
||||
uint no_jbuf_after);
|
||||
friend int setup_semijoin_loosescan(JOIN *join);
|
||||
friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join);
|
||||
};
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user