From 2956dbe84f5c0dcae4783ae581aa3a623da05ee0 Mon Sep 17 00:00:00 2001 From: "sergefp@mysql.com" <> Date: Wed, 10 May 2006 17:40:20 +0400 Subject: [PATCH] BUG#17379 Wrong reuse of E(#rows(range)) as E(#rows(ref(const))): Re-work best_access_path() and find_best() to reuse E(#rows(range access)) as E(#rows(ref[_or_null](const) access) only when it is appropriate. [This is the final cumulative patch] --- mysql-test/r/select.result | 16 ++++ mysql-test/r/subselect.result | 2 +- mysql-test/t/select.test | 13 ++++ sql/opt_range.cc | 11 ++- sql/sql_select.cc | 135 ++++++++++++++++++++++++++++------ sql/table.h | 1 + 6 files changed, 154 insertions(+), 24 deletions(-) diff --git a/mysql-test/r/select.result b/mysql-test/r/select.result index ea85025a37b..3aae29a922e 100644 --- a/mysql-test/r/select.result +++ b/mysql-test/r/select.result @@ -3411,3 +3411,19 @@ SELECT * FROM t1; i 255 DROP TABLE t1; +create table t1 (a int); +insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +create table t2 (a int, b int, c int, e int, primary key(a,b,c)); +insert into t2 select A.a, B.a, C.a, C.a from t1 A, t1 B, t1 C; +analyze table t2; +Table Op Msg_type Msg_text +test.t2 analyze status OK +select 'In next EXPLAIN, B.rows must be exactly 10:' Z; +Z +In next EXPLAIN, B.rows must be exactly 10: +explain select * from t2 A, t2 B where A.a=5 and A.b=5 and A.C<5 +and B.a=5 and B.b=A.e and (B.b =1 or B.b = 3 or B.b=5); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE A range PRIMARY PRIMARY 12 NULL 3 Using where +1 SIMPLE B ref PRIMARY PRIMARY 8 const,test.A.e 10 +drop table t1, t2; diff --git a/mysql-test/r/subselect.result b/mysql-test/r/subselect.result index 14b05238e14..e3d65882f81 100644 --- a/mysql-test/r/subselect.result +++ b/mysql-test/r/subselect.result @@ -1480,7 +1480,7 @@ Note 1003 select `test`.`t1`.`s1` AS `s1`,not((`test`.`t1`.`s1`,(`test`.`t1`.`s1`,(((`test`.`t1`.`s1`) in t2 on s1 checking NULL where (`test`.`t2`.`s1` < _latin1'a2'))))) AS `s1 NOT IN (SELECT s1 FROM t2 WHERE s1 < 'a2')` from `test`.`t1` drop table t1,t2; diff --git a/mysql-test/t/select.test b/mysql-test/t/select.test index 8e3c5847846..033350d138a 100644 --- a/mysql-test/t/select.test +++ b/mysql-test/t/select.test @@ -2886,3 +2886,16 @@ SELECT * FROM t1; UPDATE t1 SET i = i - 1; SELECT * FROM t1; DROP TABLE t1; + +# BUG#17379 + +create table t1 (a int); +insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +create table t2 (a int, b int, c int, e int, primary key(a,b,c)); +insert into t2 select A.a, B.a, C.a, C.a from t1 A, t1 B, t1 C; +analyze table t2; +select 'In next EXPLAIN, B.rows must be exactly 10:' Z; + +explain select * from t2 A, t2 B where A.a=5 and A.b=5 and A.C<5 + and B.a=5 and B.b=A.e and (B.b =1 or B.b = 3 or B.b=5); +drop table t1, t2; diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 2610e4fc1c8..fb4af4e7932 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -347,8 +347,10 @@ typedef struct st_qsel_param { uint *imerge_cost_buff; /* buffer for index_merge cost estimates */ uint imerge_cost_buff_size; /* size of the buffer */ - /* TRUE if last checked tree->key can be used for ROR-scan */ + /* TRUE if last checked tree->key can be used for ROR-scan */ bool is_ror_scan; + /* Number of ranges in the last checked tree->key */ + uint n_ranges; } PARAM; class TABLE_READ_PLAN; @@ -5297,6 +5299,7 @@ check_quick_select(PARAM *param,uint idx,SEL_ARG *tree) param->table->file->primary_key_is_clustered()); param->is_ror_scan= !cpk_scan; } + param->n_ranges= 0; records=check_quick_keys(param,idx,tree,param->min_key,0,param->max_key,0); if (records != HA_POS_ERROR) @@ -5304,7 +5307,7 @@ check_quick_select(PARAM *param,uint idx,SEL_ARG *tree) param->table->quick_keys.set_bit(key); param->table->quick_rows[key]=records; param->table->quick_key_parts[key]=param->max_key_part+1; - + param->table->quick_n_ranges[key]= param->n_ranges; if (cpk_scan) param->is_ror_scan= TRUE; } @@ -5440,7 +5443,10 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree, HA_NOSAME && min_key_length == max_key_length && !memcmp(param->min_key,param->max_key,min_key_length)) + { tmp=1; // Max one record + param->n_ranges++; + } else { if (param->is_ror_scan) @@ -5460,6 +5466,7 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree, is_key_scan_ror(param, keynr, key_tree->part + 1))) param->is_ror_scan= FALSE; } + param->n_ranges++; if (tmp_min_flag & GEOM_FLAG) { diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 86a3814cda2..d36ceedc7ae 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -3335,7 +3335,10 @@ best_access_path(JOIN *join, uint key= keyuse->key; KEY *keyinfo= table->key_info+key; bool ft_key= (keyuse->keypart == FT_KEYPART); - uint found_ref_or_null= 0; + /* Bitmap of keyparts where the ref access is over 'keypart=const': */ + key_part_map const_part= 0; + /* The or-null keypart in ref-or-null access: */ + key_part_map ref_or_null_part= 0; /* Calculate how many key segments of the current key we can use */ start_key= keyuse; @@ -3347,12 +3350,14 @@ best_access_path(JOIN *join, do { if (!(remaining_tables & keyuse->used_tables) && - !(found_ref_or_null & keyuse->optimize)) + !(ref_or_null_part && (keyuse->optimize & + KEY_OPTIMIZE_REF_OR_NULL))) { found_part|= keyuse->keypart_map; - double tmp= prev_record_reads(join, - (found_ref | - keyuse->used_tables)); + if (!(keyuse->used_tables & ~join->const_table_map)) + const_part|= keyuse->keypart_map; + double tmp= prev_record_reads(join, (found_ref | + keyuse->used_tables)); if (tmp < best_prev_record_reads) { best_part_found_ref= keyuse->used_tables; @@ -3364,8 +3369,8 @@ best_access_path(JOIN *join, If there is one 'key_column IS NULL' expression, we can use this ref_or_null optimisation of this field */ - found_ref_or_null|= (keyuse->optimize & - KEY_OPTIMIZE_REF_OR_NULL); + if (keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL) + ref_or_null_part |= keyuse->keypart_map; } keyuse++; } while (keyuse->table == table && keyuse->key == key && @@ -3401,7 +3406,7 @@ best_access_path(JOIN *join, Check if we found full key */ if (found_part == PREV_BITS(uint,keyinfo->key_parts) && - !found_ref_or_null) + !ref_or_null_part) { /* use eq key */ max_key_part= (uint) ~0; if ((keyinfo->flags & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME) @@ -3413,6 +3418,23 @@ best_access_path(JOIN *join, { if (!found_ref) { /* We found a const key */ + /* + ReuseRangeEstimateForRef-1: + We get here if we've found a ref(const) (c_i are constants): + "(keypart1=c1) AND ... AND (keypartN=cN)" [ref_const_cond] + + If range optimizer was able to construct a "range" + access on this index, then its condition "quick_cond" was + eqivalent to ref_const_cond (*), and we can re-use E(#rows) + from the range optimizer. + + Proof of (*): By properties of range and ref optimizers + quick_cond will be equal or tighther than ref_const_cond. + ref_const_cond already covers "smallest" possible interval - + a singlepoint interval over all keyparts. Therefore, + quick_cond is equivalent to ref_const_cond (if it was an + empty interval we wouldn't have got here). + */ if (table->quick_keys.is_set(key)) records= (double) table->quick_rows[key]; else @@ -3433,6 +3455,23 @@ best_access_path(JOIN *join, if (records < 2.0) records=2.0; /* Can't be as good as a unique */ } + /* + ReuseRangeEstimateForRef-2: We get here if we could not reuse + E(#rows) from range optimizer. Make another try: + + If range optimizer produced E(#rows) for a prefix of the ref + access we're considering, and that E(#rows) is lower then our + current estimate, make an adjustment. The criteria of when we + can make an adjustment is a special case of the criteria used + in ReuseRangeEstimateForRef-3. + */ + if (table->quick_keys.is_set(key) && + const_part & (1 << table->quick_key_parts[key]) && + table->quick_n_ranges[key] == 1 && + records > (double) table->quick_rows[key]) + { + records= (double) table->quick_rows[key]; + } } /* Limit the number of matched rows */ tmp= records; @@ -3461,12 +3500,50 @@ best_access_path(JOIN *join, { max_key_part= max_part_bit(found_part); /* - Check if quick_range could determinate how many rows we - will match + ReuseRangeEstimateForRef-3: + We're now considering a ref[or_null] access via + (t.keypart1=e1 AND ... AND t.keypartK=eK) [ OR + (same-as-above but with one cond replaced + with "t.keypart_i IS NULL")] (**) + + Try re-using E(#rows) from "range" optimizer: + We can do so if "range" optimizer used the same intervals as + in (**). The intervals used by range optimizer may be not + available at this point (as "range" access might have choosen to + create quick select over another index), so we can't compare + them to (**). We'll make indirect judgements instead. + The sufficient conditions for re-use are: + (C1) All e_i in (**) are constants, i.e. found_ref==FALSE. (if + this is not satisfied we have no way to know which ranges + will be actually scanned by 'ref' until we execute the + join) + (C2) max #key parts in 'range' access == K == max_key_part (this + is apparently a necessary requirement) + + We also have a property that "range optimizer produces equal or + tighter set of scan intervals than ref(const) optimizer". Each + of the intervals in (**) are "tightest possible" intervals when + one limits itself to using keyparts 1..K (which we do in #2). + From here it follows that range access used either one, or + both of the (I1) and (I2) intervals: + + (t.keypart1=c1 AND ... AND t.keypartK=eK) (I1) + (same-as-above but with one cond replaced + with "t.keypart_i IS NULL") (I2) + + The remaining part is to exclude the situation where range + optimizer used one interval while we're considering + ref-or-null and looking for estimate for two intervals. This + is done by last limitation: + + (C3) "range optimizer used (have ref_or_null?2:1) intervals" */ - if (table->quick_keys.is_set(key) && - table->quick_key_parts[key] == max_key_part) + if (table->quick_keys.is_set(key) && !found_ref && //(C1) + table->quick_key_parts[key] == max_key_part && //(C2) + table->quick_n_ranges[key] == 1+test(ref_or_null_part)) //(C3) + { tmp= records= (double) table->quick_rows[key]; + } else { /* Check if we have statistic about the distribution */ @@ -3510,21 +3587,37 @@ best_access_path(JOIN *join, } records = (ulong) tmp; } - /* - If quick_select was used on a part of this key, we know - the maximum number of rows that the key can match. - */ - if (table->quick_keys.is_set(key) && - table->quick_key_parts[key] <= max_key_part && - records > (double) table->quick_rows[key]) - tmp= records= (double) table->quick_rows[key]; - else if (found_ref_or_null) + + if (ref_or_null_part) { /* We need to do two key searches to find key */ tmp *= 2.0; records *= 2.0; } + + /* + ReuseRangeEstimateForRef-4: We get here if we could not reuse + E(#rows) from range optimizer. Make another try: + + If range optimizer produced E(#rows) for a prefix of the ref + access we're considering, and that E(#rows) is lower then our + current estimate, make the adjustment. + + The decision whether we can re-use the estimate from the range + optimizer is the same as in ReuseRangeEstimateForRef-3, + applied to first table->quick_key_parts[key] key parts. + */ + if (table->quick_keys.is_set(key) && + table->quick_key_parts[key] <= max_key_part && + const_part & (1 << table->quick_key_parts[key]) && + table->quick_n_ranges[key] == 1 + test(ref_or_null_part & + const_part) && + records > (double) table->quick_rows[key]) + { + tmp= records= (double) table->quick_rows[key]; + } } + /* Limit the number of matched rows */ set_if_smaller(tmp, (double) thd->variables.max_seeks_for_key); if (table->used_keys.is_set(key)) diff --git a/sql/table.h b/sql/table.h index ce44f797280..106421d7a17 100644 --- a/sql/table.h +++ b/sql/table.h @@ -221,6 +221,7 @@ struct st_table { ha_rows quick_rows[MAX_KEY]; key_part_map const_key_parts[MAX_KEY]; uint quick_key_parts[MAX_KEY]; + uint quick_n_ranges[MAX_KEY]; /* If this table has TIMESTAMP field with auto-set property (pointed by