MDEV-5926, MDEV-4362 post-fixes:
- Histogram::find_bucket() should not walk off the end of the value range. - Address review feedback in Histogram::point_selectivity(): different handling for zero-width buckets, and explanations.
This commit is contained in:
parent
dee11f9633
commit
ab061a2bb3
@ -1378,7 +1378,7 @@ Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1
|
|||||||
# Must not cause fp division by zero, or produce nonsense numbers:
|
# Must not cause fp division by zero, or produce nonsense numbers:
|
||||||
explain extended select * from t1 where col1 in (-1,-2,-3);
|
explain extended select * from t1 where col1 in (-1,-2,-3);
|
||||||
id select_type table type possible_keys key key_len ref rows filtered Extra
|
id select_type table type possible_keys key key_len ref rows filtered Extra
|
||||||
1 SIMPLE t1 ALL NULL NULL NULL NULL 10000 3.00 Using where
|
1 SIMPLE t1 ALL NULL NULL NULL NULL 10000 5.94 Using where
|
||||||
Warnings:
|
Warnings:
|
||||||
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` in (<cache>(-(1)),<cache>(-(2)),<cache>(-(3))))
|
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` in (<cache>(-(1)),<cache>(-(2)),<cache>(-(3))))
|
||||||
explain extended select * from t1 where col1<=-1;
|
explain extended select * from t1 where col1<=-1;
|
||||||
|
@ -1388,7 +1388,7 @@ Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1
|
|||||||
# Must not cause fp division by zero, or produce nonsense numbers:
|
# Must not cause fp division by zero, or produce nonsense numbers:
|
||||||
explain extended select * from t1 where col1 in (-1,-2,-3);
|
explain extended select * from t1 where col1 in (-1,-2,-3);
|
||||||
id select_type table type possible_keys key key_len ref rows filtered Extra
|
id select_type table type possible_keys key key_len ref rows filtered Extra
|
||||||
1 SIMPLE t1 ALL NULL NULL NULL NULL 10000 3.00 Using where
|
1 SIMPLE t1 ALL NULL NULL NULL NULL 10000 5.94 Using where
|
||||||
Warnings:
|
Warnings:
|
||||||
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` in (<cache>(-(1)),<cache>(-(2)),<cache>(-(3))))
|
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` in (<cache>(-(1)),<cache>(-(2)),<cache>(-(3))))
|
||||||
explain extended select * from t1 where col1<=-1;
|
explain extended select * from t1 where col1<=-1;
|
||||||
|
@ -151,6 +151,7 @@ private:
|
|||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Find the bucket which value 'pos' falls into. */
|
/* Find the bucket which value 'pos' falls into. */
|
||||||
uint find_bucket(double pos, bool first)
|
uint find_bucket(double pos, bool first)
|
||||||
{
|
{
|
||||||
@ -171,7 +172,7 @@ private:
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (val > get_value(i))
|
if (val > get_value(i) && i < (get_width() - 1))
|
||||||
i++;
|
i++;
|
||||||
|
|
||||||
if (val == get_value(i))
|
if (val == get_value(i))
|
||||||
@ -251,6 +252,27 @@ public:
|
|||||||
|
|
||||||
@return
|
@return
|
||||||
Expected condition selectivity (a number between 0 and 1)
|
Expected condition selectivity (a number between 0 and 1)
|
||||||
|
|
||||||
|
@notes
|
||||||
|
[re_zero_length_buckets] If a bucket with zero value-length is in the
|
||||||
|
middle of the histogram, we will not have min==max. Example: suppose,
|
||||||
|
pos_value=0x12, and the histogram is:
|
||||||
|
|
||||||
|
#n #n+1 #n+2
|
||||||
|
... 0x10 0x12 0x12 0x14 ...
|
||||||
|
|
|
||||||
|
+------------- bucket with zero value-length
|
||||||
|
|
||||||
|
Here, we will get min=#n+1, max=#n+2, and use the multi-bucket formula.
|
||||||
|
|
||||||
|
The problem happens at the histogram ends. If pos_value=0, and the
|
||||||
|
histogram is:
|
||||||
|
|
||||||
|
0x00 0x10 ...
|
||||||
|
|
||||||
|
then min=0, max=0. This means pos_value is contained within bucket #0,
|
||||||
|
but on the other hand, histogram data says that the bucket has only one
|
||||||
|
value.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
double point_selectivity(double pos, double avg_sel)
|
double point_selectivity(double pos, double avg_sel)
|
||||||
@ -264,6 +286,16 @@ public:
|
|||||||
uint max= min;
|
uint max= min;
|
||||||
while (max + 1 < get_width() && get_value(max + 1) == pos_value)
|
while (max + 1 < get_width() && get_value(max + 1) == pos_value)
|
||||||
max++;
|
max++;
|
||||||
|
|
||||||
|
/*
|
||||||
|
A special case: we're looking at a single bucket, and that bucket has
|
||||||
|
zero value-length. Use the multi-bucket formula (attempt to use
|
||||||
|
single-bucket formula will cause division by zero).
|
||||||
|
|
||||||
|
For more details see [re_zero_length_buckets] above.
|
||||||
|
*/
|
||||||
|
if (max == min && get_value(max) == ((max==0)? 0 : get_value(max-1)))
|
||||||
|
max++;
|
||||||
|
|
||||||
if (max > min)
|
if (max > min)
|
||||||
{
|
{
|
||||||
@ -302,27 +334,17 @@ public:
|
|||||||
(max + 1 == get_width() ? 1.0 : (get_value(max) * inv_prec_factor)) -
|
(max + 1 == get_width() ? 1.0 : (get_value(max) * inv_prec_factor)) -
|
||||||
(min == 0 ? 0.0 : (get_value(min-1) * inv_prec_factor));
|
(min == 0 ? 0.0 : (get_value(min-1) * inv_prec_factor));
|
||||||
|
|
||||||
if (current_bucket_width < 1e-16)
|
DBUG_ASSERT(current_bucket_width); /* We shouldn't get a one zero-width bucket */
|
||||||
{
|
|
||||||
/*
|
|
||||||
A special case: we are at the first (or the last) bucket in the
|
|
||||||
histogram, the bucket's value range is a singlepoint [x,x], and
|
|
||||||
pos_value=0 (for the first bucket) or pos_value=1 (for the last).
|
|
||||||
*/
|
|
||||||
sel= avg_sel;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
So:
|
|
||||||
- each bucket has the same #rows
|
|
||||||
- values are unformly distributed across the [min_value,max_value] domain.
|
|
||||||
|
|
||||||
If a bucket has value range that's N times bigger then average, than
|
/*
|
||||||
each value will have to have N times fewer rows than average.
|
So:
|
||||||
*/
|
- each bucket has the same #rows
|
||||||
sel= avg_sel * avg_bucket_width / current_bucket_width;
|
- values are uniformly distributed across the [min_value,max_value] domain.
|
||||||
}
|
|
||||||
|
If a bucket has a value range that's N times bigger than average, then
|
||||||
|
each value will have to have N times fewer rows than average.
|
||||||
|
*/
|
||||||
|
sel= avg_sel * avg_bucket_width / current_bucket_width;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
(Q: if we just follow this proportion we may end up in a situation
|
(Q: if we just follow this proportion we may end up in a situation
|
||||||
|
Loading…
x
Reference in New Issue
Block a user