Fix off-by-one error in Histogram_json_hb::find_bucket
This commit is contained in:
parent
b179640219
commit
28ad128585
@ -4093,12 +4093,12 @@ test.t2 analyze status Engine-independent statistics collected
|
||||
test.t2 analyze status OK
|
||||
explain extended select * from t2 where city = 'Moscow';
|
||||
id select_type table type possible_keys key key_len ref rows filtered Extra
|
||||
1 SIMPLE t2 ALL NULL NULL NULL NULL 101 50.00 Using where
|
||||
1 SIMPLE t2 ALL NULL NULL NULL NULL 101 98.02 Using where
|
||||
Warnings:
|
||||
Note 1003 select `test`.`t2`.`city` AS `city` from `test`.`t2` where `test`.`t2`.`city` = 'Moscow'
|
||||
analyze select * from t2 where city = 'Moscow';
|
||||
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
|
||||
1 SIMPLE t2 ALL NULL NULL NULL NULL 101 101.00 50.00 98.02 Using where
|
||||
1 SIMPLE t2 ALL NULL NULL NULL NULL 101 101.00 98.02 98.02 Using where
|
||||
explain extended select * from t2 where city = 'Helsinki';
|
||||
id select_type table type possible_keys key key_len ref rows filtered Extra
|
||||
1 SIMPLE t2 ALL NULL NULL NULL NULL 101 1.98 Using where
|
||||
|
@ -182,4 +182,3 @@ SET histogram_type= JSON_HB;
|
||||
ANALYZE TABLE t1 PERSISTENT FOR ALL;
|
||||
SELECT * FROM t1;
|
||||
drop table t1;
|
||||
|
||||
|
@ -483,12 +483,12 @@ double Histogram_json_hb::point_selectivity(Field *field, key_range *endpoint,
|
||||
|
||||
// If the value is outside of the histogram's range, this will "clip" it to
|
||||
// first or last bucket.
|
||||
int idx= find_bucket(field, key, false);
|
||||
bool equal;
|
||||
int idx= find_bucket(field, key, &equal);
|
||||
|
||||
double sel;
|
||||
|
||||
if (buckets[idx].ndv == 1 &&
|
||||
field->key_cmp((uchar*)buckets[idx].start_value.data(), key))
|
||||
if (buckets[idx].ndv == 1 && !equal)
|
||||
{
|
||||
// The bucket has a single value and it doesn't match! Use the global
|
||||
// average.
|
||||
@ -550,7 +550,18 @@ double Histogram_json_hb::range_selectivity(Field *field, key_range *min_endp,
|
||||
|
||||
// Find the leftmost bucket that contains the lookup value.
|
||||
// (If the lookup value is to the left of all buckets, find bucket #0)
|
||||
int idx= find_bucket(field, min_key, exclusive_endp);
|
||||
bool equal;
|
||||
int idx= find_bucket(field, min_key, &equal);
|
||||
if (equal && exclusive_endp && buckets[idx].ndv==1 &&
|
||||
idx < (int)buckets.size()-1)
|
||||
{
|
||||
/*
|
||||
The range is "col > $CONST" and we've found a bucket that contains
|
||||
only the value $CONST. Move to the next bucket.
|
||||
TODO: what if the last value in the histogram is a popular one?
|
||||
*/
|
||||
idx++;
|
||||
}
|
||||
double left_fract= get_left_fract(idx);
|
||||
double sel= position_in_interval(field, min_key, min_key_len,
|
||||
buckets[idx].start_value,
|
||||
@ -573,8 +584,18 @@ double Histogram_json_hb::range_selectivity(Field *field, key_range *min_endp,
|
||||
max_key++;
|
||||
max_key_len--;
|
||||
}
|
||||
bool equal;
|
||||
int idx= find_bucket(field, max_key, &equal);
|
||||
|
||||
int idx= find_bucket(field, max_key, inclusive_endp);
|
||||
if (equal && !inclusive_endp && idx > 0)
|
||||
{
|
||||
/*
|
||||
The range is "col < $CONST" and we've found a bucket starting with
|
||||
$CONST. Move to the previous bucket.
|
||||
TODO: what if the first value is the popular one?
|
||||
*/
|
||||
idx--;
|
||||
}
|
||||
double left_fract= get_left_fract(idx);
|
||||
double sel= position_in_interval(field, max_key, max_key_len,
|
||||
buckets[idx].start_value,
|
||||
@ -616,22 +637,59 @@ void Histogram_json_hb::serialize(Field *field)
|
||||
*/
|
||||
|
||||
int Histogram_json_hb::find_bucket(Field *field, const uchar *lookup_val,
|
||||
bool equal_is_less)
|
||||
bool *equal)
|
||||
{
|
||||
int res;
|
||||
int low= 0;
|
||||
int high= (int)buckets.size() - 1;
|
||||
*equal= false;
|
||||
|
||||
while (low + 1 < high)
|
||||
{
|
||||
int middle= (low + high) / 2;
|
||||
int res= field->key_cmp((uchar*)buckets[middle].start_value.data(), lookup_val);
|
||||
res= field->key_cmp((uchar*)buckets[middle].start_value.data(), lookup_val);
|
||||
if (!res)
|
||||
res= equal_is_less? -1: 1;
|
||||
if (res < 0)
|
||||
{
|
||||
*equal= true;
|
||||
return middle;
|
||||
}
|
||||
else if (res < 0)
|
||||
low= middle;
|
||||
else //res > 0
|
||||
high= middle;
|
||||
}
|
||||
|
||||
/*
|
||||
If low and high were assigned a value in the above loop, then they are not
|
||||
equal to the lookup value:
|
||||
|
||||
bucket[low] < lookup_val < bucket[high]
|
||||
|
||||
But there are two special cases: low=0 and high=last_bucket. Handle them
|
||||
below.
|
||||
*/
|
||||
if (low == 0)
|
||||
{
|
||||
res= field->key_cmp((uchar*)buckets[0].start_value.data(), lookup_val);
|
||||
if (!res)
|
||||
*equal= true;
|
||||
else if (res < 0)
|
||||
{
|
||||
res= field->key_cmp((uchar*)buckets[high].start_value.data(), lookup_val);
|
||||
if (!res)
|
||||
*equal= true;
|
||||
if (res >= 0)
|
||||
low= high;
|
||||
}
|
||||
}
|
||||
else if (high == (int)buckets.size() - 1)
|
||||
{
|
||||
res= field->key_cmp((uchar*)buckets[high].start_value.data(), lookup_val);
|
||||
if (!res)
|
||||
*equal= true;
|
||||
if (res >= 0)
|
||||
low= high;
|
||||
}
|
||||
|
||||
return low;
|
||||
}
|
||||
|
@ -123,6 +123,6 @@ public:
|
||||
private:
|
||||
double get_left_fract(int idx);
|
||||
std::string& get_end_value(int idx);
|
||||
int find_bucket(Field *field, const uchar *lookup_val, bool equal_is_less);
|
||||
int find_bucket(Field *field, const uchar *lookup_val, bool *equal);
|
||||
};
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user