Fix off-by-one error in Histogram_json_hb::find_bucket
This commit is contained in:
parent
b179640219
commit
28ad128585
@ -4093,12 +4093,12 @@ test.t2 analyze status Engine-independent statistics collected
|
|||||||
test.t2 analyze status OK
|
test.t2 analyze status OK
|
||||||
explain extended select * from t2 where city = 'Moscow';
|
explain extended select * from t2 where city = 'Moscow';
|
||||||
id select_type table type possible_keys key key_len ref rows filtered Extra
|
id select_type table type possible_keys key key_len ref rows filtered Extra
|
||||||
1 SIMPLE t2 ALL NULL NULL NULL NULL 101 50.00 Using where
|
1 SIMPLE t2 ALL NULL NULL NULL NULL 101 98.02 Using where
|
||||||
Warnings:
|
Warnings:
|
||||||
Note 1003 select `test`.`t2`.`city` AS `city` from `test`.`t2` where `test`.`t2`.`city` = 'Moscow'
|
Note 1003 select `test`.`t2`.`city` AS `city` from `test`.`t2` where `test`.`t2`.`city` = 'Moscow'
|
||||||
analyze select * from t2 where city = 'Moscow';
|
analyze select * from t2 where city = 'Moscow';
|
||||||
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
|
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
|
||||||
1 SIMPLE t2 ALL NULL NULL NULL NULL 101 101.00 50.00 98.02 Using where
|
1 SIMPLE t2 ALL NULL NULL NULL NULL 101 101.00 98.02 98.02 Using where
|
||||||
explain extended select * from t2 where city = 'Helsinki';
|
explain extended select * from t2 where city = 'Helsinki';
|
||||||
id select_type table type possible_keys key key_len ref rows filtered Extra
|
id select_type table type possible_keys key key_len ref rows filtered Extra
|
||||||
1 SIMPLE t2 ALL NULL NULL NULL NULL 101 1.98 Using where
|
1 SIMPLE t2 ALL NULL NULL NULL NULL 101 1.98 Using where
|
||||||
|
@ -182,4 +182,3 @@ SET histogram_type= JSON_HB;
|
|||||||
ANALYZE TABLE t1 PERSISTENT FOR ALL;
|
ANALYZE TABLE t1 PERSISTENT FOR ALL;
|
||||||
SELECT * FROM t1;
|
SELECT * FROM t1;
|
||||||
drop table t1;
|
drop table t1;
|
||||||
|
|
||||||
|
@ -483,12 +483,12 @@ double Histogram_json_hb::point_selectivity(Field *field, key_range *endpoint,
|
|||||||
|
|
||||||
// If the value is outside of the histogram's range, this will "clip" it to
|
// If the value is outside of the histogram's range, this will "clip" it to
|
||||||
// first or last bucket.
|
// first or last bucket.
|
||||||
int idx= find_bucket(field, key, false);
|
bool equal;
|
||||||
|
int idx= find_bucket(field, key, &equal);
|
||||||
|
|
||||||
double sel;
|
double sel;
|
||||||
|
|
||||||
if (buckets[idx].ndv == 1 &&
|
if (buckets[idx].ndv == 1 && !equal)
|
||||||
field->key_cmp((uchar*)buckets[idx].start_value.data(), key))
|
|
||||||
{
|
{
|
||||||
// The bucket has a single value and it doesn't match! Use the global
|
// The bucket has a single value and it doesn't match! Use the global
|
||||||
// average.
|
// average.
|
||||||
@ -550,7 +550,18 @@ double Histogram_json_hb::range_selectivity(Field *field, key_range *min_endp,
|
|||||||
|
|
||||||
// Find the leftmost bucket that contains the lookup value.
|
// Find the leftmost bucket that contains the lookup value.
|
||||||
// (If the lookup value is to the left of all buckets, find bucket #0)
|
// (If the lookup value is to the left of all buckets, find bucket #0)
|
||||||
int idx= find_bucket(field, min_key, exclusive_endp);
|
bool equal;
|
||||||
|
int idx= find_bucket(field, min_key, &equal);
|
||||||
|
if (equal && exclusive_endp && buckets[idx].ndv==1 &&
|
||||||
|
idx < (int)buckets.size()-1)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
The range is "col > $CONST" and we've found a bucket that contains
|
||||||
|
only the value $CONST. Move to the next bucket.
|
||||||
|
TODO: what if the last value in the histogram is a popular one?
|
||||||
|
*/
|
||||||
|
idx++;
|
||||||
|
}
|
||||||
double left_fract= get_left_fract(idx);
|
double left_fract= get_left_fract(idx);
|
||||||
double sel= position_in_interval(field, min_key, min_key_len,
|
double sel= position_in_interval(field, min_key, min_key_len,
|
||||||
buckets[idx].start_value,
|
buckets[idx].start_value,
|
||||||
@ -573,8 +584,18 @@ double Histogram_json_hb::range_selectivity(Field *field, key_range *min_endp,
|
|||||||
max_key++;
|
max_key++;
|
||||||
max_key_len--;
|
max_key_len--;
|
||||||
}
|
}
|
||||||
|
bool equal;
|
||||||
|
int idx= find_bucket(field, max_key, &equal);
|
||||||
|
|
||||||
int idx= find_bucket(field, max_key, inclusive_endp);
|
if (equal && !inclusive_endp && idx > 0)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
The range is "col < $CONST" and we've found a bucket starting with
|
||||||
|
$CONST. Move to the previous bucket.
|
||||||
|
TODO: what if the first value is the popular one?
|
||||||
|
*/
|
||||||
|
idx--;
|
||||||
|
}
|
||||||
double left_fract= get_left_fract(idx);
|
double left_fract= get_left_fract(idx);
|
||||||
double sel= position_in_interval(field, max_key, max_key_len,
|
double sel= position_in_interval(field, max_key, max_key_len,
|
||||||
buckets[idx].start_value,
|
buckets[idx].start_value,
|
||||||
@ -616,22 +637,59 @@ void Histogram_json_hb::serialize(Field *field)
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
int Histogram_json_hb::find_bucket(Field *field, const uchar *lookup_val,
|
int Histogram_json_hb::find_bucket(Field *field, const uchar *lookup_val,
|
||||||
bool equal_is_less)
|
bool *equal)
|
||||||
{
|
{
|
||||||
|
int res;
|
||||||
int low= 0;
|
int low= 0;
|
||||||
int high= (int)buckets.size() - 1;
|
int high= (int)buckets.size() - 1;
|
||||||
|
*equal= false;
|
||||||
|
|
||||||
while (low + 1 < high)
|
while (low + 1 < high)
|
||||||
{
|
{
|
||||||
int middle= (low + high) / 2;
|
int middle= (low + high) / 2;
|
||||||
int res= field->key_cmp((uchar*)buckets[middle].start_value.data(), lookup_val);
|
res= field->key_cmp((uchar*)buckets[middle].start_value.data(), lookup_val);
|
||||||
if (!res)
|
if (!res)
|
||||||
res= equal_is_less? -1: 1;
|
{
|
||||||
if (res < 0)
|
*equal= true;
|
||||||
|
return middle;
|
||||||
|
}
|
||||||
|
else if (res < 0)
|
||||||
low= middle;
|
low= middle;
|
||||||
else //res > 0
|
else //res > 0
|
||||||
high= middle;
|
high= middle;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
If low and high were both assigned inside the loop above, then the start
|
||||||
|
values of those buckets are not equal to the lookup value:
|
||||||
|
|
||||||
|
bucket[low] < lookup_val < bucket[high]
|
||||||
|
|
||||||
|
But there are two special cases: low=0 and high=last_bucket. Handle them
|
||||||
|
below.
|
||||||
|
*/
|
||||||
|
if (low == 0)
|
||||||
|
{
|
||||||
|
res= field->key_cmp((uchar*)buckets[0].start_value.data(), lookup_val);
|
||||||
|
if (!res)
|
||||||
|
*equal= true;
|
||||||
|
else if (res < 0)
|
||||||
|
{
|
||||||
|
res= field->key_cmp((uchar*)buckets[high].start_value.data(), lookup_val);
|
||||||
|
if (!res)
|
||||||
|
*equal= true;
|
||||||
|
if (res >= 0)
|
||||||
|
low= high;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (high == (int)buckets.size() - 1)
|
||||||
|
{
|
||||||
|
res= field->key_cmp((uchar*)buckets[high].start_value.data(), lookup_val);
|
||||||
|
if (!res)
|
||||||
|
*equal= true;
|
||||||
|
if (res >= 0)
|
||||||
|
low= high;
|
||||||
|
}
|
||||||
|
|
||||||
return low;
|
return low;
|
||||||
}
|
}
|
||||||
|
@ -123,6 +123,6 @@ public:
|
|||||||
private:
|
private:
|
||||||
double get_left_fract(int idx);
|
double get_left_fract(int idx);
|
||||||
std::string& get_end_value(int idx);
|
std::string& get_end_value(int idx);
|
||||||
int find_bucket(Field *field, const uchar *lookup_val, bool equal_is_less);
|
int find_bucket(Field *field, const uchar *lookup_val, bool *equal);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user