Code cleanup part#2: do not copy key values in xxx_selectivity() functions
This commit is contained in:
parent
2a1cdbabec
commit
fcf58a5e0f
@ -2444,15 +2444,15 @@ test t1_json a a-0 a-9 0.0000 3.0000 1.0000 100 JSON_HB {
|
|||||||
}
|
}
|
||||||
explain extended select * from t1_json where a between 'a-3a' and 'zzzzzzzzz';
|
explain extended select * from t1_json where a between 'a-3a' and 'zzzzzzzzz';
|
||||||
id select_type table type possible_keys key key_len ref rows filtered Extra
|
id select_type table type possible_keys key key_len ref rows filtered Extra
|
||||||
1 SIMPLE t1_json ALL NULL NULL NULL NULL 10 60.87 Using where
|
1 SIMPLE t1_json ALL NULL NULL NULL NULL 10 59.87 Using where
|
||||||
Warnings:
|
Warnings:
|
||||||
Note 1003 select `test`.`t1_json`.`a` AS `a` from `test`.`t1_json` where `test`.`t1_json`.`a` between 'a-3a' and 'zzzzzzzzz'
|
Note 1003 select `test`.`t1_json`.`a` AS `a` from `test`.`t1_json` where `test`.`t1_json`.`a` between 'a-3a' and 'zzzzzzzzz'
|
||||||
analyze select * from t1_json where a between 'a-3a' and 'zzzzzzzzz';
|
analyze select * from t1_json where a between 'a-3a' and 'zzzzzzzzz';
|
||||||
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
|
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
|
||||||
1 SIMPLE t1_json ALL NULL NULL NULL NULL 10 10.00 60.87 60.00 Using where
|
1 SIMPLE t1_json ALL NULL NULL NULL NULL 10 10.00 59.87 60.00 Using where
|
||||||
explain extended select * from t1_json where a < 'b-1a';
|
explain extended select * from t1_json where a < 'b-1a';
|
||||||
id select_type table type possible_keys key key_len ref rows filtered Extra
|
id select_type table type possible_keys key key_len ref rows filtered Extra
|
||||||
1 SIMPLE t1_json ALL NULL NULL NULL NULL 10 100.00 Using where
|
1 SIMPLE t1_json ALL NULL NULL NULL NULL 10 99.00 Using where
|
||||||
Warnings:
|
Warnings:
|
||||||
Note 1003 select `test`.`t1_json`.`a` AS `a` from `test`.`t1_json` where `test`.`t1_json`.`a` < 'b-1a'
|
Note 1003 select `test`.`t1_json`.`a` AS `a` from `test`.`t1_json` where `test`.`t1_json`.`a` < 'b-1a'
|
||||||
analyze select * from t1_json where a > 'zzzzzzzzz';
|
analyze select * from t1_json where a > 'zzzzzzzzz';
|
||||||
@ -2476,12 +2476,12 @@ test.t2 analyze status Engine-independent statistics collected
|
|||||||
test.t2 analyze status OK
|
test.t2 analyze status OK
|
||||||
explain extended select * from t2 where city = 'Moscow';
|
explain extended select * from t2 where city = 'Moscow';
|
||||||
id select_type table type possible_keys key key_len ref rows filtered Extra
|
id select_type table type possible_keys key key_len ref rows filtered Extra
|
||||||
1 SIMPLE t2 ALL NULL NULL NULL NULL 101 98.04 Using where
|
1 SIMPLE t2 ALL NULL NULL NULL NULL 101 96.08 Using where
|
||||||
Warnings:
|
Warnings:
|
||||||
Note 1003 select `test`.`t2`.`city` AS `city` from `test`.`t2` where `test`.`t2`.`city` = 'Moscow'
|
Note 1003 select `test`.`t2`.`city` AS `city` from `test`.`t2` where `test`.`t2`.`city` = 'Moscow'
|
||||||
analyze select * from t2 where city = 'Moscow';
|
analyze select * from t2 where city = 'Moscow';
|
||||||
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
|
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
|
||||||
1 SIMPLE t2 ALL NULL NULL NULL NULL 101 101.00 98.04 98.02 Using where
|
1 SIMPLE t2 ALL NULL NULL NULL NULL 101 101.00 96.08 98.02 Using where
|
||||||
explain extended select * from t2 where city = 'Helsinki';
|
explain extended select * from t2 where city = 'Helsinki';
|
||||||
id select_type table type possible_keys key key_len ref rows filtered Extra
|
id select_type table type possible_keys key key_len ref rows filtered Extra
|
||||||
1 SIMPLE t2 ALL NULL NULL NULL NULL 101 2.00 Using where
|
1 SIMPLE t2 ALL NULL NULL NULL NULL 101 2.00 Using where
|
||||||
|
@ -63,16 +63,7 @@
|
|||||||
equal to "never".
|
equal to "never".
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
Histogram_base *create_histogram(MEM_ROOT *mem_root, Histogram_type hist_type);
|
||||||
* json_get_array_items expects a JSON array as argument,
|
|
||||||
* and pushes the elements of the array into the `container` vector.
|
|
||||||
* It only works if all the elements in the original JSON array
|
|
||||||
* are scalar values (i.e., strings, numbers, true or false),
|
|
||||||
* else, the JSON type encountered is stored in value_type and the function returns false.
|
|
||||||
*/
|
|
||||||
bool json_get_array_items(const char *json, const char *json_end, int *value_type, std::vector<std::string> &container);
|
|
||||||
|
|
||||||
Histogram_base *create_histogram(Histogram_type hist_type);
|
|
||||||
|
|
||||||
/* Currently there are only 3 persistent statistical tables */
|
/* Currently there are only 3 persistent statistical tables */
|
||||||
static const uint STATISTICS_TABLES= 3;
|
static const uint STATISTICS_TABLES= 3;
|
||||||
@ -1235,18 +1226,9 @@ public:
|
|||||||
Field *stat_field= stat_table->field[fldno];
|
Field *stat_field= stat_table->field[fldno];
|
||||||
table_field->read_stats->set_not_null(fldno);
|
table_field->read_stats->set_not_null(fldno);
|
||||||
stat_field->val_str(&val);
|
stat_field->val_str(&val);
|
||||||
switch (table_field->read_stats->histogram_type_on_disk)
|
hist= create_histogram(mem_root, table_field->read_stats->histogram_type_on_disk);
|
||||||
{
|
if (!hist)
|
||||||
case SINGLE_PREC_HB:
|
|
||||||
case DOUBLE_PREC_HB:
|
|
||||||
hist = new (mem_root) Histogram_binary();
|
|
||||||
break;
|
|
||||||
case JSON_HB:
|
|
||||||
hist = new (mem_root) Histogram_json();
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
|
||||||
if (!hist->parse(mem_root, table_field,
|
if (!hist->parse(mem_root, table_field,
|
||||||
table_field->read_stats->histogram_type_on_disk,
|
table_field->read_stats->histogram_type_on_disk,
|
||||||
val.ptr(), val.length()))
|
val.ptr(), val.length()))
|
||||||
@ -1415,7 +1397,6 @@ double pos_in_interval_through_val_real(Field *field,
|
|||||||
uchar *max_val,
|
uchar *max_val,
|
||||||
uchar *midpoint_val)
|
uchar *midpoint_val)
|
||||||
{
|
{
|
||||||
|
|
||||||
// For each passed value: unpack it into Field's current value. Then, we can
|
// For each passed value: unpack it into Field's current value. Then, we can
|
||||||
// get the value as double.
|
// get the value as double.
|
||||||
|
|
||||||
@ -1526,114 +1507,105 @@ double Histogram_json::point_selectivity(Field *field, key_range *endpoint, doub
|
|||||||
const uchar *min_key = endpoint->key;
|
const uchar *min_key = endpoint->key;
|
||||||
if (field->real_maybe_null())
|
if (field->real_maybe_null())
|
||||||
min_key++;
|
min_key++;
|
||||||
uint min_idx= find_bucket(field, min_key);
|
uint min_idx= find_bucket(field, min_key, false);
|
||||||
|
|
||||||
uint max_idx= min_idx;
|
|
||||||
|
|
||||||
|
uint max_idx= find_bucket(field, min_key, true);
|
||||||
|
#if 0
|
||||||
// find how many buckets this value occupies
|
// find how many buckets this value occupies
|
||||||
while ((max_idx + 1 < get_width() ) &&
|
while ((max_idx + 1 < get_width() ) &&
|
||||||
(field->key_cmp((uchar *)histogram_bounds[max_idx + 1].data(), min_key) == 0)) {
|
(field->key_cmp((uchar *)histogram_bounds[max_idx + 1].data(), min_key) == 0)) {
|
||||||
max_idx++;
|
max_idx++;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
if (max_idx > min_idx)
|
if (max_idx > min_idx)
|
||||||
{
|
{
|
||||||
// value spans multiple buckets
|
// value spans multiple buckets
|
||||||
double bucket_sel= 1.0/(get_width() + 1);
|
double bucket_sel= 1.0/(get_width() + 1);
|
||||||
sel= bucket_sel * (max_idx - min_idx + 1);
|
sel= bucket_sel * (max_idx - min_idx + 1);
|
||||||
} else
|
}
|
||||||
|
else
|
||||||
{
|
{
|
||||||
// the value fits within a single bucket
|
// the value fits within a single bucket
|
||||||
sel = MY_MIN(avg_sel, (1.0/get_width()));
|
sel = MY_MIN(avg_sel, 1.0/get_width());
|
||||||
}
|
}
|
||||||
return sel;
|
return sel;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@param field The table field histogram is for. We don't care about the
|
@param field The table field histogram is for. We don't care about the
|
||||||
field's current value, we only need its virtual functions to
|
field's current value, we only need its virtual functions to
|
||||||
perform various operations
|
perform various operations
|
||||||
|
|
||||||
@param min_endp, max_endp - this specifies the range.
|
@param min_endp Left endpoint, or NULL if there is none
|
||||||
|
@param max_endp Right endpoint, or NULL if there is none
|
||||||
*/
|
*/
|
||||||
double Histogram_json::range_selectivity(Field *field, key_range *min_endp,
|
double Histogram_json::range_selectivity(Field *field, key_range *min_endp,
|
||||||
key_range *max_endp)
|
key_range *max_endp)
|
||||||
{
|
{
|
||||||
double min = 0.0, max = 1.0;
|
double min, max;
|
||||||
double width = 1.0/(int)histogram_bounds.size();
|
double width= 1.0 / histogram_bounds.size();
|
||||||
if (min_endp)
|
|
||||||
|
if (min_endp && !(field->null_ptr && min_endp->key[0]))
|
||||||
{
|
{
|
||||||
double min_sel = 0.0;
|
bool exclusive_endp= (min_endp->flag == HA_READ_AFTER_KEY)? true: false;
|
||||||
const uchar *min_key= min_endp->key;
|
const uchar *min_key= min_endp->key;
|
||||||
// GSOC-TODO: properly handle SQL NULLs.
|
|
||||||
// in this test patch, we just assume the values are not SQL NULLs.
|
|
||||||
if (field->real_maybe_null())
|
if (field->real_maybe_null())
|
||||||
min_key++;
|
min_key++;
|
||||||
|
|
||||||
int min_bucket_idx, max_bucket_idx;
|
// Find the leftmost bucket that contains the lookup value.
|
||||||
min_bucket_idx= find_bucket(field, min_key);
|
// (If the lookup value is to the left of all buckets, find bucket #0)
|
||||||
std::string min_bucket, max_bucket;
|
int idx= find_bucket(field, min_key, exclusive_endp);
|
||||||
|
double min_sel;
|
||||||
max_bucket_idx= min_bucket_idx + 1;
|
|
||||||
if (min_bucket_idx != -1)
|
|
||||||
{
|
{
|
||||||
min_bucket= histogram_bounds[min_bucket_idx];
|
std::string &left= histogram_bounds[idx];
|
||||||
max_bucket= (max_bucket_idx < (int) histogram_bounds.size())
|
std::string &right= histogram_bounds[idx+1];
|
||||||
? histogram_bounds[max_bucket_idx]
|
|
||||||
: "";
|
|
||||||
|
|
||||||
if (field->pos_through_val_str())
|
if (field->pos_through_val_str())
|
||||||
min_sel= pos_in_interval_through_strxfrm(
|
min_sel= pos_in_interval_through_strxfrm(
|
||||||
field, (uchar *) min_bucket.data(), (uchar *) max_bucket.data(),
|
field, (uchar*) left.data(), (uchar*) right.data(),
|
||||||
(uchar *) min_key);
|
(uchar*) min_key);
|
||||||
else
|
else
|
||||||
min_sel= pos_in_interval_through_val_real(
|
min_sel= pos_in_interval_through_val_real(
|
||||||
field, (uchar *) min_bucket.data(), (uchar *) max_bucket.data(),
|
field, (uchar *) left.data(), (uchar*) right.data(),
|
||||||
(uchar *) min_key);
|
(uchar*) min_key);
|
||||||
}
|
}
|
||||||
|
|
||||||
min = min_bucket_idx * (width) + min_sel * (width);
|
min= idx*width + min_sel*width;
|
||||||
//fprintf(stderr, "min pos_in_interval =%g\n", min_sel);
|
|
||||||
//fprintf(stderr, "min =%g\n", min);
|
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
min= 0.0;
|
||||||
|
|
||||||
if (max_endp)
|
if (max_endp)
|
||||||
{
|
{
|
||||||
double max_sel = 1.0;
|
// The right endpoint cannot be NULL
|
||||||
|
DBUG_ASSERT(!(field->null_ptr && max_endp->key[0]));
|
||||||
|
bool inclusive_endp= (max_endp->flag == HA_READ_AFTER_KEY)? true: false;
|
||||||
const uchar *max_key= max_endp->key;
|
const uchar *max_key= max_endp->key;
|
||||||
if (field->real_maybe_null())
|
if (field->real_maybe_null())
|
||||||
max_key++;
|
max_key++;
|
||||||
|
|
||||||
int min_bucket_idx, max_bucket_idx;
|
int idx= find_bucket(field, max_key, inclusive_endp);
|
||||||
min_bucket_idx= find_bucket(field, max_key);
|
double max_sel;
|
||||||
std::string min_bucket, max_bucket;
|
|
||||||
|
|
||||||
max_bucket_idx= min_bucket_idx + 1;
|
|
||||||
if (min_bucket_idx != -1)
|
|
||||||
{
|
{
|
||||||
min_bucket= histogram_bounds[min_bucket_idx];
|
std::string &left= histogram_bounds[idx];
|
||||||
max_bucket= (max_bucket_idx < (int) histogram_bounds.size())
|
std::string &right= histogram_bounds[idx+1];
|
||||||
? histogram_bounds[max_bucket_idx]
|
|
||||||
: "";
|
|
||||||
|
|
||||||
if (field->pos_through_val_str())
|
if (field->pos_through_val_str())
|
||||||
max_sel= pos_in_interval_through_strxfrm(
|
max_sel= pos_in_interval_through_strxfrm(
|
||||||
field, (uchar *) min_bucket.data(), (uchar *) max_bucket.data(),
|
field, (uchar *) left.data(), (uchar *) right.data(),
|
||||||
(uchar *) max_key);
|
(uchar *) max_key);
|
||||||
else
|
else
|
||||||
max_sel= pos_in_interval_through_val_real(
|
max_sel= pos_in_interval_through_val_real(
|
||||||
field, (uchar *) min_bucket.data(), (uchar *) max_bucket.data(),
|
field, (uchar *) left.data(), (uchar *) right.data(),
|
||||||
(uchar *) max_key);
|
(uchar *) max_key);
|
||||||
}
|
}
|
||||||
|
|
||||||
max = min_bucket_idx * (width) + max_sel * (width);
|
max= idx*width + max_sel*width;
|
||||||
//fprintf(stderr, "max pos_in_interval =%g\n", max_sel);
|
|
||||||
//fprintf(stderr, "max =%g\n", max);
|
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
max= 1.0;
|
||||||
|
|
||||||
double sel = max - min;
|
double sel = max - min;
|
||||||
//fprintf(stderr, "final selection = %g\n", sel);
|
|
||||||
//fprintf(stderr, "Histogram_json::range_selectivity ends\n");
|
|
||||||
return sel;
|
return sel;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1644,34 +1616,33 @@ void Histogram_json::serialize(Field *field)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int Histogram_json::find_bucket(Field *field, const uchar *endpoint)
|
/*
|
||||||
|
Find the histogram bucket that contains lookup_val. Returns the index of that bucket's left bound in histogram_bounds.
|
||||||
|
|
||||||
|
@param equal_is_less Controls what to do if a histogram bound is equal to the
|
||||||
|
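lookup_val. If true, a bound equal to lookup_val is treated as less than it (the search continues to the right of that bound); if false, it is treated as greater (the search continues to the left).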
|
||||||
|
*/
|
||||||
|
|
||||||
|
int Histogram_json::find_bucket(Field *field, const uchar *lookup_val,
|
||||||
|
bool equal_is_less)
|
||||||
{
|
{
|
||||||
int low = 0;
|
int low= 0;
|
||||||
int high = (int)histogram_bounds.size()-1;
|
int high= histogram_bounds.size() - 1;
|
||||||
int mid;
|
int middle;
|
||||||
int min_bucket_index = -1;
|
|
||||||
std::string mid_val; // GSOC-todo: don't copy strings
|
|
||||||
|
|
||||||
while(low <= high) {
|
while (low + 1 < high)
|
||||||
// c++ gives us the floor of integer divisions by default, below we get the ceiling (round-up).
|
{
|
||||||
// it works but it doesn't feel so readable, maybe we could make improvements?
|
middle= (low + high) / 2;
|
||||||
int sum = (low+high);
|
int res= field->key_cmp((uchar*)histogram_bounds[middle].data(), lookup_val);
|
||||||
mid = sum/2 + (sum % 2 != 0);
|
if (!res)
|
||||||
|
res= equal_is_less? -1: 1;
|
||||||
mid_val = histogram_bounds[mid];
|
if (res < 0)
|
||||||
|
low= middle;
|
||||||
int res = field->key_cmp((uchar*) mid_val.data(), endpoint);
|
else //res > 0
|
||||||
if (res < 0) {
|
high= middle;
|
||||||
low = mid + 1;
|
|
||||||
min_bucket_index = mid;
|
|
||||||
} else if (res >= 0) {
|
|
||||||
high = mid - 1;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (min_bucket_index == -1)
|
return low;
|
||||||
min_bucket_index = high;
|
|
||||||
return min_bucket_index;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -2114,14 +2085,14 @@ public:
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
Histogram_base *create_histogram(Histogram_type hist_type)
|
Histogram_base *create_histogram(MEM_ROOT *mem_root, Histogram_type hist_type)
|
||||||
{
|
{
|
||||||
switch (hist_type) {
|
switch (hist_type) {
|
||||||
case SINGLE_PREC_HB:
|
case SINGLE_PREC_HB:
|
||||||
case DOUBLE_PREC_HB:
|
case DOUBLE_PREC_HB:
|
||||||
return new Histogram_binary();
|
return new (mem_root) Histogram_binary();
|
||||||
case JSON_HB:
|
case JSON_HB:
|
||||||
return new Histogram_json();
|
return new (mem_root) Histogram_json();
|
||||||
default:
|
default:
|
||||||
DBUG_ASSERT(0);
|
DBUG_ASSERT(0);
|
||||||
}
|
}
|
||||||
@ -2963,7 +2934,7 @@ void Column_statistics_collected::finish(MEM_ROOT *mem_root, ha_rows rows, doubl
|
|||||||
if (hist_size != 0 && hist_type != INVALID_HISTOGRAM)
|
if (hist_size != 0 && hist_type != INVALID_HISTOGRAM)
|
||||||
{
|
{
|
||||||
have_histogram= true;
|
have_histogram= true;
|
||||||
histogram_= create_histogram(hist_type);
|
histogram_= create_histogram(mem_root, hist_type);
|
||||||
histogram_->init_for_collection(mem_root, hist_type, hist_size);
|
histogram_->init_for_collection(mem_root, hist_type, hist_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -4530,9 +4501,10 @@ double Histogram_binary::point_selectivity(Field *field, key_range *min_endp, do
|
|||||||
return sel;
|
return sel;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
double Histogram_binary::range_selectivity(Field *field,
|
double Histogram_binary::range_selectivity(Field *field,
|
||||||
key_range *min_endp,
|
key_range *min_endp,
|
||||||
key_range *max_endp)
|
key_range *max_endp)
|
||||||
{
|
{
|
||||||
double sel, min_mp_pos, max_mp_pos;
|
double sel, min_mp_pos, max_mp_pos;
|
||||||
Column_statistics *col_stats= field->read_stats;
|
Column_statistics *col_stats= field->read_stats;
|
||||||
@ -4561,13 +4533,6 @@ double Histogram_binary::range_selectivity(Field *field,
|
|||||||
uint max= find_bucket(max_mp_pos, FALSE);
|
uint max= find_bucket(max_mp_pos, FALSE);
|
||||||
sel= bucket_sel * (max - min + 1);
|
sel= bucket_sel * (max - min + 1);
|
||||||
|
|
||||||
/*fprintf(stderr, "bucket_sel =%g\n", bucket_sel);
|
|
||||||
fprintf(stderr, "min pos_in_interval =%g\n", min_mp_pos);
|
|
||||||
fprintf(stderr, "max pos_in_interval =%g\n", max_mp_pos);
|
|
||||||
fprintf(stderr, "min =%d\n", min);
|
|
||||||
fprintf(stderr, "max =%d\n", max);*/
|
|
||||||
/*fprintf(stderr, "final sel =%g\n", sel);
|
|
||||||
fprintf(stderr, "Histogram_binary::range_selectivity ends\n");*/
|
|
||||||
return sel;
|
return sel;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -400,10 +400,8 @@ public:
|
|||||||
double avg_selection) override;
|
double avg_selection) override;
|
||||||
double range_selectivity(Field *field, key_range *min_endp,
|
double range_selectivity(Field *field, key_range *min_endp,
|
||||||
key_range *max_endp) override;
|
key_range *max_endp) override;
|
||||||
/*
|
private:
|
||||||
* Returns the index of the biggest histogram value that is smaller than endpoint
|
int find_bucket(Field *field, const uchar *lookup_val, bool equal_is_less);
|
||||||
*/
|
|
||||||
int find_bucket(Field *field, const uchar *endpoint);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
class Columns_statistics;
|
class Columns_statistics;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user