Added comments.
Renamed the virtual method middle_point_pos for the class Field to pos_in_interval.
This commit is contained in:
parent
f4cd2b37b1
commit
b12b3cae85
72
sql/field.cc
72
sql/field.cc
@ -1274,7 +1274,24 @@ out_of_range:
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
double Field_num::middle_point_pos(Field *min, Field *max)
|
/**
|
||||||
|
@brief
|
||||||
|
Determine the relative position of the field value in a numeric interval
|
||||||
|
|
||||||
|
@details
|
||||||
|
The function returns a double number between 0.0 and 1.0 as the relative
|
||||||
|
position of the value of the this field in the numeric interval of [min,max].
|
||||||
|
If the value is not in the interval the the function returns 0.0 when
|
||||||
|
the value is less than min, and, 1.0 when the value is greater than max.
|
||||||
|
|
||||||
|
@param min value of the left end of the interval
|
||||||
|
@param max value of the right end of the interval
|
||||||
|
|
||||||
|
@return
|
||||||
|
relative position of the field value in the numeric interval [min,max]
|
||||||
|
*/
|
||||||
|
|
||||||
|
double Field_num::pos_in_interval(Field *min, Field *max)
|
||||||
{
|
{
|
||||||
double n, d;
|
double n, d;
|
||||||
n= val_real() - min->val_real();
|
n= val_real() - min->val_real();
|
||||||
@ -6196,7 +6213,39 @@ inline ulonglong char_prefix_to_ulonglong(uchar *src)
|
|||||||
return uint8korr(src);
|
return uint8korr(src);
|
||||||
}
|
}
|
||||||
|
|
||||||
double Field_str::middle_point_pos(Field *min, Field *max)
|
/**
|
||||||
|
@brief
|
||||||
|
Determine the relative position of the field value in a string interval
|
||||||
|
|
||||||
|
@details
|
||||||
|
The function returns a double number between 0.0 and 1.0 as the relative
|
||||||
|
position of the value of the this field in the string interval of [min,max].
|
||||||
|
If the value is not in the interval the the function returns 0.0 when
|
||||||
|
the value is less than min, and, 1.0 when the value is greater than max.
|
||||||
|
|
||||||
|
@note
|
||||||
|
To calculate the relative position of the string value v in the interval
|
||||||
|
[min, max] the function first converts the beginning of these three
|
||||||
|
strings v, min, max into the strings that are used for byte comparison.
|
||||||
|
For each string not more sizeof(ulonglong) first bytes are taken
|
||||||
|
from the result of conversion. Then these bytes are interpreted as the
|
||||||
|
big-endian representation of an ulonglong integer. The values of these
|
||||||
|
integer numbers obtained for the strings v, min, max are used to calculate
|
||||||
|
the position of v in [min,max] in the same way is it's done for numeric
|
||||||
|
fields (see Field_num::pos_in_interval).
|
||||||
|
|
||||||
|
@todo
|
||||||
|
Improve the procedure for the case when min and max have the same
|
||||||
|
beginning
|
||||||
|
|
||||||
|
@param min value of the left end of the interval
|
||||||
|
@param max value of the right end of the interval
|
||||||
|
|
||||||
|
@return
|
||||||
|
relative position of the field value in the string interval [min,max]
|
||||||
|
*/
|
||||||
|
|
||||||
|
double Field_str::pos_in_interval(Field *min, Field *max)
|
||||||
{
|
{
|
||||||
uchar mp_prefix[sizeof(ulonglong)];
|
uchar mp_prefix[sizeof(ulonglong)];
|
||||||
uchar minp_prefix[sizeof(ulonglong)];
|
uchar minp_prefix[sizeof(ulonglong)];
|
||||||
@ -8435,7 +8484,24 @@ my_decimal *Field_bit::val_decimal(my_decimal *deciaml_value)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
double Field_bit::middle_point_pos(Field *min, Field *max)
|
/**
|
||||||
|
@brief
|
||||||
|
Determine the relative position of the field value in a bit interval
|
||||||
|
|
||||||
|
@details
|
||||||
|
The function returns a double number between 0.0 and 1.0 as the relative
|
||||||
|
position of the value of the this field in the bit interval of [min,max].
|
||||||
|
If the value is not in the interval the the function returns 0.0 when
|
||||||
|
the value is less than min, and, 1.0 when the value is greater than max.
|
||||||
|
|
||||||
|
@param min value of the left end of the interval
|
||||||
|
@param max value of the right end of the interval
|
||||||
|
|
||||||
|
@return
|
||||||
|
relative position of the field value in the bit interval [min,max]
|
||||||
|
*/
|
||||||
|
|
||||||
|
double Field_bit::pos_in_interval(Field *min, Field *max)
|
||||||
{
|
{
|
||||||
double n, d;
|
double n, d;
|
||||||
n= val_real() - min->val_real();
|
n= val_real() - min->val_real();
|
||||||
|
11
sql/field.h
11
sql/field.h
@ -723,9 +723,10 @@ public:
|
|||||||
virtual bool hash_join_is_possible() { return TRUE; }
|
virtual bool hash_join_is_possible() { return TRUE; }
|
||||||
virtual bool eq_cmp_as_binary() { return TRUE; }
|
virtual bool eq_cmp_as_binary() { return TRUE; }
|
||||||
|
|
||||||
virtual double middle_point_pos(Field *min, Field *max)
|
/* Position of the field value within the interval of [min, max] */
|
||||||
|
virtual double pos_in_interval(Field *min, Field *max)
|
||||||
{
|
{
|
||||||
return (double) 1.0;
|
return (double) 0.5;
|
||||||
}
|
}
|
||||||
|
|
||||||
friend int cre_myisam(char * name, register TABLE *form, uint options,
|
friend int cre_myisam(char * name, register TABLE *form, uint options,
|
||||||
@ -846,7 +847,7 @@ public:
|
|||||||
bool get_int(CHARSET_INFO *cs, const char *from, uint len,
|
bool get_int(CHARSET_INFO *cs, const char *from, uint len,
|
||||||
longlong *rnd, ulonglong unsigned_max,
|
longlong *rnd, ulonglong unsigned_max,
|
||||||
longlong signed_min, longlong signed_max);
|
longlong signed_min, longlong signed_max);
|
||||||
double middle_point_pos(Field *min, Field *max);
|
double pos_in_interval(Field *min, Field *max);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@ -893,7 +894,7 @@ public:
|
|||||||
uint is_equal(Create_field *new_field);
|
uint is_equal(Create_field *new_field);
|
||||||
bool eq_cmp_as_binary() { return test(flags & BINARY_FLAG); }
|
bool eq_cmp_as_binary() { return test(flags & BINARY_FLAG); }
|
||||||
virtual uint length_size() { return 0; }
|
virtual uint length_size() { return 0; }
|
||||||
double middle_point_pos(Field *min, Field *max);
|
double pos_in_interval(Field *min, Field *max);
|
||||||
};
|
};
|
||||||
|
|
||||||
/* base class for Field_string, Field_varstring and Field_blob */
|
/* base class for Field_string, Field_varstring and Field_blob */
|
||||||
@ -2308,7 +2309,7 @@ public:
|
|||||||
{
|
{
|
||||||
store(*((longlong *)val), TRUE);
|
store(*((longlong *)val), TRUE);
|
||||||
}
|
}
|
||||||
double middle_point_pos(Field *min, Field *max);
|
double pos_in_interval(Field *min, Field *max);
|
||||||
void get_image(uchar *buff, uint length, CHARSET_INFO *cs)
|
void get_image(uchar *buff, uint length, CHARSET_INFO *cs)
|
||||||
{ get_key_image(buff, length, itRAW); }
|
{ get_key_image(buff, length, itRAW); }
|
||||||
void set_image(const uchar *buff,uint length, CHARSET_INFO *cs)
|
void set_image(const uchar *buff,uint length, CHARSET_INFO *cs)
|
||||||
|
@ -3222,6 +3222,26 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
|
|||||||
* Condition selectivity module
|
* Condition selectivity module
|
||||||
****************************************************************************/
|
****************************************************************************/
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
Build descriptors of pseudo-indexes over columns to perform range analysis
|
||||||
|
|
||||||
|
SYNOPSIS
|
||||||
|
create_key_parts_for_pseudo_indexes()
|
||||||
|
param IN/OUT data structure for the descriptors to be built
|
||||||
|
used_fields bitmap of columns for which the descriptors are to be built
|
||||||
|
|
||||||
|
DESCRIPTION
|
||||||
|
For each column marked in the bitmap used_fields the function builds
|
||||||
|
a descriptor of a single-component pseudo-index over this column that
|
||||||
|
can be used for the range analysis of the predicates over this columns.
|
||||||
|
The descriptors are created in the memory of param->mem_root.
|
||||||
|
|
||||||
|
RETURN
|
||||||
|
FALSE in the case of success
|
||||||
|
TRUE otherwise
|
||||||
|
*/
|
||||||
|
|
||||||
static
|
static
|
||||||
bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param,
|
bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param,
|
||||||
MY_BITMAP *used_fields)
|
MY_BITMAP *used_fields)
|
||||||
@ -3275,6 +3295,31 @@ bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
Estimate the number of rows in all ranges built for a column
|
||||||
|
by the range optimizer
|
||||||
|
|
||||||
|
SYNOPSIS
|
||||||
|
records_in_column_ranges()
|
||||||
|
param the data structure to access descriptors of pseudo indexes
|
||||||
|
built over columns used in the condition of the processed query
|
||||||
|
idx the index of the descriptor of interest in param
|
||||||
|
tree the tree representing ranges built for the interesting column
|
||||||
|
|
||||||
|
DESCRIPTION
|
||||||
|
This function retrieves the ranges represented by the SEL_ARG 'tree' and
|
||||||
|
for each of them r it calls the function get_column_range_cardinality()
|
||||||
|
that estimates the number of expected rows in r. It is assumed that param
|
||||||
|
is the data structure containing the descriptors of pseudo-indexes that
|
||||||
|
has been built to perform range analysis of the range conditions imposed
|
||||||
|
on the columns used in the processed query, while idx is the index of the
|
||||||
|
descriptor created in 'param' exactly for the column for which 'tree'
|
||||||
|
has been built by the range optimizer.
|
||||||
|
|
||||||
|
RETURN
|
||||||
|
the number of rows in the retrieved ranges
|
||||||
|
*/
|
||||||
|
|
||||||
static
|
static
|
||||||
double records_in_column_ranges(PARAM *param, uint idx,
|
double records_in_column_ranges(PARAM *param, uint idx,
|
||||||
SEL_ARG *tree)
|
SEL_ARG *tree)
|
||||||
@ -3322,6 +3367,29 @@ double records_in_column_ranges(PARAM *param, uint idx,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
Calculate the selectivity of the condition imposed on the rows of a table
|
||||||
|
|
||||||
|
SYNOPSIS
|
||||||
|
calculate_cond_selectivity_for_table()
|
||||||
|
thd the context handle
|
||||||
|
table the table of interest
|
||||||
|
cond conditions imposed on the rows of the table
|
||||||
|
|
||||||
|
DESCRIPTION
|
||||||
|
This function calculates the selectivity of range conditions cond imposed
|
||||||
|
on the rows of 'table' in the processed query.
|
||||||
|
The calculated selectivity is assigned to the field table->cond_selectivity.
|
||||||
|
|
||||||
|
NOTE
|
||||||
|
Currently the selectivities of range conditions over different columns are
|
||||||
|
considered independent.
|
||||||
|
|
||||||
|
RETURN
|
||||||
|
FALSE on success
|
||||||
|
TRUE otherwise
|
||||||
|
*/
|
||||||
|
|
||||||
bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item *cond)
|
bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item *cond)
|
||||||
{
|
{
|
||||||
uint keynr;
|
uint keynr;
|
||||||
@ -3338,6 +3406,11 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item *cond)
|
|||||||
if (thd->variables.optimizer_use_condition_selectivity > 2 &&
|
if (thd->variables.optimizer_use_condition_selectivity > 2 &&
|
||||||
!bitmap_is_clear_all(used_fields))
|
!bitmap_is_clear_all(used_fields))
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
|
Calculate the selectivity of the range conditions not supported
|
||||||
|
by any index
|
||||||
|
*/
|
||||||
|
|
||||||
PARAM param;
|
PARAM param;
|
||||||
MEM_ROOT alloc;
|
MEM_ROOT alloc;
|
||||||
SEL_TREE *tree;
|
SEL_TREE *tree;
|
||||||
|
@ -292,7 +292,8 @@ typedef struct st_join_table {
|
|||||||
/* psergey-todo: make the below have type double, like POSITION::records_read? */
|
/* psergey-todo: make the below have type double, like POSITION::records_read? */
|
||||||
ha_rows records_read;
|
ha_rows records_read;
|
||||||
|
|
||||||
double cond_selectivity;
|
/* The selectivity of the conditions that can be pushed to the table */
|
||||||
|
double cond_selectivity;
|
||||||
|
|
||||||
/* Startup cost for execution */
|
/* Startup cost for execution */
|
||||||
double startup_cost;
|
double startup_cost;
|
||||||
@ -774,7 +775,8 @@ typedef struct st_position :public Sql_alloc
|
|||||||
*/
|
*/
|
||||||
double records_read;
|
double records_read;
|
||||||
|
|
||||||
double cond_selectivity;
|
/* The selectivity of the pushed down conditions */
|
||||||
|
double cond_selectivity;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Cost accessing the table in course of the entire complete join execution,
|
Cost accessing the table in course of the entire complete join execution,
|
||||||
|
@ -250,12 +250,13 @@ public:
|
|||||||
and column_name with column_name taken out of the only parameter f of the
|
and column_name with column_name taken out of the only parameter f of the
|
||||||
Field* type passed to this method. After this get_stat_values looks
|
Field* type passed to this method. After this get_stat_values looks
|
||||||
for a row by the set key value. If the row is found the values of statistical
|
for a row by the set key value. If the row is found the values of statistical
|
||||||
data columns min_value, max_value, nulls_ratio, avg_length, avg_frequency
|
data columns min_value, max_value, nulls_ratio, avg_length, avg_frequency,
|
||||||
are read into internal structures. Values of nulls_ratio, avg_length,
|
hist_size, hist_type, histogram are read into internal structures. Values
|
||||||
avg_frequency are read into the corresponding fields of the read_stat
|
of nulls_ratio, avg_length, avg_frequency, hist_size, hist_type, histogram
|
||||||
structure from the Field object f, while values from min_value and max_value
|
are read into the corresponding fields of the read_stat structure from
|
||||||
are copied into the min_value and max_value record buffers attached to the
|
the Field object f, while values from min_value and max_value are copied
|
||||||
TABLE structure for table t.
|
into the min_value and max_value record buffers attached to the TABLE
|
||||||
|
structure for table t.
|
||||||
If the value of a statistical column in the found row is null, then the
|
If the value of a statistical column in the found row is null, then the
|
||||||
corresponding flag in the f->read_stat.column_stat_nulls bitmap is set off.
|
corresponding flag in the f->read_stat.column_stat_nulls bitmap is set off.
|
||||||
Otherwise the flag is set on. If no row is found for the column the all flags
|
Otherwise the flag is set on. If no row is found for the column the all flags
|
||||||
@ -868,11 +869,12 @@ public:
|
|||||||
|
|
||||||
@details
|
@details
|
||||||
This implementation of a purely virtual method sets the value of the
|
This implementation of a purely virtual method sets the value of the
|
||||||
columns 'min_value', 'max_value', 'nulls_ratio', 'avg_length' and
|
columns 'min_value', 'max_value', 'nulls_ratio', 'avg_length',
|
||||||
'avg_frequency' of the stistical table columns_stat according to the
|
'avg_frequency', 'hist_size', 'hist_type' and 'histogram' of the
|
||||||
contents of the bitmap write_stat.column_stat_nulls and the values
|
stistical table columns_stat according to the contents of the bitmap
|
||||||
of the fields min_value, max_value, nulls_ratio, avg_length and
|
write_stat.column_stat_nulls and the values of the fields min_value,
|
||||||
avg_frequency of the structure write_stat from the Field structure
|
max_value, nulls_ratio, avg_length, avg_frequency, hist_size, hist_type
|
||||||
|
and histogram of the structure write_stat from the Field structure
|
||||||
for the field 'table_field'.
|
for the field 'table_field'.
|
||||||
The value of the k-th column in the table columns_stat is set to NULL
|
The value of the k-th column in the table columns_stat is set to NULL
|
||||||
if the k-th bit in the bitmap 'column_stat_nulls' is set to 1.
|
if the k-th bit in the bitmap 'column_stat_nulls' is set to 1.
|
||||||
@ -951,14 +953,15 @@ public:
|
|||||||
|
|
||||||
@details
|
@details
|
||||||
This implementation of a purely virtual method first looks for a record
|
This implementation of a purely virtual method first looks for a record
|
||||||
the statistical table column_stats by its primary key set the record
|
in the statistical table column_stats by its primary key set in the record
|
||||||
buffer with the help of Column_stat::set_key_fields. Then, if the row is
|
buffer with the help of Column_stat::set_key_fields. Then, if the row is
|
||||||
found, the function reads the values of the columns 'min_value',
|
found, the function reads the values of the columns 'min_value',
|
||||||
'max_value', 'nulls_ratio', 'avg_length' and 'avg_frequency' of the
|
'max_value', 'nulls_ratio', 'avg_length', 'avg_frequency', 'hist_size' and
|
||||||
table column_stat and sets accordingly the value of the bitmap
|
'hist_type" of the table column_stat and sets accordingly the value of
|
||||||
read_stat.column_stat_nulls' and the values of the fields min_value,
|
the bitmap read_stat.column_stat_nulls' and the values of the fields
|
||||||
max_value, nulls_ratio, avg_length and avg_frequency of the structure
|
min_value, max_value, nulls_ratio, avg_length, avg_frequency, hist_size and
|
||||||
read_stat from the Field structure for the field 'table_field'.
|
hist_type of the structure read_stat from the Field structure for the field
|
||||||
|
'table_field'.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void get_stat_values()
|
void get_stat_values()
|
||||||
@ -1022,6 +1025,22 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
@brief
|
||||||
|
Read histogram from of column_stats
|
||||||
|
|
||||||
|
@details
|
||||||
|
This method first looks for a record in the statistical table column_stats
|
||||||
|
by its primary key set the record buffer with the help of
|
||||||
|
Column_stat::set_key_fields. Then, if the row is found, the function reads
|
||||||
|
the value of the column 'histogram' of the table column_stat and sets
|
||||||
|
accordingly the corresponding bit in the bitmap read_stat.column_stat_nulls.
|
||||||
|
The method assumes that the value of histogram size and the pointer to
|
||||||
|
the histogram location has been already set in the fields size and values
|
||||||
|
of read_stats->histogram.
|
||||||
|
*/
|
||||||
|
|
||||||
void get_histogram_value()
|
void get_histogram_value()
|
||||||
{
|
{
|
||||||
if (find_stat())
|
if (find_stat())
|
||||||
@ -1238,20 +1257,24 @@ public:
|
|||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
Histogram_builder is a helper class that is used to build histograms
|
||||||
|
for columns
|
||||||
|
*/
|
||||||
|
|
||||||
class Histogram_builder
|
class Histogram_builder
|
||||||
{
|
{
|
||||||
Field *column;
|
Field *column; /* table field for which the histogram is built */
|
||||||
uint col_length;
|
uint col_length; /* size of this field */
|
||||||
ha_rows records;
|
ha_rows records; /* number of records the histogram is built for */
|
||||||
Field *min_value;
|
Field *min_value; /* pointer to the minimal value for the field */
|
||||||
Field *max_value;
|
Field *max_value; /* pointer to the maximal value for the field */
|
||||||
Histogram *histogram;
|
Histogram *histogram; /* the histogram location */
|
||||||
uint hist_width;
|
uint hist_width; /* the number of points in the histogram */
|
||||||
double bucket_capacity;
|
double bucket_capacity; /* number of rows in a bucket of the histogram */
|
||||||
uint curr_bucket;
|
uint curr_bucket; /* number of the current bucket to be built */
|
||||||
ulonglong count;
|
ulonglong count; /* number of values retrieved */
|
||||||
ulonglong count_distinct;
|
ulonglong count_distinct; /* number of distinct values retrieved */
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Histogram_builder(Field *col, uint col_len, ha_rows rows)
|
Histogram_builder(Field *col, uint col_len, ha_rows rows)
|
||||||
@ -1280,7 +1303,7 @@ public:
|
|||||||
{
|
{
|
||||||
column->store_field_value((uchar *) elem, col_length);
|
column->store_field_value((uchar *) elem, col_length);
|
||||||
histogram->set_value(curr_bucket,
|
histogram->set_value(curr_bucket,
|
||||||
column->middle_point_pos(min_value, max_value));
|
column->pos_in_interval(min_value, max_value));
|
||||||
curr_bucket++;
|
curr_bucket++;
|
||||||
while (curr_bucket != hist_width &&
|
while (curr_bucket != hist_width &&
|
||||||
count > bucket_capacity * (curr_bucket + 1))
|
count > bucket_capacity * (curr_bucket + 1))
|
||||||
@ -1389,6 +1412,10 @@ public:
|
|||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
@brief
|
||||||
|
Build the histogram for the elements accumulated in the container of 'tree'
|
||||||
|
*/
|
||||||
ulonglong get_value_with_histogram(ha_rows rows)
|
ulonglong get_value_with_histogram(ha_rows rows)
|
||||||
{
|
{
|
||||||
Histogram_builder hist_builder(table_field, tree_key_length, rows);
|
Histogram_builder hist_builder(table_field, tree_key_length, rows);
|
||||||
@ -1396,11 +1423,19 @@ public:
|
|||||||
return hist_builder.get_count_distinct();
|
return hist_builder.get_count_distinct();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
@brief
|
||||||
|
Get the size of the histogram in bytes built for table_field
|
||||||
|
*/
|
||||||
uint get_hist_size()
|
uint get_hist_size()
|
||||||
{
|
{
|
||||||
return table_field->collected_stats->histogram.get_size();
|
return table_field->collected_stats->histogram.get_size();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
@brief
|
||||||
|
Get the pointer to the histogram built for table_field
|
||||||
|
*/
|
||||||
uchar *get_histogram()
|
uchar *get_histogram()
|
||||||
{
|
{
|
||||||
return table_field->collected_stats->histogram.get_values();
|
return table_field->collected_stats->histogram.get_values();
|
||||||
@ -1936,7 +1971,6 @@ inline bool statistics_for_command_is_needed(THD *thd)
|
|||||||
@note
|
@note
|
||||||
Currently the function always is called with the parameter is_safe set
|
Currently the function always is called with the parameter is_safe set
|
||||||
to FALSE.
|
to FALSE.
|
||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
int alloc_statistics_for_table_share(THD* thd, TABLE_SHARE *table_share,
|
int alloc_statistics_for_table_share(THD* thd, TABLE_SHARE *table_share,
|
||||||
@ -2051,6 +2085,36 @@ int alloc_statistics_for_table_share(THD* thd, TABLE_SHARE *table_share,
|
|||||||
DBUG_RETURN(0);
|
DBUG_RETURN(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
@brief
|
||||||
|
Allocate memory for the histogram used by a table share
|
||||||
|
|
||||||
|
@param
|
||||||
|
thd Thread handler
|
||||||
|
@param
|
||||||
|
table_share Table share for which the memory for histogram data is allocated
|
||||||
|
@param
|
||||||
|
is_safe TRUE <-> at any time only one thread can perform the function
|
||||||
|
|
||||||
|
@note
|
||||||
|
The function allocates the memory for the histogram built for a table in the
|
||||||
|
table's share memory with the intention to read the data there from the
|
||||||
|
system persistent statistical table mysql.column_stats,
|
||||||
|
The memory is allocated in the table_share's mem_root.
|
||||||
|
If the parameter is_safe is TRUE then it is guaranteed that at any given time
|
||||||
|
only one thread is executed the code of the function.
|
||||||
|
|
||||||
|
@retval
|
||||||
|
0 If the memory for all statistical data has been successfully allocated
|
||||||
|
@retval
|
||||||
|
1 Otherwise
|
||||||
|
|
||||||
|
@note
|
||||||
|
Currently the function always is called with the parameter is_safe set
|
||||||
|
to FALSE.
|
||||||
|
*/
|
||||||
|
|
||||||
static
|
static
|
||||||
int alloc_histograms_for_table_share(THD* thd, TABLE_SHARE *table_share,
|
int alloc_histograms_for_table_share(THD* thd, TABLE_SHARE *table_share,
|
||||||
bool is_safe)
|
bool is_safe)
|
||||||
@ -2781,6 +2845,38 @@ bool statistics_for_tables_is_needed(THD *thd, TABLE_LIST *tables)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
@brief
|
||||||
|
Read histogram for a table from the persistent statistical tables
|
||||||
|
|
||||||
|
@param
|
||||||
|
thd The thread handle
|
||||||
|
@param
|
||||||
|
table The table to read histograms for
|
||||||
|
@param
|
||||||
|
stat_tables The array of TABLE_LIST objects for statistical tables
|
||||||
|
|
||||||
|
@details
|
||||||
|
For the statistical table columns_stats the function looks for the rows
|
||||||
|
from this table that contain statistical data on 'table'. If such rows
|
||||||
|
are found the histograms from them are read into the memory allocated
|
||||||
|
for histograms of 'table'. Later at the query processing these histogram
|
||||||
|
are supposed to be used by the optimizer.
|
||||||
|
The parameter stat_tables should point to an array of TABLE_LIST
|
||||||
|
objects for all statistical tables linked into a list. All statistical
|
||||||
|
tables are supposed to be opened.
|
||||||
|
The function is called by read_statistics_for_tables_if_needed().
|
||||||
|
|
||||||
|
@retval
|
||||||
|
0 If data has been successfully read for the table
|
||||||
|
@retval
|
||||||
|
1 Otherwise
|
||||||
|
|
||||||
|
@note
|
||||||
|
Objects of the helper Column_stat are employed read histogram
|
||||||
|
from the statistical table column_stats now.
|
||||||
|
*/
|
||||||
|
|
||||||
static
|
static
|
||||||
int read_histograms_for_table(THD *thd, TABLE *table, TABLE_LIST *stat_tables)
|
int read_histograms_for_table(THD *thd, TABLE *table, TABLE_LIST *stat_tables)
|
||||||
{
|
{
|
||||||
@ -3311,6 +3407,17 @@ void set_statistics_for_table(THD *thd, TABLE *table)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
@brief
|
||||||
|
Get the average frequency for a column
|
||||||
|
|
||||||
|
@param
|
||||||
|
field The column whose average frequency is required
|
||||||
|
|
||||||
|
@retval
|
||||||
|
The required average frequency
|
||||||
|
*/
|
||||||
|
|
||||||
double get_column_avg_frequency(Field * field)
|
double get_column_avg_frequency(Field * field)
|
||||||
{
|
{
|
||||||
double res;
|
double res;
|
||||||
@ -3337,6 +3444,27 @@ double get_column_avg_frequency(Field * field)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
@brief
|
||||||
|
Estimate the number of rows in a column range using data from stat tables
|
||||||
|
|
||||||
|
@param
|
||||||
|
field The column whose range cardinality is to be estimated
|
||||||
|
@param
|
||||||
|
min_endp The left end of the range whose cardinality is required
|
||||||
|
@param
|
||||||
|
max_endp The right end of the range whose cardinality is required
|
||||||
|
@param
|
||||||
|
range_flag The range flags
|
||||||
|
|
||||||
|
@details
|
||||||
|
The function gets an estimate of the number of rows in a column range
|
||||||
|
using the statistical data from the table column_stats.
|
||||||
|
|
||||||
|
@retval
|
||||||
|
The required estimate of the rows in the column range
|
||||||
|
*/
|
||||||
|
|
||||||
double get_column_range_cardinality(Field *field,
|
double get_column_range_cardinality(Field *field,
|
||||||
key_range *min_endp,
|
key_range *min_endp,
|
||||||
key_range *max_endp,
|
key_range *max_endp,
|
||||||
@ -3377,8 +3505,8 @@ double get_column_range_cardinality(Field *field,
|
|||||||
Histogram *hist= &col_stats->histogram;
|
Histogram *hist= &col_stats->histogram;
|
||||||
if (hist->is_available())
|
if (hist->is_available())
|
||||||
{
|
{
|
||||||
double pos= field->middle_point_pos(col_stats->min_value,
|
double pos= field->pos_in_interval(col_stats->min_value,
|
||||||
col_stats->max_value);
|
col_stats->max_value);
|
||||||
res= col_non_nulls *
|
res= col_non_nulls *
|
||||||
hist->point_selectivity(pos,
|
hist->point_selectivity(pos,
|
||||||
avg_frequency / col_non_nulls);
|
avg_frequency / col_non_nulls);
|
||||||
@ -3396,8 +3524,8 @@ double get_column_range_cardinality(Field *field,
|
|||||||
{
|
{
|
||||||
store_key_image_to_rec(field, (uchar *) min_endp->key,
|
store_key_image_to_rec(field, (uchar *) min_endp->key,
|
||||||
min_endp->length);
|
min_endp->length);
|
||||||
min_mp_pos= field->middle_point_pos(col_stats->min_value,
|
min_mp_pos= field->pos_in_interval(col_stats->min_value,
|
||||||
col_stats->max_value);
|
col_stats->max_value);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
min_mp_pos= 0.0;
|
min_mp_pos= 0.0;
|
||||||
@ -3405,8 +3533,8 @@ double get_column_range_cardinality(Field *field,
|
|||||||
{
|
{
|
||||||
store_key_image_to_rec(field, (uchar *) max_endp->key,
|
store_key_image_to_rec(field, (uchar *) max_endp->key,
|
||||||
max_endp->length);
|
max_endp->length);
|
||||||
max_mp_pos= field->middle_point_pos(col_stats->min_value,
|
max_mp_pos= field->pos_in_interval(col_stats->min_value,
|
||||||
col_stats->max_value);
|
col_stats->max_value);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
max_mp_pos= 1.0;
|
max_mp_pos= 1.0;
|
||||||
|
@ -271,8 +271,8 @@ public:
|
|||||||
Column_statistics *column_stats; /* Array of statistical data for columns */
|
Column_statistics *column_stats; /* Array of statistical data for columns */
|
||||||
Index_statistics *index_stats; /* Array of statistical data for indexes */
|
Index_statistics *index_stats; /* Array of statistical data for indexes */
|
||||||
ulong *idx_avg_frequency; /* Array of records per key for index prefixes */
|
ulong *idx_avg_frequency; /* Array of records per key for index prefixes */
|
||||||
ulong total_hist_size;
|
ulong total_hist_size; /* Total size of all histograms */
|
||||||
uchar *histograms; /* Sequence of histograms */
|
uchar *histograms; /* Sequence of histograms */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user