Addressed all remaining issues from the review of the patch
that introduced engine-independent persistent statistics. In particular: - added an enumeration type for the possible values of the system variable use_stat_tables; - renamed KEY::real_rec_per_key to KEY::actual_rec_per_key; - optimized the collection of statistical data for any primary key defined on only one column.
This commit is contained in:
parent
65820439bd
commit
a06224bd15
@ -1201,7 +1201,7 @@ bool DsMrr_impl::setup_buffer_sharing(uint key_size_in_keybuf,
|
||||
uint parts= my_count_bits(key_tuple_map);
|
||||
ulong rpc;
|
||||
ulonglong rowids_size= rowid_buf_elem_size;
|
||||
if ((rpc= key_info->real_rec_per_key(parts - 1)))
|
||||
if ((rpc= key_info->actual_rec_per_key(parts - 1)))
|
||||
rowids_size= rowid_buf_elem_size * rpc;
|
||||
|
||||
double fraction_for_rowids=
|
||||
|
@ -5502,8 +5502,8 @@ ha_rows records_in_index_intersect_extension(PARTIAL_INDEX_INTERSECT_INFO *curr,
|
||||
ha_rows ext_records= ext_index_scan->records;
|
||||
if (i < used_key_parts)
|
||||
{
|
||||
ulong f1= key_info->real_rec_per_key(i-1);
|
||||
ulong f2= key_info->real_rec_per_key(i);
|
||||
ulong f1= key_info->actual_rec_per_key(i-1);
|
||||
ulong f2= key_info->actual_rec_per_key(i);
|
||||
ext_records= (ha_rows) ((double) ext_records / f2 * f1);
|
||||
}
|
||||
if (ext_records < table_cardinality)
|
||||
@ -12642,7 +12642,7 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
|
||||
num_blocks= (uint)(table_records / keys_per_block) + 1;
|
||||
|
||||
/* Compute the number of keys in a group. */
|
||||
keys_per_group= index_info->real_rec_per_key(group_key_parts - 1);
|
||||
keys_per_group= index_info->actual_rec_per_key(group_key_parts - 1);
|
||||
if (keys_per_group == 0) /* If there is no statistics try to guess */
|
||||
/* each group contains 10% of all records */
|
||||
keys_per_group= (uint)(table_records / 10) + 1;
|
||||
@ -12662,7 +12662,7 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
|
||||
Compute the probability that two ends of a subgroup are inside
|
||||
different blocks.
|
||||
*/
|
||||
keys_per_subgroup= index_info->real_rec_per_key(used_key_parts - 1);
|
||||
keys_per_subgroup= index_info->actual_rec_per_key(used_key_parts - 1);
|
||||
if (keys_per_subgroup >= keys_per_block) /* If a subgroup is bigger than */
|
||||
p_overlap= 1.0; /* a block, it will overlap at least two blocks. */
|
||||
else
|
||||
|
@ -29,6 +29,7 @@
|
||||
#include "sql_parse.h" // check_table_access
|
||||
#include "strfunc.h"
|
||||
#include "sql_admin.h"
|
||||
#include "sql_statistics.h"
|
||||
|
||||
/* Prepare, run and cleanup for mysql_recreate_table() */
|
||||
|
||||
@ -718,7 +719,7 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables,
|
||||
if (compl_result_code == HA_ADMIN_OK &&
|
||||
operator_func == &handler::ha_analyze &&
|
||||
table->table->s->table_category == TABLE_CATEGORY_USER &&
|
||||
(thd->variables.use_stat_tables > 0 ||
|
||||
(get_use_stat_tables_mode(thd) > NEVER ||
|
||||
lex->with_persistent_for_clause))
|
||||
{
|
||||
if (!(compl_result_code=
|
||||
|
@ -49,6 +49,7 @@
|
||||
#include "sql_trigger.h"
|
||||
#include "transaction.h"
|
||||
#include "sql_prepare.h"
|
||||
#include "sql_statistics.h"
|
||||
#include <m_ctype.h>
|
||||
#include <my_dir.h>
|
||||
#include <hash.h>
|
||||
@ -3142,7 +3143,7 @@ retry_share:
|
||||
while (table_cache_count > table_cache_size && unused_tables)
|
||||
free_cache_entry(unused_tables);
|
||||
|
||||
if (thd->variables.use_stat_tables > 0)
|
||||
if (get_use_stat_tables_mode(thd) > NEVER)
|
||||
{
|
||||
if (share->table_category != TABLE_CATEGORY_SYSTEM)
|
||||
{
|
||||
@ -4634,7 +4635,7 @@ open_and_process_table(THD *thd, LEX *lex, TABLE_LIST *tables,
|
||||
goto end;
|
||||
}
|
||||
|
||||
if (thd->variables.use_stat_tables > 0 && tables->table)
|
||||
if (get_use_stat_tables_mode(thd) > NEVER && tables->table)
|
||||
{
|
||||
TABLE_SHARE *table_share= tables->table->s;
|
||||
if (table_share && table_share->table_category != TABLE_CATEGORY_SYSTEM)
|
||||
|
@ -310,22 +310,6 @@ int dynamic_column_error_message(enum_dyncol_func_result rc);
|
||||
/* open_and_lock_tables with optional derived handling */
|
||||
int open_and_lock_tables_derived(THD *thd, TABLE_LIST *tables, bool derived);
|
||||
|
||||
int read_statistics_for_tables_if_needed(THD *thd, TABLE_LIST *tables);
|
||||
int collect_statistics_for_table(THD *thd, TABLE *table);
|
||||
int alloc_statistics_for_table_share(THD* thd, TABLE_SHARE *share,
|
||||
bool is_safe);
|
||||
int alloc_statistics_for_table(THD *thd, TABLE *table);
|
||||
int update_statistics_for_table(THD *thd, TABLE *table);
|
||||
int delete_statistics_for_table(THD *thd, LEX_STRING *db, LEX_STRING *tab);
|
||||
int delete_statistics_for_column(THD *thd, TABLE *tab, Field *col);
|
||||
int delete_statistics_for_index(THD *thd, TABLE *tab, KEY *key_info,
|
||||
bool ext_prefixes_only);
|
||||
int rename_table_in_stat_tables(THD *thd, LEX_STRING *db, LEX_STRING *tab,
|
||||
LEX_STRING *new_db, LEX_STRING *new_tab);
|
||||
int rename_column_in_stat_tables(THD *thd, TABLE *tab, Field *col,
|
||||
const char *new_name);
|
||||
void set_statistics_for_table(THD *thd, TABLE *table);
|
||||
|
||||
extern "C" int simple_raw_key_cmp(void* arg, const void* key1,
|
||||
const void* key2);
|
||||
extern "C" int count_distinct_walk(void *elem, element_count count, void *arg);
|
||||
|
@ -36,6 +36,7 @@
|
||||
#include "sp.h"
|
||||
#include "events.h"
|
||||
#include "sql_handler.h"
|
||||
#include "sql_statistics.h"
|
||||
#include <my_dir.h>
|
||||
#include <m_ctype.h>
|
||||
#include "log.h"
|
||||
|
@ -35,6 +35,7 @@
|
||||
#include "sql_select.h"
|
||||
#include "sp_head.h"
|
||||
#include "sql_trigger.h"
|
||||
#include "sql_statistics.h"
|
||||
#include "transaction.h"
|
||||
#include "records.h" // init_read_record,
|
||||
#include "sql_derived.h" // mysql_handle_list_of_derived
|
||||
|
@ -3812,7 +3812,8 @@ uint JOIN_TAB_SCAN_MRR::aux_buffer_incr(ulong recno)
|
||||
uint incr= 0;
|
||||
TABLE_REF *ref= &join_tab->ref;
|
||||
TABLE *tab= join_tab->table;
|
||||
uint rec_per_key= tab->key_info[ref->key].real_rec_per_key(ref->key_parts-1);
|
||||
uint rec_per_key=
|
||||
tab->key_info[ref->key].actual_rec_per_key(ref->key_parts-1);
|
||||
set_if_bigger(rec_per_key, 1);
|
||||
if (recno == 1)
|
||||
incr= ref->key_length + tab->file->ref_length;
|
||||
|
@ -28,6 +28,7 @@
|
||||
#include "lock.h" // MYSQL_OPEN_SKIP_TEMPORARY
|
||||
#include "sql_base.h" // tdc_remove_table, lock_table_names,
|
||||
#include "sql_handler.h" // mysql_ha_rm_tables
|
||||
#include "sql_statistics.h"
|
||||
#include "datadict.h"
|
||||
|
||||
static TABLE_LIST *rename_tables(THD *thd, TABLE_LIST *table_list,
|
||||
|
@ -5416,7 +5416,7 @@ best_access_path(JOIN *join,
|
||||
else
|
||||
{
|
||||
uint key_parts= table->actual_n_key_parts(keyinfo);
|
||||
if (!(records= keyinfo->real_rec_per_key(key_parts-1)))
|
||||
if (!(records= keyinfo->actual_rec_per_key(key_parts-1)))
|
||||
{ /* Prefer longer keys */
|
||||
records=
|
||||
((double) s->records / (double) rec *
|
||||
@ -5516,7 +5516,7 @@ best_access_path(JOIN *join,
|
||||
else
|
||||
{
|
||||
/* Check if we have statistic about the distribution */
|
||||
if ((records= keyinfo->real_rec_per_key(max_key_part-1)))
|
||||
if ((records= keyinfo->actual_rec_per_key(max_key_part-1)))
|
||||
{
|
||||
/*
|
||||
Fix for the case where the index statistics is too
|
||||
@ -22974,7 +22974,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
|
||||
if (used_key_parts > used_index_parts)
|
||||
used_pk_parts= used_key_parts-used_index_parts;
|
||||
rec_per_key= used_key_parts ?
|
||||
keyinfo->real_rec_per_key(used_key_parts-1) : 1;
|
||||
keyinfo->actual_rec_per_key(used_key_parts-1) : 1;
|
||||
/* Take into account the selectivity of the used pk prefix */
|
||||
if (used_pk_parts)
|
||||
{
|
||||
@ -22989,8 +22989,8 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
|
||||
rec_per_key= 1;
|
||||
if (rec_per_key > 1)
|
||||
{
|
||||
rec_per_key*= pkinfo->real_rec_per_key(used_pk_parts-1);
|
||||
rec_per_key/= pkinfo->real_rec_per_key(0);
|
||||
rec_per_key*= pkinfo->actual_rec_per_key(used_pk_parts-1);
|
||||
rec_per_key/= pkinfo->actual_rec_per_key(0);
|
||||
/*
|
||||
The value of rec_per_key for the extended key has
|
||||
to be adjusted accordingly if some components of
|
||||
@ -23004,9 +23004,9 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
|
||||
We presume here that for any index rec_per_key[i] != 0
|
||||
if rec_per_key[0] != 0.
|
||||
*/
|
||||
DBUG_ASSERT(pkinfo->real_rec_per_key(i));
|
||||
rec_per_key*= pkinfo->real_rec_per_key(i-1);
|
||||
rec_per_key/= pkinfo->real_rec_per_key(i);
|
||||
DBUG_ASSERT(pkinfo->actual_rec_per_key(i));
|
||||
rec_per_key*= pkinfo->actual_rec_per_key(i-1);
|
||||
rec_per_key/= pkinfo->actual_rec_per_key(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -23051,7 +23051,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
|
||||
select_limit= (ha_rows) (select_limit *
|
||||
(double) table_records /
|
||||
table->quick_condition_rows);
|
||||
rec_per_key= keyinfo->real_rec_per_key(keyinfo->key_parts-1);
|
||||
rec_per_key= keyinfo->actual_rec_per_key(keyinfo->key_parts-1);
|
||||
set_if_bigger(rec_per_key, 1);
|
||||
/*
|
||||
Here we take into account the fact that rows are
|
||||
|
@ -45,6 +45,7 @@
|
||||
#include "set_var.h"
|
||||
#include "sql_trigger.h"
|
||||
#include "sql_derived.h"
|
||||
#include "sql_statistics.h"
|
||||
#include "sql_connect.h"
|
||||
#include "authors.h"
|
||||
#include "contributors.h"
|
||||
@ -5765,7 +5766,7 @@ static int get_schema_stat_record(THD *thd, TABLE_LIST *tables,
|
||||
if (key->rec_per_key[j])
|
||||
{
|
||||
ha_rows records=((double) show_table->stat_records() /
|
||||
key->real_rec_per_key(j));
|
||||
key->actual_rec_per_key(j));
|
||||
table->field[9]->store((longlong) records, TRUE);
|
||||
table->field[9]->set_notnull();
|
||||
}
|
||||
|
@ -206,6 +206,8 @@ private:
|
||||
Count_distinct_field *count_distinct; /* The container for distinct
|
||||
column values */
|
||||
|
||||
bool is_single_pk_col; /* TRUE <-> the only column of the primary key */
|
||||
|
||||
public:
|
||||
|
||||
inline void init(THD *thd, Field * table_field);
|
||||
@ -1399,6 +1401,8 @@ private:
|
||||
|
||||
public:
|
||||
|
||||
bool is_single_comp_pk;
|
||||
|
||||
Index_prefix_calc(TABLE *table, KEY *key_info)
|
||||
: index_table(table), index_info(key_info)
|
||||
{
|
||||
@ -1407,6 +1411,16 @@ public:
|
||||
uint key_parts= table->actual_n_key_parts(key_info);
|
||||
empty= TRUE;
|
||||
prefixes= 0;
|
||||
|
||||
is_single_comp_pk= FALSE;
|
||||
uint pk= table->s->primary_key;
|
||||
if (table->key_info - key_info == pk && table->key_info[pk].key_parts == 1)
|
||||
{
|
||||
prefixes= 1;
|
||||
is_single_comp_pk= TRUE;
|
||||
return;
|
||||
}
|
||||
|
||||
if ((calc_state=
|
||||
(Prefix_calc_state *) sql_alloc(sizeof(Prefix_calc_state)*key_parts)))
|
||||
{
|
||||
@ -1430,6 +1444,7 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
@brief
|
||||
Change the elements of calc_state after reading the next index entry
|
||||
@ -1487,6 +1502,13 @@ public:
|
||||
{
|
||||
uint i;
|
||||
Prefix_calc_state *state;
|
||||
|
||||
if (is_single_comp_pk)
|
||||
{
|
||||
index_info->collected_stats->set_avg_frequency(0, 1.0);
|
||||
return;
|
||||
}
|
||||
|
||||
for (i= 0, state= calc_state; i < prefixes; i++, state++)
|
||||
{
|
||||
if (i < prefixes)
|
||||
@ -1658,7 +1680,7 @@ void create_min_max_stistical_fields_for_table_share(THD *thd,
|
||||
int alloc_statistics_for_table(THD* thd, TABLE *table)
|
||||
{
|
||||
Field **field_ptr;
|
||||
uint cnt= 0;
|
||||
uint fields;
|
||||
|
||||
DBUG_ENTER("alloc_statistics_for_table");
|
||||
|
||||
@ -1666,10 +1688,11 @@ int alloc_statistics_for_table(THD* thd, TABLE *table)
|
||||
(Table_statistics *) alloc_root(&table->mem_root,
|
||||
sizeof(Table_statistics));
|
||||
|
||||
for (field_ptr= table->field; *field_ptr; field_ptr++, cnt++) ;
|
||||
fields= table->s->fields ;
|
||||
Column_statistics_collected *column_stats=
|
||||
(Column_statistics_collected *) alloc_root(&table->mem_root,
|
||||
sizeof(Column_statistics_collected) * cnt);
|
||||
sizeof(Column_statistics_collected) *
|
||||
fields);
|
||||
|
||||
uint keys= table->s->keys;
|
||||
Index_statistics *index_stats=
|
||||
@ -1688,7 +1711,7 @@ int alloc_statistics_for_table(THD* thd, TABLE *table)
|
||||
table_stats->index_stats= index_stats;
|
||||
table_stats->idx_avg_frequency= idx_avg_frequency;
|
||||
|
||||
memset(column_stats, 0, sizeof(Column_statistics) * cnt);
|
||||
memset(column_stats, 0, sizeof(Column_statistics) * fields);
|
||||
|
||||
for (field_ptr= table->field; *field_ptr; field_ptr++, column_stats++)
|
||||
(*field_ptr)->collected_stats= column_stats;
|
||||
@ -1838,13 +1861,23 @@ inline
|
||||
void Column_statistics_collected::init(THD *thd, Field *table_field)
|
||||
{
|
||||
uint max_heap_table_size= thd->variables.max_heap_table_size;
|
||||
TABLE *table= table_field->table;
|
||||
uint pk= table->s->primary_key;
|
||||
|
||||
is_single_pk_col= FALSE;
|
||||
|
||||
if (pk != MAX_KEY && table->key_info[pk].key_parts == 1 &&
|
||||
table->key_info[pk].key_part[0].fieldnr == table_field->field_index + 1)
|
||||
is_single_pk_col= TRUE;
|
||||
|
||||
column= table_field;
|
||||
|
||||
set_all_nulls();
|
||||
|
||||
nulls= 0;
|
||||
column_total_length= 0;
|
||||
if (is_single_pk_col)
|
||||
count_distinct= NULL;
|
||||
if (table_field->flags & BLOB_FLAG)
|
||||
count_distinct= NULL;
|
||||
else
|
||||
@ -1923,6 +1956,12 @@ void Column_statistics_collected::finish(ha_rows rows)
|
||||
delete count_distinct;
|
||||
count_distinct= NULL;
|
||||
}
|
||||
else if (is_single_pk_col)
|
||||
{
|
||||
val= 1.0;
|
||||
set_avg_frequency(val);
|
||||
set_not_null(COLUMN_STAT_AVG_FREQUENCY);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -1986,6 +2025,12 @@ int collect_statistics_for_index(THD *thd, TABLE *table, uint index)
|
||||
DEBUG_SYNC(table->in_use, "statistics_collection_start1");
|
||||
DEBUG_SYNC(table->in_use, "statistics_collection_start2");
|
||||
|
||||
if (index_prefix_calc.is_single_comp_pk)
|
||||
{
|
||||
index_prefix_calc.get_avg_frequency();
|
||||
DBUG_RETURN(rc);
|
||||
}
|
||||
|
||||
table->key_read= 1;
|
||||
table->file->extra(HA_EXTRA_KEYREAD);
|
||||
|
||||
@ -2078,7 +2123,7 @@ int collect_statistics_for_table(THD *thd, TABLE *table)
|
||||
|
||||
table->collected_stats->cardinality_is_null= TRUE;
|
||||
table->collected_stats->cardinality= 0;
|
||||
|
||||
|
||||
for (field_ptr= table->field; *field_ptr; field_ptr++)
|
||||
{
|
||||
table_field= *field_ptr;
|
||||
@ -2949,9 +2994,9 @@ int rename_column_in_stat_tables(THD *thd, TABLE *tab, Field *col,
|
||||
|
||||
void set_statistics_for_table(THD *thd, TABLE *table)
|
||||
{
|
||||
uint use_stat_table_mode= thd->variables.use_stat_tables;
|
||||
Use_stat_tables_mode use_stat_table_mode= get_use_stat_tables_mode(thd);
|
||||
table->used_stat_records=
|
||||
(use_stat_table_mode <= 1 ||
|
||||
(use_stat_table_mode <= COMPLEMENTARY ||
|
||||
!table->s->stats_is_read || !table->s->read_stats ||
|
||||
table->s->read_stats->cardinality_is_null) ?
|
||||
table->file->stats.records : table->s->read_stats->cardinality;
|
||||
@ -2960,7 +3005,8 @@ void set_statistics_for_table(THD *thd, TABLE *table)
|
||||
key_info < key_info_end; key_info++)
|
||||
{
|
||||
key_info->is_statistics_from_stat_tables=
|
||||
(use_stat_table_mode > 1 && table->s->stats_is_read &&
|
||||
(use_stat_table_mode > COMPLEMENTARY &&
|
||||
table->s->stats_is_read &&
|
||||
key_info->read_stats &&
|
||||
key_info->read_stats->avg_frequency_is_inited() &&
|
||||
key_info->read_stats->get_avg_frequency(0) > 0.5);
|
||||
|
@ -25,6 +25,14 @@
|
||||
generated automatically by the table definitions.
|
||||
*/
|
||||
|
||||
/*
  Modes of the use_stat_tables system variable, as returned by
  get_use_stat_tables_mode().

  The declaration order is significant: callers compare these values
  with relational operators (e.g. "> NEVER", "<= COMPLEMENTARY",
  "> COMPLEMENTARY"), so enumerators must not be reordered.

  NOTE(review): the spelling "PEFERABLY" looks like a typo for
  "PREFERABLY"; it is an identifier, so renaming it would have to be
  done together with every user of the enumerator.
*/
typedef
|
||||
enum enum_use_stat_tables_mode
|
||||
{
|
||||
NEVER, /* lowest value: code guarded by "mode > NEVER" is skipped */
|
||||
COMPLEMENTARY, /* passes "> NEVER" checks, but not "> COMPLEMENTARY" ones */
|
||||
PEFERABLY, /* [sic] the only value that satisfies "mode > COMPLEMENTARY" */
|
||||
} Use_stat_tables_mode;
|
||||
|
||||
enum enum_stat_tables
|
||||
{
|
||||
TABLE_STAT,
|
||||
@ -60,6 +68,27 @@ enum enum_index_stat_col
|
||||
INDEX_STAT_AVG_FREQUENCY
|
||||
};
|
||||
|
||||
/*
  Return the current value of thd->variables.use_stat_tables converted
  to Use_stat_tables_mode.

  The conversion is a plain cast with no range check.
  NOTE(review): this presumes the variable can only hold values that
  correspond to the enumerators - confirm against the definition of the
  system variable.
*/
inline
|
||||
Use_stat_tables_mode get_use_stat_tables_mode(THD *thd)
|
||||
{
|
||||
return (Use_stat_tables_mode) (thd->variables.use_stat_tables);
|
||||
}
|
||||
|
||||
int read_statistics_for_tables_if_needed(THD *thd, TABLE_LIST *tables);
|
||||
int collect_statistics_for_table(THD *thd, TABLE *table);
|
||||
int alloc_statistics_for_table_share(THD* thd, TABLE_SHARE *share,
|
||||
bool is_safe);
|
||||
int alloc_statistics_for_table(THD *thd, TABLE *table);
|
||||
int update_statistics_for_table(THD *thd, TABLE *table);
|
||||
int delete_statistics_for_table(THD *thd, LEX_STRING *db, LEX_STRING *tab);
|
||||
int delete_statistics_for_column(THD *thd, TABLE *tab, Field *col);
|
||||
int delete_statistics_for_index(THD *thd, TABLE *tab, KEY *key_info,
|
||||
bool ext_prefixes_only);
|
||||
int rename_table_in_stat_tables(THD *thd, LEX_STRING *db, LEX_STRING *tab,
|
||||
LEX_STRING *new_db, LEX_STRING *new_tab);
|
||||
int rename_column_in_stat_tables(THD *thd, TABLE *tab, Field *col,
|
||||
const char *new_name);
|
||||
void set_statistics_for_table(THD *thd, TABLE *table);
|
||||
|
||||
class Columns_statistics;
|
||||
class Index_statistics;
|
||||
|
@ -43,6 +43,7 @@
|
||||
#include "discover.h" // readfrm
|
||||
#include "my_pthread.h" // pthread_mutex_t
|
||||
#include "log_event.h" // Query_log_event
|
||||
#include "sql_statistics.h"
|
||||
#include <hash.h>
|
||||
#include <myisam.h>
|
||||
#include <my_dir.h>
|
||||
|
@ -142,7 +142,7 @@ typedef struct st_key {
|
||||
engine_option_value *option_list;
|
||||
ha_index_option_struct *option_struct; /* structure with parsed options */
|
||||
|
||||
double real_rec_per_key(uint i);
|
||||
double actual_rec_per_key(uint i);
|
||||
|
||||
} KEY;
|
||||
|
||||
|
@ -6783,7 +6783,7 @@ uint TABLE_SHARE::actual_n_key_parts(THD *thd)
|
||||
}
|
||||
|
||||
|
||||
double KEY::real_rec_per_key(uint i)
|
||||
double KEY::actual_rec_per_key(uint i)
|
||||
{
|
||||
if (rec_per_key == 0)
|
||||
return 0;
|
||||
|
Loading…
x
Reference in New Issue
Block a user