Addressed all remaining issues from the review of the patch

that introduced engine-independent persistent statistics.
In particular:
- added an enumeration type for possible values of the system
  variable use_stat_tables
- renamed KEY::real_rec_per_key to KEY::actual_rec_per_key
- optimized the collection of statistical data for any primary
  key defined on only one column.
This commit is contained in:
Igor Babaev 2012-12-13 23:05:12 -08:00
parent 65820439bd
commit a06224bd15
16 changed files with 112 additions and 45 deletions

View File

@ -1201,7 +1201,7 @@ bool DsMrr_impl::setup_buffer_sharing(uint key_size_in_keybuf,
uint parts= my_count_bits(key_tuple_map);
ulong rpc;
ulonglong rowids_size= rowid_buf_elem_size;
if ((rpc= key_info->real_rec_per_key(parts - 1)))
if ((rpc= key_info->actual_rec_per_key(parts - 1)))
rowids_size= rowid_buf_elem_size * rpc;
double fraction_for_rowids=

View File

@ -5502,8 +5502,8 @@ ha_rows records_in_index_intersect_extension(PARTIAL_INDEX_INTERSECT_INFO *curr,
ha_rows ext_records= ext_index_scan->records;
if (i < used_key_parts)
{
ulong f1= key_info->real_rec_per_key(i-1);
ulong f2= key_info->real_rec_per_key(i);
ulong f1= key_info->actual_rec_per_key(i-1);
ulong f2= key_info->actual_rec_per_key(i);
ext_records= (ha_rows) ((double) ext_records / f2 * f1);
}
if (ext_records < table_cardinality)
@ -12642,7 +12642,7 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
num_blocks= (uint)(table_records / keys_per_block) + 1;
/* Compute the number of keys in a group. */
keys_per_group= index_info->real_rec_per_key(group_key_parts - 1);
keys_per_group= index_info->actual_rec_per_key(group_key_parts - 1);
if (keys_per_group == 0) /* If there is no statistics try to guess */
/* each group contains 10% of all records */
keys_per_group= (uint)(table_records / 10) + 1;
@ -12662,7 +12662,7 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
Compute the probability that two ends of a subgroup are inside
different blocks.
*/
keys_per_subgroup= index_info->real_rec_per_key(used_key_parts - 1);
keys_per_subgroup= index_info->actual_rec_per_key(used_key_parts - 1);
if (keys_per_subgroup >= keys_per_block) /* If a subgroup is bigger than */
p_overlap= 1.0; /* a block, it will overlap at least two blocks. */
else

View File

@ -29,6 +29,7 @@
#include "sql_parse.h" // check_table_access
#include "strfunc.h"
#include "sql_admin.h"
#include "sql_statistics.h"
/* Prepare, run and cleanup for mysql_recreate_table() */
@ -718,7 +719,7 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables,
if (compl_result_code == HA_ADMIN_OK &&
operator_func == &handler::ha_analyze &&
table->table->s->table_category == TABLE_CATEGORY_USER &&
(thd->variables.use_stat_tables > 0 ||
(get_use_stat_tables_mode(thd) > NEVER ||
lex->with_persistent_for_clause))
{
if (!(compl_result_code=

View File

@ -49,6 +49,7 @@
#include "sql_trigger.h"
#include "transaction.h"
#include "sql_prepare.h"
#include "sql_statistics.h"
#include <m_ctype.h>
#include <my_dir.h>
#include <hash.h>
@ -3142,7 +3143,7 @@ retry_share:
while (table_cache_count > table_cache_size && unused_tables)
free_cache_entry(unused_tables);
if (thd->variables.use_stat_tables > 0)
if (get_use_stat_tables_mode(thd) > NEVER)
{
if (share->table_category != TABLE_CATEGORY_SYSTEM)
{
@ -4634,7 +4635,7 @@ open_and_process_table(THD *thd, LEX *lex, TABLE_LIST *tables,
goto end;
}
if (thd->variables.use_stat_tables > 0 && tables->table)
if (get_use_stat_tables_mode(thd) > NEVER && tables->table)
{
TABLE_SHARE *table_share= tables->table->s;
if (table_share && table_share->table_category != TABLE_CATEGORY_SYSTEM)

View File

@ -310,22 +310,6 @@ int dynamic_column_error_message(enum_dyncol_func_result rc);
/* open_and_lock_tables with optional derived handling */
int open_and_lock_tables_derived(THD *thd, TABLE_LIST *tables, bool derived);
int read_statistics_for_tables_if_needed(THD *thd, TABLE_LIST *tables);
int collect_statistics_for_table(THD *thd, TABLE *table);
int alloc_statistics_for_table_share(THD* thd, TABLE_SHARE *share,
bool is_safe);
int alloc_statistics_for_table(THD *thd, TABLE *table);
int update_statistics_for_table(THD *thd, TABLE *table);
int delete_statistics_for_table(THD *thd, LEX_STRING *db, LEX_STRING *tab);
int delete_statistics_for_column(THD *thd, TABLE *tab, Field *col);
int delete_statistics_for_index(THD *thd, TABLE *tab, KEY *key_info,
bool ext_prefixes_only);
int rename_table_in_stat_tables(THD *thd, LEX_STRING *db, LEX_STRING *tab,
LEX_STRING *new_db, LEX_STRING *new_tab);
int rename_column_in_stat_tables(THD *thd, TABLE *tab, Field *col,
const char *new_name);
void set_statistics_for_table(THD *thd, TABLE *table);
extern "C" int simple_raw_key_cmp(void* arg, const void* key1,
const void* key2);
extern "C" int count_distinct_walk(void *elem, element_count count, void *arg);

View File

@ -36,6 +36,7 @@
#include "sp.h"
#include "events.h"
#include "sql_handler.h"
#include "sql_statistics.h"
#include <my_dir.h>
#include <m_ctype.h>
#include "log.h"

View File

@ -35,6 +35,7 @@
#include "sql_select.h"
#include "sp_head.h"
#include "sql_trigger.h"
#include "sql_statistics.h"
#include "transaction.h"
#include "records.h" // init_read_record,
#include "sql_derived.h" // mysql_handle_list_of_derived

View File

@ -3812,7 +3812,8 @@ uint JOIN_TAB_SCAN_MRR::aux_buffer_incr(ulong recno)
uint incr= 0;
TABLE_REF *ref= &join_tab->ref;
TABLE *tab= join_tab->table;
uint rec_per_key= tab->key_info[ref->key].real_rec_per_key(ref->key_parts-1);
uint rec_per_key=
tab->key_info[ref->key].actual_rec_per_key(ref->key_parts-1);
set_if_bigger(rec_per_key, 1);
if (recno == 1)
incr= ref->key_length + tab->file->ref_length;

View File

@ -28,6 +28,7 @@
#include "lock.h" // MYSQL_OPEN_SKIP_TEMPORARY
#include "sql_base.h" // tdc_remove_table, lock_table_names,
#include "sql_handler.h" // mysql_ha_rm_tables
#include "sql_statistics.h"
#include "datadict.h"
static TABLE_LIST *rename_tables(THD *thd, TABLE_LIST *table_list,

View File

@ -5416,7 +5416,7 @@ best_access_path(JOIN *join,
else
{
uint key_parts= table->actual_n_key_parts(keyinfo);
if (!(records= keyinfo->real_rec_per_key(key_parts-1)))
if (!(records= keyinfo->actual_rec_per_key(key_parts-1)))
{ /* Prefer longer keys */
records=
((double) s->records / (double) rec *
@ -5516,7 +5516,7 @@ best_access_path(JOIN *join,
else
{
/* Check if we have statistic about the distribution */
if ((records= keyinfo->real_rec_per_key(max_key_part-1)))
if ((records= keyinfo->actual_rec_per_key(max_key_part-1)))
{
/*
Fix for the case where the index statistics is too
@ -22974,7 +22974,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
if (used_key_parts > used_index_parts)
used_pk_parts= used_key_parts-used_index_parts;
rec_per_key= used_key_parts ?
keyinfo->real_rec_per_key(used_key_parts-1) : 1;
keyinfo->actual_rec_per_key(used_key_parts-1) : 1;
/* Take into account the selectivity of the used pk prefix */
if (used_pk_parts)
{
@ -22989,8 +22989,8 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
rec_per_key= 1;
if (rec_per_key > 1)
{
rec_per_key*= pkinfo->real_rec_per_key(used_pk_parts-1);
rec_per_key/= pkinfo->real_rec_per_key(0);
rec_per_key*= pkinfo->actual_rec_per_key(used_pk_parts-1);
rec_per_key/= pkinfo->actual_rec_per_key(0);
/*
The value of rec_per_key for the extended key has
to be adjusted accordingly if some components of
@ -23004,9 +23004,9 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
We presume here that for any index rec_per_key[i] != 0
if rec_per_key[0] != 0.
*/
DBUG_ASSERT(pkinfo->real_rec_per_key(i));
rec_per_key*= pkinfo->real_rec_per_key(i-1);
rec_per_key/= pkinfo->real_rec_per_key(i);
DBUG_ASSERT(pkinfo->actual_rec_per_key(i));
rec_per_key*= pkinfo->actual_rec_per_key(i-1);
rec_per_key/= pkinfo->actual_rec_per_key(i);
}
}
}
@ -23051,7 +23051,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
select_limit= (ha_rows) (select_limit *
(double) table_records /
table->quick_condition_rows);
rec_per_key= keyinfo->real_rec_per_key(keyinfo->key_parts-1);
rec_per_key= keyinfo->actual_rec_per_key(keyinfo->key_parts-1);
set_if_bigger(rec_per_key, 1);
/*
Here we take into account the fact that rows are

View File

@ -45,6 +45,7 @@
#include "set_var.h"
#include "sql_trigger.h"
#include "sql_derived.h"
#include "sql_statistics.h"
#include "sql_connect.h"
#include "authors.h"
#include "contributors.h"
@ -5765,7 +5766,7 @@ static int get_schema_stat_record(THD *thd, TABLE_LIST *tables,
if (key->rec_per_key[j])
{
ha_rows records=((double) show_table->stat_records() /
key->real_rec_per_key(j));
key->actual_rec_per_key(j));
table->field[9]->store((longlong) records, TRUE);
table->field[9]->set_notnull();
}

View File

@ -206,6 +206,8 @@ private:
Count_distinct_field *count_distinct; /* The container for distinct
column values */
bool is_single_pk_col; /* TRUE <-> the only column of the primary key */
public:
inline void init(THD *thd, Field * table_field);
@ -1399,6 +1401,8 @@ private:
public:
bool is_single_comp_pk;
Index_prefix_calc(TABLE *table, KEY *key_info)
: index_table(table), index_info(key_info)
{
@ -1407,6 +1411,16 @@ public:
uint key_parts= table->actual_n_key_parts(key_info);
empty= TRUE;
prefixes= 0;
is_single_comp_pk= FALSE;
uint pk= table->s->primary_key;
if (table->key_info - key_info == pk && table->key_info[pk].key_parts == 1)
{
prefixes= 1;
is_single_comp_pk= TRUE;
return;
}
if ((calc_state=
(Prefix_calc_state *) sql_alloc(sizeof(Prefix_calc_state)*key_parts)))
{
@ -1430,6 +1444,7 @@ public:
}
}
/**
@brief
Change the elements of calc_state after reading the next index entry
@ -1487,6 +1502,13 @@ public:
{
uint i;
Prefix_calc_state *state;
if (is_single_comp_pk)
{
index_info->collected_stats->set_avg_frequency(0, 1.0);
return;
}
for (i= 0, state= calc_state; i < prefixes; i++, state++)
{
if (i < prefixes)
@ -1658,7 +1680,7 @@ void create_min_max_stistical_fields_for_table_share(THD *thd,
int alloc_statistics_for_table(THD* thd, TABLE *table)
{
Field **field_ptr;
uint cnt= 0;
uint fields;
DBUG_ENTER("alloc_statistics_for_table");
@ -1666,10 +1688,11 @@ int alloc_statistics_for_table(THD* thd, TABLE *table)
(Table_statistics *) alloc_root(&table->mem_root,
sizeof(Table_statistics));
for (field_ptr= table->field; *field_ptr; field_ptr++, cnt++) ;
fields= table->s->fields ;
Column_statistics_collected *column_stats=
(Column_statistics_collected *) alloc_root(&table->mem_root,
sizeof(Column_statistics_collected) * cnt);
sizeof(Column_statistics_collected) *
fields);
uint keys= table->s->keys;
Index_statistics *index_stats=
@ -1688,7 +1711,7 @@ int alloc_statistics_for_table(THD* thd, TABLE *table)
table_stats->index_stats= index_stats;
table_stats->idx_avg_frequency= idx_avg_frequency;
memset(column_stats, 0, sizeof(Column_statistics) * cnt);
memset(column_stats, 0, sizeof(Column_statistics) * fields);
for (field_ptr= table->field; *field_ptr; field_ptr++, column_stats++)
(*field_ptr)->collected_stats= column_stats;
@ -1838,13 +1861,23 @@ inline
void Column_statistics_collected::init(THD *thd, Field *table_field)
{
uint max_heap_table_size= thd->variables.max_heap_table_size;
TABLE *table= table_field->table;
uint pk= table->s->primary_key;
is_single_pk_col= FALSE;
if (pk != MAX_KEY && table->key_info[pk].key_parts == 1 &&
table->key_info[pk].key_part[0].fieldnr == table_field->field_index + 1)
is_single_pk_col= TRUE;
column= table_field;
set_all_nulls();
nulls= 0;
column_total_length= 0;
if (is_single_pk_col)
count_distinct= NULL;
if (table_field->flags & BLOB_FLAG)
count_distinct= NULL;
else
@ -1923,6 +1956,12 @@ void Column_statistics_collected::finish(ha_rows rows)
delete count_distinct;
count_distinct= NULL;
}
else if (is_single_pk_col)
{
val= 1.0;
set_avg_frequency(val);
set_not_null(COLUMN_STAT_AVG_FREQUENCY);
}
}
@ -1986,6 +2025,12 @@ int collect_statistics_for_index(THD *thd, TABLE *table, uint index)
DEBUG_SYNC(table->in_use, "statistics_collection_start1");
DEBUG_SYNC(table->in_use, "statistics_collection_start2");
if (index_prefix_calc.is_single_comp_pk)
{
index_prefix_calc.get_avg_frequency();
DBUG_RETURN(rc);
}
table->key_read= 1;
table->file->extra(HA_EXTRA_KEYREAD);
@ -2078,7 +2123,7 @@ int collect_statistics_for_table(THD *thd, TABLE *table)
table->collected_stats->cardinality_is_null= TRUE;
table->collected_stats->cardinality= 0;
for (field_ptr= table->field; *field_ptr; field_ptr++)
{
table_field= *field_ptr;
@ -2949,9 +2994,9 @@ int rename_column_in_stat_tables(THD *thd, TABLE *tab, Field *col,
void set_statistics_for_table(THD *thd, TABLE *table)
{
uint use_stat_table_mode= thd->variables.use_stat_tables;
Use_stat_tables_mode use_stat_table_mode= get_use_stat_tables_mode(thd);
table->used_stat_records=
(use_stat_table_mode <= 1 ||
(use_stat_table_mode <= COMPLEMENTARY ||
!table->s->stats_is_read || !table->s->read_stats ||
table->s->read_stats->cardinality_is_null) ?
table->file->stats.records : table->s->read_stats->cardinality;
@ -2960,7 +3005,8 @@ void set_statistics_for_table(THD *thd, TABLE *table)
key_info < key_info_end; key_info++)
{
key_info->is_statistics_from_stat_tables=
(use_stat_table_mode > 1 && table->s->stats_is_read &&
(use_stat_table_mode > COMPLEMENTARY &&
table->s->stats_is_read &&
key_info->read_stats &&
key_info->read_stats->avg_frequency_is_inited() &&
key_info->read_stats->get_avg_frequency(0) > 0.5);

View File

@ -25,6 +25,14 @@
generated automatically by the table definitions.
*/
typedef
enum enum_use_stat_tables_mode
{
NEVER,
COMPLEMENTARY,
PEFERABLY,
} Use_stat_tables_mode;
enum enum_stat_tables
{
TABLE_STAT,
@ -60,6 +68,27 @@ enum enum_index_stat_col
INDEX_STAT_AVG_FREQUENCY
};
/*
  Return the value of thd->variables.use_stat_tables converted to the
  Use_stat_tables_mode enumeration type, so that callers can compare it
  against the named enumerators instead of raw numbers.
*/
inline
Use_stat_tables_mode get_use_stat_tables_mode(THD *thd)
{
  const Use_stat_tables_mode mode=
    static_cast<Use_stat_tables_mode>(thd->variables.use_stat_tables);
  return mode;
}
int read_statistics_for_tables_if_needed(THD *thd, TABLE_LIST *tables);
int collect_statistics_for_table(THD *thd, TABLE *table);
int alloc_statistics_for_table_share(THD* thd, TABLE_SHARE *share,
bool is_safe);
int alloc_statistics_for_table(THD *thd, TABLE *table);
int update_statistics_for_table(THD *thd, TABLE *table);
int delete_statistics_for_table(THD *thd, LEX_STRING *db, LEX_STRING *tab);
int delete_statistics_for_column(THD *thd, TABLE *tab, Field *col);
int delete_statistics_for_index(THD *thd, TABLE *tab, KEY *key_info,
bool ext_prefixes_only);
int rename_table_in_stat_tables(THD *thd, LEX_STRING *db, LEX_STRING *tab,
LEX_STRING *new_db, LEX_STRING *new_tab);
int rename_column_in_stat_tables(THD *thd, TABLE *tab, Field *col,
const char *new_name);
void set_statistics_for_table(THD *thd, TABLE *table);
class Columns_statistics;
class Index_statistics;

View File

@ -43,6 +43,7 @@
#include "discover.h" // readfrm
#include "my_pthread.h" // pthread_mutex_t
#include "log_event.h" // Query_log_event
#include "sql_statistics.h"
#include <hash.h>
#include <myisam.h>
#include <my_dir.h>

View File

@ -142,7 +142,7 @@ typedef struct st_key {
engine_option_value *option_list;
ha_index_option_struct *option_struct; /* structure with parsed options */
double real_rec_per_key(uint i);
double actual_rec_per_key(uint i);
} KEY;

View File

@ -6783,7 +6783,7 @@ uint TABLE_SHARE::actual_n_key_parts(THD *thd)
}
double KEY::real_rec_per_key(uint i)
double KEY::actual_rec_per_key(uint i)
{
if (rec_per_key == 0)
return 0;