MDEV-17032: Estimates are higher for partitions of a table with @@use_stat_tables= PREFERABLY
The problem here is EITS statistics does not calculate statistics for the partitions of the table. So a temporary solution would be to not read EITS statistics for partitioned tables. Also disabling reading of EITS for columns that participate in the partition list of a table.
This commit is contained in:
parent
12b1ba195c
commit
4886d14827
@ -2645,3 +2645,103 @@ Warnings:
|
||||
Note 1517 Duplicate partition name p2
|
||||
DEALLOCATE PREPARE stmt;
|
||||
DROP TABLE t1;
|
||||
#
|
||||
# MDEV-17032: Estimates are higher for partitions of a table with @@use_stat_tables= PREFERABLY
|
||||
#
|
||||
create table t0(a int);
|
||||
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
|
||||
create table t1(a int);
|
||||
insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C;
|
||||
create table t2 (
|
||||
part_key int,
|
||||
a int,
|
||||
b int
|
||||
) partition by list(part_key) (
|
||||
partition p0 values in (0),
|
||||
partition p1 values in (1),
|
||||
partition p2 values in (2),
|
||||
partition p3 values in (3),
|
||||
partition p4 values in (4)
|
||||
);
|
||||
insert into t2
|
||||
select mod(a,5), a/100, mod(a,5) from t1;
|
||||
set @save_use_stat_tables= @@use_stat_tables;
|
||||
set @save_optimizer_use_condition_selectivity=@@optimizer_use_condition_selectivity;
|
||||
#
|
||||
# Tests using stats provided by the storage engine
|
||||
#
|
||||
explain extended select * from t2 where part_key=1;
|
||||
id select_type table type possible_keys key key_len ref rows filtered Extra
|
||||
1 SIMPLE t2 ALL NULL NULL NULL NULL 200 100.00 Using where
|
||||
Warnings:
|
||||
Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`part_key` = 1)
|
||||
explain partitions select * from t2 where part_key=1;
|
||||
id select_type table partitions type possible_keys key key_len ref rows Extra
|
||||
1 SIMPLE t2 p1 ALL NULL NULL NULL NULL 200 Using where
|
||||
explain extended select * from t2 where part_key in (1,2);
|
||||
id select_type table type possible_keys key key_len ref rows filtered Extra
|
||||
1 SIMPLE t2 ALL NULL NULL NULL NULL 400 100.00 Using where
|
||||
Warnings:
|
||||
Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`part_key` in (1,2))
|
||||
explain partitions select * from t2 where part_key in (1,2);
|
||||
id select_type table partitions type possible_keys key key_len ref rows Extra
|
||||
1 SIMPLE t2 p1,p2 ALL NULL NULL NULL NULL 400 Using where
|
||||
explain extended select * from t2 where b=5;
|
||||
id select_type table type possible_keys key key_len ref rows filtered Extra
|
||||
1 SIMPLE t2 ALL NULL NULL NULL NULL 1000 100.00 Using where
|
||||
Warnings:
|
||||
Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`b` = 5)
|
||||
explain partitions select * from t2 where b=5;
|
||||
id select_type table partitions type possible_keys key key_len ref rows Extra
|
||||
1 SIMPLE t2 p0,p1,p2,p3,p4 ALL NULL NULL NULL NULL 1000 Using where
|
||||
explain extended select * from t2 partition(p0) where b=1;
|
||||
id select_type table type possible_keys key key_len ref rows filtered Extra
|
||||
1 SIMPLE t2 ALL NULL NULL NULL NULL 200 100.00 Using where
|
||||
Warnings:
|
||||
Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` PARTITION (`p0`) where (`test`.`t2`.`b` = 1)
|
||||
set @save_histogram_size=@@histogram_size;
|
||||
set @@histogram_size=100;
|
||||
set @@use_stat_tables= PREFERABLY;
|
||||
set @@optimizer_use_condition_selectivity=4;
|
||||
analyze table t2;
|
||||
Table Op Msg_type Msg_text
|
||||
test.t2 analyze status Engine-independent statistics collected
|
||||
test.t2 analyze status OK
|
||||
#
|
||||
# Tests using EITS
|
||||
#
|
||||
# filtered should be 100
|
||||
explain extended select * from t2 where part_key=1;
|
||||
id select_type table type possible_keys key key_len ref rows filtered Extra
|
||||
1 SIMPLE t2 ALL NULL NULL NULL NULL 200 100.00 Using where
|
||||
Warnings:
|
||||
Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`part_key` = 1)
|
||||
explain partitions select * from t2 where part_key=1;
|
||||
id select_type table partitions type possible_keys key key_len ref rows Extra
|
||||
1 SIMPLE t2 p1 ALL NULL NULL NULL NULL 200 Using where
|
||||
# filtered should be 100
|
||||
explain extended select * from t2 where part_key in (1,2);
|
||||
id select_type table type possible_keys key key_len ref rows filtered Extra
|
||||
1 SIMPLE t2 ALL NULL NULL NULL NULL 400 100.00 Using where
|
||||
Warnings:
|
||||
Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`part_key` in (1,2))
|
||||
explain partitions select * from t2 where part_key in (1,2);
|
||||
id select_type table partitions type possible_keys key key_len ref rows Extra
|
||||
1 SIMPLE t2 p1,p2 ALL NULL NULL NULL NULL 400 Using where
|
||||
explain extended select * from t2 where b=5;
|
||||
id select_type table type possible_keys key key_len ref rows filtered Extra
|
||||
1 SIMPLE t2 ALL NULL NULL NULL NULL 1000 19.80 Using where
|
||||
Warnings:
|
||||
Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`b` = 5)
|
||||
explain partitions select * from t2 where b=5;
|
||||
id select_type table partitions type possible_keys key key_len ref rows Extra
|
||||
1 SIMPLE t2 p0,p1,p2,p3,p4 ALL NULL NULL NULL NULL 1000 Using where
|
||||
explain extended select * from t2 partition(p0) where b=1;
|
||||
id select_type table type possible_keys key key_len ref rows filtered Extra
|
||||
1 SIMPLE t2 ALL NULL NULL NULL NULL 200 19.80 Using where
|
||||
Warnings:
|
||||
Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` PARTITION (`p0`) where (`test`.`t2`.`b` = 1)
|
||||
set @@use_stat_tables= @save_use_stat_tables;
|
||||
set @@optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
|
||||
set @@histogram_size= @save_histogram_size;
|
||||
drop table t0,t1,t2;
|
||||
|
@ -2897,3 +2897,64 @@ EXECUTE stmt;
|
||||
DEALLOCATE PREPARE stmt;
|
||||
DROP TABLE t1;
|
||||
|
||||
--echo #
|
||||
--echo # MDEV-17032: Estimates are higher for partitions of a table with @@use_stat_tables= PREFERABLY
|
||||
--echo #
|
||||
|
||||
create table t0(a int);
|
||||
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
|
||||
|
||||
create table t1(a int);
|
||||
insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C;
|
||||
|
||||
|
||||
create table t2 (
|
||||
part_key int,
|
||||
a int,
|
||||
b int
|
||||
) partition by list(part_key) (
|
||||
partition p0 values in (0),
|
||||
partition p1 values in (1),
|
||||
partition p2 values in (2),
|
||||
partition p3 values in (3),
|
||||
partition p4 values in (4)
|
||||
);
|
||||
insert into t2
|
||||
select mod(a,5), a/100, mod(a,5) from t1;
|
||||
|
||||
set @save_use_stat_tables= @@use_stat_tables;
|
||||
set @save_optimizer_use_condition_selectivity=@@optimizer_use_condition_selectivity;
|
||||
--echo #
|
||||
--echo # Tests using stats provided by the storage engine
|
||||
--echo #
|
||||
explain extended select * from t2 where part_key=1;
|
||||
explain partitions select * from t2 where part_key=1;
|
||||
explain extended select * from t2 where part_key in (1,2);
|
||||
explain partitions select * from t2 where part_key in (1,2);
|
||||
explain extended select * from t2 where b=5;
|
||||
explain partitions select * from t2 where b=5;
|
||||
explain extended select * from t2 partition(p0) where b=1;
|
||||
|
||||
|
||||
set @save_histogram_size=@@histogram_size;
|
||||
set @@histogram_size=100;
|
||||
set @@use_stat_tables= PREFERABLY;
|
||||
set @@optimizer_use_condition_selectivity=4;
|
||||
analyze table t2;
|
||||
--echo #
|
||||
--echo # Tests using EITS
|
||||
--echo #
|
||||
--echo # filtered should be 100
|
||||
explain extended select * from t2 where part_key=1;
|
||||
explain partitions select * from t2 where part_key=1;
|
||||
--echo # filtered should be 100
|
||||
explain extended select * from t2 where part_key in (1,2);
|
||||
explain partitions select * from t2 where part_key in (1,2);
|
||||
explain extended select * from t2 where b=5;
|
||||
explain partitions select * from t2 where b=5;
|
||||
explain extended select * from t2 partition(p0) where b=1;
|
||||
|
||||
set @@use_stat_tables= @save_use_stat_tables;
|
||||
set @@optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
|
||||
set @@histogram_size= @save_histogram_size;
|
||||
drop table t0,t1,t2;
|
||||
|
@ -3322,14 +3322,17 @@ bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param,
|
||||
{
|
||||
Field **field_ptr;
|
||||
TABLE *table= param->table;
|
||||
partition_info *part_info= NULL;
|
||||
#ifdef WITH_PARTITION_STORAGE_ENGINE
|
||||
part_info= table->part_info;
|
||||
#endif
|
||||
uint parts= 0;
|
||||
|
||||
for (field_ptr= table->field; *field_ptr; field_ptr++)
|
||||
{
|
||||
Column_statistics* col_stats= (*field_ptr)->read_stats;
|
||||
if (bitmap_is_set(used_fields, (*field_ptr)->field_index)
|
||||
&& col_stats && !col_stats->no_stat_values_provided()
|
||||
&& !((*field_ptr)->type() == MYSQL_TYPE_GEOMETRY))
|
||||
Field *field= *field_ptr;
|
||||
if (bitmap_is_set(used_fields, field->field_index) &&
|
||||
is_eits_usable(field))
|
||||
parts++;
|
||||
}
|
||||
|
||||
@ -3347,12 +3350,10 @@ bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param,
|
||||
uint max_key_len= 0;
|
||||
for (field_ptr= table->field; *field_ptr; field_ptr++)
|
||||
{
|
||||
if (bitmap_is_set(used_fields, (*field_ptr)->field_index))
|
||||
Field *field= *field_ptr;
|
||||
if (bitmap_is_set(used_fields, field->field_index))
|
||||
{
|
||||
Field *field= *field_ptr;
|
||||
Column_statistics* col_stats= field->read_stats;
|
||||
if (field->type() == MYSQL_TYPE_GEOMETRY ||
|
||||
!col_stats || col_stats->no_stat_values_provided())
|
||||
if (!is_eits_usable(field))
|
||||
continue;
|
||||
|
||||
uint16 store_length;
|
||||
|
@ -3164,6 +3164,23 @@ void partition_info::print_debug(const char *str, uint *value)
|
||||
DBUG_PRINT("info", ("parser: %s", str));
|
||||
DBUG_VOID_RETURN;
|
||||
}
|
||||
|
||||
bool partition_info::field_in_partition_expr(Field *field) const
|
||||
{
|
||||
uint i;
|
||||
for (i= 0; i < num_part_fields; i++)
|
||||
{
|
||||
if (field->eq(part_field_array[i]))
|
||||
return TRUE;
|
||||
}
|
||||
for (i= 0; i < num_subpart_fields; i++)
|
||||
{
|
||||
if (field->eq(subpart_field_array[i]))
|
||||
return TRUE;
|
||||
}
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
#else /* WITH_PARTITION_STORAGE_ENGINE */
|
||||
/*
|
||||
For builds without partitioning we need to define these functions
|
||||
|
@ -384,6 +384,7 @@ private:
|
||||
bool is_full_part_expr_in_fields(List<Item> &fields);
|
||||
public:
|
||||
bool has_unique_name(partition_element *element);
|
||||
bool field_in_partition_expr(Field *field) const;
|
||||
};
|
||||
|
||||
uint32 get_next_partition_id_range(struct st_partition_iter* part_iter);
|
||||
|
@ -30,6 +30,7 @@
|
||||
#include "opt_range.h"
|
||||
#include "my_atomic.h"
|
||||
#include "sql_show.h"
|
||||
#include "sql_partition.h"
|
||||
|
||||
/*
|
||||
The system variable 'use_stat_tables' can take one of the
|
||||
@ -3589,6 +3590,22 @@ void set_statistics_for_table(THD *thd, TABLE *table)
|
||||
(use_stat_table_mode <= COMPLEMENTARY ||
|
||||
!table->stats_is_read || read_stats->cardinality_is_null) ?
|
||||
table->file->stats.records : read_stats->cardinality;
|
||||
|
||||
/*
|
||||
For partitioned table, EITS statistics is based on data from all partitions.
|
||||
|
||||
On the other hand, Partition Pruning figures which partitions will be
|
||||
accessed and then computes the estimate of rows in used_partitions.
|
||||
|
||||
Use the estimate from Partition Pruning as it is typically more precise.
|
||||
Ideally, EITS should provide per-partition statistics but this is not
|
||||
implemented currently.
|
||||
*/
|
||||
#ifdef WITH_PARTITION_STORAGE_ENGINE
|
||||
if (table->part_info)
|
||||
table->used_stat_records= table->file->stats.records;
|
||||
#endif
|
||||
|
||||
KEY *key_info, *key_info_end;
|
||||
for (key_info= table->key_info, key_info_end= key_info+table->s->keys;
|
||||
key_info < key_info_end; key_info++)
|
||||
@ -3904,3 +3921,31 @@ bool is_stat_table(const char *db, const char *table)
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
Check wheter we can use EITS statistics for a field or not
|
||||
|
||||
TRUE : Use EITS for the columns
|
||||
FALSE: Otherwise
|
||||
*/
|
||||
|
||||
bool is_eits_usable(Field *field)
|
||||
{
|
||||
partition_info *part_info= NULL;
|
||||
#ifdef WITH_PARTITION_STORAGE_ENGINE
|
||||
part_info= field->table->part_info;
|
||||
#endif
|
||||
/*
|
||||
(1): checks if we have EITS statistics for a particular column
|
||||
(2): Don't use EITS for GEOMETRY columns
|
||||
(3): Disabling reading EITS statistics for columns involved in the
|
||||
partition list of a table. We assume the selecticivity for
|
||||
such columns would be handled during partition pruning.
|
||||
*/
|
||||
Column_statistics* col_stats= field->read_stats;
|
||||
if (col_stats && !col_stats->no_stat_values_provided() && //(1)
|
||||
field->type() != MYSQL_TYPE_GEOMETRY && //(2)
|
||||
(!part_info || !part_info->field_in_partition_expr(field))) //(3)
|
||||
return TRUE;
|
||||
return FALSE;
|
||||
}
|
||||
|
@ -109,6 +109,7 @@ double get_column_range_cardinality(Field *field,
|
||||
key_range *max_endp,
|
||||
uint range_flag);
|
||||
bool is_stat_table(const char *db, const char *table);
|
||||
bool is_eits_usable(Field* field);
|
||||
|
||||
class Histogram
|
||||
{
|
||||
|
Loading…
x
Reference in New Issue
Block a user