Fixed bug mdev-11096.

1. When min/max value is provided the null flag for it must be set to 0
in the bitmap Column_statistics::column_stat_nulls.
2. When the calculation of the selectivity of the range condition
over a column requires min and max values for the column then we
have to check that these values are provided.
This commit is contained in:
Igor Babaev 2016-10-24 10:15:11 -07:00
parent 26b87c332f
commit 9d4a0dde0a
5 changed files with 79 additions and 14 deletions

View File

@ -1446,3 +1446,27 @@ a b i
set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
DROP TABLE t1,t2;
set use_stat_tables=@save_use_stat_tables;
#
# Bug mdev-11096: range condition over column without statistical data
#
set use_stat_tables='preferably';
set optimizer_use_condition_selectivity=3;
create table t1(col1 char(32));
insert into t1 values ('a'),('b'),('c'),('d'), ('e'),('f'),('g'),('h');
analyze table t1 persistent for columns () indexes ();
Table Op Msg_type Msg_text
test.t1 analyze status Engine-independent statistics collected
test.t1 analyze status OK
explain extended
select * from t1 where col1 > 'b' and col1 < 'e';
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 8 100.00 Using where
Warnings:
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where ((`test`.`t1`.`col1` > 'b') and (`test`.`t1`.`col1` < 'e'))
select * from t1 where col1 > 'b' and col1 < 'e';
col1
c
d
drop table t1;
set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
set use_stat_tables=@save_use_stat_tables;

View File

@ -802,9 +802,9 @@ insert into t2 values (2),(3);
explain extended
select * from t1 where a in ( select b from t2 ) AND ( a > 3 );
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 1 0.00 Using where
1 PRIMARY t1 ALL NULL NULL NULL NULL 1 100.00 Using where
1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 4 func 1 100.00
2 MATERIALIZED t2 ALL NULL NULL NULL NULL 2 0.00
2 MATERIALIZED t2 ALL NULL NULL NULL NULL 2 100.00
Warnings:
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` semi join (`test`.`t2`) where ((`test`.`t1`.`a` > 3))
select * from t1 where a in ( select b from t2 ) AND ( a > 3 );
@ -1450,6 +1450,30 @@ a b i
set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
DROP TABLE t1,t2;
set use_stat_tables=@save_use_stat_tables;
#
# Bug mdev-11096: range condition over column without statistical data
#
set use_stat_tables='preferably';
set optimizer_use_condition_selectivity=3;
create table t1(col1 char(32));
insert into t1 values ('a'),('b'),('c'),('d'), ('e'),('f'),('g'),('h');
analyze table t1 persistent for columns () indexes ();
Table Op Msg_type Msg_text
test.t1 analyze status Engine-independent statistics collected
test.t1 analyze status OK
explain extended
select * from t1 where col1 > 'b' and col1 < 'e';
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 8 100.00 Using where
Warnings:
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where ((`test`.`t1`.`col1` > 'b') and (`test`.`t1`.`col1` < 'e'))
select * from t1 where col1 > 'b' and col1 < 'e';
col1
c
d
drop table t1;
set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
set use_stat_tables=@save_use_stat_tables;
set optimizer_switch=@save_optimizer_switch_for_selectivity_test;
set @tmp_ust= @@use_stat_tables;
set @tmp_oucs= @@optimizer_use_condition_selectivity;

View File

@ -970,6 +970,25 @@ set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivit
DROP TABLE t1,t2;
set use_stat_tables=@save_use_stat_tables;
--echo #
--echo # Bug mdev-11096: range condition over column without statistical data
--echo #
set use_stat_tables='preferably';
set optimizer_use_condition_selectivity=3;
create table t1(col1 char(32));
insert into t1 values ('a'),('b'),('c'),('d'), ('e'),('f'),('g'),('h');
analyze table t1 persistent for columns () indexes ();
explain extended
select * from t1 where col1 > 'b' and col1 < 'e';
select * from t1 where col1 > 'b' and col1 < 'e';
drop table t1;
set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
set use_stat_tables=@save_use_stat_tables;

View File

@ -1003,11 +1003,13 @@ public:
switch (i) {
case COLUMN_STAT_MIN_VALUE:
table_field->read_stats->min_value->set_notnull();
stat_field->val_str(&val);
table_field->read_stats->min_value->store(val.ptr(), val.length(),
&my_charset_bin);
break;
case COLUMN_STAT_MAX_VALUE:
table_field->read_stats->max_value->set_notnull();
stat_field->val_str(&val);
table_field->read_stats->max_value->store(val.ptr(), val.length(),
&my_charset_bin);
@ -3659,17 +3661,8 @@ double get_column_range_cardinality(Field *field,
{
double avg_frequency= col_stats->get_avg_frequency();
res= avg_frequency;
/*
psergey-todo: what does check for min_value, max_value mean?
min/max_value are set to NULL in alloc_statistics_for_table() and
alloc_statistics_for_table_share(). Both functions will immediately
call create_min_max_statistical_fields_for_table and
create_min_max_statistical_fields_for_table_share() respectively,
which will set min/max_value to be valid pointers, unless OOM
occurs.
*/
if (avg_frequency > 1.0 + 0.000001 &&
col_stats->min_value && col_stats->max_value)
col_stats->min_max_values_are_provided())
{
Histogram *hist= &col_stats->histogram;
if (hist->is_available())
@ -3692,7 +3685,7 @@ double get_column_range_cardinality(Field *field,
}
else
{
if (col_stats->min_value && col_stats->max_value)
if (col_stats->min_max_values_are_provided())
{
double sel, min_mp_pos, max_mp_pos;

View File

@ -388,6 +388,11 @@ public:
avg_frequency= (ulong) (val * Scale_factor_avg_frequency);
}
bool min_max_values_are_provided()
{
return !is_null(COLUMN_STAT_MIN_VALUE) &&
!is_null(COLUMN_STAT_MIN_VALUE);
}
};