From e37053049341e425e139dd5bb723f1d99f56a2ad Mon Sep 17 00:00:00 2001 From: Igor Babaev Date: Sun, 31 Mar 2013 23:41:47 -0700 Subject: [PATCH] Take into account the number of null values in any used column when calculating selectivity of conditions. --- mysql-test/r/selectivity.result | 31 ++++++++++++++++-- mysql-test/r/selectivity_innodb.result | 31 ++++++++++++++++-- mysql-test/t/selectivity.test | 32 ++++++++++++++++--- sql/sql_statistics.cc | 44 ++++++++++++++++++-------- 4 files changed, 114 insertions(+), 24 deletions(-) diff --git a/mysql-test/r/selectivity.result b/mysql-test/r/selectivity.result index 6dd0f03eb05..57b6cc9e20a 100644 --- a/mysql-test/r/selectivity.result +++ b/mysql-test/r/selectivity.result @@ -6,12 +6,37 @@ select @@session.use_stat_tables; COMPLEMENTARY set @save_use_stat_tables=@@use_stat_tables; set use_stat_tables='preferably'; -DROP DATABASE IF EXISTS dbt3_s001; -CREATE DATABASE dbt3_s001; -use dbt3_s001; set @save_optimizer_use_condition_selectivity=@@optimizer_use_condition_selectivity; set @save_histogram_size=@@histogram_size; set @save_histogram_type=@@histogram_type; +set optimizer_use_condition_selectivity=3; +create table t1 (a int); +insert into t1 values +(9), (3), (2), (NULL), (NULL), (2), (NULL), (1), (5), (NULL); +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +select * from mysql.column_stats; +db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram +test t1 a 1 9 0.4000 4.0000 1.2000 0 NULL NULL +flush table t1; +explain extended +select * from t1 where a is null; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 40.00 Using where +Warnings: +Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where isnull(`test`.`t1`.`a`) +explain extended +select * from t1 where a is not null; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 60.00 Using where +Warnings: +Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` is not null) +drop table t1; +set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity; +DROP DATABASE IF EXISTS dbt3_s001; +CREATE DATABASE dbt3_s001; +use dbt3_s001; === Q15 === create view revenue0 (supplier_no, total_revenue) as select l_suppkey, sum(l_extendedprice * (1 - l_discount)) diff --git a/mysql-test/r/selectivity_innodb.result b/mysql-test/r/selectivity_innodb.result index fb10fb684fc..f2e0301ac8b 100644 --- a/mysql-test/r/selectivity_innodb.result +++ b/mysql-test/r/selectivity_innodb.result @@ -9,12 +9,37 @@ select @@session.use_stat_tables; COMPLEMENTARY set @save_use_stat_tables=@@use_stat_tables; set use_stat_tables='preferably'; -DROP DATABASE IF EXISTS dbt3_s001; -CREATE DATABASE dbt3_s001; -use dbt3_s001; set @save_optimizer_use_condition_selectivity=@@optimizer_use_condition_selectivity; set @save_histogram_size=@@histogram_size; set @save_histogram_type=@@histogram_type; +set optimizer_use_condition_selectivity=3; +create table t1 (a int); +insert into t1 values +(9), (3), (2), (NULL), (NULL), (2), (NULL), (1), (5), (NULL); +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +select * from mysql.column_stats; +db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram +test t1 a 1 9 0.4000 4.0000 1.2000 0 NULL NULL +flush table t1; +explain extended +select * from t1 where a is null; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 40.00 Using where +Warnings: +Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where isnull(`test`.`t1`.`a`) +explain extended +select * from t1 where a is not null; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 60.00 Using where +Warnings: +Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` is not null) +drop table t1; +set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity; +DROP DATABASE IF EXISTS dbt3_s001; +CREATE DATABASE dbt3_s001; +use dbt3_s001; === Q15 === create view revenue0 (supplier_no, total_revenue) as select l_suppkey, sum(l_extendedprice * (1 - l_discount)) diff --git a/mysql-test/t/selectivity.test b/mysql-test/t/selectivity.test index 9dda4af61eb..abc720791e8 100644 --- a/mysql-test/t/selectivity.test +++ b/mysql-test/t/selectivity.test @@ -7,6 +7,34 @@ set @save_use_stat_tables=@@use_stat_tables; set use_stat_tables='preferably'; +set @save_optimizer_use_condition_selectivity=@@optimizer_use_condition_selectivity; +set @save_histogram_size=@@histogram_size; +set @save_histogram_type=@@histogram_type; + +# check that statistics on nulls is used + +set optimizer_use_condition_selectivity=3; + +create table t1 (a int); +insert into t1 values + (9), (3), (2), (NULL), (NULL), (2), (NULL), (1), (5), (NULL); + +analyze table t1; + +select * from mysql.column_stats; + +flush table t1; + +explain extended +select * from t1 where a is null; + +explain extended +select * from t1 where a is not null; + +drop table t1; + +set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity; + --disable_warnings DROP DATABASE IF EXISTS dbt3_s001; --enable_warnings @@ -15,10 +43,6 @@ CREATE DATABASE dbt3_s001; use dbt3_s001; -set @save_optimizer_use_condition_selectivity=@@optimizer_use_condition_selectivity; -set @save_histogram_size=@@histogram_size; -set @save_histogram_type=@@histogram_type; - --disable_query_log --disable_result_log --disable_warnings diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index 74ed90cf7a1..736fb3e1f91 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -3342,25 +3342,41 @@ double get_column_range_cardinality(Field *field, double res; TABLE *table= field->table; Column_statistics *col_stats= table->field[field->field_index]->read_stats; + double tab_records= table->stat_records(); if (!col_stats) - res= table->stat_records(); + return tab_records; + + double col_nulls= tab_records * col_stats->get_nulls_ratio(); + + double col_non_nulls= tab_records - col_nulls; + + if (col_non_nulls < 1) + res= 0; else if (min_endp && max_endp && min_endp->length == max_endp->length && !memcmp(min_endp->key, max_endp->key, min_endp->length)) { - double avg_frequency= col_stats->get_avg_frequency(); - res= avg_frequency; - if (avg_frequency > 1.0 + 0.000001 && - col_stats->min_value && col_stats->max_value) + if (field->null_ptr && min_endp->key[0]) { - Histogram *hist= &col_stats->histogram; - if (hist->get_size() > 0) + /* This is null single point range */ + res= col_nulls; + } + else + { + double avg_frequency= col_stats->get_avg_frequency(); + res= avg_frequency; + if (avg_frequency > 1.0 + 0.000001 && + col_stats->min_value && col_stats->max_value) { - double pos= field->middle_point_pos(col_stats->min_value, - col_stats->max_value); - res= table->stat_records() * - hist->point_selectivity(pos, - avg_frequency / table->stat_records()); + Histogram *hist= &col_stats->histogram; + if (hist->get_size() > 0) + { + double pos= field->middle_point_pos(col_stats->min_value, + col_stats->max_value); + res= col_non_nulls * + hist->point_selectivity(pos, + avg_frequency / col_non_nulls); + } } } } @@ -3394,10 +3410,10 @@ double get_column_range_cardinality(Field *field, sel= (max_mp_pos - min_mp_pos); else sel= hist->range_selectivity(min_mp_pos, max_mp_pos); - res= table->stat_records() * sel; + res= col_non_nulls * sel; } else - res= table->stat_records(); + res= col_non_nulls; } return res; }