Code cleanup:
- Move [some] engine-agnostic tests from t/selectivity.test to t/selectivity_no_engine.test - Move Histogram::point_selectivity to sql_statistics.cc
This commit is contained in:
parent
0d67aafaa2
commit
79a8a6130b
@ -1289,104 +1289,6 @@ a b c d a b
|
|||||||
221 56120 56120 28296 28296 3
|
221 56120 56120 28296 28296 3
|
||||||
set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
|
set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
|
||||||
drop table t1,t2;
|
drop table t1,t2;
|
||||||
#
|
|
||||||
# MDEV-5917: EITS: different order of predicates in IN (...) causes different estimates
|
|
||||||
#
|
|
||||||
create table t1(a int);
|
|
||||||
insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
|
|
||||||
create table t2 (col1 int);
|
|
||||||
# one value in 1..100 range
|
|
||||||
insert into t2 select A.a + B.a*10 from t1 A, t1 B;
|
|
||||||
# ten values in 100...200 range
|
|
||||||
insert into t2 select 100 + A.a + B.a*10 from t1 A, t1 B, t1 C;
|
|
||||||
set histogram_type='SINGLE_PREC_HB';
|
|
||||||
set histogram_size=100;
|
|
||||||
set optimizer_use_condition_selectivity=4;
|
|
||||||
analyze table t2 persistent for all;
|
|
||||||
Table Op Msg_type Msg_text
|
|
||||||
test.t2 analyze status OK
|
|
||||||
# The following two must have the same in 'Extra' column:
|
|
||||||
explain extended select * from t2 where col1 IN (20, 180);
|
|
||||||
id select_type table type possible_keys key key_len ref rows filtered Extra
|
|
||||||
1 SIMPLE t2 ALL NULL NULL NULL NULL 1100 1.35 Using where
|
|
||||||
Warnings:
|
|
||||||
Note 1003 select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where (`test`.`t2`.`col1` in (20,180))
|
|
||||||
explain extended select * from t2 where col1 IN (180, 20);
|
|
||||||
id select_type table type possible_keys key key_len ref rows filtered Extra
|
|
||||||
1 SIMPLE t2 ALL NULL NULL NULL NULL 1100 1.35 Using where
|
|
||||||
Warnings:
|
|
||||||
Note 1003 select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where (`test`.`t2`.`col1` in (180,20))
|
|
||||||
drop table t1, t2;
|
|
||||||
#
|
|
||||||
# MDEV-5926: EITS: Histogram estimates for column=least_possible_value are wrong
|
|
||||||
#
|
|
||||||
create table t0(a int);
|
|
||||||
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
|
|
||||||
create table t1(a int);
|
|
||||||
insert into t1 select A.a from t0 A, t0 B, t0 C;
|
|
||||||
set histogram_size=20;
|
|
||||||
set histogram_type='single_prec_hb';
|
|
||||||
analyze table t1 persistent for all;
|
|
||||||
Table Op Msg_type Msg_text
|
|
||||||
test.t1 analyze status OK
|
|
||||||
set use_stat_tables='preferably';
|
|
||||||
set optimizer_use_condition_selectivity=4;
|
|
||||||
# Should select about 10%:
|
|
||||||
explain extended select * from t1 where a=2;
|
|
||||||
id select_type table type possible_keys key key_len ref rows filtered Extra
|
|
||||||
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 9.52 Using where
|
|
||||||
Warnings:
|
|
||||||
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 2)
|
|
||||||
# Should select about 10%:
|
|
||||||
explain extended select * from t1 where a=1;
|
|
||||||
id select_type table type possible_keys key key_len ref rows filtered Extra
|
|
||||||
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 9.52 Using where
|
|
||||||
Warnings:
|
|
||||||
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 1)
|
|
||||||
# Must not have filtered=100%:
|
|
||||||
explain extended select * from t1 where a=0;
|
|
||||||
id select_type table type possible_keys key key_len ref rows filtered Extra
|
|
||||||
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 9.52 Using where
|
|
||||||
Warnings:
|
|
||||||
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 0)
|
|
||||||
# Again, must not have filtered=100%:
|
|
||||||
explain extended select * from t1 where a=-1;
|
|
||||||
id select_type table type possible_keys key key_len ref rows filtered Extra
|
|
||||||
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 9.52 Using where
|
|
||||||
Warnings:
|
|
||||||
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = <cache>(-(1)))
|
|
||||||
drop table t0, t1;
|
|
||||||
#
|
|
||||||
# MDEV-4362: Selectivity estimates for IN (...) do not depend on whether the values are in range
|
|
||||||
#
|
|
||||||
create table t1 (col1 int);
|
|
||||||
set @a=-1;
|
|
||||||
create table t2 (a int) select (@a:=@a+1) as a from information_schema.session_variables A limit 100;
|
|
||||||
insert into t1 select A.a from t2 A, t2 B where A.a < 100 and B.a < 100;
|
|
||||||
select min(col1), max(col1), count(*) from t1;
|
|
||||||
min(col1) max(col1) count(*)
|
|
||||||
0 99 10000
|
|
||||||
set histogram_size=100;
|
|
||||||
analyze table t1 persistent for all;
|
|
||||||
Table Op Msg_type Msg_text
|
|
||||||
test.t1 analyze status OK
|
|
||||||
explain extended select * from t1 where col1 in (1,2,3);
|
|
||||||
id select_type table type possible_keys key key_len ref rows filtered Extra
|
|
||||||
1 SIMPLE t1 ALL NULL NULL NULL NULL 10000 3.37 Using where
|
|
||||||
Warnings:
|
|
||||||
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` in (1,2,3))
|
|
||||||
# Must not cause fp division by zero, or produce nonsense numbers:
|
|
||||||
explain extended select * from t1 where col1 in (-1,-2,-3);
|
|
||||||
id select_type table type possible_keys key key_len ref rows filtered Extra
|
|
||||||
1 SIMPLE t1 ALL NULL NULL NULL NULL 10000 5.94 Using where
|
|
||||||
Warnings:
|
|
||||||
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` in (<cache>(-(1)),<cache>(-(2)),<cache>(-(3))))
|
|
||||||
explain extended select * from t1 where col1<=-1;
|
|
||||||
id select_type table type possible_keys key key_len ref rows filtered Extra
|
|
||||||
1 SIMPLE t1 ALL NULL NULL NULL NULL 10000 1.00 Using where
|
|
||||||
Warnings:
|
|
||||||
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` <= <cache>(-(1)))
|
|
||||||
drop table t1, t2;
|
|
||||||
set histogram_type=@save_histogram_type;
|
set histogram_type=@save_histogram_type;
|
||||||
set histogram_size=@save_histogram_size;
|
set histogram_size=@save_histogram_size;
|
||||||
set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
|
set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
|
||||||
|
@ -1299,104 +1299,6 @@ a b c d a b
|
|||||||
221 56120 56120 28296 28296 3
|
221 56120 56120 28296 28296 3
|
||||||
set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
|
set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
|
||||||
drop table t1,t2;
|
drop table t1,t2;
|
||||||
#
|
|
||||||
# MDEV-5917: EITS: different order of predicates in IN (...) causes different estimates
|
|
||||||
#
|
|
||||||
create table t1(a int);
|
|
||||||
insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
|
|
||||||
create table t2 (col1 int);
|
|
||||||
# one value in 1..100 range
|
|
||||||
insert into t2 select A.a + B.a*10 from t1 A, t1 B;
|
|
||||||
# ten values in 100...200 range
|
|
||||||
insert into t2 select 100 + A.a + B.a*10 from t1 A, t1 B, t1 C;
|
|
||||||
set histogram_type='SINGLE_PREC_HB';
|
|
||||||
set histogram_size=100;
|
|
||||||
set optimizer_use_condition_selectivity=4;
|
|
||||||
analyze table t2 persistent for all;
|
|
||||||
Table Op Msg_type Msg_text
|
|
||||||
test.t2 analyze status OK
|
|
||||||
# The following two must have the same in 'Extra' column:
|
|
||||||
explain extended select * from t2 where col1 IN (20, 180);
|
|
||||||
id select_type table type possible_keys key key_len ref rows filtered Extra
|
|
||||||
1 SIMPLE t2 ALL NULL NULL NULL NULL 1100 1.35 Using where
|
|
||||||
Warnings:
|
|
||||||
Note 1003 select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where (`test`.`t2`.`col1` in (20,180))
|
|
||||||
explain extended select * from t2 where col1 IN (180, 20);
|
|
||||||
id select_type table type possible_keys key key_len ref rows filtered Extra
|
|
||||||
1 SIMPLE t2 ALL NULL NULL NULL NULL 1100 1.35 Using where
|
|
||||||
Warnings:
|
|
||||||
Note 1003 select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where (`test`.`t2`.`col1` in (180,20))
|
|
||||||
drop table t1, t2;
|
|
||||||
#
|
|
||||||
# MDEV-5926: EITS: Histogram estimates for column=least_possible_value are wrong
|
|
||||||
#
|
|
||||||
create table t0(a int);
|
|
||||||
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
|
|
||||||
create table t1(a int);
|
|
||||||
insert into t1 select A.a from t0 A, t0 B, t0 C;
|
|
||||||
set histogram_size=20;
|
|
||||||
set histogram_type='single_prec_hb';
|
|
||||||
analyze table t1 persistent for all;
|
|
||||||
Table Op Msg_type Msg_text
|
|
||||||
test.t1 analyze status OK
|
|
||||||
set use_stat_tables='preferably';
|
|
||||||
set optimizer_use_condition_selectivity=4;
|
|
||||||
# Should select about 10%:
|
|
||||||
explain extended select * from t1 where a=2;
|
|
||||||
id select_type table type possible_keys key key_len ref rows filtered Extra
|
|
||||||
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 9.52 Using where
|
|
||||||
Warnings:
|
|
||||||
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 2)
|
|
||||||
# Should select about 10%:
|
|
||||||
explain extended select * from t1 where a=1;
|
|
||||||
id select_type table type possible_keys key key_len ref rows filtered Extra
|
|
||||||
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 9.52 Using where
|
|
||||||
Warnings:
|
|
||||||
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 1)
|
|
||||||
# Must not have filtered=100%:
|
|
||||||
explain extended select * from t1 where a=0;
|
|
||||||
id select_type table type possible_keys key key_len ref rows filtered Extra
|
|
||||||
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 9.52 Using where
|
|
||||||
Warnings:
|
|
||||||
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 0)
|
|
||||||
# Again, must not have filtered=100%:
|
|
||||||
explain extended select * from t1 where a=-1;
|
|
||||||
id select_type table type possible_keys key key_len ref rows filtered Extra
|
|
||||||
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 9.52 Using where
|
|
||||||
Warnings:
|
|
||||||
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = <cache>(-(1)))
|
|
||||||
drop table t0, t1;
|
|
||||||
#
|
|
||||||
# MDEV-4362: Selectivity estimates for IN (...) do not depend on whether the values are in range
|
|
||||||
#
|
|
||||||
create table t1 (col1 int);
|
|
||||||
set @a=-1;
|
|
||||||
create table t2 (a int) select (@a:=@a+1) as a from information_schema.session_variables A limit 100;
|
|
||||||
insert into t1 select A.a from t2 A, t2 B where A.a < 100 and B.a < 100;
|
|
||||||
select min(col1), max(col1), count(*) from t1;
|
|
||||||
min(col1) max(col1) count(*)
|
|
||||||
0 99 10000
|
|
||||||
set histogram_size=100;
|
|
||||||
analyze table t1 persistent for all;
|
|
||||||
Table Op Msg_type Msg_text
|
|
||||||
test.t1 analyze status OK
|
|
||||||
explain extended select * from t1 where col1 in (1,2,3);
|
|
||||||
id select_type table type possible_keys key key_len ref rows filtered Extra
|
|
||||||
1 SIMPLE t1 ALL NULL NULL NULL NULL 10000 3.37 Using where
|
|
||||||
Warnings:
|
|
||||||
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` in (1,2,3))
|
|
||||||
# Must not cause fp division by zero, or produce nonsense numbers:
|
|
||||||
explain extended select * from t1 where col1 in (-1,-2,-3);
|
|
||||||
id select_type table type possible_keys key key_len ref rows filtered Extra
|
|
||||||
1 SIMPLE t1 ALL NULL NULL NULL NULL 10000 5.94 Using where
|
|
||||||
Warnings:
|
|
||||||
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` in (<cache>(-(1)),<cache>(-(2)),<cache>(-(3))))
|
|
||||||
explain extended select * from t1 where col1<=-1;
|
|
||||||
id select_type table type possible_keys key key_len ref rows filtered Extra
|
|
||||||
1 SIMPLE t1 ALL NULL NULL NULL NULL 10000 1.00 Using where
|
|
||||||
Warnings:
|
|
||||||
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` <= <cache>(-(1)))
|
|
||||||
drop table t1, t2;
|
|
||||||
set histogram_type=@save_histogram_type;
|
set histogram_type=@save_histogram_type;
|
||||||
set histogram_size=@save_histogram_size;
|
set histogram_size=@save_histogram_size;
|
||||||
set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
|
set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
|
||||||
|
123
mysql-test/r/selectivity_no_engine.result
Normal file
123
mysql-test/r/selectivity_no_engine.result
Normal file
@ -0,0 +1,123 @@
|
|||||||
|
#
|
||||||
|
# Engine-agnostic tests for statistics-based selectivity calculations.
|
||||||
|
# - selectivity tests that depend on the engine should go into
|
||||||
|
# t/selectivity.test. That test is run with myisam/innodb/xtradb.
|
||||||
|
# - this file is for tests that don't depend on the engine.
|
||||||
|
#
|
||||||
|
drop table if exists t0,t1,t2,t3;
|
||||||
|
select @@global.use_stat_tables;
|
||||||
|
@@global.use_stat_tables
|
||||||
|
COMPLEMENTARY
|
||||||
|
select @@session.use_stat_tables;
|
||||||
|
@@session.use_stat_tables
|
||||||
|
COMPLEMENTARY
|
||||||
|
set @save_use_stat_tables=@@use_stat_tables;
|
||||||
|
set use_stat_tables='preferably';
|
||||||
|
set @save_optimizer_use_condition_selectivity=@@optimizer_use_condition_selectivity;
|
||||||
|
set @save_histogram_size=@@histogram_size;
|
||||||
|
set @save_histogram_type=@@histogram_type;
|
||||||
|
#
|
||||||
|
# MDEV-5917: EITS: different order of predicates in IN (...) causes different estimates
|
||||||
|
#
|
||||||
|
create table t1(a int);
|
||||||
|
insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
|
||||||
|
create table t2 (col1 int);
|
||||||
|
# one value in 1..100 range
|
||||||
|
insert into t2 select A.a + B.a*10 from t1 A, t1 B;
|
||||||
|
# ten values in 100...200 range
|
||||||
|
insert into t2 select 100 + A.a + B.a*10 from t1 A, t1 B, t1 C;
|
||||||
|
set histogram_type='SINGLE_PREC_HB';
|
||||||
|
set histogram_size=100;
|
||||||
|
set optimizer_use_condition_selectivity=4;
|
||||||
|
analyze table t2 persistent for all;
|
||||||
|
Table Op Msg_type Msg_text
|
||||||
|
test.t2 analyze status OK
|
||||||
|
# The following two must have the same in 'Extra' column:
|
||||||
|
explain extended select * from t2 where col1 IN (20, 180);
|
||||||
|
id select_type table type possible_keys key key_len ref rows filtered Extra
|
||||||
|
1 SIMPLE t2 ALL NULL NULL NULL NULL 1100 1.35 Using where
|
||||||
|
Warnings:
|
||||||
|
Note 1003 select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where (`test`.`t2`.`col1` in (20,180))
|
||||||
|
explain extended select * from t2 where col1 IN (180, 20);
|
||||||
|
id select_type table type possible_keys key key_len ref rows filtered Extra
|
||||||
|
1 SIMPLE t2 ALL NULL NULL NULL NULL 1100 1.35 Using where
|
||||||
|
Warnings:
|
||||||
|
Note 1003 select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where (`test`.`t2`.`col1` in (180,20))
|
||||||
|
drop table t1, t2;
|
||||||
|
#
|
||||||
|
# MDEV-5926: EITS: Histogram estimates for column=least_possible_value are wrong
|
||||||
|
#
|
||||||
|
create table t0(a int);
|
||||||
|
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
|
||||||
|
create table t1(a int);
|
||||||
|
insert into t1 select A.a from t0 A, t0 B, t0 C;
|
||||||
|
set histogram_size=20;
|
||||||
|
set histogram_type='single_prec_hb';
|
||||||
|
analyze table t1 persistent for all;
|
||||||
|
Table Op Msg_type Msg_text
|
||||||
|
test.t1 analyze status OK
|
||||||
|
set use_stat_tables='preferably';
|
||||||
|
set optimizer_use_condition_selectivity=4;
|
||||||
|
# Should select about 10%:
|
||||||
|
explain extended select * from t1 where a=2;
|
||||||
|
id select_type table type possible_keys key key_len ref rows filtered Extra
|
||||||
|
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 9.52 Using where
|
||||||
|
Warnings:
|
||||||
|
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 2)
|
||||||
|
# Should select about 10%:
|
||||||
|
explain extended select * from t1 where a=1;
|
||||||
|
id select_type table type possible_keys key key_len ref rows filtered Extra
|
||||||
|
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 9.52 Using where
|
||||||
|
Warnings:
|
||||||
|
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 1)
|
||||||
|
# Must not have filtered=100%:
|
||||||
|
explain extended select * from t1 where a=0;
|
||||||
|
id select_type table type possible_keys key key_len ref rows filtered Extra
|
||||||
|
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 9.52 Using where
|
||||||
|
Warnings:
|
||||||
|
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 0)
|
||||||
|
# Again, must not have filtered=100%:
|
||||||
|
explain extended select * from t1 where a=-1;
|
||||||
|
id select_type table type possible_keys key key_len ref rows filtered Extra
|
||||||
|
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 9.52 Using where
|
||||||
|
Warnings:
|
||||||
|
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = <cache>(-(1)))
|
||||||
|
drop table t0, t1;
|
||||||
|
#
|
||||||
|
# MDEV-4362: Selectivity estimates for IN (...) do not depend on whether the values are in range
|
||||||
|
#
|
||||||
|
create table t1 (col1 int);
|
||||||
|
set @a=-1;
|
||||||
|
create table t2 (a int) select (@a:=@a+1) as a from information_schema.session_variables A limit 100;
|
||||||
|
insert into t1 select A.a from t2 A, t2 B where A.a < 100 and B.a < 100;
|
||||||
|
select min(col1), max(col1), count(*) from t1;
|
||||||
|
min(col1) max(col1) count(*)
|
||||||
|
0 99 10000
|
||||||
|
set histogram_size=100;
|
||||||
|
analyze table t1 persistent for all;
|
||||||
|
Table Op Msg_type Msg_text
|
||||||
|
test.t1 analyze status OK
|
||||||
|
explain extended select * from t1 where col1 in (1,2,3);
|
||||||
|
id select_type table type possible_keys key key_len ref rows filtered Extra
|
||||||
|
1 SIMPLE t1 ALL NULL NULL NULL NULL 10000 3.37 Using where
|
||||||
|
Warnings:
|
||||||
|
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` in (1,2,3))
|
||||||
|
# Must not cause fp division by zero, or produce nonsense numbers:
|
||||||
|
explain extended select * from t1 where col1 in (-1,-2,-3);
|
||||||
|
id select_type table type possible_keys key key_len ref rows filtered Extra
|
||||||
|
1 SIMPLE t1 ALL NULL NULL NULL NULL 10000 5.94 Using where
|
||||||
|
Warnings:
|
||||||
|
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` in (<cache>(-(1)),<cache>(-(2)),<cache>(-(3))))
|
||||||
|
explain extended select * from t1 where col1<=-1;
|
||||||
|
id select_type table type possible_keys key key_len ref rows filtered Extra
|
||||||
|
1 SIMPLE t1 ALL NULL NULL NULL NULL 10000 1.00 Using where
|
||||||
|
Warnings:
|
||||||
|
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` <= <cache>(-(1)))
|
||||||
|
drop table t1, t2;
|
||||||
|
#
|
||||||
|
# End of the test file
|
||||||
|
#
|
||||||
|
set use_stat_tables= @save_use_stat_tables;
|
||||||
|
set histogram_type=@save_histogram_type;
|
||||||
|
set histogram_size=@save_histogram_size;
|
||||||
|
set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
|
@ -862,68 +862,6 @@ set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivit
|
|||||||
|
|
||||||
drop table t1,t2;
|
drop table t1,t2;
|
||||||
|
|
||||||
--echo #
|
|
||||||
--echo # MDEV-5917: EITS: different order of predicates in IN (...) causes different estimates
|
|
||||||
--echo #
|
|
||||||
create table t1(a int);
|
|
||||||
insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
|
|
||||||
|
|
||||||
create table t2 (col1 int);
|
|
||||||
--echo # one value in 1..100 range
|
|
||||||
insert into t2 select A.a + B.a*10 from t1 A, t1 B;
|
|
||||||
--echo # ten values in 100...200 range
|
|
||||||
insert into t2 select 100 + A.a + B.a*10 from t1 A, t1 B, t1 C;
|
|
||||||
|
|
||||||
set histogram_type='SINGLE_PREC_HB';
|
|
||||||
set histogram_size=100;
|
|
||||||
set optimizer_use_condition_selectivity=4;
|
|
||||||
analyze table t2 persistent for all;
|
|
||||||
|
|
||||||
--echo # The following two must have the same in 'Extra' column:
|
|
||||||
explain extended select * from t2 where col1 IN (20, 180);
|
|
||||||
explain extended select * from t2 where col1 IN (180, 20);
|
|
||||||
|
|
||||||
drop table t1, t2;
|
|
||||||
|
|
||||||
--echo #
|
|
||||||
--echo # MDEV-5926: EITS: Histogram estimates for column=least_possible_value are wrong
|
|
||||||
--echo #
|
|
||||||
create table t0(a int);
|
|
||||||
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
|
|
||||||
create table t1(a int);
|
|
||||||
insert into t1 select A.a from t0 A, t0 B, t0 C;
|
|
||||||
set histogram_size=20;
|
|
||||||
set histogram_type='single_prec_hb';
|
|
||||||
analyze table t1 persistent for all;
|
|
||||||
set use_stat_tables='preferably';
|
|
||||||
set optimizer_use_condition_selectivity=4;
|
|
||||||
--echo # Should select about 10%:
|
|
||||||
explain extended select * from t1 where a=2;
|
|
||||||
--echo # Should select about 10%:
|
|
||||||
explain extended select * from t1 where a=1;
|
|
||||||
--echo # Must not have filtered=100%:
|
|
||||||
explain extended select * from t1 where a=0;
|
|
||||||
--echo # Again, must not have filtered=100%:
|
|
||||||
explain extended select * from t1 where a=-1;
|
|
||||||
|
|
||||||
drop table t0, t1;
|
|
||||||
|
|
||||||
--echo #
|
|
||||||
--echo # MDEV-4362: Selectivity estimates for IN (...) do not depend on whether the values are in range
|
|
||||||
--echo #
|
|
||||||
create table t1 (col1 int);
|
|
||||||
set @a=-1;
|
|
||||||
create table t2 (a int) select (@a:=@a+1) as a from information_schema.session_variables A limit 100;
|
|
||||||
insert into t1 select A.a from t2 A, t2 B where A.a < 100 and B.a < 100;
|
|
||||||
select min(col1), max(col1), count(*) from t1;
|
|
||||||
set histogram_size=100;
|
|
||||||
analyze table t1 persistent for all;
|
|
||||||
explain extended select * from t1 where col1 in (1,2,3);
|
|
||||||
--echo # Must not cause fp division by zero, or produce nonsense numbers:
|
|
||||||
explain extended select * from t1 where col1 in (-1,-2,-3);
|
|
||||||
explain extended select * from t1 where col1<=-1;
|
|
||||||
drop table t1, t2;
|
|
||||||
|
|
||||||
set histogram_type=@save_histogram_type;
|
set histogram_type=@save_histogram_type;
|
||||||
set histogram_size=@save_histogram_size;
|
set histogram_size=@save_histogram_size;
|
||||||
set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
|
set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
|
||||||
|
96
mysql-test/t/selectivity_no_engine.test
Normal file
96
mysql-test/t/selectivity_no_engine.test
Normal file
@ -0,0 +1,96 @@
|
|||||||
|
--source include/have_stat_tables.inc
|
||||||
|
|
||||||
|
--echo #
|
||||||
|
--echo # Engine-agnostic tests for statistics-based selectivity calculations.
|
||||||
|
--echo # - selectivity tests that depend on the engine should go into
|
||||||
|
--echo # t/selectivity.test. That test is run with myisam/innodb/xtradb.
|
||||||
|
--echo # - this file is for tests that don't depend on the engine.
|
||||||
|
--echo #
|
||||||
|
|
||||||
|
--disable_warnings
|
||||||
|
drop table if exists t0,t1,t2,t3;
|
||||||
|
--enable_warnings
|
||||||
|
|
||||||
|
select @@global.use_stat_tables;
|
||||||
|
select @@session.use_stat_tables;
|
||||||
|
|
||||||
|
set @save_use_stat_tables=@@use_stat_tables;
|
||||||
|
|
||||||
|
set use_stat_tables='preferably';
|
||||||
|
|
||||||
|
set @save_optimizer_use_condition_selectivity=@@optimizer_use_condition_selectivity;
|
||||||
|
set @save_histogram_size=@@histogram_size;
|
||||||
|
set @save_histogram_type=@@histogram_type;
|
||||||
|
|
||||||
|
--echo #
|
||||||
|
--echo # MDEV-5917: EITS: different order of predicates in IN (...) causes different estimates
|
||||||
|
--echo #
|
||||||
|
create table t1(a int);
|
||||||
|
insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
|
||||||
|
|
||||||
|
create table t2 (col1 int);
|
||||||
|
--echo # one value in 1..100 range
|
||||||
|
insert into t2 select A.a + B.a*10 from t1 A, t1 B;
|
||||||
|
--echo # ten values in 100...200 range
|
||||||
|
insert into t2 select 100 + A.a + B.a*10 from t1 A, t1 B, t1 C;
|
||||||
|
|
||||||
|
set histogram_type='SINGLE_PREC_HB';
|
||||||
|
set histogram_size=100;
|
||||||
|
set optimizer_use_condition_selectivity=4;
|
||||||
|
analyze table t2 persistent for all;
|
||||||
|
|
||||||
|
--echo # The following two must have the same in 'Extra' column:
|
||||||
|
explain extended select * from t2 where col1 IN (20, 180);
|
||||||
|
explain extended select * from t2 where col1 IN (180, 20);
|
||||||
|
|
||||||
|
drop table t1, t2;
|
||||||
|
|
||||||
|
--echo #
|
||||||
|
--echo # MDEV-5926: EITS: Histogram estimates for column=least_possible_value are wrong
|
||||||
|
--echo #
|
||||||
|
create table t0(a int);
|
||||||
|
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
|
||||||
|
create table t1(a int);
|
||||||
|
insert into t1 select A.a from t0 A, t0 B, t0 C;
|
||||||
|
set histogram_size=20;
|
||||||
|
set histogram_type='single_prec_hb';
|
||||||
|
analyze table t1 persistent for all;
|
||||||
|
set use_stat_tables='preferably';
|
||||||
|
set optimizer_use_condition_selectivity=4;
|
||||||
|
--echo # Should select about 10%:
|
||||||
|
explain extended select * from t1 where a=2;
|
||||||
|
--echo # Should select about 10%:
|
||||||
|
explain extended select * from t1 where a=1;
|
||||||
|
--echo # Must not have filtered=100%:
|
||||||
|
explain extended select * from t1 where a=0;
|
||||||
|
--echo # Again, must not have filtered=100%:
|
||||||
|
explain extended select * from t1 where a=-1;
|
||||||
|
|
||||||
|
drop table t0, t1;
|
||||||
|
|
||||||
|
--echo #
|
||||||
|
--echo # MDEV-4362: Selectivity estimates for IN (...) do not depend on whether the values are in range
|
||||||
|
--echo #
|
||||||
|
create table t1 (col1 int);
|
||||||
|
set @a=-1;
|
||||||
|
create table t2 (a int) select (@a:=@a+1) as a from information_schema.session_variables A limit 100;
|
||||||
|
insert into t1 select A.a from t2 A, t2 B where A.a < 100 and B.a < 100;
|
||||||
|
select min(col1), max(col1), count(*) from t1;
|
||||||
|
set histogram_size=100;
|
||||||
|
analyze table t1 persistent for all;
|
||||||
|
explain extended select * from t1 where col1 in (1,2,3);
|
||||||
|
--echo # Must not cause fp division by zero, or produce nonsense numbers:
|
||||||
|
explain extended select * from t1 where col1 in (-1,-2,-3);
|
||||||
|
explain extended select * from t1 where col1<=-1;
|
||||||
|
drop table t1, t2;
|
||||||
|
|
||||||
|
|
||||||
|
--echo #
|
||||||
|
--echo # End of the test file
|
||||||
|
--echo #
|
||||||
|
|
||||||
|
set use_stat_tables= @save_use_stat_tables;
|
||||||
|
set histogram_type=@save_histogram_type;
|
||||||
|
set histogram_size=@save_histogram_size;
|
||||||
|
set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
|
||||||
|
|
@ -3565,3 +3565,120 @@ double get_column_range_cardinality(Field *field,
|
|||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
Estimate selectivity of "col=const" using a histogram
|
||||||
|
|
||||||
|
@param pos Position of the "const" between column's min_value and
|
||||||
|
max_value. This is a number in [0..1] range.
|
||||||
|
@param avg_sel Average selectivity of condition "col=const" in this table.
|
||||||
|
It is calcuated as (#non_null_values / #distinct_values).
|
||||||
|
|
||||||
|
@return
|
||||||
|
Expected condition selectivity (a number between 0 and 1)
|
||||||
|
|
||||||
|
@notes
|
||||||
|
[re_zero_length_buckets] If a bucket with zero value-length is in the
|
||||||
|
middle of the histogram, we will not have min==max. Example: suppose,
|
||||||
|
pos_value=0x12, and the histogram is:
|
||||||
|
|
||||||
|
#n #n+1 #n+2
|
||||||
|
... 0x10 0x12 0x12 0x14 ...
|
||||||
|
|
|
||||||
|
+------------- bucket with zero value-length
|
||||||
|
|
||||||
|
Here, we will get min=#n+1, max=#n+2, and use the multi-bucket formula.
|
||||||
|
|
||||||
|
The problem happens at the histogram ends. if pos_value=0, and the
|
||||||
|
histogram is:
|
||||||
|
|
||||||
|
0x00 0x10 ...
|
||||||
|
|
||||||
|
then min=0, max=0. This means pos_value is contained within bucket #0,
|
||||||
|
but on the other hand, histogram data says that the bucket has only one
|
||||||
|
value.
|
||||||
|
*/
|
||||||
|
|
||||||
|
double Histogram::point_selectivity(double pos, double avg_sel)
|
||||||
|
{
|
||||||
|
double sel;
|
||||||
|
/* Find the bucket that contains the value 'pos'. */
|
||||||
|
uint min= find_bucket(pos, TRUE);
|
||||||
|
uint pos_value= (uint) (pos * prec_factor());
|
||||||
|
|
||||||
|
/* Find how many buckets this value occupies */
|
||||||
|
uint max= min;
|
||||||
|
while (max + 1 < get_width() && get_value(max + 1) == pos_value)
|
||||||
|
max++;
|
||||||
|
|
||||||
|
/*
|
||||||
|
A special case: we're looking at a single bucket, and that bucket has
|
||||||
|
zero value-length. Use the multi-bucket formula (attempt to use
|
||||||
|
single-bucket formula will cause divison by zero).
|
||||||
|
|
||||||
|
For more details see [re_zero_length_buckets] above.
|
||||||
|
*/
|
||||||
|
if (max == min && get_value(max) == ((max==0)? 0 : get_value(max-1)))
|
||||||
|
max++;
|
||||||
|
|
||||||
|
if (max > min)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
The value occupies multiple buckets. Use start_bucket ... end_bucket as
|
||||||
|
selectivity.
|
||||||
|
*/
|
||||||
|
double bucket_sel= 1.0/(get_width() + 1);
|
||||||
|
sel= bucket_sel * (max - min + 1);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
The value 'pos' fits within one single histogram bucket.
|
||||||
|
|
||||||
|
Histogram buckets have the same numbers of rows, but they cover
|
||||||
|
different ranges of values.
|
||||||
|
|
||||||
|
We assume that values are uniformly distributed across the [0..1] value
|
||||||
|
range.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
If all buckets covered value ranges of the same size, the width of
|
||||||
|
value range would be:
|
||||||
|
*/
|
||||||
|
double avg_bucket_width= 1.0 / (get_width() + 1);
|
||||||
|
|
||||||
|
/*
|
||||||
|
Let's see what is the width of value range that our bucket is covering.
|
||||||
|
(min==max currently. they are kept in the formula just in case we
|
||||||
|
will want to extend it to handle multi-bucket case)
|
||||||
|
*/
|
||||||
|
double inv_prec_factor= (double) 1.0 / prec_factor();
|
||||||
|
double current_bucket_width=
|
||||||
|
(max + 1 == get_width() ? 1.0 : (get_value(max) * inv_prec_factor)) -
|
||||||
|
(min == 0 ? 0.0 : (get_value(min-1) * inv_prec_factor));
|
||||||
|
|
||||||
|
DBUG_ASSERT(current_bucket_width); /* We shouldn't get a one zero-width bucket */
|
||||||
|
|
||||||
|
/*
|
||||||
|
So:
|
||||||
|
- each bucket has the same #rows
|
||||||
|
- values are unformly distributed across the [min_value,max_value] domain.
|
||||||
|
|
||||||
|
If a bucket has value range that's N times bigger then average, than
|
||||||
|
each value will have to have N times fewer rows than average.
|
||||||
|
*/
|
||||||
|
sel= avg_sel * avg_bucket_width / current_bucket_width;
|
||||||
|
|
||||||
|
/*
|
||||||
|
(Q: if we just follow this proportion we may end up in a situation
|
||||||
|
where number of different values we expect to find in this bucket
|
||||||
|
exceeds the number of rows that this histogram has in a bucket. Are
|
||||||
|
we ok with this or we would want to have certain caps?)
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
return sel;
|
||||||
|
}
|
||||||
|
|
||||||
|
@ -241,120 +241,10 @@ public:
|
|||||||
return sel;
|
return sel;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Estimate selectivity of "col=const" using a histogram
|
Estimate selectivity of "col=const" using a histogram
|
||||||
|
|
||||||
@param pos Position of the "const" between column's min_value and
|
|
||||||
max_value. This is a number in [0..1] range.
|
|
||||||
@param avg_sel Average selectivity of condition "col=const" in this table.
|
|
||||||
It is calcuated as (#non_null_values / #distinct_values).
|
|
||||||
|
|
||||||
@return
|
|
||||||
Expected condition selectivity (a number between 0 and 1)
|
|
||||||
|
|
||||||
@notes
|
|
||||||
[re_zero_length_buckets] If a bucket with zero value-length is in the
|
|
||||||
middle of the histogram, we will not have min==max. Example: suppose,
|
|
||||||
pos_value=0x12, and the histogram is:
|
|
||||||
|
|
||||||
#n #n+1 #n+2
|
|
||||||
... 0x10 0x12 0x12 0x14 ...
|
|
||||||
|
|
|
||||||
+------------- bucket with zero value-length
|
|
||||||
|
|
||||||
Here, we will get min=#n+1, max=#n+2, and use the multi-bucket formula.
|
|
||||||
|
|
||||||
The problem happens at the histogram ends. if pos_value=0, and the
|
|
||||||
histogram is:
|
|
||||||
|
|
||||||
0x00 0x10 ...
|
|
||||||
|
|
||||||
then min=0, max=0. This means pos_value is contained within bucket #0,
|
|
||||||
but on the other hand, histogram data says that the bucket has only one
|
|
||||||
value.
|
|
||||||
*/
|
*/
|
||||||
|
double point_selectivity(double pos, double avg_sel);
|
||||||
double point_selectivity(double pos, double avg_sel)
|
|
||||||
{
|
|
||||||
double sel;
|
|
||||||
/* Find the bucket that contains the value 'pos'. */
|
|
||||||
uint min= find_bucket(pos, TRUE);
|
|
||||||
uint pos_value= (uint) (pos * prec_factor());
|
|
||||||
|
|
||||||
/* Find how many buckets this value occupies */
|
|
||||||
uint max= min;
|
|
||||||
while (max + 1 < get_width() && get_value(max + 1) == pos_value)
|
|
||||||
max++;
|
|
||||||
|
|
||||||
/*
|
|
||||||
A special case: we're looking at a single bucket, and that bucket has
|
|
||||||
zero value-length. Use the multi-bucket formula (attempt to use
|
|
||||||
single-bucket formula will cause divison by zero).
|
|
||||||
|
|
||||||
For more details see [re_zero_length_buckets] above.
|
|
||||||
*/
|
|
||||||
if (max == min && get_value(max) == ((max==0)? 0 : get_value(max-1)))
|
|
||||||
max++;
|
|
||||||
|
|
||||||
if (max > min)
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
The value occupies multiple buckets. Use start_bucket ... end_bucket as
|
|
||||||
selectivity.
|
|
||||||
*/
|
|
||||||
double bucket_sel= 1.0/(get_width() + 1);
|
|
||||||
sel= bucket_sel * (max - min + 1);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
The value 'pos' fits within one single histogram bucket.
|
|
||||||
|
|
||||||
Histogram buckets have the same numbers of rows, but they cover
|
|
||||||
different ranges of values.
|
|
||||||
|
|
||||||
We assume that values are uniformly distributed across the [0..1] value
|
|
||||||
range.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
If all buckets covered value ranges of the same size, the width of
|
|
||||||
value range would be:
|
|
||||||
*/
|
|
||||||
double avg_bucket_width= 1.0 / (get_width() + 1);
|
|
||||||
|
|
||||||
/*
|
|
||||||
Let's see what is the width of value range that our bucket is covering.
|
|
||||||
(min==max currently. they are kept in the formula just in case we
|
|
||||||
will want to extend it to handle multi-bucket case)
|
|
||||||
*/
|
|
||||||
double inv_prec_factor= (double) 1.0 / prec_factor();
|
|
||||||
double current_bucket_width=
|
|
||||||
(max + 1 == get_width() ? 1.0 : (get_value(max) * inv_prec_factor)) -
|
|
||||||
(min == 0 ? 0.0 : (get_value(min-1) * inv_prec_factor));
|
|
||||||
|
|
||||||
DBUG_ASSERT(current_bucket_width); /* We shouldn't get a one zero-width bucket */
|
|
||||||
|
|
||||||
/*
|
|
||||||
So:
|
|
||||||
- each bucket has the same #rows
|
|
||||||
- values are unformly distributed across the [min_value,max_value] domain.
|
|
||||||
|
|
||||||
If a bucket has value range that's N times bigger then average, than
|
|
||||||
each value will have to have N times fewer rows than average.
|
|
||||||
*/
|
|
||||||
sel= avg_sel * avg_bucket_width / current_bucket_width;
|
|
||||||
|
|
||||||
/*
|
|
||||||
(Q: if we just follow this proportion we may end up in a situation
|
|
||||||
where number of different values we expect to find in this bucket
|
|
||||||
exceeds the number of rows that this histogram has in a bucket. Are
|
|
||||||
we ok with this or we would want to have certain caps?)
|
|
||||||
*/
|
|
||||||
}
|
|
||||||
return sel;
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user