MDEV-26849: JSON Histograms: point selectivity estimates are off
.. for non-existent values. Handle this special case.
This commit is contained in:
parent
f3f78bed85
commit
05877df472
@ -1885,6 +1885,7 @@ t1 id 1 17384 0.0000 4.0000 14.0000 0.15705,0.15711,0.21463,0.15705,0.15711,0.15
|
||||
explain select * from t1;
|
||||
id select_type table type possible_keys key key_len ref rows Extra
|
||||
1 SIMPLE t1 ALL NULL NULL NULL NULL 229376
|
||||
drop table t0;
|
||||
drop table t1;
|
||||
set analyze_sample_percentage=@save_analyze_sample_percentage;
|
||||
set histogram_size=@save_histogram_size;
|
||||
|
@ -1019,7 +1019,7 @@ select table_name, column_name, min_value, max_value, nulls_ratio, avg_length, a
|
||||
from mysql.column_stats;
|
||||
explain select * from t1;
|
||||
|
||||
|
||||
drop table t0;
|
||||
drop table t1;
|
||||
set analyze_sample_percentage=@save_analyze_sample_percentage;
|
||||
set histogram_size=@save_histogram_size;
|
||||
|
@ -4148,6 +4148,7 @@ t1 id 1 17384 0.0000 4.0000 14.0000 {
|
||||
explain select * from t1;
|
||||
id select_type table type possible_keys key key_len ref rows Extra
|
||||
1 SIMPLE t1 ALL NULL NULL NULL NULL 229376
|
||||
drop table t0;
|
||||
drop table t1;
|
||||
set analyze_sample_percentage=@save_analyze_sample_percentage;
|
||||
set histogram_size=@save_histogram_size;
|
||||
@ -7530,3 +7531,104 @@ select c from t1 where c > '1';
|
||||
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
|
||||
1 SIMPLE t1 ALL NULL NULL NULL NULL 16 16.00 75.00 75.00 Using where
|
||||
drop table t1;
|
||||
#
|
||||
# MDEV-26849: JSON Histograms: point selectivity estimates are off for non-existent values
|
||||
#
|
||||
#
|
||||
create table t0(a int);
|
||||
insert into t0 (a) values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
|
||||
create table t1(a int);
|
||||
insert into t1 select 100*A.a from t0 A, t0 B, t0 C;
|
||||
select a, count(*) from t1 group by a order by a;
|
||||
a count(*)
|
||||
0 100
|
||||
100 100
|
||||
200 100
|
||||
300 100
|
||||
400 100
|
||||
500 100
|
||||
600 100
|
||||
700 100
|
||||
800 100
|
||||
900 100
|
||||
set histogram_type=json_hb, histogram_size=default;
|
||||
analyze table t1 persistent for all;
|
||||
Table Op Msg_type Msg_text
|
||||
test.t1 analyze status Engine-independent statistics collected
|
||||
test.t1 analyze status OK
|
||||
select * from mysql.column_stats where table_name='t1';
|
||||
db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram
|
||||
test t1 a 0 900 0.0000 4.0000 100.0000 10 JSON_HB {
|
||||
"histogram_hb_v2": [
|
||||
{
|
||||
"start": "0",
|
||||
"size": 0.1,
|
||||
"ndv": 1
|
||||
},
|
||||
{
|
||||
"start": "100",
|
||||
"size": 0.1,
|
||||
"ndv": 1
|
||||
},
|
||||
{
|
||||
"start": "200",
|
||||
"size": 0.1,
|
||||
"ndv": 1
|
||||
},
|
||||
{
|
||||
"start": "300",
|
||||
"size": 0.1,
|
||||
"ndv": 1
|
||||
},
|
||||
{
|
||||
"start": "400",
|
||||
"size": 0.1,
|
||||
"ndv": 1
|
||||
},
|
||||
{
|
||||
"start": "500",
|
||||
"size": 0.1,
|
||||
"ndv": 1
|
||||
},
|
||||
{
|
||||
"start": "600",
|
||||
"size": 0.1,
|
||||
"ndv": 1
|
||||
},
|
||||
{
|
||||
"start": "700",
|
||||
"size": 0.1,
|
||||
"ndv": 1
|
||||
},
|
||||
{
|
||||
"start": "800",
|
||||
"size": 0.1,
|
||||
"ndv": 1
|
||||
},
|
||||
{
|
||||
"start": "900",
|
||||
"end": "900",
|
||||
"size": 0.1,
|
||||
"ndv": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
analyze select * from t1 where a=0;
|
||||
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
|
||||
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 1000.00 10.00 10.00 Using where
|
||||
analyze select * from t1 where a=50;
|
||||
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
|
||||
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 1000.00 0.10 0.00 Using where
|
||||
analyze select * from t1 where a=70;
|
||||
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
|
||||
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 1000.00 0.10 0.00 Using where
|
||||
analyze select * from t1 where a=100;
|
||||
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
|
||||
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 1000.00 10.00 10.00 Using where
|
||||
analyze select * from t1 where a=150;
|
||||
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
|
||||
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 1000.00 0.10 0.00 Using where
|
||||
analyze select * from t1 where a=200;
|
||||
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
|
||||
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 1000.00 10.00 10.00 Using where
|
||||
drop table t0,t1;
|
||||
|
@ -295,3 +295,25 @@ select c from t1 where c > '1';
|
||||
|
||||
drop table t1;
|
||||
|
||||
--echo #
|
||||
--echo # MDEV-26849: JSON Histograms: point selectivity estimates are off for non-existent values
|
||||
--echo #
|
||||
--echo #
|
||||
|
||||
create table t0(a int);
|
||||
insert into t0 (a) values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
|
||||
create table t1(a int);
|
||||
insert into t1 select 100*A.a from t0 A, t0 B, t0 C;
|
||||
select a, count(*) from t1 group by a order by a;
|
||||
set histogram_type=json_hb, histogram_size=default;
|
||||
analyze table t1 persistent for all;
|
||||
select * from mysql.column_stats where table_name='t1';
|
||||
analyze select * from t1 where a=0;
|
||||
analyze select * from t1 where a=50;
|
||||
analyze select * from t1 where a=70;
|
||||
analyze select * from t1 where a=100;
|
||||
analyze select * from t1 where a=150;
|
||||
analyze select * from t1 where a=200;
|
||||
|
||||
drop table t0,t1;
|
||||
|
||||
|
@ -616,7 +616,7 @@ double position_in_interval(Field *field, const uchar *key, uint key_len,
|
||||
|
||||
|
||||
double Histogram_json_hb::point_selectivity(Field *field, key_range *endpoint,
|
||||
double avg_sel)
|
||||
double avg_sel, double total_rows)
|
||||
{
|
||||
const uchar *key = endpoint->key;
|
||||
if (field->real_maybe_null())
|
||||
@ -631,9 +631,11 @@ double Histogram_json_hb::point_selectivity(Field *field, key_range *endpoint,
|
||||
|
||||
if (buckets[idx].ndv == 1 && !equal)
|
||||
{
|
||||
// The bucket has a single value and it doesn't match! Use the global
|
||||
// average.
|
||||
sel= avg_sel;
|
||||
/*
|
||||
The bucket has a single value and it doesn't match! Return a very
|
||||
small value.
|
||||
*/
|
||||
sel= 1.0 / total_rows;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -109,7 +109,8 @@ public:
|
||||
ulonglong size) override;
|
||||
|
||||
double point_selectivity(Field *field, key_range *endpoint,
|
||||
double avg_selection) override;
|
||||
double avg_selection,
|
||||
double total_rows) override;
|
||||
double range_selectivity(Field *field, key_range *min_endp,
|
||||
key_range *max_endp) override;
|
||||
|
||||
|
@ -3882,7 +3882,8 @@ double get_column_range_cardinality(Field *field,
|
||||
{
|
||||
res= col_non_nulls *
|
||||
hist->point_selectivity(field, min_endp,
|
||||
avg_frequency / col_non_nulls);
|
||||
avg_frequency / col_non_nulls,
|
||||
tab_records);
|
||||
}
|
||||
}
|
||||
else if (avg_frequency == 0.0)
|
||||
@ -3973,7 +3974,8 @@ double get_column_range_cardinality(Field *field,
|
||||
*/
|
||||
|
||||
double Histogram_binary::point_selectivity(Field *field, key_range *endpoint,
|
||||
double avg_sel)
|
||||
double avg_sel,
|
||||
double total_records)
|
||||
{
|
||||
double sel;
|
||||
Column_statistics *col_stats= field->read_stats;
|
||||
|
@ -186,7 +186,8 @@ public:
|
||||
|
||||
|
||||
virtual double point_selectivity(Field *field, key_range *endpoint,
|
||||
double avg_selection)=0;
|
||||
double avg_selectivity,
|
||||
double total_rows)=0;
|
||||
virtual double range_selectivity(Field *field, key_range *min_endp,
|
||||
key_range *max_endp)=0;
|
||||
|
||||
@ -355,7 +356,8 @@ public:
|
||||
Estimate selectivity of "col=const" using a histogram
|
||||
*/
|
||||
double point_selectivity(Field *field, key_range *endpoint,
|
||||
double avg_sel) override;
|
||||
double avg_sel,
|
||||
double total_rows) override;
|
||||
};
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user