MDEV-26849: JSON Histograms: point selectivity estimates are off
.. for non-existent values. Handle this special case.
This commit is contained in:
parent
f3f78bed85
commit
05877df472
@ -1885,6 +1885,7 @@ t1 id 1 17384 0.0000 4.0000 14.0000 0.15705,0.15711,0.21463,0.15705,0.15711,0.15
|
|||||||
explain select * from t1;
|
explain select * from t1;
|
||||||
id select_type table type possible_keys key key_len ref rows Extra
|
id select_type table type possible_keys key key_len ref rows Extra
|
||||||
1 SIMPLE t1 ALL NULL NULL NULL NULL 229376
|
1 SIMPLE t1 ALL NULL NULL NULL NULL 229376
|
||||||
|
drop table t0;
|
||||||
drop table t1;
|
drop table t1;
|
||||||
set analyze_sample_percentage=@save_analyze_sample_percentage;
|
set analyze_sample_percentage=@save_analyze_sample_percentage;
|
||||||
set histogram_size=@save_histogram_size;
|
set histogram_size=@save_histogram_size;
|
||||||
|
@ -1019,7 +1019,7 @@ select table_name, column_name, min_value, max_value, nulls_ratio, avg_length, a
|
|||||||
from mysql.column_stats;
|
from mysql.column_stats;
|
||||||
explain select * from t1;
|
explain select * from t1;
|
||||||
|
|
||||||
|
drop table t0;
|
||||||
drop table t1;
|
drop table t1;
|
||||||
set analyze_sample_percentage=@save_analyze_sample_percentage;
|
set analyze_sample_percentage=@save_analyze_sample_percentage;
|
||||||
set histogram_size=@save_histogram_size;
|
set histogram_size=@save_histogram_size;
|
||||||
|
@ -4148,6 +4148,7 @@ t1 id 1 17384 0.0000 4.0000 14.0000 {
|
|||||||
explain select * from t1;
|
explain select * from t1;
|
||||||
id select_type table type possible_keys key key_len ref rows Extra
|
id select_type table type possible_keys key key_len ref rows Extra
|
||||||
1 SIMPLE t1 ALL NULL NULL NULL NULL 229376
|
1 SIMPLE t1 ALL NULL NULL NULL NULL 229376
|
||||||
|
drop table t0;
|
||||||
drop table t1;
|
drop table t1;
|
||||||
set analyze_sample_percentage=@save_analyze_sample_percentage;
|
set analyze_sample_percentage=@save_analyze_sample_percentage;
|
||||||
set histogram_size=@save_histogram_size;
|
set histogram_size=@save_histogram_size;
|
||||||
@ -7530,3 +7531,104 @@ select c from t1 where c > '1';
|
|||||||
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
|
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
|
||||||
1 SIMPLE t1 ALL NULL NULL NULL NULL 16 16.00 75.00 75.00 Using where
|
1 SIMPLE t1 ALL NULL NULL NULL NULL 16 16.00 75.00 75.00 Using where
|
||||||
drop table t1;
|
drop table t1;
|
||||||
|
#
|
||||||
|
# MDEV-26849: JSON Histograms: point selectivity estimates are off for non-existent values
|
||||||
|
#
|
||||||
|
#
|
||||||
|
create table t0(a int);
|
||||||
|
insert into t0 (a) values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
|
||||||
|
create table t1(a int);
|
||||||
|
insert into t1 select 100*A.a from t0 A, t0 B, t0 C;
|
||||||
|
select a, count(*) from t1 group by a order by a;
|
||||||
|
a count(*)
|
||||||
|
0 100
|
||||||
|
100 100
|
||||||
|
200 100
|
||||||
|
300 100
|
||||||
|
400 100
|
||||||
|
500 100
|
||||||
|
600 100
|
||||||
|
700 100
|
||||||
|
800 100
|
||||||
|
900 100
|
||||||
|
set histogram_type=json_hb, histogram_size=default;
|
||||||
|
analyze table t1 persistent for all;
|
||||||
|
Table Op Msg_type Msg_text
|
||||||
|
test.t1 analyze status Engine-independent statistics collected
|
||||||
|
test.t1 analyze status OK
|
||||||
|
select * from mysql.column_stats where table_name='t1';
|
||||||
|
db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram
|
||||||
|
test t1 a 0 900 0.0000 4.0000 100.0000 10 JSON_HB {
|
||||||
|
"histogram_hb_v2": [
|
||||||
|
{
|
||||||
|
"start": "0",
|
||||||
|
"size": 0.1,
|
||||||
|
"ndv": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"start": "100",
|
||||||
|
"size": 0.1,
|
||||||
|
"ndv": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"start": "200",
|
||||||
|
"size": 0.1,
|
||||||
|
"ndv": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"start": "300",
|
||||||
|
"size": 0.1,
|
||||||
|
"ndv": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"start": "400",
|
||||||
|
"size": 0.1,
|
||||||
|
"ndv": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"start": "500",
|
||||||
|
"size": 0.1,
|
||||||
|
"ndv": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"start": "600",
|
||||||
|
"size": 0.1,
|
||||||
|
"ndv": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"start": "700",
|
||||||
|
"size": 0.1,
|
||||||
|
"ndv": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"start": "800",
|
||||||
|
"size": 0.1,
|
||||||
|
"ndv": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"start": "900",
|
||||||
|
"end": "900",
|
||||||
|
"size": 0.1,
|
||||||
|
"ndv": 1
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
analyze select * from t1 where a=0;
|
||||||
|
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
|
||||||
|
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 1000.00 10.00 10.00 Using where
|
||||||
|
analyze select * from t1 where a=50;
|
||||||
|
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
|
||||||
|
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 1000.00 0.10 0.00 Using where
|
||||||
|
analyze select * from t1 where a=70;
|
||||||
|
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
|
||||||
|
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 1000.00 0.10 0.00 Using where
|
||||||
|
analyze select * from t1 where a=100;
|
||||||
|
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
|
||||||
|
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 1000.00 10.00 10.00 Using where
|
||||||
|
analyze select * from t1 where a=150;
|
||||||
|
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
|
||||||
|
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 1000.00 0.10 0.00 Using where
|
||||||
|
analyze select * from t1 where a=200;
|
||||||
|
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
|
||||||
|
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 1000.00 10.00 10.00 Using where
|
||||||
|
drop table t0,t1;
|
||||||
|
@ -295,3 +295,25 @@ select c from t1 where c > '1';
|
|||||||
|
|
||||||
drop table t1;
|
drop table t1;
|
||||||
|
|
||||||
|
--echo #
|
||||||
|
--echo # MDEV-26849: JSON Histograms: point selectivity estimates are off for non-existent values
|
||||||
|
--echo #
|
||||||
|
--echo #
|
||||||
|
|
||||||
|
create table t0(a int);
|
||||||
|
insert into t0 (a) values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
|
||||||
|
create table t1(a int);
|
||||||
|
insert into t1 select 100*A.a from t0 A, t0 B, t0 C;
|
||||||
|
select a, count(*) from t1 group by a order by a;
|
||||||
|
set histogram_type=json_hb, histogram_size=default;
|
||||||
|
analyze table t1 persistent for all;
|
||||||
|
select * from mysql.column_stats where table_name='t1';
|
||||||
|
analyze select * from t1 where a=0;
|
||||||
|
analyze select * from t1 where a=50;
|
||||||
|
analyze select * from t1 where a=70;
|
||||||
|
analyze select * from t1 where a=100;
|
||||||
|
analyze select * from t1 where a=150;
|
||||||
|
analyze select * from t1 where a=200;
|
||||||
|
|
||||||
|
drop table t0,t1;
|
||||||
|
|
||||||
|
@ -616,7 +616,7 @@ double position_in_interval(Field *field, const uchar *key, uint key_len,
|
|||||||
|
|
||||||
|
|
||||||
double Histogram_json_hb::point_selectivity(Field *field, key_range *endpoint,
|
double Histogram_json_hb::point_selectivity(Field *field, key_range *endpoint,
|
||||||
double avg_sel)
|
double avg_sel, double total_rows)
|
||||||
{
|
{
|
||||||
const uchar *key = endpoint->key;
|
const uchar *key = endpoint->key;
|
||||||
if (field->real_maybe_null())
|
if (field->real_maybe_null())
|
||||||
@ -631,9 +631,11 @@ double Histogram_json_hb::point_selectivity(Field *field, key_range *endpoint,
|
|||||||
|
|
||||||
if (buckets[idx].ndv == 1 && !equal)
|
if (buckets[idx].ndv == 1 && !equal)
|
||||||
{
|
{
|
||||||
// The bucket has a single value and it doesn't match! Use the global
|
/*
|
||||||
// average.
|
The bucket has a single value and it doesn't match! Return a very
|
||||||
sel= avg_sel;
|
small value.
|
||||||
|
*/
|
||||||
|
sel= 1.0 / total_rows;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -109,7 +109,8 @@ public:
|
|||||||
ulonglong size) override;
|
ulonglong size) override;
|
||||||
|
|
||||||
double point_selectivity(Field *field, key_range *endpoint,
|
double point_selectivity(Field *field, key_range *endpoint,
|
||||||
double avg_selection) override;
|
double avg_selection,
|
||||||
|
double total_rows) override;
|
||||||
double range_selectivity(Field *field, key_range *min_endp,
|
double range_selectivity(Field *field, key_range *min_endp,
|
||||||
key_range *max_endp) override;
|
key_range *max_endp) override;
|
||||||
|
|
||||||
|
@ -3882,7 +3882,8 @@ double get_column_range_cardinality(Field *field,
|
|||||||
{
|
{
|
||||||
res= col_non_nulls *
|
res= col_non_nulls *
|
||||||
hist->point_selectivity(field, min_endp,
|
hist->point_selectivity(field, min_endp,
|
||||||
avg_frequency / col_non_nulls);
|
avg_frequency / col_non_nulls,
|
||||||
|
tab_records);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (avg_frequency == 0.0)
|
else if (avg_frequency == 0.0)
|
||||||
@ -3973,7 +3974,8 @@ double get_column_range_cardinality(Field *field,
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
double Histogram_binary::point_selectivity(Field *field, key_range *endpoint,
|
double Histogram_binary::point_selectivity(Field *field, key_range *endpoint,
|
||||||
double avg_sel)
|
double avg_sel,
|
||||||
|
double total_records)
|
||||||
{
|
{
|
||||||
double sel;
|
double sel;
|
||||||
Column_statistics *col_stats= field->read_stats;
|
Column_statistics *col_stats= field->read_stats;
|
||||||
|
@ -186,7 +186,8 @@ public:
|
|||||||
|
|
||||||
|
|
||||||
virtual double point_selectivity(Field *field, key_range *endpoint,
|
virtual double point_selectivity(Field *field, key_range *endpoint,
|
||||||
double avg_selection)=0;
|
double avg_selectivity,
|
||||||
|
double total_rows)=0;
|
||||||
virtual double range_selectivity(Field *field, key_range *min_endp,
|
virtual double range_selectivity(Field *field, key_range *min_endp,
|
||||||
key_range *max_endp)=0;
|
key_range *max_endp)=0;
|
||||||
|
|
||||||
@ -355,7 +356,8 @@ public:
|
|||||||
Estimate selectivity of "col=const" using a histogram
|
Estimate selectivity of "col=const" using a histogram
|
||||||
*/
|
*/
|
||||||
double point_selectivity(Field *field, key_range *endpoint,
|
double point_selectivity(Field *field, key_range *endpoint,
|
||||||
double avg_sel) override;
|
double avg_sel,
|
||||||
|
double total_rows) override;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user