MDEV-27229: Estimation for filtered rows less precise ... #5
Followup: remove this line from get_column_range_cardinality() set_if_bigger(res, col_stats->get_avg_frequency()); and make sure it is only used with the binary histograms. For JSON histograms, it makes the estimates unnecessarily imprecise.
This commit is contained in:
parent
d3e511d421
commit
db8f15be93
@ -4644,7 +4644,7 @@ Warnings:
|
|||||||
Note 1003 select `test`.`t1_json`.`a` AS `a` from `test`.`t1_json` where `test`.`t1_json`.`a` < 'b-1a'
|
Note 1003 select `test`.`t1_json`.`a` AS `a` from `test`.`t1_json` where `test`.`t1_json`.`a` < 'b-1a'
|
||||||
analyze select * from t1_json where a > 'zzzzzzzzz';
|
analyze select * from t1_json where a > 'zzzzzzzzz';
|
||||||
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
|
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
|
||||||
1 SIMPLE t1_json ALL NULL NULL NULL NULL 10 10.00 10.00 0.00 Using where
|
1 SIMPLE t1_json ALL NULL NULL NULL NULL 10 10.00 0.00 0.00 Using where
|
||||||
drop table ten;
|
drop table ten;
|
||||||
UPDATE mysql.column_stats
|
UPDATE mysql.column_stats
|
||||||
SET histogram='["not-what-you-expect"]' WHERE table_name='t1_json';
|
SET histogram='["not-what-you-expect"]' WHERE table_name='t1_json';
|
||||||
@ -4752,7 +4752,7 @@ id select_type table type possible_keys key key_len ref rows r_rows filtered r_f
|
|||||||
1 SIMPLE t2 ALL NULL NULL NULL NULL 101 101.00 1.98 1.98 Using where
|
1 SIMPLE t2 ALL NULL NULL NULL NULL 101 101.00 1.98 1.98 Using where
|
||||||
explain extended select * from t2 where city < 'Lagos';
|
explain extended select * from t2 where city < 'Lagos';
|
||||||
id select_type table type possible_keys key key_len ref rows filtered Extra
|
id select_type table type possible_keys key key_len ref rows filtered Extra
|
||||||
1 SIMPLE t2 ALL NULL NULL NULL NULL 101 50.00 Using where
|
1 SIMPLE t2 ALL NULL NULL NULL NULL 101 1.98 Using where
|
||||||
Warnings:
|
Warnings:
|
||||||
Note 1003 select `test`.`t2`.`city` AS `city` from `test`.`t2` where `test`.`t2`.`city` < 'Lagos'
|
Note 1003 select `test`.`t2`.`city` AS `city` from `test`.`t2` where `test`.`t2`.`city` < 'Lagos'
|
||||||
drop table t1_bin;
|
drop table t1_bin;
|
||||||
@ -8164,7 +8164,7 @@ id select_type table type possible_keys key key_len ref rows r_rows filtered r_f
|
|||||||
1 SIMPLE t1 ALL NULL NULL NULL NULL 100 100.00 33.00 33.00 Using where
|
1 SIMPLE t1 ALL NULL NULL NULL NULL 100 100.00 33.00 33.00 Using where
|
||||||
analyze select * from t1 where a < 0;
|
analyze select * from t1 where a < 0;
|
||||||
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
|
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
|
||||||
1 SIMPLE t1 ALL NULL NULL NULL NULL 100 100.00 1.47 0.00 Using where
|
1 SIMPLE t1 ALL NULL NULL NULL NULL 100 100.00 1.00 0.00 Using where
|
||||||
analyze select * from t1 where a > 0;
|
analyze select * from t1 where a > 0;
|
||||||
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
|
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
|
||||||
1 SIMPLE t1 ALL NULL NULL NULL NULL 100 100.00 67.00 67.00 Using where
|
1 SIMPLE t1 ALL NULL NULL NULL NULL 100 100.00 67.00 67.00 Using where
|
||||||
@ -8189,7 +8189,7 @@ test.t2 analyze status Engine-independent statistics collected
|
|||||||
test.t2 analyze status OK
|
test.t2 analyze status OK
|
||||||
analyze select * from t2 where a < 1;
|
analyze select * from t2 where a < 1;
|
||||||
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
|
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
|
||||||
1 SIMPLE t2 ALL NULL NULL NULL NULL 1011 1011.00 8.33 0.10 Using where
|
1 SIMPLE t2 ALL NULL NULL NULL NULL 1011 1011.00 0.10 0.10 Using where
|
||||||
analyze select * from t2 where a =100;
|
analyze select * from t2 where a =100;
|
||||||
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
|
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
|
||||||
1 SIMPLE t2 ALL NULL NULL NULL NULL 1011 1011.00 0.10 0.10 Using where
|
1 SIMPLE t2 ALL NULL NULL NULL NULL 1011 1011.00 0.10 0.10 Using where
|
||||||
@ -8237,6 +8237,15 @@ id select_type table type possible_keys key key_len ref rows r_rows filtered r_f
|
|||||||
analyze select COUNT(*) FROM t1 WHERE a >='bar';
|
analyze select COUNT(*) FROM t1 WHERE a >='bar';
|
||||||
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
|
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
|
||||||
1 SIMPLE t1 ALL NULL NULL NULL NULL 200 200.00 100.00 100.00 Using where
|
1 SIMPLE t1 ALL NULL NULL NULL NULL 200 200.00 100.00 100.00 Using where
|
||||||
|
analyze select COUNT(*) FROM t1 WHERE a < 'aaa';
|
||||||
|
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
|
||||||
|
1 SIMPLE t1 ALL NULL NULL NULL NULL 200 200.00 0.50 0.00 Using where
|
||||||
|
analyze select COUNT(*) FROM t1 WHERE a <='aaa';
|
||||||
|
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
|
||||||
|
1 SIMPLE t1 ALL NULL NULL NULL NULL 200 200.00 0.50 0.00 Using where
|
||||||
|
analyze select COUNT(*) FROM t1 WHERE a < 'bar';
|
||||||
|
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
|
||||||
|
1 SIMPLE t1 ALL NULL NULL NULL NULL 200 200.00 0.50 0.00 Using where
|
||||||
analyze select COUNT(*) FROM t1 WHERE a <='bar';
|
analyze select COUNT(*) FROM t1 WHERE a <='bar';
|
||||||
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
|
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
|
||||||
1 SIMPLE t1 ALL NULL NULL NULL NULL 200 200.00 50.00 50.00 Using where
|
1 SIMPLE t1 ALL NULL NULL NULL NULL 200 200.00 50.00 50.00 Using where
|
||||||
|
@ -409,9 +409,9 @@ analyze select COUNT(*) FROM t1 WHERE a > 'bar';
|
|||||||
analyze select COUNT(*) FROM t1 WHERE a >='bar';
|
analyze select COUNT(*) FROM t1 WHERE a >='bar';
|
||||||
|
|
||||||
# Can enable these after get_avg_frequency issue is resolved:
|
# Can enable these after get_avg_frequency issue is resolved:
|
||||||
# analyze select COUNT(*) FROM t1 WHERE a < 'aaa';
|
analyze select COUNT(*) FROM t1 WHERE a < 'aaa';
|
||||||
# analyze select COUNT(*) FROM t1 WHERE a <='aaa';
|
analyze select COUNT(*) FROM t1 WHERE a <='aaa';
|
||||||
# analyze select COUNT(*) FROM t1 WHERE a < 'bar';
|
analyze select COUNT(*) FROM t1 WHERE a < 'bar';
|
||||||
|
|
||||||
analyze select COUNT(*) FROM t1 WHERE a <='bar';
|
analyze select COUNT(*) FROM t1 WHERE a <='bar';
|
||||||
|
|
||||||
|
@ -962,7 +962,7 @@ std::string& Histogram_json_hb::get_end_value(int idx)
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
double Histogram_json_hb::range_selectivity(Field *field, key_range *min_endp,
|
double Histogram_json_hb::range_selectivity(Field *field, key_range *min_endp,
|
||||||
key_range *max_endp)
|
key_range *max_endp, double avg_sel)
|
||||||
{
|
{
|
||||||
double min, max;
|
double min, max;
|
||||||
|
|
||||||
|
@ -129,7 +129,7 @@ public:
|
|||||||
double avg_selection,
|
double avg_selection,
|
||||||
double total_rows) override;
|
double total_rows) override;
|
||||||
double range_selectivity(Field *field, key_range *min_endp,
|
double range_selectivity(Field *field, key_range *min_endp,
|
||||||
key_range *max_endp) override;
|
key_range *max_endp, double avg_sel) override;
|
||||||
|
|
||||||
void set_json_text(ulonglong sz, const char *json_text_arg,
|
void set_json_text(ulonglong sz, const char *json_text_arg,
|
||||||
size_t json_text_len)
|
size_t json_text_len)
|
||||||
|
@ -3278,6 +3278,9 @@ double records_in_column_ranges(PARAM *param, uint idx,
|
|||||||
}
|
}
|
||||||
total_rows += rows;
|
total_rows += rows;
|
||||||
}
|
}
|
||||||
|
if (total_rows == 0)
|
||||||
|
total_rows= MY_MIN(1, param->table->stat_records());
|
||||||
|
|
||||||
return total_rows;
|
return total_rows;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3910,10 +3910,13 @@ double get_column_range_cardinality(Field *field,
|
|||||||
if (col_stats->min_max_values_are_provided())
|
if (col_stats->min_max_values_are_provided())
|
||||||
{
|
{
|
||||||
Histogram_base *hist= col_stats->histogram;
|
Histogram_base *hist= col_stats->histogram;
|
||||||
|
double avg_frequency= col_stats->get_avg_frequency();
|
||||||
double sel;
|
double sel;
|
||||||
if (hist && hist->is_usable(thd))
|
if (hist && hist->is_usable(thd))
|
||||||
{
|
{
|
||||||
sel= hist->range_selectivity(field, min_endp, max_endp);
|
sel= hist->range_selectivity(field, min_endp, max_endp,
|
||||||
|
avg_frequency / col_non_nulls);
|
||||||
|
res= col_non_nulls * sel;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -3938,9 +3941,9 @@ double get_column_range_cardinality(Field *field,
|
|||||||
max_mp_pos= 1.0;
|
max_mp_pos= 1.0;
|
||||||
|
|
||||||
sel = (max_mp_pos - min_mp_pos);
|
sel = (max_mp_pos - min_mp_pos);
|
||||||
}
|
|
||||||
res= col_non_nulls * sel;
|
res= col_non_nulls * sel;
|
||||||
set_if_bigger(res, col_stats->get_avg_frequency());
|
set_if_bigger(res, avg_frequency);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
res= col_non_nulls;
|
res= col_non_nulls;
|
||||||
@ -4076,7 +4079,8 @@ double Histogram_binary::point_selectivity(Field *field, key_range *endpoint,
|
|||||||
|
|
||||||
double Histogram_binary::range_selectivity(Field *field,
|
double Histogram_binary::range_selectivity(Field *field,
|
||||||
key_range *min_endp,
|
key_range *min_endp,
|
||||||
key_range *max_endp)
|
key_range *max_endp,
|
||||||
|
double avg_sel)
|
||||||
{
|
{
|
||||||
double sel, min_mp_pos, max_mp_pos;
|
double sel, min_mp_pos, max_mp_pos;
|
||||||
Column_statistics *col_stats= field->read_stats;
|
Column_statistics *col_stats= field->read_stats;
|
||||||
@ -4105,6 +4109,7 @@ double Histogram_binary::range_selectivity(Field *field,
|
|||||||
uint max= find_bucket(max_mp_pos, FALSE);
|
uint max= find_bucket(max_mp_pos, FALSE);
|
||||||
sel= bucket_sel * (max - min + 1);
|
sel= bucket_sel * (max - min + 1);
|
||||||
|
|
||||||
|
set_if_bigger(sel, avg_sel);
|
||||||
return sel;
|
return sel;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -191,7 +191,7 @@ public:
|
|||||||
double avg_selectivity,
|
double avg_selectivity,
|
||||||
double total_rows)=0;
|
double total_rows)=0;
|
||||||
virtual double range_selectivity(Field *field, key_range *min_endp,
|
virtual double range_selectivity(Field *field, key_range *min_endp,
|
||||||
key_range *max_endp)=0;
|
key_range *max_endp, double avg_sel)=0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Legacy: return the size of the histogram on disk.
|
Legacy: return the size of the histogram on disk.
|
||||||
@ -353,7 +353,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
double range_selectivity(Field *field, key_range *min_endp,
|
double range_selectivity(Field *field, key_range *min_endp,
|
||||||
key_range *max_endp) override;
|
key_range *max_endp, double avg_sel) override;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Estimate selectivity of "col=const" using a histogram
|
Estimate selectivity of "col=const" using a histogram
|
||||||
|
Loading…
x
Reference in New Issue
Block a user