MDEV-35958 Cost estimates for materialized derived tables are poor
Backport of commit 74f70c394456767c1c1b0bee6bf34faff5b72dce to 10.11. The new logic is disabled by default, to enable, use optimizer_adjust_secondary_key_costs=fix_derived_table_read_cost. == Original commit comment == Fixed costs in JOIN_TAB::estimate_scan_time() and HEAP Estimate_scan_time() calculates the cost of scanning a derivied table. The old code did not take into account that the temporary table heap table may be converted to Aria. Things fixed: - Added checking if the temporary tables data will fit in the heap. If not, then calculate the cost based on the designated internal temporary table engine (Aria). - Removed MY_MAX(records, 1000) and instead trust the optimizer's estimate of records. This reduces the cost of temporary tables a bit for small tables, which caused a few changes in mtr results. - Fixed cost calculation for HEAP. - HEAP costs->row_next_find_cost was not set. This does not affect old costs calculation as this cost slot was not used anywhere. Now HEAP cost->row_next_find_cost is set, which allowed me to remove some duplicated computation in ha_heap::scan_time()
This commit is contained in:
parent
c9fe55ff7a
commit
43c5d1303f
@ -748,7 +748,8 @@ The following specify which files/extra groups are read (specified before remain
|
||||
keys. fix_reuse_range_for_ref = Do a better job at
|
||||
reusing range access estimates when estimating ref
|
||||
access. fix_card_multiplier = Fix the computation in
|
||||
selectivity_for_indexes. selectivity_multiplier. This
|
||||
selectivity_for_indexes. fix_derived_table_read_cost =
|
||||
Fix the cost of reading materialized derived table. This
|
||||
variable will be deleted in MariaDB 11.0 as it is not
|
||||
needed with the new 11.0 optimizer.
|
||||
Use 'ALL' to set all combinations.
|
||||
|
@ -177,4 +177,80 @@ disconnect user2;
|
||||
drop table t1,t2;
|
||||
set global userstat=@save_userstat;
|
||||
set global innodb_stats_persistent_sample_pages=@save_ispsp;
|
||||
#
|
||||
# MDEV-35958: Cost estimates for materialized derived tables are poor
|
||||
#
|
||||
set optimizer_trace=1;
|
||||
create table t1 (
|
||||
a int
|
||||
);
|
||||
insert into t1 select seq from seq_1_to_10000;
|
||||
explain
|
||||
select *
|
||||
from
|
||||
t1 as t1_base,
|
||||
(select a from t1 limit 10000) as TBL;
|
||||
id select_type table type possible_keys key key_len ref rows Extra
|
||||
1 PRIMARY t1_base ALL NULL NULL NULL NULL 10000
|
||||
1 PRIMARY <derived2> ALL NULL NULL NULL NULL 10000 Using join buffer (flat, BNL join)
|
||||
2 DERIVED t1 ALL NULL NULL NULL NULL 10000
|
||||
set @trace=(select trace from information_schema.optimizer_trace);
|
||||
# BEFORE, without fix_derived_table_read_cost: derived2 has cost=rows=10000
|
||||
select json_detailed(
|
||||
json_extract(json_extract(@trace, '$**.rows_estimation'), '$[1]')
|
||||
) as Trace;
|
||||
Trace
|
||||
[
|
||||
{
|
||||
"table": "t1_base",
|
||||
"table_scan":
|
||||
{
|
||||
"rows": 10000,
|
||||
"cost": 19.08984375
|
||||
}
|
||||
},
|
||||
{
|
||||
"table": "<derived2>",
|
||||
"table_scan":
|
||||
{
|
||||
"rows": 10000,
|
||||
"cost": 10000
|
||||
}
|
||||
}
|
||||
]
|
||||
set optimizer_adjust_secondary_key_costs='fix_derived_table_read_cost';
|
||||
explain
|
||||
select *
|
||||
from
|
||||
t1 as t1_base,
|
||||
(select a from t1 limit 10000) as TBL;
|
||||
id select_type table type possible_keys key key_len ref rows Extra
|
||||
1 PRIMARY t1_base ALL NULL NULL NULL NULL 10000
|
||||
1 PRIMARY <derived2> ALL NULL NULL NULL NULL 10000 Using join buffer (flat, BNL join)
|
||||
2 DERIVED t1 ALL NULL NULL NULL NULL 10000
|
||||
set @trace=(select trace from information_schema.optimizer_trace);
|
||||
# AFTER, with fix_derived_table_read_cost: derived2 has more realistic cost
|
||||
select json_detailed(
|
||||
json_extract(json_extract(@trace, '$**.rows_estimation'), '$[1]')
|
||||
) as Trace;
|
||||
Trace
|
||||
[
|
||||
{
|
||||
"table": "t1_base",
|
||||
"table_scan":
|
||||
{
|
||||
"rows": 10000,
|
||||
"cost": 19.08984375
|
||||
}
|
||||
},
|
||||
{
|
||||
"table": "<derived2>",
|
||||
"table_scan":
|
||||
{
|
||||
"rows": 10000,
|
||||
"cost": 501
|
||||
}
|
||||
}
|
||||
]
|
||||
drop table t1;
|
||||
set @@optimizer_adjust_secondary_key_costs=default;
|
||||
|
@ -109,4 +109,41 @@ drop table t1,t2;
|
||||
set global userstat=@save_userstat;
|
||||
set global innodb_stats_persistent_sample_pages=@save_ispsp;
|
||||
|
||||
--echo #
|
||||
--echo # MDEV-35958: Cost estimates for materialized derived tables are poor
|
||||
--echo #
|
||||
set optimizer_trace=1;
|
||||
create table t1 (
|
||||
a int
|
||||
);
|
||||
insert into t1 select seq from seq_1_to_10000;
|
||||
|
||||
explain
|
||||
select *
|
||||
from
|
||||
t1 as t1_base,
|
||||
(select a from t1 limit 10000) as TBL;
|
||||
|
||||
set @trace=(select trace from information_schema.optimizer_trace);
|
||||
--echo # BEFORE, without fix_derived_table_read_cost: derived2 has cost=rows=10000
|
||||
select json_detailed(
|
||||
json_extract(json_extract(@trace, '$**.rows_estimation'), '$[1]')
|
||||
) as Trace;
|
||||
|
||||
set optimizer_adjust_secondary_key_costs='fix_derived_table_read_cost';
|
||||
|
||||
explain
|
||||
select *
|
||||
from
|
||||
t1 as t1_base,
|
||||
(select a from t1 limit 10000) as TBL;
|
||||
|
||||
set @trace=(select trace from information_schema.optimizer_trace);
|
||||
--echo # AFTER, with fix_derived_table_read_cost: derived2 has more realistic cost
|
||||
select json_detailed(
|
||||
json_extract(json_extract(@trace, '$**.rows_estimation'), '$[1]')
|
||||
) as Trace;
|
||||
|
||||
drop table t1;
|
||||
|
||||
set @@optimizer_adjust_secondary_key_costs=default;
|
||||
|
@ -2325,11 +2325,11 @@ COMMAND_LINE_ARGUMENT REQUIRED
|
||||
VARIABLE_NAME OPTIMIZER_ADJUST_SECONDARY_KEY_COSTS
|
||||
VARIABLE_SCOPE SESSION
|
||||
VARIABLE_TYPE SET
|
||||
VARIABLE_COMMENT A bit field with the following values: adjust_secondary_key_cost = Update secondary key costs for ranges to be at least 5x of clustered primary key costs. disable_max_seek = Disable 'max_seek optimization' for secondary keys and slight adjustment of filter cost. disable_forced_index_in_group_by = Disable automatic forced index in GROUP BY. fix_innodb_cardinality = Disable doubling of the Cardinality for InnoDB secondary keys. fix_reuse_range_for_ref = Do a better job at reusing range access estimates when estimating ref access. fix_card_multiplier = Fix the computation in selectivity_for_indexes. selectivity_multiplier. This variable will be deleted in MariaDB 11.0 as it is not needed with the new 11.0 optimizer.
|
||||
VARIABLE_COMMENT A bit field with the following values: adjust_secondary_key_cost = Update secondary key costs for ranges to be at least 5x of clustered primary key costs. disable_max_seek = Disable 'max_seek optimization' for secondary keys and slight adjustment of filter cost. disable_forced_index_in_group_by = Disable automatic forced index in GROUP BY. fix_innodb_cardinality = Disable doubling of the Cardinality for InnoDB secondary keys. fix_reuse_range_for_ref = Do a better job at reusing range access estimates when estimating ref access. fix_card_multiplier = Fix the computation in selectivity_for_indexes. fix_derived_table_read_cost = Fix the cost of reading materialized derived table. This variable will be deleted in MariaDB 11.0 as it is not needed with the new 11.0 optimizer.
|
||||
NUMERIC_MIN_VALUE NULL
|
||||
NUMERIC_MAX_VALUE NULL
|
||||
NUMERIC_BLOCK_SIZE NULL
|
||||
ENUM_VALUE_LIST adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by,fix_innodb_cardinality,fix_reuse_range_for_ref,fix_card_multiplier
|
||||
ENUM_VALUE_LIST adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by,fix_innodb_cardinality,fix_reuse_range_for_ref,fix_card_multiplier,fix_derived_table_read_cost
|
||||
READ_ONLY NO
|
||||
COMMAND_LINE_ARGUMENT REQUIRED
|
||||
VARIABLE_NAME OPTIMIZER_EXTRA_PRUNING_DEPTH
|
||||
|
@ -2495,11 +2495,11 @@ COMMAND_LINE_ARGUMENT REQUIRED
|
||||
VARIABLE_NAME OPTIMIZER_ADJUST_SECONDARY_KEY_COSTS
|
||||
VARIABLE_SCOPE SESSION
|
||||
VARIABLE_TYPE SET
|
||||
VARIABLE_COMMENT A bit field with the following values: adjust_secondary_key_cost = Update secondary key costs for ranges to be at least 5x of clustered primary key costs. disable_max_seek = Disable 'max_seek optimization' for secondary keys and slight adjustment of filter cost. disable_forced_index_in_group_by = Disable automatic forced index in GROUP BY. fix_innodb_cardinality = Disable doubling of the Cardinality for InnoDB secondary keys. fix_reuse_range_for_ref = Do a better job at reusing range access estimates when estimating ref access. fix_card_multiplier = Fix the computation in selectivity_for_indexes. selectivity_multiplier. This variable will be deleted in MariaDB 11.0 as it is not needed with the new 11.0 optimizer.
|
||||
VARIABLE_COMMENT A bit field with the following values: adjust_secondary_key_cost = Update secondary key costs for ranges to be at least 5x of clustered primary key costs. disable_max_seek = Disable 'max_seek optimization' for secondary keys and slight adjustment of filter cost. disable_forced_index_in_group_by = Disable automatic forced index in GROUP BY. fix_innodb_cardinality = Disable doubling of the Cardinality for InnoDB secondary keys. fix_reuse_range_for_ref = Do a better job at reusing range access estimates when estimating ref access. fix_card_multiplier = Fix the computation in selectivity_for_indexes. fix_derived_table_read_cost = Fix the cost of reading materialized derived table. This variable will be deleted in MariaDB 11.0 as it is not needed with the new 11.0 optimizer.
|
||||
NUMERIC_MIN_VALUE NULL
|
||||
NUMERIC_MAX_VALUE NULL
|
||||
NUMERIC_BLOCK_SIZE NULL
|
||||
ENUM_VALUE_LIST adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by,fix_innodb_cardinality,fix_reuse_range_for_ref,fix_card_multiplier
|
||||
ENUM_VALUE_LIST adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by,fix_innodb_cardinality,fix_reuse_range_for_ref,fix_card_multiplier,fix_derived_table_read_cost
|
||||
READ_ONLY NO
|
||||
COMMAND_LINE_ARGUMENT REQUIRED
|
||||
VARIABLE_NAME OPTIMIZER_EXTRA_PRUNING_DEPTH
|
||||
|
@ -281,6 +281,7 @@
|
||||
#define OPTIMIZER_FIX_INNODB_CARDINALITY (8)
|
||||
#define OPTIMIZER_ADJ_FIX_REUSE_RANGE_FOR_REF (16)
|
||||
#define OPTIMIZER_ADJ_FIX_CARD_MULT (32)
|
||||
#define OPTIMIZER_ADJ_FIX_DERIVED_TABLE_READ_COST (64)
|
||||
|
||||
#define OPTIMIZER_ADJ_DEFAULT (OPTIMIZER_ADJ_FIX_REUSE_RANGE_FOR_REF | \
|
||||
OPTIMIZER_ADJ_FIX_CARD_MULT)
|
||||
|
@ -6001,7 +6001,10 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
|
||||
s->table->opt_range_condition_rows=s->records;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Update s->records and s->read_time */
|
||||
s->scan_time();
|
||||
}
|
||||
|
||||
if (s->table->is_splittable())
|
||||
s->add_keyuses_for_splitting();
|
||||
@ -15468,6 +15471,7 @@ void JOIN_TAB::cleanup()
|
||||
double JOIN_TAB::scan_time()
|
||||
{
|
||||
double res;
|
||||
THD *thd= join->thd;
|
||||
if (table->is_created())
|
||||
{
|
||||
if (table->is_filled_at_execution())
|
||||
@ -15488,10 +15492,53 @@ double JOIN_TAB::scan_time()
|
||||
}
|
||||
res= read_time;
|
||||
}
|
||||
else if (!(thd->variables.optimizer_adjust_secondary_key_costs &
|
||||
OPTIMIZER_ADJ_FIX_DERIVED_TABLE_READ_COST))
|
||||
{
|
||||
/*
|
||||
Old code, do not merge into 11.0+:
|
||||
*/
|
||||
found_records= records=table->stat_records();
|
||||
read_time= found_records ? (double)found_records: 10.0;
|
||||
res= read_time;
|
||||
}
|
||||
else
|
||||
{
|
||||
found_records= records=table->stat_records();
|
||||
read_time= found_records ? (double)found_records: 10.0;// TODO:fix this stub
|
||||
bool using_heap= 0;
|
||||
TABLE_SHARE *share= table->s;
|
||||
found_records= records= table->stat_records();
|
||||
|
||||
if (share->db_type() == heap_hton)
|
||||
{
|
||||
/* Check that the rows will fit into the heap table */
|
||||
ha_rows max_rows;
|
||||
max_rows= (ha_rows) ((MY_MIN(thd->variables.tmp_memory_table_size,
|
||||
thd->variables.max_heap_table_size)) /
|
||||
MY_ALIGN(share->reclength, sizeof(char*)));
|
||||
if (records <= max_rows)
|
||||
{
|
||||
/* The rows will fit into the heap table */
|
||||
using_heap= 1;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
Code for the following is taken from the heap and aria storage engine.
|
||||
In 11.# this is done without explict engine code
|
||||
*/
|
||||
if (using_heap)
|
||||
read_time= (records / 20.0) + 1;
|
||||
else
|
||||
{
|
||||
handler *file= table->file;
|
||||
file->stats.data_file_length= share->reclength * records;
|
||||
/*
|
||||
Call the default scan_time() method as this is the cost for the
|
||||
scan when heap is converted to Aria
|
||||
*/
|
||||
read_time= file->handler::scan_time();
|
||||
file->stats.data_file_length= 0;
|
||||
}
|
||||
res= read_time;
|
||||
}
|
||||
return res;
|
||||
|
@ -2982,7 +2982,7 @@ static const char *adjust_secondary_key_cost[]=
|
||||
{
|
||||
"adjust_secondary_key_cost", "disable_max_seek", "disable_forced_index_in_group_by",
|
||||
"fix_innodb_cardinality", "fix_reuse_range_for_ref",
|
||||
"fix_card_multiplier", 0
|
||||
"fix_card_multiplier", "fix_derived_table_read_cost", 0
|
||||
};
|
||||
|
||||
|
||||
@ -2999,8 +2999,9 @@ static Sys_var_set Sys_optimizer_adjust_secondary_key_costs(
|
||||
"secondary keys. "
|
||||
"fix_reuse_range_for_ref = Do a better job at reusing range access estimates "
|
||||
"when estimating ref access. "
|
||||
"fix_card_multiplier = Fix the computation in selectivity_for_indexes."
|
||||
" selectivity_multiplier. "
|
||||
"fix_card_multiplier = Fix the computation in selectivity_for_indexes. "
|
||||
"fix_derived_table_read_cost = Fix the cost of reading materialized "
|
||||
"derived table. "
|
||||
|
||||
"This variable will be deleted in MariaDB 11.0 as it is not needed with the "
|
||||
"new 11.0 optimizer.",
|
||||
|
Loading…
x
Reference in New Issue
Block a user