MDEV-35958 Cost estimates for materialized derived tables are poor
Backport of commit 74f70c394456767c1c1b0bee6bf34faff5b72dce to 10.11. The new logic is disabled by default, to enable, use optimizer_adjust_secondary_key_costs=fix_derived_table_read_cost. == Original commit comment == Fixed costs in JOIN_TAB::estimate_scan_time() and HEAP Estimate_scan_time() calculates the cost of scanning a derivied table. The old code did not take into account that the temporary table heap table may be converted to Aria. Things fixed: - Added checking if the temporary tables data will fit in the heap. If not, then calculate the cost based on the designated internal temporary table engine (Aria). - Removed MY_MAX(records, 1000) and instead trust the optimizer's estimate of records. This reduces the cost of temporary tables a bit for small tables, which caused a few changes in mtr results. - Fixed cost calculation for HEAP. - HEAP costs->row_next_find_cost was not set. This does not affect old costs calculation as this cost slot was not used anywhere. Now HEAP cost->row_next_find_cost is set, which allowed me to remove some duplicated computation in ha_heap::scan_time()
This commit is contained in:
parent
c9fe55ff7a
commit
43c5d1303f
@ -748,7 +748,8 @@ The following specify which files/extra groups are read (specified before remain
|
|||||||
keys. fix_reuse_range_for_ref = Do a better job at
|
keys. fix_reuse_range_for_ref = Do a better job at
|
||||||
reusing range access estimates when estimating ref
|
reusing range access estimates when estimating ref
|
||||||
access. fix_card_multiplier = Fix the computation in
|
access. fix_card_multiplier = Fix the computation in
|
||||||
selectivity_for_indexes. selectivity_multiplier. This
|
selectivity_for_indexes. fix_derived_table_read_cost =
|
||||||
|
Fix the cost of reading materialized derived table. This
|
||||||
variable will be deleted in MariaDB 11.0 as it is not
|
variable will be deleted in MariaDB 11.0 as it is not
|
||||||
needed with the new 11.0 optimizer.
|
needed with the new 11.0 optimizer.
|
||||||
Use 'ALL' to set all combinations.
|
Use 'ALL' to set all combinations.
|
||||||
|
@ -177,4 +177,80 @@ disconnect user2;
|
|||||||
drop table t1,t2;
|
drop table t1,t2;
|
||||||
set global userstat=@save_userstat;
|
set global userstat=@save_userstat;
|
||||||
set global innodb_stats_persistent_sample_pages=@save_ispsp;
|
set global innodb_stats_persistent_sample_pages=@save_ispsp;
|
||||||
|
#
|
||||||
|
# MDEV-35958: Cost estimates for materialized derived tables are poor
|
||||||
|
#
|
||||||
|
set optimizer_trace=1;
|
||||||
|
create table t1 (
|
||||||
|
a int
|
||||||
|
);
|
||||||
|
insert into t1 select seq from seq_1_to_10000;
|
||||||
|
explain
|
||||||
|
select *
|
||||||
|
from
|
||||||
|
t1 as t1_base,
|
||||||
|
(select a from t1 limit 10000) as TBL;
|
||||||
|
id select_type table type possible_keys key key_len ref rows Extra
|
||||||
|
1 PRIMARY t1_base ALL NULL NULL NULL NULL 10000
|
||||||
|
1 PRIMARY <derived2> ALL NULL NULL NULL NULL 10000 Using join buffer (flat, BNL join)
|
||||||
|
2 DERIVED t1 ALL NULL NULL NULL NULL 10000
|
||||||
|
set @trace=(select trace from information_schema.optimizer_trace);
|
||||||
|
# BEFORE, without fix_derived_table_read_cost: derived2 has cost=rows=10000
|
||||||
|
select json_detailed(
|
||||||
|
json_extract(json_extract(@trace, '$**.rows_estimation'), '$[1]')
|
||||||
|
) as Trace;
|
||||||
|
Trace
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"table": "t1_base",
|
||||||
|
"table_scan":
|
||||||
|
{
|
||||||
|
"rows": 10000,
|
||||||
|
"cost": 19.08984375
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"table": "<derived2>",
|
||||||
|
"table_scan":
|
||||||
|
{
|
||||||
|
"rows": 10000,
|
||||||
|
"cost": 10000
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
set optimizer_adjust_secondary_key_costs='fix_derived_table_read_cost';
|
||||||
|
explain
|
||||||
|
select *
|
||||||
|
from
|
||||||
|
t1 as t1_base,
|
||||||
|
(select a from t1 limit 10000) as TBL;
|
||||||
|
id select_type table type possible_keys key key_len ref rows Extra
|
||||||
|
1 PRIMARY t1_base ALL NULL NULL NULL NULL 10000
|
||||||
|
1 PRIMARY <derived2> ALL NULL NULL NULL NULL 10000 Using join buffer (flat, BNL join)
|
||||||
|
2 DERIVED t1 ALL NULL NULL NULL NULL 10000
|
||||||
|
set @trace=(select trace from information_schema.optimizer_trace);
|
||||||
|
# AFTER, with fix_derived_table_read_cost: derived2 has more realistic cost
|
||||||
|
select json_detailed(
|
||||||
|
json_extract(json_extract(@trace, '$**.rows_estimation'), '$[1]')
|
||||||
|
) as Trace;
|
||||||
|
Trace
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"table": "t1_base",
|
||||||
|
"table_scan":
|
||||||
|
{
|
||||||
|
"rows": 10000,
|
||||||
|
"cost": 19.08984375
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"table": "<derived2>",
|
||||||
|
"table_scan":
|
||||||
|
{
|
||||||
|
"rows": 10000,
|
||||||
|
"cost": 501
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
drop table t1;
|
||||||
set @@optimizer_adjust_secondary_key_costs=default;
|
set @@optimizer_adjust_secondary_key_costs=default;
|
||||||
|
@ -109,4 +109,41 @@ drop table t1,t2;
|
|||||||
set global userstat=@save_userstat;
|
set global userstat=@save_userstat;
|
||||||
set global innodb_stats_persistent_sample_pages=@save_ispsp;
|
set global innodb_stats_persistent_sample_pages=@save_ispsp;
|
||||||
|
|
||||||
|
--echo #
|
||||||
|
--echo # MDEV-35958: Cost estimates for materialized derived tables are poor
|
||||||
|
--echo #
|
||||||
|
set optimizer_trace=1;
|
||||||
|
create table t1 (
|
||||||
|
a int
|
||||||
|
);
|
||||||
|
insert into t1 select seq from seq_1_to_10000;
|
||||||
|
|
||||||
|
explain
|
||||||
|
select *
|
||||||
|
from
|
||||||
|
t1 as t1_base,
|
||||||
|
(select a from t1 limit 10000) as TBL;
|
||||||
|
|
||||||
|
set @trace=(select trace from information_schema.optimizer_trace);
|
||||||
|
--echo # BEFORE, without fix_derived_table_read_cost: derived2 has cost=rows=10000
|
||||||
|
select json_detailed(
|
||||||
|
json_extract(json_extract(@trace, '$**.rows_estimation'), '$[1]')
|
||||||
|
) as Trace;
|
||||||
|
|
||||||
|
set optimizer_adjust_secondary_key_costs='fix_derived_table_read_cost';
|
||||||
|
|
||||||
|
explain
|
||||||
|
select *
|
||||||
|
from
|
||||||
|
t1 as t1_base,
|
||||||
|
(select a from t1 limit 10000) as TBL;
|
||||||
|
|
||||||
|
set @trace=(select trace from information_schema.optimizer_trace);
|
||||||
|
--echo # AFTER, with fix_derived_table_read_cost: derived2 has more realistic cost
|
||||||
|
select json_detailed(
|
||||||
|
json_extract(json_extract(@trace, '$**.rows_estimation'), '$[1]')
|
||||||
|
) as Trace;
|
||||||
|
|
||||||
|
drop table t1;
|
||||||
|
|
||||||
set @@optimizer_adjust_secondary_key_costs=default;
|
set @@optimizer_adjust_secondary_key_costs=default;
|
||||||
|
@ -2325,11 +2325,11 @@ COMMAND_LINE_ARGUMENT REQUIRED
|
|||||||
VARIABLE_NAME OPTIMIZER_ADJUST_SECONDARY_KEY_COSTS
|
VARIABLE_NAME OPTIMIZER_ADJUST_SECONDARY_KEY_COSTS
|
||||||
VARIABLE_SCOPE SESSION
|
VARIABLE_SCOPE SESSION
|
||||||
VARIABLE_TYPE SET
|
VARIABLE_TYPE SET
|
||||||
VARIABLE_COMMENT A bit field with the following values: adjust_secondary_key_cost = Update secondary key costs for ranges to be at least 5x of clustered primary key costs. disable_max_seek = Disable 'max_seek optimization' for secondary keys and slight adjustment of filter cost. disable_forced_index_in_group_by = Disable automatic forced index in GROUP BY. fix_innodb_cardinality = Disable doubling of the Cardinality for InnoDB secondary keys. fix_reuse_range_for_ref = Do a better job at reusing range access estimates when estimating ref access. fix_card_multiplier = Fix the computation in selectivity_for_indexes. selectivity_multiplier. This variable will be deleted in MariaDB 11.0 as it is not needed with the new 11.0 optimizer.
|
VARIABLE_COMMENT A bit field with the following values: adjust_secondary_key_cost = Update secondary key costs for ranges to be at least 5x of clustered primary key costs. disable_max_seek = Disable 'max_seek optimization' for secondary keys and slight adjustment of filter cost. disable_forced_index_in_group_by = Disable automatic forced index in GROUP BY. fix_innodb_cardinality = Disable doubling of the Cardinality for InnoDB secondary keys. fix_reuse_range_for_ref = Do a better job at reusing range access estimates when estimating ref access. fix_card_multiplier = Fix the computation in selectivity_for_indexes. fix_derived_table_read_cost = Fix the cost of reading materialized derived table. This variable will be deleted in MariaDB 11.0 as it is not needed with the new 11.0 optimizer.
|
||||||
NUMERIC_MIN_VALUE NULL
|
NUMERIC_MIN_VALUE NULL
|
||||||
NUMERIC_MAX_VALUE NULL
|
NUMERIC_MAX_VALUE NULL
|
||||||
NUMERIC_BLOCK_SIZE NULL
|
NUMERIC_BLOCK_SIZE NULL
|
||||||
ENUM_VALUE_LIST adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by,fix_innodb_cardinality,fix_reuse_range_for_ref,fix_card_multiplier
|
ENUM_VALUE_LIST adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by,fix_innodb_cardinality,fix_reuse_range_for_ref,fix_card_multiplier,fix_derived_table_read_cost
|
||||||
READ_ONLY NO
|
READ_ONLY NO
|
||||||
COMMAND_LINE_ARGUMENT REQUIRED
|
COMMAND_LINE_ARGUMENT REQUIRED
|
||||||
VARIABLE_NAME OPTIMIZER_EXTRA_PRUNING_DEPTH
|
VARIABLE_NAME OPTIMIZER_EXTRA_PRUNING_DEPTH
|
||||||
|
@ -2495,11 +2495,11 @@ COMMAND_LINE_ARGUMENT REQUIRED
|
|||||||
VARIABLE_NAME OPTIMIZER_ADJUST_SECONDARY_KEY_COSTS
|
VARIABLE_NAME OPTIMIZER_ADJUST_SECONDARY_KEY_COSTS
|
||||||
VARIABLE_SCOPE SESSION
|
VARIABLE_SCOPE SESSION
|
||||||
VARIABLE_TYPE SET
|
VARIABLE_TYPE SET
|
||||||
VARIABLE_COMMENT A bit field with the following values: adjust_secondary_key_cost = Update secondary key costs for ranges to be at least 5x of clustered primary key costs. disable_max_seek = Disable 'max_seek optimization' for secondary keys and slight adjustment of filter cost. disable_forced_index_in_group_by = Disable automatic forced index in GROUP BY. fix_innodb_cardinality = Disable doubling of the Cardinality for InnoDB secondary keys. fix_reuse_range_for_ref = Do a better job at reusing range access estimates when estimating ref access. fix_card_multiplier = Fix the computation in selectivity_for_indexes. selectivity_multiplier. This variable will be deleted in MariaDB 11.0 as it is not needed with the new 11.0 optimizer.
|
VARIABLE_COMMENT A bit field with the following values: adjust_secondary_key_cost = Update secondary key costs for ranges to be at least 5x of clustered primary key costs. disable_max_seek = Disable 'max_seek optimization' for secondary keys and slight adjustment of filter cost. disable_forced_index_in_group_by = Disable automatic forced index in GROUP BY. fix_innodb_cardinality = Disable doubling of the Cardinality for InnoDB secondary keys. fix_reuse_range_for_ref = Do a better job at reusing range access estimates when estimating ref access. fix_card_multiplier = Fix the computation in selectivity_for_indexes. fix_derived_table_read_cost = Fix the cost of reading materialized derived table. This variable will be deleted in MariaDB 11.0 as it is not needed with the new 11.0 optimizer.
|
||||||
NUMERIC_MIN_VALUE NULL
|
NUMERIC_MIN_VALUE NULL
|
||||||
NUMERIC_MAX_VALUE NULL
|
NUMERIC_MAX_VALUE NULL
|
||||||
NUMERIC_BLOCK_SIZE NULL
|
NUMERIC_BLOCK_SIZE NULL
|
||||||
ENUM_VALUE_LIST adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by,fix_innodb_cardinality,fix_reuse_range_for_ref,fix_card_multiplier
|
ENUM_VALUE_LIST adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by,fix_innodb_cardinality,fix_reuse_range_for_ref,fix_card_multiplier,fix_derived_table_read_cost
|
||||||
READ_ONLY NO
|
READ_ONLY NO
|
||||||
COMMAND_LINE_ARGUMENT REQUIRED
|
COMMAND_LINE_ARGUMENT REQUIRED
|
||||||
VARIABLE_NAME OPTIMIZER_EXTRA_PRUNING_DEPTH
|
VARIABLE_NAME OPTIMIZER_EXTRA_PRUNING_DEPTH
|
||||||
|
@ -281,6 +281,7 @@
|
|||||||
#define OPTIMIZER_FIX_INNODB_CARDINALITY (8)
|
#define OPTIMIZER_FIX_INNODB_CARDINALITY (8)
|
||||||
#define OPTIMIZER_ADJ_FIX_REUSE_RANGE_FOR_REF (16)
|
#define OPTIMIZER_ADJ_FIX_REUSE_RANGE_FOR_REF (16)
|
||||||
#define OPTIMIZER_ADJ_FIX_CARD_MULT (32)
|
#define OPTIMIZER_ADJ_FIX_CARD_MULT (32)
|
||||||
|
#define OPTIMIZER_ADJ_FIX_DERIVED_TABLE_READ_COST (64)
|
||||||
|
|
||||||
#define OPTIMIZER_ADJ_DEFAULT (OPTIMIZER_ADJ_FIX_REUSE_RANGE_FOR_REF | \
|
#define OPTIMIZER_ADJ_DEFAULT (OPTIMIZER_ADJ_FIX_REUSE_RANGE_FOR_REF | \
|
||||||
OPTIMIZER_ADJ_FIX_CARD_MULT)
|
OPTIMIZER_ADJ_FIX_CARD_MULT)
|
||||||
|
@ -6001,7 +6001,10 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
|
|||||||
s->table->opt_range_condition_rows=s->records;
|
s->table->opt_range_condition_rows=s->records;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
{
|
||||||
|
/* Update s->records and s->read_time */
|
||||||
s->scan_time();
|
s->scan_time();
|
||||||
|
}
|
||||||
|
|
||||||
if (s->table->is_splittable())
|
if (s->table->is_splittable())
|
||||||
s->add_keyuses_for_splitting();
|
s->add_keyuses_for_splitting();
|
||||||
@ -15468,6 +15471,7 @@ void JOIN_TAB::cleanup()
|
|||||||
double JOIN_TAB::scan_time()
|
double JOIN_TAB::scan_time()
|
||||||
{
|
{
|
||||||
double res;
|
double res;
|
||||||
|
THD *thd= join->thd;
|
||||||
if (table->is_created())
|
if (table->is_created())
|
||||||
{
|
{
|
||||||
if (table->is_filled_at_execution())
|
if (table->is_filled_at_execution())
|
||||||
@ -15488,10 +15492,53 @@ double JOIN_TAB::scan_time()
|
|||||||
}
|
}
|
||||||
res= read_time;
|
res= read_time;
|
||||||
}
|
}
|
||||||
|
else if (!(thd->variables.optimizer_adjust_secondary_key_costs &
|
||||||
|
OPTIMIZER_ADJ_FIX_DERIVED_TABLE_READ_COST))
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
Old code, do not merge into 11.0+:
|
||||||
|
*/
|
||||||
|
found_records= records=table->stat_records();
|
||||||
|
read_time= found_records ? (double)found_records: 10.0;
|
||||||
|
res= read_time;
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
found_records= records=table->stat_records();
|
bool using_heap= 0;
|
||||||
read_time= found_records ? (double)found_records: 10.0;// TODO:fix this stub
|
TABLE_SHARE *share= table->s;
|
||||||
|
found_records= records= table->stat_records();
|
||||||
|
|
||||||
|
if (share->db_type() == heap_hton)
|
||||||
|
{
|
||||||
|
/* Check that the rows will fit into the heap table */
|
||||||
|
ha_rows max_rows;
|
||||||
|
max_rows= (ha_rows) ((MY_MIN(thd->variables.tmp_memory_table_size,
|
||||||
|
thd->variables.max_heap_table_size)) /
|
||||||
|
MY_ALIGN(share->reclength, sizeof(char*)));
|
||||||
|
if (records <= max_rows)
|
||||||
|
{
|
||||||
|
/* The rows will fit into the heap table */
|
||||||
|
using_heap= 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
Code for the following is taken from the heap and aria storage engine.
|
||||||
|
In 11.# this is done without explict engine code
|
||||||
|
*/
|
||||||
|
if (using_heap)
|
||||||
|
read_time= (records / 20.0) + 1;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
handler *file= table->file;
|
||||||
|
file->stats.data_file_length= share->reclength * records;
|
||||||
|
/*
|
||||||
|
Call the default scan_time() method as this is the cost for the
|
||||||
|
scan when heap is converted to Aria
|
||||||
|
*/
|
||||||
|
read_time= file->handler::scan_time();
|
||||||
|
file->stats.data_file_length= 0;
|
||||||
|
}
|
||||||
res= read_time;
|
res= read_time;
|
||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
|
@ -2982,7 +2982,7 @@ static const char *adjust_secondary_key_cost[]=
|
|||||||
{
|
{
|
||||||
"adjust_secondary_key_cost", "disable_max_seek", "disable_forced_index_in_group_by",
|
"adjust_secondary_key_cost", "disable_max_seek", "disable_forced_index_in_group_by",
|
||||||
"fix_innodb_cardinality", "fix_reuse_range_for_ref",
|
"fix_innodb_cardinality", "fix_reuse_range_for_ref",
|
||||||
"fix_card_multiplier", 0
|
"fix_card_multiplier", "fix_derived_table_read_cost", 0
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@ -2999,8 +2999,9 @@ static Sys_var_set Sys_optimizer_adjust_secondary_key_costs(
|
|||||||
"secondary keys. "
|
"secondary keys. "
|
||||||
"fix_reuse_range_for_ref = Do a better job at reusing range access estimates "
|
"fix_reuse_range_for_ref = Do a better job at reusing range access estimates "
|
||||||
"when estimating ref access. "
|
"when estimating ref access. "
|
||||||
"fix_card_multiplier = Fix the computation in selectivity_for_indexes."
|
"fix_card_multiplier = Fix the computation in selectivity_for_indexes. "
|
||||||
" selectivity_multiplier. "
|
"fix_derived_table_read_cost = Fix the cost of reading materialized "
|
||||||
|
"derived table. "
|
||||||
|
|
||||||
"This variable will be deleted in MariaDB 11.0 as it is not needed with the "
|
"This variable will be deleted in MariaDB 11.0 as it is not needed with the "
|
||||||
"new 11.0 optimizer.",
|
"new 11.0 optimizer.",
|
||||||
|
Loading…
x
Reference in New Issue
Block a user