MDEV-35958 Cost estimates for materialized derived tables are poor

Backport of commit 74f70c394456767c1c1b0bee6bf34faff5b72dce to 10.11.
The new logic is disabled by default, to enable, use
optimizer_adjust_secondary_key_costs=fix_derived_table_read_cost.

== Original commit comment ==
Fixed costs in JOIN_TAB::estimate_scan_time() and HEAP

Estimate_scan_time() calculates the cost of scanning a derivied table.
The old code did not take into account that the temporary table heap table
may be converted to Aria.

  Things fixed:
  - Added checking if the temporary tables data will fit in the heap.
    If not, then calculate the cost based on the designated internal
    temporary table engine (Aria).
  - Removed MY_MAX(records, 1000) and instead trust the optimizer's
    estimate of records. This reduces the cost of temporary tables a bit
    for small tables, which caused a few changes in mtr results.
  - Fixed cost calculation for HEAP.
  - HEAP costs->row_next_find_cost was not set. This does not affect old
    costs calculation as this cost slot was not used anywhere.
    Now HEAP cost->row_next_find_cost is set, which allowed me to remove
    some duplicated computation in ha_heap::scan_time()
This commit is contained in:
Sergei Petrunia 2025-02-10 14:36:56 +02:00
parent c9fe55ff7a
commit 43c5d1303f
8 changed files with 173 additions and 10 deletions

View File

@ -748,7 +748,8 @@ The following specify which files/extra groups are read (specified before remain
keys. fix_reuse_range_for_ref = Do a better job at
reusing range access estimates when estimating ref
access. fix_card_multiplier = Fix the computation in
selectivity_for_indexes. selectivity_multiplier. This
selectivity_for_indexes. fix_derived_table_read_cost =
Fix the cost of reading materialized derived table. This
variable will be deleted in MariaDB 11.0 as it is not
needed with the new 11.0 optimizer.
Use 'ALL' to set all combinations.

View File

@ -177,4 +177,80 @@ disconnect user2;
drop table t1,t2;
set global userstat=@save_userstat;
set global innodb_stats_persistent_sample_pages=@save_ispsp;
#
# MDEV-35958: Cost estimates for materialized derived tables are poor
#
set optimizer_trace=1;
create table t1 (
a int
);
insert into t1 select seq from seq_1_to_10000;
explain
select *
from
t1 as t1_base,
(select a from t1 limit 10000) as TBL;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1_base ALL NULL NULL NULL NULL 10000
1 PRIMARY <derived2> ALL NULL NULL NULL NULL 10000 Using join buffer (flat, BNL join)
2 DERIVED t1 ALL NULL NULL NULL NULL 10000
set @trace=(select trace from information_schema.optimizer_trace);
# BEFORE, without fix_derived_table_read_cost: derived2 has cost=rows=10000
select json_detailed(
json_extract(json_extract(@trace, '$**.rows_estimation'), '$[1]')
) as Trace;
Trace
[
{
"table": "t1_base",
"table_scan":
{
"rows": 10000,
"cost": 19.08984375
}
},
{
"table": "<derived2>",
"table_scan":
{
"rows": 10000,
"cost": 10000
}
}
]
set optimizer_adjust_secondary_key_costs='fix_derived_table_read_cost';
explain
select *
from
t1 as t1_base,
(select a from t1 limit 10000) as TBL;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1_base ALL NULL NULL NULL NULL 10000
1 PRIMARY <derived2> ALL NULL NULL NULL NULL 10000 Using join buffer (flat, BNL join)
2 DERIVED t1 ALL NULL NULL NULL NULL 10000
set @trace=(select trace from information_schema.optimizer_trace);
# AFTER, with fix_derived_table_read_cost: derived2 has more realistic cost
select json_detailed(
json_extract(json_extract(@trace, '$**.rows_estimation'), '$[1]')
) as Trace;
Trace
[
{
"table": "t1_base",
"table_scan":
{
"rows": 10000,
"cost": 19.08984375
}
},
{
"table": "<derived2>",
"table_scan":
{
"rows": 10000,
"cost": 501
}
}
]
drop table t1;
set @@optimizer_adjust_secondary_key_costs=default;

View File

@ -109,4 +109,41 @@ drop table t1,t2;
set global userstat=@save_userstat;
set global innodb_stats_persistent_sample_pages=@save_ispsp;
--echo #
--echo # MDEV-35958: Cost estimates for materialized derived tables are poor
--echo #
set optimizer_trace=1;
create table t1 (
a int
);
insert into t1 select seq from seq_1_to_10000;
explain
select *
from
t1 as t1_base,
(select a from t1 limit 10000) as TBL;
set @trace=(select trace from information_schema.optimizer_trace);
--echo # BEFORE, without fix_derived_table_read_cost: derived2 has cost=rows=10000
select json_detailed(
json_extract(json_extract(@trace, '$**.rows_estimation'), '$[1]')
) as Trace;
set optimizer_adjust_secondary_key_costs='fix_derived_table_read_cost';
explain
select *
from
t1 as t1_base,
(select a from t1 limit 10000) as TBL;
set @trace=(select trace from information_schema.optimizer_trace);
--echo # AFTER, with fix_derived_table_read_cost: derived2 has more realistic cost
select json_detailed(
json_extract(json_extract(@trace, '$**.rows_estimation'), '$[1]')
) as Trace;
drop table t1;
set @@optimizer_adjust_secondary_key_costs=default;

View File

@ -2325,11 +2325,11 @@ COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME OPTIMIZER_ADJUST_SECONDARY_KEY_COSTS
VARIABLE_SCOPE SESSION
VARIABLE_TYPE SET
VARIABLE_COMMENT A bit field with the following values: adjust_secondary_key_cost = Update secondary key costs for ranges to be at least 5x of clustered primary key costs. disable_max_seek = Disable 'max_seek optimization' for secondary keys and slight adjustment of filter cost. disable_forced_index_in_group_by = Disable automatic forced index in GROUP BY. fix_innodb_cardinality = Disable doubling of the Cardinality for InnoDB secondary keys. fix_reuse_range_for_ref = Do a better job at reusing range access estimates when estimating ref access. fix_card_multiplier = Fix the computation in selectivity_for_indexes. selectivity_multiplier. This variable will be deleted in MariaDB 11.0 as it is not needed with the new 11.0 optimizer.
VARIABLE_COMMENT A bit field with the following values: adjust_secondary_key_cost = Update secondary key costs for ranges to be at least 5x of clustered primary key costs. disable_max_seek = Disable 'max_seek optimization' for secondary keys and slight adjustment of filter cost. disable_forced_index_in_group_by = Disable automatic forced index in GROUP BY. fix_innodb_cardinality = Disable doubling of the Cardinality for InnoDB secondary keys. fix_reuse_range_for_ref = Do a better job at reusing range access estimates when estimating ref access. fix_card_multiplier = Fix the computation in selectivity_for_indexes. fix_derived_table_read_cost = Fix the cost of reading materialized derived table. This variable will be deleted in MariaDB 11.0 as it is not needed with the new 11.0 optimizer.
NUMERIC_MIN_VALUE NULL
NUMERIC_MAX_VALUE NULL
NUMERIC_BLOCK_SIZE NULL
ENUM_VALUE_LIST adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by,fix_innodb_cardinality,fix_reuse_range_for_ref,fix_card_multiplier
ENUM_VALUE_LIST adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by,fix_innodb_cardinality,fix_reuse_range_for_ref,fix_card_multiplier,fix_derived_table_read_cost
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME OPTIMIZER_EXTRA_PRUNING_DEPTH

View File

@ -2495,11 +2495,11 @@ COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME OPTIMIZER_ADJUST_SECONDARY_KEY_COSTS
VARIABLE_SCOPE SESSION
VARIABLE_TYPE SET
VARIABLE_COMMENT A bit field with the following values: adjust_secondary_key_cost = Update secondary key costs for ranges to be at least 5x of clustered primary key costs. disable_max_seek = Disable 'max_seek optimization' for secondary keys and slight adjustment of filter cost. disable_forced_index_in_group_by = Disable automatic forced index in GROUP BY. fix_innodb_cardinality = Disable doubling of the Cardinality for InnoDB secondary keys. fix_reuse_range_for_ref = Do a better job at reusing range access estimates when estimating ref access. fix_card_multiplier = Fix the computation in selectivity_for_indexes. selectivity_multiplier. This variable will be deleted in MariaDB 11.0 as it is not needed with the new 11.0 optimizer.
VARIABLE_COMMENT A bit field with the following values: adjust_secondary_key_cost = Update secondary key costs for ranges to be at least 5x of clustered primary key costs. disable_max_seek = Disable 'max_seek optimization' for secondary keys and slight adjustment of filter cost. disable_forced_index_in_group_by = Disable automatic forced index in GROUP BY. fix_innodb_cardinality = Disable doubling of the Cardinality for InnoDB secondary keys. fix_reuse_range_for_ref = Do a better job at reusing range access estimates when estimating ref access. fix_card_multiplier = Fix the computation in selectivity_for_indexes. fix_derived_table_read_cost = Fix the cost of reading materialized derived table. This variable will be deleted in MariaDB 11.0 as it is not needed with the new 11.0 optimizer.
NUMERIC_MIN_VALUE NULL
NUMERIC_MAX_VALUE NULL
NUMERIC_BLOCK_SIZE NULL
ENUM_VALUE_LIST adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by,fix_innodb_cardinality,fix_reuse_range_for_ref,fix_card_multiplier
ENUM_VALUE_LIST adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by,fix_innodb_cardinality,fix_reuse_range_for_ref,fix_card_multiplier,fix_derived_table_read_cost
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME OPTIMIZER_EXTRA_PRUNING_DEPTH

View File

@ -281,6 +281,7 @@
#define OPTIMIZER_FIX_INNODB_CARDINALITY (8)
#define OPTIMIZER_ADJ_FIX_REUSE_RANGE_FOR_REF (16)
#define OPTIMIZER_ADJ_FIX_CARD_MULT (32)
#define OPTIMIZER_ADJ_FIX_DERIVED_TABLE_READ_COST (64)
#define OPTIMIZER_ADJ_DEFAULT (OPTIMIZER_ADJ_FIX_REUSE_RANGE_FOR_REF | \
OPTIMIZER_ADJ_FIX_CARD_MULT)

View File

@ -6001,7 +6001,10 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
s->table->opt_range_condition_rows=s->records;
}
else
{
/* Update s->records and s->read_time */
s->scan_time();
}
if (s->table->is_splittable())
s->add_keyuses_for_splitting();
@ -15468,6 +15471,7 @@ void JOIN_TAB::cleanup()
double JOIN_TAB::scan_time()
{
double res;
THD *thd= join->thd;
if (table->is_created())
{
if (table->is_filled_at_execution())
@ -15488,10 +15492,53 @@ double JOIN_TAB::scan_time()
}
res= read_time;
}
else if (!(thd->variables.optimizer_adjust_secondary_key_costs &
OPTIMIZER_ADJ_FIX_DERIVED_TABLE_READ_COST))
{
/*
Old code, do not merge into 11.0+:
*/
found_records= records=table->stat_records();
read_time= found_records ? (double)found_records: 10.0;
res= read_time;
}
else
{
found_records= records=table->stat_records();
read_time= found_records ? (double)found_records: 10.0;// TODO:fix this stub
bool using_heap= 0;
TABLE_SHARE *share= table->s;
found_records= records= table->stat_records();
if (share->db_type() == heap_hton)
{
/* Check that the rows will fit into the heap table */
ha_rows max_rows;
max_rows= (ha_rows) ((MY_MIN(thd->variables.tmp_memory_table_size,
thd->variables.max_heap_table_size)) /
MY_ALIGN(share->reclength, sizeof(char*)));
if (records <= max_rows)
{
/* The rows will fit into the heap table */
using_heap= 1;
}
}
/*
Code for the following is taken from the heap and aria storage engine.
In 11.# this is done without explict engine code
*/
if (using_heap)
read_time= (records / 20.0) + 1;
else
{
handler *file= table->file;
file->stats.data_file_length= share->reclength * records;
/*
Call the default scan_time() method as this is the cost for the
scan when heap is converted to Aria
*/
read_time= file->handler::scan_time();
file->stats.data_file_length= 0;
}
res= read_time;
}
return res;

View File

@ -2982,7 +2982,7 @@ static const char *adjust_secondary_key_cost[]=
{
"adjust_secondary_key_cost", "disable_max_seek", "disable_forced_index_in_group_by",
"fix_innodb_cardinality", "fix_reuse_range_for_ref",
"fix_card_multiplier", 0
"fix_card_multiplier", "fix_derived_table_read_cost", 0
};
@ -2999,8 +2999,9 @@ static Sys_var_set Sys_optimizer_adjust_secondary_key_costs(
"secondary keys. "
"fix_reuse_range_for_ref = Do a better job at reusing range access estimates "
"when estimating ref access. "
"fix_card_multiplier = Fix the computation in selectivity_for_indexes."
" selectivity_multiplier. "
"fix_card_multiplier = Fix the computation in selectivity_for_indexes. "
"fix_derived_table_read_cost = Fix the cost of reading materialized "
"derived table. "
"This variable will be deleted in MariaDB 11.0 as it is not needed with the "
"new 11.0 optimizer.",