Add limits for how many IO operations a table access will do

This solves the current problem in the optimizer
- SELECT FROM big_table
  - SELECT from small_table where small_table.eq_ref_key=big_table.id

The old code assumed that each eq_ref access will cause an IO.
As the cost of IO is high, this dominated the cost for the later table
which caused the optimizer to prefer table scans + join cache over
index reads.

This patch fixes this issue by limiting the number of expected IO calls,
for rows and index separately, to the size of the table or index or
the number of accesses that we expect in a range for the index.

The major changes are:

- Adding a new structure ALL_READ_COST that is mainly used in
  best_access_path() to hold the cost parts of the cost we are
  calculating. This allows us to limit the number of IO operations when
  multiplying the cost with the previous row combinations.
- All storage engine cost functions are changed to return IO_AND_CPU_COST.
  The virtual cost functions should now return in IO_AND_CPU_COST.io
  the number of disk blocks that will be accessed instead of the cost
  of the access.
- We are not limiting the io_blocks for table or index scans as we
  assume that engines may not store these in the 'hot' part of the
  cache. Table and index scans also use far fewer IO blocks than
  key accesses, so the original issue is not as critical with scans.

Other things:
- OPT_RANGE now holds a 'Cost_estimate cost' instead of a lot of
  different costs. All the old costs, like index_only_read, can be
  extracted from 'cost'.
- Added to the start of some functions 'handler *file= table->file'
  to shorten the code that is using the handler.
- handler->cost() is used to change an ALL_READ_COST or IO_AND_CPU_COST
  to 'cost in milliseconds'
- New functions:  handler::index_blocks() and handler::row_blocks()
  which are used to limit the IO.
- Added index_cost and row_cost to Cost_estimate and removed all not
  needed members.
- Removed cost coefficients from Cost_estimate as these don't make sense
  when costs (except IO_BLOCKS) are in milliseconds.
- Removed handler::avg_io_cost() and replaced it with DISK_READ_COST.
- Renamed best_range_rowid_filter_for_partial_join() to
  best_range_rowid_filter() as using the old name made lines too long.
- Changed all SJ_MATERIALIZATION_INFO 'Cost_estimate' variables to
  'double' as Cost_estimate power was not used for these and thus
  just caused storage and performance overhead.
- Changed cost_for_index_read() to use 'worst_seeks' to only limit
  IO, not number of table accesses. With this patch worst_seeks is
  probably not needed anymore, but I kept it around just in case.
- The code that applies the cost of a filter became much shorter and
  easier thanks to the API changes.
- Adjusted cost for fulltext keys in collaboration with Sergei Golubchik.
- Most test changes caused by this patch are that table scans are
  changed to use indexes.
- Added ha_seq::keyread_time() and ha_seq::key_scan_time() to make
  checking the number of potential IO blocks easier during debugging.
This commit is contained in:
Monty 2022-09-30 17:10:37 +03:00 committed by Sergei Petrunia
parent 7afa819f72
commit d9d0e78039
29 changed files with 607 additions and 520 deletions

View File

@ -237,9 +237,10 @@ void Sort_costs::compute_pq_sort_costs(Sort_param *param, ha_rows num_rows,
if (queue_size < num_available_keys) if (queue_size < num_available_keys)
{ {
handler *file= param->sort_form->file;
costs[PQ_SORT_ORDER_BY_FIELDS]= costs[PQ_SORT_ORDER_BY_FIELDS]=
get_pq_sort_cost(num_rows, queue_size, false) + get_pq_sort_cost(num_rows, queue_size, false) +
param->sort_form->file->ha_rnd_pos_call_time(MY_MIN(queue_size - 1, num_rows)); file->cost(file->ha_rnd_pos_call_time(MY_MIN(queue_size - 1, num_rows)));
} }
/* Calculate cost with addon fields */ /* Calculate cost with addon fields */
@ -270,14 +271,15 @@ void Sort_costs::compute_merge_sort_costs(Sort_param *param,
costs[MERGE_SORT_ORDER_BY_FIELDS]= DBL_MAX; costs[MERGE_SORT_ORDER_BY_FIELDS]= DBL_MAX;
if (num_available_keys) if (num_available_keys)
{
handler *file= param->sort_form->file;
costs[MERGE_SORT_ORDER_BY_FIELDS]= costs[MERGE_SORT_ORDER_BY_FIELDS]=
get_merge_many_buffs_cost_fast(num_rows, num_available_keys, get_merge_many_buffs_cost_fast(num_rows, num_available_keys,
row_length, DEFAULT_KEY_COMPARE_COST, row_length, DEFAULT_KEY_COMPARE_COST,
default_optimizer_costs.disk_read_cost, default_optimizer_costs.disk_read_cost,
false) + false) +
param->sort_form->file->ha_rnd_pos_call_time(MY_MIN(param->limit_rows, file->cost(file->ha_rnd_pos_call_time(MY_MIN(param->limit_rows, num_rows)));
num_rows)); }
if (with_addon_fields) if (with_addon_fields)
{ {
/* Compute cost of merge sort *if* we strip addon fields. */ /* Compute cost of merge sort *if* we strip addon fields. */

View File

@ -6629,7 +6629,7 @@ ha_rows ha_partition::multi_range_read_info_const(uint keyno,
ha_rows tmp_rows; ha_rows tmp_rows;
uint tmp_mrr_mode; uint tmp_mrr_mode;
m_mrr_buffer_size[i]= 0; m_mrr_buffer_size[i]= 0;
part_cost.reset(); part_cost.reset(*file);
tmp_mrr_mode= *mrr_mode; tmp_mrr_mode= *mrr_mode;
tmp_rows= (*file)-> tmp_rows= (*file)->
multi_range_read_info_const(keyno, &m_part_seq_if, multi_range_read_info_const(keyno, &m_part_seq_if,
@ -6680,7 +6680,7 @@ ha_rows ha_partition::multi_range_read_info(uint keyno, uint n_ranges,
{ {
ha_rows tmp_rows; ha_rows tmp_rows;
m_mrr_buffer_size[i]= 0; m_mrr_buffer_size[i]= 0;
part_cost.reset(); part_cost.reset(*file);
if ((tmp_rows= (*file)->multi_range_read_info(keyno, n_ranges, keys, if ((tmp_rows= (*file)->multi_range_read_info(keyno, n_ranges, keys,
key_parts, key_parts,
&m_mrr_buffer_size[i], &m_mrr_buffer_size[i],

View File

@ -3283,6 +3283,23 @@ LEX_CSTRING *handler::engine_name()
return hton_name(ht); return hton_name(ht);
} }
/*
Calculate the number of index blocks we are going to access when
doing 'ranges' index dives reading a total of 'rows' rows.
*/
ulonglong handler::index_blocks(uint index, uint ranges, ha_rows rows)
{
if (!stats.block_size)
return 0; // No disk storage
size_t len= table->key_storage_length(index);
ulonglong blocks= (rows * len / INDEX_BLOCK_FILL_FACTOR_DIV *
INDEX_BLOCK_FILL_FACTOR_MUL) / stats.block_size + ranges;
return blocks * stats.block_size / IO_SIZE;
}
/* /*
Calculate cost for an index scan for given index and number of records. Calculate cost for an index scan for given index and number of records.
@ -3329,7 +3346,7 @@ IO_AND_CPU_COST handler::keyread_time(uint index, ulong ranges, ha_rows rows,
else else
io_blocks= blocks * stats.block_size / IO_SIZE; io_blocks= blocks * stats.block_size / IO_SIZE;
} }
cost.io= (double) io_blocks * avg_io_cost(); cost.io= (double) io_blocks;
cost.cpu= blocks * INDEX_BLOCK_COPY_COST; cost.cpu= blocks * INDEX_BLOCK_COPY_COST;
return cost; return cost;
} }
@ -3342,36 +3359,35 @@ IO_AND_CPU_COST handler::keyread_time(uint index, ulong ranges, ha_rows rows,
in which case there should an additional rnd_pos_time() cost. in which case there should an additional rnd_pos_time() cost.
*/ */
double handler::ha_keyread_time(uint index, ulong ranges, ha_rows rows, IO_AND_CPU_COST handler::ha_keyread_time(uint index, ulong ranges,
ulonglong blocks) ha_rows rows,
ulonglong blocks)
{ {
if (rows < ranges) if (rows < ranges)
rows= ranges; rows= ranges;
IO_AND_CPU_COST cost= keyread_time(index, ranges, rows, blocks); IO_AND_CPU_COST cost= keyread_time(index, ranges, rows, blocks);
return (cost.io * DISK_READ_RATIO + cost.cpu+= ranges * KEY_LOOKUP_COST + (rows - ranges) * KEY_NEXT_FIND_COST;
cost.cpu + ranges * KEY_LOOKUP_COST + return cost;
(rows - ranges) * KEY_NEXT_FIND_COST);
} }
/* /*
Read a row from a clustered index Read rows from a clustered index
Cost is similar to ha_rnd_pos_call_time() as a index_read() on a clusterd Cost is similar to ha_rnd_pos_call_time() as a index_read() on a clustered
key has identical code as rnd_pos() (At least in InnoDB:) key has identical code as rnd_pos() (At least in InnoDB:)
*/ */
double handler::ha_keyread_clustered_and_copy_time(uint index, ulong ranges, IO_AND_CPU_COST
ha_rows rows, handler::ha_keyread_clustered_time(uint index, ulong ranges,
ulonglong blocks) ha_rows rows,
ulonglong blocks)
{ {
if (rows < ranges) if (rows < ranges)
rows= ranges; rows= ranges;
IO_AND_CPU_COST cost= keyread_time(index, ranges, rows, blocks); IO_AND_CPU_COST cost= keyread_time(index, ranges, rows, blocks);
return (cost.io * DISK_READ_RATIO + cost.cpu+= (ranges * ROW_LOOKUP_COST + (rows - ranges) * ROW_NEXT_FIND_COST);
cost.cpu + ranges * ROW_LOOKUP_COST + return cost;
(rows - ranges) * ROW_NEXT_FIND_COST +
rows * ROW_COPY_COST);
} }
THD *handler::ha_thd(void) const THD *handler::ha_thd(void) const

View File

@ -2777,25 +2777,50 @@ typedef struct st_range_seq_if
typedef bool (*SKIP_INDEX_TUPLE_FUNC) (range_seq_t seq, range_id_t range_info); typedef bool (*SKIP_INDEX_TUPLE_FUNC) (range_seq_t seq, range_id_t range_info);
/* Separated costs for IO and CPU */
struct IO_AND_CPU_COST
{
double io;
double cpu;
void add(IO_AND_CPU_COST cost)
{
io+= cost.io;
cpu+= cost.cpu;
}
};
/* Cost for reading a row through an index */
struct ALL_READ_COST
{
IO_AND_CPU_COST index_cost, row_cost;
longlong max_index_blocks, max_row_blocks;
/* index_only_read = index_cost + copy_cost */
double copy_cost;
void reset()
{
row_cost= {0,0};
index_cost= {0,0};
max_index_blocks= max_row_blocks= 0;
copy_cost= 0.0;
}
};
class Cost_estimate class Cost_estimate
{ {
public: public:
double io_count; /* number of I/O to fetch records */
double avg_io_cost; /* cost of an average I/O oper. to fetch records */ double avg_io_cost; /* cost of an average I/O oper. to fetch records */
double idx_io_count; /* number of I/O to read keys */ double cpu_cost; /* Cpu cost unrelated to engine costs */
double idx_avg_io_cost; /* cost of an average I/O oper. to fetch records */
double cpu_cost; /* Cost of reading the rows based on a key */
double idx_cpu_cost; /* Cost of reading the key from the index tree */
double import_cost; /* cost of remote operations */
double comp_cost; /* Cost of comparing found rows with WHERE clause */ double comp_cost; /* Cost of comparing found rows with WHERE clause */
double copy_cost; /* Copying the data to 'record' */ double copy_cost; /* Copying the data to 'record' */
double mem_cost; /* cost of used memory */
double limit_cost; /* Total cost when restricting rows with limit */ double limit_cost; /* Total cost when restricting rows with limit */
static constexpr double IO_COEFF= 1; IO_AND_CPU_COST index_cost;
static constexpr double CPU_COEFF= 1; IO_AND_CPU_COST row_cost;
static constexpr double MEM_COEFF= 1;
static constexpr double IMPORT_COEFF= 1;
Cost_estimate() Cost_estimate()
{ {
@ -2809,30 +2834,18 @@ public:
double total_cost() const double total_cost() const
{ {
return IO_COEFF*io_count*avg_io_cost + DBUG_ASSERT(avg_io_cost != 0.0 || index_cost.io + row_cost.io == 0);
IO_COEFF*idx_io_count*idx_avg_io_cost + return ((index_cost.io + row_cost.io) * avg_io_cost+
CPU_COEFF*(cpu_cost + idx_cpu_cost + comp_cost + copy_cost) + index_cost.cpu + row_cost.cpu + comp_cost + copy_cost +
MEM_COEFF*mem_cost + IMPORT_COEFF*import_cost; cpu_cost);
} }
/* /* Cost for just fetching and copying a row (no compare costs) */
Cost of fetching a key and use the key to find a row (if not clustered or double fetch_cost() const
covering key). Does not include row copy or compare with WHERE clause.
*/
double find_cost() const
{ {
return IO_COEFF*io_count*avg_io_cost + DBUG_ASSERT(avg_io_cost != 0.0 || index_cost.io + row_cost.io == 0);
IO_COEFF*idx_io_count*idx_avg_io_cost + return ((index_cost.io + row_cost.io) * avg_io_cost+
CPU_COEFF*(cpu_cost + idx_cpu_cost) + index_cost.cpu + row_cost.cpu + copy_cost);
MEM_COEFF*mem_cost + IMPORT_COEFF*import_cost;
}
/*
Cost of comparing the row with the WHERE clause
*/
inline double compare_cost() const
{
return CPU_COEFF*comp_cost;
} }
/* /*
@ -2840,72 +2853,48 @@ public:
*/ */
inline double data_copy_cost() const inline double data_copy_cost() const
{ {
return CPU_COEFF*copy_cost; return copy_cost;
} }
/* Cost of finding an index entry, without copying or comparing it */ /*
double index_only_cost() Multiply costs to simulate a scan where we read
We assume that io blocks will be cached and we only
allocate memory once. There should also be no import_cost
that needs to be done multiple times
*/
void multiply(uint n)
{ {
return IO_COEFF*idx_io_count*idx_avg_io_cost + index_cost.io*= n;
CPU_COEFF*idx_cpu_cost; index_cost.cpu*= n;
row_cost.io*= n;
row_cost.cpu*= n;
copy_cost*= n;
comp_cost*= n;
cpu_cost*= n;
}
void add(Cost_estimate *cost)
{
DBUG_ASSERT(cost->avg_io_cost != 0.0 || (index_cost.io + row_cost.io == 0));
avg_io_cost= cost->avg_io_cost;
index_cost.io+= cost->index_cost.io;
index_cost.cpu+= cost->index_cost.cpu;
row_cost.io+= cost->row_cost.io;
row_cost.cpu+= cost->row_cost.cpu;
copy_cost+= cost->copy_cost;
comp_cost+= cost->comp_cost;
cpu_cost+= cost->cpu_cost;
} }
inline void reset() inline void reset()
{ {
avg_io_cost= 1.0; avg_io_cost= 0;
idx_avg_io_cost= 1.0; comp_cost= cpu_cost= 0.0;
io_count= idx_io_count= cpu_cost= idx_cpu_cost= mem_cost= import_cost= 0.0; copy_cost= limit_cost= 0.0;
comp_cost= copy_cost= limit_cost= 0.0; index_cost= {0,0};
} row_cost= {0,0};
void multiply(double m)
{
io_count *= m;
cpu_cost *= m;
idx_io_count *= m;
idx_cpu_cost *= m;
import_cost *= m;
comp_cost *= m;
limit_cost*= m;
/* Don't multiply mem_cost */
}
void add(const Cost_estimate* cost)
{
if (cost->io_count != 0.0)
{
double io_count_sum= io_count + cost->io_count;
avg_io_cost= (io_count * avg_io_cost +
cost->io_count * cost->avg_io_cost)
/io_count_sum;
io_count= io_count_sum;
}
if (cost->idx_io_count != 0.0)
{
double idx_io_count_sum= idx_io_count + cost->idx_io_count;
idx_avg_io_cost= (idx_io_count * idx_avg_io_cost +
cost->idx_io_count * cost->idx_avg_io_cost)
/idx_io_count_sum;
idx_io_count= idx_io_count_sum;
}
cpu_cost += cost->cpu_cost;
idx_cpu_cost += cost->idx_cpu_cost;
import_cost += cost->import_cost;
comp_cost+= cost->comp_cost;
limit_cost+= cost->limit_cost;
}
void add_io(double add_io_cnt, double add_avg_cost)
{
/* In edge cases add_io_cnt may be zero */
if (add_io_cnt > 0)
{
double io_count_sum= io_count + add_io_cnt;
avg_io_cost= (io_count * avg_io_cost +
add_io_cnt * add_avg_cost) / io_count_sum;
io_count= io_count_sum;
}
} }
inline void reset(handler *file);
/* /*
To be used when we go from old single value-based cost calculations to To be used when we go from old single value-based cost calculations to
@ -2914,13 +2903,10 @@ public:
void convert_from_cost(double cost) void convert_from_cost(double cost)
{ {
reset(); reset();
io_count= cost; cpu_cost= cost;
} }
}; };
void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted,
Cost_estimate *cost);
/* /*
Indicates that all scanned ranges will be singlepoint (aka equality) ranges. Indicates that all scanned ranges will be singlepoint (aka equality) ranges.
The ranges may not use the full key but all of them will use the same number The ranges may not use the full key but all of them will use the same number
@ -3094,20 +3080,6 @@ enum class Compare_keys : uint32_t
NotEqual NotEqual
}; };
/* Cost for reading a row through an index */
struct INDEX_READ_COST
{
double read_cost;
double index_only_cost;
};
/* Separated costs for IO and CPU. For handler::keyread_time() */
struct IO_AND_CPU_COST
{
double io;
double cpu;
};
/** /**
The handler class is the interface for dynamically loadable The handler class is the interface for dynamically loadable
@ -3183,6 +3155,7 @@ public:
HANDLER_BUFFER *multi_range_buffer; /* MRR buffer info */ HANDLER_BUFFER *multi_range_buffer; /* MRR buffer info */
uint ranges_in_seq; /* Total number of ranges in the traversed sequence */ uint ranges_in_seq; /* Total number of ranges in the traversed sequence */
/** Current range (the one we're now returning rows from) */ /** Current range (the one we're now returning rows from) */
KEY_MULTI_RANGE mrr_cur_range; KEY_MULTI_RANGE mrr_cur_range;
/** The following are for read_range() */ /** The following are for read_range() */
@ -3610,6 +3583,58 @@ public:
} }
virtual void change_table_ptr(TABLE *table_arg, TABLE_SHARE *share); virtual void change_table_ptr(TABLE *table_arg, TABLE_SHARE *share);
inline double io_cost(IO_AND_CPU_COST cost)
{
return cost.io * DISK_READ_COST * DISK_READ_RATIO;
}
inline double cost(IO_AND_CPU_COST cost)
{
return io_cost(cost) + cost.cpu;
}
/*
Calculate cost with capping io_blocks to the given maximum.
This is done here instead of earlier to allow filtering to work
with the original' io_block counts.
*/
inline double cost(ALL_READ_COST *cost)
{
double blocks= (MY_MIN(cost->index_cost.io,(double) cost->max_index_blocks) +
MY_MIN(cost->row_cost.io, (double) cost->max_row_blocks));
return ((cost->index_cost.cpu + cost->row_cost.cpu + cost->copy_cost) +
blocks * DISK_READ_COST * DISK_READ_RATIO);
}
/*
Calculate cost when we are going to excute the given read method
multiple times
*/
inline double cost_for_reading_multiple_times(double multiple,
ALL_READ_COST *cost)
{
double blocks= (MY_MIN(cost->index_cost.io * multiple,
(double) cost->max_index_blocks) +
MY_MIN(cost->row_cost.io * multiple,
(double) cost->max_row_blocks));
return ((cost->index_cost.cpu + cost->row_cost.cpu + cost->copy_cost) *
multiple +
blocks * DISK_READ_COST * DISK_READ_RATIO);
}
inline ulonglong row_blocks()
{
return (stats.data_file_length + IO_SIZE-1) / IO_SIZE;
}
virtual ulonglong index_blocks(uint index, uint ranges, ha_rows rows);
inline ulonglong index_blocks(uint index)
{
return index_blocks(index, 1, stats.records);
}
/* /*
Time for a full table data scan. To be overrided by engines, should not Time for a full table data scan. To be overrided by engines, should not
be used by the sql level. be used by the sql level.
@ -3619,7 +3644,7 @@ protected:
{ {
IO_AND_CPU_COST cost; IO_AND_CPU_COST cost;
ulonglong length= stats.data_file_length; ulonglong length= stats.data_file_length;
cost.io= (double) (length / IO_SIZE) * avg_io_cost(); cost.io= (double) (length / IO_SIZE);
cost.cpu= (!stats.block_size ? 0.0 : cost.cpu= (!stats.block_size ? 0.0 :
(double) ((length + stats.block_size-1)/stats.block_size) * (double) ((length + stats.block_size-1)/stats.block_size) *
INDEX_BLOCK_COPY_COST); INDEX_BLOCK_COPY_COST);
@ -3639,27 +3664,23 @@ public:
a few rows and the extra cost has no practical effect. a few rows and the extra cost has no practical effect.
*/ */
inline double ha_scan_time(ha_rows rows) inline IO_AND_CPU_COST ha_scan_time(ha_rows rows)
{ {
IO_AND_CPU_COST cost= scan_time(); IO_AND_CPU_COST cost= scan_time();
return (cost.io * DISK_READ_RATIO + cost.cpu+= (TABLE_SCAN_SETUP_COST +
cost.cpu + TABLE_SCAN_SETUP_COST + (double) rows * (ROW_NEXT_FIND_COST + ROW_COPY_COST));
(double) rows * (ROW_NEXT_FIND_COST + ROW_COPY_COST)); return cost;
} }
/* /*
Time for a full table scan, fetching the rows from the table and comparing Time for a full table scan, fetching the rows from the table and comparing
the row with the where clause the row with the where clause
*/ */
inline double ha_scan_and_compare_time(ha_rows rows) inline IO_AND_CPU_COST ha_scan_and_compare_time(ha_rows rows)
{ {
return ha_scan_time(rows) + (double) rows * WHERE_COST; IO_AND_CPU_COST cost= ha_scan_time(rows);
} cost.cpu+= (double) rows * WHERE_COST;
return cost;
/* Cost of (random) reading a block of IO_SIZE */
virtual double avg_io_cost()
{
return DISK_READ_COST;
} }
/* /*
@ -3684,7 +3705,7 @@ protected:
double r= rows2double(rows); double r= rows2double(rows);
return return
{ {
r * avg_io_cost() * stats.block_size/IO_SIZE, // Blocks read r * ((stats.block_size + IO_SIZE -1 )/IO_SIZE), // Blocks read
r * INDEX_BLOCK_COPY_COST // Copy block from cache r * INDEX_BLOCK_COPY_COST // Copy block from cache
}; };
} }
@ -3699,11 +3720,12 @@ public:
row). row).
*/ */
inline double ha_rnd_pos_time(ha_rows rows) inline IO_AND_CPU_COST ha_rnd_pos_time(ha_rows rows)
{ {
IO_AND_CPU_COST cost= rnd_pos_time(rows); IO_AND_CPU_COST cost= rnd_pos_time(rows);
return (cost.io * DISK_READ_RATIO + set_if_smaller(cost.io, (double) row_blocks());
cost.cpu + rows2double(rows) * (ROW_LOOKUP_COST + ROW_COPY_COST)); cost.cpu+= rows2double(rows) * (ROW_LOOKUP_COST + ROW_COPY_COST);
return cost;
} }
/* /*
@ -3712,20 +3734,24 @@ public:
but that may change in the future after we do more cost checks for but that may change in the future after we do more cost checks for
more engines. more engines.
*/ */
inline double ha_rnd_pos_call_time(ha_rows rows) inline IO_AND_CPU_COST ha_rnd_pos_call_time(ha_rows rows)
{ {
IO_AND_CPU_COST cost= rnd_pos_time(rows); IO_AND_CPU_COST cost= rnd_pos_time(rows);
return (cost.io * DISK_READ_RATIO + set_if_smaller(cost.io, (double) row_blocks());
cost.cpu + rows2double(rows) * (ROW_LOOKUP_COST + ROW_COPY_COST)); cost.cpu+= rows2double(rows) * (ROW_LOOKUP_COST + ROW_COPY_COST);
return cost;
} }
inline double ha_rnd_pos_call_and_compare_time(ha_rows rows) inline IO_AND_CPU_COST ha_rnd_pos_call_and_compare_time(ha_rows rows)
{ {
return (ha_rnd_pos_call_time(rows) + rows2double(rows) * WHERE_COST); IO_AND_CPU_COST cost;
cost= ha_rnd_pos_call_time(rows);
cost.cpu+= rows2double(rows) * WHERE_COST;
return cost;
} }
/** /**
Calculate cost of 'index_only' scan for given index, a number of reanges Calculate cost of 'index_only' scan for given index, a number of ranges
and number of records. and number of records.
@param index Index to read @param index Index to read
@ -3742,25 +3768,30 @@ public:
Calculate cost of 'keyread' scan for given index and number of records Calculate cost of 'keyread' scan for given index and number of records
including fetching the key to the 'record' buffer. including fetching the key to the 'record' buffer.
*/ */
double ha_keyread_time(uint index, ulong ranges, ha_rows rows, IO_AND_CPU_COST ha_keyread_time(uint index, ulong ranges, ha_rows rows,
ulonglong blocks); ulonglong blocks);
/* Same as above, but take into account copying the key the the SQL layer */ /* Same as above, but take into account copying the key the the SQL layer */
inline double ha_keyread_and_copy_time(uint index, ulong ranges, inline IO_AND_CPU_COST ha_keyread_and_copy_time(uint index, ulong ranges,
ha_rows rows, ulonglong blocks) ha_rows rows,
ulonglong blocks)
{ {
return (ha_keyread_time(index, ranges, rows, blocks) + IO_AND_CPU_COST cost= ha_keyread_time(index, ranges, rows, blocks);
(double) rows * KEY_COPY_COST); cost.cpu+= (double) rows * KEY_COPY_COST;
return cost;
} }
inline double ha_keyread_and_compare_time(uint index, ulong ranges, inline IO_AND_CPU_COST ha_keyread_and_compare_time(uint index, ulong ranges,
ha_rows rows, ulonglong blocks) ha_rows rows,
ulonglong blocks)
{ {
return (ha_keyread_time(index, ranges, rows, blocks) + IO_AND_CPU_COST cost= ha_keyread_time(index, ranges, rows, blocks);
(double) rows * (KEY_COPY_COST + WHERE_COST)); cost.cpu+= (double) rows * (KEY_COPY_COST + WHERE_COST);
return cost;
} }
double ha_keyread_clustered_and_copy_time(uint index, ulong ranges, IO_AND_CPU_COST ha_keyread_clustered_time(uint index,
ulong ranges,
ha_rows rows, ha_rows rows,
ulonglong blocks); ulonglong blocks);
/* /*
@ -3776,21 +3807,23 @@ protected:
public: public:
/* Cost of doing a full index scan */ /* Cost of doing a full index scan */
inline double ha_key_scan_time(uint index, ha_rows rows) inline IO_AND_CPU_COST ha_key_scan_time(uint index, ha_rows rows)
{ {
IO_AND_CPU_COST cost= key_scan_time(index, rows); IO_AND_CPU_COST cost= key_scan_time(index, rows);
return (cost.io * DISK_READ_RATIO + cost.cpu+= (INDEX_SCAN_SETUP_COST + KEY_LOOKUP_COST +
cost.cpu + INDEX_SCAN_SETUP_COST + KEY_LOOKUP_COST + (double) rows * (KEY_NEXT_FIND_COST + KEY_COPY_COST));
(double) rows * (KEY_NEXT_FIND_COST + KEY_COPY_COST)); return cost;
} }
/* /*
Cost of doing a full index scan with record copy and compare Cost of doing a full index scan with record copy and compare
@param rows Rows from stat tables @param rows Rows from stat tables
*/ */
inline double ha_key_scan_and_compare_time(uint index, ha_rows rows) inline IO_AND_CPU_COST ha_key_scan_and_compare_time(uint index, ha_rows rows)
{ {
return ha_key_scan_time(index, rows) + (double) rows * WHERE_COST; IO_AND_CPU_COST cost= ha_key_scan_time(index, rows);
cost.cpu+= (double) rows * WHERE_COST;
return cost;
} }
virtual const key_map *keys_to_use_for_scanning() { return &key_map_empty; } virtual const key_map *keys_to_use_for_scanning() { return &key_map_empty; }
@ -5602,4 +5635,10 @@ uint ha_count_rw_2pc(THD *thd, bool all);
uint ha_check_and_coalesce_trx_read_only(THD *thd, Ha_trx_info *ha_list, uint ha_check_and_coalesce_trx_read_only(THD *thd, Ha_trx_info *ha_list,
bool all); bool all);
inline void Cost_estimate::reset(handler *file)
{
reset();
avg_io_cost= file->DISK_READ_COST * file->DISK_READ_RATIO;
}
#endif /* HANDLER_INCLUDED */ #endif /* HANDLER_INCLUDED */

View File

@ -22,6 +22,9 @@
#include "rowid_filter.h" #include "rowid_filter.h"
#include "optimizer_defaults.h" #include "optimizer_defaults.h"
static void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted,
Cost_estimate *cost);
/* The following calculation is the same as in multi_range_read_info() */ /* The following calculation is the same as in multi_range_read_info() */
@ -32,34 +35,35 @@ void handler::calculate_costs(Cost_estimate *cost, uint keyno,
ulonglong io_blocks, ulonglong io_blocks,
ulonglong unassigned_single_point_ranges) ulonglong unassigned_single_point_ranges)
{ {
double key_cost; cost->reset(this);
cost->reset();
cost->avg_io_cost= cost->idx_avg_io_cost= 0; // Not used!
if (!is_clustering_key(keyno)) if (!is_clustering_key(keyno))
{ {
key_cost= ha_keyread_time(keyno, n_ranges, total_rows, io_blocks); cost->index_cost= ha_keyread_time(keyno, n_ranges, total_rows, io_blocks);
cost->idx_cpu_cost= key_cost;
if (!(flags & HA_MRR_INDEX_ONLY)) if (!(flags & HA_MRR_INDEX_ONLY))
{ {
/* ha_rnd_pos_time includes ROW_COPY_COST */ /* ha_rnd_pos_time includes ROW_COPY_COST */
cost->cpu_cost= ha_rnd_pos_time(total_rows); cost->row_cost= ha_rnd_pos_time(total_rows);
/* Adjust io cost to data size */
cost->row_cost.io= MY_MIN(cost->row_cost.io, row_blocks());
} }
else else
{ {
/* Index only read */ /* Index only read */
cost->copy_cost= rows2double(total_rows) * KEY_COPY_COST; cost->copy_cost= rows2double(total_rows) * KEY_COPY_COST;
} }
} }
else else
{ {
/* Clustered index */ /* Clustered index */
io_blocks+= unassigned_single_point_ranges; io_blocks= unassigned_single_point_ranges;
key_cost= ha_keyread_time(keyno, n_ranges, total_rows, io_blocks); cost->index_cost= ha_keyread_time(keyno, n_ranges, total_rows, io_blocks);
cost->idx_cpu_cost= key_cost; cost->copy_cost= rows2double(total_rows) * ROW_COPY_COST;
cost->copy_cost= rows2double(total_rows) * ROW_COPY_COST;
} }
/* Adjust io cost to data size */
cost->index_cost.io= MY_MIN(cost->index_cost.io, index_blocks(keyno));
cost->comp_cost= (rows2double(total_rows) * WHERE_COST + cost->comp_cost= (rows2double(total_rows) * WHERE_COST +
MULTI_RANGE_READ_SETUP_COST); MULTI_RANGE_READ_SETUP_COST);
} }
@ -357,7 +361,7 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
{ {
/* /*
Calculate what the cost would be if we only have to read 'top_limit' Calculate what the cost would be if we only have to read 'top_limit'
rows. This is the lowest possible cost fwhen using the range rows. This is the lowest possible cost when using the range
when we find the 'accepted rows' at once. when we find the 'accepted rows' at once.
*/ */
Cost_estimate limit_cost; Cost_estimate limit_cost;
@ -365,16 +369,14 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
io_blocks, unassigned_single_point_ranges); io_blocks, unassigned_single_point_ranges);
cost->limit_cost= limit_cost.total_cost(); cost->limit_cost= limit_cost.total_cost();
} }
DBUG_PRINT("statistics",
("key: %s rows: %llu total_cost: %.3f io_blocks: %llu "
"cpu_cost: %.3f",
table->s->keynames.type_names[keyno],
(ulonglong) total_rows, cost->total_cost(),
(ulonglong) (cost->row_cost.io + cost->index_cost.io),
(double) (cost->row_cost.cpu + cost->index_cost.cpu)));
} }
DBUG_PRINT("statistics",
("key: %s rows: %llu total_cost: %.3f io_blocks: %llu "
"idx_io_count: %.3f cpu_cost: %.3f io_count: %.3f "
"compare_cost: %.3f",
table->s->keynames.type_names[keyno],
(ulonglong) total_rows, cost->total_cost(),
(ulonglong) io_blocks,
cost->idx_io_count, cost->cpu_cost, cost->io_count,
cost->comp_cost));
DBUG_RETURN(total_rows); DBUG_RETURN(total_rows);
} }
@ -413,7 +415,8 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
other Error or can't perform the requested scan other Error or can't perform the requested scan
*/ */
ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint total_rows, ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges,
uint total_rows,
uint key_parts, uint *bufsz, uint key_parts, uint *bufsz,
uint *flags, Cost_estimate *cost) uint *flags, Cost_estimate *cost)
{ {
@ -426,17 +429,17 @@ ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint total_row
*bufsz= 0; /* Default implementation doesn't need a buffer */ *bufsz= 0; /* Default implementation doesn't need a buffer */
*flags |= HA_MRR_USE_DEFAULT_IMPL; *flags |= HA_MRR_USE_DEFAULT_IMPL;
cost->reset(); cost->reset(this);
/* Produce the same cost as non-MRR code does */ /* Produce the same cost as non-MRR code does */
if (!is_clustering_key(keyno)) if (!is_clustering_key(keyno))
{ {
double key_cost= ha_keyread_time(keyno, n_ranges, total_rows, 0); cost->index_cost= ha_keyread_time(keyno, n_ranges, total_rows, 0);
cost->idx_cpu_cost= key_cost;
if (!(*flags & HA_MRR_INDEX_ONLY)) if (!(*flags & HA_MRR_INDEX_ONLY))
{ {
/* ha_rnd_pos_time includes ROW_COPY_COST */ /* ha_rnd_pos_time includes ROW_COPY_COST */
cost->cpu_cost= ha_rnd_pos_time(total_rows); cost->row_cost= ha_rnd_pos_time(total_rows);
} }
else else
{ {
@ -447,7 +450,8 @@ ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint total_row
else else
{ {
/* Clustering key */ /* Clustering key */
cost->cpu_cost= ha_keyread_time(keyno, n_ranges, total_rows, 0); cost->index_cost= ha_keyread_clustered_time(keyno, n_ranges, total_rows,
0);
cost->copy_cost= rows2double(total_rows) * ROW_COPY_COST; cost->copy_cost= rows2double(total_rows) * ROW_COPY_COST;
} }
cost->comp_cost= rows2double(total_rows) * WHERE_COST; cost->comp_cost= rows2double(total_rows) * WHERE_COST;
@ -1966,7 +1970,8 @@ int DsMrr_impl::dsmrr_explain_info(uint mrr_mode, char *str, size_t size)
} }
static void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, Cost_estimate *cost); static void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows,
Cost_estimate *cost);
/** /**
@ -1997,7 +2002,6 @@ bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
ha_rows rows_in_full_step; ha_rows rows_in_full_step;
ha_rows rows_in_last_step; ha_rows rows_in_last_step;
uint n_full_steps; uint n_full_steps;
double index_read_cost;
elem_size= primary_file->ref_length + elem_size= primary_file->ref_length +
sizeof(void*) * (!MY_TEST(flags & HA_MRR_NO_ASSOCIATION)); sizeof(void*) * (!MY_TEST(flags & HA_MRR_NO_ASSOCIATION));
@ -2030,6 +2034,8 @@ bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
rows_in_full_step= max_buff_entries; rows_in_full_step= max_buff_entries;
rows_in_last_step= rows % max_buff_entries; rows_in_last_step= rows % max_buff_entries;
cost->reset(primary_file);
/* Adjust buffer size if we expect to use only part of the buffer */ /* Adjust buffer size if we expect to use only part of the buffer */
if (n_full_steps) if (n_full_steps)
{ {
@ -2038,7 +2044,6 @@ bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
} }
else else
{ {
cost->reset();
*buffer_size= ((uint) MY_MAX(*buffer_size, *buffer_size= ((uint) MY_MAX(*buffer_size,
(size_t)(1.2*rows_in_last_step) * elem_size + (size_t)(1.2*rows_in_last_step) * elem_size +
primary_file->ref_length + primary_file->ref_length +
@ -2046,17 +2051,12 @@ bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
} }
Cost_estimate last_step_cost; Cost_estimate last_step_cost;
last_step_cost.avg_io_cost= cost->avg_io_cost;
get_sort_and_sweep_cost(table, rows_in_last_step, &last_step_cost); get_sort_and_sweep_cost(table, rows_in_last_step, &last_step_cost);
cost->add(&last_step_cost); cost->add(&last_step_cost);
if (n_full_steps != 0)
cost->mem_cost= *buffer_size;
else
cost->mem_cost= (double)rows_in_last_step * elem_size;
/* Total cost of all index accesses */ /* Total cost of all index accesses */
index_read_cost= primary_file->ha_keyread_and_copy_time(keynr, 1, rows, 0); cost->index_cost= primary_file->ha_keyread_and_copy_time(keynr, 1, rows, 0);
cost->add_io(index_read_cost, 1 /* Random seeks */);
return FALSE; return FALSE;
} }
@ -2085,8 +2085,6 @@ void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, Cost_estimate *cost)
cmp_op= 3; cmp_op= 3;
cost->cpu_cost += cmp_op * log2(cmp_op); cost->cpu_cost += cmp_op * log2(cmp_op);
} }
else
cost->reset();
} }
@ -2100,14 +2098,13 @@ void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, Cost_estimate *cost)
@param cost OUT The cost. @param cost OUT The cost.
*/ */
void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted, static void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted,
Cost_estimate *cost) Cost_estimate *cost)
{ {
DBUG_ENTER("get_sweep_read_cost"); DBUG_ENTER("get_sweep_read_cost");
cost->reset();
#ifndef OLD_SWEEP_COST #ifndef OLD_SWEEP_COST
cost->cpu_cost= table->file->ha_rnd_pos_call_time(nrows); cost->row_cost= table->file->ha_rnd_pos_call_time(nrows);
#else #else
if (table->file->pk_is_clustering_key(table->s->primary_key)) if (table->file->pk_is_clustering_key(table->s->primary_key))
{ {

View File

@ -2713,6 +2713,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
TABLE_READ_PLAN *best_trp= NULL; TABLE_READ_PLAN *best_trp= NULL;
SEL_ARG **backup_keys= 0; SEL_ARG **backup_keys= 0;
ha_rows table_records= head->stat_records(); ha_rows table_records= head->stat_records();
handler *file= head->file;
/* We trust that if stat_records() is 0 the table is really empty! */ /* We trust that if stat_records() is 0 the table is really empty! */
bool impossible_range= table_records == 0; bool impossible_range= table_records == 0;
DBUG_ENTER("SQL_SELECT::test_quick_select"); DBUG_ENTER("SQL_SELECT::test_quick_select");
@ -2732,14 +2733,14 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
DBUG_RETURN(0); DBUG_RETURN(0);
records= table_records; records= table_records;
notnull_cond= head->notnull_cond; notnull_cond= head->notnull_cond;
if (head->file->ha_table_flags() & HA_NON_COMPARABLE_ROWID) if (file->ha_table_flags() & HA_NON_COMPARABLE_ROWID)
only_single_index_range_scan= 1; only_single_index_range_scan= 1;
if (head->force_index || force_quick_range) if (head->force_index || force_quick_range)
read_time= DBL_MAX; read_time= DBL_MAX;
else else
{ {
read_time= head->file->ha_scan_and_compare_time(records); read_time= file->cost(file->ha_scan_and_compare_time(records));
if (limit < records) if (limit < records)
notnull_cond= NULL; notnull_cond= NULL;
} }
@ -2775,7 +2776,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
/* set up parameter that is passed to all functions */ /* set up parameter that is passed to all functions */
param.thd= thd; param.thd= thd;
param.baseflag= head->file->ha_table_flags(); param.baseflag= file->ha_table_flags();
param.prev_tables=prev_tables | const_tables; param.prev_tables=prev_tables | const_tables;
param.read_tables=read_tables; param.read_tables=read_tables;
param.current_table= head->map; param.current_table= head->map;
@ -2884,8 +2885,9 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
{ {
double key_read_time; double key_read_time;
uint key_for_use= find_shortest_key(head, &head->covering_keys); uint key_for_use= find_shortest_key(head, &head->covering_keys);
key_read_time= head->file->ha_key_scan_and_compare_time(key_for_use, key_read_time= file->cost(file->
records); ha_key_scan_and_compare_time(key_for_use,
records));
DBUG_PRINT("info", ("'all'+'using index' scan will be using key %d, " DBUG_PRINT("info", ("'all'+'using index' scan will be using key %d, "
"read time %g", key_for_use, key_read_time)); "read time %g", key_for_use, key_read_time));
@ -5095,9 +5097,15 @@ static double get_sweep_read_cost(const PARAM *param, ha_rows records,
{ {
DBUG_ENTER("get_sweep_read_cost"); DBUG_ENTER("get_sweep_read_cost");
#ifndef OLD_SWEEP_COST #ifndef OLD_SWEEP_COST
double cost= (param->table->file->ha_rnd_pos_call_time(records) + handler *file= param->table->file;
(add_time_for_compare ? IO_AND_CPU_COST engine_cost= file->ha_rnd_pos_call_time(records);
records * param->thd->variables.optimizer_where_cost : 0)); double cost;
if (add_time_for_compare)
{
engine_cost.cpu+= records * param->thd->variables.optimizer_where_cost;
}
cost= file->cost(engine_cost);
DBUG_PRINT("return", ("cost: %g", cost)); DBUG_PRINT("return", ("cost: %g", cost));
DBUG_RETURN(cost); DBUG_RETURN(cost);
#else #else
@ -5481,9 +5489,9 @@ skip_to_ror_scan:
double cost; double cost;
if ((*cur_child)->is_ror) if ((*cur_child)->is_ror)
{ {
/* Ok, we have index_only cost, now get full rows lokoup cost */ handler *file= param->table->file;
cost= param->table->file-> /* Ok, we have index_only cost, now get full rows scan cost */
ha_rnd_pos_call_and_compare_time((*cur_child)->records); cost= file->cost(file->ha_rnd_pos_call_and_compare_time((*cur_child)->records));
} }
else else
cost= read_time; cost= read_time;
@ -6681,6 +6689,7 @@ ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg)
ROR_SCAN_INFO *ror_scan; ROR_SCAN_INFO *ror_scan;
my_bitmap_map *bitmap_buf; my_bitmap_map *bitmap_buf;
uint keynr; uint keynr;
handler *file= param->table->file;
DBUG_ENTER("make_ror_scan"); DBUG_ENTER("make_ror_scan");
if (!(ror_scan= (ROR_SCAN_INFO*)alloc_root(param->mem_root, if (!(ror_scan= (ROR_SCAN_INFO*)alloc_root(param->mem_root,
@ -6690,7 +6699,7 @@ ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg)
ror_scan->idx= idx; ror_scan->idx= idx;
ror_scan->keynr= keynr= param->real_keynr[idx]; ror_scan->keynr= keynr= param->real_keynr[idx];
ror_scan->key_rec_length= (param->table->key_info[keynr].key_length + ror_scan->key_rec_length= (param->table->key_info[keynr].key_length +
param->table->file->ref_length); file->ref_length);
ror_scan->sel_arg= sel_arg; ror_scan->sel_arg= sel_arg;
ror_scan->records= param->quick_rows[keynr]; ror_scan->records= param->quick_rows[keynr];
@ -6717,8 +6726,8 @@ ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg)
ror queue. ror queue.
*/ */
ror_scan->index_read_cost= ror_scan->index_read_cost=
param->table->file->ha_keyread_and_copy_time(ror_scan->keynr, 1, file->cost(file->ha_keyread_and_copy_time(ror_scan->keynr, 1,
ror_scan->records, 0); ror_scan->records, 0));
DBUG_RETURN(ror_scan); DBUG_RETURN(ror_scan);
} }
@ -7664,8 +7673,8 @@ static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
Json_writer_object trace_idx(thd); Json_writer_object trace_idx(thd);
trace_idx.add("index", param->table->key_info[keynr].name); trace_idx.add("index", param->table->key_info[keynr].name);
found_records= check_quick_select(param, idx, limit, read_index_only, key, found_records= check_quick_select(param, idx, limit, read_index_only,
for_range_access, &mrr_flags, key, for_range_access, &mrr_flags,
&buf_size, &cost, &is_ror_scan); &buf_size, &cost, &is_ror_scan);
if (found_records == HA_POS_ERROR || if (found_records == HA_POS_ERROR ||
@ -11868,22 +11877,10 @@ ha_rows check_quick_select(PARAM *param, uint idx, ha_rows limit,
rows) : rows) :
1.0); // ok as rows is 0 1.0); // ok as rows is 0
range->rows= rows; range->rows= rows;
/* cost of finding a row without copy or checking the where */ range->cost= *cost;
range->find_cost= cost->find_cost(); range->max_index_blocks= file->index_blocks(keynr, range->ranges,
/* cost of finding a row copying it to the row buffer */ rows);
range->fetch_cost= range->find_cost + cost->data_copy_cost(); range->max_row_blocks= MY_MIN(file->row_blocks(), rows * file->stats.block_size / IO_SIZE);
/* Add comparing it to the where. Same as cost.total_cost() */
range->cost= (range->fetch_cost + cost->compare_cost());
/* Calculate the cost of just finding the key. Used by filtering */
if (param->table->file->is_clustering_key(keynr))
range->index_only_cost= range->find_cost;
else
{
range->index_only_cost= cost->index_only_cost();
DBUG_ASSERT(!(*mrr_flags & HA_MRR_INDEX_ONLY) ||
range->index_only_cost ==
range->find_cost);
}
range->first_key_part_has_only_one_value= range->first_key_part_has_only_one_value=
check_if_first_key_part_has_only_one_value(tree); check_if_first_key_part_has_only_one_value(tree);
} }
@ -15120,8 +15117,8 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
1); 1);
if (keys_per_group == 0) /* If there is no statistics try to guess */ if (keys_per_group == 0) /* If there is no statistics try to guess */
{ {
/* each group contains 1% of all records */ /* each group contains 10% of all records */
keys_per_group= (records / 100) + 1; keys_per_group= (records / 10) + 1;
} }
} }
if (keys_per_group > 1) if (keys_per_group > 1)
@ -15168,12 +15165,11 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
} }
DBUG_ASSERT(num_groups <= records); DBUG_ASSERT(num_groups <= records);
/* Calculate the number of blocks we will touch for the table or range scan */ /* Calculate the number of blocks we will touch for the table or range scan */
num_blocks= (records * key_length / INDEX_BLOCK_FILL_FACTOR_DIV * num_blocks= (records * key_length / INDEX_BLOCK_FILL_FACTOR_DIV *
INDEX_BLOCK_FILL_FACTOR_MUL) / file->stats.block_size + 1; INDEX_BLOCK_FILL_FACTOR_MUL) / file->stats.block_size + 1;
io_cost= (have_max) ? num_groups*2 : num_groups; io_cost= (have_max) ? num_groups * 2 : num_groups;
set_if_smaller(io_cost, num_blocks); set_if_smaller(io_cost, num_blocks);
/* /*
@ -15184,9 +15180,10 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
group. group.
*/ */
uint keyno= (uint) (index_info - table->key_info); uint keyno= (uint) (index_info - table->key_info);
*read_cost= file->ha_keyread_and_compare_time(keyno, (ulong) num_groups, *read_cost= file->cost(file->ha_keyread_and_compare_time(keyno,
num_groups, (ulong) num_groups,
io_cost); num_groups,
io_cost));
*out_records= num_groups; *out_records= num_groups;
DBUG_PRINT("info", DBUG_PRINT("info",

View File

@ -1472,7 +1472,8 @@ void get_delayed_table_estimates(TABLE *table,
hash_sj_engine->tmp_table->s->reclength); hash_sj_engine->tmp_table->s->reclength);
/* Do like in handler::ha_scan_and_compare_time, but ignore the where cost */ /* Do like in handler::ha_scan_and_compare_time, but ignore the where cost */
*scan_time= ((data_size/IO_SIZE * table->file->avg_io_cost()) + *scan_time= ((data_size/IO_SIZE * table->file->DISK_READ_COST *
table->file->DISK_READ_RATIO) +
*out_rows * file->ROW_COPY_COST); *out_rows * file->ROW_COPY_COST);
} }
@ -2521,7 +2522,7 @@ bool optimize_semijoin_nests(JOIN *join, table_map all_table_map)
&subjoin_read_time, &subjoin_read_time,
&subjoin_out_rows); &subjoin_out_rows);
sjm->materialization_cost.convert_from_cost(subjoin_read_time); sjm->materialization_cost=subjoin_read_time;
sjm->rows_with_duplicates= sjm->rows= subjoin_out_rows; sjm->rows_with_duplicates= sjm->rows= subjoin_out_rows;
/* /*
@ -2586,8 +2587,7 @@ bool optimize_semijoin_nests(JOIN *join, table_map all_table_map)
temporary table. Note that smj->materialization_cost already includes temporary table. Note that smj->materialization_cost already includes
row copy and compare costs of finding the original row. row copy and compare costs of finding the original row.
*/ */
sjm->materialization_cost.add_io(subjoin_out_rows, cost.write); sjm->materialization_cost+=subjoin_out_rows * cost.write + cost.create;
sjm->materialization_cost.copy_cost+= cost.create;
/* /*
Set the cost to do a full scan of the temptable (will need this to Set the cost to do a full scan of the temptable (will need this to
@ -2600,10 +2600,10 @@ bool optimize_semijoin_nests(JOIN *join, table_map all_table_map)
total_cost= (scan_cost * cost.cache_hit_ratio * cost.avg_io_cost + total_cost= (scan_cost * cost.cache_hit_ratio * cost.avg_io_cost +
TABLE_SCAN_SETUP_COST_THD(thd) + TABLE_SCAN_SETUP_COST_THD(thd) +
row_copy_cost * sjm->rows); row_copy_cost * sjm->rows);
sjm->scan_cost.convert_from_cost(total_cost); sjm->scan_cost=total_cost;
/* When reading a row, we have also to check the where clause */ /* When reading a row, we have also to check the where clause */
sjm->lookup_cost.convert_from_cost(cost.lookup + WHERE_COST_THD(thd)); sjm->lookup_cost= cost.lookup + WHERE_COST_THD(thd);
sj_nest->sj_mat_info= sjm; sj_nest->sj_mat_info= sjm;
DBUG_EXECUTE("opt", print_sjm(sjm);); DBUG_EXECUTE("opt", print_sjm(sjm););
} }
@ -3183,9 +3183,9 @@ bool Sj_materialization_picker::check_qep(JOIN *join,
mat_read_time= mat_read_time=
COST_ADD(prefix_cost, COST_ADD(prefix_cost,
COST_ADD(mat_info->materialization_cost.total_cost(), COST_ADD(mat_info->materialization_cost,
COST_MULT(prefix_rec_count, COST_MULT(prefix_rec_count,
mat_info->lookup_cost.total_cost()))); mat_info->lookup_cost)));
/* /*
NOTE: When we pick to use SJM[-Scan] we don't memcpy its POSITION NOTE: When we pick to use SJM[-Scan] we don't memcpy its POSITION
@ -3235,9 +3235,9 @@ bool Sj_materialization_picker::check_qep(JOIN *join,
/* Add materialization cost */ /* Add materialization cost */
prefix_cost= prefix_cost=
COST_ADD(prefix_cost, COST_ADD(prefix_cost,
COST_ADD(mat_info->materialization_cost.total_cost(), COST_ADD(mat_info->materialization_cost,
COST_MULT(prefix_rec_count, COST_MULT(prefix_rec_count,
mat_info->scan_cost.total_cost()))); mat_info->scan_cost)));
prefix_rec_count= COST_MULT(prefix_rec_count, mat_info->rows); prefix_rec_count= COST_MULT(prefix_rec_count, mat_info->rows);
uint i; uint i;

View File

@ -228,15 +228,16 @@ public:
{ {
double records, read_time; double records, read_time;
part1_conds_met= TRUE; part1_conds_met= TRUE;
handler *file= s->table->file;
DBUG_PRINT("info", ("Can use full index scan for LooseScan")); DBUG_PRINT("info", ("Can use full index scan for LooseScan"));
/* Calculate the cost of complete loose index scan. */ /* Calculate the cost of complete loose index scan. */
records= rows2double(s->table->file->stats.records); records= rows2double(file->stats.records);
/* The cost is entire index scan cost (divided by 2) */ /* The cost is entire index scan cost (divided by 2) */
read_time= s->table->file->ha_keyread_and_copy_time(key, 1, read_time= file->cost(file->ha_keyread_and_copy_time(key, 1,
(ha_rows) records, (ha_rows) records,
0); 0));
/* /*
Now find out how many different keys we will get (for now we Now find out how many different keys we will get (for now we

View File

@ -175,7 +175,7 @@
#define TMPFILE_CREATE_COST 0.5 // Cost of creating and deleting files #define TMPFILE_CREATE_COST 0.5 // Cost of creating and deleting files
#define HEAP_TEMPTABLE_CREATE_COST 0.025 // ms #define HEAP_TEMPTABLE_CREATE_COST 0.025 // ms
/* Cost taken from HEAP_LOOKUP_COST in ha_heap.cc */ /* Cost taken from HEAP_LOOKUP_COST in ha_heap.cc */
#define HEAP_TEMPTABLE_LOOKUP_COST (0.00016097*1000 + heap_optimizer_costs.row_copy_cost) #define HEAP_TEMPTABLE_LOOKUP_COST (0.00016097)
#define DISK_TEMPTABLE_LOOKUP_COST(thd) (tmp_table_optimizer_costs.key_lookup_cost + tmp_table_optimizer_costs.row_lookup_cost + tmp_table_optimizer_costs.row_copy_cost) #define DISK_TEMPTABLE_LOOKUP_COST(thd) (tmp_table_optimizer_costs.key_lookup_cost + tmp_table_optimizer_costs.row_lookup_cost + tmp_table_optimizer_costs.row_copy_cost)
#define DISK_TEMPTABLE_CREATE_COST TMPFILE_CREATE_COST*2 // 2 tmp tables #define DISK_TEMPTABLE_CREATE_COST TMPFILE_CREATE_COST*2 // 2 tmp tables
#define DISK_TEMPTABLE_BLOCK_SIZE IO_SIZE #define DISK_TEMPTABLE_BLOCK_SIZE IO_SIZE

View File

@ -485,12 +485,9 @@ void Range_rowid_filter_cost_info::trace_info(THD *thd)
*/ */
Range_rowid_filter_cost_info * Range_rowid_filter_cost_info *
TABLE::best_range_rowid_filter_for_partial_join(uint access_key_no, TABLE::best_range_rowid_filter(uint access_key_no, double records,
double records, double fetch_cost, double index_only_cost,
double fetch_cost, double prev_records, double *records_out)
double index_only_cost,
double prev_records,
double *records_out)
{ {
if (range_rowid_filter_cost_info_elems == 0 || if (range_rowid_filter_cost_info_elems == 0 ||
covering_keys.is_set(access_key_no)) covering_keys.is_set(access_key_no))

View File

@ -472,18 +472,20 @@ public:
friend friend
void TABLE::init_cost_info_for_usable_range_rowid_filters(THD *thd); void TABLE::init_cost_info_for_usable_range_rowid_filters(THD *thd);
/* Best range row id filter for parital join */
friend friend
Range_rowid_filter_cost_info * Range_rowid_filter_cost_info *
TABLE::best_range_rowid_filter_for_partial_join(uint access_key_no, TABLE::best_range_rowid_filter(uint access_key_no,
double records, double records,
double fetch_cost, double fetch_cost,
double index_only_cost, double index_only_cost,
double prev_records, double prev_records,
double *records_out); double *records_out);
Range_rowid_filter_cost_info * Range_rowid_filter_cost_info *
apply_filter(THD *thd, TABLE *table, double *cost, double *records_arg, apply_filter(THD *thd, TABLE *table, ALL_READ_COST *cost,
double *startup_cost, double fetch_cost, double *records_arg,
double index_only_cost, uint ranges, double record_count); double *startup_cost,
uint ranges, double record_count);
}; };
#endif /* ROWID_FILTER_INCLUDED */ #endif /* ROWID_FILTER_INCLUDED */

View File

@ -6814,13 +6814,13 @@ public:
/* /*
Cost to materialize - execute the sub-join and write rows into temp.table Cost to materialize - execute the sub-join and write rows into temp.table
*/ */
Cost_estimate materialization_cost; double materialization_cost;
/* Cost to make one lookup in the temptable */ /* Cost to make one lookup in the temptable */
Cost_estimate lookup_cost; double lookup_cost;
/* Cost of scanning the materialized table */ /* Cost of scanning the materialized table */
Cost_estimate scan_cost; double scan_cost;
/* --- Execution structures ---------- */ /* --- Execution structures ---------- */

View File

@ -5445,6 +5445,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
{ {
set_position(join,const_count++,s,(KEYUSE*) 0); set_position(join,const_count++,s,(KEYUSE*) 0);
no_rows_const_tables |= table->map; no_rows_const_tables |= table->map;
table->file->stats.records= 0;
} }
} }
@ -7861,42 +7862,49 @@ static double matching_candidates_in_table(JOIN_TAB *s,
WHERE_COST cost is not added to any result. WHERE_COST cost is not added to any result.
*/ */
INDEX_READ_COST cost_for_index_read(const THD *thd, const TABLE *table, ALL_READ_COST cost_for_index_read(const THD *thd, const TABLE *table,
uint key, uint key, ha_rows records,
ha_rows records, ha_rows worst_seeks) ha_rows worst_seeks)
{ {
INDEX_READ_COST cost; ALL_READ_COST cost;
handler *file= table->file; handler *file= table->file;
double rows_adjusted; ha_rows max_seeks;
DBUG_ENTER("cost_for_index_read"); DBUG_ENTER("cost_for_index_read");
rows_adjusted= MY_MIN(rows2double(records), (double) thd->variables.max_seeks_for_key); max_seeks= (ha_rows) thd->variables.max_seeks_for_key;
set_if_bigger(rows_adjusted, 1); set_if_bigger(records, 1);
#ifdef OLD_CODE_LIMITED_SEEKS
set_if_smaller(rows_adjusted, worst_seeks);
#endif
if (file->is_clustering_key(key)) if (file->is_clustering_key(key))
{ {
cost.index_only_cost= cost.index_cost=
file->ha_keyread_clustered_and_copy_time(key, 1, rows_adjusted, 0); file->ha_keyread_clustered_time(key, 1, records, 0);
cost.copy_cost= rows2double(records) * file->ROW_COPY_COST;
/* There is no 'index_only_read' with a clustered index */ /* There is no 'index_only_read' with a clustered index */
cost.read_cost= cost.index_only_cost; cost.row_cost= {0,0};
/* Caping of index_blocks will happen in handler::cost() */
cost.max_index_blocks= MY_MIN(file->row_blocks(), max_seeks);
cost.max_row_blocks= 0;
} }
else if (table->covering_keys.is_set(key) && !table->no_keyread) else if (table->covering_keys.is_set(key) && !table->no_keyread)
{ {
cost.index_only_cost= file->ha_keyread_time(key, 1, rows_adjusted, 0); cost.index_cost= file->ha_keyread_time(key, 1, records, 0);
/* Same computation as in ha_keyread_and_copy_time() */ cost.row_cost= {0,0};
cost.read_cost= (cost.index_only_cost + cost.copy_cost= rows2double(records) * file->KEY_COPY_COST;
rows2double(records) * file->KEY_COPY_COST); cost.max_index_blocks= MY_MIN(file->index_blocks(key), max_seeks);
cost.max_row_blocks= 0;
} }
else else
{ {
cost.index_only_cost= file->ha_keyread_time(key, 1, rows_adjusted, 0); cost.index_cost= file->ha_keyread_time(key, 1, records, 0);
cost.read_cost= (cost.index_only_cost + file->ha_rnd_pos_time(records)); /* ha_rnd_pos_time() includes time for copying the row */
cost.row_cost= file->ha_rnd_pos_time(records);
cost.max_index_blocks= MY_MIN(file->index_blocks(key), max_seeks);
cost.max_row_blocks= MY_MIN(file->row_blocks(), max_seeks);
cost.copy_cost= 0;
} }
DBUG_PRINT("statistics", ("index_cost: %.3f full_cost: %.3f", DBUG_PRINT("statistics", ("index_cost: %.3f row_cost: %.3f",
cost.index_only_cost, cost.read_cost)); file->cost(cost.index_cost),
file->cost(cost.row_cost)));
DBUG_RETURN(cost); DBUG_RETURN(cost);
} }
@ -7906,14 +7914,15 @@ INDEX_READ_COST cost_for_index_read(const THD *thd, const TABLE *table,
@param thd Thread handler @param thd Thread handler
@param table Table @param table Table
@param cost Pointer to cost for *records_arg rows, not including @param cost Pointer to cost for current cost, which does not
WHERE_COST cost. include WHERE_COST cost. Will be updated to
new cost if filter is chosen.
Will be updated to new cost if filter is used. Will be updated to new cost if filter is used.
@param records_arg Pointer to number of records for the current key. @param records_arg Pointer to number of records for the current key.
Will be updated to records after filter, if filter is Will be updated to records after filter, if filter is
used. used.
@param startup_cost Startup cost. Will be updated if filter is used. @param startup_cost Startup cost. Will be updated if filter is used.
@param fetch_cost Cost of finding the row, without copy or compare cost @param fetch_cost Cost of finding the row, without where compare cost
@param index_only_cost Cost if fetching '*records_arg' key values @param index_only_cost Cost if fetching '*records_arg' key values
@param prev_records Number of record combinations in previous tables @param prev_records Number of record combinations in previous tables
@ -7922,16 +7931,18 @@ INDEX_READ_COST cost_for_index_read(const THD *thd, const TABLE *table,
*/ */
Range_rowid_filter_cost_info* Range_rowid_filter_cost_info:: Range_rowid_filter_cost_info* Range_rowid_filter_cost_info::
apply_filter(THD *thd, TABLE *table, double *cost, double *records_arg, apply_filter(THD *thd, TABLE *table, ALL_READ_COST *cost,
double *startup_cost, double fetch_cost, double index_only_cost, double *records_arg,
double *startup_cost,
uint ranges, double prev_records) uint ranges, double prev_records)
{ {
handler *file= table->file;
bool use_filter; bool use_filter;
double new_cost, new_total_cost, records= *records_arg, new_records; double new_cost, org_cost, records= *records_arg, new_records;
double cost_of_accepted_rows, cost_of_rejected_rows;
double filter_startup_cost= get_setup_cost(); double filter_startup_cost= get_setup_cost();
double io_cost= table->file->avg_io_cost();
double filter_lookup_cost= records * lookup_cost(); double filter_lookup_cost= records * lookup_cost();
double tmp;
ALL_READ_COST adjusted_cost;
/* /*
Calculate number of resulting rows after filtering Calculate number of resulting rows after filtering
@ -7955,42 +7966,50 @@ apply_filter(THD *thd, TABLE *table, double *cost, double *records_arg,
The io_cost is used to take into account that we have to do 1 key The io_cost is used to take into account that we have to do 1 key
lookup to find the first matching key in each range. lookup to find the first matching key in each range.
*/ */
cost_of_accepted_rows= fetch_cost * selectivity;
cost_of_rejected_rows= index_only_cost * (1-selectivity); adjusted_cost= *cost;
/* /* We are going to read 'selectivity' fewer rows */
The MAX() is used below to ensure that we take into account the index adjusted_cost.row_cost.io*= selectivity;
read even if selectivity (and thus new_records) would be very low. adjusted_cost.row_cost.cpu*= selectivity;
*/ adjusted_cost.copy_cost*= selectivity;
new_cost= (MY_MAX(cost_of_accepted_rows, adjusted_cost.index_cost.cpu+= filter_lookup_cost;
ranges * table->file->KEY_LOOKUP_COST +
ranges * io_cost * table->file->DISK_READ_RATIO) + tmp= prev_records * WHERE_COST_THD(thd);
cost_of_rejected_rows + filter_lookup_cost); org_cost= (file->cost_for_reading_multiple_times(prev_records,
new_total_cost= ((new_cost + new_records * WHERE_COST_THD(thd)) * cost) +
prev_records + filter_startup_cost); records * tmp);
new_cost= (file->cost_for_reading_multiple_times(prev_records,
&adjusted_cost) +
new_records * tmp + filter_startup_cost);
DBUG_ASSERT(new_cost >= 0 && new_records >= 0); DBUG_ASSERT(new_cost >= 0 && new_records >= 0);
use_filter= ((*cost + records * WHERE_COST_THD(thd)) * prev_records > use_filter= new_cost < org_cost;
new_total_cost);
if (unlikely(thd->trace_started())) if (unlikely(thd->trace_started()))
{ {
Json_writer_object trace_filter(thd, "filter"); Json_writer_object trace_filter(thd, "filter");
trace_filter.add("rowid_filter_key", trace_filter.add("rowid_filter_key",
table->key_info[get_key_no()].name). table->key_info[get_key_no()].name).
add("index_only_cost", index_only_cost). add("index_only_cost", file->cost(cost->index_cost)).
add("filter_startup_cost", filter_startup_cost). add("filter_startup_cost", filter_startup_cost).
add("find_key_and_filter_lookup_cost", filter_lookup_cost). add("find_key_and_filter_lookup_cost", filter_lookup_cost).
add("filter_selectivity", selectivity). add("filter_selectivity", selectivity).
add("orginal_rows", records). add("original_rows", records).
add("new_rows", new_records). add("new_rows", new_records).
add("original_found_rows_cost", fetch_cost). add("original_access_cost", file->cost(cost)).
add("new_found_rows_cost", new_cost). add("with_filter_access_cost", file->cost(&adjusted_cost)).
add("cost", new_total_cost). add("original_found_rows_cost", file->cost(cost->row_cost)).
add("with_filter_found_rows_cost", file->cost(adjusted_cost.row_cost)).
add("org_cost", org_cost).
add("filter_cost", new_cost).
add("filter_used", use_filter); add("filter_used", use_filter);
} }
if (use_filter) if (use_filter)
{ {
*cost= new_cost; cost->row_cost= adjusted_cost.row_cost;
cost->index_cost= adjusted_cost.index_cost;
cost->copy_cost= adjusted_cost.copy_cost;
*records_arg= new_records; *records_arg= new_records;
(*startup_cost)+= filter_startup_cost; (*startup_cost)+= filter_startup_cost;
return this; return this;
@ -8060,6 +8079,7 @@ best_access_path(JOIN *join,
uint use_cond_selectivity= uint use_cond_selectivity=
thd->variables.optimizer_use_condition_selectivity; thd->variables.optimizer_use_condition_selectivity;
TABLE *table= s->table; TABLE *table= s->table;
handler *file= table->file;
my_bool found_constraint= 0; my_bool found_constraint= 0;
/* /*
key_dependent is 0 if all key parts could be used or if there was an key_dependent is 0 if all key parts could be used or if there was an
@ -8068,7 +8088,7 @@ best_access_path(JOIN *join,
Otherwise it's a bitmap of tables that could improve key usage. Otherwise it's a bitmap of tables that could improve key usage.
*/ */
table_map key_dependent= 0; table_map key_dependent= 0;
double tmp; ALL_READ_COST tmp;
ha_rows rec; ha_rows rec;
MY_BITMAP *eq_join_set= &s->table->eq_join_set; MY_BITMAP *eq_join_set= &s->table->eq_join_set;
KEYUSE *hj_start_key= 0; KEYUSE *hj_start_key= 0;
@ -8112,9 +8132,9 @@ best_access_path(JOIN *join,
if (s->keyuse) if (s->keyuse)
{ /* Use key if possible */ { /* Use key if possible */
KEYUSE *keyuse, *start_key= 0; KEYUSE *keyuse, *start_key= 0;
double index_only_cost= DBL_MAX;
uint max_key_part=0; uint max_key_part=0;
enum join_type type= JT_UNKNOWN; enum join_type type= JT_UNKNOWN;
double cur_cost;
/* Test how we can use keys */ /* Test how we can use keys */
rec= s->records/MATCHING_ROWS_IN_OTHER_TABLE; // Assumed records/key rec= s->records/MATCHING_ROWS_IN_OTHER_TABLE; // Assumed records/key
@ -8245,16 +8265,26 @@ best_access_path(JOIN *join,
if (ft_key) if (ft_key)
{ {
/* /*
Calculate an adjusted cost based on how many records are read Fulltext indexes are preformed the following way:
This will be later multipled by record_count. - In the prepare step it performs the search, collects all positions
in an array, sorts it.
- If optimizer decides to use the ft index access method it simply'
returns positions from the array one by one
- If optimizer decides to use something else (another index, table
scan), then it'll use binary search in the array to find the
position.
The following code puts the cost down to very small as the prep
step will always be done and the cost to fetch the row from memory
is very small.
Alternatively we could use the cost of an EQ_REF here.
*/ */
tmp= (prev_record_reads(join_positions, idx, found_ref) / tmp.reset();
record_count); tmp.row_cost.cpu= file->ROW_COPY_COST;
set_if_smaller(tmp, 1.0);
index_only_cost= tmp;
/* /*
Really, there should be records=0.0 (yes!) We don't know how many records will match. However, we want to have
but 1.0 would be probably safer the fulltext search done early, so we put the number of records
to be very low.
*/ */
records= 1.0; records= 1.0;
type= JT_FT; type= JT_FT;
@ -8293,25 +8323,21 @@ best_access_path(JOIN *join,
if (!found_ref && table->opt_range_keys.is_set(key)) if (!found_ref && table->opt_range_keys.is_set(key))
{ {
/* Ensure that the cost is identical to the range cost */ /* Ensure that the cost is identical to the range cost */
tmp= table->opt_range[key].fetch_cost; table->opt_range[key].get_costs(&tmp);
index_only_cost= table->opt_range[key].index_only_cost;
} }
else else
{ {
INDEX_READ_COST cost= cost_for_index_read(thd, table, key, tmp= cost_for_index_read(thd, table, key, 1, 1);
1,1);
tmp= cost.read_cost;
index_only_cost= cost.index_only_cost;
} }
/* /*
Calculate an adjusted cost based on how many records are read Calculate an adjusted cost based on how many records are read
This will be later multipled by record_count. This will be multipled by record_count.
*/ */
adjusted_cost= (prev_record_reads(join_positions, idx, found_ref) / adjusted_cost= (prev_record_reads(join_positions, idx, found_ref) /
record_count); record_count);
set_if_smaller(adjusted_cost, 1.0); set_if_smaller(adjusted_cost, 1.0);
tmp*= adjusted_cost; tmp.row_cost.cpu*= adjusted_cost;
index_only_cost*= adjusted_cost; tmp.index_cost.cpu*= adjusted_cost;
records= 1.0; records= 1.0;
} }
else else
@ -8345,8 +8371,8 @@ best_access_path(JOIN *join,
/* Ensure that the cost is identical to the range cost */ /* Ensure that the cost is identical to the range cost */
records= (double) table->opt_range[key].rows; records= (double) table->opt_range[key].rows;
trace_access_idx.add("used_range_estimates", true); trace_access_idx.add("used_range_estimates", true);
tmp= table->opt_range[key].fetch_cost;
index_only_cost= table->opt_range[key].index_only_cost; table->opt_range[key].get_costs(&tmp);
goto got_cost2; goto got_cost2;
} }
/* quick_range couldn't use key! */ /* quick_range couldn't use key! */
@ -8408,16 +8434,14 @@ best_access_path(JOIN *join,
} }
} }
/* Calculate the cost of the index access */ /* Calculate the cost of the index access */
INDEX_READ_COST cost= tmp= cost_for_index_read(thd, table, key,
cost_for_index_read(thd, table, key, (ha_rows) records,
(ha_rows) records, (ha_rows) s->worst_seeks);
(ha_rows) s->worst_seeks);
tmp= cost.read_cost;
index_only_cost= cost.index_only_cost;
} }
} }
else else
{ {
ha_rows tmp_records;
type = ref_or_null_part ? JT_REF_OR_NULL : JT_REF; type = ref_or_null_part ? JT_REF_OR_NULL : JT_REF;
if (unlikely(trace_access_idx.trace_started())) if (unlikely(trace_access_idx.trace_started()))
trace_access_idx. trace_access_idx.
@ -8430,7 +8454,7 @@ best_access_path(JOIN *join,
records. records.
*/ */
if ((found_part & 1) && if ((found_part & 1) &&
(!(table->file->index_flags(key, 0, 0) & HA_ONLY_WHOLE_INDEX) || (!(file->index_flags(key, 0, 0) & HA_ONLY_WHOLE_INDEX) ||
found_part == PREV_BITS(uint,keyinfo->user_defined_key_parts))) found_part == PREV_BITS(uint,keyinfo->user_defined_key_parts)))
{ {
double extra_cost= 0; double extra_cost= 0;
@ -8480,8 +8504,7 @@ best_access_path(JOIN *join,
table->opt_range[key].ranges == 1 + MY_TEST(ref_or_null_part)) //(C3) table->opt_range[key].ranges == 1 + MY_TEST(ref_or_null_part)) //(C3)
{ {
records= (double) table->opt_range[key].rows; records= (double) table->opt_range[key].rows;
tmp= table->opt_range[key].fetch_cost; table->opt_range[key].get_costs(&tmp);
index_only_cost= table->opt_range[key].index_only_cost;
/* /*
TODO: Disable opt_range testing below for this range as we can TODO: Disable opt_range testing below for this range as we can
always use this ref instead. always use this ref instead.
@ -8599,13 +8622,12 @@ best_access_path(JOIN *join,
/* Limit the number of matched rows */ /* Limit the number of matched rows */
set_if_smaller(records, (double) s->records); set_if_smaller(records, (double) s->records);
tmp= records; tmp_records= records;
set_if_smaller(tmp, (double) thd->variables.max_seeks_for_key); set_if_smaller(tmp_records, thd->variables.max_seeks_for_key);
INDEX_READ_COST cost= cost_for_index_read(thd, table, key, tmp= cost_for_index_read(thd, table, key,
(ha_rows) tmp, tmp_records,
(ha_rows) s->worst_seeks); (ha_rows) s->worst_seeks);
tmp= cost.read_cost; tmp.copy_cost+= extra_cost;
index_only_cost= cost.index_only_cost+extra_cost;
} }
else else
{ {
@ -8620,7 +8642,7 @@ best_access_path(JOIN *join,
got_cost2: got_cost2:
loose_scan_opt.check_ref_access_part2(key, start_key, records, loose_scan_opt.check_ref_access_part2(key, start_key, records,
tmp + startup_cost, file->cost(&tmp) + startup_cost,
found_ref); found_ref);
} /* not ft_key */ } /* not ft_key */
@ -8630,14 +8652,13 @@ best_access_path(JOIN *join,
records_best_filter= records_after_filter= records; records_best_filter= records_after_filter= records;
/* /*
Check that start_key->key can be used for index access Check if we can use a filter.
Records can be 0 in case of empty tables. Records can be 0 in case of empty tables.
*/ */
if ((found_part & 1) && records && if ((found_part & 1) && records &&
(table->file->index_flags(start_key->key,0,1) & (table->file->index_flags(start_key->key,0,1) &
HA_DO_RANGE_FILTER_PUSHDOWN)) HA_DO_RANGE_FILTER_PUSHDOWN))
{ {
/* /*
If we use filter F with selectivity s the the cost of fetching data If we use filter F with selectivity s the the cost of fetching data
by key using this filter will be by key using this filter will be
@ -8683,38 +8704,39 @@ best_access_path(JOIN *join,
number of rows from prev_record_read() and keyread_tmp is 0. These number of rows from prev_record_read() and keyread_tmp is 0. These
numbers are not usable with rowid filter code. numbers are not usable with rowid filter code.
*/ */
filter= filter= table->best_range_rowid_filter(start_key->key,
table->best_range_rowid_filter_for_partial_join(start_key->key, records,
records, file->cost(&tmp),
tmp, file->cost(tmp.index_cost),
index_only_cost, record_count,
record_count, &records_best_filter);
&records_best_filter);
set_if_smaller(best.records_out, records_best_filter); set_if_smaller(best.records_out, records_best_filter);
if (filter) if (filter)
filter= filter->apply_filter(thd, table, &tmp, &records_after_filter, filter= filter->apply_filter(thd, table, &tmp,
&records_after_filter,
&startup_cost, &startup_cost,
tmp, index_only_cost,
1, record_count); 1, record_count);
} }
tmp= COST_ADD(tmp, records_after_filter * WHERE_COST_THD(thd));
tmp= COST_MULT(tmp, record_count); tmp.copy_cost+= records_after_filter * WHERE_COST_THD(thd);
tmp= COST_ADD(tmp, startup_cost); cur_cost= file->cost_for_reading_multiple_times(record_count, &tmp);
cur_cost= COST_ADD(cur_cost, startup_cost);
if (unlikely(trace_access_idx.trace_started())) if (unlikely(trace_access_idx.trace_started()))
{ {
trace_access_idx. trace_access_idx.
add("rows", records_after_filter). add("rows", records_after_filter).
add("cost", tmp); add("cost", cur_cost);
} }
/* /*
The COST_EPS is here to ensure we use the first key if there are The COST_EPS is here to ensure we use the first key if there are
two 'identical keys' that could be used. two 'identical keys' that could be used.
*/ */
if (tmp + COST_EPS < best.cost) if (cur_cost + COST_EPS < best.cost)
{ {
trace_access_idx.add("chosen", true); trace_access_idx.add("chosen", true);
best.cost= tmp; best.cost= cur_cost;
/* /*
We use 'records' instead of 'records_after_filter' here as we want We use 'records' instead of 'records_after_filter' here as we want
to have EXPLAIN print the number of rows found by the key access. to have EXPLAIN print the number of rows found by the key access.
@ -8792,10 +8814,11 @@ best_access_path(JOIN *join,
(!(table->map & join->outer_join) || (!(table->map & join->outer_join) ||
join->allowed_outer_join_with_cache)) // (2) join->allowed_outer_join_with_cache)) // (2)
{ {
double refills, row_copy_cost, cmp_time; double refills, row_copy_cost, cmp_time, cur_cost;
/* Estimate the cost of the hash join access to the table */ /* Estimate the cost of the hash join access to the table */
double rnd_records= matching_candidates_in_table(s, 0, double rnd_records= matching_candidates_in_table(s, 0,
use_cond_selectivity); use_cond_selectivity);
DBUG_ASSERT(rnd_records <= s->found_records);
set_if_smaller(best.records_out, rnd_records); set_if_smaller(best.records_out, rnd_records);
/* /*
@ -8808,16 +8831,16 @@ best_access_path(JOIN *join,
Cost of reading rows through opt_range including comparing the rows Cost of reading rows through opt_range including comparing the rows
with the attached WHERE clause. with the attached WHERE clause.
*/ */
tmp= s->quick->read_time; cur_cost= s->quick->read_time;
} }
else else
tmp= s->cached_scan_and_compare_time; cur_cost= s->cached_scan_and_compare_time;
/* We read the table as many times as join buffer becomes full. */ /* We read the table as many times as join buffer becomes full. */
refills= (1.0 + floor((double) cache_record_length(join,idx) * refills= (1.0 + floor((double) cache_record_length(join,idx) *
record_count / record_count /
(double) thd->variables.join_buff_size)); (double) thd->variables.join_buff_size));
tmp= COST_MULT(tmp, refills); cur_cost= COST_MULT(cur_cost, refills);
/* /*
Cost of doing the hash lookup and check all matching rows with the Cost of doing the hash lookup and check all matching rows with the
@ -8831,9 +8854,9 @@ best_access_path(JOIN *join,
rnd_records * record_count * HASH_FANOUT * rnd_records * record_count * HASH_FANOUT *
((idx - join->const_tables) * row_copy_cost + ((idx - join->const_tables) * row_copy_cost +
WHERE_COST_THD(thd))); WHERE_COST_THD(thd)));
tmp= COST_ADD(tmp, cmp_time); cur_cost= COST_ADD(cur_cost, cmp_time);
best.cost= tmp; best.cost= cur_cost;
best.records_read= best.records_after_filter= rows2double(s->records); best.records_read= best.records_after_filter= rows2double(s->records);
best.records= rnd_records; best.records= rnd_records;
best.key= hj_start_key; best.key= hj_start_key;
@ -8895,13 +8918,13 @@ best_access_path(JOIN *join,
s->quick->get_type() != QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX && // (2) s->quick->get_type() != QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX && // (2)
best.key && s->quick->index == best.key->key && // (2) best.key && s->quick->index == best.key->key && // (2)
best.max_key_part >= table->opt_range[best.key->key].key_parts) &&// (2) best.max_key_part >= table->opt_range[best.key->key].key_parts) &&// (2)
!((table->file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX) && // (3) !((file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX) && // (3)
!table->covering_keys.is_clear_all() && best.key && !s->quick) &&// (3) !table->covering_keys.is_clear_all() && best.key && !s->quick) &&// (3)
!(table->force_index_join && best.key && !s->quick) && // (4) !(table->force_index_join && best.key && !s->quick) && // (4)
!(best.key && table->pos_in_table_list->jtbm_subselect)) // (5) !(best.key && table->pos_in_table_list->jtbm_subselect)) // (5)
{ // Check full join { // Check full join
double records_after_filter, org_records; double records_after_filter, org_records;
double records_best_filter; double records_best_filter, cur_cost;
Range_rowid_filter_cost_info *filter= 0; Range_rowid_filter_cost_info *filter= 0;
double startup_cost= s->startup_cost; double startup_cost= s->startup_cost;
const char *scan_type= ""; const char *scan_type= "";
@ -8929,7 +8952,7 @@ best_access_path(JOIN *join,
access (see first else-branch below), but we don't take it into access (see first else-branch below), but we don't take it into
account here for range/index_merge access. Find out why this is so. account here for range/index_merge access. Find out why this is so.
*/ */
tmp= COST_MULT(s->quick->read_time, record_count); cur_cost= COST_MULT(s->quick->read_time, record_count);
/* /*
Use record count from range optimizer. Use record count from range optimizer.
@ -8938,6 +8961,7 @@ best_access_path(JOIN *join,
*/ */
org_records= records_after_filter= rows2double(s->found_records); org_records= records_after_filter= rows2double(s->found_records);
records_best_filter= org_records; records_best_filter= org_records;
set_if_smaller(best.records_out, records_best_filter);
if (s->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE) if (s->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE)
{ {
@ -8950,37 +8974,32 @@ best_access_path(JOIN *join,
registers complications when costs are calculated. registers complications when costs are calculated.
*/ */
DBUG_ASSERT(range->rows == s->found_records); DBUG_ASSERT(range->rows == s->found_records);
DBUG_ASSERT((range->cost == 0.0 && s->quick->read_time == 0.0) || DBUG_ASSERT((range->cost.total_cost() == 0.0 &&
(range->cost / s->quick->read_time <= 1.0000001 && s->quick->read_time == 0.0) ||
range->cost / s->quick->read_time >= 0.9999999)); (range->cost.total_cost() / s->quick->read_time <= 1.0000001 &&
range->cost.total_cost() / s->quick->read_time >= 0.9999999));
filter= range->get_costs(&tmp);
table->best_range_rowid_filter_for_partial_join(key_no, filter= table->best_range_rowid_filter(key_no,
rows2double(range->rows), rows2double(range->rows),
range->find_cost, file->cost(&tmp),
range->index_only_cost, file->cost(tmp.index_cost),
record_count, record_count,
&records_best_filter); &records_best_filter);
set_if_smaller(best.records_out, records_best_filter); set_if_smaller(best.records_out, records_best_filter);
if (filter) if (filter)
{ {
double filter_cost= range->fetch_cost; filter= filter->apply_filter(thd, table, &tmp,
filter= filter->apply_filter(thd, table, &filter_cost,
&records_after_filter, &records_after_filter,
&startup_cost, &startup_cost,
range->fetch_cost,
range->index_only_cost,
range->ranges, range->ranges,
record_count); record_count);
if (filter) if (filter)
{ {
tmp= filter_cost; tmp.row_cost.cpu+= records_after_filter * WHERE_COST_THD(thd);
/* Filter returns cost without WHERE_COST */ cur_cost= file->cost_for_reading_multiple_times(record_count, &tmp);
tmp= COST_ADD(tmp, records_after_filter * cur_cost= COST_ADD(cur_cost, startup_cost);
WHERE_COST_THD(thd)); startup_cost= 0; // Avoid adding it again later
tmp= COST_MULT(tmp, record_count);
tmp= COST_ADD(tmp, startup_cost);
startup_cost= 0; // Avoid adding it later
table->opt_range[key_no].selectivity= filter->selectivity; table->opt_range[key_no].selectivity= filter->selectivity;
} }
} }
@ -8998,6 +9017,7 @@ best_access_path(JOIN *join,
records_best_filter= records_after_filter= records_best_filter= records_after_filter=
matching_candidates_in_table(s, 0, use_cond_selectivity); matching_candidates_in_table(s, 0, use_cond_selectivity);
DBUG_ASSERT(records_after_filter <= s->records); DBUG_ASSERT(records_after_filter <= s->records);
DBUG_ASSERT(records_after_filter <= s->found_records);
set_if_smaller(best.records_out, records_after_filter); set_if_smaller(best.records_out, records_after_filter);
@ -9007,7 +9027,7 @@ best_access_path(JOIN *join,
if (s->cached_forced_index_type) if (s->cached_forced_index_type)
{ {
type= s->cached_forced_index_type; type= s->cached_forced_index_type;
tmp= s->cached_forced_index_cost; cur_cost= s->cached_forced_index_cost;
forced_index= s->cached_forced_index; forced_index= s->cached_forced_index;
} }
else else
@ -9023,42 +9043,42 @@ best_access_path(JOIN *join,
{ {
/* Use value from estimate_scan_time */ /* Use value from estimate_scan_time */
forced_index= s->cached_covering_key; forced_index= s->cached_covering_key;
tmp= s->cached_scan_and_compare_time; cur_cost= s->cached_scan_and_compare_time;
} }
else else
{ {
#ifdef FORCE_INDEX_SHOULD_FORCE_INDEX_SCAN #ifdef FORCE_INDEX_SHOULD_FORCE_INDEX_SCAN
/* No cached key, use shortest allowed key */ /* No cached key, use shortest allowed key */
key_map keys= *table->file->keys_to_use_for_scanning(); key_map keys= *file->keys_to_use_for_scanning();
keys.intersect(table->keys_in_use_for_query); keys.intersect(table->keys_in_use_for_query);
if ((forced_index= find_shortest_key(table, &keys)) < MAX_KEY) if ((forced_index= find_shortest_key(table, &keys)) < MAX_KEY)
{ {
INDEX_READ_COST cost= cost_for_index_read(thd, table, ALL_READ_COST cost= cost_for_index_read(thd, table,
forced_index, forced_index,
s->records, s->records,
s->worst_seeks); s->worst_seeks);
tmp= cost.read_cost; cur_cost= file->cost(cost);
/* Calculate cost of checking the attached WHERE */ /* Calculate cost of checking the attached WHERE */
tmp= COST_ADD(cost.read_cost, cur_cost= COST_ADD(cur_cost,
s->records * WHERE_COST_THD(thd)); s->records * WHERE_COST_THD(thd));
} }
else else
#endif #endif
{ {
/* No usable key, use table scan */ /* No usable key, use table scan */
tmp= s->cached_scan_and_compare_time; cur_cost= s->cached_scan_and_compare_time;
type= JT_ALL; type= JT_ALL;
} }
} }
} }
else // table scan else // table scan
{ {
tmp= s->cached_scan_and_compare_time; cur_cost= s->cached_scan_and_compare_time;
type= JT_ALL; type= JT_ALL;
} }
/* Cache result for other calls */ /* Cache result for other calls */
s->cached_forced_index_type= type; s->cached_forced_index_type= type;
s->cached_forced_index_cost= tmp; s->cached_forced_index_cost= cur_cost;
s->cached_forced_index= forced_index; s->cached_forced_index= forced_index;
} }
@ -9078,7 +9098,7 @@ best_access_path(JOIN *join,
If this is not the first table we have to compare the rows against If this is not the first table we have to compare the rows against
all previous row combinations all previous row combinations
*/ */
tmp= COST_MULT(tmp, record_count); cur_cost= COST_MULT(cur_cost, record_count);
} }
else else
{ {
@ -9096,7 +9116,7 @@ best_access_path(JOIN *join,
refills= (1.0 + floor((double) cache_record_length(join,idx) * refills= (1.0 + floor((double) cache_record_length(join,idx) *
(record_count / (record_count /
(double) thd->variables.join_buff_size))); (double) thd->variables.join_buff_size)));
tmp= COST_MULT(tmp, refills); cur_cost= COST_MULT(cur_cost, refills);
/* We come here only if there are already rows in the join cache */ /* We come here only if there are already rows in the join cache */
DBUG_ASSERT(idx != join->const_tables); DBUG_ASSERT(idx != join->const_tables);
@ -9112,14 +9132,14 @@ best_access_path(JOIN *join,
records_after_filter * record_count * records_after_filter * record_count *
((idx - join->const_tables) * row_copy_cost + ((idx - join->const_tables) * row_copy_cost +
WHERE_COST_THD(thd))); WHERE_COST_THD(thd)));
tmp= COST_ADD(tmp, cmp_time); cur_cost= COST_ADD(cur_cost, cmp_time);
} }
} }
/* Splitting technique cannot be used with join cache */ /* Splitting technique cannot be used with join cache */
if (table->is_splittable()) if (table->is_splittable())
startup_cost= table->get_materialization_cost(); startup_cost= table->get_materialization_cost();
tmp+= startup_cost; cur_cost+= startup_cost;
if (unlikely(trace_access_scan.trace_started())) if (unlikely(trace_access_scan.trace_started()))
{ {
@ -9129,7 +9149,7 @@ best_access_path(JOIN *join,
add("rows", org_records). add("rows", org_records).
add("rows_after_filter", records_after_filter). add("rows_after_filter", records_after_filter).
add("rows_out", best.records_out). add("rows_out", best.records_out).
add("cost", tmp); add("cost", cur_cost);
if (type == JT_ALL) if (type == JT_ALL)
{ {
trace_access_scan.add("index_only", trace_access_scan.add("index_only",
@ -9137,15 +9157,16 @@ best_access_path(JOIN *join,
} }
} }
if (tmp + COST_EPS < best.cost) if (cur_cost + COST_EPS < best.cost)
{ {
/* /*
If the table has a range (s->quick is set) make_join_select() If the table has a range (s->quick is set) make_join_select()
will ensure that this will be used will ensure that this will be used
*/ */
best.cost= tmp; best.cost= cur_cost;
best.records_read= org_records; // Records accessed best.records_read= org_records; // Records accessed
best.records= records_after_filter; // Records to be checked with WHERE best.records= records_after_filter; // Records to be checked with WHERE
/* /*
If we are using 'use_cond_selectivity > 1' then If we are using 'use_cond_selectivity > 1' then
table_after_join_selectivity may take into account other table_after_join_selectivity may take into account other
@ -11019,7 +11040,7 @@ best_extension_by_limited_search(JOIN *join,
trace_one_table trace_one_table
.add("pruned_by_cost", true) .add("pruned_by_cost", true)
.add("current_cost", current_read_time) .add("current_cost", current_read_time)
.add("best_cost", join->best_read + COST_EPS); .add("best_cost", join->best_read);
restore_prev_nj_state(s); restore_prev_nj_state(s);
restore_prev_sj_state(remaining_tables, s, idx); restore_prev_sj_state(remaining_tables, s, idx);
@ -13355,6 +13376,7 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
if (!sel->quick_keys.is_subset(tab->checked_keys) || if (!sel->quick_keys.is_subset(tab->checked_keys) ||
!sel->needed_reg.is_subset(tab->checked_keys)) !sel->needed_reg.is_subset(tab->checked_keys))
{ {
handler *file= tab->table->file;
/* /*
"Range checked for each record" is a "last resort" access method "Range checked for each record" is a "last resort" access method
that should only be used when the other option is a cross-product that should only be used when the other option is a cross-product
@ -13370,9 +13392,7 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
(sel->quick_keys.is_clear_all() || (sel->quick_keys.is_clear_all() ||
(sel->quick && (sel->quick &&
sel->quick->read_time > sel->quick->read_time >
tab->table->file-> file->cost(file->ha_scan_and_compare_time(tab->table->file-> stats.records))))) ?
ha_scan_and_compare_time(tab->table->file->
stats.records)))) ?
2 : 1; 2 : 1;
sel->read_tables= used_tables & ~current_map; sel->read_tables= used_tables & ~current_map;
sel->quick_keys.clear_all(); sel->quick_keys.clear_all();
@ -14237,7 +14257,6 @@ uint check_join_cache_usage(JOIN_TAB *tab,
uint table_index, uint table_index,
JOIN_TAB *prev_tab) JOIN_TAB *prev_tab)
{ {
Cost_estimate cost;
uint flags= 0; uint flags= 0;
ha_rows rows= 0; ha_rows rows= 0;
uint bufsz= 4096; uint bufsz= 4096;
@ -14396,6 +14415,8 @@ uint check_join_cache_usage(JOIN_TAB *tab,
if (!tab->is_ref_for_hash_join() && !no_bka_cache) if (!tab->is_ref_for_hash_join() && !no_bka_cache)
{ {
Cost_estimate cost;
cost.reset();
flags= HA_MRR_NO_NULL_ENDPOINTS | HA_MRR_SINGLE_POINT; flags= HA_MRR_NO_NULL_ENDPOINTS | HA_MRR_SINGLE_POINT;
if (tab->table->covering_keys.is_set(tab->ref.key)) if (tab->table->covering_keys.is_set(tab->ref.key))
flags|= HA_MRR_INDEX_ONLY; flags|= HA_MRR_INDEX_ONLY;
@ -15132,6 +15153,7 @@ void JOIN_TAB::cleanup()
void JOIN_TAB::estimate_scan_time() void JOIN_TAB::estimate_scan_time()
{ {
THD *thd= join->thd; THD *thd= join->thd;
handler *file= table->file;
double copy_cost; double copy_cost;
cached_covering_key= MAX_KEY; cached_covering_key= MAX_KEY;
@ -15143,7 +15165,7 @@ void JOIN_TAB::estimate_scan_time()
&startup_cost); &startup_cost);
table->opt_range_condition_rows= records; table->opt_range_condition_rows= records;
table->used_stat_records= records; table->used_stat_records= records;
copy_cost= table->file->ROW_COPY_COST; copy_cost= file->ROW_COPY_COST;
} }
else else
{ {
@ -15157,12 +15179,13 @@ void JOIN_TAB::estimate_scan_time()
if (!table->covering_keys.is_clear_all() && ! table->no_keyread) if (!table->covering_keys.is_clear_all() && ! table->no_keyread)
{ {
cached_covering_key= find_shortest_key(table, &table->covering_keys); cached_covering_key= find_shortest_key(table, &table->covering_keys);
read_time= table->file->ha_key_scan_time(cached_covering_key, records); read_time= file->cost(file->ha_key_scan_time(cached_covering_key,
records));
copy_cost= 0; // included in ha_key_scan_time copy_cost= 0; // included in ha_key_scan_time
} }
else else
{ {
read_time= table->file->ha_scan_time(records); read_time= file->cost(file->ha_scan_time(records));
copy_cost= 0; copy_cost= 0;
} }
} }
@ -15179,12 +15202,13 @@ void JOIN_TAB::estimate_scan_time()
else else
memcpy(&table->s->optimizer_costs, &tmp_table_optimizer_costs, memcpy(&table->s->optimizer_costs, &tmp_table_optimizer_costs,
sizeof(tmp_table_optimizer_costs)); sizeof(tmp_table_optimizer_costs));
table->file->set_optimizer_costs(thd); file->set_optimizer_costs(thd);
table->s->optimizer_costs_inited=1 ; table->s->optimizer_costs_inited=1;
records= table->stat_records(); records= table->stat_records();
DBUG_ASSERT(table->opt_range_condition_rows == records); DBUG_ASSERT(table->opt_range_condition_rows == records);
read_time= table->file->ha_scan_time(MY_MAX(records, 1000)); // Needs fix.. // Needs fix..
read_time= file->cost(table->file->ha_scan_time(MY_MAX(records, 1000)));
copy_cost= table->s->optimizer_costs.row_copy_cost; copy_cost= table->s->optimizer_costs.row_copy_cost;
} }
@ -30253,7 +30277,7 @@ static bool get_range_limit_read_cost(const POSITION *pos,
full index scan/cost. full index scan/cost.
*/ */
double best_rows, range_rows; double best_rows, range_rows;
double range_cost= (double) table->opt_range[keynr].fetch_cost; double range_cost= (double) table->opt_range[keynr].cost.fetch_cost();
best_rows= range_rows= (double) table->opt_range[keynr].rows; best_rows= range_rows= (double) table->opt_range[keynr].rows;
if (pos) if (pos)
@ -30309,12 +30333,12 @@ static bool get_range_limit_read_cost(const POSITION *pos,
N/(refkey_rows_estimate/table_records) > table_records N/(refkey_rows_estimate/table_records) > table_records
<=> N > refkey_rows_estimate. <=> N > refkey_rows_estimate.
*/ */
INDEX_READ_COST cost= cost_for_index_read(table->in_use, table, keynr, ALL_READ_COST cost= cost_for_index_read(table->in_use, table, keynr,
rows_to_scan, rows_to_scan,
pos ? pos ?
(ha_rows) pos->table->worst_seeks : (ha_rows) pos->table->worst_seeks :
HA_ROWS_MAX); HA_ROWS_MAX);
*read_cost= (cost.read_cost + *read_cost= (table->file->cost(&cost) +
rows_to_scan * WHERE_COST_THD(table->in_use)); rows_to_scan * WHERE_COST_THD(table->in_use));
*read_rows= rows2double(rows_to_scan); *read_rows= rows2double(rows_to_scan);
return 0; return 0;
@ -30449,7 +30473,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
{ {
/* Probably an update or delete. Assume we will do a full table scan */ /* Probably an update or delete. Assume we will do a full table scan */
fanout= 1.0; fanout= 1.0;
read_time= table->file->ha_scan_and_compare_time(rows_estimate); read_time= table->file->cost(table->file->ha_scan_and_compare_time(rows_estimate));
set_if_smaller(select_limit_arg, table_records); set_if_smaller(select_limit_arg, table_records);
} }

View File

@ -383,7 +383,7 @@ void print_sjm(SJ_MATERIALIZATION_INFO *sjm)
} }
fprintf(DBUG_FILE, " }\n"); fprintf(DBUG_FILE, " }\n");
fprintf(DBUG_FILE, " materialize_cost= %g\n", fprintf(DBUG_FILE, " materialize_cost= %g\n",
sjm->materialization_cost.total_cost()); sjm->materialization_cost);
fprintf(DBUG_FILE, " rows= %g\n", sjm->rows); fprintf(DBUG_FILE, " rows= %g\n", sjm->rows);
fprintf(DBUG_FILE, "}\n"); fprintf(DBUG_FILE, "}\n");
DBUG_UNLOCK_FILE; DBUG_UNLOCK_FILE;

View File

@ -10460,17 +10460,26 @@ bool TABLE::export_structure(THD *thd, Row_definition_list *defs)
inline void TABLE::initialize_opt_range_structures() inline void TABLE::initialize_opt_range_structures()
{ {
TRASH_ALLOC((void*)&opt_range_keys, sizeof(opt_range_keys)); TRASH_ALLOC((void*)&opt_range_keys, sizeof(opt_range_keys));
TRASH_ALLOC(opt_range, s->keys * sizeof(*opt_range)); TRASH_ALLOC((void*)opt_range, s->keys * sizeof(*opt_range));
TRASH_ALLOC(const_key_parts, s->keys * sizeof(*const_key_parts)); TRASH_ALLOC(const_key_parts, s->keys * sizeof(*const_key_parts));
} }
double TABLE::OPT_RANGE::index_only_fetch_cost(TABLE *table) double TABLE::OPT_RANGE::index_only_fetch_cost(TABLE *table)
{ {
return (index_only_cost + return (table->file->cost(cost.index_cost)+
(double) rows * table->s->optimizer_costs.key_copy_cost); (double) rows * table->s->optimizer_costs.key_copy_cost);
} }
void TABLE::OPT_RANGE::get_costs(ALL_READ_COST *res)
{
res->index_cost= cost.index_cost;
res->row_cost= cost.row_cost;
res->copy_cost= cost.copy_cost;
res->max_index_blocks= max_index_blocks;
res->max_row_blocks= max_row_blocks;
}
/* /*
Mark table to be reopened after query Mark table to be reopened after query

View File

@ -1394,27 +1394,8 @@ public:
{ {
uint key_parts; uint key_parts;
uint ranges; uint ranges;
ha_rows rows; ha_rows rows, max_index_blocks, max_row_blocks;
/* Cost_estimate cost;
The full cost of using 'range'. Includes fetching the rows
through keys, copying them and comparing the rows aginst the
WHERE clause.
*/
double cost;
/*
Cost of finding the key and fetching the row with row id.
In case of clustered keys or covering keys the fetch of the row is
not counted for.
*/
double find_cost;
/* find_cost + cost of copying the rows to record */
double fetch_cost;
/*
Cost of fetching the keys, not including copying the keys to
record or comparing them with the WHERE clause. Used only when
working with filters.
*/
double index_only_cost;
/* Selectivity, in case of filters */ /* Selectivity, in case of filters */
double selectivity; double selectivity;
bool first_key_part_has_only_one_value; bool first_key_part_has_only_one_value;
@ -1424,6 +1405,7 @@ public:
sql level. sql level.
*/ */
double index_only_fetch_cost(TABLE *table); double index_only_fetch_cost(TABLE *table);
void get_costs(ALL_READ_COST *cost);
} *opt_range; } *opt_range;
/* /*
Bitmaps of key parts that =const for the duration of join execution. If Bitmaps of key parts that =const for the duration of join execution. If
@ -1818,12 +1800,12 @@ public:
void prune_range_rowid_filters(); void prune_range_rowid_filters();
void trace_range_rowid_filters(THD *thd) const; void trace_range_rowid_filters(THD *thd) const;
Range_rowid_filter_cost_info * Range_rowid_filter_cost_info *
best_range_rowid_filter_for_partial_join(uint access_key_no, best_range_rowid_filter(uint access_key_no,
double records, double records,
double fetch_cost, double fetch_cost,
double index_only_cost, double index_only_cost,
double prev_records, double prev_records,
double *records_out); double *records_out);
/** /**
System Versioning support System Versioning support
*/ */

View File

@ -348,7 +348,7 @@ double Unique::get_use_cost(THD *thd, uint *buffer, size_t nkeys, uint key_size,
First, add cost of writing all trees to disk, assuming that all disk First, add cost of writing all trees to disk, assuming that all disk
writes are sequential. writes are sequential.
*/ */
disk_read_cost= DISK_READ_COST_THD(thd); disk_read_cost= default_optimizer_costs.disk_read_cost;
result += disk_read_cost * n_full_trees * result += disk_read_cost * n_full_trees *
ceil(((double) key_size)*max_elements_in_tree / DISK_CHUNK_SIZE); ceil(((double) key_size)*max_elements_in_tree / DISK_CHUNK_SIZE);
result += disk_read_cost * ceil(((double) key_size)*last_tree_elems / DISK_CHUNK_SIZE); result += disk_read_cost * ceil(((double) key_size)*last_tree_elems / DISK_CHUNK_SIZE);
@ -365,8 +365,7 @@ double Unique::get_use_cost(THD *thd, uint *buffer, size_t nkeys, uint key_size,
Add cost of reading the resulting sequence, assuming there were no Add cost of reading the resulting sequence, assuming there were no
duplicate elements. duplicate elements.
*/ */
result+= (ceil((double)key_size*nkeys/IO_SIZE) * result+= (ceil((double)key_size*nkeys/IO_SIZE) * disk_read_cost);
default_optimizer_costs.disk_read_cost);
return result; return result;
} }

View File

@ -309,7 +309,7 @@ public:
Called in test_quick_select to determine if indexes should be used. Called in test_quick_select to determine if indexes should be used.
*/ */
virtual IO_AND_CPU_COST scan_time() virtual IO_AND_CPU_COST scan_time()
{ return { 0, (double) (stats.records+stats.deleted) * avg_io_cost() }; }; { return { 0, (double) (stats.records+stats.deleted) * DISK_READ_COST }; };
/** @brief /** @brief
This method will never be called if you do not implement indexes. This method will never be called if you do not implement indexes.

View File

@ -126,9 +126,9 @@ public:
*/ */
virtual IO_AND_CPU_COST scan_time() virtual IO_AND_CPU_COST scan_time()
{ {
return { (double) ((share->saved_data_file_length + IO_SIZE-1))/ IO_SIZE * return
avg_io_cost(), { (double) ((share->saved_data_file_length + IO_SIZE-1))/ IO_SIZE,
(stats.records+stats.deleted) * ROW_NEXT_FIND_COST }; (stats.records+stats.deleted) * ROW_NEXT_FIND_COST };
} }
/* The next method will never be called */ /* The next method will never be called */
virtual bool fast_key_read() { return 1;} virtual bool fast_key_read() { return 1;}

View File

@ -156,7 +156,7 @@ public:
{ {
IO_AND_CPU_COST cost; IO_AND_CPU_COST cost;
/* 0 blocks, 0.001 ms / row */ /* 0 blocks, 0.001 ms / row */
cost.io= (double) (stats.records+stats.deleted) * avg_io_cost(); cost.io= (double) (stats.records+stats.deleted) * DISK_READ_COST;
cost.cpu= 0; cost.cpu= 0;
return cost; return cost;
} }
@ -168,7 +168,7 @@ public:
ulonglong blocks) ulonglong blocks)
{ {
IO_AND_CPU_COST cost; IO_AND_CPU_COST cost;
cost.io= blocks * avg_io_cost(); cost.io= blocks * DISK_READ_COST;
cost.cpu= (double) rows * 0.001; cost.cpu= (double) rows * 0.001;
return cost; return cost;
} }
@ -181,7 +181,7 @@ public:
IO_AND_CPU_COST cost; IO_AND_CPU_COST cost;
/* 0 blocks, 0.001 ms / row */ /* 0 blocks, 0.001 ms / row */
cost.io= 0; cost.io= 0;
cost.cpu= (double) rows * avg_io_cost(); cost.cpu= (double) rows * DISK_READ_COST;
return cost; return cost;
} }

View File

@ -186,20 +186,20 @@ public:
DBUG_PRINT("info", ("records %lu", (ulong) stats.records)); DBUG_PRINT("info", ("records %lu", (ulong) stats.records));
return return
{ {
(double) (stats.mean_rec_length * stats.records)/IO_SIZE * avg_io_cost(), 0,
0 (double) (stats.mean_rec_length * stats.records)/8192 * DISK_READ_COST+
1000,
}; };
} }
IO_AND_CPU_COST rnd_pos_time(ha_rows rows)
{
return { (double) stats.records * avg_io_cost(), 0 };
}
IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows, IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows,
ulonglong blocks) ulonglong blocks)
{ {
return { (double) (ranges + rows) * avg_io_cost(), 0 }; return {0, (double) (ranges + rows) * DISK_READ_COST };
}
IO_AND_CPU_COST rnd_pos_time(ha_rows rows)
{
return {0, (double) rows * DISK_READ_COST };
} }
const key_map *keys_to_use_for_scanning() { return &key_map_full; } const key_map *keys_to_use_for_scanning() { return &key_map_full; }
/* /*
Everything below are methods that we implment in ha_federated.cc. Everything below are methods that we implment in ha_federated.cc.

View File

@ -364,26 +364,26 @@ public:
Talk to Kostja about this - how to get the Talk to Kostja about this - how to get the
number of rows * ... number of rows * ...
disk scan time on other side (block size, size of the row) + network time ... disk scan time on other side (block size, size of the row) + network time ...
The reason for "records * 1000" is that such a large number forces The reason for "1000" is that such a large number forces this to use indexes "
this to use indexes "
*/ */
IO_AND_CPU_COST scan_time() IO_AND_CPU_COST scan_time()
{ {
DBUG_PRINT("info", ("records %lu", (ulong) stats.records)); DBUG_PRINT("info", ("records %lu", (ulong) stats.records));
return return
{ {
(double) (stats.mean_rec_length * stats.records)/8192 * avg_io_cost(), 0,
0 (double) (stats.mean_rec_length * stats.records)/8192 * DISK_READ_COST+
1000,
}; };
} }
IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows, IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows,
ulonglong blocks) ulonglong blocks)
{ {
return { (double) (ranges + rows) * avg_io_cost(), 0 }; return {0, (double) (ranges + rows) * DISK_READ_COST };
} }
IO_AND_CPU_COST rnd_pos_time(ha_rows rows) IO_AND_CPU_COST rnd_pos_time(ha_rows rows)
{ {
return { (double) rows * avg_io_cost(), 0 }; return {0, (double) rows * DISK_READ_COST };
} }
const key_map *keys_to_use_for_scanning() { return &key_map_full; } const key_map *keys_to_use_for_scanning() { return &key_map_full; }

View File

@ -67,7 +67,6 @@ public:
ulonglong blocks) override; ulonglong blocks) override;
IO_AND_CPU_COST rnd_pos_time(ha_rows rows) override; IO_AND_CPU_COST rnd_pos_time(ha_rows rows) override;
/* 0 for avg_io_cost ensures that there are no read-block calculations */ /* 0 for avg_io_cost ensures that there are no read-block calculations */
double avg_io_cost() override { return 0.0; }
int open(const char *name, int mode, uint test_if_locked); int open(const char *name, int mode, uint test_if_locked);
int close(void); int close(void);

View File

@ -14337,7 +14337,7 @@ ha_innobase::scan_time()
TODO: This will be further improved to return some approximate TODO: This will be further improved to return some approximate
estimate but that would also needs pre-population of stats estimate but that would also needs pre-population of stats
structure. As of now approach is in sync with MyISAM. */ structure. As of now approach is in sync with MyISAM. */
return { (ulonglong2double(stats.data_file_length) / IO_SIZE * avg_io_cost()), 0.0 }; return { (ulonglong2double(stats.data_file_length) / IO_SIZE * DISK_READ_COST), 0.0 };
} }
ulint stat_clustered_index_size; ulint stat_clustered_index_size;
@ -14347,7 +14347,7 @@ ha_innobase::scan_time()
stat_clustered_index_size = stat_clustered_index_size =
m_prebuilt->table->stat_clustered_index_size; m_prebuilt->table->stat_clustered_index_size;
cost.io= (double) stat_clustered_index_size * avg_io_cost(); cost.io= (double) stat_clustered_index_size * DISK_READ_COST;
cost.cpu= 0; cost.cpu= 0;
return(cost); return(cost);
} }

View File

@ -106,7 +106,7 @@ public:
{ {
IO_AND_CPU_COST cost; IO_AND_CPU_COST cost;
cost.io= (ulonglong2double(stats.data_file_length) / IO_SIZE + cost.io= (ulonglong2double(stats.data_file_length) / IO_SIZE +
file->tables) * avg_io_cost(); file->tables),
cost.cpu= records() * ROW_NEXT_FIND_COST; cost.cpu= records() * ROW_NEXT_FIND_COST;
return cost; return cost;
} }

View File

@ -14660,6 +14660,15 @@ IO_AND_CPU_COST ha_rocksdb::keyread_time(uint index, ulong ranges,
DBUG_RETURN(cost); DBUG_RETURN(cost);
} }
ulonglong ha_rocksdb::index_blocks(uint index, uint ranges, ha_rows rows)
{
size_t len= table->key_storage_length(index);
ulonglong blocks= (rows * len / 4) / stats.block_size + ranges; // 75 % compression
return blocks * stats.block_size / IO_SIZE;
}
void ha_rocksdb::print_error(int error, myf errflag) { void ha_rocksdb::print_error(int error, myf errflag) {
if (error == HA_ERR_ROCKSDB_STATUS_BUSY) { if (error == HA_ERR_ROCKSDB_STATUS_BUSY) {
error = HA_ERR_LOCK_DEADLOCK; error = HA_ERR_LOCK_DEADLOCK;

View File

@ -623,7 +623,7 @@ public:
bool sorted) override bool sorted) override
MY_ATTRIBUTE((__warn_unused_result__)); MY_ATTRIBUTE((__warn_unused_result__));
virtual IO_AND_CPU_COST scan_time() override IO_AND_CPU_COST scan_time() override
{ {
IO_AND_CPU_COST cost; IO_AND_CPU_COST cost;
DBUG_ENTER_FUNC(); DBUG_ENTER_FUNC();
@ -634,7 +634,8 @@ public:
IO_AND_CPU_COST keyread_time(uint index, ulong ranges, IO_AND_CPU_COST keyread_time(uint index, ulong ranges,
ha_rows rows, ulonglong blocks) override; ha_rows rows, ulonglong blocks) override;
virtual void print_error(int error, myf errflag) override; ulonglong index_blocks(uint index, uint ranges, ha_rows rows) override;
void print_error(int error, myf errflag) override;
int open(const char *const name, int mode, uint test_if_locked) override int open(const char *const name, int mode, uint test_if_locked) override
MY_ATTRIBUTE((__warn_unused_result__)); MY_ATTRIBUTE((__warn_unused_result__));

View File

@ -86,7 +86,17 @@ public:
void position(const uchar *record); void position(const uchar *record);
int rnd_pos(uchar *buf, uchar *pos); int rnd_pos(uchar *buf, uchar *pos);
int info(uint flag); int info(uint flag);
IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows,
ulonglong blocks) override
{
/* Avoids assert in total_cost() and makes DBUG_PRINT more consistent */
return {0,0};
}
IO_AND_CPU_COST scan_time()
{
/* Avoids assert in total_cost() and makes DBUG_PRINT more consistent */
return {0, 0};
}
/* indexes */ /* indexes */
ulong index_flags(uint inx, uint part, bool all_parts) const ulong index_flags(uint inx, uint part, bool all_parts) const
{ return HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER | { return HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER |
@ -100,7 +110,6 @@ public:
int index_last(uchar *buf); int index_last(uchar *buf);
ha_rows records_in_range(uint inx, const key_range *start_key, ha_rows records_in_range(uint inx, const key_range *start_key,
const key_range *end_key, page_range *pages); const key_range *end_key, page_range *pages);
double avg_io_cost() override { return 0.0; }
private: private:
void set(uchar *buf); void set(uchar *buf);
@ -492,10 +501,14 @@ int ha_seq_group_by_handler::next_row()
static void sequence_update_optimizer_costs(OPTIMIZER_COSTS *costs) static void sequence_update_optimizer_costs(OPTIMIZER_COSTS *costs)
{ {
costs->disk_read_cost= 0;
costs->disk_read_ratio= 0.0; // No disk costs->disk_read_ratio= 0.0; // No disk
costs->key_next_find_cost= costs->key_lookup_cost= costs->key_next_find_cost=
costs->key_copy_cost= costs->row_lookup_cost= costs->key_lookup_cost=
costs->row_copy_cost= 0.0000062391530550; costs->key_copy_cost=
costs->row_next_find_cost=
costs->row_lookup_cost=
costs->row_copy_cost= 0.0000062391530550;
} }
/***************************************************************************** /*****************************************************************************

View File

@ -76,7 +76,7 @@ public:
{ {
IO_AND_CPU_COST cost; IO_AND_CPU_COST cost;
cost.io= 0; cost.io= 0;
cost.cpu= (double) (stats.records+stats.deleted) * avg_io_cost(); cost.cpu= (double) (stats.records+stats.deleted) * DISK_READ_COST;
return cost; return cost;
} }
IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows, IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows,