diff --git a/sql/filesort_utils.cc b/sql/filesort_utils.cc index 1aa17deb16e..e1cd7c566bb 100644 --- a/sql/filesort_utils.cc +++ b/sql/filesort_utils.cc @@ -237,9 +237,10 @@ void Sort_costs::compute_pq_sort_costs(Sort_param *param, ha_rows num_rows, if (queue_size < num_available_keys) { + handler *file= param->sort_form->file; costs[PQ_SORT_ORDER_BY_FIELDS]= get_pq_sort_cost(num_rows, queue_size, false) + - param->sort_form->file->ha_rnd_pos_call_time(MY_MIN(queue_size - 1, num_rows)); + file->cost(file->ha_rnd_pos_call_time(MY_MIN(queue_size - 1, num_rows))); } /* Calculate cost with addon fields */ @@ -270,14 +271,15 @@ void Sort_costs::compute_merge_sort_costs(Sort_param *param, costs[MERGE_SORT_ORDER_BY_FIELDS]= DBL_MAX; if (num_available_keys) + { + handler *file= param->sort_form->file; costs[MERGE_SORT_ORDER_BY_FIELDS]= get_merge_many_buffs_cost_fast(num_rows, num_available_keys, row_length, DEFAULT_KEY_COMPARE_COST, default_optimizer_costs.disk_read_cost, false) + - param->sort_form->file->ha_rnd_pos_call_time(MY_MIN(param->limit_rows, - num_rows)); - + file->cost(file->ha_rnd_pos_call_time(MY_MIN(param->limit_rows, num_rows))); + } if (with_addon_fields) { /* Compute cost of merge sort *if* we strip addon fields. */ diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc index 30c1832fadc..08e3b45e02b 100644 --- a/sql/ha_partition.cc +++ b/sql/ha_partition.cc @@ -6629,7 +6629,7 @@ ha_rows ha_partition::multi_range_read_info_const(uint keyno, ha_rows tmp_rows; uint tmp_mrr_mode; m_mrr_buffer_size[i]= 0; - part_cost.reset(); + part_cost.reset(*file); tmp_mrr_mode= *mrr_mode; tmp_rows= (*file)-> multi_range_read_info_const(keyno, &m_part_seq_if, @@ -6680,7 +6680,7 @@ ha_rows ha_partition::multi_range_read_info(uint keyno, uint n_ranges, { ha_rows tmp_rows; m_mrr_buffer_size[i]= 0; - part_cost.reset(); + part_cost.reset(*file); if ((tmp_rows= (*file)->multi_range_read_info(keyno, n_ranges, keys, key_parts, &m_mrr_buffer_size[i], diff --git a/sql/handler.cc b/sql/handler.cc index a9c832c9274..7a8e4d6b036 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -3283,6 +3283,23 @@ LEX_CSTRING *handler::engine_name() return hton_name(ht); } + +/* + Calculate the number of index blocks we are going to access when + doing 'ranges' index dives reading a total of 'rows' rows. +*/ + +ulonglong handler::index_blocks(uint index, uint ranges, ha_rows rows) +{ + if (!stats.block_size) + return 0; // No disk storage + size_t len= table->key_storage_length(index); + ulonglong blocks= (rows * len / INDEX_BLOCK_FILL_FACTOR_DIV * + INDEX_BLOCK_FILL_FACTOR_MUL) / stats.block_size + ranges; + return blocks * stats.block_size / IO_SIZE; +} + + /* Calculate cost for an index scan for given index and number of records. @@ -3329,7 +3346,7 @@ IO_AND_CPU_COST handler::keyread_time(uint index, ulong ranges, ha_rows rows, else io_blocks= blocks * stats.block_size / IO_SIZE; } - cost.io= (double) io_blocks * avg_io_cost(); + cost.io= (double) io_blocks; cost.cpu= blocks * INDEX_BLOCK_COPY_COST; return cost; } @@ -3342,36 +3359,35 @@ IO_AND_CPU_COST handler::keyread_time(uint index, ulong ranges, ha_rows rows, in which case there should be an additional rnd_pos_time() cost.
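(For a non-covering secondary index read, handler::calculate_costs() adds this as cost->row_cost= ha_rnd_pos_time(total_rows) on top of the keyread cost.)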
*/ -double handler::ha_keyread_time(uint index, ulong ranges, ha_rows rows, - ulonglong blocks) +IO_AND_CPU_COST handler::ha_keyread_time(uint index, ulong ranges, + ha_rows rows, + ulonglong blocks) { if (rows < ranges) rows= ranges; IO_AND_CPU_COST cost= keyread_time(index, ranges, rows, blocks); - return (cost.io * DISK_READ_RATIO + - cost.cpu + ranges * KEY_LOOKUP_COST + - (rows - ranges) * KEY_NEXT_FIND_COST); + cost.cpu+= ranges * KEY_LOOKUP_COST + (rows - ranges) * KEY_NEXT_FIND_COST; + return cost; } /* - Read a row from a clustered index + Read rows from a clustered index - Cost is similar to ha_rnd_pos_call_time() as a index_read() on a clusterd + Cost is similar to ha_rnd_pos_call_time() as an index_read() on a clustered key has identical code as rnd_pos() (At least in InnoDB:) */ -double handler::ha_keyread_clustered_and_copy_time(uint index, ulong ranges, - ha_rows rows, - ulonglong blocks) +IO_AND_CPU_COST +handler::ha_keyread_clustered_time(uint index, ulong ranges, + ha_rows rows, + ulonglong blocks) { if (rows < ranges) rows= ranges; IO_AND_CPU_COST cost= keyread_time(index, ranges, rows, blocks); - return (cost.io * DISK_READ_RATIO + - cost.cpu + ranges * ROW_LOOKUP_COST + - (rows - ranges) * ROW_NEXT_FIND_COST + - rows * ROW_COPY_COST); + cost.cpu+= (ranges * ROW_LOOKUP_COST + (rows - ranges) * ROW_NEXT_FIND_COST); + return cost; } THD *handler::ha_thd(void) const diff --git a/sql/handler.h b/sql/handler.h index 9ad868a7692..01998189921 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -2777,25 +2777,50 @@ typedef struct st_range_seq_if typedef bool (*SKIP_INDEX_TUPLE_FUNC) (range_seq_t seq, range_id_t range_info); + +/* Separated costs for IO and CPU */ + +struct IO_AND_CPU_COST +{ + double io; + double cpu; + + void add(IO_AND_CPU_COST cost) + { + io+= cost.io; + cpu+= cost.cpu; + } +}; + +/* Cost for reading a row through an index */ +struct ALL_READ_COST +{ + IO_AND_CPU_COST index_cost, row_cost; + longlong max_index_blocks, max_row_blocks; + /* index_only_read = index_cost + copy_cost */ + double copy_cost; + + void reset() + { + row_cost= {0,0}; + index_cost= {0,0}; + max_index_blocks= max_row_blocks= 0; + copy_cost= 0.0; + } +}; + + class Cost_estimate { public: - double io_count; /* number of I/O to fetch records */ double avg_io_cost; /* cost of an average I/O oper. to fetch records */ - double idx_io_count; /* number of I/O to read keys */ - double idx_avg_io_cost; /* cost of an average I/O oper.
to fetch records */ - double cpu_cost; /* Cost of reading the rows based on a key */ - double idx_cpu_cost; /* Cost of reading the key from the index tree */ - double import_cost; /* cost of remote operations */ + double cpu_cost; /* CPU cost unrelated to engine costs */ double comp_cost; /* Cost of comparing found rows with WHERE clause */ double copy_cost; /* Copying the data to 'record' */ - double mem_cost; /* cost of used memory */ double limit_cost; /* Total cost when restricting rows with limit */ - static constexpr double IO_COEFF= 1; - static constexpr double CPU_COEFF= 1; - static constexpr double MEM_COEFF= 1; - static constexpr double IMPORT_COEFF= 1; + IO_AND_CPU_COST index_cost; + IO_AND_CPU_COST row_cost; Cost_estimate() { @@ -2809,30 +2834,18 @@ public: double total_cost() const { - return IO_COEFF*io_count*avg_io_cost + - IO_COEFF*idx_io_count*idx_avg_io_cost + - CPU_COEFF*(cpu_cost + idx_cpu_cost + comp_cost + copy_cost) + - MEM_COEFF*mem_cost + IMPORT_COEFF*import_cost; + DBUG_ASSERT(avg_io_cost != 0.0 || index_cost.io + row_cost.io == 0); + return ((index_cost.io + row_cost.io) * avg_io_cost + + index_cost.cpu + row_cost.cpu + comp_cost + copy_cost + + cpu_cost); } - /* - Cost of fetching a key and use the key to find a row (if not clustered or - covering key). Does not include row copy or compare with WHERE clause. - */ - double find_cost() const + /* Cost for just fetching and copying a row (no compare costs) */ + double fetch_cost() const { - return IO_COEFF*io_count*avg_io_cost + - IO_COEFF*idx_io_count*idx_avg_io_cost + - CPU_COEFF*(cpu_cost + idx_cpu_cost) + - MEM_COEFF*mem_cost + IMPORT_COEFF*import_cost; - } - - /* - Cost of comparing the row with the WHERE clause - */ - inline double compare_cost() const - { - return CPU_COEFF*comp_cost; + DBUG_ASSERT(avg_io_cost != 0.0 || index_cost.io + row_cost.io == 0); + return ((index_cost.io + row_cost.io) * avg_io_cost + + index_cost.cpu + row_cost.cpu + copy_cost); } /* @@ -2840,72 +2853,48 @@ public: */ inline double data_copy_cost() const { - return CPU_COEFF*copy_cost; + return copy_cost; } - /* Cost of finding an index entry, without copying or comparing it */ - double index_only_cost() + /* + Multiply costs to simulate a scan that is repeated 'n' times. + We assume that io blocks will be cached and we only + allocate memory once.
There should also be no import_cost + that needs to be done multiple times + */ + void multiply(uint n) { - return IO_COEFF*idx_io_count*idx_avg_io_cost + - CPU_COEFF*idx_cpu_cost; + index_cost.io*= n; + index_cost.cpu*= n; + row_cost.io*= n; + row_cost.cpu*= n; + copy_cost*= n; + comp_cost*= n; + cpu_cost*= n; + } + + void add(Cost_estimate *cost) + { + DBUG_ASSERT(cost->avg_io_cost != 0.0 || (index_cost.io + row_cost.io == 0)); + avg_io_cost= cost->avg_io_cost; + index_cost.io+= cost->index_cost.io; + index_cost.cpu+= cost->index_cost.cpu; + row_cost.io+= cost->row_cost.io; + row_cost.cpu+= cost->row_cost.cpu; + copy_cost+= cost->copy_cost; + comp_cost+= cost->comp_cost; + cpu_cost+= cost->cpu_cost; } inline void reset() { - avg_io_cost= 1.0; - idx_avg_io_cost= 1.0; - io_count= idx_io_count= cpu_cost= idx_cpu_cost= mem_cost= import_cost= 0.0; - comp_cost= copy_cost= limit_cost= 0.0; - } - - void multiply(double m) - { - io_count *= m; - cpu_cost *= m; - idx_io_count *= m; - idx_cpu_cost *= m; - import_cost *= m; - comp_cost *= m; - limit_cost*= m; - /* Don't multiply mem_cost */ - } - - void add(const Cost_estimate* cost) - { - if (cost->io_count != 0.0) - { - double io_count_sum= io_count + cost->io_count; - avg_io_cost= (io_count * avg_io_cost + - cost->io_count * cost->avg_io_cost) - /io_count_sum; - io_count= io_count_sum; - } - if (cost->idx_io_count != 0.0) - { - double idx_io_count_sum= idx_io_count + cost->idx_io_count; - idx_avg_io_cost= (idx_io_count * idx_avg_io_cost + - cost->idx_io_count * cost->idx_avg_io_cost) - /idx_io_count_sum; - idx_io_count= idx_io_count_sum; - } - cpu_cost += cost->cpu_cost; - idx_cpu_cost += cost->idx_cpu_cost; - import_cost += cost->import_cost; - comp_cost+= cost->comp_cost; - limit_cost+= cost->limit_cost; - } - - void add_io(double add_io_cnt, double add_avg_cost) - { - /* In edge cases add_io_cnt may be zero */ - if (add_io_cnt > 0) - { - double io_count_sum= io_count + add_io_cnt; - avg_io_cost= (io_count * avg_io_cost + - add_io_cnt * add_avg_cost) / io_count_sum; - io_count= io_count_sum; - } + avg_io_cost= 0; + comp_cost= cpu_cost= 0.0; + copy_cost= limit_cost= 0.0; + index_cost= {0,0}; + row_cost= {0,0}; } + inline void reset(handler *file); /* To be used when we go from old single value-based cost calculations to @@ -2914,13 +2903,10 @@ public: void convert_from_cost(double cost) { reset(); - io_count= cost; + cpu_cost= cost; } }; -void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted, - Cost_estimate *cost); - /* Indicates that all scanned ranges will be singlepoint (aka equality) ranges. The ranges may not use the full key but all of them will use the same number @@ -3094,20 +3080,6 @@ enum class Compare_keys : uint32_t NotEqual }; -/* Cost for reading a row through an index */ -struct INDEX_READ_COST -{ - double read_cost; - double index_only_cost; -}; - -/* Separated costs for IO and CPU. 
For handler::keyread_time() */ -struct IO_AND_CPU_COST -{ - double io; - double cpu; -}; - /** The handler class is the interface for dynamically loadable @@ -3183,6 +3155,7 @@ public: HANDLER_BUFFER *multi_range_buffer; /* MRR buffer info */ uint ranges_in_seq; /* Total number of ranges in the traversed sequence */ /** Current range (the one we're now returning rows from) */ + KEY_MULTI_RANGE mrr_cur_range; /** The following are for read_range() */ @@ -3610,6 +3583,58 @@ public: } virtual void change_table_ptr(TABLE *table_arg, TABLE_SHARE *share); + inline double io_cost(IO_AND_CPU_COST cost) + { + return cost.io * DISK_READ_COST * DISK_READ_RATIO; + } + + inline double cost(IO_AND_CPU_COST cost) + { + return io_cost(cost) + cost.cpu; + } + + /* + Calculate the cost, capping io_blocks to the given maximum. + This is done here instead of earlier to allow filtering to work + with the original io_block counts. + */ + inline double cost(ALL_READ_COST *cost) + { + double blocks= (MY_MIN(cost->index_cost.io, (double) cost->max_index_blocks) + + MY_MIN(cost->row_cost.io, (double) cost->max_row_blocks)); + return ((cost->index_cost.cpu + cost->row_cost.cpu + cost->copy_cost) + + blocks * DISK_READ_COST * DISK_READ_RATIO); + } + + /* + Calculate cost when we are going to execute the given read method + multiple times + */ + inline double cost_for_reading_multiple_times(double multiple, + ALL_READ_COST *cost) + { + double blocks= (MY_MIN(cost->index_cost.io * multiple, + (double) cost->max_index_blocks) + + MY_MIN(cost->row_cost.io * multiple, + (double) cost->max_row_blocks)); + return ((cost->index_cost.cpu + cost->row_cost.cpu + cost->copy_cost) * + multiple + + blocks * DISK_READ_COST * DISK_READ_RATIO); + } + + inline ulonglong row_blocks() + { + return (stats.data_file_length + IO_SIZE-1) / IO_SIZE; + } + + virtual ulonglong index_blocks(uint index, uint ranges, ha_rows rows); + + inline ulonglong index_blocks(uint index) + { + return index_blocks(index, 1, stats.records); + } + /* Time for a full table data scan. To be overridden by engines, should not be used by the sql level.
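Note that ha_scan_time() now returns a split IO_AND_CPU_COST; callers that need a single cost number apply handler::cost() to the result, as SQL_SELECT::test_quick_select() does with ha_scan_and_compare_time().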
*/ - inline double ha_scan_time(ha_rows rows) + inline IO_AND_CPU_COST ha_scan_time(ha_rows rows) { IO_AND_CPU_COST cost= scan_time(); - return (cost.io * DISK_READ_RATIO + - cost.cpu + TABLE_SCAN_SETUP_COST + - (double) rows * (ROW_NEXT_FIND_COST + ROW_COPY_COST)); + cost.cpu+= (TABLE_SCAN_SETUP_COST + + (double) rows * (ROW_NEXT_FIND_COST + ROW_COPY_COST)); + return cost; } /* Time for a full table scan, fetching the rows from the table and comparing the row with the where clause */ - inline double ha_scan_and_compare_time(ha_rows rows) + inline IO_AND_CPU_COST ha_scan_and_compare_time(ha_rows rows) { - return ha_scan_time(rows) + (double) rows * WHERE_COST; - } - - /* Cost of (random) reading a block of IO_SIZE */ - virtual double avg_io_cost() - { - return DISK_READ_COST; + IO_AND_CPU_COST cost= ha_scan_time(rows); + cost.cpu+= (double) rows * WHERE_COST; + return cost; } /* @@ -3684,7 +3705,7 @@ protected: double r= rows2double(rows); return { - r * avg_io_cost() * stats.block_size/IO_SIZE, // Blocks read + r * ((stats.block_size + IO_SIZE -1 )/IO_SIZE), // Blocks read r * INDEX_BLOCK_COPY_COST // Copy block from cache }; } @@ -3699,11 +3720,12 @@ public: row). */ - inline double ha_rnd_pos_time(ha_rows rows) + inline IO_AND_CPU_COST ha_rnd_pos_time(ha_rows rows) { IO_AND_CPU_COST cost= rnd_pos_time(rows); - return (cost.io * DISK_READ_RATIO + - cost.cpu + rows2double(rows) * (ROW_LOOKUP_COST + ROW_COPY_COST)); + set_if_smaller(cost.io, (double) row_blocks()); + cost.cpu+= rows2double(rows) * (ROW_LOOKUP_COST + ROW_COPY_COST); + return cost; } /* @@ -3712,20 +3734,24 @@ public: but that may change in the future after we do more cost checks for more engines. */ - inline double ha_rnd_pos_call_time(ha_rows rows) + inline IO_AND_CPU_COST ha_rnd_pos_call_time(ha_rows rows) { IO_AND_CPU_COST cost= rnd_pos_time(rows); - return (cost.io * DISK_READ_RATIO + - cost.cpu + rows2double(rows) * (ROW_LOOKUP_COST + ROW_COPY_COST)); + set_if_smaller(cost.io, (double) row_blocks()); + cost.cpu+= rows2double(rows) * (ROW_LOOKUP_COST + ROW_COPY_COST); + return cost; } - inline double ha_rnd_pos_call_and_compare_time(ha_rows rows) + inline IO_AND_CPU_COST ha_rnd_pos_call_and_compare_time(ha_rows rows) { - return (ha_rnd_pos_call_time(rows) + rows2double(rows) * WHERE_COST); + IO_AND_CPU_COST cost; + cost= ha_rnd_pos_call_time(rows); + cost.cpu+= rows2double(rows) * WHERE_COST; + return cost; } /** - Calculate cost of 'index_only' scan for given index, a number of reanges + Calculate cost of 'index_only' scan for given index, a number of ranges and number of records. @param index Index to read @@ -3742,25 +3768,30 @@ public: Calculate cost of 'keyread' scan for given index and number of records including fetching the key to the 'record' buffer. 
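On top of keyread_time(), the returned cost adds ranges * KEY_LOOKUP_COST for the initial key lookups and (rows - ranges) * KEY_NEXT_FIND_COST for the following key reads.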
*/ - double ha_keyread_time(uint index, ulong ranges, ha_rows rows, - ulonglong blocks); + IO_AND_CPU_COST ha_keyread_time(uint index, ulong ranges, ha_rows rows, + ulonglong blocks); /* Same as above, but take into account copying the key to the SQL layer */ - inline double ha_keyread_and_copy_time(uint index, ulong ranges, - ha_rows rows, ulonglong blocks) + inline IO_AND_CPU_COST ha_keyread_and_copy_time(uint index, ulong ranges, + ha_rows rows, + ulonglong blocks) { - return (ha_keyread_time(index, ranges, rows, blocks) + - (double) rows * KEY_COPY_COST); + IO_AND_CPU_COST cost= ha_keyread_time(index, ranges, rows, blocks); + cost.cpu+= (double) rows * KEY_COPY_COST; + return cost; } - inline double ha_keyread_and_compare_time(uint index, ulong ranges, - ha_rows rows, ulonglong blocks) + inline IO_AND_CPU_COST ha_keyread_and_compare_time(uint index, ulong ranges, + ha_rows rows, + ulonglong blocks) { - return (ha_keyread_time(index, ranges, rows, blocks) + - (double) rows * (KEY_COPY_COST + WHERE_COST)); + IO_AND_CPU_COST cost= ha_keyread_time(index, ranges, rows, blocks); + cost.cpu+= (double) rows * (KEY_COPY_COST + WHERE_COST); + return cost; } - double ha_keyread_clustered_and_copy_time(uint index, ulong ranges, + IO_AND_CPU_COST ha_keyread_clustered_time(uint index, + ulong ranges, ha_rows rows, ulonglong blocks); /* @@ -3776,21 +3807,23 @@ protected: public: /* Cost of doing a full index scan */ - inline double ha_key_scan_time(uint index, ha_rows rows) + inline IO_AND_CPU_COST ha_key_scan_time(uint index, ha_rows rows) { IO_AND_CPU_COST cost= key_scan_time(index, rows); - return (cost.io * DISK_READ_RATIO + - cost.cpu + INDEX_SCAN_SETUP_COST + KEY_LOOKUP_COST + - (double) rows * (KEY_NEXT_FIND_COST + KEY_COPY_COST)); + cost.cpu+= (INDEX_SCAN_SETUP_COST + KEY_LOOKUP_COST + + (double) rows * (KEY_NEXT_FIND_COST + KEY_COPY_COST)); + return cost; } /* Cost of doing a full index scan with record copy and compare @param rows Rows from stat tables */ - inline double ha_key_scan_and_compare_time(uint index, ha_rows rows) + inline IO_AND_CPU_COST ha_key_scan_and_compare_time(uint index, ha_rows rows) { - return ha_key_scan_time(index, rows) + (double) rows * WHERE_COST; + IO_AND_CPU_COST cost= ha_key_scan_time(index, rows); + cost.cpu+= (double) rows * WHERE_COST; + return cost; } virtual const key_map *keys_to_use_for_scanning() { return &key_map_empty; } @@ -5602,4 +5635,10 @@ uint ha_count_rw_2pc(THD *thd, bool all); uint ha_check_and_coalesce_trx_read_only(THD *thd, Ha_trx_info *ha_list, bool all); +inline void Cost_estimate::reset(handler *file) +{ + reset(); + avg_io_cost= file->DISK_READ_COST * file->DISK_READ_RATIO; +} + #endif /* HANDLER_INCLUDED */ diff --git a/sql/multi_range_read.cc b/sql/multi_range_read.cc index e4fd1e75176..a71667ab9fe 100644 --- a/sql/multi_range_read.cc +++ b/sql/multi_range_read.cc @@ -22,6 +22,9 @@ #include "rowid_filter.h" #include "optimizer_defaults.h" +static void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted, + Cost_estimate *cost); + /* The following calculation is the same as in multi_range_read_info() */ @@ -32,34 +35,35 @@ void handler::calculate_costs(Cost_estimate *cost, uint keyno, ulonglong io_blocks, ulonglong unassigned_single_point_ranges) { - double key_cost; - cost->reset(); - cost->avg_io_cost= cost->idx_avg_io_cost= 0; // Not used!
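+  /* reset(this) also sets avg_io_cost= DISK_READ_COST * DISK_READ_RATIO */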
+ cost->reset(this); if (!is_clustering_key(keyno)) { - key_cost= ha_keyread_time(keyno, n_ranges, total_rows, io_blocks); - cost->idx_cpu_cost= key_cost; + cost->index_cost= ha_keyread_time(keyno, n_ranges, total_rows, io_blocks); if (!(flags & HA_MRR_INDEX_ONLY)) { /* ha_rnd_pos_time includes ROW_COPY_COST */ - cost->cpu_cost= ha_rnd_pos_time(total_rows); + cost->row_cost= ha_rnd_pos_time(total_rows); + /* Adjust io cost to data size */ + cost->row_cost.io= MY_MIN(cost->row_cost.io, row_blocks()); } else { /* Index only read */ - cost->copy_cost= rows2double(total_rows) * KEY_COPY_COST; + cost->copy_cost= rows2double(total_rows) * KEY_COPY_COST; } } else { /* Clustered index */ - io_blocks+= unassigned_single_point_ranges; - key_cost= ha_keyread_time(keyno, n_ranges, total_rows, io_blocks); - cost->idx_cpu_cost= key_cost; - cost->copy_cost= rows2double(total_rows) * ROW_COPY_COST; + io_blocks= unassigned_single_point_ranges; + cost->index_cost= ha_keyread_time(keyno, n_ranges, total_rows, io_blocks); + cost->copy_cost= rows2double(total_rows) * ROW_COPY_COST; } + /* Adjust io cost to data size */ + cost->index_cost.io= MY_MIN(cost->index_cost.io, index_blocks(keyno)); + cost->comp_cost= (rows2double(total_rows) * WHERE_COST + MULTI_RANGE_READ_SETUP_COST); } @@ -357,7 +361,7 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, { /* Calculate what the cost would be if we only have to read 'top_limit' - rows. This is the lowest possible cost fwhen using the range + rows. This is the lowest possible cost when using the range when we find the 'accepted rows' at once. */ Cost_estimate limit_cost; @@ -365,16 +369,14 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, io_blocks, unassigned_single_point_ranges); cost->limit_cost= limit_cost.total_cost(); } + DBUG_PRINT("statistics", + ("key: %s rows: %llu total_cost: %.3f io_blocks: %llu " + "cpu_cost: %.3f", + table->s->keynames.type_names[keyno], + (ulonglong) total_rows, cost->total_cost(), + (ulonglong) (cost->row_cost.io + cost->index_cost.io), + (double) (cost->row_cost.cpu + cost->index_cost.cpu))); } - DBUG_PRINT("statistics", - ("key: %s rows: %llu total_cost: %.3f io_blocks: %llu " - "idx_io_count: %.3f cpu_cost: %.3f io_count: %.3f " - "compare_cost: %.3f", - table->s->keynames.type_names[keyno], - (ulonglong) total_rows, cost->total_cost(), - (ulonglong) io_blocks, - cost->idx_io_count, cost->cpu_cost, cost->io_count, - cost->comp_cost)); DBUG_RETURN(total_rows); } @@ -413,7 +415,8 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, other Error or can't perform the requested scan */ -ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint total_rows, +ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, + uint total_rows, uint key_parts, uint *bufsz, uint *flags, Cost_estimate *cost) { @@ -426,17 +429,17 @@ ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint total_row *bufsz= 0; /* Default implementation doesn't need a buffer */ *flags |= HA_MRR_USE_DEFAULT_IMPL; - cost->reset(); + cost->reset(this); + /* Produce the same cost as non-MRR code does */ if (!is_clustering_key(keyno)) { - double key_cost= ha_keyread_time(keyno, n_ranges, total_rows, 0); - cost->idx_cpu_cost= key_cost; + cost->index_cost= ha_keyread_time(keyno, n_ranges, total_rows, 0); if (!(*flags & HA_MRR_INDEX_ONLY)) { /* ha_rnd_pos_time includes ROW_COPY_COST */ - cost->cpu_cost= ha_rnd_pos_time(total_rows); + cost->row_cost= ha_rnd_pos_time(total_rows); } else { @@ 
-447,7 +450,8 @@ ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint total_row else { /* Clustering key */ - cost->cpu_cost= ha_keyread_time(keyno, n_ranges, total_rows, 0); + cost->index_cost= ha_keyread_clustered_time(keyno, n_ranges, total_rows, + 0); cost->copy_cost= rows2double(total_rows) * ROW_COPY_COST; } cost->comp_cost= rows2double(total_rows) * WHERE_COST; @@ -1966,7 +1970,8 @@ int DsMrr_impl::dsmrr_explain_info(uint mrr_mode, char *str, size_t size) } -static void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, Cost_estimate *cost); +static void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, + Cost_estimate *cost); /** @@ -1997,7 +2002,6 @@ bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags, ha_rows rows_in_full_step; ha_rows rows_in_last_step; uint n_full_steps; - double index_read_cost; elem_size= primary_file->ref_length + sizeof(void*) * (!MY_TEST(flags & HA_MRR_NO_ASSOCIATION)); @@ -2030,6 +2034,8 @@ bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags, rows_in_full_step= max_buff_entries; rows_in_last_step= rows % max_buff_entries; + cost->reset(primary_file); + /* Adjust buffer size if we expect to use only part of the buffer */ if (n_full_steps) { @@ -2038,7 +2044,6 @@ bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags, } else { - cost->reset(); *buffer_size= ((uint) MY_MAX(*buffer_size, (size_t)(1.2*rows_in_last_step) * elem_size + primary_file->ref_length + @@ -2046,17 +2051,12 @@ bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags, } Cost_estimate last_step_cost; + last_step_cost.avg_io_cost= cost->avg_io_cost; get_sort_and_sweep_cost(table, rows_in_last_step, &last_step_cost); cost->add(&last_step_cost); - if (n_full_steps != 0) - cost->mem_cost= *buffer_size; - else - cost->mem_cost= (double)rows_in_last_step * elem_size; - /* Total cost of all index accesses */ - index_read_cost= primary_file->ha_keyread_and_copy_time(keynr, 1, rows, 0); - cost->add_io(index_read_cost, 1 /* Random seeks */); + cost->index_cost= primary_file->ha_keyread_and_copy_time(keynr, 1, rows, 0); return FALSE; } @@ -2085,8 +2085,6 @@ void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, Cost_estimate *cost) cmp_op= 3; cost->cpu_cost += cmp_op * log2(cmp_op); } - else - cost->reset(); } @@ -2100,14 +2098,13 @@ void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, Cost_estimate *cost) @param cost OUT The cost. */ -void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted, - Cost_estimate *cost) +static void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted, + Cost_estimate *cost) { DBUG_ENTER("get_sweep_read_cost"); - cost->reset(); #ifndef OLD_SWEEP_COST - cost->cpu_cost= table->file->ha_rnd_pos_call_time(nrows); + cost->row_cost= table->file->ha_rnd_pos_call_time(nrows); #else if (table->file->pk_is_clustering_key(table->s->primary_key)) { diff --git a/sql/opt_range.cc b/sql/opt_range.cc index b2e109a5a72..215fe603623 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -2713,6 +2713,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, TABLE_READ_PLAN *best_trp= NULL; SEL_ARG **backup_keys= 0; ha_rows table_records= head->stat_records(); + handler *file= head->file; /* We trust that if stat_records() is 0 the table is really empty! 
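(make_join_statistics() now also sets file->stats.records= 0 for const tables found to have no rows, which keeps this assumption consistent.)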
*/ bool impossible_range= table_records == 0; DBUG_ENTER("SQL_SELECT::test_quick_select"); @@ -2732,14 +2733,14 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, DBUG_RETURN(0); records= table_records; notnull_cond= head->notnull_cond; - if (head->file->ha_table_flags() & HA_NON_COMPARABLE_ROWID) + if (file->ha_table_flags() & HA_NON_COMPARABLE_ROWID) only_single_index_range_scan= 1; if (head->force_index || force_quick_range) read_time= DBL_MAX; else { - read_time= head->file->ha_scan_and_compare_time(records); + read_time= file->cost(file->ha_scan_and_compare_time(records)); if (limit < records) notnull_cond= NULL; } @@ -2775,7 +2776,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, /* set up parameter that is passed to all functions */ param.thd= thd; - param.baseflag= head->file->ha_table_flags(); + param.baseflag= file->ha_table_flags(); param.prev_tables=prev_tables | const_tables; param.read_tables=read_tables; param.current_table= head->map; @@ -2884,8 +2885,9 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, { double key_read_time; uint key_for_use= find_shortest_key(head, &head->covering_keys); - key_read_time= head->file->ha_key_scan_and_compare_time(key_for_use, - records); + key_read_time= file->cost(file-> + ha_key_scan_and_compare_time(key_for_use, + records)); DBUG_PRINT("info", ("'all'+'using index' scan will be using key %d, " "read time %g", key_for_use, key_read_time)); @@ -5095,9 +5097,15 @@ static double get_sweep_read_cost(const PARAM *param, ha_rows records, { DBUG_ENTER("get_sweep_read_cost"); #ifndef OLD_SWEEP_COST - double cost= (param->table->file->ha_rnd_pos_call_time(records) + - (add_time_for_compare ? - records * param->thd->variables.optimizer_where_cost : 0)); + handler *file= param->table->file; + IO_AND_CPU_COST engine_cost= file->ha_rnd_pos_call_time(records); + double cost; + if (add_time_for_compare) + { + engine_cost.cpu+= records * param->thd->variables.optimizer_where_cost; + } + cost= file->cost(engine_cost); + DBUG_PRINT("return", ("cost: %g", cost)); DBUG_RETURN(cost); #else @@ -5481,9 +5489,9 @@ skip_to_ror_scan: double cost; if ((*cur_child)->is_ror) { - /* Ok, we have index_only cost, now get full rows lokoup cost */ - cost= param->table->file-> - ha_rnd_pos_call_and_compare_time((*cur_child)->records); + handler *file= param->table->file; + /* Ok, we have index_only cost, now get full rows scan cost */ + cost= file->cost(file->ha_rnd_pos_call_and_compare_time((*cur_child)->records)); } else cost= read_time; @@ -6681,6 +6689,7 @@ ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg) ROR_SCAN_INFO *ror_scan; my_bitmap_map *bitmap_buf; uint keynr; + handler *file= param->table->file; DBUG_ENTER("make_ror_scan"); if (!(ror_scan= (ROR_SCAN_INFO*)alloc_root(param->mem_root, @@ -6690,7 +6699,7 @@ ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg) ror_scan->idx= idx; ror_scan->keynr= keynr= param->real_keynr[idx]; ror_scan->key_rec_length= (param->table->key_info[keynr].key_length + - param->table->file->ref_length); + file->ref_length); ror_scan->sel_arg= sel_arg; ror_scan->records= param->quick_rows[keynr]; @@ -6717,8 +6726,8 @@ ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg) ror queue. 
*/ ror_scan->index_read_cost= - param->table->file->ha_keyread_and_copy_time(ror_scan->keynr, 1, - ror_scan->records, 0); + file->cost(file->ha_keyread_and_copy_time(ror_scan->keynr, 1, + ror_scan->records, 0)); DBUG_RETURN(ror_scan); } @@ -7664,8 +7673,8 @@ static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree, Json_writer_object trace_idx(thd); trace_idx.add("index", param->table->key_info[keynr].name); - found_records= check_quick_select(param, idx, limit, read_index_only, key, - for_range_access, &mrr_flags, + found_records= check_quick_select(param, idx, limit, read_index_only, + key, for_range_access, &mrr_flags, &buf_size, &cost, &is_ror_scan); if (found_records == HA_POS_ERROR || @@ -11868,22 +11877,10 @@ ha_rows check_quick_select(PARAM *param, uint idx, ha_rows limit, rows) : 1.0); // ok as rows is 0 range->rows= rows; - /* cost of finding a row without copy or checking the where */ - range->find_cost= cost->find_cost(); - /* cost of finding a row copying it to the row buffer */ - range->fetch_cost= range->find_cost + cost->data_copy_cost(); - /* Add comparing it to the where. Same as cost.total_cost() */ - range->cost= (range->fetch_cost + cost->compare_cost()); - /* Calculate the cost of just finding the key. Used by filtering */ - if (param->table->file->is_clustering_key(keynr)) - range->index_only_cost= range->find_cost; - else - { - range->index_only_cost= cost->index_only_cost(); - DBUG_ASSERT(!(*mrr_flags & HA_MRR_INDEX_ONLY) || - range->index_only_cost == - range->find_cost); - } + range->cost= *cost; + range->max_index_blocks= file->index_blocks(keynr, range->ranges, + rows); + range->max_row_blocks= MY_MIN(file->row_blocks(), rows * file->stats.block_size / IO_SIZE); range->first_key_part_has_only_one_value= check_if_first_key_part_has_only_one_value(tree); } @@ -15120,8 +15117,8 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts, 1); if (keys_per_group == 0) /* If there is no statistics try to guess */ { - /* each group contains 1% of all records */ - keys_per_group= (records / 100) + 1; + /* each group contains 10% of all records */ + keys_per_group= (records / 10) + 1; } } if (keys_per_group > 1) @@ -15168,12 +15165,11 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts, } DBUG_ASSERT(num_groups <= records); - /* Calculate the number of blocks we will touch for the table or range scan */ num_blocks= (records * key_length / INDEX_BLOCK_FILL_FACTOR_DIV * INDEX_BLOCK_FILL_FACTOR_MUL) / file->stats.block_size + 1; - io_cost= (have_max) ? num_groups*2 : num_groups; + io_cost= (have_max) ? num_groups * 2 : num_groups; set_if_smaller(io_cost, num_blocks); /* @@ -15184,9 +15180,10 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts, group. 
*/ uint keyno= (uint) (index_info - table->key_info); - *read_cost= file->ha_keyread_and_compare_time(keyno, (ulong) num_groups, - num_groups, - io_cost); + *read_cost= file->cost(file->ha_keyread_and_compare_time(keyno, + (ulong) num_groups, + num_groups, + io_cost)); *out_records= num_groups; DBUG_PRINT("info", diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc index 93459c49c23..f58410ccb15 100644 --- a/sql/opt_subselect.cc +++ b/sql/opt_subselect.cc @@ -1472,7 +1472,8 @@ void get_delayed_table_estimates(TABLE *table, hash_sj_engine->tmp_table->s->reclength); /* Do like in handler::ha_scan_and_compare_time, but ignore the where cost */ - *scan_time= ((data_size/IO_SIZE * table->file->avg_io_cost()) + + *scan_time= ((data_size/IO_SIZE * table->file->DISK_READ_COST * + table->file->DISK_READ_RATIO) + *out_rows * file->ROW_COPY_COST); } @@ -2521,7 +2522,7 @@ bool optimize_semijoin_nests(JOIN *join, table_map all_table_map) &subjoin_read_time, &subjoin_out_rows); - sjm->materialization_cost.convert_from_cost(subjoin_read_time); + sjm->materialization_cost= subjoin_read_time; sjm->rows_with_duplicates= sjm->rows= subjoin_out_rows; /* @@ -2586,8 +2587,7 @@ bool optimize_semijoin_nests(JOIN *join, table_map all_table_map) temporary table. Note that sjm->materialization_cost already includes row copy and compare costs of finding the original row. */ - sjm->materialization_cost.add_io(subjoin_out_rows, cost.write); - sjm->materialization_cost.copy_cost+= cost.create; + sjm->materialization_cost+= subjoin_out_rows * cost.write + cost.create; /* Set the cost to do a full scan of the temptable (will need this to @@ -2600,10 +2600,10 @@ bool optimize_semijoin_nests(JOIN *join, table_map all_table_map) total_cost= (scan_cost * cost.cache_hit_ratio * cost.avg_io_cost + TABLE_SCAN_SETUP_COST_THD(thd) + row_copy_cost * sjm->rows); - sjm->scan_cost.convert_from_cost(total_cost); + sjm->scan_cost= total_cost; /* When reading a row, we have also to check the where clause */ - sjm->lookup_cost.convert_from_cost(cost.lookup + WHERE_COST_THD(thd)); + sjm->lookup_cost= cost.lookup + WHERE_COST_THD(thd); sj_nest->sj_mat_info= sjm; DBUG_EXECUTE("opt", print_sjm(sjm);); } @@ -3183,9 +3183,9 @@ bool Sj_materialization_picker::check_qep(JOIN *join, mat_read_time= COST_ADD(prefix_cost, - COST_ADD(mat_info->materialization_cost.total_cost(), + COST_ADD(mat_info->materialization_cost, COST_MULT(prefix_rec_count, - mat_info->lookup_cost.total_cost()))); + mat_info->lookup_cost))); /* NOTE: When we pick to use SJM[-Scan] we don't memcpy its POSITION @@ -3235,9 +3235,9 @@ bool Sj_materialization_picker::check_qep(JOIN *join, /* Add materialization cost */ prefix_cost= COST_ADD(prefix_cost, - COST_ADD(mat_info->materialization_cost.total_cost(), + COST_ADD(mat_info->materialization_cost, COST_MULT(prefix_rec_count, - mat_info->scan_cost.total_cost()))); + mat_info->scan_cost))); prefix_rec_count= COST_MULT(prefix_rec_count, mat_info->rows); uint i; diff --git a/sql/opt_subselect.h b/sql/opt_subselect.h index b0053d3db14..2eb5ff56b02 100644 --- a/sql/opt_subselect.h +++ b/sql/opt_subselect.h @@ -228,15 +228,16 @@ public: { double records, read_time; part1_conds_met= TRUE; + handler *file= s->table->file; DBUG_PRINT("info", ("Can use full index scan for LooseScan")); /* Calculate the cost of complete loose index scan.
*/ - records= rows2double(s->table->file->stats.records); + records= rows2double(file->stats.records); /* The cost is entire index scan cost (divided by 2) */ - read_time= s->table->file->ha_keyread_and_copy_time(key, 1, - (ha_rows) records, - 0); + read_time= file->cost(file->ha_keyread_and_copy_time(key, 1, + (ha_rows) records, + 0)); /* Now find out how many different keys we will get (for now we diff --git a/sql/optimizer_defaults.h b/sql/optimizer_defaults.h index 8d74bb91cc3..26c54464219 100644 --- a/sql/optimizer_defaults.h +++ b/sql/optimizer_defaults.h @@ -175,7 +175,7 @@ #define TMPFILE_CREATE_COST 0.5 // Cost of creating and deleting files #define HEAP_TEMPTABLE_CREATE_COST 0.025 // ms /* Cost taken from HEAP_LOOKUP_COST in ha_heap.cc */ -#define HEAP_TEMPTABLE_LOOKUP_COST (0.00016097*1000 + heap_optimizer_costs.row_copy_cost) +#define HEAP_TEMPTABLE_LOOKUP_COST (0.00016097) #define DISK_TEMPTABLE_LOOKUP_COST(thd) (tmp_table_optimizer_costs.key_lookup_cost + tmp_table_optimizer_costs.row_lookup_cost + tmp_table_optimizer_costs.row_copy_cost) #define DISK_TEMPTABLE_CREATE_COST TMPFILE_CREATE_COST*2 // 2 tmp tables #define DISK_TEMPTABLE_BLOCK_SIZE IO_SIZE diff --git a/sql/rowid_filter.cc b/sql/rowid_filter.cc index 4f713edb47f..4d6bf4ea98c 100644 --- a/sql/rowid_filter.cc +++ b/sql/rowid_filter.cc @@ -485,12 +485,9 @@ void Range_rowid_filter_cost_info::trace_info(THD *thd) */ Range_rowid_filter_cost_info * -TABLE::best_range_rowid_filter_for_partial_join(uint access_key_no, - double records, - double fetch_cost, - double index_only_cost, - double prev_records, - double *records_out) +TABLE::best_range_rowid_filter(uint access_key_no, double records, + double fetch_cost, double index_only_cost, + double prev_records, double *records_out) { if (range_rowid_filter_cost_info_elems == 0 || covering_keys.is_set(access_key_no)) diff --git a/sql/rowid_filter.h b/sql/rowid_filter.h index 91ffd8c065a..f761e1220aa 100644 --- a/sql/rowid_filter.h +++ b/sql/rowid_filter.h @@ -472,18 +472,20 @@ public: friend void TABLE::init_cost_info_for_usable_range_rowid_filters(THD *thd); + /* Best range row id filter for partial join */ friend Range_rowid_filter_cost_info * - TABLE::best_range_rowid_filter_for_partial_join(uint access_key_no, - double records, - double fetch_cost, - double index_only_cost, - double prev_records, - double *records_out); + TABLE::best_range_rowid_filter(uint access_key_no, + double records, + double fetch_cost, + double index_only_cost, + double prev_records, + double *records_out); Range_rowid_filter_cost_info * - apply_filter(THD *thd, TABLE *table, double *cost, double *records_arg, - double *startup_cost, double fetch_cost, - double index_only_cost, uint ranges, double record_count); + apply_filter(THD *thd, TABLE *table, ALL_READ_COST *cost, + double *records_arg, + double *startup_cost, + uint ranges, double record_count); }; #endif /* ROWID_FILTER_INCLUDED */ diff --git a/sql/sql_class.h b/sql/sql_class.h index 6cdb553629c..69907208dec 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -6814,13 +6814,13 @@ public: /* Cost to materialize - execute the sub-join and write rows into temp.table */ - Cost_estimate materialization_cost; + double materialization_cost; /* Cost to make one lookup in the temptable */ - Cost_estimate lookup_cost; + double lookup_cost; /* Cost of scanning the materialized table */ - Cost_estimate scan_cost; + double scan_cost; /* --- Execution structures ---------- */ diff --git a/sql/sql_select.cc b/sql/sql_select.cc index
e92f22ebc4a..61f609aa592 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -5445,6 +5445,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, { set_position(join,const_count++,s,(KEYUSE*) 0); no_rows_const_tables |= table->map; + table->file->stats.records= 0; } } @@ -7861,42 +7862,49 @@ static double matching_candidates_in_table(JOIN_TAB *s, WHERE_COST cost is not added to any result. */ -INDEX_READ_COST cost_for_index_read(const THD *thd, const TABLE *table, - uint key, - ha_rows records, ha_rows worst_seeks) +ALL_READ_COST cost_for_index_read(const THD *thd, const TABLE *table, + uint key, ha_rows records, + ha_rows worst_seeks) { - INDEX_READ_COST cost; + ALL_READ_COST cost; handler *file= table->file; - double rows_adjusted; + ha_rows max_seeks; DBUG_ENTER("cost_for_index_read"); - rows_adjusted= MY_MIN(rows2double(records), (double) thd->variables.max_seeks_for_key); - set_if_bigger(rows_adjusted, 1); + max_seeks= (ha_rows) thd->variables.max_seeks_for_key; + set_if_bigger(records, 1); -#ifdef OLD_CODE_LIMITED_SEEKS - set_if_smaller(rows_adjusted, worst_seeks); -#endif if (file->is_clustering_key(key)) { - cost.index_only_cost= - file->ha_keyread_clustered_and_copy_time(key, 1, rows_adjusted, 0); + cost.index_cost= + file->ha_keyread_clustered_time(key, 1, records, 0); + cost.copy_cost= rows2double(records) * file->ROW_COPY_COST; /* There is no 'index_only_read' with a clustered index */ - cost.read_cost= cost.index_only_cost; + cost.row_cost= {0,0}; + /* Capping of index_blocks will happen in handler::cost() */ + cost.max_index_blocks= MY_MIN(file->row_blocks(), max_seeks); + cost.max_row_blocks= 0; } else if (table->covering_keys.is_set(key) && !table->no_keyread) { - cost.index_only_cost= file->ha_keyread_time(key, 1, rows_adjusted, 0); - /* Same computation as in ha_keyread_and_copy_time() */ - cost.read_cost= (cost.index_only_cost + - rows2double(records) * file->KEY_COPY_COST); + cost.index_cost= file->ha_keyread_time(key, 1, records, 0); + cost.row_cost= {0,0}; + cost.copy_cost= rows2double(records) * file->KEY_COPY_COST; + cost.max_index_blocks= MY_MIN(file->index_blocks(key), max_seeks); + cost.max_row_blocks= 0; } else { - cost.index_only_cost= file->ha_keyread_time(key, 1, rows_adjusted, 0); - cost.read_cost= (cost.index_only_cost + file->ha_rnd_pos_time(records)); + cost.index_cost= file->ha_keyread_time(key, 1, records, 0); + /* ha_rnd_pos_time() includes time for copying the row */ + cost.row_cost= file->ha_rnd_pos_time(records); + cost.max_index_blocks= MY_MIN(file->index_blocks(key), max_seeks); + cost.max_row_blocks= MY_MIN(file->row_blocks(), max_seeks); + cost.copy_cost= 0; } - DBUG_PRINT("statistics", ("index_cost: %.3f full_cost: %.3f", - cost.index_only_cost, cost.read_cost)); + DBUG_PRINT("statistics", ("index_cost: %.3f row_cost: %.3f", + file->cost(cost.index_cost), + file->cost(cost.row_cost))); DBUG_RETURN(cost); } @@ -7906,14 +7914,15 @@ INDEX_READ_COST cost_for_index_read(const THD *thd, const TABLE *table, @param thd Thread handler @param table Table - @param cost Pointer to cost for *records_arg rows, not including - WHERE_COST cost. + @param cost Pointer to the current access cost, which does not + include the WHERE_COST cost. Will be updated to new cost if filter is used. @param records_arg Pointer to number of records for the current key. Will be updated to records after filter, if filter is used. @param startup_cost Startup cost. Will be updated if filter is used.
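+  @param ranges          Number of ranges used by the key access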
- @param fetch_cost      Cost of finding the row, without copy or compare cost - @param index_only_cost Cost if fetching '*records_arg' key values @param prev_records Number of record combinations in previous tables @@ -7922,16 +7931,18 @@ INDEX_READ_COST cost_for_index_read(const THD *thd, const TABLE *table, */ Range_rowid_filter_cost_info* Range_rowid_filter_cost_info:: -apply_filter(THD *thd, TABLE *table, double *cost, double *records_arg, - double *startup_cost, double fetch_cost, double index_only_cost, +apply_filter(THD *thd, TABLE *table, ALL_READ_COST *cost, + double *records_arg, + double *startup_cost, uint ranges, double prev_records) { + handler *file= table->file; bool use_filter; - double new_cost, new_total_cost, records= *records_arg, new_records; - double cost_of_accepted_rows, cost_of_rejected_rows; + double new_cost, org_cost, records= *records_arg, new_records; double filter_startup_cost= get_setup_cost(); - double io_cost= table->file->avg_io_cost(); double filter_lookup_cost= records * lookup_cost(); + double tmp; + ALL_READ_COST adjusted_cost; /* Calculate number of resulting rows after filtering @@ -7955,42 +7966,50 @@ apply_filter(THD *thd, TABLE *table, double *cost, double *records_arg, The io_cost is used to take into account that we have to do 1 key lookup to find the first matching key in each range. */ - cost_of_accepted_rows= fetch_cost * selectivity; - cost_of_rejected_rows= index_only_cost * (1-selectivity); - /* - The MAX() is used below to ensure that we take into account the index - read even if selectivity (and thus new_records) would be very low. - */ - new_cost= (MY_MAX(cost_of_accepted_rows, - ranges * table->file->KEY_LOOKUP_COST + - ranges * io_cost * table->file->DISK_READ_RATIO) + - cost_of_rejected_rows + filter_lookup_cost); - new_total_cost= ((new_cost + new_records * WHERE_COST_THD(thd)) * - prev_records + filter_startup_cost); + + adjusted_cost= *cost; + /* With the filter we read only a 'selectivity' fraction of the rows */ + adjusted_cost.row_cost.io*= selectivity; + adjusted_cost.row_cost.cpu*= selectivity; + adjusted_cost.copy_cost*= selectivity; + adjusted_cost.index_cost.cpu+= filter_lookup_cost; + + tmp= prev_records * WHERE_COST_THD(thd); + org_cost= (file->cost_for_reading_multiple_times(prev_records, + cost) + + records * tmp); + + new_cost= (file->cost_for_reading_multiple_times(prev_records, + &adjusted_cost) + + new_records * tmp + filter_startup_cost); DBUG_ASSERT(new_cost >= 0 && new_records >= 0); - use_filter= ((*cost + records * WHERE_COST_THD(thd)) * prev_records > - new_total_cost); + use_filter= new_cost < org_cost; if (unlikely(thd->trace_started())) { Json_writer_object trace_filter(thd, "filter"); trace_filter.add("rowid_filter_key", table->key_info[get_key_no()].name). - add("index_only_cost", index_only_cost). + add("index_only_cost", file->cost(cost->index_cost)). add("filter_startup_cost", filter_startup_cost). add("find_key_and_filter_lookup_cost", filter_lookup_cost). add("filter_selectivity", selectivity). - add("orginal_rows", records). - add("new_rows", new_records). - add("original_found_rows_cost", fetch_cost). - add("new_found_rows_cost", new_cost). - add("cost", new_total_cost). + add("original_rows", records). + add("new_rows", new_records). + add("original_access_cost", file->cost(cost)). + add("with_filter_access_cost", file->cost(&adjusted_cost)). + add("original_found_rows_cost", file->cost(cost->row_cost)).
+ add("with_filter_found_rows_cost", file->cost(adjusted_cost.row_cost)). + add("org_cost", org_cost). + add("filter_cost", new_cost). add("filter_used", use_filter); } if (use_filter) { - *cost= new_cost; + cost->row_cost= adjusted_cost.row_cost; + cost->index_cost= adjusted_cost.index_cost; + cost->copy_cost= adjusted_cost.copy_cost; *records_arg= new_records; (*startup_cost)+= filter_startup_cost; return this; @@ -8060,6 +8079,7 @@ best_access_path(JOIN *join, uint use_cond_selectivity= thd->variables.optimizer_use_condition_selectivity; TABLE *table= s->table; + handler *file= table->file; my_bool found_constraint= 0; /* key_dependent is 0 if all key parts could be used or if there was an @@ -8068,7 +8088,7 @@ best_access_path(JOIN *join, Otherwise it's a bitmap of tables that could improve key usage. */ table_map key_dependent= 0; - double tmp; + ALL_READ_COST tmp; ha_rows rec; MY_BITMAP *eq_join_set= &s->table->eq_join_set; KEYUSE *hj_start_key= 0; @@ -8112,9 +8132,9 @@ best_access_path(JOIN *join, if (s->keyuse) { /* Use key if possible */ KEYUSE *keyuse, *start_key= 0; - double index_only_cost= DBL_MAX; uint max_key_part=0; enum join_type type= JT_UNKNOWN; + double cur_cost; /* Test how we can use keys */ rec= s->records/MATCHING_ROWS_IN_OTHER_TABLE; // Assumed records/key @@ -8245,16 +8265,26 @@ best_access_path(JOIN *join, if (ft_key) { /* - Calculate an adjusted cost based on how many records are read - This will be later multipled by record_count. + Fulltext indexes are preformed the following way: + - In the prepare step it performs the search, collects all positions + in an array, sorts it. + - If optimizer decides to use the ft index access method it simply' + returns positions from the array one by one + - If optimizer decides to use something else (another index, table + scan), then it'll use binary search in the array to find the + position. + + The following code puts the cost down to very small as the prep + step will always be done and the cost to fetch the row from memory + is very small. + Alternatively we could use the cost of an EQ_REF here. */ - tmp= (prev_record_reads(join_positions, idx, found_ref) / - record_count); - set_if_smaller(tmp, 1.0); - index_only_cost= tmp; + tmp.reset(); + tmp.row_cost.cpu= file->ROW_COPY_COST; /* - Really, there should be records=0.0 (yes!) - but 1.0 would be probably safer + We don't know how many records will match. However, we want to have + the fulltext search done early, so we put the number of records + to be very low. */ records= 1.0; type= JT_FT; @@ -8293,25 +8323,21 @@ best_access_path(JOIN *join, if (!found_ref && table->opt_range_keys.is_set(key)) { /* Ensure that the cost is identical to the range cost */ - tmp= table->opt_range[key].fetch_cost; - index_only_cost= table->opt_range[key].index_only_cost; + table->opt_range[key].get_costs(&tmp); } else { - INDEX_READ_COST cost= cost_for_index_read(thd, table, key, - 1,1); - tmp= cost.read_cost; - index_only_cost= cost.index_only_cost; + tmp= cost_for_index_read(thd, table, key, 1, 1); } /* Calculate an adjusted cost based on how many records are read - This will be later multipled by record_count. + This will be multipled by record_count. 
*/ adjusted_cost= (prev_record_reads(join_positions, idx, found_ref) / record_count); set_if_smaller(adjusted_cost, 1.0); - tmp*= adjusted_cost; - index_only_cost*= adjusted_cost; + tmp.row_cost.cpu*= adjusted_cost; + tmp.index_cost.cpu*= adjusted_cost; records= 1.0; } else @@ -8345,8 +8371,8 @@ best_access_path(JOIN *join, /* Ensure that the cost is identical to the range cost */ records= (double) table->opt_range[key].rows; trace_access_idx.add("used_range_estimates", true); - tmp= table->opt_range[key].fetch_cost; - index_only_cost= table->opt_range[key].index_only_cost; + + table->opt_range[key].get_costs(&tmp); goto got_cost2; } /* quick_range couldn't use key! */ @@ -8408,16 +8434,14 @@ best_access_path(JOIN *join, } } /* Calculate the cost of the index access */ - INDEX_READ_COST cost= - cost_for_index_read(thd, table, key, - (ha_rows) records, - (ha_rows) s->worst_seeks); - tmp= cost.read_cost; - index_only_cost= cost.index_only_cost; + tmp= cost_for_index_read(thd, table, key, + (ha_rows) records, + (ha_rows) s->worst_seeks); } } else { + ha_rows tmp_records; type = ref_or_null_part ? JT_REF_OR_NULL : JT_REF; if (unlikely(trace_access_idx.trace_started())) trace_access_idx. @@ -8430,7 +8454,7 @@ best_access_path(JOIN *join, records. */ if ((found_part & 1) && - (!(table->file->index_flags(key, 0, 0) & HA_ONLY_WHOLE_INDEX) || + (!(file->index_flags(key, 0, 0) & HA_ONLY_WHOLE_INDEX) || found_part == PREV_BITS(uint,keyinfo->user_defined_key_parts))) { double extra_cost= 0; @@ -8480,8 +8504,7 @@ best_access_path(JOIN *join, table->opt_range[key].ranges == 1 + MY_TEST(ref_or_null_part)) //(C3) { records= (double) table->opt_range[key].rows; - tmp= table->opt_range[key].fetch_cost; - index_only_cost= table->opt_range[key].index_only_cost; + table->opt_range[key].get_costs(&tmp); /* TODO: Disable opt_range testing below for this range as we can always use this ref instead. @@ -8599,13 +8622,12 @@ best_access_path(JOIN *join, /* Limit the number of matched rows */ set_if_smaller(records, (double) s->records); - tmp= records; - set_if_smaller(tmp, (double) thd->variables.max_seeks_for_key); - INDEX_READ_COST cost= cost_for_index_read(thd, table, key, - (ha_rows) tmp, - (ha_rows) s->worst_seeks); - tmp= cost.read_cost; - index_only_cost= cost.index_only_cost+extra_cost; + tmp_records= records; + set_if_smaller(tmp_records, thd->variables.max_seeks_for_key); + tmp= cost_for_index_read(thd, table, key, + tmp_records, + (ha_rows) s->worst_seeks); + tmp.copy_cost+= extra_cost; } else { @@ -8620,7 +8642,7 @@ best_access_path(JOIN *join, got_cost2: loose_scan_opt.check_ref_access_part2(key, start_key, records, - tmp + startup_cost, + file->cost(&tmp) + startup_cost, found_ref); } /* not ft_key */ @@ -8630,14 +8652,13 @@ best_access_path(JOIN *join, records_best_filter= records_after_filter= records; /* - Check that start_key->key can be used for index access + Check if we can use a filter. Records can be 0 in case of empty tables. */ if ((found_part & 1) && records && (table->file->index_flags(start_key->key,0,1) & HA_DO_RANGE_FILTER_PUSHDOWN)) { - /* If we use filter F with selectivity s, the cost of fetching data by key using this filter will be @@ -8683,38 +8704,39 @@ best_access_path(JOIN *join, number of rows from prev_record_read() and keyread_tmp is 0. These numbers are not usable with rowid filter code.
*/ - filter= - table->best_range_rowid_filter_for_partial_join(start_key->key, - records, - tmp, - index_only_cost, - record_count, - &records_best_filter); + filter= table->best_range_rowid_filter(start_key->key, + records, + file->cost(&tmp), + file->cost(tmp.index_cost), + record_count, + &records_best_filter); set_if_smaller(best.records_out, records_best_filter); if (filter) - filter= filter->apply_filter(thd, table, &tmp, &records_after_filter, + filter= filter->apply_filter(thd, table, &tmp, + &records_after_filter, &startup_cost, - tmp, index_only_cost, 1, record_count); } - tmp= COST_ADD(tmp, records_after_filter * WHERE_COST_THD(thd)); - tmp= COST_MULT(tmp, record_count); - tmp= COST_ADD(tmp, startup_cost); + + tmp.copy_cost+= records_after_filter * WHERE_COST_THD(thd); + cur_cost= file->cost_for_reading_multiple_times(record_count, &tmp); + cur_cost= COST_ADD(cur_cost, startup_cost); + if (unlikely(trace_access_idx.trace_started())) { trace_access_idx. add("rows", records_after_filter). - add("cost", tmp); + add("cost", cur_cost); } /* The COST_EPS is here to ensure we use the first key if there are two 'identical keys' that could be used. */ - if (tmp + COST_EPS < best.cost) + if (cur_cost + COST_EPS < best.cost) { trace_access_idx.add("chosen", true); - best.cost= tmp; + best.cost= cur_cost; /* We use 'records' instead of 'records_after_filter' here as we want to have EXPLAIN print the number of rows found by the key access. @@ -8792,10 +8814,11 @@ best_access_path(JOIN *join, (!(table->map & join->outer_join) || join->allowed_outer_join_with_cache)) // (2) { - double refills, row_copy_cost, cmp_time; + double refills, row_copy_cost, cmp_time, cur_cost; /* Estimate the cost of the hash join access to the table */ double rnd_records= matching_candidates_in_table(s, 0, use_cond_selectivity); + DBUG_ASSERT(rnd_records <= s->found_records); set_if_smaller(best.records_out, rnd_records); /* @@ -8808,16 +8831,16 @@ best_access_path(JOIN *join, Cost of reading rows through opt_range including comparing the rows with the attached WHERE clause. */ - tmp= s->quick->read_time; + cur_cost= s->quick->read_time; } else - tmp= s->cached_scan_and_compare_time; + cur_cost= s->cached_scan_and_compare_time; /* We read the table as many times as join buffer becomes full. 
*/ refills= (1.0 + floor((double) cache_record_length(join,idx) * record_count / (double) thd->variables.join_buff_size)); - tmp= COST_MULT(tmp, refills); + cur_cost= COST_MULT(cur_cost, refills); /* Cost of doing the hash lookup and check all matching rows with the @@ -8831,9 +8854,9 @@ best_access_path(JOIN *join, rnd_records * record_count * HASH_FANOUT * ((idx - join->const_tables) * row_copy_cost + WHERE_COST_THD(thd))); - tmp= COST_ADD(tmp, cmp_time); + cur_cost= COST_ADD(cur_cost, cmp_time); - best.cost= tmp; + best.cost= cur_cost; best.records_read= best.records_after_filter= rows2double(s->records); best.records= rnd_records; best.key= hj_start_key; @@ -8895,13 +8918,13 @@ best_access_path(JOIN *join, s->quick->get_type() != QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX && // (2) best.key && s->quick->index == best.key->key && // (2) best.max_key_part >= table->opt_range[best.key->key].key_parts) &&// (2) - !((table->file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX) && // (3) + !((file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX) && // (3) !table->covering_keys.is_clear_all() && best.key && !s->quick) &&// (3) !(table->force_index_join && best.key && !s->quick) && // (4) !(best.key && table->pos_in_table_list->jtbm_subselect)) // (5) { // Check full join double records_after_filter, org_records; - double records_best_filter; + double records_best_filter, cur_cost; Range_rowid_filter_cost_info *filter= 0; double startup_cost= s->startup_cost; const char *scan_type= ""; @@ -8929,7 +8952,7 @@ best_access_path(JOIN *join, access (see first else-branch below), but we don't take it into account here for range/index_merge access. Find out why this is so. */ - tmp= COST_MULT(s->quick->read_time, record_count); + cur_cost= COST_MULT(s->quick->read_time, record_count); /* Use record count from range optimizer. @@ -8938,6 +8961,7 @@ best_access_path(JOIN *join, */ org_records= records_after_filter= rows2double(s->found_records); records_best_filter= org_records; + set_if_smaller(best.records_out, records_best_filter); if (s->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE) { @@ -8950,37 +8974,32 @@ best_access_path(JOIN *join, registers complications when costs are calculated. 
@@ -8950,37 +8974,32 @@ best_access_path(JOIN *join,
           registers complications when costs are calculated.
         */
         DBUG_ASSERT(range->rows == s->found_records);
-        DBUG_ASSERT((range->cost == 0.0 && s->quick->read_time == 0.0) ||
-                    (range->cost / s->quick->read_time <= 1.0000001 &&
-                     range->cost / s->quick->read_time >= 0.9999999));
+        DBUG_ASSERT((range->cost.total_cost() == 0.0 &&
+                     s->quick->read_time == 0.0) ||
+                    (range->cost.total_cost() / s->quick->read_time <= 1.0000001 &&
+                     range->cost.total_cost() / s->quick->read_time >= 0.9999999));

-        filter=
-          table->best_range_rowid_filter_for_partial_join(key_no,
-                                                          rows2double(range->rows),
-                                                          range->find_cost,
-                                                          range->index_only_cost,
-                                                          record_count,
-                                                          &records_best_filter);
+        range->get_costs(&tmp);
+        filter= table->best_range_rowid_filter(key_no,
+                                               rows2double(range->rows),
+                                               file->cost(&tmp),
+                                               file->cost(tmp.index_cost),
+                                               record_count,
+                                               &records_best_filter);
         set_if_smaller(best.records_out, records_best_filter);
         if (filter)
         {
-          double filter_cost= range->fetch_cost;
-          filter= filter->apply_filter(thd, table, &filter_cost,
+          filter= filter->apply_filter(thd, table, &tmp,
                                        &records_after_filter,
                                        &startup_cost,
-                                       range->fetch_cost,
-                                       range->index_only_cost,
                                        range->ranges,
                                        record_count);
           if (filter)
           {
-            tmp= filter_cost;
-            /* Filter returns cost without WHERE_COST */
-            tmp= COST_ADD(tmp, records_after_filter *
-                          WHERE_COST_THD(thd));
-            tmp= COST_MULT(tmp, record_count);
-            tmp= COST_ADD(tmp, startup_cost);
-            startup_cost= 0; // Avoid adding it later
+            tmp.row_cost.cpu+= records_after_filter * WHERE_COST_THD(thd);
+            cur_cost= file->cost_for_reading_multiple_times(record_count,
+                                                            &tmp);
+            cur_cost= COST_ADD(cur_cost, startup_cost);
+            startup_cost= 0;                   // Avoid adding it again later
             table->opt_range[key_no].selectivity= filter->selectivity;
           }
         }
@@ -8998,6 +9017,7 @@ best_access_path(JOIN *join,
       records_best_filter= records_after_filter=
         matching_candidates_in_table(s, 0, use_cond_selectivity);
       DBUG_ASSERT(records_after_filter <= s->records);
+      DBUG_ASSERT(records_after_filter <= s->found_records);

       set_if_smaller(best.records_out, records_after_filter);

@@ -9007,7 +9027,7 @@ best_access_path(JOIN *join,
       if (s->cached_forced_index_type)
       {
         type= s->cached_forced_index_type;
-        tmp= s->cached_forced_index_cost;
+        cur_cost= s->cached_forced_index_cost;
         forced_index= s->cached_forced_index;
       }
       else
@@ -9023,42 +9043,42 @@ best_access_path(JOIN *join,
         {
           /* Use value from estimate_scan_time */
           forced_index= s->cached_covering_key;
-          tmp= s->cached_scan_and_compare_time;
+          cur_cost= s->cached_scan_and_compare_time;
         }
         else
         {
#ifdef FORCE_INDEX_SHOULD_FORCE_INDEX_SCAN
           /* No cached key, use shortest allowed key */
-          key_map keys= *table->file->keys_to_use_for_scanning();
+          key_map keys= *file->keys_to_use_for_scanning();
           keys.intersect(table->keys_in_use_for_query);
           if ((forced_index= find_shortest_key(table, &keys)) < MAX_KEY)
           {
-            INDEX_READ_COST cost= cost_for_index_read(thd, table,
+            ALL_READ_COST cost= cost_for_index_read(thd, table,
                                                       forced_index,
                                                       s->records,
                                                       s->worst_seeks);
-            tmp= cost.read_cost;
+            cur_cost= file->cost(cost);
             /* Calculate cost of checking the attached WHERE */
-            tmp= COST_ADD(cost.read_cost,
+            cur_cost= COST_ADD(cur_cost,
                           s->records * WHERE_COST_THD(thd));
           }
           else
#endif
           {
             /* No usable key, use table scan */
-            tmp= s->cached_scan_and_compare_time;
+            cur_cost= s->cached_scan_and_compare_time;
             type= JT_ALL;
           }
         }
       }
       else // table scan
       {
-        tmp= s->cached_scan_and_compare_time;
+        cur_cost= s->cached_scan_and_compare_time;
         type= JT_ALL;
       }
       /* Cache result for other calls */
       s->cached_forced_index_type= type;
-      s->cached_forced_index_cost= tmp;
+      s->cached_forced_index_cost= cur_cost;
       s->cached_forced_index= forced_index;
     }

@@ -9078,7 +9098,7 @@ best_access_path(JOIN *join,
         If this is not the first table we have to compare the rows against
         all previous row combinations
       */
-      tmp= COST_MULT(tmp, record_count);
+      cur_cost= COST_MULT(cur_cost, record_count);
     }
     else
     {
@@ -9096,7 +9116,7 @@ best_access_path(JOIN *join,
       refills= (1.0 + floor((double) cache_record_length(join,idx) *
                             (record_count /
                              (double) thd->variables.join_buff_size)));
-      tmp= COST_MULT(tmp, refills);
+      cur_cost= COST_MULT(cur_cost, refills);
       /* We come here only if there are already rows in the join cache */
       DBUG_ASSERT(idx != join->const_tables);
@@ -9112,14 +9132,14 @@ best_access_path(JOIN *join,
                           records_after_filter * record_count *
                           ((idx - join->const_tables) * row_copy_cost +
                            WHERE_COST_THD(thd)));
-      tmp= COST_ADD(tmp, cmp_time);
+      cur_cost= COST_ADD(cur_cost, cmp_time);
     }
   }

   /* Splitting technique cannot be used with join cache */
   if (table->is_splittable())
     startup_cost= table->get_materialization_cost();
-  tmp+= startup_cost;
+  cur_cost+= startup_cost;

   if (unlikely(trace_access_scan.trace_started()))
   {
@@ -9129,7 +9149,7 @@ best_access_path(JOIN *join,
       add("rows", org_records).
       add("rows_after_filter", records_after_filter).
       add("rows_out", best.records_out).
-      add("cost", tmp);
+      add("cost", cur_cost);
     if (type == JT_ALL)
     {
       trace_access_scan.add("index_only",
     }
   }

-  if (tmp + COST_EPS < best.cost)
+  if (cur_cost + COST_EPS < best.cost)
   {
     /*
       If the table has a range (s->quick is set) make_join_select()
       will ensure that this will be used
     */
-    best.cost= tmp;
+    best.cost= cur_cost;
     best.records_read= org_records;        // Records accessed
     best.records= records_after_filter;    // Records to be checked with WHERE
+
     /*
       If we are using 'use_cond_selectivity > 1' then
       table_after_join_selectivity may take into account other
@@ -11019,7 +11040,7 @@ best_extension_by_limited_search(JOIN *join,
         trace_one_table
           .add("pruned_by_cost", true)
           .add("current_cost", current_read_time)
-          .add("best_cost", join->best_read + COST_EPS);
+          .add("best_cost", join->best_read);

         restore_prev_nj_state(s);
         restore_prev_sj_state(remaining_tables, s, idx);
@@ -13355,6 +13376,7 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
       if (!sel->quick_keys.is_subset(tab->checked_keys) ||
           !sel->needed_reg.is_subset(tab->checked_keys))
       {
+        handler *file= tab->table->file;
         /*
           "Range checked for each record" is a "last resort" access method
           that should only be used when the other option is a cross-product
@@ -13370,9 +13392,7 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
                           (sel->quick_keys.is_clear_all() ||
                            (sel->quick &&
                             sel->quick->read_time >
-                            tab->table->file->
-                            ha_scan_and_compare_time(tab->table->file->
-                                                     stats.records)))) ?
+                            file->cost(file->ha_scan_and_compare_time(file->stats.records))))) ?
                           2 : 1;
       sel->read_tables= used_tables & ~current_map;
       sel->quick_keys.clear_all();
@@ -14237,7 +14257,6 @@ uint check_join_cache_usage(JOIN_TAB *tab,
                             uint table_index,
                             JOIN_TAB *prev_tab)
 {
-  Cost_estimate cost;
   uint flags= 0;
   ha_rows rows= 0;
   uint bufsz= 4096;
@@ -14396,6 +14415,8 @@ uint check_join_cache_usage(JOIN_TAB *tab,

   if (!tab->is_ref_for_hash_join() && !no_bka_cache)
   {
+    Cost_estimate cost;
+    cost.reset();
     flags= HA_MRR_NO_NULL_ENDPOINTS | HA_MRR_SINGLE_POINT;
     if (tab->table->covering_keys.is_set(tab->ref.key))
       flags|= HA_MRR_INDEX_ONLY;
@@ -15132,6 +15153,7 @@ void JOIN_TAB::cleanup()
 void JOIN_TAB::estimate_scan_time()
 {
   THD *thd= join->thd;
+  handler *file= table->file;
   double copy_cost;

   cached_covering_key= MAX_KEY;
@@ -15143,7 +15165,7 @@ void JOIN_TAB::estimate_scan_time()
                                            &startup_cost);
     table->opt_range_condition_rows= records;
     table->used_stat_records= records;
-    copy_cost= table->file->ROW_COPY_COST;
+    copy_cost= file->ROW_COPY_COST;
   }
   else
   {
@@ -15157,12 +15179,13 @@ void JOIN_TAB::estimate_scan_time()
     if (!table->covering_keys.is_clear_all() && ! table->no_keyread)
     {
       cached_covering_key= find_shortest_key(table, &table->covering_keys);
-      read_time= table->file->ha_key_scan_time(cached_covering_key, records);
+      read_time= file->cost(file->ha_key_scan_time(cached_covering_key,
+                                                   records));
       copy_cost= 0;                          // included in ha_key_scan_time
     }
     else
     {
-      read_time= table->file->ha_scan_time(records);
+      read_time= file->cost(file->ha_scan_time(records));
       copy_cost= 0;
     }
   }
@@ -15179,12 +15202,13 @@ void JOIN_TAB::estimate_scan_time()
     else
       memcpy(&table->s->optimizer_costs, &tmp_table_optimizer_costs,
              sizeof(tmp_table_optimizer_costs));
-    table->file->set_optimizer_costs(thd);
-    table->s->optimizer_costs_inited=1 ;
+    file->set_optimizer_costs(thd);
+    table->s->optimizer_costs_inited= 1;

     records= table->stat_records();
     DBUG_ASSERT(table->opt_range_condition_rows == records);
-    read_time= table->file->ha_scan_time(MY_MAX(records, 1000)); // Needs fix..
+    // Needs fix..
+    read_time= file->cost(file->ha_scan_time(MY_MAX(records, 1000)));
     copy_cost= table->s->optimizer_costs.row_copy_cost;
   }

@@ -30253,7 +30277,7 @@ static bool get_range_limit_read_cost(const POSITION *pos,
       full index scan/cost.
     */
     double best_rows, range_rows;
-    double range_cost= (double) table->opt_range[keynr].fetch_cost;
+    double range_cost= (double) table->opt_range[keynr].cost.fetch_cost();
     best_rows= range_rows= (double) table->opt_range[keynr].rows;

     if (pos)
@@ -30309,12 +30333,12 @@ static bool get_range_limit_read_cost(const POSITION *pos,
       N/(refkey_rows_estimate/table_records) > table_records
       <=> N > refkey_rows_estimate.
     */
-    INDEX_READ_COST cost= cost_for_index_read(table->in_use, table, keynr,
-                                              rows_to_scan,
-                                              pos ?
-                                              (ha_rows) pos->table->worst_seeks :
-                                              HA_ROWS_MAX);
-    *read_cost= (cost.read_cost +
+    ALL_READ_COST cost= cost_for_index_read(table->in_use, table, keynr,
+                                            rows_to_scan,
+                                            pos ?
+                                            (ha_rows) pos->table->worst_seeks :
+                                            HA_ROWS_MAX);
+    *read_cost= (table->file->cost(&cost) +
                  rows_to_scan * WHERE_COST_THD(table->in_use));
     *read_rows= rows2double(rows_to_scan);
     return 0;
@@ -30449,7 +30473,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
   {
     /*
       Probably an update or delete.
      Assume we will do a full table scan
    */
    fanout= 1.0;
-    read_time= table->file->ha_scan_and_compare_time(rows_estimate);
+    read_time= table->file->cost(table->file->ha_scan_and_compare_time(rows_estimate));
    set_if_smaller(select_limit_arg, table_records);
  }
diff --git a/sql/sql_test.cc b/sql/sql_test.cc
index f50452a9d6c..6c2bbedef6d 100644
--- a/sql/sql_test.cc
+++ b/sql/sql_test.cc
@@ -383,7 +383,7 @@ void print_sjm(SJ_MATERIALIZATION_INFO *sjm)
   }
   fprintf(DBUG_FILE, "  }\n");
   fprintf(DBUG_FILE, "  materialize_cost= %g\n",
-          sjm->materialization_cost.total_cost());
+          sjm->materialization_cost);
   fprintf(DBUG_FILE, "  rows= %g\n", sjm->rows);
   fprintf(DBUG_FILE, "}\n");
   DBUG_UNLOCK_FILE;
diff --git a/sql/table.cc b/sql/table.cc
index 57844e7734c..0e196b64379 100644
--- a/sql/table.cc
+++ b/sql/table.cc
@@ -10460,17 +10460,26 @@ bool TABLE::export_structure(THD *thd, Row_definition_list *defs)

 inline void TABLE::initialize_opt_range_structures()
 {
   TRASH_ALLOC((void*)&opt_range_keys, sizeof(opt_range_keys));
-  TRASH_ALLOC(opt_range, s->keys * sizeof(*opt_range));
+  TRASH_ALLOC((void*)opt_range, s->keys * sizeof(*opt_range));
   TRASH_ALLOC(const_key_parts, s->keys * sizeof(*const_key_parts));
 }

 double TABLE::OPT_RANGE::index_only_fetch_cost(TABLE *table)
 {
-  return (index_only_cost +
+  return (table->file->cost(cost.index_cost) +
           (double) rows * table->s->optimizer_costs.key_copy_cost);
 }

+void TABLE::OPT_RANGE::get_costs(ALL_READ_COST *res)
+{
+  res->index_cost= cost.index_cost;
+  res->row_cost= cost.row_cost;
+  res->copy_cost= cost.copy_cost;
+  res->max_index_blocks= max_index_blocks;
+  res->max_row_blocks= max_row_blocks;
+}
+

 /*
   Mark table to be reopened after query
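Editor's note: the (void*) cast added to TRASH_ALLOC() above is presumably
needed because OPT_RANGE now contains a Cost_estimate member, and filling an
object of non-trivial class type through a memset-style macro triggers GCC's
-Wclass-memaccess warning. A stand-alone illustration of the warning and the
cast that silences it; the type names here are made up, only the mechanism
matches the patch:

#include <cstring>

struct NonTrivial { NonTrivial() : d(0) {} double d; }; /* stand-in for Cost_estimate */
struct Opt_range  { NonTrivial cost; };                 /* stand-in for OPT_RANGE */

void trash(Opt_range *r, size_t n)
{
  /* memset(r, 0xA5, n * sizeof(*r));           warns under GCC 8+      */
  memset((void*) r, 0xA5, n * sizeof(*r));   /* the cast sidesteps it   */
}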
diff --git a/sql/table.h b/sql/table.h
index edeeb6e6241..218fb0e8104 100644
--- a/sql/table.h
+++ b/sql/table.h
@@ -1394,27 +1394,8 @@ public:
   {
     uint        key_parts;
     uint        ranges;
-    ha_rows     rows;
-    /*
-      The full cost of using 'range'. Includes fetching the rows
-      through keys, copying them and comparing the rows aginst the
-      WHERE clause.
-    */
-    double      cost;
-    /*
-      Cost of finding the key and fetching the row with row id.
-      In case of clustered keys or covering keys the fetch of the row is
-      not counted for.
-    */
-    double      find_cost;
-    /* find_cost + cost of copying the rows to record */
-    double      fetch_cost;
-    /*
-      Cost of fetching the keys, not including copying the keys to
-      record or comparing them with the WHERE clause. Used only when
-      working with filters.
-    */
-    double      index_only_cost;
+    ha_rows     rows, max_index_blocks, max_row_blocks;
+    Cost_estimate cost;
     /* Selectivity, in case of filters */
     double      selectivity;
     bool        first_key_part_has_only_one_value;
@@ -1424,6 +1405,7 @@ public:
       sql level.
     */
     double index_only_fetch_cost(TABLE *table);
+    void get_costs(ALL_READ_COST *cost);
   } *opt_range;
   /*
     Bitmaps of key parts that =const for the duration of join execution. If
@@ -1818,12 +1800,12 @@ public:
   void prune_range_rowid_filters();
   void trace_range_rowid_filters(THD *thd) const;
   Range_rowid_filter_cost_info *
-  best_range_rowid_filter_for_partial_join(uint access_key_no,
-                                           double records,
-                                           double fetch_cost,
-                                           double index_only_cost,
-                                           double prev_records,
-                                           double *records_out);
+  best_range_rowid_filter(uint access_key_no,
+                          double records,
+                          double fetch_cost,
+                          double index_only_cost,
+                          double prev_records,
+                          double *records_out);
   /**
     System Versioning support
   */
diff --git a/sql/uniques.cc b/sql/uniques.cc
index 8555fc21624..1886ad278da 100644
--- a/sql/uniques.cc
+++ b/sql/uniques.cc
@@ -348,7 +348,7 @@ double Unique::get_use_cost(THD *thd, uint *buffer, size_t nkeys, uint key_size,
    First, add cost of writing all trees to disk, assuming that all disk
    writes are sequential.
  */
-  disk_read_cost= DISK_READ_COST_THD(thd);
+  disk_read_cost= default_optimizer_costs.disk_read_cost;
  result += disk_read_cost * n_full_trees *
    ceil(((double) key_size)*max_elements_in_tree / DISK_CHUNK_SIZE);
  result += disk_read_cost * ceil(((double) key_size)*last_tree_elems / DISK_CHUNK_SIZE);
@@ -365,8 +365,7 @@ double Unique::get_use_cost(THD *thd, uint *buffer, size_t nkeys, uint key_size,
    Add cost of reading the resulting sequence, assuming there were no
    duplicate elements.
  */
-  result+= (ceil((double)key_size*nkeys/IO_SIZE) *
-            default_optimizer_costs.disk_read_cost);
+  result+= (ceil((double)key_size*nkeys/IO_SIZE) * disk_read_cost);

  return result;
 }
diff --git a/storage/connect/ha_connect.h b/storage/connect/ha_connect.h
index a5b139a5689..c83584a62e4 100644
--- a/storage/connect/ha_connect.h
+++ b/storage/connect/ha_connect.h
@@ -309,7 +309,7 @@ public:
  Called in test_quick_select to determine if indexes should be used.
  */
  virtual IO_AND_CPU_COST scan_time()
-  { return { 0, (double) (stats.records+stats.deleted) * avg_io_cost() }; };
+  { return { 0, (double) (stats.records+stats.deleted) * DISK_READ_COST }; };

 /** @brief
  This method will never be called if you do not implement indexes.
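Editor's note: the storage-engine hunks from here to the end of the patch all
apply the same mechanical rule: the per-call avg_io_cost() multiplier
disappears and the cost methods return figures the server folds later. The
convention is not yet fully uniform in this patch (InnoDB and the example
engine weigh blocks by the DISK_READ_COST constant, while ha_tina below
returns a bare block count), so the following sketch is only one plausible
shape of the new contract; all inputs are plain parameters so it stands alone:

struct IO_AND_CPU_COST { double io, cpu; };

/* Sketch: io carries the block-read estimate (weighted by the per-block
   read cost, as InnoDB does), cpu the per-row engine work. Any further
   caching discount is applied by the server, not the engine. */
static IO_AND_CPU_COST demo_scan_time(double data_file_length,
                                      double records,
                                      double io_size,         /* e.g. 4096 */
                                      double disk_read_cost,  /* per block */
                                      double row_next_find_cost)
{
  IO_AND_CPU_COST cost;
  cost.io=  (data_file_length + io_size - 1) / io_size * disk_read_cost;
  cost.cpu= records * row_next_find_cost;
  return cost;
}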
diff --git a/storage/csv/ha_tina.h b/storage/csv/ha_tina.h
index 5a56dc6c4dd..856bb789320 100644
--- a/storage/csv/ha_tina.h
+++ b/storage/csv/ha_tina.h
@@ -126,9 +126,9 @@ public:
  */
  virtual IO_AND_CPU_COST scan_time()
  {
-    return { (double) ((share->saved_data_file_length + IO_SIZE-1))/ IO_SIZE *
-             avg_io_cost(),
-             (stats.records+stats.deleted) * ROW_NEXT_FIND_COST };
+    return
+    { (double) (share->saved_data_file_length + IO_SIZE - 1) / IO_SIZE,
+      (stats.records+stats.deleted) * ROW_NEXT_FIND_COST };
  }
  /* The next method will never be called */
  virtual bool fast_key_read() { return 1;}
diff --git a/storage/example/ha_example.h b/storage/example/ha_example.h
index 3b11945b182..92acce5b7bb 100644
--- a/storage/example/ha_example.h
+++ b/storage/example/ha_example.h
@@ -156,7 +156,7 @@ public:
  {
    IO_AND_CPU_COST cost;
    /* 0 blocks, 0.001 ms / row */
-    cost.io= (double) (stats.records+stats.deleted) * avg_io_cost();
+    cost.io= (double) (stats.records+stats.deleted) * DISK_READ_COST;
    cost.cpu= 0;
    return cost;
  }
@@ -168,7 +168,7 @@ public:
                               ulonglong blocks)
  {
    IO_AND_CPU_COST cost;
-    cost.io= blocks * avg_io_cost();
+    cost.io= blocks * DISK_READ_COST;
    cost.cpu= (double) rows * 0.001;
    return cost;
  }
@@ -181,7 +181,7 @@ public:
    IO_AND_CPU_COST cost;
    /* 0 blocks, 0.001 ms / row */
    cost.io= 0;
-    cost.cpu= (double) rows * avg_io_cost();
+    cost.cpu= (double) rows * DISK_READ_COST;
    return cost;
  }
diff --git a/storage/federated/ha_federated.h b/storage/federated/ha_federated.h
index 35e5f5c8215..317271f60b4 100644
--- a/storage/federated/ha_federated.h
+++ b/storage/federated/ha_federated.h
@@ -186,20 +186,20 @@ public:
    DBUG_PRINT("info", ("records %lu", (ulong) stats.records));
    return
    {
-      (double) (stats.mean_rec_length * stats.records)/IO_SIZE * avg_io_cost(),
-      0
+      0,
+      (double) (stats.mean_rec_length * stats.records)/8192 * DISK_READ_COST +
+      1000,
    };
  }
-  IO_AND_CPU_COST rnd_pos_time(ha_rows rows)
-  {
-    return { (double) stats.records * avg_io_cost(), 0 };
-  }
  IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows,
                               ulonglong blocks)
  {
-    return { (double) (ranges + rows) * avg_io_cost(), 0 };
+    return {0, (double) (ranges + rows) * DISK_READ_COST };
+  }
+  IO_AND_CPU_COST rnd_pos_time(ha_rows rows)
+  {
+    return {0, (double) rows * DISK_READ_COST };
  }
-
  const key_map *keys_to_use_for_scanning() { return &key_map_full; }
 /*
  Everything below are methods that we implment in ha_federated.cc.
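Editor's note: ha_federated above (and ha_federatedx next) deliberately moves
its estimates from the io slot to the cpu slot and zeroes io. For an engine
whose "reads" are network round-trips this matters: whatever lands in io is
presumably scaled by disk_read_ratio, which a tuned server can push towards
zero on the assumption that blocks are cached, and that would make remote rows
look nearly free. A toy demonstration of the effect; the folding formula and
the 0.02 ratio are assumptions, not taken from this diff:

static double fold(double io, double cpu, double disk_read_ratio)
{
  return io * disk_read_ratio + cpu;          /* assumed scalarization */
}

int main()
{
  /* a remote engine's 1000 "time units" of round-trips, booked two ways */
  double discounted= fold(1000, 0, 0.02);     /*   20: looks nearly free */
  double preserved=  fold(0, 1000, 0.02);     /* 1000: survives intact   */
  return preserved > discounted ? 0 : 1;
}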
diff --git a/storage/federatedx/ha_federatedx.h b/storage/federatedx/ha_federatedx.h
index beebf405686..6876db5cbb8 100644
--- a/storage/federatedx/ha_federatedx.h
+++ b/storage/federatedx/ha_federatedx.h
@@ -364,26 +364,26 @@ public:
    Talk to Kostja about this - how to get the
    number of rows * ...
    disk scan time on other side (block size, size of the row) + network time ...
-    The reason for "records * 1000" is that such a large number forces
-    this to use indexes "
+    The reason for "1000" is that such a large number forces this to use indexes.
  */
  IO_AND_CPU_COST scan_time()
  {
    DBUG_PRINT("info", ("records %lu", (ulong) stats.records));
    return
    {
-      (double) (stats.mean_rec_length * stats.records)/8192 * avg_io_cost(),
-      0
+      0,
+      (double) (stats.mean_rec_length * stats.records)/8192 * DISK_READ_COST +
+      1000,
    };
  }
  IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows,
                               ulonglong blocks)
  {
-    return { (double) (ranges + rows) * avg_io_cost(), 0 };
+    return {0, (double) (ranges + rows) * DISK_READ_COST };
  }
  IO_AND_CPU_COST rnd_pos_time(ha_rows rows)
  {
-    return { (double) rows * avg_io_cost(), 0 };
+    return {0, (double) rows * DISK_READ_COST };
  }
  const key_map *keys_to_use_for_scanning() { return &key_map_full; }
diff --git a/storage/heap/ha_heap.h b/storage/heap/ha_heap.h
index 74a0a00a04c..663e75fc0fe 100644
--- a/storage/heap/ha_heap.h
+++ b/storage/heap/ha_heap.h
@@ -67,7 +67,6 @@ public:
                              ulonglong blocks) override;
  IO_AND_CPU_COST rnd_pos_time(ha_rows rows) override;
  /* 0 for avg_io_cost ensures that there are no read-block calculations */
-  double avg_io_cost() override { return 0.0; }
  int open(const char *name, int mode, uint test_if_locked);
  int close(void);
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index fc20003c7f5..10de7f54603 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -14337,7 +14337,7 @@ ha_innobase::scan_time()
	TODO: This will be further improved to return some approximate
	estimate but that would also needs pre-population of stats
	structure. As of now approach is in sync with MyISAM. */
-	return { (ulonglong2double(stats.data_file_length) / IO_SIZE * avg_io_cost()), 0.0 };
+	return { (ulonglong2double(stats.data_file_length) / IO_SIZE * DISK_READ_COST), 0.0 };
  }

  ulint	stat_clustered_index_size;
@@ -14347,7 +14347,7 @@ ha_innobase::scan_time()

	stat_clustered_index_size =
		m_prebuilt->table->stat_clustered_index_size;
-	cost.io= (double) stat_clustered_index_size * avg_io_cost();
+	cost.io= (double) stat_clustered_index_size * DISK_READ_COST;
	cost.cpu= 0;
	return(cost);
 }
diff --git a/storage/myisammrg/ha_myisammrg.h b/storage/myisammrg/ha_myisammrg.h
index 6ccf29c7042..a5e35540c55 100644
--- a/storage/myisammrg/ha_myisammrg.h
+++ b/storage/myisammrg/ha_myisammrg.h
@@ -106,7 +106,7 @@ public:
  {
    IO_AND_CPU_COST cost;
    cost.io= (ulonglong2double(stats.data_file_length) / IO_SIZE +
-              file->tables) * avg_io_cost();
+              file->tables);
    cost.cpu= records() * ROW_NEXT_FIND_COST;
    return cost;
  }
diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc
index 1dbf98245f7..7c1af7217ca 100644
--- a/storage/rocksdb/ha_rocksdb.cc
+++ b/storage/rocksdb/ha_rocksdb.cc
@@ -14660,6 +14660,15 @@ IO_AND_CPU_COST ha_rocksdb::keyread_time(uint index, ulong ranges,
  DBUG_RETURN(cost);
 }

+
+ulonglong ha_rocksdb::index_blocks(uint index, uint ranges, ha_rows rows)
+{
+  size_t len= table->key_storage_length(index);
+  ulonglong blocks= (rows * len / 4) / stats.block_size + ranges; // 75% compression
+  return blocks * stats.block_size / IO_SIZE;
+}
+
+
 void ha_rocksdb::print_error(int error, myf errflag) {
  if (error == HA_ERR_ROCKSDB_STATUS_BUSY) {
    error = HA_ERR_LOCK_DEADLOCK;
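Editor's note: a quick sanity check of the ha_rocksdb::index_blocks()
arithmetic above, assuming IO_SIZE is 4096. The expression is the patch's own;
the figures below are invented for the check:

#include <cassert>

int main()
{
  /* 1,000,000 keys of 24 bytes in 16K blocks, one range */
  unsigned long long rows= 1000000, len= 24, block_size= 16384, ranges= 1;
  /* 75% compression: 24,000,000 key bytes shrink to 6,000,000 */
  unsigned long long blocks= (rows * len / 4) / block_size + ranges;
  assert(blocks == 367);
  /* convert to IO_SIZE (4K) units, as the patch does */
  assert(blocks * block_size / 4096 == 1468);
  return 0;
}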
diff --git a/storage/rocksdb/ha_rocksdb.h b/storage/rocksdb/ha_rocksdb.h
index 2cfaf305682..f05f373cbfd 100644
--- a/storage/rocksdb/ha_rocksdb.h
+++ b/storage/rocksdb/ha_rocksdb.h
@@ -623,7 +623,7 @@ public:
                     bool sorted) override
      MY_ATTRIBUTE((__warn_unused_result__));

-  virtual IO_AND_CPU_COST scan_time() override
+  IO_AND_CPU_COST scan_time() override
  {
    IO_AND_CPU_COST cost;
    DBUG_ENTER_FUNC();
@@ -634,7 +634,8 @@ public:
  IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows,
                               ulonglong blocks) override;

-  virtual void print_error(int error, myf errflag) override;
+  ulonglong index_blocks(uint index, uint ranges, ha_rows rows) override;
+  void print_error(int error, myf errflag) override;

  int open(const char *const name, int mode, uint test_if_locked) override
      MY_ATTRIBUTE((__warn_unused_result__));
diff --git a/storage/sequence/sequence.cc b/storage/sequence/sequence.cc
index eb79d25630c..fd95a897a46 100644
--- a/storage/sequence/sequence.cc
+++ b/storage/sequence/sequence.cc
@@ -86,7 +86,17 @@ public:
  void position(const uchar *record);
  int rnd_pos(uchar *buf, uchar *pos);
  int info(uint flag);
-
+  IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows,
+                               ulonglong blocks) override
+  {
+    /* Avoids assert in total_cost() and makes DBUG_PRINT more consistent */
+    return {0, 0};
+  }
+  IO_AND_CPU_COST scan_time() override
+  {
+    /* Avoids assert in total_cost() and makes DBUG_PRINT more consistent */
+    return {0, 0};
+  }
  /* indexes */
  ulong index_flags(uint inx, uint part, bool all_parts) const
  { return HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER |
@@ -100,7 +110,6 @@ public:
  int index_last(uchar *buf);
  ha_rows records_in_range(uint inx, const key_range *start_key,
                           const key_range *end_key, page_range *pages);
-  double avg_io_cost() override { return 0.0; }
 private:
  void set(uchar *buf);
@@ -492,10 +501,14 @@ int ha_seq_group_by_handler::next_row()

 static void sequence_update_optimizer_costs(OPTIMIZER_COSTS *costs)
 {
+  costs->disk_read_cost= 0;
  costs->disk_read_ratio= 0.0;                  // No disk
-  costs->key_next_find_cost= costs->key_lookup_cost=
-    costs->key_copy_cost= costs->row_lookup_cost=
-    costs->row_copy_cost= 0.0000062391530550;
+  costs->key_next_find_cost=
+    costs->key_lookup_cost=
+    costs->key_copy_cost=
+    costs->row_next_find_cost=
+    costs->row_lookup_cost=
+    costs->row_copy_cost= 0.0000062391530550;
 }

 /*****************************************************************************
diff --git a/storage/sphinx/ha_sphinx.h b/storage/sphinx/ha_sphinx.h
index 0b3883f107c..f5651fc6eb5 100644
--- a/storage/sphinx/ha_sphinx.h
+++ b/storage/sphinx/ha_sphinx.h
@@ -76,7 +76,7 @@ public:
  {
    IO_AND_CPU_COST cost;
    cost.io= 0;
-    cost.cpu= (double) (stats.records+stats.deleted) * avg_io_cost();
+    cost.cpu= (double) (stats.records+stats.deleted) * DISK_READ_COST;
    return cost;
  }
  IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows,