MDEV-21136 InnoDB's records_in_range estimates can be way off
Get rid of BTR_ESTIMATE and btr_cur_t::path_arr. Before the fix, btr_estimate_n_rows_in_range_low() used two btr_cur_search_to_nth_level() calls to create two arrays of tree paths, one array per border. It then tried to estimate the number of rows by diving level by level through the array elements. Because the path pages were unlatched while the arrays were being iterated, the tree could be modified in the meantime, and the estimation function called itself again until the allowed number of attempts was exceeded. After the fix, the estimation happens during the search process. Roughly, the algorithm is the following. Dive into the left page; then, if there are pages between the left and right ones, read a few pages to the right. If the right page is reached, fetch it and count the exact number of rows; otherwise count the estimated number of rows and fetch the right page. The latching order corresponds to the WL#6326 rules, i.e.: (2.1) [same as (1.1)]: Page latches must be acquired in descending order of tree level. (2.2) When acquiring a node pointer page latch at level L, we must hold the left sibling page latch (at level L) or some ancestor latch (at level>L). When we dive one level down, the parent page is unlatched only after the current level page is latched. When we estimate the number of rows on some level, we latch the left border, then fetch the next page, and then keep fetching the next page, unlatching the previous page only after the current page is latched, until the right border is reached. I.e. the left sibling is always latched when we acquire a page latch on the same level. When we reach the right border, the current page is unlatched, and then the right border is latched. According to rule (2.2), we can do this because the right border's parent is latched.
This commit is contained in:
parent
6156a2be30
commit
222e800e24
File diff suppressed because it is too large
Load Diff
@ -129,10 +129,6 @@ enum btr_latch_mode {
|
||||
BTR_PURGE_TREE = BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE
|
||||
};
|
||||
|
||||
/** This flag ORed to btr_latch_mode says that we do the search in query
|
||||
optimization */
|
||||
#define BTR_ESTIMATE 1024U
|
||||
|
||||
/** This flag ORed to BTR_INSERT says that we can ignore possible
|
||||
UNIQUE definition on secondary indexes when we decide if we can use
|
||||
the insert buffer to speed up inserts */
|
||||
@ -160,7 +156,6 @@ record is in spatial index */
|
||||
| BTR_RTREE_UNDO_INS \
|
||||
| BTR_RTREE_DELETE_MARK \
|
||||
| BTR_DELETE \
|
||||
| BTR_ESTIMATE \
|
||||
| BTR_IGNORE_SEC_UNIQUE \
|
||||
| BTR_ALREADY_S_LATCHED \
|
||||
| BTR_LATCH_FOR_INSERT \
|
||||
|
@ -165,7 +165,7 @@ btr_cur_search_to_nth_level_func(
|
||||
search the position! */
|
||||
ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ..., ORed with
|
||||
at most one of BTR_INSERT, BTR_DELETE_MARK,
|
||||
BTR_DELETE, or BTR_ESTIMATE;
|
||||
BTR_DELETE;
|
||||
cursor->left_block is used to store a pointer
|
||||
to the left neighbor page, in the cases
|
||||
BTR_SEARCH_PREV and BTR_MODIFY_PREV;
|
||||
@ -531,16 +531,20 @@ struct btr_pos_t
|
||||
page_id_t page_id; /* Out: Page where we found the tuple */
|
||||
};
|
||||
|
||||
/** Estimates the number of rows in a given index range.
|
||||
@param[in] index index
|
||||
@param[in/out] range_start
|
||||
@param[in/out] range_end
|
||||
@return estimated number of rows */
|
||||
ha_rows
|
||||
btr_estimate_n_rows_in_range(
|
||||
dict_index_t* index,
|
||||
btr_pos_t* range_start,
|
||||
btr_pos_t* range_end);
|
||||
/** Estimates the number of rows in a given index range. Do search in the
|
||||
left page, then if there are pages between left and right ones, read a few
|
||||
pages to the right, if the right page is reached, fetch it and count the exact
|
||||
number of rows, otherwise count the estimated (see
|
||||
btr_estimate_n_rows_in_range_on_level() for details) number of rows, and
|
||||
fetch the right page. If leaves are reached, unlatch non-leaf pages except
|
||||
the right leaf parent. After the right leaf page is fetched, commit mtr.
|
||||
@param[in] index index
|
||||
@param[in] range_start range start
|
||||
@param[in] range_end range end
|
||||
@return estimated number of rows; */
|
||||
ha_rows btr_estimate_n_rows_in_range(dict_index_t *index,
|
||||
btr_pos_t *range_start,
|
||||
btr_pos_t *range_end);
|
||||
|
||||
/** Gets the externally stored size of a record, in units of a database page.
|
||||
@param[in] rec record
|
||||
|
@ -54,14 +54,11 @@ page_zip_des_t*
|
||||
page_cur_get_page_zip(
|
||||
/*==================*/
|
||||
page_cur_t* cur); /*!< in: page cursor */
|
||||
/*********************************************************//**
|
||||
Gets the record where the cursor is positioned.
|
||||
/* Gets the record where the cursor is positioned.
|
||||
@param cur page cursor
|
||||
@return record */
|
||||
UNIV_INLINE
|
||||
rec_t*
|
||||
page_cur_get_rec(
|
||||
/*=============*/
|
||||
page_cur_t* cur); /*!< in: page cursor */
|
||||
rec_t *page_cur_get_rec(const page_cur_t *cur);
|
||||
#else /* UNIV_DEBUG */
|
||||
# define page_cur_get_page(cur) page_align((cur)->rec)
|
||||
# define page_cur_get_block(cur) (cur)->block
|
||||
|
@ -63,14 +63,11 @@ page_cur_get_page_zip(
|
||||
return(buf_block_get_page_zip(page_cur_get_block(cur)));
|
||||
}
|
||||
|
||||
/*********************************************************//**
|
||||
Gets the record where the cursor is positioned.
|
||||
/* Gets the record where the cursor is positioned.
|
||||
@param cur page cursor
|
||||
@return record */
|
||||
UNIV_INLINE
|
||||
rec_t*
|
||||
page_cur_get_rec(
|
||||
/*=============*/
|
||||
page_cur_t* cur) /*!< in: page cursor */
|
||||
rec_t *page_cur_get_rec(const page_cur_t *cur)
|
||||
{
|
||||
ut_ad(cur);
|
||||
ut_ad(!cur->rec || page_align(cur->rec) == cur->block->page.frame);
|
||||
|
Loading…
x
Reference in New Issue
Block a user