From 0f9c818545946dbee97bc7a8ff80ce09ed7e7cd1 Mon Sep 17 00:00:00 2001 From: Inaam Rana Date: Fri, 30 Jul 2010 10:39:16 -0400 Subject: [PATCH 01/18] When the caller of buf_flush_list() provides us with the number of pages that it wants to flush then we should honor that value as in not going beyond that in our eagerness to flush the neighbors of the selected victim. --- storage/innobase/buf/buf0flu.c | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c index 3737627301f..4131d863e6a 100644 --- a/storage/innobase/buf/buf0flu.c +++ b/storage/innobase/buf/buf0flu.c @@ -1248,8 +1248,12 @@ buf_flush_try_neighbors( /*====================*/ ulint space, /*!< in: space id */ ulint offset, /*!< in: page offset */ - enum buf_flush flush_type) /*!< in: BUF_FLUSH_LRU or + enum buf_flush flush_type, /*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */ + ulint n_flushed, /*!< in: number of pages + flushed so far in this batch */ + ulint n_to_flush) /*!< in: maximum number of pages + we are allowed to flush */ { ulint i; ulint low; @@ -1290,6 +1294,21 @@ buf_flush_try_neighbors( buf_page_t* bpage; + if ((count + n_flushed) >= n_to_flush) { + + /* We have already flushed enough pages and + should call it a day. There is, however, one + exception. If the page whose neighbors we + are flushing has not been flushed yet then + we'll try to flush the victim that we + selected originally. 
*/ + if (i <= offset) { + i = offset; + } else { + break; + } + } + buf_pool = buf_pool_get(space, i); buf_pool_mutex_enter(buf_pool); @@ -1357,6 +1376,8 @@ buf_flush_page_and_try_neighbors( buf_page_in_file(bpage) */ enum buf_flush flush_type, /*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */ + ulint n_to_flush, /*!< in: number of pages to + flush */ ulint* count) /*!< in/out: number of pages flushed */ { @@ -1390,7 +1411,11 @@ buf_flush_page_and_try_neighbors( mutex_exit(block_mutex); /* Try to flush also all the neighbors */ - *count += buf_flush_try_neighbors(space, offset, flush_type); + *count += buf_flush_try_neighbors(space, + offset, + flush_type, + *count, + n_to_flush); buf_pool_mutex_enter(buf_pool); flushed = TRUE; @@ -1430,7 +1455,7 @@ buf_flush_LRU_list_batch( a page that isn't ready for flushing. */ while (bpage != NULL && !buf_flush_page_and_try_neighbors( - bpage, BUF_FLUSH_LRU, &count)) { + bpage, BUF_FLUSH_LRU, max, &count)) { bpage = UT_LIST_GET_PREV(LRU, bpage); } @@ -1511,7 +1536,7 @@ buf_flush_flush_list_batch( while (bpage != NULL && len > 0 && !buf_flush_page_and_try_neighbors( - bpage, BUF_FLUSH_LIST, &count)) { + bpage, BUF_FLUSH_LIST, min_n, &count)) { buf_flush_list_mutex_enter(buf_pool); From b003544f6511d32320d6240de87823b719ab0a02 Mon Sep 17 00:00:00 2001 From: Calvin Sun Date: Tue, 3 Aug 2010 01:12:03 -0500 Subject: [PATCH 02/18] Bug #54702: revert the default of innodb_strict_mode to false. 
--- storage/innobase/handler/ha_innodb.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index ab9df9a0272..e78f167beb6 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -425,7 +425,7 @@ static MYSQL_THDVAR_BOOL(table_locks, PLUGIN_VAR_OPCMDARG, static MYSQL_THDVAR_BOOL(strict_mode, PLUGIN_VAR_OPCMDARG, "Use strict mode when evaluating create options.", - NULL, NULL, TRUE); + NULL, NULL, FALSE); static MYSQL_THDVAR_ULONG(lock_wait_timeout, PLUGIN_VAR_RQCMDARG, "Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back. Values above 100000000 disable the timeout.", From b4a25f462ebd91612d7faeaf0df82ed34d862f38 Mon Sep 17 00:00:00 2001 From: Inaam Rana Date: Thu, 5 Aug 2010 11:09:05 -0400 Subject: [PATCH 03/18] Currently we do a full validation of AHI whenever check tables is called on any table. This patch fixes this by only doing this full check in debug versions. bug#55716 rb://423 approved by: Marko --- storage/innobase/btr/btr0sea.c | 2 ++ storage/innobase/ha/ha0ha.c | 2 ++ storage/innobase/include/btr0sea.h | 4 ++++ storage/innobase/include/ha0ha.h | 2 ++ 4 files changed, 10 insertions(+) diff --git a/storage/innobase/btr/btr0sea.c b/storage/innobase/btr/btr0sea.c index 06cc48c7c60..fb667bcae82 100644 --- a/storage/innobase/btr/btr0sea.c +++ b/storage/innobase/btr/btr0sea.c @@ -1746,6 +1746,7 @@ function_exit: } } +#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG /********************************************************************//** Validates the search system. 
@return TRUE if ok */ @@ -1913,3 +1914,4 @@ btr_search_validate(void) return(ok); } +#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */ diff --git a/storage/innobase/ha/ha0ha.c b/storage/innobase/ha/ha0ha.c index f9e798012f8..7f11917de0a 100644 --- a/storage/innobase/ha/ha0ha.c +++ b/storage/innobase/ha/ha0ha.c @@ -354,6 +354,7 @@ ha_remove_all_nodes_to_page( #endif } +#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG /*************************************************************//** Validates a given range of the cells in hash table. @return TRUE if ok */ @@ -400,6 +401,7 @@ ha_validate( return(ok); } +#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */ /*************************************************************//** Prints info of a hash table. */ diff --git a/storage/innobase/include/btr0sea.h b/storage/innobase/include/btr0sea.h index 20a2be7f877..6493689a969 100644 --- a/storage/innobase/include/btr0sea.h +++ b/storage/innobase/include/btr0sea.h @@ -180,6 +180,7 @@ btr_search_update_hash_on_delete( btr_cur_t* cursor);/*!< in: cursor which was positioned on the record to delete using btr_cur_search_..., the record is not yet deleted */ +#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG /********************************************************************//** Validates the search system. @return TRUE if ok */ @@ -187,6 +188,9 @@ UNIV_INTERN ibool btr_search_validate(void); /*======================*/ +#else +# define btr_search_validate() TRUE +#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */ /** Flag: has the search system been enabled? Protected by btr_search_latch and btr_search_enabled_mutex. 
*/ diff --git a/storage/innobase/include/ha0ha.h b/storage/innobase/include/ha0ha.h index 1ffbd3440aa..3299000bf3c 100644 --- a/storage/innobase/include/ha0ha.h +++ b/storage/innobase/include/ha0ha.h @@ -186,6 +186,7 @@ ha_remove_all_nodes_to_page( hash_table_t* table, /*!< in: hash table */ ulint fold, /*!< in: fold value */ const page_t* page); /*!< in: buffer page */ +#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG /*************************************************************//** Validates a given range of the cells in hash table. @return TRUE if ok */ @@ -196,6 +197,7 @@ ha_validate( hash_table_t* table, /*!< in: hash table */ ulint start_index, /*!< in: start index */ ulint end_index); /*!< in: end index */ +#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */ /*************************************************************//** Prints info of a hash table. */ UNIV_INTERN From a4c5cf7ca9efcf386600c5da5f049dcab9e86046 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 9 Aug 2010 11:58:37 +0300 Subject: [PATCH 04/18] Reduce the ibuf_mutex hold time. This does not fix the update regression in Bug #54914, but it does speed up the execution for innodb_change_buffering=inserts. ibuf_add_ops(), ibuf_merge_or_delete_for_page(), ibuf_delete_for_discarded_space(): Use atomic built-ins instead of ibuf_mutex, when available. ibuf_add_free_page(), ibuf_remove_free_page(), ibuf_contract_ext(): Release ibuf_mutex earlier. ibuf_free_excess_pages(): Release ibuf_mutex before a conditional branch. ibuf_insert_low(): Release ibuf_mutex before a conditional branch. Create ibuf_entry before re-acquiring ibuf_mutex. Simplify a loop to reduce code footprint. Release ibuf_mutex before mtr_commit() [btr_pcur_close()]. ibuf_is_empty(): Release ibuf_mutex before mtr_commit(). 
--- storage/innobase/ibuf/ibuf0ibuf.c | 123 +++++++++++++++++------------- 1 file changed, 69 insertions(+), 54 deletions(-) diff --git a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c index dc8e61e5070..1d162f82b93 100644 --- a/storage/innobase/ibuf/ibuf0ibuf.c +++ b/storage/innobase/ibuf/ibuf0ibuf.c @@ -1350,10 +1350,18 @@ ibuf_add_ops( const ulint* ops) /*!< in: operation counts */ { +#ifndef HAVE_ATOMIC_BUILTINS + ut_ad(mutex_own(&ibuf_mutex)); +#endif /* !HAVE_ATOMIC_BUILTINS */ + ulint i; for (i = 0; i < IBUF_OP_COUNT; i++) { +#ifdef HAVE_ATOMIC_BUILTINS + os_atomic_increment_ulint(&arr[i], ops[i]); +#else /* HAVE_ATOMIC_BUILTINS */ arr[i] += ops[i]; +#endif /* HAVE_ATOMIC_BUILTINS */ } } @@ -2096,13 +2104,13 @@ ibuf_add_free_page(void) bitmap_page = ibuf_bitmap_get_map_page( IBUF_SPACE_ID, page_no, zip_size, &mtr); + mutex_exit(&ibuf_mutex); + ibuf_bitmap_page_set_bits( bitmap_page, page_no, zip_size, IBUF_BITMAP_IBUF, TRUE, &mtr); mtr_commit(&mtr); - mutex_exit(&ibuf_mutex); - ibuf_exit(); return(DB_SUCCESS); @@ -2158,6 +2166,8 @@ ibuf_remove_free_page(void) root = ibuf_tree_root_get(&mtr2); + mutex_exit(&ibuf_mutex); + page_no = flst_get_last(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, &mtr2).page; @@ -2166,7 +2176,6 @@ ibuf_remove_free_page(void) is a level 2 page. 
*/ mtr_commit(&mtr2); - mutex_exit(&ibuf_mutex); ibuf_exit(); @@ -2220,6 +2229,8 @@ ibuf_remove_free_page(void) bitmap_page = ibuf_bitmap_get_map_page( IBUF_SPACE_ID, page_no, zip_size, &mtr); + mutex_exit(&ibuf_mutex); + ibuf_bitmap_page_set_bits( bitmap_page, page_no, zip_size, IBUF_BITMAP_IBUF, FALSE, &mtr); @@ -2228,8 +2239,6 @@ ibuf_remove_free_page(void) #endif mtr_commit(&mtr); - mutex_exit(&ibuf_mutex); - ibuf_exit(); } @@ -2270,17 +2279,16 @@ ibuf_free_excess_pages(void) for (i = 0; i < 4; i++) { + ibool too_much_free; + mutex_enter(&ibuf_mutex); + too_much_free = ibuf_data_too_much_free(); + mutex_exit(&ibuf_mutex); - if (!ibuf_data_too_much_free()) { - - mutex_exit(&ibuf_mutex); - + if (!too_much_free) { return; } - mutex_exit(&ibuf_mutex); - ibuf_remove_free_page(); } } @@ -2486,8 +2494,8 @@ ibuf_contract_ext( mutex_enter(&ibuf_mutex); if (ibuf->empty) { -ibuf_is_empty: mutex_exit(&ibuf_mutex); +ibuf_is_empty: #if 0 /* TODO */ if (srv_shutdown_state) { @@ -2515,6 +2523,7 @@ ibuf_is_empty: position within the leaf */ btr_pcur_open_at_rnd_pos(ibuf->index, BTR_SEARCH_LEAF, &pcur, &mtr); + mutex_exit(&ibuf_mutex); ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf->index)); @@ -2535,8 +2544,6 @@ ibuf_is_empty: goto ibuf_is_empty; } - mutex_exit(&ibuf_mutex); - sum_sizes = ibuf_get_merge_page_nos(TRUE, btr_pcur_get_rec(&pcur), space_ids, space_versions, page_nos, &n_stored); @@ -3304,6 +3311,7 @@ ibuf_insert_low( ulint n_stored; mtr_t mtr; mtr_t bitmap_mtr; + ibool too_big; ut_a(!dict_index_is_clust(index)); ut_ad(dtuple_check_typed(entry)); @@ -3316,12 +3324,13 @@ ibuf_insert_low( do_merge = FALSE; mutex_enter(&ibuf_mutex); + too_big = ibuf->size >= ibuf->max_size + IBUF_CONTRACT_DO_NOT_INSERT; + mutex_exit(&ibuf_mutex); - if (ibuf->size >= ibuf->max_size + IBUF_CONTRACT_DO_NOT_INSERT) { + if (too_big) { /* Insert buffer is now too big, contract it but do not try to insert */ - mutex_exit(&ibuf_mutex); #ifdef UNIV_IBUF_DEBUG fputs("Ibuf too big\n", stderr); 
@@ -3332,40 +3341,6 @@ ibuf_insert_low( return(DB_STRONG_FAIL); } - mutex_exit(&ibuf_mutex); - - if (mode == BTR_MODIFY_TREE) { - mutex_enter(&ibuf_pessimistic_insert_mutex); - - ibuf_enter(); - - mutex_enter(&ibuf_mutex); - - while (!ibuf_data_enough_free_for_insert()) { - - mutex_exit(&ibuf_mutex); - - ibuf_exit(); - - mutex_exit(&ibuf_pessimistic_insert_mutex); - - err = ibuf_add_free_page(); - - if (err == DB_STRONG_FAIL) { - - return(err); - } - - mutex_enter(&ibuf_pessimistic_insert_mutex); - - ibuf_enter(); - - mutex_enter(&ibuf_mutex); - } - } else { - ibuf_enter(); - } - heap = mem_heap_create(512); /* Build the entry which contains the space id and the page number @@ -3384,6 +3359,37 @@ ibuf_insert_low( the new entry to it without exceeding the free space limit for the page. */ + if (mode == BTR_MODIFY_TREE) { + for (;;) { + mutex_enter(&ibuf_pessimistic_insert_mutex); + + ibuf_enter(); + + mutex_enter(&ibuf_mutex); + + if (UNIV_LIKELY(ibuf_data_enough_free_for_insert())) { + + break; + } + + mutex_exit(&ibuf_mutex); + + ibuf_exit(); + + mutex_exit(&ibuf_pessimistic_insert_mutex); + + err = ibuf_add_free_page(); + + if (UNIV_UNLIKELY(err == DB_STRONG_FAIL)) { + + mem_heap_free(heap); + return(err); + } + } + } else { + ibuf_enter(); + } + mtr_start(&mtr); btr_pcur_open(ibuf->index, ibuf_entry, PAGE_CUR_LE, mode, &pcur, &mtr); @@ -4118,9 +4124,8 @@ ibuf_delete_rec( btr_pcur_commit_specify_mtr(pcur, mtr); func_exit: - btr_pcur_close(pcur); - mutex_exit(&ibuf_mutex); + btr_pcur_close(pcur); return(TRUE); } @@ -4495,6 +4500,11 @@ reset_bit: btr_pcur_close(&pcur); mem_heap_free(heap); +#ifdef HAVE_ATOMIC_BUILTINS + os_atomic_increment_ulint(&ibuf->n_merges, 1); + ibuf_add_ops(ibuf->n_merged_ops, mops); + ibuf_add_ops(ibuf->n_discarded_ops, dops); +#else /* HAVE_ATOMIC_BUILTINS */ /* Protect our statistics keeping from race conditions */ mutex_enter(&ibuf_mutex); @@ -4503,6 +4513,7 @@ reset_bit: ibuf_add_ops(ibuf->n_discarded_ops, dops); 
mutex_exit(&ibuf_mutex); +#endif /* HAVE_ATOMIC_BUILTINS */ if (update_ibuf_bitmap && !tablespace_being_deleted) { @@ -4604,10 +4615,14 @@ leave_loop: mtr_commit(&mtr); btr_pcur_close(&pcur); +#ifdef HAVE_ATOMIC_BUILTINS + ibuf_add_ops(ibuf->n_discarded_ops, dops); +#else /* HAVE_ATOMIC_BUILTINS */ /* Protect our statistics keeping from race conditions */ mutex_enter(&ibuf_mutex); ibuf_add_ops(ibuf->n_discarded_ops, dops); mutex_exit(&ibuf_mutex); +#endif /* HAVE_ATOMIC_BUILTINS */ ibuf_exit(); @@ -4652,10 +4667,10 @@ ibuf_is_empty(void) is_empty = FALSE; } - mtr_commit(&mtr); - mutex_exit(&ibuf_mutex); + mtr_commit(&mtr); + ibuf_exit(); return(is_empty); From 271e6ae34117db9475da877beb5ec2a0c7495872 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 10 Aug 2010 13:22:48 +0300 Subject: [PATCH 05/18] Bug#54914: InnoDB: performance drop with innodb_change_buffering=all Reduce ibuf_mutex and ibuf_pessimistic_insert_mutex contention further. Protect ibuf->empty by the insert buffer root page latch, not ibuf_mutex. ibuf_tree_root_get(): Assert that ibuf_mutex is owned by the caller. Assert that the stamped page number is correct. Assert that ibuf->empty agrees with the root page. ibuf_size_update(): Do not update ibuf->empty. ibuf_init_at_db_start(): Update ibuf->empty while holding the root page latch. ibuf_add_free_page(): Return TRUE/FALSE instead of DB_SUCCESS/DB_STRONG_FAIL. ibuf_remove_free_page(): Release ibuf_pessimistic_insert_mutex as early as possible. ibuf_contract_ext(): Rely on a dirty read of ibuf->empty, unless the server is being shut down. Never acquire ibuf_mutex. Eliminate n_stored. ibuf_contract_after_insert(): Never acquire ibuf_mutex. Perform dirty reads of ibuf->size and ibuf->max_size. ibuf_insert_low(): Only acquire ibuf_mutex for mode==BTR_MODIFY_TREE. Perform dirty reads of ibuf->size and ibuf->max_size. Update ibuf->empty while holding the root page latch. 
ibuf_delete_rec(): Update ibuf->empty while holding the root page latch. ibuf_is_empty(): Release ibuf_mutex earlier. --- storage/innobase/ibuf/ibuf0ibuf.c | 246 +++++++++++++------------- storage/innobase/include/ibuf0ibuf.ic | 11 +- 2 files changed, 132 insertions(+), 125 deletions(-) diff --git a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c index 1d162f82b93..a048de0e884 100644 --- a/storage/innobase/ibuf/ibuf0ibuf.c +++ b/storage/innobase/ibuf/ibuf0ibuf.c @@ -55,6 +55,7 @@ Created 7/19/1997 Heikki Tuuri #include "lock0lock.h" #include "log0recv.h" #include "que0que.h" +#include "srv0start.h" /* srv_shutdown_state */ /* STRUCTURE OF AN INSERT BUFFER RECORD @@ -395,8 +396,10 @@ ibuf_tree_root_get( mtr_t* mtr) /*!< in: mtr */ { buf_block_t* block; + page_t* root; ut_ad(ibuf_inside()); + ut_ad(mutex_own(&ibuf_mutex)); mtr_x_lock(dict_index_get_lock(ibuf->index), mtr); @@ -405,7 +408,13 @@ ibuf_tree_root_get( buf_block_dbg_add_level(block, SYNC_TREE_NODE); - return(buf_block_get_frame(block)); + root = buf_block_get_frame(block); + + ut_ad(page_get_space_id(root) == IBUF_SPACE_ID); + ut_ad(page_get_page_no(root) == FSP_IBUF_TREE_ROOT_PAGE_NO); + ut_ad(ibuf->empty == (page_get_n_recs(root) == 0)); + + return(root); } #ifdef UNIV_IBUF_COUNT_DEBUG @@ -482,8 +491,6 @@ ibuf_size_update( /* the '1 +' is the ibuf header page */ ibuf->size = ibuf->seg_size - (1 + ibuf->free_list_len); - - ibuf->empty = page_get_n_recs(root) == 0; } /******************************************************************//** @@ -554,6 +561,7 @@ ibuf_init_at_db_start(void) ibuf_size_update(root, &mtr); mutex_exit(&ibuf_mutex); + ibuf->empty = (page_get_n_recs(root) == 0); mtr_commit(&mtr); ibuf_exit(); @@ -2025,9 +2033,9 @@ ibuf_data_too_much_free(void) /*********************************************************************//** Allocates a new page from the ibuf file segment and adds it to the free list. 
-@return DB_SUCCESS, or DB_STRONG_FAIL if no space left */ +@return TRUE on success, FALSE if no space left */ static -ulint +ibool ibuf_add_free_page(void) /*====================*/ { @@ -2063,10 +2071,10 @@ ibuf_add_free_page(void) header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER, 0, FSP_UP, &mtr); - if (page_no == FIL_NULL) { + if (UNIV_UNLIKELY(page_no == FIL_NULL)) { mtr_commit(&mtr); - return(DB_STRONG_FAIL); + return(FALSE); } { @@ -2113,7 +2121,7 @@ ibuf_add_free_page(void) ibuf_exit(); - return(DB_SUCCESS); + return(TRUE); } /*********************************************************************//** @@ -2143,20 +2151,17 @@ ibuf_remove_free_page(void) header_page = ibuf_header_page_get(&mtr); /* Prevent pessimistic inserts to insert buffer trees for a while */ - mutex_enter(&ibuf_pessimistic_insert_mutex); - ibuf_enter(); - + mutex_enter(&ibuf_pessimistic_insert_mutex); mutex_enter(&ibuf_mutex); if (!ibuf_data_too_much_free()) { mutex_exit(&ibuf_mutex); + mutex_exit(&ibuf_pessimistic_insert_mutex); ibuf_exit(); - mutex_exit(&ibuf_pessimistic_insert_mutex); - mtr_commit(&mtr); return; @@ -2218,11 +2223,11 @@ ibuf_remove_free_page(void) flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr); + mutex_exit(&ibuf_pessimistic_insert_mutex); + ibuf->seg_size--; ibuf->free_list_len--; - mutex_exit(&ibuf_pessimistic_insert_mutex); - /* Set the bit indicating that this page is no more an ibuf tree page (level 2 page) */ @@ -2484,17 +2489,19 @@ ibuf_contract_ext( ulint page_nos[IBUF_MAX_N_PAGES_MERGED]; ulint space_ids[IBUF_MAX_N_PAGES_MERGED]; ib_int64_t space_versions[IBUF_MAX_N_PAGES_MERGED]; - ulint n_stored; ulint sum_sizes; mtr_t mtr; *n_pages = 0; ut_ad(!ibuf_inside()); - mutex_enter(&ibuf_mutex); + /* We perform a dirty read of ibuf->empty, without latching + the insert buffer root page. We trust this dirty read except + when a slow shutdown is being executed. 
During a slow + shutdown, the insert buffer merge must be completed. */ - if (ibuf->empty) { - mutex_exit(&ibuf_mutex); + if (UNIV_UNLIKELY(ibuf->empty) + && UNIV_LIKELY(!srv_shutdown_state)) { ibuf_is_empty: #if 0 /* TODO */ @@ -2523,18 +2530,18 @@ ibuf_is_empty: position within the leaf */ btr_pcur_open_at_rnd_pos(ibuf->index, BTR_SEARCH_LEAF, &pcur, &mtr); - mutex_exit(&ibuf_mutex); ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf->index)); if (page_get_n_recs(btr_pcur_get_page(&pcur)) == 0) { - /* When the ibuf tree is emptied completely, the last record - is removed using an optimistic delete and ibuf_size_update - is not called, causing ibuf->empty to remain FALSE. If we do - not reset it to TRUE here then database shutdown will hang - in the loop in ibuf_contract_for_n_pages. */ - - ibuf->empty = TRUE; + /* If a B-tree page is empty, it must be the root page + and the whole B-tree must be empty. InnoDB does not + allow empty B-tree pages other than the root. */ + ut_ad(ibuf->empty); + ut_ad(page_get_space_id(btr_pcur_get_page(&pcur)) + == IBUF_SPACE_ID); + ut_ad(page_get_page_no(btr_pcur_get_page(&pcur)) + == FSP_IBUF_TREE_ROOT_PAGE_NO); ibuf_exit(); @@ -2546,10 +2553,10 @@ ibuf_is_empty: sum_sizes = ibuf_get_merge_page_nos(TRUE, btr_pcur_get_rec(&pcur), space_ids, space_versions, - page_nos, &n_stored); + page_nos, n_pages); #if 0 /* defined UNIV_IBUF_DEBUG */ fprintf(stderr, "Ibuf contract sync %lu pages %lu volume %lu\n", - sync, n_stored, sum_sizes); + sync, *n_pages, sum_sizes); #endif ibuf_exit(); @@ -2557,8 +2564,7 @@ ibuf_is_empty: btr_pcur_close(&pcur); buf_read_ibuf_merge_pages(sync, space_ids, space_versions, page_nos, - n_stored); - *n_pages = n_stored; + *n_pages); return(sum_sizes + 1); } @@ -2628,33 +2634,33 @@ ibuf_contract_after_insert( ibool sync; ulint sum_sizes; ulint size; + ulint max_size; - mutex_enter(&ibuf_mutex); - - if (ibuf->size < ibuf->max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) { - mutex_exit(&ibuf_mutex); + /* Perform dirty 
reads of ibuf->size and ibuf->max_size, to + reduce ibuf_mutex contention. ibuf->max_size remains constant + after ibuf_init_at_db_start(), but ibuf->size should be + protected by ibuf_mutex. Given that ibuf->size fits in a + machine word, this should be OK; at worst we are doing some + excessive ibuf_contract() or occasionally skipping a + ibuf_contract(). */ + size = ibuf->size; + max_size = ibuf->max_size; + if (size < max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) { return; } - sync = FALSE; - - if (ibuf->size >= ibuf->max_size + IBUF_CONTRACT_ON_INSERT_SYNC) { - - sync = TRUE; - } - - mutex_exit(&ibuf_mutex); + sync = (size >= max_size + IBUF_CONTRACT_ON_INSERT_SYNC); /* Contract at least entry_size many bytes */ sum_sizes = 0; size = 1; - while ((size > 0) && (sum_sizes < entry_size)) { + do { size = ibuf_contract(sync); sum_sizes += size; - } + } while (size > 0 && sum_sizes < entry_size); } /*********************************************************************//** @@ -3272,7 +3278,7 @@ ibuf_set_entry_counter( /*********************************************************************//** Buffer an operation in the insert/delete buffer, instead of doing it directly to the disk page, if this is possible. -@return DB_SUCCESS, DB_FAIL, DB_STRONG_FAIL */ +@return DB_SUCCESS, DB_STRONG_FAIL or other error */ static ulint ibuf_insert_low( @@ -3302,6 +3308,7 @@ ibuf_insert_low( rec_t* ins_rec; ibool old_bit_value; page_t* bitmap_page; + buf_block_t* block; page_t* root; ulint err; ibool do_merge; @@ -3311,7 +3318,6 @@ ibuf_insert_low( ulint n_stored; mtr_t mtr; mtr_t bitmap_mtr; - ibool too_big; ut_a(!dict_index_is_clust(index)); ut_ad(dtuple_check_typed(entry)); @@ -3323,11 +3329,14 @@ ibuf_insert_low( do_merge = FALSE; - mutex_enter(&ibuf_mutex); - too_big = ibuf->size >= ibuf->max_size + IBUF_CONTRACT_DO_NOT_INSERT; - mutex_exit(&ibuf_mutex); - - if (too_big) { + /* Perform dirty reads of ibuf->size and ibuf->max_size, to + reduce ibuf_mutex contention. 
ibuf->max_size remains constant + after ibuf_init_at_db_start(), but ibuf->size should be + protected by ibuf_mutex. Given that ibuf->size fits in a + machine word, this should be OK; at worst we are doing some + excessive ibuf_contract() or occasionally skipping a + ibuf_contract(). */ + if (ibuf->size >= ibuf->max_size + IBUF_CONTRACT_DO_NOT_INSERT) { /* Insert buffer is now too big, contract it but do not try to insert */ @@ -3361,10 +3370,8 @@ ibuf_insert_low( if (mode == BTR_MODIFY_TREE) { for (;;) { - mutex_enter(&ibuf_pessimistic_insert_mutex); - ibuf_enter(); - + mutex_enter(&ibuf_pessimistic_insert_mutex); mutex_enter(&ibuf_mutex); if (UNIV_LIKELY(ibuf_data_enough_free_for_insert())) { @@ -3373,17 +3380,13 @@ ibuf_insert_low( } mutex_exit(&ibuf_mutex); - + mutex_exit(&ibuf_pessimistic_insert_mutex); ibuf_exit(); - mutex_exit(&ibuf_pessimistic_insert_mutex); - - err = ibuf_add_free_page(); - - if (UNIV_UNLIKELY(err == DB_STRONG_FAIL)) { + if (UNIV_UNLIKELY(!ibuf_add_free_page())) { mem_heap_free(heap); - return(err); + return(DB_STRONG_FAIL); } } } else { @@ -3423,9 +3426,14 @@ ibuf_insert_low( before mtr_commit(&mtr). We must not mtr_commit(&mtr) until after the IBUF_OP_DELETE has been buffered. 
*/ - err = DB_STRONG_FAIL; +fail_exit: + if (mode == BTR_MODIFY_TREE) { + mutex_exit(&ibuf_mutex); + mutex_exit(&ibuf_pessimistic_insert_mutex); + } - goto function_exit; + err = DB_STRONG_FAIL; + goto func_exit; } /* After this point, the page could still be loaded to the @@ -3471,9 +3479,7 @@ ibuf_insert_low( space_ids, space_versions, page_nos, &n_stored); - err = DB_STRONG_FAIL; - - goto function_exit; + goto fail_exit; } } @@ -3484,11 +3490,9 @@ ibuf_insert_low( && !ibuf_set_entry_counter(ibuf_entry, space, page_no, &pcur, mode == BTR_MODIFY_PREV, &mtr)) { bitmap_fail: - err = DB_STRONG_FAIL; - mtr_commit(&bitmap_mtr); - goto function_exit; + goto fail_exit; } /* Set the bitmap bit denoting that the insert buffer contains @@ -3512,10 +3516,19 @@ bitmap_fail: err = btr_cur_optimistic_insert(BTR_NO_LOCKING_FLAG, cursor, ibuf_entry, &ins_rec, &dummy_big_rec, 0, thr, &mtr); - if (err == DB_SUCCESS && op != IBUF_OP_DELETE) { - /* Update the page max trx id field */ - page_update_max_trx_id(btr_cur_get_block(cursor), NULL, - thr_get_trx(thr)->id, &mtr); + block = btr_cur_get_block(cursor); + ut_ad(buf_block_get_space(block) == IBUF_SPACE_ID); + + /* If this is the root page, update ibuf->empty. 
*/ + if (UNIV_UNLIKELY(buf_block_get_page_no(block) + == FSP_IBUF_TREE_ROOT_PAGE_NO)) { + const page_t* root = buf_block_get_frame(block); + + ut_ad(page_get_space_id(root) == IBUF_SPACE_ID); + ut_ad(page_get_page_no(root) + == FSP_IBUF_TREE_ROOT_PAGE_NO); + + ibuf->empty = (page_get_n_recs(root) == 0); } } else { ut_ad(mode == BTR_MODIFY_TREE); @@ -3532,16 +3545,22 @@ bitmap_fail: cursor, ibuf_entry, &ins_rec, &dummy_big_rec, 0, thr, &mtr); - if (err == DB_SUCCESS && op != IBUF_OP_DELETE) { - /* Update the page max trx id field */ - page_update_max_trx_id(btr_cur_get_block(cursor), NULL, - thr_get_trx(thr)->id, &mtr); - } - + mutex_exit(&ibuf_pessimistic_insert_mutex); ibuf_size_update(root, &mtr); + mutex_exit(&ibuf_mutex); + ibuf->empty = (page_get_n_recs(root) == 0); + + block = btr_cur_get_block(cursor); + ut_ad(buf_block_get_space(block) == IBUF_SPACE_ID); } -function_exit: + if (err == DB_SUCCESS && op != IBUF_OP_DELETE) { + /* Update the page max trx id field */ + page_update_max_trx_id(block, NULL, + thr_get_trx(thr)->id, &mtr); + } + +func_exit: #ifdef UNIV_IBUF_COUNT_DEBUG if (err == DB_SUCCESS) { fprintf(stderr, @@ -3553,11 +3572,6 @@ function_exit: ibuf_count_get(space, page_no) + 1); } #endif - if (mode == BTR_MODIFY_TREE) { - - mutex_exit(&ibuf_mutex); - mutex_exit(&ibuf_pessimistic_insert_mutex); - } mtr_commit(&mtr); btr_pcur_close(&pcur); @@ -3565,16 +3579,8 @@ function_exit: mem_heap_free(heap); - if (err == DB_SUCCESS) { - mutex_enter(&ibuf_mutex); - - ibuf->empty = FALSE; - - mutex_exit(&ibuf_mutex); - - if (mode == BTR_MODIFY_TREE) { - ibuf_contract_after_insert(entry_size); - } + if (err == DB_SUCCESS && mode == BTR_MODIFY_TREE) { + ibuf_contract_after_insert(entry_size); } if (do_merge) { @@ -4081,6 +4087,22 @@ ibuf_delete_rec( success = btr_cur_optimistic_delete(btr_pcur_get_btr_cur(pcur), mtr); if (success) { + if (UNIV_UNLIKELY(!page_get_n_recs(btr_pcur_get_page(pcur)))) { + /* If a B-tree page is empty, it must be the root page + and the 
whole B-tree must be empty. InnoDB does not + allow empty B-tree pages other than the root. */ + root = btr_pcur_get_page(pcur); + + ut_ad(page_get_space_id(root) == IBUF_SPACE_ID); + ut_ad(page_get_page_no(root) + == FSP_IBUF_TREE_ROOT_PAGE_NO); + + /* ibuf->empty is protected by the root page latch. + Before the deletion, it had to be FALSE. */ + ut_ad(!ibuf->empty); + ibuf->empty = TRUE; + } + #ifdef UNIV_IBUF_COUNT_DEBUG fprintf(stderr, "Decrementing ibuf count of space %lu page %lu\n" @@ -4108,6 +4130,7 @@ ibuf_delete_rec( if (!ibuf_restore_pos(space, page_no, search_tuple, BTR_MODIFY_TREE, pcur, mtr)) { + mutex_exit(&ibuf_mutex); goto func_exit; } @@ -4121,10 +4144,12 @@ ibuf_delete_rec( ibuf_count_set(space, page_no, ibuf_count_get(space, page_no) - 1); #endif ibuf_size_update(root, mtr); + mutex_exit(&ibuf_mutex); + + ibuf->empty = (page_get_n_recs(root) == 0); btr_pcur_commit_specify_mtr(pcur, mtr); func_exit: - mutex_exit(&ibuf_mutex); btr_pcur_close(pcur); return(TRUE); @@ -4642,37 +4667,18 @@ ibuf_is_empty(void) mtr_t mtr; ibuf_enter(); - - mutex_enter(&ibuf_mutex); - mtr_start(&mtr); + mutex_enter(&ibuf_mutex); root = ibuf_tree_root_get(&mtr); - - if (page_get_n_recs(root) == 0) { - - is_empty = TRUE; - - if (ibuf->empty == FALSE) { - fprintf(stderr, - "InnoDB: Warning: insert buffer tree is empty" - " but the data struct does not\n" - "InnoDB: know it. 
This condition is legal" - " if the master thread has not yet\n" - "InnoDB: run to completion.\n"); - } - } else { - ut_a(ibuf->empty == FALSE); - - is_empty = FALSE; - } - mutex_exit(&ibuf_mutex); + is_empty = (page_get_n_recs(root) == 0); mtr_commit(&mtr); - ibuf_exit(); + ut_a(is_empty == ibuf->empty); + return(is_empty); } diff --git a/storage/innobase/include/ibuf0ibuf.ic b/storage/innobase/include/ibuf0ibuf.ic index aee27cf9739..e3fa6e3e929 100644 --- a/storage/innobase/include/ibuf0ibuf.ic +++ b/storage/innobase/include/ibuf0ibuf.ic @@ -46,11 +46,12 @@ struct ibuf_struct{ ulint seg_size; /*!< allocated pages of the file segment containing ibuf header and tree */ - ibool empty; /*!< after an insert to the ibuf tree - is performed, this is set to FALSE, - and if a contract operation finds - the tree empty, this is set to - TRUE */ + ibool empty; /*!< Protected by the page + latch of the root page of the + insert buffer tree + (FSP_IBUF_TREE_ROOT_PAGE_NO). TRUE + if and only if the insert + buffer tree is empty. 
*/ ulint free_list_len; /*!< length of the free list */ ulint height; /*!< tree height */ dict_index_t* index; /*!< insert buffer index */ From 9d2a49d16d8f1a4bf0fb97c3718f05a59e9ed718 Mon Sep 17 00:00:00 2001 From: Vasil Dimov Date: Tue, 10 Aug 2010 17:18:21 +0300 Subject: [PATCH 06/18] Adjust tree name in .bzr-mysql/default.conf after rename --- .bzr-mysql/default.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.bzr-mysql/default.conf b/.bzr-mysql/default.conf index df9a60f35ad..255e320de4a 100644 --- a/.bzr-mysql/default.conf +++ b/.bzr-mysql/default.conf @@ -1,4 +1,4 @@ [MYSQL] post_commit_to = commits@lists.mysql.com, innodb_dev_ww@oracle.com post_push_to = commits@lists.mysql.com, innodb_dev_ww@oracle.com -tree_name = "mysql-trunk-innodb" +tree_name = "mysql-5.5-innodb" From 34a05995dfd2b9248bc78f06f291c59fc6d456fb Mon Sep 17 00:00:00 2001 From: Sunny Bains Date: Thu, 12 Aug 2010 20:00:07 +1000 Subject: [PATCH 07/18] Fix bug #52263 innodb does not compile on OpenSolaris with gcc4.3.2 Disable the GCC visibility attribute on all sun platforms. Approved by Marko on IRC. --- storage/innobase/include/univ.i | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i index ac87942f255..5a5af76e175 100644 --- a/storage/innobase/include/univ.i +++ b/storage/innobase/include/univ.i @@ -254,8 +254,10 @@ by one. */ option off; also some ibuf tests are suppressed */ /* Linkage specifier for non-static InnoDB symbols (variables and functions) -that are only referenced from within InnoDB, not from MySQL */ -#if defined(__GNUC__) && (__GNUC__ >= 4) || defined(__INTEL_COMPILER) +that are only referenced from within InnoDB, not from MySQL. We disable the +GCC visibility directive on all Sun operating systems because there is no +easy way to get it to work. See http://bugs.mysql.com/bug.php?id=52263. 
*/ +#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(sun) || defined(__INTEL_COMPILER) # define UNIV_INTERN __attribute__((visibility ("hidden"))) #else # define UNIV_INTERN From 50af6a8aea0d74789bd6d210e874f34a932c7268 Mon Sep 17 00:00:00 2001 From: Inaam Rana Date: Fri, 13 Aug 2010 12:14:59 -0400 Subject: [PATCH 08/18] Undo changes to innodb_strict_mode that went in by mistake in r3149 --- storage/innobase/handler/ha_innodb.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index e78f167beb6..ab9df9a0272 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -425,7 +425,7 @@ static MYSQL_THDVAR_BOOL(table_locks, PLUGIN_VAR_OPCMDARG, static MYSQL_THDVAR_BOOL(strict_mode, PLUGIN_VAR_OPCMDARG, "Use strict mode when evaluating create options.", - NULL, NULL, FALSE); + NULL, NULL, TRUE); static MYSQL_THDVAR_ULONG(lock_wait_timeout, PLUGIN_VAR_RQCMDARG, "Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back. Values above 100000000 disable the timeout.", From 1a649603f81c376fd8860fc636d2f4f185a5c945 Mon Sep 17 00:00:00 2001 From: Inaam Rana Date: Fri, 13 Aug 2010 15:07:22 -0400 Subject: [PATCH 09/18] Change default for innodb_strict_mode to FALSE. Note that this was originally pushed by Calvin but was later reverted by mistake. 
bug#54702 --- .../sys_vars/r/innodb_strict_mode_basic.result | 16 ++++++++-------- storage/innobase/handler/ha_innodb.cc | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/mysql-test/suite/sys_vars/r/innodb_strict_mode_basic.result b/mysql-test/suite/sys_vars/r/innodb_strict_mode_basic.result index 5e55faa99c9..200f9166215 100644 --- a/mysql-test/suite/sys_vars/r/innodb_strict_mode_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_strict_mode_basic.result @@ -1,32 +1,32 @@ SET @start_global_value = @@global.innodb_strict_mode; SELECT @start_global_value; @start_global_value -1 +0 Valid values are 'ON' and 'OFF' select @@global.innodb_strict_mode in (0, 1); @@global.innodb_strict_mode in (0, 1) 1 select @@global.innodb_strict_mode; @@global.innodb_strict_mode -1 +0 select @@session.innodb_strict_mode in (0, 1); @@session.innodb_strict_mode in (0, 1) 1 select @@session.innodb_strict_mode; @@session.innodb_strict_mode -1 +0 show global variables like 'innodb_strict_mode'; Variable_name Value -innodb_strict_mode ON +innodb_strict_mode OFF show session variables like 'innodb_strict_mode'; Variable_name Value -innodb_strict_mode ON +innodb_strict_mode OFF select * from information_schema.global_variables where variable_name='innodb_strict_mode'; VARIABLE_NAME VARIABLE_VALUE -INNODB_STRICT_MODE ON +INNODB_STRICT_MODE OFF select * from information_schema.session_variables where variable_name='innodb_strict_mode'; VARIABLE_NAME VARIABLE_VALUE -INNODB_STRICT_MODE ON +INNODB_STRICT_MODE OFF set global innodb_strict_mode='OFF'; set session innodb_strict_mode='OFF'; select @@global.innodb_strict_mode; @@ -117,4 +117,4 @@ INNODB_STRICT_MODE ON SET @@global.innodb_strict_mode = @start_global_value; SELECT @@global.innodb_strict_mode; @@global.innodb_strict_mode -1 +0 diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index ab9df9a0272..e78f167beb6 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ 
b/storage/innobase/handler/ha_innodb.cc @@ -425,7 +425,7 @@ static MYSQL_THDVAR_BOOL(table_locks, PLUGIN_VAR_OPCMDARG, static MYSQL_THDVAR_BOOL(strict_mode, PLUGIN_VAR_OPCMDARG, "Use strict mode when evaluating create options.", - NULL, NULL, TRUE); + NULL, NULL, FALSE); static MYSQL_THDVAR_ULONG(lock_wait_timeout, PLUGIN_VAR_RQCMDARG, "Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back. Values above 100000000 disable the timeout.", From 7f62ec7b38a3bceb23132dad117a7e4f8b592898 Mon Sep 17 00:00:00 2001 From: Vasil Dimov Date: Mon, 16 Aug 2010 17:23:29 +0300 Subject: [PATCH 10/18] Fix Bug#53761 RANGE estimation for matched rows may be 200 times different Improve the range estimation algorithm. Previously: For a given level the algo knows the number of pages in the requested range and the n With this change: Same idea, but peek a few (10) of the intermediate pages to get a better estimate of In the bug report one of the examples has a btree with a snippet of the leaf level li page1(899 records), page2(1 record), page3(1 record), page4(1 record) so when trying to estimate, the previous algo, assumed there are average (899+1)/2=45 Fix Bug#53761 RANGE estimation for matched rows may be 200 times different Improve the range estimation algorithm. Previously: For a given level the algo knows the number of pages in the requested range and the number of records on the leftmost and the rightmost page. Then it assumes all pages in between contain the average between the two border pages and multiplies this average number by the number of intermediate pages. With this change: Same idea, but peek a few (10) of the intermediate pages to get a better estimate of the average number of records per page. If there are less than 10 intermediate pages then all of them will be scanned and the result will be precise, not an estimation. 
In the bug report one of the examples has a btree with a snippet of the leaf level like this: page1(899 records), page2(1 record), page3(1 record), page4(1 record) so when trying to estimate, the previous algo, assumed there are average (899+1)/2=450 records per page which went terribly wrong. With this change page2 and page3 will be read and the exact number of records will be returned. Approved by: Sunny (rb://401) --- storage/innobase/btr/btr0cur.c | 176 +++++++++++++++++++++++++++-- storage/innobase/include/btr0cur.h | 5 + 2 files changed, 172 insertions(+), 9 deletions(-) diff --git a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c index 537d5f51184..2549589b0c7 100644 --- a/storage/innobase/btr/btr0cur.c +++ b/storage/innobase/btr/btr0cur.c @@ -3153,6 +3153,7 @@ btr_cur_add_path_info( { btr_path_t* slot; rec_t* rec; + page_t* page; ut_a(cursor->path_arr); @@ -3175,8 +3176,155 @@ btr_cur_add_path_info( slot = cursor->path_arr + (root_height - height); + page = page_align(rec); + slot->nth_rec = page_rec_get_n_recs_before(rec); - slot->n_recs = page_get_n_recs(page_align(rec)); + slot->n_recs = page_get_n_recs(page); + slot->page_no = page_get_page_no(page); + slot->page_level = btr_page_get_level_low(page); +} + +/*******************************************************************//** +Estimate the number of rows between slot1 and slot2 for any level on a +B-tree. This function starts from slot1->page and reads a few pages to +the right, counting their records. If we reach slot2->page quickly then +we know exactly how many records there are between slot1 and slot2 and +we set is_n_rows_exact to TRUE. If we cannot reach slot2->page quickly +then we calculate the average number of records in the pages scanned +so far and assume that all pages that we did not scan up to slot2->page +contain the same number of records, then we multiply that average to +the number of pages between slot1->page and slot2->page (which is +n_rows_on_prev_level). 
In this case we set is_n_rows_exact to FALSE. +@return number of rows (exact or estimated) */ +static +ib_int64_t +btr_estimate_n_rows_in_range_on_level( +/*==================================*/ + dict_index_t* index, /*!< in: index */ + btr_path_t* slot1, /*!< in: left border */ + btr_path_t* slot2, /*!< in: right border */ + ib_int64_t n_rows_on_prev_level, /*!< in: number of rows + on the previous level for the + same descend paths; used to + determine the number of pages + on this level */ + ibool* is_n_rows_exact) /*!< out: TRUE if the returned + value is exact i.e. not an + estimation */ +{ + ulint space; + ib_int64_t n_rows; + ulint n_pages_read; + ulint page_no; + ulint zip_size; + ulint level; + + space = dict_index_get_space(index); + + n_rows = 0; + n_pages_read = 0; + + /* Assume by default that we will scan all pages between + slot1->page_no and slot2->page_no */ + *is_n_rows_exact = TRUE; + + /* add records from slot1->page_no which are to the right of + the record which serves as a left border of the range, if any */ + if (slot1->nth_rec < slot1->n_recs) { + n_rows += slot1->n_recs - slot1->nth_rec; + } + + /* add records from slot2->page_no which are to the left of + the record which serves as a right border of the range, if any */ + if (slot2->nth_rec > 1) { + n_rows += slot2->nth_rec - 1; + } + + /* count the records in the pages between slot1->page_no and + slot2->page_no (non inclusive), if any */ + + zip_size = fil_space_get_zip_size(space); + + /* Do not read more than this number of pages in order not to hurt + performance with this code which is just an estimation.
If we read + this many pages before reaching slot2->page_no then we estimate the + average from the pages scanned so far */ + #define N_PAGES_READ_LIMIT 10 + + page_no = slot1->page_no; + level = slot1->page_level; + + do { + mtr_t mtr; + page_t* page; + buf_block_t* block; + + mtr_start(&mtr); + + /* fetch the page */ + block = buf_page_get(space, zip_size, page_no, RW_S_LATCH, + &mtr); + + page = buf_block_get_frame(block); + + /* It is possible that the tree has been reorganized in the + meantime and this is a different page. If this happens the + calculated estimate will be bogus, which is not fatal as + this is only an estimate. We are sure that a page with + page_no exists because InnoDB never frees pages, only + reuses them. */ + if (fil_page_get_type(page) != FIL_PAGE_INDEX + || btr_page_get_index_id(page) != index->id + || btr_page_get_level_low(page) != level) { + + /* The page got reused for something else */ + goto inexact; + } + + n_pages_read++; + + if (page_no != slot1->page_no) { + /* Do not count the records on slot1->page_no, + we already counted them before this loop. 
*/ + n_rows += page_get_n_recs(page); + } + + page_no = btr_page_get_next(page, &mtr); + + mtr_commit(&mtr); + + if (n_pages_read == N_PAGES_READ_LIMIT + || page_no == FIL_NULL) { + /* Either we read too many pages or + we reached the end of the level without passing + through slot2->page_no, the tree must have changed + in the meantime */ + goto inexact; + } + + } while (page_no != slot2->page_no); + + return(n_rows); + +inexact: + + *is_n_rows_exact = FALSE; + + /* We did interrupt before reaching slot2->page */ + + if (n_pages_read > 0) { + /* The number of pages on this level is + n_rows_on_prev_level, multiply it by the + average number of recs per page so far */ + n_rows = n_rows_on_prev_level + * n_rows / n_pages_read; + } else { + /* The tree changed before we could even + start with slot1->page_no */ + n_rows = 10; + } + + return(n_rows); } /*******************************************************************//** @@ -3201,6 +3349,7 @@ btr_estimate_n_rows_in_range( ibool diverged_lot; ulint divergence_level; ib_int64_t n_rows; + ibool is_n_rows_exact; ulint i; mtr_t mtr; @@ -3243,6 +3392,7 @@ btr_estimate_n_rows_in_range( /* We have the path information for the range in path1 and path2 */ n_rows = 1; + is_n_rows_exact = TRUE; diverged = FALSE; /* This becomes true when the path is not the same any more */ diverged_lot = FALSE; /* This becomes true when the paths are @@ -3258,7 +3408,7 @@ btr_estimate_n_rows_in_range( if (slot1->nth_rec == ULINT_UNDEFINED || slot2->nth_rec == ULINT_UNDEFINED) { - if (i > divergence_level + 1) { + if (i > divergence_level + 1 && !is_n_rows_exact) { /* In trees whose height is > 1 our algorithm tends to underestimate: multiply the estimate by 2: */ @@ -3270,7 +3420,9 @@ btr_estimate_n_rows_in_range( to over 1 / 2 of the estimated rows in the whole table */ - if (n_rows > index->table->stat_n_rows / 2) { + if (n_rows > index->table->stat_n_rows / 2 + && !is_n_rows_exact) { + n_rows = index->table->stat_n_rows / 2; /* If there 
are just 0 or 1 rows in the table, @@ -3296,10 +3448,15 @@ btr_estimate_n_rows_in_range( divergence_level = i; } } else { - /* Maybe the tree has changed between - searches */ - - return(10); + /* It is possible that + slot1->nth_rec >= slot2->nth_rec + if, for example, we have a single page + tree which contains (inf, 5, 6, supr) + and we select where x > 20 and x < 30; + in this case slot1->nth_rec will point + to the supr record and slot2->nth_rec + will point to 6 */ + n_rows = 0; } } else if (diverged && !diverged_lot) { @@ -3323,8 +3480,9 @@ btr_estimate_n_rows_in_range( } } else if (diverged_lot) { - n_rows = (n_rows * (slot1->n_recs + slot2->n_recs)) - / 2; + n_rows = btr_estimate_n_rows_in_range_on_level( + index, slot1, slot2, n_rows, + &is_n_rows_exact); } } } diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h index 757477838ee..7cafa6e0df5 100644 --- a/storage/innobase/include/btr0cur.h +++ b/storage/innobase/include/btr0cur.h @@ -615,6 +615,11 @@ struct btr_path_struct{ order); value ULINT_UNDEFINED denotes array end */ ulint n_recs; /*!< number of records on the page */ + ulint page_no; /*!< no of the page containing the record */ + ulint page_level; /*!< level of the page, if later we fetch + the page under page_no and it is on a different + level then we know that the tree has been + reorganized */ }; #define BTR_PATH_ARRAY_N_SLOTS 250 /*!< size of path array (in slots) */ From 393aaa4c515f724f110e962680db6148c0cf2a0a Mon Sep 17 00:00:00 2001 From: Vasil Dimov Date: Tue, 17 Aug 2010 09:17:04 +0300 Subject: [PATCH 11/18] Adjust innodb_mysql.result This is a followup to vasil.dimov@oracle.com-20100816142329-yimenbuktd416z1a which improved the sampling algorithm. I have manually checked that the new values are actually the correct ones, for example: -rows 16 +rows 32 the number of rows returned by the query is 32.
--- mysql-test/suite/innodb/r/innodb_mysql.result | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/mysql-test/suite/innodb/r/innodb_mysql.result b/mysql-test/suite/innodb/r/innodb_mysql.result index 9a677f83080..51beed66f0b 100644 --- a/mysql-test/suite/innodb/r/innodb_mysql.result +++ b/mysql-test/suite/innodb/r/innodb_mysql.result @@ -889,13 +889,13 @@ EXPLAIN SELECT * FROM t1 WHERE b BETWEEN 1 AND 2 ORDER BY a; id 1 select_type SIMPLE table t1 -type range +type index possible_keys bkey -key bkey -key_len 5 +key PRIMARY +key_len 4 ref NULL -rows 16 -Extra Using where; Using index; Using filesort +rows 32 +Extra Using where SELECT * FROM t1 WHERE b BETWEEN 1 AND 2 ORDER BY a; a b 1 2 @@ -934,12 +934,12 @@ EXPLAIN SELECT * FROM t1 WHERE b BETWEEN 1 AND 2 ORDER BY b,a; id 1 select_type SIMPLE table t1 -type range +type index possible_keys bkey key bkey key_len 5 ref NULL -rows 16 +rows 32 Extra Using where; Using index SELECT * FROM t1 WHERE b BETWEEN 1 AND 2 ORDER BY b,a; a b @@ -989,7 +989,7 @@ possible_keys bkey key bkey key_len 5 ref const -rows 8 +rows 16 Extra Using where; Using index; Using filesort SELECT * FROM t2 WHERE b=1 ORDER BY a; a b c @@ -1018,7 +1018,7 @@ possible_keys bkey key bkey key_len 10 ref const,const -rows 8 +rows 16 Extra Using where; Using index SELECT * FROM t2 WHERE b=1 AND c=1 ORDER BY a; a b c @@ -1047,7 +1047,7 @@ possible_keys bkey key bkey key_len 10 ref const,const -rows 8 +rows 16 Extra Using where; Using index SELECT * FROM t2 WHERE b=1 AND c=1 ORDER BY b,c,a; a b c @@ -1076,7 +1076,7 @@ possible_keys bkey key bkey key_len 10 ref const,const -rows 8 +rows 16 Extra Using where; Using index SELECT * FROM t2 WHERE b=1 AND c=1 ORDER BY c,a; a b c @@ -1213,7 +1213,7 @@ possible_keys b key b key_len 5 ref const -rows 1 +rows 2 Extra Using where; Using index SELECT * FROM t1 WHERE b=2 ORDER BY a ASC; a b @@ -1228,7 +1228,7 @@ possible_keys b key b key_len 5 ref const -rows 1 +rows 2 Extra Using 
where; Using index SELECT * FROM t1 WHERE b=2 ORDER BY a DESC; a b @@ -1372,7 +1372,7 @@ INSERT INTO t1 (a,b,c) VALUES (1,1,1), (2,1,1), (3,1,1), (4,1,1); INSERT INTO t1 (a,b,c) SELECT a+4,b,c FROM t1; EXPLAIN SELECT a, b, c FROM t1 WHERE b = 1 ORDER BY a DESC LIMIT 5; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 index t1_b PRIMARY 4 NULL 8 Using where +1 SIMPLE t1 range t1_b t1_b 5 NULL 8 Using where SELECT a, b, c FROM t1 WHERE b = 1 ORDER BY a DESC LIMIT 5; a b c 8 1 1 @@ -1735,7 +1735,7 @@ SELECT 1 FROM (SELECT COUNT(DISTINCT c1) FROM t1 WHERE c2 IN (1, 1) AND c3 = 2 GROUP BY c2) x; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY system NULL NULL NULL NULL 1 -2 DERIVED t1 index c3,c2 c2 10 NULL 5 +2 DERIVED t1 ALL c3,c2 c3 5 5 Using filesort DROP TABLE t1; CREATE TABLE t1 (c1 REAL, c2 REAL, c3 REAL, KEY (c3), KEY (c2, c3)) ENGINE=InnoDB; @@ -1749,7 +1749,7 @@ SELECT 1 FROM (SELECT COUNT(DISTINCT c1) FROM t1 WHERE c2 IN (1, 1) AND c3 = 2 GROUP BY c2) x; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY system NULL NULL NULL NULL 1 -2 DERIVED t1 index c3,c2 c2 18 NULL 5 +2 DERIVED t1 ALL c3,c2 c3 9 5 Using filesort DROP TABLE t1; CREATE TABLE t1 (c1 DECIMAL(12,2), c2 DECIMAL(12,2), c3 DECIMAL(12,2), KEY (c3), KEY (c2, c3)) @@ -1764,7 +1764,7 @@ SELECT 1 FROM (SELECT COUNT(DISTINCT c1) FROM t1 WHERE c2 IN (1, 1) AND c3 = 2 GROUP BY c2) x; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY system NULL NULL NULL NULL 1 -2 DERIVED t1 index c3,c2 c2 14 NULL 5 +2 DERIVED t1 ALL c3,c2 c3 7 5 Using filesort DROP TABLE t1; End of 5.1 tests # @@ -1871,7 +1871,7 @@ possible_keys b key b key_len 5 ref NULL -rows 3 +rows 5 Extra Using where; Using index EXPLAIN SELECT c FROM bar WHERE c>2;; id 1 @@ -2536,7 +2536,7 @@ f1 f2 f3 f4 EXPLAIN SELECT * FROM t1 WHERE f2 = 1 AND f4 = TRUE ORDER BY f1 DESC LIMIT 5; id select_type table type possible_keys key key_len ref 
rows Extra -1 SIMPLE t1 range f2,f4 f4 1 NULL 11 Using where +1 SIMPLE t1 range f2,f4 f4 1 NULL 22 Using where DROP TABLE t1; # # Bug#54117 crash in thr_multi_unlock, temporary table From aed93f872745f6b46b24dc2948a13074d1f368ea Mon Sep 17 00:00:00 2001 From: Vasil Dimov Date: Tue, 17 Aug 2010 09:24:33 +0300 Subject: [PATCH 12/18] Adjust innodb_gis.result This is a followup to vasil.dimov@oracle.com-20100816142329-yimenbuktd416z1a which improved the sampling algorithm. --- mysql-test/suite/innodb/r/innodb_gis.result | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mysql-test/suite/innodb/r/innodb_gis.result b/mysql-test/suite/innodb/r/innodb_gis.result index 0ce1ebe56ad..5712d08c9fa 100644 --- a/mysql-test/suite/innodb/r/innodb_gis.result +++ b/mysql-test/suite/innodb/r/innodb_gis.result @@ -572,7 +572,7 @@ COUNT(*) EXPLAIN SELECT COUNT(*) FROM t2 WHERE p=POINTFROMTEXT('POINT(1 2)'); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t2 ref p p 28 const 1 Using where +1 SIMPLE t2 ref p p 28 const 2 Using where SELECT COUNT(*) FROM t2 WHERE p=POINTFROMTEXT('POINT(1 2)'); COUNT(*) 2 From f0ba35c617945b0b767ce152d7ce0a91124bd13f Mon Sep 17 00:00:00 2001 From: Vasil Dimov Date: Tue, 17 Aug 2010 09:25:08 +0300 Subject: [PATCH 13/18] Adjust rowid_order_innodb.result This is a followup to vasil.dimov@oracle.com-20100816142329-yimenbuktd416z1a which improved the sampling algorithm. 
--- mysql-test/r/rowid_order_innodb.result | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mysql-test/r/rowid_order_innodb.result b/mysql-test/r/rowid_order_innodb.result index e0796cd7ab5..dc339304041 100644 --- a/mysql-test/r/rowid_order_innodb.result +++ b/mysql-test/r/rowid_order_innodb.result @@ -15,7 +15,7 @@ insert into t1 values (-5, 1, 1), (10, 1, 1); explain select * from t1 force index(key1, key2) where key1 < 3 or key2 < 3; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 index_merge key1,key2 key1,key2 5,5 NULL 4 Using sort_union(key1,key2); Using where +1 SIMPLE t1 index_merge key1,key2 key1,key2 5,5 NULL 5 Using sort_union(key1,key2); Using where select * from t1 force index(key1, key2) where key1 < 3 or key2 < 3; pk1 key1 key2 -100 1 1 From 8e168c5c27711b9ba36ce61ebfa674fc15190330 Mon Sep 17 00:00:00 2001 From: Vasil Dimov Date: Tue, 17 Aug 2010 09:26:41 +0300 Subject: [PATCH 14/18] Adjust type_bit_innodb.result This is a followup to vasil.dimov@oracle.com-20100816142329-yimenbuktd416z1a which improved the sampling algorithm. 
--- mysql-test/r/type_bit_innodb.result | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mysql-test/r/type_bit_innodb.result b/mysql-test/r/type_bit_innodb.result index a9c3cae1770..909db576b27 100644 --- a/mysql-test/r/type_bit_innodb.result +++ b/mysql-test/r/type_bit_innodb.result @@ -233,7 +233,7 @@ a+0 b+0 127 403 explain select a+0, b+0 from t1 where a > 40 and b > 200 order by 1; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 range a a 2 NULL 19 Using where; Using index; Using filesort +1 SIMPLE t1 range a a 2 NULL 27 Using where; Using index; Using filesort select a+0, b+0 from t1 where a > 40 and b > 200 order by 1; a+0 b+0 44 307 From 524e0dc4d54205bebf7bd5d590246c215f0fb801 Mon Sep 17 00:00:00 2001 From: Vasil Dimov Date: Tue, 17 Aug 2010 09:34:30 +0300 Subject: [PATCH 15/18] Adjust endspace.result This is a followup to vasil.dimov@oracle.com-20100816142329-yimenbuktd416z1a which improved the sampling algorithm. The endspace test is non-deterministic because it does not include ORDER BY clause in its queries. 
--- mysql-test/r/endspace.result | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mysql-test/r/endspace.result b/mysql-test/r/endspace.result index 9c8d12362c4..25e2238e7bb 100644 --- a/mysql-test/r/endspace.result +++ b/mysql-test/r/endspace.result @@ -107,8 +107,8 @@ concat('|', text1, '|') |teststring | select concat('|', text1, '|') from t1 where text1='teststring' or text1 like 'teststring_%'; concat('|', text1, '|') -|teststring | |teststring| +|teststring | |teststring | select concat('|', text1, '|') from t1 where text1='teststring' or text1 > 'teststring\t'; concat('|', text1, '|') @@ -203,13 +203,13 @@ teststring teststring select text1, length(text1) from t1 where text1='teststring' or text1 like 'teststring_%'; text1 length(text1) -teststring 11 teststring 10 +teststring 11 teststring 11 select text1, length(text1) from t1 where text1='teststring' or text1 >= 'teststring\t'; text1 length(text1) -teststring 11 teststring 10 +teststring 11 teststring 11 select concat('|', text1, '|') from t1 order by text1; concat('|', text1, '|') From b17b122b7daa2f6fbc04ab7a32269d6f2d22cbfe Mon Sep 17 00:00:00 2001 From: Jimmy Yang Date: Tue, 17 Aug 2010 01:19:24 -0700 Subject: [PATCH 16/18] Fix bug #53496 Use Lock_time in slow query log output for InnoDB row lock wait time. Including the InnoDB lock time in the existing "Lock_time" output.
--- include/mysql/plugin.h | 1 + include/mysql/plugin.h.pp | 1 + sql/sql_class.cc | 5 +++++ sql/sql_class.h | 2 +- storage/innobase/handler/ha_innodb.cc | 14 ++++++++++++++ storage/innobase/include/ha_prototypes.h | 8 ++++++++ storage/innobase/srv/srv0srv.c | 3 +++ 7 files changed, 33 insertions(+), 1 deletion(-) diff --git a/include/mysql/plugin.h b/include/mysql/plugin.h index 19cf0ed050d..15f7d785ead 100644 --- a/include/mysql/plugin.h +++ b/include/mysql/plugin.h @@ -528,6 +528,7 @@ long long thd_test_options(const MYSQL_THD thd, long long test_options); int thd_sql_command(const MYSQL_THD thd); const char *thd_proc_info(MYSQL_THD thd, const char *info); void **thd_ha_data(const MYSQL_THD thd, const struct handlerton *hton); +void thd_storage_lock_wait(MYSQL_THD thd, long long value); int thd_tx_isolation(const MYSQL_THD thd); char *thd_security_context(MYSQL_THD thd, char *buffer, unsigned int length, unsigned int max_query_len); diff --git a/include/mysql/plugin.h.pp b/include/mysql/plugin.h.pp index 3a1b03742da..9d2877be5a2 100644 --- a/include/mysql/plugin.h.pp +++ b/include/mysql/plugin.h.pp @@ -154,6 +154,7 @@ long long thd_test_options(const void* thd, long long test_options); int thd_sql_command(const void* thd); const char *thd_proc_info(void* thd, const char *info); void **thd_ha_data(const void* thd, const struct handlerton *hton); +void thd_storage_lock_wait(void* thd, long long value); int thd_tx_isolation(const void* thd); char *thd_security_context(void* thd, char *buffer, unsigned int length, unsigned int max_query_len); diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 1bec02afa96..28e86ecc67f 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -307,6 +307,11 @@ void **thd_ha_data(const THD *thd, const struct handlerton *hton) return (void **) &thd->ha_data[hton->slot].ha_ptr; } +extern "C" +void thd_storage_lock_wait(THD *thd, long long value) +{ + thd->utime_after_lock+= value; +} /** Provide a handler data getter to simplify coding 
diff --git a/sql/sql_class.h b/sql/sql_class.h index c095fee6232..b135af41af0 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -1505,7 +1505,7 @@ public: // track down slow pthread_create ulonglong prior_thr_create_utime, thr_create_utime; ulonglong start_utime, utime_after_lock; - + thr_lock_type update_lock_default; Delayed_insert *di; diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index e78f167beb6..a004cba9603 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -807,6 +807,20 @@ thd_lock_wait_timeout( return(THDVAR((THD*) thd, lock_wait_timeout)); } +/******************************************************************//** +Set the time waited for the lock for the current query. */ +extern "C" UNIV_INTERN +void +thd_set_lock_wait_time( +/*===================*/ + void* thd, /*!< in: thread handle (THD*) */ + ulint value) /*!< in: time waited for the lock */ +{ + if (thd) { + thd_storage_lock_wait((THD*)thd, value); + } +} + /********************************************************************//** Obtain the InnoDB transaction of a MySQL thread. @return reference to transaction pointer */ diff --git a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h index a9ee1d66b99..b75002944bd 100644 --- a/storage/innobase/include/ha_prototypes.h +++ b/storage/innobase/include/ha_prototypes.h @@ -267,5 +267,13 @@ thd_lock_wait_timeout( /*==================*/ void* thd); /*!< in: thread handle (THD*), or NULL to query the global innodb_lock_wait_timeout */ +/******************************************************************//** +Add up the time waited for the lock for the current query. 
*/ +UNIV_INTERN +void +thd_set_lock_wait_time( +/*===================*/ + void* thd, /*!< in: thread handle (THD*) */ + ulint value); /*!< in: time waited for the lock */ #endif diff --git a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c index 97d699dde99..bea8d7f8fdc 100644 --- a/storage/innobase/srv/srv0srv.c +++ b/storage/innobase/srv/srv0srv.c @@ -1643,6 +1643,9 @@ srv_suspend_mysql_thread( start_time != -1 && finish_time != -1) { srv_n_lock_max_wait_time = diff_time; } + + /* Record the lock wait time for this thread */ + thd_set_lock_wait_time(trx->mysql_thd, diff_time); } if (trx->was_chosen_as_deadlock_victim) { From 026d301f960e2413d615799d76a1f59552a5f1d8 Mon Sep 17 00:00:00 2001 From: Vasil Dimov Date: Tue, 17 Aug 2010 14:54:29 +0300 Subject: [PATCH 17/18] Make main.endspace more deterministic Followup to vasil.dimov@oracle.com-20100817063430-inglmzgdtj95t29d which didn't fully fix the test because the order of the returned rows was different in embedded and non-embedded version. So the only way to fix this is to add an ORDER BY clause. 
--- mysql-test/r/endspace.result | 14 +++++++------- mysql-test/t/endspace.test | 6 +++--- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/mysql-test/r/endspace.result b/mysql-test/r/endspace.result index 25e2238e7bb..4eca88774b4 100644 --- a/mysql-test/r/endspace.result +++ b/mysql-test/r/endspace.result @@ -54,8 +54,8 @@ text1 like 'teststring_%' ORDER BY text1; text1 teststring teststring -select concat('|', text1, '|') from t1 where text1='teststring' or text1 like 'teststring_%'; -concat('|', text1, '|') +select concat('|', text1, '|') as c from t1 where text1='teststring' or text1 like 'teststring_%' order by c; +c |teststring | |teststring| select concat('|', text1, '|') from t1 where text1='teststring' or text1 > 'teststring\t'; @@ -105,11 +105,11 @@ select concat('|', text1, '|') from t1 where text1 like 'teststring_%'; concat('|', text1, '|') |teststring | |teststring | -select concat('|', text1, '|') from t1 where text1='teststring' or text1 like 'teststring_%'; -concat('|', text1, '|') -|teststring| +select concat('|', text1, '|') as c from t1 where text1='teststring' or text1 like 'teststring_%' order by c; +c |teststring | |teststring | +|teststring| select concat('|', text1, '|') from t1 where text1='teststring' or text1 > 'teststring\t'; concat('|', text1, '|') |teststring| @@ -123,8 +123,8 @@ concat('|', text1, '|') drop table t1; create table t1 (text1 varchar(32) not NULL, KEY key1 (text1)) pack_keys=0; insert into t1 values ('teststring'), ('nothing'), ('teststring\t'); -select concat('|', text1, '|') from t1 where text1='teststring' or text1 like 'teststring_%'; -concat('|', text1, '|') +select concat('|', text1, '|') as c from t1 where text1='teststring' or text1 like 'teststring_%' order by c; +c |teststring | |teststring| select concat('|', text1, '|') from t1 where text1='teststring' or text1 >= 'teststring\t'; diff --git a/mysql-test/t/endspace.test b/mysql-test/t/endspace.test index b223c683cde..7c71b05f687 100644 --- 
a/mysql-test/t/endspace.test +++ b/mysql-test/t/endspace.test @@ -27,7 +27,7 @@ alter table t1 modify text1 char(32) binary not null; check table t1; select * from t1 ignore key (key1) where text1='teststring' or text1 like 'teststring_%' ORDER BY text1; -select concat('|', text1, '|') from t1 where text1='teststring' or text1 like 'teststring_%'; +select concat('|', text1, '|') as c from t1 where text1='teststring' or text1 like 'teststring_%' order by c; select concat('|', text1, '|') from t1 where text1='teststring' or text1 > 'teststring\t'; select text1, length(text1) from t1 order by text1; select text1, length(text1) from t1 order by binary text1; @@ -44,14 +44,14 @@ select concat('|', text1, '|') from t1 where text1='teststring'; select concat('|', text1, '|') from t1 where text1='teststring '; explain select concat('|', text1, '|') from t1 where text1='teststring '; select concat('|', text1, '|') from t1 where text1 like 'teststring_%'; -select concat('|', text1, '|') from t1 where text1='teststring' or text1 like 'teststring_%'; +select concat('|', text1, '|') as c from t1 where text1='teststring' or text1 like 'teststring_%' order by c; select concat('|', text1, '|') from t1 where text1='teststring' or text1 > 'teststring\t'; select concat('|', text1, '|') from t1 order by text1; drop table t1; create table t1 (text1 varchar(32) not NULL, KEY key1 (text1)) pack_keys=0; insert into t1 values ('teststring'), ('nothing'), ('teststring\t'); -select concat('|', text1, '|') from t1 where text1='teststring' or text1 like 'teststring_%'; +select concat('|', text1, '|') as c from t1 where text1='teststring' or text1 like 'teststring_%' order by c; select concat('|', text1, '|') from t1 where text1='teststring' or text1 >= 'teststring\t'; drop table t1; From 085bb22ab275ee0b4733764f51feb986c0cac63a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 17 Aug 2010 15:07:54 +0300 Subject: [PATCH 18/18] A non-functional change: 
dict_load_index_low(): Rename the parameter "cached" to "allocate" and clarify the comments. --- storage/innobase/dict/dict0load.c | 26 +++++++++++++------------- storage/innobase/include/dict0load.h | 16 ++++++++++------ 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/storage/innobase/dict/dict0load.c b/storage/innobase/dict/dict0load.c index 20a18c72a39..6bd15f0556a 100644 --- a/storage/innobase/dict/dict0load.c +++ b/storage/innobase/dict/dict0load.c @@ -1175,23 +1175,23 @@ static const char* dict_load_index_id_err = "SYS_INDEXES.TABLE_ID mismatch"; /********************************************************************//** Loads an index definition from a SYS_INDEXES record to dict_index_t. -If "cached" is set to "TRUE", we will create a dict_index_t structure -and fill it accordingly. Otherwise, the dict_index_t will -be supplied by the caller and filled with information read from -the record. -@return error message, or NULL on success */ +If allocate=TRUE, we will create a dict_index_t structure and fill it +accordingly. If allocate=FALSE, the dict_index_t will be supplied by +the caller and filled with information read from the record.
@return +error message, or NULL on success */ UNIV_INTERN const char* dict_load_index_low( /*================*/ byte* table_id, /*!< in/out: table id (8 bytes), - an "in" value if cached=TRUE - and "out" when cached=FALSE */ + an "in" value if allocate=TRUE + and "out" when allocate=FALSE */ const char* table_name, /*!< in: table name */ mem_heap_t* heap, /*!< in/out: temporary memory heap */ const rec_t* rec, /*!< in: SYS_INDEXES record */ - ibool cached, /*!< in: TRUE = add to cache, - FALSE = do not */ + ibool allocate, /*!< in: TRUE=allocate *index, + FALSE=fill in a pre-allocated + *index */ dict_index_t** index) /*!< out,own: index, or NULL */ { const byte* field; @@ -1203,8 +1203,8 @@ dict_load_index_low( ulint type; ulint space; - if (cached) { - /* If "cached" is set to TRUE, no dict_index_t will + if (allocate) { + /* If allocate=TRUE, no dict_index_t will be supplied. Initialize "*index" to NULL */ *index = NULL; } @@ -1223,7 +1223,7 @@ err_len: return("incorrect column length in SYS_INDEXES"); } - if (!cached) { + if (!allocate) { /* We are reading a SYS_INDEXES record. Copy the table_id */ memcpy(table_id, (const char*)field, 8); } else if (memcmp(field, table_id, 8)) { @@ -1279,7 +1279,7 @@ err_len: goto err_len; } - if (cached) { + if (allocate) { *index = dict_mem_index_create(table_name, name_buf, space, type, n_fields); } else { diff --git a/storage/innobase/include/dict0load.h b/storage/innobase/include/dict0load.h index 6a718a464ab..05d3532d59a 100644 --- a/storage/innobase/include/dict0load.h +++ b/storage/innobase/include/dict0load.h @@ -116,19 +116,23 @@ dict_load_column_low( const rec_t* rec); /*!< in: SYS_COLUMNS record */ /********************************************************************//** Loads an index definition from a SYS_INDEXES record to dict_index_t. -@return error message, or NULL on success */ +If allocate=TRUE, we will create a dict_index_t structure and fill it +accordingly. 
If allocate=FALSE, the dict_index_t will be supplied by +the caller and filled with information read from the record. @return +error message, or NULL on success */ UNIV_INTERN const char* dict_load_index_low( /*================*/ - byte* table_id, /*!< in/out: table id (8 bytes_, - an "in" value if cached=TRUE - and "out" when cached=FALSE */ + byte* table_id, /*!< in/out: table id (8 bytes), + an "in" value if allocate=TRUE + and "out" when allocate=FALSE */ const char* table_name, /*!< in: table name */ mem_heap_t* heap, /*!< in/out: temporary memory heap */ const rec_t* rec, /*!< in: SYS_INDEXES record */ - ibool cached, /*!< in: TRUE = add to cache - FALSE = do not */ + ibool allocate, /*!< in: TRUE=allocate *index, + FALSE=fill in a pre-allocated + *index */ dict_index_t** index); /*!< out,own: index, or NULL */ /********************************************************************//** Loads an index field definition from a SYS_FIELDS record to