From 360a4a037271d65ab6471f7ab3f9b6a893d90a31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vicen=C8=9Biu=20Ciorbaru?= Date: Tue, 16 May 2017 14:16:11 +0300 Subject: [PATCH] 5.6.36-82.0 --- storage/xtradb/btr/btr0sea.cc | 2 +- storage/xtradb/buf/buf0buf.cc | 2 +- storage/xtradb/buf/buf0dblwr.cc | 4 +- storage/xtradb/buf/buf0dump.cc | 4 +- storage/xtradb/buf/buf0flu.cc | 38 ++- storage/xtradb/dict/dict0boot.cc | 7 +- storage/xtradb/dict/dict0dict.cc | 21 +- storage/xtradb/dict/dict0stats.cc | 15 +- storage/xtradb/dict/dict0stats_bg.cc | 4 +- storage/xtradb/fil/fil0fil.cc | 24 +- storage/xtradb/fts/fts0que.cc | 20 ++ storage/xtradb/handler/ha_innodb.cc | 181 +++++++----- storage/xtradb/handler/ha_innodb.h | 6 +- storage/xtradb/handler/handler0alter.cc | 6 +- storage/xtradb/include/buf0dblwr.h | 4 +- storage/xtradb/include/dict0dict.h | 11 +- storage/xtradb/include/dict0dict.ic | 9 +- storage/xtradb/include/fil0fil.h | 14 +- storage/xtradb/include/ha0ha.h | 6 +- storage/xtradb/include/log0online.h | 2 +- storage/xtradb/include/os0file.h | 335 ++++++++++++++++++---- storage/xtradb/include/os0file.ic | 215 ++++++++++++-- storage/xtradb/include/row0mysql.h | 18 +- storage/xtradb/include/srv0srv.h | 17 +- storage/xtradb/include/srv0start.h | 4 +- storage/xtradb/include/trx0xa.h | 15 +- storage/xtradb/include/univ.i | 12 +- storage/xtradb/log/log0log.cc | 19 +- storage/xtradb/log/log0online.cc | 62 ++-- storage/xtradb/log/log0recv.cc | 4 +- storage/xtradb/os/os0file.cc | 240 ++++++++++------ storage/xtradb/pars/pars0opt.cc | 6 +- storage/xtradb/pars/pars0pars.cc | 3 +- storage/xtradb/row/row0log.cc | 17 +- storage/xtradb/row/row0merge.cc | 35 ++- storage/xtradb/row/row0sel.cc | 361 ++++++++++++++++++++---- storage/xtradb/srv/srv0srv.cc | 41 ++- storage/xtradb/srv/srv0start.cc | 66 +++-- storage/xtradb/trx/trx0purge.cc | 5 +- storage/xtradb/trx/trx0rec.cc | 3 +- storage/xtradb/trx/trx0roll.cc | 4 +- storage/xtradb/trx/trx0sys.cc | 18 +- 42 files changed, 1420 insertions(+), 460 deletions(-) diff --git a/storage/xtradb/btr/btr0sea.cc b/storage/xtradb/btr/btr0sea.cc index 78cb9dfd6e0..2ed383b0dcb 100644 --- a/storage/xtradb/btr/btr0sea.cc +++ b/storage/xtradb/btr/btr0sea.cc @@ -199,7 +199,7 @@ btr_search_sys_create( &btr_search_latch_arr[i], SYNC_SEARCH_SYS); btr_search_sys->hash_tables[i] - = ha_create(hash_size, 0, MEM_HEAP_FOR_BTR_SEARCH, 0); + = ib_create(hash_size, 0, MEM_HEAP_FOR_BTR_SEARCH, 0); #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG btr_search_sys->hash_tables[i]->adaptive = TRUE; diff --git a/storage/xtradb/buf/buf0buf.cc b/storage/xtradb/buf/buf0buf.cc index 978d94f07ec..b946b1dd4e4 100644 --- a/storage/xtradb/buf/buf0buf.cc +++ b/storage/xtradb/buf/buf0buf.cc @@ -1413,7 +1413,7 @@ buf_pool_init_instance( ut_a(srv_n_page_hash_locks != 0); ut_a(srv_n_page_hash_locks <= MAX_PAGE_HASH_LOCKS); - buf_pool->page_hash = ha_create(2 * buf_pool->curr_size, + buf_pool->page_hash = ib_create(2 * buf_pool->curr_size, srv_n_page_hash_locks, MEM_HEAP_FOR_PAGE_HASH, SYNC_BUF_PAGE_HASH); diff --git a/storage/xtradb/buf/buf0dblwr.cc b/storage/xtradb/buf/buf0dblwr.cc index 3c12d6da73f..46296814bb6 100644 --- a/storage/xtradb/buf/buf0dblwr.cc +++ b/storage/xtradb/buf/buf0dblwr.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -355,7 +355,7 @@ recovery, this function loads the pages from double write buffer into memory. */ void buf_dblwr_init_or_load_pages( /*=========================*/ - os_file_t file, + pfs_os_file_t file, char* path, bool load_corrupt_pages) { diff --git a/storage/xtradb/buf/buf0dump.cc b/storage/xtradb/buf/buf0dump.cc index 88ff4399115..8ecdedc613b 100644 --- a/storage/xtradb/buf/buf0dump.cc +++ b/storage/xtradb/buf/buf0dump.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2011, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2011, 2017, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -597,6 +597,7 @@ DECLARE_THREAD(buf_dump_thread)( void* arg MY_ATTRIBUTE((unused))) /*!< in: a dummy parameter required by os_thread_create */ { + my_thread_init(); ut_ad(!srv_read_only_mode); srv_buf_dump_thread_active = TRUE; @@ -632,6 +633,7 @@ DECLARE_THREAD(buf_dump_thread)( srv_buf_dump_thread_active = FALSE; + my_thread_end(); /* We count the number of threads in os_thread_exit(). A created thread should always use that to exit and not use return() to exit. */ os_thread_exit(NULL); diff --git a/storage/xtradb/buf/buf0flu.cc b/storage/xtradb/buf/buf0flu.cc index 5dd2efcf0c3..36e24fb3a1b 100644 --- a/storage/xtradb/buf/buf0flu.cc +++ b/storage/xtradb/buf/buf0flu.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -356,6 +356,7 @@ buf_flush_insert_into_flush_list( buf_block_t* block, /*!< in/out: block which is modified */ lsn_t lsn) /*!< in: oldest modification */ { + ut_ad(srv_shutdown_state != SRV_SHUTDOWN_FLUSH_PHASE); ut_ad(log_flush_order_mutex_own()); ut_ad(mutex_own(&block->mutex)); @@ -414,6 +415,7 @@ buf_flush_insert_sorted_into_flush_list( buf_page_t* prev_b; buf_page_t* b; + ut_ad(srv_shutdown_state != SRV_SHUTDOWN_FLUSH_PHASE); ut_ad(log_flush_order_mutex_own()); ut_ad(mutex_own(&block->mutex)); ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); @@ -724,6 +726,7 @@ buf_flush_write_complete( buf_page_set_io_fix(bpage, BUF_IO_NONE); buf_pool->n_flush[flush_type]--; + ut_ad(buf_pool->n_flush[flush_type] != ULINT_MAX); /* fprintf(stderr, "n pending flush %lu\n", buf_pool->n_flush[flush_type]); */ @@ -1061,6 +1064,7 @@ buf_flush_page( } ++buf_pool->n_flush[flush_type]; + ut_ad(buf_pool->n_flush[flush_type] != 0); mutex_exit(&buf_pool->flush_state_mutex); @@ -2206,13 +2210,14 @@ Clears up tail of the LRU lists: * Flush dirty pages at the tail of LRU to the disk The depth to which we scan each buffer pool is controlled by dynamic config parameter innodb_LRU_scan_depth. -@return number of pages flushed */ +@return number of flushed and evicted pages */ UNIV_INTERN ulint buf_flush_LRU_tail(void) /*====================*/ { ulint total_flushed = 0; + ulint total_evicted = 0; ulint start_time = ut_time_ms(); ulint scan_depth[MAX_BUFFER_POOLS]; ulint requested_pages[MAX_BUFFER_POOLS]; @@ -2278,6 +2283,7 @@ buf_flush_LRU_tail(void) limited_scan[i] = (previous_evicted[i] > n.evicted); previous_evicted[i] = n.evicted; + total_evicted += n.evicted; requested_pages[i] += lru_chunk_size; @@ -2310,7 +2316,7 @@ buf_flush_LRU_tail(void) MONITOR_LRU_BATCH_PAGES, total_flushed); } - return(total_flushed); + return(total_flushed + total_evicted); } /*********************************************************************//** @@ -2604,6 +2610,23 @@ buf_get_total_free_list_length(void) return result; } +/** Returns the aggregate LRU list length over all buffer pool instances. +@return total LRU list length. */ +MY_ATTRIBUTE((warn_unused_result)) +static +ulint +buf_get_total_LRU_list_length(void) +{ + ulint result = 0; + + for (ulint i = 0; i < srv_buf_pool_instances; i++) { + + result += UT_LIST_GET_LEN(buf_pool_from_array(i)->LRU); + } + + return result; +} + /*********************************************************************//** Adjust the desired page cleaner thread sleep time for LRU flushes. */ MY_ATTRIBUTE((nonnull)) @@ -2616,8 +2639,9 @@ page_cleaner_adapt_lru_sleep_time( ulint lru_n_flushed) /*!< in: number of flushed in previous batch */ { - ulint free_len = buf_get_total_free_list_length(); - ulint max_free_len = srv_LRU_scan_depth * srv_buf_pool_instances; + ulint free_len = buf_get_total_free_list_length(); + ulint max_free_len = ut_min(buf_get_total_LRU_list_length(), + srv_LRU_scan_depth * srv_buf_pool_instances); if (free_len < max_free_len / 100 && lru_n_flushed) { @@ -2629,7 +2653,7 @@ page_cleaner_adapt_lru_sleep_time( /* Free lists filled more than 20% or no pages flushed in previous batch, sleep a bit more */ - *lru_sleep_time += 50; + *lru_sleep_time += 1; if (*lru_sleep_time > srv_cleaner_max_lru_time) *lru_sleep_time = srv_cleaner_max_lru_time; } else if (free_len < max_free_len / 20 && *lru_sleep_time >= 50) { @@ -2676,6 +2700,7 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)( /*!< in: a dummy parameter required by os_thread_create */ { + my_thread_init(); ulint next_loop_time = ut_time_ms() + 1000; ulint n_flushed = 0; ulint last_activity = srv_get_activity_count(); @@ -2812,6 +2837,7 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)( thread_exit: buf_page_cleaner_is_active = FALSE; + my_thread_end(); /* We count the number of threads in os_thread_exit(). A created thread should always use that to exit and not use return() to exit. */ os_thread_exit(NULL); diff --git a/storage/xtradb/dict/dict0boot.cc b/storage/xtradb/dict/dict0boot.cc index c0bb0298bea..4c8420252c2 100644 --- a/storage/xtradb/dict/dict0boot.cc +++ b/storage/xtradb/dict/dict0boot.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -464,7 +464,10 @@ dict_boot(void) dberr_t err = DB_SUCCESS; - if (srv_read_only_mode && !ibuf_is_empty()) { + /** If innodb_force_recovery is set to 6 then allow + the server to start even though ibuf is not empty. */ + if (srv_force_recovery != SRV_FORCE_NO_LOG_REDO + && srv_read_only_mode && !ibuf_is_empty()) { ib_logf(IB_LOG_LEVEL_ERROR, "Change buffer must be empty when --innodb-read-only " diff --git a/storage/xtradb/dict/dict0dict.cc b/storage/xtradb/dict/dict0dict.cc index bd62744a49c..eec681c4005 100644 --- a/storage/xtradb/dict/dict0dict.cc +++ b/storage/xtradb/dict/dict0dict.cc @@ -835,16 +835,24 @@ dict_index_get_nth_col_or_prefix_pos( /*=================================*/ const dict_index_t* index, /*!< in: index */ ulint n, /*!< in: column number */ - ibool inc_prefix) /*!< in: TRUE=consider + ibool inc_prefix, /*!< in: TRUE=consider column prefixes too */ + ulint* prefix_col_pos) /*!< out: col num if prefix */ { const dict_field_t* field; const dict_col_t* col; ulint pos; ulint n_fields; + ulint prefixed_pos_dummy; ut_ad(index); ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); + ut_ad((inc_prefix && !prefix_col_pos) || (!inc_prefix)); + + if (!prefix_col_pos) { + prefix_col_pos = &prefixed_pos_dummy; + } + *prefix_col_pos = ULINT_UNDEFINED; col = dict_table_get_nth_col(index->table, n); @@ -858,10 +866,11 @@ dict_index_get_nth_col_or_prefix_pos( for (pos = 0; pos < n_fields; pos++) { field = dict_index_get_nth_field(index, pos); - if (col == field->col - && (inc_prefix || field->prefix_len == 0)) { - - return(pos); + if (col == field->col) { + *prefix_col_pos = pos; + if (inc_prefix || field->prefix_len == 0) { + return(pos); + } } } @@ -1005,7 +1014,7 @@ dict_table_get_nth_col_pos( ulint n) /*!< in: column number */ { return(dict_index_get_nth_col_pos(dict_table_get_first_index(table), - n)); + n, NULL)); } /********************************************************************//** diff --git a/storage/xtradb/dict/dict0stats.cc b/storage/xtradb/dict/dict0stats.cc index b0ba98308be..55a26268579 100644 --- a/storage/xtradb/dict/dict0stats.cc +++ b/storage/xtradb/dict/dict0stats.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2009, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2009, 2017, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -690,6 +690,9 @@ dict_stats_copy( && (src_idx = dict_table_get_next_index(src_idx)))) { if (dict_stats_should_ignore_index(dst_idx)) { + if (!(dst_idx->type & DICT_FTS)) { + dict_stats_empty_index(dst_idx); + } continue; } @@ -1096,10 +1099,10 @@ dict_stats_analyze_index_level( leaf-level delete marks because delete marks on non-leaf level do not make sense. */ - if (level == 0 && srv_stats_include_delete_marked? 0: + if (level == 0 && (srv_stats_include_delete_marked ? 0: rec_get_deleted_flag( rec, - page_is_comp(btr_pcur_get_page(&pcur)))) { + page_is_comp(btr_pcur_get_page(&pcur))))) { if (rec_is_last_on_page && !prev_rec_is_copied @@ -3229,12 +3232,6 @@ dict_stats_update( dict_table_stats_lock(table, RW_X_LATCH); - /* Initialize all stats to dummy values before - copying because dict_stats_table_clone_create() does - skip corrupted indexes so our dummy object 't' may - have less indexes than the real object 'table'. */ - dict_stats_empty_table(table); - dict_stats_copy(table, t); dict_stats_assert_initialized(table); diff --git a/storage/xtradb/dict/dict0stats_bg.cc b/storage/xtradb/dict/dict0stats_bg.cc index 6f01c379776..975c8a50803 100644 --- a/storage/xtradb/dict/dict0stats_bg.cc +++ b/storage/xtradb/dict/dict0stats_bg.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2012, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2012, 2017, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -334,6 +334,7 @@ DECLARE_THREAD(dict_stats_thread)( void* arg MY_ATTRIBUTE((unused))) /*!< in: a dummy parameter required by os_thread_create */ { + my_thread_init(); ut_a(!srv_read_only_mode); srv_dict_stats_thread_active = TRUE; @@ -359,6 +360,7 @@ DECLARE_THREAD(dict_stats_thread)( srv_dict_stats_thread_active = FALSE; + my_thread_end(); /* We count the number of threads in os_thread_exit(). A created thread should always use that to exit instead of return(). */ os_thread_exit(NULL); diff --git a/storage/xtradb/fil/fil0fil.cc b/storage/xtradb/fil/fil0fil.cc index 57e415ae939..d701bddebfc 100644 --- a/storage/xtradb/fil/fil0fil.cc +++ b/storage/xtradb/fil/fil0fil.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -146,7 +146,7 @@ struct fil_node_t { belongs */ char* name; /*!< path to the file */ ibool open; /*!< TRUE if file open */ - os_file_t handle; /*!< OS handle to the file, if file open */ + pfs_os_file_t handle; /*!< OS handle to the file, if file open */ os_event_t sync_event;/*!< Condition event to group and serialize calls to fsync */ ibool is_raw_disk;/*!< TRUE if the 'file' is actually a raw @@ -317,7 +317,8 @@ initialized. */ static fil_system_t* fil_system = NULL; /** Determine if (i) is a user tablespace id or not. */ -# define fil_is_user_tablespace_id(i) ((i) > srv_undo_tablespaces_open) +# define fil_is_user_tablespace_id(i) (i != 0 \ + && !srv_is_undo_tablespace(i)) /** Determine if user has explicitly disabled fsync(). */ #ifndef __WIN__ @@ -2084,7 +2085,7 @@ UNIV_INTERN const char* fil_read_first_page( /*================*/ - os_file_t data_file, /*!< in: open data file */ + pfs_os_file_t data_file, /*!< in: open data file */ ibool one_read_already, /*!< in: TRUE if min and max parameters below already contain sensible data */ @@ -3407,7 +3408,7 @@ fil_open_linked_file( /*===============*/ const char* tablename, /*!< in: database/tablename */ char** remote_filepath,/*!< out: remote filepath */ - os_file_t* remote_file) /*!< out: remote file handle */ + pfs_os_file_t* remote_file) /*!< out: remote file handle */ { ibool success; @@ -3467,7 +3468,8 @@ fil_create_new_single_table_tablespace( tablespace file in pages, must be >= FIL_IBD_FILE_INITIAL_SIZE */ { - os_file_t file; + pfs_os_file_t file; + ibool ret; dberr_t err; byte* buf2; @@ -5206,8 +5208,8 @@ retry: os_offset_t end_offset = (size_after_extend - file_start_page_no) * page_size; - success = (posix_fallocate(node->handle, start_offset, - end_offset) == 0); + success = (os_file_allocate(node->handle, start_offset, + end_offset) == 0); if (!success) { ib_logf(IB_LOG_LEVEL_ERROR, @@ -5960,7 +5962,7 @@ fil_flush( { fil_space_t* space; fil_node_t* node; - os_file_t file; + pfs_os_file_t file; mutex_enter(&fil_system->mutex); @@ -6319,7 +6321,7 @@ fil_buf_block_init( } struct fil_iterator_t { - os_file_t file; /*!< File handle */ + pfs_os_file_t file; /*!< File handle */ const char* filepath; /*!< File path name */ os_offset_t start; /*!< From where to start */ os_offset_t end; /*!< Where to stop */ @@ -6454,7 +6456,7 @@ fil_tablespace_iterate( PageCallback& callback) { dberr_t err; - os_file_t file; + pfs_os_file_t file; char* filepath; ut_a(n_io_buffers > 0); diff --git a/storage/xtradb/fts/fts0que.cc b/storage/xtradb/fts/fts0que.cc index 2c44a21a8f2..d926d9b0675 100644 --- a/storage/xtradb/fts/fts0que.cc +++ b/storage/xtradb/fts/fts0que.cc @@ -953,6 +953,18 @@ fts_query_free_doc_ids( query->total_size -= SIZEOF_RBT_CREATE; } +/** +Free the query intersection +@param[in] query query instance */ +static +void +fts_query_free_intersection( + fts_query_t* query) +{ + fts_query_free_doc_ids(query, query->intersection); + query->intersection = NULL; +} + /*******************************************************************//** Add the word to the documents "list" of matching words from the query. We make a copy of the word from the query heap. */ @@ -1311,6 +1323,7 @@ fts_query_intersect( /* error is passed by 'query->error' */ if (query->error != DB_SUCCESS) { ut_ad(query->error == DB_FTS_EXCEED_RESULT_CACHE_LIMIT); + fts_query_free_intersection(query); return(query->error); } @@ -1339,6 +1352,8 @@ fts_query_intersect( ut_a(!query->multi_exist || (query->multi_exist && rbt_size(query->doc_ids) <= n_doc_ids)); + } else if (query->intersection != NULL) { + fts_query_free_intersection(query); } } @@ -1557,6 +1572,11 @@ fts_merge_doc_ids( query, ranking->doc_id, ranking->rank); if (query->error != DB_SUCCESS) { + if (query->intersection != NULL) + { + ut_a(query->oper == FTS_EXIST); + fts_query_free_intersection(query); + } DBUG_RETURN(query->error); } diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index 05a77436a12..d1f6c3b499a 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2000, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2000, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, 2009 Google Inc. Copyright (c) 2009, Percona Inc. Copyright (c) 2012, Facebook Inc. @@ -701,6 +701,32 @@ innobase_is_fake_change( THD* thd); /*!< in: MySQL thread handle of the user for whom the transaction is being committed */ +/** Empty free list algorithm. +Checks if buffer pool is big enough to enable backoff algorithm. +InnoDB empty free list algorithm backoff requires free pages +from LRU for the best performance. +buf_LRU_buf_pool_running_out cancels query if 1/4 of +buffer pool belongs to LRU or freelist. +At the same time buf_flush_LRU_list_batch +keeps up to BUF_LRU_MIN_LEN in LRU. +In order to avoid deadlock baclkoff requires buffer pool +to be at least 4*BUF_LRU_MIN_LEN, +but flush peformance is bad because of trashing +and additional BUF_LRU_MIN_LEN pages are requested. +@param[in] algorithm desired algorithm from srv_empty_free_list_t +@return true if it's possible to enable backoff. */ +static inline +bool +innodb_empty_free_list_algorithm_allowed( + srv_empty_free_list_t algorithm) +{ + long long buf_pool_pages = srv_buf_pool_size / srv_page_size + / srv_buf_pool_instances; + + return(buf_pool_pages >= BUF_LRU_MIN_LEN * (4 + 1) + || algorithm != SRV_EMPTY_FREE_LIST_BACKOFF); +} + /** Get the list of foreign keys referencing a specified table table. @param thd The thread handle @@ -970,6 +996,10 @@ static SHOW_VAR innodb_status_variables[]= { (char*) &export_vars.innodb_x_lock_spin_rounds, SHOW_LONGLONG}, {"x_lock_spin_waits", (char*) &export_vars.innodb_x_lock_spin_waits, SHOW_LONGLONG}, + {"secondary_index_triggered_cluster_reads", + (char*) &export_vars.innodb_sec_rec_cluster_reads, SHOW_LONG}, + {"secondary_index_triggered_cluster_reads_avoided", + (char*) &export_vars.innodb_sec_rec_cluster_reads_avoided, SHOW_LONG}, {NullS, NullS, SHOW_LONG} }; @@ -1384,28 +1414,6 @@ innobase_drop_zip_dict( ulint* name_len); /*!< in/out: zip dictionary name length */ -/*************************************************************//** -Checks if buffer pool is big enough to enable backoff algorithm. -InnoDB empty free list algorithm backoff requires free pages -from LRU for the best performance. -buf_LRU_buf_pool_running_out cancels query if 1/4 of -buffer pool belongs to LRU or freelist. -At the same time buf_flush_LRU_list_batch -keeps up to BUF_LRU_MIN_LEN in LRU. -In order to avoid deadlock baclkoff requires buffer pool -to be at least 4*BUF_LRU_MIN_LEN, -but flush peformance is bad because of trashing -and additional BUF_LRU_MIN_LEN pages are requested. -@return true if it's possible to enable backoff. */ -static -bool -innodb_empty_free_list_algorithm_backoff_allowed( - srv_empty_free_list_t - algorithm, /*!< in: desired algorithm - from srv_empty_free_list_t */ - long long buf_pool_pages); /*!< in: total number - of pages inside buffer pool */ - /*************************************************************//** Removes old archived transaction log files. @return true on error */ @@ -3446,7 +3454,8 @@ innobase_init( innobase_hton->flush_logs = innobase_flush_logs; innobase_hton->show_status = innobase_show_status; innobase_hton->flags = HTON_SUPPORTS_EXTENDED_KEYS | - HTON_SUPPORTS_ONLINE_BACKUPS | HTON_SUPPORTS_FOREIGN_KEYS; + HTON_SUPPORTS_ONLINE_BACKUPS | HTON_SUPPORTS_FOREIGN_KEYS | + HTON_SUPPORTS_COMPRESSED_COLUMNS; innobase_hton->release_temporary_latches = innobase_release_temporary_latches; @@ -3915,10 +3924,9 @@ innobase_change_buffering_inited_ok: innobase_commit_concurrency_init_default(); /* Do not enable backoff algorithm for small buffer pool. */ - if (!innodb_empty_free_list_algorithm_backoff_allowed( + if (!innodb_empty_free_list_algorithm_allowed( static_cast( - srv_empty_free_list_algorithm), - innobase_buffer_pool_size / srv_page_size)) { + srv_empty_free_list_algorithm))) { sql_print_information( "InnoDB: innodb_empty_free_list_algorithm " "has been changed to legacy " @@ -4160,6 +4168,10 @@ innobase_create_zip_dict( if (UNIV_UNLIKELY(high_level_read_only)) { DBUG_RETURN(HA_CREATE_ZIP_DICT_READ_ONLY); } + + if (UNIV_UNLIKELY(THDVAR(NULL, fake_changes))) { + DBUG_RETURN(HA_CREATE_ZIP_DICT_FAKE_CHANGES); + } if (UNIV_UNLIKELY(*name_len > ZIP_DICT_MAX_NAME_LENGTH)) { *name_len = ZIP_DICT_MAX_NAME_LENGTH; @@ -4178,6 +4190,15 @@ innobase_create_zip_dict( case DB_DUPLICATE_KEY: result = HA_CREATE_ZIP_DICT_ALREADY_EXISTS; break; + case DB_OUT_OF_MEMORY: + result = HA_CREATE_ZIP_DICT_OUT_OF_MEMORY; + break; + case DB_OUT_OF_FILE_SPACE: + result = HA_CREATE_ZIP_DICT_OUT_OF_FILE_SPACE; + break; + case DB_TOO_MANY_CONCURRENT_TRXS: + result = HA_CREATE_ZIP_DICT_TOO_MANY_CONCURRENT_TRXS; + break; default: ut_ad(0); result = HA_CREATE_ZIP_DICT_UNKNOWN_ERROR; @@ -4204,6 +4225,10 @@ innobase_drop_zip_dict( DBUG_RETURN(HA_DROP_ZIP_DICT_READ_ONLY); } + if (UNIV_UNLIKELY(THDVAR(NULL, fake_changes))) { + DBUG_RETURN(HA_DROP_ZIP_DICT_FAKE_CHANGES); + } + switch (dict_drop_zip_dict(name, *name_len)) { case DB_SUCCESS: result = HA_DROP_ZIP_DICT_OK; @@ -4214,6 +4239,15 @@ innobase_drop_zip_dict( case DB_ROW_IS_REFERENCED: result = HA_DROP_ZIP_DICT_IS_REFERENCED; break; + case DB_OUT_OF_MEMORY: + result = HA_DROP_ZIP_DICT_OUT_OF_MEMORY; + break; + case DB_OUT_OF_FILE_SPACE: + result = HA_DROP_ZIP_DICT_OUT_OF_FILE_SPACE; + break; + case DB_TOO_MANY_CONCURRENT_TRXS: + result = HA_DROP_ZIP_DICT_TOO_MANY_CONCURRENT_TRXS; + break; default: ut_ad(0); result = HA_DROP_ZIP_DICT_UNKNOWN_ERROR; @@ -6032,6 +6066,8 @@ table_opened: prebuilt->default_rec = table->s->default_values; ut_ad(prebuilt->default_rec); + prebuilt->mysql_handler = this; + /* Looks like MySQL-3.23 sometimes has primary key number != 0 */ primary_key = table->s->primary_key; key_used_on_scan = primary_key; @@ -7181,9 +7217,31 @@ build_template_field( ut_a(templ->clust_rec_field_no != ULINT_UNDEFINED); if (dict_index_is_clust(index)) { + templ->rec_field_is_prefix = false; templ->rec_field_no = templ->clust_rec_field_no; + templ->rec_prefix_field_no = ULINT_UNDEFINED; } else { - templ->rec_field_no = dict_index_get_nth_col_pos(index, i); + /* If we're in a secondary index, keep track of the original + index position even if this is just a prefix index; we will use + this later to avoid a cluster index lookup in some cases.*/ + + templ->rec_field_no = dict_index_get_nth_col_pos(index, i, + &templ->rec_prefix_field_no); + templ->rec_field_is_prefix + = (templ->rec_field_no == ULINT_UNDEFINED) + && (templ->rec_prefix_field_no != ULINT_UNDEFINED); +#ifdef UNIV_DEBUG + if (templ->rec_prefix_field_no != ULINT_UNDEFINED) + { + const dict_field_t* field = dict_index_get_nth_field( + index, + templ->rec_prefix_field_no); + ut_ad(templ->rec_field_is_prefix + == (field->prefix_len != 0)); + } else { + ut_ad(!templ->rec_field_is_prefix); + } +#endif } if (field->real_maybe_null()) { @@ -7373,7 +7431,8 @@ ha_innobase::build_template( } else { templ->icp_rec_field_no = dict_index_get_nth_col_pos( - prebuilt->index, i); + prebuilt->index, i, + NULL); } if (dict_index_is_clust(prebuilt->index)) { @@ -7403,7 +7462,7 @@ ha_innobase::build_template( templ->icp_rec_field_no = dict_index_get_nth_col_or_prefix_pos( - prebuilt->index, i, TRUE); + prebuilt->index, i, TRUE, NULL); ut_ad(templ->icp_rec_field_no != ULINT_UNDEFINED); @@ -11288,7 +11347,8 @@ ha_innobase::delete_table( extension, in contrast to ::create */ normalize_table_name(norm_name, name); - if (srv_read_only_mode) { + if (srv_read_only_mode + || srv_force_recovery >= SRV_FORCE_NO_UNDO_LOG_SCAN) { DBUG_RETURN(HA_ERR_TABLE_READONLY); } else if (row_is_magic_monitor_table(norm_name) && check_global_access(thd, PROCESS_ACL)) { @@ -15301,16 +15361,21 @@ and SYS_ZIP_DICT_COLS for all columns marked with COLUMN_FORMAT_TYPE_COMPRESSED flag and updates zip_dict_name / zip_dict_data for those which have associated compression dictionaries. + +@param part_name Full table name (including partition part). + Must be non-NULL only if called from ha_partition. */ UNIV_INTERN void -ha_innobase::update_field_defs_with_zip_dict_info() +ha_innobase::update_field_defs_with_zip_dict_info(const char *part_name) { DBUG_ENTER("update_field_defs_with_zip_dict_info"); ut_ad(!mutex_own(&dict_sys->mutex)); char norm_name[FN_REFLEN]; - normalize_table_name(norm_name, table_share->normalized_path.str); + normalize_table_name(norm_name, + part_name != 0 ? part_name : + table_share->normalized_path.str); dict_table_t* ib_table = dict_table_open_on_name( norm_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE); @@ -16687,15 +16752,17 @@ innodb_buffer_pool_evict_uncompressed(void) ut_ad(block->page.in_LRU_list); mutex_enter(&block->mutex); - if (!buf_LRU_free_page(&block->page, false)) { - mutex_exit(&block->mutex); - all_evicted = false; - } else { - mutex_exit(&block->mutex); - mutex_enter(&buf_pool->LRU_list_mutex); - } + all_evicted = buf_LRU_free_page(&block->page, false); + mutex_exit(&block->mutex); - block = prev_block; + if (all_evicted) { + + mutex_enter(&buf_pool->LRU_list_mutex); + block = UT_LIST_GET_LAST(buf_pool->unzip_LRU); + } else { + + block = prev_block; + } } mutex_exit(&buf_pool->LRU_list_mutex); @@ -17497,32 +17564,6 @@ innodb_status_output_update( os_event_set(lock_sys->timeout_event); } -/*************************************************************//** -Empty free list algorithm. -Checks if buffer pool is big enough to enable backoff algorithm. -InnoDB empty free list algorithm backoff requires free pages -from LRU for the best performance. -buf_LRU_buf_pool_running_out cancels query if 1/4 of -buffer pool belongs to LRU or freelist. -At the same time buf_flush_LRU_list_batch -keeps up to BUF_LRU_MIN_LEN in LRU. -In order to avoid deadlock baclkoff requires buffer pool -to be at least 4*BUF_LRU_MIN_LEN, -but flush peformance is bad because of trashing -and additional BUF_LRU_MIN_LEN pages are requested. -@return true if it's possible to enable backoff. */ -static -bool -innodb_empty_free_list_algorithm_backoff_allowed( - srv_empty_free_list_t algorithm, /*!< in: desired algorithm - from srv_empty_free_list_t */ - long long buf_pool_pages) /*!< in: total number - of pages inside buffer pool */ -{ - return(buf_pool_pages >= BUF_LRU_MIN_LEN * (4 + 1) - || algorithm != SRV_EMPTY_FREE_LIST_BACKOFF); -} - /*************************************************************//** Empty free list algorithm. This function is registered as a callback with MySQL. @@ -17564,13 +17605,11 @@ innodb_srv_empty_free_list_algorithm_validate( return(1); algorithm = static_cast(algo); - if (!innodb_empty_free_list_algorithm_backoff_allowed( - algorithm, - innobase_buffer_pool_size / srv_page_size)) { + if (!innodb_empty_free_list_algorithm_allowed(algorithm)) { sql_print_warning( "InnoDB: innodb_empty_free_list_algorithm " "= 'backoff' requires at least" - " 20MB buffer pool.\n"); + " 20MB buffer pool instances.\n"); return(1); } diff --git a/storage/xtradb/handler/ha_innodb.h b/storage/xtradb/handler/ha_innodb.h index 39de6b179ab..99953a22121 100644 --- a/storage/xtradb/handler/ha_innodb.h +++ b/storage/xtradb/handler/ha_innodb.h @@ -295,8 +295,12 @@ class ha_innobase: public handler COLUMN_FORMAT_TYPE_COMPRESSED flag and updates zip_dict_name / zip_dict_data for those which have associated compression dictionaries. + + @param part_name Full table name (including partition part). + Must be non-NULL only if called from ha_partition. */ - virtual void update_field_defs_with_zip_dict_info(); + virtual void update_field_defs_with_zip_dict_info( + const char* part_name); private: /** Builds a 'template' to the prebuilt struct. diff --git a/storage/xtradb/handler/handler0alter.cc b/storage/xtradb/handler/handler0alter.cc index 098d93d4529..d6576fe27d5 100644 --- a/storage/xtradb/handler/handler0alter.cc +++ b/storage/xtradb/handler/handler0alter.cc @@ -1148,7 +1148,8 @@ innobase_rec_to_mysql( field->reset(); - ipos = dict_index_get_nth_col_or_prefix_pos(index, i, TRUE); + ipos = dict_index_get_nth_col_or_prefix_pos(index, i, TRUE, + NULL); if (ipos == ULINT_UNDEFINED || rec_offs_nth_extern(offsets, ipos)) { @@ -1196,7 +1197,8 @@ innobase_fields_to_mysql( field->reset(); - ipos = dict_index_get_nth_col_or_prefix_pos(index, i, TRUE); + ipos = dict_index_get_nth_col_or_prefix_pos(index, i, TRUE, + NULL); if (ipos == ULINT_UNDEFINED || dfield_is_ext(&fields[ipos]) diff --git a/storage/xtradb/include/buf0dblwr.h b/storage/xtradb/include/buf0dblwr.h index a62a6400d97..18e124b7437 100644 --- a/storage/xtradb/include/buf0dblwr.h +++ b/storage/xtradb/include/buf0dblwr.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -55,7 +55,7 @@ recovery, this function loads the pages from double write buffer into memory. */ void buf_dblwr_init_or_load_pages( /*=========================*/ - os_file_t file, + pfs_os_file_t file, char* path, bool load_corrupt_pages); diff --git a/storage/xtradb/include/dict0dict.h b/storage/xtradb/include/dict0dict.h index 99d745eb49f..7ca2f45cb80 100644 --- a/storage/xtradb/include/dict0dict.h +++ b/storage/xtradb/include/dict0dict.h @@ -1142,8 +1142,9 @@ ulint dict_index_get_nth_col_pos( /*=======================*/ const dict_index_t* index, /*!< in: index */ - ulint n) /*!< in: column number */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + ulint n, /*!< in: column number */ + ulint* prefix_col_pos) /*!< out: col num if prefix */ + MY_ATTRIBUTE((nonnull(1), warn_unused_result)); /********************************************************************//** Looks for column n in an index. @return position in internal representation of the index; @@ -1154,9 +1155,11 @@ dict_index_get_nth_col_or_prefix_pos( /*=================================*/ const dict_index_t* index, /*!< in: index */ ulint n, /*!< in: column number */ - ibool inc_prefix) /*!< in: TRUE=consider + ibool inc_prefix, /*!< in: TRUE=consider column prefixes too */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + ulint* prefix_col_pos) /*!< out: col num if prefix */ + + MY_ATTRIBUTE((nonnull(1), warn_unused_result)); /********************************************************************//** Returns TRUE if the index contains a column or a prefix of that column. @return TRUE if contains the column or its prefix */ diff --git a/storage/xtradb/include/dict0dict.ic b/storage/xtradb/include/dict0dict.ic index 58a9ef4d65d..32bc4f376d9 100644 --- a/storage/xtradb/include/dict0dict.ic +++ b/storage/xtradb/include/dict0dict.ic @@ -1049,7 +1049,8 @@ dict_index_get_sys_col_pos( } return(dict_index_get_nth_col_pos( - index, dict_table_get_sys_col_no(index->table, type))); + index, dict_table_get_sys_col_no(index->table, type), + NULL)); } /*********************************************************************//** @@ -1101,9 +1102,11 @@ ulint dict_index_get_nth_col_pos( /*=======================*/ const dict_index_t* index, /*!< in: index */ - ulint n) /*!< in: column number */ + ulint n, /*!< in: column number */ + ulint* prefix_col_pos) /*!< out: col num if prefix */ { - return(dict_index_get_nth_col_or_prefix_pos(index, n, FALSE)); + return(dict_index_get_nth_col_or_prefix_pos(index, n, FALSE, + prefix_col_pos)); } #ifndef UNIV_HOTBACKUP diff --git a/storage/xtradb/include/fil0fil.h b/storage/xtradb/include/fil0fil.h index 14dfa296435..78222b3122a 100644 --- a/storage/xtradb/include/fil0fil.h +++ b/storage/xtradb/include/fil0fil.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -182,7 +182,7 @@ struct fsp_open_info { ibool success; /*!< Has the tablespace been opened? */ const char* check_msg; /*!< fil_check_first_page() message */ ibool valid; /*!< Is the tablespace valid? */ - os_file_t file; /*!< File handle */ + pfs_os_file_t file; /*!< File handle */ char* filepath; /*!< File path to open */ lsn_t lsn; /*!< Flushed LSN from header page */ ulint id; /*!< Space ID */ @@ -390,7 +390,7 @@ UNIV_INTERN const char* fil_read_first_page( /*================*/ - os_file_t data_file, /*!< in: open data file */ + pfs_os_file_t data_file, /*!< in: open data file */ ibool one_read_already, /*!< in: TRUE if min and max parameters below already contain sensible data */ @@ -906,12 +906,12 @@ struct PageCallback { Called for every page in the tablespace. If the page was not updated then its state must be set to BUF_PAGE_NOT_USED. For compressed tables the page descriptor memory will be at offset: - block->frame + UNIV_PAGE_SIZE; + block->frame + UNIV_PAGE_SIZE; @param offset - physical offset within the file @param block - block read from file, note it is not from the buffer pool @retval DB_SUCCESS or error code. */ virtual dberr_t operator()( - os_offset_t offset, + os_offset_t offset, buf_block_t* block) UNIV_NOTHROW = 0; /** @@ -919,7 +919,7 @@ struct PageCallback { to open it for the file that is being iterated over. @param filename - then physical name of the tablespace file. @param file - OS file handle */ - void set_file(const char* filename, os_file_t file) UNIV_NOTHROW + void set_file(const char* filename, pfs_os_file_t file) UNIV_NOTHROW { m_file = file; m_filepath = filename; @@ -955,7 +955,7 @@ struct PageCallback { ulint m_page_size; /** File handle to the tablespace */ - os_file_t m_file; + pfs_os_file_t m_file; /** Physical file path. */ const char* m_filepath; diff --git a/storage/xtradb/include/ha0ha.h b/storage/xtradb/include/ha0ha.h index 7351b407e8c..58eb581e76a 100644 --- a/storage/xtradb/include/ha0ha.h +++ b/storage/xtradb/include/ha0ha.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -107,7 +107,7 @@ chosen to be a slightly bigger prime number. @param level in: level of the mutexes in the latching order @param n_m in: number of mutexes to protect the hash table; must be a power of 2, or 0 */ -# define ha_create(n_c,n_m,type,level) ha_create_func(n_c,level,n_m,type) +# define ib_create(n_c,n_m,type,level) ha_create_func(n_c,level,n_m,type) #else /* UNIV_SYNC_DEBUG */ /** Creates a hash table. @return own: created table @@ -116,7 +116,7 @@ chosen to be a slightly bigger prime number. @param level in: level of the mutexes in the latching order @param n_m in: number of mutexes to protect the hash table; must be a power of 2, or 0 */ -# define ha_create(n_c,n_m,type,level) ha_create_func(n_c,n_m,type) +# define ib_create(n_c,n_m,type,level) ha_create_func(n_c,n_m,type) #endif /* UNIV_SYNC_DEBUG */ /*************************************************************//** diff --git a/storage/xtradb/include/log0online.h b/storage/xtradb/include/log0online.h index 722336dd6b4..5c3e7d07fd9 100644 --- a/storage/xtradb/include/log0online.h +++ b/storage/xtradb/include/log0online.h @@ -130,7 +130,7 @@ log_online_bitmap_iterator_next( /** Struct for single bitmap file information */ struct log_online_bitmap_file_struct { char name[FN_REFLEN]; /*!< Name with full path */ - os_file_t file; /*!< Handle to opened file */ + pfs_os_file_t file; /*!< Handle to opened file */ ib_uint64_t size; /*!< Size of the file */ os_offset_t offset; /*!< Offset of the next read, or count of already-read bytes diff --git a/storage/xtradb/include/os0file.h b/storage/xtradb/include/os0file.h index cf8935ff7d2..4dab704ad50 100644 --- a/storage/xtradb/include/os0file.h +++ b/storage/xtradb/include/os0file.h @@ -1,6 +1,6 @@ /*********************************************************************** -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2009, Percona Inc. Portions of this file contain modifications contributed and copyrighted @@ -78,7 +78,6 @@ typedef ib_uint64_t os_offset_t; #define SRV_PATH_SEPARATOR '\\' /** File handle */ # define os_file_t HANDLE -# define os_file_invalid INVALID_HANDLE_VALUE /** Convert a C file descriptor to a native file handle @param fd file descriptor @return native file handle */ @@ -87,13 +86,22 @@ typedef ib_uint64_t os_offset_t; #define SRV_PATH_SEPARATOR '/' /** File handle */ typedef int os_file_t; -# define os_file_invalid (-1) /** Convert a C file descriptor to a native file handle @param fd file descriptor @return native file handle */ # define OS_FILE_FROM_FD(fd) fd #endif +/*Common file descriptor for file IO instrumentation with PFS +on windows and other platforms */ +struct pfs_os_file_t +{ + os_file_t m_file; +#ifdef UNIV_PFS_IO + struct PSI_file *m_psi; +#endif +}; + /** Umask for creating files */ extern ulint os_innodb_umask; @@ -129,6 +137,21 @@ enum os_file_create_t { ON_ERROR_NO_EXIT is set */ }; +/** Options for os_file_advise_func @{ */ +enum os_file_advise_t { + OS_FILE_ADVISE_NORMAL = 1, /*!< no advice on access pattern + (default) */ + OS_FILE_ADVISE_RANDOM = 2, /*!< access in random order */ + OS_FILE_ADVISE_SEQUENTIAL = 4, /*!< access the specified data + sequentially (with lower offsets read + before higher ones) */ + OS_FILE_ADVISE_WILLNEED = 8, /*!< specified data will be accessed + in the near future */ + OS_FILE_ADVISE_DONTNEED = 16, /*!< specified data will not be + accessed in the near future */ + OS_FILE_ADVISE_NOREUSE = 32 /*!< access only once */ +}; + #define OS_FILE_READ_ONLY 333 #define OS_FILE_READ_WRITE 444 #define OS_FILE_READ_ALLOW_DELETE 555 /* for mysqlbackup */ @@ -230,6 +253,8 @@ extern mysql_pfs_key_t innodb_file_bmp_key; various file I/O operations with performance schema. 1) register_pfs_file_open_begin() and register_pfs_file_open_end() are used to register file creation, opening, closing and renaming. +2) register_pfs_file_rename_begin() and register_pfs_file_rename_end() +are used to register file renaming 2) register_pfs_file_io_begin() and register_pfs_file_io_end() are used to register actual file read, write and flush 3) register_pfs_file_close_begin() and register_pfs_file_close_end() @@ -239,17 +264,30 @@ are used to register file deletion operations*/ do { \ locker = PSI_FILE_CALL(get_thread_file_name_locker)( \ state, key, op, name, &locker); \ - if (UNIV_LIKELY(locker != NULL)) { \ + if (locker != NULL) { \ PSI_FILE_CALL(start_file_open_wait)( \ locker, src_file, src_line); \ } \ } while (0) -# define register_pfs_file_open_end(locker, file) \ +# define register_pfs_file_open_end(locker, file, result) \ do { \ - if (UNIV_LIKELY(locker != NULL)) { \ - PSI_FILE_CALL(end_file_open_wait_and_bind_to_descriptor)(\ - locker, file); \ + if (locker != NULL) { \ + file.m_psi = PSI_FILE_CALL( \ + end_file_open_wait)( \ + locker, result); \ + } \ +} while (0) + +# define register_pfs_file_rename_begin(state, locker, key, op, name, \ + src_file, src_line) \ + register_pfs_file_open_begin(state, locker, key, op, name, \ + src_file, src_line) \ + +# define register_pfs_file_rename_end(locker, result) \ +do { \ + if (locker != NULL) { \ + PSI_FILE_CALL(end_file_open_wait)(locker, result); \ } \ } while (0) @@ -275,9 +313,9 @@ do { \ # define register_pfs_file_io_begin(state, locker, file, count, op, \ src_file, src_line) \ do { \ - locker = PSI_FILE_CALL(get_thread_file_descriptor_locker)( \ - state, file, op); \ - if (UNIV_LIKELY(locker != NULL)) { \ + locker = PSI_FILE_CALL(get_thread_file_stream_locker)( \ + state, file.m_psi, op); \ + if (locker != NULL) { \ PSI_FILE_CALL(start_file_wait)( \ locker, count, src_file, src_line); \ } \ @@ -285,7 +323,7 @@ do { \ # define register_pfs_file_io_end(locker, count) \ do { \ - if (UNIV_LIKELY(locker != NULL)) { \ + if (locker != NULL) { \ PSI_FILE_CALL(end_file_wait)(locker, count); \ } \ } while (0) @@ -299,11 +337,16 @@ os_file_create os_file_create_simple os_file_create_simple_no_error_handling os_file_close +os_file_close_no_error_handling os_file_rename os_aio os_file_read os_file_read_no_error_handling +os_file_read_no_error_handling_int_fd os_file_write +os_file_write_int_fd +os_file_set_eof_at +os_file_allocate The wrapper functions have the prefix of "innodb_". */ @@ -321,20 +364,23 @@ The wrapper functions have the prefix of "innodb_". */ pfs_os_file_create_simple_no_error_handling_func( \ key, name, create_mode, access, success, __FILE__, __LINE__) -# define os_file_close(file) \ +# define os_file_close_pfs(file) \ pfs_os_file_close_func(file, __FILE__, __LINE__) +# define os_file_close_no_error_handling_pfs(file) \ + pfs_os_file_close_no_error_handling_func(file, __FILE__, __LINE__) + # define os_aio(type, mode, name, file, buf, offset, \ n, message1, message2, space_id, trx) \ pfs_os_aio_func(type, mode, name, file, buf, offset, \ n, message1, message2, space_id, trx, \ __FILE__, __LINE__) -# define os_file_read(file, buf, offset, n) \ +# define os_file_read_pfs(file, buf, offset, n) \ pfs_os_file_read_func(file, buf, offset, n, NULL, \ __FILE__, __LINE__) -# define os_file_read_trx(file, buf, offset, n, trx) \ +# define os_file_read_trx_pfs(file, buf, offset, n, trx) \ pfs_os_file_read_func(file, buf, offset, n, trx, \ __FILE__, __LINE__) @@ -342,11 +388,20 @@ The wrapper functions have the prefix of "innodb_". */ pfs_os_file_read_no_error_handling_func(file, buf, offset, n, \ __FILE__, __LINE__) -# define os_file_write(name, file, buf, offset, n) \ +# define os_file_read_no_error_handling_int_fd( \ + file, buf, offset, n) \ + pfs_os_file_read_no_error_handling_int_fd_func( \ + file, buf, offset, n, __FILE__, __LINE__) + +# define os_file_write_pfs(name, file, buf, offset, n) \ pfs_os_file_write_func(name, file, buf, offset, \ n, __FILE__, __LINE__) -# define os_file_flush(file) \ +# define os_file_write_int_fd(name, file, buf, offset, n) \ + pfs_os_file_write_int_fd_func(name, file, buf, offset, \ + n, __FILE__, __LINE__) + +# define os_file_flush_pfs(file) \ pfs_os_file_flush_func(file, __FILE__, __LINE__) # define os_file_rename(key, oldpath, newpath) \ @@ -357,6 +412,15 @@ The wrapper functions have the prefix of "innodb_". */ # define os_file_delete_if_exists(key, name) \ pfs_os_file_delete_if_exists_func(key, name, __FILE__, __LINE__) + +# define os_file_set_eof_at_pfs(file, new_len) \ + pfs_os_file_set_eof_at_func(file, new_len, __FILE__, __LINE__) + +# ifdef HAVE_POSIX_FALLOCATE +# define os_file_allocate_pfs(file, offset, len) \ + pfs_os_file_allocate_func(file, offset, len, __FILE__, __LINE__) +# endif + #else /* UNIV_PFS_IO */ /* If UNIV_PFS_IO is not defined, these I/O APIs point @@ -372,26 +436,36 @@ to original un-instrumented file I/O APIs */ os_file_create_simple_no_error_handling_func( \ name, create_mode, access, success) -# define os_file_close(file) os_file_close_func(file) +# define os_file_close_pfs(file) \ + os_file_close_func(file) + +# define os_file_close_no_error_handling_pfs(file) \ + os_file_close_no_error_handling_func(file) # define os_aio(type, mode, name, file, buf, offset, n, message1, \ message2, space_id, trx) \ os_aio_func(type, mode, name, file, buf, offset, n, \ message1, message2, space_id, trx) -# define os_file_read(file, buf, offset, n) \ +# define os_file_read_pfs(file, buf, offset, n) \ os_file_read_func(file, buf, offset, n, NULL) -# define os_file_read_trx(file, buf, offset, n, trx) \ +# define os_file_read_trx_pfs(file, buf, offset, n, trx) \ os_file_read_func(file, buf, offset, n, trx) # define os_file_read_no_error_handling(file, buf, offset, n) \ os_file_read_no_error_handling_func(file, buf, offset, n) +# define os_file_read_no_error_handling_int_fd( \ + file, buf, offset, n) \ + os_file_read_no_error_handling_func(file, buf, offset, n) -# define os_file_write(name, file, buf, offset, n) \ +# define os_file_write_int_fd(name, file, buf, offset, n) \ + os_file_write_func(name, file, buf, offset, n) +# define os_file_write_pfs(name, file, buf, offset, n) \ os_file_write_func(name, file, buf, offset, n) -# define os_file_flush(file) os_file_flush_func(file) + +# define os_file_flush_pfs(file) os_file_flush_func(file) # define os_file_rename(key, oldpath, newpath) \ os_file_rename_func(oldpath, newpath) @@ -401,8 +475,78 @@ to original un-instrumented file I/O APIs */ # define os_file_delete_if_exists(key, name) \ os_file_delete_if_exists_func(name) +# define os_file_set_eof_at_pfs(file, new_len) \ + os_file_set_eof_at_func(file, new_len) + +# ifdef HAVE_POSIX_FALLOCATE +# define os_file_allocate_pfs(file, offset, len) \ + os_file_allocate_func(file, offset, len) +# endif + #endif /* UNIV_PFS_IO */ +#ifdef UNIV_PFS_IO + #define os_file_close(file) os_file_close_pfs(file) +#else + #define os_file_close(file) os_file_close_pfs((file).m_file) +#endif + +#ifdef UNIV_PFS_IO + #define os_file_close_no_error_handling(file) \ + os_file_close_no_error_handling_pfs(file) +#else + #define os_file_close_no_error_handling(file) \ + os_file_close_no_error_handling_pfs((file).m_file) +#endif + +#ifdef UNIV_PFS_IO + #define os_file_read(file, buf, offset, n) \ + os_file_read_pfs(file, buf, offset, n) +#else + #define os_file_read(file, buf, offset, n) \ + os_file_read_pfs(file.m_file, buf, offset, n) +#endif + +#ifdef UNIV_PFS_IO + # define os_file_read_trx(file, buf, offset, n, trx) \ + os_file_read_trx_pfs(file, buf, offset, n, trx) +#else + # define os_file_read_trx(file, buf, offset, n, trx) \ + os_file_read_trx_pfs(file.m_file, buf, offset, n, trx) +#endif + +#ifdef UNIV_PFS_IO + #define os_file_flush(file) os_file_flush_pfs(file) +#else + #define os_file_flush(file) os_file_flush_pfs(file.m_file) +#endif + +#ifdef UNIV_PFS_IO + #define os_file_write(name, file, buf, offset, n) \ + os_file_write_pfs(name, file, buf, offset, n) +#else + #define os_file_write(name, file, buf, offset, n) \ + os_file_write_pfs(name, file.m_file, buf, offset, n) +#endif + +#ifdef UNIV_PFS_IO + #define os_file_set_eof_at(file, new_len) \ + os_file_set_eof_at_pfs(file, new_len) +#else + #define os_file_set_eof_at(file, new_len) \ + os_file_set_eof_at_pfs(file.m_file, new_len) +#endif + +#ifdef HAVE_POSIX_FALLOCATE +#ifdef UNIV_PFS_IO + #define os_file_allocate(file, offset, len) \ + os_file_allocate_pfs(file, offset, len) +#else + #define os_file_allocate(file, offset, len) \ + os_file_allocate_pfs(file.m_file, offset, len) +#endif +#endif + /* File types for directory entry data type */ enum os_file_type_t { @@ -536,7 +680,7 @@ A simple function to open or create a file. @return own: handle to the file, not defined if error, error number can be retrieved with os_file_get_last_error */ UNIV_INTERN -os_file_t +pfs_os_file_t os_file_create_simple_no_error_handling_func( /*=========================================*/ const char* name, /*!< in: name of the file or path as a @@ -569,7 +713,7 @@ Opens an existing file or creates a new. @return own: handle to the file, not defined if error, error number can be retrieved with os_file_get_last_error */ UNIV_INTERN -os_file_t +pfs_os_file_t os_file_create_func( /*================*/ const char* name, /*!< in: name of the file or path as a @@ -628,6 +772,42 @@ ibool os_file_close_func( /*===============*/ os_file_t file); /*!< in, own: handle to a file */ +/***********************************************************************//** +NOTE! Use the corresponding macro os_file_close(), not directly this +function! +Closes a file handle. In case of error, error number can be retrieved with +os_file_get_last_error. +@return TRUE if success */ +UNIV_INTERN +bool +os_file_close_no_error_handling_func( +/*===============*/ + os_file_t file); /*!< in, own: handle to a file */ + +/***********************************************************************//** +NOTE! Please use the corresponding macro os_file_set_eof_at(), not +directly this function! +Truncates a file at the specified position. +@return TRUE if success */ +UNIV_INTERN +bool +os_file_set_eof_at_func( + os_file_t file, /*!< in: handle to a file */ + ib_uint64_t new_len);/*!< in: new file length */ + +#ifdef HAVE_POSIX_FALLOCATE +/***********************************************************************//** +NOTE! Please use the corresponding macro os_file_allocate(), not +directly this function! +Ensures that disk space is allocated for the file. +@return TRUE if success */ +UNIV_INTERN +bool +os_file_allocate_func( + os_file_t file, /*!< in, own: handle to a file */ + os_offset_t offset, /*!< in: file region offset */ + os_offset_t len); /*!< in: file region length */ +#endif #ifdef UNIV_PFS_IO /****************************************************************//** @@ -638,7 +818,7 @@ os_file_create_simple() which opens or creates a file. @return own: handle to the file, not defined if error, error number can be retrieved with os_file_get_last_error */ UNIV_INLINE -os_file_t +pfs_os_file_t pfs_os_file_create_simple_func( /*===========================*/ mysql_pfs_key_t key, /*!< in: Performance Schema Key */ @@ -661,7 +841,7 @@ monitor file creation/open. @return own: handle to the file, not defined if error, error number can be retrieved with os_file_get_last_error */ UNIV_INLINE -os_file_t +pfs_os_file_t pfs_os_file_create_simple_no_error_handling_func( /*=============================================*/ mysql_pfs_key_t key, /*!< in: Performance Schema Key */ @@ -685,7 +865,7 @@ Add instrumentation to monitor file creation/open. @return own: handle to the file, not defined if error, error number can be retrieved with os_file_get_last_error */ UNIV_INLINE -os_file_t +pfs_os_file_t pfs_os_file_create_func( /*====================*/ mysql_pfs_key_t key, /*!< in: Performance Schema Key */ @@ -714,7 +894,20 @@ UNIV_INLINE ibool pfs_os_file_close_func( /*===================*/ - os_file_t file, /*!< in, own: handle to a file */ + pfs_os_file_t file, /*!< in, own: handle to a file */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line);/*!< in: line where the func invoked */ +/***********************************************************************//** +NOTE! Please use the corresponding macro os_file_close_no_error_handling(), +not directly this function! +A performance schema instrumented wrapper function for +os_file_close_no_error_handling(). +@return TRUE if success */ +UNIV_INLINE +bool +pfs_os_file_close_no_error_handling_func( +/*===================*/ + pfs_os_file_t file, /*!< in, own: handle to a file */ const char* src_file,/*!< in: file name where func invoked */ ulint src_line);/*!< in: line where the func invoked */ /*******************************************************************//** @@ -727,7 +920,7 @@ UNIV_INLINE ibool pfs_os_file_read_func( /*==================*/ - os_file_t file, /*!< in: handle to a file */ + pfs_os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read */ os_offset_t offset, /*!< in: file offset where to read */ ulint n, /*!< in: number of bytes to read */ @@ -746,7 +939,7 @@ UNIV_INLINE ibool pfs_os_file_read_no_error_handling_func( /*====================================*/ - os_file_t file, /*!< in: handle to a file */ + pfs_os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read */ os_offset_t offset, /*!< in: file offset where to read */ ulint n, /*!< in: number of bytes to read */ @@ -767,7 +960,7 @@ pfs_os_aio_func( ulint mode, /*!< in: OS_AIO_NORMAL etc. I/O mode */ const char* name, /*!< in: name of the file or path as a null-terminated string */ - os_file_t file, /*!< in: handle to a file */ + pfs_os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read or from which to write */ os_offset_t offset, /*!< in: file offset where to read or write */ @@ -796,7 +989,7 @@ pfs_os_file_write_func( /*===================*/ const char* name, /*!< in: name of the file or path as a null-terminated string */ - os_file_t file, /*!< in: handle to a file */ + pfs_os_file_t file, /*!< in: handle to a file */ const void* buf, /*!< in: buffer from which to write */ os_offset_t offset, /*!< in: file offset where to write */ ulint n, /*!< in: number of bytes to write */ @@ -813,7 +1006,7 @@ UNIV_INLINE ibool pfs_os_file_flush_func( /*===================*/ - os_file_t file, /*!< in, own: handle to a file */ + pfs_os_file_t file, /*!< in, own: handle to a file */ const char* src_file,/*!< in: file name where func invoked */ ulint src_line);/*!< in: line where the func invoked */ @@ -865,16 +1058,66 @@ pfs_os_file_delete_if_exists_func( string */ const char* src_file,/*!< in: file name where func invoked */ ulint src_line);/*!< in: line where the func invoked */ + +/***********************************************************************//** +NOTE! Please use the corresponding macro os_file_set_eof_at(), not +directly this function! +This is the performance schema instrumented wrapper function for +os_file_set_eof_at() +@return TRUE if success */ +UNIV_INLINE +bool +pfs_os_file_set_eof_at_func( + pfs_os_file_t file, /*!< in: handle to a file */ + ib_uint64_t new_len,/*!< in: new file length */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line);/*!< in: line where the func invoked */ + +#ifdef HAVE_POSIX_FALLOCATE +/***********************************************************************//** +NOTE! Please use the corresponding macro os_file_allocate(), not +directly this function! +Ensures that disk space is allocated for the file. +@return TRUE if success */ +UNIV_INLINE +bool +pfs_os_file_allocate_func( + pfs_os_file_t file, /*!< in, own: handle to a file */ + os_offset_t offset, /*!< in: file region offset */ + os_offset_t len, /*!< in: file region length */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line);/*!< in: line where the func invoked */ +#endif + #endif /* UNIV_PFS_IO */ /***********************************************************************//** -Closes a file handle. -@return TRUE if success */ +Checks if the file is marked as invalid. +@return TRUE if invalid */ UNIV_INTERN -ibool -os_file_close_no_error_handling( -/*============================*/ - os_file_t file); /*!< in, own: handle to a file */ +bool +os_file_is_invalid( + pfs_os_file_t file); /*!< in, own: handle to a file */ + +/***********************************************************************//** +Marks the file as invalid. */ +UNIV_INTERN +void +os_file_mark_invalid( + pfs_os_file_t* file); /*!< out: pointer to a handle to a file */ + +/***********************************************************************//** +Announces an intention to access file data in a specific pattern in the +future. +@return TRUE if success */ +UNIV_INTERN +bool +os_file_advise( + pfs_os_file_t file, /*!< in, own: handle to a file */ + os_offset_t offset, /*!< in: file region offset */ + os_offset_t len, /*!< in: file region length */ + ulint advice);/*!< in: advice for access pattern */ + /***********************************************************************//** Gets a file size. @return file size, or (os_offset_t) -1 on failure */ @@ -882,7 +1125,7 @@ UNIV_INTERN os_offset_t os_file_get_size( /*=============*/ - os_file_t file) /*!< in: handle to a file */ + pfs_os_file_t file) /*!< in: handle to a file */ MY_ATTRIBUTE((warn_unused_result)); /***********************************************************************//** Write the specified number of zeros to a newly created file. @@ -893,7 +1136,7 @@ os_file_set_size( /*=============*/ const char* name, /*!< in: name of the file or path as a null-terminated string */ - os_file_t file, /*!< in: handle to a file */ + pfs_os_file_t file, /*!< in: handle to a file */ os_offset_t size) /*!< in: file size */ MY_ATTRIBUTE((nonnull, warn_unused_result)); /***********************************************************************//** @@ -905,14 +1148,6 @@ os_file_set_eof( /*============*/ FILE* file); /*!< in: file to be truncated */ /***********************************************************************//** -Truncates a file at the specified position. -@return TRUE if success */ -UNIV_INTERN -ibool -os_file_set_eof_at( - os_file_t file, /*!< in: handle to a file */ - ib_uint64_t new_len);/*!< in: new file length */ -/***********************************************************************//** NOTE! Use the corresponding macro os_file_flush(), not directly this function! Flushes the write buffers of a given file to the disk. @return TRUE if success */ @@ -1140,7 +1375,7 @@ os_aio_func( caution! */ const char* name, /*!< in: name of the file or path as a null-terminated string */ - os_file_t file, /*!< in: handle to a file */ + pfs_os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read or from which to write */ os_offset_t offset, /*!< in: file offset where to read or write */ diff --git a/storage/xtradb/include/os0file.ic b/storage/xtradb/include/os0file.ic index 25a1397147e..c82a2559fed 100644 --- a/storage/xtradb/include/os0file.ic +++ b/storage/xtradb/include/os0file.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2010, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2010, 2017, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -34,7 +34,7 @@ os_file_create_simple() which opens or creates a file. @return own: handle to the file, not defined if error, error number can be retrieved with os_file_get_last_error */ UNIV_INLINE -os_file_t +pfs_os_file_t pfs_os_file_create_simple_func( /*===========================*/ mysql_pfs_key_t key, /*!< in: Performance Schema Key */ @@ -47,7 +47,7 @@ pfs_os_file_create_simple_func( const char* src_file,/*!< in: file name where func invoked */ ulint src_line)/*!< in: line where the func invoked */ { - os_file_t file; + pfs_os_file_t file; struct PSI_file_locker* locker = NULL; PSI_file_locker_state state; @@ -58,11 +58,13 @@ pfs_os_file_create_simple_func( : PSI_FILE_OPEN), name, src_file, src_line); - file = os_file_create_simple_func(name, create_mode, + file.m_file = os_file_create_simple_func(name, create_mode, access_type, success); + file.m_psi = NULL; - /* Regsiter the returning "file" value with the system */ - register_pfs_file_open_end(locker, file); + /* Regsiter psi value for the file */ + register_pfs_file_open_end(locker, file, + (*success == TRUE ? success : 0)); return(file); } @@ -76,7 +78,7 @@ monitor file creation/open. @return own: handle to the file, not defined if error, error number can be retrieved with os_file_get_last_error */ UNIV_INLINE -os_file_t +pfs_os_file_t pfs_os_file_create_simple_no_error_handling_func( /*=============================================*/ mysql_pfs_key_t key, /*!< in: Performance Schema Key */ @@ -91,7 +93,7 @@ pfs_os_file_create_simple_no_error_handling_func( const char* src_file,/*!< in: file name where func invoked */ ulint src_line)/*!< in: line where the func invoked */ { - os_file_t file; + pfs_os_file_t file; struct PSI_file_locker* locker = NULL; PSI_file_locker_state state; @@ -104,8 +106,10 @@ pfs_os_file_create_simple_no_error_handling_func( file = os_file_create_simple_no_error_handling_func( name, create_mode, access_type, success); + file.m_psi = NULL; - register_pfs_file_open_end(locker, file); + register_pfs_file_open_end(locker, file, + (*success == TRUE ? success : 0)); return(file); } @@ -118,7 +122,7 @@ Add instrumentation to monitor file creation/open. @return own: handle to the file, not defined if error, error number can be retrieved with os_file_get_last_error */ UNIV_INLINE -os_file_t +pfs_os_file_t pfs_os_file_create_func( /*====================*/ mysql_pfs_key_t key, /*!< in: Performance Schema Key */ @@ -137,7 +141,7 @@ pfs_os_file_create_func( const char* src_file,/*!< in: file name where func invoked */ ulint src_line)/*!< in: line where the func invoked */ { - os_file_t file; + pfs_os_file_t file; struct PSI_file_locker* locker = NULL; PSI_file_locker_state state; @@ -149,8 +153,10 @@ pfs_os_file_create_func( name, src_file, src_line); file = os_file_create_func(name, create_mode, purpose, type, success); + file.m_psi = NULL; - register_pfs_file_open_end(locker, file); + register_pfs_file_open_end(locker, file, + (*success == TRUE ? success : 0)); return(file); } @@ -164,7 +170,7 @@ UNIV_INLINE ibool pfs_os_file_close_func( /*===================*/ - os_file_t file, /*!< in, own: handle to a file */ + pfs_os_file_t file, /*!< in, own: handle to a file */ const char* src_file,/*!< in: file name where func invoked */ ulint src_line)/*!< in: line where the func invoked */ { @@ -176,7 +182,35 @@ pfs_os_file_close_func( register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_CLOSE, src_file, src_line); - result = os_file_close_func(file); + result = os_file_close_func(file.m_file); + + register_pfs_file_io_end(locker, 0); + + return(result); +} +/***********************************************************************//** +NOTE! Please use the corresponding macro os_file_close_no_error_handling(), +not directly this function! +A performance schema instrumented wrapper function for +os_file_close_no_error_handling(). +@return TRUE if success */ +UNIV_INLINE +bool +pfs_os_file_close_no_error_handling_func( +/*===================*/ + pfs_os_file_t file, /*!< in, own: handle to a file */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line)/*!< in: line where the func invoked */ +{ + bool result; + struct PSI_file_locker* locker = NULL; + PSI_file_locker_state state; + + /* register the file close */ + register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_CLOSE, + src_file, src_line); + + result = os_file_close_no_error_handling_func(file.m_file); register_pfs_file_io_end(locker, 0); @@ -197,7 +231,7 @@ pfs_os_aio_func( ulint mode, /*!< in: OS_AIO_NORMAL etc. I/O mode */ const char* name, /*!< in: name of the file or path as a null-terminated string */ - os_file_t file, /*!< in: handle to a file */ + pfs_os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read or from which to write */ os_offset_t offset, /*!< in: file offset where to read or write */ @@ -244,7 +278,7 @@ UNIV_INLINE ibool pfs_os_file_read_func( /*==================*/ - os_file_t file, /*!< in: handle to a file */ + pfs_os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read */ os_offset_t offset, /*!< in: file offset where to read */ ulint n, /*!< in: number of bytes to read */ @@ -259,7 +293,7 @@ pfs_os_file_read_func( register_pfs_file_io_begin(&state, locker, file, n, PSI_FILE_READ, src_file, src_line); - result = os_file_read_func(file, buf, offset, n, trx); + result = os_file_read_func(file.m_file, buf, offset, n, trx); register_pfs_file_io_end(locker, n); @@ -278,7 +312,7 @@ UNIV_INLINE ibool pfs_os_file_read_no_error_handling_func( /*====================================*/ - os_file_t file, /*!< in: handle to a file */ + pfs_os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read */ os_offset_t offset, /*!< in: file offset where to read */ ulint n, /*!< in: number of bytes to read */ @@ -292,13 +326,50 @@ pfs_os_file_read_no_error_handling_func( register_pfs_file_io_begin(&state, locker, file, n, PSI_FILE_READ, src_file, src_line); - result = os_file_read_no_error_handling_func(file, buf, offset, n); + result = os_file_read_no_error_handling_func(file.m_file, buf, offset, n); register_pfs_file_io_end(locker, n); return(result); } +/** NOTE! Please use the corresponding macro +os_file_read_no_error_handling_int_fd(), not directly this function! +This is the performance schema instrumented wrapper function for +os_file_read_no_error_handling_int_fd_func() which requests a +synchronous read operation. +@return TRUE if request was successful, FALSE if fail */ +UNIV_INLINE +ibool +pfs_os_file_read_no_error_handling_int_fd_func( + int file, /*!< in: handle to a file */ + void* buf, /*!< in: buffer where to read */ + os_offset_t offset, /*!< in: file offset where to read */ + ulint n, /*!< in: number of bytes to read */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line)/*!< in: line where the func invoked */ +{ + + PSI_file_locker_state state; + struct PSI_file_locker* locker = NULL; + + locker = PSI_FILE_CALL(get_thread_file_descriptor_locker)( + &state, file, PSI_FILE_READ); + if (locker != NULL) { + PSI_FILE_CALL(start_file_wait)( + locker, n, + __FILE__, __LINE__); + } + ibool result = os_file_read_no_error_handling_func( + OS_FILE_FROM_FD(file), buf, offset, n); + + if (locker != NULL) { + PSI_FILE_CALL(end_file_wait)(locker, n); + } + + return(result); +} + /*******************************************************************//** NOTE! Please use the corresponding macro os_file_write(), not directly this function! @@ -311,7 +382,7 @@ pfs_os_file_write_func( /*===================*/ const char* name, /*!< in: name of the file or path as a null-terminated string */ - os_file_t file, /*!< in: handle to a file */ + pfs_os_file_t file, /*!< in: handle to a file */ const void* buf, /*!< in: buffer from which to write */ os_offset_t offset, /*!< in: file offset where to write */ ulint n, /*!< in: number of bytes to write */ @@ -325,13 +396,50 @@ pfs_os_file_write_func( register_pfs_file_io_begin(&state, locker, file, n, PSI_FILE_WRITE, src_file, src_line); - result = os_file_write_func(name, file, buf, offset, n); + result = os_file_write_func(name, file.m_file, buf, offset, n); register_pfs_file_io_end(locker, n); return(result); } +/** NOTE! Please use the corresponding macro os_file_write(), not +directly this function! +This is the performance schema instrumented wrapper function for +os_file_write() which requests a synchronous write operation. +@return TRUE if request was successful, FALSE if fail */ +UNIV_INLINE +ibool +pfs_os_file_write_int_fd_func( + const char* name, /*!< in: name of the file or path as a + null-terminated string */ + int file, /*!< in: handle to a file */ + const void* buf, /*!< in: buffer from which to write */ + os_offset_t offset, /*!< in: file offset where to write */ + ulint n, /*!< in: number of bytes to write */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line)/*!< in: line where the func invoked */ +{ + PSI_file_locker_state state; + struct PSI_file_locker* locker = NULL; + + locker = PSI_FILE_CALL(get_thread_file_descriptor_locker)( + &state, file, PSI_FILE_WRITE); + if (locker != NULL) { + PSI_FILE_CALL(start_file_wait)( + locker, n, + __FILE__, __LINE__); + } + ibool result = os_file_write_func( + name, OS_FILE_FROM_FD(file), buf, offset, n); + + if (locker != NULL) { + PSI_FILE_CALL(end_file_wait)(locker, n); + } + + return(result); +} + /***********************************************************************//** NOTE! Please use the corresponding macro os_file_flush(), not directly this function! @@ -342,7 +450,7 @@ UNIV_INLINE ibool pfs_os_file_flush_func( /*===================*/ - os_file_t file, /*!< in, own: handle to a file */ + pfs_os_file_t file, /*!< in, own: handle to a file */ const char* src_file,/*!< in: file name where func invoked */ ulint src_line)/*!< in: line where the func invoked */ { @@ -352,7 +460,7 @@ pfs_os_file_flush_func( register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_SYNC, src_file, src_line); - result = os_file_flush_func(file); + result = os_file_flush_func(file.m_file); register_pfs_file_io_end(locker, 0); @@ -380,12 +488,12 @@ pfs_os_file_rename_func( struct PSI_file_locker* locker = NULL; PSI_file_locker_state state; - register_pfs_file_open_begin(&state, locker, key, PSI_FILE_RENAME, newpath, + register_pfs_file_rename_begin(&state, locker, key, PSI_FILE_RENAME, newpath, src_file, src_line); result = os_file_rename_func(oldpath, newpath); - register_pfs_file_open_end(locker, 0); + register_pfs_file_rename_end(locker, 0); return(result); } @@ -449,4 +557,61 @@ pfs_os_file_delete_if_exists_func( return(result); } + +/***********************************************************************//** +NOTE! Please use the corresponding macro os_file_set_eof_at(), not +directly this function! +This is the performance schema instrumented wrapper function for +os_file_set_eof_at() +@return TRUE if success */ +UNIV_INLINE +bool +pfs_os_file_set_eof_at_func( + pfs_os_file_t file, /*!< in: handle to a file */ + ib_uint64_t new_len,/*!< in: new file length */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line)/*!< in: line where the func invoked */ +{ + bool result; + struct PSI_file_locker* locker = NULL; + PSI_file_locker_state state; + + register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_CHSIZE, + src_file, src_line); + result = os_file_set_eof_at_func(file.m_file, new_len); + + register_pfs_file_io_end(locker, 0); + + return(result); +} + +#ifdef HAVE_POSIX_FALLOCATE +/***********************************************************************//** +NOTE! Please use the corresponding macro os_file_allocate(), not +directly this function! +Ensures that disk space is allocated for the file. +@return TRUE if success */ +UNIV_INLINE +bool +pfs_os_file_allocate_func( + pfs_os_file_t file, /*!< in, own: handle to a file */ + os_offset_t offset, /*!< in: file region offset */ + os_offset_t len, /*!< in: file region length */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line)/*!< in: line where the func invoked */ +{ + bool result; + struct PSI_file_locker* locker = NULL; + PSI_file_locker_state state; + + register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_CHSIZE, + src_file, src_line); + result = os_file_allocate_func(file.m_file, offset, len); + + register_pfs_file_io_end(locker, 0); + + return(result); +} +#endif + #endif /* UNIV_PFS_IO */ diff --git a/storage/xtradb/include/row0mysql.h b/storage/xtradb/include/row0mysql.h index 27d3adfc7f0..eb604d05557 100644 --- a/storage/xtradb/include/row0mysql.h +++ b/storage/xtradb/include/row0mysql.h @@ -681,6 +681,12 @@ struct mysql_row_templ_t { Innobase record in the current index; not defined if template_type is ROW_MYSQL_WHOLE_ROW */ + bool rec_field_is_prefix; /* is this field in a prefix index? */ + ulint rec_prefix_field_no; /* record field, even if just a + prefix; same as rec_field_no when not a + prefix, otherwise rec_field_no is + ULINT_UNDEFINED but this is the true + field number*/ ulint clust_rec_field_no; /*!< field number of the column in an Innobase record in the clustered index; not defined if template_type is @@ -729,6 +735,8 @@ struct mysql_row_templ_t { #define ROW_PREBUILT_ALLOCATED 78540783 #define ROW_PREBUILT_FREED 26423527 +class ha_innobase; + /** A struct for (sometimes lazily) prebuilt structures in an Innobase table handle used within MySQL; these are used to save CPU time. */ @@ -784,7 +792,9 @@ struct row_prebuilt_t { columns through a secondary index and at least one column is not in the secondary index, then this is - set to TRUE */ + set to TRUE; note that sometimes this + is set but we later optimize out the + clustered index lookup */ unsigned templ_contains_blob:1;/*!< TRUE if the template contains a column with DATA_BLOB == get_innobase_type_from_mysql_type(); @@ -958,6 +968,12 @@ struct row_prebuilt_t { to InnoDB format.*/ uint srch_key_val_len; /*!< Size of search key */ + /** MySQL handler object. */ + ha_innobase* mysql_handler; + + /** True if exceeded the end_range while filling the prefetch cache. */ + bool end_range; + }; /** Callback for row_mysql_sys_index_iterate() */ diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h index 882ceae2417..0fb563f539c 100644 --- a/storage/xtradb/include/srv0srv.h +++ b/storage/xtradb/include/srv0srv.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All rights reserved. +Copyright (c) 1995, 2017, Oracle and/or its affiliates. All rights reserved. Copyright (c) 2008, 2009, Google Inc. Copyright (c) 2009, Percona Inc. @@ -588,6 +588,11 @@ extern my_bool srv_print_all_deadlocks; extern my_bool srv_cmp_per_index_enabled; +/** Number of times secondary index lookup triggered cluster lookup */ +extern ulint srv_sec_rec_cluster_reads; +/** Number of times prefix optimization avoided triggering cluster lookup */ +extern ulint srv_sec_rec_cluster_reads_avoided; + /** Status variables to be passed to MySQL */ extern struct export_var_t export_vars; @@ -973,6 +978,13 @@ void srv_purge_wakeup(void); /*==================*/ +/** Check whether given space id is undo tablespace id +@param[in] space_id space id to check +@return true if it is undo tablespace else false. */ +bool +srv_is_undo_tablespace( + ulint space_id); + /** Status variables to be passed to MySQL */ struct export_var_t{ ulint innodb_adaptive_hash_hash_searches; @@ -1085,6 +1097,9 @@ struct export_var_t{ #endif /* UNIV_DEBUG */ ulint innodb_column_compressed; /*!< srv_column_compressed */ ulint innodb_column_decompressed; /*!< srv_column_decompressed */ + + ulint innodb_sec_rec_cluster_reads; /*!< srv_sec_rec_cluster_reads */ + ulint innodb_sec_rec_cluster_reads_avoided; /*!< srv_sec_rec_cluster_reads_avoided */ }; /** Thread slot in the thread table. */ diff --git a/storage/xtradb/include/srv0start.h b/storage/xtradb/include/srv0start.h index 963b767f0fb..a60776a4665 100644 --- a/storage/xtradb/include/srv0start.h +++ b/storage/xtradb/include/srv0start.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -139,6 +139,8 @@ extern ibool srv_startup_is_before_trx_rollback_phase; /** TRUE if a raw partition is in use */ extern ibool srv_start_raw_disk_in_use; +/** Undo tablespaces starts with space_id. */ +extern ulint srv_undo_space_id_start; /** Shutdown state */ enum srv_shutdown_state { diff --git a/storage/xtradb/include/trx0xa.h b/storage/xtradb/include/trx0xa.h index 7caddfb7ba4..255431293f5 100644 --- a/storage/xtradb/include/trx0xa.h +++ b/storage/xtradb/include/trx0xa.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -24,6 +24,8 @@ this program; if not, write to the Free Software Foundation, Inc., #ifndef XA_H #define XA_H +#include "xa.h" + /* * Transaction branch identification: XID and NULLXID: */ @@ -35,17 +37,6 @@ this program; if not, write to the Free Software Foundation, Inc., #define MAXGTRIDSIZE 64 /*!< maximum size in bytes of gtrid */ #define MAXBQUALSIZE 64 /*!< maximum size in bytes of bqual */ -/** X/Open XA distributed transaction identifier */ -struct xid_t { - long formatID; /*!< format identifier; -1 - means that the XID is null */ - long gtrid_length; /*!< value from 1 through 64 */ - long bqual_length; /*!< value from 1 through 64 */ - char data[XIDDATASIZE]; /*!< distributed transaction - identifier */ -}; -/** X/Open XA distributed transaction identifier */ -typedef struct xid_t XID; #endif /** X/Open XA distributed transaction status codes */ /* @{ */ diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i index 7da293542f5..b4ad030a565 100644 --- a/storage/xtradb/include/univ.i +++ b/storage/xtradb/include/univ.i @@ -47,7 +47,7 @@ Created 1/20/1994 Heikki Tuuri #define INNODB_VERSION_BUGFIX MYSQL_VERSION_PATCH #ifndef PERCONA_INNODB_VERSION -#define PERCONA_INNODB_VERSION 80.0 +#define PERCONA_INNODB_VERSION 82.0 #endif /* Enable UNIV_LOG_ARCHIVE in XtraDB */ @@ -145,14 +145,8 @@ HAVE_PSI_INTERFACE is defined. */ #if defined HAVE_PSI_INTERFACE && !defined UNIV_HOTBACKUP # define UNIV_PFS_MUTEX # define UNIV_PFS_RWLOCK -/* For I/O instrumentation, performance schema rely -on a native descriptor to identify the file, this -descriptor could conflict with our OS level descriptor. -Disable IO instrumentation on Windows until this is -resolved */ -# ifndef __WIN__ -# define UNIV_PFS_IO -# endif + +# define UNIV_PFS_IO # define UNIV_PFS_THREAD /* There are mutexes/rwlocks that we want to exclude from diff --git a/storage/xtradb/log/log0log.cc b/storage/xtradb/log/log0log.cc index 7784e8538b7..f73d5b458d1 100644 --- a/storage/xtradb/log/log0log.cc +++ b/storage/xtradb/log/log0log.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2009, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -2627,7 +2627,7 @@ log_group_archive( /*==============*/ log_group_t* group) /*!< in: log group */ { - os_file_t file_handle; + pfs_os_file_t file_handle; lsn_t start_lsn; lsn_t end_lsn; char name[OS_FILE_MAX_PATH]; @@ -3607,9 +3607,15 @@ loop: lsn = log_sys->lsn; - if (lsn != log_sys->last_checkpoint_lsn - || (srv_track_changed_pages - && (tracked_lsn != log_sys->last_checkpoint_lsn)) + const bool is_last = + ((srv_force_recovery == SRV_FORCE_NO_LOG_REDO + && lsn == log_sys->last_checkpoint_lsn + + LOG_BLOCK_HDR_SIZE) + || lsn == log_sys->last_checkpoint_lsn) + && (!srv_track_changed_pages + || tracked_lsn == log_sys->last_checkpoint_lsn); + + if (!is_last #ifdef UNIV_LOG_ARCHIVE || (srv_log_archive_on && lsn != log_sys->archived_lsn + LOG_BLOCK_HDR_SIZE) @@ -3675,7 +3681,8 @@ loop: ut_a(freed); ut_a(lsn == log_sys->lsn); - ut_ad(lsn == log_sys->last_checkpoint_lsn); + ut_ad(srv_force_recovery >= SRV_FORCE_NO_LOG_REDO + || lsn == log_sys->last_checkpoint_lsn); if (lsn < srv_start_lsn) { ib_logf(IB_LOG_LEVEL_ERROR, diff --git a/storage/xtradb/log/log0online.cc b/storage/xtradb/log/log0online.cc index 8d1952fdd04..99762c00a08 100644 --- a/storage/xtradb/log/log0online.cc +++ b/storage/xtradb/log/log0online.cc @@ -336,7 +336,7 @@ log_online_read_last_tracked_lsn(void) lsn_t result; os_offset_t read_offset = log_bmp_sys->out.offset; - while (!checksum_ok && read_offset > 0 && !is_last_page) + while ((!checksum_ok || !is_last_page) && read_offset > 0) { read_offset -= MODIFIED_PAGE_BLOCK_SIZE; log_bmp_sys->out.offset = read_offset; @@ -564,9 +564,9 @@ log_online_rotate_bitmap_file( lsn_t next_file_start_lsn) /*!out.file != os_file_invalid) { + if (!os_file_is_invalid(log_bmp_sys->out.file)) { os_file_close(log_bmp_sys->out.file); - log_bmp_sys->out.file = os_file_invalid; + os_file_mark_invalid(&log_bmp_sys->out.file); } log_bmp_sys->out_seq_num++; log_online_make_bitmap_name(next_file_start_lsn); @@ -736,7 +736,11 @@ log_online_read_init(void) } last_tracked_lsn = log_online_read_last_tracked_lsn(); + /* Do not rotate if we truncated the file to zero length - we + can just start writing there */ + const bool need_rotate = (last_tracked_lsn != 0); if (!last_tracked_lsn) { + last_tracked_lsn = last_file_start_lsn; } @@ -748,7 +752,10 @@ log_online_read_init(void) } else { file_start_lsn = tracking_start_lsn; } - if (!log_online_rotate_bitmap_file(file_start_lsn)) { + + if (need_rotate + && !log_online_rotate_bitmap_file(file_start_lsn)) { + exit(1); } @@ -788,9 +795,9 @@ log_online_read_shutdown(void) ib_rbt_node_t *free_list_node = log_bmp_sys->page_free_list; - if (log_bmp_sys->out.file != os_file_invalid) { + if (!os_file_is_invalid(log_bmp_sys->out.file)) { os_file_close(log_bmp_sys->out.file); - log_bmp_sys->out.file = os_file_invalid; + os_file_mark_invalid(&log_bmp_sys->out.file); } rbt_free(log_bmp_sys->modified_pages); @@ -1129,6 +1136,18 @@ log_online_write_bitmap_page( } }); + /* A crash injection site that ensures last checkpoint LSN > last + tracked LSN, so that LSN tracking for this interval is tested. */ + DBUG_EXECUTE_IF("crash_before_bitmap_write", + { + ulint space_id + = mach_read_from_4(block + + MODIFIED_PAGE_SPACE_ID); + if (space_id > 0) + DBUG_SUICIDE(); + }); + + ibool success = os_file_write(log_bmp_sys->out.name, log_bmp_sys->out.file, block, log_bmp_sys->out.offset, @@ -1152,10 +1171,8 @@ log_online_write_bitmap_page( return FALSE; } -#ifdef UNIV_LINUX - posix_fadvise(log_bmp_sys->out.file, log_bmp_sys->out.offset, - MODIFIED_PAGE_BLOCK_SIZE, POSIX_FADV_DONTNEED); -#endif + os_file_advise(log_bmp_sys->out.file, log_bmp_sys->out.offset, + MODIFIED_PAGE_BLOCK_SIZE, OS_FILE_ADVISE_DONTNEED); log_bmp_sys->out.offset += MODIFIED_PAGE_BLOCK_SIZE; return TRUE; @@ -1277,10 +1294,6 @@ log_online_follow_redo_log(void) group = UT_LIST_GET_NEXT(log_groups, group); } - /* A crash injection site that ensures last checkpoint LSN > last - tracked LSN, so that LSN tracking for this interval is tested. */ - DBUG_EXECUTE_IF("crash_before_bitmap_write", DBUG_SUICIDE();); - result = log_online_write_bitmap(); log_bmp_sys->start_lsn = log_bmp_sys->end_lsn; log_set_tracked_lsn(log_bmp_sys->start_lsn); @@ -1550,10 +1563,8 @@ log_online_open_bitmap_file_read_only( bitmap_file->size = os_file_get_size(bitmap_file->file); bitmap_file->offset = 0; -#ifdef UNIV_LINUX - posix_fadvise(bitmap_file->file, 0, 0, POSIX_FADV_SEQUENTIAL); - posix_fadvise(bitmap_file->file, 0, 0, POSIX_FADV_NOREUSE); -#endif + os_file_advise(bitmap_file->file, 0, 0, OS_FILE_ADVISE_SEQUENTIAL); + os_file_advise(bitmap_file->file, 0, 0, OS_FILE_ADVISE_NOREUSE); return TRUE; } @@ -1639,7 +1650,7 @@ log_online_bitmap_iterator_init( /* Empty range */ i->in_files.count = 0; i->in_files.files = NULL; - i->in.file = os_file_invalid; + os_file_mark_invalid(&i->in.file); i->page = NULL; i->failed = FALSE; return TRUE; @@ -1657,7 +1668,7 @@ log_online_bitmap_iterator_init( if (i->in_files.count == 0) { /* Empty range */ - i->in.file = os_file_invalid; + os_file_mark_invalid(&i->in.file); i->page = NULL; i->failed = FALSE; return TRUE; @@ -1696,10 +1707,10 @@ log_online_bitmap_iterator_release( { ut_a(i); - if (i->in.file != os_file_invalid) { + if (!os_file_is_invalid(i->in.file)) { os_file_close(i->in.file); - i->in.file = os_file_invalid; + os_file_mark_invalid(&i->in.file); } if (i->in_files.files) { @@ -1753,8 +1764,9 @@ log_online_bitmap_iterator_next( /* Advance file */ i->in_i++; - success = os_file_close_no_error_handling(i->in.file); - i->in.file = os_file_invalid; + success = os_file_close_no_error_handling( + i->in.file); + os_file_mark_invalid(&i->in.file); if (UNIV_UNLIKELY(!success)) { os_file_get_last_error(TRUE); @@ -1863,7 +1875,7 @@ log_online_purge_changed_page_bitmaps( /* If we have to delete the current output file, close it first. */ os_file_close(log_bmp_sys->out.file); - log_bmp_sys->out.file = os_file_invalid; + os_file_mark_invalid(&log_bmp_sys->out.file); } for (i = 0; i < bitmap_files.count; i++) { diff --git a/storage/xtradb/log/log0recv.cc b/storage/xtradb/log/log0recv.cc index b80f1f8597e..b2e55faffca 100644 --- a/storage/xtradb/log/log0recv.cc +++ b/storage/xtradb/log/log0recv.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. This program is free software; you can redistribute it and/or modify it under @@ -324,6 +324,7 @@ DECLARE_THREAD(recv_writer_thread)( /*!< in: a dummy parameter required by os_thread_create */ { + my_thread_init(); ut_ad(!srv_read_only_mode); #ifdef UNIV_PFS_THREAD @@ -356,6 +357,7 @@ DECLARE_THREAD(recv_writer_thread)( recv_writer_thread_active = false; + my_thread_end(); /* We count the number of threads in os_thread_exit(). A created thread should always use that to exit and not use return() to exit. */ diff --git a/storage/xtradb/os/os0file.cc b/storage/xtradb/os/os0file.cc index dac67055114..9b2a2746976 100644 --- a/storage/xtradb/os/os0file.cc +++ b/storage/xtradb/os/os0file.cc @@ -1,6 +1,6 @@ /*********************************************************************** -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2009, Percona Inc. Portions of this file contain modifications contributed and copyrighted @@ -82,9 +82,11 @@ my_umask */ #ifndef __WIN__ /** Umask for creating files */ UNIV_INTERN ulint os_innodb_umask = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP; +# define os_file_invalid (-1) #else /** Umask for creating files */ UNIV_INTERN ulint os_innodb_umask = 0; +# define os_file_invalid INVALID_HANDLE_VALUE #endif /* __WIN__ */ #ifndef UNIV_HOTBACKUP @@ -173,7 +175,7 @@ struct os_aio_slot_t{ byte* buf; /*!< buffer used in i/o */ ulint type; /*!< OS_FILE_READ or OS_FILE_WRITE */ os_offset_t offset; /*!< file offset in bytes */ - os_file_t file; /*!< file where to read or write */ + pfs_os_file_t file; /*!< file where to read or write */ const char* name; /*!< file name or path */ ibool io_already_done;/*!< used only in simulated aio: TRUE if the physical i/o already @@ -1347,7 +1349,7 @@ A simple function to open or create a file. @return own: handle to the file, not defined if error, error number can be retrieved with os_file_get_last_error */ UNIV_INTERN -os_file_t +pfs_os_file_t os_file_create_simple_no_error_handling_func( /*=========================================*/ const char* name, /*!< in: name of the file or path as a @@ -1361,7 +1363,7 @@ os_file_create_simple_no_error_handling_func( if it would be enabled otherwise) */ ibool* success)/*!< out: TRUE if succeed, FALSE if error */ { - os_file_t file; + pfs_os_file_t file; *success = FALSE; #ifdef __WIN__ @@ -1369,7 +1371,6 @@ os_file_create_simple_no_error_handling_func( DWORD create_flag; DWORD attributes = 0; DWORD share_mode = FILE_SHARE_READ; - ut_a(name); ut_a(!(create_mode & OS_FILE_ON_ERROR_SILENT)); @@ -1386,8 +1387,8 @@ os_file_create_simple_no_error_handling_func( ib_logf(IB_LOG_LEVEL_ERROR, "Unknown file create mode (%lu) for file '%s'", create_mode, name); - - return((os_file_t) -1); + file.m_file = (os_file_t)-1; + return(file); } if (access_type == OS_FILE_READ_ONLY) { @@ -1411,11 +1412,11 @@ os_file_create_simple_no_error_handling_func( ib_logf(IB_LOG_LEVEL_ERROR, "Unknown file access type (%lu) for file '%s'", access_type, name); - - return((os_file_t) -1); + file.m_file = (os_file_t)-1; + return(file); } - file = CreateFile((LPCTSTR) name, + file.m_file = CreateFile((LPCTSTR) name, access, share_mode, NULL, // Security attributes @@ -1423,11 +1424,10 @@ os_file_create_simple_no_error_handling_func( attributes, NULL); // No template file - *success = (file != INVALID_HANDLE_VALUE); + *success = (file.m_file != INVALID_HANDLE_VALUE); #else /* __WIN__ */ int create_flag; const char* mode_str = NULL; - ut_a(name); ut_a(!(create_mode & OS_FILE_ON_ERROR_SILENT)); @@ -1470,19 +1470,19 @@ os_file_create_simple_no_error_handling_func( ib_logf(IB_LOG_LEVEL_ERROR, "Unknown file create mode (%lu) for file '%s'", create_mode, name); - - return((os_file_t) -1); + file.m_file = -1; + return(file); } - file = ::open(name, create_flag, os_innodb_umask); + file.m_file = ::open(name, create_flag, os_innodb_umask); - *success = file == -1 ? FALSE : TRUE; + *success = file.m_file == -1 ? FALSE : TRUE; /* This function is always called for data files, we should disable OS caching (O_DIRECT) here as we do in os_file_create_func(), so we open the same file in the same mode, see man page of open(2). */ if (*success) { - os_file_set_nocache_if_needed(file, name, mode_str, + os_file_set_nocache_if_needed(file.m_file, name, mode_str, OS_DATA_FILE, access_type); } @@ -1491,11 +1491,11 @@ os_file_create_simple_no_error_handling_func( && *success && (access_type == OS_FILE_READ_WRITE || access_type == OS_FILE_READ_WRITE_CACHED) - && os_file_lock(file, name)) { + && os_file_lock(file.m_file, name)) { *success = FALSE; - close(file); - file = -1; + close(file.m_file); + file.m_file = -1; } #endif /* USE_FILE_LOCK */ @@ -1606,7 +1606,7 @@ Opens an existing file or creates a new. @return own: handle to the file, not defined if error, error number can be retrieved with os_file_get_last_error */ UNIV_INTERN -os_file_t +pfs_os_file_t os_file_create_func( /*================*/ const char* name, /*!< in: name of the file or path as a @@ -1622,24 +1622,25 @@ os_file_create_func( ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */ ibool* success)/*!< out: TRUE if succeed, FALSE if error */ { - os_file_t file; + pfs_os_file_t file; ibool retry; ibool on_error_no_exit; ibool on_error_silent; - #ifdef __WIN__ DBUG_EXECUTE_IF( "ib_create_table_fail_disk_full", *success = FALSE; SetLastError(ERROR_DISK_FULL); - return((os_file_t) -1); + file.m_file = (os_file_t)-1; + return(file); ); #else /* __WIN__ */ DBUG_EXECUTE_IF( "ib_create_table_fail_disk_full", *success = FALSE; errno = ENOSPC; - return((os_file_t) -1); + file.m_file = -1; + return(file); ); #endif /* __WIN__ */ @@ -1690,7 +1691,8 @@ os_file_create_func( "Unknown file create mode (%lu) for file '%s'", create_mode, name); - return((os_file_t) -1); + file.m_file = (os_file_t)-1; + return(file); } DWORD attributes = 0; @@ -1715,8 +1717,8 @@ os_file_create_func( ib_logf(IB_LOG_LEVEL_ERROR, "Unknown purpose flag (%lu) while opening file '%s'", purpose, name); - - return((os_file_t)(-1)); + file.m_file = (os_file_t)-1; + return(file); } #ifdef UNIV_NON_BUFFERED_IO @@ -1743,11 +1745,11 @@ os_file_create_func( do { /* Use default security attributes and no template file. */ - file = CreateFile( + file.m_file = CreateFile( (LPCTSTR) name, access, share_mode, NULL, create_flag, attributes, NULL); - if (file == INVALID_HANDLE_VALUE) { + if (file.m_file == INVALID_HANDLE_VALUE) { const char* operation; operation = (create_mode == OS_FILE_CREATE @@ -1772,7 +1774,6 @@ os_file_create_func( #else /* __WIN__ */ int create_flag; const char* mode_str = NULL; - on_error_no_exit = create_mode & OS_FILE_ON_ERROR_NO_EXIT ? TRUE : FALSE; on_error_silent = create_mode & OS_FILE_ON_ERROR_SILENT @@ -1810,7 +1811,8 @@ os_file_create_func( "Unknown file create mode (%lu) for file '%s'", create_mode, name); - return((os_file_t) -1); + file.m_file = -1; + return(file); } ut_a(type == OS_LOG_FILE || type == OS_DATA_FILE); @@ -1830,9 +1832,9 @@ os_file_create_func( #endif /* O_SYNC */ do { - file = ::open(name, create_flag, os_innodb_umask); + file.m_file = ::open(name, create_flag, os_innodb_umask); - if (file == -1) { + if (file.m_file == -1) { const char* operation; operation = (create_mode == OS_FILE_CREATE @@ -1856,14 +1858,15 @@ os_file_create_func( if (*success) { - os_file_set_nocache_if_needed(file, name, mode_str, type, 0); + os_file_set_nocache_if_needed(file.m_file, name, mode_str, + type, 0); } #ifdef USE_FILE_LOCK if (!srv_read_only_mode && *success && create_mode != OS_FILE_OPEN_RAW - && os_file_lock(file, name)) { + && os_file_lock(file.m_file, name)) { if (create_mode == OS_FILE_OPEN_RETRY) { @@ -1875,7 +1878,7 @@ os_file_create_func( for (int i = 0; i < 100; i++) { os_thread_sleep(1000000); - if (!os_file_lock(file, name)) { + if (!os_file_lock(file.m_file, name)) { *success = TRUE; return(file); } @@ -1886,17 +1889,18 @@ os_file_create_func( } *success = FALSE; - close(file); - file = -1; + close(file.m_file); + file.m_file = -1; } #endif /* USE_FILE_LOCK */ if (srv_use_atomic_writes && type == OS_DATA_FILE - && file != -1 && !os_file_set_atomic_writes(name, file)) { + && file.m_file != -1 + && !os_file_set_atomic_writes(name, file.m_file)) { *success = FALSE; - close(file); - file = -1; + close(file.m_file); + file.m_file = -1; } #endif /* __WIN__ */ @@ -2127,8 +2131,8 @@ os_file_close_func( Closes a file handle. @return TRUE if success */ UNIV_INTERN -ibool -os_file_close_no_error_handling( +bool +os_file_close_no_error_handling_func( /*============================*/ os_file_t file) /*!< in, own: handle to a file */ { @@ -2140,10 +2144,10 @@ os_file_close_no_error_handling( ret = CloseHandle(file); if (ret) { - return(TRUE); + return(true); } - return(FALSE); + return(false); #else int ret; @@ -2151,10 +2155,83 @@ os_file_close_no_error_handling( if (ret == -1) { - return(FALSE); + return(false); } - return(TRUE); + return(true); +#endif /* __WIN__ */ +} + +#ifdef HAVE_POSIX_FALLOCATE +/***********************************************************************//** +Ensures that disk space is allocated for the file. +@return TRUE if success */ +UNIV_INTERN +bool +os_file_allocate_func( + os_file_t file, /*!< in, own: handle to a file */ + os_offset_t offset, /*!< in: file region offset */ + os_offset_t len) /*!< in: file region length */ +{ + return(posix_fallocate(file, offset, len) == 0); +} +#endif + +/***********************************************************************//** +Checks if the file is marked as invalid. +@return TRUE if invalid */ +UNIV_INTERN +bool +os_file_is_invalid( + pfs_os_file_t file) /*!< in, own: handle to a file */ +{ + return(file.m_file == os_file_invalid); +} + +/***********************************************************************//** +Marks the file as invalid. */ +UNIV_INTERN +void +os_file_mark_invalid( + pfs_os_file_t* file) /*!< out: pointer to a handle to a file */ +{ + file->m_file = os_file_invalid; +} + +/***********************************************************************//** +Announces an intention to access file data in a specific pattern in the +future. +@return TRUE if success */ +UNIV_INTERN +bool +os_file_advise( + pfs_os_file_t file, /*!< in, own: handle to a file */ + os_offset_t offset, /*!< in: file region offset */ + os_offset_t len, /*!< in: file region length */ + ulint advice)/*!< in: advice for access pattern */ +{ +#ifdef __WIN__ + return(true); +#else +#ifdef UNIV_LINUX + int native_advice = 0; + if ((advice & OS_FILE_ADVISE_NORMAL) != 0) + native_advice |= POSIX_FADV_NORMAL; + if ((advice & OS_FILE_ADVISE_RANDOM) != 0) + native_advice |= POSIX_FADV_RANDOM; + if ((advice & OS_FILE_ADVISE_SEQUENTIAL) != 0) + native_advice |= POSIX_FADV_SEQUENTIAL; + if ((advice & OS_FILE_ADVISE_WILLNEED) != 0) + native_advice |= POSIX_FADV_WILLNEED; + if ((advice & OS_FILE_ADVISE_DONTNEED) != 0) + native_advice |= POSIX_FADV_DONTNEED; + if ((advice & OS_FILE_ADVISE_NOREUSE) != 0) + native_advice |= POSIX_FADV_NOREUSE; + + return(posix_fadvise(file.m_file, offset, len, native_advice) == 0); +#else + return(true); +#endif #endif /* __WIN__ */ } @@ -2165,14 +2242,14 @@ UNIV_INTERN os_offset_t os_file_get_size( /*=============*/ - os_file_t file) /*!< in: handle to a file */ + pfs_os_file_t file) /*!< in: handle to a file */ { #ifdef __WIN__ os_offset_t offset; DWORD high; DWORD low; - low = GetFileSize(file, &high); + low = GetFileSize(file.m_file, &high); if ((low == 0xFFFFFFFF) && (GetLastError() != NO_ERROR)) { return((os_offset_t) -1); @@ -2182,7 +2259,8 @@ os_file_get_size( return(offset); #else - return((os_offset_t) lseek(file, 0, SEEK_END)); + return((os_offset_t) lseek(file.m_file, 0, SEEK_END)); + #endif /* __WIN__ */ } @@ -2195,7 +2273,7 @@ os_file_set_size( /*=============*/ const char* name, /*!< in: name of the file or path as a null-terminated string */ - os_file_t file, /*!< in: handle to a file */ + pfs_os_file_t file, /*!< in: handle to a file */ os_offset_t size) /*!< in: file size */ { os_offset_t current_size; @@ -2209,7 +2287,7 @@ os_file_set_size( #ifdef HAVE_POSIX_FALLOCATE if (srv_use_posix_fallocate) { - if (posix_fallocate(file, current_size, size) == -1) { + if (posix_fallocate(file.m_file, current_size, size) == -1) { ib_logf(IB_LOG_LEVEL_ERROR, "preallocating file " "space for file \'%s\' failed. Current size " @@ -2304,8 +2382,8 @@ os_file_set_eof( Truncates a file at the specified position. @return TRUE if success */ UNIV_INTERN -ibool -os_file_set_eof_at( +bool +os_file_set_eof_at_func( os_file_t file, /*!< in: handle to a file */ ib_uint64_t new_len)/*!< in: new file length */ { @@ -4400,7 +4478,7 @@ os_aio_array_reserve_slot( the aio operation */ void* message2,/*!< in: message to be passed along with the aio operation */ - os_file_t file, /*!< in: file handle */ + pfs_os_file_t file, /*!< in: file handle */ const char* name, /*!< in: name of the file or path as a null-terminated string */ void* buf, /*!< in: buffer where to read or from which @@ -4522,10 +4600,10 @@ found: iocb = &slot->control; if (type == OS_FILE_READ) { - io_prep_pread(iocb, file, buf, len, aio_offset); + io_prep_pread(iocb, file.m_file, buf, len, aio_offset); } else { ut_a(type == OS_FILE_WRITE); - io_prep_pwrite(iocb, file, buf, len, aio_offset); + io_prep_pwrite(iocb, file.m_file, buf, len, aio_offset); } iocb->data = (void*) slot; @@ -4763,7 +4841,7 @@ os_aio_func( caution! */ const char* name, /*!< in: name of the file or path as a null-terminated string */ - os_file_t file, /*!< in: handle to a file */ + pfs_os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read or from which to write */ os_offset_t offset, /*!< in: file offset where to read or write */ @@ -4790,8 +4868,7 @@ os_aio_func( ulint dummy_type; #endif /* WIN_ASYNC_IO */ ulint wake_later; - - ut_ad(file); + ut_ad(file.m_file); ut_ad(buf); ut_ad(n > 0); ut_ad(n % OS_MIN_LOG_BLOCK_SIZE == 0); @@ -4823,13 +4900,12 @@ os_aio_func( and os_file_write_func() */ if (type == OS_FILE_READ) { - return(os_file_read_func(file, buf, offset, n, trx)); + return(os_file_read_func(file.m_file, buf, offset, n, + trx)); } - ut_ad(!srv_read_only_mode); ut_a(type == OS_FILE_WRITE); - - return(os_file_write_func(name, file, buf, offset, n)); + return(os_file_write_func(name, file.m_file, buf, offset, n)); } try_again: @@ -4886,9 +4962,8 @@ try_again: os_n_file_reads++; os_bytes_read_since_printout += n; #ifdef WIN_ASYNC_IO - ret = ReadFile(file, buf, (DWORD) n, &len, + ret = ReadFile(file.m_file, buf, (DWORD) n, &len, &(slot->control)); - #elif defined(LINUX_NATIVE_AIO) if (!os_aio_linux_dispatch(array, slot)) { goto err_exit; @@ -4906,9 +4981,8 @@ try_again: if (srv_use_native_aio) { os_n_file_writes++; #ifdef WIN_ASYNC_IO - ret = WriteFile(file, buf, (DWORD) n, &len, + ret = WriteFile(file.m_file, buf, (DWORD) n, &len, &(slot->control)); - #elif defined(LINUX_NATIVE_AIO) if (!os_aio_linux_dispatch(array, slot)) { goto err_exit; @@ -5063,8 +5137,7 @@ os_aio_windows_handle( srv_set_io_thread_op_info( orig_seg, "get windows aio return value"); } - - ret = GetOverlappedResult(slot->file, &(slot->control), &len, TRUE); + ret = GetOverlappedResult(slot->file.m_file, &(slot->control), &len, TRUE); *message1 = slot->message1; *message2 = slot->message2; @@ -5094,7 +5167,8 @@ os_aio_windows_handle( and os_file_write APIs, need to register with performance schema explicitly here. */ struct PSI_file_locker* locker = NULL; - register_pfs_file_io_begin(locker, slot->file, slot->len, + PSI_file_locker_state state; + register_pfs_file_io_begin(&state, locker, slot->file, slot->len, (slot->type == OS_FILE_WRITE) ? PSI_FILE_WRITE : PSI_FILE_READ, @@ -5105,16 +5179,14 @@ os_aio_windows_handle( switch (slot->type) { case OS_FILE_WRITE: - ret = WriteFile(slot->file, slot->buf, + ret = WriteFile(slot->file.m_file, slot->buf, (DWORD) slot->len, &len, &(slot->control)); - break; case OS_FILE_READ: - ret = ReadFile(slot->file, slot->buf, + ret = ReadFile(slot->file.m_file, slot->buf, (DWORD) slot->len, &len, &(slot->control)); - break; default: ut_error; @@ -5130,8 +5202,7 @@ os_aio_windows_handle( file where we also use async i/o: in Windows we must use the same wait mechanism as for async i/o */ - - ret = GetOverlappedResult(slot->file, + ret = GetOverlappedResult(slot->file.m_file, &(slot->control), &len, TRUE); } @@ -5385,12 +5456,14 @@ found: iocb = &(slot->control); if (slot->type == OS_FILE_READ) { - io_prep_pread(&slot->control, slot->file, slot->buf, - slot->len, (off_t) slot->offset); + io_prep_pread(&slot->control, slot->file.m_file, + slot->buf, slot->len, + (off_t) slot->offset); } else { ut_a(slot->type == OS_FILE_WRITE); - io_prep_pwrite(&slot->control, slot->file, slot->buf, - slot->len, (off_t) slot->offset); + io_prep_pwrite(&slot->control, slot->file.m_file, + slot->buf, slot->len, + (off_t) slot->offset); } /* Resubmit an I/O request */ submit_ret = io_submit(array->aio_ctx[segment], 1, &iocb); @@ -5617,12 +5690,11 @@ consecutive_loop: os_aio_slot_t* slot; slot = os_aio_array_get_nth_slot(array, i + segment * n); - if (slot->reserved && slot != aio_slot && slot->offset == aio_slot->offset + aio_slot->len && slot->type == aio_slot->type - && slot->file == aio_slot->file) { + && slot->file.m_file == aio_slot->file.m_file) { /* Found a consecutive i/o request */ diff --git a/storage/xtradb/pars/pars0opt.cc b/storage/xtradb/pars/pars0opt.cc index cbed2b39eeb..5a7e1861d74 100644 --- a/storage/xtradb/pars/pars0opt.cc +++ b/storage/xtradb/pars/pars0opt.cc @@ -948,12 +948,14 @@ opt_find_all_cols( /* Fill in the field_no fields in sym_node */ sym_node->field_nos[SYM_CLUST_FIELD_NO] = dict_index_get_nth_col_pos( - dict_table_get_first_index(index->table), sym_node->col_no); + dict_table_get_first_index(index->table), sym_node->col_no, + NULL); if (!dict_index_is_clust(index)) { ut_a(plan); - col_pos = dict_index_get_nth_col_pos(index, sym_node->col_no); + col_pos = dict_index_get_nth_col_pos(index, sym_node->col_no, + NULL); if (col_pos == ULINT_UNDEFINED) { diff --git a/storage/xtradb/pars/pars0pars.cc b/storage/xtradb/pars/pars0pars.cc index b116357c5b9..ea65d16e9dc 100644 --- a/storage/xtradb/pars/pars0pars.cc +++ b/storage/xtradb/pars/pars0pars.cc @@ -1232,7 +1232,8 @@ pars_process_assign_list( col_sym = assign_node->col; upd_field_set_field_no(upd_field, dict_index_get_nth_col_pos( - clust_index, col_sym->col_no), + clust_index, col_sym->col_no, + NULL), clust_index, NULL); upd_field->exp = assign_node->val; diff --git a/storage/xtradb/row/row0log.cc b/storage/xtradb/row/row0log.cc index 54183759e8d..4596e2fb951 100644 --- a/storage/xtradb/row/row0log.cc +++ b/storage/xtradb/row/row0log.cc @@ -363,9 +363,9 @@ row_log_online_op( goto err_exit; } - ret = os_file_write( + ret = os_file_write_int_fd( "(modification log)", - OS_FILE_FROM_FD(log->fd), + log->fd, log->tail.block, byte_offset, srv_sort_buf_size); log->tail.blocks++; if (!ret) { @@ -479,9 +479,9 @@ row_log_table_close_func( goto err_exit; } - ret = os_file_write( + ret = os_file_write_int_fd( "(modification log)", - OS_FILE_FROM_FD(log->fd), + log->fd, log->tail.block, byte_offset, srv_sort_buf_size); log->tail.blocks++; if (!ret) { @@ -2609,11 +2609,10 @@ all_done: goto func_exit; } - success = os_file_read_no_error_handling( - OS_FILE_FROM_FD(index->online_log->fd), + success = os_file_read_no_error_handling_int_fd( + index->online_log->fd, index->online_log->head.block, ofs, srv_sort_buf_size); - if (!success) { fprintf(stderr, "InnoDB: unable to read temporary file" " for table %s\n", index->table_name); @@ -3436,8 +3435,8 @@ all_done: goto func_exit; } - success = os_file_read_no_error_handling( - OS_FILE_FROM_FD(index->online_log->fd), + success = os_file_read_no_error_handling_int_fd( + index->online_log->fd, index->online_log->head.block, ofs, srv_sort_buf_size); diff --git a/storage/xtradb/row/row0merge.cc b/storage/xtradb/row/row0merge.cc index 5139adf4746..691b6b1e8de 100644 --- a/storage/xtradb/row/row0merge.cc +++ b/storage/xtradb/row/row0merge.cc @@ -876,8 +876,9 @@ row_merge_read( } #endif /* UNIV_DEBUG */ - success = os_file_read_no_error_handling(OS_FILE_FROM_FD(fd), buf, + success = os_file_read_no_error_handling_int_fd(fd, buf, ofs, srv_sort_buf_size); + #ifdef POSIX_FADV_DONTNEED /* Each block is read exactly once. Free up the file cache. */ posix_fadvise(fd, ofs, srv_sort_buf_size, POSIX_FADV_DONTNEED); @@ -911,7 +912,7 @@ row_merge_write( DBUG_EXECUTE_IF("row_merge_write_failure", return(FALSE);); - ret = os_file_write("(merge)", OS_FILE_FROM_FD(fd), buf, ofs, buf_len); + ret = os_file_write_int_fd("(merge)", fd, buf, ofs, buf_len); #ifdef UNIV_DEBUG if (row_merge_print_block_write) { @@ -3134,14 +3135,21 @@ row_merge_file_create_low( performance schema */ struct PSI_file_locker* locker = NULL; PSI_file_locker_state state; - register_pfs_file_open_begin(&state, locker, innodb_file_temp_key, - PSI_FILE_OPEN, - "Innodb Merge Temp File", - __FILE__, __LINE__); + locker = PSI_FILE_CALL(get_thread_file_name_locker)( + &state, innodb_file_temp_key, PSI_FILE_OPEN, + "Innodb Merge Temp File", &locker); + if (locker != NULL) { + PSI_FILE_CALL(start_file_open_wait)(locker, + __FILE__, + __LINE__); + } #endif fd = innobase_mysql_tmpfile(path); #ifdef UNIV_PFS_IO - register_pfs_file_open_end(locker, fd); + if (locker != NULL) { + PSI_FILE_CALL(end_file_open_wait_and_bind_to_descriptor)( + locker, fd); + } #endif if (fd < 0) { @@ -3188,15 +3196,20 @@ row_merge_file_destroy_low( #ifdef UNIV_PFS_IO struct PSI_file_locker* locker = NULL; PSI_file_locker_state state; - register_pfs_file_io_begin(&state, locker, - fd, 0, PSI_FILE_CLOSE, - __FILE__, __LINE__); + locker = PSI_FILE_CALL(get_thread_file_descriptor_locker)( + &state, fd, PSI_FILE_CLOSE); + if (locker != NULL) { + PSI_FILE_CALL(start_file_wait)( + locker, 0, __FILE__, __LINE__); + } #endif if (fd >= 0) { close(fd); } #ifdef UNIV_PFS_IO - register_pfs_file_io_end(locker, 0); + if (locker != NULL) { + PSI_FILE_CALL(end_file_wait)(locker, 0); + } #endif } /*********************************************************************//** diff --git a/storage/xtradb/row/row0sel.cc b/storage/xtradb/row/row0sel.cc index d2821abdc2e..9f66d12283a 100644 --- a/storage/xtradb/row/row0sel.cc +++ b/storage/xtradb/row/row0sel.cc @@ -62,6 +62,9 @@ Created 12/19/1997 Heikki Tuuri #include "my_sys.h" /* DEBUG_SYNC_C */ #include "my_compare.h" /* enum icp_result */ +#include "thr_lock.h" +#include "handler.h" +#include "ha_innodb.h" /* Maximum number of rows to prefetch; MySQL interface has another parameter */ #define SEL_MAX_N_PREFETCH 16 @@ -2733,7 +2736,8 @@ row_sel_field_store_in_mysql_format_func( || !(templ->mysql_col_len % templ->mbmaxlen)); ut_ad(len * templ->mbmaxlen >= templ->mysql_col_len || (field_no == templ->icp_rec_field_no - && field->prefix_len > 0)); + && field->prefix_len > 0) + || templ->rec_field_is_prefix); ut_ad(!(field->prefix_len % templ->mbmaxlen)); if (templ->mbminlen == 1 && templ->mbmaxlen != 1) { @@ -2768,34 +2772,43 @@ row_sel_field_store_in_mysql_format_func( #ifdef UNIV_DEBUG /** Convert a field from Innobase format to MySQL format. */ -# define row_sel_store_mysql_field(m,p,r,i,o,f,t) \ - row_sel_store_mysql_field_func(m,p,r,i,o,f,t) +# define row_sel_store_mysql_field(m,p,r,i,o,f,t,c) \ + row_sel_store_mysql_field_func(m,p,r,i,o,f,t,c) #else /* UNIV_DEBUG */ /** Convert a field from Innobase format to MySQL format. */ -# define row_sel_store_mysql_field(m,p,r,i,o,f,t) \ - row_sel_store_mysql_field_func(m,p,r,o,f,t) +# define row_sel_store_mysql_field(m,p,r,i,o,f,t,c) \ + row_sel_store_mysql_field_func(m,p,r,o,f,t,c) #endif /* UNIV_DEBUG */ -/**************************************************************//** -Convert a field in the Innobase format to a field in the MySQL format. */ +/** Convert a field in the Innobase format to a field in the MySQL format. +@param[out] mysql_rec record in the MySQL format +@param[in,out] prebuilt prebuilt struct +@param[in] rec InnoDB record; must be protected + by a page latch +@param[in] index index of rec +@param[in] offsets array returned by rec_get_offsets() +@param[in] field_no templ->rec_field_no or + templ->clust_rec_field_no + or templ->icp_rec_field_no + or sec field no if clust_templ_for_sec + is TRUE +@param[in] templ row template +@param[in] clust_templ_for_sec TRUE if rec belongs to secondary index + but prebuilt template is in clustered + index format and used only for end + range comparison. */ static MY_ATTRIBUTE((warn_unused_result)) ibool row_sel_store_mysql_field_func( -/*===========================*/ - byte* mysql_rec, /*!< out: record in the - MySQL format */ - row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct */ - const rec_t* rec, /*!< in: InnoDB record; - must be protected by - a page latch */ + byte* mysql_rec, + row_prebuilt_t* prebuilt, + const rec_t* rec, #ifdef UNIV_DEBUG - const dict_index_t* index, /*!< in: index of rec */ + const dict_index_t* index, #endif - const ulint* offsets, /*!< in: array returned by - rec_get_offsets() */ - ulint field_no, /*!< in: templ->rec_field_no or - templ->clust_rec_field_no or - templ->icp_rec_field_no */ - const mysql_row_templ_t*templ) /*!< in: row template */ + const ulint* offsets, + ulint field_no, + const mysql_row_templ_t*templ, + bool clust_templ_for_sec) { const byte* data; ulint len; @@ -2804,10 +2817,12 @@ row_sel_store_mysql_field_func( ut_ad(templ); ut_ad(templ >= prebuilt->mysql_template); ut_ad(templ < &prebuilt->mysql_template[prebuilt->n_template]); - ut_ad(field_no == templ->clust_rec_field_no + ut_ad(clust_templ_for_sec + || field_no == templ->clust_rec_field_no || field_no == templ->rec_field_no || field_no == templ->icp_rec_field_no); - ut_ad(rec_offs_validate(rec, index, offsets)); + ut_ad(rec_offs_validate(rec, + clust_templ_for_sec == true ? prebuilt->index : index, offsets)); if (UNIV_UNLIKELY(rec_offs_nth_extern(offsets, field_no))) { @@ -2924,30 +2939,37 @@ row_sel_store_mysql_field_func( return(TRUE); } -/**************************************************************//** -Convert a row in the Innobase format to a row in the MySQL format. +/** Convert a row in the Innobase format to a row in the MySQL format. Note that the template in prebuilt may advise us to copy only a few columns to mysql_rec, other columns are left blank. All columns may not be needed in the query. +@param[out] mysql_rec row in the MySQL format +@param[in] prebuilt prebuilt structure +@param[in] rec Innobase record in the index + which was described in prebuilt's + template, or in the clustered index; + must be protected by a page latch +@param[in] rec_clust TRUE if the rec in the clustered index +@param[in] index index of rec +@param[in] offsets array returned by rec_get_offsets(rec) +@param[in] clust_templ_for_sec TRUE if rec belongs to secondary index + but the prebuilt->template is in + clustered index format and it is + used only for end range comparison @return TRUE on success, FALSE if not all columns could be retrieved */ static MY_ATTRIBUTE((warn_unused_result)) ibool row_sel_store_mysql_rec( -/*====================*/ - byte* mysql_rec, /*!< out: row in the MySQL format */ - row_prebuilt_t* prebuilt, /*!< in: prebuilt struct */ - const rec_t* rec, /*!< in: Innobase record in the index - which was described in prebuilt's - template, or in the clustered index; - must be protected by a page latch */ - ibool rec_clust, /*!< in: TRUE if rec is in the - clustered index instead of - prebuilt->index */ - const dict_index_t* index, /*!< in: index of rec */ - const ulint* offsets) /*!< in: array returned by - rec_get_offsets(rec) */ + byte* mysql_rec, + row_prebuilt_t* prebuilt, + const rec_t* rec, + ibool rec_clust, + const dict_index_t* index, + const ulint* offsets, + bool clust_templ_for_sec) { - ulint i; + ulint i; + std::vector template_col; ut_ad(rec_clust || index == prebuilt->index); ut_ad(!rec_clust || dict_index_is_clust(index)); @@ -2960,20 +2982,48 @@ row_sel_store_mysql_rec( if (UNIV_LIKELY_NULL(prebuilt->compress_heap)) mem_heap_empty(prebuilt->compress_heap); + if (clust_templ_for_sec) { + /* Store all clustered index field of + secondary index record. */ + for (i = 0; i < dict_index_get_n_fields( + prebuilt->index); i++) { + ulint sec_field = dict_index_get_nth_field_pos( + index, prebuilt->index, i); + template_col.push_back(sec_field); + } + } + for (i = 0; i < prebuilt->n_template; i++) { const mysql_row_templ_t*templ = &prebuilt->mysql_template[i]; - const ulint field_no + ulint field_no = rec_clust ? templ->clust_rec_field_no : templ->rec_field_no; /* We should never deliver column prefixes to MySQL, - except for evaluating innobase_index_cond(). */ + except for evaluating innobase_index_cond() and if the prefix + index is longer than the actual row data. */ + ut_ad(dict_index_get_nth_field(index, field_no)->prefix_len - == 0); + == 0 || templ->rec_field_is_prefix); + + if (clust_templ_for_sec) { + std::vector::iterator it; + it = std::find(template_col.begin(), + template_col.end(), field_no); + + if (it == template_col.end()) { + continue; + } + + ut_ad(templ->rec_field_no == templ->clust_rec_field_no); + + field_no = it - template_col.begin(); + } if (!row_sel_store_mysql_field(mysql_rec, prebuilt, rec, index, offsets, - field_no, templ)) { + field_no, templ, + clust_templ_for_sec)) { return(FALSE); } } @@ -3063,6 +3113,8 @@ row_sel_get_clust_rec_for_mysql( dberr_t err; trx_t* trx; + os_atomic_increment_ulint(&srv_sec_rec_cluster_reads, 1); + *out_rec = NULL; trx = thr_get_trx(thr); @@ -3613,7 +3665,7 @@ row_search_idx_cond_check( if (!row_sel_store_mysql_field(mysql_rec, prebuilt, rec, prebuilt->index, offsets, templ->icp_rec_field_no, - templ)) { + templ, false)) { return(ICP_NO_MATCH); } } @@ -3634,7 +3686,7 @@ row_search_idx_cond_check( || dict_index_is_clust(prebuilt->index)) { if (!row_sel_store_mysql_rec( mysql_rec, prebuilt, rec, FALSE, - prebuilt->index, offsets)) { + prebuilt->index, offsets, false)) { ut_ad(dict_index_is_clust(prebuilt->index)); return(ICP_NO_MATCH); } @@ -3653,6 +3705,50 @@ row_search_idx_cond_check( return(result); } +/** Check the pushed down end range condition to avoid extra traversal +if records are not within view and also to avoid prefetching in the +cache buffer. +@param[in] mysql_rec record in MySQL format +@param[in,out] handler the MySQL handler performing the scan +@retval true if the row in mysql_rec is out of range +@retval false if the row in mysql_rec is in range */ +static +bool +row_search_end_range_check( + const byte* mysql_rec, + ha_innobase* handler) +{ + if (handler->end_range && + handler->compare_key_in_buffer(mysql_rec) > 0) { + return(true); + } + + return(false); +} + +/** Return the record field length in characters. +@param[in] col table column of the field +@param[in] field_no field number +@param[in] rec physical record +@param[in] offsets field offsets in the physical record + +@return field length in characters */ +static +size_t +rec_field_len_in_chars(const dict_col_t &col, + const ulint field_no, + const rec_t *rec, + const ulint *offsets) +{ + const ulint cset = dtype_get_charset_coll(col.prtype); + const CHARSET_INFO* cs = all_charsets[cset]; + ulint rec_field_len; + const char* rec_field = reinterpret_cast( + rec_get_nth_field( + rec, offsets, field_no, &rec_field_len)); + return(cs->cset->numchars(cs, rec_field, rec_field + rec_field_len)); +} + /********************************************************************//** Searches for rows in the database. This is used in the interface to MySQL. This function opens a cursor, and also implements fetch next @@ -3690,7 +3786,9 @@ row_search_for_mysql( trx_t* trx = prebuilt->trx; dict_index_t* clust_index; que_thr_t* thr; - const rec_t* rec; + const rec_t* prev_rec = NULL; + const rec_t* rec = NULL; + byte* end_range_cache = NULL; const rec_t* result_rec = NULL; const rec_t* clust_rec; dberr_t err = DB_SUCCESS; @@ -3715,6 +3813,8 @@ row_search_for_mysql( ulint* offsets = offsets_; ibool table_lock_waited = FALSE; byte* next_buf = 0; + ulint end_loop = 0; + bool use_clustered_index = false; rec_offs_init(offsets_); @@ -3838,6 +3938,10 @@ row_search_for_mysql( err = DB_SUCCESS; goto func_exit; + } else if (prebuilt->end_range == true) { + prebuilt->end_range = false; + err = DB_RECORD_NOT_FOUND; + goto func_exit; } if (prebuilt->fetch_cache_first > 0 @@ -3970,7 +4074,8 @@ row_search_for_mysql( if (!row_sel_store_mysql_rec( buf, prebuilt, - rec, FALSE, index, offsets)) { + rec, FALSE, index, + offsets, false)) { /* Only fresh inserts may contain incomplete externally stored columns. Pretend that such @@ -4224,11 +4329,62 @@ rec_loop: and neither can a record lock be placed on it: we skip such a record. */ + prev_rec = NULL; goto next_rec; } if (page_rec_is_supremum(rec)) { + /** Compare the last record of the page with end range + passed to InnoDB when there is no ICP and number of loops + in row_search_for_mysql for rows found but not + reporting due to search views etc. */ + if (prev_rec != NULL + && prebuilt->mysql_handler->end_range != NULL + && prebuilt->idx_cond == NULL + && end_loop >= 100) { + + dict_index_t* key_index = prebuilt->index; + bool clust_templ_for_sec = false; + + if (end_range_cache == NULL) { + end_range_cache = static_cast( + ut_malloc(prebuilt->mysql_row_len)); + } + + if (index != clust_index + && prebuilt->need_to_access_clustered) { + /** Secondary index record but the template + based on PK. */ + key_index = clust_index; + clust_templ_for_sec = true; + } + + /** Create offsets based on prebuilt index. */ + offsets = rec_get_offsets(prev_rec, prebuilt->index, + offsets, ULINT_UNDEFINED, &heap); + + if (row_sel_store_mysql_rec( + end_range_cache, prebuilt, prev_rec, + clust_templ_for_sec, key_index, offsets, + clust_templ_for_sec)) { + + if (row_search_end_range_check( + end_range_cache, + prebuilt->mysql_handler)) { + + /** In case of prebuilt->fetch, + set the error in prebuilt->end_range. */ + if (prebuilt->n_fetch_cached > 0) { + prebuilt->end_range = true; + } + + err = DB_RECORD_NOT_FOUND; + goto normal_return; + } + } + } + if (set_also_gap_locks && !(srv_locks_unsafe_for_binlog || trx->isolation_level <= TRX_ISO_READ_COMMITTED) @@ -4260,6 +4416,7 @@ rec_loop: /* A page supremum record cannot be in the result set: skip it now that we have placed a possible lock on it */ + prev_rec = NULL; goto next_rec; } @@ -4334,6 +4491,7 @@ wrong_offs: btr_pcur_move_to_last_on_page(pcur, &mtr); + prev_rec = NULL; goto next_rec; } } @@ -4362,10 +4520,13 @@ wrong_offs: fputs(". We try to skip the record.\n", stderr); + prev_rec = NULL; goto next_rec; } } + prev_rec = rec; + /* Note that we cannot trust the up_match value in the cursor at this place because we can arrive here after moving the cursor! Thus we have to recompare rec and search_tuple to determine if they @@ -4590,6 +4751,7 @@ no_gap_lock: did_semi_consistent_read = TRUE; rec = old_vers; + prev_rec = rec; break; default: @@ -4636,6 +4798,7 @@ no_gap_lock: } rec = old_vers; + prev_rec = rec; } } else { /* We are looking into a non-clustered index, @@ -4729,10 +4892,97 @@ locks_ok: } /* Get the clustered index record if needed, if we did not do the - search using the clustered index. */ + search using the clustered index... */ - if (index != clust_index && prebuilt->need_to_access_clustered) { + use_clustered_index = + (index != clust_index && prebuilt->need_to_access_clustered); + if (use_clustered_index && prebuilt->n_template <= index->n_fields) { + /* ...but, perhaps avoid the clustered index lookup if + all of the following are true: + 1) all columns are in the secondary index + 2) all values for columns that are prefix-only + indexes are shorter than the prefix size + This optimization can avoid many IOs for certain schemas. + */ + bool row_contains_all_values = true; + unsigned int i; + for (i = 0; i < prebuilt->n_template; i++) { + /* Condition (1) from above: is the field in the + index (prefix or not)? */ + const mysql_row_templ_t* templ = + prebuilt->mysql_template + i; + ulint secondary_index_field_no = + templ->rec_prefix_field_no; + if (secondary_index_field_no == ULINT_UNDEFINED) { + row_contains_all_values = false; + break; + } + /* Condition (2) from above: if this is a + prefix, is this row's value size shorter + than the prefix? */ + if (templ->rec_field_is_prefix) { + ulint record_size = rec_offs_nth_size( + offsets, + secondary_index_field_no); + const dict_field_t *field = + dict_index_get_nth_field( + index, + secondary_index_field_no); + ut_a(field->prefix_len > 0); + if (record_size + < field->prefix_len / templ->mbmaxlen) { + + /* Record in bytes shorter than the + index prefix length in characters */ + continue; + + } else if (record_size * templ->mbminlen + >= field->prefix_len) { + + /* The shortest represantable string by + the byte length of the record is longer + than the maximum possible index + prefix. */ + row_contains_all_values = false; + break; + } else { + + /* The record could or could not fit + into the index prefix, calculate length + to find out */ + + if (rec_field_len_in_chars( + *field->col, + secondary_index_field_no, + rec, offsets) + >= (field->prefix_len + / templ->mbmaxlen)) { + + row_contains_all_values = false; + break; + } + } + } + } + /* If (1) and (2) were true for all columns above, use + rec_prefix_field_no instead of rec_field_no, and skip + the clustered lookup below. */ + if (row_contains_all_values) { + for (i = 0; i < prebuilt->n_template; i++) { + mysql_row_templ_t* templ = + prebuilt->mysql_template + i; + templ->rec_field_no = + templ->rec_prefix_field_no; + ut_a(templ->rec_field_no != ULINT_UNDEFINED); + } + use_clustered_index = false; + os_atomic_increment_ulint( + &srv_sec_rec_cluster_reads_avoided, 1); + } + } + + if (use_clustered_index) { requires_clust_rec: ut_ad(index != clust_index); /* We use a 'goto' to the preceding label if a consistent @@ -4813,7 +5063,7 @@ requires_clust_rec: appropriate version of the clustered index record. */ if (!row_sel_store_mysql_rec( buf, prebuilt, result_rec, - TRUE, clust_index, offsets)) { + TRUE, clust_index, offsets, false)) { goto next_rec; } } @@ -4881,7 +5131,7 @@ requires_clust_rec: next_buf, prebuilt, result_rec, result_rec != rec, result_rec != rec ? clust_index : index, - offsets)) { + offsets, false)) { if (next_buf == buf) { ut_a(prebuilt->n_fetch_cached == 0); @@ -4936,7 +5186,7 @@ requires_clust_rec: buf, prebuilt, result_rec, result_rec != rec, result_rec != rec ? clust_index : index, - offsets)) { + offsets, false)) { /* Only fresh inserts may contain incomplete externally stored columns. Pretend that such records do @@ -4988,6 +5238,8 @@ idx_cond_failed: goto normal_return; next_rec: + end_loop++; + /* Reset the old and new "did semi-consistent read" flags. */ if (UNIV_UNLIKELY(prebuilt->row_read_type == ROW_READ_DID_SEMI_CONSISTENT)) { @@ -5174,6 +5426,11 @@ normal_return: func_exit: trx->op_info = ""; + + if (end_range_cache != NULL) { + ut_free(end_range_cache); + } + if (UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); } diff --git a/storage/xtradb/srv/srv0srv.cc b/storage/xtradb/srv/srv0srv.cc index 24c5a9bd5d9..2153440886c 100644 --- a/storage/xtradb/srv/srv0srv.cc +++ b/storage/xtradb/srv/srv0srv.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, 2009 Google Inc. Copyright (c) 2009, Percona Inc. @@ -155,7 +155,8 @@ UNIV_INTERN unsigned long long srv_online_max_size; OS (provided we compiled Innobase with it in), otherwise we will use simulated aio we build below with threads. Currently we support native aio on windows and linux */ -UNIV_INTERN my_bool srv_use_native_aio = TRUE; +/* make srv_use_native_aio to be visible for other plugins */ +my_bool srv_use_native_aio = TRUE; UNIV_INTERN my_bool srv_numa_interleave = FALSE; #ifdef __WIN__ @@ -574,6 +575,12 @@ static ulint srv_main_shutdown_loops = 0; /** Log writes involving flush. */ static ulint srv_log_writes_and_flush = 0; +/** Number of times secondary index lookup triggered cluster lookup */ +ulint srv_sec_rec_cluster_reads = 0; + +/** Number of times prefix optimization avoided triggering cluster lookup */ +ulint srv_sec_rec_cluster_reads_avoided = 0; + /* This is only ever touched by the master thread. It records the time when the last flush of log file has happened. The master thread ensures that we flush the log files at least once per @@ -1895,6 +1902,12 @@ srv_export_innodb_status(void) } #endif /* UNIV_DEBUG */ + os_rmb; + export_vars.innodb_sec_rec_cluster_reads = + srv_sec_rec_cluster_reads; + export_vars.innodb_sec_rec_cluster_reads_avoided = + srv_sec_rec_cluster_reads_avoided; + mutex_exit(&srv_innodb_monitor_mutex); } @@ -2956,6 +2969,8 @@ DECLARE_THREAD(srv_master_thread)( /*!< in: a dummy parameter required by os_thread_create */ { + my_thread_init(); + srv_slot_t* slot; ulint old_activity_count = srv_get_activity_count(); ulint old_ibuf_merge_activity_count @@ -3029,6 +3044,7 @@ suspend_thread: os_event_wait(slot->event); if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { + my_thread_end(); os_thread_exit(NULL); } @@ -3115,6 +3131,8 @@ DECLARE_THREAD(srv_worker_thread)( void* arg MY_ATTRIBUTE((unused))) /*!< in: a dummy parameter required by os_thread_create */ { + my_thread_init(); + srv_slot_t* slot; ulint tid_i = os_atomic_increment_ulint(&purge_tid_i, 1); @@ -3181,6 +3199,7 @@ DECLARE_THREAD(srv_worker_thread)( os_thread_pf(os_thread_get_curr_id())); #endif /* UNIV_DEBUG_THREAD_CREATION */ + my_thread_end(); /* We count the number of threads in os_thread_exit(). A created thread should always use that to exit and not use return() to exit. */ os_thread_exit(NULL); @@ -3383,6 +3402,8 @@ DECLARE_THREAD(srv_purge_coordinator_thread)( void* arg MY_ATTRIBUTE((unused))) /*!< in: a dummy parameter required by os_thread_create */ { + my_thread_init(); + srv_slot_t* slot; ulint n_total_purged = ULINT_UNDEFINED; @@ -3495,6 +3516,7 @@ DECLARE_THREAD(srv_purge_coordinator_thread)( srv_release_threads(SRV_WORKER, srv_n_purge_threads - 1); } + my_thread_end(); /* We count the number of threads in os_thread_exit(). A created thread should always use that to exit and not use return() to exit. */ os_thread_exit(NULL); @@ -3563,3 +3585,18 @@ srv_purge_wakeup(void) } } +/** Check whether given space id is undo tablespace id +@param[in] space_id space id to check +@return true if it is undo tablespace else false. */ +bool +srv_is_undo_tablespace( + ulint space_id) +{ + if (srv_undo_space_id_start == 0) { + return (false); + } + + return(space_id >= srv_undo_space_id_start + && space_id < (srv_undo_space_id_start + + srv_undo_tablespaces_open)); +} diff --git a/storage/xtradb/srv/srv0start.cc b/storage/xtradb/srv/srv0start.cc index c53faba88ba..ac634df06dd 100644 --- a/storage/xtradb/srv/srv0start.cc +++ b/storage/xtradb/srv/srv0start.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All rights reserved. +Copyright (c) 1996, 2017, Oracle and/or its affiliates. All rights reserved. Copyright (c) 2008, Google Inc. Copyright (c) 2009, Percona Inc. @@ -110,6 +110,9 @@ UNIV_INTERN ibool srv_have_fullfsync = FALSE; /** TRUE if a raw partition is in use */ UNIV_INTERN ibool srv_start_raw_disk_in_use = FALSE; +/** UNDO tablespaces starts with space id. */ +ulint srv_undo_space_id_start; + /** TRUE if the server is being started, before rolling back any incomplete transactions */ UNIV_INTERN ibool srv_startup_is_before_trx_rollback_phase = FALSE; @@ -125,7 +128,7 @@ SRV_SHUTDOWN_CLEANUP and then to SRV_SHUTDOWN_LAST_PHASE, and so on */ UNIV_INTERN enum srv_shutdown_state srv_shutdown_state = SRV_SHUTDOWN_NONE; /** Files comprising the system tablespace */ -static os_file_t files[1000]; +static pfs_os_file_t files[1000]; /** io_handler_thread parameters for thread identification */ static ulint n[SRV_MAX_N_IO_THREADS]; @@ -553,7 +556,7 @@ static MY_ATTRIBUTE((nonnull, warn_unused_result)) dberr_t create_log_file( /*============*/ - os_file_t* file, /*!< out: file handle */ + pfs_os_file_t* file, /*!< out: file handle */ const char* name) /*!< in: log file name */ { ibool ret; @@ -770,7 +773,7 @@ static MY_ATTRIBUTE((nonnull, warn_unused_result)) dberr_t open_log_file( /*==========*/ - os_file_t* file, /*!< out: file handle */ + pfs_os_file_t* file, /*!< out: file handle */ const char* name, /*!< in: log file name */ os_offset_t* size) /*!< out: file size */ { @@ -886,7 +889,7 @@ open_or_create_data_files( && os_file_get_last_error(false) != OS_FILE_ALREADY_EXISTS #ifdef UNIV_AIX - /* AIX 5.1 after security patch ML7 may have + /* AIX 5.1 after security patch ML7 may have errno set to 0 here, which causes our function to return 100; work around that AIX problem */ @@ -1183,7 +1186,7 @@ srv_undo_tablespace_create( const char* name, /*!< in: tablespace name */ ulint size) /*!< in: tablespace size in pages */ { - os_file_t fh; + pfs_os_file_t fh; ibool ret; dberr_t err = DB_SUCCESS; @@ -1260,7 +1263,7 @@ srv_undo_tablespace_open( const char* name, /*!< in: tablespace name */ ulint space) /*!< in: tablespace id */ { - os_file_t fh; + pfs_os_file_t fh; dberr_t err = DB_ERROR; ibool ret; ulint flags; @@ -1359,13 +1362,23 @@ srv_undo_tablespaces_init( for (i = 0; create_new_db && i < n_conf_tablespaces; ++i) { char name[OS_FILE_MAX_PATH]; + ulint space_id = i + 1; + + DBUG_EXECUTE_IF("innodb_undo_upgrade", + space_id = i + 3;); ut_snprintf( name, sizeof(name), "%s%cundo%03lu", - srv_undo_dir, SRV_PATH_SEPARATOR, i + 1); + srv_undo_dir, SRV_PATH_SEPARATOR, space_id); + + if (i == 0) { + srv_undo_space_id_start = space_id; + prev_space_id = srv_undo_space_id_start - 1; + } + + undo_tablespace_ids[i] = space_id; - /* Undo space ids start from 1. */ err = srv_undo_tablespace_create( name, SRV_UNDO_TABLESPACE_SIZE_IN_PAGES); @@ -1387,14 +1400,16 @@ srv_undo_tablespaces_init( if (!create_new_db) { n_undo_tablespaces = trx_rseg_get_n_undo_tablespaces( undo_tablespace_ids); + + if (n_undo_tablespaces != 0) { + srv_undo_space_id_start = undo_tablespace_ids[0]; + prev_space_id = srv_undo_space_id_start - 1; + } + } else { n_undo_tablespaces = n_conf_tablespaces; - for (i = 1; i <= n_undo_tablespaces; ++i) { - undo_tablespace_ids[i - 1] = i; - } - - undo_tablespace_ids[i] = ULINT_UNDEFINED; + undo_tablespace_ids[n_conf_tablespaces] = ULINT_UNDEFINED; } /* Open all the undo tablespaces that are currently in use. If we @@ -1418,8 +1433,6 @@ srv_undo_tablespaces_init( ut_a(undo_tablespace_ids[i] != 0); ut_a(undo_tablespace_ids[i] != ULINT_UNDEFINED); - /* Undo space ids start from 1. */ - err = srv_undo_tablespace_open(name, undo_tablespace_ids[i]); if (err != DB_SUCCESS) { @@ -1454,11 +1467,23 @@ srv_undo_tablespaces_init( break; } + /** Note the first undo tablespace id in case of + no active undo tablespace. */ + if (n_undo_tablespaces == 0) { + srv_undo_space_id_start = i; + } + ++n_undo_tablespaces; ++*n_opened; } + /** Explictly specify the srv_undo_space_id_start + as zero when there are no undo tablespaces. */ + if (n_undo_tablespaces == 0) { + srv_undo_space_id_start = 0; + } + /* If the user says that there are fewer than what we find we tolerate that discrepancy but not the inverse. Because there could be unused undo tablespaces for future use. */ @@ -1503,10 +1528,11 @@ srv_undo_tablespaces_init( mtr_start(&mtr); /* The undo log tablespace */ - for (i = 1; i <= n_undo_tablespaces; ++i) { + for (i = 0; i < n_undo_tablespaces; ++i) { fsp_header_init( - i, SRV_UNDO_TABLESPACE_SIZE_IN_PAGES, &mtr); + undo_tablespace_ids[i], + SRV_UNDO_TABLESPACE_SIZE_IN_PAGES, &mtr); } mtr_commit(&mtr); @@ -1604,6 +1630,10 @@ innobase_start_or_create_for_mysql(void) char* logfile0 = NULL; size_t dirnamelen; + if (srv_force_recovery == SRV_FORCE_NO_LOG_REDO) { + srv_read_only_mode = 1; + } + high_level_read_only = srv_read_only_mode || srv_force_recovery > SRV_FORCE_NO_TRX_UNDO; diff --git a/storage/xtradb/trx/trx0purge.cc b/storage/xtradb/trx/trx0purge.cc index d9e40c5d6f5..d0d7ef2432e 100644 --- a/storage/xtradb/trx/trx0purge.cc +++ b/storage/xtradb/trx/trx0purge.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -696,7 +696,8 @@ trx_purge_get_rseg_with_min_trx_id( /* We assume in purge of externally stored fields that space id is in the range of UNDO tablespace space ids */ - ut_a(purge_sys->rseg->space <= srv_undo_tablespaces_open); + ut_a(purge_sys->rseg->space == 0 + || srv_is_undo_tablespace(purge_sys->rseg->space)); zip_size = purge_sys->rseg->zip_size; diff --git a/storage/xtradb/trx/trx0rec.cc b/storage/xtradb/trx/trx0rec.cc index 868a8a6c0b6..200ce015a7d 100644 --- a/storage/xtradb/trx/trx0rec.cc +++ b/storage/xtradb/trx/trx0rec.cc @@ -781,7 +781,8 @@ trx_undo_page_report_modify( } pos = dict_index_get_nth_col_pos(index, - col_no); + col_no, + NULL); ptr += mach_write_compressed(ptr, pos); /* Save the old value of field */ diff --git a/storage/xtradb/trx/trx0roll.cc b/storage/xtradb/trx/trx0roll.cc index e1e253cbb76..09e8e018c4f 100644 --- a/storage/xtradb/trx/trx0roll.cc +++ b/storage/xtradb/trx/trx0roll.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -800,6 +800,7 @@ DECLARE_THREAD(trx_rollback_or_clean_all_recovered)( /*!< in: a dummy parameter required by os_thread_create */ { + my_thread_init(); ut_ad(!srv_read_only_mode); #ifdef UNIV_PFS_THREAD @@ -810,6 +811,7 @@ DECLARE_THREAD(trx_rollback_or_clean_all_recovered)( trx_rollback_or_clean_is_active = false; + my_thread_end(); /* We count the number of threads in os_thread_exit(). A created thread should always use that to exit and not use return() to exit. */ diff --git a/storage/xtradb/trx/trx0sys.cc b/storage/xtradb/trx/trx0sys.cc index ccec5a7c113..4e8c50784bb 100644 --- a/storage/xtradb/trx/trx0sys.cc +++ b/storage/xtradb/trx/trx0sys.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -918,18 +918,12 @@ trx_sys_create_rsegs( ulint new_rsegs = n_rsegs - n_used; for (i = 0; i < new_rsegs; ++i) { - ulint space; + ulint space_id; + space_id = (n_spaces == 0) ? 0 + : (srv_undo_space_id_start + i % n_spaces); - /* Tablespace 0 is the system tablespace. All UNDO - log tablespaces start from 1. */ - - if (n_spaces > 0) { - space = (i % n_spaces) + 1; - } else { - space = 0; /* System tablespace */ - } - - if (trx_rseg_create(space) != NULL) { + /* Tablespace 0 is the system tablespace. */ + if (trx_rseg_create(space_id) != NULL) { ++n_used; } else { break;