diff --git a/storage/innobase/dict/dict0boot.cc b/storage/innobase/dict/dict0boot.cc index acab501e7cd..7a9b8556c1a 100644 --- a/storage/innobase/dict/dict0boot.cc +++ b/storage/innobase/dict/dict0boot.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, MariaDB Corporation. +Copyright (c) 2016, 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -64,52 +64,14 @@ dict_hdr_get_new_id( (not assigned if NULL) */ index_id_t* index_id, /*!< out: index id (not assigned if NULL) */ - ulint* space_id, /*!< out: space id + ulint* space_id) /*!< out: space id (not assigned if NULL) */ - const dict_table_t* table, /*!< in: table */ - bool disable_redo) /*!< in: if true and table - object is NULL - then disable-redo */ { dict_hdr_t* dict_hdr; ib_id_t id; mtr_t mtr; mtr_start(&mtr); - if (table) { - if (table->is_temporary()) { - mtr.set_log_mode(MTR_LOG_NO_REDO); - } - } else if (disable_redo) { - /* In non-read-only mode we need to ensure that space-id header - page is written to disk else if page is removed from buffer - cache and re-loaded it would assign temporary tablespace id - to another tablespace. - This is not a case with read-only mode as there is no new object - that is created except temporary tablespace. */ - mtr.set_log_mode(srv_read_only_mode - ? 
MTR_LOG_NONE : MTR_LOG_NO_REDO); - } - - /* Server started and let's say space-id = x - - table created with file-per-table - - space-id = x + 1 - - crash - Case 1: If it was redo logged then we know that it will be - restored to x + 1 - Case 2: if not redo-logged - Header will have the old space-id = x - This is OK because on restart there is no object with - space id = x + 1 - Case 3: - space-id = x (on start) - space-id = x+1 (temp-table allocation) - no redo logging - space-id = x+2 (non-temp-table allocation), this get's - redo logged. - If there is a crash there will be only 2 entries - x (original) and x+2 (new) and disk hdr will be updated - to reflect x + 2 entry. - We cannot allocate the same space id to different objects. */ dict_hdr = dict_hdr_get(&mtr); if (table_id) { diff --git a/storage/innobase/dict/dict0crea.cc b/storage/innobase/dict/dict0crea.cc index 6606d732808..b1ddb7032ab 100644 --- a/storage/innobase/dict/dict0crea.cc +++ b/storage/innobase/dict/dict0crea.cc @@ -352,10 +352,12 @@ dict_build_table_def_step( { ut_ad(mutex_own(&dict_sys->mutex)); dict_table_t* table = node->table; + trx_t* trx = thr_get_trx(thr); ut_ad(!table->is_temporary()); ut_ad(!table->space); ut_ad(table->space_id == ULINT_UNDEFINED); - dict_table_assign_new_id(table, thr_get_trx(thr)); + dict_hdr_get_new_id(&table->id, NULL, NULL); + trx->table_id = table->id; /* Always set this bit for all new created tables */ DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_AUX_HEX_NAME); @@ -368,8 +370,6 @@ dict_build_table_def_step( ut_ad(DICT_TF_GET_ZIP_SSIZE(table->flags) == 0 || dict_table_has_atomic_blobs(table)); - trx_t* trx = thr_get_trx(thr); - ut_ad(trx->table_id); mtr_t mtr; trx_undo_t* undo = trx->rsegs.m_redo.undo; if (undo && !undo->table_id @@ -397,7 +397,7 @@ dict_build_table_def_step( } /* Get a new tablespace ID */ ulint space_id; - dict_hdr_get_new_id(NULL, NULL, &space_id, table, false); + dict_hdr_get_new_id(NULL, NULL, &space_id); DBUG_EXECUTE_IF( 
"ib_create_table_fail_out_of_space_ids", @@ -745,7 +745,7 @@ dict_build_index_def_step( ut_ad((UT_LIST_GET_LEN(table->indexes) > 0) || dict_index_is_clust(index)); - dict_hdr_get_new_id(NULL, &index->id, NULL, table, false); + dict_hdr_get_new_id(NULL, &index->id, NULL); /* Inherit the space id from the table; we store all indexes of a table in the same tablespace */ @@ -785,7 +785,7 @@ dict_build_index_def( ut_ad((UT_LIST_GET_LEN(table->indexes) > 0) || dict_index_is_clust(index)); - dict_hdr_get_new_id(NULL, &index->id, NULL, table, false); + dict_hdr_get_new_id(NULL, &index->id, NULL); /* Note that the index was created by this transaction. */ index->trx_id = trx->id; @@ -2376,15 +2376,3 @@ dict_delete_tablespace_and_datafiles( return(err); } - -/** Assign a new table ID and put it into the table cache and the transaction. -@param[in,out] table Table that needs an ID -@param[in,out] trx Transaction */ -void -dict_table_assign_new_id( - dict_table_t* table, - trx_t* trx) -{ - dict_hdr_get_new_id(&table->id, NULL, NULL, table, false); - trx->table_id = table->id; -} diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc index 19a4aec4df4..0d3d40ff46f 100644 --- a/storage/innobase/dict/dict0dict.cc +++ b/storage/innobase/dict/dict0dict.cc @@ -406,6 +406,27 @@ dict_table_stats_unlock( } } + +/** Open a persistent table. +@param[in] table_id persistent table identifier +@param[in] ignore_err errors to ignore +@param[in] cached_only whether to skip loading +@return persistent table +@retval NULL if not found */ +static dict_table_t* dict_table_open_on_id_low( + table_id_t table_id, + dict_err_ignore_t ignore_err, + bool cached_only) +{ + dict_table_t* table = dict_sys->get_table(table_id); + + if (!table && !cached_only) { + table = dict_load_table_on_id(table_id, ignore_err); + } + + return table; +} + /**********************************************************************//** Try to drop any indexes after an aborted index creation. 
This can also be after a server kill during DROP INDEX. */ @@ -1084,20 +1105,19 @@ dict_init(void) dict_operation_lock = static_cast<rw_lock_t*>( ut_zalloc_nokey(sizeof(*dict_operation_lock))); - dict_sys = static_cast<dict_sys_t*>(ut_zalloc_nokey(sizeof(*dict_sys))); + dict_sys = new (ut_zalloc_nokey(sizeof(*dict_sys))) dict_sys_t(); UT_LIST_INIT(dict_sys->table_LRU, &dict_table_t::table_LRU); UT_LIST_INIT(dict_sys->table_non_LRU, &dict_table_t::table_LRU); mutex_create(LATCH_ID_DICT_SYS, &dict_sys->mutex); - dict_sys->table_hash = hash_create( - buf_pool_get_curr_size() - / (DICT_POOL_PER_TABLE_HASH * UNIV_WORD_SIZE)); + const ulint hash_size = buf_pool_get_curr_size() + / (DICT_POOL_PER_TABLE_HASH * UNIV_WORD_SIZE); - dict_sys->table_id_hash = hash_create( - buf_pool_get_curr_size() - / (DICT_POOL_PER_TABLE_HASH * UNIV_WORD_SIZE)); + dict_sys->table_hash = hash_create(hash_size); + dict_sys->table_id_hash = hash_create(hash_size); + dict_sys->temp_id_hash = hash_create(hash_size); rw_lock_create(dict_operation_lock_key, dict_operation_lock, SYNC_DICT_OPERATION); @@ -1257,8 +1277,7 @@ dict_table_add_system_columns( } /** Add the table definition to the data dictionary cache */ -void -dict_table_t::add_to_cache() +void dict_table_t::add_to_cache() { ut_ad(dict_lru_validate()); ut_ad(mutex_own(&dict_sys->mutex)); @@ -1266,7 +1285,6 @@ dict_table_t::add_to_cache() cached = TRUE; ulint fold = ut_fold_string(name.m_name); - ulint id_fold = ut_fold_ull(id); /* Look for a table with the same name: error if such exists */ { @@ -1284,32 +1302,31 @@ dict_table_t::add_to_cache() ut_ad(table2 == NULL); #endif /* UNIV_DEBUG */ } + HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash, fold, + this); /* Look for a table with the same id: error if such exists */ + hash_table_t* id_hash = is_temporary() + ? 
dict_sys->temp_id_hash : dict_sys->table_id_hash; + const ulint id_fold = ut_fold_ull(id); { dict_table_t* table2; - HASH_SEARCH(id_hash, dict_sys->table_id_hash, id_fold, + HASH_SEARCH(id_hash, id_hash, id_fold, dict_table_t*, table2, ut_ad(table2->cached), table2->id == id); ut_a(table2 == NULL); #ifdef UNIV_DEBUG /* Look for the same table pointer with a different id */ - HASH_SEARCH_ALL(id_hash, dict_sys->table_id_hash, + HASH_SEARCH_ALL(id_hash, id_hash, dict_table_t*, table2, ut_ad(table2->cached), table2 == this); ut_ad(table2 == NULL); #endif /* UNIV_DEBUG */ + + HASH_INSERT(dict_table_t, id_hash, id_hash, id_fold, this); } - /* Add table to hash table of tables */ - HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash, fold, - this); - - /* Add table to hash table of tables based on table id */ - HASH_INSERT(dict_table_t, id_hash, dict_sys->table_id_hash, id_fold, - this); - if (can_be_evicted) { UT_LIST_ADD_FIRST(dict_sys->table_LRU, this); } else { @@ -1955,6 +1972,7 @@ dict_table_change_id_in_cache( ut_ad(table); ut_ad(mutex_own(&dict_sys->mutex)); ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); + ut_ad(!table->is_temporary()); /* Remove the table from the hash table of id's */ @@ -2012,8 +2030,10 @@ void dict_table_remove_from_cache(dict_table_t* table, bool lru, bool keep) HASH_DELETE(dict_table_t, name_hash, dict_sys->table_hash, ut_fold_string(table->name.m_name), table); - HASH_DELETE(dict_table_t, id_hash, dict_sys->table_id_hash, - ut_fold_ull(table->id), table); + hash_table_t* id_hash = table->is_temporary() + ? dict_sys->temp_id_hash : dict_sys->table_id_hash; + const ulint id_fold = ut_fold_ull(table->id); + HASH_DELETE(dict_table_t, id_hash, id_hash, id_fold, table); /* Remove table from LRU or non-LRU list. 
*/ if (table->can_be_evicted) { @@ -6535,17 +6555,17 @@ dict_resize() /* all table entries are in table_LRU and table_non_LRU lists */ hash_table_free(dict_sys->table_hash); hash_table_free(dict_sys->table_id_hash); + hash_table_free(dict_sys->temp_id_hash); - dict_sys->table_hash = hash_create( - buf_pool_get_curr_size() - / (DICT_POOL_PER_TABLE_HASH * UNIV_WORD_SIZE)); - - dict_sys->table_id_hash = hash_create( - buf_pool_get_curr_size() - / (DICT_POOL_PER_TABLE_HASH * UNIV_WORD_SIZE)); + const ulint hash_size = buf_pool_get_curr_size() + / (DICT_POOL_PER_TABLE_HASH * UNIV_WORD_SIZE); + dict_sys->table_hash = hash_create(hash_size); + dict_sys->table_id_hash = hash_create(hash_size); + dict_sys->temp_id_hash = hash_create(hash_size); for (table = UT_LIST_GET_FIRST(dict_sys->table_LRU); table; table = UT_LIST_GET_NEXT(table_LRU, table)) { + ut_ad(!table->is_temporary()); ulint fold = ut_fold_string(table->name.m_name); ulint id_fold = ut_fold_ull(table->id); @@ -6564,8 +6584,10 @@ dict_resize() HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash, fold, table); - HASH_INSERT(dict_table_t, id_hash, dict_sys->table_id_hash, - id_fold, table); + hash_table_t* id_hash = table->is_temporary() + ? dict_sys->temp_id_hash : dict_sys->table_id_hash; + + HASH_INSERT(dict_table_t, id_hash, id_hash, id_fold, table); } mutex_exit(&dict_sys->mutex); @@ -6588,7 +6610,7 @@ dict_close(void) /* Free the hash elements. We don't remove them from the table because we are going to destroy the table anyway. */ - for (ulint i = 0; i < hash_get_n_cells(dict_sys->table_id_hash); i++) { + for (ulint i = 0; i < hash_get_n_cells(dict_sys->table_hash); i++) { dict_table_t* table; table = static_cast<dict_table_t*>( @@ -6609,6 +6631,7 @@ dict_close(void) /* The elements are the same instance as in dict_sys->table_hash, therefore we don't delete the individual elements. 
*/ hash_table_free(dict_sys->table_id_hash); + hash_table_free(dict_sys->temp_id_hash); mutex_exit(&dict_sys->mutex); mutex_free(&dict_sys->mutex); diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index cc2cd20d1b6..3e86ae267c2 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -31,13 +31,13 @@ Created 10/25/1995 Heikki Tuuri #include "buf0buf.h" #include "dict0boot.h" #include "dict0dict.h" +#include "dict0load.h" #include "fsp0file.h" #include "fsp0fsp.h" #include "hash0hash.h" #include "log0log.h" #include "log0recv.h" #include "mach0data.h" -#include "mem0mem.h" #include "mtr0log.h" #include "os0file.h" #include "page0zip.h" diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 0effdc35fb2..9895071d1fd 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -11030,9 +11030,8 @@ err_col: dict_table_add_system_columns(table, heap); if (table->is_temporary()) { - /* Get a new table ID. FIXME: Make this a private - sequence, not shared with persistent tables! 
*/ - dict_table_assign_new_id(table, m_trx); + m_trx->table_id = table->id + = dict_sys->get_temporary_table_id(); ut_ad(dict_tf_get_rec_format(table->flags) != REC_FORMAT_COMPRESSED); table->space_id = SRV_TMP_SPACE_ID; diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index 8da03522d02..a7ed09f6f38 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -5228,9 +5228,10 @@ inline dberr_t dict_table_t::reassign_id(trx_t* trx) { DBUG_ASSERT(instant); ut_ad(magic_n == DICT_TABLE_MAGIC_N); + ut_ad(!is_temporary()); table_id_t new_id; - dict_hdr_get_new_id(&new_id, NULL, NULL, NULL, false); + dict_hdr_get_new_id(&new_id, NULL, NULL); pars_info_t* pinfo = pars_info_create(); pars_info_add_ull_literal(pinfo, "old", id); diff --git a/storage/innobase/include/dict0boot.h b/storage/innobase/include/dict0boot.h index 25aced44b2e..473a45717a9 100644 --- a/storage/innobase/include/dict0boot.h +++ b/storage/innobase/include/dict0boot.h @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2018, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -26,8 +27,6 @@ Created 4/18/1996 Heikki Tuuri #ifndef dict0boot_h #define dict0boot_h -#include "univ.i" - #include "mtr0mtr.h" #include "mtr0log.h" #include "ut0byte.h" @@ -53,12 +52,8 @@ dict_hdr_get_new_id( (not assigned if NULL) */ index_id_t* index_id, /*!< out: index id (not assigned if NULL) */ - ulint* space_id, /*!< out: space id + ulint* space_id); /*!< out: space id (not assigned if NULL) */ - const dict_table_t* table, /*!< in: table */ - bool disable_redo); /*!< in: if true and table - object is NULL - then disable-redo */ /**********************************************************************//** Writes the current value of the row id counter to the dictionary header file page. */ @@ -127,13 +122,6 @@ dict_is_sys_table( /* The following is a secondary index on SYS_TABLES */ #define DICT_TABLE_IDS_ID 5 -#define DICT_HDR_FIRST_ID 10 /* the ids for tables etc. start - from this number, except for basic - system tables and their above defined - indexes; ibuf tables and indexes are - assigned as the id the number - DICT_IBUF_ID_MIN plus the space id */ - /* The offset of the dictionary header on the page */ #define DICT_HDR FSEG_PAGE_DATA diff --git a/storage/innobase/include/dict0crea.h b/storage/innobase/include/dict0crea.h index 7106d1ba85e..243ff8a345c 100644 --- a/storage/innobase/include/dict0crea.h +++ b/storage/innobase/include/dict0crea.h @@ -69,14 +69,6 @@ dict_create_table_step( /*===================*/ que_thr_t* thr); /*!< in: query thread */ -/** Assign a new table ID and put it into the table cache and the transaction. -@param[in,out] table Table that needs an ID -@param[in,out] trx Transaction */ -void -dict_table_assign_new_id( - dict_table_t* table, - trx_t* trx); - /***********************************************************//** Creates an index. 
This is a high-level function used in SQL execution graphs. diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h index 5e5e8c2ae65..3f133110b4e 100644 --- a/storage/innobase/include/dict0dict.h +++ b/storage/innobase/include/dict0dict.h @@ -28,7 +28,6 @@ Created 1/8/1996 Heikki Tuuri #ifndef dict0dict_h #define dict0dict_h -#include "univ.i" #include "data0data.h" #include "data0type.h" #include "dict0mem.h" @@ -42,14 +41,16 @@ Created 1/8/1996 Heikki Tuuri #include "ut0byte.h" #include "ut0mem.h" #include "ut0rnd.h" -#include #include "fsp0fsp.h" -#include "dict0pagecompress.h" +#include "sync0rw.h" +#include extern bool innodb_table_stats_not_found; extern bool innodb_index_stats_not_found; -#include "sync0rw.h" +/** the first table or index ID for other than hard-coded system tables */ +#define DICT_HDR_FIRST_ID 10 + /********************************************************************//** Get the database name length in a table name. @return database name length */ @@ -1572,8 +1573,10 @@ struct dict_sys_t{ the log records */ hash_table_t* table_hash; /*!< hash table of the tables, based on name */ - hash_table_t* table_id_hash; /*!< hash table of the tables, based - on id */ + /** hash table of persistent table IDs */ + hash_table_t* table_id_hash; + /** hash table of temporary table IDs */ + hash_table_t* temp_id_hash; dict_table_t* sys_tables; /*!< SYS_TABLES table */ dict_table_t* sys_columns; /*!< SYS_COLUMNS table */ dict_table_t* sys_indexes; /*!< SYS_INDEXES table */ @@ -1587,6 +1590,52 @@ struct dict_sys_t{ UT_LIST_BASE_NODE_T(dict_table_t) table_non_LRU; /*!< List of tables that can't be evicted from the cache */ + + /** @return a new temporary table ID */ + table_id_t get_temporary_table_id() { + return temp_table_id.fetch_add(1, std::memory_order_relaxed); + } + + /** Look up a temporary table. 
+ @param id temporary table ID + @return temporary table + @retval NULL if the table does not exist + (should only happen during the rollback of CREATE...SELECT) */ + dict_table_t* get_temporary_table(table_id_t id) + { + ut_ad(mutex_own(&mutex)); + dict_table_t* table; + ulint fold = ut_fold_ull(id); + HASH_SEARCH(id_hash, temp_id_hash, fold, dict_table_t*, table, + ut_ad(table->cached), table->id == id); + if (UNIV_LIKELY(table != NULL)) { + DBUG_ASSERT(table->is_temporary()); + DBUG_ASSERT(table->id >= DICT_HDR_FIRST_ID); + table->acquire(); + } + return table; + } + + /** Look up a persistent table. + @param id table ID + @return table + @retval NULL if not cached */ + dict_table_t* get_table(table_id_t id) + { + ut_ad(mutex_own(&mutex)); + dict_table_t* table; + ulint fold = ut_fold_ull(id); + HASH_SEARCH(id_hash, table_id_hash, fold, dict_table_t*, table, + ut_ad(table->cached), table->id == id); + DBUG_ASSERT(!table || !table->is_temporary()); + return table; + } + + dict_sys_t() : temp_table_id(DICT_HDR_FIRST_ID) {} + +private: + /** the sequence of temporary table IDs */ + std::atomic<table_id_t> temp_table_id; }; /** dummy index for ROW_FORMAT=REDUNDANT supremum and infimum records */ diff --git a/storage/innobase/include/dict0dict.ic b/storage/innobase/include/dict0dict.ic index a6523c9560a..fc03c446415 100644 --- a/storage/innobase/include/dict0dict.ic +++ b/storage/innobase/include/dict0dict.ic @@ -24,13 +24,9 @@ Data dictionary system Created 1/8/1996 Heikki Tuuri ***********************************************************************/ -#include "data0type.h" -#include "dict0load.h" -#include "rem0types.h" -#include "fsp0fsp.h" #include "srv0srv.h" -#include "sync0rw.h" #include "fsp0sysspace.h" +#include "dict0pagecompress.h" /*********************************************************************//** Gets the minimum number of bytes per character. 
diff --git a/storage/innobase/include/dict0priv.h b/storage/innobase/include/dict0priv.h index 593c27125a2..f08a56a9411 100644 --- a/storage/innobase/include/dict0priv.h +++ b/storage/innobase/include/dict0priv.h @@ -47,18 +47,6 @@ dict_table_check_if_in_cache_low( /*=============================*/ const char* table_name); /*!< in: table name */ -/**********************************************************************//** -Returns a table object based on table id. -@return table, NULL if does not exist */ -UNIV_INLINE -dict_table_t* -dict_table_open_on_id_low( -/*=====================*/ - table_id_t table_id, /*!< in: table id */ - dict_err_ignore_t ignore_err, /*!< in: errors to ignore - when loading the table */ - ibool open_only_if_in_cache); - #include "dict0priv.ic" #endif /* dict0priv.h */ diff --git a/storage/innobase/include/dict0priv.ic b/storage/innobase/include/dict0priv.ic index fb7af2772fc..6d7fbf07394 100644 --- a/storage/innobase/include/dict0priv.ic +++ b/storage/innobase/include/dict0priv.ic @@ -25,7 +25,6 @@ Created Wed 13 Oct 2010 16:10:14 EST Sunny Bains #include "dict0dict.h" #include "dict0load.h" -#include "dict0priv.h" /**********************************************************************//** Gets a table; loads it to the dictionary cache if necessary. A low-level @@ -63,40 +62,6 @@ dict_table_get_low( return(table); } -/**********************************************************************//** -Returns a table object based on table id. 
-@return table, NULL if does not exist */ -UNIV_INLINE -dict_table_t* -dict_table_open_on_id_low( -/*======================*/ - table_id_t table_id, /*!< in: table id */ - dict_err_ignore_t ignore_err, /*!< in: errors to ignore - when loading the table */ - ibool open_only_if_in_cache) -{ - dict_table_t* table; - ulint fold; - - ut_ad(mutex_own(&dict_sys->mutex)); - - /* Look for the table name in the hash table */ - fold = ut_fold_ull(table_id); - - HASH_SEARCH(id_hash, dict_sys->table_id_hash, fold, - dict_table_t*, table, ut_ad(table->cached), - table->id == table_id); - if (table == NULL && !open_only_if_in_cache) { - table = dict_load_table_on_id(table_id, ignore_err); - } - - ut_ad(!table || table->cached); - - /* TODO: should get the type information from MySQL */ - - return(table); -} - /**********************************************************************//** Checks if a table is in the dictionary cache. @return table, NULL if not found */ diff --git a/storage/innobase/include/rem0rec.ic b/storage/innobase/include/rem0rec.ic index 58ac3b73be5..f86643ddd62 100644 --- a/storage/innobase/include/rem0rec.ic +++ b/storage/innobase/include/rem0rec.ic @@ -1389,6 +1389,7 @@ rec_get_converted_size( } else if (index->table->id == DICT_INDEXES_ID) { /* The column SYS_INDEXES.MERGE_THRESHOLD was instantly added in MariaDB 10.2.2 (MySQL 5.7). */ + ut_ad(!index->table->is_temporary()); ut_ad(index->n_fields == DICT_NUM_FIELDS__SYS_INDEXES); ut_ad(dtuple->n_fields == DICT_NUM_FIELDS__SYS_INDEXES || dtuple->n_fields diff --git a/storage/innobase/include/row0undo.h b/storage/innobase/include/row0undo.h index f7cec643b33..2458f6c13b6 100644 --- a/storage/innobase/include/row0undo.h +++ b/storage/innobase/include/row0undo.h @@ -86,17 +86,20 @@ that index record. 
*/ enum undo_exec { UNDO_NODE_FETCH_NEXT = 1, /*!< we should fetch the next undo log record */ - UNDO_NODE_INSERT, /*!< undo a fresh insert of a - row to a table */ - UNDO_NODE_MODIFY /*!< undo a modify operation - (DELETE or UPDATE) on a row - of a table */ + /** rollback an insert into persistent table */ + UNDO_INSERT_PERSISTENT, + /** rollback an update (or delete) in a persistent table */ + UNDO_UPDATE_PERSISTENT, + /** rollback an insert into temporary table */ + UNDO_INSERT_TEMPORARY, + /** rollback an update (or delete) in a temporary table */ + UNDO_UPDATE_TEMPORARY, }; /** Undo node structure */ struct undo_node_t{ que_common_t common; /*!< node type: QUE_NODE_UNDO */ - enum undo_exec state; /*!< node execution state */ + undo_exec state; /*!< rollback execution state */ trx_t* trx; /*!< trx for which undo is done */ roll_ptr_t roll_ptr;/*!< roll pointer to undo log record */ trx_undo_rec_t* undo_rec;/*!< undo log record */ diff --git a/storage/innobase/include/trx0roll.h b/storage/innobase/include/trx0roll.h index af5ed73f04b..751cc1d7d0c 100644 --- a/storage/innobase/include/trx0roll.h +++ b/storage/innobase/include/trx0roll.h @@ -53,16 +53,6 @@ trx_savept_take( /*============*/ trx_t* trx); /*!< in: transaction */ -/** Get the last undo log record of a transaction (for rollback). -@param[in,out] trx transaction -@param[out] roll_ptr DB_ROLL_PTR to the undo record -@param[in,out] heap memory heap for allocation -@return undo log record copied to heap -@retval NULL if none left or the roll_limit (savepoint) was reached */ -trx_undo_rec_t* -trx_roll_pop_top_rec_of_trx(trx_t* trx, roll_ptr_t* roll_ptr, mem_heap_t* heap) - MY_ATTRIBUTE((nonnull, warn_unused_result)); - /** Report progress when rolling back a row of a recovered transaction. 
*/ void trx_roll_report_progress(); /*******************************************************************//** diff --git a/storage/innobase/include/trx0undo.h b/storage/innobase/include/trx0undo.h index ec197948a69..f3c52fff7b5 100644 --- a/storage/innobase/include/trx0undo.h +++ b/storage/innobase/include/trx0undo.h @@ -180,9 +180,7 @@ trx_undo_free_last_page(trx_undo_t* undo, mtr_t* mtr) @param[in,out] undo undo log @param[in] limit all undo logs after this limit will be discarded @param[in] is_temp whether this is temporary undo log */ -void -trx_undo_truncate_end(trx_undo_t* undo, undo_no_t limit, bool is_temp) - MY_ATTRIBUTE((nonnull)); +void trx_undo_truncate_end(trx_undo_t& undo, undo_no_t limit, bool is_temp); /** Truncate the head of an undo log. NOTE that only whole pages are freed; the header page is not diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 5f706a5aaaf..062892a8c88 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -4253,6 +4253,7 @@ lock_check_dict_lock( const lock_t* lock) /*!< in: lock to check */ { if (lock_get_type_low(lock) == LOCK_REC) { + ut_ad(!lock->index->table->is_temporary()); /* Check if the transcation locked a record in a system table in X mode. It should have set @@ -4266,9 +4267,8 @@ lock_check_dict_lock( } else { ut_ad(lock_get_type_low(lock) & LOCK_TABLE); - const dict_table_t* table; - - table = lock->un_member.tab_lock.table; + const dict_table_t* table = lock->un_member.tab_lock.table; + ut_ad(!table->is_temporary()); /* Check if the transcation locked a system table in IX mode. 
It should have set the dict_op code @@ -6127,10 +6127,8 @@ lock_get_table_id( /*==============*/ const lock_t* lock) /*!< in: lock */ { - dict_table_t* table; - - table = lock_get_table(lock); - + dict_table_t* table = lock_get_table(lock); + ut_ad(!table->is_temporary()); return(table->id); } diff --git a/storage/innobase/row/row0import.cc b/storage/innobase/row/row0import.cc index 4163a3a5166..1c8012f9b73 100644 --- a/storage/innobase/row/row0import.cc +++ b/storage/innobase/row/row0import.cc @@ -28,6 +28,7 @@ Created 2012-02-08 by Sunny Bains. #include "btr0pcur.h" #include "que0que.h" #include "dict0boot.h" +#include "dict0load.h" #include "ibuf0ibuf.h" #include "pars0pars.h" #include "row0sel.h" diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc index 01e7b9d45c2..1fe1fad7858 100644 --- a/storage/innobase/row/row0mysql.cc +++ b/storage/innobase/row/row0mysql.cc @@ -2948,7 +2948,7 @@ row_mysql_table_id_reassign( dberr_t err; pars_info_t* info = pars_info_create(); - dict_hdr_get_new_id(new_id, NULL, NULL, table, false); + dict_hdr_get_new_id(new_id, NULL, NULL); pars_info_add_ull_literal(info, "old_id", table->id); pars_info_add_ull_literal(info, "new_id", *new_id); diff --git a/storage/innobase/row/row0row.cc b/storage/innobase/row/row0row.cc index 3c8e77b2023..3c03f8277ae 100644 --- a/storage/innobase/row/row0row.cc +++ b/storage/innobase/row/row0row.cc @@ -725,6 +725,7 @@ row_rec_to_index_entry_impl( ut_ad(heap != NULL); ut_ad(index != NULL); ut_ad(!mblob || index->is_primary()); + ut_ad(!mblob || !index->table->is_temporary()); ut_ad(!mblob || !dict_index_is_spatial(index)); compile_time_assert(!mblob || metadata); compile_time_assert(mblob <= 2); @@ -759,7 +760,8 @@ row_rec_to_index_entry_impl( || rec_len == dict_index_get_n_fields(index) + uint(mblob == 1) /* a record for older SYS_INDEXES table (missing merge_threshold column) is acceptable. 
*/ - || (index->table->id == DICT_INDEXES_ID + || (!index->table->is_temporary() + && index->table->id == DICT_INDEXES_ID && rec_len == dict_index_get_n_fields(index) - 1)); ulint i; diff --git a/storage/innobase/row/row0uins.cc b/storage/innobase/row/row0uins.cc index 3a1690782ff..eedf73204a3 100644 --- a/storage/innobase/row/row0uins.cc +++ b/storage/innobase/row/row0uins.cc @@ -80,8 +80,19 @@ row_undo_ins_remove_clust_rec( if (index->table->is_temporary()) { ut_ad(node->rec_type == TRX_UNDO_INSERT_REC); mtr.set_log_mode(MTR_LOG_NO_REDO); + ut_ad(!dict_index_is_online_ddl(index)); + ut_ad(index->table->id >= DICT_HDR_FIRST_ID); + online = false; } else { index->set_modified(mtr); + online = dict_index_is_online_ddl(index); + if (online) { + ut_ad(node->trx->dict_operation_lock_mode + != RW_X_LATCH); + ut_ad(node->table->id != DICT_INDEXES_ID); + ut_ad(node->table->id != DICT_COLUMNS_ID); + mtr_s_lock(dict_index_get_lock(index), &mtr); + } } /* This is similar to row_undo_mod_clust(). The DDL thread may @@ -90,14 +101,6 @@ row_undo_ins_remove_clust_rec( purged. However, we can log the removal out of sync with the B-tree modification. */ - online = dict_index_is_online_ddl(index); - if (online) { - ut_ad(node->trx->dict_operation_lock_mode - != RW_X_LATCH); - ut_ad(node->table->id != DICT_INDEXES_ID); - mtr_s_lock(dict_index_get_lock(index), &mtr); - } - success = btr_pcur_restore_position( online ? 
BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED @@ -119,47 +122,47 @@ row_undo_ins_remove_clust_rec( rec, index, NULL, true, ULINT_UNDEFINED, &heap); row_log_table_delete(rec, index, offsets, NULL); mem_heap_free(heap); - } + } else { + switch (node->table->id) { + case DICT_INDEXES_ID: + ut_ad(!online); + ut_ad(node->trx->dict_operation_lock_mode + == RW_X_LATCH); + ut_ad(node->rec_type == TRX_UNDO_INSERT_REC); - switch (node->table->id) { - case DICT_INDEXES_ID: - ut_ad(!online); - ut_ad(node->trx->dict_operation_lock_mode == RW_X_LATCH); - ut_ad(node->rec_type == TRX_UNDO_INSERT_REC); + dict_drop_index_tree(btr_pcur_get_rec(&node->pcur), + &node->pcur, &mtr); + mtr.commit(); - dict_drop_index_tree( - btr_pcur_get_rec(&node->pcur), &(node->pcur), &mtr); - - mtr.commit(); - - mtr.start(); - - success = btr_pcur_restore_position( - BTR_MODIFY_LEAF, &node->pcur, &mtr); - ut_a(success); - break; - case DICT_COLUMNS_ID: - /* This is rolling back an INSERT into SYS_COLUMNS. - If it was part of an instant ALTER TABLE operation, we - must evict the table definition, so that it can be - reloaded after the dictionary operation has been - completed. At this point, any corresponding operation - to the metadata record will have been rolled back. */ - ut_ad(!online); - ut_ad(node->trx->dict_operation_lock_mode == RW_X_LATCH); - ut_ad(node->rec_type == TRX_UNDO_INSERT_REC); - const rec_t* rec = btr_pcur_get_rec(&node->pcur); - if (rec_get_n_fields_old(rec) - != DICT_NUM_FIELDS__SYS_COLUMNS) { + mtr.start(); + success = btr_pcur_restore_position( + BTR_MODIFY_LEAF, &node->pcur, &mtr); + ut_a(success); break; + case DICT_COLUMNS_ID: + /* This is rolling back an INSERT into SYS_COLUMNS. + If it was part of an instant ALTER TABLE operation, we + must evict the table definition, so that it can be + reloaded after the dictionary operation has been + completed. At this point, any corresponding operation + to the metadata record will have been rolled back. 
*/ + ut_ad(!online); + ut_ad(node->trx->dict_operation_lock_mode + == RW_X_LATCH); + ut_ad(node->rec_type == TRX_UNDO_INSERT_REC); + const rec_t* rec = btr_pcur_get_rec(&node->pcur); + if (rec_get_n_fields_old(rec) + != DICT_NUM_FIELDS__SYS_COLUMNS) { + break; + } + ulint len; + const byte* data = rec_get_nth_field_old( + rec, DICT_FLD__SYS_COLUMNS__TABLE_ID, &len); + if (len != 8) { + break; + } + node->trx->evict_table(mach_read_from_8(data)); } - ulint len; - const byte* data = rec_get_nth_field_old( - rec, DICT_FLD__SYS_COLUMNS__TABLE_ID, &len); - if (len != 8) { - break; - } - node->trx->evict_table(mach_read_from_8(data)); } if (btr_cur_optimistic_delete(btr_cur, 0, &mtr)) { @@ -363,14 +366,10 @@ retry: return(err); } -/***********************************************************//** -Parses the row reference and other info in a fresh insert undo record. */ -static -void -row_undo_ins_parse_undo_rec( -/*========================*/ - undo_node_t* node, /*!< in/out: row undo node */ - ibool dict_locked) /*!< in: TRUE if own dict_sys->mutex */ +/** Parse an insert undo record. 
+@param[in,out] node row rollback state +@param[in] dict_locked whether the data dictionary cache is locked */ +static bool row_undo_ins_parse_undo_rec(undo_node_t* node, bool dict_locked) { dict_index_t* clust_index; byte* ptr; @@ -379,18 +378,28 @@ row_undo_ins_parse_undo_rec( ulint dummy; bool dummy_extern; - ut_ad(node); + ut_ad(node->state == UNDO_INSERT_PERSISTENT + || node->state == UNDO_INSERT_TEMPORARY); + ut_ad(node->trx->in_rollback); + ut_ad(trx_undo_roll_ptr_is_insert(node->roll_ptr)); ptr = trx_undo_rec_get_pars(node->undo_rec, &node->rec_type, &dummy, &dummy_extern, &undo_no, &table_id); node->update = NULL; - node->table = dict_table_open_on_id( - table_id, dict_locked, DICT_TABLE_OP_NORMAL); + if (node->state == UNDO_INSERT_PERSISTENT) { + node->table = dict_table_open_on_id(table_id, dict_locked, + DICT_TABLE_OP_NORMAL); + } else if (!dict_locked) { + mutex_enter(&dict_sys->mutex); + node->table = dict_sys->get_temporary_table(table_id); + mutex_exit(&dict_sys->mutex); + } else { + node->table = dict_sys->get_temporary_table(table_id); + } - /* Skip the UNDO if we can't find the table or the .ibd file. */ - if (UNIV_UNLIKELY(node->table == NULL)) { - return; + if (!node->table) { + return false; } switch (node->rec_type) { @@ -429,6 +438,7 @@ close_table: connection, instead of doing this rollback. 
*/ dict_table_close(node->table, dict_locked, FALSE); node->table = NULL; + return false; } else { ut_ad(!node->table->skip_alter_undo); clust_index = dict_table_get_first_index(node->table); @@ -460,6 +470,8 @@ close_table: goto close_table; } } + + return true; } /***************************************************************//** @@ -536,18 +548,10 @@ row_undo_ins( que_thr_t* thr) /*!< in: query thread */ { dberr_t err; - ibool dict_locked; + bool dict_locked = node->trx->dict_operation_lock_mode == RW_X_LATCH; - ut_ad(node->state == UNDO_NODE_INSERT); - ut_ad(node->trx->in_rollback); - ut_ad(trx_undo_roll_ptr_is_insert(node->roll_ptr)); - - dict_locked = node->trx->dict_operation_lock_mode == RW_X_LATCH; - - row_undo_ins_parse_undo_rec(node, dict_locked); - - if (node->table == NULL) { - return(DB_SUCCESS); + if (!row_undo_ins_parse_undo_rec(node, dict_locked)) { + return DB_SUCCESS; } /* Iterate over all the indexes and undo the insert.*/ @@ -570,26 +574,19 @@ row_undo_ins( break; } - /* fall through */ - case TRX_UNDO_INSERT_METADATA: log_free_check(); if (node->table->id == DICT_INDEXES_ID) { - ut_ad(node->rec_type == TRX_UNDO_INSERT_REC); - + ut_ad(!node->table->is_temporary()); if (!dict_locked) { mutex_enter(&dict_sys->mutex); } - } - - // FIXME: We need to update the dict_index_t::space and - // page number fields too. 
- err = row_undo_ins_remove_clust_rec(node); - - if (node->table->id == DICT_INDEXES_ID - && !dict_locked) { - - mutex_exit(&dict_sys->mutex); + err = row_undo_ins_remove_clust_rec(node); + if (!dict_locked) { + mutex_exit(&dict_sys->mutex); + } + } else { + err = row_undo_ins_remove_clust_rec(node); } if (err == DB_SUCCESS && node->table->stat_initialized) { @@ -609,6 +606,12 @@ row_undo_ins( node->table, node->trx->mysql_thd); } } + break; + + case TRX_UNDO_INSERT_METADATA: + log_free_check(); + ut_ad(!node->table->is_temporary()); + err = row_undo_ins_remove_clust_rec(node); } dict_table_close(node->table, dict_locked, FALSE); diff --git a/storage/innobase/row/row0umod.cc b/storage/innobase/row/row0umod.cc index a00614709d7..fbbe6d4eef9 100644 --- a/storage/innobase/row/row0umod.cc +++ b/storage/innobase/row/row0umod.cc @@ -500,8 +500,6 @@ mtr_commit_exit: btr_pcur_commit_specify_mtr(pcur, &mtr); func_exit: - node->state = UNDO_NODE_FETCH_NEXT; - if (offsets_heap) { mem_heap_free(offsets_heap); } @@ -1203,14 +1201,10 @@ row_undo_mod_upd_exist_sec( return(err); } -/***********************************************************//** -Parses the row reference and other info in a modify undo log record. */ -static MY_ATTRIBUTE((nonnull)) -void -row_undo_mod_parse_undo_rec( -/*========================*/ - undo_node_t* node, /*!< in: row undo node */ - ibool dict_locked) /*!< in: TRUE if own dict_sys->mutex */ +/** Parse an update undo record. 
+@param[in,out] node row rollback state +@param[in] dict_locked whether the data dictionary cache is locked */ +static bool row_undo_mod_parse_undo_rec(undo_node_t* node, bool dict_locked) { dict_index_t* clust_index; byte* ptr; @@ -1223,19 +1217,28 @@ row_undo_mod_parse_undo_rec( ulint cmpl_info; bool dummy_extern; + ut_ad(node->state == UNDO_UPDATE_PERSISTENT + || node->state == UNDO_UPDATE_TEMPORARY); + ut_ad(node->trx->in_rollback); + ut_ad(!trx_undo_roll_ptr_is_insert(node->roll_ptr)); + ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &cmpl_info, &dummy_extern, &undo_no, &table_id); node->rec_type = type; - node->table = dict_table_open_on_id( - table_id, dict_locked, DICT_TABLE_OP_NORMAL); + if (node->state == UNDO_UPDATE_PERSISTENT) { + node->table = dict_table_open_on_id(table_id, dict_locked, + DICT_TABLE_OP_NORMAL); + } else if (!dict_locked) { + mutex_enter(&dict_sys->mutex); + node->table = dict_sys->get_temporary_table(table_id); + mutex_exit(&dict_sys->mutex); + } else { + node->table = dict_sys->get_temporary_table(table_id); + } - /* TODO: other fixes associated with DROP TABLE + rollback in the - same table by another user */ - - if (node->table == NULL) { - /* Table was dropped */ - return; + if (!node->table) { + return false; } ut_ad(!node->table->skip_alter_undo); @@ -1253,7 +1256,7 @@ close_table: connection, instead of doing this rollback. */ dict_table_close(node->table, dict_locked, FALSE); node->table = NULL; - return; + return false; } clust_index = dict_table_get_first_index(node->table); @@ -1324,6 +1327,8 @@ close_table: (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) ? 
NULL : ptr); } + + return true; } /***********************************************************//** @@ -1336,27 +1341,12 @@ row_undo_mod( que_thr_t* thr) /*!< in: query thread */ { dberr_t err; - ibool dict_locked; - - ut_ad(node != NULL); - ut_ad(thr != NULL); - ut_ad(node->state == UNDO_NODE_MODIFY); - ut_ad(node->trx->in_rollback); - ut_ad(!trx_undo_roll_ptr_is_insert(node->roll_ptr)); - - dict_locked = thr_get_trx(thr)->dict_operation_lock_mode == RW_X_LATCH; - ut_ad(thr_get_trx(thr) == node->trx); + const bool dict_locked = node->trx->dict_operation_lock_mode + == RW_X_LATCH; - row_undo_mod_parse_undo_rec(node, dict_locked); - - if (node->table == NULL) { - /* It is already undone, or will be undone by another query - thread, or table was dropped */ - - node->state = UNDO_NODE_FETCH_NEXT; - - return(DB_SUCCESS); + if (!row_undo_mod_parse_undo_rec(node, dict_locked)) { + return DB_SUCCESS; } node->index = dict_table_get_first_index(node->table); diff --git a/storage/innobase/row/row0undo.cc b/storage/innobase/row/row0undo.cc index 101c3b4721c..2b8187171f9 100644 --- a/storage/innobase/row/row0undo.cc +++ b/storage/innobase/row/row0undo.cc @@ -217,7 +217,8 @@ row_undo_search_clust_to_pcur( log, first mark them DATA_MISSING. So we will know if the value gets updated */ if (node->table->n_v_cols - && node->state != UNDO_NODE_INSERT + && (node->state == UNDO_UPDATE_PERSISTENT + || node->state == UNDO_UPDATE_TEMPORARY) && !(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) { for (ulint i = 0; i < dict_table_get_n_v_cols(node->table); i++) { @@ -253,6 +254,149 @@ func_exit: return(found); } +/** Try to truncate the undo logs. 
+@param[in,out] trx transaction */ +static void row_undo_try_truncate(trx_t* trx) +{ + if (trx_undo_t* undo = trx->rsegs.m_redo.undo) { + ut_ad(undo->rseg == trx->rsegs.m_redo.rseg); + trx_undo_truncate_end(*undo, trx->undo_no, false); + } + + if (trx_undo_t* undo = trx->rsegs.m_noredo.undo) { + ut_ad(undo->rseg == trx->rsegs.m_noredo.rseg); + trx_undo_truncate_end(*undo, trx->undo_no, true); + } +} + +/** Get the latest undo log record for rollback. +@param[in,out] node rollback context +@return whether an undo log record was fetched */ +static bool row_undo_rec_get(undo_node_t* node) +{ + trx_t* trx = node->trx; + + if (trx->pages_undone) { + trx->pages_undone = 0; + row_undo_try_truncate(trx); + } + + trx_undo_t* undo = NULL; + trx_undo_t* insert = trx->rsegs.m_redo.old_insert; + trx_undo_t* update = trx->rsegs.m_redo.undo; + trx_undo_t* temp = trx->rsegs.m_noredo.undo; + const undo_no_t limit = trx->roll_limit; + + ut_ad(!insert || !update || insert->empty() || update->empty() + || insert->top_undo_no != update->top_undo_no); + ut_ad(!insert || !temp || insert->empty() || temp->empty() + || insert->top_undo_no != temp->top_undo_no); + ut_ad(!update || !temp || update->empty() || temp->empty() + || update->top_undo_no != temp->top_undo_no); + + if (UNIV_LIKELY_NULL(insert) + && !insert->empty() && limit <= insert->top_undo_no) { + undo = insert; + } + + if (update && !update->empty() && update->top_undo_no >= limit) { + if (!undo) { + undo = update; + } else if (undo->top_undo_no < update->top_undo_no) { + undo = update; + } + } + + if (temp && !temp->empty() && temp->top_undo_no >= limit) { + if (!undo) { + undo = temp; + } else if (undo->top_undo_no < temp->top_undo_no) { + undo = temp; + } + } + + if (undo == NULL) { + row_undo_try_truncate(trx); + /* Mark any ROLLBACK TO SAVEPOINT completed, so that + if the transaction object is committed and reused + later, we will default to a full ROLLBACK. 
*/ + trx->roll_limit = 0; + trx->in_rollback = false; + return false; + } + + ut_ad(!undo->empty()); + ut_ad(limit <= undo->top_undo_no); + + node->roll_ptr = trx_undo_build_roll_ptr( + false, undo->rseg->id, undo->top_page_no, undo->top_offset); + + mtr_t mtr; + mtr.start(); + + page_t* undo_page = trx_undo_page_get_s_latched( + page_id_t(undo->rseg->space->id, undo->top_page_no), &mtr); + + ulint offset = undo->top_offset; + + trx_undo_rec_t* prev_rec = trx_undo_get_prev_rec( + undo_page + offset, undo->hdr_page_no, undo->hdr_offset, + true, &mtr); + + if (prev_rec == NULL) { + undo->top_undo_no = IB_ID_MAX; + ut_ad(undo->empty()); + } else { + page_t* prev_rec_page = page_align(prev_rec); + + if (prev_rec_page != undo_page) { + + trx->pages_undone++; + } + + undo->top_page_no = page_get_page_no(prev_rec_page); + undo->top_offset = ulint(prev_rec - prev_rec_page); + undo->top_undo_no = trx_undo_rec_get_undo_no(prev_rec); + ut_ad(!undo->empty()); + } + + { + const trx_undo_rec_t* undo_rec = undo_page + offset; + node->undo_rec = trx_undo_rec_copy(undo_rec, node->heap); + } + + mtr.commit(); + + switch (trx_undo_rec_get_type(node->undo_rec)) { + case TRX_UNDO_INSERT_METADATA: + /* This record type was introduced in MDEV-11369 + instant ADD COLUMN, which was implemented after + MDEV-12288 removed the insert_undo log. There is no + instant ADD COLUMN for temporary tables. Therefore, + this record can only be present in the main undo log. */ + ut_ad(undo == update); + /* fall through */ + case TRX_UNDO_RENAME_TABLE: + ut_ad(undo == insert || undo == update); + /* fall through */ + case TRX_UNDO_INSERT_REC: + ut_ad(undo == insert || undo == update || undo == temp); + node->roll_ptr |= 1ULL << ROLL_PTR_INSERT_FLAG_POS; + node->state = undo == temp + ? UNDO_INSERT_TEMPORARY : UNDO_INSERT_PERSISTENT; + break; + default: + ut_ad(undo == update || undo == temp); + node->state = undo == temp + ? 
UNDO_UPDATE_TEMPORARY : UNDO_UPDATE_PERSISTENT; + break; + } + + trx->undo_no = node->undo_no = trx_undo_rec_get_undo_no( + node->undo_rec); + return true; +} + /***********************************************************//** Fetches an undo log record and does the undo for the recorded operation. If none left, or a partial rollback completed, returns control to the @@ -265,23 +409,12 @@ row_undo( undo_node_t* node, /*!< in: row undo node */ que_thr_t* thr) /*!< in: query thread */ { - trx_t* trx = node->trx; - ut_ad(trx->in_rollback); + ut_ad(node->trx->in_rollback); - if (node->state == UNDO_NODE_FETCH_NEXT) { - - node->undo_rec = trx_roll_pop_top_rec_of_trx( - trx, &node->roll_ptr, node->heap); - - if (!node->undo_rec) { - /* Rollback completed for this query thread */ - thr->run_node = que_node_get_parent(node); - return(DB_SUCCESS); - } - - node->undo_no = trx_undo_rec_get_undo_no(node->undo_rec); - node->state = trx_undo_roll_ptr_is_insert(node->roll_ptr) - ? UNDO_NODE_INSERT : UNDO_NODE_MODIFY; + if (node->state == UNDO_NODE_FETCH_NEXT && !row_undo_rec_get(node)) { + /* Rollback completed for this query thread */ + thr->run_node = que_node_get_parent(node); + return DB_SUCCESS; } /* Prevent DROP TABLE etc. while we are rolling back this row. @@ -289,31 +422,33 @@ row_undo( then we already have dict_operation_lock locked in x-mode. Do not try to lock again, because that would cause a hang. 
*/ + trx_t* trx = node->trx; const bool locked_data_dict = (trx->dict_operation_lock_mode == 0); if (locked_data_dict) { - row_mysql_freeze_data_dictionary(trx); } dberr_t err; - if (node->state == UNDO_NODE_INSERT) { - + switch (node->state) { + case UNDO_INSERT_PERSISTENT: + case UNDO_INSERT_TEMPORARY: err = row_undo_ins(node, thr); - - node->state = UNDO_NODE_FETCH_NEXT; - } else { - ut_ad(node->state == UNDO_NODE_MODIFY); + break; + case UNDO_UPDATE_PERSISTENT: + case UNDO_UPDATE_TEMPORARY: err = row_undo_mod(node, thr); + break; + case UNDO_NODE_FETCH_NEXT: + ut_ad(!"wrong state"); } if (locked_data_dict) { - row_mysql_unfreeze_data_dictionary(trx); } - /* Do some cleanup */ + node->state = UNDO_NODE_FETCH_NEXT; btr_pcur_close(&(node->pcur)); mem_heap_empty(node->heap); diff --git a/storage/innobase/trx/trx0roll.cc b/storage/innobase/trx/trx0roll.cc index 9e992d2f145..4b0684d1735 100644 --- a/storage/innobase/trx/trx0roll.cc +++ b/storage/innobase/trx/trx0roll.cc @@ -44,10 +44,6 @@ Created 3/26/1996 Heikki Tuuri #include "trx0trx.h" #include "trx0undo.h" -/** This many pages must be undone before a truncate is tried within -rollback */ -static const ulint TRX_ROLL_TRUNC_THRESHOLD = 1; - /** true if trx_rollback_all_recovered() thread is active */ bool trx_rollback_is_active; @@ -874,175 +870,6 @@ DECLARE_THREAD(trx_rollback_all_recovered)(void*) OS_THREAD_DUMMY_RETURN; } -/** Try to truncate the undo logs. 
-@param[in,out] trx transaction */ -static -void -trx_roll_try_truncate(trx_t* trx) -{ - trx->pages_undone = 0; - - undo_no_t undo_no = trx->undo_no; - - if (trx_undo_t* undo = trx->rsegs.m_redo.undo) { - ut_ad(undo->rseg == trx->rsegs.m_redo.rseg); - mutex_enter(&undo->rseg->mutex); - trx_undo_truncate_end(undo, undo_no, false); - mutex_exit(&undo->rseg->mutex); - } - - if (trx_undo_t* undo = trx->rsegs.m_noredo.undo) { - ut_ad(undo->rseg == trx->rsegs.m_noredo.rseg); - mutex_enter(&undo->rseg->mutex); - trx_undo_truncate_end(undo, undo_no, true); - mutex_exit(&undo->rseg->mutex); - } - -#ifdef WITH_WSREP_OUT - if (wsrep_on(trx->mysql_thd)) { - trx->lock.was_chosen_as_deadlock_victim = FALSE; - } -#endif /* WITH_WSREP */ -} - -/***********************************************************************//** -Pops the topmost undo log record in a single undo log and updates the info -about the topmost record in the undo log memory struct. -@return undo log record, the page s-latched */ -static -trx_undo_rec_t* -trx_roll_pop_top_rec( -/*=================*/ - trx_t* trx, /*!< in: transaction */ - trx_undo_t* undo, /*!< in: undo log */ - mtr_t* mtr) /*!< in: mtr */ -{ - page_t* undo_page = trx_undo_page_get_s_latched( - page_id_t(undo->rseg->space->id, undo->top_page_no), mtr); - - ulint offset = undo->top_offset; - - trx_undo_rec_t* prev_rec = trx_undo_get_prev_rec( - undo_page + offset, undo->hdr_page_no, undo->hdr_offset, - true, mtr); - - if (prev_rec == NULL) { - undo->top_undo_no = IB_ID_MAX; - ut_ad(undo->empty()); - } else { - page_t* prev_rec_page = page_align(prev_rec); - - if (prev_rec_page != undo_page) { - - trx->pages_undone++; - } - - undo->top_page_no = page_get_page_no(prev_rec_page); - undo->top_offset = ulint(prev_rec - prev_rec_page); - undo->top_undo_no = trx_undo_rec_get_undo_no(prev_rec); - ut_ad(!undo->empty()); - } - - return(undo_page + offset); -} - -/** Get the last undo log record of a transaction (for rollback). 
-@param[in,out] trx transaction -@param[out] roll_ptr DB_ROLL_PTR to the undo record -@param[in,out] heap memory heap for allocation -@return undo log record copied to heap -@retval NULL if none left or the roll_limit (savepoint) was reached */ -trx_undo_rec_t* -trx_roll_pop_top_rec_of_trx(trx_t* trx, roll_ptr_t* roll_ptr, mem_heap_t* heap) -{ - if (trx->pages_undone >= TRX_ROLL_TRUNC_THRESHOLD) { - trx_roll_try_truncate(trx); - } - - trx_undo_t* undo = NULL; - trx_undo_t* insert = trx->rsegs.m_redo.old_insert; - trx_undo_t* update = trx->rsegs.m_redo.undo; - trx_undo_t* temp = trx->rsegs.m_noredo.undo; - const undo_no_t limit = trx->roll_limit; - - ut_ad(!insert || !update || insert->empty() || update->empty() - || insert->top_undo_no != update->top_undo_no); - ut_ad(!insert || !temp || insert->empty() || temp->empty() - || insert->top_undo_no != temp->top_undo_no); - ut_ad(!update || !temp || update->empty() || temp->empty() - || update->top_undo_no != temp->top_undo_no); - - if (UNIV_LIKELY_NULL(insert) - && !insert->empty() && limit <= insert->top_undo_no) { - undo = insert; - } - - if (update && !update->empty() && update->top_undo_no >= limit) { - if (!undo) { - undo = update; - } else if (undo->top_undo_no < update->top_undo_no) { - undo = update; - } - } - - if (temp && !temp->empty() && temp->top_undo_no >= limit) { - if (!undo) { - undo = temp; - } else if (undo->top_undo_no < temp->top_undo_no) { - undo = temp; - } - } - - if (undo == NULL) { - trx_roll_try_truncate(trx); - /* Mark any ROLLBACK TO SAVEPOINT completed, so that - if the transaction object is committed and reused - later, we will default to a full ROLLBACK. 
*/ - trx->roll_limit = 0; - trx->in_rollback = false; - return(NULL); - } - - ut_ad(!undo->empty()); - ut_ad(limit <= undo->top_undo_no); - - *roll_ptr = trx_undo_build_roll_ptr( - false, undo->rseg->id, undo->top_page_no, undo->top_offset); - - mtr_t mtr; - mtr.start(); - - trx_undo_rec_t* undo_rec = trx_roll_pop_top_rec(trx, undo, &mtr); - const undo_no_t undo_no = trx_undo_rec_get_undo_no(undo_rec); - switch (trx_undo_rec_get_type(undo_rec)) { - case TRX_UNDO_INSERT_METADATA: - /* This record type was introduced in MDEV-11369 - instant ADD COLUMN, which was implemented after - MDEV-12288 removed the insert_undo log. There is no - instant ADD COLUMN for temporary tables. Therefore, - this record can only be present in the main undo log. */ - ut_ad(undo == update); - /* fall through */ - case TRX_UNDO_RENAME_TABLE: - ut_ad(undo == insert || undo == update); - /* fall through */ - case TRX_UNDO_INSERT_REC: - ut_ad(undo == insert || undo == update || undo == temp); - *roll_ptr |= 1ULL << ROLL_PTR_INSERT_FLAG_POS; - break; - default: - ut_ad(undo == update || undo == temp); - break; - } - - trx->undo_no = undo_no; - - trx_undo_rec_t* undo_rec_copy = trx_undo_rec_copy(undo_rec, heap); - mtr.commit(); - - return(undo_rec_copy); -} - /****************************************************************//** Builds an undo 'query' graph for a transaction. The actual rollback is performed by executing this query graph like a query subprocedure call. 
diff --git a/storage/innobase/trx/trx0undo.cc b/storage/innobase/trx/trx0undo.cc index 61ba65ebc19..e084b0b67bf 100644 --- a/storage/innobase/trx/trx0undo.cc +++ b/storage/innobase/trx/trx0undo.cc @@ -888,54 +888,55 @@ trx_undo_free_last_page(trx_undo_t* undo, mtr_t* mtr) @param[in,out] undo undo log @param[in] limit all undo logs after this limit will be discarded @param[in] is_temp whether this is temporary undo log */ -void -trx_undo_truncate_end(trx_undo_t* undo, undo_no_t limit, bool is_temp) +void trx_undo_truncate_end(trx_undo_t& undo, undo_no_t limit, bool is_temp) { - ut_ad(mutex_own(&undo->rseg->mutex)); - ut_ad(is_temp == !undo->rseg->is_persistent()); + mtr_t mtr; + ut_ad(is_temp == !undo.rseg->is_persistent()); for (;;) { - mtr_t mtr; mtr.start(); if (is_temp) { mtr.set_log_mode(MTR_LOG_NO_REDO); } trx_undo_rec_t* trunc_here = NULL; + mutex_enter(&undo.rseg->mutex); page_t* undo_page = trx_undo_page_get( - page_id_t(undo->rseg->space->id, undo->last_page_no), + page_id_t(undo.rseg->space->id, undo.last_page_no), &mtr); trx_undo_rec_t* rec = trx_undo_page_get_last_rec( - undo_page, undo->hdr_page_no, undo->hdr_offset); + undo_page, undo.hdr_page_no, undo.hdr_offset); while (rec) { - if (trx_undo_rec_get_undo_no(rec) >= limit) { - /* Truncate at least this record off, maybe - more */ - trunc_here = rec; - } else { - goto function_exit; + if (trx_undo_rec_get_undo_no(rec) < limit) { + goto func_exit; } + /* Truncate at least this record off, maybe more */ + trunc_here = rec; rec = trx_undo_page_get_prev_rec(rec, - undo->hdr_page_no, - undo->hdr_offset); + undo.hdr_page_no, + undo.hdr_offset); } - if (undo->last_page_no == undo->hdr_page_no) { -function_exit: - if (trunc_here) { - mlog_write_ulint(undo_page + TRX_UNDO_PAGE_HDR - + TRX_UNDO_PAGE_FREE, - ulint(trunc_here - undo_page), - MLOG_2BYTES, &mtr); - } - + if (undo.last_page_no != undo.hdr_page_no) { + trx_undo_free_last_page(&undo, &mtr); + mutex_exit(&undo.rseg->mutex); mtr.commit(); - return; + 
continue; + } + +func_exit: + mutex_exit(&undo.rseg->mutex); + + if (trunc_here) { + mlog_write_ulint(undo_page + TRX_UNDO_PAGE_HDR + + TRX_UNDO_PAGE_FREE, + ulint(trunc_here - undo_page), + MLOG_2BYTES, &mtr); } - trx_undo_free_last_page(undo, &mtr); mtr.commit(); + return; } }