diff --git a/sql/sql_base.cc b/sql/sql_base.cc index a08a96465ca..8d7b0a3c88c 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -869,7 +869,7 @@ void intern_close_table(TABLE *table) free_io_cache(table); delete table->triggers; - if (table->file) // Not true if name lock + if (table->file) // Not true if placeholder (void) closefrm(table, 1); // close file DBUG_VOID_RETURN; } @@ -2461,12 +2461,6 @@ bool reopen_name_locked_table(THD* thd, TABLE_LIST* table_list) if (reopen_table_entry(thd, table, table_list, table_name, key, key_length)) { - /* - If there was an error during opening of table (for example if it - does not exist) '*table' object can be wiped out. To be able - properly release name-lock in this case we should restore this - object to its original state. - */ my_free((uchar*)table, MYF(0)); DBUG_RETURN(TRUE); } @@ -2509,8 +2503,8 @@ bool reopen_name_locked_table(THD* thd, TABLE_LIST* table_list) exists and to FALSE otherwise. @note This function assumes that caller owns LOCK_open mutex. - It also assumes that the fact that there are no name-locks - on the table was checked beforehand. + It also assumes that the fact that there are no exclusive + metadata locks on the table was checked beforehand. @note If there is no .FRM file for the table but it exists in one of engines (e.g. it was created on another node of NDB cluster) @@ -2605,10 +2599,10 @@ void table_share_release_hook(void *share) IMPLEMENTATION Uses a cache of open tables to find a table not in use. - If table list element for the table to be opened has "create" flag - set and table does not exist, this function will automatically insert - a placeholder for exclusive name lock into the open tables cache and - will return the TABLE instance that corresponds to this placeholder. + If table list element for the table to be opened has "open_type" set + to OPEN_OR_CREATE and table does not exist, this function will take + exclusive metadata lock on the table, also it will do this if + "open_type" is TAKE_EXCLUSIVE_MDL. RETURN NULL Open failed. If refresh is set then one should close @@ -4708,11 +4702,11 @@ int open_tables(THD *thd, TABLE_LIST **start, uint *counter, uint flags) if (action) { /* - We have met name-locked or old version of table. Now we have - to close all tables which are not up to date. We also have to - throw away set of prelocked tables (and thus close tables from - this set that were open by now) since it possible that one of - tables which determined its content was changed. + We have met exclusive metadata lock or old version of table. Now we + have to close all tables which are not up to date/release metadata + locks. We also have to throw away set of prelocked tables (and thus + close tables from this set that were open by now) since it possible + that one of tables which determined its content was changed. Instead of implementing complex/non-robust logic mentioned above we simply close and then reopen all tables. diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc index f5c6dfd8986..612f9d1954d 100644 --- a/sql/sql_delete.cc +++ b/sql/sql_delete.cc @@ -1079,7 +1079,8 @@ static bool mysql_truncate_by_delete(THD *thd, TABLE_LIST *table_list) normally can't safely do this. - We don't want an ok to be sent to the end user. - We don't want to log the truncate command - - If we want to have a name lock on the table on exit without errors. + - If we want to keep exclusive metadata lock on the table (obtained by + caller) on exit without errors. */ bool mysql_truncate(THD *thd, TABLE_LIST *table_list, bool dont_send_ok) diff --git a/sql/sql_handler.cc b/sql/sql_handler.cc index 15c73fcadfe..a2c1f0e3782 100644 --- a/sql/sql_handler.cc +++ b/sql/sql_handler.cc @@ -282,7 +282,8 @@ bool mysql_ha_open(THD *thd, TABLE_LIST *tables, bool reopen) The thd->handler_tables list is kept as-is to avoid deadlocks if open_table(), called by open_tables(), needs to back-off because - of a pending name-lock on the table being opened. + of a pending exclusive metadata lock or flush for the table being + opened. See open_table() back-off comments for more details. */ diff --git a/sql/sql_partition.cc b/sql/sql_partition.cc index 3c67574d8c1..edc9055fb39 100644 --- a/sql/sql_partition.cc +++ b/sql/sql_partition.cc @@ -6528,10 +6528,10 @@ uint fast_alter_partition_table(THD *thd, TABLE *table, 3) Lock the table in TL_WRITE_ONLY to ensure all other accesses to the table have completed. This ensures that other threads can not execute on the table in parallel. - 4) Get a name lock on the table. This ensures that we can release all - locks on the table and since no one can open the table, there can - be no new threads accessing the table. They will be hanging on the - name lock. + 4) Get an exclusive metadata lock on the table. This ensures that we + can release all other locks on the table and since no one can open + the table, there can be no new threads accessing the table. They + will be hanging on this exclusive lock. 5) Close all tables that have already been opened but didn't stumble on the abort locked previously. This is done as part of the close_data_files_and_morph_locks call. @@ -6606,14 +6606,15 @@ uint fast_alter_partition_table(THD *thd, TABLE *table, 3) Lock all partitions in TL_WRITE_ONLY to ensure that no users are still using the old partitioning scheme. Wait until all ongoing users have completed before progressing. - 4) Get a name lock on the table. This ensures that we can release all - locks on the table and since no one can open the table, there can - be no new threads accessing the table. They will be hanging on the - name lock. + 4) Get an exclusive metadata lock on the table. This ensures that we + can release all other locks on the table and since no one can open + the table, there can be no new threads accessing the table. They + will be hanging on this exclusive lock. 5) Close all tables that have already been opened but didn't stumble on the abort locked previously. This is done as part of the close_data_files_and_morph_locks call. - 6) Close all table handlers and unlock all handlers but retain name lock + 6) Close all table handlers and unlock all handlers but retain + metadata lock. 7) Write binlog 8) Now the change is completed except for the installation of the new frm file. We thus write an action in the log to change to @@ -6694,23 +6695,18 @@ uint fast_alter_partition_table(THD *thd, TABLE *table, Copy from the reorganised partitions to the new partitions 4) Log that operation is completed and log all complete actions needed to complete operation from here - 5) Lock all partitions in TL_WRITE_ONLY to ensure that no users - are still using the old partitioning scheme. Wait until all - ongoing users have completed before progressing. - 6) Get a name lock of the table - 7) Close all tables opened but not yet locked, after this call we are - certain that no other thread is in the lock wait queue or has - opened the table. The name lock will ensure that they are blocked - on the open call. - This is achieved also by close_data_files_and_morph_locks call. - 8) Close all partitions opened by this thread, but retain name lock. - 9) Write bin log - 10) Prepare handlers for rename and delete of partitions - 11) Rename and drop the reorged partitions such that they are no - longer used and rename those added to their real new names. - 12) Install the shadow frm file - 13) Reopen the table if under lock tables - 14) Complete query + 5) Upgrade shared metadata lock on the table to an exclusive one. + After this we can be sure that there is no other connection + using this table (they will be waiting for metadata lock). + 6) Close all table instances opened by this thread, but retain + exclusive metadata lock. + 7) Write bin log + 8) Prepare handlers for rename and delete of partitions + 9) Rename and drop the reorged partitions such that they are no + longer used and rename those added to their real new names. + 10) Install the shadow frm file + 11) Reopen the table if under lock tables + 12) Complete query */ if (write_log_add_change_partition(lpt) || ERROR_INJECT_CRASH("crash_change_partition_1") || diff --git a/sql/sql_show.cc b/sql/sql_show.cc index 647f914d28c..4d8e482cf04 100644 --- a/sql/sql_show.cc +++ b/sql/sql_show.cc @@ -3271,7 +3271,8 @@ int get_all_tables(THD *thd, TABLE_LIST *tables, COND *cond) /* We should not introduce deadlocks even if we already have some tables open and locked, since we won't lock tables which we will - open and will ignore possible name-locks for these tables. + open and will ignore pending exclusive metadata locks for these + tables by using high-priority requests for shared metadata locks. */ thd->reset_n_backup_open_tables_state(&open_tables_state_backup); @@ -7301,8 +7302,8 @@ bool show_create_trigger(THD *thd, const sp_name *trg_name) Open the table by name in order to load Table_triggers_list object. NOTE: there is race condition here -- the table can be dropped after - LOCK_open is released. It will be fixed later by introducing - acquire-shared-table-name-lock functionality. + LOCK_open is released. It will be fixed later by acquiring shared + metadata lock on trigger or table name. */ uint num_tables; /* NOTE: unused, only to pass to open_tables(). */ diff --git a/sql/sql_table.cc b/sql/sql_table.cc index 306fa5dcdf4..b5f7fad36a6 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -3674,9 +3674,9 @@ static inline void write_create_table_bin_log(THD *thd, If one creates a temporary table, this is automatically opened Note that this function assumes that caller already have taken - name-lock on table being created or used some other way to ensure - that concurrent operations won't intervene. mysql_create_table() - is a wrapper that can be used for this. + exclusive metadata lock on table being created or used some other + way to ensure that concurrent operations won't intervene. + mysql_create_table() is a wrapper that can be used for this. no_log is needed for the case of CREATE ... SELECT, as the logging will be done later in sql_insert.cc @@ -5279,13 +5279,13 @@ bool mysql_create_like_table(THD* thd, TABLE_LIST* table, TABLE_LIST* src_table, /* - By opening source table we guarantee that it exists and no concurrent - DDL operation will mess with it. Later we also take an exclusive - name-lock on target table name, which makes copying of .frm file, - call to ha_create_table() and binlogging atomic against concurrent DML - and DDL operations on target table. Thus by holding both these "locks" - we ensure that our statement is properly isolated from all concurrent - operations which matter. + By opening source table and thus acquiring shared metadata lock on it + we guarantee that it exists and no concurrent DDL operation will mess + with it. Later we also take an exclusive metadata lock on target table + name, which makes copying of .frm file, call to ha_create_table() and + binlogging atomic against concurrent DML and DDL operations on target + table. Thus by holding both these "locks" we ensure that our statement + is properly isolated from all concurrent operations which matter. */ if (open_tables(thd, &src_table, ¬_used, 0)) DBUG_RETURN(TRUE); @@ -5338,15 +5338,13 @@ bool mysql_create_like_table(THD* thd, TABLE_LIST* table, TABLE_LIST* src_table, Create a new table by copying from source table and sync the new table if the flag MY_SYNC is set - Altough exclusive name-lock on target table protects us from concurrent - DML and DDL operations on it we still want to wrap .FRM creation and call - to ha_create_table() in critical section protected by LOCK_open in order - to provide minimal atomicity against operations which disregard name-locks, - like I_S implementation, for example. This is a temporary and should not - be copied. Instead we should fix our code to always honor name-locks. - - Also some engines (e.g. NDB cluster) require that LOCK_open should be held - during the call to ha_create_table(). See bug #28614 for more info. + TODO: Obtaining LOCK_open mutex here is actually a legacy from the + times when some operations (e.g. I_S implementation) ignored + exclusive metadata lock on target table. Also some engines + (e.g. NDB cluster) require that LOCK_open should be held + during the call to ha_create_table() (See bug #28614 for more + info). So we should double check and probably fix this code + to not acquire this mutex. */ pthread_mutex_lock(&LOCK_open); if (src_table->schema_table) @@ -5461,9 +5459,9 @@ binlog: /* Here we open the destination table, on which we already have - name-lock. This is needed for store_create_info() to work. - The table will be closed by unlink_open_table() at the end - of this function. + exclusive metada lock. This is needed for store_create_info() + to work. The table will be closed by unlink_open_table() at + the end of this function. */ pthread_mutex_lock(&LOCK_open); if (reopen_name_locked_table(thd, table)) @@ -6794,9 +6792,9 @@ view_err: /* Then, we want check once again that target table does not exist. Actually the order of these two steps does not matter since - earlier we took name-lock on the target table, so we do them - in this particular order only to be consistent with 5.0, in which - we don't take this name-lock and where this order really matters. + earlier we took exclusive metadata lock on the target table, so + we do them in this particular order only to be consistent with 5.0, + in which we don't take this lock and where this order really matters. TODO: Investigate if we need this access() check at all. */ if (!access(new_name_buff,F_OK)) @@ -7354,18 +7352,19 @@ view_err: /* Data is copied. Now we: - 1) Wait until all other threads close old version of table. + 1) Wait until all other threads will stop using old version of table + by upgrading shared metadata lock to exclusive one. 2) Close instances of table open by this thread and replace them - with exclusive name-locks. + with placeholders to simplify reopen process. 3) Rename the old table to a temp name, rename the new one to the old name. 4) If we are under LOCK TABLES and don't do ALTER TABLE ... RENAME we reopen new version of table. 5) Write statement to the binary log. 6) If we are under LOCK TABLES and do ALTER TABLE ... RENAME we - remove name-locks from list of open tables and table cache. + remove placeholders and release metadata locks. 7) If we are not not under LOCK TABLES we rely on close_thread_tables() - call to remove name-locks from table cache and list of open table. + call to remove placeholders and releasing metadata locks. */ thd_proc_info(thd, "rename result table"); @@ -7602,9 +7601,9 @@ err: err_with_placeholders: /* - An error happened while we were holding exclusive name-lock on table - being altered. To be safe under LOCK TABLES we should remove placeholders - from list of open tables list and table cache. + An error happened while we were holding exclusive name metadata lock + on table being altered. To be safe under LOCK TABLES we should remove + placeholders from the list of open tables and relese metadata lock. */ unlink_open_table(thd, table, FALSE); pthread_mutex_unlock(&LOCK_open); diff --git a/sql/sql_trigger.cc b/sql/sql_trigger.cc index caf5c84e1f9..61cd9bffa57 100644 --- a/sql/sql_trigger.cc +++ b/sql/sql_trigger.cc @@ -525,7 +525,7 @@ end: /* If we are under LOCK TABLES we should restore original state of meta-data locks. Otherwise call to close_thread_tables() will take care about both - TABLE instance created by reopen_name_locked_table() and meta-data lock. + TABLE instance created by reopen_name_locked_table() and metadata lock. */ if (thd->locked_tables && tables && tables->table) mdl_downgrade_exclusive_lock(&thd->mdl_context, @@ -1872,7 +1872,7 @@ Table_triggers_list::change_table_name_in_trignames(const char *old_db_name, i.e. it either will complete successfully, or will fail leaving files in their initial state. Also this method assumes that subject table is not renamed to itself. - This method needs to be called under an exclusive table name lock. + This method needs to be called under an exclusive table metadata lock. @retval FALSE Success @retval TRUE Error @@ -1894,8 +1894,8 @@ bool Table_triggers_list::change_table_name(THD *thd, const char *db, /* This method interfaces the mysql server code protected by - either LOCK_open mutex or with an exclusive table name lock. - In the future, only an exclusive table name lock will be enough. + either LOCK_open mutex or with an exclusive metadata lock. + In the future, only an exclusive metadata lock will be enough. */ #ifndef DBUG_OFF if (mdl_is_exclusive_lock_owner(&thd->mdl_context, 0, db, old_table)) diff --git a/sql/table.h b/sql/table.h index e7d7e2b08bf..48caf962894 100644 --- a/sql/table.h +++ b/sql/table.h @@ -408,7 +408,6 @@ struct TABLE_SHARE bool db_low_byte_first; /* Portable row format */ bool crashed; bool is_view; - bool name_lock, replace_with_name_lock; ulong table_map_id; /* for row-based replication */ ulonglong table_map_version;