diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h index 83c356a8879..8ae0b062729 100644 --- a/storage/innobase/include/lock0lock.h +++ b/storage/innobase/include/lock0lock.h @@ -485,7 +485,7 @@ lock_rec_unlock( /** Release the explicit locks of a committing transaction, and release possible other transactions waiting because of these locks. */ -void lock_trx_release_locks(trx_t* trx); +void lock_release(trx_t* trx); /*********************************************************************//** Calculates the fold value of a page file address: used in inserting or diff --git a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h index c92907236e6..913e2d25172 100644 --- a/storage/innobase/include/trx0sys.h +++ b/storage/innobase/include/trx0sys.h @@ -628,12 +628,7 @@ public: The caller should already have handled trx_id==0 specially. */ ut_ad(trx_id); - if (caller_trx && caller_trx->id == trx_id) - { - if (do_ref_count) - caller_trx->reference(); - return caller_trx; - } + ut_ad(!caller_trx || caller_trx->id != trx_id || !do_ref_count); trx_t *trx= 0; LF_PINS *pins= caller_trx ? get_pins(caller_trx) : lf_hash_get_pins(&hash); @@ -648,9 +643,25 @@ public: lf_hash_search_unpin(pins); if ((trx= element->trx)) { DBUG_ASSERT(trx_id == trx->id); - if (do_ref_count) - trx->reference(); ut_d(validate_element(trx)); + if (do_ref_count) + { + /* + We have an early state check here to avoid committer + starvation in a wait loop for transaction references, + when there's a stream of trx_sys.find() calls from other + threads. The trx->state may change to COMMITTED after + trx->mutex is released, and it will have to be rechecked + by the caller after reacquiring the mutex. + */ + trx_mutex_enter(trx); + const trx_state_t state= trx->state; + trx_mutex_exit(trx); + if (state == TRX_STATE_COMMITTED_IN_MEMORY) + trx= NULL; + else + trx->reference(); + } } mutex_exit(&element->mutex); } diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h index 3aa357ab2e4..da690a3c65e 100644 --- a/storage/innobase/include/trx0trx.h +++ b/storage/innobase/include/trx0trx.h @@ -699,10 +699,9 @@ Normally, only the thread that is currently associated with a running transaction may access (read and modify) the trx object, and it may do so without holding any mutex. The following are exceptions to this: -* trx_rollback_resurrected() may access resurrected (connectionless) -transactions while the system is already processing new user -transactions. The trx_sys.mutex and trx->is_recovered prevent -a race condition between it and trx_commit(). +* trx_rollback_recovered() may access resurrected (connectionless) +transactions (state == TRX_STATE_ACTIVE && is_recovered) +while the system is already processing new user transactions (!is_recovered). * trx_print_low() may access transactions not associated with the current thread. The caller must be holding lock_sys.mutex. @@ -839,10 +838,6 @@ public: Transitions to COMMITTED are protected by trx_t::mutex. */ trx_state_t state; - /** whether this is a recovered transaction that should be - rolled back by trx_rollback_or_clean_recovered(). - Protected by trx_t::mutex for transactions that are in trx_sys. */ - bool is_recovered; ReadView read_view; /*!< consistent read view used in the transaction, or NULL if not yet set */ @@ -852,6 +847,15 @@ public: by trx_t::mutex). */ /* These fields are not protected by any mutex. */ + + /** false=normal transaction, true=recovered (must be rolled back) + or disconnected transaction in XA PREPARE STATE. + + This field is accessed by the thread that owns the transaction, + without holding any mutex. + There is only one foreign-thread access in trx_print_low() + and a possible race condition with trx_disconnect_prepared(). */ + bool is_recovered; const char* op_info; /*!< English text describing the current operation, or an empty string */ diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 90049ad10a7..5104c2f4505 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -4274,23 +4274,17 @@ lock_check_dict_lock( } #endif /* UNIV_DEBUG */ -/*********************************************************************//** -Releases transaction locks, and releases possible other transactions waiting -because of these locks. */ -static -void -lock_release( -/*=========*/ - trx_t* trx) /*!< in/out: transaction */ +/** Release the explicit locks of a committing transaction, +and release possible other transactions waiting because of these locks. */ +void lock_release(trx_t* trx) { - lock_t* lock; ulint count = 0; trx_id_t max_trx_id = trx_sys.get_max_trx_id(); - ut_ad(lock_mutex_own()); + lock_mutex_enter(); ut_ad(!trx_mutex_own(trx)); - for (lock = UT_LIST_GET_LAST(trx->lock.trx_locks); + for (lock_t* lock = UT_LIST_GET_LAST(trx->lock.trx_locks); lock != NULL; lock = UT_LIST_GET_LAST(trx->lock.trx_locks)) { @@ -4330,6 +4324,8 @@ lock_release( ++count; } + + lock_mutex_exit(); } /* True if a lock mode is S or X */ @@ -4784,7 +4780,7 @@ lock_table_queue_validate( lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock)) { /* lock->trx->state cannot change from or to NOT_STARTED - while we are holding the trx_sys.mutex. It may change + while we are holding the lock_sys.mutex. It may change from ACTIVE or PREPARED to PREPARED or COMMITTED. */ trx_mutex_enter(lock->trx); check_trx_state(lock->trx); @@ -5390,13 +5386,13 @@ lock_rec_convert_impl_to_expl_for_trx( trx_t* trx, /*!< in/out: active transaction */ ulint heap_no)/*!< in: rec heap number to lock */ { + ut_ad(trx->is_referenced()); ut_ad(page_rec_is_leaf(rec)); ut_ad(!rec_is_metadata(rec, index)); DEBUG_SYNC_C("before_lock_rec_convert_impl_to_expl_for_trx"); lock_mutex_enter(); trx_mutex_enter(trx); - ut_ad(trx->is_referenced()); ut_ad(!trx_state_eq(trx, TRX_STATE_NOT_STARTED)); if (!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY) @@ -6235,27 +6231,6 @@ lock_unlock_table_autoinc( } } -/** Release the explicit locks of a committing transaction, -and release possible other transactions waiting because of these locks. */ -void lock_trx_release_locks(trx_t* trx) -{ - ut_ad(UT_LIST_GET_LEN(trx->lock.trx_locks)); - - lock_mutex_enter(); - lock_release(trx); - trx->lock.n_rec_locks = 0; - /* We don't remove the locks one by one from the vector for - efficiency reasons. We simply reset it because we would have - released all the locks anyway. */ - - trx->lock.table_locks.clear(); - - ut_ad(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0); - ut_ad(ib_vector_is_empty(trx->autoinc_locks)); - lock_mutex_exit(); - mem_heap_empty(trx->lock.lock_heap); -} - static inline dberr_t lock_trx_handle_wait_low(trx_t* trx) { ut_ad(lock_mutex_own()); diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc index b6206c9b3be..475babb6195 100644 --- a/storage/innobase/trx/trx0purge.cc +++ b/storage/innobase/trx/trx0purge.cc @@ -252,7 +252,7 @@ trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr) /* After the purge thread has been given permission to exit, we may roll back transactions (trx->undo_no==0) in THD::cleanup() invoked from unlink_thd() in fast shutdown, - or in trx_rollback_resurrected() in slow shutdown. + or in trx_rollback_recovered() in slow shutdown. Before any transaction-generating background threads or the purge have been started, recv_recovery_rollback_active() can diff --git a/storage/innobase/trx/trx0roll.cc b/storage/innobase/trx/trx0roll.cc index 709ec98259c..a52724444ec 100644 --- a/storage/innobase/trx/trx0roll.cc +++ b/storage/innobase/trx/trx0roll.cc @@ -764,12 +764,8 @@ static my_bool trx_rollback_recovered_callback(rw_trx_hash_element_t *element, mutex_enter(&element->mutex); if (trx_t *trx= element->trx) { - /* The trx->is_recovered flag and trx->state are set - atomically under the protection of the trx->mutex in - trx_t::commit_state(). We do not want to accidentally clean up - a non-recovered transaction here. */ mutex_enter(&trx->mutex); - if (trx->is_recovered && trx_state_eq(trx, TRX_STATE_ACTIVE)) + if (trx_state_eq(trx, TRX_STATE_ACTIVE) && trx->is_recovered) trx_list->push_back(trx); mutex_exit(&trx->mutex); } @@ -815,7 +811,8 @@ void trx_rollback_recovered(bool all) ut_ad(trx); ut_d(trx_mutex_enter(trx)); - ut_ad(trx->is_recovered && trx_state_eq(trx, TRX_STATE_ACTIVE)); + ut_ad(trx->is_recovered); + ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE)); ut_d(trx_mutex_exit(trx)); if (!srv_is_being_started && !srv_undo_sources && srv_fast_shutdown) @@ -831,6 +828,22 @@ void trx_rollback_recovered(bool all) ut_ad(!srv_undo_sources); ut_ad(srv_fast_shutdown); discard: + /* Note: before kill_server() invoked innobase_end() via + unireg_end(), it invoked close_connections(), which should initiate + the rollback of any user transactions via THD::cleanup() in the + connection threads, and wait for all THD::cleanup() to complete. + So, no active user transactions should exist at this point. + + srv_undo_sources=false was cleared early in innobase_end(). + + Generally, the server guarantees that all connections using + InnoDB must be disconnected by the time we are reaching this code, + be it during shutdown or UNINSTALL PLUGIN. + + Because there is no possible race condition with any + concurrent user transaction, we do not have to invoke + trx->commit_state() or wait for !trx->is_referenced() + before trx_sys.deregister_rw(trx). */ trx_sys.deregister_rw(trx); trx_free_at_shutdown(trx); } diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index 6cbf0c273d9..f612be4d177 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -463,6 +463,9 @@ void trx_free(trx_t*& trx) /** Transition to committed state, to release implicit locks. */ inline void trx_t::commit_state() { + ut_ad(state == TRX_STATE_PREPARED + || state == TRX_STATE_PREPARED_RECOVERED + || state == TRX_STATE_ACTIVE); /* This makes the transaction committed in memory and makes its changes to data visible to other transactions. NOTE that there is a small discrepancy from the strict formal visibility rules here: a @@ -475,23 +478,9 @@ inline void trx_t::commit_state() makes modifications to the database, will get an lsn larger than the committing transaction T. In the case where the log flush fails, and T never gets committed, also T2 will never get committed. */ - ut_ad(trx_mutex_own(this)); - ut_ad(state != TRX_STATE_NOT_STARTED); - ut_ad(state != TRX_STATE_COMMITTED_IN_MEMORY - || (is_recovered && !UT_LIST_GET_LEN(lock.trx_locks))); + trx_mutex_enter(this); state= TRX_STATE_COMMITTED_IN_MEMORY; - - /* If the background thread trx_rollback_or_clean_recovered() - is still active then there is a chance that the rollback - thread may see this trx as COMMITTED_IN_MEMORY and goes ahead - to clean it up calling trx_cleanup_at_db_startup(). This can - happen in the case we are committing a trx here that is left - in PREPARED state during the crash. Note that commit of the - rollback of a PREPARED trx happens in the recovery thread - while the rollback of other transactions happen in the - background thread. To avoid this race we unconditionally unset - the is_recovered flag. */ - is_recovered= false; + trx_mutex_exit(this); ut_ad(id || !is_referenced()); } @@ -499,18 +488,24 @@ inline void trx_t::commit_state() inline void trx_t::release_locks() { DBUG_ASSERT(state == TRX_STATE_COMMITTED_IN_MEMORY); + DBUG_ASSERT(!is_referenced()); if (UT_LIST_GET_LEN(lock.trx_locks)) - lock_trx_release_locks(this); - else - lock.table_locks.clear(); // Work around a bug + { + lock_release(this); + lock.n_rec_locks = 0; + ut_ad(UT_LIST_GET_LEN(lock.trx_locks) == 0); + ut_ad(ib_vector_is_empty(autoinc_locks)); + mem_heap_empty(lock.lock_heap); + } + + lock.table_locks.clear(); /* outside "if" to work around MDEV-20483 */ } /** At shutdown, frees a transaction object. */ void trx_free_at_shutdown(trx_t *trx) { - trx_mutex_enter(trx); ut_ad(trx->is_recovered); ut_a(trx_state_eq(trx, TRX_STATE_PREPARED) || trx_state_eq(trx, TRX_STATE_PREPARED_RECOVERED) @@ -525,7 +520,6 @@ trx_free_at_shutdown(trx_t *trx) ut_a(trx->magic_n == TRX_MAGIC_N); trx->commit_state(); - trx_mutex_exit(trx); trx->release_locks(); trx_undo_free_at_shutdown(trx); @@ -1369,9 +1363,7 @@ trx_commit_in_memory( DBUG_LOG("trx", "Autocommit in memory: " << trx); trx->state = TRX_STATE_NOT_STARTED; } else { - trx_mutex_enter(trx); trx->commit_state(); - trx_mutex_exit(trx); if (trx->id) { trx_sys.deregister_rw(trx); @@ -1397,6 +1389,7 @@ trx_commit_in_memory( } else { trx_update_mod_tables_timestamp(trx); MONITOR_INC(MONITOR_TRX_RW_COMMIT); + trx->is_recovered = false; } }