diff --git a/mysql-test/suite/innodb/r/lock_move_wait_lock_race.result b/mysql-test/suite/innodb/r/lock_move_wait_lock_race.result new file mode 100644 index 00000000000..572fbc9b1d1 --- /dev/null +++ b/mysql-test/suite/innodb/r/lock_move_wait_lock_race.result @@ -0,0 +1,34 @@ +CREATE TABLE t (pk int PRIMARY KEY, c varchar(10)) ENGINE=InnoDB; +INSERT INTO t VALUES (10, "0123456789"); +connection default; +BEGIN; +SELECT * FROM t WHERE c = 10 FOR UPDATE; +pk c +connect trx2, localhost,root,,; +BEGIN; +SET DEBUG_SYNC="lock_wait_start SIGNAL trx2_start_waiting"; +SET DEBUG_SYNC="lock_wait_end SIGNAL trx2_wait_end WAIT_FOR trx2_cont_upd"; +SET DEBUG_SYNC="lock_rec_store_on_page_infimum_end SIGNAL trx2_moved_locks WAIT_FOR trx2_cont"; +UPDATE t SET c = NULL WHERE pk = 10; +connect trx3, localhost,root,,; +SET DEBUG_SYNC="now WAIT_FOR trx2_start_waiting"; +SET innodb_lock_wait_timeout=1; +BEGIN; +SET DEBUG_SYNC="lock_wait_start SIGNAL trx3_start_waiting WAIT_FOR trx3_cont_waiting"; +SET DEBUG_SYNC="lock_sys_t_cancel_enter SIGNAL trx3_cancel_enter WAIT_FOR trx3_cont_cancel_waiting"; +UPDATE t SET c = "abcdefghij" WHERE pk = 10; +connection default; +SET DEBUG_SYNC="now WAIT_FOR trx3_start_waiting"; +COMMIT; +SET DEBUG_SYNC="now WAIT_FOR trx2_wait_end"; +SET DEBUG_SYNC="now SIGNAL trx3_cont_waiting"; +SET DEBUG_SYNC="now WAIT_FOR trx3_cancel_enter"; +SET DEBUG_SYNC="now SIGNAL trx2_cont_upd"; +SET DEBUG_SYNC="now WAIT_FOR trx2_moved_locks"; +SET DEBUG_SYNC="now SIGNAL trx3_cont_cancel_waiting"; +SET DEBUG_SYNC="now SIGNAL trx2_cont"; +disconnect trx2; +disconnect trx3; +connection default; +SET DEBUG_SYNC="RESET"; +DROP TABLE t; diff --git a/mysql-test/suite/innodb/t/lock_move_wait_lock_race.test b/mysql-test/suite/innodb/t/lock_move_wait_lock_race.test new file mode 100644 index 00000000000..3a04c7127c8 --- /dev/null +++ b/mysql-test/suite/innodb/t/lock_move_wait_lock_race.test @@ -0,0 +1,58 @@ +--source include/have_innodb.inc +--source include/count_sessions.inc +--source include/have_debug.inc +--source include/have_debug_sync.inc + +CREATE TABLE t (pk int PRIMARY KEY, c varchar(10)) ENGINE=InnoDB; +INSERT INTO t VALUES (10, "0123456789"); + +--connection default +BEGIN; +SELECT * FROM t WHERE c = 10 FOR UPDATE; + +--connect(trx2, localhost,root,,) +BEGIN; +SET DEBUG_SYNC="lock_wait_start SIGNAL trx2_start_waiting"; +SET DEBUG_SYNC="lock_wait_end SIGNAL trx2_wait_end WAIT_FOR trx2_cont_upd"; +SET DEBUG_SYNC="lock_rec_store_on_page_infimum_end SIGNAL trx2_moved_locks WAIT_FOR trx2_cont"; +################# +# We need to update clustered record without changing ordering fields and +# changing the size of non-ordering fields to cause locks moving from deleted +# record to infimum. +### +--send UPDATE t SET c = NULL WHERE pk = 10 + + +--connect(trx3, localhost,root,,) +SET DEBUG_SYNC="now WAIT_FOR trx2_start_waiting"; +################# +# The condition wariable waiting in lock_wait() must be finished by timeout +### +SET innodb_lock_wait_timeout=1; +BEGIN; +SET DEBUG_SYNC="lock_wait_start SIGNAL trx3_start_waiting WAIT_FOR trx3_cont_waiting"; +SET DEBUG_SYNC="lock_sys_t_cancel_enter SIGNAL trx3_cancel_enter WAIT_FOR trx3_cont_cancel_waiting"; +--send UPDATE t SET c = "abcdefghij" WHERE pk = 10 + +--connection default +SET DEBUG_SYNC="now WAIT_FOR trx3_start_waiting"; +COMMIT; +SET DEBUG_SYNC="now WAIT_FOR trx2_wait_end"; +SET DEBUG_SYNC="now SIGNAL trx3_cont_waiting"; +SET DEBUG_SYNC="now WAIT_FOR trx3_cancel_enter"; +SET DEBUG_SYNC="now SIGNAL trx2_cont_upd"; +SET DEBUG_SYNC="now WAIT_FOR trx2_moved_locks"; +################# +# If the bug is not fixed, there will be assertion failure here, because trx2 +# moved trx3 lock from deleted record to infimum when trx3 tried to cancel the +# lock. +### +SET DEBUG_SYNC="now SIGNAL trx3_cont_cancel_waiting"; +SET DEBUG_SYNC="now SIGNAL trx2_cont"; + +--disconnect trx2 +--disconnect trx3 +--connection default +SET DEBUG_SYNC="RESET"; +DROP TABLE t; +--source include/wait_until_count_sessions.inc diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h index b67a1011f6b..16acd031177 100644 --- a/storage/innobase/include/lock0lock.h +++ b/storage/innobase/include/lock0lock.h @@ -891,8 +891,8 @@ public: /** Cancel a waiting lock request. @tparam check_victim whether to check for DB_DEADLOCK - @param lock waiting lock request @param trx active transaction + @param lock waiting lock request @retval DB_SUCCESS if no lock existed @retval DB_DEADLOCK if trx->lock.was_chosen_as_deadlock_victim was set @retval DB_LOCK_WAIT if the lock was canceled */ diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 9577dfc62aa..2b30b9b1a03 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -46,12 +46,12 @@ Created 5/7/1996 Heikki Tuuri #include "srv0mon.h" #include "que0que.h" #include "scope.h" +#include #include #ifdef WITH_WSREP #include -#include #endif /* WITH_WSREP */ /** The value of innodb_deadlock_detect */ @@ -1882,6 +1882,7 @@ check_trx_error: if (row_lock_wait) lock_sys.wait_resume(trx->mysql_thd, suspend_time, my_hrtime_coarse()); + /* Cache trx->lock.wait_lock to avoid unnecessary atomic variable load */ if (lock_t *lock= trx->lock.wait_lock) { lock_sys_t::cancel(trx, lock); @@ -1905,6 +1906,12 @@ void lock_wait_end(trx_t *trx) ut_d(const auto state= trx->state); ut_ad(state == TRX_STATE_COMMITTED_IN_MEMORY || state == TRX_STATE_ACTIVE || state == TRX_STATE_PREPARED); + /* lock_wait() checks trx->lock.was_chosen_as_deadlock_victim flag before + requesting lock_sys.wait_mutex, and if the flag is set, it returns error, + what causes transaction rollback, which can reset trx->lock.wait_thr before + deadlock resolution starts cancelling victim's waiting lock. That's why we + don't check trx->lock.wait_thr here if the function was called from deadlock + resolution function. */ ut_ad(from_deadlock || trx->lock.wait_thr); if (trx->lock.was_chosen_as_deadlock_victim) @@ -3193,6 +3200,8 @@ lock_rec_store_on_page_infimum( ut_ad(block->page.frame == page_align(rec)); const page_id_t id{block->page.id()}; + ut_d(SCOPE_EXIT( + []() { DEBUG_SYNC_C("lock_rec_store_on_page_infimum_end"); })); LockGuard g{lock_sys.rec_hash, id}; lock_rec_move(g.cell(), *block, id, g.cell(), id, @@ -5770,17 +5779,30 @@ void lock_sys_t::cancel_lock_wait_for_trx(trx_t *trx) /** Cancel a waiting lock request. @tparam check_victim whether to check for DB_DEADLOCK -@param lock waiting lock request @param trx active transaction +@param lock waiting lock request @retval DB_SUCCESS if no lock existed @retval DB_DEADLOCK if trx->lock.was_chosen_as_deadlock_victim was set @retval DB_LOCK_WAIT if the lock was canceled */ template dberr_t lock_sys_t::cancel(trx_t *trx, lock_t *lock) { + DEBUG_SYNC_C("lock_sys_t_cancel_enter"); mysql_mutex_assert_owner(&lock_sys.wait_mutex); - ut_ad(trx->lock.wait_lock == lock); ut_ad(trx->state == TRX_STATE_ACTIVE); + /* trx->lock.wait_lock may be changed by other threads as long as + we are not holding lock_sys.latch. + + So, trx->lock.wait_lock==lock does not necessarily hold, but both + pointers should be valid, because other threads cannot assign + trx->lock.wait_lock=nullptr (or invalidate *lock) while we are + holding lock_sys.wait_mutex. Also, the type of trx->lock.wait_lock + (record or table lock) cannot be changed by other threads. So, it is + safe to call lock->is_table() while not holding lock_sys.latch. If + we have to release and reacquire lock_sys.wait_mutex, we must reread + trx->lock.wait_lock. We must also reread trx->lock.wait_lock after + lock_sys.latch acquiring, as it can be changed to not-null in lock moving + functions even if we hold lock_sys.wait_mutex. */ dberr_t err= DB_SUCCESS; /* This would be too large for a memory transaction, except in the DB_DEADLOCK case, which was already tested in lock_trx_handle_wait(). */ @@ -5802,6 +5824,15 @@ dberr_t lock_sys_t::cancel(trx_t *trx, lock_t *lock) } else { + /* This function is invoked from the thread which executes the + transaction. Table locks are requested before record locks. Some other + transaction can't change trx->lock.wait_lock from table to record for the + current transaction at this point, because the current transaction has not + requested record locks yet. There is no need to move any table locks by + other threads. And trx->lock.wait_lock can't be set to null while we are + holding lock_sys.wait_mutex. That's why there is no need to reload + trx->lock.wait_lock here. */ + ut_ad(lock == trx->lock.wait_lock); resolve_table_lock: dict_table_t *table= lock->un_member.tab_lock.table; if (!table->lock_mutex_trylock()) @@ -5812,6 +5843,7 @@ resolve_table_lock: mysql_mutex_unlock(&lock_sys.wait_mutex); table->lock_mutex_lock(); mysql_mutex_lock(&lock_sys.wait_mutex); + /* Cache trx->lock.wait_lock under the corresponding latches. */ lock= trx->lock.wait_lock; if (!lock) goto retreat; @@ -5821,6 +5853,10 @@ resolve_table_lock: goto retreat; } } + else + /* Cache trx->lock.wait_lock under the corresponding latches if + it was not cached yet */ + lock= trx->lock.wait_lock; if (lock->is_waiting()) lock_cancel_waiting_and_release(lock); /* Even if lock->is_waiting() did not hold above, we must return @@ -5844,6 +5880,7 @@ retreat: mysql_mutex_unlock(&lock_sys.wait_mutex); lock_sys.wr_lock(SRW_LOCK_CALL); mysql_mutex_lock(&lock_sys.wait_mutex); + /* Cache trx->lock.wait_lock under the corresponding latches. */ lock= trx->lock.wait_lock; /* Even if waiting lock was cancelled while lock_sys.wait_mutex was unlocked, we need to return deadlock error if transaction was chosen @@ -5855,6 +5892,9 @@ retreat: } else { + /* Cache trx->lock.wait_lock under the corresponding latches if + it was not cached yet */ + lock= trx->lock.wait_lock; resolve_record_lock: if (lock->is_waiting()) lock_cancel_waiting_and_release(lock); @@ -5876,6 +5916,7 @@ resolve_record_lock: void lock_sys_t::cancel(trx_t *trx) { mysql_mutex_lock(&lock_sys.wait_mutex); + /* Cache trx->lock.wait_lock to avoid unnecessary atomic variable load */ if (lock_t *lock= trx->lock.wait_lock) { /* Dictionary transactions must be immune to KILL, because they @@ -5943,6 +5984,7 @@ dberr_t lock_trx_handle_wait(trx_t *trx) mysql_mutex_lock(&lock_sys.wait_mutex); if (trx->lock.was_chosen_as_deadlock_victim) err= DB_DEADLOCK; + /* Cache trx->lock.wait_lock to avoid unnecessary atomic variable load */ else if (lock_t *wait_lock= trx->lock.wait_lock) err= lock_sys_t::cancel(trx, wait_lock); lock_sys.deadlock_check();