diff --git a/mysql-test/suite/innodb/r/deadlock_on_lock_upgrade.result b/mysql-test/suite/innodb/r/deadlock_on_lock_upgrade.result new file mode 100644 index 00000000000..d015e74c65d --- /dev/null +++ b/mysql-test/suite/innodb/r/deadlock_on_lock_upgrade.result @@ -0,0 +1,78 @@ +# +# Bug #23755664 DEADLOCK WITH 3 CONCURRENT DELETES BY UNIQUE KEY +# +connection default; +CREATE TABLE `t`( +`id` INT, +`a` INT DEFAULT NULL, +PRIMARY KEY(`id`), +UNIQUE KEY `u`(`a`) +) ENGINE=InnoDB; +INSERT INTO t (`id`,`a`) VALUES +(1,1), +(2,9999), +(3,10000); +connect deleter,localhost,root,,; +connect holder,localhost,root,,; +connect waiter,localhost,root,,; +connection deleter; +SET DEBUG_SYNC = +'lock_sec_rec_read_check_and_lock_has_locked + SIGNAL deleter_has_locked + WAIT_FOR waiter_has_locked'; +DELETE FROM t WHERE a = 9999; +connection holder; +SET DEBUG_SYNC= +'now WAIT_FOR deleter_has_locked'; +SET DEBUG_SYNC= +'lock_sec_rec_read_check_and_lock_has_locked SIGNAL holder_has_locked'; +DELETE FROM t WHERE a = 9999; +connection waiter; +SET DEBUG_SYNC= +'now WAIT_FOR holder_has_locked'; +SET DEBUG_SYNC= +'lock_sec_rec_read_check_and_lock_has_locked SIGNAL waiter_has_locked'; +DELETE FROM t WHERE a = 9999; +connection deleter; +connection holder; +connection waiter; +connection default; +disconnect deleter; +disconnect holder; +disconnect waiter; +DROP TABLE `t`; +SET DEBUG_SYNC='reset'; +CREATE TABLE `t`( +`id` INT NOT NULL PRIMARY KEY +) ENGINE=InnoDB; +INSERT INTO t (`id`) VALUES (1), (2); +connect holder,localhost,root,,; +connect waiter,localhost,root,,; +connection holder; +BEGIN; +SELECT id FROM t WHERE id=1 FOR UPDATE; +id +1 +SELECT id FROM t WHERE id=2 FOR UPDATE; +id +2 +connection waiter; +SET DEBUG_SYNC= +'lock_wait_suspend_thread_enter SIGNAL waiter_will_wait'; +SELECT id FROM t WHERE id = 1 FOR UPDATE; +connection holder; +SET DEBUG_SYNC= +'now WAIT_FOR waiter_will_wait'; +SELECT * FROM t FOR UPDATE; +id +1 +2 +COMMIT; +connection waiter; +id +1 +connection default; +disconnect holder; +disconnect waiter; +DROP TABLE `t`; +SET DEBUG_SYNC='reset'; diff --git a/mysql-test/suite/innodb/t/deadlock_on_lock_upgrade.test b/mysql-test/suite/innodb/t/deadlock_on_lock_upgrade.test new file mode 100644 index 00000000000..7f3f34ce063 --- /dev/null +++ b/mysql-test/suite/innodb/t/deadlock_on_lock_upgrade.test @@ -0,0 +1,143 @@ +--echo # +--echo # Bug #23755664 DEADLOCK WITH 3 CONCURRENT DELETES BY UNIQUE KEY +--echo # + +--source include/have_innodb.inc +--source include/have_debug.inc +--source include/have_debug_sync.inc +--source include/count_sessions.inc + +--connection default +# There are various scenarious in which a transaction already holds "half" +# of a record lock (for example, a lock on the record but not on the gap) +# and wishes to "upgrade it" to a full lock (i.e. on both gap and record). +# This is often a cause for a deadlock, if there is another transaction +# which is already waiting for the lock being blocked by us: +# 1. our granted lock for one half +# 2. her waiting lock for the same half +# 3. our waiting lock for the whole + +# +# SCENARIO 1 +# +# In this scenario, three different threads try to delete the same row, +# identified by a secondary index key. +# This kind of operation (besides LOCK_IX on a table) requires +# an LOCK_REC_NOT_GAP|LOCK_REC|LOCK_X lock on a secondary index +# 1. `deleter` is the first to get the required lock +# 2. `holder` enqueues a waiting lock +# 3. `waiter` enqueues right after `holder` +# 4. `deleter` commits, releasing the lock, and granting it to `holder` +# 5. `holder` now observes that the row was deleted, so it needs to +# "seal the gap", by obtaining a LOCK_X|LOCK_REC, but.. +# 6. this causes a deadlock between `holder` and `waiter` +# +# This scenario does not fail if MDEV-10962 is not fixed because of MDEV-30225 +# fix, as the 'holder' does not "seal the gap" after 'deleter' was committed, +# because it was initially sealed, as row_search_mvcc() requests next-key lock +# after MDEV-30225 fix in the case when it requested not-gap lock before the +# fix. +# +# But let the scenario be in the tests, because it can fail if MDEV-30225 +# related code is changed + +CREATE TABLE `t`( + `id` INT, + `a` INT DEFAULT NULL, + PRIMARY KEY(`id`), + UNIQUE KEY `u`(`a`) +) ENGINE=InnoDB; + +INSERT INTO t (`id`,`a`) VALUES + (1,1), + (2,9999), + (3,10000); + +--connect(deleter,localhost,root,,) +--connect(holder,localhost,root,,) +--connect(waiter,localhost,root,,) + + +--connection deleter + SET DEBUG_SYNC = + 'lock_sec_rec_read_check_and_lock_has_locked + SIGNAL deleter_has_locked + WAIT_FOR waiter_has_locked'; + --send DELETE FROM t WHERE a = 9999 + +--connection holder + SET DEBUG_SYNC= + 'now WAIT_FOR deleter_has_locked'; + SET DEBUG_SYNC= + 'lock_sec_rec_read_check_and_lock_has_locked SIGNAL holder_has_locked'; + --send DELETE FROM t WHERE a = 9999 + +--connection waiter + SET DEBUG_SYNC= + 'now WAIT_FOR holder_has_locked'; + SET DEBUG_SYNC= + 'lock_sec_rec_read_check_and_lock_has_locked SIGNAL waiter_has_locked'; + --send DELETE FROM t WHERE a = 9999 + +--connection deleter + --reap + +--connection holder + --reap + +--connection waiter + --reap + +--connection default + +--disconnect deleter +--disconnect holder +--disconnect waiter + +DROP TABLE `t`; +SET DEBUG_SYNC='reset'; + +# SCENARIO 2 +# +# Here, we form a situation in which con1 has LOCK_REC_NOT_GAP on rows 1 and 2 +# con2 waits for lock on row 1, and then con1 wants to upgrade the lock on row 1, +# which might cause a deadlock, unless con1 properly notices that even though the +# lock on row 1 can not be upgraded, a separate LOCK_GAP can be obtaied easily. + +CREATE TABLE `t`( + `id` INT NOT NULL PRIMARY KEY +) ENGINE=InnoDB; + +INSERT INTO t (`id`) VALUES (1), (2); + +--connect(holder,localhost,root,,) +--connect(waiter,localhost,root,,) + +--connection holder + BEGIN; + SELECT id FROM t WHERE id=1 FOR UPDATE; + SELECT id FROM t WHERE id=2 FOR UPDATE; + +--connection waiter + SET DEBUG_SYNC= + 'lock_wait_suspend_thread_enter SIGNAL waiter_will_wait'; + --send SELECT id FROM t WHERE id = 1 FOR UPDATE + +--connection holder + SET DEBUG_SYNC= + 'now WAIT_FOR waiter_will_wait'; + SELECT * FROM t FOR UPDATE; + COMMIT; + +--connection waiter + --reap + +--connection default + +--disconnect holder +--disconnect waiter + +DROP TABLE `t`; +SET DEBUG_SYNC='reset'; + +--source include/wait_until_count_sessions.inc diff --git a/storage/innobase/include/lock0types.h b/storage/innobase/include/lock0types.h index cb04afdf9db..861885fc7a2 100644 --- a/storage/innobase/include/lock0types.h +++ b/storage/innobase/include/lock0types.h @@ -175,6 +175,26 @@ operator<<(std::ostream& out, const lock_rec_t& lock) #endif /* @} */ +/** +Checks if the `mode` is LOCK_S or LOCK_X (possibly ORed with LOCK_WAIT or +LOCK_REC) which means the lock is a +Next Key Lock, a.k.a. LOCK_ORDINARY, as opposed to Predicate Lock, +GAP lock, Insert Intention or Record Lock. +@param mode A mode and flags, of a lock. +@return true if the only bits set in `mode` are LOCK_S or LOCK_X and optionally +LOCK_WAIT or LOCK_REC */ +static inline bool lock_mode_is_next_key_lock(ulint mode) +{ + static_assert(LOCK_ORDINARY == 0, "LOCK_ORDINARY must be 0 (no flags)"); + ut_ad((mode & LOCK_TABLE) == 0); + mode&= ~(LOCK_WAIT | LOCK_REC); + ut_ad((mode & LOCK_WAIT) == 0); + ut_ad((mode & LOCK_TYPE_MASK) == 0); + ut_ad(((mode & ~(LOCK_MODE_MASK)) == LOCK_ORDINARY) == + (mode == LOCK_S || mode == LOCK_X)); + return (mode & ~(LOCK_MODE_MASK)) == LOCK_ORDINARY; +} + /** Lock struct; protected by lock_sys.mutex */ struct ib_lock_t { @@ -231,6 +251,13 @@ struct ib_lock_t return(type_mode & LOCK_REC_NOT_GAP); } + /** @return true if the lock is a Next Key Lock */ + bool is_next_key_lock() const + { + return is_record_lock() + && lock_mode_is_next_key_lock(type_mode); + } + bool is_insert_intention() const { return(type_mode & LOCK_INSERT_INTENTION); diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 26388ad95e2..8e75955c8af 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -1054,6 +1054,14 @@ lock_rec_has_expl( static_cast( precise_mode & LOCK_MODE_MASK)) && !lock_get_wait(lock) + /* If we unfold the following expression, we will see it's + true when: + (heap_no is supremum) + or + (the found lock is LOCK_ORDINARY) + or + (the requested and the found lock modes are equal to each + other and equal to LOCK_REC_GAP | LOCK_REC_NOT_GAP). */ && (!lock_rec_get_rec_not_gap(lock) || (precise_mode & LOCK_REC_NOT_GAP) || heap_no == PAGE_HEAP_NO_SUPREMUM) @@ -1900,6 +1908,46 @@ lock_rec_add_to_queue( type_mode, block, heap_no, index, trx, caller_owns_trx_mutex); } +/** A helper function for lock_rec_lock_slow(), which grants a Next Key Lock +(either LOCK_X or LOCK_S as specified by `mode`) on <`block`,`heap_no`> in the +`index` to the `trx`, assuming that it already has a granted `held_lock`, which +is at least as strong as mode|LOCK_REC_NOT_GAP. It does so by either reusing the +lock if it already covers the gap, or by ensuring a separate GAP Lock, which in +combination with Record Lock satisfies the request. +@param[in] held_lock a lock granted to `trx` which is at least as strong + as mode|LOCK_REC_NOT_GAP +@param[in] mode requested lock mode: LOCK_X or LOCK_S +@param[in] block buffer block containing the record to be locked +@param[in] heap_no heap number of the record to be locked +@param[in] index index of record to be locked +@param[in] trx the transaction requesting the Next Key Lock */ +static void lock_reuse_for_next_key_lock(const lock_t *held_lock, ulint mode, + const buf_block_t *block, + ulint heap_no, dict_index_t *index, + trx_t *trx) +{ + ut_ad(lock_mutex_own()); + ut_ad(trx_mutex_own(trx)); + ut_ad(mode == LOCK_S || mode == LOCK_X); + ut_ad(lock_mode_is_next_key_lock(mode)); + + if (!held_lock->is_record_not_gap()) + { + ut_ad(held_lock->is_next_key_lock()); + return; + } + + /* We have a Record Lock granted, so we only need a GAP Lock. We assume + that GAP Locks do not conflict with anything. Therefore a GAP Lock + could be granted to us right now if we've requested: */ + mode|= LOCK_GAP; + ut_ad(nullptr == lock_rec_other_has_conflicting(mode, block, heap_no, trx)); + + /* It might be the case we already have one, so we first check that. */ + if (lock_rec_has_expl(mode, block, heap_no, trx) == nullptr) + lock_rec_add_to_queue(LOCK_REC | mode, block, heap_no, index, trx, true); +} + /*********************************************************************//** Tries to lock the specified record in the mode requested. If not immediately possible, enqueues a waiting lock request. This is a low-level function @@ -1950,8 +1998,17 @@ lock_rec_lock( lock->type_mode != (ulint(mode) | LOCK_REC) || lock_rec_get_n_bits(lock) <= heap_no) { + + ulint checked_mode= (heap_no != PAGE_HEAP_NO_SUPREMUM && + lock_mode_is_next_key_lock(mode)) + ? mode | LOCK_REC_NOT_GAP + : mode; + + const lock_t *held_lock= + lock_rec_has_expl(checked_mode, block, heap_no, trx); + /* Do nothing if the trx already has a strong enough lock on rec */ - if (!lock_rec_has_expl(mode, block, heap_no, trx)) + if (!held_lock) { if ( #ifdef WITH_WSREP @@ -1978,6 +2035,16 @@ lock_rec_lock( err= DB_SUCCESS_LOCKED_REC; } } + /* If checked_mode == mode, trx already has a strong enough lock on rec */ + else if (checked_mode != mode) + { + /* As check_mode != mode, the mode is Next Key Lock, which can not be + emulated by implicit lock (which are LOCK_REC_NOT_GAP only). */ + ut_ad(!impl); + + lock_reuse_for_next_key_lock(held_lock, mode, block, heap_no, index, + trx); + } } else if (!impl) { @@ -5844,6 +5911,8 @@ lock_sec_rec_read_check_and_lock( ut_ad(lock_rec_queue_validate(FALSE, block, rec, index, offsets)); + DEBUG_SYNC_C("lock_sec_rec_read_check_and_lock_has_locked"); + return(err); }