MDEV-34212 InnoDB transaction recovery is incorrect

trx_undo_mem_create_at_db_start(): Invoke recv_sys_t::recover()
instead of buf_page_get_gen(), so that all undo log pages will be
recovered correctly. Failing to do this could prevent InnoDB from
starting up due to "Data structure corruption", or it could
lead to a situation where InnoDB starts up but some
transactions have been recovered incorrectly.

recv_sys_t::recover(): Only acquire a buffer-fix on the pages,
not a shared latch. This is adequate protection, because this function
is only being invoked during early startup when no "users" are modifying
buffer pool pages. The only writes are due to server bootstrap
(the data files being created) or crash recovery (changes from
ib_logfile0 being applied).

buf_page_get_gen(): Assert that the function is not invoked in any other
mode while crash recovery is in progress (unless recv_sys.after_apply
holds), and that the special mode BUF_GET_RECOVER is only used during
crash recovery or server bootstrap.

All this should really have been part of
commit 850d61736deba354377634cf344256ee850b93b7 (MDEV-32042).
This commit is contained in:
Marko Mäkelä 2024-05-22 12:30:30 +03:00 committed by Oleksandr Byelkin
parent 6c0eb29ddd
commit 727b549310
4 changed files with 17 additions and 7 deletions

View File

@ -2587,6 +2587,15 @@ buf_page_get_gen(
{
ulint retries = 0;
/* BUF_GET_RECOVER is only used by recv_sys_t::recover(),
which must be invoked during early server startup when crash
recovery may be in progress. The only case when it may be
invoked outside recovery is when dict_create() has initialized
a new database and is invoking dict_boot(). In this case, the
LSN will be small. */
ut_ad(mode == BUF_GET_RECOVER
? recv_recovery_is_on() || log_sys.get_lsn() < 50000
: !recv_recovery_is_on() || recv_sys.after_apply);
ut_ad(!mtr || mtr->is_active());
ut_ad(mtr || mode == BUF_PEEK_IF_IN_POOL);
ut_ad((rw_latch == RW_S_LATCH)
@ -2608,6 +2617,7 @@ buf_page_get_gen(
/* The caller may pass a dummy page size,
because it does not really matter. */
break;
case BUF_GET_RECOVER:
case BUF_GET:
ut_ad(!mtr->is_freeing_tree());
fil_space_t* s = fil_space_get(page_id.space());

View File

@ -45,6 +45,7 @@ Created 11/5/1995 Heikki Tuuri
/** @name Modes for buf_page_get_gen */
/* @{ */
#define BUF_GET 10 /*!< get always */
#define BUF_GET_RECOVER 9 /*!< like BUF_GET, but in recv_sys.recover() */
#define BUF_GET_IF_IN_POOL 11 /*!< get if in pool */
#define BUF_PEEK_IF_IN_POOL 12 /*!< get if in pool, do not make
the block young in the LRU list */

View File

@ -3678,8 +3678,8 @@ recv_sys_t::recover(const page_id_t page_id, mtr_t *mtr, dberr_t *err)
{
if (!recovery_on)
must_read:
return buf_page_get_gen(page_id, 0, RW_S_LATCH, nullptr, BUF_GET, mtr,
err);
return buf_page_get_gen(page_id, 0, RW_NO_LATCH, nullptr, BUF_GET_RECOVER,
mtr, err);
mysql_mutex_lock(&mutex);
map::iterator p= pages.find(page_id);
@ -3728,7 +3728,7 @@ recv_sys_t::recover(const page_id_t page_id, mtr_t *mtr, dberr_t *err)
goto corrupted;
}
mtr->page_lock(block, RW_S_LATCH);
mtr->page_lock(block, RW_NO_LATCH);
return block;
}

View File

@ -980,7 +980,7 @@ trx_undo_mem_create_at_db_start(trx_rseg_t *rseg, ulint id, uint32_t page_no)
mtr.start();
const page_id_t page_id{rseg->space->id, page_no};
const buf_block_t* block = buf_page_get(page_id, 0, RW_X_LATCH, &mtr);
const buf_block_t* block = recv_sys.recover(page_id, &mtr, nullptr);
if (UNIV_UNLIKELY(!block)) {
corrupted:
mtr.commit();
@ -1094,9 +1094,8 @@ corrupted_type:
undo->last_page_no = last_addr.page;
undo->top_page_no = last_addr.page;
const buf_block_t* last = buf_page_get(
page_id_t(rseg->space->id, undo->last_page_no), 0,
RW_X_LATCH, &mtr);
const buf_block_t* last = recv_sys.recover(
page_id_t(rseg->space->id, undo->last_page_no), &mtr, nullptr);
if (UNIV_UNLIKELY(!last)) {
goto corrupted_undo;