From 90b95c6149c72f43aa2324353a76f370d018a5ac Mon Sep 17 00:00:00 2001 From: mariadb-DebarunBanerjee Date: Tue, 30 Apr 2024 16:55:27 +0530 Subject: [PATCH] MDEV-33543 Server hang caused by InnoDB change buffer Issue: When getting a page (buf_page_get_gen) with no latch option (RW_NO_LATCH), the caller is not expected to follow the B-tree latching order. However in buf_page_get_low we try to acquire shared page latch unconditionally to wait for a page that is being loaded by another thread concurrently. In general it could lead to latch order violation and deadlock. Currently it affects the change buffer insert path btr_latch_prev() which tries to load the previous page out of order with RW_NO_LATCH and two concurrent inserts into IBUF tree cause deadlock. This problem is introduced in 10.6 by following commit. commit 9436c778c3adba7c29dab5649668433d71e086f2 (MDEV-27058) Fix: While trying to latch a page with RW_NO_LATCH, always use the "*lock_try" interface and retry operation on failure after unfixing the page. --- storage/innobase/buf/buf0buf.cc | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index dc90160a668..16592c05405 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -2579,7 +2579,15 @@ ignore_unfixed: in buf_page_t::read_complete() or buf_pool_t::corrupted_evict(), or after buf_zip_decompress() in this function. */ - block->page.lock.s_lock(); + if (rw_latch != RW_NO_LATCH) { + block->page.lock.s_lock(); + } else if (!block->page.lock.s_lock_try()) { + /* For RW_NO_LATCH, we should not try to acquire S or X + latch directly as we could be violating the latching + order resulting in deadlock. Instead we try latching the + page and retry in case of a failure. */ + goto wait_for_read; + } state = block->page.state(); ut_ad(state < buf_page_t::READ_FIX || state >= buf_page_t::WRITE_FIX); @@ -2587,15 +2595,15 @@ ignore_unfixed: block->page.lock.s_unlock(); if (UNIV_UNLIKELY(state < buf_page_t::UNFIXED)) { - block->page.unfix(); if (UNIV_UNLIKELY(id == page_id)) { /* The page read was completed, and another thread marked the page as free while we were waiting. */ - goto ignore_unfixed; + goto ignore_block; } ut_ad(id == page_id_t{~0ULL}); + block->page.unfix(); if (++retries < BUF_PAGE_READ_MAX_RETRIES) { goto loop; @@ -2634,6 +2642,7 @@ free_unfixed_block: if (UNIV_UNLIKELY(!block->page.frame)) { if (!block->page.lock.x_lock_try()) { wait_for_unzip: +wait_for_read: /* The page is being read or written, or another thread is executing buf_zip_decompress() in buf_page_get_low() on it. */