MDEV-31185 rw_trx_hash_t::find() unpins pins too early
rw_trx_hash_t::find() acquires element->mutex, then unpins pins, used for lf_hash element search. After that the "element" can be deallocated and reused by some other thread. If we take a look rw_trx_hash_t::insert()->lf_hash_insert()->lf_alloc_new() calls, we will not find any element->mutex acquisition, as it was not initialized yet before it's allocation. rw_trx_hash_t::insert() can reuse the chunk, unpinned in rw_trx_hash_t::find(). The scenario is the following: 1. Thread 1 have just executed lf_hash_search() in rw_trx_hash_t::find(), but have not acquired element->mutex yet. 2. Thread 2 have removed the element from hash table with rw_trx_hash_t::erase() call. 3. Thread 1 acquired element->mutex and unpinned pin 2 pin with lf_hash_search_unpin(pins) call. 4. Some thread purged memory of the element. 5. Thread 3 reused the memory for the element, filled element->id, element->trx. 6. Thread 1 crashes with failed "DBUG_ASSERT(trx_id == trx->id)" assertion. Note that trx_t objects are also reused, see the code around trx_pools for details. The fix is to invoke "lf_hash_search_unpin(pins);" after element->trx is stored in local variable in rw_trx_hash_t::find(). Reviewed by: Nikita Malyavin, Marko Mäkelä.
This commit is contained in:
parent
f4ce1e487e
commit
b54e7b0cea
@ -0,0 +1,25 @@
|
||||
create table t1 (a int) engine=innodb STATS_PERSISTENT=0;
|
||||
create table t2 (a int) engine=innodb STATS_PERSISTENT=0;
|
||||
BEGIN;
|
||||
insert into t1 values(1);
|
||||
connect con_1, localhost, root,,;
|
||||
SET DEBUG_SYNC="before_trx_hash_find_element_mutex_enter SIGNAL before_mutex_enter WAIT_FOR cont1";
|
||||
SET DEBUG_SYNC="after_trx_hash_find_element_mutex_enter SIGNAL after_mutex_enter WAIT_FOR cont2";
|
||||
SELECT * FROM t1 WHERE a = 1 FOR UPDATE;
|
||||
connection default;
|
||||
SET DEBUG_SYNC="now WAIT_FOR before_mutex_enter";
|
||||
COMMIT;
|
||||
SET DEBUG_SYNC="now SIGNAL cont1";
|
||||
SET DEBUG_SYNC="now WAIT_FOR after_mutex_enter";
|
||||
insert into t2 values(1);
|
||||
BEGIN;
|
||||
INSERT INTO t2 VALUES(2);
|
||||
SET DEBUG_SYNC="now SIGNAL cont2";
|
||||
connection con_1;
|
||||
a
|
||||
1
|
||||
disconnect con_1;
|
||||
connection default;
|
||||
DROP TABLE t1;
|
||||
DROP TABLE t2;
|
||||
SET DEBUG_SYNC="reset";
|
87
mysql-test/suite/innodb/t/trx_sys_t_find_lf_hash_error.test
Normal file
87
mysql-test/suite/innodb/t/trx_sys_t_find_lf_hash_error.test
Normal file
@ -0,0 +1,87 @@
|
||||
--source include/have_innodb.inc
|
||||
--source include/have_debug.inc
|
||||
--source include/have_debug_sync.inc
|
||||
--source include/count_sessions.inc
|
||||
|
||||
create table t1 (a int) engine=innodb STATS_PERSISTENT=0;
|
||||
create table t2 (a int) engine=innodb STATS_PERSISTENT=0;
|
||||
|
||||
BEGIN; # trx1
|
||||
# register rw-transaction in trx_sys.rw_trx_hash
|
||||
insert into t1 values(1);
|
||||
|
||||
--connect (con_1, localhost, root,,)
|
||||
SET DEBUG_SYNC="before_trx_hash_find_element_mutex_enter SIGNAL before_mutex_enter WAIT_FOR cont1";
|
||||
SET DEBUG_SYNC="after_trx_hash_find_element_mutex_enter SIGNAL after_mutex_enter WAIT_FOR cont2";
|
||||
|
||||
# trx2 is converting implicit lock of trx1 to explicit one, it's invoking
|
||||
# ▾ l_search
|
||||
# ▾ lf_hash_search_using_hash_value
|
||||
# ▾ lf_hash_search
|
||||
# ▾ rw_trx_hash_t::find
|
||||
# ▾ trx_sys_t::find
|
||||
# ▾ lock_rec_convert_impl_to_expl
|
||||
# rw_trx_hash_t::find returns lf_hash element, pin 2 is pinned,
|
||||
# but element->mutex has not been acquired yet, what allows trx1 element to be
|
||||
# removed from trx_sys.rw_trx_hash at one hand, and at the other hand, the
|
||||
# content of the element is still valid as it's pinned.
|
||||
#
|
||||
# trx2
|
||||
--send SELECT * FROM t1 WHERE a = 1 FOR UPDATE
|
||||
--connection default
|
||||
SET DEBUG_SYNC="now WAIT_FOR before_mutex_enter";
|
||||
--disable_query_log
|
||||
SET @saved_dbug = @@debug_dbug;
|
||||
|
||||
# Usually pinbox purgatory is purged either when the number of elements in
|
||||
# purgatory is greater then some limit(see lf_pinbox_free()), or when thread
|
||||
# invokes rw_trx_hash_t::put_pins() explicitly. For this test the first
|
||||
# variant was choosen. The following option makes lf_pinbox_free() to purge
|
||||
# pinbox purgatory on each call, ignoring pins->purgatory_count.
|
||||
SET DEBUG_DBUG='+d,unconditional_pinbox_free';
|
||||
--enable_query_log
|
||||
|
||||
# trx1 is committed and removed from trx_sys.rw_trx_hash. It can be done as
|
||||
# trx2 has not been acquired element->mutex yet.
|
||||
COMMIT;
|
||||
--disable_query_log
|
||||
SET DEBUG_DBUG = @saved_dbug;
|
||||
--enable_query_log
|
||||
|
||||
# Let trx2 to acquire element->mutex and unpin pin 2
|
||||
SET DEBUG_SYNC="now SIGNAL cont1";
|
||||
SET DEBUG_SYNC="now WAIT_FOR after_mutex_enter";
|
||||
|
||||
--disable_query_log
|
||||
SET @saved_dbug = @@debug_dbug;
|
||||
SET DEBUG_DBUG='+d,unconditional_pinbox_free';
|
||||
--enable_query_log
|
||||
# trx3 commits and invokes lf_pinbox_free(), which purges pin 2 of trx2 and
|
||||
# places its pointer on trx_sys.rw_trx_hash.hash.alloc.top.
|
||||
insert into t2 values(1);
|
||||
--disable_query_log
|
||||
SET DEBUG_DBUG = @saved_dbug;
|
||||
--enable_query_log
|
||||
|
||||
BEGIN; # trx4
|
||||
# trx_sys.rw_trx_hash.hash.alloc.top points to "freed" trx2 lf_hash element,
|
||||
# lf_alloc_new() gets the pointer from trx_sys.rw_trx_hash.hash.alloc.top,
|
||||
# so the memory for lf_hash element will be reused for trx4 if MDEV-31185 is
|
||||
# not fixed
|
||||
INSERT INTO t2 VALUES(2);
|
||||
|
||||
# let trx2 to invoke DBUG_ASSERT(trx_id == trx->id) and crash if MDEV-31185
|
||||
# is not fixed
|
||||
SET DEBUG_SYNC="now SIGNAL cont2";
|
||||
|
||||
--connection con_1
|
||||
# trx 2 assertion failure if MDEV-31185 is not fixed
|
||||
--reap
|
||||
--disconnect con_1
|
||||
--connection default
|
||||
DROP TABLE t1;
|
||||
DROP TABLE t2;
|
||||
|
||||
SET DEBUG_SYNC="reset";
|
||||
|
||||
--source include/wait_until_count_sessions.inc
|
@ -270,6 +270,9 @@ void lf_pinbox_free(LF_PINS *pins, void *addr)
|
||||
add_to_purgatory(pins, addr);
|
||||
if (pins->purgatory_count % LF_PURGATORY_SIZE == 0)
|
||||
lf_pinbox_real_free(pins);
|
||||
DBUG_EXECUTE_IF("unconditional_pinbox_free",
|
||||
if (pins->purgatory_count % LF_PURGATORY_SIZE)
|
||||
lf_pinbox_real_free(pins););
|
||||
}
|
||||
|
||||
struct st_harvester {
|
||||
|
@ -824,6 +824,10 @@ retry:
|
||||
|
||||
element= (TDC_element*) lf_hash_search_using_hash_value(&tdc_hash,
|
||||
thd->tdc_hash_pins, hash_value, (uchar*) key, key_length);
|
||||
/* It's safe to unpin the pins here, because an empty element was inserted
|
||||
above, "empty" means at least element->share = 0. Some other thread can't
|
||||
delete it while element->share == 0. And element->share is also protected
|
||||
with element->LOCK_table_share mutex. */
|
||||
lf_hash_search_unpin(thd->tdc_hash_pins);
|
||||
DBUG_ASSERT(element);
|
||||
|
||||
|
@ -246,9 +246,14 @@ static XID_cache_element *xid_cache_search(THD *thd, XID *xid)
|
||||
xid->key(), xid->key_length());
|
||||
if (element)
|
||||
{
|
||||
/* The element can be removed from lf_hash by other thread, but
|
||||
element->acquire_recovered() will return false in this case. */
|
||||
if (!element->acquire_recovered())
|
||||
element= 0;
|
||||
lf_hash_search_unpin(thd->xid_hash_pins);
|
||||
/* Once the element is acquired (i.e. got the ACQUIRED bit) by this thread,
|
||||
only this thread can delete it. The deletion happens in xid_cache_delete().
|
||||
See also the XID_cache_element documentation. */
|
||||
DEBUG_SYNC(thd, "xa_after_search");
|
||||
}
|
||||
return element;
|
||||
|
@ -632,9 +632,21 @@ public:
|
||||
sizeof(trx_id_t)));
|
||||
if (element)
|
||||
{
|
||||
/* rw_trx_hash_t::erase() sets element->trx to nullptr under
|
||||
element->mutex protection before removing the element from hash table.
|
||||
If the element was removed before the mutex acquisition, element->trx
|
||||
will be equal to nullptr. */
|
||||
DEBUG_SYNC_C("before_trx_hash_find_element_mutex_enter");
|
||||
mutex_enter(&element->mutex);
|
||||
/* element_trx can't point to reused object now. If transaction was
|
||||
deregistered before element->mutex acquisition, element->trx is nullptr.
|
||||
It can't be deregistered while element->mutex is held. */
|
||||
trx_t *element_trx = element->trx;
|
||||
lf_hash_search_unpin(pins);
|
||||
if ((trx= element->trx)) {
|
||||
/* The *element can be reused now, as element->trx value is stored
|
||||
locally in element_trx. */
|
||||
DEBUG_SYNC_C("after_trx_hash_find_element_mutex_enter");
|
||||
if ((trx= element_trx)) {
|
||||
DBUG_ASSERT(trx_id == trx->id);
|
||||
ut_d(validate_element(trx));
|
||||
if (do_ref_count)
|
||||
@ -650,12 +662,19 @@ public:
|
||||
trx_mutex_enter(trx);
|
||||
const trx_state_t state= trx->state;
|
||||
trx_mutex_exit(trx);
|
||||
/* trx_t::commit_in_memory() sets the state to
|
||||
TRX_STATE_COMMITTED_IN_MEMORY before deregistering the transaction.
|
||||
It also waits for any implicit-to-explicit lock conversions to cease
|
||||
after deregistering. */
|
||||
if (state == TRX_STATE_COMMITTED_IN_MEMORY)
|
||||
trx= NULL;
|
||||
else
|
||||
trx->reference();
|
||||
}
|
||||
}
|
||||
/* element's lifetime is equal to the hash lifetime, that's why
|
||||
element->mutex is valid here despite the element is unpinned. In the
|
||||
worst case some thread will wait for element->mutex releasing. */
|
||||
mutex_exit(&element->mutex);
|
||||
}
|
||||
if (!caller_trx)
|
||||
|
Loading…
x
Reference in New Issue
Block a user