Fix for BUG#41159 "Maria: deadlock between checkpoint and maria_write() when extending data file".
No testcase (concurrency, tested by pushbuild2). storage/maria/ha_maria.cc: a comment about what Sanja had discovered a while ago storage/maria/ma_bitmap.c: guard against concurrent flush of bitmap by checkpoint: we must have close_lock here storage/maria/ma_blockrec.c: comment fixed for new behaviour storage/maria/ma_checkpoint.c: Release intern_lock before flushing bitmap, or it deadlocks with allocate_and_write_block_record() when that function needs to increase the data file's length (that function makes bitmap non flushable, then wants intern_lock to increase data_file_length). The checkpoint section which looks at the share's content (bitmap, state) needs to be protected from the possible my_free-ing done by a concurrent maria_close(); intern_lock is not enough as both maria_close() and checkpoint now have to release it in the middle. So the protection is done with close_lock. in_checkpoint is now protected by close_lock in places where it was protected by intern_lock. storage/maria/ma_close.c: hold close_lock in maria_close() from start to end, to guard against checkpoint trying to flush bitmap while we have my_free'd its structures, for example. intern_lock was not enough as both maria_close() and checkpoint have to release it in the middle, to avoid deadlocks. storage/maria/ma_open.c: initialize new mutex storage/maria/ma_recovery.c: a comment about what Sanja had discovered a while ago storage/maria/maria_def.h: comment. new mutex protecting the close of a MARIA_SHARE, from _start_ to _end_ of it.
This commit is contained in:
parent
131a6a57a4
commit
c9a29373e1
@ -2365,6 +2365,10 @@ int ha_maria::external_lock(THD *thd, int lock_type)
|
|||||||
We always re-enable, don't rely on thd->transaction.on as it is
|
We always re-enable, don't rely on thd->transaction.on as it is
|
||||||
sometimes reset to true after unlocking (see mysql_truncate() for a
|
sometimes reset to true after unlocking (see mysql_truncate() for a
|
||||||
partitioned table based on Maria).
|
partitioned table based on Maria).
|
||||||
|
Note that we can come here without having an exclusive lock on the
|
||||||
|
table, for example in this case:
|
||||||
|
external_lock(F_(WR|RD)LCK); thr_lock() which fails due to lock
|
||||||
|
abortion; external_lock(F_UNLCK).
|
||||||
*/
|
*/
|
||||||
if (_ma_reenable_logging_for_table(file, TRUE))
|
if (_ma_reenable_logging_for_table(file, TRUE))
|
||||||
DBUG_RETURN(1);
|
DBUG_RETURN(1);
|
||||||
|
@ -260,6 +260,7 @@ my_bool _ma_bitmap_init(MARIA_SHARE *share, File file)
|
|||||||
my_bool _ma_bitmap_end(MARIA_SHARE *share)
|
my_bool _ma_bitmap_end(MARIA_SHARE *share)
|
||||||
{
|
{
|
||||||
my_bool res= _ma_bitmap_flush(share);
|
my_bool res= _ma_bitmap_flush(share);
|
||||||
|
safe_mutex_assert_owner(&share->close_lock);
|
||||||
pthread_mutex_destroy(&share->bitmap.bitmap_lock);
|
pthread_mutex_destroy(&share->bitmap.bitmap_lock);
|
||||||
pthread_cond_destroy(&share->bitmap.bitmap_cond);
|
pthread_cond_destroy(&share->bitmap.bitmap_cond);
|
||||||
delete_dynamic(&share->bitmap.pinned_pages);
|
delete_dynamic(&share->bitmap.pinned_pages);
|
||||||
|
@ -453,7 +453,7 @@ my_bool _ma_once_end_block_record(MARIA_SHARE *share)
|
|||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
We de-assign the id even though index has not been flushed, this is ok
|
We de-assign the id even though index has not been flushed, this is ok
|
||||||
as intern_lock serializes us with a Checkpoint looking at our share.
|
as close_lock serializes us with a Checkpoint looking at our share.
|
||||||
*/
|
*/
|
||||||
translog_deassign_id_from_share(share);
|
translog_deassign_id_from_share(share);
|
||||||
}
|
}
|
||||||
|
@ -784,10 +784,8 @@ static int collect_tables(LEX_STRING *str, LSN checkpoint_start_log_horizon)
|
|||||||
!(share->in_checkpoint & MARIA_CHECKPOINT_SEEN_IN_LOOP))
|
!(share->in_checkpoint & MARIA_CHECKPOINT_SEEN_IN_LOOP))
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
Why we didn't take intern_lock above: table had in_checkpoint==0 so no
|
Apart from us, only maria_close() reads/sets in_checkpoint but cannot
|
||||||
thread could set in_checkpoint. And no thread needs to know that we
|
run now as we hold THR_LOCK_maria.
|
||||||
are setting in_checkpoint, because only maria_close() needs it and
|
|
||||||
cannot run now as we hold THR_LOCK_maria.
|
|
||||||
*/
|
*/
|
||||||
/*
|
/*
|
||||||
This table is relevant for checkpoint and not already seen. Mark it,
|
This table is relevant for checkpoint and not already seen. Mark it,
|
||||||
@ -887,7 +885,10 @@ static int collect_tables(LEX_STRING *str, LSN checkpoint_start_log_horizon)
|
|||||||
my_bool ignore_share;
|
my_bool ignore_share;
|
||||||
if (!(share->in_checkpoint & MARIA_CHECKPOINT_LOOKS_AT_ME))
|
if (!(share->in_checkpoint & MARIA_CHECKPOINT_LOOKS_AT_ME))
|
||||||
{
|
{
|
||||||
/* No need for a mutex to read the above, only us can write this flag */
|
/*
|
||||||
|
No need for a mutex to read the above, only us can write *this* bit of
|
||||||
|
the in_checkpoint bitmap
|
||||||
|
*/
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
@ -956,6 +957,14 @@ static int collect_tables(LEX_STRING *str, LSN checkpoint_start_log_horizon)
|
|||||||
|
|
||||||
/* OS file descriptors are ints which we stored in 4 bytes */
|
/* OS file descriptors are ints which we stored in 4 bytes */
|
||||||
compile_time_assert(sizeof(int) <= 4);
|
compile_time_assert(sizeof(int) <= 4);
|
||||||
|
/*
|
||||||
|
Protect against maria_close() (which does some memory freeing in
|
||||||
|
MARIA_FILE_BITMAP) with close_lock. intern_lock is not
|
||||||
|
sufficient as we, as well as maria_close(), are going to unlock
|
||||||
|
intern_lock in the middle of manipulating the table. Serializing us and
|
||||||
|
maria_close() should help avoid problems.
|
||||||
|
*/
|
||||||
|
pthread_mutex_lock(&share->close_lock);
|
||||||
pthread_mutex_lock(&share->intern_lock);
|
pthread_mutex_lock(&share->intern_lock);
|
||||||
/*
|
/*
|
||||||
Tables in a normal state have their two file descriptors open.
|
Tables in a normal state have their two file descriptors open.
|
||||||
@ -1045,6 +1054,20 @@ static int collect_tables(LEX_STRING *str, LSN checkpoint_start_log_horizon)
|
|||||||
each checkpoint if the table was once written and then not anymore.
|
each checkpoint if the table was once written and then not anymore.
|
||||||
*/
|
*/
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
_ma_bitmap_flush_all() may wait, so don't keep intern_lock as
|
||||||
|
otherwise this would deadlock with allocate_and_write_block_record()
|
||||||
|
calling _ma_set_share_data_file_length()
|
||||||
|
*/
|
||||||
|
pthread_mutex_unlock(&share->intern_lock);
|
||||||
|
|
||||||
|
if (!ignore_share)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
share->bitmap is valid because it's destroyed under close_lock which
|
||||||
|
we hold.
|
||||||
|
*/
|
||||||
if (_ma_bitmap_flush_all(share))
|
if (_ma_bitmap_flush_all(share))
|
||||||
{
|
{
|
||||||
sync_error= 1;
|
sync_error= 1;
|
||||||
@ -1057,23 +1080,28 @@ static int collect_tables(LEX_STRING *str, LSN checkpoint_start_log_horizon)
|
|||||||
Clean up any unused states.
|
Clean up any unused states.
|
||||||
TODO: Only do this call if there has been # (10?) ended transactions
|
TODO: Only do this call if there has been # (10?) ended transactions
|
||||||
since last call.
|
since last call.
|
||||||
|
We had to release intern_lock to respect lock order with LOCK_trn_list.
|
||||||
*/
|
*/
|
||||||
pthread_mutex_unlock(&share->intern_lock);
|
|
||||||
_ma_remove_not_visible_states_with_lock(share, FALSE);
|
_ma_remove_not_visible_states_with_lock(share, FALSE);
|
||||||
pthread_mutex_lock(&share->intern_lock);
|
|
||||||
|
|
||||||
if (share->in_checkpoint & MARIA_CHECKPOINT_SHOULD_FREE_ME)
|
if (share->in_checkpoint & MARIA_CHECKPOINT_SHOULD_FREE_ME)
|
||||||
{
|
{
|
||||||
/* maria_close() left us to free the share */
|
/*
|
||||||
pthread_mutex_unlock(&share->intern_lock);
|
maria_close() left us free the share. When it run it set share->id
|
||||||
|
to 0. As it run before we locked close_lock, we should have seen this
|
||||||
|
and so this assertion should be true:
|
||||||
|
*/
|
||||||
|
DBUG_ASSERT(ignore_share);
|
||||||
pthread_mutex_destroy(&share->intern_lock);
|
pthread_mutex_destroy(&share->intern_lock);
|
||||||
|
pthread_mutex_unlock(&share->close_lock);
|
||||||
|
pthread_mutex_destroy(&share->close_lock);
|
||||||
my_free((uchar *)share, MYF(0));
|
my_free((uchar *)share, MYF(0));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/* share goes back to normal state */
|
/* share goes back to normal state */
|
||||||
share->in_checkpoint= 0;
|
share->in_checkpoint= 0;
|
||||||
pthread_mutex_unlock(&share->intern_lock);
|
pthread_mutex_unlock(&share->close_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -47,6 +47,7 @@ int maria_close(register MARIA_HA *info)
|
|||||||
if (maria_lock_database(info,F_UNLCK))
|
if (maria_lock_database(info,F_UNLCK))
|
||||||
error=my_errno;
|
error=my_errno;
|
||||||
}
|
}
|
||||||
|
pthread_mutex_lock(&share->close_lock);
|
||||||
pthread_mutex_lock(&share->intern_lock);
|
pthread_mutex_lock(&share->intern_lock);
|
||||||
|
|
||||||
if (share->options & HA_OPTION_READ_ONLY_DATA)
|
if (share->options & HA_OPTION_READ_ONLY_DATA)
|
||||||
@ -169,10 +170,17 @@ int maria_close(register MARIA_HA *info)
|
|||||||
}
|
}
|
||||||
pthread_mutex_unlock(&THR_LOCK_maria);
|
pthread_mutex_unlock(&THR_LOCK_maria);
|
||||||
pthread_mutex_unlock(&share->intern_lock);
|
pthread_mutex_unlock(&share->intern_lock);
|
||||||
|
pthread_mutex_unlock(&share->close_lock);
|
||||||
if (share_can_be_freed)
|
if (share_can_be_freed)
|
||||||
{
|
{
|
||||||
(void) pthread_mutex_destroy(&share->intern_lock);
|
(void) pthread_mutex_destroy(&share->intern_lock);
|
||||||
|
(void) pthread_mutex_destroy(&share->close_lock);
|
||||||
my_free((uchar *)share, MYF(0));
|
my_free((uchar *)share, MYF(0));
|
||||||
|
/*
|
||||||
|
If share cannot be freed, it's because checkpoint has previously
|
||||||
|
recorded to include this share in the checkpoint and so is soon going to
|
||||||
|
look at some of its content (share->in_checkpoint/id/last_version).
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
my_free(info->ftparser_param, MYF(MY_ALLOW_ZERO_PTR));
|
my_free(info->ftparser_param, MYF(MY_ALLOW_ZERO_PTR));
|
||||||
if (info->dfile.file >= 0)
|
if (info->dfile.file >= 0)
|
||||||
|
@ -819,6 +819,7 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
|
|||||||
pthread_mutex_init(&share->intern_lock, MY_MUTEX_INIT_FAST);
|
pthread_mutex_init(&share->intern_lock, MY_MUTEX_INIT_FAST);
|
||||||
pthread_mutex_init(&share->key_del_lock, MY_MUTEX_INIT_FAST);
|
pthread_mutex_init(&share->key_del_lock, MY_MUTEX_INIT_FAST);
|
||||||
pthread_cond_init(&share->key_del_cond, 0);
|
pthread_cond_init(&share->key_del_cond, 0);
|
||||||
|
pthread_mutex_init(&share->close_lock, MY_MUTEX_INIT_FAST);
|
||||||
for (i=0; i<keys; i++)
|
for (i=0; i<keys; i++)
|
||||||
VOID(my_rwlock_init(&share->keyinfo[i].root_lock, NULL));
|
VOID(my_rwlock_init(&share->keyinfo[i].root_lock, NULL));
|
||||||
VOID(my_rwlock_init(&share->mmap_lock, NULL));
|
VOID(my_rwlock_init(&share->mmap_lock, NULL));
|
||||||
|
@ -3276,6 +3276,8 @@ void _ma_tmp_disable_logging_for_table(MARIA_HA *info,
|
|||||||
/**
|
/**
|
||||||
Re-enables logging for a table which had it temporarily disabled.
|
Re-enables logging for a table which had it temporarily disabled.
|
||||||
|
|
||||||
|
Only the thread which disabled logging is allowed to reenable it.
|
||||||
|
|
||||||
@param info table
|
@param info table
|
||||||
@param flush_pages if function needs to flush pages first
|
@param flush_pages if function needs to flush pages first
|
||||||
*/
|
*/
|
||||||
|
@ -362,7 +362,10 @@ typedef struct st_maria_share
|
|||||||
myf write_flag;
|
myf write_flag;
|
||||||
enum data_file_type data_file_type;
|
enum data_file_type data_file_type;
|
||||||
enum pagecache_page_type page_type; /* value depending transactional */
|
enum pagecache_page_type page_type; /* value depending transactional */
|
||||||
uint8 in_checkpoint; /**< if Checkpoint looking at table */
|
/**
|
||||||
|
if Checkpoint looking at table; protected by close_lock or THR_LOCK_maria
|
||||||
|
*/
|
||||||
|
uint8 in_checkpoint;
|
||||||
my_bool temporary;
|
my_bool temporary;
|
||||||
/* Below flag is needed to make log tables work with concurrent insert */
|
/* Below flag is needed to make log tables work with concurrent insert */
|
||||||
my_bool is_log_table;
|
my_bool is_log_table;
|
||||||
@ -386,9 +389,20 @@ typedef struct st_maria_share
|
|||||||
#ifdef THREAD
|
#ifdef THREAD
|
||||||
THR_LOCK lock;
|
THR_LOCK lock;
|
||||||
void (*lock_restore_status)(void *);
|
void (*lock_restore_status)(void *);
|
||||||
pthread_mutex_t intern_lock; /* Locking for use with _locking */
|
/**
|
||||||
|
Protects kfile, dfile, most members of the state, state disk writes,
|
||||||
|
versioning information (like in_trans, state_history).
|
||||||
|
@todo find the exhaustive list.
|
||||||
|
*/
|
||||||
|
pthread_mutex_t intern_lock;
|
||||||
pthread_mutex_t key_del_lock;
|
pthread_mutex_t key_del_lock;
|
||||||
pthread_cond_t key_del_cond;
|
pthread_cond_t key_del_cond;
|
||||||
|
/**
|
||||||
|
_Always_ held while closing table; prevents checkpoint from looking at
|
||||||
|
structures freed during closure (like bitmap). If you need close_lock and
|
||||||
|
intern_lock, lock them in this order.
|
||||||
|
*/
|
||||||
|
pthread_mutex_t close_lock;
|
||||||
#endif
|
#endif
|
||||||
my_off_t mmaped_length;
|
my_off_t mmaped_length;
|
||||||
uint nonmmaped_inserts; /* counter of writing in
|
uint nonmmaped_inserts; /* counter of writing in
|
||||||
|
Loading…
x
Reference in New Issue
Block a user