From c9b0c5a15c76b6611000d77d64efb7bca41b5f8d Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 8 Dec 2008 22:09:59 +0200 Subject: [PATCH 1/2] Fixed mutexes lock order in maria_close(): LOCK_trn_list -> MARIA_SHARE::intern_lock (then will be WL to revert the order). (BUG#40981 Maria: deadlock between checkpoint and maria_close() when freeing state info) storage/maria/ma_checkpoint.c: The argument added to the function to use it in maria_close(). storage/maria/ma_close.c: Locking/unlocking MARIA_SHARE::intern_lock added to use correct order of the mutexes taking. storage/maria/ma_state.c: Removed assert becase now we have externally locked mutex in maria_close(). The argument added to the _ma_remove_not_visible_states_with_lock() to use it in maria_close(). _ma_remove_not_visible_states_with_lock() fixed tio be usable from maria_chk where transaction manager is not initialized. storage/maria/ma_state.h: The argument added to the function to use it in maria_close(). storage/maria/maria_def.h: Fixed comment to the variable. storage/maria/trnman.c: The debugging assert added. New function to detect transaction manager initialization added (maria_chk do not initialize it). storage/maria/trnman_public.h: New function to detect transaction manager initialization added (maria_chk do not initialize it). --- storage/maria/ma_checkpoint.c | 2 +- storage/maria/ma_close.c | 26 +++++++++++++++++--------- storage/maria/ma_state.c | 21 +++++++++++++++------ storage/maria/ma_state.h | 3 ++- storage/maria/maria_def.h | 2 +- storage/maria/trnman.c | 12 ++++++++++++ storage/maria/trnman_public.h | 1 + 7 files changed, 49 insertions(+), 18 deletions(-) diff --git a/storage/maria/ma_checkpoint.c b/storage/maria/ma_checkpoint.c index 91cb026d5ed..4538865fa02 100644 --- a/storage/maria/ma_checkpoint.c +++ b/storage/maria/ma_checkpoint.c @@ -1059,7 +1059,7 @@ static int collect_tables(LEX_STRING *str, LSN checkpoint_start_log_horizon) since last call. */ pthread_mutex_unlock(&share->intern_lock); - _ma_remove_not_visible_states_with_lock(share); + _ma_remove_not_visible_states_with_lock(share, FALSE); pthread_mutex_lock(&share->intern_lock); if (share->in_checkpoint & MARIA_CHECKPOINT_SHOULD_FREE_ME) diff --git a/storage/maria/ma_close.c b/storage/maria/ma_close.c index 9463ad8078d..3f91a3723a1 100644 --- a/storage/maria/ma_close.c +++ b/storage/maria/ma_close.c @@ -124,22 +124,30 @@ int maria_close(register MARIA_HA *info) } #endif DBUG_ASSERT(share->now_transactional == share->base.born_transactional); - if (share->in_checkpoint == MARIA_CHECKPOINT_LOOKS_AT_ME) + /* + We assign -1 because checkpoint does not need to flush (in case we + have concurrent checkpoint if no then we do not need it here also) + */ + share->kfile.file= -1; + + /* + Remember share->history for future opens + + We have to unlock share->intern_lock then lock it after + LOCK_trn_list (trnman_lock()) to avoid dead locks. + */ + pthread_mutex_unlock(&share->intern_lock); + _ma_remove_not_visible_states_with_lock(share, TRUE); + pthread_mutex_lock(&share->intern_lock); + + if (share->in_checkpoint & MARIA_CHECKPOINT_LOOKS_AT_ME) { - share->kfile.file= -1; /* because Checkpoint does not need to flush */ /* we cannot my_free() the share, Checkpoint would see a bad pointer */ share->in_checkpoint|= MARIA_CHECKPOINT_SHOULD_FREE_ME; } else share_can_be_freed= TRUE; - /* - Remember share->history for future opens - Here we take share->intern_lock followed by trans_lock but this is - safe as no other thread one can use 'share' here. - */ - share->state_history= _ma_remove_not_visible_states(share->state_history, - 1, 0); if (share->state_history) { MARIA_STATE_HISTORY_CLOSED *history; diff --git a/storage/maria/ma_state.c b/storage/maria/ma_state.c index 785f1689a37..4f7cebcd86a 100644 --- a/storage/maria/ma_state.c +++ b/storage/maria/ma_state.c @@ -166,7 +166,6 @@ MARIA_STATE_HISTORY if (all && parent == &org_history->next) { - DBUG_ASSERT(trnman_is_locked == 0); /* There is only one state left. Delete this if it's visible for all */ if (last_trid < trnman_get_min_trid()) { @@ -181,6 +180,11 @@ MARIA_STATE_HISTORY /** @brief Remove not used state history + @param share Maria table information + @param all 1 if we should delete the first state if it's + visible for all. For the moment this is only used + on close() of table. + @notes share and trnman are not locked. @@ -189,14 +193,19 @@ MARIA_STATE_HISTORY takes share->intern_lock. */ -void _ma_remove_not_visible_states_with_lock(MARIA_SHARE *share) +void _ma_remove_not_visible_states_with_lock(MARIA_SHARE *share, + my_bool all) { - trnman_lock(); + my_bool is_lock_trman; + if ((is_lock_trman= trman_is_inited())) + trnman_lock(); + pthread_mutex_lock(&share->intern_lock); - share->state_history= _ma_remove_not_visible_states(share->state_history, 0, - 1); + share->state_history= _ma_remove_not_visible_states(share->state_history, + all, 1); pthread_mutex_unlock(&share->intern_lock); - trnman_unlock(); + if (is_lock_trman) + trnman_unlock(); } diff --git a/storage/maria/ma_state.h b/storage/maria/ma_state.h index 968c526cd98..b5790aa17b8 100644 --- a/storage/maria/ma_state.h +++ b/storage/maria/ma_state.h @@ -78,6 +78,7 @@ my_bool _ma_trnman_end_trans_hook(TRN *trn, my_bool commit, my_bool _ma_row_visible_always(MARIA_HA *info); my_bool _ma_row_visible_non_transactional_table(MARIA_HA *info); my_bool _ma_row_visible_transactional_table(MARIA_HA *info); -void _ma_remove_not_visible_states_with_lock(struct st_maria_share *share); +void _ma_remove_not_visible_states_with_lock(struct st_maria_share *share, + my_bool all); void _ma_remove_table_from_trnman(struct st_maria_share *share, TRN *trn); void _ma_reset_history(struct st_maria_share *share); diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h index 125cd461570..bd2aaba29d7 100644 --- a/storage/maria/maria_def.h +++ b/storage/maria/maria_def.h @@ -353,7 +353,7 @@ typedef struct st_maria_share PAGECACHE_FILE kfile; /* Shared keyfile */ File data_file; /* Shared data file */ int mode; /* mode of file on open */ - uint reopen; /* How many times reopened */ + uint reopen; /* How many times opened */ uint in_trans; /* Number of references by trn */ uint w_locks, r_locks, tot_locks; /* Number of read/write locks */ uint block_size; /* block_size of keyfile & data file*/ diff --git a/storage/maria/trnman.c b/storage/maria/trnman.c index 538dc75b663..f7afea51a63 100644 --- a/storage/maria/trnman.c +++ b/storage/maria/trnman.c @@ -874,6 +874,7 @@ my_bool trnman_exists_active_transactions(TrID min_id, TrID max_id, if (!trnman_is_locked) pthread_mutex_lock(&LOCK_trn_list); + safe_mutex_assert_owner(&LOCK_trn_list); for (trn= active_list_min.next; trn != &active_list_max; trn= trn->next) { if (trn->trid > min_id && trn->trid < max_id) @@ -897,6 +898,7 @@ void trnman_lock() pthread_mutex_lock(&LOCK_trn_list); } + /** unlock transaction list */ @@ -905,3 +907,13 @@ void trnman_unlock() { pthread_mutex_unlock(&LOCK_trn_list); } + + +/** + Is trman initialized +*/ + +my_bool trman_is_inited() +{ + return (short_trid_to_active_trn != NULL); +} diff --git a/storage/maria/trnman_public.h b/storage/maria/trnman_public.h index b89ce23df37..d29ada159ac 100644 --- a/storage/maria/trnman_public.h +++ b/storage/maria/trnman_public.h @@ -69,5 +69,6 @@ my_bool trnman_exists_active_transactions(TrID min_id, TrID max_id, #define transid_korr(P) uint6korr(P) void trnman_lock(); void trnman_unlock(); +my_bool trman_is_inited(); C_MODE_END #endif From c9a29373e154dc7fb4f60246457dbf24d6e6f3c2 Mon Sep 17 00:00:00 2001 From: Guilhem Bichot Date: Tue, 9 Dec 2008 10:56:02 +0100 Subject: [PATCH 2/2] Fix for BUG#41159 "Maria: deadlock between checkpoint and maria_write() when extending data file". No testcase (concurrency, tested by pushbuild2). storage/maria/ha_maria.cc: a comment about what Sanja had discovered a while ago storage/maria/ma_bitmap.c: guard against concurrent flush of bitmap by checkpoint: we must have close_lock here storage/maria/ma_blockrec.c: comment fixed for new behaviour storage/maria/ma_checkpoint.c: Release intern_lock before flushing bitmap, or it deadlocks with allocate_and_write_block_record() when that function needs to increase the data file's length (that function makes bitmap non flushable, then wants intern_lock to increase data_file_length). The checkpoint section which looks at the share's content (bitmap, state) needs to be protected from the possible my_free-ing done by a concurrent maria_close(); intern_lock is not enough as both maria_close() and checkpoint now have to release it in the middle. So the protection is done with close_lock. in_checkpoint is now protected by close_lock in places where it was protected by intern_lock. storage/maria/ma_close.c: hold close_lock in maria_close() from start to end, to guard against checkpoint trying to flush bitmap while we have my_free'd its structures, for example. intern_lock was not enough as both maria_close() and checkpoint have to release it in the middle, to avoid deadlocks. storage/maria/ma_open.c: initialize new mutex storage/maria/ma_recovery.c: a comment about what Sanja had discovered a while ago storage/maria/maria_def.h: comment. new mutex protecting the close of a MARIA_SHARE, from _start_ to _end_ of it. --- storage/maria/ha_maria.cc | 4 +++ storage/maria/ma_bitmap.c | 1 + storage/maria/ma_blockrec.c | 2 +- storage/maria/ma_checkpoint.c | 48 +++++++++++++++++++++++++++-------- storage/maria/ma_close.c | 8 ++++++ storage/maria/ma_open.c | 1 + storage/maria/ma_recovery.c | 2 ++ storage/maria/maria_def.h | 18 +++++++++++-- 8 files changed, 71 insertions(+), 13 deletions(-) diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc index cbd7dc5e64c..decfb94bc35 100644 --- a/storage/maria/ha_maria.cc +++ b/storage/maria/ha_maria.cc @@ -2365,6 +2365,10 @@ int ha_maria::external_lock(THD *thd, int lock_type) We always re-enable, don't rely on thd->transaction.on as it is sometimes reset to true after unlocking (see mysql_truncate() for a partitioned table based on Maria). + Note that we can come here without having an exclusive lock on the + table, for example in this case: + external_lock(F_(WR|RD)LCK); thr_lock() which fails due to lock + abortion; external_lock(F_UNLCK). */ if (_ma_reenable_logging_for_table(file, TRUE)) DBUG_RETURN(1); diff --git a/storage/maria/ma_bitmap.c b/storage/maria/ma_bitmap.c index e4e1fc98003..bf2c9291981 100644 --- a/storage/maria/ma_bitmap.c +++ b/storage/maria/ma_bitmap.c @@ -260,6 +260,7 @@ my_bool _ma_bitmap_init(MARIA_SHARE *share, File file) my_bool _ma_bitmap_end(MARIA_SHARE *share) { my_bool res= _ma_bitmap_flush(share); + safe_mutex_assert_owner(&share->close_lock); pthread_mutex_destroy(&share->bitmap.bitmap_lock); pthread_cond_destroy(&share->bitmap.bitmap_cond); delete_dynamic(&share->bitmap.pinned_pages); diff --git a/storage/maria/ma_blockrec.c b/storage/maria/ma_blockrec.c index 2709a2bba5c..59ce455c000 100644 --- a/storage/maria/ma_blockrec.c +++ b/storage/maria/ma_blockrec.c @@ -453,7 +453,7 @@ my_bool _ma_once_end_block_record(MARIA_SHARE *share) { /* We de-assign the id even though index has not been flushed, this is ok - as intern_lock serializes us with a Checkpoint looking at our share. + as close_lock serializes us with a Checkpoint looking at our share. */ translog_deassign_id_from_share(share); } diff --git a/storage/maria/ma_checkpoint.c b/storage/maria/ma_checkpoint.c index 4538865fa02..ac44bbdf7a1 100644 --- a/storage/maria/ma_checkpoint.c +++ b/storage/maria/ma_checkpoint.c @@ -784,10 +784,8 @@ static int collect_tables(LEX_STRING *str, LSN checkpoint_start_log_horizon) !(share->in_checkpoint & MARIA_CHECKPOINT_SEEN_IN_LOOP)) { /* - Why we didn't take intern_lock above: table had in_checkpoint==0 so no - thread could set in_checkpoint. And no thread needs to know that we - are setting in_checkpoint, because only maria_close() needs it and - cannot run now as we hold THR_LOCK_maria. + Apart from us, only maria_close() reads/sets in_checkpoint but cannot + run now as we hold THR_LOCK_maria. */ /* This table is relevant for checkpoint and not already seen. Mark it, @@ -887,7 +885,10 @@ static int collect_tables(LEX_STRING *str, LSN checkpoint_start_log_horizon) my_bool ignore_share; if (!(share->in_checkpoint & MARIA_CHECKPOINT_LOOKS_AT_ME)) { - /* No need for a mutex to read the above, only us can write this flag */ + /* + No need for a mutex to read the above, only us can write *this* bit of + the in_checkpoint bitmap + */ continue; } /** @@ -956,6 +957,14 @@ static int collect_tables(LEX_STRING *str, LSN checkpoint_start_log_horizon) /* OS file descriptors are ints which we stored in 4 bytes */ compile_time_assert(sizeof(int) <= 4); + /* + Protect against maria_close() (which does some memory freeing in + MARIA_FILE_BITMAP) with close_lock. intern_lock is not + sufficient as we, as well as maria_close(), are going to unlock + intern_lock in the middle of manipulating the table. Serializing us and + maria_close() should help avoid problems. + */ + pthread_mutex_lock(&share->close_lock); pthread_mutex_lock(&share->intern_lock); /* Tables in a normal state have their two file descriptors open. @@ -1045,6 +1054,20 @@ static int collect_tables(LEX_STRING *str, LSN checkpoint_start_log_horizon) each checkpoint if the table was once written and then not anymore. */ } + } + /* + _ma_bitmap_flush_all() may wait, so don't keep intern_lock as + otherwise this would deadlock with allocate_and_write_block_record() + calling _ma_set_share_data_file_length() + */ + pthread_mutex_unlock(&share->intern_lock); + + if (!ignore_share) + { + /* + share->bitmap is valid because it's destroyed under close_lock which + we hold. + */ if (_ma_bitmap_flush_all(share)) { sync_error= 1; @@ -1057,23 +1080,28 @@ static int collect_tables(LEX_STRING *str, LSN checkpoint_start_log_horizon) Clean up any unused states. TODO: Only do this call if there has been # (10?) ended transactions since last call. + We had to release intern_lock to respect lock order with LOCK_trn_list. */ - pthread_mutex_unlock(&share->intern_lock); _ma_remove_not_visible_states_with_lock(share, FALSE); - pthread_mutex_lock(&share->intern_lock); if (share->in_checkpoint & MARIA_CHECKPOINT_SHOULD_FREE_ME) { - /* maria_close() left us to free the share */ - pthread_mutex_unlock(&share->intern_lock); + /* + maria_close() left us free the share. When it run it set share->id + to 0. As it run before we locked close_lock, we should have seen this + and so this assertion should be true: + */ + DBUG_ASSERT(ignore_share); pthread_mutex_destroy(&share->intern_lock); + pthread_mutex_unlock(&share->close_lock); + pthread_mutex_destroy(&share->close_lock); my_free((uchar *)share, MYF(0)); } else { /* share goes back to normal state */ share->in_checkpoint= 0; - pthread_mutex_unlock(&share->intern_lock); + pthread_mutex_unlock(&share->close_lock); } /* diff --git a/storage/maria/ma_close.c b/storage/maria/ma_close.c index eb5f5b8ddaa..6141986fd72 100644 --- a/storage/maria/ma_close.c +++ b/storage/maria/ma_close.c @@ -47,6 +47,7 @@ int maria_close(register MARIA_HA *info) if (maria_lock_database(info,F_UNLCK)) error=my_errno; } + pthread_mutex_lock(&share->close_lock); pthread_mutex_lock(&share->intern_lock); if (share->options & HA_OPTION_READ_ONLY_DATA) @@ -169,10 +170,17 @@ int maria_close(register MARIA_HA *info) } pthread_mutex_unlock(&THR_LOCK_maria); pthread_mutex_unlock(&share->intern_lock); + pthread_mutex_unlock(&share->close_lock); if (share_can_be_freed) { (void) pthread_mutex_destroy(&share->intern_lock); + (void) pthread_mutex_destroy(&share->close_lock); my_free((uchar *)share, MYF(0)); + /* + If share cannot be freed, it's because checkpoint has previously + recorded to include this share in the checkpoint and so is soon going to + look at some of its content (share->in_checkpoint/id/last_version). + */ } my_free(info->ftparser_param, MYF(MY_ALLOW_ZERO_PTR)); if (info->dfile.file >= 0) diff --git a/storage/maria/ma_open.c b/storage/maria/ma_open.c index 309ccf78ea1..88f7feb41fd 100644 --- a/storage/maria/ma_open.c +++ b/storage/maria/ma_open.c @@ -819,6 +819,7 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) pthread_mutex_init(&share->intern_lock, MY_MUTEX_INIT_FAST); pthread_mutex_init(&share->key_del_lock, MY_MUTEX_INIT_FAST); pthread_cond_init(&share->key_del_cond, 0); + pthread_mutex_init(&share->close_lock, MY_MUTEX_INIT_FAST); for (i=0; ikeyinfo[i].root_lock, NULL)); VOID(my_rwlock_init(&share->mmap_lock, NULL)); diff --git a/storage/maria/ma_recovery.c b/storage/maria/ma_recovery.c index baebdcf2eb4..77dfc6b568a 100644 --- a/storage/maria/ma_recovery.c +++ b/storage/maria/ma_recovery.c @@ -3276,6 +3276,8 @@ void _ma_tmp_disable_logging_for_table(MARIA_HA *info, /** Re-enables logging for a table which had it temporarily disabled. + Only the thread which disabled logging is allowed to reenable it. + @param info table @param flush_pages if function needs to flush pages first */ diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h index bd2aaba29d7..52b14b80aa6 100644 --- a/storage/maria/maria_def.h +++ b/storage/maria/maria_def.h @@ -362,7 +362,10 @@ typedef struct st_maria_share myf write_flag; enum data_file_type data_file_type; enum pagecache_page_type page_type; /* value depending transactional */ - uint8 in_checkpoint; /**< if Checkpoint looking at table */ + /** + if Checkpoint looking at table; protected by close_lock or THR_LOCK_maria + */ + uint8 in_checkpoint; my_bool temporary; /* Below flag is needed to make log tables work with concurrent insert */ my_bool is_log_table; @@ -386,9 +389,20 @@ typedef struct st_maria_share #ifdef THREAD THR_LOCK lock; void (*lock_restore_status)(void *); - pthread_mutex_t intern_lock; /* Locking for use with _locking */ + /** + Protects kfile, dfile, most members of the state, state disk writes, + versioning information (like in_trans, state_history). + @todo find the exhaustive list. + */ + pthread_mutex_t intern_lock; pthread_mutex_t key_del_lock; pthread_cond_t key_del_cond; + /** + _Always_ held while closing table; prevents checkpoint from looking at + structures freed during closure (like bitmap). If you need close_lock and + intern_lock, lock them in this order. + */ + pthread_mutex_t close_lock; #endif my_off_t mmaped_length; uint nonmmaped_inserts; /* counter of writing in