diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc index cbd7dc5e64c..decfb94bc35 100644 --- a/storage/maria/ha_maria.cc +++ b/storage/maria/ha_maria.cc @@ -2365,6 +2365,10 @@ int ha_maria::external_lock(THD *thd, int lock_type) We always re-enable, don't rely on thd->transaction.on as it is sometimes reset to true after unlocking (see mysql_truncate() for a partitioned table based on Maria). + Note that we can come here without having an exclusive lock on the + table, for example in this case: + external_lock(F_(WR|RD)LCK); thr_lock() which fails due to lock + abortion; external_lock(F_UNLCK). */ if (_ma_reenable_logging_for_table(file, TRUE)) DBUG_RETURN(1); diff --git a/storage/maria/ma_bitmap.c b/storage/maria/ma_bitmap.c index e4e1fc98003..bf2c9291981 100644 --- a/storage/maria/ma_bitmap.c +++ b/storage/maria/ma_bitmap.c @@ -260,6 +260,7 @@ my_bool _ma_bitmap_init(MARIA_SHARE *share, File file) my_bool _ma_bitmap_end(MARIA_SHARE *share) { my_bool res= _ma_bitmap_flush(share); + safe_mutex_assert_owner(&share->close_lock); pthread_mutex_destroy(&share->bitmap.bitmap_lock); pthread_cond_destroy(&share->bitmap.bitmap_cond); delete_dynamic(&share->bitmap.pinned_pages); diff --git a/storage/maria/ma_blockrec.c b/storage/maria/ma_blockrec.c index 2709a2bba5c..59ce455c000 100644 --- a/storage/maria/ma_blockrec.c +++ b/storage/maria/ma_blockrec.c @@ -453,7 +453,7 @@ my_bool _ma_once_end_block_record(MARIA_SHARE *share) { /* We de-assign the id even though index has not been flushed, this is ok - as intern_lock serializes us with a Checkpoint looking at our share. + as close_lock serializes us with a Checkpoint looking at our share. */ translog_deassign_id_from_share(share); } diff --git a/storage/maria/ma_checkpoint.c b/storage/maria/ma_checkpoint.c index 4538865fa02..ac44bbdf7a1 100644 --- a/storage/maria/ma_checkpoint.c +++ b/storage/maria/ma_checkpoint.c @@ -784,10 +784,8 @@ static int collect_tables(LEX_STRING *str, LSN checkpoint_start_log_horizon) !(share->in_checkpoint & MARIA_CHECKPOINT_SEEN_IN_LOOP)) { /* - Why we didn't take intern_lock above: table had in_checkpoint==0 so no - thread could set in_checkpoint. And no thread needs to know that we - are setting in_checkpoint, because only maria_close() needs it and - cannot run now as we hold THR_LOCK_maria. + Apart from us, only maria_close() reads/sets in_checkpoint but cannot + run now as we hold THR_LOCK_maria. */ /* This table is relevant for checkpoint and not already seen. Mark it, @@ -887,7 +885,10 @@ static int collect_tables(LEX_STRING *str, LSN checkpoint_start_log_horizon) my_bool ignore_share; if (!(share->in_checkpoint & MARIA_CHECKPOINT_LOOKS_AT_ME)) { - /* No need for a mutex to read the above, only us can write this flag */ + /* + No need for a mutex to read the above, only us can write *this* bit of + the in_checkpoint bitmap + */ continue; } /** @@ -956,6 +957,14 @@ static int collect_tables(LEX_STRING *str, LSN checkpoint_start_log_horizon) /* OS file descriptors are ints which we stored in 4 bytes */ compile_time_assert(sizeof(int) <= 4); + /* + Protect against maria_close() (which does some memory freeing in + MARIA_FILE_BITMAP) with close_lock. intern_lock is not + sufficient as we, as well as maria_close(), are going to unlock + intern_lock in the middle of manipulating the table. Serializing us and + maria_close() should help avoid problems. + */ + pthread_mutex_lock(&share->close_lock); pthread_mutex_lock(&share->intern_lock); /* Tables in a normal state have their two file descriptors open. @@ -1045,6 +1054,20 @@ static int collect_tables(LEX_STRING *str, LSN checkpoint_start_log_horizon) each checkpoint if the table was once written and then not anymore. */ } + } + /* + _ma_bitmap_flush_all() may wait, so don't keep intern_lock as + otherwise this would deadlock with allocate_and_write_block_record() + calling _ma_set_share_data_file_length() + */ + pthread_mutex_unlock(&share->intern_lock); + + if (!ignore_share) + { + /* + share->bitmap is valid because it's destroyed under close_lock which + we hold. + */ if (_ma_bitmap_flush_all(share)) { sync_error= 1; @@ -1057,23 +1080,28 @@ static int collect_tables(LEX_STRING *str, LSN checkpoint_start_log_horizon) Clean up any unused states. TODO: Only do this call if there has been # (10?) ended transactions since last call. + We had to release intern_lock to respect lock order with LOCK_trn_list. */ - pthread_mutex_unlock(&share->intern_lock); _ma_remove_not_visible_states_with_lock(share, FALSE); - pthread_mutex_lock(&share->intern_lock); if (share->in_checkpoint & MARIA_CHECKPOINT_SHOULD_FREE_ME) { - /* maria_close() left us to free the share */ - pthread_mutex_unlock(&share->intern_lock); + /* + maria_close() left us free the share. When it run it set share->id + to 0. As it run before we locked close_lock, we should have seen this + and so this assertion should be true: + */ + DBUG_ASSERT(ignore_share); pthread_mutex_destroy(&share->intern_lock); + pthread_mutex_unlock(&share->close_lock); + pthread_mutex_destroy(&share->close_lock); my_free((uchar *)share, MYF(0)); } else { /* share goes back to normal state */ share->in_checkpoint= 0; - pthread_mutex_unlock(&share->intern_lock); + pthread_mutex_unlock(&share->close_lock); } /* diff --git a/storage/maria/ma_close.c b/storage/maria/ma_close.c index eb5f5b8ddaa..6141986fd72 100644 --- a/storage/maria/ma_close.c +++ b/storage/maria/ma_close.c @@ -47,6 +47,7 @@ int maria_close(register MARIA_HA *info) if (maria_lock_database(info,F_UNLCK)) error=my_errno; } + pthread_mutex_lock(&share->close_lock); pthread_mutex_lock(&share->intern_lock); if (share->options & HA_OPTION_READ_ONLY_DATA) @@ -169,10 +170,17 @@ int maria_close(register MARIA_HA *info) } pthread_mutex_unlock(&THR_LOCK_maria); pthread_mutex_unlock(&share->intern_lock); + pthread_mutex_unlock(&share->close_lock); if (share_can_be_freed) { (void) pthread_mutex_destroy(&share->intern_lock); + (void) pthread_mutex_destroy(&share->close_lock); my_free((uchar *)share, MYF(0)); + /* + If share cannot be freed, it's because checkpoint has previously + recorded to include this share in the checkpoint and so is soon going to + look at some of its content (share->in_checkpoint/id/last_version). + */ } my_free(info->ftparser_param, MYF(MY_ALLOW_ZERO_PTR)); if (info->dfile.file >= 0) diff --git a/storage/maria/ma_open.c b/storage/maria/ma_open.c index 309ccf78ea1..88f7feb41fd 100644 --- a/storage/maria/ma_open.c +++ b/storage/maria/ma_open.c @@ -819,6 +819,7 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) pthread_mutex_init(&share->intern_lock, MY_MUTEX_INIT_FAST); pthread_mutex_init(&share->key_del_lock, MY_MUTEX_INIT_FAST); pthread_cond_init(&share->key_del_cond, 0); + pthread_mutex_init(&share->close_lock, MY_MUTEX_INIT_FAST); for (i=0; ikeyinfo[i].root_lock, NULL)); VOID(my_rwlock_init(&share->mmap_lock, NULL)); diff --git a/storage/maria/ma_recovery.c b/storage/maria/ma_recovery.c index baebdcf2eb4..77dfc6b568a 100644 --- a/storage/maria/ma_recovery.c +++ b/storage/maria/ma_recovery.c @@ -3276,6 +3276,8 @@ void _ma_tmp_disable_logging_for_table(MARIA_HA *info, /** Re-enables logging for a table which had it temporarily disabled. + Only the thread which disabled logging is allowed to reenable it. + @param info table @param flush_pages if function needs to flush pages first */ diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h index bd2aaba29d7..52b14b80aa6 100644 --- a/storage/maria/maria_def.h +++ b/storage/maria/maria_def.h @@ -362,7 +362,10 @@ typedef struct st_maria_share myf write_flag; enum data_file_type data_file_type; enum pagecache_page_type page_type; /* value depending transactional */ - uint8 in_checkpoint; /**< if Checkpoint looking at table */ + /** + if Checkpoint looking at table; protected by close_lock or THR_LOCK_maria + */ + uint8 in_checkpoint; my_bool temporary; /* Below flag is needed to make log tables work with concurrent insert */ my_bool is_log_table; @@ -386,9 +389,20 @@ typedef struct st_maria_share #ifdef THREAD THR_LOCK lock; void (*lock_restore_status)(void *); - pthread_mutex_t intern_lock; /* Locking for use with _locking */ + /** + Protects kfile, dfile, most members of the state, state disk writes, + versioning information (like in_trans, state_history). + @todo find the exhaustive list. + */ + pthread_mutex_t intern_lock; pthread_mutex_t key_del_lock; pthread_cond_t key_del_cond; + /** + _Always_ held while closing table; prevents checkpoint from looking at + structures freed during closure (like bitmap). If you need close_lock and + intern_lock, lock them in this order. + */ + pthread_mutex_t close_lock; #endif my_off_t mmaped_length; uint nonmmaped_inserts; /* counter of writing in