MDEV-17813 Crash in instant ALTER TABLE due to purge concurrently emptying table
Several race conditions between MDEV-15562 instant ALTER TABLE and purge were observed. The most obvious race condition resulted in a reported assertion failure in dict_index_t::instant_add_field(): instant.n_core_fields == n_core_fields would not hold if the table was emptied by purge after the time dict_table_t::prepare_instant() was called. During purge, it can turn out that the table is logically empty, only containing a metadata record. If the metadata record is of the type created by MDEV-11369 instant ADD COLUMN, it can be removed and dict_index_t::clear_instant_alter() can be called. This will convert the table to the canonical non-instant format. (If the metadata record is of the MDEV-15562 type, then it can only be deleted if the table becomes empty as the result of rollback of an instant ALTER TABLE operation.) row_purge_remove_clust_if_poss_low(): Add a debug check that ensures that purge can never remove an MDEV-15562 metadata record. ha_innobase::open(): Add a comment about the necessity of rolling back any recovered instant ALTER TABLE transaction on the table. instant_metadata_lock(): An auxiliary function to acquire a page latch on the metadata record, to prevent race conditions. dict_table_t::prepare_instant(), dict_index_t::instant_add_field(), dict_table_t::rollback_instant(), innobase_instant_try(): Invoke instant_metadata_lock() in order to prevent race conditions. dict_index_t::instant_add_field(): Correct debug assertions. The == was guaranteed by code in dict_table_t::prepare_instant() that was introduced in MDEV-15562. Due to the race condition, we could occasionally have <=, but never >= like the code was after MDEV-11369. ha_innobase_inplace_ctx::instant_column(): Wrapper for dict_table_t::instant_column(). Add debug assertions.
This commit is contained in:
parent
46a411088c
commit
2c4844c9e7
@ -21,4 +21,26 @@ ALTER TABLE t1 DROP extra;
|
||||
disconnect prevent_purge;
|
||||
InnoDB 0 transactions not purged
|
||||
DROP TABLE t1;
|
||||
#
|
||||
# MDEV-17813 Crash in instant ALTER TABLE due to purge
|
||||
# concurrently emptying table
|
||||
#
|
||||
CREATE TABLE t1 (f2 INT) ENGINE=InnoDB;
|
||||
INSERT INTO t1 SET f2=1;
|
||||
ALTER TABLE t1 ADD COLUMN f1 INT;
|
||||
connect purge_control,localhost,root;
|
||||
START TRANSACTION WITH CONSISTENT SNAPSHOT;
|
||||
connection default;
|
||||
DELETE FROM t1;
|
||||
SET DEBUG_SYNC='innodb_commit_inplace_alter_table_enter SIGNAL go WAIT_FOR do';
|
||||
ALTER TABLE t1 ADD COLUMN f3 INT;
|
||||
connection purge_control;
|
||||
SET DEBUG_SYNC='now WAIT_FOR go';
|
||||
COMMIT;
|
||||
InnoDB 0 transactions not purged
|
||||
SET DEBUG_SYNC='now SIGNAL do';
|
||||
disconnect purge_control;
|
||||
connection default;
|
||||
SET DEBUG_SYNC=RESET;
|
||||
DROP TABLE t1;
|
||||
SET GLOBAL innodb_purge_rseg_truncate_frequency = @saved_frequency;
|
||||
|
@ -30,4 +30,34 @@ disconnect prevent_purge;
|
||||
let $wait_all_purged= 0;
|
||||
--source include/wait_all_purged.inc
|
||||
DROP TABLE t1;
|
||||
|
||||
--echo #
|
||||
--echo # MDEV-17813 Crash in instant ALTER TABLE due to purge
|
||||
--echo # concurrently emptying table
|
||||
--echo #
|
||||
CREATE TABLE t1 (f2 INT) ENGINE=InnoDB;
|
||||
INSERT INTO t1 SET f2=1;
|
||||
ALTER TABLE t1 ADD COLUMN f1 INT;
|
||||
|
||||
connect (purge_control,localhost,root);
|
||||
START TRANSACTION WITH CONSISTENT SNAPSHOT;
|
||||
|
||||
connection default;
|
||||
DELETE FROM t1;
|
||||
|
||||
SET DEBUG_SYNC='innodb_commit_inplace_alter_table_enter SIGNAL go WAIT_FOR do';
|
||||
send ALTER TABLE t1 ADD COLUMN f3 INT;
|
||||
|
||||
connection purge_control;
|
||||
SET DEBUG_SYNC='now WAIT_FOR go';
|
||||
COMMIT;
|
||||
--source include/wait_all_purged.inc
|
||||
SET DEBUG_SYNC='now SIGNAL do';
|
||||
disconnect purge_control;
|
||||
|
||||
connection default;
|
||||
reap;
|
||||
SET DEBUG_SYNC=RESET;
|
||||
DROP TABLE t1;
|
||||
|
||||
SET GLOBAL innodb_purge_rseg_truncate_frequency = @saved_frequency;
|
||||
|
@ -6046,6 +6046,14 @@ initialize_auto_increment(dict_table_t* table, const Field* field)
|
||||
int
|
||||
ha_innobase::open(const char* name, int, uint)
|
||||
{
|
||||
/* TODO: If trx_rollback_recovered(bool all=false) is ever
|
||||
removed, the first-time open() must hold (or acquire and release)
|
||||
a table lock that conflicts with trx_resurrect_table_locks(),
|
||||
to ensure that any recovered incomplete ALTER TABLE will have been
|
||||
rolled back. Otherwise, dict_table_t::instant could be cleared by
|
||||
the rollback invoking dict_index_t::clear_instant_alter() while
|
||||
open table handles exist in client connections. */
|
||||
|
||||
dict_table_t* ib_table;
|
||||
char norm_name[FN_REFLEN];
|
||||
dict_err_ignore_t ignore_err = DICT_ERR_IGNORE_NONE;
|
||||
|
@ -136,6 +136,30 @@ static const alter_table_operations INNOBASE_ALTER_INSTANT
|
||||
| ALTER_COLUMN_UNVERSIONED
|
||||
| ALTER_DROP_VIRTUAL_COLUMN;
|
||||
|
||||
/** Acquire a page latch on the possible metadata record,
|
||||
to prevent concurrent invocation of dict_index_t::clear_instant_alter()
|
||||
by purge when the table turns out to be empty.
|
||||
@param[in,out] index clustered index
|
||||
@param[in,out] mtr mini-transaction */
|
||||
static void instant_metadata_lock(dict_index_t& index, mtr_t& mtr)
|
||||
{
|
||||
DBUG_ASSERT(index.is_primary());
|
||||
|
||||
if (!index.is_instant()) {
|
||||
/* dict_index_t::clear_instant_alter() cannot be called.
|
||||
No need for a latch. */
|
||||
return;
|
||||
}
|
||||
|
||||
btr_cur_t btr_cur;
|
||||
btr_cur_open_at_index_side(true, &index, BTR_SEARCH_LEAF,
|
||||
&btr_cur, 0, &mtr);
|
||||
ut_ad(page_cur_is_before_first(btr_cur_get_page_cur(&btr_cur)));
|
||||
ut_ad(page_is_leaf(btr_cur_get_page(&btr_cur)));
|
||||
ut_ad(!page_has_prev(btr_cur_get_page(&btr_cur)));
|
||||
ut_ad(!buf_block_get_page_zip(btr_cur_get_block(&btr_cur)));
|
||||
}
|
||||
|
||||
/** Set is_instant() before instant_column().
|
||||
@param[in] old previous table definition
|
||||
@param[in] col_map map from old.cols[] and old.v_cols[] to this
|
||||
@ -152,10 +176,16 @@ inline void dict_table_t::prepare_instant(const dict_table_t& old,
|
||||
DBUG_ASSERT(old.supports_instant());
|
||||
DBUG_ASSERT(supports_instant());
|
||||
|
||||
const dict_index_t& oindex = *old.indexes.start;
|
||||
dict_index_t& oindex = *old.indexes.start;
|
||||
dict_index_t& index = *indexes.start;
|
||||
first_alter_pos = 0;
|
||||
|
||||
mtr_t mtr;
|
||||
mtr.start();
|
||||
/* Protect oindex.n_core_fields and others, so that
|
||||
purge cannot invoke dict_index_t::clear_instant_alter(). */
|
||||
instant_metadata_lock(oindex, mtr);
|
||||
|
||||
for (unsigned i = 0; i + DATA_N_SYS_COLS < old.n_cols;
|
||||
i++) {
|
||||
if (col_map[i] != i) {
|
||||
@ -315,6 +345,7 @@ found_j:
|
||||
DBUG_ASSERT(n_dropped() >= old.n_dropped());
|
||||
DBUG_ASSERT(index.n_core_fields == oindex.n_core_fields);
|
||||
DBUG_ASSERT(index.n_core_null_bytes == oindex.n_core_null_bytes);
|
||||
mtr.commit();
|
||||
}
|
||||
|
||||
|
||||
@ -335,8 +366,15 @@ inline void dict_index_t::instant_add_field(const dict_index_t& instant)
|
||||
DBUG_ASSERT(n_uniq == instant.n_uniq);
|
||||
DBUG_ASSERT(instant.n_fields >= n_fields);
|
||||
DBUG_ASSERT(instant.n_nullable >= n_nullable);
|
||||
DBUG_ASSERT(instant.n_core_fields == n_core_fields);
|
||||
DBUG_ASSERT(instant.n_core_null_bytes == n_core_null_bytes);
|
||||
/* dict_table_t::prepare_instant() initialized n_core_fields
|
||||
to be equal. However, after that purge could have emptied the
|
||||
table and invoked dict_index_t::clear_instant_alter(). */
|
||||
DBUG_ASSERT(instant.n_core_fields <= n_core_fields);
|
||||
DBUG_ASSERT(instant.n_core_null_bytes <= n_core_null_bytes);
|
||||
DBUG_ASSERT(instant.n_core_fields == n_core_fields
|
||||
|| (!is_instant() && instant.is_instant()));
|
||||
DBUG_ASSERT(instant.n_core_null_bytes == n_core_null_bytes
|
||||
|| (!is_instant() && instant.is_instant()));
|
||||
|
||||
/* instant will have all fields (including ones for columns
|
||||
that have been or are being instantly dropped) in the same position
|
||||
@ -627,7 +665,13 @@ inline void dict_table_t::rollback_instant(
|
||||
const ulint* col_map)
|
||||
{
|
||||
ut_ad(mutex_own(&dict_sys->mutex));
|
||||
ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
|
||||
dict_index_t* index = indexes.start;
|
||||
mtr_t mtr;
|
||||
mtr.start();
|
||||
/* Prevent concurrent execution of dict_index_t::clear_instant_alter()
|
||||
by acquiring a latch on the leftmost leaf page. */
|
||||
instant_metadata_lock(*index, mtr);
|
||||
/* index->is_instant() does not necessarily hold here, because
|
||||
the table may have been emptied */
|
||||
DBUG_ASSERT(old_n_cols >= DATA_N_SYS_COLS);
|
||||
@ -667,6 +711,7 @@ inline void dict_table_t::rollback_instant(
|
||||
n_t_def = n_t_cols = n_cols + n_v_cols;
|
||||
|
||||
index->fields = old_fields;
|
||||
mtr.commit();
|
||||
|
||||
while ((index = dict_table_get_next_index(index)) != NULL) {
|
||||
if (index->to_be_dropped) {
|
||||
@ -924,6 +969,15 @@ struct ha_innobase_inplace_ctx : public inplace_alter_handler_ctx
|
||||
first_alter_pos);
|
||||
}
|
||||
|
||||
/** Adjust table metadata for instant ADD/DROP/reorder COLUMN. */
|
||||
void instant_column()
|
||||
{
|
||||
DBUG_ASSERT(is_instant());
|
||||
DBUG_ASSERT(old_n_fields
|
||||
== old_table->indexes.start->n_fields);
|
||||
old_table->instant_column(*instant_table, col_map);
|
||||
}
|
||||
|
||||
/** Revert prepare_instant() if the transaction is rolled back. */
|
||||
void rollback_instant()
|
||||
{
|
||||
@ -5287,13 +5341,24 @@ static bool innobase_instant_try(
|
||||
dict_table_t* user_table = ctx->old_table;
|
||||
|
||||
dict_index_t* index = dict_table_get_first_index(user_table);
|
||||
uint n_old_fields = index->n_fields;
|
||||
mtr_t mtr;
|
||||
mtr.start();
|
||||
/* Prevent purge from calling dict_index_t::clear_instant_alter(),
|
||||
to protect index->n_core_fields, index->table->instant and others
|
||||
from changing during ctx->instant_column(). */
|
||||
instant_metadata_lock(*index, mtr);
|
||||
const unsigned n_old_fields = index->n_fields;
|
||||
const dict_col_t* old_cols = user_table->cols;
|
||||
DBUG_ASSERT(user_table->n_cols == ctx->old_n_cols);
|
||||
|
||||
user_table->instant_column(*ctx->instant_table, ctx->col_map);
|
||||
ctx->instant_column();
|
||||
|
||||
DBUG_ASSERT(index->n_fields >= n_old_fields);
|
||||
/* Release the page latch. Between this and the next
|
||||
btr_pcur_open_at_index_side(), data fields such as
|
||||
index->n_core_fields and index->table->instant could change,
|
||||
but we would handle that in empty_table: below. */
|
||||
mtr.commit();
|
||||
/* The table may have been emptied and may have lost its
|
||||
'instantness' during this ALTER TABLE. */
|
||||
|
||||
@ -5441,7 +5506,6 @@ add_all_virtual:
|
||||
memset(roll_ptr, 0, sizeof roll_ptr);
|
||||
|
||||
dtuple_t* entry = index->instant_metadata(*row, ctx->heap);
|
||||
mtr_t mtr;
|
||||
mtr.start();
|
||||
index->set_modified(mtr);
|
||||
btr_pcur_t pcur;
|
||||
|
@ -127,33 +127,32 @@ row_purge_remove_clust_if_poss_low(
|
||||
purge_node_t* node, /*!< in/out: row purge node */
|
||||
ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
|
||||
{
|
||||
dict_index_t* index;
|
||||
bool success = true;
|
||||
mtr_t mtr;
|
||||
rec_t* rec;
|
||||
mem_heap_t* heap = NULL;
|
||||
ulint* offsets;
|
||||
ulint offsets_[REC_OFFS_NORMAL_SIZE];
|
||||
rec_offs_init(offsets_);
|
||||
|
||||
ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_S)
|
||||
|| node->vcol_info.is_used());
|
||||
|
||||
index = dict_table_get_first_index(node->table);
|
||||
dict_index_t* index = dict_table_get_first_index(node->table);
|
||||
|
||||
log_free_check();
|
||||
mtr_start(&mtr);
|
||||
index->set_modified(mtr);
|
||||
|
||||
mtr_t mtr;
|
||||
mtr.start();
|
||||
|
||||
if (!row_purge_reposition_pcur(mode, node, &mtr)) {
|
||||
/* The record was already removed. */
|
||||
goto func_exit;
|
||||
mtr.commit();
|
||||
return true;
|
||||
}
|
||||
|
||||
rec = btr_pcur_get_rec(&node->pcur);
|
||||
ut_d(const bool was_instant = !!index->table->instant);
|
||||
index->set_modified(mtr);
|
||||
|
||||
offsets = rec_get_offsets(
|
||||
rec_t* rec = btr_pcur_get_rec(&node->pcur);
|
||||
ulint offsets_[REC_OFFS_NORMAL_SIZE];
|
||||
rec_offs_init(offsets_);
|
||||
mem_heap_t* heap = NULL;
|
||||
ulint* offsets = rec_get_offsets(
|
||||
rec, index, offsets_, true, ULINT_UNDEFINED, &heap);
|
||||
bool success = true;
|
||||
|
||||
if (node->roll_ptr != row_get_rec_roll_ptr(rec, index, offsets)) {
|
||||
/* Someone else has modified the record later: do not remove */
|
||||
@ -186,6 +185,10 @@ row_purge_remove_clust_if_poss_low(
|
||||
}
|
||||
}
|
||||
|
||||
/* Prove that dict_index_t::clear_instant_alter() was
|
||||
not called with index->table->instant != NULL. */
|
||||
ut_ad(!was_instant || index->table->instant);
|
||||
|
||||
func_exit:
|
||||
if (heap) {
|
||||
mem_heap_free(heap);
|
||||
|
Loading…
x
Reference in New Issue
Block a user