MDEV-17813 Crash in instant ALTER TABLE due to purge concurrently emptying table
Several race conditions between MDEV-15562 instant ALTER TABLE and purge were observed. The most obvious race condition resulted in a reported assertion failure in dict_index_t::instant_add_field(): instant.n_core_fields == n_core_fields would not hold if the table was emptied by purge after the time dict_table_t::prepare_instant() was called. During purge, it can turn out that the table is logically empty, only containing a metadata record. If the metadata record is of the type created by MDEV-11369 instant ADD COLUMN, it can be removed and dict_index_t::clear_instant_add() can be called. This will convert the table to the canonical non-instant format. (If the metadata record is of the MDEV-15562 type, then it can only be deleted if the table becomes empty as the result of rollback of an instant ALTER TABLE operation.) row_purge_remove_clust_if_poss_low(): Add a debug check that ensures that purge can never remove a MDEV-15562 metadata record. ha_innobase::open(): Add a comment about the necessity of rolling back any recovered instant ALTER TABLE transaction on the table. instant_metadata_lock(): An auxiliary function to acquire a page latch on the metadata record, to prevent race conditions. dict_table_t::prepare_instant(), dict_index_t::instant_add_field(), dict_table_t::rollback_instant(), innobase_instant_try(): Invoke instant_metadata_lock() in order to prevent race conditions. dict_index_t::instant_add_field(): Correct debug assertions. The == was guaranteed by code in dict_table_t::prepare_instant() that was introduced in MDEV-15562. Due to the race condition, we could occasionally have <=, but never >= like the code was after MDEV-11369. ha_innobase_inplace_ctx::instant_column(): Wrapper for dict_table_t::instant_column(). Add debug assertions.
This commit is contained in:
parent
46a411088c
commit
2c4844c9e7
@ -21,4 +21,26 @@ ALTER TABLE t1 DROP extra;
|
|||||||
disconnect prevent_purge;
|
disconnect prevent_purge;
|
||||||
InnoDB 0 transactions not purged
|
InnoDB 0 transactions not purged
|
||||||
DROP TABLE t1;
|
DROP TABLE t1;
|
||||||
|
#
|
||||||
|
# MDEV-17813 Crash in instant ALTER TABLE due to purge
|
||||||
|
# concurrently emptying table
|
||||||
|
#
|
||||||
|
CREATE TABLE t1 (f2 INT) ENGINE=InnoDB;
|
||||||
|
INSERT INTO t1 SET f2=1;
|
||||||
|
ALTER TABLE t1 ADD COLUMN f1 INT;
|
||||||
|
connect purge_control,localhost,root;
|
||||||
|
START TRANSACTION WITH CONSISTENT SNAPSHOT;
|
||||||
|
connection default;
|
||||||
|
DELETE FROM t1;
|
||||||
|
SET DEBUG_SYNC='innodb_commit_inplace_alter_table_enter SIGNAL go WAIT_FOR do';
|
||||||
|
ALTER TABLE t1 ADD COLUMN f3 INT;
|
||||||
|
connection purge_control;
|
||||||
|
SET DEBUG_SYNC='now WAIT_FOR go';
|
||||||
|
COMMIT;
|
||||||
|
InnoDB 0 transactions not purged
|
||||||
|
SET DEBUG_SYNC='now SIGNAL do';
|
||||||
|
disconnect purge_control;
|
||||||
|
connection default;
|
||||||
|
SET DEBUG_SYNC=RESET;
|
||||||
|
DROP TABLE t1;
|
||||||
SET GLOBAL innodb_purge_rseg_truncate_frequency = @saved_frequency;
|
SET GLOBAL innodb_purge_rseg_truncate_frequency = @saved_frequency;
|
||||||
|
@ -30,4 +30,34 @@ disconnect prevent_purge;
|
|||||||
let $wait_all_purged= 0;
|
let $wait_all_purged= 0;
|
||||||
--source include/wait_all_purged.inc
|
--source include/wait_all_purged.inc
|
||||||
DROP TABLE t1;
|
DROP TABLE t1;
|
||||||
|
|
||||||
|
--echo #
|
||||||
|
--echo # MDEV-17813 Crash in instant ALTER TABLE due to purge
|
||||||
|
--echo # concurrently emptying table
|
||||||
|
--echo #
|
||||||
|
CREATE TABLE t1 (f2 INT) ENGINE=InnoDB;
|
||||||
|
INSERT INTO t1 SET f2=1;
|
||||||
|
ALTER TABLE t1 ADD COLUMN f1 INT;
|
||||||
|
|
||||||
|
connect (purge_control,localhost,root);
|
||||||
|
START TRANSACTION WITH CONSISTENT SNAPSHOT;
|
||||||
|
|
||||||
|
connection default;
|
||||||
|
DELETE FROM t1;
|
||||||
|
|
||||||
|
SET DEBUG_SYNC='innodb_commit_inplace_alter_table_enter SIGNAL go WAIT_FOR do';
|
||||||
|
send ALTER TABLE t1 ADD COLUMN f3 INT;
|
||||||
|
|
||||||
|
connection purge_control;
|
||||||
|
SET DEBUG_SYNC='now WAIT_FOR go';
|
||||||
|
COMMIT;
|
||||||
|
--source include/wait_all_purged.inc
|
||||||
|
SET DEBUG_SYNC='now SIGNAL do';
|
||||||
|
disconnect purge_control;
|
||||||
|
|
||||||
|
connection default;
|
||||||
|
reap;
|
||||||
|
SET DEBUG_SYNC=RESET;
|
||||||
|
DROP TABLE t1;
|
||||||
|
|
||||||
SET GLOBAL innodb_purge_rseg_truncate_frequency = @saved_frequency;
|
SET GLOBAL innodb_purge_rseg_truncate_frequency = @saved_frequency;
|
||||||
|
@ -6046,6 +6046,14 @@ initialize_auto_increment(dict_table_t* table, const Field* field)
|
|||||||
int
|
int
|
||||||
ha_innobase::open(const char* name, int, uint)
|
ha_innobase::open(const char* name, int, uint)
|
||||||
{
|
{
|
||||||
|
/* TODO: If trx_rollback_recovered(bool all=false) is ever
|
||||||
|
removed, the first-time open() must hold (or acquire and release)
|
||||||
|
a table lock that conflicts with trx_resurrect_table_locks(),
|
||||||
|
to ensure that any recovered incomplete ALTER TABLE will have been
|
||||||
|
rolled back. Otherwise, dict_table_t::instant could be cleared by
|
||||||
|
the rollback invoking dict_index_t::clear_instant_alter() while
|
||||||
|
open table handles exist in client connections. */
|
||||||
|
|
||||||
dict_table_t* ib_table;
|
dict_table_t* ib_table;
|
||||||
char norm_name[FN_REFLEN];
|
char norm_name[FN_REFLEN];
|
||||||
dict_err_ignore_t ignore_err = DICT_ERR_IGNORE_NONE;
|
dict_err_ignore_t ignore_err = DICT_ERR_IGNORE_NONE;
|
||||||
|
@ -136,6 +136,30 @@ static const alter_table_operations INNOBASE_ALTER_INSTANT
|
|||||||
| ALTER_COLUMN_UNVERSIONED
|
| ALTER_COLUMN_UNVERSIONED
|
||||||
| ALTER_DROP_VIRTUAL_COLUMN;
|
| ALTER_DROP_VIRTUAL_COLUMN;
|
||||||
|
|
||||||
|
/** Acquire a page latch on the possible metadata record,
|
||||||
|
to prevent concurrent invocation of dict_index_t::clear_instant_alter()
|
||||||
|
by purge when the table turns out to be empty.
|
||||||
|
@param[in,out] index clustered index
|
||||||
|
@param[in,out] mtr mini-transaction */
|
||||||
|
static void instant_metadata_lock(dict_index_t& index, mtr_t& mtr)
|
||||||
|
{
|
||||||
|
DBUG_ASSERT(index.is_primary());
|
||||||
|
|
||||||
|
if (!index.is_instant()) {
|
||||||
|
/* dict_index_t::clear_instant_alter() cannot be called.
|
||||||
|
No need for a latch. */
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
btr_cur_t btr_cur;
|
||||||
|
btr_cur_open_at_index_side(true, &index, BTR_SEARCH_LEAF,
|
||||||
|
&btr_cur, 0, &mtr);
|
||||||
|
ut_ad(page_cur_is_before_first(btr_cur_get_page_cur(&btr_cur)));
|
||||||
|
ut_ad(page_is_leaf(btr_cur_get_page(&btr_cur)));
|
||||||
|
ut_ad(!page_has_prev(btr_cur_get_page(&btr_cur)));
|
||||||
|
ut_ad(!buf_block_get_page_zip(btr_cur_get_block(&btr_cur)));
|
||||||
|
}
|
||||||
|
|
||||||
/** Set is_instant() before instant_column().
|
/** Set is_instant() before instant_column().
|
||||||
@param[in] old previous table definition
|
@param[in] old previous table definition
|
||||||
@param[in] col_map map from old.cols[] and old.v_cols[] to this
|
@param[in] col_map map from old.cols[] and old.v_cols[] to this
|
||||||
@ -152,10 +176,16 @@ inline void dict_table_t::prepare_instant(const dict_table_t& old,
|
|||||||
DBUG_ASSERT(old.supports_instant());
|
DBUG_ASSERT(old.supports_instant());
|
||||||
DBUG_ASSERT(supports_instant());
|
DBUG_ASSERT(supports_instant());
|
||||||
|
|
||||||
const dict_index_t& oindex = *old.indexes.start;
|
dict_index_t& oindex = *old.indexes.start;
|
||||||
dict_index_t& index = *indexes.start;
|
dict_index_t& index = *indexes.start;
|
||||||
first_alter_pos = 0;
|
first_alter_pos = 0;
|
||||||
|
|
||||||
|
mtr_t mtr;
|
||||||
|
mtr.start();
|
||||||
|
/* Prevent oindex.n_core_fields and others, so that
|
||||||
|
purge cannot invoke dict_index_t::clear_instant_alter(). */
|
||||||
|
instant_metadata_lock(oindex, mtr);
|
||||||
|
|
||||||
for (unsigned i = 0; i + DATA_N_SYS_COLS < old.n_cols;
|
for (unsigned i = 0; i + DATA_N_SYS_COLS < old.n_cols;
|
||||||
i++) {
|
i++) {
|
||||||
if (col_map[i] != i) {
|
if (col_map[i] != i) {
|
||||||
@ -315,6 +345,7 @@ found_j:
|
|||||||
DBUG_ASSERT(n_dropped() >= old.n_dropped());
|
DBUG_ASSERT(n_dropped() >= old.n_dropped());
|
||||||
DBUG_ASSERT(index.n_core_fields == oindex.n_core_fields);
|
DBUG_ASSERT(index.n_core_fields == oindex.n_core_fields);
|
||||||
DBUG_ASSERT(index.n_core_null_bytes == oindex.n_core_null_bytes);
|
DBUG_ASSERT(index.n_core_null_bytes == oindex.n_core_null_bytes);
|
||||||
|
mtr.commit();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -335,8 +366,15 @@ inline void dict_index_t::instant_add_field(const dict_index_t& instant)
|
|||||||
DBUG_ASSERT(n_uniq == instant.n_uniq);
|
DBUG_ASSERT(n_uniq == instant.n_uniq);
|
||||||
DBUG_ASSERT(instant.n_fields >= n_fields);
|
DBUG_ASSERT(instant.n_fields >= n_fields);
|
||||||
DBUG_ASSERT(instant.n_nullable >= n_nullable);
|
DBUG_ASSERT(instant.n_nullable >= n_nullable);
|
||||||
DBUG_ASSERT(instant.n_core_fields == n_core_fields);
|
/* dict_table_t::prepare_instant() initialized n_core_fields
|
||||||
DBUG_ASSERT(instant.n_core_null_bytes == n_core_null_bytes);
|
to be equal. However, after that purge could have emptied the
|
||||||
|
table and invoked dict_index_t::clear_instant_alter(). */
|
||||||
|
DBUG_ASSERT(instant.n_core_fields <= n_core_fields);
|
||||||
|
DBUG_ASSERT(instant.n_core_null_bytes <= n_core_null_bytes);
|
||||||
|
DBUG_ASSERT(instant.n_core_fields == n_core_fields
|
||||||
|
|| (!is_instant() && instant.is_instant()));
|
||||||
|
DBUG_ASSERT(instant.n_core_null_bytes == n_core_null_bytes
|
||||||
|
|| (!is_instant() && instant.is_instant()));
|
||||||
|
|
||||||
/* instant will have all fields (including ones for columns
|
/* instant will have all fields (including ones for columns
|
||||||
that have been or are being instantly dropped) in the same position
|
that have been or are being instantly dropped) in the same position
|
||||||
@ -627,7 +665,13 @@ inline void dict_table_t::rollback_instant(
|
|||||||
const ulint* col_map)
|
const ulint* col_map)
|
||||||
{
|
{
|
||||||
ut_ad(mutex_own(&dict_sys->mutex));
|
ut_ad(mutex_own(&dict_sys->mutex));
|
||||||
|
ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
|
||||||
dict_index_t* index = indexes.start;
|
dict_index_t* index = indexes.start;
|
||||||
|
mtr_t mtr;
|
||||||
|
mtr.start();
|
||||||
|
/* Prevent concurrent execution of dict_index_t::clear_instant_alter()
|
||||||
|
by acquiring a latch on the leftmost leaf page. */
|
||||||
|
instant_metadata_lock(*index, mtr);
|
||||||
/* index->is_instant() does not necessarily hold here, because
|
/* index->is_instant() does not necessarily hold here, because
|
||||||
the table may have been emptied */
|
the table may have been emptied */
|
||||||
DBUG_ASSERT(old_n_cols >= DATA_N_SYS_COLS);
|
DBUG_ASSERT(old_n_cols >= DATA_N_SYS_COLS);
|
||||||
@ -667,6 +711,7 @@ inline void dict_table_t::rollback_instant(
|
|||||||
n_t_def = n_t_cols = n_cols + n_v_cols;
|
n_t_def = n_t_cols = n_cols + n_v_cols;
|
||||||
|
|
||||||
index->fields = old_fields;
|
index->fields = old_fields;
|
||||||
|
mtr.commit();
|
||||||
|
|
||||||
while ((index = dict_table_get_next_index(index)) != NULL) {
|
while ((index = dict_table_get_next_index(index)) != NULL) {
|
||||||
if (index->to_be_dropped) {
|
if (index->to_be_dropped) {
|
||||||
@ -924,6 +969,15 @@ struct ha_innobase_inplace_ctx : public inplace_alter_handler_ctx
|
|||||||
first_alter_pos);
|
first_alter_pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Adjust table metadata for instant ADD/DROP/reorder COLUMN. */
|
||||||
|
void instant_column()
|
||||||
|
{
|
||||||
|
DBUG_ASSERT(is_instant());
|
||||||
|
DBUG_ASSERT(old_n_fields
|
||||||
|
== old_table->indexes.start->n_fields);
|
||||||
|
old_table->instant_column(*instant_table, col_map);
|
||||||
|
}
|
||||||
|
|
||||||
/** Revert prepare_instant() if the transaction is rolled back. */
|
/** Revert prepare_instant() if the transaction is rolled back. */
|
||||||
void rollback_instant()
|
void rollback_instant()
|
||||||
{
|
{
|
||||||
@ -5287,13 +5341,24 @@ static bool innobase_instant_try(
|
|||||||
dict_table_t* user_table = ctx->old_table;
|
dict_table_t* user_table = ctx->old_table;
|
||||||
|
|
||||||
dict_index_t* index = dict_table_get_first_index(user_table);
|
dict_index_t* index = dict_table_get_first_index(user_table);
|
||||||
uint n_old_fields = index->n_fields;
|
mtr_t mtr;
|
||||||
|
mtr.start();
|
||||||
|
/* Prevent purge from calling dict_index_t::clear_instant_alter(),
|
||||||
|
to protect index->n_core_fields, index->table->instant and others
|
||||||
|
from changing during ctx->instant_column(). */
|
||||||
|
instant_metadata_lock(*index, mtr);
|
||||||
|
const unsigned n_old_fields = index->n_fields;
|
||||||
const dict_col_t* old_cols = user_table->cols;
|
const dict_col_t* old_cols = user_table->cols;
|
||||||
DBUG_ASSERT(user_table->n_cols == ctx->old_n_cols);
|
DBUG_ASSERT(user_table->n_cols == ctx->old_n_cols);
|
||||||
|
|
||||||
user_table->instant_column(*ctx->instant_table, ctx->col_map);
|
ctx->instant_column();
|
||||||
|
|
||||||
DBUG_ASSERT(index->n_fields >= n_old_fields);
|
DBUG_ASSERT(index->n_fields >= n_old_fields);
|
||||||
|
/* Release the page latch. Between this and the next
|
||||||
|
btr_pcur_open_at_index_side(), data fields such as
|
||||||
|
index->n_core_fields and index->table->instant could change,
|
||||||
|
but we would handle that in empty_table: below. */
|
||||||
|
mtr.commit();
|
||||||
/* The table may have been emptied and may have lost its
|
/* The table may have been emptied and may have lost its
|
||||||
'instantness' during this ALTER TABLE. */
|
'instantness' during this ALTER TABLE. */
|
||||||
|
|
||||||
@ -5441,7 +5506,6 @@ add_all_virtual:
|
|||||||
memset(roll_ptr, 0, sizeof roll_ptr);
|
memset(roll_ptr, 0, sizeof roll_ptr);
|
||||||
|
|
||||||
dtuple_t* entry = index->instant_metadata(*row, ctx->heap);
|
dtuple_t* entry = index->instant_metadata(*row, ctx->heap);
|
||||||
mtr_t mtr;
|
|
||||||
mtr.start();
|
mtr.start();
|
||||||
index->set_modified(mtr);
|
index->set_modified(mtr);
|
||||||
btr_pcur_t pcur;
|
btr_pcur_t pcur;
|
||||||
|
@ -127,33 +127,32 @@ row_purge_remove_clust_if_poss_low(
|
|||||||
purge_node_t* node, /*!< in/out: row purge node */
|
purge_node_t* node, /*!< in/out: row purge node */
|
||||||
ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
|
ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
|
||||||
{
|
{
|
||||||
dict_index_t* index;
|
|
||||||
bool success = true;
|
|
||||||
mtr_t mtr;
|
|
||||||
rec_t* rec;
|
|
||||||
mem_heap_t* heap = NULL;
|
|
||||||
ulint* offsets;
|
|
||||||
ulint offsets_[REC_OFFS_NORMAL_SIZE];
|
|
||||||
rec_offs_init(offsets_);
|
|
||||||
|
|
||||||
ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_S)
|
ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_S)
|
||||||
|| node->vcol_info.is_used());
|
|| node->vcol_info.is_used());
|
||||||
|
|
||||||
index = dict_table_get_first_index(node->table);
|
dict_index_t* index = dict_table_get_first_index(node->table);
|
||||||
|
|
||||||
log_free_check();
|
log_free_check();
|
||||||
mtr_start(&mtr);
|
|
||||||
index->set_modified(mtr);
|
mtr_t mtr;
|
||||||
|
mtr.start();
|
||||||
|
|
||||||
if (!row_purge_reposition_pcur(mode, node, &mtr)) {
|
if (!row_purge_reposition_pcur(mode, node, &mtr)) {
|
||||||
/* The record was already removed. */
|
/* The record was already removed. */
|
||||||
goto func_exit;
|
mtr.commit();
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
rec = btr_pcur_get_rec(&node->pcur);
|
ut_d(const bool was_instant = !!index->table->instant);
|
||||||
|
index->set_modified(mtr);
|
||||||
|
|
||||||
offsets = rec_get_offsets(
|
rec_t* rec = btr_pcur_get_rec(&node->pcur);
|
||||||
|
ulint offsets_[REC_OFFS_NORMAL_SIZE];
|
||||||
|
rec_offs_init(offsets_);
|
||||||
|
mem_heap_t* heap = NULL;
|
||||||
|
ulint* offsets = rec_get_offsets(
|
||||||
rec, index, offsets_, true, ULINT_UNDEFINED, &heap);
|
rec, index, offsets_, true, ULINT_UNDEFINED, &heap);
|
||||||
|
bool success = true;
|
||||||
|
|
||||||
if (node->roll_ptr != row_get_rec_roll_ptr(rec, index, offsets)) {
|
if (node->roll_ptr != row_get_rec_roll_ptr(rec, index, offsets)) {
|
||||||
/* Someone else has modified the record later: do not remove */
|
/* Someone else has modified the record later: do not remove */
|
||||||
@ -186,6 +185,10 @@ row_purge_remove_clust_if_poss_low(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Prove that dict_index_t::clear_instant_alter() was
|
||||||
|
not called with index->table->instant != NULL. */
|
||||||
|
ut_ad(!was_instant || index->table->instant);
|
||||||
|
|
||||||
func_exit:
|
func_exit:
|
||||||
if (heap) {
|
if (heap) {
|
||||||
mem_heap_free(heap);
|
mem_heap_free(heap);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user