MDEV-10630 rpl.rpl_mdev6020 fails in buildbot with timeout
The issue was that when running with valgrind, the timeout used when waiting for master_pos_wait() was not long enough. This patch also fixes two other failures that could affect rpl_mdev6020: - check_if_conflicting_replication_locks() didn't properly check domains - 'did_mark_start_commit' was set after the signals to other threads were sent, which could cause the variable to be read too early.
This commit is contained in:
parent
5932fa7890
commit
b51109693e
@ -181,6 +181,7 @@ static uint my_end_arg= 0;
|
||||
static uint opt_tail_lines= 0;
|
||||
|
||||
static uint opt_connect_timeout= 0;
|
||||
static uint opt_wait_for_pos_timeout= 0;
|
||||
|
||||
static char delimiter[MAX_DELIMITER_LENGTH]= ";";
|
||||
static uint delimiter_length= 1;
|
||||
@ -4659,7 +4660,7 @@ void do_sync_with_master2(struct st_command *command, long offset,
|
||||
MYSQL_ROW row;
|
||||
MYSQL *mysql= cur_con->mysql;
|
||||
char query_buf[FN_REFLEN+128];
|
||||
int timeout= 300; /* seconds */
|
||||
int timeout= opt_wait_for_pos_timeout;
|
||||
|
||||
if (!master_pos.file[0])
|
||||
die("Calling 'sync_with_master' without calling 'save_master_pos'");
|
||||
@ -7098,6 +7099,10 @@ static struct my_option my_long_options[] =
|
||||
"Number of seconds before connection timeout.",
|
||||
&opt_connect_timeout, &opt_connect_timeout, 0, GET_UINT, REQUIRED_ARG,
|
||||
120, 0, 3600 * 12, 0, 0, 0},
|
||||
{"wait_for_pos_timeout", 0,
|
||||
"Number of seconds to wait for master_pos_wait",
|
||||
&opt_wait_for_pos_timeout, &opt_wait_for_pos_timeout, 0, GET_UINT,
|
||||
REQUIRED_ARG, 300, 0, 3600 * 12, 0, 0, 0},
|
||||
{"plugin_dir", 0, "Directory for client-side plugins.",
|
||||
&opt_plugin_dir, &opt_plugin_dir, 0,
|
||||
GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
|
||||
|
@ -5899,7 +5899,7 @@ sub start_mysqltest ($) {
|
||||
{
|
||||
# We are running server under valgrind, which causes some replication
|
||||
# tests to be much slower, notably rpl_mdev6020. Increase timeout.
|
||||
mtr_add_arg($args, "--wait-for-pos-timeout=1500");
|
||||
mtr_add_arg($args, "--wait-for-pos-timeout=0");
|
||||
}
|
||||
|
||||
if ( $opt_ssl )
|
||||
|
@ -1,8 +1,10 @@
|
||||
# Running this with valgrind can take > 5000 seconds with xtradb
|
||||
--source include/not_valgrind.inc
|
||||
|
||||
--source include/have_innodb.inc
|
||||
--source include/have_partition.inc
|
||||
--source include/have_binlog_format_mixed_or_row.inc
|
||||
--source include/master-slave.inc
|
||||
|
||||
--connection slave
|
||||
--source include/stop_slave.inc
|
||||
|
||||
|
20
sql/mdl.cc
20
sql/mdl.cc
@ -443,7 +443,9 @@ public:
|
||||
virtual void notify_conflicting_locks(MDL_context *ctx) = 0;
|
||||
|
||||
virtual bitmap_t hog_lock_types_bitmap() const = 0;
|
||||
#ifndef DBUG_OFF
|
||||
bool check_if_conflicting_replication_locks(MDL_context *ctx);
|
||||
#endif
|
||||
|
||||
/** List of granted tickets for this lock. */
|
||||
Ticket_list m_granted;
|
||||
@ -2303,16 +2305,23 @@ void MDL_scoped_lock::notify_conflicting_locks(MDL_context *ctx)
|
||||
and trying to get an exclusive lock for the table.
|
||||
*/
|
||||
|
||||
#ifndef DBUG_OFF
|
||||
bool MDL_lock::check_if_conflicting_replication_locks(MDL_context *ctx)
|
||||
{
|
||||
Ticket_iterator it(m_granted);
|
||||
MDL_ticket *conflicting_ticket;
|
||||
rpl_group_info *rgi_slave= ctx->get_thd()->rgi_slave;
|
||||
|
||||
if (!rgi_slave->gtid_sub_id)
|
||||
return 0;
|
||||
|
||||
while ((conflicting_ticket= it++))
|
||||
{
|
||||
if (conflicting_ticket->get_ctx() != ctx)
|
||||
{
|
||||
MDL_context *conflicting_ctx= conflicting_ticket->get_ctx();
|
||||
rpl_group_info *conflicting_rgi_slave;
|
||||
conflicting_rgi_slave= conflicting_ctx->get_thd()->rgi_slave;
|
||||
|
||||
/*
|
||||
If the conflicting thread is another parallel replication
|
||||
@ -2320,15 +2329,18 @@ bool MDL_lock::check_if_conflicting_replication_locks(MDL_context *ctx)
|
||||
the current transaction has started too early and something is
|
||||
seriously wrong.
|
||||
*/
|
||||
if (conflicting_ctx->get_thd()->rgi_slave &&
|
||||
conflicting_ctx->get_thd()->rgi_slave->rli ==
|
||||
ctx->get_thd()->rgi_slave->rli &&
|
||||
!conflicting_ctx->get_thd()->rgi_slave->did_mark_start_commit)
|
||||
if (conflicting_rgi_slave &&
|
||||
conflicting_rgi_slave->gtid_sub_id &&
|
||||
conflicting_rgi_slave->rli == rgi_slave->rli &&
|
||||
conflicting_rgi_slave->current_gtid.domain_id ==
|
||||
rgi_slave->current_gtid.domain_id &&
|
||||
!conflicting_rgi_slave->did_mark_start_commit)
|
||||
return 1; // Fatal error
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
|
@ -1921,8 +1921,8 @@ rpl_group_info::mark_start_commit_no_lock()
|
||||
{
|
||||
if (did_mark_start_commit)
|
||||
return;
|
||||
mark_start_commit_inner(parallel_entry, gco, this);
|
||||
did_mark_start_commit= true;
|
||||
mark_start_commit_inner(parallel_entry, gco, this);
|
||||
}
|
||||
|
||||
|
||||
@ -1933,12 +1933,12 @@ rpl_group_info::mark_start_commit()
|
||||
|
||||
if (did_mark_start_commit)
|
||||
return;
|
||||
did_mark_start_commit= true;
|
||||
|
||||
e= this->parallel_entry;
|
||||
mysql_mutex_lock(&e->LOCK_parallel_entry);
|
||||
mark_start_commit_inner(e, gco, this);
|
||||
mysql_mutex_unlock(&e->LOCK_parallel_entry);
|
||||
did_mark_start_commit= true;
|
||||
}
|
||||
|
||||
|
||||
@ -1981,12 +1981,12 @@ rpl_group_info::unmark_start_commit()
|
||||
|
||||
if (!did_mark_start_commit)
|
||||
return;
|
||||
did_mark_start_commit= false;
|
||||
|
||||
e= this->parallel_entry;
|
||||
mysql_mutex_lock(&e->LOCK_parallel_entry);
|
||||
--e->count_committing_event_groups;
|
||||
mysql_mutex_unlock(&e->LOCK_parallel_entry);
|
||||
did_mark_start_commit= false;
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user