updating @@wsrep_cluster_address deadlocks
wsrep_cluster_address_update() causes LOCK_wsrep_slave_threads to be locked under LOCK_wsrep_cluster_config, while normally the order should be the opposite. Fix: don't protect @@wsrep_cluster_address value with the LOCK_wsrep_cluster_config, LOCK_global_system_variables is enough. Only protect wsrep reinitialization with the LOCK_wsrep_cluster_config. And make it use a local copy of the global @@wsrep_cluster_address. Also, introduce a helper function that checks whether wsrep_cluster_address is set and also asserts that it can be safely read by the caller.
This commit is contained in:
parent
b91e77cff3
commit
2696538723
@ -5804,9 +5804,12 @@ int mysqld_main(int argc, char **argv)
|
||||
wsrep_init_startup (false);
|
||||
}
|
||||
|
||||
WSREP_DEBUG("Startup creating %ld applier threads running %lu",
|
||||
wsrep_slave_threads - 1, wsrep_running_applier_threads);
|
||||
wsrep_create_appliers(wsrep_slave_threads - 1);
|
||||
if (wsrep_cluster_address_exists())
|
||||
{
|
||||
WSREP_DEBUG("Startup creating %ld applier threads running %lu",
|
||||
wsrep_slave_threads - 1, wsrep_running_applier_threads);
|
||||
wsrep_create_appliers(wsrep_slave_threads - 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5445,13 +5445,12 @@ static Sys_var_charptr Sys_wsrep_cluster_name(
|
||||
ON_CHECK(wsrep_cluster_name_check),
|
||||
ON_UPDATE(wsrep_cluster_name_update));
|
||||
|
||||
static PolyLock_mutex PLock_wsrep_cluster_config(&LOCK_wsrep_cluster_config);
|
||||
static Sys_var_charptr Sys_wsrep_cluster_address (
|
||||
"wsrep_cluster_address", "Address to initially connect to cluster",
|
||||
PREALLOCATED GLOBAL_VAR(wsrep_cluster_address),
|
||||
CMD_LINE(REQUIRED_ARG),
|
||||
IN_SYSTEM_CHARSET, DEFAULT(""),
|
||||
&PLock_wsrep_cluster_config, NOT_IN_BINLOG,
|
||||
NO_MUTEX_GUARD, NOT_IN_BINLOG,
|
||||
ON_CHECK(wsrep_cluster_address_check),
|
||||
ON_UPDATE(wsrep_cluster_address_update));
|
||||
|
||||
@ -5482,7 +5481,7 @@ static Sys_var_ulong Sys_wsrep_slave_threads(
|
||||
"wsrep_slave_threads", "Number of slave appliers to launch",
|
||||
GLOBAL_VAR(wsrep_slave_threads), CMD_LINE(REQUIRED_ARG),
|
||||
VALID_RANGE(1, 512), DEFAULT(1), BLOCK_SIZE(1),
|
||||
&PLock_wsrep_cluster_config, NOT_IN_BINLOG,
|
||||
NO_MUTEX_GUARD, NOT_IN_BINLOG,
|
||||
ON_CHECK(0),
|
||||
ON_UPDATE(wsrep_slave_threads_update));
|
||||
|
||||
|
@ -63,7 +63,7 @@ int wsrep_check_opts()
|
||||
else
|
||||
{
|
||||
// non-mysqldump SST requires wsrep_cluster_address on startup
|
||||
if (!wsrep_cluster_address || !wsrep_cluster_address[0])
|
||||
if (!wsrep_cluster_address_exists())
|
||||
{
|
||||
WSREP_ERROR ("%s SST method requires wsrep_cluster_address to be "
|
||||
"configured on startup.", wsrep_sst_method);
|
||||
|
@ -878,13 +878,13 @@ void wsrep_init_startup (bool sst_first)
|
||||
if (!strcmp(wsrep_provider, WSREP_NONE)) return;
|
||||
|
||||
/* Skip replication start if no cluster address */
|
||||
if (!wsrep_cluster_address || wsrep_cluster_address[0] == 0) return;
|
||||
if (!wsrep_cluster_address_exists()) return;
|
||||
|
||||
/*
|
||||
Read value of wsrep_new_cluster before wsrep_start_replication(),
|
||||
the value is reset to FALSE inside wsrep_start_replication.
|
||||
*/
|
||||
if (!wsrep_start_replication()) unireg_abort(1);
|
||||
if (!wsrep_start_replication(wsrep_cluster_address)) unireg_abort(1);
|
||||
|
||||
wsrep_create_rollbacker();
|
||||
wsrep_create_appliers(1);
|
||||
@ -1034,7 +1034,7 @@ void wsrep_shutdown_replication()
|
||||
my_pthread_setspecific_ptr(THR_THD, NULL);
|
||||
}
|
||||
|
||||
bool wsrep_start_replication()
|
||||
bool wsrep_start_replication(const char *wsrep_cluster_address)
|
||||
{
|
||||
int rcode;
|
||||
WSREP_DEBUG("wsrep_start_replication");
|
||||
@ -1049,12 +1049,7 @@ bool wsrep_start_replication()
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!wsrep_cluster_address || wsrep_cluster_address[0]== 0)
|
||||
{
|
||||
// if provider is non-trivial, but no address is specified, wait for address
|
||||
WSREP_DEBUG("wsrep_start_replication exit due to empty address");
|
||||
return true;
|
||||
}
|
||||
DBUG_ASSERT(wsrep_cluster_address[0]);
|
||||
|
||||
bool const bootstrap(TRUE == wsrep_new_cluster);
|
||||
wsrep_new_cluster= FALSE;
|
||||
|
@ -203,7 +203,7 @@ extern void wsrep_close_applier_threads(int count);
|
||||
|
||||
/* new defines */
|
||||
extern void wsrep_stop_replication(THD *thd);
|
||||
extern bool wsrep_start_replication();
|
||||
extern bool wsrep_start_replication(const char *wsrep_cluster_address);
|
||||
extern void wsrep_shutdown_replication();
|
||||
extern bool wsrep_must_sync_wait (THD* thd, uint mask= WSREP_SYNC_WAIT_BEFORE_READ);
|
||||
extern bool wsrep_sync_wait (THD* thd, uint mask= WSREP_SYNC_WAIT_BEFORE_READ);
|
||||
@ -280,6 +280,13 @@ void WSREP_LOG(void (*fun)(const char* fmt, ...), const char* fmt, ...);
|
||||
#define WSREP_PROVIDER_EXISTS \
|
||||
(wsrep_provider && strncasecmp(wsrep_provider, WSREP_NONE, FN_REFLEN))
|
||||
|
||||
static inline bool wsrep_cluster_address_exists()
|
||||
{
|
||||
if (mysqld_server_started)
|
||||
mysql_mutex_assert_owner(&LOCK_global_system_variables);
|
||||
return wsrep_cluster_address && wsrep_cluster_address[0];
|
||||
}
|
||||
|
||||
#define WSREP_QUERY(thd) (thd->query())
|
||||
|
||||
extern my_bool wsrep_ready_get();
|
||||
@ -501,6 +508,7 @@ wsrep::key wsrep_prepare_key_for_toi(const char* db, const char* table,
|
||||
#define wsrep_thr_deinit() do {} while(0)
|
||||
#define wsrep_init_globals() do {} while(0)
|
||||
#define wsrep_create_appliers(X) do {} while(0)
|
||||
#define wsrep_cluster_address_exists() (false)
|
||||
|
||||
#endif /* WITH_WSREP */
|
||||
|
||||
|
@ -125,11 +125,7 @@ bool wsrep_create_appliers(long threads, bool mutex_protected)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!wsrep_cluster_address || wsrep_cluster_address[0]== 0)
|
||||
{
|
||||
WSREP_DEBUG("wsrep_create_appliers exit due to empty address");
|
||||
return false;
|
||||
}
|
||||
DBUG_ASSERT(wsrep_cluster_address[0]);
|
||||
|
||||
long wsrep_threads=0;
|
||||
|
||||
@ -284,16 +280,14 @@ static void wsrep_rollback_process(THD *rollbacker,
|
||||
|
||||
void wsrep_create_rollbacker()
|
||||
{
|
||||
if (wsrep_cluster_address && wsrep_cluster_address[0] != 0)
|
||||
{
|
||||
Wsrep_thd_args* args(new Wsrep_thd_args(wsrep_rollback_process,
|
||||
WSREP_ROLLBACKER_THREAD,
|
||||
pthread_self()));
|
||||
DBUG_ASSERT(wsrep_cluster_address[0]);
|
||||
Wsrep_thd_args* args(new Wsrep_thd_args(wsrep_rollback_process,
|
||||
WSREP_ROLLBACKER_THREAD,
|
||||
pthread_self()));
|
||||
|
||||
/* create rollbacker */
|
||||
if (create_wsrep_THD(args, false))
|
||||
WSREP_WARN("Can't create thread to manage wsrep rollback");
|
||||
}
|
||||
/* create rollbacker */
|
||||
if (create_wsrep_THD(args, false))
|
||||
WSREP_WARN("Can't create thread to manage wsrep rollback");
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -554,30 +554,31 @@ bool wsrep_cluster_address_update (sys_var *self, THD* thd, enum_var_type type)
|
||||
/* stop replication is heavy operation, and includes closing all client
|
||||
connections. Closing clients may need to get LOCK_global_system_variables
|
||||
at least in MariaDB.
|
||||
|
||||
Note: releasing LOCK_global_system_variables may cause race condition, if
|
||||
there can be several concurrent clients changing wsrep_provider
|
||||
*/
|
||||
char *tmp= my_strdup(wsrep_cluster_address, MYF(MY_WME));
|
||||
WSREP_DEBUG("wsrep_cluster_address_update: %s", wsrep_cluster_address);
|
||||
mysql_mutex_unlock(&LOCK_global_system_variables);
|
||||
|
||||
mysql_mutex_lock(&LOCK_wsrep_cluster_config);
|
||||
wsrep_stop_replication(thd);
|
||||
|
||||
if (wsrep_start_replication())
|
||||
if (*tmp && wsrep_start_replication(tmp))
|
||||
{
|
||||
wsrep_create_rollbacker();
|
||||
WSREP_DEBUG("Cluster address update creating %ld applier threads running %lu",
|
||||
wsrep_slave_threads, wsrep_running_applier_threads);
|
||||
wsrep_create_appliers(wsrep_slave_threads);
|
||||
}
|
||||
/* locking order to be enforced is:
|
||||
1. LOCK_global_system_variables
|
||||
2. LOCK_wsrep_cluster_config
|
||||
=> have to juggle mutexes to comply with this
|
||||
*/
|
||||
|
||||
mysql_mutex_unlock(&LOCK_wsrep_cluster_config);
|
||||
|
||||
mysql_mutex_lock(&LOCK_global_system_variables);
|
||||
mysql_mutex_lock(&LOCK_wsrep_cluster_config);
|
||||
if (strcmp(tmp, wsrep_cluster_address))
|
||||
{
|
||||
my_free((void*)wsrep_cluster_address);
|
||||
wsrep_cluster_address= tmp;
|
||||
}
|
||||
else
|
||||
my_free(tmp);
|
||||
|
||||
return false;
|
||||
}
|
||||
@ -674,11 +675,12 @@ static void wsrep_slave_count_change_update ()
|
||||
|
||||
bool wsrep_slave_threads_update (sys_var *self, THD* thd, enum_var_type type)
|
||||
{
|
||||
mysql_mutex_unlock(&LOCK_wsrep_cluster_config);
|
||||
if (!wsrep_cluster_address_exists())
|
||||
return false;
|
||||
|
||||
mysql_mutex_unlock(&LOCK_global_system_variables);
|
||||
mysql_mutex_lock(&LOCK_wsrep_slave_threads);
|
||||
mysql_mutex_lock(&LOCK_global_system_variables);
|
||||
mysql_mutex_lock(&LOCK_wsrep_cluster_config);
|
||||
bool res= false;
|
||||
|
||||
wsrep_slave_count_change_update();
|
||||
|
Loading…
x
Reference in New Issue
Block a user