MDEV-15607: mysqld crashed shortly after a node joined with SST
This is a typical systemd response: it tries to shut down the joiner (due to a "timeout") before the joiner manages to complete SST. In wsrep_sst_wait and wsrep_SE_init_wait: while waiting for the operation to finish, use mysql_cond_timedwait instead of mysql_cond_wait, and if the operation has not finished yet, extend the systemd timeout (if needed).
This commit is contained in:
parent
c6392d52ee
commit
be5698265a
@ -30,6 +30,10 @@
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
|
||||
#if MYSQL_VERSION_ID < 100200
|
||||
# include <my_service_manager.h>
|
||||
#endif
|
||||
|
||||
static char wsrep_defaults_file[FN_REFLEN * 2 + 10 + 30 +
|
||||
sizeof(WSREP_SST_OPT_CONF) +
|
||||
sizeof(WSREP_SST_OPT_CONF_SUFFIX) +
|
||||
@ -186,6 +190,9 @@ bool wsrep_before_SE()
|
||||
static bool sst_complete = false;
|
||||
static bool sst_needed = false;
|
||||
|
||||
#define WSREP_EXTEND_TIMEOUT_INTERVAL 30
|
||||
#define WSREP_TIMEDWAIT_SECONDS 10
|
||||
|
||||
void wsrep_sst_grab ()
|
||||
{
|
||||
WSREP_INFO("wsrep_sst_grab()");
|
||||
@ -197,11 +204,25 @@ void wsrep_sst_grab ()
|
||||
// Wait for end of SST
|
||||
bool wsrep_sst_wait ()
|
||||
{
|
||||
if (mysql_mutex_lock (&LOCK_wsrep_sst)) abort();
|
||||
struct timespec wtime = {WSREP_TIMEDWAIT_SECONDS, 0};
|
||||
uint32 total_wtime = 0;
|
||||
|
||||
if (mysql_mutex_lock (&LOCK_wsrep_sst))
|
||||
abort();
|
||||
|
||||
WSREP_INFO("Waiting for SST to complete.");
|
||||
|
||||
while (!sst_complete)
|
||||
{
|
||||
WSREP_INFO("Waiting for SST to complete.");
|
||||
mysql_cond_wait (&COND_wsrep_sst, &LOCK_wsrep_sst);
|
||||
mysql_cond_timedwait (&COND_wsrep_sst, &LOCK_wsrep_sst, &wtime);
|
||||
|
||||
if (!sst_complete)
|
||||
{
|
||||
total_wtime += wtime.tv_sec;
|
||||
WSREP_DEBUG("Waiting for SST to complete. waited %u secs.", total_wtime);
|
||||
service_manager_extend_timeout(WSREP_EXTEND_TIMEOUT_INTERVAL,
|
||||
"WSREP state transfer ongoing, current seqno: %ld", local_seqno);
|
||||
}
|
||||
}
|
||||
|
||||
if (local_seqno >= 0)
|
||||
@ -1298,10 +1319,22 @@ void wsrep_SE_init_grab()
|
||||
|
||||
void wsrep_SE_init_wait()
|
||||
{
|
||||
struct timespec wtime = {WSREP_TIMEDWAIT_SECONDS, 0};
|
||||
uint32 total_wtime=0;
|
||||
|
||||
while (SE_initialized == false)
|
||||
{
|
||||
mysql_cond_wait (&COND_wsrep_sst_init, &LOCK_wsrep_sst_init);
|
||||
mysql_cond_timedwait (&COND_wsrep_sst_init, &LOCK_wsrep_sst_init, &wtime);
|
||||
|
||||
if (!SE_initialized)
|
||||
{
|
||||
total_wtime += wtime.tv_sec;
|
||||
WSREP_DEBUG("Waiting for SST to complete. waited %u secs.", total_wtime);
|
||||
service_manager_extend_timeout(WSREP_EXTEND_TIMEOUT_INTERVAL,
|
||||
"WSREP SE initialization ongoing.");
|
||||
}
|
||||
}
|
||||
|
||||
mysql_mutex_unlock (&LOCK_wsrep_sst_init);
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user