MDEV-32363 Shut down Galera networking and logging on fatal signal

When handling a fatal signal, shut down Galera networking
before printing out the stack trace and writing the core file.
This achieves fail-silent semantics for crashes that may keep
the process running for a long time without fully responding,
e.g. due to core dumping or symbol resolution.

Also suppress all Galera/wsrep logging so that logging from background
threads does not garble the crash information printed by the signal handler.

Note that a fully fail-silent crash requires Galera 26.4.19.

Signed-off-by: Julius Goryavsky <julius.goryavsky@mariadb.com>
This commit is contained in:
Teemu Ollakka 2024-05-21 12:40:19 +03:00 committed by Julius Goryavsky
parent b65bbb2fae
commit 54a10a4293
5 changed files with 45 additions and 1 deletions

View File

@ -25,6 +25,10 @@
#include "my_stacktrace.h"
#include <source_revision.h>
#ifdef WITH_WSREP
#include "wsrep_server_state.h"
#endif /* WITH_WSREP */
#ifdef __WIN__
#include <crtdbg.h>
#include <direct.h>
@ -221,6 +225,10 @@ extern "C" sig_handler handle_fatal_signal(int sig)
"the equation.\n\n");
}
#ifdef WITH_WSREP
Wsrep_server_state::handle_fatal_signal();
#endif /* WITH_WSREP */
#ifdef HAVE_STACKTRACE
thd= current_thd;

View File

@ -161,9 +161,16 @@ void Wsrep_server_service::bootstrap()
wsrep_set_SE_checkpoint(wsrep::gtid::undefined(), wsrep_gtid_server.undefined());
}
/* Raised by wsrep_suppress_error_logging(); while set,
   Wsrep_server_service::log_message() drops every message. */
static std::atomic<bool> suppress_logging{false};

/* Turn off all subsequent wsrep/Galera log output. */
void wsrep_suppress_error_logging()
{
  suppress_logging.store(true);
}
void Wsrep_server_service::log_message(enum wsrep::log::level level,
const char* message)
const char *message)
{
if (suppress_logging.load(std::memory_order_relaxed))
{
return;
}
switch (level)
{
case wsrep::log::debug:

View File

@ -99,4 +99,8 @@ class Wsrep_storage_service;
Wsrep_storage_service*
wsrep_create_storage_service(THD *orig_thd, const char *ctx);
/**
Suppress all error logging from wsrep/Galera library.

Once this has been called, Wsrep_server_service::log_message() returns
without emitting the message, whatever its level. It is invoked from
Wsrep_server_state::handle_fatal_signal() so that wsrep log output does
not garble what the signal handler prints.
*/
void wsrep_suppress_error_logging();
#endif /* WSREP_SERVER_SERVICE */

View File

@ -18,6 +18,8 @@
#include "wsrep_server_state.h"
#include "wsrep_binlog.h" /* init/deinit group commit */
#include "my_stacktrace.h" /* my_safe_printf_stderr() */
mysql_mutex_t LOCK_wsrep_server_state;
mysql_cond_t COND_wsrep_server_state;
@ -82,3 +84,24 @@ void Wsrep_server_state::destroy()
mysql_cond_destroy(&COND_wsrep_server_state);
}
}
/*
  Fatal-signal hook for wsrep: silences wsrep logging and then asks the
  provider to isolate this node. Does nothing when no server state
  instance exists.
*/
void Wsrep_server_state::handle_fatal_signal()
{
  if (!m_instance)
    return;

  /* Mute wsrep logging first: Galera background threads are still alive
     and may log heavily once networking starts failing, which would
     interleave with the output of the signal handler. */
  my_safe_printf_stderr("WSREP: Suppressing further logging\n");
  wsrep_suppress_error_logging();

  /* Cut all communication with the other nodes so that this node fails
     silently. A non-zero result from the provider is reported as missing
     node isolation support. */
  my_safe_printf_stderr("WSREP: Shutting down network communications\n");
  const auto isolation_mode= wsrep::provider::node_isolation::isolated;
  if (m_instance->provider().set_node_isolation(isolation_mode))
    my_safe_printf_stderr("WSREP: Galera library does not support node isolation\n");

  my_safe_printf_stderr("\n");
}

View File

@ -56,6 +56,8 @@ public:
return (get_provider().capabilities() & capability);
}
/* Fatal-signal hook. When a server state instance exists, suppresses
   wsrep logging and then requests node isolation from the provider;
   otherwise does nothing. */
static void handle_fatal_signal();
private:
Wsrep_server_state(const std::string& name,
const std::string& incoming_address,