From 54a10a429334a9579558a5d284c510d6f8b5bc97 Mon Sep 17 00:00:00 2001
From: Teemu Ollakka
Date: Tue, 21 May 2024 12:40:19 +0300
Subject: [PATCH] MDEV-32363 Shut down Galera networking and logging on fatal signal

When handling fatal signal, shut down Galera networking before
printing out stack trace and writing core file. This is to achieve
fail-silent semantics on crashes which may keep the process running
for a long time, but not fully responding e.g. due to core dumping
or symbol resolving.

Also suppress all Galera/wsrep logging to avoid logging from
background threads to garble crash information from signal handler.

Notice that for fully fail-silent crash, Galera 26.4.19 is needed.

Signed-off-by: Julius Goryavsky
---
Reviewer notes (this area is not part of the commit message and is
ignored by git-am):

 - This patch was recovered from a copy in which line breaks had been
   collapsed and angle-bracketed tokens had been stripped. The line
   structure was rebuilt to match the hunk headers and the diffstat;
   the indentation of context lines was re-created by hand and must be
   checked against the target tree before applying.
 - Still missing and NOT reconstructed here: the e-mail addresses in
   From: / Signed-off-by:, and the header names of the three bare
   "#include" context lines in sql/signal_handler.cc.
 - "static std::atomic suppress_logging{false};" had lost its template
   argument. It is restored as std::atomic<bool>, the type implied by
   the {false} initializer, the "= true" store and the use of load()
   as an if-condition.
 - NOTE(review): Wsrep_server_state::handle_fatal_signal() calls
   m_instance->provider() whenever m_instance is set. Please confirm
   that this is safe when no wsrep provider has been loaded.

 sql/signal_handler.cc       |  8 ++++++++
 sql/wsrep_server_service.cc |  9 ++++++++-
 sql/wsrep_server_service.h  |  4 ++++
 sql/wsrep_server_state.cc   | 23 +++++++++++++++++++++++
 sql/wsrep_server_state.h    |  2 ++
 5 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/sql/signal_handler.cc b/sql/signal_handler.cc
index 3d497e39289..002a4c244d1 100644
--- a/sql/signal_handler.cc
+++ b/sql/signal_handler.cc
@@ -25,6 +25,10 @@
 #include "my_stacktrace.h"
 #include
 
+#ifdef WITH_WSREP
+#include "wsrep_server_state.h"
+#endif /* WITH_WSREP */
+
 #ifdef __WIN__
 #include
 #include
@@ -221,6 +225,10 @@ extern "C" sig_handler handle_fatal_signal(int sig)
       "the equation.\n\n");
   }
 
+#ifdef WITH_WSREP
+  Wsrep_server_state::handle_fatal_signal();
+#endif /* WITH_WSREP */
+
 #ifdef HAVE_STACKTRACE
   thd= current_thd;
 
diff --git a/sql/wsrep_server_service.cc b/sql/wsrep_server_service.cc
index af2c3efd214..6184ba2df59 100644
--- a/sql/wsrep_server_service.cc
+++ b/sql/wsrep_server_service.cc
@@ -161,9 +161,16 @@ void Wsrep_server_service::bootstrap()
   wsrep_set_SE_checkpoint(wsrep::gtid::undefined(), wsrep_gtid_server.undefined());
 }
 
+static std::atomic<bool> suppress_logging{false};
+void wsrep_suppress_error_logging() { suppress_logging= true; }
+
 void Wsrep_server_service::log_message(enum wsrep::log::level level,
-                                       const char* message)
+                                       const char *message)
 {
+  if (suppress_logging.load(std::memory_order_relaxed))
+  {
+    return;
+  }
   switch (level)
   {
   case wsrep::log::debug:
diff --git a/sql/wsrep_server_service.h b/sql/wsrep_server_service.h
index 3a7da229cd4..9a1e148b55f 100644
--- a/sql/wsrep_server_service.h
+++ b/sql/wsrep_server_service.h
@@ -99,4 +99,8 @@ class Wsrep_storage_service;
 Wsrep_storage_service*
 wsrep_create_storage_service(THD *orig_thd, const char *ctx);
 
+/**
+   Suppress all error logging from wsrep/Galera library.
+ */
+void wsrep_suppress_error_logging();
 #endif /* WSREP_SERVER_SERVICE */
diff --git a/sql/wsrep_server_state.cc b/sql/wsrep_server_state.cc
index 6bc4eaf4d86..a936d9dd79d 100644
--- a/sql/wsrep_server_state.cc
+++ b/sql/wsrep_server_state.cc
@@ -18,6 +18,8 @@
 #include "wsrep_server_state.h"
 #include "wsrep_binlog.h" /* init/deinit group commit */
 
+#include "my_stacktrace.h" /* my_safe_printf_stderr() */
+
 mysql_mutex_t LOCK_wsrep_server_state;
 mysql_cond_t  COND_wsrep_server_state;
 
@@ -82,3 +84,24 @@ void Wsrep_server_state::destroy()
     mysql_cond_destroy(&COND_wsrep_server_state);
   }
 }
+
+void Wsrep_server_state::handle_fatal_signal()
+{
+  if (m_instance)
+  {
+    /* Galera background threads are still running and the logging may be
+       relatively verbose in case of networking error. Silence all wsrep
+       logging before shutting down networking to avoid garbling signal
+       handler output. */
+    my_safe_printf_stderr("WSREP: Suppressing further logging\n");
+    wsrep_suppress_error_logging();
+
+    /* Shut down all communication with other nodes to fail silently. */
+    my_safe_printf_stderr("WSREP: Shutting down network communications\n");
+    if (m_instance->provider().set_node_isolation(
+            wsrep::provider::node_isolation::isolated)) {
+      my_safe_printf_stderr("WSREP: Galera library does not support node isolation\n");
+    }
+    my_safe_printf_stderr("\n");
+  }
+}
diff --git a/sql/wsrep_server_state.h b/sql/wsrep_server_state.h
index 1ef937300f6..43a93fd5aef 100644
--- a/sql/wsrep_server_state.h
+++ b/sql/wsrep_server_state.h
@@ -56,6 +56,8 @@ public:
     return (get_provider().capabilities() & capability);
   }
 
+  static void handle_fatal_signal();
+
 private:
   Wsrep_server_state(const std::string& name,
                      const std::string& incoming_address,