From ca66a2cbfa5f0c47b6f0af196c968fa356cfdabf Mon Sep 17 00:00:00 2001 From: Daniel Black Date: Tue, 19 Sep 2023 11:33:51 +1000 Subject: [PATCH] MDEV-18200 MariaBackup full backup failed with InnoDB: Failing assertion: success There are many filesystem related errors that can occur with MariaBackup. These already outputed to stderr with a good description of the error. Many of these are permission or resource (file descriptor) limits where the assertion and resulting core crash doesn't offer developers anything more than the log message. To the user, assertions and core crashes come across as poor error handling. As such we return an error and handle this all the way up the stack. --- extra/mariabackup/xtrabackup.cc | 27 +++++++++++++------ .../suite/mariabackup/data_directory.result | 6 +++++ .../suite/mariabackup/data_directory.test | 15 +++++++++++ plugin/cracklib_password_check/CMakeLists.txt | 6 +++++ storage/innobase/fil/fil0fil.cc | 18 ++++++++----- storage/innobase/include/log0log.h | 5 ++-- storage/innobase/log/log0log.cc | 21 ++++++++------- storage/innobase/log/log0recv.cc | 14 +++++++--- 8 files changed, 82 insertions(+), 30 deletions(-) diff --git a/extra/mariabackup/xtrabackup.cc b/extra/mariabackup/xtrabackup.cc index 96c90b5afad..64a0f44e183 100644 --- a/extra/mariabackup/xtrabackup.cc +++ b/extra/mariabackup/xtrabackup.cc @@ -4503,14 +4503,21 @@ bool Backup_datasinks::backup_low() if (recv_find_max_checkpoint(&max_cp_field) == DB_SUCCESS && log_sys.log.format != 0) { - if (max_cp_field == LOG_CHECKPOINT_1) { - log_header_read(max_cp_field); + switch (max_cp_field) { + case LOG_CHECKPOINT_1: + if (log_header_read(max_cp_field)) { + /* metadata_to_lsn still 0 so error returns below */ + msg("Error: recv_find_max_checkpoint() failed."); + break; + } + /* fallthrough */ + default: + metadata_to_lsn = mach_read_from_8( + log_sys.checkpoint_buf + LOG_CHECKPOINT_LSN); + msg("mariabackup: The latest check point" + " (for incremental): '" LSN_PF "'", + metadata_to_lsn); } - metadata_to_lsn = mach_read_from_8( - log_sys.checkpoint_buf + LOG_CHECKPOINT_LSN); - msg("mariabackup: The latest check point" - " (for incremental): '" LSN_PF "'", - metadata_to_lsn); } else { msg("Error: recv_find_max_checkpoint() failed."); } @@ -4721,7 +4728,11 @@ reread_log_header: checkpoint_lsn_start = log_sys.log.get_lsn(); checkpoint_no_start = log_sys.next_checkpoint_no; - log_header_read(max_cp_field); + if (log_header_read(max_cp_field)) { + log_mutex_exit(); + goto fail; + } + if (checkpoint_no_start != mach_read_from_8(buf + LOG_CHECKPOINT_NO) || checkpoint_lsn_start diff --git a/mysql-test/suite/mariabackup/data_directory.result b/mysql-test/suite/mariabackup/data_directory.result index 4e45127bd6a..8692c35239e 100644 --- a/mysql-test/suite/mariabackup/data_directory.result +++ b/mysql-test/suite/mariabackup/data_directory.result @@ -11,3 +11,9 @@ SELECT * FROM t; a 1 DROP TABLE t; +# +# MDEV-18200 MariaBackup full backup failed with InnoDB: Failing assertion: success +# +# +# End of 10.4 tests +# diff --git a/mysql-test/suite/mariabackup/data_directory.test b/mysql-test/suite/mariabackup/data_directory.test index a89b7bdccc4..ffb3ab3073c 100644 --- a/mysql-test/suite/mariabackup/data_directory.test +++ b/mysql-test/suite/mariabackup/data_directory.test @@ -21,4 +21,19 @@ rmdir $table_data_dir; SELECT * FROM t; DROP TABLE t; rmdir $targetdir; + +--echo # +--echo # MDEV-18200 MariaBackup full backup failed with InnoDB: Failing assertion: success +--echo # +let $DATADIR= `select @@datadir`; +chmod 0000 $DATADIR/ibdata1; +--disable_result_log +--error 1 +exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --backup --target-dir=$targetdir; +--enable_result_log +chmod 0755 $DATADIR/ibdata1; rmdir $table_data_dir; +rmdir $targetdir; +--echo # +--echo # End of 10.4 tests +--echo # diff --git a/plugin/cracklib_password_check/CMakeLists.txt b/plugin/cracklib_password_check/CMakeLists.txt index 4a6337310f9..79b3b80fbef 100644 --- a/plugin/cracklib_password_check/CMakeLists.txt +++ b/plugin/cracklib_password_check/CMakeLists.txt @@ -1,3 +1,9 @@ + +IF(PLUGIN_CRACKLIB_PASSWORD_CHECK STREQUAL "NO") + ADD_FEATURE_INFO(CRACKLIB_PASSWORD_CHECK "OFF" "CrackLib Password Validation Plugin") + RETURN() +ENDIF() + INCLUDE (CheckIncludeFiles) INCLUDE (CheckLibraryExists) diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index ebeb1f66101..45f8c341ee2 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -557,7 +557,9 @@ fail: &success); } - ut_a(success); + if (!success) + return false; + ut_a(node->is_open()); fil_system.n_open++; @@ -4062,8 +4064,10 @@ inline void IORequest::set_fil_node(fil_node_t* node) @param[in] message message for aio handler if non-sync aio used, else ignored @param[in] ignore_missing_space true=ignore missing space duging read -@return DB_SUCCESS, or DB_TABLESPACE_DELETED - if we are trying to do i/o on a tablespace which does not exist */ +@retval DB_SUCCESS, if successful; or +@retval DB_IO_ERROR, if unable to open file or the purpose==FIL_TYPE_IMPORT; or +@retval DB_TABLESPACE_DELETED, if we are trying to do i/o on a tablespace which + does not exist */ dberr_t fil_io( const IORequest& type, @@ -4232,11 +4236,13 @@ fil_io( return(DB_TABLESPACE_DELETED); } - /* The tablespace is for log. Currently, we just assert here + /* The tablespace is for log. We used to asssert here to prevent handling errors along the way fil_io returns. Also, if the log files are missing, it would be hard to - promise the server can continue running. */ - ut_a(0); + promise the server can continue running. However this + is also used by MariaDB-backup, so we need to handle it. */ + mutex_exit(&fil_system.mutex); + return(DB_IO_ERROR); } /* Check that at least the start offset is within the bounds of a diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h index 477bb0a1d05..7079479754f 100644 --- a/storage/innobase/include/log0log.h +++ b/storage/innobase/include/log0log.h @@ -209,8 +209,9 @@ void logs_empty_and_mark_files_at_shutdown(void); /*=======================================*/ /** Read a log group header page to log_sys.checkpoint_buf. -@param[in] header 0 or LOG_CHECKPOINT_1 or LOG_CHECKPOINT2 */ -void log_header_read(ulint header); +@param[in] header 0 or LOG_CHECKPOINT_1 or LOG_CHECKPOINT2 +@return error code (from fil_io) or DB_SUCCESS */ +dberr_t log_header_read(ulint header); /** Write checkpoint info to the log header and invoke log_mutex_exit(). @param[in] sync whether to wait for the write to complete @param[in] end_lsn start LSN of the MLOG_CHECKPOINT mini-transaction */ diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index bf75b3b7c86..433483b662e 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -1355,20 +1355,21 @@ log_group_checkpoint(lsn_t end_lsn) } /** Read a log group header page to log_sys.checkpoint_buf. -@param[in] header 0 or LOG_CHECKPOINT_1 or LOG_CHECKPOINT2 */ -void log_header_read(ulint header) +@param[in] header 0 or LOG_CHECKPOINT_1 or LOG_CHECKPOINT2 +@return DB_SUCCESS or error. */ +dberr_t log_header_read(ulint header) { - ut_ad(log_mutex_own()); + ut_ad(log_mutex_own()); - log_sys.n_log_ios++; + log_sys.n_log_ios++; - MONITOR_INC(MONITOR_LOG_IO); + MONITOR_INC(MONITOR_LOG_IO); - fil_io(IORequestLogRead, true, - page_id_t(SRV_LOG_SPACE_FIRST_ID, - header >> srv_page_size_shift), - 0, header & (srv_page_size - 1), - OS_FILE_LOG_BLOCK_SIZE, log_sys.checkpoint_buf, NULL); + return fil_io(IORequestLogRead, true, + page_id_t(SRV_LOG_SPACE_FIRST_ID, + header >> srv_page_size_shift), + 0, header & (srv_page_size - 1), + OS_FILE_LOG_BLOCK_SIZE, log_sys.checkpoint_buf, NULL); } /** Write checkpoint info to the log header and invoke log_mutex_exit(). diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index eb4cb910679..5645e56903d 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -1068,6 +1068,7 @@ recv_find_max_checkpoint_0(ulint* max_field) { ib_uint64_t max_no = 0; ib_uint64_t checkpoint_no; + dberr_t err; byte* buf = log_sys.checkpoint_buf; ut_ad(log_sys.log.format == 0); @@ -1085,7 +1086,8 @@ recv_find_max_checkpoint_0(ulint* max_field) for (ulint field = LOG_CHECKPOINT_1; field <= LOG_CHECKPOINT_2; field += LOG_CHECKPOINT_2 - LOG_CHECKPOINT_1) { - log_header_read(field); + if ((err= log_header_read(field))) + return err; if (static_cast(ut_fold_binary(buf, CHECKSUM_1)) != mach_read_from_4(buf + CHECKSUM_1) @@ -1208,13 +1210,15 @@ recv_find_max_checkpoint(ulint* max_field) ib_uint64_t checkpoint_no; ulint field; byte* buf; + dberr_t err; max_no = 0; *max_field = 0; buf = log_sys.checkpoint_buf; - log_header_read(0); + if ((err= log_header_read(0))) + return err; /* Check the header page checksum. There was no checksum in the first redo log format (version 0). */ log_sys.log.format = mach_read_from_4(buf + LOG_HEADER_FORMAT); @@ -1252,7 +1256,8 @@ recv_find_max_checkpoint(ulint* max_field) for (field = LOG_CHECKPOINT_1; field <= LOG_CHECKPOINT_2; field += LOG_CHECKPOINT_2 - LOG_CHECKPOINT_1) { - log_header_read(field); + if ((err= log_header_read(field))) + return err; const ulint crc32 = log_block_calc_checksum_crc32(buf); const ulint cksum = log_block_get_checksum(buf); @@ -3630,7 +3635,8 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn) return(err); } - log_header_read(max_cp_field); + if ((err= log_header_read(max_cp_field))) + return err; buf = log_sys.checkpoint_buf;