diff --git a/extra/mariabackup/wsrep.cc b/extra/mariabackup/wsrep.cc index 6e02bf5ceab..fb66611b3ee 100644 --- a/extra/mariabackup/wsrep.cc +++ b/extra/mariabackup/wsrep.cc @@ -43,7 +43,7 @@ permission notice: #include #include #include -#include +#include #include "common.h" #ifdef WITH_WSREP @@ -181,7 +181,7 @@ xb_write_galera_info(bool incremental_prepare) memset(&xid, 0, sizeof(xid)); xid.formatID = -1; - if (!trx_sys_read_wsrep_checkpoint(&xid)) { + if (!trx_rseg_read_wsrep_checkpoint(xid)) { return; } diff --git a/extra/mariabackup/xtrabackup.cc b/extra/mariabackup/xtrabackup.cc index 8d9e48854a8..823be4da0bb 100644 --- a/extra/mariabackup/xtrabackup.cc +++ b/extra/mariabackup/xtrabackup.cc @@ -4843,37 +4843,23 @@ xtrabackup_prepare_func(char** argv) } if (ok) { - mtr_t mtr; - mtr.start(); - const buf_block_t* sys_header = trx_sysf_get(&mtr, false); + msg("Last binlog file %s, position %lld\n", + trx_sys.recovered_binlog_filename, + longlong(trx_sys.recovered_binlog_offset)); - if (mach_read_from_4(TRX_SYS_MYSQL_LOG_INFO - + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD - + TRX_SYS + sys_header->frame) - == TRX_SYS_MYSQL_LOG_MAGIC_N) { - ulonglong pos = mach_read_from_8( - TRX_SYS_MYSQL_LOG_INFO - + TRX_SYS_MYSQL_LOG_OFFSET - + TRX_SYS + sys_header->frame); - const char* name = reinterpret_cast( - TRX_SYS_MYSQL_LOG_INFO + TRX_SYS_MYSQL_LOG_NAME - + TRX_SYS + sys_header->frame); - msg("Last binlog file %s, position %llu\n", name, pos); - - /* output to xtrabackup_binlog_pos_innodb and - (if backup_safe_binlog_info was available on - the server) to xtrabackup_binlog_info. In the - latter case xtrabackup_binlog_pos_innodb - becomes redundant and is created only for - compatibility. 
*/ - ok = store_binlog_info( - "xtrabackup_binlog_pos_innodb", name, pos) - && (!recover_binlog_info || store_binlog_info( - XTRABACKUP_BINLOG_INFO, - name, pos)); - } - - mtr.commit(); + /* output to xtrabackup_binlog_pos_innodb and + (if backup_safe_binlog_info was available on + the server) to xtrabackup_binlog_info. In the + latter case xtrabackup_binlog_pos_innodb + becomes redundant and is created only for + compatibility. */ + ok = store_binlog_info("xtrabackup_binlog_pos_innodb", + trx_sys.recovered_binlog_filename, + trx_sys.recovered_binlog_offset) + && (!recover_binlog_info + || store_binlog_info(XTRABACKUP_BINLOG_INFO, + trx_sys.recovered_binlog_filename, + trx_sys.recovered_binlog_offset)); } /* Check whether the log is applied enough or not. */ diff --git a/mysql-test/suite/mariabackup/binlog.result b/mysql-test/suite/mariabackup/binlog.result new file mode 100644 index 00000000000..4f8ed4185ce --- /dev/null +++ b/mysql-test/suite/mariabackup/binlog.result @@ -0,0 +1,8 @@ +CREATE TABLE t(a varchar(60)) ENGINE INNODB; +INSERT INTO t VALUES(1); +SHOW VARIABLES like 'log_bin'; +Variable_name Value +log_bin ON +FOUND 1 /Last binlog file .*, position .*/ in current_test +# expect FOUND +DROP TABLE t; diff --git a/mysql-test/suite/mariabackup/binlog.test b/mysql-test/suite/mariabackup/binlog.test new file mode 100644 index 00000000000..998397bb495 --- /dev/null +++ b/mysql-test/suite/mariabackup/binlog.test @@ -0,0 +1,25 @@ +--source include/have_innodb.inc +--source include/have_log_bin.inc + +let $basedir=$MYSQLTEST_VARDIR/tmp/backup; + +CREATE TABLE t(a varchar(60)) ENGINE INNODB; +INSERT INTO t VALUES(1); + +SHOW VARIABLES like 'log_bin'; + +--disable_result_log +exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --backup --target-dir=$basedir; +--enable_result_log + +exec $XTRABACKUP --prepare --binlog-info=1 --apply-log-only --target-dir=$basedir ; + +let SEARCH_FILE=$MYSQLTEST_VARDIR/log/current_test; +--let SEARCH_PATTERN= Last binlog file 
.*, position .* +--source include/search_pattern_in_file.inc +--echo # expect FOUND + +DROP TABLE t; + +# Cleanup +rmdir $basedir; diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 9ab36a97fba..699e2899f57 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -111,10 +111,9 @@ this program; if not, write to the Free Software Foundation, Inc., #include "trx0purge.h" #endif /* UNIV_DEBUG */ #include "trx0roll.h" -#include "trx0sys.h" +#include "trx0rseg.h" #include "trx0trx.h" #include "fil0pagecompress.h" -#include "trx0xa.h" #include "ut0mem.h" #include "row0ext.h" @@ -19679,12 +19678,8 @@ innobase_wsrep_set_checkpoint( DBUG_ASSERT(hton == innodb_hton_ptr); if (wsrep_is_wsrep_xid(xid)) { - mtr_t mtr; - mtr_start(&mtr); - if (buf_block_t* sys_header = trx_sysf_get(&mtr)) { - trx_sys_update_wsrep_checkpoint(xid, sys_header, &mtr); - } - mtr_commit(&mtr); + + trx_rseg_update_wsrep_checkpoint(xid); innobase_flush_logs(hton, false); return 0; } else { @@ -19700,7 +19695,7 @@ innobase_wsrep_get_checkpoint( XID* xid) { DBUG_ASSERT(hton == innodb_hton_ptr); - trx_sys_read_wsrep_checkpoint(xid); + trx_rseg_read_wsrep_checkpoint(*xid); return 0; } diff --git a/storage/innobase/include/trx0rseg.h b/storage/innobase/include/trx0rseg.h index 5642dd0d4a8..8aa91c13add 100644 --- a/storage/innobase/include/trx0rseg.h +++ b/storage/innobase/include/trx0rseg.h @@ -27,10 +27,8 @@ Created 3/26/1996 Heikki Tuuri #ifndef trx0rseg_h #define trx0rseg_h -#include "trx0types.h" #include "trx0sys.h" #include "fut0lst.h" -#include /** Gets a rollback segment header. 
@param[in] space space where placed @@ -226,6 +224,30 @@ struct trx_rseg_t { /** Maximum transaction ID (valid only if TRX_RSEG_FORMAT is 0) */ #define TRX_RSEG_MAX_TRX_ID (TRX_RSEG_UNDO_SLOTS + TRX_RSEG_N_SLOTS \ * TRX_RSEG_SLOT_SIZE) + +/** 8 bytes offset within the binlog file */ +#define TRX_RSEG_BINLOG_OFFSET TRX_RSEG_MAX_TRX_ID + 8 +/** MySQL log file name, 512 bytes, including terminating NUL +(valid only if TRX_RSEG_FORMAT is 0). +If no binlog information is present, the first byte is NUL. */ +#define TRX_RSEG_BINLOG_NAME TRX_RSEG_MAX_TRX_ID + 16 +/** Maximum length of binlog file name, including terminating NUL, in bytes */ +#define TRX_RSEG_BINLOG_NAME_LEN 512 + +#ifdef WITH_WSREP +/** The offset to WSREP XID headers */ +#define TRX_RSEG_WSREP_XID_INFO TRX_RSEG_MAX_TRX_ID + 16 + 512 + +/** WSREP XID format (1 if present and valid, 0 if not present) */ +#define TRX_RSEG_WSREP_XID_FORMAT TRX_RSEG_WSREP_XID_INFO +/** WSREP XID GTRID length */ +#define TRX_RSEG_WSREP_XID_GTRID_LEN TRX_RSEG_WSREP_XID_INFO + 4 +/** WSREP XID bqual length */ +#define TRX_RSEG_WSREP_XID_BQUAL_LEN TRX_RSEG_WSREP_XID_INFO + 8 +/** WSREP XID data (XIDDATASIZE bytes) */ +#define TRX_RSEG_WSREP_XID_DATA TRX_RSEG_WSREP_XID_INFO + 12 +#endif /* WITH_WSREP*/ + /*-------------------------------------------------------------*/ /** Read the page number of an undo log slot. @@ -240,6 +262,48 @@ trx_rsegf_get_nth_undo(const trx_rsegf_t* rsegf, ulint n) + n * TRX_RSEG_SLOT_SIZE); } +#ifdef WITH_WSREP +/** Update the WSREP XID information in rollback segment header. +@param[in,out] rseg_header rollback segment header +@param[in] xid WSREP XID +@param[in,out] mtr mini-transaction */ +void +trx_rseg_update_wsrep_checkpoint( + trx_rsegf_t* rseg_header, + const XID* xid, + mtr_t* mtr); + +/** Update WSREP checkpoint XID in first rollback segment header. 
+@param[in] xid WSREP XID */ +void trx_rseg_update_wsrep_checkpoint(const XID* xid); + +/** Read the WSREP XID information in rollback segment header. +@param[in] rseg_header Rollback segment header +@param[out] xid Transaction XID +@return whether the WSREP XID was present */ +bool trx_rseg_read_wsrep_checkpoint(const trx_rsegf_t* rseg_header, XID& xid); + +/** Recover the latest WSREP checkpoint XID. +@param[out] xid WSREP XID +@return whether the WSREP XID was found */ +bool trx_rseg_read_wsrep_checkpoint(XID& xid); +#endif /* WITH_WSREP */ + +/** Upgrade a rollback segment header page to MariaDB 10.3 format. +@param[in,out] rseg_header rollback segment header page +@param[in,out] mtr mini-transaction */ +void trx_rseg_format_upgrade(trx_rsegf_t* rseg_header, mtr_t* mtr); + +/** Update the offset information about the end of the binlog entry +which corresponds to the transaction just being committed. +In a replication slave, this updates the master binlog position +up to which replication has proceeded. +@param[in,out] rseg_header rollback segment header +@param[in] trx committing transaction +@param[in,out] mtr mini-transaction */ +void +trx_rseg_update_binlog_offset(byte* rseg_header, const trx_t* trx, mtr_t* mtr); + #include "trx0rseg.ic" #endif diff --git a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h index fae9d25d601..1469d8b1dc7 100644 --- a/storage/innobase/include/trx0sys.h +++ b/storage/innobase/include/trx0sys.h @@ -141,26 +141,6 @@ trx_sys_update_mysql_binlog_offset( system header. */ void trx_sys_print_mysql_binlog_offset(); -#ifdef WITH_WSREP - -/** Update WSREP XID info in the TRX_SYS page. -@param[in] xid Transaction XID -@param[in,out] sys_header TRX_SYS page -@param[in,out] mtr mini-transaction */ -UNIV_INTERN -void -trx_sys_update_wsrep_checkpoint( - const XID* xid, - buf_block_t* sys_header, - mtr_t* mtr); - -/** Read WSREP checkpoint XID from sys header. 
-@param[out] xid WSREP XID -@return whether the checkpoint was present */ -UNIV_INTERN -bool -trx_sys_read_wsrep_checkpoint(XID* xid); -#endif /* WITH_WSREP */ /** Create the rollback segments. @return whether the creation succeeded */ @@ -235,7 +215,8 @@ trx_sysf_rseg_get_page_no(const buf_block_t* sys_header, ulint rseg_id) + sys_header->frame); } -/** Maximum length of MySQL binlog file name, in bytes. */ +/** Maximum length of MySQL binlog file name, in bytes. +(Used before MariaDB 10.3.5.) */ #define TRX_SYS_MYSQL_LOG_NAME_LEN 512 /** Contents of TRX_SYS_MYSQL_LOG_MAGIC_N_FLD */ #define TRX_SYS_MYSQL_LOG_MAGIC_N 873422344 @@ -312,7 +293,7 @@ FIXED WSREP XID info offsets for 4k page size 10.0.32-galera */ #ifdef WITH_WSREP -/** The offset to WSREP XID headers */ +/** The offset to WSREP XID headers (used before MariaDB 10.3.5) */ #define TRX_SYS_WSREP_XID_INFO std::max(srv_page_size - 3500, 1596UL) #define TRX_SYS_WSREP_XID_MAGIC_N_FLD 0 #define TRX_SYS_WSREP_XID_MAGIC_N 0x77737265 @@ -856,7 +837,6 @@ public: by any mutex, because it is read-only during multi-threaded operation */ - /** Lock-free hash of in memory read-write transactions. Works faster when it is on it's own cache line (tested). @@ -865,6 +845,16 @@ public: MY_ALIGNED(CACHE_LINE_SIZE) rw_trx_hash_t rw_trx_hash; +#ifdef WITH_WSREP + /** Latest recovered XID during startup */ + XID recovered_wsrep_xid; +#endif + /** Latest recovered binlog offset */ + int64_t recovered_binlog_offset; + /** Latest recovered binlog file name */ + char recovered_binlog_filename[TRX_SYS_MYSQL_LOG_NAME_LEN]; + + /** Constructor. 
diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index 1fb306cec88..bef3fef9cde 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -2230,6 +2230,8 @@ files_checked: if (err != DB_SUCCESS) { return(srv_init_abort(err)); } + /* fall through */ + case SRV_OPERATION_RESTORE: /* This must precede recv_apply_hashed_log_recs(true). */ trx_lists_init_at_db_start(); @@ -2237,12 +2239,6 @@ files_checked: case SRV_OPERATION_RESTORE_DELTA: case SRV_OPERATION_BACKUP: ut_ad(!"wrong mariabackup mode"); - /* fall through */ - case SRV_OPERATION_RESTORE: - /* mariabackup --prepare only deals with - the redo log and the data files, not with - transactions or the data dictionary. */ - break; } if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) { diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc index 34f72929d03..feb283429a5 100644 --- a/storage/innobase/trx/trx0purge.cc +++ b/storage/innobase/trx/trx0purge.cc @@ -44,6 +44,7 @@ Created 3/26/1996 Heikki Tuuri #include "trx0roll.h" #include "trx0rseg.h" #include "trx0trx.h" +#include /** Maximum allowable purge history length. <=0 means 'infinite'. */ ulong srv_max_purge_lag = 0; @@ -239,6 +240,8 @@ Remove the undo log segment from the rseg slot if it is too big for reuse. void trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr) { + DBUG_PRINT("trx", ("commit(" TRX_ID_FMT "," TRX_ID_FMT ")", + trx->id, trx->no)); ut_ad(undo == trx->rsegs.m_redo.undo || undo == trx->rsegs.m_redo.old_insert); trx_rseg_t* rseg = trx->rsegs.m_redo.rseg; @@ -251,6 +254,12 @@ trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr) ut_ad(mach_read_from_2(undo_header + TRX_UNDO_NEEDS_PURGE) <= 1); + if (UNIV_UNLIKELY(mach_read_from_4(TRX_RSEG_FORMAT + rseg_header))) { + /* This database must have been upgraded from + before MariaDB 10.3.5. 
*/ + trx_rseg_format_upgrade(rseg_header, mtr); + } + if (undo->state != TRX_UNDO_CACHED) { ulint hist_size; #ifdef UNIV_DEBUG @@ -258,11 +267,7 @@ trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr) #endif /* UNIV_DEBUG */ /* The undo log segment will not be reused */ - - if (UNIV_UNLIKELY(undo->id >= TRX_RSEG_N_SLOTS)) { - ib::fatal() << "undo->id is " << undo->id; - } - + ut_a(undo->id < TRX_RSEG_N_SLOTS); trx_rsegf_set_nth_undo(rseg_header, undo->id, FIL_NULL, mtr); MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_USED); @@ -272,30 +277,17 @@ trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr) ut_ad(undo->size == flst_get_len( seg_header + TRX_UNDO_PAGE_LIST)); - byte* rseg_format = rseg_header + TRX_RSEG_FORMAT; - if (UNIV_UNLIKELY(mach_read_from_4(rseg_format))) { - /* This database must have been upgraded from - before MariaDB 10.3.5. */ - mlog_write_ulint(rseg_format, 0, MLOG_4BYTES, mtr); - /* Clear also possible garbage at the end of - the page. Old InnoDB versions did not initialize - unused parts of pages. */ - ut_ad(page_offset(rseg_header) == TRX_RSEG); - byte* b = rseg_header + TRX_RSEG_MAX_TRX_ID + 8; - ulint len = UNIV_PAGE_SIZE - - (FIL_PAGE_DATA_END - + TRX_RSEG + TRX_RSEG_MAX_TRX_ID + 8); - memset(b, 0, len); - mlog_log_string(b, len, mtr); - } mlog_write_ulint( rseg_header + TRX_RSEG_HISTORY_SIZE, hist_size + undo->size, MLOG_4BYTES, mtr); - mlog_write_ull(rseg_header + TRX_RSEG_MAX_TRX_ID, - trx_sys.get_max_trx_id(), mtr); } + /* This field now also serves as an identifier for the latest + binlog and WSREP XID information. 
*/ + mlog_write_ull(rseg_header + TRX_RSEG_MAX_TRX_ID, + trx_sys.get_max_trx_id(), mtr); + /* Before any transaction-generating background threads or the purge have been started, recv_recovery_rollback_active() can start transactions in row_merge_drop_temp_indexes() and @@ -320,6 +312,19 @@ trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr) || trx->internal) && srv_fast_shutdown)); +#ifdef WITH_WSREP + if (wsrep_is_wsrep_xid(trx->xid)) { + trx_rseg_update_wsrep_checkpoint(rseg_header, trx->xid, mtr); + } +#endif + + if (trx->mysql_log_file_name && *trx->mysql_log_file_name) { + /* Update the latest MySQL binlog name and offset info + in rollback segment header if MySQL binlogging is on + or the database server is a MySQL replication slave. */ + trx_rseg_update_binlog_offset(rseg_header, trx, mtr); + } + /* Add the log as the first in the history list */ flst_add_first(rseg_header + TRX_RSEG_HISTORY, undo_header + TRX_UNDO_HISTORY_NODE, mtr); diff --git a/storage/innobase/trx/trx0rseg.cc b/storage/innobase/trx/trx0rseg.cc index 4b09a4cc492..45d260a4480 100644 --- a/storage/innobase/trx/trx0rseg.cc +++ b/storage/innobase/trx/trx0rseg.cc @@ -34,6 +34,214 @@ Created 3/26/1996 Heikki Tuuri #include +#ifdef WITH_WSREP + +#ifdef UNIV_DEBUG +static long long trx_sys_cur_xid_seqno = -1; +static unsigned char trx_sys_cur_xid_uuid[16]; + +/** Read WSREP XID seqno */ +static inline long long read_wsrep_xid_seqno(const XID* xid) +{ + long long seqno; + memcpy(&seqno, xid->data + 24, sizeof(long long)); + return seqno; +} + +/** Read WSREP XID UUID */ +static inline void read_wsrep_xid_uuid(const XID* xid, unsigned char* buf) +{ + memcpy(buf, xid->data + 8, 16); +} + +#endif /* UNIV_DEBUG */ + +/** Update the WSREP XID information in rollback segment header. 
+@param[in,out] rseg_header rollback segment header +@param[in] xid WSREP XID +@param[in,out] mtr mini-transaction */ +void +trx_rseg_update_wsrep_checkpoint( + trx_rsegf_t* rseg_header, + const XID* xid, + mtr_t* mtr) +{ + ut_ad(xid->formatID == 1); + +#ifdef UNIV_DEBUG + /* Check that seqno is monotonically increasing */ + unsigned char xid_uuid[16]; + long long xid_seqno = read_wsrep_xid_seqno(xid); + read_wsrep_xid_uuid(xid, xid_uuid); + + if (!memcmp(xid_uuid, trx_sys_cur_xid_uuid, 8)) { + ut_ad(xid_seqno > trx_sys_cur_xid_seqno); + trx_sys_cur_xid_seqno = xid_seqno; + } else { + memcpy(trx_sys_cur_xid_uuid, xid_uuid, 16); + } + trx_sys_cur_xid_seqno = xid_seqno; +#endif /* UNIV_DEBUG */ + + mlog_write_ulint(TRX_RSEG_WSREP_XID_FORMAT + rseg_header, + uint32_t(xid->formatID), + MLOG_4BYTES, mtr); + + mlog_write_ulint(TRX_RSEG_WSREP_XID_GTRID_LEN + rseg_header, + uint32_t(xid->gtrid_length), + MLOG_4BYTES, mtr); + + mlog_write_ulint(TRX_RSEG_WSREP_XID_BQUAL_LEN + rseg_header, + uint32_t(xid->bqual_length), + MLOG_4BYTES, mtr); + + mlog_write_string(TRX_RSEG_WSREP_XID_DATA + rseg_header, + reinterpret_cast(xid->data), + XIDDATASIZE, mtr); +} + +/** Update WSREP checkpoint XID in first rollback segment header. +@param[in] xid WSREP XID */ +void trx_rseg_update_wsrep_checkpoint(const XID* xid) +{ + mtr_t mtr; + mtr.start(); + + const trx_rseg_t* rseg = trx_sys.rseg_array[0]; + + trx_rsegf_t* rseg_header = trx_rsegf_get(rseg->space, rseg->page_no, + &mtr); + if (UNIV_UNLIKELY(mach_read_from_4(rseg_header + TRX_RSEG_FORMAT))) { + trx_rseg_format_upgrade(rseg_header, &mtr); + } + + mlog_write_ull(rseg_header + TRX_RSEG_MAX_TRX_ID, + trx_sys.get_max_trx_id(), &mtr); + trx_rseg_update_wsrep_checkpoint(rseg_header, xid, &mtr); + mtr.commit(); +} + +/** Read the WSREP XID information in rollback segment header. 
+@param[in] rseg_header Rollback segment header +@param[out] xid Transaction XID +@return whether the WSREP XID was present */ +bool trx_rseg_read_wsrep_checkpoint(const trx_rsegf_t* rseg_header, XID& xid) +{ + xid.formatID = (int)mach_read_from_4( + TRX_RSEG_WSREP_XID_FORMAT + rseg_header); + + if (xid.formatID == 0) { + memset(&xid, 0, sizeof(xid)); + long long seqno= -1; + memcpy(xid.data + 24, &seqno, sizeof(long long)); + xid.formatID = -1; + return false; + } + + xid.gtrid_length = (int)mach_read_from_4( + TRX_RSEG_WSREP_XID_GTRID_LEN + rseg_header); + + xid.bqual_length = (int)mach_read_from_4( + TRX_RSEG_WSREP_XID_BQUAL_LEN + rseg_header); + + memcpy(xid.data, TRX_RSEG_WSREP_XID_DATA + rseg_header, XIDDATASIZE); + + return true; +} + +/** Read the WSREP XID from the TRX_SYS page (in case of upgrade). +@param[in] page TRX_SYS page +@param[out] xid WSREP XID (if present) +@return whether the WSREP XID is present */ +static bool trx_rseg_init_wsrep_xid(const page_t* page, XID& xid) +{ + if (mach_read_from_4(TRX_SYS + TRX_SYS_WSREP_XID_INFO + + TRX_SYS_WSREP_XID_MAGIC_N_FLD + + page) + != TRX_SYS_WSREP_XID_MAGIC_N) { + return false; + } + + xid.formatID = (int)mach_read_from_4( + TRX_SYS + TRX_SYS_WSREP_XID_INFO + + TRX_SYS_WSREP_XID_FORMAT + page); + xid.gtrid_length = (int)mach_read_from_4( + TRX_SYS + TRX_SYS_WSREP_XID_INFO + + TRX_SYS_WSREP_XID_GTRID_LEN + page); + xid.bqual_length = (int)mach_read_from_4( + TRX_SYS + TRX_SYS_WSREP_XID_INFO + + TRX_SYS_WSREP_XID_BQUAL_LEN + page); + memcpy(xid.data, + TRX_SYS + TRX_SYS_WSREP_XID_INFO + + TRX_SYS_WSREP_XID_DATA + page, XIDDATASIZE); + return true; +} + +/** Recover the latest WSREP checkpoint XID. 
+@param[out] xid WSREP XID +@return whether the WSREP XID was found */ +bool trx_rseg_read_wsrep_checkpoint(XID& xid) +{ + mtr_t mtr; + trx_id_t max_id = 0; + bool found = false; + + for (ulint rseg_id = 0; rseg_id < TRX_SYS_N_RSEGS; + rseg_id++, mtr.commit()) { + mtr.start(); + const buf_block_t* sys = trx_sysf_get(&mtr, false); + if (rseg_id == 0) { + found = trx_rseg_init_wsrep_xid(sys->frame, xid); + } + + const uint32_t page_no = trx_sysf_rseg_get_page_no( + sys, rseg_id); + + if (page_no == FIL_NULL) { + continue; + } + + const trx_rsegf_t* rseg_header = trx_rsegf_get_new( + trx_sysf_rseg_get_space(sys, rseg_id), page_no, &mtr); + + if (mach_read_from_4(rseg_header + TRX_RSEG_FORMAT)) { + continue; + } + + trx_id_t id = mach_read_from_8(rseg_header + + TRX_RSEG_MAX_TRX_ID); + + if (id < max_id) { + continue; + } + + max_id = id; + found = trx_rseg_read_wsrep_checkpoint(rseg_header, xid) + || found; + } + + return found; +} +#endif /* WITH_WSREP */ + +/** Upgrade a rollback segment header page to MariaDB 10.3 format. +@param[in,out] rseg_header rollback segment header page +@param[in,out] mtr mini-transaction */ +void trx_rseg_format_upgrade(trx_rsegf_t* rseg_header, mtr_t* mtr) +{ + ut_ad(page_offset(rseg_header) == TRX_RSEG); + byte* rseg_format = TRX_RSEG_FORMAT + rseg_header; + mlog_write_ulint(rseg_format, 0, MLOG_4BYTES, mtr); + /* Clear also possible garbage at the end of the page. Old + InnoDB versions did not initialize unused parts of pages. */ + byte* b = rseg_header + TRX_RSEG_MAX_TRX_ID + 8; + ulint len = UNIV_PAGE_SIZE + - (FIL_PAGE_DATA_END + + TRX_RSEG + TRX_RSEG_MAX_TRX_ID + 8); + memset(b, 0, len); + mlog_log_string(b, len, mtr); +} + /** Creates a rollback segment header. This function is called only when a new rollback segment is created in the database. @@ -192,19 +400,51 @@ trx_undo_lists_init(trx_rseg_t* rseg, trx_id_t& max_trx_id, /** Restore the state of a persistent rollback segment. 
@param[in,out] rseg persistent rollback segment @param[in,out] max_trx_id maximum observed transaction identifier +@param[in,out] max_rseg_trx_id maximum observed TRX_RSEG_MAX_TRX_ID @param[in,out] mtr mini-transaction */ static void -trx_rseg_mem_restore(trx_rseg_t* rseg, trx_id_t& max_trx_id, mtr_t* mtr) +trx_rseg_mem_restore( + trx_rseg_t* rseg, + trx_id_t& max_trx_id, + trx_id_t& max_rseg_trx_id, + mtr_t* mtr) { - const trx_rsegf_t* rseg_header = trx_rsegf_get_new( + trx_rsegf_t* rseg_header = trx_rsegf_get_new( rseg->space, rseg->page_no, mtr); + if (mach_read_from_4(rseg_header + TRX_RSEG_FORMAT) == 0) { trx_id_t id = mach_read_from_8(rseg_header + TRX_RSEG_MAX_TRX_ID); + if (id > max_trx_id) { max_trx_id = id; } + + if (id > max_rseg_trx_id) { + max_rseg_trx_id = id; + + if (rseg_header[TRX_RSEG_BINLOG_NAME]) { + memcpy(trx_sys.recovered_binlog_filename, + rseg_header + TRX_RSEG_BINLOG_NAME, + TRX_RSEG_BINLOG_NAME_LEN); + trx_sys.recovered_binlog_offset = mach_read_from_8( + rseg_header + + TRX_RSEG_BINLOG_OFFSET); + } + +#ifdef WITH_WSREP + trx_rseg_read_wsrep_checkpoint( + rseg_header, trx_sys.recovered_wsrep_xid); +#endif + } + } + + if (srv_operation == SRV_OPERATION_RESTORE) { + /* mariabackup --prepare only deals with + the redo log and the data files, not with + transactions or the data dictionary. */ + return; } /* Initialize the undo log lists according to the rseg header */ @@ -252,21 +492,55 @@ trx_rseg_mem_restore(trx_rseg_t* rseg, trx_id_t& max_trx_id, mtr_t* mtr) } } +/** Read binlog metadata from the TRX_SYS page, in case we are upgrading +from MySQL or a MariaDB version older than 10.3.5. 
*/ +static void trx_rseg_init_binlog_info(const page_t* page) +{ + if (mach_read_from_4(TRX_SYS + TRX_SYS_MYSQL_LOG_INFO + + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD + + page) + == TRX_SYS_MYSQL_LOG_MAGIC_N) { + memcpy(trx_sys.recovered_binlog_filename, + TRX_SYS_MYSQL_LOG_INFO + TRX_SYS_MYSQL_LOG_NAME + + TRX_SYS + page, TRX_SYS_MYSQL_LOG_NAME_LEN); + trx_sys.recovered_binlog_offset = mach_read_from_8( + TRX_SYS_MYSQL_LOG_INFO + TRX_SYS_MYSQL_LOG_OFFSET + + TRX_SYS + page); + } + +#ifdef WITH_WSREP + trx_rseg_init_wsrep_xid(page, trx_sys.recovered_wsrep_xid); +#endif +} + /** Initialize the rollback segments in memory at database startup. */ void trx_rseg_array_init() { - trx_id_t max_trx_id = 0; + trx_id_t max_trx_id = 0, max_rseg_trx_id = 0; + + *trx_sys.recovered_binlog_filename = '\0'; + trx_sys.recovered_binlog_offset = -1; +#ifdef WITH_WSREP + memset(&trx_sys.recovered_wsrep_xid, 0, + sizeof trx_sys.recovered_wsrep_xid); + trx_sys.recovered_wsrep_xid.formatID = -1; +#endif for (ulint rseg_id = 0; rseg_id < TRX_SYS_N_RSEGS; rseg_id++) { mtr_t mtr; mtr.start(); if (const buf_block_t* sys = trx_sysf_get(&mtr, false)) { if (rseg_id == 0) { + /* In case this is an upgrade from + before MariaDB 10.3.5, fetch the base + information from the TRX_SYS page. */ max_trx_id = mach_read_from_8( TRX_SYS + TRX_SYS_TRX_ID_STORE + sys->frame); + trx_rseg_init_binlog_info(sys->frame); } + const uint32_t page_no = trx_sysf_rseg_get_page_no( sys, rseg_id); if (page_no != FIL_NULL) { @@ -278,7 +552,9 @@ trx_rseg_array_init() ut_ad(rseg->id == rseg_id); ut_ad(!trx_sys.rseg_array[rseg_id]); trx_sys.rseg_array[rseg_id] = rseg; - trx_rseg_mem_restore(rseg, max_trx_id, &mtr); + trx_rseg_mem_restore( + rseg, max_trx_id, max_rseg_trx_id, + &mtr); } } @@ -404,3 +680,34 @@ trx_rseg_get_n_undo_tablespaces( return ulint(end - space_ids); } + +/** Update the offset information about the end of the binlog entry +which corresponds to the transaction just being committed. 
+In a replication slave, this updates the master binlog position +up to which replication has proceeded. +@param[in,out] rseg_header rollback segment header +@param[in] trx committing transaction +@param[in,out] mtr mini-transaction */ +void +trx_rseg_update_binlog_offset(byte* rseg_header, const trx_t* trx, mtr_t* mtr) +{ + DBUG_LOG("trx", "trx_mysql_binlog_offset: " << trx->mysql_log_offset); + + const size_t len = strlen(trx->mysql_log_file_name) + 1; + + ut_ad(len > 1); + + if (UNIV_UNLIKELY(len > TRX_RSEG_BINLOG_NAME_LEN)) { + return; + } + + mlog_write_ull(rseg_header + TRX_RSEG_BINLOG_OFFSET, + trx->mysql_log_offset, mtr); + byte* p = rseg_header + TRX_RSEG_BINLOG_NAME; + const byte* binlog_name = reinterpret_cast + (trx->mysql_log_file_name); + + if (memcmp(binlog_name, p, len)) { + mlog_write_string(p, binlog_name, len, mtr); + } +} diff --git a/storage/innobase/trx/trx0sys.cc b/storage/innobase/trx/trx0sys.cc index 1b60e2bd121..13ebe27f539 100644 --- a/storage/innobase/trx/trx0sys.cc +++ b/storage/innobase/trx/trx0sys.cc @@ -88,207 +88,21 @@ ReadView::check_trx_id_sanity( uint trx_rseg_n_slots_debug = 0; #endif - -/*****************************************************************//** -Updates the offset information about the end of the MySQL binlog entry -which corresponds to the transaction just being committed. In a MySQL -replication slave updates the latest master binlog position up to which -replication has proceeded. 
*/ -void -trx_sys_update_mysql_binlog_offset( -/*===============================*/ - const char* file_name,/*!< in: MySQL log file name */ - int64_t offset, /*!< in: position in that log file */ - buf_block_t* sys_header, /*!< in,out: trx sys header */ - mtr_t* mtr) /*!< in,out: mini-transaction */ -{ - DBUG_PRINT("InnoDB",("trx_mysql_binlog_offset: %lld", (longlong) offset)); - - const size_t len = strlen(file_name) + 1; - - if (len > TRX_SYS_MYSQL_LOG_NAME_LEN) { - - /* We cannot fit the name to the 512 bytes we have reserved */ - - return; - } - - byte* p = TRX_SYS + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD - + TRX_SYS_MYSQL_LOG_INFO + sys_header->frame; - - if (mach_read_from_4(p) != TRX_SYS_MYSQL_LOG_MAGIC_N) { - mlog_write_ulint(p, - TRX_SYS_MYSQL_LOG_MAGIC_N, - MLOG_4BYTES, mtr); - } - - p = TRX_SYS + TRX_SYS_MYSQL_LOG_NAME + TRX_SYS_MYSQL_LOG_INFO - + sys_header->frame; - - if (memcmp(file_name, p, len)) { - mlog_write_string(p, - reinterpret_cast(file_name), - len, mtr); - } - - mlog_write_ull(TRX_SYS_MYSQL_LOG_INFO + TRX_SYS_MYSQL_LOG_OFFSET - + TRX_SYS + sys_header->frame, offset, mtr); -} - /** Display the MySQL binlog offset info if it is present in the trx system header. 
*/ void trx_sys_print_mysql_binlog_offset() { - mtr_t mtr; - - mtr.start(); - - const buf_block_t* block = trx_sysf_get(&mtr, false); - - if (block - && mach_read_from_4(TRX_SYS + TRX_SYS_MYSQL_LOG_INFO - + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD - + block->frame) - == TRX_SYS_MYSQL_LOG_MAGIC_N) { - ib::info() << "Last binlog file '" - << TRX_SYS_MYSQL_LOG_INFO + TRX_SYS_MYSQL_LOG_NAME - + TRX_SYS + block->frame - << "', position " - << mach_read_from_8(TRX_SYS_MYSQL_LOG_INFO - + TRX_SYS_MYSQL_LOG_OFFSET - + TRX_SYS + block->frame); + if (!*trx_sys.recovered_binlog_filename) { + return; } - mtr.commit(); + ib::info() << "Last binlog file '" + << trx_sys.recovered_binlog_filename + << "', position " + << trx_sys.recovered_binlog_offset; } -#ifdef WITH_WSREP - -#ifdef UNIV_DEBUG -static long long trx_sys_cur_xid_seqno = -1; -static unsigned char trx_sys_cur_xid_uuid[16]; - -/** Read WSREP XID seqno */ -static inline long long read_wsrep_xid_seqno(const XID* xid) -{ - long long seqno; - memcpy(&seqno, xid->data + 24, sizeof(long long)); - return seqno; -} - -/** Read WSREP XID UUID */ -static inline void read_wsrep_xid_uuid(const XID* xid, unsigned char* buf) -{ - memcpy(buf, xid->data + 8, 16); -} - -#endif /* UNIV_DEBUG */ - -/** Update WSREP XID info in the TRX_SYS page. 
-@param[in] xid Transaction XID -@param[in,out] sys_header TRX_SYS page -@param[in,out] mtr mini-transaction */ -UNIV_INTERN -void -trx_sys_update_wsrep_checkpoint( - const XID* xid, - buf_block_t* sys_header, - mtr_t* mtr) -{ - ut_ad(xid->formatID == 1); - ut_ad(wsrep_is_wsrep_xid(xid)); - - byte* magic = TRX_SYS + TRX_SYS_WSREP_XID_INFO - + TRX_SYS_WSREP_XID_MAGIC_N_FLD - + sys_header->frame; - - if (mach_read_from_4(magic) != TRX_SYS_WSREP_XID_MAGIC_N) { - mlog_write_ulint(magic, TRX_SYS_WSREP_XID_MAGIC_N, - MLOG_4BYTES, mtr); -#ifdef UNIV_DEBUG - } else { - /* Check that seqno is monotonically increasing */ - unsigned char xid_uuid[16]; - long long xid_seqno = read_wsrep_xid_seqno(xid); - read_wsrep_xid_uuid(xid, xid_uuid); - - if (!memcmp(xid_uuid, trx_sys_cur_xid_uuid, 8)) { - ut_ad(xid_seqno > trx_sys_cur_xid_seqno); - trx_sys_cur_xid_seqno = xid_seqno; - } else { - memcpy(trx_sys_cur_xid_uuid, xid_uuid, 16); - } - - trx_sys_cur_xid_seqno = xid_seqno; -#endif /* UNIV_DEBUG */ - } - - mlog_write_ulint(TRX_SYS + TRX_SYS_WSREP_XID_INFO - + TRX_SYS_WSREP_XID_FORMAT + sys_header->frame, - uint32_t(xid->formatID), - MLOG_4BYTES, mtr); - mlog_write_ulint(TRX_SYS + TRX_SYS_WSREP_XID_INFO - + TRX_SYS_WSREP_XID_GTRID_LEN + sys_header->frame, - uint32_t(xid->gtrid_length), - MLOG_4BYTES, mtr); - mlog_write_ulint(TRX_SYS + TRX_SYS_WSREP_XID_INFO - + TRX_SYS_WSREP_XID_BQUAL_LEN + sys_header->frame, - uint32_t(xid->bqual_length), - MLOG_4BYTES, mtr); - mlog_write_string(TRX_SYS + TRX_SYS_WSREP_XID_INFO - + TRX_SYS_WSREP_XID_DATA + sys_header->frame, - reinterpret_cast(xid->data), - XIDDATASIZE, mtr); -} - -/** Read WSREP checkpoint XID from sys header. 
-@param[out] xid WSREP XID -@return whether the checkpoint was present */ -UNIV_INTERN -bool -trx_sys_read_wsrep_checkpoint(XID* xid) -{ - mtr_t mtr; - - ut_ad(xid); - - mtr.start(); - - const buf_block_t* block = trx_sysf_get(&mtr, false); - - if (!block || - mach_read_from_4(TRX_SYS + TRX_SYS_WSREP_XID_INFO - + TRX_SYS_WSREP_XID_MAGIC_N_FLD + block->frame) - != TRX_SYS_WSREP_XID_MAGIC_N) { - memset(xid, 0, sizeof(*xid)); - long long seqno= -1; - memcpy(xid->data + 24, &seqno, sizeof(long long)); - xid->formatID = -1; - mtr.commit(); - return false; - } - - xid->formatID = (int)mach_read_from_4( - TRX_SYS + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_FORMAT - + block->frame); - xid->gtrid_length = (int)mach_read_from_4( - TRX_SYS + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_GTRID_LEN - + block->frame); - xid->bqual_length = (int)mach_read_from_4( - TRX_SYS + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_BQUAL_LEN - + block->frame); - memcpy(xid->data, - TRX_SYS + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_DATA - + block->frame, - XIDDATASIZE); - - mtr.commit(); - return true; -} - -#endif /* WITH_WSREP */ - /** Find an available rollback segment. @param[in] sys_header @return an unallocated rollback segment slot in the TRX_SYS header diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index 7b104653cf9..0263b42812a 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -885,6 +885,14 @@ trx_lists_init_at_db_start() ut_ad(!srv_was_started); ut_ad(!purge_sys); + if (srv_operation == SRV_OPERATION_RESTORE) { + /* mariabackup --prepare only deals with + the redo log and the data files, not with + transactions or the data dictionary. 
*/ + trx_rseg_array_init(); + return; + } + purge_sys = UT_NEW_NOKEY(purge_sys_t()); if (srv_force_recovery >= SRV_FORCE_NO_UNDO_LOG_SCAN) { @@ -1303,36 +1311,7 @@ trx_write_serialisation_history( MONITOR_INC(MONITOR_TRX_COMMIT_UNDO); -#ifdef WITH_WSREP - const bool update_wsrep = wsrep_is_wsrep_xid(trx->xid); -#endif - const bool update_binlog_pos = trx->mysql_log_file_name - && *trx->mysql_log_file_name; - if (!update_binlog_pos -#ifdef WITH_WSREP - && !update_wsrep -#endif - ) return; - - buf_block_t* block = trx_sysf_get(mtr); -#ifdef WITH_WSREP - if (update_wsrep) - trx_sys_update_wsrep_checkpoint(trx->xid, block, mtr); -#endif /* WITH_WSREP */ - - /* Update the latest MySQL binlog name and offset info - in trx sys header if MySQL binlogging is on or the database - server is a MySQL replication slave */ - - if (update_binlog_pos) { - - trx_sys_update_mysql_binlog_offset( - trx->mysql_log_file_name, - trx->mysql_log_offset, - block, mtr); - - trx->mysql_log_file_name = NULL; - } + trx->mysql_log_file_name = NULL; } /********************************************************************