From cbf60dba7404edfa73ddf9b29c4488a4b3ea5684 Mon Sep 17 00:00:00 2001 From: Monty Date: Wed, 9 Nov 2022 19:44:11 +0200 Subject: [PATCH] Small improvements to aria recovery I spent 4 hours on work and 12 hours of testing to try to find the reason for aria crashing in recovery when starting a new test, in which case the 'data directory' should be a copy of "install.db", but aria_log.00000001 content was not correct. The following changes are mostly done to make it a bit easier to find out more in case of future similar crashes: - Mark last_checkpoint_lsn volatile (safety). - Write checkpoint message to aria_recovery.trace - When compiling with DBUG and with HAVE_DBUG_TRANSLOG_CRC, use checksums for Aria log pages. We cannot have it on by default for DBUG servers yet as there are bugs when changing CRC between restarts. - Added a message to mtr --verbose when copying the data directory. - Removed extra linefeed in Aria recovery message (cleanup) --- mysql-test/mariadb-test-run.pl | 1 + storage/maria/ma_control_file.c | 2 +- storage/maria/ma_control_file.h | 2 +- storage/maria/ma_loghandler.h | 4 ++++ storage/maria/ma_recovery.c | 12 +++++++----- storage/maria/ma_recovery_util.c | 2 +- 6 files changed, 15 insertions(+), 8 deletions(-) diff --git a/mysql-test/mariadb-test-run.pl b/mysql-test/mariadb-test-run.pl index b7e83261298..b75278e9a6d 100755 --- a/mysql-test/mariadb-test-run.pl +++ b/mysql-test/mariadb-test-run.pl @@ -2713,6 +2713,7 @@ sub mysql_server_start($) { # Copy datadir from installed system db my $path= ($opt_parallel == 1) ? 
"$opt_vardir" : "$opt_vardir/.."; my $install_db= "$path/install.db"; + mtr_verbose("copying $install_db to $datadir"); copytree($install_db, $datadir) if -d $install_db; mtr_error("Failed to copy system db to '$datadir'") unless -d $datadir; } diff --git a/storage/maria/ma_control_file.c b/storage/maria/ma_control_file.c index 21befb70bd9..237b75b99b7 100644 --- a/storage/maria/ma_control_file.c +++ b/storage/maria/ma_control_file.c @@ -104,7 +104,7 @@ one should increment the control file version number. This LSN serves for the two-checkpoint rule, and also to find the checkpoint record when doing a recovery. */ -LSN last_checkpoint_lsn= LSN_IMPOSSIBLE; +volatile LSN last_checkpoint_lsn= LSN_IMPOSSIBLE; uint32 last_logno= FILENO_IMPOSSIBLE; /** The maximum transaction id given to a transaction. It is only updated at diff --git a/storage/maria/ma_control_file.h b/storage/maria/ma_control_file.h index 40428f665f4..c74957b8322 100644 --- a/storage/maria/ma_control_file.h +++ b/storage/maria/ma_control_file.h @@ -37,7 +37,7 @@ C_MODE_START LSN of the last checkoint (if last_checkpoint_lsn == LSN_IMPOSSIBLE then there was never a checkpoint) */ -extern LSN last_checkpoint_lsn; +extern volatile LSN last_checkpoint_lsn; /* Last log number (if last_logno == FILENO_IMPOSSIBLE then there is no log file yet) diff --git a/storage/maria/ma_loghandler.h b/storage/maria/ma_loghandler.h index 3e5c58a8053..abe85a12727 100644 --- a/storage/maria/ma_loghandler.h +++ b/storage/maria/ma_loghandler.h @@ -25,7 +25,11 @@ /* minimum possible transaction log size */ #define TRANSLOG_MIN_FILE_SIZE (8*MB) /* transaction log default flags (TODO: make it global variable) */ +#ifdef HAVE_DBUG_TRANSLOG_CRC +#define TRANSLOG_DEFAULT_FLAGS IF_DBUG(TRANSLOG_PAGE_CRC,0) +#else #define TRANSLOG_DEFAULT_FLAGS 0 +#endif /* Transaction log flags. 
diff --git a/storage/maria/ma_recovery.c b/storage/maria/ma_recovery.c index 006c8bef672..90d0ed3c708 100644 --- a/storage/maria/ma_recovery.c +++ b/storage/maria/ma_recovery.c @@ -133,7 +133,7 @@ static void new_transaction(uint16 sid, TrID long_id, LSN undo_lsn, static int new_table(uint16 sid, const char *name, LSN lsn_of_file_id); static int new_page(uint32 fileid, pgcache_page_no_t pageid, LSN rec_lsn, struct st_dirty_page *dirty_page); -static int close_all_tables(void); +static int close_all_tables(my_bool force_end_newline); static my_bool close_one_table(const char *name, TRANSLOG_ADDRESS addr); static void print_redo_phase_progress(TRANSLOG_ADDRESS addr); static void delete_all_transactions(); @@ -467,7 +467,7 @@ int maria_apply_log(LSN from_lsn, LSN end_redo_lsn, LSN end_undo_lsn, we don't use maria_panic() because it would maria_end(), and Recovery does not want that (we want to keep some modules initialized for runtime). */ - if (close_all_tables()) + if (close_all_tables(0)) { ma_message_no_user(0, "closing of tables failed"); goto err; @@ -495,6 +495,8 @@ int maria_apply_log(LSN from_lsn, LSN end_redo_lsn, LSN end_undo_lsn, /* No dirty pages, all tables are closed, no active transactions, save: */ if (ma_checkpoint_execute(CHECKPOINT_FULL, FALSE)) goto err; + tprint(tracef, "checkpoint done at " LSN_FMT "\n", + LSN_IN_PARTS(last_checkpoint_lsn)); } goto end; @@ -505,7 +507,7 @@ err2: delete_all_transactions(); if (!abort_message_printed) error= 1; - if (close_all_tables()) + if (close_all_tables(1)) { ma_message_no_user(0, "closing of tables failed"); } @@ -3472,7 +3474,7 @@ static int new_page(uint32 fileid, pgcache_page_no_t pageid, LSN rec_lsn, } -static int close_all_tables(void) +static int close_all_tables(my_bool force_end_newline) { int error= 0; uint count= 0; @@ -3537,7 +3539,7 @@ static int close_all_tables(void) } } end: - if (recovery_message_printed == REC_MSG_FLUSH) + if (recovery_message_printed == REC_MSG_FLUSH && (force_end_newline 
|| error)) { fputc('\n', stderr); fflush(stderr); diff --git a/storage/maria/ma_recovery_util.c b/storage/maria/ma_recovery_util.c index fe43d812600..b8123c422c1 100644 --- a/storage/maria/ma_recovery_util.c +++ b/storage/maria/ma_recovery_util.c @@ -87,7 +87,7 @@ void eprint(FILE *trace_file __attribute__ ((unused)), if (!trace_file) trace_file= stderr; - if (procent_printed) + if (procent_printed && trace_file == stderr) { procent_printed= 0; /* In silent mode, print on another line than the 0% 10% 20% line */