From 8a715a599077b7546af558aa47e37423dc322f7e Mon Sep 17 00:00:00 2001 From: Guilhem Bichot Date: Thu, 5 Mar 2009 14:46:45 +0100 Subject: [PATCH] Fix for BUG#42180 "Maria: maria-recovery-bitmap.test fails repeatedly" storage/maria/ma_loghandler.c: Normally, when we log LOGREC_LONG_TRANSACTION_ID, undo_lsn should be 0: assert this. storage/maria/ma_test_force_start.pl: This script does not work with mtr2; make it use mtr1. storage/maria/trnman.c: The bug's cause was the following: a transaction would log LOGREC_LONG_TRANSACTION_ID, then Checkpoint would run and skip it (because its undo_lsn was still 0), then the transaction would log REDO+UNDO, then a crash would occur. At Recovery, the REDO phase would start from the Checkpoint record's LSN, so it wouldn't see LOGREC_LONG_TRANSACTION_ID, and as the Checkpoint record does not mention the transaction, the transaction would be unknown, so its REDO+UNDO would be thrown away (an unknown transaction is assumed to have committed long ago), so the transaction would not be rolled back, which is wrong. The fix: it was wrong to skip a transaction if undo_lsn is 0; as soon as LOGREC_LONG_TRANSACTION_ID has been logged, it becomes potentially invisible to the REDO phase, and so we must include this transaction in the checkpoint record. 
--- storage/maria/ma_loghandler.c | 1 + storage/maria/ma_test_force_start.pl | 3 ++- storage/maria/trnman.c | 21 ++++++++++++--------- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c index 93b7d5e84c9..582095221d3 100644 --- a/storage/maria/ma_loghandler.c +++ b/storage/maria/ma_loghandler.c @@ -6147,6 +6147,7 @@ my_bool translog_write_record(LSN *lsn, LSN dummy_lsn; LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 1]; uchar log_data[6]; + DBUG_ASSERT(trn->undo_lsn == LSN_IMPOSSIBLE); int6store(log_data, trn->trid); log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data; log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); diff --git a/storage/maria/ma_test_force_start.pl b/storage/maria/ma_test_force_start.pl index 3aa6da5f5dc..7ab8190a738 100755 --- a/storage/maria/ma_test_force_start.pl +++ b/storage/maria/ma_test_force_start.pl @@ -42,7 +42,8 @@ my $sql_name= "./var/tmp/create_table.sql"; my $error_log_name= "./var/log/master.err"; my @cmd_output; my $whatever; # garbage data -my $base_server_cmd= "perl mysql-test-run.pl --mysqld=--maria-force-start-after-recovery-failures=$force_after maria-recover "; +$ENV{MTR_VERSION} = 1; # MTR2 does not have --start-and-exit +my $base_server_cmd= "perl mysql-test-run.pl --mysqld=--maria-force-start-after-recovery-failures=$force_after --suite=maria maria.maria-recover "; if ($^O =~ /^mswin/i) { print <next) { - /* - trns with a short trid of 0 are not even initialized, we can ignore - them. trns with undo_lsn==0 have done no writes, we can ignore them - too. XID not needed now. 
- */ uint sid; LSN rec_lsn, undo_lsn, first_undo_lsn; pthread_mutex_lock(&trn->state_lock); @@ -732,16 +727,24 @@ my_bool trnman_collect_transactions(LEX_STRING *str_act, LEX_STRING *str_com, */ continue; } - /* needed for low-water mark calculation */ + /* needed for low-water mark calculation */ if (((rec_lsn= lsn_read_non_atomic(trn->rec_lsn)) > 0) && (cmp_translog_addr(rec_lsn, minimum_rec_lsn) < 0)) minimum_rec_lsn= rec_lsn; /* - trn may have logged REDOs but not yet UNDO, that's why we read rec_lsn - before deciding to ignore if undo_lsn==0. + If trn has not logged LOGREC_LONG_TRANSACTION_ID, this trn will be + discovered when seeing that log record which is for sure located after + checkpoint_start_log_horizon. */ - if ((undo_lsn= trn->undo_lsn) == 0) /* trn can be forgotten */ + if ((LSN_WITH_FLAGS_TO_FLAGS(trn->first_undo_lsn) & + TRANSACTION_LOGGED_LONG_ID) == 0) continue; + /* + On the other hand, if undo_lsn is LSN_IMPOSSIBLE, trn may later log + records; so we must include trn in the checkpoint now, because we cannot + count on LOGREC_LONG_TRANSACTION_ID (as we are already past it). + */ + undo_lsn= trn->undo_lsn; stored_transactions++; int2store(ptr, sid); ptr+= 2;