diff --git a/mysql-test/suite/rpl/r/rpl_semi_sync_cond_var_per_thd.result b/mysql-test/suite/rpl/r/rpl_semi_sync_cond_var_per_thd.result index d03d99c2efb..18ad5d3d2cc 100644 --- a/mysql-test/suite/rpl/r/rpl_semi_sync_cond_var_per_thd.result +++ b/mysql-test/suite/rpl/r/rpl_semi_sync_cond_var_per_thd.result @@ -32,4 +32,75 @@ set @@global.binlog_commit_wait_count=@save_bgc_count; set @@global.binlog_commit_wait_usec=@save_bgc_usec; set @@global.debug_dbug=@save_debug_dbug; drop table t1, t2, t3; +# +# MDEV-34122 +# If semi-sync is switched off then on while a transaction is +# in-between binlogging and waiting for an ACK, ensure that the +# transaction skips the wait altogether (otherwise it would time-out). +# Note that prior to MDEV-34122, there was a debug assertion that would +# trigger if the transaction tried to wait but could not receive an ACK +# signal. +# +# MDEV-34122.a: Test wait_point = AFTER_SYNC +# Here, debug_sync is used to pause the leader thread between reporting +# the binlogging to semi-sync, and starting the wait for ACK; and during +# this pause, semi-sync is manually switched off and on. +connection master; +set @old_master_wait_point= @@global.rpl_semi_sync_master_wait_point; +set @old_master_wait_no_slave= @@global.rpl_semi_sync_master_wait_no_slave; +set @@global.rpl_semi_sync_master_wait_point= AFTER_SYNC; +set @@global.rpl_semi_sync_master_wait_no_slave= 1; +set @@global.debug_dbug="+d,semisync_log_skip_trx_wait"; +# Waiting for semi-sync to turn on.. +connection slave; +connection master; +create table t (a int) engine=innodb; +connection slave; +# Wait no slave is on, so we don't need to slave on to wait for the +# ACK, and this way we guarantee the ACK doesn't come, so we can verify +# the informational message is displayed +connection slave; +include/stop_slave.inc +connection server_1; +start transaction; +insert into t values (0); +set debug_sync= "commit_after_release_LOCK_log SIGNAL trx_binlogged WAIT_FOR continue_commit"; +commit; +connection master; +set debug_sync= "now WAIT_FOR trx_binlogged"; +# Switching semi-sync off/on +set @@global.rpl_semi_sync_master_enabled= 0; +set @@global.rpl_semi_sync_master_enabled= 1; +# Resuming transaction to await ACK +set debug_sync= "now SIGNAL continue_commit"; +connection server_1; +include/assert_grep.inc [Ensure error log shows that transaction is skipping its semi-sync wait] +# Cleanup +connection slave; +include/start_slave.inc +connection master; +drop table t; +connection slave; +# +# MDEV-34122.b: Test wait_point = AFTER_COMMIT +# Here, use a transaction with a non-transactional statement to write to +# the binlog directly, and turn off/on semi-sync before committing the +# transaction. +connection master; +set @@global.rpl_semi_sync_master_wait_point= AFTER_COMMIT; +# Waiting for semi-sync to turn on.. +connection slave; +connection master; +create table tn (a int) engine=Aria; +start transaction; +insert into tn values (NULL); +set @@global.rpl_semi_sync_master_enabled= 0; +set @@global.rpl_semi_sync_master_enabled= 1; +commit; +# Cleanup +connection master; +drop table tn; +set @@global.rpl_semi_sync_master_wait_point= @old_master_wait_point; +set @@global.rpl_semi_sync_master_wait_no_slave= @old_master_wait_no_slave; +set @@global.debug_dbug=@save_debug_dbug; include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_semi_sync_cond_var_per_thd.test b/mysql-test/suite/rpl/t/rpl_semi_sync_cond_var_per_thd.test index 5908a660c0f..537a4bf7fa8 100644 --- a/mysql-test/suite/rpl/t/rpl_semi_sync_cond_var_per_thd.test +++ b/mysql-test/suite/rpl/t/rpl_semi_sync_cond_var_per_thd.test @@ -19,9 +19,12 @@ # References: # MDEV-33551: Semi-sync Wait Point AFTER_COMMIT Slow on Workloads with Heavy # Concurrency +# MDEV-34122: Assertion `entry' failed in Active_tranx::assert_thd_is_waiter # --source include/have_binlog_format_row.inc +--source include/have_innodb.inc --source include/have_debug.inc +--source include/have_debug_sync.inc --source include/master-slave.inc --connection master @@ -77,4 +80,118 @@ set @@global.binlog_commit_wait_usec=@save_bgc_usec; set @@global.debug_dbug=@save_debug_dbug; drop table t1, t2, t3; + +--echo # +--echo # MDEV-34122 +--echo # If semi-sync is switched off then on while a transaction is +--echo # in-between binlogging and waiting for an ACK, ensure that the +--echo # transaction skips the wait altogether (otherwise it would time-out). +--echo # Note that prior to MDEV-34122, there was a debug assertion that would +--echo # trigger if the transaction tried to wait but could not receive an ACK +--echo # signal. + +--echo # +--echo # MDEV-34122.a: Test wait_point = AFTER_SYNC +--echo # Here, debug_sync is used to pause the leader thread between reporting +--echo # the binlogging to semi-sync, and starting the wait for ACK; and during +--echo # this pause, semi-sync is manually switched off and on. + + +--connection master +set @old_master_wait_point= @@global.rpl_semi_sync_master_wait_point; +set @old_master_wait_no_slave= @@global.rpl_semi_sync_master_wait_no_slave; +set @@global.rpl_semi_sync_master_wait_point= AFTER_SYNC; +set @@global.rpl_semi_sync_master_wait_no_slave= 1; +--eval set @@global.debug_dbug="+d,semisync_log_skip_trx_wait" + +--echo # Waiting for semi-sync to turn on.. +let $status_var= rpl_semi_sync_master_status; +let $status_var_value= ON; +source include/wait_for_status_var.inc; +--connection slave +let $status_var= rpl_semi_sync_slave_status; +let $status_var_value= ON; +source include/wait_for_status_var.inc; + +--connection master +create table t (a int) engine=innodb; +--sync_slave_with_master + +--echo # Wait no slave is on, so we don't need to slave on to wait for the +--echo # ACK, and this way we guarantee the ACK doesn't come, so we can verify +--echo # the informational message is displayed +--connection slave +--source include/stop_slave.inc + +--connection server_1 +start transaction; +insert into t values (0); +set debug_sync= "commit_after_release_LOCK_log SIGNAL trx_binlogged WAIT_FOR continue_commit"; +--send commit + +--connection master +set debug_sync= "now WAIT_FOR trx_binlogged"; + +--echo # Switching semi-sync off/on +set @@global.rpl_semi_sync_master_enabled= 0; +set @@global.rpl_semi_sync_master_enabled= 1; +let $status_var= rpl_semi_sync_master_status; +let $status_var_value= ON; +source include/wait_for_status_var.inc; + +--echo # Resuming transaction to await ACK +set debug_sync= "now SIGNAL continue_commit"; + +--connection server_1 +--reap + +--let $assert_text= Ensure error log shows that transaction is skipping its semi-sync wait +--let $assert_select=Skipping semi-sync wait for transaction +--let $assert_file= $MYSQLTEST_VARDIR/log/mysqld.1.err +--let $assert_count= 1 +--let $assert_only_after=CURRENT_TEST +--source include/assert_grep.inc + +--echo # Cleanup +--connection slave +--source include/start_slave.inc + +--connection master +drop table t; +--sync_slave_with_master + + +--echo # +--echo # MDEV-34122.b: Test wait_point = AFTER_COMMIT +--echo # Here, use a transaction with a non-transactional statement to write to +--echo # the binlog directly, and turn off/on semi-sync before committing the +--echo # transaction. + +--connection master +set @@global.rpl_semi_sync_master_wait_point= AFTER_COMMIT; + +--echo # Waiting for semi-sync to turn on.. +let $status_var= rpl_semi_sync_master_status; +let $status_var_value= ON; +source include/wait_for_status_var.inc; +--connection slave +let $status_var= rpl_semi_sync_slave_status; +let $status_var_value= ON; +source include/wait_for_status_var.inc; + +--connection master +create table tn (a int) engine=Aria; +start transaction; +insert into tn values (NULL); +set @@global.rpl_semi_sync_master_enabled= 0; +set @@global.rpl_semi_sync_master_enabled= 1; +commit; + +--echo # Cleanup +--connection master +drop table tn; +set @@global.rpl_semi_sync_master_wait_point= @old_master_wait_point; +set @@global.rpl_semi_sync_master_wait_no_slave= @old_master_wait_no_slave; +set @@global.debug_dbug=@save_debug_dbug; + --source include/rpl_end.inc diff --git a/sql/semisync_master.cc b/sql/semisync_master.cc index e7d7fa80251..00905160b46 100644 --- a/sql/semisync_master.cc +++ b/sql/semisync_master.cc @@ -360,10 +360,8 @@ void Active_tranx::unlink_thd_as_waiter(const char *log_file_name, DBUG_VOID_RETURN; } -#ifndef DBUG_OFF -void Active_tranx::assert_thd_is_waiter(THD *thd_to_check, - const char *log_file_name, - my_off_t log_file_pos) +bool Active_tranx::is_thd_waiter(THD *thd_to_check, const char *log_file_name, + my_off_t log_file_pos) { DBUG_ENTER("Active_tranx::assert_thd_is_waiter"); mysql_mutex_assert_owner(m_lock); @@ -379,13 +377,8 @@ void Active_tranx::assert_thd_is_waiter(THD *thd_to_check, entry = entry->hash_next; } - DBUG_ASSERT(entry); - DBUG_ASSERT(entry->thd); - DBUG_ASSERT(entry->thd->thread_id == thd_to_check->thread_id); - - DBUG_VOID_RETURN; + DBUG_RETURN(static_cast(entry)); } -#endif /******************************************************************************* * @@ -922,6 +915,36 @@ int Repl_semi_sync_master::commit_trx(const char *trx_wait_binlog_name, } } + /* In between the binlogging of this transaction and this wait, it is + * possible that our entry in Active_tranx was removed (i.e. if + * semi-sync was switched off and on). It is also possible that the + * event was already sent to a replica; however, we don't know if + * semi-sync was on or off at that time, so an ACK may never come. So + * skip the wait. Note that rpl_semi_sync_master_request_acks was + * already incremented in report_binlog_update(), so to keep + * rpl_semi_sync_master_yes/no_tx consistent with it, we check for a + * semi-sync restart _after_ checking the reply state. + */ + if (unlikely(!m_active_tranxs->is_thd_waiter(thd, trx_wait_binlog_name, + trx_wait_binlog_pos))) + { + DBUG_EXECUTE_IF( + "semisync_log_skip_trx_wait", + sql_print_information( + "Skipping semi-sync wait for transaction at pos %s, %lu. This " + "should be because semi-sync turned off and on during the " + "lifetime of this transaction.", + trx_wait_binlog_name, trx_wait_binlog_pos);); + + /* The only known reason for a missing entry at this point is if + * semi-sync was turned off then on, so on debug builds, we track + * the number of times semi-sync turned off at binlogging, and compare + * to the current value. */ + DBUG_ASSERT(rpl_semi_sync_master_off_times > thd->expected_semi_sync_offs); + + break; + } + /* Let us update the info about the minimum binlog position of waiting * threads. */ @@ -968,10 +991,6 @@ int Repl_semi_sync_master::commit_trx(const char *trx_wait_binlog_name, m_wait_timeout, m_wait_file_name, (ulong)m_wait_file_pos)); -#ifndef DBUG_OFF - m_active_tranxs->assert_thd_is_waiter(thd, trx_wait_binlog_name, - trx_wait_binlog_pos); -#endif create_timeout(&abstime, &start_ts); wait_result= mysql_cond_timedwait(&thd->COND_wakeup_ready, &LOCK_binlog, &abstime); @@ -1307,6 +1326,10 @@ int Repl_semi_sync_master::write_tranx_in_binlog(THD *thd, else { rpl_semi_sync_master_request_ack++; + +#ifndef DBUG_OFF + thd->expected_semi_sync_offs= rpl_semi_sync_master_off_times; +#endif } } diff --git a/sql/semisync_master.h b/sql/semisync_master.h index 3978d21a61d..a1c57959165 100644 --- a/sql/semisync_master.h +++ b/sql/semisync_master.h @@ -377,14 +377,12 @@ public: */ void unlink_thd_as_waiter(const char *log_file_name, my_off_t log_file_pos); -#ifndef DBUG_OFF /* Uses DBUG_ASSERT statements to ensure that the argument thd_to_check * matches the thread of the respective Tranx_node::thd of the passed in * log_file_name and log_file_pos. */ - void assert_thd_is_waiter(THD *thd_to_check, const char *log_file_name, - my_off_t log_file_pos); -#endif + bool is_thd_waiter(THD *thd_to_check, const char *log_file_name, + my_off_t log_file_pos); /* Given a position, check to see whether the position is an active * transaction's ending position by probing the hash table. diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 0dfda3bfa68..e75a6c78512 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -851,6 +851,11 @@ THD::THD(my_thread_id id, bool is_wsrep_applier) query_id= 0; query_name_consts= 0; semisync_info= 0; + +#ifndef DBUG_OFF + expected_semi_sync_offs= 0; +#endif + db_charset= global_system_variables.collation_database; bzero((void*) ha_data, sizeof(ha_data)); mysys_var=0; diff --git a/sql/sql_class.h b/sql/sql_class.h index 1878385383f..e52ba914038 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -2922,6 +2922,17 @@ public: /* Needed by MariaDB semi sync replication */ Trans_binlog_info *semisync_info; + +#ifndef DBUG_OFF + /* + If Active_tranx is missing an entry for a transaction which is planning to + await an ACK, this ensures that the reason is because semi-sync was turned + off then on in-between the binlogging of the transaction, and before it had + started waiting for the ACK. + */ + ulong expected_semi_sync_offs; +#endif + /* If this is a semisync slave connection. */ bool semi_sync_slave; ulonglong client_capabilities; /* What the client supports */